Skip to content

Commit

Permalink
Merge pull request #241 from eaudeweb/keyword-fixes
Browse files Browse the repository at this point in the history
Keyword fixes
  • Loading branch information
arielpontes authored Feb 10, 2020
2 parents 36c5e5e + 75b52ec commit ddf9898
Show file tree
Hide file tree
Showing 11 changed files with 188 additions and 91 deletions.
13 changes: 13 additions & 0 deletions app/fields.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
from django.db import models


class LowerCharField(models.CharField):
    """A ``CharField`` whose string values are normalised to lower case.

    The value is lower-cased both when a model instance is saved and when a
    value is prepared as a query parameter, so that what is looked up agrees
    with what is stored.
    """

    def get_prep_value(self, value):
        """Return ``value`` prepared for the database, lower-cased if a string.

        Lower-casing only in ``pre_save`` leaves lookups that prepare their
        right-hand side through the field (e.g. ``exact``/``in``, and hence
        ``get_or_create``) comparing mixed-case input against lower-case
        rows: the lookup misses and the following insert collides with the
        stored lower-case value on a unique column.
        """
        value = super().get_prep_value(value)
        if isinstance(value, str):
            return value.lower()
        return value

    def pre_save(self, model_instance, add):
        """Lower-case the attribute on ``model_instance`` before saving.

        Returns the lower-cased value for non-empty strings; every other
        value (``None``, empty string, non-string) is delegated to the
        parent implementation.
        """
        value = getattr(model_instance, self.attname, None)
        # Guard on str: a truthy non-string value has no ``lower()``.
        if value and isinstance(value, str):
            value = value.lower()
            # Write back so the in-memory instance matches the saved value.
            setattr(model_instance, self.attname, value)
            return value
        return super().pre_save(model_instance, add)
21 changes: 8 additions & 13 deletions app/management/commands/base/notify.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ def get_tenders(self):

def handle(self, *args, **options):
digest = options['digest']
changed_tenders = self.scrap_tenders()
changed_tenders = self.scrape_tenders()

if changed_tenders:
BaseNotifyCommand.send_update_email(changed_tenders, digest, self.notification_type())
Expand Down Expand Up @@ -77,26 +77,21 @@ def send_update_email(tenders, digest, notification_type):
email = build_email(subject, recipients, None, html_content)
email.send()

def scrap_tenders(self):
def scrape_tenders(self):
tenders = self.get_tenders()

ungm_tenders = []
ted_tenders = []
for tender in tenders:
if tender.source == dict(SOURCE_CHOICES).get('TED'):
ted_tenders.append(tender)
else:
ungm_tenders.append(tender)
ted_tenders = tenders.filter(source=dict(SOURCE_CHOICES).get('TED'))
ungm_tenders = tenders.filter(source=dict(SOURCE_CHOICES).get('UNGM'))

changed_ted_tenders = []
if len(ted_tenders):
if ted_tenders.exists():
w = TEDWorker()
w.ftp_download_tender_archive(ted_tenders)
changed_ted_tenders = w.parse_notices(ted_tenders)
changed_ted_tenders, _ = w.parse_notices(ted_tenders)

changed_ungm_tenders = []
if len(ungm_tenders):
if ungm_tenders.exists():
w = UNGMWorker()
changed_ungm_tenders, tenders_count = w.parse_tenders(ungm_tenders)
changed_ungm_tenders, _ = w.parse_tenders(ungm_tenders)

return changed_ted_tenders + changed_ungm_tenders
3 changes: 1 addition & 2 deletions app/management/commands/notify_keywords.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,5 +10,4 @@ def notification_type(self):
return 'Keyword'

def get_tenders(self):
tenders = Tender.objects.filter(has_keywords=True).order_by('-published')
return tenders
return Tender.objects.filter(has_keywords=True).order_by('-published')
1 change: 1 addition & 0 deletions app/management/commands/update_ted.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,3 +50,4 @@ def handle(self, *args, **options):
self.style.ERROR('TED tenders update failed: {}'.format(error))
)
send_error_email(str(error))
raise
1 change: 1 addition & 0 deletions app/management/commands/update_ungm.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ def handle(self, *args, **kwargs):
self.style.ERROR('TED tenders update failed: {}'.format(error))
)
send_error_email(str(error))
raise

def add_arguments(self, parser):
parser.add_argument(
Expand Down
34 changes: 34 additions & 0 deletions app/migrations/0028_make_keywords_lowercase.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# Generated by Django 2.2.9 on 2020-02-06 13:46

import app.fields
from django.db import migrations
from django.db.models import F
from django.db.models.functions import Lower


def forwards_func(apps, schema_editor):
    """Overwrite the ``value`` of every Keyword row with its lower-cased form.

    Uses the historical ``Keyword`` model and the database alias of the
    connection the migration is running on.
    """
    keyword_model = apps.get_model("app", "Keyword")
    alias = schema_editor.connection.alias
    # Single UPDATE statement; the lower-casing is done by the database.
    keyword_model.objects.using(alias).update(value=Lower('value'))


def reverse_func(apps, schema_editor):
    """Do nothing when the migration is reversed.

    The forward step overwrites each value with its lower-cased form, so
    the original casing is not available to restore.
    """
    return None


class Migration(migrations.Migration):
    """Lower-case existing keyword values, then alter ``Keyword.value``."""

    dependencies = [('app', '0027_rename_winner_to_award')]

    operations = [
        # The data step is listed first, so existing rows are lower-cased
        # before the field is altered to the unique LowerCharField.
        migrations.RunPython(code=forwards_func, reverse_code=reverse_func),
        migrations.AlterField(
            model_name='keyword',
            name='value',
            field=app.fields.LowerCharField(unique=True, max_length=50),
        ),
    ]
18 changes: 18 additions & 0 deletions app/migrations/0029_fix_tender_keywords_related_name.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Generated by Django 2.2.9 on 2020-02-07 09:31

from django.db import migrations, models


class Migration(migrations.Migration):
    """Alter ``Tender.keywords`` so its ``related_name`` is ``tenders``."""

    dependencies = [('app', '0028_make_keywords_lowercase')]

    operations = [
        migrations.AlterField(
            model_name='tender',
            name='keywords',
            field=models.ManyToManyField(
                to='app.Keyword',
                related_name='tenders',
                blank=True,
            ),
        ),
    ]
64 changes: 38 additions & 26 deletions app/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
from django.contrib.postgres.fields import ArrayField
from django.contrib.auth.models import User

from app.fields import LowerCharField

import re

from tika import parser
Expand All @@ -20,11 +22,21 @@


class Keyword(models.Model):
value = models.CharField(max_length=50)
value = LowerCharField(max_length=50, unique=True)

def __str__(self):
    """Return the keyword's ``value`` rendered as a string."""
    return str(self.value)

@staticmethod
def get_values_list():
    """Return the ``value`` of every stored Keyword as a plain list."""
    values = Keyword.objects.values_list('value', flat=True)
    return list(values)


def keywords_in(text):
    """Return a list of the keywords that occur as whole words in ``text``.

    ``text`` is converted to a string, lower-cased and broken into runs of
    word characters; a keyword is reported when it equals one of those
    words. The result holds each matching keyword once, in no particular
    order. Because the comparison is word by word, a keyword that itself
    contains non-word characters (e.g. a space) cannot match.

    ``None`` is treated as empty text rather than as the string "None".
    """
    if text is None:
        # str(None) would produce the word "none" and could falsely match
        # a keyword of that name for fields that simply have no content.
        return []

    # findall never yields empty strings, unlike re.split on leading or
    # trailing separators, so an empty keyword value cannot match.
    words = set(re.findall(r"\w+", str(text).lower()))
    if not words:
        return []

    return list(set(Keyword.get_values_list()) & words)


class Tender(models.Model):
reference = models.CharField(unique=True, max_length=255)
Expand All @@ -42,15 +54,17 @@ class Tender(models.Model):
source = models.CharField(max_length=10, choices=SOURCE_CHOICES)
unspsc_codes = models.CharField(max_length=1024, null=True, blank=True)
cpv_codes = models.CharField(max_length=1024, null=True, blank=True)
seen_by = models.ForeignKey(User, on_delete=models.CASCADE, default=None, null=True, blank=True)
keywords = models.ManyToManyField(Keyword, related_name="keywords", blank=True)
seen_by = models.ForeignKey(
User, on_delete=models.CASCADE, default=None, null=True, blank=True)
keywords = models.ManyToManyField(
Keyword, related_name="tenders", blank=True)

def __str__(self):
return '{}'.format(self.title)

@cached_property
def marked_keyword_title(self):
keywords = Tender.get_keywords_setting()
keywords = Keyword.get_values_list()
title = self.title or ''
if not keywords:
return title
Expand All @@ -60,37 +74,34 @@ def marked_keyword_title(self):

@cached_property
def marked_keyword_description(self):
keywords = Tender.get_keywords_setting()
keywords = Keyword.get_values_list()
description = self.description or ''
if not keywords:
return description

regex = r'(' + r'|'.join(keywords) + r')'
return re.sub(regex, r'<mark>\1</mark>', description, flags=re.IGNORECASE)
return re.sub(
regex, r'<mark>\1</mark>', description, flags=re.IGNORECASE)

def find_keywords(self, fields):
"""
Return queryset with all Keyword objects whose value was found in the
text of any of the fields.
"""
found_keywords = []

@staticmethod
def check_contains(value):
keywords = set(Tender.get_keywords_setting())
return list(keywords & set(re.sub("[^a-zA-Z0-9 ]", " ", str(value)).lower().split()))
for field in fields:
field_content = getattr(self, field)
found_keywords += keywords_in(field_content)

@staticmethod
def get_keywords_setting():
return list(Keyword.objects.values_list('value', flat=True))
return Keyword.objects.filter(value__in=found_keywords)

def save(self, *args, **kwargs):
self.has_keywords = any(self.check_contains(getattr(self, field)) for field in fields)
keywords = list(self.find_keywords(fields))
if keywords:
self.has_keywords = True
super().save(*args, **kwargs)

self.keywords.clear()

found_keywords = []
for field in fields:
found_keywords += self.check_contains(getattr(self, field))

found_keywords = list(dict.fromkeys(found_keywords))

for keyword in found_keywords:
self.keywords.add(Keyword.objects.get(value=keyword))
self.keywords.set(keywords)


class Vendor(models.Model):
Expand Down Expand Up @@ -223,7 +234,8 @@ class Task(models.Model):
kwargs = models.CharField(max_length=255, null=True, blank=True)
started = models.DateTimeField(null=True, blank=True, default=None)
stopped = models.DateTimeField(null=True, blank=True, default=None)
status = models.CharField(max_length=255, null=True, blank=True, default="processing")
status = models.CharField(
max_length=255, null=True, blank=True, default="processing")
output = models.TextField(max_length=5055, null=True, blank=True)

def __str__(self):
Expand Down
Loading

0 comments on commit ddf9898

Please sign in to comment.