From 7698b8e070ec4ee0d72c72ddb46b9b176257af48 Mon Sep 17 00:00:00 2001 From: Daniel Oeh <968613+danieloeh@users.noreply.github.com> Date: Sat, 28 Jan 2023 18:13:06 +0100 Subject: [PATCH 1/3] Run OCR on image after saving (synchronously!) (#20) --- inventory/models.py | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/inventory/models.py b/inventory/models.py index 2639a4a..299bcd0 100644 --- a/inventory/models.py +++ b/inventory/models.py @@ -123,12 +123,27 @@ def image_tag(self, location=None): def update_ocr_text(self, ocr_text): self.ocr_text = ocr_text self.ocr_timestamp = make_aware(datetime.utcnow()) - self.save() + self.save(update_fields=['ocr_text', 'ocr_timestamp']) def run_ocr(self): - ocr_text = ocr_on_image_path(self.image.path) - self.update_ocr_text(ocr_text) - return ocr_text + self.ocr_text = ocr_on_image_path(self.image.path) + self.ocr_timestamp = make_aware(datetime.utcnow()) + + def save( + self, force_insert=False, force_update=False, using=None, update_fields=None + ): + if update_fields is not None and 'image' not in update_fields: + return super().save(force_insert, force_update, using, update_fields) + try: + original_instance = ItemImage.objects.get(pk=self.pk) + except ItemImage.DoesNotExist: + original_instance = None + super().save(force_insert, force_update, using, update_fields) + new_instance = ItemImage.objects.get(pk=self.pk) # Retrieve new instance with updated file path + if original_instance is None or (original_instance.ocr_text == new_instance.ocr_text and original_instance.image.path != new_instance.image.path): + self.run_ocr() + super().save(force_insert, force_update, update_fields=['ocr_text', 'ocr_timestamp']) + class ItemFile(models.Model): From a04436ee35643100db26833ba131de89f1e59ec0 Mon Sep 17 00:00:00 2001 From: Daniel Oeh <968613+danieloeh@users.noreply.github.com> Date: Thu, 8 Jun 2023 17:16:22 +0200 Subject: [PATCH 2/3] Run OCR on image after saving with celery --- imzam/__init__.py | 5 +++ inventory/models.py | 77 +++++++++++++++++++++++++++------------------ inventory/tasks.py | 16 ++++++++++ requirements.txt | 4 ++- 4 files changed, 70 insertions(+), 32 deletions(-) create mode 100644 inventory/tasks.py diff --git a/imzam/__init__.py b/imzam/__init__.py index e69de29..1e3599b 100644 --- a/imzam/__init__.py +++ b/imzam/__init__.py @@ -0,0 +1,5 @@ +# This will make sure the app is always imported when +# Django starts so that shared_task will use this app. +from .celery import app as celery_app + +__all__ = ('celery_app',) \ No newline at end of file diff --git a/inventory/models.py b/inventory/models.py index 299bcd0..b43e6d8 100644 --- a/inventory/models.py +++ b/inventory/models.py @@ -1,23 +1,24 @@ -from datetime import datetime -from string import Template import urllib.parse - -from inventory.ocr_util import ocr_on_image_path -from paho.mqtt import client as mqttc +from datetime import datetime from pydoc import describe -from typing_extensions import Required +from string import Template from xml.etree.ElementTree import Comment -from django.db import models -from django.core import validators + +import celery from computedfields.models import ComputedFieldsModel, computed +from django.conf import settings +from django.core import validators +from django.db import models from django.forms import ValidationError from django.urls import reverse from django.utils.html import escape from django.utils.safestring import mark_safe -from django.conf import settings -from django.utils.translation import gettext_lazy as _ from django.utils.timezone import make_aware +from django.utils.translation import gettext_lazy as _ +from paho.mqtt import client as mqttc +from typing_extensions import Required +from inventory.ocr_util import ocr_on_image_path # Create your models here. @@ -29,7 +30,8 @@ class Meta: # TODO use UUID as id? # id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False) - name = models.CharField(_("item name"), max_length=512, blank=True, null=True) + name = models.CharField( + _("item name"), max_length=512, blank=True, null=True) description = models.TextField(_("description"), blank=True) # TODO implement signal for automatic adoption by parent_location # https://stackoverflow.com/questions/43857902/django-set-foreign-key-to-parent_location-value-on-delete @@ -108,9 +110,12 @@ def get_item_upload_path(instance, filename): class ItemImage(models.Model): image = models.ImageField(_("image"), upload_to=get_item_upload_path) - description = models.CharField(_("description"), max_length=512, blank=True) - item = models.ForeignKey("Item", on_delete=models.CASCADE, verbose_name=_("item")) - ocr_text = models.TextField(_("ocr text"), blank=True, null=True, editable=False) + description = models.CharField( + _("description"), max_length=512, blank=True) + item = models.ForeignKey( + "Item", on_delete=models.CASCADE, verbose_name=_("item")) + ocr_text = models.TextField( + _("ocr text"), blank=True, null=True, editable=False) ocr_timestamp = models.DateTimeField(blank=True, null=True) def image_tag(self, location=None): @@ -129,6 +134,9 @@ def run_ocr(self): self.ocr_text = ocr_on_image_path(self.image.path) self.ocr_timestamp = make_aware(datetime.utcnow()) + def save_ocr_text(self): + super().save(update_fields=['ocr_text', 'ocr_timestamp']) + def save( self, force_insert=False, force_update=False, using=None, update_fields=None ): @@ -139,17 +147,19 @@ def save( except ItemImage.DoesNotExist: original_instance = None super().save(force_insert, force_update, using, update_fields) - new_instance = ItemImage.objects.get(pk=self.pk) # Retrieve new instance with updated file path - if original_instance is None or (original_instance.ocr_text == new_instance.ocr_text and original_instance.image.path != new_instance.image.path): - self.run_ocr() - super().save(force_insert, force_update, update_fields=['ocr_text', 'ocr_timestamp']) - + # Retrieve new instance with updated file path + new_instance = ItemImage.objects.get(pk=self.pk) + if original_instance is None or (original_instance.ocr_text == new_instance.ocr_text and original_instance.image.path != new_instance.image.path): + celery.current_app.send_task( + 'inventory.tasks.run_ocr_on_item_image', (self.pk,)) class ItemFile(models.Model): file = models.FileField(_("file"), upload_to=get_item_upload_path) - description = models.CharField(_("description"), max_length=512, blank=True) - item = models.ForeignKey("Item", on_delete=models.CASCADE, verbose_name=_("file")) + description = models.CharField( + _("description"), max_length=512, blank=True) + item = models.ForeignKey( + "Item", on_delete=models.CASCADE, verbose_name=_("file")) class ItemBarcode(models.Model): @@ -165,7 +175,8 @@ class ItemBarcode(models.Model): blank=True, verbose_name=_("type"), ) - item = models.ForeignKey("Item", on_delete=models.CASCADE, verbose_name=_("Item")) + item = models.ForeignKey( + "Item", on_delete=models.CASCADE, verbose_name=_("Item")) def __str__(self): return f"{repr(self.data)} ({self.type})" @@ -331,12 +342,12 @@ def get_lablary_url(self, location=None): def image_tag(self, location=None): return mark_safe( - '' % escape(self.get_lablary_url(location)) + '' % escape( + self.get_lablary_url(location)) ) image_tag.short_description = "Rendered label" image_tag.allow_tags = True - def send_to_printer(self, location=None): c = mqttc.Client(**settings.MQTT_CLIENT_KWARGS) if settings.MQTT_SERVER_SSL: @@ -345,7 +356,8 @@ def send_to_printer(self, location=None): c.username_pw_set(**settings.MQTT_PASSWORD_AUTH) c.connect(**settings.MQTT_SERVER_KWARGS) msg = c.publish( - settings.MQTT_PRINTER_TOPIC, payload=self.generate_label_zpl(location) + settings.MQTT_PRINTER_TOPIC, payload=self.generate_label_zpl( + location) ) # Messages to forbidden topics wil be silently ignored! Nothing we can do about it. msg.wait_for_publish() @@ -419,7 +431,8 @@ def clean(self): # ensure uniqueness on short_name and name per type, if type demands it if self.type.unique: # get all locations with a LocType defined as unique - unique_locs = Location.objects.exclude(pk=self.pk).filter(type__unique=True) + unique_locs = Location.objects.exclude( + pk=self.pk).filter(type__unique=True) if self.short_name in unique_locs.values_list("short_name", flat=True): raise ValidationError( {"short_name": "Short name must be unique, as defined by type."} @@ -457,7 +470,8 @@ def __str__(self): @computed( models.CharField(_("unique identifier"), max_length=64, unique=True), - depends=[("self", ["short_name"]), ("parent_location", ["unique_identifier"])], + depends=[("self", ["short_name"]), + ("parent_location", ["unique_identifier"])], ) def unique_identifier(self): if self.type.unique: @@ -465,7 +479,8 @@ def unique_identifier(self): return self.parent_location.unique_identifier + "." + self.short_name @computed( - models.CharField(_("locatable identifier"), max_length=512, unique=True), + models.CharField(_("locatable identifier"), + max_length=512, unique=True), depends=[ ("self", ["short_name"]), ("parent_location", ["locatable_identifier"]), @@ -522,7 +537,7 @@ def get_absolute_url(self): "view_location", kwargs={"pk": self.pk, "unique_identifier": self.unique_identifier}, ) - + def get_admin_url(self): return reverse( "admin:inventory_location_change", @@ -535,7 +550,8 @@ class Meta: unique_together = ["item", "location"] order_with_respect_to = "location" - item = models.ForeignKey("Item", on_delete=models.CASCADE, verbose_name=_("item")) + item = models.ForeignKey( + "Item", on_delete=models.CASCADE, verbose_name=_("item")) location = models.ForeignKey( "Location", on_delete=models.CASCADE, verbose_name=_("location") ) @@ -565,6 +581,5 @@ def amount_text(self): else: return f"{self.amount_without_zeros} {self.item.measurement_unit.short}" - def __str__(self): return f"{self.amount_text} @ {self.location.locatable_identifier}" diff --git a/inventory/tasks.py b/inventory/tasks.py new file mode 100644 index 0000000..cf2610f --- /dev/null +++ b/inventory/tasks.py @@ -0,0 +1,16 @@ +import logging + +from celery import shared_task +from django.apps import apps + +from inventory.models import ItemImage + +logger = logging.getLogger(__name__) + + +@shared_task +def run_ocr_on_item_image(pk): + logger.info(f"Running OCR on image {pk}") + item_image = ItemImage.objects.get(pk=pk) + item_image.run_ocr() + item_image.save_ocr_text() diff --git a/requirements.txt b/requirements.txt index 0275573..dfcc89c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -15,4 +15,6 @@ pymaybe django-ipware django-bootstrap-icons tqdm -pytesseract \ No newline at end of file +pytesseract +celery +redis \ No newline at end of file From 018c1ff30b165e9d109815d8b86027e34bc60233 Mon Sep 17 00:00:00 2001 From: Daniel Oeh <968613+danieloeh@users.noreply.github.com> Date: Fri, 9 Jun 2023 14:15:46 +0200 Subject: [PATCH 3/3] Added celery to prod configs --- docker-compose.yml | 8 ++- imzam/celery.py | 22 +++++++++ imzam/local_settings.example_dev.py | 7 +++ imzam/settings.py | 12 +++++ .../management/commands/celery_worker.py | 49 +++++++++++++++++++ inventory/models.py | 6 +-- inventory/tasks.py | 7 +-- web-entrypoint.sh | 1 + 8 files changed, 102 insertions(+), 10 deletions(-) create mode 100644 imzam/celery.py create mode 100644 inventory/management/commands/celery_worker.py diff --git a/docker-compose.yml b/docker-compose.yml index e757a93..06711ab 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -33,5 +33,11 @@ services: depends_on: db: condition: service_healthy - restart: unless-stopped + celery_redis: + condition: service_healthy + restart: unless-stopped + celery_redis: + image: redis + healthcheck: + test: ["CMD", "redis-cli","ping"] diff --git a/imzam/celery.py b/imzam/celery.py new file mode 100644 index 0000000..91bfc19 --- /dev/null +++ b/imzam/celery.py @@ -0,0 +1,22 @@ +import os + +from celery import Celery + +# Set the default Django settings module for the 'celery' program. +os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'imzam.settings') + +app = Celery('imzam') + +# Using a string here means the worker doesn't have to serialize +# the configuration object to child processes. +# - namespace='CELERY' means all celery-related configuration keys +# should have a `CELERY_` prefix. +app.config_from_object('django.conf:settings', namespace='CELERY') + +# Load task modules from all registered Django apps. +app.autodiscover_tasks() + + +@app.task(bind=True, ignore_result=True) +def debug_task(self): + print(f'Request: {self.request!r}') \ No newline at end of file diff --git a/imzam/local_settings.example_dev.py b/imzam/local_settings.example_dev.py index dd859ee..0e3d8f3 100644 --- a/imzam/local_settings.example_dev.py +++ b/imzam/local_settings.example_dev.py @@ -24,3 +24,10 @@ OIDC_RP_CLIENT_ID = '' OIDC_RP_CLIENT_SECRET = '' + +# ================================================================ +# Celery config +# ================================================================ +# Setting CELERY_TASK_ALWAYS_EAGER executes all celery tasks locally by blocking until the task returns. Set this to False +# if you have a local running instance of redis and use the command 'python manage.py celery_worker run' to start the celery worker +CELERY_TASK_ALWAYS_EAGER = True \ No newline at end of file diff --git a/imzam/settings.py b/imzam/settings.py index e5f2be2..706e436 100644 --- a/imzam/settings.py +++ b/imzam/settings.py @@ -233,6 +233,18 @@ username=os.getenv("MQTT_ZAMIP_USERNAME", "inv.zam.haus-django"), password=os.getenv("MQTT_ZAMIP_PASSWORD", "")) +# ================================================================ +# Celery config +# ================================================================ +CELERY_BROKER_CONNECTION_RETRY_ON_STARTUP = True +CELERY_BROKER_CONNECTION_MAX_RETRIES = 5 + +# Setting CELERY_TASK_ALWAYS_EAGER executes all celery tasks locally by blocking until the task returns. Set this to False +# if you have a local running instance of redis and use the command 'python manage.py celery_worker run' to start the celery worker +CELERY_TASK_ALWAYS_EAGER = False + +CELERY_BROKER_URL = "redis://celery_redis" + # Overwrite default settings with local_settings.py configuration if not os.getenv("IGNORE_LOCAL_SETTINGS", False): diff --git a/inventory/management/commands/celery_worker.py b/inventory/management/commands/celery_worker.py new file mode 100644 index 0000000..4ea0510 --- /dev/null +++ b/inventory/management/commands/celery_worker.py @@ -0,0 +1,49 @@ +""" +Utility script for starting Celery worker +""" +import shlex +import subprocess +import imzam.celery +from django.core.management.base import BaseCommand, CommandError + +CELERY_APP_NAME = "imzam" + + +def start_celery(background=False): + cmd = shlex.split(f'celery -A {CELERY_APP_NAME} worker -l INFO') + if background: + subprocess.Popen(cmd, stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL) + else: + subprocess.call(cmd) + + +def shutdown_celery(): + cmd = shlex.split(f'celery -A {CELERY_APP_NAME} control shutdown') + subprocess.call(cmd) + + +class Command(BaseCommand): + + def add_arguments(self, parser): + parser.add_argument( + 'command', help="Command for celery worker (either 'run' or 'stop')", type=str) + parser.add_argument('--background', action='store_true', + default=False, help="Run worker in background") + + def handle(self, *args, **kwargs): + command = kwargs['command'] + background = kwargs['background'] + if command == 'run': + print( + f'Starting celery worker{" in background" if background else ""}') + start_celery(background) + elif command == 'stop': + print('Stopping celery worker') + if background: + raise CommandError( + f"Invalid option '--background' for 'stop' command") + shutdown_celery() + else: + raise CommandError( + f"Unknown command: must be either 'run' or 'stop'.") diff --git a/inventory/models.py b/inventory/models.py index b43e6d8..6094ec3 100644 --- a/inventory/models.py +++ b/inventory/models.py @@ -4,7 +4,6 @@ from string import Template from xml.etree.ElementTree import Comment -import celery from computedfields.models import ComputedFieldsModel, computed from django.conf import settings from django.core import validators @@ -17,7 +16,7 @@ from django.utils.translation import gettext_lazy as _ from paho.mqtt import client as mqttc from typing_extensions import Required - +from inventory.tasks import run_ocr_on_item_image from inventory.ocr_util import ocr_on_image_path # Create your models here. @@ -150,8 +149,7 @@ def save( # Retrieve new instance with updated file path new_instance = ItemImage.objects.get(pk=self.pk) if original_instance is None or (original_instance.ocr_text == new_instance.ocr_text and original_instance.image.path != new_instance.image.path): - celery.current_app.send_task( - 'inventory.tasks.run_ocr_on_item_image', (self.pk,)) + run_ocr_on_item_image.delay(self.pk) class ItemFile(models.Model): diff --git a/inventory/tasks.py b/inventory/tasks.py index cf2610f..8f35dd9 100644 --- a/inventory/tasks.py +++ b/inventory/tasks.py @@ -2,15 +2,12 @@ from celery import shared_task from django.apps import apps - -from inventory.models import ItemImage - logger = logging.getLogger(__name__) @shared_task def run_ocr_on_item_image(pk): - logger.info(f"Running OCR on image {pk}") - item_image = ItemImage.objects.get(pk=pk) + logger.warning(f"Running OCR on image {pk}") + item_image = apps.get_model('inventory', 'ItemImage').objects.get(pk=pk) item_image.run_ocr() item_image.save_ocr_text() diff --git a/web-entrypoint.sh b/web-entrypoint.sh index 255608f..c4e6891 100755 --- a/web-entrypoint.sh +++ b/web-entrypoint.sh @@ -3,4 +3,5 @@ ./manage.py compilemessages ./manage.py collectstatic --noinput ./manage.py migrate +./manage.py celery_worker run --background gunicorn imzam.wsgi -b 0.0.0.0:8000