From 7698b8e070ec4ee0d72c72ddb46b9b176257af48 Mon Sep 17 00:00:00 2001
From: Daniel Oeh <968613+danieloeh@users.noreply.github.com>
Date: Sat, 28 Jan 2023 18:13:06 +0100
Subject: [PATCH 1/3] Run OCR on image after saving (synchronously!) (#20)
---
inventory/models.py | 23 +++++++++++++++++++----
1 file changed, 19 insertions(+), 4 deletions(-)
diff --git a/inventory/models.py b/inventory/models.py
index 2639a4a..299bcd0 100644
--- a/inventory/models.py
+++ b/inventory/models.py
@@ -123,12 +123,27 @@ def image_tag(self, location=None):
def update_ocr_text(self, ocr_text):
self.ocr_text = ocr_text
self.ocr_timestamp = make_aware(datetime.utcnow())
- self.save()
+ self.save(update_fields=['ocr_text', 'ocr_timestamp'])
def run_ocr(self):
- ocr_text = ocr_on_image_path(self.image.path)
- self.update_ocr_text(ocr_text)
- return ocr_text
+ self.ocr_text = ocr_on_image_path(self.image.path)
+ self.ocr_timestamp = make_aware(datetime.utcnow())
+
+ def save(
+ self, force_insert=False, force_update=False, using=None, update_fields=None
+ ):
+ if update_fields is not None and 'image' not in update_fields:
+ return super().save(force_insert, force_update, using, update_fields)
+ try:
+ original_instance = ItemImage.objects.get(pk=self.pk)
+ except ItemImage.DoesNotExist:
+ original_instance = None
+ super().save(force_insert, force_update, using, update_fields)
+ new_instance = ItemImage.objects.get(pk=self.pk) # Retrieve new instance with updated file path
+ if original_instance is None or (original_instance.ocr_text == new_instance.ocr_text and original_instance.image.path != new_instance.image.path):
+ self.run_ocr()
+ super().save(force_insert, force_update, update_fields=['ocr_text', 'ocr_timestamp'])
+
class ItemFile(models.Model):
From a04436ee35643100db26833ba131de89f1e59ec0 Mon Sep 17 00:00:00 2001
From: Daniel Oeh <968613+danieloeh@users.noreply.github.com>
Date: Thu, 8 Jun 2023 17:16:22 +0200
Subject: [PATCH 2/3] Run OCR on image after saving with celery
---
imzam/__init__.py | 5 +++
inventory/models.py | 77 +++++++++++++++++++++++++++------------------
inventory/tasks.py | 16 ++++++++++
requirements.txt | 4 ++-
4 files changed, 70 insertions(+), 32 deletions(-)
create mode 100644 inventory/tasks.py
diff --git a/imzam/__init__.py b/imzam/__init__.py
index e69de29..1e3599b 100644
--- a/imzam/__init__.py
+++ b/imzam/__init__.py
@@ -0,0 +1,5 @@
+# This will make sure the app is always imported when
+# Django starts so that shared_task will use this app.
+from .celery import app as celery_app
+
+__all__ = ('celery_app',)
\ No newline at end of file
diff --git a/inventory/models.py b/inventory/models.py
index 299bcd0..b43e6d8 100644
--- a/inventory/models.py
+++ b/inventory/models.py
@@ -1,23 +1,24 @@
-from datetime import datetime
-from string import Template
import urllib.parse
-
-from inventory.ocr_util import ocr_on_image_path
-from paho.mqtt import client as mqttc
+from datetime import datetime
from pydoc import describe
-from typing_extensions import Required
+from string import Template
from xml.etree.ElementTree import Comment
-from django.db import models
-from django.core import validators
+
+import celery
from computedfields.models import ComputedFieldsModel, computed
+from django.conf import settings
+from django.core import validators
+from django.db import models
from django.forms import ValidationError
from django.urls import reverse
from django.utils.html import escape
from django.utils.safestring import mark_safe
-from django.conf import settings
-from django.utils.translation import gettext_lazy as _
from django.utils.timezone import make_aware
+from django.utils.translation import gettext_lazy as _
+from paho.mqtt import client as mqttc
+from typing_extensions import Required
+from inventory.ocr_util import ocr_on_image_path
# Create your models here.
@@ -29,7 +30,8 @@ class Meta:
# TODO use UUID as id?
# id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False)
- name = models.CharField(_("item name"), max_length=512, blank=True, null=True)
+ name = models.CharField(
+ _("item name"), max_length=512, blank=True, null=True)
description = models.TextField(_("description"), blank=True)
# TODO implement signal for automatic adoption by parent_location
# https://stackoverflow.com/questions/43857902/django-set-foreign-key-to-parent_location-value-on-delete
@@ -108,9 +110,12 @@ def get_item_upload_path(instance, filename):
class ItemImage(models.Model):
image = models.ImageField(_("image"), upload_to=get_item_upload_path)
- description = models.CharField(_("description"), max_length=512, blank=True)
- item = models.ForeignKey("Item", on_delete=models.CASCADE, verbose_name=_("item"))
- ocr_text = models.TextField(_("ocr text"), blank=True, null=True, editable=False)
+ description = models.CharField(
+ _("description"), max_length=512, blank=True)
+ item = models.ForeignKey(
+ "Item", on_delete=models.CASCADE, verbose_name=_("item"))
+ ocr_text = models.TextField(
+ _("ocr text"), blank=True, null=True, editable=False)
ocr_timestamp = models.DateTimeField(blank=True, null=True)
def image_tag(self, location=None):
@@ -129,6 +134,9 @@ def run_ocr(self):
self.ocr_text = ocr_on_image_path(self.image.path)
self.ocr_timestamp = make_aware(datetime.utcnow())
+ def save_ocr_text(self):
+ super().save(update_fields=['ocr_text', 'ocr_timestamp'])
+
def save(
self, force_insert=False, force_update=False, using=None, update_fields=None
):
@@ -139,17 +147,19 @@ def save(
except ItemImage.DoesNotExist:
original_instance = None
super().save(force_insert, force_update, using, update_fields)
- new_instance = ItemImage.objects.get(pk=self.pk) # Retrieve new instance with updated file path
- if original_instance is None or (original_instance.ocr_text == new_instance.ocr_text and original_instance.image.path != new_instance.image.path):
- self.run_ocr()
- super().save(force_insert, force_update, update_fields=['ocr_text', 'ocr_timestamp'])
-
+ # Retrieve new instance with updated file path
+ new_instance = ItemImage.objects.get(pk=self.pk)
+ if original_instance is None or (original_instance.ocr_text == new_instance.ocr_text and original_instance.image.path != new_instance.image.path):
+ celery.current_app.send_task(
+ 'inventory.tasks.run_ocr_on_item_image', (self.pk,))
class ItemFile(models.Model):
file = models.FileField(_("file"), upload_to=get_item_upload_path)
- description = models.CharField(_("description"), max_length=512, blank=True)
- item = models.ForeignKey("Item", on_delete=models.CASCADE, verbose_name=_("file"))
+ description = models.CharField(
+ _("description"), max_length=512, blank=True)
+ item = models.ForeignKey(
+ "Item", on_delete=models.CASCADE, verbose_name=_("file"))
class ItemBarcode(models.Model):
@@ -165,7 +175,8 @@ class ItemBarcode(models.Model):
blank=True,
verbose_name=_("type"),
)
- item = models.ForeignKey("Item", on_delete=models.CASCADE, verbose_name=_("Item"))
+ item = models.ForeignKey(
+ "Item", on_delete=models.CASCADE, verbose_name=_("Item"))
def __str__(self):
return f"{repr(self.data)} ({self.type})"
@@ -331,12 +342,12 @@ def get_lablary_url(self, location=None):
def image_tag(self, location=None):
return mark_safe(
- '' % escape(self.get_lablary_url(location))
+ '' % escape(
+ self.get_lablary_url(location))
)
image_tag.short_description = "Rendered label"
image_tag.allow_tags = True
-
def send_to_printer(self, location=None):
c = mqttc.Client(**settings.MQTT_CLIENT_KWARGS)
if settings.MQTT_SERVER_SSL:
@@ -345,7 +356,8 @@ def send_to_printer(self, location=None):
c.username_pw_set(**settings.MQTT_PASSWORD_AUTH)
c.connect(**settings.MQTT_SERVER_KWARGS)
msg = c.publish(
- settings.MQTT_PRINTER_TOPIC, payload=self.generate_label_zpl(location)
+ settings.MQTT_PRINTER_TOPIC, payload=self.generate_label_zpl(
+ location)
)
# Messages to forbidden topics wil be silently ignored! Nothing we can do about it.
msg.wait_for_publish()
@@ -419,7 +431,8 @@ def clean(self):
# ensure uniqueness on short_name and name per type, if type demands it
if self.type.unique:
# get all locations with a LocType defined as unique
- unique_locs = Location.objects.exclude(pk=self.pk).filter(type__unique=True)
+ unique_locs = Location.objects.exclude(
+ pk=self.pk).filter(type__unique=True)
if self.short_name in unique_locs.values_list("short_name", flat=True):
raise ValidationError(
{"short_name": "Short name must be unique, as defined by type."}
@@ -457,7 +470,8 @@ def __str__(self):
@computed(
models.CharField(_("unique identifier"), max_length=64, unique=True),
- depends=[("self", ["short_name"]), ("parent_location", ["unique_identifier"])],
+ depends=[("self", ["short_name"]),
+ ("parent_location", ["unique_identifier"])],
)
def unique_identifier(self):
if self.type.unique:
@@ -465,7 +479,8 @@ def unique_identifier(self):
return self.parent_location.unique_identifier + "." + self.short_name
@computed(
- models.CharField(_("locatable identifier"), max_length=512, unique=True),
+ models.CharField(_("locatable identifier"),
+ max_length=512, unique=True),
depends=[
("self", ["short_name"]),
("parent_location", ["locatable_identifier"]),
@@ -522,7 +537,7 @@ def get_absolute_url(self):
"view_location",
kwargs={"pk": self.pk, "unique_identifier": self.unique_identifier},
)
-
+
def get_admin_url(self):
return reverse(
"admin:inventory_location_change",
@@ -535,7 +550,8 @@ class Meta:
unique_together = ["item", "location"]
order_with_respect_to = "location"
- item = models.ForeignKey("Item", on_delete=models.CASCADE, verbose_name=_("item"))
+ item = models.ForeignKey(
+ "Item", on_delete=models.CASCADE, verbose_name=_("item"))
location = models.ForeignKey(
"Location", on_delete=models.CASCADE, verbose_name=_("location")
)
@@ -565,6 +581,5 @@ def amount_text(self):
else:
return f"{self.amount_without_zeros} {self.item.measurement_unit.short}"
-
def __str__(self):
return f"{self.amount_text} @ {self.location.locatable_identifier}"
diff --git a/inventory/tasks.py b/inventory/tasks.py
new file mode 100644
index 0000000..cf2610f
--- /dev/null
+++ b/inventory/tasks.py
@@ -0,0 +1,16 @@
+import logging
+
+from celery import shared_task
+from django.apps import apps
+
+from inventory.models import ItemImage
+
+logger = logging.getLogger(__name__)
+
+
+@shared_task
+def run_ocr_on_item_image(pk):
+ logger.info(f"Running OCR on image {pk}")
+ item_image = ItemImage.objects.get(pk=pk)
+ item_image.run_ocr()
+ item_image.save_ocr_text()
diff --git a/requirements.txt b/requirements.txt
index 0275573..dfcc89c 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -15,4 +15,6 @@ pymaybe
django-ipware
django-bootstrap-icons
tqdm
-pytesseract
\ No newline at end of file
+pytesseract
+celery
+redis
\ No newline at end of file
From 018c1ff30b165e9d109815d8b86027e34bc60233 Mon Sep 17 00:00:00 2001
From: Daniel Oeh <968613+danieloeh@users.noreply.github.com>
Date: Fri, 9 Jun 2023 14:15:46 +0200
Subject: [PATCH 3/3] Added celery to prod configs
---
docker-compose.yml | 8 ++-
imzam/celery.py | 22 +++++++++
imzam/local_settings.example_dev.py | 7 +++
imzam/settings.py | 12 +++++
.../management/commands/celery_worker.py | 49 +++++++++++++++++++
inventory/models.py | 6 +--
inventory/tasks.py | 7 +--
web-entrypoint.sh | 1 +
8 files changed, 102 insertions(+), 10 deletions(-)
create mode 100644 imzam/celery.py
create mode 100644 inventory/management/commands/celery_worker.py
diff --git a/docker-compose.yml b/docker-compose.yml
index e757a93..06711ab 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -33,5 +33,11 @@ services:
depends_on:
db:
condition: service_healthy
- restart: unless-stopped
+ celery_redis:
+ condition: service_healthy
+ restart: unless-stopped
+ celery_redis:
+ image: redis
+ healthcheck:
+ test: ["CMD", "redis-cli","ping"]
diff --git a/imzam/celery.py b/imzam/celery.py
new file mode 100644
index 0000000..91bfc19
--- /dev/null
+++ b/imzam/celery.py
@@ -0,0 +1,22 @@
+import os
+
+from celery import Celery
+
+# Set the default Django settings module for the 'celery' program.
+os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'imzam.settings')
+
+app = Celery('imzam')
+
+# Using a string here means the worker doesn't have to serialize
+# the configuration object to child processes.
+# - namespace='CELERY' means all celery-related configuration keys
+# should have a `CELERY_` prefix.
+app.config_from_object('django.conf:settings', namespace='CELERY')
+
+# Load task modules from all registered Django apps.
+app.autodiscover_tasks()
+
+
+@app.task(bind=True, ignore_result=True)
+def debug_task(self):
+ print(f'Request: {self.request!r}')
\ No newline at end of file
diff --git a/imzam/local_settings.example_dev.py b/imzam/local_settings.example_dev.py
index dd859ee..0e3d8f3 100644
--- a/imzam/local_settings.example_dev.py
+++ b/imzam/local_settings.example_dev.py
@@ -24,3 +24,10 @@
OIDC_RP_CLIENT_ID = ''
OIDC_RP_CLIENT_SECRET = ''
+
+# ================================================================
+# Celery config
+# ================================================================
+# Setting CELERY_TASK_ALWAYS_EAGER executes all celery tasks locally by blocking until the task returns. Set this to False
+# if you have a local running instance of redis and use the command 'python manage.py celery_worker run' to start the celery worker
+CELERY_TASK_ALWAYS_EAGER = True
\ No newline at end of file
diff --git a/imzam/settings.py b/imzam/settings.py
index e5f2be2..706e436 100644
--- a/imzam/settings.py
+++ b/imzam/settings.py
@@ -233,6 +233,18 @@
username=os.getenv("MQTT_ZAMIP_USERNAME", "inv.zam.haus-django"),
password=os.getenv("MQTT_ZAMIP_PASSWORD", ""))
+# ================================================================
+# Celery config
+# ================================================================
+CELERY_BROKER_CONNECTION_RETRY_ON_STARTUP = True
+CELERY_BROKER_CONNECTION_MAX_RETRIES = 5
+
+# Setting CELERY_TASK_ALWAYS_EAGER executes all celery tasks locally by blocking until the task returns. Set this to False
+# if you have a local running instance of redis and use the command 'python manage.py celery_worker run' to start the celery worker
+CELERY_TASK_ALWAYS_EAGER = False
+
+CELERY_BROKER_URL = "redis://celery_redis"
+
# Overwrite default settings with local_settings.py configuration
if not os.getenv("IGNORE_LOCAL_SETTINGS", False):
diff --git a/inventory/management/commands/celery_worker.py b/inventory/management/commands/celery_worker.py
new file mode 100644
index 0000000..4ea0510
--- /dev/null
+++ b/inventory/management/commands/celery_worker.py
@@ -0,0 +1,49 @@
+"""
+Utility script for starting Celery worker
+"""
+import shlex
+import subprocess
+import imzam.celery
+from django.core.management.base import BaseCommand, CommandError
+
+CELERY_APP_NAME = "imzam"
+
+
+def start_celery(background=False):
+ cmd = shlex.split(f'celery -A {CELERY_APP_NAME} worker -l INFO')
+ if background:
+ subprocess.Popen(cmd, stdout=subprocess.DEVNULL,
+ stderr=subprocess.DEVNULL)
+ else:
+ subprocess.call(cmd)
+
+
+def shutdown_celery():
+ cmd = shlex.split(f'celery -A {CELERY_APP_NAME} control shutdown')
+ subprocess.call(cmd)
+
+
+class Command(BaseCommand):
+
+ def add_arguments(self, parser):
+ parser.add_argument(
+ 'command', help="Command for celery worker (either 'run' or 'stop')", type=str)
+ parser.add_argument('--background', action='store_true',
+ default=False, help="Run worker in background")
+
+ def handle(self, *args, **kwargs):
+ command = kwargs['command']
+ background = kwargs['background']
+ if command == 'run':
+ print(
+ f'Starting celery worker{" in background" if background else ""}')
+ start_celery(background)
+ elif command == 'stop':
+ print('Stopping celery worker')
+ if background:
+ raise CommandError(
+ f"Invalid option '--background' for 'stop' command")
+ shutdown_celery()
+ else:
+ raise CommandError(
+ f"Unknown command: must be either 'run' or 'stop'.")
diff --git a/inventory/models.py b/inventory/models.py
index b43e6d8..6094ec3 100644
--- a/inventory/models.py
+++ b/inventory/models.py
@@ -4,7 +4,6 @@
from string import Template
from xml.etree.ElementTree import Comment
-import celery
from computedfields.models import ComputedFieldsModel, computed
from django.conf import settings
from django.core import validators
@@ -17,7 +16,7 @@
from django.utils.translation import gettext_lazy as _
from paho.mqtt import client as mqttc
from typing_extensions import Required
-
+from inventory.tasks import run_ocr_on_item_image
from inventory.ocr_util import ocr_on_image_path
# Create your models here.
@@ -150,8 +149,7 @@ def save(
# Retrieve new instance with updated file path
new_instance = ItemImage.objects.get(pk=self.pk)
if original_instance is None or (original_instance.ocr_text == new_instance.ocr_text and original_instance.image.path != new_instance.image.path):
- celery.current_app.send_task(
- 'inventory.tasks.run_ocr_on_item_image', (self.pk,))
+ run_ocr_on_item_image.delay(self.pk)
class ItemFile(models.Model):
diff --git a/inventory/tasks.py b/inventory/tasks.py
index cf2610f..8f35dd9 100644
--- a/inventory/tasks.py
+++ b/inventory/tasks.py
@@ -2,15 +2,12 @@
from celery import shared_task
from django.apps import apps
-
-from inventory.models import ItemImage
-
logger = logging.getLogger(__name__)
@shared_task
def run_ocr_on_item_image(pk):
- logger.info(f"Running OCR on image {pk}")
- item_image = ItemImage.objects.get(pk=pk)
+ logger.warning(f"Running OCR on image {pk}")
+ item_image = apps.get_model('inventory', 'ItemImage').objects.get(pk=pk)
item_image.run_ocr()
item_image.save_ocr_text()
diff --git a/web-entrypoint.sh b/web-entrypoint.sh
index 255608f..c4e6891 100755
--- a/web-entrypoint.sh
+++ b/web-entrypoint.sh
@@ -3,4 +3,5 @@
./manage.py compilemessages
./manage.py collectstatic --noinput
./manage.py migrate
+./manage.py celery_worker run --background
gunicorn imzam.wsgi -b 0.0.0.0:8000