Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Run OCR on image after saving (synchronously!) (#20) #26

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -33,5 +33,11 @@ services:
depends_on:
db:
condition: service_healthy
restart: unless-stopped
celery_redis:
condition: service_healthy
restart: unless-stopped
celery_redis:
image: redis
healthcheck:
test: ["CMD", "redis-cli","ping"]

5 changes: 5 additions & 0 deletions imzam/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# This will make sure the app is always imported when
# Django starts so that shared_task will use this app.
from .celery import app as celery_app

__all__ = ('celery_app',)
22 changes: 22 additions & 0 deletions imzam/celery.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
import os

from celery import Celery

# Set the default Django settings module for the 'celery' program.
# setdefault() leaves an already-set DJANGO_SETTINGS_MODULE untouched.
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'imzam.settings')

# Project-wide Celery application; the package __init__ re-exports it
# as `celery_app`.
app = Celery('imzam')

# Using a string here means the worker doesn't have to serialize
# the configuration object to child processes.
# - namespace='CELERY' means all celery-related configuration keys
# should have a `CELERY_` prefix.
app.config_from_object('django.conf:settings', namespace='CELERY')

# Load task modules from all registered Django apps.
app.autodiscover_tasks()


@app.task(bind=True, ignore_result=True)
def debug_task(self):
    """Print the repr of the bound task's request (diagnostic helper)."""
    request = self.request
    print(f'Request: {request!r}')
7 changes: 7 additions & 0 deletions imzam/local_settings.example_dev.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,3 +24,10 @@

OIDC_RP_CLIENT_ID = ''
OIDC_RP_CLIENT_SECRET = ''

# ================================================================
# Celery config
# ================================================================
# Setting CELERY_TASK_ALWAYS_EAGER executes all celery tasks locally by blocking until the task returns. Set this to False
# if you have a local running instance of redis and use the command 'python manage.py celery_worker run' to start the celery worker
CELERY_TASK_ALWAYS_EAGER = True
12 changes: 12 additions & 0 deletions imzam/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -234,6 +234,18 @@
username=os.getenv("MQTT_ZAMIP_USERNAME", "inv.zam.haus-django"),
password=os.getenv("MQTT_ZAMIP_PASSWORD", ""))

# ================================================================
# Celery config
# ================================================================
CELERY_BROKER_CONNECTION_RETRY_ON_STARTUP = True
CELERY_BROKER_CONNECTION_MAX_RETRIES = 5

# Setting CELERY_TASK_ALWAYS_EAGER executes all celery tasks locally by blocking until the task returns. Set this to False
# if you have a local running instance of redis and use the command 'python manage.py celery_worker run' to start the celery worker
CELERY_TASK_ALWAYS_EAGER = False

CELERY_BROKER_URL = "redis://celery_redis"


# Overwrite default settings with local_settings.py configuration
if not os.getenv("IGNORE_LOCAL_SETTINGS", False):
Expand Down
49 changes: 49 additions & 0 deletions inventory/management/commands/celery_worker.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
"""
Utility script for starting Celery worker
"""
import shlex
import subprocess
import imzam.celery
from django.core.management.base import BaseCommand, CommandError

CELERY_APP_NAME = "imzam"


def start_celery(background=False):
    """Launch a Celery worker for the project app at INFO log level.

    Args:
        background: When true, spawn the worker without waiting for it and
            discard its stdout/stderr. Otherwise block until the worker
            process exits.
    """
    worker_argv = shlex.split(f'celery -A {CELERY_APP_NAME} worker -l INFO')
    if not background:
        # Foreground mode: wait for the worker process to terminate.
        subprocess.call(worker_argv)
        return
    subprocess.Popen(
        worker_argv,
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
    )


def shutdown_celery():
    """Send the Celery ``control shutdown`` command for the project app.

    Blocks until the ``celery`` CLI invocation has finished.
    """
    subprocess.call(
        shlex.split(f'celery -A {CELERY_APP_NAME} control shutdown')
    )


class Command(BaseCommand):
    """Management command that starts or stops the Celery worker.

    Usage::

        manage.py celery_worker run [--background]
        manage.py celery_worker stop
    """

    help = "Start ('run') or stop ('stop') the Celery worker"

    def add_arguments(self, parser):
        """Register the positional ``command`` and the ``--background`` flag."""
        parser.add_argument(
            'command', help="Command for celery worker (either 'run' or 'stop')", type=str)
        parser.add_argument('--background', action='store_true',
                            default=False, help="Run worker in background")

    def handle(self, *args, **kwargs):
        """Dispatch to starting or shutting down the worker.

        Raises:
            CommandError: if ``command`` is neither 'run' nor 'stop', or if
                ``--background`` is combined with 'stop'.
        """
        command = kwargs['command']
        background = kwargs['background']
        if command == 'run':
            # Write through self.stdout so the output can be captured by
            # call_command() and tests, as Django recommends for commands.
            self.stdout.write(
                f'Starting celery worker{" in background" if background else ""}')
            start_celery(background)
        elif command == 'stop':
            # Reject the invalid option combination before announcing
            # anything, so no misleading "Stopping" message is printed.
            if background:
                raise CommandError(
                    "Invalid option '--background' for 'stop' command")
            self.stdout.write('Stopping celery worker')
            shutdown_celery()
        else:
            raise CommandError(
                "Unknown command: must be either 'run' or 'stop'.")
93 changes: 61 additions & 32 deletions inventory/models.py
Original file line number Diff line number Diff line change
@@ -1,26 +1,27 @@
from datetime import datetime
from string import Template
import urllib.parse

from inventory.ocr_util import ocr_on_image_path
from paho.mqtt import client as mqttc
from datetime import datetime
from pydoc import describe
from typing_extensions import Required
from string import Template
from xml.etree.ElementTree import Comment
from django.db import models
from django.core import validators

from computedfields.models import ComputedFieldsModel, computed
from django.conf import settings
from django.core import validators
from django.db import models
from django.db.models.signals import pre_delete
from django.dispatch.dispatcher import receiver
from django.forms import ValidationError
from django.urls import reverse
from django.utils.html import escape
from django.utils.safestring import mark_safe
from django.conf import settings
from django.utils.translation import gettext_lazy as _
from django.utils.timezone import make_aware
from django.db.models.signals import pre_delete
from django.dispatch.dispatcher import receiver
from django.utils.translation import gettext_lazy as _
from paho.mqtt import client as mqttc
from sorl.thumbnail import delete
from typing_extensions import Required

from inventory.ocr_util import ocr_on_image_path
from inventory.tasks import run_ocr_on_item_image

# Create your models here.

Expand All @@ -32,7 +33,8 @@ class Meta:

# TODO use UUID as id?
# id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False)
name = models.CharField(_("item name"), max_length=512, blank=True, null=True)
name = models.CharField(
_("item name"), max_length=512, blank=True, null=True)
description = models.TextField(_("description"), blank=True)
# TODO implement signal for automatic adoption by parent_location
# https://stackoverflow.com/questions/43857902/django-set-foreign-key-to-parent_location-value-on-delete
Expand Down Expand Up @@ -111,9 +113,12 @@ def get_item_upload_path(instance, filename):

class ItemImage(models.Model):
image = models.ImageField(_("image"), upload_to=get_item_upload_path)
description = models.CharField(_("description"), max_length=512, blank=True)
item = models.ForeignKey("Item", on_delete=models.CASCADE, verbose_name=_("item"))
ocr_text = models.TextField(_("ocr text"), blank=True, null=True, editable=False)
description = models.CharField(
_("description"), max_length=512, blank=True)
item = models.ForeignKey(
"Item", on_delete=models.CASCADE, verbose_name=_("item"))
ocr_text = models.TextField(
_("ocr text"), blank=True, null=True, editable=False)
ocr_timestamp = models.DateTimeField(blank=True, null=True)

def image_tag(self, location=None):
Expand All @@ -126,12 +131,29 @@ def image_tag(self, location=None):
def update_ocr_text(self, ocr_text):
self.ocr_text = ocr_text
self.ocr_timestamp = make_aware(datetime.utcnow())
self.save()
self.save(update_fields=['ocr_text', 'ocr_timestamp'])

def run_ocr(self):
ocr_text = ocr_on_image_path(self.image.path)
self.update_ocr_text(ocr_text)
return ocr_text
self.ocr_text = ocr_on_image_path(self.image.path)
self.ocr_timestamp = make_aware(datetime.utcnow())

def save_ocr_text(self):
    """Persist only the OCR columns by calling the parent class's save directly."""
    ocr_columns = ['ocr_text', 'ocr_timestamp']
    super().save(update_fields=ocr_columns)

def save(
    self, force_insert=False, force_update=False, using=None, update_fields=None
):
    """Save the row and queue the OCR task when the image file is new or changed.

    When ``update_fields`` is given and does not include ``'image'``, the
    call is passed straight to the parent ``save`` and no OCR is queued.
    Otherwise the stored row is compared before and after saving; OCR is
    queued if the row did not exist before, or if the image path changed
    while ``ocr_text`` stayed the same.

    Arguments are forwarded to the parent ``save`` by keyword, because
    passing them positionally to ``Model.save()`` is deprecated in
    Django 5.1.
    """
    save_kwargs = {
        'force_insert': force_insert,
        'force_update': force_update,
        'using': using,
        'update_fields': update_fields,
    }
    if update_fields is not None and 'image' not in update_fields:
        return super().save(**save_kwargs)
    try:
        original_instance = ItemImage.objects.get(pk=self.pk)
    except ItemImage.DoesNotExist:
        original_instance = None
    super().save(**save_kwargs)
    # Retrieve new instance with updated file path
    new_instance = ItemImage.objects.get(pk=self.pk)
    image_replaced = original_instance is not None and (
        original_instance.ocr_text == new_instance.ocr_text
        and original_instance.image.path != new_instance.image.path
    )
    if original_instance is None or image_replaced:
        # NOTE(review): the task is queued immediately; if this runs inside
        # an open transaction a worker might look the row up before it is
        # committed -- consider transaction.on_commit. TODO confirm.
        run_ocr_on_item_image.delay(self.pk)


@receiver(pre_delete, sender=ItemImage)
Expand All @@ -144,8 +166,10 @@ def delete_image(sender, instance, **kwargs):

class ItemFile(models.Model):
file = models.FileField(_("file"), upload_to=get_item_upload_path)
description = models.CharField(_("description"), max_length=512, blank=True)
item = models.ForeignKey("Item", on_delete=models.CASCADE, verbose_name=_("file"))
description = models.CharField(
_("description"), max_length=512, blank=True)
item = models.ForeignKey(
"Item", on_delete=models.CASCADE, verbose_name=_("file"))


@receiver(pre_delete, sender=ItemImage)
Expand All @@ -168,7 +192,8 @@ class ItemBarcode(models.Model):
blank=True,
verbose_name=_("type"),
)
item = models.ForeignKey("Item", on_delete=models.CASCADE, verbose_name=_("Item"))
item = models.ForeignKey(
"Item", on_delete=models.CASCADE, verbose_name=_("Item"))

def __str__(self):
return f"{repr(self.data)} ({self.type})"
Expand Down Expand Up @@ -334,12 +359,12 @@ def get_lablary_url(self, location=None):

def image_tag(self, location=None):
return mark_safe(
'<img width="100%%" src="%s" />' % escape(self.get_lablary_url(location))
'<img width="100%%" src="%s" />' % escape(
self.get_lablary_url(location))
)
image_tag.short_description = "Rendered label"
image_tag.allow_tags = True


def send_to_printer(self, location=None):
c = mqttc.Client(**settings.MQTT_CLIENT_KWARGS)
if settings.MQTT_SERVER_SSL:
Expand All @@ -348,7 +373,8 @@ def send_to_printer(self, location=None):
c.username_pw_set(**settings.MQTT_PASSWORD_AUTH)
c.connect(**settings.MQTT_SERVER_KWARGS)
msg = c.publish(
settings.MQTT_PRINTER_TOPIC, payload=self.generate_label_zpl(location)
settings.MQTT_PRINTER_TOPIC, payload=self.generate_label_zpl(
location)
)
# Messages to forbidden topics will be silently ignored! Nothing we can do about it.
msg.wait_for_publish()
Expand Down Expand Up @@ -420,7 +446,8 @@ def clean(self):
# ensure uniqueness on short_name and name per type, if type demands it
if self.type.unique:
# get all locations with a LocType defined as unique
unique_locs = Location.objects.exclude(pk=self.pk).filter(type__unique=True)
unique_locs = Location.objects.exclude(
pk=self.pk).filter(type__unique=True)
if self.short_name in unique_locs.values_list("short_name", flat=True):
raise ValidationError(
{"short_name": "Short name must be unique, as defined by type."}
Expand Down Expand Up @@ -458,15 +485,17 @@ def __str__(self):

@computed(
models.CharField(_("unique identifier"), max_length=64, unique=True),
depends=[("self", ["short_name"]), ("parent_location", ["unique_identifier"])],
depends=[("self", ["short_name"]),
("parent_location", ["unique_identifier"])],
)
def unique_identifier(self):
if self.type.unique:
return self.short_name
return self.parent_location.unique_identifier + "." + self.short_name

@computed(
models.CharField(_("locatable identifier"), max_length=512, unique=True),
models.CharField(_("locatable identifier"),
max_length=512, unique=True),
depends=[
("self", ["short_name"]),
("parent_location", ["locatable_identifier"]),
Expand Down Expand Up @@ -523,7 +552,7 @@ def get_absolute_url(self):
"view_location",
kwargs={"pk": self.pk, "unique_identifier": self.unique_identifier},
)

def get_admin_url(self):
return reverse(
"admin:inventory_location_change",
Expand All @@ -536,7 +565,8 @@ class Meta:
unique_together = ["item", "location"]
order_with_respect_to = "location"

item = models.ForeignKey("Item", on_delete=models.CASCADE, verbose_name=_("item"))
item = models.ForeignKey(
"Item", on_delete=models.CASCADE, verbose_name=_("item"))
location = models.ForeignKey(
"Location", on_delete=models.CASCADE, verbose_name=_("location")
)
Expand Down Expand Up @@ -566,6 +596,5 @@ def amount_text(self):
else:
return f"{self.amount_without_zeros} {self.item.measurement_unit.short}"


def __str__(self):
return f"{self.amount_text} @ {self.location.locatable_identifier}"
13 changes: 13 additions & 0 deletions inventory/tasks.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
import logging

from celery import shared_task
from django.apps import apps
logger = logging.getLogger(__name__)


@shared_task
def run_ocr_on_item_image(pk):
    """Run OCR on the ``ItemImage`` with primary key *pk* and store the text.

    The model class is looked up through the app registry rather than
    imported at module level.

    If no row with that primary key can be found (for example because the
    image was deleted before the task ran), the task logs the fact and
    returns without raising.
    """
    # Lazy %-style arguments: the message is only formatted if emitted.
    logger.warning("Running OCR on image %s", pk)
    item_image_model = apps.get_model('inventory', 'ItemImage')
    try:
        item_image = item_image_model.objects.get(pk=pk)
    except item_image_model.DoesNotExist:
        logger.warning("Skipping OCR: image %s does not exist", pk)
        return
    item_image.run_ocr()
    item_image.save_ocr_text()
2 changes: 2 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -16,4 +16,6 @@ django-ipware
django-bootstrap-icons
tqdm
pytesseract
celery
redis
sorl-thumbnail
1 change: 1 addition & 0 deletions web-entrypoint.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,5 @@
./manage.py compilemessages
./manage.py collectstatic --noinput
./manage.py migrate
./manage.py celery_worker run --background
gunicorn imzam.wsgi -b 0.0.0.0:8000