Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix docker cuda deps #1312

Merged
merged 12 commits into from
Mar 7, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 23 additions & 6 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,9 +1,26 @@
ARG BASE_IMAGE
FROM $BASE_IMAGE as ocrd_core_base
ARG BASE_IMAGE=ubuntu:20.04
FROM $BASE_IMAGE AS ocrd_core_base
ARG BASE_IMAGE=ubuntu:20.04
ARG FIXUP=echo
MAINTAINER OCR-D
ENV DEBIAN_FRONTEND noninteractive
ENV PYTHONIOENCODING utf8
ARG VCS_REF=unknown
ARG BUILD_DATE=unknown
LABEL \
maintainer="https://ocr-d.de/en/contact" \
org.label-schema.vcs-ref=$VCS_REF \
org.label-schema.vcs-url="https://github.com/OCR-D/core" \
org.label-schema.build-date=$BUILD_DATE \
org.opencontainers.image.vendor="DFG-Funded Initiative for Optical Character Recognition Development" \
org.opencontainers.image.title="core" \
org.opencontainers.image.description="OCR-D framework" \
org.opencontainers.image.source="https://github.com/OCR-D/core" \
org.opencontainers.image.documentation="https://github.com/OCR-D/core/blob/${VCS_REF}/README.md" \
org.opencontainers.image.revision=$VCS_REF \
org.opencontainers.image.created=$BUILD_DATE \
org.opencontainers.image.base.name=$BASE_IMAGE


ENV DEBIAN_FRONTEND=noninteractive
ENV PYTHONIOENCODING=utf8
ENV LC_ALL=C.UTF-8
ENV LANG=C.UTF-8
ENV PIP=pip
Expand Down Expand Up @@ -45,7 +62,7 @@ WORKDIR /data

CMD ["/usr/local/bin/ocrd", "--help"]

FROM ocrd_core_base as ocrd_core_test
FROM ocrd_core_base AS ocrd_core_test
# Optionally skip make assets with this arg
ARG SKIP_ASSETS
WORKDIR /build/core
Expand Down
4 changes: 3 additions & 1 deletion Dockerfile.cuda
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
ARG BASE_IMAGE
ARG BASE_IMAGE=docker.io/ocrd/core
FROM $BASE_IMAGE AS ocrd_core_base

ENV MAMBA_EXE=/usr/local/bin/conda
Expand All @@ -13,6 +13,8 @@ WORKDIR /build/core
COPY Makefile .

RUN make deps-cuda
# Smoke Test
RUN ocrd --version

WORKDIR /data

Expand Down
4 changes: 3 additions & 1 deletion Dockerfile.cuda-tf1
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
ARG BASE_IMAGE
ARG BASE_IMAGE=docker.io/ocrd/core-cuda
FROM $BASE_IMAGE AS ocrd_core_base

WORKDIR /build/core

COPY Makefile .

RUN make deps-tf1
# Smoke Test
RUN ocrd --version

WORKDIR /data

Expand Down
4 changes: 3 additions & 1 deletion Dockerfile.cuda-tf2
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
ARG BASE_IMAGE
ARG BASE_IMAGE=docker.io/ocrd/core-cuda
FROM $BASE_IMAGE AS ocrd_core_base

WORKDIR /build/core

COPY Makefile .

RUN make deps-tf2
# Smoke Test
RUN ocrd --version

WORKDIR /data

Expand Down
4 changes: 3 additions & 1 deletion Dockerfile.cuda-torch
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
ARG BASE_IMAGE
ARG BASE_IMAGE=docker.io/ocrd/core-cuda
FROM $BASE_IMAGE AS ocrd_core_base

# Build in the same directory as the sibling CUDA images (Dockerfile.cuda,
# Dockerfile.cuda-tf1, Dockerfile.cuda-tf2): `make deps-torch` runs
# `pip install ... -r requirements.txt`, which is resolved relative to the
# working directory.
# NOTE(review): assumes the base image keeps core's requirements.txt in
# /build/core, like the sibling images do - confirm against the base Dockerfile.
WORKDIR /build/core

COPY Makefile .

RUN make deps-torch
# Smoke test: fail the build if the ocrd CLI no longer starts after installing Torch
RUN ocrd --version

WORKDIR /data

Expand Down
57 changes: 43 additions & 14 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -57,20 +57,34 @@ help:
PIP_INSTALL ?= $(PIP) install
PIP_INSTALL_CONFIG_OPTION ?=

.PHONY: deps-cuda deps-ubuntu deps-test

deps-cuda: CONDA_EXE ?= /usr/local/bin/conda
deps-cuda: export CONDA_PREFIX ?= /conda
deps-cuda: PYTHON_PREFIX != $(PYTHON) -c 'import sysconfig; print(sysconfig.get_paths()["purelib"])'
deps-cuda:
curl --retry 6 -Ls https://micro.mamba.pm/api/micromamba/linux-64/latest | tar -xvj bin/micromamba
.PHONY: get-conda deps-cuda deps-ubuntu deps-test

# get-conda: make a `conda` executable available.
# The check runs at parse time: if no `conda` is found on PATH, the target
# downloads the Micromamba distribution and installs it as $(CONDA_EXE);
# otherwise the target is a no-op.
ifeq ($(shell command -v conda),)
# Conda installation: get Micromamba distribution
get-conda: CONDA_EXE ?= /usr/local/bin/conda
get-conda: export CONDA_PREFIX ?= /conda
# first part of recipe: see micro.mamba.pm/install.sh
# (OS and ARCH are queried from the build host, then mapped to the
# <platform>-<machine> naming used by the download URL)
get-conda: OS != uname
get-conda: PLATFORM = $(subst Darwin,osx,$(subst Linux,linux,$(OS)))
# ARCH must be set before MACHINE is expanded; without it the filter below
# is always empty and every host would download the x86-64 ("64") build.
get-conda: ARCH != uname -m
get-conda: MACHINE = $(or $(filter aarch64 arm64 ppc64le,$(ARCH)),64)
get-conda: URL = https://micro.mamba.pm/api/micromamba/$(PLATFORM)-$(MACHINE)/latest
get-conda:
	curl --retry 6 -Ls $(URL) | tar -xvj bin/micromamba
	mv bin/micromamba $(CONDA_EXE)
# Install Conda system-wide (for interactive / login shells)
	echo 'export MAMBA_EXE=$(CONDA_EXE) MAMBA_ROOT_PREFIX=$(CONDA_PREFIX) CONDA_PREFIX=$(CONDA_PREFIX) PATH=$(CONDA_PREFIX)/bin:$$PATH' >> /etc/profile.d/98-conda.sh
# workaround for tf-keras#62
	echo 'export XLA_FLAGS=--xla_gpu_cuda_data_dir=$(CONDA_PREFIX)/' >> /etc/profile.d/98-conda.sh
	mkdir -p $(CONDA_PREFIX)/lib $(CONDA_PREFIX)/include
# register the Conda library directory with the dynamic linker configuration
	echo $(CONDA_PREFIX)/lib >> /etc/ld.so.conf.d/conda.conf
else
# Conda installation already present: do nothing
get-conda: ;
endif

# Dependencies for CUDA installation via Conda
deps-cuda: PYTHON_PREFIX != $(PYTHON) -c 'import sysconfig; print(sysconfig.get_paths()["purelib"])'
deps-cuda: get-conda
# Get CUDA toolkit, including compiler and libraries with dev,
# however, the Nvidia channels do not provide (recent) cudnn (needed for Torch, TF etc):
#MAMBA_ROOT_PREFIX=$(CONDA_PREFIX) \
Expand All @@ -79,7 +93,6 @@ deps-cuda:
# The conda-forge channel has cudnn and cudatoolkit but no cudatoolkit-dev anymore (and we need both!),
# so let's combine nvidia and conda-forge (will be same lib versions, no waste of space),
# but omitting cuda-cudart-dev and cuda-libraries-dev (as these will be pulled by pip for torch anyway):
MAMBA_ROOT_PREFIX=$(CONDA_PREFIX) \
conda install -c nvidia/label/cuda-11.8.0 \
cuda-nvcc \
cuda-cccl \
Expand Down Expand Up @@ -145,25 +158,33 @@ deps-tf1:
pushd $$name && for path in $$name*; do mv $$path $${path/$$name/$$newname}; done && popd && \
$(PYTHON) -m wheel pack $$name && \
$(PIP) install $$newname*.whl && popd && rm -fr $$OLDPWD; \
$(PIP) install "numpy<1.24"; \
$(PIP) install "numpy<1.24" -r requirements.txt; \
else \
$(PIP) install "tensorflow-gpu<2.0"; \
$(PIP) install "tensorflow-gpu<2.0" -r requirements.txt; \
fi

deps-tf2:
if $(PYTHON) -c 'import sys; print("%u.%u" % (sys.version_info.major, sys.version_info.minor))' | fgrep 3.8; then \
$(PIP) install tensorflow; \
$(PIP) install tensorflow -r requirements.txt; \
else \
$(PIP) install "tensorflow[and-cuda]"; \
$(PIP) install "tensorflow[and-cuda]" -r requirements.txt; \
fi

deps-torch:
$(PIP) install -i https://download.pytorch.org/whl/cu118 torchvision==0.16.2+cu118 torch==2.1.2+cu118
$(PIP) install -i https://download.pytorch.org/whl/cu118 torchvision==0.16.2+cu118 torch==2.1.2+cu118 -r requirements.txt

# deps-*: always pass core's requirements.txt together with the additional deps
# in the same `pip install` call, so that pip does not ignore core's own
# (previously installed) version requirements, but instead tries to find a
# mutually compatible set.

# System packages required for deployment on Ubuntu/Debian
# (one package per line to keep future diffs small)
deps-ubuntu:
	apt-get install -y \
		python3 \
		imagemagick \
		libgeos-dev \
		libxml2-dev \
		libxslt-dev \
		libssl-dev

# Dependencies for deployment via Conda
# (requires a `conda` executable, which the get-conda prerequisite provides)
.PHONY: deps-conda
deps-conda: get-conda
# The version spec is quoted so the shell cannot glob-expand `3.8.*`
# against files in the working directory before conda sees it.
	conda install -c conda-forge 'python==3.8.*' imagemagick geos pkgconfig

# Install test python deps via pip
deps-test:
$(PIP) install -U pip
Expand Down Expand Up @@ -395,8 +416,16 @@ docker-cuda-torch: DOCKER_FILE = Dockerfile.cuda-torch

docker-cuda-torch: docker-cuda

# If the current ref is exactly a release tag (vMAJOR.MINOR.PATCH), GIT_TAG holds
# that tag so the image can be tagged with it in addition to the default tag;
# otherwise GIT_TAG is empty. `grep -x` requires the whole `git describe` output
# to match, so refs past a release (e.g. v3.1.0-5-gabcdef) yield no tag.
# Each version component may have one or more digits (v3.1.0, v2.70.10, ...).
docker docker-cuda docker-cuda-tf1 docker-cuda-tf2 docker-cuda-torch: GIT_TAG := $(strip $(shell git describe --tags | grep -xE "v[0-9]+\.[0-9]+\.[0-9]+"))
docker docker-cuda docker-cuda-tf1 docker-cuda-tf2 docker-cuda-torch:
$(DOCKER_BUILD) -f $(DOCKER_FILE) $(DOCKER_TAG:%=-t %) --target ocrd_core_base --build-arg BASE_IMAGE=$(lastword $(DOCKER_BASE_IMAGE)) $(DOCKER_ARGS) .
$(DOCKER_BUILD) -f $(DOCKER_FILE) $(DOCKER_TAG:%=-t %) \
$(if $(GIT_TAG),$(DOCKER_TAG:%=-t %:$(GIT_TAG))) \
--target ocrd_core_base \
--build-arg BASE_IMAGE=$(lastword $(DOCKER_BASE_IMAGE)) \
--build-arg VCS_REF=$$(git rev-parse --short HEAD) \
--build-arg BUILD_DATE=$$(date -u +"%Y-%m-%dT%H:%M:%SZ") \
$(DOCKER_ARGS) .

# Build wheels and source dist and twine upload them
pypi: build
Expand Down
2 changes: 1 addition & 1 deletion repo/spec
Submodule spec updated 1 files
+0 −7 CHANGELOG.md
62 changes: 43 additions & 19 deletions src/ocrd/ocrd-all-tool.json
Original file line number Diff line number Diff line change
@@ -1,21 +1,45 @@
{
"ocrd-dummy": {
"executable": "ocrd-dummy",
"description": "Bare-bones processor creates PAGE-XML and optionally copies file from input group to output group",
"steps": [
"preprocessing/optimization"
],
"categories": [
"Image preprocessing"
],
"input_file_grp": "DUMMY_INPUT",
"output_file_grp": "DUMMY_OUTPUT",
"parameters": {
"copy_files": {
"type": "boolean",
"default": false,
"description": "Whether to actually copy files (true) or just create PAGE-XML as a side effect (false)"
}
}
"ocrd-dummy": {
"executable": "ocrd-dummy",
"description": "Bare-bones processor creates PAGE-XML and optionally copies file from input group to output group",
"steps": [
"preprocessing/optimization"
],
"categories": [
"Image preprocessing"
],
"input_file_grp_cardinality": 1,
"output_file_grp_cardinality": 1,
"parameters": {
"copy_files": {
"type": "boolean",
"default": false,
"description": "Whether to actually copy files (true) or just create PAGE-XML as a side effect (false)"
}
}
}
},
"ocrd-filter": {
"executable": "ocrd-filter",
"description": "Bare-bones processor can be dynamically configured to remove segments based on XPath queries",
"steps": [
"recognition/post-correction"
],
"categories": [
"Quality assurance"
],
"input_file_grp_cardinality": 1,
"output_file_grp_cardinality": 1,
"parameters": {
"select": {
"type": "string",
"default": "//*[ends-with(local-name(),'Region')]",
"description": "Which segments to select for removal. An XPath 2.0 query expression (path and optional predicates), with 'pc' as namespace prefix for PAGE-XML and our extension functions (see help text). Only selection of segment hierarchy elements is allowed (so e.g. `*` would be equivalent to `pc:NoiseRegion|pc:LineDrawingRegion|pc:AdvertRegion|pc:ImageRegion|pc:ChartRegion|pc:MusicRegion|pc:GraphicRegion|pc:UnknownRegion|pc:CustomRegion|pc:SeparatorRegion|pc:MathsRegion|pc:TextRegion|pc:MapRegion|pc:ChemRegion|pc:TableRegion|pc:TextLine|pc:Word|pc:Glyph`, but `pc:MetadataItem` or `pc:Border` or `pc:Coords` would not match).\nFor example, to remove words or glyphs with low text confidence, select '(pc:Word|pc:Glyph)[pc:TextEquiv/@conf < 0.7]'. Or low layout confidence, '*[pc:Coords/@conf < 0.7]'.\nTo remove high pixel-to-character rate, select '*[pc:pixelarea(.) div string-length(pc:textequiv(.)) > 10000]'."
},
"plot": {
"type": "boolean",
"default": false,
"description": "Whether to extract an image for each filtered segment and write to the output fileGrp."
}
}
}
}
2 changes: 1 addition & 1 deletion tests/cli/test_resmgr.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from ocrd.resource_manager import OcrdResourceManager

runner = CliRunner()
executable = 'ocrd-dummy'
executable = 'ocrd-test-dummy'

@fixture
def mgr_with_tmp_path(tmp_path):
Expand Down
15 changes: 15 additions & 0 deletions tests/network/docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ services:
args:
BASE_IMAGE: 'ubuntu:22.04'
target: ocrd_core_test
pull_policy: build
hostname: ${OCRD_PS_HOST}
container_name: ocrd_network_processing_server
depends_on:
Expand Down Expand Up @@ -84,6 +85,13 @@ services:

ocrd_dummy_processing_worker:
image: "ocrd_core_test"
build:
context: ../../
dockerfile: Dockerfile
args:
BASE_IMAGE: 'ubuntu:22.04'
target: ocrd_core_test
pull_policy: build
depends_on:
ocrd_network_processing_server:
condition: service_healthy
Expand All @@ -100,6 +108,13 @@ services:

ocrd_network_core_test:
image: "ocrd_core_test"
build:
context: ../../
dockerfile: Dockerfile
args:
BASE_IMAGE: 'ubuntu:22.04'
target: ocrd_core_test
pull_policy: build
container_name: core_test
depends_on:
ocrd_network_processing_server:
Expand Down
6 changes: 3 additions & 3 deletions tests/processor/test_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -525,7 +525,7 @@ def test_run_output_parallel(start_mets_server):
parameter={"sleep": 2},
mets_server_url=mets_server_url)
run_time = time.time() - start_time
assert run_time < 3.2, f"run_processor took {run_time}s"
assert run_time < 3.5, f"run_processor took {run_time}s"
assert len(ws.mets.find_all_files(fileGrp="OCR-D-OUT")) == len(ws.mets.find_all_files(fileGrp="OCR-D-IMG"))
config.reset_defaults()

Expand All @@ -547,14 +547,14 @@ def test_run_output_parallel_caching(start_mets_server):
start_time = time.time()
proc1 = run_processor(DummyProcessorWithOutputSleep, **kwargs)
run_time = time.time() - start_time
assert run_time < 3.2, f"run_processor took {run_time}s"
assert run_time < 3.5, f"run_processor took {run_time}s"
assert len(ws.mets.find_all_files(fileGrp="OCR-D-OUT")) == len(ws.mets.find_all_files(fileGrp="OCR-D-IMG"))
start_time = time.time()
proc2 = run_processor(DummyProcessorWithOutputSleep, **kwargs)
assert proc1 is proc2, "instance_caching must yield identical processor objects for equal parameters"
run_time = time.time() - start_time
# should be faster with default config.OCRD_EXISTING_OUTPUT==SKIP
assert run_time < 1.2, f"run_processor took {run_time}s"
assert run_time < 1.5, f"run_processor took {run_time}s"
config.reset_defaults()

if __name__ == "__main__":
Expand Down
Loading