-
Notifications
You must be signed in to change notification settings - Fork 270
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
chore: Update Dockerfile.unstructured to include system dependencies and Python packages (#1169)
- Loading branch information
1 parent
d19ca35
commit 1bf7a36
Showing
2 changed files
with
83 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,54 @@ | ||
# Builds the image described by py/Dockerfile.unstructured for linux/amd64 and
# linux/arm64 and pushes it to the registry namespace in REGISTRY_BASE.
# Triggered manually only (workflow_dispatch).
name: Build and Publish Unstructured Docker Image

on:
  workflow_dispatch:

env:
  REGISTRY_BASE: ragtoriches

jobs:
  build:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout Repository
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.10'

      # toml is used by the "Determine version" step to read the project version.
      - name: Install toml package
        run: pip install toml

      - name: Determine version
        id: version
        run: |
          # Publish both outputs consumed by later steps. RELEASE_VERSION was
          # previously never written, which produced an empty (invalid) image tag.
          # NOTE(review): assumes the version lives in py/pyproject.toml under
          # [project] or [tool.poetry] - confirm against the repository layout.
          VERSION=$(python -c "import toml; d = toml.load('py/pyproject.toml'); print(d.get('project', {}).get('version') or d.get('tool', {}).get('poetry', {}).get('version') or '')")
          if [ -z "$VERSION" ]; then
            echo "::error::Could not determine release version from py/pyproject.toml"
            exit 1
          fi
          echo "RELEASE_VERSION=$VERSION" >> "$GITHUB_OUTPUT"
          echo "REGISTRY_IMAGE=${{ env.REGISTRY_BASE }}/unst-prod" >> "$GITHUB_OUTPUT"

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      - name: Docker Auth
        uses: docker/login-action@v3
        with:
          username: ${{ secrets.RAGTORICHES_DOCKER_UNAME }}
          password: ${{ secrets.RAGTORICHES_DOCKER_TOKEN }}

      - name: Build and push image
        uses: docker/build-push-action@v5
        with:
          context: ./py
          file: ./py/Dockerfile.unstructured
          platforms: linux/amd64,linux/arm64
          push: true
          tags: |
            ${{ steps.version.outputs.REGISTRY_IMAGE }}:${{ steps.version.outputs.RELEASE_VERSION }}
            ${{ steps.version.outputs.REGISTRY_IMAGE }}:latest
          provenance: false
          sbom: false

      # Inspect both pushed tags so a missing or broken manifest fails the run.
      - name: Verify manifest
        run: |
          docker buildx imagetools inspect ${{ steps.version.outputs.REGISTRY_IMAGE }}:${{ steps.version.outputs.RELEASE_VERSION }}
          docker buildx imagetools inspect ${{ steps.version.outputs.REGISTRY_IMAGE }}:latest
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
# Image for the Unstructured partitioning service: installs the OS-level
# document/OCR tooling, the unstructured Python package with all document
# extras, and serves main.py with uvicorn on port 7275.
FROM python:3.10-slim AS builder

# Install system dependencies (including those needed for Unstructured and OpenCV).
# libgl1 replaces libgl1-mesa-glx: the latter is only a transitional package and
# is absent from newer Debian releases, while libgl1 provides libGL.so.1 directly.
RUN apt-get update && apt-get install -y --no-install-recommends \
    gcc g++ musl-dev curl libffi-dev gfortran libopenblas-dev \
    tesseract-ocr libtesseract-dev libleptonica-dev pkg-config \
    poppler-utils libmagic1 pandoc libreoffice \
    libgl1 libglib2.0-0 \
    && apt-get clean && rm -rf /var/lib/apt/lists/*

# Location of Tesseract language data. The path hard-codes Tesseract major
# version 5, so it must be revisited if the base image ships another version.
ENV TESSDATA_PREFIX=/usr/share/tesseract-ocr/5/tessdata

# Do not write .pyc files; keep stdout/stderr unbuffered so logs appear immediately.
ENV PYTHONDONTWRITEBYTECODE=1
ENV PYTHONUNBUFFERED=1

WORKDIR /app

# The all-docs extra already pulls in the base unstructured package.
RUN pip install --no-cache-dir "unstructured[all-docs]"

# Run unstructured's model initialization at build time.
# NOTE(review): presumably this pre-fetches model assets into the image so the
# first request does not pay for it - confirm against unstructured's docs.
RUN python -c "from unstructured.partition.model_init import initialize; initialize()"

# Web-serving dependencies; --no-cache-dir keeps pip's download cache out of the layer.
RUN pip install --no-cache-dir gunicorn uvicorn fastapi httpx

COPY core/integrations/unstructured/main.py .

EXPOSE 7275

CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7275"]