Skip to content

Commit afe9eea

Browse files
authored
Merge pull request #76 from CatchTheTornado/feature/59-63-multiformat-reorganize-and-converters
#59 #63 multiformat, reorganize and converters
2 parents 167e42d + 6959bd7 commit afe9eea

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

52 files changed

+1380
-472
lines changed

.dockerignore

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
# Virtual environments
2+
.venv
3+
4+
# Documentation and examples
5+
docs/
6+
examples/
7+
screenshots/
8+
9+
# Scripts and local storage
10+
scripts/
11+
storage/
12+
13+
# Python cache files
14+
__pycache__/
15+
*.py[cod]
16+
*.pyo
17+
*.pyd
18+
19+
# Editor/IDE settings
20+
.idea/
21+
.vscode/
22+
23+
# Build artifacts
24+
build/
25+
dist/
26+
*.egg-info/
27+
28+
# Version control
29+
.git/
30+
.gitignore

.env.example

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
#APP_ENV=production # sets the app into prod mode, otherwise dev mode with auto-reload on code changes
22
REDIS_CACHE_URL=redis://redis:6379/1
33
OLLAMA_HOST=http://ollama:11434
4-
STORAGE_PROFILE_PATH=/storage_profiles
4+
STORAGE_PROFILE_PATH=./storage_profiles
55
LLAMA_VISION_PROMPT="You are OCR. Convert image to markdown."
66

77
# CLI settings

.env.localhost.example

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ CELERY_RESULT_BACKEND=redis://localhost:6379/0
1616
OLLAMA_HOST=http://localhost:11434
1717
APP_ENV=development # Default to development mode
1818

19-
STORAGE_PROFILE_PATH=../storage_profiles
19+
STORAGE_PROFILE_PATH=./storage_profiles
2020

2121
# AWS S3
2222
#AWS_ACCESS_KEY_ID=your-access-key-id

.gitignore

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,15 @@
1-
app/__pycache__/*
1+
text_extract_api/__pycache__/*
22
.env
33
.env.local
44
*.pyc
55
.venv
6+
.dvenv
67
.DS_Store
78
storage/*
89
client_secret*.json
910
.env.localhost
10-
11+
.idea
12+
# Python good practice ignore
13+
*.egg-info/
14+
/build/
15+
.pyproject.hash

Makefile

Lines changed: 123 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,123 @@
1+
SHELL := /bin/bash
2+
3+
export DISABLE_VENV ?= 0
4+
export DISABLE_LOCAL_OLLAMA ?= 0
5+
6+
.PHONY: help
7+
help:
8+
@echo "Available commands:"
9+
@echo " make install (recommended) - Automatic setup for local or Docker"
10+
@echo " make run - Start the local application server"
11+
@echo " make run-docker - Run Docker containers with CPU support"
12+
@echo " make run-docker-gpu - Run Docker containers with GPU support"
13+
@echo " make clean - Clean the project environment"
14+
@echo " make clear-cache - Clear application cache"
15+
16+
.PHONY: install
17+
install:
18+
@width=$$(tput cols || echo 100); \
19+
[ "$$width" -gt "100" ] && width=100; \
20+
padding=$$(printf "%$${width}s" "" | tr ' ' '#'); \
21+
printf "\n\e[1;34m%s\e[0m\n" "$$padding"; \
22+
printf "\e[1;34m###%*sCatchTheTornado/text-extract-api%*s###\e[0m\n" $$(($$width / 2 - 21)) "" $$(($$width / 2 - 17)) ""; \
23+
printf "\e[1;34m###%*sAUTOMATIC SETUP%*s###\e[0m\n" $$(($$width / 2 - 10)) "" $$(($$width / 2 - 11)) ""; \
24+
printf "%s\n" "$$padding"; \
25+
printf "\e[1;34m Do you want to run the application locally or with Docker?\e[0m\n"; \
26+
printf "\e[1;33m [L] \e[0m Local - Run the application locally\n"; \
27+
printf "\e[1;33m [D] \e[0m Docker - Run the application in Docker\n"; \
28+
read -p " > " choice; \
29+
case "$$choice" in \
30+
[lL]) echo -e "\033[1;32m ✔ You chose: Local Setup\033[0m"; $(MAKE) setup-local ;; \
31+
[dD]) echo -e "\033[1;32m ✔ You chose: Docker\033[0m"; $(MAKE) setup-docker ;; \
32+
*) echo "Invalid option. Exiting."; exit 1 ;; \
33+
esac
34+
35+
.PHONY: setup-local
36+
setup-local:
37+
@rm -f .pyproject.hash
38+
@if [ ! -f .env ]; then \
39+
printf "\n\e[1;34m Copy .env.localhost.example to .env.localhost \e[0m"; \
40+
cp .env.localhost.example .env.localhost; \
41+
fi
42+
@while true; do \
43+
printf "\n\e[1;34m Python setup environment...\e[0m"; \
44+
printf "\e[1;34m\n Do you want to install requirements?\e[0m\n"; \
45+
printf "\e[1;33m [y] \e[0m Yes - Install and then run application locally\n"; \
46+
printf "\e[1;33m [n] \e[0m No - Skip and run application locally \n"; \
47+
read -p " > " choice; \
48+
case "$$choice" in \
49+
[yY]) \
50+
echo -e "\033[1;32m ✔ Installing Python dependencies...\033[0m"; \
51+
$(MAKE) install-requirements; \
52+
$(MAKE) run; \
53+
break; \
54+
;; \
55+
[nN]|[sS]) \
56+
echo -e "\033[1;33m Skipping requirement installation. Starting the local server instead...\033[0m"; \
57+
$(MAKE) run; \
58+
break; \
59+
;; \
60+
*) \
61+
echo -e "\033[1;31m Invalid input: Please enter 'y', 'n', or 's' to proceed.\033[0m"; \
62+
;; \
63+
esac; \
64+
done
65+
66+
.PHONY: install-linux
67+
install-linux:
68+
@echo -e "\033[1;34m Installing Linux dependencies...\033[0m"; \
69+
sudo apt update && sudo apt install -y libmagic1 tesseract-ocr poppler-utils pkg-config
70+
71+
.PHONY: install-macos
72+
install-macos:
73+
@echo -e "\033[1;34m Installing macOS dependencies...\033[0m"; \
74+
brew update && brew install libmagic tesseract poppler pkg-config ghostscript ffmpeg automake autoconf
75+
76+
.PHONY: install-requirements
77+
install-requirements:
78+
@if [ "$$(uname)" = "Linux" ]; then $(MAKE) install-linux; \
79+
elif [ "$$(uname)" = "Darwin" ]; then $(MAKE) install-macos; \
80+
else echo "Unsupported OS. Exiting."; exit 1; fi; \
81+
82+
.PHONY: run
83+
run:
84+
@echo "Starting the local application server..."; \
85+
DISABLE_VENV=$(DISABLE_VENV) DISABLE_LOCAL_OLLAMA=$(DISABLE_LOCAL_OLLAMA) ./run.sh
86+
87+
.PHONY: setup-docker
88+
setup-docker:
89+
@rm -f .pyproject.hash
90+
@if [ ! -f .env ]; then \
91+
printf "\n\e[1;34m Copy .env.example to .env \e[0m"; \
92+
cp .env.example .env; \
93+
fi
94+
@echo -e "\033[1;34m Available Docker options:\033[0m"; \
95+
echo -e "\033[1;33m 1:\033[0m Run Docker containers with CPU support"; \
96+
echo -e "\033[1;33m 2:\033[0m Run Docker containers with GPU support"; \
97+
read -p "Enter your choice (1 = CPU, 2 = GPU, any other key to exit): " docker_choice; \
98+
case "$$docker_choice" in \
99+
1) $(MAKE) run-docker ;; \
100+
2) $(MAKE) run-docker-gpu ;; \
101+
*) echo -e "\033[1;34m Exiting without starting Docker.\033[0m" ;; \
102+
esac
103+
104+
.PHONY: run-docker
105+
run-docker:
106+
@echo -e "\033[1;34m Starting Docker container with CPU support...\033[0m";
107+
@docker-compose -f docker-compose.yml up --build
108+
109+
.PHONY: run-docker-gpu
110+
run-docker-gpu:
111+
@echo -e "\033[1;34m Starting Docker container with GPU support...\033[0m";
112+
@docker-compose -f docker-compose.gpu.yml -p text-extract-api-gpu up --build
113+
114+
.PHONY: clean
115+
clean:
116+
@echo "Cleaning project..."; \
117+
docker-compose down -v; \
118+
$(MAKE) clean-cache
119+
120+
.PHONY: clean-python-cache
121+
clean-cache:
122+
find . -type d -name '__pycache__' -exec rm -rf {} + && find . -type f -name '*.pyc' -delete
123+

README.md

Lines changed: 56 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -45,19 +45,54 @@ Before running the example see [getting started](#getting-started)
4545

4646
You might want to run the app directly on your machine for development purposes OR to use for example Apple GPUs (which are not supported by Docker at the moment).
4747

48+
### Prerequisites
49+
4850
To have it up and running please execute the following steps:
4951

5052
[Download and install Ollama](https://ollama.com/download)
5153
[Download and install Docker](https://www.docker.com/products/docker-desktop/)
5254

53-
If you are on Mac or just need to have your dependencies well organized, create a [virtual python env](https://docs.python.org/3/library/venv.html):
55+
> ### Setting Up Ollama on a Remote Host
56+
>
57+
> To connect to an external Ollama instance, set the environment variable: `OLLAMA_HOST=http://address:port`, e.g.:
58+
> ```bash
59+
> OLLAMA_HOST=http(s)://127.0.0.1:5000
60+
> ```
61+
>
62+
> If you want to disable the local Ollama model, use env `DISABLE_LOCAL_OLLAMA=1`, e.g.
63+
> ```bash
64+
> DISABLE_LOCAL_OLLAMA=1 make install
65+
> ```
66+
> **Note**: When local Ollama is disabled, ensure the required model is downloaded on the external instance.
67+
>
68+
> Currently, the `DISABLE_LOCAL_OLLAMA` variable cannot be used to disable Ollama in Docker. As a workaround, remove the `ollama` service from `docker-compose.yml` or `docker-compose.gpu.yml`.
69+
>
70+
> Support for using the variable in Docker environments will be added in a future release.
71+
72+
73+
### Clone the Repository
74+
75+
First, clone the repository and change current directory to it:
76+
77+
```sh
78+
git clone https://github.com/CatchTheTornado/text-extract-api.git
79+
cd text-extract-api
80+
```
81+
82+
### Setup with `Makefile`
83+
84+
By default, the application creates a [virtual python env](https://docs.python.org/3/library/venv.html): `.venv`. You can disable this functionality on local setup by adding `DISABLE_VENV=1` before running the script:
5485
5586
```bash
56-
python3 -m venv .venv
57-
source .venv/bin/activate
58-
# now you've got access to `python` and `pip` commands
87+
DISABLE_VENV=1 make install
88+
```
89+
90+
```bash
91+
DISABLE_VENV=1 make run
5992
```
6093
94+
### Manual setup
95+
6196
Configure environment variables:
6297
6398
```bash
@@ -67,6 +102,9 @@ cp .env.localhost.example .env.localhost
67102
You might want to just use the defaults - should be fine. After ENV variables are set, just execute:
68103
69104
```bash
105+
python3 -m venv .venv
106+
source .venv/bin/activate
107+
pip install -e .
70108
chmod +x run.sh
71109
run.sh
72110
```
@@ -84,7 +122,7 @@ python client/cli.py ocr_upload --file examples/example-mri.pdf --ocr_cache --pr
84122
To have multiple tasks running at once - for concurrent processing please run the following command to start a single worker process:
85123
86124
```bash
87-
celery -A main.celery worker --loglevel=info --pool=solo & # to scale by concurrent processing please run this line as many times as many concurrent processess you want to have running
125+
celery -A text_extract_api.tasks worker --loglevel=info --pool=solo & # to scale by concurrent processing please run this line as many times as many concurrent processes you want to have running
88126
```
89127
90128
## Online demo
@@ -98,7 +136,7 @@ Open in the browser: <a href="https://demo.doctractor.com/">demo.doctractor.com<
98136
```bash
99137
python3 -m venv .venv
100138
source .venv/bin/activate
101-
pip install -r app/requirements.txt
139+
pip install -e .
102140
export OCR_UPLOAD_URL=https://doctractor:Aekie2ao@api.doctractor.com/ocr/upload
103141
export RESULT_URL=https://doctractor:Aekie2ao@api.doctractor.com/ocr/result/
104142
@@ -129,7 +167,11 @@ git clone https://github.com/CatchTheTornado/text-extract-api.git
129167
cd text-extract-api
130168
```
131169
132-
### Setup environmental variables
170+
### Using `Makefile`
171+
You can use the `make install` and `make run` commands to set up the Docker environment for `text-extract-api`. You can find the manual steps required to do so described below.
172+
173+
174+
### Manual setup
133175
134176
Create `.env` file in the root directory and set the necessary environment variables. You can use the `.env.example` file as a template:
135177
@@ -148,9 +190,9 @@ cp .env.example.localhost .env
148190
Then modify the variables inside the file:
149191
150192
```bash
151-
#APP_ENV=production # sets the app into prod mode, othervise dev mode with auto-reload on code changes
193+
#APP_ENV=production # sets the app into prod mode, otherwise dev mode with auto-reload on code changes
152194
REDIS_CACHE_URL=redis://localhost:6379/1
153-
STORAGE_PROFILE_PATH=/storage_profiles
195+
STORAGE_PROFILE_PATH=./storage_profiles
154196
LLAMA_VISION_PROMPT="You are OCR. Convert image to markdown."
155197
156198
# CLI settings
@@ -182,7 +224,7 @@ docker-compose up --build
182224
... for GPU support run:
183225
184226
```bash
185-
docker-compose -f docker-compose.gpu.yml up --build
227+
docker-compose -f docker-compose.gpu.yml -p text-extract-api-gpu up --build
186228
```
187229
188230
**Note:** While on Mac - Docker does not support Apple GPUs. In this case you might want to run the application natively without the Docker Compose please check [how to run it natively with GPU support](#getting-started)
@@ -206,15 +248,15 @@ If the on-prem is too much hassle [ask us about the hosted/cloud edition](mailto
206248
python3 -m venv .venv
207249
source .venv/bin/activate
208250
# now you've got access to `python` and `pip` within your virtual env.
209-
pip install -r app/requirements.txt # install main project requirements
251+
pip install -e . # install main project requirements
210252
```
211253
212254
213255
The project includes a CLI for interacting with the API. To make it work first run:
214256
215257
```bash
216258
cd client
217-
pip install -r requirements.txt
259+
pip install -e .
218260
```
219261
220262
@@ -263,7 +305,7 @@ python client/cli.py ocr_upload --file examples/example-mri.pdf --ocr_cache --pr
263305
```
264306
265307
The `ocr` command can store the results using the `storage_profiles`:
266-
- **storage_profile**: Used to save the result - the `default` profile (`/storage_profiles/default.yaml`) is used by default; if empty file is not saved
308+
- **storage_profile**: Used to save the result - the `default` profile (`./storage_profiles/default.yaml`) is used by default; if empty file is not saved
267309
- **storage_filename**: Outputting filename - relative path of the `root_path` set in the storage profile - by default a relative path to `/storage` folder; can use placeholders for dynamic formatting: `{file_name}`, `{file_extension}`, `{Y}`, `{mm}`, `{dd}` - for date formatting, `{HH}`, `{MM}`, `{SS}` - for time formatting
268310
269311
@@ -361,7 +403,7 @@ apiClient.uploadFile(formData).then(response => {
361403
- **ocr_cache**: Whether to cache the OCR result (true or false).
362404
- **prompt**: When provided, will be used for Ollama processing the OCR result
363405
- **model**: When provided along with the prompt - this model will be used for LLM processing
364-
- **storage_profile**: Used to save the result - the `default` profile (`/storage_profiles/default.yaml`) is used by default; if empty file is not saved
406+
- **storage_profile**: Used to save the result - the `default` profile (`./storage_profiles/default.yaml`) is used by default; if empty file is not saved
365407
- **storage_filename**: Outputting filename - relative path of the `root_path` set in the storage profile - by default a relative path to `/storage` folder; can use placeholders for dynamic formatting: `{file_name}`, `{file_extension}`, `{Y}`, `{mm}`, `{dd}` - for date formatting, `{HH}`, `{MM}`, `{SS}` - for time formatting
366408
367409
Example:

app/Dockerfile

Lines changed: 0 additions & 35 deletions
This file was deleted.

0 commit comments

Comments
 (0)