# Makefile for ai-starter-kit (forked from pavelanni/ai-starter-kit).
# Detect the operating system.
# Windows sets OS=Windows_NT in the environment; anywhere else ask uname.
ifneq ($(OS),Windows_NT)
DETECTED_OS := $(shell uname -s)
else
DETECTED_OS := Windows
endif
# Project-specific variables
PARSING_DIR := utils/parsing/unstructured-api
PARSING_VENV := venv

# Set OS-specific variables and commands.
# PARSING_VENV_ACTIVATE is relative to $(PARSING_DIR) on every platform:
# each recipe that uses it runs `cd $(PARSING_DIR)` first, so the path must
# not repeat the directory prefix.
ifeq ($(DETECTED_OS),Windows)
PYTHON := python
PIP := pip
HOME := $(USERPROFILE)
MKDIR := mkdir
RM := rmdir /s /q
FIND := where
PARSING_VENV_ACTIVATE := $(PARSING_VENV)\Scripts\activate.bat
else
PYTHON := python3
PIP := pip3
MKDIR := mkdir -p
RM := rm -rf
FIND := find
PARSING_VENV_ACTIVATE := $(PARSING_VENV)/bin/activate
endif
# Common variables

# pyenv location; its bin directory is placed first on PATH.
PYENV_ROOT := $(HOME)/.pyenv
PATH := $(PYENV_ROOT)/bin:$(PATH)

# Python version installed through pyenv, plus a version-range string
# (PYTHON_VERSION_RANGE is not referenced by any rule in this file).
DEFAULT_PYTHON_VERSION := 3.11.3
PYTHON_VERSION_RANGE := ">=3.10,<3.13"

# Virtual environment directories and the requirement files installed into them.
VENV_PATH := .venv
BASE_REQUIREMENTS := base-requirements.txt
TEST_SUITE_VENV := .test_suite_venv
TEST_SUITE_REQUIREMENTS := tests/requirements.txt
# Default target: run the full setup, one step after another.
# The steps are run as sub-makes (rather than prerequisites) so that they
# execute strictly in this order; $(MAKE) is used instead of a literal `make`
# so that command-line flags and variables reach the sub-makes.
.PHONY: all
all:
	@$(MAKE) ensure-system-dependencies && \
	$(MAKE) venv && \
	$(MAKE) install && \
	$(MAKE) start-parsing-service && \
	$(MAKE) post-process || (echo "An error occurred during setup. Please check the output above." && exit 1)
# Repl.it: install one kit's dependencies and optionally run a command in it.
# Usage: make replit-kit KIT=<kit_name> [RUN_COMMAND=<command>]
# KIT must name an existing directory; RUN_COMMAND, when given, is evaluated
# from inside that directory after setup.
.PHONY: replit-kit
replit-kit:
	@[ -n "$(KIT)" ] || { \
		echo "Error: KIT variable is not set. Usage: make replit-kit KIT=<kit_name> [RUN_COMMAND=<command>]"; \
		exit 1; \
	}
	@echo "Setting up kit $(KIT) for Repl.it..."
	@[ -d "$(KIT)" ] || { \
		echo "Error: Kit directory '$(KIT)' not found."; \
		exit 1; \
	}
	@echo "Installing dependencies for kit $(KIT)..."
	@cd $(KIT) && \
	pip install --upgrade pip && \
	if [ ! -f "requirements.txt" ]; then \
		echo "Warning: requirements.txt not found in $(KIT). Skipping kit-specific dependencies."; \
	else \
		pip install -r requirements.txt --no-cache-dir; \
	fi
	@echo "Downgrading NLTK to version 3.8.1..."
	@pip install nltk==3.8.1 --no-cache-dir
	@echo "Downloading NLTK punkt resource..."
	@python -c "import nltk; nltk.download('punkt')"
	@echo "Kit $(KIT) setup complete."
	@if [ -z "$(RUN_COMMAND)" ]; then \
		echo "No run command specified. Setup complete."; \
	else \
		echo "Running command: $(RUN_COMMAND)"; \
		cd $(KIT) && eval $(RUN_COMMAND); \
	fi
# Repl.it entry point.
# With KIT set: set up that kit (replit-kit), then post-process.
# Without KIT: install the base requirements, then post-process.
# Sub-makes are chained with && so a failed kit setup is reported instead of
# being hidden by the post-processing step, and $(MAKE) is used so flags
# propagate.
.PHONY: replit
replit:
	@if [ -n "$(KIT)" ]; then \
		$(MAKE) replit-kit KIT="$(KIT)" RUN_COMMAND="$(RUN_COMMAND)" && \
		$(MAKE) post-process-replit; \
	else \
		$(MAKE) replit-install post-process-replit; \
	fi
# Repl.it: upgrade pip and install the base requirements with the ambient
# pip (no virtual environment, no system packages).
.PHONY: replit-install
replit-install:
	@echo "Installing dependencies for Repl.it (skipping system dependencies)..."
	pip install --upgrade pip
	pip install -r $(BASE_REQUIREMENTS) --no-cache-dir
# Repl.it: start the parsing service in the background.
# The braces keep both the background job and the PID write inside
# $(PARSING_DIR); without them only the backgrounded command list would see
# the `cd`, and the PID file would be written to the directory make was
# started from instead of where parsing-status looks for it.
.PHONY: start-parsing-service-replit
start-parsing-service-replit: replit-setup-parsing-service
	@echo "Starting parsing service in the background..."
	@cd $(PARSING_DIR) && { \
		$(MAKE) run-web-app > parsing_service.log 2>&1 & \
		echo $$! > parsing_service.pid; \
	}
	@echo "Parsing service started. PID stored in $(PARSING_DIR)/parsing_service.pid"
	@echo "Use 'make parsing-log' to view the service log."
# Repl.it: run `make install` inside the parsing service directory.
# Fails with an error message when the directory or its Makefile is missing.
# The sub-make is invoked through $(MAKE) so flags and the jobserver propagate.
.PHONY: replit-setup-parsing-service
replit-setup-parsing-service:
	@echo "Setting up parsing service for Repl.it..."
	@echo "Current directory: $$(pwd)"
	@echo "PARSING_DIR: $(PARSING_DIR)"
	@if [ ! -d "$(PARSING_DIR)" ]; then \
		echo "Error: Directory $(PARSING_DIR) not found"; \
		exit 1; \
	fi; \
	cd $(PARSING_DIR) && \
	echo "Changed to directory: $$(pwd)" && \
	if [ ! -f "Makefile" ]; then \
		echo "Error: Makefile not found in $(PARSING_DIR)"; \
		exit 1; \
	fi && \
	echo "Running make install..." && \
	$(MAKE) install
# Repl.it: reinstall google-search-results pinned to 2.4.2 using the
# ambient pip.
.PHONY: post-process-replit
post-process-replit:
	@echo "Post-processing installation for Repl.it..."
	pip uninstall -y google-search-results
	pip install google-search-results==2.4.2
# Aggregate target for the system packages: Poppler and Tesseract.
# It has no recipe of its own; it only pulls in the two ensure-* targets.
.PHONY: ensure-system-dependencies
ensure-system-dependencies: ensure-poppler
ensure-system-dependencies: ensure-tesseract
# Ensure Poppler is installed.
#   Windows: only checks for pdftoppm and asks for a manual install.
#   macOS:   installs with Homebrew when pdftoppm is missing.
#   other:   installs poppler-utils with apt-get when pdftoppm is missing or
#            dpkg does not report the package as installed.
# Output is silenced with the POSIX form `>/dev/null 2>&1`; `&>` is a bash
# extension that other /bin/sh implementations parse as "run in background",
# which makes the "not found" test always succeed.
.PHONY: ensure-poppler
ensure-poppler:
ifeq ($(DETECTED_OS),Windows)
	@where pdftoppm >nul 2>&1 || (echo Poppler not found. Please install it manually from https://github.com/oschwartz10612/poppler-windows/releases/ && exit 1)
else ifeq ($(DETECTED_OS),Darwin)
	@if ! command -v pdftoppm >/dev/null 2>&1; then \
		echo "Poppler not found. Installing Poppler..."; \
		brew install poppler; \
	else \
		echo "Poppler is already installed: $$(which pdftoppm)"; \
	fi
else
	@if ! command -v pdftoppm >/dev/null 2>&1 || \
		! dpkg-query -W -f='$${Status}' poppler-utils 2>/dev/null | grep -q "ok installed"; then \
		echo "Poppler not found. Installing Poppler..."; \
		sudo apt-get update && sudo apt-get install -y poppler-utils; \
	else \
		echo "Poppler is already installed: $$(which pdftoppm)"; \
	fi
endif
# Ensure libheif is installed.
#   Windows: not automated; prints a notice.
#   macOS:   installs with Homebrew when `brew list libheif` fails.
#   other:   installs libheif-dev with apt-get when dpkg does not know it.
# Uses `>/dev/null 2>&1` rather than the bash-only `&>` so the check also
# works when /bin/sh is not bash.
.PHONY: ensure-libheif
ensure-libheif:
ifeq ($(DETECTED_OS),Windows)
	@echo "libheif installation on Windows is not supported in this Makefile. Please install it manually."
else ifeq ($(DETECTED_OS),Darwin)
	@if ! brew list libheif >/dev/null 2>&1; then \
		echo "libheif not found. Installing libheif..."; \
		brew install libheif; \
	else \
		echo "libheif is already installed."; \
	fi
else
	@if ! dpkg -s libheif-dev >/dev/null 2>&1; then \
		echo "libheif not found. Installing libheif..."; \
		sudo apt-get update && sudo apt-get install -y libheif-dev; \
	else \
		echo "libheif is already installed."; \
	fi
endif
# Ensure Tesseract is installed.
#   Windows: only checks for tesseract and asks for a manual install.
#   macOS:   installs with Homebrew when the command is missing.
#   other:   installs tesseract-ocr with apt-get when the command is missing.
# Uses `>/dev/null 2>&1` rather than the bash-only `&>` so the check also
# works when /bin/sh is not bash.
.PHONY: ensure-tesseract
ensure-tesseract:
ifeq ($(DETECTED_OS),Windows)
	@where tesseract >nul 2>&1 || (echo Tesseract not found. Please install it manually from https://github.com/UB-Mannheim/tesseract/wiki && exit 1)
else ifeq ($(DETECTED_OS),Darwin)
	@if ! command -v tesseract >/dev/null 2>&1; then \
		echo "Tesseract not found. Installing Tesseract..."; \
		brew install tesseract; \
	else \
		echo "Tesseract is already installed."; \
	fi
else
	@if ! command -v tesseract >/dev/null 2>&1; then \
		echo "Tesseract not found. Installing Tesseract..."; \
		sudo apt-get update && sudo apt-get install -y tesseract-ocr; \
	else \
		echo "Tesseract is already installed."; \
	fi
endif
# Ensure pyenv is available and set up.
#   Windows: not supported; prints a notice.
#   macOS:   installs pyenv with Homebrew when missing.
#   other:   installs pyenv with the pyenv.run installer when missing and
#            appends the pyenv init lines to ~/.bashrc for future shells.
# The PATH export and `pyenv init` at the end only affect this recipe's own
# shell (each recipe runs in a separate shell); they are done explicitly here
# rather than by sourcing ~/.bashrc, since `source` is not available in every
# /bin/sh.
# `>/dev/null 2>&1` replaces the bash-only `&>`, which a POSIX sh treats as
# "run in background" and which made the "pyenv found" test always succeed.
.PHONY: ensure-pyenv
ensure-pyenv:
ifeq ($(DETECTED_OS),Windows)
	@echo "pyenv is not supported on Windows. Please install Python $(DEFAULT_PYTHON_VERSION) manually."
else
	@if command -v pyenv >/dev/null 2>&1; then \
		echo "pyenv found. Setting up environment..."; \
	else \
		echo "pyenv not found. Installing pyenv..."; \
		if [ "$(DETECTED_OS)" = "Darwin" ]; then \
			brew install pyenv; \
		else \
			curl https://pyenv.run | bash; \
			echo 'export PYENV_ROOT="$$HOME/.pyenv"' >> ~/.bashrc; \
			echo 'command -v pyenv >/dev/null || export PATH="$$PYENV_ROOT/bin:$$PATH"' >> ~/.bashrc; \
			echo 'eval "$$(pyenv init -)"' >> ~/.bashrc; \
		fi; \
	fi; \
	export PATH="$(HOME)/.pyenv/bin:$$PATH"; \
	eval "$$(pyenv init -)"
endif
# Install Python $(DEFAULT_PYTHON_VERSION) through pyenv unless its version
# directory already exists under $(PYENV_ROOT)/versions.
# On Windows this only prints a reminder.
.PHONY: install-python-versions
install-python-versions: ensure-pyenv
ifeq ($(DETECTED_OS),Windows)
	@echo "Please ensure Python $(DEFAULT_PYTHON_VERSION) is installed manually on Windows."
else
	@if [ -d $(PYENV_ROOT)/versions/$(DEFAULT_PYTHON_VERSION) ]; then \
		echo "Python $(DEFAULT_PYTHON_VERSION) is already installed."; \
	else \
		echo "Installing Python $(DEFAULT_PYTHON_VERSION)..."; \
		pyenv install $(DEFAULT_PYTHON_VERSION); \
	fi
endif
# Create the base virtual environment at $(VENV_PATH) if it does not exist:
# install/select Python $(DEFAULT_PYTHON_VERSION) with pyenv, create the
# venv, and upgrade pip inside it.
# The steps are chained with && so the recipe stops (and fails) at the first
# step that fails instead of carrying on with a missing interpreter or venv.
.PHONY: create-base-venv
create-base-venv: ensure-pyenv
	@echo "Creating or updating base virtual environment..."
	@if [ ! -d $(VENV_PATH) ]; then \
		pyenv install -s $(DEFAULT_PYTHON_VERSION) && \
		pyenv local $(DEFAULT_PYTHON_VERSION) && \
		$(PYTHON) -m venv $(VENV_PATH) && \
		. $(VENV_PATH)/bin/activate && \
		$(PIP) install --upgrade pip && \
		deactivate; \
	else \
		echo "Base virtual environment already exists."; \
	fi
# Make sure a virtual environment exists at $(VENV_PATH): reuse it when the
# directory is present, otherwise create it with $(PYTHON) -m venv.
.PHONY: venv
venv: create-base-venv install-python-versions
	@echo "Checking for virtual environment..."
	@if [ -d $(VENV_PATH) ]; then \
		echo "Using existing virtual environment."; \
	else \
		echo "Creating new virtual environment..."; \
		$(PYTHON) -m venv $(VENV_PATH); \
	fi
# Ensure qpdf is installed (for pikepdf).
#   Windows: only checks for qpdf and prints a manual-install hint.
#   macOS:   installs with Homebrew when the command is missing.
#   other:   installs with apt-get when the command is missing.
# Uses `>/dev/null 2>&1` rather than the bash-only `&>` so the check also
# works when /bin/sh is not bash.
.PHONY: ensure-qpdf
ensure-qpdf:
ifeq ($(DETECTED_OS),Windows)
	@where qpdf >nul 2>&1 || (echo qpdf not found. Please install it manually from https://github.com/qpdf/qpdf/releases)
else ifeq ($(DETECTED_OS),Darwin)
	@if ! command -v qpdf >/dev/null 2>&1; then \
		echo "qpdf not found. Installing qpdf..."; \
		brew install qpdf; \
	else \
		echo "qpdf is already installed."; \
	fi
else
	@if ! command -v qpdf >/dev/null 2>&1; then \
		echo "qpdf not found. Installing qpdf..."; \
		sudo apt-get update && sudo apt-get install -y qpdf; \
	else \
		echo "qpdf is already installed."; \
	fi
endif
# Install the base requirements into the virtual environment at $(VENV_PATH).
# qpdf, Poppler, Tesseract and libheif are ensured first via prerequisites.
.PHONY: install
install: ensure-qpdf ensure-system-dependencies ensure-libheif
	@echo "Installing dependencies..."
	@. $(VENV_PATH)/bin/activate \
		&& $(PIP) install --upgrade pip \
		&& $(PIP) install -r $(BASE_REQUIREMENTS) --no-cache-dir \
		&& deactivate
# Post-installation step: inside the virtual environment, remove
# google-search-results and install it again pinned to 2.4.2.
.PHONY: post-process
post-process:
	@echo "Post-processing installation..."
	@. $(VENV_PATH)/bin/activate \
		&& $(PIP) uninstall -y google-search-results \
		&& $(PIP) install google-search-results==2.4.2 \
		&& deactivate
# Set up the parsing service: inside $(PARSING_DIR), create (or reuse) the
# $(PARSING_VENV) virtual environment, activate it, upgrade pip, and run that
# directory's own `install` target.
# - The working directory is printed with the shell's $$(pwd) so it shows the
#   directory after the `cd`, not the one make was started from.
# - Steps are chained with && so a failed activation or install makes the
#   target fail instead of continuing with the wrong Python environment.
# - The sub-make is invoked through $(MAKE) so flags propagate.
.PHONY: setup-parsing-service
setup-parsing-service: install-python-versions
	@echo "Setting up parsing service..."
	@cd $(PARSING_DIR) && ( \
		echo "Current directory: $$(pwd)"; \
		echo "PARSING_DIR: $(PARSING_DIR)"; \
		echo "PARSING_VENV: $(PARSING_VENV)"; \
		if [ ! -d $(PARSING_VENV) ]; then \
			echo "Creating new virtual environment for parsing service..."; \
			$(PYTHON) -m venv $(PARSING_VENV); \
		else \
			echo "Using existing virtual environment for parsing service."; \
		fi && \
		echo "Activating virtual environment: $(PARSING_VENV_ACTIVATE)" && \
		. $(PARSING_VENV_ACTIVATE) && \
		echo "Upgrading pip..." && \
		$(PIP) install --upgrade pip && \
		echo "Installing requirements..." && \
		$(MAKE) install && \
		echo "Deactivating virtual environment..." && \
		{ deactivate || true; } \
	)
# Start the parsing service in the background.
# Non-Windows: a bash child, started from $(PARSING_DIR), activates the
# parsing venv, launches `run-web-app` in the background with output sent to
# parsing_service.log, and records the background job's PID in
# parsing_service.pid. No explicit deactivate is needed: the activation lives
# only in that bash child, which exits right after writing the PID.
# Failures (for example a missing directory) are no longer masked.
.PHONY: start-parsing-service
start-parsing-service: setup-parsing-service
	@echo "Starting parsing service in the background..."
ifeq ($(DETECTED_OS),Windows)
	@cd $(PARSING_DIR) && ( \
		$(PARSING_VENV_ACTIVATE) && \
		start /b make run-web-app > parsing_service.log 2>&1 && \
		echo "Parsing service started. Check parsing_service.log for details." && \
		deactivate \
	)
else
	@cd $(PARSING_DIR) && \
	bash -c '. $(PARSING_VENV_ACTIVATE) && \
		$(MAKE) run-web-app > parsing_service.log 2>&1 & \
		echo $$! > parsing_service.pid'
	@echo "Parsing service started. PID stored in $(PARSING_DIR)/parsing_service.pid"
endif
	@echo "Use 'make parsing-log' to view the service log."
# Stop the parsing service by killing whatever listens on TCP port 8005,
# then (non-Windows) remove the PID file.
.PHONY: stop-parsing-service
stop-parsing-service:
	@echo "Stopping parsing service..."
ifeq ($(DETECTED_OS),Windows)
	@for /f "tokens=5" %a in ('netstat -aon ^| find ":8005" ^| find "LISTENING"') do taskkill /F /PID %a
else
	@pids=$$(lsof -ti tcp:8005); \
	if [ -z "$$pids" ]; then \
		echo "No parsing service found running on port 8005."; \
	else \
		kill -9 $$pids && \
		echo "Parsing service stopped (PID: $$pids)."; \
	fi
	@rm -f $(PARSING_DIR)/parsing_service.pid
endif
# Show the parsing service log: `type` on Windows, `tail -f` (follows until
# interrupted) elsewhere. Prints a hint when the log file does not exist.
.PHONY: parsing-log
parsing-log:
ifeq ($(DETECTED_OS),Windows)
	@if exist $(PARSING_DIR)\parsing_service.log (type $(PARSING_DIR)\parsing_service.log) else (echo Parsing service log not found. Is the service running?)
else
	@if [ ! -f $(PARSING_DIR)/parsing_service.log ]; then \
		echo "Parsing service log not found. Is the service running?"; \
	else \
		tail -f $(PARSING_DIR)/parsing_service.log; \
	fi
endif
# Check parsing service status.
#   Windows: looks for a listener on port 8005. The check and the message are
#            one command line, because every recipe line runs in its own
#            shell and a later line cannot see an earlier line's errorlevel;
#            `&& ... || ...` also keeps make from aborting when nothing is
#            listening.
#   other:   reads the PID file in $(PARSING_DIR); removes it when the
#            recorded process no longer exists.
.PHONY: parsing-status
parsing-status:
ifeq ($(DETECTED_OS),Windows)
	@netstat -ano | findstr :8005 | findstr LISTENING > nul && (echo Parsing service is running.) || (echo Parsing service is not running.)
else
	@if [ -f $(PARSING_DIR)/parsing_service.pid ]; then \
		PID=$$(cat $(PARSING_DIR)/parsing_service.pid); \
		if ps -p $$PID > /dev/null; then \
			echo "Parsing service is running (PID: $$PID)"; \
		else \
			echo "Parsing service is not running (stale PID file found)"; \
			rm $(PARSING_DIR)/parsing_service.pid; \
		fi \
	else \
		echo "Parsing service is not running (no PID file found)"; \
	fi
endif
# Docker-related commands

# Build the image from the current directory and tag it ai-starter-kit.
.PHONY: docker-build
docker-build:
	@echo "Building Docker image..."
	docker build -t ai-starter-kit .
# Build the image, then run it interactively with ports 8005 and 8501
# published; the container is removed on exit.
.PHONY: docker-run
docker-run: docker-build
	@echo "Running Docker container..."
	docker run -it --rm -p 8005:8005 -p 8501:8501 ai-starter-kit
# Build the image, then open /bin/bash in a throwaway container with ports
# 8005 and 8501 published.
.PHONY: docker-shell
docker-shell: docker-build
	@echo "Opening a shell in the Docker container..."
	docker run -it --rm -p 8005:8005 -p 8501:8501 ai-starter-kit /bin/bash
# Run one kit inside the container.
# Usage: make docker-run-kit KIT=<kit_name> [COMMAND=<command>]
# COMMAND is run from the kit directory; when it is not given, the kit's
# Streamlit app (streamlit/app.py) is started instead.
.PHONY: docker-run-kit
docker-run-kit: docker-build
	@echo "Running specific kit in Docker container..."
	@[ -n "$(KIT)" ] || { \
		echo "Error: KIT variable is not set. Usage: make docker-run-kit KIT=<kit_name> [COMMAND=<command>]"; \
		exit 1; \
	}
	@if [ -n "$(COMMAND)" ]; then \
		docker run -it --rm -p 8005:8005 -p 8501:8501 ai-starter-kit /bin/bash -c "cd $(KIT) && $(COMMAND)"; \
	else \
		docker run -it --rm -p 8005:8005 -p 8501:8501 ai-starter-kit /bin/bash -c "cd $(KIT) && streamlit run streamlit/app.py --browser.gatherUsageStats false"; \
	fi
# Set up the test suite environment: make sure Python
# $(DEFAULT_PYTHON_VERSION) is installed through pyenv, select it for this
# directory, create $(TEST_SUITE_VENV), and install the test requirements.
.PHONY: setup-test-suite
setup-test-suite: ensure-pyenv
	@echo "Setting up test suite environment..."
	@if [ -d $(PYENV_ROOT)/versions/$(DEFAULT_PYTHON_VERSION) ]; then \
		echo "Python $(DEFAULT_PYTHON_VERSION) is already installed."; \
	else \
		echo "Installing Python $(DEFAULT_PYTHON_VERSION) for test suite..."; \
		pyenv install $(DEFAULT_PYTHON_VERSION); \
	fi
	@pyenv local $(DEFAULT_PYTHON_VERSION)
	@$(PYTHON) -m venv $(TEST_SUITE_VENV)
	@. $(TEST_SUITE_VENV)/bin/activate \
		&& $(PIP) install --upgrade pip \
		&& $(PIP) install -r $(TEST_SUITE_REQUIREMENTS) --no-cache-dir \
		&& deactivate
# Remove the test suite virtual environment and clear the directory-local
# pyenv version selection.
.PHONY: clean-test-suite
clean-test-suite:
	@echo "Cleaning up test suite environment..."
	@rm -rf $(TEST_SUITE_VENV)
	@pyenv local --unset
# Clean up: stop the parsing service, then remove both virtual environments,
# compiled *.pyc files and __pycache__ directories.
# __pycache__ directories are removed with `rm -rf` (with -prune so find does
# not descend into a directory it is about to remove); find's -delete only
# removes empty directories and would fail on any that still hold files.
.PHONY: clean
clean: stop-parsing-service
	@echo "Cleaning up..."
ifeq ($(DETECTED_OS),Windows)
	@if exist $(VENV_PATH) rmdir /s /q $(VENV_PATH)
	@if exist $(PARSING_DIR)\$(PARSING_VENV) rmdir /s /q $(PARSING_DIR)\$(PARSING_VENV)
	@for /r %x in (*.pyc) do @del "%x"
	@for /d /r %x in (__pycache__) do @if exist "%x" rd /s /q "%x"
else
	@rm -rf $(VENV_PATH)
	@rm -rf $(PARSING_DIR)/$(PARSING_VENV)
	@find . -type f -name '*.pyc' -delete
	@find . -type d -name '__pycache__' -prune -exec rm -rf {} +
endif
# Run the black formatter over the whole tree from inside the virtual
# environment at $(VENV_PATH).
.PHONY: format
format:
	@echo "Formatting code..."
	@. $(VENV_PATH)/bin/activate \
		&& black . \
		&& deactivate
# Print a one-line description of each target.
# Includes the Repl.it helper targets, ensure-libheif and create-base-venv,
# which are defined in this file but were missing from the listing.
.PHONY: help
help:
	@echo "Available targets:"
	@echo " all : Set up main project, create or use venv, install dependencies, start parsing service, and post-process"
	@echo " replit : Set up project for Repl.it (skips pyenv check)"
	@echo " replit-kit : Set up a single kit for Repl.it. Usage: make replit-kit KIT=<kit_name> [RUN_COMMAND=<command>]"
	@echo " replit-install : Install base requirements for Repl.it (skips system dependencies)"
	@echo " replit-setup-parsing-service : Install the parsing service for Repl.it"
	@echo " start-parsing-service-replit : Start the parsing service in the background on Repl.it"
	@echo " post-process-replit : Perform post-installation steps for Repl.it (reinstall google-search-results)"
	@echo " ensure-system-dependencies : Ensure Poppler and Tesseract are installed"
	@echo " ensure-poppler : Install Poppler if not already installed"
	@echo " ensure-libheif : Install libheif if not already installed"
	@echo " ensure-tesseract : Install Tesseract if not already installed"
	@echo " ensure-pyenv : Install pyenv if not already installed (not supported on Windows)"
	@echo " install-python-versions: Install specific Python version ($(DEFAULT_PYTHON_VERSION)) (not supported on Windows)"
	@echo " create-base-venv : Create the base virtual environment if it does not exist"
	@echo " ensure-qpdf : Install qpdf if not already installed (required for pikepdf)"
	@echo " venv : Create or use existing virtual environment"
	@echo " install : Install dependencies using pip"
	@echo " post-process : Perform post-installation steps (reinstall google-search-results)"
	@echo " setup-parsing-service : Set up the parsing service environment"
	@echo " start-parsing-service : Start the parsing service in the background"
	@echo " stop-parsing-service : Stop the running parsing service"
	@echo " parsing-log : View the parsing service log"
	@echo " parsing-status : Check the status of the parsing service"
	@echo " docker-build : Build Docker image"
	@echo " docker-run : Run Docker container"
	@echo " docker-shell : Open a shell in the Docker container"
	@echo " docker-run-kit : Run a specific kit in the Docker container. Usage: make docker-run-kit KIT=<kit_name> [COMMAND=<command>]"
	@echo " setup-test-suite : Set up the test suite environment"
	@echo " clean-test-suite : Clean up the test suite environment"
	@echo " clean : Remove all virtual environments and cache files, stop parsing service"
	@echo " format : Format code using black"
	@echo " help : Show this help message"