
Commit 1650a06

Merge pull request #98 from ajcwebdev/refactor

Refactor Whisper and Ollama Docker Configuration

2 parents f45f08c + 3799355

30 files changed (+1150 / -1230 lines)

.github/Dockerfile

Lines changed: 78 additions & 29 deletions

@@ -4,56 +4,95 @@
 # 1) Node base image - Using Debian slim for smaller footprint
 # ---------------------------------------------------
 
+# 1. Use Node 22-slim as base image
 FROM node:22-slim AS base
 
-# Install only required system dependencies
+# 2. Install system dependencies
 RUN apt-get update && apt-get install -y --no-install-recommends \
   ffmpeg git make curl ca-certificates cmake python3 python3-pip \
-  libopenblas-dev g++ build-essential && rm -rf /var/lib/apt/lists/* \
-  && apt-get clean
-
-RUN update-ca-certificates
+  libopenblas-dev g++ build-essential \
+  && rm -rf /var/lib/apt/lists/* \
+  && apt-get clean \
+  && update-ca-certificates
 
+# 3. Set working directory to /usr/src/app
 WORKDIR /usr/src/app
 
-# Install yt-dlp
+# 4. Install yt-dlp
 RUN curl -L https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp \
-  -o /usr/local/bin/yt-dlp && \
-  chmod a+rx /usr/local/bin/yt-dlp
+  -o /usr/local/bin/yt-dlp \
+  && chmod a+rx /usr/local/bin/yt-dlp
 
-# Install tsx globally
+# 5. Install tsx globally
 RUN npm install -g tsx
 
-# Install whisper.cpp and download models
-RUN git clone --depth=1 https://github.com/ggerganov/whisper.cpp.git && \
-  cd whisper.cpp && \
-  cmake -B build && \
-  cmake --build build -j --config Release && \
-  ./models/download-ggml-model.sh large-v3-turbo && \
-  ./models/download-ggml-model.sh base && \
-  ./models/download-ggml-model.sh tiny && \
-  rm -rf .git
-
-# Copy package files and install deps
+# 6. Clone whisper.cpp at specific tag (v1.7.3)
+RUN git clone --depth=1 https://github.com/ggerganov/whisper.cpp.git
+
+# 7. Configure with CMake (verbose output)
+RUN cmake -B whisper.cpp/build -S whisper.cpp \
+  -DGGML_NATIVE=OFF \
+  -DCMAKE_BUILD_TYPE=Release \
+  -DCMAKE_VERBOSE_MAKEFILE=ON \
+  -DWHISPER_BUILD_EXAMPLES=ON
+
+# 8. Build; if it fails, print CMakeError.log & CMakeOutput.log
+RUN cmake --build whisper.cpp/build --config Release -j \
+  || (cat whisper.cpp/build/CMakeFiles/CMakeError.log && \
+  cat whisper.cpp/build/CMakeFiles/CMakeOutput.log && false)
+
+# 9. Print out the files in the build/bin folder for debugging
+RUN ls -l whisper.cpp/build/bin || true
+
+# 10. Make sure the whisper-cli binary is executable + symlink to /usr/local/bin
+RUN chmod +x /usr/src/app/whisper.cpp/build/bin/whisper-cli \
+  && ln -s /usr/src/app/whisper.cpp/build/bin/whisper-cli /usr/local/bin/whisper-cli
+
+# 11. Confirm that whisper-cli is found
+RUN ls -l /usr/local/bin/whisper-cli
+
+# 12. Download the “base” and “tiny” models
+RUN whisper.cpp/models/download-ggml-model.sh base && \
+  whisper.cpp/models/download-ggml-model.sh tiny
+
+# 12a. Expose environment variables to reference model paths
+ENV WHISPER_BASE_MODEL=/usr/src/app/whisper.cpp/models/ggml-base.bin
+ENV WHISPER_TINY_MODEL=/usr/src/app/whisper.cpp/models/ggml-tiny.bin
+
+# 13. Copy package.json and package-lock.json
 COPY package*.json ./
+
+# 14. Install Node deps
 RUN npm ci --production && npm cache clean --force
 
-# Copy source code
+# 15. Copy source code
 COPY src ./src
+
+# 16. Copy Docker entrypoint script
 COPY .github/docker-entrypoint.sh ./
+
+# 17. Set permissions to make the script executable
 RUN chmod +x /usr/src/app/docker-entrypoint.sh
 
 # ---------------------------------------------------
 # 2) Setup Ollama with models
 # ---------------------------------------------------
 
+# 1. Use the Ollama image as a base
 FROM ollama/ollama:latest AS ollama
+
+# 2. Set working directory to Ollama config directory
 WORKDIR /root/.ollama
 
-# Start Ollama server and pull models
+# 3. Start Ollama server and pull models
 RUN ollama serve & \
   sleep 10 && \
-  ollama pull llama3.2:1b && \
+  ollama pull qwen2.5:0.5b && \
+  ollama ls && \
+  echo "Listing /root/.ollama after qwen2.5:0.5b pull:" && \
+  ls -lh /root/.ollama || true && \
+  echo "Listing /root/.ollama/models after qwen2.5:0.5b pull:" && \
+  ls -lh /root/.ollama/models || true && \
   pkill ollama
 
 # ---------------------------------------------------
@@ -62,20 +101,30 @@ RUN ollama serve & \
 
 FROM base
 
-# Copy Ollama binary and the pre-downloaded models
+# 1. Copy Ollama binary
 COPY --from=ollama /bin/ollama /usr/local/bin/ollama
+
+# 2. Copy pre-downloaded models
 COPY --from=ollama /root/.ollama /root/.ollama
 
+RUN echo "Listing /root/.ollama in final stage:" && \
+  ls -lh /root/.ollama || true && \
+  echo "Listing /root/.ollama/models in final stage:" && \
+  ls -lh /root/.ollama/models || true
+
+# Set environment variables for Whisper
 ENV WHISPER_FORCE_CPU=1
 ENV WHISPER_NO_GPU=1
 
-# Create content directory first
+# 3. Create content directory first
 RUN mkdir -p /usr/src/app/content
 
-# Set proper permissions for the entire app directory including content
-RUN chown -R node:node /usr/src/app && \
-  chmod -R 755 /usr/src/app && \
-  chmod 777 /usr/src/app/content # Ensure content dir is fully writable
+# 4. Set proper permissions for the entire app directory including content
+RUN chown -R node:node /usr/src/app \
+  && chmod -R 755 /usr/src/app \
+  && chmod 777 /usr/src/app/content \
+  && chown -R node:node /root/.ollama \
+  && chmod -R 755 /root/.ollama
 
 # Switch to non-root user
 USER node
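The rebuilt stage can be sanity-checked on its own once the image exists. A minimal sketch, assuming the image is tagged `autoshow` as in the repo's npm scripts and the build completed; the `--entrypoint` override skips docker-entrypoint.sh so the artifacts can be inspected directly:

```bash
# Build with plain progress so each numbered RUN step is visible in the log
docker build --progress=plain -t autoshow -f .github/Dockerfile .

# Check the whisper-cli symlink and the baked-in base/tiny models
docker run --rm --entrypoint sh autoshow -c \
  'ls -l /usr/local/bin/whisper-cli && ls -lh /usr/src/app/whisper.cpp/models'
```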

.github/docker-entrypoint.sh

Lines changed: 25 additions & 2 deletions

@@ -1,14 +1,37 @@
 #!/bin/sh
 # .github/docker-entrypoint.sh
 
+# Enable error logging
+set -e
+
+log_error() {
+  echo "Error: $1" >&2
+  if [ -f "/tmp/cmake_config.log" ]; then
+    echo "CMake configuration log:" >&2
+    cat /tmp/cmake_config.log >&2
+  fi
+  if [ -f "/tmp/cmake_build.log" ]; then
+    echo "CMake build log:" >&2
+    cat /tmp/cmake_build.log >&2
+  fi
+  exit 1
+}
+
+# Start Ollama server in the background
+echo "Starting Ollama server..."
+ollama serve &
+
+# Wait for Ollama server to start
+sleep 5
+
 # If first argument is "serve", then start the server.
 if [ "$1" = "serve" ]; then
   echo "Starting Autoshow server..."
   # Remove first arg ("serve") so we don't pass that to the server script.
   shift
-  exec tsx --no-warnings --experimental-sqlite src/server/index.ts "$@"
+  tsx --no-warnings --experimental-sqlite src/server/index.ts "$@" || log_error "Server failed to start"
 fi
 
 # Otherwise, run the CLI by default.
 echo "Running Autoshow CLI..."
-exec tsx --no-warnings --experimental-sqlite src/cli/commander.ts "$@"
+tsx --no-warnings --experimental-sqlite src/cli/commander.ts "$@" || log_error "CLI failed to start"
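With Ollama now started by the entrypoint itself, both container modes pick up a local LLM without extra wiring. A usage sketch based on the `docker-cli` and `docker-serve` scripts in package.json:

```bash
# CLI mode: everything after the image name is forwarded to src/cli/commander.ts
docker run --rm --env-file .env -v $PWD/content:/usr/src/app/content autoshow \
  --video "https://www.youtube.com/watch?v=MORMZXEaONk" --whisper base --ollama

# Server mode: a literal first argument of "serve" selects src/server/index.ts
docker run -d -p 3000:3000 -v $PWD/content:/usr/src/app/content autoshow serve
```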

docs/docker.md

Lines changed: 1 addition & 1 deletion

@@ -14,7 +14,7 @@ You can run any of the `as` CLI commands by passing arguments to the container v
 npm run docker-cli -- \
   --video "https://www.youtube.com/watch?v=MORMZXEaONk" \
   --whisper base \
-  --ollama "LLAMA_3_2_3B"
+  --ollama
 ```
 
 ### Run the Server with Docker

docs/examples.md

Lines changed: 25 additions & 12 deletions

@@ -125,7 +125,7 @@ npm run as -- \
   --whisper base \
   --chatgpt GPT_4_TURBO \
   --prompt summary shortChapters \
-  --noCleanUp
+  --saveAudio
 ```
 
 Here’s what’s happening in this single command:
@@ -137,7 +137,7 @@ Here’s what’s happening in this single command:
 5. **Transcription**: Uses the `--whisper base` model to transcribe each video in a Docker container.
 6. **LLM**: Uses OpenAI ChatGPT’s GPT-4 Turbo model (`--chatgpt GPT_4_TURBO`) to process the transcripts.
 7. **Prompt**: Generates both a summary and short chapter descriptions (`--prompt summary shortChapters`).
-8. **No Clean Up**: Keeps any intermediary or downloaded files around (`--noCleanUp`) so you can inspect them after the run.
+8. **No Clean Up**: Keeps any intermediary or downloaded files around (`--saveAudio`) so you can inspect them after the run.
 
 ### Process Podcast RSS Feed
 
@@ -248,15 +248,15 @@ npm run as -- \
   --speakerLabels \
   --chatgpt GPT_4 \
   --prompt summary longChapters \
-  --noCleanUp
+  --saveAudio
 ```
 
 - **Input**: Process an RSS feed
 - **RSS**: Skip the first 2 items with `--skip 2`
 - **Transcription**: Use AssemblyAI (`--assembly`) with speaker labels (`--speakerLabels`)
 - **LLM**: Use ChatGPT’s GPT-4 model (`--chatgpt GPT_4`)
 - **Prompt**: Request both a summary and long chapters
-- **Utility**: Keep all intermediate files (`--noCleanUp`)
+- **Utility**: Keep all intermediate files (`--saveAudio`)
 
 ## Transcription Options
 
@@ -674,22 +674,35 @@ npm run test-docker
 Benchmark tests, each compare different size models for `whisper.cpp` and a Dockerized version.
 
 ```bash
-npm run bench-tiny
-npm run bench-base
-npm run bench-small
-npm run bench-medium
-npm run bench-large
-npm run bench-turbo
+npx tsx --test test/bench/tiny.test.ts
+npx tsx --test test/bench/base.test.ts
+npx tsx --test test/bench/small.test.ts
+npx tsx --test test/bench/medium.test.ts
+npx tsx --test test/bench/large.test.ts
+npx tsx --test test/bench/turbo.test.ts
+```
+
+Test all available models for a certain LLM service.
+
+```bash
+npx tsx --test test/models/chatgpt.test.ts
+npx tsx --test test/models/claude.test.ts
+npx tsx --test test/models/cohere.test.ts
+npx tsx --test test/models/gemini.test.ts
+npx tsx --test test/models/mistral.test.ts
+npx tsx --test test/models/fireworks.test.ts
+npx tsx --test test/models/together.test.ts
+npx tsx --test test/models/groq.test.ts
 ```
 
 ## Skip Cleanup of Intermediate Files
 
-If you want to keep downloaded or temporary files for debugging or reprocessing purposes, use `--noCleanUp`. This prevents the CLI from deleting intermediary or cached files after finishing its run.
+If you want to keep the downloaded audio file for debugging or reprocessing purposes, use `--saveAudio`. This prevents the CLI from deleting WAV files after finishing its run.
 
 ```bash
 npm run as -- \
   --video "https://www.youtube.com/watch?v=MORMZXEaONk" \
-  --noCleanUp
+  --saveAudio
 ```
 
 ## Chat with Show Notes
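A WAV kept by `--saveAudio` can be fed back into a later run. A sketch, assuming the CLI's `--file` option accepts a local audio file; the path below is illustrative, not an actual output name:

```bash
# First run: keep the downloaded audio alongside the show notes
npm run as -- \
  --video "https://www.youtube.com/watch?v=MORMZXEaONk" \
  --saveAudio

# Later run: reprocess the kept WAV with a different model
# (illustrative path; point --file at the file actually left in content/)
npm run as -- \
  --file "content/example.wav" \
  --whisper tiny
```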

package.json

Lines changed: 10 additions & 22 deletions

@@ -18,8 +18,8 @@
   "scripts": {
     "tsx:base": "tsx --env-file=.env --no-warnings --experimental-sqlite",
     "setup": "bash ./scripts/setup.sh",
-    "setup-docker": "docker build -t autoshow -f .github/Dockerfile .",
-    "docker-setup": "docker build -t autoshow -f .github/Dockerfile .",
+    "setup-docker": "docker build --progress=plain -t autoshow -f .github/Dockerfile .",
+    "docker-setup": "docker build --progress=plain -t autoshow -f .github/Dockerfile .",
     "setup-all": "npm run setup && npm run docker-setup",
     "as": "npm run tsx:base -- src/cli/commander.ts",
     "video": "npm run as -- --video",
@@ -29,32 +29,20 @@
     "rss": "npm run as -- --rss",
     "info": "npm run as -- --info",
     "serve": "npm run tsx:base -- --watch --experimental-sqlite src/server/index.ts",
-    "test-server-local": "npm run tsx:base -- src/server/tests/fetch-local.ts",
-    "ts": "npm run tsx:base -- src/server/tests/fetch-local.ts",
-    "test-server-all": "npm run tsx:base -- src/server/tests/fetch-all.ts",
+    "clean": "npm run tsx:base scripts/cleanContent.ts",
+    "ta": "tsx --test test/all.test.ts",
+    "test-all": "tsx --test test/all.test.ts",
     "t": "npm run test-local",
-    "bench-tiny": "tsx --test test/bench/tiny.test.ts",
-    "bench-base": "tsx --test test/bench/base.test.ts",
-    "bench-small": "tsx --test test/bench/small.test.ts",
-    "bench-medium": "tsx --test test/bench/medium.test.ts",
-    "bench-large": "tsx --test test/bench/large.test.ts",
-    "bench-turbo": "tsx --test test/bench/turbo.test.ts",
-    "test-models-chatgpt": "tsx --test test/models/chatgpt.test.ts",
-    "test-models-claude": "tsx --test test/models/claude.test.ts",
-    "test-models-cohere": "tsx --test test/models/cohere.test.ts",
-    "test-models-gemini": "tsx --test test/models/gemini.test.ts",
-    "test-models-mistral": "tsx --test test/models/mistral.test.ts",
-    "test-models-fireworks": "tsx --test test/models/fireworks.test.ts",
-    "test-models-together": "tsx --test test/models/together.test.ts",
-    "test-models-groq": "tsx --test test/models/groq.test.ts",
     "test-local": "tsx --test test/local.test.ts",
     "test-docker": "tsx --test test/docker.test.ts",
     "test-services": "tsx --test test/services.test.ts",
-    "test-all": "tsx --test test/all.test.ts",
-    "ta": "tsx --test test/all.test.ts",
-    "clean": "npm run tsx:base scripts/cleanContent.ts",
+    "test-server-all": "npm run tsx:base -- src/server/tests/fetch-all.ts",
+    "ts": "npm run tsx:base -- src/server/tests/fetch-local.ts",
+    "test-server-local": "npm run tsx:base -- src/server/tests/fetch-local.ts",
     "docker-cli": "docker run --rm --env-file .env -v $PWD/content:/usr/src/app/content autoshow",
     "docker-serve": "docker run -d -p 3000:3000 -v $PWD/content:/usr/src/app/content autoshow serve",
+    "docker-debug": "docker run --rm -it --entrypoint sh autoshow -c 'ls -lh /usr/src/app && ls -lh /usr/src/app/whisper.cpp/models && ls -lh /usr/src/app/whisper.cpp/build/bin'",
+    "ollama-debug": "docker run --rm -it --entrypoint sh autoshow -c 'ls -l /usr/local/bin/ollama && ls -lh /root/.ollama'",
     "prune": "docker system prune -af --volumes && docker image prune -af && docker container prune -f && docker volume prune -af",
     "bun": "bun --env-file=.env --no-warnings src/cli/commander.ts",
     "deno": "deno run --allow-sys --allow-read --allow-run --allow-write --allow-env --unstable-sloppy-imports src/cli/commander.ts"

scripts/setup.sh

Lines changed: 1 addition & 1 deletion

@@ -57,7 +57,7 @@ else
   check_ollama_server
 
   # Check and pull required models
-  check_and_pull_model "llama3.2:1b" && check_and_pull_model "llama3.2:3b"
+  check_and_pull_model "llama3.2:1b" && check_and_pull_model "qwen2.5:0.5b"
 fi
 
 # Install npm dependencies
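The same models can also be pulled by hand against a running Ollama server, mirroring what `check_and_pull_model` does; a minimal check, assuming `ollama` is on the PATH:

```bash
ollama pull llama3.2:1b
ollama pull qwen2.5:0.5b
ollama ls   # confirm both models are present
```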

src/cli/commander.ts

Lines changed: 5 additions & 5 deletions

@@ -14,8 +14,8 @@
 import { argv, exit } from 'node:process'
 import { fileURLToPath } from 'node:url'
 import { Command } from 'commander'
-import { generatePrompt } from '../process-steps/04-select-prompt'
-import { validateAction, validateLLM, validateTranscription, processAction } from '../utils/validate-option'
+import { selectPrompts } from '../process-steps/04-select-prompt'
+import { validateProcessAction, validateLLM, validateTranscription, processAction } from '../utils/validate-option'
 import { l, err, logCompletionSeparator } from '../utils/logging'
 import { envVarsMap } from '../utils/globals'
 import type { ProcessingOptions } from '../types/process'
@@ -66,7 +66,7 @@ program
   .option('--prompt <sections...>', 'Specify prompt sections to include')
   .option('--printPrompt <sections...>', 'Print the prompt sections without processing')
   .option('--customPrompt <filePath>', 'Use a custom prompt from a markdown file')
-  .option('--noCleanUp', 'Do not delete intermediary files after processing')
+  .option('--saveAudio', 'Do not delete intermediary files after processing')
   // Added options to override environment variables from CLI
   /**
    * Additional CLI options to allow passing API keys from the command line,
@@ -120,13 +120,13 @@ program.action(async (options: ProcessingOptions) => {
 
   // If the user just wants to print prompts, do that and exit
   if (options.printPrompt) {
-    const prompt = await generatePrompt(options.printPrompt)
+    const prompt = await selectPrompts({ printPrompt: options.printPrompt })
     console.log(prompt)
     exit(0)
   }
 
   // 1) Validate which action was chosen
-  const action = validateAction(options)
+  const action = validateProcessAction(options, "action")
 
   // 2) Validate LLM
   const llmServices = validateLLM(options)
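The renamed `selectPrompts` path is reachable from the CLI through `--printPrompt`, which prints the assembled prompt sections and exits before any processing runs:

```bash
# Prints the combined prompt for the given sections, then exits
npm run as -- --printPrompt summary shortChapters
```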
