Skip to content

Commit e6923ae

Browse files
authored
Merge pull request #99 from ajcwebdev/staging
Add Deepgram and Assembly Models
2 parents 1650a06 + ba92d48 commit e6923ae

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

46 files changed

+694
-486
lines changed

.github/docker-entrypoint.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,4 +34,4 @@ fi
3434

3535
# Otherwise, run the CLI by default.
3636
echo "Running Autoshow CLI..."
37-
tsx --no-warnings --experimental-sqlite src/cli/commander.ts "$@" || log_error "CLI failed to start"
37+
tsx --no-warnings --experimental-sqlite src/commander.ts "$@" || log_error "CLI failed to start"

docs/examples.md

Lines changed: 31 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -266,42 +266,37 @@ If neither the `--deepgram` nor `--assembly` option is included for transcription
266266

267267
```bash
268268
# tiny model
269-
npm run as -- \
270-
--video "https://www.youtube.com/watch?v=MORMZXEaONk" \
271-
--whisper tiny
269+
npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --whisper tiny
272270

273271
# base model
274-
npm run as -- \
275-
--video "https://www.youtube.com/watch?v=MORMZXEaONk" \
276-
--whisper base
272+
npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --whisper base
277273

278274
# small model
279-
npm run as -- \
280-
--video "https://www.youtube.com/watch?v=MORMZXEaONk" \
281-
--whisper small
275+
npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --whisper small
282276

283277
# medium model
284-
npm run as -- \
285-
--video "https://www.youtube.com/watch?v=MORMZXEaONk" \
286-
--whisper medium
278+
npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --whisper medium
287279

288280
# large-v2 model
289-
npm run as -- \
290-
--video "https://www.youtube.com/watch?v=MORMZXEaONk" \
291-
--whisper large-v2
281+
npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --whisper large-v2
292282

293283
# large-v3-turbo model
294-
npm run as -- \
295-
--video "https://www.youtube.com/watch?v=MORMZXEaONk" \
296-
--whisper large-v3-turbo
284+
npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --whisper large-v3-turbo
297285
```
298286

299287
### Deepgram
300288

301289
```bash
302-
npm run as -- \
303-
--video "https://www.youtube.com/watch?v=MORMZXEaONk" \
304-
--deepgram
290+
npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --deepgram
291+
```
292+
293+
Select model:
294+
295+
```bash
296+
npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --deepgram BASE
297+
npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --deepgram ENHANCED
298+
npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --deepgram NOVA
299+
npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --deepgram NOVA_2
305300
```
306301

307302
Include Deepgram API key directly in CLI command instead of in `.env` file:
@@ -316,9 +311,14 @@ npm run as -- \
316311
### Assembly
317312

318313
```bash
319-
npm run as -- \
320-
--video "https://www.youtube.com/watch?v=MORMZXEaONk" \
321-
--assembly
314+
npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --assembly
315+
```
316+
317+
Select model:
318+
319+
```bash
320+
npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --assembly NANO
321+
npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --assembly BEST
322322
```
323323

324324
Include speaker labels and number of speakers:
@@ -695,6 +695,13 @@ npx tsx --test test/models/together.test.ts
695695
npx tsx --test test/models/groq.test.ts
696696
```
697697

698+
Test all available models for a given transcription service:
699+
700+
```bash
701+
npx tsx --test test/models/deepgram.test.ts
702+
npx tsx --test test/models/assembly.test.ts
703+
```
704+
698705
## Skip Cleanup of Intermediate Files
699706

700707
If you want to keep the downloaded audio file for debugging or reprocessing purposes, use `--saveAudio`. This prevents the CLI from deleting WAV files after finishing its run.

docs/scripts.md

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -72,10 +72,10 @@ Sets up a base command for running TypeScript files using `tsx`, a TypeScript ex
7272

7373
### `as`
7474

75-
Executes the main command-line interface (CLI) application. Runs `src/cli/commander.ts` using `tsx` with the base options defined in `tsx:base`.
75+
Executes the main command-line interface (CLI) application. Runs `src/commander.ts` using `tsx` with the base options defined in `tsx:base`.
7676

7777
```json
78-
"as": "npm run tsx:base -- src/cli/commander.ts"
78+
"as": "npm run tsx:base -- src/commander.ts"
7979
```
8080

8181
### `serve`
@@ -248,7 +248,7 @@ Runs the CLI application using `Bun`, an alternative JavaScript runtime.
248248
- `--no-warnings`: Suppresses warnings during execution.
249249

250250
```json
251-
"bun": "bun --env-file=.env --no-warnings src/cli/commander.ts"
251+
"bun": "bun --env-file=.env --no-warnings src/commander.ts"
252252
```
253253

254254
## `deno`
@@ -263,5 +263,5 @@ Runs the CLI application using `Deno`, another JavaScript and TypeScript runtime
263263
- `--unstable-sloppy-imports`: Enables experimental import features.
264264

265265
```json
266-
"deno": "deno run --allow-sys --allow-read --allow-run --allow-write --allow-env --unstable-sloppy-imports src/cli/commander.ts"
266+
"deno": "deno run --allow-sys --allow-read --allow-run --allow-write --allow-env --unstable-sloppy-imports src/commander.ts"
267267
```

package.json

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121
"setup-docker": "docker build --progress=plain -t autoshow -f .github/Dockerfile .",
2222
"docker-setup": "docker build --progress=plain -t autoshow -f .github/Dockerfile .",
2323
"setup-all": "npm run setup && npm run docker-setup",
24-
"as": "npm run tsx:base -- src/cli/commander.ts",
24+
"as": "npm run tsx:base -- src/commander.ts",
2525
"video": "npm run as -- --video",
2626
"urls": "npm run as -- --urls",
2727
"playlist": "npm run as -- --playlist",
@@ -44,8 +44,8 @@
4444
"docker-debug": "docker run --rm -it --entrypoint sh autoshow -c 'ls -lh /usr/src/app && ls -lh /usr/src/app/whisper.cpp/models && ls -lh /usr/src/app/whisper.cpp/build/bin'",
4545
"ollama-debug": "docker run --rm -it --entrypoint sh autoshow -c 'ls -l /usr/local/bin/ollama && ls -lh /root/.ollama'",
4646
"prune": "docker system prune -af --volumes && docker image prune -af && docker container prune -f && docker volume prune -af",
47-
"bun": "bun --env-file=.env --no-warnings src/cli/commander.ts",
48-
"deno": "deno run --allow-sys --allow-read --allow-run --allow-write --allow-env --unstable-sloppy-imports src/cli/commander.ts"
47+
"bun": "bun --env-file=.env --no-warnings src/commander.ts",
48+
"deno": "deno run --allow-sys --allow-read --allow-run --allow-write --allow-env --unstable-sloppy-imports src/commander.ts"
4949
},
5050
"dependencies": {
5151
"@anthropic-ai/sdk": "0.33.1",

src/cli/commander.ts renamed to src/commander.ts

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
#!/usr/bin/env node
22

3-
// src/cli/commander.ts
3+
// src/commander.ts
44

55
/**
66
* Autoshow CLI Application
@@ -14,11 +14,11 @@
1414
import { argv, exit } from 'node:process'
1515
import { fileURLToPath } from 'node:url'
1616
import { Command } from 'commander'
17-
import { selectPrompts } from '../process-steps/04-select-prompt'
18-
import { validateProcessAction, validateLLM, validateTranscription, processAction } from '../utils/validate-option'
19-
import { l, err, logCompletionSeparator } from '../utils/logging'
20-
import { envVarsMap } from '../utils/globals'
21-
import type { ProcessingOptions } from '../types/process'
17+
import { selectPrompts } from './process-steps/04-select-prompt'
18+
import { validateProcessAction, validateLLM, validateTranscription, processAction } from './utils/validate-option'
19+
import { l, err, logCompletionSeparator } from './utils/logging'
20+
import { envVarsMap } from './utils/globals/llms'
21+
import type { ProcessingOptions } from './utils/types/process'
2222

2323
// Initialize the command-line interface using Commander.js
2424
const program = new Command()
@@ -49,8 +49,8 @@ program
4949
.option('--info', 'Skip processing and write metadata to JSON objects (supports --urls, --rss, --playlist, --channel)')
5050
// Transcription service options
5151
.option('--whisper [model]', 'Use Whisper.cpp for transcription with optional model specification')
52-
.option('--deepgram', 'Use Deepgram for transcription')
53-
.option('--assembly', 'Use AssemblyAI for transcription')
52+
.option('--deepgram [model]', 'Use Deepgram for transcription with optional model specification')
53+
.option('--assembly [model]', 'Use AssemblyAI for transcription with optional model specification')
5454
.option('--speakerLabels', 'Use speaker labels for AssemblyAI transcription')
5555
// LLM service options
5656
.option('--ollama [model]', 'Use Ollama for processing with optional model specification')

src/llms/chatgpt.ts

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,9 @@
22

33
import { env } from 'node:process'
44
import { OpenAI } from 'openai'
5-
import { GPT_MODELS } from '../utils/llm-globals'
5+
import { GPT_MODELS } from '../utils/globals/llms'
66
import { err, logAPIResults } from '../utils/logging'
7-
import type { LLMFunction, ChatGPTModelType } from '../types/llms'
7+
import type { ChatGPTModelType } from '../utils/types/llms'
88

99
/**
1010
* Main function to call ChatGPT API.
@@ -15,11 +15,11 @@ import type { LLMFunction, ChatGPTModelType } from '../types/llms'
1515
* @returns {Promise<string>} A Promise that resolves with the generated text.
1616
* @throws {Error} If an error occurs during API call.
1717
*/
18-
export const callChatGPT: LLMFunction = async (
18+
export const callChatGPT = async (
1919
prompt: string,
2020
transcript: string,
2121
model: string = 'GPT_4o_MINI'
22-
): Promise<string> => {
22+
) => {
2323
if (!env['OPENAI_API_KEY']) {
2424
throw new Error('OPENAI_API_KEY environment variable is not set. Please set it to your OpenAI API key.')
2525
}

src/llms/claude.ts

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,9 @@
22

33
import { env } from 'node:process'
44
import { Anthropic } from '@anthropic-ai/sdk'
5-
import { CLAUDE_MODELS } from '../utils/llm-globals'
5+
import { CLAUDE_MODELS } from '../utils/globals/llms'
66
import { err, logAPIResults } from '../utils/logging'
7-
import type { LLMFunction, ClaudeModelType } from '../types/llms'
7+
import type { ClaudeModelType } from '../utils/types/llms'
88

99
/**
1010
* Main function to call Claude API.
@@ -15,11 +15,11 @@ import type { LLMFunction, ClaudeModelType } from '../types/llms'
1515
* @returns {Promise<string>} A Promise that resolves with the generated text.
1616
* @throws {Error} If an error occurs during the API call.
1717
*/
18-
export const callClaude: LLMFunction = async (
18+
export const callClaude = async (
1919
prompt: string,
2020
transcript: string,
2121
model: string = 'CLAUDE_3_HAIKU'
22-
): Promise<string> => {
22+
) => {
2323
if (!env['ANTHROPIC_API_KEY']) {
2424
throw new Error('ANTHROPIC_API_KEY environment variable is not set. Please set it to your Anthropic API key.')
2525
}

src/llms/cohere.ts

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,9 @@
22

33
import { env } from 'node:process'
44
import { CohereClient } from 'cohere-ai'
5-
import { COHERE_MODELS } from '../utils/llm-globals'
5+
import { COHERE_MODELS } from '../utils/globals/llms'
66
import { err, logAPIResults } from '../utils/logging'
7-
import type { LLMFunction, CohereModelType } from '../types/llms'
7+
import type { CohereModelType } from '../utils/types/llms'
88

99
/**
1010
* Main function to call Cohere API.
@@ -15,11 +15,11 @@ import type { LLMFunction, CohereModelType } from '../types/llms'
1515
* @returns {Promise<string>} A Promise that resolves when the API call is complete.
1616
* @throws {Error} If an error occurs during the API call.
1717
*/
18-
export const callCohere: LLMFunction = async (
18+
export const callCohere = async (
1919
prompt: string,
2020
transcript: string,
2121
model: string = 'COMMAND_R'
22-
): Promise<string> => {
22+
) => {
2323
if (!env['COHERE_API_KEY']) {
2424
throw new Error('COHERE_API_KEY environment variable is not set. Please set it to your Cohere API key.')
2525
}

src/llms/fireworks.ts

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
// src/llms/fireworks.ts
22

33
import { env } from 'node:process'
4-
import { FIREWORKS_MODELS } from '../utils/llm-globals'
4+
import { FIREWORKS_MODELS } from '../utils/globals/llms'
55
import { err, logAPIResults } from '../utils/logging'
6-
import type { LLMFunction, FireworksModelType, FireworksResponse } from '../types/llms'
6+
import type { FireworksModelType, FireworksResponse } from '../utils/types/llms'
77

88
/**
99
* Main function to call Fireworks AI API.
@@ -13,11 +13,11 @@ import type { LLMFunction, FireworksModelType, FireworksResponse } from '../type
1313
* @returns {Promise<string>} A Promise that resolves with the generated text.
1414
* @throws {Error} If an error occurs during the API call.
1515
*/
16-
export const callFireworks: LLMFunction = async (
16+
export const callFireworks = async (
1717
prompt: string,
1818
transcript: string,
1919
model: string | FireworksModelType = 'LLAMA_3_2_3B'
20-
): Promise<string> => {
20+
) => {
2121
if (!env['FIREWORKS_API_KEY']) {
2222
throw new Error('FIREWORKS_API_KEY environment variable is not set. Please set it to your Fireworks API key.')
2323
}

src/llms/gemini.ts

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,9 @@
22

33
import { env } from 'node:process'
44
import { GoogleGenerativeAI } from "@google/generative-ai"
5-
import { GEMINI_MODELS } from '../utils/llm-globals'
5+
import { GEMINI_MODELS } from '../utils/globals/llms'
66
import { err, logAPIResults } from '../utils/logging'
7-
import type { LLMFunction, GeminiModelType } from '../types/llms'
7+
import type { GeminiModelType } from '../utils/types/llms'
88

99
/**
1010
* Utility function to introduce a delay
@@ -21,11 +21,11 @@ const delay = (ms: number): Promise<void> => new Promise(resolve => setTimeout(r
2121
* @returns {Promise<string>} A Promise that resolves when the API call is complete.
2222
* @throws {Error} If an error occurs during the API call.
2323
*/
24-
export const callGemini: LLMFunction = async (
24+
export const callGemini = async (
2525
prompt: string,
2626
transcript: string,
2727
model: string = 'GEMINI_1_5_FLASH'
28-
): Promise<string> => {
28+
) => {
2929
if (!env['GEMINI_API_KEY']) {
3030
throw new Error('GEMINI_API_KEY environment variable is not set. Please set it to your Gemini API key.')
3131
}

src/llms/groq.ts

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
// src/llms/groq.ts
22

33
import { env } from 'node:process'
4-
import { GROQ_MODELS } from '../utils/llm-globals'
4+
import { GROQ_MODELS } from '../utils/globals/llms'
55
import { err, logAPIResults } from '../utils/logging'
6-
import type { LLMFunction, GroqModelType, GroqChatCompletionResponse } from '../types/llms'
6+
import type { GroqModelType, GroqChatCompletionResponse } from '../utils/types/llms'
77

88
/**
99
* Function to call the Groq chat completion API.
@@ -13,11 +13,11 @@ import type { LLMFunction, GroqModelType, GroqChatCompletionResponse } from '../
1313
* @returns {Promise<string>} A Promise that resolves when the API call is complete.
1414
* @throws {Error} If an error occurs during the API call.
1515
*/
16-
export const callGroq: LLMFunction = async (
16+
export const callGroq = async (
1717
prompt: string,
1818
transcript: string,
1919
model: string | GroqModelType = 'LLAMA_3_2_1B_PREVIEW'
20-
): Promise<string> => {
20+
) => {
2121
if (!env['GROQ_API_KEY']) {
2222
throw new Error('GROQ_API_KEY environment variable is not set. Please set it to your Groq API key.')
2323
}

src/llms/mistral.ts

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,9 @@
22

33
import { env } from 'node:process'
44
import { Mistral } from '@mistralai/mistralai'
5-
import { MISTRAL_MODELS } from '../utils/llm-globals'
5+
import { MISTRAL_MODELS } from '../utils/globals/llms'
66
import { err, logAPIResults } from '../utils/logging'
7-
import type { LLMFunction, MistralModelType } from '../types/llms'
7+
import type { MistralModelType } from '../utils/types/llms'
88

99
/**
1010
* Main function to call Mistral AI API.
@@ -14,11 +14,11 @@ import type { LLMFunction, MistralModelType } from '../types/llms'
1414
* @returns {Promise<string>} A Promise that resolves when the API call is complete.
1515
* @throws {Error} If an error occurs during the API call.
1616
*/
17-
export const callMistral: LLMFunction = async (
17+
export const callMistral = async (
1818
prompt: string,
1919
transcript: string,
2020
model: string = 'MISTRAL_NEMO'
21-
): Promise<string> => {
21+
) => {
2222
if (!env['MISTRAL_API_KEY']) {
2323
throw new Error('MISTRAL_API_KEY environment variable is not set. Please set it to your Mistral API key.')
2424
}

src/llms/ollama.ts

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
// src/llms/ollama.ts
22

33
import { env } from 'node:process'
4-
import { OLLAMA_MODELS } from '../utils/llm-globals'
4+
import { OLLAMA_MODELS } from '../utils/globals/llms'
55
import { l, err, logAPIResults } from '../utils/logging'
66
import { checkOllamaServerAndModel } from '../utils/validate-option'
7-
import type { LLMFunction, OllamaModelType, OllamaResponse } from '../types/llms'
7+
import type { OllamaModelType, OllamaResponse } from '../utils/types/llms'
88

99
/**
1010
* callOllama()
@@ -17,11 +17,11 @@ import type { LLMFunction, OllamaModelType, OllamaResponse } from '../types/llms
1717
* @param {string | OllamaModelType} [model='QWEN_2_5_0B'] - The Ollama model to use.
1818
* @returns {Promise<string>} A Promise resolving with the generated text.
1919
*/
20-
export const callOllama: LLMFunction = async (
20+
export const callOllama = async (
2121
prompt: string,
2222
transcript: string,
2323
model: string | OllamaModelType = 'QWEN_2_5_0B'
24-
): Promise<string> => {
24+
) => {
2525
l.wait('\n callOllama called with arguments:')
2626
l.wait(` - model: ${model}`)
2727

0 commit comments

Comments
 (0)