From f33069794a60b60f678a28515fcc6336ce931dda Mon Sep 17 00:00:00 2001 From: Anthony Campolo <12433465+ajcwebdev@users.noreply.github.com> Date: Thu, 2 Jan 2025 22:09:49 -0600 Subject: [PATCH 1/6] refactor generate markdown --- src/process-commands/video.ts | 7 +- src/process-steps/01-generate-markdown.ts | 133 ++++++++-------------- 2 files changed, 54 insertions(+), 86 deletions(-) diff --git a/src/process-commands/video.ts b/src/process-commands/video.ts index d5882b0d..478f511a 100644 --- a/src/process-commands/video.ts +++ b/src/process-commands/video.ts @@ -11,7 +11,7 @@ import { runTranscription } from '../process-steps/03-run-transcription' import { runLLM } from '../process-steps/05-run-llm' import { cleanUpFiles } from '../process-steps/06-clean-up-files' import { l, err } from '../utils/logging' -import { readFile } from 'fs/promises' +import { readFile, writeFile } from 'fs/promises' import { insertShowNote } from '../server/db' import type { ProcessingOptions } from '../types/process' import type { TranscriptServices } from '../types/transcription' @@ -83,6 +83,11 @@ export async function processVideo( llmOutput ) + // Write final front matter to a file + await writeFile(`${finalPath}.md`, frontMatter) + l.dim(frontMatter) + l.success(` Front matter successfully created and saved:\n - ${finalPath}.md`) + // Optional cleanup if (!options.noCleanUp) { await cleanUpFiles(finalPath) diff --git a/src/process-steps/01-generate-markdown.ts b/src/process-steps/01-generate-markdown.ts index 3149763e..8bdd2c0a 100644 --- a/src/process-steps/01-generate-markdown.ts +++ b/src/process-steps/01-generate-markdown.ts @@ -1,12 +1,11 @@ // src/process-steps/01-generate-markdown.ts /** - * @file Utility for generating markdown files with front matter for different content types. + * @file Utility for generating markdown content with front matter for different content types. * Supports YouTube videos, playlists, local files, and RSS feed items. * @packageDocumentation */ -import { writeFile } from 'node:fs/promises' import { basename, extname } from 'node:path' import { execFilePromise } from '../utils/globals' import { l, err } from '../utils/logging' @@ -43,7 +42,6 @@ export function sanitizeTitle(title: string): string { * 1. Sanitizes input titles for safe filename creation * 2. Extracts metadata based on content type * 3. Generates appropriate front matter - * 4. Creates and saves the markdown file * * @param {ProcessingOptions} options - The processing options specifying the type of content to generate. * Valid options include: video, playlist, urls, file, and rss. @@ -53,7 +51,7 @@ export function sanitizeTitle(title: string): string { * - For RSS: An RSSItem object containing feed item details * @returns {Promise} A promise that resolves to an object containing: * - frontMatter: The generated front matter content as a string - * - finalPath: The path where the markdown file is saved + * - finalPath: The path (base name) derived for the content * - filename: The sanitized filename * - metadata: An object containing all metadata fields * @throws {Error} If invalid options are provided or if metadata extraction fails. 
@@ -106,11 +104,10 @@ export async function generateMarkdown( .slice(0, 200) // Limit the length to 200 characters } - // Initialize variables for front matter content, final file path, sanitized filename, and metadata - let frontMatter: string[] // Array to hold front matter lines - let finalPath: string // The path where the markdown file will be saved - let filename: string // The sanitized filename - let metadata: { // Object to hold metadata fields + let frontMatter: string[] + let finalPath: string + let filename: string + let metadata: { showLink: string channel: string channelURL: string @@ -120,37 +117,32 @@ export async function generateMarkdown( coverImage: string } - // Determine which processing option is selected switch (true) { - // If any of these options are true, process as a video case !!options.video: case !!options.playlist: case !!options.urls: case !!options.channel: try { - // Execute yt-dlp command to extract metadata const { stdout } = await execFilePromise('yt-dlp', [ - '--restrict-filenames', // Restrict filenames to ASCII characters - '--print', '%(webpage_url)s', // Print the webpage URL - '--print', '%(channel)s', // Print the channel name - '--print', '%(uploader_url)s', // Print the uploader's URL - '--print', '%(title)s', // Print the video title - '--print', '%(upload_date>%Y-%m-%d)s', // Print the upload date in YYYY-MM-DD format - '--print', '%(thumbnail)s', // Print the thumbnail URL - input as string, // The video URL provided as input + '--restrict-filenames', + '--print', '%(webpage_url)s', + '--print', '%(channel)s', + '--print', '%(uploader_url)s', + '--print', '%(title)s', + '--print', '%(upload_date>%Y-%m-%d)s', + '--print', '%(thumbnail)s', + input as string, ]) - // Split the output into individual metadata fields const [ - showLink, // The video URL - videoChannel, // The channel name - uploader_url, // The uploader's URL - videoTitle, // The video title - formattedDate, // The upload date - thumbnail, // The thumbnail URL + showLink, + videoChannel, + uploader_url, + videoTitle, + formattedDate, + thumbnail, ] = stdout.trim().split('\n') - // Validate that all required metadata fields are present if ( !showLink || !videoChannel || @@ -162,12 +154,9 @@ export async function generateMarkdown( throw new Error('Incomplete metadata received from yt-dlp.') } - // Generate the sanitized filename using the upload date and video title filename = `${formattedDate}-${sanitizeTitle(videoTitle)}` - // Define the final path where the markdown file will be saved finalPath = `content/${filename}` - // Construct the metadata object metadata = { showLink: showLink, channel: videoChannel, @@ -178,38 +167,29 @@ export async function generateMarkdown( coverImage: thumbnail, } - // Construct the front matter content as an array of strings frontMatter = [ '---', - `showLink: "${metadata.showLink}"`, // The video URL - `channel: "${metadata.channel}"`, // The channel name - `channelURL: "${metadata.channelURL}"`, // The uploader's URL - `title: "${metadata.title}"`, // The video title - `description: "${metadata.description}"`, // Placeholder for description - `publishDate: "${metadata.publishDate}"`, // The upload date - `coverImage: "${metadata.coverImage}"`, // The thumbnail URL + `showLink: "${metadata.showLink}"`, + `channel: "${metadata.channel}"`, + `channelURL: "${metadata.channelURL}"`, + `title: "${metadata.title}"`, + `description: "${metadata.description}"`, + `publishDate: "${metadata.publishDate}"`, + `coverImage: "${metadata.coverImage}"`, '---\n', ] 
} catch (error) { - // Log the error and rethrow it for upstream handling err(`Error extracting metadata for ${input}: ${error instanceof Error ? error.message : String(error)}`) throw error } break - // If the file option is selected case !!options.file: - // Get the original filename from the input path const originalFilename = basename(input as string) - // Remove the file extension to get the filename without extension const filenameWithoutExt = originalFilename.replace(extname(originalFilename), '') - - // Sanitize the filename to make it safe for use in paths filename = sanitizeTitle(filenameWithoutExt) - // Define the final path where the markdown file will be saved finalPath = `content/${filename}` - // Construct the metadata object for a file metadata = { showLink: originalFilename, channel: '', @@ -220,40 +200,33 @@ export async function generateMarkdown( coverImage: '', } - // Construct the front matter content for a file frontMatter = [ '---', - `showLink: "${metadata.showLink}"`, // The original filename - `channel: "${metadata.channel}"`, // Empty channel field - `channelURL: "${metadata.channelURL}"`, // Empty channel URL field - `title: "${metadata.title}"`, // Use the original filename as the title - `description: "${metadata.description}"`, // Placeholder for description - `publishDate: "${metadata.publishDate}"`, // Empty publish date - `coverImage: "${metadata.coverImage}"`, // Empty cover image + `showLink: "${metadata.showLink}"`, + `channel: "${metadata.channel}"`, + `channelURL: "${metadata.channelURL}"`, + `title: "${metadata.title}"`, + `description: "${metadata.description}"`, + `publishDate: "${metadata.publishDate}"`, + `coverImage: "${metadata.coverImage}"`, '---\n', ] break - // If the RSS option is selected case !!options.rss: - // Cast the input to an RSSItem type const item = input as RSSItem - // Destructure necessary fields from the RSS item const { - publishDate, // Publication date - title: rssTitle, // Title of the RSS item - coverImage, // Cover image URL - showLink, // Link to the content - channel: rssChannel, // Channel name - channelURL, // Channel URL + publishDate, + title: rssTitle, + coverImage, + showLink, + channel: rssChannel, + channelURL, } = item - // Generate the sanitized filename using the publish date and title filename = `${publishDate}-${sanitizeTitle(rssTitle)}` - // Define the final path where the markdown file will be saved finalPath = `content/${filename}` - // Construct the metadata object for an RSS item metadata = { showLink: showLink, channel: rssChannel, @@ -264,38 +237,28 @@ export async function generateMarkdown( coverImage: coverImage, } - // Construct the front matter content for an RSS item frontMatter = [ '---', - `showLink: "${metadata.showLink}"`, // Link to the content - `channel: "${metadata.channel}"`, // Channel name - `channelURL: "${metadata.channelURL}"`, // Channel URL - `title: "${metadata.title}"`, // Title of the RSS item - `description: "${metadata.description}"`, // Placeholder for description - `publishDate: "${metadata.publishDate}"`, // Publication date - `coverImage: "${metadata.coverImage}"`, // Cover image URL + `showLink: "${metadata.showLink}"`, + `channel: "${metadata.channel}"`, + `channelURL: "${metadata.channelURL}"`, + `title: "${metadata.title}"`, + `description: "${metadata.description}"`, + `publishDate: "${metadata.publishDate}"`, + `coverImage: "${metadata.coverImage}"`, '---\n', ] break - // If no valid option is provided, throw an error default: throw new Error('Invalid option 
provided for markdown generation.') } - // Join the front matter array into a single string with newline separators const frontMatterContent = frontMatter.join('\n') - // Write the front matter content to a markdown file at the specified path - await writeFile(`${finalPath}.md`, frontMatterContent) - - // Log the front matter content in dimmed text + // Only log front matter; do not write to file here l.dim(frontMatterContent) - // Log the current step in the process l.step('\nStep 1 - Generating markdown...\n') - // Log a success message indicating where the file was saved - l.success(` Front matter successfully created and saved:\n - ${finalPath}.md`) - // Return an object containing the front matter, final path, filename, and metadata return { frontMatter: frontMatterContent, finalPath, filename, metadata } } \ No newline at end of file From e4cfed42570910c4862fa0e6f31543519d13d775 Mon Sep 17 00:00:00 2001 From: Anthony Campolo <12433465+ajcwebdev@users.noreply.github.com> Date: Fri, 3 Jan 2025 02:45:11 -0600 Subject: [PATCH 2/6] more logging and less file read and writes --- src/llms/ollama.ts | 55 +++++----- src/process-commands/file.ts | 34 ++++++- src/process-commands/rss.ts | 3 +- src/process-commands/video.ts | 39 +++++-- src/process-steps/01-generate-markdown.ts | 31 ++---- src/process-steps/02-download-audio.ts | 24 ++++- src/process-steps/03-run-transcription.ts | 118 +++++++--------------- src/process-steps/04-select-prompt.ts | 38 +++++-- src/process-steps/05-run-llm.ts | 84 +++++---------- src/process-steps/06-clean-up-files.ts | 6 +- src/transcription/whisper.ts | 57 +++++++---- src/types/logging.ts | 2 + src/types/transcription.ts | 2 +- src/utils/logging.ts | 4 + test/local.test.ts | 32 +----- 15 files changed, 263 insertions(+), 266 deletions(-) diff --git a/src/llms/ollama.ts b/src/llms/ollama.ts index 63e55813..683e70c1 100644 --- a/src/llms/ollama.ts +++ b/src/llms/ollama.ts @@ -14,14 +14,17 @@ import type { LLMFunction, OllamaModelType, OllamaResponse, OllamaTagsResponse } * * In a single-container approach: * - We assume 'ollama' binary is installed inside the container. - * - We'll try to connect to 'localhost:11434' or a custom port from env, - * and if it's not running, we'll spawn `ollama serve`. + * - We'll try to connect to 'localhost:11434' or a custom port from env. */ export const callOllama: LLMFunction = async ( promptAndTranscript: string, tempPath: string, model: string | OllamaModelType = 'LLAMA_3_2_1B' ) => { + l.wait('\n callOllama called with arguments:') + l.wait(` - model: ${model}`) + l.wait(` - tempPath: ${tempPath}`) + try { // Get the model configuration and ID const modelKey = typeof model === 'string' ? model : 'LLAMA_3_2_1B' @@ -29,10 +32,11 @@ export const callOllama: LLMFunction = async ( const ollamaModelName = modelConfig.modelId l.wait(` - modelName: ${modelKey}\n - ollamaModelName: ${ollamaModelName}`) - + // Host & port for Ollama const ollamaHost = env['OLLAMA_HOST'] || 'localhost' const ollamaPort = env['OLLAMA_PORT'] || '11434' + l.wait(`\n Using Ollama host: ${ollamaHost}, port: ${ollamaPort}`) // Check if Ollama server is up async function checkServer(): Promise { @@ -53,7 +57,7 @@ export const callOllama: LLMFunction = async ( l.wait('\n Ollama server is not running. 
Attempting to start...') const ollamaProcess = spawn('ollama', ['serve'], { detached: true, - stdio: 'ignore' + stdio: 'ignore', }) ollamaProcess.unref() @@ -64,7 +68,7 @@ export const callOllama: LLMFunction = async ( l.wait(' - Ollama server is now ready.') break } - await new Promise(resolve => setTimeout(resolve, 1000)) + await new Promise((resolve) => setTimeout(resolve, 1000)) attempts++ } if (attempts === 30) { @@ -74,21 +78,20 @@ export const callOllama: LLMFunction = async ( } // Check and pull model if needed + l.wait(`\n Checking if model is available: ${ollamaModelName}`) try { const tagsResponse = await fetch(`http://${ollamaHost}:${ollamaPort}/api/tags`) if (!tagsResponse.ok) { throw new Error(`HTTP error! status: ${tagsResponse.status}`) } - const tagsData = await tagsResponse.json() as OllamaTagsResponse - const isModelAvailable = tagsData.models.some(model => model.name === ollamaModelName) + const tagsData = (await tagsResponse.json()) as OllamaTagsResponse + const isModelAvailable = tagsData.models.some((m) => m.name === ollamaModelName) if (!isModelAvailable) { - l.wait(`\n Model ${ollamaModelName} is not available, pulling the model...`) + l.wait(`\n Model ${ollamaModelName} is not available, pulling...`) const pullResponse = await fetch(`http://${ollamaHost}:${ollamaPort}/api/pull`, { method: 'POST', - headers: { - 'Content-Type': 'application/json', - }, + headers: { 'Content-Type': 'application/json' }, body: JSON.stringify({ name: ollamaModelName }), }) if (!pullResponse.ok) { @@ -98,11 +101,13 @@ export const callOllama: LLMFunction = async ( throw new Error('Response body is null') } + // Stream the pull response const reader = pullResponse.body.getReader() const decoder = new TextDecoder() while (true) { const { done, value } = await reader.read() if (done) break + const chunk = decoder.decode(value) const lines = chunk.split('\n') for (const line of lines) { @@ -110,30 +115,28 @@ export const callOllama: LLMFunction = async ( try { const response = JSON.parse(line) if (response.status === 'success') { - l.wait(` - Model ${ollamaModelName} has been pulled successfully...\n`) + l.wait(` - Model ${ollamaModelName} pulled successfully.\n`) break } } catch (parseError) { - err(`Error parsing JSON: ${parseError}`) + err(`Error parsing JSON while pulling model: ${parseError}`) } } } } else { - l.wait(`\n Model ${ollamaModelName} is already available...\n`) + l.wait(`\n Model ${ollamaModelName} is already available.\n`) } } catch (error) { - err(`Error checking/pulling model: ${error instanceof Error ? 
error.message : String(error)}`) + err(`Error checking/pulling model: ${(error as Error).message}`) throw error } - l.wait(` - Sending chat request to http://${ollamaHost}:${ollamaPort} using ${ollamaModelName} model`) + l.wait(` - Sending chat request to http://${ollamaHost}:${ollamaPort} using model '${ollamaModelName}'`) // Call Ollama's /api/chat endpoint in streaming mode const response = await fetch(`http://${ollamaHost}:${ollamaPort}/api/chat`, { method: 'POST', - headers: { - 'Content-Type': 'application/json', - }, + headers: { 'Content-Type': 'application/json' }, body: JSON.stringify({ model: ollamaModelName, messages: [{ role: 'user', content: promptAndTranscript }], @@ -148,6 +151,7 @@ export const callOllama: LLMFunction = async ( throw new Error('Response body is null') } + l.wait('\n Successfully connected to Ollama /api/chat streaming endpoint.') const reader = response.body.getReader() const decoder = new TextDecoder() let fullContent = '' @@ -169,7 +173,7 @@ export const callOllama: LLMFunction = async ( const parsedResponse = JSON.parse(line) as OllamaResponse if (parsedResponse.message?.content) { if (isFirstChunk) { - l.wait(` - Receiving streaming response from Ollama...`) + l.wait(` - Streaming response from Ollama (first chunk received)`) isFirstChunk = false } fullContent += parsedResponse.message.content @@ -184,26 +188,25 @@ export const callOllama: LLMFunction = async ( } if (parsedResponse.done) { - // Log final results using standardized logging function logAPIResults({ modelName: modelKey, stopReason: 'stop', tokenUsage: { input: totalPromptTokens || undefined, output: totalCompletionTokens || undefined, - total: totalPromptTokens + totalCompletionTokens || undefined - } + total: totalPromptTokens + totalCompletionTokens || undefined, + }, }) } } catch (parseError) { - err(`Error parsing JSON: ${parseError}`) + err(`Error parsing JSON from Ollama response: ${parseError}`) } } } - // Write final content to the specified temp file + l.wait(`\n Completed streaming from Ollama. Writing output to temp file: ${tempPath}`) await writeFile(tempPath, fullContent) - + l.wait(`\n Ollama output successfully written to '${tempPath}' (length: ${fullContent.length} chars)`) } catch (error) { err(`Error in callOllama: ${error instanceof Error ? error.message : String(error)}`) err(`Stack Trace: ${error instanceof Error ? 
error.stack : 'No stack trace available'}`) diff --git a/src/process-commands/file.ts b/src/process-commands/file.ts index a96fdde7..07a6bbe4 100644 --- a/src/process-commands/file.ts +++ b/src/process-commands/file.ts @@ -42,35 +42,53 @@ export async function processFile( llmServices?: LLMServices, transcriptServices?: TranscriptServices ): Promise { - l.opts('Parameters passed to processFile:\n') - l.opts(` - llmServices: ${llmServices}\n - transcriptServices: ${transcriptServices}\n`) + // Log function inputs + l.info('processFile called with the following arguments:') + l.opts(` - filePath: ${filePath}`) + l.opts(` - llmServices: ${llmServices}`) + l.opts(` - transcriptServices: ${transcriptServices}\n`) try { // Step 1 - Generate markdown + l.step('Step 1 - Generating markdown...') const { frontMatter, finalPath, filename, metadata } = await generateMarkdown(options, filePath) // Step 2 - Convert to WAV + l.step('Step 2 - Converting file to WAV...') await downloadAudio(options, filePath, filename) // Step 3 - Transcribe audio and read transcript - await runTranscription(options, finalPath, transcriptServices) - const transcript = await readFile(`${finalPath}.txt`, 'utf-8') + l.step('Step 3 - Transcribing audio...') + const transcript = await runTranscription(options, finalPath, transcriptServices) + l.wait(`\n Successfully read transcript file: ${finalPath}.txt (length: ${transcript.length} characters)`) // Step 4 - Select Prompt - const promptText = await readFile(options.customPrompt || '', 'utf-8').catch(() => '') + l.step('\nStep 4 - Selecting prompt...\n') + if (options.customPrompt) { + l.wait(`\n Reading custom prompt file:\n - ${options.customPrompt}`) + } + const promptText = await readFile(options.customPrompt || '', 'utf-8').catch((err) => { + l.warn(` Could not read custom prompt file: ${options.customPrompt}. Using empty prompt. Error: ${err}`) + return '' + }) + l.wait(`\n Prompt text length: ${promptText.length}`) // Step 5 - Run LLM (optional) + l.step('\nStep 5 - Running LLM (if applicable)...') const llmOutput = await runLLM(options, finalPath, frontMatter, llmServices) let generatedPrompt = '' if (!promptText) { + l.wait('\n No custom prompt text found, importing default prompt generator...') const defaultPrompt = await import('../process-steps/04-select-prompt') generatedPrompt = await defaultPrompt.generatePrompt(options.prompt, undefined) + l.wait(`\n Default prompt generated (length: ${generatedPrompt.length})`) } else { generatedPrompt = promptText } // Insert into DB + l.wait('\n Inserting show note into the database...') insertShowNote( metadata.showLink ?? '', metadata.channel ?? 
'', @@ -84,10 +102,16 @@ export async function processFile( transcript, llmOutput ) + l.wait('\n Show note inserted successfully.\n') + // Step 6 - Cleanup if (!options.noCleanUp) { + l.step('\nStep 6 - Cleaning up temporary files...') await cleanUpFiles(finalPath) + l.wait('\n Cleanup completed.\n') } + + l.wait(' processFile completed successfully.') } catch (error) { err(`Error processing file: ${(error as Error).message}`) process.exit(1) diff --git a/src/process-commands/rss.ts b/src/process-commands/rss.ts index 23b28673..539cce45 100644 --- a/src/process-commands/rss.ts +++ b/src/process-commands/rss.ts @@ -187,8 +187,7 @@ async function processItem( await downloadAudio(options, item.showLink, filename) // Step 3 - Transcribe audio and read transcript - await runTranscription(options, finalPath, transcriptServices) - const transcript = await readFile(`${finalPath}.txt`, 'utf-8') + const transcript = await runTranscription(options, finalPath, transcriptServices) // Step 4 - Select Prompt const promptText = await readFile(options.customPrompt || '', 'utf-8').catch(() => '') diff --git a/src/process-commands/video.ts b/src/process-commands/video.ts index 478f511a..bc73e5f2 100644 --- a/src/process-commands/video.ts +++ b/src/process-commands/video.ts @@ -40,35 +40,52 @@ export async function processVideo( llmServices?: LLMServices, transcriptServices?: TranscriptServices ): Promise { - l.opts('Parameters passed to processVideo:\n') - l.opts(` - llmServices: ${llmServices}\n - transcriptServices: ${transcriptServices}\n`) + // Log function inputs + l.opts('processVideo called with the following arguments:\n') + l.opts(` - url: ${url}`) + l.opts(` - llmServices: ${llmServices}`) + l.opts(` - transcriptServices: ${transcriptServices}\n`) try { // Step 1 - Generate markdown + l.step('\nStep 1 - Generating markdown...') const { frontMatter, finalPath, filename, metadata } = await generateMarkdown(options, url) // Step 2 - Download audio and convert to WAV + l.step('\nStep 2 - Downloading/converting audio...\n') await downloadAudio(options, url, filename) // Step 3 - Transcribe audio and read transcript - await runTranscription(options, finalPath, transcriptServices) - const transcript = await readFile(`${finalPath}.txt`, 'utf-8') + l.step('\nStep 3 - Transcribing audio...\n') + const transcript = await runTranscription(options, finalPath, transcriptServices) // Step 4 - Select Prompt - const promptText = await readFile(options.customPrompt || '', 'utf-8').catch(() => '') - + l.step('\nStep 4 - Selecting prompt...\n') + if (options.customPrompt) { + l.info(`\n Reading custom prompt file: ${options.customPrompt}`) + } + const promptText = await readFile(options.customPrompt || '', 'utf-8').catch((err) => { + l.warn(`\n Could not read custom prompt file: ${options.customPrompt}. Using empty prompt. 
Error: ${err}`) + return '' + }) + l.wait(`\n Prompt text length: ${promptText.length}`) + // Step 5 - Run LLM (optional) + l.step(`\nStep 5 - Running LLM processing on transcript (if applicable)...\n`) const llmOutput = await runLLM(options, finalPath, frontMatter, llmServices) let generatedPrompt = '' if (!promptText) { + l.wait('\n No custom prompt text found, importing default prompt generator...') const defaultPrompt = await import('../process-steps/04-select-prompt') generatedPrompt = await defaultPrompt.generatePrompt(options.prompt, undefined) + l.wait(`\n Default prompt generated (length: ${generatedPrompt.length})`) } else { generatedPrompt = promptText } // Insert into DB + l.wait('\n Inserting show note into the database...') insertShowNote( metadata.showLink ?? '', metadata.channel ?? '', @@ -82,18 +99,22 @@ export async function processVideo( transcript, llmOutput ) + l.wait('\n Show note inserted successfully.\n') // Write final front matter to a file + l.wait(`\n Writing front matter to file:\n - ${finalPath}.md`) await writeFile(`${finalPath}.md`, frontMatter) - l.dim(frontMatter) - l.success(` Front matter successfully created and saved:\n - ${finalPath}.md`) + l.wait(`\n Successfully wrote front matter to file:\n - ${finalPath}.md\n`) // Optional cleanup if (!options.noCleanUp) { + l.step('Step 6 - Cleaning up temporary files...\n') await cleanUpFiles(finalPath) + l.wait('\n Cleanup completed.\n') } - // Return transcript or some relevant string + // Return transcript + l.wait(' Returning transcript from processVideo...') return transcript } catch (error) { err('Error processing video:', (error as Error).message) diff --git a/src/process-steps/01-generate-markdown.ts b/src/process-steps/01-generate-markdown.ts index 8bdd2c0a..083df888 100644 --- a/src/process-steps/01-generate-markdown.ts +++ b/src/process-steps/01-generate-markdown.ts @@ -81,28 +81,9 @@ export async function generateMarkdown( options: ProcessingOptions, input: string | RSSItem ): Promise { - /** - * Sanitizes a title string for use in filenames by: - * - Removing special characters except spaces and hyphens - * - Converting spaces and underscores to hyphens - * - Converting to lowercase - * - Limiting length to 200 characters - * - * @param {string} title - The title to sanitize. - * @returns {string} The sanitized title safe for use in filenames. - * - * @example - * sanitizeTitle('My Video Title! (2024)') // returns 'my-video-title-2024' - */ - function sanitizeTitle(title: string): string { - return title - .replace(/[^\w\s-]/g, '') // Remove all non-word characters except spaces and hyphens - .trim() // Remove leading and trailing whitespace - .replace(/[\s_]+/g, '-') // Replace spaces and underscores with hyphens - .replace(/-+/g, '-') // Replace multiple hyphens with a single hyphen - .toLowerCase() // Convert to lowercase - .slice(0, 200) // Limit the length to 200 characters - } + // Log function inputs + l.wait('\n generateMarkdown called with the following arguments:\n') + l.wait(` - input: ${typeof input === 'string' ? input : JSON.stringify(input, null, 2)}`) let frontMatter: string[] let finalPath: string @@ -134,6 +115,7 @@ export async function generateMarkdown( input as string, ]) + l.wait('\n Metadata extraction with yt-dlp completed. 
Parsing output...\n') const [ showLink, videoChannel, @@ -185,6 +167,7 @@ export async function generateMarkdown( break case !!options.file: + l.wait('\n Generating markdown for a local file...') const originalFilename = basename(input as string) const filenameWithoutExt = originalFilename.replace(extname(originalFilename), '') filename = sanitizeTitle(filenameWithoutExt) @@ -214,6 +197,7 @@ export async function generateMarkdown( break case !!options.rss: + l.wait('Generating markdown for an RSS item...') const item = input as RSSItem const { publishDate, @@ -258,7 +242,8 @@ export async function generateMarkdown( // Only log front matter; do not write to file here l.dim(frontMatterContent) - l.step('\nStep 1 - Generating markdown...\n') + // Log return values + l.wait(` generateMarkdown returning:\n\n - finalPath: ${finalPath}\n - filename: ${filename}\n`) return { frontMatter: frontMatterContent, finalPath, filename, metadata } } \ No newline at end of file diff --git a/src/process-steps/02-download-audio.ts b/src/process-steps/02-download-audio.ts index a91cca7d..7dc2b564 100644 --- a/src/process-steps/02-download-audio.ts +++ b/src/process-steps/02-download-audio.ts @@ -74,13 +74,17 @@ export async function downloadAudio( input: string, filename: string ): Promise { + // Log function inputs + l.wait('\n downloadAudio called with the following arguments:\n') + l.wait(` - input: ${input}`) + l.wait(` - filename: ${filename}`) + // Define output paths using the provided filename const finalPath = `content/${filename}` const outputPath = `${finalPath}.wav` // Handle online content (YouTube, RSS feeds, etc.) if (options.video || options.playlist || options.urls || options.rss || options.channel) { - l.step('\nStep 2 - Downloading URL audio...\n') try { // Download and convert audio using yt-dlp const { stderr } = await execFilePromise('yt-dlp', [ @@ -97,7 +101,7 @@ export async function downloadAudio( if (stderr) { err(`yt-dlp warnings: ${stderr}`) } - l.success(` Audio downloaded successfully:\n - ${outputPath}`) + l.wait(`\n Audio downloaded successfully, output path for WAV file:\n - ${outputPath}`) } catch (error) { err( `Error downloading audio: ${ @@ -109,7 +113,7 @@ export async function downloadAudio( } // Handle local file processing else if (options.file) { - l.step('\nStep 2 - Processing file audio...\n') + l.step('\nStep 2 - Processing local file audio via ffmpeg...\n') // Define supported media formats const supportedFormats: Set = new Set([ // Audio formats @@ -119,22 +123,29 @@ export async function downloadAudio( ]) try { // Verify file exists and is accessible + l.wait(`\n Checking file access:\n - ${input}`) await access(input) + l.wait(`\n File ${input} is accessible. Attempting to read file data for type detection...`) + // Read file and determine its type const buffer = await readFile(input) + l.wait(`\n Successfully read file: ${buffer.length} bytes`) + const fileType = await fileTypeFromBuffer(buffer) + l.wait(`\n File type detection result: ${fileType?.ext ?? 'unknown'}`) + // Validate file type is supported if (!fileType || !supportedFormats.has(fileType.ext as SupportedFileType)) { throw new Error( fileType ? 
`Unsupported file type: ${fileType.ext}` : 'Unable to determine file type' ) } - l.wait(` File type detected as ${fileType.ext}, converting to WAV...\n`) // Convert to standardized WAV format using ffmpeg + l.wait(` Running ffmpeg command for ${input} -> ${outputPath}\n`) await execPromise( `ffmpeg -i "${input}" -ar 16000 -ac 1 -c:a pcm_s16le "${outputPath}"` ) - l.success(` File converted to WAV format successfully:\n - ${outputPath}`) + l.wait(` File converted to WAV format successfully:\n - ${outputPath}`) } catch (error) { err(`Error processing local file: ${error instanceof Error ? (error as Error).message : String(error)}`) throw error @@ -144,5 +155,8 @@ export async function downloadAudio( else { throw new Error('Invalid option provided for audio download/processing.') } + + // Log return value + l.wait(`\n downloadAudio returning:\n\n - outputPath: ${outputPath}\n`) return outputPath } \ No newline at end of file diff --git a/src/process-steps/03-run-transcription.ts b/src/process-steps/03-run-transcription.ts index b9e18def..7bfc09e7 100644 --- a/src/process-steps/03-run-transcription.ts +++ b/src/process-steps/03-run-transcription.ts @@ -1,16 +1,10 @@ // src/process-steps/03-run-transcription.ts -/** - * @file Orchestrator for running transcription services on audio files. - * Manages the routing and execution of various transcription services, - * both local and cloud-based. - * @packageDocumentation - */ - +import { readFile } from 'node:fs/promises' import { callWhisper } from '../transcription/whisper' import { callDeepgram } from '../transcription/deepgram' import { callAssembly } from '../transcription/assembly' -import { l } from '../utils/logging' +import { l, err } from '../utils/logging' import type { ProcessingOptions } from '../types/process' import type { TranscriptServices } from '../types/transcription' @@ -19,85 +13,51 @@ import type { TranscriptServices } from '../types/transcription' * Routes the transcription request to the appropriate service handler * and manages the execution process. 
* - * Available transcription services: - * Local Services: - * - whisper: Default Whisper.cpp implementation - * - whisperDocker: Whisper.cpp running in Docker - * - * Cloud Services: - * - deepgram: Deepgram's API service - * - assembly: AssemblyAI's API service - * - * @param {ProcessingOptions} options - Configuration options including: - * - whisper: Whisper model specification - * - whisperDocker: Docker-based Whisper model - * - speakerLabels: Enable speaker detection (Assembly) - * - Additional service-specific options - * - * @param {string} finalPath - Base path for input/output files: - * - Input audio: `${finalPath}.wav` - * - Output transcript: `${finalPath}.txt` - * - * @param {string} frontMatter - YAML front matter content for the transcript - * (Reserved for future use with metadata) - * - * @param {TranscriptServices} [transcriptServices] - The transcription service to use: - * - 'whisper': Local Whisper.cpp - * - 'whisperDocker': Containerized Whisper - * - 'deepgram': Deepgram API - * - 'assembly': AssemblyAI API - * - * @returns {Promise} Resolves when transcription is complete - * - * @throws {Error} If: - * - Unknown transcription service is specified - * - Service-specific initialization fails - * - Transcription process fails - * - File operations fail - * - * @example - * // Using local Whisper - * await runTranscription( - * { whisper: 'base' }, - * 'content/my-video', - * '---\ntitle: My Video\n---', - * 'whisper' - * ) - * - * @example - * // Using AssemblyAI with speaker labels - * await runTranscription( - * { speakerLabels: true }, - * 'content/my-video', - * '---\ntitle: My Video\n---', - * 'assembly' - * ) + * @param {ProcessingOptions} options - Configuration options + * @param {string} finalPath - Base path for input/output files + * @param {TranscriptServices} [transcriptServices] - The transcription service to use + * @returns {Promise} The complete transcript */ export async function runTranscription( options: ProcessingOptions, finalPath: string, transcriptServices?: TranscriptServices -): Promise { - l.step(`\nStep 3 - Running transcription on audio file using ${transcriptServices}...`) +): Promise { + // Log function call + l.wait('\n runTranscription called with arguments:\n') + l.wait(` - finalPath: ${finalPath}`) + l.wait(` - transcriptServices: ${transcriptServices}`) - // Route to appropriate transcription service - switch (transcriptServices) { - case 'deepgram': - // Cloud-based service with advanced features - await callDeepgram(finalPath) - break + try { + switch (transcriptServices) { + case 'deepgram': + l.wait('Using Deepgram transcription service...') + // Deepgram might write the .txt file to disk. Then read it. + await callDeepgram(finalPath) + l.success('Deepgram transcription completed successfully.') + // Read the transcript from file (if that's how Deepgram is implemented) + return readFile(`${finalPath}.txt`, 'utf8') - case 'assembly': - // Cloud-based service with speaker diarization - await callAssembly(options, finalPath) - break + case 'assembly': + l.wait('Using AssemblyAI transcription service...') + // Assembly might write the .txt file to disk. Then read it. 
+ await callAssembly(options, finalPath) + l.success('AssemblyAI transcription completed successfully.') + // Read the transcript from file + return readFile(`${finalPath}.txt`, 'utf8') - case 'whisper': - // Local transcription with whisper.cpp - await callWhisper(options, finalPath) - break + case 'whisper': + l.wait('Using local Whisper transcription service...') + // Call whisper and return the final text content in memory + const whisperTranscript = await callWhisper(options, finalPath) + l.wait('\n Whisper transcription completed successfully.') + return whisperTranscript - default: - throw new Error(`Unknown transcription service: ${transcriptServices}`) + default: + throw new Error(`Unknown transcription service: ${transcriptServices}`) + } + } catch (error) { + err(`Error during runTranscription: ${(error as Error).message}`) + throw error } } \ No newline at end of file diff --git a/src/process-steps/04-select-prompt.ts b/src/process-steps/04-select-prompt.ts index 724bc4a8..7fca15a6 100644 --- a/src/process-steps/04-select-prompt.ts +++ b/src/process-steps/04-select-prompt.ts @@ -1,8 +1,8 @@ -// src/process-steps/04-prompt.ts +// src/process-steps/04-select-prompt.ts import type { PromptSection } from '../types/process' import { readFile } from 'fs/promises' -import { err } from '../utils/logging' +import { err, l } from '../utils/logging' /** * Define the structure for different sections of the prompt @@ -228,8 +228,13 @@ const sections = { * @throws {Error} If the file cannot be read or is invalid */ export async function readCustomPrompt(filePath: string): Promise { + l.wait('\n readCustomPrompt called with arguments:\n') + l.wait(` - filePath: ${filePath}`) + try { + l.wait(`\n Reading custom prompt file:\n - ${filePath}`) const customPrompt = await readFile(filePath, 'utf8') + l.wait(`\n Successfully read custom prompt file, character length:\n\n - ${customPrompt.length}`) return customPrompt.trim() } catch (error) { err(`Error reading custom prompt file: ${(error as Error).message}`) @@ -248,27 +253,40 @@ export async function generatePrompt( prompt: string[] = ['summary', 'longChapters'], customPromptPath?: string ): Promise { + l.wait('\n generatePrompt called with arguments:\n') + l.wait(` - prompt: ${JSON.stringify(prompt)}`) + l.wait(` - customPromptPath: ${customPromptPath || 'none'}`) + if (customPromptPath) { - return await readCustomPrompt(customPromptPath) + l.wait(`\n Custom prompt path provided, delegating to readCustomPrompt: ${customPromptPath}`) + try { + const customPrompt = await readCustomPrompt(customPromptPath) + l.wait('\n Custom prompt file successfully processed.') + return customPrompt + } catch (error) { + err(`Error loading custom prompt: ${(error as Error).message}`) + throw error + } } // Original prompt generation logic - let text = "This is a transcript with timestamps. It does not contain copyrighted materials.\n\n" - - // Filter valid sections first - const validSections = prompt.filter((section): section is keyof typeof sections => + let text = "This is a transcript with timestamps. It does not contain copyrighted materials. 
Do not ever use the word delve.\n\n" + + // Filter valid sections + const validSections = prompt.filter((section): section is keyof typeof sections => Object.hasOwn(sections, section) ) + l.wait(`\n Valid sections identified:\n\n ${JSON.stringify(validSections)}`) // Add instructions - validSections.forEach(section => { + validSections.forEach((section) => { text += sections[section].instruction + "\n" }) + // Add formatting instructions and examples text += "Format the output like so:\n\n" - validSections.forEach(section => { + validSections.forEach((section) => { text += ` ${sections[section].example}\n` }) - return text } \ No newline at end of file diff --git a/src/process-steps/05-run-llm.ts b/src/process-steps/05-run-llm.ts index 25d9bd27..39699a2f 100644 --- a/src/process-steps/05-run-llm.ts +++ b/src/process-steps/05-run-llm.ts @@ -58,41 +58,9 @@ export const LLM_FUNCTIONS: LLMFunctions = { * * @param {string} frontMatter - YAML front matter content to include in the output * - * @param {LLMServices} [llmServices] - The LLM service to use: - * - ollama: Ollama for local inference - * - chatgpt: OpenAI's ChatGPT - * - claude: Anthropic's Claude - * - gemini: Google's Gemini - * - cohere: Cohere - * - mistral: Mistral AI - * - fireworks: Fireworks AI - * - together: Together AI - * - groq: Groq + * @param {LLMServices} [llmServices] - The LLM service to use * * @returns {Promise} Resolves with the LLM output, or an empty string if no LLM is selected - * - * @throws {Error} If: - * - Transcript file is missing or unreadable - * - Invalid LLM service is specified - * - LLM processing fails after retries - * - File operations fail - * - * @example - * // Process with Ollama - * const llmOutput = await runLLM( - * { prompt: ['summary', 'highlights'], ollama: 'LLAMA_3_2_1B' }, - * 'content/my-video', - * '---\ntitle: My Video\n---', - * 'chatgpt' - * ) - * - * @example - * // Save prompt and transcript without LLM processing - * const llmOutput = await runLLM( - * { prompt: ['summary'] }, - * 'content/my-video', - * '---\ntitle: My Video\n---' - * ) */ export async function runLLM( options: ProcessingOptions, @@ -100,19 +68,21 @@ export async function runLLM( frontMatter: string, llmServices?: LLMServices ): Promise { - l.step(`\nStep 4 - Running LLM processing on transcript...\n`) + l.wait('\n runLLM called with arguments:\n') + l.wait(` - finalPath: ${finalPath}`) + l.wait(` - llmServices: ${llmServices}`) try { - // Read and format the transcript + l.wait(`\n Reading transcript from file:\n - ${finalPath}.txt`) const tempTranscript = await readFile(`${finalPath}.txt`, 'utf8') const transcript = `## Transcript\n\n${tempTranscript}` - // Generate and combine prompt with transcript + l.wait('\n Generating prompt text using generatePrompt...') const prompt = await generatePrompt(options.prompt, options.customPrompt) const promptAndTranscript = `${prompt}${transcript}` if (llmServices) { - l.wait(` Preparing to process with ${llmServices} Language Model...\n`) + l.wait(`\n Preparing to process with '${llmServices}' Language Model...\n`) // Get the appropriate LLM handler function const llmFunction: LLMFunction = LLM_FUNCTIONS[llmServices] @@ -120,51 +90,53 @@ export async function runLLM( throw new Error(`Invalid LLM option: ${llmServices}`) } - // Set up retry logic const maxRetries = 5 - const delayBetweenRetries = 10000 // 10 seconds in milliseconds + const delayBetweenRetries = 10000 // 10 seconds let attempt = 0 const tempPath = `${finalPath}-${llmServices}-temp.md` while (attempt < 
maxRetries) { try { attempt++ - l.wait(` Attempt ${attempt} - Processing with ${llmServices} Language Model...\n`) - // Process content with selected LLM + l.wait(` Attempt ${attempt} - Processing with ${llmServices}...\n`) await llmFunction(promptAndTranscript, tempPath, options[llmServices]) - // If successful, break out of the loop + l.wait(`\n LLM call to '${llmServices}' completed successfully on attempt ${attempt}.`) break } catch (error) { + err(` Attempt ${attempt} failed: ${(error as Error).message}`) if (attempt >= maxRetries) { - err(` Max retries reached. Unable to process with ${llmServices}.`) + err(` Max retries (${maxRetries}) reached. Aborting LLM processing.`) throw error } - err(` Attempt ${attempt} failed with error: ${(error as Error).message}`) l.wait(` Retrying in ${delayBetweenRetries / 1000} seconds...`) - await new Promise(resolve => setTimeout(resolve, delayBetweenRetries)) + await new Promise((resolve) => setTimeout(resolve, delayBetweenRetries)) } } - l.success(`\n LLM processing completed successfully after ${attempt} attempt(s).\n`) + l.wait(`\n LLM processing completed successfully after ${attempt} attempt(s).\n`) - // Combine results with front matter and original transcript + l.wait(`\n Reading LLM output from file:\n - ${tempPath}`) const showNotes = await readFile(tempPath, 'utf8') - await writeFile( - `${finalPath}-${llmServices}-shownotes.md`, - `${frontMatter}\n${showNotes}\n\n${transcript}` - ) + const outputFilename = `${finalPath}-${llmServices}-shownotes.md` + l.wait(`\n Writing combined front matter + LLM output + transcript to file:\n - ${outputFilename}`) + await writeFile(outputFilename, `${frontMatter}\n${showNotes}\n\n${transcript}`) + l.wait(`\n Generated show notes saved to:\n - ${outputFilename}`) - // Clean up temporary file + l.wait(`\n Cleaning up temporary file:\n - ${tempPath}`) await unlink(tempPath) - l.success(`\n Generated show notes saved to markdown file:\n - ${finalPath}-${llmServices}-shownotes.md`) + l.wait('\n Temporary file removed successfully.\n') // Return only the LLM's output portion return showNotes } else { // Handle case when no LLM is selected - l.wait(' No LLM selected, skipping processing...') - await writeFile(`${finalPath}-prompt.md`, `${frontMatter}\n${promptAndTranscript}`) - l.success(`\n Prompt and transcript saved to markdown file:\n - ${finalPath}-prompt.md`) + l.wait('\n No LLM selected, skipping processing...') + + const noLLMFile = `${finalPath}-prompt.md` + l.wait(`\n Writing front matter + prompt + transcript to file:\n\n - ${noLLMFile}`) + await writeFile(noLLMFile, `${frontMatter}\n${promptAndTranscript}`) + l.wait(`\n Prompt and transcript saved to:\n - ${noLLMFile}`) + return '' } } catch (error) { diff --git a/src/process-steps/06-clean-up-files.ts b/src/process-steps/06-clean-up-files.ts index e15ff679..10fed90d 100644 --- a/src/process-steps/06-clean-up-files.ts +++ b/src/process-steps/06-clean-up-files.ts @@ -43,8 +43,6 @@ import { l, err } from '../utils/logging' * } */ export async function cleanUpFiles(id: string): Promise { - l.step('\nStep 5 - Cleaning up temporary files...\n') - // Define extensions of temporary files to be cleaned up const extensions = [ '.wav', // Audio files @@ -53,14 +51,14 @@ export async function cleanUpFiles(id: string): Promise { '.lrc' // Lyrics/subtitles ] - l.success(` Temporary files deleted:`) + l.wait(`\n Temporary files deleted:`) // Attempt to delete each file type for (const ext of extensions) { try { // Delete file and log success await 
unlink(`${id}${ext}`) - l.success(` - ${id}${ext}`) + l.wait(` - ${id}${ext}`) } catch (error) { // Only log errors that aren't "file not found" (ENOENT) if (error instanceof Error && (error as Error).message !== 'ENOENT') { diff --git a/src/transcription/whisper.ts b/src/transcription/whisper.ts index 5be32008..8a352863 100644 --- a/src/transcription/whisper.ts +++ b/src/transcription/whisper.ts @@ -23,7 +23,8 @@ export async function callWhisper( options: ProcessingOptions, finalPath: string ): Promise { - l.wait('\n Using local whisper.cpp for transcription...') + l.wait('\n callWhisper called with arguments:\n') + l.wait(` - finalPath: ${finalPath}`) try { // Determine which model was requested (default to "base" if `--whisper` is passed with no model) @@ -38,15 +39,14 @@ export async function callWhisper( throw new Error(`Unknown model type: ${whisperModel}`) } - l.wait(`\n - whisperModel: ${whisperModel}`) + l.wait(`\n Whisper model information:\n\n - whisperModel: ${whisperModel}`) // Execute the local whisper.cpp runner - await runWhisperCpp(finalPath, whisperModel) + const txtContent = await runWhisperCpp(finalPath, whisperModel) - // Read the newly created .txt file - const txtContent = await readFile(`${finalPath}.txt`, 'utf8') + // Return the transcript text + l.wait(' Returning transcript text from callWhisper...') return txtContent - } catch (error) { err('Error in callWhisper:', (error as Error).message) process.exit(1) @@ -69,30 +69,49 @@ const runWhisperCpp: WhisperRunner = async (finalPath, whisperModel) => { // Check if whisper.cpp directory is present if (!existsSync('./whisper.cpp')) { l.wait(`\n No whisper.cpp repo found, cloning and compiling...\n`) - await execPromise('git clone https://github.com/ggerganov/whisper.cpp.git && make -C whisper.cpp') - l.wait(`\n - whisper.cpp clone and compilation complete.\n`) + try { + await execPromise('git clone https://github.com/ggerganov/whisper.cpp.git && make -C whisper.cpp') + l.wait(`\n - whisper.cpp clone and compilation complete.\n`) + } catch (cloneError) { + err(`Error cloning/building whisper.cpp: ${(cloneError as Error).message}`) + throw cloneError + } } // Check if the chosen model file is present if (!existsSync(`./whisper.cpp/models/${modelGGMLName}`)) { l.wait(`\n Model not found, downloading...\n - ${whisperModel}\n`) - await execPromise(`bash ./whisper.cpp/models/download-ggml-model.sh ${whisperModel}`) - l.wait(' - Model download completed, running transcription...\n') + try { + await execPromise(`bash ./whisper.cpp/models/download-ggml-model.sh ${whisperModel}`) + l.wait(' - Model download completed, running transcription...\n') + } catch (modelError) { + err(`Error downloading model: ${(modelError as Error).message}`) + throw modelError + } } // Run whisper.cpp on the WAV file - await execPromise( - `./whisper.cpp/build/bin/whisper-cli --no-gpu ` + - `-m "whisper.cpp/models/${modelGGMLName}" ` + - `-f "${finalPath}.wav" ` + - `-of "${finalPath}" ` + // Output file base name - `--output-lrc` // Make sure there is a space before the next flag - ) - l.success(`\n Transcript LRC file successfully created:\n - ${finalPath}.lrc`) + l.wait(`\n Invoking whisper.cpp on file:\n - ${finalPath}.wav`) + try { + await execPromise( + `./whisper.cpp/build/bin/whisper-cli --no-gpu ` + + `-m "whisper.cpp/models/${modelGGMLName}" ` + + `-f "${finalPath}.wav" ` + + `-of "${finalPath}" ` + // Output file base name + `--output-lrc` // Output LRC file + ) + } catch (whisperError) { + err(`Error running whisper.cpp: ${(whisperError 
as Error).message}`) + throw whisperError + } // Convert .lrc -> .txt + l.wait(`\n Transcript LRC file successfully created, reading file for txt conversion:\n - ${finalPath}.lrc`) const lrcContent = await readFile(`${finalPath}.lrc`, 'utf8') const txtContent = lrcToTxt(lrcContent) await writeFile(`${finalPath}.txt`, txtContent) - l.success(` Transcript transformation successfully completed:\n - ${finalPath}.txt\n`) + l.wait(`\n Transcript transformation successfully completed:\n - ${finalPath}.txt\n`) + + // Return the plain text content + return txtContent } \ No newline at end of file diff --git a/src/types/logging.ts b/src/types/logging.ts index 4b24c7ad..285d60db 100644 --- a/src/types/logging.ts +++ b/src/types/logging.ts @@ -26,7 +26,9 @@ export interface ChainableLogger { step: (...args: any[]) => void dim: (...args: any[]) => void success: (...args: any[]) => void + warn: (...args: any[]) => void opts: (...args: any[]) => void + info: (...args: any[]) => void wait: (...args: any[]) => void final: (...args: any[]) => void } \ No newline at end of file diff --git a/src/types/transcription.ts b/src/types/transcription.ts index 515037a1..322979c6 100644 --- a/src/types/transcription.ts +++ b/src/types/transcription.ts @@ -23,7 +23,7 @@ export type WhisperModelType = 'tiny' | 'tiny.en' | 'base' | 'base.en' | 'small' export type WhisperRunner = ( finalPath: string, whisperModel: string -) => Promise +) => Promise /** * Response structure from Deepgram API. diff --git a/src/utils/logging.ts b/src/utils/logging.ts index ecada348..7428bff1 100644 --- a/src/utils/logging.ts +++ b/src/utils/logging.ts @@ -176,7 +176,9 @@ function createChainableLogger(): ChainableLogger { step: (...args: any[]) => console.log(chalk.bold.underline(...args)), dim: (...args: any[]) => console.log(chalk.dim(...args)), success: (...args: any[]) => console.log(chalk.bold.blue(...args)), + warn: (...args: any[]) => console.log(chalk.bold.yellow(...args)), opts: (...args: any[]) => console.log(chalk.magentaBright.bold(...args)), + info: (...args: any[]) => console.log(chalk.magentaBright.bold(...args)), wait: (...args: any[]) => console.log(chalk.bold.cyan(...args)), final: (...args: any[]) => console.log(chalk.bold.italic(...args)), }) @@ -198,7 +200,9 @@ function createChainableErrorLogger(): ChainableLogger { step: (...args: any[]) => console.error(chalk.bold.underline(...args)), dim: (...args: any[]) => console.error(chalk.dim(...args)), success: (...args: any[]) => console.error(chalk.bold.blue(...args)), + warn: (...args: any[]) => console.error(chalk.bold.yellow(...args)), opts: (...args: any[]) => console.error(chalk.magentaBright.bold(...args)), + info: (...args: any[]) => console.error(chalk.magentaBright.bold(...args)), wait: (...args: any[]) => console.error(chalk.bold.cyan(...args)), final: (...args: any[]) => console.error(chalk.bold.italic(...args)), }) diff --git a/test/local.test.ts b/test/local.test.ts index a54a3313..d5e31340 100644 --- a/test/local.test.ts +++ b/test/local.test.ts @@ -9,13 +9,13 @@ import { join } from 'node:path' const commands = [ { // Process a single YouTube video using Autoshow's default settings. - cmd: 'npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk"', + cmd: 'npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --whisper tiny', expectedFile: '2024-09-24-ep0-fsjam-podcast-prompt.md', newName: '01-video-default.md' }, { // Process all videos in a specified YouTube playlist. 
- cmd: 'npm run as -- --playlist "https://www.youtube.com/playlist?list=PLCVnrVv4KhXPz0SoAVu8Rc1emAdGPbSbr"', + cmd: 'npm run as -- --playlist "https://www.youtube.com/playlist?list=PLCVnrVv4KhXPz0SoAVu8Rc1emAdGPbSbr" --whisper tiny', expectedFiles: [ { file: '2024-09-24-ep1-fsjam-podcast-prompt.md', newName: '02-playlist-default.md' }, { file: '2024-09-24-ep0-fsjam-podcast-prompt.md', newName: '03-playlist-default.md' } @@ -23,7 +23,7 @@ const commands = [ }, { // Process multiple YouTube videos from URLs listed in a file. - cmd: 'npm run as -- --urls "content/example-urls.md"', + cmd: 'npm run as -- --urls "content/example-urls.md" --whisper tiny', expectedFiles: [ { file: '2024-09-24-ep1-fsjam-podcast-prompt.md', newName: '04-urls-default.md' }, { file: '2024-09-24-ep0-fsjam-podcast-prompt.md', newName: '05-urls-default.md' } @@ -31,7 +31,7 @@ const commands = [ }, { // Process a single local audio file. - cmd: 'npm run as -- --file "content/audio.mp3"', + cmd: 'npm run as -- --file "content/audio.mp3" --whisper tiny', expectedFile: 'audio-prompt.md', newName: '06-file-default.md' }, @@ -49,7 +49,7 @@ const commands = [ }, { // Process a local audio file with all available prompt options (except smallChapters and longChapters) - cmd: 'npm run as -- --file "content/audio.mp3" --prompt titles summary mediumChapters takeaways questions', + cmd: 'npm run as -- --file "content/audio.mp3" --prompt titles summary mediumChapters takeaways questions --whisper tiny', expectedFile: 'audio-prompt.md', newName: '09-all-prompts.md' }, @@ -59,28 +59,6 @@ const commands = [ expectedFile: 'audio-ollama-shownotes.md', newName: '10-all-prompts-ollama-shownotes.md' }, - { - // Process playlist videos with titles and longChapters prompts, tiny Whisper model, and Ollama for LLM processing. - cmd: 'npm run as -- --playlist "https://www.youtube.com/playlist?list=PLCVnrVv4KhXPz0SoAVu8Rc1emAdGPbSbr" --prompt titles longChapters --whisper tiny --ollama LLAMA_3_2_1B', - expectedFiles: [ - { file: '2024-09-24-ep1-fsjam-podcast-ollama-shownotes.md', newName: '11-prompt-whisper-ollama-shownotes.md' }, - { file: '2024-09-24-ep0-fsjam-podcast-ollama-shownotes.md', newName: '12-prompt-whisper-ollama-shownotes.md' } - ] - }, - { - // Process multiple YouTube videos from URLs with title prompts, Whisper 'tiny' model, and Ollama. - cmd: 'npm run as -- --urls "content/example-urls.md" --prompt titles --whisper tiny --ollama LLAMA_3_2_1B', - expectedFiles: [ - { file: '2024-09-24-ep1-fsjam-podcast-ollama-shownotes.md', newName: '13-prompt-whisper-ollama-shownotes.md' }, - { file: '2024-09-24-ep0-fsjam-podcast-ollama-shownotes.md', newName: '14-prompt-whisper-ollama-shownotes.md' } - ] - }, - { - // Process podcast RSS feed from default order. - cmd: 'npm run as -- --rss "https://ajcwebdev.substack.com/feed" --whisper tiny', - expectedFile: '2021-05-10-thoughts-on-lambda-school-layoffs-prompt.md', - newName: '15-rss-whisper-tiny.md' - }, { // Download JSON file with metadata for each item in the RSS feed. 
cmd: 'npm run as -- --rss "https://ajcwebdev.substack.com/feed" --info', From 2ae03108021f0203a1f113aa1ac67f3e08e4e593 Mon Sep 17 00:00:00 2001 From: Anthony Campolo <12433465+ajcwebdev@users.noreply.github.com> Date: Fri, 3 Jan 2025 03:19:55 -0600 Subject: [PATCH 3/6] fix date in show notes endpoint --- src/server/routes/show-notes.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/server/routes/show-notes.ts b/src/server/routes/show-notes.ts index bd22c79b..1edbb1f5 100644 --- a/src/server/routes/show-notes.ts +++ b/src/server/routes/show-notes.ts @@ -6,7 +6,7 @@ import type { FastifyRequest, FastifyReply } from 'fastify' export const getShowNotes = async (_request: FastifyRequest, reply: FastifyReply) => { try { // Fetch all show notes from the database - const showNotes = db.prepare(`SELECT * FROM show_notes ORDER BY date DESC`).all() + const showNotes = db.prepare(`SELECT * FROM show_notes ORDER BY publishDate DESC`).all() reply.send({ showNotes }) } catch (error) { console.error('Error fetching show notes:', error) From 5efce1b5ec0c9b4dfa93de7e043457c922628889 Mon Sep 17 00:00:00 2001 From: Anthony Campolo <12433465+ajcwebdev@users.noreply.github.com> Date: Fri, 3 Jan 2025 19:13:02 -0600 Subject: [PATCH 4/6] refactor --- docs/examples.md | 72 ++++++++++------------- package.json | 1 + src/process-commands/file.ts | 42 ++++++++----- src/process-commands/rss.ts | 13 ++-- src/process-commands/video.ts | 52 +++++++++------- src/process-steps/01-generate-markdown.ts | 5 +- src/process-steps/02-download-audio.ts | 6 +- src/process-steps/03-run-transcription.ts | 10 ++-- src/process-steps/04-select-prompt.ts | 1 + src/process-steps/05-run-llm.ts | 24 ++++---- src/process-steps/06-clean-up-files.ts | 1 + src/server/routes/process.ts | 13 +++- test/local.test.ts | 2 +- 13 files changed, 130 insertions(+), 112 deletions(-) diff --git a/docs/examples.md b/docs/examples.md index 7a47334f..7b387c58 100644 --- a/docs/examples.md +++ b/docs/examples.md @@ -3,11 +3,11 @@ ## Outline - [Content and Feed Inputs](#content-and-feed-inputs) + - [Process Single Audio or Video File](#process-single-audio-or-video-file) - [Process Single Video URLs](#process-single-video-urls) + - [Process Multiple Videos Specified in a URLs File](#process-multiple-videos-specified-in-a-urls-file) - [Process Multiple Videos in YouTube Playlist](#process-multiple-videos-in-youtube-playlist) - [Process All Videos from a YouTube Channel](#process-all-videos-from-a-youtube-channel) - - [Process Multiple Videos Specified in a URLs File](#process-multiple-videos-specified-in-a-urls-file) - - [Process Single Audio or Video File](#process-single-audio-or-video-file) - [Process Podcast RSS Feed](#process-podcast-rss-feed) - [Transcription Options](#transcription-options) - [Whisper](#whisper) @@ -30,13 +30,34 @@ ## Content and Feed Inputs +### Process Single Audio or Video File + +Run on `audio.mp3` on the `content` directory: + +```bash +npm run as -- --file "content/audio.mp3" +``` + ### Process Single Video URLs Run on a single YouTube video. ```bash -npm run as -- \ - --video "https://www.youtube.com/watch?v=MORMZXEaONk" +npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" +``` + +### Process Multiple Videos Specified in a URLs File + +Run on an arbitrary list of URLs in `example-urls.md`. 
+ +```bash +npm run as -- --urls "content/example-urls.md" +``` + +Run on URLs file and generate JSON info file with markdown metadata of each video: + +```bash +npm run as -- --info --urls "content/example-urls.md" ``` ### Process Multiple Videos in YouTube Playlist @@ -44,16 +65,13 @@ npm run as -- \ Run on multiple YouTube videos in a playlist. ```bash -npm run as -- \ - --playlist "https://www.youtube.com/playlist?list=PLCVnrVv4KhXPz0SoAVu8Rc1emAdGPbSbr" +npm run as -- --playlist "https://www.youtube.com/playlist?list=PLCVnrVv4KhXPz0SoAVu8Rc1emAdGPbSbr" ``` Run on playlist URL and generate JSON info file with markdown metadata of each video in the playlist: ```bash -npm run as -- \ - --playlist "https://www.youtube.com/playlist?list=PLCVnrVv4KhXPz0SoAVu8Rc1emAdGPbSbr" \ - --info +npm run as -- --info --playlist "https://www.youtube.com/playlist?list=PLCVnrVv4KhXPz0SoAVu8Rc1emAdGPbSbr" ``` ### Process All Videos from a YouTube Channel @@ -61,8 +79,7 @@ npm run as -- \ Process all videos from a YouTube channel (both live and non-live): ```bash -npm run as -- \ - --channel "https://www.youtube.com/@ajcwebdev" +npm run as -- --channel "https://www.youtube.com/@ajcwebdev" ``` Process videos starting from the oldest instead of newest: @@ -92,9 +109,7 @@ npm run as -- \ Run on a YouTube channel and generate JSON info file with markdown metadata of each video: ```bash -npm run as -- \ - --channel "https://www.youtube.com/@ajcwebdev" \ - --info +npm run as -- --info --channel "https://www.youtube.com/@ajcwebdev" ``` #### Advanced Channel Example @@ -124,39 +139,12 @@ Here’s what’s happening in this single command: 7. **Prompt**: Generates both a summary and short chapter descriptions (`--prompt summary shortChapters`). 8. **No Clean Up**: Keeps any intermediary or downloaded files around (`--noCleanUp`) so you can inspect them after the run. -### Process Multiple Videos Specified in a URLs File - -Run on an arbitrary list of URLs in `example-urls.md`. 
- -```bash -npm run as -- \ - --urls "content/example-urls.md" -``` - -Run on URLs file and generate JSON info file with markdown metadata of each video: - -```bash -npm run as -- \ - --urls "content/example-urls.md" \ - --info -``` - -### Process Single Audio or Video File - -Run on `audio.mp3` on the `content` directory: - -```bash -npm run as -- \ - --file "content/audio.mp3" -``` - ### Process Podcast RSS Feed Process RSS feed from newest to oldest (default behavior): ```bash -npm run as -- \ - --rss "https://ajcwebdev.substack.com/feed" +npm run as -- --rss "https://ajcwebdev.substack.com/feed" ``` Process RSS feed from oldest to newest: diff --git a/package.json b/package.json index 5fbeab56..32ccad18 100644 --- a/package.json +++ b/package.json @@ -18,6 +18,7 @@ "scripts": { "tsx:base": "tsx --env-file=.env --no-warnings --experimental-sqlite", "setup": "bash ./scripts/setup.sh", + "setup-docker": "docker build -t autoshow -f .github/Dockerfile .", "docker-setup": "docker build -t autoshow -f .github/Dockerfile .", "setup-all": "npm run setup && npm run docker-setup", "as": "npm run tsx:base -- src/cli/commander.ts", diff --git a/src/process-commands/file.ts b/src/process-commands/file.ts index 07a6bbe4..1ce63a7b 100644 --- a/src/process-commands/file.ts +++ b/src/process-commands/file.ts @@ -41,7 +41,12 @@ export async function processFile( filePath: string, llmServices?: LLMServices, transcriptServices?: TranscriptServices -): Promise { +): Promise<{ + frontMatter: string + prompt: string + llmOutput: string + transcript: string +}> { // Log function inputs l.info('processFile called with the following arguments:') l.opts(` - filePath: ${filePath}`) @@ -50,33 +55,23 @@ export async function processFile( try { // Step 1 - Generate markdown - l.step('Step 1 - Generating markdown...') const { frontMatter, finalPath, filename, metadata } = await generateMarkdown(options, filePath) // Step 2 - Convert to WAV - l.step('Step 2 - Converting file to WAV...') await downloadAudio(options, filePath, filename) // Step 3 - Transcribe audio and read transcript - l.step('Step 3 - Transcribing audio...') const transcript = await runTranscription(options, finalPath, transcriptServices) - l.wait(`\n Successfully read transcript file: ${finalPath}.txt (length: ${transcript.length} characters)`) - // Step 4 - Select Prompt - l.step('\nStep 4 - Selecting prompt...\n') + // Step 4 - Selecting prompt if (options.customPrompt) { l.wait(`\n Reading custom prompt file:\n - ${options.customPrompt}`) } - const promptText = await readFile(options.customPrompt || '', 'utf-8').catch((err) => { - l.warn(` Could not read custom prompt file: ${options.customPrompt}. Using empty prompt. 
Error: ${err}`) + const promptText = await readFile(options.customPrompt || '', 'utf-8').catch(() => { return '' }) - l.wait(`\n Prompt text length: ${promptText.length}`) - - // Step 5 - Run LLM (optional) - l.step('\nStep 5 - Running LLM (if applicable)...') - const llmOutput = await runLLM(options, finalPath, frontMatter, llmServices) + // Prepare the final prompt let generatedPrompt = '' if (!promptText) { l.wait('\n No custom prompt text found, importing default prompt generator...') @@ -87,6 +82,15 @@ export async function processFile( generatedPrompt = promptText } + // Step 5 - Run LLM (if applicable) + const llmOutput = await runLLM( + options, + finalPath, + frontMatter, + llmServices, + `${generatedPrompt}\n## Transcript\n\n${transcript}` + ) + // Insert into DB l.wait('\n Inserting show note into the database...') insertShowNote( @@ -106,12 +110,18 @@ export async function processFile( // Step 6 - Cleanup if (!options.noCleanUp) { - l.step('\nStep 6 - Cleaning up temporary files...') await cleanUpFiles(finalPath) l.wait('\n Cleanup completed.\n') } - l.wait(' processFile completed successfully.') + l.wait(' processFile command completed successfully.') + + return { + frontMatter, + prompt: generatedPrompt, + llmOutput: llmOutput || '', + transcript, + } } catch (error) { err(`Error processing file: ${(error as Error).message}`) process.exit(1) diff --git a/src/process-commands/rss.ts b/src/process-commands/rss.ts index 539cce45..a17e91bb 100644 --- a/src/process-commands/rss.ts +++ b/src/process-commands/rss.ts @@ -75,14 +75,11 @@ function extractFeedItems(feed: any): { items: RSSItem[], channelTitle: string } return audioVideoTypes.some((type) => item.enclosure.type.startsWith(type)) }) .map((item) => { - // Ensure publishDate is always a valid string let publishDate: string try { - // Try to parse the date, fall back to current date if invalid const date = item.pubDate ? new Date(item.pubDate) : new Date() publishDate = date.toISOString().substring(0, 10) } catch { - // If date parsing fails, use current date publishDate = defaultDate } @@ -193,8 +190,6 @@ async function processItem( const promptText = await readFile(options.customPrompt || '', 'utf-8').catch(() => '') // Step 5 - Run LLM (optional) - const llmOutput = await runLLM(options, finalPath, frontMatter, llmServices) - let generatedPrompt = '' if (!promptText) { const defaultPrompt = await import('../process-steps/04-select-prompt') @@ -203,6 +198,14 @@ async function processItem( generatedPrompt = promptText } + const llmOutput = await runLLM( + options, + finalPath, + frontMatter, + llmServices, + `${generatedPrompt}\n## Transcript\n\n${transcript}` + ) + insertShowNote( metadata.showLink ?? '', metadata.channel ?? 
'', diff --git a/src/process-commands/video.ts b/src/process-commands/video.ts index bc73e5f2..da5cd171 100644 --- a/src/process-commands/video.ts +++ b/src/process-commands/video.ts @@ -32,14 +32,19 @@ import type { LLMServices } from '../types/llms' * @param llmServices - Optional language model service to use for processing the transcript * @param transcriptServices - Optional transcription service to use for converting audio to text * @throws Will throw an error if any processing step fails - * @returns Promise that resolves when all processing is complete + * @returns Promise that resolves with { frontMatter, prompt, llmOutput, transcript } */ export async function processVideo( options: ProcessingOptions, url: string, llmServices?: LLMServices, transcriptServices?: TranscriptServices -): Promise { +): Promise<{ + frontMatter: string + prompt: string + llmOutput: string + transcript: string +}> { // Log function inputs l.opts('processVideo called with the following arguments:\n') l.opts(` - url: ${url}`) @@ -48,32 +53,26 @@ export async function processVideo( try { // Step 1 - Generate markdown - l.step('\nStep 1 - Generating markdown...') const { frontMatter, finalPath, filename, metadata } = await generateMarkdown(options, url) // Step 2 - Download audio and convert to WAV - l.step('\nStep 2 - Downloading/converting audio...\n') await downloadAudio(options, url, filename) // Step 3 - Transcribe audio and read transcript - l.step('\nStep 3 - Transcribing audio...\n') const transcript = await runTranscription(options, finalPath, transcriptServices) - // Step 4 - Select Prompt - l.step('\nStep 4 - Selecting prompt...\n') + // Step 4 - Selecting prompt + let promptText = '' if (options.customPrompt) { l.info(`\n Reading custom prompt file: ${options.customPrompt}`) + promptText = await readFile(options.customPrompt, 'utf-8').catch((err) => { + l.warn(`\n Could not read custom prompt file: ${options.customPrompt}. Using empty prompt. Error: ${err}`) + return '' + }) } - const promptText = await readFile(options.customPrompt || '', 'utf-8').catch((err) => { - l.warn(`\n Could not read custom prompt file: ${options.customPrompt}. Using empty prompt. Error: ${err}`) - return '' - }) l.wait(`\n Prompt text length: ${promptText.length}`) - // Step 5 - Run LLM (optional) - l.step(`\nStep 5 - Running LLM processing on transcript (if applicable)...\n`) - const llmOutput = await runLLM(options, finalPath, frontMatter, llmServices) - + // Step 5 - Running LLM processing on transcript (if applicable)... 
let generatedPrompt = '' if (!promptText) { l.wait('\n No custom prompt text found, importing default prompt generator...') @@ -84,6 +83,14 @@ export async function processVideo( generatedPrompt = promptText } + const llmOutput = await runLLM( + options, + finalPath, + frontMatter, + llmServices, + `${generatedPrompt}\n## Transcript\n\n${transcript}` + ) + // Insert into DB l.wait('\n Inserting show note into the database...') insertShowNote( @@ -106,16 +113,19 @@ export async function processVideo( await writeFile(`${finalPath}.md`, frontMatter) l.wait(`\n Successfully wrote front matter to file:\n - ${finalPath}.md\n`) - // Optional cleanup + // Step 6 - Cleanup if (!options.noCleanUp) { - l.step('Step 6 - Cleaning up temporary files...\n') await cleanUpFiles(finalPath) - l.wait('\n Cleanup completed.\n') } - // Return transcript - l.wait(' Returning transcript from processVideo...') - return transcript + l.wait(' processVideo command completed successfully.') + + return { + frontMatter, + prompt: generatedPrompt, + llmOutput: llmOutput || '', + transcript, + } } catch (error) { err('Error processing video:', (error as Error).message) throw error diff --git a/src/process-steps/01-generate-markdown.ts b/src/process-steps/01-generate-markdown.ts index 083df888..ddbaf8a6 100644 --- a/src/process-steps/01-generate-markdown.ts +++ b/src/process-steps/01-generate-markdown.ts @@ -82,8 +82,9 @@ export async function generateMarkdown( input: string | RSSItem ): Promise { // Log function inputs - l.wait('\n generateMarkdown called with the following arguments:\n') - l.wait(` - input: ${typeof input === 'string' ? input : JSON.stringify(input, null, 2)}`) + l.step('\nStep 1 - Generate Markdown\n') + l.wait('\n generateMarkdown input:\n') + l.wait(`\n${typeof input === 'string' ? 
input : JSON.stringify(input, null, 2)}\n`) let frontMatter: string[] let finalPath: string diff --git a/src/process-steps/02-download-audio.ts b/src/process-steps/02-download-audio.ts index 7dc2b564..f9fa435c 100644 --- a/src/process-steps/02-download-audio.ts +++ b/src/process-steps/02-download-audio.ts @@ -75,6 +75,7 @@ export async function downloadAudio( filename: string ): Promise { // Log function inputs + l.step('\nStep 2 - Download and Convert Audio\n') l.wait('\n downloadAudio called with the following arguments:\n') l.wait(` - input: ${input}`) l.wait(` - filename: ${filename}`) @@ -113,7 +114,6 @@ export async function downloadAudio( } // Handle local file processing else if (options.file) { - l.step('\nStep 2 - Processing local file audio via ffmpeg...\n') // Define supported media formats const supportedFormats: Set = new Set([ // Audio formats @@ -141,7 +141,7 @@ export async function downloadAudio( ) } // Convert to standardized WAV format using ffmpeg - l.wait(` Running ffmpeg command for ${input} -> ${outputPath}\n`) + l.wait(` - Running ffmpeg command for ${input} -> ${outputPath}\n`) await execPromise( `ffmpeg -i "${input}" -ar 16000 -ac 1 -c:a pcm_s16le "${outputPath}"` ) @@ -157,6 +157,6 @@ export async function downloadAudio( } // Log return value - l.wait(`\n downloadAudio returning:\n\n - outputPath: ${outputPath}\n`) + l.wait(`\n downloadAudio returning:\n - outputPath: ${outputPath}\n`) return outputPath } \ No newline at end of file diff --git a/src/process-steps/03-run-transcription.ts b/src/process-steps/03-run-transcription.ts index 7bfc09e7..f25bf499 100644 --- a/src/process-steps/03-run-transcription.ts +++ b/src/process-steps/03-run-transcription.ts @@ -24,6 +24,7 @@ export async function runTranscription( transcriptServices?: TranscriptServices ): Promise { // Log function call + l.step('\nStep 3 - Run Transcription\n') l.wait('\n runTranscription called with arguments:\n') l.wait(` - finalPath: ${finalPath}`) l.wait(` - transcriptServices: ${transcriptServices}`) @@ -31,26 +32,23 @@ export async function runTranscription( try { switch (transcriptServices) { case 'deepgram': - l.wait('Using Deepgram transcription service...') // Deepgram might write the .txt file to disk. Then read it. await callDeepgram(finalPath) - l.success('Deepgram transcription completed successfully.') + l.success('\nDeepgram transcription completed successfully.\n') // Read the transcript from file (if that's how Deepgram is implemented) return readFile(`${finalPath}.txt`, 'utf8') case 'assembly': - l.wait('Using AssemblyAI transcription service...') // Assembly might write the .txt file to disk. Then read it. 
await callAssembly(options, finalPath) - l.success('AssemblyAI transcription completed successfully.') + l.success('\nAssemblyAI transcription completed successfully.\n') // Read the transcript from file return readFile(`${finalPath}.txt`, 'utf8') case 'whisper': - l.wait('Using local Whisper transcription service...') // Call whisper and return the final text content in memory const whisperTranscript = await callWhisper(options, finalPath) - l.wait('\n Whisper transcription completed successfully.') + l.wait('\n Whisper transcription completed successfully.\n') return whisperTranscript default: diff --git a/src/process-steps/04-select-prompt.ts b/src/process-steps/04-select-prompt.ts index 7fca15a6..1b3702d0 100644 --- a/src/process-steps/04-select-prompt.ts +++ b/src/process-steps/04-select-prompt.ts @@ -253,6 +253,7 @@ export async function generatePrompt( prompt: string[] = ['summary', 'longChapters'], customPromptPath?: string ): Promise { + l.step('\nStep 4 - Select Prompts\n') l.wait('\n generatePrompt called with arguments:\n') l.wait(` - prompt: ${JSON.stringify(prompt)}`) l.wait(` - customPromptPath: ${customPromptPath || 'none'}`) diff --git a/src/process-steps/05-run-llm.ts b/src/process-steps/05-run-llm.ts index 39699a2f..85efdb70 100644 --- a/src/process-steps/05-run-llm.ts +++ b/src/process-steps/05-run-llm.ts @@ -16,7 +16,6 @@ import { callMistral } from '../llms/mistral' import { callFireworks } from '../llms/fireworks' import { callTogether } from '../llms/together' import { callGroq } from '../llms/groq' -import { generatePrompt } from './04-select-prompt' import { l, err } from '../utils/logging' import type { ProcessingOptions } from '../types/process' import type { LLMServices, LLMFunction, LLMFunctions } from '../types/llms' @@ -41,33 +40,32 @@ export const LLM_FUNCTIONS: LLMFunctions = { * * The function performs these steps: * 1. Reads the transcript file - * 2. Generates a prompt based on provided options + * 2. Uses a provided prompt (if any) combined with the transcript * 3. Processes the content with the selected LLM * 4. Saves the results with front matter and original transcript * - * If no LLM is selected, it saves the prompt and transcript without processing. + * If no LLM is selected, it saves the prompt/transcript without processing. * * @param {ProcessingOptions} options - Configuration options including: * - prompt: Array of prompt sections to include * - LLM-specific options (e.g., chatgpt, claude, etc.) 
- * * @param {string} finalPath - Base path for input/output files: * - Input transcript: `${finalPath}.txt` * - Temporary file: `${finalPath}-${llmServices}-temp.md` * - Final output: `${finalPath}-${llmServices}-shownotes.md` - * * @param {string} frontMatter - YAML front matter content to include in the output - * * @param {LLMServices} [llmServices] - The LLM service to use - * + * @param {string} [promptAndTranscript] - Optional combined prompt (instructions + transcript) * @returns {Promise} Resolves with the LLM output, or an empty string if no LLM is selected */ export async function runLLM( options: ProcessingOptions, finalPath: string, frontMatter: string, - llmServices?: LLMServices + llmServices?: LLMServices, + promptAndTranscript?: string ): Promise { + l.step('\nStep 5 - Run LLM on Transcript with Selected Prompt\n') l.wait('\n runLLM called with arguments:\n') l.wait(` - finalPath: ${finalPath}`) l.wait(` - llmServices: ${llmServices}`) @@ -77,9 +75,8 @@ export async function runLLM( const tempTranscript = await readFile(`${finalPath}.txt`, 'utf8') const transcript = `## Transcript\n\n${tempTranscript}` - l.wait('\n Generating prompt text using generatePrompt...') - const prompt = await generatePrompt(options.prompt, options.customPrompt) - const promptAndTranscript = `${prompt}${transcript}` + // If an external prompt was passed in, combine it here + const combinedPrompt = promptAndTranscript || transcript if (llmServices) { l.wait(`\n Preparing to process with '${llmServices}' Language Model...\n`) @@ -99,7 +96,7 @@ export async function runLLM( try { attempt++ l.wait(` Attempt ${attempt} - Processing with ${llmServices}...\n`) - await llmFunction(promptAndTranscript, tempPath, options[llmServices]) + await llmFunction(combinedPrompt, tempPath, options[llmServices]) l.wait(`\n LLM call to '${llmServices}' completed successfully on attempt ${attempt}.`) break } catch (error) { @@ -121,7 +118,6 @@ export async function runLLM( l.wait(`\n Writing combined front matter + LLM output + transcript to file:\n - ${outputFilename}`) await writeFile(outputFilename, `${frontMatter}\n${showNotes}\n\n${transcript}`) l.wait(`\n Generated show notes saved to:\n - ${outputFilename}`) - l.wait(`\n Cleaning up temporary file:\n - ${tempPath}`) await unlink(tempPath) l.wait('\n Temporary file removed successfully.\n') @@ -134,7 +130,7 @@ export async function runLLM( const noLLMFile = `${finalPath}-prompt.md` l.wait(`\n Writing front matter + prompt + transcript to file:\n\n - ${noLLMFile}`) - await writeFile(noLLMFile, `${frontMatter}\n${promptAndTranscript}`) + await writeFile(noLLMFile, `${frontMatter}\n${combinedPrompt}`) l.wait(`\n Prompt and transcript saved to:\n - ${noLLMFile}`) return '' diff --git a/src/process-steps/06-clean-up-files.ts b/src/process-steps/06-clean-up-files.ts index 10fed90d..803d835a 100644 --- a/src/process-steps/06-clean-up-files.ts +++ b/src/process-steps/06-clean-up-files.ts @@ -43,6 +43,7 @@ import { l, err } from '../utils/logging' * } */ export async function cleanUpFiles(id: string): Promise { + l.step('\nStep 6 - Cleaning Up Extra Files\n') // Define extensions of temporary files to be cleaned up const extensions = [ '.wav', // Audio files diff --git a/src/server/routes/process.ts b/src/server/routes/process.ts index 61c8543a..0142866a 100644 --- a/src/server/routes/process.ts +++ b/src/server/routes/process.ts @@ -43,8 +43,17 @@ export const handleProcessRequest = async ( return } options.video = url - const content = await processVideo(options, url, 
llmServices, transcriptServices) - reply.send({ content }) + + // Grab the object that includes frontMatter, prompt, llmOutput, and transcript + const result = await processVideo(options, url, llmServices, transcriptServices) + + // Return the object, if there is no LLM output, it will be '' + reply.send({ + frontMatter: result.frontMatter, + prompt: result.prompt, + llmOutput: result.llmOutput, + transcript: result.transcript, + }) break } diff --git a/test/local.test.ts b/test/local.test.ts index d5e31340..cc210e04 100644 --- a/test/local.test.ts +++ b/test/local.test.ts @@ -63,7 +63,7 @@ const commands = [ // Download JSON file with metadata for each item in the RSS feed. cmd: 'npm run as -- --rss "https://ajcwebdev.substack.com/feed" --info', expectedFile: 'ajcwebdev_info.json', - newName: '16-ajcwebdev-rss-info.json', + newName: '11-ajcwebdev-rss-info.json', }, ] From f0faece22392f2802bafd3027fb0c15ca9536696 Mon Sep 17 00:00:00 2001 From: Anthony Campolo <12433465+ajcwebdev@users.noreply.github.com> Date: Fri, 3 Jan 2025 22:10:07 -0600 Subject: [PATCH 5/6] refactor runLLM and runTranscription --- src/llms/chatgpt.ts | 36 +++++----- src/llms/claude.ts | 44 +++++------- src/llms/cohere.ts | 44 +++++------- src/llms/fireworks.ts | 36 ++++------ src/llms/gemini.ts | 50 +++++-------- src/llms/groq.ts | 41 ++++------- src/llms/mistral.ts | 33 ++++----- src/llms/ollama.ts | 36 ++++------ src/llms/together.ts | 38 ++++------ src/process-commands/file.ts | 5 +- src/process-commands/rss.ts | 3 +- src/process-commands/video.ts | 17 ++--- src/process-steps/01-generate-markdown.ts | 9 +-- src/process-steps/02-download-audio.ts | 2 +- src/process-steps/03-run-transcription.ts | 20 ++---- src/process-steps/04-select-prompt.ts | 2 +- src/process-steps/05-run-llm.ts | 85 +++++++++-------------- src/server/db.ts | 3 + src/transcription/assembly.ts | 10 ++- src/transcription/deepgram.ts | 8 ++- src/types/llms.ts | 6 +- src/utils/retry.ts | 37 ++++++++++ 22 files changed, 246 insertions(+), 319 deletions(-) create mode 100644 src/utils/retry.ts diff --git a/src/llms/chatgpt.ts b/src/llms/chatgpt.ts index 5f2191c6..a14c8eab 100644 --- a/src/llms/chatgpt.ts +++ b/src/llms/chatgpt.ts @@ -1,6 +1,5 @@ // src/llms/chatgpt.ts -import { writeFile } from 'node:fs/promises' import { env } from 'node:process' import { OpenAI } from 'openai' import { GPT_MODELS } from '../utils/llm-models' @@ -9,46 +8,41 @@ import type { LLMFunction, ChatGPTModelType } from '../types/llms' /** * Main function to call ChatGPT API. - * @param promptAndTranscript - The combined prompt and transcript text to process. - * @param tempPath - The temporary file path to write the LLM output. - * @param model - The GPT model to use. - * @returns A Promise that resolves when the API call is complete. + * @param {string} prompt - The prompt or instructions to process. + * @param {string} transcript - The transcript text. + * @param {string} tempPath - (unused) The temporary file path (no longer used). + * @param {string} [model] - The GPT model to use. + * @returns {Promise} A Promise that resolves with the generated text. * @throws {Error} If an error occurs during API call. */ export const callChatGPT: LLMFunction = async ( - promptAndTranscript: string, - tempPath: string, + prompt: string, + transcript: string, model: string = 'GPT_4o_MINI' -): Promise => { - // Check for API key +): Promise => { if (!env['OPENAI_API_KEY']) { throw new Error('OPENAI_API_KEY environment variable is not set. 
Please set it to your OpenAI API key.') } - // Initialize the OpenAI client with the API key from environment variables const openai = new OpenAI({ apiKey: env['OPENAI_API_KEY'] }) - + try { - // Select the actual model to use, defaulting to GPT_4o_MINI if not specified const actualModel = (GPT_MODELS[model as ChatGPTModelType] || GPT_MODELS.GPT_4o_MINI).modelId - - // Call the OpenAI chat completions API + const combinedPrompt = `${prompt}\n${transcript}` + const response = await openai.chat.completions.create({ model: actualModel, max_completion_tokens: 4000, - messages: [{ role: 'user', content: promptAndTranscript }], + messages: [{ role: 'user', content: combinedPrompt }], }) - // Check if we have a valid response const firstChoice = response.choices[0] if (!firstChoice || !firstChoice.message?.content) { throw new Error('No valid response received from the API') } - // Write the generated content to the output file - await writeFile(tempPath, firstChoice.message.content) - - // Log API results using the standardized logging function + const content = firstChoice.message.content + logAPIResults({ modelName: actualModel, stopReason: firstChoice.finish_reason ?? 'unknown', @@ -58,6 +52,8 @@ export const callChatGPT: LLMFunction = async ( total: response.usage?.total_tokens } }) + + return content } catch (error) { err(`Error in callChatGPT: ${(error as Error).message}`) throw error diff --git a/src/llms/claude.ts b/src/llms/claude.ts index dd7370a4..b95fdfd7 100644 --- a/src/llms/claude.ts +++ b/src/llms/claude.ts @@ -1,6 +1,5 @@ // src/llms/claude.ts -import { writeFile } from 'node:fs/promises' import { env } from 'node:process' import { Anthropic } from '@anthropic-ai/sdk' import { CLAUDE_MODELS } from '../utils/llm-models' @@ -9,47 +8,40 @@ import type { LLMFunction, ClaudeModelType } from '../types/llms' /** * Main function to call Claude API. - * @param promptAndTranscript - The combined prompt and transcript text to process. - * @param tempPath - The temporary file path to write the LLM output. - * @param model - The Claude model to use. - * @returns A Promise that resolves when the API call is complete. + * @param {string} prompt - The prompt or instructions to process. + * @param {string} transcript - The transcript text. + * @param {string} tempPath - (unused) The temporary file path (no longer used). + * @param {string} [model] - The Claude model to use. + * @returns {Promise} A Promise that resolves with the generated text. * @throws {Error} If an error occurs during the API call. */ export const callClaude: LLMFunction = async ( - promptAndTranscript: string, - tempPath: string, + prompt: string, + transcript: string, model: string = 'CLAUDE_3_HAIKU' -): Promise => { - // Check if the ANTHROPIC_API_KEY environment variable is set +): Promise => { if (!env['ANTHROPIC_API_KEY']) { throw new Error('ANTHROPIC_API_KEY environment variable is not set. 
Please set it to your Anthropic API key.') } - // Initialize the Anthropic client with the API key from environment variables const anthropic = new Anthropic({ apiKey: env['ANTHROPIC_API_KEY'] }) try { - // Select the actual model to use, defaulting to CLAUDE_3_HAIKU if not specified const actualModel = (CLAUDE_MODELS[model as ClaudeModelType] || CLAUDE_MODELS.CLAUDE_3_HAIKU).modelId - - // Call the Anthropic messages API to create a chat completion + const combinedPrompt = `${prompt}\n${transcript}` + const response = await anthropic.messages.create({ model: actualModel, - max_tokens: 4000, // Maximum number of tokens in the response - messages: [{ role: 'user', content: promptAndTranscript }] // The input message (transcript content) + max_tokens: 4000, + messages: [{ role: 'user', content: combinedPrompt }] }) - - // Extract text content from the response + const textContent = extractTextContent(response.content) - - // Write the generated text to the output file - if (textContent) { - await writeFile(tempPath, textContent) - } else { + + if (!textContent) { throw new Error('No text content generated from the API') } - - // Log API results using the standardized logging function + logAPIResults({ modelName: actualModel, stopReason: response.stop_reason ?? 'unknown', @@ -59,9 +51,11 @@ export const callClaude: LLMFunction = async ( total: response.usage.input_tokens + response.usage.output_tokens } }) + + return textContent } catch (error) { err(`Error in callClaude: ${(error as Error).message}`) - throw error // Re-throw the error for handling in the calling function + throw error } } diff --git a/src/llms/cohere.ts b/src/llms/cohere.ts index a4769bf0..530dddfd 100644 --- a/src/llms/cohere.ts +++ b/src/llms/cohere.ts @@ -1,6 +1,5 @@ // src/llms/cohere.ts -import { writeFile } from 'node:fs/promises' import { env } from 'node:process' import { CohereClient } from 'cohere-ai' import { COHERE_MODELS } from '../utils/llm-models' @@ -9,48 +8,41 @@ import type { LLMFunction, CohereModelType } from '../types/llms' /** * Main function to call Cohere API. - * @param promptAndTranscript - The combined prompt and transcript text to process. - * @param tempPath - The temporary file path to write the LLM output. - * @param model - The Cohere model to use. - * @returns A Promise that resolves when the API call is complete. + * @param {string} prompt - The prompt or instructions to process. + * @param {string} transcript - The transcript text. + * @param {string} tempPath - (unused) The temporary file path (no longer used). + * @param {string} [model] - The Cohere model to use. + * @returns {Promise} A Promise that resolves when the API call is complete. * @throws {Error} If an error occurs during the API call. */ export const callCohere: LLMFunction = async ( - promptAndTranscript: string, - tempPath: string, + prompt: string, + transcript: string, model: string = 'COMMAND_R' -): Promise => { - // Check if the COHERE_API_KEY environment variable is set +): Promise => { if (!env['COHERE_API_KEY']) { throw new Error('COHERE_API_KEY environment variable is not set. 
Please set it to your Cohere API key.') } - // Initialize the Cohere client with the API key from environment variables const cohere = new CohereClient({ token: env['COHERE_API_KEY'] }) try { - // Select the actual model to use, defaulting to COMMAND_R if not specified const actualModel = (COHERE_MODELS[model as CohereModelType] || COHERE_MODELS.COMMAND_R).modelId - - // Call the Cohere chat API + const combinedPrompt = `${prompt}\n${transcript}` + const response = await cohere.chat({ model: actualModel, - message: promptAndTranscript // The input message (prompt and transcript content) + message: combinedPrompt }) - - // Destructure the response to get relevant information + const { - text, // The generated text - meta, // Metadata including token usage - finishReason // Reason why the generation stopped + text, + meta, + finishReason } = response const { inputTokens, outputTokens } = meta?.tokens ?? {} - - // Write the generated text to the output file - await writeFile(tempPath, text) - - // Log API results using the standardized logging function + logAPIResults({ modelName: actualModel, stopReason: finishReason ?? 'unknown', @@ -60,8 +52,10 @@ export const callCohere: LLMFunction = async ( total: inputTokens && outputTokens ? inputTokens + outputTokens : undefined } }) + + return text } catch (error) { err(`Error in callCohere: ${(error as Error).message}`) - throw error // Re-throw the error for handling in the calling function + throw error } } \ No newline at end of file diff --git a/src/llms/fireworks.ts b/src/llms/fireworks.ts index 25456039..96ac1601 100644 --- a/src/llms/fireworks.ts +++ b/src/llms/fireworks.ts @@ -1,6 +1,5 @@ // src/llms/fireworks.ts -import { writeFile } from 'node:fs/promises' import { env } from 'node:process' import { FIREWORKS_MODELS } from '../utils/llm-models' import { err, logAPIResults } from '../utils/logging' @@ -8,40 +7,37 @@ import type { LLMFunction, FireworksModelType, FireworksResponse } from '../type /** * Main function to call Fireworks AI API. - * @param promptAndTranscript - The combined prompt and transcript text to process. - * @param tempPath - The temporary file path to write the LLM output. - * @param model - The Fireworks model to use. - * @returns A Promise that resolves when the API call is complete. - * @throws {Error} - If an error occurs during the API call. + * @param {string} prompt - The prompt or instructions to process. + * @param {string} transcript - The transcript text. + * @param {string | FireworksModelType} [model] - The Fireworks model to use. + * @returns {Promise} A Promise that resolves with the generated text. + * @throws {Error} If an error occurs during the API call. */ export const callFireworks: LLMFunction = async ( - promptAndTranscript: string, - tempPath: string, + prompt: string, + transcript: string, model: string | FireworksModelType = 'LLAMA_3_2_3B' -): Promise => { - // Check if the FIREWORKS_API_KEY environment variable is set +): Promise => { if (!env['FIREWORKS_API_KEY']) { throw new Error('FIREWORKS_API_KEY environment variable is not set. Please set it to your Fireworks API key.') } try { - // Get the model configuration and ID, defaulting to LLAMA_3_2_3B if not found const modelKey = typeof model === 'string' ? 
model : 'LLAMA_3_2_3B' const modelConfig = FIREWORKS_MODELS[modelKey as FireworksModelType] || FIREWORKS_MODELS.LLAMA_3_2_3B const modelId = modelConfig.modelId - // Prepare the request body + const combinedPrompt = `${prompt}\n${transcript}` const requestBody = { model: modelId, messages: [ { role: 'user', - content: promptAndTranscript, + content: combinedPrompt, }, ], } - // Make API call to Fireworks AI const response = await fetch('https://api.fireworks.ai/inference/v1/chat/completions', { method: 'POST', headers: { @@ -51,25 +47,18 @@ export const callFireworks: LLMFunction = async ( body: JSON.stringify(requestBody), }) - // Check if the response is OK if (!response.ok) { const errorText = await response.text() throw new Error(`Fireworks API error: ${response.status} ${response.statusText} - ${errorText}`) } const data = await response.json() as FireworksResponse - - // Extract the generated content const content = data.choices[0]?.message?.content if (!content) { throw new Error('No content generated from the Fireworks API') } - // Write the generated content to the specified output file - await writeFile(tempPath, content) - - // Log API results using the model key logAPIResults({ modelName: modelKey, stopReason: data.choices[0]?.finish_reason ?? 'unknown', @@ -79,9 +68,10 @@ export const callFireworks: LLMFunction = async ( total: data.usage.total_tokens } }) + + return content } catch (error) { - // Log any errors that occur during the process err(`Error in callFireworks: ${(error as Error).message}`) - throw error // Re-throw the error for handling by the caller + throw error } } \ No newline at end of file diff --git a/src/llms/gemini.ts b/src/llms/gemini.ts index 45a9563a..8878c363 100644 --- a/src/llms/gemini.ts +++ b/src/llms/gemini.ts @@ -1,6 +1,5 @@ // src/llms/gemini.ts -import { writeFile } from 'node:fs/promises' import { env } from 'node:process' import { GoogleGenerativeAI } from "@google/generative-ai" import { GEMINI_MODELS } from '../utils/llm-models' @@ -16,53 +15,37 @@ const delay = (ms: number): Promise => new Promise(resolve => setTimeout(r /** * Main function to call Gemini API. - * @param promptAndTranscript - The combined prompt and transcript text to process. - * @param tempPath - The temporary file path to write the LLM output. - * @param model - The Gemini model to use. - * @returns A Promise that resolves when the API call is complete. + * @param {string} prompt - The prompt or instructions to process. + * @param {string} transcript - The transcript text. + * @param {string} [model] - The Gemini model to use. + * @returns {Promise} A Promise that resolves when the API call is complete. * @throws {Error} If an error occurs during the API call. */ export const callGemini: LLMFunction = async ( - promptAndTranscript: string, - tempPath: string, + prompt: string, + transcript: string, model: string = 'GEMINI_1_5_FLASH' -): Promise => { - // Check if the GEMINI_API_KEY environment variable is set +): Promise => { if (!env['GEMINI_API_KEY']) { throw new Error('GEMINI_API_KEY environment variable is not set. 
Please set it to your Gemini API key.') } - // Initialize the Google Generative AI client const genAI = new GoogleGenerativeAI(env['GEMINI_API_KEY']) - - // Select the actual model to use, defaulting to GEMINI_1_5_FLASH if not specified const actualModel = (GEMINI_MODELS[model as GeminiModelType] || GEMINI_MODELS.GEMINI_1_5_FLASH).modelId - - // Create a GenerativeModel instance const geminiModel = genAI.getGenerativeModel({ model: actualModel }) - const maxRetries = 3 // Maximum number of retry attempts - - // Retry loop + const maxRetries = 3 + const combinedPrompt = `${prompt}\n${transcript}` + for (let attempt = 1; attempt <= maxRetries; attempt++) { try { - // Generate content using the selected model - const result = await geminiModel.generateContent(promptAndTranscript) - - // Get the response from the generated content + const result = await geminiModel.generateContent(combinedPrompt) const response = await result.response - - // Extract the text from the response const text = response.text() - - // Write the generated text to the output file - await writeFile(tempPath, text) - // Get token usage from the response metadata const { usageMetadata } = response const { promptTokenCount, candidatesTokenCount, totalTokenCount } = usageMetadata ?? {} - // Log API results using the standardized logging function logAPIResults({ modelName: actualModel, stopReason: 'complete', @@ -73,17 +56,18 @@ export const callGemini: LLMFunction = async ( } }) - return + return text } catch (error) { err(`Error in callGemini (attempt ${attempt}/${maxRetries}): ${error instanceof Error ? (error as Error).message : String(error)}`) - - // If this is the last attempt, throw the error + if (attempt === maxRetries) { throw error } - - // Wait before retrying, with exponential backoff + await delay(Math.pow(2, attempt) * 1000) } } + + // In case something unexpected happens + throw new Error('All attempts to call Gemini API have failed.') } \ No newline at end of file diff --git a/src/llms/groq.ts b/src/llms/groq.ts index af4f673f..f34d8961 100644 --- a/src/llms/groq.ts +++ b/src/llms/groq.ts @@ -1,50 +1,44 @@ // src/llms/groq.ts -import { writeFile } from 'node:fs/promises' import { env } from 'node:process' import { GROQ_MODELS } from '../utils/llm-models' import { err, logAPIResults } from '../utils/logging' import type { LLMFunction, GroqModelType, GroqChatCompletionResponse } from '../types/llms' -// Define the Groq API URL -const GROQ_API_URL = 'https://api.groq.com/openai/v1/chat/completions' - /** * Function to call the Groq chat completion API. - * @param {string} promptAndTranscript - The combined prompt and transcript text to process. - * @param {string} tempPath - The temporary file path to write the LLM output. - * @param {string} model - The model to use, e.g., 'LLAMA_3_2_1B_PREVIEW'. + * @param {string} prompt - The prompt or instructions to process. + * @param {string} transcript - The transcript text. + * @param {string | GroqModelType} [model] - The model to use. + * @returns {Promise} A Promise that resolves when the API call is complete. + * @throws {Error} If an error occurs during the API call. */ export const callGroq: LLMFunction = async ( - promptAndTranscript: string, - tempPath: string, + prompt: string, + transcript: string, model: string | GroqModelType = 'LLAMA_3_2_1B_PREVIEW' -): Promise => { - // Ensure that the API key is set +): Promise => { if (!env['GROQ_API_KEY']) { throw new Error('GROQ_API_KEY environment variable is not set. 
Please set it to your Groq API key.') } try { - // Get the model configuration and ID, defaulting to LLAMA_3_2_1B_PREVIEW if not found const modelKey = typeof model === 'string' ? model : 'LLAMA_3_2_1B_PREVIEW' const modelConfig = GROQ_MODELS[modelKey as GroqModelType] || GROQ_MODELS.LLAMA_3_2_1B_PREVIEW const modelId = modelConfig.modelId - // Prepare the request body + const combinedPrompt = `${prompt}\n${transcript}` const requestBody = { model: modelId, messages: [ { role: 'user', - content: promptAndTranscript, + content: combinedPrompt, }, ], - // max_tokens: 4000, } - // Send the POST request - const response = await fetch(GROQ_API_URL, { + const response = await fetch(`https://api.groq.com/openai/v1/chat/completions`, { method: 'POST', headers: { Authorization: `Bearer ${env['GROQ_API_KEY']}`, @@ -53,25 +47,17 @@ export const callGroq: LLMFunction = async ( body: JSON.stringify(requestBody), }) - // Check if the response is OK if (!response.ok) { const errorText = await response.text() throw new Error(`Groq API error: ${response.status} ${response.statusText} - ${errorText}`) } - // Parse the JSON response const data = await response.json() as GroqChatCompletionResponse - - // Extract the generated content const content = data.choices[0]?.message?.content if (!content) { throw new Error('No content generated from the Groq API') } - // Write the generated content to the specified output file - await writeFile(tempPath, content) - - // Log API results using the standardized logging function logAPIResults({ modelName: modelKey, stopReason: data.choices[0]?.finish_reason ?? 'unknown', @@ -81,9 +67,10 @@ export const callGroq: LLMFunction = async ( total: data.usage?.total_tokens } }) + + return content } catch (error) { - // Log any errors that occur during the process err(`Error in callGroq: ${(error as Error).message}`) - throw error // Re-throw the error for handling by the caller + throw error } } \ No newline at end of file diff --git a/src/llms/mistral.ts b/src/llms/mistral.ts index c43fbf11..b407b84a 100644 --- a/src/llms/mistral.ts +++ b/src/llms/mistral.ts @@ -1,6 +1,5 @@ // src/llms/mistral.ts -import { writeFile } from 'node:fs/promises' import { env } from 'node:process' import { Mistral } from '@mistralai/mistralai' import { MISTRAL_MODELS } from '../utils/llm-models' @@ -9,36 +8,32 @@ import type { LLMFunction, MistralModelType } from '../types/llms' /** * Main function to call Mistral AI API. - * @param promptAndTranscript - The combined prompt and transcript text to process. - * @param tempPath - The temporary file path to write the LLM output. - * @param model - The Mistral model to use. - * @returns A Promise that resolves when the API call is complete. + * @param {string} prompt - The prompt or instructions to process. + * @param {string} transcript - The transcript text. + * @param {string} [model] - The Mistral model to use. + * @returns {Promise} A Promise that resolves when the API call is complete. * @throws {Error} If an error occurs during the API call. */ export const callMistral: LLMFunction = async ( - promptAndTranscript: string, - tempPath: string, + prompt: string, + transcript: string, model: string = 'MISTRAL_NEMO' -): Promise => { - // Check if the MISTRAL_API_KEY environment variable is set +): Promise => { if (!env['MISTRAL_API_KEY']) { throw new Error('MISTRAL_API_KEY environment variable is not set. 
Please set it to your Mistral API key.') } - // Initialize Mistral client with API key from environment variables const mistral = new Mistral({ apiKey: env['MISTRAL_API_KEY'] }) try { - // Select the actual model to use, defaulting to MISTRAL_NEMO if the specified model is not found const actualModel = (MISTRAL_MODELS[model as MistralModelType] || MISTRAL_MODELS.MISTRAL_NEMO).modelId - - // Make API call to Mistral AI for chat completion + const combinedPrompt = `${prompt}\n${transcript}` + const response = await mistral.chat.complete({ model: actualModel, - messages: [{ role: 'user', content: promptAndTranscript }], + messages: [{ role: 'user', content: combinedPrompt }], }) - // Safely access the response properties with proper null checks if (!response.choices || response.choices.length === 0) { throw new Error("No choices returned from Mistral API") } @@ -50,11 +45,7 @@ export const callMistral: LLMFunction = async ( const content = firstChoice.message.content const contentString = Array.isArray(content) ? content.join('') : content - - // Write the generated content to the specified output file - await writeFile(tempPath, contentString) - // Log API results using the standardized logging function logAPIResults({ modelName: actualModel, stopReason: firstChoice.finishReason ?? 'unknown', @@ -65,9 +56,9 @@ export const callMistral: LLMFunction = async ( } }) + return contentString } catch (error) { - // Log any errors that occur during the process err(`Error in callMistral: ${error instanceof Error ? error.message : String(error)}`) - throw error // Re-throw the error for handling by the caller + throw error } } \ No newline at end of file diff --git a/src/llms/ollama.ts b/src/llms/ollama.ts index 683e70c1..9875c5fc 100644 --- a/src/llms/ollama.ts +++ b/src/llms/ollama.ts @@ -1,6 +1,5 @@ // src/llms/ollama.ts -import { writeFile } from 'node:fs/promises' import { env } from 'node:process' import { spawn } from 'node:child_process' import { OLLAMA_MODELS } from '../utils/llm-models' @@ -12,33 +11,33 @@ import type { LLMFunction, OllamaModelType, OllamaResponse, OllamaTagsResponse } * ----------- * Main function to call the Llama-based model using the Ollama server API. * - * In a single-container approach: - * - We assume 'ollama' binary is installed inside the container. - * - We'll try to connect to 'localhost:11434' or a custom port from env. + * @param {string} prompt - The prompt or instructions to process. + * @param {string} transcript - The transcript text. + * @param {string} tempPath - (unused) The temporary file path (no longer used). + * @param {string | OllamaModelType} [model='LLAMA_3_2_1B'] - The Ollama model to use. + * @returns {Promise} A Promise resolving with the generated text. */ export const callOllama: LLMFunction = async ( - promptAndTranscript: string, - tempPath: string, + prompt: string, + transcript: string, model: string | OllamaModelType = 'LLAMA_3_2_1B' -) => { +): Promise => { l.wait('\n callOllama called with arguments:') l.wait(` - model: ${model}`) - l.wait(` - tempPath: ${tempPath}`) try { - // Get the model configuration and ID const modelKey = typeof model === 'string' ? 
model : 'LLAMA_3_2_1B' const modelConfig = OLLAMA_MODELS[modelKey as OllamaModelType] || OLLAMA_MODELS.LLAMA_3_2_1B const ollamaModelName = modelConfig.modelId l.wait(` - modelName: ${modelKey}\n - ollamaModelName: ${ollamaModelName}`) - // Host & port for Ollama const ollamaHost = env['OLLAMA_HOST'] || 'localhost' const ollamaPort = env['OLLAMA_PORT'] || '11434' l.wait(`\n Using Ollama host: ${ollamaHost}, port: ${ollamaPort}`) - // Check if Ollama server is up + const combinedPrompt = `${prompt}\n${transcript}` + async function checkServer(): Promise { try { const serverResponse = await fetch(`http://${ollamaHost}:${ollamaPort}`) @@ -61,7 +60,6 @@ export const callOllama: LLMFunction = async ( }) ollamaProcess.unref() - // Wait for server to start let attempts = 0 while (attempts < 30) { if (await checkServer()) { @@ -77,7 +75,6 @@ export const callOllama: LLMFunction = async ( } } - // Check and pull model if needed l.wait(`\n Checking if model is available: ${ollamaModelName}`) try { const tagsResponse = await fetch(`http://${ollamaHost}:${ollamaPort}/api/tags`) @@ -101,7 +98,6 @@ export const callOllama: LLMFunction = async ( throw new Error('Response body is null') } - // Stream the pull response const reader = pullResponse.body.getReader() const decoder = new TextDecoder() while (true) { @@ -113,8 +109,8 @@ export const callOllama: LLMFunction = async ( for (const line of lines) { if (line.trim() === '') continue try { - const response = JSON.parse(line) - if (response.status === 'success') { + const parsedLine = JSON.parse(line) + if (parsedLine.status === 'success') { l.wait(` - Model ${ollamaModelName} pulled successfully.\n`) break } @@ -133,13 +129,12 @@ export const callOllama: LLMFunction = async ( l.wait(` - Sending chat request to http://${ollamaHost}:${ollamaPort} using model '${ollamaModelName}'`) - // Call Ollama's /api/chat endpoint in streaming mode const response = await fetch(`http://${ollamaHost}:${ollamaPort}/api/chat`, { method: 'POST', headers: { 'Content-Type': 'application/json' }, body: JSON.stringify({ model: ollamaModelName, - messages: [{ role: 'user', content: promptAndTranscript }], + messages: [{ role: 'user', content: combinedPrompt }], stream: true, }), }) @@ -179,7 +174,6 @@ export const callOllama: LLMFunction = async ( fullContent += parsedResponse.message.content } - // Accumulate token counts if available if (parsedResponse.prompt_eval_count) { totalPromptTokens = parsedResponse.prompt_eval_count } @@ -204,9 +198,7 @@ export const callOllama: LLMFunction = async ( } } - l.wait(`\n Completed streaming from Ollama. Writing output to temp file: ${tempPath}`) - await writeFile(tempPath, fullContent) - l.wait(`\n Ollama output successfully written to '${tempPath}' (length: ${fullContent.length} chars)`) + return fullContent } catch (error) { err(`Error in callOllama: ${error instanceof Error ? error.message : String(error)}`) err(`Stack Trace: ${error instanceof Error ? error.stack : 'No stack trace available'}`) diff --git a/src/llms/together.ts b/src/llms/together.ts index 2abc78a1..db3cbc4c 100644 --- a/src/llms/together.ts +++ b/src/llms/together.ts @@ -1,6 +1,5 @@ // src/llms/together.ts -import { writeFile } from 'node:fs/promises' import { env } from 'node:process' import { TOGETHER_MODELS } from '../utils/llm-models' import { err, logAPIResults } from '../utils/logging' @@ -8,42 +7,37 @@ import type { LLMFunction, TogetherModelType, TogetherResponse } from '../types/ /** * Main function to call Together AI API. 
- * @param promptAndTranscript - The combined prompt and transcript text to process. - * @param tempPath - The temporary file path to write the LLM output. - * @param model - The Together AI model to use. - * @returns A Promise that resolves when the API call is complete. - * @throws {Error} - If an error occurs during the API call. + * @param {string} prompt - The prompt or instructions to process. + * @param {string} transcript - The transcript text. + * @param {string | TogetherModelType} [model] - The Together AI model to use. + * @returns {Promise} A Promise that resolves with the generated text. + * @throws {Error} If an error occurs during the API call. */ export const callTogether: LLMFunction = async ( - promptAndTranscript: string, - tempPath: string, + prompt: string, + transcript: string, model: string | TogetherModelType = 'LLAMA_3_2_3B' -): Promise => { - // Check if the TOGETHER_API_KEY environment variable is set +): Promise => { if (!env['TOGETHER_API_KEY']) { throw new Error('TOGETHER_API_KEY environment variable is not set. Please set it to your Together AI API key.') } try { - // Get the model configuration and ID, defaulting to LLAMA_3_2_3B if not found const modelKey = typeof model === 'string' ? model : 'LLAMA_3_2_3B' const modelConfig = TOGETHER_MODELS[modelKey as TogetherModelType] || TOGETHER_MODELS.LLAMA_3_2_3B const modelId = modelConfig.modelId - // Prepare the request body + const combinedPrompt = `${prompt}\n${transcript}` const requestBody = { model: modelId, messages: [ { role: 'user', - content: promptAndTranscript, + content: combinedPrompt, }, ], - // max_tokens: 2000, - // temperature: 0.7, } - // Make API call to Together AI const response = await fetch('https://api.together.xyz/v1/chat/completions', { method: 'POST', headers: { @@ -54,24 +48,17 @@ export const callTogether: LLMFunction = async ( body: JSON.stringify(requestBody), }) - // Check if the response is OK if (!response.ok) { const errorText = await response.text() throw new Error(`Together AI API error: ${response.status} ${response.statusText} - ${errorText}`) } const data = await response.json() as TogetherResponse - - // Extract the generated content const content = data.choices[0]?.message?.content if (!content) { throw new Error('No content generated from the Together AI API') } - // Write the generated content to the specified output file - await writeFile(tempPath, content) - - // Log API results using the standardized logging function logAPIResults({ modelName: modelKey, stopReason: data.choices[0]?.finish_reason ?? 'unknown', @@ -81,9 +68,10 @@ export const callTogether: LLMFunction = async ( total: data.usage.total_tokens } }) + + return content } catch (error) { - // Log any errors that occur during the process err(`Error in callTogether: ${(error as Error).message}`) - throw error // Re-throw the error for handling by the caller + throw error } } \ No newline at end of file diff --git a/src/process-commands/file.ts b/src/process-commands/file.ts index 1ce63a7b..b17284b9 100644 --- a/src/process-commands/file.ts +++ b/src/process-commands/file.ts @@ -88,11 +88,11 @@ export async function processFile( finalPath, frontMatter, llmServices, - `${generatedPrompt}\n## Transcript\n\n${transcript}` + generatedPrompt, + transcript ) // Insert into DB - l.wait('\n Inserting show note into the database...') insertShowNote( metadata.showLink ?? '', metadata.channel ?? 
'', @@ -106,7 +106,6 @@ export async function processFile( transcript, llmOutput ) - l.wait('\n Show note inserted successfully.\n') // Step 6 - Cleanup if (!options.noCleanUp) { diff --git a/src/process-commands/rss.ts b/src/process-commands/rss.ts index a17e91bb..01ac5a5e 100644 --- a/src/process-commands/rss.ts +++ b/src/process-commands/rss.ts @@ -203,7 +203,8 @@ async function processItem( finalPath, frontMatter, llmServices, - `${generatedPrompt}\n## Transcript\n\n${transcript}` + generatedPrompt, + transcript ) insertShowNote( diff --git a/src/process-commands/video.ts b/src/process-commands/video.ts index da5cd171..3ee906b6 100644 --- a/src/process-commands/video.ts +++ b/src/process-commands/video.ts @@ -11,7 +11,7 @@ import { runTranscription } from '../process-steps/03-run-transcription' import { runLLM } from '../process-steps/05-run-llm' import { cleanUpFiles } from '../process-steps/06-clean-up-files' import { l, err } from '../utils/logging' -import { readFile, writeFile } from 'fs/promises' +import { readFile } from 'fs/promises' import { insertShowNote } from '../server/db' import type { ProcessingOptions } from '../types/process' import type { TranscriptServices } from '../types/transcription' @@ -70,15 +70,12 @@ export async function processVideo( return '' }) } - l.wait(`\n Prompt text length: ${promptText.length}`) // Step 5 - Running LLM processing on transcript (if applicable)... let generatedPrompt = '' if (!promptText) { - l.wait('\n No custom prompt text found, importing default prompt generator...') const defaultPrompt = await import('../process-steps/04-select-prompt') generatedPrompt = await defaultPrompt.generatePrompt(options.prompt, undefined) - l.wait(`\n Default prompt generated (length: ${generatedPrompt.length})`) } else { generatedPrompt = promptText } @@ -88,11 +85,11 @@ export async function processVideo( finalPath, frontMatter, llmServices, - `${generatedPrompt}\n## Transcript\n\n${transcript}` + generatedPrompt, + transcript ) // Insert into DB - l.wait('\n Inserting show note into the database...') insertShowNote( metadata.showLink ?? '', metadata.channel ?? '', @@ -106,19 +103,13 @@ export async function processVideo( transcript, llmOutput ) - l.wait('\n Show note inserted successfully.\n') - - // Write final front matter to a file - l.wait(`\n Writing front matter to file:\n - ${finalPath}.md`) - await writeFile(`${finalPath}.md`, frontMatter) - l.wait(`\n Successfully wrote front matter to file:\n - ${finalPath}.md\n`) // Step 6 - Cleanup if (!options.noCleanUp) { await cleanUpFiles(finalPath) } - l.wait(' processVideo command completed successfully.') + l.wait('\n processVideo command completed successfully.') return { frontMatter, diff --git a/src/process-steps/01-generate-markdown.ts b/src/process-steps/01-generate-markdown.ts index ddbaf8a6..dd69b79a 100644 --- a/src/process-steps/01-generate-markdown.ts +++ b/src/process-steps/01-generate-markdown.ts @@ -83,8 +83,8 @@ export async function generateMarkdown( ): Promise { // Log function inputs l.step('\nStep 1 - Generate Markdown\n') - l.wait('\n generateMarkdown input:\n') - l.wait(`\n${typeof input === 'string' ? input : JSON.stringify(input, null, 2)}\n`) + l.wait(`\n generateMarkdown called with the following arguments\n`) + l.wait(` - input: ${input}`) let frontMatter: string[] let finalPath: string @@ -105,6 +105,7 @@ export async function generateMarkdown( case !!options.urls: case !!options.channel: try { + l.wait('\n Extracting metadata with yt-dlp. 
Parsing output...\n') const { stdout } = await execFilePromise('yt-dlp', [ '--restrict-filenames', '--print', '%(webpage_url)s', @@ -116,7 +117,6 @@ export async function generateMarkdown( input as string, ]) - l.wait('\n Metadata extraction with yt-dlp completed. Parsing output...\n') const [ showLink, videoChannel, @@ -241,9 +241,6 @@ export async function generateMarkdown( const frontMatterContent = frontMatter.join('\n') - // Only log front matter; do not write to file here - l.dim(frontMatterContent) - // Log return values l.wait(` generateMarkdown returning:\n\n - finalPath: ${finalPath}\n - filename: ${filename}\n`) return { frontMatter: frontMatterContent, finalPath, filename, metadata } diff --git a/src/process-steps/02-download-audio.ts b/src/process-steps/02-download-audio.ts index f9fa435c..e6adad8b 100644 --- a/src/process-steps/02-download-audio.ts +++ b/src/process-steps/02-download-audio.ts @@ -76,7 +76,7 @@ export async function downloadAudio( ): Promise { // Log function inputs l.step('\nStep 2 - Download and Convert Audio\n') - l.wait('\n downloadAudio called with the following arguments:\n') + l.wait(' downloadAudio called with the following arguments:\n') l.wait(` - input: ${input}`) l.wait(` - filename: ${filename}`) diff --git a/src/process-steps/03-run-transcription.ts b/src/process-steps/03-run-transcription.ts index f25bf499..cd0a0537 100644 --- a/src/process-steps/03-run-transcription.ts +++ b/src/process-steps/03-run-transcription.ts @@ -1,6 +1,5 @@ // src/process-steps/03-run-transcription.ts -import { readFile } from 'node:fs/promises' import { callWhisper } from '../transcription/whisper' import { callDeepgram } from '../transcription/deepgram' import { callAssembly } from '../transcription/assembly' @@ -25,28 +24,23 @@ export async function runTranscription( ): Promise { // Log function call l.step('\nStep 3 - Run Transcription\n') - l.wait('\n runTranscription called with arguments:\n') + l.wait(' runTranscription called with arguments:\n') l.wait(` - finalPath: ${finalPath}`) l.wait(` - transcriptServices: ${transcriptServices}`) try { switch (transcriptServices) { case 'deepgram': - // Deepgram might write the .txt file to disk. Then read it. - await callDeepgram(finalPath) - l.success('\nDeepgram transcription completed successfully.\n') - // Read the transcript from file (if that's how Deepgram is implemented) - return readFile(`${finalPath}.txt`, 'utf8') + const deepgramTranscript = await callDeepgram(options, finalPath) + l.wait('\n Deepgram transcription completed successfully.\n') + return deepgramTranscript case 'assembly': - // Assembly might write the .txt file to disk. Then read it. 
- await callAssembly(options, finalPath) - l.success('\nAssemblyAI transcription completed successfully.\n') - // Read the transcript from file - return readFile(`${finalPath}.txt`, 'utf8') + const assemblyTranscript = await callAssembly(options, finalPath) + l.wait('\n AssemblyAI transcription completed successfully.\n') + return assemblyTranscript case 'whisper': - // Call whisper and return the final text content in memory const whisperTranscript = await callWhisper(options, finalPath) l.wait('\n Whisper transcription completed successfully.\n') return whisperTranscript diff --git a/src/process-steps/04-select-prompt.ts b/src/process-steps/04-select-prompt.ts index 1b3702d0..8755b1de 100644 --- a/src/process-steps/04-select-prompt.ts +++ b/src/process-steps/04-select-prompt.ts @@ -254,7 +254,7 @@ export async function generatePrompt( customPromptPath?: string ): Promise { l.step('\nStep 4 - Select Prompts\n') - l.wait('\n generatePrompt called with arguments:\n') + l.wait(' generatePrompt called with arguments:\n') l.wait(` - prompt: ${JSON.stringify(prompt)}`) l.wait(` - customPromptPath: ${customPromptPath || 'none'}`) diff --git a/src/process-steps/05-run-llm.ts b/src/process-steps/05-run-llm.ts index 85efdb70..18de11d0 100644 --- a/src/process-steps/05-run-llm.ts +++ b/src/process-steps/05-run-llm.ts @@ -6,7 +6,7 @@ * @packageDocumentation */ -import { readFile, writeFile, unlink } from 'node:fs/promises' +import { writeFile } from 'node:fs/promises' import { callOllama } from '../llms/ollama' import { callChatGPT } from '../llms/chatgpt' import { callClaude } from '../llms/claude' @@ -17,6 +17,7 @@ import { callFireworks } from '../llms/fireworks' import { callTogether } from '../llms/together' import { callGroq } from '../llms/groq' import { l, err } from '../utils/logging' +import { retryLLMCall } from '../utils/retry' import type { ProcessingOptions } from '../types/process' import type { LLMServices, LLMFunction, LLMFunctions } from '../types/llms' @@ -35,27 +36,27 @@ export const LLM_FUNCTIONS: LLMFunctions = { /** * Processes a transcript using a specified Language Model service. - * Handles the complete workflow from reading the transcript to generating - * and saving the final markdown output. + * Handles the complete workflow from combining the transcript to generating + * and saving the final markdown output for multiple LLM services. * * The function performs these steps: - * 1. Reads the transcript file - * 2. Uses a provided prompt (if any) combined with the transcript - * 3. Processes the content with the selected LLM - * 4. Saves the results with front matter and original transcript + * 1. Combines the transcript with a provided prompt (if any) + * 2. Processes the content with the selected LLM + * 3. Saves the results with front matter and transcript or prompt+transcript * - * If no LLM is selected, it saves the prompt/transcript without processing. + * If no LLM is selected, it writes the front matter, prompt, and transcript to a file. + * If an LLM is selected, it writes the front matter, showNotes, and transcript to a file. * * @param {ProcessingOptions} options - Configuration options including: * - prompt: Array of prompt sections to include * - LLM-specific options (e.g., chatgpt, claude, etc.) 
* @param {string} finalPath - Base path for input/output files: - * - Input transcript: `${finalPath}.txt` - * - Temporary file: `${finalPath}-${llmServices}-temp.md` - * - Final output: `${finalPath}-${llmServices}-shownotes.md` + * - Final output: `${finalPath}-${llmServices}-shownotes.md` (if LLM is used) + * - Otherwise: `${finalPath}-prompt.md` * @param {string} frontMatter - YAML front matter content to include in the output * @param {LLMServices} [llmServices] - The LLM service to use - * @param {string} [promptAndTranscript] - Optional combined prompt (instructions + transcript) + * @param {string} [prompt] - Optional prompt or instructions to process + * @param {string} [transcript] - The transcript content * @returns {Promise} Resolves with the LLM output, or an empty string if no LLM is selected */ export async function runLLM( @@ -63,73 +64,51 @@ export async function runLLM( finalPath: string, frontMatter: string, llmServices?: LLMServices, - promptAndTranscript?: string + prompt?: string, + transcript?: string ): Promise { l.step('\nStep 5 - Run LLM on Transcript with Selected Prompt\n') - l.wait('\n runLLM called with arguments:\n') + l.wait(' runLLM called with arguments:\n') l.wait(` - finalPath: ${finalPath}`) - l.wait(` - llmServices: ${llmServices}`) + l.wait(` - llmServices: ${llmServices}\n`) + l.wait(` frontMatter:\n\n${frontMatter}`) + l.wait(` prompt:\n\n${prompt}`) + l.wait(` transcript:\n\n${transcript}`) try { - l.wait(`\n Reading transcript from file:\n - ${finalPath}.txt`) - const tempTranscript = await readFile(`${finalPath}.txt`, 'utf8') - const transcript = `## Transcript\n\n${tempTranscript}` - - // If an external prompt was passed in, combine it here - const combinedPrompt = promptAndTranscript || transcript + const combinedPrompt = `${prompt || ''}\n${transcript || ''}` if (llmServices) { l.wait(`\n Preparing to process with '${llmServices}' Language Model...\n`) - // Get the appropriate LLM handler function const llmFunction: LLMFunction = LLM_FUNCTIONS[llmServices] if (!llmFunction) { throw new Error(`Invalid LLM option: ${llmServices}`) } - const maxRetries = 5 - const delayBetweenRetries = 10000 // 10 seconds - let attempt = 0 - const tempPath = `${finalPath}-${llmServices}-temp.md` + let showNotes = '' - while (attempt < maxRetries) { - try { - attempt++ - l.wait(` Attempt ${attempt} - Processing with ${llmServices}...\n`) - await llmFunction(combinedPrompt, tempPath, options[llmServices]) - l.wait(`\n LLM call to '${llmServices}' completed successfully on attempt ${attempt}.`) - break - } catch (error) { - err(` Attempt ${attempt} failed: ${(error as Error).message}`) - if (attempt >= maxRetries) { - err(` Max retries (${maxRetries}) reached. 
Aborting LLM processing.`) - throw error - } - l.wait(` Retrying in ${delayBetweenRetries / 1000} seconds...`) - await new Promise((resolve) => setTimeout(resolve, delayBetweenRetries)) - } - } + await retryLLMCall( + async () => { + showNotes = await llmFunction(prompt || '', transcript || '', options[llmServices]) + }, + 5, + 5000 + ) - l.wait(`\n LLM processing completed successfully after ${attempt} attempt(s).\n`) + l.wait(`\n LLM processing completed successfully.\n`) - l.wait(`\n Reading LLM output from file:\n - ${tempPath}`) - const showNotes = await readFile(tempPath, 'utf8') const outputFilename = `${finalPath}-${llmServices}-shownotes.md` l.wait(`\n Writing combined front matter + LLM output + transcript to file:\n - ${outputFilename}`) - await writeFile(outputFilename, `${frontMatter}\n${showNotes}\n\n${transcript}`) + await writeFile(outputFilename, `${frontMatter}\n${showNotes}\n\n## Transcript\n\n${transcript}`) l.wait(`\n Generated show notes saved to:\n - ${outputFilename}`) - l.wait(`\n Cleaning up temporary file:\n - ${tempPath}`) - await unlink(tempPath) - l.wait('\n Temporary file removed successfully.\n') - // Return only the LLM's output portion return showNotes } else { - // Handle case when no LLM is selected l.wait('\n No LLM selected, skipping processing...') const noLLMFile = `${finalPath}-prompt.md` - l.wait(`\n Writing front matter + prompt + transcript to file:\n\n - ${noLLMFile}`) + l.wait(`\n Writing front matter + prompt + transcript to file:\n - ${noLLMFile}`) await writeFile(noLLMFile, `${frontMatter}\n${combinedPrompt}`) l.wait(`\n Prompt and transcript saved to:\n - ${noLLMFile}`) diff --git a/src/server/db.ts b/src/server/db.ts index f4f88771..e4a3094d 100644 --- a/src/server/db.ts +++ b/src/server/db.ts @@ -1,6 +1,7 @@ // src/server/db.ts import { DatabaseSync } from 'node:sqlite' +import { l } from '../utils/logging' // Initialize the database connection export const db = new DatabaseSync('show_notes.db', { open: true }) @@ -51,6 +52,7 @@ export function insertShowNote( transcript: string, llmOutput: string ): void { + l.wait('\n Inserting show note into the database...') db.prepare(` INSERT INTO show_notes ( showLink, @@ -79,4 +81,5 @@ export function insertShowNote( transcript, llmOutput ) + l.wait('\n - Show note inserted successfully.\n') } \ No newline at end of file diff --git a/src/transcription/assembly.ts b/src/transcription/assembly.ts index 7f655ad5..f758203a 100644 --- a/src/transcription/assembly.ts +++ b/src/transcription/assembly.ts @@ -30,8 +30,12 @@ const BASE_URL = 'https://api.assemblyai.com/v2' * @returns Promise - The formatted transcript content * @throws Error if any step of the process fails (upload, transcription request, polling, formatting) */ -export async function callAssembly(options: ProcessingOptions, finalPath: string): Promise { - l.wait('\n Using AssemblyAI for transcription...') +export async function callAssembly( + options: ProcessingOptions, + finalPath: string +): Promise { + l.wait('\n Using AssemblyAI for transcription...\n') + l.wait(`\n Options:\n\n${JSON.stringify(options)}`) if (!env['ASSEMBLY_API_KEY']) { throw new Error('ASSEMBLY_API_KEY environment variable is not set. 
Please set it to your AssemblyAI API key.') @@ -69,7 +73,7 @@ export async function callAssembly(options: ProcessingOptions, finalPath: string if (!upload_url) { throw new Error('Upload URL not returned by AssemblyAI.') } - l.success(' Audio file uploaded successfully.') + l.wait(' - Audio file uploaded successfully.') // Step 2: Requesting the transcription const transcriptionOptions: AssemblyAITranscriptionOptions = { diff --git a/src/transcription/deepgram.ts b/src/transcription/deepgram.ts index 79a72fce..91990b80 100644 --- a/src/transcription/deepgram.ts +++ b/src/transcription/deepgram.ts @@ -12,16 +12,22 @@ import { writeFile, readFile } from 'node:fs/promises' import { env } from 'node:process' import { l, err } from '../utils/logging' import { formatDeepgramTranscript } from '../utils/format-transcript' +import type { ProcessingOptions } from '../types/process' import type { DeepgramResponse } from '../types/transcription' /** * Main function to handle transcription using Deepgram API. + * @param options - Additional processing options (e.g., speaker labels) * @param finalPath - The base filename (without extension) for input/output files * @returns Promise - The formatted transcript content * @throws Error if any step of the process fails (upload, transcription request, formatting) */ -export async function callDeepgram(finalPath: string): Promise { +export async function callDeepgram( + options: ProcessingOptions, + finalPath: string +): Promise { l.wait('\n Using Deepgram for transcription...\n') + l.wait(`\n Options:\n\n${JSON.stringify(options)}`) if (!env['DEEPGRAM_API_KEY']) { throw new Error('DEEPGRAM_API_KEY environment variable is not set. Please set it to your Deepgram API key.') diff --git a/src/types/llms.ts b/src/types/llms.ts index af4ef378..8a700a54 100644 --- a/src/types/llms.ts +++ b/src/types/llms.ts @@ -50,10 +50,10 @@ export type LLMOptions = { * @param llmModel - The specific LLM model to use (optional) */ export type LLMFunction = ( - promptAndTranscript: string, - tempPath: string, + prompt: string, + transcript: string, llmModel?: string -) => Promise +) => Promise /** * Mapping of LLM option keys to their corresponding functions. diff --git a/src/utils/retry.ts b/src/utils/retry.ts new file mode 100644 index 00000000..fa06ed92 --- /dev/null +++ b/src/utils/retry.ts @@ -0,0 +1,37 @@ +// src/utils/retry.ts + +import { l, err } from './logging' + +/** + * Retries a given LLM call with the specified maximum attempts and delay between retries. + * + * @param {() => Promise} fn - The function to execute for the LLM call + * @param {number} maxRetries - The maximum number of retry attempts + * @param {number} delayBetweenRetries - Delay in milliseconds between retry attempts + * @returns {Promise} Resolves when the function succeeds or rejects after max attempts + */ +export async function retryLLMCall( + fn: () => Promise, + maxRetries: number, + delayBetweenRetries: number +): Promise { + let attempt = 0 + + while (attempt < maxRetries) { + try { + attempt++ + l.wait(` Attempt ${attempt} - Processing LLM call...\n`) + await fn() + l.wait(`\n LLM call completed successfully on attempt ${attempt}.`) + return + } catch (error) { + err(` Attempt ${attempt} failed: ${(error as Error).message}`) + if (attempt >= maxRetries) { + err(` Max retries (${maxRetries}) reached. 
Aborting LLM processing.`) + throw error + } + l.wait(` Retrying in ${delayBetweenRetries / 1000} seconds...`) + await new Promise((resolve) => setTimeout(resolve, delayBetweenRetries)) + } + } +} \ No newline at end of file From 729d3f7d41855f1755aa80977a59e9ae1d61880c Mon Sep 17 00:00:00 2001 From: Anthony Campolo <12433465+ajcwebdev@users.noreply.github.com> Date: Sat, 4 Jan 2025 00:50:14 -0600 Subject: [PATCH 6/6] refactor whisper and ollama --- package.json | 2 +- src/llms/chatgpt.ts | 2 +- src/llms/claude.ts | 2 +- src/llms/cohere.ts | 2 +- src/llms/fireworks.ts | 2 +- src/llms/gemini.ts | 2 +- src/llms/groq.ts | 2 +- src/llms/mistral.ts | 2 +- src/llms/ollama.ts | 162 +++----------------- src/llms/together.ts | 2 +- src/process-commands/file.ts | 22 +-- src/process-commands/rss.ts | 20 +-- src/process-commands/video.ts | 22 +-- src/process-steps/05-run-llm.ts | 79 ++++------ src/transcription/assembly.ts | 13 +- src/transcription/deepgram.ts | 13 +- src/transcription/whisper.ts | 116 ++++++-------- src/types/process.ts | 22 ++- src/utils/{llm-models.ts => llm-globals.ts} | 26 +++- src/utils/logging.ts | 2 +- src/utils/validate-option.ts | 111 +++++++++++++- 21 files changed, 279 insertions(+), 347 deletions(-) rename src/utils/{llm-models.ts => llm-globals.ts} (92%) diff --git a/package.json b/package.json index 32ccad18..00c1177a 100644 --- a/package.json +++ b/package.json @@ -52,7 +52,7 @@ "test-services": "tsx --test test/services.test.ts", "test-all": "tsx --test test/all.test.ts", "ta": "tsx --test test/all.test.ts", - "clean": "tsx scripts/cleanContent.ts", + "clean": "npm run tsx:base scripts/cleanContent.ts", "docker-cli": "docker run --rm --env-file .env -v $PWD/content:/usr/src/app/content autoshow", "docker-serve": "docker run -d -p 3000:3000 -v $PWD/content:/usr/src/app/content autoshow serve", "prune": "docker system prune -af --volumes && docker image prune -af && docker container prune -f && docker volume prune -af", diff --git a/src/llms/chatgpt.ts b/src/llms/chatgpt.ts index a14c8eab..cde20977 100644 --- a/src/llms/chatgpt.ts +++ b/src/llms/chatgpt.ts @@ -2,7 +2,7 @@ import { env } from 'node:process' import { OpenAI } from 'openai' -import { GPT_MODELS } from '../utils/llm-models' +import { GPT_MODELS } from '../utils/llm-globals' import { err, logAPIResults } from '../utils/logging' import type { LLMFunction, ChatGPTModelType } from '../types/llms' diff --git a/src/llms/claude.ts b/src/llms/claude.ts index b95fdfd7..5968d216 100644 --- a/src/llms/claude.ts +++ b/src/llms/claude.ts @@ -2,7 +2,7 @@ import { env } from 'node:process' import { Anthropic } from '@anthropic-ai/sdk' -import { CLAUDE_MODELS } from '../utils/llm-models' +import { CLAUDE_MODELS } from '../utils/llm-globals' import { err, logAPIResults } from '../utils/logging' import type { LLMFunction, ClaudeModelType } from '../types/llms' diff --git a/src/llms/cohere.ts b/src/llms/cohere.ts index 530dddfd..691dea23 100644 --- a/src/llms/cohere.ts +++ b/src/llms/cohere.ts @@ -2,7 +2,7 @@ import { env } from 'node:process' import { CohereClient } from 'cohere-ai' -import { COHERE_MODELS } from '../utils/llm-models' +import { COHERE_MODELS } from '../utils/llm-globals' import { err, logAPIResults } from '../utils/logging' import type { LLMFunction, CohereModelType } from '../types/llms' diff --git a/src/llms/fireworks.ts b/src/llms/fireworks.ts index 96ac1601..ca3a7282 100644 --- a/src/llms/fireworks.ts +++ b/src/llms/fireworks.ts @@ -1,7 +1,7 @@ // src/llms/fireworks.ts import { env } from 'node:process' 
-import { FIREWORKS_MODELS } from '../utils/llm-models' +import { FIREWORKS_MODELS } from '../utils/llm-globals' import { err, logAPIResults } from '../utils/logging' import type { LLMFunction, FireworksModelType, FireworksResponse } from '../types/llms' diff --git a/src/llms/gemini.ts b/src/llms/gemini.ts index 8878c363..d8ed69ed 100644 --- a/src/llms/gemini.ts +++ b/src/llms/gemini.ts @@ -2,7 +2,7 @@ import { env } from 'node:process' import { GoogleGenerativeAI } from "@google/generative-ai" -import { GEMINI_MODELS } from '../utils/llm-models' +import { GEMINI_MODELS } from '../utils/llm-globals' import { err, logAPIResults } from '../utils/logging' import type { LLMFunction, GeminiModelType } from '../types/llms' diff --git a/src/llms/groq.ts b/src/llms/groq.ts index f34d8961..de6a1b33 100644 --- a/src/llms/groq.ts +++ b/src/llms/groq.ts @@ -1,7 +1,7 @@ // src/llms/groq.ts import { env } from 'node:process' -import { GROQ_MODELS } from '../utils/llm-models' +import { GROQ_MODELS } from '../utils/llm-globals' import { err, logAPIResults } from '../utils/logging' import type { LLMFunction, GroqModelType, GroqChatCompletionResponse } from '../types/llms' diff --git a/src/llms/mistral.ts b/src/llms/mistral.ts index b407b84a..1a17ddb5 100644 --- a/src/llms/mistral.ts +++ b/src/llms/mistral.ts @@ -2,7 +2,7 @@ import { env } from 'node:process' import { Mistral } from '@mistralai/mistralai' -import { MISTRAL_MODELS } from '../utils/llm-models' +import { MISTRAL_MODELS } from '../utils/llm-globals' import { err, logAPIResults } from '../utils/logging' import type { LLMFunction, MistralModelType } from '../types/llms' diff --git a/src/llms/ollama.ts b/src/llms/ollama.ts index 9875c5fc..c3f65663 100644 --- a/src/llms/ollama.ts +++ b/src/llms/ollama.ts @@ -1,10 +1,10 @@ // src/llms/ollama.ts import { env } from 'node:process' -import { spawn } from 'node:child_process' -import { OLLAMA_MODELS } from '../utils/llm-models' +import { OLLAMA_MODELS } from '../utils/llm-globals' import { l, err, logAPIResults } from '../utils/logging' -import type { LLMFunction, OllamaModelType, OllamaResponse, OllamaTagsResponse } from '../types/llms' +import { checkServerAndModel } from '../utils/validate-option' +import type { LLMFunction, OllamaModelType, OllamaResponse } from '../types/llms' /** * callOllama() @@ -38,94 +38,7 @@ export const callOllama: LLMFunction = async ( const combinedPrompt = `${prompt}\n${transcript}` - async function checkServer(): Promise { - try { - const serverResponse = await fetch(`http://${ollamaHost}:${ollamaPort}`) - return serverResponse.ok - } catch (error) { - return false - } - } - - if (await checkServer()) { - l.wait('\n Ollama server is already running...') - } else { - if (ollamaHost === 'ollama') { - throw new Error('Ollama server is not running. Please ensure the Ollama server is running and accessible.') - } else { - l.wait('\n Ollama server is not running. 
Attempting to start...') - const ollamaProcess = spawn('ollama', ['serve'], { - detached: true, - stdio: 'ignore', - }) - ollamaProcess.unref() - - let attempts = 0 - while (attempts < 30) { - if (await checkServer()) { - l.wait(' - Ollama server is now ready.') - break - } - await new Promise((resolve) => setTimeout(resolve, 1000)) - attempts++ - } - if (attempts === 30) { - throw new Error('Ollama server failed to become ready in time.') - } - } - } - - l.wait(`\n Checking if model is available: ${ollamaModelName}`) - try { - const tagsResponse = await fetch(`http://${ollamaHost}:${ollamaPort}/api/tags`) - if (!tagsResponse.ok) { - throw new Error(`HTTP error! status: ${tagsResponse.status}`) - } - const tagsData = (await tagsResponse.json()) as OllamaTagsResponse - const isModelAvailable = tagsData.models.some((m) => m.name === ollamaModelName) - - if (!isModelAvailable) { - l.wait(`\n Model ${ollamaModelName} is not available, pulling...`) - const pullResponse = await fetch(`http://${ollamaHost}:${ollamaPort}/api/pull`, { - method: 'POST', - headers: { 'Content-Type': 'application/json' }, - body: JSON.stringify({ name: ollamaModelName }), - }) - if (!pullResponse.ok) { - throw new Error(`Failed to initiate pull for model ${ollamaModelName}`) - } - if (!pullResponse.body) { - throw new Error('Response body is null') - } - - const reader = pullResponse.body.getReader() - const decoder = new TextDecoder() - while (true) { - const { done, value } = await reader.read() - if (done) break - - const chunk = decoder.decode(value) - const lines = chunk.split('\n') - for (const line of lines) { - if (line.trim() === '') continue - try { - const parsedLine = JSON.parse(line) - if (parsedLine.status === 'success') { - l.wait(` - Model ${ollamaModelName} pulled successfully.\n`) - break - } - } catch (parseError) { - err(`Error parsing JSON while pulling model: ${parseError}`) - } - } - } - } else { - l.wait(`\n Model ${ollamaModelName} is already available.\n`) - } - } catch (error) { - err(`Error checking/pulling model: ${(error as Error).message}`) - throw error - } + await checkServerAndModel(ollamaHost, ollamaPort, ollamaModelName) l.wait(` - Sending chat request to http://${ollamaHost}:${ollamaPort} using model '${ollamaModelName}'`) @@ -135,68 +48,29 @@ export const callOllama: LLMFunction = async ( body: JSON.stringify({ model: ollamaModelName, messages: [{ role: 'user', content: combinedPrompt }], - stream: true, + stream: false, }), }) if (!response.ok) { throw new Error(`HTTP error! status: ${response.status}`) } - if (!response.body) { - throw new Error('Response body is null') - } - - l.wait('\n Successfully connected to Ollama /api/chat streaming endpoint.') - const reader = response.body.getReader() - const decoder = new TextDecoder() - let fullContent = '' - let isFirstChunk = true - let totalPromptTokens = 0 - let totalCompletionTokens = 0 - - while (true) { - const { done, value } = await reader.read() - if (done) break - const chunk = decoder.decode(value) - const lines = chunk.split('\n') + const data = await response.json() as OllamaResponse + const fullContent = data?.message?.content || '' - for (const line of lines) { - if (line.trim() === '') continue + const totalPromptTokens = data.prompt_eval_count ?? 0 + const totalCompletionTokens = data.eval_count ?? 
0 - try { - const parsedResponse = JSON.parse(line) as OllamaResponse - if (parsedResponse.message?.content) { - if (isFirstChunk) { - l.wait(` - Streaming response from Ollama (first chunk received)`) - isFirstChunk = false - } - fullContent += parsedResponse.message.content - } - - if (parsedResponse.prompt_eval_count) { - totalPromptTokens = parsedResponse.prompt_eval_count - } - if (parsedResponse.eval_count) { - totalCompletionTokens = parsedResponse.eval_count - } - - if (parsedResponse.done) { - logAPIResults({ - modelName: modelKey, - stopReason: 'stop', - tokenUsage: { - input: totalPromptTokens || undefined, - output: totalCompletionTokens || undefined, - total: totalPromptTokens + totalCompletionTokens || undefined, - }, - }) - } - } catch (parseError) { - err(`Error parsing JSON from Ollama response: ${parseError}`) - } - } - } + logAPIResults({ + modelName: modelKey, + stopReason: 'stop', + tokenUsage: { + input: totalPromptTokens || undefined, + output: totalCompletionTokens || undefined, + total: totalPromptTokens + totalCompletionTokens || undefined, + }, + }) return fullContent } catch (error) { diff --git a/src/llms/together.ts b/src/llms/together.ts index db3cbc4c..55934848 100644 --- a/src/llms/together.ts +++ b/src/llms/together.ts @@ -1,7 +1,7 @@ // src/llms/together.ts import { env } from 'node:process' -import { TOGETHER_MODELS } from '../utils/llm-models' +import { TOGETHER_MODELS } from '../utils/llm-globals' import { err, logAPIResults } from '../utils/logging' import type { LLMFunction, TogetherModelType, TogetherResponse } from '../types/llms' diff --git a/src/process-commands/file.ts b/src/process-commands/file.ts index b17284b9..0566ec89 100644 --- a/src/process-commands/file.ts +++ b/src/process-commands/file.ts @@ -12,7 +12,6 @@ import { runLLM } from '../process-steps/05-run-llm' import { cleanUpFiles } from '../process-steps/06-clean-up-files' import { l, err } from '../utils/logging' import { readFile } from 'fs/promises' -import { insertShowNote } from '../server/db' import type { ProcessingOptions } from '../types/process' import type { TranscriptServices } from '../types/transcription' import type { LLMServices } from '../types/llms' @@ -23,8 +22,7 @@ import type { LLMServices } from '../types/llms' * 2. Converts the file to the required audio format * 3. Transcribes the audio content * 4. Processes the transcript with a language model (if specified) - * 5. Saves the show notes into the database - * 6. Cleans up temporary files (unless disabled) + * 5. Cleans up temporary files (unless disabled) * * Unlike processVideo, this function handles local files and doesn't need * to check for external dependencies like yt-dlp. @@ -87,24 +85,10 @@ export async function processFile( options, finalPath, frontMatter, - llmServices, - generatedPrompt, - transcript - ) - - // Insert into DB - insertShowNote( - metadata.showLink ?? '', - metadata.channel ?? '', - metadata.channelURL ?? '', - metadata.title, - metadata.description ?? '', - metadata.publishDate, - metadata.coverImage ?? 
'', - frontMatter, generatedPrompt, transcript, - llmOutput + metadata, + llmServices ) // Step 6 - Cleanup diff --git a/src/process-commands/rss.ts b/src/process-commands/rss.ts index 01ac5a5e..f7ee9413 100644 --- a/src/process-commands/rss.ts +++ b/src/process-commands/rss.ts @@ -14,7 +14,6 @@ import { cleanUpFiles } from '../process-steps/06-clean-up-files' import { validateRSSOptions } from '../utils/validate-option' import { l, err, logRSSProcessingAction, logRSSProcessingStatus, logRSSSeparator } from '../utils/logging' import { parser } from '../utils/globals' -import { insertShowNote } from '../server/db' import type { ProcessingOptions, RSSItem } from '../types/process' import type { TranscriptServices } from '../types/transcription' import type { LLMServices } from '../types/llms' @@ -198,27 +197,14 @@ async function processItem( generatedPrompt = promptText } - const llmOutput = await runLLM( + await runLLM( options, finalPath, frontMatter, - llmServices, - generatedPrompt, - transcript - ) - - insertShowNote( - metadata.showLink ?? '', - metadata.channel ?? '', - metadata.channelURL ?? '', - metadata.title, - metadata.description ?? '', - metadata.publishDate, - metadata.coverImage ?? '', - frontMatter, generatedPrompt, transcript, - llmOutput + metadata, + llmServices ) if (!options.noCleanUp) { diff --git a/src/process-commands/video.ts b/src/process-commands/video.ts index 3ee906b6..aeea305e 100644 --- a/src/process-commands/video.ts +++ b/src/process-commands/video.ts @@ -12,7 +12,6 @@ import { runLLM } from '../process-steps/05-run-llm' import { cleanUpFiles } from '../process-steps/06-clean-up-files' import { l, err } from '../utils/logging' import { readFile } from 'fs/promises' -import { insertShowNote } from '../server/db' import type { ProcessingOptions } from '../types/process' import type { TranscriptServices } from '../types/transcription' import type { LLMServices } from '../types/llms' @@ -24,8 +23,7 @@ import type { LLMServices } from '../types/llms' * 3. Downloads and extracts audio * 4. Transcribes the audio content * 5. Processes the transcript with a language model (if specified) - * 6. Saves the show notes into the database - * 7. Cleans up temporary files (unless disabled) + * 6. Cleans up temporary files (unless disabled) * * @param options - Configuration options for processing * @param url - The URL of the video to process @@ -84,24 +82,10 @@ export async function processVideo( options, finalPath, frontMatter, - llmServices, - generatedPrompt, - transcript - ) - - // Insert into DB - insertShowNote( - metadata.showLink ?? '', - metadata.channel ?? '', - metadata.channelURL ?? '', - metadata.title, - metadata.description ?? '', - metadata.publishDate, - metadata.coverImage ?? '', - frontMatter, generatedPrompt, transcript, - llmOutput + metadata, + llmServices ) // Step 6 - Cleanup diff --git a/src/process-steps/05-run-llm.ts b/src/process-steps/05-run-llm.ts index 18de11d0..d01e233f 100644 --- a/src/process-steps/05-run-llm.ts +++ b/src/process-steps/05-run-llm.ts @@ -2,37 +2,17 @@ /** * @file Orchestrator for running Language Model (LLM) processing on transcripts. - * Handles prompt generation, LLM processing, and file management for multiple LLM services. + * Handles prompt generation, LLM processing, file management for multiple LLM services. 
* @packageDocumentation */ import { writeFile } from 'node:fs/promises' -import { callOllama } from '../llms/ollama' -import { callChatGPT } from '../llms/chatgpt' -import { callClaude } from '../llms/claude' -import { callGemini } from '../llms/gemini' -import { callCohere } from '../llms/cohere' -import { callMistral } from '../llms/mistral' -import { callFireworks } from '../llms/fireworks' -import { callTogether } from '../llms/together' -import { callGroq } from '../llms/groq' +import { insertShowNote } from '../server/db' import { l, err } from '../utils/logging' import { retryLLMCall } from '../utils/retry' -import type { ProcessingOptions } from '../types/process' -import type { LLMServices, LLMFunction, LLMFunctions } from '../types/llms' - -// Map of available LLM service handlers -export const LLM_FUNCTIONS: LLMFunctions = { - ollama: callOllama, - chatgpt: callChatGPT, - claude: callClaude, - gemini: callGemini, - cohere: callCohere, - mistral: callMistral, - fireworks: callFireworks, - together: callTogether, - groq: callGroq, -} +import { LLM_FUNCTIONS } from '../utils/llm-globals' +import type { ProcessingOptions, EpisodeMetadata } from '../types/process' +import type { LLMServices, LLMFunction } from '../types/llms' /** * Processes a transcript using a specified Language Model service. @@ -43,6 +23,7 @@ export const LLM_FUNCTIONS: LLMFunctions = { * 1. Combines the transcript with a provided prompt (if any) * 2. Processes the content with the selected LLM * 3. Saves the results with front matter and transcript or prompt+transcript + * 4. Inserts show notes into the database * * If no LLM is selected, it writes the front matter, prompt, and transcript to a file. * If an LLM is selected, it writes the front matter, showNotes, and transcript to a file. 
@@ -54,18 +35,20 @@ export const LLM_FUNCTIONS: LLMFunctions = { * - Final output: `${finalPath}-${llmServices}-shownotes.md` (if LLM is used) * - Otherwise: `${finalPath}-prompt.md` * @param {string} frontMatter - YAML front matter content to include in the output + * @param {string} prompt - Optional prompt or instructions to process + * @param {string} transcript - The transcript content + * @param {EpisodeMetadata} metadata - The metadata object from generateMarkdown * @param {LLMServices} [llmServices] - The LLM service to use - * @param {string} [prompt] - Optional prompt or instructions to process - * @param {string} [transcript] - The transcript content * @returns {Promise} Resolves with the LLM output, or an empty string if no LLM is selected */ export async function runLLM( options: ProcessingOptions, finalPath: string, frontMatter: string, + prompt: string, + transcript: string, + metadata: EpisodeMetadata, llmServices?: LLMServices, - prompt?: string, - transcript?: string ): Promise { l.step('\nStep 5 - Run LLM on Transcript with Selected Prompt\n') l.wait(' runLLM called with arguments:\n') @@ -76,44 +59,50 @@ export async function runLLM( l.wait(` transcript:\n\n${transcript}`) try { - const combinedPrompt = `${prompt || ''}\n${transcript || ''}` - + let showNotesResult = '' if (llmServices) { l.wait(`\n Preparing to process with '${llmServices}' Language Model...\n`) - const llmFunction: LLMFunction = LLM_FUNCTIONS[llmServices] + if (!llmFunction) { throw new Error(`Invalid LLM option: ${llmServices}`) } - let showNotes = '' await retryLLMCall( async () => { - showNotes = await llmFunction(prompt || '', transcript || '', options[llmServices]) + showNotes = await llmFunction(prompt, transcript, options[llmServices]) }, 5, 5000 ) - l.wait(`\n LLM processing completed successfully.\n`) - const outputFilename = `${finalPath}-${llmServices}-shownotes.md` - l.wait(`\n Writing combined front matter + LLM output + transcript to file:\n - ${outputFilename}`) await writeFile(outputFilename, `${frontMatter}\n${showNotes}\n\n## Transcript\n\n${transcript}`) - l.wait(`\n Generated show notes saved to:\n - ${outputFilename}`) - - return showNotes + l.wait(`\n LLM processing completed, combined front matter + LLM output + transcript written to:\n - ${outputFilename}`) + showNotesResult = showNotes } else { l.wait('\n No LLM selected, skipping processing...') - const noLLMFile = `${finalPath}-prompt.md` l.wait(`\n Writing front matter + prompt + transcript to file:\n - ${noLLMFile}`) - await writeFile(noLLMFile, `${frontMatter}\n${combinedPrompt}`) - l.wait(`\n Prompt and transcript saved to:\n - ${noLLMFile}`) - - return '' + await writeFile(noLLMFile, `${frontMatter}\n${prompt}\n## Transcript\n\n${transcript}`) } + + insertShowNote( + metadata.showLink ?? '', + metadata.channel ?? '', + metadata.channelURL ?? '', + metadata.title ?? '', + metadata.description ?? '', + metadata.publishDate ?? '', + metadata.coverImage ?? '', + frontMatter, + prompt, + transcript, + showNotesResult + ) + + return showNotesResult } catch (error) { err(`Error running Language Model: ${(error as Error).message}`) throw error diff --git a/src/transcription/assembly.ts b/src/transcription/assembly.ts index f758203a..21f27869 100644 --- a/src/transcription/assembly.ts +++ b/src/transcription/assembly.ts @@ -6,9 +6,9 @@ // 2. Request transcription of the uploaded file. // 3. Poll for completion until the transcript is ready or fails. // 4. 
Once completed, format the transcript using a helper function from transcription-utils.ts. -// 5. Save the final formatted transcript to a .txt file and also create an empty .lrc file as required by the pipeline. +// 5. Return the formatted transcript. -import { writeFile, readFile } from 'node:fs/promises' +import { readFile } from 'node:fs/promises' import { env } from 'node:process' import { l, err } from '../utils/logging' import { formatAssemblyTranscript } from '../utils/format-transcript' @@ -115,15 +115,6 @@ export async function callAssembly( // Step 4: Formatting the transcript // The formatAssemblyTranscript function handles all formatting logic including speaker labels and timestamps. const txtContent = formatAssemblyTranscript(transcript, speakerLabels || false) - - // Step 5: Write the formatted transcript to a .txt file - await writeFile(`${finalPath}.txt`, txtContent) - l.wait(`\n Transcript saved...\n - ${finalPath}.txt\n`) - - // Create an empty LRC file to satisfy pipeline expectations (even if we don't use it for this service) - await writeFile(`${finalPath}.lrc`, '') - l.wait(`\n Empty LRC file created:\n - ${finalPath}.lrc\n`) - return txtContent } catch (error) { // If any error occurred at any step, log it and rethrow diff --git a/src/transcription/deepgram.ts b/src/transcription/deepgram.ts index 91990b80..e00cb92c 100644 --- a/src/transcription/deepgram.ts +++ b/src/transcription/deepgram.ts @@ -6,9 +6,9 @@ // 2. Send it to Deepgram for transcription with chosen parameters (model, formatting, punctuation, etc.). // 3. Check for successful response and extract the transcription results. // 4. Format the returned words array using formatDeepgramTranscript to add timestamps and newlines. -// 5. Write the formatted transcript to a .txt file and create an empty .lrc file. +// 5. Return the formatted transcript. -import { writeFile, readFile } from 'node:fs/promises' +import { readFile } from 'node:fs/promises' import { env } from 'node:process' import { l, err } from '../utils/logging' import { formatDeepgramTranscript } from '../utils/format-transcript' @@ -73,15 +73,6 @@ export async function callDeepgram( // Format the returned words array const txtContent = formatDeepgramTranscript(alternative.words) - - // Write the formatted transcript to a .txt file - await writeFile(`${finalPath}.txt`, txtContent) - l.wait(`\n Transcript saved:\n - ${finalPath}.txt\n`) - - // Create an empty LRC file to meet pipeline expectations - await writeFile(`${finalPath}.lrc`, '') - l.wait(`\n Empty LRC file created:\n - ${finalPath}.lrc\n`) - return txtContent } catch (error) { // If any error occurred at any step, log it and rethrow diff --git a/src/transcription/whisper.ts b/src/transcription/whisper.ts index 8a352863..e7436103 100644 --- a/src/transcription/whisper.ts +++ b/src/transcription/whisper.ts @@ -5,13 +5,13 @@ * It provides a streamlined, single-container approach for audio transcription. */ -import { readFile, writeFile } from 'node:fs/promises' +import { readFile, unlink } from 'node:fs/promises' import { existsSync } from 'node:fs' import { lrcToTxt } from '../utils/format-transcript' import { WHISPER_MODELS, execPromise } from '../utils/globals' import { l, err } from '../utils/logging' import type { ProcessingOptions } from '../types/process' -import type { WhisperModelType, WhisperRunner } from '../types/transcription' +import type { WhisperModelType } from '../types/transcription' /** * Main function to handle transcription using local Whisper.cpp. 
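Before the next hunk inlines the whisper.cpp runner, a short usage sketch of the simplified surface this file now exposes. The `options` literal, the `'base'` model choice, and the example path are assumptions for illustration; only `callWhisper(options, finalPath)` and its in-memory return value come from the patch.

```ts
// Illustrative usage (not part of the patch): callWhisper now resolves with the
// transcript text itself. It no longer writes `${finalPath}.txt`, and the
// intermediate `${finalPath}.lrc` is unlinked before the function returns.
import { callWhisper } from '../transcription/whisper'
import type { ProcessingOptions } from '../types/process'

// Assumed shape: the whisper option carries the model name (e.g. 'base').
const options = { whisper: 'base' } as ProcessingOptions
const finalPath = 'content/2025-01-04-example-episode' // base path of the .wav file

const transcript = await callWhisper(options, finalPath)
// Only this string flows forward through runTranscription into runLLM; the .wav
// cleanup still happens later in cleanUpFiles.
console.log(`Transcript length: ${transcript.length}`)
```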
@@ -41,77 +41,59 @@ export async function callWhisper( l.wait(`\n Whisper model information:\n\n - whisperModel: ${whisperModel}`) - // Execute the local whisper.cpp runner - const txtContent = await runWhisperCpp(finalPath, whisperModel) + const modelGGMLName = WHISPER_MODELS[whisperModel as WhisperModelType] + l.wait(` - modelGGMLName: ${modelGGMLName}`) - // Return the transcript text - l.wait(' Returning transcript text from callWhisper...') - return txtContent - } catch (error) { - err('Error in callWhisper:', (error as Error).message) - process.exit(1) - } -} - -/** - * Runs transcription using the local whisper.cpp build inside this container. - * - * Steps: - * 1. If whisper.cpp is not cloned/built locally, do so. - * 2. Download model if not present. - * 3. Invoke whisper.cpp to create an LRC file. - * 4. Convert LRC to plain text for final transcript. - */ -const runWhisperCpp: WhisperRunner = async (finalPath, whisperModel) => { - const modelGGMLName = WHISPER_MODELS[whisperModel as WhisperModelType] - l.wait(` - modelGGMLName: ${modelGGMLName}`) + // Check if whisper.cpp directory is present + if (!existsSync('./whisper.cpp')) { + l.wait(`\n No whisper.cpp repo found, cloning and compiling...\n`) + try { + await execPromise('git clone https://github.com/ggerganov/whisper.cpp.git && make -C whisper.cpp') + l.wait(`\n - whisper.cpp clone and compilation complete.\n`) + } catch (cloneError) { + err(`Error cloning/building whisper.cpp: ${(cloneError as Error).message}`) + throw cloneError + } + } - // Check if whisper.cpp directory is present - if (!existsSync('./whisper.cpp')) { - l.wait(`\n No whisper.cpp repo found, cloning and compiling...\n`) - try { - await execPromise('git clone https://github.com/ggerganov/whisper.cpp.git && make -C whisper.cpp') - l.wait(`\n - whisper.cpp clone and compilation complete.\n`) - } catch (cloneError) { - err(`Error cloning/building whisper.cpp: ${(cloneError as Error).message}`) - throw cloneError + // Check if the chosen model file is present + if (!existsSync(`./whisper.cpp/models/${modelGGMLName}`)) { + l.wait(`\n Model not found, downloading...\n - ${whisperModel}\n`) + try { + await execPromise(`bash ./whisper.cpp/models/download-ggml-model.sh ${whisperModel}`) + l.wait(' - Model download completed, running transcription...\n') + } catch (modelError) { + err(`Error downloading model: ${(modelError as Error).message}`) + throw modelError + } } - } - // Check if the chosen model file is present - if (!existsSync(`./whisper.cpp/models/${modelGGMLName}`)) { - l.wait(`\n Model not found, downloading...\n - ${whisperModel}\n`) + // Run whisper.cpp on the WAV file + l.wait(`\n Invoking whisper.cpp on file:\n - ${finalPath}.wav`) try { - await execPromise(`bash ./whisper.cpp/models/download-ggml-model.sh ${whisperModel}`) - l.wait(' - Model download completed, running transcription...\n') - } catch (modelError) { - err(`Error downloading model: ${(modelError as Error).message}`) - throw modelError + await execPromise( + `./whisper.cpp/build/bin/whisper-cli --no-gpu ` + + `-m "whisper.cpp/models/${modelGGMLName}" ` + + `-f "${finalPath}.wav" ` + + `-of "${finalPath}" ` + // Output file base name + `--output-lrc` // Output LRC file + ) + } catch (whisperError) { + err(`Error running whisper.cpp: ${(whisperError as Error).message}`) + throw whisperError } - } - - // Run whisper.cpp on the WAV file - l.wait(`\n Invoking whisper.cpp on file:\n - ${finalPath}.wav`) - try { - await execPromise( - `./whisper.cpp/build/bin/whisper-cli --no-gpu ` + - `-m 
"whisper.cpp/models/${modelGGMLName}" ` + - `-f "${finalPath}.wav" ` + - `-of "${finalPath}" ` + // Output file base name - `--output-lrc` // Output LRC file - ) - } catch (whisperError) { - err(`Error running whisper.cpp: ${(whisperError as Error).message}`) - throw whisperError - } - // Convert .lrc -> .txt - l.wait(`\n Transcript LRC file successfully created, reading file for txt conversion:\n - ${finalPath}.lrc`) - const lrcContent = await readFile(`${finalPath}.lrc`, 'utf8') - const txtContent = lrcToTxt(lrcContent) - await writeFile(`${finalPath}.txt`, txtContent) - l.wait(`\n Transcript transformation successfully completed:\n - ${finalPath}.txt\n`) + // Convert .lrc -> .txt + l.wait(`\n Transcript LRC file successfully created, reading file for txt conversion:\n - ${finalPath}.lrc`) + const lrcContent = await readFile(`${finalPath}.lrc`, 'utf8') + const txtContent = lrcToTxt(lrcContent) + await unlink(`${finalPath}.lrc`) - // Return the plain text content - return txtContent + // Return the transcript text + l.wait(' Returning transcript text from callWhisper...') + return txtContent + } catch (error) { + err('Error in callWhisper:', (error as Error).message) + process.exit(1) + } } \ No newline at end of file diff --git a/src/types/process.ts b/src/types/process.ts index 54f8f11e..c6de884d 100644 --- a/src/types/process.ts +++ b/src/types/process.ts @@ -3,6 +3,26 @@ import type { TranscriptServices, WhisperModelType } from './transcription' import type { LLMServices } from './llms' +/** + * @interface EpisodeMetadata + * @property {string} [showLink] + * @property {string} [channel] + * @property {string} [channelURL] + * @property {string} [title] + * @property {string} [description] + * @property {string} [publishDate] + * @property {string} [coverImage] + */ +export interface EpisodeMetadata { + showLink?: string + channel?: string + channelURL?: string + title?: string + description?: string + publishDate?: string + coverImage?: string +} + /** * @description Pre-handler to override environment variables from request body if provided. 
* This ensures that API keys can be passed in the request and used for the session, @@ -184,7 +204,7 @@ export type HandlerFunction = ( input: string, llmServices?: LLMServices, transcriptServices?: TranscriptServices -) => Promise | Promise +) => Promise | Promise | Promise // Content Types /** diff --git a/src/utils/llm-models.ts b/src/utils/llm-globals.ts similarity index 92% rename from src/utils/llm-models.ts rename to src/utils/llm-globals.ts index 690aa950..83f66142 100644 --- a/src/utils/llm-models.ts +++ b/src/utils/llm-globals.ts @@ -1,4 +1,14 @@ -// src/utils/llm-models.ts +// src/utils/llm-globals.ts + +import { callOllama } from '../llms/ollama' +import { callChatGPT } from '../llms/chatgpt' +import { callClaude } from '../llms/claude' +import { callGemini } from '../llms/gemini' +import { callCohere } from '../llms/cohere' +import { callMistral } from '../llms/mistral' +import { callFireworks } from '../llms/fireworks' +import { callTogether } from '../llms/together' +import { callGroq } from '../llms/groq' import type { ModelConfig, @@ -11,8 +21,22 @@ import type { TogetherModelType, FireworksModelType, GroqModelType, + LLMFunctions } from '../types/llms' +// Map of available LLM service handlers +export const LLM_FUNCTIONS: LLMFunctions = { + ollama: callOllama, + chatgpt: callChatGPT, + claude: callClaude, + gemini: callGemini, + cohere: callCohere, + mistral: callMistral, + fireworks: callFireworks, + together: callTogether, + groq: callGroq, +} + /** * Configuration for Ollama models, mapping model types to their display names and identifiers. * Each model has a human-readable name and a corresponding model identifier used for API calls. diff --git a/src/utils/logging.ts b/src/utils/logging.ts index 7428bff1..b58585bb 100644 --- a/src/utils/logging.ts +++ b/src/utils/logging.ts @@ -5,7 +5,7 @@ import type { ModelConfigValue } from '../types/llms' import type { TokenUsage, CostCalculation, APILogInfo, ChainableLogger } from '../types/logging' import { GPT_MODELS, CLAUDE_MODELS, GEMINI_MODELS, COHERE_MODELS, MISTRAL_MODELS, OLLAMA_MODELS, FIREWORKS_MODELS, TOGETHER_MODELS, GROQ_MODELS -} from './llm-models' +} from './llm-globals' import chalk from 'chalk' /** diff --git a/src/utils/validate-option.ts b/src/utils/validate-option.ts index 6898620c..39a4b94d 100644 --- a/src/utils/validate-option.ts +++ b/src/utils/validate-option.ts @@ -1,17 +1,124 @@ // src/utils/validate-option.ts import { exit } from 'node:process' -import { err } from '../utils/logging' +import { spawn } from 'node:child_process' import { processVideo } from '../process-commands/video' import { processPlaylist } from '../process-commands/playlist' import { processChannel } from '../process-commands/channel' import { processURLs } from '../process-commands/urls' import { processFile } from '../process-commands/file' import { processRSS } from '../process-commands/rss' +import { l, err } from '../utils/logging' import { ACTION_OPTIONS, LLM_OPTIONS, TRANSCRIPT_OPTIONS, otherOptions } from '../utils/globals' import type { ProcessingOptions, ValidAction, HandlerFunction, ProcessRequestBody } from '../types/process' import type { TranscriptServices } from '../types/transcription' -import type { LLMServices } from '../types/llms' +import type { LLMServices, OllamaTagsResponse } from '../types/llms' + +/** + * checkServerAndModel() + * --------------------- + * Checks if the Ollama server is running, attempts to start it if not running, + * and ensures that the specified model is available. 
If not, it will pull the model. + * + * @param {string} ollamaHost - The Ollama host + * @param {string} ollamaPort - The Ollama port + * @param {string} ollamaModelName - The Ollama model name + * @returns {Promise} + */ +export async function checkServerAndModel( + ollamaHost: string, + ollamaPort: string, + ollamaModelName: string +): Promise { + async function checkServer(): Promise { + try { + const serverResponse = await fetch(`http://${ollamaHost}:${ollamaPort}`) + return serverResponse.ok + } catch (error) { + return false + } + } + + if (await checkServer()) { + l.wait('\n Ollama server is already running...') + } else { + if (ollamaHost === 'ollama') { + throw new Error('Ollama server is not running. Please ensure the Ollama server is running and accessible.') + } else { + l.wait('\n Ollama server is not running. Attempting to start...') + const ollamaProcess = spawn('ollama', ['serve'], { + detached: true, + stdio: 'ignore', + }) + ollamaProcess.unref() + + let attempts = 0 + while (attempts < 30) { + if (await checkServer()) { + l.wait(' - Ollama server is now ready.') + break + } + await new Promise((resolve) => setTimeout(resolve, 1000)) + attempts++ + } + if (attempts === 30) { + throw new Error('Ollama server failed to become ready in time.') + } + } + } + + l.wait(`\n Checking if model is available: ${ollamaModelName}`) + try { + const tagsResponse = await fetch(`http://${ollamaHost}:${ollamaPort}/api/tags`) + if (!tagsResponse.ok) { + throw new Error(`HTTP error! status: ${tagsResponse.status}`) + } + const tagsData = (await tagsResponse.json()) as OllamaTagsResponse + const isModelAvailable = tagsData.models.some((m) => m.name === ollamaModelName) + + if (!isModelAvailable) { + l.wait(`\n Model ${ollamaModelName} is not available, pulling...`) + const pullResponse = await fetch(`http://${ollamaHost}:${ollamaPort}/api/pull`, { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ name: ollamaModelName }), + }) + if (!pullResponse.ok) { + throw new Error(`Failed to initiate pull for model ${ollamaModelName}`) + } + if (!pullResponse.body) { + throw new Error('Response body is null') + } + + const reader = pullResponse.body.getReader() + const decoder = new TextDecoder() + while (true) { + const { done, value } = await reader.read() + if (done) break + + const chunk = decoder.decode(value) + const lines = chunk.split('\n') + for (const line of lines) { + if (line.trim() === '') continue + try { + const parsedLine = JSON.parse(line) + if (parsedLine.status === 'success') { + l.wait(` - Model ${ollamaModelName} pulled successfully.\n`) + break + } + } catch (parseError) { + err(`Error parsing JSON while pulling model: ${parseError}`) + } + } + } + } else { + l.wait(`\n Model ${ollamaModelName} is already available.\n`) + } + } catch (error) { + err(`Error checking/pulling model: ${(error as Error).message}`) + throw error + } +} // Map each action to its corresponding handler function export const PROCESS_HANDLERS: Record = {