diff --git a/.cspell.json b/.cspell.json
index 67d72553..4eab5c11 100644
--- a/.cspell.json
+++ b/.cspell.json
@@ -32,7 +32,6 @@
     "sonarjs",
     "pico",
     "timespan",
-    "HUGGINGFACE",
     "mistralai"
   ],
   "dictionaries": ["typescript", "node", "software-terms"],
diff --git a/.env.example b/.env.example
index 63dffa92..bcd1a94e 100644
--- a/.env.example
+++ b/.env.example
@@ -36,6 +36,3 @@
 KERNEL_PUBLIC_KEY=""
 # Logger level, default is INFO
 LOG_LEVEL=""
-
-# Huggingface API key
-HUGGINGFACE_API_KEY=""
\ No newline at end of file
diff --git a/src/configuration/data-purge-config.ts b/src/configuration/data-purge-config.ts
index f5331453..02b29d86 100644
--- a/src/configuration/data-purge-config.ts
+++ b/src/configuration/data-purge-config.ts
@@ -1,5 +1,29 @@
 import { Type, Static } from "@sinclair/typebox";
 
+const openAiType = Type.Object(
+  {
+    /**
+     * AI model to use for comment evaluation.
+     */
+    model: Type.String({
+      default: "gpt-4o-2024-08-06",
+      description: "OpenAI model, e.g. gpt-4o",
+      examples: ["gpt-4o"],
+    }),
+    /**
+     * Specific endpoint to send the comments to.
+     */
+    endpoint: Type.String({
+      default: "https://api.openai.com/v1",
+      pattern: /^(https?:\/\/[^\s$.?#].\S*)$/i.source,
+      description: "OpenAI endpoint for requests",
+      examples: ["https://api.openai.com/v1"],
+    }),
+  },
+  { default: {} }
+);
+
+
 export const dataPurgeConfigurationType = Type.Object({
   skipCommentsWhileAssigned: Type.Union([Type.Literal("all"), Type.Literal("exact"), Type.Literal("none")], {
     default: "all",
@@ -10,6 +34,7 @@ export const dataPurgeConfigurationType = Type.Object({
       "- 'none': Includes all comments, regardless of assignment status or timing.",
     examples: ["all", "exact", "none"],
   }),
+  openAi: openAiType,
 });
 
 export type DataPurgeConfiguration = Static<typeof dataPurgeConfigurationType>;
diff --git a/src/parser/data-purge-module.ts b/src/parser/data-purge-module.ts
index e92abb5a..1e200648 100644
--- a/src/parser/data-purge-module.ts
+++ b/src/parser/data-purge-module.ts
@@ -5,6 +5,7 @@ import { IssueActivity } from "../issue-activity";
 import { parseGitHubUrl } from "../start";
 import { BaseModule } from "../types/module";
 import { Result } from "../types/results";
+import OpenAI from 'openai';
 
 /**
  * Removes the data in the comments that we do not want to be processed.
@@ -13,6 +14,11 @@ export class DataPurgeModule extends BaseModule {
   readonly _configuration: DataPurgeConfiguration | null = this.context.config.incentives.dataPurge;
   _assignmentPeriods: UserAssignments = {};
 
+  readonly _openAi = new OpenAI({
+    apiKey: this.context.env.OPENAI_API_KEY,
+    ...(this._configuration?.openAi.endpoint && { baseURL: this._configuration.openAi.endpoint }),
+  });
+
   get enabled(): boolean {
     if (!this._configuration) {
       this.context.logger.error("Invalid / missing configuration detected for DataPurgeModule, disabling.");
@@ -44,27 +50,28 @@ export class DataPurgeModule extends BaseModule {
     return false;
   }
 
+
+
   async _generateImageDescription(imageUrl: string): Promise<string | null> {
     try {
-      // Fetch image data from URL
       const imageResponse = await fetch(imageUrl);
       const imageData = await imageResponse.arrayBuffer();
-
-      // Send to HuggingFace API
-      const response = await fetch(
-        "https://api-inference.huggingface.co/models/Salesforce/blip-image-captioning-large",
-        {
-          headers: {
-            Authorization: `Bearer ${this.context.env.HUGGINGFACE_API_KEY}`,
-            "Content-Type": "application/json",
-          },
-          method: "POST",
-          body: Buffer.from(imageData),
-        }
-      );
-
-      const result = await response.json();
-      return result[0]?.generated_text || null;
+      const base64Image = Buffer.from(imageData).toString('base64');
+      const response = await this._openAi.chat.completions.create({
+        model: "chatgpt-4o-latest",
+        messages: [
+          {
+            role: "user",
+            content: [
+              { type: "text", text: "Describe this image concisely in one paragraph." },
+              { type: "image_url", image_url: { url: `data:image/jpeg;base64,${base64Image}` } }
+            ]
+          }
+        ],
+        max_tokens: 300
+      });
+
+      return response.choices[0]?.message?.content || null;
     } catch (error) {
       this.context.logger.error(`Failed to generate image description: ${error}`);
       return null;
@@ -73,35 +80,18 @@ export class DataPurgeModule extends BaseModule {
 
   async _generateChatResponse(userMessage: string): Promise<string | null> {
     try {
-      // Define the Hugging Face API endpoint
-      const url = "https://api-inference.huggingface.co/models/mistralai/Mistral-7B-Instruct-v0.3/v1/chat/completions";
-
-      // Construct the payload
-      const payload = {
-        model: "mistralai/Mistral-7B-Instruct-v0.3",
+      const response = await this._openAi.chat.completions.create({
+        model: "gpt-4o-2024-08-06",
         messages: [
           {
             role: "user",
-            content: userMessage,
-          },
+            content: userMessage
+          }
         ],
-        max_tokens: 500,
-        stream: false,
-      };
-
-      // Send request to Hugging Face API
-      const response = await fetch(url, {
-        headers: {
-          Authorization: `Bearer ${this.context.env.HUGGINGFACE_API_KEY}`,
-          "Content-Type": "application/json",
-        },
-        method: "POST",
-        body: JSON.stringify(payload),
+        max_tokens: 500
       });
 
-      // Parse the response
-      const result = await response.json();
-      return result.choices?.[0]?.message?.content || null;
+      return response.choices[0]?.message?.content || null;
     } catch (error) {
       this.context.logger.error(`Failed to generate chat response: ${error}`);
       return null;
@@ -110,31 +100,35 @@ export class DataPurgeModule extends BaseModule {
 
   async _generateLinkDescription(linkUrl: string): Promise<string | null> {
     try {
-      // Fetch the content of the link
       const linkResponse = await fetch(linkUrl);
-      const contentType = linkResponse.headers.get("content-type");
+      const contentType = linkResponse.headers.get('content-type');
 
-      // Only process text/html or text/plain content
-      if (!contentType || (!contentType.includes("text/html") && !contentType.includes("text/plain"))) {
+      if (!contentType || (!contentType.includes('text/html') && !contentType.includes('text/plain'))) {
         this.context.logger.info(`Skipping non-HTML content: ${contentType}, ${linkUrl}`);
         return null;
       }
 
       const linkData = await linkResponse.text();
       const cleanText = linkData
-        .replace(/<script\b[^<]*(?:(?!<\/script>)<[^<]*)*<\/script>/gi, "") // Remove scripts
-        .replace(/<style\b[^<]*(?:(?!<\/style>)<[^<]*)*<\/style>/gi, "") // Remove styles
-        .replace(/<[^>]+>/g, " ") // Remove HTML tags
-        .replace(/\s+/g, " ") // Normalize whitespace
-        .replace(/{\s*"props".*$/s, "") // Remove JSON data
+        .replace(/<script\b[^<]*(?:(?!<\/script>)<[^<]*)*<\/script>/gi, '')
+        .replace(/<style\b[^<]*(?:(?!<\/style>)<[^<]*)*<\/style>/gi, '')
+        .replace(/<[^>]+>/g, ' ')
+        .replace(/\s+/g, ' ')
+        .replace(/{\s*"props".*$/s, '')
         .trim();
 
-      const generatedTextDescription = await this._generateChatResponse(
-        "Summarize the following webpage code into a concise and easy-to-understand text explanation of one paragraph with no bullet points. Focus on describing the purpose, structure, and functionality of the code, including key elements such as layout, styles, scripts, and any interactive features. Avoid technical jargon unless necessary" +
-          cleanText
-      );
+      const response = await this._openAi.chat.completions.create({
+        model: "gpt-4o-2024-08-06",
+        messages: [
+          {
+            role: "user",
+            content: `Summarize the following webpage code into a concise and easy-to-understand text explanation of one paragraph with no bullet points. Focus on describing the purpose, structure, and functionality of the code, including key elements such as layout, styles, scripts, and any interactive features. Avoid technical jargon unless necessary: ${cleanText}`
+          }
+        ],
+        max_tokens: 500
+      });
 
-      return generatedTextDescription;
+      return response.choices[0]?.message?.content || null;
     } catch (error) {
       this.context.logger.error(`Failed to generate link description: ${error}`);
       return null;
diff --git a/src/types/env-type.ts b/src/types/env-type.ts
index 6f202327..8dc31683 100644
--- a/src/types/env-type.ts
+++ b/src/types/env-type.ts
@@ -14,7 +14,6 @@ const envConfigSchema = Type.Object({
   PERMIT_ERC20_TOKENS_NO_FEE_WHITELIST: Type.String(),
   KERNEL_PUBLIC_KEY: Type.Optional(Type.String()),
   LOG_LEVEL: Type.Enum(LOG_LEVEL, { default: LOG_LEVEL.INFO }),
-  HUGGINGFACE_API_KEY: Type.String(),
 });
 
 export type EnvConfig = Static<typeof envConfigSchema>;
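
Reviewer note (not part of the patch): the sketch below shows how the new `openAi` configuration block and the `OPENAI_API_KEY` environment variable are expected to reach the OpenAI client, mirroring the `readonly _openAi` initializer added to DataPurgeModule. It assumes `OPENAI_API_KEY` is already declared in the env schema, since this diff only removes `HUGGINGFACE_API_KEY`; the `createOpenAiClient` helper and the standalone usage are illustrative only.

// Illustrative TypeScript sketch, not part of the diff.
import OpenAI from "openai";

// Field shape taken from the new `openAiType` schema; both fields carry defaults there.
interface OpenAiSettings {
  model: string; // e.g. "gpt-4o-2024-08-06"
  endpoint: string; // e.g. "https://api.openai.com/v1"
}

// Hypothetical helper: builds the client the same way the module does,
// only setting baseURL when a non-empty endpoint is configured.
function createOpenAiClient(settings: OpenAiSettings, apiKey: string): OpenAI {
  return new OpenAI({
    apiKey,
    ...(settings.endpoint && { baseURL: settings.endpoint }),
  });
}

// Example wiring: the schema defaults plus the runtime secret.
const openAi = createOpenAiClient(
  { model: "gpt-4o-2024-08-06", endpoint: "https://api.openai.com/v1" },
  process.env.OPENAI_API_KEY ?? ""
);
console.log(openAi instanceof OpenAI); // true — client is ready for chat.completions.create calls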