Skip to content

Commit

Permalink
Add functionality to query user input based on code blocks, send rele…
Browse files Browse the repository at this point in the history
…vant files to ChatGPT API
  • Loading branch information
aliiyuu committed Aug 21, 2024
1 parent d7d9a3a commit a18a8b0
Show file tree
Hide file tree
Showing 8 changed files with 333 additions and 116 deletions.
2 changes: 1 addition & 1 deletion frontend/src/components/Chatbot.js
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ function Chatbot() {
<textarea id="user-input" placeholder="Type your message here..."></textarea>
<div className="submit-buttons">
<button id="send-button" onClick={ sendMessage }>Enter</button>
<button id="query-button" onClick={ sendCodebaseQuery }>Query codebase</button>
</div>
</div>
</div>
Expand Down
14 changes: 14 additions & 0 deletions frontend/src/interface.js
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,19 @@ export async function sendMessage() {
queryButton.disabled = false;
}

// Send the most relevant code chunks, followed by the user's question, to the ChatGPT API.
export async function sendFiles(files, userInput) {
  // Newline-terminate each file's contents so the model sees clear boundaries
  // between files, then append the user's original question at the end.
  // (Single join instead of repeated string concat in a loop.)
  const request = files.map((file) => `${file}\n`).join("") + userInput;

  await fetchChatGPTResponse(request);
}

// Create a query from the user input that will be used to find the most relevant files
export async function sendCodebaseQuery() {
const userInput = document.getElementById("user-input").value;
Expand Down Expand Up @@ -157,5 +170,6 @@ export async function fetchPineconeResponse(userInput) {
appendMessage("Error", botMessage.error);
} else {
appendMessage("Assistant", botMessage.text);
sendFiles(botMessage.files, userInput);
}
}
35 changes: 0 additions & 35 deletions server/config/pineconeConfig/embeddingConfig.js

This file was deleted.

112 changes: 60 additions & 52 deletions server/config/pineconeConfig/pineconeManager.js
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,16 @@ class PineconeManager {
this.index = this.pc.index(indexName);
}

/**
* Creates a delay for a specified amount of time.
*
* @param {number} ms - The delay time in milliseconds.
* @returns {Promise} A promise that resolves after the specified time.
*/
delay(ms) {
return new Promise(resolve => setTimeout(resolve, ms));
}

/**
* Initializes the Pinecone index with the specified configuration.
* Creates the index on the Pinecone server if it does not already exist.
Expand All @@ -45,57 +55,70 @@ class PineconeManager {
},
},
});

this.index = this.pc.index(this.indexName); // Reinitialize the index after creation
await this.delay(3000); // 3 second delay
}

/**
* Upserts embeddings into the specified namespace of the Pinecone index.
*
* @async
* @param {number[]} embeddings - The embeddings vector to upsert.
* @param {string} [namespace="SampleCode"] - The namespace in the index to upsert to.
* @param {string} [id="SampleCode"] - The unique ID for the vector.
* @param {Object} data - The dictionary of functions and classes with embeddings.
* @param {string} [namespace="codebase"] - The namespace in the index to upsert to.
* @returns {Promise<void>} A promise that resolves once the embeddings are upserted.
*/
async upsertEmbeddings(embeddings, namespace, id) {
await this.index.namespace(namespace).upsert([
{
id: id,
values: embeddings,
},
]);
}
async upsertEmbeddings(data, namespace = "codebase") {
// Prepare the upsert request payload
const upsertPayload = [];

/**
* Retrieves and logs the statistics of the Pinecone index.
*
* @async
* @returns {Promise<void>} A promise that resolves once the index stats are logged.
*/
async checkIndex() {
const stats = await this.index.describeIndexStats();
console.log(stats);
// Handle functions
data.functions.forEach((func) => {
if (func.embedding && Array.isArray(func.embedding)) {
upsertPayload.push({
id: func.function_name,
values: func.embedding,
metadata: { filepath: func.filepath, type: 'function' }
});
}
});

// Handle classes
data.classes.forEach((cls) => {
if (cls.embedding && Array.isArray(cls.embedding)) {
upsertPayload.push({
id: cls.class_name,
values: cls.embedding,
metadata: { filepath: cls.filepath, type: 'class' }
});
}
});

// Upsert the data into Pinecone
await this.index.namespace(namespace).upsert(upsertPayload);
await this.delay(3000); // 3 second delay
console.log('Embeddings upserted successfully.');
}

/**
* Performs a similarity search within the specified namespace of the Pinecone index.
* Logs the search results to the console.
* Queries the Pinecone index using the provided embedding.
*
* @async
* @param {number[]} vector - The query vector for the similarity search.
* @param {number} [topK=3] - The number of top results to return.
* @param {string} [namespace="ns1"] - The namespace in the index to search within.
* @param {boolean} [includeValues=true] - Whether to include vector values in the results.
* @returns {Promise<void>} A promise that resolves once the search results are logged.
* @returns {JSON} A data structure giving the top k results.
* @param {Array<number>} embedding - The embedding vector to query with.
* @param {string} [namespace="samplecode"] - The namespace to query.
* @param {number} [topK=5] - The number of top results to return.
* @returns {Promise<Object[]>} A promise that resolves to the query results.
*/
async similaritySearch(vector, topK = 3, namespace, includeValues = true) {
async similaritySearch(embedding, namespace = "codebase", topK = 3) {
const queryResponse = await this.index.namespace(namespace).query({
topK,
vector,
includeValues,
vector: embedding,
topK: topK, // Number of top results to return
includeValues: true,
includeMetadata: true // Include metadata in the response
});
console.log(queryResponse);

console.log(queryResponse.matches);

return queryResponse;
}

Expand All @@ -109,31 +132,16 @@ class PineconeManager {
await this.pc.deleteIndex(this.indexName);
}

/**
/**
* Deletes the vectors in a specified namespace.
*
* @async
* @param {string} [namespace="ns1"] - The namespace in the index to search within.
* @returns {Promise<void>} A promise that resolves once all vectors in a namespace are deleted.
*/
async deleteVectorsFromNamespace(namespace) {
await this.index.namespace('codebase').deleteAll();
async deleteVectorsFromNamespace(namespace) {
await this.index.namespace(namespace).deleteAll();
}
}

module.exports = PineconeManager;



/*
Example Usage
const pineconeManager = new PineconeManager(process.env.PINECONE_API_KEY, "SampleCode-Upsert");
(async () => {
await pineconeManager.initPinecone();
await pineconeManager.upsertEmbeddings([1.0, 2.0, 3.0]);
await pineconeManager.checkIndex();
await pineconeManager.similaritySearch([1.0, 1.5]);
await pineconeManager.clearIndex();
})();
*/
module.exports = PineconeManager;
31 changes: 20 additions & 11 deletions server/controllers/pineconeQuery.js
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
const pinecone = require("../config/pineconeConfig/pineconeInit");
const generateEmbeddings = require("../config/pineconeConfig/embeddingConfig");
const {generateEmbeddings, processAndUpdateDictionary} = require("../database/embeddingService");
const fs = require("fs");
const path = require("path");
const readCodeFromFile = require('../database/readCodeFromFile');

const getPineconeResponse = async (req, res) => {
const userInput = req.body.prompt;
Expand All @@ -15,39 +16,47 @@ const getPineconeResponse = async (req, res) => {
return res.json({ text: "You haven't uploaded a codebase yet! Please try again." });
}

shortPath = "../codebases/";

if (!userInput) {
return res.status(400).json({ error: "Input is required" });
}

if (!process.env.PINECONE_API_KEY) {
return res.status(500).json({ error: "API key is missing" });
}

const input = [userInput];

const embeddingResponse = await generateEmbeddings(input, "string");
const embed = embeddingResponse[0].embedding;

const embed = await generateEmbeddings(input);

try {
let files = await pinecone.similaritySearch(embed, 3, "codebase", true); // Using default values
let answer = "The most relevant files to your query were ";
let files = await pinecone.similaritySearch(embed); // Using default values
// console.log(files);
let answer = "The most relevant code chunks to your query are ";

const filesToSend = [];

for (let i = 0; i < files.matches.length; i++) {
if (files.matches.length == 0) {
answer = "No files relevant to your query could be found.";
}
else if (files.matches.length == 1) {
answer = `The most relevant file to your query is ${files.matches[i].id.substring(codebasePath.length+1)} with a score of ${files.matches[i].score}.`;
answer = `The most relevant file to your query is the ${files.matches[i].metadata.type} \`\`\`${files.matches[i].id}\`\`\` (from \`\`\`${files.matches[i].metadata.filepath.substring(shortPath.length)}\`\`\`) with a score of ${files.matches[i].score}.`;
}
else if (i == files.matches.length-1) {
answer = answer.concat(`and ${files.matches[i].id.substring(codebasePath.length+1)} with a score of ${files.matches[i].score}.`);
answer = answer.concat(`and the ${files.matches[i].metadata.type} \`\`\`${files.matches[i].id}\`\`\` (from \`\`\`${files.matches[i].metadata.filepath.substring(shortPath.length)}\`\`\`) with a score of ${files.matches[i].score}.`);
}
else {
answer = answer.concat(`${files.matches[i].id.substring(codebasePath.length+1)} with a score of ${files.matches[i].score}, `);
answer = answer.concat(`the ${files.matches[i].metadata.type} \`\`\`${files.matches[i].id}\`\`\` (from \`\`\`${files.matches[i].metadata.filepath.substring(shortPath.length)}\`\`\`) with a score of ${files.matches[i].score}, `);
}
const code = await readCodeFromFile(codebasePath.concat(files.matches[i].metadata.filepath.substring(shortPath.length-1)));
if (!filesToSend.includes(code)) {
filesToSend.push(code);
}
}
res.json({ text: answer });
console.log(filesToSend);
res.json({ text: answer, files: filesToSend });
}
catch (error) {
console.error("Error querying user input: ", error);
Expand Down
55 changes: 55 additions & 0 deletions server/database/embeddingService.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
const openai = require('../config/openaiConfig');

/**
 * Generates an embedding vector for the given text using OpenAI's API.
 *
 * @async
 * @param {string|string[]} text - The text (or batch of texts) to embed.
 * @returns {Promise<number[]|undefined>} The embedding for the first input,
 *   or undefined if the API call failed.
 */
async function generateEmbeddings(text) {
  try {
    // Request embeddings
    const response = await openai.embeddings.create({
      model: 'text-embedding-ada-002', // Use an appropriate embedding model
      input: text,
      encoding_format: 'float'
    });

    return response.data[0].embedding;

  } catch (error) {
    // Best effort: callers treat a falsy result as "no embedding available",
    // so log the failure and return explicitly rather than throwing.
    console.error('Error generating embeddings with OpenAI:', error);
    return undefined;
  }
}

/**
 * Populates every function and class entry in the dictionary with an
 * embedding generated from its source code.
 *
 * @async
 * @param {Object} dict - Dictionary with `functions` and `classes` arrays;
 *   each entry must have a `code` string. Mutated in place.
 * @returns {Promise<Object>} The same dictionary, with `embedding` set on
 *   every entry whose embedding was generated successfully.
 */
async function processAndUpdateDictionary(dict) {
  // Embed sequentially rather than in parallel to stay friendly to OpenAI
  // rate limits; a failed embedding simply leaves `embedding` unset.
  for (const entry of [...(dict.functions ?? []), ...(dict.classes ?? [])]) {
    const embedding = await generateEmbeddings(entry.code);
    if (embedding) {
      entry.embedding = embedding;
    }
  }

  return dict;
}

module.exports = {processAndUpdateDictionary, generateEmbeddings};
Loading

0 comments on commit a18a8b0

Please sign in to comment.