Skip to content

Commit

Permalink
Add functionality to query user input based on code blocks, send rele…
Browse files Browse the repository at this point in the history
…vant files to ChatGPT API
  • Loading branch information
aliiyuu committed Aug 21, 2024
1 parent d7d9a3a commit a18a8b0
Show file tree
Hide file tree
Showing 8 changed files with 333 additions and 116 deletions.
2 changes: 1 addition & 1 deletion frontend/src/components/Chatbot.js
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ function Chatbot() {
<textarea id="user-input" placeholder="Type your message here..."></textarea>
<div className="submit-buttons">
<button id="send-button" onClick={ sendMessage }>Enter</button>
<button id="query-button" onClick={ sendCodebaseQuery }>Query codebase</button>
</div>
</div>
</div>
Expand Down
14 changes: 14 additions & 0 deletions frontend/src/interface.js
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,19 @@ export async function sendMessage() {
queryButton.disabled = false;
}

// Send the most relevant code chunks, followed by the user's question, to the ChatGPT API.
export async function sendFiles(files, userInput) {
  // Newline-terminate each file's contents so the model sees clear boundaries
  // between files, then append the user's original question at the end.
  // (Single join instead of repeated string concat in a loop.)
  const request = files.map((file) => `${file}\n`).join("") + userInput;

  await fetchChatGPTResponse(request);
}

// Create a query from the user input that will be used to find the most relevant files
export async function sendCodebaseQuery() {
const userInput = document.getElementById("user-input").value;
Expand Down Expand Up @@ -157,5 +170,6 @@ export async function fetchPineconeResponse(userInput) {
appendMessage("Error", botMessage.error);
} else {
appendMessage("Assistant", botMessage.text);
sendFiles(botMessage.files, userInput);
}
}
35 changes: 0 additions & 35 deletions server/config/pineconeConfig/embeddingConfig.js

This file was deleted.

112 changes: 60 additions & 52 deletions server/config/pineconeConfig/pineconeManager.js
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,16 @@ class PineconeManager {
this.index = this.pc.index(indexName);
}

/**
* Creates a delay for a specified amount of time.
*
* @param {number} ms - The delay time in milliseconds.
* @returns {Promise} A promise that resolves after the specified time.
*/
delay(ms) {
return new Promise(resolve => setTimeout(resolve, ms));
}

/**
* Initializes the Pinecone index with the specified configuration.
* Creates the index on the Pinecone server if it does not already exist.
Expand All @@ -45,57 +55,70 @@ class PineconeManager {
},
},
});

this.index = this.pc.index(this.indexName); // Reinitialize the index after creation
await this.delay(3000); // 3 second delay
}

/**
* Upserts embeddings into the specified namespace of the Pinecone index.
*
* @async
* @param {number[]} embeddings - The embeddings vector to upsert.
* @param {string} [namespace="SampleCode"] - The namespace in the index to upsert to.
* @param {string} [id="SampleCode"] - The unique ID for the vector.
* @param {Object} data - The dictionary of functions and classes with embeddings.
* @param {string} [namespace="codebase"] - The namespace in the index to upsert to.
* @returns {Promise<void>} A promise that resolves once the embeddings are upserted.
*/
async upsertEmbeddings(embeddings, namespace, id) {
await this.index.namespace(namespace).upsert([
{
id: id,
values: embeddings,
},
]);
}
async upsertEmbeddings(data, namespace = "codebase") {
// Prepare the upsert request payload
const upsertPayload = [];

/**
* Retrieves and logs the statistics of the Pinecone index.
*
* @async
* @returns {Promise<void>} A promise that resolves once the index stats are logged.
*/
async checkIndex() {
const stats = await this.index.describeIndexStats();
console.log(stats);
// Handle functions
data.functions.forEach((func) => {
if (func.embedding && Array.isArray(func.embedding)) {
upsertPayload.push({
id: func.function_name,
values: func.embedding,
metadata: { filepath: func.filepath, type: 'function' }
});
}
});

// Handle classes
data.classes.forEach((cls) => {
if (cls.embedding && Array.isArray(cls.embedding)) {
upsertPayload.push({
id: cls.class_name,
values: cls.embedding,
metadata: { filepath: cls.filepath, type: 'class' }
});
}
});

// Upsert the data into Pinecone
await this.index.namespace(namespace).upsert(upsertPayload);
await this.delay(3000); // 3 second delay
console.log('Embeddings upserted successfully.');
}

/**
* Performs a similarity search within the specified namespace of the Pinecone index.
* Logs the search results to the console.
* Queries the Pinecone index using the provided embedding.
*
* @async
* @param {number[]} vector - The query vector for the similarity search.
* @param {number} [topK=3] - The number of top results to return.
* @param {string} [namespace="ns1"] - The namespace in the index to search within.
* @param {boolean} [includeValues=true] - Whether to include vector values in the results.
* @returns {Promise<void>} A promise that resolves once the search results are logged.
* @returns {JSON} A data structure giving the top k results.
* @param {Array<number>} embedding - The embedding vector to query with.
* @param {string} [namespace="samplecode"] - The namespace to query.
* @param {number} [topK=5] - The number of top results to return.
* @returns {Promise<Object[]>} A promise that resolves to the query results.
*/
async similaritySearch(vector, topK = 3, namespace, includeValues = true) {
async similaritySearch(embedding, namespace = "codebase", topK = 3) {
const queryResponse = await this.index.namespace(namespace).query({
topK,
vector,
includeValues,
vector: embedding,
topK: topK, // Number of top results to return
includeValues: true,
includeMetadata: true // Include metadata in the response
});
console.log(queryResponse);

console.log(queryResponse.matches);

return queryResponse;
}

Expand All @@ -109,31 +132,16 @@ class PineconeManager {
await this.pc.deleteIndex(this.indexName);
}

/**
/**
* Deletes the vectors in a specified namespace.
*
* @async
* @param {string} [namespace="ns1"] - The namespace in the index to search within.
* @returns {Promise<void>} A promise that resolves once all vectors in a namespace are deleted.
*/
async deleteVectorsFromNamespace(namespace) {
await this.index.namespace('codebase').deleteAll();
async deleteVectorsFromNamespace(namespace) {
await this.index.namespace(namespace).deleteAll();
}
}

module.exports = PineconeManager;



/*
Example Usage
const pineconeManager = new PineconeManager(process.env.PINECONE_API_KEY, "SampleCode-Upsert");
(async () => {
await pineconeManager.initPinecone();
await pineconeManager.upsertEmbeddings([1.0, 2.0, 3.0]);
await pineconeManager.checkIndex();
await pineconeManager.similaritySearch([1.0, 1.5]);
await pineconeManager.clearIndex();
})();
*/
module.exports = PineconeManager;
31 changes: 20 additions & 11 deletions server/controllers/pineconeQuery.js
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
const pinecone = require("../config/pineconeConfig/pineconeInit");
const generateEmbeddings = require("../config/pineconeConfig/embeddingConfig");
const {generateEmbeddings, processAndUpdateDictionary} = require("../database/embeddingService");
const fs = require("fs");
const path = require("path");
const readCodeFromFile = require('../database/readCodeFromFile');

const getPineconeResponse = async (req, res) => {
const userInput = req.body.prompt;
Expand All @@ -15,39 +16,47 @@ const getPineconeResponse = async (req, res) => {
return res.json({ text: "You haven't uploaded a codebase yet! Please try again." });
}

shortPath = "../codebases/";

if (!userInput) {
return res.status(400).json({ error: "Input is required" });
}

if (!process.env.PINECONE_API_KEY) {
return res.status(500).json({ error: "API key is missing" });
}

const input = [userInput];

const embeddingResponse = await generateEmbeddings(input, "string");
const embed = embeddingResponse[0].embedding;

const embed = await generateEmbeddings(input);

try {
let files = await pinecone.similaritySearch(embed, 3, "codebase", true); // Using default values
let answer = "The most relevant files to your query were ";
let files = await pinecone.similaritySearch(embed); // Using default values
// console.log(files);
let answer = "The most relevant code chunks to your query are ";

const filesToSend = [];

for (let i = 0; i < files.matches.length; i++) {
if (files.matches.length == 0) {
answer = "No files relevant to your query could be found.";
}
else if (files.matches.length == 1) {
answer = `The most relevant file to your query is ${files.matches[i].id.substring(codebasePath.length+1)} with a score of ${files.matches[i].score}.`;
answer = `The most relevant file to your query is the ${files.matches[i].metadata.type} \`\`\`${files.matches[i].id}\`\`\` (from \`\`\`${files.matches[i].metadata.filepath.substring(shortPath.length)}\`\`\`) with a score of ${files.matches[i].score}.`;
}
else if (i == files.matches.length-1) {
answer = answer.concat(`and ${files.matches[i].id.substring(codebasePath.length+1)} with a score of ${files.matches[i].score}.`);
answer = answer.concat(`and the ${files.matches[i].metadata.type} \`\`\`${files.matches[i].id}\`\`\` (from \`\`\`${files.matches[i].metadata.filepath.substring(shortPath.length)}\`\`\`) with a score of ${files.matches[i].score}.`);
}
else {
answer = answer.concat(`${files.matches[i].id.substring(codebasePath.length+1)} with a score of ${files.matches[i].score}, `);
answer = answer.concat(`the ${files.matches[i].metadata.type} \`\`\`${files.matches[i].id}\`\`\` (from \`\`\`${files.matches[i].metadata.filepath.substring(shortPath.length)}\`\`\`) with a score of ${files.matches[i].score}, `);
}
const code = await readCodeFromFile(codebasePath.concat(files.matches[i].metadata.filepath.substring(shortPath.length-1)));
if (!filesToSend.includes(code)) {
filesToSend.push(code);
}
}
res.json({ text: answer });
console.log(filesToSend);
res.json({ text: answer, files: filesToSend });
}
catch (error) {
console.error("Error querying user input: ", error);
Expand Down
55 changes: 55 additions & 0 deletions server/database/embeddingService.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
const openai = require('../config/openaiConfig');

/**
 * Generates an embedding vector for the given text using OpenAI's API.
 *
 * @async
 * @param {string|string[]} text - The text (or batch of texts) to embed.
 * @returns {Promise<number[]|undefined>} The embedding for the first input,
 *   or undefined if the API call failed.
 */
async function generateEmbeddings(text) {
  try {
    // Request embeddings
    const response = await openai.embeddings.create({
      model: 'text-embedding-ada-002', // Use an appropriate embedding model
      input: text,
      encoding_format: 'float'
    });

    return response.data[0].embedding;

  } catch (error) {
    // Best effort: callers treat a falsy result as "no embedding available",
    // so log the failure and return explicitly rather than throwing.
    console.error('Error generating embeddings with OpenAI:', error);
    return undefined;
  }
}

/**
 * Populates every function and class entry in the dictionary with an
 * embedding generated from its source code.
 *
 * @async
 * @param {Object} dict - Dictionary with `functions` and `classes` arrays;
 *   each entry must have a `code` string. Mutated in place.
 * @returns {Promise<Object>} The same dictionary, with `embedding` set on
 *   every entry whose embedding was generated successfully.
 */
async function processAndUpdateDictionary(dict) {
  // Embed sequentially rather than in parallel to stay friendly to OpenAI
  // rate limits; a failed embedding simply leaves `embedding` unset.
  for (const entry of [...(dict.functions ?? []), ...(dict.classes ?? [])]) {
    const embedding = await generateEmbeddings(entry.code);
    if (embedding) {
      entry.embedding = embedding;
    }
  }

  return dict;
}

module.exports = {processAndUpdateDictionary, generateEmbeddings};
Loading

0 comments on commit a18a8b0

Please sign in to comment.