From df6dd834835bbf9a79dbdd6a04190437af5ba771 Mon Sep 17 00:00:00 2001 From: Matthew Lenhard Date: Fri, 16 May 2025 14:32:52 -0400 Subject: [PATCH] feat - tests + evals --- README.md | 9 +++++++ package.json | 5 ++-- src/evals/evals.ts | 59 ++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 71 insertions(+), 2 deletions(-) create mode 100644 src/evals/evals.ts diff --git a/README.md b/README.md index 7b84e65..bb0eff9 100644 --- a/README.md +++ b/README.md @@ -86,5 +86,14 @@ Pinecone Developer MCP Server provides the following tools for AI assistants to ### Limitations Only indexes with integrated inference are supported. Assistants, indexes without integrated inference, standalone embeddings, and vector search are not supported. + + +## Running evals + +The evals package loads an MCP client that then runs the `index.ts` file, so there is no need to rebuild between tests. You can set environment variables by prefixing the `npx` command. Full documentation is available in the [MCP Evals docs](https://www.mcpevals.io/docs). + +```bash +OPENAI_API_KEY=your-key npx mcp-eval src/evals/evals.ts src/tools/database/search-records.ts +``` ## Contributing We welcome your collaboration in improving the developer MCP experience. Please submit issues in the [GitHub issue tracker](https://github.com/pinecone-io/pinecone-mcp/issues). Information about contributing can be found in [CONTRIBUTING.md](CONTRIBUTING.md). 
diff --git a/package.json b/package.json
index 673608f..e0a8d87 100644
--- a/package.json
+++ b/package.json
@@ -23,7 +23,8 @@
   "dependencies": {
     "@modelcontextprotocol/sdk": "^1.10.0",
     "@pinecone-database/pinecone": "^5.1.1",
-    "zod": "^3.24.3"
+    "zod": "^3.24.3",
+    "mcp-evals": "^1.0.18"
   },
   "devDependencies": {
     "@types/node": "^22.14.1",
diff --git a/src/evals/evals.ts b/src/evals/evals.ts
new file mode 100644
index 0000000..a7d473e
--- /dev/null
+++ b/src/evals/evals.ts
@@ -0,0 +1,83 @@
+//evals.ts
+
+import { openai } from '@ai-sdk/openai';
+import { grade, type EvalConfig, type EvalFunction } from 'mcp-evals';
+
+/** Value an eval's `run` resolves to, derived from the `EvalFunction` contract. */
+type EvalResult = Awaited<ReturnType<EvalFunction['run']>>;
+
+/** Single model instance passed to every `grade` call and to the exported config. */
+const model = openai('gpt-4');
+
+/**
+ * Sends `prompt` to `grade` and parses the returned text as JSON.
+ *
+ * @param evalName - Eval name, used only to label a parse failure.
+ * @param prompt - Request text handed to `grade`.
+ * @returns The parsed JSON payload.
+ * @throws SyntaxError when the text returned by `grade` is not valid JSON.
+ */
+async function gradePrompt(evalName: string, prompt: string): Promise<EvalResult> {
+  const raw = await grade(model, prompt);
+  try {
+    return JSON.parse(raw) as EvalResult;
+  } catch (error: unknown) {
+    // A bare JSON.parse failure does not say which eval produced the bad payload.
+    const reason = error instanceof Error ? error.message : String(error);
+    throw new SyntaxError(`${evalName}: grade() returned invalid JSON (${reason})`);
+  }
+}
+
+/**
+ * Builds an eval whose `run` grades one prompt.
+ *
+ * @param name - Name reported for the eval.
+ * @param description - Short description of what the eval covers.
+ * @param prompt - Request text graded when the eval runs.
+ */
+function defineEval(name: string, description: string, prompt: string): EvalFunction {
+  return { name, description, run: () => gradePrompt(name, prompt) };
+}
+
+const searchRecordsEval = defineEval(
+  'search-records Tool Evaluation',
+  'Evaluates the search-records tool',
+  "Could you search for records of 'Jane Doe' in the 'userAccounts' namespace and re-rank them by relevance?",
+);
+
+const createIndexForModelEval = defineEval(
+  'createIndexForModelEval',
+  'Evaluates the functionality of creating an index for a model',
+  "Please create an index named 'eval-test-index' for an embedding model using cloud AWS?",
+);
+
+const listIndexesEval = defineEval(
+  'list-indexes Tool Evaluation',
+  'Evaluates the tool that lists indexes from the database',
+  'Please list all indexes in the database.',
+);
+
+const describeIndexStatsEval = defineEval(
+  'describe-index-stats Evaluation',
+  'Evaluates the functionality of describing index stats by name',
+  "Describe the stats for the index named 'testIndex'",
+);
+
+const upsertRecordsEval = defineEval(
+  'UpsertRecords Tool Evaluation',
+  'Tests the upsert-records tool for adding and updating records in a data store',
+  "Please upsert the following records in the 'myNamespace' namespace of 'myIndex': {\"records\":[{\"id\":1,\"value\":\"TestRecord\"},{\"id\":2,\"value\":\"AnotherRecord\"}]}",
+);
+
+/** Every eval defined in this file; the same list is used as `config.evals`. */
+export const evals: EvalFunction[] = [
+  searchRecordsEval,
+  createIndexForModelEval,
+  listIndexesEval,
+  describeIndexStatsEval,
+  upsertRecordsEval,
+];
+
+const config: EvalConfig = { model, evals };
+
+export default config;