pinecone-io · mclenhard · May 16, 2025
diff --git a/README.md b/README.md
@@ -86,5 +86,14 @@ Pinecone Developer MCP Server provides the following tools for AI assistants to
 ### Limitations
 Only indexes with integrated inference are supported. Assistants, indexes without integrated inference, standalone embeddings, and vector search are not supported.
 
+
+
+## Running evals
+
+The evals package starts an MCP client that runs the `index.ts` file directly, so there is no need to rebuild between test runs. Environment variables can be set by prefixing the `npx` command, as in the example below. Full documentation is available at [mcpevals.io/docs](https://www.mcpevals.io/docs).
+
+```bash
+OPENAI_API_KEY=your-key npx mcp-eval src/evals/evals.ts src/index.ts
+```
 ## Contributing
 We welcome your collaboration in improving the developer MCP experience. Please submit issues in the [GitHub issue tracker](https://github.com/pinecone-io/pinecone-mcp/issues). Information about contributing can be found in [CONTRIBUTING.md](CONTRIBUTING.md).
diff --git a/package.json b/package.json
@@ -23,11 +23,12 @@
   "dependencies": {
     "@modelcontextprotocol/sdk": "^1.10.0",
     "@pinecone-database/pinecone": "^5.1.1",
-    "zod": "^3.24.3"
+    "zod": "^3.24.3",
+    "mcp-evals": "^1.0.18"
   },
   "devDependencies": {
     "@types/node": "^22.14.1",
     "prettier": "^3.5.3",
     "typescript": "^5.8.3"
   }
-}
+}
diff --git a/src/evals/evals.ts b/src/evals/evals.ts
@@ -0,0 +1,59 @@
+//evals.ts
+
+import { EvalConfig } from 'mcp-evals';
+import { openai } from "@ai-sdk/openai";
+import { grade, EvalFunction } from "mcp-evals";
+
+// Eval case for the search-records tool: grades one search-and-rerank request and returns the JSON-parsed grade.
+const searchRecordsEval: EvalFunction = {
+    name: "search-records Tool Evaluation",
+    description: "Evaluates the search-records tool",
+    async run() {
+        return JSON.parse(await grade(openai("gpt-4"), "Could you search for records of 'Jane Doe' in the 'userAccounts' namespace and re-rank them by relevance?"));
+    }
+};
+
+// Eval case for index creation: grades one create-index request and returns the JSON-parsed grade.
+const createIndexForModelEval: EvalFunction = {
+    name: 'create-index-for-model Tool Evaluation', // "<tool> Tool Evaluation" title, matching the sibling evals
+    description: 'Evaluates the functionality of creating an index for a model',
+    async run() {
+        return JSON.parse(await grade(openai("gpt-4"), "Please create an index named 'eval-test-index' for an embedding model using cloud AWS?"));
+    }
+};
+
+// Eval case for the list-indexes tool: grades one "list everything" request and returns the JSON-parsed grade.
+const listIndexesEval: EvalFunction = {
+    name: 'list-indexes Tool Evaluation',
+    description: 'Evaluates the tool that lists indexes from the database',
+    async run() {
+        return JSON.parse(await grade(openai("gpt-4"), "Please list all indexes in the database."));
+    }
+};
+
+// Eval case for describe-index-stats. Pinecone index names allow only lowercase alphanumerics and '-', so the prompt uses 'eval-test-index'.
+const describeIndexStatsEval: EvalFunction = {
+    name: 'describe-index-stats Tool Evaluation',
+    description: 'Evaluates the functionality of describing index stats by name',
+    async run() {
+        return JSON.parse(await grade(openai("gpt-4"), "Describe the stats for the index named 'eval-test-index'"));
+    }
+};
+
+// Eval case for upsert-records. Pinecone index names allow only lowercase alphanumerics and '-', so the prompt targets 'eval-test-index'.
+const upsertRecordsEval: EvalFunction = {
+    name: 'upsert-records Tool Evaluation',
+    description: 'Tests the upsert-records tool for adding and updating records in a data store',
+    async run() {
+        return JSON.parse(await grade(openai("gpt-4"), "Please upsert the following records in the 'myNamespace' namespace of 'eval-test-index': {\"records\":[{\"id\":1,\"value\":\"TestRecord\"},{\"id\":2,\"value\":\"AnotherRecord\"}]}"));
+    }
+};
+
+// Every eval defined in this file, in run order. This one list backs both the named export
+// and the default-exported config, so the two can no longer drift apart.
+export const evals: EvalFunction[] = [searchRecordsEval, createIndexForModelEval, listIndexesEval, describeIndexStatsEval, upsertRecordsEval];
+
+// Module default: the model handle plus the evals to run.
+const config: EvalConfig = { model: openai("gpt-4"), evals };
+
+export default config;