Skip to content
This repository has been archived by the owner on Jan 9, 2025. It is now read-only.

Commit

Permalink
feat: Extend Pinecone tasks with namespace and threshold (#106)
Browse files Browse the repository at this point in the history
Because

- Namespace insertion is supported by the Pinecone API but not by the
connector.
- Sometimes users want to retrieve only relevant matches.

This commit

- makes Pinecone structs unexported (as they are only used locally)
- adds an optional namespace parameter to Pinecone upsert
- adds a score threshold to Pinecone queries


## QA 🔨 

### Insert namespace
![CleanShot 2024-01-12 at 18 12
34](https://github.com/instill-ai/connector/assets/3977183/325e7331-587f-41bb-8ea9-2559096d3d79)
![CleanShot 2024-01-12 at 18 12
02](https://github.com/instill-ai/connector/assets/3977183/52925d2c-4bd9-4165-882c-a8335dce7b61)


### Query with threshold


![CleanShot 2024-01-12 at 18 09
30](https://github.com/instill-ai/connector/assets/3977183/9d69e820-8044-46d4-8b43-10acfa384d43)


![CleanShot 2024-01-12 at 18 09
05](https://github.com/instill-ai/connector/assets/3977183/9e71096a-517f-4178-9847-c28913cf929a)
  • Loading branch information
jvallesm authored Jan 12, 2024
1 parent 34d1a20 commit 17c1fc8
Show file tree
Hide file tree
Showing 4 changed files with 202 additions and 89 deletions.
126 changes: 77 additions & 49 deletions pkg/pinecone/config/tasks.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,21 +3,6 @@
"input": {
"instillUIOrder": 0,
"properties": {
"filter": {
"description": "The filter to apply. You can use vector metadata to limit your search. See https://www.pinecone.io/docs/metadata-filtering/.",
"instillAcceptFormats": [
"semi-structured/object"
],
"instillShortDescription": "The filter to apply on vector metadata",
"instillUIOrder": 3,
"instillUpstreamTypes": [
"reference"
],
"order": 1,
"required": [],
"title": "Filter",
"type": "object"
},
"id": {
"description": "The unique ID of the vector to be used as a query vector. If present, the vector parameter will be ignored.",
"instillAcceptFormats": [
Expand All @@ -32,40 +17,44 @@
"title": "ID",
"type": "string"
},
"include_metadata": {
"default": false,
"description": "Indicates whether metadata is included in the response as well as the IDs",
"vector": {
"description": "An array of dimensions for the query vector.",
"instillAcceptFormats": [
"boolean"
"array:number",
"array:integer"
],
"instillUIOrder": 5,
"instillUIOrder": 1,
"instillUpstreamTypes": [
"value",
"reference"
],
"title": "Include Metadata",
"type": "boolean"
"items": {
"description": "A dimension of the vector",
"example": 0.8167237,
"type": "number"
},
"minItems": 1,
"title": "Vector",
"type": "array"
},
"include_values": {
"default": false,
"description": "Indicates whether vector values are included in the response",
"top_k": {
"description": "The number of results to return for each query",
"instillAcceptFormats": [
"boolean"
"integer"
],
"instillUIOrder": 4,
"instillUIOrder": 2,
"instillUpstreamTypes": [
"value",
"reference"
],
"title": "Include Values",
"type": "boolean"
"title": "Top K",
"type": "integer"
},
"namespace": {
"description": "The namespace to query",
"instillAcceptFormats": [
"string"
],
"instillUIOrder": 1,
"instillUIOrder": 3,
"instillUpstreamTypes": [
"value",
"reference",
Expand All @@ -74,37 +63,62 @@
"title": "Namespace",
"type": "string"
},
"top_k": {
"description": "The number of results to return for each query",
"filter": {
"description": "The filter to apply. You can use vector metadata to limit your search. See https://www.pinecone.io/docs/metadata-filtering/.",
"instillAcceptFormats": [
"semi-structured/object"
],
"instillShortDescription": "The filter to apply on vector metadata",
"instillUIOrder": 4,
"instillUpstreamTypes": [
"reference"
],
"order": 1,
"required": [],
"title": "Filter",
"type": "object"
},
"min_score": {
"description": "Exclude results whose score is below this value",
"instillAcceptFormats": [
"number",
"integer"
],
"instillUIOrder": 5,
"instillUpstreamTypes": [
"value",
"reference"
],
"title": "Minimum Score",
"type": "number"
},
"include_metadata": {
"default": false,
"description": "Indicates whether metadata is included in the response as well as the IDs",
"instillAcceptFormats": [
"boolean"
],
"instillUIOrder": 6,
"instillUpstreamTypes": [
"value",
"reference"
],
"title": "Top K",
"type": "integer"
"title": "Include Metadata",
"type": "boolean"
},
"vector": {
"description": "An array of dimensions for the query vector.",
"include_values": {
"default": false,
"description": "Indicates whether vector values are included in the response",
"instillAcceptFormats": [
"array:number",
"array:integer"
"boolean"
],
"instillUIOrder": 2,
"instillUIOrder": 7,
"instillUpstreamTypes": [
"value",
"reference"
],
"items": {
"description": "A dimension of the vector",
"example": 0.8167237,
"type": "number"
},
"minItems": 1,
"title": "Vector",
"type": "array"
"title": "Include Values",
"type": "boolean"
}
},
"required": [
Expand Down Expand Up @@ -205,7 +219,7 @@
"semi-structured/object"
],
"instillShortDescription": "The vector metadata",
"instillUIOrder": 2,
"instillUIOrder": 3,
"instillUpstreamTypes": [
"reference"
],
Expand All @@ -232,6 +246,20 @@
"minItems": 1,
"title": "Values",
"type": "array"
},
"namespace": {
"description": "The namespace to upsert the vector into",
"instillAcceptFormats": [
"string"
],
"instillUIOrder": 2,
"instillUpstreamTypes": [
"value",
"reference",
"template"
],
"title": "Namespace",
"type": "string"
}
},
"required": [
Expand Down
87 changes: 67 additions & 20 deletions pkg/pinecone/connector_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,12 @@ import (

const (
pineconeKey = "secret-key"
namespace = "pantone"
threshold = 0.9

upsertResp = `{"upsertedCount": 1}`
upsertOK = `{"upsertedCount": 1}`

queryResp = `
queryOK = `
{
"namespace": "color-schemes",
"matches": [
Expand All @@ -32,6 +34,12 @@ const (
"values": [ 2.23 ],
"metadata": { "color": "pumpkin" },
"score": 0.99
},
{
"id": "B",
"values": [ 3.32 ],
"metadata": { "color": "cerulean" },
"score": 0.87
}
]
}`
Expand All @@ -45,12 +53,17 @@ const (
)

var (
vectorA = Vector{
vectorA = vector{
ID: "A",
Values: []float64{2.23},
Metadata: map[string]any{"color": "pumpkin"},
}
queryByVector = QueryInput{
vectorB = vector{
ID: "B",
Values: []float64{3.32},
Metadata: map[string]any{"color": "cerulean"},
}
queryByVector = queryInput{
Namespace: "color-schemes",
TopK: 1,
Vector: vectorA.Values,
Expand All @@ -62,7 +75,11 @@ var (
},
},
}
queryByID = QueryInput{
queryWithThreshold = func(q queryInput, th float64) queryInput {
q.MinScore = th
return q
}
queryByID = queryInput{
Namespace: "color-schemes",
TopK: 1,
Vector: vectorA.Values,
Expand All @@ -89,58 +106,88 @@ func TestConnector_Execute(t *testing.T) {
{
name: "ok - upsert",

task: taskUpsert,
execIn: vectorA,
wantExec: UpsertOutput{RecordsUpserted: 1},
task: taskUpsert,
execIn: upsertInput{
vector: vectorA,
Namespace: namespace,
},
wantExec: upsertOutput{RecordsUpserted: 1},

wantClientPath: upsertPath,
wantClientReq: UpsertReq{Vectors: []Vector{vectorA}},
clientResp: upsertResp,
wantClientReq: upsertReq{Vectors: []vector{vectorA}, Namespace: namespace},
clientResp: upsertOK,
},
{
name: "ok - query by vector",

task: taskQuery,
execIn: queryByVector,
wantExec: QueryResp{
wantExec: queryResp{
Namespace: "color-schemes",
Matches: []Match{
Matches: []match{
{
Vector: vectorA,
vector: vectorA,
Score: 0.99,
},
{
vector: vectorB,
Score: 0.87,
},
},
},

wantClientPath: queryPath,
wantClientReq: QueryReq(queryByVector),
clientResp: queryResp,
wantClientReq: queryByVector.asRequest(),
clientResp: queryOK,
},
{
name: "ok - filter out below threshold score",

task: taskQuery,
execIn: queryWithThreshold(queryByVector, threshold),
wantExec: queryResp{
Namespace: "color-schemes",
Matches: []match{
{
vector: vectorA,
Score: 0.99,
},
},
},

wantClientPath: queryPath,
wantClientReq: queryByVector.asRequest(),
clientResp: queryOK,
},
{
name: "ok - query by ID",

task: taskQuery,
execIn: queryByID,
wantExec: QueryResp{
wantExec: queryResp{
Namespace: "color-schemes",
Matches: []Match{
Matches: []match{
{
Vector: vectorA,
vector: vectorA,
Score: 0.99,
},
{
vector: vectorB,
Score: 0.87,
},
},
},

wantClientPath: queryPath,
wantClientReq: QueryReq{
wantClientReq: queryReq{
// Vector is wiped from the request.
Namespace: "color-schemes",
TopK: 1,
ID: vectorA.ID,
IncludeValues: true,
IncludeMetadata: true,
},
clientResp: queryResp,
clientResp: queryOK,
},
}

Expand Down
Loading

0 comments on commit 17c1fc8

Please sign in to comment.