Skip to content
This repository was archived by the owner on Jan 9, 2025. It is now read-only.

Commit 7183707

Browse files
committed
feat: split Pinecone query in vector / ID
1 parent b0c21bf commit 7183707

File tree

5 files changed: +235 -61 lines changed

pkg/pinecone/config/definitions.json

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,8 @@
11
[
22
{
33
"available_tasks": [
4-
"TASK_QUERY",
4+
"TASK_QUERY_BY_VECTOR",
5+
"TASK_QUERY_BY_ID",
56
"TASK_UPSERT"
67
],
78
"custom": false,

pkg/pinecone/config/tasks.json

Lines changed: 170 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -1,29 +1,15 @@
11
{
2-
"TASK_QUERY": {
2+
"TASK_QUERY_BY_VECTOR": {
33
"input": {
44
"instillUIOrder": 0,
55
"properties": {
6-
"id": {
7-
"description": "The unique ID of the vector to be used as a query vector. If present, the vector parameter will be ignored.",
8-
"instillAcceptFormats": [
9-
"string"
10-
],
11-
"instillShortDescription": "Query by vector ID instead of by vector",
12-
"instillUIOrder": 0,
13-
"instillUpstreamTypes": [
14-
"reference",
15-
"template"
16-
],
17-
"title": "ID",
18-
"type": "string"
19-
},
206
"vector": {
21-
"description": "An array of dimensions for the query vector.",
7+
"description": "An array of dimensions for the query vector",
228
"instillAcceptFormats": [
239
"array:number",
2410
"array:integer"
2511
],
26-
"instillUIOrder": 1,
12+
"instillUIOrder": 0,
2713
"instillUpstreamTypes": [
2814
"reference"
2915
],
@@ -41,7 +27,7 @@
4127
"instillAcceptFormats": [
4228
"integer"
4329
],
44-
"instillUIOrder": 2,
30+
"instillUIOrder": 1,
4531
"instillUpstreamTypes": [
4632
"value",
4733
"reference"
@@ -54,7 +40,7 @@
5440
"instillAcceptFormats": [
5541
"string"
5642
],
57-
"instillUIOrder": 3,
43+
"instillUIOrder": 2,
5844
"instillUpstreamTypes": [
5945
"value",
6046
"reference",
@@ -69,7 +55,7 @@
6955
"semi-structured/object"
7056
],
7157
"instillShortDescription": "The filter to apply on vector metadata",
72-
"instillUIOrder": 4,
58+
"instillUIOrder": 3,
7359
"instillUpstreamTypes": [
7460
"reference"
7561
],
@@ -84,7 +70,7 @@
8470
"number",
8571
"integer"
8672
],
87-
"instillUIOrder": 5,
73+
"instillUIOrder": 4,
8874
"instillUpstreamTypes": [
8975
"value",
9076
"reference"
@@ -98,7 +84,7 @@
9884
"instillAcceptFormats": [
9985
"boolean"
10086
],
101-
"instillUIOrder": 6,
87+
"instillUIOrder": 5,
10288
"instillUpstreamTypes": [
10389
"value",
10490
"reference"
@@ -195,6 +181,168 @@
195181
"type": "object"
196182
}
197183
},
184+
"TASK_QUERY_BY_ID": {
185+
"input": {
186+
"instillUIOrder": 0,
187+
"properties": {
188+
"id": {
189+
"description": "The unique ID of the target vector",
190+
"instillAcceptFormats": [
191+
"string"
192+
],
193+
"instillUIOrder": 0,
194+
"instillUpstreamTypes": [
195+
"reference",
196+
"template"
197+
],
198+
"title": "ID",
199+
"type": "string"
200+
},
201+
"top_k": {
202+
"description": "The number of results to return for each query",
203+
"instillAcceptFormats": [
204+
"integer"
205+
],
206+
"instillUIOrder": 1,
207+
"instillUpstreamTypes": [
208+
"value",
209+
"reference"
210+
],
211+
"title": "Top K",
212+
"type": "integer"
213+
},
214+
"namespace": {
215+
"description": "The namespace to query",
216+
"instillAcceptFormats": [
217+
"string"
218+
],
219+
"instillUIOrder": 2,
220+
"instillUpstreamTypes": [
221+
"value",
222+
"reference",
223+
"template"
224+
],
225+
"title": "Namespace",
226+
"type": "string"
227+
},
228+
"filter": {
229+
"description": "The filter to apply. You can use vector metadata to limit your search. See https://www.pinecone.io/docs/metadata-filtering/.",
230+
"instillAcceptFormats": [
231+
"semi-structured/object"
232+
],
233+
"instillShortDescription": "The filter to apply on vector metadata",
234+
"instillUIOrder": 3,
235+
"instillUpstreamTypes": [
236+
"reference"
237+
],
238+
"order": 1,
239+
"required": [],
240+
"title": "Filter",
241+
"type": "object"
242+
},
243+
"include_metadata": {
244+
"default": false,
245+
"description": "Indicates whether metadata is included in the response as well as the IDs",
246+
"instillAcceptFormats": [
247+
"boolean"
248+
],
249+
"instillUIOrder": 4,
250+
"instillUpstreamTypes": [
251+
"value",
252+
"reference"
253+
],
254+
"title": "Include Metadata",
255+
"type": "boolean"
256+
},
257+
"include_values": {
258+
"default": false,
259+
"description": "Indicates whether vector values are included in the response",
260+
"instillAcceptFormats": [
261+
"boolean"
262+
],
263+
"instillUIOrder": 5,
264+
"instillUpstreamTypes": [
265+
"value",
266+
"reference"
267+
],
268+
"title": "Include Values",
269+
"type": "boolean"
270+
}
271+
},
272+
"required": [
273+
"top_k",
274+
"id"
275+
],
276+
"title": "Input",
277+
"type": "object"
278+
},
279+
"output": {
280+
"instillUIOrder": 0,
281+
"properties": {
282+
"matches": {
283+
"description": "The matches returned for the query",
284+
"instillUIOrder": 1,
285+
"items": {
286+
"properties": {
287+
"id": {
288+
"description": "The ID of the matched vector",
289+
"instillFormat": "string",
290+
"instillUIOrder": 0,
291+
"title": "ID",
292+
"type": "string"
293+
},
294+
"metadata": {
295+
"description": "Metadata",
296+
"instillFormat": "semi-structured/object",
297+
"instillUIOrder": 3,
298+
"required": [],
299+
"title": "Metadata",
300+
"type": "object"
301+
},
302+
"score": {
303+
"description": "A measure of similarity between this vector and the query vector. The higher the score, the more similar they are.",
304+
"instillFormat": "number",
305+
"instillUIOrder": 1,
306+
"title": "Score",
307+
"type": "number"
308+
},
309+
"values": {
310+
"description": "Vector data values",
311+
"instillUIOrder": 2,
312+
"items": {
313+
"description": "Each float value represents one dimension",
314+
"type": "number"
315+
},
316+
"title": "Values",
317+
"type": "array"
318+
}
319+
},
320+
"required": [
321+
"id",
322+
"score"
323+
],
324+
"title": "Match",
325+
"type": "object"
326+
},
327+
"title": "Matches",
328+
"type": "array"
329+
},
330+
"namespace": {
331+
"description": "The namespace of the query",
332+
"instillFormat": "string",
333+
"instillUIOrder": 0,
334+
"title": "Namespace",
335+
"type": "string"
336+
}
337+
},
338+
"required": [
339+
"namespace",
340+
"matches"
341+
],
342+
"title": "Output",
343+
"type": "object"
344+
}
345+
},
198346
"TASK_UPSERT": {
199347
"input": {
200348
"instillUIOrder": 0,

pkg/pinecone/connector_test.go

Lines changed: 17 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -63,10 +63,9 @@ var (
6363
Values: []float64{3.32},
6464
Metadata: map[string]any{"color": "cerulean"},
6565
}
66-
queryByVector = queryInput{
66+
q = queryInput{
6767
Namespace: "color-schemes",
6868
TopK: 1,
69-
Vector: vectorA.Values,
7069
IncludeValues: true,
7170
IncludeMetadata: true,
7271
Filter: map[string]any{
@@ -75,17 +74,20 @@ var (
7574
},
7675
},
7776
}
78-
queryWithThreshold = func(q queryInput, th float64) queryInput {
77+
78+
queryByVector = queryByVectorInput{
79+
queryInput: q,
80+
Vector: vectorA.Values,
81+
}
82+
83+
queryWithThreshold = func(q queryByVectorInput, th float64) queryByVectorInput {
7984
q.MinScore = th
8085
return q
8186
}
82-
queryByID = queryInput{
83-
Namespace: "color-schemes",
84-
TopK: 1,
85-
Vector: vectorA.Values,
86-
ID: vectorA.ID,
87-
IncludeValues: true,
88-
IncludeMetadata: true,
87+
88+
queryByID = queryByIDInput{
89+
queryInput: q,
90+
ID: vectorA.ID,
8991
}
9092
)
9193

@@ -120,7 +122,7 @@ func TestConnector_Execute(t *testing.T) {
120122
{
121123
name: "ok - query by vector",
122124

123-
task: taskQuery,
125+
task: taskQueryByVector,
124126
execIn: queryByVector,
125127
wantExec: queryResp{
126128
Namespace: "color-schemes",
@@ -143,7 +145,7 @@ func TestConnector_Execute(t *testing.T) {
143145
{
144146
name: "ok - filter out below threshold score",
145147

146-
task: taskQuery,
148+
task: taskQueryByVector,
147149
execIn: queryWithThreshold(queryByVector, threshold),
148150
wantExec: queryResp{
149151
Namespace: "color-schemes",
@@ -162,7 +164,7 @@ func TestConnector_Execute(t *testing.T) {
162164
{
163165
name: "ok - query by ID",
164166

165-
task: taskQuery,
167+
task: taskQueryByID,
166168
execIn: queryByID,
167169
wantExec: queryResp{
168170
Namespace: "color-schemes",
@@ -179,15 +181,8 @@ func TestConnector_Execute(t *testing.T) {
179181
},
180182

181183
wantClientPath: queryPath,
182-
wantClientReq: queryReq{
183-
// Vector is wiped from the request.
184-
Namespace: "color-schemes",
185-
TopK: 1,
186-
ID: vectorA.ID,
187-
IncludeValues: true,
188-
IncludeMetadata: true,
189-
},
190-
clientResp: queryOK,
184+
wantClientReq: queryByID.asRequest(),
185+
clientResp: queryOK,
191186
},
192187
}
193188

0 commit comments

Comments
 (0)