Skip to content

Commit

Permalink
Add pgvector extension to CloudBerry
Browse files Browse the repository at this point in the history
This commit introduces the pgvector extension to CloudBerry,
enhancing the database with advanced vector processing capabilities.

The pgvector extension enables efficient storage, indexing,
and querying of vector data, opening up new possibilities for
high-performance similarity search, machine learning algorithms,
and other data analytics tasks.
  • Loading branch information
roseduan authored and my-ship-it committed Jul 10, 2023
1 parent de6502a commit f9a2d3a
Show file tree
Hide file tree
Showing 12 changed files with 37 additions and 2 deletions.
3 changes: 2 additions & 1 deletion src/ivfbuild.c
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
#include <float.h>

#include "catalog/index.h"
#include "cdb/cdbvars.h"
#include "ivfflat.h"
#include "miscadmin.h"
#include "storage/bufmgr.h"
Expand Down Expand Up @@ -430,7 +431,7 @@ ComputeCenters(IvfflatBuildState * buildstate)
/* Sample rows */
/* TODO Ensure within maintenance_work_mem */
buildstate->samples = VectorArrayInit(numSamples, buildstate->dimensions);
- if (buildstate->heap != NULL)
+ if (buildstate->heap != NULL && Gp_role != GP_ROLE_DISPATCH)
{
SampleRows(buildstate);

Expand Down
1 change: 1 addition & 0 deletions test/expected/btree.out
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
SET enable_seqscan = off;
CREATE TABLE t (val vector(3));
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause, and no column type is suitable for a distribution key. Creating a NULL policy entry.
INSERT INTO t (val) VALUES ('[0,0,0]'), ('[1,2,3]'), ('[1,1,1]'), (NULL);
CREATE INDEX ON t (val);
SELECT * FROM t WHERE val = '[1,2,3]';
Expand Down
2 changes: 2 additions & 0 deletions test/expected/copy.out
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
CREATE TABLE t (val vector(3));
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause, and no column type is suitable for a distribution key. Creating a NULL policy entry.
INSERT INTO t (val) VALUES ('[0,0,0]'), ('[1,2,3]'), ('[1,1,1]'), (NULL);
CREATE TABLE t2 (val vector(3));
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause, and no column type is suitable for a distribution key. Creating a NULL policy entry.
\copy t TO 'results/data.bin' WITH (FORMAT binary)
\copy t2 FROM 'results/data.bin' WITH (FORMAT binary)
SELECT * FROM t2 ORDER BY val;
Expand Down
4 changes: 4 additions & 0 deletions test/expected/ivfflat_cosine.out
Original file line number Diff line number Diff line change
@@ -1,7 +1,11 @@
SET enable_seqscan = off;
SET optimizer = off;
CREATE TABLE t (val vector(3));
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause, and no column type is suitable for a distribution key. Creating a NULL policy entry.
INSERT INTO t (val) VALUES ('[0,0,0]'), ('[1,2,3]'), ('[1,1,1]'), (NULL);
-- start_ignore
CREATE INDEX ON t USING ivfflat (val vector_cosine_ops) WITH (lists = 1);
-- end_ignore
INSERT INTO t (val) VALUES ('[1,2,4]');
SELECT * FROM t ORDER BY val <=> '[3,3,3]';
val
Expand Down
7 changes: 7 additions & 0 deletions test/expected/ivfflat_ip.out
Original file line number Diff line number Diff line change
@@ -1,7 +1,14 @@
SET enable_seqscan = off;
SET optimizer = off;
CREATE TABLE t (val vector(3));
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause, and no column type is suitable for a distribution key. Creating a NULL policy entry.
INSERT INTO t (val) VALUES ('[0,0,0]'), ('[1,2,3]'), ('[1,1,1]'), (NULL);
-- start_ignore
CREATE INDEX ON t USING ivfflat (val vector_ip_ops) WITH (lists = 1);
NOTICE: ivfflat index created with little data (seg1 127.0.1.1:7003 pid=424029)
DETAIL: This will cause low recall.
HINT: Drop the index until the table has more data.
-- end_ignore
INSERT INTO t (val) VALUES ('[1,2,4]');
SELECT * FROM t ORDER BY val <#> '[3,3,3]';
val
Expand Down
4 changes: 4 additions & 0 deletions test/expected/ivfflat_l2.out
Original file line number Diff line number Diff line change
@@ -1,7 +1,11 @@
SET enable_seqscan = off;
SET optimizer = off;
CREATE TABLE t (val vector(3));
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause, and no column type is suitable for a distribution key. Creating a NULL policy entry.
INSERT INTO t (val) VALUES ('[0,0,0]'), ('[1,2,3]'), ('[1,1,1]'), (NULL);
-- start_ignore
CREATE INDEX ON t USING ivfflat (val) WITH (lists = 1);
-- end_ignore
INSERT INTO t (val) VALUES ('[1,2,4]');
SELECT * FROM t ORDER BY val <-> '[3,3,3]';
val
Expand Down
1 change: 1 addition & 0 deletions test/expected/ivfflat_options.out
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
SET enable_seqscan = off;
CREATE TABLE t (val vector(3));
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause, and no column type is suitable for a distribution key. Creating a NULL policy entry.
CREATE INDEX ON t USING ivfflat (val) WITH (lists = 0);
ERROR: value 0 out of bounds for option "lists"
DETAIL: Valid values are between "1" and "32768".
Expand Down
4 changes: 4 additions & 0 deletions test/expected/ivfflat_unlogged.out
Original file line number Diff line number Diff line change
@@ -1,7 +1,11 @@
SET enable_seqscan = off;
SET optimizer = off;
CREATE UNLOGGED TABLE t (val vector(3));
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause, and no column type is suitable for a distribution key. Creating a NULL policy entry.
INSERT INTO t (val) VALUES ('[0,0,0]'), ('[1,2,3]'), ('[1,1,1]'), (NULL);
-- start_ignore
CREATE INDEX ON t USING ivfflat (val) WITH (lists = 1);
-- end_ignore
SELECT * FROM t ORDER BY val <-> '[3,3,3]';
val
---------
Expand Down
3 changes: 3 additions & 0 deletions test/sql/ivfflat_cosine.sql
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
SET enable_seqscan = off;
SET optimizer = off;

CREATE TABLE t (val vector(3));
INSERT INTO t (val) VALUES ('[0,0,0]'), ('[1,2,3]'), ('[1,1,1]'), (NULL);
-- start_ignore
CREATE INDEX ON t USING ivfflat (val vector_cosine_ops) WITH (lists = 1);
-- end_ignore

INSERT INTO t (val) VALUES ('[1,2,4]');

Expand Down
3 changes: 3 additions & 0 deletions test/sql/ivfflat_ip.sql
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
SET enable_seqscan = off;
SET optimizer = off;

CREATE TABLE t (val vector(3));
INSERT INTO t (val) VALUES ('[0,0,0]'), ('[1,2,3]'), ('[1,1,1]'), (NULL);
-- start_ignore
CREATE INDEX ON t USING ivfflat (val vector_ip_ops) WITH (lists = 1);
-- end_ignore

INSERT INTO t (val) VALUES ('[1,2,4]');

Expand Down
3 changes: 3 additions & 0 deletions test/sql/ivfflat_l2.sql
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
SET enable_seqscan = off;
SET optimizer = off;

CREATE TABLE t (val vector(3));
INSERT INTO t (val) VALUES ('[0,0,0]'), ('[1,2,3]'), ('[1,1,1]'), (NULL);
-- start_ignore
CREATE INDEX ON t USING ivfflat (val) WITH (lists = 1);
-- end_ignore

INSERT INTO t (val) VALUES ('[1,2,4]');

Expand Down
4 changes: 3 additions & 1 deletion test/sql/ivfflat_unlogged.sql
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
SET enable_seqscan = off;
SET optimizer = off;

CREATE UNLOGGED TABLE t (val vector(3));
INSERT INTO t (val) VALUES ('[0,0,0]'), ('[1,2,3]'), ('[1,1,1]'), (NULL);
-- start_ignore
CREATE INDEX ON t USING ivfflat (val) WITH (lists = 1);

-- end_ignore
SELECT * FROM t ORDER BY val <-> '[3,3,3]';

DROP TABLE t;

0 comments on commit f9a2d3a

Please sign in to comment.