diff --git a/README.md b/README.md index 0908aeaa..9093e179 100644 --- a/README.md +++ b/README.md @@ -58,8 +58,9 @@ import ( "context" openaiembedder "github.com/henomis/lingoose/embedder/openai" + "github.com/henomis/lingoose/index" "github.com/henomis/lingoose/index/option" - simplevectorindex "github.com/henomis/lingoose/index/simpleVectorIndex" + "github.com/henomis/lingoose/index/vectordb/jsondb" "github.com/henomis/lingoose/llm/openai" "github.com/henomis/lingoose/loader" qapipeline "github.com/henomis/lingoose/pipeline/qa" @@ -67,8 +68,8 @@ import ( ) func main() { - docs, _ := loader.NewPDFToTextLoader("./kb").WithPDFToTextPath("/opt/homebrew/bin/pdftotext").WithTextSplitter(textsplitter.NewRecursiveCharacterTextSplitter(2000, 200)).Load(context.Background()) - index := simplevectorindex.New("db", ".", openaiembedder.New(openaiembedder.AdaEmbeddingV2)) + docs, _ := loader.NewPDFToTextLoader("./kb").WithTextSplitter(textsplitter.NewRecursiveCharacterTextSplitter(2000, 200)).Load(context.Background()) + index := index.New(jsondb.New().WithPersist("db.json"), openaiembedder.New(openaiembedder.AdaEmbeddingV2)).WithIncludeContents(true) index.LoadFromDocuments(context.Background(), docs) qapipeline.New(openai.NewChat().WithVerbose(true)).WithIndex(index).Query(context.Background(), "What is the NATO purpose?", option.WithTopK(1)) } diff --git a/index/index.go b/index/index.go index c01c84ac..f1eee0ad 100644 --- a/index/index.go +++ b/index/index.go @@ -232,3 +232,9 @@ func DeepCopyMetadata(metadata types.Meta) types.Meta { } return metadataCopy } + +func GetDefaultOptions() *option.Options { + return &option.Options{ + TopK: defaultTopK, + } +} diff --git a/index/vectordb/jsondb/jsondb.go b/index/vectordb/jsondb/jsondb.go index 27084f3a..93071982 100644 --- a/index/vectordb/jsondb/jsondb.go +++ b/index/vectordb/jsondb/jsondb.go @@ -128,11 +128,14 @@ func (i *DB) Search(ctx context.Context, values []float64, options *option.Optio } func (i *DB) similaritySearch( - ctx context.Context, + _ context.Context, embedding embedder.Embedding, opts *option.Options, ) (index.SearchResults, error) { - _ = ctx + if opts == nil { + opts = index.GetDefaultOptions() + } + scores, err := i.cosineSimilarityBatch(embedding) if err != nil { return nil, fmt.Errorf("%w: %w", index.ErrInternal, err) diff --git a/index/vectordb/milvus/milvus.go b/index/vectordb/milvus/milvus.go index 8c75136d..3b2b3184 100644 --- a/index/vectordb/milvus/milvus.go +++ b/index/vectordb/milvus/milvus.go @@ -103,6 +103,10 @@ func (d *DB) similaritySearch( values []float64, opts *option.Options, ) ([]milvusgoresponse.VectorData, error) { + if opts == nil { + opts = index.GetDefaultOptions() + } + if opts.Filter == nil { opts.Filter = "" } diff --git a/index/vectordb/pinecone/pinecone.go b/index/vectordb/pinecone/pinecone.go index 06ddbd79..2e11667e 100644 --- a/index/vectordb/pinecone/pinecone.go +++ b/index/vectordb/pinecone/pinecone.go @@ -107,6 +107,14 @@ func (d *DB) similaritySearch( values []float64, opts *option.Options, ) ([]pineconegoresponse.QueryMatch, error) { + if opts == nil { + opts = index.GetDefaultOptions() + } + + if opts.Filter == nil { + opts.Filter = map[string]string{} + } + err := d.getProjectID(ctx) if err != nil { return nil, fmt.Errorf("%w: %w", index.ErrInternal, err) diff --git a/index/vectordb/postgres/postgres.go b/index/vectordb/postgres/postgres.go index 0a88b439..e4f4b7de 100644 --- a/index/vectordb/postgres/postgres.go +++ b/index/vectordb/postgres/postgres.go @@ -119,6 +119,10 @@ func (d *DB) similaritySearch( values []float64, opts *option.Options, ) (index.SearchResults, error) { + if opts == nil { + opts = index.GetDefaultOptions() + } + if opts.Filter == nil { opts.Filter = "" } diff --git a/index/vectordb/qdrant/qdrant.go b/index/vectordb/qdrant/qdrant.go index 66d7c7d2..ddc2d68e 100644 --- a/index/vectordb/qdrant/qdrant.go +++ b/index/vectordb/qdrant/qdrant.go @@ -127,6 +127,10 @@ func (d *DB) similaritySearch( values []float64, opts *option.Options, ) ([]qdrantresponse.PointSearchResult, error) { + if opts == nil { + opts = index.GetDefaultOptions() + } + if opts.Filter == nil { opts.Filter = qdrantrequest.Filter{} } diff --git a/index/vectordb/redis/redis.go b/index/vectordb/redis/redis.go index 13d4691d..bdd66b10 100644 --- a/index/vectordb/redis/redis.go +++ b/index/vectordb/redis/redis.go @@ -113,6 +113,10 @@ func (d *DB) similaritySearch( values []float64, opts *option.Options, ) ([]redisearch.Document, error) { + if opts == nil { + opts = index.GetDefaultOptions() + } + if opts.Filter == nil { opts.Filter = redisearch.Filter{} }