Skip to content

Commit

Permalink
Feat/81 implement qdrant support (#89)
Browse files Browse the repository at this point in the history
  • Loading branch information
henomis committed Jun 16, 2023
1 parent 36bb01b commit abfe5ac
Show file tree
Hide file tree
Showing 7 changed files with 440 additions and 12 deletions.
3 changes: 0 additions & 3 deletions .github/FUNDING.yml

This file was deleted.

10 changes: 10 additions & 0 deletions embedder/embedding.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,13 @@ var (

// Embedding is the result of an embedding operation.
type Embedding []float64

// ToFloat32 returns a newly allocated slice containing every component of e
// converted to float32. The receiver is left untouched; the result always has
// the same length as e and is never nil, even for an empty embedding.
func (e Embedding) ToFloat32() []float32 {
	converted := make([]float32, 0, len(e))
	for idx := range e {
		// float64 -> float32 is a narrowing conversion, so values may be rounded.
		converted = append(converted, float32(e[idx]))
	}
	return converted
}
109 changes: 109 additions & 0 deletions examples/embeddings/qdrant/main.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
package main

import (
"context"
"fmt"

openaiembedder "github.com/henomis/lingoose/embedder/openai"
"github.com/henomis/lingoose/index"
"github.com/henomis/lingoose/llm/openai"
"github.com/henomis/lingoose/loader"
"github.com/henomis/lingoose/prompt"
"github.com/henomis/lingoose/textsplitter"
)

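// Before running this example, download
// https://frontiernerds.com/files/state_of_the_union.txt into the current
// directory: ingestData loads every ".txt" file it finds there.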

// main runs a small question-answering example on top of a Qdrant index:
//
//  1. build a Qdrant index backed by an OpenAI embedder;
//  2. if the index reports that it is empty, populate it via ingestData;
//  3. run a similarity search for a fixed question and print each match;
//  4. feed the concatenated match contents plus the question to an OpenAI
//     completion model through a prompt template.
//
// Any error along the way aborts the program with a panic.
func main() {
	ctx := context.Background()

	embedder := openaiembedder.New(openaiembedder.AdaEmbeddingV2)

	options := index.QdrantOptions{
		CollectionName: "test",
		IncludeContent: true,
		CreateCollection: &index.QdrantCreateCollectionOptions{
			// NOTE(review): presumably the vector size produced by the
			// embedder above — confirm if the embedding model changes.
			Dimension: 1536,
			Distance:  index.QdrantDistanceCosine,
		},
	}
	// Empty API key, local endpoint.
	qdrantIndex := index.NewQdrant(options, embedder).
		WithAPIKeyAndEdpoint("", "http://localhost:6333")

	empty, err := qdrantIndex.IsEmpty(ctx)
	if err != nil {
		panic(err)
	}
	if empty {
		if err := ingestData(qdrantIndex); err != nil {
			panic(err)
		}
	}

	query := "What is the purpose of the NATO Alliance?"
	matches, err := qdrantIndex.SimilaritySearch(ctx, query, index.WithTopK(3))
	if err != nil {
		panic(err)
	}

	// Print every match and collect the contents, one per line, as the
	// context handed to the language model.
	content := ""
	for _, match := range matches {
		fmt.Printf("Similarity: %f\n", match.Score)
		fmt.Printf("Document: %s\n", match.Document.Content)
		fmt.Println("Metadata: ", match.Document.Metadata)
		fmt.Println("ID: ", match.ID)
		fmt.Println("----------")
		content += match.Document.Content + "\n"
	}

	llmOpenAI := openai.NewCompletion().WithVerbose(true)

	inputs := map[string]string{
		"query":   query,
		"context": content,
	}
	template := prompt.NewPromptTemplate(
		"Based on the following context answer to the question.\n\nContext:\n{{.context}}\n\nQuestion: {{.query}}")
	prompt1 := template.WithInputs(inputs)

	if err := prompt1.Format(nil); err != nil {
		panic(err)
	}

	// The completion text itself is discarded; only a failure is acted upon.
	if _, err := llmOpenAI.Completion(ctx, prompt1.String()); err != nil {
		panic(err)
	}
}

// ingestData loads every ".txt" file from the current directory, splits the
// loaded documents into chunks with a recursive character text splitter
// (constructed with the arguments 1000 and 20), prints each chunk's content and
// metadata to standard output, and finally loads all chunks into qdrantIndex.
//
// It returns the loader's error if loading fails, otherwise whatever
// LoadFromDocuments returns.
func ingestData(qdrantIndex *index.Qdrant) error {
	const separator = "----------"

	ctx := context.Background()

	documents, err := loader.NewDirectoryLoader(".", ".txt").Load(ctx)
	if err != nil {
		return err
	}

	splitter := textsplitter.NewRecursiveCharacterTextSplitter(1000, 20)
	chunks := splitter.SplitDocuments(documents)

	// Dump every chunk so the ingested data can be inspected on the console.
	for _, chunk := range chunks {
		fmt.Println(chunk.Content)
		fmt.Println(separator)
		fmt.Println(chunk.Metadata)
		fmt.Println(separator)
		fmt.Println()
	}

	return qdrantIndex.LoadFromDocuments(ctx, chunks)
}
1 change: 1 addition & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ require github.com/mitchellh/mapstructure v1.5.0
require (
github.com/google/uuid v1.3.0
github.com/henomis/pinecone-go v1.1.1
github.com/henomis/qdrant-go v1.0.0
github.com/pkoukk/tiktoken-go v0.1.1
github.com/sashabaranov/go-openai v1.11.1
)
Expand Down
2 changes: 2 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ github.com/google/uuid v1.3.0 h1:t6JiXgmwXMjEs8VusXIJk2BXHsn+wx8BZdTaoZ5fu7I=
github.com/google/uuid v1.3.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/henomis/pinecone-go v1.1.1 h1:uZmRi1XD6J/fA02Nsbb5TuGggwLet9//t0LLX/dXOds=
github.com/henomis/pinecone-go v1.1.1/go.mod h1:FsMMRjLyiJ9zHqGOlmGvjolqOp2kkbMsRm8oc85vykU=
github.com/henomis/qdrant-go v1.0.0 h1:KVd9aTvObVJgQFznM0FPMn3+zC4O1ekXgRcG61bW130=
github.com/henomis/qdrant-go v1.0.0/go.mod h1:CJ+imAe+WK3ntoIn7v7sSqimGu+/In/7ijhhT0MC5WU=
github.com/henomis/restclientgo v1.0.3 h1:y5+ydfvWJ0/7crObdnCHSn7ya/h1whD+PV4Ir2dZ9Ig=
github.com/henomis/restclientgo v1.0.3/go.mod h1:xIeTCu2ZstvRn0fCukNpzXLN3m/kRTU0i0RwAbv7Zug=
github.com/mitchellh/mapstructure v1.5.0 h1:jeMsZIYE/09sWLaz43PL7Gy6RuMjD2eJVyuac5Z2hdY=
Expand Down
18 changes: 9 additions & 9 deletions index/pinecone.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@ import (
)

const (
defaultPineconeTopK = 10
defaultBatchUpsertSize = 32
defaultPineconeTopK = 10
defaultPineconeBatchUpsertSize = 32
)

type Pinecone struct {
Expand Down Expand Up @@ -54,7 +54,7 @@ func NewPinecone(options PineconeOptions, embedder Embedder) *Pinecone {

pineconeClient := pineconego.New(environment, apiKey)

batchUpsertSize := defaultBatchUpsertSize
batchUpsertSize := defaultPineconeBatchUpsertSize
if options.BatchUpsertSize != nil {
batchUpsertSize = *options.BatchUpsertSize
}
Expand Down Expand Up @@ -140,7 +140,7 @@ func (p *Pinecone) SimilaritySearch(ctx context.Context, query string, opts ...O
return nil, fmt.Errorf("%s: %w", ErrInternal, err)
}

searchResponses := buildSearchReponsesFromMatches(matches, p.includeContent)
searchResponses := buildSearchReponsesFromPineconeMatches(matches, p.includeContent)

return filterSearchResponses(searchResponses, pineconeOptions.topK), nil
}
Expand Down Expand Up @@ -253,9 +253,9 @@ func (p *Pinecone) createIndexIfRequired(ctx context.Context) error {

func (p *Pinecone) batchUpsert(ctx context.Context, documents []document.Document) error {

for i := 0; i < len(documents); i += defaultBatchUpsertSize {
for i := 0; i < len(documents); i += defaultPineconeBatchUpsertSize {

batchEnd := i + defaultBatchUpsertSize
batchEnd := i + defaultPineconeBatchUpsertSize
if batchEnd > len(documents) {
batchEnd = len(documents)
}
Expand All @@ -270,7 +270,7 @@ func (p *Pinecone) batchUpsert(ctx context.Context, documents []document.Documen
return err
}

vectors, err := buildVectorsFromEmbeddingsAndDocuments(embeddings, documents, i, p.includeContent)
vectors, err := buildPineconeVectorsFromEmbeddingsAndDocuments(embeddings, documents, i, p.includeContent)
if err != nil {
return err
}
Expand Down Expand Up @@ -319,7 +319,7 @@ func deepCopyMetadata(metadata types.Meta) types.Meta {
return metadataCopy
}

func buildVectorsFromEmbeddingsAndDocuments(
func buildPineconeVectorsFromEmbeddingsAndDocuments(
embeddings []embedder.Embedding,
documents []document.Document,
startIndex int,
Expand Down Expand Up @@ -355,7 +355,7 @@ func buildVectorsFromEmbeddingsAndDocuments(
return vectors, nil
}

func buildSearchReponsesFromMatches(matches []pineconeresponse.QueryMatch, includeContent bool) SearchResponses {
func buildSearchReponsesFromPineconeMatches(matches []pineconeresponse.QueryMatch, includeContent bool) SearchResponses {
searchResponses := make([]SearchResponse, len(matches))

for i, match := range matches {
Expand Down
Loading

0 comments on commit abfe5ac

Please sign in to comment.