Skip to content

Commit

Permalink
chore: refactor and document (#109)
Browse files Browse the repository at this point in the history
* chore: refactor and document

* chore: refactor index

* fix
  • Loading branch information
henomis authored Aug 20, 2023
1 parent 3b29d6d commit bfb5d2a
Show file tree
Hide file tree
Showing 20 changed files with 260 additions and 264 deletions.
2 changes: 2 additions & 0 deletions chat/chat.go
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ func New(promptMessages ...PromptMessage) *Chat {
return chatPromptTemplate
}

// AddPromptMessages adds each of the given prompt messages to the chat prompt template.
func (c *Chat) AddPromptMessages(messages []PromptMessage) {
for _, message := range messages {
c.addMessagePromptTemplate(message)
Expand Down Expand Up @@ -92,6 +93,7 @@ func (p *Chat) ToMessages() (Messages, error) {
return messages, nil
}

// PromptMessages returns the prompt messages currently stored on the chat.
// The stored value is returned as-is; no copy is made here.
func (c *Chat) PromptMessages() PromptMessages {
return c.promptMessages
}
3 changes: 3 additions & 0 deletions embedder/cohere/cohere.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,16 +39,19 @@ func New() *Embedder {
}
}

// WithAPIKey replaces the embedder's client with a new coherego client
// created from apiKey. It returns the same embedder so calls can be chained.
func (e *Embedder) WithAPIKey(apiKey string) *Embedder {
e.client = coherego.New(apiKey)
return e
}

// WithModel sets the model the embedder uses.
// It returns the same embedder so calls can be chained.
func (e *Embedder) WithModel(model EmbedderModel) *Embedder {
e.model = model
return e
}

// Embed returns the embeddings for the given texts
func (h *Embedder) Embed(ctx context.Context, texts []string) ([]embedder.Embedding, error) {
resp := &response.Embed{}
err := h.client.Embed(
Expand Down
3 changes: 3 additions & 0 deletions embedder/huggingface/huggingface.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,16 +23,19 @@ func New() *HuggingFaceEmbedder {
}
}

// WithToken sets the API token stored on the embedder.
// It returns the same embedder so calls can be chained.
func (h *HuggingFaceEmbedder) WithToken(token string) *HuggingFaceEmbedder {
h.token = token
return h
}

// WithModel sets the name of the model the embedder uses.
// It returns the same embedder so calls can be chained.
func (h *HuggingFaceEmbedder) WithModel(model string) *HuggingFaceEmbedder {
h.model = model
return h
}

// Embed returns the embeddings for the given texts.
// It delegates directly to featureExtraction and returns its result and error unchanged.
func (h *HuggingFaceEmbedder) Embed(ctx context.Context, texts []string) ([]embedder.Embedding, error) {
return h.featureExtraction(ctx, texts)
}
4 changes: 4 additions & 0 deletions embedder/llamacpp/llamacpp.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,21 +24,25 @@ func New() *LlamaCppEmbedder {
}
}

// WithLlamaCppPath sets the path to the llamacpp binary.
// It returns the same embedder so calls can be chained.
func (l *LlamaCppEmbedder) WithLlamaCppPath(llamacppPath string) *LlamaCppEmbedder {
l.llamacppPath = llamacppPath
return l
}

// WithModel sets the path of the model file the embedder uses.
// It returns the same embedder so calls can be chained.
func (l *LlamaCppEmbedder) WithModel(modelPath string) *LlamaCppEmbedder {
l.modelPath = modelPath
return l
}

// WithArgs sets the arguments to pass to the llamacpp binary.
// The slice is stored as given (not copied). It returns the same embedder
// so calls can be chained.
func (l *LlamaCppEmbedder) WithArgs(llamacppArgs []string) *LlamaCppEmbedder {
l.llamacppArgs = llamacppArgs
return l
}

// Embed returns the embeddings for the given texts
func (o *LlamaCppEmbedder) Embed(ctx context.Context, texts []string) ([]embedder.Embedding, error) {

embeddings := make([]embedder.Embedding, len(texts))
Expand Down
2 changes: 2 additions & 0 deletions embedder/openai/openai.go
Original file line number Diff line number Diff line change
Expand Up @@ -71,11 +71,13 @@ func New(model Model) *OpenAIEmbedder {
}
}

// WithClient sets the OpenAI client the embedder uses, replacing the one
// it currently holds. It returns the same embedder so calls can be chained.
func (o *OpenAIEmbedder) WithClient(client *openai.Client) *OpenAIEmbedder {
o.openAIClient = client
return o
}

// Embed returns the embeddings for the given texts
func (o *OpenAIEmbedder) Embed(ctx context.Context, texts []string) ([]embedder.Embedding, error) {
maxTokens := o.getMaxTokens()

Expand Down
9 changes: 5 additions & 4 deletions examples/embeddings/knowledge_base/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@ import (

"github.com/henomis/lingoose/chat"
openaiembedder "github.com/henomis/lingoose/embedder/openai"
"github.com/henomis/lingoose/index"
indexoption "github.com/henomis/lingoose/index/option"
simplevectorindex "github.com/henomis/lingoose/index/simpleVectorIndex"
"github.com/henomis/lingoose/llm/openai"
"github.com/henomis/lingoose/loader"
"github.com/henomis/lingoose/prompt"
Expand All @@ -24,7 +25,7 @@ func main() {

openaiEmbedder := openaiembedder.New(openaiembedder.AdaEmbeddingV2)

docsVectorIndex := index.NewSimpleVectorIndex("db", ".", openaiEmbedder)
docsVectorIndex := simplevectorindex.New("db", ".", openaiEmbedder)
indexIsEmpty, _ := docsVectorIndex.IsEmpty()

if indexIsEmpty {
Expand All @@ -48,7 +49,7 @@ func main() {
break
}

similarities, err := docsVectorIndex.SimilaritySearch(context.Background(), query, index.WithTopK(3))
similarities, err := docsVectorIndex.SimilaritySearch(context.Background(), query, indexoption.WithTopK(3))
if err != nil {
panic(err)
}
Expand Down Expand Up @@ -97,7 +98,7 @@ func main() {

}

func ingestData(docsVectorIndex *index.SimpleVectorIndex) error {
func ingestData(docsVectorIndex *simplevectorindex.Index) error {

fmt.Printf("Learning Knowledge Base...")

Expand Down
15 changes: 8 additions & 7 deletions examples/embeddings/pinecone/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,25 +5,26 @@ import (
"fmt"

openaiembedder "github.com/henomis/lingoose/embedder/openai"
"github.com/henomis/lingoose/index"
indexoption "github.com/henomis/lingoose/index/option"
pineconeindex "github.com/henomis/lingoose/index/pinecone"
"github.com/henomis/lingoose/llm/openai"
"github.com/henomis/lingoose/loader"
"github.com/henomis/lingoose/prompt"
"github.com/henomis/lingoose/textsplitter"
)

// download https://frontiernerds.com/files/state_of_the_union.txt
// download https://raw.githubusercontent.com/hwchase17/chat-your-data/master/state_of_the_union.txt

func main() {

openaiEmbedder := openaiembedder.New(openaiembedder.AdaEmbeddingV2)

pineconeIndex := index.NewPinecone(
index.PineconeOptions{
pineconeIndex := pineconeindex.New(
pineconeindex.Options{
IndexName: "test",
Namespace: "test-namespace",
IncludeContent: true,
CreateIndex: &index.PineconeCreateIndexOptions{
CreateIndex: &pineconeindex.CreateIndexOptions{
Dimension: 1536,
Replicas: 1,
Metric: "cosine",
Expand All @@ -49,7 +50,7 @@ func main() {
similarities, err := pineconeIndex.SimilaritySearch(
context.Background(),
query,
index.WithTopK(3),
indexoption.WithTopK(3),
)
if err != nil {
panic(err)
Expand Down Expand Up @@ -87,7 +88,7 @@ func main() {

}

func ingestData(pineconeIndex *index.Pinecone) error {
func ingestData(pineconeIndex *pineconeindex.Index) error {

documents, err := loader.NewDirectoryLoader(".", ".txt").Load(context.Background())
if err != nil {
Expand Down
17 changes: 9 additions & 8 deletions examples/embeddings/qdrant/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,27 +5,28 @@ import (
"fmt"

openaiembedder "github.com/henomis/lingoose/embedder/openai"
"github.com/henomis/lingoose/index"
indexoption "github.com/henomis/lingoose/index/option"
qdrantindex "github.com/henomis/lingoose/index/qdrant"
"github.com/henomis/lingoose/llm/openai"
"github.com/henomis/lingoose/loader"
"github.com/henomis/lingoose/prompt"
"github.com/henomis/lingoose/textsplitter"
)

// download https://raw.githubusercontent.com/hwchase17/chat-your-data/master/state_of_the_union.txt
// run qdrant docker run -p 6333:6333 qdrant/qdrant
// run qdrant docker run --rm -p 6333:6333 qdrant/qdrant

func main() {

openaiEmbedder := openaiembedder.New(openaiembedder.AdaEmbeddingV2)

qdrantIndex := index.NewQdrant(
index.QdrantOptions{
qdrantIndex := qdrantindex.New(
qdrantindex.Options{
CollectionName: "test",
IncludeContent: true,
CreateCollection: &index.QdrantCreateCollectionOptions{
CreateCollection: &qdrantindex.CreateCollectionOptions{
Dimension: 1536,
Distance: index.QdrantDistanceCosine,
Distance: qdrantindex.DistanceCosine,
},
},
openaiEmbedder,
Expand All @@ -47,7 +48,7 @@ func main() {
similarities, err := qdrantIndex.SimilaritySearch(
context.Background(),
query,
index.WithTopK(3),
indexoption.WithTopK(3),
)
if err != nil {
panic(err)
Expand Down Expand Up @@ -85,7 +86,7 @@ func main() {

}

func ingestData(qdrantIndex *index.Qdrant) error {
func ingestData(qdrantIndex *qdrantindex.Index) error {

documents, err := loader.NewDirectoryLoader(".", ".txt").Load(context.Background())
if err != nil {
Expand Down
14 changes: 8 additions & 6 deletions examples/embeddings/simpleVector/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,23 +6,25 @@ import (

openaiembedder "github.com/henomis/lingoose/embedder/openai"
"github.com/henomis/lingoose/index"
indexoption "github.com/henomis/lingoose/index/option"
simplevectorindex "github.com/henomis/lingoose/index/simpleVectorIndex"
"github.com/henomis/lingoose/llm/openai"
"github.com/henomis/lingoose/loader"
"github.com/henomis/lingoose/prompt"
"github.com/henomis/lingoose/textsplitter"
)

// download https://frontiernerds.com/files/state_of_the_union.txt
// download https://raw.githubusercontent.com/hwchase17/chat-your-data/master/state_of_the_union.txt

func main() {

openaiEmbedder := openaiembedder.New(openaiembedder.AdaEmbeddingV2)

docsVectorIndex := index.NewSimpleVectorIndex("docs", ".", openaiEmbedder)
docsVectorIndex := simplevectorindex.New("docs", ".", openaiEmbedder)
indexIsEmpty, _ := docsVectorIndex.IsEmpty()

if indexIsEmpty {
err := ingestData(openaiEmbedder)
err := ingestData(docsVectorIndex, openaiEmbedder)
if err != nil {
panic(err)
}
Expand All @@ -32,7 +34,7 @@ func main() {
similarities, err := docsVectorIndex.SimilaritySearch(
context.Background(),
query,
index.WithTopK(3),
indexoption.WithTopK(3),
)
if err != nil {
panic(err)
Expand Down Expand Up @@ -72,7 +74,7 @@ func main() {
fmt.Println(output)
}

func ingestData(openaiEmbedder index.Embedder) error {
func ingestData(docsVectorIndex *simplevectorindex.Index, openaiEmbedder index.Embedder) error {

fmt.Printf("Ingesting data...")

Expand All @@ -85,7 +87,7 @@ func ingestData(openaiEmbedder index.Embedder) error {

documentChunks := textSplitter.SplitDocuments(documents)

err = index.NewSimpleVectorIndex("docs", ".", openaiEmbedder).LoadFromDocuments(context.Background(), documentChunks)
err = docsVectorIndex.LoadFromDocuments(context.Background(), documentChunks)
if err != nil {
return err
}
Expand Down
7 changes: 4 additions & 3 deletions examples/embeddings/simplekb/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@ import (
"context"

openaiembedder "github.com/henomis/lingoose/embedder/openai"
"github.com/henomis/lingoose/index"
indexoption "github.com/henomis/lingoose/index/option"
simplevectorindex "github.com/henomis/lingoose/index/simpleVectorIndex"
"github.com/henomis/lingoose/llm/openai"
"github.com/henomis/lingoose/loader"
qapipeline "github.com/henomis/lingoose/pipeline/qa"
Expand All @@ -15,7 +16,7 @@ func main() {
query := "What is the NATO purpose?"
docs, _ := loader.NewPDFToTextLoader("./kb").WithTextSplitter(textsplitter.NewRecursiveCharacterTextSplitter(2000, 200)).Load(context.Background())
openaiEmbedder := openaiembedder.New(openaiembedder.AdaEmbeddingV2)
index.NewSimpleVectorIndex("db", ".", openaiEmbedder).LoadFromDocuments(context.Background(), docs)
similarities, _ := index.NewSimpleVectorIndex("db", ".", openaiEmbedder).SimilaritySearch(context.Background(), query, index.WithTopK(3))
simplevectorindex.New("db", ".", openaiEmbedder).LoadFromDocuments(context.Background(), docs)
similarities, _ := simplevectorindex.New("db", ".", openaiEmbedder).SimilaritySearch(context.Background(), query, indexoption.WithTopK(3))
qapipeline.New(openai.NewChat().WithVerbose(true)).Run(context.Background(), query, similarities.ToDocuments())
}
2 changes: 1 addition & 1 deletion examples/pipeline/summarize/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ import (
"github.com/henomis/lingoose/textsplitter"
)

// download https://frontiernerds.com/files/state_of_the_union.txt
// download https://raw.githubusercontent.com/hwchase17/chat-your-data/master/state_of_the_union.txt

func main() {

Expand Down
15 changes: 12 additions & 3 deletions index/index.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,16 @@ import (

"github.com/henomis/lingoose/document"
"github.com/henomis/lingoose/embedder"
"github.com/henomis/lingoose/types"
)

var (
// ErrInternal is the message text for an internal index error.
// NOTE(review): this is a plain string, not an error value, so it cannot
// be matched with errors.Is — confirm how callers use it before changing it.
ErrInternal = "internal index error"
)

const (
defaultKeyID = "id"
defaultKeyContent = "content"
DefaultKeyID = "id"
DefaultKeyContent = "content"
)

type SearchResponse struct {
Expand All @@ -37,7 +38,7 @@ type Embedder interface {
Embed(ctx context.Context, texts []string) ([]embedder.Embedding, error)
}

func filterSearchResponses(searchResponses SearchResponses, topK int) SearchResponses {
func FilterSearchResponses(searchResponses SearchResponses, topK int) SearchResponses {
//sort by similarity score
sort.Slice(searchResponses, func(i, j int) bool {
return searchResponses[i].Score > searchResponses[j].Score
Expand All @@ -50,3 +51,11 @@ func filterSearchResponses(searchResponses SearchResponses, topK int) SearchResp

return searchResponses[:maxTopK]
}

// DeepCopyMetadata returns a new types.Meta holding every key/value pair of
// metadata, so adding or removing keys on the result does not affect the
// original. A nil or empty metadata yields an empty, non-nil map.
//
// NOTE(review): despite the name, values are assigned as-is. Any value that
// is itself a reference (map, slice, pointer) remains shared with the
// original.
func DeepCopyMetadata(metadata types.Meta) types.Meta {
	// The final size is known, so allocate it once instead of growing.
	metadataCopy := make(types.Meta, len(metadata))
	for k, v := range metadata {
		metadataCopy[k] = v
	}
	return metadataCopy
}
20 changes: 20 additions & 0 deletions index/option/option.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
package option

// Option is a functional option: a function that mutates an Options value.
type Option func(*Options)

// Options collects the settings that Option functions can set.
type Options struct {
	// TopK is the value stored by WithTopK.
	TopK int

	// Filter is the value stored by WithFilter; its concrete type is not
	// constrained by this package.
	Filter interface{}
}

// WithTopK returns an Option that stores topK in Options.TopK.
func WithTopK(topK int) Option {
	setTopK := func(o *Options) { o.TopK = topK }
	return setTopK
}

// WithFilter returns an Option that stores filter in Options.Filter.
func WithFilter(filter interface{}) Option {
	setFilter := func(o *Options) { o.Filter = filter }
	return setFilter
}
Loading

0 comments on commit bfb5d2a

Please sign in to comment.