Skip to content
This repository has been archived by the owner on Mar 1, 2024. It is now read-only.

Add SingleStoreReader #404

Merged
merged 9 commits into from
Jul 22, 2023
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions llama_hub/library.json
Original file line number Diff line number Diff line change
Expand Up @@ -581,5 +581,14 @@
"google keep",
"google notes"
]
},
"SingleStoreReader": {
"id": "singlestore",
"author": "singlestore",
"keywords": [
"singlestore",
"memsql"
]
}

}
34 changes: 34 additions & 0 deletions llama_hub/singlestore/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# SingleStore Loader

The SingleStore Loader retrieves a set of documents from a specified table in a SingleStore database. The user initializes the loader with database information and then provides a search embedding for retrieving similar documents.

## Usage

Here's an example usage of the SingleStoreReader:

```python
from llama_index import download_loader

SingleStoreReader = download_loader("SingleStoreReader")

# Initialize the reader with your SingleStore database credentials and other relevant details
reader = SingleStoreReader(
scheme="mysql",
host="localhost",
port="3306",
user="username",
password="password",
dbname="database_name",
table_name="table_name",
content_field="text",
vector_field="embedding"
)

# search_embedding is the embedding vector of your query.
# Example search_embedding:
# search_embedding=[0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3]
search_embedding=[n1, n2, n3, ...]

# load_data fetches documents from your SingleStore database that are similar to the search_embedding.
# The top_k argument specifies the number of similar documents to fetch.
documents = reader.load_data(search_embedding=search_embedding, top_k=5)
```
1 change: 1 addition & 0 deletions llama_hub/singlestore/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
"""Init file."""
68 changes: 68 additions & 0 deletions llama_hub/singlestore/base.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
"""SingleStore reader."""

import json
from typing import Any, Dict, List, Optional, Sequence, Union

import pymysql
from llama_index import ListIndex, download_loader
from llama_index.readers.base import BaseReader
from llama_index.readers.schema.base import Document
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please do the imports from within the class and raise import error with appropriate error messages.

For example: https://github.com/emptycrown/llama-hub/blob/main/llama_hub/couchdb/base.py



class SingleStoreReader(BaseReader):
    """SingleStore reader.

    Runs a dot-product similarity query against one table and returns the
    best-scoring rows through the generic ``DatabaseReader`` loader.

    Args:
        scheme (str): Database Scheme.
        host (str): Database Host.
        port (str): Database Port.
        user (str): Database User.
        password (str): Database Password.
        dbname (str): Database Name.
        table_name (str): Table Name.
        content_field (str): Content Field.
        vector_field (str): Vector Field.

    Note:
        ``table_name``, ``content_field`` and ``vector_field`` are SQL
        identifiers and are placed into the query text verbatim; they must
        come from trusted configuration, never from end-user input.
    """

    def __init__(
        self,
        scheme: str,
        host: str,
        port: str,
        user: str,
        password: str,
        dbname: str,
        table_name: str,
        content_field: str = "text",
        vector_field: str = "embedding",
    ) -> None:
        """Initialize with parameters.

        Raises:
            ImportError: If the ``pymysql`` package is not installed.
        """
        # Local, guarded import: keeps the failure message actionable for
        # users who have not installed the driver yet.
        try:
            import pymysql
        except ImportError as err:
            raise ImportError(
                "`pymysql` package not found, please run `pip install pymysql`"
            ) from err

        self.scheme = scheme
        self.host = host
        self.port = port
        self.user = user
        self.password = password
        self.dbname = dbname
        self.table_name = table_name
        self.content_field = content_field
        self.vector_field = vector_field

        # Registers PyMySQL under the ``MySQLdb`` module name, so code that
        # imports MySQLdb (presumably the driver DatabaseReader resolves for
        # the "mysql" scheme -- TODO confirm) gets PyMySQL instead.
        pymysql.install_as_MySQLdb()

        self.DatabaseReader = download_loader("DatabaseReader")
        self.reader = self.DatabaseReader(
            scheme=self.scheme,
            host=self.host,
            port=self.port,
            user=self.user,
            password=self.password,
            dbname=self.dbname,
        )

    @staticmethod
    def _serialize_embedding(
        search_embedding: Union[str, bytes, Sequence[float]],
    ) -> str:
        """Return ``search_embedding`` as a JSON array of finite floats.

        Accepts either an iterable of numbers or a JSON-array string (the
        form the original signature advertised). Re-encoding through
        ``float`` + ``json.dumps`` guarantees the text embedded in the SQL
        literal contains nothing but numbers, whatever the input's own
        ``str()`` looks like (e.g. NumPy arrays or NumPy scalar lists).

        Raises:
            ValueError: If the input is not a non-empty array of finite
                numbers.
        """
        if isinstance(search_embedding, (str, bytes, bytearray)):
            try:
                values = json.loads(search_embedding)
            except ValueError as err:
                raise ValueError(
                    "search_embedding must be a JSON array of numbers"
                ) from err
            # A JSON string/object/number would otherwise be iterated
            # character-by-character or key-by-key below.
            if not isinstance(values, list):
                raise ValueError(
                    "search_embedding must be a JSON array of numbers"
                )
        else:
            values = search_embedding

        try:
            numbers = [float(value) for value in values]
            # allow_nan=False rejects NaN/Infinity, which are not valid JSON.
            encoded = json.dumps(numbers, allow_nan=False)
        except (TypeError, ValueError) as err:
            raise ValueError(
                "search_embedding must contain only finite numbers"
            ) from err

        if not numbers:
            raise ValueError("search_embedding must not be empty")
        return encoded

    def load_data(
        self,
        search_embedding: Union[str, Sequence[float]],
        top_k: int = 5,
    ) -> List[Document]:
        """Load data from SingleStore.

        Args:
            search_embedding (Union[str, Sequence[float]]): The embedding to
                search, as a sequence of numbers or a JSON-array string.
            top_k (int): Number of results to return.

        Returns:
            List[Document]: A list of documents.

        Raises:
            ValueError: If ``search_embedding`` is not a non-empty array of
                finite numbers, or ``top_k`` is not a non-negative integer.
        """
        embedding_json = self._serialize_embedding(search_embedding)

        # DatabaseReader only accepts a finished query string, so values
        # cannot be bound as parameters; coerce them to safe forms instead.
        try:
            limit = int(top_k)
        except (TypeError, ValueError) as err:
            raise ValueError("top_k must be an integer") from err
        if limit < 0:
            raise ValueError("top_k must be non-negative")

        query = f"""
        SELECT {self.content_field}, DOT_PRODUCT_F64({self.vector_field}, JSON_ARRAY_PACK_F64('{embedding_json}')) AS score
        FROM {self.table_name}
        ORDER BY score
        DESC LIMIT {limit}
        """

        return self.reader.load_data(query=query)
1 change: 1 addition & 0 deletions llama_hub/singlestore/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
pymysql