From 230fe0fa50fe926e28807e0fa4a77776c79d3c67 Mon Sep 17 00:00:00 2001 From: Oskar Stark Date: Thu, 26 Sep 2024 16:29:58 +0200 Subject: [PATCH 01/11] Initialize store --- src/Store/Azure/SearchStore.php | 8 +++++++- src/Store/InitializableStoreInterface.php | 16 ++++++++++++++++ 2 files changed, 23 insertions(+), 1 deletion(-) create mode 100644 src/Store/InitializableStoreInterface.php diff --git a/src/Store/Azure/SearchStore.php b/src/Store/Azure/SearchStore.php index 6a1cb6c2..509472aa 100644 --- a/src/Store/Azure/SearchStore.php +++ b/src/Store/Azure/SearchStore.php @@ -7,11 +7,12 @@ use PhpLlm\LlmChain\Document\Document; use PhpLlm\LlmChain\Document\Metadata; use PhpLlm\LlmChain\Document\Vector; +use PhpLlm\LlmChain\Store\InitializableStoreInterface; use PhpLlm\LlmChain\Store\VectorStoreInterface; use Symfony\Component\Uid\Uuid; use Symfony\Contracts\HttpClient\HttpClientInterface; -final readonly class SearchStore implements VectorStoreInterface +final readonly class SearchStore implements VectorStoreInterface, InitializableStoreInterface { /** * @param string $vectorFieldName The name of the field int the index that contains the vector @@ -26,6 +27,11 @@ public function __construct( ) { } + public function initialize(array $options = []): void + { + // TODO: Add code to setup index + } + public function addDocument(Document $document): void { $this->addDocuments([$document]); diff --git a/src/Store/InitializableStoreInterface.php b/src/Store/InitializableStoreInterface.php new file mode 100644 index 00000000..ac4f801e --- /dev/null +++ b/src/Store/InitializableStoreInterface.php @@ -0,0 +1,16 @@ + $options + */ + public function initialize(array $options = []): void; +} From 3dd96a8e85991a64156d60896246d8d2f2eff2ce Mon Sep 17 00:00:00 2001 From: Oskar Stark Date: Thu, 26 Sep 2024 23:02:23 +0200 Subject: [PATCH 02/11] - --- src/Store/MongoDB/Store.php | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/src/Store/MongoDB/Store.php b/src/Store/MongoDB/Store.php index b0c7e9b7..53c26f54 100644 --- a/src/Store/MongoDB/Store.php +++ b/src/Store/MongoDB/Store.php @@ -10,6 +10,7 @@ use PhpLlm\LlmChain\Document\Document; use PhpLlm\LlmChain\Document\Metadata; use PhpLlm\LlmChain\Document\Vector; +use PhpLlm\LlmChain\Store\InitializableStoreInterface; use PhpLlm\LlmChain\Store\VectorStoreInterface; use Psr\Log\LoggerInterface; use Symfony\Component\Uid\Uuid; @@ -37,7 +38,7 @@ * * @author Oskar Stark */ -final readonly class Store implements VectorStoreInterface +final readonly class Store implements VectorStoreInterface, InitializableStoreInterface { /** * @param string $databaseName The name of the database @@ -57,6 +58,26 @@ public function __construct( ) { } + public function initialize(array $options = []): void + { + $this->getCollection()->createSearchIndex( + definition: [ + 'fields' => [ + [ + 'numDimensions' => 1536, + 'path' => $this->vectorFieldName, + 'similarity' => 'euclidean', + 'type' => 'vector', + ], + ], + ], + options: [ + 'name' => $this->indexName, + 'type' => 'vectorSearch', + ], + ); + } + public function addDocument(Document $document): void { $this->addDocuments([$document]); From 964c72f1c2543fce0a76187115b4c112ed59fe82 Mon Sep 17 00:00:00 2001 From: Oskar Stark Date: Thu, 26 Sep 2024 23:02:59 +0200 Subject: [PATCH 03/11] - --- src/Store/Azure/SearchStore.php | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/src/Store/Azure/SearchStore.php b/src/Store/Azure/SearchStore.php index 509472aa..6a1cb6c2 100644 --- a/src/Store/Azure/SearchStore.php +++ b/src/Store/Azure/SearchStore.php @@ -7,12 +7,11 @@ use PhpLlm\LlmChain\Document\Document; use PhpLlm\LlmChain\Document\Metadata; use PhpLlm\LlmChain\Document\Vector; -use PhpLlm\LlmChain\Store\InitializableStoreInterface; use PhpLlm\LlmChain\Store\VectorStoreInterface; use Symfony\Component\Uid\Uuid; use Symfony\Contracts\HttpClient\HttpClientInterface; -final readonly class SearchStore implements VectorStoreInterface, InitializableStoreInterface +final readonly class SearchStore implements VectorStoreInterface { /** * @param string $vectorFieldName The name of the field int the index that contains the vector @@ -27,11 +26,6 @@ public function __construct( ) { } - public function initialize(array $options = []): void - { - // TODO: Add code to setup index - } - public function addDocument(Document $document): void { $this->addDocuments([$document]); From 3fd7742b4780e8a7750d478ac92c5975bcde0b78 Mon Sep 17 00:00:00 2001 From: Oskar Stark Date: Fri, 27 Sep 2024 20:31:46 +0200 Subject: [PATCH 04/11] - --- composer.json | 3 +++ 1 file changed, 3 insertions(+) diff --git a/composer.json b/composer.json index a02be09e..f279b09b 100644 --- a/composer.json +++ b/composer.json @@ -37,6 +37,9 @@ "symfony/dotenv": "^6.4 || ^7.1", "symfony/var-dumper": "^6.4 || ^7.1" }, + "conflict": { + "mongodb/mongodb": "<1.20" + }, "suggest": { "codewithkyrian/chromadb-php": "For using the ChromaDB as retrieval vector store.", "mongodb/mongodb": "For using MongoDB Atlas as retrieval vector store.", From 8f521187c7cfe2f0c9faf14a6489a837b286c040 Mon Sep 17 00:00:00 2001 From: Oskar Stark Date: Fri, 27 Sep 2024 20:36:15 +0200 Subject: [PATCH 05/11] - --- composer.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/composer.json b/composer.json index f279b09b..8327ae88 100644 --- a/composer.json +++ b/composer.json @@ -38,7 +38,7 @@ "symfony/var-dumper": "^6.4 || ^7.1" }, "conflict": { - "mongodb/mongodb": "<1.20" + "mongodb/mongodb": "<1.20" }, "suggest": { "codewithkyrian/chromadb-php": "For using the ChromaDB as retrieval vector store.", From dfff7bebffb82a1760829b6de62fed98b97d8fba Mon Sep 17 00:00:00 2001 From: Oskar Stark Date: Sat, 28 Sep 2024 17:24:10 +0200 Subject: [PATCH 06/11] - --- src/Store/MongoDB/Store.php | 47 +++++++++++++++++++++---------------- 1 file changed, 27 insertions(+), 20 deletions(-) diff --git a/src/Store/MongoDB/Store.php b/src/Store/MongoDB/Store.php index 53c26f54..f938cfad 100644 --- a/src/Store/MongoDB/Store.php +++ b/src/Store/MongoDB/Store.php @@ -58,26 +58,6 @@ public function __construct( ) { } - public function initialize(array $options = []): void - { - $this->getCollection()->createSearchIndex( - definition: [ - 'fields' => [ - [ - 'numDimensions' => 1536, - 'path' => $this->vectorFieldName, - 'similarity' => 'euclidean', - 'type' => 'vector', - ], - ], - ], - options: [ - 'name' => $this->indexName, - 'type' => 'vectorSearch', - ], - ); - } - public function addDocument(Document $document): void { $this->addDocuments([$document]); @@ -156,6 +136,33 @@ public function query(Vector $vector, array $options = []): array return $documents; } + /** + * @param array{fields?: array} $options + */ + public function initialize(array $options = []): void + { + if ($options !== [] && !array_key_exists('fields', $options)) { + throw new \InvalidArgumentException('The only supported option is "fields"'); + } + + $this->getCollection()->createSearchIndex( + [ + 'fields' => array_merge([ + [ + 'numDimensions' => 1536, + 'path' => $this->vectorFieldName, + 'similarity' => 'euclidean', + 'type' => 'vector', + ], + ], $options['fields'] ?? []), + ], + [ + 'name' => $this->indexName, + 'type' => 'vectorSearch', + ], + ); + } + private function getCollection(): Collection { return $this->client->selectCollection($this->databaseName, $this->collectionName); From 9b8294d84445a84a378cd8cb83640c21a6cbbc9f Mon Sep 17 00:00:00 2001 From: Oskar Stark Date: Sat, 28 Sep 2024 17:24:53 +0200 Subject: [PATCH 07/11] - --- src/Store/InitializableStoreInterface.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Store/InitializableStoreInterface.php b/src/Store/InitializableStoreInterface.php index ac4f801e..070d897c 100644 --- a/src/Store/InitializableStoreInterface.php +++ b/src/Store/InitializableStoreInterface.php @@ -10,7 +10,7 @@ interface InitializableStoreInterface extends StoreInterface { /** - * @param array $options + * @param array $options */ public function initialize(array $options = []): void; } From f27bf16a771691e7f68fd872e795804e8039269b Mon Sep 17 00:00:00 2001 From: Oskar Stark Date: Sat, 28 Sep 2024 18:05:15 +0200 Subject: [PATCH 08/11] - --- .env | 3 + examples/store-mongodb-similarity-search.php | 75 ++++++++++++++++++++ src/Store/InitializableStoreInterface.php | 3 - src/Store/MongoDB/Store.php | 46 ++++++------ 4 files changed, 104 insertions(+), 23 deletions(-) create mode 100755 examples/store-mongodb-similarity-search.php diff --git a/.env b/.env index 2a8b994a..9dca7d38 100644 --- a/.env +++ b/.env @@ -17,3 +17,6 @@ AZURE_OPENAI_KEY= # For using SerpApi (tool) SERP_API_KEY= + +# For using MongoDB (store) +MONGODB_URI= diff --git a/examples/store-mongodb-similarity-search.php b/examples/store-mongodb-similarity-search.php new file mode 100755 index 00000000..956de205 --- /dev/null +++ b/examples/store-mongodb-similarity-search.php @@ -0,0 +1,75 @@ +loadEnv(dirname(__DIR__).'/.env'); + +// initialize the store +$store = new Store( + client: new MongoDBClient($_ENV['MONGODB_URI']), + databaseName: 'my-database', + collectionName: 'my-collection', + indexName: 'my-index', + vectorFieldName: 'vector', +); + +// our data +$movies = [ + ['headline' => 'Inception', 'description' => 'A skilled thief is given a chance at redemption if he can successfully perform inception, the act of planting an idea in someone\'s subconscious.', 'regisseur' => 'Christopher Nolan'], + ['headline' => 'The Matrix', 'description' => 'A hacker discovers the world he lives in is a simulated reality and joins a rebellion to overthrow its controllers.', 'regisseur' => 'The Wachowskis'], + ['headline' => 'The Godfather', 'description' => 'The aging patriarch of an organized crime dynasty transfers control of his empire to his reluctant son.', 'regisseur' => 'Francis Ford Coppola'], +]; + +// create embeddings and documents +foreach ($movies as $movie) { + $documents[] = Document::fromText( + id: Uuid::v4(), + text: $movie['headline'].' '.$movie['description'], + metadata: new Metadata($movie), + ); +} + +// create embeddings for documents +$platform = new OpenAI(HttpClient::create(), $_ENV['OPENAI_API_KEY']); +$embedder = new DocumentEmbedder( + $embeddings = new Embeddings($platform), + $store, +); +$embedder->embed($documents); + +// initialize the index +$store->initialize(); + +$llm = new Gpt($platform, Version::gpt4oMini()); + +$similaritySearch = new SimilaritySearch($embeddings, $store); +$toolBox = new ToolBox(new ToolAnalyzer(), [$similaritySearch]); +$processor = new ChainProcessor($toolBox); +$chain = new Chain($llm, [$processor], [$processor]); + +$messages = new MessageBag( + Message::forSystem('Please answer all user questions only using SimilaritySearch function.'), + Message::ofUser('Which movie would you recommend for the topic "mafia"?') +); +$response = $chain->call($messages); + +echo $response.PHP_EOL; diff --git a/src/Store/InitializableStoreInterface.php b/src/Store/InitializableStoreInterface.php index 070d897c..559e69ab 100644 --- a/src/Store/InitializableStoreInterface.php +++ b/src/Store/InitializableStoreInterface.php @@ -4,9 +4,6 @@ namespace PhpLlm\LlmChain\Store; -use PhpLlm\LlmChain\Document\Document; -use PhpLlm\LlmChain\Document\Vector; - interface InitializableStoreInterface extends StoreInterface { /** diff --git a/src/Store/MongoDB/Store.php b/src/Store/MongoDB/Store.php index f938cfad..c2a90c6e 100644 --- a/src/Store/MongoDB/Store.php +++ b/src/Store/MongoDB/Store.php @@ -7,12 +7,14 @@ use MongoDB\BSON\Binary; use MongoDB\Client; use MongoDB\Collection; +use MongoDB\Driver\Exception\CommandException; use PhpLlm\LlmChain\Document\Document; use PhpLlm\LlmChain\Document\Metadata; use PhpLlm\LlmChain\Document\Vector; use PhpLlm\LlmChain\Store\InitializableStoreInterface; use PhpLlm\LlmChain\Store\VectorStoreInterface; use Psr\Log\LoggerInterface; +use Psr\Log\NullLogger; use Symfony\Component\Uid\Uuid; /** @@ -49,12 +51,12 @@ */ public function __construct( private Client $client, - private LoggerInterface $logger, private string $databaseName, private string $collectionName, private string $indexName, private string $vectorFieldName = 'vector', private bool $bulkWrite = false, + private LoggerInterface $logger = new NullLogger(), ) { } @@ -141,26 +143,30 @@ public function query(Vector $vector, array $options = []): array */ public function initialize(array $options = []): void { - if ($options !== [] && !array_key_exists('fields', $options)) { - throw new \InvalidArgumentException('The only supported option is "fields"'); - } + try { + if ([] !== $options && !array_key_exists('fields', $options)) { + throw new \InvalidArgumentException('The only supported option is "fields"'); + } - $this->getCollection()->createSearchIndex( - [ - 'fields' => array_merge([ - [ - 'numDimensions' => 1536, - 'path' => $this->vectorFieldName, - 'similarity' => 'euclidean', - 'type' => 'vector', - ], - ], $options['fields'] ?? []), - ], - [ - 'name' => $this->indexName, - 'type' => 'vectorSearch', - ], - ); + $this->getCollection()->createSearchIndex( + [ + 'fields' => array_merge([ + [ + 'numDimensions' => 1536, + 'path' => $this->vectorFieldName, + 'similarity' => 'euclidean', + 'type' => 'vector', + ], + ], $options['fields'] ?? []), + ], + [ + 'name' => $this->indexName, + 'type' => 'vectorSearch', + ], + ); + } catch (CommandException $e) { + $this->logger->warning($e->getMessage()); + } } private function getCollection(): Collection From 0daf695d1f1d4eb569bd6946ddd6df64663a20ae Mon Sep 17 00:00:00 2001 From: Oskar Stark Date: Sat, 28 Sep 2024 19:24:58 +0200 Subject: [PATCH 09/11] Update .env --- .env | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.env b/.env index 9dca7d38..2f47495e 100644 --- a/.env +++ b/.env @@ -18,5 +18,5 @@ AZURE_OPENAI_KEY= # For using SerpApi (tool) SERP_API_KEY= -# For using MongoDB (store) +# For using MongoDB Atlas (store) MONGODB_URI= From 07a059da49e1e39dd7b04d7bc97942e9cc4ff1b2 Mon Sep 17 00:00:00 2001 From: Oskar Stark Date: Sat, 28 Sep 2024 19:29:02 +0200 Subject: [PATCH 10/11] fix --- composer.json | 2 +- examples/store-mongodb-similarity-search.php | 15 ++++++--------- src/Store/MongoDB/Store.php | 2 +- 3 files changed, 8 insertions(+), 11 deletions(-) diff --git a/composer.json b/composer.json index 8327ae88..a9471904 100644 --- a/composer.json +++ b/composer.json @@ -24,7 +24,7 @@ }, "require-dev": { "codewithkyrian/chromadb-php": "^0.2.1", - "mongodb/mongodb": "^1.19", + "mongodb/mongodb": "^1.20", "php-cs-fixer/shim": "^3.64", "phpstan/phpstan": "^1.12", "phpunit/phpunit": "^11.3", diff --git a/examples/store-mongodb-similarity-search.php b/examples/store-mongodb-similarity-search.php index 956de205..72d4f7a6 100755 --- a/examples/store-mongodb-similarity-search.php +++ b/examples/store-mongodb-similarity-search.php @@ -34,26 +34,23 @@ // our data $movies = [ - ['headline' => 'Inception', 'description' => 'A skilled thief is given a chance at redemption if he can successfully perform inception, the act of planting an idea in someone\'s subconscious.', 'regisseur' => 'Christopher Nolan'], - ['headline' => 'The Matrix', 'description' => 'A hacker discovers the world he lives in is a simulated reality and joins a rebellion to overthrow its controllers.', 'regisseur' => 'The Wachowskis'], - ['headline' => 'The Godfather', 'description' => 'The aging patriarch of an organized crime dynasty transfers control of his empire to his reluctant son.', 'regisseur' => 'Francis Ford Coppola'], + ['title' => 'Inception', 'description' => 'A skilled thief is given a chance at redemption if he can successfully perform inception, the act of planting an idea in someone\'s subconscious.', 'regisseur' => 'Christopher Nolan'], + ['title' => 'The Matrix', 'description' => 'A hacker discovers the world he lives in is a simulated reality and joins a rebellion to overthrow its controllers.', 'regisseur' => 'The Wachowskis'], + ['title' => 'The Godfather', 'description' => 'The aging patriarch of an organized crime dynasty transfers control of his empire to his reluctant son.', 'regisseur' => 'Francis Ford Coppola'], ]; // create embeddings and documents foreach ($movies as $movie) { $documents[] = Document::fromText( id: Uuid::v4(), - text: $movie['headline'].' '.$movie['description'], + text: $movie['title'].' '.$movie['description'], metadata: new Metadata($movie), ); } // create embeddings for documents $platform = new OpenAI(HttpClient::create(), $_ENV['OPENAI_API_KEY']); -$embedder = new DocumentEmbedder( - $embeddings = new Embeddings($platform), - $store, -); +$embedder = new DocumentEmbedder($embeddings = new Embeddings($platform), $store); $embedder->embed($documents); // initialize the index @@ -68,7 +65,7 @@ $messages = new MessageBag( Message::forSystem('Please answer all user questions only using SimilaritySearch function.'), - Message::ofUser('Which movie would you recommend for the topic "mafia"?') + Message::ofUser('Which movie fits the theme of the mafia?') ); $response = $chain->call($messages); diff --git a/src/Store/MongoDB/Store.php b/src/Store/MongoDB/Store.php index c2a90c6e..5d9c5708 100644 --- a/src/Store/MongoDB/Store.php +++ b/src/Store/MongoDB/Store.php @@ -139,7 +139,7 @@ public function query(Vector $vector, array $options = []): array } /** - * @param array{fields?: array} $options + * @param array{fields?: array} $options */ public function initialize(array $options = []): void { From 55ec22fda5e532e7fd12d7598f548cbcb8c6ba84 Mon Sep 17 00:00:00 2001 From: Oskar Stark Date: Sat, 28 Sep 2024 20:40:47 +0200 Subject: [PATCH 11/11] - --- examples/store-mongodb-similarity-search.php | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/examples/store-mongodb-similarity-search.php b/examples/store-mongodb-similarity-search.php index 72d4f7a6..8af5727c 100755 --- a/examples/store-mongodb-similarity-search.php +++ b/examples/store-mongodb-similarity-search.php @@ -23,6 +23,11 @@ require_once dirname(__DIR__).'/vendor/autoload.php'; (new Dotenv())->loadEnv(dirname(__DIR__).'/.env'); +if (empty($_ENV['OPENAI_API_KEY']) || empty($_ENV['MONGODB_URI'])) { + echo 'Please set OPENAI_API_KEY and MONGODB_URI environment variables.'.PHP_EOL; + exit(1); +} + // initialize the store $store = new Store( client: new MongoDBClient($_ENV['MONGODB_URI']),