Skip to content

Initialize store #51

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 11 commits into from
Sep 29, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .env
Original file line number Diff line number Diff line change
Expand Up @@ -17,3 +17,6 @@ AZURE_OPENAI_KEY=

# For using SerpApi (tool)
SERP_API_KEY=

# For using MongoDB Atlas (store)
MONGODB_URI=
5 changes: 4 additions & 1 deletion composer.json
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
},
"require-dev": {
"codewithkyrian/chromadb-php": "^0.2.1",
"mongodb/mongodb": "^1.19",
"mongodb/mongodb": "^1.20",
"php-cs-fixer/shim": "^3.64",
"phpstan/phpstan": "^1.12",
"phpunit/phpunit": "^11.3",
Expand All @@ -37,6 +37,9 @@
"symfony/dotenv": "^6.4 || ^7.1",
"symfony/var-dumper": "^6.4 || ^7.1"
},
"conflict": {
"mongodb/mongodb": "<1.20"
},
"suggest": {
"codewithkyrian/chromadb-php": "For using the ChromaDB as retrieval vector store.",
"mongodb/mongodb": "For using MongoDB Atlas as retrieval vector store.",
Expand Down
77 changes: 77 additions & 0 deletions examples/store-mongodb-similarity-search.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
<?php

use MongoDB\Client as MongoDBClient;
use PhpLlm\LlmChain\Chain;
use PhpLlm\LlmChain\Document\Document;
use PhpLlm\LlmChain\Document\Metadata;
use PhpLlm\LlmChain\DocumentEmbedder;
use PhpLlm\LlmChain\Message\Message;
use PhpLlm\LlmChain\Message\MessageBag;
use PhpLlm\LlmChain\OpenAI\Model\Embeddings;
use PhpLlm\LlmChain\OpenAI\Model\Gpt;
use PhpLlm\LlmChain\OpenAI\Model\Gpt\Version;
use PhpLlm\LlmChain\OpenAI\Platform\OpenAI;
use PhpLlm\LlmChain\Store\MongoDB\Store;
use PhpLlm\LlmChain\ToolBox\ChainProcessor;
use PhpLlm\LlmChain\ToolBox\Tool\SimilaritySearch;
use PhpLlm\LlmChain\ToolBox\ToolAnalyzer;
use PhpLlm\LlmChain\ToolBox\ToolBox;
use Symfony\Component\Dotenv\Dotenv;
use Symfony\Component\HttpClient\HttpClient;
use Symfony\Component\Uid\Uuid;

require_once dirname(__DIR__).'/vendor/autoload.php';
(new Dotenv())->loadEnv(dirname(__DIR__).'/.env');

// Both values are needed below: the OpenAI key for the platform, the MongoDB URI for the store client.
if (empty($_ENV['OPENAI_API_KEY']) || empty($_ENV['MONGODB_URI'])) {
    echo 'Please set OPENAI_API_KEY and MONGODB_URI environment variables.'.PHP_EOL;
    exit(1);
}

// initialize the store
$store = new Store(
    client: new MongoDBClient($_ENV['MONGODB_URI']),
    databaseName: 'my-database',
    collectionName: 'my-collection',
    indexName: 'my-index',
    vectorFieldName: 'vector',
);

// our data
$movies = [
    ['title' => 'Inception', 'description' => 'A skilled thief is given a chance at redemption if he can successfully perform inception, the act of planting an idea in someone\'s subconscious.', 'regisseur' => 'Christopher Nolan'],
    ['title' => 'The Matrix', 'description' => 'A hacker discovers the world he lives in is a simulated reality and joins a rebellion to overthrow its controllers.', 'regisseur' => 'The Wachowskis'],
    ['title' => 'The Godfather', 'description' => 'The aging patriarch of an organized crime dynasty transfers control of his empire to his reluctant son.', 'regisseur' => 'Francis Ford Coppola'],
];

// create one document per movie: title and description form the text, the full row is kept as metadata
// Start from an empty list so $documents is always defined, even when $movies is empty.
$documents = [];
foreach ($movies as $movie) {
    $documents[] = Document::fromText(
        id: Uuid::v4(),
        text: $movie['title'].' '.$movie['description'],
        metadata: new Metadata($movie),
    );
}

// create embeddings for documents
$platform = new OpenAI(HttpClient::create(), $_ENV['OPENAI_API_KEY']);
$embedder = new DocumentEmbedder($embeddings = new Embeddings($platform), $store);
$embedder->embed($documents);

// initialize the index
// NOTE(review): the index is created after the documents are embedded, presumably so that the
// collection already exists at that point - confirm before reordering.
$store->initialize();

$llm = new Gpt($platform, Version::gpt4oMini());

// expose the store to the model as a SimilaritySearch tool; the same processor is passed in both processor lists
$similaritySearch = new SimilaritySearch($embeddings, $store);
$toolBox = new ToolBox(new ToolAnalyzer(), [$similaritySearch]);
$processor = new ChainProcessor($toolBox);
$chain = new Chain($llm, [$processor], [$processor]);

$messages = new MessageBag(
    Message::forSystem('Please answer all user questions only using SimilaritySearch function.'),
    Message::ofUser('Which movie fits the theme of the mafia?')
);
$response = $chain->call($messages);

echo $response.PHP_EOL;
13 changes: 13 additions & 0 deletions src/Store/InitializableStoreInterface.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
<?php

declare(strict_types=1);

namespace PhpLlm\LlmChain\Store;

/**
 * A store that offers an explicit initialization step on top of the StoreInterface contract.
 *
 * What "initialization" means is left to the implementation.
 */
interface InitializableStoreInterface extends StoreInterface
{
/**
 * Runs the store's initialization step.
 *
 * @param array<mixed> $options Implementation-specific options; an empty array when omitted
 */
public function initialize(array $options = []): void;
}
38 changes: 36 additions & 2 deletions src/Store/MongoDB/Store.php
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,14 @@
use MongoDB\BSON\Binary;
use MongoDB\Client;
use MongoDB\Collection;
use MongoDB\Driver\Exception\CommandException;
use PhpLlm\LlmChain\Document\Document;
use PhpLlm\LlmChain\Document\Metadata;
use PhpLlm\LlmChain\Document\Vector;
use PhpLlm\LlmChain\Store\InitializableStoreInterface;
use PhpLlm\LlmChain\Store\VectorStoreInterface;
use Psr\Log\LoggerInterface;
use Psr\Log\NullLogger;
use Symfony\Component\Uid\Uuid;

/**
Expand All @@ -37,7 +40,7 @@
*
* @author Oskar Stark <oskarstark@googlemail.com>
*/
final readonly class Store implements VectorStoreInterface
final readonly class Store implements VectorStoreInterface, InitializableStoreInterface
{
/**
* @param string $databaseName The name of the database
Expand All @@ -48,12 +51,12 @@
*/
public function __construct(
    // Client used to select the collection this store works on.
    private Client $client,
    private string $databaseName,
    private string $collectionName,
    // Name under which the search index is created.
    private string $indexName,
    // Document field used as the "path" of the vector index.
    private string $vectorFieldName = 'vector',
    private bool $bulkWrite = false,
    // Declared once, last and optional: callers that do not need logging can omit it
    // and get a NullLogger.
    private LoggerInterface $logger = new NullLogger(),
) {
}

Expand Down Expand Up @@ -135,6 +138,37 @@ public function query(Vector $vector, array $options = []): array
return $documents;
}

/**
 * Creates the vector search index for this store's collection.
 *
 * The index definition always starts with one vector field for the configured
 * vector field name (1536 dimensions, euclidean similarity). Entries passed in
 * the "fields" option are appended after it.
 *
 * A CommandException raised while creating the index is logged as a warning and
 * not rethrown.
 *
 * @param array{fields?: array<mixed>} $options Additional index field definitions
 *
 * @throws \InvalidArgumentException When any option other than "fields" is passed
 */
public function initialize(array $options = []): void
{
    // Validate before touching MongoDB: a wrong option is a caller error, and every key
    // other than "fields" is rejected - including unknown keys passed alongside "fields".
    if ([] !== array_diff_key($options, ['fields' => true])) {
        throw new \InvalidArgumentException('The only supported option is "fields"');
    }

    try {
        $this->getCollection()->createSearchIndex(
            [
                'fields' => array_merge([
                    [
                        'numDimensions' => 1536,
                        'path' => $this->vectorFieldName,
                        'similarity' => 'euclidean',
                        'type' => 'vector',
                    ],
                ], $options['fields'] ?? []),
            ],
            [
                'name' => $this->indexName,
                'type' => 'vectorSearch',
            ],
        );
    } catch (CommandException $e) {
        // Deliberately best-effort (presumably so an already existing index does not abort
        // the caller - confirm). The exception is passed as context so it stays inspectable.
        $this->logger->warning($e->getMessage(), ['exception' => $e]);
    }
}

private function getCollection(): Collection
{
return $this->client->selectCollection($this->databaseName, $this->collectionName);
Expand Down