Skip to content

Commit 6ed9377

Browse files
committed
AsyncReadWriteTestSuite
1 parent b5f2783 commit 6ed9377

File tree

1 file changed

+214
-29
lines changed

1 file changed

+214
-29
lines changed

libs/standard-tests/langchain_tests/integration_tests/vectorstores.py

Lines changed: 214 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -15,16 +15,12 @@
1515

1616

1717
class ReadWriteTestSuite(BaseStandardTests):
18-
"""Test suite for checking the read-write API of a vectorstore.
19-
20-
This test suite verifies the basic read-write API of a vectorstore.
21-
22-
The test suite is designed for synchronous vectorstores.
18+
"""Test suite for checking the synchronous read-write API of a vectorstore.
2319
2420
Implementers should subclass this test suite and provide a fixture
2521
that returns an empty vectorstore for each test.
2622
27-
The fixture should use the `get_embeddings` method to get a pre-defined
23+
The fixture should use the ``get_embeddings`` method to get a pre-defined
2824
embeddings model that should be used for this test suite.
2925
3026
Here is a template:
@@ -109,7 +105,7 @@ def test_vectorstore_is_empty(self, vectorstore: VectorStore) -> None:
109105
.. dropdown:: Troubleshooting
110106
111107
If this test fails, check that the test class (i.e., subclass of
112-
ReadWriteTestSuite) initializes an empty vector store in the
108+
``ReadWriteTestSuite``) initializes an empty vector store in the
113109
``vectorstore`` fixture.
114110
"""
115111
assert vectorstore.similarity_search("foo", k=1) == []
@@ -151,8 +147,8 @@ def test_vectorstore_still_empty(self, vectorstore: VectorStore) -> None:
151147
.. dropdown:: Troubleshooting
152148
153149
If this test fails, check that the test class (i.e., subclass of
154-
ReadWriteTestSuite) correctly clears the vector store in the ``finally``
155-
block.
150+
``ReadWriteTestSuite``) correctly clears the vector store in the
151+
``finally`` block.
156152
"""
157153
assert vectorstore.similarity_search("foo", k=1) == []
158154

@@ -181,7 +177,7 @@ def test_deleting_bulk_documents(self, vectorstore: VectorStore) -> None:
181177
.. dropdown:: Troubleshooting
182178
183179
If this test fails, check that ``delete`` correctly removes multiple
184-
documents when givena list of IDs.
180+
documents when given a list of IDs.
185181
"""
186182
documents = [
187183
Document(page_content="foo", metadata={"id": 1}),
@@ -388,18 +384,68 @@ def test_add_documents_with_existing_ids(self, vectorstore: VectorStore) -> None
388384

389385

390386
class AsyncReadWriteTestSuite(BaseStandardTests):
391-
"""Test suite for checking the **async** read-write API of a vectorstore.
392-
393-
This test suite verifies the basic read-write API of a vectorstore.
394-
395-
The test suite is designed for asynchronous vectorstores.
387+
"""Test suite for checking the async read-write API of a vectorstore.
396388
397389
Implementers should subclass this test suite and provide a fixture
398390
that returns an empty vectorstore for each test.
399391
400-
The fixture should use the `get_embeddings` method to get a pre-defined
392+
The fixture should use the ``get_embeddings`` method to get a pre-defined
401393
embeddings model that should be used for this test suite.
402-
"""
394+
395+
Here is a template:
396+
397+
.. code-block:: python
398+
399+
from typing import AsyncGenerator
400+
401+
import pytest
402+
from langchain_core.vectorstores import VectorStore
403+
from langchain_parrot_link.vectorstores import ParrotVectorStore
404+
from langchain_tests.integration_tests.vectorstores import AsyncReadWriteTestSuite
405+
406+
407+
class TestAsync(AsyncReadWriteTestSuite):
408+
@pytest.fixture()
409+
async def vectorstore(self) -> AsyncGenerator[VectorStore, None]: # type: ignore
410+
\"\"\"Get an empty vectorstore.\"\"\"
411+
store = ParrotVectorStore(self.get_embeddings())
412+
# note: store should be EMPTY at this point
413+
# if you need to delete data, you may do so here
414+
try:
415+
yield store
416+
finally:
417+
# cleanup operations, or deleting data
418+
pass
419+
420+
In the fixture, before the ``yield`` we instantiate an empty vector store. In the
421+
``finally`` block, we call whatever logic is necessary to bring the vector store
422+
to a clean state.
423+
424+
Example:
425+
426+
.. code-block:: python
427+
428+
from typing import AsyncGenerator
429+
430+
import pytest
431+
from langchain_core.vectorstores import VectorStore
432+
from langchain_tests.integration_tests.vectorstores import AsyncReadWriteTestSuite
433+
434+
from langchain_chroma import Chroma
435+
436+
437+
class TestAsync(AsyncReadWriteTestSuite):
438+
@pytest.fixture()
439+
async def vectorstore(self) -> AsyncGenerator[VectorStore, None]: # type: ignore
440+
\"\"\"Get an empty vectorstore for unit tests.\"\"\"
441+
store = Chroma(embedding_function=self.get_embeddings())
442+
try:
443+
yield store
444+
finally:
445+
store.delete_collection()
446+
pass
447+
448+
""" # noqa: E501
403449

404450
@abstractmethod
405451
@pytest.fixture
@@ -411,17 +457,39 @@ async def vectorstore(self) -> VectorStore:
411457

412458
@staticmethod
413459
def get_embeddings() -> Embeddings:
414-
"""A pre-defined embeddings model that should be used for this test."""
460+
"""A pre-defined embeddings model that should be used for this test.
461+
462+
This currently uses ``DeterministicFakeEmbedding`` from ``langchain-core``,
463+
which uses numpy to generate random numbers based on a hash of the input text.
464+
465+
The resulting embeddings are not meaningful, but they are deterministic.
466+
"""
415467
return DeterministicFakeEmbedding(
416468
size=EMBEDDING_SIZE,
417469
)
418470

419471
async def test_vectorstore_is_empty(self, vectorstore: VectorStore) -> None:
420-
"""Test that the vectorstore is empty."""
472+
"""Test that the vectorstore is empty.
473+
474+
.. dropdown:: Troubleshooting
475+
476+
If this test fails, check that the test class (i.e., subclass of
477+
``AsyncReadWriteTestSuite``) initializes an empty vector store in the
478+
``vectorstore`` fixture.
479+
"""
421480
assert await vectorstore.asimilarity_search("foo", k=1) == []
422481

423482
async def test_add_documents(self, vectorstore: VectorStore) -> None:
424-
"""Test adding documents into the vectorstore."""
483+
"""Test adding documents into the vectorstore.
484+
485+
.. dropdown:: Troubleshooting
486+
487+
If this test fails, check that:
488+
489+
1. We correctly initialize an empty vector store in the ``vectorstore`` fixture.
490+
2. Calling ``.asimilarity_search`` for the top ``k`` similar documents does not threshold by score.
491+
3. We do not mutate the original document object when adding it to the vector store (e.g., by adding an ID).
492+
""" # noqa: E501
425493
original_documents = [
426494
Document(page_content="foo", metadata={"id": 1}),
427495
Document(page_content="bar", metadata={"id": 2}),
@@ -445,11 +513,24 @@ async def test_vectorstore_still_empty(self, vectorstore: VectorStore) -> None:
445513
446514
This just verifies that the fixture is set up properly to be empty
447515
after each test.
516+
517+
.. dropdown:: Troubleshooting
518+
519+
If this test fails, check that the test class (i.e., subclass of
520+
``AsyncReadWriteTestSuite``) correctly clears the vector store in the
521+
``finally`` block.
448522
"""
449523
assert await vectorstore.asimilarity_search("foo", k=1) == []
450524

451525
async def test_deleting_documents(self, vectorstore: VectorStore) -> None:
452-
"""Test deleting documents from the vectorstore."""
526+
"""Test deleting documents from the vectorstore.
527+
528+
.. dropdown:: Troubleshooting
529+
530+
If this test fails, check that ``aadd_documents`` preserves identifiers
531+
passed in through ``ids``, and that ``adelete`` correctly removes
532+
documents.
533+
"""
453534
documents = [
454535
Document(page_content="foo", metadata={"id": 1}),
455536
Document(page_content="bar", metadata={"id": 2}),
@@ -461,7 +542,13 @@ async def test_deleting_documents(self, vectorstore: VectorStore) -> None:
461542
assert documents == [Document(page_content="bar", metadata={"id": 2}, id="2")]
462543

463544
async def test_deleting_bulk_documents(self, vectorstore: VectorStore) -> None:
464-
"""Test that we can delete several documents at once."""
545+
"""Test that we can delete several documents at once.
546+
547+
.. dropdown:: Troubleshooting
548+
549+
If this test fails, check that ``adelete`` correctly removes multiple
550+
documents when given a list of IDs.
551+
"""
465552
documents = [
466553
Document(page_content="foo", metadata={"id": 1}),
467554
Document(page_content="bar", metadata={"id": 2}),
@@ -474,14 +561,27 @@ async def test_deleting_bulk_documents(self, vectorstore: VectorStore) -> None:
474561
assert documents == [Document(page_content="baz", metadata={"id": 3}, id="3")]
475562

476563
async def test_delete_missing_content(self, vectorstore: VectorStore) -> None:
477-
"""Deleting missing content should not raise an exception."""
564+
"""Deleting missing content should not raise an exception.
565+
566+
.. dropdown:: Troubleshooting
567+
568+
If this test fails, check that ``adelete`` does not raise an exception
569+
when deleting IDs that do not exist.
570+
"""
478571
await vectorstore.adelete(["1"])
479572
await vectorstore.adelete(["1", "2", "3"])
480573

481574
async def test_add_documents_with_ids_is_idempotent(
482575
self, vectorstore: VectorStore
483576
) -> None:
484-
"""Adding by ID should be idempotent."""
577+
"""Adding by ID should be idempotent.
578+
579+
.. dropdown:: Troubleshooting
580+
581+
If this test fails, check that adding the same document twice with the
582+
same IDs has the same effect as adding it once (i.e., it does not
583+
duplicate the documents).
584+
"""
485585
documents = [
486586
Document(page_content="foo", metadata={"id": 1}),
487587
Document(page_content="bar", metadata={"id": 2}),
@@ -497,7 +597,14 @@ async def test_add_documents_with_ids_is_idempotent(
497597
async def test_add_documents_by_id_with_mutation(
498598
self, vectorstore: VectorStore
499599
) -> None:
500-
"""Test that we can overwrite by ID using add_documents."""
600+
"""Test that we can overwrite by ID using add_documents.
601+
602+
.. dropdown:: Troubleshooting
603+
604+
If this test fails, check that when ``aadd_documents`` is called with an
605+
ID that already exists in the vector store, the content is updated
606+
rather than duplicated.
607+
"""
501608
documents = [
502609
Document(page_content="foo", metadata={"id": 1}),
503610
Document(page_content="bar", metadata={"id": 2}),
@@ -526,7 +633,26 @@ async def test_add_documents_by_id_with_mutation(
526633
]
527634

528635
async def test_get_by_ids(self, vectorstore: VectorStore) -> None:
529-
"""Test get by IDs."""
636+
"""Test get by IDs.
637+
638+
This test requires that ``get_by_ids`` be implemented on the vector store.
639+
640+
.. dropdown:: Troubleshooting
641+
642+
If this test fails, check that ``get_by_ids`` is implemented and returns
643+
documents in the same order as the IDs passed in.
644+
645+
.. note::
646+
``get_by_ids`` was added to the ``VectorStore`` interface in
647+
``langchain-core`` version 0.2.11. If difficult to implement, this
648+
test can be skipped using a pytest ``xfail`` on the test class:
649+
650+
.. code-block:: python
651+
652+
@pytest.mark.xfail(reason=("get_by_ids not implemented."))
653+
async def test_get_by_ids(self, vectorstore: VectorStore) -> None:
654+
await super().test_get_by_ids(vectorstore)
655+
"""
530656
documents = [
531657
Document(page_content="foo", metadata={"id": 1}),
532658
Document(page_content="bar", metadata={"id": 2}),
@@ -539,12 +665,49 @@ async def test_get_by_ids(self, vectorstore: VectorStore) -> None:
539665
]
540666

541667
async def test_get_by_ids_missing(self, vectorstore: VectorStore) -> None:
542-
"""Test get by IDs with missing IDs."""
668+
"""Test get by IDs with missing IDs.
669+
670+
.. dropdown:: Troubleshooting
671+
672+
If this test fails, check that ``get_by_ids`` is implemented and does not
673+
raise an exception when given IDs that do not exist.
674+
675+
.. note::
676+
``get_by_ids`` was added to the ``VectorStore`` interface in
677+
``langchain-core`` version 0.2.11. If difficult to implement, this
678+
test can be skipped using a pytest ``xfail`` on the test class:
679+
680+
.. code-block:: python
681+
682+
@pytest.mark.xfail(reason=("get_by_ids not implemented."))
683+
async def test_get_by_ids_missing(self, vectorstore: VectorStore) -> None:
684+
await super().test_get_by_ids_missing(vectorstore)
685+
""" # noqa: E501
543686
# This should not raise an exception
544687
assert await vectorstore.aget_by_ids(["1", "2", "3"]) == []
545688

546689
async def test_add_documents_documents(self, vectorstore: VectorStore) -> None:
547-
"""Run add_documents tests."""
690+
"""Run add_documents tests.
691+
692+
.. dropdown:: Troubleshooting
693+
694+
If this test fails, check that ``get_by_ids`` is implemented and returns
695+
documents in the same order as the IDs passed in.
696+
697+
Check also that ``aadd_documents`` will correctly generate string IDs if
698+
none are provided.
699+
700+
.. note::
701+
``get_by_ids`` was added to the ``VectorStore`` interface in
702+
``langchain-core`` version 0.2.11. If difficult to implement, this
703+
test can be skipped using a pytest ``xfail`` on the test class:
704+
705+
.. code-block:: python
706+
707+
@pytest.mark.xfail(reason=("get_by_ids not implemented."))
708+
async def test_add_documents_documents(self, vectorstore: VectorStore) -> None:
709+
await super().test_add_documents_documents(vectorstore)
710+
""" # noqa: E501
548711
documents = [
549712
Document(page_content="foo", metadata={"id": 1}),
550713
Document(page_content="bar", metadata={"id": 2}),
@@ -558,7 +721,29 @@ async def test_add_documents_documents(self, vectorstore: VectorStore) -> None:
558721
async def test_add_documents_with_existing_ids(
559722
self, vectorstore: VectorStore
560723
) -> None:
561-
"""Test that add_documentsing with existing IDs is idempotent."""
724+
"""Test that add_documents with existing IDs is idempotent.
725+
726+
.. dropdown:: Troubleshooting
727+
728+
If this test fails, check that ``get_by_ids`` is implemented and returns
729+
documents in the same order as the IDs passed in.
730+
731+
This test also verifies that:
732+
733+
1. IDs specified in the ``Document.id`` field are assigned when adding documents.
734+
2. If some documents include IDs and others don't, string IDs are generated for the latter.
735+
736+
.. note::
737+
``get_by_ids`` was added to the ``VectorStore`` interface in
738+
``langchain-core`` version 0.2.11. If difficult to implement, this
739+
test can be skipped using a pytest ``xfail`` on the test class:
740+
741+
.. code-block:: python
742+
743+
@pytest.mark.xfail(reason=("get_by_ids not implemented."))
744+
async def test_add_documents_with_existing_ids(self, vectorstore: VectorStore) -> None:
745+
await super().test_add_documents_with_existing_ids(vectorstore)
746+
""" # noqa: E501
562747
documents = [
563748
Document(id="foo", page_content="foo", metadata={"id": 1}),
564749
Document(page_content="bar", metadata={"id": 2}),

0 commit comments

Comments
 (0)