15
15
16
16
17
17
class ReadWriteTestSuite (BaseStandardTests ):
18
- """Test suite for checking the read-write API of a vectorstore.
19
-
20
- This test suite verifies the basic read-write API of a vectorstore.
21
-
22
- The test suite is designed for synchronous vectorstores.
18
+ """Test suite for checking the synchronous read-write API of a vectorstore.
23
19
24
20
Implementers should subclass this test suite and provide a fixture
25
21
that returns an empty vectorstore for each test.
26
22
27
- The fixture should use the `get_embeddings` method to get a pre-defined
23
+ The fixture should use the ``get_embeddings`` method to get a pre-defined
28
24
embeddings model that should be used for this test suite.
29
25
30
26
Here is a template:
@@ -109,7 +105,7 @@ def test_vectorstore_is_empty(self, vectorstore: VectorStore) -> None:
109
105
.. dropdown:: Troubleshooting
110
106
111
107
If this test fails, check that the test class (i.e., subclass of
112
- ReadWriteTestSuite) initializes an empty vector store in the
108
+ ``ReadWriteTestSuite``) initializes an empty vector store in the
113
109
``vectorstore`` fixture.
114
110
"""
115
111
assert vectorstore .similarity_search ("foo" , k = 1 ) == []
@@ -151,8 +147,8 @@ def test_vectorstore_still_empty(self, vectorstore: VectorStore) -> None:
151
147
.. dropdown:: Troubleshooting
152
148
153
149
If this test fails, check that the test class (i.e., subclass of
154
- ReadWriteTestSuite) correctly clears the vector store in the ``finally``
155
- block.
150
+ ``ReadWriteTestSuite``) correctly clears the vector store in the
151
+ ``finally`` block.
156
152
"""
157
153
assert vectorstore .similarity_search ("foo" , k = 1 ) == []
158
154
@@ -181,7 +177,7 @@ def test_deleting_bulk_documents(self, vectorstore: VectorStore) -> None:
181
177
.. dropdown:: Troubleshooting
182
178
183
179
If this test fails, check that ``delete`` correctly removes multiple
184
- documents when givena list of IDs.
180
+ documents when given a list of IDs.
185
181
"""
186
182
documents = [
187
183
Document (page_content = "foo" , metadata = {"id" : 1 }),
@@ -388,18 +384,68 @@ def test_add_documents_with_existing_ids(self, vectorstore: VectorStore) -> None
388
384
389
385
390
386
class AsyncReadWriteTestSuite (BaseStandardTests ):
391
- """Test suite for checking the **async** read-write API of a vectorstore.
392
-
393
- This test suite verifies the basic read-write API of a vectorstore.
394
-
395
- The test suite is designed for asynchronous vectorstores.
387
+ """Test suite for checking the async read-write API of a vectorstore.
396
388
397
389
Implementers should subclass this test suite and provide a fixture
398
390
that returns an empty vectorstore for each test.
399
391
400
- The fixture should use the `get_embeddings` method to get a pre-defined
392
+ The fixture should use the ``get_embeddings`` method to get a pre-defined
401
393
embeddings model that should be used for this test suite.
402
- """
394
+
395
+ Here is a template:
396
+
397
+ .. code-block:: python
398
+
399
+ from typing import AsyncGenerator
400
+
401
+ import pytest
402
+ from langchain_core.vectorstores import VectorStore
403
+ from langchain_parrot_link.vectorstores import ParrotVectorStore
404
+ from langchain_tests.integration_tests.vectorstores import AsyncReadWriteTestSuite
405
+
406
+
407
+ class TestAsync(AsyncReadWriteTestSuite):
408
+ @pytest.fixture()
409
+ async def vectorstore(self) -> AsyncGenerator[VectorStore, None]: # type: ignore
410
+ \"\"\"Get an empty vectorstore.\"\"\"
411
+ store = ParrotVectorStore(self.get_embeddings())
412
+ # note: store should be EMPTY at this point
413
+ # if you need to delete data, you may do so here
414
+ try:
415
+ yield store
416
+ finally:
417
+ # cleanup operations, or deleting data
418
+ pass
419
+
420
+ In the fixture, before the ``yield`` we instantiate an empty vector store. In the
421
+ ``finally`` block, we call whatever logic is necessary to bring the vector store
422
+ to a clean state.
423
+
424
+ Example:
425
+
426
+ .. code-block:: python
427
+
428
+ from typing import AsyncGenerator, Generator
429
+
430
+ import pytest
431
+ from langchain_core.vectorstores import VectorStore
432
+ from langchain_tests.integration_tests.vectorstores import AsyncReadWriteTestSuite
433
+
434
+ from langchain_chroma import Chroma
435
+
436
+
437
+ class TestAsync(AsyncReadWriteTestSuite):
438
+ @pytest.fixture()
439
+ async def vectorstore(self) -> AsyncGenerator[VectorStore, None]: # type: ignore
440
+ \"\"\"Get an empty vectorstore for unit tests.\"\"\"
441
+ store = Chroma(embedding_function=self.get_embeddings())
442
+ try:
443
+ yield store
444
+ finally:
445
+ store.delete_collection()
446
+ pass
447
+
448
+ """ # noqa: E501
403
449
404
450
@abstractmethod
405
451
@pytest .fixture
@@ -411,17 +457,39 @@ async def vectorstore(self) -> VectorStore:
411
457
412
458
@staticmethod
413
459
def get_embeddings () -> Embeddings :
414
- """A pre-defined embeddings model that should be used for this test."""
460
+ """A pre-defined embeddings model that should be used for this test.
461
+
462
+ This currently uses ``DeterministicFakeEmbedding`` from ``langchain-core``,
463
+ which uses numpy to generate random numbers based on a hash of the input text.
464
+
465
+ The resulting embeddings are not meaningful, but they are deterministic.
466
+ """
415
467
return DeterministicFakeEmbedding (
416
468
size = EMBEDDING_SIZE ,
417
469
)
418
470
419
471
async def test_vectorstore_is_empty (self , vectorstore : VectorStore ) -> None :
420
- """Test that the vectorstore is empty."""
472
+ """Test that the vectorstore is empty.
473
+
474
+ .. dropdown:: Troubleshooting
475
+
476
+ If this test fails, check that the test class (i.e., subclass of
477
+ ``AsyncReadWriteTestSuite``) initializes an empty vector store in the
478
+ ``vectorstore`` fixture.
479
+ """
421
480
assert await vectorstore .asimilarity_search ("foo" , k = 1 ) == []
422
481
423
482
async def test_add_documents (self , vectorstore : VectorStore ) -> None :
424
- """Test adding documents into the vectorstore."""
483
+ """Test adding documents into the vectorstore.
484
+
485
+ .. dropdown:: Troubleshooting
486
+
487
+ If this test fails, check that:
488
+
489
+ 1. We correctly initialize an empty vector store in the ``vectorstore`` fixture.
490
+ 2. Calling ``.asimilarity_search`` for the top ``k`` similar documents does not threshold by score.
491
+ 3. We do not mutate the original document object when adding it to the vector store (e.g., by adding an ID).
492
+ """ # noqa: E501
425
493
original_documents = [
426
494
Document (page_content = "foo" , metadata = {"id" : 1 }),
427
495
Document (page_content = "bar" , metadata = {"id" : 2 }),
@@ -445,11 +513,24 @@ async def test_vectorstore_still_empty(self, vectorstore: VectorStore) -> None:
445
513
446
514
This just verifies that the fixture is set up properly to be empty
447
515
after each test.
516
+
517
+ .. dropdown:: Troubleshooting
518
+
519
+ If this test fails, check that the test class (i.e., subclass of
520
+ ``AsyncReadWriteTestSuite``) correctly clears the vector store in the
521
+ ``finally`` block.
448
522
"""
449
523
assert await vectorstore .asimilarity_search ("foo" , k = 1 ) == []
450
524
451
525
async def test_deleting_documents (self , vectorstore : VectorStore ) -> None :
452
- """Test deleting documents from the vectorstore."""
526
+ """Test deleting documents from the vectorstore.
527
+
528
+ .. dropdown:: Troubleshooting
529
+
530
+ If this test fails, check that ``aadd_documents`` preserves identifiers
531
+ passed in through ``ids``, and that ``adelete`` correctly removes
532
+ documents.
533
+ """
453
534
documents = [
454
535
Document (page_content = "foo" , metadata = {"id" : 1 }),
455
536
Document (page_content = "bar" , metadata = {"id" : 2 }),
@@ -461,7 +542,13 @@ async def test_deleting_documents(self, vectorstore: VectorStore) -> None:
461
542
assert documents == [Document (page_content = "bar" , metadata = {"id" : 2 }, id = "2" )]
462
543
463
544
async def test_deleting_bulk_documents (self , vectorstore : VectorStore ) -> None :
464
- """Test that we can delete several documents at once."""
545
+ """Test that we can delete several documents at once.
546
+
547
+ .. dropdown:: Troubleshooting
548
+
549
+ If this test fails, check that ``adelete`` correctly removes multiple
550
+ documents when given a list of IDs.
551
+ """
465
552
documents = [
466
553
Document (page_content = "foo" , metadata = {"id" : 1 }),
467
554
Document (page_content = "bar" , metadata = {"id" : 2 }),
@@ -474,14 +561,27 @@ async def test_deleting_bulk_documents(self, vectorstore: VectorStore) -> None:
474
561
assert documents == [Document (page_content = "baz" , metadata = {"id" : 3 }, id = "3" )]
475
562
476
563
async def test_delete_missing_content (self , vectorstore : VectorStore ) -> None :
477
- """Deleting missing content should not raise an exception."""
564
+ """Deleting missing content should not raise an exception.
565
+
566
+ .. dropdown:: Troubleshooting
567
+
568
+ If this test fails, check that ``adelete`` does not raise an exception
569
+ when deleting IDs that do not exist.
570
+ """
478
571
await vectorstore .adelete (["1" ])
479
572
await vectorstore .adelete (["1" , "2" , "3" ])
480
573
481
574
async def test_add_documents_with_ids_is_idempotent (
482
575
self , vectorstore : VectorStore
483
576
) -> None :
484
- """Adding by ID should be idempotent."""
577
+ """Adding by ID should be idempotent.
578
+
579
+ .. dropdown:: Troubleshooting
580
+
581
+ If this test fails, check that adding the same document twice with the
582
+ same IDs has the same effect as adding it once (i.e., it does not
583
+ duplicate the documents).
584
+ """
485
585
documents = [
486
586
Document (page_content = "foo" , metadata = {"id" : 1 }),
487
587
Document (page_content = "bar" , metadata = {"id" : 2 }),
@@ -497,7 +597,14 @@ async def test_add_documents_with_ids_is_idempotent(
497
597
async def test_add_documents_by_id_with_mutation (
498
598
self , vectorstore : VectorStore
499
599
) -> None :
500
- """Test that we can overwrite by ID using add_documents."""
600
+ """Test that we can overwrite by ID using add_documents.
601
+
602
+ .. dropdown:: Troubleshooting
603
+
604
+ If this test fails, check that when ``aadd_documents`` is called with an
605
+ ID that already exists in the vector store, the content is updated
606
+ rather than duplicated.
607
+ """
501
608
documents = [
502
609
Document (page_content = "foo" , metadata = {"id" : 1 }),
503
610
Document (page_content = "bar" , metadata = {"id" : 2 }),
@@ -526,7 +633,26 @@ async def test_add_documents_by_id_with_mutation(
526
633
]
527
634
528
635
async def test_get_by_ids (self , vectorstore : VectorStore ) -> None :
529
- """Test get by IDs."""
636
+ """Test get by IDs.
637
+
638
+ This test requires that ``get_by_ids`` be implemented on the vector store.
639
+
640
+ .. dropdown:: Troubleshooting
641
+
642
+ If this test fails, check that ``get_by_ids`` is implemented and returns
643
+ documents in the same order as the IDs passed in.
644
+
645
+ .. note::
646
+ ``get_by_ids`` was added to the ``VectorStore`` interface in
647
+ ``langchain-core`` version 0.2.11. If difficult to implement, this
648
+ test can be skipped using a pytest ``xfail`` on the test class:
649
+
650
+ .. code-block:: python
651
+
652
+ @pytest.mark.xfail(reason=("get_by_ids not implemented."))
653
+ async def test_get_by_ids(self, vectorstore: VectorStore) -> None:
654
+ await super().test_get_by_ids(vectorstore)
655
+ """
530
656
documents = [
531
657
Document (page_content = "foo" , metadata = {"id" : 1 }),
532
658
Document (page_content = "bar" , metadata = {"id" : 2 }),
@@ -539,12 +665,49 @@ async def test_get_by_ids(self, vectorstore: VectorStore) -> None:
539
665
]
540
666
541
667
async def test_get_by_ids_missing (self , vectorstore : VectorStore ) -> None :
542
- """Test get by IDs with missing IDs."""
668
+ """Test get by IDs with missing IDs.
669
+
670
+ .. dropdown:: Troubleshooting
671
+
672
+ If this test fails, check that ``get_by_ids`` is implemented and does not
673
+ raise an exception when given IDs that do not exist.
674
+
675
+ .. note::
676
+ ``get_by_ids`` was added to the ``VectorStore`` interface in
677
+ ``langchain-core`` version 0.2.11. If difficult to implement, this
678
+ test can be skipped using a pytest ``xfail`` on the test class:
679
+
680
+ .. code-block:: python
681
+
682
+ @pytest.mark.xfail(reason=("get_by_ids not implemented."))
683
+ async def test_get_by_ids_missing(self, vectorstore: VectorStore) -> None:
684
+ await super().test_get_by_ids_missing(vectorstore)
685
+ """ # noqa: E501
543
686
# This should not raise an exception
544
687
assert await vectorstore .aget_by_ids (["1" , "2" , "3" ]) == []
545
688
546
689
async def test_add_documents_documents (self , vectorstore : VectorStore ) -> None :
547
- """Run add_documents tests."""
690
+ """Run add_documents tests.
691
+
692
+ .. dropdown:: Troubleshooting
693
+
694
+ If this test fails, check that ``get_by_ids`` is implemented and returns
695
+ documents in the same order as the IDs passed in.
696
+
697
+ Check also that ``aadd_documents`` will correctly generate string IDs if
698
+ none are provided.
699
+
700
+ .. note::
701
+ ``get_by_ids`` was added to the ``VectorStore`` interface in
702
+ ``langchain-core`` version 0.2.11. If difficult to implement, this
703
+ test can be skipped using a pytest ``xfail`` on the test class:
704
+
705
+ .. code-block:: python
706
+
707
+ @pytest.mark.xfail(reason=("get_by_ids not implemented."))
708
+ async def test_add_documents_documents(self, vectorstore: VectorStore) -> None:
709
+ await super().test_add_documents_documents(vectorstore)
710
+ """ # noqa: E501
548
711
documents = [
549
712
Document (page_content = "foo" , metadata = {"id" : 1 }),
550
713
Document (page_content = "bar" , metadata = {"id" : 2 }),
@@ -558,7 +721,29 @@ async def test_add_documents_documents(self, vectorstore: VectorStore) -> None:
558
721
async def test_add_documents_with_existing_ids (
559
722
self , vectorstore : VectorStore
560
723
) -> None :
561
- """Test that add_documentsing with existing IDs is idempotent."""
724
+ """Test that add_documents with existing IDs is idempotent.
725
+
726
+ .. dropdown:: Troubleshooting
727
+
728
+ If this test fails, check that ``get_by_ids`` is implemented and returns
729
+ documents in the same order as the IDs passed in.
730
+
731
+ This test also verifies that:
732
+
733
+ 1. IDs specified in the ``Document.id`` field are assigned when adding documents.
734
+ 2. If some documents include IDs and others don't, string IDs are generated for the latter.
735
+
736
+ .. note::
737
+ ``get_by_ids`` was added to the ``VectorStore`` interface in
738
+ ``langchain-core`` version 0.2.11. If difficult to implement, this
739
+ test can be skipped using a pytest ``xfail`` on the test class:
740
+
741
+ .. code-block:: python
742
+
743
+ @pytest.mark.xfail(reason=("get_by_ids not implemented."))
744
+ async def test_add_documents_with_existing_ids(self, vectorstore: VectorStore) -> None:
745
+ await super().test_add_documents_with_existing_ids(vectorstore)
746
+ """ # noqa: E501
562
747
documents = [
563
748
Document (id = "foo" , page_content = "foo" , metadata = {"id" : 1 }),
564
749
Document (page_content = "bar" , metadata = {"id" : 2 }),
0 commit comments