Skip to content

Commit b13e52b

Browse files
authored
community[patch]: Fix AstraDBCache docstrings (langchain-ai#17802)
1 parent: 865cabf · commit: b13e52b

File tree

1 file changed

+47
-71
lines changed
  • libs/community/langchain_community

1 file changed

+47
-71
lines changed

libs/community/langchain_community/cache.py

Lines changed: 47 additions & 71 deletions
Original file line number | Diff line number | Diff line change
@@ -1366,16 +1366,6 @@ def get_md5(input_string: str) -> str:
13661366

13671367

13681368
class AstraDBCache(BaseCache):
1369-
"""
1370-
Cache that uses Astra DB as a backend.
1371-
1372-
It uses a single collection as a kv store
1373-
The lookup keys, combined in the _id of the documents, are:
1374-
- prompt, a string
1375-
- llm_string, a deterministic str representation of the model parameters.
1376-
(needed to prevent same-prompt-different-model collisions)
1377-
"""
1378-
13791369
@staticmethod
13801370
def _make_id(prompt: str, llm_string: str) -> str:
13811371
return f"{_hash(prompt)}#{_hash(llm_string)}"
@@ -1393,25 +1383,30 @@ def __init__(
13931383
setup_mode: SetupMode = SetupMode.SYNC,
13941384
):
13951385
"""
1396-
Create an AstraDB cache using a collection for storage.
1397-
1398-
Args (only keyword-arguments accepted):
1399-
collection_name (str): name of the Astra DB collection to create/use.
1400-
token (Optional[str]): API token for Astra DB usage.
1401-
api_endpoint (Optional[str]): full URL to the API endpoint,
1402-
such as "https://<DB-ID>-us-east1.apps.astra.datastax.com".
1403-
astra_db_client (Optional[AstraDB]):
1404-
*alternative to token+api_endpoint*,
1386+
Cache that uses Astra DB as a backend.
1387+
1388+
It uses a single collection as a kv store
1389+
The lookup keys, combined in the _id of the documents, are:
1390+
- prompt, a string
1391+
- llm_string, a deterministic str representation of the model parameters.
1392+
(needed to prevent same-prompt-different-model collisions)
1393+
1394+
Args:
1395+
collection_name: name of the Astra DB collection to create/use.
1396+
token: API token for Astra DB usage.
1397+
api_endpoint: full URL to the API endpoint,
1398+
such as `https://<DB-ID>-us-east1.apps.astra.datastax.com`.
1399+
astra_db_client: *alternative to token+api_endpoint*,
14051400
you can pass an already-created 'astrapy.db.AstraDB' instance.
1406-
async_astra_db_client (Optional[AsyncAstraDB]):
1407-
*alternative to token+api_endpoint*,
1401+
async_astra_db_client: *alternative to token+api_endpoint*,
14081402
you can pass an already-created 'astrapy.db.AsyncAstraDB' instance.
1409-
namespace (Optional[str]): namespace (aka keyspace) where the
1403+
namespace: namespace (aka keyspace) where the
14101404
collection is created. Defaults to the database's "default namespace".
1411-
pre_delete_collection (bool): whether to delete and re-create the
1412-
collection. Defaults to False.
1413-
async_setup (bool): whether to create the collection asynchronously.
1414-
Enable only if there is a running asyncio event loop. Defaults to False.
1405+
setup_mode: mode used to create the Astra DB collection (SYNC, ASYNC or
1406+
OFF).
1407+
pre_delete_collection: whether to delete the collection
1408+
before creating it. If False and the collection already exists,
1409+
the collection will be used as is.
14151410
"""
14161411
self.astra_env = _AstraDBCollectionEnvironment(
14171412
collection_name=collection_name,
@@ -1427,7 +1422,6 @@ def __init__(
14271422
self.async_collection = self.astra_env.async_collection
14281423

14291424
def lookup(self, prompt: str, llm_string: str) -> Optional[RETURN_VAL_TYPE]:
1430-
"""Look up based on prompt and llm_string."""
14311425
self.astra_env.ensure_db_setup()
14321426
doc_id = self._make_id(prompt, llm_string)
14331427
item = self.collection.find_one(
@@ -1441,7 +1435,6 @@ def lookup(self, prompt: str, llm_string: str) -> Optional[RETURN_VAL_TYPE]:
14411435
return _loads_generations(item["body_blob"]) if item is not None else None
14421436

14431437
async def alookup(self, prompt: str, llm_string: str) -> Optional[RETURN_VAL_TYPE]:
1444-
"""Look up based on prompt and llm_string."""
14451438
await self.astra_env.aensure_db_setup()
14461439
doc_id = self._make_id(prompt, llm_string)
14471440
item = (
@@ -1457,7 +1450,6 @@ async def alookup(self, prompt: str, llm_string: str) -> Optional[RETURN_VAL_TYP
14571450
return _loads_generations(item["body_blob"]) if item is not None else None
14581451

14591452
def update(self, prompt: str, llm_string: str, return_val: RETURN_VAL_TYPE) -> None:
1460-
"""Update cache based on prompt and llm_string."""
14611453
self.astra_env.ensure_db_setup()
14621454
doc_id = self._make_id(prompt, llm_string)
14631455
blob = _dumps_generations(return_val)
@@ -1471,7 +1463,6 @@ def update(self, prompt: str, llm_string: str, return_val: RETURN_VAL_TYPE) -> N
14711463
async def aupdate(
14721464
self, prompt: str, llm_string: str, return_val: RETURN_VAL_TYPE
14731465
) -> None:
1474-
"""Update cache based on prompt and llm_string."""
14751466
await self.astra_env.aensure_db_setup()
14761467
doc_id = self._make_id(prompt, llm_string)
14771468
blob = _dumps_generations(return_val)
@@ -1523,12 +1514,10 @@ async def adelete(self, prompt: str, llm_string: str) -> None:
15231514
await self.async_collection.delete_one(doc_id)
15241515

15251516
def clear(self, **kwargs: Any) -> None:
1526-
"""Clear cache. This is for all LLMs at once."""
15271517
self.astra_env.ensure_db_setup()
15281518
self.collection.clear()
15291519

15301520
async def aclear(self, **kwargs: Any) -> None:
1531-
"""Clear cache. This is for all LLMs at once."""
15321521
await self.astra_env.aensure_db_setup()
15331522
await self.async_collection.clear()
15341523

@@ -1575,18 +1564,6 @@ def decorating_function(user_function: Callable) -> Callable:
15751564

15761565

15771566
class AstraDBSemanticCache(BaseCache):
1578-
"""
1579-
Cache that uses Astra DB as a vector-store backend for semantic
1580-
(i.e. similarity-based) lookup.
1581-
1582-
It uses a single (vector) collection and can store
1583-
cached values from several LLMs, so the LLM's 'llm_string' is stored
1584-
in the document metadata.
1585-
1586-
You can choose the preferred similarity (or use the API default) --
1587-
remember the threshold might require metric-dependent tuning.
1588-
"""
1589-
15901567
def __init__(
15911568
self,
15921569
*,
@@ -1603,33 +1580,38 @@ def __init__(
16031580
similarity_threshold: float = ASTRA_DB_SEMANTIC_CACHE_DEFAULT_THRESHOLD,
16041581
):
16051582
"""
1606-
Initialize the cache with all relevant parameters.
1607-
Args:
1583+
Cache that uses Astra DB as a vector-store backend for semantic
1584+
(i.e. similarity-based) lookup.
1585+
1586+
It uses a single (vector) collection and can store
1587+
cached values from several LLMs, so the LLM's 'llm_string' is stored
1588+
in the document metadata.
1589+
1590+
You can choose the preferred similarity (or use the API default).
1591+
The default score threshold is tuned to the default metric.
1592+
Tune it carefully yourself if switching to another distance metric.
16081593
1609-
collection_name (str): name of the Astra DB collection to create/use.
1610-
token (Optional[str]): API token for Astra DB usage.
1611-
api_endpoint (Optional[str]): full URL to the API endpoint,
1612-
such as "https://<DB-ID>-us-east1.apps.astra.datastax.com".
1613-
astra_db_client (Optional[AstraDB]): *alternative to token+api_endpoint*,
1594+
Args:
1595+
collection_name: name of the Astra DB collection to create/use.
1596+
token: API token for Astra DB usage.
1597+
api_endpoint: full URL to the API endpoint,
1598+
such as `https://<DB-ID>-us-east1.apps.astra.datastax.com`.
1599+
astra_db_client: *alternative to token+api_endpoint*,
16141600
you can pass an already-created 'astrapy.db.AstraDB' instance.
1615-
async_astra_db_client (Optional[AsyncAstraDB]):
1616-
*alternative to token+api_endpoint*,
1601+
async_astra_db_client: *alternative to token+api_endpoint*,
16171602
you can pass an already-created 'astrapy.db.AsyncAstraDB' instance.
1618-
namespace (Optional[str]): namespace (aka keyspace) where the
1603+
namespace: namespace (aka keyspace) where the
16191604
collection is created. Defaults to the database's "default namespace".
1620-
setup_mode (SetupMode): mode used to create the collection in the DB
1621-
(SYNC, ASYNC or OFF). Defaults to SYNC.
1622-
pre_delete_collection (bool): whether to delete and re-create the
1623-
collection. Defaults to False.
1624-
embedding (Embedding): Embedding provider for semantic
1625-
encoding and search.
1605+
setup_mode: mode used to create the Astra DB collection (SYNC, ASYNC or
1606+
OFF).
1607+
pre_delete_collection: whether to delete the collection
1608+
before creating it. If False and the collection already exists,
1609+
the collection will be used as is.
1610+
embedding: Embedding provider for semantic encoding and search.
16261611
metric: the function to use for evaluating similarity of text embeddings.
16271612
Defaults to 'cosine' (alternatives: 'euclidean', 'dot_product')
1628-
similarity_threshold (float, optional): the minimum similarity
1629-
for accepting a (semantic-search) match.
1630-
1631-
The default score threshold is tuned to the default metric.
1632-
Tune it carefully yourself if switching to another distance metric.
1613+
similarity_threshold: the minimum similarity for accepting a
1614+
(semantic-search) match.
16331615
"""
16341616
self.embedding = embedding
16351617
self.metric = metric
@@ -1685,7 +1667,6 @@ def _make_id(prompt: str, llm_string: str) -> str:
16851667
return f"{_hash(prompt)}#{_hash(llm_string)}"
16861668

16871669
def update(self, prompt: str, llm_string: str, return_val: RETURN_VAL_TYPE) -> None:
1688-
"""Update cache based on prompt and llm_string."""
16891670
self.astra_env.ensure_db_setup()
16901671
doc_id = self._make_id(prompt, llm_string)
16911672
llm_string_hash = _hash(llm_string)
@@ -1704,7 +1685,6 @@ def update(self, prompt: str, llm_string: str, return_val: RETURN_VAL_TYPE) -> N
17041685
async def aupdate(
17051686
self, prompt: str, llm_string: str, return_val: RETURN_VAL_TYPE
17061687
) -> None:
1707-
"""Update cache based on prompt and llm_string."""
17081688
await self.astra_env.aensure_db_setup()
17091689
doc_id = self._make_id(prompt, llm_string)
17101690
llm_string_hash = _hash(llm_string)
@@ -1721,15 +1701,13 @@ async def aupdate(
17211701
)
17221702

17231703
def lookup(self, prompt: str, llm_string: str) -> Optional[RETURN_VAL_TYPE]:
1724-
"""Look up based on prompt and llm_string."""
17251704
hit_with_id = self.lookup_with_id(prompt, llm_string)
17261705
if hit_with_id is not None:
17271706
return hit_with_id[1]
17281707
else:
17291708
return None
17301709

17311710
async def alookup(self, prompt: str, llm_string: str) -> Optional[RETURN_VAL_TYPE]:
1732-
"""Look up based on prompt and llm_string."""
17331711
hit_with_id = await self.alookup_with_id(prompt, llm_string)
17341712
if hit_with_id is not None:
17351713
return hit_with_id[1]
@@ -1835,11 +1813,9 @@ async def adelete_by_document_id(self, document_id: str) -> None:
18351813
await self.async_collection.delete_one(document_id)
18361814

18371815
def clear(self, **kwargs: Any) -> None:
1838-
"""Clear the *whole* semantic cache."""
18391816
self.astra_env.ensure_db_setup()
18401817
self.collection.clear()
18411818

18421819
async def aclear(self, **kwargs: Any) -> None:
1843-
"""Clear the *whole* semantic cache."""
18441820
await self.astra_env.aensure_db_setup()
18451821
await self.async_collection.clear()

0 commit comments

Comments (0)