Skip to content

Commit

Permalink
build: fix astra integration tests (#3518)
Browse files Browse the repository at this point in the history
  • Loading branch information
jordanrfrazier authored and ogabrielluiz committed Aug 27, 2024
1 parent 1d2ff38 commit bc67127
Show file tree
Hide file tree
Showing 3 changed files with 33 additions and 90 deletions.
28 changes: 7 additions & 21 deletions .github/workflows/scheduled_integration_test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,14 +14,20 @@ env:
POETRY_VERSION: "1.8.2"

jobs:
setup-environment:
test-integration:
name: Run Integration Tests
runs-on: ubuntu-latest
strategy:
max-parallel: 1 # Currently, we can only run one at a time for collection-per-db-constraints
matrix:
python-version:
- "3.12"
- "3.11"
- "3.10"
env:
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
ASTRA_DB_API_ENDPOINT: ${{ secrets.ASTRA_DB_API_ENDPOINT }}
ASTRA_DB_APPLICATION_TOKEN: ${{ secrets.ASTRA_DB_APPLICATION_TOKEN }}
steps:
- name: Checkout code
uses: actions/checkout@v4
Expand All @@ -37,26 +43,6 @@ jobs:
run: |
poetry env use ${{ matrix.python-version }}
poetry install
test-integration:
needs: setup-environment
name: Run Integration Tests
runs-on: ubuntu-latest
strategy:
matrix:
python-version:
- "3.12"
- "3.11"
- "3.10"
env:
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
ASTRA_DB_API_KEY: ${{ secrets.ASTRA_DB_API_KEY }}
ASTRA_DB_APPLICATION_TOKEN: ${{ secrets.ASTRA_DB_APPLICATION_TOKEN }}
steps:
- name: Checkout code
uses: actions/checkout@v4
with:
ref: ${{ inputs.branch || github.ref }}
- name: Run integration tests
timeout-minutes: 12
run: |
Expand Down
78 changes: 9 additions & 69 deletions src/backend/tests/integration/astra/test_astra_component.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import os

import pytest
from integration.utils import MockEmbeddings, check_env_vars
from integration.utils import MockEmbeddings, check_env_vars, valid_nvidia_vectorize_region
from langchain_core.documents import Document

# from langflow.components.memories.AstraDBMessageReader import AstraDBMessageReaderComponent
Expand Down Expand Up @@ -91,8 +91,9 @@ def test_astra_embeds_and_search(astra_fixture):


@pytest.mark.skipif(
not check_env_vars("ASTRA_DB_APPLICATION_TOKEN", "ASTRA_DB_API_ENDPOINT"),
reason="missing astra env vars",
not check_env_vars("ASTRA_DB_APPLICATION_TOKEN", "ASTRA_DB_API_ENDPOINT")
or not valid_nvidia_vectorize_region(os.getenv("ASTRA_DB_API_ENDPOINT")),
reason="missing env vars or invalid region for nvidia vectorize",
)
def test_astra_vectorize():
from langchain_astradb import AstraDBVectorStore, CollectionVectorServiceOptions
Expand All @@ -101,7 +102,7 @@ def test_astra_vectorize():

store = None
try:
options = {"provider": "nvidia", "modelName": "NV-Embed-QA", "parameters": {}, "authentication": {}}
options = {"provider": "nvidia", "modelName": "NV-Embed-QA"}
store = AstraDBVectorStore(
collection_name=VECTORIZE_COLLECTION,
api_endpoint=os.getenv("ASTRA_DB_API_ENDPOINT"),
Expand Down Expand Up @@ -177,6 +178,7 @@ def test_astra_vectorize_with_provider_api_key():
ingest_data=records,
embedding=vectorize_options,
search_input="test",
number_of_results=4,
)
component.build_vector_store()
records = component.search_documents()
Expand All @@ -187,7 +189,7 @@ def test_astra_vectorize_with_provider_api_key():


@pytest.mark.skipif(
not check_env_vars("ASTRA_DB_APPLICATION_TOKEN", "ASTRA_DB_API_ENDPOINT", "OPENAI_API_KEY"),
not check_env_vars("ASTRA_DB_APPLICATION_TOKEN", "ASTRA_DB_API_ENDPOINT"),
reason="missing env vars",
)
def test_astra_vectorize_passes_authentication():
Expand All @@ -204,7 +206,7 @@ def test_astra_vectorize_passes_authentication():
"provider": "openai",
"modelName": "text-embedding-3-small",
"parameters": {},
"authentication": {"providerKey": "providerKey"},
"authentication": {"providerKey": "apikey"},
}
store = AstraDBVectorStore(
collection_name=VECTORIZE_COLLECTION_OPENAI_WITH_AUTH,
Expand All @@ -217,7 +219,7 @@ def test_astra_vectorize_passes_authentication():

vectorize = AstraVectorizeComponent()
vectorize.build(
provider="OpenAI", model_name="text-embedding-3-small", authentication={"providerKey": "providerKey"}
provider="OpenAI", model_name="text-embedding-3-small", authentication={"providerKey": "apikey"}
)
vectorize_options = vectorize.build_options()

Expand All @@ -236,65 +238,3 @@ def test_astra_vectorize_passes_authentication():
finally:
if store is not None:
store.delete_collection()


# @pytest.mark.skipif(
# not check_env_vars("ASTRA_DB_APPLICATION_TOKEN", "ASTRA_DB_API_ENDPOINT"),
# reason="missing astra env vars",
# )
# def test_astra_memory():
# application_token = os.getenv("ASTRA_DB_APPLICATION_TOKEN")
# api_endpoint = os.getenv("ASTRA_DB_API_ENDPOINT")

# writer = AstraDBMessageWriterComponent()
# reader = AstraDBMessageReaderComponent()

# input_value = Data.from_document(
# Document(
# page_content="memory1",
# metadata={"session_id": 1, "sender": "human", "sender_name": "Bob"},
# )
# )
# writer.build(
# input_value=input_value,
# session_id=1,
# token=application_token,
# api_endpoint=api_endpoint,
# collection_name=MEMORY_COLLECTION,
# )

# # verify reading w/ same session id pulls the same record
# records = reader.build(
# session_id=1,
# token=application_token,
# api_endpoint=api_endpoint,
# collection_name=MEMORY_COLLECTION,
# )
# assert len(records) == 1
# assert isinstance(records[0], Data)
# content = records[0].get_text()
# assert content == "memory1"

# # verify reading w/ different session id does not pull the same record
# records = reader.build(
# session_id=2,
# token=application_token,
# api_endpoint=api_endpoint,
# collection_name=MEMORY_COLLECTION,
# )
# assert len(records) == 0

# # Cleanup store - doing here rather than fixture (see https://github.com/langchain-ai/langchain-datastax/pull/36)
# try:
# from langchain_astradb import AstraDBVectorStore
# except ImportError:
# raise ImportError(
# "Could not import langchain Astra DB integration package. Please install it with `pip install langchain-astradb`."
# )
# store = AstraDBVectorStore(
# collection_name=MEMORY_COLLECTION,
# embedding=MockEmbeddings(),
# api_endpoint=api_endpoint,
# token=application_token,
# )
# store.delete_collection()
17 changes: 17 additions & 0 deletions src/backend/tests/integration/utils.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import os
from typing import List

from astrapy.admin import parse_api_endpoint
from langflow.field_typing import Embeddings


Expand All @@ -17,6 +18,22 @@ def check_env_vars(*vars):
return all(os.getenv(var) for var in vars)


def valid_nvidia_vectorize_region(api_endpoint: str) -> bool:
    """
    Check whether the Astra DB region embedded in the API endpoint can host NVIDIA vectorize models.

    Args:
        api_endpoint (str): The full Astra DB API endpoint URL to parse.

    Returns:
        bool: True if the endpoint's region hosts NVIDIA vectorize models, False otherwise.

    Raises:
        ValueError: If ``api_endpoint`` cannot be parsed as an Astra DB endpoint.
    """
    parsed_endpoint = parse_api_endpoint(api_endpoint)
    if not parsed_endpoint:
        raise ValueError("Invalid ASTRA_DB_API_ENDPOINT")
    # Only us-east-2 is accepted here; presumably the sole region hosting the
    # NVIDIA vectorize service at the time of writing — TODO confirm as regions expand.
    return parsed_endpoint.region in ["us-east-2"]


class MockEmbeddings(Embeddings):
def __init__(self):
self.embedded_documents = None
Expand Down

0 comments on commit bc67127

Please sign in to comment.