build: fix astra integration tests #3518

Merged 11 commits on Aug 26, 2024
28 changes: 7 additions & 21 deletions .github/workflows/scheduled_integration_test.yml
@@ -14,14 +14,20 @@ env:
   POETRY_VERSION: "1.8.2"
 
 jobs:
-  setup-environment:
+  test-integration:
     name: Run Integration Tests
     runs-on: ubuntu-latest
     strategy:
+      max-parallel: 1  # Currently, we can only run one at a time due to collection-per-db constraints
       matrix:
         python-version:
           - "3.12"
           - "3.11"
           - "3.10"
+    env:
+      OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
+      ASTRA_DB_API_ENDPOINT: ${{ secrets.ASTRA_DB_API_ENDPOINT }}
+      ASTRA_DB_APPLICATION_TOKEN: ${{ secrets.ASTRA_DB_APPLICATION_TOKEN }}
     steps:
       - name: Checkout code
         uses: actions/checkout@v4
@@ -37,26 +43,6 @@ jobs:
         run: |
           poetry env use ${{ matrix.python-version }}
           poetry install
-
-  test-integration:
-    needs: setup-environment
-    name: Run Integration Tests
-    runs-on: ubuntu-latest
-    strategy:
-      matrix:
-        python-version:
-          - "3.12"
-          - "3.11"
-          - "3.10"
-    env:
-      OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
-      ASTRA_DB_API_KEY: ${{ secrets.ASTRA_DB_API_KEY }}
-      ASTRA_DB_APPLICATION_TOKEN: ${{ secrets.ASTRA_DB_APPLICATION_TOKEN }}
-    steps:
-      - name: Checkout code
-        uses: actions/checkout@v4
-        with:
-          ref: ${{ inputs.branch || github.ref }}
       - name: Run integration tests
         timeout-minutes: 12
         run: |
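Two fixes are visible in this pair of hunks: the two-job split is collapsed so the credentials and the test run live in the same job, and the misnamed ASTRA_DB_API_KEY secret is replaced by ASTRA_DB_API_ENDPOINT, the variable the tests actually read. A minimal Python sketch of the guard the runner now satisfies (variable names taken from the diff; the printout is illustrative only):

```python
import os

# The Astra tests skip themselves unless these exact variables are set.
# The old workflow exported ASTRA_DB_API_KEY instead of ASTRA_DB_API_ENDPOINT,
# so the guard presumably never passed on CI.
required = ("ASTRA_DB_APPLICATION_TOKEN", "ASTRA_DB_API_ENDPOINT")
missing = [name for name in required if not os.getenv(name)]
if missing:
    print(f"Astra integration tests would be skipped; missing: {missing}")
```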
78 changes: 9 additions & 69 deletions src/backend/tests/integration/astra/test_astra_component.py
@@ -1,7 +1,7 @@
 import os
 
 import pytest
-from integration.utils import MockEmbeddings, check_env_vars
+from integration.utils import MockEmbeddings, check_env_vars, valid_nvidia_vectorize_region
 from langchain_core.documents import Document
 
 # from langflow.components.memories.AstraDBMessageReader import AstraDBMessageReaderComponent
@@ -91,8 +91,9 @@ def test_astra_embeds_and_search(astra_fixture):
 
 
 @pytest.mark.skipif(
-    not check_env_vars("ASTRA_DB_APPLICATION_TOKEN", "ASTRA_DB_API_ENDPOINT"),
-    reason="missing astra env vars",
+    not check_env_vars("ASTRA_DB_APPLICATION_TOKEN", "ASTRA_DB_API_ENDPOINT")
+    or not valid_nvidia_vectorize_region(os.getenv("ASTRA_DB_API_ENDPOINT")),
+    reason="missing env vars or invalid region for nvidia vectorize",
 )
 def test_astra_vectorize():
     from langchain_astradb import AstraDBVectorStore, CollectionVectorServiceOptions
@@ -101,7 +102,7 @@ def test_astra_vectorize():
 
     store = None
     try:
-        options = {"provider": "nvidia", "modelName": "NV-Embed-QA", "parameters": {}, "authentication": {}}
+        options = {"provider": "nvidia", "modelName": "NV-Embed-QA"}
         store = AstraDBVectorStore(
             collection_name=VECTORIZE_COLLECTION,
             api_endpoint=os.getenv("ASTRA_DB_API_ENDPOINT"),
@@ -177,6 +178,7 @@ def test_astra_vectorize_with_provider_api_key():
         ingest_data=records,
         embedding=vectorize_options,
         search_input="test",
+        number_of_results=4,
     )
     component.build_vector_store()
     records = component.search_documents()
@@ -187,7 +189,7 @@
 
 
 @pytest.mark.skipif(
-    not check_env_vars("ASTRA_DB_APPLICATION_TOKEN", "ASTRA_DB_API_ENDPOINT", "OPENAI_API_KEY"),
+    not check_env_vars("ASTRA_DB_APPLICATION_TOKEN", "ASTRA_DB_API_ENDPOINT"),
     reason="missing env vars",
 )
 def test_astra_vectorize_passes_authentication():
@@ -204,7 +206,7 @@ def test_astra_vectorize_passes_authentication():
             "provider": "openai",
             "modelName": "text-embedding-3-small",
             "parameters": {},
-            "authentication": {"providerKey": "providerKey"},
+            "authentication": {"providerKey": "apikey"},
         }
         store = AstraDBVectorStore(
             collection_name=VECTORIZE_COLLECTION_OPENAI_WITH_AUTH,
@@ -217,7 +219,7 @@ def test_astra_vectorize_passes_authentication():
 
         vectorize = AstraVectorizeComponent()
         vectorize.build(
-            provider="OpenAI", model_name="text-embedding-3-small", authentication={"providerKey": "providerKey"}
+            provider="OpenAI", model_name="text-embedding-3-small", authentication={"providerKey": "apikey"}
         )
         vectorize_options = vectorize.build_options()
 
@@ -236,65 +238,3 @@ def test_astra_vectorize_passes_authentication():
     finally:
         if store is not None:
             store.delete_collection()
-
-
-# @pytest.mark.skipif(
-#     not check_env_vars("ASTRA_DB_APPLICATION_TOKEN", "ASTRA_DB_API_ENDPOINT"),
-#     reason="missing astra env vars",
-# )
-# def test_astra_memory():
-#     application_token = os.getenv("ASTRA_DB_APPLICATION_TOKEN")
-#     api_endpoint = os.getenv("ASTRA_DB_API_ENDPOINT")
-
-#     writer = AstraDBMessageWriterComponent()
-#     reader = AstraDBMessageReaderComponent()
-
-#     input_value = Data.from_document(
-#         Document(
-#             page_content="memory1",
-#             metadata={"session_id": 1, "sender": "human", "sender_name": "Bob"},
-#         )
-#     )
-#     writer.build(
-#         input_value=input_value,
-#         session_id=1,
-#         token=application_token,
-#         api_endpoint=api_endpoint,
-#         collection_name=MEMORY_COLLECTION,
-#     )
-
-#     # verify reading w/ same session id pulls the same record
-#     records = reader.build(
-#         session_id=1,
-#         token=application_token,
-#         api_endpoint=api_endpoint,
-#         collection_name=MEMORY_COLLECTION,
-#     )
-#     assert len(records) == 1
-#     assert isinstance(records[0], Data)
-#     content = records[0].get_text()
-#     assert content == "memory1"
-
-#     # verify reading w/ different session id does not pull the same record
-#     records = reader.build(
-#         session_id=2,
-#         token=application_token,
-#         api_endpoint=api_endpoint,
-#         collection_name=MEMORY_COLLECTION,
-#     )
-#     assert len(records) == 0
-
-#     # Cleanup store - doing here rather than fixture (see https://github.com/langchain-ai/langchain-datastax/pull/36)
-#     try:
-#         from langchain_astradb import AstraDBVectorStore
-#     except ImportError:
-#         raise ImportError(
-#             "Could not import langchain Astra DB integration package. Please install it with `pip install langchain-astradb`."
-#         )
-#     store = AstraDBVectorStore(
-#         collection_name=MEMORY_COLLECTION,
-#         embedding=MockEmbeddings(),
-#         api_endpoint=api_endpoint,
-#         token=application_token,
-#     )
-#     store.delete_collection()
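Two details of the test changes are easy to miss: the NVIDIA options dict drops its empty "parameters" and "authentication" entries, since NV-Embed-QA is served by Astra DB itself, and the OpenAI test now passes a stored credential name ("apikey") as providerKey rather than a placeholder string. A hedged sketch of how such dicts map onto vectorize options; the keyword names follow the CollectionVectorServiceOptions import in the diff, but treat the exact constructor signature as an assumption:

```python
from langchain_astradb import CollectionVectorServiceOptions

# Sketch, not the PR's exact code: server-side NVIDIA vectorize needs only a
# provider and model name, because the model is hosted inside Astra DB.
nvidia_options = CollectionVectorServiceOptions(
    provider="nvidia",
    model_name="NV-Embed-QA",
)

# For OpenAI, "providerKey" names an API key stored in Astra DB (assumed here
# to be saved under the name "apikey"), so no raw key travels with the test.
openai_options = CollectionVectorServiceOptions(
    provider="openai",
    model_name="text-embedding-3-small",
    authentication={"providerKey": "apikey"},
)
```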
17 changes: 17 additions & 0 deletions src/backend/tests/integration/utils.py
@@ -1,6 +1,7 @@
 import os
 from typing import List
 
+from astrapy.admin import parse_api_endpoint
 from langflow.field_typing import Embeddings
 
 
@@ -17,6 +18,22 @@ def check_env_vars(*vars):
     return all(os.getenv(var) for var in vars)
 
 
+def valid_nvidia_vectorize_region(api_endpoint: str) -> bool:
+    """
+    Check if the database region supports NVIDIA vectorize.
+
+    Args:
+        api_endpoint (str): The Astra DB API endpoint to check.
+
+    Returns:
+        bool: True if the region contains hosted NVIDIA models, False otherwise.
+    """
+    parsed_endpoint = parse_api_endpoint(api_endpoint)
+    if not parsed_endpoint:
+        raise ValueError("Invalid ASTRA_DB_API_ENDPOINT")
+    return parsed_endpoint.region in ["us-east-2"]
+
+
 class MockEmbeddings(Embeddings):
     def __init__(self):
         self.embedded_documents = None
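For reference, a small usage sketch of the new helper. The endpoint below is a made-up example of the standard Astra endpoint shape (database UUID followed by region); parse_api_endpoint and the .region attribute come straight from the diff above:

```python
from integration.utils import valid_nvidia_vectorize_region

# Hypothetical endpoint for a database in us-east-2, the only region the
# helper currently accepts for hosted NVIDIA models.
endpoint = "https://01234567-89ab-cdef-0123-456789abcdef-us-east-2.apps.astra.datastax.com"

print(valid_nvidia_vectorize_region(endpoint))  # True
print(valid_nvidia_vectorize_region(endpoint.replace("us-east-2", "eu-west-1")))  # False
```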