Skip to content

Commit

Permalink
build: fix astra integration tests (#3518)
Browse files Browse the repository at this point in the history
  • Loading branch information
jordanrfrazier authored and ogabrielluiz committed Aug 27, 2024
1 parent 1d2ff38 commit bc67127
Show file tree
Hide file tree
Showing 3 changed files with 33 additions and 90 deletions.
28 changes: 7 additions & 21 deletions .github/workflows/scheduled_integration_test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,14 +14,20 @@ env:
POETRY_VERSION: "1.8.2"

jobs:
setup-environment:
test-integration:
name: Run Integration Tests
runs-on: ubuntu-latest
strategy:
max-parallel: 1 # Currently, we can only run one at a time for collection-per-db-constraints
matrix:
python-version:
- "3.12"
- "3.11"
- "3.10"
env:
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
ASTRA_DB_API_ENDPOINT: ${{ secrets.ASTRA_DB_API_ENDPOINT }}
ASTRA_DB_APPLICATION_TOKEN: ${{ secrets.ASTRA_DB_APPLICATION_TOKEN }}
steps:
- name: Checkout code
uses: actions/checkout@v4
Expand All @@ -37,26 +43,6 @@ jobs:
run: |
poetry env use ${{ matrix.python-version }}
poetry install
test-integration:
needs: setup-environment
name: Run Integration Tests
runs-on: ubuntu-latest
strategy:
matrix:
python-version:
- "3.12"
- "3.11"
- "3.10"
env:
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
ASTRA_DB_API_KEY: ${{ secrets.ASTRA_DB_API_KEY }}
ASTRA_DB_APPLICATION_TOKEN: ${{ secrets.ASTRA_DB_APPLICATION_TOKEN }}
steps:
- name: Checkout code
uses: actions/checkout@v4
with:
ref: ${{ inputs.branch || github.ref }}
- name: Run integration tests
timeout-minutes: 12
run: |
Expand Down
78 changes: 9 additions & 69 deletions src/backend/tests/integration/astra/test_astra_component.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import os

import pytest
from integration.utils import MockEmbeddings, check_env_vars
from integration.utils import MockEmbeddings, check_env_vars, valid_nvidia_vectorize_region
from langchain_core.documents import Document

# from langflow.components.memories.AstraDBMessageReader import AstraDBMessageReaderComponent
Expand Down Expand Up @@ -91,8 +91,9 @@ def test_astra_embeds_and_search(astra_fixture):


@pytest.mark.skipif(
not check_env_vars("ASTRA_DB_APPLICATION_TOKEN", "ASTRA_DB_API_ENDPOINT"),
reason="missing astra env vars",
not check_env_vars("ASTRA_DB_APPLICATION_TOKEN", "ASTRA_DB_API_ENDPOINT")
or not valid_nvidia_vectorize_region(os.getenv("ASTRA_DB_API_ENDPOINT")),
reason="missing env vars or invalid region for nvidia vectorize",
)
def test_astra_vectorize():
from langchain_astradb import AstraDBVectorStore, CollectionVectorServiceOptions
Expand All @@ -101,7 +102,7 @@ def test_astra_vectorize():

store = None
try:
options = {"provider": "nvidia", "modelName": "NV-Embed-QA", "parameters": {}, "authentication": {}}
options = {"provider": "nvidia", "modelName": "NV-Embed-QA"}
store = AstraDBVectorStore(
collection_name=VECTORIZE_COLLECTION,
api_endpoint=os.getenv("ASTRA_DB_API_ENDPOINT"),
Expand Down Expand Up @@ -177,6 +178,7 @@ def test_astra_vectorize_with_provider_api_key():
ingest_data=records,
embedding=vectorize_options,
search_input="test",
number_of_results=4,
)
component.build_vector_store()
records = component.search_documents()
Expand All @@ -187,7 +189,7 @@ def test_astra_vectorize_with_provider_api_key():


@pytest.mark.skipif(
not check_env_vars("ASTRA_DB_APPLICATION_TOKEN", "ASTRA_DB_API_ENDPOINT", "OPENAI_API_KEY"),
not check_env_vars("ASTRA_DB_APPLICATION_TOKEN", "ASTRA_DB_API_ENDPOINT"),
reason="missing env vars",
)
def test_astra_vectorize_passes_authentication():
Expand All @@ -204,7 +206,7 @@ def test_astra_vectorize_passes_authentication():
"provider": "openai",
"modelName": "text-embedding-3-small",
"parameters": {},
"authentication": {"providerKey": "providerKey"},
"authentication": {"providerKey": "apikey"},
}
store = AstraDBVectorStore(
collection_name=VECTORIZE_COLLECTION_OPENAI_WITH_AUTH,
Expand All @@ -217,7 +219,7 @@ def test_astra_vectorize_passes_authentication():

vectorize = AstraVectorizeComponent()
vectorize.build(
provider="OpenAI", model_name="text-embedding-3-small", authentication={"providerKey": "providerKey"}
provider="OpenAI", model_name="text-embedding-3-small", authentication={"providerKey": "apikey"}
)
vectorize_options = vectorize.build_options()

Expand All @@ -236,65 +238,3 @@ def test_astra_vectorize_passes_authentication():
finally:
if store is not None:
store.delete_collection()


# @pytest.mark.skipif(
# not check_env_vars("ASTRA_DB_APPLICATION_TOKEN", "ASTRA_DB_API_ENDPOINT"),
# reason="missing astra env vars",
# )
# def test_astra_memory():
# application_token = os.getenv("ASTRA_DB_APPLICATION_TOKEN")
# api_endpoint = os.getenv("ASTRA_DB_API_ENDPOINT")

# writer = AstraDBMessageWriterComponent()
# reader = AstraDBMessageReaderComponent()

# input_value = Data.from_document(
# Document(
# page_content="memory1",
# metadata={"session_id": 1, "sender": "human", "sender_name": "Bob"},
# )
# )
# writer.build(
# input_value=input_value,
# session_id=1,
# token=application_token,
# api_endpoint=api_endpoint,
# collection_name=MEMORY_COLLECTION,
# )

# # verify reading w/ same session id pulls the same record
# records = reader.build(
# session_id=1,
# token=application_token,
# api_endpoint=api_endpoint,
# collection_name=MEMORY_COLLECTION,
# )
# assert len(records) == 1
# assert isinstance(records[0], Data)
# content = records[0].get_text()
# assert content == "memory1"

# # verify reading w/ different session id does not pull the same record
# records = reader.build(
# session_id=2,
# token=application_token,
# api_endpoint=api_endpoint,
# collection_name=MEMORY_COLLECTION,
# )
# assert len(records) == 0

# # Cleanup store - doing here rather than fixture (see https://github.com/langchain-ai/langchain-datastax/pull/36)
# try:
# from langchain_astradb import AstraDBVectorStore
# except ImportError:
# raise ImportError(
# "Could not import langchain Astra DB integration package. Please install it with `pip install langchain-astradb`."
# )
# store = AstraDBVectorStore(
# collection_name=MEMORY_COLLECTION,
# embedding=MockEmbeddings(),
# api_endpoint=api_endpoint,
# token=application_token,
# )
# store.delete_collection()
17 changes: 17 additions & 0 deletions src/backend/tests/integration/utils.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import os
from typing import List

from astrapy.admin import parse_api_endpoint
from langflow.field_typing import Embeddings


Expand All @@ -17,6 +18,22 @@ def check_env_vars(*vars):
return all(os.getenv(var) for var in vars)


def valid_nvidia_vectorize_region(api_endpoint: str) -> bool:
    """
    Check whether the Astra DB region embedded in the API endpoint can host NVIDIA vectorize models.

    Args:
        api_endpoint (str): The full Astra DB API endpoint URL to parse.

    Returns:
        bool: True if the endpoint's region hosts NVIDIA vectorize models, False otherwise.

    Raises:
        ValueError: If ``api_endpoint`` cannot be parsed as an Astra DB endpoint.
    """
    parsed_endpoint = parse_api_endpoint(api_endpoint)
    if not parsed_endpoint:
        raise ValueError("Invalid ASTRA_DB_API_ENDPOINT")
    # Only us-east-2 is accepted here; presumably the sole region hosting the
    # NVIDIA vectorize service at the time of writing — TODO confirm as regions expand.
    return parsed_endpoint.region in ["us-east-2"]


class MockEmbeddings(Embeddings):
def __init__(self):
self.embedded_documents = None
Expand Down

0 comments on commit bc67127

Please sign in to comment.