From 746249c478460fe69ca68189b51210add6cbbb5d Mon Sep 17 00:00:00 2001 From: Jordan Frazier Date: Thu, 22 Aug 2024 18:18:18 -0700 Subject: [PATCH 01/11] test poetry install --- .github/workflows/scheduled_integration_test.yml | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/.github/workflows/scheduled_integration_test.yml b/.github/workflows/scheduled_integration_test.yml index a728a50ccffe..64e6834c6462 100644 --- a/.github/workflows/scheduled_integration_test.yml +++ b/.github/workflows/scheduled_integration_test.yml @@ -26,13 +26,19 @@ jobs: - name: Checkout code uses: actions/checkout@v4 with: - ref: ${{ inputs.branch || github.ref }} + ref: ${{ inputs.branch != '' && inputs.branch || github.ref }} - name: Set up Python ${{ matrix.python-version }} + Poetry ${{ env.POETRY_VERSION }} uses: "./.github/actions/poetry_caching" with: python-version: ${{ matrix.python-version }} poetry-version: ${{ env.POETRY_VERSION }} cache-key: ${{ runner.os }}-poetry-${{ env.POETRY_VERSION }}-${{ hashFiles('**/poetry.lock') }} + + - name: Verify Poetry Installation + run: | + echo "PATH: $PATH" + poetry --version + - name: Install Python dependencies run: | poetry env use ${{ matrix.python-version }} @@ -56,7 +62,7 @@ jobs: - name: Checkout code uses: actions/checkout@v4 with: - ref: ${{ inputs.branch || github.ref }} + ref: ${{ inputs.branch != '' && inputs.branch || github.ref }} - name: Run integration tests timeout-minutes: 12 run: | From 234a76abb4e16d39ba31d0451837aef64efb9b70 Mon Sep 17 00:00:00 2001 From: Jordan Frazier Date: Thu, 22 Aug 2024 18:23:09 -0700 Subject: [PATCH 02/11] move to single job --- .../workflows/scheduled_integration_test.yml | 33 ++++--------------- 1 file changed, 6 insertions(+), 27 deletions(-) diff --git a/.github/workflows/scheduled_integration_test.yml b/.github/workflows/scheduled_integration_test.yml index 64e6834c6462..da6a09c8585f 100644 --- a/.github/workflows/scheduled_integration_test.yml +++ 
b/.github/workflows/scheduled_integration_test.yml @@ -14,7 +14,8 @@ env: POETRY_VERSION: "1.8.2" jobs: - setup-environment: + test-integration: + name: Run Integration Tests runs-on: ubuntu-latest strategy: matrix: @@ -22,6 +23,10 @@ jobs: - "3.12" - "3.11" - "3.10" + env: + OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + ASTRA_DB_API_KEY: ${{ secrets.ASTRA_DB_API_KEY }} + ASTRA_DB_APPLICATION_TOKEN: ${{ secrets.ASTRA_DB_APPLICATION_TOKEN }} steps: - name: Checkout code uses: actions/checkout@v4 @@ -33,36 +38,10 @@ jobs: python-version: ${{ matrix.python-version }} poetry-version: ${{ env.POETRY_VERSION }} cache-key: ${{ runner.os }}-poetry-${{ env.POETRY_VERSION }}-${{ hashFiles('**/poetry.lock') }} - - - name: Verify Poetry Installation - run: | - echo "PATH: $PATH" - poetry --version - - name: Install Python dependencies run: | poetry env use ${{ matrix.python-version }} poetry install - - test-integration: - needs: setup-environment - name: Run Integration Tests - runs-on: ubuntu-latest - strategy: - matrix: - python-version: - - "3.12" - - "3.11" - - "3.10" - env: - OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} - ASTRA_DB_API_KEY: ${{ secrets.ASTRA_DB_API_KEY }} - ASTRA_DB_APPLICATION_TOKEN: ${{ secrets.ASTRA_DB_APPLICATION_TOKEN }} - steps: - - name: Checkout code - uses: actions/checkout@v4 - with: - ref: ${{ inputs.branch != '' && inputs.branch || github.ref }} - name: Run integration tests timeout-minutes: 12 run: | From 051a907653b44ce0fd007b65981ebabd6c2716a5 Mon Sep 17 00:00:00 2001 From: Jordan Frazier Date: Thu, 22 Aug 2024 19:15:40 -0700 Subject: [PATCH 03/11] fix astra integ tests --- src/backend/base/langflow/utils/util.py | 1 + .../integration/astra/test_astra_component.py | 5 +++-- src/backend/tests/integration/utils.py | 17 +++++++++++++++++ 3 files changed, 21 insertions(+), 2 deletions(-) diff --git a/src/backend/base/langflow/utils/util.py b/src/backend/base/langflow/utils/util.py index eb166cf0e70f..000ef672f58b 100644 --- 
a/src/backend/base/langflow/utils/util.py +++ b/src/backend/base/langflow/utils/util.py @@ -439,6 +439,7 @@ def update_settings( initialize_settings_service() settings_service = get_settings_service() if config: + print(f"frazierj: Config: {config}") logger.debug(f"Loading settings from {config}") settings_service.settings.update_from_yaml(config, dev=dev) if remove_api_keys: diff --git a/src/backend/tests/integration/astra/test_astra_component.py b/src/backend/tests/integration/astra/test_astra_component.py index e955539b3ee2..31228d9b8dd1 100644 --- a/src/backend/tests/integration/astra/test_astra_component.py +++ b/src/backend/tests/integration/astra/test_astra_component.py @@ -1,7 +1,7 @@ import os import pytest -from integration.utils import MockEmbeddings, check_env_vars +from integration.utils import MockEmbeddings, check_env_vars, valid_nvidia_vectorize_region from langchain_core.documents import Document # from langflow.components.memories.AstraDBMessageReader import AstraDBMessageReaderComponent @@ -91,7 +91,7 @@ def test_astra_embeds_and_search(astra_fixture): @pytest.mark.skipif( - not check_env_vars("ASTRA_DB_APPLICATION_TOKEN", "ASTRA_DB_API_ENDPOINT"), + not check_env_vars("ASTRA_DB_APPLICATION_TOKEN", "ASTRA_DB_API_ENDPOINT") or not valid_nvidia_vectorize_region(os.getenv("ASTRA_DB_API_ENDPOINT")), reason="missing astra env vars", ) def test_astra_vectorize(): @@ -177,6 +177,7 @@ def test_astra_vectorize_with_provider_api_key(): ingest_data=records, embedding=vectorize_options, search_input="test", + number_of_results=4, ) component.build_vector_store() records = component.search_documents() diff --git a/src/backend/tests/integration/utils.py b/src/backend/tests/integration/utils.py index 5895debd058b..5b2d8560b0e2 100644 --- a/src/backend/tests/integration/utils.py +++ b/src/backend/tests/integration/utils.py @@ -1,6 +1,7 @@ import os from typing import List +from astrapy.admin import parse_api_endpoint from langflow.field_typing import Embeddings 
@@ -16,6 +17,22 @@ def check_env_vars(*vars): """ return all(os.getenv(var) for var in vars) +def valid_nvidia_vectorize_region(api_endpoint: str) -> bool: + """ + Check if the specified region is valid. + + Args: + api_endpoint (str): The Astra DB API endpoint whose region is checked. + + Returns: + bool: True if the region contains hosted nvidia models, False otherwise. + """ + parsed_endpoint = parse_api_endpoint(api_endpoint) + if not parsed_endpoint: + raise ValueError(f"Invalid ASTRA_DB_API_ENDPOINT") + return parsed_endpoint.region in ["us-east-2"] + + class MockEmbeddings(Embeddings): def __init__(self): From 76f44190f34f3ee0072d595d4175a1fb0f18dc0e Mon Sep 17 00:00:00 2001 From: "autofix-ci[bot]" <114827586+autofix-ci[bot]@users.noreply.github.com> Date: Fri, 23 Aug 2024 02:19:07 +0000 Subject: [PATCH 04/11] [autofix.ci] apply automated fixes --- src/backend/tests/integration/astra/test_astra_component.py | 3 ++- src/backend/tests/integration/utils.py | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/src/backend/tests/integration/astra/test_astra_component.py b/src/backend/tests/integration/astra/test_astra_component.py index 31228d9b8dd1..821ce46f3e3e 100644 --- a/src/backend/tests/integration/astra/test_astra_component.py +++ b/src/backend/tests/integration/astra/test_astra_component.py @@ -91,7 +91,8 @@ def test_astra_embeds_and_search(astra_fixture): @pytest.mark.skipif( - not check_env_vars("ASTRA_DB_APPLICATION_TOKEN", "ASTRA_DB_API_ENDPOINT") or not valid_nvidia_vectorize_region(os.getenv("ASTRA_DB_API_ENDPOINT")), + not check_env_vars("ASTRA_DB_APPLICATION_TOKEN", "ASTRA_DB_API_ENDPOINT") + or not valid_nvidia_vectorize_region(os.getenv("ASTRA_DB_API_ENDPOINT")), reason="missing astra env vars", ) def test_astra_vectorize(): diff --git a/src/backend/tests/integration/utils.py b/src/backend/tests/integration/utils.py index 5b2d8560b0e2..1389cd082fd9 100644 --- a/src/backend/tests/integration/utils.py +++ b/src/backend/tests/integration/utils.py @@ -17,6 +17,7 
@@ def check_env_vars(*vars): """ return all(os.getenv(var) for var in vars) + def valid_nvidia_vectorize_region(api_endpoint: str) -> bool: """ Check if the specified region is valid. @@ -29,11 +30,10 @@ def valid_nvidia_vectorize_region(api_endpoint: str) -> bool: """ parsed_endpoint = parse_api_endpoint(api_endpoint) if not parsed_endpoint: - raise ValueError(f"Invalid ASTRA_DB_API_ENDPOINT") + raise ValueError("Invalid ASTRA_DB_API_ENDPOINT") return parsed_endpoint.region in ["us-east-2"] - class MockEmbeddings(Embeddings): def __init__(self): self.embedded_documents = None From 183ebdf5da0950ddf9667a9c576ed3abba249cec Mon Sep 17 00:00:00 2001 From: Jordan Frazier Date: Fri, 23 Aug 2024 10:58:15 -0700 Subject: [PATCH 05/11] remove print --- src/backend/base/langflow/utils/util.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/backend/base/langflow/utils/util.py b/src/backend/base/langflow/utils/util.py index 000ef672f58b..eb166cf0e70f 100644 --- a/src/backend/base/langflow/utils/util.py +++ b/src/backend/base/langflow/utils/util.py @@ -439,7 +439,6 @@ def update_settings( initialize_settings_service() settings_service = get_settings_service() if config: - print(f"frazierj: Config: {config}") logger.debug(f"Loading settings from {config}") settings_service.settings.update_from_yaml(config, dev=dev) if remove_api_keys: From 3d4f63e2dd245640958be9317d41b09d844b7999 Mon Sep 17 00:00:00 2001 From: Jordan Frazier Date: Fri, 23 Aug 2024 11:23:03 -0700 Subject: [PATCH 06/11] fix naming of env var --- .github/workflows/scheduled_integration_test.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/scheduled_integration_test.yml b/.github/workflows/scheduled_integration_test.yml index da6a09c8585f..3a693e2d4963 100644 --- a/.github/workflows/scheduled_integration_test.yml +++ b/.github/workflows/scheduled_integration_test.yml @@ -25,13 +25,13 @@ jobs: - "3.10" env: OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} - 
ASTRA_DB_API_KEY: ${{ secrets.ASTRA_DB_API_KEY }} + ASTRA_DB_API_ENDPOINT: ${{ secrets.ASTRA_DB_API_ENDPOINT }} ASTRA_DB_APPLICATION_TOKEN: ${{ secrets.ASTRA_DB_APPLICATION_TOKEN }} steps: - name: Checkout code uses: actions/checkout@v4 with: - ref: ${{ inputs.branch != '' && inputs.branch || github.ref }} + ref: ${{ inputs.branch || github.ref }} - name: Set up Python ${{ matrix.python-version }} + Poetry ${{ env.POETRY_VERSION }} uses: "./.github/actions/poetry_caching" with: From af44e07128625b88e5b99017c45fbefb99d12b0f Mon Sep 17 00:00:00 2001 From: Jordan Frazier Date: Fri, 23 Aug 2024 12:57:30 -0700 Subject: [PATCH 07/11] clarify error message --- src/backend/tests/integration/astra/test_astra_component.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/backend/tests/integration/astra/test_astra_component.py b/src/backend/tests/integration/astra/test_astra_component.py index 821ce46f3e3e..5d862f603d26 100644 --- a/src/backend/tests/integration/astra/test_astra_component.py +++ b/src/backend/tests/integration/astra/test_astra_component.py @@ -93,7 +93,7 @@ def test_astra_embeds_and_search(astra_fixture): @pytest.mark.skipif( not check_env_vars("ASTRA_DB_APPLICATION_TOKEN", "ASTRA_DB_API_ENDPOINT") or not valid_nvidia_vectorize_region(os.getenv("ASTRA_DB_API_ENDPOINT")), - reason="missing astra env vars", + reason="missing env vars or invalid region for nvidia vectorize", ) def test_astra_vectorize(): from langchain_astradb import AstraDBVectorStore, CollectionVectorServiceOptions From 7f7ed1700db59cf73763c4138287737be2e515d6 Mon Sep 17 00:00:00 2001 From: Jordan Frazier Date: Fri, 23 Aug 2024 13:45:17 -0700 Subject: [PATCH 08/11] Add max parallelism to jobs --- .github/workflows/python_test.yml | 1 + .../workflows/scheduled_integration_test.yml | 1 + .../integration/astra/test_astra_component.py | 62 ------------------- 3 files changed, 2 insertions(+), 62 deletions(-) diff --git a/.github/workflows/python_test.yml
b/.github/workflows/python_test.yml index 1c3039a54af9..24b4f4f0914d 100644 --- a/.github/workflows/python_test.yml +++ b/.github/workflows/python_test.yml @@ -28,6 +28,7 @@ jobs: name: Unit Tests - Python ${{ matrix.python-version }} - Group ${{ matrix.group }} runs-on: ubuntu-latest strategy: + max-parallel: 1 matrix: python-version: ${{ fromJson(inputs.python-versions || '["3.10", "3.11", "3.12"]' ) }} splitCount: [5] diff --git a/.github/workflows/scheduled_integration_test.yml b/.github/workflows/scheduled_integration_test.yml index 3a693e2d4963..ab8e25f25a4d 100644 --- a/.github/workflows/scheduled_integration_test.yml +++ b/.github/workflows/scheduled_integration_test.yml @@ -18,6 +18,7 @@ jobs: name: Run Integration Tests runs-on: ubuntu-latest strategy: + max-parallel: 1 # Currently, we can only run at a time for collection-per-db-constraints (TODO: issue) matrix: python-version: - "3.12" diff --git a/src/backend/tests/integration/astra/test_astra_component.py b/src/backend/tests/integration/astra/test_astra_component.py index 5d862f603d26..594043a57888 100644 --- a/src/backend/tests/integration/astra/test_astra_component.py +++ b/src/backend/tests/integration/astra/test_astra_component.py @@ -238,65 +238,3 @@ def test_astra_vectorize_passes_authentication(): finally: if store is not None: store.delete_collection() - - -# @pytest.mark.skipif( -# not check_env_vars("ASTRA_DB_APPLICATION_TOKEN", "ASTRA_DB_API_ENDPOINT"), -# reason="missing astra env vars", -# ) -# def test_astra_memory(): -# application_token = os.getenv("ASTRA_DB_APPLICATION_TOKEN") -# api_endpoint = os.getenv("ASTRA_DB_API_ENDPOINT") - -# writer = AstraDBMessageWriterComponent() -# reader = AstraDBMessageReaderComponent() - -# input_value = Data.from_document( -# Document( -# page_content="memory1", -# metadata={"session_id": 1, "sender": "human", "sender_name": "Bob"}, -# ) -# ) -# writer.build( -# input_value=input_value, -# session_id=1, -# token=application_token, -# 
api_endpoint=api_endpoint, -# collection_name=MEMORY_COLLECTION, -# ) - -# # verify reading w/ same session id pulls the same record -# records = reader.build( -# session_id=1, -# token=application_token, -# api_endpoint=api_endpoint, -# collection_name=MEMORY_COLLECTION, -# ) -# assert len(records) == 1 -# assert isinstance(records[0], Data) -# content = records[0].get_text() -# assert content == "memory1" - -# # verify reading w/ different session id does not pull the same record -# records = reader.build( -# session_id=2, -# token=application_token, -# api_endpoint=api_endpoint, -# collection_name=MEMORY_COLLECTION, -# ) -# assert len(records) == 0 - -# # Cleanup store - doing here rather than fixture (see https://github.com/langchain-ai/langchain-datastax/pull/36) -# try: -# from langchain_astradb import AstraDBVectorStore -# except ImportError: -# raise ImportError( -# "Could not import langchain Astra DB integration package. Please install it with `pip install langchain-astradb`." -# ) -# store = AstraDBVectorStore( -# collection_name=MEMORY_COLLECTION, -# embedding=MockEmbeddings(), -# api_endpoint=api_endpoint, -# token=application_token, -# ) -# store.delete_collection() From cd3ded3842bf89225dfc1239e9b0a87835d9a0e6 Mon Sep 17 00:00:00 2001 From: Jordan Frazier Date: Fri, 23 Aug 2024 15:14:03 -0700 Subject: [PATCH 09/11] update providerkey --- .../tests/integration/astra/test_astra_component.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/backend/tests/integration/astra/test_astra_component.py b/src/backend/tests/integration/astra/test_astra_component.py index 594043a57888..2742b0e3bd2c 100644 --- a/src/backend/tests/integration/astra/test_astra_component.py +++ b/src/backend/tests/integration/astra/test_astra_component.py @@ -102,7 +102,7 @@ def test_astra_vectorize(): store = None try: - options = {"provider": "nvidia", "modelName": "NV-Embed-QA", "parameters": {}, "authentication": {}} + options = {"provider": "nvidia", 
"modelName": "NV-Embed-QA"} store = AstraDBVectorStore( collection_name=VECTORIZE_COLLECTION, api_endpoint=os.getenv("ASTRA_DB_API_ENDPOINT"), @@ -189,7 +189,7 @@ def test_astra_vectorize_with_provider_api_key(): @pytest.mark.skipif( - not check_env_vars("ASTRA_DB_APPLICATION_TOKEN", "ASTRA_DB_API_ENDPOINT", "OPENAI_API_KEY"), + not check_env_vars("ASTRA_DB_APPLICATION_TOKEN", "ASTRA_DB_API_ENDPOINT"), reason="missing env vars", ) def test_astra_vectorize_passes_authentication(): @@ -206,7 +206,7 @@ def test_astra_vectorize_passes_authentication(): "provider": "openai", "modelName": "text-embedding-3-small", "parameters": {}, - "authentication": {"providerKey": "providerKey"}, + "authentication": {"providerKey": "apikey"}, } store = AstraDBVectorStore( collection_name=VECTORIZE_COLLECTION_OPENAI_WITH_AUTH, @@ -219,7 +219,7 @@ def test_astra_vectorize_passes_authentication(): vectorize = AstraVectorizeComponent() vectorize.build( - provider="OpenAI", model_name="text-embedding-3-small", authentication={"providerKey": "providerKey"} + provider="OpenAI", model_name="text-embedding-3-small", authentication={"providerKey": "apikey"} ) vectorize_options = vectorize.build_options() From d39b7fd0a081f9159127a341b9d17909f50e590a Mon Sep 17 00:00:00 2001 From: Jordan Frazier Date: Fri, 23 Aug 2024 15:17:26 -0700 Subject: [PATCH 10/11] remove --- .github/workflows/python_test.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/python_test.yml b/.github/workflows/python_test.yml index 24b4f4f0914d..1c3039a54af9 100644 --- a/.github/workflows/python_test.yml +++ b/.github/workflows/python_test.yml @@ -28,7 +28,6 @@ jobs: name: Unit Tests - Python ${{ matrix.python-version }} - Group ${{ matrix.group }} runs-on: ubuntu-latest strategy: - max-parallel: 1 matrix: python-version: ${{ fromJson(inputs.python-versions || '["3.10", "3.11", "3.12"]' ) }} splitCount: [5] From 97f8197afdd318cd7ab819972c5f8e6f36b709f7 Mon Sep 17 00:00:00 2001 From: Jordan Frazier Date: 
Mon, 26 Aug 2024 09:21:42 -0700 Subject: [PATCH 11/11] remove todo --- .github/workflows/scheduled_integration_test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/scheduled_integration_test.yml b/.github/workflows/scheduled_integration_test.yml index ab8e25f25a4d..56afc035f2ea 100644 --- a/.github/workflows/scheduled_integration_test.yml +++ b/.github/workflows/scheduled_integration_test.yml @@ -18,7 +18,7 @@ jobs: name: Run Integration Tests runs-on: ubuntu-latest strategy: - max-parallel: 1 # Currently, we can only run at a time for collection-per-db-constraints (TODO: issue) + max-parallel: 1 # Currently, we can only run one at a time for collection-per-db-constraints matrix: python-version: - "3.12"