From 16775d11bc6d7a1a1464d5e2908c30e90f26d86b Mon Sep 17 00:00:00 2001
From: Eric Hare <>
Date: Fri, 31 Jan 2025 08:52:11 -0800
Subject: [PATCH 1/7] bugfix: Throw upstream error when invalid token

 .../components/vectorstores/        | 37 ++++++++++++-------
 1 file changed, 24 insertions(+), 13 deletions(-)

diff --git a/src/backend/base/langflow/components/vectorstores/ b/src/backend/base/langflow/components/vectorstores/
index c529e92193d2..d617e85fc1f3 100644
--- a/src/backend/base/langflow/components/vectorstores/
+++ b/src/backend/base/langflow/components/vectorstores/
@@ -447,10 +447,9 @@ def _initialize_database_options(self):
                 for name, info in self.get_database_list().items()
-        except Exception as e:  # noqa: BLE001
-            self.log(f"Error fetching databases: {e}")
-            return []
+        except Exception as e:
+            msg = f"Error fetching database options: {e}"
+            raise ValueError(msg) from e
     def _initialize_collection_options(self, api_endpoint: str | None = None):
         database = self.get_database_object(api_endpoint=api_endpoint)
@@ -483,25 +482,37 @@ def _initialize_collection_options(self, api_endpoint: str | None = None):
             return []
+    def reset_build_config(self, build_config: dict):
+        """Clear the database and collection selections stored in ``build_config``.
+
+        The dict is modified in place and the same object is returned.
+
+        Args:
+            build_config: Build configuration holding the ``api_endpoint`` and
+                ``collection_name`` field entries.
+
+        Returns:
+            The ``build_config`` dict that was passed in, after the reset.
+        """
+        # Reset the list of databases we have based on the token provided:
+        # no options, no per-option metadata, and no selected value
+        build_config["api_endpoint"]["options"] = []
+        build_config["api_endpoint"]["options_metadata"] = []
+        build_config["api_endpoint"]["value"] = ""
+        # The api_endpoint entry's "name" is set back to "Database"
+        build_config["api_endpoint"]["name"] = "Database"
+        # Reset the list of collections and their associated metadata the same way
+        build_config["collection_name"]["options"] = []
+        build_config["collection_name"]["options_metadata"] = []
+        build_config["collection_name"]["value"] = ""
+        return build_config
     def update_build_config(self, build_config: dict, field_value: str, field_name: str | None = None):
         # TODO: Remove special astra flags when overlays are out
         # TODO: Better targeting of this field
         dslf = os.getenv("AWS_EXECUTION_ENV") == "AWS_ECS_FARGATE"
+        # If no token has been provided, reset the database/collection options and return early
+        if not self.token:
+            return self.reset_build_config(build_config)
         # Refresh the database name options
         if not dslf and (field_name in ["token", "environment"] or not build_config["api_endpoint"]["options"]):
+            # Reset the build config to ensure we are starting fresh
+            build_config = self.reset_build_config(build_config)
             # Get the list of options we have based on the token provided
             database_options = self._initialize_database_options()
-            # Reset the collection values selected
-            build_config["collection_name"]["options"] = []
-            build_config["collection_name"]["options_metadata"] = []
-            build_config["collection_name"]["value"] = ""
-            # Scenario #1: We have database options from the provided token
-            build_config["api_endpoint"]["value"] = ""
-            build_config["api_endpoint"]["name"] = "Database"
             # If we retrieved options based on the token, show the dropdown
             build_config["api_endpoint"]["options"] = [db["name"] for db in database_options]
             build_config["api_endpoint"]["options_metadata"] = [

From 901d15b2b98b067b1ef73fcbb3d652297fa29d47 Mon Sep 17 00:00:00 2001
From: "autofix-ci[bot]" <114827586+autofix-ci[bot]>
Date: Fri, 31 Jan 2025 16:54:07 +0000
Subject: [PATCH 2/7] [] apply automated fixes

 .../initial_setup/starter_projects/Vector Store RAG.json      | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/backend/base/langflow/initial_setup/starter_projects/Vector Store RAG.json b/src/backend/base/langflow/initial_setup/starter_projects/Vector Store RAG.json
index f115adda2381..4056a76374ea 100644
--- a/src/backend/base/langflow/initial_setup/starter_projects/Vector Store RAG.json	
+++ b/src/backend/base/langflow/initial_setup/starter_projects/Vector Store RAG.json	
@@ -3300,7 +3300,7 @@
                 "show": true,
                 "title_case": false,
                 "type": "code",
-                "value": "import os\nfrom collections import defaultdict\nfrom dataclasses import dataclass, field\n\nfrom astrapy import AstraDBAdmin, DataAPIClient, Database\nfrom langchain_astradb import AstraDBVectorStore, CollectionVectorServiceOptions\n\nfrom langflow.base.vectorstores.model import LCVectorStoreComponent, check_cached_vector_store\nfrom langflow.helpers import docs_to_data\nfrom langflow.inputs import FloatInput, NestedDictInput\nfrom import (\n    BoolInput,\n    DropdownInput,\n    HandleInput,\n    IntInput,\n    SecretStrInput,\n    StrInput,\n)\nfrom langflow.schema import Data\nfrom langflow.utils.version import get_version_info\n\n\nclass AstraDBVectorStoreComponent(LCVectorStoreComponent):\n    display_name: str = \"Astra DB\"\n    description: str = \"Ingest and search documents in Astra DB\"\n    documentation: str = \"\"\n    name = \"AstraDB\"\n    icon: str = \"AstraDB\"\n\n    _cached_vector_store: AstraDBVectorStore | None = None\n\n    @dataclass\n    class NewDatabaseInput:\n        functionality: str = \"create\"\n        fields: dict[str, dict] = field(\n            default_factory=lambda: {\n                \"data\": {\n                    \"node\": {\n                        \"description\": \"Create a new database in Astra DB.\",\n                        \"display_name\": \"Create New Database\",\n                        \"field_order\": [\"new_database_name\", \"cloud_provider\", \"region\"],\n                        \"template\": {\n                            \"new_database_name\": StrInput(\n                                name=\"new_database_name\",\n                                display_name=\"New Database Name\",\n                                info=\"Name of the new database to create in Astra DB.\",\n                                required=True,\n                            ),\n                            \"cloud_provider\": DropdownInput(\n                                name=\"cloud_provider\",\n          
                      display_name=\"Cloud Provider\",\n                                info=\"Cloud provider for the new database.\",\n                                options=[\"Amazon Web Services\", \"Google Cloud Platform\", \"Microsoft Azure\"],\n                                required=True,\n                            ),\n                            \"region\": DropdownInput(\n                                name=\"region\",\n                                display_name=\"Region\",\n                                info=\"Region for the new database.\",\n                                options=[],\n                                required=True,\n                            ),\n                        },\n                    },\n                }\n            }\n        )\n\n    @dataclass\n    class NewCollectionInput:\n        functionality: str = \"create\"\n        fields: dict[str, dict] = field(\n            default_factory=lambda: {\n                \"data\": {\n                    \"node\": {\n                        \"description\": \"Create a new collection in Astra DB.\",\n                        \"display_name\": \"Create New Collection\",\n                        \"field_order\": [\n                            \"new_collection_name\",\n                            \"embedding_generation_provider\",\n                            \"embedding_generation_model\",\n                        ],\n                        \"template\": {\n                            \"new_collection_name\": StrInput(\n                                name=\"new_collection_name\",\n                                display_name=\"New Collection Name\",\n                                info=\"Name of the new collection to create in Astra DB.\",\n                                required=True,\n                            ),\n                            \"embedding_generation_provider\": DropdownInput(\n                                name=\"embedding_generation_provider\",\n        
                        display_name=\"Embedding Generation Provider\",\n                                info=\"Provider to use for generating embeddings.\",\n                                options=[],\n                                required=True,\n                            ),\n                            \"embedding_generation_model\": DropdownInput(\n                                name=\"embedding_generation_model\",\n                                display_name=\"Embedding Generation Model\",\n                                info=\"Model to use for generating embeddings.\",\n                                options=[],\n                                required=True,\n                            ),\n                        },\n                    },\n                }\n            }\n        )\n\n    inputs = [\n        SecretStrInput(\n            name=\"token\",\n            display_name=\"Astra DB Application Token\",\n            info=\"Authentication token for accessing Astra DB.\",\n            value=\"ASTRA_DB_APPLICATION_TOKEN\",\n            required=True,\n            real_time_refresh=True,\n            input_types=[],\n        ),\n        StrInput(\n            name=\"environment\",\n            display_name=\"Environment\",\n            info=\"The environment for the Astra DB API Endpoint.\",\n            advanced=True,\n        ),\n        DropdownInput(\n            name=\"api_endpoint\",\n            display_name=\"Database\",\n            info=\"The Database / API Endpoint for the Astra DB instance.\",\n            required=True,\n            refresh_button=True,\n            real_time_refresh=True,\n            combobox=True,\n        ),\n        StrInput(\n            name=\"d_api_endpoint\",\n            display_name=\"Database API Endpoint\",\n            info=\"The API Endpoint for the Astra DB instance. 
Supercedes database selection.\",\n            advanced=True,\n        ),\n        DropdownInput(\n            name=\"collection_name\",\n            display_name=\"Collection\",\n            info=\"The name of the collection within Astra DB where the vectors will be stored.\",\n            required=True,\n            refresh_button=True,\n            real_time_refresh=True,\n            # dialog_inputs=asdict(NewCollectionInput()),\n            combobox=True,\n        ),\n        StrInput(\n            name=\"keyspace\",\n            display_name=\"Keyspace\",\n            info=\"Optional keyspace within Astra DB to use for the collection.\",\n            advanced=True,\n        ),\n        DropdownInput(\n            name=\"embedding_choice\",\n            display_name=\"Embedding Model or Astra Vectorize\",\n            info=\"Choose an embedding model or use Astra Vectorize.\",\n            options=[\"Embedding Model\", \"Astra Vectorize\"],\n            value=\"Embedding Model\",\n            advanced=True,\n            real_time_refresh=True,\n        ),\n        HandleInput(\n            name=\"embedding_model\",\n            display_name=\"Embedding Model\",\n            input_types=[\"Embeddings\"],\n            info=\"Specify the Embedding Model. 
Not required for Astra Vectorize collections.\",\n            required=False,\n        ),\n        *LCVectorStoreComponent.inputs,\n        IntInput(\n            name=\"number_of_results\",\n            display_name=\"Number of Search Results\",\n            info=\"Number of search results to return.\",\n            advanced=True,\n            value=4,\n        ),\n        DropdownInput(\n            name=\"search_type\",\n            display_name=\"Search Type\",\n            info=\"Search type to use\",\n            options=[\"Similarity\", \"Similarity with score threshold\", \"MMR (Max Marginal Relevance)\"],\n            value=\"Similarity\",\n            advanced=True,\n        ),\n        FloatInput(\n            name=\"search_score_threshold\",\n            display_name=\"Search Score Threshold\",\n            info=\"Minimum similarity score threshold for search results. \"\n            \"(when using 'Similarity with score threshold')\",\n            value=0,\n            advanced=True,\n        ),\n        NestedDictInput(\n            name=\"advanced_search_filter\",\n            display_name=\"Search Metadata Filter\",\n            info=\"Optional dictionary of filters to apply to the search query.\",\n            advanced=True,\n        ),\n        BoolInput(\n            name=\"autodetect_collection\",\n            display_name=\"Autodetect Collection\",\n            info=\"Boolean flag to determine whether to autodetect the collection.\",\n            advanced=True,\n            value=True,\n        ),\n        StrInput(\n            name=\"content_field\",\n            display_name=\"Content Field\",\n            info=\"Field to use as the text content field for the vector store.\",\n            advanced=True,\n        ),\n        StrInput(\n            name=\"deletion_field\",\n            display_name=\"Deletion Based On Field\",\n            info=\"When this parameter is provided, documents in the target collection with \"\n            \"metadata 
field values matching the input metadata field value will be deleted \"\n            \"before new data is loaded.\",\n            advanced=True,\n        ),\n        BoolInput(\n            name=\"ignore_invalid_documents\",\n            display_name=\"Ignore Invalid Documents\",\n            info=\"Boolean flag to determine whether to ignore invalid documents at runtime.\",\n            advanced=True,\n        ),\n        NestedDictInput(\n            name=\"astradb_vectorstore_kwargs\",\n            display_name=\"AstraDBVectorStore Parameters\",\n            info=\"Optional dictionary of additional parameters for the AstraDBVectorStore.\",\n            advanced=True,\n        ),\n    ]\n\n    @classmethod\n    def map_cloud_providers(cls):\n        return {\n            \"Amazon Web Services\": {\n                \"id\": \"aws\",\n                \"regions\": [\"us-east-2\", \"ap-south-1\", \"eu-west-1\"],\n            },\n            \"Google Cloud Platform\": {\n                \"id\": \"gcp\",\n                \"regions\": [\"us-east1\"],\n            },\n            \"Microsoft Azure\": {\n                \"id\": \"azure\",\n                \"regions\": [\"westus3\"],\n            },\n        }\n\n    @classmethod\n    def create_database_api(\n        cls,\n        token: str,\n        new_database_name: str,\n        cloud_provider: str,\n        region: str,\n    ):\n        client = DataAPIClient(token=token)\n\n        # Get the admin object\n        admin_client = client.get_admin(token=token)\n\n        # Call the create database function\n        return admin_client.create_database(\n            name=new_database_name,\n            cloud_provider=cloud_provider,\n            region=region,\n        )\n\n    @classmethod\n    def create_collection_api(\n        cls,\n        token: str,\n        database_name: str,\n        new_collection_name: str,\n        dimension: int | None = None,\n        embedding_generation_provider: str | None = None,\n     
   embedding_generation_model: str | None = None,\n    ):\n        client = DataAPIClient(token=token)\n        api_endpoint = cls.get_api_endpoint_static(token=token, database_name=database_name)\n\n        # Get the database object\n        database = client.get_database(api_endpoint=api_endpoint, token=token)\n\n        # Build vectorize options, if needed\n        vectorize_options = None\n        if not dimension:\n            vectorize_options = CollectionVectorServiceOptions(\n                provider=embedding_generation_provider,\n                model_name=embedding_generation_model,\n                authentication=None,\n                parameters=None,\n            )\n\n        # Create the collection\n        return database.create_collection(\n            name=new_collection_name,\n            dimension=dimension,\n            service=vectorize_options,\n        )\n\n    @classmethod\n    def get_database_list_static(cls, token: str, environment: str | None = None):\n        client = DataAPIClient(token=token, environment=environment)\n\n        # Get the admin object\n        admin_client = client.get_admin(token=token)\n\n        # Get the list of databases\n        db_list = list(admin_client.list_databases())\n\n        # Generate the api endpoint for each database\n        db_info_dict = {}\n        for db in db_list:\n            try:\n                api_endpoint = f\"https://{}-{}\"\n                db_info_dict[] = {\n                    \"api_endpoint\": api_endpoint,\n                    \"collections\": len(\n                        list(\n                            client.get_database(\n                                api_endpoint=api_endpoint, token=token,\n                            ).list_collection_names(\n                        )\n                    ),\n                }\n            except Exception:  # noqa: BLE001, S110\n                pass\n\n        return db_info_dict\n\n    def get_database_list(self):\n        return 
self.get_database_list_static(token=self.token, environment=self.environment)\n\n    @classmethod\n    def get_api_endpoint_static(\n        cls,\n        token: str,\n        environment: str | None = None,\n        api_endpoint: str | None = None,\n        database_name: str | None = None,\n    ):\n        # If the api_endpoint is set, return it\n        if api_endpoint:\n            return api_endpoint\n\n        # Check if the database_name is like a url\n        if database_name and database_name.startswith(\"https://\"):\n            return database_name\n\n        # If the database is not set, nothing we can do.\n        if not database_name:\n            return None\n\n        # Otherwise, get the URL from the database list\n        return cls.get_database_list_static(token=token, environment=environment).get(database_name).get(\"api_endpoint\")\n\n    def get_api_endpoint(self, *, api_endpoint: str | None = None):\n        return self.get_api_endpoint_static(\n            token=self.token,\n            environment=self.environment,\n            api_endpoint=api_endpoint or self.d_api_endpoint,\n            database_name=self.api_endpoint,\n        )\n\n    def get_keyspace(self):\n        keyspace = self.keyspace\n\n        if keyspace:\n            return keyspace.strip()\n\n        return None\n\n    def get_database_object(self, api_endpoint: str | None = None):\n        try:\n            client = DataAPIClient(token=self.token, environment=self.environment)\n\n            return client.get_database(\n                api_endpoint=self.get_api_endpoint(api_endpoint=api_endpoint),\n                token=self.token,\n                keyspace=self.get_keyspace(),\n            )\n        except Exception as e:  # noqa: BLE001\n            self.log(f\"Error getting database: {e}\")\n\n            return None\n\n    def collection_data(self, collection_name: str, database: Database | None = None):\n        try:\n            if not database:\n                
client = DataAPIClient(token=self.token, environment=self.environment)\n\n                database = client.get_database(\n                    api_endpoint=self.get_api_endpoint(),\n                    token=self.token,\n                    keyspace=self.get_keyspace(),\n                )\n\n            collection = database.get_collection(collection_name, keyspace=self.get_keyspace())\n\n            return collection.estimated_document_count()\n        except Exception as e:  # noqa: BLE001\n            self.log(f\"Error checking collection data: {e}\")\n\n            return None\n\n    def get_vectorize_providers(self):\n        try:\n            self.log(\"Dynamically updating list of Vectorize providers.\")\n\n            # Get the admin object\n            admin = AstraDBAdmin(token=self.token)\n            db_admin = admin.get_database_admin(api_endpoint=self.get_api_endpoint())\n\n            # Get the list of embedding providers\n            embedding_providers = db_admin.find_embedding_providers().as_dict()\n\n            vectorize_providers_mapping = {}\n            # Map the provider display name to the provider key and models\n            for provider_key, provider_data in embedding_providers[\"embeddingProviders\"].items():\n                display_name = provider_data[\"displayName\"]\n                models = [model[\"name\"] for model in provider_data[\"models\"]]\n\n                # TODO:\n                vectorize_providers_mapping[display_name] = [provider_key, models]\n\n            # Sort the resulting dictionary\n            return defaultdict(list, dict(sorted(vectorize_providers_mapping.items())))\n        except Exception as e:  # noqa: BLE001\n            self.log(f\"Error fetching Vectorize providers: {e}\")\n\n            return {}\n\n    def _initialize_database_options(self):\n        try:\n            return [\n                {\n                    \"name\": name,\n                    \"collections\": info[\"collections\"],\n        
            \"api_endpoint\": info[\"api_endpoint\"],\n                }\n                for name, info in self.get_database_list().items()\n            ]\n        except Exception as e:  # noqa: BLE001\n            self.log(f\"Error fetching databases: {e}\")\n\n            return []\n\n    def _initialize_collection_options(self, api_endpoint: str | None = None):\n        database = self.get_database_object(api_endpoint=api_endpoint)\n        if database is None:\n            return []\n\n        try:\n            collection_list = list(database.list_collections(keyspace=self.get_keyspace()))\n\n            return [\n                {\n                    \"name\":,\n                    \"records\": self.collection_data(, database=database),\n                    \"provider\": (\n                        col.options.vector.service.provider\n                        if col.options.vector and col.options.vector.service\n                        else None\n                    ),\n                    \"icon\": \"\",\n                    \"model\": (\n                        col.options.vector.service.model_name\n                        if col.options.vector and col.options.vector.service\n                        else None\n                    ),\n                }\n                for col in collection_list\n            ]\n        except Exception as e:  # noqa: BLE001\n            self.log(f\"Error fetching collections: {e}\")\n\n            return []\n\n    def update_build_config(self, build_config: dict, field_value: str, field_name: str | None = None):\n        # TODO: Remove special astra flags when overlays are out\n        # TODO: Better targeting of this field\n        dslf = os.getenv(\"AWS_EXECUTION_ENV\") == \"AWS_ECS_FARGATE\"\n\n        # Refresh the database name options\n        if not dslf and (field_name in [\"token\", \"environment\"] or not build_config[\"api_endpoint\"][\"options\"]):\n            # Get the list of options we have based on the token 
provided\n            database_options = self._initialize_database_options()\n\n            # Reset the collection values selected\n            build_config[\"collection_name\"][\"options\"] = []\n            build_config[\"collection_name\"][\"options_metadata\"] = []\n            build_config[\"collection_name\"][\"value\"] = \"\"\n\n            # Scenario #1: We have database options from the provided token\n            build_config[\"api_endpoint\"][\"value\"] = \"\"\n            build_config[\"api_endpoint\"][\"name\"] = \"Database\"\n\n            # If we retrieved options based on the token, show the dropdown\n            build_config[\"api_endpoint\"][\"options\"] = [db[\"name\"] for db in database_options]\n            build_config[\"api_endpoint\"][\"options_metadata\"] = [\n                {k: v for k, v in db.items() if k not in [\"name\"]} for db in database_options\n            ]\n\n            # Get list of regions for a given cloud provider\n            \"\"\"\n            cloud_provider = (\n                build_config[\"api_endpoint\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"][\"cloud_provider\"][\n                    \"value\"\n                ]\n                or \"Amazon Web Services\"\n            )\n            build_config[\"api_endpoint\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"][\"region\"][\n                \"options\"\n            ] = self.map_cloud_providers()[cloud_provider][\"regions\"]\n            \"\"\"\n\n        # Refresh the collection name options\n        if field_name == \"api_endpoint\":\n            # Reset the selected collection\n            build_config[\"collection_name\"][\"value\"] = \"\"\n\n            # Set the underlying api endpoint value of the database\n            if field_value in build_config[\"api_endpoint\"][\"options\"]:\n                index_of_name = build_config[\"api_endpoint\"][\"options\"].index(field_value)\n                
build_config[\"d_api_endpoint\"][\"value\"] = build_config[\"api_endpoint\"][\"options_metadata\"][\n                    index_of_name\n                ][\"api_endpoint\"]\n            else:\n                build_config[\"d_api_endpoint\"][\"value\"] = \"\"\n\n            # Reload the list of collections and metadata associated\n            collection_options = self._initialize_collection_options(\n                api_endpoint=build_config[\"d_api_endpoint\"][\"value\"] if not dslf else None\n            )\n\n            # If we have collections, show the dropdown\n            build_config[\"collection_name\"][\"options\"] = [col[\"name\"] for col in collection_options]\n            build_config[\"collection_name\"][\"options_metadata\"] = [\n                {k: v for k, v in col.items() if k not in [\"name\"]} for col in collection_options\n            ]\n\n        # Hide embedding model option if opriona_metadata provider is not null\n        if field_name == \"collection_name\" and field_value:\n            # Set the options for collection name to be the field value if its a new collection\n            if field_value not in build_config[\"collection_name\"][\"options\"]:\n                # If this is running in DSLF, we may need to initialize the options again\n                if dslf:\n                    # Reload the list of collections and metadata associated\n                    collection_options = self._initialize_collection_options(\n                        api_endpoint=build_config[\"d_api_endpoint\"][\"value\"] if not dslf else None\n                    )\n\n                    # If we have collections, show the dropdown\n                    build_config[\"collection_name\"][\"options\"] = [col[\"name\"] for col in collection_options]\n                    build_config[\"collection_name\"][\"options_metadata\"] = [\n                        {k: v for k, v in col.items() if k not in [\"name\"]} for col in collection_options\n                    ]\n        
        else:\n                    # Add the new collection to the list of options\n                    build_config[\"collection_name\"][\"options\"].append(field_value)\n                    build_config[\"collection_name\"][\"options_metadata\"].append(\n                        {\"records\": 0, \"provider\": None, \"icon\": \"\", \"model\": None}\n                    )\n\n                    # Ensure that autodetect collection is set to False, since its a new collection\n                    build_config[\"autodetect_collection\"][\"value\"] = False\n            else:\n                build_config[\"autodetect_collection\"][\"value\"] = True\n\n            # Find the position of the selected collection to align with metadata\n            index_of_name = build_config[\"collection_name\"][\"options\"].index(field_value)\n\n            # Get the provider value of the selected collection\n            value_of_provider = build_config[\"collection_name\"][\"options_metadata\"][index_of_name][\"provider\"]\n\n            # If we were able to determine the Vectorize provider, set it accordingly\n            if value_of_provider:\n                build_config[\"embedding_model\"][\"advanced\"] = True\n                build_config[\"embedding_choice\"][\"value\"] = \"Astra Vectorize\"\n            else:\n                build_config[\"embedding_model\"][\"advanced\"] = False\n                build_config[\"embedding_choice\"][\"value\"] = \"Embedding Model\"\n\n        # For the final step, get the list of vectorize providers\n        \"\"\"\n        vectorize_providers = self.get_vectorize_providers()\n        if not vectorize_providers:\n            return build_config\n\n        # Allow the user to see the embedding provider options\n        provider_options = build_config[\"collection_name\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"][\n            \"embedding_generation_provider\"\n        ][\"options\"]\n        if not provider_options:\n         
   # If the collection is set, allow user to see embedding options\n            build_config[\"collection_name\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"][\n                \"embedding_generation_provider\"\n            ][\"options\"] = [\"Bring your own\", \"Nvidia\", *[key for key in vectorize_providers if key != \"Nvidia\"]]\n\n        # And allow the user to see the models based on a selected provider\n        model_options = build_config[\"collection_name\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"][\n            \"embedding_generation_model\"\n        ][\"options\"]\n        if not model_options:\n            embedding_provider = build_config[\"collection_name\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"][\n                \"embedding_generation_provider\"\n            ][\"value\"]\n\n            build_config[\"collection_name\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"][\n                \"embedding_generation_model\"\n            ][\"options\"] = vectorize_providers.get(embedding_provider, [[], []])[1]\n        \"\"\"\n\n        return build_config\n\n    @check_cached_vector_store\n    def build_vector_store(self):\n        try:\n            from langchain_astradb import AstraDBVectorStore\n        except ImportError as e:\n            msg = (\n                \"Could not import langchain Astra DB integration package. 
\"\n                \"Please install it with `pip install langchain-astradb`.\"\n            )\n            raise ImportError(msg) from e\n\n        # Get the embedding model and additional params\n        embedding_params = (\n            {\"embedding\": self.embedding_model}\n            if self.embedding_model and self.embedding_choice == \"Embedding Model\"\n            else {}\n        )\n\n        # Get the additional parameters\n        additional_params = self.astradb_vectorstore_kwargs or {}\n\n        # Get Langflow version and platform information\n        __version__ = get_version_info()[\"version\"]\n        langflow_prefix = \"\"\n        if os.getenv(\"AWS_EXECUTION_ENV\") == \"AWS_ECS_FARGATE\":  # TODO: More precise way of detecting\n            langflow_prefix = \"ds-\"\n\n        # Get the database object\n        database = self.get_database_object(api_endpoint=self.d_api_endpoint)\n        autodetect = self.collection_name in database.list_collection_names() and self.autodetect_collection\n\n        # Bundle up the auto-detect parameters\n        autodetect_params = {\n            \"autodetect_collection\": autodetect,\n            \"content_field\": (\n                self.content_field\n                if self.content_field and embedding_params\n                else (\n                    \"page_content\"\n                    if embedding_params\n                    and self.collection_data(collection_name=self.collection_name, database=database) == 0\n                    else None\n                )\n            ),\n            \"ignore_invalid_documents\": self.ignore_invalid_documents,\n        }\n\n        # Attempt to build the Vector Store object\n        try:\n            vector_store = AstraDBVectorStore(\n                # Astra DB Authentication Parameters\n                token=self.token,\n                api_endpoint=database.api_endpoint,\n                namespace=database.keyspace,\n                
collection_name=self.collection_name,\n                environment=self.environment,\n                # Astra DB Usage Tracking Parameters\n                ext_callers=[(f\"{langflow_prefix}langflow\", __version__)],\n                # Astra DB Vector Store Parameters\n                **autodetect_params,\n                **embedding_params,\n                **additional_params,\n            )\n        except Exception as e:\n            msg = f\"Error initializing AstraDBVectorStore: {e}\"\n            raise ValueError(msg) from e\n\n        # Add documents to the vector store\n        self._add_documents_to_vector_store(vector_store)\n\n        return vector_store\n\n    def _add_documents_to_vector_store(self, vector_store) -> None:\n        documents = []\n        for _input in self.ingest_data or []:\n            if isinstance(_input, Data):\n                documents.append(_input.to_lc_document())\n            else:\n                msg = \"Vector Store Inputs must be Data objects.\"\n                raise TypeError(msg)\n\n        if documents and self.deletion_field:\n            self.log(f\"Deleting documents where {self.deletion_field}\")\n            try:\n                database = self.get_database_object(api_endpoint=self.d_api_endpoint)\n                collection = database.get_collection(self.collection_name, keyspace=database.keyspace)\n                delete_values = list({doc.metadata[self.deletion_field] for doc in documents})\n                self.log(f\"Deleting documents where {self.deletion_field} matches {delete_values}.\")\n                collection.delete_many({f\"metadata.{self.deletion_field}\": {\"$in\": delete_values}})\n            except Exception as e:\n                msg = f\"Error deleting documents from AstraDBVectorStore based on '{self.deletion_field}': {e}\"\n                raise ValueError(msg) from e\n\n        if documents:\n            self.log(f\"Adding {len(documents)} documents to the Vector Store.\")\n            
try:\n                vector_store.add_documents(documents)\n            except Exception as e:\n                msg = f\"Error adding documents to AstraDBVectorStore: {e}\"\n                raise ValueError(msg) from e\n        else:\n            self.log(\"No documents to add to the Vector Store.\")\n\n    def _map_search_type(self) -> str:\n        search_type_mapping = {\n            \"Similarity with score threshold\": \"similarity_score_threshold\",\n            \"MMR (Max Marginal Relevance)\": \"mmr\",\n        }\n\n        return search_type_mapping.get(self.search_type, \"similarity\")\n\n    def _build_search_args(self):\n        query = self.search_query if isinstance(self.search_query, str) and self.search_query.strip() else None\n\n        if query:\n            args = {\n                \"query\": query,\n                \"search_type\": self._map_search_type(),\n                \"k\": self.number_of_results,\n                \"score_threshold\": self.search_score_threshold,\n            }\n        elif self.advanced_search_filter:\n            args = {\n                \"n\": self.number_of_results,\n            }\n        else:\n            return {}\n\n        filter_arg = self.advanced_search_filter or {}\n        if filter_arg:\n            args[\"filter\"] = filter_arg\n\n        return args\n\n    def search_documents(self, vector_store=None) -> list[Data]:\n        vector_store = vector_store or self.build_vector_store()\n\n        self.log(f\"Search input: {self.search_query}\")\n        self.log(f\"Search type: {self.search_type}\")\n        self.log(f\"Number of results: {self.number_of_results}\")\n\n        try:\n            search_args = self._build_search_args()\n        except Exception as e:\n            msg = f\"Error in AstraDBVectorStore._build_search_args: {e}\"\n            raise ValueError(msg) from e\n\n        if not search_args:\n            self.log(\"No search input or filters provided. 
Skipping search.\")\n            return []\n\n        docs = []\n        search_method = \"search\" if \"query\" in search_args else \"metadata_search\"\n\n        try:\n            self.log(f\"Calling vector_store.{search_method} with args: {search_args}\")\n            docs = getattr(vector_store, search_method)(**search_args)\n        except Exception as e:\n            msg = f\"Error performing {search_method} in AstraDBVectorStore: {e}\"\n            raise ValueError(msg) from e\n\n        self.log(f\"Retrieved documents: {len(docs)}\")\n\n        data = docs_to_data(docs)\n        self.log(f\"Converted documents to data: {len(data)}\")\n        self.status = data\n\n        return data\n\n    def get_retriever_kwargs(self):\n        search_args = self._build_search_args()\n\n        return {\n            \"search_type\": self._map_search_type(),\n            \"search_kwargs\": search_args,\n        }\n"
+                "value": "import os\nfrom collections import defaultdict\nfrom dataclasses import dataclass, field\n\nfrom astrapy import AstraDBAdmin, DataAPIClient, Database\nfrom langchain_astradb import AstraDBVectorStore, CollectionVectorServiceOptions\n\nfrom langflow.base.vectorstores.model import LCVectorStoreComponent, check_cached_vector_store\nfrom langflow.helpers import docs_to_data\nfrom langflow.inputs import FloatInput, NestedDictInput\nfrom import (\n    BoolInput,\n    DropdownInput,\n    HandleInput,\n    IntInput,\n    SecretStrInput,\n    StrInput,\n)\nfrom langflow.schema import Data\nfrom langflow.utils.version import get_version_info\n\n\nclass AstraDBVectorStoreComponent(LCVectorStoreComponent):\n    display_name: str = \"Astra DB\"\n    description: str = \"Ingest and search documents in Astra DB\"\n    documentation: str = \"\"\n    name = \"AstraDB\"\n    icon: str = \"AstraDB\"\n\n    _cached_vector_store: AstraDBVectorStore | None = None\n\n    @dataclass\n    class NewDatabaseInput:\n        functionality: str = \"create\"\n        fields: dict[str, dict] = field(\n            default_factory=lambda: {\n                \"data\": {\n                    \"node\": {\n                        \"description\": \"Create a new database in Astra DB.\",\n                        \"display_name\": \"Create New Database\",\n                        \"field_order\": [\"new_database_name\", \"cloud_provider\", \"region\"],\n                        \"template\": {\n                            \"new_database_name\": StrInput(\n                                name=\"new_database_name\",\n                                display_name=\"New Database Name\",\n                                info=\"Name of the new database to create in Astra DB.\",\n                                required=True,\n                            ),\n                            \"cloud_provider\": DropdownInput(\n                                name=\"cloud_provider\",\n          
                      display_name=\"Cloud Provider\",\n                                info=\"Cloud provider for the new database.\",\n                                options=[\"Amazon Web Services\", \"Google Cloud Platform\", \"Microsoft Azure\"],\n                                required=True,\n                            ),\n                            \"region\": DropdownInput(\n                                name=\"region\",\n                                display_name=\"Region\",\n                                info=\"Region for the new database.\",\n                                options=[],\n                                required=True,\n                            ),\n                        },\n                    },\n                }\n            }\n        )\n\n    @dataclass\n    class NewCollectionInput:\n        functionality: str = \"create\"\n        fields: dict[str, dict] = field(\n            default_factory=lambda: {\n                \"data\": {\n                    \"node\": {\n                        \"description\": \"Create a new collection in Astra DB.\",\n                        \"display_name\": \"Create New Collection\",\n                        \"field_order\": [\n                            \"new_collection_name\",\n                            \"embedding_generation_provider\",\n                            \"embedding_generation_model\",\n                        ],\n                        \"template\": {\n                            \"new_collection_name\": StrInput(\n                                name=\"new_collection_name\",\n                                display_name=\"New Collection Name\",\n                                info=\"Name of the new collection to create in Astra DB.\",\n                                required=True,\n                            ),\n                            \"embedding_generation_provider\": DropdownInput(\n                                name=\"embedding_generation_provider\",\n        
                        display_name=\"Embedding Generation Provider\",\n                                info=\"Provider to use for generating embeddings.\",\n                                options=[],\n                                required=True,\n                            ),\n                            \"embedding_generation_model\": DropdownInput(\n                                name=\"embedding_generation_model\",\n                                display_name=\"Embedding Generation Model\",\n                                info=\"Model to use for generating embeddings.\",\n                                options=[],\n                                required=True,\n                            ),\n                        },\n                    },\n                }\n            }\n        )\n\n    inputs = [\n        SecretStrInput(\n            name=\"token\",\n            display_name=\"Astra DB Application Token\",\n            info=\"Authentication token for accessing Astra DB.\",\n            value=\"ASTRA_DB_APPLICATION_TOKEN\",\n            required=True,\n            real_time_refresh=True,\n            input_types=[],\n        ),\n        StrInput(\n            name=\"environment\",\n            display_name=\"Environment\",\n            info=\"The environment for the Astra DB API Endpoint.\",\n            advanced=True,\n        ),\n        DropdownInput(\n            name=\"api_endpoint\",\n            display_name=\"Database\",\n            info=\"The Database / API Endpoint for the Astra DB instance.\",\n            required=True,\n            refresh_button=True,\n            real_time_refresh=True,\n            combobox=True,\n        ),\n        StrInput(\n            name=\"d_api_endpoint\",\n            display_name=\"Database API Endpoint\",\n            info=\"The API Endpoint for the Astra DB instance. 
Supercedes database selection.\",\n            advanced=True,\n        ),\n        DropdownInput(\n            name=\"collection_name\",\n            display_name=\"Collection\",\n            info=\"The name of the collection within Astra DB where the vectors will be stored.\",\n            required=True,\n            refresh_button=True,\n            real_time_refresh=True,\n            # dialog_inputs=asdict(NewCollectionInput()),\n            combobox=True,\n        ),\n        StrInput(\n            name=\"keyspace\",\n            display_name=\"Keyspace\",\n            info=\"Optional keyspace within Astra DB to use for the collection.\",\n            advanced=True,\n        ),\n        DropdownInput(\n            name=\"embedding_choice\",\n            display_name=\"Embedding Model or Astra Vectorize\",\n            info=\"Choose an embedding model or use Astra Vectorize.\",\n            options=[\"Embedding Model\", \"Astra Vectorize\"],\n            value=\"Embedding Model\",\n            advanced=True,\n            real_time_refresh=True,\n        ),\n        HandleInput(\n            name=\"embedding_model\",\n            display_name=\"Embedding Model\",\n            input_types=[\"Embeddings\"],\n            info=\"Specify the Embedding Model. 
Not required for Astra Vectorize collections.\",\n            required=False,\n        ),\n        *LCVectorStoreComponent.inputs,\n        IntInput(\n            name=\"number_of_results\",\n            display_name=\"Number of Search Results\",\n            info=\"Number of search results to return.\",\n            advanced=True,\n            value=4,\n        ),\n        DropdownInput(\n            name=\"search_type\",\n            display_name=\"Search Type\",\n            info=\"Search type to use\",\n            options=[\"Similarity\", \"Similarity with score threshold\", \"MMR (Max Marginal Relevance)\"],\n            value=\"Similarity\",\n            advanced=True,\n        ),\n        FloatInput(\n            name=\"search_score_threshold\",\n            display_name=\"Search Score Threshold\",\n            info=\"Minimum similarity score threshold for search results. \"\n            \"(when using 'Similarity with score threshold')\",\n            value=0,\n            advanced=True,\n        ),\n        NestedDictInput(\n            name=\"advanced_search_filter\",\n            display_name=\"Search Metadata Filter\",\n            info=\"Optional dictionary of filters to apply to the search query.\",\n            advanced=True,\n        ),\n        BoolInput(\n            name=\"autodetect_collection\",\n            display_name=\"Autodetect Collection\",\n            info=\"Boolean flag to determine whether to autodetect the collection.\",\n            advanced=True,\n            value=True,\n        ),\n        StrInput(\n            name=\"content_field\",\n            display_name=\"Content Field\",\n            info=\"Field to use as the text content field for the vector store.\",\n            advanced=True,\n        ),\n        StrInput(\n            name=\"deletion_field\",\n            display_name=\"Deletion Based On Field\",\n            info=\"When this parameter is provided, documents in the target collection with \"\n            \"metadata 
field values matching the input metadata field value will be deleted \"\n            \"before new data is loaded.\",\n            advanced=True,\n        ),\n        BoolInput(\n            name=\"ignore_invalid_documents\",\n            display_name=\"Ignore Invalid Documents\",\n            info=\"Boolean flag to determine whether to ignore invalid documents at runtime.\",\n            advanced=True,\n        ),\n        NestedDictInput(\n            name=\"astradb_vectorstore_kwargs\",\n            display_name=\"AstraDBVectorStore Parameters\",\n            info=\"Optional dictionary of additional parameters for the AstraDBVectorStore.\",\n            advanced=True,\n        ),\n    ]\n\n    @classmethod\n    def map_cloud_providers(cls):\n        return {\n            \"Amazon Web Services\": {\n                \"id\": \"aws\",\n                \"regions\": [\"us-east-2\", \"ap-south-1\", \"eu-west-1\"],\n            },\n            \"Google Cloud Platform\": {\n                \"id\": \"gcp\",\n                \"regions\": [\"us-east1\"],\n            },\n            \"Microsoft Azure\": {\n                \"id\": \"azure\",\n                \"regions\": [\"westus3\"],\n            },\n        }\n\n    @classmethod\n    def create_database_api(\n        cls,\n        token: str,\n        new_database_name: str,\n        cloud_provider: str,\n        region: str,\n    ):\n        client = DataAPIClient(token=token)\n\n        # Get the admin object\n        admin_client = client.get_admin(token=token)\n\n        # Call the create database function\n        return admin_client.create_database(\n            name=new_database_name,\n            cloud_provider=cloud_provider,\n            region=region,\n        )\n\n    @classmethod\n    def create_collection_api(\n        cls,\n        token: str,\n        database_name: str,\n        new_collection_name: str,\n        dimension: int | None = None,\n        embedding_generation_provider: str | None = None,\n     
   embedding_generation_model: str | None = None,\n    ):\n        client = DataAPIClient(token=token)\n        api_endpoint = cls.get_api_endpoint_static(token=token, database_name=database_name)\n\n        # Get the database object\n        database = client.get_database(api_endpoint=api_endpoint, token=token)\n\n        # Build vectorize options, if needed\n        vectorize_options = None\n        if not dimension:\n            vectorize_options = CollectionVectorServiceOptions(\n                provider=embedding_generation_provider,\n                model_name=embedding_generation_model,\n                authentication=None,\n                parameters=None,\n            )\n\n        # Create the collection\n        return database.create_collection(\n            name=new_collection_name,\n            dimension=dimension,\n            service=vectorize_options,\n        )\n\n    @classmethod\n    def get_database_list_static(cls, token: str, environment: str | None = None):\n        client = DataAPIClient(token=token, environment=environment)\n\n        # Get the admin object\n        admin_client = client.get_admin(token=token)\n\n        # Get the list of databases\n        db_list = list(admin_client.list_databases())\n\n        # Generate the api endpoint for each database\n        db_info_dict = {}\n        for db in db_list:\n            try:\n                api_endpoint = f\"https://{}-{}\"\n                db_info_dict[] = {\n                    \"api_endpoint\": api_endpoint,\n                    \"collections\": len(\n                        list(\n                            client.get_database(\n                                api_endpoint=api_endpoint, token=token,\n                            ).list_collection_names(\n                        )\n                    ),\n                }\n            except Exception:  # noqa: BLE001, S110\n                pass\n\n        return db_info_dict\n\n    def get_database_list(self):\n        return 
self.get_database_list_static(token=self.token, environment=self.environment)\n\n    @classmethod\n    def get_api_endpoint_static(\n        cls,\n        token: str,\n        environment: str | None = None,\n        api_endpoint: str | None = None,\n        database_name: str | None = None,\n    ):\n        # If the api_endpoint is set, return it\n        if api_endpoint:\n            return api_endpoint\n\n        # Check if the database_name is like a url\n        if database_name and database_name.startswith(\"https://\"):\n            return database_name\n\n        # If the database is not set, nothing we can do.\n        if not database_name:\n            return None\n\n        # Otherwise, get the URL from the database list\n        return cls.get_database_list_static(token=token, environment=environment).get(database_name).get(\"api_endpoint\")\n\n    def get_api_endpoint(self, *, api_endpoint: str | None = None):\n        return self.get_api_endpoint_static(\n            token=self.token,\n            environment=self.environment,\n            api_endpoint=api_endpoint or self.d_api_endpoint,\n            database_name=self.api_endpoint,\n        )\n\n    def get_keyspace(self):\n        keyspace = self.keyspace\n\n        if keyspace:\n            return keyspace.strip()\n\n        return None\n\n    def get_database_object(self, api_endpoint: str | None = None):\n        try:\n            client = DataAPIClient(token=self.token, environment=self.environment)\n\n            return client.get_database(\n                api_endpoint=self.get_api_endpoint(api_endpoint=api_endpoint),\n                token=self.token,\n                keyspace=self.get_keyspace(),\n            )\n        except Exception as e:  # noqa: BLE001\n            self.log(f\"Error getting database: {e}\")\n\n            return None\n\n    def collection_data(self, collection_name: str, database: Database | None = None):\n        try:\n            if not database:\n                
client = DataAPIClient(token=self.token, environment=self.environment)\n\n                database = client.get_database(\n                    api_endpoint=self.get_api_endpoint(),\n                    token=self.token,\n                    keyspace=self.get_keyspace(),\n                )\n\n            collection = database.get_collection(collection_name, keyspace=self.get_keyspace())\n\n            return collection.estimated_document_count()\n        except Exception as e:  # noqa: BLE001\n            self.log(f\"Error checking collection data: {e}\")\n\n            return None\n\n    def get_vectorize_providers(self):\n        try:\n            self.log(\"Dynamically updating list of Vectorize providers.\")\n\n            # Get the admin object\n            admin = AstraDBAdmin(token=self.token)\n            db_admin = admin.get_database_admin(api_endpoint=self.get_api_endpoint())\n\n            # Get the list of embedding providers\n            embedding_providers = db_admin.find_embedding_providers().as_dict()\n\n            vectorize_providers_mapping = {}\n            # Map the provider display name to the provider key and models\n            for provider_key, provider_data in embedding_providers[\"embeddingProviders\"].items():\n                display_name = provider_data[\"displayName\"]\n                models = [model[\"name\"] for model in provider_data[\"models\"]]\n\n                # TODO:\n                vectorize_providers_mapping[display_name] = [provider_key, models]\n\n            # Sort the resulting dictionary\n            return defaultdict(list, dict(sorted(vectorize_providers_mapping.items())))\n        except Exception as e:  # noqa: BLE001\n            self.log(f\"Error fetching Vectorize providers: {e}\")\n\n            return {}\n\n    def _initialize_database_options(self):\n        try:\n            return [\n                {\n                    \"name\": name,\n                    \"collections\": info[\"collections\"],\n        
            \"api_endpoint\": info[\"api_endpoint\"],\n                }\n                for name, info in self.get_database_list().items()\n            ]\n        except Exception as e:\n            msg = f\"Error fetching database options: {e}\"\n            raise ValueError(msg) from e\n\n    def _initialize_collection_options(self, api_endpoint: str | None = None):\n        database = self.get_database_object(api_endpoint=api_endpoint)\n        if database is None:\n            return []\n\n        try:\n            collection_list = list(database.list_collections(keyspace=self.get_keyspace()))\n\n            return [\n                {\n                    \"name\":,\n                    \"records\": self.collection_data(, database=database),\n                    \"provider\": (\n                        col.options.vector.service.provider\n                        if col.options.vector and col.options.vector.service\n                        else None\n                    ),\n                    \"icon\": \"\",\n                    \"model\": (\n                        col.options.vector.service.model_name\n                        if col.options.vector and col.options.vector.service\n                        else None\n                    ),\n                }\n                for col in collection_list\n            ]\n        except Exception as e:  # noqa: BLE001\n            self.log(f\"Error fetching collections: {e}\")\n\n            return []\n\n    def reset_build_config(self, build_config: dict):\n        # Reset the list of databases we have based on the token provided\n        build_config[\"api_endpoint\"][\"options\"] = []\n        build_config[\"api_endpoint\"][\"options_metadata\"] = []\n        build_config[\"api_endpoint\"][\"value\"] = \"\"\n        build_config[\"api_endpoint\"][\"name\"] = \"Database\"\n\n        # Reset the list of collections and metadata associated\n        build_config[\"collection_name\"][\"options\"] = []\n        
build_config[\"collection_name\"][\"options_metadata\"] = []\n        build_config[\"collection_name\"][\"value\"] = \"\"\n\n        return build_config\n\n    def update_build_config(self, build_config: dict, field_value: str, field_name: str | None = None):\n        # TODO: Remove special astra flags when overlays are out\n        # TODO: Better targeting of this field\n        dslf = os.getenv(\"AWS_EXECUTION_ENV\") == \"AWS_ECS_FARGATE\"\n\n        # If the token has not been provided, simply return\n        if not self.token:\n            return self.reset_build_config(build_config)\n\n        # Refresh the database name options\n        if not dslf and (field_name in [\"token\", \"environment\"] or not build_config[\"api_endpoint\"][\"options\"]):\n            # Reset the build config to ensure we are starting fresh\n            build_config = self.reset_build_config(build_config)\n\n            # Get the list of options we have based on the token provided\n            database_options = self._initialize_database_options()\n\n            # If we retrieved options based on the token, show the dropdown\n            build_config[\"api_endpoint\"][\"options\"] = [db[\"name\"] for db in database_options]\n            build_config[\"api_endpoint\"][\"options_metadata\"] = [\n                {k: v for k, v in db.items() if k not in [\"name\"]} for db in database_options\n            ]\n\n            # Get list of regions for a given cloud provider\n            \"\"\"\n            cloud_provider = (\n                build_config[\"api_endpoint\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"][\"cloud_provider\"][\n                    \"value\"\n                ]\n                or \"Amazon Web Services\"\n            )\n            build_config[\"api_endpoint\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"][\"region\"][\n                \"options\"\n            ] = self.map_cloud_providers()[cloud_provider][\"regions\"]\n         
   \"\"\"\n\n        # Refresh the collection name options\n        if field_name == \"api_endpoint\":\n            # Reset the selected collection\n            build_config[\"collection_name\"][\"value\"] = \"\"\n\n            # Set the underlying api endpoint value of the database\n            if field_value in build_config[\"api_endpoint\"][\"options\"]:\n                index_of_name = build_config[\"api_endpoint\"][\"options\"].index(field_value)\n                build_config[\"d_api_endpoint\"][\"value\"] = build_config[\"api_endpoint\"][\"options_metadata\"][\n                    index_of_name\n                ][\"api_endpoint\"]\n            else:\n                build_config[\"d_api_endpoint\"][\"value\"] = \"\"\n\n            # Reload the list of collections and metadata associated\n            collection_options = self._initialize_collection_options(\n                api_endpoint=build_config[\"d_api_endpoint\"][\"value\"] if not dslf else None\n            )\n\n            # If we have collections, show the dropdown\n            build_config[\"collection_name\"][\"options\"] = [col[\"name\"] for col in collection_options]\n            build_config[\"collection_name\"][\"options_metadata\"] = [\n                {k: v for k, v in col.items() if k not in [\"name\"]} for col in collection_options\n            ]\n\n        # Hide embedding model option if opriona_metadata provider is not null\n        if field_name == \"collection_name\" and field_value:\n            # Set the options for collection name to be the field value if its a new collection\n            if field_value not in build_config[\"collection_name\"][\"options\"]:\n                # If this is running in DSLF, we may need to initialize the options again\n                if dslf:\n                    # Reload the list of collections and metadata associated\n                    collection_options = self._initialize_collection_options(\n                        
api_endpoint=build_config[\"d_api_endpoint\"][\"value\"] if not dslf else None\n                    )\n\n                    # If we have collections, show the dropdown\n                    build_config[\"collection_name\"][\"options\"] = [col[\"name\"] for col in collection_options]\n                    build_config[\"collection_name\"][\"options_metadata\"] = [\n                        {k: v for k, v in col.items() if k not in [\"name\"]} for col in collection_options\n                    ]\n                else:\n                    # Add the new collection to the list of options\n                    build_config[\"collection_name\"][\"options\"].append(field_value)\n                    build_config[\"collection_name\"][\"options_metadata\"].append(\n                        {\"records\": 0, \"provider\": None, \"icon\": \"\", \"model\": None}\n                    )\n\n                    # Ensure that autodetect collection is set to False, since its a new collection\n                    build_config[\"autodetect_collection\"][\"value\"] = False\n            else:\n                build_config[\"autodetect_collection\"][\"value\"] = True\n\n            # Find the position of the selected collection to align with metadata\n            index_of_name = build_config[\"collection_name\"][\"options\"].index(field_value)\n\n            # Get the provider value of the selected collection\n            value_of_provider = build_config[\"collection_name\"][\"options_metadata\"][index_of_name][\"provider\"]\n\n            # If we were able to determine the Vectorize provider, set it accordingly\n            if value_of_provider:\n                build_config[\"embedding_model\"][\"advanced\"] = True\n                build_config[\"embedding_choice\"][\"value\"] = \"Astra Vectorize\"\n            else:\n                build_config[\"embedding_model\"][\"advanced\"] = False\n                build_config[\"embedding_choice\"][\"value\"] = \"Embedding Model\"\n\n        # For 
the final step, get the list of vectorize providers\n        \"\"\"\n        vectorize_providers = self.get_vectorize_providers()\n        if not vectorize_providers:\n            return build_config\n\n        # Allow the user to see the embedding provider options\n        provider_options = build_config[\"collection_name\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"][\n            \"embedding_generation_provider\"\n        ][\"options\"]\n        if not provider_options:\n            # If the collection is set, allow user to see embedding options\n            build_config[\"collection_name\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"][\n                \"embedding_generation_provider\"\n            ][\"options\"] = [\"Bring your own\", \"Nvidia\", *[key for key in vectorize_providers if key != \"Nvidia\"]]\n\n        # And allow the user to see the models based on a selected provider\n        model_options = build_config[\"collection_name\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"][\n            \"embedding_generation_model\"\n        ][\"options\"]\n        if not model_options:\n            embedding_provider = build_config[\"collection_name\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"][\n                \"embedding_generation_provider\"\n            ][\"value\"]\n\n            build_config[\"collection_name\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"][\n                \"embedding_generation_model\"\n            ][\"options\"] = vectorize_providers.get(embedding_provider, [[], []])[1]\n        \"\"\"\n\n        return build_config\n\n    @check_cached_vector_store\n    def build_vector_store(self):\n        try:\n            from langchain_astradb import AstraDBVectorStore\n        except ImportError as e:\n            msg = (\n                \"Could not import langchain Astra DB integration package. 
\"\n                \"Please install it with `pip install langchain-astradb`.\"\n            )\n            raise ImportError(msg) from e\n\n        # Get the embedding model and additional params\n        embedding_params = (\n            {\"embedding\": self.embedding_model}\n            if self.embedding_model and self.embedding_choice == \"Embedding Model\"\n            else {}\n        )\n\n        # Get the additional parameters\n        additional_params = self.astradb_vectorstore_kwargs or {}\n\n        # Get Langflow version and platform information\n        __version__ = get_version_info()[\"version\"]\n        langflow_prefix = \"\"\n        if os.getenv(\"AWS_EXECUTION_ENV\") == \"AWS_ECS_FARGATE\":  # TODO: More precise way of detecting\n            langflow_prefix = \"ds-\"\n\n        # Get the database object\n        database = self.get_database_object(api_endpoint=self.d_api_endpoint)\n        autodetect = self.collection_name in database.list_collection_names() and self.autodetect_collection\n\n        # Bundle up the auto-detect parameters\n        autodetect_params = {\n            \"autodetect_collection\": autodetect,\n            \"content_field\": (\n                self.content_field\n                if self.content_field and embedding_params\n                else (\n                    \"page_content\"\n                    if embedding_params\n                    and self.collection_data(collection_name=self.collection_name, database=database) == 0\n                    else None\n                )\n            ),\n            \"ignore_invalid_documents\": self.ignore_invalid_documents,\n        }\n\n        # Attempt to build the Vector Store object\n        try:\n            vector_store = AstraDBVectorStore(\n                # Astra DB Authentication Parameters\n                token=self.token,\n                api_endpoint=database.api_endpoint,\n                namespace=database.keyspace,\n                
collection_name=self.collection_name,\n                environment=self.environment,\n                # Astra DB Usage Tracking Parameters\n                ext_callers=[(f\"{langflow_prefix}langflow\", __version__)],\n                # Astra DB Vector Store Parameters\n                **autodetect_params,\n                **embedding_params,\n                **additional_params,\n            )\n        except Exception as e:\n            msg = f\"Error initializing AstraDBVectorStore: {e}\"\n            raise ValueError(msg) from e\n\n        # Add documents to the vector store\n        self._add_documents_to_vector_store(vector_store)\n\n        return vector_store\n\n    def _add_documents_to_vector_store(self, vector_store) -> None:\n        documents = []\n        for _input in self.ingest_data or []:\n            if isinstance(_input, Data):\n                documents.append(_input.to_lc_document())\n            else:\n                msg = \"Vector Store Inputs must be Data objects.\"\n                raise TypeError(msg)\n\n        if documents and self.deletion_field:\n            self.log(f\"Deleting documents where {self.deletion_field}\")\n            try:\n                database = self.get_database_object(api_endpoint=self.d_api_endpoint)\n                collection = database.get_collection(self.collection_name, keyspace=database.keyspace)\n                delete_values = list({doc.metadata[self.deletion_field] for doc in documents})\n                self.log(f\"Deleting documents where {self.deletion_field} matches {delete_values}.\")\n                collection.delete_many({f\"metadata.{self.deletion_field}\": {\"$in\": delete_values}})\n            except Exception as e:\n                msg = f\"Error deleting documents from AstraDBVectorStore based on '{self.deletion_field}': {e}\"\n                raise ValueError(msg) from e\n\n        if documents:\n            self.log(f\"Adding {len(documents)} documents to the Vector Store.\")\n            
try:\n                vector_store.add_documents(documents)\n            except Exception as e:\n                msg = f\"Error adding documents to AstraDBVectorStore: {e}\"\n                raise ValueError(msg) from e\n        else:\n            self.log(\"No documents to add to the Vector Store.\")\n\n    def _map_search_type(self) -> str:\n        search_type_mapping = {\n            \"Similarity with score threshold\": \"similarity_score_threshold\",\n            \"MMR (Max Marginal Relevance)\": \"mmr\",\n        }\n\n        return search_type_mapping.get(self.search_type, \"similarity\")\n\n    def _build_search_args(self):\n        query = self.search_query if isinstance(self.search_query, str) and self.search_query.strip() else None\n\n        if query:\n            args = {\n                \"query\": query,\n                \"search_type\": self._map_search_type(),\n                \"k\": self.number_of_results,\n                \"score_threshold\": self.search_score_threshold,\n            }\n        elif self.advanced_search_filter:\n            args = {\n                \"n\": self.number_of_results,\n            }\n        else:\n            return {}\n\n        filter_arg = self.advanced_search_filter or {}\n        if filter_arg:\n            args[\"filter\"] = filter_arg\n\n        return args\n\n    def search_documents(self, vector_store=None) -> list[Data]:\n        vector_store = vector_store or self.build_vector_store()\n\n        self.log(f\"Search input: {self.search_query}\")\n        self.log(f\"Search type: {self.search_type}\")\n        self.log(f\"Number of results: {self.number_of_results}\")\n\n        try:\n            search_args = self._build_search_args()\n        except Exception as e:\n            msg = f\"Error in AstraDBVectorStore._build_search_args: {e}\"\n            raise ValueError(msg) from e\n\n        if not search_args:\n            self.log(\"No search input or filters provided. 
Skipping search.\")\n            return []\n\n        docs = []\n        search_method = \"search\" if \"query\" in search_args else \"metadata_search\"\n\n        try:\n            self.log(f\"Calling vector_store.{search_method} with args: {search_args}\")\n            docs = getattr(vector_store, search_method)(**search_args)\n        except Exception as e:\n            msg = f\"Error performing {search_method} in AstraDBVectorStore: {e}\"\n            raise ValueError(msg) from e\n\n        self.log(f\"Retrieved documents: {len(docs)}\")\n\n        data = docs_to_data(docs)\n        self.log(f\"Converted documents to data: {len(data)}\")\n        self.status = data\n\n        return data\n\n    def get_retriever_kwargs(self):\n        search_args = self._build_search_args()\n\n        return {\n            \"search_type\": self._map_search_type(),\n            \"search_kwargs\": search_args,\n        }\n"
               "collection_name": {
                 "_input_type": "DropdownInput",
@@ -3795,7 +3795,7 @@
                 "show": true,
                 "title_case": false,
                 "type": "code",
-                "value": "import os\nfrom collections import defaultdict\nfrom dataclasses import dataclass, field\n\nfrom astrapy import AstraDBAdmin, DataAPIClient, Database\nfrom langchain_astradb import AstraDBVectorStore, CollectionVectorServiceOptions\n\nfrom langflow.base.vectorstores.model import LCVectorStoreComponent, check_cached_vector_store\nfrom langflow.helpers import docs_to_data\nfrom langflow.inputs import FloatInput, NestedDictInput\nfrom import (\n    BoolInput,\n    DropdownInput,\n    HandleInput,\n    IntInput,\n    SecretStrInput,\n    StrInput,\n)\nfrom langflow.schema import Data\nfrom langflow.utils.version import get_version_info\n\n\nclass AstraDBVectorStoreComponent(LCVectorStoreComponent):\n    display_name: str = \"Astra DB\"\n    description: str = \"Ingest and search documents in Astra DB\"\n    documentation: str = \"\"\n    name = \"AstraDB\"\n    icon: str = \"AstraDB\"\n\n    _cached_vector_store: AstraDBVectorStore | None = None\n\n    @dataclass\n    class NewDatabaseInput:\n        functionality: str = \"create\"\n        fields: dict[str, dict] = field(\n            default_factory=lambda: {\n                \"data\": {\n                    \"node\": {\n                        \"description\": \"Create a new database in Astra DB.\",\n                        \"display_name\": \"Create New Database\",\n                        \"field_order\": [\"new_database_name\", \"cloud_provider\", \"region\"],\n                        \"template\": {\n                            \"new_database_name\": StrInput(\n                                name=\"new_database_name\",\n                                display_name=\"New Database Name\",\n                                info=\"Name of the new database to create in Astra DB.\",\n                                required=True,\n                            ),\n                            \"cloud_provider\": DropdownInput(\n                                name=\"cloud_provider\",\n          
                      display_name=\"Cloud Provider\",\n                                info=\"Cloud provider for the new database.\",\n                                options=[\"Amazon Web Services\", \"Google Cloud Platform\", \"Microsoft Azure\"],\n                                required=True,\n                            ),\n                            \"region\": DropdownInput(\n                                name=\"region\",\n                                display_name=\"Region\",\n                                info=\"Region for the new database.\",\n                                options=[],\n                                required=True,\n                            ),\n                        },\n                    },\n                }\n            }\n        )\n\n    @dataclass\n    class NewCollectionInput:\n        functionality: str = \"create\"\n        fields: dict[str, dict] = field(\n            default_factory=lambda: {\n                \"data\": {\n                    \"node\": {\n                        \"description\": \"Create a new collection in Astra DB.\",\n                        \"display_name\": \"Create New Collection\",\n                        \"field_order\": [\n                            \"new_collection_name\",\n                            \"embedding_generation_provider\",\n                            \"embedding_generation_model\",\n                        ],\n                        \"template\": {\n                            \"new_collection_name\": StrInput(\n                                name=\"new_collection_name\",\n                                display_name=\"New Collection Name\",\n                                info=\"Name of the new collection to create in Astra DB.\",\n                                required=True,\n                            ),\n                            \"embedding_generation_provider\": DropdownInput(\n                                name=\"embedding_generation_provider\",\n        
                        display_name=\"Embedding Generation Provider\",\n                                info=\"Provider to use for generating embeddings.\",\n                                options=[],\n                                required=True,\n                            ),\n                            \"embedding_generation_model\": DropdownInput(\n                                name=\"embedding_generation_model\",\n                                display_name=\"Embedding Generation Model\",\n                                info=\"Model to use for generating embeddings.\",\n                                options=[],\n                                required=True,\n                            ),\n                        },\n                    },\n                }\n            }\n        )\n\n    inputs = [\n        SecretStrInput(\n            name=\"token\",\n            display_name=\"Astra DB Application Token\",\n            info=\"Authentication token for accessing Astra DB.\",\n            value=\"ASTRA_DB_APPLICATION_TOKEN\",\n            required=True,\n            real_time_refresh=True,\n            input_types=[],\n        ),\n        StrInput(\n            name=\"environment\",\n            display_name=\"Environment\",\n            info=\"The environment for the Astra DB API Endpoint.\",\n            advanced=True,\n        ),\n        DropdownInput(\n            name=\"api_endpoint\",\n            display_name=\"Database\",\n            info=\"The Database / API Endpoint for the Astra DB instance.\",\n            required=True,\n            refresh_button=True,\n            real_time_refresh=True,\n            combobox=True,\n        ),\n        StrInput(\n            name=\"d_api_endpoint\",\n            display_name=\"Database API Endpoint\",\n            info=\"The API Endpoint for the Astra DB instance. 
Supercedes database selection.\",\n            advanced=True,\n        ),\n        DropdownInput(\n            name=\"collection_name\",\n            display_name=\"Collection\",\n            info=\"The name of the collection within Astra DB where the vectors will be stored.\",\n            required=True,\n            refresh_button=True,\n            real_time_refresh=True,\n            # dialog_inputs=asdict(NewCollectionInput()),\n            combobox=True,\n        ),\n        StrInput(\n            name=\"keyspace\",\n            display_name=\"Keyspace\",\n            info=\"Optional keyspace within Astra DB to use for the collection.\",\n            advanced=True,\n        ),\n        DropdownInput(\n            name=\"embedding_choice\",\n            display_name=\"Embedding Model or Astra Vectorize\",\n            info=\"Choose an embedding model or use Astra Vectorize.\",\n            options=[\"Embedding Model\", \"Astra Vectorize\"],\n            value=\"Embedding Model\",\n            advanced=True,\n            real_time_refresh=True,\n        ),\n        HandleInput(\n            name=\"embedding_model\",\n            display_name=\"Embedding Model\",\n            input_types=[\"Embeddings\"],\n            info=\"Specify the Embedding Model. 
Not required for Astra Vectorize collections.\",\n            required=False,\n        ),\n        *LCVectorStoreComponent.inputs,\n        IntInput(\n            name=\"number_of_results\",\n            display_name=\"Number of Search Results\",\n            info=\"Number of search results to return.\",\n            advanced=True,\n            value=4,\n        ),\n        DropdownInput(\n            name=\"search_type\",\n            display_name=\"Search Type\",\n            info=\"Search type to use\",\n            options=[\"Similarity\", \"Similarity with score threshold\", \"MMR (Max Marginal Relevance)\"],\n            value=\"Similarity\",\n            advanced=True,\n        ),\n        FloatInput(\n            name=\"search_score_threshold\",\n            display_name=\"Search Score Threshold\",\n            info=\"Minimum similarity score threshold for search results. \"\n            \"(when using 'Similarity with score threshold')\",\n            value=0,\n            advanced=True,\n        ),\n        NestedDictInput(\n            name=\"advanced_search_filter\",\n            display_name=\"Search Metadata Filter\",\n            info=\"Optional dictionary of filters to apply to the search query.\",\n            advanced=True,\n        ),\n        BoolInput(\n            name=\"autodetect_collection\",\n            display_name=\"Autodetect Collection\",\n            info=\"Boolean flag to determine whether to autodetect the collection.\",\n            advanced=True,\n            value=True,\n        ),\n        StrInput(\n            name=\"content_field\",\n            display_name=\"Content Field\",\n            info=\"Field to use as the text content field for the vector store.\",\n            advanced=True,\n        ),\n        StrInput(\n            name=\"deletion_field\",\n            display_name=\"Deletion Based On Field\",\n            info=\"When this parameter is provided, documents in the target collection with \"\n            \"metadata 
field values matching the input metadata field value will be deleted \"\n            \"before new data is loaded.\",\n            advanced=True,\n        ),\n        BoolInput(\n            name=\"ignore_invalid_documents\",\n            display_name=\"Ignore Invalid Documents\",\n            info=\"Boolean flag to determine whether to ignore invalid documents at runtime.\",\n            advanced=True,\n        ),\n        NestedDictInput(\n            name=\"astradb_vectorstore_kwargs\",\n            display_name=\"AstraDBVectorStore Parameters\",\n            info=\"Optional dictionary of additional parameters for the AstraDBVectorStore.\",\n            advanced=True,\n        ),\n    ]\n\n    @classmethod\n    def map_cloud_providers(cls):\n        return {\n            \"Amazon Web Services\": {\n                \"id\": \"aws\",\n                \"regions\": [\"us-east-2\", \"ap-south-1\", \"eu-west-1\"],\n            },\n            \"Google Cloud Platform\": {\n                \"id\": \"gcp\",\n                \"regions\": [\"us-east1\"],\n            },\n            \"Microsoft Azure\": {\n                \"id\": \"azure\",\n                \"regions\": [\"westus3\"],\n            },\n        }\n\n    @classmethod\n    def create_database_api(\n        cls,\n        token: str,\n        new_database_name: str,\n        cloud_provider: str,\n        region: str,\n    ):\n        client = DataAPIClient(token=token)\n\n        # Get the admin object\n        admin_client = client.get_admin(token=token)\n\n        # Call the create database function\n        return admin_client.create_database(\n            name=new_database_name,\n            cloud_provider=cloud_provider,\n            region=region,\n        )\n\n    @classmethod\n    def create_collection_api(\n        cls,\n        token: str,\n        database_name: str,\n        new_collection_name: str,\n        dimension: int | None = None,\n        embedding_generation_provider: str | None = None,\n     
   embedding_generation_model: str | None = None,\n    ):\n        client = DataAPIClient(token=token)\n        api_endpoint = cls.get_api_endpoint_static(token=token, database_name=database_name)\n\n        # Get the database object\n        database = client.get_database(api_endpoint=api_endpoint, token=token)\n\n        # Build vectorize options, if needed\n        vectorize_options = None\n        if not dimension:\n            vectorize_options = CollectionVectorServiceOptions(\n                provider=embedding_generation_provider,\n                model_name=embedding_generation_model,\n                authentication=None,\n                parameters=None,\n            )\n\n        # Create the collection\n        return database.create_collection(\n            name=new_collection_name,\n            dimension=dimension,\n            service=vectorize_options,\n        )\n\n    @classmethod\n    def get_database_list_static(cls, token: str, environment: str | None = None):\n        client = DataAPIClient(token=token, environment=environment)\n\n        # Get the admin object\n        admin_client = client.get_admin(token=token)\n\n        # Get the list of databases\n        db_list = list(admin_client.list_databases())\n\n        # Generate the api endpoint for each database\n        db_info_dict = {}\n        for db in db_list:\n            try:\n                api_endpoint = f\"https://{}-{}\"\n                db_info_dict[] = {\n                    \"api_endpoint\": api_endpoint,\n                    \"collections\": len(\n                        list(\n                            client.get_database(\n                                api_endpoint=api_endpoint, token=token,\n                            ).list_collection_names(\n                        )\n                    ),\n                }\n            except Exception:  # noqa: BLE001, S110\n                pass\n\n        return db_info_dict\n\n    def get_database_list(self):\n        return 
self.get_database_list_static(token=self.token, environment=self.environment)\n\n    @classmethod\n    def get_api_endpoint_static(\n        cls,\n        token: str,\n        environment: str | None = None,\n        api_endpoint: str | None = None,\n        database_name: str | None = None,\n    ):\n        # If the api_endpoint is set, return it\n        if api_endpoint:\n            return api_endpoint\n\n        # Check if the database_name is like a url\n        if database_name and database_name.startswith(\"https://\"):\n            return database_name\n\n        # If the database is not set, nothing we can do.\n        if not database_name:\n            return None\n\n        # Otherwise, get the URL from the database list\n        return cls.get_database_list_static(token=token, environment=environment).get(database_name).get(\"api_endpoint\")\n\n    def get_api_endpoint(self, *, api_endpoint: str | None = None):\n        return self.get_api_endpoint_static(\n            token=self.token,\n            environment=self.environment,\n            api_endpoint=api_endpoint or self.d_api_endpoint,\n            database_name=self.api_endpoint,\n        )\n\n    def get_keyspace(self):\n        keyspace = self.keyspace\n\n        if keyspace:\n            return keyspace.strip()\n\n        return None\n\n    def get_database_object(self, api_endpoint: str | None = None):\n        try:\n            client = DataAPIClient(token=self.token, environment=self.environment)\n\n            return client.get_database(\n                api_endpoint=self.get_api_endpoint(api_endpoint=api_endpoint),\n                token=self.token,\n                keyspace=self.get_keyspace(),\n            )\n        except Exception as e:  # noqa: BLE001\n            self.log(f\"Error getting database: {e}\")\n\n            return None\n\n    def collection_data(self, collection_name: str, database: Database | None = None):\n        try:\n            if not database:\n                
client = DataAPIClient(token=self.token, environment=self.environment)\n\n                database = client.get_database(\n                    api_endpoint=self.get_api_endpoint(),\n                    token=self.token,\n                    keyspace=self.get_keyspace(),\n                )\n\n            collection = database.get_collection(collection_name, keyspace=self.get_keyspace())\n\n            return collection.estimated_document_count()\n        except Exception as e:  # noqa: BLE001\n            self.log(f\"Error checking collection data: {e}\")\n\n            return None\n\n    def get_vectorize_providers(self):\n        try:\n            self.log(\"Dynamically updating list of Vectorize providers.\")\n\n            # Get the admin object\n            admin = AstraDBAdmin(token=self.token)\n            db_admin = admin.get_database_admin(api_endpoint=self.get_api_endpoint())\n\n            # Get the list of embedding providers\n            embedding_providers = db_admin.find_embedding_providers().as_dict()\n\n            vectorize_providers_mapping = {}\n            # Map the provider display name to the provider key and models\n            for provider_key, provider_data in embedding_providers[\"embeddingProviders\"].items():\n                display_name = provider_data[\"displayName\"]\n                models = [model[\"name\"] for model in provider_data[\"models\"]]\n\n                # TODO:\n                vectorize_providers_mapping[display_name] = [provider_key, models]\n\n            # Sort the resulting dictionary\n            return defaultdict(list, dict(sorted(vectorize_providers_mapping.items())))\n        except Exception as e:  # noqa: BLE001\n            self.log(f\"Error fetching Vectorize providers: {e}\")\n\n            return {}\n\n    def _initialize_database_options(self):\n        try:\n            return [\n                {\n                    \"name\": name,\n                    \"collections\": info[\"collections\"],\n        
            \"api_endpoint\": info[\"api_endpoint\"],\n                }\n                for name, info in self.get_database_list().items()\n            ]\n        except Exception as e:  # noqa: BLE001\n            self.log(f\"Error fetching databases: {e}\")\n\n            return []\n\n    def _initialize_collection_options(self, api_endpoint: str | None = None):\n        database = self.get_database_object(api_endpoint=api_endpoint)\n        if database is None:\n            return []\n\n        try:\n            collection_list = list(database.list_collections(keyspace=self.get_keyspace()))\n\n            return [\n                {\n                    \"name\":,\n                    \"records\": self.collection_data(, database=database),\n                    \"provider\": (\n                        col.options.vector.service.provider\n                        if col.options.vector and col.options.vector.service\n                        else None\n                    ),\n                    \"icon\": \"\",\n                    \"model\": (\n                        col.options.vector.service.model_name\n                        if col.options.vector and col.options.vector.service\n                        else None\n                    ),\n                }\n                for col in collection_list\n            ]\n        except Exception as e:  # noqa: BLE001\n            self.log(f\"Error fetching collections: {e}\")\n\n            return []\n\n    def update_build_config(self, build_config: dict, field_value: str, field_name: str | None = None):\n        # TODO: Remove special astra flags when overlays are out\n        # TODO: Better targeting of this field\n        dslf = os.getenv(\"AWS_EXECUTION_ENV\") == \"AWS_ECS_FARGATE\"\n\n        # Refresh the database name options\n        if not dslf and (field_name in [\"token\", \"environment\"] or not build_config[\"api_endpoint\"][\"options\"]):\n            # Get the list of options we have based on the token 
provided\n            database_options = self._initialize_database_options()\n\n            # Reset the collection values selected\n            build_config[\"collection_name\"][\"options\"] = []\n            build_config[\"collection_name\"][\"options_metadata\"] = []\n            build_config[\"collection_name\"][\"value\"] = \"\"\n\n            # Scenario #1: We have database options from the provided token\n            build_config[\"api_endpoint\"][\"value\"] = \"\"\n            build_config[\"api_endpoint\"][\"name\"] = \"Database\"\n\n            # If we retrieved options based on the token, show the dropdown\n            build_config[\"api_endpoint\"][\"options\"] = [db[\"name\"] for db in database_options]\n            build_config[\"api_endpoint\"][\"options_metadata\"] = [\n                {k: v for k, v in db.items() if k not in [\"name\"]} for db in database_options\n            ]\n\n            # Get list of regions for a given cloud provider\n            \"\"\"\n            cloud_provider = (\n                build_config[\"api_endpoint\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"][\"cloud_provider\"][\n                    \"value\"\n                ]\n                or \"Amazon Web Services\"\n            )\n            build_config[\"api_endpoint\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"][\"region\"][\n                \"options\"\n            ] = self.map_cloud_providers()[cloud_provider][\"regions\"]\n            \"\"\"\n\n        # Refresh the collection name options\n        if field_name == \"api_endpoint\":\n            # Reset the selected collection\n            build_config[\"collection_name\"][\"value\"] = \"\"\n\n            # Set the underlying api endpoint value of the database\n            if field_value in build_config[\"api_endpoint\"][\"options\"]:\n                index_of_name = build_config[\"api_endpoint\"][\"options\"].index(field_value)\n                
build_config[\"d_api_endpoint\"][\"value\"] = build_config[\"api_endpoint\"][\"options_metadata\"][\n                    index_of_name\n                ][\"api_endpoint\"]\n            else:\n                build_config[\"d_api_endpoint\"][\"value\"] = \"\"\n\n            # Reload the list of collections and metadata associated\n            collection_options = self._initialize_collection_options(\n                api_endpoint=build_config[\"d_api_endpoint\"][\"value\"] if not dslf else None\n            )\n\n            # If we have collections, show the dropdown\n            build_config[\"collection_name\"][\"options\"] = [col[\"name\"] for col in collection_options]\n            build_config[\"collection_name\"][\"options_metadata\"] = [\n                {k: v for k, v in col.items() if k not in [\"name\"]} for col in collection_options\n            ]\n\n        # Hide embedding model option if opriona_metadata provider is not null\n        if field_name == \"collection_name\" and field_value:\n            # Set the options for collection name to be the field value if its a new collection\n            if field_value not in build_config[\"collection_name\"][\"options\"]:\n                # If this is running in DSLF, we may need to initialize the options again\n                if dslf:\n                    # Reload the list of collections and metadata associated\n                    collection_options = self._initialize_collection_options(\n                        api_endpoint=build_config[\"d_api_endpoint\"][\"value\"] if not dslf else None\n                    )\n\n                    # If we have collections, show the dropdown\n                    build_config[\"collection_name\"][\"options\"] = [col[\"name\"] for col in collection_options]\n                    build_config[\"collection_name\"][\"options_metadata\"] = [\n                        {k: v for k, v in col.items() if k not in [\"name\"]} for col in collection_options\n                    ]\n        
        else:\n                    # Add the new collection to the list of options\n                    build_config[\"collection_name\"][\"options\"].append(field_value)\n                    build_config[\"collection_name\"][\"options_metadata\"].append(\n                        {\"records\": 0, \"provider\": None, \"icon\": \"\", \"model\": None}\n                    )\n\n                    # Ensure that autodetect collection is set to False, since its a new collection\n                    build_config[\"autodetect_collection\"][\"value\"] = False\n            else:\n                build_config[\"autodetect_collection\"][\"value\"] = True\n\n            # Find the position of the selected collection to align with metadata\n            index_of_name = build_config[\"collection_name\"][\"options\"].index(field_value)\n\n            # Get the provider value of the selected collection\n            value_of_provider = build_config[\"collection_name\"][\"options_metadata\"][index_of_name][\"provider\"]\n\n            # If we were able to determine the Vectorize provider, set it accordingly\n            if value_of_provider:\n                build_config[\"embedding_model\"][\"advanced\"] = True\n                build_config[\"embedding_choice\"][\"value\"] = \"Astra Vectorize\"\n            else:\n                build_config[\"embedding_model\"][\"advanced\"] = False\n                build_config[\"embedding_choice\"][\"value\"] = \"Embedding Model\"\n\n        # For the final step, get the list of vectorize providers\n        \"\"\"\n        vectorize_providers = self.get_vectorize_providers()\n        if not vectorize_providers:\n            return build_config\n\n        # Allow the user to see the embedding provider options\n        provider_options = build_config[\"collection_name\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"][\n            \"embedding_generation_provider\"\n        ][\"options\"]\n        if not provider_options:\n         
   # If the collection is set, allow user to see embedding options\n            build_config[\"collection_name\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"][\n                \"embedding_generation_provider\"\n            ][\"options\"] = [\"Bring your own\", \"Nvidia\", *[key for key in vectorize_providers if key != \"Nvidia\"]]\n\n        # And allow the user to see the models based on a selected provider\n        model_options = build_config[\"collection_name\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"][\n            \"embedding_generation_model\"\n        ][\"options\"]\n        if not model_options:\n            embedding_provider = build_config[\"collection_name\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"][\n                \"embedding_generation_provider\"\n            ][\"value\"]\n\n            build_config[\"collection_name\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"][\n                \"embedding_generation_model\"\n            ][\"options\"] = vectorize_providers.get(embedding_provider, [[], []])[1]\n        \"\"\"\n\n        return build_config\n\n    @check_cached_vector_store\n    def build_vector_store(self):\n        try:\n            from langchain_astradb import AstraDBVectorStore\n        except ImportError as e:\n            msg = (\n                \"Could not import langchain Astra DB integration package. 
\"\n                \"Please install it with `pip install langchain-astradb`.\"\n            )\n            raise ImportError(msg) from e\n\n        # Get the embedding model and additional params\n        embedding_params = (\n            {\"embedding\": self.embedding_model}\n            if self.embedding_model and self.embedding_choice == \"Embedding Model\"\n            else {}\n        )\n\n        # Get the additional parameters\n        additional_params = self.astradb_vectorstore_kwargs or {}\n\n        # Get Langflow version and platform information\n        __version__ = get_version_info()[\"version\"]\n        langflow_prefix = \"\"\n        if os.getenv(\"AWS_EXECUTION_ENV\") == \"AWS_ECS_FARGATE\":  # TODO: More precise way of detecting\n            langflow_prefix = \"ds-\"\n\n        # Get the database object\n        database = self.get_database_object(api_endpoint=self.d_api_endpoint)\n        autodetect = self.collection_name in database.list_collection_names() and self.autodetect_collection\n\n        # Bundle up the auto-detect parameters\n        autodetect_params = {\n            \"autodetect_collection\": autodetect,\n            \"content_field\": (\n                self.content_field\n                if self.content_field and embedding_params\n                else (\n                    \"page_content\"\n                    if embedding_params\n                    and self.collection_data(collection_name=self.collection_name, database=database) == 0\n                    else None\n                )\n            ),\n            \"ignore_invalid_documents\": self.ignore_invalid_documents,\n        }\n\n        # Attempt to build the Vector Store object\n        try:\n            vector_store = AstraDBVectorStore(\n                # Astra DB Authentication Parameters\n                token=self.token,\n                api_endpoint=database.api_endpoint,\n                namespace=database.keyspace,\n                
collection_name=self.collection_name,\n                environment=self.environment,\n                # Astra DB Usage Tracking Parameters\n                ext_callers=[(f\"{langflow_prefix}langflow\", __version__)],\n                # Astra DB Vector Store Parameters\n                **autodetect_params,\n                **embedding_params,\n                **additional_params,\n            )\n        except Exception as e:\n            msg = f\"Error initializing AstraDBVectorStore: {e}\"\n            raise ValueError(msg) from e\n\n        # Add documents to the vector store\n        self._add_documents_to_vector_store(vector_store)\n\n        return vector_store\n\n    def _add_documents_to_vector_store(self, vector_store) -> None:\n        documents = []\n        for _input in self.ingest_data or []:\n            if isinstance(_input, Data):\n                documents.append(_input.to_lc_document())\n            else:\n                msg = \"Vector Store Inputs must be Data objects.\"\n                raise TypeError(msg)\n\n        if documents and self.deletion_field:\n            self.log(f\"Deleting documents where {self.deletion_field}\")\n            try:\n                database = self.get_database_object(api_endpoint=self.d_api_endpoint)\n                collection = database.get_collection(self.collection_name, keyspace=database.keyspace)\n                delete_values = list({doc.metadata[self.deletion_field] for doc in documents})\n                self.log(f\"Deleting documents where {self.deletion_field} matches {delete_values}.\")\n                collection.delete_many({f\"metadata.{self.deletion_field}\": {\"$in\": delete_values}})\n            except Exception as e:\n                msg = f\"Error deleting documents from AstraDBVectorStore based on '{self.deletion_field}': {e}\"\n                raise ValueError(msg) from e\n\n        if documents:\n            self.log(f\"Adding {len(documents)} documents to the Vector Store.\")\n            
try:\n                vector_store.add_documents(documents)\n            except Exception as e:\n                msg = f\"Error adding documents to AstraDBVectorStore: {e}\"\n                raise ValueError(msg) from e\n        else:\n            self.log(\"No documents to add to the Vector Store.\")\n\n    def _map_search_type(self) -> str:\n        search_type_mapping = {\n            \"Similarity with score threshold\": \"similarity_score_threshold\",\n            \"MMR (Max Marginal Relevance)\": \"mmr\",\n        }\n\n        return search_type_mapping.get(self.search_type, \"similarity\")\n\n    def _build_search_args(self):\n        query = self.search_query if isinstance(self.search_query, str) and self.search_query.strip() else None\n\n        if query:\n            args = {\n                \"query\": query,\n                \"search_type\": self._map_search_type(),\n                \"k\": self.number_of_results,\n                \"score_threshold\": self.search_score_threshold,\n            }\n        elif self.advanced_search_filter:\n            args = {\n                \"n\": self.number_of_results,\n            }\n        else:\n            return {}\n\n        filter_arg = self.advanced_search_filter or {}\n        if filter_arg:\n            args[\"filter\"] = filter_arg\n\n        return args\n\n    def search_documents(self, vector_store=None) -> list[Data]:\n        vector_store = vector_store or self.build_vector_store()\n\n        self.log(f\"Search input: {self.search_query}\")\n        self.log(f\"Search type: {self.search_type}\")\n        self.log(f\"Number of results: {self.number_of_results}\")\n\n        try:\n            search_args = self._build_search_args()\n        except Exception as e:\n            msg = f\"Error in AstraDBVectorStore._build_search_args: {e}\"\n            raise ValueError(msg) from e\n\n        if not search_args:\n            self.log(\"No search input or filters provided. 
Skipping search.\")\n            return []\n\n        docs = []\n        search_method = \"search\" if \"query\" in search_args else \"metadata_search\"\n\n        try:\n            self.log(f\"Calling vector_store.{search_method} with args: {search_args}\")\n            docs = getattr(vector_store, search_method)(**search_args)\n        except Exception as e:\n            msg = f\"Error performing {search_method} in AstraDBVectorStore: {e}\"\n            raise ValueError(msg) from e\n\n        self.log(f\"Retrieved documents: {len(docs)}\")\n\n        data = docs_to_data(docs)\n        self.log(f\"Converted documents to data: {len(data)}\")\n        self.status = data\n\n        return data\n\n    def get_retriever_kwargs(self):\n        search_args = self._build_search_args()\n\n        return {\n            \"search_type\": self._map_search_type(),\n            \"search_kwargs\": search_args,\n        }\n"
+                "value": "import os\nfrom collections import defaultdict\nfrom dataclasses import dataclass, field\n\nfrom astrapy import AstraDBAdmin, DataAPIClient, Database\nfrom langchain_astradb import AstraDBVectorStore, CollectionVectorServiceOptions\n\nfrom langflow.base.vectorstores.model import LCVectorStoreComponent, check_cached_vector_store\nfrom langflow.helpers import docs_to_data\nfrom langflow.inputs import FloatInput, NestedDictInput\nfrom import (\n    BoolInput,\n    DropdownInput,\n    HandleInput,\n    IntInput,\n    SecretStrInput,\n    StrInput,\n)\nfrom langflow.schema import Data\nfrom langflow.utils.version import get_version_info\n\n\nclass AstraDBVectorStoreComponent(LCVectorStoreComponent):\n    display_name: str = \"Astra DB\"\n    description: str = \"Ingest and search documents in Astra DB\"\n    documentation: str = \"\"\n    name = \"AstraDB\"\n    icon: str = \"AstraDB\"\n\n    _cached_vector_store: AstraDBVectorStore | None = None\n\n    @dataclass\n    class NewDatabaseInput:\n        functionality: str = \"create\"\n        fields: dict[str, dict] = field(\n            default_factory=lambda: {\n                \"data\": {\n                    \"node\": {\n                        \"description\": \"Create a new database in Astra DB.\",\n                        \"display_name\": \"Create New Database\",\n                        \"field_order\": [\"new_database_name\", \"cloud_provider\", \"region\"],\n                        \"template\": {\n                            \"new_database_name\": StrInput(\n                                name=\"new_database_name\",\n                                display_name=\"New Database Name\",\n                                info=\"Name of the new database to create in Astra DB.\",\n                                required=True,\n                            ),\n                            \"cloud_provider\": DropdownInput(\n                                name=\"cloud_provider\",\n          
                      display_name=\"Cloud Provider\",\n                                info=\"Cloud provider for the new database.\",\n                                options=[\"Amazon Web Services\", \"Google Cloud Platform\", \"Microsoft Azure\"],\n                                required=True,\n                            ),\n                            \"region\": DropdownInput(\n                                name=\"region\",\n                                display_name=\"Region\",\n                                info=\"Region for the new database.\",\n                                options=[],\n                                required=True,\n                            ),\n                        },\n                    },\n                }\n            }\n        )\n\n    @dataclass\n    class NewCollectionInput:\n        functionality: str = \"create\"\n        fields: dict[str, dict] = field(\n            default_factory=lambda: {\n                \"data\": {\n                    \"node\": {\n                        \"description\": \"Create a new collection in Astra DB.\",\n                        \"display_name\": \"Create New Collection\",\n                        \"field_order\": [\n                            \"new_collection_name\",\n                            \"embedding_generation_provider\",\n                            \"embedding_generation_model\",\n                        ],\n                        \"template\": {\n                            \"new_collection_name\": StrInput(\n                                name=\"new_collection_name\",\n                                display_name=\"New Collection Name\",\n                                info=\"Name of the new collection to create in Astra DB.\",\n                                required=True,\n                            ),\n                            \"embedding_generation_provider\": DropdownInput(\n                                name=\"embedding_generation_provider\",\n        
                        display_name=\"Embedding Generation Provider\",\n                                info=\"Provider to use for generating embeddings.\",\n                                options=[],\n                                required=True,\n                            ),\n                            \"embedding_generation_model\": DropdownInput(\n                                name=\"embedding_generation_model\",\n                                display_name=\"Embedding Generation Model\",\n                                info=\"Model to use for generating embeddings.\",\n                                options=[],\n                                required=True,\n                            ),\n                        },\n                    },\n                }\n            }\n        )\n\n    inputs = [\n        SecretStrInput(\n            name=\"token\",\n            display_name=\"Astra DB Application Token\",\n            info=\"Authentication token for accessing Astra DB.\",\n            value=\"ASTRA_DB_APPLICATION_TOKEN\",\n            required=True,\n            real_time_refresh=True,\n            input_types=[],\n        ),\n        StrInput(\n            name=\"environment\",\n            display_name=\"Environment\",\n            info=\"The environment for the Astra DB API Endpoint.\",\n            advanced=True,\n        ),\n        DropdownInput(\n            name=\"api_endpoint\",\n            display_name=\"Database\",\n            info=\"The Database / API Endpoint for the Astra DB instance.\",\n            required=True,\n            refresh_button=True,\n            real_time_refresh=True,\n            combobox=True,\n        ),\n        StrInput(\n            name=\"d_api_endpoint\",\n            display_name=\"Database API Endpoint\",\n            info=\"The API Endpoint for the Astra DB instance. 
Supercedes database selection.\",\n            advanced=True,\n        ),\n        DropdownInput(\n            name=\"collection_name\",\n            display_name=\"Collection\",\n            info=\"The name of the collection within Astra DB where the vectors will be stored.\",\n            required=True,\n            refresh_button=True,\n            real_time_refresh=True,\n            # dialog_inputs=asdict(NewCollectionInput()),\n            combobox=True,\n        ),\n        StrInput(\n            name=\"keyspace\",\n            display_name=\"Keyspace\",\n            info=\"Optional keyspace within Astra DB to use for the collection.\",\n            advanced=True,\n        ),\n        DropdownInput(\n            name=\"embedding_choice\",\n            display_name=\"Embedding Model or Astra Vectorize\",\n            info=\"Choose an embedding model or use Astra Vectorize.\",\n            options=[\"Embedding Model\", \"Astra Vectorize\"],\n            value=\"Embedding Model\",\n            advanced=True,\n            real_time_refresh=True,\n        ),\n        HandleInput(\n            name=\"embedding_model\",\n            display_name=\"Embedding Model\",\n            input_types=[\"Embeddings\"],\n            info=\"Specify the Embedding Model. 
Not required for Astra Vectorize collections.\",\n            required=False,\n        ),\n        *LCVectorStoreComponent.inputs,\n        IntInput(\n            name=\"number_of_results\",\n            display_name=\"Number of Search Results\",\n            info=\"Number of search results to return.\",\n            advanced=True,\n            value=4,\n        ),\n        DropdownInput(\n            name=\"search_type\",\n            display_name=\"Search Type\",\n            info=\"Search type to use\",\n            options=[\"Similarity\", \"Similarity with score threshold\", \"MMR (Max Marginal Relevance)\"],\n            value=\"Similarity\",\n            advanced=True,\n        ),\n        FloatInput(\n            name=\"search_score_threshold\",\n            display_name=\"Search Score Threshold\",\n            info=\"Minimum similarity score threshold for search results. \"\n            \"(when using 'Similarity with score threshold')\",\n            value=0,\n            advanced=True,\n        ),\n        NestedDictInput(\n            name=\"advanced_search_filter\",\n            display_name=\"Search Metadata Filter\",\n            info=\"Optional dictionary of filters to apply to the search query.\",\n            advanced=True,\n        ),\n        BoolInput(\n            name=\"autodetect_collection\",\n            display_name=\"Autodetect Collection\",\n            info=\"Boolean flag to determine whether to autodetect the collection.\",\n            advanced=True,\n            value=True,\n        ),\n        StrInput(\n            name=\"content_field\",\n            display_name=\"Content Field\",\n            info=\"Field to use as the text content field for the vector store.\",\n            advanced=True,\n        ),\n        StrInput(\n            name=\"deletion_field\",\n            display_name=\"Deletion Based On Field\",\n            info=\"When this parameter is provided, documents in the target collection with \"\n            \"metadata 
field values matching the input metadata field value will be deleted \"\n            \"before new data is loaded.\",\n            advanced=True,\n        ),\n        BoolInput(\n            name=\"ignore_invalid_documents\",\n            display_name=\"Ignore Invalid Documents\",\n            info=\"Boolean flag to determine whether to ignore invalid documents at runtime.\",\n            advanced=True,\n        ),\n        NestedDictInput(\n            name=\"astradb_vectorstore_kwargs\",\n            display_name=\"AstraDBVectorStore Parameters\",\n            info=\"Optional dictionary of additional parameters for the AstraDBVectorStore.\",\n            advanced=True,\n        ),\n    ]\n\n    @classmethod\n    def map_cloud_providers(cls):\n        return {\n            \"Amazon Web Services\": {\n                \"id\": \"aws\",\n                \"regions\": [\"us-east-2\", \"ap-south-1\", \"eu-west-1\"],\n            },\n            \"Google Cloud Platform\": {\n                \"id\": \"gcp\",\n                \"regions\": [\"us-east1\"],\n            },\n            \"Microsoft Azure\": {\n                \"id\": \"azure\",\n                \"regions\": [\"westus3\"],\n            },\n        }\n\n    @classmethod\n    def create_database_api(\n        cls,\n        token: str,\n        new_database_name: str,\n        cloud_provider: str,\n        region: str,\n    ):\n        client = DataAPIClient(token=token)\n\n        # Get the admin object\n        admin_client = client.get_admin(token=token)\n\n        # Call the create database function\n        return admin_client.create_database(\n            name=new_database_name,\n            cloud_provider=cloud_provider,\n            region=region,\n        )\n\n    @classmethod\n    def create_collection_api(\n        cls,\n        token: str,\n        database_name: str,\n        new_collection_name: str,\n        dimension: int | None = None,\n        embedding_generation_provider: str | None = None,\n     
   embedding_generation_model: str | None = None,\n    ):\n        client = DataAPIClient(token=token)\n        api_endpoint = cls.get_api_endpoint_static(token=token, database_name=database_name)\n\n        # Get the database object\n        database = client.get_database(api_endpoint=api_endpoint, token=token)\n\n        # Build vectorize options, if needed\n        vectorize_options = None\n        if not dimension:\n            vectorize_options = CollectionVectorServiceOptions(\n                provider=embedding_generation_provider,\n                model_name=embedding_generation_model,\n                authentication=None,\n                parameters=None,\n            )\n\n        # Create the collection\n        return database.create_collection(\n            name=new_collection_name,\n            dimension=dimension,\n            service=vectorize_options,\n        )\n\n    @classmethod\n    def get_database_list_static(cls, token: str, environment: str | None = None):\n        client = DataAPIClient(token=token, environment=environment)\n\n        # Get the admin object\n        admin_client = client.get_admin(token=token)\n\n        # Get the list of databases\n        db_list = list(admin_client.list_databases())\n\n        # Generate the api endpoint for each database\n        db_info_dict = {}\n        for db in db_list:\n            try:\n                api_endpoint = f\"https://{}-{}\"\n                db_info_dict[] = {\n                    \"api_endpoint\": api_endpoint,\n                    \"collections\": len(\n                        list(\n                            client.get_database(\n                                api_endpoint=api_endpoint, token=token,\n                            ).list_collection_names(\n                        )\n                    ),\n                }\n            except Exception:  # noqa: BLE001, S110\n                pass\n\n        return db_info_dict\n\n    def get_database_list(self):\n        return 
self.get_database_list_static(token=self.token, environment=self.environment)\n\n    @classmethod\n    def get_api_endpoint_static(\n        cls,\n        token: str,\n        environment: str | None = None,\n        api_endpoint: str | None = None,\n        database_name: str | None = None,\n    ):\n        # If the api_endpoint is set, return it\n        if api_endpoint:\n            return api_endpoint\n\n        # Check if the database_name is like a url\n        if database_name and database_name.startswith(\"https://\"):\n            return database_name\n\n        # If the database is not set, nothing we can do.\n        if not database_name:\n            return None\n\n        # Otherwise, get the URL from the database list\n        return cls.get_database_list_static(token=token, environment=environment).get(database_name).get(\"api_endpoint\")\n\n    def get_api_endpoint(self, *, api_endpoint: str | None = None):\n        return self.get_api_endpoint_static(\n            token=self.token,\n            environment=self.environment,\n            api_endpoint=api_endpoint or self.d_api_endpoint,\n            database_name=self.api_endpoint,\n        )\n\n    def get_keyspace(self):\n        keyspace = self.keyspace\n\n        if keyspace:\n            return keyspace.strip()\n\n        return None\n\n    def get_database_object(self, api_endpoint: str | None = None):\n        try:\n            client = DataAPIClient(token=self.token, environment=self.environment)\n\n            return client.get_database(\n                api_endpoint=self.get_api_endpoint(api_endpoint=api_endpoint),\n                token=self.token,\n                keyspace=self.get_keyspace(),\n            )\n        except Exception as e:  # noqa: BLE001\n            self.log(f\"Error getting database: {e}\")\n\n            return None\n\n    def collection_data(self, collection_name: str, database: Database | None = None):\n        try:\n            if not database:\n                
client = DataAPIClient(token=self.token, environment=self.environment)\n\n                database = client.get_database(\n                    api_endpoint=self.get_api_endpoint(),\n                    token=self.token,\n                    keyspace=self.get_keyspace(),\n                )\n\n            collection = database.get_collection(collection_name, keyspace=self.get_keyspace())\n\n            return collection.estimated_document_count()\n        except Exception as e:  # noqa: BLE001\n            self.log(f\"Error checking collection data: {e}\")\n\n            return None\n\n    def get_vectorize_providers(self):\n        try:\n            self.log(\"Dynamically updating list of Vectorize providers.\")\n\n            # Get the admin object\n            admin = AstraDBAdmin(token=self.token)\n            db_admin = admin.get_database_admin(api_endpoint=self.get_api_endpoint())\n\n            # Get the list of embedding providers\n            embedding_providers = db_admin.find_embedding_providers().as_dict()\n\n            vectorize_providers_mapping = {}\n            # Map the provider display name to the provider key and models\n            for provider_key, provider_data in embedding_providers[\"embeddingProviders\"].items():\n                display_name = provider_data[\"displayName\"]\n                models = [model[\"name\"] for model in provider_data[\"models\"]]\n\n                # TODO:\n                vectorize_providers_mapping[display_name] = [provider_key, models]\n\n            # Sort the resulting dictionary\n            return defaultdict(list, dict(sorted(vectorize_providers_mapping.items())))\n        except Exception as e:  # noqa: BLE001\n            self.log(f\"Error fetching Vectorize providers: {e}\")\n\n            return {}\n\n    def _initialize_database_options(self):\n        try:\n            return [\n                {\n                    \"name\": name,\n                    \"collections\": info[\"collections\"],\n        
            \"api_endpoint\": info[\"api_endpoint\"],\n                }\n                for name, info in self.get_database_list().items()\n            ]\n        except Exception as e:\n            msg = f\"Error fetching database options: {e}\"\n            raise ValueError(msg) from e\n\n    def _initialize_collection_options(self, api_endpoint: str | None = None):\n        database = self.get_database_object(api_endpoint=api_endpoint)\n        if database is None:\n            return []\n\n        try:\n            collection_list = list(database.list_collections(keyspace=self.get_keyspace()))\n\n            return [\n                {\n                    \"name\":,\n                    \"records\": self.collection_data(, database=database),\n                    \"provider\": (\n                        col.options.vector.service.provider\n                        if col.options.vector and col.options.vector.service\n                        else None\n                    ),\n                    \"icon\": \"\",\n                    \"model\": (\n                        col.options.vector.service.model_name\n                        if col.options.vector and col.options.vector.service\n                        else None\n                    ),\n                }\n                for col in collection_list\n            ]\n        except Exception as e:  # noqa: BLE001\n            self.log(f\"Error fetching collections: {e}\")\n\n            return []\n\n    def reset_build_config(self, build_config: dict):\n        # Reset the list of databases we have based on the token provided\n        build_config[\"api_endpoint\"][\"options\"] = []\n        build_config[\"api_endpoint\"][\"options_metadata\"] = []\n        build_config[\"api_endpoint\"][\"value\"] = \"\"\n        build_config[\"api_endpoint\"][\"name\"] = \"Database\"\n\n        # Reset the list of collections and metadata associated\n        build_config[\"collection_name\"][\"options\"] = []\n        
build_config[\"collection_name\"][\"options_metadata\"] = []\n        build_config[\"collection_name\"][\"value\"] = \"\"\n\n        return build_config\n\n    def update_build_config(self, build_config: dict, field_value: str, field_name: str | None = None):\n        # TODO: Remove special astra flags when overlays are out\n        # TODO: Better targeting of this field\n        dslf = os.getenv(\"AWS_EXECUTION_ENV\") == \"AWS_ECS_FARGATE\"\n\n        # If the token has not been provided, simply return\n        if not self.token:\n            return self.reset_build_config(build_config)\n\n        # Refresh the database name options\n        if not dslf and (field_name in [\"token\", \"environment\"] or not build_config[\"api_endpoint\"][\"options\"]):\n            # Reset the build config to ensure we are starting fresh\n            build_config = self.reset_build_config(build_config)\n\n            # Get the list of options we have based on the token provided\n            database_options = self._initialize_database_options()\n\n            # If we retrieved options based on the token, show the dropdown\n            build_config[\"api_endpoint\"][\"options\"] = [db[\"name\"] for db in database_options]\n            build_config[\"api_endpoint\"][\"options_metadata\"] = [\n                {k: v for k, v in db.items() if k not in [\"name\"]} for db in database_options\n            ]\n\n            # Get list of regions for a given cloud provider\n            \"\"\"\n            cloud_provider = (\n                build_config[\"api_endpoint\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"][\"cloud_provider\"][\n                    \"value\"\n                ]\n                or \"Amazon Web Services\"\n            )\n            build_config[\"api_endpoint\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"][\"region\"][\n                \"options\"\n            ] = self.map_cloud_providers()[cloud_provider][\"regions\"]\n         
   \"\"\"\n\n        # Refresh the collection name options\n        if field_name == \"api_endpoint\":\n            # Reset the selected collection\n            build_config[\"collection_name\"][\"value\"] = \"\"\n\n            # Set the underlying api endpoint value of the database\n            if field_value in build_config[\"api_endpoint\"][\"options\"]:\n                index_of_name = build_config[\"api_endpoint\"][\"options\"].index(field_value)\n                build_config[\"d_api_endpoint\"][\"value\"] = build_config[\"api_endpoint\"][\"options_metadata\"][\n                    index_of_name\n                ][\"api_endpoint\"]\n            else:\n                build_config[\"d_api_endpoint\"][\"value\"] = \"\"\n\n            # Reload the list of collections and metadata associated\n            collection_options = self._initialize_collection_options(\n                api_endpoint=build_config[\"d_api_endpoint\"][\"value\"] if not dslf else None\n            )\n\n            # If we have collections, show the dropdown\n            build_config[\"collection_name\"][\"options\"] = [col[\"name\"] for col in collection_options]\n            build_config[\"collection_name\"][\"options_metadata\"] = [\n                {k: v for k, v in col.items() if k not in [\"name\"]} for col in collection_options\n            ]\n\n        # Hide embedding model option if opriona_metadata provider is not null\n        if field_name == \"collection_name\" and field_value:\n            # Set the options for collection name to be the field value if its a new collection\n            if field_value not in build_config[\"collection_name\"][\"options\"]:\n                # If this is running in DSLF, we may need to initialize the options again\n                if dslf:\n                    # Reload the list of collections and metadata associated\n                    collection_options = self._initialize_collection_options(\n                        
api_endpoint=build_config[\"d_api_endpoint\"][\"value\"] if not dslf else None\n                    )\n\n                    # If we have collections, show the dropdown\n                    build_config[\"collection_name\"][\"options\"] = [col[\"name\"] for col in collection_options]\n                    build_config[\"collection_name\"][\"options_metadata\"] = [\n                        {k: v for k, v in col.items() if k not in [\"name\"]} for col in collection_options\n                    ]\n                else:\n                    # Add the new collection to the list of options\n                    build_config[\"collection_name\"][\"options\"].append(field_value)\n                    build_config[\"collection_name\"][\"options_metadata\"].append(\n                        {\"records\": 0, \"provider\": None, \"icon\": \"\", \"model\": None}\n                    )\n\n                    # Ensure that autodetect collection is set to False, since its a new collection\n                    build_config[\"autodetect_collection\"][\"value\"] = False\n            else:\n                build_config[\"autodetect_collection\"][\"value\"] = True\n\n            # Find the position of the selected collection to align with metadata\n            index_of_name = build_config[\"collection_name\"][\"options\"].index(field_value)\n\n            # Get the provider value of the selected collection\n            value_of_provider = build_config[\"collection_name\"][\"options_metadata\"][index_of_name][\"provider\"]\n\n            # If we were able to determine the Vectorize provider, set it accordingly\n            if value_of_provider:\n                build_config[\"embedding_model\"][\"advanced\"] = True\n                build_config[\"embedding_choice\"][\"value\"] = \"Astra Vectorize\"\n            else:\n                build_config[\"embedding_model\"][\"advanced\"] = False\n                build_config[\"embedding_choice\"][\"value\"] = \"Embedding Model\"\n\n        # For 
the final step, get the list of vectorize providers\n        \"\"\"\n        vectorize_providers = self.get_vectorize_providers()\n        if not vectorize_providers:\n            return build_config\n\n        # Allow the user to see the embedding provider options\n        provider_options = build_config[\"collection_name\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"][\n            \"embedding_generation_provider\"\n        ][\"options\"]\n        if not provider_options:\n            # If the collection is set, allow user to see embedding options\n            build_config[\"collection_name\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"][\n                \"embedding_generation_provider\"\n            ][\"options\"] = [\"Bring your own\", \"Nvidia\", *[key for key in vectorize_providers if key != \"Nvidia\"]]\n\n        # And allow the user to see the models based on a selected provider\n        model_options = build_config[\"collection_name\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"][\n            \"embedding_generation_model\"\n        ][\"options\"]\n        if not model_options:\n            embedding_provider = build_config[\"collection_name\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"][\n                \"embedding_generation_provider\"\n            ][\"value\"]\n\n            build_config[\"collection_name\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"][\n                \"embedding_generation_model\"\n            ][\"options\"] = vectorize_providers.get(embedding_provider, [[], []])[1]\n        \"\"\"\n\n        return build_config\n\n    @check_cached_vector_store\n    def build_vector_store(self):\n        try:\n            from langchain_astradb import AstraDBVectorStore\n        except ImportError as e:\n            msg = (\n                \"Could not import langchain Astra DB integration package. 
\"\n                \"Please install it with `pip install langchain-astradb`.\"\n            )\n            raise ImportError(msg) from e\n\n        # Get the embedding model and additional params\n        embedding_params = (\n            {\"embedding\": self.embedding_model}\n            if self.embedding_model and self.embedding_choice == \"Embedding Model\"\n            else {}\n        )\n\n        # Get the additional parameters\n        additional_params = self.astradb_vectorstore_kwargs or {}\n\n        # Get Langflow version and platform information\n        __version__ = get_version_info()[\"version\"]\n        langflow_prefix = \"\"\n        if os.getenv(\"AWS_EXECUTION_ENV\") == \"AWS_ECS_FARGATE\":  # TODO: More precise way of detecting\n            langflow_prefix = \"ds-\"\n\n        # Get the database object\n        database = self.get_database_object(api_endpoint=self.d_api_endpoint)\n        autodetect = self.collection_name in database.list_collection_names() and self.autodetect_collection\n\n        # Bundle up the auto-detect parameters\n        autodetect_params = {\n            \"autodetect_collection\": autodetect,\n            \"content_field\": (\n                self.content_field\n                if self.content_field and embedding_params\n                else (\n                    \"page_content\"\n                    if embedding_params\n                    and self.collection_data(collection_name=self.collection_name, database=database) == 0\n                    else None\n                )\n            ),\n            \"ignore_invalid_documents\": self.ignore_invalid_documents,\n        }\n\n        # Attempt to build the Vector Store object\n        try:\n            vector_store = AstraDBVectorStore(\n                # Astra DB Authentication Parameters\n                token=self.token,\n                api_endpoint=database.api_endpoint,\n                namespace=database.keyspace,\n                
collection_name=self.collection_name,\n                environment=self.environment,\n                # Astra DB Usage Tracking Parameters\n                ext_callers=[(f\"{langflow_prefix}langflow\", __version__)],\n                # Astra DB Vector Store Parameters\n                **autodetect_params,\n                **embedding_params,\n                **additional_params,\n            )\n        except Exception as e:\n            msg = f\"Error initializing AstraDBVectorStore: {e}\"\n            raise ValueError(msg) from e\n\n        # Add documents to the vector store\n        self._add_documents_to_vector_store(vector_store)\n\n        return vector_store\n\n    def _add_documents_to_vector_store(self, vector_store) -> None:\n        documents = []\n        for _input in self.ingest_data or []:\n            if isinstance(_input, Data):\n                documents.append(_input.to_lc_document())\n            else:\n                msg = \"Vector Store Inputs must be Data objects.\"\n                raise TypeError(msg)\n\n        if documents and self.deletion_field:\n            self.log(f\"Deleting documents where {self.deletion_field}\")\n            try:\n                database = self.get_database_object(api_endpoint=self.d_api_endpoint)\n                collection = database.get_collection(self.collection_name, keyspace=database.keyspace)\n                delete_values = list({doc.metadata[self.deletion_field] for doc in documents})\n                self.log(f\"Deleting documents where {self.deletion_field} matches {delete_values}.\")\n                collection.delete_many({f\"metadata.{self.deletion_field}\": {\"$in\": delete_values}})\n            except Exception as e:\n                msg = f\"Error deleting documents from AstraDBVectorStore based on '{self.deletion_field}': {e}\"\n                raise ValueError(msg) from e\n\n        if documents:\n            self.log(f\"Adding {len(documents)} documents to the Vector Store.\")\n            
try:\n                vector_store.add_documents(documents)\n            except Exception as e:\n                msg = f\"Error adding documents to AstraDBVectorStore: {e}\"\n                raise ValueError(msg) from e\n        else:\n            self.log(\"No documents to add to the Vector Store.\")\n\n    def _map_search_type(self) -> str:\n        search_type_mapping = {\n            \"Similarity with score threshold\": \"similarity_score_threshold\",\n            \"MMR (Max Marginal Relevance)\": \"mmr\",\n        }\n\n        return search_type_mapping.get(self.search_type, \"similarity\")\n\n    def _build_search_args(self):\n        query = self.search_query if isinstance(self.search_query, str) and self.search_query.strip() else None\n\n        if query:\n            args = {\n                \"query\": query,\n                \"search_type\": self._map_search_type(),\n                \"k\": self.number_of_results,\n                \"score_threshold\": self.search_score_threshold,\n            }\n        elif self.advanced_search_filter:\n            args = {\n                \"n\": self.number_of_results,\n            }\n        else:\n            return {}\n\n        filter_arg = self.advanced_search_filter or {}\n        if filter_arg:\n            args[\"filter\"] = filter_arg\n\n        return args\n\n    def search_documents(self, vector_store=None) -> list[Data]:\n        vector_store = vector_store or self.build_vector_store()\n\n        self.log(f\"Search input: {self.search_query}\")\n        self.log(f\"Search type: {self.search_type}\")\n        self.log(f\"Number of results: {self.number_of_results}\")\n\n        try:\n            search_args = self._build_search_args()\n        except Exception as e:\n            msg = f\"Error in AstraDBVectorStore._build_search_args: {e}\"\n            raise ValueError(msg) from e\n\n        if not search_args:\n            self.log(\"No search input or filters provided. 
Skipping search.\")\n            return []\n\n        docs = []\n        search_method = \"search\" if \"query\" in search_args else \"metadata_search\"\n\n        try:\n            self.log(f\"Calling vector_store.{search_method} with args: {search_args}\")\n            docs = getattr(vector_store, search_method)(**search_args)\n        except Exception as e:\n            msg = f\"Error performing {search_method} in AstraDBVectorStore: {e}\"\n            raise ValueError(msg) from e\n\n        self.log(f\"Retrieved documents: {len(docs)}\")\n\n        data = docs_to_data(docs)\n        self.log(f\"Converted documents to data: {len(data)}\")\n        self.status = data\n\n        return data\n\n    def get_retriever_kwargs(self):\n        search_args = self._build_search_args()\n\n        return {\n            \"search_type\": self._map_search_type(),\n            \"search_kwargs\": search_args,\n        }\n"
               "collection_name": {
                 "_input_type": "DropdownInput",

From f3c67df4926744bbd8092d9b30eb756d76925182 Mon Sep 17 00:00:00 2001
From: Eric Hare <>
Date: Fri, 31 Jan 2025 09:14:18 -0800
Subject: [PATCH 3/7] Update

 .../components/vectorstores/        | 86 ++++++++-----------
 1 file changed, 34 insertions(+), 52 deletions(-)

diff --git a/src/backend/base/langflow/components/vectorstores/ b/src/backend/base/langflow/components/vectorstores/
index d617e85fc1f3..500979963a2f 100644
--- a/src/backend/base/langflow/components/vectorstores/
+++ b/src/backend/base/langflow/components/vectorstores/
@@ -386,10 +386,9 @@ def get_database_object(self, api_endpoint: str | None = None):
-        except Exception as e:  # noqa: BLE001
-            self.log(f"Error getting database: {e}")
-            return None
+        except Exception as e:
+            msg = f"Error fetching database object: {e}"
+            raise ValueError(msg) from e
     def collection_data(self, collection_name: str, database: Database | None = None):
@@ -452,35 +451,31 @@ def _initialize_database_options(self):
             raise ValueError(msg) from e
     def _initialize_collection_options(self, api_endpoint: str | None = None):
+        # Retrieve the database object
         database = self.get_database_object(api_endpoint=api_endpoint)
-        if database is None:
-            return []
-        try:
-            collection_list = list(database.list_collections(keyspace=self.get_keyspace()))
-            return [
-                {
-                    "name":,
-                    "records": self.collection_data(, database=database),
-                    "provider": (
-                        col.options.vector.service.provider
-                        if col.options.vector and col.options.vector.service
-                        else None
-                    ),
-                    "icon": "",
-                    "model": (
-                        col.options.vector.service.model_name
-                        if col.options.vector and col.options.vector.service
-                        else None
-                    ),
-                }
-                for col in collection_list
-            ]
-        except Exception as e:  # noqa: BLE001
-            self.log(f"Error fetching collections: {e}")
-            return []
+        # Get the list of collections
+        collection_list = list(database.list_collections(keyspace=self.get_keyspace()))
+        # Return the list of collections and metadata associated
+        return [
+            {
+                "name":,
+                "records": self.collection_data(, database=database),
+                "provider": (
+                    col.options.vector.service.provider
+                    if col.options.vector and col.options.vector.service
+                    else None
+                ),
+                "icon": "",
+                "model": (
+                    col.options.vector.service.model_name
+                    if col.options.vector and col.options.vector.service
+                    else None
+                ),
+            }
+            for col in collection_list
+        ]
     def reset_build_config(self, build_config: dict):
         # Reset the list of databases we have based on the token provided
@@ -561,27 +556,14 @@ def update_build_config(self, build_config: dict, field_value: str, field_name:
         if field_name == "collection_name" and field_value:
             # Set the options for collection name to be the field value if its a new collection
             if field_value not in build_config["collection_name"]["options"]:
-                # If this is running in DSLF, we may need to initialize the options again
-                if dslf:
-                    # Reload the list of collections and metadata associated
-                    collection_options = self._initialize_collection_options(
-                        api_endpoint=build_config["d_api_endpoint"]["value"] if not dslf else None
-                    )
-                    # If we have collections, show the dropdown
-                    build_config["collection_name"]["options"] = [col["name"] for col in collection_options]
-                    build_config["collection_name"]["options_metadata"] = [
-                        {k: v for k, v in col.items() if k not in ["name"]} for col in collection_options
-                    ]
-                else:
-                    # Add the new collection to the list of options
-                    build_config["collection_name"]["options"].append(field_value)
-                    build_config["collection_name"]["options_metadata"].append(
-                        {"records": 0, "provider": None, "icon": "", "model": None}
-                    )
-                    # Ensure that autodetect collection is set to False, since its a new collection
-                    build_config["autodetect_collection"]["value"] = False
+                # Add the new collection to the list of options
+                build_config["collection_name"]["options"].append(field_value)
+                build_config["collection_name"]["options_metadata"].append(
+                    {"records": 0, "provider": None, "icon": "", "model": None}
+                )
+                # Ensure that autodetect collection is set to False, since its a new collection
+                build_config["autodetect_collection"]["value"] = False
                 build_config["autodetect_collection"]["value"] = True

From 070b50b1b9cc63596be5cefe6b7526b921c677ad Mon Sep 17 00:00:00 2001
From: "autofix-ci[bot]" <114827586+autofix-ci[bot]>
Date: Fri, 31 Jan 2025 17:16:06 +0000
Subject: [PATCH 4/7] [] apply automated fixes

 .../base/langflow/components/vectorstores/      | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/src/backend/base/langflow/components/vectorstores/ b/src/backend/base/langflow/components/vectorstores/
index 500979963a2f..989aa265ec31 100644
--- a/src/backend/base/langflow/components/vectorstores/
+++ b/src/backend/base/langflow/components/vectorstores/
@@ -463,15 +463,11 @@ def _initialize_collection_options(self, api_endpoint: str | None = None):
                 "records": self.collection_data(, database=database),
                 "provider": (
-                    col.options.vector.service.provider
-                    if col.options.vector and col.options.vector.service
-                    else None
+                    col.options.vector.service.provider if col.options.vector and col.options.vector.service else None
                 "icon": "",
                 "model": (
-                    col.options.vector.service.model_name
-                    if col.options.vector and col.options.vector.service
-                    else None
+                    col.options.vector.service.model_name if col.options.vector and col.options.vector.service else None
             for col in collection_list

From c488c8b49d22f88bf202377b55ea9036f037968f Mon Sep 17 00:00:00 2001
From: "autofix-ci[bot]" <114827586+autofix-ci[bot]>
Date: Fri, 31 Jan 2025 17:17:34 +0000
Subject: [PATCH 5/7] [] apply automated fixes (attempt 2/3)

 .../initial_setup/starter_projects/Vector Store RAG.json      | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/backend/base/langflow/initial_setup/starter_projects/Vector Store RAG.json b/src/backend/base/langflow/initial_setup/starter_projects/Vector Store RAG.json
index 4056a76374ea..366a607b84f0 100644
--- a/src/backend/base/langflow/initial_setup/starter_projects/Vector Store RAG.json	
+++ b/src/backend/base/langflow/initial_setup/starter_projects/Vector Store RAG.json	
@@ -3300,7 +3300,7 @@
                 "show": true,
                 "title_case": false,
                 "type": "code",
-                "value": "import os\nfrom collections import defaultdict\nfrom dataclasses import dataclass, field\n\nfrom astrapy import AstraDBAdmin, DataAPIClient, Database\nfrom langchain_astradb import AstraDBVectorStore, CollectionVectorServiceOptions\n\nfrom langflow.base.vectorstores.model import LCVectorStoreComponent, check_cached_vector_store\nfrom langflow.helpers import docs_to_data\nfrom langflow.inputs import FloatInput, NestedDictInput\nfrom import (\n    BoolInput,\n    DropdownInput,\n    HandleInput,\n    IntInput,\n    SecretStrInput,\n    StrInput,\n)\nfrom langflow.schema import Data\nfrom langflow.utils.version import get_version_info\n\n\nclass AstraDBVectorStoreComponent(LCVectorStoreComponent):\n    display_name: str = \"Astra DB\"\n    description: str = \"Ingest and search documents in Astra DB\"\n    documentation: str = \"\"\n    name = \"AstraDB\"\n    icon: str = \"AstraDB\"\n\n    _cached_vector_store: AstraDBVectorStore | None = None\n\n    @dataclass\n    class NewDatabaseInput:\n        functionality: str = \"create\"\n        fields: dict[str, dict] = field(\n            default_factory=lambda: {\n                \"data\": {\n                    \"node\": {\n                        \"description\": \"Create a new database in Astra DB.\",\n                        \"display_name\": \"Create New Database\",\n                        \"field_order\": [\"new_database_name\", \"cloud_provider\", \"region\"],\n                        \"template\": {\n                            \"new_database_name\": StrInput(\n                                name=\"new_database_name\",\n                                display_name=\"New Database Name\",\n                                info=\"Name of the new database to create in Astra DB.\",\n                                required=True,\n                            ),\n                            \"cloud_provider\": DropdownInput(\n                                name=\"cloud_provider\",\n          
                      display_name=\"Cloud Provider\",\n                                info=\"Cloud provider for the new database.\",\n                                options=[\"Amazon Web Services\", \"Google Cloud Platform\", \"Microsoft Azure\"],\n                                required=True,\n                            ),\n                            \"region\": DropdownInput(\n                                name=\"region\",\n                                display_name=\"Region\",\n                                info=\"Region for the new database.\",\n                                options=[],\n                                required=True,\n                            ),\n                        },\n                    },\n                }\n            }\n        )\n\n    @dataclass\n    class NewCollectionInput:\n        functionality: str = \"create\"\n        fields: dict[str, dict] = field(\n            default_factory=lambda: {\n                \"data\": {\n                    \"node\": {\n                        \"description\": \"Create a new collection in Astra DB.\",\n                        \"display_name\": \"Create New Collection\",\n                        \"field_order\": [\n                            \"new_collection_name\",\n                            \"embedding_generation_provider\",\n                            \"embedding_generation_model\",\n                        ],\n                        \"template\": {\n                            \"new_collection_name\": StrInput(\n                                name=\"new_collection_name\",\n                                display_name=\"New Collection Name\",\n                                info=\"Name of the new collection to create in Astra DB.\",\n                                required=True,\n                            ),\n                            \"embedding_generation_provider\": DropdownInput(\n                                name=\"embedding_generation_provider\",\n        
                        display_name=\"Embedding Generation Provider\",\n                                info=\"Provider to use for generating embeddings.\",\n                                options=[],\n                                required=True,\n                            ),\n                            \"embedding_generation_model\": DropdownInput(\n                                name=\"embedding_generation_model\",\n                                display_name=\"Embedding Generation Model\",\n                                info=\"Model to use for generating embeddings.\",\n                                options=[],\n                                required=True,\n                            ),\n                        },\n                    },\n                }\n            }\n        )\n\n    inputs = [\n        SecretStrInput(\n            name=\"token\",\n            display_name=\"Astra DB Application Token\",\n            info=\"Authentication token for accessing Astra DB.\",\n            value=\"ASTRA_DB_APPLICATION_TOKEN\",\n            required=True,\n            real_time_refresh=True,\n            input_types=[],\n        ),\n        StrInput(\n            name=\"environment\",\n            display_name=\"Environment\",\n            info=\"The environment for the Astra DB API Endpoint.\",\n            advanced=True,\n        ),\n        DropdownInput(\n            name=\"api_endpoint\",\n            display_name=\"Database\",\n            info=\"The Database / API Endpoint for the Astra DB instance.\",\n            required=True,\n            refresh_button=True,\n            real_time_refresh=True,\n            combobox=True,\n        ),\n        StrInput(\n            name=\"d_api_endpoint\",\n            display_name=\"Database API Endpoint\",\n            info=\"The API Endpoint for the Astra DB instance. 
Supercedes database selection.\",\n            advanced=True,\n        ),\n        DropdownInput(\n            name=\"collection_name\",\n            display_name=\"Collection\",\n            info=\"The name of the collection within Astra DB where the vectors will be stored.\",\n            required=True,\n            refresh_button=True,\n            real_time_refresh=True,\n            # dialog_inputs=asdict(NewCollectionInput()),\n            combobox=True,\n        ),\n        StrInput(\n            name=\"keyspace\",\n            display_name=\"Keyspace\",\n            info=\"Optional keyspace within Astra DB to use for the collection.\",\n            advanced=True,\n        ),\n        DropdownInput(\n            name=\"embedding_choice\",\n            display_name=\"Embedding Model or Astra Vectorize\",\n            info=\"Choose an embedding model or use Astra Vectorize.\",\n            options=[\"Embedding Model\", \"Astra Vectorize\"],\n            value=\"Embedding Model\",\n            advanced=True,\n            real_time_refresh=True,\n        ),\n        HandleInput(\n            name=\"embedding_model\",\n            display_name=\"Embedding Model\",\n            input_types=[\"Embeddings\"],\n            info=\"Specify the Embedding Model. 
Not required for Astra Vectorize collections.\",\n            required=False,\n        ),\n        *LCVectorStoreComponent.inputs,\n        IntInput(\n            name=\"number_of_results\",\n            display_name=\"Number of Search Results\",\n            info=\"Number of search results to return.\",\n            advanced=True,\n            value=4,\n        ),\n        DropdownInput(\n            name=\"search_type\",\n            display_name=\"Search Type\",\n            info=\"Search type to use\",\n            options=[\"Similarity\", \"Similarity with score threshold\", \"MMR (Max Marginal Relevance)\"],\n            value=\"Similarity\",\n            advanced=True,\n        ),\n        FloatInput(\n            name=\"search_score_threshold\",\n            display_name=\"Search Score Threshold\",\n            info=\"Minimum similarity score threshold for search results. \"\n            \"(when using 'Similarity with score threshold')\",\n            value=0,\n            advanced=True,\n        ),\n        NestedDictInput(\n            name=\"advanced_search_filter\",\n            display_name=\"Search Metadata Filter\",\n            info=\"Optional dictionary of filters to apply to the search query.\",\n            advanced=True,\n        ),\n        BoolInput(\n            name=\"autodetect_collection\",\n            display_name=\"Autodetect Collection\",\n            info=\"Boolean flag to determine whether to autodetect the collection.\",\n            advanced=True,\n            value=True,\n        ),\n        StrInput(\n            name=\"content_field\",\n            display_name=\"Content Field\",\n            info=\"Field to use as the text content field for the vector store.\",\n            advanced=True,\n        ),\n        StrInput(\n            name=\"deletion_field\",\n            display_name=\"Deletion Based On Field\",\n            info=\"When this parameter is provided, documents in the target collection with \"\n            \"metadata 
field values matching the input metadata field value will be deleted \"\n            \"before new data is loaded.\",\n            advanced=True,\n        ),\n        BoolInput(\n            name=\"ignore_invalid_documents\",\n            display_name=\"Ignore Invalid Documents\",\n            info=\"Boolean flag to determine whether to ignore invalid documents at runtime.\",\n            advanced=True,\n        ),\n        NestedDictInput(\n            name=\"astradb_vectorstore_kwargs\",\n            display_name=\"AstraDBVectorStore Parameters\",\n            info=\"Optional dictionary of additional parameters for the AstraDBVectorStore.\",\n            advanced=True,\n        ),\n    ]\n\n    @classmethod\n    def map_cloud_providers(cls):\n        return {\n            \"Amazon Web Services\": {\n                \"id\": \"aws\",\n                \"regions\": [\"us-east-2\", \"ap-south-1\", \"eu-west-1\"],\n            },\n            \"Google Cloud Platform\": {\n                \"id\": \"gcp\",\n                \"regions\": [\"us-east1\"],\n            },\n            \"Microsoft Azure\": {\n                \"id\": \"azure\",\n                \"regions\": [\"westus3\"],\n            },\n        }\n\n    @classmethod\n    def create_database_api(\n        cls,\n        token: str,\n        new_database_name: str,\n        cloud_provider: str,\n        region: str,\n    ):\n        client = DataAPIClient(token=token)\n\n        # Get the admin object\n        admin_client = client.get_admin(token=token)\n\n        # Call the create database function\n        return admin_client.create_database(\n            name=new_database_name,\n            cloud_provider=cloud_provider,\n            region=region,\n        )\n\n    @classmethod\n    def create_collection_api(\n        cls,\n        token: str,\n        database_name: str,\n        new_collection_name: str,\n        dimension: int | None = None,\n        embedding_generation_provider: str | None = None,\n     
   embedding_generation_model: str | None = None,\n    ):\n        client = DataAPIClient(token=token)\n        api_endpoint = cls.get_api_endpoint_static(token=token, database_name=database_name)\n\n        # Get the database object\n        database = client.get_database(api_endpoint=api_endpoint, token=token)\n\n        # Build vectorize options, if needed\n        vectorize_options = None\n        if not dimension:\n            vectorize_options = CollectionVectorServiceOptions(\n                provider=embedding_generation_provider,\n                model_name=embedding_generation_model,\n                authentication=None,\n                parameters=None,\n            )\n\n        # Create the collection\n        return database.create_collection(\n            name=new_collection_name,\n            dimension=dimension,\n            service=vectorize_options,\n        )\n\n    @classmethod\n    def get_database_list_static(cls, token: str, environment: str | None = None):\n        client = DataAPIClient(token=token, environment=environment)\n\n        # Get the admin object\n        admin_client = client.get_admin(token=token)\n\n        # Get the list of databases\n        db_list = list(admin_client.list_databases())\n\n        # Generate the api endpoint for each database\n        db_info_dict = {}\n        for db in db_list:\n            try:\n                api_endpoint = f\"https://{}-{}\"\n                db_info_dict[] = {\n                    \"api_endpoint\": api_endpoint,\n                    \"collections\": len(\n                        list(\n                            client.get_database(\n                                api_endpoint=api_endpoint, token=token,\n                            ).list_collection_names(\n                        )\n                    ),\n                }\n            except Exception:  # noqa: BLE001, S110\n                pass\n\n        return db_info_dict\n\n    def get_database_list(self):\n        return 
self.get_database_list_static(token=self.token, environment=self.environment)\n\n    @classmethod\n    def get_api_endpoint_static(\n        cls,\n        token: str,\n        environment: str | None = None,\n        api_endpoint: str | None = None,\n        database_name: str | None = None,\n    ):\n        # If the api_endpoint is set, return it\n        if api_endpoint:\n            return api_endpoint\n\n        # Check if the database_name is like a url\n        if database_name and database_name.startswith(\"https://\"):\n            return database_name\n\n        # If the database is not set, nothing we can do.\n        if not database_name:\n            return None\n\n        # Otherwise, get the URL from the database list\n        return cls.get_database_list_static(token=token, environment=environment).get(database_name).get(\"api_endpoint\")\n\n    def get_api_endpoint(self, *, api_endpoint: str | None = None):\n        return self.get_api_endpoint_static(\n            token=self.token,\n            environment=self.environment,\n            api_endpoint=api_endpoint or self.d_api_endpoint,\n            database_name=self.api_endpoint,\n        )\n\n    def get_keyspace(self):\n        keyspace = self.keyspace\n\n        if keyspace:\n            return keyspace.strip()\n\n        return None\n\n    def get_database_object(self, api_endpoint: str | None = None):\n        try:\n            client = DataAPIClient(token=self.token, environment=self.environment)\n\n            return client.get_database(\n                api_endpoint=self.get_api_endpoint(api_endpoint=api_endpoint),\n                token=self.token,\n                keyspace=self.get_keyspace(),\n            )\n        except Exception as e:  # noqa: BLE001\n            self.log(f\"Error getting database: {e}\")\n\n            return None\n\n    def collection_data(self, collection_name: str, database: Database | None = None):\n        try:\n            if not database:\n                
client = DataAPIClient(token=self.token, environment=self.environment)\n\n                database = client.get_database(\n                    api_endpoint=self.get_api_endpoint(),\n                    token=self.token,\n                    keyspace=self.get_keyspace(),\n                )\n\n            collection = database.get_collection(collection_name, keyspace=self.get_keyspace())\n\n            return collection.estimated_document_count()\n        except Exception as e:  # noqa: BLE001\n            self.log(f\"Error checking collection data: {e}\")\n\n            return None\n\n    def get_vectorize_providers(self):\n        try:\n            self.log(\"Dynamically updating list of Vectorize providers.\")\n\n            # Get the admin object\n            admin = AstraDBAdmin(token=self.token)\n            db_admin = admin.get_database_admin(api_endpoint=self.get_api_endpoint())\n\n            # Get the list of embedding providers\n            embedding_providers = db_admin.find_embedding_providers().as_dict()\n\n            vectorize_providers_mapping = {}\n            # Map the provider display name to the provider key and models\n            for provider_key, provider_data in embedding_providers[\"embeddingProviders\"].items():\n                display_name = provider_data[\"displayName\"]\n                models = [model[\"name\"] for model in provider_data[\"models\"]]\n\n                # TODO:\n                vectorize_providers_mapping[display_name] = [provider_key, models]\n\n            # Sort the resulting dictionary\n            return defaultdict(list, dict(sorted(vectorize_providers_mapping.items())))\n        except Exception as e:  # noqa: BLE001\n            self.log(f\"Error fetching Vectorize providers: {e}\")\n\n            return {}\n\n    def _initialize_database_options(self):\n        try:\n            return [\n                {\n                    \"name\": name,\n                    \"collections\": info[\"collections\"],\n        
            \"api_endpoint\": info[\"api_endpoint\"],\n                }\n                for name, info in self.get_database_list().items()\n            ]\n        except Exception as e:\n            msg = f\"Error fetching database options: {e}\"\n            raise ValueError(msg) from e\n\n    def _initialize_collection_options(self, api_endpoint: str | None = None):\n        database = self.get_database_object(api_endpoint=api_endpoint)\n        if database is None:\n            return []\n\n        try:\n            collection_list = list(database.list_collections(keyspace=self.get_keyspace()))\n\n            return [\n                {\n                    \"name\":,\n                    \"records\": self.collection_data(, database=database),\n                    \"provider\": (\n                        col.options.vector.service.provider\n                        if col.options.vector and col.options.vector.service\n                        else None\n                    ),\n                    \"icon\": \"\",\n                    \"model\": (\n                        col.options.vector.service.model_name\n                        if col.options.vector and col.options.vector.service\n                        else None\n                    ),\n                }\n                for col in collection_list\n            ]\n        except Exception as e:  # noqa: BLE001\n            self.log(f\"Error fetching collections: {e}\")\n\n            return []\n\n    def reset_build_config(self, build_config: dict):\n        # Reset the list of databases we have based on the token provided\n        build_config[\"api_endpoint\"][\"options\"] = []\n        build_config[\"api_endpoint\"][\"options_metadata\"] = []\n        build_config[\"api_endpoint\"][\"value\"] = \"\"\n        build_config[\"api_endpoint\"][\"name\"] = \"Database\"\n\n        # Reset the list of collections and metadata associated\n        build_config[\"collection_name\"][\"options\"] = []\n        
build_config[\"collection_name\"][\"options_metadata\"] = []\n        build_config[\"collection_name\"][\"value\"] = \"\"\n\n        return build_config\n\n    def update_build_config(self, build_config: dict, field_value: str, field_name: str | None = None):\n        # TODO: Remove special astra flags when overlays are out\n        # TODO: Better targeting of this field\n        dslf = os.getenv(\"AWS_EXECUTION_ENV\") == \"AWS_ECS_FARGATE\"\n\n        # If the token has not been provided, simply return\n        if not self.token:\n            return self.reset_build_config(build_config)\n\n        # Refresh the database name options\n        if not dslf and (field_name in [\"token\", \"environment\"] or not build_config[\"api_endpoint\"][\"options\"]):\n            # Reset the build config to ensure we are starting fresh\n            build_config = self.reset_build_config(build_config)\n\n            # Get the list of options we have based on the token provided\n            database_options = self._initialize_database_options()\n\n            # If we retrieved options based on the token, show the dropdown\n            build_config[\"api_endpoint\"][\"options\"] = [db[\"name\"] for db in database_options]\n            build_config[\"api_endpoint\"][\"options_metadata\"] = [\n                {k: v for k, v in db.items() if k not in [\"name\"]} for db in database_options\n            ]\n\n            # Get list of regions for a given cloud provider\n            \"\"\"\n            cloud_provider = (\n                build_config[\"api_endpoint\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"][\"cloud_provider\"][\n                    \"value\"\n                ]\n                or \"Amazon Web Services\"\n            )\n            build_config[\"api_endpoint\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"][\"region\"][\n                \"options\"\n            ] = self.map_cloud_providers()[cloud_provider][\"regions\"]\n         
   \"\"\"\n\n        # Refresh the collection name options\n        if field_name == \"api_endpoint\":\n            # Reset the selected collection\n            build_config[\"collection_name\"][\"value\"] = \"\"\n\n            # Set the underlying api endpoint value of the database\n            if field_value in build_config[\"api_endpoint\"][\"options\"]:\n                index_of_name = build_config[\"api_endpoint\"][\"options\"].index(field_value)\n                build_config[\"d_api_endpoint\"][\"value\"] = build_config[\"api_endpoint\"][\"options_metadata\"][\n                    index_of_name\n                ][\"api_endpoint\"]\n            else:\n                build_config[\"d_api_endpoint\"][\"value\"] = \"\"\n\n            # Reload the list of collections and metadata associated\n            collection_options = self._initialize_collection_options(\n                api_endpoint=build_config[\"d_api_endpoint\"][\"value\"] if not dslf else None\n            )\n\n            # If we have collections, show the dropdown\n            build_config[\"collection_name\"][\"options\"] = [col[\"name\"] for col in collection_options]\n            build_config[\"collection_name\"][\"options_metadata\"] = [\n                {k: v for k, v in col.items() if k not in [\"name\"]} for col in collection_options\n            ]\n\n        # Hide embedding model option if opriona_metadata provider is not null\n        if field_name == \"collection_name\" and field_value:\n            # Set the options for collection name to be the field value if its a new collection\n            if field_value not in build_config[\"collection_name\"][\"options\"]:\n                # If this is running in DSLF, we may need to initialize the options again\n                if dslf:\n                    # Reload the list of collections and metadata associated\n                    collection_options = self._initialize_collection_options(\n                        
api_endpoint=build_config[\"d_api_endpoint\"][\"value\"] if not dslf else None\n                    )\n\n                    # If we have collections, show the dropdown\n                    build_config[\"collection_name\"][\"options\"] = [col[\"name\"] for col in collection_options]\n                    build_config[\"collection_name\"][\"options_metadata\"] = [\n                        {k: v for k, v in col.items() if k not in [\"name\"]} for col in collection_options\n                    ]\n                else:\n                    # Add the new collection to the list of options\n                    build_config[\"collection_name\"][\"options\"].append(field_value)\n                    build_config[\"collection_name\"][\"options_metadata\"].append(\n                        {\"records\": 0, \"provider\": None, \"icon\": \"\", \"model\": None}\n                    )\n\n                    # Ensure that autodetect collection is set to False, since its a new collection\n                    build_config[\"autodetect_collection\"][\"value\"] = False\n            else:\n                build_config[\"autodetect_collection\"][\"value\"] = True\n\n            # Find the position of the selected collection to align with metadata\n            index_of_name = build_config[\"collection_name\"][\"options\"].index(field_value)\n\n            # Get the provider value of the selected collection\n            value_of_provider = build_config[\"collection_name\"][\"options_metadata\"][index_of_name][\"provider\"]\n\n            # If we were able to determine the Vectorize provider, set it accordingly\n            if value_of_provider:\n                build_config[\"embedding_model\"][\"advanced\"] = True\n                build_config[\"embedding_choice\"][\"value\"] = \"Astra Vectorize\"\n            else:\n                build_config[\"embedding_model\"][\"advanced\"] = False\n                build_config[\"embedding_choice\"][\"value\"] = \"Embedding Model\"\n\n        # For 
the final step, get the list of vectorize providers\n        \"\"\"\n        vectorize_providers = self.get_vectorize_providers()\n        if not vectorize_providers:\n            return build_config\n\n        # Allow the user to see the embedding provider options\n        provider_options = build_config[\"collection_name\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"][\n            \"embedding_generation_provider\"\n        ][\"options\"]\n        if not provider_options:\n            # If the collection is set, allow user to see embedding options\n            build_config[\"collection_name\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"][\n                \"embedding_generation_provider\"\n            ][\"options\"] = [\"Bring your own\", \"Nvidia\", *[key for key in vectorize_providers if key != \"Nvidia\"]]\n\n        # And allow the user to see the models based on a selected provider\n        model_options = build_config[\"collection_name\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"][\n            \"embedding_generation_model\"\n        ][\"options\"]\n        if not model_options:\n            embedding_provider = build_config[\"collection_name\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"][\n                \"embedding_generation_provider\"\n            ][\"value\"]\n\n            build_config[\"collection_name\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"][\n                \"embedding_generation_model\"\n            ][\"options\"] = vectorize_providers.get(embedding_provider, [[], []])[1]\n        \"\"\"\n\n        return build_config\n\n    @check_cached_vector_store\n    def build_vector_store(self):\n        try:\n            from langchain_astradb import AstraDBVectorStore\n        except ImportError as e:\n            msg = (\n                \"Could not import langchain Astra DB integration package. 
\"\n                \"Please install it with `pip install langchain-astradb`.\"\n            )\n            raise ImportError(msg) from e\n\n        # Get the embedding model and additional params\n        embedding_params = (\n            {\"embedding\": self.embedding_model}\n            if self.embedding_model and self.embedding_choice == \"Embedding Model\"\n            else {}\n        )\n\n        # Get the additional parameters\n        additional_params = self.astradb_vectorstore_kwargs or {}\n\n        # Get Langflow version and platform information\n        __version__ = get_version_info()[\"version\"]\n        langflow_prefix = \"\"\n        if os.getenv(\"AWS_EXECUTION_ENV\") == \"AWS_ECS_FARGATE\":  # TODO: More precise way of detecting\n            langflow_prefix = \"ds-\"\n\n        # Get the database object\n        database = self.get_database_object(api_endpoint=self.d_api_endpoint)\n        autodetect = self.collection_name in database.list_collection_names() and self.autodetect_collection\n\n        # Bundle up the auto-detect parameters\n        autodetect_params = {\n            \"autodetect_collection\": autodetect,\n            \"content_field\": (\n                self.content_field\n                if self.content_field and embedding_params\n                else (\n                    \"page_content\"\n                    if embedding_params\n                    and self.collection_data(collection_name=self.collection_name, database=database) == 0\n                    else None\n                )\n            ),\n            \"ignore_invalid_documents\": self.ignore_invalid_documents,\n        }\n\n        # Attempt to build the Vector Store object\n        try:\n            vector_store = AstraDBVectorStore(\n                # Astra DB Authentication Parameters\n                token=self.token,\n                api_endpoint=database.api_endpoint,\n                namespace=database.keyspace,\n                
collection_name=self.collection_name,\n                environment=self.environment,\n                # Astra DB Usage Tracking Parameters\n                ext_callers=[(f\"{langflow_prefix}langflow\", __version__)],\n                # Astra DB Vector Store Parameters\n                **autodetect_params,\n                **embedding_params,\n                **additional_params,\n            )\n        except Exception as e:\n            msg = f\"Error initializing AstraDBVectorStore: {e}\"\n            raise ValueError(msg) from e\n\n        # Add documents to the vector store\n        self._add_documents_to_vector_store(vector_store)\n\n        return vector_store\n\n    def _add_documents_to_vector_store(self, vector_store) -> None:\n        documents = []\n        for _input in self.ingest_data or []:\n            if isinstance(_input, Data):\n                documents.append(_input.to_lc_document())\n            else:\n                msg = \"Vector Store Inputs must be Data objects.\"\n                raise TypeError(msg)\n\n        if documents and self.deletion_field:\n            self.log(f\"Deleting documents where {self.deletion_field}\")\n            try:\n                database = self.get_database_object(api_endpoint=self.d_api_endpoint)\n                collection = database.get_collection(self.collection_name, keyspace=database.keyspace)\n                delete_values = list({doc.metadata[self.deletion_field] for doc in documents})\n                self.log(f\"Deleting documents where {self.deletion_field} matches {delete_values}.\")\n                collection.delete_many({f\"metadata.{self.deletion_field}\": {\"$in\": delete_values}})\n            except Exception as e:\n                msg = f\"Error deleting documents from AstraDBVectorStore based on '{self.deletion_field}': {e}\"\n                raise ValueError(msg) from e\n\n        if documents:\n            self.log(f\"Adding {len(documents)} documents to the Vector Store.\")\n            
try:\n                vector_store.add_documents(documents)\n            except Exception as e:\n                msg = f\"Error adding documents to AstraDBVectorStore: {e}\"\n                raise ValueError(msg) from e\n        else:\n            self.log(\"No documents to add to the Vector Store.\")\n\n    def _map_search_type(self) -> str:\n        search_type_mapping = {\n            \"Similarity with score threshold\": \"similarity_score_threshold\",\n            \"MMR (Max Marginal Relevance)\": \"mmr\",\n        }\n\n        return search_type_mapping.get(self.search_type, \"similarity\")\n\n    def _build_search_args(self):\n        query = self.search_query if isinstance(self.search_query, str) and self.search_query.strip() else None\n\n        if query:\n            args = {\n                \"query\": query,\n                \"search_type\": self._map_search_type(),\n                \"k\": self.number_of_results,\n                \"score_threshold\": self.search_score_threshold,\n            }\n        elif self.advanced_search_filter:\n            args = {\n                \"n\": self.number_of_results,\n            }\n        else:\n            return {}\n\n        filter_arg = self.advanced_search_filter or {}\n        if filter_arg:\n            args[\"filter\"] = filter_arg\n\n        return args\n\n    def search_documents(self, vector_store=None) -> list[Data]:\n        vector_store = vector_store or self.build_vector_store()\n\n        self.log(f\"Search input: {self.search_query}\")\n        self.log(f\"Search type: {self.search_type}\")\n        self.log(f\"Number of results: {self.number_of_results}\")\n\n        try:\n            search_args = self._build_search_args()\n        except Exception as e:\n            msg = f\"Error in AstraDBVectorStore._build_search_args: {e}\"\n            raise ValueError(msg) from e\n\n        if not search_args:\n            self.log(\"No search input or filters provided. 
Skipping search.\")\n            return []\n\n        docs = []\n        search_method = \"search\" if \"query\" in search_args else \"metadata_search\"\n\n        try:\n            self.log(f\"Calling vector_store.{search_method} with args: {search_args}\")\n            docs = getattr(vector_store, search_method)(**search_args)\n        except Exception as e:\n            msg = f\"Error performing {search_method} in AstraDBVectorStore: {e}\"\n            raise ValueError(msg) from e\n\n        self.log(f\"Retrieved documents: {len(docs)}\")\n\n        data = docs_to_data(docs)\n        self.log(f\"Converted documents to data: {len(data)}\")\n        self.status = data\n\n        return data\n\n    def get_retriever_kwargs(self):\n        search_args = self._build_search_args()\n\n        return {\n            \"search_type\": self._map_search_type(),\n            \"search_kwargs\": search_args,\n        }\n"
+                "value": "import os\nfrom collections import defaultdict\nfrom dataclasses import dataclass, field\n\nfrom astrapy import AstraDBAdmin, DataAPIClient, Database\nfrom langchain_astradb import AstraDBVectorStore, CollectionVectorServiceOptions\n\nfrom langflow.base.vectorstores.model import LCVectorStoreComponent, check_cached_vector_store\nfrom langflow.helpers import docs_to_data\nfrom langflow.inputs import FloatInput, NestedDictInput\nfrom import (\n    BoolInput,\n    DropdownInput,\n    HandleInput,\n    IntInput,\n    SecretStrInput,\n    StrInput,\n)\nfrom langflow.schema import Data\nfrom langflow.utils.version import get_version_info\n\n\nclass AstraDBVectorStoreComponent(LCVectorStoreComponent):\n    display_name: str = \"Astra DB\"\n    description: str = \"Ingest and search documents in Astra DB\"\n    documentation: str = \"\"\n    name = \"AstraDB\"\n    icon: str = \"AstraDB\"\n\n    _cached_vector_store: AstraDBVectorStore | None = None\n\n    @dataclass\n    class NewDatabaseInput:\n        functionality: str = \"create\"\n        fields: dict[str, dict] = field(\n            default_factory=lambda: {\n                \"data\": {\n                    \"node\": {\n                        \"description\": \"Create a new database in Astra DB.\",\n                        \"display_name\": \"Create New Database\",\n                        \"field_order\": [\"new_database_name\", \"cloud_provider\", \"region\"],\n                        \"template\": {\n                            \"new_database_name\": StrInput(\n                                name=\"new_database_name\",\n                                display_name=\"New Database Name\",\n                                info=\"Name of the new database to create in Astra DB.\",\n                                required=True,\n                            ),\n                            \"cloud_provider\": DropdownInput(\n                                name=\"cloud_provider\",\n          
                      display_name=\"Cloud Provider\",\n                                info=\"Cloud provider for the new database.\",\n                                options=[\"Amazon Web Services\", \"Google Cloud Platform\", \"Microsoft Azure\"],\n                                required=True,\n                            ),\n                            \"region\": DropdownInput(\n                                name=\"region\",\n                                display_name=\"Region\",\n                                info=\"Region for the new database.\",\n                                options=[],\n                                required=True,\n                            ),\n                        },\n                    },\n                }\n            }\n        )\n\n    @dataclass\n    class NewCollectionInput:\n        functionality: str = \"create\"\n        fields: dict[str, dict] = field(\n            default_factory=lambda: {\n                \"data\": {\n                    \"node\": {\n                        \"description\": \"Create a new collection in Astra DB.\",\n                        \"display_name\": \"Create New Collection\",\n                        \"field_order\": [\n                            \"new_collection_name\",\n                            \"embedding_generation_provider\",\n                            \"embedding_generation_model\",\n                        ],\n                        \"template\": {\n                            \"new_collection_name\": StrInput(\n                                name=\"new_collection_name\",\n                                display_name=\"New Collection Name\",\n                                info=\"Name of the new collection to create in Astra DB.\",\n                                required=True,\n                            ),\n                            \"embedding_generation_provider\": DropdownInput(\n                                name=\"embedding_generation_provider\",\n        
                        display_name=\"Embedding Generation Provider\",\n                                info=\"Provider to use for generating embeddings.\",\n                                options=[],\n                                required=True,\n                            ),\n                            \"embedding_generation_model\": DropdownInput(\n                                name=\"embedding_generation_model\",\n                                display_name=\"Embedding Generation Model\",\n                                info=\"Model to use for generating embeddings.\",\n                                options=[],\n                                required=True,\n                            ),\n                        },\n                    },\n                }\n            }\n        )\n\n    inputs = [\n        SecretStrInput(\n            name=\"token\",\n            display_name=\"Astra DB Application Token\",\n            info=\"Authentication token for accessing Astra DB.\",\n            value=\"ASTRA_DB_APPLICATION_TOKEN\",\n            required=True,\n            real_time_refresh=True,\n            input_types=[],\n        ),\n        StrInput(\n            name=\"environment\",\n            display_name=\"Environment\",\n            info=\"The environment for the Astra DB API Endpoint.\",\n            advanced=True,\n        ),\n        DropdownInput(\n            name=\"api_endpoint\",\n            display_name=\"Database\",\n            info=\"The Database / API Endpoint for the Astra DB instance.\",\n            required=True,\n            refresh_button=True,\n            real_time_refresh=True,\n            combobox=True,\n        ),\n        StrInput(\n            name=\"d_api_endpoint\",\n            display_name=\"Database API Endpoint\",\n            info=\"The API Endpoint for the Astra DB instance. 
Supercedes database selection.\",\n            advanced=True,\n        ),\n        DropdownInput(\n            name=\"collection_name\",\n            display_name=\"Collection\",\n            info=\"The name of the collection within Astra DB where the vectors will be stored.\",\n            required=True,\n            refresh_button=True,\n            real_time_refresh=True,\n            # dialog_inputs=asdict(NewCollectionInput()),\n            combobox=True,\n        ),\n        StrInput(\n            name=\"keyspace\",\n            display_name=\"Keyspace\",\n            info=\"Optional keyspace within Astra DB to use for the collection.\",\n            advanced=True,\n        ),\n        DropdownInput(\n            name=\"embedding_choice\",\n            display_name=\"Embedding Model or Astra Vectorize\",\n            info=\"Choose an embedding model or use Astra Vectorize.\",\n            options=[\"Embedding Model\", \"Astra Vectorize\"],\n            value=\"Embedding Model\",\n            advanced=True,\n            real_time_refresh=True,\n        ),\n        HandleInput(\n            name=\"embedding_model\",\n            display_name=\"Embedding Model\",\n            input_types=[\"Embeddings\"],\n            info=\"Specify the Embedding Model. 
Not required for Astra Vectorize collections.\",\n            required=False,\n        ),\n        *LCVectorStoreComponent.inputs,\n        IntInput(\n            name=\"number_of_results\",\n            display_name=\"Number of Search Results\",\n            info=\"Number of search results to return.\",\n            advanced=True,\n            value=4,\n        ),\n        DropdownInput(\n            name=\"search_type\",\n            display_name=\"Search Type\",\n            info=\"Search type to use\",\n            options=[\"Similarity\", \"Similarity with score threshold\", \"MMR (Max Marginal Relevance)\"],\n            value=\"Similarity\",\n            advanced=True,\n        ),\n        FloatInput(\n            name=\"search_score_threshold\",\n            display_name=\"Search Score Threshold\",\n            info=\"Minimum similarity score threshold for search results. \"\n            \"(when using 'Similarity with score threshold')\",\n            value=0,\n            advanced=True,\n        ),\n        NestedDictInput(\n            name=\"advanced_search_filter\",\n            display_name=\"Search Metadata Filter\",\n            info=\"Optional dictionary of filters to apply to the search query.\",\n            advanced=True,\n        ),\n        BoolInput(\n            name=\"autodetect_collection\",\n            display_name=\"Autodetect Collection\",\n            info=\"Boolean flag to determine whether to autodetect the collection.\",\n            advanced=True,\n            value=True,\n        ),\n        StrInput(\n            name=\"content_field\",\n            display_name=\"Content Field\",\n            info=\"Field to use as the text content field for the vector store.\",\n            advanced=True,\n        ),\n        StrInput(\n            name=\"deletion_field\",\n            display_name=\"Deletion Based On Field\",\n            info=\"When this parameter is provided, documents in the target collection with \"\n            \"metadata 
field values matching the input metadata field value will be deleted \"\n            \"before new data is loaded.\",\n            advanced=True,\n        ),\n        BoolInput(\n            name=\"ignore_invalid_documents\",\n            display_name=\"Ignore Invalid Documents\",\n            info=\"Boolean flag to determine whether to ignore invalid documents at runtime.\",\n            advanced=True,\n        ),\n        NestedDictInput(\n            name=\"astradb_vectorstore_kwargs\",\n            display_name=\"AstraDBVectorStore Parameters\",\n            info=\"Optional dictionary of additional parameters for the AstraDBVectorStore.\",\n            advanced=True,\n        ),\n    ]\n\n    @classmethod\n    def map_cloud_providers(cls):\n        return {\n            \"Amazon Web Services\": {\n                \"id\": \"aws\",\n                \"regions\": [\"us-east-2\", \"ap-south-1\", \"eu-west-1\"],\n            },\n            \"Google Cloud Platform\": {\n                \"id\": \"gcp\",\n                \"regions\": [\"us-east1\"],\n            },\n            \"Microsoft Azure\": {\n                \"id\": \"azure\",\n                \"regions\": [\"westus3\"],\n            },\n        }\n\n    @classmethod\n    def create_database_api(\n        cls,\n        token: str,\n        new_database_name: str,\n        cloud_provider: str,\n        region: str,\n    ):\n        client = DataAPIClient(token=token)\n\n        # Get the admin object\n        admin_client = client.get_admin(token=token)\n\n        # Call the create database function\n        return admin_client.create_database(\n            name=new_database_name,\n            cloud_provider=cloud_provider,\n            region=region,\n        )\n\n    @classmethod\n    def create_collection_api(\n        cls,\n        token: str,\n        database_name: str,\n        new_collection_name: str,\n        dimension: int | None = None,\n        embedding_generation_provider: str | None = None,\n     
   embedding_generation_model: str | None = None,\n    ):\n        client = DataAPIClient(token=token)\n        api_endpoint = cls.get_api_endpoint_static(token=token, database_name=database_name)\n\n        # Get the database object\n        database = client.get_database(api_endpoint=api_endpoint, token=token)\n\n        # Build vectorize options, if needed\n        vectorize_options = None\n        if not dimension:\n            vectorize_options = CollectionVectorServiceOptions(\n                provider=embedding_generation_provider,\n                model_name=embedding_generation_model,\n                authentication=None,\n                parameters=None,\n            )\n\n        # Create the collection\n        return database.create_collection(\n            name=new_collection_name,\n            dimension=dimension,\n            service=vectorize_options,\n        )\n\n    @classmethod\n    def get_database_list_static(cls, token: str, environment: str | None = None):\n        client = DataAPIClient(token=token, environment=environment)\n\n        # Get the admin object\n        admin_client = client.get_admin(token=token)\n\n        # Get the list of databases\n        db_list = list(admin_client.list_databases())\n\n        # Generate the api endpoint for each database\n        db_info_dict = {}\n        for db in db_list:\n            try:\n                api_endpoint = f\"https://{}-{}\"\n                db_info_dict[] = {\n                    \"api_endpoint\": api_endpoint,\n                    \"collections\": len(\n                        list(\n                            client.get_database(\n                                api_endpoint=api_endpoint, token=token,\n                            ).list_collection_names(\n                        )\n                    ),\n                }\n            except Exception:  # noqa: BLE001, S110\n                pass\n\n        return db_info_dict\n\n    def get_database_list(self):\n        return 
self.get_database_list_static(token=self.token, environment=self.environment)\n\n    @classmethod\n    def get_api_endpoint_static(\n        cls,\n        token: str,\n        environment: str | None = None,\n        api_endpoint: str | None = None,\n        database_name: str | None = None,\n    ):\n        # If the api_endpoint is set, return it\n        if api_endpoint:\n            return api_endpoint\n\n        # Check if the database_name is like a url\n        if database_name and database_name.startswith(\"https://\"):\n            return database_name\n\n        # If the database is not set, nothing we can do.\n        if not database_name:\n            return None\n\n        # Otherwise, get the URL from the database list\n        return cls.get_database_list_static(token=token, environment=environment).get(database_name).get(\"api_endpoint\")\n\n    def get_api_endpoint(self, *, api_endpoint: str | None = None):\n        return self.get_api_endpoint_static(\n            token=self.token,\n            environment=self.environment,\n            api_endpoint=api_endpoint or self.d_api_endpoint,\n            database_name=self.api_endpoint,\n        )\n\n    def get_keyspace(self):\n        keyspace = self.keyspace\n\n        if keyspace:\n            return keyspace.strip()\n\n        return None\n\n    def get_database_object(self, api_endpoint: str | None = None):\n        try:\n            client = DataAPIClient(token=self.token, environment=self.environment)\n\n            return client.get_database(\n                api_endpoint=self.get_api_endpoint(api_endpoint=api_endpoint),\n                token=self.token,\n                keyspace=self.get_keyspace(),\n            )\n        except Exception as e:\n            msg = f\"Error fetching database object: {e}\"\n            raise ValueError(msg) from e\n\n    def collection_data(self, collection_name: str, database: Database | None = None):\n        try:\n            if not database:\n                
client = DataAPIClient(token=self.token, environment=self.environment)\n\n                database = client.get_database(\n                    api_endpoint=self.get_api_endpoint(),\n                    token=self.token,\n                    keyspace=self.get_keyspace(),\n                )\n\n            collection = database.get_collection(collection_name, keyspace=self.get_keyspace())\n\n            return collection.estimated_document_count()\n        except Exception as e:  # noqa: BLE001\n            self.log(f\"Error checking collection data: {e}\")\n\n            return None\n\n    def get_vectorize_providers(self):\n        try:\n            self.log(\"Dynamically updating list of Vectorize providers.\")\n\n            # Get the admin object\n            admin = AstraDBAdmin(token=self.token)\n            db_admin = admin.get_database_admin(api_endpoint=self.get_api_endpoint())\n\n            # Get the list of embedding providers\n            embedding_providers = db_admin.find_embedding_providers().as_dict()\n\n            vectorize_providers_mapping = {}\n            # Map the provider display name to the provider key and models\n            for provider_key, provider_data in embedding_providers[\"embeddingProviders\"].items():\n                display_name = provider_data[\"displayName\"]\n                models = [model[\"name\"] for model in provider_data[\"models\"]]\n\n                # TODO:\n                vectorize_providers_mapping[display_name] = [provider_key, models]\n\n            # Sort the resulting dictionary\n            return defaultdict(list, dict(sorted(vectorize_providers_mapping.items())))\n        except Exception as e:  # noqa: BLE001\n            self.log(f\"Error fetching Vectorize providers: {e}\")\n\n            return {}\n\n    def _initialize_database_options(self):\n        try:\n            return [\n                {\n                    \"name\": name,\n                    \"collections\": info[\"collections\"],\n        
            \"api_endpoint\": info[\"api_endpoint\"],\n                }\n                for name, info in self.get_database_list().items()\n            ]\n        except Exception as e:\n            msg = f\"Error fetching database options: {e}\"\n            raise ValueError(msg) from e\n\n    def _initialize_collection_options(self, api_endpoint: str | None = None):\n        # Retrieve the database object\n        database = self.get_database_object(api_endpoint=api_endpoint)\n\n        # Get the list of collections\n        collection_list = list(database.list_collections(keyspace=self.get_keyspace()))\n\n        # Return the list of collections and metadata associated\n        return [\n            {\n                \"name\":,\n                \"records\": self.collection_data(, database=database),\n                \"provider\": (\n                    col.options.vector.service.provider if col.options.vector and col.options.vector.service else None\n                ),\n                \"icon\": \"\",\n                \"model\": (\n                    col.options.vector.service.model_name if col.options.vector and col.options.vector.service else None\n                ),\n            }\n            for col in collection_list\n        ]\n\n    def reset_build_config(self, build_config: dict):\n        # Reset the list of databases we have based on the token provided\n        build_config[\"api_endpoint\"][\"options\"] = []\n        build_config[\"api_endpoint\"][\"options_metadata\"] = []\n        build_config[\"api_endpoint\"][\"value\"] = \"\"\n        build_config[\"api_endpoint\"][\"name\"] = \"Database\"\n\n        # Reset the list of collections and metadata associated\n        build_config[\"collection_name\"][\"options\"] = []\n        build_config[\"collection_name\"][\"options_metadata\"] = []\n        build_config[\"collection_name\"][\"value\"] = \"\"\n\n        return build_config\n\n    def update_build_config(self, build_config: dict, 
field_value: str, field_name: str | None = None):\n        # TODO: Remove special astra flags when overlays are out\n        # TODO: Better targeting of this field\n        dslf = os.getenv(\"AWS_EXECUTION_ENV\") == \"AWS_ECS_FARGATE\"\n\n        # If the token has not been provided, simply return\n        if not self.token:\n            return self.reset_build_config(build_config)\n\n        # Refresh the database name options\n        if not dslf and (field_name in [\"token\", \"environment\"] or not build_config[\"api_endpoint\"][\"options\"]):\n            # Reset the build config to ensure we are starting fresh\n            build_config = self.reset_build_config(build_config)\n\n            # Get the list of options we have based on the token provided\n            database_options = self._initialize_database_options()\n\n            # If we retrieved options based on the token, show the dropdown\n            build_config[\"api_endpoint\"][\"options\"] = [db[\"name\"] for db in database_options]\n            build_config[\"api_endpoint\"][\"options_metadata\"] = [\n                {k: v for k, v in db.items() if k not in [\"name\"]} for db in database_options\n            ]\n\n            # Get list of regions for a given cloud provider\n            \"\"\"\n            cloud_provider = (\n                build_config[\"api_endpoint\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"][\"cloud_provider\"][\n                    \"value\"\n                ]\n                or \"Amazon Web Services\"\n            )\n            build_config[\"api_endpoint\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"][\"region\"][\n                \"options\"\n            ] = self.map_cloud_providers()[cloud_provider][\"regions\"]\n            \"\"\"\n\n        # Refresh the collection name options\n        if field_name == \"api_endpoint\":\n            # Reset the selected collection\n            build_config[\"collection_name\"][\"value\"] = 
\"\"\n\n            # Set the underlying api endpoint value of the database\n            if field_value in build_config[\"api_endpoint\"][\"options\"]:\n                index_of_name = build_config[\"api_endpoint\"][\"options\"].index(field_value)\n                build_config[\"d_api_endpoint\"][\"value\"] = build_config[\"api_endpoint\"][\"options_metadata\"][\n                    index_of_name\n                ][\"api_endpoint\"]\n            else:\n                build_config[\"d_api_endpoint\"][\"value\"] = \"\"\n\n            # Reload the list of collections and metadata associated\n            collection_options = self._initialize_collection_options(\n                api_endpoint=build_config[\"d_api_endpoint\"][\"value\"] if not dslf else None\n            )\n\n            # If we have collections, show the dropdown\n            build_config[\"collection_name\"][\"options\"] = [col[\"name\"] for col in collection_options]\n            build_config[\"collection_name\"][\"options_metadata\"] = [\n                {k: v for k, v in col.items() if k not in [\"name\"]} for col in collection_options\n            ]\n\n        # Hide embedding model option if opriona_metadata provider is not null\n        if field_name == \"collection_name\" and field_value:\n            # Set the options for collection name to be the field value if its a new collection\n            if field_value not in build_config[\"collection_name\"][\"options\"]:\n                # Add the new collection to the list of options\n                build_config[\"collection_name\"][\"options\"].append(field_value)\n                build_config[\"collection_name\"][\"options_metadata\"].append(\n                    {\"records\": 0, \"provider\": None, \"icon\": \"\", \"model\": None}\n                )\n\n                # Ensure that autodetect collection is set to False, since its a new collection\n                build_config[\"autodetect_collection\"][\"value\"] = False\n            else:\n      
          build_config[\"autodetect_collection\"][\"value\"] = True\n\n            # Find the position of the selected collection to align with metadata\n            index_of_name = build_config[\"collection_name\"][\"options\"].index(field_value)\n\n            # Get the provider value of the selected collection\n            value_of_provider = build_config[\"collection_name\"][\"options_metadata\"][index_of_name][\"provider\"]\n\n            # If we were able to determine the Vectorize provider, set it accordingly\n            if value_of_provider:\n                build_config[\"embedding_model\"][\"advanced\"] = True\n                build_config[\"embedding_choice\"][\"value\"] = \"Astra Vectorize\"\n            else:\n                build_config[\"embedding_model\"][\"advanced\"] = False\n                build_config[\"embedding_choice\"][\"value\"] = \"Embedding Model\"\n\n        # For the final step, get the list of vectorize providers\n        \"\"\"\n        vectorize_providers = self.get_vectorize_providers()\n        if not vectorize_providers:\n            return build_config\n\n        # Allow the user to see the embedding provider options\n        provider_options = build_config[\"collection_name\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"][\n            \"embedding_generation_provider\"\n        ][\"options\"]\n        if not provider_options:\n            # If the collection is set, allow user to see embedding options\n            build_config[\"collection_name\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"][\n                \"embedding_generation_provider\"\n            ][\"options\"] = [\"Bring your own\", \"Nvidia\", *[key for key in vectorize_providers if key != \"Nvidia\"]]\n\n        # And allow the user to see the models based on a selected provider\n        model_options = build_config[\"collection_name\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"][\n            
\"embedding_generation_model\"\n        ][\"options\"]\n        if not model_options:\n            embedding_provider = build_config[\"collection_name\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"][\n                \"embedding_generation_provider\"\n            ][\"value\"]\n\n            build_config[\"collection_name\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"][\n                \"embedding_generation_model\"\n            ][\"options\"] = vectorize_providers.get(embedding_provider, [[], []])[1]\n        \"\"\"\n\n        return build_config\n\n    @check_cached_vector_store\n    def build_vector_store(self):\n        try:\n            from langchain_astradb import AstraDBVectorStore\n        except ImportError as e:\n            msg = (\n                \"Could not import langchain Astra DB integration package. \"\n                \"Please install it with `pip install langchain-astradb`.\"\n            )\n            raise ImportError(msg) from e\n\n        # Get the embedding model and additional params\n        embedding_params = (\n            {\"embedding\": self.embedding_model}\n            if self.embedding_model and self.embedding_choice == \"Embedding Model\"\n            else {}\n        )\n\n        # Get the additional parameters\n        additional_params = self.astradb_vectorstore_kwargs or {}\n\n        # Get Langflow version and platform information\n        __version__ = get_version_info()[\"version\"]\n        langflow_prefix = \"\"\n        if os.getenv(\"AWS_EXECUTION_ENV\") == \"AWS_ECS_FARGATE\":  # TODO: More precise way of detecting\n            langflow_prefix = \"ds-\"\n\n        # Get the database object\n        database = self.get_database_object(api_endpoint=self.d_api_endpoint)\n        autodetect = self.collection_name in database.list_collection_names() and self.autodetect_collection\n\n        # Bundle up the auto-detect parameters\n        autodetect_params = {\n            
\"autodetect_collection\": autodetect,\n            \"content_field\": (\n                self.content_field\n                if self.content_field and embedding_params\n                else (\n                    \"page_content\"\n                    if embedding_params\n                    and self.collection_data(collection_name=self.collection_name, database=database) == 0\n                    else None\n                )\n            ),\n            \"ignore_invalid_documents\": self.ignore_invalid_documents,\n        }\n\n        # Attempt to build the Vector Store object\n        try:\n            vector_store = AstraDBVectorStore(\n                # Astra DB Authentication Parameters\n                token=self.token,\n                api_endpoint=database.api_endpoint,\n                namespace=database.keyspace,\n                collection_name=self.collection_name,\n                environment=self.environment,\n                # Astra DB Usage Tracking Parameters\n                ext_callers=[(f\"{langflow_prefix}langflow\", __version__)],\n                # Astra DB Vector Store Parameters\n                **autodetect_params,\n                **embedding_params,\n                **additional_params,\n            )\n        except Exception as e:\n            msg = f\"Error initializing AstraDBVectorStore: {e}\"\n            raise ValueError(msg) from e\n\n        # Add documents to the vector store\n        self._add_documents_to_vector_store(vector_store)\n\n        return vector_store\n\n    def _add_documents_to_vector_store(self, vector_store) -> None:\n        documents = []\n        for _input in self.ingest_data or []:\n            if isinstance(_input, Data):\n                documents.append(_input.to_lc_document())\n            else:\n                msg = \"Vector Store Inputs must be Data objects.\"\n                raise TypeError(msg)\n\n        if documents and self.deletion_field:\n            self.log(f\"Deleting documents where 
{self.deletion_field}\")\n            try:\n                database = self.get_database_object(api_endpoint=self.d_api_endpoint)\n                collection = database.get_collection(self.collection_name, keyspace=database.keyspace)\n                delete_values = list({doc.metadata[self.deletion_field] for doc in documents})\n                self.log(f\"Deleting documents where {self.deletion_field} matches {delete_values}.\")\n                collection.delete_many({f\"metadata.{self.deletion_field}\": {\"$in\": delete_values}})\n            except Exception as e:\n                msg = f\"Error deleting documents from AstraDBVectorStore based on '{self.deletion_field}': {e}\"\n                raise ValueError(msg) from e\n\n        if documents:\n            self.log(f\"Adding {len(documents)} documents to the Vector Store.\")\n            try:\n                vector_store.add_documents(documents)\n            except Exception as e:\n                msg = f\"Error adding documents to AstraDBVectorStore: {e}\"\n                raise ValueError(msg) from e\n        else:\n            self.log(\"No documents to add to the Vector Store.\")\n\n    def _map_search_type(self) -> str:\n        search_type_mapping = {\n            \"Similarity with score threshold\": \"similarity_score_threshold\",\n            \"MMR (Max Marginal Relevance)\": \"mmr\",\n        }\n\n        return search_type_mapping.get(self.search_type, \"similarity\")\n\n    def _build_search_args(self):\n        query = self.search_query if isinstance(self.search_query, str) and self.search_query.strip() else None\n\n        if query:\n            args = {\n                \"query\": query,\n                \"search_type\": self._map_search_type(),\n                \"k\": self.number_of_results,\n                \"score_threshold\": self.search_score_threshold,\n            }\n        elif self.advanced_search_filter:\n            args = {\n                \"n\": self.number_of_results,\n         
   }\n        else:\n            return {}\n\n        filter_arg = self.advanced_search_filter or {}\n        if filter_arg:\n            args[\"filter\"] = filter_arg\n\n        return args\n\n    def search_documents(self, vector_store=None) -> list[Data]:\n        vector_store = vector_store or self.build_vector_store()\n\n        self.log(f\"Search input: {self.search_query}\")\n        self.log(f\"Search type: {self.search_type}\")\n        self.log(f\"Number of results: {self.number_of_results}\")\n\n        try:\n            search_args = self._build_search_args()\n        except Exception as e:\n            msg = f\"Error in AstraDBVectorStore._build_search_args: {e}\"\n            raise ValueError(msg) from e\n\n        if not search_args:\n            self.log(\"No search input or filters provided. Skipping search.\")\n            return []\n\n        docs = []\n        search_method = \"search\" if \"query\" in search_args else \"metadata_search\"\n\n        try:\n            self.log(f\"Calling vector_store.{search_method} with args: {search_args}\")\n            docs = getattr(vector_store, search_method)(**search_args)\n        except Exception as e:\n            msg = f\"Error performing {search_method} in AstraDBVectorStore: {e}\"\n            raise ValueError(msg) from e\n\n        self.log(f\"Retrieved documents: {len(docs)}\")\n\n        data = docs_to_data(docs)\n        self.log(f\"Converted documents to data: {len(data)}\")\n        self.status = data\n\n        return data\n\n    def get_retriever_kwargs(self):\n        search_args = self._build_search_args()\n\n        return {\n            \"search_type\": self._map_search_type(),\n            \"search_kwargs\": search_args,\n        }\n"
               "collection_name": {
                 "_input_type": "DropdownInput",
@@ -3795,7 +3795,7 @@
                 "show": true,
                 "title_case": false,
                 "type": "code",
-                "value": "import os\nfrom collections import defaultdict\nfrom dataclasses import dataclass, field\n\nfrom astrapy import AstraDBAdmin, DataAPIClient, Database\nfrom langchain_astradb import AstraDBVectorStore, CollectionVectorServiceOptions\n\nfrom langflow.base.vectorstores.model import LCVectorStoreComponent, check_cached_vector_store\nfrom langflow.helpers import docs_to_data\nfrom langflow.inputs import FloatInput, NestedDictInput\nfrom import (\n    BoolInput,\n    DropdownInput,\n    HandleInput,\n    IntInput,\n    SecretStrInput,\n    StrInput,\n)\nfrom langflow.schema import Data\nfrom langflow.utils.version import get_version_info\n\n\nclass AstraDBVectorStoreComponent(LCVectorStoreComponent):\n    display_name: str = \"Astra DB\"\n    description: str = \"Ingest and search documents in Astra DB\"\n    documentation: str = \"\"\n    name = \"AstraDB\"\n    icon: str = \"AstraDB\"\n\n    _cached_vector_store: AstraDBVectorStore | None = None\n\n    @dataclass\n    class NewDatabaseInput:\n        functionality: str = \"create\"\n        fields: dict[str, dict] = field(\n            default_factory=lambda: {\n                \"data\": {\n                    \"node\": {\n                        \"description\": \"Create a new database in Astra DB.\",\n                        \"display_name\": \"Create New Database\",\n                        \"field_order\": [\"new_database_name\", \"cloud_provider\", \"region\"],\n                        \"template\": {\n                            \"new_database_name\": StrInput(\n                                name=\"new_database_name\",\n                                display_name=\"New Database Name\",\n                                info=\"Name of the new database to create in Astra DB.\",\n                                required=True,\n                            ),\n                            \"cloud_provider\": DropdownInput(\n                                name=\"cloud_provider\",\n          
                      display_name=\"Cloud Provider\",\n                                info=\"Cloud provider for the new database.\",\n                                options=[\"Amazon Web Services\", \"Google Cloud Platform\", \"Microsoft Azure\"],\n                                required=True,\n                            ),\n                            \"region\": DropdownInput(\n                                name=\"region\",\n                                display_name=\"Region\",\n                                info=\"Region for the new database.\",\n                                options=[],\n                                required=True,\n                            ),\n                        },\n                    },\n                }\n            }\n        )\n\n    @dataclass\n    class NewCollectionInput:\n        functionality: str = \"create\"\n        fields: dict[str, dict] = field(\n            default_factory=lambda: {\n                \"data\": {\n                    \"node\": {\n                        \"description\": \"Create a new collection in Astra DB.\",\n                        \"display_name\": \"Create New Collection\",\n                        \"field_order\": [\n                            \"new_collection_name\",\n                            \"embedding_generation_provider\",\n                            \"embedding_generation_model\",\n                        ],\n                        \"template\": {\n                            \"new_collection_name\": StrInput(\n                                name=\"new_collection_name\",\n                                display_name=\"New Collection Name\",\n                                info=\"Name of the new collection to create in Astra DB.\",\n                                required=True,\n                            ),\n                            \"embedding_generation_provider\": DropdownInput(\n                                name=\"embedding_generation_provider\",\n        
                        display_name=\"Embedding Generation Provider\",\n                                info=\"Provider to use for generating embeddings.\",\n                                options=[],\n                                required=True,\n                            ),\n                            \"embedding_generation_model\": DropdownInput(\n                                name=\"embedding_generation_model\",\n                                display_name=\"Embedding Generation Model\",\n                                info=\"Model to use for generating embeddings.\",\n                                options=[],\n                                required=True,\n                            ),\n                        },\n                    },\n                }\n            }\n        )\n\n    inputs = [\n        SecretStrInput(\n            name=\"token\",\n            display_name=\"Astra DB Application Token\",\n            info=\"Authentication token for accessing Astra DB.\",\n            value=\"ASTRA_DB_APPLICATION_TOKEN\",\n            required=True,\n            real_time_refresh=True,\n            input_types=[],\n        ),\n        StrInput(\n            name=\"environment\",\n            display_name=\"Environment\",\n            info=\"The environment for the Astra DB API Endpoint.\",\n            advanced=True,\n        ),\n        DropdownInput(\n            name=\"api_endpoint\",\n            display_name=\"Database\",\n            info=\"The Database / API Endpoint for the Astra DB instance.\",\n            required=True,\n            refresh_button=True,\n            real_time_refresh=True,\n            combobox=True,\n        ),\n        StrInput(\n            name=\"d_api_endpoint\",\n            display_name=\"Database API Endpoint\",\n            info=\"The API Endpoint for the Astra DB instance. 
Supercedes database selection.\",\n            advanced=True,\n        ),\n        DropdownInput(\n            name=\"collection_name\",\n            display_name=\"Collection\",\n            info=\"The name of the collection within Astra DB where the vectors will be stored.\",\n            required=True,\n            refresh_button=True,\n            real_time_refresh=True,\n            # dialog_inputs=asdict(NewCollectionInput()),\n            combobox=True,\n        ),\n        StrInput(\n            name=\"keyspace\",\n            display_name=\"Keyspace\",\n            info=\"Optional keyspace within Astra DB to use for the collection.\",\n            advanced=True,\n        ),\n        DropdownInput(\n            name=\"embedding_choice\",\n            display_name=\"Embedding Model or Astra Vectorize\",\n            info=\"Choose an embedding model or use Astra Vectorize.\",\n            options=[\"Embedding Model\", \"Astra Vectorize\"],\n            value=\"Embedding Model\",\n            advanced=True,\n            real_time_refresh=True,\n        ),\n        HandleInput(\n            name=\"embedding_model\",\n            display_name=\"Embedding Model\",\n            input_types=[\"Embeddings\"],\n            info=\"Specify the Embedding Model. 
Not required for Astra Vectorize collections.\",\n            required=False,\n        ),\n        *LCVectorStoreComponent.inputs,\n        IntInput(\n            name=\"number_of_results\",\n            display_name=\"Number of Search Results\",\n            info=\"Number of search results to return.\",\n            advanced=True,\n            value=4,\n        ),\n        DropdownInput(\n            name=\"search_type\",\n            display_name=\"Search Type\",\n            info=\"Search type to use\",\n            options=[\"Similarity\", \"Similarity with score threshold\", \"MMR (Max Marginal Relevance)\"],\n            value=\"Similarity\",\n            advanced=True,\n        ),\n        FloatInput(\n            name=\"search_score_threshold\",\n            display_name=\"Search Score Threshold\",\n            info=\"Minimum similarity score threshold for search results. \"\n            \"(when using 'Similarity with score threshold')\",\n            value=0,\n            advanced=True,\n        ),\n        NestedDictInput(\n            name=\"advanced_search_filter\",\n            display_name=\"Search Metadata Filter\",\n            info=\"Optional dictionary of filters to apply to the search query.\",\n            advanced=True,\n        ),\n        BoolInput(\n            name=\"autodetect_collection\",\n            display_name=\"Autodetect Collection\",\n            info=\"Boolean flag to determine whether to autodetect the collection.\",\n            advanced=True,\n            value=True,\n        ),\n        StrInput(\n            name=\"content_field\",\n            display_name=\"Content Field\",\n            info=\"Field to use as the text content field for the vector store.\",\n            advanced=True,\n        ),\n        StrInput(\n            name=\"deletion_field\",\n            display_name=\"Deletion Based On Field\",\n            info=\"When this parameter is provided, documents in the target collection with \"\n            \"metadata 
field values matching the input metadata field value will be deleted \"\n            \"before new data is loaded.\",\n            advanced=True,\n        ),\n        BoolInput(\n            name=\"ignore_invalid_documents\",\n            display_name=\"Ignore Invalid Documents\",\n            info=\"Boolean flag to determine whether to ignore invalid documents at runtime.\",\n            advanced=True,\n        ),\n        NestedDictInput(\n            name=\"astradb_vectorstore_kwargs\",\n            display_name=\"AstraDBVectorStore Parameters\",\n            info=\"Optional dictionary of additional parameters for the AstraDBVectorStore.\",\n            advanced=True,\n        ),\n    ]\n\n    @classmethod\n    def map_cloud_providers(cls):\n        return {\n            \"Amazon Web Services\": {\n                \"id\": \"aws\",\n                \"regions\": [\"us-east-2\", \"ap-south-1\", \"eu-west-1\"],\n            },\n            \"Google Cloud Platform\": {\n                \"id\": \"gcp\",\n                \"regions\": [\"us-east1\"],\n            },\n            \"Microsoft Azure\": {\n                \"id\": \"azure\",\n                \"regions\": [\"westus3\"],\n            },\n        }\n\n    @classmethod\n    def create_database_api(\n        cls,\n        token: str,\n        new_database_name: str,\n        cloud_provider: str,\n        region: str,\n    ):\n        client = DataAPIClient(token=token)\n\n        # Get the admin object\n        admin_client = client.get_admin(token=token)\n\n        # Call the create database function\n        return admin_client.create_database(\n            name=new_database_name,\n            cloud_provider=cloud_provider,\n            region=region,\n        )\n\n    @classmethod\n    def create_collection_api(\n        cls,\n        token: str,\n        database_name: str,\n        new_collection_name: str,\n        dimension: int | None = None,\n        embedding_generation_provider: str | None = None,\n     
   embedding_generation_model: str | None = None,\n    ):\n        client = DataAPIClient(token=token)\n        api_endpoint = cls.get_api_endpoint_static(token=token, database_name=database_name)\n\n        # Get the database object\n        database = client.get_database(api_endpoint=api_endpoint, token=token)\n\n        # Build vectorize options, if needed\n        vectorize_options = None\n        if not dimension:\n            vectorize_options = CollectionVectorServiceOptions(\n                provider=embedding_generation_provider,\n                model_name=embedding_generation_model,\n                authentication=None,\n                parameters=None,\n            )\n\n        # Create the collection\n        return database.create_collection(\n            name=new_collection_name,\n            dimension=dimension,\n            service=vectorize_options,\n        )\n\n    @classmethod\n    def get_database_list_static(cls, token: str, environment: str | None = None):\n        client = DataAPIClient(token=token, environment=environment)\n\n        # Get the admin object\n        admin_client = client.get_admin(token=token)\n\n        # Get the list of databases\n        db_list = list(admin_client.list_databases())\n\n        # Generate the api endpoint for each database\n        db_info_dict = {}\n        for db in db_list:\n            try:\n                api_endpoint = f\"https://{}-{}\"\n                db_info_dict[] = {\n                    \"api_endpoint\": api_endpoint,\n                    \"collections\": len(\n                        list(\n                            client.get_database(\n                                api_endpoint=api_endpoint, token=token,\n                            ).list_collection_names(\n                        )\n                    ),\n                }\n            except Exception:  # noqa: BLE001, S110\n                pass\n\n        return db_info_dict\n\n    def get_database_list(self):\n        return 
self.get_database_list_static(token=self.token, environment=self.environment)\n\n    @classmethod\n    def get_api_endpoint_static(\n        cls,\n        token: str,\n        environment: str | None = None,\n        api_endpoint: str | None = None,\n        database_name: str | None = None,\n    ):\n        # If the api_endpoint is set, return it\n        if api_endpoint:\n            return api_endpoint\n\n        # Check if the database_name is like a url\n        if database_name and database_name.startswith(\"https://\"):\n            return database_name\n\n        # If the database is not set, nothing we can do.\n        if not database_name:\n            return None\n\n        # Otherwise, get the URL from the database list\n        return cls.get_database_list_static(token=token, environment=environment).get(database_name).get(\"api_endpoint\")\n\n    def get_api_endpoint(self, *, api_endpoint: str | None = None):\n        return self.get_api_endpoint_static(\n            token=self.token,\n            environment=self.environment,\n            api_endpoint=api_endpoint or self.d_api_endpoint,\n            database_name=self.api_endpoint,\n        )\n\n    def get_keyspace(self):\n        keyspace = self.keyspace\n\n        if keyspace:\n            return keyspace.strip()\n\n        return None\n\n    def get_database_object(self, api_endpoint: str | None = None):\n        try:\n            client = DataAPIClient(token=self.token, environment=self.environment)\n\n            return client.get_database(\n                api_endpoint=self.get_api_endpoint(api_endpoint=api_endpoint),\n                token=self.token,\n                keyspace=self.get_keyspace(),\n            )\n        except Exception as e:  # noqa: BLE001\n            self.log(f\"Error getting database: {e}\")\n\n            return None\n\n    def collection_data(self, collection_name: str, database: Database | None = None):\n        try:\n            if not database:\n                
client = DataAPIClient(token=self.token, environment=self.environment)\n\n                database = client.get_database(\n                    api_endpoint=self.get_api_endpoint(),\n                    token=self.token,\n                    keyspace=self.get_keyspace(),\n                )\n\n            collection = database.get_collection(collection_name, keyspace=self.get_keyspace())\n\n            return collection.estimated_document_count()\n        except Exception as e:  # noqa: BLE001\n            self.log(f\"Error checking collection data: {e}\")\n\n            return None\n\n    def get_vectorize_providers(self):\n        try:\n            self.log(\"Dynamically updating list of Vectorize providers.\")\n\n            # Get the admin object\n            admin = AstraDBAdmin(token=self.token)\n            db_admin = admin.get_database_admin(api_endpoint=self.get_api_endpoint())\n\n            # Get the list of embedding providers\n            embedding_providers = db_admin.find_embedding_providers().as_dict()\n\n            vectorize_providers_mapping = {}\n            # Map the provider display name to the provider key and models\n            for provider_key, provider_data in embedding_providers[\"embeddingProviders\"].items():\n                display_name = provider_data[\"displayName\"]\n                models = [model[\"name\"] for model in provider_data[\"models\"]]\n\n                # TODO:\n                vectorize_providers_mapping[display_name] = [provider_key, models]\n\n            # Sort the resulting dictionary\n            return defaultdict(list, dict(sorted(vectorize_providers_mapping.items())))\n        except Exception as e:  # noqa: BLE001\n            self.log(f\"Error fetching Vectorize providers: {e}\")\n\n            return {}\n\n    def _initialize_database_options(self):\n        try:\n            return [\n                {\n                    \"name\": name,\n                    \"collections\": info[\"collections\"],\n        
            \"api_endpoint\": info[\"api_endpoint\"],\n                }\n                for name, info in self.get_database_list().items()\n            ]\n        except Exception as e:\n            msg = f\"Error fetching database options: {e}\"\n            raise ValueError(msg) from e\n\n    def _initialize_collection_options(self, api_endpoint: str | None = None):\n        database = self.get_database_object(api_endpoint=api_endpoint)\n        if database is None:\n            return []\n\n        try:\n            collection_list = list(database.list_collections(keyspace=self.get_keyspace()))\n\n            return [\n                {\n                    \"name\":,\n                    \"records\": self.collection_data(, database=database),\n                    \"provider\": (\n                        col.options.vector.service.provider\n                        if col.options.vector and col.options.vector.service\n                        else None\n                    ),\n                    \"icon\": \"\",\n                    \"model\": (\n                        col.options.vector.service.model_name\n                        if col.options.vector and col.options.vector.service\n                        else None\n                    ),\n                }\n                for col in collection_list\n            ]\n        except Exception as e:  # noqa: BLE001\n            self.log(f\"Error fetching collections: {e}\")\n\n            return []\n\n    def reset_build_config(self, build_config: dict):\n        # Reset the list of databases we have based on the token provided\n        build_config[\"api_endpoint\"][\"options\"] = []\n        build_config[\"api_endpoint\"][\"options_metadata\"] = []\n        build_config[\"api_endpoint\"][\"value\"] = \"\"\n        build_config[\"api_endpoint\"][\"name\"] = \"Database\"\n\n        # Reset the list of collections and metadata associated\n        build_config[\"collection_name\"][\"options\"] = []\n        
build_config[\"collection_name\"][\"options_metadata\"] = []\n        build_config[\"collection_name\"][\"value\"] = \"\"\n\n        return build_config\n\n    def update_build_config(self, build_config: dict, field_value: str, field_name: str | None = None):\n        # TODO: Remove special astra flags when overlays are out\n        # TODO: Better targeting of this field\n        dslf = os.getenv(\"AWS_EXECUTION_ENV\") == \"AWS_ECS_FARGATE\"\n\n        # If the token has not been provided, simply return\n        if not self.token:\n            return self.reset_build_config(build_config)\n\n        # Refresh the database name options\n        if not dslf and (field_name in [\"token\", \"environment\"] or not build_config[\"api_endpoint\"][\"options\"]):\n            # Reset the build config to ensure we are starting fresh\n            build_config = self.reset_build_config(build_config)\n\n            # Get the list of options we have based on the token provided\n            database_options = self._initialize_database_options()\n\n            # If we retrieved options based on the token, show the dropdown\n            build_config[\"api_endpoint\"][\"options\"] = [db[\"name\"] for db in database_options]\n            build_config[\"api_endpoint\"][\"options_metadata\"] = [\n                {k: v for k, v in db.items() if k not in [\"name\"]} for db in database_options\n            ]\n\n            # Get list of regions for a given cloud provider\n            \"\"\"\n            cloud_provider = (\n                build_config[\"api_endpoint\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"][\"cloud_provider\"][\n                    \"value\"\n                ]\n                or \"Amazon Web Services\"\n            )\n            build_config[\"api_endpoint\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"][\"region\"][\n                \"options\"\n            ] = self.map_cloud_providers()[cloud_provider][\"regions\"]\n         
   \"\"\"\n\n        # Refresh the collection name options\n        if field_name == \"api_endpoint\":\n            # Reset the selected collection\n            build_config[\"collection_name\"][\"value\"] = \"\"\n\n            # Set the underlying api endpoint value of the database\n            if field_value in build_config[\"api_endpoint\"][\"options\"]:\n                index_of_name = build_config[\"api_endpoint\"][\"options\"].index(field_value)\n                build_config[\"d_api_endpoint\"][\"value\"] = build_config[\"api_endpoint\"][\"options_metadata\"][\n                    index_of_name\n                ][\"api_endpoint\"]\n            else:\n                build_config[\"d_api_endpoint\"][\"value\"] = \"\"\n\n            # Reload the list of collections and metadata associated\n            collection_options = self._initialize_collection_options(\n                api_endpoint=build_config[\"d_api_endpoint\"][\"value\"] if not dslf else None\n            )\n\n            # If we have collections, show the dropdown\n            build_config[\"collection_name\"][\"options\"] = [col[\"name\"] for col in collection_options]\n            build_config[\"collection_name\"][\"options_metadata\"] = [\n                {k: v for k, v in col.items() if k not in [\"name\"]} for col in collection_options\n            ]\n\n        # Hide embedding model option if opriona_metadata provider is not null\n        if field_name == \"collection_name\" and field_value:\n            # Set the options for collection name to be the field value if its a new collection\n            if field_value not in build_config[\"collection_name\"][\"options\"]:\n                # If this is running in DSLF, we may need to initialize the options again\n                if dslf:\n                    # Reload the list of collections and metadata associated\n                    collection_options = self._initialize_collection_options(\n                        
api_endpoint=build_config[\"d_api_endpoint\"][\"value\"] if not dslf else None\n                    )\n\n                    # If we have collections, show the dropdown\n                    build_config[\"collection_name\"][\"options\"] = [col[\"name\"] for col in collection_options]\n                    build_config[\"collection_name\"][\"options_metadata\"] = [\n                        {k: v for k, v in col.items() if k not in [\"name\"]} for col in collection_options\n                    ]\n                else:\n                    # Add the new collection to the list of options\n                    build_config[\"collection_name\"][\"options\"].append(field_value)\n                    build_config[\"collection_name\"][\"options_metadata\"].append(\n                        {\"records\": 0, \"provider\": None, \"icon\": \"\", \"model\": None}\n                    )\n\n                    # Ensure that autodetect collection is set to False, since its a new collection\n                    build_config[\"autodetect_collection\"][\"value\"] = False\n            else:\n                build_config[\"autodetect_collection\"][\"value\"] = True\n\n            # Find the position of the selected collection to align with metadata\n            index_of_name = build_config[\"collection_name\"][\"options\"].index(field_value)\n\n            # Get the provider value of the selected collection\n            value_of_provider = build_config[\"collection_name\"][\"options_metadata\"][index_of_name][\"provider\"]\n\n            # If we were able to determine the Vectorize provider, set it accordingly\n            if value_of_provider:\n                build_config[\"embedding_model\"][\"advanced\"] = True\n                build_config[\"embedding_choice\"][\"value\"] = \"Astra Vectorize\"\n            else:\n                build_config[\"embedding_model\"][\"advanced\"] = False\n                build_config[\"embedding_choice\"][\"value\"] = \"Embedding Model\"\n\n        # For 
the final step, get the list of vectorize providers\n        \"\"\"\n        vectorize_providers = self.get_vectorize_providers()\n        if not vectorize_providers:\n            return build_config\n\n        # Allow the user to see the embedding provider options\n        provider_options = build_config[\"collection_name\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"][\n            \"embedding_generation_provider\"\n        ][\"options\"]\n        if not provider_options:\n            # If the collection is set, allow user to see embedding options\n            build_config[\"collection_name\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"][\n                \"embedding_generation_provider\"\n            ][\"options\"] = [\"Bring your own\", \"Nvidia\", *[key for key in vectorize_providers if key != \"Nvidia\"]]\n\n        # And allow the user to see the models based on a selected provider\n        model_options = build_config[\"collection_name\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"][\n            \"embedding_generation_model\"\n        ][\"options\"]\n        if not model_options:\n            embedding_provider = build_config[\"collection_name\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"][\n                \"embedding_generation_provider\"\n            ][\"value\"]\n\n            build_config[\"collection_name\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"][\n                \"embedding_generation_model\"\n            ][\"options\"] = vectorize_providers.get(embedding_provider, [[], []])[1]\n        \"\"\"\n\n        return build_config\n\n    @check_cached_vector_store\n    def build_vector_store(self):\n        try:\n            from langchain_astradb import AstraDBVectorStore\n        except ImportError as e:\n            msg = (\n                \"Could not import langchain Astra DB integration package. 
\"\n                \"Please install it with `pip install langchain-astradb`.\"\n            )\n            raise ImportError(msg) from e\n\n        # Get the embedding model and additional params\n        embedding_params = (\n            {\"embedding\": self.embedding_model}\n            if self.embedding_model and self.embedding_choice == \"Embedding Model\"\n            else {}\n        )\n\n        # Get the additional parameters\n        additional_params = self.astradb_vectorstore_kwargs or {}\n\n        # Get Langflow version and platform information\n        __version__ = get_version_info()[\"version\"]\n        langflow_prefix = \"\"\n        if os.getenv(\"AWS_EXECUTION_ENV\") == \"AWS_ECS_FARGATE\":  # TODO: More precise way of detecting\n            langflow_prefix = \"ds-\"\n\n        # Get the database object\n        database = self.get_database_object(api_endpoint=self.d_api_endpoint)\n        autodetect = self.collection_name in database.list_collection_names() and self.autodetect_collection\n\n        # Bundle up the auto-detect parameters\n        autodetect_params = {\n            \"autodetect_collection\": autodetect,\n            \"content_field\": (\n                self.content_field\n                if self.content_field and embedding_params\n                else (\n                    \"page_content\"\n                    if embedding_params\n                    and self.collection_data(collection_name=self.collection_name, database=database) == 0\n                    else None\n                )\n            ),\n            \"ignore_invalid_documents\": self.ignore_invalid_documents,\n        }\n\n        # Attempt to build the Vector Store object\n        try:\n            vector_store = AstraDBVectorStore(\n                # Astra DB Authentication Parameters\n                token=self.token,\n                api_endpoint=database.api_endpoint,\n                namespace=database.keyspace,\n                
collection_name=self.collection_name,\n                environment=self.environment,\n                # Astra DB Usage Tracking Parameters\n                ext_callers=[(f\"{langflow_prefix}langflow\", __version__)],\n                # Astra DB Vector Store Parameters\n                **autodetect_params,\n                **embedding_params,\n                **additional_params,\n            )\n        except Exception as e:\n            msg = f\"Error initializing AstraDBVectorStore: {e}\"\n            raise ValueError(msg) from e\n\n        # Add documents to the vector store\n        self._add_documents_to_vector_store(vector_store)\n\n        return vector_store\n\n    def _add_documents_to_vector_store(self, vector_store) -> None:\n        documents = []\n        for _input in self.ingest_data or []:\n            if isinstance(_input, Data):\n                documents.append(_input.to_lc_document())\n            else:\n                msg = \"Vector Store Inputs must be Data objects.\"\n                raise TypeError(msg)\n\n        if documents and self.deletion_field:\n            self.log(f\"Deleting documents where {self.deletion_field}\")\n            try:\n                database = self.get_database_object(api_endpoint=self.d_api_endpoint)\n                collection = database.get_collection(self.collection_name, keyspace=database.keyspace)\n                delete_values = list({doc.metadata[self.deletion_field] for doc in documents})\n                self.log(f\"Deleting documents where {self.deletion_field} matches {delete_values}.\")\n                collection.delete_many({f\"metadata.{self.deletion_field}\": {\"$in\": delete_values}})\n            except Exception as e:\n                msg = f\"Error deleting documents from AstraDBVectorStore based on '{self.deletion_field}': {e}\"\n                raise ValueError(msg) from e\n\n        if documents:\n            self.log(f\"Adding {len(documents)} documents to the Vector Store.\")\n            
try:\n                vector_store.add_documents(documents)\n            except Exception as e:\n                msg = f\"Error adding documents to AstraDBVectorStore: {e}\"\n                raise ValueError(msg) from e\n        else:\n            self.log(\"No documents to add to the Vector Store.\")\n\n    def _map_search_type(self) -> str:\n        search_type_mapping = {\n            \"Similarity with score threshold\": \"similarity_score_threshold\",\n            \"MMR (Max Marginal Relevance)\": \"mmr\",\n        }\n\n        return search_type_mapping.get(self.search_type, \"similarity\")\n\n    def _build_search_args(self):\n        query = self.search_query if isinstance(self.search_query, str) and self.search_query.strip() else None\n\n        if query:\n            args = {\n                \"query\": query,\n                \"search_type\": self._map_search_type(),\n                \"k\": self.number_of_results,\n                \"score_threshold\": self.search_score_threshold,\n            }\n        elif self.advanced_search_filter:\n            args = {\n                \"n\": self.number_of_results,\n            }\n        else:\n            return {}\n\n        filter_arg = self.advanced_search_filter or {}\n        if filter_arg:\n            args[\"filter\"] = filter_arg\n\n        return args\n\n    def search_documents(self, vector_store=None) -> list[Data]:\n        vector_store = vector_store or self.build_vector_store()\n\n        self.log(f\"Search input: {self.search_query}\")\n        self.log(f\"Search type: {self.search_type}\")\n        self.log(f\"Number of results: {self.number_of_results}\")\n\n        try:\n            search_args = self._build_search_args()\n        except Exception as e:\n            msg = f\"Error in AstraDBVectorStore._build_search_args: {e}\"\n            raise ValueError(msg) from e\n\n        if not search_args:\n            self.log(\"No search input or filters provided. 
Skipping search.\")\n            return []\n\n        docs = []\n        search_method = \"search\" if \"query\" in search_args else \"metadata_search\"\n\n        try:\n            self.log(f\"Calling vector_store.{search_method} with args: {search_args}\")\n            docs = getattr(vector_store, search_method)(**search_args)\n        except Exception as e:\n            msg = f\"Error performing {search_method} in AstraDBVectorStore: {e}\"\n            raise ValueError(msg) from e\n\n        self.log(f\"Retrieved documents: {len(docs)}\")\n\n        data = docs_to_data(docs)\n        self.log(f\"Converted documents to data: {len(data)}\")\n        self.status = data\n\n        return data\n\n    def get_retriever_kwargs(self):\n        search_args = self._build_search_args()\n\n        return {\n            \"search_type\": self._map_search_type(),\n            \"search_kwargs\": search_args,\n        }\n"
+                "value": "import os\nfrom collections import defaultdict\nfrom dataclasses import dataclass, field\n\nfrom astrapy import AstraDBAdmin, DataAPIClient, Database\nfrom langchain_astradb import AstraDBVectorStore, CollectionVectorServiceOptions\n\nfrom langflow.base.vectorstores.model import LCVectorStoreComponent, check_cached_vector_store\nfrom langflow.helpers import docs_to_data\nfrom langflow.inputs import FloatInput, NestedDictInput\nfrom import (\n    BoolInput,\n    DropdownInput,\n    HandleInput,\n    IntInput,\n    SecretStrInput,\n    StrInput,\n)\nfrom langflow.schema import Data\nfrom langflow.utils.version import get_version_info\n\n\nclass AstraDBVectorStoreComponent(LCVectorStoreComponent):\n    display_name: str = \"Astra DB\"\n    description: str = \"Ingest and search documents in Astra DB\"\n    documentation: str = \"\"\n    name = \"AstraDB\"\n    icon: str = \"AstraDB\"\n\n    _cached_vector_store: AstraDBVectorStore | None = None\n\n    @dataclass\n    class NewDatabaseInput:\n        functionality: str = \"create\"\n        fields: dict[str, dict] = field(\n            default_factory=lambda: {\n                \"data\": {\n                    \"node\": {\n                        \"description\": \"Create a new database in Astra DB.\",\n                        \"display_name\": \"Create New Database\",\n                        \"field_order\": [\"new_database_name\", \"cloud_provider\", \"region\"],\n                        \"template\": {\n                            \"new_database_name\": StrInput(\n                                name=\"new_database_name\",\n                                display_name=\"New Database Name\",\n                                info=\"Name of the new database to create in Astra DB.\",\n                                required=True,\n                            ),\n                            \"cloud_provider\": DropdownInput(\n                                name=\"cloud_provider\",\n          
                      display_name=\"Cloud Provider\",\n                                info=\"Cloud provider for the new database.\",\n                                options=[\"Amazon Web Services\", \"Google Cloud Platform\", \"Microsoft Azure\"],\n                                required=True,\n                            ),\n                            \"region\": DropdownInput(\n                                name=\"region\",\n                                display_name=\"Region\",\n                                info=\"Region for the new database.\",\n                                options=[],\n                                required=True,\n                            ),\n                        },\n                    },\n                }\n            }\n        )\n\n    @dataclass\n    class NewCollectionInput:\n        functionality: str = \"create\"\n        fields: dict[str, dict] = field(\n            default_factory=lambda: {\n                \"data\": {\n                    \"node\": {\n                        \"description\": \"Create a new collection in Astra DB.\",\n                        \"display_name\": \"Create New Collection\",\n                        \"field_order\": [\n                            \"new_collection_name\",\n                            \"embedding_generation_provider\",\n                            \"embedding_generation_model\",\n                        ],\n                        \"template\": {\n                            \"new_collection_name\": StrInput(\n                                name=\"new_collection_name\",\n                                display_name=\"New Collection Name\",\n                                info=\"Name of the new collection to create in Astra DB.\",\n                                required=True,\n                            ),\n                            \"embedding_generation_provider\": DropdownInput(\n                                name=\"embedding_generation_provider\",\n        
                        display_name=\"Embedding Generation Provider\",\n                                info=\"Provider to use for generating embeddings.\",\n                                options=[],\n                                required=True,\n                            ),\n                            \"embedding_generation_model\": DropdownInput(\n                                name=\"embedding_generation_model\",\n                                display_name=\"Embedding Generation Model\",\n                                info=\"Model to use for generating embeddings.\",\n                                options=[],\n                                required=True,\n                            ),\n                        },\n                    },\n                }\n            }\n        )\n\n    inputs = [\n        SecretStrInput(\n            name=\"token\",\n            display_name=\"Astra DB Application Token\",\n            info=\"Authentication token for accessing Astra DB.\",\n            value=\"ASTRA_DB_APPLICATION_TOKEN\",\n            required=True,\n            real_time_refresh=True,\n            input_types=[],\n        ),\n        StrInput(\n            name=\"environment\",\n            display_name=\"Environment\",\n            info=\"The environment for the Astra DB API Endpoint.\",\n            advanced=True,\n        ),\n        DropdownInput(\n            name=\"api_endpoint\",\n            display_name=\"Database\",\n            info=\"The Database / API Endpoint for the Astra DB instance.\",\n            required=True,\n            refresh_button=True,\n            real_time_refresh=True,\n            combobox=True,\n        ),\n        StrInput(\n            name=\"d_api_endpoint\",\n            display_name=\"Database API Endpoint\",\n            info=\"The API Endpoint for the Astra DB instance. 
Supercedes database selection.\",\n            advanced=True,\n        ),\n        DropdownInput(\n            name=\"collection_name\",\n            display_name=\"Collection\",\n            info=\"The name of the collection within Astra DB where the vectors will be stored.\",\n            required=True,\n            refresh_button=True,\n            real_time_refresh=True,\n            # dialog_inputs=asdict(NewCollectionInput()),\n            combobox=True,\n        ),\n        StrInput(\n            name=\"keyspace\",\n            display_name=\"Keyspace\",\n            info=\"Optional keyspace within Astra DB to use for the collection.\",\n            advanced=True,\n        ),\n        DropdownInput(\n            name=\"embedding_choice\",\n            display_name=\"Embedding Model or Astra Vectorize\",\n            info=\"Choose an embedding model or use Astra Vectorize.\",\n            options=[\"Embedding Model\", \"Astra Vectorize\"],\n            value=\"Embedding Model\",\n            advanced=True,\n            real_time_refresh=True,\n        ),\n        HandleInput(\n            name=\"embedding_model\",\n            display_name=\"Embedding Model\",\n            input_types=[\"Embeddings\"],\n            info=\"Specify the Embedding Model. 
Not required for Astra Vectorize collections.\",\n            required=False,\n        ),\n        *LCVectorStoreComponent.inputs,\n        IntInput(\n            name=\"number_of_results\",\n            display_name=\"Number of Search Results\",\n            info=\"Number of search results to return.\",\n            advanced=True,\n            value=4,\n        ),\n        DropdownInput(\n            name=\"search_type\",\n            display_name=\"Search Type\",\n            info=\"Search type to use\",\n            options=[\"Similarity\", \"Similarity with score threshold\", \"MMR (Max Marginal Relevance)\"],\n            value=\"Similarity\",\n            advanced=True,\n        ),\n        FloatInput(\n            name=\"search_score_threshold\",\n            display_name=\"Search Score Threshold\",\n            info=\"Minimum similarity score threshold for search results. \"\n            \"(when using 'Similarity with score threshold')\",\n            value=0,\n            advanced=True,\n        ),\n        NestedDictInput(\n            name=\"advanced_search_filter\",\n            display_name=\"Search Metadata Filter\",\n            info=\"Optional dictionary of filters to apply to the search query.\",\n            advanced=True,\n        ),\n        BoolInput(\n            name=\"autodetect_collection\",\n            display_name=\"Autodetect Collection\",\n            info=\"Boolean flag to determine whether to autodetect the collection.\",\n            advanced=True,\n            value=True,\n        ),\n        StrInput(\n            name=\"content_field\",\n            display_name=\"Content Field\",\n            info=\"Field to use as the text content field for the vector store.\",\n            advanced=True,\n        ),\n        StrInput(\n            name=\"deletion_field\",\n            display_name=\"Deletion Based On Field\",\n            info=\"When this parameter is provided, documents in the target collection with \"\n            \"metadata 
field values matching the input metadata field value will be deleted \"\n            \"before new data is loaded.\",\n            advanced=True,\n        ),\n        BoolInput(\n            name=\"ignore_invalid_documents\",\n            display_name=\"Ignore Invalid Documents\",\n            info=\"Boolean flag to determine whether to ignore invalid documents at runtime.\",\n            advanced=True,\n        ),\n        NestedDictInput(\n            name=\"astradb_vectorstore_kwargs\",\n            display_name=\"AstraDBVectorStore Parameters\",\n            info=\"Optional dictionary of additional parameters for the AstraDBVectorStore.\",\n            advanced=True,\n        ),\n    ]\n\n    @classmethod\n    def map_cloud_providers(cls):\n        return {\n            \"Amazon Web Services\": {\n                \"id\": \"aws\",\n                \"regions\": [\"us-east-2\", \"ap-south-1\", \"eu-west-1\"],\n            },\n            \"Google Cloud Platform\": {\n                \"id\": \"gcp\",\n                \"regions\": [\"us-east1\"],\n            },\n            \"Microsoft Azure\": {\n                \"id\": \"azure\",\n                \"regions\": [\"westus3\"],\n            },\n        }\n\n    @classmethod\n    def create_database_api(\n        cls,\n        token: str,\n        new_database_name: str,\n        cloud_provider: str,\n        region: str,\n    ):\n        client = DataAPIClient(token=token)\n\n        # Get the admin object\n        admin_client = client.get_admin(token=token)\n\n        # Call the create database function\n        return admin_client.create_database(\n            name=new_database_name,\n            cloud_provider=cloud_provider,\n            region=region,\n        )\n\n    @classmethod\n    def create_collection_api(\n        cls,\n        token: str,\n        database_name: str,\n        new_collection_name: str,\n        dimension: int | None = None,\n        embedding_generation_provider: str | None = None,\n     
   embedding_generation_model: str | None = None,\n    ):\n        client = DataAPIClient(token=token)\n        api_endpoint = cls.get_api_endpoint_static(token=token, database_name=database_name)\n\n        # Get the database object\n        database = client.get_database(api_endpoint=api_endpoint, token=token)\n\n        # Build vectorize options, if needed\n        vectorize_options = None\n        if not dimension:\n            vectorize_options = CollectionVectorServiceOptions(\n                provider=embedding_generation_provider,\n                model_name=embedding_generation_model,\n                authentication=None,\n                parameters=None,\n            )\n\n        # Create the collection\n        return database.create_collection(\n            name=new_collection_name,\n            dimension=dimension,\n            service=vectorize_options,\n        )\n\n    @classmethod\n    def get_database_list_static(cls, token: str, environment: str | None = None):\n        client = DataAPIClient(token=token, environment=environment)\n\n        # Get the admin object\n        admin_client = client.get_admin(token=token)\n\n        # Get the list of databases\n        db_list = list(admin_client.list_databases())\n\n        # Generate the api endpoint for each database\n        db_info_dict = {}\n        for db in db_list:\n            try:\n                api_endpoint = f\"https://{}-{}\"\n                db_info_dict[] = {\n                    \"api_endpoint\": api_endpoint,\n                    \"collections\": len(\n                        list(\n                            client.get_database(\n                                api_endpoint=api_endpoint, token=token,\n                            ).list_collection_names(\n                        )\n                    ),\n                }\n            except Exception:  # noqa: BLE001, S110\n                pass\n\n        return db_info_dict\n\n    def get_database_list(self):\n        return 
self.get_database_list_static(token=self.token, environment=self.environment)\n\n    @classmethod\n    def get_api_endpoint_static(\n        cls,\n        token: str,\n        environment: str | None = None,\n        api_endpoint: str | None = None,\n        database_name: str | None = None,\n    ):\n        # If the api_endpoint is set, return it\n        if api_endpoint:\n            return api_endpoint\n\n        # Check if the database_name is like a url\n        if database_name and database_name.startswith(\"https://\"):\n            return database_name\n\n        # If the database is not set, nothing we can do.\n        if not database_name:\n            return None\n\n        # Otherwise, get the URL from the database list\n        return cls.get_database_list_static(token=token, environment=environment).get(database_name).get(\"api_endpoint\")\n\n    def get_api_endpoint(self, *, api_endpoint: str | None = None):\n        return self.get_api_endpoint_static(\n            token=self.token,\n            environment=self.environment,\n            api_endpoint=api_endpoint or self.d_api_endpoint,\n            database_name=self.api_endpoint,\n        )\n\n    def get_keyspace(self):\n        keyspace = self.keyspace\n\n        if keyspace:\n            return keyspace.strip()\n\n        return None\n\n    def get_database_object(self, api_endpoint: str | None = None):\n        try:\n            client = DataAPIClient(token=self.token, environment=self.environment)\n\n            return client.get_database(\n                api_endpoint=self.get_api_endpoint(api_endpoint=api_endpoint),\n                token=self.token,\n                keyspace=self.get_keyspace(),\n            )\n        except Exception as e:\n            msg = f\"Error fetching database object: {e}\"\n            raise ValueError(msg) from e\n\n    def collection_data(self, collection_name: str, database: Database | None = None):\n        try:\n            if not database:\n                
client = DataAPIClient(token=self.token, environment=self.environment)\n\n                database = client.get_database(\n                    api_endpoint=self.get_api_endpoint(),\n                    token=self.token,\n                    keyspace=self.get_keyspace(),\n                )\n\n            collection = database.get_collection(collection_name, keyspace=self.get_keyspace())\n\n            return collection.estimated_document_count()\n        except Exception as e:  # noqa: BLE001\n            self.log(f\"Error checking collection data: {e}\")\n\n            return None\n\n    def get_vectorize_providers(self):\n        try:\n            self.log(\"Dynamically updating list of Vectorize providers.\")\n\n            # Get the admin object\n            admin = AstraDBAdmin(token=self.token)\n            db_admin = admin.get_database_admin(api_endpoint=self.get_api_endpoint())\n\n            # Get the list of embedding providers\n            embedding_providers = db_admin.find_embedding_providers().as_dict()\n\n            vectorize_providers_mapping = {}\n            # Map the provider display name to the provider key and models\n            for provider_key, provider_data in embedding_providers[\"embeddingProviders\"].items():\n                display_name = provider_data[\"displayName\"]\n                models = [model[\"name\"] for model in provider_data[\"models\"]]\n\n                # TODO:\n                vectorize_providers_mapping[display_name] = [provider_key, models]\n\n            # Sort the resulting dictionary\n            return defaultdict(list, dict(sorted(vectorize_providers_mapping.items())))\n        except Exception as e:  # noqa: BLE001\n            self.log(f\"Error fetching Vectorize providers: {e}\")\n\n            return {}\n\n    def _initialize_database_options(self):\n        try:\n            return [\n                {\n                    \"name\": name,\n                    \"collections\": info[\"collections\"],\n        
            \"api_endpoint\": info[\"api_endpoint\"],\n                }\n                for name, info in self.get_database_list().items()\n            ]\n        except Exception as e:\n            msg = f\"Error fetching database options: {e}\"\n            raise ValueError(msg) from e\n\n    def _initialize_collection_options(self, api_endpoint: str | None = None):\n        # Retrieve the database object\n        database = self.get_database_object(api_endpoint=api_endpoint)\n\n        # Get the list of collections\n        collection_list = list(database.list_collections(keyspace=self.get_keyspace()))\n\n        # Return the list of collections and metadata associated\n        return [\n            {\n                \"name\":,\n                \"records\": self.collection_data(, database=database),\n                \"provider\": (\n                    col.options.vector.service.provider if col.options.vector and col.options.vector.service else None\n                ),\n                \"icon\": \"\",\n                \"model\": (\n                    col.options.vector.service.model_name if col.options.vector and col.options.vector.service else None\n                ),\n            }\n            for col in collection_list\n        ]\n\n    def reset_build_config(self, build_config: dict):\n        # Reset the list of databases we have based on the token provided\n        build_config[\"api_endpoint\"][\"options\"] = []\n        build_config[\"api_endpoint\"][\"options_metadata\"] = []\n        build_config[\"api_endpoint\"][\"value\"] = \"\"\n        build_config[\"api_endpoint\"][\"name\"] = \"Database\"\n\n        # Reset the list of collections and metadata associated\n        build_config[\"collection_name\"][\"options\"] = []\n        build_config[\"collection_name\"][\"options_metadata\"] = []\n        build_config[\"collection_name\"][\"value\"] = \"\"\n\n        return build_config\n\n    def update_build_config(self, build_config: dict, 
field_value: str, field_name: str | None = None):\n        # TODO: Remove special astra flags when overlays are out\n        # TODO: Better targeting of this field\n        dslf = os.getenv(\"AWS_EXECUTION_ENV\") == \"AWS_ECS_FARGATE\"\n\n        # If the token has not been provided, simply return\n        if not self.token:\n            return self.reset_build_config(build_config)\n\n        # Refresh the database name options\n        if not dslf and (field_name in [\"token\", \"environment\"] or not build_config[\"api_endpoint\"][\"options\"]):\n            # Reset the build config to ensure we are starting fresh\n            build_config = self.reset_build_config(build_config)\n\n            # Get the list of options we have based on the token provided\n            database_options = self._initialize_database_options()\n\n            # If we retrieved options based on the token, show the dropdown\n            build_config[\"api_endpoint\"][\"options\"] = [db[\"name\"] for db in database_options]\n            build_config[\"api_endpoint\"][\"options_metadata\"] = [\n                {k: v for k, v in db.items() if k not in [\"name\"]} for db in database_options\n            ]\n\n            # Get list of regions for a given cloud provider\n            \"\"\"\n            cloud_provider = (\n                build_config[\"api_endpoint\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"][\"cloud_provider\"][\n                    \"value\"\n                ]\n                or \"Amazon Web Services\"\n            )\n            build_config[\"api_endpoint\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"][\"region\"][\n                \"options\"\n            ] = self.map_cloud_providers()[cloud_provider][\"regions\"]\n            \"\"\"\n\n        # Refresh the collection name options\n        if field_name == \"api_endpoint\":\n            # Reset the selected collection\n            build_config[\"collection_name\"][\"value\"] = 
\"\"\n\n            # Set the underlying api endpoint value of the database\n            if field_value in build_config[\"api_endpoint\"][\"options\"]:\n                index_of_name = build_config[\"api_endpoint\"][\"options\"].index(field_value)\n                build_config[\"d_api_endpoint\"][\"value\"] = build_config[\"api_endpoint\"][\"options_metadata\"][\n                    index_of_name\n                ][\"api_endpoint\"]\n            else:\n                build_config[\"d_api_endpoint\"][\"value\"] = \"\"\n\n            # Reload the list of collections and metadata associated\n            collection_options = self._initialize_collection_options(\n                api_endpoint=build_config[\"d_api_endpoint\"][\"value\"] if not dslf else None\n            )\n\n            # If we have collections, show the dropdown\n            build_config[\"collection_name\"][\"options\"] = [col[\"name\"] for col in collection_options]\n            build_config[\"collection_name\"][\"options_metadata\"] = [\n                {k: v for k, v in col.items() if k not in [\"name\"]} for col in collection_options\n            ]\n\n        # Hide embedding model option if opriona_metadata provider is not null\n        if field_name == \"collection_name\" and field_value:\n            # Set the options for collection name to be the field value if its a new collection\n            if field_value not in build_config[\"collection_name\"][\"options\"]:\n                # Add the new collection to the list of options\n                build_config[\"collection_name\"][\"options\"].append(field_value)\n                build_config[\"collection_name\"][\"options_metadata\"].append(\n                    {\"records\": 0, \"provider\": None, \"icon\": \"\", \"model\": None}\n                )\n\n                # Ensure that autodetect collection is set to False, since its a new collection\n                build_config[\"autodetect_collection\"][\"value\"] = False\n            else:\n      
          build_config[\"autodetect_collection\"][\"value\"] = True\n\n            # Find the position of the selected collection to align with metadata\n            index_of_name = build_config[\"collection_name\"][\"options\"].index(field_value)\n\n            # Get the provider value of the selected collection\n            value_of_provider = build_config[\"collection_name\"][\"options_metadata\"][index_of_name][\"provider\"]\n\n            # If we were able to determine the Vectorize provider, set it accordingly\n            if value_of_provider:\n                build_config[\"embedding_model\"][\"advanced\"] = True\n                build_config[\"embedding_choice\"][\"value\"] = \"Astra Vectorize\"\n            else:\n                build_config[\"embedding_model\"][\"advanced\"] = False\n                build_config[\"embedding_choice\"][\"value\"] = \"Embedding Model\"\n\n        # For the final step, get the list of vectorize providers\n        \"\"\"\n        vectorize_providers = self.get_vectorize_providers()\n        if not vectorize_providers:\n            return build_config\n\n        # Allow the user to see the embedding provider options\n        provider_options = build_config[\"collection_name\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"][\n            \"embedding_generation_provider\"\n        ][\"options\"]\n        if not provider_options:\n            # If the collection is set, allow user to see embedding options\n            build_config[\"collection_name\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"][\n                \"embedding_generation_provider\"\n            ][\"options\"] = [\"Bring your own\", \"Nvidia\", *[key for key in vectorize_providers if key != \"Nvidia\"]]\n\n        # And allow the user to see the models based on a selected provider\n        model_options = build_config[\"collection_name\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"][\n            
\"embedding_generation_model\"\n        ][\"options\"]\n        if not model_options:\n            embedding_provider = build_config[\"collection_name\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"][\n                \"embedding_generation_provider\"\n            ][\"value\"]\n\n            build_config[\"collection_name\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"][\n                \"embedding_generation_model\"\n            ][\"options\"] = vectorize_providers.get(embedding_provider, [[], []])[1]\n        \"\"\"\n\n        return build_config\n\n    @check_cached_vector_store\n    def build_vector_store(self):\n        try:\n            from langchain_astradb import AstraDBVectorStore\n        except ImportError as e:\n            msg = (\n                \"Could not import langchain Astra DB integration package. \"\n                \"Please install it with `pip install langchain-astradb`.\"\n            )\n            raise ImportError(msg) from e\n\n        # Get the embedding model and additional params\n        embedding_params = (\n            {\"embedding\": self.embedding_model}\n            if self.embedding_model and self.embedding_choice == \"Embedding Model\"\n            else {}\n        )\n\n        # Get the additional parameters\n        additional_params = self.astradb_vectorstore_kwargs or {}\n\n        # Get Langflow version and platform information\n        __version__ = get_version_info()[\"version\"]\n        langflow_prefix = \"\"\n        if os.getenv(\"AWS_EXECUTION_ENV\") == \"AWS_ECS_FARGATE\":  # TODO: More precise way of detecting\n            langflow_prefix = \"ds-\"\n\n        # Get the database object\n        database = self.get_database_object(api_endpoint=self.d_api_endpoint)\n        autodetect = self.collection_name in database.list_collection_names() and self.autodetect_collection\n\n        # Bundle up the auto-detect parameters\n        autodetect_params = {\n            
\"autodetect_collection\": autodetect,\n            \"content_field\": (\n                self.content_field\n                if self.content_field and embedding_params\n                else (\n                    \"page_content\"\n                    if embedding_params\n                    and self.collection_data(collection_name=self.collection_name, database=database) == 0\n                    else None\n                )\n            ),\n            \"ignore_invalid_documents\": self.ignore_invalid_documents,\n        }\n\n        # Attempt to build the Vector Store object\n        try:\n            vector_store = AstraDBVectorStore(\n                # Astra DB Authentication Parameters\n                token=self.token,\n                api_endpoint=database.api_endpoint,\n                namespace=database.keyspace,\n                collection_name=self.collection_name,\n                environment=self.environment,\n                # Astra DB Usage Tracking Parameters\n                ext_callers=[(f\"{langflow_prefix}langflow\", __version__)],\n                # Astra DB Vector Store Parameters\n                **autodetect_params,\n                **embedding_params,\n                **additional_params,\n            )\n        except Exception as e:\n            msg = f\"Error initializing AstraDBVectorStore: {e}\"\n            raise ValueError(msg) from e\n\n        # Add documents to the vector store\n        self._add_documents_to_vector_store(vector_store)\n\n        return vector_store\n\n    def _add_documents_to_vector_store(self, vector_store) -> None:\n        documents = []\n        for _input in self.ingest_data or []:\n            if isinstance(_input, Data):\n                documents.append(_input.to_lc_document())\n            else:\n                msg = \"Vector Store Inputs must be Data objects.\"\n                raise TypeError(msg)\n\n        if documents and self.deletion_field:\n            self.log(f\"Deleting documents where 
{self.deletion_field}\")\n            try:\n                database = self.get_database_object(api_endpoint=self.d_api_endpoint)\n                collection = database.get_collection(self.collection_name, keyspace=database.keyspace)\n                delete_values = list({doc.metadata[self.deletion_field] for doc in documents})\n                self.log(f\"Deleting documents where {self.deletion_field} matches {delete_values}.\")\n                collection.delete_many({f\"metadata.{self.deletion_field}\": {\"$in\": delete_values}})\n            except Exception as e:\n                msg = f\"Error deleting documents from AstraDBVectorStore based on '{self.deletion_field}': {e}\"\n                raise ValueError(msg) from e\n\n        if documents:\n            self.log(f\"Adding {len(documents)} documents to the Vector Store.\")\n            try:\n                vector_store.add_documents(documents)\n            except Exception as e:\n                msg = f\"Error adding documents to AstraDBVectorStore: {e}\"\n                raise ValueError(msg) from e\n        else:\n            self.log(\"No documents to add to the Vector Store.\")\n\n    def _map_search_type(self) -> str:\n        search_type_mapping = {\n            \"Similarity with score threshold\": \"similarity_score_threshold\",\n            \"MMR (Max Marginal Relevance)\": \"mmr\",\n        }\n\n        return search_type_mapping.get(self.search_type, \"similarity\")\n\n    def _build_search_args(self):\n        query = self.search_query if isinstance(self.search_query, str) and self.search_query.strip() else None\n\n        if query:\n            args = {\n                \"query\": query,\n                \"search_type\": self._map_search_type(),\n                \"k\": self.number_of_results,\n                \"score_threshold\": self.search_score_threshold,\n            }\n        elif self.advanced_search_filter:\n            args = {\n                \"n\": self.number_of_results,\n         
   }\n        else:\n            return {}\n\n        filter_arg = self.advanced_search_filter or {}\n        if filter_arg:\n            args[\"filter\"] = filter_arg\n\n        return args\n\n    def search_documents(self, vector_store=None) -> list[Data]:\n        vector_store = vector_store or self.build_vector_store()\n\n        self.log(f\"Search input: {self.search_query}\")\n        self.log(f\"Search type: {self.search_type}\")\n        self.log(f\"Number of results: {self.number_of_results}\")\n\n        try:\n            search_args = self._build_search_args()\n        except Exception as e:\n            msg = f\"Error in AstraDBVectorStore._build_search_args: {e}\"\n            raise ValueError(msg) from e\n\n        if not search_args:\n            self.log(\"No search input or filters provided. Skipping search.\")\n            return []\n\n        docs = []\n        search_method = \"search\" if \"query\" in search_args else \"metadata_search\"\n\n        try:\n            self.log(f\"Calling vector_store.{search_method} with args: {search_args}\")\n            docs = getattr(vector_store, search_method)(**search_args)\n        except Exception as e:\n            msg = f\"Error performing {search_method} in AstraDBVectorStore: {e}\"\n            raise ValueError(msg) from e\n\n        self.log(f\"Retrieved documents: {len(docs)}\")\n\n        data = docs_to_data(docs)\n        self.log(f\"Converted documents to data: {len(data)}\")\n        self.status = data\n\n        return data\n\n    def get_retriever_kwargs(self):\n        search_args = self._build_search_args()\n\n        return {\n            \"search_type\": self._map_search_type(),\n            \"search_kwargs\": search_args,\n        }\n"
               "collection_name": {
                 "_input_type": "DropdownInput",

From ac8f75feab1e6782248dcef4a50a48d66ae8e9a5 Mon Sep 17 00:00:00 2001
From: Eric Hare <>
Date: Fri, 31 Jan 2025 09:28:19 -0800
Subject: [PATCH 6/7] Small cleanup of dynamic config

 .../langflow/components/vectorstores/astradb.py   | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/src/backend/base/langflow/components/vectorstores/astradb.py b/src/backend/base/langflow/components/vectorstores/astradb.py
index 989aa265ec31..d8cf483c982a 100644
--- a/src/backend/base/langflow/components/vectorstores/astradb.py
+++ b/src/backend/base/langflow/components/vectorstores/astradb.py
@@ -488,16 +488,15 @@ def reset_build_config(self, build_config: dict):
         return build_config
     def update_build_config(self, build_config: dict, field_value: str, field_name: str | None = None):
-        # TODO: Remove special astra flags when overlays are out
-        # TODO: Better targeting of this field
-        dslf = os.getenv("AWS_EXECUTION_ENV") == "AWS_ECS_FARGATE"
+        # When the component first executes, this is the update refresh call
+        first_run = field_name == "collection_name" and not field_value
         # If the token has not been provided, simply return
-        if not self.token:
+        if not self.token or field_name == "environment":
             return self.reset_build_config(build_config)
         # Refresh the database name options
-        if not dslf and (field_name in ["token", "environment"] or not build_config["api_endpoint"]["options"]):
+        if first_run or field_name == "token":
             # Reset the build config to ensure we are starting fresh
             build_config = self.reset_build_config(build_config)
@@ -523,6 +522,8 @@ def update_build_config(self, build_config: dict, field_value: str, field_name:
             ] = self.map_cloud_providers()[cloud_provider]["regions"]
+            return build_config
         # Refresh the collection name options
         if field_name == "api_endpoint":
             # Reset the selected collection
@@ -539,7 +540,7 @@ def update_build_config(self, build_config: dict, field_value: str, field_name:
             # Reload the list of collections and metadata associated
             collection_options = self._initialize_collection_options(
-                api_endpoint=build_config["d_api_endpoint"]["value"] if not dslf else None
+                api_endpoint=build_config["d_api_endpoint"]["value"]
             # If we have collections, show the dropdown
@@ -548,6 +549,8 @@ def update_build_config(self, build_config: dict, field_value: str, field_name:
                 {k: v for k, v in col.items() if k not in ["name"]} for col in collection_options
+            return build_config
         # Hide embedding model option if opriona_metadata provider is not null
         if field_name == "collection_name" and field_value:
             # Set the options for collection name to be the field value if its a new collection

From d07d6ea6db91eabbdadce17a2e2faf1c97c39696 Mon Sep 17 00:00:00 2001
From: "autofix-ci[bot]" <114827586+autofix-ci[bot]>
Date: Fri, 31 Jan 2025 17:29:41 +0000
Subject: [PATCH 7/7] [autofix.ci] apply automated fixes

 .../initial_setup/starter_projects/Vector Store RAG.json      | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/backend/base/langflow/initial_setup/starter_projects/Vector Store RAG.json b/src/backend/base/langflow/initial_setup/starter_projects/Vector Store RAG.json
index 366a607b84f0..5c0d2a66709f 100644
--- a/src/backend/base/langflow/initial_setup/starter_projects/Vector Store RAG.json	
+++ b/src/backend/base/langflow/initial_setup/starter_projects/Vector Store RAG.json	
@@ -3300,7 +3300,7 @@
                 "show": true,
                 "title_case": false,
                 "type": "code",
-                "value": "import os\nfrom collections import defaultdict\nfrom dataclasses import dataclass, field\n\nfrom astrapy import AstraDBAdmin, DataAPIClient, Database\nfrom langchain_astradb import AstraDBVectorStore, CollectionVectorServiceOptions\n\nfrom langflow.base.vectorstores.model import LCVectorStoreComponent, check_cached_vector_store\nfrom langflow.helpers import docs_to_data\nfrom langflow.inputs import FloatInput, NestedDictInput\nfrom import (\n    BoolInput,\n    DropdownInput,\n    HandleInput,\n    IntInput,\n    SecretStrInput,\n    StrInput,\n)\nfrom langflow.schema import Data\nfrom langflow.utils.version import get_version_info\n\n\nclass AstraDBVectorStoreComponent(LCVectorStoreComponent):\n    display_name: str = \"Astra DB\"\n    description: str = \"Ingest and search documents in Astra DB\"\n    documentation: str = \"\"\n    name = \"AstraDB\"\n    icon: str = \"AstraDB\"\n\n    _cached_vector_store: AstraDBVectorStore | None = None\n\n    @dataclass\n    class NewDatabaseInput:\n        functionality: str = \"create\"\n        fields: dict[str, dict] = field(\n            default_factory=lambda: {\n                \"data\": {\n                    \"node\": {\n                        \"description\": \"Create a new database in Astra DB.\",\n                        \"display_name\": \"Create New Database\",\n                        \"field_order\": [\"new_database_name\", \"cloud_provider\", \"region\"],\n                        \"template\": {\n                            \"new_database_name\": StrInput(\n                                name=\"new_database_name\",\n                                display_name=\"New Database Name\",\n                                info=\"Name of the new database to create in Astra DB.\",\n                                required=True,\n                            ),\n                            \"cloud_provider\": DropdownInput(\n                                name=\"cloud_provider\",\n          
                      display_name=\"Cloud Provider\",\n                                info=\"Cloud provider for the new database.\",\n                                options=[\"Amazon Web Services\", \"Google Cloud Platform\", \"Microsoft Azure\"],\n                                required=True,\n                            ),\n                            \"region\": DropdownInput(\n                                name=\"region\",\n                                display_name=\"Region\",\n                                info=\"Region for the new database.\",\n                                options=[],\n                                required=True,\n                            ),\n                        },\n                    },\n                }\n            }\n        )\n\n    @dataclass\n    class NewCollectionInput:\n        functionality: str = \"create\"\n        fields: dict[str, dict] = field(\n            default_factory=lambda: {\n                \"data\": {\n                    \"node\": {\n                        \"description\": \"Create a new collection in Astra DB.\",\n                        \"display_name\": \"Create New Collection\",\n                        \"field_order\": [\n                            \"new_collection_name\",\n                            \"embedding_generation_provider\",\n                            \"embedding_generation_model\",\n                        ],\n                        \"template\": {\n                            \"new_collection_name\": StrInput(\n                                name=\"new_collection_name\",\n                                display_name=\"New Collection Name\",\n                                info=\"Name of the new collection to create in Astra DB.\",\n                                required=True,\n                            ),\n                            \"embedding_generation_provider\": DropdownInput(\n                                name=\"embedding_generation_provider\",\n        
                        display_name=\"Embedding Generation Provider\",\n                                info=\"Provider to use for generating embeddings.\",\n                                options=[],\n                                required=True,\n                            ),\n                            \"embedding_generation_model\": DropdownInput(\n                                name=\"embedding_generation_model\",\n                                display_name=\"Embedding Generation Model\",\n                                info=\"Model to use for generating embeddings.\",\n                                options=[],\n                                required=True,\n                            ),\n                        },\n                    },\n                }\n            }\n        )\n\n    inputs = [\n        SecretStrInput(\n            name=\"token\",\n            display_name=\"Astra DB Application Token\",\n            info=\"Authentication token for accessing Astra DB.\",\n            value=\"ASTRA_DB_APPLICATION_TOKEN\",\n            required=True,\n            real_time_refresh=True,\n            input_types=[],\n        ),\n        StrInput(\n            name=\"environment\",\n            display_name=\"Environment\",\n            info=\"The environment for the Astra DB API Endpoint.\",\n            advanced=True,\n        ),\n        DropdownInput(\n            name=\"api_endpoint\",\n            display_name=\"Database\",\n            info=\"The Database / API Endpoint for the Astra DB instance.\",\n            required=True,\n            refresh_button=True,\n            real_time_refresh=True,\n            combobox=True,\n        ),\n        StrInput(\n            name=\"d_api_endpoint\",\n            display_name=\"Database API Endpoint\",\n            info=\"The API Endpoint for the Astra DB instance. 
Supercedes database selection.\",\n            advanced=True,\n        ),\n        DropdownInput(\n            name=\"collection_name\",\n            display_name=\"Collection\",\n            info=\"The name of the collection within Astra DB where the vectors will be stored.\",\n            required=True,\n            refresh_button=True,\n            real_time_refresh=True,\n            # dialog_inputs=asdict(NewCollectionInput()),\n            combobox=True,\n        ),\n        StrInput(\n            name=\"keyspace\",\n            display_name=\"Keyspace\",\n            info=\"Optional keyspace within Astra DB to use for the collection.\",\n            advanced=True,\n        ),\n        DropdownInput(\n            name=\"embedding_choice\",\n            display_name=\"Embedding Model or Astra Vectorize\",\n            info=\"Choose an embedding model or use Astra Vectorize.\",\n            options=[\"Embedding Model\", \"Astra Vectorize\"],\n            value=\"Embedding Model\",\n            advanced=True,\n            real_time_refresh=True,\n        ),\n        HandleInput(\n            name=\"embedding_model\",\n            display_name=\"Embedding Model\",\n            input_types=[\"Embeddings\"],\n            info=\"Specify the Embedding Model. 
Not required for Astra Vectorize collections.\",\n            required=False,\n        ),\n        *LCVectorStoreComponent.inputs,\n        IntInput(\n            name=\"number_of_results\",\n            display_name=\"Number of Search Results\",\n            info=\"Number of search results to return.\",\n            advanced=True,\n            value=4,\n        ),\n        DropdownInput(\n            name=\"search_type\",\n            display_name=\"Search Type\",\n            info=\"Search type to use\",\n            options=[\"Similarity\", \"Similarity with score threshold\", \"MMR (Max Marginal Relevance)\"],\n            value=\"Similarity\",\n            advanced=True,\n        ),\n        FloatInput(\n            name=\"search_score_threshold\",\n            display_name=\"Search Score Threshold\",\n            info=\"Minimum similarity score threshold for search results. \"\n            \"(when using 'Similarity with score threshold')\",\n            value=0,\n            advanced=True,\n        ),\n        NestedDictInput(\n            name=\"advanced_search_filter\",\n            display_name=\"Search Metadata Filter\",\n            info=\"Optional dictionary of filters to apply to the search query.\",\n            advanced=True,\n        ),\n        BoolInput(\n            name=\"autodetect_collection\",\n            display_name=\"Autodetect Collection\",\n            info=\"Boolean flag to determine whether to autodetect the collection.\",\n            advanced=True,\n            value=True,\n        ),\n        StrInput(\n            name=\"content_field\",\n            display_name=\"Content Field\",\n            info=\"Field to use as the text content field for the vector store.\",\n            advanced=True,\n        ),\n        StrInput(\n            name=\"deletion_field\",\n            display_name=\"Deletion Based On Field\",\n            info=\"When this parameter is provided, documents in the target collection with \"\n            \"metadata 
field values matching the input metadata field value will be deleted \"\n            \"before new data is loaded.\",\n            advanced=True,\n        ),\n        BoolInput(\n            name=\"ignore_invalid_documents\",\n            display_name=\"Ignore Invalid Documents\",\n            info=\"Boolean flag to determine whether to ignore invalid documents at runtime.\",\n            advanced=True,\n        ),\n        NestedDictInput(\n            name=\"astradb_vectorstore_kwargs\",\n            display_name=\"AstraDBVectorStore Parameters\",\n            info=\"Optional dictionary of additional parameters for the AstraDBVectorStore.\",\n            advanced=True,\n        ),\n    ]\n\n    @classmethod\n    def map_cloud_providers(cls):\n        return {\n            \"Amazon Web Services\": {\n                \"id\": \"aws\",\n                \"regions\": [\"us-east-2\", \"ap-south-1\", \"eu-west-1\"],\n            },\n            \"Google Cloud Platform\": {\n                \"id\": \"gcp\",\n                \"regions\": [\"us-east1\"],\n            },\n            \"Microsoft Azure\": {\n                \"id\": \"azure\",\n                \"regions\": [\"westus3\"],\n            },\n        }\n\n    @classmethod\n    def create_database_api(\n        cls,\n        token: str,\n        new_database_name: str,\n        cloud_provider: str,\n        region: str,\n    ):\n        client = DataAPIClient(token=token)\n\n        # Get the admin object\n        admin_client = client.get_admin(token=token)\n\n        # Call the create database function\n        return admin_client.create_database(\n            name=new_database_name,\n            cloud_provider=cloud_provider,\n            region=region,\n        )\n\n    @classmethod\n    def create_collection_api(\n        cls,\n        token: str,\n        database_name: str,\n        new_collection_name: str,\n        dimension: int | None = None,\n        embedding_generation_provider: str | None = None,\n     
   embedding_generation_model: str | None = None,\n    ):\n        client = DataAPIClient(token=token)\n        api_endpoint = cls.get_api_endpoint_static(token=token, database_name=database_name)\n\n        # Get the database object\n        database = client.get_database(api_endpoint=api_endpoint, token=token)\n\n        # Build vectorize options, if needed\n        vectorize_options = None\n        if not dimension:\n            vectorize_options = CollectionVectorServiceOptions(\n                provider=embedding_generation_provider,\n                model_name=embedding_generation_model,\n                authentication=None,\n                parameters=None,\n            )\n\n        # Create the collection\n        return database.create_collection(\n            name=new_collection_name,\n            dimension=dimension,\n            service=vectorize_options,\n        )\n\n    @classmethod\n    def get_database_list_static(cls, token: str, environment: str | None = None):\n        client = DataAPIClient(token=token, environment=environment)\n\n        # Get the admin object\n        admin_client = client.get_admin(token=token)\n\n        # Get the list of databases\n        db_list = list(admin_client.list_databases())\n\n        # Generate the api endpoint for each database\n        db_info_dict = {}\n        for db in db_list:\n            try:\n                api_endpoint = f\"https://{}-{}\"\n                db_info_dict[] = {\n                    \"api_endpoint\": api_endpoint,\n                    \"collections\": len(\n                        list(\n                            client.get_database(\n                                api_endpoint=api_endpoint, token=token,\n                            ).list_collection_names(\n                        )\n                    ),\n                }\n            except Exception:  # noqa: BLE001, S110\n                pass\n\n        return db_info_dict\n\n    def get_database_list(self):\n        return 
self.get_database_list_static(token=self.token, environment=self.environment)\n\n    @classmethod\n    def get_api_endpoint_static(\n        cls,\n        token: str,\n        environment: str | None = None,\n        api_endpoint: str | None = None,\n        database_name: str | None = None,\n    ):\n        # If the api_endpoint is set, return it\n        if api_endpoint:\n            return api_endpoint\n\n        # Check if the database_name is like a url\n        if database_name and database_name.startswith(\"https://\"):\n            return database_name\n\n        # If the database is not set, nothing we can do.\n        if not database_name:\n            return None\n\n        # Otherwise, get the URL from the database list\n        return cls.get_database_list_static(token=token, environment=environment).get(database_name).get(\"api_endpoint\")\n\n    def get_api_endpoint(self, *, api_endpoint: str | None = None):\n        return self.get_api_endpoint_static(\n            token=self.token,\n            environment=self.environment,\n            api_endpoint=api_endpoint or self.d_api_endpoint,\n            database_name=self.api_endpoint,\n        )\n\n    def get_keyspace(self):\n        keyspace = self.keyspace\n\n        if keyspace:\n            return keyspace.strip()\n\n        return None\n\n    def get_database_object(self, api_endpoint: str | None = None):\n        try:\n            client = DataAPIClient(token=self.token, environment=self.environment)\n\n            return client.get_database(\n                api_endpoint=self.get_api_endpoint(api_endpoint=api_endpoint),\n                token=self.token,\n                keyspace=self.get_keyspace(),\n            )\n        except Exception as e:\n            msg = f\"Error fetching database object: {e}\"\n            raise ValueError(msg) from e\n\n    def collection_data(self, collection_name: str, database: Database | None = None):\n        try:\n            if not database:\n                
client = DataAPIClient(token=self.token, environment=self.environment)\n\n                database = client.get_database(\n                    api_endpoint=self.get_api_endpoint(),\n                    token=self.token,\n                    keyspace=self.get_keyspace(),\n                )\n\n            collection = database.get_collection(collection_name, keyspace=self.get_keyspace())\n\n            return collection.estimated_document_count()\n        except Exception as e:  # noqa: BLE001\n            self.log(f\"Error checking collection data: {e}\")\n\n            return None\n\n    def get_vectorize_providers(self):\n        try:\n            self.log(\"Dynamically updating list of Vectorize providers.\")\n\n            # Get the admin object\n            admin = AstraDBAdmin(token=self.token)\n            db_admin = admin.get_database_admin(api_endpoint=self.get_api_endpoint())\n\n            # Get the list of embedding providers\n            embedding_providers = db_admin.find_embedding_providers().as_dict()\n\n            vectorize_providers_mapping = {}\n            # Map the provider display name to the provider key and models\n            for provider_key, provider_data in embedding_providers[\"embeddingProviders\"].items():\n                display_name = provider_data[\"displayName\"]\n                models = [model[\"name\"] for model in provider_data[\"models\"]]\n\n                # TODO:\n                vectorize_providers_mapping[display_name] = [provider_key, models]\n\n            # Sort the resulting dictionary\n            return defaultdict(list, dict(sorted(vectorize_providers_mapping.items())))\n        except Exception as e:  # noqa: BLE001\n            self.log(f\"Error fetching Vectorize providers: {e}\")\n\n            return {}\n\n    def _initialize_database_options(self):\n        try:\n            return [\n                {\n                    \"name\": name,\n                    \"collections\": info[\"collections\"],\n        
            \"api_endpoint\": info[\"api_endpoint\"],\n                }\n                for name, info in self.get_database_list().items()\n            ]\n        except Exception as e:\n            msg = f\"Error fetching database options: {e}\"\n            raise ValueError(msg) from e\n\n    def _initialize_collection_options(self, api_endpoint: str | None = None):\n        # Retrieve the database object\n        database = self.get_database_object(api_endpoint=api_endpoint)\n\n        # Get the list of collections\n        collection_list = list(database.list_collections(keyspace=self.get_keyspace()))\n\n        # Return the list of collections and metadata associated\n        return [\n            {\n                \"name\":,\n                \"records\": self.collection_data(, database=database),\n                \"provider\": (\n                    col.options.vector.service.provider if col.options.vector and col.options.vector.service else None\n                ),\n                \"icon\": \"\",\n                \"model\": (\n                    col.options.vector.service.model_name if col.options.vector and col.options.vector.service else None\n                ),\n            }\n            for col in collection_list\n        ]\n\n    def reset_build_config(self, build_config: dict):\n        # Reset the list of databases we have based on the token provided\n        build_config[\"api_endpoint\"][\"options\"] = []\n        build_config[\"api_endpoint\"][\"options_metadata\"] = []\n        build_config[\"api_endpoint\"][\"value\"] = \"\"\n        build_config[\"api_endpoint\"][\"name\"] = \"Database\"\n\n        # Reset the list of collections and metadata associated\n        build_config[\"collection_name\"][\"options\"] = []\n        build_config[\"collection_name\"][\"options_metadata\"] = []\n        build_config[\"collection_name\"][\"value\"] = \"\"\n\n        return build_config\n\n    def update_build_config(self, build_config: dict, 
field_value: str, field_name: str | None = None):\n        # TODO: Remove special astra flags when overlays are out\n        # TODO: Better targeting of this field\n        dslf = os.getenv(\"AWS_EXECUTION_ENV\") == \"AWS_ECS_FARGATE\"\n\n        # If the token has not been provided, simply return\n        if not self.token:\n            return self.reset_build_config(build_config)\n\n        # Refresh the database name options\n        if not dslf and (field_name in [\"token\", \"environment\"] or not build_config[\"api_endpoint\"][\"options\"]):\n            # Reset the build config to ensure we are starting fresh\n            build_config = self.reset_build_config(build_config)\n\n            # Get the list of options we have based on the token provided\n            database_options = self._initialize_database_options()\n\n            # If we retrieved options based on the token, show the dropdown\n            build_config[\"api_endpoint\"][\"options\"] = [db[\"name\"] for db in database_options]\n            build_config[\"api_endpoint\"][\"options_metadata\"] = [\n                {k: v for k, v in db.items() if k not in [\"name\"]} for db in database_options\n            ]\n\n            # Get list of regions for a given cloud provider\n            \"\"\"\n            cloud_provider = (\n                build_config[\"api_endpoint\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"][\"cloud_provider\"][\n                    \"value\"\n                ]\n                or \"Amazon Web Services\"\n            )\n            build_config[\"api_endpoint\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"][\"region\"][\n                \"options\"\n            ] = self.map_cloud_providers()[cloud_provider][\"regions\"]\n            \"\"\"\n\n        # Refresh the collection name options\n        if field_name == \"api_endpoint\":\n            # Reset the selected collection\n            build_config[\"collection_name\"][\"value\"] = 
\"\"\n\n            # Set the underlying api endpoint value of the database\n            if field_value in build_config[\"api_endpoint\"][\"options\"]:\n                index_of_name = build_config[\"api_endpoint\"][\"options\"].index(field_value)\n                build_config[\"d_api_endpoint\"][\"value\"] = build_config[\"api_endpoint\"][\"options_metadata\"][\n                    index_of_name\n                ][\"api_endpoint\"]\n            else:\n                build_config[\"d_api_endpoint\"][\"value\"] = \"\"\n\n            # Reload the list of collections and metadata associated\n            collection_options = self._initialize_collection_options(\n                api_endpoint=build_config[\"d_api_endpoint\"][\"value\"] if not dslf else None\n            )\n\n            # If we have collections, show the dropdown\n            build_config[\"collection_name\"][\"options\"] = [col[\"name\"] for col in collection_options]\n            build_config[\"collection_name\"][\"options_metadata\"] = [\n                {k: v for k, v in col.items() if k not in [\"name\"]} for col in collection_options\n            ]\n\n        # Hide embedding model option if opriona_metadata provider is not null\n        if field_name == \"collection_name\" and field_value:\n            # Set the options for collection name to be the field value if its a new collection\n            if field_value not in build_config[\"collection_name\"][\"options\"]:\n                # Add the new collection to the list of options\n                build_config[\"collection_name\"][\"options\"].append(field_value)\n                build_config[\"collection_name\"][\"options_metadata\"].append(\n                    {\"records\": 0, \"provider\": None, \"icon\": \"\", \"model\": None}\n                )\n\n                # Ensure that autodetect collection is set to False, since its a new collection\n                build_config[\"autodetect_collection\"][\"value\"] = False\n            else:\n      
          build_config[\"autodetect_collection\"][\"value\"] = True\n\n            # Find the position of the selected collection to align with metadata\n            index_of_name = build_config[\"collection_name\"][\"options\"].index(field_value)\n\n            # Get the provider value of the selected collection\n            value_of_provider = build_config[\"collection_name\"][\"options_metadata\"][index_of_name][\"provider\"]\n\n            # If we were able to determine the Vectorize provider, set it accordingly\n            if value_of_provider:\n                build_config[\"embedding_model\"][\"advanced\"] = True\n                build_config[\"embedding_choice\"][\"value\"] = \"Astra Vectorize\"\n            else:\n                build_config[\"embedding_model\"][\"advanced\"] = False\n                build_config[\"embedding_choice\"][\"value\"] = \"Embedding Model\"\n\n        # For the final step, get the list of vectorize providers\n        \"\"\"\n        vectorize_providers = self.get_vectorize_providers()\n        if not vectorize_providers:\n            return build_config\n\n        # Allow the user to see the embedding provider options\n        provider_options = build_config[\"collection_name\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"][\n            \"embedding_generation_provider\"\n        ][\"options\"]\n        if not provider_options:\n            # If the collection is set, allow user to see embedding options\n            build_config[\"collection_name\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"][\n                \"embedding_generation_provider\"\n            ][\"options\"] = [\"Bring your own\", \"Nvidia\", *[key for key in vectorize_providers if key != \"Nvidia\"]]\n\n        # And allow the user to see the models based on a selected provider\n        model_options = build_config[\"collection_name\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"][\n            
\"embedding_generation_model\"\n        ][\"options\"]\n        if not model_options:\n            embedding_provider = build_config[\"collection_name\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"][\n                \"embedding_generation_provider\"\n            ][\"value\"]\n\n            build_config[\"collection_name\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"][\n                \"embedding_generation_model\"\n            ][\"options\"] = vectorize_providers.get(embedding_provider, [[], []])[1]\n        \"\"\"\n\n        return build_config\n\n    @check_cached_vector_store\n    def build_vector_store(self):\n        try:\n            from langchain_astradb import AstraDBVectorStore\n        except ImportError as e:\n            msg = (\n                \"Could not import langchain Astra DB integration package. \"\n                \"Please install it with `pip install langchain-astradb`.\"\n            )\n            raise ImportError(msg) from e\n\n        # Get the embedding model and additional params\n        embedding_params = (\n            {\"embedding\": self.embedding_model}\n            if self.embedding_model and self.embedding_choice == \"Embedding Model\"\n            else {}\n        )\n\n        # Get the additional parameters\n        additional_params = self.astradb_vectorstore_kwargs or {}\n\n        # Get Langflow version and platform information\n        __version__ = get_version_info()[\"version\"]\n        langflow_prefix = \"\"\n        if os.getenv(\"AWS_EXECUTION_ENV\") == \"AWS_ECS_FARGATE\":  # TODO: More precise way of detecting\n            langflow_prefix = \"ds-\"\n\n        # Get the database object\n        database = self.get_database_object(api_endpoint=self.d_api_endpoint)\n        autodetect = self.collection_name in database.list_collection_names() and self.autodetect_collection\n\n        # Bundle up the auto-detect parameters\n        autodetect_params = {\n            
\"autodetect_collection\": autodetect,\n            \"content_field\": (\n                self.content_field\n                if self.content_field and embedding_params\n                else (\n                    \"page_content\"\n                    if embedding_params\n                    and self.collection_data(collection_name=self.collection_name, database=database) == 0\n                    else None\n                )\n            ),\n            \"ignore_invalid_documents\": self.ignore_invalid_documents,\n        }\n\n        # Attempt to build the Vector Store object\n        try:\n            vector_store = AstraDBVectorStore(\n                # Astra DB Authentication Parameters\n                token=self.token,\n                api_endpoint=database.api_endpoint,\n                namespace=database.keyspace,\n                collection_name=self.collection_name,\n                environment=self.environment,\n                # Astra DB Usage Tracking Parameters\n                ext_callers=[(f\"{langflow_prefix}langflow\", __version__)],\n                # Astra DB Vector Store Parameters\n                **autodetect_params,\n                **embedding_params,\n                **additional_params,\n            )\n        except Exception as e:\n            msg = f\"Error initializing AstraDBVectorStore: {e}\"\n            raise ValueError(msg) from e\n\n        # Add documents to the vector store\n        self._add_documents_to_vector_store(vector_store)\n\n        return vector_store\n\n    def _add_documents_to_vector_store(self, vector_store) -> None:\n        documents = []\n        for _input in self.ingest_data or []:\n            if isinstance(_input, Data):\n                documents.append(_input.to_lc_document())\n            else:\n                msg = \"Vector Store Inputs must be Data objects.\"\n                raise TypeError(msg)\n\n        if documents and self.deletion_field:\n            self.log(f\"Deleting documents where 
{self.deletion_field}\")\n            try:\n                database = self.get_database_object(api_endpoint=self.d_api_endpoint)\n                collection = database.get_collection(self.collection_name, keyspace=database.keyspace)\n                delete_values = list({doc.metadata[self.deletion_field] for doc in documents})\n                self.log(f\"Deleting documents where {self.deletion_field} matches {delete_values}.\")\n                collection.delete_many({f\"metadata.{self.deletion_field}\": {\"$in\": delete_values}})\n            except Exception as e:\n                msg = f\"Error deleting documents from AstraDBVectorStore based on '{self.deletion_field}': {e}\"\n                raise ValueError(msg) from e\n\n        if documents:\n            self.log(f\"Adding {len(documents)} documents to the Vector Store.\")\n            try:\n                vector_store.add_documents(documents)\n            except Exception as e:\n                msg = f\"Error adding documents to AstraDBVectorStore: {e}\"\n                raise ValueError(msg) from e\n        else:\n            self.log(\"No documents to add to the Vector Store.\")\n\n    def _map_search_type(self) -> str:\n        search_type_mapping = {\n            \"Similarity with score threshold\": \"similarity_score_threshold\",\n            \"MMR (Max Marginal Relevance)\": \"mmr\",\n        }\n\n        return search_type_mapping.get(self.search_type, \"similarity\")\n\n    def _build_search_args(self):\n        query = self.search_query if isinstance(self.search_query, str) and self.search_query.strip() else None\n\n        if query:\n            args = {\n                \"query\": query,\n                \"search_type\": self._map_search_type(),\n                \"k\": self.number_of_results,\n                \"score_threshold\": self.search_score_threshold,\n            }\n        elif self.advanced_search_filter:\n            args = {\n                \"n\": self.number_of_results,\n         
   }\n        else:\n            return {}\n\n        filter_arg = self.advanced_search_filter or {}\n        if filter_arg:\n            args[\"filter\"] = filter_arg\n\n        return args\n\n    def search_documents(self, vector_store=None) -> list[Data]:\n        vector_store = vector_store or self.build_vector_store()\n\n        self.log(f\"Search input: {self.search_query}\")\n        self.log(f\"Search type: {self.search_type}\")\n        self.log(f\"Number of results: {self.number_of_results}\")\n\n        try:\n            search_args = self._build_search_args()\n        except Exception as e:\n            msg = f\"Error in AstraDBVectorStore._build_search_args: {e}\"\n            raise ValueError(msg) from e\n\n        if not search_args:\n            self.log(\"No search input or filters provided. Skipping search.\")\n            return []\n\n        docs = []\n        search_method = \"search\" if \"query\" in search_args else \"metadata_search\"\n\n        try:\n            self.log(f\"Calling vector_store.{search_method} with args: {search_args}\")\n            docs = getattr(vector_store, search_method)(**search_args)\n        except Exception as e:\n            msg = f\"Error performing {search_method} in AstraDBVectorStore: {e}\"\n            raise ValueError(msg) from e\n\n        self.log(f\"Retrieved documents: {len(docs)}\")\n\n        data = docs_to_data(docs)\n        self.log(f\"Converted documents to data: {len(data)}\")\n        self.status = data\n\n        return data\n\n    def get_retriever_kwargs(self):\n        search_args = self._build_search_args()\n\n        return {\n            \"search_type\": self._map_search_type(),\n            \"search_kwargs\": search_args,\n        }\n"
+                "value": "import os\nfrom collections import defaultdict\nfrom dataclasses import dataclass, field\n\nfrom astrapy import AstraDBAdmin, DataAPIClient, Database\nfrom langchain_astradb import AstraDBVectorStore, CollectionVectorServiceOptions\n\nfrom langflow.base.vectorstores.model import LCVectorStoreComponent, check_cached_vector_store\nfrom langflow.helpers import docs_to_data\nfrom langflow.inputs import FloatInput, NestedDictInput\nfrom import (\n    BoolInput,\n    DropdownInput,\n    HandleInput,\n    IntInput,\n    SecretStrInput,\n    StrInput,\n)\nfrom langflow.schema import Data\nfrom langflow.utils.version import get_version_info\n\n\nclass AstraDBVectorStoreComponent(LCVectorStoreComponent):\n    display_name: str = \"Astra DB\"\n    description: str = \"Ingest and search documents in Astra DB\"\n    documentation: str = \"\"\n    name = \"AstraDB\"\n    icon: str = \"AstraDB\"\n\n    _cached_vector_store: AstraDBVectorStore | None = None\n\n    @dataclass\n    class NewDatabaseInput:\n        functionality: str = \"create\"\n        fields: dict[str, dict] = field(\n            default_factory=lambda: {\n                \"data\": {\n                    \"node\": {\n                        \"description\": \"Create a new database in Astra DB.\",\n                        \"display_name\": \"Create New Database\",\n                        \"field_order\": [\"new_database_name\", \"cloud_provider\", \"region\"],\n                        \"template\": {\n                            \"new_database_name\": StrInput(\n                                name=\"new_database_name\",\n                                display_name=\"New Database Name\",\n                                info=\"Name of the new database to create in Astra DB.\",\n                                required=True,\n                            ),\n                            \"cloud_provider\": DropdownInput(\n                                name=\"cloud_provider\",\n          
                      display_name=\"Cloud Provider\",\n                                info=\"Cloud provider for the new database.\",\n                                options=[\"Amazon Web Services\", \"Google Cloud Platform\", \"Microsoft Azure\"],\n                                required=True,\n                            ),\n                            \"region\": DropdownInput(\n                                name=\"region\",\n                                display_name=\"Region\",\n                                info=\"Region for the new database.\",\n                                options=[],\n                                required=True,\n                            ),\n                        },\n                    },\n                }\n            }\n        )\n\n    @dataclass\n    class NewCollectionInput:\n        functionality: str = \"create\"\n        fields: dict[str, dict] = field(\n            default_factory=lambda: {\n                \"data\": {\n                    \"node\": {\n                        \"description\": \"Create a new collection in Astra DB.\",\n                        \"display_name\": \"Create New Collection\",\n                        \"field_order\": [\n                            \"new_collection_name\",\n                            \"embedding_generation_provider\",\n                            \"embedding_generation_model\",\n                        ],\n                        \"template\": {\n                            \"new_collection_name\": StrInput(\n                                name=\"new_collection_name\",\n                                display_name=\"New Collection Name\",\n                                info=\"Name of the new collection to create in Astra DB.\",\n                                required=True,\n                            ),\n                            \"embedding_generation_provider\": DropdownInput(\n                                name=\"embedding_generation_provider\",\n        
                        display_name=\"Embedding Generation Provider\",\n                                info=\"Provider to use for generating embeddings.\",\n                                options=[],\n                                required=True,\n                            ),\n                            \"embedding_generation_model\": DropdownInput(\n                                name=\"embedding_generation_model\",\n                                display_name=\"Embedding Generation Model\",\n                                info=\"Model to use for generating embeddings.\",\n                                options=[],\n                                required=True,\n                            ),\n                        },\n                    },\n                }\n            }\n        )\n\n    inputs = [\n        SecretStrInput(\n            name=\"token\",\n            display_name=\"Astra DB Application Token\",\n            info=\"Authentication token for accessing Astra DB.\",\n            value=\"ASTRA_DB_APPLICATION_TOKEN\",\n            required=True,\n            real_time_refresh=True,\n            input_types=[],\n        ),\n        StrInput(\n            name=\"environment\",\n            display_name=\"Environment\",\n            info=\"The environment for the Astra DB API Endpoint.\",\n            advanced=True,\n        ),\n        DropdownInput(\n            name=\"api_endpoint\",\n            display_name=\"Database\",\n            info=\"The Database / API Endpoint for the Astra DB instance.\",\n            required=True,\n            refresh_button=True,\n            real_time_refresh=True,\n            combobox=True,\n        ),\n        StrInput(\n            name=\"d_api_endpoint\",\n            display_name=\"Database API Endpoint\",\n            info=\"The API Endpoint for the Astra DB instance. 
Supercedes database selection.\",\n            advanced=True,\n        ),\n        DropdownInput(\n            name=\"collection_name\",\n            display_name=\"Collection\",\n            info=\"The name of the collection within Astra DB where the vectors will be stored.\",\n            required=True,\n            refresh_button=True,\n            real_time_refresh=True,\n            # dialog_inputs=asdict(NewCollectionInput()),\n            combobox=True,\n        ),\n        StrInput(\n            name=\"keyspace\",\n            display_name=\"Keyspace\",\n            info=\"Optional keyspace within Astra DB to use for the collection.\",\n            advanced=True,\n        ),\n        DropdownInput(\n            name=\"embedding_choice\",\n            display_name=\"Embedding Model or Astra Vectorize\",\n            info=\"Choose an embedding model or use Astra Vectorize.\",\n            options=[\"Embedding Model\", \"Astra Vectorize\"],\n            value=\"Embedding Model\",\n            advanced=True,\n            real_time_refresh=True,\n        ),\n        HandleInput(\n            name=\"embedding_model\",\n            display_name=\"Embedding Model\",\n            input_types=[\"Embeddings\"],\n            info=\"Specify the Embedding Model. 
Not required for Astra Vectorize collections.\",\n            required=False,\n        ),\n        *LCVectorStoreComponent.inputs,\n        IntInput(\n            name=\"number_of_results\",\n            display_name=\"Number of Search Results\",\n            info=\"Number of search results to return.\",\n            advanced=True,\n            value=4,\n        ),\n        DropdownInput(\n            name=\"search_type\",\n            display_name=\"Search Type\",\n            info=\"Search type to use\",\n            options=[\"Similarity\", \"Similarity with score threshold\", \"MMR (Max Marginal Relevance)\"],\n            value=\"Similarity\",\n            advanced=True,\n        ),\n        FloatInput(\n            name=\"search_score_threshold\",\n            display_name=\"Search Score Threshold\",\n            info=\"Minimum similarity score threshold for search results. \"\n            \"(when using 'Similarity with score threshold')\",\n            value=0,\n            advanced=True,\n        ),\n        NestedDictInput(\n            name=\"advanced_search_filter\",\n            display_name=\"Search Metadata Filter\",\n            info=\"Optional dictionary of filters to apply to the search query.\",\n            advanced=True,\n        ),\n        BoolInput(\n            name=\"autodetect_collection\",\n            display_name=\"Autodetect Collection\",\n            info=\"Boolean flag to determine whether to autodetect the collection.\",\n            advanced=True,\n            value=True,\n        ),\n        StrInput(\n            name=\"content_field\",\n            display_name=\"Content Field\",\n            info=\"Field to use as the text content field for the vector store.\",\n            advanced=True,\n        ),\n        StrInput(\n            name=\"deletion_field\",\n            display_name=\"Deletion Based On Field\",\n            info=\"When this parameter is provided, documents in the target collection with \"\n            \"metadata 
field values matching the input metadata field value will be deleted \"\n            \"before new data is loaded.\",\n            advanced=True,\n        ),\n        BoolInput(\n            name=\"ignore_invalid_documents\",\n            display_name=\"Ignore Invalid Documents\",\n            info=\"Boolean flag to determine whether to ignore invalid documents at runtime.\",\n            advanced=True,\n        ),\n        NestedDictInput(\n            name=\"astradb_vectorstore_kwargs\",\n            display_name=\"AstraDBVectorStore Parameters\",\n            info=\"Optional dictionary of additional parameters for the AstraDBVectorStore.\",\n            advanced=True,\n        ),\n    ]\n\n    @classmethod\n    def map_cloud_providers(cls):\n        return {\n            \"Amazon Web Services\": {\n                \"id\": \"aws\",\n                \"regions\": [\"us-east-2\", \"ap-south-1\", \"eu-west-1\"],\n            },\n            \"Google Cloud Platform\": {\n                \"id\": \"gcp\",\n                \"regions\": [\"us-east1\"],\n            },\n            \"Microsoft Azure\": {\n                \"id\": \"azure\",\n                \"regions\": [\"westus3\"],\n            },\n        }\n\n    @classmethod\n    def create_database_api(\n        cls,\n        token: str,\n        new_database_name: str,\n        cloud_provider: str,\n        region: str,\n    ):\n        client = DataAPIClient(token=token)\n\n        # Get the admin object\n        admin_client = client.get_admin(token=token)\n\n        # Call the create database function\n        return admin_client.create_database(\n            name=new_database_name,\n            cloud_provider=cloud_provider,\n            region=region,\n        )\n\n    @classmethod\n    def create_collection_api(\n        cls,\n        token: str,\n        database_name: str,\n        new_collection_name: str,\n        dimension: int | None = None,\n        embedding_generation_provider: str | None = None,\n     
   embedding_generation_model: str | None = None,\n    ):\n        client = DataAPIClient(token=token)\n        api_endpoint = cls.get_api_endpoint_static(token=token, database_name=database_name)\n\n        # Get the database object\n        database = client.get_database(api_endpoint=api_endpoint, token=token)\n\n        # Build vectorize options, if needed\n        vectorize_options = None\n        if not dimension:\n            vectorize_options = CollectionVectorServiceOptions(\n                provider=embedding_generation_provider,\n                model_name=embedding_generation_model,\n                authentication=None,\n                parameters=None,\n            )\n\n        # Create the collection\n        return database.create_collection(\n            name=new_collection_name,\n            dimension=dimension,\n            service=vectorize_options,\n        )\n\n    @classmethod\n    def get_database_list_static(cls, token: str, environment: str | None = None):\n        client = DataAPIClient(token=token, environment=environment)\n\n        # Get the admin object\n        admin_client = client.get_admin(token=token)\n\n        # Get the list of databases\n        db_list = list(admin_client.list_databases())\n\n        # Generate the api endpoint for each database\n        db_info_dict = {}\n        for db in db_list:\n            try:\n                api_endpoint = f\"https://{}-{}\"\n                db_info_dict[] = {\n                    \"api_endpoint\": api_endpoint,\n                    \"collections\": len(\n                        list(\n                            client.get_database(\n                                api_endpoint=api_endpoint, token=token,\n                            ).list_collection_names(\n                        )\n                    ),\n                }\n            except Exception:  # noqa: BLE001, S110\n                pass\n\n        return db_info_dict\n\n    def get_database_list(self):\n        return 
self.get_database_list_static(token=self.token, environment=self.environment)\n\n    @classmethod\n    def get_api_endpoint_static(\n        cls,\n        token: str,\n        environment: str | None = None,\n        api_endpoint: str | None = None,\n        database_name: str | None = None,\n    ):\n        # If the api_endpoint is set, return it\n        if api_endpoint:\n            return api_endpoint\n\n        # Check if the database_name is like a url\n        if database_name and database_name.startswith(\"https://\"):\n            return database_name\n\n        # If the database is not set, nothing we can do.\n        if not database_name:\n            return None\n\n        # Otherwise, get the URL from the database list\n        return cls.get_database_list_static(token=token, environment=environment).get(database_name).get(\"api_endpoint\")\n\n    def get_api_endpoint(self, *, api_endpoint: str | None = None):\n        return self.get_api_endpoint_static(\n            token=self.token,\n            environment=self.environment,\n            api_endpoint=api_endpoint or self.d_api_endpoint,\n            database_name=self.api_endpoint,\n        )\n\n    def get_keyspace(self):\n        keyspace = self.keyspace\n\n        if keyspace:\n            return keyspace.strip()\n\n        return None\n\n    def get_database_object(self, api_endpoint: str | None = None):\n        try:\n            client = DataAPIClient(token=self.token, environment=self.environment)\n\n            return client.get_database(\n                api_endpoint=self.get_api_endpoint(api_endpoint=api_endpoint),\n                token=self.token,\n                keyspace=self.get_keyspace(),\n            )\n        except Exception as e:\n            msg = f\"Error fetching database object: {e}\"\n            raise ValueError(msg) from e\n\n    def collection_data(self, collection_name: str, database: Database | None = None):\n        try:\n            if not database:\n                
client = DataAPIClient(token=self.token, environment=self.environment)\n\n                database = client.get_database(\n                    api_endpoint=self.get_api_endpoint(),\n                    token=self.token,\n                    keyspace=self.get_keyspace(),\n                )\n\n            collection = database.get_collection(collection_name, keyspace=self.get_keyspace())\n\n            return collection.estimated_document_count()\n        except Exception as e:  # noqa: BLE001\n            self.log(f\"Error checking collection data: {e}\")\n\n            return None\n\n    def get_vectorize_providers(self):\n        try:\n            self.log(\"Dynamically updating list of Vectorize providers.\")\n\n            # Get the admin object\n            admin = AstraDBAdmin(token=self.token)\n            db_admin = admin.get_database_admin(api_endpoint=self.get_api_endpoint())\n\n            # Get the list of embedding providers\n            embedding_providers = db_admin.find_embedding_providers().as_dict()\n\n            vectorize_providers_mapping = {}\n            # Map the provider display name to the provider key and models\n            for provider_key, provider_data in embedding_providers[\"embeddingProviders\"].items():\n                display_name = provider_data[\"displayName\"]\n                models = [model[\"name\"] for model in provider_data[\"models\"]]\n\n                # TODO:\n                vectorize_providers_mapping[display_name] = [provider_key, models]\n\n            # Sort the resulting dictionary\n            return defaultdict(list, dict(sorted(vectorize_providers_mapping.items())))\n        except Exception as e:  # noqa: BLE001\n            self.log(f\"Error fetching Vectorize providers: {e}\")\n\n            return {}\n\n    def _initialize_database_options(self):\n        try:\n            return [\n                {\n                    \"name\": name,\n                    \"collections\": info[\"collections\"],\n        
            \"api_endpoint\": info[\"api_endpoint\"],\n                }\n                for name, info in self.get_database_list().items()\n            ]\n        except Exception as e:\n            msg = f\"Error fetching database options: {e}\"\n            raise ValueError(msg) from e\n\n    def _initialize_collection_options(self, api_endpoint: str | None = None):\n        # Retrieve the database object\n        database = self.get_database_object(api_endpoint=api_endpoint)\n\n        # Get the list of collections\n        collection_list = list(database.list_collections(keyspace=self.get_keyspace()))\n\n        # Return the list of collections and metadata associated\n        return [\n            {\n                \"name\":,\n                \"records\": self.collection_data(, database=database),\n                \"provider\": (\n                    col.options.vector.service.provider if col.options.vector and col.options.vector.service else None\n                ),\n                \"icon\": \"\",\n                \"model\": (\n                    col.options.vector.service.model_name if col.options.vector and col.options.vector.service else None\n                ),\n            }\n            for col in collection_list\n        ]\n\n    def reset_build_config(self, build_config: dict):\n        # Reset the list of databases we have based on the token provided\n        build_config[\"api_endpoint\"][\"options\"] = []\n        build_config[\"api_endpoint\"][\"options_metadata\"] = []\n        build_config[\"api_endpoint\"][\"value\"] = \"\"\n        build_config[\"api_endpoint\"][\"name\"] = \"Database\"\n\n        # Reset the list of collections and metadata associated\n        build_config[\"collection_name\"][\"options\"] = []\n        build_config[\"collection_name\"][\"options_metadata\"] = []\n        build_config[\"collection_name\"][\"value\"] = \"\"\n\n        return build_config\n\n    def update_build_config(self, build_config: dict, 
field_value: str, field_name: str | None = None):\n        # When the component first executes, this is the update refresh call\n        first_run = field_name == \"collection_name\" and not field_value\n\n        # If the token has not been provided, simply return\n        if not self.token or field_name == \"environment\":\n            return self.reset_build_config(build_config)\n\n        # Refresh the database name options\n        if first_run or field_name == \"token\":\n            # Reset the build config to ensure we are starting fresh\n            build_config = self.reset_build_config(build_config)\n\n            # Get the list of options we have based on the token provided\n            database_options = self._initialize_database_options()\n\n            # If we retrieved options based on the token, show the dropdown\n            build_config[\"api_endpoint\"][\"options\"] = [db[\"name\"] for db in database_options]\n            build_config[\"api_endpoint\"][\"options_metadata\"] = [\n                {k: v for k, v in db.items() if k not in [\"name\"]} for db in database_options\n            ]\n\n            # Get list of regions for a given cloud provider\n            \"\"\"\n            cloud_provider = (\n                build_config[\"api_endpoint\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"][\"cloud_provider\"][\n                    \"value\"\n                ]\n                or \"Amazon Web Services\"\n            )\n            build_config[\"api_endpoint\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"][\"region\"][\n                \"options\"\n            ] = self.map_cloud_providers()[cloud_provider][\"regions\"]\n            \"\"\"\n\n            return build_config\n\n        # Refresh the collection name options\n        if field_name == \"api_endpoint\":\n            # Reset the selected collection\n            build_config[\"collection_name\"][\"value\"] = \"\"\n\n            # Set the 
underlying api endpoint value of the database\n            if field_value in build_config[\"api_endpoint\"][\"options\"]:\n                index_of_name = build_config[\"api_endpoint\"][\"options\"].index(field_value)\n                build_config[\"d_api_endpoint\"][\"value\"] = build_config[\"api_endpoint\"][\"options_metadata\"][\n                    index_of_name\n                ][\"api_endpoint\"]\n            else:\n                build_config[\"d_api_endpoint\"][\"value\"] = \"\"\n\n            # Reload the list of collections and metadata associated\n            collection_options = self._initialize_collection_options(\n                api_endpoint=build_config[\"d_api_endpoint\"][\"value\"]\n            )\n\n            # If we have collections, show the dropdown\n            build_config[\"collection_name\"][\"options\"] = [col[\"name\"] for col in collection_options]\n            build_config[\"collection_name\"][\"options_metadata\"] = [\n                {k: v for k, v in col.items() if k not in [\"name\"]} for col in collection_options\n            ]\n\n            return build_config\n\n        # Hide embedding model option if opriona_metadata provider is not null\n        if field_name == \"collection_name\" and field_value:\n            # Set the options for collection name to be the field value if its a new collection\n            if field_value not in build_config[\"collection_name\"][\"options\"]:\n                # Add the new collection to the list of options\n                build_config[\"collection_name\"][\"options\"].append(field_value)\n                build_config[\"collection_name\"][\"options_metadata\"].append(\n                    {\"records\": 0, \"provider\": None, \"icon\": \"\", \"model\": None}\n                )\n\n                # Ensure that autodetect collection is set to False, since its a new collection\n                build_config[\"autodetect_collection\"][\"value\"] = False\n            else:\n                
build_config[\"autodetect_collection\"][\"value\"] = True\n\n            # Find the position of the selected collection to align with metadata\n            index_of_name = build_config[\"collection_name\"][\"options\"].index(field_value)\n\n            # Get the provider value of the selected collection\n            value_of_provider = build_config[\"collection_name\"][\"options_metadata\"][index_of_name][\"provider\"]\n\n            # If we were able to determine the Vectorize provider, set it accordingly\n            if value_of_provider:\n                build_config[\"embedding_model\"][\"advanced\"] = True\n                build_config[\"embedding_choice\"][\"value\"] = \"Astra Vectorize\"\n            else:\n                build_config[\"embedding_model\"][\"advanced\"] = False\n                build_config[\"embedding_choice\"][\"value\"] = \"Embedding Model\"\n\n        # For the final step, get the list of vectorize providers\n        \"\"\"\n        vectorize_providers = self.get_vectorize_providers()\n        if not vectorize_providers:\n            return build_config\n\n        # Allow the user to see the embedding provider options\n        provider_options = build_config[\"collection_name\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"][\n            \"embedding_generation_provider\"\n        ][\"options\"]\n        if not provider_options:\n            # If the collection is set, allow user to see embedding options\n            build_config[\"collection_name\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"][\n                \"embedding_generation_provider\"\n            ][\"options\"] = [\"Bring your own\", \"Nvidia\", *[key for key in vectorize_providers if key != \"Nvidia\"]]\n\n        # And allow the user to see the models based on a selected provider\n        model_options = build_config[\"collection_name\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"][\n            
\"embedding_generation_model\"\n        ][\"options\"]\n        if not model_options:\n            embedding_provider = build_config[\"collection_name\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"][\n                \"embedding_generation_provider\"\n            ][\"value\"]\n\n            build_config[\"collection_name\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"][\n                \"embedding_generation_model\"\n            ][\"options\"] = vectorize_providers.get(embedding_provider, [[], []])[1]\n        \"\"\"\n\n        return build_config\n\n    @check_cached_vector_store\n    def build_vector_store(self):\n        try:\n            from langchain_astradb import AstraDBVectorStore\n        except ImportError as e:\n            msg = (\n                \"Could not import langchain Astra DB integration package. \"\n                \"Please install it with `pip install langchain-astradb`.\"\n            )\n            raise ImportError(msg) from e\n\n        # Get the embedding model and additional params\n        embedding_params = (\n            {\"embedding\": self.embedding_model}\n            if self.embedding_model and self.embedding_choice == \"Embedding Model\"\n            else {}\n        )\n\n        # Get the additional parameters\n        additional_params = self.astradb_vectorstore_kwargs or {}\n\n        # Get Langflow version and platform information\n        __version__ = get_version_info()[\"version\"]\n        langflow_prefix = \"\"\n        if os.getenv(\"AWS_EXECUTION_ENV\") == \"AWS_ECS_FARGATE\":  # TODO: More precise way of detecting\n            langflow_prefix = \"ds-\"\n\n        # Get the database object\n        database = self.get_database_object(api_endpoint=self.d_api_endpoint)\n        autodetect = self.collection_name in database.list_collection_names() and self.autodetect_collection\n\n        # Bundle up the auto-detect parameters\n        autodetect_params = {\n            
\"autodetect_collection\": autodetect,\n            \"content_field\": (\n                self.content_field\n                if self.content_field and embedding_params\n                else (\n                    \"page_content\"\n                    if embedding_params\n                    and self.collection_data(collection_name=self.collection_name, database=database) == 0\n                    else None\n                )\n            ),\n            \"ignore_invalid_documents\": self.ignore_invalid_documents,\n        }\n\n        # Attempt to build the Vector Store object\n        try:\n            vector_store = AstraDBVectorStore(\n                # Astra DB Authentication Parameters\n                token=self.token,\n                api_endpoint=database.api_endpoint,\n                namespace=database.keyspace,\n                collection_name=self.collection_name,\n                environment=self.environment,\n                # Astra DB Usage Tracking Parameters\n                ext_callers=[(f\"{langflow_prefix}langflow\", __version__)],\n                # Astra DB Vector Store Parameters\n                **autodetect_params,\n                **embedding_params,\n                **additional_params,\n            )\n        except Exception as e:\n            msg = f\"Error initializing AstraDBVectorStore: {e}\"\n            raise ValueError(msg) from e\n\n        # Add documents to the vector store\n        self._add_documents_to_vector_store(vector_store)\n\n        return vector_store\n\n    def _add_documents_to_vector_store(self, vector_store) -> None:\n        documents = []\n        for _input in self.ingest_data or []:\n            if isinstance(_input, Data):\n                documents.append(_input.to_lc_document())\n            else:\n                msg = \"Vector Store Inputs must be Data objects.\"\n                raise TypeError(msg)\n\n        if documents and self.deletion_field:\n            self.log(f\"Deleting documents where 
{self.deletion_field}\")\n            try:\n                database = self.get_database_object(api_endpoint=self.d_api_endpoint)\n                collection = database.get_collection(self.collection_name, keyspace=database.keyspace)\n                delete_values = list({doc.metadata[self.deletion_field] for doc in documents})\n                self.log(f\"Deleting documents where {self.deletion_field} matches {delete_values}.\")\n                collection.delete_many({f\"metadata.{self.deletion_field}\": {\"$in\": delete_values}})\n            except Exception as e:\n                msg = f\"Error deleting documents from AstraDBVectorStore based on '{self.deletion_field}': {e}\"\n                raise ValueError(msg) from e\n\n        if documents:\n            self.log(f\"Adding {len(documents)} documents to the Vector Store.\")\n            try:\n                vector_store.add_documents(documents)\n            except Exception as e:\n                msg = f\"Error adding documents to AstraDBVectorStore: {e}\"\n                raise ValueError(msg) from e\n        else:\n            self.log(\"No documents to add to the Vector Store.\")\n\n    def _map_search_type(self) -> str:\n        search_type_mapping = {\n            \"Similarity with score threshold\": \"similarity_score_threshold\",\n            \"MMR (Max Marginal Relevance)\": \"mmr\",\n        }\n\n        return search_type_mapping.get(self.search_type, \"similarity\")\n\n    def _build_search_args(self):\n        query = self.search_query if isinstance(self.search_query, str) and self.search_query.strip() else None\n\n        if query:\n            args = {\n                \"query\": query,\n                \"search_type\": self._map_search_type(),\n                \"k\": self.number_of_results,\n                \"score_threshold\": self.search_score_threshold,\n            }\n        elif self.advanced_search_filter:\n            args = {\n                \"n\": self.number_of_results,\n         
   }\n        else:\n            return {}\n\n        filter_arg = self.advanced_search_filter or {}\n        if filter_arg:\n            args[\"filter\"] = filter_arg\n\n        return args\n\n    def search_documents(self, vector_store=None) -> list[Data]:\n        vector_store = vector_store or self.build_vector_store()\n\n        self.log(f\"Search input: {self.search_query}\")\n        self.log(f\"Search type: {self.search_type}\")\n        self.log(f\"Number of results: {self.number_of_results}\")\n\n        try:\n            search_args = self._build_search_args()\n        except Exception as e:\n            msg = f\"Error in AstraDBVectorStore._build_search_args: {e}\"\n            raise ValueError(msg) from e\n\n        if not search_args:\n            self.log(\"No search input or filters provided. Skipping search.\")\n            return []\n\n        docs = []\n        search_method = \"search\" if \"query\" in search_args else \"metadata_search\"\n\n        try:\n            self.log(f\"Calling vector_store.{search_method} with args: {search_args}\")\n            docs = getattr(vector_store, search_method)(**search_args)\n        except Exception as e:\n            msg = f\"Error performing {search_method} in AstraDBVectorStore: {e}\"\n            raise ValueError(msg) from e\n\n        self.log(f\"Retrieved documents: {len(docs)}\")\n\n        data = docs_to_data(docs)\n        self.log(f\"Converted documents to data: {len(data)}\")\n        self.status = data\n\n        return data\n\n    def get_retriever_kwargs(self):\n        search_args = self._build_search_args()\n\n        return {\n            \"search_type\": self._map_search_type(),\n            \"search_kwargs\": search_args,\n        }\n"
               "collection_name": {
                 "_input_type": "DropdownInput",
@@ -3795,7 +3795,7 @@
                 "show": true,
                 "title_case": false,
                 "type": "code",
-                "value": "import os\nfrom collections import defaultdict\nfrom dataclasses import dataclass, field\n\nfrom astrapy import AstraDBAdmin, DataAPIClient, Database\nfrom langchain_astradb import AstraDBVectorStore, CollectionVectorServiceOptions\n\nfrom langflow.base.vectorstores.model import LCVectorStoreComponent, check_cached_vector_store\nfrom langflow.helpers import docs_to_data\nfrom langflow.inputs import FloatInput, NestedDictInput\nfrom import (\n    BoolInput,\n    DropdownInput,\n    HandleInput,\n    IntInput,\n    SecretStrInput,\n    StrInput,\n)\nfrom langflow.schema import Data\nfrom langflow.utils.version import get_version_info\n\n\nclass AstraDBVectorStoreComponent(LCVectorStoreComponent):\n    display_name: str = \"Astra DB\"\n    description: str = \"Ingest and search documents in Astra DB\"\n    documentation: str = \"\"\n    name = \"AstraDB\"\n    icon: str = \"AstraDB\"\n\n    _cached_vector_store: AstraDBVectorStore | None = None\n\n    @dataclass\n    class NewDatabaseInput:\n        functionality: str = \"create\"\n        fields: dict[str, dict] = field(\n            default_factory=lambda: {\n                \"data\": {\n                    \"node\": {\n                        \"description\": \"Create a new database in Astra DB.\",\n                        \"display_name\": \"Create New Database\",\n                        \"field_order\": [\"new_database_name\", \"cloud_provider\", \"region\"],\n                        \"template\": {\n                            \"new_database_name\": StrInput(\n                                name=\"new_database_name\",\n                                display_name=\"New Database Name\",\n                                info=\"Name of the new database to create in Astra DB.\",\n                                required=True,\n                            ),\n                            \"cloud_provider\": DropdownInput(\n                                name=\"cloud_provider\",\n          
                      display_name=\"Cloud Provider\",\n                                info=\"Cloud provider for the new database.\",\n                                options=[\"Amazon Web Services\", \"Google Cloud Platform\", \"Microsoft Azure\"],\n                                required=True,\n                            ),\n                            \"region\": DropdownInput(\n                                name=\"region\",\n                                display_name=\"Region\",\n                                info=\"Region for the new database.\",\n                                options=[],\n                                required=True,\n                            ),\n                        },\n                    },\n                }\n            }\n        )\n\n    @dataclass\n    class NewCollectionInput:\n        functionality: str = \"create\"\n        fields: dict[str, dict] = field(\n            default_factory=lambda: {\n                \"data\": {\n                    \"node\": {\n                        \"description\": \"Create a new collection in Astra DB.\",\n                        \"display_name\": \"Create New Collection\",\n                        \"field_order\": [\n                            \"new_collection_name\",\n                            \"embedding_generation_provider\",\n                            \"embedding_generation_model\",\n                        ],\n                        \"template\": {\n                            \"new_collection_name\": StrInput(\n                                name=\"new_collection_name\",\n                                display_name=\"New Collection Name\",\n                                info=\"Name of the new collection to create in Astra DB.\",\n                                required=True,\n                            ),\n                            \"embedding_generation_provider\": DropdownInput(\n                                name=\"embedding_generation_provider\",\n        
                        display_name=\"Embedding Generation Provider\",\n                                info=\"Provider to use for generating embeddings.\",\n                                options=[],\n                                required=True,\n                            ),\n                            \"embedding_generation_model\": DropdownInput(\n                                name=\"embedding_generation_model\",\n                                display_name=\"Embedding Generation Model\",\n                                info=\"Model to use for generating embeddings.\",\n                                options=[],\n                                required=True,\n                            ),\n                        },\n                    },\n                }\n            }\n        )\n\n    inputs = [\n        SecretStrInput(\n            name=\"token\",\n            display_name=\"Astra DB Application Token\",\n            info=\"Authentication token for accessing Astra DB.\",\n            value=\"ASTRA_DB_APPLICATION_TOKEN\",\n            required=True,\n            real_time_refresh=True,\n            input_types=[],\n        ),\n        StrInput(\n            name=\"environment\",\n            display_name=\"Environment\",\n            info=\"The environment for the Astra DB API Endpoint.\",\n            advanced=True,\n        ),\n        DropdownInput(\n            name=\"api_endpoint\",\n            display_name=\"Database\",\n            info=\"The Database / API Endpoint for the Astra DB instance.\",\n            required=True,\n            refresh_button=True,\n            real_time_refresh=True,\n            combobox=True,\n        ),\n        StrInput(\n            name=\"d_api_endpoint\",\n            display_name=\"Database API Endpoint\",\n            info=\"The API Endpoint for the Astra DB instance. 
Supercedes database selection.\",\n            advanced=True,\n        ),\n        DropdownInput(\n            name=\"collection_name\",\n            display_name=\"Collection\",\n            info=\"The name of the collection within Astra DB where the vectors will be stored.\",\n            required=True,\n            refresh_button=True,\n            real_time_refresh=True,\n            # dialog_inputs=asdict(NewCollectionInput()),\n            combobox=True,\n        ),\n        StrInput(\n            name=\"keyspace\",\n            display_name=\"Keyspace\",\n            info=\"Optional keyspace within Astra DB to use for the collection.\",\n            advanced=True,\n        ),\n        DropdownInput(\n            name=\"embedding_choice\",\n            display_name=\"Embedding Model or Astra Vectorize\",\n            info=\"Choose an embedding model or use Astra Vectorize.\",\n            options=[\"Embedding Model\", \"Astra Vectorize\"],\n            value=\"Embedding Model\",\n            advanced=True,\n            real_time_refresh=True,\n        ),\n        HandleInput(\n            name=\"embedding_model\",\n            display_name=\"Embedding Model\",\n            input_types=[\"Embeddings\"],\n            info=\"Specify the Embedding Model. 
Not required for Astra Vectorize collections.\",\n            required=False,\n        ),\n        *LCVectorStoreComponent.inputs,\n        IntInput(\n            name=\"number_of_results\",\n            display_name=\"Number of Search Results\",\n            info=\"Number of search results to return.\",\n            advanced=True,\n            value=4,\n        ),\n        DropdownInput(\n            name=\"search_type\",\n            display_name=\"Search Type\",\n            info=\"Search type to use\",\n            options=[\"Similarity\", \"Similarity with score threshold\", \"MMR (Max Marginal Relevance)\"],\n            value=\"Similarity\",\n            advanced=True,\n        ),\n        FloatInput(\n            name=\"search_score_threshold\",\n            display_name=\"Search Score Threshold\",\n            info=\"Minimum similarity score threshold for search results. \"\n            \"(when using 'Similarity with score threshold')\",\n            value=0,\n            advanced=True,\n        ),\n        NestedDictInput(\n            name=\"advanced_search_filter\",\n            display_name=\"Search Metadata Filter\",\n            info=\"Optional dictionary of filters to apply to the search query.\",\n            advanced=True,\n        ),\n        BoolInput(\n            name=\"autodetect_collection\",\n            display_name=\"Autodetect Collection\",\n            info=\"Boolean flag to determine whether to autodetect the collection.\",\n            advanced=True,\n            value=True,\n        ),\n        StrInput(\n            name=\"content_field\",\n            display_name=\"Content Field\",\n            info=\"Field to use as the text content field for the vector store.\",\n            advanced=True,\n        ),\n        StrInput(\n            name=\"deletion_field\",\n            display_name=\"Deletion Based On Field\",\n            info=\"When this parameter is provided, documents in the target collection with \"\n            \"metadata 
field values matching the input metadata field value will be deleted \"\n            \"before new data is loaded.\",\n            advanced=True,\n        ),\n        BoolInput(\n            name=\"ignore_invalid_documents\",\n            display_name=\"Ignore Invalid Documents\",\n            info=\"Boolean flag to determine whether to ignore invalid documents at runtime.\",\n            advanced=True,\n        ),\n        NestedDictInput(\n            name=\"astradb_vectorstore_kwargs\",\n            display_name=\"AstraDBVectorStore Parameters\",\n            info=\"Optional dictionary of additional parameters for the AstraDBVectorStore.\",\n            advanced=True,\n        ),\n    ]\n\n    @classmethod\n    def map_cloud_providers(cls):\n        return {\n            \"Amazon Web Services\": {\n                \"id\": \"aws\",\n                \"regions\": [\"us-east-2\", \"ap-south-1\", \"eu-west-1\"],\n            },\n            \"Google Cloud Platform\": {\n                \"id\": \"gcp\",\n                \"regions\": [\"us-east1\"],\n            },\n            \"Microsoft Azure\": {\n                \"id\": \"azure\",\n                \"regions\": [\"westus3\"],\n            },\n        }\n\n    @classmethod\n    def create_database_api(\n        cls,\n        token: str,\n        new_database_name: str,\n        cloud_provider: str,\n        region: str,\n    ):\n        client = DataAPIClient(token=token)\n\n        # Get the admin object\n        admin_client = client.get_admin(token=token)\n\n        # Call the create database function\n        return admin_client.create_database(\n            name=new_database_name,\n            cloud_provider=cloud_provider,\n            region=region,\n        )\n\n    @classmethod\n    def create_collection_api(\n        cls,\n        token: str,\n        database_name: str,\n        new_collection_name: str,\n        dimension: int | None = None,\n        embedding_generation_provider: str | None = None,\n     
   embedding_generation_model: str | None = None,\n    ):\n        client = DataAPIClient(token=token)\n        api_endpoint = cls.get_api_endpoint_static(token=token, database_name=database_name)\n\n        # Get the database object\n        database = client.get_database(api_endpoint=api_endpoint, token=token)\n\n        # Build vectorize options, if needed\n        vectorize_options = None\n        if not dimension:\n            vectorize_options = CollectionVectorServiceOptions(\n                provider=embedding_generation_provider,\n                model_name=embedding_generation_model,\n                authentication=None,\n                parameters=None,\n            )\n\n        # Create the collection\n        return database.create_collection(\n            name=new_collection_name,\n            dimension=dimension,\n            service=vectorize_options,\n        )\n\n    @classmethod\n    def get_database_list_static(cls, token: str, environment: str | None = None):\n        client = DataAPIClient(token=token, environment=environment)\n\n        # Get the admin object\n        admin_client = client.get_admin(token=token)\n\n        # Get the list of databases\n        db_list = list(admin_client.list_databases())\n\n        # Generate the api endpoint for each database\n        db_info_dict = {}\n        for db in db_list:\n            try:\n                api_endpoint = f\"https://{}-{}\"\n                db_info_dict[] = {\n                    \"api_endpoint\": api_endpoint,\n                    \"collections\": len(\n                        list(\n                            client.get_database(\n                                api_endpoint=api_endpoint, token=token,\n                            ).list_collection_names(\n                        )\n                    ),\n                }\n            except Exception:  # noqa: BLE001, S110\n                pass\n\n        return db_info_dict\n\n    def get_database_list(self):\n        return 
self.get_database_list_static(token=self.token, environment=self.environment)\n\n    @classmethod\n    def get_api_endpoint_static(\n        cls,\n        token: str,\n        environment: str | None = None,\n        api_endpoint: str | None = None,\n        database_name: str | None = None,\n    ):\n        # If the api_endpoint is set, return it\n        if api_endpoint:\n            return api_endpoint\n\n        # Check if the database_name is like a url\n        if database_name and database_name.startswith(\"https://\"):\n            return database_name\n\n        # If the database is not set, nothing we can do.\n        if not database_name:\n            return None\n\n        # Otherwise, get the URL from the database list\n        return cls.get_database_list_static(token=token, environment=environment).get(database_name).get(\"api_endpoint\")\n\n    def get_api_endpoint(self, *, api_endpoint: str | None = None):\n        return self.get_api_endpoint_static(\n            token=self.token,\n            environment=self.environment,\n            api_endpoint=api_endpoint or self.d_api_endpoint,\n            database_name=self.api_endpoint,\n        )\n\n    def get_keyspace(self):\n        keyspace = self.keyspace\n\n        if keyspace:\n            return keyspace.strip()\n\n        return None\n\n    def get_database_object(self, api_endpoint: str | None = None):\n        try:\n            client = DataAPIClient(token=self.token, environment=self.environment)\n\n            return client.get_database(\n                api_endpoint=self.get_api_endpoint(api_endpoint=api_endpoint),\n                token=self.token,\n                keyspace=self.get_keyspace(),\n            )\n        except Exception as e:\n            msg = f\"Error fetching database object: {e}\"\n            raise ValueError(msg) from e\n\n    def collection_data(self, collection_name: str, database: Database | None = None):\n        try:\n            if not database:\n                
client = DataAPIClient(token=self.token, environment=self.environment)\n\n                database = client.get_database(\n                    api_endpoint=self.get_api_endpoint(),\n                    token=self.token,\n                    keyspace=self.get_keyspace(),\n                )\n\n            collection = database.get_collection(collection_name, keyspace=self.get_keyspace())\n\n            return collection.estimated_document_count()\n        except Exception as e:  # noqa: BLE001\n            self.log(f\"Error checking collection data: {e}\")\n\n            return None\n\n    def get_vectorize_providers(self):\n        try:\n            self.log(\"Dynamically updating list of Vectorize providers.\")\n\n            # Get the admin object\n            admin = AstraDBAdmin(token=self.token)\n            db_admin = admin.get_database_admin(api_endpoint=self.get_api_endpoint())\n\n            # Get the list of embedding providers\n            embedding_providers = db_admin.find_embedding_providers().as_dict()\n\n            vectorize_providers_mapping = {}\n            # Map the provider display name to the provider key and models\n            for provider_key, provider_data in embedding_providers[\"embeddingProviders\"].items():\n                display_name = provider_data[\"displayName\"]\n                models = [model[\"name\"] for model in provider_data[\"models\"]]\n\n                # TODO:\n                vectorize_providers_mapping[display_name] = [provider_key, models]\n\n            # Sort the resulting dictionary\n            return defaultdict(list, dict(sorted(vectorize_providers_mapping.items())))\n        except Exception as e:  # noqa: BLE001\n            self.log(f\"Error fetching Vectorize providers: {e}\")\n\n            return {}\n\n    def _initialize_database_options(self):\n        try:\n            return [\n                {\n                    \"name\": name,\n                    \"collections\": info[\"collections\"],\n        
            \"api_endpoint\": info[\"api_endpoint\"],\n                }\n                for name, info in self.get_database_list().items()\n            ]\n        except Exception as e:\n            msg = f\"Error fetching database options: {e}\"\n            raise ValueError(msg) from e\n\n    def _initialize_collection_options(self, api_endpoint: str | None = None):\n        # Retrieve the database object\n        database = self.get_database_object(api_endpoint=api_endpoint)\n\n        # Get the list of collections\n        collection_list = list(database.list_collections(keyspace=self.get_keyspace()))\n\n        # Return the list of collections and metadata associated\n        return [\n            {\n                \"name\":,\n                \"records\": self.collection_data(, database=database),\n                \"provider\": (\n                    col.options.vector.service.provider if col.options.vector and col.options.vector.service else None\n                ),\n                \"icon\": \"\",\n                \"model\": (\n                    col.options.vector.service.model_name if col.options.vector and col.options.vector.service else None\n                ),\n            }\n            for col in collection_list\n        ]\n\n    def reset_build_config(self, build_config: dict):\n        # Reset the list of databases we have based on the token provided\n        build_config[\"api_endpoint\"][\"options\"] = []\n        build_config[\"api_endpoint\"][\"options_metadata\"] = []\n        build_config[\"api_endpoint\"][\"value\"] = \"\"\n        build_config[\"api_endpoint\"][\"name\"] = \"Database\"\n\n        # Reset the list of collections and metadata associated\n        build_config[\"collection_name\"][\"options\"] = []\n        build_config[\"collection_name\"][\"options_metadata\"] = []\n        build_config[\"collection_name\"][\"value\"] = \"\"\n\n        return build_config\n\n    def update_build_config(self, build_config: dict, 
field_value: str, field_name: str | None = None):\n        # TODO: Remove special astra flags when overlays are out\n        # TODO: Better targeting of this field\n        dslf = os.getenv(\"AWS_EXECUTION_ENV\") == \"AWS_ECS_FARGATE\"\n\n        # If the token has not been provided, simply return\n        if not self.token:\n            return self.reset_build_config(build_config)\n\n        # Refresh the database name options\n        if not dslf and (field_name in [\"token\", \"environment\"] or not build_config[\"api_endpoint\"][\"options\"]):\n            # Reset the build config to ensure we are starting fresh\n            build_config = self.reset_build_config(build_config)\n\n            # Get the list of options we have based on the token provided\n            database_options = self._initialize_database_options()\n\n            # If we retrieved options based on the token, show the dropdown\n            build_config[\"api_endpoint\"][\"options\"] = [db[\"name\"] for db in database_options]\n            build_config[\"api_endpoint\"][\"options_metadata\"] = [\n                {k: v for k, v in db.items() if k not in [\"name\"]} for db in database_options\n            ]\n\n            # Get list of regions for a given cloud provider\n            \"\"\"\n            cloud_provider = (\n                build_config[\"api_endpoint\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"][\"cloud_provider\"][\n                    \"value\"\n                ]\n                or \"Amazon Web Services\"\n            )\n            build_config[\"api_endpoint\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"][\"region\"][\n                \"options\"\n            ] = self.map_cloud_providers()[cloud_provider][\"regions\"]\n            \"\"\"\n\n        # Refresh the collection name options\n        if field_name == \"api_endpoint\":\n            # Reset the selected collection\n            build_config[\"collection_name\"][\"value\"] = 
\"\"\n\n            # Set the underlying api endpoint value of the database\n            if field_value in build_config[\"api_endpoint\"][\"options\"]:\n                index_of_name = build_config[\"api_endpoint\"][\"options\"].index(field_value)\n                build_config[\"d_api_endpoint\"][\"value\"] = build_config[\"api_endpoint\"][\"options_metadata\"][\n                    index_of_name\n                ][\"api_endpoint\"]\n            else:\n                build_config[\"d_api_endpoint\"][\"value\"] = \"\"\n\n            # Reload the list of collections and metadata associated\n            collection_options = self._initialize_collection_options(\n                api_endpoint=build_config[\"d_api_endpoint\"][\"value\"] if not dslf else None\n            )\n\n            # If we have collections, show the dropdown\n            build_config[\"collection_name\"][\"options\"] = [col[\"name\"] for col in collection_options]\n            build_config[\"collection_name\"][\"options_metadata\"] = [\n                {k: v for k, v in col.items() if k not in [\"name\"]} for col in collection_options\n            ]\n\n        # Hide embedding model option if opriona_metadata provider is not null\n        if field_name == \"collection_name\" and field_value:\n            # Set the options for collection name to be the field value if its a new collection\n            if field_value not in build_config[\"collection_name\"][\"options\"]:\n                # Add the new collection to the list of options\n                build_config[\"collection_name\"][\"options\"].append(field_value)\n                build_config[\"collection_name\"][\"options_metadata\"].append(\n                    {\"records\": 0, \"provider\": None, \"icon\": \"\", \"model\": None}\n                )\n\n                # Ensure that autodetect collection is set to False, since its a new collection\n                build_config[\"autodetect_collection\"][\"value\"] = False\n            else:\n      
          build_config[\"autodetect_collection\"][\"value\"] = True\n\n            # Find the position of the selected collection to align with metadata\n            index_of_name = build_config[\"collection_name\"][\"options\"].index(field_value)\n\n            # Get the provider value of the selected collection\n            value_of_provider = build_config[\"collection_name\"][\"options_metadata\"][index_of_name][\"provider\"]\n\n            # If we were able to determine the Vectorize provider, set it accordingly\n            if value_of_provider:\n                build_config[\"embedding_model\"][\"advanced\"] = True\n                build_config[\"embedding_choice\"][\"value\"] = \"Astra Vectorize\"\n            else:\n                build_config[\"embedding_model\"][\"advanced\"] = False\n                build_config[\"embedding_choice\"][\"value\"] = \"Embedding Model\"\n\n        # For the final step, get the list of vectorize providers\n        \"\"\"\n        vectorize_providers = self.get_vectorize_providers()\n        if not vectorize_providers:\n            return build_config\n\n        # Allow the user to see the embedding provider options\n        provider_options = build_config[\"collection_name\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"][\n            \"embedding_generation_provider\"\n        ][\"options\"]\n        if not provider_options:\n            # If the collection is set, allow user to see embedding options\n            build_config[\"collection_name\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"][\n                \"embedding_generation_provider\"\n            ][\"options\"] = [\"Bring your own\", \"Nvidia\", *[key for key in vectorize_providers if key != \"Nvidia\"]]\n\n        # And allow the user to see the models based on a selected provider\n        model_options = build_config[\"collection_name\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"][\n            
\"embedding_generation_model\"\n        ][\"options\"]\n        if not model_options:\n            embedding_provider = build_config[\"collection_name\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"][\n                \"embedding_generation_provider\"\n            ][\"value\"]\n\n            build_config[\"collection_name\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"][\n                \"embedding_generation_model\"\n            ][\"options\"] = vectorize_providers.get(embedding_provider, [[], []])[1]\n        \"\"\"\n\n        return build_config\n\n    @check_cached_vector_store\n    def build_vector_store(self):\n        try:\n            from langchain_astradb import AstraDBVectorStore\n        except ImportError as e:\n            msg = (\n                \"Could not import langchain Astra DB integration package. \"\n                \"Please install it with `pip install langchain-astradb`.\"\n            )\n            raise ImportError(msg) from e\n\n        # Get the embedding model and additional params\n        embedding_params = (\n            {\"embedding\": self.embedding_model}\n            if self.embedding_model and self.embedding_choice == \"Embedding Model\"\n            else {}\n        )\n\n        # Get the additional parameters\n        additional_params = self.astradb_vectorstore_kwargs or {}\n\n        # Get Langflow version and platform information\n        __version__ = get_version_info()[\"version\"]\n        langflow_prefix = \"\"\n        if os.getenv(\"AWS_EXECUTION_ENV\") == \"AWS_ECS_FARGATE\":  # TODO: More precise way of detecting\n            langflow_prefix = \"ds-\"\n\n        # Get the database object\n        database = self.get_database_object(api_endpoint=self.d_api_endpoint)\n        autodetect = self.collection_name in database.list_collection_names() and self.autodetect_collection\n\n        # Bundle up the auto-detect parameters\n        autodetect_params = {\n            
\"autodetect_collection\": autodetect,\n            \"content_field\": (\n                self.content_field\n                if self.content_field and embedding_params\n                else (\n                    \"page_content\"\n                    if embedding_params\n                    and self.collection_data(collection_name=self.collection_name, database=database) == 0\n                    else None\n                )\n            ),\n            \"ignore_invalid_documents\": self.ignore_invalid_documents,\n        }\n\n        # Attempt to build the Vector Store object\n        try:\n            vector_store = AstraDBVectorStore(\n                # Astra DB Authentication Parameters\n                token=self.token,\n                api_endpoint=database.api_endpoint,\n                namespace=database.keyspace,\n                collection_name=self.collection_name,\n                environment=self.environment,\n                # Astra DB Usage Tracking Parameters\n                ext_callers=[(f\"{langflow_prefix}langflow\", __version__)],\n                # Astra DB Vector Store Parameters\n                **autodetect_params,\n                **embedding_params,\n                **additional_params,\n            )\n        except Exception as e:\n            msg = f\"Error initializing AstraDBVectorStore: {e}\"\n            raise ValueError(msg) from e\n\n        # Add documents to the vector store\n        self._add_documents_to_vector_store(vector_store)\n\n        return vector_store\n\n    def _add_documents_to_vector_store(self, vector_store) -> None:\n        documents = []\n        for _input in self.ingest_data or []:\n            if isinstance(_input, Data):\n                documents.append(_input.to_lc_document())\n            else:\n                msg = \"Vector Store Inputs must be Data objects.\"\n                raise TypeError(msg)\n\n        if documents and self.deletion_field:\n            self.log(f\"Deleting documents where 
{self.deletion_field}\")\n            try:\n                database = self.get_database_object(api_endpoint=self.d_api_endpoint)\n                collection = database.get_collection(self.collection_name, keyspace=database.keyspace)\n                delete_values = list({doc.metadata[self.deletion_field] for doc in documents})\n                self.log(f\"Deleting documents where {self.deletion_field} matches {delete_values}.\")\n                collection.delete_many({f\"metadata.{self.deletion_field}\": {\"$in\": delete_values}})\n            except Exception as e:\n                msg = f\"Error deleting documents from AstraDBVectorStore based on '{self.deletion_field}': {e}\"\n                raise ValueError(msg) from e\n\n        if documents:\n            self.log(f\"Adding {len(documents)} documents to the Vector Store.\")\n            try:\n                vector_store.add_documents(documents)\n            except Exception as e:\n                msg = f\"Error adding documents to AstraDBVectorStore: {e}\"\n                raise ValueError(msg) from e\n        else:\n            self.log(\"No documents to add to the Vector Store.\")\n\n    def _map_search_type(self) -> str:\n        search_type_mapping = {\n            \"Similarity with score threshold\": \"similarity_score_threshold\",\n            \"MMR (Max Marginal Relevance)\": \"mmr\",\n        }\n\n        return search_type_mapping.get(self.search_type, \"similarity\")\n\n    def _build_search_args(self):\n        query = self.search_query if isinstance(self.search_query, str) and self.search_query.strip() else None\n\n        if query:\n            args = {\n                \"query\": query,\n                \"search_type\": self._map_search_type(),\n                \"k\": self.number_of_results,\n                \"score_threshold\": self.search_score_threshold,\n            }\n        elif self.advanced_search_filter:\n            args = {\n                \"n\": self.number_of_results,\n         
   }\n        else:\n            return {}\n\n        filter_arg = self.advanced_search_filter or {}\n        if filter_arg:\n            args[\"filter\"] = filter_arg\n\n        return args\n\n    def search_documents(self, vector_store=None) -> list[Data]:\n        vector_store = vector_store or self.build_vector_store()\n\n        self.log(f\"Search input: {self.search_query}\")\n        self.log(f\"Search type: {self.search_type}\")\n        self.log(f\"Number of results: {self.number_of_results}\")\n\n        try:\n            search_args = self._build_search_args()\n        except Exception as e:\n            msg = f\"Error in AstraDBVectorStore._build_search_args: {e}\"\n            raise ValueError(msg) from e\n\n        if not search_args:\n            self.log(\"No search input or filters provided. Skipping search.\")\n            return []\n\n        docs = []\n        search_method = \"search\" if \"query\" in search_args else \"metadata_search\"\n\n        try:\n            self.log(f\"Calling vector_store.{search_method} with args: {search_args}\")\n            docs = getattr(vector_store, search_method)(**search_args)\n        except Exception as e:\n            msg = f\"Error performing {search_method} in AstraDBVectorStore: {e}\"\n            raise ValueError(msg) from e\n\n        self.log(f\"Retrieved documents: {len(docs)}\")\n\n        data = docs_to_data(docs)\n        self.log(f\"Converted documents to data: {len(data)}\")\n        self.status = data\n\n        return data\n\n    def get_retriever_kwargs(self):\n        search_args = self._build_search_args()\n\n        return {\n            \"search_type\": self._map_search_type(),\n            \"search_kwargs\": search_args,\n        }\n"
+                "value": "import os\nfrom collections import defaultdict\nfrom dataclasses import dataclass, field\n\nfrom astrapy import AstraDBAdmin, DataAPIClient, Database\nfrom langchain_astradb import AstraDBVectorStore, CollectionVectorServiceOptions\n\nfrom langflow.base.vectorstores.model import LCVectorStoreComponent, check_cached_vector_store\nfrom langflow.helpers import docs_to_data\nfrom langflow.inputs import FloatInput, NestedDictInput\nfrom import (\n    BoolInput,\n    DropdownInput,\n    HandleInput,\n    IntInput,\n    SecretStrInput,\n    StrInput,\n)\nfrom langflow.schema import Data\nfrom langflow.utils.version import get_version_info\n\n\nclass AstraDBVectorStoreComponent(LCVectorStoreComponent):\n    display_name: str = \"Astra DB\"\n    description: str = \"Ingest and search documents in Astra DB\"\n    documentation: str = \"\"\n    name = \"AstraDB\"\n    icon: str = \"AstraDB\"\n\n    _cached_vector_store: AstraDBVectorStore | None = None\n\n    @dataclass\n    class NewDatabaseInput:\n        functionality: str = \"create\"\n        fields: dict[str, dict] = field(\n            default_factory=lambda: {\n                \"data\": {\n                    \"node\": {\n                        \"description\": \"Create a new database in Astra DB.\",\n                        \"display_name\": \"Create New Database\",\n                        \"field_order\": [\"new_database_name\", \"cloud_provider\", \"region\"],\n                        \"template\": {\n                            \"new_database_name\": StrInput(\n                                name=\"new_database_name\",\n                                display_name=\"New Database Name\",\n                                info=\"Name of the new database to create in Astra DB.\",\n                                required=True,\n                            ),\n                            \"cloud_provider\": DropdownInput(\n                                name=\"cloud_provider\",\n          
                      display_name=\"Cloud Provider\",\n                                info=\"Cloud provider for the new database.\",\n                                options=[\"Amazon Web Services\", \"Google Cloud Platform\", \"Microsoft Azure\"],\n                                required=True,\n                            ),\n                            \"region\": DropdownInput(\n                                name=\"region\",\n                                display_name=\"Region\",\n                                info=\"Region for the new database.\",\n                                options=[],\n                                required=True,\n                            ),\n                        },\n                    },\n                }\n            }\n        )\n\n    @dataclass\n    class NewCollectionInput:\n        functionality: str = \"create\"\n        fields: dict[str, dict] = field(\n            default_factory=lambda: {\n                \"data\": {\n                    \"node\": {\n                        \"description\": \"Create a new collection in Astra DB.\",\n                        \"display_name\": \"Create New Collection\",\n                        \"field_order\": [\n                            \"new_collection_name\",\n                            \"embedding_generation_provider\",\n                            \"embedding_generation_model\",\n                        ],\n                        \"template\": {\n                            \"new_collection_name\": StrInput(\n                                name=\"new_collection_name\",\n                                display_name=\"New Collection Name\",\n                                info=\"Name of the new collection to create in Astra DB.\",\n                                required=True,\n                            ),\n                            \"embedding_generation_provider\": DropdownInput(\n                                name=\"embedding_generation_provider\",\n        
                        display_name=\"Embedding Generation Provider\",\n                                info=\"Provider to use for generating embeddings.\",\n                                options=[],\n                                required=True,\n                            ),\n                            \"embedding_generation_model\": DropdownInput(\n                                name=\"embedding_generation_model\",\n                                display_name=\"Embedding Generation Model\",\n                                info=\"Model to use for generating embeddings.\",\n                                options=[],\n                                required=True,\n                            ),\n                        },\n                    },\n                }\n            }\n        )\n\n    inputs = [\n        SecretStrInput(\n            name=\"token\",\n            display_name=\"Astra DB Application Token\",\n            info=\"Authentication token for accessing Astra DB.\",\n            value=\"ASTRA_DB_APPLICATION_TOKEN\",\n            required=True,\n            real_time_refresh=True,\n            input_types=[],\n        ),\n        StrInput(\n            name=\"environment\",\n            display_name=\"Environment\",\n            info=\"The environment for the Astra DB API Endpoint.\",\n            advanced=True,\n        ),\n        DropdownInput(\n            name=\"api_endpoint\",\n            display_name=\"Database\",\n            info=\"The Database / API Endpoint for the Astra DB instance.\",\n            required=True,\n            refresh_button=True,\n            real_time_refresh=True,\n            combobox=True,\n        ),\n        StrInput(\n            name=\"d_api_endpoint\",\n            display_name=\"Database API Endpoint\",\n            info=\"The API Endpoint for the Astra DB instance. 
Supercedes database selection.\",\n            advanced=True,\n        ),\n        DropdownInput(\n            name=\"collection_name\",\n            display_name=\"Collection\",\n            info=\"The name of the collection within Astra DB where the vectors will be stored.\",\n            required=True,\n            refresh_button=True,\n            real_time_refresh=True,\n            # dialog_inputs=asdict(NewCollectionInput()),\n            combobox=True,\n        ),\n        StrInput(\n            name=\"keyspace\",\n            display_name=\"Keyspace\",\n            info=\"Optional keyspace within Astra DB to use for the collection.\",\n            advanced=True,\n        ),\n        DropdownInput(\n            name=\"embedding_choice\",\n            display_name=\"Embedding Model or Astra Vectorize\",\n            info=\"Choose an embedding model or use Astra Vectorize.\",\n            options=[\"Embedding Model\", \"Astra Vectorize\"],\n            value=\"Embedding Model\",\n            advanced=True,\n            real_time_refresh=True,\n        ),\n        HandleInput(\n            name=\"embedding_model\",\n            display_name=\"Embedding Model\",\n            input_types=[\"Embeddings\"],\n            info=\"Specify the Embedding Model. 
Not required for Astra Vectorize collections.\",\n            required=False,\n        ),\n        *LCVectorStoreComponent.inputs,\n        IntInput(\n            name=\"number_of_results\",\n            display_name=\"Number of Search Results\",\n            info=\"Number of search results to return.\",\n            advanced=True,\n            value=4,\n        ),\n        DropdownInput(\n            name=\"search_type\",\n            display_name=\"Search Type\",\n            info=\"Search type to use\",\n            options=[\"Similarity\", \"Similarity with score threshold\", \"MMR (Max Marginal Relevance)\"],\n            value=\"Similarity\",\n            advanced=True,\n        ),\n        FloatInput(\n            name=\"search_score_threshold\",\n            display_name=\"Search Score Threshold\",\n            info=\"Minimum similarity score threshold for search results. \"\n            \"(when using 'Similarity with score threshold')\",\n            value=0,\n            advanced=True,\n        ),\n        NestedDictInput(\n            name=\"advanced_search_filter\",\n            display_name=\"Search Metadata Filter\",\n            info=\"Optional dictionary of filters to apply to the search query.\",\n            advanced=True,\n        ),\n        BoolInput(\n            name=\"autodetect_collection\",\n            display_name=\"Autodetect Collection\",\n            info=\"Boolean flag to determine whether to autodetect the collection.\",\n            advanced=True,\n            value=True,\n        ),\n        StrInput(\n            name=\"content_field\",\n            display_name=\"Content Field\",\n            info=\"Field to use as the text content field for the vector store.\",\n            advanced=True,\n        ),\n        StrInput(\n            name=\"deletion_field\",\n            display_name=\"Deletion Based On Field\",\n            info=\"When this parameter is provided, documents in the target collection with \"\n            \"metadata 
field values matching the input metadata field value will be deleted \"\n            \"before new data is loaded.\",\n            advanced=True,\n        ),\n        BoolInput(\n            name=\"ignore_invalid_documents\",\n            display_name=\"Ignore Invalid Documents\",\n            info=\"Boolean flag to determine whether to ignore invalid documents at runtime.\",\n            advanced=True,\n        ),\n        NestedDictInput(\n            name=\"astradb_vectorstore_kwargs\",\n            display_name=\"AstraDBVectorStore Parameters\",\n            info=\"Optional dictionary of additional parameters for the AstraDBVectorStore.\",\n            advanced=True,\n        ),\n    ]\n\n    @classmethod\n    def map_cloud_providers(cls):\n        return {\n            \"Amazon Web Services\": {\n                \"id\": \"aws\",\n                \"regions\": [\"us-east-2\", \"ap-south-1\", \"eu-west-1\"],\n            },\n            \"Google Cloud Platform\": {\n                \"id\": \"gcp\",\n                \"regions\": [\"us-east1\"],\n            },\n            \"Microsoft Azure\": {\n                \"id\": \"azure\",\n                \"regions\": [\"westus3\"],\n            },\n        }\n\n    @classmethod\n    def create_database_api(\n        cls,\n        token: str,\n        new_database_name: str,\n        cloud_provider: str,\n        region: str,\n    ):\n        client = DataAPIClient(token=token)\n\n        # Get the admin object\n        admin_client = client.get_admin(token=token)\n\n        # Call the create database function\n        return admin_client.create_database(\n            name=new_database_name,\n            cloud_provider=cloud_provider,\n            region=region,\n        )\n\n    @classmethod\n    def create_collection_api(\n        cls,\n        token: str,\n        database_name: str,\n        new_collection_name: str,\n        dimension: int | None = None,\n        embedding_generation_provider: str | None = None,\n     
   embedding_generation_model: str | None = None,\n    ):\n        client = DataAPIClient(token=token)\n        api_endpoint = cls.get_api_endpoint_static(token=token, database_name=database_name)\n\n        # Get the database object\n        database = client.get_database(api_endpoint=api_endpoint, token=token)\n\n        # Build vectorize options, if needed\n        vectorize_options = None\n        if not dimension:\n            vectorize_options = CollectionVectorServiceOptions(\n                provider=embedding_generation_provider,\n                model_name=embedding_generation_model,\n                authentication=None,\n                parameters=None,\n            )\n\n        # Create the collection\n        return database.create_collection(\n            name=new_collection_name,\n            dimension=dimension,\n            service=vectorize_options,\n        )\n\n    @classmethod\n    def get_database_list_static(cls, token: str, environment: str | None = None):\n        client = DataAPIClient(token=token, environment=environment)\n\n        # Get the admin object\n        admin_client = client.get_admin(token=token)\n\n        # Get the list of databases\n        db_list = list(admin_client.list_databases())\n\n        # Generate the api endpoint for each database\n        db_info_dict = {}\n        for db in db_list:\n            try:\n                api_endpoint = f\"https://{}-{}\"\n                db_info_dict[] = {\n                    \"api_endpoint\": api_endpoint,\n                    \"collections\": len(\n                        list(\n                            client.get_database(\n                                api_endpoint=api_endpoint, token=token,\n                            ).list_collection_names(\n                        )\n                    ),\n                }\n            except Exception:  # noqa: BLE001, S110\n                pass\n\n        return db_info_dict\n\n    def get_database_list(self):\n        return 
self.get_database_list_static(token=self.token, environment=self.environment)\n\n    @classmethod\n    def get_api_endpoint_static(\n        cls,\n        token: str,\n        environment: str | None = None,\n        api_endpoint: str | None = None,\n        database_name: str | None = None,\n    ):\n        # If the api_endpoint is set, return it\n        if api_endpoint:\n            return api_endpoint\n\n        # Check if the database_name is like a url\n        if database_name and database_name.startswith(\"https://\"):\n            return database_name\n\n        # If the database is not set, nothing we can do.\n        if not database_name:\n            return None\n\n        # Otherwise, get the URL from the database list\n        return cls.get_database_list_static(token=token, environment=environment).get(database_name).get(\"api_endpoint\")\n\n    def get_api_endpoint(self, *, api_endpoint: str | None = None):\n        return self.get_api_endpoint_static(\n            token=self.token,\n            environment=self.environment,\n            api_endpoint=api_endpoint or self.d_api_endpoint,\n            database_name=self.api_endpoint,\n        )\n\n    def get_keyspace(self):\n        keyspace = self.keyspace\n\n        if keyspace:\n            return keyspace.strip()\n\n        return None\n\n    def get_database_object(self, api_endpoint: str | None = None):\n        try:\n            client = DataAPIClient(token=self.token, environment=self.environment)\n\n            return client.get_database(\n                api_endpoint=self.get_api_endpoint(api_endpoint=api_endpoint),\n                token=self.token,\n                keyspace=self.get_keyspace(),\n            )\n        except Exception as e:\n            msg = f\"Error fetching database object: {e}\"\n            raise ValueError(msg) from e\n\n    def collection_data(self, collection_name: str, database: Database | None = None):\n        try:\n            if not database:\n                
client = DataAPIClient(token=self.token, environment=self.environment)\n\n                database = client.get_database(\n                    api_endpoint=self.get_api_endpoint(),\n                    token=self.token,\n                    keyspace=self.get_keyspace(),\n                )\n\n            collection = database.get_collection(collection_name, keyspace=self.get_keyspace())\n\n            return collection.estimated_document_count()\n        except Exception as e:  # noqa: BLE001\n            self.log(f\"Error checking collection data: {e}\")\n\n            return None\n\n    def get_vectorize_providers(self):\n        try:\n            self.log(\"Dynamically updating list of Vectorize providers.\")\n\n            # Get the admin object\n            admin = AstraDBAdmin(token=self.token)\n            db_admin = admin.get_database_admin(api_endpoint=self.get_api_endpoint())\n\n            # Get the list of embedding providers\n            embedding_providers = db_admin.find_embedding_providers().as_dict()\n\n            vectorize_providers_mapping = {}\n            # Map the provider display name to the provider key and models\n            for provider_key, provider_data in embedding_providers[\"embeddingProviders\"].items():\n                display_name = provider_data[\"displayName\"]\n                models = [model[\"name\"] for model in provider_data[\"models\"]]\n\n                # TODO:\n                vectorize_providers_mapping[display_name] = [provider_key, models]\n\n            # Sort the resulting dictionary\n            return defaultdict(list, dict(sorted(vectorize_providers_mapping.items())))\n        except Exception as e:  # noqa: BLE001\n            self.log(f\"Error fetching Vectorize providers: {e}\")\n\n            return {}\n\n    def _initialize_database_options(self):\n        try:\n            return [\n                {\n                    \"name\": name,\n                    \"collections\": info[\"collections\"],\n        
            \"api_endpoint\": info[\"api_endpoint\"],\n                }\n                for name, info in self.get_database_list().items()\n            ]\n        except Exception as e:\n            msg = f\"Error fetching database options: {e}\"\n            raise ValueError(msg) from e\n\n    def _initialize_collection_options(self, api_endpoint: str | None = None):\n        # Retrieve the database object\n        database = self.get_database_object(api_endpoint=api_endpoint)\n\n        # Get the list of collections\n        collection_list = list(database.list_collections(keyspace=self.get_keyspace()))\n\n        # Return the list of collections and metadata associated\n        return [\n            {\n                \"name\": col.name,\n                \"records\": self.collection_data(collection_name=col.name, database=database),\n                \"provider\": (\n                    col.options.vector.service.provider if col.options.vector and col.options.vector.service else None\n                ),\n                \"icon\": \"\",\n                \"model\": (\n                    col.options.vector.service.model_name if col.options.vector and col.options.vector.service else None\n                ),\n            }\n            for col in collection_list\n        ]\n\n    def reset_build_config(self, build_config: dict):\n        # Reset the list of databases we have based on the token provided\n        build_config[\"api_endpoint\"][\"options\"] = []\n        build_config[\"api_endpoint\"][\"options_metadata\"] = []\n        build_config[\"api_endpoint\"][\"value\"] = \"\"\n        build_config[\"api_endpoint\"][\"name\"] = \"Database\"\n\n        # Reset the list of collections and metadata associated\n        build_config[\"collection_name\"][\"options\"] = []\n        build_config[\"collection_name\"][\"options_metadata\"] = []\n        build_config[\"collection_name\"][\"value\"] = \"\"\n\n        return build_config\n\n    def update_build_config(self, build_config: dict, 
field_value: str, field_name: str | None = None):\n        # When the component first executes, this is the update refresh call\n        first_run = field_name == \"collection_name\" and not field_value\n\n        # If the token has not been provided, simply return\n        if not self.token or field_name == \"environment\":\n            return self.reset_build_config(build_config)\n\n        # Refresh the database name options\n        if first_run or field_name == \"token\":\n            # Reset the build config to ensure we are starting fresh\n            build_config = self.reset_build_config(build_config)\n\n            # Get the list of options we have based on the token provided\n            database_options = self._initialize_database_options()\n\n            # If we retrieved options based on the token, show the dropdown\n            build_config[\"api_endpoint\"][\"options\"] = [db[\"name\"] for db in database_options]\n            build_config[\"api_endpoint\"][\"options_metadata\"] = [\n                {k: v for k, v in db.items() if k not in [\"name\"]} for db in database_options\n            ]\n\n            # Get list of regions for a given cloud provider\n            \"\"\"\n            cloud_provider = (\n                build_config[\"api_endpoint\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"][\"cloud_provider\"][\n                    \"value\"\n                ]\n                or \"Amazon Web Services\"\n            )\n            build_config[\"api_endpoint\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"][\"region\"][\n                \"options\"\n            ] = self.map_cloud_providers()[cloud_provider][\"regions\"]\n            \"\"\"\n\n            return build_config\n\n        # Refresh the collection name options\n        if field_name == \"api_endpoint\":\n            # Reset the selected collection\n            build_config[\"collection_name\"][\"value\"] = \"\"\n\n            # Set the 
underlying api endpoint value of the database\n            if field_value in build_config[\"api_endpoint\"][\"options\"]:\n                index_of_name = build_config[\"api_endpoint\"][\"options\"].index(field_value)\n                build_config[\"d_api_endpoint\"][\"value\"] = build_config[\"api_endpoint\"][\"options_metadata\"][\n                    index_of_name\n                ][\"api_endpoint\"]\n            else:\n                build_config[\"d_api_endpoint\"][\"value\"] = \"\"\n\n            # Reload the list of collections and metadata associated\n            collection_options = self._initialize_collection_options(\n                api_endpoint=build_config[\"d_api_endpoint\"][\"value\"]\n            )\n\n            # If we have collections, show the dropdown\n            build_config[\"collection_name\"][\"options\"] = [col[\"name\"] for col in collection_options]\n            build_config[\"collection_name\"][\"options_metadata\"] = [\n                {k: v for k, v in col.items() if k not in [\"name\"]} for col in collection_options\n            ]\n\n            return build_config\n\n        # Hide embedding model option if opriona_metadata provider is not null\n        if field_name == \"collection_name\" and field_value:\n            # Set the options for collection name to be the field value if its a new collection\n            if field_value not in build_config[\"collection_name\"][\"options\"]:\n                # Add the new collection to the list of options\n                build_config[\"collection_name\"][\"options\"].append(field_value)\n                build_config[\"collection_name\"][\"options_metadata\"].append(\n                    {\"records\": 0, \"provider\": None, \"icon\": \"\", \"model\": None}\n                )\n\n                # Ensure that autodetect collection is set to False, since its a new collection\n                build_config[\"autodetect_collection\"][\"value\"] = False\n            else:\n                
build_config[\"autodetect_collection\"][\"value\"] = True\n\n            # Find the position of the selected collection to align with metadata\n            index_of_name = build_config[\"collection_name\"][\"options\"].index(field_value)\n\n            # Get the provider value of the selected collection\n            value_of_provider = build_config[\"collection_name\"][\"options_metadata\"][index_of_name][\"provider\"]\n\n            # If we were able to determine the Vectorize provider, set it accordingly\n            if value_of_provider:\n                build_config[\"embedding_model\"][\"advanced\"] = True\n                build_config[\"embedding_choice\"][\"value\"] = \"Astra Vectorize\"\n            else:\n                build_config[\"embedding_model\"][\"advanced\"] = False\n                build_config[\"embedding_choice\"][\"value\"] = \"Embedding Model\"\n\n        # For the final step, get the list of vectorize providers\n        \"\"\"\n        vectorize_providers = self.get_vectorize_providers()\n        if not vectorize_providers:\n            return build_config\n\n        # Allow the user to see the embedding provider options\n        provider_options = build_config[\"collection_name\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"][\n            \"embedding_generation_provider\"\n        ][\"options\"]\n        if not provider_options:\n            # If the collection is set, allow user to see embedding options\n            build_config[\"collection_name\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"][\n                \"embedding_generation_provider\"\n            ][\"options\"] = [\"Bring your own\", \"Nvidia\", *[key for key in vectorize_providers if key != \"Nvidia\"]]\n\n        # And allow the user to see the models based on a selected provider\n        model_options = build_config[\"collection_name\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"][\n            
\"embedding_generation_model\"\n        ][\"options\"]\n        if not model_options:\n            embedding_provider = build_config[\"collection_name\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"][\n                \"embedding_generation_provider\"\n            ][\"value\"]\n\n            build_config[\"collection_name\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"][\n                \"embedding_generation_model\"\n            ][\"options\"] = vectorize_providers.get(embedding_provider, [[], []])[1]\n        \"\"\"\n\n        return build_config\n\n    @check_cached_vector_store\n    def build_vector_store(self):\n        try:\n            from langchain_astradb import AstraDBVectorStore\n        except ImportError as e:\n            msg = (\n                \"Could not import langchain Astra DB integration package. \"\n                \"Please install it with `pip install langchain-astradb`.\"\n            )\n            raise ImportError(msg) from e\n\n        # Get the embedding model and additional params\n        embedding_params = (\n            {\"embedding\": self.embedding_model}\n            if self.embedding_model and self.embedding_choice == \"Embedding Model\"\n            else {}\n        )\n\n        # Get the additional parameters\n        additional_params = self.astradb_vectorstore_kwargs or {}\n\n        # Get Langflow version and platform information\n        __version__ = get_version_info()[\"version\"]\n        langflow_prefix = \"\"\n        if os.getenv(\"AWS_EXECUTION_ENV\") == \"AWS_ECS_FARGATE\":  # TODO: More precise way of detecting\n            langflow_prefix = \"ds-\"\n\n        # Get the database object\n        database = self.get_database_object(api_endpoint=self.d_api_endpoint)\n        autodetect = self.collection_name in database.list_collection_names() and self.autodetect_collection\n\n        # Bundle up the auto-detect parameters\n        autodetect_params = {\n            
\"autodetect_collection\": autodetect,\n            \"content_field\": (\n                self.content_field\n                if self.content_field and embedding_params\n                else (\n                    \"page_content\"\n                    if embedding_params\n                    and self.collection_data(collection_name=self.collection_name, database=database) == 0\n                    else None\n                )\n            ),\n            \"ignore_invalid_documents\": self.ignore_invalid_documents,\n        }\n\n        # Attempt to build the Vector Store object\n        try:\n            vector_store = AstraDBVectorStore(\n                # Astra DB Authentication Parameters\n                token=self.token,\n                api_endpoint=database.api_endpoint,\n                namespace=database.keyspace,\n                collection_name=self.collection_name,\n                environment=self.environment,\n                # Astra DB Usage Tracking Parameters\n                ext_callers=[(f\"{langflow_prefix}langflow\", __version__)],\n                # Astra DB Vector Store Parameters\n                **autodetect_params,\n                **embedding_params,\n                **additional_params,\n            )\n        except Exception as e:\n            msg = f\"Error initializing AstraDBVectorStore: {e}\"\n            raise ValueError(msg) from e\n\n        # Add documents to the vector store\n        self._add_documents_to_vector_store(vector_store)\n\n        return vector_store\n\n    def _add_documents_to_vector_store(self, vector_store) -> None:\n        documents = []\n        for _input in self.ingest_data or []:\n            if isinstance(_input, Data):\n                documents.append(_input.to_lc_document())\n            else:\n                msg = \"Vector Store Inputs must be Data objects.\"\n                raise TypeError(msg)\n\n        if documents and self.deletion_field:\n            self.log(f\"Deleting documents where 
{self.deletion_field}\")\n            try:\n                database = self.get_database_object(api_endpoint=self.d_api_endpoint)\n                collection = database.get_collection(self.collection_name, keyspace=database.keyspace)\n                delete_values = list({doc.metadata[self.deletion_field] for doc in documents})\n                self.log(f\"Deleting documents where {self.deletion_field} matches {delete_values}.\")\n                collection.delete_many({f\"metadata.{self.deletion_field}\": {\"$in\": delete_values}})\n            except Exception as e:\n                msg = f\"Error deleting documents from AstraDBVectorStore based on '{self.deletion_field}': {e}\"\n                raise ValueError(msg) from e\n\n        if documents:\n            self.log(f\"Adding {len(documents)} documents to the Vector Store.\")\n            try:\n                vector_store.add_documents(documents)\n            except Exception as e:\n                msg = f\"Error adding documents to AstraDBVectorStore: {e}\"\n                raise ValueError(msg) from e\n        else:\n            self.log(\"No documents to add to the Vector Store.\")\n\n    def _map_search_type(self) -> str:\n        search_type_mapping = {\n            \"Similarity with score threshold\": \"similarity_score_threshold\",\n            \"MMR (Max Marginal Relevance)\": \"mmr\",\n        }\n\n        return search_type_mapping.get(self.search_type, \"similarity\")\n\n    def _build_search_args(self):\n        query = self.search_query if isinstance(self.search_query, str) and self.search_query.strip() else None\n\n        if query:\n            args = {\n                \"query\": query,\n                \"search_type\": self._map_search_type(),\n                \"k\": self.number_of_results,\n                \"score_threshold\": self.search_score_threshold,\n            }\n        elif self.advanced_search_filter:\n            args = {\n                \"n\": self.number_of_results,\n         
   }\n        else:\n            return {}\n\n        filter_arg = self.advanced_search_filter or {}\n        if filter_arg:\n            args[\"filter\"] = filter_arg\n\n        return args\n\n    def search_documents(self, vector_store=None) -> list[Data]:\n        vector_store = vector_store or self.build_vector_store()\n\n        self.log(f\"Search input: {self.search_query}\")\n        self.log(f\"Search type: {self.search_type}\")\n        self.log(f\"Number of results: {self.number_of_results}\")\n\n        try:\n            search_args = self._build_search_args()\n        except Exception as e:\n            msg = f\"Error in AstraDBVectorStore._build_search_args: {e}\"\n            raise ValueError(msg) from e\n\n        if not search_args:\n            self.log(\"No search input or filters provided. Skipping search.\")\n            return []\n\n        docs = []\n        search_method = \"search\" if \"query\" in search_args else \"metadata_search\"\n\n        try:\n            self.log(f\"Calling vector_store.{search_method} with args: {search_args}\")\n            docs = getattr(vector_store, search_method)(**search_args)\n        except Exception as e:\n            msg = f\"Error performing {search_method} in AstraDBVectorStore: {e}\"\n            raise ValueError(msg) from e\n\n        self.log(f\"Retrieved documents: {len(docs)}\")\n\n        data = docs_to_data(docs)\n        self.log(f\"Converted documents to data: {len(data)}\")\n        self.status = data\n\n        return data\n\n    def get_retriever_kwargs(self):\n        search_args = self._build_search_args()\n\n        return {\n            \"search_type\": self._map_search_type(),\n            \"search_kwargs\": search_args,\n        }\n"
               "collection_name": {
                 "_input_type": "DropdownInput",