From ec3de7c81d2889e674b091f46ce34e4f132593e8 Mon Sep 17 00:00:00 2001 From: Joe Filcik Date: Thu, 23 Oct 2025 04:24:14 -0400 Subject: [PATCH 1/6] Example of how to move training data --- .../move_training_data_across_analyzers.ipynb | 3541 +++++++++++++++++ 1 file changed, 3541 insertions(+) create mode 100644 notebooks/move_training_data_across_analyzers.ipynb diff --git a/notebooks/move_training_data_across_analyzers.ipynb b/notebooks/move_training_data_across_analyzers.ipynb new file mode 100644 index 0000000..4117155 --- /dev/null +++ b/notebooks/move_training_data_across_analyzers.ipynb @@ -0,0 +1,3541 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "e3ff63c1", + "metadata": {}, + "source": [ + "# Move Training Data Across Analyzers\n", + "\n", + "This notebook demonstrates how to reuse training data from an existing analyzer when creating a new analyzer in the same Azure AI Content Understanding resource.\n", + "\n", + "## Overview\n", + "\n", + "When you have an analyzer with training data and want to create a new analyzer using the same labeled examples, you can reference the existing blob storage location without duplicating or moving the data.\n", + "\n", + "### Benefits\n", + "- **No data duplication**: Reuse existing training data without copying\n", + "- **Same resource**: Both analyzers access the same blob storage\n", + "- **Field portability**: Maintain stable `fieldId`s across analyzers\n", + "- **Rapid iteration**: Test schema variations quickly\n", + "\n", + "### Prerequisites\n", + "1. An existing analyzer with training data already configured\n", + "2. Azure AI service configured by following the [configuration steps](../README.md#configure-azure-ai-service-resource)\n", + "3. 
Required packages installed" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "2f76b866", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Defaulting to user installation because normal site-packages is not writeable\n", + "Requirement already satisfied: aiohttp in /home/vscode/.local/lib/python3.11/site-packages (from -r ../requirements.txt (line 1)) (3.12.15)\n", + "Requirement already satisfied: azure-identity in /home/vscode/.local/lib/python3.11/site-packages (from -r ../requirements.txt (line 2)) (1.25.0)\n", + "Requirement already satisfied: azure-storage-blob in /home/vscode/.local/lib/python3.11/site-packages (from -r ../requirements.txt (line 3)) (12.26.0)\n", + "Requirement already satisfied: python-dotenv in /home/vscode/.local/lib/python3.11/site-packages (from -r ../requirements.txt (line 4)) (1.1.1)\n", + "Requirement already satisfied: requests in /home/vscode/.local/lib/python3.11/site-packages (from -r ../requirements.txt (line 5)) (2.32.5)\n", + "Requirement already satisfied: Pillow in /home/vscode/.local/lib/python3.11/site-packages (from -r ../requirements.txt (line 6)) (11.3.0)\n", + "Requirement already satisfied: aiohappyeyeballs>=2.5.0 in /home/vscode/.local/lib/python3.11/site-packages (from aiohttp->-r ../requirements.txt (line 1)) (2.6.1)\n", + "Requirement already satisfied: aiosignal>=1.4.0 in /home/vscode/.local/lib/python3.11/site-packages (from aiohttp->-r ../requirements.txt (line 1)) (1.4.0)\n", + "Requirement already satisfied: attrs>=17.3.0 in /home/vscode/.local/lib/python3.11/site-packages (from aiohttp->-r ../requirements.txt (line 1)) (25.3.0)\n", + "Requirement already satisfied: frozenlist>=1.1.1 in /home/vscode/.local/lib/python3.11/site-packages (from aiohttp->-r ../requirements.txt (line 1)) (1.7.0)\n", + "Requirement already satisfied: multidict<7.0,>=4.5 in /home/vscode/.local/lib/python3.11/site-packages (from aiohttp->-r ../requirements.txt (line 
1)) (6.6.4)\n", + "Requirement already satisfied: propcache>=0.2.0 in /home/vscode/.local/lib/python3.11/site-packages (from aiohttp->-r ../requirements.txt (line 1)) (0.3.2)\n", + "Requirement already satisfied: yarl<2.0,>=1.17.0 in /home/vscode/.local/lib/python3.11/site-packages (from aiohttp->-r ../requirements.txt (line 1)) (1.20.1)\n", + "Requirement already satisfied: aiohttp in /home/vscode/.local/lib/python3.11/site-packages (from -r ../requirements.txt (line 1)) (3.12.15)\n", + "Requirement already satisfied: azure-identity in /home/vscode/.local/lib/python3.11/site-packages (from -r ../requirements.txt (line 2)) (1.25.0)\n", + "Requirement already satisfied: azure-storage-blob in /home/vscode/.local/lib/python3.11/site-packages (from -r ../requirements.txt (line 3)) (12.26.0)\n", + "Requirement already satisfied: python-dotenv in /home/vscode/.local/lib/python3.11/site-packages (from -r ../requirements.txt (line 4)) (1.1.1)\n", + "Requirement already satisfied: requests in /home/vscode/.local/lib/python3.11/site-packages (from -r ../requirements.txt (line 5)) (2.32.5)\n", + "Requirement already satisfied: Pillow in /home/vscode/.local/lib/python3.11/site-packages (from -r ../requirements.txt (line 6)) (11.3.0)\n", + "Requirement already satisfied: aiohappyeyeballs>=2.5.0 in /home/vscode/.local/lib/python3.11/site-packages (from aiohttp->-r ../requirements.txt (line 1)) (2.6.1)\n", + "Requirement already satisfied: aiosignal>=1.4.0 in /home/vscode/.local/lib/python3.11/site-packages (from aiohttp->-r ../requirements.txt (line 1)) (1.4.0)\n", + "Requirement already satisfied: attrs>=17.3.0 in /home/vscode/.local/lib/python3.11/site-packages (from aiohttp->-r ../requirements.txt (line 1)) (25.3.0)\n", + "Requirement already satisfied: frozenlist>=1.1.1 in /home/vscode/.local/lib/python3.11/site-packages (from aiohttp->-r ../requirements.txt (line 1)) (1.7.0)\n", + "Requirement already satisfied: multidict<7.0,>=4.5 in 
/home/vscode/.local/lib/python3.11/site-packages (from aiohttp->-r ../requirements.txt (line 1)) (6.6.4)\n", + "Requirement already satisfied: propcache>=0.2.0 in /home/vscode/.local/lib/python3.11/site-packages (from aiohttp->-r ../requirements.txt (line 1)) (0.3.2)\n", + "Requirement already satisfied: yarl<2.0,>=1.17.0 in /home/vscode/.local/lib/python3.11/site-packages (from aiohttp->-r ../requirements.txt (line 1)) (1.20.1)\n", + "Requirement already satisfied: azure-core>=1.31.0 in /home/vscode/.local/lib/python3.11/site-packages (from azure-identity->-r ../requirements.txt (line 2)) (1.35.1)\n", + "Requirement already satisfied: cryptography>=2.5 in /home/vscode/.local/lib/python3.11/site-packages (from azure-identity->-r ../requirements.txt (line 2)) (46.0.2)\n", + "Requirement already satisfied: msal>=1.30.0 in /home/vscode/.local/lib/python3.11/site-packages (from azure-identity->-r ../requirements.txt (line 2)) (1.34.0)\n", + "Requirement already satisfied: msal-extensions>=1.2.0 in /home/vscode/.local/lib/python3.11/site-packages (from azure-identity->-r ../requirements.txt (line 2)) (1.3.1)\n", + "Requirement already satisfied: typing-extensions>=4.0.0 in /home/vscode/.local/lib/python3.11/site-packages (from azure-identity->-r ../requirements.txt (line 2)) (4.15.0)\n", + "Requirement already satisfied: isodate>=0.6.1 in /home/vscode/.local/lib/python3.11/site-packages (from azure-storage-blob->-r ../requirements.txt (line 3)) (0.7.2)\n", + "Requirement already satisfied: charset_normalizer<4,>=2 in /home/vscode/.local/lib/python3.11/site-packages (from requests->-r ../requirements.txt (line 5)) (3.4.3)\n", + "Requirement already satisfied: idna<4,>=2.5 in /home/vscode/.local/lib/python3.11/site-packages (from requests->-r ../requirements.txt (line 5)) (3.10)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /home/vscode/.local/lib/python3.11/site-packages (from requests->-r ../requirements.txt (line 5)) (2.5.0)\n", + "Requirement already 
satisfied: certifi>=2017.4.17 in /home/vscode/.local/lib/python3.11/site-packages (from requests->-r ../requirements.txt (line 5)) (2025.8.3)\n", + "Requirement already satisfied: azure-core>=1.31.0 in /home/vscode/.local/lib/python3.11/site-packages (from azure-identity->-r ../requirements.txt (line 2)) (1.35.1)\n", + "Requirement already satisfied: cryptography>=2.5 in /home/vscode/.local/lib/python3.11/site-packages (from azure-identity->-r ../requirements.txt (line 2)) (46.0.2)\n", + "Requirement already satisfied: msal>=1.30.0 in /home/vscode/.local/lib/python3.11/site-packages (from azure-identity->-r ../requirements.txt (line 2)) (1.34.0)\n", + "Requirement already satisfied: msal-extensions>=1.2.0 in /home/vscode/.local/lib/python3.11/site-packages (from azure-identity->-r ../requirements.txt (line 2)) (1.3.1)\n", + "Requirement already satisfied: typing-extensions>=4.0.0 in /home/vscode/.local/lib/python3.11/site-packages (from azure-identity->-r ../requirements.txt (line 2)) (4.15.0)\n", + "Requirement already satisfied: isodate>=0.6.1 in /home/vscode/.local/lib/python3.11/site-packages (from azure-storage-blob->-r ../requirements.txt (line 3)) (0.7.2)\n", + "Requirement already satisfied: charset_normalizer<4,>=2 in /home/vscode/.local/lib/python3.11/site-packages (from requests->-r ../requirements.txt (line 5)) (3.4.3)\n", + "Requirement already satisfied: idna<4,>=2.5 in /home/vscode/.local/lib/python3.11/site-packages (from requests->-r ../requirements.txt (line 5)) (3.10)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /home/vscode/.local/lib/python3.11/site-packages (from requests->-r ../requirements.txt (line 5)) (2.5.0)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /home/vscode/.local/lib/python3.11/site-packages (from requests->-r ../requirements.txt (line 5)) (2025.8.3)\n", + "Requirement already satisfied: six>=1.11.0 in /home/vscode/.local/lib/python3.11/site-packages (from azure-core>=1.31.0->azure-identity->-r 
../requirements.txt (line 2)) (1.17.0)\n", + "Requirement already satisfied: six>=1.11.0 in /home/vscode/.local/lib/python3.11/site-packages (from azure-core>=1.31.0->azure-identity->-r ../requirements.txt (line 2)) (1.17.0)\n", + "Requirement already satisfied: cffi>=2.0.0 in /home/vscode/.local/lib/python3.11/site-packages (from cryptography>=2.5->azure-identity->-r ../requirements.txt (line 2)) (2.0.0)\n", + "Requirement already satisfied: PyJWT<3,>=1.0.0 in /home/vscode/.local/lib/python3.11/site-packages (from PyJWT[crypto]<3,>=1.0.0->msal>=1.30.0->azure-identity->-r ../requirements.txt (line 2)) (2.10.1)\n", + "Requirement already satisfied: cffi>=2.0.0 in /home/vscode/.local/lib/python3.11/site-packages (from cryptography>=2.5->azure-identity->-r ../requirements.txt (line 2)) (2.0.0)\n", + "Requirement already satisfied: PyJWT<3,>=1.0.0 in /home/vscode/.local/lib/python3.11/site-packages (from PyJWT[crypto]<3,>=1.0.0->msal>=1.30.0->azure-identity->-r ../requirements.txt (line 2)) (2.10.1)\n", + "Requirement already satisfied: pycparser in /home/vscode/.local/lib/python3.11/site-packages (from cffi>=2.0.0->cryptography>=2.5->azure-identity->-r ../requirements.txt (line 2)) (2.23)\n", + "Requirement already satisfied: pycparser in /home/vscode/.local/lib/python3.11/site-packages (from cffi>=2.0.0->cryptography>=2.5->azure-identity->-r ../requirements.txt (line 2)) (2.23)\n", + "\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m24.0\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m25.2\u001b[0m\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", + "\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m24.0\u001b[0m\u001b[39;49m -> 
\u001b[0m\u001b[32;49m25.2\u001b[0m\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", + "Note: you may need to restart the kernel to use updated packages.\n", + "Note: you may need to restart the kernel to use updated packages.\n" + ] + } + ], + "source": [ + "%pip install -r ../requirements.txt" + ] + }, + { + "cell_type": "markdown", + "id": "a0032373", + "metadata": {}, + "source": [ + "## Create Azure AI Content Understanding Client\n", + "\n", + "> The [AzureContentUnderstandingClient](../python/content_understanding_client.py) is a utility class providing functions to interact with the Content Understanding API. Until the official Content Understanding SDK is released, this class serves as a lightweight SDK.\n", + "\n", + "> ⚠️ **Important**: Update the code below to match your Azure authentication method. Look for the `# IMPORTANT` comments and modify those sections accordingly.\n", + "\n", + "> ⚠️ **Note**: Using a subscription key works, but using a token provider with Microsoft Entra ID (formerly Azure Active Directory, AAD) is more secure and is strongly recommended for production environments." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "bcea7936", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:azure.identity._credentials.environment:No environment configuration found.\n", + "INFO:azure.identity._credentials.managed_identity:ManagedIdentityCredential will use IMDS\n", + "INFO:azure.core.pipeline.policies.http_logging_policy:Request URL: 'http://169.254.169.254/metadata/identity/oauth2/token?api-version=REDACTED&resource=REDACTED'\n", + "Request method: 'GET'\n", + "Request headers:\n", + " 'User-Agent': 'azsdk-python-identity/1.25.0 Python/3.11.13 (Linux-6.8.0-1030-azure-x86_64-with-glibc2.41)'\n", + "No body was attached to the request\n", + "INFO:azure.identity._credentials.managed_identity:ManagedIdentityCredential will use IMDS\n", + "INFO:azure.core.pipeline.policies.http_logging_policy:Request URL: 'http://169.254.169.254/metadata/identity/oauth2/token?api-version=REDACTED&resource=REDACTED'\n", + "Request method: 'GET'\n", + "Request headers:\n", + " 'User-Agent': 'azsdk-python-identity/1.25.0 Python/3.11.13 (Linux-6.8.0-1030-azure-x86_64-with-glibc2.41)'\n", + "No body was attached to the request\n", + "INFO:azure.core.pipeline.policies.http_logging_policy:Response status: 400\n", + "Response headers:\n", + " 'Content-Type': 'application/json; charset=utf-8'\n", + " 'Server': 'IMDS/150.870.65.1854'\n", + " 'x-ms-request-id': '7683a8fc-6110-4d17-ba92-e7986c8af8e0'\n", + " 'Date': 'Wed, 22 Oct 2025 22:06:40 GMT'\n", + " 'Content-Length': '88'\n", + "INFO:azure.core.pipeline.policies.http_logging_policy:Request URL: 'http://169.254.169.254/metadata/identity/oauth2/token?api-version=REDACTED&resource=REDACTED'\n", + "Request method: 'GET'\n", + "Request headers:\n", + " 'Metadata': 'REDACTED'\n", + " 'User-Agent': 'azsdk-python-identity/1.25.0 Python/3.11.13 (Linux-6.8.0-1030-azure-x86_64-with-glibc2.41)'\n", + "No body was attached to the request\n", + 
"INFO:azure.core.pipeline.policies.http_logging_policy:Response status: 400\n", + "Response headers:\n", + " 'Content-Type': 'application/json; charset=utf-8'\n", + " 'Server': 'IMDS/150.870.65.1854'\n", + " 'x-ms-request-id': '31ec0b5d-182f-4981-8624-34083dd1c063'\n", + " 'Date': 'Wed, 22 Oct 2025 22:06:40 GMT'\n", + " 'Content-Length': '68'\n", + "INFO:azure.core.pipeline.policies.http_logging_policy:Response status: 400\n", + "Response headers:\n", + " 'Content-Type': 'application/json; charset=utf-8'\n", + " 'Server': 'IMDS/150.870.65.1854'\n", + " 'x-ms-request-id': '7683a8fc-6110-4d17-ba92-e7986c8af8e0'\n", + " 'Date': 'Wed, 22 Oct 2025 22:06:40 GMT'\n", + " 'Content-Length': '88'\n", + "INFO:azure.core.pipeline.policies.http_logging_policy:Request URL: 'http://169.254.169.254/metadata/identity/oauth2/token?api-version=REDACTED&resource=REDACTED'\n", + "Request method: 'GET'\n", + "Request headers:\n", + " 'Metadata': 'REDACTED'\n", + " 'User-Agent': 'azsdk-python-identity/1.25.0 Python/3.11.13 (Linux-6.8.0-1030-azure-x86_64-with-glibc2.41)'\n", + "No body was attached to the request\n", + "INFO:azure.core.pipeline.policies.http_logging_policy:Response status: 400\n", + "Response headers:\n", + " 'Content-Type': 'application/json; charset=utf-8'\n", + " 'Server': 'IMDS/150.870.65.1854'\n", + " 'x-ms-request-id': '31ec0b5d-182f-4981-8624-34083dd1c063'\n", + " 'Date': 'Wed, 22 Oct 2025 22:06:40 GMT'\n", + " 'Content-Length': '68'\n", + "INFO:azure.identity._credentials.chained:DefaultAzureCredential acquired a token from AzureDeveloperCliCredential\n", + "INFO:azure.identity._credentials.chained:DefaultAzureCredential acquired a token from AzureDeveloperCliCredential\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "✅ Content Understanding client initialized successfully!\n" + ] + } + ], + "source": [ + "import logging\n", + "import json\n", + "import os\n", + "import sys\n", + "import uuid\n", + "from pathlib import Path\n", + "from 
dotenv import find_dotenv, load_dotenv\n", + "from azure.identity import DefaultAzureCredential, get_bearer_token_provider\n", + "\n", + "load_dotenv(find_dotenv())\n", + "logging.basicConfig(level=logging.INFO)\n", + "\n", + "# For authentication, you can use either token-based authentication or a subscription key; only one method is required.\n", + "AZURE_AI_ENDPOINT = os.getenv(\"AZURE_AI_ENDPOINT\")\n", + "# IMPORTANT: Replace with your actual subscription key or set it in the \".env\" file if not using token authentication.\n", + "AZURE_AI_API_KEY = os.getenv(\"AZURE_AI_API_KEY\")\n", + "AZURE_AI_API_VERSION = os.getenv(\"AZURE_AI_API_VERSION\", \"2025-05-01-preview\")\n", + "\n", + "# Add the parent directory to the path to use shared modules\n", + "parent_dir = Path(Path.cwd()).parent\n", + "sys.path.append(str(parent_dir))\n", + "from python.content_understanding_client import AzureContentUnderstandingClient\n", + "\n", + "credential = DefaultAzureCredential()\n", + "token_provider = get_bearer_token_provider(credential, \"https://cognitiveservices.azure.com/.default\")\n", + "\n", + "client = AzureContentUnderstandingClient(\n", + " endpoint=AZURE_AI_ENDPOINT,\n", + " api_version=AZURE_AI_API_VERSION,\n", + " # IMPORTANT: Comment out token_provider if using subscription key\n", + " token_provider=token_provider,\n", + " # IMPORTANT: Uncomment this if using subscription key\n", + " # subscription_key=AZURE_AI_API_KEY,\n", + " x_ms_useragent=\"azure-ai-content-understanding-python/move_training_data\",\n", + ")\n", + "\n", + "print(\"✅ Content Understanding client initialized successfully!\")" + ] + }, + { + "cell_type": "markdown", + "id": "92e5f27f", + "metadata": {}, + "source": [ + "## Step 1: List Available Analyzers\n", + "\n", + "First, let's see what analyzers are available in your resource. We'll look for analyzers that have training data configured." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "fcbc218a", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Found 675 analyzer(s) in your resource\n", + "\n", + "Available analyzers:\n", + "1. ID: prebuilt-audioAnalyzer\n", + " Name: N/A\n", + "\n", + "2. ID: prebuilt-callCenter\n", + " Name: N/A\n", + "\n", + "3. ID: prebuilt-contract\n", + " Name: N/A\n", + "\n", + "4. ID: prebuilt-documentAnalyzer\n", + " Name: N/A\n", + "\n", + "5. ID: prebuilt-imageAnalyzer\n", + " Name: N/A\n", + "\n", + "6. ID: prebuilt-invoice\n", + " Name: N/A\n", + "\n", + "7. ID: prebuilt-videoAnalyzer\n", + " Name: N/A\n", + "\n", + "8. ID: 123\n", + " Name: N/A\n", + "\n", + "9. ID: Test-description\n", + " Name: N/A\n", + "\n", + "10. ID: Test\n", + " Name: N/A\n", + "\n", + "11. ID: abc\n", + " Name: N/A\n", + "\n", + "12. ID: audio-250808\n", + " Name: N/A\n", + "\n", + "13. ID: auto-highlight-analyzer-1753389013\n", + " Name: N/A\n", + "\n", + "14. ID: auto-highlight-analyzer-1753393121\n", + " Name: N/A\n", + "\n", + "15. ID: auto-highlight-analyzer-1753727044\n", + " Name: N/A\n", + "\n", + "16. ID: auto-highlight-analyzer-1753728638\n", + " Name: N/A\n", + "\n", + "17. ID: auto-highlight-analyzer-1753822646\n", + " Name: N/A\n", + "\n", + "18. ID: auto-highlight-analyzer-1753823934\n", + " Name: N/A\n", + "\n", + "19. ID: auto-highlight-analyzer-1753826664\n", + " Name: N/A\n", + "\n", + "20. ID: auto-highlight-analyzer-1753829625\n", + " Name: N/A\n", + "\n", + "21. ID: auto-highlight-analyzer-1754935354\n", + " Name: N/A\n", + "\n", + "22. ID: auto-labeling-model-1748319168608-457\n", + " Name: N/A\n", + "\n", + "23. ID: auto-labeling-model-1748343190922-522\n", + " Name: N/A\n", + "\n", + "24. ID: auto-labeling-model-1748343844913-193\n", + " Name: N/A\n", + "\n", + "25. ID: auto-labeling-model-1748364582299-194\n", + " Name: N/A\n", + "\n", + "26. 
ID: auto-labeling-model-1748364610998-174\n", + " Name: N/A\n", + "\n", + "27. ID: auto-labeling-model-1748364627905-392\n", + " Name: N/A\n", + "\n", + "28. ID: auto-labeling-model-1748364882995-331\n", + " Name: N/A\n", + "\n", + "29. ID: auto-labeling-model-1748365809345-194\n", + " Name: N/A\n", + "\n", + "30. ID: auto-labeling-model-1748365844597-722\n", + " Name: N/A\n", + "\n", + "31. ID: auto-labeling-model-1748369310664-291\n", + " Name: N/A\n", + "\n", + "32. ID: auto-labeling-model-1748382666104-108\n", + " Name: N/A\n", + "\n", + "33. ID: auto-labeling-model-1748398666237-678\n", + " Name: N/A\n", + "\n", + "34. ID: auto-labeling-model-1748406169100-153\n", + " Name: N/A\n", + "\n", + "35. ID: auto-labeling-model-1748487450682-652\n", + " Name: N/A\n", + "\n", + "36. ID: auto-labeling-model-1748490709500-887\n", + " Name: N/A\n", + "\n", + "37. ID: auto-labeling-model-1748524957609-245\n", + " Name: N/A\n", + "\n", + "38. ID: auto-labeling-model-1748525150770-437\n", + " Name: N/A\n", + "\n", + "39. ID: auto-labeling-model-1748527146405-802\n", + " Name: N/A\n", + "\n", + "40. ID: auto-labeling-model-1748532349641-24\n", + " Name: N/A\n", + "\n", + "41. ID: auto-labeling-model-1748652707721-341\n", + " Name: N/A\n", + "\n", + "42. ID: auto-labeling-model-1748652848103-155\n", + " Name: N/A\n", + "\n", + "43. ID: auto-labeling-model-1748839949920-863\n", + " Name: N/A\n", + "\n", + "44. ID: auto-labeling-model-1748845791989-716\n", + " Name: N/A\n", + "\n", + "45. ID: auto-labeling-model-1748845807869-415\n", + " Name: N/A\n", + "\n", + "46. ID: auto-labeling-model-1748907891703-517\n", + " Name: N/A\n", + "\n", + "47. ID: auto-labeling-model-1748908692967-569\n", + " Name: N/A\n", + "\n", + "48. ID: auto-labeling-model-1748914058095-616\n", + " Name: N/A\n", + "\n", + "49. ID: auto-labeling-model-1748936065478-291\n", + " Name: N/A\n", + "\n", + "50. ID: auto-labeling-model-1748936271674-552\n", + " Name: N/A\n", + "\n", + "51. 
ID: auto-labeling-model-1748936490686-646\n", + " Name: N/A\n", + "\n", + "52. ID: auto-labeling-model-1748937447139-653\n", + " Name: N/A\n", + "\n", + "53. ID: auto-labeling-model-1748940860399-529\n", + " Name: N/A\n", + "\n", + "54. ID: auto-labeling-model-1748941320548-161\n", + " Name: N/A\n", + "\n", + "55. ID: auto-labeling-model-1748941816737-4\n", + " Name: N/A\n", + "\n", + "56. ID: auto-labeling-model-1748942668260-584\n", + " Name: N/A\n", + "\n", + "57. ID: auto-labeling-model-1748942752946-240\n", + " Name: N/A\n", + "\n", + "58. ID: auto-labeling-model-1748943751138-585\n", + " Name: N/A\n", + "\n", + "59. ID: auto-labeling-model-1748943869439-730\n", + " Name: N/A\n", + "\n", + "60. ID: auto-labeling-model-1748944505181-366\n", + " Name: N/A\n", + "\n", + "61. ID: auto-labeling-model-1748945194482-115\n", + " Name: N/A\n", + "\n", + "62. ID: auto-labeling-model-1749003326198-992\n", + " Name: N/A\n", + "\n", + "63. ID: auto-labeling-model-1749023590022-874\n", + " Name: N/A\n", + "\n", + "64. ID: auto-labeling-model-1749023636121-927\n", + " Name: N/A\n", + "\n", + "65. ID: auto-labeling-model-1749023850993-339\n", + " Name: N/A\n", + "\n", + "66. ID: auto-labeling-model-1749023887009-843\n", + " Name: N/A\n", + "\n", + "67. ID: auto-labeling-model-1749023901480-881\n", + " Name: N/A\n", + "\n", + "68. ID: auto-labeling-model-1749023933378-529\n", + " Name: N/A\n", + "\n", + "69. ID: auto-labeling-model-1749024617342-607\n", + " Name: N/A\n", + "\n", + "70. ID: auto-labeling-model-1749024650401-862\n", + " Name: N/A\n", + "\n", + "71. ID: auto-labeling-model-1749095665011-257\n", + " Name: N/A\n", + "\n", + "72. ID: auto-labeling-model-1749096929213-707\n", + " Name: N/A\n", + "\n", + "73. ID: auto-labeling-model-1749104361550-221\n", + " Name: N/A\n", + "\n", + "74. ID: auto-labeling-model-1749104922387-882\n", + " Name: N/A\n", + "\n", + "75. ID: auto-labeling-model-1749105026574-367\n", + " Name: N/A\n", + "\n", + "76. 
ID: auto-labeling-model-1749251965833-403\n", + " Name: N/A\n", + "\n", + "77. ID: auto-labeling-model-1749254053334-357\n", + " Name: N/A\n", + "\n", + "78. ID: auto-labeling-model-1749311286700-369\n", + " Name: N/A\n", + "\n", + "79. ID: auto-labeling-model-1749509842310-370\n", + " Name: N/A\n", + "\n", + "80. ID: auto-labeling-model-1749520600099-409\n", + " Name: N/A\n", + "\n", + "81. ID: auto-labeling-model-1749522784982-438\n", + " Name: N/A\n", + "\n", + "82. ID: auto-labeling-model-1749535466854-401\n", + " Name: N/A\n", + "\n", + "83. ID: auto-labeling-model-1749581796990-277\n", + " Name: N/A\n", + "\n", + "84. ID: auto-labeling-model-1749581836897-138\n", + " Name: N/A\n", + "\n", + "85. ID: auto-labeling-model-1749584140873-572\n", + " Name: N/A\n", + "\n", + "86. ID: auto-labeling-model-1749585959231-24\n", + " Name: N/A\n", + "\n", + "87. ID: auto-labeling-model-1749604604536-674\n", + " Name: N/A\n", + "\n", + "88. ID: auto-labeling-model-1749620902726-984\n", + " Name: N/A\n", + "\n", + "89. ID: auto-labeling-model-1749626687259-809\n", + " Name: N/A\n", + "\n", + "90. ID: auto-labeling-model-1749627602312-979\n", + " Name: N/A\n", + "\n", + "91. ID: auto-labeling-model-1749630601186-689\n", + " Name: N/A\n", + "\n", + "92. ID: auto-labeling-model-1749631339251-319\n", + " Name: N/A\n", + "\n", + "93. ID: auto-labeling-model-1749631742974-733\n", + " Name: N/A\n", + "\n", + "94. ID: auto-labeling-model-1749631891328-309\n", + " Name: N/A\n", + "\n", + "95. ID: auto-labeling-model-1749696702275-545\n", + " Name: N/A\n", + "\n", + "96. ID: auto-labeling-model-1749758278394-240\n", + " Name: N/A\n", + "\n", + "97. ID: auto-labeling-model-1749758517784-660\n", + " Name: N/A\n", + "\n", + "98. ID: auto-labeling-model-1749758533104-929\n", + " Name: N/A\n", + "\n", + "99. ID: auto-labeling-model-1749758555087-116\n", + " Name: N/A\n", + "\n", + "100. ID: auto-labeling-model-1749759432793-891\n", + " Name: N/A\n", + "\n", + "101. 
ID: auto-labeling-model-1749768746704-802\n", + " Name: N/A\n", + "\n", + "102. ID: auto-labeling-model-1749775305589-256\n", + " Name: N/A\n", + "\n", + "103. ID: auto-labeling-model-1749802761164-406\n", + " Name: N/A\n", + "\n", + "104. ID: auto-labeling-model-1749956497322-594\n", + " Name: N/A\n", + "\n", + "105. ID: auto-labeling-model-1749960177654-514\n", + " Name: N/A\n", + "\n", + "106. ID: auto-labeling-model-1749961833034-154\n", + " Name: N/A\n", + "\n", + "107. ID: auto-labeling-model-1749962138214-21\n", + " Name: N/A\n", + "\n", + "108. ID: auto-labeling-model-1750045513862-445\n", + " Name: N/A\n", + "\n", + "109. ID: auto-labeling-model-1750108497453-922\n", + " Name: N/A\n", + "\n", + "110. ID: auto-labeling-model-1750123214932-968\n", + " Name: N/A\n", + "\n", + "111. ID: auto-labeling-model-1750128770286-412\n", + " Name: N/A\n", + "\n", + "112. ID: auto-labeling-model-1750128888980-243\n", + " Name: N/A\n", + "\n", + "113. ID: auto-labeling-model-1750141234245-231\n", + " Name: N/A\n", + "\n", + "114. ID: auto-labeling-model-1750145695285-480\n", + " Name: N/A\n", + "\n", + "115. ID: auto-labeling-model-1750211643719-379\n", + " Name: N/A\n", + "\n", + "116. ID: auto-labeling-model-1750233198991-694\n", + " Name: N/A\n", + "\n", + "117. ID: auto-labeling-model-1750241272780-2\n", + " Name: N/A\n", + "\n", + "118. ID: auto-labeling-model-1750279157596-35\n", + " Name: N/A\n", + "\n", + "119. ID: auto-labeling-model-1750291999953-91\n", + " Name: N/A\n", + "\n", + "120. ID: auto-labeling-model-1750292632586-625\n", + " Name: N/A\n", + "\n", + "121. ID: auto-labeling-model-1750312049582-59\n", + " Name: N/A\n", + "\n", + "122. ID: auto-labeling-model-1750312573420-578\n", + " Name: N/A\n", + "\n", + "123. ID: auto-labeling-model-1750376726735-970\n", + " Name: N/A\n", + "\n", + "124. ID: auto-labeling-model-1750377427038-364\n", + " Name: N/A\n", + "\n", + "125. ID: auto-labeling-model-1750385575232-897\n", + " Name: N/A\n", + "\n", + "126. 
ID: auto-labeling-model-1750403576185-741\n", + " Name: N/A\n", + "\n", + "127. ID: auto-labeling-model-1750404809435-451\n", + " Name: N/A\n", + "\n", + "128. ID: auto-labeling-model-1750405070052-89\n", + " Name: N/A\n", + "\n", + "129. ID: auto-labeling-model-1750405091355-763\n", + " Name: N/A\n", + "\n", + "130. ID: auto-labeling-model-1750417420016-430\n", + " Name: N/A\n", + "\n", + "131. ID: auto-labeling-model-1750659725597-788\n", + " Name: N/A\n", + "\n", + "132. ID: auto-labeling-model-1750659733517-772\n", + " Name: N/A\n", + "\n", + "133. ID: auto-labeling-model-1750659761722-251\n", + " Name: N/A\n", + "\n", + "134. ID: auto-labeling-model-1750659784566-101\n", + " Name: N/A\n", + "\n", + "135. ID: auto-labeling-model-1750659903607-108\n", + " Name: N/A\n", + "\n", + "136. ID: auto-labeling-model-1750659933637-141\n", + " Name: N/A\n", + "\n", + "137. ID: auto-labeling-model-1750659945217-945\n", + " Name: N/A\n", + "\n", + "138. ID: auto-labeling-model-1750660650963-739\n", + " Name: N/A\n", + "\n", + "139. ID: auto-labeling-model-1750660824597-923\n", + " Name: N/A\n", + "\n", + "140. ID: auto-labeling-model-1750663207559-512\n", + " Name: N/A\n", + "\n", + "141. ID: auto-labeling-model-1750663259510-796\n", + " Name: N/A\n", + "\n", + "142. ID: auto-labeling-model-1750663303432-581\n", + " Name: N/A\n", + "\n", + "143. ID: auto-labeling-model-1750663377213-340\n", + " Name: N/A\n", + "\n", + "144. ID: auto-labeling-model-1750663393108-597\n", + " Name: N/A\n", + "\n", + "145. ID: auto-labeling-model-1750664456347-683\n", + " Name: N/A\n", + "\n", + "146. ID: auto-labeling-model-1750664605893-618\n", + " Name: N/A\n", + "\n", + "147. ID: auto-labeling-model-1750665355708-8\n", + " Name: N/A\n", + "\n", + "148. ID: auto-labeling-model-1750673318125-535\n", + " Name: N/A\n", + "\n", + "149. ID: auto-labeling-model-1750673331433-642\n", + " Name: N/A\n", + "\n", + "150. ID: auto-labeling-model-1750709349430-630\n", + " Name: N/A\n", + "\n", + "151. 
ID: auto-labeling-model-1750719511542-531\n", + " Name: N/A\n", + "\n", + "152. ID: auto-labeling-model-1750744047556-446\n", + " Name: N/A\n", + "\n", + "153. ID: auto-labeling-model-1750755510472-120\n", + " Name: N/A\n", + "\n", + "154. ID: auto-labeling-model-1750784814399-27\n", + " Name: N/A\n", + "\n", + "155. ID: auto-labeling-model-1750788356545-200\n", + " Name: N/A\n", + "\n", + "156. ID: auto-labeling-model-1750789921864-730\n", + " Name: N/A\n", + "\n", + "157. ID: auto-labeling-model-1750836585070-913\n", + " Name: N/A\n", + "\n", + "158. ID: auto-labeling-model-1750842588854-962\n", + " Name: N/A\n", + "\n", + "159. ID: auto-labeling-model-1750842831795-314\n", + " Name: N/A\n", + "\n", + "160. ID: auto-labeling-model-1750842897183-394\n", + " Name: N/A\n", + "\n", + "161. ID: auto-labeling-model-1750842978258-136\n", + " Name: N/A\n", + "\n", + "162. ID: auto-labeling-model-1750843282949-512\n", + " Name: N/A\n", + "\n", + "163. ID: auto-labeling-model-1750843704909-216\n", + " Name: N/A\n", + "\n", + "164. ID: auto-labeling-model-1750843908445-174\n", + " Name: N/A\n", + "\n", + "165. ID: auto-labeling-model-1750844014408-330\n", + " Name: N/A\n", + "\n", + "166. ID: auto-labeling-model-1750844234138-988\n", + " Name: N/A\n", + "\n", + "167. ID: auto-labeling-model-1750844709672-320\n", + " Name: N/A\n", + "\n", + "168. ID: auto-labeling-model-1750845307517-940\n", + " Name: N/A\n", + "\n", + "169. ID: auto-labeling-model-1750846220484-837\n", + " Name: N/A\n", + "\n", + "170. ID: auto-labeling-model-1750846255005-395\n", + " Name: N/A\n", + "\n", + "171. ID: auto-labeling-model-1750847433984-311\n", + " Name: N/A\n", + "\n", + "172. ID: auto-labeling-model-1750853034834-460\n", + " Name: N/A\n", + "\n", + "173. ID: auto-labeling-model-1750919114419-408\n", + " Name: N/A\n", + "\n", + "174. ID: auto-labeling-model-1750920179010-279\n", + " Name: N/A\n", + "\n", + "175. ID: auto-labeling-model-1750920218343-518\n", + " Name: N/A\n", + "\n", + "176. 
ID: auto-labeling-model-1750920298701-557\n", + " Name: N/A\n", + "\n", + "177. ID: auto-labeling-model-1750920352617-62\n", + " Name: N/A\n", + "\n", + "178. ID: auto-labeling-model-1751052501474-178\n", + " Name: N/A\n", + "\n", + "179. ID: auto-labeling-model-1751069615217-264\n", + " Name: N/A\n", + "\n", + "180. ID: auto-labeling-model-1751270970103-549\n", + " Name: N/A\n", + "\n", + "181. ID: auto-labeling-model-1751272499140-268\n", + " Name: N/A\n", + "\n", + "182. ID: auto-labeling-model-1751272544250-613\n", + " Name: N/A\n", + "\n", + "183. ID: auto-labeling-model-1751273787498-265\n", + " Name: N/A\n", + "\n", + "184. ID: auto-labeling-model-1751273849331-220\n", + " Name: N/A\n", + "\n", + "185. ID: auto-labeling-model-1751273904647-201\n", + " Name: N/A\n", + "\n", + "186. ID: auto-labeling-model-1751273937246-448\n", + " Name: N/A\n", + "\n", + "187. ID: auto-labeling-model-1751273983364-401\n", + " Name: N/A\n", + "\n", + "188. ID: auto-labeling-model-1751336918679-904\n", + " Name: N/A\n", + "\n", + "189. ID: auto-labeling-model-1751349360361-963\n", + " Name: N/A\n", + "\n", + "190. ID: auto-labeling-model-1751427888199-459\n", + " Name: N/A\n", + "\n", + "191. ID: auto-labeling-model-1751427891721-940\n", + " Name: N/A\n", + "\n", + "192. ID: auto-labeling-model-1751441608096-967\n", + " Name: N/A\n", + "\n", + "193. ID: auto-labeling-model-1751441662962-402\n", + " Name: N/A\n", + "\n", + "194. ID: auto-labeling-model-1751444577624-169\n", + " Name: N/A\n", + "\n", + "195. ID: auto-labeling-model-1751446425406-566\n", + " Name: N/A\n", + "\n", + "196. ID: auto-labeling-model-1751446744627-904\n", + " Name: N/A\n", + "\n", + "197. ID: auto-labeling-model-1751447069922-153\n", + " Name: N/A\n", + "\n", + "198. ID: auto-labeling-model-1751447126141-210\n", + " Name: N/A\n", + "\n", + "199. ID: auto-labeling-model-1751450223362-323\n", + " Name: N/A\n", + "\n", + "200. ID: auto-labeling-model-1751619901375-912\n", + " Name: N/A\n", + "\n", + "201. 
ID: auto-labeling-model-1751621939880-824\n", + " Name: N/A\n", + "\n", + "202. ID: auto-labeling-model-1751622003371-912\n", + " Name: N/A\n", + "\n", + "203. ID: auto-labeling-model-1751622246359-22\n", + " Name: N/A\n", + "\n", + "204. ID: auto-labeling-model-1751622337847-185\n", + " Name: N/A\n", + "\n", + "205. ID: auto-labeling-model-1751630796222-228\n", + " Name: N/A\n", + "\n", + "206. ID: auto-labeling-model-1751630815948-351\n", + " Name: N/A\n", + "\n", + "207. ID: auto-labeling-model-1751998528557-924\n", + " Name: N/A\n", + "\n", + "208. ID: auto-labeling-model-1752025809239-846\n", + " Name: N/A\n", + "\n", + "209. ID: auto-labeling-model-1752034702114-180\n", + " Name: N/A\n", + "\n", + "210. ID: auto-labeling-model-1752098586840-747\n", + " Name: N/A\n", + "\n", + "211. ID: auto-labeling-model-1752180782600-490\n", + " Name: N/A\n", + "\n", + "212. ID: auto-labeling-model-1752271117113-156\n", + " Name: N/A\n", + "\n", + "213. ID: auto-labeling-model-1752523653762-595\n", + " Name: N/A\n", + "\n", + "214. ID: auto-labeling-model-1752600290738-67\n", + " Name: N/A\n", + "\n", + "215. ID: auto-labeling-model-1752625416686-81\n", + " Name: N/A\n", + "\n", + "216. ID: auto-labeling-model-1752625871649-767\n", + " Name: N/A\n", + "\n", + "217. ID: auto-labeling-model-1752693120005-346\n", + " Name: N/A\n", + "\n", + "218. ID: auto-labeling-model-1752697569506-376\n", + " Name: N/A\n", + "\n", + "219. ID: auto-labeling-model-1752697610504-950\n", + " Name: N/A\n", + "\n", + "220. ID: auto-labeling-model-1752700740555-590\n", + " Name: N/A\n", + "\n", + "221. ID: auto-labeling-model-1752708687132-939\n", + " Name: N/A\n", + "\n", + "222. ID: auto-labeling-model-1752741732428-578\n", + " Name: N/A\n", + "\n", + "223. ID: auto-labeling-model-1752780032715-66\n", + " Name: N/A\n", + "\n", + "224. ID: auto-labeling-model-1752780325289-573\n", + " Name: N/A\n", + "\n", + "225. ID: auto-labeling-model-1752795955082-603\n", + " Name: N/A\n", + "\n", + "226. 
ID: auto-labeling-model-1752796753555-462\n", + " Name: N/A\n", + "\n", + "227. ID: auto-labeling-model-1752797239305-251\n", + " Name: N/A\n", + "\n", + "228. ID: auto-labeling-model-1752800932971-876\n", + " Name: N/A\n", + "\n", + "229. ID: auto-labeling-model-1752803086727-971\n", + " Name: N/A\n", + "\n", + "230. ID: auto-labeling-model-1752803985621-193\n", + " Name: N/A\n", + "\n", + "231. ID: auto-labeling-model-1752806777300-862\n", + " Name: N/A\n", + "\n", + "232. ID: auto-labeling-model-1752884829621-441\n", + " Name: N/A\n", + "\n", + "233. ID: auto-labeling-model-1753083025779-103\n", + " Name: N/A\n", + "\n", + "234. ID: auto-labeling-model-1753083077531-666\n", + " Name: N/A\n", + "\n", + "235. ID: auto-labeling-model-1753083850816-29\n", + " Name: N/A\n", + "\n", + "236. ID: auto-labeling-model-1753083864041-58\n", + " Name: N/A\n", + "\n", + "237. ID: auto-labeling-model-1753086883459-951\n", + " Name: N/A\n", + "\n", + "238. ID: auto-labeling-model-1753089079279-222\n", + " Name: N/A\n", + "\n", + "239. ID: auto-labeling-model-1753150531096-410\n", + " Name: N/A\n", + "\n", + "240. ID: auto-labeling-model-1753151865515-394\n", + " Name: N/A\n", + "\n", + "241. ID: auto-labeling-model-1753168395318-507\n", + " Name: N/A\n", + "\n", + "242. ID: auto-labeling-model-1753169409334-912\n", + " Name: N/A\n", + "\n", + "243. ID: auto-labeling-model-1753173597967-303\n", + " Name: N/A\n", + "\n", + "244. ID: auto-labeling-model-1753177537439-711\n", + " Name: N/A\n", + "\n", + "245. ID: auto-labeling-model-1753205662320-583\n", + " Name: N/A\n", + "\n", + "246. ID: auto-labeling-model-1753207022483-913\n", + " Name: N/A\n", + "\n", + "247. ID: auto-labeling-model-1753207579262-276\n", + " Name: N/A\n", + "\n", + "248. ID: auto-labeling-model-1753208672240-981\n", + " Name: N/A\n", + "\n", + "249. ID: auto-labeling-model-1753209156822-298\n", + " Name: N/A\n", + "\n", + "250. ID: auto-labeling-model-1753209981617-818\n", + " Name: N/A\n", + "\n", + "251. 
ID: auto-labeling-model-1753236316137-300\n", + " Name: N/A\n", + "\n", + "252. ID: auto-labeling-model-1753237512820-249\n", + " Name: N/A\n", + "\n", + "253. ID: auto-labeling-model-1753250369127-625\n", + " Name: N/A\n", + "\n", + "254. ID: auto-labeling-model-1753255567341-610\n", + " Name: N/A\n", + "\n", + "255. ID: auto-labeling-model-1753259092944-226\n", + " Name: N/A\n", + "\n", + "256. ID: auto-labeling-model-1753287197755-783\n", + " Name: N/A\n", + "\n", + "257. ID: auto-labeling-model-1753321650913-823\n", + " Name: N/A\n", + "\n", + "258. ID: auto-labeling-model-1753325891996-80\n", + " Name: N/A\n", + "\n", + "259. ID: auto-labeling-model-1753334968241-706\n", + " Name: N/A\n", + "\n", + "260. ID: auto-labeling-model-1753335132165-512\n", + " Name: N/A\n", + "\n", + "261. ID: auto-labeling-model-1753335555914-390\n", + " Name: N/A\n", + "\n", + "262. ID: auto-labeling-model-1753335697157-843\n", + " Name: N/A\n", + "\n", + "263. ID: auto-labeling-model-1753340903345-139\n", + " Name: N/A\n", + "\n", + "264. ID: auto-labeling-model-1753344102782-140\n", + " Name: N/A\n", + "\n", + "265. ID: auto-labeling-model-1753344491064-431\n", + " Name: N/A\n", + "\n", + "266. ID: auto-labeling-model-1753344947435-154\n", + " Name: N/A\n", + "\n", + "267. ID: auto-labeling-model-1753346772842-804\n", + " Name: N/A\n", + "\n", + "268. ID: auto-labeling-model-1753420107017-420\n", + " Name: N/A\n", + "\n", + "269. ID: auto-labeling-model-1753420466410-256\n", + " Name: N/A\n", + "\n", + "270. ID: auto-labeling-model-1753423049391-214\n", + " Name: N/A\n", + "\n", + "271. ID: auto-labeling-model-1753430316648-188\n", + " Name: N/A\n", + "\n", + "272. ID: auto-labeling-model-1753431705642-795\n", + " Name: N/A\n", + "\n", + "273. ID: auto-labeling-model-1753432653890-622\n", + " Name: N/A\n", + "\n", + "274. ID: auto-labeling-model-1753433164146-455\n", + " Name: N/A\n", + "\n", + "275. ID: auto-labeling-model-1753434806213-833\n", + " Name: N/A\n", + "\n", + "276. 
ID: auto-labeling-model-1753670824352-493\n", + " Name: N/A\n", + "\n", + "277. ID: auto-labeling-model-1753680640396-566\n", + " Name: N/A\n", + "\n", + "278. ID: auto-labeling-model-1753681888155-667\n", + " Name: N/A\n", + "\n", + "279. ID: auto-labeling-model-1753682254644-331\n", + " Name: N/A\n", + "\n", + "280. ID: auto-labeling-model-1753683583061-323\n", + " Name: N/A\n", + "\n", + "281. ID: auto-labeling-model-1753684547670-475\n", + " Name: N/A\n", + "\n", + "282. ID: auto-labeling-model-1753684784064-358\n", + " Name: N/A\n", + "\n", + "283. ID: auto-labeling-model-1753686206798-898\n", + " Name: N/A\n", + "\n", + "284. ID: auto-labeling-model-1753686800552-354\n", + " Name: N/A\n", + "\n", + "285. ID: auto-labeling-model-1753691313133-192\n", + " Name: N/A\n", + "\n", + "286. ID: auto-labeling-model-1753755468942-82\n", + " Name: N/A\n", + "\n", + "287. ID: auto-labeling-model-1753765727024-37\n", + " Name: N/A\n", + "\n", + "288. ID: auto-labeling-model-1753766046014-152\n", + " Name: N/A\n", + "\n", + "289. ID: auto-labeling-model-1753767335342-370\n", + " Name: N/A\n", + "\n", + "290. ID: auto-labeling-model-1753767338325-621\n", + " Name: N/A\n", + "\n", + "291. ID: auto-labeling-model-1753773699582-540\n", + " Name: N/A\n", + "\n", + "292. ID: auto-labeling-model-1753774470271-985\n", + " Name: N/A\n", + "\n", + "293. ID: auto-labeling-model-1753775949221-151\n", + " Name: N/A\n", + "\n", + "294. ID: auto-labeling-model-1753777245479-372\n", + " Name: N/A\n", + "\n", + "295. ID: auto-labeling-model-1753777925896-803\n", + " Name: N/A\n", + "\n", + "296. ID: auto-labeling-model-1753780557881-855\n", + " Name: N/A\n", + "\n", + "297. ID: auto-labeling-model-1753841121952-979\n", + " Name: N/A\n", + "\n", + "298. ID: auto-labeling-model-1753841981886-902\n", + " Name: N/A\n", + "\n", + "299. ID: auto-labeling-model-1753843376936-643\n", + " Name: N/A\n", + "\n", + "300. ID: auto-labeling-model-1753844211334-641\n", + " Name: N/A\n", + "\n", + "301. 
ID: auto-labeling-model-1753853033274-214\n", + " Name: N/A\n", + "\n", + "302. ID: auto-labeling-model-1753855251911-309\n", + " Name: N/A\n", + "\n", + "303. ID: auto-labeling-model-1753855551724-866\n", + " Name: N/A\n", + "\n", + "304. ID: auto-labeling-model-1753857116602-791\n", + " Name: N/A\n", + "\n", + "305. ID: auto-labeling-model-1753857268920-608\n", + " Name: N/A\n", + "\n", + "306. ID: auto-labeling-model-1753857820246-647\n", + " Name: N/A\n", + "\n", + "307. ID: auto-labeling-model-1753857865813-554\n", + " Name: N/A\n", + "\n", + "308. ID: auto-labeling-model-1753858369469-249\n", + " Name: N/A\n", + "\n", + "309. ID: auto-labeling-model-1753859412803-605\n", + " Name: N/A\n", + "\n", + "310. ID: auto-labeling-model-1753860904131-872\n", + " Name: N/A\n", + "\n", + "311. ID: auto-labeling-model-1753861167980-954\n", + " Name: N/A\n", + "\n", + "312. ID: auto-labeling-model-1753861799127-664\n", + " Name: N/A\n", + "\n", + "313. ID: auto-labeling-model-1753862553873-905\n", + " Name: N/A\n", + "\n", + "314. ID: auto-labeling-model-1753862814119-255\n", + " Name: N/A\n", + "\n", + "315. ID: auto-labeling-model-1753863784180-612\n", + " Name: N/A\n", + "\n", + "316. ID: auto-labeling-model-1753863994987-510\n", + " Name: N/A\n", + "\n", + "317. ID: auto-labeling-model-1753864084656-697\n", + " Name: N/A\n", + "\n", + "318. ID: auto-labeling-model-1753865255601-417\n", + " Name: N/A\n", + "\n", + "319. ID: auto-labeling-model-1753888993477-912\n", + " Name: N/A\n", + "\n", + "320. ID: auto-labeling-model-1753936473158-979\n", + " Name: N/A\n", + "\n", + "321. ID: auto-labeling-model-1753939417926-903\n", + " Name: N/A\n", + "\n", + "322. ID: auto-labeling-model-1753941090969-886\n", + " Name: N/A\n", + "\n", + "323. ID: auto-labeling-model-1753941295803-93\n", + " Name: N/A\n", + "\n", + "324. ID: auto-labeling-model-1753943808756-255\n", + " Name: N/A\n", + "\n", + "325. ID: auto-labeling-model-1754012684592-887\n", + " Name: N/A\n", + "\n", + "326. 
ID: auto-labeling-model-1754015881192-443\n", + " Name: N/A\n", + "\n", + "327. ID: auto-labeling-model-1754016406351-97\n", + " Name: N/A\n", + "\n", + "328. ID: auto-labeling-model-1754016977082-211\n", + " Name: N/A\n", + "\n", + "329. ID: auto-labeling-model-1754017707931-428\n", + " Name: N/A\n", + "\n", + "330. ID: auto-labeling-model-1754024495010-992\n", + " Name: N/A\n", + "\n", + "331. ID: auto-labeling-model-1754025560953-192\n", + " Name: N/A\n", + "\n", + "332. ID: auto-labeling-model-1754026435557-853\n", + " Name: N/A\n", + "\n", + "333. ID: auto-labeling-model-1754037940196-869\n", + " Name: N/A\n", + "\n", + "334. ID: auto-labeling-model-1754082032616-607\n", + " Name: N/A\n", + "\n", + "335. ID: auto-labeling-model-1754082215077-482\n", + " Name: N/A\n", + "\n", + "336. ID: auto-labeling-model-1754082332437-629\n", + " Name: N/A\n", + "\n", + "337. ID: auto-labeling-model-1754082479343-224\n", + " Name: N/A\n", + "\n", + "338. ID: auto-labeling-model-1754082536526-914\n", + " Name: N/A\n", + "\n", + "339. ID: auto-labeling-model-1754082630700-302\n", + " Name: N/A\n", + "\n", + "340. ID: auto-labeling-model-1754082725263-83\n", + " Name: N/A\n", + "\n", + "341. ID: auto-labeling-model-1754082811382-584\n", + " Name: N/A\n", + "\n", + "342. ID: auto-labeling-model-1754082998761-352\n", + " Name: N/A\n", + "\n", + "343. ID: auto-labeling-model-1754083046825-203\n", + " Name: N/A\n", + "\n", + "344. ID: auto-labeling-model-1754083150278-445\n", + " Name: N/A\n", + "\n", + "345. ID: auto-labeling-model-1754083462284-222\n", + " Name: N/A\n", + "\n", + "346. ID: auto-labeling-model-1754083621516-367\n", + " Name: N/A\n", + "\n", + "347. ID: auto-labeling-model-1754083719163-272\n", + " Name: N/A\n", + "\n", + "348. ID: auto-labeling-model-1754083866374-41\n", + " Name: N/A\n", + "\n", + "349. ID: auto-labeling-model-1754084032708-231\n", + " Name: N/A\n", + "\n", + "350. ID: auto-labeling-model-1754084406835-168\n", + " Name: N/A\n", + "\n", + "351. 
ID: auto-labeling-model-1754084472348-188\n", + " Name: N/A\n", + "\n", + "352. ID: auto-labeling-model-1754084575001-916\n", + " Name: N/A\n", + "\n", + "353. ID: auto-labeling-model-1754084884148-481\n", + " Name: N/A\n", + "\n", + "354. ID: auto-labeling-model-1754088680537-743\n", + " Name: N/A\n", + "\n", + "355. ID: auto-labeling-model-1754277589373-867\n", + " Name: N/A\n", + "\n", + "356. ID: auto-labeling-model-1754327062412-76\n", + " Name: N/A\n", + "\n", + "357. ID: auto-labeling-model-1754361872613-844\n", + " Name: N/A\n", + "\n", + "358. ID: auto-labeling-model-1754442934624-187\n", + " Name: N/A\n", + "\n", + "359. ID: auto-labeling-model-1754443219339-17\n", + " Name: N/A\n", + "\n", + "360. ID: auto-labeling-model-1754448125079-528\n", + " Name: N/A\n", + "\n", + "361. ID: auto-labeling-model-1754448200938-6\n", + " Name: N/A\n", + "\n", + "362. ID: auto-labeling-model-1754448830534-215\n", + " Name: N/A\n", + "\n", + "363. ID: auto-labeling-model-1754448901751-597\n", + " Name: N/A\n", + "\n", + "364. ID: auto-labeling-model-1754449038080-472\n", + " Name: N/A\n", + "\n", + "365. ID: auto-labeling-model-1754449135369-901\n", + " Name: N/A\n", + "\n", + "366. ID: auto-labeling-model-1754449150398-162\n", + " Name: N/A\n", + "\n", + "367. ID: auto-labeling-model-1754449206123-981\n", + " Name: N/A\n", + "\n", + "368. ID: auto-labeling-model-1754449280061-594\n", + " Name: N/A\n", + "\n", + "369. ID: auto-labeling-model-1754449347580-776\n", + " Name: N/A\n", + "\n", + "370. ID: auto-labeling-model-1754449538829-202\n", + " Name: N/A\n", + "\n", + "371. ID: auto-labeling-model-1754449608449-502\n", + " Name: N/A\n", + "\n", + "372. ID: auto-labeling-model-1754449678933-461\n", + " Name: N/A\n", + "\n", + "373. ID: auto-labeling-model-1754449747782-122\n", + " Name: N/A\n", + "\n", + "374. ID: auto-labeling-model-1754449819030-776\n", + " Name: N/A\n", + "\n", + "375. ID: auto-labeling-model-1754454485024-346\n", + " Name: N/A\n", + "\n", + "376. 
ID: auto-labeling-model-1754456633663-795\n", + " Name: N/A\n", + "\n", + "377. ID: auto-labeling-model-1754457369864-749\n", + " Name: N/A\n", + "\n", + "378. ID: auto-labeling-model-1754457591929-484\n", + " Name: N/A\n", + "\n", + "379. ID: auto-labeling-model-1754460230719-575\n", + " Name: N/A\n", + "\n", + "380. ID: auto-labeling-model-1754460479500-36\n", + " Name: N/A\n", + "\n", + "381. ID: auto-labeling-model-1754460640349-364\n", + " Name: N/A\n", + "\n", + "382. ID: auto-labeling-model-1754669409054-428\n", + " Name: N/A\n", + "\n", + "383. ID: auto-labeling-model-1754951212582-203\n", + " Name: N/A\n", + "\n", + "384. ID: auto-labeling-model-1754965260794-576\n", + " Name: N/A\n", + "\n", + "385. ID: auto-labeling-model-1754965331102-485\n", + " Name: N/A\n", + "\n", + "386. ID: auto-labeling-model-1754965445643-161\n", + " Name: N/A\n", + "\n", + "387. ID: auto-labeling-model-1754965630031-820\n", + " Name: N/A\n", + "\n", + "388. ID: auto-labeling-model-1754965704606-779\n", + " Name: N/A\n", + "\n", + "389. ID: auto-labeling-model-1754965767126-499\n", + " Name: N/A\n", + "\n", + "390. ID: auto-labeling-model-1754965926600-215\n", + " Name: N/A\n", + "\n", + "391. ID: auto-labeling-model-1754965996281-810\n", + " Name: N/A\n", + "\n", + "392. ID: auto-labeling-model-1754966073913-92\n", + " Name: N/A\n", + "\n", + "393. ID: auto-labeling-model-1754966208584-396\n", + " Name: N/A\n", + "\n", + "394. ID: auto-labeling-model-1754966287090-692\n", + " Name: N/A\n", + "\n", + "395. ID: auto-labeling-model-1754966553579-724\n", + " Name: N/A\n", + "\n", + "396. ID: auto-labeling-model-1754966634261-409\n", + " Name: N/A\n", + "\n", + "397. ID: auto-labeling-model-1754966703678-7\n", + " Name: N/A\n", + "\n", + "398. ID: auto-labeling-model-1754966778721-225\n", + " Name: N/A\n", + "\n", + "399. ID: auto-labeling-model-1754966848977-806\n", + " Name: N/A\n", + "\n", + "400. ID: auto-labeling-model-1754966934481-980\n", + " Name: N/A\n", + "\n", + "401. 
ID: auto-labeling-model-1754967006745-602\n", + " Name: N/A\n", + "\n", + "402. ID: auto-labeling-model-1754967080546-450\n", + " Name: N/A\n", + "\n", + "403. ID: auto-labeling-model-1754967570056-479\n", + " Name: N/A\n", + "\n", + "404. ID: auto-labeling-model-1754967665781-18\n", + " Name: N/A\n", + "\n", + "405. ID: auto-labeling-model-1754967737902-258\n", + " Name: N/A\n", + "\n", + "406. ID: auto-labeling-model-1754967809639-969\n", + " Name: N/A\n", + "\n", + "407. ID: auto-labeling-model-1754967879833-46\n", + " Name: N/A\n", + "\n", + "408. ID: auto-labeling-model-1754967953160-263\n", + " Name: N/A\n", + "\n", + "409. ID: auto-labeling-model-1754968036672-249\n", + " Name: N/A\n", + "\n", + "410. ID: auto-labeling-model-1754968110963-400\n", + " Name: N/A\n", + "\n", + "411. ID: auto-labeling-model-1754968179908-761\n", + " Name: N/A\n", + "\n", + "412. ID: auto-labeling-model-1754974913641-913\n", + " Name: N/A\n", + "\n", + "413. ID: auto-labeling-model-1754975127019-903\n", + " Name: N/A\n", + "\n", + "414. ID: auto-labeling-model-1754975368613-717\n", + " Name: N/A\n", + "\n", + "415. ID: auto-labeling-model-1754975432901-90\n", + " Name: N/A\n", + "\n", + "416. ID: auto-labeling-model-1754975454687-707\n", + " Name: N/A\n", + "\n", + "417. ID: auto-labeling-model-1754975527897-708\n", + " Name: N/A\n", + "\n", + "418. ID: auto-labeling-model-1754975600064-524\n", + " Name: N/A\n", + "\n", + "419. ID: auto-labeling-model-1754975711179-28\n", + " Name: N/A\n", + "\n", + "420. ID: auto-labeling-model-1754975967653-203\n", + " Name: N/A\n", + "\n", + "421. ID: auto-labeling-model-1754976038813-381\n", + " Name: N/A\n", + "\n", + "422. ID: auto-labeling-model-1754976117940-973\n", + " Name: N/A\n", + "\n", + "423. ID: auto-labeling-model-1754976193933-189\n", + " Name: N/A\n", + "\n", + "424. ID: auto-labeling-model-1754976293724-520\n", + " Name: N/A\n", + "\n", + "425. ID: auto-labeling-model-1754976368518-509\n", + " Name: N/A\n", + "\n", + "426. 
ID: auto-labeling-model-1754976437096-539\n", + " Name: N/A\n", + "\n", + "427. ID: auto-labeling-model-1754976513472-952\n", + " Name: N/A\n", + "\n", + "428. ID: auto-labeling-model-1754976754715-501\n", + " Name: N/A\n", + "\n", + "429. ID: auto-labeling-model-1754976904752-710\n", + " Name: N/A\n", + "\n", + "430. ID: auto-labeling-model-1754976976653-350\n", + " Name: N/A\n", + "\n", + "431. ID: auto-labeling-model-1754977052535-217\n", + " Name: N/A\n", + "\n", + "432. ID: auto-labeling-model-1754977121829-706\n", + " Name: N/A\n", + "\n", + "433. ID: auto-labeling-model-1754977217214-291\n", + " Name: N/A\n", + "\n", + "434. ID: auto-labeling-model-1754977287574-575\n", + " Name: N/A\n", + "\n", + "435. ID: auto-labeling-model-1754977360553-264\n", + " Name: N/A\n", + "\n", + "436. ID: auto-labeling-model-1754977435968-198\n", + " Name: N/A\n", + "\n", + "437. ID: auto-labeling-model-1754977508312-429\n", + " Name: N/A\n", + "\n", + "438. ID: auto-labeling-model-1754977588026-221\n", + " Name: N/A\n", + "\n", + "439. ID: auto-labeling-model-1754977663056-797\n", + " Name: N/A\n", + "\n", + "440. ID: auto-labeling-model-1754978589858-924\n", + " Name: N/A\n", + "\n", + "441. ID: auto-labeling-model-1754978799780-511\n", + " Name: N/A\n", + "\n", + "442. ID: auto-labeling-model-1754980148754-523\n", + " Name: N/A\n", + "\n", + "443. ID: auto-labeling-model-1754980966501-518\n", + " Name: N/A\n", + "\n", + "444. ID: auto-labeling-model-1754981828125-533\n", + " Name: N/A\n", + "\n", + "445. ID: auto-labeling-model-1754983426916-774\n", + " Name: N/A\n", + "\n", + "446. ID: auto-labeling-model-1754984348089-313\n", + " Name: N/A\n", + "\n", + "447. ID: auto-labeling-model-1754984423463-874\n", + " Name: N/A\n", + "\n", + "448. ID: auto-labeling-model-1754984499501-967\n", + " Name: N/A\n", + "\n", + "449. ID: auto-labeling-model-1754984577453-603\n", + " Name: N/A\n", + "\n", + "450. ID: auto-labeling-model-1754984673348-39\n", + " Name: N/A\n", + "\n", + "451. 
ID: auto-labeling-model-1754984745908-988\n", + " Name: N/A\n", + "\n", + "452. ID: auto-labeling-model-1754984844230-121\n", + " Name: N/A\n", + "\n", + "453. ID: auto-labeling-model-1754985031421-137\n", + " Name: N/A\n", + "\n", + "454. ID: auto-labeling-model-1754985052679-764\n", + " Name: N/A\n", + "\n", + "455. ID: auto-labeling-model-1754985230207-884\n", + " Name: N/A\n", + "\n", + "456. ID: auto-labeling-model-1754993665797-458\n", + " Name: N/A\n", + "\n", + "457. ID: auto-labeling-model-1754993775398-308\n", + " Name: N/A\n", + "\n", + "458. ID: auto-labeling-model-1755021430602-389\n", + " Name: N/A\n", + "\n", + "459. ID: auto-labeling-model-1755021530633-576\n", + " Name: N/A\n", + "\n", + "460. ID: auto-labeling-model-1755034509086-812\n", + " Name: N/A\n", + "\n", + "461. ID: auto-labeling-model-1755036680421-274\n", + " Name: N/A\n", + "\n", + "462. ID: auto-labeling-model-1755036840212-13\n", + " Name: N/A\n", + "\n", + "463. ID: auto-labeling-model-1755037123033-737\n", + " Name: N/A\n", + "\n", + "464. ID: auto-labeling-model-1755041702234-29\n", + " Name: N/A\n", + "\n", + "465. ID: auto-labeling-model-1755041716845-12\n", + " Name: N/A\n", + "\n", + "466. ID: auto-labeling-model-1755043090900-677\n", + " Name: N/A\n", + "\n", + "467. ID: auto-labeling-model-1755044191218-796\n", + " Name: N/A\n", + "\n", + "468. ID: auto-labeling-model-1755044423164-353\n", + " Name: N/A\n", + "\n", + "469. ID: auto-labeling-model-1755048701795-244\n", + " Name: N/A\n", + "\n", + "470. ID: auto-labeling-model-1755048719130-947\n", + " Name: N/A\n", + "\n", + "471. ID: auto-labeling-model-1755048825616-336\n", + " Name: N/A\n", + "\n", + "472. ID: auto-labeling-model-1755048863902-319\n", + " Name: N/A\n", + "\n", + "473. ID: auto-labeling-model-1755048975788-30\n", + " Name: N/A\n", + "\n", + "474. ID: auto-labeling-model-1755049161847-499\n", + " Name: N/A\n", + "\n", + "475. ID: auto-labeling-model-1755061734445-540\n", + " Name: N/A\n", + "\n", + "476. 
ID: auto-labeling-model-1755061987015-686\n", + " Name: N/A\n", + "\n", + "477. ID: auto-labeling-model-1755062318015-752\n", + " Name: N/A\n", + "\n", + "478. ID: auto-labeling-model-1755062966345-99\n", + " Name: N/A\n", + "\n", + "479. ID: auto-labeling-model-1755063315485-717\n", + " Name: N/A\n", + "\n", + "480. ID: auto-labeling-model-1755063386013-936\n", + " Name: N/A\n", + "\n", + "481. ID: auto-labeling-model-1755069455912-277\n", + " Name: N/A\n", + "\n", + "482. ID: auto-labeling-model-1755069553935-338\n", + " Name: N/A\n", + "\n", + "483. ID: auto-labeling-model-1755069702068-412\n", + " Name: N/A\n", + "\n", + "484. ID: auto-labeling-model-1755069842876-922\n", + " Name: N/A\n", + "\n", + "485. ID: auto-labeling-model-1755072279253-390\n", + " Name: N/A\n", + "\n", + "486. ID: auto-labeling-model-1755076709324-342\n", + " Name: N/A\n", + "\n", + "487. ID: auto-labeling-model-1755077617558-667\n", + " Name: N/A\n", + "\n", + "488. ID: auto-labeling-model-1755077873604-810\n", + " Name: N/A\n", + "\n", + "489. ID: auto-labeling-model-1755078021426-256\n", + " Name: N/A\n", + "\n", + "490. ID: auto-labeling-model-1755134767049-985\n", + " Name: N/A\n", + "\n", + "491. ID: auto-labeling-model-1755135457748-675\n", + " Name: N/A\n", + "\n", + "492. ID: auto-labeling-model-1755220299075-866\n", + " Name: N/A\n", + "\n", + "493. ID: auto-labeling-model-1755221919898-254\n", + " Name: N/A\n", + "\n", + "494. ID: auto-labeling-model-1755222009716-189\n", + " Name: N/A\n", + "\n", + "495. ID: auto-labeling-model-1755222110837-250\n", + " Name: N/A\n", + "\n", + "496. ID: auto-labeling-model-1755222196939-944\n", + " Name: N/A\n", + "\n", + "497. ID: auto-labeling-model-1755222580985-811\n", + " Name: N/A\n", + "\n", + "498. ID: auto-labeling-model-1755224344739-857\n", + " Name: N/A\n", + "\n", + "499. ID: auto-labeling-model-1755224418333-237\n", + " Name: N/A\n", + "\n", + "500. ID: auto-labeling-model-1755224501846-126\n", + " Name: N/A\n", + "\n", + "501. 
ID: auto-labeling-model-1755224573788-830\n", + " Name: N/A\n", + "\n", + "502. ID: auto-labeling-model-1755274111236-815\n", + " Name: N/A\n", + "\n", + "503. ID: auto-labeling-model-1755546385161-718\n", + " Name: N/A\n", + "\n", + "504. ID: auto-labeling-model-1755564859753-49\n", + " Name: N/A\n", + "\n", + "505. ID: auto-labeling-model-1755571891436-24\n", + " Name: N/A\n", + "\n", + "506. ID: auto-labeling-model-1755575417648-956\n", + " Name: N/A\n", + "\n", + "507. ID: auto-labeling-model-1755589868572-105\n", + " Name: N/A\n", + "\n", + "508. ID: auto-labeling-model-1755623887267-687\n", + " Name: N/A\n", + "\n", + "509. ID: auto-labeling-model-1755657602248-443\n", + " Name: N/A\n", + "\n", + "510. ID: auto-labeling-model-1755671136055-108\n", + " Name: N/A\n", + "\n", + "511. ID: auto-labeling-model-1755673245801-744\n", + " Name: N/A\n", + "\n", + "512. ID: auto-labeling-model-1755675180889-142\n", + " Name: N/A\n", + "\n", + "513. ID: auto-labeling-model-1755678446620-988\n", + " Name: N/A\n", + "\n", + "514. ID: auto-labeling-model-1755738759590-405\n", + " Name: N/A\n", + "\n", + "515. ID: auto-labeling-model-1755741941138-610\n", + " Name: N/A\n", + "\n", + "516. ID: auto-labeling-model-1755745805348-731\n", + " Name: N/A\n", + "\n", + "517. ID: auto-labeling-model-1755753976159-223\n", + " Name: N/A\n", + "\n", + "518. ID: auto-labeling-model-1755756092896-628\n", + " Name: N/A\n", + "\n", + "519. ID: auto-labeling-model-1755761289894-657\n", + " Name: N/A\n", + "\n", + "520. ID: auto-labeling-model-1755824923780-82\n", + " Name: N/A\n", + "\n", + "521. ID: auto-labeling-model-1755839089591-320\n", + " Name: N/A\n", + "\n", + "522. ID: auto-labeling-model-1755840078392-806\n", + " Name: N/A\n", + "\n", + "523. ID: auto-labeling-model-1755843001974-210\n", + " Name: N/A\n", + "\n", + "524. ID: auto-labeling-model-1755844906709-250\n", + " Name: N/A\n", + "\n", + "525. ID: auto-labeling-model-1755846971954-69\n", + " Name: N/A\n", + "\n", + "526. 
ID: auto-labeling-model-1755847550122-149\n", + " Name: N/A\n", + "\n", + "527. ID: auto-labeling-model-1755849254781-355\n", + " Name: N/A\n", + "\n", + "528. ID: auto-labeling-model-1755854539631-293\n", + " Name: N/A\n", + "\n", + "529. ID: auto-labeling-model-1756087002299-72\n", + " Name: N/A\n", + "\n", + "530. ID: auto-labeling-model-1756087565828-132\n", + " Name: N/A\n", + "\n", + "531. ID: auto-labeling-model-1756087680461-719\n", + " Name: N/A\n", + "\n", + "532. ID: auto-labeling-model-1756087819774-813\n", + " Name: N/A\n", + "\n", + "533. ID: auto-labeling-model-1756087867761-583\n", + " Name: N/A\n", + "\n", + "534. ID: auto-labeling-model-1756112514075-201\n", + " Name: N/A\n", + "\n", + "535. ID: auto-labeling-model-1756137207447-376\n", + " Name: N/A\n", + "\n", + "536. ID: auto-labeling-model-1756137492728-788\n", + " Name: N/A\n", + "\n", + "537. ID: auto-labeling-model-1756138904093-804\n", + " Name: N/A\n", + "\n", + "538. ID: auto-labeling-model-1756193938984-510\n", + " Name: N/A\n", + "\n", + "539. ID: auto-labeling-model-1756279382223-424\n", + " Name: N/A\n", + "\n", + "540. ID: auto-labeling-model-1756281178604-829\n", + " Name: N/A\n", + "\n", + "541. ID: auto-labeling-model-1756347012781-494\n", + " Name: N/A\n", + "\n", + "542. ID: auto-labeling-model-1756348972897-103\n", + " Name: N/A\n", + "\n", + "543. ID: auto-labeling-model-1756349422839-305\n", + " Name: N/A\n", + "\n", + "544. ID: auto-labeling-model-1756349498730-552\n", + " Name: N/A\n", + "\n", + "545. ID: auto-labeling-model-1756360413351-308\n", + " Name: N/A\n", + "\n", + "546. ID: auto-labeling-model-1756363959156-20\n", + " Name: N/A\n", + "\n", + "547. ID: auto-labeling-model-1756369801529-118\n", + " Name: N/A\n", + "\n", + "548. ID: auto-labeling-model-1756430598758-905\n", + " Name: N/A\n", + "\n", + "549. ID: auto-labeling-model-1756440760505-307\n", + " Name: N/A\n", + "\n", + "550. ID: auto-labeling-model-1756460100800-668\n", + " Name: N/A\n", + "\n", + "551. 
ID: auto-labeling-model-1756460110544-559\n", + " Name: N/A\n", + "\n", + "552. ID: auto-labeling-model-1756693820728-76\n", + " Name: N/A\n", + "\n", + "553. ID: auto-labeling-model-1756912886736-101\n", + " Name: N/A\n", + "\n", + "554. ID: auto-labeling-model-1757497814136-763\n", + " Name: N/A\n", + "\n", + "555. ID: auto-labeling-model-1757663204666-122\n", + " Name: N/A\n", + "\n", + "556. ID: auto-labeling-model-1757995180429-664\n", + " Name: N/A\n", + "\n", + "557. ID: auto-labeling-model-1758045209157-220\n", + " Name: N/A\n", + "\n", + "558. ID: auto-labeling-model-1758045343765-419\n", + " Name: N/A\n", + "\n", + "559. ID: auto-labeling-model-1758182652735-580\n", + " Name: N/A\n", + "\n", + "560. ID: auto-labeling-model-1758551942230-384\n", + " Name: N/A\n", + "\n", + "561. ID: auto-labeling-model-1758693093755-157\n", + " Name: N/A\n", + "\n", + "562. ID: auto-labeling-model-1758703215086-912\n", + " Name: N/A\n", + "\n", + "563. ID: auto-labeling-model-1758742506653-803\n", + " Name: N/A\n", + "\n", + "564. ID: auto-labeling-model-1758859363470-900\n", + " Name: N/A\n", + "\n", + "565. ID: auto-labeling-model-1758861498544-317\n", + " Name: N/A\n", + "\n", + "566. ID: auto-labeling-model-1759166848691-35\n", + " Name: N/A\n", + "\n", + "567. ID: auto-labeling-model-1759310043204-41\n", + " Name: N/A\n", + "\n", + "568. ID: auto-labeling-model-1759334232768-397\n", + " Name: N/A\n", + "\n", + "569. ID: auto-labeling-model-1759817160138-569\n", + " Name: N/A\n", + "\n", + "570. ID: auto-labeling-model-1759956977266-516\n", + " Name: N/A\n", + "\n", + "571. ID: auto-labeling-model-1760426322250-908\n", + " Name: N/A\n", + "\n", + "572. ID: auto-labeling-model-1760479341007-491\n", + " Name: N/A\n", + "\n", + "573. ID: auto-labeling-model-1760479492039-631\n", + " Name: N/A\n", + "\n", + "574. ID: auto-labeling-model-1760479645658-613\n", + " Name: N/A\n", + "\n", + "575. ID: auto-labeling-model-1760479761056-497\n", + " Name: N/A\n", + "\n", + "576. 
ID: auto-labeling-model-1760479780527-626\n", + " Name: N/A\n", + "\n", + "577. ID: auto-labeling-model-1760479932099-212\n", + " Name: N/A\n", + "\n", + "578. ID: auto-labeling-model-1760479949487-358\n", + " Name: N/A\n", + "\n", + "579. ID: auto-labeling-model-1760480389179-217\n", + " Name: N/A\n", + "\n", + "580. ID: auto-labeling-model-1760490988143-30\n", + " Name: N/A\n", + "\n", + "581. ID: auto-labeling-model-1760499252646-774\n", + " Name: N/A\n", + "\n", + "582. ID: auto-labeling-model-1760539714171-740\n", + " Name: N/A\n", + "\n", + "583. ID: auto-labeling-model-1760540221082-518\n", + " Name: N/A\n", + "\n", + "584. ID: auto-labeling-model-1760566206649-192\n", + " Name: N/A\n", + "\n", + "585. ID: auto-labeling-model-1760649718443-469\n", + " Name: N/A\n", + "\n", + "586. ID: auto-labeling-model-1760974810245-633\n", + " Name: N/A\n", + "\n", + "587. ID: auto-labeling-model-1761060285537-410\n", + " Name: N/A\n", + "\n", + "588. ID: auto-labeling-model-1761072595965-766\n", + " Name: N/A\n", + "\n", + "589. ID: auto-labeling-model-1761170322608-61\n", + " Name: N/A\n", + "\n", + "590. ID: auto-labeling-model-1761170507108-187\n", + " Name: N/A\n", + "\n", + "591. ID: cu-eox\n", + " Name: N/A\n", + "\n", + "592. ID: cu-test-2\n", + " Name: N/A\n", + "\n", + "593. ID: cu-test-3\n", + " Name: N/A\n", + "\n", + "594. ID: cu-test\n", + " Name: N/A\n", + "\n", + "595. ID: cu-test3\n", + " Name: N/A\n", + "\n", + "596. ID: cu-trainig-debug\n", + " Name: N/A\n", + "\n", + "597. ID: cx-deloitte-all-items-good\n", + " Name: N/A\n", + "\n", + "598. ID: cx-deloitte-all-items-idex\n", + " Name: N/A\n", + "\n", + "599. ID: cx-deloitte-all-items-keep-one-label\n", + " Name: N/A\n", + "\n", + "600. ID: cx-deloitte-all-items\n", + " Name: N/A\n", + "\n", + "601. ID: cx-deloitte\n", + " Name: N/A\n", + "\n", + "602. ID: default\n", + " Name: N/A\n", + "\n", + "603. ID: document-test\n", + " Name: N/A\n", + "\n", + "604. 
ID: example\n", + " Name: N/A\n", + "\n", + "605. ID: excel\n", + " Name: N/A\n", + "\n", + "606. ID: highlight-analyzer-03673070-1755304831\n", + " Name: N/A\n", + "\n", + "607. ID: highlight-analyzer-1755112569\n", + " Name: N/A\n", + "\n", + "608. ID: highlight-analyzer-1755113090\n", + " Name: N/A\n", + "\n", + "609. ID: highlight-analyzer-1755117427\n", + " Name: N/A\n", + "\n", + "610. ID: highlight-analyzer-1755127191\n", + " Name: N/A\n", + "\n", + "611. ID: highlight-analyzer-1755128917\n", + " Name: N/A\n", + "\n", + "612. ID: highlight-analyzer-1755204485\n", + " Name: N/A\n", + "\n", + "613. ID: highlight-analyzer-1755205148\n", + " Name: N/A\n", + "\n", + "614. ID: highlight-analyzer-1755304423\n", + " Name: N/A\n", + "\n", + "615. ID: highlight-analyzer-49453d78-1755304719\n", + " Name: N/A\n", + "\n", + "616. ID: insurance-test\n", + " Name: N/A\n", + "\n", + "617. ID: invoiceLabeledData\n", + " Name: N/A\n", + "\n", + "618. ID: joann-insurance\n", + " Name: N/A\n", + "\n", + "619. ID: joann-tryout-invoice\n", + " Name: N/A\n", + "\n", + "620. ID: k\n", + " Name: N/A\n", + "\n", + "621. ID: minus\n", + " Name: N/A\n", + "\n", + "622. ID: mySampleAnalyzer\n", + " Name: N/A\n", + "\n", + "623. ID: pro-test\n", + " Name: N/A\n", + "\n", + "624. ID: proExample\n", + " Name: N/A\n", + "\n", + "625. ID: sampleAnalyzer273626\n", + " Name: N/A\n", + "\n", + "626. ID: sampleAnalyzer530775\n", + " Name: N/A\n", + "\n", + "627. ID: sampleAnalyzer679281\n", + " Name: N/A\n", + "\n", + "628. ID: shihw-insurance-0529\n", + " Name: N/A\n", + "\n", + "629. ID: shihw-video-test0528\n", + " Name: N/A\n", + "\n", + "630. ID: soccer-highlights-analyzer-v1\n", + " Name: N/A\n", + "\n", + "631. ID: soccer-highlights-analyzer-v2\n", + " Name: N/A\n", + "\n", + "632. ID: soccer-highlights-analyzer-v3\n", + " Name: N/A\n", + "\n", + "633. ID: soccer-highlights-analyzer-v4\n", + " Name: N/A\n", + "\n", + "634. 
ID: soccer-highlights-analyzer-v5\n", + " Name: N/A\n", + "\n", + "635. ID: soccer-highlights-analyzer1751301403\n", + " Name: N/A\n", + "\n", + "636. ID: soccer-highlights-analyzer1751301722\n", + " Name: N/A\n", + "\n", + "637. ID: soccer-highlights-analyzer2-v1\n", + " Name: N/A\n", + "\n", + "638. ID: soccer-highlights-analyzer5314167881751302137\n", + " Name: N/A\n", + "\n", + "639. ID: soccer-highlights-analyzer5314167881751302581\n", + " Name: N/A\n", + "\n", + "640. ID: soccer-highlights-analyzer5314167881751303949\n", + " Name: N/A\n", + "\n", + "641. ID: soccer-highlights-analyzer5314167881751306147\n", + " Name: N/A\n", + "\n", + "642. ID: soccer-highlights-analyzer5314167881751313349\n", + " Name: N/A\n", + "\n", + "643. ID: soccer-highlights-analyzer5314167881755019232\n", + " Name: N/A\n", + "\n", + "644. ID: soccer-highlights-analyzer5314167881755020564\n", + " Name: N/A\n", + "\n", + "645. ID: soccer-highlights-analyzer5314167881755023993\n", + " Name: N/A\n", + "\n", + "646. ID: soccer-highlights-analyzer5314167881755029594\n", + " Name: N/A\n", + "\n", + "647. ID: soccer-highlights-analyzer54167881751301841\n", + " Name: N/A\n", + "\n", + "648. ID: t\n", + " Name: N/A\n", + "\n", + "649. ID: tes\n", + " Name: N/A\n", + "\n", + "650. ID: test-bar-gap\n", + " Name: N/A\n", + "\n", + "651. ID: test\n", + " Name: N/A\n", + "\n", + "652. ID: testMeow\n", + " Name: N/A\n", + "\n", + "653. ID: tingwliu-invoice-test\n", + " Name: N/A\n", + "\n", + "654. ID: video-250808\n", + " Name: N/A\n", + "\n", + "655. ID: video\n", + " Name: N/A\n", + "\n", + "656. ID: videotest\n", + " Name: N/A\n", + "\n", + "657. ID: yahch-contract-0805-1\n", + " Name: N/A\n", + "\n", + "658. ID: yahch-document-HKinvoice-label-2\n", + " Name: N/A\n", + "\n", + "659. ID: yahch-document-HKinvoice-label-3\n", + " Name: N/A\n", + "\n", + "660. ID: yahch-document-HKinvoice-label-4\n", + " Name: N/A\n", + "\n", + "661. 
ID: yahch-document-HKinvoice-label-5\n", + " Name: N/A\n", + "\n", + "662. ID: yahch-document-HKinvoice-label-6\n", + " Name: N/A\n", + "\n", + "663. ID: yahch-document-HKinvoice-label\n", + " Name: N/A\n", + "\n", + "664. ID: yahch-document-HKinvoice-local-1\n", + " Name: N/A\n", + "\n", + "665. ID: yahch-document-HKinvoice-local-icl-1\n", + " Name: N/A\n", + "\n", + "666. ID: yahch-document-HKinvoice-local-icl-2\n", + " Name: N/A\n", + "\n", + "667. ID: yahch-invoice-HKinvoice-local-icl-1\n", + " Name: N/A\n", + "\n", + "668. ID: yahch-invoice-HKinvoice-local-zeroshot-1\n", + " Name: N/A\n", + "\n", + "669. ID: yahch-invoice-HKinvoice-local-zeroshot-2\n", + " Name: N/A\n", + "\n", + "670. ID: yiyun\n", + " Name: N/A\n", + "\n", + "671. ID: yiyun1223\n", + " Name: N/A\n", + "\n", + "672. ID: yiyun3333\n", + " Name: N/A\n", + "\n", + "673. ID: yiyun65656\n", + " Name: N/A\n", + "\n", + "674. ID: yiyunPromode\n", + " Name: N/A\n", + "\n", + "675. ID: yslin-2025-06-25-generative-date-fields\n", + " Name: N/A\n", + "\n" + ] + } + ], + "source": [ + "# Get all analyzers in your resource\n", + "all_analyzers = client.get_all_analyzers()\n", + "analyzers_list = all_analyzers.get('value', [])\n", + "\n", + "print(f\"Found {len(analyzers_list)} analyzer(s) in your resource\\n\")\n", + "\n", + "# Display analyzer names and IDs\n", + "if analyzers_list:\n", + " print(\"Available analyzers:\")\n", + " for idx, analyzer in enumerate(analyzers_list, 1):\n", + " analyzer_id = analyzer.get('analyzerId', 'N/A')\n", + " analyzer_name = analyzer.get('name', 'N/A')\n", + " print(f\"{idx}. ID: {analyzer_id}\")\n", + " print(f\" Name: {analyzer_name}\")\n", + " print()\n", + "else:\n", + " print(\"No analyzers found. 
Please create an analyzer with training data first.\")\n", + " print(\"See: notebooks/analyzer_training.ipynb for guidance.\")" + ] + }, + { + "cell_type": "markdown", + "id": "8e6ae2ac", + "metadata": {}, + "source": [ + "## Step 2: Select Source Analyzer\n", + "\n", + "Specify the ID of the analyzer whose training data you want to reuse.\n", + "\n", + "**Option 1**: Set `SOURCE_ANALYZER_ID` to an existing analyzer ID from the list above.\n", + "\n", + "**Option 2**: If you don't have an analyzer with training data, uncomment and run the code cell in the **Option 2** section below to create one first." + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "9772b0f5", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Source Analyzer ID: invoiceLabeledData\n" + ] + } + ], + "source": [ + "# OPTION 1: Specify an existing analyzer ID that has training data\n", + "# Replace this with your actual analyzer ID\n", + "SOURCE_ANALYZER_ID = \"invoiceLabeledData\"\n", + "\n", + "# Uncomment to use the first analyzer from the list\n", + "# if analyzers_list:\n", + "# SOURCE_ANALYZER_ID = analyzers_list[0].get('analyzerId')\n", + "# print(f\"Using first analyzer: {SOURCE_ANALYZER_ID}\")\n", + "\n", + "print(f\"Source Analyzer ID: {SOURCE_ANALYZER_ID}\")" + ] + }, + { + "cell_type": "markdown", + "id": "d7ceffda", + "metadata": {}, + "source": [ + "### Option 2: Create a Source Analyzer with Training Data (Optional)\n", + "\n", + "If you don't have an existing analyzer with training data, uncomment and run the cell below to create one first.\n", + "\n", + "**Prerequisites**:\n", + "- Set environment variables for training data (see [docs/set_env_for_training_data_and_reference_doc.md](../docs/set_env_for_training_data_and_reference_doc.md))\n", + "- Ensure you have labeled training data in `../data/document_training/`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1ce228bd", + "metadata": {}, + "outputs": [], + "source": [ + "# Uncomment this entire
cell if you need to create a source analyzer first\n", + "\n", + "# from azure.storage.blob import ContainerSasPermissions\n", + "\n", + "# # Configure training data\n", + "# analyzer_template_path = \"../analyzer_templates/receipt.json\"\n", + "# training_docs_folder = \"../data/document_training\"\n", + "\n", + "# # Get or generate SAS URL\n", + "# training_data_sas_url = os.getenv(\"TRAINING_DATA_SAS_URL\")\n", + "# if not training_data_sas_url:\n", + "# TRAINING_DATA_STORAGE_ACCOUNT_NAME = os.getenv(\"TRAINING_DATA_STORAGE_ACCOUNT_NAME\")\n", + "# TRAINING_DATA_CONTAINER_NAME = os.getenv(\"TRAINING_DATA_CONTAINER_NAME\")\n", + "# if not TRAINING_DATA_STORAGE_ACCOUNT_NAME:\n", + "# raise ValueError(\n", + "# \"Please set either TRAINING_DATA_SAS_URL or both TRAINING_DATA_STORAGE_ACCOUNT_NAME \"\n", + "# \"and TRAINING_DATA_CONTAINER_NAME environment variables.\"\n", + "# )\n", + "# training_data_sas_url = AzureContentUnderstandingClient.generate_temp_container_sas_url(\n", + "# account_name=TRAINING_DATA_STORAGE_ACCOUNT_NAME,\n", + "# container_name=TRAINING_DATA_CONTAINER_NAME,\n", + "# permissions=ContainerSasPermissions(read=True, write=True, list=True),\n", + "# expiry_hours=1,\n", + "# )\n", + "\n", + "# training_data_path = os.getenv(\"TRAINING_DATA_PATH\")\n", + "\n", + "# # Upload training data to blob storage\n", + "# print(\"Uploading training data to blob storage...\")\n", + "# await client.generate_training_data_on_blob(training_docs_folder, training_data_sas_url, training_data_path)\n", + "# print(\"✅ Training data uploaded successfully!\")\n", + "\n", + "# # Create source analyzer\n", + "# SOURCE_ANALYZER_ID = \"source-analyzer-\" + str(uuid.uuid4())\n", + "# print(f\"Creating source analyzer: {SOURCE_ANALYZER_ID}\")\n", + "\n", + "# response = client.begin_create_analyzer(\n", + "# SOURCE_ANALYZER_ID,\n", + "# analyzer_template_path=analyzer_template_path,\n", + "# training_storage_container_sas_url=training_data_sas_url,\n", + "# 
training_storage_container_path_prefix=training_data_path,\n", + "# )\n", + "# result = client.poll_result(response)\n", + "# print(\"✅ Source analyzer created successfully!\")\n", + "# print(json.dumps(result, indent=2))" + ] + }, + { + "cell_type": "markdown", + "id": "d9b1bc93", + "metadata": {}, + "source": [ + "## Step 3: Retrieve Source Analyzer Details\n", + "\n", + "Now we'll fetch the complete definition of the source analyzer, including its training data configuration." + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "b2c9ae0c", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Source Analyzer: invoiceLabeledData\n", + "Name: N/A\n", + "Description: \n", + "\n", + "Full analyzer definition:\n", + "{\n", + " \"analyzerId\": \"invoiceLabeledData\",\n", + " \"description\": \"\",\n", + " \"tags\": {\n", + " \"projectId\": \"d7afeaa4-fe05-4df7-bd7c-46f3a94a96cb\",\n", + " \"templateId\": \"document-2025-05-01\"\n", + " },\n", + " \"createdAt\": \"2025-10-22T22:03:08Z\",\n", + " \"lastModifiedAt\": \"2025-10-22T22:03:11Z\",\n", + " \"baseAnalyzerId\": \"prebuilt-documentAnalyzer\",\n", + " \"config\": {\n", + " \"returnDetails\": true,\n", + " \"enableOcr\": true,\n", + " \"enableLayout\": true,\n", + " \"enableFormula\": false,\n", + " \"disableContentFiltering\": false,\n", + " \"tableFormat\": \"html\",\n", + " \"estimateFieldSourceAndConfidence\": false\n", + " },\n", + " \"fieldSchema\": {\n", + " \"fields\": {\n", + " \"CompanyName\": {\n", + " \"type\": \"string\",\n", + " \"method\": \"extract\",\n", + " \"description\": \"Name of the pharmaceutical company involved in the rebate program\"\n", + " },\n", + " \"ProductDetails\": {\n", + " \"type\": \"array\",\n", + " \"description\": \"List of products with rebate and unit details\",\n", + " \"items\": {\n", + " \"type\": \"object\",\n", + " \"description\": \"Details of a single product\",\n", + " \"properties\": {\n", + " 
\"ProductPackageCode\": {\n", + " \"type\": \"string\",\n", + " \"method\": \"extract\",\n", + " \"description\": \"Code representing the product or package\"\n", + " },\n", + " \"ProductName\": {\n", + " \"type\": \"string\",\n", + " \"method\": \"extract\",\n", + " \"description\": \"Name of the product\"\n", + " },\n", + " \"FfsimcoRecordId\": {\n", + " \"type\": \"string\",\n", + " \"method\": \"extract\",\n", + " \"description\": \"Record ID for FFSIMCO\"\n", + " },\n", + " \"RebatePerUnit\": {\n", + " \"type\": \"number\",\n", + " \"method\": \"extract\",\n", + " \"description\": \"Rebate amount per unit of the product\"\n", + " },\n", + " \"AdjustedRebatePerUnit\": {\n", + " \"type\": \"number\",\n", + " \"method\": \"extract\",\n", + " \"description\": \"Adjusted rebate amount per unit\"\n", + " },\n", + " \"UnitsInvoiced\": {\n", + " \"type\": \"number\",\n", + " \"method\": \"extract\",\n", + " \"description\": \"Number of units invoiced\"\n", + " },\n", + " \"UnitsPaid\": {\n", + " \"type\": \"number\",\n", + " \"method\": \"extract\",\n", + " \"description\": \"Number of units for which payment was made\"\n", + " },\n", + " \"RebateAmountInvoiced\": {\n", + " \"type\": \"number\",\n", + " \"method\": \"extract\",\n", + " \"description\": \"Total rebate amount invoiced\"\n", + " },\n", + " \"RebateAmountPaid\": {\n", + " \"type\": \"number\",\n", + " \"method\": \"extract\",\n", + " \"description\": \"Total rebate amount paid\"\n", + " }\n", + " }\n", + " }\n", + " },\n", + " \"TotalPaid\": {\n", + " \"type\": \"number\",\n", + " \"method\": \"extract\",\n", + " \"description\": \"Total payment amount \"\n", + " }\n", + " }\n", + " },\n", + " \"trainingData\": {\n", + " \"containerUrl\": \"https://staistudiote203841201294.blob.core.windows.net/7c123b64-9378-4fa7-a807-081efa839c00-cu\",\n", + " \"kind\": \"blob\",\n", + " \"prefix\": \"labelingProjects/d7afeaa4-fe05-4df7-bd7c-46f3a94a96cb/train\"\n", + " },\n", + " \"warnings\": [],\n", + " \"status\": 
\"ready\",\n", + " \"processingLocation\": \"geography\",\n", + " \"mode\": \"standard\"\n", + "}\n" + ] + } + ], + "source": [ + "# Get detailed information about the source analyzer\n", + "source_analyzer = client.get_analyzer_detail_by_id(SOURCE_ANALYZER_ID)\n", + "\n", + "print(f\"Source Analyzer: {SOURCE_ANALYZER_ID}\")\n", + "print(f\"Name: {source_analyzer.get('name', 'N/A')}\")\n", + "print(f\"Description: {source_analyzer.get('description', 'N/A')}\")\n", + "print(\"\\nFull analyzer definition:\")\n", + "print(json.dumps(source_analyzer, indent=2))" + ] + }, + { + "cell_type": "markdown", + "id": "3eb0b65d", + "metadata": {}, + "source": [ + "## Step 4: Extract Training Data Configuration\n", + "\n", + "Extract the training data configuration from the source analyzer. This includes:\n", + "- **trainingData**: The blob container location with labeled examples\n", + "- **fieldSchema**: The field definitions\n", + "- **tags**: Project and template metadata (important for Azure AI Foundry project association)" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "7c57655f", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "📦 Training Data Configuration:\n", + "{\n", + " \"containerUrl\": \"https://staistudiote203841201294.blob.core.windows.net/7c123b64-9378-4fa7-a807-081efa839c00-cu\",\n", + " \"kind\": \"blob\",\n", + " \"prefix\": \"labelingProjects/d7afeaa4-fe05-4df7-bd7c-46f3a94a96cb/train\"\n", + "}\n", + "\n", + "✅ Found training data at: https://staistudiote203841201294.blob.core.windows.net/7c123b64-9378-4fa7-a807-081efa839c00-cu\n", + " Path prefix: labelingProjects/d7afeaa4-fe05-4df7-bd7c-46f3a94a96cb/train\n", + "\n", + "📚 Knowledge Sources Configuration:\n", + "No knowledge sources configured (this is normal for standard mode)\n", + "\n", + "📋 Field Schema:\n", + "{\n", + " \"fields\": {\n", + " \"CompanyName\": {\n", + " \"type\": \"string\",\n", + " \"method\": \"extract\",\n", + " 
\"description\": \"Name of the pharmaceutical company involved in the rebate program\"\n", + " },\n", + " \"ProductDetails\": {\n", + " \"type\": \"array\",\n", + " \"description\": \"List of products with rebate and unit details\",\n", + " \"items\": {\n", + " \"type\": \"object\",\n", + " \"description\": \"Details of a single product\",\n", + " \"properties\": {\n", + " \"ProductPackageCode\": {\n", + " \"type\": \"string\",\n", + " \"method\": \"extract\",\n", + " \"description\": \"Code representing the product or package\"\n", + " },\n", + " \"ProductName\": {\n", + " \"type\": \"string\",\n", + " \"method\": \"extract\",\n", + " \"description\": \"Name of the product\"\n", + " },\n", + " \"FfsimcoRecordId\": {\n", + " \"type\": \"string\",\n", + " \"method\": \"extract\",\n", + " \"description\": \"Record ID for FFSIMCO\"\n", + " },\n", + " \"RebatePerUnit\": {\n", + " \"type\": \"number\",\n", + " \"method\": \"extract\",\n", + " \"description\": \"Rebate amount per unit of the product\"\n", + " },\n", + " \"AdjustedRebatePerUnit\": {\n", + " \"type\": \"number\",\n", + " \"method\": \"extract\",\n", + " \"description\": \"Adjusted rebate amount per unit\"\n", + " },\n", + " \"UnitsInvoiced\": {\n", + " \"type\": \"number\",\n", + " \"method\": \"extract\",\n", + " \"description\": \"Number of units invoiced\"\n", + " },\n", + " \"UnitsPaid\": {\n", + " \"type\": \"number\",\n", + " \"method\": \"extract\",\n", + " \"description\": \"Number of units for which payment was made\"\n", + " },\n", + " \"RebateAmountInvoiced\": {\n", + " \"type\": \"number\",\n", + " \"method\": \"extract\",\n", + " \"description\": \"Total rebate amount invoiced\"\n", + " },\n", + " \"RebateAmountPaid\": {\n", + " \"type\": \"number\",\n", + " \"method\": \"extract\",\n", + " \"description\": \"Total rebate amount paid\"\n", + " }\n", + " }\n", + " }\n", + " },\n", + " \"TotalPaid\": {\n", + " \"type\": \"number\",\n", + " \"method\": \"extract\",\n", + " \"description\": 
\"Total payment amount \"\n", + " }\n", + " }\n", + "}\n", + "\n", + "🏷️ Tags (Project & Template Metadata):\n", + "{\n", + " \"projectId\": \"d7afeaa4-fe05-4df7-bd7c-46f3a94a96cb\",\n", + " \"templateId\": \"document-2025-05-01\"\n", + "}\n", + "\n", + "✅ Found Project ID: d7afeaa4-fe05-4df7-bd7c-46f3a94a96cb\n", + "✅ Found Template ID: document-2025-05-01\n", + "\n", + "💡 These tags will be copied to ensure the new analyzer appears in the same Azure AI Foundry project.\n" + ] + } + ], + "source": [ + "# Extract training data configuration\n", + "training_data_config = source_analyzer.get('trainingData')\n", + "knowledge_sources_config = source_analyzer.get('knowledgeSources')\n", + "field_schema = source_analyzer.get('fieldSchema', {})\n", + "tags = source_analyzer.get('tags', {})\n", + "\n", + "print(\"📦 Training Data Configuration:\")\n", + "if training_data_config:\n", + " print(json.dumps(training_data_config, indent=2))\n", + " container_url = training_data_config.get('containerUrl', 'N/A')\n", + " prefix = training_data_config.get('prefix', '')\n", + " print(f\"\\n✅ Found training data at: {container_url}\")\n", + " print(f\" Path prefix: {prefix}\")\n", + "else:\n", + " print(\"⚠️ No training data found in this analyzer.\")\n", + " print(\" Please select an analyzer that has training data configured.\")\n", + "\n", + "print(\"\\n📚 Knowledge Sources Configuration:\")\n", + "if knowledge_sources_config:\n", + " print(json.dumps(knowledge_sources_config, indent=2))\n", + "else:\n", + " print(\"No knowledge sources configured (this is normal for standard mode)\")\n", + "\n", + "print(\"\\n📋 Field Schema:\")\n", + "print(json.dumps(field_schema, indent=2))\n", + "\n", + "print(\"\\n🏷️ Tags (Project & Template Metadata):\")\n", + "if tags:\n", + " print(json.dumps(tags, indent=2))\n", + " project_id = tags.get('projectId')\n", + " template_id = tags.get('templateId')\n", + " if project_id:\n", + " print(f\"\\n✅ Found Project ID: {project_id}\")\n", + " if 
template_id:\n", + " print(f\"✅ Found Template ID: {template_id}\")\n", + " print(\"\\n💡 These tags will be copied to ensure the new analyzer appears in the same Azure AI Foundry project.\")\n", + "else:\n", + " print(\"No tags found (the new analyzer may not be associated with a Foundry project)\")" + ] + }, + { + "cell_type": "markdown", + "id": "e7770461", + "metadata": {}, + "source": [ + "## Step 5: Create New Analyzer with Existing Training Data\n", + "\n", + "Now we'll create a new analyzer that references the same training data. This new analyzer will:\n", + "- Use the same blob storage container and path\n", + "- Start with the same field schema (you can modify this)\n", + "- Have its own unique ID\n", + "- **Include the same tags** (projectId and templateId) to ensure it appears in the correct Azure AI Foundry project\n", + "\n", + "### Key Points:\n", + "- **Same resource**: Both analyzers are in the same Azure AI resource\n", + "- **No data duplication**: The training data stays in one place\n", + "- **Same project**: Tags ensure the analyzer appears in the same Foundry project\n", + "- **Independent lifecycle**: Each analyzer can be updated or deleted independently" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "id": "98b0c9c3", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "✅ Including tags from source analyzer (ensures correct project association in Foundry)\n", + " Project ID: d7afeaa4-fe05-4df7-bd7c-46f3a94a96cb\n", + " Template ID: document-2025-05-01\n", + "\n", + "Creating new analyzer: cloned-analyzer-c073f24d-5659-42ed-8ac8-b083bde79a9b\n", + "\n", + "New analyzer payload (ordered to match API structure):\n", + "{\n", + " \"description\": \"Created from invoiceLabeledData with reused training data\",\n", + " \"tags\": {\n", + " \"projectId\": \"d7afeaa4-fe05-4df7-bd7c-46f3a94a96cb\",\n", + " \"templateId\": \"document-2025-05-01\"\n", + " },\n", + " \"baseAnalyzerId\": 
\"prebuilt-documentAnalyzer\",\n", + " \"config\": {\n", + " \"returnDetails\": true,\n", + " \"enableOcr\": true,\n", + " \"enableLayout\": true,\n", + " \"enableFormula\": false,\n", + " \"disableContentFiltering\": false,\n", + " \"tableFormat\": \"html\",\n", + " \"estimateFieldSourceAndConfidence\": false\n", + " },\n", + " \"fieldSchema\": {\n", + " \"fields\": {\n", + " \"CompanyName\": {\n", + " \"type\": \"string\",\n", + " \"method\": \"extract\",\n", + " \"description\": \"Name of the pharmaceutical company involved in the rebate program\"\n", + " },\n", + " \"ProductDetails\": {\n", + " \"type\": \"array\",\n", + " \"description\": \"List of products with rebate and unit details\",\n", + " \"items\": {\n", + " \"type\": \"object\",\n", + " \"description\": \"Details of a single product\",\n", + " \"properties\": {\n", + " \"ProductPackageCode\": {\n", + " \"type\": \"string\",\n", + " \"method\": \"extract\",\n", + " \"description\": \"Code representing the product or package\"\n", + " },\n", + " \"ProductName\": {\n", + " \"type\": \"string\",\n", + " \"method\": \"extract\",\n", + " \"description\": \"Name of the product\"\n", + " },\n", + " \"FfsimcoRecordId\": {\n", + " \"type\": \"string\",\n", + " \"method\": \"extract\",\n", + " \"description\": \"Record ID for FFSIMCO\"\n", + " },\n", + " \"RebatePerUnit\": {\n", + " \"type\": \"number\",\n", + " \"method\": \"extract\",\n", + " \"description\": \"Rebate amount per unit of the product\"\n", + " },\n", + " \"AdjustedRebatePerUnit\": {\n", + " \"type\": \"number\",\n", + " \"method\": \"extract\",\n", + " \"description\": \"Adjusted rebate amount per unit\"\n", + " },\n", + " \"UnitsInvoiced\": {\n", + " \"type\": \"number\",\n", + " \"method\": \"extract\",\n", + " \"description\": \"Number of units invoiced\"\n", + " },\n", + " \"UnitsPaid\": {\n", + " \"type\": \"number\",\n", + " \"method\": \"extract\",\n", + " \"description\": \"Number of units for which payment was made\"\n", + " },\n", + " 
\"RebateAmountInvoiced\": {\n", + " \"type\": \"number\",\n", + " \"method\": \"extract\",\n", + " \"description\": \"Total rebate amount invoiced\"\n", + " },\n", + " \"RebateAmountPaid\": {\n", + " \"type\": \"number\",\n", + " \"method\": \"extract\",\n", + " \"description\": \"Total rebate amount paid\"\n", + " }\n", + " }\n", + " }\n", + " },\n", + " \"TotalPaid\": {\n", + " \"type\": \"number\",\n", + " \"method\": \"extract\",\n", + " \"description\": \"Total payment amount \"\n", + " }\n", + " }\n", + " },\n", + " \"mode\": \"standard\"\n", + "}\n", + "\n", + "📦 Training data will be configured separately:\n", + " Container URL: https://staistudiote203841201294.blob.core.windows.net/7c123b64-9378-4fa7-a807-081efa839c00-cu\n", + " Prefix: labelingProjects/d7afeaa4-fe05-4df7-bd7c-46f3a94a96cb/train\n" + ] + } + ], + "source": [ + "# Verify we have training data before proceeding\n", + "if not training_data_config:\n", + " raise ValueError(\n", + " \"Cannot proceed: Source analyzer does not have training data. \"\n", + " \"Please select an analyzer with training data or create one using the optional cell above.\"\n", + " )\n", + "\n", + "# Create a new analyzer ID\n", + "# Analyzer names must be 1-64 characters and only contain letters, numbers, dots, underscores, or hyphens\n", + "NEW_ANALYZER_ID = \"cloned-analyzer-\" + str(uuid.uuid4())\n", + "\n", + "# Build the new analyzer payload in the correct order matching the API structure\n", + "# Note: Read-only fields like createdAt, lastModifiedAt, status, etc. are omitted as they're set by the service\n", + "new_analyzer_payload = {}\n", + "\n", + "# 1. Analyzer ID (not needed as it's passed separately, but kept for reference)\n", + "# new_analyzer_payload[\"analyzerId\"] = NEW_ANALYZER_ID\n", + "\n", + "# 2. Description\n", + "new_analyzer_payload[\"description\"] = f\"Created from {SOURCE_ANALYZER_ID} with reused training data\"\n", + "\n", + "# 3. 
Tags (projectId and templateId) - IMPORTANT for Foundry project association\n", + "if tags:\n", + " new_analyzer_payload[\"tags\"] = tags\n", + " print(\"✅ Including tags from source analyzer (ensures correct project association in Foundry)\")\n", + " print(f\" Project ID: {tags.get('projectId', 'N/A')}\")\n", + " print(f\" Template ID: {tags.get('templateId', 'N/A')}\")\n", + "else:\n", + " print(\"⚠️ No tags found in source analyzer - new analyzer may not appear in Foundry project\")\n", + "\n", + "# 4. Base Analyzer ID (if present)\n", + "if 'baseAnalyzerId' in source_analyzer:\n", + " new_analyzer_payload['baseAnalyzerId'] = source_analyzer['baseAnalyzerId']\n", + "\n", + "# 5. Config settings\n", + "if 'config' in source_analyzer:\n", + " new_analyzer_payload['config'] = source_analyzer['config']\n", + "\n", + "# 6. Field Schema\n", + "new_analyzer_payload[\"fieldSchema\"] = field_schema\n", + "\n", + "# 7. Training Data - Will be passed separately to begin_create_analyzer()\n", + "# Note: We extract the container URL and prefix to pass as separate parameters\n", + "training_container_sas_url = training_data_config.get('containerUrl', '')\n", + "training_container_prefix = training_data_config.get('prefix', '')\n", + "\n", + "# 8. Knowledge Sources (if present - typically for Pro mode)\n", + "# Extract these separately if they exist\n", + "pro_mode_container_sas_url = \"\"\n", + "pro_mode_container_prefix = \"\"\n", + "if knowledge_sources_config and isinstance(knowledge_sources_config, list) and len(knowledge_sources_config) > 0:\n", + " # Get the first knowledge source (typically there's only one)\n", + " first_knowledge_source = knowledge_sources_config[0]\n", + " pro_mode_container_sas_url = first_knowledge_source.get('containerUrl', '')\n", + " pro_mode_container_prefix = first_knowledge_source.get('prefix', '')\n", + "\n", + "# 9. 
Mode (if present)\n", + "if 'mode' in source_analyzer:\n", + " new_analyzer_payload['mode'] = source_analyzer['mode']\n", + "\n", + "print(f\"\\nCreating new analyzer: {NEW_ANALYZER_ID}\")\n", + "print(\"\\nNew analyzer payload (ordered to match API structure):\")\n", + "print(json.dumps(new_analyzer_payload, indent=2))\n", + "\n", + "print(\"\\n📦 Training data will be configured separately:\")\n", + "print(f\" Container URL: {training_container_sas_url}\")\n", + "print(f\" Prefix: {training_container_prefix}\")\n", + "\n", + "if pro_mode_container_sas_url:\n", + " print(\"\\n📚 Pro mode reference docs will be configured separately:\")\n", + " print(f\" Container URL: {pro_mode_container_sas_url}\")\n", + " print(f\" Prefix: {pro_mode_container_prefix}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "id": "385a0867", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:python.content_understanding_client:Analyzer cloned-analyzer-c073f24d-5659-42ed-8ac8-b083bde79a9b create request accepted.\n", + "INFO:python.content_understanding_client:Request a22ddf12-3156-4a9a-9675-7b85789a8686 in progress ...\n", + "INFO:python.content_understanding_client:Request a22ddf12-3156-4a9a-9675-7b85789a8686 in progress ...\n", + "INFO:python.content_understanding_client:Request a22ddf12-3156-4a9a-9675-7b85789a8686 in progress ...\n", + "INFO:python.content_understanding_client:Request a22ddf12-3156-4a9a-9675-7b85789a8686 in progress ...\n", + "INFO:python.content_understanding_client:Request a22ddf12-3156-4a9a-9675-7b85789a8686 in progress ...\n", + "INFO:python.content_understanding_client:Request a22ddf12-3156-4a9a-9675-7b85789a8686 in progress ...\n", + "INFO:python.content_understanding_client:Request a22ddf12-3156-4a9a-9675-7b85789a8686 in progress ...\n", + "INFO:python.content_understanding_client:Request a22ddf12-3156-4a9a-9675-7b85789a8686 in progress ...\n", + 
"INFO:python.content_understanding_client:Request a22ddf12-3156-4a9a-9675-7b85789a8686 in progress ...\n", + "INFO:python.content_understanding_client:Request a22ddf12-3156-4a9a-9675-7b85789a8686 in progress ...\n", + "INFO:python.content_understanding_client:Request a22ddf12-3156-4a9a-9675-7b85789a8686 in progress ...\n", + "INFO:python.content_understanding_client:Request a22ddf12-3156-4a9a-9675-7b85789a8686 in progress ...\n", + "INFO:python.content_understanding_client:Request a22ddf12-3156-4a9a-9675-7b85789a8686 in progress ...\n", + "INFO:python.content_understanding_client:Request a22ddf12-3156-4a9a-9675-7b85789a8686 in progress ...\n", + "INFO:python.content_understanding_client:Request a22ddf12-3156-4a9a-9675-7b85789a8686 in progress ...\n", + "INFO:python.content_understanding_client:Request a22ddf12-3156-4a9a-9675-7b85789a8686 in progress ...\n", + "INFO:python.content_understanding_client:Request a22ddf12-3156-4a9a-9675-7b85789a8686 in progress ...\n", + "INFO:python.content_understanding_client:Request a22ddf12-3156-4a9a-9675-7b85789a8686 in progress ...\n", + "INFO:python.content_understanding_client:Request a22ddf12-3156-4a9a-9675-7b85789a8686 in progress ...\n", + "INFO:python.content_understanding_client:Request a22ddf12-3156-4a9a-9675-7b85789a8686 in progress ...\n", + "INFO:python.content_understanding_client:Request a22ddf12-3156-4a9a-9675-7b85789a8686 in progress ...\n", + "INFO:python.content_understanding_client:Request a22ddf12-3156-4a9a-9675-7b85789a8686 in progress ...\n", + "INFO:python.content_understanding_client:Request a22ddf12-3156-4a9a-9675-7b85789a8686 in progress ...\n", + "INFO:python.content_understanding_client:Request a22ddf12-3156-4a9a-9675-7b85789a8686 in progress ...\n", + "INFO:python.content_understanding_client:Request a22ddf12-3156-4a9a-9675-7b85789a8686 in progress ...\n", + "INFO:python.content_understanding_client:Request a22ddf12-3156-4a9a-9675-7b85789a8686 in progress ...\n", + 
"INFO:python.content_understanding_client:Request a22ddf12-3156-4a9a-9675-7b85789a8686 in progress ...\n", + "INFO:python.content_understanding_client:Request a22ddf12-3156-4a9a-9675-7b85789a8686 in progress ...\n", + "INFO:python.content_understanding_client:Request a22ddf12-3156-4a9a-9675-7b85789a8686 in progress ...\n", + "INFO:python.content_understanding_client:Request a22ddf12-3156-4a9a-9675-7b85789a8686 in progress ...\n", + "INFO:python.content_understanding_client:Request a22ddf12-3156-4a9a-9675-7b85789a8686 in progress ...\n", + "INFO:python.content_understanding_client:Request a22ddf12-3156-4a9a-9675-7b85789a8686 in progress ...\n", + "INFO:python.content_understanding_client:Request a22ddf12-3156-4a9a-9675-7b85789a8686 in progress ...\n", + "INFO:python.content_understanding_client:Request a22ddf12-3156-4a9a-9675-7b85789a8686 in progress ...\n", + "INFO:python.content_understanding_client:Request a22ddf12-3156-4a9a-9675-7b85789a8686 in progress ...\n", + "INFO:python.content_understanding_client:Request a22ddf12-3156-4a9a-9675-7b85789a8686 in progress ...\n", + "INFO:python.content_understanding_client:Request a22ddf12-3156-4a9a-9675-7b85789a8686 in progress ...\n", + "INFO:python.content_understanding_client:Request a22ddf12-3156-4a9a-9675-7b85789a8686 in progress ...\n", + "INFO:python.content_understanding_client:Request a22ddf12-3156-4a9a-9675-7b85789a8686 in progress ...\n", + "INFO:python.content_understanding_client:Request a22ddf12-3156-4a9a-9675-7b85789a8686 in progress ...\n", + "INFO:python.content_understanding_client:Request a22ddf12-3156-4a9a-9675-7b85789a8686 in progress ...\n", + "INFO:python.content_understanding_client:Request a22ddf12-3156-4a9a-9675-7b85789a8686 in progress ...\n", + "INFO:python.content_understanding_client:Request a22ddf12-3156-4a9a-9675-7b85789a8686 in progress ...\n", + "INFO:python.content_understanding_client:Request a22ddf12-3156-4a9a-9675-7b85789a8686 in progress ...\n", + 
"INFO:python.content_understanding_client:Request a22ddf12-3156-4a9a-9675-7b85789a8686 in progress ...\n", + "INFO:python.content_understanding_client:Request a22ddf12-3156-4a9a-9675-7b85789a8686 in progress ...\n", + "INFO:python.content_understanding_client:Request a22ddf12-3156-4a9a-9675-7b85789a8686 in progress ...\n", + "INFO:python.content_understanding_client:Request a22ddf12-3156-4a9a-9675-7b85789a8686 in progress ...\n", + "INFO:python.content_understanding_client:Request a22ddf12-3156-4a9a-9675-7b85789a8686 in progress ...\n", + "INFO:python.content_understanding_client:Request a22ddf12-3156-4a9a-9675-7b85789a8686 in progress ...\n", + "INFO:python.content_understanding_client:Request a22ddf12-3156-4a9a-9675-7b85789a8686 in progress ...\n", + "INFO:python.content_understanding_client:Request a22ddf12-3156-4a9a-9675-7b85789a8686 in progress ...\n", + "INFO:python.content_understanding_client:Request a22ddf12-3156-4a9a-9675-7b85789a8686 in progress ...\n", + "INFO:python.content_understanding_client:Request a22ddf12-3156-4a9a-9675-7b85789a8686 in progress ...\n", + "INFO:python.content_understanding_client:Request a22ddf12-3156-4a9a-9675-7b85789a8686 in progress ...\n", + "INFO:python.content_understanding_client:Request a22ddf12-3156-4a9a-9675-7b85789a8686 in progress ...\n", + "INFO:python.content_understanding_client:Request a22ddf12-3156-4a9a-9675-7b85789a8686 in progress ...\n", + "INFO:python.content_understanding_client:Request a22ddf12-3156-4a9a-9675-7b85789a8686 in progress ...\n", + "INFO:python.content_understanding_client:Request a22ddf12-3156-4a9a-9675-7b85789a8686 in progress ...\n", + "INFO:python.content_understanding_client:Request a22ddf12-3156-4a9a-9675-7b85789a8686 in progress ...\n", + "INFO:python.content_understanding_client:Request a22ddf12-3156-4a9a-9675-7b85789a8686 in progress ...\n", + "INFO:python.content_understanding_client:Request a22ddf12-3156-4a9a-9675-7b85789a8686 in progress ...\n", + 
"INFO:python.content_understanding_client:Request a22ddf12-3156-4a9a-9675-7b85789a8686 in progress ...\n", + "INFO:python.content_understanding_client:Request a22ddf12-3156-4a9a-9675-7b85789a8686 in progress ...\n", + "INFO:python.content_understanding_client:Request a22ddf12-3156-4a9a-9675-7b85789a8686 in progress ...\n", + "INFO:python.content_understanding_client:Request a22ddf12-3156-4a9a-9675-7b85789a8686 in progress ...\n", + "INFO:python.content_understanding_client:Request a22ddf12-3156-4a9a-9675-7b85789a8686 in progress ...\n", + "INFO:python.content_understanding_client:Request a22ddf12-3156-4a9a-9675-7b85789a8686 in progress ...\n", + "INFO:python.content_understanding_client:Request a22ddf12-3156-4a9a-9675-7b85789a8686 in progress ...\n", + "INFO:python.content_understanding_client:Request a22ddf12-3156-4a9a-9675-7b85789a8686 in progress ...\n", + "INFO:python.content_understanding_client:Request a22ddf12-3156-4a9a-9675-7b85789a8686 in progress ...\n", + "INFO:python.content_understanding_client:Request a22ddf12-3156-4a9a-9675-7b85789a8686 in progress ...\n", + "INFO:python.content_understanding_client:Request a22ddf12-3156-4a9a-9675-7b85789a8686 in progress ...\n", + "INFO:python.content_understanding_client:Request a22ddf12-3156-4a9a-9675-7b85789a8686 in progress ...\n", + "INFO:python.content_understanding_client:Request a22ddf12-3156-4a9a-9675-7b85789a8686 in progress ...\n", + "INFO:python.content_understanding_client:Request a22ddf12-3156-4a9a-9675-7b85789a8686 in progress ...\n", + "INFO:python.content_understanding_client:Request a22ddf12-3156-4a9a-9675-7b85789a8686 in progress ...\n", + "INFO:python.content_understanding_client:Request a22ddf12-3156-4a9a-9675-7b85789a8686 in progress ...\n", + "INFO:python.content_understanding_client:Request a22ddf12-3156-4a9a-9675-7b85789a8686 in progress ...\n", + "INFO:python.content_understanding_client:Request a22ddf12-3156-4a9a-9675-7b85789a8686 in progress ...\n", + 
"INFO:python.content_understanding_client:Request a22ddf12-3156-4a9a-9675-7b85789a8686 in progress ...\n", + "INFO:python.content_understanding_client:Request a22ddf12-3156-4a9a-9675-7b85789a8686 in progress ...\n", + "INFO:python.content_understanding_client:Request a22ddf12-3156-4a9a-9675-7b85789a8686 in progress ...\n", + "INFO:python.content_understanding_client:Request a22ddf12-3156-4a9a-9675-7b85789a8686 in progress ...\n", + "INFO:python.content_understanding_client:Request a22ddf12-3156-4a9a-9675-7b85789a8686 in progress ...\n", + "INFO:python.content_understanding_client:Request a22ddf12-3156-4a9a-9675-7b85789a8686 in progress ...\n", + "INFO:python.content_understanding_client:Request a22ddf12-3156-4a9a-9675-7b85789a8686 in progress ...\n", + "INFO:python.content_understanding_client:Request a22ddf12-3156-4a9a-9675-7b85789a8686 in progress ...\n", + "INFO:python.content_understanding_client:Request a22ddf12-3156-4a9a-9675-7b85789a8686 in progress ...\n", + "INFO:python.content_understanding_client:Request a22ddf12-3156-4a9a-9675-7b85789a8686 in progress ...\n", + "INFO:python.content_understanding_client:Request a22ddf12-3156-4a9a-9675-7b85789a8686 in progress ...\n", + "INFO:python.content_understanding_client:Request a22ddf12-3156-4a9a-9675-7b85789a8686 in progress ...\n", + "INFO:python.content_understanding_client:Request a22ddf12-3156-4a9a-9675-7b85789a8686 in progress ...\n", + "INFO:python.content_understanding_client:Request a22ddf12-3156-4a9a-9675-7b85789a8686 in progress ...\n", + "INFO:python.content_understanding_client:Request a22ddf12-3156-4a9a-9675-7b85789a8686 in progress ...\n", + "INFO:python.content_understanding_client:Request a22ddf12-3156-4a9a-9675-7b85789a8686 in progress ...\n", + "INFO:python.content_understanding_client:Request a22ddf12-3156-4a9a-9675-7b85789a8686 in progress ...\n", + "INFO:python.content_understanding_client:Request a22ddf12-3156-4a9a-9675-7b85789a8686 in progress ...\n", + 
"INFO:python.content_understanding_client:Request a22ddf12-3156-4a9a-9675-7b85789a8686 in progress ...\n", + "INFO:python.content_understanding_client:Request a22ddf12-3156-4a9a-9675-7b85789a8686 in progress ...\n", + "INFO:python.content_understanding_client:Request a22ddf12-3156-4a9a-9675-7b85789a8686 in progress ...\n", + "INFO:python.content_understanding_client:Request a22ddf12-3156-4a9a-9675-7b85789a8686 in progress ...\n", + "INFO:python.content_understanding_client:Request a22ddf12-3156-4a9a-9675-7b85789a8686 in progress ...\n", + "INFO:python.content_understanding_client:Request a22ddf12-3156-4a9a-9675-7b85789a8686 in progress ...\n", + "INFO:python.content_understanding_client:Request a22ddf12-3156-4a9a-9675-7b85789a8686 in progress ...\n", + "INFO:python.content_understanding_client:Request a22ddf12-3156-4a9a-9675-7b85789a8686 in progress ...\n", + "INFO:python.content_understanding_client:Request a22ddf12-3156-4a9a-9675-7b85789a8686 in progress ...\n", + "INFO:python.content_understanding_client:Request a22ddf12-3156-4a9a-9675-7b85789a8686 in progress ...\n", + "INFO:python.content_understanding_client:Request a22ddf12-3156-4a9a-9675-7b85789a8686 in progress ...\n", + "INFO:python.content_understanding_client:Request a22ddf12-3156-4a9a-9675-7b85789a8686 in progress ...\n", + "INFO:python.content_understanding_client:Request a22ddf12-3156-4a9a-9675-7b85789a8686 in progress ...\n", + "INFO:python.content_understanding_client:Request a22ddf12-3156-4a9a-9675-7b85789a8686 in progress ...\n", + "INFO:python.content_understanding_client:Request a22ddf12-3156-4a9a-9675-7b85789a8686 in progress ...\n", + "INFO:python.content_understanding_client:Request a22ddf12-3156-4a9a-9675-7b85789a8686 in progress ...\n", + "INFO:python.content_understanding_client:Request a22ddf12-3156-4a9a-9675-7b85789a8686 in progress ...\n", + "INFO:python.content_understanding_client:Request a22ddf12-3156-4a9a-9675-7b85789a8686 in progress ...\n", + 
"INFO:python.content_understanding_client:Request a22ddf12-3156-4a9a-9675-7b85789a8686 in progress ...\n", + "INFO:python.content_understanding_client:Request a22ddf12-3156-4a9a-9675-7b85789a8686 in progress ...\n", + "INFO:python.content_understanding_client:Request a22ddf12-3156-4a9a-9675-7b85789a8686 in progress ...\n", + "INFO:python.content_understanding_client:Request a22ddf12-3156-4a9a-9675-7b85789a8686 in progress ...\n", + "INFO:python.content_understanding_client:Request a22ddf12-3156-4a9a-9675-7b85789a8686 in progress ...\n", + "INFO:python.content_understanding_client:Request a22ddf12-3156-4a9a-9675-7b85789a8686 in progress ...\n", + "INFO:python.content_understanding_client:Request a22ddf12-3156-4a9a-9675-7b85789a8686 in progress ...\n", + "INFO:python.content_understanding_client:Request a22ddf12-3156-4a9a-9675-7b85789a8686 in progress ...\n", + "INFO:python.content_understanding_client:Request a22ddf12-3156-4a9a-9675-7b85789a8686 in progress ...\n", + "INFO:python.content_understanding_client:Request a22ddf12-3156-4a9a-9675-7b85789a8686 in progress ...\n", + "INFO:python.content_understanding_client:Request result is ready after 152.25 seconds.\n", + "INFO:python.content_understanding_client:Request result is ready after 152.25 seconds.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "✅ Successfully created new analyzer: cloned-analyzer-c073f24d-5659-42ed-8ac8-b083bde79a9b\n", + "\n", + "Creation result:\n", + "{\n", + " \"id\": \"a22ddf12-3156-4a9a-9675-7b85789a8686\",\n", + " \"status\": \"Succeeded\",\n", + " \"result\": {\n", + " \"analyzerId\": \"cloned-analyzer-c073f24d-5659-42ed-8ac8-b083bde79a9b\",\n", + " \"description\": \"Created from invoiceLabeledData with reused training data\",\n", + " \"tags\": {\n", + " \"projectId\": \"d7afeaa4-fe05-4df7-bd7c-46f3a94a96cb\",\n", + " \"templateId\": \"document-2025-05-01\"\n", + " },\n", + " \"createdAt\": \"2025-10-22T22:44:56Z\",\n", + " \"lastModifiedAt\": 
\"2025-10-22T22:47:27Z\",\n", + " \"baseAnalyzerId\": \"prebuilt-documentAnalyzer\",\n", + " \"config\": {\n", + " \"returnDetails\": true,\n", + " \"enableOcr\": true,\n", + " \"enableLayout\": true,\n", + " \"enableFormula\": false,\n", + " \"disableContentFiltering\": false,\n", + " \"tableFormat\": \"html\",\n", + " \"estimateFieldSourceAndConfidence\": false\n", + " },\n", + " \"fieldSchema\": {\n", + " \"fields\": {\n", + " \"CompanyName\": {\n", + " \"type\": \"string\",\n", + " \"method\": \"extract\",\n", + " \"description\": \"Name of the pharmaceutical company involved in the rebate program\"\n", + " },\n", + " \"ProductDetails\": {\n", + " \"type\": \"array\",\n", + " \"description\": \"List of products with rebate and unit details\",\n", + " \"items\": {\n", + " \"type\": \"object\",\n", + " \"description\": \"Details of a single product\",\n", + " \"properties\": {\n", + " \"ProductPackageCode\": {\n", + " \"type\": \"string\",\n", + " \"method\": \"extract\",\n", + " \"description\": \"Code representing the product or package\"\n", + " },\n", + " \"ProductName\": {\n", + " \"type\": \"string\",\n", + " \"method\": \"extract\",\n", + " \"description\": \"Name of the product\"\n", + " },\n", + " \"FfsimcoRecordId\": {\n", + " \"type\": \"string\",\n", + " \"method\": \"extract\",\n", + " \"description\": \"Record ID for FFSIMCO\"\n", + " },\n", + " \"RebatePerUnit\": {\n", + " \"type\": \"number\",\n", + " \"method\": \"extract\",\n", + " \"description\": \"Rebate amount per unit of the product\"\n", + " },\n", + " \"AdjustedRebatePerUnit\": {\n", + " \"type\": \"number\",\n", + " \"method\": \"extract\",\n", + " \"description\": \"Adjusted rebate amount per unit\"\n", + " },\n", + " \"UnitsInvoiced\": {\n", + " \"type\": \"number\",\n", + " \"method\": \"extract\",\n", + " \"description\": \"Number of units invoiced\"\n", + " },\n", + " \"UnitsPaid\": {\n", + " \"type\": \"number\",\n", + " \"method\": \"extract\",\n", + " \"description\": \"Number of 
units for which payment was made\"\n", + " },\n", + " \"RebateAmountInvoiced\": {\n", + " \"type\": \"number\",\n", + " \"method\": \"extract\",\n", + " \"description\": \"Total rebate amount invoiced\"\n", + " },\n", + " \"RebateAmountPaid\": {\n", + " \"type\": \"number\",\n", + " \"method\": \"extract\",\n", + " \"description\": \"Total rebate amount paid\"\n", + " }\n", + " }\n", + " }\n", + " },\n", + " \"TotalPaid\": {\n", + " \"type\": \"number\",\n", + " \"method\": \"extract\",\n", + " \"description\": \"Total payment amount \"\n", + " }\n", + " }\n", + " },\n", + " \"trainingData\": {\n", + " \"containerUrl\": \"https://staistudiote203841201294.blob.core.windows.net/7c123b64-9378-4fa7-a807-081efa839c00-cu\",\n", + " \"kind\": \"blob\",\n", + " \"prefix\": \"labelingProjects/d7afeaa4-fe05-4df7-bd7c-46f3a94a96cb/train/\"\n", + " },\n", + " \"warnings\": [],\n", + " \"status\": \"ready\",\n", + " \"processingLocation\": \"geography\",\n", + " \"mode\": \"standard\"\n", + " }\n", + "}\n" + ] + } + ], + "source": [ + "# Create the new analyzer\n", + "# Pass training data and knowledge sources as separate parameters\n", + "response = client.begin_create_analyzer(\n", + " NEW_ANALYZER_ID,\n", + " analyzer_template=new_analyzer_payload,\n", + " training_storage_container_sas_url=training_container_sas_url,\n", + " training_storage_container_path_prefix=training_container_prefix,\n", + ")\n", + "\n", + "result = client.poll_result(response)\n", + "\n", + "if result and result.get('status') == 'Succeeded':\n", + " print(f\"✅ Successfully created new analyzer: {NEW_ANALYZER_ID}\")\n", + " print(\"\\nCreation result:\")\n", + " print(json.dumps(result, indent=2))\n", + "else:\n", + " print(\"⚠️ Analyzer creation encountered an issue.\")\n", + " print(json.dumps(result, indent=2))" + ] + }, + { + "cell_type": "markdown", + "id": "63295659", + "metadata": {}, + "source": [ + "## Step 6: Verify the New Analyzer\n", + "\n", + "Let's confirm the new analyzer was created 
correctly and is using the same training data." + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "id": "685ff06f", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "New Analyzer: cloned-analyzer-c073f24d-5659-42ed-8ac8-b083bde79a9b\n", + "Name: N/A\n", + "Description: Created from invoiceLabeledData with reused training data\n", + "\n", + "Training Data Configuration:\n", + "{\n", + " \"containerUrl\": \"https://staistudiote203841201294.blob.core.windows.net/7c123b64-9378-4fa7-a807-081efa839c00-cu\",\n", + " \"kind\": \"blob\",\n", + " \"prefix\": \"labelingProjects/d7afeaa4-fe05-4df7-bd7c-46f3a94a96cb/train/\"\n", + "}\n", + "\n", + "✅ Verification successful: Both analyzers reference the same training data location!\n" + ] + } + ], + "source": [ + "# Get details of the newly created analyzer\n", + "new_analyzer = client.get_analyzer_detail_by_id(NEW_ANALYZER_ID)\n", + "\n", + "print(f\"New Analyzer: {NEW_ANALYZER_ID}\")\n", + "print(f\"Name: {new_analyzer.get('name', 'N/A')}\")\n", + "print(f\"Description: {new_analyzer.get('description', 'N/A')}\")\n", + "print(\"\\nTraining Data Configuration:\")\n", + "print(json.dumps(new_analyzer.get('trainingData', {}), indent=2))\n", + "\n", + "# Verify the training data location matches\n", + "new_training_data = new_analyzer.get('trainingData', {})\n", + "original_container = training_data_config.get('containerUrl', '')\n", + "new_container = new_training_data.get('containerUrl', '')\n", + "\n", + "if original_container == new_container:\n", + " print(\"\\n✅ Verification successful: Both analyzers reference the same training data location!\")\n", + "else:\n", + " print(\"\\n⚠️ Warning: Training data locations don't match.\")\n", + " print(f\"Original: {original_container}\")\n", + " print(f\"New: {new_container}\")" + ] + }, + { + "cell_type": "markdown", + "id": "fe3352c9", + "metadata": {}, + "source": [ + "## Step 7: Test Both Analyzers\n", + "\n", + "Now 
let's test both analyzers with a sample file to verify they both work correctly with the shared training data." + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "id": "cc934efd", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Testing with file: ../data/receipt.png\n" + ] + } + ], + "source": [ + "# Specify a test file - adjust this path based on your analyzer type\n", + "# For receipt analyzers:\n", + "test_file = \"../data/receipt.png\"\n", + "\n", + "# For invoice analyzers:\n", + "# test_file = \"../data/invoice.pdf\"\n", + "\n", + "# For custom documents:\n", + "# test_file = \"../data/your-document.pdf\"\n", + "\n", + "# Verify the file exists\n", + "if not Path(test_file).exists():\n", + " print(f\"⚠️ Test file not found: {test_file}\")\n", + " print(\"Please adjust the test_file path to match your use case.\")\n", + "else:\n", + " print(f\"Testing with file: {test_file}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "id": "273dd85c", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "📝 Analyzing with SOURCE analyzer: invoiceLabeledData\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:python.content_understanding_client:Analyzing file ../data/receipt.png with analyzer: invoiceLabeledData\n", + "INFO:python.content_understanding_client:Request 80b00372-a498-4564-9ff1-1e6901778a2d in progress ...\n", + "INFO:python.content_understanding_client:Request 80b00372-a498-4564-9ff1-1e6901778a2d in progress ...\n", + "INFO:python.content_understanding_client:Request 80b00372-a498-4564-9ff1-1e6901778a2d in progress ...\n", + "INFO:python.content_understanding_client:Request 80b00372-a498-4564-9ff1-1e6901778a2d in progress ...\n", + "INFO:python.content_understanding_client:Request result is ready after 4.71 seconds.\n", + "INFO:python.content_understanding_client:Request result is ready after 4.71 
seconds.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Source Analyzer Results:\n", + "Extracted 3 field(s)\n", + " - CompanyName: {'type': 'string', 'valueString': 'Contoso'}\n", + " - ProductDetails: {'type': 'array'}\n", + " - TotalPaid: {'type': 'number', 'valueNumber': 2516.28}\n" + ] + } + ], + "source": [ + "# Test the original analyzer\n", + "if Path(test_file).exists():\n", + " print(f\"\\n📝 Analyzing with SOURCE analyzer: {SOURCE_ANALYZER_ID}\")\n", + " response_source = client.begin_analyze(SOURCE_ANALYZER_ID, file_location=test_file)\n", + " result_source = client.poll_result(response_source)\n", + " \n", + " print(\"\\nSource Analyzer Results:\")\n", + " # Print a summary of extracted fields\n", + " if result_source.get('status') == 'Succeeded':\n", + " result_data = result_source.get('result', {})\n", + " fields = result_data.get('contents', [{}])[0].get('fields', {})\n", + " print(f\"Extracted {len(fields)} field(s)\")\n", + " for field_name, field_value in fields.items():\n", + " print(f\" - {field_name}: {field_value}\")\n", + " else:\n", + " print(json.dumps(result_source, indent=2))" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "id": "e9654313", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "📝 Analyzing with NEW analyzer: cloned-analyzer-c073f24d-5659-42ed-8ac8-b083bde79a9b\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:python.content_understanding_client:Analyzing file ../data/receipt.png with analyzer: cloned-analyzer-c073f24d-5659-42ed-8ac8-b083bde79a9b\n", + "INFO:python.content_understanding_client:Request 5d982b83-4b1c-4e99-b045-48e36cb5a7e3 in progress ...\n", + "INFO:python.content_understanding_client:Request 5d982b83-4b1c-4e99-b045-48e36cb5a7e3 in progress ...\n", + "INFO:python.content_understanding_client:Request 5d982b83-4b1c-4e99-b045-48e36cb5a7e3 in progress ...\n", + 
"INFO:python.content_understanding_client:Request 5d982b83-4b1c-4e99-b045-48e36cb5a7e3 in progress ...\n", + "INFO:python.content_understanding_client:Request result is ready after 4.72 seconds.\n", + "INFO:python.content_understanding_client:Request result is ready after 4.72 seconds.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "New Analyzer Results:\n", + "Extracted 3 field(s)\n", + " - CompanyName: {'type': 'string', 'valueString': 'Contoso'}\n", + " - ProductDetails: {'type': 'array'}\n", + " - TotalPaid: {'type': 'number', 'valueNumber': 2516.28}\n", + "\n", + "✅ Both analyzers successfully processed the file using the shared training data!\n" + ] + } + ], + "source": [ + "# Test the new analyzer\n", + "if Path(test_file).exists():\n", + " print(f\"\\n📝 Analyzing with NEW analyzer: {NEW_ANALYZER_ID}\")\n", + " response_new = client.begin_analyze(NEW_ANALYZER_ID, file_location=test_file)\n", + " result_new = client.poll_result(response_new)\n", + " \n", + " print(\"\\nNew Analyzer Results:\")\n", + " # Print a summary of extracted fields\n", + " if result_new.get('status') == 'Succeeded':\n", + " result_data = result_new.get('result', {})\n", + " fields = result_data.get('contents', [{}])[0].get('fields', {})\n", + " print(f\"Extracted {len(fields)} field(s)\")\n", + " for field_name, field_value in fields.items():\n", + " print(f\" - {field_name}: {field_value}\")\n", + " else:\n", + " print(json.dumps(result_new, indent=2))\n", + " \n", + " print(\"\\n✅ Both analyzers successfully processed the file using the shared training data!\")" + ] + }, + { + "cell_type": "markdown", + "id": "f913b6dd", + "metadata": {}, + "source": [ + "## Step 8: Compare Results (Optional)\n", + "\n", + "Let's compare the full results from both analyzers side by side." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c6467b3f", + "metadata": {}, + "outputs": [], + "source": [ + "if Path(test_file).exists():\n", + " print(\"=\" * 80)\n", + " print(\"SOURCE ANALYZER FULL RESULTS\")\n", + " print(\"=\" * 80)\n", + " print(json.dumps(result_source, indent=2))\n", + " \n", + " print(\"\\n\" + \"=\" * 80)\n", + " print(\"NEW ANALYZER FULL RESULTS\")\n", + " print(\"=\" * 80)\n", + " print(json.dumps(result_new, indent=2))" + ] + }, + { + "cell_type": "markdown", + "id": "5f65f05c", + "metadata": {}, + "source": [ + "## Step 9: Cleanup (Optional)\n", + "\n", + "If you want to clean up the test analyzers, you can delete them. In production, you typically keep analyzers for reuse.\n", + "\n", + "⚠️ **Warning**: This will permanently delete the analyzer. The training data in blob storage will remain unaffected." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "00cde3ff", + "metadata": {}, + "outputs": [], + "source": [ + "# Uncomment to delete the new analyzer\n", + "# print(f\"Deleting new analyzer: {NEW_ANALYZER_ID}\")\n", + "# client.delete_analyzer(NEW_ANALYZER_ID)\n", + "# print(\"✅ New analyzer deleted\")\n", + "\n", + "# Uncomment to also delete the source analyzer (be careful!)\n", + "# print(f\"Deleting source analyzer: {SOURCE_ANALYZER_ID}\")\n", + "# client.delete_analyzer(SOURCE_ANALYZER_ID)\n", + "# print(\"✅ Source analyzer deleted\")" + ] + }, + { + "cell_type": "markdown", + "id": "d952dfef", + "metadata": {}, + "source": [ + "## Summary\n", + "\n", + "🎉 **Congratulations!** You have successfully:\n", + "\n", + "✅ Retrieved an existing analyzer with training data \n", + "✅ Extracted the training data configuration \n", + "✅ Created a new analyzer referencing the same training data \n", + "✅ Verified both analyzers work correctly \n", + "✅ Tested both analyzers with a sample file \n", + "\n", + "### Key Takeaways\n", + "\n", + "- **No data duplication**: Both analyzers reference 
the same blob storage location\n", + "- **Same resource**: Both analyzers use the same authentication and access permissions\n", + "- **Field portability**: You can maintain stable `fieldId`s across different analyzer versions\n", + "- **Rapid iteration**: Test schema changes quickly without re-uploading training data\n", + "\n", + "### Best Practices\n", + "\n", + "1. **Stable field IDs**: Keep `fieldId`s consistent across analyzers for easier migration\n", + "2. **Version control**: Maintain analyzer schemas in source control\n", + "3. **Documentation**: Document which blob paths contain which training datasets\n", + "4. **Testing**: Always test a new analyzer before deleting the original\n", + "5. **Naming conventions**: Use descriptive analyzer IDs that indicate purpose and version\n", + "\n", + "### Next Steps\n", + "\n", + "- Modify the field schema in the new analyzer to test different configurations\n", + "- Add additional training data to improve both analyzers\n", + "- Use this pattern to create A/B testing scenarios\n", + "- Explore other notebooks:\n", + " - [analyzer_training.ipynb](./analyzer_training.ipynb) - Create analyzers with training data\n", + " - [field_extraction.ipynb](./field_extraction.ipynb) - Extract fields from documents\n", + " - [management.ipynb](./management.ipynb) - Manage analyzer lifecycle" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.13" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From 50557e6b4d690d9d3d3243f5a0bed2b478bfd1cc Mon Sep 17 00:00:00 2001 From: Joe Filcik Date: Thu, 23 Oct 2025 15:00:30 -0400 Subject: [PATCH 2/6] removing extra analyzer list and add clear note to add analyzer name --- 
.../move_training_data_across_analyzers.ipynb | 2051 +---------------- 1 file changed, 9 insertions(+), 2042 deletions(-) diff --git a/notebooks/move_training_data_across_analyzers.ipynb b/notebooks/move_training_data_across_analyzers.ipynb index 4117155..6bc6195 100644 --- a/notebooks/move_training_data_across_analyzers.ipynb +++ b/notebooks/move_training_data_across_analyzers.ipynb @@ -246,2045 +246,10 @@ }, { "cell_type": "code", - "execution_count": 7, - "id": "fcbc218a", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Found 675 analyzer(s) in your resource\n", - "\n", - "Available analyzers:\n", - "1. ID: prebuilt-audioAnalyzer\n", - " Name: N/A\n", - "\n", - "2. ID: prebuilt-callCenter\n", - " Name: N/A\n", - "\n", - "3. ID: prebuilt-contract\n", - " Name: N/A\n", - "\n", - "4. ID: prebuilt-documentAnalyzer\n", - " Name: N/A\n", - "\n", - "5. ID: prebuilt-imageAnalyzer\n", - " Name: N/A\n", - "\n", - "6. ID: prebuilt-invoice\n", - " Name: N/A\n", - "\n", - "7. ID: prebuilt-videoAnalyzer\n", - " Name: N/A\n", - "\n", - "8. ID: 123\n", - " Name: N/A\n", - "\n", - "9. ID: Test-description\n", - " Name: N/A\n", - "\n", - "10. ID: Test\n", - " Name: N/A\n", - "\n", - "11. ID: abc\n", - " Name: N/A\n", - "\n", - "12. ID: audio-250808\n", - " Name: N/A\n", - "\n", - "13. ID: auto-highlight-analyzer-1753389013\n", - " Name: N/A\n", - "\n", - "14. ID: auto-highlight-analyzer-1753393121\n", - " Name: N/A\n", - "\n", - "15. ID: auto-highlight-analyzer-1753727044\n", - " Name: N/A\n", - "\n", - "16. ID: auto-highlight-analyzer-1753728638\n", - " Name: N/A\n", - "\n", - "17. ID: auto-highlight-analyzer-1753822646\n", - " Name: N/A\n", - "\n", - "18. ID: auto-highlight-analyzer-1753823934\n", - " Name: N/A\n", - "\n", - "19. ID: auto-highlight-analyzer-1753826664\n", - " Name: N/A\n", - "\n", - "20. ID: auto-highlight-analyzer-1753829625\n", - " Name: N/A\n", - "\n", - "21. 
ID: auto-highlight-analyzer-1754935354\n", - " Name: N/A\n", - "\n", - "22. ID: auto-labeling-model-1748319168608-457\n", - " Name: N/A\n", - "\n", - "23. ID: auto-labeling-model-1748343190922-522\n", - " Name: N/A\n", - "\n", - "24. ID: auto-labeling-model-1748343844913-193\n", - " Name: N/A\n", - "\n", - "25. ID: auto-labeling-model-1748364582299-194\n", - " Name: N/A\n", - "\n", - "26. ID: auto-labeling-model-1748364610998-174\n", - " Name: N/A\n", - "\n", - "27. ID: auto-labeling-model-1748364627905-392\n", - " Name: N/A\n", - "\n", - "28. ID: auto-labeling-model-1748364882995-331\n", - " Name: N/A\n", - "\n", - "29. ID: auto-labeling-model-1748365809345-194\n", - " Name: N/A\n", - "\n", - "30. ID: auto-labeling-model-1748365844597-722\n", - " Name: N/A\n", - "\n", - "31. ID: auto-labeling-model-1748369310664-291\n", - " Name: N/A\n", - "\n", - "32. ID: auto-labeling-model-1748382666104-108\n", - " Name: N/A\n", - "\n", - "33. ID: auto-labeling-model-1748398666237-678\n", - " Name: N/A\n", - "\n", - "34. ID: auto-labeling-model-1748406169100-153\n", - " Name: N/A\n", - "\n", - "35. ID: auto-labeling-model-1748487450682-652\n", - " Name: N/A\n", - "\n", - "36. ID: auto-labeling-model-1748490709500-887\n", - " Name: N/A\n", - "\n", - "37. ID: auto-labeling-model-1748524957609-245\n", - " Name: N/A\n", - "\n", - "38. ID: auto-labeling-model-1748525150770-437\n", - " Name: N/A\n", - "\n", - "39. ID: auto-labeling-model-1748527146405-802\n", - " Name: N/A\n", - "\n", - "40. ID: auto-labeling-model-1748532349641-24\n", - " Name: N/A\n", - "\n", - "41. ID: auto-labeling-model-1748652707721-341\n", - " Name: N/A\n", - "\n", - "42. ID: auto-labeling-model-1748652848103-155\n", - " Name: N/A\n", - "\n", - "43. ID: auto-labeling-model-1748839949920-863\n", - " Name: N/A\n", - "\n", - "44. ID: auto-labeling-model-1748845791989-716\n", - " Name: N/A\n", - "\n", - "45. ID: auto-labeling-model-1748845807869-415\n", - " Name: N/A\n", - "\n", - "46. 
ID: auto-labeling-model-1748907891703-517\n", - " Name: N/A\n", - "\n", - "47. ID: auto-labeling-model-1748908692967-569\n", - " Name: N/A\n", - "\n", - "48. ID: auto-labeling-model-1748914058095-616\n", - " Name: N/A\n", - "\n", - "49. ID: auto-labeling-model-1748936065478-291\n", - " Name: N/A\n", - "\n", - "50. ID: auto-labeling-model-1748936271674-552\n", - " Name: N/A\n", - "\n", - "51. ID: auto-labeling-model-1748936490686-646\n", - " Name: N/A\n", - "\n", - "52. ID: auto-labeling-model-1748937447139-653\n", - " Name: N/A\n", - "\n", - "53. ID: auto-labeling-model-1748940860399-529\n", - " Name: N/A\n", - "\n", - "54. ID: auto-labeling-model-1748941320548-161\n", - " Name: N/A\n", - "\n", - "55. ID: auto-labeling-model-1748941816737-4\n", - " Name: N/A\n", - "\n", - "56. ID: auto-labeling-model-1748942668260-584\n", - " Name: N/A\n", - "\n", - "57. ID: auto-labeling-model-1748942752946-240\n", - " Name: N/A\n", - "\n", - "58. ID: auto-labeling-model-1748943751138-585\n", - " Name: N/A\n", - "\n", - "59. ID: auto-labeling-model-1748943869439-730\n", - " Name: N/A\n", - "\n", - "60. ID: auto-labeling-model-1748944505181-366\n", - " Name: N/A\n", - "\n", - "61. ID: auto-labeling-model-1748945194482-115\n", - " Name: N/A\n", - "\n", - "62. ID: auto-labeling-model-1749003326198-992\n", - " Name: N/A\n", - "\n", - "63. ID: auto-labeling-model-1749023590022-874\n", - " Name: N/A\n", - "\n", - "64. ID: auto-labeling-model-1749023636121-927\n", - " Name: N/A\n", - "\n", - "65. ID: auto-labeling-model-1749023850993-339\n", - " Name: N/A\n", - "\n", - "66. ID: auto-labeling-model-1749023887009-843\n", - " Name: N/A\n", - "\n", - "67. ID: auto-labeling-model-1749023901480-881\n", - " Name: N/A\n", - "\n", - "68. ID: auto-labeling-model-1749023933378-529\n", - " Name: N/A\n", - "\n", - "69. ID: auto-labeling-model-1749024617342-607\n", - " Name: N/A\n", - "\n", - "70. ID: auto-labeling-model-1749024650401-862\n", - " Name: N/A\n", - "\n", - "71. 
ID: auto-labeling-model-1749095665011-257\n", - " Name: N/A\n", - "\n", - "72. ID: auto-labeling-model-1749096929213-707\n", - " Name: N/A\n", - "\n", - "73. ID: auto-labeling-model-1749104361550-221\n", - " Name: N/A\n", - "\n", - "74. ID: auto-labeling-model-1749104922387-882\n", - " Name: N/A\n", - "\n", - "75. ID: auto-labeling-model-1749105026574-367\n", - " Name: N/A\n", - "\n", - "76. ID: auto-labeling-model-1749251965833-403\n", - " Name: N/A\n", - "\n", - "77. ID: auto-labeling-model-1749254053334-357\n", - " Name: N/A\n", - "\n", - "78. ID: auto-labeling-model-1749311286700-369\n", - " Name: N/A\n", - "\n", - "79. ID: auto-labeling-model-1749509842310-370\n", - " Name: N/A\n", - "\n", - "80. ID: auto-labeling-model-1749520600099-409\n", - " Name: N/A\n", - "\n", - "81. ID: auto-labeling-model-1749522784982-438\n", - " Name: N/A\n", - "\n", - "82. ID: auto-labeling-model-1749535466854-401\n", - " Name: N/A\n", - "\n", - "83. ID: auto-labeling-model-1749581796990-277\n", - " Name: N/A\n", - "\n", - "84. ID: auto-labeling-model-1749581836897-138\n", - " Name: N/A\n", - "\n", - "85. ID: auto-labeling-model-1749584140873-572\n", - " Name: N/A\n", - "\n", - "86. ID: auto-labeling-model-1749585959231-24\n", - " Name: N/A\n", - "\n", - "87. ID: auto-labeling-model-1749604604536-674\n", - " Name: N/A\n", - "\n", - "88. ID: auto-labeling-model-1749620902726-984\n", - " Name: N/A\n", - "\n", - "89. ID: auto-labeling-model-1749626687259-809\n", - " Name: N/A\n", - "\n", - "90. ID: auto-labeling-model-1749627602312-979\n", - " Name: N/A\n", - "\n", - "91. ID: auto-labeling-model-1749630601186-689\n", - " Name: N/A\n", - "\n", - "92. ID: auto-labeling-model-1749631339251-319\n", - " Name: N/A\n", - "\n", - "93. ID: auto-labeling-model-1749631742974-733\n", - " Name: N/A\n", - "\n", - "94. ID: auto-labeling-model-1749631891328-309\n", - " Name: N/A\n", - "\n", - "95. ID: auto-labeling-model-1749696702275-545\n", - " Name: N/A\n", - "\n", - "96. 
ID: auto-labeling-model-1749758278394-240\n", - " Name: N/A\n", - "\n", - "97. ID: auto-labeling-model-1749758517784-660\n", - " Name: N/A\n", - "\n", - "98. ID: auto-labeling-model-1749758533104-929\n", - " Name: N/A\n", - "\n", - "99. ID: auto-labeling-model-1749758555087-116\n", - " Name: N/A\n", - "\n", - "100. ID: auto-labeling-model-1749759432793-891\n", - " Name: N/A\n", - "\n", - "101. ID: auto-labeling-model-1749768746704-802\n", - " Name: N/A\n", - "\n", - "102. ID: auto-labeling-model-1749775305589-256\n", - " Name: N/A\n", - "\n", - "103. ID: auto-labeling-model-1749802761164-406\n", - " Name: N/A\n", - "\n", - "104. ID: auto-labeling-model-1749956497322-594\n", - " Name: N/A\n", - "\n", - "105. ID: auto-labeling-model-1749960177654-514\n", - " Name: N/A\n", - "\n", - "106. ID: auto-labeling-model-1749961833034-154\n", - " Name: N/A\n", - "\n", - "107. ID: auto-labeling-model-1749962138214-21\n", - " Name: N/A\n", - "\n", - "108. ID: auto-labeling-model-1750045513862-445\n", - " Name: N/A\n", - "\n", - "109. ID: auto-labeling-model-1750108497453-922\n", - " Name: N/A\n", - "\n", - "110. ID: auto-labeling-model-1750123214932-968\n", - " Name: N/A\n", - "\n", - "111. ID: auto-labeling-model-1750128770286-412\n", - " Name: N/A\n", - "\n", - "112. ID: auto-labeling-model-1750128888980-243\n", - " Name: N/A\n", - "\n", - "113. ID: auto-labeling-model-1750141234245-231\n", - " Name: N/A\n", - "\n", - "114. ID: auto-labeling-model-1750145695285-480\n", - " Name: N/A\n", - "\n", - "115. ID: auto-labeling-model-1750211643719-379\n", - " Name: N/A\n", - "\n", - "116. ID: auto-labeling-model-1750233198991-694\n", - " Name: N/A\n", - "\n", - "117. ID: auto-labeling-model-1750241272780-2\n", - " Name: N/A\n", - "\n", - "118. ID: auto-labeling-model-1750279157596-35\n", - " Name: N/A\n", - "\n", - "119. ID: auto-labeling-model-1750291999953-91\n", - " Name: N/A\n", - "\n", - "120. ID: auto-labeling-model-1750292632586-625\n", - " Name: N/A\n", - "\n", - "121. 
ID: auto-labeling-model-1750312049582-59\n", - " Name: N/A\n", - "\n", - "122. ID: auto-labeling-model-1750312573420-578\n", - " Name: N/A\n", - "\n", - "123. ID: auto-labeling-model-1750376726735-970\n", - " Name: N/A\n", - "\n", - "124. ID: auto-labeling-model-1750377427038-364\n", - " Name: N/A\n", - "\n", - "125. ID: auto-labeling-model-1750385575232-897\n", - " Name: N/A\n", - "\n", - "126. ID: auto-labeling-model-1750403576185-741\n", - " Name: N/A\n", - "\n", - "127. ID: auto-labeling-model-1750404809435-451\n", - " Name: N/A\n", - "\n", - "128. ID: auto-labeling-model-1750405070052-89\n", - " Name: N/A\n", - "\n", - "129. ID: auto-labeling-model-1750405091355-763\n", - " Name: N/A\n", - "\n", - "130. ID: auto-labeling-model-1750417420016-430\n", - " Name: N/A\n", - "\n", - "131. ID: auto-labeling-model-1750659725597-788\n", - " Name: N/A\n", - "\n", - "132. ID: auto-labeling-model-1750659733517-772\n", - " Name: N/A\n", - "\n", - "133. ID: auto-labeling-model-1750659761722-251\n", - " Name: N/A\n", - "\n", - "134. ID: auto-labeling-model-1750659784566-101\n", - " Name: N/A\n", - "\n", - "135. ID: auto-labeling-model-1750659903607-108\n", - " Name: N/A\n", - "\n", - "136. ID: auto-labeling-model-1750659933637-141\n", - " Name: N/A\n", - "\n", - "137. ID: auto-labeling-model-1750659945217-945\n", - " Name: N/A\n", - "\n", - "138. ID: auto-labeling-model-1750660650963-739\n", - " Name: N/A\n", - "\n", - "139. ID: auto-labeling-model-1750660824597-923\n", - " Name: N/A\n", - "\n", - "140. ID: auto-labeling-model-1750663207559-512\n", - " Name: N/A\n", - "\n", - "141. ID: auto-labeling-model-1750663259510-796\n", - " Name: N/A\n", - "\n", - "142. ID: auto-labeling-model-1750663303432-581\n", - " Name: N/A\n", - "\n", - "143. ID: auto-labeling-model-1750663377213-340\n", - " Name: N/A\n", - "\n", - "144. ID: auto-labeling-model-1750663393108-597\n", - " Name: N/A\n", - "\n", - "145. ID: auto-labeling-model-1750664456347-683\n", - " Name: N/A\n", - "\n", - "146. 
ID: auto-labeling-model-1750664605893-618\n", - " Name: N/A\n", - "\n", - "147. ID: auto-labeling-model-1750665355708-8\n", - " Name: N/A\n", - "\n", - "148. ID: auto-labeling-model-1750673318125-535\n", - " Name: N/A\n", - "\n", - "149. ID: auto-labeling-model-1750673331433-642\n", - " Name: N/A\n", - "\n", - "150. ID: auto-labeling-model-1750709349430-630\n", - " Name: N/A\n", - "\n", - "151. ID: auto-labeling-model-1750719511542-531\n", - " Name: N/A\n", - "\n", - "152. ID: auto-labeling-model-1750744047556-446\n", - " Name: N/A\n", - "\n", - "153. ID: auto-labeling-model-1750755510472-120\n", - " Name: N/A\n", - "\n", - "154. ID: auto-labeling-model-1750784814399-27\n", - " Name: N/A\n", - "\n", - "155. ID: auto-labeling-model-1750788356545-200\n", - " Name: N/A\n", - "\n", - "156. ID: auto-labeling-model-1750789921864-730\n", - " Name: N/A\n", - "\n", - "157. ID: auto-labeling-model-1750836585070-913\n", - " Name: N/A\n", - "\n", - "158. ID: auto-labeling-model-1750842588854-962\n", - " Name: N/A\n", - "\n", - "159. ID: auto-labeling-model-1750842831795-314\n", - " Name: N/A\n", - "\n", - "160. ID: auto-labeling-model-1750842897183-394\n", - " Name: N/A\n", - "\n", - "161. ID: auto-labeling-model-1750842978258-136\n", - " Name: N/A\n", - "\n", - "162. ID: auto-labeling-model-1750843282949-512\n", - " Name: N/A\n", - "\n", - "163. ID: auto-labeling-model-1750843704909-216\n", - " Name: N/A\n", - "\n", - "164. ID: auto-labeling-model-1750843908445-174\n", - " Name: N/A\n", - "\n", - "165. ID: auto-labeling-model-1750844014408-330\n", - " Name: N/A\n", - "\n", - "166. ID: auto-labeling-model-1750844234138-988\n", - " Name: N/A\n", - "\n", - "167. ID: auto-labeling-model-1750844709672-320\n", - " Name: N/A\n", - "\n", - "168. ID: auto-labeling-model-1750845307517-940\n", - " Name: N/A\n", - "\n", - "169. ID: auto-labeling-model-1750846220484-837\n", - " Name: N/A\n", - "\n", - "170. ID: auto-labeling-model-1750846255005-395\n", - " Name: N/A\n", - "\n", - "171. 
ID: auto-labeling-model-1750847433984-311\n", - " Name: N/A\n", - "\n", - "172. ID: auto-labeling-model-1750853034834-460\n", - " Name: N/A\n", - "\n", - "173. ID: auto-labeling-model-1750919114419-408\n", - " Name: N/A\n", - "\n", - "174. ID: auto-labeling-model-1750920179010-279\n", - " Name: N/A\n", - "\n", - "175. ID: auto-labeling-model-1750920218343-518\n", - " Name: N/A\n", - "\n", - "176. ID: auto-labeling-model-1750920298701-557\n", - " Name: N/A\n", - "\n", - "177. ID: auto-labeling-model-1750920352617-62\n", - " Name: N/A\n", - "\n", - "178. ID: auto-labeling-model-1751052501474-178\n", - " Name: N/A\n", - "\n", - "179. ID: auto-labeling-model-1751069615217-264\n", - " Name: N/A\n", - "\n", - "180. ID: auto-labeling-model-1751270970103-549\n", - " Name: N/A\n", - "\n", - "181. ID: auto-labeling-model-1751272499140-268\n", - " Name: N/A\n", - "\n", - "182. ID: auto-labeling-model-1751272544250-613\n", - " Name: N/A\n", - "\n", - "183. ID: auto-labeling-model-1751273787498-265\n", - " Name: N/A\n", - "\n", - "184. ID: auto-labeling-model-1751273849331-220\n", - " Name: N/A\n", - "\n", - "185. ID: auto-labeling-model-1751273904647-201\n", - " Name: N/A\n", - "\n", - "186. ID: auto-labeling-model-1751273937246-448\n", - " Name: N/A\n", - "\n", - "187. ID: auto-labeling-model-1751273983364-401\n", - " Name: N/A\n", - "\n", - "188. ID: auto-labeling-model-1751336918679-904\n", - " Name: N/A\n", - "\n", - "189. ID: auto-labeling-model-1751349360361-963\n", - " Name: N/A\n", - "\n", - "190. ID: auto-labeling-model-1751427888199-459\n", - " Name: N/A\n", - "\n", - "191. ID: auto-labeling-model-1751427891721-940\n", - " Name: N/A\n", - "\n", - "192. ID: auto-labeling-model-1751441608096-967\n", - " Name: N/A\n", - "\n", - "193. ID: auto-labeling-model-1751441662962-402\n", - " Name: N/A\n", - "\n", - "194. ID: auto-labeling-model-1751444577624-169\n", - " Name: N/A\n", - "\n", - "195. ID: auto-labeling-model-1751446425406-566\n", - " Name: N/A\n", - "\n", - "196. 
ID: auto-labeling-model-1751446744627-904\n", - " Name: N/A\n", - "\n", - "197. ID: auto-labeling-model-1751447069922-153\n", - " Name: N/A\n", - "\n", - "198. ID: auto-labeling-model-1751447126141-210\n", - " Name: N/A\n", - "\n", - "199. ID: auto-labeling-model-1751450223362-323\n", - " Name: N/A\n", - "\n", - "200. ID: auto-labeling-model-1751619901375-912\n", - " Name: N/A\n", - "\n", - "201. ID: auto-labeling-model-1751621939880-824\n", - " Name: N/A\n", - "\n", - "202. ID: auto-labeling-model-1751622003371-912\n", - " Name: N/A\n", - "\n", - "203. ID: auto-labeling-model-1751622246359-22\n", - " Name: N/A\n", - "\n", - "204. ID: auto-labeling-model-1751622337847-185\n", - " Name: N/A\n", - "\n", - "205. ID: auto-labeling-model-1751630796222-228\n", - " Name: N/A\n", - "\n", - "206. ID: auto-labeling-model-1751630815948-351\n", - " Name: N/A\n", - "\n", - "207. ID: auto-labeling-model-1751998528557-924\n", - " Name: N/A\n", - "\n", - "208. ID: auto-labeling-model-1752025809239-846\n", - " Name: N/A\n", - "\n", - "209. ID: auto-labeling-model-1752034702114-180\n", - " Name: N/A\n", - "\n", - "210. ID: auto-labeling-model-1752098586840-747\n", - " Name: N/A\n", - "\n", - "211. ID: auto-labeling-model-1752180782600-490\n", - " Name: N/A\n", - "\n", - "212. ID: auto-labeling-model-1752271117113-156\n", - " Name: N/A\n", - "\n", - "213. ID: auto-labeling-model-1752523653762-595\n", - " Name: N/A\n", - "\n", - "214. ID: auto-labeling-model-1752600290738-67\n", - " Name: N/A\n", - "\n", - "215. ID: auto-labeling-model-1752625416686-81\n", - " Name: N/A\n", - "\n", - "216. ID: auto-labeling-model-1752625871649-767\n", - " Name: N/A\n", - "\n", - "217. ID: auto-labeling-model-1752693120005-346\n", - " Name: N/A\n", - "\n", - "218. ID: auto-labeling-model-1752697569506-376\n", - " Name: N/A\n", - "\n", - "219. ID: auto-labeling-model-1752697610504-950\n", - " Name: N/A\n", - "\n", - "220. ID: auto-labeling-model-1752700740555-590\n", - " Name: N/A\n", - "\n", - "221. 
ID: auto-labeling-model-1752708687132-939\n", - " Name: N/A\n", - "\n", - "222. ID: auto-labeling-model-1752741732428-578\n", - " Name: N/A\n", - "\n", - "223. ID: auto-labeling-model-1752780032715-66\n", - " Name: N/A\n", - "\n", - "224. ID: auto-labeling-model-1752780325289-573\n", - " Name: N/A\n", - "\n", - "225. ID: auto-labeling-model-1752795955082-603\n", - " Name: N/A\n", - "\n", - "226. ID: auto-labeling-model-1752796753555-462\n", - " Name: N/A\n", - "\n", - "227. ID: auto-labeling-model-1752797239305-251\n", - " Name: N/A\n", - "\n", - "228. ID: auto-labeling-model-1752800932971-876\n", - " Name: N/A\n", - "\n", - "229. ID: auto-labeling-model-1752803086727-971\n", - " Name: N/A\n", - "\n", - "230. ID: auto-labeling-model-1752803985621-193\n", - " Name: N/A\n", - "\n", - "231. ID: auto-labeling-model-1752806777300-862\n", - " Name: N/A\n", - "\n", - "232. ID: auto-labeling-model-1752884829621-441\n", - " Name: N/A\n", - "\n", - "233. ID: auto-labeling-model-1753083025779-103\n", - " Name: N/A\n", - "\n", - "234. ID: auto-labeling-model-1753083077531-666\n", - " Name: N/A\n", - "\n", - "235. ID: auto-labeling-model-1753083850816-29\n", - " Name: N/A\n", - "\n", - "236. ID: auto-labeling-model-1753083864041-58\n", - " Name: N/A\n", - "\n", - "237. ID: auto-labeling-model-1753086883459-951\n", - " Name: N/A\n", - "\n", - "238. ID: auto-labeling-model-1753089079279-222\n", - " Name: N/A\n", - "\n", - "239. ID: auto-labeling-model-1753150531096-410\n", - " Name: N/A\n", - "\n", - "240. ID: auto-labeling-model-1753151865515-394\n", - " Name: N/A\n", - "\n", - "241. ID: auto-labeling-model-1753168395318-507\n", - " Name: N/A\n", - "\n", - "242. ID: auto-labeling-model-1753169409334-912\n", - " Name: N/A\n", - "\n", - "243. ID: auto-labeling-model-1753173597967-303\n", - " Name: N/A\n", - "\n", - "244. ID: auto-labeling-model-1753177537439-711\n", - " Name: N/A\n", - "\n", - "245. ID: auto-labeling-model-1753205662320-583\n", - " Name: N/A\n", - "\n", - "246. 
ID: auto-labeling-model-1753207022483-913\n", - " Name: N/A\n", - "\n", - "247. ID: auto-labeling-model-1753207579262-276\n", - " Name: N/A\n", - "\n", - "248. ID: auto-labeling-model-1753208672240-981\n", - " Name: N/A\n", - "\n", - "249. ID: auto-labeling-model-1753209156822-298\n", - " Name: N/A\n", - "\n", - "250. ID: auto-labeling-model-1753209981617-818\n", - " Name: N/A\n", - "\n", - "251. ID: auto-labeling-model-1753236316137-300\n", - " Name: N/A\n", - "\n", - "252. ID: auto-labeling-model-1753237512820-249\n", - " Name: N/A\n", - "\n", - "253. ID: auto-labeling-model-1753250369127-625\n", - " Name: N/A\n", - "\n", - "254. ID: auto-labeling-model-1753255567341-610\n", - " Name: N/A\n", - "\n", - "255. ID: auto-labeling-model-1753259092944-226\n", - " Name: N/A\n", - "\n", - "256. ID: auto-labeling-model-1753287197755-783\n", - " Name: N/A\n", - "\n", - "257. ID: auto-labeling-model-1753321650913-823\n", - " Name: N/A\n", - "\n", - "258. ID: auto-labeling-model-1753325891996-80\n", - " Name: N/A\n", - "\n", - "259. ID: auto-labeling-model-1753334968241-706\n", - " Name: N/A\n", - "\n", - "260. ID: auto-labeling-model-1753335132165-512\n", - " Name: N/A\n", - "\n", - "261. ID: auto-labeling-model-1753335555914-390\n", - " Name: N/A\n", - "\n", - "262. ID: auto-labeling-model-1753335697157-843\n", - " Name: N/A\n", - "\n", - "263. ID: auto-labeling-model-1753340903345-139\n", - " Name: N/A\n", - "\n", - "264. ID: auto-labeling-model-1753344102782-140\n", - " Name: N/A\n", - "\n", - "265. ID: auto-labeling-model-1753344491064-431\n", - " Name: N/A\n", - "\n", - "266. ID: auto-labeling-model-1753344947435-154\n", - " Name: N/A\n", - "\n", - "267. ID: auto-labeling-model-1753346772842-804\n", - " Name: N/A\n", - "\n", - "268. ID: auto-labeling-model-1753420107017-420\n", - " Name: N/A\n", - "\n", - "269. ID: auto-labeling-model-1753420466410-256\n", - " Name: N/A\n", - "\n", - "270. ID: auto-labeling-model-1753423049391-214\n", - " Name: N/A\n", - "\n", - "271. 
ID: auto-labeling-model-1753430316648-188\n", - " Name: N/A\n", - "\n", - "272. ID: auto-labeling-model-1753431705642-795\n", - " Name: N/A\n", - "\n", - "273. ID: auto-labeling-model-1753432653890-622\n", - " Name: N/A\n", - "\n", - "274. ID: auto-labeling-model-1753433164146-455\n", - " Name: N/A\n", - "\n", - "275. ID: auto-labeling-model-1753434806213-833\n", - " Name: N/A\n", - "\n", - "276. ID: auto-labeling-model-1753670824352-493\n", - " Name: N/A\n", - "\n", - "277. ID: auto-labeling-model-1753680640396-566\n", - " Name: N/A\n", - "\n", - "278. ID: auto-labeling-model-1753681888155-667\n", - " Name: N/A\n", - "\n", - "279. ID: auto-labeling-model-1753682254644-331\n", - " Name: N/A\n", - "\n", - "280. ID: auto-labeling-model-1753683583061-323\n", - " Name: N/A\n", - "\n", - "281. ID: auto-labeling-model-1753684547670-475\n", - " Name: N/A\n", - "\n", - "282. ID: auto-labeling-model-1753684784064-358\n", - " Name: N/A\n", - "\n", - "283. ID: auto-labeling-model-1753686206798-898\n", - " Name: N/A\n", - "\n", - "284. ID: auto-labeling-model-1753686800552-354\n", - " Name: N/A\n", - "\n", - "285. ID: auto-labeling-model-1753691313133-192\n", - " Name: N/A\n", - "\n", - "286. ID: auto-labeling-model-1753755468942-82\n", - " Name: N/A\n", - "\n", - "287. ID: auto-labeling-model-1753765727024-37\n", - " Name: N/A\n", - "\n", - "288. ID: auto-labeling-model-1753766046014-152\n", - " Name: N/A\n", - "\n", - "289. ID: auto-labeling-model-1753767335342-370\n", - " Name: N/A\n", - "\n", - "290. ID: auto-labeling-model-1753767338325-621\n", - " Name: N/A\n", - "\n", - "291. ID: auto-labeling-model-1753773699582-540\n", - " Name: N/A\n", - "\n", - "292. ID: auto-labeling-model-1753774470271-985\n", - " Name: N/A\n", - "\n", - "293. ID: auto-labeling-model-1753775949221-151\n", - " Name: N/A\n", - "\n", - "294. ID: auto-labeling-model-1753777245479-372\n", - " Name: N/A\n", - "\n", - "295. ID: auto-labeling-model-1753777925896-803\n", - " Name: N/A\n", - "\n", - "296. 
ID: auto-labeling-model-1753780557881-855\n", - " Name: N/A\n", - "\n", - "297. ID: auto-labeling-model-1753841121952-979\n", - " Name: N/A\n", - "\n", - "298. ID: auto-labeling-model-1753841981886-902\n", - " Name: N/A\n", - "\n", - "299. ID: auto-labeling-model-1753843376936-643\n", - " Name: N/A\n", - "\n", - "300. ID: auto-labeling-model-1753844211334-641\n", - " Name: N/A\n", - "\n", - "301. ID: auto-labeling-model-1753853033274-214\n", - " Name: N/A\n", - "\n", - "302. ID: auto-labeling-model-1753855251911-309\n", - " Name: N/A\n", - "\n", - "303. ID: auto-labeling-model-1753855551724-866\n", - " Name: N/A\n", - "\n", - "304. ID: auto-labeling-model-1753857116602-791\n", - " Name: N/A\n", - "\n", - "305. ID: auto-labeling-model-1753857268920-608\n", - " Name: N/A\n", - "\n", - "306. ID: auto-labeling-model-1753857820246-647\n", - " Name: N/A\n", - "\n", - "307. ID: auto-labeling-model-1753857865813-554\n", - " Name: N/A\n", - "\n", - "308. ID: auto-labeling-model-1753858369469-249\n", - " Name: N/A\n", - "\n", - "309. ID: auto-labeling-model-1753859412803-605\n", - " Name: N/A\n", - "\n", - "310. ID: auto-labeling-model-1753860904131-872\n", - " Name: N/A\n", - "\n", - "311. ID: auto-labeling-model-1753861167980-954\n", - " Name: N/A\n", - "\n", - "312. ID: auto-labeling-model-1753861799127-664\n", - " Name: N/A\n", - "\n", - "313. ID: auto-labeling-model-1753862553873-905\n", - " Name: N/A\n", - "\n", - "314. ID: auto-labeling-model-1753862814119-255\n", - " Name: N/A\n", - "\n", - "315. ID: auto-labeling-model-1753863784180-612\n", - " Name: N/A\n", - "\n", - "316. ID: auto-labeling-model-1753863994987-510\n", - " Name: N/A\n", - "\n", - "317. ID: auto-labeling-model-1753864084656-697\n", - " Name: N/A\n", - "\n", - "318. ID: auto-labeling-model-1753865255601-417\n", - " Name: N/A\n", - "\n", - "319. ID: auto-labeling-model-1753888993477-912\n", - " Name: N/A\n", - "\n", - "320. ID: auto-labeling-model-1753936473158-979\n", - " Name: N/A\n", - "\n", - "321. 
ID: auto-labeling-model-1753939417926-903\n", - " Name: N/A\n", - "\n", - "322. ID: auto-labeling-model-1753941090969-886\n", - " Name: N/A\n", - "\n", - "323. ID: auto-labeling-model-1753941295803-93\n", - " Name: N/A\n", - "\n", - "324. ID: auto-labeling-model-1753943808756-255\n", - " Name: N/A\n", - "\n", - "325. ID: auto-labeling-model-1754012684592-887\n", - " Name: N/A\n", - "\n", - "326. ID: auto-labeling-model-1754015881192-443\n", - " Name: N/A\n", - "\n", - "327. ID: auto-labeling-model-1754016406351-97\n", - " Name: N/A\n", - "\n", - "328. ID: auto-labeling-model-1754016977082-211\n", - " Name: N/A\n", - "\n", - "329. ID: auto-labeling-model-1754017707931-428\n", - " Name: N/A\n", - "\n", - "330. ID: auto-labeling-model-1754024495010-992\n", - " Name: N/A\n", - "\n", - "331. ID: auto-labeling-model-1754025560953-192\n", - " Name: N/A\n", - "\n", - "332. ID: auto-labeling-model-1754026435557-853\n", - " Name: N/A\n", - "\n", - "333. ID: auto-labeling-model-1754037940196-869\n", - " Name: N/A\n", - "\n", - "334. ID: auto-labeling-model-1754082032616-607\n", - " Name: N/A\n", - "\n", - "335. ID: auto-labeling-model-1754082215077-482\n", - " Name: N/A\n", - "\n", - "336. ID: auto-labeling-model-1754082332437-629\n", - " Name: N/A\n", - "\n", - "337. ID: auto-labeling-model-1754082479343-224\n", - " Name: N/A\n", - "\n", - "338. ID: auto-labeling-model-1754082536526-914\n", - " Name: N/A\n", - "\n", - "339. ID: auto-labeling-model-1754082630700-302\n", - " Name: N/A\n", - "\n", - "340. ID: auto-labeling-model-1754082725263-83\n", - " Name: N/A\n", - "\n", - "341. ID: auto-labeling-model-1754082811382-584\n", - " Name: N/A\n", - "\n", - "342. ID: auto-labeling-model-1754082998761-352\n", - " Name: N/A\n", - "\n", - "343. ID: auto-labeling-model-1754083046825-203\n", - " Name: N/A\n", - "\n", - "344. ID: auto-labeling-model-1754083150278-445\n", - " Name: N/A\n", - "\n", - "345. ID: auto-labeling-model-1754083462284-222\n", - " Name: N/A\n", - "\n", - "346. 
ID: auto-labeling-model-1754083621516-367\n", - " Name: N/A\n", - "\n", - "347. ID: auto-labeling-model-1754083719163-272\n", - " Name: N/A\n", - "\n", - "348. ID: auto-labeling-model-1754083866374-41\n", - " Name: N/A\n", - "\n", - "349. ID: auto-labeling-model-1754084032708-231\n", - " Name: N/A\n", - "\n", - "350. ID: auto-labeling-model-1754084406835-168\n", - " Name: N/A\n", - "\n", - "351. ID: auto-labeling-model-1754084472348-188\n", - " Name: N/A\n", - "\n", - "352. ID: auto-labeling-model-1754084575001-916\n", - " Name: N/A\n", - "\n", - "353. ID: auto-labeling-model-1754084884148-481\n", - " Name: N/A\n", - "\n", - "354. ID: auto-labeling-model-1754088680537-743\n", - " Name: N/A\n", - "\n", - "355. ID: auto-labeling-model-1754277589373-867\n", - " Name: N/A\n", - "\n", - "356. ID: auto-labeling-model-1754327062412-76\n", - " Name: N/A\n", - "\n", - "357. ID: auto-labeling-model-1754361872613-844\n", - " Name: N/A\n", - "\n", - "358. ID: auto-labeling-model-1754442934624-187\n", - " Name: N/A\n", - "\n", - "359. ID: auto-labeling-model-1754443219339-17\n", - " Name: N/A\n", - "\n", - "360. ID: auto-labeling-model-1754448125079-528\n", - " Name: N/A\n", - "\n", - "361. ID: auto-labeling-model-1754448200938-6\n", - " Name: N/A\n", - "\n", - "362. ID: auto-labeling-model-1754448830534-215\n", - " Name: N/A\n", - "\n", - "363. ID: auto-labeling-model-1754448901751-597\n", - " Name: N/A\n", - "\n", - "364. ID: auto-labeling-model-1754449038080-472\n", - " Name: N/A\n", - "\n", - "365. ID: auto-labeling-model-1754449135369-901\n", - " Name: N/A\n", - "\n", - "366. ID: auto-labeling-model-1754449150398-162\n", - " Name: N/A\n", - "\n", - "367. ID: auto-labeling-model-1754449206123-981\n", - " Name: N/A\n", - "\n", - "368. ID: auto-labeling-model-1754449280061-594\n", - " Name: N/A\n", - "\n", - "369. ID: auto-labeling-model-1754449347580-776\n", - " Name: N/A\n", - "\n", - "370. ID: auto-labeling-model-1754449538829-202\n", - " Name: N/A\n", - "\n", - "371. 
ID: auto-labeling-model-1754449608449-502\n", - " Name: N/A\n", - "\n", - "372. ID: auto-labeling-model-1754449678933-461\n", - " Name: N/A\n", - "\n", - "373. ID: auto-labeling-model-1754449747782-122\n", - " Name: N/A\n", - "\n", - "374. ID: auto-labeling-model-1754449819030-776\n", - " Name: N/A\n", - "\n", - "375. ID: auto-labeling-model-1754454485024-346\n", - " Name: N/A\n", - "\n", - "376. ID: auto-labeling-model-1754456633663-795\n", - " Name: N/A\n", - "\n", - "377. ID: auto-labeling-model-1754457369864-749\n", - " Name: N/A\n", - "\n", - "378. ID: auto-labeling-model-1754457591929-484\n", - " Name: N/A\n", - "\n", - "379. ID: auto-labeling-model-1754460230719-575\n", - " Name: N/A\n", - "\n", - "380. ID: auto-labeling-model-1754460479500-36\n", - " Name: N/A\n", - "\n", - "381. ID: auto-labeling-model-1754460640349-364\n", - " Name: N/A\n", - "\n", - "382. ID: auto-labeling-model-1754669409054-428\n", - " Name: N/A\n", - "\n", - "383. ID: auto-labeling-model-1754951212582-203\n", - " Name: N/A\n", - "\n", - "384. ID: auto-labeling-model-1754965260794-576\n", - " Name: N/A\n", - "\n", - "385. ID: auto-labeling-model-1754965331102-485\n", - " Name: N/A\n", - "\n", - "386. ID: auto-labeling-model-1754965445643-161\n", - " Name: N/A\n", - "\n", - "387. ID: auto-labeling-model-1754965630031-820\n", - " Name: N/A\n", - "\n", - "388. ID: auto-labeling-model-1754965704606-779\n", - " Name: N/A\n", - "\n", - "389. ID: auto-labeling-model-1754965767126-499\n", - " Name: N/A\n", - "\n", - "390. ID: auto-labeling-model-1754965926600-215\n", - " Name: N/A\n", - "\n", - "391. ID: auto-labeling-model-1754965996281-810\n", - " Name: N/A\n", - "\n", - "392. ID: auto-labeling-model-1754966073913-92\n", - " Name: N/A\n", - "\n", - "393. ID: auto-labeling-model-1754966208584-396\n", - " Name: N/A\n", - "\n", - "394. ID: auto-labeling-model-1754966287090-692\n", - " Name: N/A\n", - "\n", - "395. ID: auto-labeling-model-1754966553579-724\n", - " Name: N/A\n", - "\n", - "396. 
ID: auto-labeling-model-1754966634261-409\n", - " Name: N/A\n", - "\n", - "397. ID: auto-labeling-model-1754966703678-7\n", - " Name: N/A\n", - "\n", - "398. ID: auto-labeling-model-1754966778721-225\n", - " Name: N/A\n", - "\n", - "399. ID: auto-labeling-model-1754966848977-806\n", - " Name: N/A\n", - "\n", - "400. ID: auto-labeling-model-1754966934481-980\n", - " Name: N/A\n", - "\n", - "401. ID: auto-labeling-model-1754967006745-602\n", - " Name: N/A\n", - "\n", - "402. ID: auto-labeling-model-1754967080546-450\n", - " Name: N/A\n", - "\n", - "403. ID: auto-labeling-model-1754967570056-479\n", - " Name: N/A\n", - "\n", - "404. ID: auto-labeling-model-1754967665781-18\n", - " Name: N/A\n", - "\n", - "405. ID: auto-labeling-model-1754967737902-258\n", - " Name: N/A\n", - "\n", - "406. ID: auto-labeling-model-1754967809639-969\n", - " Name: N/A\n", - "\n", - "407. ID: auto-labeling-model-1754967879833-46\n", - " Name: N/A\n", - "\n", - "408. ID: auto-labeling-model-1754967953160-263\n", - " Name: N/A\n", - "\n", - "409. ID: auto-labeling-model-1754968036672-249\n", - " Name: N/A\n", - "\n", - "410. ID: auto-labeling-model-1754968110963-400\n", - " Name: N/A\n", - "\n", - "411. ID: auto-labeling-model-1754968179908-761\n", - " Name: N/A\n", - "\n", - "412. ID: auto-labeling-model-1754974913641-913\n", - " Name: N/A\n", - "\n", - "413. ID: auto-labeling-model-1754975127019-903\n", - " Name: N/A\n", - "\n", - "414. ID: auto-labeling-model-1754975368613-717\n", - " Name: N/A\n", - "\n", - "415. ID: auto-labeling-model-1754975432901-90\n", - " Name: N/A\n", - "\n", - "416. ID: auto-labeling-model-1754975454687-707\n", - " Name: N/A\n", - "\n", - "417. ID: auto-labeling-model-1754975527897-708\n", - " Name: N/A\n", - "\n", - "418. ID: auto-labeling-model-1754975600064-524\n", - " Name: N/A\n", - "\n", - "419. ID: auto-labeling-model-1754975711179-28\n", - " Name: N/A\n", - "\n", - "420. ID: auto-labeling-model-1754975967653-203\n", - " Name: N/A\n", - "\n", - "421. 
ID: auto-labeling-model-1754976038813-381\n", - " Name: N/A\n", - "\n", - "422. ID: auto-labeling-model-1754976117940-973\n", - " Name: N/A\n", - "\n", - "423. ID: auto-labeling-model-1754976193933-189\n", - " Name: N/A\n", - "\n", - "424. ID: auto-labeling-model-1754976293724-520\n", - " Name: N/A\n", - "\n", - "425. ID: auto-labeling-model-1754976368518-509\n", - " Name: N/A\n", - "\n", - "426. ID: auto-labeling-model-1754976437096-539\n", - " Name: N/A\n", - "\n", - "427. ID: auto-labeling-model-1754976513472-952\n", - " Name: N/A\n", - "\n", - "428. ID: auto-labeling-model-1754976754715-501\n", - " Name: N/A\n", - "\n", - "429. ID: auto-labeling-model-1754976904752-710\n", - " Name: N/A\n", - "\n", - "430. ID: auto-labeling-model-1754976976653-350\n", - " Name: N/A\n", - "\n", - "431. ID: auto-labeling-model-1754977052535-217\n", - " Name: N/A\n", - "\n", - "432. ID: auto-labeling-model-1754977121829-706\n", - " Name: N/A\n", - "\n", - "433. ID: auto-labeling-model-1754977217214-291\n", - " Name: N/A\n", - "\n", - "434. ID: auto-labeling-model-1754977287574-575\n", - " Name: N/A\n", - "\n", - "435. ID: auto-labeling-model-1754977360553-264\n", - " Name: N/A\n", - "\n", - "436. ID: auto-labeling-model-1754977435968-198\n", - " Name: N/A\n", - "\n", - "437. ID: auto-labeling-model-1754977508312-429\n", - " Name: N/A\n", - "\n", - "438. ID: auto-labeling-model-1754977588026-221\n", - " Name: N/A\n", - "\n", - "439. ID: auto-labeling-model-1754977663056-797\n", - " Name: N/A\n", - "\n", - "440. ID: auto-labeling-model-1754978589858-924\n", - " Name: N/A\n", - "\n", - "441. ID: auto-labeling-model-1754978799780-511\n", - " Name: N/A\n", - "\n", - "442. ID: auto-labeling-model-1754980148754-523\n", - " Name: N/A\n", - "\n", - "443. ID: auto-labeling-model-1754980966501-518\n", - " Name: N/A\n", - "\n", - "444. ID: auto-labeling-model-1754981828125-533\n", - " Name: N/A\n", - "\n", - "445. ID: auto-labeling-model-1754983426916-774\n", - " Name: N/A\n", - "\n", - "446. 
ID: auto-labeling-model-1754984348089-313\n", - " Name: N/A\n", - "\n", - "447. ID: auto-labeling-model-1754984423463-874\n", - " Name: N/A\n", - "\n", - "448. ID: auto-labeling-model-1754984499501-967\n", - " Name: N/A\n", - "\n", - "449. ID: auto-labeling-model-1754984577453-603\n", - " Name: N/A\n", - "\n", - "450. ID: auto-labeling-model-1754984673348-39\n", - " Name: N/A\n", - "\n", - "451. ID: auto-labeling-model-1754984745908-988\n", - " Name: N/A\n", - "\n", - "452. ID: auto-labeling-model-1754984844230-121\n", - " Name: N/A\n", - "\n", - "453. ID: auto-labeling-model-1754985031421-137\n", - " Name: N/A\n", - "\n", - "454. ID: auto-labeling-model-1754985052679-764\n", - " Name: N/A\n", - "\n", - "455. ID: auto-labeling-model-1754985230207-884\n", - " Name: N/A\n", - "\n", - "456. ID: auto-labeling-model-1754993665797-458\n", - " Name: N/A\n", - "\n", - "457. ID: auto-labeling-model-1754993775398-308\n", - " Name: N/A\n", - "\n", - "458. ID: auto-labeling-model-1755021430602-389\n", - " Name: N/A\n", - "\n", - "459. ID: auto-labeling-model-1755021530633-576\n", - " Name: N/A\n", - "\n", - "460. ID: auto-labeling-model-1755034509086-812\n", - " Name: N/A\n", - "\n", - "461. ID: auto-labeling-model-1755036680421-274\n", - " Name: N/A\n", - "\n", - "462. ID: auto-labeling-model-1755036840212-13\n", - " Name: N/A\n", - "\n", - "463. ID: auto-labeling-model-1755037123033-737\n", - " Name: N/A\n", - "\n", - "464. ID: auto-labeling-model-1755041702234-29\n", - " Name: N/A\n", - "\n", - "465. ID: auto-labeling-model-1755041716845-12\n", - " Name: N/A\n", - "\n", - "466. ID: auto-labeling-model-1755043090900-677\n", - " Name: N/A\n", - "\n", - "467. ID: auto-labeling-model-1755044191218-796\n", - " Name: N/A\n", - "\n", - "468. ID: auto-labeling-model-1755044423164-353\n", - " Name: N/A\n", - "\n", - "469. ID: auto-labeling-model-1755048701795-244\n", - " Name: N/A\n", - "\n", - "470. ID: auto-labeling-model-1755048719130-947\n", - " Name: N/A\n", - "\n", - "471. 
ID: auto-labeling-model-1755048825616-336\n", - " Name: N/A\n", - "\n", - "472. ID: auto-labeling-model-1755048863902-319\n", - " Name: N/A\n", - "\n", - "473. ID: auto-labeling-model-1755048975788-30\n", - " Name: N/A\n", - "\n", - "474. ID: auto-labeling-model-1755049161847-499\n", - " Name: N/A\n", - "\n", - "475. ID: auto-labeling-model-1755061734445-540\n", - " Name: N/A\n", - "\n", - "476. ID: auto-labeling-model-1755061987015-686\n", - " Name: N/A\n", - "\n", - "477. ID: auto-labeling-model-1755062318015-752\n", - " Name: N/A\n", - "\n", - "478. ID: auto-labeling-model-1755062966345-99\n", - " Name: N/A\n", - "\n", - "479. ID: auto-labeling-model-1755063315485-717\n", - " Name: N/A\n", - "\n", - "480. ID: auto-labeling-model-1755063386013-936\n", - " Name: N/A\n", - "\n", - "481. ID: auto-labeling-model-1755069455912-277\n", - " Name: N/A\n", - "\n", - "482. ID: auto-labeling-model-1755069553935-338\n", - " Name: N/A\n", - "\n", - "483. ID: auto-labeling-model-1755069702068-412\n", - " Name: N/A\n", - "\n", - "484. ID: auto-labeling-model-1755069842876-922\n", - " Name: N/A\n", - "\n", - "485. ID: auto-labeling-model-1755072279253-390\n", - " Name: N/A\n", - "\n", - "486. ID: auto-labeling-model-1755076709324-342\n", - " Name: N/A\n", - "\n", - "487. ID: auto-labeling-model-1755077617558-667\n", - " Name: N/A\n", - "\n", - "488. ID: auto-labeling-model-1755077873604-810\n", - " Name: N/A\n", - "\n", - "489. ID: auto-labeling-model-1755078021426-256\n", - " Name: N/A\n", - "\n", - "490. ID: auto-labeling-model-1755134767049-985\n", - " Name: N/A\n", - "\n", - "491. ID: auto-labeling-model-1755135457748-675\n", - " Name: N/A\n", - "\n", - "492. ID: auto-labeling-model-1755220299075-866\n", - " Name: N/A\n", - "\n", - "493. ID: auto-labeling-model-1755221919898-254\n", - " Name: N/A\n", - "\n", - "494. ID: auto-labeling-model-1755222009716-189\n", - " Name: N/A\n", - "\n", - "495. ID: auto-labeling-model-1755222110837-250\n", - " Name: N/A\n", - "\n", - "496. 
ID: auto-labeling-model-1755222196939-944\n", - " Name: N/A\n", - "\n", - "497. ID: auto-labeling-model-1755222580985-811\n", - " Name: N/A\n", - "\n", - "498. ID: auto-labeling-model-1755224344739-857\n", - " Name: N/A\n", - "\n", - "499. ID: auto-labeling-model-1755224418333-237\n", - " Name: N/A\n", - "\n", - "500. ID: auto-labeling-model-1755224501846-126\n", - " Name: N/A\n", - "\n", - "501. ID: auto-labeling-model-1755224573788-830\n", - " Name: N/A\n", - "\n", - "502. ID: auto-labeling-model-1755274111236-815\n", - " Name: N/A\n", - "\n", - "503. ID: auto-labeling-model-1755546385161-718\n", - " Name: N/A\n", - "\n", - "504. ID: auto-labeling-model-1755564859753-49\n", - " Name: N/A\n", - "\n", - "505. ID: auto-labeling-model-1755571891436-24\n", - " Name: N/A\n", - "\n", - "506. ID: auto-labeling-model-1755575417648-956\n", - " Name: N/A\n", - "\n", - "507. ID: auto-labeling-model-1755589868572-105\n", - " Name: N/A\n", - "\n", - "508. ID: auto-labeling-model-1755623887267-687\n", - " Name: N/A\n", - "\n", - "509. ID: auto-labeling-model-1755657602248-443\n", - " Name: N/A\n", - "\n", - "510. ID: auto-labeling-model-1755671136055-108\n", - " Name: N/A\n", - "\n", - "511. ID: auto-labeling-model-1755673245801-744\n", - " Name: N/A\n", - "\n", - "512. ID: auto-labeling-model-1755675180889-142\n", - " Name: N/A\n", - "\n", - "513. ID: auto-labeling-model-1755678446620-988\n", - " Name: N/A\n", - "\n", - "514. ID: auto-labeling-model-1755738759590-405\n", - " Name: N/A\n", - "\n", - "515. ID: auto-labeling-model-1755741941138-610\n", - " Name: N/A\n", - "\n", - "516. ID: auto-labeling-model-1755745805348-731\n", - " Name: N/A\n", - "\n", - "517. ID: auto-labeling-model-1755753976159-223\n", - " Name: N/A\n", - "\n", - "518. ID: auto-labeling-model-1755756092896-628\n", - " Name: N/A\n", - "\n", - "519. ID: auto-labeling-model-1755761289894-657\n", - " Name: N/A\n", - "\n", - "520. ID: auto-labeling-model-1755824923780-82\n", - " Name: N/A\n", - "\n", - "521. 
ID: auto-labeling-model-1755839089591-320\n", - " Name: N/A\n", - "\n", - "522. ID: auto-labeling-model-1755840078392-806\n", - " Name: N/A\n", - "\n", - "523. ID: auto-labeling-model-1755843001974-210\n", - " Name: N/A\n", - "\n", - "524. ID: auto-labeling-model-1755844906709-250\n", - " Name: N/A\n", - "\n", - "525. ID: auto-labeling-model-1755846971954-69\n", - " Name: N/A\n", - "\n", - "526. ID: auto-labeling-model-1755847550122-149\n", - " Name: N/A\n", - "\n", - "527. ID: auto-labeling-model-1755849254781-355\n", - " Name: N/A\n", - "\n", - "528. ID: auto-labeling-model-1755854539631-293\n", - " Name: N/A\n", - "\n", - "529. ID: auto-labeling-model-1756087002299-72\n", - " Name: N/A\n", - "\n", - "530. ID: auto-labeling-model-1756087565828-132\n", - " Name: N/A\n", - "\n", - "531. ID: auto-labeling-model-1756087680461-719\n", - " Name: N/A\n", - "\n", - "532. ID: auto-labeling-model-1756087819774-813\n", - " Name: N/A\n", - "\n", - "533. ID: auto-labeling-model-1756087867761-583\n", - " Name: N/A\n", - "\n", - "534. ID: auto-labeling-model-1756112514075-201\n", - " Name: N/A\n", - "\n", - "535. ID: auto-labeling-model-1756137207447-376\n", - " Name: N/A\n", - "\n", - "536. ID: auto-labeling-model-1756137492728-788\n", - " Name: N/A\n", - "\n", - "537. ID: auto-labeling-model-1756138904093-804\n", - " Name: N/A\n", - "\n", - "538. ID: auto-labeling-model-1756193938984-510\n", - " Name: N/A\n", - "\n", - "539. ID: auto-labeling-model-1756279382223-424\n", - " Name: N/A\n", - "\n", - "540. ID: auto-labeling-model-1756281178604-829\n", - " Name: N/A\n", - "\n", - "541. ID: auto-labeling-model-1756347012781-494\n", - " Name: N/A\n", - "\n", - "542. ID: auto-labeling-model-1756348972897-103\n", - " Name: N/A\n", - "\n", - "543. ID: auto-labeling-model-1756349422839-305\n", - " Name: N/A\n", - "\n", - "544. ID: auto-labeling-model-1756349498730-552\n", - " Name: N/A\n", - "\n", - "545. ID: auto-labeling-model-1756360413351-308\n", - " Name: N/A\n", - "\n", - "546. 
ID: auto-labeling-model-1756363959156-20\n", - " Name: N/A\n", - "\n", - "547. ID: auto-labeling-model-1756369801529-118\n", - " Name: N/A\n", - "\n", - "548. ID: auto-labeling-model-1756430598758-905\n", - " Name: N/A\n", - "\n", - "549. ID: auto-labeling-model-1756440760505-307\n", - " Name: N/A\n", - "\n", - "550. ID: auto-labeling-model-1756460100800-668\n", - " Name: N/A\n", - "\n", - "551. ID: auto-labeling-model-1756460110544-559\n", - " Name: N/A\n", - "\n", - "552. ID: auto-labeling-model-1756693820728-76\n", - " Name: N/A\n", - "\n", - "553. ID: auto-labeling-model-1756912886736-101\n", - " Name: N/A\n", - "\n", - "554. ID: auto-labeling-model-1757497814136-763\n", - " Name: N/A\n", - "\n", - "555. ID: auto-labeling-model-1757663204666-122\n", - " Name: N/A\n", - "\n", - "556. ID: auto-labeling-model-1757995180429-664\n", - " Name: N/A\n", - "\n", - "557. ID: auto-labeling-model-1758045209157-220\n", - " Name: N/A\n", - "\n", - "558. ID: auto-labeling-model-1758045343765-419\n", - " Name: N/A\n", - "\n", - "559. ID: auto-labeling-model-1758182652735-580\n", - " Name: N/A\n", - "\n", - "560. ID: auto-labeling-model-1758551942230-384\n", - " Name: N/A\n", - "\n", - "561. ID: auto-labeling-model-1758693093755-157\n", - " Name: N/A\n", - "\n", - "562. ID: auto-labeling-model-1758703215086-912\n", - " Name: N/A\n", - "\n", - "563. ID: auto-labeling-model-1758742506653-803\n", - " Name: N/A\n", - "\n", - "564. ID: auto-labeling-model-1758859363470-900\n", - " Name: N/A\n", - "\n", - "565. ID: auto-labeling-model-1758861498544-317\n", - " Name: N/A\n", - "\n", - "566. ID: auto-labeling-model-1759166848691-35\n", - " Name: N/A\n", - "\n", - "567. ID: auto-labeling-model-1759310043204-41\n", - " Name: N/A\n", - "\n", - "568. ID: auto-labeling-model-1759334232768-397\n", - " Name: N/A\n", - "\n", - "569. ID: auto-labeling-model-1759817160138-569\n", - " Name: N/A\n", - "\n", - "570. ID: auto-labeling-model-1759956977266-516\n", - " Name: N/A\n", - "\n", - "571. 
ID: auto-labeling-model-1760426322250-908\n", - " Name: N/A\n", - "\n", - "572. ID: auto-labeling-model-1760479341007-491\n", - " Name: N/A\n", - "\n", - "573. ID: auto-labeling-model-1760479492039-631\n", - " Name: N/A\n", - "\n", - "574. ID: auto-labeling-model-1760479645658-613\n", - " Name: N/A\n", - "\n", - "575. ID: auto-labeling-model-1760479761056-497\n", - " Name: N/A\n", - "\n", - "576. ID: auto-labeling-model-1760479780527-626\n", - " Name: N/A\n", - "\n", - "577. ID: auto-labeling-model-1760479932099-212\n", - " Name: N/A\n", - "\n", - "578. ID: auto-labeling-model-1760479949487-358\n", - " Name: N/A\n", - "\n", - "579. ID: auto-labeling-model-1760480389179-217\n", - " Name: N/A\n", - "\n", - "580. ID: auto-labeling-model-1760490988143-30\n", - " Name: N/A\n", - "\n", - "581. ID: auto-labeling-model-1760499252646-774\n", - " Name: N/A\n", - "\n", - "582. ID: auto-labeling-model-1760539714171-740\n", - " Name: N/A\n", - "\n", - "583. ID: auto-labeling-model-1760540221082-518\n", - " Name: N/A\n", - "\n", - "584. ID: auto-labeling-model-1760566206649-192\n", - " Name: N/A\n", - "\n", - "585. ID: auto-labeling-model-1760649718443-469\n", - " Name: N/A\n", - "\n", - "586. ID: auto-labeling-model-1760974810245-633\n", - " Name: N/A\n", - "\n", - "587. ID: auto-labeling-model-1761060285537-410\n", - " Name: N/A\n", - "\n", - "588. ID: auto-labeling-model-1761072595965-766\n", - " Name: N/A\n", - "\n", - "589. ID: auto-labeling-model-1761170322608-61\n", - " Name: N/A\n", - "\n", - "590. ID: auto-labeling-model-1761170507108-187\n", - " Name: N/A\n", - "\n", - "591. ID: cu-eox\n", - " Name: N/A\n", - "\n", - "592. ID: cu-test-2\n", - " Name: N/A\n", - "\n", - "593. ID: cu-test-3\n", - " Name: N/A\n", - "\n", - "594. ID: cu-test\n", - " Name: N/A\n", - "\n", - "595. ID: cu-test3\n", - " Name: N/A\n", - "\n", - "596. ID: cu-trainig-debug\n", - " Name: N/A\n", - "\n", - "597. ID: cx-deloitte-all-items-good\n", - " Name: N/A\n", - "\n", - "598. 
ID: cx-deloitte-all-items-idex\n", - " Name: N/A\n", - "\n", - "599. ID: cx-deloitte-all-items-keep-one-label\n", - " Name: N/A\n", - "\n", - "600. ID: cx-deloitte-all-items\n", - " Name: N/A\n", - "\n", - "601. ID: cx-deloitte\n", - " Name: N/A\n", - "\n", - "602. ID: default\n", - " Name: N/A\n", - "\n", - "603. ID: document-test\n", - " Name: N/A\n", - "\n", - "604. ID: example\n", - " Name: N/A\n", - "\n", - "605. ID: excel\n", - " Name: N/A\n", - "\n", - "606. ID: highlight-analyzer-03673070-1755304831\n", - " Name: N/A\n", - "\n", - "607. ID: highlight-analyzer-1755112569\n", - " Name: N/A\n", - "\n", - "608. ID: highlight-analyzer-1755113090\n", - " Name: N/A\n", - "\n", - "609. ID: highlight-analyzer-1755117427\n", - " Name: N/A\n", - "\n", - "610. ID: highlight-analyzer-1755127191\n", - " Name: N/A\n", - "\n", - "611. ID: highlight-analyzer-1755128917\n", - " Name: N/A\n", - "\n", - "612. ID: highlight-analyzer-1755204485\n", - " Name: N/A\n", - "\n", - "613. ID: highlight-analyzer-1755205148\n", - " Name: N/A\n", - "\n", - "614. ID: highlight-analyzer-1755304423\n", - " Name: N/A\n", - "\n", - "615. ID: highlight-analyzer-49453d78-1755304719\n", - " Name: N/A\n", - "\n", - "616. ID: insurance-test\n", - " Name: N/A\n", - "\n", - "617. ID: invoiceLabeledData\n", - " Name: N/A\n", - "\n", - "618. ID: joann-insurance\n", - " Name: N/A\n", - "\n", - "619. ID: joann-tryout-invoice\n", - " Name: N/A\n", - "\n", - "620. ID: k\n", - " Name: N/A\n", - "\n", - "621. ID: minus\n", - " Name: N/A\n", - "\n", - "622. ID: mySampleAnalyzer\n", - " Name: N/A\n", - "\n", - "623. ID: pro-test\n", - " Name: N/A\n", - "\n", - "624. ID: proExample\n", - " Name: N/A\n", - "\n", - "625. ID: sampleAnalyzer273626\n", - " Name: N/A\n", - "\n", - "626. ID: sampleAnalyzer530775\n", - " Name: N/A\n", - "\n", - "627. ID: sampleAnalyzer679281\n", - " Name: N/A\n", - "\n", - "628. ID: shihw-insurance-0529\n", - " Name: N/A\n", - "\n", - "629. 
ID: shihw-video-test0528\n", - " Name: N/A\n", - "\n", - "630. ID: soccer-highlights-analyzer-v1\n", - " Name: N/A\n", - "\n", - "631. ID: soccer-highlights-analyzer-v2\n", - " Name: N/A\n", - "\n", - "632. ID: soccer-highlights-analyzer-v3\n", - " Name: N/A\n", - "\n", - "633. ID: soccer-highlights-analyzer-v4\n", - " Name: N/A\n", - "\n", - "634. ID: soccer-highlights-analyzer-v5\n", - " Name: N/A\n", - "\n", - "635. ID: soccer-highlights-analyzer1751301403\n", - " Name: N/A\n", - "\n", - "636. ID: soccer-highlights-analyzer1751301722\n", - " Name: N/A\n", - "\n", - "637. ID: soccer-highlights-analyzer2-v1\n", - " Name: N/A\n", - "\n", - "638. ID: soccer-highlights-analyzer5314167881751302137\n", - " Name: N/A\n", - "\n", - "639. ID: soccer-highlights-analyzer5314167881751302581\n", - " Name: N/A\n", - "\n", - "640. ID: soccer-highlights-analyzer5314167881751303949\n", - " Name: N/A\n", - "\n", - "641. ID: soccer-highlights-analyzer5314167881751306147\n", - " Name: N/A\n", - "\n", - "642. ID: soccer-highlights-analyzer5314167881751313349\n", - " Name: N/A\n", - "\n", - "643. ID: soccer-highlights-analyzer5314167881755019232\n", - " Name: N/A\n", - "\n", - "644. ID: soccer-highlights-analyzer5314167881755020564\n", - " Name: N/A\n", - "\n", - "645. ID: soccer-highlights-analyzer5314167881755023993\n", - " Name: N/A\n", - "\n", - "646. ID: soccer-highlights-analyzer5314167881755029594\n", - " Name: N/A\n", - "\n", - "647. ID: soccer-highlights-analyzer54167881751301841\n", - " Name: N/A\n", - "\n", - "648. ID: t\n", - " Name: N/A\n", - "\n", - "649. ID: tes\n", - " Name: N/A\n", - "\n", - "650. ID: test-bar-gap\n", - " Name: N/A\n", - "\n", - "651. ID: test\n", - " Name: N/A\n", - "\n", - "652. ID: testMeow\n", - " Name: N/A\n", - "\n", - "653. ID: tingwliu-invoice-test\n", - " Name: N/A\n", - "\n", - "654. ID: video-250808\n", - " Name: N/A\n", - "\n", - "655. ID: video\n", - " Name: N/A\n", - "\n", - "656. ID: videotest\n", - " Name: N/A\n", - "\n", - "657. 
ID: yahch-contract-0805-1\n", - " Name: N/A\n", - "\n", - "658. ID: yahch-document-HKinvoice-label-2\n", - " Name: N/A\n", - "\n", - "659. ID: yahch-document-HKinvoice-label-3\n", - " Name: N/A\n", - "\n", - "660. ID: yahch-document-HKinvoice-label-4\n", - " Name: N/A\n", - "\n", - "661. ID: yahch-document-HKinvoice-label-5\n", - " Name: N/A\n", - "\n", - "662. ID: yahch-document-HKinvoice-label-6\n", - " Name: N/A\n", - "\n", - "663. ID: yahch-document-HKinvoice-label\n", - " Name: N/A\n", - "\n", - "664. ID: yahch-document-HKinvoice-local-1\n", - " Name: N/A\n", - "\n", - "665. ID: yahch-document-HKinvoice-local-icl-1\n", - " Name: N/A\n", - "\n", - "666. ID: yahch-document-HKinvoice-local-icl-2\n", - " Name: N/A\n", - "\n", - "667. ID: yahch-invoice-HKinvoice-local-icl-1\n", - " Name: N/A\n", - "\n", - "668. ID: yahch-invoice-HKinvoice-local-zeroshot-1\n", - " Name: N/A\n", - "\n", - "669. ID: yahch-invoice-HKinvoice-local-zeroshot-2\n", - " Name: N/A\n", - "\n", - "670. ID: yiyun\n", - " Name: N/A\n", - "\n", - "671. ID: yiyun1223\n", - " Name: N/A\n", - "\n", - "672. ID: yiyun3333\n", - " Name: N/A\n", - "\n", - "673. ID: yiyun65656\n", - " Name: N/A\n", - "\n", - "674. ID: yiyunPromode\n", - " Name: N/A\n", - "\n", - "675. 
ID: yslin-2025-06-25-generative-date-fields\n", - " Name: N/A\n", - "\n" - ] - } - ], + "execution_count": null, + "id": "fcbc218a", + "metadata": {}, + "outputs": [], "source": [ "# Get all analyzers in your resource\n", "all_analyzers = client.get_all_analyzers()\n", @@ -2322,7 +287,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "id": "9772b0f5", "metadata": {}, "outputs": [ @@ -2336,8 +301,10 @@ ], "source": [ "# OPTION 1: Specify an existing analyzer ID that has training data\n", - "# Replace this with your actual analyzer ID\n", - "SOURCE_ANALYZER_ID = \"invoiceLabeledData\"\n", + "\n", + "# ⚠️ REQUIRED: Replace \"MyAnalyzer\" with your actual analyzer ID from the list above\n", + "# You can find available analyzer IDs in the output of the previous cell\n", + "SOURCE_ANALYZER_ID = \"MyAnalyzer\" # ← CHANGE THIS!\n", "\n", "# Uncomment to use the first analyzer from the list\n", "# if analyzers_list:\n", From a5ca66de2ca250f1f027181b8f3416a750eecfad Mon Sep 17 00:00:00 2001 From: Joe Filcik Date: Thu, 23 Oct 2025 15:15:21 -0400 Subject: [PATCH 3/6] Improving readability --- .../move_training_data_across_analyzers.ipynb | 219 +----------------- 1 file changed, 5 insertions(+), 214 deletions(-) diff --git a/notebooks/move_training_data_across_analyzers.ipynb b/notebooks/move_training_data_across_analyzers.ipynb index 6bc6195..428d9db 100644 --- a/notebooks/move_training_data_across_analyzers.ipynb +++ b/notebooks/move_training_data_across_analyzers.ipynb @@ -27,80 +27,10 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "id": "2f76b866", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Defaulting to user installation because normal site-packages is not writeable\n", - "Requirement already satisfied: aiohttp in /home/vscode/.local/lib/python3.11/site-packages (from -r ../requirements.txt (line 1)) (3.12.15)\n", - "Requirement already satisfied: 
azure-identity in /home/vscode/.local/lib/python3.11/site-packages (from -r ../requirements.txt (line 2)) (1.25.0)\n", - "Requirement already satisfied: azure-storage-blob in /home/vscode/.local/lib/python3.11/site-packages (from -r ../requirements.txt (line 3)) (12.26.0)\n", - "Requirement already satisfied: python-dotenv in /home/vscode/.local/lib/python3.11/site-packages (from -r ../requirements.txt (line 4)) (1.1.1)\n", - "Requirement already satisfied: requests in /home/vscode/.local/lib/python3.11/site-packages (from -r ../requirements.txt (line 5)) (2.32.5)\n", - "Requirement already satisfied: Pillow in /home/vscode/.local/lib/python3.11/site-packages (from -r ../requirements.txt (line 6)) (11.3.0)\n", - "Requirement already satisfied: aiohappyeyeballs>=2.5.0 in /home/vscode/.local/lib/python3.11/site-packages (from aiohttp->-r ../requirements.txt (line 1)) (2.6.1)\n", - "Requirement already satisfied: aiosignal>=1.4.0 in /home/vscode/.local/lib/python3.11/site-packages (from aiohttp->-r ../requirements.txt (line 1)) (1.4.0)\n", - "Requirement already satisfied: attrs>=17.3.0 in /home/vscode/.local/lib/python3.11/site-packages (from aiohttp->-r ../requirements.txt (line 1)) (25.3.0)\n", - "Requirement already satisfied: frozenlist>=1.1.1 in /home/vscode/.local/lib/python3.11/site-packages (from aiohttp->-r ../requirements.txt (line 1)) (1.7.0)\n", - "Requirement already satisfied: multidict<7.0,>=4.5 in /home/vscode/.local/lib/python3.11/site-packages (from aiohttp->-r ../requirements.txt (line 1)) (6.6.4)\n", - "Requirement already satisfied: propcache>=0.2.0 in /home/vscode/.local/lib/python3.11/site-packages (from aiohttp->-r ../requirements.txt (line 1)) (0.3.2)\n", - "Requirement already satisfied: yarl<2.0,>=1.17.0 in /home/vscode/.local/lib/python3.11/site-packages (from aiohttp->-r ../requirements.txt (line 1)) (1.20.1)\n", - "Requirement already satisfied: aiohttp in /home/vscode/.local/lib/python3.11/site-packages (from -r ../requirements.txt 
(line 1)) (3.12.15)\n", - "Requirement already satisfied: azure-identity in /home/vscode/.local/lib/python3.11/site-packages (from -r ../requirements.txt (line 2)) (1.25.0)\n", - "Requirement already satisfied: azure-storage-blob in /home/vscode/.local/lib/python3.11/site-packages (from -r ../requirements.txt (line 3)) (12.26.0)\n", - "Requirement already satisfied: python-dotenv in /home/vscode/.local/lib/python3.11/site-packages (from -r ../requirements.txt (line 4)) (1.1.1)\n", - "Requirement already satisfied: requests in /home/vscode/.local/lib/python3.11/site-packages (from -r ../requirements.txt (line 5)) (2.32.5)\n", - "Requirement already satisfied: Pillow in /home/vscode/.local/lib/python3.11/site-packages (from -r ../requirements.txt (line 6)) (11.3.0)\n", - "Requirement already satisfied: aiohappyeyeballs>=2.5.0 in /home/vscode/.local/lib/python3.11/site-packages (from aiohttp->-r ../requirements.txt (line 1)) (2.6.1)\n", - "Requirement already satisfied: aiosignal>=1.4.0 in /home/vscode/.local/lib/python3.11/site-packages (from aiohttp->-r ../requirements.txt (line 1)) (1.4.0)\n", - "Requirement already satisfied: attrs>=17.3.0 in /home/vscode/.local/lib/python3.11/site-packages (from aiohttp->-r ../requirements.txt (line 1)) (25.3.0)\n", - "Requirement already satisfied: frozenlist>=1.1.1 in /home/vscode/.local/lib/python3.11/site-packages (from aiohttp->-r ../requirements.txt (line 1)) (1.7.0)\n", - "Requirement already satisfied: multidict<7.0,>=4.5 in /home/vscode/.local/lib/python3.11/site-packages (from aiohttp->-r ../requirements.txt (line 1)) (6.6.4)\n", - "Requirement already satisfied: propcache>=0.2.0 in /home/vscode/.local/lib/python3.11/site-packages (from aiohttp->-r ../requirements.txt (line 1)) (0.3.2)\n", - "Requirement already satisfied: yarl<2.0,>=1.17.0 in /home/vscode/.local/lib/python3.11/site-packages (from aiohttp->-r ../requirements.txt (line 1)) (1.20.1)\n", - "Requirement already satisfied: azure-core>=1.31.0 in 
/home/vscode/.local/lib/python3.11/site-packages (from azure-identity->-r ../requirements.txt (line 2)) (1.35.1)\n", - "Requirement already satisfied: cryptography>=2.5 in /home/vscode/.local/lib/python3.11/site-packages (from azure-identity->-r ../requirements.txt (line 2)) (46.0.2)\n", - "Requirement already satisfied: msal>=1.30.0 in /home/vscode/.local/lib/python3.11/site-packages (from azure-identity->-r ../requirements.txt (line 2)) (1.34.0)\n", - "Requirement already satisfied: msal-extensions>=1.2.0 in /home/vscode/.local/lib/python3.11/site-packages (from azure-identity->-r ../requirements.txt (line 2)) (1.3.1)\n", - "Requirement already satisfied: typing-extensions>=4.0.0 in /home/vscode/.local/lib/python3.11/site-packages (from azure-identity->-r ../requirements.txt (line 2)) (4.15.0)\n", - "Requirement already satisfied: isodate>=0.6.1 in /home/vscode/.local/lib/python3.11/site-packages (from azure-storage-blob->-r ../requirements.txt (line 3)) (0.7.2)\n", - "Requirement already satisfied: charset_normalizer<4,>=2 in /home/vscode/.local/lib/python3.11/site-packages (from requests->-r ../requirements.txt (line 5)) (3.4.3)\n", - "Requirement already satisfied: idna<4,>=2.5 in /home/vscode/.local/lib/python3.11/site-packages (from requests->-r ../requirements.txt (line 5)) (3.10)\n", - "Requirement already satisfied: urllib3<3,>=1.21.1 in /home/vscode/.local/lib/python3.11/site-packages (from requests->-r ../requirements.txt (line 5)) (2.5.0)\n", - "Requirement already satisfied: certifi>=2017.4.17 in /home/vscode/.local/lib/python3.11/site-packages (from requests->-r ../requirements.txt (line 5)) (2025.8.3)\n", - "Requirement already satisfied: azure-core>=1.31.0 in /home/vscode/.local/lib/python3.11/site-packages (from azure-identity->-r ../requirements.txt (line 2)) (1.35.1)\n", - "Requirement already satisfied: cryptography>=2.5 in /home/vscode/.local/lib/python3.11/site-packages (from azure-identity->-r ../requirements.txt (line 2)) (46.0.2)\n", - 
"Requirement already satisfied: msal>=1.30.0 in /home/vscode/.local/lib/python3.11/site-packages (from azure-identity->-r ../requirements.txt (line 2)) (1.34.0)\n", - "Requirement already satisfied: msal-extensions>=1.2.0 in /home/vscode/.local/lib/python3.11/site-packages (from azure-identity->-r ../requirements.txt (line 2)) (1.3.1)\n", - "Requirement already satisfied: typing-extensions>=4.0.0 in /home/vscode/.local/lib/python3.11/site-packages (from azure-identity->-r ../requirements.txt (line 2)) (4.15.0)\n", - "Requirement already satisfied: isodate>=0.6.1 in /home/vscode/.local/lib/python3.11/site-packages (from azure-storage-blob->-r ../requirements.txt (line 3)) (0.7.2)\n", - "Requirement already satisfied: charset_normalizer<4,>=2 in /home/vscode/.local/lib/python3.11/site-packages (from requests->-r ../requirements.txt (line 5)) (3.4.3)\n", - "Requirement already satisfied: idna<4,>=2.5 in /home/vscode/.local/lib/python3.11/site-packages (from requests->-r ../requirements.txt (line 5)) (3.10)\n", - "Requirement already satisfied: urllib3<3,>=1.21.1 in /home/vscode/.local/lib/python3.11/site-packages (from requests->-r ../requirements.txt (line 5)) (2.5.0)\n", - "Requirement already satisfied: certifi>=2017.4.17 in /home/vscode/.local/lib/python3.11/site-packages (from requests->-r ../requirements.txt (line 5)) (2025.8.3)\n", - "Requirement already satisfied: six>=1.11.0 in /home/vscode/.local/lib/python3.11/site-packages (from azure-core>=1.31.0->azure-identity->-r ../requirements.txt (line 2)) (1.17.0)\n", - "Requirement already satisfied: six>=1.11.0 in /home/vscode/.local/lib/python3.11/site-packages (from azure-core>=1.31.0->azure-identity->-r ../requirements.txt (line 2)) (1.17.0)\n", - "Requirement already satisfied: cffi>=2.0.0 in /home/vscode/.local/lib/python3.11/site-packages (from cryptography>=2.5->azure-identity->-r ../requirements.txt (line 2)) (2.0.0)\n", - "Requirement already satisfied: PyJWT<3,>=1.0.0 in 
/home/vscode/.local/lib/python3.11/site-packages (from PyJWT[crypto]<3,>=1.0.0->msal>=1.30.0->azure-identity->-r ../requirements.txt (line 2)) (2.10.1)\n", - "Requirement already satisfied: cffi>=2.0.0 in /home/vscode/.local/lib/python3.11/site-packages (from cryptography>=2.5->azure-identity->-r ../requirements.txt (line 2)) (2.0.0)\n", - "Requirement already satisfied: PyJWT<3,>=1.0.0 in /home/vscode/.local/lib/python3.11/site-packages (from PyJWT[crypto]<3,>=1.0.0->msal>=1.30.0->azure-identity->-r ../requirements.txt (line 2)) (2.10.1)\n", - "Requirement already satisfied: pycparser in /home/vscode/.local/lib/python3.11/site-packages (from cffi>=2.0.0->cryptography>=2.5->azure-identity->-r ../requirements.txt (line 2)) (2.23)\n", - "Requirement already satisfied: pycparser in /home/vscode/.local/lib/python3.11/site-packages (from cffi>=2.0.0->cryptography>=2.5->azure-identity->-r ../requirements.txt (line 2)) (2.23)\n", - "\n", - "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m24.0\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m25.2\u001b[0m\n", - "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", - "\n", - "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m24.0\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m25.2\u001b[0m\n", - "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", - "Note: you may need to restart the kernel to use updated packages.\n", - "Note: you may need to restart the kernel to use updated packages.\n" - ] - } - ], + "outputs": [], "source": [ "%pip install -r ../requirements.txt" ] @@ -121,79 +51,10 @@ }, { "cell_type": "code", - 
"execution_count": 4, + "execution_count": null, "id": "bcea7936", "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "INFO:azure.identity._credentials.environment:No environment configuration found.\n", - "INFO:azure.identity._credentials.managed_identity:ManagedIdentityCredential will use IMDS\n", - "INFO:azure.core.pipeline.policies.http_logging_policy:Request URL: 'http://169.254.169.254/metadata/identity/oauth2/token?api-version=REDACTED&resource=REDACTED'\n", - "Request method: 'GET'\n", - "Request headers:\n", - " 'User-Agent': 'azsdk-python-identity/1.25.0 Python/3.11.13 (Linux-6.8.0-1030-azure-x86_64-with-glibc2.41)'\n", - "No body was attached to the request\n", - "INFO:azure.identity._credentials.managed_identity:ManagedIdentityCredential will use IMDS\n", - "INFO:azure.core.pipeline.policies.http_logging_policy:Request URL: 'http://169.254.169.254/metadata/identity/oauth2/token?api-version=REDACTED&resource=REDACTED'\n", - "Request method: 'GET'\n", - "Request headers:\n", - " 'User-Agent': 'azsdk-python-identity/1.25.0 Python/3.11.13 (Linux-6.8.0-1030-azure-x86_64-with-glibc2.41)'\n", - "No body was attached to the request\n", - "INFO:azure.core.pipeline.policies.http_logging_policy:Response status: 400\n", - "Response headers:\n", - " 'Content-Type': 'application/json; charset=utf-8'\n", - " 'Server': 'IMDS/150.870.65.1854'\n", - " 'x-ms-request-id': '7683a8fc-6110-4d17-ba92-e7986c8af8e0'\n", - " 'Date': 'Wed, 22 Oct 2025 22:06:40 GMT'\n", - " 'Content-Length': '88'\n", - "INFO:azure.core.pipeline.policies.http_logging_policy:Request URL: 'http://169.254.169.254/metadata/identity/oauth2/token?api-version=REDACTED&resource=REDACTED'\n", - "Request method: 'GET'\n", - "Request headers:\n", - " 'Metadata': 'REDACTED'\n", - " 'User-Agent': 'azsdk-python-identity/1.25.0 Python/3.11.13 (Linux-6.8.0-1030-azure-x86_64-with-glibc2.41)'\n", - "No body was attached to the request\n", - 
"INFO:azure.core.pipeline.policies.http_logging_policy:Response status: 400\n", - "Response headers:\n", - " 'Content-Type': 'application/json; charset=utf-8'\n", - " 'Server': 'IMDS/150.870.65.1854'\n", - " 'x-ms-request-id': '31ec0b5d-182f-4981-8624-34083dd1c063'\n", - " 'Date': 'Wed, 22 Oct 2025 22:06:40 GMT'\n", - " 'Content-Length': '68'\n", - "INFO:azure.core.pipeline.policies.http_logging_policy:Response status: 400\n", - "Response headers:\n", - " 'Content-Type': 'application/json; charset=utf-8'\n", - " 'Server': 'IMDS/150.870.65.1854'\n", - " 'x-ms-request-id': '7683a8fc-6110-4d17-ba92-e7986c8af8e0'\n", - " 'Date': 'Wed, 22 Oct 2025 22:06:40 GMT'\n", - " 'Content-Length': '88'\n", - "INFO:azure.core.pipeline.policies.http_logging_policy:Request URL: 'http://169.254.169.254/metadata/identity/oauth2/token?api-version=REDACTED&resource=REDACTED'\n", - "Request method: 'GET'\n", - "Request headers:\n", - " 'Metadata': 'REDACTED'\n", - " 'User-Agent': 'azsdk-python-identity/1.25.0 Python/3.11.13 (Linux-6.8.0-1030-azure-x86_64-with-glibc2.41)'\n", - "No body was attached to the request\n", - "INFO:azure.core.pipeline.policies.http_logging_policy:Response status: 400\n", - "Response headers:\n", - " 'Content-Type': 'application/json; charset=utf-8'\n", - " 'Server': 'IMDS/150.870.65.1854'\n", - " 'x-ms-request-id': '31ec0b5d-182f-4981-8624-34083dd1c063'\n", - " 'Date': 'Wed, 22 Oct 2025 22:06:40 GMT'\n", - " 'Content-Length': '68'\n", - "INFO:azure.identity._credentials.chained:DefaultAzureCredential acquired a token from AzureDeveloperCliCredential\n", - "INFO:azure.identity._credentials.chained:DefaultAzureCredential acquired a token from AzureDeveloperCliCredential\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "✅ Content Understanding client initialized successfully!\n" - ] - } - ], + "outputs": [], "source": [ "import logging\n", "import json\n", @@ -280,9 +141,7 @@ "\n", "Specify the ID of the analyzer whose training data you 
want to reuse.\n", "\n", - "**Option 1**: Set `SOURCE_ANALYZER_ID` to an existing analyzer ID from the list above.\n", - "\n", - "**Option 2**: If you don't have an analyzer with training data, uncomment and run the next cell to create one first." + "Set `SOURCE_ANALYZER_ID` to an existing analyzer ID from the list above" ] }, { @@ -314,74 +173,6 @@ "print(f\"Source Analyzer ID: {SOURCE_ANALYZER_ID}\")" ] }, - { - "cell_type": "markdown", - "id": "d7ceffda", - "metadata": {}, - "source": [ - "### Option 2: Create a Source Analyzer with Training Data (Optional)\n", - "\n", - "If you don't have an existing analyzer with training data, run this cell to create one first.\n", - "\n", - "**Prerequisites**:\n", - "- Set environment variables for training data (see [docs/set_env_for_training_data_and_reference_doc.md](../docs/set_env_for_training_data_and_reference_doc.md))\n", - "- Ensure you have labeled training data in `../data/document_training/`" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1ce228bd", - "metadata": {}, - "outputs": [], - "source": [ - "# Uncomment this entire cell if you need to create a source analyzer first\n", - "\n", - "# from azure.storage.blob import ContainerSasPermissions\n", - "\n", - "# # Configure training data\n", - "# analyzer_template_path = \"../analyzer_templates/receipt.json\"\n", - "# training_docs_folder = \"../data/document_training\"\n", - "\n", - "# # Get or generate SAS URL\n", - "# training_data_sas_url = os.getenv(\"TRAINING_DATA_SAS_URL\")\n", - "# if not training_data_sas_url:\n", - "# TRAINING_DATA_STORAGE_ACCOUNT_NAME = os.getenv(\"TRAINING_DATA_STORAGE_ACCOUNT_NAME\")\n", - "# TRAINING_DATA_CONTAINER_NAME = os.getenv(\"TRAINING_DATA_CONTAINER_NAME\")\n", - "# if not TRAINING_DATA_STORAGE_ACCOUNT_NAME:\n", - "# raise ValueError(\n", - "# \"Please set either TRAINING_DATA_SAS_URL or both TRAINING_DATA_STORAGE_ACCOUNT_NAME \"\n", - "# \"and TRAINING_DATA_CONTAINER_NAME environment variables.\"\n", 
- "# )\n", - "# training_data_sas_url = AzureContentUnderstandingClient.generate_temp_container_sas_url(\n", - "# account_name=TRAINING_DATA_STORAGE_ACCOUNT_NAME,\n", - "# container_name=TRAINING_DATA_CONTAINER_NAME,\n", - "# permissions=ContainerSasPermissions(read=True, write=True, list=True),\n", - "# expiry_hours=1,\n", - "# )\n", - "\n", - "# training_data_path = os.getenv(\"TRAINING_DATA_PATH\")\n", - "\n", - "# # Upload training data to blob storage\n", - "# print(\"Uploading training data to blob storage...\")\n", - "# await client.generate_training_data_on_blob(training_docs_folder, training_data_sas_url, training_data_path)\n", - "# print(\"✅ Training data uploaded successfully!\")\n", - "\n", - "# # Create source analyzer\n", - "# SOURCE_ANALYZER_ID = \"source-analyzer-\" + str(uuid.uuid4())\n", - "# print(f\"Creating source analyzer: {SOURCE_ANALYZER_ID}\")\n", - "\n", - "# response = client.begin_create_analyzer(\n", - "# SOURCE_ANALYZER_ID,\n", - "# analyzer_template_path=analyzer_template_path,\n", - "# training_storage_container_sas_url=training_data_sas_url,\n", - "# training_storage_container_path_prefix=training_data_path,\n", - "# )\n", - "# result = client.poll_result(response)\n", - "# print(\"✅ Source analyzer created successfully!\")\n", - "# print(json.dumps(result, indent=2))" - ] - }, { "cell_type": "markdown", "id": "d9b1bc93", From 4a5ff1b988f86a492f9a77fa58dbb83946e7ab51 Mon Sep 17 00:00:00 2001 From: Joe Filcik Date: Tue, 16 Dec 2025 20:44:30 -0500 Subject: [PATCH 4/6] Update README and API testing guide for GA version; Update conversion scripts with "knowledge source" property from the GA API --- python/di_to_cu_migration_tool/README.md | 25 +- .../cu-ga-NoSecrets.http | 570 ++++++++++++++++++ .../cu_converter_generative.py | 19 +- .../cu_converter_neural.py | 19 +- .../di_to_cu_converter.py | 14 +- python/di_to_cu_migration_tool/get_ocr.py | 16 +- .../sample_documents/analyzer_result.json | 2 +- 7 files changed, 638 insertions(+), 
27 deletions(-) create mode 100644 python/di_to_cu_migration_tool/cu-ga-NoSecrets.http diff --git a/python/di_to_cu_migration_tool/README.md b/python/di_to_cu_migration_tool/README.md index e473ad0..d5548c8 100644 --- a/python/di_to_cu_migration_tool/README.md +++ b/python/di_to_cu_migration_tool/README.md @@ -1,13 +1,13 @@ # Document Intelligence to Content Understanding Migration Tool (Python) -Welcome! This tool helps convert your Document Intelligence (DI) datasets to the Content Understanding (CU) **Preview.2** 2025-05-01-preview format, as used in AI Foundry. The following DI versions are supported: +Welcome! This tool helps convert your Document Intelligence (DI) datasets to the Content Understanding (CU) **GA** 2025-11-01 format, as used in AI Foundry. The following DI versions are supported: - Custom Extraction Model DI 3.1 GA (2023-07-31) to DI 4.0 GA (2024-11-30) (Document Intelligence Studio) → DI-version = neural - Document Field Extraction Model 4.0 Preview (2024-07-31-preview) (AI Foundry / AI Services / Vision + Document / Document Field Extraction) → DI-version = generative To identify the version of your Document Intelligence dataset, please consult the sample documents in this folder to match your format. You can also verify the version by reviewing your DI project's user experience. For instance, Custom Extraction DI 3.1/4.0 GA appears in Document Intelligence Studio (https://documentintelligence.ai.azure.com/studio), whereas Document Field Extraction DI 4.0 Preview is only available on Azure AI Foundry's preview service (https://ai.azure.com/explore/aiservices/vision/document/extraction). -For migrating from these DI versions to Content Understanding Preview.2, this tool first converts the DI dataset into a CU-compatible format. After conversion, you can create a Content Understanding Analyzer trained on your converted CU dataset. Additionally, you have the option to test its quality against any sample documents. 
+For migrating from these DI versions to Content Understanding GA (2025-11-01), this tool first converts the DI dataset into a CU-compatible format. After conversion, you can create a Content Understanding Analyzer trained on your converted CU dataset. Additionally, you have the option to test its quality against any sample documents. ## Details About the Tools @@ -43,7 +43,7 @@ Please follow these steps to set up the tool: - **SUBSCRIPTION_KEY:** Update to your Azure AI Service API Key or Subscription ID to authenticate the API requests. - Locate your API Key here: ![Azure AI Service Endpoints With Keys](assets/endpoint-with-keys.png) - If using Azure Active Directory (AAD), please refer to your Subscription ID: ![Azure AI Service Subscription ID](assets/subscription-id.png) - - **API_VERSION:** This is preset to the CU Preview.2 version; no changes are needed. + - **API_VERSION:** This is preset to the CU GA version (2025-11-01); no changes are needed. ## How to Locate Your Document Field Extraction Dataset for Migration @@ -73,8 +73,12 @@ To obtain SAS URLs for a file or folder for any container URL arguments, please 3. Configure permissions and expiry for your SAS URL as follows: - For the **DI source dataset**, please select permissions: _**Read & List**_ +Example: `https://sourceStorageAccount.blob.core.windows.net/sourceContainer?sv=2025-07-05&spr=https&st=2025-12-16T22%3A17%3A06Z&se=2025-12-17T22%3A17%3A06Z&sr=c&sp=rl&sig=<signature>` + - For the **CU target dataset**, please select permissions: _**Read, Add, Create, & Write**_ +Example: `https://targetStorageAccount.blob.core.windows.net/targetContainer?sv=2025-07-05&spr=https&st=2025-12-16T22%3A19%3A39Z&se=2025-12-17T22%3A19%3A39Z&sr=c&sp=racwl&sig=<signature>` + After configuring, click **Generate SAS Token and URL** and copy the URL shown under **Blob SAS URL**.
![Generate SAS Pop-Up](assets/generate-sas-pop-up.png) @@ -98,6 +102,9 @@ If migrating a _DI 3.1/4.0 GA Custom Extraction_ dataset, please run: python ./di_to_cu_converter.py --DI-version neural --analyzer-prefix mySampleAnalyzer \ --source-container-sas-url "https://sourceStorageAccount.blob.core.windows.net/sourceContainer?sourceSASToken" --source-blob-folder diDatasetFolderName \ --target-container-sas-url "https://targetStorageAccount.blob.core.windows.net/targetContainer?targetSASToken" --target-blob-folder cuDatasetFolderName + +python ./di_to_cu_converter.py --DI-version neural --analyzer-prefix mySampleAnalyzer --source-container-sas-url "https://sourceStorageAccount.blob.core.windows.net/sourceContainer?sv=2025-07-05&spr=https&st=2025-12-16T22%3A17%3A06Z&se=2025-12-17T22%3A17%3A06Z&sr=c&sp=rl&sig=<signature>" --source-blob-folder diDatasetFolderName \ +--target-container-sas-url "https://targetStorageAccount.blob.core.windows.net/targetContainer?sv=2025-07-05&spr=https&st=2025-12-16T22%3A19%3A39Z&se=2025-12-17T22%3A19%3A39Z&sr=c&sp=racwl&sig=<signature>" --target-blob-folder cuDatasetFolderName ``` For this migration, specifying an analyzer prefix is crucial for creating a CU analyzer. Since the fields.json does not define a "doc_type" for identification, the created analyzer ID will be the specified analyzer prefix.
@@ -120,9 +127,9 @@ After converting the CU analyzer.json, please run: ``` python ./create_analyzer.py \ ---analyzer-sas-url "https://targetStorageAccount.blob.core.windows.net/targetContainer/cuDatasetFolderName/analyzer.json?targetSASToken" \ ---target-container-sas-url "https://targetStorageAccount.blob.core.windows.net/targetContainer?targetSASToken" \ ---target-blob-folder cuDatasetFolderName +--analyzer-sas-url "https://targetStorageAccount.blob.core.windows.net/targetContainer/cuDatasetFolderName/analyzer.json?targetSASToken" \ +--target-container-sas-url "https://targetStorageAccount.blob.core.windows.net/targetContainer?targetSASToken" \ +--target-blob-folder cuDatasetFolderName ``` The `analyzer.json` file is located in the specified target blob container and folder. Please obtain the SAS URL for `analyzer.json` from there. @@ -155,7 +162,7 @@ Below are common issues you might encounter when creating an analyzer or running - **400 Bad Request** errors: Please validate the following: - The endpoint URL is valid. Example: - `https://yourEndpoint/contentunderstanding/analyzers/yourAnalyzerID?api-version=2025-05-01-preview` + `https://yourEndpoint/contentunderstanding/analyzers/yourAnalyzerID?api-version=2025-11-01` - Your converted CU dataset respects the naming constraints below. If needed, please manually correct the `analyzer.json` fields: - Field names start with a letter or underscore - Field name length must be between 1 and 64 characters @@ -174,7 +181,7 @@ Below are common issues you might encounter when creating an analyzer or running - **400 Bad Request**: This implies that you might have an incorrect endpoint or SAS URL.
Please ensure that your endpoint is valid and that you are using the correct SAS URL for the document: - `https://yourendpoint/contentunderstanding/analyzers/yourAnalyzerID:analyze?api-version=2025-05-01-preview` + `https://yourendpoint/contentunderstanding/analyzers/yourAnalyzerID:analyze?api-version=2025-11-01` Confirm you are using the correct SAS URL for the document. - **401 Unauthorized**: @@ -189,4 +196,4 @@ Below are common issues you might encounter when creating an analyzer or running 2. Signature field types (e.g., in previous DI versions) are not yet supported in Content Understanding. These will be ignored during migration when creating the analyzer. 3. The content of your training documents is retained in the CU model's metadata, under storage specifically. You can find more details at: https://learn.microsoft.com/en-us/legal/cognitive-services/content-understanding/transparency-note?toc=%2Fazure%2Fai-services%2Fcontent-understanding%2Ftoc.json&bc=%2Fazure%2Fai-services%2Fcontent-understanding%2Fbreadcrumb%2Ftoc.json -4. All conversions are for Content Understanding preview.2 version only. \ No newline at end of file +4. All conversions are for Content Understanding GA (2025-11-01) version. \ No newline at end of file diff --git a/python/di_to_cu_migration_tool/cu-ga-NoSecrets.http b/python/di_to_cu_migration_tool/cu-ga-NoSecrets.http new file mode 100644 index 0000000..f48ec55 --- /dev/null +++ b/python/di_to_cu_migration_tool/cu-ga-NoSecrets.http @@ -0,0 +1,570 @@ +# ==================================================================== +# Azure AI Content Understanding - API Testing Guide +# ==================================================================== +# +# This file demonstrates how to use Azure AI Content Understanding APIs +# to analyze documents, images, videos, and audio using prebuilt and +# custom analyzers. +# +# ⚠️ SETUP REQUIRED - BEFORE RUNNING ANY REQUESTS: +# +# 1. 
Edit the .env file in this same directory (python/di_to_cu_migration_tool/.env) +# 2. Set your API_KEY and ENDPOINT_URL values: +# API_KEY="your-subscription-key-here" +# ENDPOINT_URL="https://your-resource.services.ai.azure.com" +# 3. Save the .env file +# 4. Run any request using the "Send Request" link above each HTTP request +# +# Variables will be automatically loaded from .env - do not edit this file. +# +# DOCUMENTATION: +# - Models & Deployments: https://review.learn.microsoft.com/en-us/azure/ai-services/content-understanding/concepts/models-deployments?branch=main +# - Migration Guide (Preview to GA): https://review.learn.microsoft.com/en-us/azure/ai-services/content-understanding/how-to/migration-preview-to-ga +# - Analyzer Reference: https://learn.microsoft.com/en-us/azure/ai-services/content-understanding/concepts/analyzer-reference +# +# ==================================================================== + +# Variables - Automatically loaded from .env file in the same directory +@subscriptionKey = {{$dotenv API_KEY}} +@endpoint = {{$dotenv ENDPOINT_URL}} +@apiVersion = 2025-11-01 + +# ==================================================================== +# SECTION 1: CONFIGURE DEFAULT MODEL DEPLOYMENTS (BYOC) +# ==================================================================== +# +# Configure default model deployments for your resource. This allows you to +# use custom Azure OpenAI deployments (Bring Your Own Compute) instead of +# the default shared models.
+# +# Learn more: https://review.learn.microsoft.com/en-us/azure/ai-services/content-understanding/concepts/models-deployments?branch=main +# + +### Set default model deployments for the resource +# This configures which model deployments to use by default for completion and embedding tasks +# Alternatively, if you setup a resource using https://aka.ms/cu-studio then you can skip this step +PATCH {{endpoint}}/contentunderstanding/defaults?api-version={{apiVersion}} +Ocp-Apim-Subscription-Key: {{subscriptionKey}} +Content-Type: application/json + +{ + "modelDeployments": { + "gpt-4.1": "gpt-4.1-datazone", + "gpt-4.1-mini": "gpt-4.1-mini", + "text-embedding-ada-002": "text-embedding-ada-002" + } +} + +### Get current default settings +GET {{endpoint}}/contentunderstanding/defaults?api-version={{apiVersion}} +Ocp-Apim-Subscription-Key: {{subscriptionKey}} + +# ==================================================================== +# SECTION 2: LIST AND EXPLORE ANALYZERS +# ==================================================================== +# +# Analyzers are the core components that process your content. Azure provides +# prebuilt analyzers for common scenarios and you can create custom analyzers. 
+# +# Prebuilt analyzers include: +# - prebuilt-document: Extract text, tables, and structure from documents +# - prebuilt-invoice: Extract invoice-specific fields +# - prebuilt-video: Analyze video content with scene detection and transcription +# - prebuilt-audio: Transcribe and analyze audio content +# - prebuilt-imageSearch: Analyze and search within images +# +# Learn more: https://learn.microsoft.com/en-us/azure/ai-services/content-understanding/concepts/analyzer-reference +# + +### List all available analyzers +GET {{endpoint}}/contentunderstanding/analyzers?api-version={{apiVersion}} +Content-Type: application/json +Ocp-Apim-Subscription-Key: {{subscriptionKey}} + +### Get details of a specific analyzer +GET {{endpoint}}/contentunderstanding/analyzers/prebuilt-invoice?api-version={{apiVersion}} +Ocp-Apim-Subscription-Key: {{subscriptionKey}} + +# ==================================================================== +# SECTION 3: TEST PREBUILT ANALYZERS +# ==================================================================== +# +# Test the prebuilt analyzers with sample documents to understand their +# capabilities before creating custom analyzers. 
+# + +### 3.1 Analyze a document with prebuilt-document analyzer +# This analyzer extracts text, tables, key-value pairs, and document structure +# @name documentAnalysis +POST {{endpoint}}/contentunderstanding/analyzers/prebuilt-document:analyze?api-version={{apiVersion}} +Content-Type: application/json +Ocp-Apim-Subscription-Key: {{subscriptionKey}} + +{ + "inputs": [ + { + "url": "https://github.com/Azure-Samples/azure-ai-content-understanding-python/raw/refs/heads/main/data/invoice.pdf" + } + ], + "modelDeployments": { + "gpt-4.1": "gpt-4.1-datazone" + } +} + +### Get document analysis results (uses operation ID from previous request) +GET {{endpoint}}/contentunderstanding/analyzerResults/{{documentAnalysis.response.body.id}}?api-version={{apiVersion}} +Ocp-Apim-Subscription-Key: {{subscriptionKey}} + +### 3.2 Analyze an invoice with prebuilt-invoice analyzer +# Specialized analyzer for invoices that extracts vendor, items, totals, etc. +# @name invoiceAnalysis +POST {{endpoint}}/contentunderstanding/analyzers/prebuilt-invoice:analyze?api-version={{apiVersion}} +Content-Type: application/json +Ocp-Apim-Subscription-Key: {{subscriptionKey}} + +{ + "inputs": [ + { + "url": "https://github.com/Azure-Samples/azure-ai-content-understanding-python/raw/refs/heads/main/data/invoice.pdf" + } + ] +} + +### Get invoice analysis results (uses operation ID from previous request) +GET {{endpoint}}/contentunderstanding/analyzerResults/{{invoiceAnalysis.response.body.id}}?api-version={{apiVersion}} +Ocp-Apim-Subscription-Key: {{subscriptionKey}} + +### 3.3 Analyze an image with prebuilt-imageSearch analyzer +# Extract visual content, text, and enable semantic search within images +# @name imageAnalysis +POST {{endpoint}}/contentunderstanding/analyzers/prebuilt-imageSearch:analyze?api-version={{apiVersion}} +Content-Type: application/json +Ocp-Apim-Subscription-Key: {{subscriptionKey}} + +{ + "inputs": [ + { + "url": 
"https://github.com/Azure-Samples/azure-ai-content-understanding-python/raw/refs/heads/main/data/pieChart.jpg" + } + ], + "modelDeployments": { + "gpt-4.1": "gpt-4.1-datazone", + "text-embedding-3-large": "text-embedding-3-large" + } +} + +### Get image analysis results (uses operation ID from previous request) +GET {{endpoint}}/contentunderstanding/analyzerResults/{{imageAnalysis.response.body.id}}?api-version={{apiVersion}} +Ocp-Apim-Subscription-Key: {{subscriptionKey}} + +# ==================================================================== +# SECTION 4: CREATE CUSTOM DOCUMENT ANALYZER +# ==================================================================== +# +# Create custom analyzers to extract specific fields from your documents. +# Custom analyzers build on top of prebuilt analyzers and add field schemas +# to define what information to extract. +# +# Learn more: https://review.learn.microsoft.com/en-us/azure/ai-services/content-understanding/how-to/migration-preview-to-ga +# + +### 4.1 Create a custom insurance claim form analyzer +# This analyzer extracts specific fields from insurance claim forms +PUT {{endpoint}}/contentunderstanding/analyzers/claimForm?api-version={{apiVersion}} +Ocp-Apim-Subscription-Key: {{subscriptionKey}} +Content-Type: application/json + +{ + "baseAnalyzerId": "prebuilt-document", + "analyzerId": "claimForm", + "models": { + "completion": "gpt-4.1", + "embedding": "text-embedding-ada-002" + }, + "fieldSchema": { + "fields": { + "PolicyNumber": { + "type": "string", + "method": "extract", + "description": "The insurance policy number associated with this claim." + }, + "ClaimNumber": { + "type": "string", + "method": "extract", + "description": "The unique claim number assigned to this claim." + }, + "TotalClaimAmount": { + "type": "number", + "method": "extract", + "description": "The total amount being claimed." 
+ }, + "AccidentDate": { + "type": "string", + "method": "extract", + "description": "The date when the accident occurred." + }, + "LossType": { + "type": "string", + "method": "classify", + "description": "The type of loss (e.g., collision, theft, fire).", + "enum": ["collision", "theft", "fire", "natural disaster", "vandalism"] + } + }, + "definitions": {} + }, + "omitContent": true +} + +### Get the custom analyzer details +GET {{endpoint}}/contentunderstanding/analyzers/claimForm?api-version={{apiVersion}} +Ocp-Apim-Subscription-Key: {{subscriptionKey}} + +### 4.2 Create a custom invoice analyzer +# Example of a custom invoice analyzer with specific field extraction +PUT {{endpoint}}/contentunderstanding/analyzers/invoice_custom?api-version={{apiVersion}} +Ocp-Apim-Subscription-Key: {{subscriptionKey}} +Content-Type: application/json + +{ + "analyzerId": "invoice_custom", + "baseAnalyzerId": "prebuilt-document", + "description": "Custom invoice analyzer with specific fields", + "scenario": "document", + "models": { + "completion": "gpt-4.1" + }, + "fieldSchema": { + "fields": { + "VendorName": { + "type": "string", + "method": "extract", + "description": "Vendor issuing the invoice" + }, + "Items": { + "type": "array", + "method": "extract", + "items": { + "type": "object", + "properties": { + "Description": { + "type": "string", + "method": "extract", + "description": "Description of the item" + }, + "Amount": { + "type": "number", + "method": "extract", + "description": "Amount of the item" + } + } + } + } + } + } +} + +### Test the custom invoice analyzer +# @name customInvoiceAnalysis +POST {{endpoint}}/contentunderstanding/analyzers/invoice_custom:analyze?api-version={{apiVersion}} +Content-Type: application/json +Ocp-Apim-Subscription-Key: {{subscriptionKey}} + +{ + "inputs": [ + { + "url": "https://documentintelligence.ai.azure.com/documents/samples/read/read-healthcare.png" + } + ], + "modelDeployments": { + "gpt-4.1": "gpt-4.1-datazone" + } +} + +### 
Get custom invoice results (uses operation ID from previous request) +GET {{endpoint}}/contentunderstanding/analyzerResults/{{customInvoiceAnalysis.response.body.id}}?api-version={{apiVersion}} +Ocp-Apim-Subscription-Key: {{subscriptionKey}} + +### 4.3 Create a complex multi-document analyzer with content categories +# This analyzer can classify and route different document types to specialized analyzers +PUT {{endpoint}}/contentunderstanding/analyzers/insuranceClaim?api-version={{apiVersion}} +Ocp-Apim-Subscription-Key: {{subscriptionKey}} +Content-Type: application/json + +{ + "baseAnalyzerId": "prebuilt-document", + "analyzerId": "insuranceClaim", + "models": { + "completion": "gpt-4.1", + "embedding": "text-embedding-ada-002" + }, + "config": { + // Enable splitting of the input into segments for multi-document files + "enableSegment": true, + "contentCategories": { + "claimForm": { + "description": "The claim form for Zava Insurance", + "analyzerId": "claimForm" + }, + "estimate": { + "description": "The body shop estimate or contractor estimate to fix the property damage.", + "analyzerId": "prebuilt-invoice" + }, + "medicalReport": { + "description": "A doctor's assessment or medical report related to injury suffered.", + "analyzerId": "prebuilt-document" + }, + "policeReport": { + "description": "A police or law enforcement report detailing the events that led to the loss." + } + }, + "omitContent": true + } +} + +### Delete an analyzer (if needed) +DELETE {{endpoint}}/contentunderstanding/analyzers/insuranceClaim?api-version={{apiVersion}} +Ocp-Apim-Subscription-Key: {{subscriptionKey}} + +# ==================================================================== +# SECTION 5: VIDEO ANALYZER EXAMPLES +# ==================================================================== +# +# Video analyzers can transcribe, detect scenes, extract entities, and +# perform custom analysis on video content.
+# +# Learn more: https://learn.microsoft.com/en-us/azure/ai-services/content-understanding/concepts/analyzer-reference +# + +### 5.1 Test prebuilt video analyzer +# @name videoAnalysis +POST {{endpoint}}/contentunderstanding/analyzers/prebuilt-video:analyze?api-version={{apiVersion}} +Content-Type: application/json +Ocp-Apim-Subscription-Key: {{subscriptionKey}} + +{ + "inputs": [ + { + "url": "https://github.com/Azure-Samples/azure-ai-content-understanding-python/raw/refs/heads/main/data/FlightSimulator.mp4" + } + ], + "modelDeployments": { + "gpt-4.1": "gpt-4.1-datazone", + "text-embedding-3-large": "text-embedding-3-large" + } +} + +### Get video analysis results (uses operation ID from previous request) +GET {{endpoint}}/contentunderstanding/analyzerResults/{{videoAnalysis.response.body.id}}?api-version={{apiVersion}} +Ocp-Apim-Subscription-Key: {{subscriptionKey}} + +### 5.2 Create custom video analyzer with dynamic chaptering +# This analyzer segments videos into chapters/stories with scene detection +PUT {{endpoint}}/contentunderstanding/analyzers/video_chaptering?api-version={{apiVersion}} +Ocp-Apim-Subscription-Key: {{subscriptionKey}} +Content-Type: application/json + +{ + "description": "Dynamic video chaptering with scene detection", + "scenario": "videoShot", + "baseAnalyzerId": "prebuilt-video", + "models": { + "completion": "gpt-4.1" + }, + "config": { + "returnDetails": true, + "enableSegmentation": true, + "segmentationMode": "custom", + "segmentationDefinition": "Segment the video into stories or chapters. A story (chapter) in a video is a self-contained portion of the program dedicated to a specific news story, topic, or theme. 
Each segment typically includes a distinct introduction, development, and (sometimes) a conclusion, and can feature a combination of elements such as reporter narration, interviews, sound bites, relevant footage (B-roll), and graphics.", + "locales": ["en-US"] + }, + "fieldSchema": { + "name": "Content Understanding - Dynamic Chaptering", + "fields": { + "Segments": { + "type": "array", + "items": { + "type": "object", + "properties": { + "SegmentId": { + "type": "string" + }, + "SegmentType": { + "type": "string", + "method": "generate", + "description": "The short title or a short summary of the story or chapter." + }, + "Scenes": { + "type": "array", + "items": { + "type": "object", + "properties": { + "Description": { + "type": "string", + "method": "generate", + "description": "A five-word description of the scene. A scene is a smaller segment of the segment where a continuous block for storytelling unfolds within a specific time, place, and set of characters. A scene can only belong to a single chapter, and cannot overlap with other scenes. Scenes are sequential across the video."
+ }, + "StartTimestamp": { + "type": "string", + "description": "the start timestamp of the scene" + }, + "EndTimestamp": { + "type": "string", + "description": "the end timestamp of the scene" + } + } + } + } + } + } + } + } + } +} + +### Get the video analyzer details +GET {{endpoint}}/contentunderstanding/analyzers/video_chaptering?api-version={{apiVersion}} +Ocp-Apim-Subscription-Key: {{subscriptionKey}} + +### Test the custom video chaptering analyzer +# @name videoChapteringAnalysis +POST {{endpoint}}/contentunderstanding/analyzers/video_chaptering:analyze?api-version={{apiVersion}} +Content-Type: application/json +Ocp-Apim-Subscription-Key: {{subscriptionKey}} + +{ + "inputs": [ + { + "url": "https://github.com/Azure-Samples/azure-ai-content-understanding-python/blob/402ec1bf337d54b438581c69dbfb784da74ded38/data/video.mp4" + } + ] +} + +### Get video chaptering results (uses operation ID from previous request) +GET {{endpoint}}/contentunderstanding/analyzerResults/{{videoChapteringAnalysis.response.body.id}}?api-version={{apiVersion}} +Ocp-Apim-Subscription-Key: {{subscriptionKey}} + +# ==================================================================== +# SECTION 6: AUDIO ANALYZER EXAMPLES +# ==================================================================== +# +# Audio analyzers can transcribe speech, detect speakers, and extract +# insights from audio content. 
+# + +### Test prebuilt audio analyzer (coming soon - placeholder) +# POST {{endpoint}}/contentunderstanding/analyzers/prebuilt-audio:analyze?api-version={{apiVersion}} +# Content-Type: application/json +# Ocp-Apim-Subscription-Key: {{subscriptionKey}} +# +# { +# "inputs": [ +# { +# "url": "https://example.com/audio-sample.mp3" +# } +# ] +# } + +# ==================================================================== +# SECTION 7: ANALYZER MANAGEMENT OPERATIONS +# ==================================================================== +# +# Copy analyzers between resources or regions for deployment purposes. +# + +### Copy analyzer within same resource +POST {{endpoint}}/contentunderstanding/analyzers/insuranceClaimv3:copy?api-version={{apiVersion}} +Content-Type: application/json +Ocp-Apim-Subscription-Key: {{subscriptionKey}} + +{ + "sourceAnalyzerId": "insuranceClaim" +} + +### Grant copy authorization for cross-resource copy +POST {{endpoint}}/contentunderstanding/analyzers/insuranceClaim:grantCopyAuthorization?api-version={{apiVersion}} +Content-Type: application/json +Ocp-Apim-Subscription-Key: {{subscriptionKey}} + +{ + "targetAzureResourceId": "/subscriptions/3b393ccb-47e3-4dea-9004-0c1085b5aba6/resourceGroups/mmi-sea-eft/providers/Microsoft.CognitiveServices/accounts/foundry-sea-eft", + "targetRegion": "southeastasia" +} + +### Copy analyzer to different resource (use authorization from above) +# POST https://foundry-sea-eft.cognitiveservices.azure.com/contentunderstanding/analyzers/insuranceClaimCopy:copy?api-version={{apiVersion}} +# Content-Type: application/json +# Ocp-Apim-Subscription-Key: {{targetSubscriptionKey}} +# +# { +# "targetAzureResourceId": "/subscriptions/3b393ccb-47e3-4dea-9004-0c1085b5aba6/resourceGroups/mmi-sea-eft/providers/Microsoft.CognitiveServices/accounts/foundry-sea-eft", +# "targetRegion": "southeastasia", +# "expiresAt": "2025-11-01T19:18:11.095328+00:00" +# } + +# ==================================================================== 
+# END OF GUIDE +# ==================================================================== + + +# Test: create a DataSnipper repro analyzer + + + +### Create the dsTest repro analyzer +# This analyzer extracts employee names (an array of objects) from the input document +PUT {{endpoint}}/contentunderstanding/analyzers/dsTest?api-version={{apiVersion}} +Ocp-Apim-Subscription-Key: {{subscriptionKey}} +Content-Type: application/json + +{ + "baseAnalyzerId": "prebuilt-document", + "analyzerId": "dsTest", + "models": { + "completion": "gpt-4o", + "embedding": "text-embedding-ada-002" + }, + "fieldSchema": { + "fields": { + "employees": { + "type": "array", + "items": { + "type": "object", + "properties": { + "employee_name": { + "type": "string", + "method": "extract", + "description": "The name of the employee", + "estimateSourceAndConfidence": true + } + }, + "description": "The employee of the company" + }, + "description": "The employees of the company" + } + } + } +} + +### Get the custom analyzer details +GET {{endpoint}}/contentunderstanding/analyzers/dsTest?api-version={{apiVersion}} +Ocp-Apim-Subscription-Key: {{subscriptionKey}} + + + +### Test dsTest analyzer with document +# @name dsTestAnalysis +POST {{endpoint}}/contentunderstanding/analyzers/dsTest:analyze?api-version={{apiVersion}} +Content-Type: application/json +Ocp-Apim-Subscription-Key: {{subscriptionKey}} + +{ + "inputs": [ + { + "url": "https://<your-storage-account>.blob.core.windows.net/<container>/<document>.pdf?<sas-token>" + } + ] +} + +### Get dsTest analysis results (uses operation ID from previous request) +GET
{{endpoint}}/contentunderstanding/analyzerResults/{{dsTestAnalysis.response.body.id}}?api-version={{apiVersion}} +Ocp-Apim-Subscription-Key: {{subscriptionKey}} \ No newline at end of file diff --git a/python/di_to_cu_migration_tool/cu_converter_generative.py b/python/di_to_cu_migration_tool/cu_converter_generative.py index f27938d..f384dc7 100644 --- a/python/di_to_cu_migration_tool/cu_converter_generative.py +++ b/python/di_to_cu_migration_tool/cu_converter_generative.py @@ -48,7 +48,7 @@ def format_angle(angle: float) -> float: formatted_num = f"{rounded_angle:.7f}".rstrip('0') # Remove trailing zeros return float(formatted_num) -def convert_fields_to_analyzer(fields_json_path: Path, analyzer_prefix: Optional[str], target_dir: Path, field_definitions: FieldDefinitions) -> dict: +def convert_fields_to_analyzer(fields_json_path: Path, analyzer_prefix: Optional[str], target_dir: Path, field_definitions: FieldDefinitions, target_container_sas_url: str = None, target_blob_folder: str = None) -> dict: """ Convert DI 4.0 preview Custom Document fields.json to analyzer.json format. Args: @@ -79,7 +79,11 @@ def convert_fields_to_analyzer(fields_json_path: Path, analyzer_prefix: Optional # build analyzer.json appropriately analyzer_data = { "analyzerId": analyzer_id, - "baseAnalyzerId": "prebuilt-documentAnalyzer", + "baseAnalyzerId": "prebuilt-document", + "models": { + "completion": "gpt-4.1", + "embedding": "text-embedding-3-large" + }, "config": { "returnDetails": True, # Add the following line as a temp workaround before service issue is fixed. 
@@ -121,6 +125,17 @@ def convert_fields_to_analyzer(fields_json_path: Path, analyzer_prefix: Optional else: analyzer_json_path = fields_json_path.parent / 'analyzer.json' + # Add knowledgeSources section if container info is provided + if target_container_sas_url and target_blob_folder: + analyzer_data["knowledgeSources"] = [ + { + "kind": "labeledData", + "containerUrl": target_container_sas_url, + "prefix": target_blob_folder, + "fileListPath": "" + } + ] + # Ensure target directory exists analyzer_json_path.parent.mkdir(parents=True, exist_ok=True) diff --git a/python/di_to_cu_migration_tool/cu_converter_neural.py b/python/di_to_cu_migration_tool/cu_converter_neural.py index d825f10..64d4d33 100644 --- a/python/di_to_cu_migration_tool/cu_converter_neural.py +++ b/python/di_to_cu_migration_tool/cu_converter_neural.py @@ -37,7 +37,7 @@ def convert_bounding_regions_to_source(page_number: int, polygon: list) -> str: source = f"D({page_number},{polygon_str})" return source -def convert_fields_to_analyzer_neural(fields_json_path: Path, analyzer_prefix: Optional[str], target_dir: Optional[Path], field_definitions: FieldDefinitions) -> Tuple[dict, dict]: +def convert_fields_to_analyzer_neural(fields_json_path: Path, analyzer_prefix: Optional[str], target_dir: Optional[Path], field_definitions: FieldDefinitions, target_container_sas_url: str = None, target_blob_folder: str = None) -> Tuple[dict, dict]: """ Convert DI 3.1/4.0GA Custom Neural fields.json to analyzer.json format. Args: @@ -67,7 +67,11 @@ def convert_fields_to_analyzer_neural(fields_json_path: Path, analyzer_prefix: O # Build analyzer.json content analyzer_data = { "analyzerId": analyzer_prefix, - "baseAnalyzerId": "prebuilt-documentAnalyzer", + "baseAnalyzerId": "prebuilt-document", + "models": { + "completion": "gpt-4.1", + "embedding": "text-embedding-3-large" + }, "config": { "returnDetails": True, # Add the following line as a temp workaround before service issue is fixed. 
@@ -132,6 +136,17 @@ def convert_fields_to_analyzer_neural(fields_json_path: Path, analyzer_prefix: O else: analyzer_json_path = fields_json_path.parent / 'analyzer.json' + # Add knowledgeSources section if container info is provided + if target_container_sas_url and target_blob_folder: + analyzer_data["knowledgeSources"] = [ + { + "kind": "labeledData", + "containerUrl": target_container_sas_url, + "prefix": target_blob_folder, + "fileListPath": "" + } + ] + # Ensure target directory exists analyzer_json_path.parent.mkdir(parents=True, exist_ok=True) diff --git a/python/di_to_cu_migration_tool/di_to_cu_converter.py b/python/di_to_cu_migration_tool/di_to_cu_converter.py index 5de14d9..c84111b 100644 --- a/python/di_to_cu_migration_tool/di_to_cu_converter.py +++ b/python/di_to_cu_migration_tool/di_to_cu_converter.py @@ -8,7 +8,7 @@ import shutil import tempfile import typer -from typing import Tuple +from typing import Optional, Tuple # imports from external packages (in requirements.txt) from rich import print # For colored output @@ -161,7 +161,7 @@ def main( print(f"[yellow]WARNING: The following signatures were removed from the dataset: {removed_signatures}[/yellow]\n") print("Second: Running DI to CU dataset conversion...") - analyzer_data, ocr_files = running_cu_conversion(temp_dir, temp_target_dir, DI_version, analyzer_prefix, removed_signatures) + analyzer_data, ocr_files = running_cu_conversion(temp_dir, temp_target_dir, DI_version, analyzer_prefix, removed_signatures, target_container_sas_url, target_blob_folder) # Run OCR on the pdf files run_cu_layout_ocr(ocr_files, temp_target_dir, subscription_key) @@ -232,15 +232,17 @@ def running_field_type_conversion(temp_source_dir: Path, temp_dir: Path, DI_vers return removed_signatures -def running_cu_conversion(temp_dir: Path, temp_target_dir: Path, DI_version: str, analyzer_prefix: str, removed_signatures: list) -> Tuple[dict, list]: +def running_cu_conversion(temp_dir: Path, temp_target_dir: Path, DI_version: 
str, analyzer_prefix: Optional[str], removed_signatures: list, target_container_sas_url: str, target_blob_folder: str) -> Tuple[dict, list]: """ - Function to run the DI to CU conversion + Function to run the CU conversion Args: temp_dir (Path): The path to the source directory temp_target_dir (Path): The path to the target directory DI_version (str): The version of DI being used analyzer_prefix (str): The prefix for the analyzer name removed_signatures (list): The list of removed signatures that will not be used in the CU converter + target_container_sas_url (str): The target container SAS URL for training data + target_blob_folder (str): The target blob folder prefix for training data """ # Creating a FieldDefinitons object to handle the converison of definitions in the fields.json field_definitions = FieldDefinitions() @@ -251,9 +253,9 @@ def running_cu_conversion(temp_dir: Path, temp_target_dir: Path, DI_version: str assert fields_path.exists(), "fields.json is needed. Fields.json is missing from the given dataset." 
if DI_version == "generative": - analyzer_data = cu_converter_generative.convert_fields_to_analyzer(fields_path, analyzer_prefix, temp_target_dir, field_definitions) + analyzer_data = cu_converter_generative.convert_fields_to_analyzer(fields_path, analyzer_prefix, temp_target_dir, field_definitions, target_container_sas_url, target_blob_folder) elif DI_version == "neural": - analyzer_data, fields_dict = cu_converter_neural.convert_fields_to_analyzer_neural(fields_path, analyzer_prefix, temp_target_dir, field_definitions) + analyzer_data, fields_dict = cu_converter_neural.convert_fields_to_analyzer_neural(fields_path, analyzer_prefix, temp_target_dir, field_definitions, target_container_sas_url, target_blob_folder) ocr_files = [] # List to store paths to pdf files to get OCR results from later for file in files: diff --git a/python/di_to_cu_migration_tool/get_ocr.py b/python/di_to_cu_migration_tool/get_ocr.py index a1b849b..32c0584 100644 --- a/python/di_to_cu_migration_tool/get_ocr.py +++ b/python/di_to_cu_migration_tool/get_ocr.py @@ -70,7 +70,11 @@ def build_analyzer(credential, current_token, host, api_version, subscriptionKey request_body = { "analyzerId": analyzer_id, "description": "Sample analyzer", - "baseAnalyzerId": "prebuilt-documentAnalyzer", + "baseAnalyzerId": "prebuilt-document", + "models": { + "completion": "gpt-4.1", + "embedding": "text-embedding-3-large" + }, "config": { "returnDetails": True, "enableOcr": True, @@ -82,8 +86,7 @@ def build_analyzer(credential, current_token, host, api_version, subscriptionKey "fieldSchema": {}, "warnings": [], "status": "ready", - "processingLocation": "geography", - "mode": "standard" + "processingLocation": "geography" } endpoint = f"{host}/contentunderstanding/analyzers/{analyzer_id}?api-version={api_version}" print("[yellow]Creating sample analyzer to attain CU Layout results...[/yellow]") @@ -138,9 +141,8 @@ def run_cu_layout_ocr(input_files: list, output_dir_string: str, subscription_ke output_dir = 
Path(output_dir_string) output_dir.mkdir(parents=True, exist_ok=True) - # Need to create analyzer with empty schema - analyzer_id = build_analyzer(credential, current_token, host, api_version, subscription_key) - url = f"{host}/contentunderstanding/analyzers/{analyzer_id}:analyze?api-version={api_version}" + # Use prebuilt-read analyzer directly - no need to create a custom analyzer + url = f"{host}/contentunderstanding/analyzers/prebuilt-read:analyze?api-version={api_version}" for file in input_files: try: @@ -150,7 +152,7 @@ def run_cu_layout_ocr(input_files: list, output_dir_string: str, subscription_ke current_token = get_token(credential, current_token) headers = { "Authorization": f"Bearer {current_token.token}", - "Apim-Subscription-id": f"{subscription_key}", + "Ocp-Apim-Subscription-Key": f"{subscription_key}", "Content-Type": "application/pdf", } diff --git a/python/di_to_cu_migration_tool/sample_documents/analyzer_result.json b/python/di_to_cu_migration_tool/sample_documents/analyzer_result.json index bfa151f..f1507dc 100644 --- a/python/di_to_cu_migration_tool/sample_documents/analyzer_result.json +++ b/python/di_to_cu_migration_tool/sample_documents/analyzer_result.json @@ -3,7 +3,7 @@ "status": "Succeeded", "result": { "analyzerId": "mySampleAnalyzer", - "apiVersion": "2025-05-01-preview", + "apiVersion": "2025-11-01", "createdAt": "2025-05-30T15:47:15Z", "warnings": [], "contents": [ From 4669a5d44e61c8fc19469e5b15c85fdbf7d697da Mon Sep 17 00:00:00 2001 From: Joe Filcik Date: Tue, 16 Dec 2025 20:49:20 -0500 Subject: [PATCH 5/6] Remove http test file --- .../cu-ga-NoSecrets.http | 570 ------------------ 1 file changed, 570 deletions(-) delete mode 100644 python/di_to_cu_migration_tool/cu-ga-NoSecrets.http diff --git a/python/di_to_cu_migration_tool/cu-ga-NoSecrets.http b/python/di_to_cu_migration_tool/cu-ga-NoSecrets.http deleted file mode 100644 index f48ec55..0000000 --- a/python/di_to_cu_migration_tool/cu-ga-NoSecrets.http +++ /dev/null @@ -1,570 
+0,0 @@ -# ==================================================================== -# Azure AI Content Understanding - API Testing Guide -# ==================================================================== -# -# This file demonstrates how to use Azure AI Content Understanding APIs -# to analyze documents, images, videos, and audio using prebuilt and -# custom analyzers. -# -# ⚠️ SETUP REQUIRED - BEFORE RUNNING ANY REQUESTS: -# -# 1. Edit the .env file in this same directory (tools/BugBashSample/.env) -# 2. Set your API_KEY and ENDPOINT_URL values: -# API_KEY="your-subscription-key-here" -# ENDPOINT_URL="https://your-resource.services.ai.azure.com" -# 3. Save the .env file -# 4. Run any request using the "Send Request" link above each HTTP request -# -# Variables will be automatically loaded from .env - do not edit this file. -# -# DOCUMENTATION: -# - Models & Deployments: https://review.learn.microsoft.com/en-us/azure/ai-services/content-understanding/concepts/models-deployments?branch=main -# - Migration Guide (Preview to GA): https://review.learn.microsoft.com/en-us/azure/ai-services/content-understanding/how-to/migration-preview-to-ga -# - Analyzer Reference: https://learn.microsoft.com/en-us/azure/ai-services/content-understanding/concepts/analyzer-reference -# -# ==================================================================== - -# Variables - Automatically loaded from .env file in the same directory -@subscriptionKey = {{$dotenv API_KEY}} -@endpoint = {{$dotenv ENDPOINT_URL}} -@apiVersion = 2025-11-01 - -# ==================================================================== -# SECTION 1: CONFIGURE DEFAULT MODEL DEPLOYMENTS (BYOC) -# ==================================================================== -# -# Configure default model deployments for your resource. This allows you to -# use custom Azure OpenAI deployments (Bring Your Own Compute) instead of -# the default shared models. 
-# -# Learn more: https://review.learn.microsoft.com/en-us/azure/ai-services/content-understanding/concepts/models-deployments?branch=main -# - -### Set default model deployments for the resource -# This configures which model deployments to use by default for completion and embedding tasks -# Alternatively, if you setup a resource using https://aka.ms/cu-studio then you can skip this step -PATCH {{endpoint}}/contentunderstanding/defaults?api-version={{apiVersion}} -Ocp-Apim-Subscription-Key: {{subscriptionKey}} -Content-Type: application/json - -{ - "modelDeployments": { - "gpt-4.1": "gpt-4.1-datazone", - "gpt-4.1-mini": "gpt-4.1-mini", - "text-embedding-ada-002": "text-embedding-ada-002" - } -} - -### Get current default settings -GET {{endpoint}}/contentunderstanding/defaults?api-version={{apiVersion}} -Ocp-Apim-Subscription-Key: {{subscriptionKey}} - -# ==================================================================== -# SECTION 2: LIST AND EXPLORE ANALYZERS -# ==================================================================== -# -# Analyzers are the core components that process your content. Azure provides -# prebuilt analyzers for common scenarios and you can create custom analyzers. 
-# -# Prebuilt analyzers include: -# - prebuilt-document: Extract text, tables, and structure from documents -# - prebuilt-invoice: Extract invoice-specific fields -# - prebuilt-video: Analyze video content with scene detection and transcription -# - prebuilt-audio: Transcribe and analyze audio content -# - prebuilt-imageSearch: Analyze and search within images -# -# Learn more: https://learn.microsoft.com/en-us/azure/ai-services/content-understanding/concepts/analyzer-reference -# - -### List all available analyzers -GET {{endpoint}}/contentunderstanding/analyzers?api-version={{apiVersion}} -Content-Type: application/json -Ocp-Apim-Subscription-Key: {{subscriptionKey}} - -### Get details of a specific analyzer -GET {{endpoint}}/contentunderstanding/analyzers/prebuilt-invoice?api-version={{apiVersion}} -Ocp-Apim-Subscription-Key: {{subscriptionKey}} - -# ==================================================================== -# SECTION 3: TEST PREBUILT ANALYZERS -# ==================================================================== -# -# Test the prebuilt analyzers with sample documents to understand their -# capabilities before creating custom analyzers. 
-# - -### 3.1 Analyze a document with prebuilt-document analyzer -# This analyzer extracts text, tables, key-value pairs, and document structure -# @name documentAnalysis -POST {{endpoint}}/contentunderstanding/analyzers/prebuilt-document:analyze?api-version={{apiVersion}} -Content-Type: application/json -Ocp-Apim-Subscription-Key: {{subscriptionKey}} - -{ - "inputs": [ - { - "url": "https://github.com/Azure-Samples/azure-ai-content-understanding-python/raw/refs/heads/main/data/invoice.pdf" - } - ], - "modelDeployments": { - "gpt-4.1": "gpt-4.1-datazone" - } -} - -### Get document analysis results (uses operation ID from previous request) -GET {{endpoint}}/contentunderstanding/analyzerResults/{{documentAnalysis.response.body.id}}?api-version={{apiVersion}} -Ocp-Apim-Subscription-Key: {{subscriptionKey}} - -### 3.2 Analyze an invoice with prebuilt-invoice analyzer -# Specialized analyzer for invoices that extracts vendor, items, totals, etc. -# @name invoiceAnalysis -POST {{endpoint}}/contentunderstanding/analyzers/prebuilt-invoice:analyze?api-version={{apiVersion}} -Content-Type: application/json -Ocp-Apim-Subscription-Key: {{subscriptionKey}} - -{ - "inputs": [ - { - "url": "https://github.com/Azure-Samples/azure-ai-content-understanding-python/raw/refs/heads/main/data/invoice.pdf" - } - ] -} - -### Get invoice analysis results (uses operation ID from previous request) -GET {{endpoint}}/contentunderstanding/analyzerResults/{{invoiceAnalysis.response.body.id}}?api-version={{apiVersion}} -Ocp-Apim-Subscription-Key: {{subscriptionKey}} - -### 3.3 Analyze an image with prebuilt-imageSearch analyzer -# Extract visual content, text, and enable semantic search within images -# @name imageAnalysis -POST {{endpoint}}/contentunderstanding/analyzers/prebuilt-imageSearch:analyze?api-version={{apiVersion}} -Content-Type: application/json -Ocp-Apim-Subscription-Key: {{subscriptionKey}} - -{ - "inputs": [ - { - "url": 
"https://github.com/Azure-Samples/azure-ai-content-understanding-python/raw/refs/heads/main/data/pieChart.jpg" - } - ], - "modelDeployments": { - "gpt-4.1": "gpt-4.1-datazone", - "text-embedding-3-large": "text-embedding-3-large" - } -} - -### Get image analysis results (uses operation ID from previous request) -GET {{endpoint}}/contentunderstanding/analyzerResults/{{imageAnalysis.response.body.id}}?api-version={{apiVersion}} -Ocp-Apim-Subscription-Key: {{subscriptionKey}} - -# ==================================================================== -# SECTION 4: CREATE CUSTOM DOCUMENT ANALYZER -# ==================================================================== -# -# Create custom analyzers to extract specific fields from your documents. -# Custom analyzers build on top of prebuilt analyzers and add field schemas -# to define what information to extract. -# -# Learn more: https://review.learn.microsoft.com/en-us/azure/ai-services/content-understanding/how-to/migration-preview-to-ga -# - -### 4.1 Create a custom insurance claim form analyzer -# This analyzer extracts specific fields from insurance claim forms -PUT {{endpoint}}/contentunderstanding/analyzers/claimForm?api-version={{apiVersion}} -Ocp-Apim-Subscription-Key: {{subscriptionKey}} -Content-Type: application/json - -{ - "baseAnalyzerId": "prebuilt-document", - "analyzerId": "claimForm", - "models": { - "completion": "gpt-4.1", - "embedding": "text-embedding-ada-002" - }, - "fieldSchema": { - "fields": { - "PolicyNumber": { - "type": "string", - "method": "extract", - "description": "The insurance policy number associated with this claim." - }, - "ClaimNumber": { - "type": "string", - "method": "extract", - "description": "The unique claim number assigned to this claim." - }, - "TotalClaimAmount": { - "type": "number", - "method": "extract", - "description": "The total amount being claimed." 
- }, - "AccidentDate": { - "type": "string", - "method": "extract", - "description": "The date when the accident occurred." - }, - "LossType": { - "type": "string", - "method": "classify", - "description": "The type of loss (e.g., collision, theft, fire).", - "enum": ["collision", "theft", "fire", "natural disaster", "vandalism"] - } - }, - "definitions": {} - }, - "omitContent": true -} - -### Get the custom analyzer details -GET {{endpoint}}/contentunderstanding/analyzers/claimForm?api-version={{apiVersion}} -Ocp-Apim-Subscription-Key: {{subscriptionKey}} - -### 4.2 Create a custom invoice analyzer -# Example of a custom invoice analyzer with specific field extraction -PUT {{endpoint}}/contentunderstanding/analyzers/invoice_custom?api-version={{apiVersion}} -Ocp-Apim-Subscription-Key: {{subscriptionKey}} -Content-Type: application/json - -{ - "analyzerId": "invoice_custom", - "baseAnalyzerId": "prebuilt-document", - "description": "Custom invoice analyzer with specific fields", - "scenario": "document", - "models": { - "completion": "gpt-4.1" - }, - "fieldSchema": { - "fields": { - "VendorName": { - "type": "string", - "method": "extract", - "description": "Vendor issuing the invoice" - }, - "Items": { - "type": "array", - "method": "extract", - "items": { - "type": "object", - "properties": { - "Description": { - "type": "string", - "method": "extract", - "description": "Description of the item" - }, - "Amount": { - "type": "number", - "method": "extract", - "description": "Amount of the item" - } - } - } - } - } - } -} - -### Test the custom invoice analyzer -# @name customInvoiceAnalysis -POST {{endpoint}}/contentunderstanding/analyzers/invoice_custom:analyze?api-version={{apiVersion}} -Content-Type: application/json -Ocp-Apim-Subscription-Key: {{subscriptionKey}} - -{ - "inputs": [ - { - "url": "https://documentintelligence.ai.azure.com/documents/samples/read/read-healthcare.png" - } - ], - "modelDeployments": { - "gpt-4.1": "gpt-4.1-datazone" - } -} - -### 
Get custom invoice results (uses operation ID from previous request) -GET {{endpoint}}/contentunderstanding/analyzerResults/{{customInvoiceAnalysis.response.body.id}}?api-version={{apiVersion}} -Ocp-Apim-Subscription-Key: {{subscriptionKey}} - -### 4.3 Create a complex multi-document analyzer with content categories -# This analyzer can classify and route different document types to specialized analyzers -PUT {{endpoint}}/contentunderstanding/analyzers/insuranceClaim?api-version={{apiVersion}} -Ocp-Apim-Subscription-Key: {{subscriptionKey}} -Content-Type: application/json - -{ - "baseAnalyzerId": "prebuilt-document", - "analyzerId": "insuranceClaim", - "models": { - "completion": "gpt-4.1", - "embedding": "text-embedding-ada-002" - }, - "config": { - // Enable splitting of the input into segments for multi-document files - "enableSegment": true, - "contentCategories": { - "claimForm": { - "description": "The claim form for Zava Insurance", - "analyzerId": "claimForm" - }, - "estimate": { - "description": "The body shop estimate or contractor estimate to fix the property damage.", - "analyzerId": "prebuilt-invoice" - }, - "medicalReport": { - "description": "A doctors assessment or medical report related to injury suffered.", - "analyzerId": "prebuilt-document" - }, - "policeReport": { - "description": "A police or law enforcement report detailing the events that lead to the loss." - } - }, - "omitContent": true - } -} - -### Delete an analyzer (if needed) -DELETE {{endpoint}}/contentunderstanding/analyzers/insuranceClaim?api-version={{apiVersion}} -Ocp-Apim-Subscription-Key: {{subscriptionKey}} - -# ==================================================================== -# SECTION 5: VIDEO ANALYZER EXAMPLES -# ==================================================================== -# -# Video analyzers can transcribe, detect scenes, extract entities, and -# perform custom analysis on video content. 
-# -# Learn more: https://learn.microsoft.com/en-us/azure/ai-services/content-understanding/concepts/analyzer-reference -# - -### 5.1 Test prebuilt video analyzer -# @name videoAnalysis -POST {{endpoint}}/contentunderstanding/analyzers/prebuilt-video:analyze?api-version={{apiVersion}} -Content-Type: application/json -Ocp-Apim-Subscription-Key: {{subscriptionKey}} - -{ - "inputs": [ - { - "url": "https://github.com/Azure-Samples/azure-ai-content-understanding-python/raw/refs/heads/main/data/FlightSimulator.mp4" - } - ], - "modelDeployments": { - "gpt-4.1": "gpt-4.1-datazone", - "text-embedding-3-large": "text-embedding-3-large" - } -} - -### Get video analysis results (uses operation ID from previous request) -GET {{endpoint}}/contentunderstanding/analyzerResults/{{videoAnalysis.response.body.id}}?api-version={{apiVersion}} -Ocp-Apim-Subscription-Key: {{subscriptionKey}} - -### 5.2 Create custom video analyzer with dynamic chaptering -# This analyzer segments videos into chapters/stories with scene detection -PUT {{endpoint}}/contentunderstanding/analyzers/video_chaptering?api-version={{apiVersion}} -Ocp-Apim-Subscription-Key: {{subscriptionKey}} -Content-Type: application/json - -{ - "description": "Dynamic video chaptering with scene detection", - "scenario": "videoShot", - "baseAnalyzerId": "prebuilt-video", - "models": { - "completion": "gpt-4.1" - }, - "config": { - "returnDetails": true, - "enableSegmentation": true, - "segmentationMode": "custom", - "segmentationDefinition": "Segment the video into stories or chapters. A story (chapter) in a video is a self-contained portion of the program dedicated to a specific news story, topic, or theme. 
Each segment typically includes a distinct introduction, development, and (sometimes) a conclusion, and can feature a combination of elements such as reporter narration, interviews, sound bites, relevant footage (B-roll), and graphics.", - "locales": ["en-US"] - }, - "fieldSchema": { - "name": "Content Understanding - Dynamic Chaptering", - "fields": { - "Segments": { - "type": "array", - "items": { - "type": "object", - "properties": { - "SegmentId": { - "type": "string" - }, - "SegmentType": { - "type": "string", - "method": "generate", - "description": "The short title or a short summary of the story or chapter." - }, - "Scenes": { - "type": "array", - "items": { - "type": "object", - "properties": { - "Description": { - "type": "string", - "method": "generate", - "description": "A five-word description of the scene. A scene is a smaller segment of the segment where a continous block for storytelling unfolds within a specific time, place, and set of characters. A scene can only belong to a single chapter, and cannot overlap with other scenes. Scenes are sequential across the video." 
- }, - "StartTimestamp": { - "type": "string", - "description": "the start timestamp of the scene" - }, - "EndTimestamp": { - "type": "string", - "description": "the end timestamp of the scene" - } - } - } - } - } - } - } - } - } -} - -### Get the video analyzer details -GET {{endpoint}}/contentunderstanding/analyzers/video_chaptering?api-version={{apiVersion}} -Ocp-Apim-Subscription-Key: {{subscriptionKey}} - -### Test the custom video chaptering analyzer -# @name videoChapteringAnalysis -POST {{endpoint}}/contentunderstanding/analyzers/video_chaptering:analyze?api-version={{apiVersion}} -Content-Type: application/json -Ocp-Apim-Subscription-Key: {{subscriptionKey}} - -{ - "inputs": [ - { - "url": "https://github.com/Azure-Samples/azure-ai-content-understanding-python/blob/402ec1bf337d54b438581c69dbfb784da74ded38/data/video.mp4" - } - ] -} - -### Get video chaptering results (uses operation ID from previous request) -GET {{endpoint}}/contentunderstanding/analyzerResults/{{videoChapteringAnalysis.response.body.id}}?api-version={{apiVersion}} -Ocp-Apim-Subscription-Key: {{subscriptionKey}} - -# ==================================================================== -# SECTION 6: AUDIO ANALYZER EXAMPLES -# ==================================================================== -# -# Audio analyzers can transcribe speech, detect speakers, and extract -# insights from audio content. 
-# - -### Test prebuilt audio analyzer (coming soon - placeholder) -# POST {{endpoint}}/contentunderstanding/analyzers/prebuilt-audio:analyze?api-version={{apiVersion}} -# Content-Type: application/json -# Ocp-Apim-Subscription-Key: {{subscriptionKey}} -# -# { -# "inputs": [ -# { -# "url": "https://example.com/audio-sample.mp3" -# } -# ] -# } - -# ==================================================================== -# SECTION 7: ANALYZER MANAGEMENT OPERATIONS -# ==================================================================== -# -# Copy analyzers between resources or regions for deployment purposes. -# - -### Copy analyzer within same resource -POST {{endpoint}}/contentunderstanding/analyzers/insuranceClaimv3:copy?api-version={{apiVersion}} -Content-Type: application/json -Ocp-Apim-Subscription-Key: {{subscriptionKey}} - -{ - "sourceAnalyzerId": "insuranceClaim" -} - -### Grant copy authorization for cross-resource copy -POST {{endpoint}}/contentunderstanding/analyzers/insuranceClaim:grantCopyAuthorization?api-version={{apiVersion}} -Content-Type: application/json -Ocp-Apim-Subscription-Key: {{subscriptionKey}} - -{ - "targetAzureResourceId": "/subscriptions/3b393ccb-47e3-4dea-9004-0c1085b5aba6/resourceGroups/mmi-sea-eft/providers/Microsoft.CognitiveServices/accounts/foundry-sea-eft", - "targetRegion": "southeastasia" -} - -### Copy analyzer to different resource (use authorization from above) -# POST https://foundry-sea-eft.cognitiveservices.azure.com/contentunderstanding/analyzers/insuranceClaimCopy:copy?api-version={{apiVersion}} -# Content-Type: application/json -# Ocp-Apim-Subscription-Key: {{targetSubscriptionKey}} -# -# { -# "targetAzureResourceId": "/subscriptions/3b393ccb-47e3-4dea-9004-0c1085b5aba6/resourceGroups/mmi-sea-eft/providers/Microsoft.CognitiveServices/accounts/foundry-sea-eft", -# "targetRegion": "southeastasia", -# "expiresAt": "2025-11-01T19:18:11.095328+00:00" -# } - -# ==================================================================== 
-# END OF GUIDE -# ==================================================================== - - -# Test Create a DataSnipper Repro - - - -### 4.1 Create a custom insurance claim form analyzer -# This analyzer extracts specific fields from insurance claim forms -PUT {{endpoint}}/contentunderstanding/analyzers/dsTest?api-version={{apiVersion}} -Ocp-Apim-Subscription-Key: {{subscriptionKey}} -Content-Type: application/json - -{ - "baseAnalyzerId": "prebuilt-document", - "analyzerId": "dsTest", - "models": { - "completion": "gpt-4o", - "embedding": "text-embedding-ada-002" - }, - "fieldSchema": { - "fields": { - "employees": { - "type": "array", - "items": { - "type": "object", - "properties": { - "employee_name": { - "type": "string", - "method": "extract", - "description": "The name of the employee", - "estimateSourceAndConfidence": true - } - }, - "description": "The employee of the company" - }, - "description": "The employees of the company" - } - } - } -} - -### Get the custom analyzer details -GET {{endpoint}}/contentunderstanding/analyzers/dsTest?api-version={{apiVersion}} -Ocp-Apim-Subscription-Key: {{subscriptionKey}} - - - -### Test dsTest analyzer with document -# @name dsTestAnalysis -POST {{endpoint}}/contentunderstanding/analyzers/dsTest:analyze?api-version={{apiVersion}} -Content-Type: application/json -Ocp-Apim-Subscription-Key: {{subscriptionKey}} - -{ - "inputs": [ - { - "url": "https://mmiusw3bbstore.blob.core.windows.net/bugbash-20251020/jfilcik/ADP%20Journals.pdf?sv=2025-07-05&spr=https&st=2025-12-15T17%3A06%3A24Z&se=2025-12-16T17%3A06%3A24Z&skoid=e48ae032-21b6-418b-963c-3129b6a130d3&sktid=72f988bf-86f1-41af-91ab-2d7cd011db47&skt=2025-12-15T17%3A06%3A24Z&ske=2025-12-16T17%3A06%3A24Z&sks=b&skv=2025-07-05&sr=b&sp=r&sig=fX9biRmkc2IPA8BGUrgyZZ651BnB0WPiU54YIyplE2Q%3D" - } - ] -} - -### Get dsTest analysis results (uses operation ID from previous request) -GET 
{{endpoint}}/contentunderstanding/analyzerResults/{{dsTestAnalysis.response.body.id}}?api-version={{apiVersion}} -Ocp-Apim-Subscription-Key: {{subscriptionKey}} \ No newline at end of file From e2f5b16255cedbba6b7e5404417e76be220aabde Mon Sep 17 00:00:00 2001 From: Joe Filcik Date: Tue, 16 Dec 2025 20:57:13 -0500 Subject: [PATCH 6/6] Update constants and readme --- python/di_to_cu_migration_tool/README.md | 27 ++++++++++++++++----- python/di_to_cu_migration_tool/constants.py | 2 +- 2 files changed, 22 insertions(+), 7 deletions(-) diff --git a/python/di_to_cu_migration_tool/README.md b/python/di_to_cu_migration_tool/README.md index d5548c8..737a4ba 100644 --- a/python/di_to_cu_migration_tool/README.md +++ b/python/di_to_cu_migration_tool/README.md @@ -27,8 +27,26 @@ Here is a detailed breakdown of the three CLI tools and their functionality: * **call_analyze.py** * This CLI tool verifies that the migration completed successfully and assesses the quality of the created analyzer. + ## Setup +### Prerequisites + +⚠️ **IMPORTANT: Before using this migration tool**, ensure your Azure AI Foundry resource is properly configured for Content Understanding: + +1. **Configure Default Model Deployments**: You must set default model deployments for Content Understanding in your Foundry resource before creating or running analyzers. + + To do this, walk through the prerequisites here: + - [REST API Quickstart Guide](https://learn.microsoft.com/en-us/azure/ai-services/content-understanding/quickstart/use-rest-api?tabs=portal%2Cdocument) + + For more details about defaults, check out this documentation: + - [Models and Deployments Documentation](https://learn.microsoft.com/en-us/azure/ai-services/content-understanding/concepts/models-deployments) + +2. **Verify you can create and use a basic Content Understanding analyzer** in your Azure AI Foundry resource before attempting migration. This ensures all prerequisites are met. + +3. 
Complete all setup steps outlined in the REST API documentation above, including authentication and model deployment configuration. + +### Tool Setup Please follow these steps to set up the tool: 1. Install dependencies by running: @@ -102,9 +120,6 @@ If migrating a _DI 3.1/4.0 GA Custom Extraction_ dataset, please run: python ./di_to_cu_converter.py --DI-version neural --analyzer-prefix mySampleAnalyzer \ --source-container-sas-url "https://sourceStorageAccount.blob.core.windows.net/sourceContainer?sourceSASToken" --source-blob-folder diDatasetFolderName \ --target-container-sas-url "https://targetStorageAccount.blob.core.windows.net/targetContainer?targetSASToken" --target-blob-folder cuDatasetFolderName - -python ./di_to_cu_converter.py --DI-version neural --analyzer-prefix mySampleAnalyzer --source-container-sas-url "https://jfilcikditestdata.blob.core.windows.net/didata?sv=2025-07-05&spr=https&st=2025-12-16T22%3A17%3A06Z&se=2025-12-17T22%3A17%3A06Z&sr=c&sp=rl&sig=nvUIelZQ9yWEJx3jA%2FjUOIdHn6OVnp5gvKSJ3zgzwvE%3D" --source-blob-folder diDatasetFolderName \ ---target-container-sas-url "https://jfilcikditestdata.blob.core.windows.net/cudata?sv=2025-07-05&spr=https&st=2025-12-16T22%3A19%3A39Z&se=2025-12-17T22%3A19%3A39Z&sr=c&sp=racwl&sig=K82dxEFNpYhuf5JRq3xJ4vc5SYE8A7FfsBnTJbB1VJY%3D" --target-blob-folder cuDatasetFolderName ``` For this migration, specifying an analyzer prefix is crucial for creating a CU analyzer. Since the fields.json does not define a "doc_type" for identification, the created analyzer ID will be the specified analyzer prefix. 
@@ -127,9 +142,9 @@ After converting the CU analyzer.json, please run: ``` python ./create_analyzer.py \ ---analyzer-sas-url "https://jfilcikditestdata.blob.core.windows.net/cudata?sv=2025-07-05&spr=https&st=2025-12-16T22%3A19%3A39Z&se=2025-12-17T22%3A19%3A39Z&sr=c&sp=racwl&sig=K82dxEFNpYhuf5JRq3xJ4vc5SYE8A7FfsBnTJbB1VJY%3D" \ ---target-container-sas-url "https://jfilcikditestdata.blob.core.windows.net/cudata?sv=2025-07-05&spr=https&st=2025-12-16T22%3A19%3A39Z&se=2025-12-17T22%3A19%3A39Z&sr=c&sp=racwl&sig=K82dxEFNpYhuf5JRq3xJ4vc5SYE8A7FfsBnTJbB1VJY%3D" \ ---target-blob-folder "di_convert" +--analyzer-sas-url "https://targetStorageAccount.blob.core.windows.net/targetContainer/cuDatasetFolderName/analyzer.json?targetSASToken" \ +--target-container-sas-url "https://targetStorageAccount.blob.core.windows.net/targetContainer?targetSASToken" \ +--target-blob-folder cuDatasetFolderName ``` The `analyzer.json` file is located in the specified target blob container and folder. Please obtain the SAS URL for `analyzer.json` from there. diff --git a/python/di_to_cu_migration_tool/constants.py b/python/di_to_cu_migration_tool/constants.py index 09dc972..73f9e0c 100644 --- a/python/di_to_cu_migration_tool/constants.py +++ b/python/di_to_cu_migration_tool/constants.py @@ -1,6 +1,6 @@ # Supported DI versions DI_VERSIONS = ["generative", "neural"] -CU_API_VERSION = "2025-05-01-preview" +CU_API_VERSION = "2025-11-01" # constants MAX_FIELD_COUNT = 100