From eeba42844231fc706755a90ca5ea27a11ce8ca19 Mon Sep 17 00:00:00 2001 From: Chien Yuan Chang Date: Tue, 20 Jan 2026 11:55:33 -0800 Subject: [PATCH 1/3] remove deprecated classifier and pro mode --- python/content_understanding_client.py | 139 ++++--------------------- 1 file changed, 22 insertions(+), 117 deletions(-) diff --git a/python/content_understanding_client.py b/python/content_understanding_client.py index 7b7cc68..21e1132 100644 --- a/python/content_understanding_client.py +++ b/python/content_understanding_client.py @@ -32,6 +32,27 @@ class ReferenceDocItem: class AzureContentUnderstandingClient: + """ + Lightweight client for Azure AI Content Understanding (GA API version 2025-11-01). + + This utility class provides methods to interact with the Content Understanding REST API. + It supports analyzer management, document analysis, and knowledge base operations. + + Key features in GA version: + - Classification is now integrated into analyzers via contentCategories (supports up to 200 categories) + - Deprecated: Separate classifier APIs (classifiers endpoints) + - Deprecated: Pro mode and cross-file analysis (knowledgeSources) + - Support for training data configuration + - Model deployment management via defaults endpoint + - Async blob storage operations for knowledge base generation + + Example: + client = AzureContentUnderstandingClient( + endpoint="https://your-resource.cognitiveservices.azure.com", + api_version="2025-11-01", + subscription_key="your-key", # or use token_provider with DefaultAzureCredential + ) + """ PREBUILT_DOCUMENT_ANALYZER_ID: str = "prebuilt-documentSearch" OCR_RESULT_FILE_SUFFIX: str = ".result.json" @@ -67,7 +88,7 @@ class AzureContentUnderstandingClient: ".png", ".bmp", ".heif", - ] # Pro mode and Training for Standard mode only support document data + ] # Training for Standard mode only support document data # Maximum number of pages to retrieve when following pagination links MAX_PAGINATION_PAGES: int = 1000 @@ -117,22 +138,6 @@ def _get_training_data_config( "kind": "blob", "prefix": storage_container_path_prefix, } - - def _get_pro_mode_reference_docs_config( - self, storage_container_sas_url: str, storage_container_path_prefix: str - ) -> List[Dict[str, str]]: - return [{ - "kind": "reference", - "containerUrl": storage_container_sas_url, - "prefix": storage_container_path_prefix, - "fileListPath": self.KNOWLEDGE_SOURCE_LIST_FILE_NAME, - }] - - def _get_classifier_url(self, endpoint: str, api_version: str, classifier_id: str) -> str: - return f"{endpoint}/contentunderstanding/classifiers/{classifier_id}?api-version={api_version}" - - def _get_classify_url(self, endpoint: str, api_version: str, classifier_id: str) -> str: - return f"{endpoint}/contentunderstanding/classifiers/{classifier_id}:classify?api-version={api_version}" def _get_defaults_url(self, endpoint: str, api_version: str) -> str: return f"{endpoint}/contentunderstanding/defaults?api-version={api_version}" @@ -431,8 +436,6 @@ def begin_create_analyzer( analyzer_template_path: str = "", training_storage_container_sas_url: str = "", training_storage_container_path_prefix: str = "", - pro_mode_reference_docs_storage_container_sas_url: str = "", - pro_mode_reference_docs_storage_container_path_prefix: str = "", ) -> Response: """ Initiates the creation of an analyzer with the given ID and schema. @@ -469,17 +472,6 @@ def begin_create_analyzer( training_storage_container_path_prefix, ) - if ( - pro_mode_reference_docs_storage_container_sas_url - and pro_mode_reference_docs_storage_container_path_prefix - ): # noqa - if not pro_mode_reference_docs_storage_container_path_prefix.endswith("/"): - pro_mode_reference_docs_storage_container_path_prefix += "/" - analyzer_template["knowledgeSources"] = self._get_pro_mode_reference_docs_config( - pro_mode_reference_docs_storage_container_sas_url, - pro_mode_reference_docs_storage_container_path_prefix, - ) - headers = {"Content-Type": "application/json"} headers.update(self._headers) @@ -833,93 +825,6 @@ def get_result_file( print(f"HTTP request failed: {e}") return None - def begin_create_classifier( - self, - classifier_id: str, - classifier_schema: Dict[str, Any], - ) -> Response: - """ - Initiates the creation of an classifier with the given ID and schema. - - Args: - classifier_id (str): The unique identifier for the classifier. - classifier_schema (dict): The schema definition for the classifier. - - Raises: - requests.exceptions.HTTPError: If the HTTP request to create the classifier fails. - ValueError: If the classifier schema or ID is not provided. - - Returns: - requests.Response: The response object from the HTTP request. - """ - - if not classifier_schema: - raise ValueError("Classifier schema must be provided.") - if not classifier_id: - raise ValueError("Classifier ID must be provided.") - - headers = {"Content-Type": "application/json"} - headers.update(self._headers) - - response = requests.put( - url=self._get_classifier_url(self._endpoint, self._api_version, classifier_id), - headers=headers, - json=classifier_schema, - ) - self._raise_for_status_with_detail(response) - self._logger.info(f"Classifier {classifier_id} create request accepted.") - return response - - def begin_classify(self, classifier_id: str, file_location: str) -> Response: - """ - Begins the analysis of a file or URL using the specified classifier. - - Args: - classifier_id (str): The ID of the classifier to use. - file_location (str): The local path to the file or the URL to analyze. - - Returns: - Response: The response from the analysis request. - - Raises: - ValueError: If the file location is not a valid path or URL. - HTTPError: If the HTTP request returned an unsuccessful status code. - """ - data = None - if Path(file_location).exists(): - with open(file_location, "rb") as file: - data = file.read() - headers = {"Content-Type": "application/octet-stream"} - elif "https://" in file_location or "http://" in file_location: - data = {"url": file_location} - headers = {"Content-Type": "application/json"} - else: - raise ValueError("File location must be a valid path or URL.") - - headers.update(self._headers) - if isinstance(data, dict): - response = requests.post( - url=self._get_classify_url( - self._endpoint, self._api_version, classifier_id - ), - headers=headers, - json=data, - ) - else: - response = requests.post( - url=self._get_classify_url( - self._endpoint, self._api_version, classifier_id - ), - headers=headers, - data=data, - ) - - self._raise_for_status_with_detail(response) - self._logger.info( - f"Analyzing file {file_location} with classifier_id: {classifier_id}" - ) - return response - def poll_result( self, response: Response, From 0c0d6efd973ed58bc2751b0efd936f7cc6b48ba0 Mon Sep 17 00:00:00 2001 From: Chien Yuan Chang Date: Tue, 20 Jan 2026 13:58:24 -0800 Subject: [PATCH 2/3] remove pro mode analyzer templates --- .../insurance_claims_review_pro_mode.json | 129 ------------------ ...nvoice_contract_verification_pro_mode.json | 70 ---------- 2 files changed, 199 deletions(-) delete mode 100644 analyzer_templates/insurance_claims_review_pro_mode.json delete mode 100644 analyzer_templates/invoice_contract_verification_pro_mode.json diff --git a/analyzer_templates/insurance_claims_review_pro_mode.json b/analyzer_templates/insurance_claims_review_pro_mode.json deleted file mode 100644 index 65cfea2..0000000 --- a/analyzer_templates/insurance_claims_review_pro_mode.json +++ /dev/null @@ -1,129 +0,0 @@ -{ - "baseAnalyzerId": "prebuilt-documentAnalyzer", - "mode": "pro", - "processingLocation": "global", - "fieldSchema": { - "name": "InsuranceClaimsReview", - "description": "Analyze documents for insurance claim approval strictly according to the provided insurance policy. Consider all aspects of the insurance claim documents, any potential discrepancies found among the documents, any claims that should be flagged for review, etc.", - "fields": { - "CarBrand": { - "description": "Brand of the damaged vehicle.", - "type": "string" - }, - "CarColor": { - "description": "Color of the damaged vehicle. Only use color name from 17 web colors. Use CamalCase naming convention.", - "type": "string" - }, - "CarModel": { - "description": "Model of the damaged vehicle. Do not include brand name. Leave empty if not found.", - "type": "string" - }, - "LicensePlate": { - "description": "License plate number of the damaged vehicle.", - "type": "string" - }, - "VIN": { - "description": "VIN of the damaged vehicle. Leave empty if not found.", - "type": "string" - }, - "ReportingOfficer": { - "description": "Name of the reporting officer for the incident.", - "type": "string" - }, - "LineItemCorroboration": { - "type": "array", - "description": "Validation of all of the line items on the claim, including parts, services, labors, materials, shipping and other costs and fees. When in doubt about adherence to the policy, mark as suspicious.", - "items": { - "$ref": "#/$defs/LineItemAnalysisEntry" - } - } - }, - "definitions": { - "LineItemAnalysisEntry": { - "type": "object", - "description": "Entry in the line item analysis table to analyze the pertinent information for the line item.", - "properties": { - "LineItemName": { - "description": "Name of the line item in the claim.", - "type": "string" - }, - "IdentifiedVehiclePart": { - "description": "The relevant associated vehicle part for this line item", - "enum": [ - "BODY_TRIM", - "DRIVER_SIDE_DRIVER_DOOR", - "DRIVER_SIDE_DRIVER_HANDLE", - "DRIVER_SIDE_FRONT_TIRE", - "DRIVER_SIDE_FRONT_WHEEL", - "DRIVER_SIDE_FUEL_CAP", - "DRIVER_SIDE_PASSENGER_DOOR", - "DRIVER_SIDE_PASSENGER_HANDLE", - "DRIVER_SIDE_PASSENGER_WINDOW", - "DRIVER_SIDE_REAR_HEADLAMP", - "DRIVER_SIDE_REAR_TIRE", - "DRIVER_SIDE_REAR_WHEEL", - "DRIVER_SIDE_SIDE_WINDOW", - "DRIVER_SIDE_WINDOW", - "DRIVER_SIDE_WING_MIRROR", - "FRONT_BONNET", - "FRONT_BUMPER_LOWER", - "FRONT_BUMPER_UPPER", - "FRONT_DRIVER_SIDE_FOG_LIGHT", - "FRONT_DRIVER_SIDE_HEADLAMP", - "FRONT_GRILL", - "FRONT_NUMBER_PLATE", - "FRONT_PASSENGER_SIDE_FOG_LIGHT", - "FRONT_PASSENGER_SIDE_HEADLAMP", - "FRONT_WINDSHIELD", - "PASSENGER_SIDE_DRIVER_DOOR", - "PASSENGER_SIDE_DRIVER_HANDLE", - "PASSENGER_SIDE_FRONT_TIRE", - "PASSENGER_SIDE_FRONT_WHEEL", - "PASSENGER_SIDE_PASSENGER_DOOR", - "PASSENGER_SIDE_PASSENGER_HANDLE", - "PASSENGER_SIDE_PASSENGER_WINDOW", - "PASSENGER_SIDE_REAR_HEADLAMP", - "PASSENGER_SIDE_REAR_TIRE", - "PASSENGER_SIDE_REAR_WHEEL", - "PASSENGER_SIDE_SIDE_WINDOW", - "PASSENGER_SIDE_WINDOW", - "PASSENGER_SIDE_WING_MIRROR", - "REAR_BUMPER", - "REAR_NUMBER_PLATE", - "REAR_TRUNK", - "REAR_WINDSHIELD", - "ROOF_PANEL", - "OTHER" - ], - "type": "string" - }, - "Cost": { - "description": "The cost of this line item on the claim.", - "type": "number" - }, - "Evidence": { - "description": "The evidence for this line item entry, a list of the document with analyzed evidence supporting the claim formatted as /. One of the insurance policy documents must be one of the documents.", - "items": { - "type": "string" - }, - "type": "array" - }, - "ClaimStatus": { - "type": "string", - "description": "Determined by confidence in whether the claim should be approved based on the evidence. Item should be compliant to insurance policy and required for repairing the vehicle. Only use 'confirmed' for items explicitly approvable according to the policy. If unsure, use 'suspicious'.", - "enum": [ - "confirmed", - "suspicious", - "unconfirmed" - ], - "enumDescriptions": { - "confirmed": "Completely and explicitly corroborated by the policy.", - "suspicious": "Only partially verified, questionable, or otherwise uncertain evidence to approve automatically. Requires human review.", - "unconfirmed": "Explicitly not approved by the policy." - } - } - } - } - } - } -} \ No newline at end of file diff --git a/analyzer_templates/invoice_contract_verification_pro_mode.json b/analyzer_templates/invoice_contract_verification_pro_mode.json deleted file mode 100644 index 855f4d6..0000000 --- a/analyzer_templates/invoice_contract_verification_pro_mode.json +++ /dev/null @@ -1,70 +0,0 @@ -{ - "baseAnalyzerId": "prebuilt-documentAnalyzer", - "mode": "pro", - "processingLocation": "global", - "fieldSchema": { - "name": "InvoiceContractVerification", - "description": "Analyze invoice to confirm total consistency with signed contract.", - "fields": { - "PaymentTermsInconsistencies": { - "type": "array", - "method": "generate", - "description": "List all areas of inconsistency identified in the invoice with corresponding evidence.", - "items": { - "$ref": "#/$defs/InvoiceInconsistency" - } - }, - "ItemInconsistencies": { - "type": "array", - "method": "generate", - "description": "List all areas of inconsistency identified in the invoice in the goods or services sold (including detailed specifications for every line item).", - "items": { - "$ref": "#/$defs/InvoiceInconsistency" - } - }, - "BillingLogisticsInconsistencies": { - "type": "array", - "method": "generate", - "description": "List all areas of inconsistency identified in the invoice regarding billing logistics and administrative or legal issues.", - "items": { - "$ref": "#/$defs/InvoiceInconsistency" - } - }, - "PaymentScheduleInconsistencies": { - "type": "array", - "method": "generate", - "description": "List all areas of inconsistency identified in the invoice with corresponding evidence.", - "items": { - "$ref": "#/$defs/InvoiceInconsistency" - } - }, - "TaxOrDiscountInconsistencies": { - "type": "array", - "method": "generate", - "description": "List all areas of inconsistency identified in the invoice with corresponding evidence regarding taxes or discounts.", - "items": { - "$ref": "#/$defs/InvoiceInconsistency" - } - } - }, - "definitions": { - "InvoiceInconsistency": { - "type": "object", - "method": "generate", - "description": "Area of inconsistency in the invoice with the company's contracts.", - "properties": { - "Evidence": { - "type": "string", - "method": "generate", - "description": "Evidence or reasoning for the inconsistency in the invoice." - }, - "InvoiceField": { - "type": "string", - "method": "generate", - "description": "Invoice field or the aspect that is inconsistent with the contract." - } - } - } - } - } -} \ No newline at end of file From ea37a83847ee87d4ddce51f0af3ac959ce898363 Mon Sep 17 00:00:00 2001 From: Chien Yuan Chang Date: Tue, 20 Jan 2026 16:36:20 -0800 Subject: [PATCH 3/3] update templates --- analyzer_templates/audio_transcription.json | 2 +- .../call_recording_analytics_text.json | 4 +- analyzer_templates/content_document.json | 2 +- analyzer_templates/content_video.json | 8 +-- analyzer_templates/face_aware_in_video.json | 8 +-- analyzer_templates/invoice_field_source.json | 4 +- analyzer_templates/marketing_video.json | 3 +- .../marketing_video_segmenation_auto.json | 39 +++++------ .../marketing_video_segmenation_custom.json | 45 ++++++------ analyzer_templates/receipt.json | 5 +- .../video_chapters_dynamic.json | 60 ++++++++-------- .../video_chapters_structured.json | 68 ++++++++++--------- python/content_understanding_client.py | 2 - 13 files changed, 118 insertions(+), 132 deletions(-) diff --git a/analyzer_templates/audio_transcription.json b/analyzer_templates/audio_transcription.json index 9f7c072..4943776 100644 --- a/analyzer_templates/audio_transcription.json +++ b/analyzer_templates/audio_transcription.json @@ -1,6 +1,6 @@ { "description": "Sample audio transcription", - "baseAnalyzerId": "prebuilt-audioAnalyzer", + "baseAnalyzerId": "prebuilt-audio", "config": { "returnDetails": true, "locales": ["en-US"] diff --git a/analyzer_templates/call_recording_analytics_text.json b/analyzer_templates/call_recording_analytics_text.json index ea4e5d2..7936477 100644 --- a/analyzer_templates/call_recording_analytics_text.json +++ b/analyzer_templates/call_recording_analytics_text.json @@ -1,6 +1,6 @@ { - "description": "Sample call recording analytics", - "baseAnalyzerId": "prebuilt-audioAnalyzer", + "description": "Sample call recording analytics", + "baseAnalyzerId": "prebuilt-audio", "config": { "returnDetails": true }, diff --git a/analyzer_templates/content_document.json b/analyzer_templates/content_document.json index f7bf0ba..94217e5 100644 --- a/analyzer_templates/content_document.json +++ b/analyzer_templates/content_document.json @@ -1,5 +1,5 @@ { "description": "Sample document content analyzer", - "baseAnalyzerId": "prebuilt-documentAnalyzer", + "baseAnalyzerId": "prebuilt-document", "fieldSchema": {} } \ No newline at end of file diff --git a/analyzer_templates/content_video.json b/analyzer_templates/content_video.json index f949a3d..3416f96 100644 --- a/analyzer_templates/content_video.json +++ b/analyzer_templates/content_video.json @@ -1,6 +1,6 @@ { "description": "Sample video content analyzer", - "baseAnalyzerId": "prebuilt-videoAnalyzer", + "baseAnalyzerId": "prebuilt-video", "config": { "returnDetails": true, "locales": [ @@ -14,9 +14,7 @@ "ko-KR", "pt-BR", "zh-CN" - ], - "enableFace": false + ] }, - "fieldSchema": { - } + "fieldSchema": {} } \ No newline at end of file diff --git a/analyzer_templates/face_aware_in_video.json b/analyzer_templates/face_aware_in_video.json index b4a40d8..5bff296 100644 --- a/analyzer_templates/face_aware_in_video.json +++ b/analyzer_templates/face_aware_in_video.json @@ -1,8 +1,7 @@ { "description": "Generate face-aware content understanding from video.", - "baseAnalyzerId": "prebuilt-videoAnalyzer", + "baseAnalyzerId": "prebuilt-video", "config": { - "enableFace": true, "returnDetails": true, "locales": [ "en-US", @@ -18,7 +17,7 @@ ] }, "fieldSchema": { - "description": "Analyze videos to extract faces", + "description": "Analyze videos to extract visual descriptions including people", "fields": { "description": { "type": "string", @@ -35,6 +34,5 @@ ] } } - }, - "scenario": "videoShot" + } } \ No newline at end of file diff --git a/analyzer_templates/invoice_field_source.json b/analyzer_templates/invoice_field_source.json index 2d1e32e..f6d256c 100644 --- a/analyzer_templates/invoice_field_source.json +++ b/analyzer_templates/invoice_field_source.json @@ -1,6 +1,6 @@ { - "description": "Sample invoice analyzer", - "baseAnalyzerId": "prebuilt-documentAnalyzer", + "description": "Sample invoice analyzer", + "baseAnalyzerId": "prebuilt-document", "config": { "returnDetails": true, "estimateFieldSourceAndConfidence": true diff --git a/analyzer_templates/marketing_video.json b/analyzer_templates/marketing_video.json index 4ebec48..cdcc2ac 100644 --- a/analyzer_templates/marketing_video.json +++ b/analyzer_templates/marketing_video.json @@ -2,8 +2,7 @@ "description": "Sample marketing video analyzer", "baseAnalyzerId": "prebuilt-video", "config": { - "returnDetails": true, - "segmentationMode": "noSegmentation" + "returnDetails": true }, "fieldSchema": { "fields": { diff --git a/analyzer_templates/marketing_video_segmenation_auto.json b/analyzer_templates/marketing_video_segmenation_auto.json index 40a393b..143c9a6 100644 --- a/analyzer_templates/marketing_video_segmenation_auto.json +++ b/analyzer_templates/marketing_video_segmenation_auto.json @@ -1,32 +1,25 @@ { - "description": "Sample marketing video analyzer", - "baseAnalyzerId": "prebuilt-videoAnalyzer", + "description": "Sample marketing video analyzer with automatic segmentation", + "baseAnalyzerId": "prebuilt-video", "config": { "returnDetails": true, - "segmentationMode": "auto" + "enableSegment": true }, "fieldSchema": { "fields": { - "Segments": { - "type": "array", - "items": { - "type": "object", - "properties": { - "Description": { - "type": "string", - "description": "Detailed summary of the video segment, focusing on product characteristics, lighting, and color palette." - }, - "Sentiment": { - "type": "string", - "method": "classify", - "enum": [ - "Positive", - "Neutral", - "Negative" - ] - } - } - } + "Description": { + "type": "string", + "description": "Detailed summary of the video segment, focusing on product characteristics, lighting, and color palette.", + "analyzerId": "prebuilt-video" + }, + "Sentiment": { + "type": "string", + "method": "classify", + "enum": [ + "Positive", + "Neutral", + "Negative" + ] } } } diff --git a/analyzer_templates/marketing_video_segmenation_custom.json b/analyzer_templates/marketing_video_segmenation_custom.json index b2fb8f0..75aef78 100644 --- a/analyzer_templates/marketing_video_segmenation_custom.json +++ b/analyzer_templates/marketing_video_segmenation_custom.json @@ -1,33 +1,30 @@ { - "description": "Sample marketing video analyzer", - "baseAnalyzerId": "prebuilt-videoAnalyzer", + "description": "Sample marketing video analyzer with custom segmentation using contentCategories", + "baseAnalyzerId": "prebuilt-video", "config": { "returnDetails": true, - "segmentationMode": "custom", - "segmentationDefinition": "Segment the video at each clear narrative or visual transition that introduces a new marketing message, speaker, or brand moment. Segments should begin when there is a change in speaker, a shift in visual theme (e.g., logos, product shots, data center views, simulation footage, aircraft scenes), or the introduction of a new key message (e.g., quality of data, scale of infrastructure, customer benefit, real-world aviation use). Each segment should capture one distinct marketing idea or value point, ending when the focus transitions to the next theme." + "enableSegment": true, + "contentCategories": { + "Marketing Segment": { + "description": "A distinct marketing message or visual transition introducing a new marketing message, speaker, or brand moment.", + "analyzerId": "prebuilt-video" + } + } }, "fieldSchema": { "fields": { - "Segments": { - "type": "array", - "items": { - "type": "object", - "properties": { - "Description": { - "type": "string", - "description": "Detailed summary of the video segment, focusing on product characteristics, lighting, and color palette." - }, - "Sentiment": { - "type": "string", - "method": "classify", - "enum": [ - "Positive", - "Neutral", - "Negative" - ] - } - } - } + "Description": { + "type": "string", + "description": "Detailed summary of the video segment, focusing on product characteristics, lighting, and color palette." + }, + "Sentiment": { + "type": "string", + "method": "classify", + "enum": [ + "Positive", + "Neutral", + "Negative" + ] } } } diff --git a/analyzer_templates/receipt.json b/analyzer_templates/receipt.json index 8d16b12..4a78359 100644 --- a/analyzer_templates/receipt.json +++ b/analyzer_templates/receipt.json @@ -1,7 +1,6 @@ { - "description": "Extract useful information from receipt", - "scenario": "document", - "baseAnalyzerId": "prebuilt-documentAnalyzer", + "description": "Extract useful information from receipt", + "baseAnalyzerId": "prebuilt-document", "fieldSchema": { "fields": { "MerchantName": { diff --git a/analyzer_templates/video_chapters_dynamic.json b/analyzer_templates/video_chapters_dynamic.json index 9c547fb..078c2d7 100644 --- a/analyzer_templates/video_chapters_dynamic.json +++ b/analyzer_templates/video_chapters_dynamic.json @@ -1,52 +1,50 @@ { "description": "Dynamic Chaptering", - "scenario": "videoShot", + "baseAnalyzerId": "prebuilt-video", + "models": { + "completion": "gpt-4.1-mini" + }, "config": { "returnDetails": true, - "enableSegmentation": true, - "segmentationMode": "custom", - "segmentationDefinition": "Segment the video into stories or chapters. A story (chapter) in a video is a self-contained portion of the program dedicated to a specific news story, topic, or theme. Each segment typically includes a distinct introduction, development, and (sometimes) a conclusion, and can feature a combination of elements such as reporter narration, interviews, sound bites, relevant footage (B-roll), and graphics.", + "enableSegment": true, + "contentCategories": { + "Chapter": { + "description": "A self-contained portion of the video dedicated to a specific news story, topic, or theme with distinct introduction, development, and conclusion.", + "analyzerId": "prebuilt-video" + } + }, "locales": [ "en-US" ] }, - "BaseAnalyzerId": "prebuilt-videoAnalyzer", "fieldSchema": { "name": "Content Understanding - Dynamic Chaptering", "fields": { - "Segments": { + "SegmentId": { + "type": "string" + }, + "SegmentType": { + "type": "string", + "method": "generate", + "description": "The short title or a short summary of the story or chapter." + }, + "Scenes": { "type": "array", "items": { "type": "object", "properties": { - "SegmentId": { - "type": "string" - }, - "SegmentType": { + "Description": { "type": "string", "method": "generate", - "description": "The short title or a short summary of the story or chapter." + "description": "A five-word description of the scene. A scene is a smaller segment of the segment where a continuous block for storytelling unfolds within a specific time, place, and set of characters. A scene can only belong to a single chapter, and cannot overlap with other scenes. Scenes are sequential across the video." }, - "Scenes": { - "type": "array", - "items": { - "type": "object", - "properties": { - "Description": { - "type": "string", - "method": "generate", - "description": "A five-word description of the scene. A scene is a smaller segment of the segment where a continous block for storytelling unfolds within a specific time, place, and set of characters. A scene can only belong to a single chapter, and cannot overlap with other scenes. Scenes are sequential across the video." - }, - "StartTimestamp": { - "type": "string", - "description": "the start timestamp of the scene" - }, - "EndTimestamp": { - "type": "string", - "description": "the end timestamp of the scene" - } - } - } + "StartTimestamp": { + "type": "string", + "description": "the start timestamp of the scene" + }, + "EndTimestamp": { + "type": "string", + "description": "the end timestamp of the scene" } } } diff --git a/analyzer_templates/video_chapters_structured.json b/analyzer_templates/video_chapters_structured.json index 744f1a2..4306348 100644 --- a/analyzer_templates/video_chapters_structured.json +++ b/analyzer_templates/video_chapters_structured.json @@ -1,52 +1,58 @@ { "description": "Structured Chaptering", - "scenario": "videoShot", + "baseAnalyzerId": "prebuilt-video", + "models": { + "completion": "gpt-4.1-mini" + }, "config": { "returnDetails": true, - "enableSegmentation": true, - "segmentationMode": "custom", - "segmentationDefinition": "Segment the video into only three chapter types: 'Topic Introduction', 'Details About the Work Done', and 'Outcome, Conclusion and Results'. Reason about the content and determine the best time to segment the video according to these chapter types. Use the timestamp of each image to identify the start and end time of each chapter, and avoid chapter overlap. You must always define the three chapter types, and each chapter must have at least one scene.", + "enableSegment": true, + "contentCategories": { + "Topic Introduction": { + "description": "The introduction section of the video that establishes the topic.", + "analyzerId": "prebuilt-video" + }, + "Details About the Work Done": { + "description": "The main section detailing the work, implementation, or process.", + "analyzerId": "prebuilt-video" + }, + "Outcome, Conclusion and Results": { + "description": "The conclusion section summarizing outcomes and results.", + "analyzerId": "prebuilt-video" + } + }, "locales": [ "en-US" ] }, - "BaseAnalyzerId": "prebuilt-videoAnalyzer", "fieldSchema": { "name": "Content Understanding - Structured Chaptering", "fields": { - "Segments": { + "SegmentId": { + "type": "string" + }, + "SegmentType": { + "type": "string", + "method": "generate", + "description": "The chapter type for the segment" + }, + "Scenes": { "type": "array", "items": { "type": "object", "properties": { - "SegmentId": { - "type": "string" - }, - "SegmentType": { + "Description": { "type": "string", "method": "generate", - "description": "The chapter type for the segment" + "description": "A five-word description of the scene. A scene is a smaller segment of the segment where a continuous block for storytelling unfolds within a specific time, place, and set of characters. A scene can only belong to a single chapter, and cannot overlap with other scenes. Scenes are sequential across the video." }, - "Scenes": { - "type": "array", - "items": { - "type": "object", - "properties": { - "Description": { - "type": "string", - "method": "generate", - "description": "A five-word description of the scene. A scene is a smaller segment of the segment where a continous block for storytelling unfolds within a specific time, place, and set of characters. A scene can only belong to a single chapter, and cannot overlap with other scenes. Scenes are sequential across the video." - }, - "StartTimestamp": { - "type": "string", - "description": "the start timestamp of the scene" - }, - "EndTimestamp": { - "type": "string", - "description": "the end timestamp of the scene" - } - } - } + "StartTimestamp": { + "type": "string", + "description": "the start timestamp of the scene" + }, + "EndTimestamp": { + "type": "string", + "description": "the end timestamp of the scene" } } } diff --git a/python/content_understanding_client.py b/python/content_understanding_client.py index 21e1132..84f64f6 100644 --- a/python/content_understanding_client.py +++ b/python/content_understanding_client.py @@ -40,8 +40,6 @@ class AzureContentUnderstandingClient: Key features in GA version: - Classification is now integrated into analyzers via contentCategories (supports up to 200 categories) - - Deprecated: Separate classifier APIs (classifiers endpoints) - - Deprecated: Pro mode and cross-file analysis (knowledgeSources) - Support for training data configuration - Model deployment management via defaults endpoint - Async blob storage operations for knowledge base generation