From e92542e1c356dd0a50d4c5265421f753035fe580 Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Sun, 22 Feb 2026 14:31:08 +0000 Subject: [PATCH 1/2] rewrite pyannote-cloud crate with progenitor Replace hand-written API client with progenitor-generated code from the official pyannote OpenAPI spec (https://docs.pyannote.ai/openapi.json). - Add build.rs using progenitor-utils (same pattern as chatwoot crate) - Add fetch.sh to download the OpenAPI spec - Add openapi.gen.json (raw spec) and openapi.filtered.json (processed) - Remove manual client code (lib.rs, get_job.rs, submit_diarization_job.rs, test_key.rs) - Update dependencies: add progenitor-client, regress, chrono, serde_json; remove url, specta Co-Authored-By: yujonglee --- Cargo.lock | 10 +- crates/pyannote-cloud/Cargo.toml | 17 +- crates/pyannote-cloud/build.rs | 18 + crates/pyannote-cloud/fetch.sh | 10 + crates/pyannote-cloud/openapi.filtered.json | 1222 ++++++++++++++ crates/pyannote-cloud/openapi.gen.json | 1499 +++++++++++++++++ crates/pyannote-cloud/src/get_job.rs | 82 - crates/pyannote-cloud/src/lib.rs | 104 +- .../src/submit_diarization_job.rs | 45 - crates/pyannote-cloud/src/test_key.rs | 25 - 10 files changed, 2762 insertions(+), 270 deletions(-) create mode 100644 crates/pyannote-cloud/build.rs create mode 100644 crates/pyannote-cloud/fetch.sh create mode 100644 crates/pyannote-cloud/openapi.filtered.json create mode 100644 crates/pyannote-cloud/openapi.gen.json delete mode 100644 crates/pyannote-cloud/src/get_job.rs delete mode 100644 crates/pyannote-cloud/src/submit_diarization_job.rs delete mode 100644 crates/pyannote-cloud/src/test_key.rs diff --git a/Cargo.lock b/Cargo.lock index 1a8cca442a..c8326dae28 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -13663,13 +13663,13 @@ dependencies = [ name = "pyannote-cloud" version = "0.1.0" dependencies = [ - "data", + "chrono", + "progenitor-client", + "progenitor-utils", + "regress", "reqwest 0.13.2", - "rodio", "serde", - "specta", - "tokio", - "url", + "serde_json", ] [[package]] diff --git a/crates/pyannote-cloud/Cargo.toml b/crates/pyannote-cloud/Cargo.toml index 2145895315..21a9032212 100644 --- a/crates/pyannote-cloud/Cargo.toml +++ b/crates/pyannote-cloud/Cargo.toml @@ -4,15 +4,12 @@ version = "0.1.0" edition = "2024" [dependencies] -reqwest = { workspace = true, features = ["json"] } -url = { workspace = true } - +chrono = { workspace = true, features = ["serde"] } +progenitor-client = "0.12" +regress = "0.10" +reqwest = { workspace = true, features = ["json", "stream"] } serde = { workspace = true, features = ["derive"] } -specta = { workspace = true, features = ["derive"] } - -[dev-dependencies] -hypr-data = { workspace = true } +serde_json = { workspace = true } -reqwest = { workspace = true, features = ["json"] } -rodio = { workspace = true } -tokio = { workspace = true, features = ["rt", "macros"] } +[build-dependencies] +progenitor-utils = { path = "../progenitor-utils" } diff --git a/crates/pyannote-cloud/build.rs b/crates/pyannote-cloud/build.rs new file mode 100644 index 0000000000..a2ae88e705 --- /dev/null +++ b/crates/pyannote-cloud/build.rs @@ -0,0 +1,18 @@ +use progenitor_utils::OpenApiSpec; + +const ALLOWED_PATH_PREFIXES: &[&str] = &["/v1/"]; + +fn main() { + let src = concat!(env!("CARGO_MANIFEST_DIR"), "/openapi.gen.json"); + println!("cargo:rerun-if-changed={src}"); + + OpenApiSpec::from_path(src) + .retain_paths(ALLOWED_PATH_PREFIXES) + .normalize_responses() + .flatten_all_of() + .remove_unreferenced_schemas() + .write_filtered( + std::path::Path::new(env!("CARGO_MANIFEST_DIR")).join("openapi.filtered.json"), + ) + .generate("codegen.rs"); +} diff --git a/crates/pyannote-cloud/fetch.sh b/crates/pyannote-cloud/fetch.sh new file mode 100644 index 0000000000..08c2a830b2 --- /dev/null +++ b/crates/pyannote-cloud/fetch.sh @@ -0,0 +1,10 @@ +#!/bin/bash +set -euo pipefail + +cd "$(dirname "$0")" + +curl -sL \ + https://docs.pyannote.ai/openapi.json \ + -o openapi.gen.json + +echo "Fetched openapi.gen.json ($(wc -l < openapi.gen.json) lines)" diff --git a/crates/pyannote-cloud/openapi.filtered.json b/crates/pyannote-cloud/openapi.filtered.json new file mode 100644 index 0000000000..0f233ecd00 --- /dev/null +++ b/crates/pyannote-cloud/openapi.filtered.json @@ -0,0 +1,1222 @@ +{ + "components": { + "schemas": { + "Confidence": { + "properties": { + "resolution": { + "description": "Resolution of the confidence scores. Value is number of seconds per sample", + "example": 0.02, + "type": "number" + }, + "score": { + "description": "List of confidence scores for each sample. Values are between 0 and 100", + "example": [ + 95, + 89, + 78, + 67, + 56, + 45, + 34, + 23, + 12, + 1 + ], + "items": { + "type": "number" + }, + "type": "array" + } + }, + "required": [ + "score", + "resolution" + ], + "type": "object" + }, + "DiarizationJob": { + "properties": { + "createdAt": { + "description": "Date and time the job was created", + "example": "2024-02-20T12:00:00Z", + "format": "date-time", + "type": "string" + }, + "jobId": { + "description": "Job ID to track the progress or get the results", + "example": "fb16c565-f3f0-4402-a08c-9d44df0ccc7b", + "type": "string" + }, + "output": { + "$ref": "#/components/schemas/DiarizationJobOutput" + }, + "status": { + "$ref": "#/components/schemas/JobStatus" + }, + "updatedAt": { + "description": "Date and time the job was last updated", + "example": "2024-02-20T12:00:00Z", + "format": "date-time", + "type": "string" + } + }, + "type": "object" + }, + "DiarizationJobOutput": { + "properties": { + "confidence": { + "$ref": "#/components/schemas/Confidence" + }, + "diarization": { + "description": "List of diarization segments", + "items": { + "$ref": "#/components/schemas/DiarizationSegment" + }, + "type": "array" + }, + "error": { + "description": "Error message if any", + "type": "string" + }, + "exclusiveDiarization": { + "description": "Exclusive diarization segments where only one speaker is active at a time. Only returned if `exclusive` is set to true when job is created.", + "items": { + "$ref": "#/components/schemas/DiarizationSegment" + }, + "type": "array" + }, + "turnLevelTranscription": { + "description": "Turn-level (speaker turn) transcription segments with text. Only returned if `transcription` is set to true when job is created.", + "example": [ + { + "end": 2.3, + "speaker": "SPEAKER_00", + "start": 0.5, + "text": "Hello, how are you?" + } + ], + "items": { + "$ref": "#/components/schemas/TranscriptionSegment" + }, + "type": "array" + }, + "warning": { + "description": "Warning message if any", + "type": "string" + }, + "wordLevelTranscription": { + "description": "Word-level transcription segments with text. Only returned if `transcription` is set to true when job is created.", + "example": [ + { + "end": 0.8, + "speaker": "SPEAKER_00", + "start": 0.5, + "text": "Hello" + } + ], + "items": { + "$ref": "#/components/schemas/TranscriptionSegment" + }, + "type": "array" + } + }, + "required": [ + "diarization" + ], + "type": "object" + }, + "DiarizationSegment": { + "properties": { + "confidence": { + "description": "Confidence scores that this speech turn matches each diarization speaker. Only available if `turnLevelConfidence` is set to true when job is created.", + "example": { + "SPEAKER_00": 16, + "SPEAKER_01": 93 + }, + "type": "object" + }, + "end": { + "description": "End time of the segment in seconds", + "example": 30.5, + "type": "number" + }, + "speaker": { + "description": "Speaker label", + "example": "SPEAKER_00", + "type": "string" + }, + "start": { + "description": "Start time of the segment in seconds", + "example": 15, + "type": "number" + } + }, + "required": [ + "speaker", + "start", + "end" + ], + "type": "object" + }, + "DiarizeRequest": { + "properties": { + "confidence": { + "default": false, + "description": "Include confidence values in the output. Output is considerably larger when this option is enabled. Output includes a list of confidence scores with a resolution.", + "example": true, + "type": "boolean" + }, + "exclusive": { + "default": false, + "description": "Includes exclusive diarization values in the output in `exclusiveDiarization` key (equivalent to diarization but without overlapping speech).", + "example": true, + "type": "boolean" + }, + "maxSpeakers": { + "description": "Maximum number of speakers (must be >= minSpeakers if both are set)", + "example": 4, + "minimum": 1, + "type": "number" + }, + "minSpeakers": { + "description": "Minimum number of speakers (must be <= maxSpeakers if both are set)", + "example": 1, + "minimum": 1, + "type": "number" + }, + "model": { + "default": "precision-2", + "enum": [ + "precision-2", + "community-1" + ], + "example": "precision-2", + "nullable": true, + "type": "string" + }, + "numSpeakers": { + "description": "Number of speakers. Only use if the number of speakers is known in advance. Number of speakers is detected automatically if not provided. Setting this value results in better overall diarization performance. In rare cases where we cannot honor this request (e.g. short files and large number of speakers), a warning will be added to the output. Equivalent to sending minSpeakers==maxSpeakers", + "example": 2, + "minimum": 1, + "type": "number" + }, + "transcription": { + "default": false, + "description": "Enable speaker attributed transcription. Only available for the `precision-2` diarization model.", + "type": "boolean" + }, + "transcriptionConfig": { + "$ref": "#/components/schemas/TranscriptionConfiguration" + }, + "turnLevelConfidence": { + "default": false, + "description": "Includes turn-level confidence values in the output.", + "example": true, + "nullable": true, + "type": "boolean" + }, + "url": { + "description": "URL of the audio file to be processed", + "example": "https://example.com/audio.wav", + "type": "string" + }, + "webhook": { + "description": "Webhook URL to receive results when job is completed (optional)", + "example": "https://example.com/webhook", + "type": "string" + }, + "webhookStatusOnly": { + "default": false, + "description": "When true, webhook payload only includes jobId and status (excludes output). Useful for large payloads.", + "example": true, + "type": "boolean" + } + }, + "required": [ + "url" + ], + "type": "object" + }, + "GetJobsResponse": { + "properties": { + "items": { + "description": "List of jobs. Sorted by creation date, descending. Does not include output data.", + "items": { + "$ref": "#/components/schemas/JobListItem" + }, + "type": "array" + }, + "total": { + "description": "Total number of jobs", + "example": 100, + "type": "number" + } + }, + "required": [ + "items", + "total" + ], + "type": "object" + }, + "GetMediaUploadUrl": { + "properties": { + "url": { + "description": "The url should be in the form media://object-key where the object-key can be any alpha-numeric string. The object-key is unique to your account API token so there is no risk of collision with other users.", + "maxLength": 255, + "pattern": "/^media:\\/\\/[a-zA-Z0-9\\-_\\.\\/]+$/", + "type": "string" + } + }, + "required": [ + "url" + ], + "type": "object" + }, + "IdentificationJobOutput": { + "properties": { + "confidence": { + "$ref": "#/components/schemas/Confidence" + }, + "diarization": { + "description": "List of diarization segments", + "items": { + "$ref": "#/components/schemas/DiarizationSegment" + }, + "type": "array" + }, + "error": { + "description": "Error message if any", + "type": "string" + }, + "exclusiveDiarization": { + "description": "Exclusive diarization segments where only one speaker is active at a time. Only returned if `exclusive` is set to true when job is created.", + "items": { + "$ref": "#/components/schemas/DiarizationSegment" + }, + "type": "array" + }, + "identification": { + "description": "List of identification segments", + "items": { + "$ref": "#/components/schemas/IdentificationSegment" + }, + "type": "array" + }, + "voiceprints": { + "items": { + "$ref": "#/components/schemas/IdentificationVoiceprint" + }, + "type": "array" + }, + "warning": { + "description": "Warning message if any", + "type": "string" + } + }, + "type": "object" + }, + "IdentificationSegment": { + "properties": { + "confidence": { + "description": "Confidence scores that this speech turn matches each diarization speaker. Only available if `turnLevelConfidence` is set to true when job is created.", + "example": { + "SPEAKER_00": 16, + "SPEAKER_01": 93 + }, + "type": "object" + }, + "diarizationSpeaker": { + "description": "Speaker label", + "example": "SPEAKER_00", + "type": "string" + }, + "end": { + "description": "End time of the segment in seconds", + "example": 30.5, + "type": "number" + }, + "match": { + "description": "Label of the voiceprint that was identified following the matching settings", + "example": "Sam", + "nullable": true, + "type": "string" + }, + "speaker": { + "description": "Speaker label", + "example": "SPEAKER_00", + "type": "string" + }, + "start": { + "description": "Start time of the segment in seconds", + "example": 15, + "type": "number" + } + }, + "required": [ + "diarizationSpeaker", + "match" + ], + "type": "object" + }, + "IdentificationVoiceprint": { + "properties": { + "confidence": { + "description": "Confidence for each speaker label, as a dictionary of speaker label to confidence score", + "example": { + "Rick": 24, + "Sam": 16 + }, + "type": "object" + }, + "match": { + "description": "Label of the voiceprint that was identified following the matching settings", + "example": "Sam", + "type": "string" + }, + "speaker": { + "description": "Diarization speaker", + "example": "SPEAKER_00", + "type": "string" + } + }, + "required": [ + "speaker", + "match", + "confidence" + ], + "type": "object" + }, + "IdentifyJob": { + "properties": { + "createdAt": { + "description": "Date and time the job was created", + "example": "2024-02-20T12:00:00Z", + "format": "date-time", + "type": "string" + }, + "jobId": { + "description": "Job ID to track the progress or get the results", + "example": "fb16c565-f3f0-4402-a08c-9d44df0ccc7b", + "type": "string" + }, + "output": { + "$ref": "#/components/schemas/IdentificationJobOutput" + }, + "status": { + "$ref": "#/components/schemas/JobStatus" + }, + "updatedAt": { + "description": "Date and time the job was last updated", + "example": "2024-02-20T12:00:00Z", + "format": "date-time", + "type": "string" + } + }, + "type": "object" + }, + "IdentifyRequest": { + "properties": { + "confidence": { + "default": false, + "description": "Include confidence values in the output. Output is considerably larger when this option is enabled. Output includes a list of confidence scores with a resolution.", + "example": true, + "type": "boolean" + }, + "exclusive": { + "default": false, + "description": "Includes exclusive diarization values in the output in `exclusiveDiarization` key (equivalent to diarization but without overlapping speech).", + "example": true, + "type": "boolean" + }, + "matching": { + "$ref": "#/components/schemas/MatchingOptions" + }, + "maxSpeakers": { + "description": "Maximum number of speakers (must be >= minSpeakers if both are set)", + "example": 4, + "minimum": 1, + "type": "number" + }, + "minSpeakers": { + "description": "Minimum number of speakers (must be <= maxSpeakers if both are set)", + "example": 1, + "minimum": 1, + "type": "number" + }, + "model": { + "default": "precision-2", + "enum": [ + "precision-2" + ], + "example": "precision-2", + "nullable": true, + "type": "string" + }, + "numSpeakers": { + "description": "Number of speakers. Only use if the number of speakers is known in advance. Number of speakers is detected automatically if not provided. Setting this value results in better overall diarization performance. In rare cases where we cannot honor this request (e.g. short files and large number of speakers), a warning will be added to the output. Equivalent to sending minSpeakers==maxSpeakers", + "example": 2, + "minimum": 1, + "type": "number" + }, + "turnLevelConfidence": { + "default": false, + "description": "Includes turn-level confidence values in the output.", + "example": true, + "nullable": true, + "type": "boolean" + }, + "url": { + "description": "URL of the audio file to be processed", + "example": "https://example.com/audio.wav", + "type": "string" + }, + "voiceprints": { + "description": "List of voiceprints to identify against", + "items": { + "$ref": "#/components/schemas/Voiceprint" + }, + "maxItems": 50, + "minItems": 1, + "type": "array" + }, + "webhook": { + "description": "Webhook URL to receive results when job is completed (optional)", + "example": "https://example.com/webhook", + "type": "string" + }, + "webhookStatusOnly": { + "default": false, + "description": "When true, webhook payload only includes jobId and status (excludes output). Useful for large payloads.", + "example": true, + "type": "boolean" + } + }, + "required": [ + "voiceprints" + ], + "type": "object" + }, + "JobCreated": { + "properties": { + "jobId": { + "description": "ID of the job", + "example": "3c8a89a5-dcc6-4edb-a75d-ffd64739674d", + "type": "string" + }, + "status": { + "description": "Status of the job", + "enum": [ + "pending", + "created", + "succeeded", + "canceled", + "failed", + "running" + ], + "example": "created", + "type": "string" + }, + "warning": { + "description": "Warning message if any", + "type": "string" + } + }, + "required": [ + "jobId", + "status" + ], + "type": "object" + }, + "JobListItem": { + "properties": { + "createdAt": { + "format": "date-time", + "type": "string" + }, + "id": { + "type": "string" + }, + "status": { + "type": "string" + } + }, + "required": [ + "id", + "status", + "createdAt" + ], + "type": "object" + }, + "JobStatus": { + "description": "Status of the job", + "enum": [ + "pending", + "created", + "succeeded", + "canceled", + "failed", + "running" + ], + "type": "string" + }, + "MatchingOptions": { + "properties": { + "exclusive": { + "default": true, + "description": "Prevent multiple speakers from being matched to the same voiceprint. Default to true", + "nullable": true, + "type": "boolean" + }, + "threshold": { + "default": 0, + "description": "Prevent matching if confidence score is below this threshold. Value is between 0 and 100. Default is 0, meaning all voiceprints are matched", + "format": "float", + "maximum": 100, + "minimum": 0, + "nullable": true, + "type": "number" + } + }, + "type": "object" + }, + "MediaResponse": { + "properties": { + "url": { + "type": "string" + } + }, + "required": [ + "url" + ], + "type": "object" + }, + "TestResponse": { + "properties": { + "message": { + "description": "Message of the test", + "example": "Test connection successful", + "type": "string" + }, + "status": { + "description": "Status of the test", + "example": "OK", + "type": "string" + } + }, + "required": [ + "status", + "message" + ], + "type": "object" + }, + "TranscriptionConfiguration": { + "properties": { + "model": { + "default": "parakeet-tdt-0.6b-v3", + "description": "Transcription model to use. Supported languages depend on the chosen model:\n - `parakeet-tdt-0.6b-v3`: Bulgarian, Croatian, Czech, Danish, Dutch, English, Estonian, Finnish, French, German, Greek, Hungarian, Italian, Latvian, Lithuanian, Maltese, Polish, Portuguese, Romanian, Slovak, Slovenian, Spanish, Swedish, Russian or Ukrainian.\n - `faster-whisper-large-v3-turbo`: Afrikaans, Albanian, Amharic, Arabic, Armenian, Assamese, Azerbaijani, Bashkir, Basque, Belarusian, Bengali, Bosnian, Breton, Bulgarian, Cantonese, Catalan, Chinese, Croatian, Czech, Danish, Dutch, English, Estonian, Faroese, Finnish, French, Galician, Georgian, German, Greek, Gujarati, HaitianCreole, Hausa, Hawaiian, Hebrew, Hindi, Hungarian, Icelandic, Indonesian, Italian, Japanese, Javanese, Kannada, Kazakh, Khmer, Korean, Lao, Latin, Latvian, Lingala, Lithuanian, Luxembourgish, Macedonian, Malagasy, Malay, Malayalam, Maltese, Maori, Marathi, Mongolian, Myanmar, Nepali, Norwegian, Nynorsk, Occitan, Pashto, Persian, Polish, Portuguese, Punjabi, Romanian, Russian, Sanskrit, Serbian, Shona, Sindhi, Sinhala, Slovak, Slovenian, Somali, Spanish, Sundanese, Swahili, Swedish, Tagalog, Tajik, Tamil, Tatar, Telugu, Thai, Tibetan, Turkish, Turkmen, Ukrainian, Urdu, Uzbek, Vietnamese, Welsh, Yiddish or Yoruba.", + "enum": [ + "parakeet-tdt-0.6b-v3", + "faster-whisper-large-v3-turbo" + ], + "example": "faster-whisper-large-v3-turbo", + "type": "string" + } + }, + "type": "object" + }, + "TranscriptionSegment": { + "properties": { + "end": { + "description": "End time of the segment in seconds", + "type": "number" + }, + "speaker": { + "description": "Speaker label", + "type": "string" + }, + "start": { + "description": "Start time of the segment in seconds", + "type": "number" + }, + "text": { + "description": "The transcribed speech content for this segment", + "type": "string" + } + }, + "required": [ + "start", + "end", + "text", + "speaker" + ], + "type": "object" + }, + "Voiceprint": { + "properties": { + "label": { + "description": "Label for the speaker. Labels can't start with \"SPEAKER_\"", + "example": "John Doe", + "maxLength": 100, + "pattern": "^(?!speaker_).*", + "type": "string" + }, + "voiceprint": { + "description": "Voiceprint of a speaker", + "example": "U29tZUJhc2U2NERhdGE", + "format": "base64", + "maxLength": 20000, + "type": "string" + } + }, + "required": [ + "label", + "voiceprint" + ], + "type": "object" + }, + "VoiceprintJob": { + "properties": { + "createdAt": { + "description": "Date and time the job was created", + "example": "2024-02-20T12:00:00Z", + "format": "date-time", + "type": "string" + }, + "jobId": { + "description": "Job ID to track the progress or get the results", + "example": "fb16c565-f3f0-4402-a08c-9d44df0ccc7b", + "type": "string" + }, + "output": { + "$ref": "#/components/schemas/VoiceprintJobResults" + }, + "status": { + "$ref": "#/components/schemas/JobStatus" + }, + "updatedAt": { + "description": "Date and time the job was last updated", + "example": "2024-02-20T12:00:00Z", + "format": "date-time", + "type": "string" + } + }, + "type": "object" + }, + "VoiceprintJobResults": { + "properties": { + "error": { + "description": "Error message if any", + "type": "string" + }, + "voiceprint": { + "description": "Voiceprint of the audio. To be used for identification", + "example": "aGVsbG8gd29ybGQ", + "type": "string" + }, + "warning": { + "description": "Warning message if any", + "type": "string" + } + }, + "required": [ + "voiceprint" + ], + "type": "object" + }, + "VoiceprintRequest": { + "properties": { + "model": { + "default": "precision-2", + "enum": [ + "precision-2" + ], + "example": "precision-2", + "nullable": true, + "type": "string" + }, + "url": { + "description": "URL of the voiceprint audio file", + "example": "https://example.com/voice.wav", + "type": "string" + }, + "webhook": { + "description": "Webhook URL to receive voiceprint results (optional)", + "example": "https://example.com/webhook", + "type": "string" + }, + "webhookStatusOnly": { + "default": false, + "description": "When true, webhook payload only includes jobId and status (excludes output). Useful for large payloads.", + "example": true, + "type": "boolean" + } + }, + "required": [ + "url" + ], + "type": "object" + } + }, + "securitySchemes": { + "api-key": { + "bearerFormat": "JWT", + "scheme": "bearer", + "type": "http" + } + } + }, + "externalDocs": { + "description": "pyannoteAI Docs", + "url": "https://docs.pyannote.ai/" + }, + "info": { + "contact": {}, + "description": "", + "termsOfService": "https://pyannote.ai/terms-of-use", + "title": "pyannoteAI API", + "version": "local" + }, + "openapi": "3.0.0", + "paths": { + "/v1/diarize": { + "post": { + "operationId": "diarize", + "parameters": [], + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/DiarizeRequest" + } + } + }, + "required": true + }, + "responses": { + "200": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/JobCreated" + } + } + }, + "description": "" + }, + "400": { + "description": "Invalid request" + }, + "402": { + "description": "Subscription is required" + }, + "429": { + "description": "Too many requests" + } + }, + "security": [ + { + "api-key": [] + } + ], + "summary": "Diarize audio", + "tags": [ + "Api", + "Operations" + ], + "x-api-key-permissions": [ + "jobs:write" + ] + } + }, + "/v1/identify": { + "post": { + "operationId": "identify", + "parameters": [], + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/IdentifyRequest" + } + } + }, + "required": true + }, + "responses": { + "200": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/JobCreated" + } + } + }, + "description": "" + }, + "400": { + "description": "Invalid request" + }, + "402": { + "description": "Subscription is required" + }, + "429": { + "description": "Too many requests" + } + }, + "security": [ + { + "api-key": [] + } + ], + "summary": "Identify speaker with diarization", + "tags": [ + "Api", + "Operations" + ], + "x-api-key-permissions": [ + "jobs:write" + ] + } + }, + "/v1/jobs": { + "get": { + "operationId": "getJobsByTeam", + "parameters": [ + { + "description": "Number of jobs to return", + "in": "query", + "name": "take", + "required": false, + "schema": { + "default": 10, + "example": 10, + "maximum": 100, + "minimum": 1, + "type": "number" + } + }, + { + "description": "Status of the jobs to return", + "in": "query", + "name": "status", + "required": false, + "schema": { + "enum": [ + "pending", + "created", + "succeeded", + "canceled", + "failed", + "running" + ], + "example": "succeeded", + "type": "string" + } + }, + { + "description": "Number of jobs to skip", + "in": "query", + "name": "skip", + "required": false, + "schema": { + "default": null, + "example": 1, + "minimum": 0, + "type": "number" + } + } + ], + "responses": { + "200": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/GetJobsResponse" + } + } + }, + "description": "" + }, + "400": { + "description": "Invalid request" + }, + "402": { + "description": "Subscription is required" + }, + "429": { + "description": "Too many requests" + } + }, + "security": [ + { + "api-key": [] + } + ], + "summary": "Get all jobs.", + "tags": [ + "Api", + "Jobs" + ], + "x-api-key-permissions": [ + "jobs:read" + ] + } + }, + "/v1/jobs/{jobId}": { + "get": { + "operationId": "getJobById", + "parameters": [ + { + "description": "Job ID to track the progress", + "in": "path", + "name": "jobId", + "required": true, + "schema": { + "type": "string" + } + } + ], + "responses": { + "200": { + "content": { + "application/json": { + "schema": { + "oneOf": [ + { + "$ref": "#/components/schemas/DiarizationJob", + "title": "Diarization" + }, + { + "$ref": "#/components/schemas/VoiceprintJob", + "title": "Voiceprint" + }, + { + "$ref": "#/components/schemas/IdentifyJob", + "title": "Identification" + } + ] + } + } + }, + "description": "" + }, + "400": { + "description": "Invalid request" + }, + "402": { + "description": "Subscription is required" + }, + "429": { + "description": "Too many requests" + } + }, + "security": [ + { + "api-key": [] + } + ], + "summary": "Get job by ID", + "tags": [ + "Api", + "Jobs" + ], + "x-api-key-permissions": [ + "jobs:read" + ] + } + }, + "/v1/media/input": { + "post": { + "description": "To use the provided temporary storage is a two step process.\n You start by declaring a media:// url that you can reference in any other API calls. The response will provide a url where you can put your media. This allows you to use the media:// url as a short-cut for a temporary storage location.\n You'll be returned a pre-signed url you can use to PUT and upload your media file. The temporary storage should allow you to read and write to the media:// locations for a period of at least 24 hours before it is removed.", + "operationId": "getMediaUploadURL", + "parameters": [], + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/GetMediaUploadUrl" + } + } + }, + "required": true + }, + "responses": { + "201": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/MediaResponse" + } + } + }, + "description": "" + }, + "400": { + "description": "Invalid request" + }, + "402": { + "description": "Subscription is required" + }, + "429": { + "description": "Too many requests" + } + }, + "security": [ + { + "api-key": [] + } + ], + "summary": "Get upload URL", + "tags": [ + "Media" + ], + "x-api-key-permissions": [ + "media:write" + ] + } + }, + "/v1/media/output": { + "post": { + "description": "You can download media you previously uploaded with /media/input or media that was generated through another API call.\nThe temporary storage should allow you to read and write to the media:// locations for a period of at least 24 hours before it is removed.", + "operationId": "getMediaDownloadURL", + "parameters": [], + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/GetMediaUploadUrl" + } + } + }, + "required": true + }, + "responses": { + "201": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/MediaResponse" + } + } + }, + "description": "" + }, + "400": { + "description": "Invalid request" + }, + "402": { + "description": "Subscription is required" + }, + "429": { + "description": "Too many requests" + } + }, + "security": [ + { + "api-key": [] + } + ], + "summary": "Get download URL", + "tags": [ + "Media" + ], + "x-api-key-permissions": [ + "media:read" + ] + } + }, + "/v1/test": { + "get": { + "operationId": "testKey", + "parameters": [], + "responses": { + "200": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/TestResponse" + } + } + }, + "description": "" + }, + "400": { + "description": "Invalid request" + }, + "402": { + "description": "Subscription is required" + }, + "429": { + "description": "Too many requests" + } + }, + "security": [ + { + "api-key": [] + } + ], + "summary": "Test API endpoint", + "tags": [ + "Api", + "Test" + ] + } + }, + "/v1/voiceprint": { + "post": { + "operationId": "voiceprint", + "parameters": [], + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/VoiceprintRequest" + } + } + }, + "required": true + }, + "responses": { + "200": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/JobCreated" + } + } + }, + "description": "" + }, + "400": { + "description": "Invalid request" + }, + "402": { + "description": "Subscription is required" + }, + "429": { + "description": "Too many requests" + } + }, + "security": [ + { + "api-key": [] + } + ], + "summary": "Extract voiceprint", + "tags": [ + "Api", + "Operations" + ], + "x-api-key-permissions": [ + "jobs:write" + ] + } + } + }, + "servers": [ + { + "url": "https://api.pyannote.ai" + } + ], + "tags": [] +} \ No newline at end of file diff --git a/crates/pyannote-cloud/openapi.gen.json b/crates/pyannote-cloud/openapi.gen.json new file mode 100644 index 0000000000..63b4deb009 --- /dev/null +++ b/crates/pyannote-cloud/openapi.gen.json @@ -0,0 +1,1499 @@ +{ + "openapi": "3.0.0", + "paths": { + "/v1/media/input": { + "post": { + "description": "To use the provided temporary storage is a two step process.\n You start by declaring a media:// url that you can reference in any other API calls. The response will provide a url where you can put your media. This allows you to use the media:// url as a short-cut for a temporary storage location.\n You'll be returned a pre-signed url you can use to PUT and upload your media file. The temporary storage should allow you to read and write to the media:// locations for a period of at least 24 hours before it is removed.", + "operationId": "getMediaUploadURL", + "parameters": [], + "requestBody": { + "required": true, + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/GetMediaUploadUrl" + } + } + } + }, + "responses": { + "201": { + "description": "", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/MediaResponse" + } + } + } + }, + "400": { + "description": "Invalid request", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ValidationErrorResponse" + } + } + } + }, + "402": { + "description": "Subscription is required", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ApiError" + } + } + } + }, + "429": { + "description": "Too many requests", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ApiError" + } + } + } + } + }, + "security": [ + { + "api-key": [] + } + ], + "summary": "Get upload URL", + "tags": [ + "Media" + ], + "x-api-key-permissions": [ + "media:write" + ] + } + }, + "/v1/media/output": { + "post": { + "description": "You can download media you previously uploaded with /media/input or media that was generated through another API call.\nThe temporary storage should allow you to read and write to the media:// locations for a period of at least 24 hours before it is removed.", + "operationId": "getMediaDownloadURL", + "parameters": [], + "requestBody": { + "required": true, + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/GetMediaUploadUrl" + } + } + } + }, + "responses": { + "201": { + "description": "", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/MediaResponse" + } + } + } + }, + "400": { + "description": "Invalid request", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ValidationErrorResponse" + } + } + } + }, + "402": { + "description": "Subscription is required", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ApiError" + } + } + } + }, + "429": { + "description": "Too many requests", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ApiError" + } + } + } + } + }, + "security": [ + { + "api-key": [] + } + ], + "summary": "Get download URL", + "tags": [ + "Media" + ], + "x-api-key-permissions": [ + "media:read" + ] + } + }, + "/v1/test": { + "get": { + "operationId": "testKey", + "parameters": [], + "responses": { + "200": { + "description": "", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/TestResponse" + } + } + } + }, + "400": { + "description": "Invalid request", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ValidationErrorResponse" + } + } + } + }, + "402": { + "description": "Subscription is required", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ApiError" + } + } + } + }, + "429": { + "description": "Too many requests", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ApiError" + } + } + } + } + }, + "security": [ + { + "api-key": [] + } + ], + "summary": "Test API endpoint", + "tags": [ + "Api", + "Test" + ] + } + }, + "/v1/diarize": { + "post": { + "operationId": "diarize", + "parameters": [], + "requestBody": { + "required": true, + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/DiarizeRequest" + } + } + } + }, + "responses": { + "200": { + "description": "", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/JobCreated" + } + } + } + }, + "400": { + "description": "Invalid request", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ValidationErrorResponse" + } + } + } + }, + "402": { + "description": "Subscription is required", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ApiError" + } + } + } + }, + "429": { + "description": "Too many requests", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ApiError" + } + } + } + } + }, + "security": [ + { + "api-key": [] + } + ], + "summary": "Diarize audio", + "tags": [ + "Api", + "Operations" + ], + "x-api-key-permissions": [ + "jobs:write" + ] + } + }, + "/v1/voiceprint": { + "post": { + "operationId": "voiceprint", + "parameters": [], + "requestBody": { + "required": true, + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/VoiceprintRequest" + } + } + } + }, + "responses": { + "200": { + "description": "", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/JobCreated" + } + } + } + }, + "400": { + "description": "Invalid request", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ValidationErrorResponse" + } + } + } + }, + "402": { + "description": "Subscription is required", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ApiError" + } + } + } + }, + "429": { + "description": "Too many requests", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ApiError" + } + } + } + } + }, + "security": [ + { + "api-key": [] + } + ], + "summary": "Extract voiceprint", + "tags": [ + "Api", + "Operations" + ], + "x-api-key-permissions": [ + "jobs:write" + ] + } + }, + "/v1/identify": { + "post": { + "operationId": "identify", + "parameters": [], + "requestBody": { + "required": true, + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/IdentifyRequest" + } + } + } + }, + "responses": { + "200": { + "description": "", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/JobCreated" + } + } + } + }, + "400": { + "description": "Invalid request", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ValidationErrorResponse" + } + } + } + }, + "402": { + "description": "Subscription is required", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ApiError" + } + } + } + }, + "429": { + "description": "Too many requests", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ApiError" + } + } + } + } + }, + "security": [ + { + "api-key": [] + } + ], + "summary": "Identify speaker with diarization", + "tags": [ + "Api", + "Operations" + ], + "x-api-key-permissions": [ + "jobs:write" + ] + } + }, + "/v1/jobs/{jobId}": { + "get": { + "operationId": "getJobById", + "parameters": [ + { + "name": "jobId", + "required": true, + "in": "path", + "description": "Job ID to track the progress", + "schema": { + "type": "string" + } + } + ], + "responses": { + "200": { + "description": "", + "content": { + "application/json": { + "schema": { + "oneOf": [ + { + "$ref": "#/components/schemas/DiarizationJob", + "title": "Diarization" + }, + { + "$ref": "#/components/schemas/VoiceprintJob", + "title": "Voiceprint" + }, + { + "$ref": "#/components/schemas/IdentifyJob", + "title": "Identification" + } + ] + } + } + } + }, + "400": { + "description": "Invalid request", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ValidationErrorResponse" + } + } + } + }, + "402": { + "description": "Subscription is required", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ApiError" + } + } + } + }, + "429": { + "description": "Too many requests", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ApiError" + } + } + } + } + }, + "security": [ + { + "api-key": [] + } + ], + "summary": "Get job by ID", + "tags": [ + "Api", + "Jobs" + ], + "x-api-key-permissions": [ + "jobs:read" + ] + } + }, + "/v1/jobs": { + "get": { + "operationId": "getJobsByTeam", + "parameters": [ + { + "name": "take", + "required": false, + "in": "query", + "description": "Number of jobs to return", + "schema": { + "minimum": 1, + "maximum": 100, + "default": 10, + "example": 10, + "type": "number" + } + }, + { + "name": "status", + "required": false, + "in": "query", + "description": "Status of the jobs to return", + "schema": { + "example": "succeeded", + "type": "string", + "enum": [ + "pending", + "created", + "succeeded", + "canceled", + "failed", + "running" + ] + } + }, + { + "name": "skip", + "required": false, + "in": "query", + "description": "Number of jobs to skip", + "schema": { + "minimum": 0, + "default": null, + "example": 1, + "type": "number" + } + } + ], + "responses": { + "200": { + "description": "", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/GetJobsResponse" + } + } + } + }, + "400": { + "description": "Invalid request", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ValidationErrorResponse" + } + } + } + }, + "402": { + "description": "Subscription is required", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ApiError" + } + } + } + }, + "429": { + "description": "Too many requests", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ApiError" + } + } + } + } + }, + "security": [ + { + "api-key": [] + } + ], + "summary": "Get all jobs.", + "tags": [ + "Api", + "Jobs" + ], + "x-api-key-permissions": [ + "jobs:read" + ] + } + } + }, + "info": { + "title": "pyannoteAI API", + "description": "", + "version": "local", + "contact": {}, + "termsOfService": "https://pyannote.ai/terms-of-use" + }, + "tags": [], + "servers": [ + { + "url": "https://api.pyannote.ai" + } + ], + "components": { + "securitySchemes": { + "api-key": { + "scheme": "bearer", + "bearerFormat": "JWT", + "type": "http" + } + }, + "schemas": { + "GetMediaUploadUrl": { + "type": "object", + "properties": { + "url": { + "type": "string", + "maxLength": 255, + "pattern": "/^media:\\/\\/[a-zA-Z0-9\\-_\\.\\/]+$/", + "description": "The url should be in the form media://object-key where the object-key can be any alpha-numeric string. The object-key is unique to your account API token so there is no risk of collision with other users." + } + }, + "required": [ + "url" + ] + }, + "MediaResponse": { + "type": "object", + "properties": { + "url": { + "type": "string" + } + }, + "required": [ + "url" + ] + }, + "TestResponse": { + "type": "object", + "properties": { + "status": { + "type": "string", + "description": "Status of the test", + "example": "OK" + }, + "message": { + "type": "string", + "description": "Message of the test", + "example": "Test connection successful" + } + }, + "required": [ + "status", + "message" + ] + }, + "TranscriptionConfiguration": { + "type": "object", + "properties": { + "model": { + "type": "string", + "default": "parakeet-tdt-0.6b-v3", + "enum": [ + "parakeet-tdt-0.6b-v3", + "faster-whisper-large-v3-turbo" + ], + "description": "Transcription model to use. Supported languages depend on the chosen model:\n - `parakeet-tdt-0.6b-v3`: Bulgarian, Croatian, Czech, Danish, Dutch, English, Estonian, Finnish, French, German, Greek, Hungarian, Italian, Latvian, Lithuanian, Maltese, Polish, Portuguese, Romanian, Slovak, Slovenian, Spanish, Swedish, Russian or Ukrainian.\n - `faster-whisper-large-v3-turbo`: Afrikaans, Albanian, Amharic, Arabic, Armenian, Assamese, Azerbaijani, Bashkir, Basque, Belarusian, Bengali, Bosnian, Breton, Bulgarian, Cantonese, Catalan, Chinese, Croatian, Czech, Danish, Dutch, English, Estonian, Faroese, Finnish, French, Galician, Georgian, German, Greek, Gujarati, HaitianCreole, Hausa, Hawaiian, Hebrew, Hindi, Hungarian, Icelandic, Indonesian, Italian, Japanese, Javanese, Kannada, Kazakh, Khmer, Korean, Lao, Latin, Latvian, Lingala, Lithuanian, Luxembourgish, Macedonian, Malagasy, Malay, Malayalam, Maltese, Maori, Marathi, Mongolian, Myanmar, Nepali, Norwegian, Nynorsk, Occitan, Pashto, Persian, Polish, Portuguese, Punjabi, Romanian, Russian, Sanskrit, Serbian, Shona, Sindhi, Sinhala, Slovak, Slovenian, Somali, Spanish, Sundanese, Swahili, Swedish, Tagalog, Tajik, Tamil, Tatar, Telugu, Thai, Tibetan, Turkish, Turkmen, Ukrainian, Urdu, Uzbek, Vietnamese, Welsh, Yiddish or Yoruba.", + "example": "faster-whisper-large-v3-turbo" + } + } + }, + "DiarizeRequest": { + "type": "object", + "properties": { + "url": { + "type": "string", + "description": "URL of the audio file to be processed", + "example": "https://example.com/audio.wav" + }, + "webhook": { + "type": "string", + "description": "Webhook URL to receive results when job is completed (optional)", + "example": "https://example.com/webhook" + }, + "webhookStatusOnly": { + "type": "boolean", + "default": false, + "description": "When true, webhook payload only includes jobId and status (excludes output). Useful for large payloads.", + "example": true + }, + "model": { + "type": "string", + "enum": [ + "precision-2", + "community-1" + ], + "example": "precision-2", + "default": "precision-2", + "nullable": true + }, + "numSpeakers": { + "type": "number", + "minimum": 1, + "description": "Number of speakers. Only use if the number of speakers is known in advance. Number of speakers is detected automatically if not provided. Setting this value results in better overall diarization performance. In rare cases where we cannot honor this request (e.g. short files and large number of speakers), a warning will be added to the output. Equivalent to sending minSpeakers==maxSpeakers", + "example": 2 + }, + "minSpeakers": { + "type": "number", + "minimum": 1, + "description": "Minimum number of speakers (must be <= maxSpeakers if both are set)", + "example": 1 + }, + "maxSpeakers": { + "type": "number", + "minimum": 1, + "description": "Maximum number of speakers (must be >= minSpeakers if both are set)", + "example": 4 + }, + "turnLevelConfidence": { + "type": "boolean", + "nullable": true, + "default": false, + "description": "Includes turn-level confidence values in the output.", + "example": true + }, + "exclusive": { + "type": "boolean", + "default": false, + "description": "Includes exclusive diarization values in the output in `exclusiveDiarization` key (equivalent to diarization but without overlapping speech).", + "example": true + }, + "confidence": { + "type": "boolean", + "default": false, + "description": "Include confidence values in the output. Output is considerably larger when this option is enabled. Output includes a list of confidence scores with a resolution.", + "example": true + }, + "transcription": { + "type": "boolean", + "default": false, + "description": "Enable speaker attributed transcription. Only available for the `precision-2` diarization model." + }, + "transcriptionConfig": { + "description": "Transcription configuration, if `transcription: true`", + "allOf": [ + { + "$ref": "#/components/schemas/TranscriptionConfiguration" + } + ] + } + }, + "required": [ + "url" + ] + }, + "JobCreated": { + "type": "object", + "properties": { + "jobId": { + "type": "string", + "example": "3c8a89a5-dcc6-4edb-a75d-ffd64739674d", + "description": "ID of the job" + }, + "status": { + "type": "string", + "description": "Status of the job", + "example": "created", + "enum": [ + "pending", + "created", + "succeeded", + "canceled", + "failed", + "running" + ] + }, + "warning": { + "type": "string", + "description": "Warning message if any" + } + }, + "required": [ + "jobId", + "status" + ] + }, + "VoiceprintRequest": { + "type": "object", + "properties": { + "url": { + "type": "string", + "description": "URL of the voiceprint audio file", + "example": "https://example.com/voice.wav" + }, + "model": { + "type": "string", + "enum": [ + "precision-2" + ], + "example": "precision-2", + "default": "precision-2", + "nullable": true + }, + "webhook": { + "type": "string", + "description": "Webhook URL to receive voiceprint results (optional)", + "example": "https://example.com/webhook" + }, + "webhookStatusOnly": { + "type": "boolean", + "default": false, + "description": "When true, webhook payload only includes jobId and status (excludes output). Useful for large payloads.", + "example": true + } + }, + "required": [ + "url" + ] + }, + "MatchingOptions": { + "type": "object", + "properties": { + "exclusive": { + "type": "boolean", + "nullable": true, + "default": true, + "description": "Prevent multiple speakers from being matched to the same voiceprint. Default to true" + }, + "threshold": { + "type": "number", + "nullable": true, + "default": 0, + "minimum": 0, + "maximum": 100, + "format": "float", + "description": "Prevent matching if confidence score is below this threshold. Value is between 0 and 100. Default is 0, meaning all voiceprints are matched" + } + } + }, + "Voiceprint": { + "type": "object", + "properties": { + "label": { + "type": "string", + "maxLength": 100, + "pattern": "^(?!speaker_).*", + "description": "Label for the speaker. Labels can't start with \"SPEAKER_\"", + "example": "John Doe" + }, + "voiceprint": { + "type": "string", + "maxLength": 20000, + "format": "base64", + "description": "Voiceprint of a speaker", + "example": "U29tZUJhc2U2NERhdGE" + } + }, + "required": [ + "label", + "voiceprint" + ] + }, + "IdentifyRequest": { + "type": "object", + "properties": { + "url": { + "type": "string", + "description": "URL of the audio file to be processed", + "example": "https://example.com/audio.wav" + }, + "webhook": { + "type": "string", + "description": "Webhook URL to receive results when job is completed (optional)", + "example": "https://example.com/webhook" + }, + "webhookStatusOnly": { + "type": "boolean", + "default": false, + "description": "When true, webhook payload only includes jobId and status (excludes output). Useful for large payloads.", + "example": true + }, + "model": { + "type": "string", + "enum": [ + "precision-2" + ], + "example": "precision-2", + "default": "precision-2", + "nullable": true + }, + "numSpeakers": { + "type": "number", + "minimum": 1, + "description": "Number of speakers. Only use if the number of speakers is known in advance. Number of speakers is detected automatically if not provided. Setting this value results in better overall diarization performance. In rare cases where we cannot honor this request (e.g. short files and large number of speakers), a warning will be added to the output. Equivalent to sending minSpeakers==maxSpeakers", + "example": 2 + }, + "minSpeakers": { + "type": "number", + "minimum": 1, + "description": "Minimum number of speakers (must be <= maxSpeakers if both are set)", + "example": 1 + }, + "maxSpeakers": { + "type": "number", + "minimum": 1, + "description": "Maximum number of speakers (must be >= minSpeakers if both are set)", + "example": 4 + }, + "turnLevelConfidence": { + "type": "boolean", + "nullable": true, + "default": false, + "description": "Includes turn-level confidence values in the output.", + "example": true + }, + "exclusive": { + "type": "boolean", + "default": false, + "description": "Includes exclusive diarization values in the output in `exclusiveDiarization` key (equivalent to diarization but without overlapping speech).", + "example": true + }, + "confidence": { + "type": "boolean", + "default": false, + "description": "Include confidence values in the output. Output is considerably larger when this option is enabled. Output includes a list of confidence scores with a resolution.", + "example": true + }, + "matching": { + "description": "Customize how voiceprints are matched against speakers", + "allOf": [ + { + "$ref": "#/components/schemas/MatchingOptions" + } + ] + }, + "voiceprints": { + "minItems": 1, + "maxItems": 50, + "description": "List of voiceprints to identify against", + "type": "array", + "items": { + "$ref": "#/components/schemas/Voiceprint" + } + } + }, + "required": [ + "voiceprints" + ] + }, + "JobListItem": { + "type": "object", + "properties": { + "id": { + "type": "string" + }, + "status": { + "type": "string" + }, + "createdAt": { + "format": "date-time", + "type": "string" + } + }, + "required": [ + "id", + "status", + "createdAt" + ] + }, + "GetJobsResponse": { + "type": "object", + "properties": { + "items": { + "description": "List of jobs. Sorted by creation date, descending. Does not include output data.", + "type": "array", + "items": { + "$ref": "#/components/schemas/JobListItem" + } + }, + "total": { + "type": "number", + "description": "Total number of jobs", + "example": 100 + } + }, + "required": [ + "items", + "total" + ] + }, + "JobStatus": { + "type": "string", + "enum": [ + "pending", + "created", + "succeeded", + "canceled", + "failed", + "running" + ], + "description": "Status of the job" + }, + "DiarizationSegment": { + "type": "object", + "properties": { + "speaker": { + "type": "string", + "description": "Speaker label", + "example": "SPEAKER_00" + }, + "start": { + "type": "number", + "description": "Start time of the segment in seconds", + "example": 15 + }, + "end": { + "type": "number", + "description": "End time of the segment in seconds", + "example": 30.5 + }, + "confidence": { + "type": "object", + "description": "Confidence scores that this speech turn matches each diarization speaker. Only available if `turnLevelConfidence` is set to true when job is created.", + "example": { + "SPEAKER_00": 16, + "SPEAKER_01": 93 + } + } + }, + "required": [ + "speaker", + "start", + "end" + ] + }, + "Confidence": { + "type": "object", + "properties": { + "score": { + "description": "List of confidence scores for each sample. Values are between 0 and 100", + "example": [ + 95, + 89, + 78, + 67, + 56, + 45, + 34, + 23, + 12, + 1 + ], + "type": "array", + "items": { + "type": "number" + } + }, + "resolution": { + "type": "number", + "description": "Resolution of the confidence scores. Value is number of seconds per sample", + "example": 0.02 + } + }, + "required": [ + "score", + "resolution" + ] + }, + "TranscriptionSegment": { + "type": "object", + "properties": { + "start": { + "type": "number", + "description": "Start time of the segment in seconds" + }, + "end": { + "type": "number", + "description": "End time of the segment in seconds" + }, + "text": { + "type": "string", + "description": "The transcribed speech content for this segment" + }, + "speaker": { + "type": "string", + "description": "Speaker label" + } + }, + "required": [ + "start", + "end", + "text", + "speaker" + ] + }, + "DiarizationJobOutput": { + "type": "object", + "properties": { + "diarization": { + "description": "List of diarization segments", + "type": "array", + "items": { + "$ref": "#/components/schemas/DiarizationSegment" + } + }, + "confidence": { + "description": "Confidence scores details. Only returned if `confidence` is set to true when job is created", + "allOf": [ + { + "$ref": "#/components/schemas/Confidence" + } + ] + }, + "exclusiveDiarization": { + "description": "Exclusive diarization segments where only one speaker is active at a time. Only returned if `exclusive` is set to true when job is created.", + "type": "array", + "items": { + "$ref": "#/components/schemas/DiarizationSegment" + } + }, + "wordLevelTranscription": { + "description": "Word-level transcription segments with text. Only returned if `transcription` is set to true when job is created.", + "example": [ + { + "start": 0.5, + "end": 0.8, + "text": "Hello", + "speaker": "SPEAKER_00" + } + ], + "type": "array", + "items": { + "$ref": "#/components/schemas/TranscriptionSegment" + } + }, + "turnLevelTranscription": { + "description": "Turn-level (speaker turn) transcription segments with text. Only returned if `transcription` is set to true when job is created.", + "example": [ + { + "start": 0.5, + "end": 2.3, + "text": "Hello, how are you?", + "speaker": "SPEAKER_00" + } + ], + "type": "array", + "items": { + "$ref": "#/components/schemas/TranscriptionSegment" + } + }, + "error": { + "type": "string", + "description": "Error message if any" + }, + "warning": { + "type": "string", + "description": "Warning message if any" + } + }, + "required": [ + "diarization" + ] + }, + "DiarizationJob": { + "type": "object", + "properties": { + "jobId": { + "type": "string", + "description": "Job ID to track the progress or get the results", + "example": "fb16c565-f3f0-4402-a08c-9d44df0ccc7b" + }, + "status": { + "description": "Status of the job", + "allOf": [ + { + "$ref": "#/components/schemas/JobStatus" + } + ] + }, + "createdAt": { + "format": "date-time", + "type": "string", + "description": "Date and time the job was created", + "example": "2024-02-20T12:00:00Z" + }, + "updatedAt": { + "format": "date-time", + "type": "string", + "description": "Date and time the job was last updated", + "example": "2024-02-20T12:00:00Z" + }, + "output": { + "description": "Output segments of a diarization, available for 24 hours after job completion", + "allOf": [ + { + "$ref": "#/components/schemas/DiarizationJobOutput" + } + ] + } + } + }, + "IdentificationSegment": { + "type": "object", + "properties": { + "speaker": { + "type": "string", + "description": "Speaker label", + "example": "SPEAKER_00" + }, + "start": { + "type": "number", + "description": "Start time of the segment in seconds", + "example": 15 + }, + "end": { + "type": "number", + "description": "End time of the segment in seconds", + "example": 30.5 + }, + "confidence": { + "type": "object", + "description": "Confidence scores that this speech turn matches each diarization speaker. Only available if `turnLevelConfidence` is set to true when job is created.", + "example": { + "SPEAKER_00": 16, + "SPEAKER_01": 93 + } + }, + "diarizationSpeaker": { + "type": "string", + "description": "Speaker label", + "example": "SPEAKER_00" + }, + "match": { + "type": "string", + "description": "Label of the voiceprint that was identified following the matching settings", + "example": "Sam", + "nullable": true + } + }, + "required": [ + "diarizationSpeaker", + "match" + ] + }, + "IdentificationVoiceprint": { + "type": "object", + "properties": { + "speaker": { + "type": "string", + "description": "Diarization speaker", + "example": "SPEAKER_00" + }, + "match": { + "type": "string", + "description": "Label of the voiceprint that was identified following the matching settings", + "example": "Sam" + }, + "confidence": { + "type": "object", + "description": "Confidence for each speaker label, as a dictionary of speaker label to confidence score", + "example": { + "Sam": 16, + "Rick": 24 + } + } + }, + "required": [ + "speaker", + "match", + "confidence" + ] + }, + "IdentificationJobOutput": { + "type": "object", + "properties": { + "diarization": { + "description": "List of diarization segments", + "type": "array", + "items": { + "$ref": "#/components/schemas/DiarizationSegment" + } + }, + "confidence": { + "description": "Confidence scores details. Only returned if `confidence` is set to true when job is created", + "allOf": [ + { + "$ref": "#/components/schemas/Confidence" + } + ] + }, + "exclusiveDiarization": { + "description": "Exclusive diarization segments where only one speaker is active at a time. Only returned if `exclusive` is set to true when job is created.", + "type": "array", + "items": { + "$ref": "#/components/schemas/DiarizationSegment" + } + }, + "error": { + "type": "string", + "description": "Error message if any" + }, + "warning": { + "type": "string", + "description": "Warning message if any" + }, + "identification": { + "description": "List of identification segments", + "type": "array", + "items": { + "$ref": "#/components/schemas/IdentificationSegment" + } + }, + "voiceprints": { + "type": "array", + "items": { + "$ref": "#/components/schemas/IdentificationVoiceprint" + } + } + } + }, + "IdentifyJob": { + "type": "object", + "properties": { + "jobId": { + "type": "string", + "description": "Job ID to track the progress or get the results", + "example": "fb16c565-f3f0-4402-a08c-9d44df0ccc7b" + }, + "status": { + "description": "Status of the job", + "allOf": [ + { + "$ref": "#/components/schemas/JobStatus" + } + ] + }, + "createdAt": { + "format": "date-time", + "type": "string", + "description": "Date and time the job was created", + "example": "2024-02-20T12:00:00Z" + }, + "updatedAt": { + "format": "date-time", + "type": "string", + "description": "Date and time the job was last updated", + "example": "2024-02-20T12:00:00Z" + }, + "output": { + "description": "Output segments of an identification job, available for 24 hours after job completion", + "allOf": [ + { + "$ref": "#/components/schemas/IdentificationJobOutput" + } + ] + } + } + }, + "VoiceprintJobResults": { + "type": "object", + "properties": { + "voiceprint": { + "type": "string", + "description": "Voiceprint of the audio. To be used for identification", + "example": "aGVsbG8gd29ybGQ" + }, + "warning": { + "type": "string", + "description": "Warning message if any" + }, + "error": { + "type": "string", + "description": "Error message if any" + } + }, + "required": [ + "voiceprint" + ] + }, + "VoiceprintJob": { + "type": "object", + "properties": { + "jobId": { + "type": "string", + "description": "Job ID to track the progress or get the results", + "example": "fb16c565-f3f0-4402-a08c-9d44df0ccc7b" + }, + "status": { + "description": "Status of the job", + "allOf": [ + { + "$ref": "#/components/schemas/JobStatus" + } + ] + }, + "createdAt": { + "format": "date-time", + "type": "string", + "description": "Date and time the job was created", + "example": "2024-02-20T12:00:00Z" + }, + "updatedAt": { + "format": "date-time", + "type": "string", + "description": "Date and time the job was last updated", + "example": "2024-02-20T12:00:00Z" + }, + "output": { + "description": "Output of a voiceprint job, available for 24 hours after job completion", + "allOf": [ + { + "$ref": "#/components/schemas/VoiceprintJobResults" + } + ] + } + } + }, + "ValidationError": { + "type": "object", + "properties": { + "field": { + "type": "string", + "description": "Field name", + "example": "url" + }, + "message": { + "type": "string", + "description": "Error message", + "example": "Invalid URL" + } + }, + "required": [ + "field", + "message" + ] + }, + "ValidationErrorResponse": { + "type": "object", + "properties": { + "message": { + "type": "string", + "description": "Error message", + "example": "Invalid request" + }, + "errors": { + "description": "List of errors", + "type": "array", + "items": { + "$ref": "#/components/schemas/ValidationError" + } + } + }, + "required": [ + "message", + "errors" + ] + }, + "ApiError": { + "type": "object", + "properties": { + "requestId": { + "type": "string", + "description": "Request ID", + "example": "37a4c3a0-b034-4e8c-9ed9-76da6645544a" + }, + "message": { + "type": "string", + "description": "Error message", + "example": "Error message" + } + }, + "required": [ + "requestId", + "message" + ] + } + } + }, + "externalDocs": { + "description": "pyannoteAI Docs", + "url": "https://docs.pyannote.ai/" + } +} diff --git a/crates/pyannote-cloud/src/get_job.rs b/crates/pyannote-cloud/src/get_job.rs deleted file mode 100644 index 31d744d975..0000000000 --- a/crates/pyannote-cloud/src/get_job.rs +++ /dev/null @@ -1,82 +0,0 @@ -use super::PyannoteClient; - -#[derive(Debug, serde::Serialize, serde::Deserialize, specta::Type)] -#[specta(rename = "DiarizationRetrieveRequest")] -pub struct Request { - pub job_id: String, -} - -#[derive(Debug, serde::Serialize, serde::Deserialize, specta::Type)] -#[serde(untagged)] -#[specta(rename = "DiarizationRetrieveResponse")] -pub enum Response { - Ok { - status: JobStatus, - #[serde(rename = "jobId")] - job_id: String, - #[serde(rename = "createdAt")] - created_at: String, - #[serde(rename = "updatedAt")] - updated_at: String, - output: JobResult, - }, - Error { - message: String, - }, -} - -#[derive(Debug, serde::Serialize, serde::Deserialize, specta::Type)] -pub enum JobStatus { - #[serde(rename = "pending")] - Pending, - #[serde(rename = "created")] - Created, - #[serde(rename = "succeeded")] - Succeeded, - #[serde(rename = "canceled")] - Canceled, - #[serde(rename = "failed")] - Failed, -} - -#[derive(Debug, serde::Serialize, serde::Deserialize, specta::Type)] -#[serde(untagged)] -pub enum JobResult { - Diarization(DiarizationResult), -} - -#[derive(Debug, serde::Serialize, serde::Deserialize, specta::Type)] -pub struct DiarizationResult { - pub diarization: Vec, - pub confidence: Option, -} - -#[derive(Debug, serde::Serialize, serde::Deserialize, specta::Type)] -pub struct DiarizationSegment { - pub speaker: String, - pub start: f32, - pub end: f32, -} - -#[derive(Debug, serde::Serialize, serde::Deserialize, specta::Type)] -pub struct DiarizationConfidence { - pub resolution: f32, - pub score: Vec, -} - -// https://docs.pyannote.ai/api-reference/get-job -impl PyannoteClient { - pub async fn get_job(&self, input: Request) -> Result { - let mut url = self.api_base.clone(); - url.set_path(&format!("/v1/jobs/{}", input.job_id)); - - let res = self - .client - .get(url) - .send() - .await? - .json::() - .await?; - Ok(res) - } -} diff --git a/crates/pyannote-cloud/src/lib.rs b/crates/pyannote-cloud/src/lib.rs index 18ee4c2a49..77eba6cf8a 100644 --- a/crates/pyannote-cloud/src/lib.rs +++ b/crates/pyannote-cloud/src/lib.rs @@ -1,103 +1 @@ -pub mod get_job; -pub mod submit_diarization_job; -pub mod test_key; - -#[derive(Debug, Clone)] -pub struct PyannoteClient { - client: reqwest::Client, - api_base: url::Url, -} - -impl PyannoteClient { - pub fn builder() -> PyannoteClientBuilder { - PyannoteClientBuilder { api_key: None } - } -} - -pub struct PyannoteClientBuilder { - api_key: Option, -} - -impl PyannoteClientBuilder { - pub fn api_key(mut self, api_key: impl Into) -> Self { - self.api_key = Some(api_key.into()); - self - } - - pub fn build(self) -> PyannoteClient { - let mut headers = reqwest::header::HeaderMap::new(); - - // https://docs.pyannote.ai/authentication - let auth_str = format!("Bearer {}", self.api_key.unwrap()); - let mut auth_value = reqwest::header::HeaderValue::from_str(&auth_str).unwrap(); - auth_value.set_sensitive(true); - - headers.insert(reqwest::header::AUTHORIZATION, auth_value); - - let client = reqwest::Client::builder() - .default_headers(headers) - .build() - .unwrap(); - - let api_base = "https://api.pyannote.ai".parse().unwrap(); - PyannoteClient { client, api_base } - } -} - -#[cfg(test)] -mod tests { - use super::*; - - fn get_client() -> PyannoteClient { - PyannoteClient::builder() - .api_key(std::env::var("PYANNOTE_API_KEY").unwrap()) - .build() - } - - // cargo test test_client -p pyannote -- --ignored --nocapture - #[ignore] - #[tokio::test] - async fn test_client() { - let client = get_client(); - - match client.test().await.unwrap() { - test_key::Response::Ok { status, .. } => assert_eq!(status, "OK"), - test_key::Response::Error { message } => panic!("{}", message), - } - } - - // cargo test test_diarization -p pyannote --features cloud -- --ignored --nocapture - #[ignore] - #[tokio::test] - async fn test_diarization() { - let _ = hypr_data::english_1::AUDIO; - - let client = get_client(); - let res = client - .submit_diarization_job(submit_diarization_job::Request { - url: "https://pub-b3736ee27dd54b7aa6bb39be9fcd398d.r2.dev/audio.wav".to_string(), - webhook: None, - num_speakers: None, - confidence: None, - }) - .await - .unwrap(); - - if let submit_diarization_job::Response::Ok { job_id, .. } = res { - println!("{:?}", job_id); - } - } - - // cargo test test_get_job -p pyannote --features cloud -- --ignored --nocapture - #[ignore] - #[tokio::test] - async fn test_get_job() { - let client = get_client(); - - let req = get_job::Request { - job_id: "b5360dac-9676-4cf2-836f-118312b207d9".to_string(), - }; - let res = client.get_job(req).await.unwrap(); - println!("{:?}", res); - } -} +include!(concat!(env!("OUT_DIR"), "/codegen.rs")); diff --git a/crates/pyannote-cloud/src/submit_diarization_job.rs b/crates/pyannote-cloud/src/submit_diarization_job.rs deleted file mode 100644 index 5c239ef58a..0000000000 --- a/crates/pyannote-cloud/src/submit_diarization_job.rs +++ /dev/null @@ -1,45 +0,0 @@ -use super::PyannoteClient; - -#[derive(Debug, serde::Serialize, serde::Deserialize, specta::Type)] -#[specta(rename = "DiarizationSubmitRequest")] -pub struct Request { - pub url: String, - #[serde(skip_serializing_if = "Option::is_none")] - pub webhook: Option, - #[serde(rename = "numSpeakers", skip_serializing_if = "Option::is_none")] - pub num_speakers: Option, - #[serde(skip_serializing_if = "Option::is_none")] - pub confidence: Option, -} - -#[derive(Debug, serde::Serialize, serde::Deserialize, specta::Type)] -#[serde(untagged)] -#[specta(rename = "DiarizationSubmitResponse")] -pub enum Response { - Ok { - status: String, - #[serde(rename = "jobId")] - job_id: String, - }, - Error { - message: String, - }, -} - -// https://docs.pyannote.ai/api-reference/diarize -impl PyannoteClient { - pub async fn submit_diarization_job(&self, req: Request) -> Result { - let mut url = self.api_base.clone(); - url.set_path("/v1/diarize"); - - let res = self - .client - .post(url) - .json(&req) - .send() - .await? - .json::() - .await?; - Ok(res) - } -} diff --git a/crates/pyannote-cloud/src/test_key.rs b/crates/pyannote-cloud/src/test_key.rs deleted file mode 100644 index e8fa25dd07..0000000000 --- a/crates/pyannote-cloud/src/test_key.rs +++ /dev/null @@ -1,25 +0,0 @@ -use super::PyannoteClient; - -#[derive(Debug, serde::Serialize, serde::Deserialize)] -#[serde(untagged)] -pub enum Response { - Ok { status: String, message: String }, - Error { message: String }, -} - -impl PyannoteClient { - // https://docs.pyannote.ai/api-reference/test - pub async fn test(&self) -> Result { - let mut url = self.api_base.clone(); - url.set_path("/v1/test"); - - let res = self - .client - .get(url) - .send() - .await? - .json::() - .await?; - Ok(res) - } -} From 1625ae6247d1d76938da2959fc180a57968ee829 Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Sun, 22 Feb 2026 14:34:31 +0000 Subject: [PATCH 2/2] rename openapi.filtered.json to openapi-filtered.gen.json Co-Authored-By: yujonglee --- crates/pyannote-cloud/build.rs | 2 +- .../{openapi.filtered.json => openapi-filtered.gen.json} | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename crates/pyannote-cloud/{openapi.filtered.json => openapi-filtered.gen.json} (100%) diff --git a/crates/pyannote-cloud/build.rs b/crates/pyannote-cloud/build.rs index a2ae88e705..4701032785 100644 --- a/crates/pyannote-cloud/build.rs +++ b/crates/pyannote-cloud/build.rs @@ -12,7 +12,7 @@ fn main() { .flatten_all_of() .remove_unreferenced_schemas() .write_filtered( - std::path::Path::new(env!("CARGO_MANIFEST_DIR")).join("openapi.filtered.json"), + std::path::Path::new(env!("CARGO_MANIFEST_DIR")).join("openapi-filtered.gen.json"), ) .generate("codegen.rs"); } diff --git a/crates/pyannote-cloud/openapi.filtered.json b/crates/pyannote-cloud/openapi-filtered.gen.json similarity index 100% rename from crates/pyannote-cloud/openapi.filtered.json rename to crates/pyannote-cloud/openapi-filtered.gen.json