@@ -489,6 +489,18 @@ class SpeechModel(str, Enum):
489489 "The model optimized for accuracy, low latency, ease of use, and multi-language support"
490490
491491
class LanguageDetectionOptions(BaseModel):
    """Options for controlling the behavior of Automatic Language Detection.

    Both fields are optional and default to ``None``.
    """

    # Candidate languages for the audio.
    expected_languages: Optional[List[str]] = Field(
        default=None,
        description="A list of languages that the audio could be expected to be.",
    )

    # Language used when detection does not land on one of the expected ones.
    fallback_language: Optional[str] = Field(
        default=None,
        description="The language to fallback to in case the language detection does not predict any of the expected ones.",
    )
502+
503+
492504class SpeakerOptions (BaseModel ):
493505 """
494506 Speaker options for controlling speaker diarization parameters
@@ -633,6 +645,9 @@ class RawTranscriptionConfig(BaseModel):
633645 if the language confidence is below this threshold. Valid values are in the range [0,1] inclusive.
634646 """
635647
648+ language_detection_options : Optional [LanguageDetectionOptions ] = None
649+ "Options for controlling the behavior or Automatic Language Detection."
650+
636651 speech_threshold : Optional [float ] = None
637652 "Reject audio files that contain less than this fraction of speech. Valid values are in the range [0,1] inclusive."
638653
@@ -688,6 +703,7 @@ def __init__(
688703 auto_highlights : Optional [bool ] = None ,
689704 language_detection : Optional [bool ] = None ,
690705 language_confidence_threshold : Optional [float ] = None ,
706+ language_detection_options : Optional [LanguageDetectionOptions ] = None ,
691707 speech_threshold : Optional [float ] = None ,
692708 raw_transcription_config : Optional [RawTranscriptionConfig ] = None ,
693709 speech_model : Optional [SpeechModel ] = None ,
@@ -731,6 +747,7 @@ def __init__(
731747 language_detection: Identify the dominant language that's spoken in an audio file, and route the file to the appropriate model for the detected language.
732748 language_confidence_threshold: The confidence threshold that must be reached if `language_detection` is enabled.
733749 An error will be returned if the language confidence is below this threshold. Valid values are in the range [0,1] inclusive.
750+ language_detection_options: Options for controlling the behavior or Automatic Language Detection.
734751 speech_threshold: Reject audio files that contain less than this fraction of speech. Valid values are in the range [0,1] inclusive.
735752 raw_transcription_config: Create the config from a `RawTranscriptionConfig`
736753 """
@@ -780,6 +797,7 @@ def __init__(
780797 self .auto_highlights = auto_highlights
781798 self .language_detection = language_detection
782799 self .language_confidence_threshold = language_confidence_threshold
800+ self .language_detection_options = language_detection_options
783801 self .speech_threshold = speech_threshold
784802 self .speech_model = speech_model
785803 self .prompt = prompt
@@ -1175,6 +1193,20 @@ def language_confidence_threshold(self, threshold: Optional[float]) -> None:
11751193
11761194 self ._raw_transcription_config .language_confidence_threshold = threshold
11771195
@property
def language_detection_options(self) -> Optional[LanguageDetectionOptions]:
    """Return the Automatic Language Detection options stored on the raw config."""

    raw_config = self._raw_transcription_config
    return raw_config.language_detection_options

@language_detection_options.setter
def language_detection_options(
    self, options: Optional[LanguageDetectionOptions]
) -> None:
    """Store the Automatic Language Detection options on the raw config."""

    raw_config = self._raw_transcription_config
    raw_config.language_detection_options = options
1209+
11781210 @property
11791211 def speech_threshold (self ) -> Optional [float ]:
11801212 "Returns the current speech threshold."
@@ -1441,6 +1473,44 @@ def set_summarize(
14411473
14421474 return self
14431475
def set_language_detection(
    self,
    enable: Optional[bool] = True,
    confidence_threshold: Optional[float] = None,
    expected_languages: Optional[List[str]] = None,
    fallback_language: Optional[str] = None,
) -> Self:
    """
    Enable or disable Automatic Language Detection with optional configuration.

    Each call replaces the whole language detection state (flag, confidence
    threshold and detection options), so values from an earlier call never
    leak into the new configuration.

    Args:
        enable: whether to enable or disable the Language Detection feature.
            Any falsy value (including `None`) clears all language detection settings.
        confidence_threshold: The confidence threshold that must be reached.
        expected_languages: A list of languages that the audio could be expected to be.
        fallback_language: The language to fallback to if detection fails.

    Returns:
        This instance, so that calls can be chained.
    """

    raw_config = self._raw_transcription_config

    if not enable:
        raw_config.language_detection = None
        raw_config.language_confidence_threshold = None
        raw_config.language_detection_options = None
        return self

    raw_config.language_detection = True
    raw_config.language_confidence_threshold = confidence_threshold

    # The options are assigned on every call, mirroring how the threshold is
    # handled above: without the `else` branch, options set by a previous call
    # would silently survive a reconfiguration that no longer asks for them.
    # An empty list / empty string counts as "not provided".
    if expected_languages or fallback_language:
        raw_config.language_detection_options = LanguageDetectionOptions(
            expected_languages=expected_languages,
            fallback_language=fallback_language,
        )
    else:
        raw_config.language_detection_options = None

    return self
1513+
14441514 # endregion
14451515
14461516
@@ -1818,6 +1888,9 @@ class BaseTranscript(BaseModel):
18181888 language_confidence_threshold : Optional [float ] = None
18191889 "The confidence threshold that must be reached if `language_detection` is enabled."
18201890
1891+ language_detection_options : Optional [LanguageDetectionOptions ] = None
1892+ "Options for controlling the behavior or Automatic Language Detection."
1893+
18211894 language_confidence : Optional [float ] = None
18221895 "The confidence score for the detected language, between 0.0 (low confidence) and 1.0 (high confidence)."
18231896
0 commit comments