From 225bd036edf39ea23589e1ad2220246494ea466d Mon Sep 17 00:00:00 2001
From: Patrick Loeber <50772274+patrickloeber@users.noreply.github.com>
Date: Fri, 20 Oct 2023 12:20:42 -0400
Subject: [PATCH] Fixes and improvements for the `recognize_assemblyai()` method:
- Add support for passing an `AudioData` instance (usage sketched below);
  previously only a path to a file was accepted
- Add more error handling
- Remove the inner `read_file` function, since the requests module
  handles chunking automatically
- Remove "content-type" from the headers, since it is not needed
- Add a docstring
- Add an example snippet in `examples/audio_transcribe.py`
- List AssemblyAI in the README
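
A minimal usage sketch of the new two-step flow (it mirrors the example added in
`examples/audio_transcribe.py`; the file name and token are placeholders):

    import speech_recognition as sr

    r = sr.Recognizer()
    with sr.AudioFile("speech.wav") as source:
        audio = r.record(source)  # an AudioData instance (newly supported input)

    try:
        # a plain path such as "speech.wav" may still be passed instead of `audio`
        r.recognize_assemblyai(audio, api_token="YOUR_ASSEMBLYAI_TOKEN")
    except sr.TranscriptionNotReady as e:
        job_name = e.job_name  # the transcription id, used to fetch the result later

    # once the job has finished, the transcript text is the first element of the result
    print(r.recognize_assemblyai(audio_data=None, api_token="YOUR_ASSEMBLYAI_TOKEN", job_name=job_name)[0])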
---
README.rst | 3 +-
examples/audio_transcribe.py | 21 +++++++
speech_recognition/__init__.py | 100 ++++++++++++++++++++++++---------
3 files changed, 96 insertions(+), 28 deletions(-)
diff --git a/README.rst b/README.rst
index 410e289d..5a24e2b1 100644
--- a/README.rst
+++ b/README.rst
@@ -30,6 +30,7 @@ Speech recognition engine/API support:
* `CMU Sphinx `__ (works offline)
* Google Speech Recognition
* `Google Cloud Speech API `__
+* `AssemblyAI API <https://www.assemblyai.com/>`__
* `Wit.ai `__
* `Microsoft Azure Speech `__
* `Microsoft Bing Voice Recognition (Deprecated) `__
@@ -202,7 +203,7 @@ The solution is to decrease this threshold, or call ``recognizer_instance.adjust
The recognizer doesn't understand my particular language/dialect.
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-Try setting the recognition language to your language/dialect. To do this, see the documentation for ``recognizer_instance.recognize_sphinx``, ``recognizer_instance.recognize_google``, ``recognizer_instance.recognize_wit``, ``recognizer_instance.recognize_bing``, ``recognizer_instance.recognize_api``, ``recognizer_instance.recognize_houndify``, and ``recognizer_instance.recognize_ibm``.
+Try setting the recognition language to your language/dialect. To do this, see the documentation for ``recognizer_instance.recognize_sphinx``, ``recognizer_instance.recognize_google``, ``recognizer_instance.recognize_wit``, ``recognizer_instance.recognize_bing``, ``recognizer_instance.recognize_api``, ``recognizer_instance.recognize_houndify``, ``recognizer_instance.recognize_ibm``, and ``recognizer_instance.recognize_assemblyai``.
For example, if your language/dialect is British English, it is better to use ``"en-GB"`` as the language rather than ``"en-US"``.
diff --git a/examples/audio_transcribe.py b/examples/audio_transcribe.py
index 7806023f..7d9e8869 100644
--- a/examples/audio_transcribe.py
+++ b/examples/audio_transcribe.py
@@ -87,3 +87,24 @@
print("IBM Speech to Text could not understand audio")
except sr.RequestError as e:
print("Could not request results from IBM Speech to Text service; {0}".format(e))
+
+# recognize speech using the AssemblyAI API
+ASSEMBLYAI_API_TOKEN = "INSERT ASSEMBLYAI API TOKEN HERE"  # get a free token at https://www.assemblyai.com/
+
+# First submit the file for transcription and obtain the job_name, which corresponds to the transcription id
+try:
+ r.recognize_assemblyai(audio, api_token=ASSEMBLYAI_API_TOKEN)
+except sr.TranscriptionNotReady as e:
+ job_name = e.job_name
+except sr.TranscriptionFailed as e:
+ print(e)
+except sr.RequestError as e:
+ print("Could not request results from AssemblyAI service; {0}".format(e))
+
+# Wait a little bit, then query the transcript with the job_name
+try:
+ print("AssemblyAI thinks you said " + r.recognize_assemblyai(audio_data=None, api_token=ASSEMBLYAI_API_TOKEN, job_name=job_name)[0])
+except sr.TranscriptionFailed as e:
+ print(e)
+except sr.RequestError as e:
+ print("Could not request results from AssemblyAI service; {0}".format(e))
diff --git a/speech_recognition/__init__.py b/speech_recognition/__init__.py
index bbff8dad..0400bf1a 100644
--- a/speech_recognition/__init__.py
+++ b/speech_recognition/__init__.py
@@ -1289,33 +1289,51 @@ def recognize_amazon(self, audio_data, bucket_name=None, access_key_id=None, sec
def recognize_assemblyai(self, audio_data, api_token, job_name=None, **kwargs):
"""
- Wraps the AssemblyAI STT service.
+ Performs speech recognition using the AssemblyAI API.
+
https://www.assemblyai.com/
+
+ Args:
+ audio_data: Either an ``AudioData`` instance or a str containing the path to an audio file.
+ api_token: An AssemblyAI API token.
+ job_name: The name of the job, which corresponds to the transcription id. If no job_name is given, the audio is submitted for
+ transcription and a ``speech_recognition.TranscriptionNotReady`` exception is raised; its ``job_name`` attribute holds the
+ transcription id, so the final transcript can be queried at a later time by passing that job_name.
+
+ Raises a ``speech_recognition.TranscriptionFailed`` exception if the speech recognition operation failed or if the API token isn't valid.
+ Raises a ``speech_recognition.RequestError`` exception if an API request failed, e.g. because there is no internet connection.
+
+ Example:
+ ```
+ try:
+ r.recognize_assemblyai(audio_data=audio, api_token=your_token)
+ except sr.TranscriptionNotReady as e:
+ job_name = e.job_name
+
+ # wait a little bit...
+ result = r.recognize_assemblyai(audio_data=None, api_token=your_token, job_name=job_name)
+ ```
"""
- def read_file(filename, chunk_size=5242880):
- with open(filename, 'rb') as _file:
- while True:
- data = _file.read(chunk_size)
- if not data:
- break
- yield data
+ headers = {"authorization": api_token}
check_existing = audio_data is None and job_name
if check_existing:
# Query status.
transciption_id = job_name
endpoint = f"https://api.assemblyai.com/v2/transcript/{transciption_id}"
- headers = {
- "authorization": api_token,
- }
- response = requests.get(endpoint, headers=headers)
+
+ try:
+ response = requests.get(endpoint, headers=headers)
+ except requests.exceptions.RequestException as e:
+ raise RequestError("recognition request failed: {}".format(e))
+
data = response.json()
status = data['status']
if status == 'error':
# Handle error.
- exc = TranscriptionFailed()
+ exc = TranscriptionFailed("Transcription failed: {}".format(data["error"]))
exc.job_name = None
exc.file_key = None
raise exc
@@ -1332,24 +1350,52 @@ def read_file(filename, chunk_size=5242880):
exc.file_key = None
raise exc
else:
- # Upload file.
- headers = {'authorization': api_token}
- response = requests.post('https://api.assemblyai.com/v2/upload',
- headers=headers,
- data=read_file(audio_data))
- upload_url = response.json()['upload_url']
+ # Upload file and queue for transcription.
+ # This path raises a TranscriptionNotReady exception that contains the job_name.
+ # The job_name can then be used at a later point to query the transcript.
+ if isinstance(audio_data, AudioData):
+ # convert to flac first
+ upload_data = audio_data.get_flac_data(
+ convert_rate=None if audio_data.sample_rate >= 8000 else 8000, # audio samples should be at least 8 kHz
+ convert_width=None if audio_data.sample_width >= 2 else 2 # audio samples should be at least 16-bit
+ )
+ else:
+ # assume audio_data is a path to a file that can be uploaded directly
+ upload_data = audio_data
+
+ try:
+ response = requests.post('https://api.assemblyai.com/v2/upload',
+ headers=headers,
+ data=upload_data)
+ except requests.exceptions.RequestException as e:
+ raise RequestError("recognition request failed: {}".format(e))
+
+ data = response.json()
+ if "error" in data:
+ exc = TranscriptionFailed("Transcription failed: {}".format(data["error"]))
+ exc.job_name = None
+ exc.file_key = None
+ raise exc
+
+ upload_url = data['upload_url']
# Queue file for transcription.
endpoint = "https://api.assemblyai.com/v2/transcript"
- json = {
- "audio_url": upload_url
- }
- headers = {
- "authorization": api_token,
- "content-type": "application/json"
- }
- response = requests.post(endpoint, json=json, headers=headers)
+ json = {"audio_url": upload_url}
+
+ try:
+ response = requests.post(endpoint, json=json, headers=headers)
+ except requests.exceptions.RequestException as e:
+ raise RequestError("recognition request failed: {}".format(e))
+
data = response.json()
+
+ if "error" in data:
+ exc = TranscriptionFailed("Transcription failed: {}".format(data["error"]))
+ exc.job_name = None
+ exc.file_key = None
+ raise exc
+
transciption_id = data['id']
exc = TranscriptionNotReady()
exc.job_name = transciption_id
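
For reference, a hedged standalone sketch of the REST flow wrapped above, using only the endpoints
and headers that appear in this patch (the ``text`` field of a finished transcript and the
``completed`` status value are assumptions not shown in this hunk):

    import time
    import requests

    headers = {"authorization": "YOUR_ASSEMBLYAI_TOKEN"}  # placeholder token

    # 1. Upload the raw audio bytes (a file object is streamed in chunks automatically by requests).
    with open("speech.wav", "rb") as f:  # placeholder file name
        upload = requests.post("https://api.assemblyai.com/v2/upload", headers=headers, data=f).json()

    # 2. Queue the uploaded audio for transcription; the response carries the transcription id.
    job = requests.post("https://api.assemblyai.com/v2/transcript",
                        json={"audio_url": upload["upload_url"]}, headers=headers).json()

    # 3. Poll the transcript endpoint until the job either completes or fails.
    while True:
        result = requests.get("https://api.assemblyai.com/v2/transcript/{}".format(job["id"]), headers=headers).json()
        if result["status"] in ("completed", "error"):
            break
        time.sleep(5)  # arbitrary polling interval

    print(result.get("text") or result.get("error"))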