Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ Speech recognition engine/API support:
* `CMU Sphinx <http://cmusphinx.sourceforge.net/wiki/>`__ (works offline)
* Google Speech Recognition
* `Google Cloud Speech API <https://cloud.google.com/speech/>`__
* `AssemblyAI API <https://www.assemblyai.com/>`__
* `Wit.ai <https://wit.ai/>`__
* `Microsoft Azure Speech <https://azure.microsoft.com/en-us/services/cognitive-services/speech/>`__
* `Microsoft Bing Voice Recognition (Deprecated) <https://www.microsoft.com/cognitive-services/en-us/speech-api>`__
Expand Down Expand Up @@ -202,7 +203,7 @@ The solution is to decrease this threshold, or call ``recognizer_instance.adjust
The recognizer doesn't understand my particular language/dialect.
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Try setting the recognition language to your language/dialect. To do this, see the documentation for ``recognizer_instance.recognize_sphinx``, ``recognizer_instance.recognize_google``, ``recognizer_instance.recognize_wit``, ``recognizer_instance.recognize_bing``, ``recognizer_instance.recognize_api``, ``recognizer_instance.recognize_houndify``, and ``recognizer_instance.recognize_ibm``.
Try setting the recognition language to your language/dialect. To do this, see the documentation for ``recognizer_instance.recognize_sphinx``, ``recognizer_instance.recognize_google``, ``recognizer_instance.recognize_wit``, ``recognizer_instance.recognize_bing``, ``recognizer_instance.recognize_api``, ``recognizer_instance.recognize_houndify``, ``recognizer_instance.recognize_ibm``, and ``recognizer_instance.recognize_assemblyai``.

For example, if your language/dialect is British English, it is better to use ``"en-GB"`` as the language rather than ``"en-US"``.

Expand Down
21 changes: 21 additions & 0 deletions examples/audio_transcribe.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,3 +87,24 @@
print("IBM Speech to Text could not understand audio")
except sr.RequestError as e:
print("Could not request results from IBM Speech to Text service; {0}".format(e))

# recognize speech using the AssemblyAI API
ASSEMBLYAI_API_TOKEN = "INSERT ASSEMBLYAI API TOKEN HERE"  # Get a Free token at https://www.assemblyai.com/

# First submit the file for transcription and obtain the job_name that corresponds to the transcription_id.
# The submission call is expected to raise TranscriptionNotReady, whose job_name
# attribute identifies the queued transcription job.
job_name = None  # remains None if submission fails, so polling below is skipped
try:
    r.recognize_assemblyai(audio, api_token=ASSEMBLYAI_API_TOKEN)
except sr.TranscriptionNotReady as e:
    job_name = e.job_name
except sr.TranscriptionFailed as e:
    print(e)
except sr.RequestError as e:
    print("Could not request results from AssemblyAI service; {0}".format(e))

# Wait a little bit, then query the transcript with the job_name.
# Only poll if submission actually produced a job — otherwise job_name would be
# undefined here and the lookup would raise a NameError.
if job_name is not None:
    try:
        print("AssemblyAI thinks you said " + r.recognize_assemblyai(audio_data=None, api_token=ASSEMBLYAI_API_TOKEN, job_name=job_name)[0])
    except sr.TranscriptionFailed as e:
        print(e)
    except sr.RequestError as e:
        print("Could not request results from AssemblyAI service; {0}".format(e))
100 changes: 73 additions & 27 deletions speech_recognition/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -1289,33 +1289,51 @@ def recognize_amazon(self, audio_data, bucket_name=None, access_key_id=None, sec

def recognize_assemblyai(self, audio_data, api_token, job_name=None, **kwargs):
"""
Wraps the AssemblyAI STT service.
Performs speech recognition using the AssemblyAI API.

https://www.assemblyai.com/

Args:
audio_data: Can be an ``AudioData`` instance or a str with a path to a file.
api_token: An AssemblyAI API token.
job_name: The name of the job which corresponds to the transcription id. If no job_name is given, it submits the file for transcription
and raises a ``speech_recognition.TranscriptionNotReady`` exception. The final transcript can then be queried at a later time
by passing the job_name.

Raises a ``speech_recognition.TranscriptionFailed`` exception if the speech recognition operation failed or if the key isn't valid.
Raises a ``speech_recognition.RequestError`` exception if API requests failed, e.g. if there is no internet connection.

Example:
```
try:
r.recognize_assemblyai(audio_data=audio, api_token=your_token)
except sr.TranscriptionNotReady as e:
job_name = e.job_name

# wait a little bit...
result = r.recognize_assemblyai(audio_data=None, api_token=your_token, job_name=job_name)
```
"""

def read_file(filename, chunk_size=5242880):
with open(filename, 'rb') as _file:
while True:
data = _file.read(chunk_size)
if not data:
break
yield data
headers = {"authorization": api_token}

check_existing = audio_data is None and job_name
if check_existing:
# Query status.
transciption_id = job_name
endpoint = f"https://api.assemblyai.com/v2/transcript/{transciption_id}"
headers = {
"authorization": api_token,
}
response = requests.get(endpoint, headers=headers)

try:
response = requests.get(endpoint, headers=headers)
except requests.exceptions.RequestException as e:
raise RequestError("recognition request failed: {}".format(e.reason))

data = response.json()
status = data['status']

if status == 'error':
# Handle error.
exc = TranscriptionFailed()
exc = TranscriptionFailed("Transcription failed: {}".format(data["error"]))
exc.job_name = None
exc.file_key = None
raise exc
Expand All @@ -1332,24 +1350,52 @@ def read_file(filename, chunk_size=5242880):
exc.file_key = None
raise exc
else:
# Upload file.
headers = {'authorization': api_token}
response = requests.post('https://api.assemblyai.com/v2/upload',
headers=headers,
data=read_file(audio_data))
upload_url = response.json()['upload_url']
# Upload file and queue for transcription.
            # This path raises a TranscriptionNotReady exception whose job_name
            # attribute holds the transcription id; that job_name can then be used at a later point to query the transcript
if isinstance(audio_data, AudioData):
# convert to flac first
upload_data = audio_data.get_flac_data(
convert_rate=None if audio_data.sample_rate >= 8000 else 8000, # audio samples should be at least 8 kHz
convert_width=None if audio_data.sample_width >= 2 else 2 # audio samples should be at least 16-bit
)
else:
# assume audio_data is a path to a file that can be uploaded directly
upload_data = audio_data

try:
response = requests.post('https://api.assemblyai.com/v2/upload',
headers=headers,
data=upload_data)
except requests.exceptions.RequestException as e:
raise RequestError("recognition request failed: {}".format(e.reason))

data = response.json()
if "error" in data:
exc = TranscriptionFailed("Transcription failed: {}".format(data["error"]))
exc.job_name = None
exc.file_key = None
raise exc

upload_url = data['upload_url']

# Queue file for transcription.
endpoint = "https://api.assemblyai.com/v2/transcript"
json = {
"audio_url": upload_url
}
headers = {
"authorization": api_token,
"content-type": "application/json"
}
response = requests.post(endpoint, json=json, headers=headers)
json = { "audio_url": upload_url }

try:
response = requests.post(endpoint, json=json, headers=headers)
except requests.exceptions.RequestException as e:
raise RequestError("recognition request failed: {}".format(e.reason))

data = response.json()

if "error" in data:
exc = TranscriptionFailed("Transcription failed: {}".format(data["error"]))
exc.job_name = None
exc.file_key = None
raise exc

transciption_id = data['id']
exc = TranscriptionNotReady()
exc.job_name = transciption_id
Expand Down