-
Notifications
You must be signed in to change notification settings - Fork 21
/
file_handler.py
executable file
·70 lines (63 loc) · 2.38 KB
/
file_handler.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
import os
import subprocess
import time
import logging
import uuid
from speech_recognizer import SpeechRecognizer
from punctuator import Punctuator
from number_utils.text2numbers import TextToNumbers
# Module-level helper instances, created once at import time and shared by
# every FileHandler.get_models_result call.
# NOTE(review): presumably these load models and are expensive to build,
# which is why they are not created per request -- confirm.
speech_recognizer = SpeechRecognizer()
# The punctuation model is loaded from a path relative to the working directory.
punctuator = Punctuator(model_path="data/punctuator")
text2numbers = TextToNumbers()
class FileHandler:
    """Store uploaded recordings, convert them to WAV and run recognition.

    Every method is static; the class is used purely as a namespace.
    """

    # Directory that receives both the raw upload and the converted WAV file.
    _RECORDS_DIR = './records'

    # File-name suffixes accepted when the MIME type is not ``audio/*``.
    # '.acc' is kept from the original list (most likely a typo for '.aac').
    _MEDIA_EXTENSIONS = (
        '.mp3', '.ogg', '.aac', '.acc', '.flac', '.au', '.m4a',
        '.mp4', '.mov', '.avi', '.wmv', '.3gp', '.flv', '.mkv',
    )

    @staticmethod
    def get_recognized_text(blob):
        """Save an upload, convert it to WAV and return the recognition result.

        Args:
            blob: uploaded file object; only its ``save(path)`` method is used.

        Returns:
            A 3-tuple ``(status, filename, payload)``:
            ``(0, wav_filename, results)`` on success, where ``results`` is
            the list produced by :meth:`get_models_result`, or
            ``(1, None, error_message)`` when any step raised.
        """
        try:
            # A random name avoids collisions between concurrent uploads.
            filename = str(uuid.uuid4())
            os.makedirs(FileHandler._RECORDS_DIR, exist_ok=True)
            new_record_path = os.path.join(
                FileHandler._RECORDS_DIR, filename + '.webm'
            )
            blob.save(new_record_path)

            new_filename = filename + '.wav'
            converted_record_path = FileHandler.convert_to_wav(
                new_record_path, new_filename
            )
            response_models_result = FileHandler.get_models_result(
                converted_record_path
            )
            return 0, new_filename, response_models_result
        except Exception as e:
            # Boundary handler: callers expect a status tuple, not an exception.
            logging.exception(e)
            return 1, None, str(e)

    @staticmethod
    def convert_to_wav(webm_full_filepath, new_filename):
        """Convert a recording to 16 kHz mono 16-bit WAV using ffmpeg.

        The source file is deleted afterwards, whether or not the conversion
        succeeded.

        Args:
            webm_full_filepath: path of the file to convert.
            new_filename: file name (not a path) for the WAV output, created
                inside the records directory.

        Returns:
            Path of the converted WAV file.

        Raises:
            subprocess.CalledProcessError: ffmpeg exited with a non-zero status.
            OSError: ffmpeg could not be started, or the source file could not
                be removed.
        """
        converted_record_path = os.path.join(
            FileHandler._RECORDS_DIR, new_filename
        )
        # An argument list (no shell) keeps paths with spaces or shell
        # metacharacters from being re-interpreted.
        command = [
            'ffmpeg', '-i', webm_full_filepath,
            '-ar', '16000', '-b:a', '256k', '-ac', '1', '-sample_fmt', 's16',
            converted_record_path,
        ]
        try:
            subprocess.run(
                command,
                # Prevent ffmpeg from waiting on the parent process's stdin.
                stdin=subprocess.DEVNULL,
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL,
                # Fail loudly here rather than later on a missing WAV file.
                check=True,
            )
        finally:
            os.remove(webm_full_filepath)
        return converted_record_path

    @staticmethod
    def check_format(files):
        """Tell whether an upload looks like an audio/video file.

        Args:
            files: uploaded file object exposing ``mimetype`` and ``filename``.

        Returns:
            ``True`` if the MIME type starts with ``audio/`` or the file name
            ends (case-insensitively) with one of the known media extensions,
            otherwise ``False``.
        """
        mimetype = files.mimetype or ''
        if mimetype.startswith('audio/'):
            return True
        filename = (files.filename or '').lower()
        # str.endswith accepts a tuple and returns a real bool.
        return filename.endswith(FileHandler._MEDIA_EXTENSIONS)

    @staticmethod
    def get_models_result(converted_record_path, delimiter='<br>'):
        """Run recognition, punctuation and number conversion on a WAV file.

        Args:
            converted_record_path: path of the WAV file to recognize.
            delimiter: currently unused; kept for interface compatibility.

        Returns:
            A one-element list holding a dict with the keys ``'text'``
            (processed transcript), ``'time'`` (elapsed seconds, rounded to
            3 decimals), ``'confidence'`` (``decoder_result.score``) and
            ``'words'`` (``decoder_result.words``).
        """
        # perf_counter is monotonic, so the elapsed time cannot go negative
        # or jump when the system clock is adjusted.
        start = time.perf_counter()
        decoder_result = speech_recognizer.recognize(converted_record_path)
        text = punctuator.predict(decoder_result.text)
        text = text2numbers.convert(text)
        elapsed = time.perf_counter() - start

        return [
            {
                'text': text,
                'time': round(elapsed, 3),
                'confidence': decoder_result.score,
                'words': decoder_result.words,
            }
        ]