forked from lorenzbaum/Hackathon-Pubquiz
-
Notifications
You must be signed in to change notification settings - Fork 0
/
extract_audio.py
36 lines (28 loc) · 990 Bytes
/
extract_audio.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
import openai
import os
from pathlib import Path
from dotenv import load_dotenv
# Load settings from a local .env file (if present) into the process
# environment before the lookups below read them.
load_dotenv()

# Connection settings for the Azure OpenAI resource used for Whisper.
# Either value is None when the corresponding variable is not set.
azure_endpoint_whisper = os.environ.get('AZURE_OPENAI_ENDPOINT_WHISPER')
azure_api_key_whisper = os.environ.get('AZURE_OPENAI_API_KEY_WHISPER')

# Module-level client, bound to the "whisper" deployment, that the
# transcription code in this file talks to.
client = openai.AzureOpenAI(
    azure_endpoint=azure_endpoint_whisper,
    azure_deployment="whisper",
    api_key=azure_api_key_whisper,
    api_version="2023-09-01-preview",
)
def get_transcript(audio_file):
    """Transcribe an audio file with the "whisper" model and return its text.

    Args:
        audio_file: Path to the audio file, as a string. If no file exists
            at that path, the same value is retried underneath ``./data/``.

    Returns:
        The ``text`` attribute of the transcription response.

    Raises:
        FileNotFoundError: If the file exists neither at the given path nor
            under ``./data/``.
    """
    if not os.path.exists(audio_file):
        # Fall back to the data directory for bare file names.
        audio_file = "./data/" + audio_file

    # Use a context manager so the file handle is released as soon as the
    # request finishes, even when the API call raises.
    with open(audio_file, "rb") as audio_stream:
        response = client.audio.transcriptions.create(
            file=audio_stream,
            model="whisper",
            language="de",
        )
    return response.text
# Transcribe every .mp3 in ./PubAudio/ and store each result both in the
# `transcripts` list and in ./PubTexts/<stem>.txt.
audio_dir = Path("./PubAudio/")
output_dir = Path("./PubTexts/")
transcripts = []
for audio_file in audio_dir.glob("*.mp3"):
    # Transcribe once and reuse the result: a second call would repeat the
    # API request and could return text that differs from the list entry.
    transcript = get_transcript(str(audio_file))
    transcripts.append(transcript)

    # Create the output directory on demand so a missing folder does not
    # discard a transcription that has already been made.
    output_dir.mkdir(parents=True, exist_ok=True)
    output_path = output_dir / (audio_file.stem + ".txt")
    with open(output_path, "w", encoding="utf-8") as f:
        f.write(transcript)