-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.py
150 lines (122 loc) · 4.68 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
#!/usr/bin/env python3
from base64 import b64encode
from io import BytesIO
from os import environ, listdir, makedirs, path, remove
from uuid import uuid4

import google.generativeai as genai
import librosa.display
import matplotlib.pyplot as plt
import speech_recognition as sr
from flask import Flask, request, jsonify, render_template
from flask_cors import CORS
from librosa import load
from pydub import AudioSegment
from werkzeug.utils import secure_filename
# Application setup: Flask app with CORS enabled, the upload folder, and the
# Gemini client used by analyze_transcription().
app = Flask(__name__)
CORS(app)

app.config["UPLOAD_FOLDER"] = "uploads/"
# exist_ok=True creates the folder when missing and is a no-op otherwise,
# without the check-then-create gap of a separate path.exists() test.
makedirs(app.config["UPLOAD_FOLDER"], exist_ok=True)

# Take the API key from the environment so it never has to be written into
# the source file; the empty-string fallback keeps the previous default.
gemini_api_key = environ.get("GEMINI_API_KEY") or environ.get("GOOGLE_API_KEY", "")
genai.configure(api_key=gemini_api_key)
model = genai.GenerativeModel("gemini-1.5-flash")
@app.route("/")
def index():
    """Serve the root URL by rendering the ``index.html`` template."""
    page = render_template("index.html")
    return page
@app.route("/transcribe", methods=["POST"])
def transcribe():
    """Transcribe an uploaded audio file and render its waveform.

    Expects a multipart upload in the ``audio`` form field. Files whose name
    ends in ``.ogg`` (any letter case) are converted to WAV with pydub first;
    every other file is passed to the recogniser unchanged.

    Returns:
        On success, a JSON object with ``transcription``, ``plot`` (the
        value returned by ``generate_waveform``) and ``id`` (a fresh UUID).
        On failure, a JSON ``error`` payload with status 400 (missing,
        unnamed or unusable file name) or 500 (save, conversion or
        processing error).
    """
    if "audio" not in request.files:
        return jsonify({"error": "Nenhum arquivo foi enviado."}), 400

    file = request.files["audio"]
    if file.filename == "":
        return jsonify({"error": "Nenhum arquivo selecionado."}), 400

    filename = secure_filename(file.filename)
    if not filename:
        # secure_filename() can strip a name down to "", which would make the
        # save target the upload directory itself.
        return jsonify({"error": "Nome de arquivo inválido."}), 400

    unique_id = str(uuid4())
    # Prefix the stored name with the request id so two uploads that share a
    # file name cannot overwrite each other.
    audio_path = path.join(app.config["UPLOAD_FOLDER"], f"{unique_id}_{filename}")
    is_ogg = filename.lower().endswith(".ogg")
    # splitext swaps only the final extension; str.replace would rewrite every
    # ".ogg" occurrence in the path.
    wav_path = f"{path.splitext(audio_path)[0]}.wav" if is_ogg else audio_path

    try:
        file.save(audio_path)
    except Exception as e:
        _remove_files(audio_path)  # drop any partially written upload
        return jsonify({"error": "Erro ao salvar o arquivo.", "details": str(e)}), 500

    try:
        if is_ogg:
            audio = AudioSegment.from_ogg(audio_path)
            audio.export(wav_path, format="wav")
            if not path.exists(wav_path):
                return jsonify({"error": "Erro na conversão do arquivo."}), 500

        transcription = transcribe_audio(wav_path)
        plot_base64 = generate_waveform(wav_path)

        response_data = {
            "transcription": transcription,
            "plot": plot_base64,
            "id": unique_id,
        }
        # NOTE(review): clear_uploads() empties the whole upload folder, so it
        # can also delete files that belong to requests still in flight. It is
        # kept for backward compatibility; confirm whether it is still wanted
        # now that each request removes its own files below.
        clear_uploads()
        return jsonify(response_data)
    except Exception as e:
        return (
            jsonify({"error": "Erro ao processar o arquivo.", "details": str(e)}),
            500,
        )
    finally:
        # Runs on success and on every failure path, so temporary files never
        # outlive the request.
        _remove_files(audio_path, wav_path)


def _remove_files(*file_paths):
    """Best-effort removal of the given files; failures are printed, not raised."""
    # set() collapses the case where the WAV path is the uploaded file itself.
    for file_path in set(file_paths):
        try:
            remove(file_path)
        except FileNotFoundError:
            pass  # never created, or already removed
        except OSError as e:
            print(f"Erro ao tentar remover o arquivo {file_path}: {e}")
@app.route("/analyze", methods=["POST"])
def analyze():
    """Summarise a transcription sent as JSON.

    Expects a JSON object with a non-empty ``transcription`` field.

    Returns:
        ``{"analysis": <text>}`` on success, or a 400 JSON ``error`` payload
        when the body is not a JSON object or the field is missing or empty.
    """
    # silent=True makes get_json() return None for a missing, malformed or
    # non-JSON body instead of raising.
    data = request.get_json(silent=True)
    # A valid JSON body may still be a list, string or number, none of which
    # has .get(), so only read the field from a dict.
    transcription = data.get("transcription", "") if isinstance(data, dict) else ""
    if not transcription:
        return jsonify({"error": "Texto de transcrição ausente."}), 400

    analysis_result = analyze_transcription(transcription)
    return jsonify({"analysis": analysis_result})
def clear_uploads():
    """Delete every regular file found directly in the upload folder.

    Entries that are not regular files are skipped. An error while handling
    one entry is printed and does not stop the remaining entries from being
    processed.
    """
    upload_dir = app.config["UPLOAD_FOLDER"]
    for entry in listdir(upload_dir):
        target = path.join(upload_dir, entry)
        try:
            if not path.isfile(target):
                continue
            remove(target)
        except Exception as err:
            print(f"Erro ao tentar remover o arquivo {target}: {err}")
def transcribe_audio(wav_path: str) -> str:
    """Return the text recognised in the audio file at ``wav_path``.

    The whole file is read and passed to ``recognize_google`` with the
    language set to ``pt-BR``. Recognition problems are not raised: the
    function returns a Portuguese message describing the failure instead.
    """
    engine = sr.Recognizer()
    with sr.AudioFile(wav_path) as audio_file:
        captured = engine.record(audio_file)

    try:
        text = engine.recognize_google(captured, language="pt-BR")
    except sr.UnknownValueError:
        text = "Não foi possível entender o áudio."
    except sr.RequestError as err:
        text = f"Erro ao conectar ao serviço de reconhecimento de fala: {err}"
    return text
def analyze_transcription(transcription: str) -> str:
    """Ask the module-level Gemini ``model`` to summarise ``transcription``.

    Returns the concatenated text parts of the first candidate. When the
    response has no candidates, or when any exception is raised, a Portuguese
    fallback message is returned instead of raising.
    """
    instructions = (
        "Você é um assistente inteligente. O texto a seguir é uma transcrição de áudio "
        "que pode conter erros. Tente entender e resumir o conteúdo: "
    )
    prompt = f'{instructions}"{transcription}"'

    try:
        response = model.generate_content(prompt)
        # Guard clause: nothing usable came back.
        if not (response and response.candidates):
            return "Desculpe, não consegui analisar a transcrição."

        first_candidate = response.candidates[0]
        pieces = [part.text for part in first_candidate.content.parts]
        return "".join(pieces)
    except Exception as err:
        return f"Erro ao tentar analisar a transcrição: {err}"
def generate_waveform(audio_path: str) -> str:
    """Render the waveform of an audio file as a base64 PNG data URI.

    Args:
        audio_path: Path of the audio file to load with librosa.

    Returns:
        A ``data:image/png;base64,...`` string containing a transparent,
        axis-free plot of the waveform drawn in white.
    """
    # Named sample_rate rather than sr so the module-level speech_recognition
    # alias is not shadowed inside this function.
    samples, sample_rate = load(audio_path, sr=None)

    # Work on an explicit figure/axes pair instead of pyplot's implicit
    # "current figure".
    fig, ax = plt.subplots(figsize=(12, 4))
    try:
        librosa.display.waveshow(samples, sr=sample_rate, color="white", ax=ax)
        ax.axis("off")
        buf = BytesIO()
        fig.tight_layout()
        fig.savefig(
            buf, format="png", bbox_inches="tight", pad_inches=0.1, transparent=True
        )
    finally:
        # Close the figure even when drawing or saving fails; pyplot otherwise
        # keeps it registered and it is never released.
        plt.close(fig)

    encoded = b64encode(buf.getvalue()).decode("utf-8")
    return f"data:image/png;base64,{encoded}"
if __name__ == "__main__":
    # Script entry point: start Flask's built-in server on port 5001 with
    # debug mode enabled.
    run_options = {"debug": True, "port": 5001}
    app.run(**run_options)