Skip to content

Commit

Permalink
fix markdown clear
Browse files Browse the repository at this point in the history
  • Loading branch information
traderpedroso committed Oct 17, 2024
1 parent a9304e3 commit 8113377
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 19 deletions.
29 changes: 10 additions & 19 deletions app.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,9 @@
from fastapi.responses import StreamingResponse
from io import BytesIO
import uvicorn
from bs4 import BeautifulSoup
from markdown import markdown
import re


app = FastAPI()


Expand Down Expand Up @@ -349,22 +348,6 @@ class TTSRequest(BaseModel):
voice: str



def markdown_to_text(markdown_string):
    """Convert a Markdown string to plain text.

    The input is rendered to HTML with ``markdown``, code snippets are
    blanked out, and the remaining text nodes are concatenated.

    Args:
        markdown_string: Markdown source text.

    Returns:
        The concatenated text nodes of the rendered HTML, with every
        ``<pre>...</pre>`` block and ``<code>...</code>`` span replaced
        by a single space.
    """
    # md -> html -> text since BeautifulSoup can extract text cleanly
    html = markdown(markdown_string)

    # Remove code snippets. DOTALL lets ``.`` cross newlines so multi-line
    # blocks are matched, and the closing tag is spelled exactly
    # ``</code>`` (a stray space there prevented any match).
    html = re.sub(r'<pre>(.*?)</pre>', ' ', html, flags=re.DOTALL)
    html = re.sub(r'<code>(.*?)</code>', ' ', html, flags=re.DOTALL)

    # Extract text; ``find_all`` is the current name of the legacy ``findAll``.
    soup = BeautifulSoup(html, "html.parser")
    return ''.join(soup.find_all(string=True))
# Função para geração de fala a partir do texto
async def text_to_speech(input_text, voice_short_name):
communicate = edge_tts.Communicate(input_text, voice_short_name)
Expand All @@ -376,6 +359,14 @@ async def text_to_speech(input_text, voice_short_name):

return tmp_path

def clear(texto):
    """Strip every "*" and "#" character from a text.

    Runs of any length and mix ("##", "**", "***", "####", ...) are
    removed entirely; all other characters are left untouched.
    """
    marcadores = re.compile(r"[\*\#]+")
    return marcadores.sub("", texto)

# Endpoint para converter texto em fala
@app.post("/v1/audio/speech")
async def generate_speech(request: TTSRequest):
Expand All @@ -388,7 +379,7 @@ async def generate_speech(request: TTSRequest):
selected_voice = voice_mapping["default"][request.voice]

# Gerar o arquivo de áudio
audio_path = await text_to_speech(markdown_to_text(request.input), selected_voice)
audio_path = await text_to_speech(clear(request.input), selected_voice)

# Retornar o arquivo de áudio como streaming
with open(audio_path, "rb") as audio_file:
Expand Down
1 change: 1 addition & 0 deletions src/plain-text-markdown-extention
Submodule plain-text-markdown-extention added at 8740e7

0 comments on commit 8113377

Please sign in to comment.