Skip to content

Commit

Permalink
Merge pull request #62 from aedocw/main
Browse files Browse the repository at this point in the history
Trigger docker image build
  • Loading branch information
aedocw authored Nov 11, 2023
2 parents 4098012 + e7bb221 commit b6be765
Show file tree
Hide file tree
Showing 4 changed files with 71 additions and 5 deletions.
4 changes: 4 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ This script takes an epub (or text file) and reads it to an mp3 or an m4b audiob

I recognize this is not very user friendly, but I wanted to share it in case folks find it useful. If a few more people besides me find it useful, I will keep working on turning it into something that could be used by someone without dev experience.

**NOTE:** Now with [OpenAI TTS](https://platform.openai.com/docs/guides/text-to-speech) support! It's not free, but the average cost for a few books I tested was around $7. If you use the `--openai <API key>` flag, epub2tts will provide a cost estimate and prompt you to approve before continuing.

**NOTE:** HUGE thanks to [wonka929](https://github.com/wonka929) for a recent PR: epub2tts now recognizes when a CUDA GPU is available and will use it automatically. In a brief test I did, the speedup was incredible!

## USAGE:
Expand All @@ -13,6 +15,8 @@ Usage:

URL: `epub2tts --url https://www.example.com/page --name example-page`

To use OpenAI TTS, add: `--openai <your API key>` (use the `--speaker` option to specify a voice other than onyx: `--speaker shimmer`)

To change speaker (e.g. p307 for a good male voice), add: `--speaker p307`

To output in mp3 format instead of m4b, add: `--mp3`
Expand Down
66 changes: 63 additions & 3 deletions epub2tts.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,11 @@
from ebooklib import epub
from newspaper import Article
from pydub import AudioSegment
import pysbd
from TTS.api import TTS
import torch, gc
from openai import OpenAI


# Verify if CUDA or mps is available and select it
if torch.cuda.is_available():
Expand Down Expand Up @@ -134,7 +137,10 @@ def get_speaker():
index = sys.argv.index("--speaker")
speaker_used = sys.argv[index + 1]
else:
speaker_used = "p335"
if "--openai" in sys.argv:
speaker_used = "onyx"
else:
speaker_used = "p335"
print(f"Speaker: {speaker_used}")
return(speaker_used)

Expand Down Expand Up @@ -219,10 +225,29 @@ def get_end(chapters_to_read):
end = len(chapters_to_read)
return(end)

def get_api_key():
    """Return the OpenAI API key supplied on the command line.

    Looks for the ``--openai`` flag in ``sys.argv`` and returns the argument
    that immediately follows it, as a string.

    Returns:
        The API key, or an empty string when ``--openai`` was not given.

    Raises:
        SystemExit: if ``--openai`` is the last argument, i.e. no key
            follows the flag.
    """
    if "--openai" not in sys.argv:
        return ''
    value_index = sys.argv.index("--openai") + 1
    if value_index >= len(sys.argv):
        # Without this guard the lookup below fails with a bare IndexError
        # traceback, which gives the user no hint about what is missing.
        sys.exit("--openai requires an API key: --openai <API key>")
    # The key is a secret, so it is intentionally NOT echoed to stdout,
    # where it would end up in terminal scrollback and captured logs.
    return str(sys.argv[value_index])

def combine_sentences(sentences, length=3500):
    """Group consecutive sentences into chunks of at most ``length`` characters.

    Sentences are joined with a single space. A chunk is emitted as soon as
    adding the next sentence (plus its separating space) would push it past
    ``length``. A single sentence that is itself longer than ``length`` is
    emitted on its own rather than being split.

    Args:
        sentences: iterable of sentence strings, in reading order.
        length: maximum number of characters per chunk.

    Yields:
        Non-empty chunk strings, in order. Nothing is yielded for empty
        input, so callers never receive an empty string.
    """
    parts = []
    size = 0  # length of " ".join(parts)
    for sentence in sentences:
        if not parts:
            # First sentence of a chunk is always accepted, even if it is
            # oversized, so that no text is ever dropped.
            parts.append(sentence)
            size = len(sentence)
        elif size + 1 + len(sentence) <= length:
            parts.append(sentence)
            size += 1 + len(sentence)
        else:
            yield " ".join(parts)
            # Start the next chunk as a list so the following sentence is
            # separated by a space instead of being glued onto this one.
            parts = [sentence]
            size = len(sentence)
    if parts:
        yield " ".join(parts)

def main():
bookname = get_bookname() #detect .txt, .epub or https
booktype = bookname.split('.')[-1]
speaker_used = get_speaker()
openai_api_key = get_api_key()
if booktype == "epub":
book = epub.read_epub(bookname)
chapters_to_read = get_chapters_epub(book, bookname)
Expand All @@ -249,17 +274,52 @@ def main():
start = get_start()
end = get_end(chapters_to_read)
total_chars = get_length(start, end, chapters_to_read)
print("Total characters: " + str(total_chars))
if "--openai" in sys.argv:
while True:
openai_sdcost = (total_chars/1000) * 0.015
print("OpenAI TTS SD Cost: $" + str(openai_sdcost))
user_input = input("This will not be free, continue? (y/n): ")
if user_input.lower() not in ['y', 'n']:
print("Invalid input. Please enter y for yes or n for no.")
elif user_input.lower() == 'n':
sys.exit()
else:
print("Continuing...")
break
files = []
position = 0
start_time = time.time()
tts = TTS(model_name).to(device)
if "--openai" in sys.argv:
client = OpenAI(api_key=openai_api_key)
else:
tts = TTS(model_name).to(device)

for i in range(start, end):
outputwav = bookname.split(".")[0]+"-"+str(i+1)+".wav"
print("Reading " + str(i))
if os.path.isfile(outputwav):
print(outputwav + " exists, skipping to next chapter")
else:
tts.tts_to_file(text = chapters_to_read[i], speaker = speaker_used, file_path = outputwav)
if "--openai" in sys.argv:
tempfiles = []
segmenter = pysbd.Segmenter(language="en", clean=True)
sentences = segmenter.segment(chapters_to_read[i])
sentence_groups = list(combine_sentences(sentences))
for x in range(len(sentence_groups)):
tempwav = "temp" + str(x) + ".mp3"
print(sentence_groups[x])
response = client.audio.speech.create( model="tts-1", voice=speaker_used, input=sentence_groups[x])
response.stream_to_file(tempwav)
tempfiles.append(tempwav)
tempwavfiles = [AudioSegment.from_mp3(f"{f}") for f in tempfiles]
concatenated = sum(tempwavfiles)
concatenated.export(outputwav, format="wav")
for f in tempfiles:
os.remove(f)
else:
tts.tts_to_file(text = chapters_to_read[i], speaker = speaker_used, file_path = outputwav)

files.append(outputwav)
position += len(chapters_to_read[i])
percentage = (position / total_chars) *100
Expand Down
4 changes: 3 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
TTS
ebooklib
beautifulsoup4
openai
pydub
newspaper3k
pysbd
newspaper3k
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
author_email='doc@aedo.net',
url='https://github.com/aedocw/epub2tts',
license='Apache License, Version 2.0',
version='1.3.12',
version='1.4.0',
packages=find_packages(),
install_requires=requirements,
py_modules=['epub2tts'],
Expand Down

0 comments on commit b6be765

Please sign in to comment.