From b0f3d7bae0e7bd0868c8da3268f4c7c020285c82 Mon Sep 17 00:00:00 2001 From: Christopher Aedo Date: Fri, 5 Jan 2024 17:08:51 -0800 Subject: [PATCH 1/4] WIP --- epub2tts.py | 18 ++++++++++++------ setup.py | 2 +- 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/epub2tts.py b/epub2tts.py index c42b853..aa64303 100644 --- a/epub2tts.py +++ b/epub2tts.py @@ -210,12 +210,18 @@ def get_chapters_text(self): text = file.read() text = self.prep_text(text) max_len = 50000 - while len(text) > max_len: - pos = text.rfind(" ", 0, max_len) # find the last space within the limit - self.chapters_to_read.append(text[:pos]) - print(f"Part: {len(self.chapters_to_read)}") - print(str(self.chapters_to_read[-1])[:256]) - text = text[pos + 1 :] # +1 to avoid starting the next chapter with a space + lines_with_hashtag = [line for line in text.splitlines() if line.startswith("# ")] + if lines_with_hashtag: + sections = re.split(r"#\s+", text.strip())[1:] + for section in sections: + self.chapters_to_read.append(section.strip()) + else: + while len(text) > max_len: + pos = text.rfind(" ", 0, max_len) # find the last space within the limit + self.chapters_to_read.append(text[:pos]) + print(f"Part: {len(self.chapters_to_read)}") + print(str(self.chapters_to_read[-1])[:256]) + text = text[pos + 1 :] # +1 to avoid starting the next chapter with a space self.chapters_to_read.append(text) self.end = len(self.chapters_to_read) diff --git a/setup.py b/setup.py index 20cf5a1..b4880f2 100644 --- a/setup.py +++ b/setup.py @@ -10,7 +10,7 @@ author_email="doc@aedo.net", url="https://github.com/aedocw/epub2tts", license="Apache License, Version 2.0", - version="2.3.1", + version="2.3.2", packages=find_packages(), install_requires=requirements, py_modules=["epub2tts"], From 1afd6837d503d0f8644c3e001a37b04149f3b59a Mon Sep 17 00:00:00 2001 From: Christopher Aedo Date: Fri, 5 Jan 2024 18:10:41 -0800 Subject: [PATCH 2/4] Still WIP --- epub2tts.py | 5 +---- setup.py | 2 +- 2 files changed, 2 
insertions(+), 5 deletions(-) diff --git a/epub2tts.py b/epub2tts.py index aa64303..c08049c 100644 --- a/epub2tts.py +++ b/epub2tts.py @@ -675,9 +675,6 @@ def main(): parser.add_argument( "--export", type=str, - nargs="?", - const="txt", - default="txt", help="Export epub contents to file (txt, md coming soon)" ) @@ -710,7 +707,7 @@ def main(): mybook.get_chapters_text() if args.scan: sys.exit() - if args.export: + if args.export is not None: mybook.export( format=args.export, ) diff --git a/setup.py b/setup.py index b4880f2..f7dedbf 100644 --- a/setup.py +++ b/setup.py @@ -10,7 +10,7 @@ author_email="doc@aedo.net", url="https://github.com/aedocw/epub2tts", license="Apache License, Version 2.0", - version="2.3.2", + version="2.3.4", packages=find_packages(), install_requires=requirements, py_modules=["epub2tts"], From dd32ac29c733d7de72170d93a532906d2eac2dd7 Mon Sep 17 00:00:00 2001 From: Christopher Aedo Date: Fri, 5 Jan 2024 18:15:02 -0800 Subject: [PATCH 3/4] think about the indents --- epub2tts.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/epub2tts.py b/epub2tts.py index c08049c..46e15ed 100644 --- a/epub2tts.py +++ b/epub2tts.py @@ -313,11 +313,11 @@ def export(self, format): if overwrite.lower() != 'y': print("Exiting without overwriting the file.") sys.exit() - print(f"Exporting parts {self.start + 1} to {self.end} to {outputfile}") - with open(outputfile, "w") as file: - for partnum, i in enumerate(range(self.start, self.end)): - file.write(f"\n# Part {partnum + 1}\n\n") - file.write(self.chapters_to_read[i] + "\n") + print(f"Exporting parts {self.start + 1} to {self.end} to {outputfile}") + with open(outputfile, "w") as file: + for partnum, i in enumerate(range(self.start, self.end)): + file.write(f"\n# Part {partnum + 1}\n\n") + file.write(self.chapters_to_read[i] + "\n") def read_book(self, voice_samples, engine, openai, model_name, speaker, bitrate): self.model_name = model_name From 
690facec73cd3b89d2d7804457c5ce3952311e58 Mon Sep 17 00:00:00 2001 From: Christopher Aedo Date: Fri, 5 Jan 2024 20:11:59 -0800 Subject: [PATCH 4/4] Tested and everything seems to be working properly --- README.md | 8 ++++---- epub2tts.py | 15 ++++++++++++--- 2 files changed, 16 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index 5ddcc2d..96277ec 100644 --- a/README.md +++ b/README.md @@ -2,6 +2,8 @@ This script takes an epub (or text file) and reads it to an m4b audiobook file, I recognize this is not very user friendly, but I wanted to share in case folks thought it was useful. If there are a few more people than myself that find this is useful I will keep working on turning it into something that could be used by someone without dev experience. +**NOTE:** The latest release adds a new workflow allowing you to export the epub to text, make any necessary modifications, then read the book as a text file. Any line beginning with "# " is treated as a chapter break; these lines are inserted automatically during export, named "# Part 1", "# Part 2", etc. If you replace "Part 1" with whatever you want that section to be called, it will be labeled that way in the audiobook metadata. + **NOTE:** DeepSpeed support for XTTS has been added! If deepspeed is installed and you have a compatible GPU, it will be detected and used. For XTTS, this will yeild a 3x-4x speed improvement! Install deepspeed with `pip install deepspeed`. **NOTE:** The Coqui team released their curated XTTS voice models recently, and they sound great. A recent update here @@ -14,10 +16,6 @@ Example usage: `epub2tts my-book.epub --engine xtts --speaker "Damien Black"` Example usage: `epub2tts my-book.epub --start 4 --end 20 --xtts shadow-1.wav,shadow-2.wav,shadow-3.wav` -**NOTE:** Now with [OpenAI TTS](https://platform.openai.com/docs/guides/text-to-speech) support! It's not free, but the average cost for a few books I tested was around $7.
If you use `--openai ` flag epub2tts will provide a cost estimate and prompt you to approve before continuing. - -**NOTE:** HUGE thanks to a recent PR from [wonka929](https://github.com/wonka929), epub2tts now recognizes when a CUDA GPU is available and will use it automatically. In a brief test I did, the speedup was incredible! - ## USAGE: Usage: @@ -35,6 +33,8 @@ To skip reading any links, add: `--skiplinks` Using `--scan` will list excerpts of each chapter, then exit. This is helpful for finding which chapter to start and end on if you want to skip bibliography, TOC, etc. +Using `--export txt` will export the entire book to a text file. This will honor the `--start` and `--end` arguments as well. + To specify which chapter to start on (ex 3): `--start 3` To specify which chapter to end on (ex 20): `--end 20` diff --git a/epub2tts.py b/epub2tts.py index 46e15ed..59e5d7a 100644 --- a/epub2tts.py +++ b/epub2tts.py @@ -58,6 +58,7 @@ def __init__( self.output_filename = self.bookname + ".m4b" self.chapters = [] self.chapters_to_read = [] + self.section_names = [] if source.endswith(".epub"): self.book = epub.read_epub(source) self.sourcetype = "epub" @@ -110,7 +111,10 @@ def generate_metadata(self, files, title, author): file.write("TIMEBASE=1/1000\n") file.write("START=" + str(start_time) + "\n") file.write("END=" + str(start_time + duration) + "\n") - file.write("title=Part " + str(chap) + "\n") + if len(self.section_names) > 0: + file.write(f"title={self.section_names[chap-1]}\n") + else: + file.write("title=Part " + str(chap) + "\n") chap += 1 start_time += duration @@ -212,8 +216,13 @@ def get_chapters_text(self): max_len = 50000 lines_with_hashtag = [line for line in text.splitlines() if line.startswith("# ")] if lines_with_hashtag: + for line in lines_with_hashtag: + self.section_names.append(line.lstrip("# ").strip()) sections = re.split(r"#\s+", text.strip())[1:] for section in sections: + if self.sayparts == False: + lines = section.splitlines() + section = 
"\n".join(lines[1:]) self.chapters_to_read.append(section.strip()) else: while len(text) > max_len: @@ -222,7 +231,7 @@ def get_chapters_text(self): print(f"Part: {len(self.chapters_to_read)}") print(str(self.chapters_to_read[-1])[:256]) text = text[pos + 1 :] # +1 to avoid starting the next chapter with a space - self.chapters_to_read.append(text) + self.chapters_to_read.append(text) self.end = len(self.chapters_to_read) def read_chunk_xtts(self, sentences, wav_file_path): @@ -414,7 +423,7 @@ def read_book(self, voice_samples, engine, openai, model_name, speaker, bitrate) print(f"{outputflac} exists, skipping to next chapter") else: tempfiles = [] - if self.sayparts: + if self.sayparts and len(self.section_names) == 0: chapter = "Part " + str(partnum + 1) + ". " + self.chapters_to_read[i] else: chapter = self.chapters_to_read[i]