-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathmain.py
86 lines (74 loc) · 4.35 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
from aeneas.executetask import ExecuteTask
from aeneas.task import Task
from os import listdir
from os.path import isfile, join
from datetime import datetime
from scripts.clean import clean
from scripts.segment import segment
from scripts.prefix import get_smil_prefix
from scripts.generate_ids import generate_id
import sys
import shutil
import re
if __name__ == "__main__":
    # Command-line entry point.
    #
    # Usage: main.py <foldername> <bookname>
    #   foldername - sub-folder name used under ./public/uploads/ and
    #                ./public/output/
    #   bookname   - name of the book; passed to the preprocessing helpers and
    #                used for the SMIL page reference ("<bookname>.html")
    #
    # Pipeline: generate ids -> clean -> segment -> align every mp3 with its
    # text segment using aeneas -> zip the output folder -> remove the working
    # folders. Progress, "Done" and "ERROR: " lines are printed to stdout.
    try:
        language = 'isl'
        foldername = sys.argv[1]
        # bookname takes the name of the book.
        bookname = sys.argv[2]

        # Build the working directory paths once instead of re-formatting
        # them for every directory entry.
        upload_dir = "./public/uploads/{}".format(foldername)
        segments_dir = "./public/uploads/{}/segments".format(foldername)
        output_dir = "./public/output/{}".format(foldername)

        # Only include the mp3 files (skipping the daisy-online sample) and
        # sort them, since listdir gives no ordering guarantee (linux env).
        mp3files = [
            f for f in listdir(upload_dir)
            if isfile(join(upload_dir, f))
            and f.endswith(".mp3")
            and 'daisy-online-sample' not in f
        ]
        mp3files.sort()

        # Makes sure that all spans with class="sentence" have some ID
        generate_id(foldername, bookname)
        # Clean the book before segmenting
        # Combs the book for headers and sentences
        clean(foldername, bookname)
        # Segment the book. The resulting segment files are read back from
        # ./public/uploads/<foldername>/segments/ right below.
        segment(foldername, bookname)

        # Only include the text files that end in html
        segments = [
            f for f in listdir(segments_dir)
            if isfile(join(segments_dir, f)) and f.endswith(".html")
        ]
        segments.sort()

        # There needs to be the same number of mp3 files as there are segment
        # files. 1 to 1 ratio!
        print("{} - Number of mp3 files: {}".format(datetime.now().strftime("%H:%M:%S"), len(mp3files)))
        print("{} - Number of segments: {}".format(datetime.now().strftime("%H:%M:%S"), len(segments)))
        # Flush so the counts reach the listening server immediately
        sys.stdout.flush()

        if len(mp3files) != len(segments):
            # Abort if segmented files dont match mp3 files (equal number of files)
            raise Exception("The number of segmentation files and mp3 files does not match.\nPlease fix, refresh and try again.")

        smil_prefix, smil_num_len = get_smil_prefix(foldername)
        total = len(mp3files)

        # Run through each mp3 file and its book segment. Segment files are
        # addressed by position as b1.html, b2.html, ... (1-based).
        for i, mp3 in enumerate(mp3files):
            number = i + 1
            # Setup config string & file paths for audio/text/syncfile
            config_string = u"task_language={}|is_text_type=unparsed|os_task_file_format=smil|os_task_file_smil_audio_ref={}|os_task_file_smil_page_ref={}.html".format(language, mp3, bookname)
            # Create Task
            task = Task(config_string=config_string)
            task.audio_file_path_absolute = u"./public/uploads/{}/{}".format(foldername, mp3)
            task.text_file_path_absolute = u"./public/uploads/{}/segments/b{}.html".format(foldername, number)
            # Each smil file is named the expected smil_prefix + number with
            # leading zeros (3 or 4)
            task.sync_map_file_path_absolute = u"./public/output/{}/{}{}.smil".format(foldername, smil_prefix, str(number).zfill(smil_num_len))
            print("{} - {}/{}".format(datetime.now().strftime("%H:%M:%S"), number, total))
            # stdout.flush forces the progress print to be relayed to the
            # server in real time
            sys.stdout.flush()
            # Execute Task and write the sync map to the output path
            ExecuteTask(task).execute()
            task.output_sync_map_file()

        # All files are aligned: zip the output folder next to itself
        # (./public/output/<foldername>.zip).
        shutil.make_archive(output_dir, 'zip', output_dir)
        # This "Done" print statement is used by the server to detect when the
        # program finishes running. (Websocket is listening for it)
        print("Done")
    except Exception as e:
        # Report the failure on stdout, then re-raise so the traceback and a
        # non-zero exit status are preserved.
        print("ERROR: ", e)
        raise

    # NOTE(review): because the handler above re-raises, this cleanup is only
    # reached after a successful run - confirm that keeping the folders on
    # failure is intended.
    # Delete output/<foldername>/ folder and files when aeneas is done processing
    shutil.rmtree(output_dir)
    # Delete uploads/<foldername>/ folder and files when aeneas is done processing
    shutil.rmtree(upload_dir)