-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathAudioRequestSpreadsheet.py
291 lines (242 loc) · 10.2 KB
/
AudioRequestSpreadsheet.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
#!/usr/bin/env python
"""Create a spreadsheet of glossary term names without pronunciations.
See the notes in GlossaryTermAudioReview.py for an overview of the
process for which the Excel workbook created by this script is used.
"""
from functools import cached_property
from cdrcgi import Controller, Excel
from cdrapi.docs import Doc
from cdr import run_command
class Control(Controller):
    """Script logic encapsulated here.

    The initial page explains what the report produces. Submitting the
    form builds an Excel workbook listing the glossary term names which
    need audio pronunciation files, saves it, and shows a download link.
    """

    SUBTITLE = "Audio Spreadsheet Creation"

    # One tuple per workbook column: (header label, column width,
    # description shown in the bulleted list on the instructions page).
    COLUMNS = (
        ("CDR ID", 10, "unique ID for the GlossaryTermName document"),
        ("Term Name", 30, "string for the name needing pronunciation"),
        ("Language", 10, "English or Spanish"),
        ("Pronunciation", 30, "representation of the name's pronunciation"),
        ("Filename", 30, "relative path where the audio file will be stored"),
        ("Notes (Vanessa)", 20, "column where contractor can enter notes"),
        ("Notes (NCI)", 30, "for instructions provided to the contractor"),
        ("Reuse Media ID", 15, "optional ID of Media document to be reused"),
    )

    # SQL LIKE patterns matched against query_term.path in `docs`.
    NAME_PATH = "/GlossaryTermName/T%Name/TermNameString"
    MEDIA_PATH = "/GlossaryTermName/%/MediaLink/MediaID/@cdr:ref"
    REDO_PATH = "/GlossaryTermName/%/MediaLink/@NeedsReplacementMedia"

    # Text displayed above and below the column list on the first page.
    INSTRUCTIONS = (
        "Click Submit to request an Excel workbook in which are recorded "
        "GlossaryTermName documents with names which need to have audio "
        "pronunciation files created. This workbook can be edited, as "
        "appropriate, to reduce the amount of work requested, or to add "
        "instructions for the contractor who created the pronunciation "
        "files. The generation of the workbook may take up to a minute or "
        "two. The Term Names sheet (the only sheet in the workbook) contains "
        "the following columns:"
    )
    MORE_INSTRUCTIONS = (
        "The workbook will be posted by the contractor to the NCI sFTP server "
        "as part of a zipfile, which will also contain the individual MP3 "
        "audio pronunciation files, each located in the relative path shown "
        "in the Filename column of the workbook."
    )

    def populate_form(self, page):
        """Show the instructions, or generate the workbook and link to it.

        Before the Submit button has been clicked the page carries the
        instructions and the list of workbook columns. Afterwards it
        reports how many names need audio and (if any do) links to the
        freshly saved workbook.

        Pass:
            page - HTMLPage object where we communicate with the user.
        """

        if self.request != self.SUBMIT:
            fieldset = page.fieldset("Instructions")
            fieldset.append(page.B.P(self.INSTRUCTIONS))
            columns = page.B.UL(page.B.CLASS("usa-list"))
            for label, _, description in self.COLUMNS:
                extra = f" ({description})"
                columns.append(page.B.LI(page.B.STRONG(label), extra))
            fieldset.append(columns)
            fieldset.append(page.B.P(self.MORE_INSTRUCTIONS))
        else:
            legend = "Glossary Term Names Without Pronunciation"
            fieldset = page.fieldset(legend)
            if self.count:
                # Referencing self.url is what builds and saves the workbook.
                para = page.B.P(
                    f"Pronunciation files are needed for {self.count} "
                    "glossary term names. ",
                    page.B.A("Download the workbook", href=self.url),
                    " to track the creation and review of those pronunciation "
                    "files."
                )
            else:
                para = page.B.P("No glossary term names need pronunciations.")
            fieldset.append(para)
        page.form.append(fieldset)

    def show_report(self):
        """Redirect back to form."""
        self.show_form()

    @cached_property
    def book(self):
        """Excel workbook containing the names without pronunciations."""

        book = Excel(f"Week_{self.week}")
        book.add_sheet("Term Names")

        # Header row: one bold, centered cell per configured column.
        styles = dict(alignment=book.center, font=book.bold)
        for col, (name, width, _) in enumerate(self.COLUMNS, start=1):
            book.set_width(col, width)
            book.write(1, col, name, styles)

        # Data rows: one per name. A document can have several names in
        # the same language, so each "<id>_<lang>" stem gets a running
        # sequence number appended to keep the audio file names distinct.
        row = 2
        counts = {}
        for doc in self.docs:
            for name in doc.names:
                lang = "en" if name.language == "English" else "es"
                stem = f"{doc.id}_{lang}"
                counts[stem] = counts.get(stem, 0) + 1
                filename = f"{stem}{counts[stem]}"
                book.write(row, 1, doc.id)
                book.write(row, 2, name.string)
                book.write(row, 3, name.language)
                book.write(row, 4, name.pronunciation)
                book.write(row, 5, f"Week_{self.week}/{filename}.mp3")
                if name.media_id:
                    # Column 8 is "Reuse Media ID".
                    book.write(row, 8, name.media_id)
                row += 1
        return book

    @cached_property
    def buttons(self):
        """Hide the Submit button on the second page."""
        return [] if self.request == self.SUBMIT else [self.SUBMIT]

    @cached_property
    def count(self):
        """Number of term names needing pronunciations."""
        return sum(len(doc.names) for doc in self.docs)

    @cached_property
    def docs(self):
        """Name documents with at least one name needing an MP3.

        Selects the IDs of documents which have a name string, appear in
        the pub_proc_cg table, and either have no matching MediaLink row
        or have a MediaLink flagged NeedsReplacementMedia='Yes'. Each ID
        is wrapped in a `TermNameDoc` object.
        """

        query = self.Query("query_term n", "n.doc_id").order("n.doc_id")
        query.join("pub_proc_cg c", "c.id = n.doc_id")

        # Media link under the same name element as the name string
        # (matched on the first four characters of node_loc).
        query.outer("query_term m", "m.doc_id = n.doc_id",
                    f"m.path LIKE '{self.MEDIA_PATH}'",
                    "LEFT(m.node_loc, 4) = LEFT(n.node_loc, 4)")

        # Any media link in the document marked as needing replacement.
        query.outer("query_term r", "r.doc_id = n.doc_id",
                    f"r.path LIKE '{self.REDO_PATH}'", "r.value = 'Yes'")
        query.where(f"n.path LIKE '{self.NAME_PATH}'")
        query.where("m.doc_id IS NULL OR r.doc_id IS NOT NULL")
        rows = query.unique().execute(self.cursor).fetchall()
        return [TermNameDoc(self, row.doc_id) for row in rows]

    @property
    def same_window(self):
        """Don't open new browser tabs."""
        return [self.SUBMIT]

    @cached_property
    def url(self):
        """Address of the new Excel workbook, if any.

        Saves the workbook in the tier's reports directory and runs the
        fix-permissions.cmd command on the saved file, bailing out if
        that command writes anything to standard error.
        """

        if self.book:
            directory = f"{self.session.tier.basedir}/reports"
            self.book.save(directory)
            path = f"{directory}/{self.book.filename}"

            # The command is handed a backslash-separated path.
            path = path.replace("/", "\\")
            process = run_command(f"fix-permissions.cmd {path}")
            if process.stderr:
                self.bail(f"Failure settings permissions for {path}",
                          extra=[process.stderr])
            return f"/cdrReports/{self.book.filename}"
        return None

    @cached_property
    def week(self):
        """String for the current week using ISO numbering."""

        # Only the ISO year and week number are used; the weekday is not.
        year, week, _ = self.started.isocalendar()
        return f"{year}_{week:02d}"
class TermNameDoc:
    """One CDR GlossaryTermName document and the names it contains."""

    # Child elements of the document root which carry a name.
    NAME_ELEMENTS = "TermName", "TranslatedName"

    def __init__(self, control, id):
        """Remember where the document comes from.

        Pass:
            control - access to the current session
            id - CDR ID for the GlossaryTermName document
        """

        self.__control = control
        self.__id = id

    @property
    def id(self):
        """CDR ID for the GlossaryTermName document."""
        return self.__id

    @property
    def session(self):
        """Session of the controlling script, used to build the `Doc`."""
        return self.__control.session

    @cached_property
    def doc(self):
        """`Doc` object for this term name document."""
        return Doc(self.session, id=self.id)

    @cached_property
    def names(self):
        """Names (English first, then Spanish) which are not excluded."""

        candidates = (
            self.Name(node)
            for tag in self.NAME_ELEMENTS
            for node in self.doc.root.findall(tag)
        )
        return [name for name in candidates if not name.exclude]

    class Name:
        """A single English or Spanish name from a glossary term name doc."""

        def __init__(self, node):
            """Remember the element this name is read from.

            Pass:
                node - parsed XML for the name
            """

            self.__node = node

        @property
        def node(self):
            """Parsed XML node for the name."""
            return self.__node

        @cached_property
        def exclude(self):
            """Boolean; True if no audio recording is needed for this name.

            That is the case when the element says AudioRecording="No",
            or when it already links to media which has not been marked
            as needing replacement.
            """

            if self.node.get("AudioRecording") == "No":
                return True
            return bool(self.media_id) and not self.needs_replacement

        @cached_property
        def language(self):
            """English for a TermName element, otherwise Spanish."""

            if self.node.tag == "TermName":
                return "English"
            return "Spanish"

        @cached_property
        def media_id(self):
            """ID of the linked Media document, or None if there isn't one."""

            link = self.node.find("MediaLink/MediaID")
            if link is None:
                return None
            ref = link.get(f"{{{Doc.NS}}}ref")
            try:
                return Doc.extract_id(ref)
            except Exception:
                # Best effort: an unusable reference counts as no media.
                return None

        @cached_property
        def needs_replacement(self):
            """Whether existing media must be replaced.

            None when the name has no MediaLink element at all; otherwise
            True only if the link has NeedsReplacementMedia="Yes".
            """

            link = self.node.find("MediaLink")
            if link is None:
                return None
            return link.get("NeedsReplacementMedia") == "Yes"

        @cached_property
        def string(self):
            """Text of the name's TermNameString child."""
            return Doc.get_text(self.node.find("TermNameString"))

        @cached_property
        def pronunciation(self):
            """Pronunciation string for an English name, otherwise None."""

            if self.language != "English":
                return None
            return Doc.get_text(self.node.find("TermPronunciation"))
if __name__ == "__main__":
    # Don't execute the script if loaded as a module.
    Control().run()