#!/usr/bin/env th
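--[[
laia-force-align computes, for each line image in a list, the forced
alignment between the image frames and its reference transcript, using a
trained Laia model. Optionally, it also writes the horizontal padding that
was applied to each image and the label priors estimated from the network
posteriors (useful, for instance, to convert posteriors into scaled
likelihoods).

Example invocation (file names are illustrative):

  laia-force-align --batch_size 8 --output_hpad hpad.txt \
    model.t7 syms.txt images.lst transcripts.txt align.txt priors.txt
]]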
require 'laia'
local batcher = laia.RandomBatcher()
local parser = laia.argparse(){
  name = 'laia-force-align',
  description = 'Compute the forced alignment between transcripts and line ' ..
    'images, using a trained Laia model.'
}
-- Register laia.Version options
laia.Version():registerOptions(parser)
-- Register laia.log options
laia.log.registerOptions(parser)
-- Register cudnn options, only if available
if cudnn then cudnn.registerOptions(parser, true) end
-- Register batcher options
batcher:registerOptions(parser)
parser:argument('checkpoint',
  'Path of the file containing the trained checkpoint/model.')
parser:argument('symbols_table', 'Table mapping from symbols to integer IDs.')
parser:argument('image_list', 'File containing the list of images to align.')
parser:argument('text_table',
  'File containing the table of transcripts to align.')
parser:argument('output_align', 'File containing the output alignments.')
parser:argument('output_prior', 'File containing the output label priors.')
  :args('?') -- This argument is optional.

parser:option(
  '--seed -s', 'Seed for random number generation.',
  0x012345, laia.toint)
parser:option(
  '--gpu', 'If gpu>0, uses the specified GPU, otherwise uses the CPU.',
  1, laia.toint)
parser:option(
  '--smoothing_additive', 'If > 0, adds this amount to all symbols for the ' ..
  'prior computation.',
  0, tonumber)
  :argname('<float>')
parser:option(
  '--auto_width_factor', 'If true, sets the width factor for the batcher ' ..
  'automatically, from the size of the pooling layers.',
  false, laia.toboolean)
  :argname('<bool>')
parser:option(
  '--batch_size -b', 'Batch size.', 16, laia.toint)
  :ge(1)
parser:option(
  '--output_hpad', 'If given, write the horizontal padding applied to each ' ..
  'image to this file.', '')
  :argname('<file>')
parser:option(
  '--skip_alignments', 'If true, does not output the alignments.',
  false, laia.toboolean)
  :argname('<bool>')
-- Parse options
local opts = parser:parse()
-- Initialize random seeds
laia.manualSeed(opts.seed)
-- Load *BEST* model from the checkpoint.
local model = laia.Checkpoint():load(opts.checkpoint):Best():getModel()
assert(model ~= nil, 'No model was found in the checkpoint file!')
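-- A checkpoint may store more than one model; Best() selects the one that
-- achieved the best validation performance during training.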
-- Append a LogSoftMax layer, so that the model outputs log-probabilities.
model:add(nn.LogSoftMax())
-- If a GPU is requested, check that we have everything necessary.
if opts.gpu > 0 then
  assert(cutorch ~= nil, 'Package cutorch is required in order to use the GPU.')
  assert(cunn ~= nil, 'Package cunn is required in order to use the GPU.')
  cutorch.setDevice(opts.gpu)
  model = model:cuda()
  -- If cudnn_force_convert=true, force all possible layers to use the cuDNN
  -- implementation.
  if cudnn and cudnn.force_convert then
    cudnn.convert(model, cudnn)
  end
else
  -- This should not be necessary, but just in case.
  model = model:float()
end
-- Put the model in evaluation mode (e.g. disables dropout).
model:evaluate()
-- Prepare batcher
if opts.auto_width_factor then
  local width_factor = laia.getWidthFactor(model)
  batcher:setOptions({width_factor = width_factor})
  laia.log.info('Batcher width factor was automatically set to %d',
    width_factor)
end
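-- Load the dataset (image list, transcript table and symbol table) into the
-- batcher; epochReset() starts a fresh epoch, so the loop below makes a
-- single full pass over the samples.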
batcher:load(opts.image_list, opts.text_table, opts.symbols_table)
batcher:epochReset()
-- Open the file to output the alignments.
local output_align = nil
if not opts.skip_alignments then
  output_align = opts.output_align == '-' and io.stdout or
    io.open(opts.output_align, 'w')
  assert(output_align ~= nil,
    ('File %q could not be opened for writing!'):format(opts.output_align))
end
-- Open the file to write the horizontal padding of each sample.
local output_hpad = nil
if opts.output_hpad ~= '' then
  output_hpad = opts.output_hpad == '-' and io.stdout or
    io.open(opts.output_hpad, 'w')
  assert(output_hpad ~= nil,
    ('File %q could not be opened for writing!'):format(opts.output_hpad))
end
-- Open the file to write the label priors (optional argument, may be nil).
local output_prior = opts.output_prior == '-' and io.stdout or
  (opts.output_prior ~= nil and opts.output_prior ~= '' and
    io.open(opts.output_prior, 'w')) or nil
local prior_count = nil
for b=1,batcher:numSamples(),opts.batch_size do
  -- Prepare the batch.
  local batch_img, batch_gt, _, batch_ids, batch_hpad =
    batcher:next(opts.batch_size)
  if opts.gpu > 0 then batch_img = batch_img:cuda() end
  -- Forward through the network, and copy the output to the CPU.
  local output = model:forward(batch_img):float()
  -- Put the output in batch x frame x label layout.
  local num_frames = output:size(1) / opts.batch_size
  output = output:view(num_frames, opts.batch_size, output:size(2))
  output = output:permute(2, 1, 3):contiguous()
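  -- The raw activations come in CTC layout: a (num_frames * batch_size) x L
  -- matrix in which row 1 is frame 1 of sample 1, row 2 is frame 1 of
  -- sample 2, and so on. The view/permute above turns this into one
  -- frames x labels matrix per sample, so that output[i] can be aligned
  -- independently.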
  -- Write the horizontal padding of each sample.
  if output_hpad then
    for i=1,opts.batch_size do
      if i+b-1 > batcher:numSamples() then break end
      output_hpad:write(('%s %d %d %d\n'):format(
        batch_ids[i], batch_hpad[i][1], batch_hpad[i][2], batch_hpad[i][3]))
    end
    output_hpad:flush()
  end
  -- Initialize the prior counts, one per output label.
  if output_prior and not prior_count then
    prior_count = torch.FloatTensor(output:size(3)):zero()
  end
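  -- The priors are accumulated as soft counts: for every sample, the frame
  -- posteriors are summed over all frames, so that prior_count[n] is the
  -- expected number of frames emitting label n. A hard-count variant, based
  -- on the forced alignment itself, is kept commented out below.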
  for i=1,opts.batch_size do
    -- The batch may be padded with extra images; stop at the true number of
    -- samples.
    if i+b-1 > batcher:numSamples() then break end
    if not opts.skip_alignments then
      -- Compute the forced alignment of the sample w.r.t. batch_gt[i].
      laia.log.info('Performing forced alignment of sample %q', batch_ids[i])
      local alignment = laia.force_alignment(output[i], batch_gt[i])
      -- Write the alignment: the sample ID followed by one label per frame.
      output_align:write(batch_ids[i])
      for f=1,#alignment do
        output_align:write(' ' .. alignment[f])
      end
      output_align:write('\n')
      output_align:flush()
    end
    if output_prior then
      -- Accumulate the per-label posterior mass of this sample.
      prior_count = prior_count + output[i]:exp():sum(1)
      --[[ Alternative: hard counts taken from the forced alignment.
      for _,v in ipairs(alignment) do
        prior_count[v] = prior_count[v] + 1
      end
      ]]
    end
  end
end
-- Close the output files.
if not opts.skip_alignments then
  output_align:close()
end
if output_hpad then
  output_hpad:close()
end
-- Output the label priors and total counts.
if output_prior and prior_count then
  local prior_total = prior_count:sum()
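  -- Each output line contains: the label ID, its soft count, the total
  -- count, and the estimated prior probability. With additive smoothing
  -- a > 0, the prior is (a + count[n]) / (L * a + total), where L is the
  -- number of labels; otherwise it is simply count[n] / total.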
  if opts.smoothing_additive > 0 then
    for n=1,prior_count:size(1) do
      output_prior:write(('%d\t%.6f\t%d\t%.10e\n'):format(
        n, prior_count[n], prior_total,
        (opts.smoothing_additive + prior_count[n]) /
        (prior_count:size(1) * opts.smoothing_additive + prior_total)))
    end
  else
    for n=1,prior_count:size(1) do
      output_prior:write(('%d\t%.6f\t%d\t%.10e\n'):format(
        n, prior_count[n], prior_total, prior_count[n] / prior_total))
    end
  end
  output_prior:close()
end