-
Notifications
You must be signed in to change notification settings - Fork 1
/
extractAudioTrigger.py
74 lines (64 loc) · 3.09 KB
/
extractAudioTrigger.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
import argparse
import os
import subprocess

import numpy as np
import pandas as pd
import soundfile as sf
def extract_audio_from(file, out_dir=''):
    """Extract the audio of a video file into a wav file by calling ffmpeg.

    The wav file keeps the base name of the input file, with its extension
    (of any length) replaced by ``.wav``. It is written to *out_dir*, or
    relative to the current working directory when *out_dir* is empty.

    Args:
        file: Path of the input video file.
        out_dir: Directory in which the wav file is created.

    Returns:
        The path of the wav file. The path is returned even when ffmpeg is
        missing or exits with an error; in that case a message is printed
        and the file may not exist.
    """
    # splitext handles extensions of any length ('.mp4', '.mpeg', ...),
    # unlike slicing off a fixed number of characters.
    stem = os.path.splitext(os.path.basename(file))[0]
    output_filename = os.path.join(out_dir, f'{stem}.wav')
    try:
        # An argument list (no shell) keeps paths containing spaces or shell
        # metacharacters intact and avoids command injection.
        result = subprocess.run(['ffmpeg', '-i', file, output_filename],
                                check=False)
    except OSError as err:
        # Raised e.g. when the ffmpeg executable cannot be found.
        print(f'Could not run ffmpeg: {err}')
    else:
        if result.returncode != 0:
            print(f'ffmpeg exited with status {result.returncode} for {file}')
    return output_filename
def _detect_trigger(channel, n_std=10, min_gap=100):
    """Build a 0/1 integer vector marking trigger-on samples of *channel*.

    Edge candidates are the samples where the absolute change of the
    rectified signal exceeds ``mean + n_std * std`` of that change.
    Candidates separated by more than *min_gap* samples start a new edge.
    Edges are then paired up as (onset, offset); every sample from onset to
    offset (inclusive) is set to 1. A trailing unpaired edge is ignored.

    Args:
        channel: 1-D array holding the samples of the trigger channel.
        n_std: Number of standard deviations above the mean used as threshold.
        min_gap: Minimum index distance between two distinct edges.

    Returns:
        Integer array of the same length as *channel* containing 0 and 1.
    """
    trigger = np.zeros(len(channel), dtype=int)
    edge_strength = np.abs(np.diff(np.abs(channel)))
    threshold = edge_strength.mean() + n_std * edge_strength.std()
    idx = np.argwhere(edge_strength > threshold).flatten()
    if idx.size == 0:
        # Nothing crossed the threshold: there is no edge to index into.
        return trigger
    # Keep the first candidate and every candidate that follows a large gap.
    gap_positions = np.argwhere(np.diff(idx) > min_gap).flatten()
    edges = np.concatenate(([idx[0]], idx[gap_positions + 1]))
    for onset, offset in zip(edges[::2], edges[1::2]):
        trigger[onset:offset + 1] = 1
    return trigger


def process_file(inpath, input, intermediate_out, final_out):
    """Extract the audio of one video and save its trigger signal as csv.

    The audio is extracted with ``extract_audio_from`` and reloaded with
    soundfile. The second channel (index 1) is searched for trigger on- and
    offsets, and a table with the columns ``time`` and ``trigger`` (0 or 1)
    is written to ``<final_out>/<input without extension>.csv``.

    Single-channel files contain no trigger channel; for those a message is
    printed and no csv file is written.

    Args:
        inpath: Directory containing the video ('' if *input* is a full path).
        input: File name of the video.
        intermediate_out: Output directory for the intermediate wav file.
        final_out: Output directory for the csv trigger file.
    """
    print(f'Processing {input}')
    intermediate_input = extract_audio_from(os.path.join(inpath, input), intermediate_out)
    data, fs_audio = sf.read(intermediate_input)  # load trigger file

    # sf.read returns a 1-D array for mono files, so the number of
    # dimensions has to be checked before looking at shape[1].
    if data.ndim < 2 or data.shape[1] < 2:
        print('No trigger found in right channel. File is single channel!')
        return

    trigger = _detect_trigger(data[:, 1])
    if not trigger.any():
        print('Warning: no complete trigger on/offset pair detected; '
              'trigger column is all zeros.')

    # One time stamp per sample, derived from the sample rate.
    time = np.linspace(0, (len(data) - 1) / fs_audio, len(data))
    df = pd.DataFrame({'time': time, 'trigger': trigger})
    print('Saving...(might take some time)')
    # splitext copes with extensions of any length, unlike input[:-4].
    csv_name = f'{os.path.splitext(input)[0]}.csv'
    df.to_csv(os.path.join(final_out, csv_name), index=False)
    print('Done.')
if __name__ == "__main__":
    # Command-line entry point.
    # Takes a video file as input, extracts the audio and saves it as a wav
    # file. The audio data is then reloaded and a table with trigger data
    # (0 or 1) is saved. If separate output folders for the wav file and the
    # trigger file are provided, the respective files are saved there.
    # When --video does not end with the given extension it is treated as a
    # directory and every matching file inside it is processed.
    parser = argparse.ArgumentParser()
    parser.add_argument("-v", "--video", type=str, required=True,
                        help="Input video file")
    parser.add_argument("-e", "--ext", type=str, required=False, default='MP4',
                        help="Video file extension")
    parser.add_argument("-ao", "--audioout", type=str, required=False, default='',
                        help="Output path for audio file")
    parser.add_argument("-o", "--out", type=str, required=False, default='',
                        help="Output path for audio trigger file")
    options = vars(parser.parse_args())

    video = options['video']
    extension = options['ext']
    audio_dir = options['audioout']
    trigger_dir = options['out']

    if not video.endswith(extension):
        print('Batch processing')
        for entry in os.listdir(video):
            if entry.endswith(extension):
                process_file(video, entry, audio_dir, trigger_dir)
    else:
        print('One-shot processing')
        process_file('', video, audio_dir, trigger_dir)