# config.ini
# Configuration for the preprocessing ([preprocess]) and training ([train]) steps.
[preprocess]
# ==================================================================================
# [Related file] preprocess.py, extractor.py
# [Note] Please check and customize configurations before you run the related file.
# ==================================================================================
# [Description] Directory having raw Youtube video files.
src_vid_dir = /hdd/Video
# [Description] Directory having raw audio files from corresponding video in src_vid_dir.
src_aud_dir = /hdd/Audio
# [Description] Directory to have extracted video frames in npz format.
dst_vid_dir = /hdd/data/video
# [Description] Directory to have extracted spectrograms in npz format.
dst_aud_dir = /hdd/data/audio
# [Description] Extension of raw video files in <src_vid_dir>.
# [Note] Please be aware that dot should be included.
vid_ext = .mp4
# [Description] Extension of raw audio files in <src_aud_dir>.
# [Note] Please be aware that dot should be included.
aud_ext = .wav
# [Description] File head name convention of raw video files in <src_vid_dir>.
# [Note] The final name of each video file must look like "<vid_fname_head><vid_id><vid_ext>" (<vid_ext> already includes the dot).
# Here, <vid_id> is the video ID on YouTube, which is taken automatically from the YouTube URL.
# [Note] Naming convention of video files should have been set before you download videos from Youtube via youtube_dl.
# [Note] If there is no video file head name, leave this argument blank.
# [Warning] Video files which do not follow this name convention above will be rejected.
vid_fname_head = video_
# [Description] File head name convention of raw audio files in src_aud_dir.
# [Note] The final name of each audio file must look like "<aud_fname_head><aud_id><aud_ext>" (<aud_ext> already includes the dot).
# Here, <aud_id> is the video ID on YouTube, which is taken automatically from the YouTube URL.
# [Note] Naming convention of audio files should have been set before you retrieve audio from video files via ffmpeg.
# [Note] If there is no audio file head name, leave this argument blank.
# [Warning] Audio files which do not follow this name convention above will be rejected.
aud_fname_head = audio_
# [Description] The number of processes (threads) to be run in parallel for extraction.
# [Note] If set to 0, the number of processes will be automatically set to the number of cpu cores.
ncpu = 6
# [Description] Whether to run video frame extraction or not.
# [Note] You can set this to False or false if you already have preprocessed video frames in npz format.
# [Warning] Only True, true, False, false will be accepted, error will be raised otherwise.
run_vid = True
# [Description] Whether to run audio spectrogram extraction or not.
# [Note] You can set this to False or false if you already have preprocessed audio spectrograms in npz format.
# [Warning] Only True, true, False, false will be accepted, error will be raised otherwise.
run_aud = True
# [Description] Whether to discard raw video or audio files whose pair does not exist, before the preprocessing steps.
# [Note] Pair check is done by using <vid_id> and <aud_id>.
# [Note] If set to True, video and audio files in <src_vid_dir> and <src_aud_dir> whose pair does not exist will be deleted.
# [Warning] Please be aware that setting this to True may delete some unpaired raw video and audio files.
# [Warning] Only True, true, False, false will be accepted, error will be raised otherwise.
remove_unpaired_raw = True
# [Description] Whether to remove npz files whose corresponding video or audio failed in preprocessing.
# [Note] If set to True, preprocessed npz files in <dst_vid_dir> and <dst_aud_dir> will be deleted
# when the preprocessing steps for the corresponding video or audio files have finished unsuccessfully.
# [Note] File IDs to be deleted will be read from <failure_fname>.
# [Warning] Only True, true, False, false will be accepted, error will be raised otherwise.
remove_failure = True
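# [Description] Presumably whether to remove preprocessed npz files whose video/audio pair does not exist (TODO: confirm in preprocess.py).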
remove_unpaired_npz = True
# [Description] File path where the <vid_id> and <aud_id> values of files that failed in extraction will be saved.
# [Note] If you do not want to dump the failure list, leave this argument blank.
failure_fname = ./csv/failure.csv
# [Description] Whether to move or copy npz files into 3 different folders: train, val, and test.
make_train_val_test_split = True
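# [Description] Directories for the video and audio npz files of the train split (presumably used when <make_train_val_test_split> is True; TODO: confirm).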
train_vid_dir = ./data/train/video
train_aud_dir = ./data/train/audio
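# [Description] Directories for the video and audio npz files of the validation split (presumably used when <make_train_val_test_split> is True; TODO: confirm).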
val_vid_dir = ./data/val/video
val_aud_dir = ./data/val/audio
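# [Description] Directories for the video and audio npz files of the test split (presumably used when <make_train_val_test_split> is True; TODO: confirm).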
test_vid_dir = ./data/test/video
test_aud_dir = ./data/test/audio
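# [Description] Presumably the total number of samples to use across the train/val/test split (TODO: confirm in preprocess.py).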
total = 60000
# [Description] Presumably the fraction of samples assigned to the validation split (TODO: confirm).
val_size = 0.1
# [Description] Presumably the fraction of samples assigned to the test split (TODO: confirm).
test_size = 0.1
# [Description] Presumably the random seed used when making the train/val/test split (TODO: confirm).
random_seed = 2020
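# [Description] Presumably how npz files are placed into the split folders: move or copy (TODO: confirm accepted values).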
mode = copy
# ==============================================================================================================
# [Note] Preprocess configurations from below are recommended not to be changed.
# Please change configurations below only if you are familiar with preprocessing steps in this project.
# ==============================================================================================================
# [Description] Time position (in seconds) where the extraction starts.
start_pos = 0.0
# [Description] Length of the time interval (in seconds) used to create one segment.
interval = 1.0
# [Description] Number of segments to extract per file.
# [Note] Please make sure that the video time length satisfies: video_time_length >= <start_pos> + <interval> * <nseg>
# [Note] Time length check will be automatically done in preprocessing steps.
nseg = 9
# [Description] Whether RandomCrop will be applied for image augmentation.
# [Note] If True, the height and width of video frames will be resized to (256, 256).
# [Note] If False, they will be resized to (224, 224), the expected input shape of ImageConvNet.
# [Warning] Only True, true, False, false will be accepted, error will be raised otherwise.
randomcrop = True
# [Description] Sampling rate of audio files (in Hz).
# [Note] Please refer to https://docs.scipy.org/doc/scipy/reference/generated/scipy.signal.spectrogram.html.
sr = 48000
# [Description] Window size to be used in the Fourier transform.
# [Note] Default value is 480, which is 0.01s window size when sampling rate is 48kHz.
# [Note] Please refer to https://docs.scipy.org/doc/scipy/reference/generated/scipy.signal.spectrogram.html.
winsize = 480
# [Description] Ratio of overlap between windows, relative to the window size.
# [Note] Default value is 0.5, i.e. half of the window size, which is 480 * 0.5 = 240 when using default <winsize>.
# [Note] Please refer to https://docs.scipy.org/doc/scipy/reference/generated/scipy.signal.spectrogram.html.
overlap = 0.5
# [Description] nfft value to be used for generating the spectrogram.
# [Note] Default value is the power of two nearest to <winsize>, which is 512.
# [Note] Please refer to https://docs.scipy.org/doc/scipy/reference/generated/scipy.signal.spectrogram.html.
nfft = 512
# [Description] Whether to convert spectrogram into log-scale (magnitude to decibel unit).
# [Warning] Only True, true, False, false will be accepted, error will be raised otherwise.
logscale = True
# [Description] A small value to be added before changing the spectrogram into log-scale to prevent zero log.
# [Note] If <logscale> is True, spectrogram will be converted into log-scale as follows: 10 * log(spectrogram + eps)
eps = 1e-7
[train]
# related file: train.py
# please check and customize configurations before you run the related file