-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathanonymiser.py
134 lines (113 loc) · 3.42 KB
/
anonymiser.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
r"""The aim of this script is to anonymise every .eeg file in a tree of folders.
Usage:
.\python-3.8.8-embed-win32\python.exe .\anonymiser.py \
.\dataset -dp .\dataset_anonym -y
"""
import os
import traceback
from utils import (
anonymise_eeg_verbose,
display_arguments,
handle_yes_no,
list_files,
)
def main(path: str, destination_path: str, use_folder_as_name: bool = True):
    """Anonymise every ``.eeg`` file found under ``path``.

    Args:
        path: root folder of the dataset to scan.
        destination_path: root folder receiving the anonymised files; each
            file keeps its location relative to ``path``. When ``None``,
            the input path is also used as the output path.
        use_folder_as_name: when true, the name field is filled with the
            name of the file's parent folder; otherwise it is left empty.
    """
    # Matching on the lower-cased name makes the extension check
    # case-insensitive.
    eeg_files = sorted(
        candidate
        for candidate in list_files(path)
        if candidate.lower().endswith('.eeg')
    )
    total = len(eeg_files)

    for position, source_file in enumerate(eeg_files, start=1):
        # The name field is either the parent folder's name or blank.
        if use_folder_as_name:
            parent_folder = os.path.dirname(source_file)
            field_name = os.path.basename(parent_folder)
        else:
            field_name = ''

        # Without a destination root the output path is the input path;
        # otherwise the file's relative location is rebuilt under it.
        if destination_path is not None:
            relative_location = os.path.relpath(source_file, path)
            target_file = os.path.join(destination_path, relative_location)
        else:
            target_file = source_file

        progress = '\nCurrent file ({0}/{1}):'.format(position, total)
        print(progress, source_file, '-->', target_file)

        # Both attempts share the same keyword options.
        call_options = {'field_name': field_name, 'verbose': True}
        try:
            anonymise_eeg_verbose(source_file, target_file, **call_options)
        except MemoryError:
            # One more attempt is made before giving up on this file.
            print('MemoryError: retry...')
            try:
                anonymise_eeg_verbose(
                    source_file, target_file, **call_options
                )
            except MemoryError:
                # Report the failure and carry on with the next file.
                print('MemoryError: not able to process the file')
                traceback.print_exc()
if __name__ == '__main__':
    import argparse

    # Command-line interface: one positional dataset path plus options.
    cli = argparse.ArgumentParser(allow_abbrev=True)

    cli.add_argument(
        'path',
        help='path to the dataset to anonymise',
        type=str,
    )

    destination_help = (
        'destination of the anonymised dataset (if not set, the dataset '
        'will be overwritten)'
    )
    cli.add_argument(
        '-dp',
        '--destination_path',
        default=None,
        help=destination_help,
        type=str,
    )

    folder_name_help = (
        "if set, fill the name field with the name of the file's parent "
        "folder."
    )
    cli.add_argument(
        '-fn',
        '--parent_folder_as_name_field',
        action='store_true',
        default=False,
        help=folder_name_help,
    )

    # -y and -n cannot be combined. NOTE(review): handle_yes_no presumably
    # asks the user whether to proceed when neither flag is given - confirm
    # against utils.
    confirmation = cli.add_mutually_exclusive_group()
    confirmation.add_argument(
        '-y',
        '--yes',
        action='store_true',
        default=False,
        help='if set, the program will start directly',
    )
    confirmation.add_argument(
        '-n',
        '--no',
        action='store_true',
        default=False,
        help='if set, the program will exit directly',
    )

    options = cli.parse_args()

    display_arguments(options)
    handle_yes_no(options)

    main(
        path=options.path,
        destination_path=options.destination_path,
        use_folder_as_name=options.parent_folder_as_name_field,
    )