-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfilepath_mods.py
executable file
·224 lines (183 loc) · 6.8 KB
/
filepath_mods.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
import logging
import os
import re
import shutil
from collections import Counter
from pathlib import Path
import config as cnf
from check_obj_size import get_object_size as get_size
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Project configuration; every path below is read from its "paths" section.
config = cnf.get_config()
script_root = config["paths"]["script_root"]
mac_root_folders = config["paths"]["mac_root_path"]

# One error folder and one "requires zip" folder per configured root folder.
archive_error_f = [
    os.path.join(mac_root, config["paths"]["error"])
    for mac_root in mac_root_folders
]
archive_req_zip_f = [
    os.path.join(mac_root, config["paths"]["requires_zip"])
    for mac_root in mac_root_folders
]
def check_pathname(path):
    """
    Check each path recursively, and eliminate any illegal characters.

    Directory names under ``path`` are cleaned first, then file names; the
    renaming itself is done by ``make_safe_name``.  Files whose name starts
    with ".DS_Store" or "._" and that are smaller than 5000 bytes are deleted
    instead of being cleaned.  A summary of the counts is written to the
    module logger.

    Returns ``None`` when a name could not be cleaned and the whole project
    was handed to ``move_to_archive_error``; otherwise returns
    ``validation_result``, which is always 0 in this function.
    """
    total_dir_change = 0
    cleanname_total = Counter({"illegal_char_count": 0, "whitespace_count": 0})

    # Directory pass.  os.walk lists a directory's children before descending,
    # so once a child is renamed the walk can no longer enter it under its old
    # name.  The tree is therefore walked again until a full pass renames
    # nothing.
    while True:
        try:
            dir_count = 0
            dir_chng_count = 0
            for root, dirs, _files in os.walk(path):
                for name in dirs:
                    pathname = os.path.join(root, name)
                    dir_count += 1
                    cleanname, cleanname_totals = make_safe_name(root, name)
                    cleanname_total.update(cleanname_totals)
                    if not cleanname:
                        name_err_msg = f"Error (E1) cleaning filename, moving to Archive_Error - {pathname}"
                        logger.info(name_err_msg)
                        move_to_archive_error(path)
                        return
                    # Compare the names themselves: replacing a character
                    # with "_" changes the name without changing its length.
                    if cleanname != name:
                        dir_chng_count += 1
            if dir_chng_count == 0:
                break
            total_dir_change += dir_chng_count
        except Exception as e:
            dir_walk_msg = f"Exception on DIR Walk: \n {e}"
            logger.error(dir_walk_msg)
            break

    # File pass.  Files are never descended into, so a single walk is enough.
    try:
        file_count = 0
        ds_count = 0
        file_chng_count = 0
        # NOTE(review): char_limit_count and validation_result are never
        # modified in this function, so both are always reported as 0.
        char_limit_count = 0
        validation_result = 0
        for root, _dirs, files in os.walk(path):
            for name in files:
                pathname = os.path.join(root, name)
                file_count += 1
                if (
                    name.startswith((".DS_Store", "._"))
                    and os.stat(pathname).st_size < 5000
                ):
                    os.remove(pathname)
                    ds_count += 1
                    continue
                cleanname, cleanname_totals = make_safe_name(root, name)
                cleanname_total.update(cleanname_totals)
                if not cleanname:
                    name_err_msg = f"Error(E2) cleaning filename, moving to Archive_Error - {pathname}"
                    logger.info(name_err_msg)
                    move_to_archive_error(path)
                    return
                if cleanname != name:
                    file_chng_count += 1
    except Exception as e:
        file_walk_msg = f"Exception on FILE Walk: \n {e}"
        logger.error(file_walk_msg)

    total_dir_msg = f"{dir_count} sub-directories in project {os.path.basename(path)}"
    total_files_msg = (
        f"{file_count - ds_count} files in project {os.path.basename(path)}"
    )
    dir_name_change_msg = (
        f"{total_dir_change} directory names changed to remove illegal characters."
    )
    file_name_change_msg = (
        f"{file_chng_count} file names changed to remove illegal characters."
    )
    char_limit_count_msg = (
        f"{char_limit_count} file paths that exceed the 255 Windows limit"
    )
    illegal_chars_msg = (
        f"{cleanname_total['illegal_char_count']} illegal characters were found."
    )
    whitespace_msg = f"{cleanname_total['whitespace_count']} whitespace characters removed from filenames."
    rm_msg = f"{ds_count} .DS_Store or ._ files removed from dir before archive."

    logger.info(total_dir_msg)
    logger.info(total_files_msg)
    logger.info(dir_name_change_msg)
    logger.info(file_name_change_msg)
    logger.info(char_limit_count_msg)
    logger.info(illegal_chars_msg)
    logger.info(whitespace_msg)
    logger.info(rm_msg)
    return validation_result
def make_safe_name(root, name):
    """
    Check a path name against a list of illegal characters, remove any found.

    The entry ``name`` inside directory ``root`` is renamed on disk when its
    cleaned name differs from the original.

    Character handling:
      * ``@ * ? ! < > & # % $ ~ +`` are each replaced with "_".
      * ``: " ' | =`` are removed.
      * anything matched by the trailing regex is removed; for a single path
        component (no "/") only its trailing-period alternative can match.

    Returns a tuple ``(cleanname, counts)``.  ``cleanname`` is the cleaned
    name, or ``False`` when cleaning or renaming raised an exception,
    including the case where the cleaned name already exists in ``root``.
    ``counts`` is a dict with the keys "illegal_char_count" and
    "whitespace_count".
    """
    underscore_chars = "@*?!<>&#%$~+"
    dropped_chars = ":\"'|="
    illegalchars = frozenset(underscore_chars + dropped_chars)

    illegal_char_count = sum(1 for ch in name if ch in illegalchars)
    whitespace_count = 0
    try:
        # One pass: swap the first group for "_" and delete the second group.
        table = str.maketrans(
            underscore_chars, "_" * len(underscore_chars), dropped_chars
        )
        cleanname = name.translate(table)
        # regex to match on:
        # whitespace next to a "/"
        # period at the end of the name
        # remove matches and count number of subs
        cleanname, removed = re.subn(r"(\s+/|/\s+|\.$)", "", cleanname)
        whitespace_count = int(removed)

        p = Path(os.path.join(root, name))
        cleanp = Path(os.path.join(root, cleanname))
        if p != cleanp:
            # Path.rename silently replaces an existing file on Unix; refuse
            # so that cleaning a name can never destroy a sibling entry.
            if os.path.lexists(cleanp):
                raise FileExistsError(f"{cleanp} already exists")
            p.rename(cleanp)
    except Exception as e:
        make_safe_msg = (
            f"Exception raised on attempt to clean illegal characters: \n {e}"
        )
        logger.error(make_safe_msg)
        cleanname = False

    if illegal_char_count > 0 or whitespace_count > 0:
        pathname_msg = (
            "\n"
            f"{illegal_char_count} - illegal characters removed from pathname.\n"
            f"{whitespace_count} - characters removed from head and tail \n"
            f"name: {name} \n"
            f"clean name: {cleanname} \n "
        )
        logger.info(pathname_msg)

    return cleanname, {
        "illegal_char_count": illegal_char_count,
        "whitespace_count": whitespace_count,
    }
def move_to_archive_error(path):
    """
    Move the project at ``path`` into the "_Archive_ERROR/" folder.

    The destination is built from the first 28 characters of ``path``.
    """
    # NOTE(review): the 28-character prefix is presumably the length of the
    # archive root folder - confirm against the configured root paths.
    destination = os.path.join(path[:28], "_Archive_ERROR/")
    shutil.move(path, destination)
    logger.info(f"{Path(path).name} moved to Archive Error location.")
def write_path_to_txt(path, illegal_path):
    """
    Append ``illegal_path`` as one line to the project's text file.

    The file is named after the last component of ``path`` with ".txt"
    appended, and lives in the "_Archive_REQ_ZIP" folder located under the
    first 28 characters of ``path``.  The file is created if it does not
    exist; the folder must already exist.
    """
    # NOTE(review): the 28-character prefix is presumably the length of the
    # archive root folder - confirm against the configured root paths.
    req_zip_f = os.path.join(path[:28], "_Archive_REQ_ZIP")
    # Open by full path rather than os.chdir() into the folder, so the
    # process-wide working directory is left untouched.
    txt_path = os.path.join(req_zip_f, os.path.basename(path) + ".txt")
    with open(txt_path, "a") as f:
        f.write(illegal_path + "\n")
if __name__ == "__main__":
    import sys

    # check_pathname requires the project path; take it from the command line
    # instead of calling it with no arguments.
    if len(sys.argv) != 2:
        sys.exit(f"usage: {sys.argv[0]} <project_path>")
    check_pathname(sys.argv[1])