-
Notifications
You must be signed in to change notification settings - Fork 1
/
goop
executable file
·136 lines (118 loc) · 4.17 KB
/
goop
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#
# goop [1] is a Python 3 script to detox filenames by removing spaces
# and special characters. By default, it restricts the filenames to
# [a-zA-Z0-9._-]. The basic usage is
#
# goop <file>...
#
# Some examples:
#
# $ goop "A Midsummer Night's Dream.pdf"
# ‘A Midsummer Night's Dream.pdf’ -> ‘A_Midsummer_Nights_Dream.pdf’
#
# $ goop -l "A. Camus - L’Étranger.epub"
# ‘A. Camus - L’Étranger.epub’ -> ‘a_camus_letranger.epub’
#
# For more details about goop's options, run
#
# goop --help
#
# See also: detox <http://detox.sourceforge.net>
#
# [1]: Named after Ms. Paltrow's eponymous detoxing company; see
# https://www.bbc.com/news/world-us-canada-45426332
#
import argparse
import os
import re
import sys
import unicodedata
def _is_case_variant(src, dst):
    """Return True if dst names the same file as src, differing only by case.

    On a case-insensitive filesystem a destination that differs from the
    source only in letter case already "exists" -- it is the source itself.
    Any error while comparing the two paths is treated as "not the same".
    """
    if src.lower() != dst.lower():
        return False
    try:
        return os.path.samefile(src, dst)
    except OSError:
        return False


def rename(root, name, lower_case=False, dry=False):
    """Clean up the filename of the entry ``name`` inside directory ``root``.

    The name is reduced to ASCII, stripped of leading/trailing separators
    and quotes, and every remaining run of characters outside ``[\\w.-]``
    is replaced by a single underscore.  The outcome is reported on stderr.

    Args:
        root: Directory containing the entry (may be an empty string).
        name: Current name of the entry within ``root``.
        lower_case: If true, lowercase the cleaned name.
        dry: If true, only report the rename; do not touch the filesystem.

    Returns:
        None.  Nothing is renamed when the name is already clean, when the
        cleaned name is empty, or when a different entry already occupies
        the destination.

    Raises:
        OSError: Propagated from ``os.replace`` if the rename itself fails.
    """
    src = os.path.join(root, name)

    # Decompose accented characters (NFKD) and drop whatever is still
    # outside ASCII, e.g. the combining marks left by the decomposition.
    name = unicodedata.normalize("NFKD", name)
    name = name.encode("ASCII", "ignore").decode("ASCII")

    # Strip leading non-alphanumerics unless the name starts with a period
    # (hidden files keep their dot).  Inside the class, `^` and `.` are
    # literals, so a period following another leading separator is
    # stripped as well.
    name = re.sub(r"^(?!\.)[_\W^.]+", "", name)

    # Strip trailing non-alphanumerics (underscores included).
    name = re.sub(r"[_\W]+$", "", name)

    # For regular files, also strip non-alphanumerics sitting directly in
    # front of the final extension.
    if os.path.isfile(src):
        name = re.sub(r"[_\W]+\.([^.]*)$", r".\1", name)

    # Drop quotes entirely instead of turning them into underscores.
    name = re.sub(r"[\"']+", "", name)

    # Everything outside [\w.-] becomes an underscore.
    name = re.sub(r"[^\w.-]+", "_", name)

    # Dashes and periods flanked by underscores collapse into the underscore.
    name = re.sub(r"(_+[-.]+|[-.]+_+)", "_", name)

    # Squeeze repeated underscores and repeated dashes.
    name = re.sub(r"_+", "_", name)
    name = re.sub(r"-+", "-", name)

    if lower_case:
        name = name.lower()

    dst = os.path.join(root, name)

    if src == dst:
        # Already clean; nothing to do.
        return

    if not name:
        print("unable to detox ‘{}’; empty filename".format(src), file=sys.stderr)
        return

    # Refuse to clobber another entry.  A destination that is the source
    # itself under a different case (case-insensitive filesystems) is not a
    # collision and must still be renamed.
    if os.path.exists(dst) and not _is_case_variant(src, dst):
        print("‘{}’ exists".format(dst), file=sys.stderr)
        return

    print("‘{}’ -> ‘{}’".format(src, dst), file=sys.stderr)
    if not dry:
        os.replace(src, dst)
def detox(files, recurse=False, lower_case=False, dry=False):
    """Detox a list of files.

    Each path is renamed via ``rename``.  When ``recurse`` is set and the
    path is a directory, everything below it is renamed first, then the
    directory itself.

    Args:
        files: Iterable of paths to clean up.
        recurse: If true, descend into directories.
        lower_case: Passed through to ``rename``.
        dry: Passed through to ``rename``; report only, change nothing.

    Returns:
        1 if processing any path raised an exception, otherwise None
        (which ``sys.exit`` treats as success).
    """
    failed = False
    for path in files:
        try:
            if recurse and os.path.isdir(path):
                # Walk bottom-up so children are renamed before the
                # directory that contains them changes its name.
                for root, subdirs, entries in os.walk(path, topdown=False):
                    for entry in entries + subdirs:
                        rename(root, entry, lower_case, dry)
            # Finally, rename the path itself.
            rename(os.path.dirname(path), os.path.basename(path), lower_case, dry)
        except Exception as e:
            # Report the failure and carry on with the remaining paths
            # instead of abandoning them; the failure is still reflected
            # in the return value.
            print(e, file=sys.stderr)
            failed = True
    return 1 if failed else None
def arg_parser_valid(name):
    """Validate a command-line path and return it in normalized form.

    Intended as an ``argparse`` ``type=`` callable.

    Args:
        name: Path as given on the command line.

    Returns:
        ``os.path.normpath(name)``.

    Raises:
        argparse.ArgumentTypeError: If ``name`` is neither a regular file
            nor a directory, or if ``os.access`` reports it as not writable.
    """
    is_known_kind = os.path.isfile(name) or os.path.isdir(name)
    if not is_known_kind:
        raise argparse.ArgumentTypeError("‘{}’ is not a valid path".format(name))

    # NOTE(review): this checks write access on the entry itself; renaming
    # usually depends on the parent directory's permissions -- confirm
    # whether that is the intended check.
    writable = os.access(name, os.W_OK)
    if not writable:
        raise argparse.ArgumentTypeError("‘{}’ is not writable".format(name))

    return os.path.normpath(name)
if __name__ == "__main__":
    # Command-line entry point: parse the options, validate every path via
    # arg_parser_valid, and exit with whatever detox() returns.
    arg_parser = argparse.ArgumentParser(
        prog="goop",
        # Case is preserved unless --lower-case is given, so the default
        # character set includes upper-case letters.
        description="detox filenames by restricting characters to [a-zA-Z0-9._-]",
    )
    # store_true flags already default to False.
    arg_parser.add_argument(
        "-l",
        "--lower-case",
        action="store_true",
        help="convert filenames to lowercase",
    )
    arg_parser.add_argument(
        "-r",
        "--recurse",
        action="store_true",
        help="recurse into directories",
    )
    arg_parser.add_argument(
        "-n",
        "--dry-run",
        action="store_true",
        help="perform a trial run",
    )
    arg_parser.add_argument("files", type=arg_parser_valid, nargs="+", help="input files")
    args = arg_parser.parse_args()
    sys.exit(detox(args.files, args.recurse, args.lower_case, args.dry_run))