-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfile_utilities.py
76 lines (60 loc) · 3.22 KB
/
file_utilities.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
"""This file handles details related to the filesystem itself."""
from collections import defaultdict
import shutil
import os
def sizeof_fmt(num, suffix="B"):
    """
    Convert a number of bytes into a more human-readable format.

    Note that this uses decimal bytes rather than binary. For instance, we use
        1 gigabyte (GB) is 10^9 bytes
    instead of
        1 gibibyte (GiB) is 2^30 ~ 1.07 * 10^9 bytes

    :param num: Quantity to format (may be negative)
    :param suffix: Text appended after the unit prefix (defaults to "B" for bytes)
    :return: The scaled value with one decimal place, a space, then the unit prefix and suffix (e.g., "1.5 KB")
    """
    step = 10 ** 3
    for unit in ("", "K", "M", "G", "T", "P", "E", "Z"):
        # Compare the value as it will be displayed (one decimal place) so that something like
        # 999.96 rolls over to the next unit instead of being printed as "1000.0".
        if abs(round(num, 1)) < step:
            return f"{num:3.1f} {unit}{suffix}"
        num /= step
    # Anything that is still too large after the "Z" prefix is reported with "Y", however big it is.
    return f"{num:.1f} Y{suffix}"
def lowercase_file_extension(filename):
    """
    Give back the extension of this file (leading dot included) converted to lowercase.

    Funnelling every extension lookup through this helper means nobody forgets the lowercase conversion.
    A name without an extension yields an empty string.
    """
    _, extension = os.path.splitext(filename)
    return extension.lower()
def relative_canonical_name(filepath, start, suffix=""):
    """
    Return the path of this file relative to start, minus its file extension and (optionally) a trailing suffix.

    The suffix is only stripped when it is non-empty and the extension-less relative path actually ends with it.
    """
    path_from_start = os.path.relpath(filepath, start)
    stem, _ = os.path.splitext(path_from_start)
    if not suffix or not stem.endswith(suffix):
        return stem
    return stem[:len(stem) - len(suffix)]
def prepare_directories(args):
    """
    Mirror the folder layout of the input directory into the output and temp directories, leaving files behind.

    The temp subdirectories mostly exist to keep names from colliding, as each only ever holds a handful of files.

    :param args: Command line arguments providing input_directory, output_directory, and temp_directory
    """
    def skip_regular_files(directory, entries):
        # copytree leaves out every name returned here, so reporting all files means only directories get created
        return [entry for entry in entries if os.path.isfile(os.path.join(directory, entry))]

    destinations = (args.output_directory, args.temp_directory)
    for destination in destinations:
        shutil.copytree(args.input_directory, destination, ignore=skip_regular_files)
def summarize_directory_files(args, arg_name, all_files):
    """Write a size report for these files to stdout, grouped by directory and then by file extension.

    Directories are listed relative to the directory stored under arg_name to keep the output short.
    Both directories and extensions are listed from largest to smallest, and paths that are not existing files
    are left out of the report.

    :param args: Command line arguments from argparse
    :param arg_name: CLI argument name specifying directory to analyze (e.g., input_directory, output_directory, etc.)
    :param all_files: A list of all file paths in this directory to analyze
    """
    bytes_per_directory = defaultdict(int)  # directory -> size
    bytes_per_extension = defaultdict(lambda: defaultdict(int))  # directory -> file extension -> size

    for path in all_files:
        if not os.path.isfile(path):
            continue
        parent = os.path.dirname(path)
        size = os.path.getsize(path)
        bytes_per_directory[parent] += size
        bytes_per_extension[parent][lowercase_file_extension(path)] += size

    def largest_first(size_by_key):
        # (key, size) pairs ordered from the biggest size to the smallest
        return sorted(size_by_key.items(), key=lambda pair: pair[1], reverse=True)

    grand_total = sum(bytes_per_directory.values())
    print(f"{arg_name} summary ({sizeof_fmt(grand_total)} in total):")
    for directory, directory_total in largest_first(bytes_per_directory):
        shown_directory = os.path.relpath(directory, getattr(args, arg_name))
        print(f"{arg_name} -> {shown_directory} "
              f"({sizeof_fmt(directory_total)} in total):")
        for extension, extension_total in largest_first(bytes_per_extension[directory]):
            print(f"| {extension} only: {sizeof_fmt(extension_total)}")
    # NOTE(review): the blank line is emitted once, after every directory section — confirm this is the
    # intended layout rather than one blank line per directory.
    print()