Skip to content

Commit 2a29e34

Browse files
Accept a single file as input (#19)
* Return checksum for single file as input
* Add warning that ignore/include flags are ignored for single file input
* Tests for single file as input
* Update documentation to reflect single-file ability
* Ensure input to gather_file_paths is a directory
* Test directory requirement for gather_file_paths

---------

Co-authored-by: Elizabeth Campolongo <38985481+egrace479@users.noreply.github.com>
1 parent 9da8f67 commit 2a29e34

File tree

6 files changed

+84
-37
lines changed

6 files changed

+84
-37
lines changed

README.md

Lines changed: 13 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
# sum-buddy
2-
Command-line package to generate a CSV with filepath, filename, and checksum for contents of given directory.
3-
2+
Command-line package to generate a CSV with filepath, filename, and checksum for contents of a given directory or a single file.
43

54
## Requirements
65
Python 3.7+
@@ -18,12 +17,12 @@ pip install git+https://github.com/Imageomics/sum-buddy
1817
### Command Line Usage
1918

2019
```
21-
usage: sum-buddy [-h] [-o OUTPUT_FILE] [-i IGNORE_FILE | -H] [-a ALGORITHM] input_dir
20+
usage: sum-buddy [-h] [-o OUTPUT_FILE] [-i IGNORE_FILE | -H] [-a ALGORITHM] input_path
2221
23-
Generate CSV with filepath, filename, and checksums for all files in a given directory
22+
Generate CSV with filepath, filename, and checksums for all files in a given directory (or a single file)
2423
2524
positional arguments:
26-
input_dir Directory to traverse for files
25+
input_path File or directory to traverse for files
2726
2827
options:
2928
-h, --help show this help message and exit
@@ -34,6 +33,8 @@ options:
3433
-H, --include-hidden Include hidden files
3534
-a ALGORITHM, --algorithm ALGORITHM
3635
Hash algorithm to use (default: md5; available: ripemd160, sha3_224, sha512_224, blake2b, sha384, sha256, sm3, sha3_256, shake_256, sha512, sha1, sha224, md5, md5-sha1, sha3_384, sha3_512, sha512_256, shake_128, blake2s)
36+
-l LENGTH, --length LENGTH
37+
Length of the digest for SHAKE (required) or BLAKE (optional) algorithms in bytes
3738
```
3839

3940
> Note: The available algorithms are determined by those available to `hashlib` and may vary depending on your system and OpenSSL version, so the set shown on your system with `sum-buddy -h` may be different from above. At a minimum, it should include: `{blake2s, blake2b, md5, sha1, sha224, sha256, sha384, sha512, sha3_224, sha3_256, sha3_384, sha3_512, shake_128, shake_256}`, which is given by `hashlib.algorithms_guaranteed`.
@@ -136,24 +137,24 @@ We expose three functions to be used in your Python code:
136137
```python
137138
from sumbuddy import get_checksums, gather_file_paths, checksum_file
138139
139-
input_dir = "examples/example_content"
140+
input_path = "examples/example_content"
140141
output_file = "examples/checksums.csv"
141142
include_hidden = True # Optional
142143
ignore_file = "examples/.sbignore_except_txt" # Optional
143144
alg = "md5" # Optional, possible inputs include list elements returned by hashlib.algorithms_available
144145
145146
# To generate checksums and save to a CSV file
146-
get_checksums(input_dir, output_file, ignore_file=ignore_file, algorithm=alg)
147-
# or get_checksums(input_dir, output_file, ignore_hidden=ignore_hidden)
148-
# or get_checksums(input_dir, output_file)
147+
get_checksums(input_path, output_file, ignore_file=ignore_file, algorithm=alg)
148+
# or get_checksums(input_path, output_file, include_hidden=include_hidden)
149+
# or get_checksums(input_path, output_file)
149150
150151
# outputs status bar followed by
151152
# Checksums written to examples/checksums.csv
152153
153154
# To gather a list of file paths according to ignore/include patterns
154-
file_paths = gather_file_paths(input_dir, ignore_file=ignore_file)
155-
# or file_paths = gather_file_paths(input_dir, include_hidden=include_hidden)
156-
# or file_paths = gather_file_paths(input_dir)
155+
file_paths = gather_file_paths(input_path, ignore_file=ignore_file)
156+
# or file_paths = gather_file_paths(input_path, include_hidden=include_hidden)
157+
# or file_paths = gather_file_paths(input_path)
157158
158159
# To calculate the checksum of a single file
159160
sum = checksum_file("examples/example_content/file.txt", algorithm=alg)

src/sumbuddy/__main__.py

Lines changed: 20 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -9,24 +9,32 @@
99
import sys
1010
import os
1111

12-
def get_checksums(input_directory, output_filepath=None, ignore_file=None, include_hidden=False, algorithm='md5', length=None):
12+
def get_checksums(input_path, output_filepath=None, ignore_file=None, include_hidden=False, algorithm='md5', length=None):
1313
"""
1414
Generate a CSV file with the filepath, filename, and checksum of a single input file, or of all files in an input directory that are not excluded by the ignore patterns. Checksum column is labeled by the selected algorithm (e.g., 'md5' or 'sha256').
1515
1616
Parameters:
1717
------------
18-
input_directory - String. Directory to traverse for files.
18+
input_path - String. Path to a single file to checksum, or to a directory to traverse for files.
1919
output_filepath - String [optional]. Filepath for the output CSV file. Defaults to None, i.e. output will be to stdout.
2020
ignore_file - String [optional]. Filepath for the ignore patterns file.
2121
include_hidden - Boolean [optional]. Whether to include hidden files. Default is False.
2222
algorithm - String. Algorithm to use for checksums. Default: 'md5', see options with 'hashlib.algorithms_available'.
2323
length - Integer [conditionally optional]. Length of the digest for SHAKE (required) and BLAKE (optional) algorithms in bytes.
2424
"""
2525
mapper = Mapper()
26-
try:
27-
file_paths = mapper.gather_file_paths(input_directory, ignore_file=ignore_file, include_hidden=include_hidden)
28-
except (EmptyInputDirectoryError, NoFilesAfterFilteringError) as e:
29-
sys.exit(str(e))
26+
27+
if os.path.isfile(input_path):
28+
file_paths = [input_path]
29+
if ignore_file:
30+
print("Warning: --ignore-file (-i) flag is ignored when input is a single file.")
31+
if include_hidden:
32+
print("Warning: --include-hidden (-H) flag is ignored when input is a single file.")
33+
else:
34+
try:
35+
file_paths = mapper.gather_file_paths(input_path, ignore_file=ignore_file, include_hidden=include_hidden)
36+
except (EmptyInputDirectoryError, NoFilesAfterFilteringError) as e:
37+
sys.exit(str(e))
3038

3139
# Exclude the output file from being hashed
3240
if output_filepath:
@@ -41,7 +49,7 @@ def get_checksums(input_directory, output_filepath=None, ignore_file=None, inclu
4149
writer.writerow(["filepath", "filename", f"{algorithm}"])
4250

4351
disable_tqdm = output_filepath is None
44-
for file_path in tqdm(file_paths, desc=f"Calculating {algorithm} checksums on {input_directory}", disable=disable_tqdm):
52+
for file_path in tqdm(file_paths, desc=f"Calculating {algorithm} checksums on {input_path}", disable=disable_tqdm):
4553
checksum = hasher.checksum_file(file_path, algorithm=algorithm, length=length)
4654
writer.writerow([file_path, os.path.basename(file_path), checksum])
4755

@@ -50,14 +58,14 @@ def get_checksums(input_directory, output_filepath=None, ignore_file=None, inclu
5058
output_stream.close()
5159

5260
if output_filepath:
53-
print(f"{algorithm} checksums for {input_directory} written to {output_filepath}")
61+
print(f"{algorithm} checksums for {input_path} written to {output_filepath}")
5462

5563
def main():
5664

5765
available_algorithms = ', '.join(hashlib.algorithms_available)
58-
59-
parser = argparse.ArgumentParser(description="Generate CSV with filepath, filename, and checksums for all files in a given directory")
60-
parser.add_argument("input_dir", help="Directory to traverse for files")
66+
67+
parser = argparse.ArgumentParser(description="Generate CSV with filepath, filename, and checksums for all files in a given directory (or a single file)")
68+
parser.add_argument("input_path", help="File or directory to traverse for files")
6169
parser.add_argument("-o", "--output-file", help="Filepath for the output CSV file; defaults to stdout", default=None)
6270
group = parser.add_mutually_exclusive_group()
6371
group.add_argument("-i", "--ignore-file", help="Filepath for the ignore patterns file")
@@ -76,7 +84,7 @@ def main():
7684
sys.exit("Exited without executing")
7785

7886
try:
79-
get_checksums(args.input_dir, args.output_file, args.ignore_file, args.include_hidden, args.algorithm, args.length)
87+
get_checksums(args.input_path, args.output_file, args.ignore_file, args.include_hidden, args.algorithm, args.length)
8088
except (LengthUsedForFixedLengthHashError) as e:
8189
sys.exit(str(e))
8290

src/sumbuddy/exceptions.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,11 @@ def __init__(self, input_directory):
33
message = f"The directory {input_directory} and subdirectories (if any) contain no files. \nPlease provide a directory with files."
44
super().__init__(message)
55

6+
class NotADirectoryError(Exception):
7+
def __init__(self, input_directory):
8+
message = f"The input path '{input_directory}' is not a directory. \nPlease provide a directory with files."
9+
super().__init__(message)
10+
611
class NoFilesAfterFilteringError(Exception):
712
def __init__(self, input_directory, ignore_file):
813
message = f"The directory {input_directory} contains files, but all are filtered out. \nCheck patterns in your {ignore_file} file and/or hidden files settings."

src/sumbuddy/mapper.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
import os
22
from sumbuddy.filter import Filter
3-
from sumbuddy.exceptions import EmptyInputDirectoryError, NoFilesAfterFilteringError
3+
from sumbuddy.exceptions import EmptyInputDirectoryError, NoFilesAfterFilteringError, NotADirectoryError
44

55
class Mapper:
66
def __init__(self):
@@ -39,6 +39,10 @@ def gather_file_paths(self, input_directory, ignore_file=None, include_hidden=Fa
3939
---------
4040
file_paths - List. Files in input_directory that are not ignored.
4141
"""
42+
43+
if not os.path.isdir(input_directory):
44+
raise NotADirectoryError(input_directory)
45+
4246
self.reset_filter(ignore_file=ignore_file, include_hidden=include_hidden)
4347

4448
file_paths = []

tests/test_getChecksums.py

Lines changed: 34 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -10,20 +10,43 @@
1010
class TestGetChecksums(unittest.TestCase):
1111

1212
def setUp(self):
13-
self.input_directory = 'input_dir'
13+
self.input_path = 'input_path'
1414
self.output_filepath = 'output.csv'
1515
self.ignore_file = 'ignore_patterns.txt'
1616
self.mock_file_paths = ['file1.txt', 'file2.txt', '.hidden_file']
1717
self.algorithm = 'md5'
1818
self.dummy_checksum = 'dummychecksum'
1919

20+
@patch('os.path.isfile', return_value=True)
21+
@patch('builtins.open', new_callable=mock_open)
22+
@patch('sumbuddy.Hasher.checksum_file', return_value='dummychecksum')
23+
def test_get_checksums_single_file_to_file(self, mock_checksum, mock_open, mock_isfile):
24+
get_checksums(self.input_path, self.output_filepath, ignore_file=None, include_hidden=False, algorithm=self.algorithm)
25+
26+
mock_open.assert_called_with(self.output_filepath, 'w', newline='')
27+
handle = mock_open()
28+
handle.write.assert_any_call('filepath,filename,md5\r\n')
29+
handle.write.assert_any_call(f'{self.input_path},{os.path.basename(self.input_path)},dummychecksum\r\n')
30+
31+
@patch('os.path.isfile', return_value=True)
32+
@patch('builtins.open', new_callable=mock_open)
33+
@patch('sumbuddy.Hasher.checksum_file', return_value='dummychecksum')
34+
def test_get_checksums_single_file_to_stdout(self, mock_checksum, mock_open, mock_isfile):
35+
output_stream = StringIO()
36+
with patch('sys.stdout', new=output_stream):
37+
get_checksums(self.input_path, output_filepath=None, ignore_file=None, include_hidden=False, algorithm=self.algorithm)
38+
39+
output = output_stream.getvalue()
40+
self.assertIn('filepath,filename,md5', output)
41+
self.assertIn(f'{self.input_path},{os.path.basename(self.input_path)},dummychecksum', output)
42+
2043
@patch('os.path.abspath', side_effect=lambda x: x)
2144
@patch('os.path.exists', return_value=True)
2245
@patch('builtins.open', new_callable=mock_open)
2346
@patch('sumbuddy.Mapper.gather_file_paths', return_value=['file1.txt', 'file2.txt'])
2447
@patch('sumbuddy.Hasher.checksum_file', side_effect=lambda x, **kwargs: 'dummychecksum')
2548
def test_get_checksums_to_file(self, mock_checksum, mock_gather, mock_open, mock_exists, mock_abspath):
26-
get_checksums(self.input_directory, self.output_filepath, ignore_file=None, include_hidden=False, algorithm=self.algorithm)
49+
get_checksums(self.input_path, self.output_filepath, ignore_file=None, include_hidden=False, algorithm=self.algorithm)
2750

2851
mock_open.assert_called_with(self.output_filepath, 'w', newline='')
2952
handle = mock_open()
@@ -39,7 +62,7 @@ def test_get_checksums_to_file(self, mock_checksum, mock_gather, mock_open, mock
3962
def test_get_checksums_to_stdout(self, mock_checksum, mock_gather, mock_open, mock_exists, mock_abspath):
4063
output_stream = StringIO()
4164
with patch('sys.stdout', new=output_stream):
42-
get_checksums(self.input_directory, output_filepath=None, ignore_file=None, include_hidden=False, algorithm=self.algorithm)
65+
get_checksums(self.input_path, output_filepath=None, ignore_file=None, include_hidden=False, algorithm=self.algorithm)
4366

4467
output = output_stream.getvalue()
4568
self.assertIn('filepath,filename,md5', output)
@@ -52,17 +75,17 @@ def test_get_checksums_to_stdout(self, mock_checksum, mock_gather, mock_open, mo
5275
@patch('sumbuddy.Mapper.gather_file_paths', return_value=['file1.txt', 'file2.txt'])
5376
@patch('sumbuddy.Hasher.checksum_file', side_effect=lambda x, **kwargs: 'dummychecksum')
5477
def test_get_checksums_with_ignore_file(self, mock_checksum, mock_gather, mock_open, mock_exists, mock_abspath):
55-
get_checksums(self.input_directory, output_filepath=None, ignore_file=self.ignore_file, include_hidden=False, algorithm=self.algorithm)
56-
mock_gather.assert_called_with(self.input_directory, ignore_file=self.ignore_file, include_hidden=False)
78+
get_checksums(self.input_path, output_filepath=None, ignore_file=self.ignore_file, include_hidden=False, algorithm=self.algorithm)
79+
mock_gather.assert_called_with(self.input_path, ignore_file=self.ignore_file, include_hidden=False)
5780

5881
@patch('os.path.abspath', side_effect=lambda x: x)
5982
@patch('os.path.exists', return_value=True)
6083
@patch('builtins.open', new_callable=mock_open)
6184
@patch('sumbuddy.Mapper.gather_file_paths', return_value=['file1.txt', 'file2.txt', '.hidden_file'])
6285
@patch('sumbuddy.Hasher.checksum_file', side_effect=lambda x, **kwargs: 'dummychecksum')
6386
def test_get_checksums_include_hidden(self, mock_checksum, mock_gather, mock_open, mock_exists, mock_abspath):
64-
get_checksums(self.input_directory, output_filepath=None, ignore_file=None, include_hidden=True, algorithm=self.algorithm)
65-
mock_gather.assert_called_with(self.input_directory, ignore_file=None, include_hidden=True)
87+
get_checksums(self.input_path, output_filepath=None, ignore_file=None, include_hidden=True, algorithm=self.algorithm)
88+
mock_gather.assert_called_with(self.input_path, ignore_file=None, include_hidden=True)
6689

6790
@patch('os.path.abspath', side_effect=lambda x: x)
6891
@patch('os.path.exists', return_value=True)
@@ -71,11 +94,11 @@ def test_get_checksums_include_hidden(self, mock_checksum, mock_gather, mock_ope
7194
@patch('sumbuddy.Hasher.checksum_file', side_effect=lambda x, **kwargs: 'dummychecksum')
7295
def test_get_checksums_different_algorithm(self, mock_checksum, mock_gather, mock_open, mock_exists, mock_abspath):
7396
algorithm = 'sha256'
74-
get_checksums(self.input_directory, output_filepath=None, ignore_file=None, include_hidden=False, algorithm=algorithm)
97+
get_checksums(self.input_path, output_filepath=None, ignore_file=None, include_hidden=False, algorithm=algorithm)
7598

7699
output_stream = StringIO()
77100
with patch('sys.stdout', new=output_stream):
78-
get_checksums(self.input_directory, output_filepath=None, ignore_file=None, include_hidden=False, algorithm=algorithm)
101+
get_checksums(self.input_path, output_filepath=None, ignore_file=None, include_hidden=False, algorithm=algorithm)
79102

80103
output = output_stream.getvalue()
81104
self.assertIn(f'filepath,filename,{algorithm}', output)
@@ -89,7 +112,7 @@ def test_get_checksums_different_algorithm(self, mock_checksum, mock_gather, moc
89112
def test_get_checksums_empty_directory(self, mock_gather, mock_open, mock_exists, mock_abspath):
90113
output_stream = StringIO()
91114
with patch('sys.stdout', new=output_stream):
92-
get_checksums(self.input_directory, output_filepath=None, ignore_file=None, include_hidden=False, algorithm=self.algorithm)
115+
get_checksums(self.input_path, output_filepath=None, ignore_file=None, include_hidden=False, algorithm=self.algorithm)
93116

94117
output = output_stream.getvalue()
95118
self.assertIn('filepath,filename,md5', output)
@@ -100,7 +123,7 @@ def test_get_checksums_empty_directory(self, mock_gather, mock_open, mock_exists
100123
@patch('sumbuddy.Mapper.gather_file_paths', return_value=['file1.txt', 'file2.txt'])
101124
def test_get_checksums_invalid_algorithm(self, mock_gather, mock_open, mock_exists, mock_abspath):
102125
with self.assertRaises(ValueError):
103-
get_checksums(self.input_directory, output_filepath=None, ignore_file=None, include_hidden=False, algorithm='invalid_alg')
126+
get_checksums(self.input_path, output_filepath=None, ignore_file=None, include_hidden=False, algorithm='invalid_alg')
104127

105128
if __name__ == '__main__':
106129
unittest.main()

tests/test_mapper.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
from unittest.mock import patch, mock_open, MagicMock
55
from sumbuddy.mapper import Mapper
66
from sumbuddy.filter import Filter
7-
from sumbuddy.exceptions import EmptyInputDirectoryError, NoFilesAfterFilteringError
7+
from sumbuddy.exceptions import EmptyInputDirectoryError, NoFilesAfterFilteringError, NotADirectoryError
88

99
class TestMapper(unittest.TestCase):
1010
@patch('sumbuddy.filter.open', new_callable=mock_open, read_data="# This is a sample ignore file\n")
@@ -90,6 +90,12 @@ def test_gather_file_paths_filtered_files(self):
9090

9191
with self.assertRaises(NoFilesAfterFilteringError):
9292
mapper.gather_file_paths(temp_dir, ignore_file=ignore_file_path)
93+
94+
def test_gather_file_paths_input_not_a_directory(self):
95+
mapper = Mapper()
96+
with tempfile.NamedTemporaryFile() as temp_file:
97+
with self.assertRaises(NotADirectoryError):
98+
mapper.gather_file_paths(temp_file.name)
9399

94100

95101
if __name__ == '__main__':

0 commit comments

Comments
 (0)