diff --git a/README.md b/README.md
index e296a13..1abf7a8 100644
--- a/README.md
+++ b/README.md
@@ -57,6 +57,8 @@ python twinTrim.py [OPTIONS]
 - `--max-size`: Specify the maximum file size to include in the scan (e.g., `1gb`).
 - `--file-type`: Specify the file type to include (e.g., `.txt`, `.jpg`).
 - `--exclude`: Exclude specific files by name.
+- `--label-color`: Set the font color of the progress bar label.
+- `--bar-color`: Set the color of the progress bar.
 
 ### Examples
 
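With the two new options documented above, a typical invocation might look like this (the directory path and color values are illustrative, not from the project's docs):

    python twinTrim.py ./downloads --all --label-color cyan --bar-color green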
diff --git a/tests/fileFilter.py b/tests/fileFilter.py
deleted file mode 100644
index 47d2d1b..0000000
--- a/tests/fileFilter.py
+++ /dev/null
@@ -1,42 +0,0 @@
-import pytest
-from twinTrim.dataStructures.fileFilter import FileFilter
-
-def test_set_max_file_size_valid():
-    """Test setting valid max file size values."""
-    file_filter = FileFilter()
-
-    # Test with a smaller size
-    file_filter.setMaxFileSize("500mb")
-    assert file_filter.maxFileSize == "500mb", "Failed to set max file size to 500mb"
-
-    # Test with a larger size
-    file_filter.setMaxFileSize("2gb")
-    assert file_filter.maxFileSize == "2gb", "Failed to set max file size to 2gb"
-
-    # Test with a minimum size (edge case)
-    file_filter.setMaxFileSize("1kb")
-    assert file_filter.maxFileSize == "1kb", "Failed to set max file size to 1kb"
-
-def test_set_max_file_size_same_value():
-    """Test setting the same max file size."""
-    file_filter = FileFilter()
-
-    # Test setting the max file size to the default value
-    file_filter.setMaxFileSize("1gb")
-    assert file_filter.maxFileSize == "1gb", "Failed to set max file size to 1gb"
-
-def test_set_max_file_size_boundary():
-    """Test boundary values for max file size."""
-    file_filter = FileFilter()
-
-    # Test setting a value just under the default
-    file_filter.setMaxFileSize("999mb")
-    assert file_filter.maxFileSize == "999mb", "Failed to set max file size to 999mb"
-
-def test_set_max_file_size_empty_value():
-    """Test setting an empty value to max file size."""
-    file_filter = FileFilter()
-
-    # Since there is no validation, an empty value would still set it to the empty string
-    file_filter.setMaxFileSize("")
-    assert file_filter.maxFileSize == "", "Failed to set max file size to empty value"
diff --git a/tests/fileFilter_test.py b/tests/fileFilter_test.py
new file mode 100644
index 0000000..beb1eef
--- /dev/null
+++ b/tests/fileFilter_test.py
@@ -0,0 +1,44 @@
+import pytest
+from twinTrim.dataStructures.fileFilter import FileFilter
+
+
+def test_set_min_file_size_valid():
+    """Test setting valid minimum file size values."""
+    file_filter = FileFilter()
+
+    # Test with a larger valid size
+    file_filter.setMinFileSize("20kb")
+    assert file_filter.minFileSize == "20kb", "Failed to set min file size to 20kb"
+
+    # Test with a smaller valid size
+    file_filter.setMinFileSize("5kb")
+    assert file_filter.minFileSize == "5kb", "Failed to set min file size to 5kb"
+
+    # Test with an edge case (1kb)
+    file_filter.setMinFileSize("1kb")
+    assert file_filter.minFileSize == "1kb", "Failed to set min file size to 1kb"
+
+def test_set_min_file_size_empty_string():
+    """Test setting an empty string for minimum file size."""
+    file_filter = FileFilter()
+
+    # An empty string should be allowed since no validation exists
+    file_filter.setMinFileSize("")
+    assert file_filter.minFileSize == "", "Failed to set min file size to an empty string"
+
+def test_set_min_file_size_special_characters():
+    """Test setting special characters or a random string as min file size."""
+    file_filter = FileFilter()
+
+    # Special characters should be accepted since no validation exists
+    file_filter.setMinFileSize("!!invalid!!")
+    assert file_filter.minFileSize == "!!invalid!!", "Failed to set min file size to special characters"
+
+def test_set_min_file_size_numeric_string():
+    """Test setting a numeric string as minimum file size."""
+    file_filter = FileFilter()
+
+    # A numeric string should be accepted, even without units
+    file_filter.setMinFileSize("123")
+    assert file_filter.minFileSize == "123", "Failed to set min file size to a numeric string"
+
diff --git a/tests/fileMetadata.py b/tests/fileMetadata.py
new file mode 100644
index 0000000..507f1e5
--- /dev/null
+++ b/tests/fileMetadata.py
@@ -0,0 +1,76 @@
+import threading
+import pytest
+from twinTrim.dataStructures.fileMetadata import add_or_update_normal_file, normalStore, normalStore_lock
+
+# Mock get_file_hash so the tests never touch a real file
+def mock_get_file_hash(file_path):
+    return f"hash_{file_path}"
+
+# Automatically replace get_file_hash with mock_get_file_hash in every test
+@pytest.fixture(autouse=True)
+def mock_get_file_hash_func(monkeypatch):
+    monkeypatch.setattr("twinTrim.dataStructures.fileMetadata.get_file_hash", mock_get_file_hash)
+
+# Automatically reset normalStore before each test
+@pytest.fixture(autouse=True)
+def reset_normal_store():
+    normalStore.clear()
+
+def test_add_or_update_normal_file_concurrently():
+    """Test that normalStore stays consistent when add_or_update_normal_file is called concurrently with different file paths by multiple threads."""
+
+    # List of file paths to be added concurrently
+    file_paths = [f"file_{i}" for i in range(10)]
+
+    # Called by each thread to add one file path
+    def worker(file_path):
+        add_or_update_normal_file(file_path)
+
+    # Threads for concurrent execution
+    threads = [threading.Thread(target=worker, args=(file_path,)) for file_path in file_paths]
+
+    # Start all threads
+    for thread in threads:
+        thread.start()
+
+    # Wait for all threads to finish
+    for thread in threads:
+        thread.join()
+
+    # normalStore should hold exactly 10 unique file hashes (all file paths are unique)
+    assert len(normalStore) == 10
+
+    # Each file hash should map to exactly one file path
+    for file_path in file_paths:
+        file_hash = mock_get_file_hash(file_path)
+        assert file_hash in normalStore
+        assert normalStore[file_hash].filepaths == [file_path]
+
+def test_add_or_update_normal_file_with_duplicates_concurrently():
+    """Test that adding the same file path concurrently does not create duplicate entries in normalStore."""
+
+    file_path = "duplicate_file.txt"
+    num_threads = 5
+
+    # Called by each thread to add the same file path
+    def worker():
+        add_or_update_normal_file(file_path)
+
+    # Threads that all add the same file path concurrently
+    threads = [threading.Thread(target=worker) for _ in range(num_threads)]
+
+    # Start all threads
+    for thread in threads:
+        thread.start()
+
+    # Wait for all threads to finish
+    for thread in threads:
+        thread.join()
+
+    # normalStore should hold exactly one file hash (all threads added the same path)
+    file_hash = mock_get_file_hash(file_path)
+    assert len(normalStore) == 1
+    assert file_hash in normalStore
+
+    # The file path should appear only once despite being added by multiple threads
+    assert normalStore[file_hash].filepaths == [file_path]
\ No newline at end of file
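Both concurrency tests assume add_or_update_normal_file serializes access to normalStore via normalStore_lock (all three names come from the import at the top of the test file). A minimal sketch of the pattern the tests exercise, under that assumption; the FileMetadata constructor signature is guessed, and the project's actual body may differ:

    import threading

    normalStore = {}                     # file hash -> FileMetadata
    normalStore_lock = threading.Lock()  # guards every read/write of normalStore

    def add_or_update_normal_file(file_path):
        file_hash = get_file_hash(file_path)           # hashing can run outside the lock
        new_file_metadata = FileMetadata([file_path])  # constructor signature is an assumption
        with normalStore_lock:                         # serialize mutation of the shared store
            existing_file_metadata = normalStore.get(file_hash)
            if existing_file_metadata is None:
                normalStore[file_hash] = new_file_metadata
            else:
                existing_file_metadata.insert_file(file_path)  # assumed to skip already-present paths

The duplicate-path test passes only if the check-then-insert sequence happens atomically under the lock; without it, two threads could both see a missing hash and overwrite each other's entry.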
a/twinTrim/dataStructures/fileFilter.py b/twinTrim/dataStructures/fileFilter.py
index 86bf6e2..e2d210d 100644
--- a/twinTrim/dataStructures/fileFilter.py
+++ b/twinTrim/dataStructures/fileFilter.py
@@ -3,7 +3,7 @@ class FileFilter:
     def __init__(self):
-        self.minFileSize = "10kb"
+        self.minFileSize = "0kb"
         self.maxFileSize = "1gb"
         self.fileType = r"^.+\.*$"
         self.fileExclude = []
diff --git a/twinTrim/dataStructures/fileMetadata.py b/twinTrim/dataStructures/fileMetadata.py
index 09cbc14..e3c92bb 100644
--- a/twinTrim/dataStructures/fileMetadata.py
+++ b/twinTrim/dataStructures/fileMetadata.py
@@ -27,5 +27,4 @@ def add_or_update_normal_file(file_path: str):
         if existing_file_metadata is None:
             normalStore[file_hash] = new_file_metadata
         else:
-            existing_file_metadata.insert_file(file_path)
-
+            existing_file_metadata.insert_file(file_path)
\ No newline at end of file
diff --git a/twinTrim/flagController.py b/twinTrim/flagController.py
index 23d333e..25cbfbb 100644
--- a/twinTrim/flagController.py
+++ b/twinTrim/flagController.py
@@ -7,12 +7,20 @@
 from twinTrim.dataStructures.fileMetadata import FileMetadata, normalStore , add_or_update_normal_file
 from tqdm import tqdm
 
-def handleAllFlag(directory,file_filter):
+# Shared bar format used by the scanner thread pools
+progress_bar_format = "{desc}: {n_fmt}/{total_fmt} | ETA={remaining} | {rate_fmt} {bar} {percentage:.3f}%"
+def progress_bar_func(bar_desc, total, unit="file", color="yellow", bar_color="white"):
+    """Build an adaptive progress bar; returns a tqdm object."""
+    try:
+        bar_desc_obj = click.style(bar_desc, fg=color, bold=True)  # label is bold by default
+    except TypeError:  # click.style rejects unknown color names
+        print(f"Warning: invalid color '{color}', falling back to the default yellow.")
+        bar_desc_obj = click.style(bar_desc, fg="yellow", bold=True)
+    return tqdm(total=total, desc=bar_desc_obj, unit=unit, bar_format=progress_bar_format, colour=bar_color)
+
+def handleAllFlag(directory, file_filter, pb_color, bar_color):
     """Handle all duplicates automatically without asking if --all flag is set."""
     all_start_time = time.time()
-    yellow = '\033[93m'
-    reset = '\033[0m'
-    progress_bar_format = f"{yellow}{{l_bar}}{{bar}}{{r_bar}}{{bar}}{reset}"
 
     # Collect all file paths to process
     all_files = [os.path.join(root, file_name) for root, _, files in os.walk(directory) for file_name in files]
@@ -20,7 +28,7 @@ def handleAllFlag(directory,file_filter):
     total_files = len(all_files)
 
     # Use ThreadPoolExecutor to handle files concurrently
-    with ThreadPoolExecutor() as executor, tqdm(total=total_files, desc="Scanning files", unit="file", bar_format=progress_bar_format) as progress_bar:
+    with ThreadPoolExecutor() as executor, progress_bar_func("Scanning", total_files, color=pb_color, bar_color=bar_color) as progress_bar:
         futures = {executor.submit(add_or_update_file, file_path): file_path for file_path in all_files}
 
         # Update progress bar as files are processed
@@ -35,7 +43,7 @@
     click.echo(click.style("All duplicates deleted!", fg='green'))
 
-def find_duplicates(directory, file_filter):
+def find_duplicates(directory, file_filter, pb_color, bar_color):
     """Find duplicate files in the given directory and store them in normalStore."""
     # Collect all file paths first and apply filters
     all_files = [os.path.join(root, file_name) for root, _, files in os.walk(directory) for file_name in files]
@@ -43,16 +51,11 @@
     # Calculate the total number of files and ensure it is finite
     total_files = len(all_files)
-
-    # Define yellow color ANSI escape code
-    yellow = '\033[93m'
-    reset = '\033[0m'
-    progress_bar_format = f"{yellow}{{l_bar}}{{bar}}{{r_bar}}{{bar}}{reset}"
 
     def process_file(file_path):
         add_or_update_normal_file(file_path)
 
-    with ThreadPoolExecutor() as executor, tqdm(total=total_files, desc="Scanning files", unit="file", bar_format=progress_bar_format) as progress_bar:
+    with ThreadPoolExecutor() as executor, progress_bar_func("Hashing", total_files, color=pb_color, bar_color=bar_color) as progress_bar:
         # Submit tasks to the executor
         futures = {executor.submit(process_file, file_path): file_path for file_path in all_files}
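Since progress_bar_func returns a plain tqdm object, it can be driven like any other tqdm bar. A quick standalone check (the total and colors are illustrative):

    # Both call sites use the bar as a context manager, updating once per completed future
    with progress_bar_func("Scanning", total=100, color="cyan", bar_color="green") as progress_bar:
        for _ in range(100):
            progress_bar.update(1)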
diff --git a/twinTrim/flags.py b/twinTrim/flags.py
index a0d8136..1d8fce6 100644
--- a/twinTrim/flags.py
+++ b/twinTrim/flags.py
@@ -10,11 +10,13 @@
 @click.command()
 @click.argument("directory", type=click.Path(exists=True))
 @click.option("--all", is_flag=True, help="Delete duplicates automatically without asking.")
-@click.option("--min-size", default="10kb", type=str, help="Minimum file size in bytes.")
+@click.option("--min-size", default="0kb", type=str, help="Minimum file size to include (e.g., 10kb).")
 @click.option("--max-size", default="1gb", type=str, help="Maximum file size in bytes.")
 @click.option("--file-type", default=".*", help="File type to include (e.g., .txt, .jpg).")
 @click.option("--exclude", multiple=True, help="Files to exclude by name.")
-def cli(directory, all, min_size, max_size, file_type, exclude):
+@click.option("--label-color", default="yellow", type=str, help="Color of the progress bar label.")
+@click.option("--bar-color", default="#aaaaaa", type=str, help="Color of the progress bar.")
+def cli(directory, all, min_size, max_size, file_type, exclude, label_color, bar_color):
     """Find and manage duplicate files in the specified DIRECTORY."""
 
     # Initialize the FileFilter object
@@ -26,11 +28,11 @@ def cli(directory, all, min_size, max_size, file_type, exclude):
         file_filter.addFileExclude(file_name)
 
     if all:
-        handleAllFlag(directory, file_filter)
+        handleAllFlag(directory, file_filter, label_color, bar_color)
         return
 
     start_time = time.time()
-    duplicates = find_duplicates(directory, file_filter)
+    duplicates = find_duplicates(directory, file_filter, label_color, bar_color)
     end_time = time.time()
     time_taken = end_time - start_time
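The label-color fallback in progress_bar_func relies on click.style raising a TypeError for color names it does not recognize (current click behavior), which is easy to confirm in isolation:

    import click

    try:
        click.style("Scanning", fg="not-a-color", bold=True)
    except TypeError as exc:
        print(f"click rejected the color: {exc}")  # this is what triggers the yellow fallback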