Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

updated Dry Run feature #128

Closed
wants to merge 9 commits into from
Closed
13 changes: 12 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ TwinTrim is a powerful and efficient tool designed to find and manage duplicate
- **Multi-Threaded Processing**: Utilizes multi-threading to quickly scan and process large numbers of files concurrently.
- **Deadlock Prevention**: Implements locks to prevent deadlocks during multi-threaded operations, ensuring smooth and safe execution.
- **User-Friendly Interface**: Offers clear prompts and feedback via the command line, making the process straightforward and interactive.
- **Dry Run**: Use the `--dry-run` option to simulate the process without making any changes, so you can review what would happen before running it for real.

## How It Works

Expand All @@ -46,7 +47,10 @@ TwinTrim is a powerful and efficient tool designed to find and manage duplicate
- Duplicate files are identified by comparing their hashes.
- Based on file modification time, the latest file is retained, and older duplicates are removed.

5. **Deadlock Prevention**:
5. **Dry Run Mode**:
- The `--dry-run` flag allows you to simulate the duplicate removal process without making any actual changes, giving you an opportunity to review potential actions before committing to them.

6. **Deadlock Prevention**:
- Uses locks within multi-threaded processes to ensure that resources are accessed safely, preventing deadlocks that could otherwise halt execution.

### Key Functions
Expand Down Expand Up @@ -74,6 +78,7 @@ python -m twinTrim.main <directory> [OPTIONS]
- `--exclude`: Exclude specific files by name.
- `--label-color`: Set the font color of the output label of the progress bar.
- `--bar-color`: Set the color of the progress bar.
- `--dry-run`: Simulate the duplicate removal process without making any changes.

### Examples

Expand All @@ -92,6 +97,12 @@ python -m twinTrim.main <directory> [OPTIONS]
python -m twinTrim.main /path/to/directory --min-size "50kb" --max-size "500mb" --file-type "txt"
```

4. **Dry Run Simulation**:

```bash
python -m twinTrim.main /path/to/directory --dry-run
```

## Dependencies

- Python 3.6+
Expand Down
15 changes: 3 additions & 12 deletions twinTrim/flagController.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,10 +33,6 @@ def handleAllFlag(directory,file_filter,pb_color,bar_color):

# Update progress bar as files are processed
for future in as_completed(futures):
try:
future.result() # Ensures exception handling for each future
except Exception as e:
click.echo(click.style(f"Error processing file {futures[future]}: {str(e)}", fg='red'))
progress_bar.update(1)

click.echo(click.style("All files scanned and duplicates handled.", fg='green'))
Expand All @@ -50,7 +46,6 @@ def handleAllFlag(directory,file_filter,pb_color,bar_color):
def find_duplicates(directory, file_filter, pb_color, bar_color):
"""Find duplicate files in the given directory and store them in normalStore."""
# Collect all file paths first and apply filters
start_time=time.time()
all_files = [os.path.join(root, file_name) for root, _, files in os.walk(directory) for file_name in files]
all_files = [f for f in all_files if file_filter.filter_files(f)] # Apply filters

Expand All @@ -65,14 +60,8 @@ def process_file(file_path):
futures = {executor.submit(process_file, file_path): file_path for file_path in all_files}

for future in as_completed(futures):
try:
future.result() # Ensures exception handling for each future
except Exception as e:
click.echo(click.style(f"Error processing file {futures[future]}: {str(e)}", fg='red'))
progress_bar.update(1)
progress_bar.update(1)

end_time=time.time()
click.echo(click.style(f"Time taken to find all duplicate files: {end_time-start_time:.2f} seconds.", fg='green'))
duplicates = []
for _, metadata in normalStore.items():
if len(metadata.filepaths) > 1:
Expand All @@ -81,3 +70,5 @@ def process_file(file_path):
duplicates.append((original_path, duplicate_path))

return duplicates


88 changes: 32 additions & 56 deletions twinTrim/flags.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,12 @@
import os
import click
import time
import logging
import inquirer
from collections import defaultdict
from twinTrim.utils import handle_and_remove, parse_size
from twinTrim.flagController import handleAllFlag, find_duplicates
from beaupy import select_multiple
from twinTrim.dataStructures.fileFilter import FileFilter

# Setting up logging configuration
logging.basicConfig (
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@adityakalburgi
Please check this.
Your changes remove the logging part and the interactive CLI part entirely.
This is not because you removed them, but because you haven't pulled the changes.

filename='duplicate_file_manager.log',
level = logging.INFO,
format = '%(asctime)s - %(levelname)s - %(message)s'
)

@click.command()
@click.argument("directory", type=click.Path(exists=True))
@click.option("--all", is_flag=True, help="Delete duplicates automatically without asking.")
Expand All @@ -24,7 +16,8 @@
@click.option("--exclude", multiple=True, help="Files to exclude by name.")
@click.option("--label-color", default="yellow", type=str, help="Color of the label of progress bar.")
@click.option("--bar-color", default='#aaaaaa', type=str, help="Color of the progress bar.")
def cli(directory, all, min_size, max_size, file_type, exclude, label_color, bar_color):
@click.option("--dry-run", is_flag=True, help="Simulate the process without deleting files.")
def cli(directory, all, min_size, max_size, file_type, exclude, label_color, bar_color, dry_run):
"""Find and manage duplicate files in the specified DIRECTORY."""

# Initialize the FileFilter object
Expand All @@ -36,27 +29,23 @@ def cli(directory, all, min_size, max_size, file_type, exclude, label_color, bar
file_filter.addFileExclude(file_name)

if all:
logging.info("Deleting all duplicate files without asking.")
handleAllFlag(directory, file_filter, label_color, bar_color)
if dry_run:
click.echo(click.style("Dry run mode enabled: Skipping actual deletion.", fg='yellow'))
handleAllFlag(directory, file_filter, label_color, bar_color, dry_run=dry_run) # Modify handleAllFlag to support dry_run
return

start_time = time.time()
logging.info(f"Searching for duplicates in directory: {directory}")
duplicates = find_duplicates(directory, file_filter, label_color, bar_color)

try:
duplicates = find_duplicates(directory, file_filter, label_color, bar_color)
except Exception as e:
logging.error(f"Error finding duplicates: {str(e)}")
click.echo(click.style("Error while finding duplicates. Check the log for details.", fg='red'))
return
end_time = time.time()
time_taken = end_time - start_time

if not duplicates:
click.echo(click.style("No duplicate files found.", fg='green'))
logging.info("No duplicate files found.")
click.echo(click.style(f"Time taken: {time_taken:.2f} seconds.", fg='green'))
return

click.echo(click.style(f"Found {len(duplicates)} sets of duplicate files:", fg='yellow'))
logging.info(f"Found {len(duplicates)} set of duplicate files")

duplicates_dict = defaultdict(list)
for original, duplicate in duplicates:
Expand All @@ -66,42 +55,29 @@ def cli(directory, all, min_size, max_size, file_type, exclude, label_color, bar
for original, duplicates_list in duplicates_dict.items():
click.echo(click.style(f"Original file: \"{original}\"", fg='cyan'))
click.echo(click.style(f"Number of duplicate files found: {len(duplicates_list)}", fg='cyan'))
logging.info(f"Original file: \"{original}\" with {len(duplicates_list)} duplicates")

click.echo(click.style("They are:", fg='cyan'))

# Create file options with additional information
file_options = [
f"{idx + 1}) {duplicate} (Size: {os.path.getsize(duplicate)} bytes)" for idx, duplicate in enumerate(duplicates_list)
]

answers = inquirer.prompt(
[
inquirer.Checkbox(
'files',
message="Select files to delete (Use space to select, enter to confirm, or ctr + c to cancel, arrow key to navigate.)",
choices=file_options,
validate=lambda answer, current: len(answer) > 0 or "You must choose at least one file.",
),
inquirer.Confirm(
'confirm',
message="Are you sure you want to delete the selected files?",
default=True
)
])


if answers and answers['confirm']:
selected_files = answers['files']
# Convert the selected options back to the original file paths
files_to_delete = [duplicates_list[int(option.split(")")[0]) - 1] for option in selected_files]

for file_path in files_to_delete:
file_options = [f"{idx + 1}) {duplicate}" for idx, duplicate in enumerate(duplicates_list)]

# Prompt user to select which files to delete
selected_indices = select_multiple(
file_options, # List of files to choose from
ticked_indices=[], # Default indices that are selected
maximal_count=len(file_options)
)

# Convert the indices back to the original file paths
files_to_delete = [duplicates_list[int(option.split(")")[0]) - 1] for option in selected_indices]

for file_path in files_to_delete:
if dry_run:
click.echo(click.style(f"[Dry Run] Would delete: {file_path}", fg='yellow'))
else:
handle_and_remove(file_path)
else:
click.echo(click.style("File deletion canceled.", fg='yellow'))

end_time = time.time()
time_taken = end_time - start_time
if not dry_run:
click.echo(click.style("Selected duplicate files removed!", fg='green'))
else:
click.echo(click.style("Dry run completed. No files were actually deleted.", fg='yellow'))

click.echo(click.style(f"Time taken: {time_taken:.2f} seconds.", fg='green'))
logging.info(f"Total time taken: {time_taken:.2f} seconds.")

Loading