diff --git a/rex/main.py b/rex/main.py index e596b0f..b59147e 100644 --- a/rex/main.py +++ b/rex/main.py @@ -10,7 +10,7 @@ from yaspin import yaspin from glob import glob from difflib import SequenceMatcher - +import hashlib from pydavinci import davinci rich_tracebacks.install() @@ -25,14 +25,22 @@ class Backup: def __init__(self): + # TODO: Ensure no wrongful file collisions + # Use database name, type, ip address, and project path/folder structure in project manager + # to create a unique hash for the backup name. Keep the timestamp. So backups are still unique. + self.project = resolve.project + self.db_name = resolve.project_manager.db["DbName"] self.timestamp = datetime.now().strftime("%H%M%S") - self.backup_filename = f"{self.project.name}_{self.timestamp}.drp" - static_dir = os.path.normpath(settings["backup"]["static_dir"]) - self.backup_filepath = os.path.join(static_dir, self.backup_filename) - print(f"Backup Name: {self.backup_filename}") - print(f"Backup Path: {self.backup_filepath}") + self.backup_filename = ( + f"{self.db_name}_{self.project.name}_{self.timestamp}.drp" + ) + self.static_dir = os.path.normpath(settings["backup"]["static_dir"]) + self.backup_filepath = os.path.join(self.static_dir, self.backup_filename) + + print(f"Backup Name: '{self.backup_filename}'") + print(f"Backup Path: '{self.static_dir}'") def run(self, generate_checksum: bool = True, de_duplicate: bool = True): """ @@ -43,23 +51,30 @@ def run(self, generate_checksum: bool = True, de_duplicate: bool = True): de_duplicate (bool, optional): Compare neighbouring checksums and symlink file if they're identical. Defaults to True. """ - if not generate_checksum and de_duplicate: - raise ValueError("generate_checksum must be enabled to de-duplicate!") - if de_duplicate: logger.info("Gathering existing checksums...") - self.gather_checksums() + self.gather_project_series() logger.info("Exporting project backup...") self.export_project() + if de_duplicate: + logger.info("De-duplicating...") + self.de_duplicate() + if generate_checksum: logger.info("Generating checksum...") self.generate_checksum() - if de_duplicate: - logger.info("De-duplicating...") - self.de_duplicate() + def generate_checksum(self): + + hash_md5 = hashlib.md5() + with open(self.backup_filepath, "rb") as f: + for chunk in iter(lambda: f.read(4096), b""): + hash_md5.update(chunk) + + with open(self.backup_filepath + ".md5", "x") as checksum_file: + checksum_file.write(hash_md5.hexdigest()) def export_project(self): @@ -69,30 +84,13 @@ def export_project(self): stills_and_luts=True, ) - def generate_checksum(self): - - assert os.path.exists(self.backup_filepath) - - def md5sum(file_): - hash = md5() - with open(file_, "rb") as f: - for chunk in iter(lambda: f.read(128 * hash.block_size), b""): - hash.update(chunk) - return hash.hexdigest() - - self.checksum_filepath = self.backup_filepath + ".md5" - with open(self.checksum_filepath, "x") as checksum_file: - checksum_file.write(md5sum(self.backup_filepath)) - - def gather_checksums(self): + def gather_project_series(self): """ - Get checksums. - - Gathers all checksums that match the project backup series. + Get all projects in the project series (all backups matching current project) """ self.backup_filepath = os.path.normpath(self.backup_filepath) parent_dir = os.path.dirname(self.backup_filepath) - self.matching_checksums = glob(f"{parent_dir}/*{self.project.name}*.drp.md5") + self.matching_checksums = glob(f"{parent_dir}/*{self.project.name}*.drp") sorted( self.matching_checksums, key=lambda name: os.path.getmtime(os.path.join(name)), @@ -103,47 +101,30 @@ def de_duplicate(self): most_recent_checksums = self.matching_checksums[:3] - def compare_checksums(x: str, y: str): - """ - Compare two checksum files. - - Args: - x (str): First file to compare - y (str): Second file to compare + def compare_projects(x: str, y: str): - Returns: - bool: Returns True if they match - """ + file_x = open(x, "rb").read() + file_y = open(y, "rb").read() + m = SequenceMatcher(None, file_x, file_y) + ratio = m.quick_ratio() - file_x = open(x, "rb") - file_y = open(y, "rb") + logger.debug(f"[magenta]Match ratio is: {ratio}") - checksum_x = file_x.read() - checksum_y = file_y.read() - logger.debug(f"[magenta]{checksum_x} - {checksum_y}") - - if checksum_x == checksum_y: + if ratio >= 0.990: return True return False matching_checksum = None - x = self.checksum_filepath + x = self.backup_filepath for y in most_recent_checksums: logger.debug( f"[magenta]Comparing checksum files: '{os.path.basename(x)}' and '{os.path.basename(y)}'" ) - if compare_checksums(x, y): + if compare_projects(x, y): matching_checksum = y break if matching_checksum: logger.info(f"Most recent match {matching_checksum}") - - def compare_projects(self): - - text1 = open(file1).read() - text2 = open(file2).read() - m = SequenceMatcher(None, text1, text2) - m.ratio()