diff --git a/.github/workflows/go.yml b/.github/workflows/go.yml index 6ee293f..ffe90c4 100644 --- a/.github/workflows/go.yml +++ b/.github/workflows/go.yml @@ -29,7 +29,7 @@ jobs: - name: Set up Go uses: actions/setup-go@v5 with: - go-version: '1.22' + go-version: '1.20' - name: Build run: | diff --git a/app/consumer.go b/app/consumer.go index 8f77704..440a040 100644 --- a/app/consumer.go +++ b/app/consumer.go @@ -19,7 +19,6 @@ func consumer( geoLocation bool, format string, verbose bool, - totalFiles int, duplicateStrategy string, processedFiles *int64, done chan<- struct{}) { @@ -39,7 +38,6 @@ func consumer( geoLocation, format, verbose, - totalFiles, duplicateStrategy, ) @@ -59,7 +57,6 @@ func processFileInfo( geoLocation bool, format string, verbose bool, - totalFiles int, duplicateStrategy string, ) { var generatedPath string @@ -93,9 +90,6 @@ func processFileInfo( fileInfo.Path, generatedPath, verbose, - /*processedImages,*/ - 0, - totalFiles, fileInfo.isDuplicate, duplicateStrategy, ) @@ -104,14 +98,14 @@ func processFileInfo( } } -func moveFile(sourcePath, destinationPath string, verbose bool /*processedImages *list.List,*/, processedFiles int, totalFiles int, isDuplicate bool, duplicateStrategy string) error { +func moveFile(sourcePath, destinationPath string, verbose bool, isDuplicate bool, duplicateStrategy string) error { destPath := filepath.Dir(destinationPath) if err := os.MkdirAll(destPath, os.ModePerm); err != nil { return fmt.Errorf("failed to create destination directory %s: %v", destPath, err) } if verbose { - moveActionLog, err := logMoveAction(sourcePath, destPath, isDuplicate, duplicateStrategy, processedFiles, totalFiles) + moveActionLog, err := logMoveAction(sourcePath, destPath, isDuplicate, duplicateStrategy) if err != nil { return err } diff --git a/app/creator.go b/app/creator.go index e94cc73..8d1b8ce 100644 --- a/app/creator.go +++ b/app/creator.go @@ -120,13 +120,13 @@ func processFile( switch duplicateStrategy { case "skip": fmt.Printf("Skipped duplicate file: %v\n", path) - logMoveAction(path, "", true, duplicateStrategy, 0, 0) + logMoveAction(path, "", true, duplicateStrategy) return case "delete": if err := os.Remove(path); err != nil { errorQueue <- fmt.Errorf("failed to delete duplicate file: %v", err) } else { - logMoveAction(path, "", true, duplicateStrategy, 0, 0) + logMoveAction(path, "", true, duplicateStrategy) } return } diff --git a/app/logger.go b/app/logger.go index 60c5657..25d3871 100644 --- a/app/logger.go +++ b/app/logger.go @@ -52,7 +52,7 @@ func infoHandler(infoQueue chan string) { } } -func logMoveAction(sourcePath, destinationDirectory string, isDuplicate bool, duplicateStrategy string, processedFiles int, totalFiles int) (string, error) { +func logMoveAction(sourcePath, destinationDirectory string, isDuplicate bool, duplicateStrategy string) (string, error) { colorCode := "\033[32m" actionName := "Moved (original)" diff --git a/app/main.go b/app/main.go index de0f32d..b4d11fb 100644 --- a/app/main.go +++ b/app/main.go @@ -41,9 +41,7 @@ func main() { flag.Parse() fileTypes := flagProcessor() - sourcePath := filepath.Clean(*inputPath) - destinationPath := filepath.Clean(*outputPath) - validatePaths(sourcePath, destinationPath) + sourcePath, destinationPath := validatePaths(*inputPath, *outputPath) fileQueue := make(chan FileInfo, 100) infoQueue := make(chan string, 50) @@ -114,7 +112,6 @@ func main() { *geoLocation, *format, *verbose, - totalFilesToMove, *duplicateStrategy, &processedFiles, done, @@ -142,7 +139,7 @@ func formatElapsedTime(elapsed time.Duration) string { return fmt.Sprintf("%d minutes and %d seconds", minutes, seconds) } - return fmt.Sprintf("%.2f seconds.", elapsed.Seconds()) + return fmt.Sprintf("%.2f seconds", elapsed.Seconds()) } func spinner(stopSpinner chan bool, verb string, processedFiles *int64, totalFiles int) { @@ -269,7 +266,10 @@ func directoryExists(path string) error { return nil } -func validatePaths(sourcePath, destinationPath string) { +func validatePaths(inputPath, outputPath string) (string, string) { + sourcePath := filepath.Clean(inputPath) + destinationPath := filepath.Clean(outputPath) + if sourcePath == "" || destinationPath == "" { logger(LoggerTypeFatal, "input and output paths must be supplied") } @@ -286,4 +286,6 @@ func validatePaths(sourcePath, destinationPath string) { } else if err := directoryExists(destinationPath); err != nil { logger(LoggerTypeFatal, err.Error()) } + + return sourcePath, destinationPath } diff --git a/go.mod b/go.mod index cd04068..dd604dd 100644 --- a/go.mod +++ b/go.mod @@ -3,3 +3,9 @@ module github.com/keybraker/mediarizer-2 go 1.22 require github.com/rwcarlsen/goexif v0.0.0-20190401172101-9e8deecbddbd + +require ( + github.com/klauspost/cpuid/v2 v2.0.12 // indirect + github.com/zeebo/blake3 v0.2.4 // indirect + golang.org/x/exp v0.0.0-20240909161429-701f63a606c0 // indirect +) diff --git a/go.sum b/go.sum index 4f6df19..531b0ea 100644 --- a/go.sum +++ b/go.sum @@ -1,2 +1,8 @@ +github.com/klauspost/cpuid/v2 v2.0.12 h1:p9dKCg8i4gmOxtv35DvrYoWqYzQrvEVdjQ762Y0OqZE= +github.com/klauspost/cpuid/v2 v2.0.12/go.mod h1:g2LTdtYhdyuGPqyWyv7qRAmj1WBqxuObKfj5c0PQa7c= github.com/rwcarlsen/goexif v0.0.0-20190401172101-9e8deecbddbd h1:CmH9+J6ZSsIjUK3dcGsnCnO41eRBOnY12zwkn5qVwgc= github.com/rwcarlsen/goexif v0.0.0-20190401172101-9e8deecbddbd/go.mod h1:hPqNNc0+uJM6H+SuU8sEs5K5IQeKccPqeSjfgcKGgPk= +github.com/zeebo/blake3 v0.2.4 h1:KYQPkhpRtcqh0ssGYcKLG1JYvddkEA8QwCM/yBqhaZI= +github.com/zeebo/blake3 v0.2.4/go.mod h1:7eeQ6d2iXWRGF6npfaxl2CU+xy2Fjo2gxeyZGCRUjcE= +golang.org/x/exp v0.0.0-20240909161429-701f63a606c0 h1:e66Fs6Z+fZTbFBAxKfP3PALWBtpfqks2bwGcexMxgtk= +golang.org/x/exp v0.0.0-20240909161429-701f63a606c0/go.mod h1:2TbTHSBQa924w8M6Xs1QcRcFwyucIwBGpK1p2f1YFFY= diff --git a/hash/hash.go b/hash/hash.go index ab2934f..7a21dbc 100644 --- a/hash/hash.go +++ b/hash/hash.go @@ -11,8 +11,36 @@ import ( "strings" "sync" "sync/atomic" + "time" + + "golang.org/x/exp/mmap" ) +type FileMeta struct { + Size int64 + ModTime time.Time +} + +type CachedFile struct { + FileMeta + Hash []byte +} + +type readerAtWrapper struct { + readerAt io.ReaderAt + offset int64 + size int64 +} + +func (r *readerAtWrapper) Read(p []byte) (n int, err error) { + if r.offset >= r.size { + return 0, io.EOF + } + n, err = r.readerAt.ReadAt(p, r.offset) + r.offset += int64(n) + return n, err +} + // isImageFile checks if the file is an image based on its extension. func isImageFile(filePath string) bool { lowerFilePath := strings.ToLower(filePath) @@ -23,15 +51,27 @@ func isImageFile(filePath string) bool { // calculateFileHash calculates the SHA-256 hash of the file at the given filePath. func calculateFileHash(filePath string) ([]byte, error) { - file, err := os.Open(filePath) + readerAt, err := mmap.Open(filePath) + if err != nil { + return nil, fmt.Errorf("failed to memory-map file %s: %v", filePath, err) + } + defer readerAt.Close() + + fileInfo, err := os.Stat(filePath) if err != nil { - return nil, fmt.Errorf("failed to open file at %s: %v", filePath, err) + return nil, fmt.Errorf("failed to stat file %s: %v", filePath, err) + } + fileSize := fileInfo.Size() + + reader := &readerAtWrapper{ + readerAt: readerAt, + offset: 0, + size: fileSize, } - defer file.Close() hash := sha256.New() - if _, err := io.Copy(hash, file); err != nil { - return nil, fmt.Errorf("failed to calculate hash for file: %v", err) + if _, err := io.Copy(hash, reader); err != nil { + return nil, fmt.Errorf("failed to calculate hash for file %s: %v", filePath, err) } return hash.Sum(nil), nil @@ -39,29 +79,42 @@ func calculateFileHash(filePath string) ([]byte, error) { // GetFileHash retrieves or calculates the hash of the file at filePath. func GetFileHash(filePath string, hashCache *sync.Map) ([]byte, error) { - if hash, found := hashCache.Load(filePath); found { - return hash.([]byte), nil + info, err := os.Stat(filePath) + if err != nil { + return nil, err } + meta := FileMeta{Size: info.Size(), ModTime: info.ModTime()} - calculatedHash, err := calculateFileHash(filePath) + if cached, found := hashCache.Load(filePath); found { + cachedFile := cached.(CachedFile) + if cachedFile.Size == meta.Size && cachedFile.ModTime.Equal(meta.ModTime) { + return cachedFile.Hash, nil + } + } + + hashValue, err := calculateFileHash(filePath) if err != nil { return nil, err } - hashCache.Store(filePath, calculatedHash) - return calculatedHash, nil + cachedFile := CachedFile{ + FileMeta: meta, + Hash: hashValue, + } + hashCache.Store(filePath, cachedFile) + + return hashValue, nil } // hashImagesInPath hashes all images in the given path and updates the fileHashMap. func HashImagesInPath(path string, hashCache *sync.Map, hashedFiles *int64) (*sync.Map, error) { fileHashMap := &sync.Map{} - fileChan := make(chan string) // Channel to pass file paths to workers - errChan := make(chan error) // Channel to collect errors - var wg sync.WaitGroup // WaitGroup to track the worker goroutines + fileChan := make(chan string) + errChan := make(chan error) + var wg sync.WaitGroup - numWorkers := runtime.NumCPU() / 2 + numWorkers := runtime.NumCPU() * 4 - // Start worker goroutines for i := 0; i < numWorkers; i++ { wg.Add(1) go func() { @@ -84,9 +137,8 @@ func HashImagesInPath(path string, hashCache *sync.Map, hashedFiles *int64) (*sy }() } - // Walk the directory and send file paths to the channel go func() { - defer close(fileChan) // Close the channel when done + defer close(fileChan) err := filepath.Walk(path, func(filePath string, info os.FileInfo, err error) error { if err != nil { errChan <- fmt.Errorf("failed to walk path %s: %v", filePath, err) @@ -94,25 +146,22 @@ func HashImagesInPath(path string, hashCache *sync.Map, hashedFiles *int64) (*sy } if !info.IsDir() { - fileChan <- filePath // Send file to channel for hashing + fileChan <- filePath } return nil }) - // If an error occurred during filepath walk, send it to the error channel if err != nil { errChan <- err } }() - // Wait for all workers to finish go func() { wg.Wait() - close(errChan) // Close error channel when all workers are done + close(errChan) }() - // Check for errors during execution for err := range errChan { if err != nil { return nil, err