From 31c18be00762771689604ad51a06242a1266b531 Mon Sep 17 00:00:00 2001 From: Alexander Date: Sat, 12 Sep 2020 11:08:39 -0400 Subject: [PATCH 1/4] Removed max thread limit --- pkg/cmd/cli.go | 7 +-- pkg/core/core.go | 100 ++++++++++++---------------------------- pkg/structures/map.go | 36 +++++++-------- pkg/structures/queue.go | 37 --------------- 4 files changed, 47 insertions(+), 133 deletions(-) delete mode 100644 pkg/structures/queue.go diff --git a/pkg/cmd/cli.go b/pkg/cmd/cli.go index db2288e..ac24532 100644 --- a/pkg/cmd/cli.go +++ b/pkg/cmd/cli.go @@ -8,9 +8,8 @@ import ( // CobraInterface represents the CMD interface type CobraInterface struct { - RootCmd *cobra.Command - NumberOfThreads uint64 - RootFolder string + RootCmd *cobra.Command + RootFolder string } func (cmd *CobraInterface) setRootCommand() { @@ -38,10 +37,8 @@ func (cmd *CobraInterface) setStart() { Long: "Long description", Run: func(c *cobra.Command, arg []string) { cmd.RootFolder, _ = c.PersistentFlags().GetString("path") - cmd.NumberOfThreads, _ = c.PersistentFlags().GetUint64("threads") }, } - start.PersistentFlags().Uint64P("threads", "t", 4, "--threads 2") start.PersistentFlags().StringP("path", "f", "", "--path /home") start.MarkPersistentFlagRequired("path") diff --git a/pkg/core/core.go b/pkg/core/core.go index 9ba0923..da89d3d 100644 --- a/pkg/core/core.go +++ b/pkg/core/core.go @@ -3,9 +3,7 @@ package core import ( "fmt" "path" - "runtime" "sync" - "time" "github.com/Alex99y/duplicate-files/pkg/cmd" "github.com/Alex99y/duplicate-files/pkg/crypto" @@ -13,56 +11,29 @@ import ( "github.com/Alex99y/duplicate-files/pkg/utils" ) -const threadRetryBeforeReturn = 1 - var wg sync.WaitGroup -// StructureInfo contains the configuration to start the process -type StructureInfo struct { - folderQueue *structures.QueueWithSync - resultMap *structures.MapWithSync -} - -func (s *StructureInfo) processFile() { - // Process file -} - -func (s *StructureInfo) processFolder(id int) { +func processFolder(file string) { // Retries before return - retriesLeft := threadRetryBeforeReturn - for { - // Dequeue next file to process - nextFolderToProcess := s.folderQueue.Dequeue() - - if nextFolderToProcess != nil { - retriesLeft = threadRetryBeforeReturn - file := nextFolderToProcess.(string) - isDir, err := utils.IsDirectory(file) - if err != nil { - fmt.Println(err) - continue - } - if isDir { - // Process folder - files := utils.ReadFilesFromDirectory(file) - for _, f := range files { - s.folderQueue.Enqueue(path.Join(file, f)) - } - } else { - // Process regular file - fileContent := utils.ReadFile(file) - fileHash := crypto.GetFileHash(fileContent) - s.resultMap.AddElement(fileHash, file) - fileContent = nil - } - } else { - if retriesLeft == 0 { - break - } else { - time.Sleep(500 * time.Millisecond) - retriesLeft-- - } + isDir, err := utils.IsDirectory(file) + if err != nil { + fmt.Println(err) + wg.Done() + return + } + if isDir { + // Process folder + files := utils.ReadFilesFromDirectory(file) + for _, f := range files { + wg.Add(1) + go processFolder(path.Join(file, f)) } + } else { + // Process regular file + fileContent := utils.ReadFile(file) + fileHash := crypto.GetFileHash(fileContent) + structures.AddElement(fileHash, file) + fileContent = nil } // End task @@ -72,44 +43,31 @@ func (s *StructureInfo) processFolder(id int) { // Start function will start the thread process func Start(config cmd.CobraInterface) { - // Prepare queues - structure := StructureInfo{ - // Contains the folder/files to process - folderQueue: structures.NewQueue(), - // Contains the result (duplicated files) - resultMap: structures.NewMap(), - } - structure.folderQueue.Enqueue(config.RootFolder) - - // Total threads to improve paralellism - threads := runtime.NumCPU() / 2 - if int(config.NumberOfThreads) < threads { - threads = int(config.NumberOfThreads) - } - runtime.GOMAXPROCS(threads) - wg.Add(threads) + // Excecute first thread + wg.Add(1) // Start searching - for i := 0; i < threads; i++ { - go structure.processFolder(i) - } + go processFolder(config.RootFolder) // Wait until goroutines ends wg.Wait() - resultMap := structure.resultMap.GetMap() + resultMap := structures.GetMap() gotDuplicates := false - for key, files := range resultMap { + resultMap.Range(func(key interface{}, value interface{}) bool { + files := value.([]string) if len(files) > 1 { gotDuplicates = true - fmt.Println("Duplicated files (" + key + "):") + fmt.Println("Duplicated files (" + key.(string) + "):") for _, file := range files { fmt.Println(file) } fmt.Print("\n") } - } + return true + }) + if gotDuplicates == false { fmt.Println("No duplicated files found") } diff --git a/pkg/structures/map.go b/pkg/structures/map.go index 2146999..18d9537 100644 --- a/pkg/structures/map.go +++ b/pkg/structures/map.go @@ -4,29 +4,25 @@ import ( "sync" ) -// MapWithSync have the map with mutex -type MapWithSync struct { - mutex sync.RWMutex - smap map[string][]string -} +// SMap is the structure that have the results +var SMap sync.Map // AddElement add an element to the map -func (c *MapWithSync) AddElement(key string, value string) { - c.mutex.Lock() - c.smap[key] = append(c.smap[key], value) - c.mutex.Unlock() +func AddElement(key string, value string) { + results, ok := SMap.Load(key) + if ok == true { + tempArray := results.([]string) + tempArray = append(tempArray, value) + SMap.Store(key, tempArray) + } else { + var newArray []string + newArray = make([]string, 0) + newArray = append(newArray, value) + SMap.Store(key, newArray) + } } // GetMap returns the map -func (c *MapWithSync) GetMap() map[string][]string { - c.mutex.RLock() - defer c.mutex.RUnlock() - return c.smap -} - -// NewMap creates a new MapWithSync instance -func NewMap() *MapWithSync { - return &MapWithSync{ - smap: make(map[string][]string), - } +func GetMap() *sync.Map { + return &SMap } diff --git a/pkg/structures/queue.go b/pkg/structures/queue.go deleted file mode 100644 index dff0e75..0000000 --- a/pkg/structures/queue.go +++ /dev/null @@ -1,37 +0,0 @@ -package structures - -import ( - "container/list" - "sync" -) - -// QueueWithSync is the queue interface -type QueueWithSync struct { - queue *list.List - mutex sync.Mutex -} - -// Enqueue add a new element in to the queue -func (f *QueueWithSync) Enqueue(element interface{}) { - f.mutex.Lock() - f.queue.PushBack(element) - f.mutex.Unlock() -} - -// Dequeue remove an element from the queue -func (f *QueueWithSync) Dequeue() interface{} { - f.mutex.Lock() - last := f.queue.Back() - defer f.mutex.Unlock() - if last == nil { - return nil - } - return f.queue.Remove(last) -} - -// NewQueue creates a new instance of QueueWithSync -func NewQueue() *QueueWithSync { - return &QueueWithSync{ - queue: list.New(), - } -} From 29b077052f042d3971acbd67f6d7600aabeb9b5e Mon Sep 17 00:00:00 2001 From: Alexander Date: Sat, 12 Sep 2020 11:11:01 -0400 Subject: [PATCH 2/4] Updated comments --- pkg/core/core.go | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/pkg/core/core.go b/pkg/core/core.go index da89d3d..3d76b75 100644 --- a/pkg/core/core.go +++ b/pkg/core/core.go @@ -14,7 +14,6 @@ import ( var wg sync.WaitGroup func processFolder(file string) { - // Retries before return isDir, err := utils.IsDirectory(file) if err != nil { fmt.Println(err) @@ -22,34 +21,29 @@ func processFolder(file string) { return } if isDir { - // Process folder files := utils.ReadFilesFromDirectory(file) for _, f := range files { wg.Add(1) go processFolder(path.Join(file, f)) } } else { - // Process regular file fileContent := utils.ReadFile(file) fileHash := crypto.GetFileHash(fileContent) structures.AddElement(fileHash, file) fileContent = nil } - // End task wg.Done() } -// Start function will start the thread process +// Start function will begin the thread process func Start(config cmd.CobraInterface) { // Excecute first thread wg.Add(1) - - // Start searching go processFolder(config.RootFolder) - // Wait until goroutines ends + // Wait until all goroutines ends wg.Wait() resultMap := structures.GetMap() From 2e245f670d2db27b79d0ebfa6df7419f28034bf6 Mon Sep 17 00:00:00 2001 From: Alexander Date: Sat, 12 Sep 2020 13:21:08 -0400 Subject: [PATCH 3/4] Removed -f flag, now cmd reads rootFolder from args --- pkg/cmd/cli.go | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/pkg/cmd/cli.go b/pkg/cmd/cli.go index ac24532..90ec570 100644 --- a/pkg/cmd/cli.go +++ b/pkg/cmd/cli.go @@ -3,6 +3,8 @@ package cmd import ( "fmt" + "github.com/Alex99y/duplicate-files/pkg/utils" + "github.com/spf13/cobra" ) @@ -35,12 +37,20 @@ func (cmd *CobraInterface) setStart() { Use: "start", Short: "Execute duplicate files searcher", Long: "Long description", - Run: func(c *cobra.Command, arg []string) { - cmd.RootFolder, _ = c.PersistentFlags().GetString("path") + Run: func(c *cobra.Command, args []string) { + cmd.RootFolder = args[0] + }, + Args: func(c *cobra.Command, args []string) error { + if len(args) != 1 { + return fmt.Errorf("No root folder provided") + } + isDir, err := utils.IsDirectory(args[0]) + if err != nil || !isDir { + return fmt.Errorf("Invalid root folder provided") + } + return nil }, } - start.PersistentFlags().StringP("path", "f", "", "--path /home") - start.MarkPersistentFlagRequired("path") cmd.RootCmd.AddCommand(start) } From ac49fb585cc4dd5f7d9bc7c8782ec6bcb8de9875 Mon Sep 17 00:00:00 2001 From: Alexander Date: Sat, 12 Sep 2020 16:43:58 -0400 Subject: [PATCH 4/4] Updated CLI's help information --- pkg/cmd/cli.go | 12 ++++++------ pkg/main.go | 3 +++ 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/pkg/cmd/cli.go b/pkg/cmd/cli.go index 90ec570..a143636 100644 --- a/pkg/cmd/cli.go +++ b/pkg/cmd/cli.go @@ -16,8 +16,7 @@ type CobraInterface struct { func (cmd *CobraInterface) setRootCommand() { cmd.RootCmd = &cobra.Command{ - Short: "Short", - Long: "Long", + Short: "Application to search duplicate files inside a folder", } } @@ -26,7 +25,7 @@ func (cmd *CobraInterface) setVersion() { Use: "version", Short: "Print app version", Run: func(c *cobra.Command, arg []string) { - fmt.Print("v0.0.1") + fmt.Print("v0.1.0") }, } cmd.RootCmd.AddCommand(version) @@ -34,9 +33,10 @@ func (cmd *CobraInterface) setVersion() { func (cmd *CobraInterface) setStart() { start := &cobra.Command{ - Use: "start", - Short: "Execute duplicate files searcher", - Long: "Long description", + Use: "start", + Short: "Search for duplicated files", + Example: "start [rootFolder]", + Long: "This command receives a folder, find recursively and print all duplicate files inside this folder and his subfolderss", Run: func(c *cobra.Command, args []string) { cmd.RootFolder = args[0] }, diff --git a/pkg/main.go b/pkg/main.go index 784a6dd..d2dd602 100644 --- a/pkg/main.go +++ b/pkg/main.go @@ -8,5 +8,8 @@ import ( func main() { cobra := new(cmd.CobraInterface) cobra.Execute() + if cobra.RootFolder == "" { + return + } core.Start(*cobra) }