-
-
Notifications
You must be signed in to change notification settings - Fork 0
/
deduplicator.go
81 lines (65 loc) · 1.53 KB
/
deduplicator.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
package main
import (
"bufio"
"fmt"
"os"
"path/filepath"
"regexp"
"strings"
"sync"
)
// Shared state used by deduplicate.
var (
	// globalStringSet holds every normalized line that some file has already
	// kept; a line found here is dropped from the file being processed.
	globalStringSet = make(map[string]bool)

	// mu guards globalStringSet.
	mu sync.Mutex

	// validSubdomainsRegex accepts lines consisting only of word characters,
	// digits, dots, underscores and hyphens. It is compiled at package scope
	// so that deduplicate never dereferences a nil pattern.
	validSubdomainsRegex = regexp.MustCompile(`^[\w0-9._-]+$`)
)

// deduplicate rewrites filename in place so that it contains only the lines
// that are valid according to validSubdomainsRegex and that have not already
// been kept (by this file or any other) in globalStringSet.
//
// Each line is trimmed of surrounding whitespace and lower-cased before it is
// validated and compared. Kept lines are written back joined by "\n", in their
// original order, with no trailing newline. Failures are reported on standard
// output; on an open or read failure the file is left untouched.
//
// wg.Done is called exactly once when the function returns.
func deduplicate(filename string, wg *sync.WaitGroup) {
	defer wg.Done()

	// bufio.Scanner stops with ErrTooLong on lines above 64 KiB by default,
	// which would skip the whole file because of one junk line. Allow lines up
	// to 1 MiB; they are still rejected by the pattern if they are not valid.
	const maxLineBytes = 1 << 20

	file, err := os.Open(filename)
	if err != nil {
		fmt.Printf("Failed to open file %s: %v\n", filename, err)
		return
	}

	var uniqueStrings []string
	scanner := bufio.NewScanner(file)
	scanner.Buffer(nil, maxLineBytes)
	for scanner.Scan() {
		line := strings.ToLower(strings.TrimSpace(scanner.Text()))
		if line == "" || !validSubdomainsRegex.MatchString(line) {
			continue
		}

		// Check-and-insert must be atomic so that exactly one file keeps
		// any given line.
		mu.Lock()
		if !globalStringSet[line] {
			globalStringSet[line] = true
			uniqueStrings = append(uniqueStrings, line)
		}
		mu.Unlock()
	}
	scanErr := scanner.Err()

	// Release the read handle before the same path is truncated and rewritten.
	file.Close()

	if scanErr != nil {
		fmt.Printf("Error reading file %s: %v\n", filename, scanErr)
		return
	}

	if err := os.WriteFile(filename, []byte(strings.Join(uniqueStrings, "\n")), 0644); err != nil {
		fmt.Printf("Failed to write back to file %s: %v\n", filename, err)
	}
}
// processDirectory walks dir recursively and runs deduplicate on every entry
// that is not a directory, returning only after all of those calls have
// finished. A walk error is reported on standard output; files already
// dispatched before the error are still waited for.
func processDirectory(dir string) {
	// Upper bound on files being processed at once. Without it a large tree
	// starts one goroutine (and one open file) per entry and can run into the
	// process's file-descriptor limit.
	const maxConcurrentFiles = 64

	var wg sync.WaitGroup
	slots := make(chan struct{}, maxConcurrentFiles)

	err := filepath.Walk(dir, func(path string, info os.FileInfo, err error) error {
		if err != nil {
			return err
		}
		if info.IsDir() {
			return nil
		}

		// Blocks the walk while maxConcurrentFiles workers are busy.
		slots <- struct{}{}
		wg.Add(1)
		go func() {
			defer func() { <-slots }()
			deduplicate(path, &wg) // calls wg.Done itself
		}()
		return nil
	})
	if err != nil {
		fmt.Printf("Error walking through directory %s: %v\n", dir, err)
	}

	wg.Wait()
}
// removeDuplicatesFromSecLists sets validSubdomainsRegex and then runs
// processDirectory over wordlistCache (declared elsewhere in the package).
func removeDuplicatesFromSecLists() {
	// The pattern is a constant, so MustCompile is used instead of discarding
	// Compile's error: a malformed pattern would fail loudly here rather than
	// leave a nil regexp to be dereferenced later.
	validSubdomainsRegex = regexp.MustCompile(`^[\w0-9._-]+$`)
	processDirectory(wordlistCache)
}