forked from merwin-asm/OpenCrawler
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfix_db.py
135 lines (73 loc) · 3.13 KB
/
fix_db.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
"""
Set of Tools to fix your DB | OpenCrawler v 1.0.0
"""
from mongo_db import connect_db, _DB
from rich import print
import json
import os
def mongodb():
    """Read MongoDB credentials from ``config.json`` and connect to the DB.

    The ``MONGODB_PWD`` and ``MONGODB_URI`` keys are read from ``config.json``
    (resolved against the current working directory) and handed to
    ``connect_db``. If the file cannot be opened or is not valid JSON,
    ``config.py`` is run to re-create it and the file is read one more time.

    Raises:
        OSError: ``config.json`` still cannot be opened after re-configuring.
        ValueError: ``config.json`` still is not valid JSON after
            re-configuring (``json.JSONDecodeError`` is a subclass).
        KeyError: a required key is missing from the loaded configuration.
    """
    # Config File
    config_path = "config.json"

    def _load_configs():
        # The context manager closes the handle even when parsing fails, and
        # the path string is never rebound, so a retry re-opens the same path.
        with open(config_path, "r") as handle:
            return json.load(handle)

    # Load configs from config_path -> json
    try:
        configs = _load_configs()
    except (OSError, ValueError):
        # os.system() signals failure through its return status rather than
        # by raising, so the fallback interpreter name is chosen on a
        # non-zero status.
        if os.system("python3 config.py") != 0:  # Re-configures
            os.system("python config.py")  # Re-configures
        configs = _load_configs()

    ## Setting Up Configs
    MONGODB_PWD = configs["MONGODB_PWD"]
    MONGODB_URI = configs["MONGODB_URI"]

    # Initializes MongoDB
    connect_db(MONGODB_URI, MONGODB_PWD)
def _remove_duplicates(collection):
    """Leave a single document per ``website`` value in *collection*.

    For every document returned by ``collection.find({})``, all documents
    sharing its ``website`` value are fetched. When more than one exists, the
    first one returned is kept and the others are deleted, and a "Removed"
    line is printed for that website.

    Args:
        collection: a collection object exposing ``find`` and ``delete_many``
            (here: attributes of the handle returned by ``_DB()``).
    """
    for doc in collection.find({}):
        # A document without a "website" field cannot be compared; skip it
        # instead of aborting the whole scan with a KeyError.
        if "website" not in doc:
            continue
        website = doc["website"]

        matches = list(collection.find({"website": website}))
        if len(matches) <= 1:
            # Already unique (or removed in the meantime) - nothing to do.
            continue

        # Delete every copy except the first one in a single call. Deleting
        # all copies and re-inserting one would lose the record entirely if
        # the script stopped between the two operations.
        keeper_id = matches[0]["_id"]
        collection.delete_many({"website": website, "_id": {"$ne": keeper_id}})
        print(f"[green] [+] Removed : {website} [/green]")


mongodb()  # Connects to DB

print("\n[blue]---------------------------------------DB-FIXER---------------------------------------[/blue]\n")
print("""[dark_orange]\t[1] Remove Duplicates[/dark_orange]""")
print("\n[blue]Option :[/blue]", end="")
op = input(" ")

if op == "1":
    print("[blue] Scan Crawledsites (y/enter to skip) >[/blue]", end="")
    if input(" ").lower() == "y":
        print("\n[green] [+] Scanning Duplicates In Crawledsites [/green]")
        _remove_duplicates(_DB().Crawledsites)

    print("[blue] Scan waitlist (y/enter to skip) >[/blue]", end="")
    if input(" ").lower() == "y":
        print("[green] [+] Scanning Duplicates In waitlist [/green]")
        _remove_duplicates(_DB().waitlist)

    # NOTE: a scan of the Robots collection used to follow here but is
    # disabled. To re-enable it, repeat the prompt pattern above and call
    # _remove_duplicates(_DB().Robots).
else:
    print(f"[red] [-] Option '{op}' Not Found[/red]")

print("\n[blue]--------------------------------------------------------------------------------------[/blue]\n")