-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpalooza_wizard_cli.py
83 lines (73 loc) · 2.09 KB
/
palooza_wizard_cli.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
# -*- encoding: utf-8 -*-
import argparse
try:
    from palooza_wizard import DatapaloozaWizard
except ImportError as e:
    # Re-raise with an installation hint, keeping the original error as the
    # explicit cause so the traceback shows which import actually failed.
    # NOTE(review): `DatapaloozaWizard` is not referenced in the visible part
    # of this file, while `main` uses `search_engines_dict`, `config`,
    # `AllSearchEngines` and `MultipleSearchEngines`, which are not imported
    # here -- confirm which imports this script really needs.
    msg = '"{}"\nPlease install `palooza_wizard` to resolve this error.'
    raise ImportError(msg.format(str(e))) from e
def main():
    """Run the search command-line interface.

    Parses the command-line options, selects the requested search
    engine(s), runs the query and passes the results to the engine's
    ``output`` method.

    Options:
        -q      query string (required)
        -e      comma-separated engine names, or "all" (default "google")
        -o      output mode forwarded to ``engine.output`` (default "print")
        -n      output file name (default ``config.OUTPUT_DIR + "output"``)
        -p      number of result pages, parsed as ``int``
        -f      value forwarded to ``engine.set_search_operator``
        -i      flag stored in ``engine.ignore_duplicate_urls``
        -proxy  proxy URL (default ``config.PROXY``)

    If none of the requested engine names is recognised, the list of
    available engines is printed and the function returns without searching.

    NOTE(review): ``search_engines_dict``, ``config``, ``AllSearchEngines``
    and ``MultipleSearchEngines`` must exist at module level; they are not
    imported in the visible part of this file -- confirm.
    """
    # Build the human-readable engine list once; it is used both in the
    # help text and in the "no valid engine" message.
    available_engines = ", ".join(search_engines_dict)

    ap = argparse.ArgumentParser()
    ap.add_argument("-q", help="query", required=True)
    ap.add_argument(
        "-e",
        help="search engine(s) - " + available_engines,
        default="google",
    )
    ap.add_argument(
        "-o", help="output file [html, csv, json]", default="print"
    )
    ap.add_argument(
        "-n",
        help="filename for output file",
        default=config.OUTPUT_DIR + "output",
    )
    ap.add_argument(
        "-p",
        help="number of pages",
        default=config.SEARCH_ENGINE_RESULTS_PAGES,
        type=int,
    )
    ap.add_argument(
        "-f",
        help="filter results [url, title, text, host]",
        default=None,
    )
    ap.add_argument(
        "-i",
        help="ignore duplicates, useful when multiple search engines are used",
        action="store_true",
    )
    ap.add_argument(
        "-proxy",
        help="use proxy (protocol://ip:port)",
        default=config.PROXY,
    )
    args = ap.parse_args()

    proxy = args.proxy
    # Add 10 to the configured timeout when a proxy is in use.
    timeout = config.TIMEOUT + (10 if proxy else 0)

    # Normalise each requested name once, then keep only known engines
    # (or the special value "all"); unknown names are dropped.
    requested = (name.strip() for name in args.e.lower().split(","))
    engines = [
        name
        for name in requested
        if name in search_engines_dict or name == "all"
    ]

    if not engines:
        print("Please choose a search engine: " + available_engines)
        return

    if "all" in engines:
        engine = AllSearchEngines(proxy, timeout)
    elif len(engines) > 1:
        engine = MultipleSearchEngines(engines, proxy, timeout)
    else:
        engine = search_engines_dict[engines[0]](proxy, timeout)

    engine.ignore_duplicate_urls = args.i
    if args.f:
        engine.set_search_operator(args.f)

    engine.search(args.q, args.p)
    engine.output(args.o, args.n)
# Script entry point: run the CLI only when executed directly, not on import.
if __name__ == "__main__":
    main()