Commit b8b0e12

Fix max-scan-time and missing timeout in headless explorer
Signed-off-by: bretfourbe <gwendal@cyberwatch.fr>
1 parent: c97316c

4 files changed: +80, -60 lines
wapitiCore/controller/wapiti.py

Lines changed: 23 additions & 14 deletions
@@ -177,7 +177,7 @@ def __init__(self, scope_request: Request, scope="folder", session_dir=None, con
         self._max_links_per_page = 0
         self._max_files_per_dir = 0
         self._scan_force = "normal"
-        self._max_scan_time = 0
+        self._max_scan_time = None
         self._max_attack_time = None
         self._bug_report = True
         self._logfile = ""

@@ -376,6 +376,16 @@ async def save_scan_state(self):
         # if stopped and self._start_urls:
         #     print(_("The scan will be resumed next time unless you pass the --skip-crawl option."))

+    async def explore_and_save_requests(self, explorer):
+        self._buffer = []
+        # Browse URLs are saved them once we have enough in our buffer
+        async for resource, response in explorer.async_explore(self._start_urls, self._excluded_urls):
+            self._buffer.append((resource, response))
+
+            if len(self._buffer) > 100:
+                await self.persister.save_requests(self._buffer)
+                self._buffer = []
+
     async def browse(self, stop_event: asyncio.Event, parallelism: int = 8):
         """Extract hyperlinks and forms from the webpages found on the website"""
         stop_event.clear()

@@ -406,22 +406,21 @@ async def browse(self, stop_event: asyncio.Event, parallelism: int = 8):
         explorer.qs_limit = SCAN_FORCE_VALUES[self._scan_force]
         explorer.load_saved_state(self.persister.output_file[:-2] + "pkl")

-        start = datetime.utcnow()
-        buffer = []
-
-        # Browse URLs are saved them once we have enough in our buffer
-        async for resource, response in explorer.async_explore(self._start_urls, self._excluded_urls):
-            buffer.append((resource, response))
-
-            if len(buffer) > 100:
-                await self.persister.save_requests(buffer)
-                buffer = []
+        self._buffer = []

-            if not stop_event.is_set() and (datetime.utcnow() - start).total_seconds() > self._max_scan_time >= 1:
-                logging.info("Max scan time was reached, stopping.")
+        try:
+            await asyncio.wait_for(
+                self.explore_and_save_requests(explorer),
+                self._max_scan_time
+            )
+        except asyncio.TimeoutError:
+            logging.info("Max scan time was reached, stopping.")
+            if not stop_event.is_set():
                 stop_event.set()
+        finally:
+            await explorer.clean()

-        await self.persister.save_requests(buffer)
+        await self.persister.save_requests(self._buffer)

         # Let's save explorer values (limits)
         explorer.save_state(self.persister.output_file[:-2] + "pkl")
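
The core of this change is letting asyncio enforce the scan deadline instead of checking elapsed time inside the crawl loop: asyncio.wait_for() raises asyncio.TimeoutError when the limit is reached and, when the timeout is None, simply awaits the coroutine with no limit, which is why the default for _max_scan_time moves from 0 to None. A minimal, self-contained sketch of that pattern (names are illustrative, not Wapiti's own):

    import asyncio

    async def crawl_forever():
        # Stand-in for explore_and_save_requests(): runs until cancelled.
        while True:
            await asyncio.sleep(1)

    async def browse(max_scan_time=None):
        try:
            # timeout=None means "no limit": wait_for just awaits the coroutine.
            await asyncio.wait_for(crawl_forever(), max_scan_time)
        except asyncio.TimeoutError:
            print("Max scan time was reached, stopping.")
        finally:
            # Teardown runs whether the crawl finished, timed out or failed.
            print("cleaning up")

    asyncio.run(browse(max_scan_time=2))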

wapitiCore/net/explorer.py

Lines changed: 1 addition & 0 deletions
@@ -446,6 +446,7 @@ async def async_explore(
             if not task_to_request and (self._stopped.is_set() or not to_explore):
                 break

+    async def clean(self):
         self._cookiejar = self._crawler.cookie_jar
         await self._crawler.close()
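
Exposing the cookie-jar save and crawler shutdown as a separate clean() coroutine, rather than leaving them at the end of async_explore, matters because of how the timeout is now enforced: when asyncio.wait_for() cancels the consuming coroutine, any code placed after the async for loop never executes, so teardown has to be reachable from a finally block instead. A rough sketch of that behaviour, with illustrative names only:

    import asyncio

    async def pages():
        # Stand-in for the explorer's async generator.
        while True:
            yield "page"
            await asyncio.sleep(0.2)

    async def explore():
        async for _ in pages():
            pass
        print("post-loop teardown")  # skipped when wait_for cancels us

    async def main():
        try:
            await asyncio.wait_for(explore(), timeout=0.5)
        except asyncio.TimeoutError:
            print("timed out")
        finally:
            print("an explicit clean() call belongs here")

    asyncio.run(main())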

wapitiCore/net/intercepting_explorer.py

Lines changed: 55 additions & 45 deletions
@@ -332,7 +332,7 @@ async def launch_headless_explorer(
                 continue
            else:
                try:
-                    response = await crawler.async_send(request)
+                    response = await crawler.async_send(request, timeout=crawler.timeout.connect)
                except httpx.RequestError as exception:
                    logging.error(f"{request} generated an exception: {exception.__class__.__name__}")
                    continue

@@ -389,13 +389,52 @@ def __init__(
         self._final_cookies = None
         self._cookies = cookies or CookieJar()
         self._wait_time = wait_time
+        self._headless_task = None
+
+    async def process_requests(self, excluded_requests, exclusion_regexes):
+        while True:
+            try:
+                request, response = self._queue.get_nowait()
+            except asyncio.QueueEmpty:
+                await asyncio.sleep(.1)
+            except KeyboardInterrupt:
+                break
+            else:
+                self._queue.task_done()
+
+                # Scope check and deduplication are made here
+                if not self._scope.check(request) or request in self._processed_requests:
+                    continue
+
+                # Check for exclusion here because we don't have full control over the headless browser
+                if request in excluded_requests or any(regex.match(request.url) for regex in exclusion_regexes):
+                    continue
+
+                dir_name = request.dir_name
+                if self._max_files_per_dir and self._file_counts[dir_name] >= self._max_files_per_dir:
+                    continue
+
+                self._file_counts[dir_name] += 1
+
+                if self.has_too_many_parameters(request):
+                    continue
+
+                if self._qs_limit and request.parameters_count:
+                    self._pattern_counts[request.pattern] += 1
+
+                yield request, response
+                self._processed_requests.append(request)
+                log_verbose(f"[+] {request}")
+
+            if self._stopped.is_set():
+                break

     async def async_explore(
             self,
             to_explore: Deque[Request],
             excluded_urls: list = None
     ) -> AsyncIterator[Tuple[Request, Response]]:
-        queue = asyncio.Queue()
+        self._queue = asyncio.Queue()

         exclusion_regexes = []
         excluded_requests = []

@@ -408,10 +447,10 @@ async def async_explore(
             excluded_requests.append(bad_request)

         # Launch proxy as asyncio task
-        mitm_task = asyncio.create_task(
+        self._mitm_task = asyncio.create_task(
             launch_proxy(
                 self._mitm_port,
-                queue,
+                self._queue,
                 self._crawler.headers,
                 self._cookies,
                 self._scope,

@@ -420,12 +459,12 @@ async def async_explore(
             )
         )

-        headless_task = None
+
         if self._headless == "no":
            # No headless crawler, just intercepting mode so no starting URLs
            to_explore.clear()
         else:
-            headless_task = asyncio.create_task(
+            self._headless_task = asyncio.create_task(
                launch_headless_explorer(
                    self._stopped,
                    self._crawler,

@@ -440,52 +479,23 @@ async def async_explore(
                )
            )

-        while True:
-            try:
-                request, response = queue.get_nowait()
-            except asyncio.QueueEmpty:
-                await asyncio.sleep(.1)
-            except KeyboardInterrupt:
-                break
-            else:
-                queue.task_done()
-
-                # Scope check and deduplication are made here
-                if not self._scope.check(request) or request in self._processed_requests:
-                    continue
-
-                # Check for exclusion here because we don't have full control over the headless browser
-                if request in excluded_requests or any(regex.match(request.url) for regex in exclusion_regexes):
-                    continue
-
-                dir_name = request.dir_name
-                if self._max_files_per_dir and self._file_counts[dir_name] >= self._max_files_per_dir:
-                    continue
-
-                self._file_counts[dir_name] += 1
-
-                if self.has_too_many_parameters(request):
-                    continue
-
-                if self._qs_limit and request.parameters_count:
-                    self._pattern_counts[request.pattern] += 1
-
-                yield request, response
-                self._processed_requests.append(request)
-                log_verbose(f"[+] {request}")
-
+        async for request, response in self.process_requests(excluded_requests, exclusion_regexes):
+            yield request, response
             if self._stopped.is_set():
                 break

-        await queue.join()
+    async def clean(self):
+        if not self._queue.empty():
+            await self._queue.join()
+
         # The headless crawler must stop when the stop event is set, let's just wait for it
-        if headless_task:
-            await headless_task
+        if self._headless_task:
+            await self._headless_task

         # We are canceling the mitm proxy, but we could have used a special request to shut down the master to.
         # https://docs.mitmproxy.org/stable/addons-examples/#shutdown
-        mitm_task.cancel()
-        self._final_cookies = await mitm_task
+        self._mitm_task.cancel()
+        self._final_cookies = await self._mitm_task
         await self._crawler.close()

     @property
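
The other half of the commit title, the missing timeout, comes from the headless path: crawler.async_send() is now given an explicit per-request timeout (crawler.timeout.connect) so a single unresponsive resource cannot hang the headless crawl. The exact signature of Wapiti's crawler wrapper is only inferred from this diff, but the idea maps onto httpx's per-request timeout override, sketched below with an assumed URL:

    import asyncio
    import httpx

    async def fetch(url: str) -> httpx.Response:
        # Client-level defaults apply to every request made with this client...
        timeouts = httpx.Timeout(10.0, connect=5.0)
        async with httpx.AsyncClient(timeout=timeouts) as client:
            # ...but a per-request value overrides them, bounding this call alone.
            return await client.get(url, timeout=client.timeout.connect)

    response = asyncio.run(fetch("http://example.com/"))
    print(response.status_code)

The rest of this file's diff is a mechanical refactor in support of the new teardown path: the queue-draining loop moves into the process_requests() async generator, and the proxy and headless task handles become instance attributes so the new clean() coroutine can shut them down when browse() calls it from its finally block.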

wapitiCore/parsers/commandline.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -329,7 +329,7 @@ def parse_args():
329329
"--max-scan-time",
330330
metavar="SECONDS",
331331
help="Set how many seconds you want the scan to last (floats accepted)",
332-
type=float, default=0
332+
type=float, default=None
333333
)
334334

335335
parser.add_argument(
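
With default=None, omitting --max-scan-time now means "no limit" (asyncio.wait_for receives None) instead of the old sentinel value 0, while any positive float still caps the crawl. A typical invocation, with a placeholder target URL:

    wapiti -u http://target.example/ --max-scan-time 300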
