From 276808aa0f47ececa963f9db14d37da0e25be46a Mon Sep 17 00:00:00 2001 From: raldnor Date: Thu, 20 May 2021 15:47:03 +0200 Subject: [PATCH] First (and possibly last) release --- README.MD | 121 +++++++++++++++++++++++++++++++++++++++++ urlscanner.py | 145 ++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 266 insertions(+) create mode 100644 README.MD create mode 100644 urlscanner.py diff --git a/README.MD b/README.MD new file mode 100644 index 0000000..d334eb9 --- /dev/null +++ b/README.MD @@ -0,0 +1,121 @@ +# URL Scanner + +This is a simple utility that gets a list of URLs from a text file (one URL per line), queries each URL and returns the web server status code. +Optionally, screenshots of the queried pages can be made during this process (this requires Firefox to be installed). + +### Usage +``` +usage: urlscanner.py [-h] [--inputfile INPUTFILE] [--outputdir OUTPUTDIR] + [--outputfile OUTPUTFILE] [--verbose] [--append APPEND] + [--firefoxpath FIREFOXPATH] [--screenshots] +``` + +#### Options: +```-h``` or ```--help``` - Show command-line options and a brief help description per option. +```-i``` or ```--inputfile``` - Input text file to read URLs from. Basically a text file with one URL per line. +```-d``` or ```--outputdir``` - Directory to store screenshots in. This directory has to exist before it can be specified. +```-a``` or ```--append``` - Append a string to the URLs to be checked (e.g. 'index.html' or '?action=something'). +```-f``` or ```--firefoxpath``` - Location where the Firefox executable is found (required when creating screenshots). +```-s``` or ```--screenshots``` - Create .png screenshots of URLs. +```-v``` or ```--verbose``` - More output while the utility is running. 
+ +### Examples: +Let's assume we have a text file called _urls.txt_ containing the following lines: +``` +www.google.com +https://www.reddit.com +http://github.com +``` +When no http:// or https:// prefix is specified, the script assumes https:// and prepends it. If a URL specifically needs to be queried over http://, make sure that prefix is specified in the text file. + +#### 1. Simple status code query with only console output +Command: +``` +$ python3 ./urlscanner.py -i urls.txt + ____ _____________.____ +| | \______ \ | +| | /| _/ | +| | / | | \ |___ +|______/ |____|_ /_______ \ + ______ ____ ___\/ ____\/ ____ ___________ + / ___// ___\\__ \ / \ / \_/ __ \_ __ \ + \___ \\ \___ / __ \| | \ | \ ___/| | \/ +/____ >\___ >____ /___| /___| /\___ >__| + \/ \/ \/ \/ \/ \/ +URL scanner +Peter Berends - 2021 + +Reading input file (urls.txt)... +[200][OK] - https://www.google.com +[200][OK] - https://www.reddit.com +[301][Moved Permanently] - http://github.com +Done. Processed 3 URLs (0 failed). +``` + +#### 2. Output results to a CSV file: +``` +$ python3 ./urlscanner.py -i urls.txt -o out.txt + ____ _____________.____ +| | \______ \ | +| | /| _/ | +| | / | | \ |___ +|______/ |____|_ /_______ \ + ______ ____ ___\/ ____\/ ____ ___________ + / ___// ___\\__ \ / \ / \_/ __ \_ __ \ + \___ \\ \___ / __ \| | \ | \ ___/| | \/ +/____ >\___ >____ /___| /___| /\___ >__| + \/ \/ \/ \/ \/ \/ +URL scanner +Peter Berends - 2021 + +Reading input file (urls.txt)... +[200][OK] - https://www.google.com +[200][OK] - https://www.reddit.com +[301][Moved Permanently] - http://github.com +Saving results to out.txt +Done. Processed 3 URLs (0 failed). + +$ cat out.txt +200,OK,https://www.google.com +200,OK,https://www.reddit.com +301,Moved Permanently,http://github.com +``` + +#### 3. 
Create screenshots in verbose mode +``` +$ mkdir shots +$ python3 ./urlscanner.py -i urls.txt -s -f /opt/firefox/firefox -d shots -v + ____ _____________.____ +| | \______ \ | +| | /| _/ | +| | / | | \ |___ +|______/ |____|_ /_______ \ + ______ ____ ___\/ ____\/ ____ ___________ + / ___// ___\\__ \ / \ / \_/ __ \_ __ \ + \___ \\ \___ / __ \| | \ | \ ___/| | \/ +/____ >\___ >____ /___| /___| /\___ >__| + \/ \/ \/ \/ \/ \/ +URL scanner +Peter Berends - 2021 + +Verbose mode on +Location to firefox given: /opt/firefox/firefox +Taking screenshots (will be stored in 'shots') +Activating Firefox as screenshot driver +Using Firefox location: /opt/firefox/firefox +Reading input file (urls.txt)... +Prepend https:// to line (www.google.com). +Processing: https://www.google.com +Taking screenshot of https://www.google.com +[200][OK] - https://www.google.com +Processing: https://www.reddit.com +Taking screenshot of https://www.reddit.com +[200][OK] - https://www.reddit.com +Processing: http://github.com +Taking screenshot of http://github.com +[301][Moved Permanently] - http://github.com +Done. Processed 3 URLs (0 failed). + +$ ls -a shots +. .. 
# NOTE: in the newline-collapsed patch, this span also carried the end of the
# README.MD hunk (the `ls -a shots` listing "github.com.png www.google.com.png
# www.reddit.com.png" and its closing code fence) and the patch header for this
# file: diff --git a/urlscanner.py b/urlscanner.py, new file mode 100644,
# index 0000000..3d59f31, @@ -0,0 +1,145 @@.
"""URL scanner.

Read URLs from a text file (one per line), send an HTTP HEAD request to each
and print the status code that comes back.  Optionally save the results as CSV
(``--outputfile``) and take a headless-Firefox screenshot of every URL
(``--screenshots`` together with ``--outputdir``).
"""
import argparse
import csv
import re
import sys
from http.client import responses
from os import path
from time import sleep

try:
    import requests
except ImportError:
    # main() reports this with a readable message instead of a traceback.
    requests = None

# Non-zero enables the extra progress output; set by main() from --verbose.
verbose = 0
# Raw string: in a normal literal the "\f" of "\firefox.exe" is a form feed,
# which silently corrupted this default path.
firefoxpath = r"c:\Program Files\Mozilla Firefox\firefox.exe"
# Seconds to wait after loading a page before the screenshot is taken.
interval = 3
# Seconds before a HEAD request is abandoned; without a timeout one
# unresponsive host blocks the whole scan.
timeout = 10


def printbanner():
    """Print the ASCII-art banner followed by the tool name and author."""
    # NOTE(review): the literals are reproduced exactly as found; the art may
    # have lost runs of spaces somewhere upstream -- confirm against the
    # original file before relying on its layout.
    banner = (
        " ____ _____________.____\n"
        "| | \\______ \\ |\n"
        "| | /| _/ |\n"
        "| | / | | \\ |___ \n"
        "|______/ |____|_ /_______ \\\n"
        " ______ ____ ___\\/ ____\\/ ____ ___________\n"
        " / ___// ___\\\\__ \\ / \\ / \\_/ __ \\_ __ \\\n"
        " \\___ \\\\ \\___ / __ \\| | \\ | \\ ___/| | \\/\n"
        "/____ >\\___ >____ /___| /___| /\\___ >__|\n"
        " \\/ \\/ \\/ \\/ \\/ \\/\n"
        "URL scanner\r\nPeter Berends - 2021\n"
    )
    print(banner)


def statuscode(url):
    """Return the status code of a HEAD request to *url*, or None on failure.

    Redirects are not followed (the default for ``requests.head``), so a 301
    is reported as 301.  Any error is treated as "request failed"; it is only
    printed in verbose mode.
    """
    try:
        response = requests.head(url, timeout=timeout)
        return response.status_code
    except Exception as err:  # best-effort boundary: one bad URL must not stop the scan
        if verbose:
            print("Request failed for %s (%s)." % (url, err))
        return None


def formaturl(url):
    """Return *url* with ``https://`` prepended unless it already has an http(s) scheme."""
    # URL schemes are case-insensitive, so "HTTP://host" must not get a second prefix.
    if not re.match(r"https?://", url, re.IGNORECASE):
        if verbose:
            print("Prepend https:// to line (%s)." % url)
        return 'https://{}'.format(url)
    return url


def checkdir(dir):
    """Return True when *dir* is an existing directory.

    ``path.exists`` also accepted a regular file, which then failed later when
    screenshots were written "into" it.
    """
    return path.isdir(dir)


def screenshotname(url):
    """Return a file-system-safe ``.png`` file name derived from *url*.

    The scheme is dropped and every run of characters other than letters,
    digits, ``.``, ``_`` and ``-`` becomes a single ``_``, so URLs containing
    a path or query string (e.g. through ``--append``) still give a valid name.
    """
    name = re.sub(r"^[a-z][a-z0-9+.-]*://", "", url, flags=re.IGNORECASE)
    name = re.sub(r"[^A-Za-z0-9._-]+", "_", name).strip("_")
    return (name or "screenshot") + ".png"


def buildparser():
    """Return the command-line argument parser."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--inputfile", "-i", help="specify input file with URLs per line")
    parser.add_argument("--outputdir", "-d", help="specify output directory")
    parser.add_argument("--outputfile", "-o", help="save results to file (CSV format)")
    parser.add_argument("--verbose", "-v", help="verbose mode", action="store_true")
    parser.add_argument("--append", "-a", help="append a string to the urls to check")
    parser.add_argument("--firefoxpath", "-f", help="location where the executable of Firefox is found")
    parser.add_argument("--screenshots", "-s", help="create screenshots of URLs", action="store_true")
    return parser


def startbrowser(binary):
    """Start and return a headless Firefox WebDriver that uses *binary*.

    Raises whatever selenium raises (including ImportError when selenium is
    not installed); main() turns that into an error message.
    """
    # Imported here so selenium is only needed when screenshots are requested.
    from selenium import webdriver
    from selenium.webdriver.firefox.options import Options

    options = Options()
    # "-headless" and binary_location replace the `headless` attribute and the
    # `firefox_binary=` keyword, which current Selenium releases no longer accept.
    options.add_argument("-headless")
    options.binary_location = binary
    return webdriver.Firefox(options=options)


def takescreenshot(browser, url, outputdir):
    """Load *url* in *browser* and save a PNG of it in *outputdir* (best effort)."""
    if verbose:
        print("Taking screenshot of %s" % url)
    target = path.join(outputdir, screenshotname(url))
    try:
        browser.get(url)
        sleep(interval)  # give the page time to render
        # get_screenshot_as_file reports a write failure by returning False.
        if not browser.get_screenshot_as_file(target):
            print("Could not create screenshot (could not write %s)" % target)
    except Exception as err:  # a failed screenshot must not abort the scan
        print("Could not create screenshot (%s)" % err)


def scanurls(inputfile, append, browser, outputdir):
    """Scan every URL listed in *inputfile*.

    Returns ``(rows, urlcount, failedurls)`` where *rows* is a list of
    ``(status, reason, url)`` tuples for the URLs that answered.  Exits with
    status 1 when the input file cannot be read.
    """
    print("Reading input file (%s)..." % inputfile)
    try:
        with open(inputfile, 'r') as handle:
            lines = handle.readlines()
    except (OSError, UnicodeError):
        print("ERROR: Input file specified is invalid. Bye.")
        sys.exit(1)

    rows = []
    urlcount = 0
    failedurls = 0
    for line in lines:
        line = re.sub(r"\s+", "", line)
        if not line:
            # Blank lines used to be scanned as the bogus URL "https://".
            continue
        line = formaturl(line)
        if append:
            line = line + append
        urlcount += 1
        if verbose:
            print("Processing: %s" % line)
        result = statuscode(line)
        if browser is not None:
            takescreenshot(browser, line, outputdir)
        if result:
            # http.client.responses only knows the registered codes; servers
            # do send others (e.g. 999), which used to raise KeyError here.
            reason = responses.get(result, "Unknown")
            rows.append((result, reason, line))
            print("[%s][%s] - %s" % (result, reason, line))
        else:
            failedurls += 1
    return rows, urlcount, failedurls


def saveresults(outputfile, rows):
    """Write *rows* to *outputfile* as ``status,reason,url`` CSV lines."""
    print("Saving results to %s" % outputfile)
    try:
        with open(outputfile, 'w') as outfile:
            # csv quotes fields that contain commas; "\n" keeps the line
            # endings the hand-built output had.
            csv.writer(outfile, lineterminator="\n").writerows(rows)
    except OSError as err:
        print("ERROR: Could not save results (%s)." % err)


def main(argv=None):
    """Run the scanner; *argv* defaults to the process command line."""
    global verbose, firefoxpath

    printbanner()
    args = buildparser().parse_args(argv)

    if not args.inputfile:
        print("No input file specified, got nothing to work with. Bye.")
        sys.exit(1)

    if args.screenshots and not args.outputdir:
        print("No output directory specified for screenshot storage. Bye.")
        sys.exit(1)

    if args.outputdir and not checkdir(args.outputdir):
        print("Invalid output directory specified. Bye.")
        sys.exit(1)

    if requests is None:
        print("ERROR: The 'requests' package is not installed. Bye.")
        sys.exit(1)

    if args.verbose:
        print("Verbose mode on")
        verbose = 1

    if args.firefoxpath:
        firefoxpath = args.firefoxpath
        if verbose:
            print("Location to firefox given: %s" % args.firefoxpath)

    browser = None
    if args.screenshots:
        print("Taking screenshots (will be stored in '%s')" % args.outputdir)
        if verbose:
            print("Activating Firefox as screenshot driver")
            print("Using Firefox location: %s" % firefoxpath)
        try:
            browser = startbrowser(firefoxpath)
        except Exception as err:
            print("Could not activate screenshot driver (%s). Try setting the path to Firefox using the --firefoxpath flag." % err)
            sys.exit(1)

    try:
        rows, urlcount, failedurls = scanurls(
            args.inputfile, args.append, browser, args.outputdir
        )
        if args.outputfile:
            saveresults(args.outputfile, rows)
    finally:
        # Also runs on sys.exit() and Ctrl-C, so no Firefox process is left behind.
        if browser is not None:
            browser.quit()

    print("Done. Processed %i URLs (%i failed)." % (urlcount, failedurls))


if __name__ == "__main__":
    main()