From 4cac4a87501a1934e0ad0b4cdd72e5ba3d6bc4a5 Mon Sep 17 00:00:00 2001 From: suphakin-th Date: Sat, 8 Feb 2025 18:34:24 +0700 Subject: [PATCH 1/2] Refactor : Just try to improve something. --- sherlock_project/sherlock.py | 247 +++-------------------------------- 1 file changed, 18 insertions(+), 229 deletions(-) diff --git a/sherlock_project/sherlock.py b/sherlock_project/sherlock.py index 4e80d31c3..07b540383 100644 --- a/sherlock_project/sherlock.py +++ b/sherlock_project/sherlock.py @@ -567,6 +567,23 @@ def main(): parser = ArgumentParser( formatter_class=RawDescriptionHelpFormatter, description=f"{__longname__} (Version {__version__})", + epilog=""" + Example usage: + sherlock user123 + sherlock user1 user2 -s site1,site2 + sherlock --csv user1 user2 + sherlock --xlsx user1 user2 + sherlock --json user1 user2 + sherlock --proxy socks5://127.0.0.1:1080 user1 user2 + sherlock --tor user1 user2 + sherlock --unique-tor user1 user2 + sherlock --dump-response user1 user2 + sherlock --no-txt user1 user2 + sherlock --no-txt --no-color user1 user2 + sherlock --browse user1 user2 + sherlock --local user1 user2 + sherlock --nsfw user1 user2 + """, ) parser.add_argument( "--version", @@ -596,22 +613,6 @@ def main(): dest="output", help="If using single username, the output of the result will be saved to this file.", ) - parser.add_argument( - "--tor", - "-t", - action="store_true", - dest="tor", - default=False, - help="Make requests over Tor; increases runtime; requires Tor to be installed and in system path.", - ) - parser.add_argument( - "--unique-tor", - "-u", - action="store_true", - dest="unique_tor", - default=False, - help="Make requests over Tor with new Tor circuit after each request; increases runtime; requires Tor to be installed and in system path.", - ) parser.add_argument( "--csv", action="store_true", @@ -643,216 +644,6 @@ def main(): default=None, help="Make requests over a proxy. e.g. socks5://127.0.0.1:1080", ) - parser.add_argument( - "--dump-response", - action="store_true", - dest="dump_response", - default=False, - help="Dump the HTTP response to stdout for targeted debugging.", - ) - parser.add_argument( - "--json", - "-j", - metavar="JSON_FILE", - dest="json_file", - default=None, - help="Load data from a JSON file or an online, valid, JSON file. Upstream PR numbers also accepted.", - ) - parser.add_argument( - "--timeout", - action="store", - metavar="TIMEOUT", - dest="timeout", - type=timeout_check, - default=60, - help="Time (in seconds) to wait for response to requests (Default: 60)", - ) - parser.add_argument( - "--print-all", - action="store_true", - dest="print_all", - default=False, - help="Output sites where the username was not found.", - ) - parser.add_argument( - "--print-found", - action="store_true", - dest="print_found", - default=True, - help="Output sites where the username was found (also if exported as file).", - ) - parser.add_argument( - "--no-color", - action="store_true", - dest="no_color", - default=False, - help="Don't color terminal output", - ) - parser.add_argument( - "username", - nargs="+", - metavar="USERNAMES", - action="store", - help="One or more usernames to check with social networks. Check similar usernames using {?} (replace to '_', '-', '.').", - ) - parser.add_argument( - "--browse", - "-b", - action="store_true", - dest="browse", - default=False, - help="Browse to all results on default browser.", - ) - - parser.add_argument( - "--local", - "-l", - action="store_true", - default=False, - help="Force the use of the local data.json file.", - ) - - parser.add_argument( - "--nsfw", - action="store_true", - default=False, - help="Include checking of NSFW sites from default list.", - ) - - parser.add_argument( - "--no-txt", - action="store_true", - dest="no_txt", - default=False, - help="Disable creation of a txt file", - ) - - args = parser.parse_args() - - # If the user presses CTRL-C, exit gracefully without throwing errors - signal.signal(signal.SIGINT, handler) - - # Check for newer version of Sherlock. If it exists, let the user know about it - try: - latest_release_raw = requests.get(forge_api_latest_release).text - latest_release_json = json_loads(latest_release_raw) - latest_remote_tag = latest_release_json["tag_name"] - - if latest_remote_tag[1:] != __version__: - print( - f"Update available! {__version__} --> {latest_remote_tag[1:]}" - f"\n{latest_release_json['html_url']}" - ) - - except Exception as error: - print(f"A problem occurred while checking for an update: {error}") - - # Argument check - # TODO regex check on args.proxy - if args.tor and (args.proxy is not None): - raise Exception("Tor and Proxy cannot be set at the same time.") - - # Make prompts - if args.proxy is not None: - print("Using the proxy: " + args.proxy) - - if args.tor or args.unique_tor: - print("Using Tor to make requests") - - print( - "Warning: some websites might refuse connecting over Tor, so note that using this option might increase connection errors." - ) - - if args.no_color: - # Disable color output. - init(strip=True, convert=False) - else: - # Enable color output. - init(autoreset=True) - - # Check if both output methods are entered as input. - if args.output is not None and args.folderoutput is not None: - print("You can only use one of the output methods.") - sys.exit(1) - - # Check validity for single username output. - if args.output is not None and len(args.username) != 1: - print("You can only use --output with a single username") - sys.exit(1) - - # Create object with all information about sites we are aware of. - try: - if args.local: - sites = SitesInformation( - os.path.join(os.path.dirname(__file__), "resources/data.json") - ) - else: - json_file_location = args.json_file - if args.json_file: - # If --json parameter is a number, interpret it as a pull request number - if args.json_file.isnumeric(): - pull_number = args.json_file - pull_url = f"https://api.github.com/repos/sherlock-project/sherlock/pulls/{pull_number}" - pull_request_raw = requests.get(pull_url).text - pull_request_json = json_loads(pull_request_raw) - - # Check if it's a valid pull request - if "message" in pull_request_json: - print(f"ERROR: Pull request #{pull_number} not found.") - sys.exit(1) - - head_commit_sha = pull_request_json["head"]["sha"] - json_file_location = f"https://raw.githubusercontent.com/sherlock-project/sherlock/{head_commit_sha}/sherlock_project/resources/data.json" - - sites = SitesInformation(json_file_location) - except Exception as error: - print(f"ERROR: {error}") - sys.exit(1) - - if not args.nsfw: - sites.remove_nsfw_sites(do_not_remove=args.site_list) - - # Create original dictionary from SitesInformation() object. - # Eventually, the rest of the code will be updated to use the new object - # directly, but this will glue the two pieces together. - site_data_all = {site.name: site.information for site in sites} - if args.site_list == []: - # Not desired to look at a sub-set of sites - site_data = site_data_all - else: - # User desires to selectively run queries on a sub-set of the site list. - # Make sure that the sites are supported & build up pruned site database. - site_data = {} - site_missing = [] - for site in args.site_list: - counter = 0 - for existing_site in site_data_all: - if site.lower() == existing_site.lower(): - site_data[existing_site] = site_data_all[existing_site] - counter += 1 - if counter == 0: - # Build up list of sites not supported for future error message. - site_missing.append(f"'{site}'") - - if site_missing: - print(f"Error: Desired sites not found: {', '.join(site_missing)}.") - - if not site_data: - sys.exit(1) - - # Create notify object for query results. - query_notify = QueryNotifyPrint( - result=None, verbose=args.verbose, print_all=args.print_all, browse=args.browse - ) - - # Run report on all specified users. - all_usernames = [] - for username in args.username: - if check_for_parameter(username): - for name in multiple_usernames(username): - all_usernames.append(name) - else: - all_usernames.append(username) for username in all_usernames: results = sherlock( username, @@ -928,6 +719,7 @@ def main(): response_time_s, ] ) + if args.xlsx: usernames = [] names = [] @@ -969,9 +761,6 @@ def main(): ) DataFrame.to_excel(f"{username}.xlsx", sheet_name="sheet1", index=False) - print() - query_notify.finish() - if __name__ == "__main__": main() From a7810cb5e89a9eefb469c20f06bc35c1a91d8d10 Mon Sep 17 00:00:00 2001 From: suphakin-th Date: Sat, 8 Feb 2025 22:51:24 +0700 Subject: [PATCH 2/2] Refactor : Just try to improve something. --- sherlock_project/sherlock.py | 319 +++++++++++++++++++++++++++++------ 1 file changed, 263 insertions(+), 56 deletions(-) diff --git a/sherlock_project/sherlock.py b/sherlock_project/sherlock.py index 07b540383..11b6de5e3 100644 --- a/sherlock_project/sherlock.py +++ b/sherlock_project/sherlock.py @@ -564,26 +564,10 @@ def handler(signal_received, frame): def main(): + """Main entry point for Sherlock.""" parser = ArgumentParser( formatter_class=RawDescriptionHelpFormatter, description=f"{__longname__} (Version {__version__})", - epilog=""" - Example usage: - sherlock user123 - sherlock user1 user2 -s site1,site2 - sherlock --csv user1 user2 - sherlock --xlsx user1 user2 - sherlock --json user1 user2 - sherlock --proxy socks5://127.0.0.1:1080 user1 user2 - sherlock --tor user1 user2 - sherlock --unique-tor user1 user2 - sherlock --dump-response user1 user2 - sherlock --no-txt user1 user2 - sherlock --no-txt --no-color user1 user2 - sherlock --browse user1 user2 - sherlock --local user1 user2 - sherlock --nsfw user1 user2 - """, ) parser.add_argument( "--version", @@ -613,6 +597,22 @@ def main(): dest="output", help="If using single username, the output of the result will be saved to this file.", ) + parser.add_argument( + "--tor", + "-t", + action="store_true", + dest="tor", + default=False, + help="Make requests over Tor; increases runtime; requires Tor to be installed and in system path.", + ) + parser.add_argument( + "--unique-tor", + "-u", + action="store_true", + dest="unique_tor", + default=False, + help="Make requests over Tor with new Tor circuit after each request; increases runtime; requires Tor to be installed and in system path.", + ) parser.add_argument( "--csv", action="store_true", @@ -644,6 +644,216 @@ def main(): default=None, help="Make requests over a proxy. e.g. socks5://127.0.0.1:1080", ) + parser.add_argument( + "--dump-response", + action="store_true", + dest="dump_response", + default=False, + help="Dump the HTTP response to stdout for targeted debugging.", + ) + parser.add_argument( + "--json", + "-j", + metavar="JSON_FILE", + dest="json_file", + default=None, + help="Load data from a JSON file or an online, valid, JSON file. Upstream PR numbers also accepted.", + ) + parser.add_argument( + "--timeout", + action="store", + metavar="TIMEOUT", + dest="timeout", + type=timeout_check, + default=60, + help="Time (in seconds) to wait for response to requests (Default: 60)", + ) + parser.add_argument( + "--print-all", + action="store_true", + dest="print_all", + default=False, + help="Output sites where the username was not found.", + ) + parser.add_argument( + "--print-found", + action="store_true", + dest="print_found", + default=True, + help="Output sites where the username was found (also if exported as file).", + ) + parser.add_argument( + "--no-color", + action="store_true", + dest="no_color", + default=False, + help="Don't color terminal output", + ) + parser.add_argument( + "username", + nargs="+", + metavar="USERNAMES", + action="store", + help="One or more usernames to check with social networks. Check similar usernames using {?} (replace to '_', '-', '.').", + ) + parser.add_argument( + "--browse", + "-b", + action="store_true", + dest="browse", + default=False, + help="Browse to all results on default browser.", + ) + + parser.add_argument( + "--local", + "-l", + action="store_true", + default=False, + help="Force the use of the local data.json file.", + ) + + parser.add_argument( + "--nsfw", + action="store_true", + default=False, + help="Include checking of NSFW sites from default list.", + ) + + parser.add_argument( + "--no-txt", + action="store_true", + dest="no_txt", + default=False, + help="Disable creation of a txt file", + ) + + args = parser.parse_args() + + # If the user presses CTRL-C, exit gracefully without throwing errors + signal.signal(signal.SIGINT, handler) + + # Check for newer version of Sherlock. If it exists, let the user know about it + try: + latest_release_raw = requests.get(forge_api_latest_release).text + latest_release_json = json_loads(latest_release_raw) + latest_remote_tag = latest_release_json["tag_name"] + + if latest_remote_tag[1:] != __version__: + print( + f"Update available! {__version__} --> {latest_remote_tag[1:]}" + f"\n{latest_release_json['html_url']}" + ) + + except Exception as error: + print(f"A problem occurred while checking for an update: {error}") + + # Argument check + # TODO regex check on args.proxy + if args.tor and (args.proxy is not None): + raise Exception("Tor and Proxy cannot be set at the same time.") + + # Make prompts + if args.proxy is not None: + print("Using the proxy: " + args.proxy) + + if args.tor or args.unique_tor: + print("Using Tor to make requests") + + print( + "Warning: some websites might refuse connecting over Tor, so note that using this option might increase connection errors." + ) + + if args.no_color: + # Disable color output. + init(strip=True, convert=False) + else: + # Enable color output. + init(autoreset=True) + + # Check if both output methods are entered as input. + if args.output is not None and args.folderoutput is not None: + print("You can only use one of the output methods.") + sys.exit(1) + + # Check validity for single username output. + if args.output is not None and len(args.username) != 1: + print("You can only use --output with a single username") + sys.exit(1) + + # Create object with all information about sites we are aware of. + try: + if args.local: + sites = SitesInformation( + os.path.join(os.path.dirname(__file__), "resources/data.json") + ) + else: + json_file_location = args.json_file + if args.json_file: + # If --json parameter is a number, interpret it as a pull request number + if args.json_file.isnumeric(): + pull_number = args.json_file + pull_url = f"https://api.github.com/repos/sherlock-project/sherlock/pulls/{pull_number}" + pull_request_raw = requests.get(pull_url).text + pull_request_json = json_loads(pull_request_raw) + + # Check if it's a valid pull request + if "message" in pull_request_json: + print(f"ERROR: Pull request #{pull_number} not found.") + sys.exit(1) + + head_commit_sha = pull_request_json["head"]["sha"] + json_file_location = f"https://raw.githubusercontent.com/sherlock-project/sherlock/{head_commit_sha}/sherlock_project/resources/data.json" + + sites = SitesInformation(json_file_location) + except Exception as error: + print(f"ERROR: {error}") + sys.exit(1) + + if not args.nsfw: + sites.remove_nsfw_sites(do_not_remove=args.site_list) + + # Create original dictionary from SitesInformation() object. + # Eventually, the rest of the code will be updated to use the new object + # directly, but this will glue the two pieces together. + site_data_all = {site.name: site.information for site in sites} + if args.site_list == []: + # Not desired to look at a sub-set of sites + site_data = site_data_all + else: + # User desires to selectively run queries on a sub-set of the site list. + # Make sure that the sites are supported & build up pruned site database. + site_data = {} + site_missing = [] + for site in args.site_list: + counter = 0 + for existing_site in site_data_all: + if site.lower() == existing_site.lower(): + site_data[existing_site] = site_data_all[existing_site] + counter += 1 + if counter == 0: + # Build up list of sites not supported for future error message. + site_missing.append(f"'{site}'") + + if site_missing: + print(f"Error: Desired sites not found: {', '.join(site_missing)}.") + + if not site_data: + sys.exit(1) + + # Create notify object for query results. + query_notify = QueryNotifyPrint( + result=None, verbose=args.verbose, print_all=args.print_all, browse=args.browse + ) + + # Run report on all specified users. + all_usernames = [] + for username in args.username: + if check_for_parameter(username): + for name in multiple_usernames(username): + all_usernames.append(name) + else: + all_usernames.append(username) for username in all_usernames: results = sherlock( username, @@ -659,9 +869,6 @@ def main(): if args.output: result_file = args.output elif args.folderoutput: - # The usernames results should be stored in a targeted folder. - # If the folder doesn't exist, create it first - os.makedirs(args.folderoutput, exist_ok=True) result_file = os.path.join(args.folderoutput, f"{username}.txt") else: result_file = f"{username}.txt" @@ -677,12 +884,10 @@ def main(): file.write(f"Total Websites Username Detected On : {exists_counter}\n") if args.csv: - result_file = f"{username}.csv" if args.folderoutput: - # The usernames results should be stored in a targeted folder. - # If the folder doesn't exist, create it first - os.makedirs(args.folderoutput, exist_ok=True) - result_file = os.path.join(args.folderoutput, result_file) + result_file = os.path.join(args.folderoutput, f"{username}.csv") + else: + result_file = f"{username}.csv" with open(result_file, "w", newline="", encoding="utf-8") as csv_report: writer = csv.writer(csv_report) @@ -706,8 +911,6 @@ def main(): continue response_time_s = results[site]["status"].query_time - if response_time_s is None: - response_time_s = "" writer.writerow( [ username, @@ -716,19 +919,17 @@ def main(): results[site]["url_user"], str(results[site]["status"].status), results[site]["http_status"], - response_time_s, + response_time_s if response_time_s is not None else "", ] ) if args.xlsx: - usernames = [] - names = [] - url_main = [] - url_user = [] - exists = [] - http_status = [] - response_time_s = [] + if args.folderoutput: + result_file = os.path.join(args.folderoutput, f"{username}.xlsx") + else: + result_file = f"{username}.xlsx" + result_data = [] for site in results: if ( args.print_found @@ -737,29 +938,35 @@ def main(): ): continue - if response_time_s is None: - response_time_s.append("") - else: - response_time_s.append(results[site]["status"].query_time) - usernames.append(username) - names.append(site) - url_main.append(results[site]["url_main"]) - url_user.append(results[site]["url_user"]) - exists.append(str(results[site]["status"].status)) - http_status.append(results[site]["http_status"]) - + result_data.append( + [ + username, + site, + results[site]["url_main"], + results[site]["url_user"], + str(results[site]["status"].status), + results[site]["http_status"], + results[site]["status"].query_time + if results[site]["status"].query_time is not None + else "", + ] + ) DataFrame = pd.DataFrame( - { - "username": usernames, - "name": names, - "url_main": url_main, - "url_user": url_user, - "exists": exists, - "http_status": http_status, - "response_time_s": response_time_s, - } + result_data, + columns=[ + "username", + "name", + "url_main", + "url_user", + "exists", + "http_status", + "response_time_s", + ], ) - DataFrame.to_excel(f"{username}.xlsx", sheet_name="sheet1", index=False) + DataFrame.to_excel(result_file, sheet_name="sheet1", index=False) + + print() + query_notify.finish() if __name__ == "__main__":