From 4cac4a87501a1934e0ad0b4cdd72e5ba3d6bc4a5 Mon Sep 17 00:00:00 2001
From: suphakin-th <suphakin.th@gmail.com>
Date: Sat, 8 Feb 2025 18:34:24 +0700
Subject: [PATCH 1/2] Refactor: add usage epilog; drop option parsing and setup from main()

---
 sherlock_project/sherlock.py | 247 +++--------------------------------
 1 file changed, 18 insertions(+), 229 deletions(-)

diff --git a/sherlock_project/sherlock.py b/sherlock_project/sherlock.py
index 4e80d31c3..07b540383 100644
--- a/sherlock_project/sherlock.py
+++ b/sherlock_project/sherlock.py
@@ -567,6 +567,23 @@ def main():
     parser = ArgumentParser(
         formatter_class=RawDescriptionHelpFormatter,
         description=f"{__longname__} (Version {__version__})",
+        epilog="""
+            Example usage:
+            sherlock user123
+            sherlock user1 user2 -s site1,site2
+            sherlock --csv user1 user2
+            sherlock --xlsx user1 user2
+            sherlock --json user1 user2
+            sherlock --proxy socks5://127.0.0.1:1080 user1 user2
+            sherlock --tor user1 user2
+            sherlock --unique-tor user1 user2
+            sherlock --dump-response user1 user2
+            sherlock --no-txt user1 user2
+            sherlock --no-txt --no-color user1 user2
+            sherlock --browse user1 user2
+            sherlock --local user1 user2
+            sherlock --nsfw user1 user2
+            """,
     )
     parser.add_argument(
         "--version",
@@ -596,22 +613,6 @@ def main():
         dest="output",
         help="If using single username, the output of the result will be saved to this file.",
     )
-    parser.add_argument(
-        "--tor",
-        "-t",
-        action="store_true",
-        dest="tor",
-        default=False,
-        help="Make requests over Tor; increases runtime; requires Tor to be installed and in system path.",
-    )
-    parser.add_argument(
-        "--unique-tor",
-        "-u",
-        action="store_true",
-        dest="unique_tor",
-        default=False,
-        help="Make requests over Tor with new Tor circuit after each request; increases runtime; requires Tor to be installed and in system path.",
-    )
     parser.add_argument(
         "--csv",
         action="store_true",
@@ -643,216 +644,6 @@ def main():
         default=None,
         help="Make requests over a proxy. e.g. socks5://127.0.0.1:1080",
     )
-    parser.add_argument(
-        "--dump-response",
-        action="store_true",
-        dest="dump_response",
-        default=False,
-        help="Dump the HTTP response to stdout for targeted debugging.",
-    )
-    parser.add_argument(
-        "--json",
-        "-j",
-        metavar="JSON_FILE",
-        dest="json_file",
-        default=None,
-        help="Load data from a JSON file or an online, valid, JSON file. Upstream PR numbers also accepted.",
-    )
-    parser.add_argument(
-        "--timeout",
-        action="store",
-        metavar="TIMEOUT",
-        dest="timeout",
-        type=timeout_check,
-        default=60,
-        help="Time (in seconds) to wait for response to requests (Default: 60)",
-    )
-    parser.add_argument(
-        "--print-all",
-        action="store_true",
-        dest="print_all",
-        default=False,
-        help="Output sites where the username was not found.",
-    )
-    parser.add_argument(
-        "--print-found",
-        action="store_true",
-        dest="print_found",
-        default=True,
-        help="Output sites where the username was found (also if exported as file).",
-    )
-    parser.add_argument(
-        "--no-color",
-        action="store_true",
-        dest="no_color",
-        default=False,
-        help="Don't color terminal output",
-    )
-    parser.add_argument(
-        "username",
-        nargs="+",
-        metavar="USERNAMES",
-        action="store",
-        help="One or more usernames to check with social networks. Check similar usernames using {?} (replace to '_', '-', '.').",
-    )
-    parser.add_argument(
-        "--browse",
-        "-b",
-        action="store_true",
-        dest="browse",
-        default=False,
-        help="Browse to all results on default browser.",
-    )
-
-    parser.add_argument(
-        "--local",
-        "-l",
-        action="store_true",
-        default=False,
-        help="Force the use of the local data.json file.",
-    )
-
-    parser.add_argument(
-        "--nsfw",
-        action="store_true",
-        default=False,
-        help="Include checking of NSFW sites from default list.",
-    )
-
-    parser.add_argument(
-        "--no-txt",
-        action="store_true",
-        dest="no_txt",
-        default=False,
-        help="Disable creation of a txt file",
-    )
-
-    args = parser.parse_args()
-
-    # If the user presses CTRL-C, exit gracefully without throwing errors
-    signal.signal(signal.SIGINT, handler)
-
-    # Check for newer version of Sherlock. If it exists, let the user know about it
-    try:
-        latest_release_raw = requests.get(forge_api_latest_release).text
-        latest_release_json = json_loads(latest_release_raw)
-        latest_remote_tag = latest_release_json["tag_name"]
-
-        if latest_remote_tag[1:] != __version__:
-            print(
-                f"Update available! {__version__} --> {latest_remote_tag[1:]}"
-                f"\n{latest_release_json['html_url']}"
-            )
-
-    except Exception as error:
-        print(f"A problem occurred while checking for an update: {error}")
-
-    # Argument check
-    # TODO regex check on args.proxy
-    if args.tor and (args.proxy is not None):
-        raise Exception("Tor and Proxy cannot be set at the same time.")
-
-    # Make prompts
-    if args.proxy is not None:
-        print("Using the proxy: " + args.proxy)
-
-    if args.tor or args.unique_tor:
-        print("Using Tor to make requests")
-
-        print(
-            "Warning: some websites might refuse connecting over Tor, so note that using this option might increase connection errors."
-        )
-
-    if args.no_color:
-        # Disable color output.
-        init(strip=True, convert=False)
-    else:
-        # Enable color output.
-        init(autoreset=True)
-
-    # Check if both output methods are entered as input.
-    if args.output is not None and args.folderoutput is not None:
-        print("You can only use one of the output methods.")
-        sys.exit(1)
-
-    # Check validity for single username output.
-    if args.output is not None and len(args.username) != 1:
-        print("You can only use --output with a single username")
-        sys.exit(1)
-
-    # Create object with all information about sites we are aware of.
-    try:
-        if args.local:
-            sites = SitesInformation(
-                os.path.join(os.path.dirname(__file__), "resources/data.json")
-            )
-        else:
-            json_file_location = args.json_file
-            if args.json_file:
-                # If --json parameter is a number, interpret it as a pull request number
-                if args.json_file.isnumeric():
-                    pull_number = args.json_file
-                    pull_url = f"https://api.github.com/repos/sherlock-project/sherlock/pulls/{pull_number}"
-                    pull_request_raw = requests.get(pull_url).text
-                    pull_request_json = json_loads(pull_request_raw)
-
-                    # Check if it's a valid pull request
-                    if "message" in pull_request_json:
-                        print(f"ERROR: Pull request #{pull_number} not found.")
-                        sys.exit(1)
-
-                    head_commit_sha = pull_request_json["head"]["sha"]
-                    json_file_location = f"https://raw.githubusercontent.com/sherlock-project/sherlock/{head_commit_sha}/sherlock_project/resources/data.json"
-
-            sites = SitesInformation(json_file_location)
-    except Exception as error:
-        print(f"ERROR:  {error}")
-        sys.exit(1)
-
-    if not args.nsfw:
-        sites.remove_nsfw_sites(do_not_remove=args.site_list)
-
-    # Create original dictionary from SitesInformation() object.
-    # Eventually, the rest of the code will be updated to use the new object
-    # directly, but this will glue the two pieces together.
-    site_data_all = {site.name: site.information for site in sites}
-    if args.site_list == []:
-        # Not desired to look at a sub-set of sites
-        site_data = site_data_all
-    else:
-        # User desires to selectively run queries on a sub-set of the site list.
-        # Make sure that the sites are supported & build up pruned site database.
-        site_data = {}
-        site_missing = []
-        for site in args.site_list:
-            counter = 0
-            for existing_site in site_data_all:
-                if site.lower() == existing_site.lower():
-                    site_data[existing_site] = site_data_all[existing_site]
-                    counter += 1
-            if counter == 0:
-                # Build up list of sites not supported for future error message.
-                site_missing.append(f"'{site}'")
-
-        if site_missing:
-            print(f"Error: Desired sites not found: {', '.join(site_missing)}.")
-
-        if not site_data:
-            sys.exit(1)
-
-    # Create notify object for query results.
-    query_notify = QueryNotifyPrint(
-        result=None, verbose=args.verbose, print_all=args.print_all, browse=args.browse
-    )
-
-    # Run report on all specified users.
-    all_usernames = []
-    for username in args.username:
-        if check_for_parameter(username):
-            for name in multiple_usernames(username):
-                all_usernames.append(name)
-        else:
-            all_usernames.append(username)
     for username in all_usernames:
         results = sherlock(
             username,
@@ -928,6 +719,7 @@ def main():
                             response_time_s,
                         ]
                     )
+
         if args.xlsx:
             usernames = []
             names = []
@@ -969,9 +761,6 @@ def main():
             )
             DataFrame.to_excel(f"{username}.xlsx", sheet_name="sheet1", index=False)
 
-        print()
-    query_notify.finish()
-
 
 if __name__ == "__main__":
     main()

From a7810cb5e89a9eefb469c20f06bc35c1a91d8d10 Mon Sep 17 00:00:00 2001
From: suphakin-th <suphakin.th@gmail.com>
Date: Sat, 8 Feb 2025 22:51:24 +0700
Subject: [PATCH 2/2] Refactor: restore main() options and setup; unify CSV/XLSX output paths

---
 sherlock_project/sherlock.py | 319 +++++++++++++++++++++++++++++------
 1 file changed, 263 insertions(+), 56 deletions(-)

diff --git a/sherlock_project/sherlock.py b/sherlock_project/sherlock.py
index 07b540383..11b6de5e3 100644
--- a/sherlock_project/sherlock.py
+++ b/sherlock_project/sherlock.py
@@ -564,26 +564,10 @@ def handler(signal_received, frame):
 
 
 def main():
+    """Main entry point for Sherlock."""
     parser = ArgumentParser(
         formatter_class=RawDescriptionHelpFormatter,
         description=f"{__longname__} (Version {__version__})",
-        epilog="""
-            Example usage:
-            sherlock user123
-            sherlock user1 user2 -s site1,site2
-            sherlock --csv user1 user2
-            sherlock --xlsx user1 user2
-            sherlock --json user1 user2
-            sherlock --proxy socks5://127.0.0.1:1080 user1 user2
-            sherlock --tor user1 user2
-            sherlock --unique-tor user1 user2
-            sherlock --dump-response user1 user2
-            sherlock --no-txt user1 user2
-            sherlock --no-txt --no-color user1 user2
-            sherlock --browse user1 user2
-            sherlock --local user1 user2
-            sherlock --nsfw user1 user2
-            """,
     )
     parser.add_argument(
         "--version",
@@ -613,6 +597,22 @@ def main():
         dest="output",
         help="If using single username, the output of the result will be saved to this file.",
     )
+    parser.add_argument(
+        "--tor",
+        "-t",
+        action="store_true",
+        dest="tor",
+        default=False,
+        help="Make requests over Tor; increases runtime; requires Tor to be installed and in system path.",
+    )
+    parser.add_argument(
+        "--unique-tor",
+        "-u",
+        action="store_true",
+        dest="unique_tor",
+        default=False,
+        help="Make requests over Tor with new Tor circuit after each request; increases runtime; requires Tor to be installed and in system path.",
+    )
     parser.add_argument(
         "--csv",
         action="store_true",
@@ -644,6 +644,216 @@ def main():
         default=None,
         help="Make requests over a proxy. e.g. socks5://127.0.0.1:1080",
     )
+    parser.add_argument(
+        "--dump-response",
+        action="store_true",
+        dest="dump_response",
+        default=False,
+        help="Dump the HTTP response to stdout for targeted debugging.",
+    )
+    parser.add_argument(
+        "--json",
+        "-j",
+        metavar="JSON_FILE",
+        dest="json_file",
+        default=None,
+        help="Load data from a JSON file or an online, valid, JSON file. Upstream PR numbers also accepted.",
+    )
+    parser.add_argument(
+        "--timeout",
+        action="store",
+        metavar="TIMEOUT",
+        dest="timeout",
+        type=timeout_check,
+        default=60,
+        help="Time (in seconds) to wait for response to requests (Default: 60)",
+    )
+    parser.add_argument(
+        "--print-all",
+        action="store_true",
+        dest="print_all",
+        default=False,
+        help="Output sites where the username was not found.",
+    )
+    parser.add_argument(
+        "--print-found",
+        action="store_true",
+        dest="print_found",
+        default=True,
+        help="Output sites where the username was found (also if exported as file).",
+    )
+    parser.add_argument(
+        "--no-color",
+        action="store_true",
+        dest="no_color",
+        default=False,
+        help="Don't color terminal output",
+    )
+    parser.add_argument(
+        "username",
+        nargs="+",
+        metavar="USERNAMES",
+        action="store",
+        help="One or more usernames to check with social networks. Check similar usernames using {?} (replace to '_', '-', '.').",
+    )
+    parser.add_argument(
+        "--browse",
+        "-b",
+        action="store_true",
+        dest="browse",
+        default=False,
+        help="Browse to all results on default browser.",
+    )
+
+    parser.add_argument(
+        "--local",
+        "-l",
+        action="store_true",
+        default=False,
+        help="Force the use of the local data.json file.",
+    )
+
+    parser.add_argument(
+        "--nsfw",
+        action="store_true",
+        default=False,
+        help="Include checking of NSFW sites from default list.",
+    )
+
+    parser.add_argument(
+        "--no-txt",
+        action="store_true",
+        dest="no_txt",
+        default=False,
+        help="Disable creation of a txt file",
+    )
+
+    args = parser.parse_args()
+
+    # If the user presses CTRL-C, exit gracefully without throwing errors
+    signal.signal(signal.SIGINT, handler)
+
+    # Check for newer version of Sherlock. If it exists, let the user know about it
+    try:
+        latest_release_raw = requests.get(forge_api_latest_release).text
+        latest_release_json = json_loads(latest_release_raw)
+        latest_remote_tag = latest_release_json["tag_name"]
+
+        if latest_remote_tag[1:] != __version__:
+            print(
+                f"Update available! {__version__} --> {latest_remote_tag[1:]}"
+                f"\n{latest_release_json['html_url']}"
+            )
+
+    except Exception as error:
+        print(f"A problem occurred while checking for an update: {error}")
+
+    # Argument check
+    # TODO regex check on args.proxy
+    if args.tor and (args.proxy is not None):
+        raise Exception("Tor and Proxy cannot be set at the same time.")
+
+    # Make prompts
+    if args.proxy is not None:
+        print("Using the proxy: " + args.proxy)
+
+    if args.tor or args.unique_tor:
+        print("Using Tor to make requests")
+
+        print(
+            "Warning: some websites might refuse connecting over Tor, so note that using this option might increase connection errors."
+        )
+
+    if args.no_color:
+        # Disable color output.
+        init(strip=True, convert=False)
+    else:
+        # Enable color output.
+        init(autoreset=True)
+
+    # Check if both output methods are entered as input.
+    if args.output is not None and args.folderoutput is not None:
+        print("You can only use one of the output methods.")
+        sys.exit(1)
+
+    # Check validity for single username output.
+    if args.output is not None and len(args.username) != 1:
+        print("You can only use --output with a single username")
+        sys.exit(1)
+
+    # Create object with all information about sites we are aware of.
+    try:
+        if args.local:
+            sites = SitesInformation(
+                os.path.join(os.path.dirname(__file__), "resources/data.json")
+            )
+        else:
+            json_file_location = args.json_file
+            if args.json_file:
+                # If --json parameter is a number, interpret it as a pull request number
+                if args.json_file.isnumeric():
+                    pull_number = args.json_file
+                    pull_url = f"https://api.github.com/repos/sherlock-project/sherlock/pulls/{pull_number}"
+                    pull_request_raw = requests.get(pull_url).text
+                    pull_request_json = json_loads(pull_request_raw)
+
+                    # Check if it's a valid pull request
+                    if "message" in pull_request_json:
+                        print(f"ERROR: Pull request #{pull_number} not found.")
+                        sys.exit(1)
+
+                    head_commit_sha = pull_request_json["head"]["sha"]
+                    json_file_location = f"https://raw.githubusercontent.com/sherlock-project/sherlock/{head_commit_sha}/sherlock_project/resources/data.json"
+
+            sites = SitesInformation(json_file_location)
+    except Exception as error:
+        print(f"ERROR:  {error}")
+        sys.exit(1)
+
+    if not args.nsfw:
+        sites.remove_nsfw_sites(do_not_remove=args.site_list)
+
+    # Create original dictionary from SitesInformation() object.
+    # Eventually, the rest of the code will be updated to use the new object
+    # directly, but this will glue the two pieces together.
+    site_data_all = {site.name: site.information for site in sites}
+    if args.site_list == []:
+        # Not desired to look at a sub-set of sites
+        site_data = site_data_all
+    else:
+        # User desires to selectively run queries on a sub-set of the site list.
+        # Make sure that the sites are supported & build up pruned site database.
+        site_data = {}
+        site_missing = []
+        for site in args.site_list:
+            counter = 0
+            for existing_site in site_data_all:
+                if site.lower() == existing_site.lower():
+                    site_data[existing_site] = site_data_all[existing_site]
+                    counter += 1
+            if counter == 0:
+                # Build up list of sites not supported for future error message.
+                site_missing.append(f"'{site}'")
+
+        if site_missing:
+            print(f"Error: Desired sites not found: {', '.join(site_missing)}.")
+
+        if not site_data:
+            sys.exit(1)
+
+    # Create notify object for query results.
+    query_notify = QueryNotifyPrint(
+        result=None, verbose=args.verbose, print_all=args.print_all, browse=args.browse
+    )
+
+    # Run report on all specified users.
+    all_usernames = []
+    for username in args.username:
+        if check_for_parameter(username):
+            for name in multiple_usernames(username):
+                all_usernames.append(name)
+        else:
+            all_usernames.append(username)
     for username in all_usernames:
         results = sherlock(
             username,
@@ -659,9 +869,6 @@ def main():
         if args.output:
             result_file = args.output
         elif args.folderoutput:
-            # The usernames results should be stored in a targeted folder.
-            # If the folder doesn't exist, create it first
-            os.makedirs(args.folderoutput, exist_ok=True)
             result_file = os.path.join(args.folderoutput, f"{username}.txt")
         else:
             result_file = f"{username}.txt"
@@ -677,12 +884,10 @@ def main():
                 file.write(f"Total Websites Username Detected On : {exists_counter}\n")
 
         if args.csv:
-            result_file = f"{username}.csv"
             if args.folderoutput:
-                # The usernames results should be stored in a targeted folder.
-                # If the folder doesn't exist, create it first
-                os.makedirs(args.folderoutput, exist_ok=True)
-                result_file = os.path.join(args.folderoutput, result_file)
+                result_file = os.path.join(args.folderoutput, f"{username}.csv")
+            else:
+                result_file = f"{username}.csv"
 
             with open(result_file, "w", newline="", encoding="utf-8") as csv_report:
                 writer = csv.writer(csv_report)
@@ -706,8 +911,6 @@ def main():
                         continue
 
                     response_time_s = results[site]["status"].query_time
-                    if response_time_s is None:
-                        response_time_s = ""
                     writer.writerow(
                         [
                             username,
@@ -716,19 +919,17 @@ def main():
                             results[site]["url_user"],
                             str(results[site]["status"].status),
                             results[site]["http_status"],
-                            response_time_s,
+                            response_time_s if response_time_s is not None else "",
                         ]
                     )
 
         if args.xlsx:
-            usernames = []
-            names = []
-            url_main = []
-            url_user = []
-            exists = []
-            http_status = []
-            response_time_s = []
+            if args.folderoutput:
+                result_file = os.path.join(args.folderoutput, f"{username}.xlsx")
+            else:
+                result_file = f"{username}.xlsx"
 
+            result_data = []
             for site in results:
                 if (
                     args.print_found
@@ -737,29 +938,35 @@ def main():
                 ):
                     continue
 
-                if response_time_s is None:
-                    response_time_s.append("")
-                else:
-                    response_time_s.append(results[site]["status"].query_time)
-                usernames.append(username)
-                names.append(site)
-                url_main.append(results[site]["url_main"])
-                url_user.append(results[site]["url_user"])
-                exists.append(str(results[site]["status"].status))
-                http_status.append(results[site]["http_status"])
-
+                result_data.append(
+                    [
+                        username,
+                        site,
+                        results[site]["url_main"],
+                        results[site]["url_user"],
+                        str(results[site]["status"].status),
+                        results[site]["http_status"],
+                        results[site]["status"].query_time
+                        if results[site]["status"].query_time is not None
+                        else "",
+                    ]
+                )
             DataFrame = pd.DataFrame(
-                {
-                    "username": usernames,
-                    "name": names,
-                    "url_main": url_main,
-                    "url_user": url_user,
-                    "exists": exists,
-                    "http_status": http_status,
-                    "response_time_s": response_time_s,
-                }
+                result_data,
+                columns=[
+                    "username",
+                    "name",
+                    "url_main",
+                    "url_user",
+                    "exists",
+                    "http_status",
+                    "response_time_s",
+                ],
             )
-            DataFrame.to_excel(f"{username}.xlsx", sheet_name="sheet1", index=False)
+            DataFrame.to_excel(result_file, sheet_name="sheet1", index=False)
+
+        print()
+    query_notify.finish()
 
 
 if __name__ == "__main__":