From 6480eebbdf7b2e1e55a7742a6187cbecb5b99774 Mon Sep 17 00:00:00 2001 From: Soxoj Date: Sun, 31 Oct 2021 18:25:01 +0300 Subject: [PATCH] Accelerated start time & fixed some false positives --- CHANGELOG.md | 4 ++++ maigret/__version__.py | 2 +- maigret/checking.py | 17 +++++++++++------ maigret/maigret.py | 12 +++++++----- maigret/report.py | 17 +++++++++++++---- maigret/resources/data.json | 27 ++++++++++++++++++++------- setup.py | 2 +- 7 files changed, 57 insertions(+), 24 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 64d4874c..bda589bc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,10 @@ ## [Unreleased] +## [0.3.1] - 2021-10-31 +* fixed false positives +* accelerated maigret start time by 3 times + ## [0.3.0] - 2021-06-02 * added support of Tor and I2P sites * added experimental DNS checking feature diff --git a/maigret/__version__.py b/maigret/__version__.py index 7f50aab6..ab86bebf 100644 --- a/maigret/__version__.py +++ b/maigret/__version__.py @@ -1,3 +1,3 @@ """Maigret version file""" -__version__ = '0.3.0' +__version__ = '0.3.1' diff --git a/maigret/checking.py b/maigret/checking.py index 0a682a55..163316e2 100644 --- a/maigret/checking.py +++ b/maigret/checking.py @@ -13,12 +13,11 @@ from typing import Tuple, Optional, Dict, List from urllib.parse import quote -import aiohttp import aiodns import tqdm.asyncio -from aiohttp_socks import ProxyConnector from python_socks import _errors as proxy_errors from socid_extractor import extract +from aiohttp import TCPConnector, ClientSession, http_exceptions from aiohttp.client_exceptions import ServerDisconnectedError, ClientConnectorError from .activation import ParsingActivator, import_aiohttp_cookies @@ -60,12 +59,15 @@ def __init__(self, *args, **kwargs): cookie_jar = kwargs.get('cookie_jar') self.logger = kwargs.get('logger', Mock()) + # moved here to speed up the launch of Maigret + from aiohttp_socks import ProxyConnector + # make http client session connector = ( - ProxyConnector.from_url(proxy) if proxy else aiohttp.TCPConnector(ssl=False) + ProxyConnector.from_url(proxy) if proxy else TCPConnector(ssl=False) ) connector.verify_ssl = False - self.session = aiohttp.ClientSession( + self.session = ClientSession( connector=connector, trust_env=True, cookie_jar=cookie_jar ) @@ -113,7 +115,7 @@ async def check(self, future) -> Tuple[str, int, Optional[CheckError]]: error = CheckError("Connecting failure", str(e)) except ServerDisconnectedError as e: error = CheckError("Server disconnected", str(e)) - except aiohttp.http_exceptions.BadHttpMessage as e: + except http_exceptions.BadHttpMessage as e: error = CheckError("HTTP", str(e)) except proxy_errors.ProxyError as e: error = CheckError("Proxy", str(e)) @@ -139,9 +141,12 @@ def __init__(self, *args, **kwargs): cookie_jar = kwargs.get('cookie_jar') self.logger = kwargs.get('logger', Mock()) + # moved here to speed up the launch of Maigret + from aiohttp_socks import ProxyConnector + connector = ProxyConnector.from_url(proxy) connector.verify_ssl = False - self.session = aiohttp.ClientSession( + self.session = ClientSession( connector=connector, trust_env=True, cookie_jar=cookie_jar ) diff --git a/maigret/maigret.py b/maigret/maigret.py index 0adbf861..27a7919e 100755 --- a/maigret/maigret.py +++ b/maigret/maigret.py @@ -1,7 +1,6 @@ """ Maigret main module """ -import aiohttp import asyncio import logging import os @@ -10,8 +9,7 @@ from argparse import ArgumentParser, RawDescriptionHelpFormatter from typing import List, Tuple -import requests -from socid_extractor import extract, parse, __version__ as socid_version +from socid_extractor import extract, parse from .__version__ import __version__ from .checking import ( @@ -114,12 +112,16 @@ def extract_ids_from_results(results: QueryResultWrapper, db: MaigretDatabase) - def setup_arguments_parser(): + from aiohttp import __version__ as aiohttp_version + from requests import __version__ as requests_version + from socid_extractor import __version__ as socid_version + version_string = '\n'.join( [ f'%(prog)s {__version__}', f'Socid-extractor: {socid_version}', - f'Aiohttp: {aiohttp.__version__}', - f'Requests: {requests.__version__}', + f'Aiohttp: {aiohttp_version}', + f'Requests: {requests_version}', f'Python: {platform.python_version()}', ] ) diff --git a/maigret/report.py b/maigret/report.py index 3efd0bee..b7478395 100644 --- a/maigret/report.py +++ b/maigret/report.py @@ -7,13 +7,9 @@ from datetime import datetime from typing import Dict, Any -import pycountry import xmind from dateutil.parser import parse as parse_datetime_str from jinja2 import Template -from xhtml2pdf import pisa -from pyvis.network import Network -import networkx as nx from .checking import SUPPORTED_IDS from .result import QueryStatus @@ -78,6 +74,10 @@ def save_html_report(filename: str, context: dict): def save_pdf_report(filename: str, context: dict): template, css = generate_report_template(is_pdf=True) filled_template = template.render(**context) + + # moved here to speed up the launch of Maigret + from xhtml2pdf import pisa + with open(filename, "w+b") as f: pisa.pisaDocument(io.StringIO(filled_template), dest=f, default_css=css) @@ -117,6 +117,9 @@ def link(self, node1_name, node2_name): def save_graph_report(filename: str, username_results: list, db: MaigretDatabase): + # moved here to speed up the launch of Maigret + import networkx as nx + G = nx.Graph() graph = MaigretGraph(G) @@ -201,6 +204,9 @@ def process_ids(parent_node, ids): [G.remove_node(node) for node in nodes_to_remove] + # moved here to speed up the launch of Maigret + from pyvis.network import Network + nt = Network(notebook=True, height="750px", width="100%") nt.from_nx(G) nt.show(filename) @@ -254,6 +260,9 @@ def generate_report_context(username_results: list): first_seen = None + # moved here to speed up the launch of Maigret + import pycountry + for username, id_type, results in username_results: found_accounts = 0 new_ids = [] diff --git a/maigret/resources/data.json b/maigret/resources/data.json index 31b150e9..5a4b51ab 100644 --- a/maigret/resources/data.json +++ b/maigret/resources/data.json @@ -1159,7 +1159,8 @@ ], "checkType": "message", "absenceStrs": [ - "does not exist" + "does not exist", + "This user has not filled out their profile page yet." ], "alexaRank": 80, "urlMain": "https://armchairgm.fandom.com/", @@ -2039,7 +2040,11 @@ "ru", "wiki" ], - "checkType": "status_code", + "checkType": "message", + "absenceStrs": [ + "does not exist", + "\u042d\u0442\u043e\u0442 \u0443\u0447\u0430\u0441\u0442\u043d\u0438\u043a \u043f\u043e\u043a\u0430 \u043d\u0435 \u0437\u0430\u043f\u043e\u043b\u043d\u0438\u043b \u0441\u0432\u043e\u0439 \u043f\u0440\u043e\u0444\u0438\u043b\u044c." + ], "alexaRank": 80, "urlMain": "https://bleach.fandom.com/ru", "url": "https://bleach.fandom.com/ru/wiki/%D0%A3%D1%87%D0%B0%D1%81%D1%82%D0%BD%D0%B8%D0%BA:{username}", @@ -4459,7 +4464,8 @@ ], "checkType": "message", "absenceStrs": [ - "does not exist" + "does not exist", + "This user has not filled out their profile page yet." ], "alexaRank": 80, "urlMain": "https://community.fandom.com", @@ -8720,6 +8726,7 @@ "usernameUnclaimed": "noonewouldeverusethis7" }, "Metacafe": { + "disabled": true, "tags": [ "in", "us" @@ -13071,7 +13078,7 @@ "us" ], "headers": { - "authorization": "Bearer BQBtoXAQab7ErdN63dPUer1RqjDLcX2v54xE9AfGPwYnncjU8HS5PlFW5mJE2cgDEDImvT07Xcpjb_ggsww" + "authorization": "Bearer BQB8QPkkvz_PhWGy4sSY4ijssYjumEHJgJJBFu3VX2Sm4XIoT9jp0eFZrYL3TayY4QZGHmMiz3BCPLcAth4" }, "errors": { "Spotify is currently not available in your country.": "Access denied in your country, use proxy/vpn" @@ -14906,7 +14913,7 @@ "video" ], "headers": { - "Authorization": "jwt eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJleHAiOjE2MjU0MjEzMDAsInVzZXJfaWQiOm51bGwsImFwcF9pZCI6NTg0NzksInNjb3BlcyI6InB1YmxpYyIsInRlYW1fdXNlcl9pZCI6bnVsbH0.-F7S6fx7mold8Qhve4N3GjIv2Ue8RIaej4kXQUMBxpE" + "Authorization": "jwt eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJleHAiOjE2MzU2OTI0NjAsInVzZXJfaWQiOm51bGwsImFwcF9pZCI6NTg0NzksInNjb3BlcyI6InB1YmxpYyIsInRlYW1fdXNlcl9pZCI6bnVsbH0.KZHo96wUe5__rTqZQqAWiJKPKOy2-sjyxRjhOuuhyEc" }, "activation": { "url": "https://vimeo.com/_rv/viewer", @@ -19212,11 +19219,17 @@ "tags": [ "cn" ], - "checkType": "status_code", + "checkType": "message", + "absenceStrs": [ + "message\":\"Not Found\"" + ], + "presenseStrs": [ + "- SegmentFault \u601d\u5426" + ], "alexaRank": 2697, "urlMain": "https://segmentfault.com/", "url": "https://segmentfault.com/u/{username}", - "usernameClaimed": "bule", + "usernameClaimed": "john", "usernameUnclaimed": "noonewouldeverusethis7" }, "shadow-belgorod.ucoz.ru": { diff --git a/setup.py b/setup.py index caa91a05..606174e6 100644 --- a/setup.py +++ b/setup.py @@ -11,7 +11,7 @@ requires = rf.read().splitlines() setup(name='maigret', - version='0.3.0', + version='0.3.1', description='Collect a dossier on a person by username from a huge number of sites', long_description=long_description, long_description_content_type="text/markdown",