Skip to content

Commit

Permalink
Accelerated start time & fixed some false positives
Browse files: browse the repository at this point in the history
  • Loading branch information
soxoj committed Oct 31, 2021
1 parent aad862b commit 6480eeb
Show file tree
Hide file tree
Showing 7 changed files with 57 additions and 24 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,10 @@

## [Unreleased]

## [0.3.1] - 2021-10-31
* fixed false positives
* made Maigret start up 3 times faster

## [0.3.0] - 2021-06-02
* added support of Tor and I2P sites
* added experimental DNS checking feature
Expand Down
2 changes: 1 addition & 1 deletion maigret/__version__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
"""Maigret version file"""

__version__ = '0.3.0'
__version__ = '0.3.1'
17 changes: 11 additions & 6 deletions maigret/checking.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,11 @@
from typing import Tuple, Optional, Dict, List
from urllib.parse import quote

import aiohttp
import aiodns
import tqdm.asyncio
from aiohttp_socks import ProxyConnector
from python_socks import _errors as proxy_errors
from socid_extractor import extract
from aiohttp import TCPConnector, ClientSession, http_exceptions
from aiohttp.client_exceptions import ServerDisconnectedError, ClientConnectorError

from .activation import ParsingActivator, import_aiohttp_cookies
Expand Down Expand Up @@ -60,12 +59,15 @@ def __init__(self, *args, **kwargs):
cookie_jar = kwargs.get('cookie_jar')
self.logger = kwargs.get('logger', Mock())

# moved here to speed up the launch of Maigret
from aiohttp_socks import ProxyConnector

# make http client session
connector = (
ProxyConnector.from_url(proxy) if proxy else aiohttp.TCPConnector(ssl=False)
ProxyConnector.from_url(proxy) if proxy else TCPConnector(ssl=False)
)
connector.verify_ssl = False
self.session = aiohttp.ClientSession(
self.session = ClientSession(
connector=connector, trust_env=True, cookie_jar=cookie_jar
)

Expand Down Expand Up @@ -113,7 +115,7 @@ async def check(self, future) -> Tuple[str, int, Optional[CheckError]]:
error = CheckError("Connecting failure", str(e))
except ServerDisconnectedError as e:
error = CheckError("Server disconnected", str(e))
except aiohttp.http_exceptions.BadHttpMessage as e:
except http_exceptions.BadHttpMessage as e:
error = CheckError("HTTP", str(e))
except proxy_errors.ProxyError as e:
error = CheckError("Proxy", str(e))
Expand All @@ -139,9 +141,12 @@ def __init__(self, *args, **kwargs):
cookie_jar = kwargs.get('cookie_jar')
self.logger = kwargs.get('logger', Mock())

# moved here to speed up the launch of Maigret
from aiohttp_socks import ProxyConnector

connector = ProxyConnector.from_url(proxy)
connector.verify_ssl = False
self.session = aiohttp.ClientSession(
self.session = ClientSession(
connector=connector, trust_env=True, cookie_jar=cookie_jar
)

Expand Down
12 changes: 7 additions & 5 deletions maigret/maigret.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
"""
Maigret main module
"""
import aiohttp
import asyncio
import logging
import os
Expand All @@ -10,8 +9,7 @@
from argparse import ArgumentParser, RawDescriptionHelpFormatter
from typing import List, Tuple

import requests
from socid_extractor import extract, parse, __version__ as socid_version
from socid_extractor import extract, parse

from .__version__ import __version__
from .checking import (
Expand Down Expand Up @@ -114,12 +112,16 @@ def extract_ids_from_results(results: QueryResultWrapper, db: MaigretDatabase) -


def setup_arguments_parser():
from aiohttp import __version__ as aiohttp_version
from requests import __version__ as requests_version
from socid_extractor import __version__ as socid_version

version_string = '\n'.join(
[
f'%(prog)s {__version__}',
f'Socid-extractor: {socid_version}',
f'Aiohttp: {aiohttp.__version__}',
f'Requests: {requests.__version__}',
f'Aiohttp: {aiohttp_version}',
f'Requests: {requests_version}',
f'Python: {platform.python_version()}',
]
)
Expand Down
17 changes: 13 additions & 4 deletions maigret/report.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,9 @@
from datetime import datetime
from typing import Dict, Any

import pycountry
import xmind
from dateutil.parser import parse as parse_datetime_str
from jinja2 import Template
from xhtml2pdf import pisa
from pyvis.network import Network
import networkx as nx

from .checking import SUPPORTED_IDS
from .result import QueryStatus
Expand Down Expand Up @@ -78,6 +74,10 @@ def save_html_report(filename: str, context: dict):
def save_pdf_report(filename: str, context: dict):
template, css = generate_report_template(is_pdf=True)
filled_template = template.render(**context)

# moved here to speed up the launch of Maigret
from xhtml2pdf import pisa

with open(filename, "w+b") as f:
pisa.pisaDocument(io.StringIO(filled_template), dest=f, default_css=css)

Expand Down Expand Up @@ -117,6 +117,9 @@ def link(self, node1_name, node2_name):


def save_graph_report(filename: str, username_results: list, db: MaigretDatabase):
# moved here to speed up the launch of Maigret
import networkx as nx

G = nx.Graph()
graph = MaigretGraph(G)

Expand Down Expand Up @@ -201,6 +204,9 @@ def process_ids(parent_node, ids):

[G.remove_node(node) for node in nodes_to_remove]

# moved here to speed up the launch of Maigret
from pyvis.network import Network

nt = Network(notebook=True, height="750px", width="100%")
nt.from_nx(G)
nt.show(filename)
Expand Down Expand Up @@ -254,6 +260,9 @@ def generate_report_context(username_results: list):

first_seen = None

# moved here to speed up the launch of Maigret
import pycountry

for username, id_type, results in username_results:
found_accounts = 0
new_ids = []
Expand Down
27 changes: 20 additions & 7 deletions maigret/resources/data.json
Original file line number Diff line number Diff line change
Expand Up @@ -1159,7 +1159,8 @@
],
"checkType": "message",
"absenceStrs": [
"does not exist"
"does not exist",
"This user has not filled out their profile page yet."
],
"alexaRank": 80,
"urlMain": "https://armchairgm.fandom.com/",
Expand Down Expand Up @@ -2039,7 +2040,11 @@
"ru",
"wiki"
],
"checkType": "status_code",
"checkType": "message",
"absenceStrs": [
"does not exist",
"\u042d\u0442\u043e\u0442 \u0443\u0447\u0430\u0441\u0442\u043d\u0438\u043a \u043f\u043e\u043a\u0430 \u043d\u0435 \u0437\u0430\u043f\u043e\u043b\u043d\u0438\u043b \u0441\u0432\u043e\u0439 \u043f\u0440\u043e\u0444\u0438\u043b\u044c."
],
"alexaRank": 80,
"urlMain": "https://bleach.fandom.com/ru",
"url": "https://bleach.fandom.com/ru/wiki/%D0%A3%D1%87%D0%B0%D1%81%D1%82%D0%BD%D0%B8%D0%BA:{username}",
Expand Down Expand Up @@ -4459,7 +4464,8 @@
],
"checkType": "message",
"absenceStrs": [
"does not exist"
"does not exist",
"This user has not filled out their profile page yet."
],
"alexaRank": 80,
"urlMain": "https://community.fandom.com",
Expand Down Expand Up @@ -8720,6 +8726,7 @@
"usernameUnclaimed": "noonewouldeverusethis7"
},
"Metacafe": {
"disabled": true,
"tags": [
"in",
"us"
Expand Down Expand Up @@ -13071,7 +13078,7 @@
"us"
],
"headers": {
"authorization": "Bearer BQBtoXAQab7ErdN63dPUer1RqjDLcX2v54xE9AfGPwYnncjU8HS5PlFW5mJE2cgDEDImvT07Xcpjb_ggsww"
"authorization": "Bearer BQB8QPkkvz_PhWGy4sSY4ijssYjumEHJgJJBFu3VX2Sm4XIoT9jp0eFZrYL3TayY4QZGHmMiz3BCPLcAth4"
},
"errors": {
"Spotify is currently not available in your country.": "Access denied in your country, use proxy/vpn"
Expand Down Expand Up @@ -14906,7 +14913,7 @@
"video"
],
"headers": {
"Authorization": "jwt eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJleHAiOjE2MjU0MjEzMDAsInVzZXJfaWQiOm51bGwsImFwcF9pZCI6NTg0NzksInNjb3BlcyI6InB1YmxpYyIsInRlYW1fdXNlcl9pZCI6bnVsbH0.-F7S6fx7mold8Qhve4N3GjIv2Ue8RIaej4kXQUMBxpE"
"Authorization": "jwt eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJleHAiOjE2MzU2OTI0NjAsInVzZXJfaWQiOm51bGwsImFwcF9pZCI6NTg0NzksInNjb3BlcyI6InB1YmxpYyIsInRlYW1fdXNlcl9pZCI6bnVsbH0.KZHo96wUe5__rTqZQqAWiJKPKOy2-sjyxRjhOuuhyEc"
},
"activation": {
"url": "https://vimeo.com/_rv/viewer",
Expand Down Expand Up @@ -19212,11 +19219,17 @@
"tags": [
"cn"
],
"checkType": "status_code",
"checkType": "message",
"absenceStrs": [
"message\":\"Not Found\""
],
"presenseStrs": [
"- SegmentFault \u601d\u5426</title>"
],
"alexaRank": 2697,
"urlMain": "https://segmentfault.com/",
"url": "https://segmentfault.com/u/{username}",
"usernameClaimed": "bule",
"usernameClaimed": "john",
"usernameUnclaimed": "noonewouldeverusethis7"
},
"shadow-belgorod.ucoz.ru": {
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
requires = rf.read().splitlines()

setup(name='maigret',
version='0.3.0',
version='0.3.1',
description='Collect a dossier on a person by username from a huge number of sites',
long_description=long_description,
long_description_content_type="text/markdown",
Expand Down

0 comments on commit 6480eeb

Please sign in to comment.