diff --git a/CHANGELOG.md b/CHANGELOG.md index d7235bc..934015f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,9 @@ +* [v2.71.0](https://github.com/a4k-openproject/a4kScrapers/releases/tag/a4kScrapers-2.71.0): + * remove solidtorrents - dead + * remove torrentparadise - broken search + * remove eztv - keeping only eztv_api + * add torrentio + * [v2.70.0](https://github.com/a4k-openproject/a4kScrapers/releases/tag/a4kScrapers-2.70.0): * bring back bitcq * propagate pre-emptive termination exc from request diff --git a/meta.json b/meta.json index facfe72..fa3027a 100644 --- a/meta.json +++ b/meta.json @@ -1,6 +1,6 @@ { "author": "Unknown", - "version":"2.70.0", + "version":"2.71.0", "name":"a4kScrapers", "update_directory": "https://github.com/a4k-openproject/a4kScrapers/archive/", "remote_meta": "https://raw.githubusercontent.com/newt-sc/a4kScrapers/master/meta.json", diff --git a/providerModules/a4kScrapers/core.py b/providerModules/a4kScrapers/core.py index 0e35b1a..968b005 100644 --- a/providerModules/a4kScrapers/core.py +++ b/providerModules/a4kScrapers/core.py @@ -176,7 +176,7 @@ def optimize_requests(self): def is_movie_query(self): return self.query_type == 'movie' - def movie(self, title, year, imdb=None, auto_query=True): + def movie(self, title, year, imdb=None, auto_query=True, **kwargs): self.query_type = 'movie' return self._get_scraper(title) \ .movie_query(title, @@ -185,7 +185,7 @@ def movie(self, title, year, imdb=None, auto_query=True): auto_query=auto_query, single_query=self._single_query) - def episode(self, simple_info, all_info, auto_query=True, query_seasons=True, query_show_packs=True): + def episode(self, simple_info, all_info, auto_query=True, query_seasons=True, query_show_packs=True, **kwargs): self.query_type = 'episode' return self._get_scraper(simple_info['show_title']) \ .episode_query(simple_info, @@ -211,150 +211,6 @@ def _get_scraper(self, title, custom_filter=None): use_thread_for_info=True, custom_filter=custom_filter) 
-class DefaultHosterSources(DefaultSources): - def movie(self, imdb, title, localtitle, aliases, year): - self.start_time = time.time() - self.query_type = 'movie' - - if isinstance(self._get_scraper(title), NoResultsScraper): - return None - - self._request = self.scraper._request - - simple_info = {} - simple_info['title'] = source_utils.clean_title(title) - simple_info['query_title'] = simple_info['title'] - simple_info['year'] = year - return simple_info - - def tvshow(self, imdb, tvdb, tvshowtitle, localtvshowtitle, aliases, year): - self.start_time = time.time() - self.query_type = 'episode' - - if isinstance(self._get_scraper(tvshowtitle), NoResultsScraper): - return None - - self._request = self.scraper._request - - simple_info = {} - simple_info['show_title'] = re.sub(r'\s+', ' ', source_utils.clean_title(tvshowtitle).replace(year, '')) - simple_info['query_title'] = simple_info['show_title'] - simple_info['year'] = year - return simple_info - - def episode(self, simple_info, imdb, tvdb, title, premiered, season, episode): - if simple_info is None: - return None - - simple_info['episode_title'] = title - simple_info['episode_number'] = episode - simple_info['season_number'] = season - simple_info['episode_number_xx'] = episode.zfill(2) - simple_info['season_number_xx'] = season.zfill(2) - simple_info['show_aliases'] = [] - - return simple_info - - def resolve(self, url): - return url - - def sources(self, simple_info, hostDict, hostprDict): - if simple_info is None: - return [] - - supported_hosts = hostDict + hostprDict - sources = [] - - try: - if self.is_movie_query(): - query = '%s %s' % (source_utils.clean_title(simple_info['title']), simple_info['year']) - else: - query = '%s S%sE%s' % (source_utils.clean_title(simple_info['show_title']), simple_info['season_number_xx'], simple_info['episode_number_xx']) - - if len(supported_hosts) > 0: - url = self.scraper._find_url() - - def search(url): - if self._cancellation_token.is_cancellation_requested: - 
return [] - - try: - result = self.search(url, query) - if result is None: - raise requests.exceptions.RequestException() - return result - except requests.exceptions.RequestException: - if self._request.exc_msg: - deprioritize_url(self._caller_name) - return [] - if self._request.request_time < 2: - url = self.scraper._find_next_url(url) - if url is None: - return [] - return search(url) - return [] - - hoster_results = search(url) if url is not None else [] - else: - hoster_results = [] - - if self.query_type == 'episode': - filter_single_episode_fn = source_utils.get_filter_single_episode_fn(simple_info) - - for result in hoster_results: - quality = source_utils.get_quality(result.title) - release_title = source_utils.clean_release_title_with_simple_info(result.title, simple_info) - - if self.query_type == 'movie' and not source_utils.filter_movie_title(result.title, release_title, simple_info['title'], simple_info): - continue - - if self.query_type == 'episode' and not filter_single_episode_fn(release_title): - continue - - for url in result.urls: - domain = re.findall(r"https?:\/\/(www\.)?(.*?)\/.*?", url)[0][1] - - if domain not in supported_hosts: - continue - if any(x in url for x in ['.rar', '.zip', '.iso']): - continue - - quality_from_url = source_utils.get_quality(url) - if quality_from_url != 'SD': - quality = quality_from_url - - release_title = source_utils.strip_non_ascii_and_unprintable(result.title) - if DEV_MODE and len(sources) == 0: - tools.log(release_title, 'info') - sources.append({ - 'release_title': release_title, - 'source': domain, - 'quality': quality, - 'language': 'en', - 'url': url, - 'info': [], - 'direct': False, - 'debridonly': False - }) - - sources.reverse() - - result_count = len(sources) if len(supported_hosts) > 0 else 'disabled' - tools.log('a4kScrapers.%s.%s: %s' % (self.query_type, self._caller_name, result_count), 'notice') - - - self.end_time = time.time() - self.time_ms = clock_time_ms(self.start_time, self.end_time) - 
tools.log('a4kScrapers.%s.%s: took %s ms' % (self.query_type, self._caller_name, self.time_ms), 'notice') - - return sources - except: - traceback.print_exc() - return sources - - def search(self, hoster_url, query): - return [] - class CoreScraper(object): def __init__(self, urls, @@ -574,7 +430,7 @@ def _find_url(self): if self._url is not None: return self._url - if self.caller_name in ['showrss', 'lime', 'bt4g', 'btscene', 'glo', 'torrentapi', 'torrentz2', 'scenerls', 'piratebay', 'magnetdl']: + if self.caller_name in ['showrss', 'lime', 'bt4g', 'btscene', 'glo', 'torrentapi', 'torrentz2', 'scenerls', 'piratebay', 'magnetdl', 'torrentio']: self._request.skip_head = True return self._request.find_url(self._urls) diff --git a/providerModules/a4kScrapers/request.py b/providerModules/a4kScrapers/request.py index 916d37c..68d7731 100644 --- a/providerModules/a4kScrapers/request.py +++ b/providerModules/a4kScrapers/request.py @@ -7,6 +7,7 @@ import re import os import json +import requests from collections import OrderedDict from . 
import source_utils @@ -73,12 +74,15 @@ def _get(cfscrape, url, headers, timeout, allow_redirects, update_options_fn): 'url': url, 'headers': headers, 'timeout': timeout, - 'allow_redirects': allow_redirects + 'allow_redirects': allow_redirects, } if update_options_fn is not None: update_options_fn(request_options) + if url.endswith('.json'): + request_options['verify']=False + return requests.request(**request_options) return cfscrape.request(**request_options) def _is_cloudflare_iuam_challenge(resp, allow_empty_body=False): diff --git a/providerModules/a4kScrapers/scrapers.py b/providerModules/a4kScrapers/scrapers.py index 007868d..21b43d2 100644 --- a/providerModules/a4kScrapers/scrapers.py +++ b/providerModules/a4kScrapers/scrapers.py @@ -102,6 +102,8 @@ def parse_seeds(self, row): seeds = safe_list_get(re.findall(r'Seeders:?.*?(\d+)', row), 0) if seeds == '': seeds = safe_list_get(re.findall(r'Seed:?.*?(\d+)', row), 0) + if seeds == '': + seeds = safe_list_get(re.findall(r'\n👤 (\d+) ', row), 0) if seeds == '': seeds = self._parse_number(row, -2) if seeds == 'N/A': diff --git a/providerModules/a4kScrapers/test_utils.py b/providerModules/a4kScrapers/test_utils.py index 4f8d6ed..d706b07 100644 --- a/providerModules/a4kScrapers/test_utils.py +++ b/providerModules/a4kScrapers/test_utils.py @@ -171,7 +171,10 @@ def _disable_warnings(): def test_torrent(self, scraper_module, scraper, url=None): _disable_warnings() - if scraper in ['showrss', 'eztv', 'eztv_api']: + if os.getenv('WERCKER_MAIN_PIPELINE_STARTED') and scraper in ['torrentio']: + tools.log('skipping %s in Wercker build' % scraper, 'notice') + return + if scraper in ['showrss', 'eztv', 'torrentio']: return _episode(scraper_module, scraper, url, test=self) return _movie(scraper_module, scraper, url, test=self) diff --git a/providerModules/a4kScrapers/urls.json b/providerModules/a4kScrapers/urls.json index a0be563..7cbcd5c 100644 --- a/providerModules/a4kScrapers/urls.json +++ 
b/providerModules/a4kScrapers/urls.json @@ -41,7 +41,7 @@ }, "-ext": { "search": "", "domains": [{ "base": "" }] }, "-extratorrent": { "search": "", "domains": [{ "base": "" }] }, - "eztv_api": { + "eztv": { "search": "/api/get-torrents?limit=100&imdb_id=%s", "domains": [ { "base": "https://eztv.re" }, @@ -51,16 +51,6 @@ { "base": "https://eztv.unblocked.llc" } ] }, - "eztv": { - "search": "/search/%s", - "domains": [ - { "base": "https://eztv.re" }, - { "base": "https://eztv.ag" }, - { "base": "https://eztv.it" }, - { "base": "https://eztv.ch" }, - { "base": "https://eztv.unblocked.llc" } - ] - }, "glo": { "search": "/search_results.php?search=%s&cat={{category}}&incldead=0&inclexternal=0&lang=1&sort=seeders&order=desc", "cat_movie": "1", @@ -144,12 +134,7 @@ ] }, "-skytorrents": { "search": "", "domains": [{ "base": "" }] }, - "solidtorrents": { - "search": "/api/v1/search?q=%s&category=video&sort=size", - "domains": [ - { "base": "https://solidtorrents.net" } - ] - }, + "-solidtorrents": { "search": "", "domains": [{ "base": "" }] }, "torrentapi": { "search": "&mode=search&search_string=%s&token=%s&sort=seeders&ranked=0&limit=100&format=json_extended", "domains": [ @@ -171,13 +156,16 @@ { "base": "https://torrentgalaxy.su" } ] }, - "torrentparadise": { - "search": "/api/search?q=%s", + "-torrentparadise": { "search": "", "domains": [{ "base": "" }] }, + "torrenttm": { "search": "", "domains": [{ "base": "" }] }, + "torrentio": { + "search": "/language=english/stream/{{category}}/%s.json", + "cat_movie": "movie", + "cat_episode": "series", "domains": [ - { "base": "https://torrent-paradise.ml" } + { "base": "https://torrentio.strem.fun" } ] }, - "torrenttm": { "search": "", "domains": [{ "base": "" }] }, "torrentz2": { "search": "/kick.php?q=%s", "domains": [ diff --git a/providers/a4kScrapers/en/torrent/cached.py b/providers/a4kScrapers/en/torrent/cached.py index c5021d5..aa43510 100644 --- a/providers/a4kScrapers/en/torrent/cached.py +++ 
b/providers/a4kScrapers/en/torrent/cached.py @@ -6,5 +6,5 @@ class sources(core.DefaultSources): def __init__(self, *args, **kwargs): super(sources, self).__init__(__name__, *args, **kwargs) - def episode(self, simple_info, all_info): + def episode(self, simple_info, all_info, **kwargs): return [] diff --git a/providers/a4kScrapers/en/torrent/eztv.py b/providers/a4kScrapers/en/torrent/eztv.py index a4c950e..da49fda 100644 --- a/providers/a4kScrapers/en/torrent/eztv.py +++ b/providers/a4kScrapers/en/torrent/eztv.py @@ -5,6 +5,78 @@ class sources(core.DefaultSources): def __init__(self, *args, **kwargs): super(sources, self).__init__(__name__, *args, single_query=True, **kwargs) + self._filter = core.Filter(fn=self._filter_fn, type='single') - def movie(self, title, year): + def _filter_fn(self, title, clean_title): + if self.is_movie_query(): + return False + + # ignore title + title = core.re.sub(r'.*(S\d\d.*)', r'%s \1' % self.scraper.show_title, title) + clean_title = core.re.sub(r'.*(s\d\d.*)', r'%s \1' % self.scraper.show_title, clean_title) + + if self.scraper.filter_single_episode.fn(title, clean_title): + self._filter.type = self.scraper.filter_single_episode.type + return True + + if self.scraper.filter_show_pack.fn(title, clean_title): + self._filter.type = self.scraper.filter_show_pack.type + return True + + if self.scraper.filter_season_pack.fn(title, clean_title): + self._filter.type = self.scraper.filter_season_pack.type + return True + + return False + + def _get_scraper(self, title): + return super(sources, self)._get_scraper(title, custom_filter=self._filter) + + def _search_request(self, url, query, page=1, prev_total=0): + if page > 10: + return [] + + query = core.quote_plus(self._imdb.replace('tt', '')) + response = self._request.get(url.base + (url.search % query) + ('&page=%s' % page)) + + if response.status_code != 200: + return [] + + try: + results = core.json.loads(response.text) + except Exception as e: + self._request.exc_msg = 'Failed 
to parse json: %s' % response.text + return [] + + if not results or not results.get('torrents', None) or len(results['torrents']) == 0: + return [] + + torrents = results['torrents'] + total = len(torrents) + prev_total + if total < results['torrents_count']: + more_results = self._search_request(url, None, page+1, total) + torrents += more_results + + return torrents + + def _soup_filter(self, response): + return response + + def _title_filter(self, el): + return el['filename'] + + def _info(self, el, url, torrent): + torrent['hash'] = el['hash'] + torrent['size'] = int(el['size_bytes']) / 1024 / 1024 + torrent['seeds'] = el['seeds'] + + return torrent + + def movie(self, title, year, imdb=None, **kwargs): return [] + + def episode(self, simple_info, all_info, **kwargs): + self._imdb = all_info.get('info', {}).get('tvshow.imdb_id', None) + if self._imdb is None: + self._imdb = all_info.get('showInfo', {}).get('ids', {}).get('imdb', None) + return super(sources, self).episode(simple_info, all_info) diff --git a/providers/a4kScrapers/en/torrent/piratebay.py b/providers/a4kScrapers/en/torrent/piratebay.py index 56aa054..1ef23ce 100644 --- a/providers/a4kScrapers/en/torrent/piratebay.py +++ b/providers/a4kScrapers/en/torrent/piratebay.py @@ -59,11 +59,11 @@ def _info(self, el, url, torrent): return torrent - def movie(self, title, year, imdb=None): + def movie(self, title, year, imdb=None, **kwargs): self._imdb = imdb return super(sources, self).movie(title, year, imdb, auto_query=False) - def episode(self, simple_info, all_info): + def episode(self, simple_info, all_info, **kwargs): self._imdb = all_info.get('info', {}).get('tvshow.imdb_id', None) if self._imdb is None: self._imdb = all_info.get('showInfo', {}).get('ids', {}).get('imdb', None) diff --git a/providers/a4kScrapers/en/torrent/showrss.py b/providers/a4kScrapers/en/torrent/showrss.py index 09994ac..47a60a7 100644 --- a/providers/a4kScrapers/en/torrent/showrss.py +++ 
b/providers/a4kScrapers/en/torrent/showrss.py @@ -60,8 +60,8 @@ def _info(self, el, url, torrent): return torrent - def movie(self, title, year): + def movie(self, title, year, **kwargs): return [] - def episode(self, simple_info, all_info): + def episode(self, simple_info, all_info, **kwargs): return super(sources, self).episode(simple_info, all_info, auto_query=False) diff --git a/providers/a4kScrapers/en/torrent/solidtorrents.py b/providers/a4kScrapers/en/torrent/solidtorrents.py deleted file mode 100644 index 69443ab..0000000 --- a/providers/a4kScrapers/en/torrent/solidtorrents.py +++ /dev/null @@ -1,26 +0,0 @@ -# -*- coding: utf-8 -*- - -from providerModules.a4kScrapers import core - -class sources(core.DefaultSources): - def __init__(self, *args, **kwargs): - super(sources, self).__init__(__name__, *args, **kwargs) - - def _soup_filter(self, response): - try: - response = core.json.loads(response.text) - except: - core.tools.log('a4kScrapers.solidtorrents: fail to parse json \n' + response.text) - return [] - - torrents = response.get('results', []) - results = [] - for torrent in torrents: - result = lambda: None - result.hash = torrent.get('infohash', '') - result.title = torrent.get('title', '') - result.size = '%s B' % torrent['size'] if torrent.get('size', None) is not None else None - result.seeds = torrent.get('swarm', {}).get('seeders', None) - results.append(result) - - return results diff --git a/providers/a4kScrapers/en/torrent/torrentapi.py b/providers/a4kScrapers/en/torrent/torrentapi.py index 0701d5c..e303f51 100644 --- a/providers/a4kScrapers/en/torrent/torrentapi.py +++ b/providers/a4kScrapers/en/torrent/torrentapi.py @@ -89,11 +89,11 @@ def _get_scraper(self, title): custom_filter = core.Filter(fn=filter_fn, type='single') return super(sources, self)._get_scraper(title, custom_filter=custom_filter) - def movie(self, title, year, imdb=None): + def movie(self, title, year, imdb=None, **kwargs): self._imdb = imdb return super(sources, 
self).movie(title, year, imdb, auto_query=False) - def episode(self, simple_info, all_info): + def episode(self, simple_info, all_info, **kwargs): self._imdb = all_info.get('info', {}).get('tvshow.imdb_id', None) if self._imdb is None: self._imdb = all_info.get('showInfo', {}).get('ids', {}).get('imdb', None) diff --git a/providers/a4kScrapers/en/torrent/torrentgalaxy.py b/providers/a4kScrapers/en/torrent/torrentgalaxy.py index cf71259..20b5acf 100644 --- a/providers/a4kScrapers/en/torrent/torrentgalaxy.py +++ b/providers/a4kScrapers/en/torrent/torrentgalaxy.py @@ -19,7 +19,7 @@ def _soup_filter(self, response): def _parse_seeds(self, row): return core.safe_list_get(core.re.findall(r'color=\'green\'>(\d+).*', row), 0) - def movie(self, title, year, imdb=None): + def movie(self, title, year, imdb=None, **kwargs): self._imdb = imdb auto_query = False if imdb else True return super(sources, self).movie(title, year, imdb, auto_query=auto_query) diff --git a/providers/a4kScrapers/en/torrent/eztv_api.py b/providers/a4kScrapers/en/torrent/torrentio.py similarity index 62% rename from providers/a4kScrapers/en/torrent/eztv_api.py rename to providers/a4kScrapers/en/torrent/torrentio.py index 003dde6..2605d8b 100644 --- a/providers/a4kScrapers/en/torrent/eztv_api.py +++ b/providers/a4kScrapers/en/torrent/torrentio.py @@ -11,10 +11,6 @@ def _filter_fn(self, title, clean_title): if self.is_movie_query(): return False - # ignore title - title = core.re.sub(r'.*(S\d\d.*)', r'%s \1' % self.scraper.show_title, title) - clean_title = core.re.sub(r'.*(s\d\d.*)', r'%s \1' % self.scraper.show_title, clean_title) - if self.scraper.filter_single_episode.fn(title, clean_title): self._filter.type = self.scraper.filter_single_episode.type return True @@ -32,9 +28,12 @@ def _filter_fn(self, title, clean_title): def _get_scraper(self, title): return super(sources, self)._get_scraper(title, custom_filter=self._filter) - def _search_request(self, url, query, page=1, prev_total=0): - query = 
core.quote_plus(self._imdb.replace('tt', '')) - response = self._request.get(url.base + (url.search % query) + ('&page=%s' % page)) + def _search_request(self, url, query): + query = self._imdb + if not self.is_movie_query(): + query += ':' + self.scraper.season_x + ':' + self.scraper.episode_x + + response = self._request.get(url.base + (url.search % core.quote_plus(query))) if response.status_code != 200: return [] @@ -45,34 +44,29 @@ def _search_request(self, url, query, page=1, prev_total=0): self._request.exc_msg = 'Failed to parse json: %s' % response.text return [] - if not results or not results.get('torrents', None) or len(results['torrents']) == 0: + if not results or 'streams' not in results or len(results['streams']) == 0: return [] - - torrents = results['torrents'] - total = len(torrents) + prev_total - if total < results['torrents_count']: - more_results = self._search_request(url, None, page+1, total) - torrents += more_results - - return torrents + else: + return results['streams'] def _soup_filter(self, response): return response def _title_filter(self, el): - return el['filename'] + return el['title'] def _info(self, el, url, torrent): - torrent['hash'] = el['hash'] - torrent['size'] = int(el['size_bytes']) / 1024 / 1024 - torrent['seeds'] = el['seeds'] + torrent['hash'] = el['infoHash'] + torrent['size'] = core.source_utils.de_string_size(self.genericScraper.parse_size(el['title'])) + torrent['seeds'] = self.genericScraper.parse_seeds(el['title']) return torrent - def movie(self, title, year, imdb=None): - return [] + def movie(self, title, year, imdb=None, **kwargs): + self._imdb = imdb + return super(sources, self).movie(title, year, imdb, auto_query=False) - def episode(self, simple_info, all_info): + def episode(self, simple_info, all_info, **kwargs): self._imdb = all_info.get('info', {}).get('tvshow.imdb_id', None) if self._imdb is None: self._imdb = all_info.get('showInfo', {}).get('ids', {}).get('imdb', None) diff --git 
a/providers/a4kScrapers/en/torrent/torrentparadise.py b/providers/a4kScrapers/en/torrent/torrentparadise.py deleted file mode 100644 index 667270e..0000000 --- a/providers/a4kScrapers/en/torrent/torrentparadise.py +++ /dev/null @@ -1,36 +0,0 @@ -# -*- coding: utf-8 -*- - -from providerModules.a4kScrapers import core - -class sources(core.DefaultSources): - def __init__(self, *args, **kwargs): - super(sources, self).__init__(__name__, *args, **kwargs) - - def _search_request(self, url, query): - response = super(sources, self)._search_request(url, query) - if response.status_code != 200: - return [] - - try: - results = core.json.loads(response.text) - except Exception as e: - self._request.exc_msg = 'Failed to parse json: %s' % response.text - return [] - - if not results or len(results) == 0: - return [] - else: - return results - - def _soup_filter(self, response): - return response - - def _title_filter(self, el): - return el['text'] - - def _info(self, el, url, torrent): - torrent['hash'] = el['id'] - torrent['size'] = int(el['len']) / 1024 / 1024 - torrent['seeds'] = el['s'] - - return torrent diff --git a/providers/a4kScrapers/en/torrent/yts.py b/providers/a4kScrapers/en/torrent/yts.py index cb9c4aa..5c8bb18 100644 --- a/providers/a4kScrapers/en/torrent/yts.py +++ b/providers/a4kScrapers/en/torrent/yts.py @@ -31,5 +31,5 @@ def _soup_filter(self, response): return results - def episode(self, simple_info, all_info): + def episode(self, simple_info, all_info, **kwargs): return []