diff --git a/NyaaTranspiler/entities/DataProcess.py b/NyaaTranspiler/entities/DataProcess.py index 698eaef..0ccb63f 100644 --- a/NyaaTranspiler/entities/DataProcess.py +++ b/NyaaTranspiler/entities/DataProcess.py @@ -19,6 +19,13 @@ def __init__(self): self.base__view__link = "https://nyaa.si/view/" self.base__dir = os.path.dirname(__file__) + def _check_registration(self): + html = requests.get('https://nyaa.si/register').content + soup = BeautifulSoup(html, 'lxml') + if soup.find('pre'): + return "Registations are currently closed." + else: + return "Registrations are now open." def get_torrent_link(self, url): BASE_TORRENT_LINK = "https://nyaa.si/download/" @@ -112,7 +119,7 @@ def _rss_get_torrent_files(self, url=None, limit=None): return self.get_data(feed_data) - def get_file(self, id_): + def _get_file(self, id_): try: # get file name first html = requests.get((self.base__view__link + str(id_))).content @@ -125,6 +132,7 @@ def get_file(self, id_): print('Directory created.') else: print('directory exists.') + print(f"file name: {title}") with requests.get(url, stream=True) as r: r.raise_for_status() invalid_chars = f'<>:"\/|?*' @@ -140,7 +148,7 @@ def get_file(self, id_): print('file saved.') # get multiple files from structure - def get_data(self, item_list): + def _get_data(self, item_list): """ Download torrent files from a list of item provided by _parse_rss_feed() ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -175,11 +183,17 @@ def get_data(self, item_list): finally: print(f"Downloaded {_count} torrent files.") - def get_magnet(self, id_): + def _get_magnet(self, id_, file=False): view_link = "{0}{1}".format(self.base__view__link, str(id_)) html = requests.get(view_link).content soup = BeautifulSoup(html, 'lxml') - return soup.find('a', 'card-footer-item').get('href') + if file == True: + with open(os.path.join((self.base__dir + r'\automated'), 'magnet.txt'), "w") as f: + f.write(soup.find('a', 'card-footer-item').get('href')) + f.close() + return + if file == False: + return print(soup.find('a', 'card-footer-item').get('href')) # This is purely exprimental, not guaranteed to @@ -216,7 +230,7 @@ def create_magnet_link(self, infohash=str(), title=str()): # Nyaa Scraper methods/properties ######################################################## - def parse_scraper_data(self, url="http://nyaa.si/", pages=None, per_page=None): + def _parse_scraper_data(self, url="http://nyaa.si/", pages=None, per_page=None): _count = 0 if pages == None: print("Pages value was not provided.") @@ -227,7 +241,10 @@ def parse_scraper_data(self, url="http://nyaa.si/", pages=None, per_page=None): try: for p in range(1, (2 if pages is None else (pages + 1))): if pages is not None: - create_url = url + f"&?p={p}" + # kind of a hack, but it works + if url[-1] == "/": + url = url + "?" + create_url = url + f"&p={p}" print(create_url) html = requests.get(create_url if pages is not None else url).content soup = BeautifulSoup(html, "lxml") @@ -299,7 +316,7 @@ def parse_scraper_data(self, url="http://nyaa.si/", pages=None, per_page=None): print('no connection error') - def get_magnet_links(self, item_list): + def _get_magnet_links(self, item_list): try: _count = 0 mdir = os.path.join(self.base__dir, "automated") @@ -308,7 +325,6 @@ def get_magnet_links(self, item_list): print('Directory created.') else: print('directory exists.') - with open(os.path.join(mdir, 'magnets.txt'), "w") as f: for i in item_list['data']: f.write(f"{i['magnet_link']} \n") @@ -316,7 +332,4 @@ def get_magnet_links(self, item_list): f.close() finally: print(f"Saved {_count} magnet links.") - - -debug = DataProcess() -pp = pprint.PrettyPrinter(indent=4) + diff --git a/NyaaTranspiler/entities/NyaaRSS.py b/NyaaTranspiler/entities/NyaaRSS.py index 70b3d71..2634dd7 100644 --- a/NyaaTranspiler/entities/NyaaRSS.py +++ b/NyaaTranspiler/entities/NyaaRSS.py @@ -76,5 +76,4 @@ def get_data_by_username(self, username=None, limit=None): def get_torrents_by_username(self, username=None, limit=None): search_url = self._create_search_query(username=username, search_type='rss') - self._rss_get_torrent_files(search_url, limit=limit) - + self._rss_get_torrent_files(search_url, limit=limit) \ No newline at end of file diff --git a/NyaaTranspiler/entities/NyaaScraper.py b/NyaaTranspiler/entities/NyaaScraper.py index 390517c..530d4d1 100644 --- a/NyaaTranspiler/entities/NyaaScraper.py +++ b/NyaaTranspiler/entities/NyaaScraper.py @@ -7,7 +7,6 @@ ---Magnet links file can have more file info as optional ---"optional" add exceeding pages exception """ - from bs4 import BeautifulSoup from json import JSONDecodeError from DataProcess import DataProcess @@ -17,7 +16,6 @@ class NyaaScraper(DataProcess): def __init__(self): super().__init__() - self.base__url = "http://nyaa.si/" ################################################################## ## Debug Methods for NyaaScraper @@ -32,8 +30,8 @@ def _debug_show_titles(self): return mlist - def get_latest_torrent_data(self, rtype='dict', pages=None, per_page=None): - page_data = self.parse_scraper_data(pages=pages, per_page=per_page) + def get_latest_data(self, rtype='dict', pages=None, per_page=None): + page_data = self._parse_scraper_data(pages=pages, per_page=per_page) try: if rtype == 'json': return json.dumps(page_data) @@ -49,13 +47,13 @@ def get_latest_torrent_data(self, rtype='dict', pages=None, per_page=None): def get_latest_torrent_files(self, pages=None, per_page=None): - pages_data = self.parse_scraper_data(pages=pages, per_page=per_page) - self.get_data(pages_data) + pages_data = self._parse_scraper_data(pages=pages, per_page=per_page) + self._get_data(pages_data) def get_latest_magnet_links(self, pages=None, per_page=None): - pages_data = self.parse_scraper_data(pages=pages, per_page=per_page) - self.get_magnet_links(pages_data) + pages_data = self._parse_scraper_data(pages=pages, per_page=per_page) + self._get_magnet_links(pages_data) ########################################################## @@ -65,62 +63,60 @@ def get_latest_magnet_links(self, pages=None, per_page=None): ########################################################## - def get_data_by_query(self, filter_=None, search_string=None, category=None, username=None, pages=None, per_page=None): + def get_data_by_query(self, filter_=None, search_query=None, category=None, username=None, pages=None, per_page=None): # Maybe we can move this somewhere else... scraper_data = OrderedDict({ "title" : f"Nyaa Scraper v0.1 (Under construction v0204)", - "description": f"Nyaa scraper for {search_string}" + "description": f"Nyaa scraper for {search_query}" }) - search_url = self.create_search_query( + search_url = self._create_search_query( filter_=filter_, - search_string=search_string, + search_query=search_query, category=category, username=username, search_type="scraper") - print(f"Search link: {search_url}") - return self.parse_scraper_data(url=search_url, pages=pages, per_page=per_page) + return self._parse_scraper_data(url=search_url, pages=pages, per_page=per_page) def get_torrent_files_by_query(self, filter_=None, - search_string=None, + search_query=None, category=None, username=None, pages=None, per_page=None): scraper_data = OrderedDict({ "title" : f"Nyaa Scraper v0.1 (Under construction v0204)", - "description": f"Nyaa scraper for {search_string}" + "description": f"Nyaa scraper for {search_query}" }) - search_url = self.create_search_query(filter_=filter_, - search_string=search_string, + search_url = self._create_search_query(filter_=filter_, + search_query=search_query, category=category, username=username, search_type='scraper') - print(f"Search link: '{search_url}") - data = self.parse_scraper_data(url=search_url) - return self.get_data(data) + data = self._parse_scraper_data(url=search_url) + return self._get_data(data) + def get_magnet_links_by_query(self, filter_=None, - search_string=None, + search_query=None, category=None, username=None, pages=None, per_page=None): - search_url = self.create_search_query(filter_=filter_, - search_string=search_string, + search_url = self._create_search_query(filter_=filter_, + search_query=search_query, category=category, username=username, search_type='scraper') - print(f"Search link {search_url}") - data = self.parse_scraper_data(url=search_url, pages=pages, per_page=per_page) - return self.get_magnet_links(data) + data = self._parse_scraper_data(url=search_url, pages=pages, per_page=per_page) + return self._get_magnet_links(data) + def get_data_by_username(self, username, rtype='dict', pages=None, per_page=None): - search_url = self.create_search_query(username=username, search_type='scraper') - print(f"Search link {search_url}") - data = self.parse_scraper_data(url=search_url, pages=pages, per_page=per_page) + search_url = self._create_search_query(username=username, search_type='scraper') + data = self._parse_scraper_data(url=search_url, pages=pages, per_page=per_page) if rtype == 'dict': return data if rtype == 'json': @@ -129,23 +125,19 @@ def get_data_by_username(self, username, rtype='dict', pages=None, per_page=None raise TypeError("Specify data type for 'rtype' argument. 'dict' to return a dictionary, 'json' for JSON object notation.") def get_files_by_username(self, username:None, rtype='torrent', pages=None, per_page=None): - search_url = self.create_search_query(username=username, search_type='scraper') - print(f"Search link {search_url}") - data = self.parse_scraper_data(url=search_url, pages=pages, per_page=per_page) + search_url = self._create_search_query(username=username, search_type='scraper') + data = self._parse_scraper_data(url=search_url, pages=pages, per_page=per_page) if rtype == 'magnet': - return self.get_magnet_links(data) + return self._get_magnet_links(data) if rtype == 'torrent': - return self.get_data(data) + return self._get_data(data) if rtype is not ['magnet', 'torrent']: raise TypeError("Please specify return type. either 'magnet' for links / 'torrent' for files ") def get_torrent_by_id(self, id_=None): - self.get_file(id_=id_) + self._get_file(id_=id_) - def get_magnet_by_id(self, id_=None): - return self.get_magnet(id_=id_) - -debug = NyaaScraper() -pp = pprint.PrettyPrinter(indent=4) + def get_magnet_by_id(self, id_=None, file=None): + return self._get_magnet(id_=id_, file=file) diff --git a/NyaaTranspiler/entities/__pycache__/DataProcess.cpython-310.pyc b/NyaaTranspiler/entities/__pycache__/DataProcess.cpython-310.pyc index b2b8bad..529b00a 100644 Binary files a/NyaaTranspiler/entities/__pycache__/DataProcess.cpython-310.pyc and b/NyaaTranspiler/entities/__pycache__/DataProcess.cpython-310.pyc differ