Skip to content

Commit

Permalink
all commands are now fully functional
Browse files Browse the repository at this point in the history
  • Loading branch information
mpierce35 committed Apr 3, 2022
1 parent 6b85df1 commit cf79c98
Show file tree
Hide file tree
Showing 4 changed files with 59 additions and 55 deletions.
37 changes: 25 additions & 12 deletions NyaaTranspiler/entities/DataProcess.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,13 @@ def __init__(self):
self.base__view__link = "https://nyaa.si/view/"
self.base__dir = os.path.dirname(__file__)

def _check_registration(self):
    """Check whether nyaa.si account registration is currently open.

    Fetches the /register page; nyaa.si renders a <pre> element when
    registrations are closed, so its presence is used as the signal.

    Returns:
        str: A human-readable status message.
    """
    html = requests.get('https://nyaa.si/register').content
    soup = BeautifulSoup(html, 'lxml')
    if soup.find('pre'):
        # BUG FIX: message was misspelled "Registations"
        return "Registrations are currently closed."
    else:
        return "Registrations are now open."

def get_torrent_link(self, url):
BASE_TORRENT_LINK = "https://nyaa.si/download/"
Expand Down Expand Up @@ -112,7 +119,7 @@ def _rss_get_torrent_files(self, url=None, limit=None):
return self.get_data(feed_data)


def get_file(self, id_):
def _get_file(self, id_):
try:
# get file name first
html = requests.get((self.base__view__link + str(id_))).content
Expand All @@ -125,6 +132,7 @@ def get_file(self, id_):
print('Directory created.')
else:
print('directory exists.')
print(f"file name: {title}")
with requests.get(url, stream=True) as r:
r.raise_for_status()
invalid_chars = f'<>:"\/|?*'
Expand All @@ -140,7 +148,7 @@ def get_file(self, id_):
print('file saved.')

# get multiple files from structure
def get_data(self, item_list):
def _get_data(self, item_list):
"""
Download torrent files from a list of item provided by _parse_rss_feed()
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Expand Down Expand Up @@ -175,11 +183,17 @@ def get_data(self, item_list):
finally:
print(f"Downloaded {_count} torrent files.")

def _get_magnet(self, id_, file=False):
    """Fetch the magnet link for a torrent from its nyaa.si view page.

    Args:
        id_: Torrent id appended to the base view URL.
        file: When truthy, write the link to 'automated/magnet.txt'
            under the module directory and return None; otherwise
            print the link and return None.
    """
    view_link = "{0}{1}".format(self.base__view__link, str(id_))
    html = requests.get(view_link).content
    soup = BeautifulSoup(html, 'lxml')
    # The first 'card-footer-item' anchor on the view page holds the magnet URI.
    magnet = soup.find('a', 'card-footer-item').get('href')
    if file:
        # BUG FIX: original built the path with a hard-coded Windows '\'
        # separator and redundantly called f.close() inside the `with` block.
        with open(os.path.join(self.base__dir, 'automated', 'magnet.txt'), "w") as f:
            f.write(magnet)
        return
    # BUG FIX: original tested `file == True` / `file == False`, so a
    # `file=None` caller (see get_magnet_by_id) silently did nothing.
    # NOTE(review): the link is printed, not returned — callers receive None.
    print(magnet)


# This is purely experimental, not guaranteed to
Expand Down Expand Up @@ -216,7 +230,7 @@ def create_magnet_link(self, infohash=str(), title=str()):
# Nyaa Scraper methods/properties
########################################################

def parse_scraper_data(self, url="http://nyaa.si/", pages=None, per_page=None):
def _parse_scraper_data(self, url="http://nyaa.si/", pages=None, per_page=None):
_count = 0
if pages == None:
print("Pages value was not provided.")
Expand All @@ -227,7 +241,10 @@ def parse_scraper_data(self, url="http://nyaa.si/", pages=None, per_page=None):
try:
for p in range(1, (2 if pages is None else (pages + 1))):
if pages is not None:
create_url = url + f"&?p={p}"
# kind of a hack, but it works
if url[-1] == "/":
url = url + "?"
create_url = url + f"&p={p}"
print(create_url)
html = requests.get(create_url if pages is not None else url).content
soup = BeautifulSoup(html, "lxml")
Expand Down Expand Up @@ -299,7 +316,7 @@ def parse_scraper_data(self, url="http://nyaa.si/", pages=None, per_page=None):
print('no connection error')


def get_magnet_links(self, item_list):
def _get_magnet_links(self, item_list):
try:
_count = 0
mdir = os.path.join(self.base__dir, "automated")
Expand All @@ -308,15 +325,11 @@ def get_magnet_links(self, item_list):
print('Directory created.')
else:
print('directory exists.')

with open(os.path.join(mdir, 'magnets.txt'), "w") as f:
for i in item_list['data']:
f.write(f"{i['magnet_link']} \n")
_count += 1
f.close()
finally:
print(f"Saved {_count} magnet links.")


debug = DataProcess()
pp = pprint.PrettyPrinter(indent=4)

3 changes: 1 addition & 2 deletions NyaaTranspiler/entities/NyaaRSS.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,5 +76,4 @@ def get_data_by_username(self, username=None, limit=None):

def get_torrents_by_username(self, username=None, limit=None):
    """Download .torrent files for every RSS item uploaded by *username*.

    Args:
        username: Uploader whose feed is queried.
        limit: Maximum number of items to download (None = no limit).
    """
    url = self._create_search_query(username=username, search_type='rss')
    self._rss_get_torrent_files(url, limit=limit)
74 changes: 33 additions & 41 deletions NyaaTranspiler/entities/NyaaScraper.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
---Magnet links file can have more file info as optional
---"optional" add exceeding pages exception
"""

from bs4 import BeautifulSoup
from json import JSONDecodeError
from DataProcess import DataProcess
Expand All @@ -17,7 +16,6 @@
class NyaaScraper(DataProcess):
def __init__(self):
super().__init__()
self.base__url = "http://nyaa.si/"

##################################################################
## Debug Methods for NyaaScraper
Expand All @@ -32,8 +30,8 @@ def _debug_show_titles(self):
return mlist


def get_latest_torrent_data(self, rtype='dict', pages=None, per_page=None):
page_data = self.parse_scraper_data(pages=pages, per_page=per_page)
def get_latest_data(self, rtype='dict', pages=None, per_page=None):
page_data = self._parse_scraper_data(pages=pages, per_page=per_page)
try:
if rtype == 'json':
return json.dumps(page_data)
Expand All @@ -49,13 +47,13 @@ def get_latest_torrent_data(self, rtype='dict', pages=None, per_page=None):


def get_latest_torrent_files(self, pages=None, per_page=None):
    """Scrape the front page(s) and save a .torrent file for each item.

    Args:
        pages: Number of result pages to scrape (None = first page only).
        per_page: Cap on items taken from each page (None = all).
    """
    self._get_data(self._parse_scraper_data(pages=pages, per_page=per_page))


def get_latest_magnet_links(self, pages=None, per_page=None):
    """Scrape the front page(s) and save every item's magnet link to disk.

    Args:
        pages: Number of result pages to scrape (None = first page only).
        per_page: Cap on items taken from each page (None = all).
    """
    self._get_magnet_links(self._parse_scraper_data(pages=pages, per_page=per_page))


##########################################################
Expand All @@ -65,62 +63,60 @@ def get_latest_magnet_links(self, pages=None, per_page=None):
##########################################################


def get_data_by_query(self, filter_=None, search_query=None, category=None, username=None, pages=None, per_page=None):
    """Scrape nyaa.si search results for a query and return the parsed data.

    Args:
        filter_: Site filter value (e.g. trusted-only).
        search_query: Free-text search string.
        category: Category code to restrict the search to.
        username: Restrict results to one uploader.
        pages: Number of result pages to scrape (None = first page only).
        per_page: Cap on items taken from each page (None = all).

    Returns:
        The structure produced by _parse_scraper_data().
    """
    # CLEANUP: removed an unused `scraper_data` OrderedDict that was built
    # here but never referenced or returned.
    search_url = self._create_search_query(
        filter_=filter_,
        search_query=search_query,
        category=category,
        username=username,
        search_type="scraper")
    print(f"Search link: {search_url}")
    return self._parse_scraper_data(url=search_url, pages=pages, per_page=per_page)


def get_torrent_files_by_query(self,
                               filter_=None,
                               search_query=None,
                               category=None,
                               username=None,
                               pages=None,
                               per_page=None):
    """Scrape search results for a query and download each .torrent file.

    Args:
        filter_: Site filter value (e.g. trusted-only).
        search_query: Free-text search string.
        category: Category code to restrict the search to.
        username: Restrict results to one uploader.
        pages: Number of result pages to scrape (None = first page only).
        per_page: Cap on items taken from each page (None = all).
    """
    # CLEANUP: removed an unused `scraper_data` OrderedDict that was built
    # here but never referenced or returned.
    search_url = self._create_search_query(filter_=filter_,
                                           search_query=search_query,
                                           category=category,
                                           username=username,
                                           search_type='scraper')
    # BUG FIX: message had a stray apostrophe before the URL.
    print(f"Search link: {search_url}")
    # BUG FIX: pages/per_page were accepted but never forwarded, so only
    # the first page was ever downloaded regardless of the arguments.
    data = self._parse_scraper_data(url=search_url, pages=pages, per_page=per_page)
    return self._get_data(data)


def get_magnet_links_by_query(self,
                              filter_=None,
                              search_query=None,
                              category=None,
                              username=None,
                              pages=None,
                              per_page=None):
    """Scrape search results for a query and save the magnet links to disk.

    Args:
        filter_: Site filter value (e.g. trusted-only).
        search_query: Free-text search string.
        category: Category code to restrict the search to.
        username: Restrict results to one uploader.
        pages: Number of result pages to scrape (None = first page only).
        per_page: Cap on items taken from each page (None = all).
    """
    url = self._create_search_query(filter_=filter_,
                                    search_query=search_query,
                                    category=category,
                                    username=username,
                                    search_type='scraper')
    print(f"Search link {url}")
    scraped = self._parse_scraper_data(url=url, pages=pages, per_page=per_page)
    return self._get_magnet_links(scraped)


def get_data_by_username(self, username, rtype='dict', pages=None, per_page=None):
search_url = self.create_search_query(username=username, search_type='scraper')
print(f"Search link {search_url}")
data = self.parse_scraper_data(url=search_url, pages=pages, per_page=per_page)
search_url = self._create_search_query(username=username, search_type='scraper')
data = self._parse_scraper_data(url=search_url, pages=pages, per_page=per_page)
if rtype == 'dict':
return data
if rtype == 'json':
Expand All @@ -129,23 +125,19 @@ def get_data_by_username(self, username, rtype='dict', pages=None, per_page=None
raise TypeError("Specify data type for 'rtype' argument. 'dict' to return a dictionary, 'json' for JSON object notation.")

def get_files_by_username(self, username=None, rtype='torrent', pages=None, per_page=None):
    """Scrape one uploader's results and save them as files or magnet links.

    Args:
        username: Uploader whose results are scraped.
        rtype: 'torrent' to download .torrent files (default),
            'magnet' to save magnet links instead.
        pages: Number of result pages to scrape (None = first page only).
        per_page: Cap on items taken from each page (None = all).

    Raises:
        TypeError: If rtype is neither 'magnet' nor 'torrent'.
    """
    # BUG FIX: signature read `username:None` (a bare annotation, making the
    # argument required) instead of the intended `username=None` default.
    search_url = self._create_search_query(username=username, search_type='scraper')
    data = self._parse_scraper_data(url=search_url, pages=pages, per_page=per_page)
    if rtype == 'magnet':
        return self._get_magnet_links(data)
    if rtype == 'torrent':
        return self._get_data(data)
    # BUG FIX: original guarded this with `rtype is not ['magnet', 'torrent']`,
    # an identity test against a fresh list that is always True; the intent
    # was "not one of the valid values", which is what falling through means.
    raise TypeError("Please specify return type. either 'magnet' for links / 'torrent' for files ")


def get_torrent_by_id(self, id_=None):
    """Download the .torrent file for the torrent with the given nyaa.si id."""
    self._get_file(id_=id_)


def get_magnet_by_id(self, id_=None, file=False):
    """Print (or, with file=True, save to disk) the magnet link for *id_*.

    Args:
        id_: Torrent id on nyaa.si.
        file: Forwarded to _get_magnet; True writes the link to
            'automated/magnet.txt', False (default) prints it.

    BUG FIX: the default was `file=None`, which matched neither the
    `file == True` nor the `file == False` branch inside _get_magnet,
    so calling this without arguments silently did nothing.
    """
    return self._get_magnet(id_=id_, file=file)
Binary file modified NyaaTranspiler/entities/__pycache__/DataProcess.cpython-310.pyc
Binary file not shown.

0 comments on commit cf79c98

Please sign in to comment.