From cf79c98dbb2e57c0b953d415b1d37e337d24ef14 Mon Sep 17 00:00:00 2001 From: Mike_Denton Date: Sun, 3 Apr 2022 23:17:08 +0100 Subject: [PATCH] all commands are now fully functional --- NyaaTranspiler/entities/DataProcess.py | 37 ++++++--- NyaaTranspiler/entities/NyaaRSS.py | 3 +- NyaaTranspiler/entities/NyaaScraper.py | 74 ++++++++---------- .../__pycache__/DataProcess.cpython-310.pyc | Bin 10028 -> 10582 bytes 4 files changed, 59 insertions(+), 55 deletions(-) diff --git a/NyaaTranspiler/entities/DataProcess.py b/NyaaTranspiler/entities/DataProcess.py index 698eaef..0ccb63f 100644 --- a/NyaaTranspiler/entities/DataProcess.py +++ b/NyaaTranspiler/entities/DataProcess.py @@ -19,6 +19,13 @@ def __init__(self): self.base__view__link = "https://nyaa.si/view/" self.base__dir = os.path.dirname(__file__) + def _check_registration(self): + html = requests.get('https://nyaa.si/register').content + soup = BeautifulSoup(html, 'lxml') + if soup.find('pre'): + return "Registations are currently closed." + else: + return "Registrations are now open." def get_torrent_link(self, url): BASE_TORRENT_LINK = "https://nyaa.si/download/" @@ -112,7 +119,7 @@ def _rss_get_torrent_files(self, url=None, limit=None): return self.get_data(feed_data) - def get_file(self, id_): + def _get_file(self, id_): try: # get file name first html = requests.get((self.base__view__link + str(id_))).content @@ -125,6 +132,7 @@ def get_file(self, id_): print('Directory created.') else: print('directory exists.') + print(f"file name: {title}") with requests.get(url, stream=True) as r: r.raise_for_status() invalid_chars = f'<>:"\/|?*' @@ -140,7 +148,7 @@ def get_file(self, id_): print('file saved.') # get multiple files from structure - def get_data(self, item_list): + def _get_data(self, item_list): """ Download torrent files from a list of item provided by _parse_rss_feed() ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -175,11 +183,17 @@ def get_data(self, item_list): finally: print(f"Downloaded {_count} torrent files.") - def get_magnet(self, id_): + def _get_magnet(self, id_, file=False): view_link = "{0}{1}".format(self.base__view__link, str(id_)) html = requests.get(view_link).content soup = BeautifulSoup(html, 'lxml') - return soup.find('a', 'card-footer-item').get('href') + if file == True: + with open(os.path.join((self.base__dir + r'\automated'), 'magnet.txt'), "w") as f: + f.write(soup.find('a', 'card-footer-item').get('href')) + f.close() + return + if file == False: + return print(soup.find('a', 'card-footer-item').get('href')) # This is purely exprimental, not guaranteed to @@ -216,7 +230,7 @@ def create_magnet_link(self, infohash=str(), title=str()): # Nyaa Scraper methods/properties ######################################################## - def parse_scraper_data(self, url="http://nyaa.si/", pages=None, per_page=None): + def _parse_scraper_data(self, url="http://nyaa.si/", pages=None, per_page=None): _count = 0 if pages == None: print("Pages value was not provided.") @@ -227,7 +241,10 @@ def parse_scraper_data(self, url="http://nyaa.si/", pages=None, per_page=None): try: for p in range(1, (2 if pages is None else (pages + 1))): if pages is not None: - create_url = url + f"&?p={p}" + # kind of a hack, but it works + if url[-1] == "/": + url = url + "?" + create_url = url + f"&p={p}" print(create_url) html = requests.get(create_url if pages is not None else url).content soup = BeautifulSoup(html, "lxml") @@ -299,7 +316,7 @@ def parse_scraper_data(self, url="http://nyaa.si/", pages=None, per_page=None): print('no connection error') - def get_magnet_links(self, item_list): + def _get_magnet_links(self, item_list): try: _count = 0 mdir = os.path.join(self.base__dir, "automated") @@ -308,7 +325,6 @@ def get_magnet_links(self, item_list): print('Directory created.') else: print('directory exists.') - with open(os.path.join(mdir, 'magnets.txt'), "w") as f: for i in item_list['data']: f.write(f"{i['magnet_link']} \n") @@ -316,7 +332,4 @@ def get_magnet_links(self, item_list): f.close() finally: print(f"Saved {_count} magnet links.") - - -debug = DataProcess() -pp = pprint.PrettyPrinter(indent=4) + diff --git a/NyaaTranspiler/entities/NyaaRSS.py b/NyaaTranspiler/entities/NyaaRSS.py index 70b3d71..2634dd7 100644 --- a/NyaaTranspiler/entities/NyaaRSS.py +++ b/NyaaTranspiler/entities/NyaaRSS.py @@ -76,5 +76,4 @@ def get_data_by_username(self, username=None, limit=None): def get_torrents_by_username(self, username=None, limit=None): search_url = self._create_search_query(username=username, search_type='rss') - self._rss_get_torrent_files(search_url, limit=limit) - + self._rss_get_torrent_files(search_url, limit=limit) \ No newline at end of file diff --git a/NyaaTranspiler/entities/NyaaScraper.py b/NyaaTranspiler/entities/NyaaScraper.py index 390517c..530d4d1 100644 --- a/NyaaTranspiler/entities/NyaaScraper.py +++ b/NyaaTranspiler/entities/NyaaScraper.py @@ -7,7 +7,6 @@ ---Magnet links file can have more file info as optional ---"optional" add exceeding pages exception """ - from bs4 import BeautifulSoup from json import JSONDecodeError from DataProcess import DataProcess @@ -17,7 +16,6 @@ class NyaaScraper(DataProcess): def __init__(self): super().__init__() - self.base__url = "http://nyaa.si/" ################################################################## ## Debug Methods for NyaaScraper @@ -32,8 +30,8 @@ def _debug_show_titles(self): return mlist - def get_latest_torrent_data(self, rtype='dict', pages=None, per_page=None): - page_data = self.parse_scraper_data(pages=pages, per_page=per_page) + def get_latest_data(self, rtype='dict', pages=None, per_page=None): + page_data = self._parse_scraper_data(pages=pages, per_page=per_page) try: if rtype == 'json': return json.dumps(page_data) @@ -49,13 +47,13 @@ def get_latest_torrent_data(self, rtype='dict', pages=None, per_page=None): def get_latest_torrent_files(self, pages=None, per_page=None): - pages_data = self.parse_scraper_data(pages=pages, per_page=per_page) - self.get_data(pages_data) + pages_data = self._parse_scraper_data(pages=pages, per_page=per_page) + self._get_data(pages_data) def get_latest_magnet_links(self, pages=None, per_page=None): - pages_data = self.parse_scraper_data(pages=pages, per_page=per_page) - self.get_magnet_links(pages_data) + pages_data = self._parse_scraper_data(pages=pages, per_page=per_page) + self._get_magnet_links(pages_data) ########################################################## @@ -65,62 +63,60 @@ def get_latest_magnet_links(self, pages=None, per_page=None): ########################################################## - def get_data_by_query(self, filter_=None, search_string=None, category=None, username=None, pages=None, per_page=None): + def get_data_by_query(self, filter_=None, search_query=None, category=None, username=None, pages=None, per_page=None): # Maybe we can move this somewhere else... scraper_data = OrderedDict({ "title" : f"Nyaa Scraper v0.1 (Under construction v0204)", - "description": f"Nyaa scraper for {search_string}" + "description": f"Nyaa scraper for {search_query}" }) - search_url = self.create_search_query( + search_url = self._create_search_query( filter_=filter_, - search_string=search_string, + search_query=search_query, category=category, username=username, search_type="scraper") - print(f"Search link: {search_url}") - return self.parse_scraper_data(url=search_url, pages=pages, per_page=per_page) + return self._parse_scraper_data(url=search_url, pages=pages, per_page=per_page) def get_torrent_files_by_query(self, filter_=None, - search_string=None, + search_query=None, category=None, username=None, pages=None, per_page=None): scraper_data = OrderedDict({ "title" : f"Nyaa Scraper v0.1 (Under construction v0204)", - "description": f"Nyaa scraper for {search_string}" + "description": f"Nyaa scraper for {search_query}" }) - search_url = self.create_search_query(filter_=filter_, - search_string=search_string, + search_url = self._create_search_query(filter_=filter_, + search_query=search_query, category=category, username=username, search_type='scraper') - print(f"Search link: '{search_url}") - data = self.parse_scraper_data(url=search_url) - return self.get_data(data) + data = self._parse_scraper_data(url=search_url) + return self._get_data(data) + def get_magnet_links_by_query(self, filter_=None, - search_string=None, + search_query=None, category=None, username=None, pages=None, per_page=None): - search_url = self.create_search_query(filter_=filter_, - search_string=search_string, + search_url = self._create_search_query(filter_=filter_, + search_query=search_query, category=category, username=username, search_type='scraper') - print(f"Search link {search_url}") - data = self.parse_scraper_data(url=search_url, pages=pages, per_page=per_page) - return self.get_magnet_links(data) + data = self._parse_scraper_data(url=search_url, pages=pages, per_page=per_page) + return self._get_magnet_links(data) + def get_data_by_username(self, username, rtype='dict', pages=None, per_page=None): - search_url = self.create_search_query(username=username, search_type='scraper') - print(f"Search link {search_url}") - data = self.parse_scraper_data(url=search_url, pages=pages, per_page=per_page) + search_url = self._create_search_query(username=username, search_type='scraper') + data = self._parse_scraper_data(url=search_url, pages=pages, per_page=per_page) if rtype == 'dict': return data if rtype == 'json': @@ -129,23 +125,19 @@ def get_data_by_username(self, username, rtype='dict', pages=None, per_page=None raise TypeError("Specify data type for 'rtype' argument. 'dict' to return a dictionary, 'json' for JSON object notation.") def get_files_by_username(self, username:None, rtype='torrent', pages=None, per_page=None): - search_url = self.create_search_query(username=username, search_type='scraper') - print(f"Search link {search_url}") - data = self.parse_scraper_data(url=search_url, pages=pages, per_page=per_page) + search_url = self._create_search_query(username=username, search_type='scraper') + data = self._parse_scraper_data(url=search_url, pages=pages, per_page=per_page) if rtype == 'magnet': - return self.get_magnet_links(data) + return self._get_magnet_links(data) if rtype == 'torrent': - return self.get_data(data) + return self._get_data(data) if rtype is not ['magnet', 'torrent']: raise TypeError("Please specify return type. either 'magnet' for links / 'torrent' for files ") def get_torrent_by_id(self, id_=None): - self.get_file(id_=id_) + self._get_file(id_=id_) - def get_magnet_by_id(self, id_=None): - return self.get_magnet(id_=id_) - -debug = NyaaScraper() -pp = pprint.PrettyPrinter(indent=4) + def get_magnet_by_id(self, id_=None, file=None): + return self._get_magnet(id_=id_, file=file) diff --git a/NyaaTranspiler/entities/__pycache__/DataProcess.cpython-310.pyc b/NyaaTranspiler/entities/__pycache__/DataProcess.cpython-310.pyc index b2b8bada71dfcf5e4e33d7a83f1296f176de04e1..529b00a93236df40b271ed3ebfa4b6e9ed754b0b 100644 GIT binary patch delta 3963 zcmZ`++jA4w8Q-(oT}i8pb+Ii=w(RuQ=I+OhaI;9WkQ(x$`^!s)V0*0^Pknw)@u}GRfgkD|9 z5-MaxN~m~I$a<)UO7NDbOci*`y0RslBh*WM7fIGj{Zs|6k7_hXL%{LVVH&1gK&ils z&?ry=8l!QL3es-c1Md(Wp$XatJz>3TOThm*ktS(61*`}ir2{k#RFn?VArOzz0L*ag z_I~jWX%bw)_Tzy!{$GN>u?xTyT7;4|SrHq=^jNThEwLe5VoRc)HeoR;oe-!D#+0Rc z-+9xl83$)(Dx3NIv{9U4`f|}Qb!JPY3+0mSsWH7i@`l6Ao5gCy(DF>5)e7s3=@qlI zsTE39L#NaAepiTfq_j$PL#x*G%5+BNL1k3Rcvu%dqzt_r&lS$=h1Hxpn5)u@)5ntu z5*2`d`ey*V|-P9PiAr8@qfzc z{j3Klh6At!0`3;;1F&VoWW`!WVqM765RwQf0KTMXWY_Jg@(t08rZU9lcYG(vI3MyS z;=5670@;!+n?C+LUp}jcBX85s%KYhxoGNTgYDtJ z3hX9Rx4#H5LbCi==)l-KDtlnf>;rM?(7&TjG`8Mi55pZ3{<)GyX=%3bE@}u+?LQP>?|1pxD|HB5z;O{N?pAyUn?Zeej;lvZI2R$1?Cb{Ier_HtK>!Et!T(P8U zm3&!0sPS?%-F*~x?CuSKd)`uYdD^^SvN8Tq^hl=TL4N3EO9+qq^tQ+FKy0e-5OmuS zmM`Hq!_-gf#?)U;t!-P71L7 z7&|!(MD`WzTSwS~@D9QT!Ucp)1l$OQTc)vhk$mcoVK`jpClfC%g{zGP!QX~LK?LB3G1C0tu^mrG4+dX<@lpSVVN@9A zq}lbPoh<@o1vD~&rMmL&p2oYg4adjdtgEN<>t?mw%$qu`tL6N1MK{r>ZL;B>@D5lf z8wXg-MA#5EI^Ny!_>P}DXNR?d1rR!i@0_Qb*`2!N_&~<%ruU`^!nTASob|s`)HmRq z!Oc5yfG7Fn83r7$cjx?D=r_=Mj9`)Skpa>NC=%^IKB<>T_v?wr;s8(B)pU;d_UC;v zA#wiuWa2C?wjLQb*}|^riA=syfkX}|80$Ja29OCo;<=rLFF=>A)Sv`cApNhiQmJ^> zmZ5FvS?_vfZM_Pm+wt*1ejqgp)#!9;Zs_3(vtD;PhE2*gxnN7C4i$%gn%Yh7a67d| z7(dp3m~8UP{przT;4UH+)(LwV?hwADK>cv=3qY%ogbBO@{PX_w%yG+Gffo3q((svH z%SVH7-|$=h79{O9Y58fmO&Y2NsTyppa(Q5=*#(>sq-B*xfC@WQ6sRtTiUAdIs5nqj zhw26@=1@IA#T}{_sBS9^Df?@U9`tvs2gXda3GGV?FvniEO_Obrees~L6{LwKOiQMz zHnD;LMv?-?0dNQA1@Lgk9ICt0AeDBdwoo%UFRXp|(ClOOTnn@#6@p2o z8VQqF37BGY5X``_IF0T3f^iMF3heXnhfof|#0U)VrPT$h8mtA^*9*#{bgV6|K#_6Q z*tgBw@qpJzT79594$Dfyd*{4xm8=D<2rh0V3Y=Z@0&l0|M>8fONQ8O-_;dW)V4CdXpAIf-P7Qe;g5WM* zo9^C+=N_54M-J6J{%4w0)XnApQ6=`N~xNs>>{#f`F?FKZTrn)83HI@uEA9b zl-XYFoI}{h-`CDGoeJb!@SK~M%<`3G{i%VCS%taLZRjPvU@`?b?qzUEhw}e@6xfM8 zPZ6$s-^1o}2+mE;>!yOewyIw!=r!k}o{jOs@WIzK)PWSStQ)qkrn4N9ZgIoiv`H648JTiXV(_k|91DjNiUA>LIgF+!N}7?HuLYdYU+}bL!WDs*9Qm z1^;(ryH+|m|A~U%sq5bcjf~>v{s!9VRP;q`I*WRuWJe!K_fB~*4)L$=NF0TP-wn$U z|2uGBmBz(-`u`+)S0^JbEoI~-d@^JaK6|dhctFSbFLvxc+K+<|BYcD~ih$p3&J~)S zLh3C93*kM4?;tpLy(>t4g79;m7$0s90_hWAkG(!$Sn)}!tj?+lpX^Jkqp_$u7(B@& z5XppXZ!Slxgsz{(0$rpCb2>_DMfLWjg}KZd?+d(Ikj h&Nq~;1mJrEofXI6cgIKjx`B`c7~eT}#Gtcv{{!#Uwax$l delta 3520 zcmZ`*TWlN072R3xE|<&K@*z?WQr63|ReibP6QTN`Z zELBF!1<%~MbLY3$?b*!_uIw-sI&%!uV*h*CdQ z?+{a`;zhv-{Oq1cnjeC_C76T?rbr1D-w{k7^-&2*$&%NFc|v8XyhBU{JA>9wvs9x& z(5SSZhG-b10O%q#3X(>38Uw2!jnf2_A)2IJP=>9@x`6+Ak)~)GETeQ6?V*WY5Ovx| zGhh^>0hrgo<0O4pb*Zz%kcG_1RF-DahlA+abpVQK!Qw2Ob>*U8}H@B6Q z8L)BAwLv9~{igUYwI7ilel_?hnc!0Bbnh-y4 zCeW2DmrIK!+f{Of>T<ufLoV`!X=KK6wfAuhiWIWls@vw?kJ zM^Tb!sbIT)X4T61g2hgvn`8X1#j!kg0Mgw znFgqKN4#5Y_#J;M))t6BgAJ9295pRa@Yx91@m9hK(1;_oeN>>)cf}r|q1j1WTL`v7 zjzaaeNMjglcYJqe8bK%6>Y_!6{+#pi0z zT2=(3dzN8vnf&$mYtxq=5m$i|u=0x=fteFWhu+%foE+aEZ*Gt)>@1i)c?huKFIS6G z_D!2*_+R2@jqOMYKr1_supKi)SnUJ~zmS+D$N23;iX{0*iT(OkY%njJ()_E$L2`y` z$=*@5XxX{>QrW6Q|N034k)%Y8#EI^G8qtU<^4F3R;pABe=?kn{u=L5pE!y0qA8bDC%1fL+KL0UH)nJndXjN!_7uwDBul$_LD)t zKYFX!5_;aAfFj3OQF0SK--(NGn5RuLy0H)(UJ5u-|ql?Od^| z;o27S#foMB9QxE%fH(;yAb#RxEwv+dzUobqCSC`Zlr{qM`N>YdCq7Vl^IX$M;S|QE zCX9Tg0`vyN1XLlgR{)Igf1T|1q+ddJN-fW<3XtM5E0;@irVRDEW%`#ZOUsxMycp<0 zOPOEG^pkP^e&(RkSrh+trW>f=&5V;@aXGt0uJV=aDN^O1XS31M4aE_uuuRwqhftOO zE1PYebNm(J5I7a3q1t{&r2#mVfD>q`RBMw~fCk&7p}}ckzr3SS;`orTpqB>J^RyAP z$+`e!6l`fU1h!#M0vc*WJShTF)RUqh>7Jy66ho;=V<5&oWgMi0CnZ2iIuUUHwMG{N zQPAmv=_K2PcJ&Crps7xsrrIL=-Ltk9WM!DJOw(=RglKw3a8j)>?S@{YM`%Ub0%&_? z1UtIjyVdl}!K}AqwvJs>Gs4n?Ew?)yc3i+7JhKYJBzqfan>cBhVjp0Tx0j9Zs?h=(c^T(u*0yf9w zjzgK;Y{@!V&`f|vqRL?bZmdjY*wDBGrwys5(21#ggjH>U?sb%h!pATiwCvq#S%;QM zM}h=4<%od$K9@RDFf*`%d~Nzli$&9yFWc-@s6swJfZA{&Z(GGG zTg~O{e39W>f!zXF3&M4#n@r6dR8R8e(7~)3uuF?p-Oex8yvLEn_F?0G1eog3m8M79 zI6vk+QDl~{6s`Yx*N4yuFTAPLt+G|HnSvE{zEq*J79`AXqrotM>np+A=r&fp+~_@d z{2g|9!PKmq1kr$QDO8L`~{=IRJk#g`{%%giu27dzc`Ue0KQQ^}_M2rxL z3=lrMdo|p@lR0+>$iC;Y?pm-zpLKjIxMZc1g}(z1*Teyh9p=$JW7q$0mfT8)u3RWr z>z1K(h<_Cqv7HDpIe7>CT&8pNZQl1XM16|4_oQQDh$LYt;=eObHEC2_%kW43zI5FX zXJ-w07Jr71y$4P~i1Vj=#!q+S2zL;A5k?VSM!1OZErgp0m|EEy!g+*u5k5lr9