From b2fa1495984165049369b1a378065951495f4450 Mon Sep 17 00:00:00 2001 From: Achim Date: Thu, 14 Nov 2024 16:50:06 +0100 Subject: [PATCH 1/2] fix imagechest extractor --- gallery_dl/extractor/imagechest.py | 71 +++++++++++++++++++++--------- 1 file changed, 50 insertions(+), 21 deletions(-) diff --git a/gallery_dl/extractor/imagechest.py b/gallery_dl/extractor/imagechest.py index 115fff32c7..3ee91ba44d 100644 --- a/gallery_dl/extractor/imagechest.py +++ b/gallery_dl/extractor/imagechest.py @@ -11,6 +11,7 @@ from .common import GalleryExtractor, Extractor, Message from .. import text, exception +import json BASE_PATTERN = r"(?:https?://)?(?:www\.)?imgchest\.com" @@ -36,32 +37,60 @@ def _init(self): self.images = self._images_api def metadata(self, page): - if "Sorry, but the page you requested could not be found." in page: + if "Not Found" in page: raise exception.NotFoundError("gallery") - return { - "gallery_id": self.gallery_id, - "title": text.unescape(text.extr( - page, 'property="og:title" content="', '"').strip()) + page_data = self._retrieve_page_data(page) + + metadata = { + "gallery_id": self.gallery_id } - def images(self, page): - if ' load-all">' in page: - url = "{}/p/{}/loadAll".format(self.root, self.gallery_id) - headers = { - "X-Requested-With": "XMLHttpRequest", - "Origin" : self.root, - "Referer" : self.gallery_url, - } - csrf_token = text.extr(page, 'name="csrf-token" content="', '"') - data = {"_token": csrf_token} - page += self.request( - url, method="POST", headers=headers, data=data).text + for attribute in [ + "id", + "slug", + "status", + "title", + "nsfw", + "score", + "comments", + "upvotes", + "downvotes", + "favorites", + "views", + "created" + ]: + try: + metadata[attribute] = page_data["props"]["post"][attribute] + except Exception: + pass - return [ - (url, None) - for url in text.extract_iter(page, 'data-url="', '"') - ] + try: + metadata["tags"] = ",".join(page_data["props"]["post"]["tags"]) + except Exception: + pass + + return metadata + + def images(self, page): + try: + return [ + (file["link"], None) + for file in self._retrieve_page_data(page)["props"]["post"]["files"] + ] + except Exception: + return [] + + def _retrieve_page_data(self, page): + return json.loads( + text.unescape( + text.extr( + page, + begin='data-page="', + end='"', + default='{}') + ) + ) def _metadata_api(self, page): post = self.api.post(self.gallery_id) From 917e873c630e6b38abf19757836d7fe514278371 Mon Sep 17 00:00:00 2001 From: Achim Date: Thu, 14 Nov 2024 16:54:59 +0100 Subject: [PATCH 2/2] fix imagechest extractor --- gallery_dl/extractor/imagechest.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/gallery_dl/extractor/imagechest.py b/gallery_dl/extractor/imagechest.py index 3ee91ba44d..abd14900f2 100644 --- a/gallery_dl/extractor/imagechest.py +++ b/gallery_dl/extractor/imagechest.py @@ -73,10 +73,12 @@ def metadata(self, page): return metadata def images(self, page): + page_data = self._retrieve_page_data(page) + try: return [ (file["link"], None) - for file in self._retrieve_page_data(page)["props"]["post"]["files"] + for file in page_data["props"]["post"]["files"] ] except Exception: return []