From f3358d1eb883d521528be23e8a8294427248ec60 Mon Sep 17 00:00:00 2001 From: iamatulsingh Date: Mon, 28 Oct 2024 14:51:34 +0100 Subject: [PATCH] feat: added list to print any error that occurred during search --- .github/workflows/pytest.yml | 2 +- e2e.py | 6 +++++- pinscrape/v2.py | 21 +++++++++++++-------- 3 files changed, 19 insertions(+), 10 deletions(-) diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml index a887138..4420bfb 100644 --- a/.github/workflows/pytest.yml +++ b/.github/workflows/pytest.yml @@ -25,4 +25,4 @@ jobs: - name: Run test suite run: | - pytest -v e2e.py + pytest -sv e2e.py diff --git a/e2e.py b/e2e.py index e136181..da193bd 100644 --- a/e2e.py +++ b/e2e.py @@ -1,5 +1,8 @@ from pinscrape import scraper, Pinterest +import logging +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger() keyword = "messi" output_folder = "output" @@ -23,5 +26,6 @@ def test_single_data(): def test_v2(): p = Pinterest() images_url = p.search(keyword, images_to_download) - p.download(url_list=images_url, number_of_workers=number_of_workers, output_folder=output_folder) + print(p.errors) assert len(images_url) == images_to_download + p.download(url_list=images_url, number_of_workers=number_of_workers, output_folder=output_folder) diff --git a/pinscrape/v2.py b/pinscrape/v2.py index b967888..29c1616 100644 --- a/pinscrape/v2.py +++ b/pinscrape/v2.py @@ -12,6 +12,7 @@ class Pinterest: def __init__(self, user_agent: str = "", proxies: dict = None): + self.errors = [] self.user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/129.0.6668.71 Safari/537.36" \ if not user_agent else user_agent self.BASE_URL = "https://www.pinterest.com" @@ -201,15 +202,19 @@ def search(self, query: str, page_size=26) -> list: image_urls = [] if response.status_code != 200: logging.warning(f"Image search has failed!, {response.status_code}, {response.text}") + self.errors.append(f"Image search has failed!, 
{response.status_code}, {response.text}") + return [] + try: + json_data = response.json() + results = json_data.get('resource_response', {}).get('data', {}).get('results', []) + for result in results: + image_urls.append(result['images']['orig']['url']) + self.client_context = json_data['client_context'] + logging.info(f"Total {len(image_urls)} image(s) found.") + return image_urls + except requests.exceptions.JSONDecodeError as jde: + self.errors.append(jde.args) return [] - - json_data = response.json() - results = json_data.get('resource_response', {}).get('data', {}).get('results', []) - for result in results: - image_urls.append(result['images']['orig']['url']) - self.client_context = json_data['client_context'] - logging.info(f"Total {len(image_urls)} image(s) found.") - return image_urls if __name__ == "__main__":