Skip to content

Commit

Permalink
Revert "re-tweak some snippet for failed requests."
Browse files Browse the repository at this point in the history
This reverts commit 9b2fc5d.
  • Loading branch information
sushil-rgb committed Jan 14, 2024
1 parent 9b2fc5d commit 5d5d028
Show file tree
Hide file tree
Showing 3 changed files with 20 additions and 16 deletions.
2 changes: 1 addition & 1 deletion main.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@


async def main():
base_url = "https://www.amazon.com/s?k=gaming+headsets&_encoding=UTF8&content-id=amzn1.sym.12129333-2117-4490-9c17-6d31baf0582a&pd_rd_r=56793f4a-5a4c-4c75-b342-04eaeb38676b&pd_rd_w=t37se&pd_rd_wg=pRvMP&pf_rd_p=12129333-2117-4490-9c17-6d31baf0582a&pf_rd_r=ZVVAJVJTHSE47KE7FH42&ref=pd_gw_unk"
base_url = "https://www.amazon.com/s?k=gaming+keyboard&_encoding=UTF8&content-id=amzn1.sym.12129333-2117-4490-9c17-6d31baf0582a&pd_rd_r=34c04b08-58c2-4dec-8cce-e1ba5b33f1b4&pd_rd_w=6uYt1&pd_rd_wg=yxxCi&pf_rd_p=12129333-2117-4490-9c17-6d31baf0582a&pf_rd_r=0FTRXQKJYSVRXBPV695G&ref=pd_gw_unk"
status = await Amazon(base_url, None).status()

if status == 503:
Expand Down
32 changes: 18 additions & 14 deletions scrapers/scraper.py
Original file line number Diff line number Diff line change
Expand Up @@ -197,7 +197,6 @@ async def product_urls(self, url, max_retries = 13):
Raises:
-Expectation: If there is an error while loading the content of the Amazon search results page.
"""
url_lists = []
for retry in range(max_retries):
try:
# Use the 'static_connection' method to download the HTML content of the search results page
Expand All @@ -211,15 +210,17 @@ async def product_urls(self, url, max_retries = 13):
return f"Content loading error. Please try again in few minutes. Error message: {e}"
# Get product card contents from current page:
card_contents = [f"""https://www.amazon.{self.country_domain}{prod.select_one(self.scrape['hyperlink']).get('href')}""" for prod in soup.select(self.scrape['main_content'])]
url_lists.append(card_contents)
break
except Exception as e:
print(f"Retry {retry + 1} failed: {str(e)} || Retrying... {retry + 1} / {max_retries}")
return card_contents
except ConnectionResetError as se:
print(f"Connection lost: {str(e)}. Retrying... ({retry + 1} / {max_retries})")
if retry < max_retries - 1:
await asyncio.sleep(5) # Delay before retrying.
else:
raise Exception(f"Failed to retrieve valid data after {max_retries} retries.")
return flat(url_lists)
except Exception as e:
print(f"Retry {retry + 1} failed: {str(e)}")
if retry < max_retries - 1:
await asyncio.sleep(4) # Delay before retrying.

raise Exception(f"Failed to retrieve valid data after {max_retries} retries.")


async def scrape_product_info(self, url, max_retries = 13):
Expand Down Expand Up @@ -313,14 +314,17 @@ async def scrape_product_info(self, url, max_retries = 13):
'Store link': store_link,
}
amazon_dicts.append(datas)
break
except Exception as e:
print(f"Retry {retry + 1} failed: {str(e)} || Retrying... {retry + 1} / {max_retries}")
return amazon_dicts
except ConnectionResetError as se:
print(f"Connection lost: {str(e)}. Retrying... ({retry + 1} / {max_retries})")
if retry < max_retries - 1:
await asyncio.sleep(5) # Delay before retrying.
else:
raise Exception(f"Failed to retrieve valid data after {max_retries} retries.")
return amazon_dicts
except Exception as e:
print(f"Retry {retry + 1} failed: {str(e)} | Error URL : {url}")
if retry < max_retries - 1:
await asyncio.sleep(4) # Delay before retrying.
return amazon_dicts
raise Exception(f"Failed to retrieve valid data after {max_retries} retries.")


async def crawl_url(self):
Expand Down
2 changes: 1 addition & 1 deletion scrapers/selector.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
product_name: "div.a-section.a-spacing-none.a-spacing-top-small.s-title-instructions-style h2 a span"
searches: div[cel_widget_id="UPPER-RESULT_INFO_BAR-0"]
searches_I: "div#departments span.a-size-base.a-color-base.a-text-bold"
searches_II: "div.a-section.a-spacing-small.a-spacing-top-small span.a-color-state.a-text-bold"
searches_II: "span.a-color-state.a-text-bold"
searches_III: "span.a-list-item span.a-size-base.a-color-base.a-text-bold"
searches_IV: "a.a-link-normal.s-navigation-item span.a-size-base.a-color-base"

Expand Down

0 comments on commit 5d5d028

Please sign in to comment.