Skip to content

Commit

Permalink
cleanup
Browse files (browse the repository at this point in the history)
  • Loading branch information
saraswatpuneet committed Sep 2, 2023
1 parent d55fed7 commit 7a7eec9
Showing 1 changed file with 2 additions and 2 deletions.
4 changes: 2 additions & 2 deletions querent/collectors/webscaper/web_scraper_collector.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ async def poll(self):
content = await self.scrape_website(url)
yield CollectedBytes(file=None, data=content.data, error=None)
# Find and add links from this page to the list of URLs to scrape
- new_urls = self.extract_links(content.data, url)
+ new_urls = self.extract_links(url)
urls_to_scrape.extend(new_urls)

async def scrape_website(self, website_url: str):
Expand All @@ -44,7 +44,7 @@ async def scrape_website(self, website_url: str):
data=content[:max_length], file=None, error=None
)

- def extract_links(self, content: str, base_url: str):
+ def extract_links(self, base_url: str):
# Use a proper HTML parser to extract links
extractor = WebpageExtractor()
links = extractor.extract_links(base_url)
Expand Down

0 comments on commit 7a7eec9

Please sign in to comment.