Skip to content

Commit

Permalink
Added test cases for web scraper
Browse files Browse the repository at this point in the history
  • Loading branch information
Ansh5461 committed Aug 19, 2023
1 parent c532bcd commit ede0117
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 1 deletion.
2 changes: 1 addition & 1 deletion querent/collectors/webscaper/web_scraper_collector.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ async def poll(self):
async def scrape_website(self, website_url: str):
    """Scrape ``website_url`` and return its leading text wrapped in a result.

    The page content comes from ``WebpageExtractor().extract_with_bs4`` and
    is cut down to its first 600 space-separated tokens.

    Args:
        website_url: Address of the page to scrape.

    Returns:
        A ``CollectorResult`` built from ``{"content": <truncated text>}``.
    """
    # NOTE(review): assumes extract_with_bs4 returns a str — confirm.
    content = WebpageExtractor().extract_with_bs4(website_url)
    # Splitting on a single space and re-joining with a single space
    # reproduces a prefix of ``content`` exactly, so this is the same text
    # as slicing ``content`` at the length of its first 600 tokens.
    truncated = " ".join(content.split(" ")[:600])
    # Single exit point: always hand back the wrapped result, never the
    # bare string.
    return CollectorResult({"content": truncated})

class WebScraperFactory(CollectorFactory):
def __init__(self):
Expand Down
10 changes: 10 additions & 0 deletions tests/test_webscrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,3 +26,13 @@ def test_scrapping_data():
webscrapperConfig = WebScraperConfig(website_url = uri.uri)
collector = resolver.resolve(uri, webscrapperConfig)
assert collector is not None

print("REached here")
async def poll_and_print():
    """Drain ``collector.poll()``, checking and printing every result.

    Each yielded result must not be an error; its unwrapped value is
    printed. The helper also fails when the poll yields nothing, so the
    test cannot pass without checking at least one result.
    """
    print("Part 2")
    seen = 0
    async for result in collector.poll():
        print("Hola...")
        assert not result.is_error()
        print(result.unwrap())
        seen += 1
    # Without this check an empty poll would skip every assertion above.
    assert seen > 0, "collector.poll() yielded no results"

asyncio.run(poll_and_print())

0 comments on commit ede0117

Please sign in to comment.