Skip to content

Commit

Permalink
fix(scrapers.tests): test new logger calls inside get_binary_content
Browse files (browse the repository at this point in the history)
  • Loading branch information
grossir committed Aug 15, 2024
1 parent 2294515 commit fac3f13
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 7 deletions.
16 changes: 11 additions & 5 deletions cl/scrapers/tests.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -8,6 +8,7 @@
from django.conf import settings
from django.core.files.base import ContentFile
from django.utils.timezone import now
from juriscraper.AbstractSite import logger

from cl.alerts.factories import AlertFactory
from cl.alerts.models import Alert
Expand Down Expand Up @@ -626,37 +627,42 @@ def setUp(self):
self.mock_response.content = b"not empty"
self.mock_response.headers = {"Content-Type": "application/pdf"}
self.site = test_opinion_scraper.Site()
self.site.method = "GET"
self.logger = logger

@mock.patch("requests.Session.get")
def test_unexpected_content_type(self, mock_get):
"""Test when content type doesn't match scraper expectation."""
mock_get.return_value = self.mock_response
self.site.expected_content_types = ["text/html"]

with self.assertLogs(level="ERROR") as cm:
with mock.patch.object(self.logger, "error") as error_mock:
get_binary_content("/dummy/url/", self.site)
self.assertIn("UnexpectedContentTypeError:", cm.output[0])
self.assertIn(
"UnexpectedContentTypeError:", error_mock.call_args_list[0][0][0]
)

@mock.patch("requests.Session.get")
def test_correct_content_type(self, mock_get):
"""Test when content type matches scraper expectation."""
mock_get.return_value = self.mock_response
self.site.expected_content_types = ["application/pdf"]

with self.assertNoLogs(level="ERROR"):
with mock.patch.object(self.logger, "error") as error_mock:
_ = get_binary_content("/dummy/url/", self.site)

self.mock_response.headers = {
"Content-Type": "application/pdf;charset=utf-8"
}
mock_get.return_value = self.mock_response
_ = get_binary_content("/dummy/url/", self.site)
error_mock.assert_not_called()

@mock.patch("requests.Session.get")
def test_no_content_type(self, mock_get):
"""Test for no content type expected (ie. Montana)"""
mock_get.return_value = self.mock_response
self.site.expected_content_types = None

with self.assertNoLogs(level="ERROR"):
with mock.patch.object(self.logger, "error") as error_mock:
_ = get_binary_content("/dummy/url/", self.site)
error_mock.assert_not_called()
4 changes: 2 additions & 2 deletions cl/scrapers/utils.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -181,8 +181,7 @@ def get_binary_content(
url = os.path.join(settings.MEDIA_ROOT, download_url)
mr = MockRequest(url=url)
r = mr.get()
r = follow_redirections(r, requests.Session())
r.raise_for_status()
s = requests.Session()
else:
# some sites require a custom ssl_context, contained in the Site's
# session. However, we can't send a request with both a
Expand Down Expand Up @@ -222,6 +221,7 @@ def get_binary_content(
content_type in mime.lower()
for mime in site.expected_content_types
)

if not m:
error = (
f"UnexpectedContentTypeError: {download_url}\n"
Expand Down

0 comments on commit fac3f13

Please sign in to comment.