From fac3f134c60ccb87e7fe904550a19da50376a300 Mon Sep 17 00:00:00 2001 From: Gianfranco Rossi Date: Mon, 12 Aug 2024 23:32:08 -0500 Subject: [PATCH] fix(scrapers.tests): test new logger calls inside get_binary_content --- cl/scrapers/tests.py | 16 +++++++++++----- cl/scrapers/utils.py | 4 ++-- 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/cl/scrapers/tests.py b/cl/scrapers/tests.py index 497a491d4..9749042ca 100644 --- a/cl/scrapers/tests.py +++ b/cl/scrapers/tests.py @@ -8,6 +8,7 @@ from django.conf import settings from django.core.files.base import ContentFile from django.utils.timezone import now +from juriscraper.AbstractSite import logger from cl.alerts.factories import AlertFactory from cl.alerts.models import Alert @@ -626,16 +627,19 @@ def setUp(self): self.mock_response.content = b"not empty" self.mock_response.headers = {"Content-Type": "application/pdf"} self.site = test_opinion_scraper.Site() + self.site.method = "GET" + self.logger = logger @mock.patch("requests.Session.get") def test_unexpected_content_type(self, mock_get): """Test when content type doesn't match scraper expectation.""" mock_get.return_value = self.mock_response self.site.expected_content_types = ["text/html"] - - with self.assertLogs(level="ERROR") as cm: + with mock.patch.object(self.logger, "error") as error_mock: get_binary_content("/dummy/url/", self.site) - self.assertIn("UnexpectedContentTypeError:", cm.output[0]) + self.assertIn( + "UnexpectedContentTypeError:", error_mock.call_args_list[0][0][0] + ) @mock.patch("requests.Session.get") def test_correct_content_type(self, mock_get): @@ -643,7 +647,7 @@ def test_correct_content_type(self, mock_get): mock_get.return_value = self.mock_response self.site.expected_content_types = ["application/pdf"] - with self.assertNoLogs(level="ERROR"): + with mock.patch.object(self.logger, "error") as error_mock: _ = get_binary_content("/dummy/url/", self.site) self.mock_response.headers = { @@ -651,6 +655,7 @@ def test_correct_content_type(self, mock_get): } mock_get.return_value = self.mock_response _ = get_binary_content("/dummy/url/", self.site) + error_mock.assert_not_called() @mock.patch("requests.Session.get") def test_no_content_type(self, mock_get): @@ -658,5 +663,6 @@ def test_no_content_type(self, mock_get): mock_get.return_value = self.mock_response self.site.expected_content_types = None - with self.assertNoLogs(level="ERROR"): + with mock.patch.object(self.logger, "error") as error_mock: _ = get_binary_content("/dummy/url/", self.site) + error_mock.assert_not_called() diff --git a/cl/scrapers/utils.py b/cl/scrapers/utils.py index 15ef2bda7..a1a2e6eab 100644 --- a/cl/scrapers/utils.py +++ b/cl/scrapers/utils.py @@ -181,8 +181,7 @@ def get_binary_content( url = os.path.join(settings.MEDIA_ROOT, download_url) mr = MockRequest(url=url) r = mr.get() - r = follow_redirections(r, requests.Session()) - r.raise_for_status() + s = requests.Session() else: # some sites require a custom ssl_context, contained in the Site's # session. However, we can't send a request with both a @@ -222,6 +221,7 @@ def get_binary_content( content_type in mime.lower() for mime in site.expected_content_types ) + if not m: error = ( f"UnexpectedContentTypeError: {download_url}\n"