Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: unescape CrossRef journal #6373

Merged
merged 2 commits into from
Dec 20, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 6 additions & 4 deletions backend/common/providers/crossref_provider.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import html
import logging
from datetime import datetime
from urllib.parse import urlparse
Expand Down Expand Up @@ -101,14 +102,16 @@
# Journal
try:
if "short-container-title" in message and message["short-container-title"]:
journal = message["short-container-title"][0]
raw_journal = message["short-container-title"][0]

Check warning on line 105 in backend/common/providers/crossref_provider.py

View check run for this annotation

Codecov / codecov/patch

backend/common/providers/crossref_provider.py#L105

Added line #L105 was not covered by tests
elif "container-title" in message and message["container-title"]:
journal = message["container-title"][0]
raw_journal = message["container-title"][0]
elif "institution" in message:
journal = message["institution"][0]["name"]
raw_journal = message["institution"][0]["name"]

Check warning on line 109 in backend/common/providers/crossref_provider.py

View check run for this annotation

Codecov / codecov/patch

backend/common/providers/crossref_provider.py#L109

Added line #L109 was not covered by tests
except Exception:
raise CrossrefParseException("Journal node missing") from None

journal = html.unescape(raw_journal)

# Authors
# Note: make sure that the order is preserved, as it is a relevant information
authors = message["author"]
Expand Down Expand Up @@ -138,7 +141,6 @@
raise CrossrefParseException("Cannot parse metadata from Crossref") from e

def fetch_preprint_published_doi(self, doi):

res = self._fetch_crossref_payload(doi)
message = res.json()["message"]
is_preprint = message.get("subtype") == "preprint"
Expand Down
9 changes: 6 additions & 3 deletions backend/layers/thirdparty/crossref_provider.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import html
import logging
from datetime import datetime
from urllib.parse import urlparse
Expand Down Expand Up @@ -107,14 +108,16 @@
# Journal
try:
if "short-container-title" in message and message["short-container-title"]:
journal = message["short-container-title"][0]
raw_journal = message["short-container-title"][0]

Check warning on line 111 in backend/layers/thirdparty/crossref_provider.py

View check run for this annotation

Codecov / codecov/patch

backend/layers/thirdparty/crossref_provider.py#L111

Added line #L111 was not covered by tests
elif "container-title" in message and message["container-title"]:
journal = message["container-title"][0]
raw_journal = message["container-title"][0]

Check warning on line 113 in backend/layers/thirdparty/crossref_provider.py

View check run for this annotation

Codecov / codecov/patch

backend/layers/thirdparty/crossref_provider.py#L113

Added line #L113 was not covered by tests
elif "institution" in message:
journal = message["institution"][0]["name"]
raw_journal = message["institution"][0]["name"]

Check warning on line 115 in backend/layers/thirdparty/crossref_provider.py

View check run for this annotation

Codecov / codecov/patch

backend/layers/thirdparty/crossref_provider.py#L115

Added line #L115 was not covered by tests
except Exception:
raise CrossrefParseException("Journal node missing") from None

journal = html.unescape(raw_journal)

Check warning on line 119 in backend/layers/thirdparty/crossref_provider.py

View check run for this annotation

Codecov / codecov/patch

backend/layers/thirdparty/crossref_provider.py#L119

Added line #L119 was not covered by tests

# Authors
# Note: make sure that the order is preserved, as it is a relevant information
authors = message["author"]
Expand Down
41 changes: 39 additions & 2 deletions tests/unit/backend/layers/thirdparty/test_crossref_provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@ def test__provider_does_not_call_crossref_in_test(self, mock_get):
@patch("backend.common.providers.crossref_provider.requests.get")
@patch("backend.common.providers.crossref_provider.CorporaConfig")
def test__provider_calls_crossref_if_api_key_defined(self, mock_config, mock_get):

# Defining a mocked CorporaConfig will allow the provider to consider the `crossref_api_key`
# not None, so it will go ahead and do the mocked call.

Expand Down Expand Up @@ -75,7 +74,6 @@ def test__provider_calls_crossref_if_api_key_defined(self, mock_config, mock_get
@patch("backend.common.providers.crossref_provider.requests.get")
@patch("backend.common.providers.crossref_provider.CorporaConfig")
def test__provider_parses_authors_and_dates_correctly(self, mock_config, mock_get):

response = Response()
response.status_code = 200
response._content = str.encode(
Expand Down Expand Up @@ -138,6 +136,45 @@ def test__provider_parses_authors_and_dates_correctly(self, mock_config, mock_ge

self.assertDictEqual(expected_response, res)

@patch("backend.common.providers.crossref_provider.requests.get")
@patch("backend.common.providers.crossref_provider.CorporaConfig")
def test__provider_unescapes_journal_correctly(self, mock_config, mock_get):
    """Check that HTML entities in the Crossref journal title are unescaped.

    The mocked Crossref payload carries a journal name containing the
    HTML entity ``&amp;``; the metadata returned by ``fetch_metadata``
    must contain the decoded ``&`` character instead.

    ``CorporaConfig`` is patched so the provider proceeds to the (mocked)
    HTTP call, and ``requests.get`` is patched so no network request is made.
    """
    response = Response()
    response.status_code = 200
    response._content = str.encode(
        json.dumps(
            {
                "status": "ok",
                "message": {
                    "author": [
                        {"name": "A consortium"},
                    ],
                    "published-online": {"date-parts": [[2021, 11]]},
                    # The title must be HTML-escaped here; an already-decoded
                    # "&" would let the test pass without any unescaping.
                    "container-title": ["Clinical &amp; Translational Med"],
                },
            }
        )
    )

    mock_get.return_value = response
    provider = CrossrefProvider()
    res = provider.fetch_metadata("test_doi")
    mock_get.assert_called_once()

    expected_response = {
        "authors": [
            {"name": "A consortium"},
        ],
        "published_year": 2021,
        "published_month": 11,
        # Only year and month are supplied in the payload; day 1 is expected.
        "published_day": 1,
        "published_at": 1635724800.0,
        # Decoded form of the escaped title in the payload above.
        "journal": "Clinical & Translational Med",
        "is_preprint": False,
    }

    self.assertDictEqual(expected_response, res)

@patch("backend.common.providers.crossref_provider.requests.get")
@patch("backend.common.providers.crossref_provider.CorporaConfig")
def test__provider_throws_exception_if_request_fails(self, mock_config, mock_get):
Expand Down
Loading