-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat(backend): add functions to submit DOI requests to CrossRef (#1429)
- Loading branch information
Showing
8 changed files
with
301 additions
and
5 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
204 changes: 204 additions & 0 deletions
204
backend/src/main/kotlin/org/loculus/backend/service/crossref/CrossRefService.kt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,204 @@ | ||
package org.loculus.backend.service.crossref | ||
|
||
import mu.KotlinLogging | ||
import org.jsoup.Jsoup | ||
import org.redundent.kotlin.xml.PrintOptions | ||
import org.redundent.kotlin.xml.xml | ||
import org.springframework.boot.context.properties.ConfigurationProperties | ||
import org.springframework.stereotype.Service | ||
import java.io.DataOutputStream | ||
import java.io.OutputStreamWriter | ||
import java.io.PrintWriter | ||
import java.net.HttpURLConnection | ||
import java.net.URI | ||
import java.net.URLEncoder | ||
import java.time.LocalDate | ||
import java.time.ZoneId | ||
import java.time.format.DateTimeFormatter | ||
import java.util.UUID | ||
|
||
private val log = KotlinLogging.logger { } | ||
|
||
/**
 * Connection settings for the CrossRef deposit API, bound from configuration keys with the `crossref` prefix.
 *
 * Every value is nullable so that the application can start without CrossRef being configured.
 *
 * @property endpoint base URL of the CrossRef server.
 * @property username login id sent with each deposit.
 * @property password login password sent with each deposit.
 */
@ConfigurationProperties(prefix = "crossref")
data class CrossRefServiceProperties(
    val endpoint: String?,
    val username: String?,
    val password: String?,
) {
    /**
     * Same layout as the generated data-class representation, but with the password masked so that
     * logging or dumping this object never exposes the credential.
     */
    override fun toString(): String =
        "CrossRefServiceProperties(endpoint=$endpoint, username=$username, password=${password?.let { "***" }})"
}
|
||
/**
 * Builds CrossRef deposit XML documents and submits them to CrossRef in order to request DOIs.
 *
 * The service is only usable when endpoint, username and password are all configured (see [isActive]);
 * otherwise both public functions throw a [RuntimeException].
 */
@Service
class CrossRefService(private val crossRefServiceProperties: CrossRefServiceProperties) {
    /** `true` only when endpoint, username and password are all present in the configuration. */
    val isActive = crossRefServiceProperties.endpoint != null && crossRefServiceProperties.username != null &&
        crossRefServiceProperties.password != null
    val dateTimeFormatterMM = DateTimeFormatter.ofPattern("MM")
    val dateTimeFormatterdd = DateTimeFormatter.ofPattern("dd")
    val dateTimeFormatteryyyy = DateTimeFormatter.ofPattern("yyyy")

    /** Fails fast when the service has not been fully configured. */
    private fun checkIsActive() {
        if (!isActive) {
            throw RuntimeException("The CrossRefService is not active as it has not been configured.")
        }
    }

    /**
     * Renders the CrossRef `doi_batch` XML document for a single dataset.
     *
     * Keys read from [data]:
     * - `DOIBatchID` (optional `String`): batch id; a random UUID is used when absent.
     * - `now` (optional [LocalDate]): publication date; today's date is used when absent.
     * - `databaseTitle`, `datasetTitle`, `DOI`, `URL` (`String`).
     * - `organizations` (`Array` of `String`): only the first entry is used.
     * - `contributors` (`Array` of `Array` of `String`): only the first entry is used, read as
     *   `[givenName, surname]`.
     *
     * @return the XML declaration followed by the document on a single line.
     * @throws RuntimeException if the service is not configured, or if a mandatory key is missing or has an
     * unexpected type (surfacing as the cast or index exception raised while reading it).
     */
    fun generateCrossRefXML(data: Map<String, Any>): String {
        checkIsActive()

        val doiBatchID = data["DOIBatchID"] as String? ?: UUID.randomUUID().toString()
        // Date used to fill the publication date, assumed to be the moment the xml is generated
        val now = data["now"] as LocalDate? ?: LocalDate.now()
        val timestamp = now.atStartOfDay(ZoneId.systemDefault()).toInstant().toEpochMilli().toString()

        // The values are read in the same order in which they appear in the document.
        val databaseTitle = data["databaseTitle"] as String
        // At the moment, we only use the first contributor organization and the first
        // contributor names specified in the input data. More organizations and names can
        // be passed on the metadata, so this is open for further consideration
        val organization = (data["organizations"] as Array<*>)[0] as String
        val firstContributor = (data["contributors"] as Array<*>)[0] as Array<*>
        val givenName = firstContributor[0] as String
        val surname = firstContributor[1] as String
        val datasetTitle = data["datasetTitle"] as String
        val doi = data["DOI"] as String
        val url = data["URL"] as String

        val crossRef = xml("doi_batch") {
            // All these attributes are needed for the xml to parse correctly
            attribute("version", "5.3.1")
            attribute("xmlns", "http://www.crossref.org/schema/5.3.1")
            attribute("xmlns:xsi", "http://www.w3.org/2001/XMLSchema-instance")
            attribute(
                "xsi:schemaLocation",
                "http://www.crossref.org/schema/5.3.1 http://data.crossref.org/schemas/crossref5.3.1.xsd",
            )

            "head" {
                // The doi_batch_id gets ignored and the actual one is assigned after the request is processed
                // through CrossRef's queue. Because of this, presumably, the doi_batch_id is not sent back when
                // a request to the service is successful. For this, one would have to query the request queue
                // and retrieve it from there
                "doi_batch_id" { -doiBatchID }
                "timestamp" { -timestamp }
            }

            "body" {
                "database" {
                    // Name of the database (that holds many dataset entries)
                    "database_metadata" { "titles" { "title" { -databaseTitle } } }
                    "dataset" {
                        "contributors" {
                            "organization" {
                                attribute("contributor_role", "author")
                                attribute("sequence", "first")

                                -organization
                            }
                            "person_name" {
                                attribute("contributor_role", "author")
                                attribute("sequence", "first")

                                "given_name" { -givenName }
                                "surname" { -surname }
                            }
                        }
                        // Name of this particular dataset
                        "titles" { "title" { -datasetTitle } }
                        "database_date" {
                            "publication_date" {
                                "month" { -now.format(dateTimeFormatterMM) }
                                "day" { -now.format(dateTimeFormatterdd) }
                                "year" { -now.format(dateTimeFormatteryyyy) }
                            }
                        }
                        "doi_data" {
                            // The requested DOI (pending approval from them), it needs to have a prefix
                            // for which the user is authorized to mint DOIs
                            "doi" { -doi }
                            // The "payload" of the DOI request, usually a URL
                            // If the request is successful, the newly minted DOI will resolve to this URL
                            "resource" { -url }
                        }
                    }
                }
            }
        }

        // Explicitly prepend the xml header as the library we are using only outputs the "body" of it
        return "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"yes\"?>\n" +
            crossRef.toString(PrintOptions(pretty = false))
    }

    /**
     * Uploads [XML] to the configured CrossRef endpoint as a `multipart/form-data` deposit.
     *
     * @param XML the deposit document, expected to be a single line (as produced by [generateCrossRefXML]).
     * @return the text of the response's `h2` element, which is `"SUCCESS"` whenever this function returns.
     * @throws RuntimeException if the service is not configured, if the HTTP status is not 200, or if the
     * response does not report `SUCCESS`.
     */
    fun postCrossRefXML(XML: String): String {
        checkIsActive()

        // This is needed per their API specification
        val formData = mapOf(
            "operation" to "doQueryUpload",
            "login_id" to crossRefServiceProperties.username,
            "login_passwd" to crossRefServiceProperties.password,
            "fname" to mapOf(
                "data" to XML,
                // "filename" could be any string, using the one from their code samples, though
                "filename" to "crossref_query.xml",
            ),
        )

        val connection = URI(
            crossRefServiceProperties.endpoint + "/servlet/deposit",
        ).toURL().openConnection() as HttpURLConnection

        try {
            connection.requestMethod = "POST"
            connection.doOutput = true

            val boundary = "---------------------------" + System.currentTimeMillis()
            // Some weird edge cases come out if the Connection header is not explicitly set to "close"
            connection.setRequestProperty("Connection", "close")
            connection.setRequestProperty("Content-Type", "multipart/form-data; boundary=$boundary")
            // If the User-Agent does not contain the string curl, the request fails
            connection.setRequestProperty("User-Agent", "curl/7.81.0")

            // Encoding has to be UTF-8, otherwise the request will fail.
            // `use` guarantees the writer is flushed and closed even if writing the body throws.
            PrintWriter(OutputStreamWriter(DataOutputStream(connection.outputStream), "UTF-8"), true).use { writer ->
                writeMultipartBody(writer, boundary, formData)
            }

            val responseCode = connection.responseCode
            if (responseCode != HttpURLConnection.HTTP_OK) {
                throw RuntimeException("DOI creation request returned a $responseCode response status")
            }

            // Slurp the whole response, instead of reading it line by line as it is usually done,
            // because otherwise it would be trickier to work out the behavior of "\r\n".
            // Decode explicitly as UTF-8 rather than relying on the platform default charset.
            val response = connection.inputStream.use { String(it.readAllBytes(), Charsets.UTF_8) }

            // CrossRef's API response is quite vague, they always give you back a 200 status,
            // and the only noticeable difference between a successful response vs. a failed one
            // is the presence of a "SUCCESS" or "FAILURE" string in it.
            val text = Jsoup.parse(response).select("h2").text()
            if (text != "SUCCESS") {
                throw RuntimeException("DOI creation request failed. \"FAILURE\" present in the response")
            }

            log.debug { "DOI creation successful for XML: $XML" }
            return text
        } finally {
            // Release the underlying socket on every path, including failures.
            connection.disconnect()
        }
    }

    /**
     * Writes every entry of [formData] as one multipart part, followed by the closing boundary.
     *
     * `String` values become plain form fields; the `fname` entry (a map with `data` and `filename`)
     * becomes the XML file part. Entries of any other shape (e.g. `null` values) are skipped.
     */
    private fun writeMultipartBody(writer: PrintWriter, boundary: String, formData: Map<String, Any?>) {
        // Both carriage return and new line characters have to be sent ("\r\n"),
        // otherwise the request will cause a 500 error on CrossRef's end
        val crlf = "\r\n"

        for ((key, value) in formData) {
            if (value is String) {
                writer.append("--$boundary").append(crlf)
                writer.append(
                    "Content-Disposition: form-data; name=\"${URLEncoder.encode(key, "UTF-8")}\"",
                ).append(crlf)
                writer.append(crlf)
                writer.append(value).append(crlf)
            } else if (key == "fname" && value is Map<*, *>) {
                val fieldName = URLEncoder.encode(key, "UTF-8")
                val fileName = URLEncoder.encode(value["filename"] as String, "UTF-8")

                writer.append("--$boundary").append(crlf)
                writer.append(
                    "Content-Disposition: form-data; name=\"$fieldName\"; filename=\"$fileName\"",
                ).append(crlf)
                writer.append("Content-Type: application/xml").append(crlf)
                writer.append(crlf)
                // The xml must be a single line, otherwise it is easy to run into formatting
                // errors introduced by newlines here and there
                writer.append(value["data"] as String).append(crlf + crlf)
            }
        }

        writer.append("--$boundary--").append(crlf)
    }
}
45 changes: 45 additions & 0 deletions
45
backend/src/test/kotlin/org/loculus/backend/service/crossref/CrossRefServiceTest.kt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,45 @@ | ||
package org.loculus.backend.service.crossref | ||
|
||
import org.junit.jupiter.api.Assertions.assertEquals | ||
import org.junit.jupiter.api.Test | ||
import org.loculus.backend.SpringBootTestWithoutDatabase | ||
import org.springframework.beans.factory.annotation.Autowired | ||
import java.time.Instant | ||
import java.time.LocalDate | ||
import java.time.LocalDateTime | ||
import java.time.ZoneId | ||
|
||
/**
 * Verifies that [CrossRefService.generateCrossRefXML] renders the expected CrossRef deposit document
 * for a fixed set of input metadata.
 */
@SpringBootTestWithoutDatabase
class CrossRefServiceTest(
    @Autowired private val crossRefService: CrossRefService,
) {
    private val doiBatchID: String = "3cbae87e-77b2-4560-b411-502288f3f636"
    private val doiPrefix: String = "10.62599"

    // 1711411200 is 2024-03-26T00:00:00Z. The instant is resolved in UTC so that the fixture date is
    // always 2024-03-26, regardless of the time zone of the machine running the test.
    private val now: LocalDate = LocalDateTime.ofInstant(
        Instant.ofEpochSecond(1711411200),
        ZoneId.of("UTC"),
    ).toLocalDate()

    // The service derives the timestamp from the start of the day in the system default zone, so the
    // expected value has to be computed the same way instead of hard-coding the UTC epoch millis.
    private val expectedTimestamp: Long = now.atStartOfDay(ZoneId.systemDefault()).toInstant().toEpochMilli()

    private val crossRefXMLReference = """
        <?xml version="1.0" encoding="UTF-8" standalone="yes"?>
        <doi_batch version="5.3.1" xmlns="http://www.crossref.org/schema/5.3.1" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.crossref.org/schema/5.3.1 http://data.crossref.org/schemas/crossref5.3.1.xsd"><head><doi_batch_id>$doiBatchID</doi_batch_id><timestamp>$expectedTimestamp</timestamp></head><body><database><database_metadata><titles><title>Pathoplexus Database</title></titles></database_metadata><dataset><contributors><organization contributor_role="author" sequence="first">pathoplexus.org</organization><person_name contributor_role="author" sequence="first"><given_name>Pathoplexus</given_name><surname>Contributor</surname></person_name></contributors><titles><title>Pathoplexus Dataset</title></titles><database_date><publication_date><month>03</month><day>26</day><year>2024</year></publication_date></database_date><doi_data><doi>$doiPrefix/XXXX</doi><resource>https://pathoplexus.org/</resource></doi_data></dataset></database></body></doi_batch>
    """.trimIndent()

    @Test
    fun `Create an XML metadata string complying with CrossRef's schema`() {
        val crossRefXML = crossRefService.generateCrossRefXML(
            mapOf(
                "DOIBatchID" to doiBatchID,
                "now" to now,
                "databaseTitle" to "Pathoplexus Database",
                "organizations" to arrayOf("pathoplexus.org"),
                "contributors" to arrayOf(arrayOf("Pathoplexus", "Contributor")),
                "datasetTitle" to "Pathoplexus Dataset",
                "DOI" to "$doiPrefix/XXXX",
                "URL" to "https://pathoplexus.org/",
            ),
        )

        // JUnit expects (expected, actual); this order keeps failure messages accurate.
        assertEquals(crossRefXMLReference, crossRefXML)
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,11 +1,14 @@ | ||
spring.config.import=file:src/main/resources/application.properties | ||
loculus.config.path=src/test/resources/backend_config.json | ||
|
||
crossref.endpoint=dummy | ||
crossref.username=dummy | ||
crossref.password=dummy | ||
|
||
keycloak.user=dummy | ||
keycloak.password=dummy | ||
keycloak.realm=dummyRealm | ||
keycloak.client=dummy-cli | ||
keycloak.url=dummy:420 | ||
|
||
|
||
spring.security.oauth2.resourceserver.jwt.jwk-set-uri=http://some.value |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters