Skip to content

Commit

Permalink
feat(backend): add functions to submit DOI requests to CrossRef (#1429)
Browse files Browse the repository at this point in the history
  • Loading branch information
almosnow authored Apr 26, 2024
1 parent 6407554 commit 66ea4fb
Show file tree
Hide file tree
Showing 8 changed files with 301 additions and 5 deletions.
4 changes: 4 additions & 0 deletions .github/workflows/backend.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,10 @@ on:

env:
DOCKER_IMAGE_NAME: ghcr.io/loculus-project/backend
CROSSREF_USERNAME: ${{ secrets.CROSSREF_USERNAME }}
CROSSREF_TEST_PASSWORD: ${{ secrets.CROSSREF_TEST_PASSWORD }}
CROSSREF_TEST_ENDPOINT: ${{ secrets.CROSSREF_TEST_ENDPOINT }}
CROSSREF_DOI_PREFIX: ${{ secrets.CROSSREF_DOI_PREFIX }}

concurrency:
group: ci-${{ github.ref == 'refs/heads/main' && github.run_id || github.ref }}-backend
Expand Down
3 changes: 3 additions & 0 deletions backend/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,9 @@ dependencies {
implementation 'com.github.luben:zstd-jni:1.5.6-2'
implementation 'org.tukaani:xz:1.9'

implementation("org.redundent:kotlin-xml-builder:1.8.0")
implementation("org.jsoup:jsoup:1.17.2")

testImplementation("org.springframework.boot:spring-boot-starter-test") {
exclude group: "org.mockito"
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,204 @@
package org.loculus.backend.service.crossref

import mu.KotlinLogging
import org.jsoup.Jsoup
import org.redundent.kotlin.xml.PrintOptions
import org.redundent.kotlin.xml.xml
import org.springframework.boot.context.properties.ConfigurationProperties
import org.springframework.stereotype.Service
import java.io.DataOutputStream
import java.io.OutputStreamWriter
import java.io.PrintWriter
import java.net.HttpURLConnection
import java.net.URI
import java.net.URLEncoder
import java.time.LocalDate
import java.time.ZoneId
import java.time.format.DateTimeFormatter
import java.util.UUID

// File-private logger; CrossRefService uses it to report successful DOI deposits at debug level
private val log = KotlinLogging.logger { }

/**
 * Connection settings for CrossRef, bound from configuration properties prefixed with `crossref`.
 *
 * Every value is nullable; CrossRefService only reports itself as active when all three are set.
 *
 * @property endpoint base URL of the CrossRef server; `/servlet/deposit` is appended when a request is submitted
 * @property username account name, sent as the `login_id` form field
 * @property password account password, sent as the `login_passwd` form field
 */
@ConfigurationProperties(prefix = "crossref")
data class CrossRefServiceProperties(
val endpoint: String?,
val username: String?,
val password: String?,
)

/**
 * Builds CrossRef DOI deposit documents and submits them to CrossRef's deposit servlet.
 *
 * The service is only usable when endpoint, username and password are all configured (see [isActive]);
 * otherwise every public function throws a [RuntimeException].
 */
@Service
class CrossRefService(private val crossRefServiceProperties: CrossRefServiceProperties) {
    /** `true` only when endpoint, username and password are all present in the configuration. */
    val isActive = crossRefServiceProperties.endpoint != null && crossRefServiceProperties.username != null &&
        crossRefServiceProperties.password != null
    val dateTimeFormatterMM = DateTimeFormatter.ofPattern("MM")
    val dateTimeFormatterdd = DateTimeFormatter.ofPattern("dd")
    val dateTimeFormatteryyyy = DateTimeFormatter.ofPattern("yyyy")

    /** Guards the public functions: throws when the CrossRef settings are incomplete. */
    private fun checkIsActive() {
        if (!isActive) {
            throw RuntimeException("The CrossRefService is not active as it has not been configured.")
        }
    }

    /** Returns the string stored under [key], or fails with a message naming the missing entry. */
    private fun requireString(data: Map<String, Any>, key: String): String = data[key] as? String
        ?: throw IllegalArgumentException("CrossRef metadata entry \"$key\" must be a string")

    /** Returns the first element of the array stored under [key], or fails with a message naming the entry. */
    private fun requireFirstElement(data: Map<String, Any>, key: String): Any =
        (data[key] as? Array<*>)?.firstOrNull()
            ?: throw IllegalArgumentException("CrossRef metadata entry \"$key\" must be a non-empty array")

    /**
     * Renders the metadata in [data] as a single-line CrossRef 5.3.1 `doi_batch` document.
     *
     * Required entries: `databaseTitle`, `datasetTitle`, `DOI` and `URL` (strings), `organizations`
     * (array of strings) and `contributors` (array of `[givenName, surname]` arrays). Only the first
     * organization and the first contributor are written to the document.
     * Optional entries: `DOIBatchID` (string, defaults to a random UUID) and `now` ([LocalDate],
     * defaults to today).
     *
     * @return the XML document, prefixed with the XML declaration
     * @throws RuntimeException if the service is not configured
     * @throws IllegalArgumentException if a required entry is missing or has an unexpected shape
     */
    fun generateCrossRefXML(data: Map<String, Any>): String {
        checkIsActive()

        val doiBatchID = data["DOIBatchID"] as String? ?: UUID.randomUUID().toString()
        // Date used to fill both the batch timestamp and the publication date,
        // assumed to be the moment the xml is generated unless the caller overrides it
        val now = data["now"] as LocalDate? ?: LocalDate.now()

        // Read and validate every required value up front, so that a malformed request fails with
        // a message naming the offending entry instead of an opaque cast or index error
        val databaseTitle = requireString(data, "databaseTitle")
        val datasetTitle = requireString(data, "datasetTitle")
        val doi = requireString(data, "DOI")
        val url = requireString(data, "URL")
        val organization = requireFirstElement(data, "organizations") as? String
            ?: throw IllegalArgumentException("CrossRef metadata entry \"organizations\" must contain strings")
        val firstContributor = requireFirstElement(data, "contributors") as? Array<*>
            ?: throw IllegalArgumentException(
                "CrossRef metadata entry \"contributors\" must contain [givenName, surname] arrays",
            )
        val givenName = firstContributor.getOrNull(0) as? String
            ?: throw IllegalArgumentException("The first CrossRef contributor is missing a given name")
        val surname = firstContributor.getOrNull(1) as? String
            ?: throw IllegalArgumentException("The first CrossRef contributor is missing a surname")

        val crossRef = xml("doi_batch") {
            // All these attributes are needed for the xml to parse correctly
            attribute("version", "5.3.1")
            attribute("xmlns", "http://www.crossref.org/schema/5.3.1")
            attribute("xmlns:xsi", "http://www.w3.org/2001/XMLSchema-instance")
            attribute(
                "xsi:schemaLocation",
                "http://www.crossref.org/schema/5.3.1 http://data.crossref.org/schemas/crossref5.3.1.xsd",
            )

            "head" {
                // The doi_batch_id gets ignored and the actual one is assigned after the request is processed
                // through CrossRef's queue. Because of this, presumably, the doi_batch_id is not sent back when
                // a request to the service is successful. For this, one would have to query the request queue
                // and retrieve it from there
                "doi_batch_id" { -doiBatchID }
                "timestamp" { -now.atStartOfDay(ZoneId.systemDefault()).toInstant().toEpochMilli().toString() }
            }

            "body" {
                "database" {
                    // Name of the database (that holds many dataset entries)
                    "database_metadata" { "titles" { "title" { -databaseTitle } } }
                    "dataset" {
                        "contributors" {
                            // At the moment, we only use the first contributor organization and the first
                            // contributor names specified in the input data. More organizations and names can
                            // be passed on the metadata, so this is open for further consideration
                            "organization" {
                                attribute("contributor_role", "author")
                                attribute("sequence", "first")

                                -organization
                            }
                            "person_name" {
                                attribute("contributor_role", "author")
                                attribute("sequence", "first")

                                "given_name" { -givenName }
                                "surname" { -surname }
                            }
                        }
                        // Name of this particular dataset
                        "titles" { "title" { -datasetTitle } }
                        "database_date" {
                            "publication_date" {
                                "month" { -now.format(dateTimeFormatterMM) }
                                "day" { -now.format(dateTimeFormatterdd) }
                                "year" { -now.format(dateTimeFormatteryyyy) }
                            }
                        }
                        "doi_data" {
                            // The requested DOI (pending approval from them), it needs to have a prefix
                            // for which the user is authorized to mint DOIs for
                            "doi" { -doi }
                            // The "payload" of the DOI request, usually an URL
                            // If the request is successful, the newly minted DOI will resolve to this URL
                            "resource" { -url }
                        }
                    }
                }
            }
        }

        // Explicitly prepend the xml header as the library we are using only outputs the "body" of it
        return "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"yes\"?>\n" +
            crossRef.toString(PrintOptions(pretty = false))
    }

    /**
     * Uploads [XML] to `<endpoint>/servlet/deposit` as a `multipart/form-data` POST request.
     *
     * @param XML the deposit document, expected to be a single line
     * @return the text of the response's `h2` elements, which is always `"SUCCESS"` on a normal return
     * @throws RuntimeException if the service is not configured, the HTTP status is not 200,
     * or the response does not report `SUCCESS`
     */
    fun postCrossRefXML(XML: String): String {
        checkIsActive()

        // This is needed per their API specification
        val formData = mapOf(
            "operation" to "doQueryUpload",
            "login_id" to crossRefServiceProperties.username,
            "login_passwd" to crossRefServiceProperties.password,
            "fname" to mapOf(
                "data" to XML,
                // "filename" could be any string, using the one from their code samples, though
                "filename" to "crossref_query.xml",
            ),
        )

        val connection = URI(
            crossRefServiceProperties.endpoint + "/servlet/deposit",
        ).toURL().openConnection() as HttpURLConnection

        try {
            connection.requestMethod = "POST"
            connection.doOutput = true

            val boundary = "---------------------------" + System.currentTimeMillis()
            // Some weird edge cases come out if the Connection header is not explicitly set to "close"
            connection.setRequestProperty("Connection", "close")
            connection.setRequestProperty("Content-Type", "multipart/form-data; boundary=$boundary")
            // If the User-Agent does not contain the string curl, the request fails
            connection.setRequestProperty("User-Agent", "curl/7.81.0")

            val dataOutputStream = DataOutputStream(connection.outputStream)
            // Encoding has to be UTF-8, otherwise the request will fail.
            // `use` closes (and thereby flushes) the writer even if writing a part throws
            PrintWriter(OutputStreamWriter(dataOutputStream, "UTF-8"), true).use { printWriter ->
                formData.forEach { (key, value) ->
                    if (value is String) {
                        // Both carriage return and new line characters have to be sent ("\r\n"),
                        // otherwise the request will cause a 500 error on CrossRef's end
                        printWriter.append("--$boundary").append("\r\n")
                        printWriter.append(
                            "Content-Disposition: form-data; name=\"${URLEncoder.encode(key, "UTF-8")}\"",
                        ).append("\r\n")
                        printWriter.append("\r\n")
                        printWriter.append(value).append("\r\n")
                    } else if (key == "fname" && value is Map<*, *>) {
                        printWriter.append("--$boundary").append("\r\n")
                        printWriter.append(
                            "Content-Disposition: form-data; name=\"${URLEncoder.encode(
                                key,
                                "UTF-8",
                            )}\"; filename=\"${URLEncoder.encode(value["filename"] as String, "UTF-8")}\"",
                        ).append("\r\n")
                        printWriter.append("Content-Type: application/xml").append("\r\n")
                        printWriter.append("\r\n")
                        // The xml must be a single line, otherwise it is easy to run into formatting
                        // errors introduced by newlines here and there
                        printWriter.append(value["data"] as String).append("\r\n\r\n")
                    }
                }

                printWriter.append("--$boundary--").append("\r\n")
            }

            val responseCode = connection.responseCode
            if (responseCode != HttpURLConnection.HTTP_OK) {
                throw RuntimeException(
                    "DOI creation request returned a " + responseCode.toString() + " response status",
                )
            }

            // Slurp the whole response, instead of reading it line by line as it is usually done,
            // because otherwise it would be trickier to work out the behavior of "\r\n".
            // Decode explicitly as UTF-8 so the result does not depend on the platform default charset
            val response = connection.inputStream.use { String(it.readAllBytes(), Charsets.UTF_8) }

            // CrossRef's API response is quite vague, they always give you back a 200 status,
            // and the only noticeable difference between a successful response vs. a failed one
            // is the presence of a "SUCCESS" or "FAILURE" string in it.
            val text = Jsoup.parse(response).select("h2").text()

            // Anything other than an exact "SUCCESS" is treated as a failed deposit
            if (text != "SUCCESS") {
                throw RuntimeException("DOI creation request failed. \"FAILURE\" present in the response")
            }
            log.debug { "DOI creation successful for XML: " + XML }

            return text
        } finally {
            // Release the underlying socket on every path, including the failure ones
            connection.disconnect()
        }
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
package org.loculus.backend.service.crossref

import org.junit.jupiter.api.Assertions.assertEquals
import org.junit.jupiter.api.Test
import org.loculus.backend.SpringBootTestWithoutDatabase
import org.springframework.beans.factory.annotation.Autowired
import java.time.Instant
import java.time.LocalDate
import java.time.LocalDateTime
import java.time.ZoneId

/**
 * Checks that [CrossRefService.generateCrossRefXML] renders the expected CrossRef deposit document.
 */
@SpringBootTestWithoutDatabase
class CrossRefServiceTest(
    @Autowired private val crossRefService: CrossRefService,
) {
    private val doiBatchID: String = "3cbae87e-77b2-4560-b411-502288f3f636"
    private val doiPrefix: String = "10.62599"

    // 1711411200 is 2024-03-26T00:00:00Z. The instant is resolved in UTC so that the publication date
    // in the reference document is 2024-03-26 regardless of the time zone of the machine running the test
    private val now: LocalDate = LocalDateTime.ofInstant(
        Instant.ofEpochSecond(1711411200),
        ZoneId.of("UTC"),
    ).toLocalDate()

    // The service derives the batch timestamp from the start of the day in the system default zone,
    // so the expected value is computed the same way instead of hard-coding the UTC value
    private val expectedTimestamp: Long = now.atStartOfDay(ZoneId.systemDefault()).toInstant().toEpochMilli()

    private val crossRefXMLReference = """
        <?xml version="1.0" encoding="UTF-8" standalone="yes"?>
        <doi_batch version="5.3.1" xmlns="http://www.crossref.org/schema/5.3.1" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.crossref.org/schema/5.3.1 http://data.crossref.org/schemas/crossref5.3.1.xsd"><head><doi_batch_id>$doiBatchID</doi_batch_id><timestamp>$expectedTimestamp</timestamp></head><body><database><database_metadata><titles><title>Pathoplexus Database</title></titles></database_metadata><dataset><contributors><organization contributor_role="author" sequence="first">pathoplexus.org</organization><person_name contributor_role="author" sequence="first"><given_name>Pathoplexus</given_name><surname>Contributor</surname></person_name></contributors><titles><title>Pathoplexus Dataset</title></titles><database_date><publication_date><month>03</month><day>26</day><year>2024</year></publication_date></database_date><doi_data><doi>$doiPrefix/XXXX</doi><resource>https://pathoplexus.org/</resource></doi_data></dataset></database></body></doi_batch>
    """.trimIndent()

    @Test
    fun `Create an XML metadata string complying with CrossRef's schema`() {
        val crossRefXML = crossRefService.generateCrossRefXML(
            mapOf(
                "DOIBatchID" to doiBatchID,
                "now" to now,
                "databaseTitle" to "Pathoplexus Database",
                "organizations" to arrayOf("pathoplexus.org"),
                "contributors" to arrayOf(arrayOf("Pathoplexus", "Contributor")),
                "datasetTitle" to "Pathoplexus Dataset",
                "DOI" to doiPrefix + "/XXXX",
                "URL" to "https://pathoplexus.org/",
            ),
        )

        // JUnit expects (expected, actual); the right order keeps failure messages truthful
        assertEquals(crossRefXMLReference, crossRefXML)
    }
}
5 changes: 4 additions & 1 deletion backend/src/test/resources/application.properties
Original file line number Diff line number Diff line change
@@ -1,11 +1,14 @@
spring.config.import=file:src/main/resources/application.properties
loculus.config.path=src/test/resources/backend_config.json

crossref.endpoint=dummy
crossref.username=dummy
crossref.password=dummy

keycloak.user=dummy
keycloak.password=dummy
keycloak.realm=dummyRealm
keycloak.client=dummy-cli
keycloak.url=dummy:420


spring.security.oauth2.resourceserver.jwt.jwk-set-uri=http://some.value
5 changes: 5 additions & 0 deletions kubernetes/loculus/templates/externaldb-sealed-secret.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -19,4 +19,9 @@ spec:
keycloak-db-username: {{ .Values.keycloakDatabase.usernameSealedSecret | quote }}
keycloak-db-password: {{ .Values.keycloakDatabase.passwordSealedSecret | quote }}
{{- end }}
{{- if .Values.crossRef.usernameSealedSecret }}
crossref-username: {{ .Values.crossRef.usernameSealedSecret | quote }}
crossref-test-password: {{ .Values.crossRef.testPasswordSealedSecret | quote }}
crossref-live-password: {{ .Values.crossRef.livePasswordSealedSecret | quote }}
{{- end }}
{{ end }}
33 changes: 29 additions & 4 deletions kubernetes/loculus/templates/loculus-deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -69,16 +69,41 @@ spec:
ports:
- containerPort: 8079
args:
- "--spring.datasource.url=$(DB_URL)"
- "--spring.datasource.username=$(DB_USERNAME)"
- "--spring.datasource.password=$(DB_PASSWORD)"
- "--keycloak.user=backend"
- "--crossref.doi-prefix=$(CROSSREF_DOI_PREFIX)"
- "--crossref.endpoint=$(CROSSREF_TEST_ENDPOINT)"
- "--crossref.username=$(CROSSREF_USERNAME)"
- "--crossref.password=$(CROSSREF_TEST_PASSWORD)"
- "--keycloak.password=backend"
- "--keycloak.realm=loculus"
- "--keycloak.client=backend-client"
- "--keycloak.url=http://loculus-keycloak-service:8083"
- "--keycloak.user=backend"
- "--spring.datasource.password=$(DB_PASSWORD)"
- "--spring.datasource.url=$(DB_URL)"
- "--spring.datasource.username=$(DB_USERNAME)"
- "--spring.security.oauth2.resourceserver.jwt.jwk-set-uri=http://loculus-keycloak-service:8083/realms/loculus/protocol/openid-connect/certs"
env:
{{- if .Values.crossRef.usernameSealedSecret }}
- name: CROSSREF_USERNAME
valueFrom:
secretKeyRef:
name: externaldb-credentials
key: crossref-username
{{- end }}
{{- if .Values.crossRef.testPasswordSealedSecret }}
- name: CROSSREF_TEST_PASSWORD
valueFrom:
secretKeyRef:
name: externaldb-credentials
key: crossref-test-password
{{- end }}
{{- if .Values.crossRef.livePasswordSealedSecret }}
- name: CROSSREF_LIVE_PASSWORD
valueFrom:
secretKeyRef:
name: externaldb-credentials
key: crossref-live-password
{{- end }}
{{- if .Values.externalDatabase.urlSealedSecret }}
- name: DB_URL
valueFrom:
Expand Down
7 changes: 7 additions & 0 deletions kubernetes/loculus/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,13 @@ keycloakDatabase:
usernameSealedSecret: ""
passwordSealedSecret: ""
portSealedSecret: ""
crossRef:
DOIPrefix: "10.62599"
testEndpoint: "https://test.crossref.org"
liveEndpoint: "https://doi.crossref.org"
usernameSealedSecret:
testPasswordSealedSecret:
livePasswordSealedSecret:
disableWebsite: false
disableBackend: false
disablePreprocessing: false
Expand Down

0 comments on commit 66ea4fb

Please sign in to comment.