From 37b5be314d7593e49c03f3385e672e78254e6a5e Mon Sep 17 00:00:00 2001 From: Kristian Garza Date: Tue, 22 Jan 2019 11:43:44 +0100 Subject: [PATCH 1/3] change in codemeta schema title -> name agents -> authors https://github.com/datacite/bolognese/issues/30 --- lib/bolognese/readers/codemeta_reader.rb | 8 ++++++-- lib/bolognese/writers/codemeta_writer.rb | 4 ++-- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/lib/bolognese/readers/codemeta_reader.rb b/lib/bolognese/readers/codemeta_reader.rb index 1cc9d686..ce830c55 100644 --- a/lib/bolognese/readers/codemeta_reader.rb +++ b/lib/bolognese/readers/codemeta_reader.rb @@ -36,7 +36,9 @@ def read_codemeta(string: nil, **options) id = Array.wrap(identifiers).first.to_h.fetch("identifier", nil) doi = Array.wrap(identifiers).find { |r| r["identifierType"] == "DOI" }.to_h.fetch("identifier", nil) - creators = get_authors(from_schema_org_creators(Array.wrap(meta.fetch("agents", nil)))) + creators = meta.fetch("agents", nil).nil? ? get_authors(from_schema_org_creators(Array.wrap(meta.fetch("authors", nil)))) : get_authors(from_schema_org_creators(Array.wrap(meta.fetch("agents", nil)))) + + # creators = get_authors(from_schema_org_creators(Array.wrap(meta.fetch("authors", nil)))) contributors = get_authors(from_schema_org_contributors(Array.wrap(meta.fetch("editor", nil)))) dates = [] dates << { "date" => meta.fetch("datePublished"), "dateType" => "Issued" } if meta.fetch("datePublished", nil).present? @@ -58,12 +60,14 @@ def read_codemeta(string: nil, **options) { "subject" => s } end + titles = meta.fetch("title", nil).nil? ? [{ "title" => meta.fetch("name", nil) }] : [{ "title" => meta.fetch("title", nil) }] + { "id" => id, "types" => types, "identifiers" => identifiers, "doi" => doi_from_url(doi), "url" => normalize_id(meta.fetch("codeRepository", nil)), - "titles" => [{ "title" => meta.fetch("title", nil) }], + "titles" => titles, "creators" => creators, "contributors" => contributors, "publisher" => publisher, diff --git a/lib/bolognese/writers/codemeta_writer.rb b/lib/bolognese/writers/codemeta_writer.rb index 03e16e04..a0849782 100644 --- a/lib/bolognese/writers/codemeta_writer.rb +++ b/lib/bolognese/writers/codemeta_writer.rb @@ -12,8 +12,8 @@ def codemeta "@id" => normalize_doi(doi), "identifier" => to_schema_org_identifiers(identifiers), "codeRepository" => url, - "title" => parse_attributes(titles, content: "title", first: true), - "agents" => creators, + "name" => parse_attributes(titles, content: "title", first: true), + "authors" => creators, "description" => parse_attributes(descriptions, content: "description", first: true), "version" => version_info, "tags" => subjects.present? ? Array.wrap(subjects).map { |k| parse_attributes(k, content: "subject", first: true) } : nil, From 9f43df047467585c9d16fcfbcb3b73804465d362 Mon Sep 17 00:00:00 2001 From: Kristian Garza Date: Tue, 22 Jan 2019 11:43:49 +0100 Subject: [PATCH 2/3] specs --- spec/fixtures/codemeta_v2.json | 86 +++++++++++++++++++ spec/fixtures/maremma/codemeta.json | 2 +- ...eSourceCode_DataCite_check_codemeta_v2.yml | 60 +++++++++++++ spec/writers/codemeta_writer_spec.rb | 13 ++- spec/writers/crosscite_writer_spec.rb | 10 +++ spec/writers/datacite_writer_spec.rb | 22 +++++ 6 files changed, 190 insertions(+), 3 deletions(-) create mode 100644 spec/fixtures/codemeta_v2.json create mode 100644 spec/fixtures/vcr_cassettes/Bolognese_Metadata/write_metadata_as_codemeta/SoftwareSourceCode_DataCite_check_codemeta_v2.yml diff --git a/spec/fixtures/codemeta_v2.json b/spec/fixtures/codemeta_v2.json new file mode 100644 index 00000000..16d641b2 --- /dev/null +++ b/spec/fixtures/codemeta_v2.json @@ -0,0 +1,86 @@ +{ + "@context": "https://raw.githubusercontent.com/codemeta/codemeta/master/codemeta.jsonld", + "@type": "SoftwareSourceCode", + "authors": [{ + "@id": "http://orcid.org/0000-0003-0077-4738", + "@type": "person", + "email": "jones@nceas.ucsb.edu", + "name": "Matt Jones", + "affiliation": "NCEAS", + "mustBeCited": true, + "isMaintainer": true, + "isRightsHolder": true, + "role": { + "namespace": "http://www.ngdc.noaa.gov/metadata/published/xsd/schema/resources/Codelist/gmxCodelists.xml#CI_RoleCode", + "roleCode": [ + "originator", + "resourceProvider" + ] + } + }, { + "@id": "http://orcid.org/0000-0002-2192-403X", + "@type": "person", + "email": "slaughter@nceas.ucsb.edu", + "name": "Peter Slaughter", + "affiliation": "NCEAS", + "mustBeCited": true, + "isMaintainer": false, + "isRightsHolder": false, + "role": { + "namespace": "http://www.ngdc.noaa.gov/metadata/published/xsd/schema/resources/Codelist/gmxCodelists.xml#CI_RoleCode", + "roleCode": "contributor" + } + }, { + "@type": "organization", + "email": "info@ucop.edu", + "name": "University of California, Santa Barbara", + "role": { + "namespace": "http://www.ngdc.noaa.gov/metadata/published/xsd/schema/resources/Codelist/gmxCodelists.xml#CI_RoleCode", + "roleCode": "copyrightHolder" + } + }], + "depends": { + "@type": "URL", + "@value": "https://github.com/ropensci/datapack" + }, + "identifier": "http://dx.doi.org/10.5063/F1M61H5X", + "codeRepository": "https://github.com/DataONEorg/rdataone", + "controlledTem": "software", + "datePublished": "2016-05-27", + "dateModified": "2016-05-27", + "dateCreated": "2016-05-27", + "description": "Provides read and write access to data and metadata from the DataONE network of data repositories.Each DataONE repository implements a consistent repository application programming interface. Users call methods in R to access these remote repository functions, such as methods to query the metadata catalog, get access to metadata for particular data packages, and read the data objects from the data repository.Users can also insert and update data objects on repositories that support these methods.", + "downloadLink": "https://cran.r-project.org/src/contrib/dataone_2.0.0.tar.gz", + "funding": "National Science Foundation grant #012345678", + "isAutomatedBuild": false, + "issueTracker": "https://github.com/DataONEorg/rdataone/issues", + "licenseId": "Apache-2.0", + "publisher": "https://cran.r-project.org", + "tags": [ + "data sharing", + "data repository", + "DataONE" + ], + "name": "R Interface to the DataONE REST API", + "version": "2.0.0", + "uploadedBy": { + "@id": "http://orcid.org/0000-0003-0077-4738", + "@type": "person", + "email": "mbjones@nceas.ucsb.edu", + "name": "Matt Jones" + }, + "programmingLanguage": { + "name": "R", + "version": "> 3.1.1", + "URL": "https://www.r-project.org" + }, + "readme": "https://github.com/DataONEorg/rdataone/README.md", + "relatedPublications": "ISBN:0201703726", + "relationship": { + "relationshipType": "isPartOf", + "relatedIdentifier": "urn:uuid:F1A0A7AF-ECF3-4C7D-B675-7C6949963995", + "relatedIdentifierType": "UUID" + }, + "softwarePaperCitationIdentifiers": "http://doi.org/0000/0000", + "zippedCode": "https://cran.r-project.org/src/contrib/dataone_2.0.0.tar.gz" +} diff --git a/spec/fixtures/maremma/codemeta.json b/spec/fixtures/maremma/codemeta.json index 7b734de3..f176ddea 100644 --- a/spec/fixtures/maremma/codemeta.json +++ b/spec/fixtures/maremma/codemeta.json @@ -2,7 +2,7 @@ "@context": "https://raw.githubusercontent.com/codemeta/codemeta/master/codemeta.jsonld", "@type": "SoftwareSourceCode", "@id": "https://doi.org/10.5438/QEG0-3GM3", - "agents": { + "authors": { "@id": "http://orcid.org/0000-0003-0077-4738", "@type": "person", "name": "Martin Fenner", diff --git a/spec/fixtures/vcr_cassettes/Bolognese_Metadata/write_metadata_as_codemeta/SoftwareSourceCode_DataCite_check_codemeta_v2.yml b/spec/fixtures/vcr_cassettes/Bolognese_Metadata/write_metadata_as_codemeta/SoftwareSourceCode_DataCite_check_codemeta_v2.yml new file mode 100644 index 00000000..146fe90f --- /dev/null +++ b/spec/fixtures/vcr_cassettes/Bolognese_Metadata/write_metadata_as_codemeta/SoftwareSourceCode_DataCite_check_codemeta_v2.yml @@ -0,0 +1,60 @@ +--- +http_interactions: +- request: + method: get + uri: https://api.datacite.org/dois/10.5063/f1m61h5x + body: + encoding: US-ASCII + string: '' + headers: + User-Agent: + - Mozilla/5.0 (compatible; Maremma/4.2.1; +https://github.com/datacite/maremma) + Accept: + - text/html,application/json,application/xml;q=0.9, text/plain;q=0.8,image/png,*/*;q=0.5 + response: + status: + code: 200 + message: OK + headers: + Date: + - Tue, 22 Jan 2019 10:40:18 GMT + Content-Type: + - application/json; charset=utf-8 + Connection: + - keep-alive + Status: + - 200 OK + X-Anonymous-Consumer: + - 'true' + Cache-Control: + - max-age=0, private, must-revalidate + Vary: + - Accept-Encoding, Origin + X-Request-Id: + - 48c91bca-5323-4337-8889-6fda12bb6952 + Etag: + - W/"cd7a4f850597028d5190ad0be9c79b23" + X-Runtime: + - '0.026371' + X-Powered-By: + - Phusion Passenger 6.0.1 + Server: + - nginx/1.15.8 + Phusion Passenger 6.0.1 + body: + encoding: ASCII-8BIT + string: '{"data":{"id":"10.5063/f1m61h5x","type":"dois","attributes":{"doi":"10.5063/f1m61h5x","prefix":"10.5063","suffix":"f1m61h5x","identifiers":[{"identifier":"https://doi.org/10.5063/f1m61h5x","identifierType":"DOI"}],"creators":[{"name":"Jones, + Matthew B.; Slaughter, Peter; Nahf, Rob; Boettiger, Carl ; Jones, Chris; Read, + Jordan; Walker, Lauren; Hart, Edmund; Chamberlain, Scott"}],"titles":[{"title":"dataone: + R interface to the DataONE network of data repositories"}],"publisher":"KNB + Data Repository","container":{},"publicationYear":2016,"subjects":[],"contributors":[],"dates":[{"date":"2016","dateType":"Issued"}],"language":null,"types":{"ris":"COMP","bibtex":"misc","citeproc":"article","schemaOrg":"SoftwareSourceCode","resourceTypeGeneral":"Software"},"relatedIdentifiers":[],"sizes":[],"formats":[],"version":null,"rightsList":[],"descriptions":[],"geoLocations":[],"fundingReferences":[],"xml":"PD94bWwgdmVyc2lvbj0iMS4wIiBlbmNvZGluZz0iVVRGLTgiPz4KPHJlc291cmNlIHhtbG5zPSJodHRwOi8vZGF0YWNpdGUub3JnL3NjaGVtYS9rZXJuZWwtMyIKICB4bWxuczp4c2k9Imh0dHA6Ly93d3cudzMub3JnLzIwMDEvWE1MU2NoZW1hLWluc3RhbmNlIgogIHhzaTpzY2hlbWFMb2NhdGlvbj0iaHR0cDovL2RhdGFjaXRlLm9yZy9zY2hlbWEva2VybmVsLTMKICAgIGh0dHA6Ly9zY2hlbWEuZGF0YWNpdGUub3JnL21ldGEva2VybmVsLTMvbWV0YWRhdGEueHNkIj4KICA8aWRlbnRpZmllciBpZGVudGlmaWVyVHlwZT0iRE9JIj4xMC41MDYzL0YxTTYxSDVYPC9pZGVudGlmaWVyPgogIDxjcmVhdG9ycz4KICAgIDxjcmVhdG9yPgogICAgICA8Y3JlYXRvck5hbWU+Sm9uZXMsIE1hdHRoZXcgQi47IFNsYXVnaHRlciwgUGV0ZXI7IE5haGYsIFJvYjsgQm9ldHRpZ2VyLCBDYXJsIDsgSm9uZXMsIENocmlzOyBSZWFkLCBKb3JkYW47IFdhbGtlciwgTGF1cmVuOyBIYXJ0LCBFZG11bmQ7IENoYW1iZXJsYWluLCBTY290dDwvY3JlYXRvck5hbWU+CiAgICA8L2NyZWF0b3I+CiAgPC9jcmVhdG9ycz4KICA8dGl0bGVzPgogICAgPHRpdGxlPmRhdGFvbmU6IFIgaW50ZXJmYWNlIHRvIHRoZSBEYXRhT05FIG5ldHdvcmsgb2YgZGF0YSByZXBvc2l0b3JpZXM8L3RpdGxlPgogIDwvdGl0bGVzPgogIDxwdWJsaXNoZXI+S05CIERhdGEgUmVwb3NpdG9yeTwvcHVibGlzaGVyPgogIDxwdWJsaWNhdGlvblllYXI+MjAxNjwvcHVibGljYXRpb25ZZWFyPgogIDxyZXNvdXJjZVR5cGUgcmVzb3VyY2VUeXBlR2VuZXJhbD0iU29mdHdhcmUiLz4KPC9yZXNvdXJjZT4K","url":"https://github.com/DataONEorg/rdataone","contentUrl":null,"metadataVersion":1,"schemaVersion":"http://datacite.org/schema/kernel-3","source":null,"isActive":true,"state":"findable","reason":null,"created":"2016-03-12T03:15:03.000Z","registered":"2016-03-12T03:15:04.000Z","updated":"2018-10-07T13:22:40.000Z"},"relationships":{"client":{"data":{"id":"cdl.nceas","type":"clients"}},"media":{"data":[]}}},"included":[{"id":"cdl.nceas","type":"clients","attributes":{"name":"National + Center for Ecological Analysis and Synthesis (NCEAS)","symbol":"CDL.NCEAS","year":2011,"contactName":"EZID + Support Desk","contactEmail":"ezid@ucop.edu","description":"The NCEAS Data + Repository contains information about the research data sets collected and + collated as part of NCEAS'' funded activities. Information in the NCEAS Data + Repository is concurrently available through the Knowledge Network for Biocomplexity + (KNB), an international data repository. A number of the data sets were synthesized + from multiple data sources that originated from the efforts of many contributors, + while others originated from a single","domains":"*","url":"https://data.nceas.ucsb.edu/","created":"2011-12-13T04:27:25.000Z","updated":"2018-12-20T23:09:38.000Z","isActive":true,"hasPassword":true},"relationships":{"provider":{"data":{"id":"cdl","type":"providers"}},"repository":{"data":{"id":"10.17616/R3FW2J","type":"repositories"}},"prefixes":{"data":[{"id":"10.5063","type":"prefixes"},{"id":"10.5072","type":"prefixes"}]}}}]}' + http_version: + recorded_at: Tue, 22 Jan 2019 10:40:18 GMT +recorded_with: VCR 3.0.3 diff --git a/spec/writers/codemeta_writer_spec.rb b/spec/writers/codemeta_writer_spec.rb index 8df28222..0b4a9073 100644 --- a/spec/writers/codemeta_writer_spec.rb +++ b/spec/writers/codemeta_writer_spec.rb @@ -28,10 +28,19 @@ expect(json["@id"]).to eq("https://doi.org/10.5063/f1m61h5x") expect(json["@type"]).to eq("SoftwareSourceCode") expect(json["identifier"]).to eq("@type"=>"PropertyValue", "propertyID"=>"DOI", "value"=>"https://doi.org/10.5063/f1m61h5x") - expect(json["agents"]).to eq([{"name"=>"Jones, Matthew B.; Slaughter, Peter; Nahf, Rob; Boettiger, Carl ; Jones, Chris; Read, Jordan; Walker, Lauren; Hart, Edmund; Chamberlain, Scott"}]) - expect(json["title"]).to eq("dataone: R interface to the DataONE network of data repositories") + expect(json["authors"]).to eq([{"name"=>"Jones, Matthew B.; Slaughter, Peter; Nahf, Rob; Boettiger, Carl ; Jones, Chris; Read, Jordan; Walker, Lauren; Hart, Edmund; Chamberlain, Scott"}]) + expect(json["name"]).to eq("dataone: R interface to the DataONE network of data repositories") expect(json["datePublished"]).to eq("2016") expect(json["publisher"]).to eq("KNB Data Repository") end + + it "SoftwareSourceCode DataCite check codemeta v2" do + input = "https://doi.org/10.5063/f1m61h5x" + subject = Bolognese::Metadata.new(input: input, from: "datacite") + expect(subject.valid?).to be true + json = JSON.parse(subject.codemeta) + expect(json["agents"]).to be_nil + expect(json["title"]).to be_nil + end end end diff --git a/spec/writers/crosscite_writer_spec.rb b/spec/writers/crosscite_writer_spec.rb index b8853bfa..0dd082e0 100644 --- a/spec/writers/crosscite_writer_spec.rb +++ b/spec/writers/crosscite_writer_spec.rb @@ -65,6 +65,16 @@ expect(crosscite["version"]).to eq("2.0.0") end + it "rdataone codemeta v2" do + input = fixture_path + 'codemeta_v2.json' + subject = Bolognese::Metadata.new(input: input, from: "codemeta") + crosscite = JSON.parse(subject.crosscite) + expect(crosscite["titles"]).to eq([{"title"=>"R Interface to the DataONE REST API"}]) + expect(crosscite["creators"].length).to eq(3) + expect(crosscite["creators"].last).to eq("nameType" => "Organizational", "name"=>"University Of California, Santa Barbara") + expect(crosscite["version"]).to eq("2.0.0") + end + it "datacite database attributes" do input = "https://doi.org/10.5061/DRYAD.8515" subject = Bolognese::Metadata.new(input: input, from: "datacite") diff --git a/spec/writers/datacite_writer_spec.rb b/spec/writers/datacite_writer_spec.rb index 22e903af..4e72349a 100644 --- a/spec/writers/datacite_writer_spec.rb +++ b/spec/writers/datacite_writer_spec.rb @@ -87,6 +87,28 @@ expect(datacite.fetch("version")).to eq("2.0.0") end + it "rdataone and codemeta_v2" do + input = fixture_path + 'codemeta_v2.json' + subject = Bolognese::Metadata.new(input: input, from: "codemeta") + expect(subject.valid?).to be true + datacite = Maremma.from_xml(subject.datacite).fetch("resource", {}) + expect(datacite.dig("titles", "title")).to eq("R Interface to the DataONE REST API") + expect(datacite.dig("creators", "creator")).to eq([{"creatorName"=>{"__content__"=>"Jones, Matt", "nameType"=>"Personal"}, + "givenName"=>"Matt", + "familyName"=>"Jones", + "nameIdentifier"=> + {"nameIdentifierScheme"=>"ORCID", + "__content__"=>"https://orcid.org/0000-0003-0077-4738"}}, + {"creatorName"=>{"__content__"=>"Slaughter, Peter", "nameType"=>"Personal"}, + "givenName"=>"Peter", + "familyName"=>"Slaughter", + "nameIdentifier"=> + {"nameIdentifierScheme"=>"ORCID", + "__content__"=>"https://orcid.org/0000-0002-2192-403X"}}, + {"creatorName"=>{"__content__"=>"University Of California, Santa Barbara", "nameType"=>"Organizational"}}]) + expect(datacite.fetch("version")).to eq("2.0.0") + end + it "maremma" do input = "https://github.com/datacite/maremma" subject = Bolognese::Metadata.new(input: input, from: "codemeta") From d0c818d7473f4a0b5a70669501c87b2135f13d48 Mon Sep 17 00:00:00 2001 From: Kristian Garza Date: Thu, 7 Feb 2019 11:28:17 +0100 Subject: [PATCH 3/3] remove ugly one-liner --- lib/bolognese/readers/codemeta_reader.rb | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/lib/bolognese/readers/codemeta_reader.rb b/lib/bolognese/readers/codemeta_reader.rb index ce830c55..0384be6b 100644 --- a/lib/bolognese/readers/codemeta_reader.rb +++ b/lib/bolognese/readers/codemeta_reader.rb @@ -36,9 +36,10 @@ def read_codemeta(string: nil, **options) id = Array.wrap(identifiers).first.to_h.fetch("identifier", nil) doi = Array.wrap(identifiers).find { |r| r["identifierType"] == "DOI" }.to_h.fetch("identifier", nil) - creators = meta.fetch("agents", nil).nil? ? get_authors(from_schema_org_creators(Array.wrap(meta.fetch("authors", nil)))) : get_authors(from_schema_org_creators(Array.wrap(meta.fetch("agents", nil)))) - - # creators = get_authors(from_schema_org_creators(Array.wrap(meta.fetch("authors", nil)))) + has_agents = meta.fetch("agents", nil) + authors = has_agents.nil? ? meta.fetch("authors", nil) : has_agents + creators = get_authors(from_schema_org_creators(Array.wrap(authors))) + contributors = get_authors(from_schema_org_contributors(Array.wrap(meta.fetch("editor", nil)))) dates = [] dates << { "date" => meta.fetch("datePublished"), "dateType" => "Issued" } if meta.fetch("datePublished", nil).present? @@ -60,7 +61,9 @@ def read_codemeta(string: nil, **options) { "subject" => s } end - titles = meta.fetch("title", nil).nil? ? [{ "title" => meta.fetch("name", nil) }] : [{ "title" => meta.fetch("title", nil) }] + has_title = meta.fetch("title", nil) + + titles = has_title.nil? ? [{ "title" => meta.fetch("name", nil) }] : [{ "title" => has_title }] { "id" => id, "types" => types,