diff --git a/lib/bolognese/author_utils.rb b/lib/bolognese/author_utils.rb index 82520d47..0a3ce219 100644 --- a/lib/bolognese/author_utils.rb +++ b/lib/bolognese/author_utils.rb @@ -30,6 +30,7 @@ def get_one_author(author) name_type = parse_attributes(author.fetch("creatorName", nil), content: "nameType", first: true) || parse_attributes(author.fetch("contributorName", nil), content: "nameType", first: true) name_identifiers = Array.wrap(author.fetch("nameIdentifier", nil)).map do |ni| + ni["__content__"] = ni["__content__"].strip if ni["nameIdentifierScheme"] == "ORCID" { "nameIdentifier" => normalize_orcid(ni["__content__"]), diff --git a/spec/author_utils_spec.rb b/spec/author_utils_spec.rb index 3ae05ba9..e95acc35 100644 --- a/spec/author_utils_spec.rb +++ b/spec/author_utils_spec.rb @@ -173,9 +173,10 @@ end context "affiliationIdentifier" do + let(:input) { fixture_path + 'datacite-example-ROR-nameIdentifiers.xml' } + subject { Bolognese::Metadata.new(input: input, from: "datacite") } + it "should normalize ROR affiliationIdentifier with and without URL" do - input = fixture_path + 'datacite-example-ROR-nameIdentifiers.xml' - subject = Bolognese::Metadata.new(input: input, from: "datacite") # without URL inside affiliationIdentifier="05bp8ka77" ror_affiliater0 = subject.creators[0]["affiliation"].select { |r| r["affiliationIdentifierScheme"] == "ROR" } expect(ror_affiliater0[0]["affiliationIdentifier"]).to eq("https://ror.org/05bp8ka77") @@ -184,6 +185,28 @@ expect(ror_affiliater1[0]["affiliationIdentifier"]).to eq("https://ror.org/05bp8ka05") end + it "should normalize the valid ORCID nameIdentifier to URL with schemeURI" do + # with "schemeURI" + # ORCID normalization 0000-0001-9998-0117 => https://orcid.org/0000-0001-9998-0117 + expect(subject.creators[0]["nameIdentifiers"]).to eq([{"nameIdentifier"=>"https://orcid.org/0000-0001-9998-0117", "schemeUri"=>"https://orcid.org", "nameIdentifierScheme"=>"ORCID"}]) + end + + it "should normalize the valid ORCID 
nameIdentifier to URL without schemeURI" do + # without "schemeURI" + # ORCID normalization 0000-0001-9998-0117 => https://orcid.org/0000-0001-9998-0117 + expect(subject.creators[7]["nameIdentifiers"]).to eq([{"nameIdentifier"=>"https://orcid.org/0000-0001-9998-0117", "schemeUri"=>"https://orcid.org", "nameIdentifierScheme"=>"ORCID"}]) + end + + it "should keep nameIdentifier URL after normalization" do + # ORCID normalization https://orcid.org/0000-0001-9998-0114 => https://orcid.org/0000-0001-9998-0114 + expect(subject.creators[1]["nameIdentifiers"]).to eq([{"nameIdentifier"=>"https://orcid.org/0000-0001-9998-0114", "schemeUri"=>"https://orcid.org", "nameIdentifierScheme"=>"ORCID"}]) + end + + it "should sanitize valid ORCID id/URL before normalization" do + #" 0000-0001-9998-0118 ", # Valid ORCID with leading/trailing spaces + expect(subject.creators[8]["nameIdentifiers"]).to eq([{"nameIdentifier"=>"https://orcid.org/0000-0001-9998-0118", "schemeUri"=>"https://orcid.org", "nameIdentifierScheme"=>"ORCID"}]) + end + it "should parse non ROR schema's without normalizing them" do input = fixture_path + 'datacite-example-ROR-nameIdentifiers.xml' subject = Bolognese::Metadata.new(input: input, from: "datacite") diff --git a/spec/fixtures/datacite-example-ROR-nameIdentifiers.xml b/spec/fixtures/datacite-example-ROR-nameIdentifiers.xml index 91764c3d..8a1f68b7 100644 --- a/spec/fixtures/datacite-example-ROR-nameIdentifiers.xml +++ b/spec/fixtures/datacite-example-ROR-nameIdentifiers.xml @@ -5,13 +5,13 @@ Ashwini Sukale - https://orcid.org/0000-0001-9998-0117 + 0000-0001-9998-0117 Metadata Game Changers Wesleyan University Erin Robinson - https://orcid.org/0000-0001-9998-0114 + https://orcid.org/0000-0001-9998-0114 Metadata Game Changers Wesleyan University @@ -35,6 +35,14 @@ جامعة زاخۆ 05sd1pz50 + + Ashwini S + 0000-0001-9998-0117 + + + Mike B + 0000-0001-9998-0118 + Genomic Standards Consortium (GSC) Island Sampling Day: Moorea Reef to Ridges Genomic Transect