Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/master' into schema-4.5
Browse files Browse the repository at this point in the history
  • Loading branch information
svogt0511 committed Nov 30, 2023
2 parents a44b282 + f4e5323 commit 23a3e94
Show file tree
Hide file tree
Showing 11 changed files with 27 additions and 18 deletions.
2 changes: 1 addition & 1 deletion lib/bolognese/readers/bibtex_reader.rb
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ def read_bibtex(string: nil, **options)
"related_identifiers" => related_identifiers,
"dates" => dates,
"publication_year" => publication_year,
"descriptions" => meta.try(:abstract).present? ? [{ "description" => meta.try(:abstract) && sanitize(meta.abstract.to_s).presence, "descriptionType" => "Abstract" }] : [],
"descriptions" => meta.try(:abstract).present? ? [{ "description" => meta.try(:abstract) && sanitize(meta.abstract.to_s, new_line: true).presence, "descriptionType" => "Abstract" }] : [],
"rights_list" => rights_list,
"state" => state
}.merge(read_options)
Expand Down
2 changes: 1 addition & 1 deletion lib/bolognese/readers/citeproc_reader.rb
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ def read_citeproc(string: nil, **options)
"related_identifiers" => related_identifiers,
"dates" => dates,
"publication_year" => publication_year,
"descriptions" => meta.fetch("abstract", nil).present? ? [{ "description" => sanitize(meta.fetch("abstract")), "descriptionType" => "Abstract" }] : [],
"descriptions" => meta.fetch("abstract", nil).present? ? [{ "description" => sanitize(meta.fetch("abstract"), new_line: true), "descriptionType" => "Abstract" }] : [],
"rights_list" => rights_list,
"version_info" => meta.fetch("version", nil),
"subjects" => subjects,
Expand Down
2 changes: 1 addition & 1 deletion lib/bolognese/readers/codemeta_reader.rb
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ def read_codemeta(string: nil, **options)
#{}"is_part_of" => is_part_of,
"dates" => dates,
"publication_year" => publication_year,
"descriptions" => meta.fetch("description", nil).present? ? [{ "description" => sanitize(meta.fetch("description")), "descriptionType" => "Abstract" }] : nil,
"descriptions" => meta.fetch("description", nil).present? ? [{ "description" => sanitize(meta.fetch("description"), new_line: true), "descriptionType" => "Abstract" }] : nil,
"rights_list" => rights_list,
"version_info" => meta.fetch("version", nil),
"subjects" => subjects,
Expand Down
4 changes: 2 additions & 2 deletions lib/bolognese/readers/crossref_reader.rb
Original file line number Diff line number Diff line change
Expand Up @@ -231,11 +231,11 @@ def crossref_alternate_identifiers(bibliographic_metadata)

def crossref_description(bibliographic_metadata)
abstract = Array.wrap(bibliographic_metadata.dig("abstract")).map do |r|
{ "descriptionType" => "Abstract", "description" => sanitize(parse_attributes(r, content: 'p')) }.compact
{ "descriptionType" => "Abstract", "description" => sanitize(parse_attributes(r, content: 'p'), new_line: true) }.compact
end

description = Array.wrap(bibliographic_metadata.dig("description")).map do |r|
{ "descriptionType" => "Other", "description" => sanitize(parse_attributes(r)) }.compact
{ "descriptionType" => "Other", "description" => sanitize(parse_attributes(r), new_line: true) }.compact
end

(abstract + description)
Expand Down
4 changes: 2 additions & 2 deletions lib/bolognese/readers/datacite_reader.rb
Original file line number Diff line number Diff line change
Expand Up @@ -114,9 +114,9 @@ def read_datacite(string: nil, **options)
if r.blank?
nil
elsif r.is_a?(String)
{ "description" => sanitize(r), "descriptionType" => "Abstract" }
{ "description" => sanitize(r, new_line: true), "descriptionType" => "Abstract" }
elsif r.is_a?(Hash)
{ "description" => sanitize(r["__content__"]), "descriptionType" => r["descriptionType"], "lang" => r["lang"] }.compact
{ "description" => sanitize(r["__content__"], new_line: true), "descriptionType" => r["descriptionType"], "lang" => r["lang"] }.compact
end
end.compact
rights_list = Array.wrap(meta.dig("rightsList", "rights")).map do |r|
Expand Down
2 changes: 1 addition & 1 deletion lib/bolognese/readers/npm_reader.rb
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ def read_npm(string: nil, **options)
#"related_identifiers" => related_identifiers,
#"dates" => dates,
#"publication_year" => publication_year,
"descriptions" => meta.fetch("description", nil).present? ? [{ "description" => sanitize(meta.fetch("description")), "descriptionType" => "Abstract" }] : [],
"descriptions" => meta.fetch("description", nil).present? ? [{ "description" => sanitize(meta.fetch("description"), new_line: true), "descriptionType" => "Abstract" }] : [],
"rights_list" => rights_list,
"version_info" => meta.fetch("version", nil),
"subjects" => subjects
Expand Down
2 changes: 1 addition & 1 deletion lib/bolognese/readers/ris_reader.rb
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ def read_ris(string: nil, **options)
"related_identifiers" => related_identifiers,
"dates" => dates,
"publication_year" => publication_year,
"descriptions" => meta.fetch("AB", nil).present? ? [{ "description" => sanitize(meta.fetch("AB")), "descriptionType" => "Abstract" }] : nil,
"descriptions" => meta.fetch("AB", nil).present? ? [{ "description" => sanitize(meta.fetch("AB"), new_line: true), "descriptionType" => "Abstract" }] : nil,
"subjects" => subjects,
"language" => meta.fetch("LA", nil),
"state" => state
Expand Down
2 changes: 1 addition & 1 deletion lib/bolognese/readers/schema_org_reader.rb
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,7 @@ def read_schema_org(string: nil, **options)
"related_identifiers" => related_identifiers,
"publication_year" => publication_year,
"dates" => dates,
"descriptions" => meta.fetch("description", nil).present? ? [{ "description" => sanitize(meta.fetch("description")), "descriptionType" => "Abstract" }] : nil,
"descriptions" => meta.fetch("description", nil).present? ? [{ "description" => sanitize(meta.fetch("description"), new_line: true), "descriptionType" => "Abstract" }] : nil,
"rights_list" => rights_list,
"version_info" => meta.fetch("version", nil).to_s.presence,
"subjects" => subjects,
Expand Down
12 changes: 8 additions & 4 deletions lib/bolognese/utils.rb
Original file line number Diff line number Diff line change
Expand Up @@ -1065,12 +1065,16 @@ def sanitize(text, options={})
custom_scrubber = Bolognese::WhitelistScrubber.new(options)

if text.is_a?(String)
# remove excessive internal whitespace with squish
Loofah.scrub_fragment(text, custom_scrubber).to_s.squish
if options[:new_line]
# Collapse each run of spaces and tabs into a single space and strip leading/trailing whitespace, leaving interior new lines intact
Loofah.scrub_fragment(text, custom_scrubber).to_s.gsub(/[ \t]+/, ' ').strip
else
Loofah.scrub_fragment(text, custom_scrubber).to_s.squish
end
elsif text.is_a?(Hash)
sanitize(text.fetch(content, nil))
sanitize(text.fetch(content, nil), new_line: options[:new_line])
elsif text.is_a?(Array)
a = text.map { |e| e.is_a?(Hash) ? sanitize(e.fetch(content, nil)) : sanitize(e) }.uniq
a = text.map { |e| e.is_a?(Hash) ? sanitize(e.fetch(content, nil), new_line: options[:new_line]) : sanitize(e, new_line: options[:new_line]) }.uniq
a = options[:first] ? a.first : a.unwrap
else
nil
Expand Down
9 changes: 7 additions & 2 deletions spec/fixtures/datacite-seriesinformation.xml
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,11 @@
<version>1.0</version>
<descriptions>
<description descriptionType="SeriesInformation" xml:lang="en">DataCite Blog, 2(9), 3-4</description>
<description descriptionType="Abstract">Eating your own dog food is a slang term to describe that an organization should itself use the products and services it provides. For DataCite this means that we should use DOIs with appropriate metadata and strategies for long-term preservation for...</description>
</descriptions>
<description descriptionType="Abstract">Eating your own dog food is a slang term to
describe that an organization should itself use the products and services it provides.
For DataCite this means that we should use DOIs with appropriate metadata and strategies for long-term preservation for...
&#8226; Unicode Bullet Point: This is an example of a bullet point.\u2605
&#9632; Unicode Black Square: This is an example of a black square.
</description>
</descriptions>
</resource>
4 changes: 2 additions & 2 deletions spec/readers/datacite_reader_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -207,7 +207,7 @@
expect(subject.types["citeproc"]).to eq("article")
expect(subject.creators).to eq([{"nameType"=>"Personal", "name"=>"Garza, Kristian", "givenName"=>"Kristian", "familyName"=>"Garza", "nameIdentifiers" => [], "affiliation" => []}])
expect(subject.titles).to eq([{"title"=>"Analysis Tools For Crossover Experiment Of Ui Using Choice Architecture"}])
expect(subject.descriptions.first["description"]).to start_with("This tools are used to analyse the data produced by the Crosssover Experiment")
expect(subject.descriptions.first["description"]).to start_with(" \n\nThis tools are used to analyse the data produced by the Crosssover Experiment I designed to test Choice Architecture techniques as UI interventions in a SEEk4Science data catalogue. It contains:\n\n- Data structures for the experimental data.<br>\n- Visualisation functions<br>\n- Analysis functions\n\n## Installation\n\n- R<br>\n- python<br>\n- ipython 4\n\nClone and use.\n\n## Usage\n\n<br>\n```python<br>\nsource('parallel_plot.r')<br>\nwith(z, parallelset(trt,response, freq=count, alpha=0.2))<br>\n```\n\n<br>\n## Contributing\n\n1. Fork it!<br>\n2. Create your feature branch: `git checkout -b my-new-feature`<br>\n3. Commit your changes: `git commit -am 'Add some feature'`<br>\n4. Push to the branch: `git push origin my-new-feature`<br>\n5. Submit a pull request :D\n\n<br>\n## License\n\nThis work supports my PhD Thesis at University of Manchester.")
expect(subject.rights_list).to eq([{"rights"=>
"Creative Commons Attribution Non Commercial Share Alike 4.0 International",
"rightsIdentifier"=>"cc-by-nc-sa-4.0",
Expand Down Expand Up @@ -552,7 +552,7 @@
"descriptionType"=>"SeriesInformation",
"lang"=>"en"},
{"description"=>
"Eating your own dog food is a slang term to describe that an organization should itself use the products and services it provides. For DataCite this means that we should use DOIs with appropriate metadata and strategies for long-term preservation for...",
"Eating your own dog food is a slang term to \n describe that an organization should itself use the products and services it provides. \n For DataCite this means that we should use DOIs with appropriate metadata and strategies for long-term preservation for...\n • Unicode Bullet Point: This is an example of a bullet point.\\u2605\n ■ Unicode Black Square: This is an example of a black square.",
"descriptionType"=>"Abstract"}])
expect(subject.container).to eq("firstPage"=>"3", "identifier"=>"10.5438/0000-00SS", "identifierType"=>"DOI", "issue"=>"9", "lastPage"=>"4", "title"=>"DataCite Blog", "type"=>"Series", "volume"=>"2")
end
Expand Down

0 comments on commit 23a3e94

Please sign in to comment.