diff --git a/datacite/schema46.py b/datacite/schema46.py new file mode 100644 index 0000000..a881eb1 --- /dev/null +++ b/datacite/schema46.py @@ -0,0 +1,559 @@ +# -*- coding: utf-8 -*- +# +# This file is part of DataCite. +# +# Copyright (C) 2016 CERN. +# Copyright (C) 2019 Caltech. +# Copyright (C) 2024 IBT Czech Academy of Sciences. +# Copyright (C) 2026 Observatoire de Paris. +# +# DataCite is free software; you can redistribute it and/or modify it +# under the terms of the Revised BSD License; see LICENSE file for +# more details. + +"""DataCite v4.6 JSON to XML transformations.""" + +import importlib.resources as importlib_resources + +from lxml import etree +from lxml.builder import E + +from .jsonutils import validator_factory +from .xmlutils import ( + Rules, + dump_etree_helper, + etree_to_string, + set_elem_attr, + set_non_empty_attr, +) + +rules = Rules() + +ns = { + None: "http://datacite.org/schema/kernel-4", + "xsi": "http://www.w3.org/2001/XMLSchema-instance", + "xml": "xml", +} + +root_attribs = { + "{http://www.w3.org/2001/XMLSchema-instance}schemaLocation": "http://datacite.org/schema/kernel-4 " + "http://schema.datacite.org/meta/kernel-4.6/metadata.xsd", +} + +validator = validator_factory( + importlib_resources.files("datacite") / "schemas/datacite-v4.6.json" +) + + +def dump_etree(data): + """Convert JSON dictionary to DataCite v4.6 XML as ElementTree.""" + return dump_etree_helper(data, rules, ns, root_attribs) + + +def tostring(data, **kwargs): + """Convert JSON dictionary to DataCite v4.6 XML as string.""" + return etree_to_string(dump_etree(data), **kwargs) + + +def validate(data): + """Validate DataCite v4.6 JSON dictionary.""" + return validator.is_valid(data) + + +def affiliation(root, values): + """Extract affiliation.""" + vals = values.get("affiliation", []) + for val in vals: + if val.get("name"): + elem = E.affiliation(val["name"]) + # affiliationIdentifier metadata as Attributes + # (0-1 cardinality, instead of 0-n as list of 
objects) + set_elem_attr(elem, "affiliationIdentifier", val) + set_elem_attr(elem, "affiliationIdentifierScheme", val) + if val.get("schemeUri"): + elem.set("schemeURI", val["schemeUri"]) + root.append(elem) + + +def familyname(root, value): + """Extract family name.""" + val = value.get("familyName") + if val: + root.append(E.familyName(val)) + + +def givenname(root, value): + """Extract given name.""" + val = value.get("givenName") + if val: + root.append(E.givenName(val)) + + +def person_or_org_name(root, value, xml_tagname, json_tagname): + """Extract creator/contributor name and its 'nameType' attribute.""" + elem = E(xml_tagname, value[json_tagname]) + set_elem_attr(elem, "nameType", value) + set_non_empty_attr(elem, "{xml}lang", value.get("lang")) + root.append(elem) + + +def nameidentifiers(root, values): + """Extract nameidentifier.""" + vals = values.get("nameIdentifiers", []) + for val in vals: + if val.get("nameIdentifier"): + elem = E.nameIdentifier(val["nameIdentifier"]) + elem.set("nameIdentifierScheme", val["nameIdentifierScheme"]) + if val.get("schemeUri"): + elem.set("schemeURI", val["schemeUri"]) + root.append(elem) + + +def fetch_creator(root, value): + """Extract common values for creator and contributor.""" + givenname(root, value) + familyname(root, value) + nameidentifiers(root, value) + affiliation(root, value) + + +def title(root, values): + """Extract titles.""" + if not values: + return + + for value in values: + elem = etree.Element("title", nsmap=ns) + elem.text = value["title"] + set_non_empty_attr(elem, "{xml}lang", value.get("lang")) + # 'type' was a mistake in 4.0 serializer, which is supported + # for backwards compatibility until kernel 5 is released. 
+ set_non_empty_attr(elem, "titleType", value.get("type")) + # 'titleType' will supersede 'type' if available + set_non_empty_attr(elem, "titleType", value.get("titleType")) + root.append(elem) + + +def related_object(root, value): + """Extract attributes of relatedIdentifiers and relatedItems.""" + if not value: + return + + set_elem_attr(root, "relatedMetadataScheme", value) + if value.get("schemeUri"): + root.set("schemeURI", value["schemeUri"]) + set_elem_attr(root, "schemeType", value) + set_elem_attr(root, "resourceTypeGeneral", value) + + +@rules.rule("alternateIdentifiers") +def alternate_identifiers(path, values): + """Transform to alternateIdentifiers. + + Note that as of version schema 4.5 the identifiers field is deprecated + in favour of using alternateIdentifiers and the doi field. + """ + if not values: + return + + root = E.alternateIdentifiers() + for value in values: + elem = E.alternateIdentifier(value["alternateIdentifier"]) + set_non_empty_attr( + elem, "alternateIdentifierType", value.get("alternateIdentifierType") + ) + root.append(elem) + return root + + +@rules.rule("creators") +def creators(path, values): + """Transform creators.""" + if not values: + return + + root = E.creators() + for value in values: + creator = E.creator() + person_or_org_name(creator, value, "creatorName", "name") + fetch_creator(creator, value) + root.append(creator) + + return root + + +@rules.rule("titles") +def titles(path, values): + """Transform titles.""" + if not values: + return + root = E.titles() + title(root, values) + return root + + +@rules.rule("publisher") +def publisher(path, value): + """Transform publisher.""" + if not value: + return + + elem = E.publisher(value.get("name")) + set_non_empty_attr(elem, "publisherIdentifier", value.get("publisherIdentifier")) + set_non_empty_attr( + elem, "publisherIdentifierScheme", value.get("publisherIdentifierScheme") + ) + set_non_empty_attr(elem, "schemeURI", value.get("schemeUri")) + + return elem + + 
+@rules.rule("publicationYear") +def publication_year(path, value): + """Transform publicationYear.""" + if not value: + return + return E.publicationYear(value) + + +@rules.rule("subjects") +def subjects(path, values): + """Transform subjects.""" + if not values: + return + + root = E.subjects() + for value in values: + elem = E.subject(value["subject"]) + set_non_empty_attr(elem, "{xml}lang", value.get("lang")) + set_elem_attr(elem, "subjectScheme", value) + if value.get("schemeUri"): + elem.set("schemeURI", value["schemeUri"]) + if value.get("valueUri"): + elem.set("valueURI", value["valueUri"]) + root.append(elem) + return root + + +@rules.rule("contributors") +def contributors(path, values): + """Transform contributors.""" + if not values: + return + + root = E.contributors() + for value in values: + contributor = E.contributor() + person_or_org_name(contributor, value, "contributorName", "name") + fetch_creator(contributor, value) + set_elem_attr(contributor, "contributorType", value) + root.append(contributor) + + return root + + +@rules.rule("dates") +def dates(path, values): + """Transform dates.""" + if not values: + return + + root = E.dates() + for value in values: + elem = E.date(value["date"], dateType=value["dateType"]) + set_elem_attr(elem, "dateInformation", value) + root.append(elem) + + return root + + +@rules.rule("language") +def language(path, value): + """Transform language.""" + if not value: + return + return E.language(value) + + +@rules.rule("types") +def resource_type(path, value): + """Transform resourceType.""" + elem = E.resourceType() + elem.set("resourceTypeGeneral", value["resourceTypeGeneral"]) + elem.text = value.get("resourceType") + return elem + + +@rules.rule("doi") +def identifier(path, value): + """Transform doi into identifier.""" + if not value: + return None + + return E.identifier(value, identifierType="DOI") + + +@rules.rule("relatedIdentifiers") +def related_identifiers(path, values): + """Transform 
relatedIdentifiers.""" + if not values: + return + + root = E.relatedIdentifiers() + for value in values: + elem = E.relatedIdentifier() + elem.text = value["relatedIdentifier"] + elem.set("relationType", value["relationType"]) + related_object(elem, value) + set_elem_attr(elem, "relatedIdentifierType", value) + root.append(elem) + return root + + +def free_text_list(plural, singular, values): + """List of elements with free text.""" + if not values: + return + root = etree.Element(plural) + for value in values: + etree.SubElement(root, singular).text = value + return root + + +@rules.rule("sizes") +def sizes(path, values): + """Transform sizes.""" + return free_text_list("sizes", "size", values) + + +@rules.rule("formats") +def formats(path, values): + """Transform formats.""" + return free_text_list("formats", "format", values) + + +@rules.rule("version") +def version(path, value): + """Transform version.""" + if not value: + return + return E.version(value) + + +@rules.rule("rightsList") +def rights(path, values): + """Transform rights.""" + if not values: + return + + root = E.rightsList() + for value in values: + if "rights" in value: + elem = E.rights(value["rights"]) + # Handle the odd case where no rights text is present + else: + elem = E.rights() + if value.get("rightsUri"): + elem.set("rightsURI", value["rightsUri"]) + set_elem_attr(elem, "rightsIdentifierScheme", value) + set_elem_attr(elem, "rightsIdentifier", value) + if value.get("schemeUri"): + elem.set("schemeURI", value["schemeUri"]) + set_non_empty_attr(elem, "{xml}lang", value.get("lang")) + root.append(elem) + return root + + +@rules.rule("descriptions") +def descriptions(path, values): + """Transform descriptions.""" + if not values: + return + + root = E.descriptions() + for value in values: + elem = E.description( + value["description"], descriptionType=value["descriptionType"] + ) + set_non_empty_attr(elem, "{xml}lang", value.get("lang")) + root.append(elem) + + return root + + +def 
geopoint(root, value): + """Extract a point (either geoLocationPoint or polygonPoint).""" + root.append(E.pointLongitude(str(value["pointLongitude"]))) + root.append(E.pointLatitude(str(value["pointLatitude"]))) + + +@rules.rule("geoLocations") +def geolocations(path, values): + """Transform geolocations.""" + if not values: + return + + root = E.geoLocations() + for value in values: + element = E.geoLocation() + + place = value.get("geoLocationPlace") + if place: + element.append(E.geoLocationPlace(place)) + + point = value.get("geoLocationPoint") + if point: + elem = E.geoLocationPoint() + geopoint(elem, point) + element.append(elem) + + box = value.get("geoLocationBox") + if box: + elem = E.geoLocationBox() + elem.append(E.westBoundLongitude(str(box["westBoundLongitude"]))) + elem.append(E.eastBoundLongitude(str(box["eastBoundLongitude"]))) + elem.append(E.southBoundLatitude(str(box["southBoundLatitude"]))) + elem.append(E.northBoundLatitude(str(box["northBoundLatitude"]))) + element.append(elem) + + polygon = value.get("geoLocationPolygon") + if polygon: + elem = E.geoLocationPolygon() + for point in polygon: + plainPoint = point.get("polygonPoint") + if plainPoint: + e = E.polygonPoint() + geopoint(e, plainPoint) + elem.append(e) + inPoint = point.get("inPolygonPoint") + if inPoint: + e = E.inPolygonPoint() + geopoint(e, inPoint) + elem.append(e) + element.append(elem) + + root.append(element) + return root + + +@rules.rule("fundingReferences") +def fundingreferences(path, values): + """Transform funding references.""" + if not values: + return + + root = E.fundingReferences() + for value in values: + element = E.fundingReference() + + element.append(E.funderName(value.get("funderName"))) + + identifier = value.get("funderIdentifier") + if identifier: + elem = E.funderIdentifier(identifier) + typev = value.get("funderIdentifierType") + if typev: + elem.set("funderIdentifierType", typev) + element.append(elem) + + number = value.get("awardNumber") + if number: 
+ elem = E.awardNumber(number) + uri = value.get("awardUri") + if uri: + elem.set("awardURI", uri) + element.append(elem) + + title = value.get("awardTitle") + if title: + element.append(E.awardTitle(title)) + if len(element): + root.append(element) + return root + + +@rules.rule("relatedItems") +def related_items(path, values): + """Transform related items.""" + if not values: + return None + pass + + root = E.relatedItems() + for value in values: + elem = E.relatedItem() + set_elem_attr(elem, "relatedItemType", value) + set_elem_attr(elem, "relationType", value) + + id_label = "relatedItemIdentifier" + if value.get(id_label): + related_item_identifier = E.relatedItemIdentifier() + re_id = value[id_label] + related_item_identifier.text = re_id[id_label] + set_elem_attr(related_item_identifier, "relatedItemIdentifierType", re_id) + related_object(related_item_identifier, value) + elem.append(related_item_identifier) + + creator_values = value.get("creators") + if creator_values: + re_creators = E.creators() + for c in creator_values: + creator = E.creator() + person_or_org_name(creator, c, "creatorName", "name") + fetch_creator(creator, c) + re_creators.append(creator) + elem.append(re_creators) + + related_titles = E.titles() + title(related_titles, value.get("titles")) + elem.append(related_titles) + + pub_year = value.get("publicationYear") + if pub_year: + elem.append(E.publicationYear(pub_year)) + + vol = value.get("volume") + if vol: + elem.append(E.volume(vol)) + + issue = value.get("issue") + if issue: + elem.append(E.issue(issue)) + + number = value.get("number") + if number: + re_number = E.number(number) + if value.get("numberType"): + set_elem_attr(re_number, "numberType", value) + elem.append(re_number) + + first_p = value.get("firstPage") + if first_p: + elem.append(E.firstPage(first_p)) + + last_p = value.get("lastPage") + if last_p: + elem.append(E.lastPage(last_p)) + + pub = value.get("publisher") + if pub: + elem.append(E.publisher(pub)) + + edi = 
value.get("edition") + if edi: + elem.append(E.edition(edi)) + + contributors_values = value.get("contributors") + if contributors_values: + re_contributors = E.contributors() + for c in contributors_values: + contributor = E.contributor() + person_or_org_name(contributor, c, "contributorName", "name") + fetch_creator(contributor, c) + set_elem_attr(contributor, "contributorType", c) + re_contributors.append(contributor) + elem.append(re_contributors) + + root.append(elem) + + return root diff --git a/datacite/schemas/datacite-v4.6.json b/datacite/schemas/datacite-v4.6.json new file mode 100644 index 0000000..018483e --- /dev/null +++ b/datacite/schemas/datacite-v4.6.json @@ -0,0 +1,636 @@ +{ + "$schema": "https://json-schema.org/draft/2019-09/schema#", + "id": "datacite-v4.6.json", + "title": "DataCite v4.6", + "description": "JSON representation of the DataCite v4.6 schema.", + "additionalProperties": false, + "definitions": { + "nameType": { + "type": "string", + "enum": [ + "Organizational", + "Personal" + ] + }, + "nameIdentifiers": { + "type": "array", + "items": { + "type": "object", + "additionalProperties": false, + "properties": { + "nameIdentifier": {"type": "string"}, + "nameIdentifierScheme": {"type": "string"}, + "schemeUri": {"type": "string", "format": "uri"} + }, + "required": ["nameIdentifier", "nameIdentifierScheme"] + }, + "uniqueItems": true + }, + "affiliation": { + "type": "array", + "items": { + "type": "object", + "additionalProperties": false, + "properties": { + "name": {"type": "string"}, + "affiliationIdentifier": {"type": "string"}, + "affiliationIdentifierScheme": {"type": "string"}, + "schemeUri": {"type": "string", "format": "uri"} + }, + "required": ["name"] + }, + "uniqueItems": true + }, + "person": { + "type": "object", + "properties": { + "name": {"type": "string"}, + "nameType": {"$ref": "#/definitions/nameType"}, + "givenName": {"type": "string"}, + "familyName": {"type": "string"}, + "nameIdentifiers": {"$ref": 
"#/definitions/nameIdentifiers"}, + "affiliation": {"$ref": "#/definitions/affiliation"}, + "lang": {"type": "string"} + }, + "required": ["name"] + }, + "creator": { + "type": "object", + "allOf": [{ "$ref": "#/definitions/person" }], + "unevaluatedProperties": false + }, + "contributor": { + "type": "object", + "allOf": [{ "$ref": "#/definitions/person" }], + "unevaluatedProperties": false, + "properties": { + "contributorType": {"$ref": "#/definitions/contributorType"} + }, + "required": ["name", "contributorType"] + }, + "contributorType": { + "type": "string", + "enum": [ + "ContactPerson", + "DataCollector", + "DataCurator", + "DataManager", + "Distributor", + "Editor", + "HostingInstitution", + "Producer", + "ProjectLeader", + "ProjectManager", + "ProjectMember", + "RegistrationAgency", + "RegistrationAuthority", + "RelatedPerson", + "Researcher", + "ResearchGroup", + "RightsHolder", + "Sponsor", + "Supervisor", + "Translator", + "WorkPackageLeader", + "Other" + ] + }, + "titleType": { + "type": "string", + "enum": [ + "AlternativeTitle", + "Subtitle", + "TranslatedTitle", + "Other" + ] + }, + "longitude": { + "type": "number", + "maximum": 180, + "minimum": -180 + }, + "latitude": { + "type": "number", + "maximum": 90, + "minimum": -90 + }, + "date": { + "type": "string", + "anyOf": [ + {"format": "year"}, + {"format": "yearmonth"}, + {"format": "date"}, + {"format": "datetime"}, + {"format": "year-range"}, + {"format": "yearmonth-range"}, + {"format": "date-range"}, + {"format": "datetime-range"} + ] + }, + "dateType": { + "type": "string", + "enum": [ + "Accepted", + "Available", + "Copyrighted", + "Collected", + "Coverage", + "Created", + "Issued", + "Submitted", + "Updated", + "Valid", + "Withdrawn", + "Other" + ] + }, + "resourceTypeGeneral": { + "type": "string", + "enum": [ + "Audiovisual", + "Award", + "Book", + "BookChapter", + "Collection", + "ComputationalNotebook", + "ConferencePaper", + "ConferenceProceeding", + "DataPaper", + "Dataset", + 
"Dissertation", + "Event", + "Image", + "Instrument", + "InteractiveResource", + "Journal", + "JournalArticle", + "Model", + "OutputManagementPlan", + "PeerReview", + "PhysicalObject", + "Preprint", + "Project", + "Report", + "Service", + "Software", + "Sound", + "Standard", + "StudyRegistration", + "Text", + "Workflow", + "Other" + ] + }, + "relatedIdentifierType": { + "type": "string", + "enum": [ + "ARK", + "arXiv", + "bibcode", + "CSTR", + "DOI", + "EAN13", + "EISSN", + "Handle", + "IGSN", + "ISBN", + "ISSN", + "ISTC", + "LISSN", + "LSID", + "PMID", + "PURL", + "RRID", + "UPC", + "URL", + "URN", + "w3id" + ] + }, + "relationType": { + "type": "string", + "enum": [ + "IsCitedBy", + "Cites", + "IsCollectedBy", + "Collects", + "IsSupplementTo", + "IsSupplementedBy", + "IsContinuedBy", + "Continues", + "IsDescribedBy", + "Describes", + "HasMetadata", + "IsMetadataFor", + "HasVersion", + "IsVersionOf", + "IsNewVersionOf", + "IsPartOf", + "IsPreviousVersionOf", + "IsPublishedIn", + "HasPart", + "IsReferencedBy", + "References", + "IsDocumentedBy", + "Documents", + "IsCompiledBy", + "Compiles", + "IsVariantFormOf", + "IsOriginalFormOf", + "IsIdenticalTo", + "IsReviewedBy", + "Reviews", + "IsDerivedFrom", + "IsSourceOf", + "IsRequiredBy", + "Requires", + "IsObsoletedBy", + "Obsoletes", + "IsTranslationOf", + "HasTranslation" + ] + }, + "relatedObject": { + "type": "object", + "properties": { + "relationType": {"$ref": "#/definitions/relationType"}, + "relatedMetadataScheme": {"type": "string"}, + "schemeUri": {"type": "string", "format": "uri"}, + "schemeType": {"type": "string"}, + "resourceTypeGeneral": {"$ref": "#/definitions/resourceTypeGeneral"} + }, + "required": ["relationType"] + }, + "relatedObjectIf": { + "properties": { + "relationType": {"enum": ["HasMetadata", "IsMetadataFor"]} + } + }, + "relatedObjectElse": { + "$comment": "these properties may only be used with relation types HasMetadata/IsMetadataFor", + "properties": { + "relatedMetadataScheme": 
false, + "schemeUri": false, + "schemeType": false + } + }, + "descriptionType": { + "type": "string", + "enum": [ + "Abstract", + "Methods", + "SeriesInformation", + "TableOfContents", + "TechnicalInfo", + "Other" + ] + }, + "geoLocationPoint": { + "type": "object", + "additionalProperties": false, + "properties": { + "pointLongitude": {"$ref": "#/definitions/longitude"}, + "pointLatitude": {"$ref": "#/definitions/latitude"} + }, + "required": ["pointLongitude", "pointLatitude"] + }, + "funderIdentifierType": { + "type": "string", + "enum": [ + "ISNI", + "GRID", + "Crossref Funder ID", + "ROR", + "Other" + ] + }, + "publicationYear": { + "type": "string", + "pattern": "^[0-9]{4}$" + } + }, + "type": "object", + "properties": { + "doi": {"type": "string", "pattern" : "^10[.][0-9]{4,9}[/][^\\s]+$"}, + "prefix":{"type": "string", "pattern": "^10[.][0-9]{4,9}$"}, + "suffix":{"type": "string", "pattern": "^[^\\s]+$"}, + "event" : { + "type": "string", + "enum": [ + "hide", + "register", + "publish" + ] + }, + "url": {"type": "string", "format": "uri"}, + "types": { + "type": "object", + "additionalProperties": false, + "properties": { + "resourceType": {"type": "string"}, + "resourceTypeGeneral": {"$ref": "#/definitions/resourceTypeGeneral"} + }, + "required": ["resourceTypeGeneral"] + }, + "creators": { + "type": "array", + "items": { + "type": "object", + "allOf": [{ "$ref": "#/definitions/creator" }], + "required": ["name"] + }, + "minItems": 1 + }, + "titles": { + "type": "array", + "items": { + "type": "object", + "additionalProperties": false, + "properties": { + "title": {"type": "string"}, + "titleType": {"$ref": "#/definitions/titleType"}, + "lang": {"type": "string"} + }, + "required": ["title"] + }, + "minItems": 1, + "uniqueItems": true + }, + "publisher": { + "type": "object", + "additionalProperties": false, + "properties": { + "name": {"type":"string"}, + "publisherIdentifier": {"type":"string"}, + "publisherIdentifierScheme": {"type":"string"}, + 
"schemeUri": {"type":"string", "format": "uri"}, + "lang": {"type":"string"} + }, + "required": ["name"] + }, + "publicationYear": {"$ref": "#/definitions/publicationYear"}, + "subjects": { + "type": "array", + "items": { + "type": "object", + "additionalProperties": false, + "properties": { + "subject": {"type": "string"}, + "subjectScheme": {"type": "string"}, + "schemeUri": {"type": "string", "format": "uri"}, + "valueUri": {"type": "string", "format": "uri"}, + "classificationCode": {"type": "string"}, + "lang": {"type": "string"} + }, + "required": ["subject"] + }, + "uniqueItems": true + }, + "contributors": { + "type": "array", + "items": { + "type": "object", + "allOf": [{ "$ref": "#/definitions/contributor" }], + "required": ["contributorType", "name"] + } + }, + "dates": { + "type": "array", + "items": { + "type": "object", + "additionalProperties": false, + "properties": { + "date": {"$ref": "#/definitions/date"}, + "dateType": {"$ref": "#/definitions/dateType"}, + "dateInformation": {"type": "string"} + }, + "required": ["date", "dateType"] + }, + "uniqueItems": true + }, + "language": { + "type": "string", + "$comment": "Primary language of the resource. Allowed values are taken from IETF BCP 47, ISO 639-1 language codes." 
+ }, + "alternateIdentifiers": { + "type": "array", + "items": { + "type": "object", + "additionalProperties": false, + "properties": { + "alternateIdentifier": {"type": "string"}, + "alternateIdentifierType": {"type": "string"} + }, + "required": ["alternateIdentifier", "alternateIdentifierType"] + }, + "uniqueItems": true + }, + "relatedIdentifiers": { + "type": "array", + "items": { + "type": "object", + "allOf": [{ "$ref": "#/definitions/relatedObject"}], + "unevaluatedProperties": false, + "properties": { + "relatedIdentifier": {"type": "string"}, + "relatedIdentifierType": {"$ref": "#/definitions/relatedIdentifierType"} + }, + "required": ["relatedIdentifier", "relatedIdentifierType", "relationType"], + "if": {"$ref": "#/definitions/relatedObjectIf"}, + "else": {"$ref": "#/definitions/relatedObjectElse"} + } + }, + "relatedItems": { + "type": "array", + "items": { + "type": "object", + "allOf": [{ "$ref": "#/definitions/relatedObject"}], + "unevaluatedProperties": false, + "properties": { + "relatedItemIdentifier": { + "type": "object", + "additionalProperties": false, + "properties": { + "relatedItemIdentifier": {"type": "string"}, + "relatedItemIdentifierType": {"$ref": "#/definitions/relatedIdentifierType"} + }, + "required": ["relatedItemIdentifier", "relatedItemIdentifierType"] + }, + "relatedItemType": {"$ref": "#/definitions/resourceTypeGeneral"}, + "creators": { + "type": "array", + "items": { + "type": "object", + "unevaluatedProperties": false, + "allOf": [{ "$ref": "#/definitions/creator" }], + "required": ["name"] + } + }, + "contributors": { + "type": "array", + "items": { + "type": "object", + "unevaluatedProperties": false, + "allOf": [{ "$ref": "#/definitions/contributor" }], + "required": ["contributorType", "name"] + } + }, + "titles": { + "type": "array", + "items": { + "type": "object", + "additionalProperties": false, + "properties": { + "title": {"type": "string"}, + "titleType": {"$ref": "#/definitions/titleType"}, + "lang": {"type": 
"string"} + }, + "required": ["title"] + }, + "minItems": 1, + "uniqueItems": true + }, + "publicationYear": {"$ref": "#/definitions/publicationYear"}, + "volume": {"type": "string"}, + "issue": {"type": "string"}, + "firstPage": {"type": "string"}, + "lastPage": {"type": "string"}, + "edition": {"type": "string"}, + "publisher": {"type": "string"}, + "number": {"type":"string"}, + "numberType": { + "type": "string", + "enum": [ + "Article", + "Chapter", + "Report", + "Other" + ] + } + }, + "required": ["titles", "relatedItemType", "relationType"], + "if": {"$ref": "#/definitions/relatedObjectIf"}, + "else": {"$ref": "#/definitions/relatedObjectElse"} + }, + "uniqueItems": true + }, + "sizes": { + "type": "array", + "items": { + "type": "string" + }, + "uniqueItems": true + }, + "formats": { + "type": "array", + "items": { + "type": "string" + }, + "uniqueItems": true + }, + "version": { + "type": "string" + }, + "rightsList": { + "type": "array", + "items": { + "type": "object", + "additionalProperties": false, + "properties": { + "rights": {"type": "string"}, + "rightsUri": {"type": "string", "format": "uri"}, + "rightsIdentifier": {"type": "string"}, + "rightsIdentifierScheme": {"type": "string"}, + "schemeUri": {"type": "string", "format": "uri"}, + "lang": {"type": "string"} + } + }, + "uniqueItems": true + }, + "descriptions": { + "type": "array", + "items": { + "type": "object", + "additionalProperties": false, + "properties": { + "description": {"type": "string"}, + "descriptionType": {"$ref": "#/definitions/descriptionType"}, + "lang": {"type": "string"} + }, + "required": ["description", "descriptionType"] + }, + "uniqueItems": true + }, + "geoLocations": { + "type": "array", + "items": { + "type": "object", + "additionalProperties": false, + "properties": { + "geoLocationPlace": {"type": "string"}, + "geoLocationPoint": {"$ref": "#/definitions/geoLocationPoint"}, + "geoLocationBox": { + "type": "object", + "additionalProperties": false, + "properties": { 
+ "westBoundLongitude": {"$ref": "#/definitions/longitude"}, + "eastBoundLongitude": {"$ref": "#/definitions/longitude"}, + "southBoundLatitude": {"$ref": "#/definitions/latitude"}, + "northBoundLatitude": {"$ref": "#/definitions/latitude"} + }, + "required": ["westBoundLongitude", "eastBoundLongitude", "southBoundLatitude", "northBoundLatitude"] + }, + "geoLocationPolygon": { + "type": "array", + "items": { + "type": "object", + "additionalProperties": false, + "properties": { + "polygonPoint": {"$ref": "#/definitions/geoLocationPoint"}, + "inPolygonPoint": {"$ref": "#/definitions/geoLocationPoint"} + } + } + } + } + }, + "uniqueItems": true + }, + "fundingReferences": { + "type": "array", + "items": { + "type": "object", + "additionalProperties": false, + "properties": { + "funderName": {"type": "string"}, + "funderIdentifier": {"type": "string"}, + "funderIdentifierType": {"$ref": "#/definitions/funderIdentifierType"}, + "awardNumber": {"type": "string"}, + "awardUri": {"type": "string", "format": "uri"}, + "awardTitle": {"type": "string"} + }, + "required": ["funderName"] + }, + "uniqueItems": true + }, + "schemaVersion": { + "type": "string", + "const": "http://datacite.org/schema/kernel-4" + }, + "container": { + "type": "object", + "properties": { + "type": {"type": "string"}, + "title": {"type": "string"}, + "firstPage": {"type": "string"} + } + } + }, + "required": [ + "creators", + "titles", + "publisher", + "publicationYear", + "types", + "schemaVersion" + ] +} diff --git a/docs/index.rst b/docs/index.rst index 3430efd..f315854 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -65,6 +65,12 @@ Errors .. automodule:: datacite.errors :members: +DataCite v4.6 Metadata Management +================================== + +.. 
automodule:: datacite.schema46 + :members: dump_etree, tostring, validate + DataCite v4.5 Metadata Management ================================== diff --git a/tests/conftest.py b/tests/conftest.py index 5d0125f..c6d9d96 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -78,6 +78,14 @@ def example_json_file45(): return file.read() +@pytest.fixture +def example_json_file46(): + """Load DataCite v4.6 full example JSON.""" + path = dirname(__file__) + with open(join(path, "data", "datacite-v4.6-full-example.json")) as file: + return file.read() + + @pytest.fixture def example_json40(example_json_file40): """Load the DataCite v4.0 full example into a dict.""" @@ -108,6 +116,12 @@ def example_json45(example_json_file45): return json.loads(example_json_file45) +@pytest.fixture +def example_json46(example_json_file46): + """Load the DataCite v4.6 full example into a dict.""" + return json.loads(example_json_file46) + + def load_xml(filename): """Helper method for loading the XML example file.""" path = dirname(__file__) @@ -146,6 +160,12 @@ def example_xml_file45(): return load_xml("datacite-v4.5-full-example.xml") +@pytest.fixture +def example_xml_file46(): + """Load DataCite v4.6 full example XML.""" + return load_xml("datacite-v4.6-full-example.xml") + + @pytest.fixture def example_xml40(example_xml_file40): """Load DataCite v4.0 full example as an etree.""" @@ -172,10 +192,16 @@ def example_xml43(example_xml_file43): @pytest.fixture def example_xml45(example_xml_file45): - """Load DataCite v4.3 full example as an etree.""" + """Load DataCite v4.5 full example as an etree.""" return etree.fromstring(example_xml_file45.encode("utf-8")) +@pytest.fixture +def example_xml46(example_xml_file46): + """Load DataCite v4.6 full example as an etree.""" + return etree.fromstring(example_xml_file46.encode("utf-8")) + + def _load_xsd(xsd_filename): """Load one of the XSD schemas.""" return etree.XMLSchema( @@ -213,6 +239,12 @@ def xsd45(): return _load_xsd("4.5/metadata.xsd") 
+@pytest.fixture(scope="session") +def xsd46(): + """Load the DataCite v4.6 XSD schema.""" + return _load_xsd("4.6/metadata.xsd") + + @pytest.fixture(scope="function") def minimal_json42(): """Minimal valid JSON for DataCite 4.2.""" @@ -270,3 +302,20 @@ def minimal_json45(): "schemaVersion": "http://datacite.org/schema/kernel-4", "url": "https://www.example.com", } + + +@pytest.fixture(scope="function") +def minimal_json46(): + """Minimal valid JSON for DataCite 4.6.""" + return { + "doi": "10.1234/foo.bar", + "creators": [ + {"name": "Nielsen, Lars Holm"}, + ], + "titles": [{"title": "Minimal Test Case"}], + "publisher": {"name": "Invenio Software"}, + "publicationYear": "2016", + "types": {"resourceType": "", "resourceTypeGeneral": "Software"}, + "schemaVersion": "http://datacite.org/schema/kernel-4", + "url": "https://www.example.com", + } diff --git a/tests/data/4.6/datacite-example-dataset-v4.json b/tests/data/4.6/datacite-example-dataset-v4.json new file mode 100644 index 0000000..94cb6f8 --- /dev/null +++ b/tests/data/4.6/datacite-example-dataset-v4.json @@ -0,0 +1,201 @@ +{ + "doi": "10.82433/9184-dy35", + "prefix": "10.82433", + "suffix": "9184-dy35", + "alternateIdentifiers": [], + "creators": [ + { + "name": "National Gallery", + "nameType": "Organizational", + "affiliation": [], + "nameIdentifiers": [ + { + "schemeUri": "https://ror.org", + "nameIdentifier": "https://ror.org/043kfff89", + "nameIdentifierScheme": "ROR" + } + ] + } + ], + "titles": [ + { + "lang": "en", + "title": "External Environmental Data, 2010-2020, National Gallery" + } + ], + "publisher": { + "lang": "en", + "name": "National Gallery", + "schemeUri": "https://ror.org/", + "publisherIdentifier": "https://ror.org/043kfff89", + "publisherIdentifierScheme": "ROR" + }, + "container": {}, + "publicationYear": "2022", + "subjects": [ + { + "subject": "FOS: Earth and related environmental sciences", + "schemeUri": "http://www.oecd.org/science/inno/38235147.pdf", + 
"subjectScheme": "Fields of Science and Technology (FOS)" + }, + { + "subject": "temperature", + "valueUri": "https://www.wikidata.org/wiki/Q11466", + "schemeUri": "https://www.wikidata.org/wiki", + "subjectScheme": "Wikidata" + }, + { + "subject": "relative humidity", + "valueUri": "http://vocab.getty.edu/aat/300192097", + "schemeUri": "http://vocab.getty.edu/aat", + "subjectScheme": "Art and Architecture Thesaurus" + }, + { + "subject": "illuminance", + "valueUri": "https://www.wikidata.org/wiki/Q194411", + "schemeUri": "https://www.wikidata.org/wiki", + "subjectScheme": "Wikidata" + }, + { + "subject": "moisture content", + "valueUri": "http://vocab.getty.edu/aat/300379432", + "schemeUri": "http://vocab.getty.edu/aat", + "subjectScheme": "Art and Architecture Thesaurus" + }, + { + "subject": "Environmental monitoring", + "valueUri": "http://id.worldcat.org/fast/913214", + "schemeUri": "http://id.worldcat.org/fast", + "subjectScheme": "FAST" + } + ], + "contributors": [ + { + "name": "Padfield, Joseph", + "nameType": "Personal", + "givenName": "Joseph", + "familyName": "Padfield", + "affiliation": [ + { + "name": "National Gallery", + "affiliationIdentifier": "https://ror.org/043kfff89", + "affiliationIdentifierScheme": "ROR" + } + ], + "contributorType": "ContactPerson", + "nameIdentifiers": [ + { + "schemeUri": "https://orcid.org", + "nameIdentifier": "https://orcid.org/0000-0002-2572-6428", + "nameIdentifierScheme": "ORCID" + } + ] + }, + { + "name": "Building Facilities Department", + "nameType": "Organizational", + "affiliation": [ + { + "name": "National Gallery", + "affiliationIdentifier": "https://ror.org/043kfff89", + "affiliationIdentifierScheme": "ROR" + } + ], + "contributorType": "DataCollector", + "nameIdentifiers": [] + } + ], + "dates": [ + { + "date": "2010/2020", + "dateType": "Collected" + }, + { + "date": "2010/2020", + "dateType": "Coverage" + }, + { + "date": "2022", + "dateType": "Issued" + } + ], + "language": "en", + "types": { + 
"resourceType": "Environmental data", + "resourceTypeGeneral": "Dataset" + }, + "relatedIdentifiers": [ + { + "relationType": "IsSupplementTo", + "relatedIdentifier": "https://www.nationalgallery.org.uk/research/research-resources/research-papers/improving-our-environment", + "resourceTypeGeneral": "Report", + "relatedIdentifierType": "URL" + }, + { + "relationType": "IsSourceOf", + "relatedIdentifier": "https://research.ng-london.org.uk/scientific/env/", + "resourceTypeGeneral": "InteractiveResource", + "relatedIdentifierType": "URL" + }, + { + "relationType": "IsSupplementedBy", + "relatedIdentifier": "10.1080/00393630.2018.1504449/", + "resourceTypeGeneral": "JournalArticle", + "relatedIdentifierType": "DOI" + }, + { + "relationType": "IsDocumentedBy", + "relatedIdentifier": "10.5281/zenodo.7629200", + "resourceTypeGeneral": "ConferencePaper", + "relatedIdentifierType": "DOI" + } + ], + "relatedItems": [], + "sizes": [ + "13.6 MB" + ], + "formats": [ + "application/json" + ], + "version": "1.0", + "rightsList": [ + { + "lang": "en", + "rights": "Creative Commons Attribution 4.0 International", + "rightsUri": "https://creativecommons.org/licenses/by/4.0/legalcode", + "schemeUri": "https://spdx.org/licenses/", + "rightsIdentifier": "cc-by-4.0", + "rightsIdentifierScheme": "SPDX" + } + ], + "descriptions": [ + { + "lang": "en", + "description": "The National Gallery houses one of the greatest ‒ and most visited ‒ collections of Western European painting, often welcoming more than 6 million visitors a year. The Scientific Department researches the history of materials and techniques and their degradation mechanisms (fading, darkening, etc.), through analysis using a range of micro-analytical and imaging methods. 
It also works in the fields of preventive conservation and environmental management of galleries (monitoring, lighting, vibration), as well as the development of a wide range of new instruments and methods (both analytical and imaging) for technical examination of artworks, most often in collaboration with universities. The examination of the environmental conditions within The National Gallery began soon after it was established in 1824. Early concerns often related to dust and pollution. The first electronic data logger, recording light levels, was introduced in 1975, with the regular logging of temperature and relative humidity followed soon after. Today the Gallery has hundreds of sensors, monitoring the environmental conditions in over hundreds of locations, 24/7. The National Gallery currently archives millions of environmental readings every year, which are used to monitor the Gallery conditions ensuring the on-going care of the collections. These readings are also used to examine long term environmental trends, manage additional events, and within the planning and management of preventive conservation work and research at the National Gallery. The National Gallery Environmental Database is an internal, bespoke system which has been developed, over the last 20 years, to act as an archive for all of these environmental readings. 
This dataset contains a selected range of the data gathered from a few of the external sensors that have been used to monitor ambient light levels, temperature, relative humidity and air moisture content 24 hours a day, 7 days a week over the last two decades.", + "descriptionType": "Abstract" + } + ], + "geoLocations": [ + { + "geoLocationPlace": "Roof of National Gallery, London, UK", + "geoLocationPoint": { + "pointLatitude": 51.50872, + "pointLongitude": -0.12841 + } + } + ], + "fundingReferences": [ + { + "awardUri": "https://cordis.europa.eu/project/id/871034", + "awardTitle": "Integrating Platforms for the European Research Infrastructure ON Heritage Science", + "funderName": "H2020 Excellent Science", + "awardNumber": "871034", + "funderIdentifier": "https://doi.org/10.13039/100010662", + "funderIdentifierType": "Crossref Funder ID" + } + ], + "url": "https://research.ng-london.org.uk/scientific/env/External%20Environmental%20Data%202010-2020%20National%20Gallery%20V1.0.json", + "schemaVersion": "http://datacite.org/schema/kernel-4" +} + + diff --git a/tests/data/4.6/datacite-example-dataset-v4.xml b/tests/data/4.6/datacite-example-dataset-v4.xml new file mode 100644 index 0000000..45a370e --- /dev/null +++ b/tests/data/4.6/datacite-example-dataset-v4.xml @@ -0,0 +1,80 @@ + + + + 10.82433/9184-DY35 + + + National Gallery + https://ror.org/043kfff89 + + + + External Environmental Data, 2010-2020, National Gallery + + National Gallery + 2022 + Environmental data + + FOS: Earth and related environmental sciences + temperature + relative humidity + illuminance + moisture content + Environmental monitoring + + + + Padfield, Joseph + Joseph + Padfield + https://orcid.org/0000-0002-2572-6428 + National Gallery + + + Building Facilities Department + National Gallery + + + + 2010/2020 + 2010/2020 + 2022 + + en + + https://www.nationalgallery.org.uk/research/research-resources/research-papers/improving-our-environment + 
https://research.ng-london.org.uk/scientific/env/ + 10.1080/00393630.2018.1504449/ + 10.5281/zenodo.7629200 + + + 13.6 MB + + + application/json + + 1.0 + + Creative Commons Attribution Non Commercial 4.0 International + + + The National Gallery houses one of the greatest ‒ and most visited ‒ collections of Western European painting, often welcoming more than 6 million visitors a year. The Scientific Department researches the history of materials and techniques and their degradation mechanisms (fading, darkening, etc.), through analysis using a range of micro-analytical and imaging methods. It also works in the fields of preventive conservation and environmental management of galleries (monitoring, lighting, vibration), as well as the development of a wide range of new instruments and methods (both analytical and imaging) for technical examination of artworks, most often in collaboration with universities. The examination of the environmental conditions within The National Gallery began soon after it was established in 1824. Early concerns often related to dust and pollution. The first electronic data logger, recording light levels, was introduced in 1975, with the regular logging of temperature and relative humidity followed soon after. Today the Gallery has hundreds of sensors, monitoring the environmental conditions in over hundreds of locations, 24/7. The National Gallery currently archives millions of environmental readings every year, which are used to monitor the Gallery conditions ensuring the on-going care of the collections. These readings are also used to examine long term environmental trends, manage additional events, and within the planning and management of preventive conservation work and research at the National Gallery. The National Gallery Environmental Database is an internal, bespoke system which has been developed, over the last 20 years, to act as an archive for all of these environmental readings. 
This dataset contains a selected range of the data gathered from a few of the external sensors that have been used to monitor ambient light levels, temperature, relative humidity and air moisture content 24 hours a day, 7 days a week over the last two decades. + + + + Roof of National Gallery, London, UK + + 51.50872 + -0.12841 + + + + + + H2020 Excellent Science + https://doi.org/10.13039/100010662 + 871034 + Integrating Platforms for the European Research Infrastructure ON Heritage Science + + + diff --git a/tests/data/4.6/datacite-example-instrument-v4.json b/tests/data/4.6/datacite-example-instrument-v4.json new file mode 100644 index 0000000..3aae34c --- /dev/null +++ b/tests/data/4.6/datacite-example-instrument-v4.json @@ -0,0 +1,99 @@ +{ + "doi": "10.82433/08qf-ee96", + "prefix": "10.82433", + "suffix": "08qf-ee96", + "alternateIdentifiers": [ + { + "alternateIdentifierType": "SerialNumber", + "alternateIdentifier": "1234567" + } + ], + "creators": [ + { + "name": "DECTRIS", + "nameType": "Organizational", + "affiliation": [], + "nameIdentifiers": [ + { + "schemeUri": "https://www.wikidata.org/wiki/", + "nameIdentifier": "Q107529885", + "nameIdentifierScheme": "Wikidata" + } + ] + } + ], + "titles": [ + { + "lang": "en-US", + "title": "Pilatus detector at MX station 14.1" + } + ], + "publisher": { + "lang": "en", + "name": "Helmholtz Centre Potsdam - GFZ German Research Centre for Geosciences" + }, + "container": { + "type": "Series", + "identifier": "1234.1675", + "identifierType": "Handle" + }, + "publicationYear": "2022", + "subjects": [], + "contributors": [ + { + "name": "Helmholtz-Zentrum Berlin Für Materialien Und Energie", + "nameType": "Organizational", + "affiliation": [], + "contributorType": "HostingInstitution", + "nameIdentifiers": [ + { + "schemeUri": "https://ror.org", + "nameIdentifier": "https://ror.org/02aj13c28", + "nameIdentifierScheme": "ROR" + } + ] + } + ], + "dates": [ + { + "date": "2022", + "dateType": "Issued" + } + ], + "types": 
{ + "resourceType": "Raster image pixel detector", + "resourceTypeGeneral": "Instrument" + }, + "relatedIdentifiers": [ + { + "relationType": "IsPartOf", + "relatedIdentifier": "1234.1675", + "resourceTypeGeneral": "Instrument", + "relatedIdentifierType": "Handle" + }, + { + "relationType": "IsDescribedBy", + "relatedIdentifier": "https://www.dectris.com/products/pilatus3/pilatus3-s-for-synchrotron/details/pilatus3-s-6m", + "resourceTypeGeneral": "Text", + "relatedIdentifierType": "URL" + } + ], + "relatedItems": [], + "sizes": [], + "formats": [], + "rightsList": [], + "descriptions": [ + { + "lang": "en-US", + "description": "The Pilatus 6M pixel-detector at the MX station 14.1", + "descriptionType": "Abstract" + }, + { + "lang": "en-US", + "description": "Model Name: PILATUS3 S 6M. Instrument type: Raster image pixel detector. Measured variables: X-ray.", + "descriptionType": "TechnicalInfo" + } + ], + "url": "https://github.com/rdawg-pidinst/schema/blob/master/support/examples/hzb-mx-14-1-pilatus.xml", + "schemaVersion": "http://datacite.org/schema/kernel-4" +} diff --git a/tests/data/4.6/datacite-example-instrument-v4.xml b/tests/data/4.6/datacite-example-instrument-v4.xml new file mode 100644 index 0000000..7f6d2df --- /dev/null +++ b/tests/data/4.6/datacite-example-instrument-v4.xml @@ -0,0 +1,34 @@ + + + + 10.82433/08QF-EE96 + + + DECTRIS + Q107529885 + + + + Pilatus detector at MX station 14.1 + + Helmholtz Centre Potsdam - GFZ German Research Centre for Geosciences + 2022 + + + Helmholtz-Zentrum Berlin für Materialien und Energie + https://ror.org/02aj13c28 + + + Raster image pixel detector + + 1234567 + + + 1234.1675 + https://www.dectris.com/products/pilatus3/pilatus3-s-for-synchrotron/details/pilatus3-s-6m + + + The Pilatus 6M pixel-detector at the MX station 14.1 + Model Name: PILATUS3 S 6M. Instrument type: Raster image pixel detector. Measured variables: X-ray. 
+ + \ No newline at end of file diff --git a/tests/data/4.6/datacite-example-multilingual-v4.json b/tests/data/4.6/datacite-example-multilingual-v4.json new file mode 100644 index 0000000..70fcace --- /dev/null +++ b/tests/data/4.6/datacite-example-multilingual-v4.json @@ -0,0 +1,150 @@ +{ + "doi": "10.82433/byt7-2g42", + "prefix": "10.82433", + "suffix": "byt7-2g42", + "alternateIdentifiers": [], + "creators": [ + { + "name": "Zou, Jing", + "nameType": "Personal", + "givenName": "Jing", + "familyName": "Zou", + "affiliation": [], + "nameIdentifiers": [ + { + "schemeUri": "https://orcid.org", + "nameIdentifier": "https://orcid.org/0000-0002-4553-2743", + "nameIdentifierScheme": "ORCID" + } + ] + }, + { + "name": "DataCite", + "nameType": "Organizational", + "affiliation": [], + "nameIdentifiers": [ + { + "schemeUri": "https://ror.org", + "nameIdentifier": "https://ror.org/04wxnsj81", + "nameIdentifierScheme": "ROR" + } + ] + } + ], + "titles": [ + { + "lang": "en", + "title": "Advances in Chemistry" + }, + { + "lang": "es", + "title": "Avances en Química", + "titleType": "TranslatedTitle" + }, + { + "lang": "zh", + "title": "化学进展", + "titleType": "TranslatedTitle" + } + ], + "publisher": { + "lang": "en", + "name": "DataCite", + "schemeUri": "https://ror.org/", + "publisherIdentifier": "https://ror.org/04wxnsj81", + "publisherIdentifierScheme": "ROR" + }, + "container": { + "type": "Series", + "identifier": "arXiv:0706.0001", + "identifierType": "arXiv" + }, + "publicationYear": "2022", + "subjects": [ + { + "lang": "en", + "subject": "Chemistry" + }, + { + "lang": "es", + "subject": "Químicas" + }, + { + "lang": "zh", + "subject": "化学" + } + ], + "contributors": [], + "dates": [ + { + "date": "2024-01-01", + "dateType": "Available" + }, + { + "date": "2022", + "dateType": "Issued" + } + ], + "language": "en", + "types": { + "resourceTypeGeneral": "BookChapter" + }, + "relatedIdentifiers": [ + { + "relationType": "IsPartOf", + "relatedIdentifier": 
"arXiv:0706.0001", + "resourceTypeGeneral": "Book", + "relatedIdentifierType": "arXiv" + } + ], + "relatedItems": [], + "sizes": [], + "formats": [], + "rightsList": [ + { + "lang": "en", + "rights": "Creative Commons Attribution 4.0 International", + "rightsUri": "https://creativecommons.org/licenses/by/4.0/legalcode", + "schemeUri": "https://spdx.org/licenses/", + "rightsIdentifier": "cc-by-4.0", + "rightsIdentifierScheme": "SPDX" + }, + { + "lang": "es", + "rights": "Creative Commons Attribution 4.0 International", + "rightsUri": "https://creativecommons.org/licenses/by/4.0/legalcode", + "schemeUri": "https://spdx.org/licenses/", + "rightsIdentifier": "cc-by-4.0", + "rightsIdentifierScheme": "SPDX" + }, + { + "lang": "zh", + "rights": "Creative Commons Attribution 4.0 International", + "rightsUri": "https://creativecommons.org/licenses/by/4.0/legalcode", + "schemeUri": "https://spdx.org/licenses/", + "rightsIdentifier": "cc-by-4.0", + "rightsIdentifierScheme": "SPDX" + } + ], + "descriptions": [ + { + "lang": "en", + "description": "This chapter reviews selected landmarks ocurred in Chemistry basic research in the last 5 years", + "descriptionType": "Abstract" + }, + { + "lang": "es", + "description": "El capítulo repasa los principales avances en la investigación básica en Ciencias Químicas en los últimos 5 años", + "descriptionType": "Abstract" + }, + { + "lang": "zh", + "description": "本章回顾了过去5年中在化学基础研究中发生的一些里程碑式的事件", + "descriptionType": "Abstract" + } + ], + "geoLocations": [], + "fundingReferences": [], + "url": "https://example.org", + "schemaVersion": "http://datacite.org/schema/kernel-4" +} diff --git a/tests/data/4.6/datacite-example-multilingual-v4.xml b/tests/data/4.6/datacite-example-multilingual-v4.xml new file mode 100644 index 0000000..dbde8d8 --- /dev/null +++ b/tests/data/4.6/datacite-example-multilingual-v4.xml @@ -0,0 +1,45 @@ + + + + 10.82433/BYT7-2G42 + + Advances in Chemistry + Avances en Química + 化学进展 + + + + Zou, Jing + 
https://orcid.org/0000-0002-4553-2743 + + + DataCite + https://ror.org/04wxnsj81 + + + DataCite + 2022 + + + Chemistry + Químicas + 化学 + + + 2024-01-01 + + + This chapter reviews selected landmarks ocurred in Chemistry basic research in the last 5 years + El capítulo repasa los principales avances en la investigación básica en Ciencias Químicas en los últimos 5 años + 本章回顾了过去5年中在化学基础研究中发生的一些里程碑式的事件 + + en + + arXiv:0706.0001 + + + Creative Commons Attribution 4.0 International + Atribución 4.0 Internacional + 署名 4.0 国际 + + \ No newline at end of file diff --git a/tests/data/4.6/datacite-example-relateditem1-v4.json b/tests/data/4.6/datacite-example-relateditem1-v4.json new file mode 100644 index 0000000..349da50 --- /dev/null +++ b/tests/data/4.6/datacite-example-relateditem1-v4.json @@ -0,0 +1,89 @@ +{ + "doi": "10.82433/q54d-pf76", + "prefix": "10.82433", + "suffix": "q54d-pf76", + "alternateIdentifiers": [], + "creators": [ + { + "name": "Garcia, Sofia", + "nameType": "Personal", + "givenName": "Sofia", + "familyName": "Garcia", + "affiliation": [ + { + "name": "Arizona State University", + "schemeUri": "https://ror.org", + "affiliationIdentifier": "https://ror.org/03efmqc40" + } + ], + "nameIdentifiers": [ + { + "schemeUri": "https://orcid.org", + "nameIdentifier": "https://orcid.org/0000-0001-5727-2427", + "nameIdentifierScheme": "ORCID" + } + ] + } + ], + "titles": [ + { + "lang": "en", + "title": "Example Article Title" + } + ], + "publisher": { + "name": "Example Publisher" + }, + "container": {}, + "publicationYear": "2022", + "subjects": [], + "contributors": [], + "dates": [ + { + "date": "2022", + "dateType": "Issued" + } + ], + "types": { + "resourceType": "ScholarlyArticle", + "resourceTypeGeneral": "JournalArticle" + }, + "relatedIdentifiers": [ + { + "relationType": "IsPublishedIn", + "relatedIdentifier": "1234-5678", + "relatedIdentifierType": "ISSN" + } + ], + "relatedItems": [ + { + "issue": "4", + "titles": [ + { + "title": "Journal of Metadata 
Examples" + } + ], + "volume": "3", + "creators": [], + "lastPage": "35", + "firstPage": "20", + "publisher": "Example Publisher", + "contributors": [], + "relationType": "IsPublishedIn", + "publicationYear": "2022", + "relatedItemType": "Journal", + "relatedItemIdentifier": { + "relatedItemIdentifier": "1234-5678", + "relatedItemIdentifierType": "ISSN" + } + } + ], + "sizes": [], + "formats": [], + "rightsList": [], + "descriptions": [], + "geoLocations": [], + "fundingReferences": [], + "url": "https://example.org/RelatedItem1", + "schemaVersion": "http://datacite.org/schema/kernel-4" +} diff --git a/tests/data/4.6/datacite-example-relateditem1-v4.xml b/tests/data/4.6/datacite-example-relateditem1-v4.xml new file mode 100644 index 0000000..6ab46c0 --- /dev/null +++ b/tests/data/4.6/datacite-example-relateditem1-v4.xml @@ -0,0 +1,40 @@ + + + + 10.82433/Q54D-PF76 + + + Garcia, Sofia + Sofia + Garcia + https://orcid.org/0000-0001-5727-2427 + Arizona State University + + + + Example Article Title + + Example Publisher + 2022 + ScholarlyArticle + + 2022 + + + 1234-5678 + + + + 1234-5678 + + Journal of Metadata Examples + + 2022 + 3 + 4 + 20 + 35 + Example Publisher + + + \ No newline at end of file diff --git a/tests/data/4.6/datacite-example-relateditem2-v4.json b/tests/data/4.6/datacite-example-relateditem2-v4.json new file mode 100644 index 0000000..b9848dd --- /dev/null +++ b/tests/data/4.6/datacite-example-relateditem2-v4.json @@ -0,0 +1,78 @@ +{ + + "doi": "10.82433/eck0-f231", + "prefix": "10.82433", + "suffix": "eck0-f231", + "alternateIdentifiers": [], + "creators": [ + { + "name": "Garcia, Sofia", + "nameType": "Personal", + "givenName": "Sofia", + "familyName": "Garcia", + "affiliation": [], + "nameIdentifiers": [] + } + ], + "titles": [ + { + "lang": "en", + "title": "Example Chapter Title" + } + ], + "publisher": { + "lang": "en", + "name": "Example Publisher" + }, + "container": {}, + "publicationYear": "1980", + "subjects": [], + "contributors": [], + 
"dates": [ + { + "date": "1980", + "dateType": "Issued" + } + ], + "types": { + "resourceTypeGeneral": "BookChapter" + }, + "relatedIdentifiers": [], + "relatedItems": [ + { + "titles": [ + { + "title": "Example Book Title" + } + ], + "volume": "I", + "edition": "2nd edition", + "creators": [], + "lastPage": "155", + "firstPage": "110", + "publisher": "Example Publisher", + "contributors": [ + { + "name": "Miller, Elizabeth", + "nameType": "Personal", + "givenName": "Elizabeth", + "familyName": "Miller", + "affiliation": [], + "contributorType": "Editor", + "nameIdentifiers": [] + } + ], + "relationType": "IsPublishedIn", + "publicationYear": "1980", + "relatedItemType": "Book" + } + ], + "sizes": [], + "formats": [], + "rightsList": [], + "descriptions": [], + "geoLocations": [], + "fundingReferences": [], + "url": "https://example.org/RelatedItem2", + "schemaVersion": "http://datacite.org/schema/kernel-4" +} diff --git a/tests/data/4.6/datacite-example-relateditem2-v4.xml b/tests/data/4.6/datacite-example-relateditem2-v4.xml new file mode 100644 index 0000000..df7b078 --- /dev/null +++ b/tests/data/4.6/datacite-example-relateditem2-v4.xml @@ -0,0 +1,36 @@ + + + + 10.82433/ECK0-F231 + + + Garcia, Sofia + Sofia + Garcia + + + + Example Chapter Title + + Example Publisher + 1980 + + + + + Example Book Title + + 1980 + I + 110 + 155 + Example Publisher + 2nd edition + + + Miller, Elizabeth + + + + + \ No newline at end of file diff --git a/tests/data/4.6/datacite-example-relateditem3-v4.json b/tests/data/4.6/datacite-example-relateditem3-v4.json new file mode 100644 index 0000000..caaebbf --- /dev/null +++ b/tests/data/4.6/datacite-example-relateditem3-v4.json @@ -0,0 +1,84 @@ +{ + "doi": "10.82433/4fdh-rh04", + "prefix": "10.82433", + "suffix": "4fdh-rh04", + "alternateIdentifiers": [], + "creators": [ + { + "name": "Garcia, Sofia", + "nameType": "Personal", + "givenName": "Sofia", + "familyName": "Garcia", + "affiliation": [], + "nameIdentifiers": [] + } + ], + 
"titles": [ + { + "lang": "en", + "title": "Example Chapter Title" + } + ], + "publisher": { + "lang": "en", + "name": "Example Publisher" + }, + "container": {}, + "publicationYear": "2016", + "subjects": [], + "contributors": [], + "dates": [ + { + "date": "2016", + "dateType": "Issued" + } + ], + "types": { + "resourceTypeGeneral": "BookChapter" + }, + "relatedIdentifiers": [ + { + "relationType": "IsPublishedIn", + "relatedIdentifier": "0-12-345678-1", + "relatedIdentifierType": "ISBN" + } + ], + "relatedItems": [ + { + "number": "4", + "titles": [ + { + "title": "Example Book Title" + } + ], + "creators": [ + { + "name": "Garcia, Sofia", + "nameType": "Personal", + "givenName": "Sofia", + "familyName": "Garcia" + } + ], + "lastPage": "63", + "firstPage": "45", + "publisher": "Example Publisher", + "numberType": "Chapter", + "contributors": [], + "relationType": "IsPublishedIn", + "publicationYear": "2016", + "relatedItemType": "Book", + "relatedItemIdentifier": { + "relatedItemIdentifier": "0-12-345678-1", + "relatedItemIdentifierType": "ISBN" + } + } + ], + "sizes": [], + "formats": [], + "rightsList": [], + "descriptions": [], + "geoLocations": [], + "fundingReferences": [], + "url": "https://example.org/RelatedItem3", + "schemaVersion": "http://datacite.org/schema/kernel-4" +} diff --git a/tests/data/4.6/datacite-example-relateditem3-v4.xml b/tests/data/4.6/datacite-example-relateditem3-v4.xml new file mode 100644 index 0000000..74c5cd2 --- /dev/null +++ b/tests/data/4.6/datacite-example-relateditem3-v4.xml @@ -0,0 +1,41 @@ + + + + 10.82433/4FDH-RH04 + + + Garcia, Sofia + Sofia + Garcia + + + + Example Chapter Title + + Example Publisher + 2016 + + + 0-12-345678-1 + + + + 0-12-345678-1 + + + Garcia, Sofia + Sofia + Garcia + + + + Example Book Title + + 2016 + 4 + 45 + 63 + Example Publisher + + + \ No newline at end of file diff --git a/tests/data/4.6/include/datacite-contributorType-v4.xsd b/tests/data/4.6/include/datacite-contributorType-v4.xsd new file 
mode 100644 index 0000000..686ee57 --- /dev/null +++ b/tests/data/4.6/include/datacite-contributorType-v4.xsd @@ -0,0 +1,37 @@ + + + + + + The type of contributor of the resource. + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/tests/data/4.6/include/datacite-dateType-v4.xsd b/tests/data/4.6/include/datacite-dateType-v4.xsd new file mode 100644 index 0000000..17db992 --- /dev/null +++ b/tests/data/4.6/include/datacite-dateType-v4.xsd @@ -0,0 +1,27 @@ + + + + + + The type of date. Use RKMS‐ISO8601 standard for depicting date ranges.To indicate the end of an embargo period, use Available. To indicate the start of an embargo period, use Submitted or Accepted, as appropriate. + + + + + + + + + + + + + + + + + diff --git a/tests/data/4.6/include/datacite-descriptionType-v4.xsd b/tests/data/4.6/include/datacite-descriptionType-v4.xsd new file mode 100644 index 0000000..4f5e5f4 --- /dev/null +++ b/tests/data/4.6/include/datacite-descriptionType-v4.xsd @@ -0,0 +1,19 @@ + + + + + + The type of the description. + + + + + + + + + + + diff --git a/tests/data/4.6/include/datacite-funderIdentifierType-v4.xsd b/tests/data/4.6/include/datacite-funderIdentifierType-v4.xsd new file mode 100644 index 0000000..754c972 --- /dev/null +++ b/tests/data/4.6/include/datacite-funderIdentifierType-v4.xsd @@ -0,0 +1,16 @@ + + + + + + The type of the funderIdentifier. 
+ + + + + + + + + + diff --git a/tests/data/4.6/include/datacite-nameType-v4.xsd b/tests/data/4.6/include/datacite-nameType-v4.xsd new file mode 100644 index 0000000..bf54922 --- /dev/null +++ b/tests/data/4.6/include/datacite-nameType-v4.xsd @@ -0,0 +1,10 @@ + + + + + + + + + + diff --git a/tests/data/4.6/include/datacite-numberType-v4.xsd b/tests/data/4.6/include/datacite-numberType-v4.xsd new file mode 100644 index 0000000..0de90c7 --- /dev/null +++ b/tests/data/4.6/include/datacite-numberType-v4.xsd @@ -0,0 +1,12 @@ + + + + + + + + + + + + diff --git a/tests/data/4.6/include/datacite-relatedIdentifierType-v4.xsd b/tests/data/4.6/include/datacite-relatedIdentifierType-v4.xsd new file mode 100644 index 0000000..71930fd --- /dev/null +++ b/tests/data/4.6/include/datacite-relatedIdentifierType-v4.xsd @@ -0,0 +1,37 @@ + + + + + + The type of the RelatedIdentifier. + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/tests/data/4.6/include/datacite-relationType-v4.xsd b/tests/data/4.6/include/datacite-relationType-v4.xsd new file mode 100644 index 0000000..25068b9 --- /dev/null +++ b/tests/data/4.6/include/datacite-relationType-v4.xsd @@ -0,0 +1,57 @@ + + + + + + Description of the relationship of the resource being registered (A) and the related resource (B). + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/tests/data/4.6/include/datacite-resourceType-v4.xsd b/tests/data/4.6/include/datacite-resourceType-v4.xsd new file mode 100644 index 0000000..29c092c --- /dev/null +++ b/tests/data/4.6/include/datacite-resourceType-v4.xsd @@ -0,0 +1,49 @@ + + + + + + The general type of a resource. 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/tests/data/4.6/include/datacite-titleType-v4.xsd b/tests/data/4.6/include/datacite-titleType-v4.xsd new file mode 100644 index 0000000..18efc0d --- /dev/null +++ b/tests/data/4.6/include/datacite-titleType-v4.xsd @@ -0,0 +1,14 @@ + + + + + + + + + + + + diff --git a/tests/data/4.6/include/xml.xsd b/tests/data/4.6/include/xml.xsd new file mode 100644 index 0000000..bd291f3 --- /dev/null +++ b/tests/data/4.6/include/xml.xsd @@ -0,0 +1,286 @@ + + + + + + +
+

About the XML namespace

+ +
+

+ This schema document describes the XML namespace, in a form + suitable for import by other schema documents. +

+

+ See + http://www.w3.org/XML/1998/namespace.html and + + http://www.w3.org/TR/REC-xml for information + about this namespace. +

+

+ Note that local names in this namespace are intended to be + defined only by the World Wide Web Consortium or its subgroups. + The names currently defined in this namespace are listed below. + They should not be used with conflicting semantics by any Working + Group, specification, or document instance. +

+

+ See further below in this document for more information about how to refer to this schema document from your own + XSD schema documents and about the + namespace-versioning policy governing this schema document. +

+
+
+
+
+ + + + +
+ +

lang (as an attribute name)

+

+ denotes an attribute whose value + is a language code for the natural language of the content of + any element; its value is inherited. This name is reserved + by virtue of its definition in the XML specification.

+ +
+
+

Notes

+

+ Attempting to install the relevant ISO 2- and 3-letter + codes as the enumerated possible values is probably never + going to be a realistic possibility. +

+

+ See BCP 47 at + http://www.rfc-editor.org/rfc/bcp/bcp47.txt + and the IANA language subtag registry at + + http://www.iana.org/assignments/language-subtag-registry + for further information. +

+

+ The union allows for the 'un-declaration' of xml:lang with + the empty string. +

+
+
+
+ + + + + + + + + +
+ + + + +
+ +

space (as an attribute name)

+

+ denotes an attribute whose + value is a keyword indicating what whitespace processing + discipline is intended for the content of the element; its + value is inherited. This name is reserved by virtue of its + definition in the XML specification.

+ +
+
+
+ + + + + + +
+ + + +
+ +

base (as an attribute name)

+

+ denotes an attribute whose value + provides a URI to be used as the base for interpreting any + relative URIs in the scope of the element on which it + appears; its value is inherited. This name is reserved + by virtue of its definition in the XML Base specification.

+ +

+ See http://www.w3.org/TR/xmlbase/ + for information about this attribute. +

+
+
+
+
+ + + + +
+ +

id (as an attribute name)

+

+ denotes an attribute whose value + should be interpreted as if declared to be of type ID. + This name is reserved by virtue of its definition in the + xml:id specification.

+ +

+ See http://www.w3.org/TR/xml-id/ + for information about this attribute. +

+
+
+
+
+ + + + + + + + + + +
+ +

Father (in any context at all)

+ +
+

+ denotes Jon Bosak, the chair of + the original XML Working Group. This name is reserved by + the following decision of the W3C XML Plenary and + XML Coordination groups: +

+
+

+ In appreciation for his vision, leadership and + dedication the W3C XML Plenary on this 10th day of + February, 2000, reserves for Jon Bosak in perpetuity + the XML name "xml:Father". +

+
+
+
+
+
+ + + +
+

About this schema document

+ +
+

+ This schema defines attributes and an attribute group suitable + for use by schemas wishing to allow xml:base, + xml:lang, xml:space or + xml:id attributes on elements they define. +

+

+ To enable this, such a schema must import this schema for + the XML namespace, e.g. as follows: +

+
+          <schema . . .>
+           . . .
+           <import namespace="http://www.w3.org/XML/1998/namespace"
+                      schemaLocation="http://www.w3.org/2001/xml.xsd"/>
+     
+

+ or +

+
+           <import namespace="http://www.w3.org/XML/1998/namespace"
+                      schemaLocation="http://www.w3.org/2009/01/xml.xsd"/>
+     
+

+ Subsequently, qualified reference to any of the attributes or the + group defined below will have the desired effect, e.g. +

+
+          <type . . .>
+           . . .
+           <attributeGroup ref="xml:specialAttrs"/>
+     
+

+ will define a type which will schema-validate an instance element + with any of those attributes. +

+
+
+
+
+ + + +
+

Versioning policy for this schema document

+
+

+ In keeping with the XML Schema WG's standard versioning + policy, this schema document will persist at + + http://www.w3.org/2009/01/xml.xsd. +

+

+ At the date of issue it can also be found at + + http://www.w3.org/2001/xml.xsd. +

+

+ The schema document at that URI may however change in the future, + in order to remain compatible with the latest version of XML + Schema itself, or with the XML namespace itself. In other words, + if the XML Schema or XML namespaces change, the version of this + document at + http://www.w3.org/2001/xml.xsd + + will change accordingly; the version at + + http://www.w3.org/2009/01/xml.xsd + + will not change. +

+

+ Previous dated (and unchanging) versions of this schema + document are at: +

+ +
+
+
+
+ +
diff --git a/tests/data/4.6/metadata.xsd b/tests/data/4.6/metadata.xsd new file mode 100644 index 0000000..e9b9375 --- /dev/null +++ b/tests/data/4.6/metadata.xsd @@ -0,0 +1,712 @@ + + + + + + + + + + + + + + + + + + Root element of a single record. This wrapper element is for XML implementation only and is not defined in the DataCite DOI standard. + Note: This is the case for all wrapper elements within this schema. + No content in this wrapper element. + + + + + + + A persistent identifier that identifies a resource. + + + + + + + + + + + + + + + The main researchers involved working on the data, or the authors of the publication in priority order. May be a corporate/institutional or personal name. + Format: Family, Given. + Personal names can be further specified using givenName and familyName. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + A name or title by which a resource is known. + + + + + + + + + + + + + + + + The name of the entity that holds, archives, publishes prints, distributes, releases, issues, or produces the resource. This property will be used to formulate the citation, so consider the prominence of the role. + In the case of datasets, "publish" is understood to mean making the data available to the community of researchers. + + + + + + + + + + + + + + + Year when the data is made publicly available. If an embargo period has been in effect, use the date when the embargo period ends. + In the case of datasets, "publish" is understood to mean making the data available on a specific date to the community of researchers. If there is no standard publication year value, use the date that would be preferred from a citation perspective. + YYYY + + + + + + + + The type of a resource. You may enter an additional free text description. + The format is open, but the preferred format is a single term of some detail so that a pair can be formed with the sub-property. 
+ + + + + + + + + + + + + + + + Subject, keywords, classification codes, or key phrases describing the resource. + + + + + + + + + + + + + + + + + + + + + + The institution or person responsible for collecting, creating, or otherwise contributing to the development of the dataset. + The personal name format should be: Family, Given. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Different dates relevant to the work. + YYYY,YYYY-MM-DD, YYYY-MM-DDThh:mm:ssTZD or any other format or level of granularity described in W3CDTF. Use RKMS-ISO8601 standard for depicting date ranges. + + + + + + + + + + + + + + + + Primary language of the resource. Allowed values are taken from IETF BCP 47, ISO 639-1 language codes. + + + + + + + + An identifier or identifiers other than the primary Identifier applied to the resource being registered. This may be any alphanumeric string which is unique within its domain of issue. May be used for local identifiers. AlternateIdentifier should be used for another identifier of the same instance (same location, same file). + + + + + + + + + + + + + + + + + + Identifiers of related resources. Use this property to indicate subsets of properties, as appropriate. + + + + + + + + + + + + + + + + + + + + + + + Unstructures size information about the resource. + + + + + + + + + + + Technical format of the resource. + Use file extension or MIME type where possible. + + + + + + + + Version number of the resource. If the primary resource has changed the version number increases. + Register a new identifier for a major version change. Individual stewards need to determine which are major vs. minor versions. May be used in conjunction with properties 11 and 12 (AlternateIdentifier and RelatedIdentifier) to indicate various information updates. May be used in conjunction with property 17 (Description) to indicate the nature and file/record range of version. + + + + + + + + Any rights information for this resource. 
Provide a rights management statement for the resource or reference a service providing such information. Include embargo information if applicable. + Use the complete title of a license and include version information if applicable. + + + + + + + + + + + + + + + + + + + + + + All additional information that does not fit in any of the other categories. May be used for technical information. It is a best practice to supply a description. + + + + + + + + + + + + + + + + + + + + + + + Spatial region or named place where the data was gathered or about which the data is focused. + + + + + A point contains a single latitude-longitude pair. + + + + + A box contains two white space separated latitude-longitude pairs, with each pair separated by whitespace. The first pair is the lower corner, the second is the upper corner. + + + + + A drawn polygon area, defined by a set of points and lines connecting the points in a closed chain. + + + + + + + + + + + + + + + + + + + + Information about financial support (funding) for the resource being registered. + + + + + + Name of the funding provider. + + + + + + + + Uniquely identifies a funding entity, according to various types. + + + + + + + + + + + + + The code assigned by the funder to a sponsored award (grant). + + + + + + + + + + + + The human readable title of the award (grant). + + + + + + + + + + + + + + Information about a resource related to the one being registered e.g. a journal or book of which the article or chapter is part. + + + + + + The identifier for the related item. + + + + + + + The type of the Identifier for the related item e.g. DOI. + + + + + The name of the scheme. + + + + + The URI of the relatedMetadataScheme. + + + + + The type of the relatedMetadataScheme, linked with the schemeURI. + + + + + + + + + + + + The institution or person responsible for creating the + related resource. To supply multiple creators, repeat this property. 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + Title of the related item. + + + + + + + + + + + + + + + + The year when the item was or will be made publicly available. + + + + + + + + Volume of the related item. + + + + + Issue number or name of the related item. + + + + + Number of the related item e.g. report number of article number. + + + + + + + + + + + + First page of the related item e.g. of the chapter, article, or conference paper. + + + + + Last page of the related item e.g. of the chapter, article, or conference paper. + + + + + The name of the entity that holds, archives, publishes prints, distributes, releases, issues, or produces the resource. This property will be used to formulate the citation, so consider the prominence of the role. + + + + + Edition or version of the related item. + + + + + + + + The institution or person responsible for collecting, + managing, distributing, or otherwise contributing to the development of + the resource. + + + + + + + + + + + + + + + + + + + The type of contributor of the resource. + + + + + + + + + + + The type of the related item, e.g. journal article, book or chapter. + + + + + Description of the relationship of the resource being registered (A) and the related resource (B). + + + + + + + + + + + + + + + + + + + + + + Uniquely identifies a creator or contributor, according to various identifier schemes. + + + + + + + + + + + + + + + + + + + + + + + + + + + + Uniquely identifies an affiliation, according to various identifier schemes. 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/tests/data/datacite-v4.6-full-example.json b/tests/data/datacite-v4.6-full-example.json new file mode 100644 index 0000000..5be3806 --- /dev/null +++ b/tests/data/datacite-v4.6-full-example.json @@ -0,0 +1,634 @@ +{ + "doi": "10.82433/b09z-4k37", + "prefix": "10.82433", + "suffix": "b09z-4k37", + "alternateIdentifiers": [ + { + "alternateIdentifierType": "Local accession number", + "alternateIdentifier": "12345" + } + ], + "creators": [ + { + "name": "ExampleFamilyName, ExampleGivenName", + "nameType": "Personal", + "givenName": "ExampleGivenName", + "familyName": "ExampleFamilyName", + "affiliation": [], + "nameIdentifiers": [] + }, + { + "name": "ExampleOrganization", + "nameType": "Organizational", + "affiliation": [], + "nameIdentifiers": [ + { + "schemeUri": "https://ror.org", + "nameIdentifier": "https://ror.org/03yrm5c26", + "nameIdentifierScheme": "ROR" + } + ] + } + ], + "titles": [ + { + "lang": "en", + "title": "Example Title" + }, + { + "lang": "en", + "title": "Example Subtitle", + "titleType": "Subtitle" + }, + { + "lang": "fr", + "title": "Example TranslatedTitle", + "titleType": "TranslatedTitle" + }, + { + "lang": "en", + "title": "Example AlternativeTitle", + "titleType": "AlternativeTitle" + } + ], + "publisher": { + "lang": "en", + "name": "Example Publisher", + "schemeUri": "https://ror.org/", + "publisherIdentifier": "https://ror.org/04z8jg394", + "publisherIdentifierScheme": "ROR" + }, + "container": { + "type": "DataRepository", + "title": "Example SeriesInformation", + "identifier": "http://purl.oclc.org/foo/bar", + "identifierType": "PURL" + }, + "publicationYear": "2022", + "subjects": [ + { + "subject": "FOS: Computer and information sciences", + "valueUri": "http://www.oecd.org/science/inno/38235147.pdf", + "schemeUri": "http://www.oecd.org/science/inno", + "subjectScheme": "Fields of Science and Technology (FOS)" + }, + { + "subject": 
"FOS: Computer and information sciences", + "schemeUri": "http://www.oecd.org/science/inno/38235147.pdf", + "subjectScheme": "Fields of Science and Technology (FOS)" + }, + { + "subject": "Digital curation and preservation", + "valueUri": "https://www.abs.gov.au/statistics/classifications/australian-and-new-zealand-standard-research-classification-anzsrc/2020", + "schemeUri": "https://www.abs.gov.au/statistics/classifications/australian-and-new-zealand-standard-research-classification-anzsrc", + "subjectScheme": "Australian and New Zealand Standard Research Classification (ANZSRC), 2020" + }, + { + "subject": "Example Subject" + } + ], + "contributors": [ + { + "name": "ExampleFamilyName, ExampleGivenName", + "nameType": "Personal", + "givenName": "ExampleGivenName", + "familyName": "ExampleFamilyName", + "contributorType": "ContactPerson", + "affiliation": [] + }, + { + "name": "ExampleFamilyName, ExampleGivenName", + "nameType": "Personal", + "givenName": "ExampleGivenName", + "familyName": "ExampleFamilyName", + "contributorType": "DataCollector", + "affiliation": [] + }, + { + "name": "ExampleFamilyName, ExampleGivenName", + "nameType": "Personal", + "givenName": "ExampleGivenName", + "familyName": "ExampleFamilyName", + "contributorType": "DataCurator", + "affiliation": [] + }, + { + "name": "ExampleFamilyName, ExampleGivenName", + "nameType": "Personal", + "givenName": "ExampleGivenName", + "familyName": "ExampleFamilyName", + "contributorType": "DataManager", + "affiliation": [] + }, + { + "name": "ExampleOrganization", + "nameType": "Organizational", + "affiliation": [], + "contributorType": "Distributor" + }, + { + "name": "ExampleFamilyName, ExampleGivenName", + "nameType": "Personal", + "givenName": "ExampleGivenName", + "familyName": "ExampleFamilyName", + "contributorType": "Editor", + "affiliation": [] + }, + { + "name": "ExampleOrganization", + "nameType": "Organizational", + "affiliation": [], + "contributorType": "HostingInstitution", + 
"nameIdentifiers": [ + { + "schemeUri": "https://ror.org", + "nameIdentifier": "https://ror.org/03yrm5c26", + "nameIdentifierScheme": "ROR" + } + ] + }, + { + "name": "ExampleFamilyName, ExampleGivenName", + "nameType": "Personal", + "givenName": "ExampleGivenName", + "familyName": "ExampleFamilyName", + "contributorType": "Producer", + "affiliation": [] + }, + { + "name": "ExampleFamilyName, ExampleGivenName", + "nameType": "Personal", + "givenName": "ExampleGivenName", + "familyName": "ExampleFamilyName", + "contributorType": "ProjectLeader", + "affiliation": [] + }, + { + "name": "ExampleFamilyName, ExampleGivenName", + "nameType": "Personal", + "givenName": "ExampleGivenName", + "familyName": "ExampleFamilyName", + "contributorType": "ProjectManager", + "affiliation": [] + }, + { + "name": "ExampleFamilyName, ExampleGivenName", + "nameType": "Personal", + "givenName": "ExampleGivenName", + "familyName": "ExampleFamilyName", + "contributorType": "ProjectMember", + "affiliation": [] + }, + { + "name": "DataCite", + "nameType": "Organizational", + "affiliation": [], + "contributorType": "RegistrationAgency" + }, + { + "name": "International DOI Foundation", + "nameType": "Organizational", + "affiliation": [], + "contributorType": "RegistrationAuthority" + }, + { + "name": "ExampleFamilyName, ExampleGivenName", + "nameType": "Personal", + "givenName": "ExampleGivenName", + "familyName": "ExampleFamilyName", + "contributorType": "RelatedPerson", + "affiliation": [] + }, + { + "name": "ExampleFamilyName, ExampleGivenName", + "nameType": "Personal", + "givenName": "ExampleGivenName", + "familyName": "ExampleFamilyName", + "contributorType": "Researcher", + "affiliation": [] + }, + { + "name": "ExampleContributor", + "affiliation": [ + { + "name": "ExampleOrganization", + "schemeUri": "https://ror.org", + "affiliationIdentifier": "https://ror.org/03yrm5c26", + "affiliationIdentifierScheme": "ROR" + } + ], + "contributorType": "ResearchGroup", + "nameIdentifiers": [] + 
}, + { + "name": "ExampleFamilyName, ExampleGivenName", + "nameType": "Personal", + "givenName": "ExampleGivenName", + "familyName": "ExampleFamilyName", + "contributorType": "RightsHolder", + "affiliation": [] + }, + { + "name": "ExampleContributor", + "affiliation": [ + { + "name": "https://ror.org/03yrm5c26", + "schemeUri": "https://ror.org", + "affiliationIdentifier": "https://ror.org/03yrm5c26", + "affiliationIdentifierScheme": "ROR" + } + ], + "contributorType": "Sponsor", + "nameIdentifiers": [] + }, + { + "name": "ExampleFamilyName, ExampleGivenName", + "nameType": "Personal", + "givenName": "ExampleGivenName", + "familyName": "ExampleFamilyName", + "contributorType": "Supervisor", + "affiliation": [] + }, + { + "name": "ExampleFamilyName, ExampleGivenName", + "nameType": "Personal", + "givenName": "ExampleGivenName", + "familyName": "ExampleFamilyName", + "contributorType": "Translator", + "affiliation": [] + }, + { + "name": "ExampleOrganization", + "nameType": "Organizational", + "affiliation": [], + "contributorType": "WorkPackageLeader", + "nameIdentifiers": [ + { + "schemeUri": "https://ror.org", + "nameIdentifier": "https://ror.org/03yrm5c26", + "nameIdentifierScheme": "ROR" + } + ] + }, + { + "name": "ExampleFamilyName, ExampleGivenName", + "nameType": "Personal", + "givenName": "ExampleGivenName", + "familyName": "ExampleFamilyName", + "contributorType": "Other", + "affiliation": [] + } + ], + "dates": [ + { + "date": "2022-01-01", + "dateType": "Accepted" + }, + { + "date": "2022-01-01", + "dateType": "Available" + }, + { + "date": "2022-01-01", + "dateType": "Copyrighted" + }, + { + "date": "2022-01-01/2023-01-01", + "dateType": "Collected" + }, + { + "date": "2022-01-01/2023-01-01", + "dateType": "Coverage" + }, + { + "date": "2022-01-01", + "dateType": "Created" + }, + { + "date": "2022-01-01", + "dateType": "Issued" + }, + { + "date": "2022-01-01", + "dateType": "Submitted" + }, + { + "date": "2022-01-01", + "dateType": "Updated" + }, + { + 
"date": "2022-01-01", + "dateType": "Valid" + }, + { + "date": "2022-01-01", + "dateType": "Withdrawn" + }, + { + "date": "2022-01-01", + "dateType": "Other", + "dateInformation": "ExampleDateInformation" + } + ], + "language": "en", + "types": { + "resourceType": "Example ResourceType", + "resourceTypeGeneral": "Dataset" + }, + "relatedIdentifiers": [ + { + "relationType": "IsCitedBy", + "relatedIdentifier": "ark:/13030/tqb3kh97gh8w", + "resourceTypeGeneral": "Audiovisual", + "relatedIdentifierType": "ARK" + }, + { + "relationType": "Cites", + "relatedIdentifier": "arXiv:0706.0001", + "resourceTypeGeneral": "Book", + "relatedIdentifierType": "arXiv" + }, + { + "relationType": "IsSupplementTo", + "relatedIdentifier": "2018AGUFM.A24K..07S", + "resourceTypeGeneral": "BookChapter", + "relatedIdentifierType": "bibcode" + }, + { + "relationType": "IsSupplementedBy", + "relatedIdentifier": "10.1016/j.epsl.2011.11.037", + "resourceTypeGeneral": "Collection", + "relatedIdentifierType": "DOI" + }, + { + "relationType": "IsContinuedBy", + "relatedIdentifier": "9783468111242", + "resourceTypeGeneral": "ComputationalNotebook", + "relatedIdentifierType": "EAN13" + }, + { + "relationType": "Continues", + "relatedIdentifier": "1562-6865", + "resourceTypeGeneral": "ConferencePaper", + "relatedIdentifierType": "EISSN" + }, + { + "relationType": "Describes", + "relatedIdentifier": "10013/epic.10033", + "resourceTypeGeneral": "ConferenceProceeding", + "relatedIdentifierType": "Handle" + }, + { + "relationType": "IsDescribedBy", + "relatedIdentifier": "IECUR0097", + "resourceTypeGeneral": "DataPaper", + "relatedIdentifierType": "IGSN" + }, + { + "relationType": "HasMetadata", + "relatedIdentifier": "978-3-905673-82-1", + "resourceTypeGeneral": "Dataset", + "relatedIdentifierType": "ISBN" + }, + { + "relationType": "IsMetadataFor", + "relatedIdentifier": "0077-5606", + "resourceTypeGeneral": "Dissertation", + "relatedIdentifierType": "ISSN" + }, + { + "relationType": "HasVersion", + 
"relatedIdentifier": "0A9 2002 12B4A105 7", + "resourceTypeGeneral": "Event", + "relatedIdentifierType": "ISTC" + }, + { + "relationType": "IsVersionOf", + "relatedIdentifier": "1188-1534", + "resourceTypeGeneral": "Image", + "relatedIdentifierType": "LISSN" + }, + { + "relationType": "IsNewVersionOf", + "relatedIdentifier": "urn:lsid:ubio.org:namebank:11815", + "resourceTypeGeneral": "InteractiveResource", + "relatedIdentifierType": "LSID" + }, + { + "relationType": "IsPreviousVersionOf", + "relatedIdentifier": "12082125", + "resourceTypeGeneral": "Journal", + "relatedIdentifierType": "PMID" + }, + { + "relationType": "IsPartOf", + "relatedIdentifier": "http://purl.oclc.org/foo/bar", + "resourceTypeGeneral": "JournalArticle", + "relatedIdentifierType": "PURL" + }, + { + "relationType": "HasPart", + "relatedIdentifier": "123456789999", + "resourceTypeGeneral": "Model", + "relatedIdentifierType": "UPC" + }, + { + "relationType": "IsPublishedIn", + "relatedIdentifier": "http://www.heatflow.und.edu/index2.html", + "resourceTypeGeneral": "OutputManagementPlan", + "relatedIdentifierType": "URL" + }, + { + "relationType": "IsReferencedBy", + "relatedIdentifier": "urn:nbn:de:101:1-201102033592", + "resourceTypeGeneral": "PeerReview", + "relatedIdentifierType": "URN" + }, + { + "relationType": "References", + "relatedIdentifier": "https://w3id.org/games/spec/coil#Coil_Bomb_Die_Of_Age", + "resourceTypeGeneral": "PhysicalObject", + "relatedIdentifierType": "w3id" + }, + { + "relationType": "IsDocumentedBy", + "relatedIdentifier": "RRID:SCR_014641", + "resourceTypeGeneral": "Preprint", + "relatedIdentifierType": "RRID" + }, + { + "relationType": "Documents", + "relatedIdentifier": "31253.11.sciencedb.13238", + "resourceTypeGeneral": "Report", + "relatedIdentifierType": "CSTR" + }, + { + "relationType": "IsCompiledBy", + "relatedIdentifier": "10.1016/j.epsl.2011.11.037", + "resourceTypeGeneral": "Service", + "relatedIdentifierType": "DOI" + }, + { + "relationType": "Compiles", 
+ "relatedIdentifier": "10.1016/j.epsl.2011.11.037", + "resourceTypeGeneral": "Software", + "relatedIdentifierType": "DOI" + }, + { + "relationType": "IsVariantFormOf", + "relatedIdentifier": "10.1016/j.epsl.2011.11.037", + "resourceTypeGeneral": "Sound", + "relatedIdentifierType": "DOI" + }, + { + "relationType": "IsOriginalFormOf", + "relatedIdentifier": "10.1016/j.epsl.2011.11.037", + "resourceTypeGeneral": "Standard", + "relatedIdentifierType": "DOI" + }, + { + "relationType": "IsIdenticalTo", + "relatedIdentifier": "10.1016/j.epsl.2011.11.037", + "resourceTypeGeneral": "Text", + "relatedIdentifierType": "DOI" + }, + { + "relationType": "IsReviewedBy", + "relatedIdentifier": "10.1016/j.epsl.2011.11.037", + "resourceTypeGeneral": "Workflow", + "relatedIdentifierType": "DOI" + }, + { + "relationType": "Reviews", + "relatedIdentifier": "10.1016/j.epsl.2011.11.037", + "resourceTypeGeneral": "Project", + "relatedIdentifierType": "DOI" + }, + { + "relationType": "IsDerivedFrom", + "relatedIdentifier": "10.1016/j.epsl.2011.11.037", + "resourceTypeGeneral": "Award", + "relatedIdentifierType": "DOI" + }, + { + "relationType": "IsSourceOf", + "relatedIdentifier": "10.1016/j.epsl.2011.11.037", + "resourceTypeGeneral": "Other", + "relatedIdentifierType": "DOI" + }, + { + "relationType": "IsRequiredBy", + "relatedIdentifier": "10.1016/j.epsl.2011.11.037", + "resourceTypeGeneral": "Other", + "relatedIdentifierType": "DOI" + }, + { + "relationType": "Requires", + "relatedIdentifier": "10.1016/j.epsl.2011.11.037", + "resourceTypeGeneral": "Other", + "relatedIdentifierType": "DOI" + }, + { + "relationType": "Obsoletes", + "relatedIdentifier": "10.1016/j.epsl.2011.11.037", + "resourceTypeGeneral": "Other", + "relatedIdentifierType": "DOI" + }, + { + "relationType": "IsObsoletedBy", + "relatedIdentifier": "10.1016/j.epsl.2011.11.037", + "resourceTypeGeneral": "Other", + "relatedIdentifierType": "DOI" + }, + { + "relationType": "IsTranslationOf", + "relatedIdentifier": 
"10.1016/j.epsl.2011.11.037", + "resourceTypeGeneral": "Other", + "relatedIdentifierType": "DOI" + }, + { + "relationType": "HasTranslation", + "relatedIdentifier": "10.1016/j.epsl.2011.11.037", + "resourceTypeGeneral": "Other", + "relatedIdentifierType": "DOI" + } + ], + "relatedItems": [], + "sizes": [ + "1 MB", + "90 pages" + ], + "formats": [ + "application/xml", + "text/plain" + ], + "version": "1", + "rightsList": [ + { + "rights": "Creative Commons Public Domain Dedication and Certification", + "rightsUri": "https://creativecommons.org/licenses/publicdomain/", + "schemeUri": "https://spdx.org/licenses/", + "rightsIdentifier": "cc-pddc", + "rightsIdentifierScheme": "SPDX" + } + ], + "descriptions": [ + { + "lang": "en", + "description": "Example Abstract", + "descriptionType": "Abstract" + }, + { + "lang": "en", + "description": "Example Methods", + "descriptionType": "Methods" + }, + { + "lang": "en", + "description": "Example SeriesInformation", + "descriptionType": "SeriesInformation" + }, + { + "lang": "en", + "description": "Example TableOfContents", + "descriptionType": "TableOfContents" + }, + { + "lang": "en", + "description": "Example TechnicalInfo", + "descriptionType": "TechnicalInfo" + }, + { + "lang": "en", + "description": "Example Other", + "descriptionType": "Other" + } + ], + "geoLocations": [ + { + "geoLocationBox": { + "eastBoundLongitude": -123.02, + "northBoundLatitude": 49.315, + "southBoundLatitude": 49.195, + "westBoundLongitude": -123.27 + }, + "geoLocationPlace": "Vancouver, British Columbia, Canada", + "geoLocationPoint": { + "pointLatitude": 49.2827, + "pointLongitude": -123.1207 + } + } + ], + "fundingReferences": [ + { + "awardUri": "https://example.com/example-award-uri", + "awardTitle": "Example AwardTitle", + "funderName": "Example Funder", + "awardNumber": "12345", + "funderIdentifier": "https://doi.org/10.13039/501100000780", + "funderIdentifierType": "Crossref Funder ID" + } + ], + "url": "https://example.com/", + 
"schemaVersion": "http://datacite.org/schema/kernel-4" +} diff --git a/tests/data/datacite-v4.6-full-example.xml b/tests/data/datacite-v4.6-full-example.xml new file mode 100644 index 0000000..946506e --- /dev/null +++ b/tests/data/datacite-v4.6-full-example.xml @@ -0,0 +1,268 @@ + + + 10.82433/B09Z-4K37 + + + ExampleFamilyName, ExampleGivenName + ExampleGivenName + ExampleFamilyName + + + ExampleOrganization + https://ror.org/03yrm5c26 + + + + Example Title + Example Subtitle + Example TranslatedTitle + Example AlternativeTitle + + Example Publisher + 2022 + Example ResourceType + + FOS: Computer and information sciences + Digital curation and preservation + Example Subject + + + + ExampleFamilyName, ExampleGivenName + ExampleGivenName + ExampleFamilyName + https://orcid.org/0000-0001-5727-2427/ + + + + ExampleFamilyName, ExampleGivenName + ExampleGivenName + ExampleFamilyName + https://orcid.org/0000-0001-5727-2427/ + + + + ExampleFamilyName, ExampleGivenName + ExampleGivenName + ExampleFamilyName + https://orcid.org/0000-0001-5727-2427/ + + + + ExampleFamilyName, ExampleGivenName + ExampleGivenName + ExampleFamilyName + https://orcid.org/0000-0001-5727-2427/ + + + + ExampleOrganization + https://ror.org/03yrm5c26 + + + + ExampleFamilyName, ExampleGivenName + ExampleGivenName + ExampleFamilyName + https://orcid.org/0000-0001-5727-2427/ + + + + ExampleOrganization + https://ror.org/03yrm5c26 + + + + ExampleFamilyName, ExampleGivenName + ExampleGivenName + ExampleFamilyName + https://orcid.org/0000-0001-5727-2427/ + + + + ExampleFamilyName, ExampleGivenName + ExampleGivenName + ExampleFamilyName + https://orcid.org/0000-0001-5727-2427/ + + + + ExampleFamilyName, ExampleGivenName + ExampleGivenName + ExampleFamilyName + https://orcid.org/0000-0001-5727-2427/ + + + + ExampleFamilyName, ExampleGivenName + ExampleGivenName + ExampleFamilyName + https://orcid.org/0000-0001-5727-2427/ + + + + DataCite + https://ror.org/04wxnsj81 + + + + International DOI Foundation + + + 
+ + ExampleFamilyName, ExampleGivenName + ExampleGivenName + ExampleFamilyName + https://orcid.org/0000-0001-5727-2427/ + + + + ExampleFamilyName, ExampleGivenName + ExampleGivenName + ExampleFamilyName + https://orcid.org/0000-0001-5727-2427/ + + + + ExampleContributor + ExampleOrganization + + + ExampleFamilyName, ExampleGivenName + ExampleGivenName + ExampleFamilyName + https://orcid.org/0000-0001-5727-2427/ + + + + ExampleContributor + https://ror.org/03yrm5c26 + + + ExampleFamilyName, ExampleGivenName + ExampleGivenName + ExampleFamilyName + https://orcid.org/0000-0001-5727-2427/ + + + + ExampleFamilyName, ExampleGivenName + ExampleGivenName + ExampleFamilyName + https://orcid.org/0000-0001-5727-2427/ + + + + ExampleOrganization + https://ror.org/03yrm5c26 + + + + ExampleFamilyName, ExampleGivenName + ExampleGivenName + ExampleFamilyName + https://orcid.org/0000-0001-5727-2427/ + + + + + 2022-01-01 + 2022-01-01 + 2022-01-01 + 2021-01-01/2021-12-31 + 2021-01-01/2021-12-31 + 2022-01-01 + 2022-01-01 + 2022-01-01 + 2022-01-01 + 2022-01-01 + 2022-01-01 + 2022-01-01 + + en + + 12345 + + + ark:/13030/tqb3kh97gh8w + arXiv:0706.0001 + 2018AGUFM.A24K..07S + 10.1016/j.epsl.2011.11.037 + 9783468111242 + 1562-6865 + 10013/epic.10033 + IECUR0097 + 978-3-905673-82-1 + 0077-5606 + 0A9 2002 12B4A105 7 + 1188-1534 + urn:lsid:ubio.org:namebank:11815 + 12082125 + http://purl.oclc.org/foo/bar + 123456789999 + http://www.heatflow.und.edu/index2.html + urn:nbn:de:101:1-201102033592 + https://w3id.org/games/spec/coil#Coil_Bomb_Die_Of_Age + RRID:SCR_014641 + 31253.11.sciencedb.13238 + 10.1016/j.epsl.2011.11.037 + 10.1016/j.epsl.2011.11.037 + 10.1016/j.epsl.2011.11.037 + 10.1016/j.epsl.2011.11.037 + 10.1016/j.epsl.2011.11.037 + 10.1016/j.epsl.2011.11.037 + 10.1016/j.epsl.2011.11.037 + 10.1016/j.epsl.2011.11.037 + 10.1016/j.epsl.2011.11.037 + 10.1016/j.epsl.2011.11.037 + 10.1016/j.epsl.2011.11.037 + 10.1016/j.epsl.2011.11.037 + 10.1016/j.epsl.2011.11.037 + 10.1016/j.epsl.2011.11.037 + 
10.1016/j.epsl.2011.11.037 + + + 1 MB + 90 pages + + + application/xml + text/plain + + 1 + + Creative Commons Public Domain Dedication and Certification + + + Example Abstract + Example Methods + Example SeriesInformation + Example TableOfContents + Example TechnicalInfo + Example Other + + + + Vancouver, British Columbia, Canada + + 49.2827 + -123.1207 + + + -123.27 + -123.02 + 49.195 + 49.315 + + + + + + Example Funder + https://doi.org/10.13039/501100000780 + 12345 + Example AwardTitle + + + diff --git a/tests/example/full.py b/tests/example/full.py index 4bec3bb..a6c9376 100644 --- a/tests/example/full.py +++ b/tests/example/full.py @@ -1,4 +1,4 @@ -from datacite import DataCiteMDSClient, schema45 +from datacite import DataCiteMDSClient, schema46 prefix = "10.1234" @@ -19,10 +19,10 @@ } # Validate dictionary -assert schema45.validate(data) +assert schema46.validate(data) # Generate DataCite XML from dictionary. -doc = schema45.tostring(data) +doc = schema46.tostring(data) # Initialize the MDS client. d = DataCiteMDSClient( diff --git a/tests/example/full_rest.py b/tests/example/full_rest.py index fb9a4e8..0e57f7a 100644 --- a/tests/example/full_rest.py +++ b/tests/example/full_rest.py @@ -1,5 +1,5 @@ import os -from datacite import DataCiteRESTClient, schema45 +from datacite import DataCiteRESTClient, schema46 data = { "creators": [ @@ -17,10 +17,10 @@ } # Validate dictionary -schema45.validator.validate(data) +schema46.validator.validate(data) # Generate DataCite XML from dictionary. 
-doc = schema45.tostring(data) +doc = schema46.tostring(data) print(doc) diff --git a/tests/helpers.py b/tests/helpers.py index f2a7e0e..1b9d963 100644 --- a/tests/helpers.py +++ b/tests/helpers.py @@ -105,3 +105,13 @@ def load_json_path(path): "data/4.5/datacite-example-relateditem3-v4.json", "data/datacite-v4.5-full-example.json", ] + +TEST_46_JSON_FILES = [ + "data/4.6/datacite-example-dataset-v4.json", + "data/4.6/datacite-example-instrument-v4.json", + "data/4.6/datacite-example-multilingual-v4.json", + "data/4.6/datacite-example-relateditem1-v4.json", + "data/4.6/datacite-example-relateditem2-v4.json", + "data/4.6/datacite-example-relateditem3-v4.json", + "data/datacite-v4.6-full-example.json", +] diff --git a/tests/test_schema46.py b/tests/test_schema46.py new file mode 100644 index 0000000..0ac81b1 --- /dev/null +++ b/tests/test_schema46.py @@ -0,0 +1,628 @@ +# -*- coding: utf-8 -*- +# +# This file is part of DataCite. +# +# Copyright (C) 2016 CERN. +# Copyright (C) 2019 Caltech. +# Copyright (C) 2024 IBT Czech Academy of Sciences. +# Copyright (C) 2026 Observatoire de Paris. +# +# DataCite is free software; you can redistribute it and/or modify it +# under the terms of the Revised BSD License; see LICENSE file for +# more details. 
"""Tests for DataCite v4.6 format transformations."""

import pytest
from helpers import TEST_46_JSON_FILES, load_json_path, load_xml_path
from lxml import etree

from datacite.schema46 import dump_etree, tostring, validator


def validate_json(minimal_json, extra_json):
    """Validate a minimal record overlaid with one property under test.

    :param minimal_json: Mapping holding the baseline record.
    :param extra_json: Mapping whose keys override/extend ``minimal_json``.

    Neither argument is modified; the merge happens on a fresh dict, which
    is then passed to ``validator.validate``.
    """
    data = dict(minimal_json)
    data.update(extra_json)
    validator.validate(data)


@pytest.mark.parametrize("example_json46", TEST_46_JSON_FILES)
def test_example_json_validates(example_json46):
    """Test the example file validates against the JSON schema."""
    example_json = load_json_path(example_json46)
    validator.validate(example_json)


# (expected XML path, source JSON path) for every shipped 4.6 example.
FILE_PAIRS = [
    (
        "data/4.6/datacite-example-dataset-v4.xml",
        "data/4.6/datacite-example-dataset-v4.json",
    ),
    (
        "data/4.6/datacite-example-instrument-v4.xml",
        "data/4.6/datacite-example-instrument-v4.json",
    ),
    (
        "data/4.6/datacite-example-multilingual-v4.xml",
        "data/4.6/datacite-example-multilingual-v4.json",
    ),
    (
        "data/4.6/datacite-example-relateditem1-v4.xml",
        "data/4.6/datacite-example-relateditem1-v4.json",
    ),
    (
        "data/4.6/datacite-example-relateditem2-v4.xml",
        "data/4.6/datacite-example-relateditem2-v4.json",
    ),
    (
        "data/4.6/datacite-example-relateditem3-v4.xml",
        "data/4.6/datacite-example-relateditem3-v4.json",
    ),
    ("data/datacite-v4.6-full-example.xml", "data/datacite-v4.6-full-example.json"),
]


@pytest.mark.parametrize("example_xml46, example_json46", FILE_PAIRS)
def test_json_to_xml(example_xml46, example_json46, xsd46):
    """Test that example XML and XML generated from example JSON are valid.

    Both the reference XML file and the serialisation of the matching JSON
    file are checked against the XSD; the two documents are not compared
    with each other.
    """
    example_xml = load_xml_path(example_xml46)
    example_json = load_json_path(example_json46)
    xsd46.assertValid(etree.XML(example_xml.encode("utf8")))
    xsd46.assertValid(etree.XML(tostring(example_json).encode("utf8")))


def test_json_eq_xml(example_xml_file46, example_json46, xsd46):
    """Test that the XML generated from the example JSON is schema-valid."""
    # NOTE(review): ``example_xml_file46`` is requested but never read, so
    # despite the test name no JSON/XML equality is asserted here -- confirm
    # whether a comparison was intended.
    xsd46.assertValid(etree.XML(tostring(example_json46).encode("utf8")))


#
# Field by field tests.
#
def test_identifier(minimal_json46):
    """Test identifier."""
    data = {"doi": "10.1234/foo.bar"}
    validate_json(minimal_json46, data)
    tree = dump_etree(data)
    elem = tree.xpath("/resource/identifier")[0]
    assert elem.text == "10.1234/foo.bar"
    assert elem.get("identifierType") == "DOI"


def test_creators(minimal_json46):
    """Test creators."""
    # A mapping instead of a list of creators must be rejected.
    with pytest.raises(TypeError):
        dump_etree({"creators": {"invalid": "data"}})

    # An empty list produces no <creators> wrapper at all.
    tree = dump_etree({"creators": []})
    assert len(tree.xpath("/resource/creators")) == 0

    tree = dump_etree(
        {
            "creators": [
                {
                    "name": "Smith, John",
                }
            ]
        }
    )
    assert len(tree.xpath("/resource/creators/creator")) == 1
    assert len(tree.xpath("/resource/creators/creator/creatorName")) == 1
    assert len(tree.xpath("/resource/creators/creator/nameIdentifier")) == 0
    assert len(tree.xpath("/resource/creators/creator/affiliation")) == 0

    data = {
        "creators": [
            {
                "name": "Smith, John",
                "familyName": "Smith",
                "givenName": "John",
                "affiliation": [
                    {
                        "name": "DataCite",
                        "affiliationIdentifier": "https://ror.org/04wxnsj81",
                        "affiliationIdentifierScheme": "ROR",
                    },
                    {
                        "name": "DataCite2",
                        "affiliationIdentifier": "https://ror.org/04wxnsj81",
                        "affiliationIdentifierScheme": "ROR",
                    },
                ],
                "nameIdentifiers": [
                    {
                        "nameIdentifier": "1234",
                        "schemeUri": "http://orcid.org",
                        "nameIdentifierScheme": "orcid",
                    },
                ],
            }
        ]
    }
    validate_json(minimal_json46, data)

    tree = dump_etree(data)
    assert len(tree.xpath("/resource/creators/creator/creatorName")) == 1
    assert len(tree.xpath("/resource/creators/creator/familyName")) == 1
    assert len(tree.xpath("/resource/creators/creator/givenName")) == 1
    assert len(tree.xpath("/resource/creators/creator/nameIdentifier")) == 1

    # Serialise once and inspect both affiliations from the same tree.
    affiliations = tree.xpath("/resource/creators/creator/affiliation")
    assert len(affiliations) == 2

    first, second = affiliations
    assert first.text == "DataCite"
    assert first.get("affiliationIdentifier") == "https://ror.org/04wxnsj81"
    assert second.text == "DataCite2"
    assert second.get("affiliationIdentifier") == "https://ror.org/04wxnsj81"


def test_titles(minimal_json46):
    """Test titles."""
    with pytest.raises(TypeError):
        dump_etree({"titles": {"invalid": "data"}})

    tree = dump_etree({"titles": []})
    assert len(tree.xpath("/resource/titles")) == 0

    tree = dump_etree({"titles": [{"title": "Test"}]})
    assert len(tree.xpath("/resource/titles")) == 1
    assert len(tree.xpath("/resource/titles/title")) == 1

    data = {"titles": [{"title": "Test", "titleType": "Subtitle"}]}
    validate_json(minimal_json46, data)

    elem = dump_etree(data).xpath("/resource/titles/title")[0]
    assert elem.text == "Test"
    assert elem.get("titleType") == "Subtitle"

    elem = dump_etree({"titles": [{"title": "Test", "lang": "en"}]}).xpath(
        "/resource/titles/title"
    )[0]
    assert elem.get("{xml}lang") == "en"


def test_publisher(minimal_json46):
    """Test publisher."""
    data = {"publisher": {"name": "test"}}
    validate_json(minimal_json46, data)
    tree = dump_etree(data)
    assert tree.xpath("/resource/publisher")[0].text == "test"

    # An empty publisher value must not emit a <publisher> element.
    tree = dump_etree({"publisher": ""})
    assert len(tree.xpath("/resource/publisher")) == 0


def test_publicationyear(minimal_json46):
    """Test publication year."""
    data = {"publicationYear": "2002"}
    validate_json(minimal_json46, data)
    tree = dump_etree(data)
    assert tree.xpath("/resource/publicationYear")[0].text == "2002"

    tree = dump_etree({"publicationYear": None})
    assert len(tree.xpath("/resource/publicationYear")) == 0


def test_subjects(minimal_json46):
    """Test subjects."""
    with pytest.raises(TypeError):
        dump_etree({"subjects": {"invalid": "data"}})

    tree = dump_etree({"subjects": []})
    assert len(tree.xpath("/resource/subjects")) == 0

    tree = dump_etree({"subjects": [{"subject": "test"}]})
    assert len(tree.xpath("/resource/subjects/subject")) == 1

    data = {
        "subjects": [
            {
                "subject": "test",
                "subjectScheme": "dewey",
                "schemeUri": "dewey-uri",
                "valueUri": "https://cern.ch",
            }
        ]
    }
    validate_json(minimal_json46, data)
    elem = dump_etree(data).xpath("/resource/subjects/subject")[0]
    assert elem.text == "test"
    assert elem.get("subjectScheme") == "dewey"
    # JSON keys use "Uri"; the XML attributes are spelled "URI".
    assert elem.get("schemeURI") == "dewey-uri"
    assert elem.get("valueURI") == "https://cern.ch"


def test_contributors(minimal_json46):
    """Test contributors."""
    with pytest.raises(TypeError):
        dump_etree({"contributors": {"invalid": "data"}})

    tree = dump_etree({"contributors": []})
    assert len(tree.xpath("/resource/contributors")) == 0

    tree = dump_etree(
        {
            "contributors": [
                {
                    "name": "CERN",
                    "nameType": "Organisational",
                    "contributorType": "HostingInstitution",
                }
            ]
        }
    )
    assert len(tree.xpath("/resource/contributors/contributor")) == 1
    assert len(tree.xpath("/resource/contributors/contributor/contributorName")) == 1
    cntr1 = tree.xpath("/resource/contributors/contributor/contributorName")[0]
    assert cntr1.attrib["nameType"] == "Organisational"
    assert len(tree.xpath("/resource/contributors/contributor/nameIdentifier")) == 0
    assert len(tree.xpath("/resource/contributors/contributor/affiliation")) == 0

    data = {
        "contributors": [
            {
                "name": "Smith, John",
                "nameType": "Personal",
                "familyName": "Smith",
                "givenName": "John",
                "contributorType": "ContactPerson",
                "affiliation": [
                    {
                        "name": "DataCite",
                        "affiliationIdentifier": "https://ror.org/04wxnsj81",
                        "affiliationIdentifierScheme": "ROR",
                    }
                ],
                "nameIdentifiers": [
                    {
                        "nameIdentifier": "1234",
                        "schemeUri": "http://orcid.org",
                        "nameIdentifierScheme": "orcid",
                    },
                ],
            }
        ]
    }
    validate_json(minimal_json46, data)

    tree = dump_etree(data)
    assert len(tree.xpath("/resource/contributors/contributor/contributorName")) == 1
    cntr1 = tree.xpath("/resource/contributors/contributor/contributorName")[0]
    assert cntr1.attrib["nameType"] == "Personal"
    assert len(tree.xpath("/resource/contributors/contributor/familyName")) == 1
    assert len(tree.xpath("/resource/contributors/contributor/givenName")) == 1
    assert len(tree.xpath("/resource/contributors/contributor/nameIdentifier")) == 1
    assert len(tree.xpath("/resource/contributors/contributor/affiliation")) == 1


def test_dates(minimal_json46):
    """Test dates."""
    tree = dump_etree({"dates": []})
    assert len(tree.xpath("/resource/dates")) == 0

    # A date entry without "dateType" is expected to fail with KeyError.
    with pytest.raises(KeyError):
        dump_etree({"dates": [{"date": "2011-01-01"}]})

    data = {
        "dates": [
            {
                "date": "2011-01-01",
                "dateType": "Accepted",
                "dateInformation": "Date of paper acceptance.",
            }
        ]
    }
    validate_json(minimal_json46, data)

    elem = dump_etree(data).xpath("/resource/dates/date")[0]
    assert elem.text == "2011-01-01"
    assert elem.get("dateType") == "Accepted"
    assert elem.get("dateInformation") == "Date of paper acceptance."


def test_language(minimal_json46):
    """Test language."""
    data = {"language": "en"}
    validate_json(minimal_json46, data)
    tree = dump_etree(data)
    assert tree.xpath("/resource/language")[0].text == "en"

    # An empty language string must not emit a <language> element.
    tree = dump_etree({"language": ""})
    assert len(tree.xpath("/resource/language")) == 0


def test_resourcetype(minimal_json46):
    """Test resource type."""
    data = {
        "types": {"resourceTypeGeneral": "Software", "resourceType": "Science Software"}
    }
    validate_json(minimal_json46, data)
    elem = dump_etree(data).xpath("/resource/resourceType")[0]
    assert elem.get("resourceTypeGeneral") == "Software"
    assert elem.text == "Science Software"


def test_relatedidentifiers(minimal_json46):
    """Test related identifiers."""
    tree = dump_etree({"relatedIdentifiers": []})
    assert len(tree.xpath("/resource/relatedIdentifiers")) == 0

    data = {
        "relatedIdentifiers": [
            {
                "relatedIdentifier": "10.1234/foo",
                "relatedIdentifierType": "DOI",
                "relationType": "Cites",
            },
        ]
    }
    validate_json(minimal_json46, data)
    elem = dump_etree(data).xpath("/resource/relatedIdentifiers/relatedIdentifier")[0]
    assert elem.get("relatedIdentifierType") == "DOI"
    assert elem.get("relationType") == "Cites"
    assert elem.text == "10.1234/foo"

    data = {
        "relatedIdentifiers": [
            {
                "relatedIdentifier": "10.1234/foo",
                "relatedIdentifierType": "DOI",
                "relationType": "HasMetadata",
                "relatedMetadataScheme": "MARC21",
                "schemeUri": "http://loc.gov",
                "schemeType": "XSD",
                "resourceTypeGeneral": "Software",
            },
        ]
    }
    validate_json(minimal_json46, data)
    elem = dump_etree(data).xpath("/resource/relatedIdentifiers/relatedIdentifier")[0]
    assert elem.get("relatedMetadataScheme") == "MARC21"
    assert elem.get("schemeURI") == "http://loc.gov"
    assert elem.get("schemeType") == "XSD"
    assert elem.get("resourceTypeGeneral") == "Software"


def test_sizes(minimal_json46):
    """Test sizes."""
    tree = dump_etree({"sizes": []})
    assert len(tree.xpath("/resource/sizes")) == 0

    data = {"sizes": ["123"]}
    validate_json(minimal_json46, data)
    elem = dump_etree(data).xpath("/resource/sizes/size")[0]
    assert elem.text == "123"


def test_formats(minimal_json46):
    """Test formats."""
    tree = dump_etree({"formats": []})
    assert len(tree.xpath("/resource/formats")) == 0

    data = {"formats": ["abc"]}
    validate_json(minimal_json46, data)
    elem = dump_etree(data).xpath("/resource/formats/format")[0]
    assert elem.text == "abc"


def test_version(minimal_json46):
    """Test version."""
    tree = dump_etree({"version": ""})
    assert len(tree.xpath("/resource/version")) == 0

    data = {"version": "v4.6"}
    validate_json(minimal_json46, data)
    elem = dump_etree(data).xpath("/resource/version")[0]
    assert elem.text == "v4.6"


def test_rights(minimal_json46):
    """Test rights."""
    tree = dump_etree({"rightsList": []})
    assert len(tree.xpath("/resource/rightsList")) == 0

    data = {
        "rightsList": [
            {"rights": "CC", "rightsUri": "http://cc.org", "lang": "en"},
        ]
    }
    validate_json(minimal_json46, data)
    elem = dump_etree(data).xpath("/resource/rightsList/rights")[0]
    assert elem.get("rightsURI") == "http://cc.org"
    assert elem.get("{xml}lang") == "en"
    assert elem.text == "CC"


def test_descriptions(minimal_json46):
    """Test descriptions."""
    tree = dump_etree({"descriptions": []})
    assert len(tree.xpath("/resource/descriptions")) == 0

    data = {
        "descriptions": [
            {
                "description": "Test",
                "descriptionType": "Abstract",
            },
        ]
    }
    validate_json(minimal_json46, data)
    elem = dump_etree(data).xpath("/resource/descriptions/description")[0]
    assert elem.get("descriptionType") == "Abstract"
    assert elem.text == "Test"


def test_fundingreferences(minimal_json46):
    """Test funding references."""
    tree = dump_etree({"fundingReferences": []})
    assert len(tree.xpath("/resource/fundingReferences")) == 0

    data = {
        "fundingReferences": [
            {
                "funderName": "funderName",
                "funderIdentifier": "id",
                "funderIdentifierType": "ISNI",
                "awardNumber": "282625",
                "awardUri": "https://cern.ch",
                "awardTitle": "title",
            },
        ]
    }
    validate_json(minimal_json46, data)
    elem = dump_etree(data).xpath("/resource/fundingReferences/fundingReference")[0]
    funder_name = elem.xpath("funderName")[0]
    assert funder_name.text == "funderName"
    # Named ``funder_id`` rather than ``id`` to avoid shadowing the builtin.
    funder_id = elem.xpath("funderIdentifier")[0]
    assert funder_id.text == "id"
    assert funder_id.get("funderIdentifierType") == "ISNI"
    award = elem.xpath("awardNumber")[0]
    assert award.text == "282625"
    assert award.get("awardURI") == "https://cern.ch"
    award_title = elem.xpath("awardTitle")[0]
    assert award_title.text == "title"


def test_geolocations(minimal_json46):
    """Test geolocation."""
    tree = dump_etree({"geoLocations": []})
    assert len(tree.xpath("/resource/geoLocations")) == 0

    data = {
        "geoLocations": [
            {
                "geoLocationPoint": {"pointLongitude": 31.12, "pointLatitude": 67},
                "geoLocationBox": {
                    "westBoundLongitude": 31.12,
                    "eastBoundLongitude": 67,
                    "southBoundLatitude": 32,
                    "northBoundLatitude": 68,
                },
                "geoLocationPlace": "Atlantic Ocean",
                "geoLocationPolygon": [
                    {
                        "polygonPoint": {
                            "pointLongitude": -71.032,
                            "pointLatitude": 41.090,
                        }
                    },
                    {
                        "polygonPoint": {
                            "pointLongitude": -68.211,
                            "pointLatitude": 42.893,
                        }
                    },
                    {
                        "polygonPoint": {
                            "pointLongitude": -72.032,
                            "pointLatitude": 39.090,
                        }
                    },
                    {
                        "polygonPoint": {
                            "pointLongitude": -71.032,
                            "pointLatitude": 41.090,
                        }
                    },
                    {
                        "inPolygonPoint": {
                            "pointLongitude": -52.032,
                            "pointLatitude": 12.090,
                        },
                    },
                ],
            }
        ]
    }
    validate_json(minimal_json46, data)

    elem = dump_etree(data).xpath("/resource/geoLocations/geoLocation")[0]

    # Point.
    pointlong = elem.xpath("geoLocationPoint/pointLongitude")[0]
    pointlat = elem.xpath("geoLocationPoint/pointLatitude")[0]
    assert pointlong.text == "31.12"
    assert pointlat.text == "67"

    # Bounding box.
    boxwest = elem.xpath("geoLocationBox/westBoundLongitude")[0]
    boxeast = elem.xpath("geoLocationBox/eastBoundLongitude")[0]
    boxsouth = elem.xpath("geoLocationBox/southBoundLatitude")[0]
    boxnorth = elem.xpath("geoLocationBox/northBoundLatitude")[0]
    assert boxwest.text == "31.12"
    assert boxeast.text == "67"
    assert boxsouth.text == "32"
    assert boxnorth.text == "68"

    # Place name.
    place = elem.xpath("geoLocationPlace")[0]
    assert place.text == "Atlantic Ocean"

    # Polygon: four boundary points; the serialised text drops the trailing
    # zero of the float literals (41.090 -> "41.09").
    polygon = elem.xpath("geoLocationPolygon")[0]
    points = polygon.xpath("polygonPoint")
    p1long = points[0].xpath("pointLongitude")[0]
    p1lat = points[0].xpath("pointLatitude")[0]
    p2long = points[1].xpath("pointLongitude")[0]
    p2lat = points[1].xpath("pointLatitude")[0]
    p3long = points[2].xpath("pointLongitude")[0]
    p3lat = points[2].xpath("pointLatitude")[0]
    p4long = points[3].xpath("pointLongitude")[0]
    p4lat = points[3].xpath("pointLatitude")[0]
    assert p1long.text == "-71.032"
    assert p1lat.text == "41.09"
    assert p2long.text == "-68.211"
    assert p2lat.text == "42.893"
    assert p3long.text == "-72.032"
    assert p3lat.text == "39.09"
    assert p4long.text == "-71.032"
    assert p4lat.text == "41.09"

    # Interior point of the polygon.
    inp = polygon.xpath("inPolygonPoint")[0]
    inplat = inp.xpath("pointLatitude")[0]
    inplong = inp.xpath("pointLongitude")[0]
    assert inplat.text == "12.09"
    assert inplong.text == "-52.032"


#
# Additional tests
#
def test_minimal_xsd(xsd46, minimal_json46):
    """Test that the minimal JSON validates and serialises to valid XML."""
    validator.validate(minimal_json46)
    xsd46.assertValid(etree.XML(tostring(minimal_json46).encode("utf8")))


def test_minimal_xml(xsd46):
    """Test minimal xml."""
    # NOTE(review): hand-written document carrying the text values of the
    # minimal record; element names follow the DataCite kernel-4 schema --
    # confirm against the 4.6 XSD.
    xml = """
    <resource xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
    xmlns="http://datacite.org/schema/kernel-4"
    xsi:schemaLocation="http://datacite.org/schema/kernel-4
    http://schema.datacite.org/meta/kernel-4.6/metadata.xsd">
        <identifier identifierType="DOI">10.1234/foo.bar</identifier>
        <creators>
            <creator>
                <creatorName>Nielsen, Lars Holm</creatorName>
            </creator>
        </creators>
        <titles>
            <title>Minimal Test Case</title>
        </titles>
        <publisher>Invenio Software</publisher>
        <publicationYear>2016</publicationYear>
        <resourceType resourceTypeGeneral="Dataset"/>
    </resource>"""
    xsd46.assertValid(etree.XML(xml))


# Top-level list-valued properties exercised with an empty list by the
# parametrized empty-array test.
FIELD_NAMES = [
    "dates",
    "subjects",
    "contributors",
    "relatedIdentifiers",
    "relatedItems",
    "sizes",
    "formats",
    "rightsList",
    "descriptions",
    "geoLocations",
    "fundingReferences",
]


@pytest.mark.parametrize("field_name", FIELD_NAMES)
def test_empty_arrays(xsd46, minimal_json46, field_name):
    """Test proper behavior with empty lists for certain fields.

    For each name in ``FIELD_NAMES`` the minimal record is extended with an
    empty list under that key; the result must still pass JSON-schema
    validation and serialise to XSD-valid XML.
    """
    # Work on a copy so the fixture object is never mutated, whatever scope
    # it is declared with.
    data = {**minimal_json46, field_name: []}
    validator.validate(data)
    xsd46.assertValid(etree.XML(tostring(data).encode("utf8")))