From f5d523a13963761d52cd983fff69afcfda43ac86 Mon Sep 17 00:00:00 2001 From: Tim Sherratt Date: Sat, 8 Jun 2024 22:30:20 +1000 Subject: [PATCH] add metadata --- README.md | 126 ++++++----- ro-crate-metadata.json | 310 ++++++++++++++++++++++++++++ single_maps-schema.json | 84 ++++++++ single_maps_coordinates-schema.json | 49 +++++ 4 files changed, 517 insertions(+), 52 deletions(-) create mode 100644 ro-crate-metadata.json create mode 100644 single_maps-schema.json create mode 100644 single_maps_coordinates-schema.json diff --git a/README.md b/README.md index bfd2a84..f52b80d 100644 --- a/README.md +++ b/README.md @@ -1,52 +1,74 @@ -# Digitised maps in Trove - -[![Frictionless](https://github.com/GLAM-Workbench/trove-maps-data/actions/workflows/frictionless.yaml/badge.svg)](https://repository.frictionlessdata.io/pages/dashboard.html?user=frictionlessdata&repo=repository-demo&flow=frictionless) - -This repository contains metadata describing digitised maps in Trove. The methods used in creating the datasets are described in the [Trove Maps](https://glam-workbench.net/trove-maps/) section of the GLAM Workbench. 
- -Files include: - -* `single_maps_20230131.csv` – metadata harvested from Trove -* `single_maps_20230131_coordinates.csv` – coordinate strings in the metadata (points and bounding boxes) parsed and converted to decimal values - -## `single_maps_20230131.csv` - -Contains the following columns: - -* `title` – title of the map -* `url` – url to the map in the digitised file viewer -* `work_url` – url to the work in the Trove map category -* `identifier` – NLA identifier -* `date` – date published or created -* `creators` – creators of the map -* `publication` – publication place, publisher, and publication date (if available) -* `extent` – physical description of map -* `copyright_status` – copyright status based on available metadata (scraped from web page) -* `scale` – map scale -* `coordinates` – map coordinates, either a point or a bounding box (format is 'W--E/N--S', eg: 'E 130⁰50'--E 131⁰00'/S 12⁰30'--S 12⁰40') -* `filesize_string` – filesize string in MB -* `filesize` – size of TIFF file in bytes -* `width` – width of TIFF in pixels -* `height` – height of TIFF in pixels - -## `single_maps_20230131_coordinates.csv` - -Contains the following metadata: - -* `title` – title of the map -* `url` – url to the map in the digitised file viewer -* `coordinates` – map coordinates, either a point or a bounding box (format is 'W--E/N--S', eg: 'E 130⁰50'--E 131⁰00'/S 12⁰30'--S 12⁰40') - -Points or centres derived from bounding box: - -* `latitude` -* `longitude` - -Bounds of box: - -* `north` -* `south` -* `east` -* `west` - - +# trove-maps-data + +This dataset contains metadata describing digitised maps in Trove, harvested from the Trove API and other sources. + +These datasets were generated using notebooks in the [trove-maps](https://github.com/GLAM-Workbench/trove-maps/) repository. 
+ +For more information and documentation see the [Trove digitised maps metadata](https://glam-workbench.net/trove-maps/single-maps-data/) section of the [GLAM Workbench](https://glam-workbench.net). + +## Dataset summary +- [single_maps.csv](https://github.com/GLAM-Workbench/trove-maps-data/raw/main/single_maps.csv) (14.8 MB, text/csv) +- [single_maps_coordinates.csv](https://github.com/GLAM-Workbench/trove-maps-data/raw/main/single_maps_coordinates.csv) (7.1 MB, text/csv) + + +## Dataset details + +### [single_maps.csv](https://github.com/GLAM-Workbench/trove-maps-data/raw/main/single_maps.csv) + +| | | +|:---------------|:-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| date harvested | 2024-06-08 | +| file size | 14.8 MB | +| format | text/csv | +| created by | Exploring digitised maps in Trove ([documentation](https://glam-workbench.net/trove-maps/exploring-digitised-maps/)) | +| number of rows | 34844 | + +#### Columns + +| name | type | description | +|:-------------------|:--------|:------------------------------------------------------------------------------------------------------------------------| +| `identifier` | string | NLA identifier | +| `title` | string | title of the map | +| `url` | string | url to the map in the digitised file viewer | +| `work_url` | string | url to the work in the Trove map category | +| `date` | string | date published or created | +| `creators` | string | creators of the map | +| `publication` | string | publication place, publisher, and publication date (if available) | +| `extent` | string | physical description of map | +| `copyright_status` | string | copyright status based on available metadata (scraped from web page) | +| `scale` | string | map scale | +| `coordinates` | string | map coordinates, either a point or a bounding box (format is 
'W--E/N--S', eg: 'E 130⁰50'--E 131⁰00'/S 12⁰30'--S 12⁰40') | +| `filesize_string` | string | filesize string in MB | +| `filesize` | integer | size of TIFF file in bytes | +| `width` | any | width of TIFF in pixels | +| `height` | any | height of TIFF in pixels | +| `copy_role` | string | code indicating type of download available | + +### [single_maps_coordinates.csv](https://github.com/GLAM-Workbench/trove-maps-data/raw/main/single_maps_coordinates.csv) + +| | | +|:---------------|:-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| date harvested | 2024-06-08 | +| file size | 7.1 MB | +| format | text/csv | +| created by | Parse map coordinates from metadata ([documentation](https://glam-workbench.net/trove-maps/parse-coordinates/)) | +| number of rows | 28779 | + +#### Columns + +| name | type | description | +|:--------------|:-------|:------------------------------------------------------------------------------------------------------------------------------------| +| `title` | string | title of the map | +| `url` | string | url to the map in the digitised file viewer | +| `coordinates` | string | map coordinates as a string, either a point or a bounding box (format is 'W--E/N--S', eg: 'E 130⁰50'--E 131⁰00'/S 12⁰30'--S 12⁰40') | +| `east` | number | east bounds of box | +| `west` | number | west bounds of box | +| `north` | number | north bounds of box | +| `south` | number | south bounds of box | +| `latitude` | number | point from coordinates or centre of box | +| `longitude` | number | point from coordinates or centre of box |## Examples of use + + + +---- +Created by [Tim Sherratt](https://timsherratt.au) for the [GLAM Workbench](https://glam-workbench.net) \ No newline at end of file diff --git a/ro-crate-metadata.json b/ro-crate-metadata.json new file mode 100644 index 0000000..2b19e69 --- 
/dev/null +++ b/ro-crate-metadata.json @@ -0,0 +1,310 @@ +{ + "@context": "https://w3id.org/ro/crate/1.1/context", + "@graph": [ + { + "@id": "./", + "@type": "Dataset", + "author": [ + { + "@id": "https://orcid.org/0000-0001-7956-4498" + } + ], + "datePublished": "2024-06-08", + "description": "This dataset contains metadata describing digitised maps in Trove, harvested from the Trove API and other sources.", + "distribution": { + "@id": "https://github.com/GLAM-Workbench/trove-maps-data/archive/refs/heads/main.zip" + }, + "hasPart": [ + { + "@id": "https://github.com/GLAM-Workbench/trove-maps/blob/master/parse_coordinates.ipynb" + }, + { + "@id": "single_maps.csv" + }, + { + "@id": "single_maps_coordinates.csv" + }, + { + "@id": "https://github.com/GLAM-Workbench/trove-maps/blob/master/Exploring-digitised-maps.ipynb" + }, + { + "@id": "single_maps-schema.json" + }, + { + "@id": "single_maps_coordinates-schema.json" + } + ], + "isBasedOn": { + "@id": "https://github.com/GLAM-Workbench/trove-maps/" + }, + "license": { + "@id": "https://spdx.org/licenses/MIT" + }, + "mainEntityOfPage": { + "@id": "https://glam-workbench.net/trove-maps/single-maps-data/" + }, + "name": "trove-maps-data", + "url": "https://github.com/GLAM-Workbench/trove-maps-data", + "workExample": [] + }, + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "about": { + "@id": "./" + }, + "conformsTo": { + "@id": "https://w3id.org/ro/crate/1.1" + }, + "license": { + "@id": "https://creativecommons.org/publicdomain/zero/1.0/" + } + }, + { + "@id": "https://github.com/GLAM-Workbench/trove-maps/blob/master/parse_coordinates.ipynb", + "@type": [ + "File", + "SoftwareSourceCode" + ], + "author": [ + { + "@id": "https://orcid.org/0000-0001-7956-4498" + } + ], + "category": "", + "codeRepository": "https://github.com/GLAM-Workbench/trove-maps/", + "conformsTo": { + "@id": "https://purl.archive.org/textcommons/profile#Notebook" + }, + "description": "The harvest of digitised maps metadata 
includes a coordinates column that provides a string representation of either a point or a bounding box. This notebook attempts to parse the coordinate string and convert the values to decimals. It then uses the decimal values to explore the geographical context of Trove's digitised map collection.", + "encodingFormat": "application/x-ipynb+json", + "mainEntityOfPage": { + "@id": "https://glam-workbench.net/trove-maps/parse-coordinates/" + }, + "name": "Parse map coordinates from metadata", + "position": 1, + "programmingLanguage": { + "@id": "https://www.python.org/downloads/release/python-31012/" + }, + "url": "https://github.com/GLAM-Workbench/trove-maps/blob/master/parse_coordinates.ipynb", + "workExample": [] + }, + { + "@id": "single_maps.csv", + "@type": [ + "File", + "Dataset" + ], + "conformsTo": { + "@id": "single_maps-schema.json" + }, + "contentSize": 14752472, + "dateModified": "2024-06-08", + "encodingFormat": "text/csv", + "name": "single_maps.csv", + "sdDatePublished": "2024-06-08", + "size": 34844, + "url": "https://github.com/GLAM-Workbench/trove-maps-data/blob/main/single_maps.csv" + }, + { + "@id": "single_maps_coordinates.csv", + "@type": [ + "File", + "Dataset" + ], + "conformsTo": { + "@id": "single_maps_coordinates-schema.json" + }, + "contentSize": 7055489, + "dateModified": "2024-06-08", + "encodingFormat": "text/csv", + "name": "single_maps_coordinates.csv", + "sdDatePublished": "2024-06-08", + "size": 28779, + "url": "https://github.com/GLAM-Workbench/trove-maps-data/blob/main/single_maps_coordinates.csv" + }, + { + "@id": "https://github.com/GLAM-Workbench/trove-maps/blob/master/Exploring-digitised-maps.ipynb", + "@type": [ + "File", + "SoftwareSourceCode" + ], + "author": [ + { + "@id": "https://orcid.org/0000-0001-7956-4498" + } + ], + "category": "", + "codeRepository": "https://github.com/GLAM-Workbench/trove-maps/", + "conformsTo": { + "@id": "https://purl.archive.org/textcommons/profile#Notebook" + }, + "description": "I knew 
there were lots of great maps you could download from Trove, but how many? And how big were the files? I thought I'd try to quantify this a bit by harvesting and analysing the metadata.", + "encodingFormat": "application/x-ipynb+json", + "mainEntityOfPage": { + "@id": "https://glam-workbench.net/trove-maps/exploring-digitised-maps/" + }, + "name": "Exploring digitised maps in Trove", + "position": 0, + "programmingLanguage": { + "@id": "https://www.python.org/downloads/release/python-31012/" + }, + "url": "https://github.com/GLAM-Workbench/trove-maps/blob/master/Exploring-digitised-maps.ipynb", + "workExample": [] + }, + { + "@id": "https://orcid.org/0000-0001-7956-4498", + "@type": "Person", + "mainEntityOfPage": "https://timsherratt.au", + "name": "Sherratt, Tim", + "orcid": "https://orcid.org/0000-0001-7956-4498" + }, + { + "@id": "https://github.com/GLAM-Workbench/trove-maps/", + "@type": "Dataset", + "name": "trove-maps", + "url": "https://github.com/GLAM-Workbench/trove-maps/" + }, + { + "@id": "https://github.com/GLAM-Workbench/trove-maps-data/archive/refs/heads/main.zip", + "@type": "DataDownload", + "name": "Download repository as zip", + "url": "https://github.com/GLAM-Workbench/trove-maps-data/archive/refs/heads/main.zip" + }, + { + "@id": "https://spdx.org/licenses/MIT", + "@type": "CreativeWork", + "name": "MIT License", + "url": "https://spdx.org/licenses/MIT.html" + }, + { + "@id": "https://creativecommons.org/publicdomain/zero/1.0/", + "@type": "CreativeWork", + "name": "CC0 Public Domain Dedication", + "url": "https://creativecommons.org/publicdomain/zero/1.0/" + }, + { + "@id": "http://rightsstatements.org/vocab/NKC/1.0/", + "@type": "CreativeWork", + "description": "The organization that has made the Item available reasonably believes that the Item is not restricted by copyright or related rights, but a conclusive determination could not be made.", + "name": "No Known Copyright", + "url": "http://rightsstatements.org/vocab/NKC/1.0/" + }, + { + 
"@id": "http://rightsstatements.org/vocab/CNE/1.0/", + "@type": "CreativeWork", + "description": "The copyright and related rights status of this Item has not been evaluated.", + "name": "Copyright Not Evaluated", + "url": "http://rightsstatements.org/vocab/CNE/1.0/" + }, + { + "@id": "https://www.python.org/downloads/release/python-31012/", + "@type": [ + "ComputerLanguage", + "SoftwareApplication" + ], + "name": "Python 3.10.12", + "url": "https://www.python.org/downloads/release/python-31012/", + "version": "3.10.12" + }, + { + "@id": "https://glam-workbench.net/trove-maps/parse-coordinates/", + "@type": "CreativeWork", + "isPartOf": { + "@id": "https://glam-workbench.net" + }, + "name": "Parse map coordinates from metadata", + "url": "https://glam-workbench.net/trove-maps/parse-coordinates/" + }, + { + "@id": "#parse_coordinates_run_0", + "@type": "CreateAction", + "actionStatus": { + "@id": "http://schema.org/CompletedActionStatus" + }, + "endDate": "2024-06-08", + "instrument": { + "@id": "https://github.com/GLAM-Workbench/trove-maps/blob/master/parse_coordinates.ipynb" + }, + "name": "Run of notebook: parse_coordinates.ipynb", + "object": [ + { + "@id": "single_maps.csv" + } + ], + "result": [ + { + "@id": "single_maps_coordinates.csv" + } + ] + }, + { + "@id": "https://glam-workbench.net/trove-maps/single-maps-coordinates-data/", + "@type": "CreativeWork", + "isPartOf": { + "@id": "https://glam-workbench.net" + }, + "name": "Trove digitised maps \u2013 coordinates", + "url": "https://glam-workbench.net/trove-maps/single-maps-coordinates-data/" + }, + { + "@id": "https://glam-workbench.net/trove-maps/exploring-digitised-maps/", + "@type": "CreativeWork", + "isPartOf": { + "@id": "https://glam-workbench.net" + }, + "name": "Exploring digitised maps in Trove", + "url": "https://glam-workbench.net/trove-maps/exploring-digitised-maps/" + }, + { + "@id": "#Exploring-digitised-maps_run_0", + "@type": "CreateAction", + "actionStatus": { + "@id": 
"http://schema.org/CompletedActionStatus" + }, + "endDate": "2024-06-08", + "instrument": { + "@id": "https://github.com/GLAM-Workbench/trove-maps/blob/master/Exploring-digitised-maps.ipynb" + }, + "name": "Run of notebook: Exploring-digitised-maps.ipynb", + "result": [ + { + "@id": "single_maps.csv" + } + ] + }, + { + "@id": "https://glam-workbench.net/trove-maps/single-maps-data/", + "@type": "CreativeWork", + "isPartOf": { + "@id": "https://glam-workbench.net" + }, + "name": "Trove digitised maps metadata", + "url": "https://glam-workbench.net/trove-maps/single-maps-data/" + }, + { + "@id": "single_maps-schema.json", + "@type": [ + "File" + ], + "conformsTo": { + "@id": "https://specs.frictionlessdata.io/table-schema/" + }, + "encodingFormat": "application/json", + "name": "Frictionless Table Schema for single_maps.csv dataset", + "url": "https://github.com/GLAM-Workbench/trove-maps-data/raw/main/single_maps-schema.json" + }, + { + "@id": "single_maps_coordinates-schema.json", + "@type": [ + "File" + ], + "conformsTo": { + "@id": "https://specs.frictionlessdata.io/table-schema/" + }, + "encodingFormat": "application/json", + "name": "Frictionless Table Schema for single_maps_coordinates.csv dataset", + "url": "https://github.com/GLAM-Workbench/trove-maps-data/raw/main/single_maps_coordinates-schema.json" + } + ] +} \ No newline at end of file diff --git a/single_maps-schema.json b/single_maps-schema.json new file mode 100644 index 0000000..d03c69d --- /dev/null +++ b/single_maps-schema.json @@ -0,0 +1,84 @@ +{ + "fields": [ + { + "name": "identifier", + "type": "string", + "description": "NLA identifier" + }, + { + "name": "title", + "type": "string", + "description": "title of the map" + }, + { + "name": "url", + "type": "string", + "description": "url to the map in the digitised file viewer" + }, + { + "name": "work_url", + "type": "string", + "description": "url to the work in the Trove map category" + }, + { + "name": "date", + "type": "string", + 
"description": "date published or created" + }, + { + "name": "creators", + "type": "string", + "description": "creators of the map" + }, + { + "name": "publication", + "type": "string", + "description": "publication place, publisher, and publication date (if available)" + }, + { + "name": "extent", + "type": "string", + "description": "physical description of map" + }, + { + "name": "copyright_status", + "type": "string", + "description": "copyright status based on available metadata (scraped from web page)" + }, + { + "name": "scale", + "type": "string", + "description": "map scale" + }, + { + "name": "coordinates", + "type": "string", + "description": "map coordinates, either a point or a bounding box (format is 'W--E/N--S', eg: 'E 130⁰50'--E 131⁰00'/S 12⁰30'--S 12⁰40')" + }, + { + "name": "filesize_string", + "type": "string", + "description": "filesize string in MB" + }, + { + "name": "filesize", + "type": "integer", + "description": "size of TIFF file in bytes" + }, + { + "name": "width", + "type": "any", + "description": "width of TIFF in pixels" + }, + { + "name": "height", + "type": "any", + "description": "height of TIFF in pixels" + }, + { + "name": "copy_role", + "type": "string", + "description": "code indicating type of download available" + } + ] +} \ No newline at end of file diff --git a/single_maps_coordinates-schema.json b/single_maps_coordinates-schema.json new file mode 100644 index 0000000..1a893c8 --- /dev/null +++ b/single_maps_coordinates-schema.json @@ -0,0 +1,49 @@ +{ + "fields": [ + { + "name": "title", + "type": "string", + "description": "title of the map" + }, + { + "name": "url", + "type": "string", + "description": "url to the map in the digitised file viewer" + }, + { + "name": "coordinates", + "type": "string", + "description": "map coordinates as a string, either a point or a bounding box (format is 'W--E/N--S', eg: 'E 130⁰50'--E 131⁰00'/S 12⁰30'--S 12⁰40')" + }, + { + "name": "east", + "type": "number", + "description": "east 
bounds of box" + }, + { + "name": "west", + "type": "number", + "description": "west bounds of box" + }, + { + "name": "north", + "type": "number", + "description": "north bounds of box" + }, + { + "name": "south", + "type": "number", + "description": "south bounds of box" + }, + { + "name": "latitude", + "type": "number", + "description": "decimal latitude of the point given in coordinates, or of the centre of the bounding box" + }, + { + "name": "longitude", + "type": "number", + "description": "decimal longitude of the point given in coordinates, or of the centre of the bounding box" + } + ] +} \ No newline at end of file