From 17103abf2fdc466035113d529b0237c59d0b6653 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Boris=20Cl=C3=A9net?= Date: Wed, 23 Apr 2025 09:40:21 +0200 Subject: [PATCH 01/11] Split modality agnostic files content into separate sections --- mkdocs.yml | 15 +- src/modality-agnostic-files/code.md | 16 ++ .../data-description.md} | 233 +----------------- .../dataset-description.md | 213 ++++++++++++++++ 4 files changed, 237 insertions(+), 240 deletions(-) create mode 100644 src/modality-agnostic-files/code.md rename src/{modality-agnostic-files.md => modality-agnostic-files/data-description.md} (57%) create mode 100644 src/modality-agnostic-files/dataset-description.md diff --git a/mkdocs.yml b/mkdocs.yml index 1ec59b49f7..a19b5d5331 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -5,7 +5,10 @@ nav: - The BIDS Specification: - Introduction: introduction.md - Common principles: common-principles.md - - Modality agnostic files: modality-agnostic-files.md + - Modality agnostic files: + - Dataset description: modality-agnostic-files/dataset-description.md + - Data description: modality-agnostic-files/data-description.md + - Code: modality-agnostic-files/code.md - Modality specific files: - Magnetic Resonance Imaging: modality-specific-files/magnetic-resonance-imaging-data.md - Magnetoencephalography: modality-specific-files/magnetoencephalography.md @@ -101,21 +104,15 @@ extra: extra_javascript: - js/jquery-3.6.0.min.js -extra_css: - - css/tsv.css markdown_extensions: - toc: anchorlink: true - - attr_list - pymdownx.superfences: preserve_tabs: true custom_fences: - name: tsv class: tsv format: !!python/name:bidsschematools.render.tsv.fence - - name: tsvgz - class: tsv - format: !!python/name:bidsschematools.render.tsv.fence - admonition - pymdownx.details plugins: @@ -132,7 +129,9 @@ plugins: redirect_maps: "01-introduction.md": "introduction.md" "02-common-principles.md": "common-principles.md" - "03-modality-agnostic-files.md": "modality-agnostic-files.md" + 
"03-modality-agnostic-files/01-modality-agnostic-files.md": "modality-agnostic-files/dataset-description.md" + "03-modality-agnostic-files/02-modality-agnostic-files.md": "modality-agnostic-files/data-description.md" + "03-modality-agnostic-files/03-modality-agnostic-files.md": "modality-agnostic-files/code.md" "04-modality-specific-files/01-magnetic-resonance-imaging-data.md": "modality-specific-files/magnetic-resonance-imaging-data.md" "04-modality-specific-files/02-magnetoencephalography.md": "modality-specific-files/magnetoencephalography.md" "04-modality-specific-files/03-electroencephalography.md": "modality-specific-files/electroencephalography.md" diff --git a/src/modality-agnostic-files/code.md b/src/modality-agnostic-files/code.md new file mode 100644 index 0000000000..0e894d06a7 --- /dev/null +++ b/src/modality-agnostic-files/code.md @@ -0,0 +1,16 @@ +# Code + +Template: `code/*` + +Source code of scripts that were used to prepare the dataset MAY be stored here. +Examples include deidentification or defacing of the data, or +the conversion from the format of the source data to the BIDS format +(see [source vs. raw vs. derived data](./common-principles.md#source-vs-raw-vs-derived-data)). +Extra care should be taken to avoid including original IDs or +any identifiable information with the source code. +There are no limitations or recommendations on the language and/or +code organization of these scripts at the moment. 
+ + + +[bids uris]: ./common-principles.md#bids-uri diff --git a/src/modality-agnostic-files.md b/src/modality-agnostic-files/data-description.md similarity index 57% rename from src/modality-agnostic-files.md rename to src/modality-agnostic-files/data-description.md index 84df1a2dca..57fa736ceb 100644 --- a/src/modality-agnostic-files.md +++ b/src/modality-agnostic-files/data-description.md @@ -1,218 +1,4 @@ -# Modality agnostic files - -## Dataset description - -Templates: - -- `dataset_description.json` -- `README[.md|.rst|.txt]` -- `CITATION.cff` -- `CHANGES` -- `LICENSE[.md|.rst|.txt]` - -### `dataset_description.json` - - -{{ MACROS___render_text("objects.files.dataset_description.description") }} - -Every dataset MUST include this file with the following fields: - - -{{ MACROS___make_metadata_table( - { - "Name": "REQUIRED", - "BIDSVersion": "REQUIRED", - "HEDVersion": "RECOMMENDED", - "DatasetLinks": "REQUIRED if [BIDS URIs][] are used", - "DatasetType": "RECOMMENDED", - "License": "RECOMMENDED", - "Authors": "RECOMMENDED if CITATION.cff is not present", - "Acknowledgements": "OPTIONAL", - "HowToAcknowledge": "OPTIONAL", - "Funding": "OPTIONAL", - "EthicsApprovals": "OPTIONAL", - "ReferencesAndLinks": "OPTIONAL", - "DatasetDOI": "OPTIONAL", - "GeneratedBy": "RECOMMENDED", - "SourceDatasets": "RECOMMENDED", - } -) }} - -Each object in the `GeneratedBy` array includes the following REQUIRED, RECOMMENDED -and OPTIONAL keys: - - -{{ MACROS___make_subobject_table("metadata.GeneratedBy.items") }} - -Example: - -```JSON -{ - "Name": "The mother of all experiments", - "BIDSVersion": "1.6.0", - "DatasetType": "raw", - "License": "CC0", - "Authors": [ - "Paul Broca", - "Carl Wernicke" - ], - "Acknowledgements": "Special thanks to Korbinian Brodmann for help in formatting this dataset in BIDS. 
We thank Alan Lloyd Hodgkin and Andrew Huxley for helpful comments and discussions about the experiment and manuscript; Hermann Ludwig Helmholtz for administrative support; and Claudius Galenus for providing data for the medial-to-lateral index analysis.", - "HowToAcknowledge": "Please cite this paper: https://www.ncbi.nlm.nih.gov/pubmed/001012092119281", - "Funding": [ - "National Institute of Neuroscience Grant F378236MFH1", - "National Institute of Neuroscience Grant 5RMZ0023106" - ], - "EthicsApprovals": [ - "Army Human Research Protections Office (Protocol ARL-20098-10051, ARL 12-040, and ARL 12-041)" - ], - "ReferencesAndLinks": [ - "https://www.ncbi.nlm.nih.gov/pubmed/001012092119281", - "Alzheimer A., & Kraepelin, E. (2015). Neural correlates of presenile dementia in humans. Journal of Neuroscientific Data, 2, 234001. doi:1920.8/jndata.2015.7" - ], - "DatasetDOI": "doi:10.0.2.3/dfjj.10", - "HEDVersion": "8.0.0", - "GeneratedBy": [ - { - "Name": "reproin", - "Version": "0.6.0", - "Container": { - "Type": "docker", - "Tag": "repronim/reproin:0.6.0" - } - } - ], - "SourceDatasets": [ - { - "URL": "s3://dicoms/studies/correlates", - "Version": "April 11 2011" - } - ] -} -``` - -#### Derived dataset and pipeline description - -As for any BIDS dataset, a `dataset_description.json` file MUST be found at the -top level of every derived dataset: -`/derivatives//dataset_description.json`. - -In contrast to raw BIDS datasets, derived BIDS datasets MUST include a -`GeneratedBy` key: - - -{{ MACROS___make_metadata_table( - { - "GeneratedBy": "REQUIRED" - } -) }} - -If a derived dataset is stored as a subdirectory of the raw dataset, then the `Name` field -of the first `GeneratedBy` object MUST be a substring of the derived dataset directory name. -That is, in a directory `/derivatives/[-]/`, the first -`GeneratedBy` object should have a `Name` of ``. 
- -Example: - -```JSON -{ - "Name": "FMRIPREP Outputs", - "BIDSVersion": "1.6.0", - "DatasetType": "derivative", - "GeneratedBy": [ - { - "Name": "fmriprep", - "Version": "1.4.1", - "Container": { - "Type": "docker", - "Tag": "poldracklab/fmriprep:1.4.1" - } - }, - { - "Name": "Manual", - "Description": "Re-added RepetitionTime metadata to bold.json files" - } - ], - "SourceDatasets": [ - { - "DOI": "doi:10.18112/openneuro.ds000114.v1.0.1", - "URL": "https://openneuro.org/datasets/ds000114/versions/1.0.1", - "Version": "1.0.1" - } - ] -} -``` - -### `README` - - -{{ MACROS___render_text("objects.files.README.description") }} - -### `CITATION.cff` - - -{{ MACROS___render_text("objects.files.CITATION.description") }} - -For most redundant fields between `CITATION.cff` and `dataset_description.json`, -the `CITATION.cff` SHOULD take precedence. -To avoid inconsistency, metadata present in `CITATION.cff` SHOULD NOT be -be included in `dataset_description.json`, with the exception of `Name` and -`DatasetDOI`, to ensure that `CITATION.cff`-unaware tools can generate -references to the dataset. -In particular, if `CITATION.cff` is present, -the `"Authors"` field of `dataset_description.json` MUST be omitted, -and the `"HowToAcknowledge"`, `"License"` and `"ReferencesAndLinks"` SHOULD be omitted -in favor of the `CITATION.cff` fields `message`/`preferred-citation`, `license` and -`references`. - -### `CHANGES` - - -{{ MACROS___render_text("objects.files.CHANGES.description") }} - -Example: - -```Text -1.0.1 2015-08-27 - - Fixed slice timing information. - -1.0.0 2015-08-17 - - Initial release. -``` - -### `LICENSE` - - -{{ MACROS___render_text("objects.files.LICENSE.description") }} +# Data description ## Participants file @@ -528,20 +314,3 @@ ses-predrug 2009-06-15T13:45:30 120 ses-postdrug 2009-06-16T13:45:30 100 ses-followup 2009-06-17T13:45:30 110 ``` - -## Code - -Template: `code/*` - -Source code of scripts that were used to prepare the dataset MAY be stored here. 
-Examples include deidentification or defacing of the data, or -the conversion from the format of the source data to the BIDS format -(see [source vs. raw vs. derived data](./common-principles.md#source-vs-raw-vs-derived-data)). -Extra care should be taken to avoid including original IDs or -any identifiable information with the source code. -There are no limitations or recommendations on the language and/or -code organization of these scripts at the moment. - - - -[bids uris]: ./common-principles.md#bids-uri diff --git a/src/modality-agnostic-files/dataset-description.md b/src/modality-agnostic-files/dataset-description.md new file mode 100644 index 0000000000..a4c36a7207 --- /dev/null +++ b/src/modality-agnostic-files/dataset-description.md @@ -0,0 +1,213 @@ +# Dataset description + +Templates: + +- `dataset_description.json` +- `README[.md|.rst|.txt]` +- `CITATION.cff` +- `CHANGES` +- `LICENSE[.md|.rst|.txt]` + +## `dataset_description.json` + + +{{ MACROS___render_text("objects.files.dataset_description.description") }} + +Every dataset MUST include this file with the following fields: + + +{{ MACROS___make_metadata_table( + { + "Name": "REQUIRED", + "BIDSVersion": "REQUIRED", + "HEDVersion": "RECOMMENDED", + "DatasetLinks": "REQUIRED if [BIDS URIs][] are used", + "DatasetType": "RECOMMENDED", + "License": "RECOMMENDED", + "Authors": "RECOMMENDED if CITATION.cff is not present", + "Acknowledgements": "OPTIONAL", + "HowToAcknowledge": "OPTIONAL", + "Funding": "OPTIONAL", + "EthicsApprovals": "OPTIONAL", + "ReferencesAndLinks": "OPTIONAL", + "DatasetDOI": "OPTIONAL", + "GeneratedBy": "RECOMMENDED", + "SourceDatasets": "RECOMMENDED", + } +) }} + +Each object in the `GeneratedBy` array includes the following REQUIRED, RECOMMENDED +and OPTIONAL keys: + + +{{ MACROS___make_subobject_table("metadata.GeneratedBy.items") }} + +Example: + +```JSON +{ + "Name": "The mother of all experiments", + "BIDSVersion": "1.6.0", + "DatasetType": "raw", + "License": "CC0", + 
"Authors": [ + "Paul Broca", + "Carl Wernicke" + ], + "Acknowledgements": "Special thanks to Korbinian Brodmann for help in formatting this dataset in BIDS. We thank Alan Lloyd Hodgkin and Andrew Huxley for helpful comments and discussions about the experiment and manuscript; Hermann Ludwig Helmholtz for administrative support; and Claudius Galenus for providing data for the medial-to-lateral index analysis.", + "HowToAcknowledge": "Please cite this paper: https://www.ncbi.nlm.nih.gov/pubmed/001012092119281", + "Funding": [ + "National Institute of Neuroscience Grant F378236MFH1", + "National Institute of Neuroscience Grant 5RMZ0023106" + ], + "EthicsApprovals": [ + "Army Human Research Protections Office (Protocol ARL-20098-10051, ARL 12-040, and ARL 12-041)" + ], + "ReferencesAndLinks": [ + "https://www.ncbi.nlm.nih.gov/pubmed/001012092119281", + "Alzheimer A., & Kraepelin, E. (2015). Neural correlates of presenile dementia in humans. Journal of Neuroscientific Data, 2, 234001. doi:1920.8/jndata.2015.7" + ], + "DatasetDOI": "doi:10.0.2.3/dfjj.10", + "HEDVersion": "8.0.0", + "GeneratedBy": [ + { + "Name": "reproin", + "Version": "0.6.0", + "Container": { + "Type": "docker", + "Tag": "repronim/reproin:0.6.0" + } + } + ], + "SourceDatasets": [ + { + "URL": "s3://dicoms/studies/correlates", + "Version": "April 11 2011" + } + ] +} +``` + +### Derived dataset and pipeline description + +As for any BIDS dataset, a `dataset_description.json` file MUST be found at the +top level of every derived dataset: +`<dataset>/derivatives/<pipeline_name>/dataset_description.json`. + +In contrast to raw BIDS datasets, derived BIDS datasets MUST include a +`GeneratedBy` key: + + +{{ MACROS___make_metadata_table( + { + "GeneratedBy": "REQUIRED" + } +) }} + +If a derived dataset is stored as a subdirectory of the raw dataset, then the `Name` field +of the first `GeneratedBy` object MUST be a substring of the derived dataset directory name. 
+That is, in a directory `<dataset>/derivatives/<pipeline>[-<variant>]/`, the first +`GeneratedBy` object should have a `Name` of `<pipeline>`. + +Example: + +```JSON +{ + "Name": "FMRIPREP Outputs", + "BIDSVersion": "1.6.0", + "DatasetType": "derivative", + "GeneratedBy": [ + { + "Name": "fmriprep", + "Version": "1.4.1", + "Container": { + "Type": "docker", + "Tag": "poldracklab/fmriprep:1.4.1" + } + }, + { + "Name": "Manual", + "Description": "Re-added RepetitionTime metadata to bold.json files" + } + ], + "SourceDatasets": [ + { + "DOI": "doi:10.18112/openneuro.ds000114.v1.0.1", + "URL": "https://openneuro.org/datasets/ds000114/versions/1.0.1", + "Version": "1.0.1" + } + ] +} +``` + +## `README` + + +{{ MACROS___render_text("objects.files.README.description") }} + +## `CITATION.cff` + + +{{ MACROS___render_text("objects.files.CITATION.description") }} + +For most redundant fields between `CITATION.cff` and `dataset_description.json`, +the `CITATION.cff` SHOULD take precedence. +To avoid inconsistency, metadata present in `CITATION.cff` SHOULD NOT +be included in `dataset_description.json`, with the exception of `Name` and +`DatasetDOI`, to ensure that `CITATION.cff`-unaware tools can generate +references to the dataset. +In particular, if `CITATION.cff` is present, +the `"Authors"` field of `dataset_description.json` MUST be omitted, +and the `"HowToAcknowledge"`, `"License"` and `"ReferencesAndLinks"` SHOULD be omitted +in favor of the `CITATION.cff` fields `message`/`preferred-citation`, `license` and +`references`. + +## `CHANGES` + + +{{ MACROS___render_text("objects.files.CHANGES.description") }} + +Example: + +```Text +1.0.1 2015-08-27 + - Fixed slice timing information. + +1.0.0 2015-08-17 + - Initial release. 
+``` + +## `LICENSE` + + +{{ MACROS___render_text("objects.files.LICENSE.description") }} From 3165456176b08b76e4bfaab82b2318280f5dc54a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Boris=20Cl=C3=A9net?= Date: Wed, 23 Apr 2025 10:36:58 +0200 Subject: [PATCH 02/11] Update links to modality-agnostic-files section --- src/appendices/meg-file-formats.md | 2 +- src/appendices/qmri.md | 2 +- src/common-principles.md | 10 +++++----- src/derivatives/introduction.md | 2 +- src/longitudinal-and-multi-site-studies.md | 2 +- src/modality-agnostic-files/code.md | 4 ++-- src/modality-agnostic-files/data-description.md | 14 +++++++------- src/modality-agnostic-files/dataset-description.md | 2 +- .../electroencephalography.md | 2 +- src/modality-specific-files/genetic-descriptor.md | 2 +- .../intracranial-electroencephalography.md | 2 +- .../magnetoencephalography.md | 4 ++-- src/modality-specific-files/microscopy.md | 4 ++-- src/modality-specific-files/motion.md | 6 +++--- .../positron-emission-tomography.md | 6 +++--- src/schema/objects/entities.yaml | 2 +- src/schema/objects/files.yaml | 4 ++-- 17 files changed, 35 insertions(+), 35 deletions(-) diff --git a/src/appendices/meg-file-formats.md b/src/appendices/meg-file-formats.md index 81b640b517..615c62fe80 100644 --- a/src/appendices/meg-file-formats.md +++ b/src/appendices/meg-file-formats.md @@ -212,7 +212,7 @@ entity to indicate each part. If there are multiple parts of a recording and the optional `scans.tsv` is provided, remember to list all files separately in `scans.tsv` and that the entries for the `acq_time` column in `scans.tsv` MUST all be identical, as described in -[Scans file](../modality-agnostic-files.md#scans-file). +[Scans file](../modality-agnostic-files/data-description.md#scans-file). 
Example: diff --git a/src/appendices/qmri.md b/src/appendices/qmri.md index 83247c119b..3ca687e028 100644 --- a/src/appendices/qmri.md +++ b/src/appendices/qmri.md @@ -314,7 +314,7 @@ and a guide for using macros can be found at ### Metadata requirements for qMRI maps As qMRI maps are stored as derivatives, they are subjected to the metadata requirements of -[derived datasets](../modality-agnostic-files.md#derived-dataset-and-pipeline-description). +[derived datasets](../modality-agnostic-files/dataset-description.md#derived-dataset-and-pipeline-description). An example `dataset_description.json` for a qMRI map derivatives directory: diff --git a/src/common-principles.md b/src/common-principles.md index ba9afc1706..b100d1ef17 100644 --- a/src/common-principles.md +++ b/src/common-principles.md @@ -109,7 +109,7 @@ Data for each subject are placed in subdirectories named "`sub-