From e19a466d451a8a487bacfbfa4d35b107065e91d6 Mon Sep 17 00:00:00 2001 From: aclum Date: Mon, 21 Oct 2024 12:13:04 -0700 Subject: [PATCH 1/4] Add genomad files to FileTypeEnum --- src/schema/basic_slots.yaml | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/src/schema/basic_slots.yaml b/src/schema/basic_slots.yaml index 6198d8fca3..de7089c236 100644 --- a/src/schema/basic_slots.yaml +++ b/src/schema/basic_slots.yaml @@ -607,6 +607,29 @@ enums: FileTypeEnum: permissible_values: + Virus Summary: + description: Tab separated file listing the viruses found by geNomad. + see_also: + - https://portal.nersc.gov/genomad/ + annotations: + file_name_pattern: '^_virus_summary\.tsv?$' + + Plasmid Summary: + description: Tab separated file listing the plasmids found by geNomad. + see_also: + - https://portal.nersc.gov/genomad/ + annotations: + file_name_pattern: '^_plasmid_summary\.tsv?$' + + GeNomad Aggregated Classification: + description: >- + Tab separated file which combines the results from neural network-based classification + and marker-based classification for virus and plasmid detection with geNomad. + see_also: + - https://portal.nersc.gov/genomad/ + annotations: + file_name_pattern: '^_aggregated_classification\.tsv?$' + Reference Calibration File: description: A file that contains data used to calibrate a natural organic matter or metabalomics analysis. 
From a9fcb5673d4b20ed7296c1180435c9f3b910ecd6 Mon Sep 17 00:00:00 2001 From: aclum Date: Mon, 21 Oct 2024 12:33:56 -0700 Subject: [PATCH 2/4] Create DataObject-genomad.yaml --- src/data/valid/DataObject-genomad.yaml | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) create mode 100644 src/data/valid/DataObject-genomad.yaml diff --git a/src/data/valid/DataObject-genomad.yaml b/src/data/valid/DataObject-genomad.yaml new file mode 100644 index 0000000000..d44dba90a0 --- /dev/null +++ b/src/data/valid/DataObject-genomad.yaml @@ -0,0 +1,18 @@ +- id: nmdc:dobj-11-dtTMNa + type: nmdc:DataObject + description: "Virus summary nmdc:ann0vx38" + name: nmdc_wfmgan-11-abc123_virus_summary.tsv + data_object_type: Virus Summary + file_size_bytes: 1234 +- id: nmdc:dobj-11-dtTMNc + type: nmdc:DataObject + description: "Plasmid summary nmdc:ann0vx38" + name: nmdc_wfmgan-11-abc123_plasmid_summary.tsv + data_object_type: Plasmid Summary + file_size_bytes: 1234 +- id: nmdc:dobj-11-dtTMNd + type: nmdc:DataObject + description: "agg results nmdc:ann0vx38" + name: nmdc_wfmgan-11-abc12_aggregated_classification\.tsv + data_object_type: GeNomad Aggregated Classification + From 66319470ee1949b575c9637fdc5271c4c4ae1f93 Mon Sep 17 00:00:00 2001 From: aclum Date: Mon, 21 Oct 2024 14:04:25 -0700 Subject: [PATCH 3/4] change valid file to Database instead of DataObject --- .../valid/Database-data_objects_genomad.yaml | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) create mode 100644 src/data/valid/Database-data_objects_genomad.yaml diff --git a/src/data/valid/Database-data_objects_genomad.yaml b/src/data/valid/Database-data_objects_genomad.yaml new file mode 100644 index 0000000000..029c63ab43 --- /dev/null +++ b/src/data/valid/Database-data_objects_genomad.yaml @@ -0,0 +1,18 @@ +data_object_set: +- id: nmdc:dobj-11-dtTMNa + type: nmdc:DataObject + description: "Virus summary nmdc:ann0vx38" + name: nmdc_wfmgan-11-abc123_virus_summary.tsv + data_object_type: Virus Summary + 
file_size_bytes: 1234 +- id: nmdc:dobj-11-dtTMNc + type: nmdc:DataObject + description: "Plasmid summary nmdc:ann0vx38" + name: nmdc_wfmgan-11-abc123_plasmid_summary.tsv + data_object_type: Plasmid Summary + file_size_bytes: 1234 +- id: nmdc:dobj-11-dtTMNd + type: nmdc:DataObject + description: "agg results nmdc:ann0vx38" + name: nmdc_wfmgan-11-abc12_aggregated_classification.tsv + data_object_type: GeNomad Aggregated Classification From 985228771641b4766098f908d2ba59614f493d27 Mon Sep 17 00:00:00 2001 From: aclum Date: Mon, 21 Oct 2024 14:16:46 -0700 Subject: [PATCH 4/4] remove DataObject-genomad.yaml --- src/data/valid/DataObject-genomad.yaml | 18 ------------------ 1 file changed, 18 deletions(-) delete mode 100644 src/data/valid/DataObject-genomad.yaml diff --git a/src/data/valid/DataObject-genomad.yaml b/src/data/valid/DataObject-genomad.yaml deleted file mode 100644 index d44dba90a0..0000000000 --- a/src/data/valid/DataObject-genomad.yaml +++ /dev/null @@ -1,18 +0,0 @@ -- id: nmdc:dobj-11-dtTMNa - type: nmdc:DataObject - description: "Virus summary nmdc:ann0vx38" - name: nmdc_wfmgan-11-abc123_virus_summary.tsv - data_object_type: Virus Summary - file_size_bytes: 1234 -- id: nmdc:dobj-11-dtTMNc - type: nmdc:DataObject - description: "Plasmid summary nmdc:ann0vx38" - name: nmdc_wfmgan-11-abc123_plasmid_summary.tsv - data_object_type: Plasmid Summary - file_size_bytes: 1234 -- id: nmdc:dobj-11-dtTMNd - type: nmdc:DataObject - description: "agg results nmdc:ann0vx38" - name: nmdc_wfmgan-11-abc12_aggregated_classification\.tsv - data_object_type: GeNomad Aggregated Classification -