From e645564a612d494b5d5dc265fde43630a9c5b22a Mon Sep 17 00:00:00 2001 From: Alessia Peviani Date: Tue, 14 Dec 2021 10:49:13 +0100 Subject: [PATCH 01/39] Add gitignore --- .gitignore | 111 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 111 insertions(+) create mode 100644 .gitignore diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..efe313b --- /dev/null +++ b/.gitignore @@ -0,0 +1,111 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +env/ +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*.egg-info/ +.installed.cfg +*.egg + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.nox/ +.tox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +.hypothesis/ +.pytest_cache/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_html/ +docs/_pdf/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# pyenv +.python-version + +# celery beat schedule file +celerybeat-schedule + +# SageMath parsed files +*.sage.py + +# dotenv +.env + +# virtualenv +.venv +venv/ +ENV/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ + +# IDE settings +.vscode/ +.idea/* + +# OSX +.DS_Store \ No newline at end of file From 8e1f8980334d689cf83cda4c1181b44a99677c03 Mon Sep 17 00:00:00 2001 From: Alessia Peviani Date: Tue, 14 Dec 2021 10:57:16 +0100 Subject: [PATCH 02/39] Add current dataset and config 
--- config/ontology_config.json | 158 +++++++ config/sources_config.json | 405 ++++++++++++++++++ test_data/alternative/clinic/RDP-IC.tab | 7 + test_data/alternative/clinic/RDP-IC.tab.sha1 | 1 + test_data/alternative/clinic/RDP-Patient.tab | 6 + .../alternative/clinic/RDP-Patient.tab.sha1 | 1 + .../clinic/RDP-Patient_codebook.txt | 1 + .../clinic/RDP-Patient_codebook.txt.sha1 | 1 + .../alternative/laboratory/biomaterial.txt | 22 + .../laboratory/biomaterial.txt.sha1 | 1 + .../alternative/laboratory/biosource.txt | 20 + .../alternative/laboratory/biosource.txt.sha1 | 1 + test_data/alternative/studies/death.txt | 4 + test_data/alternative/studies/death.txt.sha1 | 1 + .../alternative/studies/death_codebook.txt | 1 + .../studies/death_codebook.txt.sha1 | 1 + test_data/alternative/studies/diagnosis.txt | 18 + .../alternative/studies/diagnosis.txt.sha1 | 1 + .../studies/diagnosis_codebook.txt | 1 + .../studies/diagnosis_codebook.txt.sha1 | 1 + test_data/alternative/studies/individual.txt | 17 + .../alternative/studies/individual.txt.sha1 | 1 + .../studies/individual_codebook.txt | 1 + .../studies/individual_codebook.txt.sha1 | 1 + .../alternative/studies/individual_study.txt | 18 + .../studies/individual_study.txt.sha1 | 1 + test_data/alternative/studies/study.txt | 3 + test_data/alternative/studies/study.txt.sha1 | 1 + test_data/full_dataset/clinic/RDP-IC.tab | 7 + test_data/full_dataset/clinic/RDP-IC.tab.sha1 | 1 + test_data/full_dataset/clinic/RDP-Patient.tab | 7 + .../full_dataset/clinic/RDP-Patient.tab.sha1 | 1 + .../clinic/RDP-Patient_codebook.txt | 1 + .../clinic/RDP-Patient_codebook.txt.sha1 | 1 + .../full_dataset/laboratory/biomaterial.txt | 24 ++ .../laboratory/biomaterial.txt.sha1 | 1 + .../full_dataset/laboratory/biosource.txt | 22 + .../laboratory/biosource.txt.sha1 | 1 + test_data/full_dataset/studies/death.txt | 4 + test_data/full_dataset/studies/death.txt.sha1 | 1 + .../full_dataset/studies/death_codebook.txt | 1 + .../studies/death_codebook.txt.sha1 | 1 
+ test_data/full_dataset/studies/diagnosis.txt | 20 + .../full_dataset/studies/diagnosis.txt.sha1 | 1 + .../studies/diagnosis_codebook.txt | 1 + .../studies/diagnosis_codebook.txt.sha1 | 1 + test_data/full_dataset/studies/individual.txt | 18 + .../full_dataset/studies/individual.txt.sha1 | 1 + .../studies/individual_codebook.txt | 1 + .../studies/individual_codebook.txt.sha1 | 1 + .../full_dataset/studies/individual_study.txt | 18 + .../studies/individual_study.txt.sha1 | 1 + test_data/full_dataset/studies/study.txt | 3 + test_data/full_dataset/studies/study.txt.sha1 | 1 + 54 files changed, 835 insertions(+) create mode 100644 config/ontology_config.json create mode 100644 config/sources_config.json create mode 100644 test_data/alternative/clinic/RDP-IC.tab create mode 100644 test_data/alternative/clinic/RDP-IC.tab.sha1 create mode 100644 test_data/alternative/clinic/RDP-Patient.tab create mode 100644 test_data/alternative/clinic/RDP-Patient.tab.sha1 create mode 100644 test_data/alternative/clinic/RDP-Patient_codebook.txt create mode 100644 test_data/alternative/clinic/RDP-Patient_codebook.txt.sha1 create mode 100644 test_data/alternative/laboratory/biomaterial.txt create mode 100644 test_data/alternative/laboratory/biomaterial.txt.sha1 create mode 100644 test_data/alternative/laboratory/biosource.txt create mode 100644 test_data/alternative/laboratory/biosource.txt.sha1 create mode 100644 test_data/alternative/studies/death.txt create mode 100644 test_data/alternative/studies/death.txt.sha1 create mode 100644 test_data/alternative/studies/death_codebook.txt create mode 100644 test_data/alternative/studies/death_codebook.txt.sha1 create mode 100644 test_data/alternative/studies/diagnosis.txt create mode 100644 test_data/alternative/studies/diagnosis.txt.sha1 create mode 100644 test_data/alternative/studies/diagnosis_codebook.txt create mode 100644 test_data/alternative/studies/diagnosis_codebook.txt.sha1 create mode 100644 
test_data/alternative/studies/individual.txt create mode 100644 test_data/alternative/studies/individual.txt.sha1 create mode 100644 test_data/alternative/studies/individual_codebook.txt create mode 100644 test_data/alternative/studies/individual_codebook.txt.sha1 create mode 100644 test_data/alternative/studies/individual_study.txt create mode 100644 test_data/alternative/studies/individual_study.txt.sha1 create mode 100644 test_data/alternative/studies/study.txt create mode 100644 test_data/alternative/studies/study.txt.sha1 create mode 100644 test_data/full_dataset/clinic/RDP-IC.tab create mode 100644 test_data/full_dataset/clinic/RDP-IC.tab.sha1 create mode 100644 test_data/full_dataset/clinic/RDP-Patient.tab create mode 100644 test_data/full_dataset/clinic/RDP-Patient.tab.sha1 create mode 100644 test_data/full_dataset/clinic/RDP-Patient_codebook.txt create mode 100644 test_data/full_dataset/clinic/RDP-Patient_codebook.txt.sha1 create mode 100644 test_data/full_dataset/laboratory/biomaterial.txt create mode 100644 test_data/full_dataset/laboratory/biomaterial.txt.sha1 create mode 100644 test_data/full_dataset/laboratory/biosource.txt create mode 100644 test_data/full_dataset/laboratory/biosource.txt.sha1 create mode 100644 test_data/full_dataset/studies/death.txt create mode 100644 test_data/full_dataset/studies/death.txt.sha1 create mode 100644 test_data/full_dataset/studies/death_codebook.txt create mode 100644 test_data/full_dataset/studies/death_codebook.txt.sha1 create mode 100644 test_data/full_dataset/studies/diagnosis.txt create mode 100644 test_data/full_dataset/studies/diagnosis.txt.sha1 create mode 100644 test_data/full_dataset/studies/diagnosis_codebook.txt create mode 100644 test_data/full_dataset/studies/diagnosis_codebook.txt.sha1 create mode 100644 test_data/full_dataset/studies/individual.txt create mode 100644 test_data/full_dataset/studies/individual.txt.sha1 create mode 100644 test_data/full_dataset/studies/individual_codebook.txt create 
mode 100644 test_data/full_dataset/studies/individual_codebook.txt.sha1 create mode 100644 test_data/full_dataset/studies/individual_study.txt create mode 100644 test_data/full_dataset/studies/individual_study.txt.sha1 create mode 100644 test_data/full_dataset/studies/study.txt create mode 100644 test_data/full_dataset/studies/study.txt.sha1 diff --git a/config/ontology_config.json b/config/ontology_config.json new file mode 100644 index 0000000..e85bc27 --- /dev/null +++ b/config/ontology_config.json @@ -0,0 +1,158 @@ +{ + "nodes": [ + { + "name": "01. Patient information", + "children": [ + { + "name": "03. Gender", + "concept_code": "Individual.gender" + }, + { + "name": "01. Date of birth", + "concept_code": "Individual.birth_date" + }, + { + "name": "02. Taxonomy", + "concept_code": "Individual.taxonomy" + }, + { + "name": "04. Date of death", + "concept_code": "Individual.death_date" + }, + { + "name": "Informed_consent", + "children": [ + { + "name": "01. Informed consent type", + "concept_code": "Individual.ic_type" + }, + { + "name": "Informed consent version", + "concept_code": "Individual.ic_version" + }, + { + "name": "02. Date informed Consent given", + "concept_code": "Individual.ic_given_date" + }, + { + "name": "03. Date informed consent withdrawn", + "concept_code": "Individual.ic_withdrawn_date" + }, + { + "name": "04. Report hereditary susceptibility", + "concept_code": "Individual.report_her_susc" + } + ] + } + ] + }, + { + "name": "02. Diagnosis information", + "children": [ + { + "name": "02. Tumor type", + "concept_code": "Diagnosis.tumor_type" + }, + { + "name": "03. Topography", + "concept_code": "Diagnosis.topography" + }, + { + "name": "Treatment", + "concept_code": "Diagnosis.treatment_protocol" + }, + { + "name": "04. Tumor stage", + "concept_code": "Diagnosis.tumor_stage" + }, + { + "name": "01. Date of diagnosis", + "concept_code": "Diagnosis.diagnosis_date" + }, + { + "name": "05. 
Center of treatment", + "concept_code": "Diagnosis.diagnosis_center" + } + ] + }, + { + "name": "03. Biosource information", + "children": [ + { + "name": "06. Biosource dedicated for specific study", + "concept_code": "Biosource.biosource_dedicated" + }, + { + "name": "01. Biosource parent", + "concept_code": "Biosource.src_biosource_id" + }, + { + "name": "03. Tissue", + "concept_code": "Biosource.tissue" + }, + { + "name": "02. Date of biosource", + "concept_code": "Biosource.biosource_date" + }, + { + "name": "04. Disease status", + "concept_code": "Biosource.disease_status" + }, + { + "name": "05. Tumor percentage", + "concept_code": "Biosource.tumor_percentage" + } + ] + }, + { + "name": "04. Biomaterial information", + "children": [ + { + "name": "01. Biomaterial parent", + "concept_code": "Biomaterial.src_biomaterial_id" + }, + { + "name": "02. Date of biomaterial", + "concept_code": "Biomaterial.biomaterial_date" + }, + { + "name": "03. Biomaterial type", + "concept_code": "Biomaterial.type" + }, + { + "name": "04. Library strategy", + "concept_code": "Biomaterial.library_strategy" + }, + { + "name": "05. Analysis type", + "concept_code": "Biomaterial.analysis_type" + } + ] + }, + { + "name": "05. Study information", + "children": [ + { + "name": "01. Study ID", + "concept_code": "Study.study_id" + }, + { + "name": "02. Study acronym", + "concept_code": "Study.acronym" + }, + { + "name": "03. Study title", + "concept_code": "Study.title" + }, + { + "name": "Study datadictionary", + "concept_code": "Study.datadictionary" + }, + { + "name": "04. 
Individual Study ID", + "concept_code": "IndividualStudy.individual_study_id" + } + ] + } + ] +} diff --git a/config/sources_config.json b/config/sources_config.json new file mode 100644 index 0000000..5ccc239 --- /dev/null +++ b/config/sources_config.json @@ -0,0 +1,405 @@ +{ + "entities": { + "Individual": { + "attributes": [ + { + "name": "individual_id", + "sources": [ + { + "file": "clinic/RDP-Patient.tab", + "column": "INDIVIDUAL_ID" + }, + { + "file": "clinic/RDP-IC.tab", + "column": "INDIVIDUAL_ID" + }, + { + "file": "studies/individual.txt", + "column": "INDIVIDUAL_ID" + }, + { + "file": "studies/death.txt", + "column": "INDIVIDUAL_ID" + } + ] + }, + { + "name": "birth_date", + "sources": [ + { + "file": "clinic/RDP-Patient.tab", + "column": "Gebdat", + "date_format": "%d%b%Y" + }, + { + "file": "studies/individual.txt", + "column": "DTOB", + "date_format": "%d/%m/%Y %H:%M:%S" + } + ] + }, + { + "name": "gender", + "sources": [ + { + "file": "clinic/RDP-Patient.tab", + "column": "Geslacht" + }, + { + "file": "studies/individual.txt", + "column": "SEX" + } + ] + }, + { + "name": "death_date", + "sources": [ + { + "file": "clinic/RDP-Patient.tab", + "column": "Overldat", + "date_format": "%d%b%Y" + }, + { + "file": "studies/death.txt", + "column": "DTDEATH", + "date_format": "%d/%m/%Y %H:%M:%S" + } + ] + }, + { + "name": "ic_type", + "sources": [ + { + "file": "clinic/RDP-IC.tab", + "column": "00004_Toestemmingsstatus" + }, + { + "file": "studies/individual.txt", + "column": "IFCDATR" + } + ] + }, + { + "name": "ic_given_date", + "sources": [ + { + "file": "clinic/RDP-IC.tab", + "column": "00007_Datum toestemming", + "date_format": "%d/%m/%Y" + } + ] + }, + { + "name": "ic_withdrawn_date", + "sources": [ + { + "file": "clinic/RDP-IC.tab", + "column": "00010_Datum geen toestemming", + "date_format": "%d/%m/%Y" + } + ] + }, + { + "name": "report_her_susc", + "sources": [ + { + "file": "clinic/RDP-IC.tab", + "column": "00012_Datum einde deelname", + 
"date_format": "%d/%m/%Y" + } + ] + } + ] + }, + "Diagnosis": { + "attributes": [ + { + "name": "individual_id", + "sources": [ + { + "file": "studies/diagnosis.txt", + "column": "INDIVIDUAL_ID" + } + ] + }, + { + "name": "diagnosis_id", + "sources": [ + { + "file": "studies/diagnosis.txt", + "column": "CIDDIAG" + } + ] + }, + { + "name": "tumor_type", + "sources": [ + { + "file": "studies/diagnosis.txt", + "column": "DIAGCD" + } + ] + }, + { + "name": "topography", + "sources": [ + { + "file": "studies/diagnosis.txt", + "column": "PLOCCD" + } + ] + }, + { + "name": "tumor_stage", + "sources": [ + { + "file": "studies/diagnosis.txt", + "column": "DIAGGRSTX" + } + ] + }, + { + "name": "diagnosis_date", + "sources": [ + { + "file": "studies/diagnosis.txt", + "column": "IDAABA", + "date_format": "%d/%m/%Y %H:%M:%S" + } + ] + }, + { + "name": "diagnosis_center", + "sources": [ + { + "file": "studies/diagnosis.txt", + "column": "HOSPDIAG" + } + ] + } + ] + }, + "Biosource": { + "attributes": [ + { + "name": "biosource_id", + "sources": [ + { + "file": "laboratory/biosource.txt" + } + ] + }, + { + "name": "individual_id", + "sources": [ + { + "file": "laboratory/biosource.txt" + } + ] + }, + { + "name": "diagnosis_id", + "sources": [ + { + "file": "laboratory/biosource.txt" + } + ] + }, + { + "name": "src_biosource_id", + "sources": [ + { + "file": "laboratory/biosource.txt" + } + ] + }, + { + "name": "biosource_dedicated", + "sources": [ + { + "file": "laboratory/biosource.txt" + } + ] + }, + { + "name": "tissue", + "sources": [ + { + "file": "laboratory/biosource.txt" + } + ] + }, + { + "name": "biosource_date", + "sources": [ + { + "file": "laboratory/biosource.txt", + "date_format": "%d/%m/%Y" + } + ] + }, + { + "name": "disease_status", + "sources": [ + { + "file": "laboratory/biosource.txt" + } + ] + }, + { + "name": "tumor_percentage", + "sources": [ + { + "file": "laboratory/biosource.txt" + } + ] + } + ] + }, + "Biomaterial": { + "attributes": [ + { + "name": 
"biomaterial_id", + "sources": [ + { + "file": "laboratory/biomaterial.txt" + } + ] + }, + { + "name": "src_biosource_id", + "sources": [ + { + "file": "laboratory/biomaterial.txt" + } + ] + }, + { + "name": "src_biomaterial_id", + "sources": [ + { + "file": "laboratory/biomaterial.txt" + } + ] + }, + { + "name": "biomaterial_date", + "sources": [ + { + "file": "laboratory/biomaterial.txt", + "date_format": "%d/%m/%Y" + } + ] + }, + { + "name": "type", + "sources": [ + { + "file": "laboratory/biomaterial.txt" + } + ] + } + ] + }, + "Study": { + "attributes": [ + { + "name": "study_id", + "sources": [ + { + "file": "studies/study.txt", + "column": "STUDY_ID" + } + ] + }, + { + "name": "acronym", + "sources": [ + { + "file": "studies/study.txt" + } + ] + }, + { + "name": "title", + "sources": [ + { + "file": "studies/study.txt" + } + ] + }, + { + "name": "datadictionary", + "sources": [ + { + "file": "studies/study.txt" + } + ] + } + ] + }, + "IndividualStudy": { + "attributes": [ + { + "name": "study_id_individual_study_id", + "sources": [ + { + "file": "studies/individual_study.txt", + "column": "STUDY_ID_INDIVIDUAL_STUDY_ID" + } + ] + }, + { + "name": "individual_study_id", + "sources": [ + { + "file": "studies/individual_study.txt", + "column": "INDIVIDUAL_STUDY_ID" + } + ] + }, + { + "name": "individual_id", + "sources": [ + { + "file": "studies/individual_study.txt", + "column": "INDIVIDUAL_ID" + } + ] + }, + { + "name": "study_id", + "sources": [ + { + "file": "studies/individual_study.txt", + "column": "STUDY_ID" + } + ] + } + ] + } + }, + "codebooks": { + "studies/individual.txt": "studies/individual_codebook.txt", + "studies/diagnosis.txt": "studies/diagnosis_codebook.txt", + "studies/death.txt": "studies/death_codebook.txt", + "clinic/RDP-Patient.tab": "clinic/RDP-Patient_codebook.txt" + }, + "file_format": { + "studies/individual.txt": { + "delimiter": "," + }, + "studies/diagnosis.txt": { + "delimiter": "," + }, + "studies/death.txt": { + "delimiter": 
"," + }, + "studies/study.txt": { + "delimiter": "," + }, + "studies/individual_study.txt": { + "delimiter": "," + } + } +} diff --git a/test_data/alternative/clinic/RDP-IC.tab b/test_data/alternative/clinic/RDP-IC.tab new file mode 100644 index 0000000..eac36c0 --- /dev/null +++ b/test_data/alternative/clinic/RDP-IC.tab @@ -0,0 +1,7 @@ +INDIVIDUAL_ID 00004_Toestemmingsstatus 00007_Datum toestemming 00010_Datum geen toestemming 00012_Datum einde deelname +PAT1 expliciete toestemming 24/04/2018 +PAT13 mogelijke kandidaat +PAT14 geen toestemming 28/11/2017 +PAT15 proefpersoon informatie uitgereikt maar nog geen toestemming ontvangen +PAT16 geïnformeerd door studieteam +PAT17 mogelijke kandidaat diff --git a/test_data/alternative/clinic/RDP-IC.tab.sha1 b/test_data/alternative/clinic/RDP-IC.tab.sha1 new file mode 100644 index 0000000..a616f62 --- /dev/null +++ b/test_data/alternative/clinic/RDP-IC.tab.sha1 @@ -0,0 +1 @@ +f84346dff7e4fae2e720b118d1f4cd846a4ec5d3 RDP-IC.tab diff --git a/test_data/alternative/clinic/RDP-Patient.tab b/test_data/alternative/clinic/RDP-Patient.tab new file mode 100644 index 0000000..4e7b058 --- /dev/null +++ b/test_data/alternative/clinic/RDP-Patient.tab @@ -0,0 +1,6 @@ +INDIVIDUAL_ID Gebdat Geslacht Overleden Overldat +PAT1 15AUG2000 M 0 +PAT3 07NOV2001 M 0 +PAT15 08JAN1999 V 0 +PAT16 25DEC1997 V 0 +PAT17 29SEP1991 V 0 diff --git a/test_data/alternative/clinic/RDP-Patient.tab.sha1 b/test_data/alternative/clinic/RDP-Patient.tab.sha1 new file mode 100644 index 0000000..2712e56 --- /dev/null +++ b/test_data/alternative/clinic/RDP-Patient.tab.sha1 @@ -0,0 +1 @@ +b7860a8380fc89de15598cae38effad4979e6c84 RDP-Patient.tab diff --git a/test_data/alternative/clinic/RDP-Patient_codebook.txt b/test_data/alternative/clinic/RDP-Patient_codebook.txt new file mode 100644 index 0000000..76336f2 --- /dev/null +++ b/test_data/alternative/clinic/RDP-Patient_codebook.txt @@ -0,0 +1 @@ +1 Geslacht M male V female \ No newline at end of file diff --git 
a/test_data/alternative/clinic/RDP-Patient_codebook.txt.sha1 b/test_data/alternative/clinic/RDP-Patient_codebook.txt.sha1 new file mode 100644 index 0000000..7a26340 --- /dev/null +++ b/test_data/alternative/clinic/RDP-Patient_codebook.txt.sha1 @@ -0,0 +1 @@ +7854a94ee9adb8f765a65db17cd17fcc9fea708d RDP-Patient_codebook.txt diff --git a/test_data/alternative/laboratory/biomaterial.txt b/test_data/alternative/laboratory/biomaterial.txt new file mode 100644 index 0000000..b1b0fe7 --- /dev/null +++ b/test_data/alternative/laboratory/biomaterial.txt @@ -0,0 +1,22 @@ +biomaterial_id biomaterial_date type src_biosource_id src_biomaterial_id description label +BIOM1T 07/03/2018 total RNA BIOS1T hepatoblastoma RNA HBL_RNA +BIOM1N 22/11/2018 total RNA BIOS1N healthy tissue HBL_normal_RNA +BIOM3T 16/04/2018 genomic DNA BIOS3T nephroblastoma DNA NBL_DNA +BIOM3N 02/05/2018 genomic DNA BIOS3N healthy tissue NBL_normal_DNA +BIOM4 07/03/2018 mRNA BIOS4 genomic DNA isolated from 1222TI 1222TI_DNA +BIOM5 05/06/2011 mRNA BIOS5 neuroblastoma DNA NBL_DNA +BIOM6 05/06/2011 mRNA DNA BIOS6 neuroblastoma DNA NBL_DNA +BIOM7 07/03/2018 mRNA DNA BIOS7 genomic DNA isolated from 1222TI 1222TI_DNA +BIOM8 05/06/2011 mRNA DNA BIOS8 neuroblastoma DNA NBL_DNA +BIOM9 05/06/2011 genomic DNA BIOS9 neuroblastoma DNA NBL_DNA +BIOM10 07/03/2018 genomic DNA BIOS10 genomic DNA isolated from 1222TI 1222TI_DNA +BIOM11 05/06/2011 genomic DNA BIOS11 neuroblastoma DNA NBL_DNA +BIOM12 05/06/2011 genomic DNA BIOS12 neuroblastoma DNA NBL_DNA +BIOM13 07/03/2018 mRNA BIOS13 genomic DNA isolated from 1222TI 1222TI_DNA +BIOM14 05/06/2011 mRNA BIOS14 neuroblastoma DNA NBL_DNA +BIOM15 05/06/2011 genomic DNA BIOS15 neuroblastoma DNA NBL_DNA +BIOM16 07/03/2018 genomic DNA BIOS16 genomic DNA isolated from 1222TI 1222TI_DNA +BIOM17 05/06/2011 genomic DNA BIOS17 neuroblastoma DNA NBL_DNA +BIOM18 05/06/2011 mRNA BIOS18 neuroblastoma DNA NBL_DNA +BIOM20 05/06/2011 mRNA BIOS11 BIOM11 neuroblastoma DNA NBL_DNA +BIOM21 05/06/2011 
genomic DNA BIOS12 BIOM12 neuroblastoma DNA NBL_DNA diff --git a/test_data/alternative/laboratory/biomaterial.txt.sha1 b/test_data/alternative/laboratory/biomaterial.txt.sha1 new file mode 100644 index 0000000..de6f64f --- /dev/null +++ b/test_data/alternative/laboratory/biomaterial.txt.sha1 @@ -0,0 +1 @@ +0d1fe56d7e23f84753ef00c6c694d12aa95ca97a biomaterial.txt diff --git a/test_data/alternative/laboratory/biosource.txt b/test_data/alternative/laboratory/biosource.txt new file mode 100644 index 0000000..640ff67 --- /dev/null +++ b/test_data/alternative/laboratory/biosource.txt @@ -0,0 +1,20 @@ +biosource_id biosource_dedicated tissue biosource_date disease_status individual_id diagnosis_id src_biosource_id tumor_percentage label description +BIOS1T no liver 25/01/2018 primary tumor PAT1 DIA1 25 HBL hepatoblastoma +BIOS1N no liver 25/02/2018 unaffected PAT1 0 HBL_normal normal hepatic tissue +BIOS3T yes kidney 24/02/2018 primary tumor PAT3 DIA3 15 NBL nephroblastoma +BIOS3N yes kidney 04/03/2018 unaffected PAT3 0 NBL_normal normal kidney tissue +BIOS4 yes nerve 25/01/2018 unaffected PAT4 DIA4 0 NBL_normal normal nerve tissue +BIOS5 yes eye 24/01/2018 primary tumor PAT5 DIA5 5 NBL neuroblastoma +BIOS6 yes nerve 24/01/2018 primary tumor PAT6 DIA6 5 NBL neuroblastoma +BIOS7 yes eye 25/01/2018 unaffected PAT7 DIA7 0 NBL_normal normal nerve tissue +BIOS8 yes nerve 24/01/2018 primary tumor PAT8 DIA8 5 NBL neuroblastoma +BIOS9 no eye 24/01/2018 primary tumor PAT9 DIA9 5 NBL neuroblastoma +BIOS10 yes eye 25/01/2018 unaffected PAT10 DIA10 0 NBL_normal normal nerve tissue +BIOS11 yes eye 24/01/2018 primary tumor PAT11 DIA11 5 NBL neuroblastoma +BIOS12 yes eye 24/01/2018 primary tumor PAT12 DIA12 5 NBL neuroblastoma +BIOS13 yes nerve 25/01/2018 unaffected PAT13 DIA13 0 NBL_normal normal nerve tissue +BIOS14 yes nerve 24/01/2018 primary tumor PAT14 DIA14 5 NBL neuroblastoma +BIOS15 no nerve 24/01/2018 primary tumor PAT15 DIA15 5 NBL neuroblastoma +BIOS16 no nerve 25/01/2018 
unaffected PAT16 DIA16 0 NBL_normal normal nerve tissue +BIOS17 yes nerve 24/01/2018 primary tumor PAT17 DIA17 5 NBL neuroblastoma +BIOS18 yes liver 30/01/2018 primary tumor PAT1 DIA18 BIOS1T 30 HBL hepatoblastoma diff --git a/test_data/alternative/laboratory/biosource.txt.sha1 b/test_data/alternative/laboratory/biosource.txt.sha1 new file mode 100644 index 0000000..f9c5381 --- /dev/null +++ b/test_data/alternative/laboratory/biosource.txt.sha1 @@ -0,0 +1 @@ +9e5f88297f0284d32f147448e96ae07397dfd567 biosource.txt diff --git a/test_data/alternative/studies/death.txt b/test_data/alternative/studies/death.txt new file mode 100644 index 0000000..675f9b2 --- /dev/null +++ b/test_data/alternative/studies/death.txt @@ -0,0 +1,4 @@ +MARK:,ID,IDAA,INDIVIDUAL_ID,STATUSA,IDAABB,DTDEATH +,217,8000217,PAT10,1,07/03/2017 0:00:00,10/08/2016 0:00:00 +,217,8000238,PAT11,1,15/01/2016 0:00:00,06/07/2015 0:00:00 +,217,8000251,PAT12,1,24/02/2016 0:00:00,07/09/2015 0:00:00 diff --git a/test_data/alternative/studies/death.txt.sha1 b/test_data/alternative/studies/death.txt.sha1 new file mode 100644 index 0000000..2bd26e2 --- /dev/null +++ b/test_data/alternative/studies/death.txt.sha1 @@ -0,0 +1 @@ +e0eec424c2bb325a0bd5c8be3438f86f13b20b01 death.txt diff --git a/test_data/alternative/studies/death_codebook.txt b/test_data/alternative/studies/death_codebook.txt new file mode 100644 index 0000000..9d23109 --- /dev/null +++ b/test_data/alternative/studies/death_codebook.txt @@ -0,0 +1 @@ +1 SEX 1 male 2 female 9 unknown 2 SIGYN IFCGIV IFCMAT IFCCOM IFCREF NOREGIS CLEXPYN CYTOLYN HISTOYN IMAGEYN TMARKYN PATDCOGR PROTYN ELIGYN TXSTART SIGYNX SIGYNY 1 yes 2 no 8 not applicable 8 HOSPREC HOSPDIAG HOSPASS HOSPTRAN 200 AMC 201 UMCG 202 AZM 203 CZE 204 ErasmusMC 207 JBZ 208 LUMC 213 EZT 214 Radboudumc 216 VUMC 217 UMCU 220 PMC 332 DIAGCD PDGCD1 PDGCD2 PDGCD3 61000 FanconiÔs anaemia 61900 Aplastic anaemia 70000 KostmannÔs disease 76100 HLH 80000 "Neoplasm, benign" 80001 "Neoplasm, uncertain whether 
benign or malignant" 80003 "Neoplasm, malignant" 80010 "Tumor cells, benign" 80011 "Tumor cells, uncertain whether benign or mal..." 80013 "Tumor cells, malignant" 80023 "Malignant tumor, small cell type" 80033 "Malignant tumor, giant cell type" 80043 "Malignant tumor, spindle cell type" 80050 "Clear cell tumor, NOS" 80053 "Malignant tumor, clear cell type" 80100 "Epithelial tumor, benign" 80102 "Carcinoma in situ, NOS" 80103 "Carcinoma, NOS" 80113 "Epithelioma, malignant" 80123 "Large cell carcinoma, NOS" 80133 Large cell neuroendocrine carcinoma 80143 Large cell carcinoma with rhabdoid phenotype 80153 Glassy cell carcinoma 80203 "Carcinoma, undifferentiated type, NOS" 80213 "Carcinoma, anaplastic type, NOS" 80223 Pleomorphic carcinoma 80303 Giant cell and spindle cell carcinoma 80313 Giant cell carcinoma 80323 Spindle cell carcinoma 80333 Pseudosarcomatous carcinoma 80343 Polygonal cell carcinoma 80353 Carcinoma with osteoclast-like giant cells 80413 "Small cell carcinoma, NOS" 80423 Oat cell carcinoma 80433 "Small cell carcinoma, fusiform cell" 80443 "Small cell carcinoma, intermediate cell" 80453 Combined small cell carcinoma 80463 Non-small cell carcinoma 80502 Papillary carcinoma in situ 80503 "Papillary carcinoma, NOS" 80513 "Verrucous carcinoma, NOS" 80522 "Papillary squamous cell carcinoma, non-invasive" 80523 Papillary squamous cell carcinoma 80702 "Squamous cell carcinoma in situ, NOS" 80703 "Squamous cell carcinoma, NOS" 80713 "Sq. cell carcinoma, keratinizing, NOS" 80723 "Sq. cell carcinoma, lg. cell, non-ker." 80733 "Sq. cell carcinoma, sm. cell, non-ker." 80743 "Sq. cell carcinoma, spindle cell" 80753 "Squamous cell carcinoma, adenoid" 80762 Sq. cell carc. in situ with question. stroma... 80763 "Sq. 
cell carcinoma, micro-invasive" 80770 "Squamous intraepithelial neoplasia, low grade" 80772 "Squamous intraepithelial neoplasia, high grade" 80783 Squamous cell carcinoma with horn formation 80802 Queyrat erythroplasia 80812 Bowen disease 80823 Lymphoepithelial carcinoma 80833 Basaloid squamous cell carcinoma 80843 "Squamous cell carcinoma, clear cell type" 80903 "Basal cell carcinoma, NOS" 80913 Multifocal superficial basal cell carcinoma 80923 "Infiltrating basal cell carcinoma, NOS" 80933 "Basal cell carcinoma, fibroepithelial" 80943 Basosquamous carcinoma 80953 Metatypical carcinoma 80973 "Basal cell carcinoma, nodular" 80983 Adenoid basal cell carcinoma 81023 Trichilemmocarcinoma 81103 Pilomatrix carcinoma 81202 Transitional cell carcinoma in situ 81203 "Transitional cell carcinoma, NOS" 81213 Schneiderian carcinoma 81223 "Trans. cell carcinoma, spindle cell" 81233 Basaloid carcinoma 81243 Cloacogenic carcinoma 81302 "Papillary trans. cell carcinoma, non-invasive" 81303 Papillary trans. cell carcinoma 81313 "Transitional cell carcinoma, micropapillary" 81400 "Adenoma, NOS" 81402 Adenocarcinoma in situ 81403 "Adenocarcinoma, NOS" 81413 Scirrhous adenocarcinoma 81423 Linitis plastica 81433 Superficial spreading adenocarcinoma 81443 "Adenocarcinoma, intestinal type" 81453 "Carcinoma, diffuse type" 81460 Monomorphic adenoma 81473 Basal cell adenocarcinoma 81480 "Glandular intraepithelial neoplasia, low grade" 81482 "Glandular intraepithelial neoplasia, high grade" 81500 "Pancreatic endocrine tumor, benign" 81501 "Pancreatic endocrine tumor, NOS" 81503 "Pancreatic endocrine tumor, malignant" 81513 "Insulinoma, malignant" 81523 "Glucagonoma, malignant" 81533 "Gastrinoma, malignant" 81543 "Mix. 
pancreatic endocrine&exocrine tumor, mal" 81553 Vipoma 81563 "Somatostatinoma, malignant" 81573 "Enteroglucagonoma, malignant" 81581 "Endocrine tumor, functioning, NOS" 81603 Cholangiocarcinoma 81613 Bile duct cystadenocarcinoma 81623 Klatskin tumor 81630 "Pancreatobiliary neoplasm, non-invasive" 81632 "Pap. neoplasm,pancreatobiliary-type,high gr. int" 81633 Pancreatobiliary-type carcinoma 81703 "Hepatocellular carcinoma, NOS" 81713 "Hepatocellular carcinoma, fibrolamellar" 81723 "Hepatocellular carcinoma, scirrhous" 81733 "Hepatocellular carcinoma, spindle cell variant" 81743 "Hepatocellular carcinoma, clear cell type" 81753 "Hepatocellular carcinoma, pleomorphic type" 81803 Comb. hepatocel. carcinoma & cholangiocarcinoma 81903 Trabecular adenocarcinoma 82003 Adenoid cystic carcinoma 82012 Cribriform carcinoma in situ 82013 Cribriform carcinoma 82102 Adenocarcinoma in situ in adenomatous polyp 82103 Adenocarcinoma in adenomatous polyp 82113 Tubular adenocarcinoma 82133 Serrated adenocarcinoma 82143 Parietal cell carcinoma 82153 Adenocarcinoma of anal glands 82202 Adenocarcinoma in situ in familial polyp. coli 82203 Adenocarcinoma in adenoma. polyposis coli 82212 Adenocarc. in situ in mult. adenomatous polyps 82213 Adenocarcinoma in mult. adenomatous polyps 82302 "Duct carcinoma in situ, solid type" 82303 "Solid carcinoma, NOS" 82313 Carcinoma simplex 82401 Carcinoid tumor of uncertain malignant potential 82403 "Carcinoid tumor, malignant" 82413 Enterochromaffin cell carcinoid 82423 "Enterochromaffin-like cell tumor, malignant" 82433 Goblet cell carcinoid 82443 Mixed adenoneuroendocrine carcinoma 82453 Adenocarcinoid tumor 82463 Neuroendocrine carcinoma 82473 Merkel cell carcinoma 82493 Atypical carcinoid tumor 82503 Bronchiolo-alveolar adenocarcinoma 82513 Alveolar adenocarcinoma 82523 "Bronchiolo-alveolar carcinoma, non-mucinous" 82533 "Bronchiolo-alveolar carcinoma, mucinous" 82543 "Bronch.-alv. carc., mixed mucin. and non-muc..." 
82553 Adenocarcinoma with mixed subtypes 82600 "Papillary adenoma, NOS" 82603 "Papillary adenocarcinoma, NOS" 82612 Adenocarcinoma in situ in villous adenoma 82613 Adenocarcinoma in villous adenoma 82623 Villous adenocarcinoma 82632 Adenocarcinoma in situ in tubulovillous adenoma 82633 Adenocarcinoma in tubulovillous adenoma 82653 "Micropapillary carcinoma, NOS" 82700 Chromophobe adenoma 82703 Chromophobe carcinoma 82710 Prolactinoma 82720 "Pituitary adenoma, NOS" 82723 "Pituitary carcinoma, NOS" 82800 Acidophil adenoma 82803 Acidophil carcinoma 82810 Mixed acidophil-basophil adenoma 82813 Mixed acidophil-basophil carcinoma 82900 Oxyphilic adenoma 82903 Oxyphilic adenocarcinoma 83000 Basophil adenoma 83003 Basophil carcinoma 83100 Clear cell adenoma 83103 "Clear cell adenocarcinoma, NOS" 83123 Renal cell carcinoma 83133 Clear cell adenocarcinofibroma 83143 Lipid-rich carcinoma 83153 Glycogen-rich carcinoma 83163 Cyst-associated renal cell carcinoma 83173 "Renal cell carcinoma, chromophobe type" 83183 "Renal cell carcinoma, sarcomatoid" 83193 Collecting duct carcinoma 83203 Granular cell carcinoma 83223 Water-clear cell adenocarcinoma 83230 Mixed cell adenoma 83233 Mixed cell adenocarcinoma 83303 "Follicular adenocarcinoma, NOS" 83313 Follicular adenocarcinoma well diff. 
83323 Follicular adenocarcinoma trabecular 83333 Fetal adenocarcinoma 83353 "Follicular carcinoma, minimally invasive" 83373 Insular carcinoma 83403 "Papillary carcinoma, follicular variant" 83413 Papillary microcarcinoma 83423 "Papillary carcinoma, oxyphilic cell" 83433 "Papillary carcinoma, encapsulated" 83443 "Papillary carcinoma, columnar cell" 83453 Medullary carcinoma with amyloid stroma 83463 Mixed medullary-follicular carcinoma 83473 Mixed medullary-papillary carcinoma 83503 Nonencapsulated sclerosing carcinoma 83700 "Adrenal cortical adenoma, NOS" 83703 Adrenal cortical carcinoma 83803 Endometrioid carcinoma 83813 "Endometrioid adenofibroma, malignant" 83823 "Endometrioid adenocarcinoma, secretory variant" 83833 "Endometrioid adenocarcinoma, ciliated cell v..." 83843 "Adenocarcinoma, endocervical type" 83903 Skin appendage carcinoma 84003 Sweat gland adenocarcinoma 84013 Apocrine adenocarcinoma 84023 "Nodular hidradenoma, malignant" 84033 Malignant eccrine spiradenoma 84073 Sclerosing sweat duct carcinoma 84083 Eccrine papillary adenocarcinoma 84093 "Eccrine poroma, malignant" 84103 Sebaceous adenocarcinoma 84133 Eccrine adenocarcinoma 84203 Ceruminous adenocarcinoma 84303 Mucoepidermoid carcinoma 84403 "Cystadenocarcinoma, NOS" 84413 "Serous cystadenocarcinoma, NOS" 84421 "Serous cystadenoma, borderline malignancy (C..." 84503 "Papillary cystadenocarcinoma, NOS" 84511 "Papillary cystadenoma, borderline malignancy..." 84523 Solid pseudopapillary carcinoma 84532 "Intraductal papillary-mucinous carcinoma, no..." 84533 "Intraductal papillary-mucinous carcinoma, in..." 84603 Papillary serous cystadenocarcinoma 84613 Serous surface papillary carcinoma 84621 Serous papillary cystic tumor of borderline ... 84702 "Mucinous cystadenocarcinoma, non-invasive" 84703 "Mucinous cystadenocarcinoma, NOS" 84713 Papillary mucinous cystadenocarcinoma 84721 Mucinous cystic tumor of borderline malignan... 84731 "Papillary mucinous cystadenoma, borderline m..." 
84803 Mucinous adenocarcinoma 84813 Mucin-producing adenocarcinoma 84823 "Mucinous adenocarcinoma, endocervical type" 84903 Signet ring cell carcinoma 85002 "Intraductal carcinoma, noninfiltrating, NOS" 85003 "Infiltrating duct carcinoma, NOS" 85012 "Comedocarcinoma, non-infiltrating" 85013 "Comedocarcinoma, NOS" 85023 Secretory carcinoma of breast 85032 Noninfiltrating intraductal papillary adenoc... 85033 Intraductal papillary adenocarcinoma with in... 85042 Noninfiltrating intracystic carcinoma 85043 "Intracystic carcinoma, NOS" 85072 Intraductal micropapillary carcinoma 85083 Cystic hypersecretory carcinoma 85103 "Medullary carcinoma, NOS" 85123 Medullary carcinoma with lymphoid stroma 85133 Atypical medullary carcinoma 85143 "Duct carcinoma, desmoplastic type" 85202 Lobular carcinoma in situ 85203 "Lobular carcinoma, NOS" 85213 Infiltrating ductular carcinoma 85222 Intraductal and lobular in situ carcinoma 85223 Infiltrating duct and lobular carcinoma 85233 Infiltr. duct mixed with other types of carc... 85243 Infiltrating lobular mixed with other types ... 85253 Polymorphous low grade adenocarcinoma 85303 Inflammatory carcinoma 85403 "Paget disease, mammary" 85413 Paget dis. & infil. duct carcinoma 85423 "Paget disease, extramammary" 85433 Paget disease and intraductal ca. 85503 Acinar cell carcinoma 85513 Acinar cell cystadenocarcinoma 85603 Adenosquamous carcinoma 85613 "Warthin tumor, malignant" 85623 Epithelial-myoepithelial carcinoma 85703 Adenocarcinoma with squamous metaplasia 85713 Adenocarcinoma w cartilag. & oss. metaplas. 85723 Adenocarcinoma with spindle cell mataplasia 85733 Adenocarcinoma with apocrine metaplasia 85743 Adenocarcinoma with neuroendocrine differen. 
85753 "Metaplastic carcinoma, NOS" 85763 Hepatoid adenocarcinoma 85803 "Thymoma, malignant, NOS" 85813 "Thymoma, type A, malignant" 85823 "Thymoma, type AB, malignant" 85833 "Thymoma, type B1, malignant" 85843 "Thymoma, type B2, malignant" 85853 "Thymoma, type B3, malignant" 85863 "Thymic carcinoma, NOS" 85883 Spindle epithelial tumor with thymus-like el... 85893 Carcinoma showing thymus-like element 85903 "Ovarian stromal tumor, mal." 86003 "Thecoma, malignant" 86201 "Granulosa cell tumor, adult type" 86203 "Granulosa cell tumor, malignant" 86211 Granulosa cell-theca cell tumor 86221 "Granulosa cell tumor, juvenile" 86303 "Androblastoma, malignant" 86311 Sertoli-Leydig cell tumor of intermediate differ 86313 "Sertoli-Leydig cell tumor, poorly differenti..." 86323 "Gynandroblastoma, malignant" 86343 "Sertoli-Leydig cl tum., p.d. w heterologous ..." 86401 "Sertoli cell tumor, NOS" 86403 Sertoli cell carcinoma 86501 "Leydig cell tumor, NOS" 86503 "Leydig cell tumor, malignant" 86703 "Steroid cell tumor, malignant" 86801 "Paraganglioma, NOS" 86803 "Paraganglioma, malignant" 86913 "Aortic body tumor, malignant" 86923 "Carotid body tumor, malignant" 86933 "Extra-adrenal paraganglioma, malignant" 87000 Pheochromocytoma NOS 87003 Pheochromocytoma 87103 Glomangiosarcoma 87202 Melanoma in situ 87203 "Malignant melanoma, NOS" 87213 Nodular melanoma 87223 Balloon cell melanoma 87233 "Malignant melanoma, regressing" 87280 Diffuse melanocytosis 87281 Meningeal melanocytoma 87283 Meningeal melanomatosis 87303 Amelanotic melanoma 87403 Mal. melanoma in junctional nevus 87412 "Precancerous melanosis, NOS" 87413 Mal. melanoma in precan. melanosis 87422 Lentigo maligna 87423 Lentigo maligna melanoma 87433 Superficial spreading melanoma 87443 "Acral lentiginous melanoma, malig." 87453 "Desmoplastic melanoma, malignant" 87463 Mucosal lentiginous melanoma 87613 Mal. melanoma in giant pigmented nevus 87703 Mixed epithel. 
& spindle cell melanoma 87713 Epithelioid cell melanoma 87723 "Spindle cell melanoma, NOS" 87733 "Spindle cell melanoma, type A" 87743 "Spindle cell melanoma, type B" 87803 "Blue nevus, malignant" 88000 "Soft tissue tumor, benign" 88003 "Sarcoma, NOS" 88013 Spindle cell sarcoma 88023 Giant cell sarcoma 88033 Small cell sarcoma 88043 Epithelioid sarcoma 88053 Undifferentiated sarcoma 88063 Desmoplastic small round cell tumor 88100 "Fibroma, NOS" 88103 "Fibrosarcoma, NOS" 88113 Fibromyxosarcoma 88123 Periosteal fibrosarcoma 88133 Fascial fibrosarcoma 88143 Infantile fibrosarcoma 88150 Solitary fibrous tumor 88153 "Solitary fibrous tumor, malignant" 88211 "Fibromatosis, aggressive" 88240 Myofibroma 88241 Myofibromatosis 88251 "Myofibroblastic tumor, NOS" 88300 "Fibrous histiocytoma, benign" 88303 "Fibrous histiocytoma, malignant" 88323 "Dermatofibrosarcoma, NOS" 88333 Pigmented dermatofibrosarcoma protuberans 88341 Giant cell fibroblastoma 88351 Plexiform fibrohistiocytic tumor 88361 Angiomatoid fibrous histiocytoma 88403 Myxosarcoma 88411 Angiomyxoma 88500 "Lipoma, NOS" 88501 Atypical lipoma 88503 "Liposarcoma, NOS" 88510 Fibrolipoma 88513 "Liposarcoma, well differentiated" 88523 Myxoid liposarcoma 88533 Round cell liposarcoma 88543 Pleomorphic liposarcoma 88553 Mixed type liposarcoma 88573 Fibroblastic liposarcoma 88583 Dedifferentiated liposarcoma 88610 "Angiolipoma, NOS" 88900 "Leiomyoma, NOS" 88901 "Leiomyomatosis, NOS" 88903 "Leiomyosarcoma, NOS" 88913 Epithelioid leiomyosarcoma 88943 Angiomyosarcoma 88953 Myosarcoma 88963 Myxoid leiomyosarcoma 88971 "Smooth muscle tumor, NOS" 89000 "Rhabdomyoma, NOS" 89003 "Rhabdomyosarcoma, NOS" 89013 "Pleomorphic rhabdomyosarcoma, adult type" 89023 Mixed type rhabdomyosarcoma 89103 Embryonal rhabdomyosarcoma 89123 Spindle cell rhabdomyosarcoma 89203 Alveolar rhabdomyosarcoma 89213 Rhabdomyosarcoma with ganglionic differentia... 
89303 Endometrial stromal sarcoma 89313 "Endometrial stromal sarcoma, low grade" 89333 Adenosarcoma 89343 Carcinofibroma 89353 "Stromal sarcoma, NOS" 89361 "Gastrointestinal stromal tumor, NOS" 89363 Gastrointestinal stromal sarcoma 89403 "Mixed tumor, malignant, NOS" 89413 Carcinoma in pleomorphic adenoma 89503 Mullerian mixed tumor 89513 Mesodermal mixed tumor 89590 Benign Cystic nephroma 89591 Cystic partially differentiated nephroblastoma 89593 Malignant cystic nephroma 89601 Mesoblastic nephroma 89603 "Nephroblastoma, NOS" 89633 Malignant rhabdoid tumor 89643 Clear cell sarcoma of kidney 89703 Hepatoblastoma 89713 Pancreatoblastoma 89723 Pulmonary blastoma 89733 Pleuropulmonary blastoma 89741 Sialoblastoma 89751 Calcifying nested epithelial stromal tumor 89803 "Carcinosarcoma, NOS" 89813 "Carcinosarcoma, embryonal type" 89823 Malignant myoepithelioma 89901 "Mesenchymoma, NOS" 89903 "Mesenchymoma, malignant" 89913 Embryonal sarcoma 90003 "Brenner tumor, malignant" 90143 Serous adenocarcinofibroma 90153 Mucinous adenocarcinofibroma 90203 "Phyllodes tumor, malignant" 90403 "Synovial sarcoma, NOS" 90413 "Synovial sarcoma, spindle cell" 90423 "Synovial sarcoma, epithelioid cell" 90433 "Synovial sarcoma, biphasic" 90443 "Clear cell sarcoma,NOS (except of kidney M-8..." 90503 "Mesothelioma, malignant" 90513 "Fibrous mesothelioma, malignant" 90523 "Epithel. mesothelioma, mal." 90533 "Mesothelioma, biphasic, malignant" 90603 Dysgerminoma 90613 "Seminoma, NOS" 90623 "Seminoma, anaplastic" 90633 Spermatocytic seminoma 90642 Intratubular malignant germ cells 90643 Germinoma 90653 "Germ cell tumor, nonseminomatous" 90703 "Embryonal carcinoma, NOS" 90713 Yolk sac tumor 90723 Polyembryoma 90800 "Teratoma, benign" 90801 "Teratoma, NOS" 90803 "Teratoma, malignant, NOS" 90813 Teratocarcinoma 90823 "Malignant teratoma, undiff." 90833 "Malignant teratoma, intermediate" 90840 "Dermoid cyst, NOS" 90843 Teratoma with malig. 
transformation 90853 Mixed germ cell tumor 90903 "Struma ovarii, malignant" 91003 Choriocarcinoma 91013 Choriocarcinoma combined w/ other germ cell ... 91023 "Malignant teratoma, trophoblastic" 91043 Malignant placental site trophoblastic tumor 91053 "Trophoblastic tumor, epithelioid" 91103 "Mesonephroma, malignant" 91200 "Hemangioma, NOS" 91203 Hemangiosarcoma 91210 Cavernous hemangioma 91220 Venous hemangioma 91243 Kupffer cell sarcoma 91300 "Hemangioendothelioma, benign" 91301 "Hemangioendothelioma, NOS" 91303 "Hemangioendothelioma, malignant" 91310 Capillary hemangioma 91333 "Epithelioid hemangioendothelioma, malignant" 91403 Kaposi sarcoma 91500 "Hemangiopericytoma, benign" 91501 "Hemangiopericytoma, NOS" 91503 "Hemangiopericytoma, malignant" 91611 Hemangioblastoma 91703 Lymphangiosarcoma 91803 "Osteosarcoma, NOS" 91813 Chondroblastic osteosarcoma 91823 Fibroblastic osteosarcoma 91833 Telangiectatic osteosarcoma 91843 Osteosarcoma in Paget disease 91853 Small cell osteosarcoma 91863 Central osteosarcoma 91873 Instrosseous well differentiated osteosarcoma 91923 Parosteal osteosarcoma 91933 Periosteal osteosarcoma 91943 High grade surface osteosarcoma 91953 Intracortical osteosarcoma 92203 "Chondrosarcoma, NOS" 92213 Juxtacortical chondrosarcoma 92303 "Chondroblastoma, malignant" 92313 Myxoid chondrosarcoma 92403 Mesenchymal chondrosarcoma 92423 Clear cell chondrosarcoma 92433 Dedifferentiated chondrosarcoma 92501 "Giant cell tumor of bone, NOS" 92503 "Giant cell tumor of bone, malignant" 92511 Giant cell tumor of soft parts 92513 Malignant giant cell tumor of soft parts 92523 Malignant tenosynovial giant cell tumor 92603 Ewing sarcoma 92613 Adamantinoma of long bones 92703 "Odontogenic tumor, malignant" 92903 Ameloblastic odontosarcoma 93103 "Ameloblastoma, malignant" 93303 Ameloblastic fibrosarcoma 93423 Odontogenic carcinosarcoma 93501 Craniopharyngioma 93511 Adamantinomatous craniopharyngioma 93521 Papillary craniopharyngioma 93601 "Pinealoma, NOS" 93611 
Pineocytoma 93623 Pineoblastoma 93630 Melanotic neuroectodermal tumor 93643 Peripheral neuroectodermal tumor 93653 Askin tumor 93703 "Chordoma, NOS" 93713 Chondroid chordoma 93723 Dedifferentiated chordoma 93803 "Glioma, malignant" 93813 Gliomatosis cerebri 93823 Mixed glioma 93831 Subependymoma 93841 Supependymal giant cell astrocytoma 93900 "Choroid plexus papilloma, NOS" 93901 Atypical choroid plexus papilloma 93903 "Choroid plexus papilloma, malignant" 93913 "Ependymoma, NOS" 93923 "Ependymoma, anaplastic" 93933 Papillary ependymoma 93941 Myxopapillary ependymoma 93953 Papillary tumor of the pineal region 94003 "Astrocytoma, NOS" 94013 "Astrocytoma, anaplastic" 94103 Protoplasmic astrocytoma 94113 Gemistocytic astrocytoma 94121 Desmoplastic infantile astrocytoma 94130 Dysembryoplastic neuroepithelial tumor 94203 Fibrillary astrocytoma 94211 Pilocytic astrocytoma 94233 Polar spongioblastoma 94243 Pleomorphic xanthoastrocytoma 94253 Pilomyxoid astrocytoma 94303 Astroblastoma 94311 Angiocentric glioma 94321 Pituicytoma 94403 "Glioblastoma, NOS" 94413 Giant cell glioblastoma 94421 Gliofibroma 94423 Gliosarcoma 94441 Chordoid glioma 94503 "Oligodendroglioma, NOS" 94513 "Oligodendroglioma, anaplastic" 94603 Oligodendroblastoma 94703 "Medulloblastoma, NOS" 94713 Desmoplastic medulloblastoma 94723 Medullomyoblastoma 94733 Primitive neuroectodermal tumor 94743 Large cell medulloblastoma 94803 "Cerebellar sarcoma, NOS" 94900 Ganglioneuroma 94903 Ganglioneuroblastoma 94920 Gangliocytoma 94930 Dysplastic gangliocytoma of cerebellum (Lher... 
95003 "Neuroblastoma, NOS" 95013 "Medulloepithelioma, NOS" 95023 Teratoid medulloepithelioma 95033 "Neuroepithelioma, NOS" 95043 Spongioneuroblastoma 95051 "Ganglioglioma, NOS" 95053 "Ganglioglioma, anaplastic" 95061 Centrol neurocytoma 95083 Atypical teratoid/rhabdoid tumor 95091 Papillary glioneuronal tumor 95103 "Retinoblastoma, NOS" 95113 "Retinoblastoma, differentiated" 95123 "Retinoblastoma, undifferentiated" 95133 "Retinoblastoma, diffuse" 95203 Olfactory neurogenic tumor 95213 Olfactory neurocytoma 95223 Olfactory neuroblastoma 95233 Olfactory neuroepithelioma 95300 "Meningioma, NOS" 95301 "Meningiomatosis, NOS" 95303 "Meningioma, malignant" 95310 Meningothelial meningioma 95320 Fibrous meningioma 95330 Psammomatous meningioma 95340 Angiomatous meningioma 95370 Transitional meningioma 95381 Clear cell meningioma 95383 Papillary meningioma 95391 Atypical meningioma 95393 Meningeal sarcomatosis 95400 "Neurofibroma, NOS" 95401 "Neurofibromatosis, NOS" 95403 Malignant peripheral nerve sheath tumor 95410 Melanotic neurofibroma 95500 Plexiform neurofibroma 95600 "Neurilemoma, NOS" 95601 Neurinomatosis 95603 "Neurilemmoma, malignant" 95613 MPNST with rhabdomyoblastic differentiation 95620 Neurothekeoma 95700 "Neuroma, NOS" 95710 "Perineurioma, NOS" 95713 "Perineurioma, malignant" 95800 "Granular cell tumor, NOS" 95803 "Granular cell tumor, malignant" 95813 Alveolar soft part sarcoma 95903 "Malignant lymphoma, NOS" 95913 "Malignant lymphoma, non-Hodgkin" 95963 Composite Hodgkin and non-Hodgkin lymphoma 95973 Primary cutaneous follicle centre lymphoma 96503 "Hodgkin lymphoma, NOS" 96513 "Hodgkin lymphoma, lymphocyte-rich" 96523 "Hodgkin lymphoma, mixed cellularity, NOS" 96533 "Hodgkin lymphoma, lymphocytic deplet., NOS" 96543 "Hodgkin lymph., lymphocyt. deplet., diffuse ..." 96553 "Hodgkin lymphoma, lymphocyt. deplet., reticular" 96593 "Hodgkin lymph., nodular lymphocyte predom." 
96613 Hodgkin granuloma [obs] 96623 Hodgkin sarcoma [obs] 96633 "Hodgkin lymphoma, nodular sclerosis, NOS" 96643 "Hodgkin lymphoma, nod. scler., cellular phase" 96653 "Hodgkin lymphoma, nod. scler., grade 1" 96673 "Hodgkin lymphoma, nod. scler., grade 2" 96703 "ML, small B lymphocytic, NOS" 96713 "ML, lymphoplasmacytic" 96733 Mantle cell lymphoma 96753 "ML, mixed sm. and lg. cell, diffuse" 96783 Primary effusion lymphoma 96793 Mediastinal large B-cell lymphoma 96803 "ML, large B-cell, diffuse" 96843 "ML, large B-cell, diffuse, immunoblastic, NOS" 96873 "Burkitt lymphoma, NOS" 96883 T-cell/histiocyte rich large B-cell lymphoma 96893 Splenic marginal zone B-cell lymphoma 96903 "Follicular lymphoma, NOS" 96913 "Follicular lymphoma, grade 2" 96953 "Follicular lymphoma, grade 1" 96983 "Follicular lymphoma, grade 3" 96993 "Marginal zone B-cell lymphoma, NOS" 97003 Mycosis fungoides 97013 Sezary syndrome 97023 "Mature T-cell lymphoma, NOS" 97053 Angioimmunoblastic T-cell lymphoma 97083 Subcutaneous panniculitis-like T-cell lymphoma 97093 "Cutaneous T-cell lymphoma, NOS" 97123 Intravascular large B-cell lymphoma 97143 "Anaplastic large cell lymphoma, T-cell and N..." 97163 Hepatosplenic T-cell lymphoma 97173 Intestinal T-cell lymphoma 97183 Primary cutan. CD30+ T-cell lymphoprolif. di... 97193 "NK/T-cell lymphoma, nasal and nasal-type" 97243 Syst. EBV pos. T-cell lymphoprol. disease 97253 Hydroa vacciniforme-like lymphoma 97263 Primary cutaneous gamma-delta T-cell lymphoma 97273 "Precursor cell lymphoblastic lymphoma, NOS" 97283 Precursor B-cell lymphoblastic lymphoma 97293 Precursor T-cell lymphoblastic lymphoma 97313 "Plasmacytoma, NOS" 97323 Multiple myeloma 97333 Plasma cell leukemia 97343 "Plasmacytoma, extramedullary" 97353 Plasmablastic lymphoma 97373 ALK postive large B-cell lymphoma 97383 Large B-cell lymph. arising in HHV8-assoc. 
multi 97403 Mast cell sarcoma 97411 Indolent systemic mastocytosis 97413 Malignant mastocytosis 97423 Mast cell leukemia 97503 Malignant histiocytosis 97511 "Langerhans cell histiocytosis, NOS" 97513 "Langerhans cell histiocytosis, NOS" 97521 "Langerhans cell histiocytosis, unifocal" 97531 "Langerhans cell histiocytosis, multifocal" 97543 "Langerhans cell histiocytosis, disseminated" 97553 Histiocytic sarcoma 97563 Langerhans cell sarcoma 97573 Interdigitating dendritic cell sarcoma 97583 Follicular dendritic cell sarcoma 97593 Fibroblastic reticular cell tumor 97603 "Immunoproliferative disease, NOS" 97613 Waldenstrom macroglobulinemia 97623 "Heavy chain disease, NOS" 97643 Immunoproliferative small intestinal disease 98003 "Leukemia, NOS" 98013 "Acute leukemia, NOS" 98053 Acute biphenotypic leukemia 98063 Mix. phenotype ac. leukemia with t;BCR-ABL1 98073 Mix. phenotype ac. leukemia with t;MLL rearrange 98083 "Mixed phenotype acute leukemia, B/myeloid, NOS" 98093 "Mixed phenotype acute leukemia, T/myeloid, NOS" 98113 "B lymphoblastic leukemia/lymphoma, NOS" 98123 B lymphoblast. leukemia/lymphoma with t;BCR-ABL1 98133 B lymphoblast. leukemia/lymphoma with t;MLL rear 98143 B lymphoblast. leukemia/lymphoma with t;TEL-AML1 98153 B lymphoblastic leukemia/lymphoma with hyperdipl 98163 B lymphoblast. leukemia/lymph. with hypodiploidy 98173 B lymphoblast. leukemia/lymphoma with t;IL3-IGH 98183 B lymphoblast. leukemia/lymphoma with t;E2A PBX1 98203 "Lymphoid leukemia, NOS" 98233 B-cell chr. lymph. leuk./small lymphocytic l... 98263 Burkitt cell leukemia 98273 Adult T-cell leukemia/lymphoma (HTLV-1 pos.) 
98283 "Acute lymphoblastic leukemia, L2 type, NOS" 98313 T-cell large granular lymphocytic leukemia 98323 "Prolymphocytic leukemia, NOS" 98333 "Prolymphocytic leukemia, B-cell type" 98343 "Prolymphocytic leukemia, T-cell type" 98353 "Precursor cell lymphoblastic leukemia, NOS" 98363 Precursor B-cell lymphoblastic leukemia 98373 Precursor T-cell lymphoblastic leukemia 98403 "Acute myeloid leukemia, M6 type" 98603 "Myeloid leukemia, NOS" 98613 Acute myeloid leukemia 98633 "Chronic myeloid leukemia, NOS" 98653 Acute myeloid leukemia with t;DEK-NUP214 98663 "Acute promyelocytic leuk.,t(15;17)(q22;q11-12)" 98673 Acute myelomonocytic leukemia 98693 Acute myeloid leukemia with inv or t;RPN1-EVI1 98703 Acute basophilic leukemia 98713 Ac. myelomonocytic leuk. w abn. mar. eosinop... 98723 "Acute myeloid leukemia, minimal differentiation" 98733 Acute myeloid leukemia without maturation 98743 Acute myeloid leukemia with maturation 98753 "Chronic myelogenous leukemia, BCR/ABL positive" 98763 "Atypical chronic myeloid leuk., BCR/ABL nega..." 98913 Acute monocytic leukemia 98953 Acute myeloid leuk. with myelodysplasia-related 98963 "Acute myeloid leukemia, t(8;21)(q22;q22)" 98973 "Acute myeloid leukemia, 11q23 abnormalities" 98981 Transient abnormal myelopoiesis 98983 Myeloid leukemia associated with Down Syndrome 99103 Acute megakaryoblastic leukemia 99113 Acute myeloid leukemia (megakar. blast.) 
with t; 99203 Therapy related myeloid neoplasm 99303 Myeloid sarcoma 99313 Acute panmyelosis with myelofibrosis 99403 Hairy cell leukemia 99453 "Chronic myelomonocytic leukemia, NOS" 99463 Juvenile myelomonocytic leukemia 99483 Aggressive NK-cell leukemia 99503 Polycythemia vera 99603 "Myeloproliferative neoplasm, NOS" 99613 Primary myelofibrosis 99623 Essential thrombocythemia 99633 Chronic neutrophilic leukemia 99643 "Chronic eosinophilic leukemia, NOS" 99653 Myeloid&lymphoid neoplasms with PDGFRB rearrange 99663 Myeloid neoplasms with PDGFRB rearrangement 99673 Myeloid&lymphoid neoplasm with FGFR1 abnormaliti 99701 "Lymphoproliferative disorder, NOS" 99711 "Post transplant lymphoproliferative disorder, NO" 99713 Polymorphic post transplant lymphoproliferative 99751 "Myeloproliferative disease, NOS" 99753 "Myeloproliferative neoplasm, unclassifiable" 99803 Refractory anemia 99823 Refractory anemia with sideroblasts 99833 Refractory anemia with excess blasts 99843 Refract. anemia with excess blasts in transf... 99853 Refractory cytopenia with multilineage dyspl... 99863 Myelodysplastic syndr. with 5q deletion synd... 
99873 "Therapy-related myelodysplastic syndrome, NOS" 99893 "Myelodysplastic syndrome, NOS" 99913 Refractory neutropenia 99923 Refractory thrombocytopenia 341 PLOCCD PLCCD1 PLCCD2 PLCCD3 0 external upper lip 1 external lower lip 2 "external lip, NOS" 3 mucose of uppper lip 4 mucosa of lower lip 5 "mucosa of lip, NOS" 6 commissure of lip 8 overlapping lesion of lip 9 "lip, NOS" 19 base of tongue 20 "dorsal surface of tongue, NOS" 21 border of tongue 22 "ventral surface of tongue, NOS" 23 anterior 2/3 of tongue 24 lingual tonsil 28 overlapping lesion of tongue 29 "tongue, NOS" 30 upper gum 31 lower gum 39 "gum, NOS" 40 anterior floor of mouth 41 lateral floor of mouth 48 overlapping lesion of floor of mouth 49 "floor of mouth, NOS" 50 hard palate 51 "soft palate, NOS" 52 uvula 58 overlapping lesion of palate 59 "palate, NOS" 60 cheek mucosa 61 vestibule of mouth 62 retromolar area 68 overlappinglesionofotherandunspecifiedpartso... 69 "mouth, NOS" 79 parotid gland 80 submandibular gland 81 sublingual gland 88 overlapping lesion of major salivary glands 89 "major salivary gland, NOS" 90 tonsillar fossa 91 tonsillar pillar 98 overlapping lesion of tonsil 99 "tonsil, NOS" 100 vallecula 101 anterior surface of epiglottis 102 lateral wall of oropharynx 103 posterior wall of oropharynx 104 branchial cleft 108 overlapping lesion of oropharynx 109 "oropharynx, NOS" 110 superior wall of nasopharyx 111 posterior wall of nasopharyx 112 lateral wall of nasopharyx 113 anterior wall of nasopharyx 118 overlapping lesion of nasopharyx 119 "nasopharyx, NOS" 129 "nasopharyxyriform sinus, NOS" 130 postcricoid region 131 hypopharyngeal aspect of aryepiglottic fold 132 posterior wall of hypopharynx 138 overlapping lesion of hypopharyx 139 "hypopharyx, NOS" 140 "pharyx, NOS" 142 waldeyer ring 148 "overlappinglesionoflip,oralcavityandpharynx" 150 cervical esophagus 151 thoracic esophagus 152 abdominal esophagus 153 upper third of esophagus 154 middle third of esophagus 155 lower third of 
esophagus 158 overlapping lesion of esophagus 159 "esophagus, NOS" 160 "cardia, NOS" 161 fundus of stomach 162 body of stomach 163 gastric antrum 164 pylorus 165 lesser curvature of stomach 166 greater curvature of stomach 168 overlapping lesion of stomach 169 "stomach, NOS" 170 duodenum 171 jejunum 172 ileum 173 Meckel diverticulum 178 overlapping lesion of small intestine 179 "small intestine, NOS" 180 cecum 181 appendix 182 ascending colon 183 hepatic flexure of colon 184 transverse colon 185 splenic flexure of colon 186 descending colon 187 sigmoid colon 188 overlapping lesion of colon 189 "colon, NOS" 199 rectosigmoid junction 209 "rectum, NOS" 210 "anus, NOS" 211 anal canal 212 cloacogenic zone 218 "overlappinglesionofrectum,anusandanalcanal" 220 liver 221 intrahepatic bile duct 239 "gallbladder, NOS" 240 extrahepatic bile duct 241 ampulla of Vater 248 overlapping lesion of biliary tract 249 "billary tract, NOS" 250 head of pancreas 251 body of pancreas 252 tail of pancreas 253 pancreatic duct 254 islets of Langerhans 257 other specified parts of pancreas 258 overlapping lesion of pancreas 259 "pancreas, NOS" 260 "intestinal tract, NOS" 268 overlapping lesion of digestive system 269 "gastrointestinal tract, NOS" 300 nasal cavity 301 middle ear 310 maxillary sinus 311 ethmoid sinus 312 frontal sinus 313 sphenoid sinus 318 overlapping lesion of accessory sinuses 319 "accessory sinus, NOS" 320 glottis 321 supraglottis 322 subglottis 323 laryngeal cartilage 328 overlapping lesion of larynx 329 "larynx, NOS" 339 trachea 340 main bronchus 341 "upper lobe, lung" 342 "middle lobe, lung" 343 "lower lobe, lung" 348 overlapping lesion of lung 349 "lung, NOS" 379 thymus 380 heart 381 anterior mediastinum 382 posterior mediastinum 383 "mediastinum, NOS" 384 "pleura, NOS" 388 "overlappinglesionofheart,mediastinumandpleura" 390 "upper respiratory tract, NOS" 398 overlappinglesionofrespiratorysystemandintra... 
399 ill-defined sites within respiratory system 400 "longboneofupperlimb,scapulaandassociatedjoints" 401 short bone of upper limb and associated joints 402 long bones of lower limb and associated joints 403 short bones of lower limb and associated joints 408 "overlappinglesionofbones,jointsandarticularc..." 409 "bones of lomb, NOS" 410 bones of skull and face and associated joints 411 mandible 412 vertebral column 413 "rib, sternum, clavicle and associated joints" 414 "pelvicbones,sacrum,coccyxandassociatedjoints" 418 "overlappinglesionofbones,jointsandarticularc..." 419 "bone, NOS" 420 blood 421 bone marrow 422 spleen 423 "reticuloendothelial system, NOS" 424 "hematopoietic system, NOS" 440 "skin of lip, NOS" 441 eyelid 442 external ear 443 skin of other and unspecified parts of face 444 skin of scalp and neck 445 skin of trunc 446 skin of upper limb and shoulder 447 skin of lower limb and hip 448 overlapping lesion of skin 449 "skin, NOS" 470 peripheralnervesandautonomicnervoussystemofh... 471 peripheralnervesandautonomicnervoussystemofu... 472 peripheralnervesandautonomicnervoussystemofl... 473 peripheralnervesandautonomicnervoussystemoft... 474 peripheralnervesandautonomicnervoussystemofa... 475 peripheralnervesandautonomicnervoussystemofp... 476 peripheralnervesandautonomicnervoussystemoft... 478 overlappinglesionofperipheralnervesandautono... 479 "autonomic nervous system, NOS" 480 retroperitoneum 481 specified parts of peritoneum 482 "peritoneum, NOS" 488 overlapping lesion of retroperitoneum 490 "connective,subcutaneousandothersofttissuesof..." 491 "connective,subcutaneousandothersofttissuesof..." 492 "connective,subcutaneousandothersofttissuesof..." 493 "connective,subcutaneousandothersofttissuesof..." 494 "connective,subcutaneousandothersofttissuesof..." 495 "connective,subcutaneousandothersofttissuesof..." 496 "connective,subcutaneousandothersofttissuesof..." 498 "overlappinglesionofconnective,subcutaneousan..." 
499 "connective,subcutaneousandothersofttissues,NOS" 500 nipple 501 central portion of breast 502 upper-inner quadrant of breast 503 lower-inner quadrant of breast 504 upper-outer quadrant of breast 505 upper-outer quadrant of breast 506 axillary tail of breast 508 overlapping lesion of breast 509 "breast, NOS" 510 labium majus 511 labium minus 512 clitoris 518 overlapping lesion of vulva 519 "vulva, NOS" 529 "vagina, NOS" 530 endocervix 531 exocervix 538 overlapping lesion of cervix uteri 539 "cervix uteri, NOS" 540 isthmus uteri 541 endometrium 542 myometrium 543 fundus uteri 548 overlapping lesion of corpus uteri 549 "corpus uteri, NOS" 559 "uterus, NOS" 569 "ovary, NOS" 570 fallopian tube 571 broad ligament 572 round ligament 573 parametrium 574 uterine adnexa 577 other specified parts of female genital organs 578 overlapping lesion of female genital organs 579 "female genital tract, NOS" 589 "placenta, NOS" 600 prepuce 601 glans penis 602 body of penis 608 overlapping lesion of penis 609 "penis, NOS" 619 prostate gland 620 undescended testis 621 descended testis 629 "testis, NOS" 630 epididymis 631 spermatic cord 632 "scrotum, NOS" 637 other specified parts of male genital organs 638 overlapping lesion of male genital organs 639 "male genital tract, NOS" 649 "kidney, NOS" 659 renal pelvis 669 ureter 670 trigone of bladder 671 dome of bladder 672 lateral wall of bladder 673 anterior wall of bladder 674 posterior of bladder 675 bladder neck 676 ureteric orifice 677 urachus 678 overlapping lesion of bladder 679 "bladder, NOS" 680 urethra 681 paraurethral gland 688 overlapping lesion of urinary organs 689 "urinary system, NOS" 690 conjuctiva 691 "cornea, NOS" 692 retina 693 choroid 694 ciliary body 695 lacrimal gland 696 "orbit, NOS" 698 overlapping lesion of eye and adnexa 699 "eye, NOS" 700 cerebral meninges 701 spinal meninges 709 "meninges, NOS" 710 cerebrum 711 frontal lobe 712 temperal lobe 713 parietal lobe 714 occipital lobe 715 "ventricle, NOS" 716 
cerebellum 717 brain stem 718 overlapping lesion of brain 719 "brain, NOS" 720 spinal cord 721 cauda equina 722 olfactory nerve 723 optic nerve 724 acoustic nerve 725 "cranial nerve, NOS" 728 overlappinglesionofbrainandcentralnervoussystem 729 "nervous system, NOS" 739 "thyroid gland, NOS" 740 cortex of adrenal gland 741 medulla of adrenal gland 749 "adrenal gland, NOS" 750 parathyroid gland 751 pituitary gland 752 craniopharyngeal duct 753 pineal gland 754 carotid body 755 aortic body and other paraganglia 758 overlappinglesionofendocrineglandsandrelated... 759 "endocrine gland, NOS" 760 "head, face or neck, NOS" 761 "thorax, NOS" 762 "abdomen, NOS" 763 "pelvis, NOS" 764 "upper limb, NOS" 765 "lower limb, NOS" 767 other ill-defined sites 768 overlapping lesion of ill-defined sites 770 "lymph nodes of head, face and neck" 771 intrathoracic lymph nodes 772 intra-abdominal lymph nodes 773 lymph nodes of axilla of arm 774 lymph nodes of inguinal region or leg 775 pelvic lymph nodes 778 lymph nodes of multiple regions 779 "lymph node, NOS" 809 unknown primary sites 343 IFCDATR 1 yes 2 no 3 statement by physician 4 IC will follow \ No newline at end of file diff --git a/test_data/alternative/studies/death_codebook.txt.sha1 b/test_data/alternative/studies/death_codebook.txt.sha1 new file mode 100644 index 0000000..c7ec326 --- /dev/null +++ b/test_data/alternative/studies/death_codebook.txt.sha1 @@ -0,0 +1 @@ +8cc20a3917db3e251d9426d078ce74969e531931 PMCST000AAA_death_codebook.txt diff --git a/test_data/alternative/studies/diagnosis.txt b/test_data/alternative/studies/diagnosis.txt new file mode 100644 index 0000000..555e9aa --- /dev/null +++ b/test_data/alternative/studies/diagnosis.txt @@ -0,0 +1,18 @@ +MARK:,ID,IDAA,INDIVIDUAL_ID,CIDDIAG,HOSPDIAG,DIAGCD,PLOCCD,DIAGGRSTX,IDAABA +,217,8000208,PAT1,DIA1,217,95913,771,,10/04/2003 0:00:00 +,217,8000233,PAT3,DIA3,217,95913,421,,04/05/2003 0:00:00 +,217,8000250,PAT4,DIA4,217,95913,421,,21/05/2003 0:00:00 
+,217,8000268,PAT5,DIA5,217,95913,421,,05/06/2003 0:00:00 +,217,8000273,PAT6,DIA6,217,95913,421,,29/09/2003 0:00:00 +,217,8000323,PAT7,DIA7,217,95913,421,,17/08/2003 0:00:00 +,217,8000333,PAT8,DIA8,217,95913,421,,28/08/2003 0:00:00 +,217,8000345,PAT9,DIA9,217,95913,778,,06/08/2003 0:00:00 +,217,8000217,PAT10,DIA10,217,95913,778,,06/08/2003 0:00:00 +,217,8000238,PAT11,DIA11,217,97053,778,,06/08/2003 0:00:00 +,217,8000251,PAT12,DIA12,217,95913,778,,06/08/2003 0:00:00 +,217,8000269,PAT13,DIA13,217,95913,778,,06/08/2003 0:00:00 +,217,8000274,PAT14,DIA14,217,97053,778,,06/08/2003 0:00:00 +,217,8000324,PAT15,DIA15,217,97053,778,,06/08/2003 0:00:00 +,217,8000334,PAT16,DIA16,217,97053,778,,06/08/2003 0:00:00 +,217,8000346,PAT17,DIA17,217,97053,778,,06/08/2003 0:00:00 +,217,8000208,PAT1,DIA18,217,97053,778,,06/08/2003 0:00:00 diff --git a/test_data/alternative/studies/diagnosis.txt.sha1 b/test_data/alternative/studies/diagnosis.txt.sha1 new file mode 100644 index 0000000..ea10f3f --- /dev/null +++ b/test_data/alternative/studies/diagnosis.txt.sha1 @@ -0,0 +1 @@ +2489e88660e3cf07a4ad56085651707186b79294 diagnosis.txt diff --git a/test_data/alternative/studies/diagnosis_codebook.txt b/test_data/alternative/studies/diagnosis_codebook.txt new file mode 100644 index 0000000..9d23109 --- /dev/null +++ b/test_data/alternative/studies/diagnosis_codebook.txt @@ -0,0 +1 @@ +1 SEX 1 male 2 female 9 unknown 2 SIGYN IFCGIV IFCMAT IFCCOM IFCREF NOREGIS CLEXPYN CYTOLYN HISTOYN IMAGEYN TMARKYN PATDCOGR PROTYN ELIGYN TXSTART SIGYNX SIGYNY 1 yes 2 no 8 not applicable 8 HOSPREC HOSPDIAG HOSPASS HOSPTRAN 200 AMC 201 UMCG 202 AZM 203 CZE 204 ErasmusMC 207 JBZ 208 LUMC 213 EZT 214 Radboudumc 216 VUMC 217 UMCU 220 PMC 332 DIAGCD PDGCD1 PDGCD2 PDGCD3 61000 FanconiÔs anaemia 61900 Aplastic anaemia 70000 KostmannÔs disease 76100 HLH 80000 "Neoplasm, benign" 80001 "Neoplasm, uncertain whether benign or malignant" 80003 "Neoplasm, malignant" 80010 "Tumor cells, benign" 80011 "Tumor cells, uncertain 
whether benign or mal..." 80013 "Tumor cells, malignant" 80023 "Malignant tumor, small cell type" 80033 "Malignant tumor, giant cell type" 80043 "Malignant tumor, spindle cell type" 80050 "Clear cell tumor, NOS" 80053 "Malignant tumor, clear cell type" 80100 "Epithelial tumor, benign" 80102 "Carcinoma in situ, NOS" 80103 "Carcinoma, NOS" 80113 "Epithelioma, malignant" 80123 "Large cell carcinoma, NOS" 80133 Large cell neuroendocrine carcinoma 80143 Large cell carcinoma with rhabdoid phenotype 80153 Glassy cell carcinoma 80203 "Carcinoma, undifferentiated type, NOS" 80213 "Carcinoma, anaplastic type, NOS" 80223 Pleomorphic carcinoma 80303 Giant cell and spindle cell carcinoma 80313 Giant cell carcinoma 80323 Spindle cell carcinoma 80333 Pseudosarcomatous carcinoma 80343 Polygonal cell carcinoma 80353 Carcinoma with osteoclast-like giant cells 80413 "Small cell carcinoma, NOS" 80423 Oat cell carcinoma 80433 "Small cell carcinoma, fusiform cell" 80443 "Small cell carcinoma, intermediate cell" 80453 Combined small cell carcinoma 80463 Non-small cell carcinoma 80502 Papillary carcinoma in situ 80503 "Papillary carcinoma, NOS" 80513 "Verrucous carcinoma, NOS" 80522 "Papillary squamous cell carcinoma, non-invasive" 80523 Papillary squamous cell carcinoma 80702 "Squamous cell carcinoma in situ, NOS" 80703 "Squamous cell carcinoma, NOS" 80713 "Sq. cell carcinoma, keratinizing, NOS" 80723 "Sq. cell carcinoma, lg. cell, non-ker." 80733 "Sq. cell carcinoma, sm. cell, non-ker." 80743 "Sq. cell carcinoma, spindle cell" 80753 "Squamous cell carcinoma, adenoid" 80762 Sq. cell carc. in situ with question. stroma... 80763 "Sq. 
cell carcinoma, micro-invasive" 80770 "Squamous intraepithelial neoplasia, low grade" 80772 "Squamous intraepithelial neoplasia, high grade" 80783 Squamous cell carcinoma with horn formation 80802 Queyrat erythroplasia 80812 Bowen disease 80823 Lymphoepithelial carcinoma 80833 Basaloid squamous cell carcinoma 80843 "Squamous cell carcinoma, clear cell type" 80903 "Basal cell carcinoma, NOS" 80913 Multifocal superficial basal cell carcinoma 80923 "Infiltrating basal cell carcinoma, NOS" 80933 "Basal cell carcinoma, fibroepithelial" 80943 Basosquamous carcinoma 80953 Metatypical carcinoma 80973 "Basal cell carcinoma, nodular" 80983 Adenoid basal cell carcinoma 81023 Trichilemmocarcinoma 81103 Pilomatrix carcinoma 81202 Transitional cell carcinoma in situ 81203 "Transitional cell carcinoma, NOS" 81213 Schneiderian carcinoma 81223 "Trans. cell carcinoma, spindle cell" 81233 Basaloid carcinoma 81243 Cloacogenic carcinoma 81302 "Papillary trans. cell carcinoma, non-invasive" 81303 Papillary trans. cell carcinoma 81313 "Transitional cell carcinoma, micropapillary" 81400 "Adenoma, NOS" 81402 Adenocarcinoma in situ 81403 "Adenocarcinoma, NOS" 81413 Scirrhous adenocarcinoma 81423 Linitis plastica 81433 Superficial spreading adenocarcinoma 81443 "Adenocarcinoma, intestinal type" 81453 "Carcinoma, diffuse type" 81460 Monomorphic adenoma 81473 Basal cell adenocarcinoma 81480 "Glandular intraepithelial neoplasia, low grade" 81482 "Glandular intraepithelial neoplasia, high grade" 81500 "Pancreatic endocrine tumor, benign" 81501 "Pancreatic endocrine tumor, NOS" 81503 "Pancreatic endocrine tumor, malignant" 81513 "Insulinoma, malignant" 81523 "Glucagonoma, malignant" 81533 "Gastrinoma, malignant" 81543 "Mix. 
pancreatic endocrine&exocrine tumor, mal" 81553 Vipoma 81563 "Somatostatinoma, malignant" 81573 "Enteroglucagonoma, malignant" 81581 "Endocrine tumor, functioning, NOS" 81603 Cholangiocarcinoma 81613 Bile duct cystadenocarcinoma 81623 Klatskin tumor 81630 "Pancreatobiliary neoplasm, non-invasive" 81632 "Pap. neoplasm,pancreatobiliary-type,high gr. int" 81633 Pancreatobiliary-type carcinoma 81703 "Hepatocellular carcinoma, NOS" 81713 "Hepatocellular carcinoma, fibrolamellar" 81723 "Hepatocellular carcinoma, scirrhous" 81733 "Hepatocellular carcinoma, spindle cell variant" 81743 "Hepatocellular carcinoma, clear cell type" 81753 "Hepatocellular carcinoma, pleomorphic type" 81803 Comb. hepatocel. carcinoma & cholangiocarcinoma 81903 Trabecular adenocarcinoma 82003 Adenoid cystic carcinoma 82012 Cribriform carcinoma in situ 82013 Cribriform carcinoma 82102 Adenocarcinoma in situ in adenomatous polyp 82103 Adenocarcinoma in adenomatous polyp 82113 Tubular adenocarcinoma 82133 Serrated adenocarcinoma 82143 Parietal cell carcinoma 82153 Adenocarcinoma of anal glands 82202 Adenocarcinoma in situ in familial polyp. coli 82203 Adenocarcinoma in adenoma. polyposis coli 82212 Adenocarc. in situ in mult. adenomatous polyps 82213 Adenocarcinoma in mult. adenomatous polyps 82302 "Duct carcinoma in situ, solid type" 82303 "Solid carcinoma, NOS" 82313 Carcinoma simplex 82401 Carcinoid tumor of uncertain malignant potential 82403 "Carcinoid tumor, malignant" 82413 Enterochromaffin cell carcinoid 82423 "Enterochromaffin-like cell tumor, malignant" 82433 Goblet cell carcinoid 82443 Mixed adenoneuroendocrine carcinoma 82453 Adenocarcinoid tumor 82463 Neuroendocrine carcinoma 82473 Merkel cell carcinoma 82493 Atypical carcinoid tumor 82503 Bronchiolo-alveolar adenocarcinoma 82513 Alveolar adenocarcinoma 82523 "Bronchiolo-alveolar carcinoma, non-mucinous" 82533 "Bronchiolo-alveolar carcinoma, mucinous" 82543 "Bronch.-alv. carc., mixed mucin. and non-muc..." 
82553 Adenocarcinoma with mixed subtypes 82600 "Papillary adenoma, NOS" 82603 "Papillary adenocarcinoma, NOS" 82612 Adenocarcinoma in situ in villous adenoma 82613 Adenocarcinoma in villous adenoma 82623 Villous adenocarcinoma 82632 Adenocarcinoma in situ in tubulovillous adenoma 82633 Adenocarcinoma in tubulovillous adenoma 82653 "Micropapillary carcinoma, NOS" 82700 Chromophobe adenoma 82703 Chromophobe carcinoma 82710 Prolactinoma 82720 "Pituitary adenoma, NOS" 82723 "Pituitary carcinoma, NOS" 82800 Acidophil adenoma 82803 Acidophil carcinoma 82810 Mixed acidophil-basophil adenoma 82813 Mixed acidophil-basophil carcinoma 82900 Oxyphilic adenoma 82903 Oxyphilic adenocarcinoma 83000 Basophil adenoma 83003 Basophil carcinoma 83100 Clear cell adenoma 83103 "Clear cell adenocarcinoma, NOS" 83123 Renal cell carcinoma 83133 Clear cell adenocarcinofibroma 83143 Lipid-rich carcinoma 83153 Glycogen-rich carcinoma 83163 Cyst-associated renal cell carcinoma 83173 "Renal cell carcinoma, chromophobe type" 83183 "Renal cell carcinoma, sarcomatoid" 83193 Collecting duct carcinoma 83203 Granular cell carcinoma 83223 Water-clear cell adenocarcinoma 83230 Mixed cell adenoma 83233 Mixed cell adenocarcinoma 83303 "Follicular adenocarcinoma, NOS" 83313 Follicular adenocarcinoma well diff. 
83323 Follicular adenocarcinoma trabecular 83333 Fetal adenocarcinoma 83353 "Follicular carcinoma, minimally invasive" 83373 Insular carcinoma 83403 "Papillary carcinoma, follicular variant" 83413 Papillary microcarcinoma 83423 "Papillary carcinoma, oxyphilic cell" 83433 "Papillary carcinoma, encapsulated" 83443 "Papillary carcinoma, columnar cell" 83453 Medullary carcinoma with amyloid stroma 83463 Mixed medullary-follicular carcinoma 83473 Mixed medullary-papillary carcinoma 83503 Nonencapsulated sclerosing carcinoma 83700 "Adrenal cortical adenoma, NOS" 83703 Adrenal cortical carcinoma 83803 Endometrioid carcinoma 83813 "Endometrioid adenofibroma, malignant" 83823 "Endometrioid adenocarcinoma, secretory variant" 83833 "Endometrioid adenocarcinoma, ciliated cell v..." 83843 "Adenocarcinoma, endocervical type" 83903 Skin appendage carcinoma 84003 Sweat gland adenocarcinoma 84013 Apocrine adenocarcinoma 84023 "Nodular hidradenoma, malignant" 84033 Malignant eccrine spiradenoma 84073 Sclerosing sweat duct carcinoma 84083 Eccrine papillary adenocarcinoma 84093 "Eccrine poroma, malignant" 84103 Sebaceous adenocarcinoma 84133 Eccrine adenocarcinoma 84203 Ceruminous adenocarcinoma 84303 Mucoepidermoid carcinoma 84403 "Cystadenocarcinoma, NOS" 84413 "Serous cystadenocarcinoma, NOS" 84421 "Serous cystadenoma, borderline malignancy (C..." 84503 "Papillary cystadenocarcinoma, NOS" 84511 "Papillary cystadenoma, borderline malignancy..." 84523 Solid pseudopapillary carcinoma 84532 "Intraductal papillary-mucinous carcinoma, no..." 84533 "Intraductal papillary-mucinous carcinoma, in..." 84603 Papillary serous cystadenocarcinoma 84613 Serous surface papillary carcinoma 84621 Serous papillary cystic tumor of borderline ... 84702 "Mucinous cystadenocarcinoma, non-invasive" 84703 "Mucinous cystadenocarcinoma, NOS" 84713 Papillary mucinous cystadenocarcinoma 84721 Mucinous cystic tumor of borderline malignan... 84731 "Papillary mucinous cystadenoma, borderline m..." 
84803 Mucinous adenocarcinoma 84813 Mucin-producing adenocarcinoma 84823 "Mucinous adenocarcinoma, endocervical type" 84903 Signet ring cell carcinoma 85002 "Intraductal carcinoma, noninfiltrating, NOS" 85003 "Infiltrating duct carcinoma, NOS" 85012 "Comedocarcinoma, non-infiltrating" 85013 "Comedocarcinoma, NOS" 85023 Secretory carcinoma of breast 85032 Noninfiltrating intraductal papillary adenoc... 85033 Intraductal papillary adenocarcinoma with in... 85042 Noninfiltrating intracystic carcinoma 85043 "Intracystic carcinoma, NOS" 85072 Intraductal micropapillary carcinoma 85083 Cystic hypersecretory carcinoma 85103 "Medullary carcinoma, NOS" 85123 Medullary carcinoma with lymphoid stroma 85133 Atypical medullary carcinoma 85143 "Duct carcinoma, desmoplastic type" 85202 Lobular carcinoma in situ 85203 "Lobular carcinoma, NOS" 85213 Infiltrating ductular carcinoma 85222 Intraductal and lobular in situ carcinoma 85223 Infiltrating duct and lobular carcinoma 85233 Infiltr. duct mixed with other types of carc... 85243 Infiltrating lobular mixed with other types ... 85253 Polymorphous low grade adenocarcinoma 85303 Inflammatory carcinoma 85403 "Paget disease, mammary" 85413 Paget dis. & infil. duct carcinoma 85423 "Paget disease, extramammary" 85433 Paget disease and intraductal ca. 85503 Acinar cell carcinoma 85513 Acinar cell cystadenocarcinoma 85603 Adenosquamous carcinoma 85613 "Warthin tumor, malignant" 85623 Epithelial-myoepithelial carcinoma 85703 Adenocarcinoma with squamous metaplasia 85713 Adenocarcinoma w cartilag. & oss. metaplas. 85723 Adenocarcinoma with spindle cell mataplasia 85733 Adenocarcinoma with apocrine metaplasia 85743 Adenocarcinoma with neuroendocrine differen. 
85753 "Metaplastic carcinoma, NOS" 85763 Hepatoid adenocarcinoma 85803 "Thymoma, malignant, NOS" 85813 "Thymoma, type A, malignant" 85823 "Thymoma, type AB, malignant" 85833 "Thymoma, type B1, malignant" 85843 "Thymoma, type B2, malignant" 85853 "Thymoma, type B3, malignant" 85863 "Thymic carcinoma, NOS" 85883 Spindle epithelial tumor with thymus-like el... 85893 Carcinoma showing thymus-like element 85903 "Ovarian stromal tumor, mal." 86003 "Thecoma, malignant" 86201 "Granulosa cell tumor, adult type" 86203 "Granulosa cell tumor, malignant" 86211 Granulosa cell-theca cell tumor 86221 "Granulosa cell tumor, juvenile" 86303 "Androblastoma, malignant" 86311 Sertoli-Leydig cell tumor of intermediate differ 86313 "Sertoli-Leydig cell tumor, poorly differenti..." 86323 "Gynandroblastoma, malignant" 86343 "Sertoli-Leydig cl tum., p.d. w heterologous ..." 86401 "Sertoli cell tumor, NOS" 86403 Sertoli cell carcinoma 86501 "Leydig cell tumor, NOS" 86503 "Leydig cell tumor, malignant" 86703 "Steroid cell tumor, malignant" 86801 "Paraganglioma, NOS" 86803 "Paraganglioma, malignant" 86913 "Aortic body tumor, malignant" 86923 "Carotid body tumor, malignant" 86933 "Extra-adrenal paraganglioma, malignant" 87000 Pheochromocytoma NOS 87003 Pheochromocytoma 87103 Glomangiosarcoma 87202 Melanoma in situ 87203 "Malignant melanoma, NOS" 87213 Nodular melanoma 87223 Balloon cell melanoma 87233 "Malignant melanoma, regressing" 87280 Diffuse melanocytosis 87281 Meningeal melanocytoma 87283 Meningeal melanomatosis 87303 Amelanotic melanoma 87403 Mal. melanoma in junctional nevus 87412 "Precancerous melanosis, NOS" 87413 Mal. melanoma in precan. melanosis 87422 Lentigo maligna 87423 Lentigo maligna melanoma 87433 Superficial spreading melanoma 87443 "Acral lentiginous melanoma, malig." 87453 "Desmoplastic melanoma, malignant" 87463 Mucosal lentiginous melanoma 87613 Mal. melanoma in giant pigmented nevus 87703 Mixed epithel. 
& spindle cell melanoma 87713 Epithelioid cell melanoma 87723 "Spindle cell melanoma, NOS" 87733 "Spindle cell melanoma, type A" 87743 "Spindle cell melanoma, type B" 87803 "Blue nevus, malignant" 88000 "Soft tissue tumor, benign" 88003 "Sarcoma, NOS" 88013 Spindle cell sarcoma 88023 Giant cell sarcoma 88033 Small cell sarcoma 88043 Epithelioid sarcoma 88053 Undifferentiated sarcoma 88063 Desmoplastic small round cell tumor 88100 "Fibroma, NOS" 88103 "Fibrosarcoma, NOS" 88113 Fibromyxosarcoma 88123 Periosteal fibrosarcoma 88133 Fascial fibrosarcoma 88143 Infantile fibrosarcoma 88150 Solitary fibrous tumor 88153 "Solitary fibrous tumor, malignant" 88211 "Fibromatosis, aggressive" 88240 Myofibroma 88241 Myofibromatosis 88251 "Myofibroblastic tumor, NOS" 88300 "Fibrous histiocytoma, benign" 88303 "Fibrous histiocytoma, malignant" 88323 "Dermatofibrosarcoma, NOS" 88333 Pigmented dermatofibrosarcoma protuberans 88341 Giant cell fibroblastoma 88351 Plexiform fibrohistiocytic tumor 88361 Angiomatoid fibrous histiocytoma 88403 Myxosarcoma 88411 Angiomyxoma 88500 "Lipoma, NOS" 88501 Atypical lipoma 88503 "Liposarcoma, NOS" 88510 Fibrolipoma 88513 "Liposarcoma, well differentiated" 88523 Myxoid liposarcoma 88533 Round cell liposarcoma 88543 Pleomorphic liposarcoma 88553 Mixed type liposarcoma 88573 Fibroblastic liposarcoma 88583 Dedifferentiated liposarcoma 88610 "Angiolipoma, NOS" 88900 "Leiomyoma, NOS" 88901 "Leiomyomatosis, NOS" 88903 "Leiomyosarcoma, NOS" 88913 Epithelioid leiomyosarcoma 88943 Angiomyosarcoma 88953 Myosarcoma 88963 Myxoid leiomyosarcoma 88971 "Smooth muscle tumor, NOS" 89000 "Rhabdomyoma, NOS" 89003 "Rhabdomyosarcoma, NOS" 89013 "Pleomorphic rhabdomyosarcoma, adult type" 89023 Mixed type rhabdomyosarcoma 89103 Embryonal rhabdomyosarcoma 89123 Spindle cell rhabdomyosarcoma 89203 Alveolar rhabdomyosarcoma 89213 Rhabdomyosarcoma with ganglionic differentia... 
89303 Endometrial stromal sarcoma 89313 "Endometrial stromal sarcoma, low grade" 89333 Adenosarcoma 89343 Carcinofibroma 89353 "Stromal sarcoma, NOS" 89361 "Gastrointestinal stromal tumor, NOS" 89363 Gastrointestinal stromal sarcoma 89403 "Mixed tumor, malignant, NOS" 89413 Carcinoma in pleomorphic adenoma 89503 Mullerian mixed tumor 89513 Mesodermal mixed tumor 89590 Benign Cystic nephroma 89591 Cystic partially differentiated nephroblastoma 89593 Malignant cystic nephroma 89601 Mesoblastic nephroma 89603 "Nephroblastoma, NOS" 89633 Malignant rhabdoid tumor 89643 Clear cell sarcoma of kidney 89703 Hepatoblastoma 89713 Pancreatoblastoma 89723 Pulmonary blastoma 89733 Pleuropulmonary blastoma 89741 Sialoblastoma 89751 Calcifying nested epithelial stromal tumor 89803 "Carcinosarcoma, NOS" 89813 "Carcinosarcoma, embryonal type" 89823 Malignant myoepithelioma 89901 "Mesenchymoma, NOS" 89903 "Mesenchymoma, malignant" 89913 Embryonal sarcoma 90003 "Brenner tumor, malignant" 90143 Serous adenocarcinofibroma 90153 Mucinous adenocarcinofibroma 90203 "Phyllodes tumor, malignant" 90403 "Synovial sarcoma, NOS" 90413 "Synovial sarcoma, spindle cell" 90423 "Synovial sarcoma, epithelioid cell" 90433 "Synovial sarcoma, biphasic" 90443 "Clear cell sarcoma,NOS (except of kidney M-8..." 90503 "Mesothelioma, malignant" 90513 "Fibrous mesothelioma, malignant" 90523 "Epithel. mesothelioma, mal." 90533 "Mesothelioma, biphasic, malignant" 90603 Dysgerminoma 90613 "Seminoma, NOS" 90623 "Seminoma, anaplastic" 90633 Spermatocytic seminoma 90642 Intratubular malignant germ cells 90643 Germinoma 90653 "Germ cell tumor, nonseminomatous" 90703 "Embryonal carcinoma, NOS" 90713 Yolk sac tumor 90723 Polyembryoma 90800 "Teratoma, benign" 90801 "Teratoma, NOS" 90803 "Teratoma, malignant, NOS" 90813 Teratocarcinoma 90823 "Malignant teratoma, undiff." 90833 "Malignant teratoma, intermediate" 90840 "Dermoid cyst, NOS" 90843 Teratoma with malig. 
transformation 90853 Mixed germ cell tumor 90903 "Struma ovarii, malignant" 91003 Choriocarcinoma 91013 Choriocarcinoma combined w/ other germ cell ... 91023 "Malignant teratoma, trophoblastic" 91043 Malignant placental site trophoblastic tumor 91053 "Trophoblastic tumor, epithelioid" 91103 "Mesonephroma, malignant" 91200 "Hemangioma, NOS" 91203 Hemangiosarcoma 91210 Cavernous hemangioma 91220 Venous hemangioma 91243 Kupffer cell sarcoma 91300 "Hemangioendothelioma, benign" 91301 "Hemangioendothelioma, NOS" 91303 "Hemangioendothelioma, malignant" 91310 Capillary hemangioma 91333 "Epithelioid hemangioendothelioma, malignant" 91403 Kaposi sarcoma 91500 "Hemangiopericytoma, benign" 91501 "Hemangiopericytoma, NOS" 91503 "Hemangiopericytoma, malignant" 91611 Hemangioblastoma 91703 Lymphangiosarcoma 91803 "Osteosarcoma, NOS" 91813 Chondroblastic osteosarcoma 91823 Fibroblastic osteosarcoma 91833 Telangiectatic osteosarcoma 91843 Osteosarcoma in Paget disease 91853 Small cell osteosarcoma 91863 Central osteosarcoma 91873 Instrosseous well differentiated osteosarcoma 91923 Parosteal osteosarcoma 91933 Periosteal osteosarcoma 91943 High grade surface osteosarcoma 91953 Intracortical osteosarcoma 92203 "Chondrosarcoma, NOS" 92213 Juxtacortical chondrosarcoma 92303 "Chondroblastoma, malignant" 92313 Myxoid chondrosarcoma 92403 Mesenchymal chondrosarcoma 92423 Clear cell chondrosarcoma 92433 Dedifferentiated chondrosarcoma 92501 "Giant cell tumor of bone, NOS" 92503 "Giant cell tumor of bone, malignant" 92511 Giant cell tumor of soft parts 92513 Malignant giant cell tumor of soft parts 92523 Malignant tenosynovial giant cell tumor 92603 Ewing sarcoma 92613 Adamantinoma of long bones 92703 "Odontogenic tumor, malignant" 92903 Ameloblastic odontosarcoma 93103 "Ameloblastoma, malignant" 93303 Ameloblastic fibrosarcoma 93423 Odontogenic carcinosarcoma 93501 Craniopharyngioma 93511 Adamantinomatous craniopharyngioma 93521 Papillary craniopharyngioma 93601 "Pinealoma, NOS" 93611 
Pineocytoma 93623 Pineoblastoma 93630 Melanotic neuroectodermal tumor 93643 Peripheral neuroectodermal tumor 93653 Askin tumor 93703 "Chordoma, NOS" 93713 Chondroid chordoma 93723 Dedifferentiated chordoma 93803 "Glioma, malignant" 93813 Gliomatosis cerebri 93823 Mixed glioma 93831 Subependymoma 93841 Supependymal giant cell astrocytoma 93900 "Choroid plexus papilloma, NOS" 93901 Atypical choroid plexus papilloma 93903 "Choroid plexus papilloma, malignant" 93913 "Ependymoma, NOS" 93923 "Ependymoma, anaplastic" 93933 Papillary ependymoma 93941 Myxopapillary ependymoma 93953 Papillary tumor of the pineal region 94003 "Astrocytoma, NOS" 94013 "Astrocytoma, anaplastic" 94103 Protoplasmic astrocytoma 94113 Gemistocytic astrocytoma 94121 Desmoplastic infantile astrocytoma 94130 Dysembryoplastic neuroepithelial tumor 94203 Fibrillary astrocytoma 94211 Pilocytic astrocytoma 94233 Polar spongioblastoma 94243 Pleomorphic xanthoastrocytoma 94253 Pilomyxoid astrocytoma 94303 Astroblastoma 94311 Angiocentric glioma 94321 Pituicytoma 94403 "Glioblastoma, NOS" 94413 Giant cell glioblastoma 94421 Gliofibroma 94423 Gliosarcoma 94441 Chordoid glioma 94503 "Oligodendroglioma, NOS" 94513 "Oligodendroglioma, anaplastic" 94603 Oligodendroblastoma 94703 "Medulloblastoma, NOS" 94713 Desmoplastic medulloblastoma 94723 Medullomyoblastoma 94733 Primitive neuroectodermal tumor 94743 Large cell medulloblastoma 94803 "Cerebellar sarcoma, NOS" 94900 Ganglioneuroma 94903 Ganglioneuroblastoma 94920 Gangliocytoma 94930 Dysplastic gangliocytoma of cerebellum (Lher... 
95003 "Neuroblastoma, NOS" 95013 "Medulloepithelioma, NOS" 95023 Teratoid medulloepithelioma 95033 "Neuroepithelioma, NOS" 95043 Spongioneuroblastoma 95051 "Ganglioglioma, NOS" 95053 "Ganglioglioma, anaplastic" 95061 Centrol neurocytoma 95083 Atypical teratoid/rhabdoid tumor 95091 Papillary glioneuronal tumor 95103 "Retinoblastoma, NOS" 95113 "Retinoblastoma, differentiated" 95123 "Retinoblastoma, undifferentiated" 95133 "Retinoblastoma, diffuse" 95203 Olfactory neurogenic tumor 95213 Olfactory neurocytoma 95223 Olfactory neuroblastoma 95233 Olfactory neuroepithelioma 95300 "Meningioma, NOS" 95301 "Meningiomatosis, NOS" 95303 "Meningioma, malignant" 95310 Meningothelial meningioma 95320 Fibrous meningioma 95330 Psammomatous meningioma 95340 Angiomatous meningioma 95370 Transitional meningioma 95381 Clear cell meningioma 95383 Papillary meningioma 95391 Atypical meningioma 95393 Meningeal sarcomatosis 95400 "Neurofibroma, NOS" 95401 "Neurofibromatosis, NOS" 95403 Malignant peripheral nerve sheath tumor 95410 Melanotic neurofibroma 95500 Plexiform neurofibroma 95600 "Neurilemoma, NOS" 95601 Neurinomatosis 95603 "Neurilemmoma, malignant" 95613 MPNST with rhabdomyoblastic differentiation 95620 Neurothekeoma 95700 "Neuroma, NOS" 95710 "Perineurioma, NOS" 95713 "Perineurioma, malignant" 95800 "Granular cell tumor, NOS" 95803 "Granular cell tumor, malignant" 95813 Alveolar soft part sarcoma 95903 "Malignant lymphoma, NOS" 95913 "Malignant lymphoma, non-Hodgkin" 95963 Composite Hodgkin and non-Hodgkin lymphoma 95973 Primary cutaneous follicle centre lymphoma 96503 "Hodgkin lymphoma, NOS" 96513 "Hodgkin lymphoma, lymphocyte-rich" 96523 "Hodgkin lymphoma, mixed cellularity, NOS" 96533 "Hodgkin lymphoma, lymphocytic deplet., NOS" 96543 "Hodgkin lymph., lymphocyt. deplet., diffuse ..." 96553 "Hodgkin lymphoma, lymphocyt. deplet., reticular" 96593 "Hodgkin lymph., nodular lymphocyte predom." 
96613 Hodgkin granuloma [obs] 96623 Hodgkin sarcoma [obs] 96633 "Hodgkin lymphoma, nodular sclerosis, NOS" 96643 "Hodgkin lymphoma, nod. scler., cellular phase" 96653 "Hodgkin lymphoma, nod. scler., grade 1" 96673 "Hodgkin lymphoma, nod. scler., grade 2" 96703 "ML, small B lymphocytic, NOS" 96713 "ML, lymphoplasmacytic" 96733 Mantle cell lymphoma 96753 "ML, mixed sm. and lg. cell, diffuse" 96783 Primary effusion lymphoma 96793 Mediastinal large B-cell lymphoma 96803 "ML, large B-cell, diffuse" 96843 "ML, large B-cell, diffuse, immunoblastic, NOS" 96873 "Burkitt lymphoma, NOS" 96883 T-cell/histiocyte rich large B-cell lymphoma 96893 Splenic marginal zone B-cell lymphoma 96903 "Follicular lymphoma, NOS" 96913 "Follicular lymphoma, grade 2" 96953 "Follicular lymphoma, grade 1" 96983 "Follicular lymphoma, grade 3" 96993 "Marginal zone B-cell lymphoma, NOS" 97003 Mycosis fungoides 97013 Sezary syndrome 97023 "Mature T-cell lymphoma, NOS" 97053 Angioimmunoblastic T-cell lymphoma 97083 Subcutaneous panniculitis-like T-cell lymphoma 97093 "Cutaneous T-cell lymphoma, NOS" 97123 Intravascular large B-cell lymphoma 97143 "Anaplastic large cell lymphoma, T-cell and N..." 97163 Hepatosplenic T-cell lymphoma 97173 Intestinal T-cell lymphoma 97183 Primary cutan. CD30+ T-cell lymphoprolif. di... 97193 "NK/T-cell lymphoma, nasal and nasal-type" 97243 Syst. EBV pos. T-cell lymphoprol. disease 97253 Hydroa vacciniforme-like lymphoma 97263 Primary cutaneous gamma-delta T-cell lymphoma 97273 "Precursor cell lymphoblastic lymphoma, NOS" 97283 Precursor B-cell lymphoblastic lymphoma 97293 Precursor T-cell lymphoblastic lymphoma 97313 "Plasmacytoma, NOS" 97323 Multiple myeloma 97333 Plasma cell leukemia 97343 "Plasmacytoma, extramedullary" 97353 Plasmablastic lymphoma 97373 ALK postive large B-cell lymphoma 97383 Large B-cell lymph. arising in HHV8-assoc. 
multi 97403 Mast cell sarcoma 97411 Indolent systemic mastocytosis 97413 Malignant mastocytosis 97423 Mast cell leukemia 97503 Malignant histiocytosis 97511 "Langerhans cell histiocytosis, NOS" 97513 "Langerhans cell histiocytosis, NOS" 97521 "Langerhans cell histiocytosis, unifocal" 97531 "Langerhans cell histiocytosis, multifocal" 97543 "Langerhans cell histiocytosis, disseminated" 97553 Histiocytic sarcoma 97563 Langerhans cell sarcoma 97573 Interdigitating dendritic cell sarcoma 97583 Follicular dendritic cell sarcoma 97593 Fibroblastic reticular cell tumor 97603 "Immunoproliferative disease, NOS" 97613 Waldenstrom macroglobulinemia 97623 "Heavy chain disease, NOS" 97643 Immunoproliferative small intestinal disease 98003 "Leukemia, NOS" 98013 "Acute leukemia, NOS" 98053 Acute biphenotypic leukemia 98063 Mix. phenotype ac. leukemia with t;BCR-ABL1 98073 Mix. phenotype ac. leukemia with t;MLL rearrange 98083 "Mixed phenotype acute leukemia, B/myeloid, NOS" 98093 "Mixed phenotype acute leukemia, T/myeloid, NOS" 98113 "B lymphoblastic leukemia/lymphoma, NOS" 98123 B lymphoblast. leukemia/lymphoma with t;BCR-ABL1 98133 B lymphoblast. leukemia/lymphoma with t;MLL rear 98143 B lymphoblast. leukemia/lymphoma with t;TEL-AML1 98153 B lymphoblastic leukemia/lymphoma with hyperdipl 98163 B lymphoblast. leukemia/lymph. with hypodiploidy 98173 B lymphoblast. leukemia/lymphoma with t;IL3-IGH 98183 B lymphoblast. leukemia/lymphoma with t;E2A PBX1 98203 "Lymphoid leukemia, NOS" 98233 B-cell chr. lymph. leuk./small lymphocytic l... 98263 Burkitt cell leukemia 98273 Adult T-cell leukemia/lymphoma (HTLV-1 pos.) 
98283 "Acute lymphoblastic leukemia, L2 type, NOS" 98313 T-cell large granular lymphocytic leukemia 98323 "Prolymphocytic leukemia, NOS" 98333 "Prolymphocytic leukemia, B-cell type" 98343 "Prolymphocytic leukemia, T-cell type" 98353 "Precursor cell lymphoblastic leukemia, NOS" 98363 Precursor B-cell lymphoblastic leukemia 98373 Precursor T-cell lymphoblastic leukemia 98403 "Acute myeloid leukemia, M6 type" 98603 "Myeloid leukemia, NOS" 98613 Acute myeloid leukemia 98633 "Chronic myeloid leukemia, NOS" 98653 Acute myeloid leukemia with t;DEK-NUP214 98663 "Acute promyelocytic leuk.,t(15;17)(q22;q11-12)" 98673 Acute myelomonocytic leukemia 98693 Acute myeloid leukemia with inv or t;RPN1-EVI1 98703 Acute basophilic leukemia 98713 Ac. myelomonocytic leuk. w abn. mar. eosinop... 98723 "Acute myeloid leukemia, minimal differentiation" 98733 Acute myeloid leukemia without maturation 98743 Acute myeloid leukemia with maturation 98753 "Chronic myelogenous leukemia, BCR/ABL positive" 98763 "Atypical chronic myeloid leuk., BCR/ABL nega..." 98913 Acute monocytic leukemia 98953 Acute myeloid leuk. with myelodysplasia-related 98963 "Acute myeloid leukemia, t(8;21)(q22;q22)" 98973 "Acute myeloid leukemia, 11q23 abnormalities" 98981 Transient abnormal myelopoiesis 98983 Myeloid leukemia associated with Down Syndrome 99103 Acute megakaryoblastic leukemia 99113 Acute myeloid leukemia (megakar. blast.) 
with t; 99203 Therapy related myeloid neoplasm 99303 Myeloid sarcoma 99313 Acute panmyelosis with myelofibrosis 99403 Hairy cell leukemia 99453 "Chronic myelomonocytic leukemia, NOS" 99463 Juvenile myelomonocytic leukemia 99483 Aggressive NK-cell leukemia 99503 Polycythemia vera 99603 "Myeloproliferative neoplasm, NOS" 99613 Primary myelofibrosis 99623 Essential thrombocythemia 99633 Chronic neutrophilic leukemia 99643 "Chronic eosinophilic leukemia, NOS" 99653 Myeloid&lymphoid neoplasms with PDGFRB rearrange 99663 Myeloid neoplasms with PDGFRB rearrangement 99673 Myeloid&lymphoid neoplasm with FGFR1 abnormaliti 99701 "Lymphoproliferative disorder, NOS" 99711 "Post transplant lymphoproliferative disorder, NO" 99713 Polymorphic post transplant lymphoproliferative 99751 "Myeloproliferative disease, NOS" 99753 "Myeloproliferative neoplasm, unclassifiable" 99803 Refractory anemia 99823 Refractory anemia with sideroblasts 99833 Refractory anemia with excess blasts 99843 Refract. anemia with excess blasts in transf... 99853 Refractory cytopenia with multilineage dyspl... 99863 Myelodysplastic syndr. with 5q deletion synd... 
99873 "Therapy-related myelodysplastic syndrome, NOS" 99893 "Myelodysplastic syndrome, NOS" 99913 Refractory neutropenia 99923 Refractory thrombocytopenia 341 PLOCCD PLCCD1 PLCCD2 PLCCD3 0 external upper lip 1 external lower lip 2 "external lip, NOS" 3 mucose of uppper lip 4 mucosa of lower lip 5 "mucosa of lip, NOS" 6 commissure of lip 8 overlapping lesion of lip 9 "lip, NOS" 19 base of tongue 20 "dorsal surface of tongue, NOS" 21 border of tongue 22 "ventral surface of tongue, NOS" 23 anterior 2/3 of tongue 24 lingual tonsil 28 overlapping lesion of tongue 29 "tongue, NOS" 30 upper gum 31 lower gum 39 "gum, NOS" 40 anterior floor of mouth 41 lateral floor of mouth 48 overlapping lesion of floor of mouth 49 "floor of mouth, NOS" 50 hard palate 51 "soft palate, NOS" 52 uvula 58 overlapping lesion of palate 59 "palate, NOS" 60 cheek mucosa 61 vestibule of mouth 62 retromolar area 68 overlappinglesionofotherandunspecifiedpartso... 69 "mouth, NOS" 79 parotid gland 80 submandibular gland 81 sublingual gland 88 overlapping lesion of major salivary glands 89 "major salivary gland, NOS" 90 tonsillar fossa 91 tonsillar pillar 98 overlapping lesion of tonsil 99 "tonsil, NOS" 100 vallecula 101 anterior surface of epiglottis 102 lateral wall of oropharynx 103 posterior wall of oropharynx 104 branchial cleft 108 overlapping lesion of oropharynx 109 "oropharynx, NOS" 110 superior wall of nasopharyx 111 posterior wall of nasopharyx 112 lateral wall of nasopharyx 113 anterior wall of nasopharyx 118 overlapping lesion of nasopharyx 119 "nasopharyx, NOS" 129 "nasopharyxyriform sinus, NOS" 130 postcricoid region 131 hypopharyngeal aspect of aryepiglottic fold 132 posterior wall of hypopharynx 138 overlapping lesion of hypopharyx 139 "hypopharyx, NOS" 140 "pharyx, NOS" 142 waldeyer ring 148 "overlappinglesionoflip,oralcavityandpharynx" 150 cervical esophagus 151 thoracic esophagus 152 abdominal esophagus 153 upper third of esophagus 154 middle third of esophagus 155 lower third of 
esophagus 158 overlapping lesion of esophagus 159 "esophagus, NOS" 160 "cardia, NOS" 161 fundus of stomach 162 body of stomach 163 gastric antrum 164 pylorus 165 lesser curvature of stomach 166 greater curvature of stomach 168 overlapping lesion of stomach 169 "stomach, NOS" 170 duodenum 171 jejunum 172 ileum 173 Meckel diverticulum 178 overlapping lesion of small intestine 179 "small intestine, NOS" 180 cecum 181 appendix 182 ascending colon 183 hepatic flexure of colon 184 transverse colon 185 splenic flexure of colon 186 descending colon 187 sigmoid colon 188 overlapping lesion of colon 189 "colon, NOS" 199 rectosigmoid junction 209 "rectum, NOS" 210 "anus, NOS" 211 anal canal 212 cloacogenic zone 218 "overlappinglesionofrectum,anusandanalcanal" 220 liver 221 intrahepatic bile duct 239 "gallbladder, NOS" 240 extrahepatic bile duct 241 ampulla of Vater 248 overlapping lesion of biliary tract 249 "billary tract, NOS" 250 head of pancreas 251 body of pancreas 252 tail of pancreas 253 pancreatic duct 254 islets of Langerhans 257 other specified parts of pancreas 258 overlapping lesion of pancreas 259 "pancreas, NOS" 260 "intestinal tract, NOS" 268 overlapping lesion of digestive system 269 "gastrointestinal tract, NOS" 300 nasal cavity 301 middle ear 310 maxillary sinus 311 ethmoid sinus 312 frontal sinus 313 sphenoid sinus 318 overlapping lesion of accessory sinuses 319 "accessory sinus, NOS" 320 glottis 321 supraglottis 322 subglottis 323 laryngeal cartilage 328 overlapping lesion of larynx 329 "larynx, NOS" 339 trachea 340 main bronchus 341 "upper lobe, lung" 342 "middle lobe, lung" 343 "lower lobe, lung" 348 overlapping lesion of lung 349 "lung, NOS" 379 thymus 380 heart 381 anterior mediastinum 382 posterior mediastinum 383 "mediastinum, NOS" 384 "pleura, NOS" 388 "overlappinglesionofheart,mediastinumandpleura" 390 "upper respiratory tract, NOS" 398 overlappinglesionofrespiratorysystemandintra... 
399 ill-defined sites within respiratory system 400 "longboneofupperlimb,scapulaandassociatedjoints" 401 short bone of upper limb and associated joints 402 long bones of lower limb and associated joints 403 short bones of lower limb and associated joints 408 "overlappinglesionofbones,jointsandarticularc..." 409 "bones of lomb, NOS" 410 bones of skull and face and associated joints 411 mandible 412 vertebral column 413 "rib, sternum, clavicle and associated joints" 414 "pelvicbones,sacrum,coccyxandassociatedjoints" 418 "overlappinglesionofbones,jointsandarticularc..." 419 "bone, NOS" 420 blood 421 bone marrow 422 spleen 423 "reticuloendothelial system, NOS" 424 "hematopoietic system, NOS" 440 "skin of lip, NOS" 441 eyelid 442 external ear 443 skin of other and unspecified parts of face 444 skin of scalp and neck 445 skin of trunc 446 skin of upper limb and shoulder 447 skin of lower limb and hip 448 overlapping lesion of skin 449 "skin, NOS" 470 peripheralnervesandautonomicnervoussystemofh... 471 peripheralnervesandautonomicnervoussystemofu... 472 peripheralnervesandautonomicnervoussystemofl... 473 peripheralnervesandautonomicnervoussystemoft... 474 peripheralnervesandautonomicnervoussystemofa... 475 peripheralnervesandautonomicnervoussystemofp... 476 peripheralnervesandautonomicnervoussystemoft... 478 overlappinglesionofperipheralnervesandautono... 479 "autonomic nervous system, NOS" 480 retroperitoneum 481 specified parts of peritoneum 482 "peritoneum, NOS" 488 overlapping lesion of retroperitoneum 490 "connective,subcutaneousandothersofttissuesof..." 491 "connective,subcutaneousandothersofttissuesof..." 492 "connective,subcutaneousandothersofttissuesof..." 493 "connective,subcutaneousandothersofttissuesof..." 494 "connective,subcutaneousandothersofttissuesof..." 495 "connective,subcutaneousandothersofttissuesof..." 496 "connective,subcutaneousandothersofttissuesof..." 498 "overlappinglesionofconnective,subcutaneousan..." 
499 "connective,subcutaneousandothersofttissues,NOS" 500 nipple 501 central portion of breast 502 upper-inner quadrant of breast 503 lower-inner quadrant of breast 504 upper-outer quadrant of breast 505 upper-outer quadrant of breast 506 axillary tail of breast 508 overlapping lesion of breast 509 "breast, NOS" 510 labium majus 511 labium minus 512 clitoris 518 overlapping lesion of vulva 519 "vulva, NOS" 529 "vagina, NOS" 530 endocervix 531 exocervix 538 overlapping lesion of cervix uteri 539 "cervix uteri, NOS" 540 isthmus uteri 541 endometrium 542 myometrium 543 fundus uteri 548 overlapping lesion of corpus uteri 549 "corpus uteri, NOS" 559 "uterus, NOS" 569 "ovary, NOS" 570 fallopian tube 571 broad ligament 572 round ligament 573 parametrium 574 uterine adnexa 577 other specified parts of female genital organs 578 overlapping lesion of female genital organs 579 "female genital tract, NOS" 589 "placenta, NOS" 600 prepuce 601 glans penis 602 body of penis 608 overlapping lesion of penis 609 "penis, NOS" 619 prostate gland 620 undescended testis 621 descended testis 629 "testis, NOS" 630 epididymis 631 spermatic cord 632 "scrotum, NOS" 637 other specified parts of male genital organs 638 overlapping lesion of male genital organs 639 "male genital tract, NOS" 649 "kidney, NOS" 659 renal pelvis 669 ureter 670 trigone of bladder 671 dome of bladder 672 lateral wall of bladder 673 anterior wall of bladder 674 posterior of bladder 675 bladder neck 676 ureteric orifice 677 urachus 678 overlapping lesion of bladder 679 "bladder, NOS" 680 urethra 681 paraurethral gland 688 overlapping lesion of urinary organs 689 "urinary system, NOS" 690 conjuctiva 691 "cornea, NOS" 692 retina 693 choroid 694 ciliary body 695 lacrimal gland 696 "orbit, NOS" 698 overlapping lesion of eye and adnexa 699 "eye, NOS" 700 cerebral meninges 701 spinal meninges 709 "meninges, NOS" 710 cerebrum 711 frontal lobe 712 temperal lobe 713 parietal lobe 714 occipital lobe 715 "ventricle, NOS" 716 
cerebellum 717 brain stem 718 overlapping lesion of brain 719 "brain, NOS" 720 spinal cord 721 cauda equina 722 olfactory nerve 723 optic nerve 724 acoustic nerve 725 "cranial nerve, NOS" 728 overlappinglesionofbrainandcentralnervoussystem 729 "nervous system, NOS" 739 "thyroid gland, NOS" 740 cortex of adrenal gland 741 medulla of adrenal gland 749 "adrenal gland, NOS" 750 parathyroid gland 751 pituitary gland 752 craniopharyngeal duct 753 pineal gland 754 carotid body 755 aortic body and other paraganglia 758 overlappinglesionofendocrineglandsandrelated... 759 "endocrine gland, NOS" 760 "head, face or neck, NOS" 761 "thorax, NOS" 762 "abdomen, NOS" 763 "pelvis, NOS" 764 "upper limb, NOS" 765 "lower limb, NOS" 767 other ill-defined sites 768 overlapping lesion of ill-defined sites 770 "lymph nodes of head, face and neck" 771 intrathoracic lymph nodes 772 intra-abdominal lymph nodes 773 lymph nodes of axilla of arm 774 lymph nodes of inguinal region or leg 775 pelvic lymph nodes 778 lymph nodes of multiple regions 779 "lymph node, NOS" 809 unknown primary sites 343 IFCDATR 1 yes 2 no 3 statement by physician 4 IC will follow \ No newline at end of file diff --git a/test_data/alternative/studies/diagnosis_codebook.txt.sha1 b/test_data/alternative/studies/diagnosis_codebook.txt.sha1 new file mode 100644 index 0000000..24531c1 --- /dev/null +++ b/test_data/alternative/studies/diagnosis_codebook.txt.sha1 @@ -0,0 +1 @@ +8cc20a3917db3e251d9426d078ce74969e531931 PMCST000AAA_diagnosis_codebook.txt diff --git a/test_data/alternative/studies/individual.txt b/test_data/alternative/studies/individual.txt new file mode 100644 index 0000000..6a13db9 --- /dev/null +++ b/test_data/alternative/studies/individual.txt @@ -0,0 +1,17 @@ +MARK:,ID,IDAA,INDIVIDUAL_ID,SEX,IFCDATR,IFCGIV,IFCMAT,IFCCOM,DTOB +,217,8000208,PAT1,1,2,,,,15/09/2000 0:00:00 +,217,8000233,PAT3,1,2,,,,07/11/2001 0:00:00 +,217,8000250,PAT4,1,2,,,,23/06/1998 0:00:00 +,217,8000268,PAT5,1,2,,,,23/04/2001 0:00:00 
+,217,8000273,PAT6,1,2,,,,28/03/2001 0:00:00 +,217,8000323,PAT7,1,2,,,,08/01/1999 0:00:00 +,217,8000333,PAT8,1,2,,,,25/12/1997 0:00:00 +,217,8000345,PAT9,1,2,,,,29/09/1991 0:00:00 +,217,8000217,PAT10,1,2,,,,18/02/1989 0:00:00 +,217,8000238,PAT11,1,2,,,,07/11/2001 0:00:00 +,217,8000251,PAT12,2,2,,,,23/06/1998 0:00:00 +,217,8000269,PAT13,2,2,,,,23/04/2001 0:00:00 +,217,8000274,PAT14,2,2,,,,28/03/2001 0:00:00 +,217,8000324,PAT15,2,2,,,,08/01/1999 0:00:00 +,217,8000334,PAT16,2,2,,,,25/12/1997 0:00:00 +,217,8000346,PAT17,2,2,,,,29/09/1991 0:00:00 diff --git a/test_data/alternative/studies/individual.txt.sha1 b/test_data/alternative/studies/individual.txt.sha1 new file mode 100644 index 0000000..6ad5870 --- /dev/null +++ b/test_data/alternative/studies/individual.txt.sha1 @@ -0,0 +1 @@ +1c9e88792a41c571d9017932d6a07d47e15b9336 individual.txt diff --git a/test_data/alternative/studies/individual_codebook.txt b/test_data/alternative/studies/individual_codebook.txt new file mode 100644 index 0000000..9d23109 --- /dev/null +++ b/test_data/alternative/studies/individual_codebook.txt @@ -0,0 +1 @@ +1 SEX 1 male 2 female 9 unknown 2 SIGYN IFCGIV IFCMAT IFCCOM IFCREF NOREGIS CLEXPYN CYTOLYN HISTOYN IMAGEYN TMARKYN PATDCOGR PROTYN ELIGYN TXSTART SIGYNX SIGYNY 1 yes 2 no 8 not applicable 8 HOSPREC HOSPDIAG HOSPASS HOSPTRAN 200 AMC 201 UMCG 202 AZM 203 CZE 204 ErasmusMC 207 JBZ 208 LUMC 213 EZT 214 Radboudumc 216 VUMC 217 UMCU 220 PMC 332 DIAGCD PDGCD1 PDGCD2 PDGCD3 61000 FanconiÔs anaemia 61900 Aplastic anaemia 70000 KostmannÔs disease 76100 HLH 80000 "Neoplasm, benign" 80001 "Neoplasm, uncertain whether benign or malignant" 80003 "Neoplasm, malignant" 80010 "Tumor cells, benign" 80011 "Tumor cells, uncertain whether benign or mal..." 
80013 "Tumor cells, malignant" 80023 "Malignant tumor, small cell type" 80033 "Malignant tumor, giant cell type" 80043 "Malignant tumor, spindle cell type" 80050 "Clear cell tumor, NOS" 80053 "Malignant tumor, clear cell type" 80100 "Epithelial tumor, benign" 80102 "Carcinoma in situ, NOS" 80103 "Carcinoma, NOS" 80113 "Epithelioma, malignant" 80123 "Large cell carcinoma, NOS" 80133 Large cell neuroendocrine carcinoma 80143 Large cell carcinoma with rhabdoid phenotype 80153 Glassy cell carcinoma 80203 "Carcinoma, undifferentiated type, NOS" 80213 "Carcinoma, anaplastic type, NOS" 80223 Pleomorphic carcinoma 80303 Giant cell and spindle cell carcinoma 80313 Giant cell carcinoma 80323 Spindle cell carcinoma 80333 Pseudosarcomatous carcinoma 80343 Polygonal cell carcinoma 80353 Carcinoma with osteoclast-like giant cells 80413 "Small cell carcinoma, NOS" 80423 Oat cell carcinoma 80433 "Small cell carcinoma, fusiform cell" 80443 "Small cell carcinoma, intermediate cell" 80453 Combined small cell carcinoma 80463 Non-small cell carcinoma 80502 Papillary carcinoma in situ 80503 "Papillary carcinoma, NOS" 80513 "Verrucous carcinoma, NOS" 80522 "Papillary squamous cell carcinoma, non-invasive" 80523 Papillary squamous cell carcinoma 80702 "Squamous cell carcinoma in situ, NOS" 80703 "Squamous cell carcinoma, NOS" 80713 "Sq. cell carcinoma, keratinizing, NOS" 80723 "Sq. cell carcinoma, lg. cell, non-ker." 80733 "Sq. cell carcinoma, sm. cell, non-ker." 80743 "Sq. cell carcinoma, spindle cell" 80753 "Squamous cell carcinoma, adenoid" 80762 Sq. cell carc. in situ with question. stroma... 80763 "Sq. 
cell carcinoma, micro-invasive" 80770 "Squamous intraepithelial neoplasia, low grade" 80772 "Squamous intraepithelial neoplasia, high grade" 80783 Squamous cell carcinoma with horn formation 80802 Queyrat erythroplasia 80812 Bowen disease 80823 Lymphoepithelial carcinoma 80833 Basaloid squamous cell carcinoma 80843 "Squamous cell carcinoma, clear cell type" 80903 "Basal cell carcinoma, NOS" 80913 Multifocal superficial basal cell carcinoma 80923 "Infiltrating basal cell carcinoma, NOS" 80933 "Basal cell carcinoma, fibroepithelial" 80943 Basosquamous carcinoma 80953 Metatypical carcinoma 80973 "Basal cell carcinoma, nodular" 80983 Adenoid basal cell carcinoma 81023 Trichilemmocarcinoma 81103 Pilomatrix carcinoma 81202 Transitional cell carcinoma in situ 81203 "Transitional cell carcinoma, NOS" 81213 Schneiderian carcinoma 81223 "Trans. cell carcinoma, spindle cell" 81233 Basaloid carcinoma 81243 Cloacogenic carcinoma 81302 "Papillary trans. cell carcinoma, non-invasive" 81303 Papillary trans. cell carcinoma 81313 "Transitional cell carcinoma, micropapillary" 81400 "Adenoma, NOS" 81402 Adenocarcinoma in situ 81403 "Adenocarcinoma, NOS" 81413 Scirrhous adenocarcinoma 81423 Linitis plastica 81433 Superficial spreading adenocarcinoma 81443 "Adenocarcinoma, intestinal type" 81453 "Carcinoma, diffuse type" 81460 Monomorphic adenoma 81473 Basal cell adenocarcinoma 81480 "Glandular intraepithelial neoplasia, low grade" 81482 "Glandular intraepithelial neoplasia, high grade" 81500 "Pancreatic endocrine tumor, benign" 81501 "Pancreatic endocrine tumor, NOS" 81503 "Pancreatic endocrine tumor, malignant" 81513 "Insulinoma, malignant" 81523 "Glucagonoma, malignant" 81533 "Gastrinoma, malignant" 81543 "Mix. 
pancreatic endocrine&exocrine tumor, mal" 81553 Vipoma 81563 "Somatostatinoma, malignant" 81573 "Enteroglucagonoma, malignant" 81581 "Endocrine tumor, functioning, NOS" 81603 Cholangiocarcinoma 81613 Bile duct cystadenocarcinoma 81623 Klatskin tumor 81630 "Pancreatobiliary neoplasm, non-invasive" 81632 "Pap. neoplasm,pancreatobiliary-type,high gr. int" 81633 Pancreatobiliary-type carcinoma 81703 "Hepatocellular carcinoma, NOS" 81713 "Hepatocellular carcinoma, fibrolamellar" 81723 "Hepatocellular carcinoma, scirrhous" 81733 "Hepatocellular carcinoma, spindle cell variant" 81743 "Hepatocellular carcinoma, clear cell type" 81753 "Hepatocellular carcinoma, pleomorphic type" 81803 Comb. hepatocel. carcinoma & cholangiocarcinoma 81903 Trabecular adenocarcinoma 82003 Adenoid cystic carcinoma 82012 Cribriform carcinoma in situ 82013 Cribriform carcinoma 82102 Adenocarcinoma in situ in adenomatous polyp 82103 Adenocarcinoma in adenomatous polyp 82113 Tubular adenocarcinoma 82133 Serrated adenocarcinoma 82143 Parietal cell carcinoma 82153 Adenocarcinoma of anal glands 82202 Adenocarcinoma in situ in familial polyp. coli 82203 Adenocarcinoma in adenoma. polyposis coli 82212 Adenocarc. in situ in mult. adenomatous polyps 82213 Adenocarcinoma in mult. adenomatous polyps 82302 "Duct carcinoma in situ, solid type" 82303 "Solid carcinoma, NOS" 82313 Carcinoma simplex 82401 Carcinoid tumor of uncertain malignant potential 82403 "Carcinoid tumor, malignant" 82413 Enterochromaffin cell carcinoid 82423 "Enterochromaffin-like cell tumor, malignant" 82433 Goblet cell carcinoid 82443 Mixed adenoneuroendocrine carcinoma 82453 Adenocarcinoid tumor 82463 Neuroendocrine carcinoma 82473 Merkel cell carcinoma 82493 Atypical carcinoid tumor 82503 Bronchiolo-alveolar adenocarcinoma 82513 Alveolar adenocarcinoma 82523 "Bronchiolo-alveolar carcinoma, non-mucinous" 82533 "Bronchiolo-alveolar carcinoma, mucinous" 82543 "Bronch.-alv. carc., mixed mucin. and non-muc..." 
82553 Adenocarcinoma with mixed subtypes 82600 "Papillary adenoma, NOS" 82603 "Papillary adenocarcinoma, NOS" 82612 Adenocarcinoma in situ in villous adenoma 82613 Adenocarcinoma in villous adenoma 82623 Villous adenocarcinoma 82632 Adenocarcinoma in situ in tubulovillous adenoma 82633 Adenocarcinoma in tubulovillous adenoma 82653 "Micropapillary carcinoma, NOS" 82700 Chromophobe adenoma 82703 Chromophobe carcinoma 82710 Prolactinoma 82720 "Pituitary adenoma, NOS" 82723 "Pituitary carcinoma, NOS" 82800 Acidophil adenoma 82803 Acidophil carcinoma 82810 Mixed acidophil-basophil adenoma 82813 Mixed acidophil-basophil carcinoma 82900 Oxyphilic adenoma 82903 Oxyphilic adenocarcinoma 83000 Basophil adenoma 83003 Basophil carcinoma 83100 Clear cell adenoma 83103 "Clear cell adenocarcinoma, NOS" 83123 Renal cell carcinoma 83133 Clear cell adenocarcinofibroma 83143 Lipid-rich carcinoma 83153 Glycogen-rich carcinoma 83163 Cyst-associated renal cell carcinoma 83173 "Renal cell carcinoma, chromophobe type" 83183 "Renal cell carcinoma, sarcomatoid" 83193 Collecting duct carcinoma 83203 Granular cell carcinoma 83223 Water-clear cell adenocarcinoma 83230 Mixed cell adenoma 83233 Mixed cell adenocarcinoma 83303 "Follicular adenocarcinoma, NOS" 83313 Follicular adenocarcinoma well diff. 
83323 Follicular adenocarcinoma trabecular 83333 Fetal adenocarcinoma 83353 "Follicular carcinoma, minimally invasive" 83373 Insular carcinoma 83403 "Papillary carcinoma, follicular variant" 83413 Papillary microcarcinoma 83423 "Papillary carcinoma, oxyphilic cell" 83433 "Papillary carcinoma, encapsulated" 83443 "Papillary carcinoma, columnar cell" 83453 Medullary carcinoma with amyloid stroma 83463 Mixed medullary-follicular carcinoma 83473 Mixed medullary-papillary carcinoma 83503 Nonencapsulated sclerosing carcinoma 83700 "Adrenal cortical adenoma, NOS" 83703 Adrenal cortical carcinoma 83803 Endometrioid carcinoma 83813 "Endometrioid adenofibroma, malignant" 83823 "Endometrioid adenocarcinoma, secretory variant" 83833 "Endometrioid adenocarcinoma, ciliated cell v..." 83843 "Adenocarcinoma, endocervical type" 83903 Skin appendage carcinoma 84003 Sweat gland adenocarcinoma 84013 Apocrine adenocarcinoma 84023 "Nodular hidradenoma, malignant" 84033 Malignant eccrine spiradenoma 84073 Sclerosing sweat duct carcinoma 84083 Eccrine papillary adenocarcinoma 84093 "Eccrine poroma, malignant" 84103 Sebaceous adenocarcinoma 84133 Eccrine adenocarcinoma 84203 Ceruminous adenocarcinoma 84303 Mucoepidermoid carcinoma 84403 "Cystadenocarcinoma, NOS" 84413 "Serous cystadenocarcinoma, NOS" 84421 "Serous cystadenoma, borderline malignancy (C..." 84503 "Papillary cystadenocarcinoma, NOS" 84511 "Papillary cystadenoma, borderline malignancy..." 84523 Solid pseudopapillary carcinoma 84532 "Intraductal papillary-mucinous carcinoma, no..." 84533 "Intraductal papillary-mucinous carcinoma, in..." 84603 Papillary serous cystadenocarcinoma 84613 Serous surface papillary carcinoma 84621 Serous papillary cystic tumor of borderline ... 84702 "Mucinous cystadenocarcinoma, non-invasive" 84703 "Mucinous cystadenocarcinoma, NOS" 84713 Papillary mucinous cystadenocarcinoma 84721 Mucinous cystic tumor of borderline malignan... 84731 "Papillary mucinous cystadenoma, borderline m..." 
84803 Mucinous adenocarcinoma 84813 Mucin-producing adenocarcinoma 84823 "Mucinous adenocarcinoma, endocervical type" 84903 Signet ring cell carcinoma 85002 "Intraductal carcinoma, noninfiltrating, NOS" 85003 "Infiltrating duct carcinoma, NOS" 85012 "Comedocarcinoma, non-infiltrating" 85013 "Comedocarcinoma, NOS" 85023 Secretory carcinoma of breast 85032 Noninfiltrating intraductal papillary adenoc... 85033 Intraductal papillary adenocarcinoma with in... 85042 Noninfiltrating intracystic carcinoma 85043 "Intracystic carcinoma, NOS" 85072 Intraductal micropapillary carcinoma 85083 Cystic hypersecretory carcinoma 85103 "Medullary carcinoma, NOS" 85123 Medullary carcinoma with lymphoid stroma 85133 Atypical medullary carcinoma 85143 "Duct carcinoma, desmoplastic type" 85202 Lobular carcinoma in situ 85203 "Lobular carcinoma, NOS" 85213 Infiltrating ductular carcinoma 85222 Intraductal and lobular in situ carcinoma 85223 Infiltrating duct and lobular carcinoma 85233 Infiltr. duct mixed with other types of carc... 85243 Infiltrating lobular mixed with other types ... 85253 Polymorphous low grade adenocarcinoma 85303 Inflammatory carcinoma 85403 "Paget disease, mammary" 85413 Paget dis. & infil. duct carcinoma 85423 "Paget disease, extramammary" 85433 Paget disease and intraductal ca. 85503 Acinar cell carcinoma 85513 Acinar cell cystadenocarcinoma 85603 Adenosquamous carcinoma 85613 "Warthin tumor, malignant" 85623 Epithelial-myoepithelial carcinoma 85703 Adenocarcinoma with squamous metaplasia 85713 Adenocarcinoma w cartilag. & oss. metaplas. 85723 Adenocarcinoma with spindle cell mataplasia 85733 Adenocarcinoma with apocrine metaplasia 85743 Adenocarcinoma with neuroendocrine differen. 
85753 "Metaplastic carcinoma, NOS" 85763 Hepatoid adenocarcinoma 85803 "Thymoma, malignant, NOS" 85813 "Thymoma, type A, malignant" 85823 "Thymoma, type AB, malignant" 85833 "Thymoma, type B1, malignant" 85843 "Thymoma, type B2, malignant" 85853 "Thymoma, type B3, malignant" 85863 "Thymic carcinoma, NOS" 85883 Spindle epithelial tumor with thymus-like el... 85893 Carcinoma showing thymus-like element 85903 "Ovarian stromal tumor, mal." 86003 "Thecoma, malignant" 86201 "Granulosa cell tumor, adult type" 86203 "Granulosa cell tumor, malignant" 86211 Granulosa cell-theca cell tumor 86221 "Granulosa cell tumor, juvenile" 86303 "Androblastoma, malignant" 86311 Sertoli-Leydig cell tumor of intermediate differ 86313 "Sertoli-Leydig cell tumor, poorly differenti..." 86323 "Gynandroblastoma, malignant" 86343 "Sertoli-Leydig cl tum., p.d. w heterologous ..." 86401 "Sertoli cell tumor, NOS" 86403 Sertoli cell carcinoma 86501 "Leydig cell tumor, NOS" 86503 "Leydig cell tumor, malignant" 86703 "Steroid cell tumor, malignant" 86801 "Paraganglioma, NOS" 86803 "Paraganglioma, malignant" 86913 "Aortic body tumor, malignant" 86923 "Carotid body tumor, malignant" 86933 "Extra-adrenal paraganglioma, malignant" 87000 Pheochromocytoma NOS 87003 Pheochromocytoma 87103 Glomangiosarcoma 87202 Melanoma in situ 87203 "Malignant melanoma, NOS" 87213 Nodular melanoma 87223 Balloon cell melanoma 87233 "Malignant melanoma, regressing" 87280 Diffuse melanocytosis 87281 Meningeal melanocytoma 87283 Meningeal melanomatosis 87303 Amelanotic melanoma 87403 Mal. melanoma in junctional nevus 87412 "Precancerous melanosis, NOS" 87413 Mal. melanoma in precan. melanosis 87422 Lentigo maligna 87423 Lentigo maligna melanoma 87433 Superficial spreading melanoma 87443 "Acral lentiginous melanoma, malig." 87453 "Desmoplastic melanoma, malignant" 87463 Mucosal lentiginous melanoma 87613 Mal. melanoma in giant pigmented nevus 87703 Mixed epithel. 
& spindle cell melanoma 87713 Epithelioid cell melanoma 87723 "Spindle cell melanoma, NOS" 87733 "Spindle cell melanoma, type A" 87743 "Spindle cell melanoma, type B" 87803 "Blue nevus, malignant" 88000 "Soft tissue tumor, benign" 88003 "Sarcoma, NOS" 88013 Spindle cell sarcoma 88023 Giant cell sarcoma 88033 Small cell sarcoma 88043 Epithelioid sarcoma 88053 Undifferentiated sarcoma 88063 Desmoplastic small round cell tumor 88100 "Fibroma, NOS" 88103 "Fibrosarcoma, NOS" 88113 Fibromyxosarcoma 88123 Periosteal fibrosarcoma 88133 Fascial fibrosarcoma 88143 Infantile fibrosarcoma 88150 Solitary fibrous tumor 88153 "Solitary fibrous tumor, malignant" 88211 "Fibromatosis, aggressive" 88240 Myofibroma 88241 Myofibromatosis 88251 "Myofibroblastic tumor, NOS" 88300 "Fibrous histiocytoma, benign" 88303 "Fibrous histiocytoma, malignant" 88323 "Dermatofibrosarcoma, NOS" 88333 Pigmented dermatofibrosarcoma protuberans 88341 Giant cell fibroblastoma 88351 Plexiform fibrohistiocytic tumor 88361 Angiomatoid fibrous histiocytoma 88403 Myxosarcoma 88411 Angiomyxoma 88500 "Lipoma, NOS" 88501 Atypical lipoma 88503 "Liposarcoma, NOS" 88510 Fibrolipoma 88513 "Liposarcoma, well differentiated" 88523 Myxoid liposarcoma 88533 Round cell liposarcoma 88543 Pleomorphic liposarcoma 88553 Mixed type liposarcoma 88573 Fibroblastic liposarcoma 88583 Dedifferentiated liposarcoma 88610 "Angiolipoma, NOS" 88900 "Leiomyoma, NOS" 88901 "Leiomyomatosis, NOS" 88903 "Leiomyosarcoma, NOS" 88913 Epithelioid leiomyosarcoma 88943 Angiomyosarcoma 88953 Myosarcoma 88963 Myxoid leiomyosarcoma 88971 "Smooth muscle tumor, NOS" 89000 "Rhabdomyoma, NOS" 89003 "Rhabdomyosarcoma, NOS" 89013 "Pleomorphic rhabdomyosarcoma, adult type" 89023 Mixed type rhabdomyosarcoma 89103 Embryonal rhabdomyosarcoma 89123 Spindle cell rhabdomyosarcoma 89203 Alveolar rhabdomyosarcoma 89213 Rhabdomyosarcoma with ganglionic differentia... 
89303 Endometrial stromal sarcoma 89313 "Endometrial stromal sarcoma, low grade" 89333 Adenosarcoma 89343 Carcinofibroma 89353 "Stromal sarcoma, NOS" 89361 "Gastrointestinal stromal tumor, NOS" 89363 Gastrointestinal stromal sarcoma 89403 "Mixed tumor, malignant, NOS" 89413 Carcinoma in pleomorphic adenoma 89503 Mullerian mixed tumor 89513 Mesodermal mixed tumor 89590 Benign Cystic nephroma 89591 Cystic partially differentiated nephroblastoma 89593 Malignant cystic nephroma 89601 Mesoblastic nephroma 89603 "Nephroblastoma, NOS" 89633 Malignant rhabdoid tumor 89643 Clear cell sarcoma of kidney 89703 Hepatoblastoma 89713 Pancreatoblastoma 89723 Pulmonary blastoma 89733 Pleuropulmonary blastoma 89741 Sialoblastoma 89751 Calcifying nested epithelial stromal tumor 89803 "Carcinosarcoma, NOS" 89813 "Carcinosarcoma, embryonal type" 89823 Malignant myoepithelioma 89901 "Mesenchymoma, NOS" 89903 "Mesenchymoma, malignant" 89913 Embryonal sarcoma 90003 "Brenner tumor, malignant" 90143 Serous adenocarcinofibroma 90153 Mucinous adenocarcinofibroma 90203 "Phyllodes tumor, malignant" 90403 "Synovial sarcoma, NOS" 90413 "Synovial sarcoma, spindle cell" 90423 "Synovial sarcoma, epithelioid cell" 90433 "Synovial sarcoma, biphasic" 90443 "Clear cell sarcoma,NOS (except of kidney M-8..." 90503 "Mesothelioma, malignant" 90513 "Fibrous mesothelioma, malignant" 90523 "Epithel. mesothelioma, mal." 90533 "Mesothelioma, biphasic, malignant" 90603 Dysgerminoma 90613 "Seminoma, NOS" 90623 "Seminoma, anaplastic" 90633 Spermatocytic seminoma 90642 Intratubular malignant germ cells 90643 Germinoma 90653 "Germ cell tumor, nonseminomatous" 90703 "Embryonal carcinoma, NOS" 90713 Yolk sac tumor 90723 Polyembryoma 90800 "Teratoma, benign" 90801 "Teratoma, NOS" 90803 "Teratoma, malignant, NOS" 90813 Teratocarcinoma 90823 "Malignant teratoma, undiff." 90833 "Malignant teratoma, intermediate" 90840 "Dermoid cyst, NOS" 90843 Teratoma with malig. 
transformation 90853 Mixed germ cell tumor 90903 "Struma ovarii, malignant" 91003 Choriocarcinoma 91013 Choriocarcinoma combined w/ other germ cell ... 91023 "Malignant teratoma, trophoblastic" 91043 Malignant placental site trophoblastic tumor 91053 "Trophoblastic tumor, epithelioid" 91103 "Mesonephroma, malignant" 91200 "Hemangioma, NOS" 91203 Hemangiosarcoma 91210 Cavernous hemangioma 91220 Venous hemangioma 91243 Kupffer cell sarcoma 91300 "Hemangioendothelioma, benign" 91301 "Hemangioendothelioma, NOS" 91303 "Hemangioendothelioma, malignant" 91310 Capillary hemangioma 91333 "Epithelioid hemangioendothelioma, malignant" 91403 Kaposi sarcoma 91500 "Hemangiopericytoma, benign" 91501 "Hemangiopericytoma, NOS" 91503 "Hemangiopericytoma, malignant" 91611 Hemangioblastoma 91703 Lymphangiosarcoma 91803 "Osteosarcoma, NOS" 91813 Chondroblastic osteosarcoma 91823 Fibroblastic osteosarcoma 91833 Telangiectatic osteosarcoma 91843 Osteosarcoma in Paget disease 91853 Small cell osteosarcoma 91863 Central osteosarcoma 91873 Instrosseous well differentiated osteosarcoma 91923 Parosteal osteosarcoma 91933 Periosteal osteosarcoma 91943 High grade surface osteosarcoma 91953 Intracortical osteosarcoma 92203 "Chondrosarcoma, NOS" 92213 Juxtacortical chondrosarcoma 92303 "Chondroblastoma, malignant" 92313 Myxoid chondrosarcoma 92403 Mesenchymal chondrosarcoma 92423 Clear cell chondrosarcoma 92433 Dedifferentiated chondrosarcoma 92501 "Giant cell tumor of bone, NOS" 92503 "Giant cell tumor of bone, malignant" 92511 Giant cell tumor of soft parts 92513 Malignant giant cell tumor of soft parts 92523 Malignant tenosynovial giant cell tumor 92603 Ewing sarcoma 92613 Adamantinoma of long bones 92703 "Odontogenic tumor, malignant" 92903 Ameloblastic odontosarcoma 93103 "Ameloblastoma, malignant" 93303 Ameloblastic fibrosarcoma 93423 Odontogenic carcinosarcoma 93501 Craniopharyngioma 93511 Adamantinomatous craniopharyngioma 93521 Papillary craniopharyngioma 93601 "Pinealoma, NOS" 93611 
Pineocytoma 93623 Pineoblastoma 93630 Melanotic neuroectodermal tumor 93643 Peripheral neuroectodermal tumor 93653 Askin tumor 93703 "Chordoma, NOS" 93713 Chondroid chordoma 93723 Dedifferentiated chordoma 93803 "Glioma, malignant" 93813 Gliomatosis cerebri 93823 Mixed glioma 93831 Subependymoma 93841 Supependymal giant cell astrocytoma 93900 "Choroid plexus papilloma, NOS" 93901 Atypical choroid plexus papilloma 93903 "Choroid plexus papilloma, malignant" 93913 "Ependymoma, NOS" 93923 "Ependymoma, anaplastic" 93933 Papillary ependymoma 93941 Myxopapillary ependymoma 93953 Papillary tumor of the pineal region 94003 "Astrocytoma, NOS" 94013 "Astrocytoma, anaplastic" 94103 Protoplasmic astrocytoma 94113 Gemistocytic astrocytoma 94121 Desmoplastic infantile astrocytoma 94130 Dysembryoplastic neuroepithelial tumor 94203 Fibrillary astrocytoma 94211 Pilocytic astrocytoma 94233 Polar spongioblastoma 94243 Pleomorphic xanthoastrocytoma 94253 Pilomyxoid astrocytoma 94303 Astroblastoma 94311 Angiocentric glioma 94321 Pituicytoma 94403 "Glioblastoma, NOS" 94413 Giant cell glioblastoma 94421 Gliofibroma 94423 Gliosarcoma 94441 Chordoid glioma 94503 "Oligodendroglioma, NOS" 94513 "Oligodendroglioma, anaplastic" 94603 Oligodendroblastoma 94703 "Medulloblastoma, NOS" 94713 Desmoplastic medulloblastoma 94723 Medullomyoblastoma 94733 Primitive neuroectodermal tumor 94743 Large cell medulloblastoma 94803 "Cerebellar sarcoma, NOS" 94900 Ganglioneuroma 94903 Ganglioneuroblastoma 94920 Gangliocytoma 94930 Dysplastic gangliocytoma of cerebellum (Lher... 
95003 "Neuroblastoma, NOS" 95013 "Medulloepithelioma, NOS" 95023 Teratoid medulloepithelioma 95033 "Neuroepithelioma, NOS" 95043 Spongioneuroblastoma 95051 "Ganglioglioma, NOS" 95053 "Ganglioglioma, anaplastic" 95061 Centrol neurocytoma 95083 Atypical teratoid/rhabdoid tumor 95091 Papillary glioneuronal tumor 95103 "Retinoblastoma, NOS" 95113 "Retinoblastoma, differentiated" 95123 "Retinoblastoma, undifferentiated" 95133 "Retinoblastoma, diffuse" 95203 Olfactory neurogenic tumor 95213 Olfactory neurocytoma 95223 Olfactory neuroblastoma 95233 Olfactory neuroepithelioma 95300 "Meningioma, NOS" 95301 "Meningiomatosis, NOS" 95303 "Meningioma, malignant" 95310 Meningothelial meningioma 95320 Fibrous meningioma 95330 Psammomatous meningioma 95340 Angiomatous meningioma 95370 Transitional meningioma 95381 Clear cell meningioma 95383 Papillary meningioma 95391 Atypical meningioma 95393 Meningeal sarcomatosis 95400 "Neurofibroma, NOS" 95401 "Neurofibromatosis, NOS" 95403 Malignant peripheral nerve sheath tumor 95410 Melanotic neurofibroma 95500 Plexiform neurofibroma 95600 "Neurilemoma, NOS" 95601 Neurinomatosis 95603 "Neurilemmoma, malignant" 95613 MPNST with rhabdomyoblastic differentiation 95620 Neurothekeoma 95700 "Neuroma, NOS" 95710 "Perineurioma, NOS" 95713 "Perineurioma, malignant" 95800 "Granular cell tumor, NOS" 95803 "Granular cell tumor, malignant" 95813 Alveolar soft part sarcoma 95903 "Malignant lymphoma, NOS" 95913 "Malignant lymphoma, non-Hodgkin" 95963 Composite Hodgkin and non-Hodgkin lymphoma 95973 Primary cutaneous follicle centre lymphoma 96503 "Hodgkin lymphoma, NOS" 96513 "Hodgkin lymphoma, lymphocyte-rich" 96523 "Hodgkin lymphoma, mixed cellularity, NOS" 96533 "Hodgkin lymphoma, lymphocytic deplet., NOS" 96543 "Hodgkin lymph., lymphocyt. deplet., diffuse ..." 96553 "Hodgkin lymphoma, lymphocyt. deplet., reticular" 96593 "Hodgkin lymph., nodular lymphocyte predom." 
96613 Hodgkin granuloma [obs] 96623 Hodgkin sarcoma [obs] 96633 "Hodgkin lymphoma, nodular sclerosis, NOS" 96643 "Hodgkin lymphoma, nod. scler., cellular phase" 96653 "Hodgkin lymphoma, nod. scler., grade 1" 96673 "Hodgkin lymphoma, nod. scler., grade 2" 96703 "ML, small B lymphocytic, NOS" 96713 "ML, lymphoplasmacytic" 96733 Mantle cell lymphoma 96753 "ML, mixed sm. and lg. cell, diffuse" 96783 Primary effusion lymphoma 96793 Mediastinal large B-cell lymphoma 96803 "ML, large B-cell, diffuse" 96843 "ML, large B-cell, diffuse, immunoblastic, NOS" 96873 "Burkitt lymphoma, NOS" 96883 T-cell/histiocyte rich large B-cell lymphoma 96893 Splenic marginal zone B-cell lymphoma 96903 "Follicular lymphoma, NOS" 96913 "Follicular lymphoma, grade 2" 96953 "Follicular lymphoma, grade 1" 96983 "Follicular lymphoma, grade 3" 96993 "Marginal zone B-cell lymphoma, NOS" 97003 Mycosis fungoides 97013 Sezary syndrome 97023 "Mature T-cell lymphoma, NOS" 97053 Angioimmunoblastic T-cell lymphoma 97083 Subcutaneous panniculitis-like T-cell lymphoma 97093 "Cutaneous T-cell lymphoma, NOS" 97123 Intravascular large B-cell lymphoma 97143 "Anaplastic large cell lymphoma, T-cell and N..." 97163 Hepatosplenic T-cell lymphoma 97173 Intestinal T-cell lymphoma 97183 Primary cutan. CD30+ T-cell lymphoprolif. di... 97193 "NK/T-cell lymphoma, nasal and nasal-type" 97243 Syst. EBV pos. T-cell lymphoprol. disease 97253 Hydroa vacciniforme-like lymphoma 97263 Primary cutaneous gamma-delta T-cell lymphoma 97273 "Precursor cell lymphoblastic lymphoma, NOS" 97283 Precursor B-cell lymphoblastic lymphoma 97293 Precursor T-cell lymphoblastic lymphoma 97313 "Plasmacytoma, NOS" 97323 Multiple myeloma 97333 Plasma cell leukemia 97343 "Plasmacytoma, extramedullary" 97353 Plasmablastic lymphoma 97373 ALK postive large B-cell lymphoma 97383 Large B-cell lymph. arising in HHV8-assoc. 
multi 97403 Mast cell sarcoma 97411 Indolent systemic mastocytosis 97413 Malignant mastocytosis 97423 Mast cell leukemia 97503 Malignant histiocytosis 97511 "Langerhans cell histiocytosis, NOS" 97513 "Langerhans cell histiocytosis, NOS" 97521 "Langerhans cell histiocytosis, unifocal" 97531 "Langerhans cell histiocytosis, multifocal" 97543 "Langerhans cell histiocytosis, disseminated" 97553 Histiocytic sarcoma 97563 Langerhans cell sarcoma 97573 Interdigitating dendritic cell sarcoma 97583 Follicular dendritic cell sarcoma 97593 Fibroblastic reticular cell tumor 97603 "Immunoproliferative disease, NOS" 97613 Waldenstrom macroglobulinemia 97623 "Heavy chain disease, NOS" 97643 Immunoproliferative small intestinal disease 98003 "Leukemia, NOS" 98013 "Acute leukemia, NOS" 98053 Acute biphenotypic leukemia 98063 Mix. phenotype ac. leukemia with t;BCR-ABL1 98073 Mix. phenotype ac. leukemia with t;MLL rearrange 98083 "Mixed phenotype acute leukemia, B/myeloid, NOS" 98093 "Mixed phenotype acute leukemia, T/myeloid, NOS" 98113 "B lymphoblastic leukemia/lymphoma, NOS" 98123 B lymphoblast. leukemia/lymphoma with t;BCR-ABL1 98133 B lymphoblast. leukemia/lymphoma with t;MLL rear 98143 B lymphoblast. leukemia/lymphoma with t;TEL-AML1 98153 B lymphoblastic leukemia/lymphoma with hyperdipl 98163 B lymphoblast. leukemia/lymph. with hypodiploidy 98173 B lymphoblast. leukemia/lymphoma with t;IL3-IGH 98183 B lymphoblast. leukemia/lymphoma with t;E2A PBX1 98203 "Lymphoid leukemia, NOS" 98233 B-cell chr. lymph. leuk./small lymphocytic l... 98263 Burkitt cell leukemia 98273 Adult T-cell leukemia/lymphoma (HTLV-1 pos.) 
98283 "Acute lymphoblastic leukemia, L2 type, NOS" 98313 T-cell large granular lymphocytic leukemia 98323 "Prolymphocytic leukemia, NOS" 98333 "Prolymphocytic leukemia, B-cell type" 98343 "Prolymphocytic leukemia, T-cell type" 98353 "Precursor cell lymphoblastic leukemia, NOS" 98363 Precursor B-cell lymphoblastic leukemia 98373 Precursor T-cell lymphoblastic leukemia 98403 "Acute myeloid leukemia, M6 type" 98603 "Myeloid leukemia, NOS" 98613 Acute myeloid leukemia 98633 "Chronic myeloid leukemia, NOS" 98653 Acute myeloid leukemia with t;DEK-NUP214 98663 "Acute promyelocytic leuk.,t(15;17)(q22;q11-12)" 98673 Acute myelomonocytic leukemia 98693 Acute myeloid leukemia with inv or t;RPN1-EVI1 98703 Acute basophilic leukemia 98713 Ac. myelomonocytic leuk. w abn. mar. eosinop... 98723 "Acute myeloid leukemia, minimal differentiation" 98733 Acute myeloid leukemia without maturation 98743 Acute myeloid leukemia with maturation 98753 "Chronic myelogenous leukemia, BCR/ABL positive" 98763 "Atypical chronic myeloid leuk., BCR/ABL nega..." 98913 Acute monocytic leukemia 98953 Acute myeloid leuk. with myelodysplasia-related 98963 "Acute myeloid leukemia, t(8;21)(q22;q22)" 98973 "Acute myeloid leukemia, 11q23 abnormalities" 98981 Transient abnormal myelopoiesis 98983 Myeloid leukemia associated with Down Syndrome 99103 Acute megakaryoblastic leukemia 99113 Acute myeloid leukemia (megakar. blast.) 
with t; 99203 Therapy related myeloid neoplasm 99303 Myeloid sarcoma 99313 Acute panmyelosis with myelofibrosis 99403 Hairy cell leukemia 99453 "Chronic myelomonocytic leukemia, NOS" 99463 Juvenile myelomonocytic leukemia 99483 Aggressive NK-cell leukemia 99503 Polycythemia vera 99603 "Myeloproliferative neoplasm, NOS" 99613 Primary myelofibrosis 99623 Essential thrombocythemia 99633 Chronic neutrophilic leukemia 99643 "Chronic eosinophilic leukemia, NOS" 99653 Myeloid&lymphoid neoplasms with PDGFRB rearrange 99663 Myeloid neoplasms with PDGFRB rearrangement 99673 Myeloid&lymphoid neoplasm with FGFR1 abnormaliti 99701 "Lymphoproliferative disorder, NOS" 99711 "Post transplant lymphoproliferative disorder, NO" 99713 Polymorphic post transplant lymphoproliferative 99751 "Myeloproliferative disease, NOS" 99753 "Myeloproliferative neoplasm, unclassifiable" 99803 Refractory anemia 99823 Refractory anemia with sideroblasts 99833 Refractory anemia with excess blasts 99843 Refract. anemia with excess blasts in transf... 99853 Refractory cytopenia with multilineage dyspl... 99863 Myelodysplastic syndr. with 5q deletion synd... 
99873 "Therapy-related myelodysplastic syndrome, NOS" 99893 "Myelodysplastic syndrome, NOS" 99913 Refractory neutropenia 99923 Refractory thrombocytopenia 341 PLOCCD PLCCD1 PLCCD2 PLCCD3 0 external upper lip 1 external lower lip 2 "external lip, NOS" 3 mucose of uppper lip 4 mucosa of lower lip 5 "mucosa of lip, NOS" 6 commissure of lip 8 overlapping lesion of lip 9 "lip, NOS" 19 base of tongue 20 "dorsal surface of tongue, NOS" 21 border of tongue 22 "ventral surface of tongue, NOS" 23 anterior 2/3 of tongue 24 lingual tonsil 28 overlapping lesion of tongue 29 "tongue, NOS" 30 upper gum 31 lower gum 39 "gum, NOS" 40 anterior floor of mouth 41 lateral floor of mouth 48 overlapping lesion of floor of mouth 49 "floor of mouth, NOS" 50 hard palate 51 "soft palate, NOS" 52 uvula 58 overlapping lesion of palate 59 "palate, NOS" 60 cheek mucosa 61 vestibule of mouth 62 retromolar area 68 overlappinglesionofotherandunspecifiedpartso... 69 "mouth, NOS" 79 parotid gland 80 submandibular gland 81 sublingual gland 88 overlapping lesion of major salivary glands 89 "major salivary gland, NOS" 90 tonsillar fossa 91 tonsillar pillar 98 overlapping lesion of tonsil 99 "tonsil, NOS" 100 vallecula 101 anterior surface of epiglottis 102 lateral wall of oropharynx 103 posterior wall of oropharynx 104 branchial cleft 108 overlapping lesion of oropharynx 109 "oropharynx, NOS" 110 superior wall of nasopharyx 111 posterior wall of nasopharyx 112 lateral wall of nasopharyx 113 anterior wall of nasopharyx 118 overlapping lesion of nasopharyx 119 "nasopharyx, NOS" 129 "nasopharyxyriform sinus, NOS" 130 postcricoid region 131 hypopharyngeal aspect of aryepiglottic fold 132 posterior wall of hypopharynx 138 overlapping lesion of hypopharyx 139 "hypopharyx, NOS" 140 "pharyx, NOS" 142 waldeyer ring 148 "overlappinglesionoflip,oralcavityandpharynx" 150 cervical esophagus 151 thoracic esophagus 152 abdominal esophagus 153 upper third of esophagus 154 middle third of esophagus 155 lower third of 
esophagus 158 overlapping lesion of esophagus 159 "esophagus, NOS" 160 "cardia, NOS" 161 fundus of stomach 162 body of stomach 163 gastric antrum 164 pylorus 165 lesser curvature of stomach 166 greater curvature of stomach 168 overlapping lesion of stomach 169 "stomach, NOS" 170 duodenum 171 jejunum 172 ileum 173 Meckel diverticulum 178 overlapping lesion of small intestine 179 "small intestine, NOS" 180 cecum 181 appendix 182 ascending colon 183 hepatic flexure of colon 184 transverse colon 185 splenic flexure of colon 186 descending colon 187 sigmoid colon 188 overlapping lesion of colon 189 "colon, NOS" 199 rectosigmoid junction 209 "rectum, NOS" 210 "anus, NOS" 211 anal canal 212 cloacogenic zone 218 "overlappinglesionofrectum,anusandanalcanal" 220 liver 221 intrahepatic bile duct 239 "gallbladder, NOS" 240 extrahepatic bile duct 241 ampulla of Vater 248 overlapping lesion of biliary tract 249 "billary tract, NOS" 250 head of pancreas 251 body of pancreas 252 tail of pancreas 253 pancreatic duct 254 islets of Langerhans 257 other specified parts of pancreas 258 overlapping lesion of pancreas 259 "pancreas, NOS" 260 "intestinal tract, NOS" 268 overlapping lesion of digestive system 269 "gastrointestinal tract, NOS" 300 nasal cavity 301 middle ear 310 maxillary sinus 311 ethmoid sinus 312 frontal sinus 313 sphenoid sinus 318 overlapping lesion of accessory sinuses 319 "accessory sinus, NOS" 320 glottis 321 supraglottis 322 subglottis 323 laryngeal cartilage 328 overlapping lesion of larynx 329 "larynx, NOS" 339 trachea 340 main bronchus 341 "upper lobe, lung" 342 "middle lobe, lung" 343 "lower lobe, lung" 348 overlapping lesion of lung 349 "lung, NOS" 379 thymus 380 heart 381 anterior mediastinum 382 posterior mediastinum 383 "mediastinum, NOS" 384 "pleura, NOS" 388 "overlappinglesionofheart,mediastinumandpleura" 390 "upper respiratory tract, NOS" 398 overlappinglesionofrespiratorysystemandintra... 
399 ill-defined sites within respiratory system 400 "longboneofupperlimb,scapulaandassociatedjoints" 401 short bone of upper limb and associated joints 402 long bones of lower limb and associated joints 403 short bones of lower limb and associated joints 408 "overlappinglesionofbones,jointsandarticularc..." 409 "bones of lomb, NOS" 410 bones of skull and face and associated joints 411 mandible 412 vertebral column 413 "rib, sternum, clavicle and associated joints" 414 "pelvicbones,sacrum,coccyxandassociatedjoints" 418 "overlappinglesionofbones,jointsandarticularc..." 419 "bone, NOS" 420 blood 421 bone marrow 422 spleen 423 "reticuloendothelial system, NOS" 424 "hematopoietic system, NOS" 440 "skin of lip, NOS" 441 eyelid 442 external ear 443 skin of other and unspecified parts of face 444 skin of scalp and neck 445 skin of trunc 446 skin of upper limb and shoulder 447 skin of lower limb and hip 448 overlapping lesion of skin 449 "skin, NOS" 470 peripheralnervesandautonomicnervoussystemofh... 471 peripheralnervesandautonomicnervoussystemofu... 472 peripheralnervesandautonomicnervoussystemofl... 473 peripheralnervesandautonomicnervoussystemoft... 474 peripheralnervesandautonomicnervoussystemofa... 475 peripheralnervesandautonomicnervoussystemofp... 476 peripheralnervesandautonomicnervoussystemoft... 478 overlappinglesionofperipheralnervesandautono... 479 "autonomic nervous system, NOS" 480 retroperitoneum 481 specified parts of peritoneum 482 "peritoneum, NOS" 488 overlapping lesion of retroperitoneum 490 "connective,subcutaneousandothersofttissuesof..." 491 "connective,subcutaneousandothersofttissuesof..." 492 "connective,subcutaneousandothersofttissuesof..." 493 "connective,subcutaneousandothersofttissuesof..." 494 "connective,subcutaneousandothersofttissuesof..." 495 "connective,subcutaneousandothersofttissuesof..." 496 "connective,subcutaneousandothersofttissuesof..." 498 "overlappinglesionofconnective,subcutaneousan..." 
499 "connective,subcutaneousandothersofttissues,NOS" 500 nipple 501 central portion of breast 502 upper-inner quadrant of breast 503 lower-inner quadrant of breast 504 upper-outer quadrant of breast 505 upper-outer quadrant of breast 506 axillary tail of breast 508 overlapping lesion of breast 509 "breast, NOS" 510 labium majus 511 labium minus 512 clitoris 518 overlapping lesion of vulva 519 "vulva, NOS" 529 "vagina, NOS" 530 endocervix 531 exocervix 538 overlapping lesion of cervix uteri 539 "cervix uteri, NOS" 540 isthmus uteri 541 endometrium 542 myometrium 543 fundus uteri 548 overlapping lesion of corpus uteri 549 "corpus uteri, NOS" 559 "uterus, NOS" 569 "ovary, NOS" 570 fallopian tube 571 broad ligament 572 round ligament 573 parametrium 574 uterine adnexa 577 other specified parts of female genital organs 578 overlapping lesion of female genital organs 579 "female genital tract, NOS" 589 "placenta, NOS" 600 prepuce 601 glans penis 602 body of penis 608 overlapping lesion of penis 609 "penis, NOS" 619 prostate gland 620 undescended testis 621 descended testis 629 "testis, NOS" 630 epididymis 631 spermatic cord 632 "scrotum, NOS" 637 other specified parts of male genital organs 638 overlapping lesion of male genital organs 639 "male genital tract, NOS" 649 "kidney, NOS" 659 renal pelvis 669 ureter 670 trigone of bladder 671 dome of bladder 672 lateral wall of bladder 673 anterior wall of bladder 674 posterior of bladder 675 bladder neck 676 ureteric orifice 677 urachus 678 overlapping lesion of bladder 679 "bladder, NOS" 680 urethra 681 paraurethral gland 688 overlapping lesion of urinary organs 689 "urinary system, NOS" 690 conjuctiva 691 "cornea, NOS" 692 retina 693 choroid 694 ciliary body 695 lacrimal gland 696 "orbit, NOS" 698 overlapping lesion of eye and adnexa 699 "eye, NOS" 700 cerebral meninges 701 spinal meninges 709 "meninges, NOS" 710 cerebrum 711 frontal lobe 712 temperal lobe 713 parietal lobe 714 occipital lobe 715 "ventricle, NOS" 716 
cerebellum 717 brain stem 718 overlapping lesion of brain 719 "brain, NOS" 720 spinal cord 721 cauda equina 722 olfactory nerve 723 optic nerve 724 acoustic nerve 725 "cranial nerve, NOS" 728 overlappinglesionofbrainandcentralnervoussystem 729 "nervous system, NOS" 739 "thyroid gland, NOS" 740 cortex of adrenal gland 741 medulla of adrenal gland 749 "adrenal gland, NOS" 750 parathyroid gland 751 pituitary gland 752 craniopharyngeal duct 753 pineal gland 754 carotid body 755 aortic body and other paraganglia 758 overlappinglesionofendocrineglandsandrelated... 759 "endocrine gland, NOS" 760 "head, face or neck, NOS" 761 "thorax, NOS" 762 "abdomen, NOS" 763 "pelvis, NOS" 764 "upper limb, NOS" 765 "lower limb, NOS" 767 other ill-defined sites 768 overlapping lesion of ill-defined sites 770 "lymph nodes of head, face and neck" 771 intrathoracic lymph nodes 772 intra-abdominal lymph nodes 773 lymph nodes of axilla of arm 774 lymph nodes of inguinal region or leg 775 pelvic lymph nodes 778 lymph nodes of multiple regions 779 "lymph node, NOS" 809 unknown primary sites 343 IFCDATR 1 yes 2 no 3 statement by physician 4 IC will follow \ No newline at end of file diff --git a/test_data/alternative/studies/individual_codebook.txt.sha1 b/test_data/alternative/studies/individual_codebook.txt.sha1 new file mode 100644 index 0000000..e05732f --- /dev/null +++ b/test_data/alternative/studies/individual_codebook.txt.sha1 @@ -0,0 +1 @@ +8cc20a3917db3e251d9426d078ce74969e531931 PMCST000AAA_individual_codebook.txt diff --git a/test_data/alternative/studies/individual_study.txt b/test_data/alternative/studies/individual_study.txt new file mode 100644 index 0000000..a23fa19 --- /dev/null +++ b/test_data/alternative/studies/individual_study.txt @@ -0,0 +1,18 @@ +STUDY_ID_INDIVIDUAL_STUDY_ID,STUDY_ID,INDIVIDUAL_ID,INDIVIDUAL_STUDY_ID +PMCST000AAC_8301,PMCST000AAC,PAT1,8301 +PMCST000AAC_8355,PMCST000AAC,PAT3,8355 +PMCST000AAB_8371,PMCST000AAB,PAT3,8371 
+PMCST000AAC_8409,PMCST000AAC,PAT4,8409 +PMCST000AAC_8419,PMCST000AAC,PAT5,8419 +PMCST000AAC_8436,PMCST000AAC,PAT6,8436 +PMCST000AAC_8448,PMCST000AAC,PAT7,8448 +PMCST000AAB_8592,PMCST000AAB,PAT8,8592 +PMCST000AAB_8637,PMCST000AAB,PAT9,8637 +PMCST000AAB_8962,PMCST000AAB,PAT10,8962 +PMCST000AAB_9162,PMCST000AAB,PAT11,9162 +PMCST000AAC_9174,PMCST000AAC,PAT12,9174 +PMCST000AAC_9383,PMCST000AAC,PAT13,9383 +PMCST000AAC_9571,PMCST000AAC,PAT14,9571 +PMCST000AAC_9676,PMCST000AAC,PAT15,9676 +PMCST000AAB_9875,PMCST000AAB,PAT16,9875 +PMCST000AAB_9931,PMCST000AAB,PAT17,9931 diff --git a/test_data/alternative/studies/individual_study.txt.sha1 b/test_data/alternative/studies/individual_study.txt.sha1 new file mode 100644 index 0000000..5da61b0 --- /dev/null +++ b/test_data/alternative/studies/individual_study.txt.sha1 @@ -0,0 +1 @@ +87bcb438e7a36127a3758c8fb4d94b58777e808a individual_study.txt diff --git a/test_data/alternative/studies/study.txt b/test_data/alternative/studies/study.txt new file mode 100644 index 0000000..44103c2 --- /dev/null +++ b/test_data/alternative/studies/study.txt @@ -0,0 +1,3 @@ +"STUDY_ID","acronym","title","description","datadictionary" +PMCST000AAC,STUDYA,Treatment study protocol of the Dutch Childhood Oncology Group for children and adolescents (1-19 year) with newly diagnosed acute lymphoblastic leukemia.,Intial ALL treatment <1 year,xls bestand +PMCST000AAB,STUDYB,International collaborative treatment protocol for infants under one year with acute lymphoblastic or biphenotypic leukemia. 
,Intial ALL treatment 1-19 years,xls bestand diff --git a/test_data/alternative/studies/study.txt.sha1 b/test_data/alternative/studies/study.txt.sha1 new file mode 100644 index 0000000..f729831 --- /dev/null +++ b/test_data/alternative/studies/study.txt.sha1 @@ -0,0 +1 @@ +683f607214f3b53c06a0431a2303c0322cc99ce6 study.txt diff --git a/test_data/full_dataset/clinic/RDP-IC.tab b/test_data/full_dataset/clinic/RDP-IC.tab new file mode 100644 index 0000000..eac36c0 --- /dev/null +++ b/test_data/full_dataset/clinic/RDP-IC.tab @@ -0,0 +1,7 @@ +INDIVIDUAL_ID 00004_Toestemmingsstatus 00007_Datum toestemming 00010_Datum geen toestemming 00012_Datum einde deelname +PAT1 expliciete toestemming 24/04/2018 +PAT13 mogelijke kandidaat +PAT14 geen toestemming 28/11/2017 +PAT15 proefpersoon informatie uitgereikt maar nog geen toestemming ontvangen +PAT16 geïnformeerd door studieteam +PAT17 mogelijke kandidaat diff --git a/test_data/full_dataset/clinic/RDP-IC.tab.sha1 b/test_data/full_dataset/clinic/RDP-IC.tab.sha1 new file mode 100644 index 0000000..a616f62 --- /dev/null +++ b/test_data/full_dataset/clinic/RDP-IC.tab.sha1 @@ -0,0 +1 @@ +f84346dff7e4fae2e720b118d1f4cd846a4ec5d3 RDP-IC.tab diff --git a/test_data/full_dataset/clinic/RDP-Patient.tab b/test_data/full_dataset/clinic/RDP-Patient.tab new file mode 100644 index 0000000..f7893f0 --- /dev/null +++ b/test_data/full_dataset/clinic/RDP-Patient.tab @@ -0,0 +1,7 @@ +INDIVIDUAL_ID Gebdat Geslacht Overleden Overldat +PAT1 15AUG2000 M 0 +PAT2 18FEB1989 M 0 +PAT3 07NOV2001 M 0 +PAT15 08JAN1999 V 0 +PAT16 25DEC1997 V 0 +PAT17 29SEP1991 V 0 diff --git a/test_data/full_dataset/clinic/RDP-Patient.tab.sha1 b/test_data/full_dataset/clinic/RDP-Patient.tab.sha1 new file mode 100644 index 0000000..22e83aa --- /dev/null +++ b/test_data/full_dataset/clinic/RDP-Patient.tab.sha1 @@ -0,0 +1 @@ +67082ab768b32dfc28c0d9d665d646f4f355140f RDP-Patient.tab diff --git a/test_data/full_dataset/clinic/RDP-Patient_codebook.txt 
b/test_data/full_dataset/clinic/RDP-Patient_codebook.txt new file mode 100644 index 0000000..76336f2 --- /dev/null +++ b/test_data/full_dataset/clinic/RDP-Patient_codebook.txt @@ -0,0 +1 @@ +1 Geslacht M male V female \ No newline at end of file diff --git a/test_data/full_dataset/clinic/RDP-Patient_codebook.txt.sha1 b/test_data/full_dataset/clinic/RDP-Patient_codebook.txt.sha1 new file mode 100644 index 0000000..7a26340 --- /dev/null +++ b/test_data/full_dataset/clinic/RDP-Patient_codebook.txt.sha1 @@ -0,0 +1 @@ +7854a94ee9adb8f765a65db17cd17fcc9fea708d RDP-Patient_codebook.txt diff --git a/test_data/full_dataset/laboratory/biomaterial.txt b/test_data/full_dataset/laboratory/biomaterial.txt new file mode 100644 index 0000000..bbf11cf --- /dev/null +++ b/test_data/full_dataset/laboratory/biomaterial.txt @@ -0,0 +1,24 @@ +biomaterial_id biomaterial_date type src_biosource_id src_biomaterial_id description label +BIOM1T 07/03/2018 total RNA BIOS1T hepatoblastoma RNA HBL_RNA +BIOM1N 22/11/2018 total RNA BIOS1N healthy tissue HBL_normal_RNA +BIOM2 05/06/2011 genomic DNA BIOS2 neuroblastoma DNA NBL_DNA +BIOM3T 16/04/2018 genomic DNA BIOS3T nephroblastoma DNA NBL_DNA +BIOM3N 02/05/2018 genomic DNA BIOS3N healthy tissue NBL_normal_DNA +BIOM4 07/03/2018 mRNA BIOS4 genomic DNA isolated from 1222TI 1222TI_DNA +BIOM5 05/06/2011 mRNA BIOS5 neuroblastoma DNA NBL_DNA +BIOM6 05/06/2011 mRNA DNA BIOS6 neuroblastoma DNA NBL_DNA +BIOM7 07/03/2018 mRNA DNA BIOS7 genomic DNA isolated from 1222TI 1222TI_DNA +BIOM8 05/06/2011 mRNA DNA BIOS8 neuroblastoma DNA NBL_DNA +BIOM9 05/06/2011 genomic DNA BIOS9 neuroblastoma DNA NBL_DNA +BIOM10 07/03/2018 genomic DNA BIOS10 genomic DNA isolated from 1222TI 1222TI_DNA +BIOM11 05/06/2011 genomic DNA BIOS11 neuroblastoma DNA NBL_DNA +BIOM12 05/06/2011 genomic DNA BIOS12 neuroblastoma DNA NBL_DNA +BIOM13 07/03/2018 mRNA BIOS13 genomic DNA isolated from 1222TI 1222TI_DNA +BIOM14 05/06/2011 mRNA BIOS14 neuroblastoma DNA NBL_DNA +BIOM15 05/06/2011 
genomic DNA BIOS15 neuroblastoma DNA NBL_DNA +BIOM16 07/03/2018 genomic DNA BIOS16 genomic DNA isolated from 1222TI 1222TI_DNA +BIOM17 05/06/2011 genomic DNA BIOS17 neuroblastoma DNA NBL_DNA +BIOM18 05/06/2011 mRNA BIOS18 neuroblastoma DNA NBL_DNA +BIOM19 07/03/2018 mRNA BIOS19 genomic DNA isolated from 1222TI 1222TI_DNA +BIOM20 05/06/2011 mRNA BIOS11 BIOM11 neuroblastoma DNA NBL_DNA +BIOM21 05/06/2011 genomic DNA BIOS12 BIOM12 neuroblastoma DNA NBL_DNA diff --git a/test_data/full_dataset/laboratory/biomaterial.txt.sha1 b/test_data/full_dataset/laboratory/biomaterial.txt.sha1 new file mode 100644 index 0000000..0b331b9 --- /dev/null +++ b/test_data/full_dataset/laboratory/biomaterial.txt.sha1 @@ -0,0 +1 @@ +a3fe2d8a4db26c0dd6400caa6f662d6826878862 biomaterial.txt diff --git a/test_data/full_dataset/laboratory/biosource.txt b/test_data/full_dataset/laboratory/biosource.txt new file mode 100644 index 0000000..795b389 --- /dev/null +++ b/test_data/full_dataset/laboratory/biosource.txt @@ -0,0 +1,22 @@ +biosource_id biosource_dedicated tissue biosource_date disease_status individual_id diagnosis_id src_biosource_id tumor_percentage label description +BIOS1T no liver 25/01/2018 primary tumor PAT1 DIA1 25 HBL hepatoblastoma +BIOS1N no liver 25/02/2018 unaffected PAT1 0 HBL_normal normal hepatic tissue +BIOS2 no nerve 24/01/2018 primary tumor PAT2 DIA2 5 NBL neuroblastoma +BIOS3T yes kidney 24/02/2018 primary tumor PAT3 DIA3 15 NBL nephroblastoma +BIOS3N yes kidney 04/03/2018 unaffected PAT3 0 NBL_normal normal kidney tissue +BIOS4 yes nerve 25/01/2018 unaffected PAT4 DIA4 0 NBL_normal normal nerve tissue +BIOS5 yes eye 24/01/2018 primary tumor PAT5 DIA5 5 NBL neuroblastoma +BIOS6 yes nerve 24/01/2018 primary tumor PAT6 DIA6 5 NBL neuroblastoma +BIOS7 yes eye 25/01/2018 unaffected PAT7 DIA7 0 NBL_normal normal nerve tissue +BIOS8 yes nerve 24/01/2018 primary tumor PAT8 DIA8 5 NBL neuroblastoma +BIOS9 no eye 24/01/2018 primary tumor PAT9 DIA9 5 NBL neuroblastoma +BIOS10 
yes eye 25/01/2018 unaffected PAT10 DIA10 0 NBL_normal normal nerve tissue +BIOS11 yes eye 24/01/2018 primary tumor PAT11 DIA11 5 NBL neuroblastoma +BIOS12 yes eye 24/01/2018 primary tumor PAT12 DIA12 5 NBL neuroblastoma +BIOS13 yes nerve 25/01/2018 unaffected PAT13 DIA13 0 NBL_normal normal nerve tissue +BIOS14 yes nerve 24/01/2018 primary tumor PAT14 DIA14 5 NBL neuroblastoma +BIOS15 no nerve 24/01/2018 primary tumor PAT15 DIA15 5 NBL neuroblastoma +BIOS16 no nerve 25/01/2018 unaffected PAT16 DIA16 0 NBL_normal normal nerve tissue +BIOS17 yes nerve 24/01/2018 primary tumor PAT17 DIA17 5 NBL neuroblastoma +BIOS18 yes liver 30/01/2018 primary tumor PAT1 DIA18 BIOS1T 30 HBL hepatoblastoma +BIOS19 no nerve 24/01/2018 primary tumor PAT2 DIA19 BIOS2 15 NBL neuroblastoma diff --git a/test_data/full_dataset/laboratory/biosource.txt.sha1 b/test_data/full_dataset/laboratory/biosource.txt.sha1 new file mode 100644 index 0000000..e796242 --- /dev/null +++ b/test_data/full_dataset/laboratory/biosource.txt.sha1 @@ -0,0 +1 @@ +6985168bcc2c5d64e11c5317a70478775ba8eec3 biosource.txt diff --git a/test_data/full_dataset/studies/death.txt b/test_data/full_dataset/studies/death.txt new file mode 100644 index 0000000..92e457e --- /dev/null +++ b/test_data/full_dataset/studies/death.txt @@ -0,0 +1,4 @@ +"MARK:","ID","IDAA","INDIVIDUAL_ID","STATUSA","IDAABB","DTDEATH" +,217,8000217,PAT10,1,07/03/2017 0:00:00,10/08/2016 0:00:00 +,217,8000238,PAT11,1,15/01/2016 0:00:00,06/07/2015 0:00:00 +,217,8000251,PAT12,1,24/02/2016 0:00:00,07/09/2015 0:00:00 diff --git a/test_data/full_dataset/studies/death.txt.sha1 b/test_data/full_dataset/studies/death.txt.sha1 new file mode 100644 index 0000000..65d0d74 --- /dev/null +++ b/test_data/full_dataset/studies/death.txt.sha1 @@ -0,0 +1 @@ +b886a68f4d69de8dd1e76739af4fa3c688520f8e PMCST000AAA_death.txt diff --git a/test_data/full_dataset/studies/death_codebook.txt b/test_data/full_dataset/studies/death_codebook.txt new file mode 100644 index 
0000000..9d23109 --- /dev/null +++ b/test_data/full_dataset/studies/death_codebook.txt @@ -0,0 +1 @@ +1 SEX 1 male 2 female 9 unknown 2 SIGYN IFCGIV IFCMAT IFCCOM IFCREF NOREGIS CLEXPYN CYTOLYN HISTOYN IMAGEYN TMARKYN PATDCOGR PROTYN ELIGYN TXSTART SIGYNX SIGYNY 1 yes 2 no 8 not applicable 8 HOSPREC HOSPDIAG HOSPASS HOSPTRAN 200 AMC 201 UMCG 202 AZM 203 CZE 204 ErasmusMC 207 JBZ 208 LUMC 213 EZT 214 Radboudumc 216 VUMC 217 UMCU 220 PMC 332 DIAGCD PDGCD1 PDGCD2 PDGCD3 61000 FanconiÔs anaemia 61900 Aplastic anaemia 70000 KostmannÔs disease 76100 HLH 80000 "Neoplasm, benign" 80001 "Neoplasm, uncertain whether benign or malignant" 80003 "Neoplasm, malignant" 80010 "Tumor cells, benign" 80011 "Tumor cells, uncertain whether benign or mal..." 80013 "Tumor cells, malignant" 80023 "Malignant tumor, small cell type" 80033 "Malignant tumor, giant cell type" 80043 "Malignant tumor, spindle cell type" 80050 "Clear cell tumor, NOS" 80053 "Malignant tumor, clear cell type" 80100 "Epithelial tumor, benign" 80102 "Carcinoma in situ, NOS" 80103 "Carcinoma, NOS" 80113 "Epithelioma, malignant" 80123 "Large cell carcinoma, NOS" 80133 Large cell neuroendocrine carcinoma 80143 Large cell carcinoma with rhabdoid phenotype 80153 Glassy cell carcinoma 80203 "Carcinoma, undifferentiated type, NOS" 80213 "Carcinoma, anaplastic type, NOS" 80223 Pleomorphic carcinoma 80303 Giant cell and spindle cell carcinoma 80313 Giant cell carcinoma 80323 Spindle cell carcinoma 80333 Pseudosarcomatous carcinoma 80343 Polygonal cell carcinoma 80353 Carcinoma with osteoclast-like giant cells 80413 "Small cell carcinoma, NOS" 80423 Oat cell carcinoma 80433 "Small cell carcinoma, fusiform cell" 80443 "Small cell carcinoma, intermediate cell" 80453 Combined small cell carcinoma 80463 Non-small cell carcinoma 80502 Papillary carcinoma in situ 80503 "Papillary carcinoma, NOS" 80513 "Verrucous carcinoma, NOS" 80522 "Papillary squamous cell carcinoma, non-invasive" 80523 Papillary squamous cell carcinoma 80702 
"Squamous cell carcinoma in situ, NOS" 80703 "Squamous cell carcinoma, NOS" 80713 "Sq. cell carcinoma, keratinizing, NOS" 80723 "Sq. cell carcinoma, lg. cell, non-ker." 80733 "Sq. cell carcinoma, sm. cell, non-ker." 80743 "Sq. cell carcinoma, spindle cell" 80753 "Squamous cell carcinoma, adenoid" 80762 Sq. cell carc. in situ with question. stroma... 80763 "Sq. cell carcinoma, micro-invasive" 80770 "Squamous intraepithelial neoplasia, low grade" 80772 "Squamous intraepithelial neoplasia, high grade" 80783 Squamous cell carcinoma with horn formation 80802 Queyrat erythroplasia 80812 Bowen disease 80823 Lymphoepithelial carcinoma 80833 Basaloid squamous cell carcinoma 80843 "Squamous cell carcinoma, clear cell type" 80903 "Basal cell carcinoma, NOS" 80913 Multifocal superficial basal cell carcinoma 80923 "Infiltrating basal cell carcinoma, NOS" 80933 "Basal cell carcinoma, fibroepithelial" 80943 Basosquamous carcinoma 80953 Metatypical carcinoma 80973 "Basal cell carcinoma, nodular" 80983 Adenoid basal cell carcinoma 81023 Trichilemmocarcinoma 81103 Pilomatrix carcinoma 81202 Transitional cell carcinoma in situ 81203 "Transitional cell carcinoma, NOS" 81213 Schneiderian carcinoma 81223 "Trans. cell carcinoma, spindle cell" 81233 Basaloid carcinoma 81243 Cloacogenic carcinoma 81302 "Papillary trans. cell carcinoma, non-invasive" 81303 Papillary trans. 
cell carcinoma 81313 "Transitional cell carcinoma, micropapillary" 81400 "Adenoma, NOS" 81402 Adenocarcinoma in situ 81403 "Adenocarcinoma, NOS" 81413 Scirrhous adenocarcinoma 81423 Linitis plastica 81433 Superficial spreading adenocarcinoma 81443 "Adenocarcinoma, intestinal type" 81453 "Carcinoma, diffuse type" 81460 Monomorphic adenoma 81473 Basal cell adenocarcinoma 81480 "Glandular intraepithelial neoplasia, low grade" 81482 "Glandular intraepithelial neoplasia, high grade" 81500 "Pancreatic endocrine tumor, benign" 81501 "Pancreatic endocrine tumor, NOS" 81503 "Pancreatic endocrine tumor, malignant" 81513 "Insulinoma, malignant" 81523 "Glucagonoma, malignant" 81533 "Gastrinoma, malignant" 81543 "Mix. pancreatic endocrine&exocrine tumor, mal" 81553 Vipoma 81563 "Somatostatinoma, malignant" 81573 "Enteroglucagonoma, malignant" 81581 "Endocrine tumor, functioning, NOS" 81603 Cholangiocarcinoma 81613 Bile duct cystadenocarcinoma 81623 Klatskin tumor 81630 "Pancreatobiliary neoplasm, non-invasive" 81632 "Pap. neoplasm,pancreatobiliary-type,high gr. int" 81633 Pancreatobiliary-type carcinoma 81703 "Hepatocellular carcinoma, NOS" 81713 "Hepatocellular carcinoma, fibrolamellar" 81723 "Hepatocellular carcinoma, scirrhous" 81733 "Hepatocellular carcinoma, spindle cell variant" 81743 "Hepatocellular carcinoma, clear cell type" 81753 "Hepatocellular carcinoma, pleomorphic type" 81803 Comb. hepatocel. carcinoma & cholangiocarcinoma 81903 Trabecular adenocarcinoma 82003 Adenoid cystic carcinoma 82012 Cribriform carcinoma in situ 82013 Cribriform carcinoma 82102 Adenocarcinoma in situ in adenomatous polyp 82103 Adenocarcinoma in adenomatous polyp 82113 Tubular adenocarcinoma 82133 Serrated adenocarcinoma 82143 Parietal cell carcinoma 82153 Adenocarcinoma of anal glands 82202 Adenocarcinoma in situ in familial polyp. coli 82203 Adenocarcinoma in adenoma. polyposis coli 82212 Adenocarc. in situ in mult. adenomatous polyps 82213 Adenocarcinoma in mult. 
adenomatous polyps 82302 "Duct carcinoma in situ, solid type" 82303 "Solid carcinoma, NOS" 82313 Carcinoma simplex 82401 Carcinoid tumor of uncertain malignant potential 82403 "Carcinoid tumor, malignant" 82413 Enterochromaffin cell carcinoid 82423 "Enterochromaffin-like cell tumor, malignant" 82433 Goblet cell carcinoid 82443 Mixed adenoneuroendocrine carcinoma 82453 Adenocarcinoid tumor 82463 Neuroendocrine carcinoma 82473 Merkel cell carcinoma 82493 Atypical carcinoid tumor 82503 Bronchiolo-alveolar adenocarcinoma 82513 Alveolar adenocarcinoma 82523 "Bronchiolo-alveolar carcinoma, non-mucinous" 82533 "Bronchiolo-alveolar carcinoma, mucinous" 82543 "Bronch.-alv. carc., mixed mucin. and non-muc..." 82553 Adenocarcinoma with mixed subtypes 82600 "Papillary adenoma, NOS" 82603 "Papillary adenocarcinoma, NOS" 82612 Adenocarcinoma in situ in villous adenoma 82613 Adenocarcinoma in villous adenoma 82623 Villous adenocarcinoma 82632 Adenocarcinoma in situ in tubulovillous adenoma 82633 Adenocarcinoma in tubulovillous adenoma 82653 "Micropapillary carcinoma, NOS" 82700 Chromophobe adenoma 82703 Chromophobe carcinoma 82710 Prolactinoma 82720 "Pituitary adenoma, NOS" 82723 "Pituitary carcinoma, NOS" 82800 Acidophil adenoma 82803 Acidophil carcinoma 82810 Mixed acidophil-basophil adenoma 82813 Mixed acidophil-basophil carcinoma 82900 Oxyphilic adenoma 82903 Oxyphilic adenocarcinoma 83000 Basophil adenoma 83003 Basophil carcinoma 83100 Clear cell adenoma 83103 "Clear cell adenocarcinoma, NOS" 83123 Renal cell carcinoma 83133 Clear cell adenocarcinofibroma 83143 Lipid-rich carcinoma 83153 Glycogen-rich carcinoma 83163 Cyst-associated renal cell carcinoma 83173 "Renal cell carcinoma, chromophobe type" 83183 "Renal cell carcinoma, sarcomatoid" 83193 Collecting duct carcinoma 83203 Granular cell carcinoma 83223 Water-clear cell adenocarcinoma 83230 Mixed cell adenoma 83233 Mixed cell adenocarcinoma 83303 "Follicular adenocarcinoma, NOS" 83313 Follicular adenocarcinoma well diff. 
83323 Follicular adenocarcinoma trabecular 83333 Fetal adenocarcinoma 83353 "Follicular carcinoma, minimally invasive" 83373 Insular carcinoma 83403 "Papillary carcinoma, follicular variant" 83413 Papillary microcarcinoma 83423 "Papillary carcinoma, oxyphilic cell" 83433 "Papillary carcinoma, encapsulated" 83443 "Papillary carcinoma, columnar cell" 83453 Medullary carcinoma with amyloid stroma 83463 Mixed medullary-follicular carcinoma 83473 Mixed medullary-papillary carcinoma 83503 Nonencapsulated sclerosing carcinoma 83700 "Adrenal cortical adenoma, NOS" 83703 Adrenal cortical carcinoma 83803 Endometrioid carcinoma 83813 "Endometrioid adenofibroma, malignant" 83823 "Endometrioid adenocarcinoma, secretory variant" 83833 "Endometrioid adenocarcinoma, ciliated cell v..." 83843 "Adenocarcinoma, endocervical type" 83903 Skin appendage carcinoma 84003 Sweat gland adenocarcinoma 84013 Apocrine adenocarcinoma 84023 "Nodular hidradenoma, malignant" 84033 Malignant eccrine spiradenoma 84073 Sclerosing sweat duct carcinoma 84083 Eccrine papillary adenocarcinoma 84093 "Eccrine poroma, malignant" 84103 Sebaceous adenocarcinoma 84133 Eccrine adenocarcinoma 84203 Ceruminous adenocarcinoma 84303 Mucoepidermoid carcinoma 84403 "Cystadenocarcinoma, NOS" 84413 "Serous cystadenocarcinoma, NOS" 84421 "Serous cystadenoma, borderline malignancy (C..." 84503 "Papillary cystadenocarcinoma, NOS" 84511 "Papillary cystadenoma, borderline malignancy..." 84523 Solid pseudopapillary carcinoma 84532 "Intraductal papillary-mucinous carcinoma, no..." 84533 "Intraductal papillary-mucinous carcinoma, in..." 84603 Papillary serous cystadenocarcinoma 84613 Serous surface papillary carcinoma 84621 Serous papillary cystic tumor of borderline ... 84702 "Mucinous cystadenocarcinoma, non-invasive" 84703 "Mucinous cystadenocarcinoma, NOS" 84713 Papillary mucinous cystadenocarcinoma 84721 Mucinous cystic tumor of borderline malignan... 84731 "Papillary mucinous cystadenoma, borderline m..." 
84803 Mucinous adenocarcinoma 84813 Mucin-producing adenocarcinoma 84823 "Mucinous adenocarcinoma, endocervical type" 84903 Signet ring cell carcinoma 85002 "Intraductal carcinoma, noninfiltrating, NOS" 85003 "Infiltrating duct carcinoma, NOS" 85012 "Comedocarcinoma, non-infiltrating" 85013 "Comedocarcinoma, NOS" 85023 Secretory carcinoma of breast 85032 Noninfiltrating intraductal papillary adenoc... 85033 Intraductal papillary adenocarcinoma with in... 85042 Noninfiltrating intracystic carcinoma 85043 "Intracystic carcinoma, NOS" 85072 Intraductal micropapillary carcinoma 85083 Cystic hypersecretory carcinoma 85103 "Medullary carcinoma, NOS" 85123 Medullary carcinoma with lymphoid stroma 85133 Atypical medullary carcinoma 85143 "Duct carcinoma, desmoplastic type" 85202 Lobular carcinoma in situ 85203 "Lobular carcinoma, NOS" 85213 Infiltrating ductular carcinoma 85222 Intraductal and lobular in situ carcinoma 85223 Infiltrating duct and lobular carcinoma 85233 Infiltr. duct mixed with other types of carc... 85243 Infiltrating lobular mixed with other types ... 85253 Polymorphous low grade adenocarcinoma 85303 Inflammatory carcinoma 85403 "Paget disease, mammary" 85413 Paget dis. & infil. duct carcinoma 85423 "Paget disease, extramammary" 85433 Paget disease and intraductal ca. 85503 Acinar cell carcinoma 85513 Acinar cell cystadenocarcinoma 85603 Adenosquamous carcinoma 85613 "Warthin tumor, malignant" 85623 Epithelial-myoepithelial carcinoma 85703 Adenocarcinoma with squamous metaplasia 85713 Adenocarcinoma w cartilag. & oss. metaplas. 85723 Adenocarcinoma with spindle cell mataplasia 85733 Adenocarcinoma with apocrine metaplasia 85743 Adenocarcinoma with neuroendocrine differen. 
85753 "Metaplastic carcinoma, NOS" 85763 Hepatoid adenocarcinoma 85803 "Thymoma, malignant, NOS" 85813 "Thymoma, type A, malignant" 85823 "Thymoma, type AB, malignant" 85833 "Thymoma, type B1, malignant" 85843 "Thymoma, type B2, malignant" 85853 "Thymoma, type B3, malignant" 85863 "Thymic carcinoma, NOS" 85883 Spindle epithelial tumor with thymus-like el... 85893 Carcinoma showing thymus-like element 85903 "Ovarian stromal tumor, mal." 86003 "Thecoma, malignant" 86201 "Granulosa cell tumor, adult type" 86203 "Granulosa cell tumor, malignant" 86211 Granulosa cell-theca cell tumor 86221 "Granulosa cell tumor, juvenile" 86303 "Androblastoma, malignant" 86311 Sertoli-Leydig cell tumor of intermediate differ 86313 "Sertoli-Leydig cell tumor, poorly differenti..." 86323 "Gynandroblastoma, malignant" 86343 "Sertoli-Leydig cl tum., p.d. w heterologous ..." 86401 "Sertoli cell tumor, NOS" 86403 Sertoli cell carcinoma 86501 "Leydig cell tumor, NOS" 86503 "Leydig cell tumor, malignant" 86703 "Steroid cell tumor, malignant" 86801 "Paraganglioma, NOS" 86803 "Paraganglioma, malignant" 86913 "Aortic body tumor, malignant" 86923 "Carotid body tumor, malignant" 86933 "Extra-adrenal paraganglioma, malignant" 87000 Pheochromocytoma NOS 87003 Pheochromocytoma 87103 Glomangiosarcoma 87202 Melanoma in situ 87203 "Malignant melanoma, NOS" 87213 Nodular melanoma 87223 Balloon cell melanoma 87233 "Malignant melanoma, regressing" 87280 Diffuse melanocytosis 87281 Meningeal melanocytoma 87283 Meningeal melanomatosis 87303 Amelanotic melanoma 87403 Mal. melanoma in junctional nevus 87412 "Precancerous melanosis, NOS" 87413 Mal. melanoma in precan. melanosis 87422 Lentigo maligna 87423 Lentigo maligna melanoma 87433 Superficial spreading melanoma 87443 "Acral lentiginous melanoma, malig." 87453 "Desmoplastic melanoma, malignant" 87463 Mucosal lentiginous melanoma 87613 Mal. melanoma in giant pigmented nevus 87703 Mixed epithel. 
& spindle cell melanoma 87713 Epithelioid cell melanoma 87723 "Spindle cell melanoma, NOS" 87733 "Spindle cell melanoma, type A" 87743 "Spindle cell melanoma, type B" 87803 "Blue nevus, malignant" 88000 "Soft tissue tumor, benign" 88003 "Sarcoma, NOS" 88013 Spindle cell sarcoma 88023 Giant cell sarcoma 88033 Small cell sarcoma 88043 Epithelioid sarcoma 88053 Undifferentiated sarcoma 88063 Desmoplastic small round cell tumor 88100 "Fibroma, NOS" 88103 "Fibrosarcoma, NOS" 88113 Fibromyxosarcoma 88123 Periosteal fibrosarcoma 88133 Fascial fibrosarcoma 88143 Infantile fibrosarcoma 88150 Solitary fibrous tumor 88153 "Solitary fibrous tumor, malignant" 88211 "Fibromatosis, aggressive" 88240 Myofibroma 88241 Myofibromatosis 88251 "Myofibroblastic tumor, NOS" 88300 "Fibrous histiocytoma, benign" 88303 "Fibrous histiocytoma, malignant" 88323 "Dermatofibrosarcoma, NOS" 88333 Pigmented dermatofibrosarcoma protuberans 88341 Giant cell fibroblastoma 88351 Plexiform fibrohistiocytic tumor 88361 Angiomatoid fibrous histiocytoma 88403 Myxosarcoma 88411 Angiomyxoma 88500 "Lipoma, NOS" 88501 Atypical lipoma 88503 "Liposarcoma, NOS" 88510 Fibrolipoma 88513 "Liposarcoma, well differentiated" 88523 Myxoid liposarcoma 88533 Round cell liposarcoma 88543 Pleomorphic liposarcoma 88553 Mixed type liposarcoma 88573 Fibroblastic liposarcoma 88583 Dedifferentiated liposarcoma 88610 "Angiolipoma, NOS" 88900 "Leiomyoma, NOS" 88901 "Leiomyomatosis, NOS" 88903 "Leiomyosarcoma, NOS" 88913 Epithelioid leiomyosarcoma 88943 Angiomyosarcoma 88953 Myosarcoma 88963 Myxoid leiomyosarcoma 88971 "Smooth muscle tumor, NOS" 89000 "Rhabdomyoma, NOS" 89003 "Rhabdomyosarcoma, NOS" 89013 "Pleomorphic rhabdomyosarcoma, adult type" 89023 Mixed type rhabdomyosarcoma 89103 Embryonal rhabdomyosarcoma 89123 Spindle cell rhabdomyosarcoma 89203 Alveolar rhabdomyosarcoma 89213 Rhabdomyosarcoma with ganglionic differentia... 
89303 Endometrial stromal sarcoma 89313 "Endometrial stromal sarcoma, low grade" 89333 Adenosarcoma 89343 Carcinofibroma 89353 "Stromal sarcoma, NOS" 89361 "Gastrointestinal stromal tumor, NOS" 89363 Gastrointestinal stromal sarcoma 89403 "Mixed tumor, malignant, NOS" 89413 Carcinoma in pleomorphic adenoma 89503 Mullerian mixed tumor 89513 Mesodermal mixed tumor 89590 Benign Cystic nephroma 89591 Cystic partially differentiated nephroblastoma 89593 Malignant cystic nephroma 89601 Mesoblastic nephroma 89603 "Nephroblastoma, NOS" 89633 Malignant rhabdoid tumor 89643 Clear cell sarcoma of kidney 89703 Hepatoblastoma 89713 Pancreatoblastoma 89723 Pulmonary blastoma 89733 Pleuropulmonary blastoma 89741 Sialoblastoma 89751 Calcifying nested epithelial stromal tumor 89803 "Carcinosarcoma, NOS" 89813 "Carcinosarcoma, embryonal type" 89823 Malignant myoepithelioma 89901 "Mesenchymoma, NOS" 89903 "Mesenchymoma, malignant" 89913 Embryonal sarcoma 90003 "Brenner tumor, malignant" 90143 Serous adenocarcinofibroma 90153 Mucinous adenocarcinofibroma 90203 "Phyllodes tumor, malignant" 90403 "Synovial sarcoma, NOS" 90413 "Synovial sarcoma, spindle cell" 90423 "Synovial sarcoma, epithelioid cell" 90433 "Synovial sarcoma, biphasic" 90443 "Clear cell sarcoma,NOS (except of kidney M-8..." 90503 "Mesothelioma, malignant" 90513 "Fibrous mesothelioma, malignant" 90523 "Epithel. mesothelioma, mal." 90533 "Mesothelioma, biphasic, malignant" 90603 Dysgerminoma 90613 "Seminoma, NOS" 90623 "Seminoma, anaplastic" 90633 Spermatocytic seminoma 90642 Intratubular malignant germ cells 90643 Germinoma 90653 "Germ cell tumor, nonseminomatous" 90703 "Embryonal carcinoma, NOS" 90713 Yolk sac tumor 90723 Polyembryoma 90800 "Teratoma, benign" 90801 "Teratoma, NOS" 90803 "Teratoma, malignant, NOS" 90813 Teratocarcinoma 90823 "Malignant teratoma, undiff." 90833 "Malignant teratoma, intermediate" 90840 "Dermoid cyst, NOS" 90843 Teratoma with malig. 
transformation 90853 Mixed germ cell tumor 90903 "Struma ovarii, malignant" 91003 Choriocarcinoma 91013 Choriocarcinoma combined w/ other germ cell ... 91023 "Malignant teratoma, trophoblastic" 91043 Malignant placental site trophoblastic tumor 91053 "Trophoblastic tumor, epithelioid" 91103 "Mesonephroma, malignant" 91200 "Hemangioma, NOS" 91203 Hemangiosarcoma 91210 Cavernous hemangioma 91220 Venous hemangioma 91243 Kupffer cell sarcoma 91300 "Hemangioendothelioma, benign" 91301 "Hemangioendothelioma, NOS" 91303 "Hemangioendothelioma, malignant" 91310 Capillary hemangioma 91333 "Epithelioid hemangioendothelioma, malignant" 91403 Kaposi sarcoma 91500 "Hemangiopericytoma, benign" 91501 "Hemangiopericytoma, NOS" 91503 "Hemangiopericytoma, malignant" 91611 Hemangioblastoma 91703 Lymphangiosarcoma 91803 "Osteosarcoma, NOS" 91813 Chondroblastic osteosarcoma 91823 Fibroblastic osteosarcoma 91833 Telangiectatic osteosarcoma 91843 Osteosarcoma in Paget disease 91853 Small cell osteosarcoma 91863 Central osteosarcoma 91873 Instrosseous well differentiated osteosarcoma 91923 Parosteal osteosarcoma 91933 Periosteal osteosarcoma 91943 High grade surface osteosarcoma 91953 Intracortical osteosarcoma 92203 "Chondrosarcoma, NOS" 92213 Juxtacortical chondrosarcoma 92303 "Chondroblastoma, malignant" 92313 Myxoid chondrosarcoma 92403 Mesenchymal chondrosarcoma 92423 Clear cell chondrosarcoma 92433 Dedifferentiated chondrosarcoma 92501 "Giant cell tumor of bone, NOS" 92503 "Giant cell tumor of bone, malignant" 92511 Giant cell tumor of soft parts 92513 Malignant giant cell tumor of soft parts 92523 Malignant tenosynovial giant cell tumor 92603 Ewing sarcoma 92613 Adamantinoma of long bones 92703 "Odontogenic tumor, malignant" 92903 Ameloblastic odontosarcoma 93103 "Ameloblastoma, malignant" 93303 Ameloblastic fibrosarcoma 93423 Odontogenic carcinosarcoma 93501 Craniopharyngioma 93511 Adamantinomatous craniopharyngioma 93521 Papillary craniopharyngioma 93601 "Pinealoma, NOS" 93611 
Pineocytoma 93623 Pineoblastoma 93630 Melanotic neuroectodermal tumor 93643 Peripheral neuroectodermal tumor 93653 Askin tumor 93703 "Chordoma, NOS" 93713 Chondroid chordoma 93723 Dedifferentiated chordoma 93803 "Glioma, malignant" 93813 Gliomatosis cerebri 93823 Mixed glioma 93831 Subependymoma 93841 Supependymal giant cell astrocytoma 93900 "Choroid plexus papilloma, NOS" 93901 Atypical choroid plexus papilloma 93903 "Choroid plexus papilloma, malignant" 93913 "Ependymoma, NOS" 93923 "Ependymoma, anaplastic" 93933 Papillary ependymoma 93941 Myxopapillary ependymoma 93953 Papillary tumor of the pineal region 94003 "Astrocytoma, NOS" 94013 "Astrocytoma, anaplastic" 94103 Protoplasmic astrocytoma 94113 Gemistocytic astrocytoma 94121 Desmoplastic infantile astrocytoma 94130 Dysembryoplastic neuroepithelial tumor 94203 Fibrillary astrocytoma 94211 Pilocytic astrocytoma 94233 Polar spongioblastoma 94243 Pleomorphic xanthoastrocytoma 94253 Pilomyxoid astrocytoma 94303 Astroblastoma 94311 Angiocentric glioma 94321 Pituicytoma 94403 "Glioblastoma, NOS" 94413 Giant cell glioblastoma 94421 Gliofibroma 94423 Gliosarcoma 94441 Chordoid glioma 94503 "Oligodendroglioma, NOS" 94513 "Oligodendroglioma, anaplastic" 94603 Oligodendroblastoma 94703 "Medulloblastoma, NOS" 94713 Desmoplastic medulloblastoma 94723 Medullomyoblastoma 94733 Primitive neuroectodermal tumor 94743 Large cell medulloblastoma 94803 "Cerebellar sarcoma, NOS" 94900 Ganglioneuroma 94903 Ganglioneuroblastoma 94920 Gangliocytoma 94930 Dysplastic gangliocytoma of cerebellum (Lher... 
95003 "Neuroblastoma, NOS" 95013 "Medulloepithelioma, NOS" 95023 Teratoid medulloepithelioma 95033 "Neuroepithelioma, NOS" 95043 Spongioneuroblastoma 95051 "Ganglioglioma, NOS" 95053 "Ganglioglioma, anaplastic" 95061 Centrol neurocytoma 95083 Atypical teratoid/rhabdoid tumor 95091 Papillary glioneuronal tumor 95103 "Retinoblastoma, NOS" 95113 "Retinoblastoma, differentiated" 95123 "Retinoblastoma, undifferentiated" 95133 "Retinoblastoma, diffuse" 95203 Olfactory neurogenic tumor 95213 Olfactory neurocytoma 95223 Olfactory neuroblastoma 95233 Olfactory neuroepithelioma 95300 "Meningioma, NOS" 95301 "Meningiomatosis, NOS" 95303 "Meningioma, malignant" 95310 Meningothelial meningioma 95320 Fibrous meningioma 95330 Psammomatous meningioma 95340 Angiomatous meningioma 95370 Transitional meningioma 95381 Clear cell meningioma 95383 Papillary meningioma 95391 Atypical meningioma 95393 Meningeal sarcomatosis 95400 "Neurofibroma, NOS" 95401 "Neurofibromatosis, NOS" 95403 Malignant peripheral nerve sheath tumor 95410 Melanotic neurofibroma 95500 Plexiform neurofibroma 95600 "Neurilemoma, NOS" 95601 Neurinomatosis 95603 "Neurilemmoma, malignant" 95613 MPNST with rhabdomyoblastic differentiation 95620 Neurothekeoma 95700 "Neuroma, NOS" 95710 "Perineurioma, NOS" 95713 "Perineurioma, malignant" 95800 "Granular cell tumor, NOS" 95803 "Granular cell tumor, malignant" 95813 Alveolar soft part sarcoma 95903 "Malignant lymphoma, NOS" 95913 "Malignant lymphoma, non-Hodgkin" 95963 Composite Hodgkin and non-Hodgkin lymphoma 95973 Primary cutaneous follicle centre lymphoma 96503 "Hodgkin lymphoma, NOS" 96513 "Hodgkin lymphoma, lymphocyte-rich" 96523 "Hodgkin lymphoma, mixed cellularity, NOS" 96533 "Hodgkin lymphoma, lymphocytic deplet., NOS" 96543 "Hodgkin lymph., lymphocyt. deplet., diffuse ..." 96553 "Hodgkin lymphoma, lymphocyt. deplet., reticular" 96593 "Hodgkin lymph., nodular lymphocyte predom." 
96613 Hodgkin granuloma [obs] 96623 Hodgkin sarcoma [obs] 96633 "Hodgkin lymphoma, nodular sclerosis, NOS" 96643 "Hodgkin lymphoma, nod. scler., cellular phase" 96653 "Hodgkin lymphoma, nod. scler., grade 1" 96673 "Hodgkin lymphoma, nod. scler., grade 2" 96703 "ML, small B lymphocytic, NOS" 96713 "ML, lymphoplasmacytic" 96733 Mantle cell lymphoma 96753 "ML, mixed sm. and lg. cell, diffuse" 96783 Primary effusion lymphoma 96793 Mediastinal large B-cell lymphoma 96803 "ML, large B-cell, diffuse" 96843 "ML, large B-cell, diffuse, immunoblastic, NOS" 96873 "Burkitt lymphoma, NOS" 96883 T-cell/histiocyte rich large B-cell lymphoma 96893 Splenic marginal zone B-cell lymphoma 96903 "Follicular lymphoma, NOS" 96913 "Follicular lymphoma, grade 2" 96953 "Follicular lymphoma, grade 1" 96983 "Follicular lymphoma, grade 3" 96993 "Marginal zone B-cell lymphoma, NOS" 97003 Mycosis fungoides 97013 Sezary syndrome 97023 "Mature T-cell lymphoma, NOS" 97053 Angioimmunoblastic T-cell lymphoma 97083 Subcutaneous panniculitis-like T-cell lymphoma 97093 "Cutaneous T-cell lymphoma, NOS" 97123 Intravascular large B-cell lymphoma 97143 "Anaplastic large cell lymphoma, T-cell and N..." 97163 Hepatosplenic T-cell lymphoma 97173 Intestinal T-cell lymphoma 97183 Primary cutan. CD30+ T-cell lymphoprolif. di... 97193 "NK/T-cell lymphoma, nasal and nasal-type" 97243 Syst. EBV pos. T-cell lymphoprol. disease 97253 Hydroa vacciniforme-like lymphoma 97263 Primary cutaneous gamma-delta T-cell lymphoma 97273 "Precursor cell lymphoblastic lymphoma, NOS" 97283 Precursor B-cell lymphoblastic lymphoma 97293 Precursor T-cell lymphoblastic lymphoma 97313 "Plasmacytoma, NOS" 97323 Multiple myeloma 97333 Plasma cell leukemia 97343 "Plasmacytoma, extramedullary" 97353 Plasmablastic lymphoma 97373 ALK postive large B-cell lymphoma 97383 Large B-cell lymph. arising in HHV8-assoc. 
multi 97403 Mast cell sarcoma 97411 Indolent systemic mastocytosis 97413 Malignant mastocytosis 97423 Mast cell leukemia 97503 Malignant histiocytosis 97511 "Langerhans cell histiocytosis, NOS" 97513 "Langerhans cell histiocytosis, NOS" 97521 "Langerhans cell histiocytosis, unifocal" 97531 "Langerhans cell histiocytosis, multifocal" 97543 "Langerhans cell histiocytosis, disseminated" 97553 Histiocytic sarcoma 97563 Langerhans cell sarcoma 97573 Interdigitating dendritic cell sarcoma 97583 Follicular dendritic cell sarcoma 97593 Fibroblastic reticular cell tumor 97603 "Immunoproliferative disease, NOS" 97613 Waldenstrom macroglobulinemia 97623 "Heavy chain disease, NOS" 97643 Immunoproliferative small intestinal disease 98003 "Leukemia, NOS" 98013 "Acute leukemia, NOS" 98053 Acute biphenotypic leukemia 98063 Mix. phenotype ac. leukemia with t;BCR-ABL1 98073 Mix. phenotype ac. leukemia with t;MLL rearrange 98083 "Mixed phenotype acute leukemia, B/myeloid, NOS" 98093 "Mixed phenotype acute leukemia, T/myeloid, NOS" 98113 "B lymphoblastic leukemia/lymphoma, NOS" 98123 B lymphoblast. leukemia/lymphoma with t;BCR-ABL1 98133 B lymphoblast. leukemia/lymphoma with t;MLL rear 98143 B lymphoblast. leukemia/lymphoma with t;TEL-AML1 98153 B lymphoblastic leukemia/lymphoma with hyperdipl 98163 B lymphoblast. leukemia/lymph. with hypodiploidy 98173 B lymphoblast. leukemia/lymphoma with t;IL3-IGH 98183 B lymphoblast. leukemia/lymphoma with t;E2A PBX1 98203 "Lymphoid leukemia, NOS" 98233 B-cell chr. lymph. leuk./small lymphocytic l... 98263 Burkitt cell leukemia 98273 Adult T-cell leukemia/lymphoma (HTLV-1 pos.) 
98283 "Acute lymphoblastic leukemia, L2 type, NOS" 98313 T-cell large granular lymphocytic leukemia 98323 "Prolymphocytic leukemia, NOS" 98333 "Prolymphocytic leukemia, B-cell type" 98343 "Prolymphocytic leukemia, T-cell type" 98353 "Precursor cell lymphoblastic leukemia, NOS" 98363 Precursor B-cell lymphoblastic leukemia 98373 Precursor T-cell lymphoblastic leukemia 98403 "Acute myeloid leukemia, M6 type" 98603 "Myeloid leukemia, NOS" 98613 Acute myeloid leukemia 98633 "Chronic myeloid leukemia, NOS" 98653 Acute myeloid leukemia with t;DEK-NUP214 98663 "Acute promyelocytic leuk.,t(15;17)(q22;q11-12)" 98673 Acute myelomonocytic leukemia 98693 Acute myeloid leukemia with inv or t;RPN1-EVI1 98703 Acute basophilic leukemia 98713 Ac. myelomonocytic leuk. w abn. mar. eosinop... 98723 "Acute myeloid leukemia, minimal differentiation" 98733 Acute myeloid leukemia without maturation 98743 Acute myeloid leukemia with maturation 98753 "Chronic myelogenous leukemia, BCR/ABL positive" 98763 "Atypical chronic myeloid leuk., BCR/ABL nega..." 98913 Acute monocytic leukemia 98953 Acute myeloid leuk. with myelodysplasia-related 98963 "Acute myeloid leukemia, t(8;21)(q22;q22)" 98973 "Acute myeloid leukemia, 11q23 abnormalities" 98981 Transient abnormal myelopoiesis 98983 Myeloid leukemia associated with Down Syndrome 99103 Acute megakaryoblastic leukemia 99113 Acute myeloid leukemia (megakar. blast.) 
with t; 99203 Therapy related myeloid neoplasm 99303 Myeloid sarcoma 99313 Acute panmyelosis with myelofibrosis 99403 Hairy cell leukemia 99453 "Chronic myelomonocytic leukemia, NOS" 99463 Juvenile myelomonocytic leukemia 99483 Aggressive NK-cell leukemia 99503 Polycythemia vera 99603 "Myeloproliferative neoplasm, NOS" 99613 Primary myelofibrosis 99623 Essential thrombocythemia 99633 Chronic neutrophilic leukemia 99643 "Chronic eosinophilic leukemia, NOS" 99653 Myeloid&lymphoid neoplasms with PDGFRB rearrange 99663 Myeloid neoplasms with PDGFRB rearrangement 99673 Myeloid&lymphoid neoplasm with FGFR1 abnormaliti 99701 "Lymphoproliferative disorder, NOS" 99711 "Post transplant lymphoproliferative disorder, NO" 99713 Polymorphic post transplant lymphoproliferative 99751 "Myeloproliferative disease, NOS" 99753 "Myeloproliferative neoplasm, unclassifiable" 99803 Refractory anemia 99823 Refractory anemia with sideroblasts 99833 Refractory anemia with excess blasts 99843 Refract. anemia with excess blasts in transf... 99853 Refractory cytopenia with multilineage dyspl... 99863 Myelodysplastic syndr. with 5q deletion synd... 
99873 "Therapy-related myelodysplastic syndrome, NOS" 99893 "Myelodysplastic syndrome, NOS" 99913 Refractory neutropenia 99923 Refractory thrombocytopenia 341 PLOCCD PLCCD1 PLCCD2 PLCCD3 0 external upper lip 1 external lower lip 2 "external lip, NOS" 3 mucose of uppper lip 4 mucosa of lower lip 5 "mucosa of lip, NOS" 6 commissure of lip 8 overlapping lesion of lip 9 "lip, NOS" 19 base of tongue 20 "dorsal surface of tongue, NOS" 21 border of tongue 22 "ventral surface of tongue, NOS" 23 anterior 2/3 of tongue 24 lingual tonsil 28 overlapping lesion of tongue 29 "tongue, NOS" 30 upper gum 31 lower gum 39 "gum, NOS" 40 anterior floor of mouth 41 lateral floor of mouth 48 overlapping lesion of floor of mouth 49 "floor of mouth, NOS" 50 hard palate 51 "soft palate, NOS" 52 uvula 58 overlapping lesion of palate 59 "palate, NOS" 60 cheek mucosa 61 vestibule of mouth 62 retromolar area 68 overlappinglesionofotherandunspecifiedpartso... 69 "mouth, NOS" 79 parotid gland 80 submandibular gland 81 sublingual gland 88 overlapping lesion of major salivary glands 89 "major salivary gland, NOS" 90 tonsillar fossa 91 tonsillar pillar 98 overlapping lesion of tonsil 99 "tonsil, NOS" 100 vallecula 101 anterior surface of epiglottis 102 lateral wall of oropharynx 103 posterior wall of oropharynx 104 branchial cleft 108 overlapping lesion of oropharynx 109 "oropharynx, NOS" 110 superior wall of nasopharyx 111 posterior wall of nasopharyx 112 lateral wall of nasopharyx 113 anterior wall of nasopharyx 118 overlapping lesion of nasopharyx 119 "nasopharyx, NOS" 129 "nasopharyxyriform sinus, NOS" 130 postcricoid region 131 hypopharyngeal aspect of aryepiglottic fold 132 posterior wall of hypopharynx 138 overlapping lesion of hypopharyx 139 "hypopharyx, NOS" 140 "pharyx, NOS" 142 waldeyer ring 148 "overlappinglesionoflip,oralcavityandpharynx" 150 cervical esophagus 151 thoracic esophagus 152 abdominal esophagus 153 upper third of esophagus 154 middle third of esophagus 155 lower third of 
esophagus 158 overlapping lesion of esophagus 159 "esophagus, NOS" 160 "cardia, NOS" 161 fundus of stomach 162 body of stomach 163 gastric antrum 164 pylorus 165 lesser curvature of stomach 166 greater curvature of stomach 168 overlapping lesion of stomach 169 "stomach, NOS" 170 duodenum 171 jejunum 172 ileum 173 Meckel diverticulum 178 overlapping lesion of small intestine 179 "small intestine, NOS" 180 cecum 181 appendix 182 ascending colon 183 hepatic flexure of colon 184 transverse colon 185 splenic flexure of colon 186 descending colon 187 sigmoid colon 188 overlapping lesion of colon 189 "colon, NOS" 199 rectosigmoid junction 209 "rectum, NOS" 210 "anus, NOS" 211 anal canal 212 cloacogenic zone 218 "overlappinglesionofrectum,anusandanalcanal" 220 liver 221 intrahepatic bile duct 239 "gallbladder, NOS" 240 extrahepatic bile duct 241 ampulla of Vater 248 overlapping lesion of biliary tract 249 "billary tract, NOS" 250 head of pancreas 251 body of pancreas 252 tail of pancreas 253 pancreatic duct 254 islets of Langerhans 257 other specified parts of pancreas 258 overlapping lesion of pancreas 259 "pancreas, NOS" 260 "intestinal tract, NOS" 268 overlapping lesion of digestive system 269 "gastrointestinal tract, NOS" 300 nasal cavity 301 middle ear 310 maxillary sinus 311 ethmoid sinus 312 frontal sinus 313 sphenoid sinus 318 overlapping lesion of accessory sinuses 319 "accessory sinus, NOS" 320 glottis 321 supraglottis 322 subglottis 323 laryngeal cartilage 328 overlapping lesion of larynx 329 "larynx, NOS" 339 trachea 340 main bronchus 341 "upper lobe, lung" 342 "middle lobe, lung" 343 "lower lobe, lung" 348 overlapping lesion of lung 349 "lung, NOS" 379 thymus 380 heart 381 anterior mediastinum 382 posterior mediastinum 383 "mediastinum, NOS" 384 "pleura, NOS" 388 "overlappinglesionofheart,mediastinumandpleura" 390 "upper respiratory tract, NOS" 398 overlappinglesionofrespiratorysystemandintra... 
399 ill-defined sites within respiratory system 400 "longboneofupperlimb,scapulaandassociatedjoints" 401 short bone of upper limb and associated joints 402 long bones of lower limb and associated joints 403 short bones of lower limb and associated joints 408 "overlappinglesionofbones,jointsandarticularc..." 409 "bones of lomb, NOS" 410 bones of skull and face and associated joints 411 mandible 412 vertebral column 413 "rib, sternum, clavicle and associated joints" 414 "pelvicbones,sacrum,coccyxandassociatedjoints" 418 "overlappinglesionofbones,jointsandarticularc..." 419 "bone, NOS" 420 blood 421 bone marrow 422 spleen 423 "reticuloendothelial system, NOS" 424 "hematopoietic system, NOS" 440 "skin of lip, NOS" 441 eyelid 442 external ear 443 skin of other and unspecified parts of face 444 skin of scalp and neck 445 skin of trunc 446 skin of upper limb and shoulder 447 skin of lower limb and hip 448 overlapping lesion of skin 449 "skin, NOS" 470 peripheralnervesandautonomicnervoussystemofh... 471 peripheralnervesandautonomicnervoussystemofu... 472 peripheralnervesandautonomicnervoussystemofl... 473 peripheralnervesandautonomicnervoussystemoft... 474 peripheralnervesandautonomicnervoussystemofa... 475 peripheralnervesandautonomicnervoussystemofp... 476 peripheralnervesandautonomicnervoussystemoft... 478 overlappinglesionofperipheralnervesandautono... 479 "autonomic nervous system, NOS" 480 retroperitoneum 481 specified parts of peritoneum 482 "peritoneum, NOS" 488 overlapping lesion of retroperitoneum 490 "connective,subcutaneousandothersofttissuesof..." 491 "connective,subcutaneousandothersofttissuesof..." 492 "connective,subcutaneousandothersofttissuesof..." 493 "connective,subcutaneousandothersofttissuesof..." 494 "connective,subcutaneousandothersofttissuesof..." 495 "connective,subcutaneousandothersofttissuesof..." 496 "connective,subcutaneousandothersofttissuesof..." 498 "overlappinglesionofconnective,subcutaneousan..." 
499 "connective,subcutaneousandothersofttissues,NOS" 500 nipple 501 central portion of breast 502 upper-inner quadrant of breast 503 lower-inner quadrant of breast 504 upper-outer quadrant of breast 505 upper-outer quadrant of breast 506 axillary tail of breast 508 overlapping lesion of breast 509 "breast, NOS" 510 labium majus 511 labium minus 512 clitoris 518 overlapping lesion of vulva 519 "vulva, NOS" 529 "vagina, NOS" 530 endocervix 531 exocervix 538 overlapping lesion of cervix uteri 539 "cervix uteri, NOS" 540 isthmus uteri 541 endometrium 542 myometrium 543 fundus uteri 548 overlapping lesion of corpus uteri 549 "corpus uteri, NOS" 559 "uterus, NOS" 569 "ovary, NOS" 570 fallopian tube 571 broad ligament 572 round ligament 573 parametrium 574 uterine adnexa 577 other specified parts of female genital organs 578 overlapping lesion of female genital organs 579 "female genital tract, NOS" 589 "placenta, NOS" 600 prepuce 601 glans penis 602 body of penis 608 overlapping lesion of penis 609 "penis, NOS" 619 prostate gland 620 undescended testis 621 descended testis 629 "testis, NOS" 630 epididymis 631 spermatic cord 632 "scrotum, NOS" 637 other specified parts of male genital organs 638 overlapping lesion of male genital organs 639 "male genital tract, NOS" 649 "kidney, NOS" 659 renal pelvis 669 ureter 670 trigone of bladder 671 dome of bladder 672 lateral wall of bladder 673 anterior wall of bladder 674 posterior of bladder 675 bladder neck 676 ureteric orifice 677 urachus 678 overlapping lesion of bladder 679 "bladder, NOS" 680 urethra 681 paraurethral gland 688 overlapping lesion of urinary organs 689 "urinary system, NOS" 690 conjuctiva 691 "cornea, NOS" 692 retina 693 choroid 694 ciliary body 695 lacrimal gland 696 "orbit, NOS" 698 overlapping lesion of eye and adnexa 699 "eye, NOS" 700 cerebral meninges 701 spinal meninges 709 "meninges, NOS" 710 cerebrum 711 frontal lobe 712 temperal lobe 713 parietal lobe 714 occipital lobe 715 "ventricle, NOS" 716 
cerebellum 717 brain stem 718 overlapping lesion of brain 719 "brain, NOS" 720 spinal cord 721 cauda equina 722 olfactory nerve 723 optic nerve 724 acoustic nerve 725 "cranial nerve, NOS" 728 overlappinglesionofbrainandcentralnervoussystem 729 "nervous system, NOS" 739 "thyroid gland, NOS" 740 cortex of adrenal gland 741 medulla of adrenal gland 749 "adrenal gland, NOS" 750 parathyroid gland 751 pituitary gland 752 craniopharyngeal duct 753 pineal gland 754 carotid body 755 aortic body and other paraganglia 758 overlappinglesionofendocrineglandsandrelated... 759 "endocrine gland, NOS" 760 "head, face or neck, NOS" 761 "thorax, NOS" 762 "abdomen, NOS" 763 "pelvis, NOS" 764 "upper limb, NOS" 765 "lower limb, NOS" 767 other ill-defined sites 768 overlapping lesion of ill-defined sites 770 "lymph nodes of head, face and neck" 771 intrathoracic lymph nodes 772 intra-abdominal lymph nodes 773 lymph nodes of axilla of arm 774 lymph nodes of inguinal region or leg 775 pelvic lymph nodes 778 lymph nodes of multiple regions 779 "lymph node, NOS" 809 unknown primary sites 343 IFCDATR 1 yes 2 no 3 statement by physician 4 IC will follow \ No newline at end of file diff --git a/test_data/full_dataset/studies/death_codebook.txt.sha1 b/test_data/full_dataset/studies/death_codebook.txt.sha1 new file mode 100644 index 0000000..c7ec326 --- /dev/null +++ b/test_data/full_dataset/studies/death_codebook.txt.sha1 @@ -0,0 +1 @@ +8cc20a3917db3e251d9426d078ce74969e531931 PMCST000AAA_death_codebook.txt diff --git a/test_data/full_dataset/studies/diagnosis.txt b/test_data/full_dataset/studies/diagnosis.txt new file mode 100644 index 0000000..197d37c --- /dev/null +++ b/test_data/full_dataset/studies/diagnosis.txt @@ -0,0 +1,20 @@ +"MARK:","ID","IDAA","INDIVIDUAL_ID","CIDDIAG","HOSPDIAG","DIAGCD","PLOCCD","DIAGGRSTX","IDAABA" +,217,8000208,PAT1,DIA1,217,95913,771,,10/04/2003 0:00:00 +,217,8000216,PAT2,DIA2,217,95913,421,,17/04/2003 0:00:00 +,217,8000233,PAT3,DIA3,217,95913,421,,04/05/2003 
0:00:00 +,217,8000250,PAT4,DIA4,217,95913,421,,21/05/2003 0:00:00 +,217,8000268,PAT5,DIA5,217,95913,421,,05/06/2003 0:00:00 +,217,8000273,PAT6,DIA6,217,95913,421,,29/09/2003 0:00:00 +,217,8000323,PAT7,DIA7,217,95913,421,,17/08/2003 0:00:00 +,217,8000333,PAT8,DIA8,217,95913,421,,28/08/2003 0:00:00 +,217,8000345,PAT9,DIA9,217,95913,778,,06/08/2003 0:00:00 +,217,8000217,PAT10,DIA10,217,95913,778,,06/08/2003 0:00:00 +,217,8000238,PAT11,DIA11,217,97053,778,,06/08/2003 0:00:00 +,217,8000251,PAT12,DIA12,217,95913,778,,06/08/2003 0:00:00 +,217,8000269,PAT13,DIA13,217,95913,778,,06/08/2003 0:00:00 +,217,8000274,PAT14,DIA14,217,97053,778,,06/08/2003 0:00:00 +,217,8000324,PAT15,DIA15,217,97053,778,,06/08/2003 0:00:00 +,217,8000334,PAT16,DIA16,217,97053,778,,06/08/2003 0:00:00 +,217,8000346,PAT17,DIA17,217,97053,778,,06/08/2003 0:00:00 +,217,8000208,PAT1,DIA18,217,97053,778,,06/08/2003 0:00:00 +,217,8000216,PAT2,DIA19,217,97053,778,,06/08/2003 0:00:00 diff --git a/test_data/full_dataset/studies/diagnosis.txt.sha1 b/test_data/full_dataset/studies/diagnosis.txt.sha1 new file mode 100644 index 0000000..fac1e34 --- /dev/null +++ b/test_data/full_dataset/studies/diagnosis.txt.sha1 @@ -0,0 +1 @@ +c2c862477e73c9059b5b2e8cc7b8049a74099ea7 PMCST000AAA_diagnosis.txt diff --git a/test_data/full_dataset/studies/diagnosis_codebook.txt b/test_data/full_dataset/studies/diagnosis_codebook.txt new file mode 100644 index 0000000..9d23109 --- /dev/null +++ b/test_data/full_dataset/studies/diagnosis_codebook.txt @@ -0,0 +1 @@ +1 SEX 1 male 2 female 9 unknown 2 SIGYN IFCGIV IFCMAT IFCCOM IFCREF NOREGIS CLEXPYN CYTOLYN HISTOYN IMAGEYN TMARKYN PATDCOGR PROTYN ELIGYN TXSTART SIGYNX SIGYNY 1 yes 2 no 8 not applicable 8 HOSPREC HOSPDIAG HOSPASS HOSPTRAN 200 AMC 201 UMCG 202 AZM 203 CZE 204 ErasmusMC 207 JBZ 208 LUMC 213 EZT 214 Radboudumc 216 VUMC 217 UMCU 220 PMC 332 DIAGCD PDGCD1 PDGCD2 PDGCD3 61000 FanconiÔs anaemia 61900 Aplastic anaemia 70000 KostmannÔs disease 76100 HLH 80000 "Neoplasm, benign" 
80001 "Neoplasm, uncertain whether benign or malignant" 80003 "Neoplasm, malignant" 80010 "Tumor cells, benign" 80011 "Tumor cells, uncertain whether benign or mal..." 80013 "Tumor cells, malignant" 80023 "Malignant tumor, small cell type" 80033 "Malignant tumor, giant cell type" 80043 "Malignant tumor, spindle cell type" 80050 "Clear cell tumor, NOS" 80053 "Malignant tumor, clear cell type" 80100 "Epithelial tumor, benign" 80102 "Carcinoma in situ, NOS" 80103 "Carcinoma, NOS" 80113 "Epithelioma, malignant" 80123 "Large cell carcinoma, NOS" 80133 Large cell neuroendocrine carcinoma 80143 Large cell carcinoma with rhabdoid phenotype 80153 Glassy cell carcinoma 80203 "Carcinoma, undifferentiated type, NOS" 80213 "Carcinoma, anaplastic type, NOS" 80223 Pleomorphic carcinoma 80303 Giant cell and spindle cell carcinoma 80313 Giant cell carcinoma 80323 Spindle cell carcinoma 80333 Pseudosarcomatous carcinoma 80343 Polygonal cell carcinoma 80353 Carcinoma with osteoclast-like giant cells 80413 "Small cell carcinoma, NOS" 80423 Oat cell carcinoma 80433 "Small cell carcinoma, fusiform cell" 80443 "Small cell carcinoma, intermediate cell" 80453 Combined small cell carcinoma 80463 Non-small cell carcinoma 80502 Papillary carcinoma in situ 80503 "Papillary carcinoma, NOS" 80513 "Verrucous carcinoma, NOS" 80522 "Papillary squamous cell carcinoma, non-invasive" 80523 Papillary squamous cell carcinoma 80702 "Squamous cell carcinoma in situ, NOS" 80703 "Squamous cell carcinoma, NOS" 80713 "Sq. cell carcinoma, keratinizing, NOS" 80723 "Sq. cell carcinoma, lg. cell, non-ker." 80733 "Sq. cell carcinoma, sm. cell, non-ker." 80743 "Sq. cell carcinoma, spindle cell" 80753 "Squamous cell carcinoma, adenoid" 80762 Sq. cell carc. in situ with question. stroma... 80763 "Sq. 
cell carcinoma, micro-invasive" 80770 "Squamous intraepithelial neoplasia, low grade" 80772 "Squamous intraepithelial neoplasia, high grade" 80783 Squamous cell carcinoma with horn formation 80802 Queyrat erythroplasia 80812 Bowen disease 80823 Lymphoepithelial carcinoma 80833 Basaloid squamous cell carcinoma 80843 "Squamous cell carcinoma, clear cell type" 80903 "Basal cell carcinoma, NOS" 80913 Multifocal superficial basal cell carcinoma 80923 "Infiltrating basal cell carcinoma, NOS" 80933 "Basal cell carcinoma, fibroepithelial" 80943 Basosquamous carcinoma 80953 Metatypical carcinoma 80973 "Basal cell carcinoma, nodular" 80983 Adenoid basal cell carcinoma 81023 Trichilemmocarcinoma 81103 Pilomatrix carcinoma 81202 Transitional cell carcinoma in situ 81203 "Transitional cell carcinoma, NOS" 81213 Schneiderian carcinoma 81223 "Trans. cell carcinoma, spindle cell" 81233 Basaloid carcinoma 81243 Cloacogenic carcinoma 81302 "Papillary trans. cell carcinoma, non-invasive" 81303 Papillary trans. cell carcinoma 81313 "Transitional cell carcinoma, micropapillary" 81400 "Adenoma, NOS" 81402 Adenocarcinoma in situ 81403 "Adenocarcinoma, NOS" 81413 Scirrhous adenocarcinoma 81423 Linitis plastica 81433 Superficial spreading adenocarcinoma 81443 "Adenocarcinoma, intestinal type" 81453 "Carcinoma, diffuse type" 81460 Monomorphic adenoma 81473 Basal cell adenocarcinoma 81480 "Glandular intraepithelial neoplasia, low grade" 81482 "Glandular intraepithelial neoplasia, high grade" 81500 "Pancreatic endocrine tumor, benign" 81501 "Pancreatic endocrine tumor, NOS" 81503 "Pancreatic endocrine tumor, malignant" 81513 "Insulinoma, malignant" 81523 "Glucagonoma, malignant" 81533 "Gastrinoma, malignant" 81543 "Mix. 
pancreatic endocrine&exocrine tumor, mal" 81553 Vipoma 81563 "Somatostatinoma, malignant" 81573 "Enteroglucagonoma, malignant" 81581 "Endocrine tumor, functioning, NOS" 81603 Cholangiocarcinoma 81613 Bile duct cystadenocarcinoma 81623 Klatskin tumor 81630 "Pancreatobiliary neoplasm, non-invasive" 81632 "Pap. neoplasm,pancreatobiliary-type,high gr. int" 81633 Pancreatobiliary-type carcinoma 81703 "Hepatocellular carcinoma, NOS" 81713 "Hepatocellular carcinoma, fibrolamellar" 81723 "Hepatocellular carcinoma, scirrhous" 81733 "Hepatocellular carcinoma, spindle cell variant" 81743 "Hepatocellular carcinoma, clear cell type" 81753 "Hepatocellular carcinoma, pleomorphic type" 81803 Comb. hepatocel. carcinoma & cholangiocarcinoma 81903 Trabecular adenocarcinoma 82003 Adenoid cystic carcinoma 82012 Cribriform carcinoma in situ 82013 Cribriform carcinoma 82102 Adenocarcinoma in situ in adenomatous polyp 82103 Adenocarcinoma in adenomatous polyp 82113 Tubular adenocarcinoma 82133 Serrated adenocarcinoma 82143 Parietal cell carcinoma 82153 Adenocarcinoma of anal glands 82202 Adenocarcinoma in situ in familial polyp. coli 82203 Adenocarcinoma in adenoma. polyposis coli 82212 Adenocarc. in situ in mult. adenomatous polyps 82213 Adenocarcinoma in mult. adenomatous polyps 82302 "Duct carcinoma in situ, solid type" 82303 "Solid carcinoma, NOS" 82313 Carcinoma simplex 82401 Carcinoid tumor of uncertain malignant potential 82403 "Carcinoid tumor, malignant" 82413 Enterochromaffin cell carcinoid 82423 "Enterochromaffin-like cell tumor, malignant" 82433 Goblet cell carcinoid 82443 Mixed adenoneuroendocrine carcinoma 82453 Adenocarcinoid tumor 82463 Neuroendocrine carcinoma 82473 Merkel cell carcinoma 82493 Atypical carcinoid tumor 82503 Bronchiolo-alveolar adenocarcinoma 82513 Alveolar adenocarcinoma 82523 "Bronchiolo-alveolar carcinoma, non-mucinous" 82533 "Bronchiolo-alveolar carcinoma, mucinous" 82543 "Bronch.-alv. carc., mixed mucin. and non-muc..." 
82553 Adenocarcinoma with mixed subtypes 82600 "Papillary adenoma, NOS" 82603 "Papillary adenocarcinoma, NOS" 82612 Adenocarcinoma in situ in villous adenoma 82613 Adenocarcinoma in villous adenoma 82623 Villous adenocarcinoma 82632 Adenocarcinoma in situ in tubulovillous adenoma 82633 Adenocarcinoma in tubulovillous adenoma 82653 "Micropapillary carcinoma, NOS" 82700 Chromophobe adenoma 82703 Chromophobe carcinoma 82710 Prolactinoma 82720 "Pituitary adenoma, NOS" 82723 "Pituitary carcinoma, NOS" 82800 Acidophil adenoma 82803 Acidophil carcinoma 82810 Mixed acidophil-basophil adenoma 82813 Mixed acidophil-basophil carcinoma 82900 Oxyphilic adenoma 82903 Oxyphilic adenocarcinoma 83000 Basophil adenoma 83003 Basophil carcinoma 83100 Clear cell adenoma 83103 "Clear cell adenocarcinoma, NOS" 83123 Renal cell carcinoma 83133 Clear cell adenocarcinofibroma 83143 Lipid-rich carcinoma 83153 Glycogen-rich carcinoma 83163 Cyst-associated renal cell carcinoma 83173 "Renal cell carcinoma, chromophobe type" 83183 "Renal cell carcinoma, sarcomatoid" 83193 Collecting duct carcinoma 83203 Granular cell carcinoma 83223 Water-clear cell adenocarcinoma 83230 Mixed cell adenoma 83233 Mixed cell adenocarcinoma 83303 "Follicular adenocarcinoma, NOS" 83313 Follicular adenocarcinoma well diff. 
83323 Follicular adenocarcinoma trabecular 83333 Fetal adenocarcinoma 83353 "Follicular carcinoma, minimally invasive" 83373 Insular carcinoma 83403 "Papillary carcinoma, follicular variant" 83413 Papillary microcarcinoma 83423 "Papillary carcinoma, oxyphilic cell" 83433 "Papillary carcinoma, encapsulated" 83443 "Papillary carcinoma, columnar cell" 83453 Medullary carcinoma with amyloid stroma 83463 Mixed medullary-follicular carcinoma 83473 Mixed medullary-papillary carcinoma 83503 Nonencapsulated sclerosing carcinoma 83700 "Adrenal cortical adenoma, NOS" 83703 Adrenal cortical carcinoma 83803 Endometrioid carcinoma 83813 "Endometrioid adenofibroma, malignant" 83823 "Endometrioid adenocarcinoma, secretory variant" 83833 "Endometrioid adenocarcinoma, ciliated cell v..." 83843 "Adenocarcinoma, endocervical type" 83903 Skin appendage carcinoma 84003 Sweat gland adenocarcinoma 84013 Apocrine adenocarcinoma 84023 "Nodular hidradenoma, malignant" 84033 Malignant eccrine spiradenoma 84073 Sclerosing sweat duct carcinoma 84083 Eccrine papillary adenocarcinoma 84093 "Eccrine poroma, malignant" 84103 Sebaceous adenocarcinoma 84133 Eccrine adenocarcinoma 84203 Ceruminous adenocarcinoma 84303 Mucoepidermoid carcinoma 84403 "Cystadenocarcinoma, NOS" 84413 "Serous cystadenocarcinoma, NOS" 84421 "Serous cystadenoma, borderline malignancy (C..." 84503 "Papillary cystadenocarcinoma, NOS" 84511 "Papillary cystadenoma, borderline malignancy..." 84523 Solid pseudopapillary carcinoma 84532 "Intraductal papillary-mucinous carcinoma, no..." 84533 "Intraductal papillary-mucinous carcinoma, in..." 84603 Papillary serous cystadenocarcinoma 84613 Serous surface papillary carcinoma 84621 Serous papillary cystic tumor of borderline ... 84702 "Mucinous cystadenocarcinoma, non-invasive" 84703 "Mucinous cystadenocarcinoma, NOS" 84713 Papillary mucinous cystadenocarcinoma 84721 Mucinous cystic tumor of borderline malignan... 84731 "Papillary mucinous cystadenoma, borderline m..." 
84803 Mucinous adenocarcinoma 84813 Mucin-producing adenocarcinoma 84823 "Mucinous adenocarcinoma, endocervical type" 84903 Signet ring cell carcinoma 85002 "Intraductal carcinoma, noninfiltrating, NOS" 85003 "Infiltrating duct carcinoma, NOS" 85012 "Comedocarcinoma, non-infiltrating" 85013 "Comedocarcinoma, NOS" 85023 Secretory carcinoma of breast 85032 Noninfiltrating intraductal papillary adenoc... 85033 Intraductal papillary adenocarcinoma with in... 85042 Noninfiltrating intracystic carcinoma 85043 "Intracystic carcinoma, NOS" 85072 Intraductal micropapillary carcinoma 85083 Cystic hypersecretory carcinoma 85103 "Medullary carcinoma, NOS" 85123 Medullary carcinoma with lymphoid stroma 85133 Atypical medullary carcinoma 85143 "Duct carcinoma, desmoplastic type" 85202 Lobular carcinoma in situ 85203 "Lobular carcinoma, NOS" 85213 Infiltrating ductular carcinoma 85222 Intraductal and lobular in situ carcinoma 85223 Infiltrating duct and lobular carcinoma 85233 Infiltr. duct mixed with other types of carc... 85243 Infiltrating lobular mixed with other types ... 85253 Polymorphous low grade adenocarcinoma 85303 Inflammatory carcinoma 85403 "Paget disease, mammary" 85413 Paget dis. & infil. duct carcinoma 85423 "Paget disease, extramammary" 85433 Paget disease and intraductal ca. 85503 Acinar cell carcinoma 85513 Acinar cell cystadenocarcinoma 85603 Adenosquamous carcinoma 85613 "Warthin tumor, malignant" 85623 Epithelial-myoepithelial carcinoma 85703 Adenocarcinoma with squamous metaplasia 85713 Adenocarcinoma w cartilag. & oss. metaplas. 85723 Adenocarcinoma with spindle cell mataplasia 85733 Adenocarcinoma with apocrine metaplasia 85743 Adenocarcinoma with neuroendocrine differen. 
85753 "Metaplastic carcinoma, NOS" 85763 Hepatoid adenocarcinoma 85803 "Thymoma, malignant, NOS" 85813 "Thymoma, type A, malignant" 85823 "Thymoma, type AB, malignant" 85833 "Thymoma, type B1, malignant" 85843 "Thymoma, type B2, malignant" 85853 "Thymoma, type B3, malignant" 85863 "Thymic carcinoma, NOS" 85883 Spindle epithelial tumor with thymus-like el... 85893 Carcinoma showing thymus-like element 85903 "Ovarian stromal tumor, mal." 86003 "Thecoma, malignant" 86201 "Granulosa cell tumor, adult type" 86203 "Granulosa cell tumor, malignant" 86211 Granulosa cell-theca cell tumor 86221 "Granulosa cell tumor, juvenile" 86303 "Androblastoma, malignant" 86311 Sertoli-Leydig cell tumor of intermediate differ 86313 "Sertoli-Leydig cell tumor, poorly differenti..." 86323 "Gynandroblastoma, malignant" 86343 "Sertoli-Leydig cl tum., p.d. w heterologous ..." 86401 "Sertoli cell tumor, NOS" 86403 Sertoli cell carcinoma 86501 "Leydig cell tumor, NOS" 86503 "Leydig cell tumor, malignant" 86703 "Steroid cell tumor, malignant" 86801 "Paraganglioma, NOS" 86803 "Paraganglioma, malignant" 86913 "Aortic body tumor, malignant" 86923 "Carotid body tumor, malignant" 86933 "Extra-adrenal paraganglioma, malignant" 87000 Pheochromocytoma NOS 87003 Pheochromocytoma 87103 Glomangiosarcoma 87202 Melanoma in situ 87203 "Malignant melanoma, NOS" 87213 Nodular melanoma 87223 Balloon cell melanoma 87233 "Malignant melanoma, regressing" 87280 Diffuse melanocytosis 87281 Meningeal melanocytoma 87283 Meningeal melanomatosis 87303 Amelanotic melanoma 87403 Mal. melanoma in junctional nevus 87412 "Precancerous melanosis, NOS" 87413 Mal. melanoma in precan. melanosis 87422 Lentigo maligna 87423 Lentigo maligna melanoma 87433 Superficial spreading melanoma 87443 "Acral lentiginous melanoma, malig." 87453 "Desmoplastic melanoma, malignant" 87463 Mucosal lentiginous melanoma 87613 Mal. melanoma in giant pigmented nevus 87703 Mixed epithel. 
& spindle cell melanoma 87713 Epithelioid cell melanoma 87723 "Spindle cell melanoma, NOS" 87733 "Spindle cell melanoma, type A" 87743 "Spindle cell melanoma, type B" 87803 "Blue nevus, malignant" 88000 "Soft tissue tumor, benign" 88003 "Sarcoma, NOS" 88013 Spindle cell sarcoma 88023 Giant cell sarcoma 88033 Small cell sarcoma 88043 Epithelioid sarcoma 88053 Undifferentiated sarcoma 88063 Desmoplastic small round cell tumor 88100 "Fibroma, NOS" 88103 "Fibrosarcoma, NOS" 88113 Fibromyxosarcoma 88123 Periosteal fibrosarcoma 88133 Fascial fibrosarcoma 88143 Infantile fibrosarcoma 88150 Solitary fibrous tumor 88153 "Solitary fibrous tumor, malignant" 88211 "Fibromatosis, aggressive" 88240 Myofibroma 88241 Myofibromatosis 88251 "Myofibroblastic tumor, NOS" 88300 "Fibrous histiocytoma, benign" 88303 "Fibrous histiocytoma, malignant" 88323 "Dermatofibrosarcoma, NOS" 88333 Pigmented dermatofibrosarcoma protuberans 88341 Giant cell fibroblastoma 88351 Plexiform fibrohistiocytic tumor 88361 Angiomatoid fibrous histiocytoma 88403 Myxosarcoma 88411 Angiomyxoma 88500 "Lipoma, NOS" 88501 Atypical lipoma 88503 "Liposarcoma, NOS" 88510 Fibrolipoma 88513 "Liposarcoma, well differentiated" 88523 Myxoid liposarcoma 88533 Round cell liposarcoma 88543 Pleomorphic liposarcoma 88553 Mixed type liposarcoma 88573 Fibroblastic liposarcoma 88583 Dedifferentiated liposarcoma 88610 "Angiolipoma, NOS" 88900 "Leiomyoma, NOS" 88901 "Leiomyomatosis, NOS" 88903 "Leiomyosarcoma, NOS" 88913 Epithelioid leiomyosarcoma 88943 Angiomyosarcoma 88953 Myosarcoma 88963 Myxoid leiomyosarcoma 88971 "Smooth muscle tumor, NOS" 89000 "Rhabdomyoma, NOS" 89003 "Rhabdomyosarcoma, NOS" 89013 "Pleomorphic rhabdomyosarcoma, adult type" 89023 Mixed type rhabdomyosarcoma 89103 Embryonal rhabdomyosarcoma 89123 Spindle cell rhabdomyosarcoma 89203 Alveolar rhabdomyosarcoma 89213 Rhabdomyosarcoma with ganglionic differentia... 
89303 Endometrial stromal sarcoma 89313 "Endometrial stromal sarcoma, low grade" 89333 Adenosarcoma 89343 Carcinofibroma 89353 "Stromal sarcoma, NOS" 89361 "Gastrointestinal stromal tumor, NOS" 89363 Gastrointestinal stromal sarcoma 89403 "Mixed tumor, malignant, NOS" 89413 Carcinoma in pleomorphic adenoma 89503 Mullerian mixed tumor 89513 Mesodermal mixed tumor 89590 Benign Cystic nephroma 89591 Cystic partially differentiated nephroblastoma 89593 Malignant cystic nephroma 89601 Mesoblastic nephroma 89603 "Nephroblastoma, NOS" 89633 Malignant rhabdoid tumor 89643 Clear cell sarcoma of kidney 89703 Hepatoblastoma 89713 Pancreatoblastoma 89723 Pulmonary blastoma 89733 Pleuropulmonary blastoma 89741 Sialoblastoma 89751 Calcifying nested epithelial stromal tumor 89803 "Carcinosarcoma, NOS" 89813 "Carcinosarcoma, embryonal type" 89823 Malignant myoepithelioma 89901 "Mesenchymoma, NOS" 89903 "Mesenchymoma, malignant" 89913 Embryonal sarcoma 90003 "Brenner tumor, malignant" 90143 Serous adenocarcinofibroma 90153 Mucinous adenocarcinofibroma 90203 "Phyllodes tumor, malignant" 90403 "Synovial sarcoma, NOS" 90413 "Synovial sarcoma, spindle cell" 90423 "Synovial sarcoma, epithelioid cell" 90433 "Synovial sarcoma, biphasic" 90443 "Clear cell sarcoma,NOS (except of kidney M-8..." 90503 "Mesothelioma, malignant" 90513 "Fibrous mesothelioma, malignant" 90523 "Epithel. mesothelioma, mal." 90533 "Mesothelioma, biphasic, malignant" 90603 Dysgerminoma 90613 "Seminoma, NOS" 90623 "Seminoma, anaplastic" 90633 Spermatocytic seminoma 90642 Intratubular malignant germ cells 90643 Germinoma 90653 "Germ cell tumor, nonseminomatous" 90703 "Embryonal carcinoma, NOS" 90713 Yolk sac tumor 90723 Polyembryoma 90800 "Teratoma, benign" 90801 "Teratoma, NOS" 90803 "Teratoma, malignant, NOS" 90813 Teratocarcinoma 90823 "Malignant teratoma, undiff." 90833 "Malignant teratoma, intermediate" 90840 "Dermoid cyst, NOS" 90843 Teratoma with malig. 
transformation 90853 Mixed germ cell tumor 90903 "Struma ovarii, malignant" 91003 Choriocarcinoma 91013 Choriocarcinoma combined w/ other germ cell ... 91023 "Malignant teratoma, trophoblastic" 91043 Malignant placental site trophoblastic tumor 91053 "Trophoblastic tumor, epithelioid" 91103 "Mesonephroma, malignant" 91200 "Hemangioma, NOS" 91203 Hemangiosarcoma 91210 Cavernous hemangioma 91220 Venous hemangioma 91243 Kupffer cell sarcoma 91300 "Hemangioendothelioma, benign" 91301 "Hemangioendothelioma, NOS" 91303 "Hemangioendothelioma, malignant" 91310 Capillary hemangioma 91333 "Epithelioid hemangioendothelioma, malignant" 91403 Kaposi sarcoma 91500 "Hemangiopericytoma, benign" 91501 "Hemangiopericytoma, NOS" 91503 "Hemangiopericytoma, malignant" 91611 Hemangioblastoma 91703 Lymphangiosarcoma 91803 "Osteosarcoma, NOS" 91813 Chondroblastic osteosarcoma 91823 Fibroblastic osteosarcoma 91833 Telangiectatic osteosarcoma 91843 Osteosarcoma in Paget disease 91853 Small cell osteosarcoma 91863 Central osteosarcoma 91873 Instrosseous well differentiated osteosarcoma 91923 Parosteal osteosarcoma 91933 Periosteal osteosarcoma 91943 High grade surface osteosarcoma 91953 Intracortical osteosarcoma 92203 "Chondrosarcoma, NOS" 92213 Juxtacortical chondrosarcoma 92303 "Chondroblastoma, malignant" 92313 Myxoid chondrosarcoma 92403 Mesenchymal chondrosarcoma 92423 Clear cell chondrosarcoma 92433 Dedifferentiated chondrosarcoma 92501 "Giant cell tumor of bone, NOS" 92503 "Giant cell tumor of bone, malignant" 92511 Giant cell tumor of soft parts 92513 Malignant giant cell tumor of soft parts 92523 Malignant tenosynovial giant cell tumor 92603 Ewing sarcoma 92613 Adamantinoma of long bones 92703 "Odontogenic tumor, malignant" 92903 Ameloblastic odontosarcoma 93103 "Ameloblastoma, malignant" 93303 Ameloblastic fibrosarcoma 93423 Odontogenic carcinosarcoma 93501 Craniopharyngioma 93511 Adamantinomatous craniopharyngioma 93521 Papillary craniopharyngioma 93601 "Pinealoma, NOS" 93611 
Pineocytoma 93623 Pineoblastoma 93630 Melanotic neuroectodermal tumor 93643 Peripheral neuroectodermal tumor 93653 Askin tumor 93703 "Chordoma, NOS" 93713 Chondroid chordoma 93723 Dedifferentiated chordoma 93803 "Glioma, malignant" 93813 Gliomatosis cerebri 93823 Mixed glioma 93831 Subependymoma 93841 Supependymal giant cell astrocytoma 93900 "Choroid plexus papilloma, NOS" 93901 Atypical choroid plexus papilloma 93903 "Choroid plexus papilloma, malignant" 93913 "Ependymoma, NOS" 93923 "Ependymoma, anaplastic" 93933 Papillary ependymoma 93941 Myxopapillary ependymoma 93953 Papillary tumor of the pineal region 94003 "Astrocytoma, NOS" 94013 "Astrocytoma, anaplastic" 94103 Protoplasmic astrocytoma 94113 Gemistocytic astrocytoma 94121 Desmoplastic infantile astrocytoma 94130 Dysembryoplastic neuroepithelial tumor 94203 Fibrillary astrocytoma 94211 Pilocytic astrocytoma 94233 Polar spongioblastoma 94243 Pleomorphic xanthoastrocytoma 94253 Pilomyxoid astrocytoma 94303 Astroblastoma 94311 Angiocentric glioma 94321 Pituicytoma 94403 "Glioblastoma, NOS" 94413 Giant cell glioblastoma 94421 Gliofibroma 94423 Gliosarcoma 94441 Chordoid glioma 94503 "Oligodendroglioma, NOS" 94513 "Oligodendroglioma, anaplastic" 94603 Oligodendroblastoma 94703 "Medulloblastoma, NOS" 94713 Desmoplastic medulloblastoma 94723 Medullomyoblastoma 94733 Primitive neuroectodermal tumor 94743 Large cell medulloblastoma 94803 "Cerebellar sarcoma, NOS" 94900 Ganglioneuroma 94903 Ganglioneuroblastoma 94920 Gangliocytoma 94930 Dysplastic gangliocytoma of cerebellum (Lher... 
95003 "Neuroblastoma, NOS" 95013 "Medulloepithelioma, NOS" 95023 Teratoid medulloepithelioma 95033 "Neuroepithelioma, NOS" 95043 Spongioneuroblastoma 95051 "Ganglioglioma, NOS" 95053 "Ganglioglioma, anaplastic" 95061 Centrol neurocytoma 95083 Atypical teratoid/rhabdoid tumor 95091 Papillary glioneuronal tumor 95103 "Retinoblastoma, NOS" 95113 "Retinoblastoma, differentiated" 95123 "Retinoblastoma, undifferentiated" 95133 "Retinoblastoma, diffuse" 95203 Olfactory neurogenic tumor 95213 Olfactory neurocytoma 95223 Olfactory neuroblastoma 95233 Olfactory neuroepithelioma 95300 "Meningioma, NOS" 95301 "Meningiomatosis, NOS" 95303 "Meningioma, malignant" 95310 Meningothelial meningioma 95320 Fibrous meningioma 95330 Psammomatous meningioma 95340 Angiomatous meningioma 95370 Transitional meningioma 95381 Clear cell meningioma 95383 Papillary meningioma 95391 Atypical meningioma 95393 Meningeal sarcomatosis 95400 "Neurofibroma, NOS" 95401 "Neurofibromatosis, NOS" 95403 Malignant peripheral nerve sheath tumor 95410 Melanotic neurofibroma 95500 Plexiform neurofibroma 95600 "Neurilemoma, NOS" 95601 Neurinomatosis 95603 "Neurilemmoma, malignant" 95613 MPNST with rhabdomyoblastic differentiation 95620 Neurothekeoma 95700 "Neuroma, NOS" 95710 "Perineurioma, NOS" 95713 "Perineurioma, malignant" 95800 "Granular cell tumor, NOS" 95803 "Granular cell tumor, malignant" 95813 Alveolar soft part sarcoma 95903 "Malignant lymphoma, NOS" 95913 "Malignant lymphoma, non-Hodgkin" 95963 Composite Hodgkin and non-Hodgkin lymphoma 95973 Primary cutaneous follicle centre lymphoma 96503 "Hodgkin lymphoma, NOS" 96513 "Hodgkin lymphoma, lymphocyte-rich" 96523 "Hodgkin lymphoma, mixed cellularity, NOS" 96533 "Hodgkin lymphoma, lymphocytic deplet., NOS" 96543 "Hodgkin lymph., lymphocyt. deplet., diffuse ..." 96553 "Hodgkin lymphoma, lymphocyt. deplet., reticular" 96593 "Hodgkin lymph., nodular lymphocyte predom." 
96613 Hodgkin granuloma [obs] 96623 Hodgkin sarcoma [obs] 96633 "Hodgkin lymphoma, nodular sclerosis, NOS" 96643 "Hodgkin lymphoma, nod. scler., cellular phase" 96653 "Hodgkin lymphoma, nod. scler., grade 1" 96673 "Hodgkin lymphoma, nod. scler., grade 2" 96703 "ML, small B lymphocytic, NOS" 96713 "ML, lymphoplasmacytic" 96733 Mantle cell lymphoma 96753 "ML, mixed sm. and lg. cell, diffuse" 96783 Primary effusion lymphoma 96793 Mediastinal large B-cell lymphoma 96803 "ML, large B-cell, diffuse" 96843 "ML, large B-cell, diffuse, immunoblastic, NOS" 96873 "Burkitt lymphoma, NOS" 96883 T-cell/histiocyte rich large B-cell lymphoma 96893 Splenic marginal zone B-cell lymphoma 96903 "Follicular lymphoma, NOS" 96913 "Follicular lymphoma, grade 2" 96953 "Follicular lymphoma, grade 1" 96983 "Follicular lymphoma, grade 3" 96993 "Marginal zone B-cell lymphoma, NOS" 97003 Mycosis fungoides 97013 Sezary syndrome 97023 "Mature T-cell lymphoma, NOS" 97053 Angioimmunoblastic T-cell lymphoma 97083 Subcutaneous panniculitis-like T-cell lymphoma 97093 "Cutaneous T-cell lymphoma, NOS" 97123 Intravascular large B-cell lymphoma 97143 "Anaplastic large cell lymphoma, T-cell and N..." 97163 Hepatosplenic T-cell lymphoma 97173 Intestinal T-cell lymphoma 97183 Primary cutan. CD30+ T-cell lymphoprolif. di... 97193 "NK/T-cell lymphoma, nasal and nasal-type" 97243 Syst. EBV pos. T-cell lymphoprol. disease 97253 Hydroa vacciniforme-like lymphoma 97263 Primary cutaneous gamma-delta T-cell lymphoma 97273 "Precursor cell lymphoblastic lymphoma, NOS" 97283 Precursor B-cell lymphoblastic lymphoma 97293 Precursor T-cell lymphoblastic lymphoma 97313 "Plasmacytoma, NOS" 97323 Multiple myeloma 97333 Plasma cell leukemia 97343 "Plasmacytoma, extramedullary" 97353 Plasmablastic lymphoma 97373 ALK postive large B-cell lymphoma 97383 Large B-cell lymph. arising in HHV8-assoc. 
multi 97403 Mast cell sarcoma 97411 Indolent systemic mastocytosis 97413 Malignant mastocytosis 97423 Mast cell leukemia 97503 Malignant histiocytosis 97511 "Langerhans cell histiocytosis, NOS" 97513 "Langerhans cell histiocytosis, NOS" 97521 "Langerhans cell histiocytosis, unifocal" 97531 "Langerhans cell histiocytosis, multifocal" 97543 "Langerhans cell histiocytosis, disseminated" 97553 Histiocytic sarcoma 97563 Langerhans cell sarcoma 97573 Interdigitating dendritic cell sarcoma 97583 Follicular dendritic cell sarcoma 97593 Fibroblastic reticular cell tumor 97603 "Immunoproliferative disease, NOS" 97613 Waldenstrom macroglobulinemia 97623 "Heavy chain disease, NOS" 97643 Immunoproliferative small intestinal disease 98003 "Leukemia, NOS" 98013 "Acute leukemia, NOS" 98053 Acute biphenotypic leukemia 98063 Mix. phenotype ac. leukemia with t;BCR-ABL1 98073 Mix. phenotype ac. leukemia with t;MLL rearrange 98083 "Mixed phenotype acute leukemia, B/myeloid, NOS" 98093 "Mixed phenotype acute leukemia, T/myeloid, NOS" 98113 "B lymphoblastic leukemia/lymphoma, NOS" 98123 B lymphoblast. leukemia/lymphoma with t;BCR-ABL1 98133 B lymphoblast. leukemia/lymphoma with t;MLL rear 98143 B lymphoblast. leukemia/lymphoma with t;TEL-AML1 98153 B lymphoblastic leukemia/lymphoma with hyperdipl 98163 B lymphoblast. leukemia/lymph. with hypodiploidy 98173 B lymphoblast. leukemia/lymphoma with t;IL3-IGH 98183 B lymphoblast. leukemia/lymphoma with t;E2A PBX1 98203 "Lymphoid leukemia, NOS" 98233 B-cell chr. lymph. leuk./small lymphocytic l... 98263 Burkitt cell leukemia 98273 Adult T-cell leukemia/lymphoma (HTLV-1 pos.) 
98283 "Acute lymphoblastic leukemia, L2 type, NOS" 98313 T-cell large granular lymphocytic leukemia 98323 "Prolymphocytic leukemia, NOS" 98333 "Prolymphocytic leukemia, B-cell type" 98343 "Prolymphocytic leukemia, T-cell type" 98353 "Precursor cell lymphoblastic leukemia, NOS" 98363 Precursor B-cell lymphoblastic leukemia 98373 Precursor T-cell lymphoblastic leukemia 98403 "Acute myeloid leukemia, M6 type" 98603 "Myeloid leukemia, NOS" 98613 Acute myeloid leukemia 98633 "Chronic myeloid leukemia, NOS" 98653 Acute myeloid leukemia with t;DEK-NUP214 98663 "Acute promyelocytic leuk.,t(15;17)(q22;q11-12)" 98673 Acute myelomonocytic leukemia 98693 Acute myeloid leukemia with inv or t;RPN1-EVI1 98703 Acute basophilic leukemia 98713 Ac. myelomonocytic leuk. w abn. mar. eosinop... 98723 "Acute myeloid leukemia, minimal differentiation" 98733 Acute myeloid leukemia without maturation 98743 Acute myeloid leukemia with maturation 98753 "Chronic myelogenous leukemia, BCR/ABL positive" 98763 "Atypical chronic myeloid leuk., BCR/ABL nega..." 98913 Acute monocytic leukemia 98953 Acute myeloid leuk. with myelodysplasia-related 98963 "Acute myeloid leukemia, t(8;21)(q22;q22)" 98973 "Acute myeloid leukemia, 11q23 abnormalities" 98981 Transient abnormal myelopoiesis 98983 Myeloid leukemia associated with Down Syndrome 99103 Acute megakaryoblastic leukemia 99113 Acute myeloid leukemia (megakar. blast.) 
with t; 99203 Therapy related myeloid neoplasm 99303 Myeloid sarcoma 99313 Acute panmyelosis with myelofibrosis 99403 Hairy cell leukemia 99453 "Chronic myelomonocytic leukemia, NOS" 99463 Juvenile myelomonocytic leukemia 99483 Aggressive NK-cell leukemia 99503 Polycythemia vera 99603 "Myeloproliferative neoplasm, NOS" 99613 Primary myelofibrosis 99623 Essential thrombocythemia 99633 Chronic neutrophilic leukemia 99643 "Chronic eosinophilic leukemia, NOS" 99653 Myeloid&lymphoid neoplasms with PDGFRB rearrange 99663 Myeloid neoplasms with PDGFRB rearrangement 99673 Myeloid&lymphoid neoplasm with FGFR1 abnormaliti 99701 "Lymphoproliferative disorder, NOS" 99711 "Post transplant lymphoproliferative disorder, NO" 99713 Polymorphic post transplant lymphoproliferative 99751 "Myeloproliferative disease, NOS" 99753 "Myeloproliferative neoplasm, unclassifiable" 99803 Refractory anemia 99823 Refractory anemia with sideroblasts 99833 Refractory anemia with excess blasts 99843 Refract. anemia with excess blasts in transf... 99853 Refractory cytopenia with multilineage dyspl... 99863 Myelodysplastic syndr. with 5q deletion synd... 
99873 "Therapy-related myelodysplastic syndrome, NOS" 99893 "Myelodysplastic syndrome, NOS" 99913 Refractory neutropenia 99923 Refractory thrombocytopenia 341 PLOCCD PLCCD1 PLCCD2 PLCCD3 0 external upper lip 1 external lower lip 2 "external lip, NOS" 3 mucose of uppper lip 4 mucosa of lower lip 5 "mucosa of lip, NOS" 6 commissure of lip 8 overlapping lesion of lip 9 "lip, NOS" 19 base of tongue 20 "dorsal surface of tongue, NOS" 21 border of tongue 22 "ventral surface of tongue, NOS" 23 anterior 2/3 of tongue 24 lingual tonsil 28 overlapping lesion of tongue 29 "tongue, NOS" 30 upper gum 31 lower gum 39 "gum, NOS" 40 anterior floor of mouth 41 lateral floor of mouth 48 overlapping lesion of floor of mouth 49 "floor of mouth, NOS" 50 hard palate 51 "soft palate, NOS" 52 uvula 58 overlapping lesion of palate 59 "palate, NOS" 60 cheek mucosa 61 vestibule of mouth 62 retromolar area 68 overlappinglesionofotherandunspecifiedpartso... 69 "mouth, NOS" 79 parotid gland 80 submandibular gland 81 sublingual gland 88 overlapping lesion of major salivary glands 89 "major salivary gland, NOS" 90 tonsillar fossa 91 tonsillar pillar 98 overlapping lesion of tonsil 99 "tonsil, NOS" 100 vallecula 101 anterior surface of epiglottis 102 lateral wall of oropharynx 103 posterior wall of oropharynx 104 branchial cleft 108 overlapping lesion of oropharynx 109 "oropharynx, NOS" 110 superior wall of nasopharyx 111 posterior wall of nasopharyx 112 lateral wall of nasopharyx 113 anterior wall of nasopharyx 118 overlapping lesion of nasopharyx 119 "nasopharyx, NOS" 129 "nasopharyxyriform sinus, NOS" 130 postcricoid region 131 hypopharyngeal aspect of aryepiglottic fold 132 posterior wall of hypopharynx 138 overlapping lesion of hypopharyx 139 "hypopharyx, NOS" 140 "pharyx, NOS" 142 waldeyer ring 148 "overlappinglesionoflip,oralcavityandpharynx" 150 cervical esophagus 151 thoracic esophagus 152 abdominal esophagus 153 upper third of esophagus 154 middle third of esophagus 155 lower third of 
esophagus 158 overlapping lesion of esophagus 159 "esophagus, NOS" 160 "cardia, NOS" 161 fundus of stomach 162 body of stomach 163 gastric antrum 164 pylorus 165 lesser curvature of stomach 166 greater curvature of stomach 168 overlapping lesion of stomach 169 "stomach, NOS" 170 duodenum 171 jejunum 172 ileum 173 Meckel diverticulum 178 overlapping lesion of small intestine 179 "small intestine, NOS" 180 cecum 181 appendix 182 ascending colon 183 hepatic flexure of colon 184 transverse colon 185 splenic flexure of colon 186 descending colon 187 sigmoid colon 188 overlapping lesion of colon 189 "colon, NOS" 199 rectosigmoid junction 209 "rectum, NOS" 210 "anus, NOS" 211 anal canal 212 cloacogenic zone 218 "overlappinglesionofrectum,anusandanalcanal" 220 liver 221 intrahepatic bile duct 239 "gallbladder, NOS" 240 extrahepatic bile duct 241 ampulla of Vater 248 overlapping lesion of biliary tract 249 "billary tract, NOS" 250 head of pancreas 251 body of pancreas 252 tail of pancreas 253 pancreatic duct 254 islets of Langerhans 257 other specified parts of pancreas 258 overlapping lesion of pancreas 259 "pancreas, NOS" 260 "intestinal tract, NOS" 268 overlapping lesion of digestive system 269 "gastrointestinal tract, NOS" 300 nasal cavity 301 middle ear 310 maxillary sinus 311 ethmoid sinus 312 frontal sinus 313 sphenoid sinus 318 overlapping lesion of accessory sinuses 319 "accessory sinus, NOS" 320 glottis 321 supraglottis 322 subglottis 323 laryngeal cartilage 328 overlapping lesion of larynx 329 "larynx, NOS" 339 trachea 340 main bronchus 341 "upper lobe, lung" 342 "middle lobe, lung" 343 "lower lobe, lung" 348 overlapping lesion of lung 349 "lung, NOS" 379 thymus 380 heart 381 anterior mediastinum 382 posterior mediastinum 383 "mediastinum, NOS" 384 "pleura, NOS" 388 "overlappinglesionofheart,mediastinumandpleura" 390 "upper respiratory tract, NOS" 398 overlappinglesionofrespiratorysystemandintra... 
399 ill-defined sites within respiratory system 400 "longboneofupperlimb,scapulaandassociatedjoints" 401 short bone of upper limb and associated joints 402 long bones of lower limb and associated joints 403 short bones of lower limb and associated joints 408 "overlappinglesionofbones,jointsandarticularc..." 409 "bones of lomb, NOS" 410 bones of skull and face and associated joints 411 mandible 412 vertebral column 413 "rib, sternum, clavicle and associated joints" 414 "pelvicbones,sacrum,coccyxandassociatedjoints" 418 "overlappinglesionofbones,jointsandarticularc..." 419 "bone, NOS" 420 blood 421 bone marrow 422 spleen 423 "reticuloendothelial system, NOS" 424 "hematopoietic system, NOS" 440 "skin of lip, NOS" 441 eyelid 442 external ear 443 skin of other and unspecified parts of face 444 skin of scalp and neck 445 skin of trunc 446 skin of upper limb and shoulder 447 skin of lower limb and hip 448 overlapping lesion of skin 449 "skin, NOS" 470 peripheralnervesandautonomicnervoussystemofh... 471 peripheralnervesandautonomicnervoussystemofu... 472 peripheralnervesandautonomicnervoussystemofl... 473 peripheralnervesandautonomicnervoussystemoft... 474 peripheralnervesandautonomicnervoussystemofa... 475 peripheralnervesandautonomicnervoussystemofp... 476 peripheralnervesandautonomicnervoussystemoft... 478 overlappinglesionofperipheralnervesandautono... 479 "autonomic nervous system, NOS" 480 retroperitoneum 481 specified parts of peritoneum 482 "peritoneum, NOS" 488 overlapping lesion of retroperitoneum 490 "connective,subcutaneousandothersofttissuesof..." 491 "connective,subcutaneousandothersofttissuesof..." 492 "connective,subcutaneousandothersofttissuesof..." 493 "connective,subcutaneousandothersofttissuesof..." 494 "connective,subcutaneousandothersofttissuesof..." 495 "connective,subcutaneousandothersofttissuesof..." 496 "connective,subcutaneousandothersofttissuesof..." 498 "overlappinglesionofconnective,subcutaneousan..." 
499 "connective,subcutaneousandothersofttissues,NOS" 500 nipple 501 central portion of breast 502 upper-inner quadrant of breast 503 lower-inner quadrant of breast 504 upper-outer quadrant of breast 505 upper-outer quadrant of breast 506 axillary tail of breast 508 overlapping lesion of breast 509 "breast, NOS" 510 labium majus 511 labium minus 512 clitoris 518 overlapping lesion of vulva 519 "vulva, NOS" 529 "vagina, NOS" 530 endocervix 531 exocervix 538 overlapping lesion of cervix uteri 539 "cervix uteri, NOS" 540 isthmus uteri 541 endometrium 542 myometrium 543 fundus uteri 548 overlapping lesion of corpus uteri 549 "corpus uteri, NOS" 559 "uterus, NOS" 569 "ovary, NOS" 570 fallopian tube 571 broad ligament 572 round ligament 573 parametrium 574 uterine adnexa 577 other specified parts of female genital organs 578 overlapping lesion of female genital organs 579 "female genital tract, NOS" 589 "placenta, NOS" 600 prepuce 601 glans penis 602 body of penis 608 overlapping lesion of penis 609 "penis, NOS" 619 prostate gland 620 undescended testis 621 descended testis 629 "testis, NOS" 630 epididymis 631 spermatic cord 632 "scrotum, NOS" 637 other specified parts of male genital organs 638 overlapping lesion of male genital organs 639 "male genital tract, NOS" 649 "kidney, NOS" 659 renal pelvis 669 ureter 670 trigone of bladder 671 dome of bladder 672 lateral wall of bladder 673 anterior wall of bladder 674 posterior of bladder 675 bladder neck 676 ureteric orifice 677 urachus 678 overlapping lesion of bladder 679 "bladder, NOS" 680 urethra 681 paraurethral gland 688 overlapping lesion of urinary organs 689 "urinary system, NOS" 690 conjuctiva 691 "cornea, NOS" 692 retina 693 choroid 694 ciliary body 695 lacrimal gland 696 "orbit, NOS" 698 overlapping lesion of eye and adnexa 699 "eye, NOS" 700 cerebral meninges 701 spinal meninges 709 "meninges, NOS" 710 cerebrum 711 frontal lobe 712 temperal lobe 713 parietal lobe 714 occipital lobe 715 "ventricle, NOS" 716 
cerebellum 717 brain stem 718 overlapping lesion of brain 719 "brain, NOS" 720 spinal cord 721 cauda equina 722 olfactory nerve 723 optic nerve 724 acoustic nerve 725 "cranial nerve, NOS" 728 overlappinglesionofbrainandcentralnervoussystem 729 "nervous system, NOS" 739 "thyroid gland, NOS" 740 cortex of adrenal gland 741 medulla of adrenal gland 749 "adrenal gland, NOS" 750 parathyroid gland 751 pituitary gland 752 craniopharyngeal duct 753 pineal gland 754 carotid body 755 aortic body and other paraganglia 758 overlappinglesionofendocrineglandsandrelated... 759 "endocrine gland, NOS" 760 "head, face or neck, NOS" 761 "thorax, NOS" 762 "abdomen, NOS" 763 "pelvis, NOS" 764 "upper limb, NOS" 765 "lower limb, NOS" 767 other ill-defined sites 768 overlapping lesion of ill-defined sites 770 "lymph nodes of head, face and neck" 771 intrathoracic lymph nodes 772 intra-abdominal lymph nodes 773 lymph nodes of axilla of arm 774 lymph nodes of inguinal region or leg 775 pelvic lymph nodes 778 lymph nodes of multiple regions 779 "lymph node, NOS" 809 unknown primary sites 343 IFCDATR 1 yes 2 no 3 statement by physician 4 IC will follow \ No newline at end of file diff --git a/test_data/full_dataset/studies/diagnosis_codebook.txt.sha1 b/test_data/full_dataset/studies/diagnosis_codebook.txt.sha1 new file mode 100644 index 0000000..24531c1 --- /dev/null +++ b/test_data/full_dataset/studies/diagnosis_codebook.txt.sha1 @@ -0,0 +1 @@ +8cc20a3917db3e251d9426d078ce74969e531931 PMCST000AAA_diagnosis_codebook.txt diff --git a/test_data/full_dataset/studies/individual.txt b/test_data/full_dataset/studies/individual.txt new file mode 100644 index 0000000..4d7974e --- /dev/null +++ b/test_data/full_dataset/studies/individual.txt @@ -0,0 +1,18 @@ +"MARK:","ID","IDAA","INDIVIDUAL_ID","SEX","IFCDATR","IFCGIV","IFCMAT","IFCCOM","DTOB" +,217,8000208,PAT1,1,2,,,,15/09/2000 0:00:00 +,217,8000216,PAT2,1,2,,,,18/02/1989 0:00:00 +,217,8000233,PAT3,1,2,,,,07/11/2001 0:00:00 
+,217,8000250,PAT4,1,2,,,,23/06/1998 0:00:00 +,217,8000268,PAT5,1,2,,,,23/04/2001 0:00:00 +,217,8000273,PAT6,1,2,,,,28/03/2001 0:00:00 +,217,8000323,PAT7,1,2,,,,08/01/1999 0:00:00 +,217,8000333,PAT8,1,2,,,,25/12/1997 0:00:00 +,217,8000345,PAT9,1,2,,,,29/09/1991 0:00:00 +,217,8000217,PAT10,1,2,,,,18/02/1989 0:00:00 +,217,8000238,PAT11,1,2,,,,07/11/2001 0:00:00 +,217,8000251,PAT12,2,2,,,,23/06/1998 0:00:00 +,217,8000269,PAT13,2,2,,,,23/04/2001 0:00:00 +,217,8000274,PAT14,2,2,,,,28/03/2001 0:00:00 +,217,8000324,PAT15,2,2,,,,08/01/1999 0:00:00 +,217,8000334,PAT16,2,2,,,,25/12/1997 0:00:00 +,217,8000346,PAT17,2,2,,,,29/09/1991 0:00:00 diff --git a/test_data/full_dataset/studies/individual.txt.sha1 b/test_data/full_dataset/studies/individual.txt.sha1 new file mode 100644 index 0000000..92c2fd7 --- /dev/null +++ b/test_data/full_dataset/studies/individual.txt.sha1 @@ -0,0 +1 @@ +d70a6c8727d3d8dd40eaf2cf87eb71d9ee581387 PMCST000AAA_individual.txt diff --git a/test_data/full_dataset/studies/individual_codebook.txt b/test_data/full_dataset/studies/individual_codebook.txt new file mode 100644 index 0000000..9d23109 --- /dev/null +++ b/test_data/full_dataset/studies/individual_codebook.txt @@ -0,0 +1 @@ +1 SEX 1 male 2 female 9 unknown 2 SIGYN IFCGIV IFCMAT IFCCOM IFCREF NOREGIS CLEXPYN CYTOLYN HISTOYN IMAGEYN TMARKYN PATDCOGR PROTYN ELIGYN TXSTART SIGYNX SIGYNY 1 yes 2 no 8 not applicable 8 HOSPREC HOSPDIAG HOSPASS HOSPTRAN 200 AMC 201 UMCG 202 AZM 203 CZE 204 ErasmusMC 207 JBZ 208 LUMC 213 EZT 214 Radboudumc 216 VUMC 217 UMCU 220 PMC 332 DIAGCD PDGCD1 PDGCD2 PDGCD3 61000 FanconiÔs anaemia 61900 Aplastic anaemia 70000 KostmannÔs disease 76100 HLH 80000 "Neoplasm, benign" 80001 "Neoplasm, uncertain whether benign or malignant" 80003 "Neoplasm, malignant" 80010 "Tumor cells, benign" 80011 "Tumor cells, uncertain whether benign or mal..." 
80013 "Tumor cells, malignant" 80023 "Malignant tumor, small cell type" 80033 "Malignant tumor, giant cell type" 80043 "Malignant tumor, spindle cell type" 80050 "Clear cell tumor, NOS" 80053 "Malignant tumor, clear cell type" 80100 "Epithelial tumor, benign" 80102 "Carcinoma in situ, NOS" 80103 "Carcinoma, NOS" 80113 "Epithelioma, malignant" 80123 "Large cell carcinoma, NOS" 80133 Large cell neuroendocrine carcinoma 80143 Large cell carcinoma with rhabdoid phenotype 80153 Glassy cell carcinoma 80203 "Carcinoma, undifferentiated type, NOS" 80213 "Carcinoma, anaplastic type, NOS" 80223 Pleomorphic carcinoma 80303 Giant cell and spindle cell carcinoma 80313 Giant cell carcinoma 80323 Spindle cell carcinoma 80333 Pseudosarcomatous carcinoma 80343 Polygonal cell carcinoma 80353 Carcinoma with osteoclast-like giant cells 80413 "Small cell carcinoma, NOS" 80423 Oat cell carcinoma 80433 "Small cell carcinoma, fusiform cell" 80443 "Small cell carcinoma, intermediate cell" 80453 Combined small cell carcinoma 80463 Non-small cell carcinoma 80502 Papillary carcinoma in situ 80503 "Papillary carcinoma, NOS" 80513 "Verrucous carcinoma, NOS" 80522 "Papillary squamous cell carcinoma, non-invasive" 80523 Papillary squamous cell carcinoma 80702 "Squamous cell carcinoma in situ, NOS" 80703 "Squamous cell carcinoma, NOS" 80713 "Sq. cell carcinoma, keratinizing, NOS" 80723 "Sq. cell carcinoma, lg. cell, non-ker." 80733 "Sq. cell carcinoma, sm. cell, non-ker." 80743 "Sq. cell carcinoma, spindle cell" 80753 "Squamous cell carcinoma, adenoid" 80762 Sq. cell carc. in situ with question. stroma... 80763 "Sq. 
cell carcinoma, micro-invasive" 80770 "Squamous intraepithelial neoplasia, low grade" 80772 "Squamous intraepithelial neoplasia, high grade" 80783 Squamous cell carcinoma with horn formation 80802 Queyrat erythroplasia 80812 Bowen disease 80823 Lymphoepithelial carcinoma 80833 Basaloid squamous cell carcinoma 80843 "Squamous cell carcinoma, clear cell type" 80903 "Basal cell carcinoma, NOS" 80913 Multifocal superficial basal cell carcinoma 80923 "Infiltrating basal cell carcinoma, NOS" 80933 "Basal cell carcinoma, fibroepithelial" 80943 Basosquamous carcinoma 80953 Metatypical carcinoma 80973 "Basal cell carcinoma, nodular" 80983 Adenoid basal cell carcinoma 81023 Trichilemmocarcinoma 81103 Pilomatrix carcinoma 81202 Transitional cell carcinoma in situ 81203 "Transitional cell carcinoma, NOS" 81213 Schneiderian carcinoma 81223 "Trans. cell carcinoma, spindle cell" 81233 Basaloid carcinoma 81243 Cloacogenic carcinoma 81302 "Papillary trans. cell carcinoma, non-invasive" 81303 Papillary trans. cell carcinoma 81313 "Transitional cell carcinoma, micropapillary" 81400 "Adenoma, NOS" 81402 Adenocarcinoma in situ 81403 "Adenocarcinoma, NOS" 81413 Scirrhous adenocarcinoma 81423 Linitis plastica 81433 Superficial spreading adenocarcinoma 81443 "Adenocarcinoma, intestinal type" 81453 "Carcinoma, diffuse type" 81460 Monomorphic adenoma 81473 Basal cell adenocarcinoma 81480 "Glandular intraepithelial neoplasia, low grade" 81482 "Glandular intraepithelial neoplasia, high grade" 81500 "Pancreatic endocrine tumor, benign" 81501 "Pancreatic endocrine tumor, NOS" 81503 "Pancreatic endocrine tumor, malignant" 81513 "Insulinoma, malignant" 81523 "Glucagonoma, malignant" 81533 "Gastrinoma, malignant" 81543 "Mix. 
pancreatic endocrine&exocrine tumor, mal" 81553 Vipoma 81563 "Somatostatinoma, malignant" 81573 "Enteroglucagonoma, malignant" 81581 "Endocrine tumor, functioning, NOS" 81603 Cholangiocarcinoma 81613 Bile duct cystadenocarcinoma 81623 Klatskin tumor 81630 "Pancreatobiliary neoplasm, non-invasive" 81632 "Pap. neoplasm,pancreatobiliary-type,high gr. int" 81633 Pancreatobiliary-type carcinoma 81703 "Hepatocellular carcinoma, NOS" 81713 "Hepatocellular carcinoma, fibrolamellar" 81723 "Hepatocellular carcinoma, scirrhous" 81733 "Hepatocellular carcinoma, spindle cell variant" 81743 "Hepatocellular carcinoma, clear cell type" 81753 "Hepatocellular carcinoma, pleomorphic type" 81803 Comb. hepatocel. carcinoma & cholangiocarcinoma 81903 Trabecular adenocarcinoma 82003 Adenoid cystic carcinoma 82012 Cribriform carcinoma in situ 82013 Cribriform carcinoma 82102 Adenocarcinoma in situ in adenomatous polyp 82103 Adenocarcinoma in adenomatous polyp 82113 Tubular adenocarcinoma 82133 Serrated adenocarcinoma 82143 Parietal cell carcinoma 82153 Adenocarcinoma of anal glands 82202 Adenocarcinoma in situ in familial polyp. coli 82203 Adenocarcinoma in adenoma. polyposis coli 82212 Adenocarc. in situ in mult. adenomatous polyps 82213 Adenocarcinoma in mult. adenomatous polyps 82302 "Duct carcinoma in situ, solid type" 82303 "Solid carcinoma, NOS" 82313 Carcinoma simplex 82401 Carcinoid tumor of uncertain malignant potential 82403 "Carcinoid tumor, malignant" 82413 Enterochromaffin cell carcinoid 82423 "Enterochromaffin-like cell tumor, malignant" 82433 Goblet cell carcinoid 82443 Mixed adenoneuroendocrine carcinoma 82453 Adenocarcinoid tumor 82463 Neuroendocrine carcinoma 82473 Merkel cell carcinoma 82493 Atypical carcinoid tumor 82503 Bronchiolo-alveolar adenocarcinoma 82513 Alveolar adenocarcinoma 82523 "Bronchiolo-alveolar carcinoma, non-mucinous" 82533 "Bronchiolo-alveolar carcinoma, mucinous" 82543 "Bronch.-alv. carc., mixed mucin. and non-muc..." 
82553 Adenocarcinoma with mixed subtypes 82600 "Papillary adenoma, NOS" 82603 "Papillary adenocarcinoma, NOS" 82612 Adenocarcinoma in situ in villous adenoma 82613 Adenocarcinoma in villous adenoma 82623 Villous adenocarcinoma 82632 Adenocarcinoma in situ in tubulovillous adenoma 82633 Adenocarcinoma in tubulovillous adenoma 82653 "Micropapillary carcinoma, NOS" 82700 Chromophobe adenoma 82703 Chromophobe carcinoma 82710 Prolactinoma 82720 "Pituitary adenoma, NOS" 82723 "Pituitary carcinoma, NOS" 82800 Acidophil adenoma 82803 Acidophil carcinoma 82810 Mixed acidophil-basophil adenoma 82813 Mixed acidophil-basophil carcinoma 82900 Oxyphilic adenoma 82903 Oxyphilic adenocarcinoma 83000 Basophil adenoma 83003 Basophil carcinoma 83100 Clear cell adenoma 83103 "Clear cell adenocarcinoma, NOS" 83123 Renal cell carcinoma 83133 Clear cell adenocarcinofibroma 83143 Lipid-rich carcinoma 83153 Glycogen-rich carcinoma 83163 Cyst-associated renal cell carcinoma 83173 "Renal cell carcinoma, chromophobe type" 83183 "Renal cell carcinoma, sarcomatoid" 83193 Collecting duct carcinoma 83203 Granular cell carcinoma 83223 Water-clear cell adenocarcinoma 83230 Mixed cell adenoma 83233 Mixed cell adenocarcinoma 83303 "Follicular adenocarcinoma, NOS" 83313 Follicular adenocarcinoma well diff. 
83323 Follicular adenocarcinoma trabecular 83333 Fetal adenocarcinoma 83353 "Follicular carcinoma, minimally invasive" 83373 Insular carcinoma 83403 "Papillary carcinoma, follicular variant" 83413 Papillary microcarcinoma 83423 "Papillary carcinoma, oxyphilic cell" 83433 "Papillary carcinoma, encapsulated" 83443 "Papillary carcinoma, columnar cell" 83453 Medullary carcinoma with amyloid stroma 83463 Mixed medullary-follicular carcinoma 83473 Mixed medullary-papillary carcinoma 83503 Nonencapsulated sclerosing carcinoma 83700 "Adrenal cortical adenoma, NOS" 83703 Adrenal cortical carcinoma 83803 Endometrioid carcinoma 83813 "Endometrioid adenofibroma, malignant" 83823 "Endometrioid adenocarcinoma, secretory variant" 83833 "Endometrioid adenocarcinoma, ciliated cell v..." 83843 "Adenocarcinoma, endocervical type" 83903 Skin appendage carcinoma 84003 Sweat gland adenocarcinoma 84013 Apocrine adenocarcinoma 84023 "Nodular hidradenoma, malignant" 84033 Malignant eccrine spiradenoma 84073 Sclerosing sweat duct carcinoma 84083 Eccrine papillary adenocarcinoma 84093 "Eccrine poroma, malignant" 84103 Sebaceous adenocarcinoma 84133 Eccrine adenocarcinoma 84203 Ceruminous adenocarcinoma 84303 Mucoepidermoid carcinoma 84403 "Cystadenocarcinoma, NOS" 84413 "Serous cystadenocarcinoma, NOS" 84421 "Serous cystadenoma, borderline malignancy (C..." 84503 "Papillary cystadenocarcinoma, NOS" 84511 "Papillary cystadenoma, borderline malignancy..." 84523 Solid pseudopapillary carcinoma 84532 "Intraductal papillary-mucinous carcinoma, no..." 84533 "Intraductal papillary-mucinous carcinoma, in..." 84603 Papillary serous cystadenocarcinoma 84613 Serous surface papillary carcinoma 84621 Serous papillary cystic tumor of borderline ... 84702 "Mucinous cystadenocarcinoma, non-invasive" 84703 "Mucinous cystadenocarcinoma, NOS" 84713 Papillary mucinous cystadenocarcinoma 84721 Mucinous cystic tumor of borderline malignan... 84731 "Papillary mucinous cystadenoma, borderline m..." 
84803 Mucinous adenocarcinoma 84813 Mucin-producing adenocarcinoma 84823 "Mucinous adenocarcinoma, endocervical type" 84903 Signet ring cell carcinoma 85002 "Intraductal carcinoma, noninfiltrating, NOS" 85003 "Infiltrating duct carcinoma, NOS" 85012 "Comedocarcinoma, non-infiltrating" 85013 "Comedocarcinoma, NOS" 85023 Secretory carcinoma of breast 85032 Noninfiltrating intraductal papillary adenoc... 85033 Intraductal papillary adenocarcinoma with in... 85042 Noninfiltrating intracystic carcinoma 85043 "Intracystic carcinoma, NOS" 85072 Intraductal micropapillary carcinoma 85083 Cystic hypersecretory carcinoma 85103 "Medullary carcinoma, NOS" 85123 Medullary carcinoma with lymphoid stroma 85133 Atypical medullary carcinoma 85143 "Duct carcinoma, desmoplastic type" 85202 Lobular carcinoma in situ 85203 "Lobular carcinoma, NOS" 85213 Infiltrating ductular carcinoma 85222 Intraductal and lobular in situ carcinoma 85223 Infiltrating duct and lobular carcinoma 85233 Infiltr. duct mixed with other types of carc... 85243 Infiltrating lobular mixed with other types ... 85253 Polymorphous low grade adenocarcinoma 85303 Inflammatory carcinoma 85403 "Paget disease, mammary" 85413 Paget dis. & infil. duct carcinoma 85423 "Paget disease, extramammary" 85433 Paget disease and intraductal ca. 85503 Acinar cell carcinoma 85513 Acinar cell cystadenocarcinoma 85603 Adenosquamous carcinoma 85613 "Warthin tumor, malignant" 85623 Epithelial-myoepithelial carcinoma 85703 Adenocarcinoma with squamous metaplasia 85713 Adenocarcinoma w cartilag. & oss. metaplas. 85723 Adenocarcinoma with spindle cell mataplasia 85733 Adenocarcinoma with apocrine metaplasia 85743 Adenocarcinoma with neuroendocrine differen. 
85753 "Metaplastic carcinoma, NOS" 85763 Hepatoid adenocarcinoma 85803 "Thymoma, malignant, NOS" 85813 "Thymoma, type A, malignant" 85823 "Thymoma, type AB, malignant" 85833 "Thymoma, type B1, malignant" 85843 "Thymoma, type B2, malignant" 85853 "Thymoma, type B3, malignant" 85863 "Thymic carcinoma, NOS" 85883 Spindle epithelial tumor with thymus-like el... 85893 Carcinoma showing thymus-like element 85903 "Ovarian stromal tumor, mal." 86003 "Thecoma, malignant" 86201 "Granulosa cell tumor, adult type" 86203 "Granulosa cell tumor, malignant" 86211 Granulosa cell-theca cell tumor 86221 "Granulosa cell tumor, juvenile" 86303 "Androblastoma, malignant" 86311 Sertoli-Leydig cell tumor of intermediate differ 86313 "Sertoli-Leydig cell tumor, poorly differenti..." 86323 "Gynandroblastoma, malignant" 86343 "Sertoli-Leydig cl tum., p.d. w heterologous ..." 86401 "Sertoli cell tumor, NOS" 86403 Sertoli cell carcinoma 86501 "Leydig cell tumor, NOS" 86503 "Leydig cell tumor, malignant" 86703 "Steroid cell tumor, malignant" 86801 "Paraganglioma, NOS" 86803 "Paraganglioma, malignant" 86913 "Aortic body tumor, malignant" 86923 "Carotid body tumor, malignant" 86933 "Extra-adrenal paraganglioma, malignant" 87000 Pheochromocytoma NOS 87003 Pheochromocytoma 87103 Glomangiosarcoma 87202 Melanoma in situ 87203 "Malignant melanoma, NOS" 87213 Nodular melanoma 87223 Balloon cell melanoma 87233 "Malignant melanoma, regressing" 87280 Diffuse melanocytosis 87281 Meningeal melanocytoma 87283 Meningeal melanomatosis 87303 Amelanotic melanoma 87403 Mal. melanoma in junctional nevus 87412 "Precancerous melanosis, NOS" 87413 Mal. melanoma in precan. melanosis 87422 Lentigo maligna 87423 Lentigo maligna melanoma 87433 Superficial spreading melanoma 87443 "Acral lentiginous melanoma, malig." 87453 "Desmoplastic melanoma, malignant" 87463 Mucosal lentiginous melanoma 87613 Mal. melanoma in giant pigmented nevus 87703 Mixed epithel. 
& spindle cell melanoma 87713 Epithelioid cell melanoma 87723 "Spindle cell melanoma, NOS" 87733 "Spindle cell melanoma, type A" 87743 "Spindle cell melanoma, type B" 87803 "Blue nevus, malignant" 88000 "Soft tissue tumor, benign" 88003 "Sarcoma, NOS" 88013 Spindle cell sarcoma 88023 Giant cell sarcoma 88033 Small cell sarcoma 88043 Epithelioid sarcoma 88053 Undifferentiated sarcoma 88063 Desmoplastic small round cell tumor 88100 "Fibroma, NOS" 88103 "Fibrosarcoma, NOS" 88113 Fibromyxosarcoma 88123 Periosteal fibrosarcoma 88133 Fascial fibrosarcoma 88143 Infantile fibrosarcoma 88150 Solitary fibrous tumor 88153 "Solitary fibrous tumor, malignant" 88211 "Fibromatosis, aggressive" 88240 Myofibroma 88241 Myofibromatosis 88251 "Myofibroblastic tumor, NOS" 88300 "Fibrous histiocytoma, benign" 88303 "Fibrous histiocytoma, malignant" 88323 "Dermatofibrosarcoma, NOS" 88333 Pigmented dermatofibrosarcoma protuberans 88341 Giant cell fibroblastoma 88351 Plexiform fibrohistiocytic tumor 88361 Angiomatoid fibrous histiocytoma 88403 Myxosarcoma 88411 Angiomyxoma 88500 "Lipoma, NOS" 88501 Atypical lipoma 88503 "Liposarcoma, NOS" 88510 Fibrolipoma 88513 "Liposarcoma, well differentiated" 88523 Myxoid liposarcoma 88533 Round cell liposarcoma 88543 Pleomorphic liposarcoma 88553 Mixed type liposarcoma 88573 Fibroblastic liposarcoma 88583 Dedifferentiated liposarcoma 88610 "Angiolipoma, NOS" 88900 "Leiomyoma, NOS" 88901 "Leiomyomatosis, NOS" 88903 "Leiomyosarcoma, NOS" 88913 Epithelioid leiomyosarcoma 88943 Angiomyosarcoma 88953 Myosarcoma 88963 Myxoid leiomyosarcoma 88971 "Smooth muscle tumor, NOS" 89000 "Rhabdomyoma, NOS" 89003 "Rhabdomyosarcoma, NOS" 89013 "Pleomorphic rhabdomyosarcoma, adult type" 89023 Mixed type rhabdomyosarcoma 89103 Embryonal rhabdomyosarcoma 89123 Spindle cell rhabdomyosarcoma 89203 Alveolar rhabdomyosarcoma 89213 Rhabdomyosarcoma with ganglionic differentia... 
89303 Endometrial stromal sarcoma 89313 "Endometrial stromal sarcoma, low grade" 89333 Adenosarcoma 89343 Carcinofibroma 89353 "Stromal sarcoma, NOS" 89361 "Gastrointestinal stromal tumor, NOS" 89363 Gastrointestinal stromal sarcoma 89403 "Mixed tumor, malignant, NOS" 89413 Carcinoma in pleomorphic adenoma 89503 Mullerian mixed tumor 89513 Mesodermal mixed tumor 89590 Benign Cystic nephroma 89591 Cystic partially differentiated nephroblastoma 89593 Malignant cystic nephroma 89601 Mesoblastic nephroma 89603 "Nephroblastoma, NOS" 89633 Malignant rhabdoid tumor 89643 Clear cell sarcoma of kidney 89703 Hepatoblastoma 89713 Pancreatoblastoma 89723 Pulmonary blastoma 89733 Pleuropulmonary blastoma 89741 Sialoblastoma 89751 Calcifying nested epithelial stromal tumor 89803 "Carcinosarcoma, NOS" 89813 "Carcinosarcoma, embryonal type" 89823 Malignant myoepithelioma 89901 "Mesenchymoma, NOS" 89903 "Mesenchymoma, malignant" 89913 Embryonal sarcoma 90003 "Brenner tumor, malignant" 90143 Serous adenocarcinofibroma 90153 Mucinous adenocarcinofibroma 90203 "Phyllodes tumor, malignant" 90403 "Synovial sarcoma, NOS" 90413 "Synovial sarcoma, spindle cell" 90423 "Synovial sarcoma, epithelioid cell" 90433 "Synovial sarcoma, biphasic" 90443 "Clear cell sarcoma,NOS (except of kidney M-8..." 90503 "Mesothelioma, malignant" 90513 "Fibrous mesothelioma, malignant" 90523 "Epithel. mesothelioma, mal." 90533 "Mesothelioma, biphasic, malignant" 90603 Dysgerminoma 90613 "Seminoma, NOS" 90623 "Seminoma, anaplastic" 90633 Spermatocytic seminoma 90642 Intratubular malignant germ cells 90643 Germinoma 90653 "Germ cell tumor, nonseminomatous" 90703 "Embryonal carcinoma, NOS" 90713 Yolk sac tumor 90723 Polyembryoma 90800 "Teratoma, benign" 90801 "Teratoma, NOS" 90803 "Teratoma, malignant, NOS" 90813 Teratocarcinoma 90823 "Malignant teratoma, undiff." 90833 "Malignant teratoma, intermediate" 90840 "Dermoid cyst, NOS" 90843 Teratoma with malig. 
transformation 90853 Mixed germ cell tumor 90903 "Struma ovarii, malignant" 91003 Choriocarcinoma 91013 Choriocarcinoma combined w/ other germ cell ... 91023 "Malignant teratoma, trophoblastic" 91043 Malignant placental site trophoblastic tumor 91053 "Trophoblastic tumor, epithelioid" 91103 "Mesonephroma, malignant" 91200 "Hemangioma, NOS" 91203 Hemangiosarcoma 91210 Cavernous hemangioma 91220 Venous hemangioma 91243 Kupffer cell sarcoma 91300 "Hemangioendothelioma, benign" 91301 "Hemangioendothelioma, NOS" 91303 "Hemangioendothelioma, malignant" 91310 Capillary hemangioma 91333 "Epithelioid hemangioendothelioma, malignant" 91403 Kaposi sarcoma 91500 "Hemangiopericytoma, benign" 91501 "Hemangiopericytoma, NOS" 91503 "Hemangiopericytoma, malignant" 91611 Hemangioblastoma 91703 Lymphangiosarcoma 91803 "Osteosarcoma, NOS" 91813 Chondroblastic osteosarcoma 91823 Fibroblastic osteosarcoma 91833 Telangiectatic osteosarcoma 91843 Osteosarcoma in Paget disease 91853 Small cell osteosarcoma 91863 Central osteosarcoma 91873 Instrosseous well differentiated osteosarcoma 91923 Parosteal osteosarcoma 91933 Periosteal osteosarcoma 91943 High grade surface osteosarcoma 91953 Intracortical osteosarcoma 92203 "Chondrosarcoma, NOS" 92213 Juxtacortical chondrosarcoma 92303 "Chondroblastoma, malignant" 92313 Myxoid chondrosarcoma 92403 Mesenchymal chondrosarcoma 92423 Clear cell chondrosarcoma 92433 Dedifferentiated chondrosarcoma 92501 "Giant cell tumor of bone, NOS" 92503 "Giant cell tumor of bone, malignant" 92511 Giant cell tumor of soft parts 92513 Malignant giant cell tumor of soft parts 92523 Malignant tenosynovial giant cell tumor 92603 Ewing sarcoma 92613 Adamantinoma of long bones 92703 "Odontogenic tumor, malignant" 92903 Ameloblastic odontosarcoma 93103 "Ameloblastoma, malignant" 93303 Ameloblastic fibrosarcoma 93423 Odontogenic carcinosarcoma 93501 Craniopharyngioma 93511 Adamantinomatous craniopharyngioma 93521 Papillary craniopharyngioma 93601 "Pinealoma, NOS" 93611 
Pineocytoma 93623 Pineoblastoma 93630 Melanotic neuroectodermal tumor 93643 Peripheral neuroectodermal tumor 93653 Askin tumor 93703 "Chordoma, NOS" 93713 Chondroid chordoma 93723 Dedifferentiated chordoma 93803 "Glioma, malignant" 93813 Gliomatosis cerebri 93823 Mixed glioma 93831 Subependymoma 93841 Supependymal giant cell astrocytoma 93900 "Choroid plexus papilloma, NOS" 93901 Atypical choroid plexus papilloma 93903 "Choroid plexus papilloma, malignant" 93913 "Ependymoma, NOS" 93923 "Ependymoma, anaplastic" 93933 Papillary ependymoma 93941 Myxopapillary ependymoma 93953 Papillary tumor of the pineal region 94003 "Astrocytoma, NOS" 94013 "Astrocytoma, anaplastic" 94103 Protoplasmic astrocytoma 94113 Gemistocytic astrocytoma 94121 Desmoplastic infantile astrocytoma 94130 Dysembryoplastic neuroepithelial tumor 94203 Fibrillary astrocytoma 94211 Pilocytic astrocytoma 94233 Polar spongioblastoma 94243 Pleomorphic xanthoastrocytoma 94253 Pilomyxoid astrocytoma 94303 Astroblastoma 94311 Angiocentric glioma 94321 Pituicytoma 94403 "Glioblastoma, NOS" 94413 Giant cell glioblastoma 94421 Gliofibroma 94423 Gliosarcoma 94441 Chordoid glioma 94503 "Oligodendroglioma, NOS" 94513 "Oligodendroglioma, anaplastic" 94603 Oligodendroblastoma 94703 "Medulloblastoma, NOS" 94713 Desmoplastic medulloblastoma 94723 Medullomyoblastoma 94733 Primitive neuroectodermal tumor 94743 Large cell medulloblastoma 94803 "Cerebellar sarcoma, NOS" 94900 Ganglioneuroma 94903 Ganglioneuroblastoma 94920 Gangliocytoma 94930 Dysplastic gangliocytoma of cerebellum (Lher... 
95003 "Neuroblastoma, NOS" 95013 "Medulloepithelioma, NOS" 95023 Teratoid medulloepithelioma 95033 "Neuroepithelioma, NOS" 95043 Spongioneuroblastoma 95051 "Ganglioglioma, NOS" 95053 "Ganglioglioma, anaplastic" 95061 Centrol neurocytoma 95083 Atypical teratoid/rhabdoid tumor 95091 Papillary glioneuronal tumor 95103 "Retinoblastoma, NOS" 95113 "Retinoblastoma, differentiated" 95123 "Retinoblastoma, undifferentiated" 95133 "Retinoblastoma, diffuse" 95203 Olfactory neurogenic tumor 95213 Olfactory neurocytoma 95223 Olfactory neuroblastoma 95233 Olfactory neuroepithelioma 95300 "Meningioma, NOS" 95301 "Meningiomatosis, NOS" 95303 "Meningioma, malignant" 95310 Meningothelial meningioma 95320 Fibrous meningioma 95330 Psammomatous meningioma 95340 Angiomatous meningioma 95370 Transitional meningioma 95381 Clear cell meningioma 95383 Papillary meningioma 95391 Atypical meningioma 95393 Meningeal sarcomatosis 95400 "Neurofibroma, NOS" 95401 "Neurofibromatosis, NOS" 95403 Malignant peripheral nerve sheath tumor 95410 Melanotic neurofibroma 95500 Plexiform neurofibroma 95600 "Neurilemoma, NOS" 95601 Neurinomatosis 95603 "Neurilemmoma, malignant" 95613 MPNST with rhabdomyoblastic differentiation 95620 Neurothekeoma 95700 "Neuroma, NOS" 95710 "Perineurioma, NOS" 95713 "Perineurioma, malignant" 95800 "Granular cell tumor, NOS" 95803 "Granular cell tumor, malignant" 95813 Alveolar soft part sarcoma 95903 "Malignant lymphoma, NOS" 95913 "Malignant lymphoma, non-Hodgkin" 95963 Composite Hodgkin and non-Hodgkin lymphoma 95973 Primary cutaneous follicle centre lymphoma 96503 "Hodgkin lymphoma, NOS" 96513 "Hodgkin lymphoma, lymphocyte-rich" 96523 "Hodgkin lymphoma, mixed cellularity, NOS" 96533 "Hodgkin lymphoma, lymphocytic deplet., NOS" 96543 "Hodgkin lymph., lymphocyt. deplet., diffuse ..." 96553 "Hodgkin lymphoma, lymphocyt. deplet., reticular" 96593 "Hodgkin lymph., nodular lymphocyte predom." 
96613 Hodgkin granuloma [obs] 96623 Hodgkin sarcoma [obs] 96633 "Hodgkin lymphoma, nodular sclerosis, NOS" 96643 "Hodgkin lymphoma, nod. scler., cellular phase" 96653 "Hodgkin lymphoma, nod. scler., grade 1" 96673 "Hodgkin lymphoma, nod. scler., grade 2" 96703 "ML, small B lymphocytic, NOS" 96713 "ML, lymphoplasmacytic" 96733 Mantle cell lymphoma 96753 "ML, mixed sm. and lg. cell, diffuse" 96783 Primary effusion lymphoma 96793 Mediastinal large B-cell lymphoma 96803 "ML, large B-cell, diffuse" 96843 "ML, large B-cell, diffuse, immunoblastic, NOS" 96873 "Burkitt lymphoma, NOS" 96883 T-cell/histiocyte rich large B-cell lymphoma 96893 Splenic marginal zone B-cell lymphoma 96903 "Follicular lymphoma, NOS" 96913 "Follicular lymphoma, grade 2" 96953 "Follicular lymphoma, grade 1" 96983 "Follicular lymphoma, grade 3" 96993 "Marginal zone B-cell lymphoma, NOS" 97003 Mycosis fungoides 97013 Sezary syndrome 97023 "Mature T-cell lymphoma, NOS" 97053 Angioimmunoblastic T-cell lymphoma 97083 Subcutaneous panniculitis-like T-cell lymphoma 97093 "Cutaneous T-cell lymphoma, NOS" 97123 Intravascular large B-cell lymphoma 97143 "Anaplastic large cell lymphoma, T-cell and N..." 97163 Hepatosplenic T-cell lymphoma 97173 Intestinal T-cell lymphoma 97183 Primary cutan. CD30+ T-cell lymphoprolif. di... 97193 "NK/T-cell lymphoma, nasal and nasal-type" 97243 Syst. EBV pos. T-cell lymphoprol. disease 97253 Hydroa vacciniforme-like lymphoma 97263 Primary cutaneous gamma-delta T-cell lymphoma 97273 "Precursor cell lymphoblastic lymphoma, NOS" 97283 Precursor B-cell lymphoblastic lymphoma 97293 Precursor T-cell lymphoblastic lymphoma 97313 "Plasmacytoma, NOS" 97323 Multiple myeloma 97333 Plasma cell leukemia 97343 "Plasmacytoma, extramedullary" 97353 Plasmablastic lymphoma 97373 ALK postive large B-cell lymphoma 97383 Large B-cell lymph. arising in HHV8-assoc. 
multi 97403 Mast cell sarcoma 97411 Indolent systemic mastocytosis 97413 Malignant mastocytosis 97423 Mast cell leukemia 97503 Malignant histiocytosis 97511 "Langerhans cell histiocytosis, NOS" 97513 "Langerhans cell histiocytosis, NOS" 97521 "Langerhans cell histiocytosis, unifocal" 97531 "Langerhans cell histiocytosis, multifocal" 97543 "Langerhans cell histiocytosis, disseminated" 97553 Histiocytic sarcoma 97563 Langerhans cell sarcoma 97573 Interdigitating dendritic cell sarcoma 97583 Follicular dendritic cell sarcoma 97593 Fibroblastic reticular cell tumor 97603 "Immunoproliferative disease, NOS" 97613 Waldenstrom macroglobulinemia 97623 "Heavy chain disease, NOS" 97643 Immunoproliferative small intestinal disease 98003 "Leukemia, NOS" 98013 "Acute leukemia, NOS" 98053 Acute biphenotypic leukemia 98063 Mix. phenotype ac. leukemia with t;BCR-ABL1 98073 Mix. phenotype ac. leukemia with t;MLL rearrange 98083 "Mixed phenotype acute leukemia, B/myeloid, NOS" 98093 "Mixed phenotype acute leukemia, T/myeloid, NOS" 98113 "B lymphoblastic leukemia/lymphoma, NOS" 98123 B lymphoblast. leukemia/lymphoma with t;BCR-ABL1 98133 B lymphoblast. leukemia/lymphoma with t;MLL rear 98143 B lymphoblast. leukemia/lymphoma with t;TEL-AML1 98153 B lymphoblastic leukemia/lymphoma with hyperdipl 98163 B lymphoblast. leukemia/lymph. with hypodiploidy 98173 B lymphoblast. leukemia/lymphoma with t;IL3-IGH 98183 B lymphoblast. leukemia/lymphoma with t;E2A PBX1 98203 "Lymphoid leukemia, NOS" 98233 B-cell chr. lymph. leuk./small lymphocytic l... 98263 Burkitt cell leukemia 98273 Adult T-cell leukemia/lymphoma (HTLV-1 pos.) 
98283 "Acute lymphoblastic leukemia, L2 type, NOS" 98313 T-cell large granular lymphocytic leukemia 98323 "Prolymphocytic leukemia, NOS" 98333 "Prolymphocytic leukemia, B-cell type" 98343 "Prolymphocytic leukemia, T-cell type" 98353 "Precursor cell lymphoblastic leukemia, NOS" 98363 Precursor B-cell lymphoblastic leukemia 98373 Precursor T-cell lymphoblastic leukemia 98403 "Acute myeloid leukemia, M6 type" 98603 "Myeloid leukemia, NOS" 98613 Acute myeloid leukemia 98633 "Chronic myeloid leukemia, NOS" 98653 Acute myeloid leukemia with t;DEK-NUP214 98663 "Acute promyelocytic leuk.,t(15;17)(q22;q11-12)" 98673 Acute myelomonocytic leukemia 98693 Acute myeloid leukemia with inv or t;RPN1-EVI1 98703 Acute basophilic leukemia 98713 Ac. myelomonocytic leuk. w abn. mar. eosinop... 98723 "Acute myeloid leukemia, minimal differentiation" 98733 Acute myeloid leukemia without maturation 98743 Acute myeloid leukemia with maturation 98753 "Chronic myelogenous leukemia, BCR/ABL positive" 98763 "Atypical chronic myeloid leuk., BCR/ABL nega..." 98913 Acute monocytic leukemia 98953 Acute myeloid leuk. with myelodysplasia-related 98963 "Acute myeloid leukemia, t(8;21)(q22;q22)" 98973 "Acute myeloid leukemia, 11q23 abnormalities" 98981 Transient abnormal myelopoiesis 98983 Myeloid leukemia associated with Down Syndrome 99103 Acute megakaryoblastic leukemia 99113 Acute myeloid leukemia (megakar. blast.) 
with t; 99203 Therapy related myeloid neoplasm 99303 Myeloid sarcoma 99313 Acute panmyelosis with myelofibrosis 99403 Hairy cell leukemia 99453 "Chronic myelomonocytic leukemia, NOS" 99463 Juvenile myelomonocytic leukemia 99483 Aggressive NK-cell leukemia 99503 Polycythemia vera 99603 "Myeloproliferative neoplasm, NOS" 99613 Primary myelofibrosis 99623 Essential thrombocythemia 99633 Chronic neutrophilic leukemia 99643 "Chronic eosinophilic leukemia, NOS" 99653 Myeloid&lymphoid neoplasms with PDGFRB rearrange 99663 Myeloid neoplasms with PDGFRB rearrangement 99673 Myeloid&lymphoid neoplasm with FGFR1 abnormaliti 99701 "Lymphoproliferative disorder, NOS" 99711 "Post transplant lymphoproliferative disorder, NO" 99713 Polymorphic post transplant lymphoproliferative 99751 "Myeloproliferative disease, NOS" 99753 "Myeloproliferative neoplasm, unclassifiable" 99803 Refractory anemia 99823 Refractory anemia with sideroblasts 99833 Refractory anemia with excess blasts 99843 Refract. anemia with excess blasts in transf... 99853 Refractory cytopenia with multilineage dyspl... 99863 Myelodysplastic syndr. with 5q deletion synd... 
99873 "Therapy-related myelodysplastic syndrome, NOS" 99893 "Myelodysplastic syndrome, NOS" 99913 Refractory neutropenia 99923 Refractory thrombocytopenia 341 PLOCCD PLCCD1 PLCCD2 PLCCD3 0 external upper lip 1 external lower lip 2 "external lip, NOS" 3 mucose of uppper lip 4 mucosa of lower lip 5 "mucosa of lip, NOS" 6 commissure of lip 8 overlapping lesion of lip 9 "lip, NOS" 19 base of tongue 20 "dorsal surface of tongue, NOS" 21 border of tongue 22 "ventral surface of tongue, NOS" 23 anterior 2/3 of tongue 24 lingual tonsil 28 overlapping lesion of tongue 29 "tongue, NOS" 30 upper gum 31 lower gum 39 "gum, NOS" 40 anterior floor of mouth 41 lateral floor of mouth 48 overlapping lesion of floor of mouth 49 "floor of mouth, NOS" 50 hard palate 51 "soft palate, NOS" 52 uvula 58 overlapping lesion of palate 59 "palate, NOS" 60 cheek mucosa 61 vestibule of mouth 62 retromolar area 68 overlappinglesionofotherandunspecifiedpartso... 69 "mouth, NOS" 79 parotid gland 80 submandibular gland 81 sublingual gland 88 overlapping lesion of major salivary glands 89 "major salivary gland, NOS" 90 tonsillar fossa 91 tonsillar pillar 98 overlapping lesion of tonsil 99 "tonsil, NOS" 100 vallecula 101 anterior surface of epiglottis 102 lateral wall of oropharynx 103 posterior wall of oropharynx 104 branchial cleft 108 overlapping lesion of oropharynx 109 "oropharynx, NOS" 110 superior wall of nasopharyx 111 posterior wall of nasopharyx 112 lateral wall of nasopharyx 113 anterior wall of nasopharyx 118 overlapping lesion of nasopharyx 119 "nasopharyx, NOS" 129 "nasopharyxyriform sinus, NOS" 130 postcricoid region 131 hypopharyngeal aspect of aryepiglottic fold 132 posterior wall of hypopharynx 138 overlapping lesion of hypopharyx 139 "hypopharyx, NOS" 140 "pharyx, NOS" 142 waldeyer ring 148 "overlappinglesionoflip,oralcavityandpharynx" 150 cervical esophagus 151 thoracic esophagus 152 abdominal esophagus 153 upper third of esophagus 154 middle third of esophagus 155 lower third of 
esophagus 158 overlapping lesion of esophagus 159 "esophagus, NOS" 160 "cardia, NOS" 161 fundus of stomach 162 body of stomach 163 gastric antrum 164 pylorus 165 lesser curvature of stomach 166 greater curvature of stomach 168 overlapping lesion of stomach 169 "stomach, NOS" 170 duodenum 171 jejunum 172 ileum 173 Meckel diverticulum 178 overlapping lesion of small intestine 179 "small intestine, NOS" 180 cecum 181 appendix 182 ascending colon 183 hepatic flexure of colon 184 transverse colon 185 splenic flexure of colon 186 descending colon 187 sigmoid colon 188 overlapping lesion of colon 189 "colon, NOS" 199 rectosigmoid junction 209 "rectum, NOS" 210 "anus, NOS" 211 anal canal 212 cloacogenic zone 218 "overlappinglesionofrectum,anusandanalcanal" 220 liver 221 intrahepatic bile duct 239 "gallbladder, NOS" 240 extrahepatic bile duct 241 ampulla of Vater 248 overlapping lesion of biliary tract 249 "billary tract, NOS" 250 head of pancreas 251 body of pancreas 252 tail of pancreas 253 pancreatic duct 254 islets of Langerhans 257 other specified parts of pancreas 258 overlapping lesion of pancreas 259 "pancreas, NOS" 260 "intestinal tract, NOS" 268 overlapping lesion of digestive system 269 "gastrointestinal tract, NOS" 300 nasal cavity 301 middle ear 310 maxillary sinus 311 ethmoid sinus 312 frontal sinus 313 sphenoid sinus 318 overlapping lesion of accessory sinuses 319 "accessory sinus, NOS" 320 glottis 321 supraglottis 322 subglottis 323 laryngeal cartilage 328 overlapping lesion of larynx 329 "larynx, NOS" 339 trachea 340 main bronchus 341 "upper lobe, lung" 342 "middle lobe, lung" 343 "lower lobe, lung" 348 overlapping lesion of lung 349 "lung, NOS" 379 thymus 380 heart 381 anterior mediastinum 382 posterior mediastinum 383 "mediastinum, NOS" 384 "pleura, NOS" 388 "overlappinglesionofheart,mediastinumandpleura" 390 "upper respiratory tract, NOS" 398 overlappinglesionofrespiratorysystemandintra... 
399 ill-defined sites within respiratory system 400 "longboneofupperlimb,scapulaandassociatedjoints" 401 short bone of upper limb and associated joints 402 long bones of lower limb and associated joints 403 short bones of lower limb and associated joints 408 "overlappinglesionofbones,jointsandarticularc..." 409 "bones of lomb, NOS" 410 bones of skull and face and associated joints 411 mandible 412 vertebral column 413 "rib, sternum, clavicle and associated joints" 414 "pelvicbones,sacrum,coccyxandassociatedjoints" 418 "overlappinglesionofbones,jointsandarticularc..." 419 "bone, NOS" 420 blood 421 bone marrow 422 spleen 423 "reticuloendothelial system, NOS" 424 "hematopoietic system, NOS" 440 "skin of lip, NOS" 441 eyelid 442 external ear 443 skin of other and unspecified parts of face 444 skin of scalp and neck 445 skin of trunc 446 skin of upper limb and shoulder 447 skin of lower limb and hip 448 overlapping lesion of skin 449 "skin, NOS" 470 peripheralnervesandautonomicnervoussystemofh... 471 peripheralnervesandautonomicnervoussystemofu... 472 peripheralnervesandautonomicnervoussystemofl... 473 peripheralnervesandautonomicnervoussystemoft... 474 peripheralnervesandautonomicnervoussystemofa... 475 peripheralnervesandautonomicnervoussystemofp... 476 peripheralnervesandautonomicnervoussystemoft... 478 overlappinglesionofperipheralnervesandautono... 479 "autonomic nervous system, NOS" 480 retroperitoneum 481 specified parts of peritoneum 482 "peritoneum, NOS" 488 overlapping lesion of retroperitoneum 490 "connective,subcutaneousandothersofttissuesof..." 491 "connective,subcutaneousandothersofttissuesof..." 492 "connective,subcutaneousandothersofttissuesof..." 493 "connective,subcutaneousandothersofttissuesof..." 494 "connective,subcutaneousandothersofttissuesof..." 495 "connective,subcutaneousandothersofttissuesof..." 496 "connective,subcutaneousandothersofttissuesof..." 498 "overlappinglesionofconnective,subcutaneousan..." 
499 "connective,subcutaneousandothersofttissues,NOS" 500 nipple 501 central portion of breast 502 upper-inner quadrant of breast 503 lower-inner quadrant of breast 504 upper-outer quadrant of breast 505 upper-outer quadrant of breast 506 axillary tail of breast 508 overlapping lesion of breast 509 "breast, NOS" 510 labium majus 511 labium minus 512 clitoris 518 overlapping lesion of vulva 519 "vulva, NOS" 529 "vagina, NOS" 530 endocervix 531 exocervix 538 overlapping lesion of cervix uteri 539 "cervix uteri, NOS" 540 isthmus uteri 541 endometrium 542 myometrium 543 fundus uteri 548 overlapping lesion of corpus uteri 549 "corpus uteri, NOS" 559 "uterus, NOS" 569 "ovary, NOS" 570 fallopian tube 571 broad ligament 572 round ligament 573 parametrium 574 uterine adnexa 577 other specified parts of female genital organs 578 overlapping lesion of female genital organs 579 "female genital tract, NOS" 589 "placenta, NOS" 600 prepuce 601 glans penis 602 body of penis 608 overlapping lesion of penis 609 "penis, NOS" 619 prostate gland 620 undescended testis 621 descended testis 629 "testis, NOS" 630 epididymis 631 spermatic cord 632 "scrotum, NOS" 637 other specified parts of male genital organs 638 overlapping lesion of male genital organs 639 "male genital tract, NOS" 649 "kidney, NOS" 659 renal pelvis 669 ureter 670 trigone of bladder 671 dome of bladder 672 lateral wall of bladder 673 anterior wall of bladder 674 posterior of bladder 675 bladder neck 676 ureteric orifice 677 urachus 678 overlapping lesion of bladder 679 "bladder, NOS" 680 urethra 681 paraurethral gland 688 overlapping lesion of urinary organs 689 "urinary system, NOS" 690 conjuctiva 691 "cornea, NOS" 692 retina 693 choroid 694 ciliary body 695 lacrimal gland 696 "orbit, NOS" 698 overlapping lesion of eye and adnexa 699 "eye, NOS" 700 cerebral meninges 701 spinal meninges 709 "meninges, NOS" 710 cerebrum 711 frontal lobe 712 temperal lobe 713 parietal lobe 714 occipital lobe 715 "ventricle, NOS" 716 
cerebellum 717 brain stem 718 overlapping lesion of brain 719 "brain, NOS" 720 spinal cord 721 cauda equina 722 olfactory nerve 723 optic nerve 724 acoustic nerve 725 "cranial nerve, NOS" 728 overlappinglesionofbrainandcentralnervoussystem 729 "nervous system, NOS" 739 "thyroid gland, NOS" 740 cortex of adrenal gland 741 medulla of adrenal gland 749 "adrenal gland, NOS" 750 parathyroid gland 751 pituitary gland 752 craniopharyngeal duct 753 pineal gland 754 carotid body 755 aortic body and other paraganglia 758 overlappinglesionofendocrineglandsandrelated... 759 "endocrine gland, NOS" 760 "head, face or neck, NOS" 761 "thorax, NOS" 762 "abdomen, NOS" 763 "pelvis, NOS" 764 "upper limb, NOS" 765 "lower limb, NOS" 767 other ill-defined sites 768 overlapping lesion of ill-defined sites 770 "lymph nodes of head, face and neck" 771 intrathoracic lymph nodes 772 intra-abdominal lymph nodes 773 lymph nodes of axilla of arm 774 lymph nodes of inguinal region or leg 775 pelvic lymph nodes 778 lymph nodes of multiple regions 779 "lymph node, NOS" 809 unknown primary sites 343 IFCDATR 1 yes 2 no 3 statement by physician 4 IC will follow \ No newline at end of file diff --git a/test_data/full_dataset/studies/individual_codebook.txt.sha1 b/test_data/full_dataset/studies/individual_codebook.txt.sha1 new file mode 100644 index 0000000..e05732f --- /dev/null +++ b/test_data/full_dataset/studies/individual_codebook.txt.sha1 @@ -0,0 +1 @@ +8cc20a3917db3e251d9426d078ce74969e531931 PMCST000AAA_individual_codebook.txt diff --git a/test_data/full_dataset/studies/individual_study.txt b/test_data/full_dataset/studies/individual_study.txt new file mode 100644 index 0000000..2414911 --- /dev/null +++ b/test_data/full_dataset/studies/individual_study.txt @@ -0,0 +1,18 @@ +STUDY_ID_INDIVIDUAL_STUDY_ID,STUDY_ID,INDIVIDUAL_ID,INDIVIDUAL_STUDY_ID +PMCST000AAC_8301,PMCST000AAC,PAT1,8301 +PMCST000AAC_8337,PMCST000AAC,PAT2,8337 +PMCST000AAC_8355,PMCST000AAC,PAT3,8355 
+PMCST000AAC_8409,PMCST000AAC,PAT4,8409 +PMCST000AAC_8419,PMCST000AAC,PAT5,8419 +PMCST000AAC_8436,PMCST000AAC,PAT6,8436 +PMCST000AAC_8448,PMCST000AAC,PAT7,8448 +PMCST000AAB_8592,PMCST000AAB,PAT8,8592 +PMCST000AAB_8637,PMCST000AAB,PAT9,8637 +PMCST000AAB_8962,PMCST000AAB,PAT10,8962 +PMCST000AAB_9162,PMCST000AAB,PAT11,9162 +PMCST000AAC_9174,PMCST000AAC,PAT12,9174 +PMCST000AAC_9383,PMCST000AAC,PAT13,9383 +PMCST000AAC_9571,PMCST000AAC,PAT14,9571 +PMCST000AAC_9676,PMCST000AAC,PAT15,9676 +PMCST000AAB_9875,PMCST000AAB,PAT16,9875 +PMCST000AAB_9931,PMCST000AAB,PAT17,9931 diff --git a/test_data/full_dataset/studies/individual_study.txt.sha1 b/test_data/full_dataset/studies/individual_study.txt.sha1 new file mode 100644 index 0000000..4a8ad83 --- /dev/null +++ b/test_data/full_dataset/studies/individual_study.txt.sha1 @@ -0,0 +1 @@ +97a9c1f7f565c76d95d0bdacc1043247c52259a7 individual_study.txt diff --git a/test_data/full_dataset/studies/study.txt b/test_data/full_dataset/studies/study.txt new file mode 100644 index 0000000..44103c2 --- /dev/null +++ b/test_data/full_dataset/studies/study.txt @@ -0,0 +1,3 @@ +"STUDY_ID","acronym","title","description","datadictionary" +PMCST000AAC,STUDYA,Treatment study protocol of the Dutch Childhood Oncology Group for children and adolescents (1-19 year) with newly diagnosed acute lymphoblastic leukemia.,Intial ALL treatment <1 year,xls bestand +PMCST000AAB,STUDYB,International collaborative treatment protocol for infants under one year with acute lymphoblastic or biphenotypic leukemia. 
,Intial ALL treatment 1-19 years,xls bestand diff --git a/test_data/full_dataset/studies/study.txt.sha1 b/test_data/full_dataset/studies/study.txt.sha1 new file mode 100644 index 0000000..f729831 --- /dev/null +++ b/test_data/full_dataset/studies/study.txt.sha1 @@ -0,0 +1 @@ +683f607214f3b53c06a0431a2303c0322cc99ce6 study.txt From 2102cbf19cd7b09d7fdad1091282c7160e0b3546 Mon Sep 17 00:00:00 2001 From: Alessia Peviani Date: Tue, 14 Dec 2021 12:55:31 +0100 Subject: [PATCH 03/39] Relabel Gender to Sex --- config/ontology_config.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config/ontology_config.json b/config/ontology_config.json index e85bc27..381ee90 100644 --- a/config/ontology_config.json +++ b/config/ontology_config.json @@ -4,7 +4,7 @@ "name": "01. Patient information", "children": [ { - "name": "03. Gender", + "name": "03. Sex", "concept_code": "Individual.gender" }, { From 5b80611a4c181aabc22b89516ab77c551b06b937 Mon Sep 17 00:00:00 2001 From: Alessia Peviani Date: Tue, 14 Dec 2021 13:11:06 +0100 Subject: [PATCH 04/39] Change source data extension to tsv --- test_data/alternative/clinic/{RDP-IC.tab => RDP-IC.tsv} | 0 test_data/alternative/clinic/{RDP-Patient.tab => RDP-Patient.tsv} | 0 .../clinic/{RDP-Patient_codebook.txt => RDP-Patient_codebook.tsv} | 0 .../alternative/laboratory/{biomaterial.txt => biomaterial.tsv} | 0 test_data/alternative/laboratory/{biosource.txt => biosource.tsv} | 0 test_data/alternative/studies/{death.txt => death.tsv} | 0 .../studies/{death_codebook.txt => death_codebook.tsv} | 0 test_data/alternative/studies/{diagnosis.txt => diagnosis.tsv} | 0 .../studies/{diagnosis_codebook.txt => diagnosis_codebook.tsv} | 0 test_data/alternative/studies/{individual.txt => individual.tsv} | 0 .../studies/{individual_codebook.txt => individual_codebook.tsv} | 0 .../studies/{individual_study.txt => individual_study.tsv} | 0 test_data/alternative/studies/{study.txt => study.tsv} | 0 test_data/full_dataset/clinic/{RDP-IC.tab => 
RDP-IC.tsv} | 0 .../full_dataset/clinic/{RDP-Patient.tab => RDP-Patient.tsv} | 0 .../clinic/{RDP-Patient_codebook.txt => RDP-Patient_codebook.tsv} | 0 .../full_dataset/laboratory/{biomaterial.txt => biomaterial.tsv} | 0 .../full_dataset/laboratory/{biosource.txt => biosource.tsv} | 0 test_data/full_dataset/studies/{death.txt => death.tsv} | 0 .../studies/{death_codebook.txt => death_codebook.tsv} | 0 test_data/full_dataset/studies/{diagnosis.txt => diagnosis.tsv} | 0 .../studies/{diagnosis_codebook.txt => diagnosis_codebook.tsv} | 0 test_data/full_dataset/studies/{individual.txt => individual.tsv} | 0 .../studies/{individual_codebook.txt => individual_codebook.tsv} | 0 .../studies/{individual_study.txt => individual_study.tsv} | 0 test_data/full_dataset/studies/{study.txt => study.tsv} | 0 26 files changed, 0 insertions(+), 0 deletions(-) rename test_data/alternative/clinic/{RDP-IC.tab => RDP-IC.tsv} (100%) rename test_data/alternative/clinic/{RDP-Patient.tab => RDP-Patient.tsv} (100%) rename test_data/alternative/clinic/{RDP-Patient_codebook.txt => RDP-Patient_codebook.tsv} (100%) rename test_data/alternative/laboratory/{biomaterial.txt => biomaterial.tsv} (100%) rename test_data/alternative/laboratory/{biosource.txt => biosource.tsv} (100%) rename test_data/alternative/studies/{death.txt => death.tsv} (100%) rename test_data/alternative/studies/{death_codebook.txt => death_codebook.tsv} (100%) rename test_data/alternative/studies/{diagnosis.txt => diagnosis.tsv} (100%) rename test_data/alternative/studies/{diagnosis_codebook.txt => diagnosis_codebook.tsv} (100%) rename test_data/alternative/studies/{individual.txt => individual.tsv} (100%) rename test_data/alternative/studies/{individual_codebook.txt => individual_codebook.tsv} (100%) rename test_data/alternative/studies/{individual_study.txt => individual_study.tsv} (100%) rename test_data/alternative/studies/{study.txt => study.tsv} (100%) rename test_data/full_dataset/clinic/{RDP-IC.tab => RDP-IC.tsv} (100%) 
rename test_data/full_dataset/clinic/{RDP-Patient.tab => RDP-Patient.tsv} (100%) rename test_data/full_dataset/clinic/{RDP-Patient_codebook.txt => RDP-Patient_codebook.tsv} (100%) rename test_data/full_dataset/laboratory/{biomaterial.txt => biomaterial.tsv} (100%) rename test_data/full_dataset/laboratory/{biosource.txt => biosource.tsv} (100%) rename test_data/full_dataset/studies/{death.txt => death.tsv} (100%) rename test_data/full_dataset/studies/{death_codebook.txt => death_codebook.tsv} (100%) rename test_data/full_dataset/studies/{diagnosis.txt => diagnosis.tsv} (100%) rename test_data/full_dataset/studies/{diagnosis_codebook.txt => diagnosis_codebook.tsv} (100%) rename test_data/full_dataset/studies/{individual.txt => individual.tsv} (100%) rename test_data/full_dataset/studies/{individual_codebook.txt => individual_codebook.tsv} (100%) rename test_data/full_dataset/studies/{individual_study.txt => individual_study.tsv} (100%) rename test_data/full_dataset/studies/{study.txt => study.tsv} (100%) diff --git a/test_data/alternative/clinic/RDP-IC.tab b/test_data/alternative/clinic/RDP-IC.tsv similarity index 100% rename from test_data/alternative/clinic/RDP-IC.tab rename to test_data/alternative/clinic/RDP-IC.tsv diff --git a/test_data/alternative/clinic/RDP-Patient.tab b/test_data/alternative/clinic/RDP-Patient.tsv similarity index 100% rename from test_data/alternative/clinic/RDP-Patient.tab rename to test_data/alternative/clinic/RDP-Patient.tsv diff --git a/test_data/alternative/clinic/RDP-Patient_codebook.txt b/test_data/alternative/clinic/RDP-Patient_codebook.tsv similarity index 100% rename from test_data/alternative/clinic/RDP-Patient_codebook.txt rename to test_data/alternative/clinic/RDP-Patient_codebook.tsv diff --git a/test_data/alternative/laboratory/biomaterial.txt b/test_data/alternative/laboratory/biomaterial.tsv similarity index 100% rename from test_data/alternative/laboratory/biomaterial.txt rename to 
test_data/alternative/laboratory/biomaterial.tsv diff --git a/test_data/alternative/laboratory/biosource.txt b/test_data/alternative/laboratory/biosource.tsv similarity index 100% rename from test_data/alternative/laboratory/biosource.txt rename to test_data/alternative/laboratory/biosource.tsv diff --git a/test_data/alternative/studies/death.txt b/test_data/alternative/studies/death.tsv similarity index 100% rename from test_data/alternative/studies/death.txt rename to test_data/alternative/studies/death.tsv diff --git a/test_data/alternative/studies/death_codebook.txt b/test_data/alternative/studies/death_codebook.tsv similarity index 100% rename from test_data/alternative/studies/death_codebook.txt rename to test_data/alternative/studies/death_codebook.tsv diff --git a/test_data/alternative/studies/diagnosis.txt b/test_data/alternative/studies/diagnosis.tsv similarity index 100% rename from test_data/alternative/studies/diagnosis.txt rename to test_data/alternative/studies/diagnosis.tsv diff --git a/test_data/alternative/studies/diagnosis_codebook.txt b/test_data/alternative/studies/diagnosis_codebook.tsv similarity index 100% rename from test_data/alternative/studies/diagnosis_codebook.txt rename to test_data/alternative/studies/diagnosis_codebook.tsv diff --git a/test_data/alternative/studies/individual.txt b/test_data/alternative/studies/individual.tsv similarity index 100% rename from test_data/alternative/studies/individual.txt rename to test_data/alternative/studies/individual.tsv diff --git a/test_data/alternative/studies/individual_codebook.txt b/test_data/alternative/studies/individual_codebook.tsv similarity index 100% rename from test_data/alternative/studies/individual_codebook.txt rename to test_data/alternative/studies/individual_codebook.tsv diff --git a/test_data/alternative/studies/individual_study.txt b/test_data/alternative/studies/individual_study.tsv similarity index 100% rename from test_data/alternative/studies/individual_study.txt rename 
to test_data/alternative/studies/individual_study.tsv diff --git a/test_data/alternative/studies/study.txt b/test_data/alternative/studies/study.tsv similarity index 100% rename from test_data/alternative/studies/study.txt rename to test_data/alternative/studies/study.tsv diff --git a/test_data/full_dataset/clinic/RDP-IC.tab b/test_data/full_dataset/clinic/RDP-IC.tsv similarity index 100% rename from test_data/full_dataset/clinic/RDP-IC.tab rename to test_data/full_dataset/clinic/RDP-IC.tsv diff --git a/test_data/full_dataset/clinic/RDP-Patient.tab b/test_data/full_dataset/clinic/RDP-Patient.tsv similarity index 100% rename from test_data/full_dataset/clinic/RDP-Patient.tab rename to test_data/full_dataset/clinic/RDP-Patient.tsv diff --git a/test_data/full_dataset/clinic/RDP-Patient_codebook.txt b/test_data/full_dataset/clinic/RDP-Patient_codebook.tsv similarity index 100% rename from test_data/full_dataset/clinic/RDP-Patient_codebook.txt rename to test_data/full_dataset/clinic/RDP-Patient_codebook.tsv diff --git a/test_data/full_dataset/laboratory/biomaterial.txt b/test_data/full_dataset/laboratory/biomaterial.tsv similarity index 100% rename from test_data/full_dataset/laboratory/biomaterial.txt rename to test_data/full_dataset/laboratory/biomaterial.tsv diff --git a/test_data/full_dataset/laboratory/biosource.txt b/test_data/full_dataset/laboratory/biosource.tsv similarity index 100% rename from test_data/full_dataset/laboratory/biosource.txt rename to test_data/full_dataset/laboratory/biosource.tsv diff --git a/test_data/full_dataset/studies/death.txt b/test_data/full_dataset/studies/death.tsv similarity index 100% rename from test_data/full_dataset/studies/death.txt rename to test_data/full_dataset/studies/death.tsv diff --git a/test_data/full_dataset/studies/death_codebook.txt b/test_data/full_dataset/studies/death_codebook.tsv similarity index 100% rename from test_data/full_dataset/studies/death_codebook.txt rename to 
test_data/full_dataset/studies/death_codebook.tsv diff --git a/test_data/full_dataset/studies/diagnosis.txt b/test_data/full_dataset/studies/diagnosis.tsv similarity index 100% rename from test_data/full_dataset/studies/diagnosis.txt rename to test_data/full_dataset/studies/diagnosis.tsv diff --git a/test_data/full_dataset/studies/diagnosis_codebook.txt b/test_data/full_dataset/studies/diagnosis_codebook.tsv similarity index 100% rename from test_data/full_dataset/studies/diagnosis_codebook.txt rename to test_data/full_dataset/studies/diagnosis_codebook.tsv diff --git a/test_data/full_dataset/studies/individual.txt b/test_data/full_dataset/studies/individual.tsv similarity index 100% rename from test_data/full_dataset/studies/individual.txt rename to test_data/full_dataset/studies/individual.tsv diff --git a/test_data/full_dataset/studies/individual_codebook.txt b/test_data/full_dataset/studies/individual_codebook.tsv similarity index 100% rename from test_data/full_dataset/studies/individual_codebook.txt rename to test_data/full_dataset/studies/individual_codebook.tsv diff --git a/test_data/full_dataset/studies/individual_study.txt b/test_data/full_dataset/studies/individual_study.tsv similarity index 100% rename from test_data/full_dataset/studies/individual_study.txt rename to test_data/full_dataset/studies/individual_study.tsv diff --git a/test_data/full_dataset/studies/study.txt b/test_data/full_dataset/studies/study.tsv similarity index 100% rename from test_data/full_dataset/studies/study.txt rename to test_data/full_dataset/studies/study.tsv From 16f02ac2286402406adcc0f14064b4a5fce2bdb4 Mon Sep 17 00:00:00 2001 From: Alessia Peviani Date: Tue, 14 Dec 2021 14:45:37 +0100 Subject: [PATCH 05/39] Add test data for library_strategy & analysis type --- config/sources_config.json | 16 +++++++ .../alternative/laboratory/biomaterial.tsv | 44 ++++++++--------- .../full_dataset/laboratory/biomaterial.tsv | 48 +++++++++---------- 3 files changed, 62 insertions(+), 46 
deletions(-) diff --git a/config/sources_config.json b/config/sources_config.json index 5ccc239..baca6d7 100644 --- a/config/sources_config.json +++ b/config/sources_config.json @@ -298,6 +298,22 @@ "file": "laboratory/biomaterial.txt" } ] + }, + { + "name": "library_strategy", + "sources": [ + { + "file": "laboratory/biomaterial.txt" + } + ] + }, + { + "name": "analysis_type", + "sources": [ + { + "file": "laboratory/biomaterial.txt" + } + ] } ] }, diff --git a/test_data/alternative/laboratory/biomaterial.tsv b/test_data/alternative/laboratory/biomaterial.tsv index b1b0fe7..068967e 100644 --- a/test_data/alternative/laboratory/biomaterial.tsv +++ b/test_data/alternative/laboratory/biomaterial.tsv @@ -1,22 +1,22 @@ -biomaterial_id biomaterial_date type src_biosource_id src_biomaterial_id description label -BIOM1T 07/03/2018 total RNA BIOS1T hepatoblastoma RNA HBL_RNA -BIOM1N 22/11/2018 total RNA BIOS1N healthy tissue HBL_normal_RNA -BIOM3T 16/04/2018 genomic DNA BIOS3T nephroblastoma DNA NBL_DNA -BIOM3N 02/05/2018 genomic DNA BIOS3N healthy tissue NBL_normal_DNA -BIOM4 07/03/2018 mRNA BIOS4 genomic DNA isolated from 1222TI 1222TI_DNA -BIOM5 05/06/2011 mRNA BIOS5 neuroblastoma DNA NBL_DNA -BIOM6 05/06/2011 mRNA DNA BIOS6 neuroblastoma DNA NBL_DNA -BIOM7 07/03/2018 mRNA DNA BIOS7 genomic DNA isolated from 1222TI 1222TI_DNA -BIOM8 05/06/2011 mRNA DNA BIOS8 neuroblastoma DNA NBL_DNA -BIOM9 05/06/2011 genomic DNA BIOS9 neuroblastoma DNA NBL_DNA -BIOM10 07/03/2018 genomic DNA BIOS10 genomic DNA isolated from 1222TI 1222TI_DNA -BIOM11 05/06/2011 genomic DNA BIOS11 neuroblastoma DNA NBL_DNA -BIOM12 05/06/2011 genomic DNA BIOS12 neuroblastoma DNA NBL_DNA -BIOM13 07/03/2018 mRNA BIOS13 genomic DNA isolated from 1222TI 1222TI_DNA -BIOM14 05/06/2011 mRNA BIOS14 neuroblastoma DNA NBL_DNA -BIOM15 05/06/2011 genomic DNA BIOS15 neuroblastoma DNA NBL_DNA -BIOM16 07/03/2018 genomic DNA BIOS16 genomic DNA isolated from 1222TI 1222TI_DNA -BIOM17 05/06/2011 genomic DNA BIOS17 
neuroblastoma DNA NBL_DNA -BIOM18 05/06/2011 mRNA BIOS18 neuroblastoma DNA NBL_DNA -BIOM20 05/06/2011 mRNA BIOS11 BIOM11 neuroblastoma DNA NBL_DNA -BIOM21 05/06/2011 genomic DNA BIOS12 BIOM12 neuroblastoma DNA NBL_DNA +biomaterial_id biomaterial_date type src_biosource_id src_biomaterial_id description label library_strategy analysis_type +BIOM1T 07/03/2018 total RNA BIOS1T hepatoblastoma RNA HBL_RNA A,B +BIOM1N 22/11/2018 total RNA BIOS1N healthy tissue HBL_normal_RNA A,B X,Y,Z +BIOM3T 16/04/2018 genomic DNA BIOS3T nephroblastoma DNA NBL_DNA A,B Z +BIOM3N 02/05/2018 genomic DNA BIOS3N healthy tissue NBL_normal_DNA A,B Y +BIOM4 07/03/2018 mRNA BIOS4 genomic DNA isolated from 1222TI 1222TI_DNA A,C Y +BIOM5 05/06/2011 mRNA BIOS5 neuroblastoma DNA NBL_DNA A,C X +BIOM6 05/06/2011 mRNA DNA BIOS6 neuroblastoma DNA NBL_DNA A,C X +BIOM7 07/03/2018 mRNA DNA BIOS7 genomic DNA isolated from 1222TI 1222TI_DNA A,C Y,Z +BIOM8 05/06/2011 mRNA DNA BIOS8 neuroblastoma DNA NBL_DNA A,C Y,Z +BIOM9 05/06/2011 genomic DNA BIOS9 neuroblastoma DNA NBL_DNA B,C Y,Z +BIOM10 07/03/2018 genomic DNA BIOS10 genomic DNA isolated from 1222TI 1222TI_DNA B,C Y,Z +BIOM11 05/06/2011 genomic DNA BIOS11 neuroblastoma DNA NBL_DNA B,C Y,Z +BIOM12 05/06/2011 genomic DNA BIOS12 neuroblastoma DNA NBL_DNA B,C X,Z +BIOM13 07/03/2018 mRNA BIOS13 genomic DNA isolated from 1222TI 1222TI_DNA B,C X,Z +BIOM14 05/06/2011 mRNA BIOS14 neuroblastoma DNA NBL_DNA A X,Z +BIOM15 05/06/2011 genomic DNA BIOS15 neuroblastoma DNA NBL_DNA A X,Z +BIOM16 07/03/2018 genomic DNA BIOS16 genomic DNA isolated from 1222TI 1222TI_DNA B X,Z +BIOM17 05/06/2011 genomic DNA BIOS17 neuroblastoma DNA NBL_DNA B X,Y +BIOM18 05/06/2011 mRNA BIOS18 neuroblastoma DNA NBL_DNA C X,Y +BIOM20 05/06/2011 mRNA BIOS11 BIOM11 neuroblastoma DNA NBL_DNA A,B,C X,Y +BIOM21 05/06/2011 genomic DNA BIOS12 BIOM12 neuroblastoma DNA NBL_DNA X,Y diff --git a/test_data/full_dataset/laboratory/biomaterial.tsv b/test_data/full_dataset/laboratory/biomaterial.tsv index 
bbf11cf..8de8573 100644 --- a/test_data/full_dataset/laboratory/biomaterial.tsv +++ b/test_data/full_dataset/laboratory/biomaterial.tsv @@ -1,24 +1,24 @@ -biomaterial_id biomaterial_date type src_biosource_id src_biomaterial_id description label -BIOM1T 07/03/2018 total RNA BIOS1T hepatoblastoma RNA HBL_RNA -BIOM1N 22/11/2018 total RNA BIOS1N healthy tissue HBL_normal_RNA -BIOM2 05/06/2011 genomic DNA BIOS2 neuroblastoma DNA NBL_DNA -BIOM3T 16/04/2018 genomic DNA BIOS3T nephroblastoma DNA NBL_DNA -BIOM3N 02/05/2018 genomic DNA BIOS3N healthy tissue NBL_normal_DNA -BIOM4 07/03/2018 mRNA BIOS4 genomic DNA isolated from 1222TI 1222TI_DNA -BIOM5 05/06/2011 mRNA BIOS5 neuroblastoma DNA NBL_DNA -BIOM6 05/06/2011 mRNA DNA BIOS6 neuroblastoma DNA NBL_DNA -BIOM7 07/03/2018 mRNA DNA BIOS7 genomic DNA isolated from 1222TI 1222TI_DNA -BIOM8 05/06/2011 mRNA DNA BIOS8 neuroblastoma DNA NBL_DNA -BIOM9 05/06/2011 genomic DNA BIOS9 neuroblastoma DNA NBL_DNA -BIOM10 07/03/2018 genomic DNA BIOS10 genomic DNA isolated from 1222TI 1222TI_DNA -BIOM11 05/06/2011 genomic DNA BIOS11 neuroblastoma DNA NBL_DNA -BIOM12 05/06/2011 genomic DNA BIOS12 neuroblastoma DNA NBL_DNA -BIOM13 07/03/2018 mRNA BIOS13 genomic DNA isolated from 1222TI 1222TI_DNA -BIOM14 05/06/2011 mRNA BIOS14 neuroblastoma DNA NBL_DNA -BIOM15 05/06/2011 genomic DNA BIOS15 neuroblastoma DNA NBL_DNA -BIOM16 07/03/2018 genomic DNA BIOS16 genomic DNA isolated from 1222TI 1222TI_DNA -BIOM17 05/06/2011 genomic DNA BIOS17 neuroblastoma DNA NBL_DNA -BIOM18 05/06/2011 mRNA BIOS18 neuroblastoma DNA NBL_DNA -BIOM19 07/03/2018 mRNA BIOS19 genomic DNA isolated from 1222TI 1222TI_DNA -BIOM20 05/06/2011 mRNA BIOS11 BIOM11 neuroblastoma DNA NBL_DNA -BIOM21 05/06/2011 genomic DNA BIOS12 BIOM12 neuroblastoma DNA NBL_DNA +biomaterial_id biomaterial_date type src_biosource_id src_biomaterial_id description label library_strategy analysis_type +BIOM1T 07/03/2018 total RNA BIOS1T hepatoblastoma RNA HBL_RNA A,B +BIOM1N 22/11/2018 total RNA BIOS1N 
healthy tissue HBL_normal_RNA A,B X,Y,Z +BIOM2 05/06/2011 genomic DNA BIOS2 neuroblastoma DNA NBL_DNA A,B Z +BIOM3T 16/04/2018 genomic DNA BIOS3T nephroblastoma DNA NBL_DNA A,B Z +BIOM3N 02/05/2018 genomic DNA BIOS3N healthy tissue NBL_normal_DNA A,B Y +BIOM4 07/03/2018 mRNA BIOS4 genomic DNA isolated from 1222TI 1222TI_DNA A,C Y +BIOM5 05/06/2011 mRNA BIOS5 neuroblastoma DNA NBL_DNA A,C X +BIOM6 05/06/2011 mRNA DNA BIOS6 neuroblastoma DNA NBL_DNA A,C X +BIOM7 07/03/2018 mRNA DNA BIOS7 genomic DNA isolated from 1222TI 1222TI_DNA A,C Y,Z +BIOM8 05/06/2011 mRNA DNA BIOS8 neuroblastoma DNA NBL_DNA A,C Y,Z +BIOM9 05/06/2011 genomic DNA BIOS9 neuroblastoma DNA NBL_DNA B,C Y,Z +BIOM10 07/03/2018 genomic DNA BIOS10 genomic DNA isolated from 1222TI 1222TI_DNA B,C Y,Z +BIOM11 05/06/2011 genomic DNA BIOS11 neuroblastoma DNA NBL_DNA B,C Y,Z +BIOM12 05/06/2011 genomic DNA BIOS12 neuroblastoma DNA NBL_DNA B,C X,Z +BIOM13 07/03/2018 mRNA BIOS13 genomic DNA isolated from 1222TI 1222TI_DNA B,C X,Z +BIOM14 05/06/2011 mRNA BIOS14 neuroblastoma DNA NBL_DNA A X,Z +BIOM15 05/06/2011 genomic DNA BIOS15 neuroblastoma DNA NBL_DNA A X,Z +BIOM16 07/03/2018 genomic DNA BIOS16 genomic DNA isolated from 1222TI 1222TI_DNA B X,Z +BIOM17 05/06/2011 genomic DNA BIOS17 neuroblastoma DNA NBL_DNA B X,Y +BIOM18 05/06/2011 mRNA BIOS18 neuroblastoma DNA NBL_DNA C X,Y +BIOM19 07/03/2018 mRNA BIOS19 genomic DNA isolated from 1222TI 1222TI_DNA C X,Y +BIOM20 05/06/2011 mRNA BIOS11 BIOM11 neuroblastoma DNA NBL_DNA A,B,C X,Y +BIOM21 05/06/2011 genomic DNA BIOS12 BIOM12 neuroblastoma DNA NBL_DNA X,Y From 1df22868784c80a7eedeeeb1c8450c0b2e9d781a Mon Sep 17 00:00:00 2001 From: Alessia Peviani Date: Tue, 14 Dec 2021 14:55:01 +0100 Subject: [PATCH 06/39] Rename studies files to csv --- test_data/alternative/studies/{death.tsv => death.csv} | 0 test_data/alternative/studies/{diagnosis.tsv => diagnosis.csv} | 0 test_data/alternative/studies/{individual.tsv => individual.csv} | 0 .../studies/{individual_study.tsv => 
individual_study.csv} | 0 test_data/alternative/studies/{study.tsv => study.csv} | 0 test_data/full_dataset/studies/{death.tsv => death.csv} | 0 test_data/full_dataset/studies/{diagnosis.tsv => diagnosis.csv} | 0 test_data/full_dataset/studies/{individual.tsv => individual.csv} | 0 .../studies/{individual_study.tsv => individual_study.csv} | 0 test_data/full_dataset/studies/{study.tsv => study.csv} | 0 10 files changed, 0 insertions(+), 0 deletions(-) rename test_data/alternative/studies/{death.tsv => death.csv} (100%) rename test_data/alternative/studies/{diagnosis.tsv => diagnosis.csv} (100%) rename test_data/alternative/studies/{individual.tsv => individual.csv} (100%) rename test_data/alternative/studies/{individual_study.tsv => individual_study.csv} (100%) rename test_data/alternative/studies/{study.tsv => study.csv} (100%) rename test_data/full_dataset/studies/{death.tsv => death.csv} (100%) rename test_data/full_dataset/studies/{diagnosis.tsv => diagnosis.csv} (100%) rename test_data/full_dataset/studies/{individual.tsv => individual.csv} (100%) rename test_data/full_dataset/studies/{individual_study.tsv => individual_study.csv} (100%) rename test_data/full_dataset/studies/{study.tsv => study.csv} (100%) diff --git a/test_data/alternative/studies/death.tsv b/test_data/alternative/studies/death.csv similarity index 100% rename from test_data/alternative/studies/death.tsv rename to test_data/alternative/studies/death.csv diff --git a/test_data/alternative/studies/diagnosis.tsv b/test_data/alternative/studies/diagnosis.csv similarity index 100% rename from test_data/alternative/studies/diagnosis.tsv rename to test_data/alternative/studies/diagnosis.csv diff --git a/test_data/alternative/studies/individual.tsv b/test_data/alternative/studies/individual.csv similarity index 100% rename from test_data/alternative/studies/individual.tsv rename to test_data/alternative/studies/individual.csv diff --git a/test_data/alternative/studies/individual_study.tsv 
b/test_data/alternative/studies/individual_study.csv similarity index 100% rename from test_data/alternative/studies/individual_study.tsv rename to test_data/alternative/studies/individual_study.csv diff --git a/test_data/alternative/studies/study.tsv b/test_data/alternative/studies/study.csv similarity index 100% rename from test_data/alternative/studies/study.tsv rename to test_data/alternative/studies/study.csv diff --git a/test_data/full_dataset/studies/death.tsv b/test_data/full_dataset/studies/death.csv similarity index 100% rename from test_data/full_dataset/studies/death.tsv rename to test_data/full_dataset/studies/death.csv diff --git a/test_data/full_dataset/studies/diagnosis.tsv b/test_data/full_dataset/studies/diagnosis.csv similarity index 100% rename from test_data/full_dataset/studies/diagnosis.tsv rename to test_data/full_dataset/studies/diagnosis.csv diff --git a/test_data/full_dataset/studies/individual.tsv b/test_data/full_dataset/studies/individual.csv similarity index 100% rename from test_data/full_dataset/studies/individual.tsv rename to test_data/full_dataset/studies/individual.csv diff --git a/test_data/full_dataset/studies/individual_study.tsv b/test_data/full_dataset/studies/individual_study.csv similarity index 100% rename from test_data/full_dataset/studies/individual_study.tsv rename to test_data/full_dataset/studies/individual_study.csv diff --git a/test_data/full_dataset/studies/study.tsv b/test_data/full_dataset/studies/study.csv similarity index 100% rename from test_data/full_dataset/studies/study.tsv rename to test_data/full_dataset/studies/study.csv From 4dc625ae6c74d962e47be93713491f858014cac1 Mon Sep 17 00:00:00 2001 From: Alessia Peviani Date: Tue, 14 Dec 2021 14:55:17 +0100 Subject: [PATCH 07/39] Update file names in sources_config --- config/sources_config.json | 110 ++++++++++++++++++------------------- 1 file changed, 55 insertions(+), 55 deletions(-) diff --git a/config/sources_config.json b/config/sources_config.json 
index baca6d7..ad91177 100644 --- a/config/sources_config.json +++ b/config/sources_config.json @@ -6,19 +6,19 @@ "name": "individual_id", "sources": [ { - "file": "clinic/RDP-Patient.tab", + "file": "clinic/RDP-Patient.tsv", "column": "INDIVIDUAL_ID" }, { - "file": "clinic/RDP-IC.tab", + "file": "clinic/RDP-IC.tsv", "column": "INDIVIDUAL_ID" }, { - "file": "studies/individual.txt", + "file": "studies/individual.csv", "column": "INDIVIDUAL_ID" }, { - "file": "studies/death.txt", + "file": "studies/death.csv", "column": "INDIVIDUAL_ID" } ] @@ -27,12 +27,12 @@ "name": "birth_date", "sources": [ { - "file": "clinic/RDP-Patient.tab", + "file": "clinic/RDP-Patient.tsv", "column": "Gebdat", "date_format": "%d%b%Y" }, { - "file": "studies/individual.txt", + "file": "studies/individual.csv", "column": "DTOB", "date_format": "%d/%m/%Y %H:%M:%S" } @@ -42,11 +42,11 @@ "name": "gender", "sources": [ { - "file": "clinic/RDP-Patient.tab", + "file": "clinic/RDP-Patient.tsv", "column": "Geslacht" }, { - "file": "studies/individual.txt", + "file": "studies/individual.csv", "column": "SEX" } ] @@ -55,12 +55,12 @@ "name": "death_date", "sources": [ { - "file": "clinic/RDP-Patient.tab", + "file": "clinic/RDP-Patient.tsv", "column": "Overldat", "date_format": "%d%b%Y" }, { - "file": "studies/death.txt", + "file": "studies/death.csv", "column": "DTDEATH", "date_format": "%d/%m/%Y %H:%M:%S" } @@ -70,11 +70,11 @@ "name": "ic_type", "sources": [ { - "file": "clinic/RDP-IC.tab", + "file": "clinic/RDP-IC.tsv", "column": "00004_Toestemmingsstatus" }, { - "file": "studies/individual.txt", + "file": "studies/individual.csv", "column": "IFCDATR" } ] @@ -83,7 +83,7 @@ "name": "ic_given_date", "sources": [ { - "file": "clinic/RDP-IC.tab", + "file": "clinic/RDP-IC.tsv", "column": "00007_Datum toestemming", "date_format": "%d/%m/%Y" } @@ -93,7 +93,7 @@ "name": "ic_withdrawn_date", "sources": [ { - "file": "clinic/RDP-IC.tab", + "file": "clinic/RDP-IC.tsv", "column": "00010_Datum geen toestemming", 
"date_format": "%d/%m/%Y" } @@ -103,7 +103,7 @@ "name": "report_her_susc", "sources": [ { - "file": "clinic/RDP-IC.tab", + "file": "clinic/RDP-IC.tsv", "column": "00012_Datum einde deelname", "date_format": "%d/%m/%Y" } @@ -117,7 +117,7 @@ "name": "individual_id", "sources": [ { - "file": "studies/diagnosis.txt", + "file": "studies/diagnosis.csv", "column": "INDIVIDUAL_ID" } ] @@ -126,7 +126,7 @@ "name": "diagnosis_id", "sources": [ { - "file": "studies/diagnosis.txt", + "file": "studies/diagnosis.csv", "column": "CIDDIAG" } ] @@ -135,7 +135,7 @@ "name": "tumor_type", "sources": [ { - "file": "studies/diagnosis.txt", + "file": "studies/diagnosis.csv", "column": "DIAGCD" } ] @@ -144,7 +144,7 @@ "name": "topography", "sources": [ { - "file": "studies/diagnosis.txt", + "file": "studies/diagnosis.csv", "column": "PLOCCD" } ] @@ -153,7 +153,7 @@ "name": "tumor_stage", "sources": [ { - "file": "studies/diagnosis.txt", + "file": "studies/diagnosis.csv", "column": "DIAGGRSTX" } ] @@ -162,7 +162,7 @@ "name": "diagnosis_date", "sources": [ { - "file": "studies/diagnosis.txt", + "file": "studies/diagnosis.csv", "column": "IDAABA", "date_format": "%d/%m/%Y %H:%M:%S" } @@ -172,7 +172,7 @@ "name": "diagnosis_center", "sources": [ { - "file": "studies/diagnosis.txt", + "file": "studies/diagnosis.csv", "column": "HOSPDIAG" } ] @@ -185,7 +185,7 @@ "name": "biosource_id", "sources": [ { - "file": "laboratory/biosource.txt" + "file": "laboratory/biosource.tsv" } ] }, @@ -193,7 +193,7 @@ "name": "individual_id", "sources": [ { - "file": "laboratory/biosource.txt" + "file": "laboratory/biosource.tsv" } ] }, @@ -201,7 +201,7 @@ "name": "diagnosis_id", "sources": [ { - "file": "laboratory/biosource.txt" + "file": "laboratory/biosource.tsv" } ] }, @@ -209,7 +209,7 @@ "name": "src_biosource_id", "sources": [ { - "file": "laboratory/biosource.txt" + "file": "laboratory/biosource.tsv" } ] }, @@ -217,7 +217,7 @@ "name": "biosource_dedicated", "sources": [ { - "file": 
"laboratory/biosource.txt" + "file": "laboratory/biosource.tsv" } ] }, @@ -225,7 +225,7 @@ "name": "tissue", "sources": [ { - "file": "laboratory/biosource.txt" + "file": "laboratory/biosource.tsv" } ] }, @@ -233,7 +233,7 @@ "name": "biosource_date", "sources": [ { - "file": "laboratory/biosource.txt", + "file": "laboratory/biosource.tsv", "date_format": "%d/%m/%Y" } ] @@ -242,7 +242,7 @@ "name": "disease_status", "sources": [ { - "file": "laboratory/biosource.txt" + "file": "laboratory/biosource.tsv" } ] }, @@ -250,7 +250,7 @@ "name": "tumor_percentage", "sources": [ { - "file": "laboratory/biosource.txt" + "file": "laboratory/biosource.tsv" } ] } @@ -262,7 +262,7 @@ "name": "biomaterial_id", "sources": [ { - "file": "laboratory/biomaterial.txt" + "file": "laboratory/biomaterial.tsv" } ] }, @@ -270,7 +270,7 @@ "name": "src_biosource_id", "sources": [ { - "file": "laboratory/biomaterial.txt" + "file": "laboratory/biomaterial.tsv" } ] }, @@ -278,7 +278,7 @@ "name": "src_biomaterial_id", "sources": [ { - "file": "laboratory/biomaterial.txt" + "file": "laboratory/biomaterial.tsv" } ] }, @@ -286,7 +286,7 @@ "name": "biomaterial_date", "sources": [ { - "file": "laboratory/biomaterial.txt", + "file": "laboratory/biomaterial.tsv", "date_format": "%d/%m/%Y" } ] @@ -295,7 +295,7 @@ "name": "type", "sources": [ { - "file": "laboratory/biomaterial.txt" + "file": "laboratory/biomaterial.tsv" } ] }, @@ -303,7 +303,7 @@ "name": "library_strategy", "sources": [ { - "file": "laboratory/biomaterial.txt" + "file": "laboratory/biomaterial.tsv" } ] }, @@ -311,7 +311,7 @@ "name": "analysis_type", "sources": [ { - "file": "laboratory/biomaterial.txt" + "file": "laboratory/biomaterial.tsv" } ] } @@ -323,7 +323,7 @@ "name": "study_id", "sources": [ { - "file": "studies/study.txt", + "file": "studies/study.csv", "column": "STUDY_ID" } ] @@ -332,7 +332,7 @@ "name": "acronym", "sources": [ { - "file": "studies/study.txt" + "file": "studies/study.csv" } ] }, @@ -340,7 +340,7 @@ "name": 
"title", "sources": [ { - "file": "studies/study.txt" + "file": "studies/study.csv" } ] }, @@ -348,7 +348,7 @@ "name": "datadictionary", "sources": [ { - "file": "studies/study.txt" + "file": "studies/study.csv" } ] } @@ -360,7 +360,7 @@ "name": "study_id_individual_study_id", "sources": [ { - "file": "studies/individual_study.txt", + "file": "studies/individual_study.csv", "column": "STUDY_ID_INDIVIDUAL_STUDY_ID" } ] @@ -369,7 +369,7 @@ "name": "individual_study_id", "sources": [ { - "file": "studies/individual_study.txt", + "file": "studies/individual_study.csv", "column": "INDIVIDUAL_STUDY_ID" } ] @@ -378,7 +378,7 @@ "name": "individual_id", "sources": [ { - "file": "studies/individual_study.txt", + "file": "studies/individual_study.csv", "column": "INDIVIDUAL_ID" } ] @@ -387,7 +387,7 @@ "name": "study_id", "sources": [ { - "file": "studies/individual_study.txt", + "file": "studies/individual_study.csv", "column": "STUDY_ID" } ] @@ -396,25 +396,25 @@ } }, "codebooks": { - "studies/individual.txt": "studies/individual_codebook.txt", - "studies/diagnosis.txt": "studies/diagnosis_codebook.txt", - "studies/death.txt": "studies/death_codebook.txt", - "clinic/RDP-Patient.tab": "clinic/RDP-Patient_codebook.txt" + "studies/individual.csv": "studies/individual_codebook.tsv", + "studies/diagnosis.csv": "studies/diagnosis_codebook.tsv", + "studies/death.csv": "studies/death_codebook.tsv", + "clinic/RDP-Patient.tsv": "clinic/RDP-Patient_codebook.tsv" }, "file_format": { - "studies/individual.txt": { + "studies/individual.csv": { "delimiter": "," }, - "studies/diagnosis.txt": { + "studies/diagnosis.csv": { "delimiter": "," }, - "studies/death.txt": { + "studies/death.csv": { "delimiter": "," }, - "studies/study.txt": { + "studies/study.csv": { "delimiter": "," }, - "studies/individual_study.txt": { + "studies/individual_study.csv": { "delimiter": "," } } From 89c212ea9bfee1930b5ffeae3047de3d44854763 Mon Sep 17 00:00:00 2001 From: Alessia Peviani Date: Tue, 14 Dec 2021 
15:02:26 +0100 Subject: [PATCH 08/39] Add requirements --- requirements.txt | 1 + 1 file changed, 1 insertion(+) create mode 100644 requirements.txt diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..3ea167d --- /dev/null +++ b/requirements.txt @@ -0,0 +1 @@ +git+https://github.com/thehyve/python_csr2transmart.git@master \ No newline at end of file From 2fbbab44164193aedc503feeaebab0dba22d8ff5 Mon Sep 17 00:00:00 2001 From: Alessia Peviani Date: Tue, 14 Dec 2021 15:27:43 +0100 Subject: [PATCH 09/39] Resume tab at end of line in biomaterial data --- test_data/alternative/laboratory/biomaterial.tsv | 2 +- test_data/full_dataset/laboratory/biomaterial.tsv | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/test_data/alternative/laboratory/biomaterial.tsv b/test_data/alternative/laboratory/biomaterial.tsv index 068967e..610d1f4 100644 --- a/test_data/alternative/laboratory/biomaterial.tsv +++ b/test_data/alternative/laboratory/biomaterial.tsv @@ -1,5 +1,5 @@ biomaterial_id biomaterial_date type src_biosource_id src_biomaterial_id description label library_strategy analysis_type -BIOM1T 07/03/2018 total RNA BIOS1T hepatoblastoma RNA HBL_RNA A,B +BIOM1T 07/03/2018 total RNA BIOS1T hepatoblastoma RNA HBL_RNA A,B BIOM1N 22/11/2018 total RNA BIOS1N healthy tissue HBL_normal_RNA A,B X,Y,Z BIOM3T 16/04/2018 genomic DNA BIOS3T nephroblastoma DNA NBL_DNA A,B Z BIOM3N 02/05/2018 genomic DNA BIOS3N healthy tissue NBL_normal_DNA A,B Y diff --git a/test_data/full_dataset/laboratory/biomaterial.tsv b/test_data/full_dataset/laboratory/biomaterial.tsv index 8de8573..5bf027b 100644 --- a/test_data/full_dataset/laboratory/biomaterial.tsv +++ b/test_data/full_dataset/laboratory/biomaterial.tsv @@ -1,5 +1,5 @@ biomaterial_id biomaterial_date type src_biosource_id src_biomaterial_id description label library_strategy analysis_type -BIOM1T 07/03/2018 total RNA BIOS1T hepatoblastoma RNA HBL_RNA A,B +BIOM1T 07/03/2018 total RNA BIOS1T 
hepatoblastoma RNA HBL_RNA A,B BIOM1N 22/11/2018 total RNA BIOS1N healthy tissue HBL_normal_RNA A,B X,Y,Z BIOM2 05/06/2011 genomic DNA BIOS2 neuroblastoma DNA NBL_DNA A,B Z BIOM3T 16/04/2018 genomic DNA BIOS3T nephroblastoma DNA NBL_DNA A,B Z From 8bf34896f0af499c60335a7910ab1fb8a87b6089 Mon Sep 17 00:00:00 2001 From: Alessia Peviani Date: Tue, 14 Dec 2021 16:39:32 +0100 Subject: [PATCH 10/39] Delete old sha1 sum files --- test_data/alternative/clinic/RDP-IC.tab.sha1 | 1 - test_data/alternative/clinic/RDP-Patient.tab.sha1 | 1 - test_data/alternative/clinic/RDP-Patient_codebook.txt.sha1 | 1 - test_data/alternative/laboratory/biomaterial.txt.sha1 | 1 - test_data/alternative/laboratory/biosource.txt.sha1 | 1 - test_data/alternative/studies/death.txt.sha1 | 1 - test_data/alternative/studies/death_codebook.txt.sha1 | 1 - test_data/alternative/studies/diagnosis.txt.sha1 | 1 - test_data/alternative/studies/diagnosis_codebook.txt.sha1 | 1 - test_data/alternative/studies/individual.txt.sha1 | 1 - test_data/alternative/studies/individual_codebook.txt.sha1 | 1 - test_data/alternative/studies/individual_study.txt.sha1 | 1 - test_data/alternative/studies/study.txt.sha1 | 1 - test_data/full_dataset/clinic/RDP-IC.tab.sha1 | 1 - test_data/full_dataset/clinic/RDP-Patient.tab.sha1 | 1 - test_data/full_dataset/clinic/RDP-Patient_codebook.txt.sha1 | 1 - test_data/full_dataset/laboratory/biomaterial.txt.sha1 | 1 - test_data/full_dataset/laboratory/biosource.txt.sha1 | 1 - test_data/full_dataset/studies/death.txt.sha1 | 1 - test_data/full_dataset/studies/death_codebook.txt.sha1 | 1 - test_data/full_dataset/studies/diagnosis.txt.sha1 | 1 - test_data/full_dataset/studies/diagnosis_codebook.txt.sha1 | 1 - test_data/full_dataset/studies/individual.txt.sha1 | 1 - test_data/full_dataset/studies/individual_codebook.txt.sha1 | 1 - test_data/full_dataset/studies/individual_study.txt.sha1 | 1 - test_data/full_dataset/studies/study.txt.sha1 | 1 - 26 files changed, 26 deletions(-) delete mode 
100644 test_data/alternative/clinic/RDP-IC.tab.sha1 delete mode 100644 test_data/alternative/clinic/RDP-Patient.tab.sha1 delete mode 100644 test_data/alternative/clinic/RDP-Patient_codebook.txt.sha1 delete mode 100644 test_data/alternative/laboratory/biomaterial.txt.sha1 delete mode 100644 test_data/alternative/laboratory/biosource.txt.sha1 delete mode 100644 test_data/alternative/studies/death.txt.sha1 delete mode 100644 test_data/alternative/studies/death_codebook.txt.sha1 delete mode 100644 test_data/alternative/studies/diagnosis.txt.sha1 delete mode 100644 test_data/alternative/studies/diagnosis_codebook.txt.sha1 delete mode 100644 test_data/alternative/studies/individual.txt.sha1 delete mode 100644 test_data/alternative/studies/individual_codebook.txt.sha1 delete mode 100644 test_data/alternative/studies/individual_study.txt.sha1 delete mode 100644 test_data/alternative/studies/study.txt.sha1 delete mode 100644 test_data/full_dataset/clinic/RDP-IC.tab.sha1 delete mode 100644 test_data/full_dataset/clinic/RDP-Patient.tab.sha1 delete mode 100644 test_data/full_dataset/clinic/RDP-Patient_codebook.txt.sha1 delete mode 100644 test_data/full_dataset/laboratory/biomaterial.txt.sha1 delete mode 100644 test_data/full_dataset/laboratory/biosource.txt.sha1 delete mode 100644 test_data/full_dataset/studies/death.txt.sha1 delete mode 100644 test_data/full_dataset/studies/death_codebook.txt.sha1 delete mode 100644 test_data/full_dataset/studies/diagnosis.txt.sha1 delete mode 100644 test_data/full_dataset/studies/diagnosis_codebook.txt.sha1 delete mode 100644 test_data/full_dataset/studies/individual.txt.sha1 delete mode 100644 test_data/full_dataset/studies/individual_codebook.txt.sha1 delete mode 100644 test_data/full_dataset/studies/individual_study.txt.sha1 delete mode 100644 test_data/full_dataset/studies/study.txt.sha1 diff --git a/test_data/alternative/clinic/RDP-IC.tab.sha1 b/test_data/alternative/clinic/RDP-IC.tab.sha1 deleted file mode 100644 index a616f62..0000000 
--- a/test_data/alternative/clinic/RDP-IC.tab.sha1 +++ /dev/null @@ -1 +0,0 @@ -f84346dff7e4fae2e720b118d1f4cd846a4ec5d3 RDP-IC.tab diff --git a/test_data/alternative/clinic/RDP-Patient.tab.sha1 b/test_data/alternative/clinic/RDP-Patient.tab.sha1 deleted file mode 100644 index 2712e56..0000000 --- a/test_data/alternative/clinic/RDP-Patient.tab.sha1 +++ /dev/null @@ -1 +0,0 @@ -b7860a8380fc89de15598cae38effad4979e6c84 RDP-Patient.tab diff --git a/test_data/alternative/clinic/RDP-Patient_codebook.txt.sha1 b/test_data/alternative/clinic/RDP-Patient_codebook.txt.sha1 deleted file mode 100644 index 7a26340..0000000 --- a/test_data/alternative/clinic/RDP-Patient_codebook.txt.sha1 +++ /dev/null @@ -1 +0,0 @@ -7854a94ee9adb8f765a65db17cd17fcc9fea708d RDP-Patient_codebook.txt diff --git a/test_data/alternative/laboratory/biomaterial.txt.sha1 b/test_data/alternative/laboratory/biomaterial.txt.sha1 deleted file mode 100644 index de6f64f..0000000 --- a/test_data/alternative/laboratory/biomaterial.txt.sha1 +++ /dev/null @@ -1 +0,0 @@ -0d1fe56d7e23f84753ef00c6c694d12aa95ca97a biomaterial.txt diff --git a/test_data/alternative/laboratory/biosource.txt.sha1 b/test_data/alternative/laboratory/biosource.txt.sha1 deleted file mode 100644 index f9c5381..0000000 --- a/test_data/alternative/laboratory/biosource.txt.sha1 +++ /dev/null @@ -1 +0,0 @@ -9e5f88297f0284d32f147448e96ae07397dfd567 biosource.txt diff --git a/test_data/alternative/studies/death.txt.sha1 b/test_data/alternative/studies/death.txt.sha1 deleted file mode 100644 index 2bd26e2..0000000 --- a/test_data/alternative/studies/death.txt.sha1 +++ /dev/null @@ -1 +0,0 @@ -e0eec424c2bb325a0bd5c8be3438f86f13b20b01 death.txt diff --git a/test_data/alternative/studies/death_codebook.txt.sha1 b/test_data/alternative/studies/death_codebook.txt.sha1 deleted file mode 100644 index c7ec326..0000000 --- a/test_data/alternative/studies/death_codebook.txt.sha1 +++ /dev/null @@ -1 +0,0 @@ -8cc20a3917db3e251d9426d078ce74969e531931 
PMCST000AAA_death_codebook.txt diff --git a/test_data/alternative/studies/diagnosis.txt.sha1 b/test_data/alternative/studies/diagnosis.txt.sha1 deleted file mode 100644 index ea10f3f..0000000 --- a/test_data/alternative/studies/diagnosis.txt.sha1 +++ /dev/null @@ -1 +0,0 @@ -2489e88660e3cf07a4ad56085651707186b79294 diagnosis.txt diff --git a/test_data/alternative/studies/diagnosis_codebook.txt.sha1 b/test_data/alternative/studies/diagnosis_codebook.txt.sha1 deleted file mode 100644 index 24531c1..0000000 --- a/test_data/alternative/studies/diagnosis_codebook.txt.sha1 +++ /dev/null @@ -1 +0,0 @@ -8cc20a3917db3e251d9426d078ce74969e531931 PMCST000AAA_diagnosis_codebook.txt diff --git a/test_data/alternative/studies/individual.txt.sha1 b/test_data/alternative/studies/individual.txt.sha1 deleted file mode 100644 index 6ad5870..0000000 --- a/test_data/alternative/studies/individual.txt.sha1 +++ /dev/null @@ -1 +0,0 @@ -1c9e88792a41c571d9017932d6a07d47e15b9336 individual.txt diff --git a/test_data/alternative/studies/individual_codebook.txt.sha1 b/test_data/alternative/studies/individual_codebook.txt.sha1 deleted file mode 100644 index e05732f..0000000 --- a/test_data/alternative/studies/individual_codebook.txt.sha1 +++ /dev/null @@ -1 +0,0 @@ -8cc20a3917db3e251d9426d078ce74969e531931 PMCST000AAA_individual_codebook.txt diff --git a/test_data/alternative/studies/individual_study.txt.sha1 b/test_data/alternative/studies/individual_study.txt.sha1 deleted file mode 100644 index 5da61b0..0000000 --- a/test_data/alternative/studies/individual_study.txt.sha1 +++ /dev/null @@ -1 +0,0 @@ -87bcb438e7a36127a3758c8fb4d94b58777e808a individual_study.txt diff --git a/test_data/alternative/studies/study.txt.sha1 b/test_data/alternative/studies/study.txt.sha1 deleted file mode 100644 index f729831..0000000 --- a/test_data/alternative/studies/study.txt.sha1 +++ /dev/null @@ -1 +0,0 @@ -683f607214f3b53c06a0431a2303c0322cc99ce6 study.txt diff --git 
a/test_data/full_dataset/clinic/RDP-IC.tab.sha1 b/test_data/full_dataset/clinic/RDP-IC.tab.sha1 deleted file mode 100644 index a616f62..0000000 --- a/test_data/full_dataset/clinic/RDP-IC.tab.sha1 +++ /dev/null @@ -1 +0,0 @@ -f84346dff7e4fae2e720b118d1f4cd846a4ec5d3 RDP-IC.tab diff --git a/test_data/full_dataset/clinic/RDP-Patient.tab.sha1 b/test_data/full_dataset/clinic/RDP-Patient.tab.sha1 deleted file mode 100644 index 22e83aa..0000000 --- a/test_data/full_dataset/clinic/RDP-Patient.tab.sha1 +++ /dev/null @@ -1 +0,0 @@ -67082ab768b32dfc28c0d9d665d646f4f355140f RDP-Patient.tab diff --git a/test_data/full_dataset/clinic/RDP-Patient_codebook.txt.sha1 b/test_data/full_dataset/clinic/RDP-Patient_codebook.txt.sha1 deleted file mode 100644 index 7a26340..0000000 --- a/test_data/full_dataset/clinic/RDP-Patient_codebook.txt.sha1 +++ /dev/null @@ -1 +0,0 @@ -7854a94ee9adb8f765a65db17cd17fcc9fea708d RDP-Patient_codebook.txt diff --git a/test_data/full_dataset/laboratory/biomaterial.txt.sha1 b/test_data/full_dataset/laboratory/biomaterial.txt.sha1 deleted file mode 100644 index 0b331b9..0000000 --- a/test_data/full_dataset/laboratory/biomaterial.txt.sha1 +++ /dev/null @@ -1 +0,0 @@ -a3fe2d8a4db26c0dd6400caa6f662d6826878862 biomaterial.txt diff --git a/test_data/full_dataset/laboratory/biosource.txt.sha1 b/test_data/full_dataset/laboratory/biosource.txt.sha1 deleted file mode 100644 index e796242..0000000 --- a/test_data/full_dataset/laboratory/biosource.txt.sha1 +++ /dev/null @@ -1 +0,0 @@ -6985168bcc2c5d64e11c5317a70478775ba8eec3 biosource.txt diff --git a/test_data/full_dataset/studies/death.txt.sha1 b/test_data/full_dataset/studies/death.txt.sha1 deleted file mode 100644 index 65d0d74..0000000 --- a/test_data/full_dataset/studies/death.txt.sha1 +++ /dev/null @@ -1 +0,0 @@ -b886a68f4d69de8dd1e76739af4fa3c688520f8e PMCST000AAA_death.txt diff --git a/test_data/full_dataset/studies/death_codebook.txt.sha1 b/test_data/full_dataset/studies/death_codebook.txt.sha1 deleted file 
mode 100644 index c7ec326..0000000 --- a/test_data/full_dataset/studies/death_codebook.txt.sha1 +++ /dev/null @@ -1 +0,0 @@ -8cc20a3917db3e251d9426d078ce74969e531931 PMCST000AAA_death_codebook.txt diff --git a/test_data/full_dataset/studies/diagnosis.txt.sha1 b/test_data/full_dataset/studies/diagnosis.txt.sha1 deleted file mode 100644 index fac1e34..0000000 --- a/test_data/full_dataset/studies/diagnosis.txt.sha1 +++ /dev/null @@ -1 +0,0 @@ -c2c862477e73c9059b5b2e8cc7b8049a74099ea7 PMCST000AAA_diagnosis.txt diff --git a/test_data/full_dataset/studies/diagnosis_codebook.txt.sha1 b/test_data/full_dataset/studies/diagnosis_codebook.txt.sha1 deleted file mode 100644 index 24531c1..0000000 --- a/test_data/full_dataset/studies/diagnosis_codebook.txt.sha1 +++ /dev/null @@ -1 +0,0 @@ -8cc20a3917db3e251d9426d078ce74969e531931 PMCST000AAA_diagnosis_codebook.txt diff --git a/test_data/full_dataset/studies/individual.txt.sha1 b/test_data/full_dataset/studies/individual.txt.sha1 deleted file mode 100644 index 92c2fd7..0000000 --- a/test_data/full_dataset/studies/individual.txt.sha1 +++ /dev/null @@ -1 +0,0 @@ -d70a6c8727d3d8dd40eaf2cf87eb71d9ee581387 PMCST000AAA_individual.txt diff --git a/test_data/full_dataset/studies/individual_codebook.txt.sha1 b/test_data/full_dataset/studies/individual_codebook.txt.sha1 deleted file mode 100644 index e05732f..0000000 --- a/test_data/full_dataset/studies/individual_codebook.txt.sha1 +++ /dev/null @@ -1 +0,0 @@ -8cc20a3917db3e251d9426d078ce74969e531931 PMCST000AAA_individual_codebook.txt diff --git a/test_data/full_dataset/studies/individual_study.txt.sha1 b/test_data/full_dataset/studies/individual_study.txt.sha1 deleted file mode 100644 index 4a8ad83..0000000 --- a/test_data/full_dataset/studies/individual_study.txt.sha1 +++ /dev/null @@ -1 +0,0 @@ -97a9c1f7f565c76d95d0bdacc1043247c52259a7 individual_study.txt diff --git a/test_data/full_dataset/studies/study.txt.sha1 b/test_data/full_dataset/studies/study.txt.sha1 deleted file mode 100644 
index f729831..0000000 --- a/test_data/full_dataset/studies/study.txt.sha1 +++ /dev/null @@ -1 +0,0 @@ -683f607214f3b53c06a0431a2303c0322cc99ce6 study.txt From 334f68edfdb135383276f30463305f357df9862e Mon Sep 17 00:00:00 2001 From: Alessia Peviani Date: Tue, 14 Dec 2021 16:57:41 +0100 Subject: [PATCH 11/39] Add basic radiology data and sources config --- config/sources_config.json | 60 +++++++++++++++++++ .../full_dataset/laboratory/radiology.tsv | 4 ++ 2 files changed, 64 insertions(+) create mode 100644 test_data/full_dataset/laboratory/radiology.tsv diff --git a/config/sources_config.json b/config/sources_config.json index ad91177..5b00800 100644 --- a/config/sources_config.json +++ b/config/sources_config.json @@ -317,6 +317,66 @@ } ] }, + "Radiology": { + "attributes": [ + { + "name": "radiology_id", + "sources": [ + { + "file": "laboratory/radiology.tsv" + } + ] + }, + { + "name": "examination_date", + "sources": [ + { + "file": "laboratory/radiology.tsv" + } + ] + }, + { + "name": "image_type", + "sources": [ + { + "file": "laboratory/radiology.tsv" + } + ] + }, + { + "name": "field_strength", + "sources": [ + { + "file": "laboratory/radiology.tsv" + } + ] + }, + { + "name": "individual_id", + "sources": [ + { + "file": "laboratory/radiology.tsv" + } + ] + }, + { + "name": "diagnosis_id", + "sources": [ + { + "file": "laboratory/radiology.tsv" + } + ] + }, + { + "name": "body_part", + "sources": [ + { + "file": "laboratory/radiology.tsv" + } + ] + } + ] + }, "Study": { "attributes": [ { diff --git a/test_data/full_dataset/laboratory/radiology.tsv b/test_data/full_dataset/laboratory/radiology.tsv new file mode 100644 index 0000000..6979110 --- /dev/null +++ b/test_data/full_dataset/laboratory/radiology.tsv @@ -0,0 +1,4 @@ +radiology_id examination_date image_type field_strength individual_id diagnosis_id body_part +R1 2016-05-01 type_2 PAT1 DIA1 torso +R2 2016-07-02 type_2 50 PAT2 DIA2 torso +R3 2016-11-03 type_1 20 PAT2 legs From 
178c320744e9fbb1af08118434ac1df1c38be0bf Mon Sep 17 00:00:00 2001 From: Alessia Peviani Date: Tue, 14 Dec 2021 16:59:31 +0100 Subject: [PATCH 12/39] Ignore validation output --- .gitignore | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index efe313b..766d469 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,6 @@ +# Project files +validation_results/ + # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] @@ -108,4 +111,4 @@ ENV/ .idea/* # OSX -.DS_Store \ No newline at end of file +.DS_Store From 73d7cf59ed5e549b4b234b3fa147af2057e6aca4 Mon Sep 17 00:00:00 2001 From: Alessia Peviani Date: Tue, 14 Dec 2021 18:31:35 +0100 Subject: [PATCH 13/39] Add utility to generate sha1sum files --- generate_sha1sum.py | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) create mode 100644 generate_sha1sum.py diff --git a/generate_sha1sum.py b/generate_sha1sum.py new file mode 100644 index 0000000..339a018 --- /dev/null +++ b/generate_sha1sum.py @@ -0,0 +1,33 @@ +import hashlib +import os + + +def compute_checksum(path, algorithm: str) -> str: + """ Generates a hex digest using specified algorithm for a file. """ + buffer_size = 65536 + hash_builder = hashlib.new(algorithm) + + with open(path, 'rb') as f: + while True: + data = f.read(buffer_size) + if not data: + break + hash_builder.update(data) + + return hash_builder.hexdigest() + + +def traverse(top_dir, algorithm: str = 'sha1'): + for root, d_names, f_names in os.walk(top_dir): + for f_name in f_names: + if f_name.endswith(algorithm): + continue + f_in = os.path.join(root, f_name) + f_out = f_in + '.' 
+ algorithm + checksum = compute_checksum(f_in, algorithm) + with open(f_out, 'w') as f: + f.write(checksum + ' ' + f_name) + + +if __name__ == '__main__': + traverse('test_data') From 9ffe2bc6440d2a5fd40935659ac05975af7a9715 Mon Sep 17 00:00:00 2001 From: Alessia Peviani Date: Tue, 14 Dec 2021 18:31:52 +0100 Subject: [PATCH 14/39] Update sha1sum files --- test_data/alternative/clinic/RDP-IC.tsv.sha1 | 1 + test_data/alternative/clinic/RDP-Patient.tsv.sha1 | 1 + test_data/alternative/clinic/RDP-Patient_codebook.tsv.sha1 | 1 + test_data/alternative/laboratory/biomaterial.tsv.sha1 | 1 + test_data/alternative/laboratory/biosource.tsv.sha1 | 1 + test_data/alternative/studies/death.csv.sha1 | 1 + test_data/alternative/studies/death_codebook.tsv.sha1 | 1 + test_data/alternative/studies/diagnosis.csv.sha1 | 1 + test_data/alternative/studies/diagnosis_codebook.tsv.sha1 | 1 + test_data/alternative/studies/individual.csv.sha1 | 1 + test_data/alternative/studies/individual_codebook.tsv.sha1 | 1 + test_data/alternative/studies/individual_study.csv.sha1 | 1 + test_data/alternative/studies/study.csv.sha1 | 1 + test_data/full_dataset/clinic/RDP-IC.tsv.sha1 | 1 + test_data/full_dataset/clinic/RDP-Patient.tsv.sha1 | 1 + test_data/full_dataset/clinic/RDP-Patient_codebook.tsv.sha1 | 1 + test_data/full_dataset/laboratory/biomaterial.tsv.sha1 | 1 + test_data/full_dataset/laboratory/biosource.tsv.sha1 | 1 + test_data/full_dataset/laboratory/radiology.tsv.sha1 | 1 + test_data/full_dataset/studies/death.csv.sha1 | 1 + test_data/full_dataset/studies/death_codebook.tsv.sha1 | 1 + test_data/full_dataset/studies/diagnosis.csv.sha1 | 1 + test_data/full_dataset/studies/diagnosis_codebook.tsv.sha1 | 1 + test_data/full_dataset/studies/individual.csv.sha1 | 1 + test_data/full_dataset/studies/individual_codebook.tsv.sha1 | 1 + test_data/full_dataset/studies/individual_study.csv.sha1 | 1 + test_data/full_dataset/studies/study.csv.sha1 | 1 + 27 files changed, 27 insertions(+) create mode 100644 
test_data/alternative/clinic/RDP-IC.tsv.sha1 create mode 100644 test_data/alternative/clinic/RDP-Patient.tsv.sha1 create mode 100644 test_data/alternative/clinic/RDP-Patient_codebook.tsv.sha1 create mode 100644 test_data/alternative/laboratory/biomaterial.tsv.sha1 create mode 100644 test_data/alternative/laboratory/biosource.tsv.sha1 create mode 100644 test_data/alternative/studies/death.csv.sha1 create mode 100644 test_data/alternative/studies/death_codebook.tsv.sha1 create mode 100644 test_data/alternative/studies/diagnosis.csv.sha1 create mode 100644 test_data/alternative/studies/diagnosis_codebook.tsv.sha1 create mode 100644 test_data/alternative/studies/individual.csv.sha1 create mode 100644 test_data/alternative/studies/individual_codebook.tsv.sha1 create mode 100644 test_data/alternative/studies/individual_study.csv.sha1 create mode 100644 test_data/alternative/studies/study.csv.sha1 create mode 100644 test_data/full_dataset/clinic/RDP-IC.tsv.sha1 create mode 100644 test_data/full_dataset/clinic/RDP-Patient.tsv.sha1 create mode 100644 test_data/full_dataset/clinic/RDP-Patient_codebook.tsv.sha1 create mode 100644 test_data/full_dataset/laboratory/biomaterial.tsv.sha1 create mode 100644 test_data/full_dataset/laboratory/biosource.tsv.sha1 create mode 100644 test_data/full_dataset/laboratory/radiology.tsv.sha1 create mode 100644 test_data/full_dataset/studies/death.csv.sha1 create mode 100644 test_data/full_dataset/studies/death_codebook.tsv.sha1 create mode 100644 test_data/full_dataset/studies/diagnosis.csv.sha1 create mode 100644 test_data/full_dataset/studies/diagnosis_codebook.tsv.sha1 create mode 100644 test_data/full_dataset/studies/individual.csv.sha1 create mode 100644 test_data/full_dataset/studies/individual_codebook.tsv.sha1 create mode 100644 test_data/full_dataset/studies/individual_study.csv.sha1 create mode 100644 test_data/full_dataset/studies/study.csv.sha1 diff --git a/test_data/alternative/clinic/RDP-IC.tsv.sha1 
b/test_data/alternative/clinic/RDP-IC.tsv.sha1 new file mode 100644 index 0000000..31b593d --- /dev/null +++ b/test_data/alternative/clinic/RDP-IC.tsv.sha1 @@ -0,0 +1 @@ +f84346dff7e4fae2e720b118d1f4cd846a4ec5d3 RDP-IC.tsv \ No newline at end of file diff --git a/test_data/alternative/clinic/RDP-Patient.tsv.sha1 b/test_data/alternative/clinic/RDP-Patient.tsv.sha1 new file mode 100644 index 0000000..799cd2f --- /dev/null +++ b/test_data/alternative/clinic/RDP-Patient.tsv.sha1 @@ -0,0 +1 @@ +b7860a8380fc89de15598cae38effad4979e6c84 RDP-Patient.tsv \ No newline at end of file diff --git a/test_data/alternative/clinic/RDP-Patient_codebook.tsv.sha1 b/test_data/alternative/clinic/RDP-Patient_codebook.tsv.sha1 new file mode 100644 index 0000000..b47a949 --- /dev/null +++ b/test_data/alternative/clinic/RDP-Patient_codebook.tsv.sha1 @@ -0,0 +1 @@ +7854a94ee9adb8f765a65db17cd17fcc9fea708d RDP-Patient_codebook.tsv \ No newline at end of file diff --git a/test_data/alternative/laboratory/biomaterial.tsv.sha1 b/test_data/alternative/laboratory/biomaterial.tsv.sha1 new file mode 100644 index 0000000..b3773b8 --- /dev/null +++ b/test_data/alternative/laboratory/biomaterial.tsv.sha1 @@ -0,0 +1 @@ +6a43417536fe3a35e33a11b7cde47a96148ba4fa biomaterial.tsv \ No newline at end of file diff --git a/test_data/alternative/laboratory/biosource.tsv.sha1 b/test_data/alternative/laboratory/biosource.tsv.sha1 new file mode 100644 index 0000000..ba4ff4c --- /dev/null +++ b/test_data/alternative/laboratory/biosource.tsv.sha1 @@ -0,0 +1 @@ +9e5f88297f0284d32f147448e96ae07397dfd567 biosource.tsv \ No newline at end of file diff --git a/test_data/alternative/studies/death.csv.sha1 b/test_data/alternative/studies/death.csv.sha1 new file mode 100644 index 0000000..d7bb4fc --- /dev/null +++ b/test_data/alternative/studies/death.csv.sha1 @@ -0,0 +1 @@ +e0eec424c2bb325a0bd5c8be3438f86f13b20b01 death.csv \ No newline at end of file diff --git a/test_data/alternative/studies/death_codebook.tsv.sha1 
b/test_data/alternative/studies/death_codebook.tsv.sha1 new file mode 100644 index 0000000..fd01342 --- /dev/null +++ b/test_data/alternative/studies/death_codebook.tsv.sha1 @@ -0,0 +1 @@ +8cc20a3917db3e251d9426d078ce74969e531931 death_codebook.tsv \ No newline at end of file diff --git a/test_data/alternative/studies/diagnosis.csv.sha1 b/test_data/alternative/studies/diagnosis.csv.sha1 new file mode 100644 index 0000000..43f92d3 --- /dev/null +++ b/test_data/alternative/studies/diagnosis.csv.sha1 @@ -0,0 +1 @@ +2489e88660e3cf07a4ad56085651707186b79294 diagnosis.csv \ No newline at end of file diff --git a/test_data/alternative/studies/diagnosis_codebook.tsv.sha1 b/test_data/alternative/studies/diagnosis_codebook.tsv.sha1 new file mode 100644 index 0000000..2961a19 --- /dev/null +++ b/test_data/alternative/studies/diagnosis_codebook.tsv.sha1 @@ -0,0 +1 @@ +8cc20a3917db3e251d9426d078ce74969e531931 diagnosis_codebook.tsv \ No newline at end of file diff --git a/test_data/alternative/studies/individual.csv.sha1 b/test_data/alternative/studies/individual.csv.sha1 new file mode 100644 index 0000000..edeb943 --- /dev/null +++ b/test_data/alternative/studies/individual.csv.sha1 @@ -0,0 +1 @@ +1c9e88792a41c571d9017932d6a07d47e15b9336 individual.csv \ No newline at end of file diff --git a/test_data/alternative/studies/individual_codebook.tsv.sha1 b/test_data/alternative/studies/individual_codebook.tsv.sha1 new file mode 100644 index 0000000..edb3943 --- /dev/null +++ b/test_data/alternative/studies/individual_codebook.tsv.sha1 @@ -0,0 +1 @@ +8cc20a3917db3e251d9426d078ce74969e531931 individual_codebook.tsv \ No newline at end of file diff --git a/test_data/alternative/studies/individual_study.csv.sha1 b/test_data/alternative/studies/individual_study.csv.sha1 new file mode 100644 index 0000000..a564d56 --- /dev/null +++ b/test_data/alternative/studies/individual_study.csv.sha1 @@ -0,0 +1 @@ +87bcb438e7a36127a3758c8fb4d94b58777e808a individual_study.csv \ No newline at end of 
file diff --git a/test_data/alternative/studies/study.csv.sha1 b/test_data/alternative/studies/study.csv.sha1 new file mode 100644 index 0000000..19ab50e --- /dev/null +++ b/test_data/alternative/studies/study.csv.sha1 @@ -0,0 +1 @@ +683f607214f3b53c06a0431a2303c0322cc99ce6 study.csv \ No newline at end of file diff --git a/test_data/full_dataset/clinic/RDP-IC.tsv.sha1 b/test_data/full_dataset/clinic/RDP-IC.tsv.sha1 new file mode 100644 index 0000000..31b593d --- /dev/null +++ b/test_data/full_dataset/clinic/RDP-IC.tsv.sha1 @@ -0,0 +1 @@ +f84346dff7e4fae2e720b118d1f4cd846a4ec5d3 RDP-IC.tsv \ No newline at end of file diff --git a/test_data/full_dataset/clinic/RDP-Patient.tsv.sha1 b/test_data/full_dataset/clinic/RDP-Patient.tsv.sha1 new file mode 100644 index 0000000..6e24425 --- /dev/null +++ b/test_data/full_dataset/clinic/RDP-Patient.tsv.sha1 @@ -0,0 +1 @@ +67082ab768b32dfc28c0d9d665d646f4f355140f RDP-Patient.tsv \ No newline at end of file diff --git a/test_data/full_dataset/clinic/RDP-Patient_codebook.tsv.sha1 b/test_data/full_dataset/clinic/RDP-Patient_codebook.tsv.sha1 new file mode 100644 index 0000000..b47a949 --- /dev/null +++ b/test_data/full_dataset/clinic/RDP-Patient_codebook.tsv.sha1 @@ -0,0 +1 @@ +7854a94ee9adb8f765a65db17cd17fcc9fea708d RDP-Patient_codebook.tsv \ No newline at end of file diff --git a/test_data/full_dataset/laboratory/biomaterial.tsv.sha1 b/test_data/full_dataset/laboratory/biomaterial.tsv.sha1 new file mode 100644 index 0000000..ba28421 --- /dev/null +++ b/test_data/full_dataset/laboratory/biomaterial.tsv.sha1 @@ -0,0 +1 @@ +8c00d77d67d9e2c76f08bd9d3b8f1d14410da8fb biomaterial.tsv \ No newline at end of file diff --git a/test_data/full_dataset/laboratory/biosource.tsv.sha1 b/test_data/full_dataset/laboratory/biosource.tsv.sha1 new file mode 100644 index 0000000..a56b85c --- /dev/null +++ b/test_data/full_dataset/laboratory/biosource.tsv.sha1 @@ -0,0 +1 @@ +6985168bcc2c5d64e11c5317a70478775ba8eec3 biosource.tsv \ No newline at end of 
file diff --git a/test_data/full_dataset/laboratory/radiology.tsv.sha1 b/test_data/full_dataset/laboratory/radiology.tsv.sha1 new file mode 100644 index 0000000..740bf22 --- /dev/null +++ b/test_data/full_dataset/laboratory/radiology.tsv.sha1 @@ -0,0 +1 @@ +4402798843a617f81f2a245e6532b628d91e463d radiology.tsv \ No newline at end of file diff --git a/test_data/full_dataset/studies/death.csv.sha1 b/test_data/full_dataset/studies/death.csv.sha1 new file mode 100644 index 0000000..3b229e3 --- /dev/null +++ b/test_data/full_dataset/studies/death.csv.sha1 @@ -0,0 +1 @@ +b886a68f4d69de8dd1e76739af4fa3c688520f8e death.csv \ No newline at end of file diff --git a/test_data/full_dataset/studies/death_codebook.tsv.sha1 b/test_data/full_dataset/studies/death_codebook.tsv.sha1 new file mode 100644 index 0000000..fd01342 --- /dev/null +++ b/test_data/full_dataset/studies/death_codebook.tsv.sha1 @@ -0,0 +1 @@ +8cc20a3917db3e251d9426d078ce74969e531931 death_codebook.tsv \ No newline at end of file diff --git a/test_data/full_dataset/studies/diagnosis.csv.sha1 b/test_data/full_dataset/studies/diagnosis.csv.sha1 new file mode 100644 index 0000000..2764067 --- /dev/null +++ b/test_data/full_dataset/studies/diagnosis.csv.sha1 @@ -0,0 +1 @@ +c2c862477e73c9059b5b2e8cc7b8049a74099ea7 diagnosis.csv \ No newline at end of file diff --git a/test_data/full_dataset/studies/diagnosis_codebook.tsv.sha1 b/test_data/full_dataset/studies/diagnosis_codebook.tsv.sha1 new file mode 100644 index 0000000..2961a19 --- /dev/null +++ b/test_data/full_dataset/studies/diagnosis_codebook.tsv.sha1 @@ -0,0 +1 @@ +8cc20a3917db3e251d9426d078ce74969e531931 diagnosis_codebook.tsv \ No newline at end of file diff --git a/test_data/full_dataset/studies/individual.csv.sha1 b/test_data/full_dataset/studies/individual.csv.sha1 new file mode 100644 index 0000000..8b458bf --- /dev/null +++ b/test_data/full_dataset/studies/individual.csv.sha1 @@ -0,0 +1 @@ +d70a6c8727d3d8dd40eaf2cf87eb71d9ee581387 individual.csv \ No 
newline at end of file diff --git a/test_data/full_dataset/studies/individual_codebook.tsv.sha1 b/test_data/full_dataset/studies/individual_codebook.tsv.sha1 new file mode 100644 index 0000000..edb3943 --- /dev/null +++ b/test_data/full_dataset/studies/individual_codebook.tsv.sha1 @@ -0,0 +1 @@ +8cc20a3917db3e251d9426d078ce74969e531931 individual_codebook.tsv \ No newline at end of file diff --git a/test_data/full_dataset/studies/individual_study.csv.sha1 b/test_data/full_dataset/studies/individual_study.csv.sha1 new file mode 100644 index 0000000..f7a4d99 --- /dev/null +++ b/test_data/full_dataset/studies/individual_study.csv.sha1 @@ -0,0 +1 @@ +97a9c1f7f565c76d95d0bdacc1043247c52259a7 individual_study.csv \ No newline at end of file diff --git a/test_data/full_dataset/studies/study.csv.sha1 b/test_data/full_dataset/studies/study.csv.sha1 new file mode 100644 index 0000000..19ab50e --- /dev/null +++ b/test_data/full_dataset/studies/study.csv.sha1 @@ -0,0 +1 @@ +683f607214f3b53c06a0431a2303c0322cc99ce6 study.csv \ No newline at end of file From 2dd38827da44a286b8435997f1033f23b6d91cc9 Mon Sep 17 00:00:00 2001 From: Alessia Peviani Date: Tue, 14 Dec 2021 19:04:12 +0100 Subject: [PATCH 15/39] Automate dataset validation --- validate_data.py | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) create mode 100644 validate_data.py diff --git a/validate_data.py b/validate_data.py new file mode 100644 index 0000000..2519873 --- /dev/null +++ b/validate_data.py @@ -0,0 +1,29 @@ +import os +import shutil +import subprocess + + +config_dir = 'config' +output_folder = 'validation_results' +out_csr = os.path.join(output_folder, 'sources2csr') +out_tm = os.path.join(output_folder, 'csr2transmart') + + +def cleanup(dir_path: str): + if os.path.exists(dir_path): + shutil.rmtree(dir_path) + + +def validate(top_folder: str): + + print('Validating SOURCES to CSR') + cleanup(out_csr) + subprocess.run(f'sources2csr {top_folder} {out_csr} {config_dir}', shell=True) + + 
print('Validating CSR to TRANSMART') + cleanup(out_tm) + subprocess.run(f'csr2transmart {out_csr} {out_tm} {config_dir}', shell=True) + + +if __name__ == '__main__': + validate('test_data/full_dataset') From ad72b879c646f70486f8cb4e18c7ccaebc871f65 Mon Sep 17 00:00:00 2001 From: Alessia Peviani Date: Tue, 14 Dec 2021 19:09:14 +0100 Subject: [PATCH 16/39] Add click options to validation script --- validate_data.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/validate_data.py b/validate_data.py index 2519873..65f779c 100644 --- a/validate_data.py +++ b/validate_data.py @@ -2,6 +2,8 @@ import shutil import subprocess +import click + config_dir = 'config' output_folder = 'validation_results' @@ -14,6 +16,8 @@ def cleanup(dir_path: str): shutil.rmtree(dir_path) +@click.command() +@click.argument('top_folder', type=click.Path(file_okay=False, exists=True, readable=True)) def validate(top_folder: str): print('Validating SOURCES to CSR') @@ -26,4 +30,4 @@ def validate(top_folder: str): if __name__ == '__main__': - validate('test_data/full_dataset') + validate() From 5170b4d76a4f96e6ef129aef99f8732d3ca004a0 Mon Sep 17 00:00:00 2001 From: Alessia Peviani Date: Wed, 15 Dec 2021 10:59:29 +0100 Subject: [PATCH 17/39] Extend radiology example --- .../alternative/laboratory/radiology.tsv | 15 +++++++++++++++ .../alternative/laboratory/radiology.tsv.sha1 | 1 + .../full_dataset/laboratory/radiology.tsv | 19 ++++++++++++++++--- .../laboratory/radiology.tsv.sha1 | 2 +- 4 files changed, 33 insertions(+), 4 deletions(-) create mode 100644 test_data/alternative/laboratory/radiology.tsv create mode 100644 test_data/alternative/laboratory/radiology.tsv.sha1 diff --git a/test_data/alternative/laboratory/radiology.tsv b/test_data/alternative/laboratory/radiology.tsv new file mode 100644 index 0000000..5cd60f1 --- /dev/null +++ b/test_data/alternative/laboratory/radiology.tsv @@ -0,0 +1,15 @@ +radiology_id examination_date image_type field_strength individual_id 
diagnosis_id body_part +R1A 2016-05-01 type_2 PAT1 DIA1 legs +R1B 2016-10-31 type_1 20 PAT1 DIA18 torso +R4 2016-11-03 type_1 20 PAT4 legs +R5 2014-06-01 type_1 PAT5 DIA5 legs +R6 2012-07-05 type_2 30 PAT6 DIA6 legs +R7 2015-11-01 type_1 40 PAT7 DIA7 legs +R8 2011-02-01 type_1 40 PAT8 torso +R9 2017-05-01 type_1 40 PAT9 DIA9 torso +R10A 2010-05-01 type_1 40 PAT10 DIA10 torso +R10B 2011-10-31 type_2 30 PAT10 DIA10 torso +R13A 2010-12-01 type_2 PAT13 DIA13 legs +R13B 2011-05-01 type_2 PAT13 DIA13 legs +R14 2016-08-31 type_2 30 PAT14 legs +R15 2016-05-01 type_2 PAT15 DIA15 torso diff --git a/test_data/alternative/laboratory/radiology.tsv.sha1 b/test_data/alternative/laboratory/radiology.tsv.sha1 new file mode 100644 index 0000000..65e4474 --- /dev/null +++ b/test_data/alternative/laboratory/radiology.tsv.sha1 @@ -0,0 +1 @@ +ea4b2e59e958e9e01e876fe1b03f9821f92c89a4 radiology.tsv \ No newline at end of file diff --git a/test_data/full_dataset/laboratory/radiology.tsv b/test_data/full_dataset/laboratory/radiology.tsv index 6979110..bb76f3a 100644 --- a/test_data/full_dataset/laboratory/radiology.tsv +++ b/test_data/full_dataset/laboratory/radiology.tsv @@ -1,4 +1,17 @@ radiology_id examination_date image_type field_strength individual_id diagnosis_id body_part -R1 2016-05-01 type_2 PAT1 DIA1 torso -R2 2016-07-02 type_2 50 PAT2 DIA2 torso -R3 2016-11-03 type_1 20 PAT2 legs +R1A 2016-05-01 type_2 PAT1 DIA1 legs +R1B 2016-10-31 type_1 20 PAT1 DIA18 torso +R2A 2016-07-01 type_2 50 PAT2 DIA2 legs +R2B 2016-08-31 type_1 30 PAT2 DIA19 torso +R4 2016-11-03 type_1 20 PAT4 legs +R5 2014-06-01 type_1 PAT5 DIA5 legs +R6 2012-07-05 type_2 30 PAT6 DIA6 legs +R7 2015-11-01 type_1 40 PAT7 DIA7 legs +R8 2011-02-01 type_1 40 PAT8 torso +R9 2017-05-01 type_1 40 PAT9 DIA9 torso +R10A 2010-05-01 type_1 40 PAT10 DIA10 torso +R10B 2011-10-31 type_2 30 PAT10 DIA10 torso +R13A 2010-12-01 type_2 PAT13 DIA13 legs +R13B 2011-05-01 type_2 PAT13 DIA13 legs +R14 2016-08-31 type_2 30 PAT14 legs +R15 
2016-05-01 type_2 PAT15 DIA15 torso diff --git a/test_data/full_dataset/laboratory/radiology.tsv.sha1 b/test_data/full_dataset/laboratory/radiology.tsv.sha1 index 740bf22..eb8bffd 100644 --- a/test_data/full_dataset/laboratory/radiology.tsv.sha1 +++ b/test_data/full_dataset/laboratory/radiology.tsv.sha1 @@ -1 +1 @@ -4402798843a617f81f2a245e6532b628d91e463d radiology.tsv \ No newline at end of file +c4f36afc4c30ba11638d49b0d5f295588a167c82 radiology.tsv \ No newline at end of file From 9c92e210d3b56617e0d073cfaf441e7a64579e77 Mon Sep 17 00:00:00 2001 From: Alessia Peviani Date: Wed, 15 Dec 2021 11:10:51 +0100 Subject: [PATCH 18/39] Specify radiology date format in sources config --- config/sources_config.json | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/config/sources_config.json b/config/sources_config.json index 5b00800..67d31ad 100644 --- a/config/sources_config.json +++ b/config/sources_config.json @@ -331,7 +331,8 @@ "name": "examination_date", "sources": [ { - "file": "laboratory/radiology.tsv" + "file": "laboratory/radiology.tsv", + "date_format": "%Y-%m-%d" } ] }, From df28d1ab109a4f970f86ad11864d97ba79c81162 Mon Sep 17 00:00:00 2001 From: Alessia Peviani Date: Wed, 15 Dec 2021 11:16:16 +0100 Subject: [PATCH 19/39] Add radiology to ontology config --- config/ontology_config.json | 35 ++++++++++++++++++++++++++++++++++- 1 file changed, 34 insertions(+), 1 deletion(-) diff --git a/config/ontology_config.json b/config/ontology_config.json index 381ee90..bf5ba60 100644 --- a/config/ontology_config.json +++ b/config/ontology_config.json @@ -130,7 +130,40 @@ ] }, { - "name": "05. Study information", + "name": "05. Radiology information", + "children": [ + { + "name": "01. Radiology ID", + "concept_code": "Radiology.radiology_id" + }, + { + "name": "02. Examination Date", + "concept_code": "Radiology.examination_date" + }, + { + "name": "03. Image Type", + "concept_code": "Radiology.image_type" + }, + { + "name": "04. 
Field Strength", + "concept_code": "Radiology.field_strength" + }, + { + "name": "05. Individual ID", + "concept_code": "Radiology.individual_id" + }, + { + "name": "06. Diagnosis ID", + "concept_code": "Radiology.diagnosis_id" + }, + { + "name": "07. Body Part", + "concept_code": "Radiology.body_part" + } + ] + }, + { + "name": "06. Study information", "children": [ { "name": "01. Study ID", From 2aaf0709ef5ce76ecbd69f92c8740fb4f4daf356 Mon Sep 17 00:00:00 2001 From: Alessia Peviani Date: Wed, 15 Dec 2021 12:10:26 +0100 Subject: [PATCH 20/39] Replace , with ; in array-type values --- .../alternative/laboratory/biomaterial.tsv | 42 ++++++++--------- .../laboratory/biomaterial.tsv.sha1 | 2 +- .../full_dataset/laboratory/biomaterial.tsv | 46 +++++++++---------- .../laboratory/biomaterial.tsv.sha1 | 2 +- 4 files changed, 46 insertions(+), 46 deletions(-) diff --git a/test_data/alternative/laboratory/biomaterial.tsv b/test_data/alternative/laboratory/biomaterial.tsv index 610d1f4..94a02cb 100644 --- a/test_data/alternative/laboratory/biomaterial.tsv +++ b/test_data/alternative/laboratory/biomaterial.tsv @@ -1,22 +1,22 @@ biomaterial_id biomaterial_date type src_biosource_id src_biomaterial_id description label library_strategy analysis_type -BIOM1T 07/03/2018 total RNA BIOS1T hepatoblastoma RNA HBL_RNA A,B -BIOM1N 22/11/2018 total RNA BIOS1N healthy tissue HBL_normal_RNA A,B X,Y,Z -BIOM3T 16/04/2018 genomic DNA BIOS3T nephroblastoma DNA NBL_DNA A,B Z -BIOM3N 02/05/2018 genomic DNA BIOS3N healthy tissue NBL_normal_DNA A,B Y -BIOM4 07/03/2018 mRNA BIOS4 genomic DNA isolated from 1222TI 1222TI_DNA A,C Y -BIOM5 05/06/2011 mRNA BIOS5 neuroblastoma DNA NBL_DNA A,C X -BIOM6 05/06/2011 mRNA DNA BIOS6 neuroblastoma DNA NBL_DNA A,C X -BIOM7 07/03/2018 mRNA DNA BIOS7 genomic DNA isolated from 1222TI 1222TI_DNA A,C Y,Z -BIOM8 05/06/2011 mRNA DNA BIOS8 neuroblastoma DNA NBL_DNA A,C Y,Z -BIOM9 05/06/2011 genomic DNA BIOS9 neuroblastoma DNA NBL_DNA B,C Y,Z -BIOM10 07/03/2018 genomic 
DNA BIOS10 genomic DNA isolated from 1222TI 1222TI_DNA B,C Y,Z -BIOM11 05/06/2011 genomic DNA BIOS11 neuroblastoma DNA NBL_DNA B,C Y,Z -BIOM12 05/06/2011 genomic DNA BIOS12 neuroblastoma DNA NBL_DNA B,C X,Z -BIOM13 07/03/2018 mRNA BIOS13 genomic DNA isolated from 1222TI 1222TI_DNA B,C X,Z -BIOM14 05/06/2011 mRNA BIOS14 neuroblastoma DNA NBL_DNA A X,Z -BIOM15 05/06/2011 genomic DNA BIOS15 neuroblastoma DNA NBL_DNA A X,Z -BIOM16 07/03/2018 genomic DNA BIOS16 genomic DNA isolated from 1222TI 1222TI_DNA B X,Z -BIOM17 05/06/2011 genomic DNA BIOS17 neuroblastoma DNA NBL_DNA B X,Y -BIOM18 05/06/2011 mRNA BIOS18 neuroblastoma DNA NBL_DNA C X,Y -BIOM20 05/06/2011 mRNA BIOS11 BIOM11 neuroblastoma DNA NBL_DNA A,B,C X,Y -BIOM21 05/06/2011 genomic DNA BIOS12 BIOM12 neuroblastoma DNA NBL_DNA X,Y +BIOM1T 07/03/2018 total RNA BIOS1T hepatoblastoma RNA HBL_RNA A;B +BIOM1N 22/11/2018 total RNA BIOS1N healthy tissue HBL_normal_RNA A;B X;Y;Z +BIOM3T 16/04/2018 genomic DNA BIOS3T nephroblastoma DNA NBL_DNA A;B Z +BIOM3N 02/05/2018 genomic DNA BIOS3N healthy tissue NBL_normal_DNA A;B Y +BIOM4 07/03/2018 mRNA BIOS4 genomic DNA isolated from 1222TI 1222TI_DNA A;C Y +BIOM5 05/06/2011 mRNA BIOS5 neuroblastoma DNA NBL_DNA A;C X +BIOM6 05/06/2011 mRNA DNA BIOS6 neuroblastoma DNA NBL_DNA A;C X +BIOM7 07/03/2018 mRNA DNA BIOS7 genomic DNA isolated from 1222TI 1222TI_DNA A;C Y;Z +BIOM8 05/06/2011 mRNA DNA BIOS8 neuroblastoma DNA NBL_DNA A;C Y;Z +BIOM9 05/06/2011 genomic DNA BIOS9 neuroblastoma DNA NBL_DNA B;C Y;Z +BIOM10 07/03/2018 genomic DNA BIOS10 genomic DNA isolated from 1222TI 1222TI_DNA B;C Y;Z +BIOM11 05/06/2011 genomic DNA BIOS11 neuroblastoma DNA NBL_DNA B;C Y;Z +BIOM12 05/06/2011 genomic DNA BIOS12 neuroblastoma DNA NBL_DNA B;C X;Z +BIOM13 07/03/2018 mRNA BIOS13 genomic DNA isolated from 1222TI 1222TI_DNA B;C X;Z +BIOM14 05/06/2011 mRNA BIOS14 neuroblastoma DNA NBL_DNA A X;Z +BIOM15 05/06/2011 genomic DNA BIOS15 neuroblastoma DNA NBL_DNA A X;Z +BIOM16 07/03/2018 genomic DNA BIOS16 
genomic DNA isolated from 1222TI 1222TI_DNA B X;Z +BIOM17 05/06/2011 genomic DNA BIOS17 neuroblastoma DNA NBL_DNA B X;Y +BIOM18 05/06/2011 mRNA BIOS18 neuroblastoma DNA NBL_DNA C X;Y +BIOM20 05/06/2011 mRNA BIOS11 BIOM11 neuroblastoma DNA NBL_DNA A;B;C X;Y +BIOM21 05/06/2011 genomic DNA BIOS12 BIOM12 neuroblastoma DNA NBL_DNA X;Y diff --git a/test_data/alternative/laboratory/biomaterial.tsv.sha1 b/test_data/alternative/laboratory/biomaterial.tsv.sha1 index b3773b8..dc4cb2f 100644 --- a/test_data/alternative/laboratory/biomaterial.tsv.sha1 +++ b/test_data/alternative/laboratory/biomaterial.tsv.sha1 @@ -1 +1 @@ -6a43417536fe3a35e33a11b7cde47a96148ba4fa biomaterial.tsv \ No newline at end of file +fa845cd8237b3ab856f190213522eecedbb97b5d biomaterial.tsv \ No newline at end of file diff --git a/test_data/full_dataset/laboratory/biomaterial.tsv b/test_data/full_dataset/laboratory/biomaterial.tsv index 5bf027b..c9939ea 100644 --- a/test_data/full_dataset/laboratory/biomaterial.tsv +++ b/test_data/full_dataset/laboratory/biomaterial.tsv @@ -1,24 +1,24 @@ biomaterial_id biomaterial_date type src_biosource_id src_biomaterial_id description label library_strategy analysis_type -BIOM1T 07/03/2018 total RNA BIOS1T hepatoblastoma RNA HBL_RNA A,B -BIOM1N 22/11/2018 total RNA BIOS1N healthy tissue HBL_normal_RNA A,B X,Y,Z -BIOM2 05/06/2011 genomic DNA BIOS2 neuroblastoma DNA NBL_DNA A,B Z -BIOM3T 16/04/2018 genomic DNA BIOS3T nephroblastoma DNA NBL_DNA A,B Z -BIOM3N 02/05/2018 genomic DNA BIOS3N healthy tissue NBL_normal_DNA A,B Y -BIOM4 07/03/2018 mRNA BIOS4 genomic DNA isolated from 1222TI 1222TI_DNA A,C Y -BIOM5 05/06/2011 mRNA BIOS5 neuroblastoma DNA NBL_DNA A,C X -BIOM6 05/06/2011 mRNA DNA BIOS6 neuroblastoma DNA NBL_DNA A,C X -BIOM7 07/03/2018 mRNA DNA BIOS7 genomic DNA isolated from 1222TI 1222TI_DNA A,C Y,Z -BIOM8 05/06/2011 mRNA DNA BIOS8 neuroblastoma DNA NBL_DNA A,C Y,Z -BIOM9 05/06/2011 genomic DNA BIOS9 neuroblastoma DNA NBL_DNA B,C Y,Z -BIOM10 07/03/2018 genomic DNA 
BIOS10 genomic DNA isolated from 1222TI 1222TI_DNA B,C Y,Z -BIOM11 05/06/2011 genomic DNA BIOS11 neuroblastoma DNA NBL_DNA B,C Y,Z -BIOM12 05/06/2011 genomic DNA BIOS12 neuroblastoma DNA NBL_DNA B,C X,Z -BIOM13 07/03/2018 mRNA BIOS13 genomic DNA isolated from 1222TI 1222TI_DNA B,C X,Z -BIOM14 05/06/2011 mRNA BIOS14 neuroblastoma DNA NBL_DNA A X,Z -BIOM15 05/06/2011 genomic DNA BIOS15 neuroblastoma DNA NBL_DNA A X,Z -BIOM16 07/03/2018 genomic DNA BIOS16 genomic DNA isolated from 1222TI 1222TI_DNA B X,Z -BIOM17 05/06/2011 genomic DNA BIOS17 neuroblastoma DNA NBL_DNA B X,Y -BIOM18 05/06/2011 mRNA BIOS18 neuroblastoma DNA NBL_DNA C X,Y -BIOM19 07/03/2018 mRNA BIOS19 genomic DNA isolated from 1222TI 1222TI_DNA C X,Y -BIOM20 05/06/2011 mRNA BIOS11 BIOM11 neuroblastoma DNA NBL_DNA A,B,C X,Y -BIOM21 05/06/2011 genomic DNA BIOS12 BIOM12 neuroblastoma DNA NBL_DNA X,Y +BIOM1T 07/03/2018 total RNA BIOS1T hepatoblastoma RNA HBL_RNA A;B +BIOM1N 22/11/2018 total RNA BIOS1N healthy tissue HBL_normal_RNA A;B X;Y;Z +BIOM2 05/06/2011 genomic DNA BIOS2 neuroblastoma DNA NBL_DNA A;B Z +BIOM3T 16/04/2018 genomic DNA BIOS3T nephroblastoma DNA NBL_DNA A;B Z +BIOM3N 02/05/2018 genomic DNA BIOS3N healthy tissue NBL_normal_DNA A;B Y +BIOM4 07/03/2018 mRNA BIOS4 genomic DNA isolated from 1222TI 1222TI_DNA A;C Y +BIOM5 05/06/2011 mRNA BIOS5 neuroblastoma DNA NBL_DNA A;C X +BIOM6 05/06/2011 mRNA DNA BIOS6 neuroblastoma DNA NBL_DNA A;C X +BIOM7 07/03/2018 mRNA DNA BIOS7 genomic DNA isolated from 1222TI 1222TI_DNA A;C Y;Z +BIOM8 05/06/2011 mRNA DNA BIOS8 neuroblastoma DNA NBL_DNA A;C Y;Z +BIOM9 05/06/2011 genomic DNA BIOS9 neuroblastoma DNA NBL_DNA B;C Y;Z +BIOM10 07/03/2018 genomic DNA BIOS10 genomic DNA isolated from 1222TI 1222TI_DNA B;C Y;Z +BIOM11 05/06/2011 genomic DNA BIOS11 neuroblastoma DNA NBL_DNA B;C Y;Z +BIOM12 05/06/2011 genomic DNA BIOS12 neuroblastoma DNA NBL_DNA B;C X;Z +BIOM13 07/03/2018 mRNA BIOS13 genomic DNA isolated from 1222TI 1222TI_DNA B;C X;Z +BIOM14 05/06/2011 mRNA 
BIOS14 neuroblastoma DNA NBL_DNA A X;Z +BIOM15 05/06/2011 genomic DNA BIOS15 neuroblastoma DNA NBL_DNA A X;Z +BIOM16 07/03/2018 genomic DNA BIOS16 genomic DNA isolated from 1222TI 1222TI_DNA B X;Z +BIOM17 05/06/2011 genomic DNA BIOS17 neuroblastoma DNA NBL_DNA B X;Y +BIOM18 05/06/2011 mRNA BIOS18 neuroblastoma DNA NBL_DNA C X;Y +BIOM19 07/03/2018 mRNA BIOS19 genomic DNA isolated from 1222TI 1222TI_DNA C X;Y +BIOM20 05/06/2011 mRNA BIOS11 BIOM11 neuroblastoma DNA NBL_DNA A;B;C X;Y +BIOM21 05/06/2011 genomic DNA BIOS12 BIOM12 neuroblastoma DNA NBL_DNA X;Y diff --git a/test_data/full_dataset/laboratory/biomaterial.tsv.sha1 b/test_data/full_dataset/laboratory/biomaterial.tsv.sha1 index ba28421..6e6d633 100644 --- a/test_data/full_dataset/laboratory/biomaterial.tsv.sha1 +++ b/test_data/full_dataset/laboratory/biomaterial.tsv.sha1 @@ -1 +1 @@ -8c00d77d67d9e2c76f08bd9d3b8f1d14410da8fb biomaterial.tsv \ No newline at end of file +163b8d354dbdae1ed951ab034dbc8e890f8ed80e biomaterial.tsv \ No newline at end of file From be087f093ccf4a7a13c8f8025e94717483547032 Mon Sep 17 00:00:00 2001 From: Alessia Peviani Date: Wed, 15 Dec 2021 12:20:52 +0100 Subject: [PATCH 21/39] Reorder and relabel nodes --- config/ontology_config.json | 62 ++++++++++++++++++------------------- 1 file changed, 31 insertions(+), 31 deletions(-) diff --git a/config/ontology_config.json b/config/ontology_config.json index bf5ba60..34efa1e 100644 --- a/config/ontology_config.json +++ b/config/ontology_config.json @@ -3,43 +3,43 @@ { "name": "01. Patient information", "children": [ - { - "name": "03. Sex", - "concept_code": "Individual.gender" - }, { "name": "01. Date of birth", "concept_code": "Individual.birth_date" + }, + { + "name": "02. Date of death", + "concept_code": "Individual.death_date" }, { - "name": "02. Taxonomy", - "concept_code": "Individual.taxonomy" + "name": "03. Sex", + "concept_code": "Individual.gender" }, { - "name": "04. 
Date of death", - "concept_code": "Individual.death_date" + "name": "04. Taxonomy", + "concept_code": "Individual.taxonomy" }, { - "name": "Informed_consent", + "name": "05. Informed_consent", "children": [ { "name": "01. Informed consent type", "concept_code": "Individual.ic_type" }, { - "name": "Informed consent version", + "name": "02. Informed consent version", "concept_code": "Individual.ic_version" }, { - "name": "02. Date informed Consent given", + "name": "03. Date informed Consent given", "concept_code": "Individual.ic_given_date" }, { - "name": "03. Date informed consent withdrawn", + "name": "04. Date informed consent withdrawn", "concept_code": "Individual.ic_withdrawn_date" }, { - "name": "04. Report hereditary susceptibility", + "name": "05. Report hereditary susceptibility", "concept_code": "Individual.report_her_susc" } ] @@ -50,27 +50,27 @@ "name": "02. Diagnosis information", "children": [ { - "name": "02. Tumor type", + "name": "01. Tumor type", "concept_code": "Diagnosis.tumor_type" }, { - "name": "03. Topography", + "name": "02. Topography", "concept_code": "Diagnosis.topography" }, { - "name": "Treatment", - "concept_code": "Diagnosis.treatment_protocol" + "name": "03. Tumor stage", + "concept_code": "Diagnosis.tumor_stage" }, { - "name": "04. Tumor stage", - "concept_code": "Diagnosis.tumor_stage" + "name": "04. Treatment", + "concept_code": "Diagnosis.treatment_protocol" }, { - "name": "01. Date of diagnosis", + "name": "05. Date of diagnosis", "concept_code": "Diagnosis.diagnosis_date" }, { - "name": "05. Center of treatment", + "name": "06. Center of treatment", "concept_code": "Diagnosis.diagnosis_center" } ] @@ -78,20 +78,16 @@ { "name": "03. Biosource information", "children": [ - { - "name": "06. Biosource dedicated for specific study", - "concept_code": "Biosource.biosource_dedicated" - }, { "name": "01. Biosource parent", "concept_code": "Biosource.src_biosource_id" }, { - "name": "03. Tissue", + "name": "02. 
Tissue", "concept_code": "Biosource.tissue" }, { - "name": "02. Date of biosource", + "name": "03. Date of biosource", "concept_code": "Biosource.biosource_date" }, { @@ -101,6 +97,10 @@ { "name": "05. Tumor percentage", "concept_code": "Biosource.tumor_percentage" + }, + { + "name": "06. Biosource dedicated for specific study", + "concept_code": "Biosource.biosource_dedicated" } ] }, @@ -177,13 +177,13 @@ "name": "03. Study title", "concept_code": "Study.title" }, - { - "name": "Study datadictionary", - "concept_code": "Study.datadictionary" - }, { "name": "04. Individual Study ID", "concept_code": "IndividualStudy.individual_study_id" + }, + { + "name": "05. Study datadictionary", + "concept_code": "Study.datadictionary" } ] } From b97a8695240ba68d78f5b194e70fd6d6b8a53ff7 Mon Sep 17 00:00:00 2001 From: Alessia Peviani Date: Wed, 15 Dec 2021 12:40:52 +0100 Subject: [PATCH 22/39] Add basic readme --- README.md | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) create mode 100644 README.md diff --git a/README.md b/README.md new file mode 100644 index 0000000..3200df6 --- /dev/null +++ b/README.md @@ -0,0 +1,38 @@ +# PMC E2E TESTDATA + +A repository complete with utilities to help create CSR-compatible test datasets for E2E testing. + + +### How to use this repo + +1. Update `requirements.txt` to point to the latest release of `python_csr2transmart` you would + like your test data to be compatible with (default `master` branch). + + +2. Create a Python virtual environment (check the latest Python version supported by + `python_csr2transmart`) and install the requirements: + ``` + pip install -r requirements.txt + ``` + + +3. Create a new branch and start working on a new version of `test_data/`, and corresponding + changes to the configuration files in `config/`. We recommend using two sub-folders, one for the + complete dataset (e.g. 
`full_dataset/`) and one with an alternative version, obtained for + example by removing or swapping the gender of one patient (e.g. `alternative/`). + + +4. Check that the dataset can be parsed by `sources2csr` and `csr2transmart` by running: + ``` + python validate_data.py test_data/full_dataset + ``` + where the path argument should point to a specific test data subfolder (e.g. `test_data/full_dataset/`). + The output will be written to the (git-ignored) `validation_results/` folder, should you need + to inspect it. + + +5. Generate sha1sum files for all test data by running: + ``` + python generate_sha1sum.py + ``` + This automatically traverses the whole `test_data/` folder, no need to provide the path. From bca64800b0c455760c71ebe66dd2836e011d6e56 Mon Sep 17 00:00:00 2001 From: Alessia Peviani Date: Thu, 16 Dec 2021 15:44:09 +0100 Subject: [PATCH 23/39] Move new config to E2E test data --- config/ontology_config.json | 191 ------- config/sources_config.json | 482 ------------------ .../E2E_TEST_DATA/config/ontology_config.json | 105 ++-- .../E2E_TEST_DATA/config/sources_config.json | 265 +++++++--- 4 files changed, 265 insertions(+), 778 deletions(-) delete mode 100644 config/ontology_config.json delete mode 100644 config/sources_config.json diff --git a/config/ontology_config.json b/config/ontology_config.json deleted file mode 100644 index 34efa1e..0000000 --- a/config/ontology_config.json +++ /dev/null @@ -1,191 +0,0 @@ -{ - "nodes": [ - { - "name": "01. Patient information", - "children": [ - { - "name": "01. Date of birth", - "concept_code": "Individual.birth_date" - }, - { - "name": "02. Date of death", - "concept_code": "Individual.death_date" - }, - { - "name": "03. Sex", - "concept_code": "Individual.gender" - }, - { - "name": "04. Taxonomy", - "concept_code": "Individual.taxonomy" - }, - { - "name": "05. Informed_consent", - "children": [ - { - "name": "01. Informed consent type", - "concept_code": "Individual.ic_type" - }, - { - "name": "02. 
Informed consent version", - "concept_code": "Individual.ic_version" - }, - { - "name": "03. Date informed Consent given", - "concept_code": "Individual.ic_given_date" - }, - { - "name": "04. Date informed consent withdrawn", - "concept_code": "Individual.ic_withdrawn_date" - }, - { - "name": "05. Report hereditary susceptibility", - "concept_code": "Individual.report_her_susc" - } - ] - } - ] - }, - { - "name": "02. Diagnosis information", - "children": [ - { - "name": "01. Tumor type", - "concept_code": "Diagnosis.tumor_type" - }, - { - "name": "02. Topography", - "concept_code": "Diagnosis.topography" - }, - { - "name": "03. Tumor stage", - "concept_code": "Diagnosis.tumor_stage" - }, - { - "name": "04. Treatment", - "concept_code": "Diagnosis.treatment_protocol" - }, - { - "name": "05. Date of diagnosis", - "concept_code": "Diagnosis.diagnosis_date" - }, - { - "name": "06. Center of treatment", - "concept_code": "Diagnosis.diagnosis_center" - } - ] - }, - { - "name": "03. Biosource information", - "children": [ - { - "name": "01. Biosource parent", - "concept_code": "Biosource.src_biosource_id" - }, - { - "name": "02. Tissue", - "concept_code": "Biosource.tissue" - }, - { - "name": "03. Date of biosource", - "concept_code": "Biosource.biosource_date" - }, - { - "name": "04. Disease status", - "concept_code": "Biosource.disease_status" - }, - { - "name": "05. Tumor percentage", - "concept_code": "Biosource.tumor_percentage" - }, - { - "name": "06. Biosource dedicated for specific study", - "concept_code": "Biosource.biosource_dedicated" - } - ] - }, - { - "name": "04. Biomaterial information", - "children": [ - { - "name": "01. Biomaterial parent", - "concept_code": "Biomaterial.src_biomaterial_id" - }, - { - "name": "02. Date of biomaterial", - "concept_code": "Biomaterial.biomaterial_date" - }, - { - "name": "03. Biomaterial type", - "concept_code": "Biomaterial.type" - }, - { - "name": "04. 
Library strategy", - "concept_code": "Biomaterial.library_strategy" - }, - { - "name": "05. Analysis type", - "concept_code": "Biomaterial.analysis_type" - } - ] - }, - { - "name": "05. Radiology information", - "children": [ - { - "name": "01. Radiology ID", - "concept_code": "Radiology.radiology_id" - }, - { - "name": "02. Examination Date", - "concept_code": "Radiology.examination_date" - }, - { - "name": "03. Image Type", - "concept_code": "Radiology.image_type" - }, - { - "name": "04. Field Strength", - "concept_code": "Radiology.field_strength" - }, - { - "name": "05. Individual ID", - "concept_code": "Radiology.individual_id" - }, - { - "name": "06. Diagnosis ID", - "concept_code": "Radiology.diagnosis_id" - }, - { - "name": "07. Body Part", - "concept_code": "Radiology.body_part" - } - ] - }, - { - "name": "06. Study information", - "children": [ - { - "name": "01. Study ID", - "concept_code": "Study.study_id" - }, - { - "name": "02. Study acronym", - "concept_code": "Study.acronym" - }, - { - "name": "03. Study title", - "concept_code": "Study.title" - }, - { - "name": "04. Individual Study ID", - "concept_code": "IndividualStudy.individual_study_id" - }, - { - "name": "05. 
Study datadictionary", - "concept_code": "Study.datadictionary" - } - ] - } - ] -} diff --git a/config/sources_config.json b/config/sources_config.json deleted file mode 100644 index 67d31ad..0000000 --- a/config/sources_config.json +++ /dev/null @@ -1,482 +0,0 @@ -{ - "entities": { - "Individual": { - "attributes": [ - { - "name": "individual_id", - "sources": [ - { - "file": "clinic/RDP-Patient.tsv", - "column": "INDIVIDUAL_ID" - }, - { - "file": "clinic/RDP-IC.tsv", - "column": "INDIVIDUAL_ID" - }, - { - "file": "studies/individual.csv", - "column": "INDIVIDUAL_ID" - }, - { - "file": "studies/death.csv", - "column": "INDIVIDUAL_ID" - } - ] - }, - { - "name": "birth_date", - "sources": [ - { - "file": "clinic/RDP-Patient.tsv", - "column": "Gebdat", - "date_format": "%d%b%Y" - }, - { - "file": "studies/individual.csv", - "column": "DTOB", - "date_format": "%d/%m/%Y %H:%M:%S" - } - ] - }, - { - "name": "gender", - "sources": [ - { - "file": "clinic/RDP-Patient.tsv", - "column": "Geslacht" - }, - { - "file": "studies/individual.csv", - "column": "SEX" - } - ] - }, - { - "name": "death_date", - "sources": [ - { - "file": "clinic/RDP-Patient.tsv", - "column": "Overldat", - "date_format": "%d%b%Y" - }, - { - "file": "studies/death.csv", - "column": "DTDEATH", - "date_format": "%d/%m/%Y %H:%M:%S" - } - ] - }, - { - "name": "ic_type", - "sources": [ - { - "file": "clinic/RDP-IC.tsv", - "column": "00004_Toestemmingsstatus" - }, - { - "file": "studies/individual.csv", - "column": "IFCDATR" - } - ] - }, - { - "name": "ic_given_date", - "sources": [ - { - "file": "clinic/RDP-IC.tsv", - "column": "00007_Datum toestemming", - "date_format": "%d/%m/%Y" - } - ] - }, - { - "name": "ic_withdrawn_date", - "sources": [ - { - "file": "clinic/RDP-IC.tsv", - "column": "00010_Datum geen toestemming", - "date_format": "%d/%m/%Y" - } - ] - }, - { - "name": "report_her_susc", - "sources": [ - { - "file": "clinic/RDP-IC.tsv", - "column": "00012_Datum einde deelname", - "date_format": 
"%d/%m/%Y" - } - ] - } - ] - }, - "Diagnosis": { - "attributes": [ - { - "name": "individual_id", - "sources": [ - { - "file": "studies/diagnosis.csv", - "column": "INDIVIDUAL_ID" - } - ] - }, - { - "name": "diagnosis_id", - "sources": [ - { - "file": "studies/diagnosis.csv", - "column": "CIDDIAG" - } - ] - }, - { - "name": "tumor_type", - "sources": [ - { - "file": "studies/diagnosis.csv", - "column": "DIAGCD" - } - ] - }, - { - "name": "topography", - "sources": [ - { - "file": "studies/diagnosis.csv", - "column": "PLOCCD" - } - ] - }, - { - "name": "tumor_stage", - "sources": [ - { - "file": "studies/diagnosis.csv", - "column": "DIAGGRSTX" - } - ] - }, - { - "name": "diagnosis_date", - "sources": [ - { - "file": "studies/diagnosis.csv", - "column": "IDAABA", - "date_format": "%d/%m/%Y %H:%M:%S" - } - ] - }, - { - "name": "diagnosis_center", - "sources": [ - { - "file": "studies/diagnosis.csv", - "column": "HOSPDIAG" - } - ] - } - ] - }, - "Biosource": { - "attributes": [ - { - "name": "biosource_id", - "sources": [ - { - "file": "laboratory/biosource.tsv" - } - ] - }, - { - "name": "individual_id", - "sources": [ - { - "file": "laboratory/biosource.tsv" - } - ] - }, - { - "name": "diagnosis_id", - "sources": [ - { - "file": "laboratory/biosource.tsv" - } - ] - }, - { - "name": "src_biosource_id", - "sources": [ - { - "file": "laboratory/biosource.tsv" - } - ] - }, - { - "name": "biosource_dedicated", - "sources": [ - { - "file": "laboratory/biosource.tsv" - } - ] - }, - { - "name": "tissue", - "sources": [ - { - "file": "laboratory/biosource.tsv" - } - ] - }, - { - "name": "biosource_date", - "sources": [ - { - "file": "laboratory/biosource.tsv", - "date_format": "%d/%m/%Y" - } - ] - }, - { - "name": "disease_status", - "sources": [ - { - "file": "laboratory/biosource.tsv" - } - ] - }, - { - "name": "tumor_percentage", - "sources": [ - { - "file": "laboratory/biosource.tsv" - } - ] - } - ] - }, - "Biomaterial": { - "attributes": [ - { - "name": "biomaterial_id", 
- "sources": [ - { - "file": "laboratory/biomaterial.tsv" - } - ] - }, - { - "name": "src_biosource_id", - "sources": [ - { - "file": "laboratory/biomaterial.tsv" - } - ] - }, - { - "name": "src_biomaterial_id", - "sources": [ - { - "file": "laboratory/biomaterial.tsv" - } - ] - }, - { - "name": "biomaterial_date", - "sources": [ - { - "file": "laboratory/biomaterial.tsv", - "date_format": "%d/%m/%Y" - } - ] - }, - { - "name": "type", - "sources": [ - { - "file": "laboratory/biomaterial.tsv" - } - ] - }, - { - "name": "library_strategy", - "sources": [ - { - "file": "laboratory/biomaterial.tsv" - } - ] - }, - { - "name": "analysis_type", - "sources": [ - { - "file": "laboratory/biomaterial.tsv" - } - ] - } - ] - }, - "Radiology": { - "attributes": [ - { - "name": "radiology_id", - "sources": [ - { - "file": "laboratory/radiology.tsv" - } - ] - }, - { - "name": "examination_date", - "sources": [ - { - "file": "laboratory/radiology.tsv", - "date_format": "%Y-%m-%d" - } - ] - }, - { - "name": "image_type", - "sources": [ - { - "file": "laboratory/radiology.tsv" - } - ] - }, - { - "name": "field_strength", - "sources": [ - { - "file": "laboratory/radiology.tsv" - } - ] - }, - { - "name": "individual_id", - "sources": [ - { - "file": "laboratory/radiology.tsv" - } - ] - }, - { - "name": "diagnosis_id", - "sources": [ - { - "file": "laboratory/radiology.tsv" - } - ] - }, - { - "name": "body_part", - "sources": [ - { - "file": "laboratory/radiology.tsv" - } - ] - } - ] - }, - "Study": { - "attributes": [ - { - "name": "study_id", - "sources": [ - { - "file": "studies/study.csv", - "column": "STUDY_ID" - } - ] - }, - { - "name": "acronym", - "sources": [ - { - "file": "studies/study.csv" - } - ] - }, - { - "name": "title", - "sources": [ - { - "file": "studies/study.csv" - } - ] - }, - { - "name": "datadictionary", - "sources": [ - { - "file": "studies/study.csv" - } - ] - } - ] - }, - "IndividualStudy": { - "attributes": [ - { - "name": "study_id_individual_study_id", - 
"sources": [ - { - "file": "studies/individual_study.csv", - "column": "STUDY_ID_INDIVIDUAL_STUDY_ID" - } - ] - }, - { - "name": "individual_study_id", - "sources": [ - { - "file": "studies/individual_study.csv", - "column": "INDIVIDUAL_STUDY_ID" - } - ] - }, - { - "name": "individual_id", - "sources": [ - { - "file": "studies/individual_study.csv", - "column": "INDIVIDUAL_ID" - } - ] - }, - { - "name": "study_id", - "sources": [ - { - "file": "studies/individual_study.csv", - "column": "STUDY_ID" - } - ] - } - ] - } - }, - "codebooks": { - "studies/individual.csv": "studies/individual_codebook.tsv", - "studies/diagnosis.csv": "studies/diagnosis_codebook.tsv", - "studies/death.csv": "studies/death_codebook.tsv", - "clinic/RDP-Patient.tsv": "clinic/RDP-Patient_codebook.tsv" - }, - "file_format": { - "studies/individual.csv": { - "delimiter": "," - }, - "studies/diagnosis.csv": { - "delimiter": "," - }, - "studies/death.csv": { - "delimiter": "," - }, - "studies/study.csv": { - "delimiter": "," - }, - "studies/individual_study.csv": { - "delimiter": "," - } - } -} diff --git a/test_data/E2E_TEST_DATA/config/ontology_config.json b/test_data/E2E_TEST_DATA/config/ontology_config.json index 4fb7b74..34efa1e 100644 --- a/test_data/E2E_TEST_DATA/config/ontology_config.json +++ b/test_data/E2E_TEST_DATA/config/ontology_config.json @@ -3,43 +3,43 @@ { "name": "01. Patient information", "children": [ - { - "name": "02. Gender", - "concept_code": "Individual.gender" - }, { "name": "01. Date of birth", "concept_code": "Individual.birth_date" + }, + { + "name": "02. Date of death", + "concept_code": "Individual.death_date" }, { - "name": "Taxonomy", - "concept_code": "Individual.taxonomy" + "name": "03. Sex", + "concept_code": "Individual.gender" }, { - "name": "05. Date of death", - "concept_code": "Individual.death_date" + "name": "04. Taxonomy", + "concept_code": "Individual.taxonomy" }, { - "name": "Informed_consent", + "name": "05. 
Informed_consent", "children": [ { "name": "01. Informed consent type", "concept_code": "Individual.ic_type" }, { - "name": "Informed consent version", + "name": "02. Informed consent version", "concept_code": "Individual.ic_version" }, { - "name": "02. Date informed Consent given", + "name": "03. Date informed Consent given", "concept_code": "Individual.ic_given_date" }, { - "name": "03. Date informed consent withdrawn", + "name": "04. Date informed consent withdrawn", "concept_code": "Individual.ic_withdrawn_date" }, { - "name": "04. Report hereditary susceptibility", + "name": "05. Report hereditary susceptibility", "concept_code": "Individual.report_her_susc" } ] @@ -50,27 +50,27 @@ "name": "02. Diagnosis information", "children": [ { - "name": "02. Tumor type", + "name": "01. Tumor type", "concept_code": "Diagnosis.tumor_type" }, { - "name": "03. Topography", + "name": "02. Topography", "concept_code": "Diagnosis.topography" }, { - "name": "Treatment", - "concept_code": "Diagnosis.treatment_protocol" + "name": "03. Tumor stage", + "concept_code": "Diagnosis.tumor_stage" }, { - "name": "04. Tumor stage", - "concept_code": "Diagnosis.tumor_stage" + "name": "04. Treatment", + "concept_code": "Diagnosis.treatment_protocol" }, { - "name": "01. Date of diagnosis", + "name": "05. Date of diagnosis", "concept_code": "Diagnosis.diagnosis_date" }, { - "name": "05. Center of treatment", + "name": "06. Center of treatment", "concept_code": "Diagnosis.diagnosis_center" } ] @@ -78,20 +78,16 @@ { "name": "03. Biosource information", "children": [ - { - "name": "06. Biosource dedicated for specific study", - "concept_code": "Biosource.biosource_dedicated" - }, { "name": "01. Biosource parent", "concept_code": "Biosource.src_biosource_id" }, { - "name": "03. Tissue", + "name": "02. Tissue", "concept_code": "Biosource.tissue" }, { - "name": "02. Date of biosource", + "name": "03. 
Date of biosource", "concept_code": "Biosource.biosource_date" }, { @@ -101,6 +97,10 @@ { "name": "05. Tumor percentage", "concept_code": "Biosource.tumor_percentage" + }, + { + "name": "06. Biosource dedicated for specific study", + "concept_code": "Biosource.biosource_dedicated" } ] }, @@ -118,11 +118,52 @@ { "name": "03. Biomaterial type", "concept_code": "Biomaterial.type" + }, + { + "name": "04. Library strategy", + "concept_code": "Biomaterial.library_strategy" + }, + { + "name": "05. Analysis type", + "concept_code": "Biomaterial.analysis_type" } ] }, { - "name": "05. Study information", + "name": "05. Radiology information", + "children": [ + { + "name": "01. Radiology ID", + "concept_code": "Radiology.radiology_id" + }, + { + "name": "02. Examination Date", + "concept_code": "Radiology.examination_date" + }, + { + "name": "03. Image Type", + "concept_code": "Radiology.image_type" + }, + { + "name": "04. Field Strength", + "concept_code": "Radiology.field_strength" + }, + { + "name": "05. Individual ID", + "concept_code": "Radiology.individual_id" + }, + { + "name": "06. Diagnosis ID", + "concept_code": "Radiology.diagnosis_id" + }, + { + "name": "07. Body Part", + "concept_code": "Radiology.body_part" + } + ] + }, + { + "name": "06. Study information", "children": [ { "name": "01. Study ID", @@ -136,13 +177,13 @@ "name": "03. Study title", "concept_code": "Study.title" }, - { - "name": "Study datadictionary", - "concept_code": "Study.datadictionary" - }, { "name": "04. Individual Study ID", "concept_code": "IndividualStudy.individual_study_id" + }, + { + "name": "05. 
Study datadictionary", + "concept_code": "Study.datadictionary" } ] } diff --git a/test_data/E2E_TEST_DATA/config/sources_config.json b/test_data/E2E_TEST_DATA/config/sources_config.json index b9f112b..67d31ad 100644 --- a/test_data/E2E_TEST_DATA/config/sources_config.json +++ b/test_data/E2E_TEST_DATA/config/sources_config.json @@ -6,16 +6,20 @@ "name": "individual_id", "sources": [ { - "file": "individual.tsv", - "column": "individual_id" - } - ] - }, - { - "name": "taxonomy", - "sources": [ + "file": "clinic/RDP-Patient.tsv", + "column": "INDIVIDUAL_ID" + }, + { + "file": "clinic/RDP-IC.tsv", + "column": "INDIVIDUAL_ID" + }, { - "file": "individual.tsv" + "file": "studies/individual.csv", + "column": "INDIVIDUAL_ID" + }, + { + "file": "studies/death.csv", + "column": "INDIVIDUAL_ID" } ] }, @@ -23,8 +27,14 @@ "name": "birth_date", "sources": [ { - "file": "individual.tsv", - "date_format": "%Y-%m-%d" + "file": "clinic/RDP-Patient.tsv", + "column": "Gebdat", + "date_format": "%d%b%Y" + }, + { + "file": "studies/individual.csv", + "column": "DTOB", + "date_format": "%d/%m/%Y %H:%M:%S" } ] }, @@ -32,7 +42,12 @@ "name": "gender", "sources": [ { - "file": "individual.tsv" + "file": "clinic/RDP-Patient.tsv", + "column": "Geslacht" + }, + { + "file": "studies/individual.csv", + "column": "SEX" } ] }, @@ -40,8 +55,14 @@ "name": "death_date", "sources": [ { - "file": "individual.tsv", - "date_format": "%Y-%m-%d" + "file": "clinic/RDP-Patient.tsv", + "column": "Overldat", + "date_format": "%d%b%Y" + }, + { + "file": "studies/death.csv", + "column": "DTDEATH", + "date_format": "%d/%m/%Y %H:%M:%S" } ] }, @@ -49,8 +70,12 @@ "name": "ic_type", "sources": [ { - "file": "individual.tsv", - "column": "ic_type" + "file": "clinic/RDP-IC.tsv", + "column": "00004_Toestemmingsstatus" + }, + { + "file": "studies/individual.csv", + "column": "IFCDATR" } ] }, @@ -58,9 +83,9 @@ "name": "ic_given_date", "sources": [ { - "file": "individual.tsv", - "column": "ic_given_date", - 
"date_format": "%Y-%m-%d" + "file": "clinic/RDP-IC.tsv", + "column": "00007_Datum toestemming", + "date_format": "%d/%m/%Y" } ] }, @@ -68,9 +93,9 @@ "name": "ic_withdrawn_date", "sources": [ { - "file": "individual.tsv", - "column": "ic_withdrawn_date", - "date_format": "%Y-%m-%d" + "file": "clinic/RDP-IC.tsv", + "column": "00010_Datum geen toestemming", + "date_format": "%d/%m/%Y" } ] }, @@ -78,16 +103,9 @@ "name": "report_her_susc", "sources": [ { - "file": "individual.tsv" - } - ] - }, - { - "name": "report_inc_findings", - "sources": [ - { - "file": "individual.tsv", - "column": "report_inc_findings" + "file": "clinic/RDP-IC.tsv", + "column": "00012_Datum einde deelname", + "date_format": "%d/%m/%Y" } ] } @@ -99,7 +117,8 @@ "name": "individual_id", "sources": [ { - "file": "diagnosis.tsv" + "file": "studies/diagnosis.csv", + "column": "INDIVIDUAL_ID" } ] }, @@ -107,7 +126,8 @@ "name": "diagnosis_id", "sources": [ { - "file": "diagnosis.tsv" + "file": "studies/diagnosis.csv", + "column": "CIDDIAG" } ] }, @@ -115,7 +135,8 @@ "name": "tumor_type", "sources": [ { - "file": "diagnosis.tsv" + "file": "studies/diagnosis.csv", + "column": "DIAGCD" } ] }, @@ -123,15 +144,8 @@ "name": "topography", "sources": [ { - "file": "diagnosis.tsv" - } - ] - }, - { - "name": "treatment_protocol", - "sources": [ - { - "file": "diagnosis.tsv" + "file": "studies/diagnosis.csv", + "column": "PLOCCD" } ] }, @@ -139,7 +153,8 @@ "name": "tumor_stage", "sources": [ { - "file": "diagnosis.tsv" + "file": "studies/diagnosis.csv", + "column": "DIAGGRSTX" } ] }, @@ -147,8 +162,9 @@ "name": "diagnosis_date", "sources": [ { - "file": "diagnosis.tsv", - "date_format": "%Y-%m-%d" + "file": "studies/diagnosis.csv", + "column": "IDAABA", + "date_format": "%d/%m/%Y %H:%M:%S" } ] }, @@ -156,7 +172,8 @@ "name": "diagnosis_center", "sources": [ { - "file": "diagnosis.tsv" + "file": "studies/diagnosis.csv", + "column": "HOSPDIAG" } ] } @@ -168,7 +185,7 @@ "name": "biosource_id", "sources": [ { - "file": 
"biosource.tsv" + "file": "laboratory/biosource.tsv" } ] }, @@ -176,7 +193,7 @@ "name": "individual_id", "sources": [ { - "file": "biosource.tsv" + "file": "laboratory/biosource.tsv" } ] }, @@ -184,7 +201,7 @@ "name": "diagnosis_id", "sources": [ { - "file": "biosource.tsv" + "file": "laboratory/biosource.tsv" } ] }, @@ -192,7 +209,7 @@ "name": "src_biosource_id", "sources": [ { - "file": "biosource.tsv" + "file": "laboratory/biosource.tsv" } ] }, @@ -200,7 +217,7 @@ "name": "biosource_dedicated", "sources": [ { - "file": "biosource.tsv" + "file": "laboratory/biosource.tsv" } ] }, @@ -208,7 +225,7 @@ "name": "tissue", "sources": [ { - "file": "biosource.tsv" + "file": "laboratory/biosource.tsv" } ] }, @@ -216,8 +233,8 @@ "name": "biosource_date", "sources": [ { - "file": "biosource.tsv", - "date_format": "%Y-%m-%d" + "file": "laboratory/biosource.tsv", + "date_format": "%d/%m/%Y" } ] }, @@ -225,7 +242,7 @@ "name": "disease_status", "sources": [ { - "file": "biosource.tsv" + "file": "laboratory/biosource.tsv" } ] }, @@ -233,7 +250,7 @@ "name": "tumor_percentage", "sources": [ { - "file": "biosource.tsv" + "file": "laboratory/biosource.tsv" } ] } @@ -245,7 +262,7 @@ "name": "biomaterial_id", "sources": [ { - "file": "biomaterial.tsv" + "file": "laboratory/biomaterial.tsv" } ] }, @@ -253,7 +270,7 @@ "name": "src_biosource_id", "sources": [ { - "file": "biomaterial.tsv" + "file": "laboratory/biomaterial.tsv" } ] }, @@ -261,7 +278,7 @@ "name": "src_biomaterial_id", "sources": [ { - "file": "biomaterial.tsv" + "file": "laboratory/biomaterial.tsv" } ] }, @@ -269,8 +286,8 @@ "name": "biomaterial_date", "sources": [ { - "file": "biomaterial.tsv", - "date_format": "%Y-%m-%d" + "file": "laboratory/biomaterial.tsv", + "date_format": "%d/%m/%Y" } ] }, @@ -278,7 +295,84 @@ "name": "type", "sources": [ { - "file": "biomaterial.tsv" + "file": "laboratory/biomaterial.tsv" + } + ] + }, + { + "name": "library_strategy", + "sources": [ + { + "file": "laboratory/biomaterial.tsv" + } + 
] + }, + { + "name": "analysis_type", + "sources": [ + { + "file": "laboratory/biomaterial.tsv" + } + ] + } + ] + }, + "Radiology": { + "attributes": [ + { + "name": "radiology_id", + "sources": [ + { + "file": "laboratory/radiology.tsv" + } + ] + }, + { + "name": "examination_date", + "sources": [ + { + "file": "laboratory/radiology.tsv", + "date_format": "%Y-%m-%d" + } + ] + }, + { + "name": "image_type", + "sources": [ + { + "file": "laboratory/radiology.tsv" + } + ] + }, + { + "name": "field_strength", + "sources": [ + { + "file": "laboratory/radiology.tsv" + } + ] + }, + { + "name": "individual_id", + "sources": [ + { + "file": "laboratory/radiology.tsv" + } + ] + }, + { + "name": "diagnosis_id", + "sources": [ + { + "file": "laboratory/radiology.tsv" + } + ] + }, + { + "name": "body_part", + "sources": [ + { + "file": "laboratory/radiology.tsv" } ] } @@ -290,7 +384,8 @@ "name": "study_id", "sources": [ { - "file": "study.tsv" + "file": "studies/study.csv", + "column": "STUDY_ID" } ] }, @@ -298,7 +393,7 @@ "name": "acronym", "sources": [ { - "file": "study.tsv" + "file": "studies/study.csv" } ] }, @@ -306,7 +401,7 @@ "name": "title", "sources": [ { - "file": "study.tsv" + "file": "studies/study.csv" } ] }, @@ -314,7 +409,7 @@ "name": "datadictionary", "sources": [ { - "file": "study.tsv" + "file": "studies/study.csv" } ] } @@ -326,7 +421,8 @@ "name": "study_id_individual_study_id", "sources": [ { - "file": "individual_study.tsv" + "file": "studies/individual_study.csv", + "column": "STUDY_ID_INDIVIDUAL_STUDY_ID" } ] }, @@ -334,7 +430,8 @@ "name": "individual_study_id", "sources": [ { - "file": "individual_study.tsv" + "file": "studies/individual_study.csv", + "column": "INDIVIDUAL_STUDY_ID" } ] }, @@ -342,7 +439,8 @@ "name": "individual_id", "sources": [ { - "file": "individual_study.tsv" + "file": "studies/individual_study.csv", + "column": "INDIVIDUAL_ID" } ] }, @@ -350,7 +448,8 @@ "name": "study_id", "sources": [ { - "file": "individual_study.tsv" + "file": 
"studies/individual_study.csv", + "column": "STUDY_ID" } ] } @@ -358,6 +457,26 @@ } }, "codebooks": { - "individual.tsv": "codebook.txt" + "studies/individual.csv": "studies/individual_codebook.tsv", + "studies/diagnosis.csv": "studies/diagnosis_codebook.tsv", + "studies/death.csv": "studies/death_codebook.tsv", + "clinic/RDP-Patient.tsv": "clinic/RDP-Patient_codebook.tsv" + }, + "file_format": { + "studies/individual.csv": { + "delimiter": "," + }, + "studies/diagnosis.csv": { + "delimiter": "," + }, + "studies/death.csv": { + "delimiter": "," + }, + "studies/study.csv": { + "delimiter": "," + }, + "studies/individual_study.csv": { + "delimiter": "," + } } } From 108001d960164389dab5813c79d90b780e948f9a Mon Sep 17 00:00:00 2001 From: Alessia Peviani Date: Thu, 16 Dec 2021 15:54:12 +0100 Subject: [PATCH 24/39] Delete E2E NGS data --- .../E2E_TEST_DATA/dropzone/NGS/pmc_test.maf | 6 ------ .../E2E_TEST_DATA/dropzone/NGS/pmc_test.maf.gz | Bin 1656 -> 0 bytes .../dropzone/NGS/pmc_test.maf.gz.sha1 | 1 - .../E2E_TEST_DATA/dropzone/NGS/pmc_test.maf.sha1 | 1 - .../E2E_TEST_DATA/dropzone/NGS/pmc_test.seg | 3 --- .../E2E_TEST_DATA/dropzone/NGS/pmc_test.seg.sha1 | 1 - .../dropzone/NGS/pmc_test_all_data_by_genes.txt | 3 --- .../NGS/pmc_test_all_data_by_genes.txt.sha1 | 1 - .../NGS/pmc_test_all_thresholded.by_genes.txt | 3 --- .../pmc_test_all_thresholded.by_genes.txt.sha1 | 1 - test_data/E2E_TEST_DATA/dropzone/biomaterial.tsv | 5 ----- .../E2E_TEST_DATA/dropzone/biomaterial.tsv.sha1 | 1 - test_data/E2E_TEST_DATA/dropzone/biosource.tsv | 5 ----- .../E2E_TEST_DATA/dropzone/biosource.tsv.sha1 | 1 - test_data/E2E_TEST_DATA/dropzone/codebook.txt | 1 - .../E2E_TEST_DATA/dropzone/codebook.txt.sha1 | 1 - test_data/E2E_TEST_DATA/dropzone/diagnosis.tsv | 4 ---- .../E2E_TEST_DATA/dropzone/diagnosis.tsv.sha1 | 1 - test_data/E2E_TEST_DATA/dropzone/individual.tsv | 3 --- .../E2E_TEST_DATA/dropzone/individual.tsv.sha1 | 1 - .../E2E_TEST_DATA/dropzone/individual_study.tsv | 3 --- 
.../dropzone/individual_study.tsv.sha1 | 1 - test_data/E2E_TEST_DATA/dropzone/study.tsv | 3 --- test_data/E2E_TEST_DATA/dropzone/study.tsv.sha1 | 1 - 24 files changed, 51 deletions(-) delete mode 100644 test_data/E2E_TEST_DATA/dropzone/NGS/pmc_test.maf delete mode 100644 test_data/E2E_TEST_DATA/dropzone/NGS/pmc_test.maf.gz delete mode 100644 test_data/E2E_TEST_DATA/dropzone/NGS/pmc_test.maf.gz.sha1 delete mode 100644 test_data/E2E_TEST_DATA/dropzone/NGS/pmc_test.maf.sha1 delete mode 100755 test_data/E2E_TEST_DATA/dropzone/NGS/pmc_test.seg delete mode 100644 test_data/E2E_TEST_DATA/dropzone/NGS/pmc_test.seg.sha1 delete mode 100644 test_data/E2E_TEST_DATA/dropzone/NGS/pmc_test_all_data_by_genes.txt delete mode 100644 test_data/E2E_TEST_DATA/dropzone/NGS/pmc_test_all_data_by_genes.txt.sha1 delete mode 100644 test_data/E2E_TEST_DATA/dropzone/NGS/pmc_test_all_thresholded.by_genes.txt delete mode 100644 test_data/E2E_TEST_DATA/dropzone/NGS/pmc_test_all_thresholded.by_genes.txt.sha1 delete mode 100644 test_data/E2E_TEST_DATA/dropzone/biomaterial.tsv delete mode 100644 test_data/E2E_TEST_DATA/dropzone/biomaterial.tsv.sha1 delete mode 100644 test_data/E2E_TEST_DATA/dropzone/biosource.tsv delete mode 100644 test_data/E2E_TEST_DATA/dropzone/biosource.tsv.sha1 delete mode 100644 test_data/E2E_TEST_DATA/dropzone/codebook.txt delete mode 100644 test_data/E2E_TEST_DATA/dropzone/codebook.txt.sha1 delete mode 100644 test_data/E2E_TEST_DATA/dropzone/diagnosis.tsv delete mode 100644 test_data/E2E_TEST_DATA/dropzone/diagnosis.tsv.sha1 delete mode 100644 test_data/E2E_TEST_DATA/dropzone/individual.tsv delete mode 100644 test_data/E2E_TEST_DATA/dropzone/individual.tsv.sha1 delete mode 100644 test_data/E2E_TEST_DATA/dropzone/individual_study.tsv delete mode 100644 test_data/E2E_TEST_DATA/dropzone/individual_study.tsv.sha1 delete mode 100644 test_data/E2E_TEST_DATA/dropzone/study.tsv delete mode 100644 test_data/E2E_TEST_DATA/dropzone/study.tsv.sha1 diff --git 
a/test_data/E2E_TEST_DATA/dropzone/NGS/pmc_test.maf b/test_data/E2E_TEST_DATA/dropzone/NGS/pmc_test.maf deleted file mode 100644 index 8a18f25..0000000 --- a/test_data/E2E_TEST_DATA/dropzone/NGS/pmc_test.maf +++ /dev/null @@ -1,6 +0,0 @@ -#version 2.4 PMCBS000BCA_PMCBM000BAB -Hugo_Symbol Entrez_Gene_Id Center NCBI_Build Chromosome Start_Position End_Position Strand Variant_Classification Variant_Type Reference_Allele Tumor_Seq_Allele1 Tumor_Seq_Allele2 dbSNP_RS dbSNP_Val_Status Tumor_Sample_Barcode Matched_Norm_Sample_Barcode Match_Norm_Seq_Allele1 Match_Norm_Seq_Allele2 Tumor_Validation_Allele1 Tumor_Validation_Allele2 Match_Norm_Validation_Allele1 Match_Norm_Validation_Allele2 Verification_Status Validation_Status Mutation_Status Sequencing_Phase Sequence_Source Validation_Method Score BAM_File Sequencer Tumor_Sample_UUID Matched_Norm_Sample_UUID HGVSc HGVSp HGVSp_Short Transcript_ID Exon_Number t_depth t_ref_count t_alt_count n_depth n_ref_count n_alt_count all_effects Allele Gene Feature Feature_type Consequence cDNA_position CDS_position Protein_position Amino_acids Codons Existing_variation ALLELE_NUM DISTANCE STRAND_VEP SYMBOL SYMBOL_SOURCE HGNC_ID BIOTYPE CANONICAL CCDS ENSP SWISSPROT TREMBL UNIPARC RefSeq SIFT PolyPhen EXON INTRON DOMAINS AF AFR_AF AMR_AF ASN_AF EAS_AF EUR_AF SAS_AF AA_AF EA_AF CLIN_SIG SOMATIC PUBMED MOTIF_NAME MOTIF_POS HIGH_INF_POS MOTIF_SCORE_CHANGE IMPACT PICK VARIANT_CLASS TSL HGVS_OFFSET PHENO MINIMISED ExAC_AF ExAC_AF_AFR ExAC_AF_AMR ExAC_AF_EAS ExAC_AF_FIN ExAC_AF_NFE ExAC_AF_OTH ExAC_AF_SAS GENE_PHENO FILTER flanking_bps variant_id variant_qual ExAC_AF_Adj ExAC_AC_AN_Adj ExAC_AC_AN ExAC_AC_AN_AFR ExAC_AC_AN_AMR ExAC_AC_AN_EAS ExAC_AC_AN_FIN ExAC_AC_AN_NFE ExAC_AC_AN_OTH ExAC_AC_AN_SAS ExAC_FILTER -OTOF 9381 . 
GRCh38 chr2 26482551 26482551 + Missense_Mutation SNP C C A novel PMCBS000AAA_PMCBM000AAA PMCBS000BCA_PMCBM000BAB C C c.1434G>T p.Trp478Cys p.W478C ENST00000272371 14/47 117 115 2 52 52 0 OTOF,missense_variant,p.Trp478Cys,ENST00000272371,NM_194248.2;OTOF,missense_variant,p.Trp478Cys,ENST00000403946,NM_001287489.1;OTOF,upstream_gene_variant,,ENST00000338581,NM_004802.3;OTOF,upstream_gene_variant,,ENST00000339598,NM_194323.2;OTOF,upstream_gene_variant,,ENST00000402415,NM_194322.2; A ENSG00000115155 ENST00000272371 Transcript missense_variant 1561/7156 1434/5994 478/1997 W/C tgG/tgT 1 -1 OTOF HGNC HGNC:8515 protein_coding YES CCDS1725.1 ENSP00000272371 Q9HC10 UPI000013D94D NM_194248.2 deleterious(0) probably_damaging(0.999) 14/47 PROSITE_profiles:PS50004,hmmpanther:PTHR12546:SF32,hmmpanther:PTHR12546,Pfam_domain:PF00168,Gene3D:2.60.40.150,SMART_domains:SM00239,Superfamily_domains:SSF49562 MODERATE 1 SNV 1 1 1 t_lod_fstar . . -OTOF 9381 . GRCh38 chr2 26503953 26503953 + Intron SNP G G T novel PMCBS000AAA_PMCBM000AAA PMCBS000BCA_PMCBM000BAB G G c.510-108C>A ENST00000272371 49 47 2 37 37 0 OTOF,intron_variant,,ENST00000272371,NM_194248.2;OTOF,intron_variant,,ENST00000403946,NM_001287489.1;,regulatory_region_variant,,ENSR00000114299,; T ENSG00000115155 ENST00000272371 Transcript intron_variant 1 -1 OTOF HGNC HGNC:8515 protein_coding YES CCDS1725.1 ENSP00000272371 Q9HC10 UPI000013D94D NM_194248.2 5/46 MODIFIER 1 SNV 1 1 1 homologous_mapping_event;read_position;t_lod_fstar . . -OTOF 9381 . 
GRCh38 chr2 26558497 26558498 + Nonsense_Mutation INS - - AAAGTCACTTTGGCGATCCGGTCGCC novel PMCBS000AAA_PMCBM000AAA PMCBS000BCA_PMCBM000BAB - - c.49_74dup p.Phe25LeufsTer8 p.F25Lfs*8 ENST00000272371 1/47 94 93 1 59 59 0 OTOF,stop_gained,p.Phe25LeufsTer8,ENST00000272371,NM_194248.2;OTOF,stop_gained,p.Phe25LeufsTer8,ENST00000403946,NM_001287489.1;C2orf70,upstream_gene_variant,,ENST00000329615,NM_001105519.2;C2orf70,upstream_gene_variant,,ENST00000409392,NM_001322426.1;C2orf70,upstream_gene_variant,,ENST00000479453,; AAAGTCACTTTGGCGATCCGGTCGCC ENSG00000115155 ENST00000272371 Transcript stop_gained,frameshift_variant 201-202/7156 74-75/5994 25/1997 F/LATGSPK*LX ttc/ttGGCGACCGGATCGCCAAAGTGACTTTc 1 -1 OTOF HGNC HGNC:8515 protein_coding YES CCDS1725.1 ENSP00000272371 Q9HC10 UPI000013D94D NM_194248.2 1/47 Pfam_domain:PF00168,Gene3D:2.60.40.150,SMART_domains:SM00239,Superfamily_domains:SSF49562 HIGH 1 insertion 1 1 1 t_lod_fstar . . -OTOF 9381 . GRCh38 chr2 26482551 26482551 + Missense_Mutation SNP C C A novel PMCBS000AAB_PMCBM000AAB PMCBS000BCA_PMCBM000BAB C C c.1434G>T p.Trp478Cys p.W478C ENST00000272371 14/47 117 115 2 52 52 0 OTOF,missense_variant,p.Trp478Cys,ENST00000272371,NM_194248.2;OTOF,missense_variant,p.Trp478Cys,ENST00000403946,NM_001287489.1;OTOF,upstream_gene_variant,,ENST00000338581,NM_004802.3;OTOF,upstream_gene_variant,,ENST00000339598,NM_194323.2;OTOF,upstream_gene_variant,,ENST00000402415,NM_194322.2; A ENSG00000115155 ENST00000272371 Transcript missense_variant 1561/7156 1434/5994 478/1997 W/C tgG/tgT 1 -1 OTOF HGNC HGNC:8515 protein_coding YES CCDS1725.1 ENSP00000272371 Q9HC10 UPI000013D94D NM_194248.2 deleterious(0) probably_damaging(0.999) 14/47 PROSITE_profiles:PS50004,hmmpanther:PTHR12546:SF32,hmmpanther:PTHR12546,Pfam_domain:PF00168,Gene3D:2.60.40.150,SMART_domains:SM00239,Superfamily_domains:SSF49562 MODERATE 1 SNV 1 1 1 t_lod_fstar . . 
diff --git a/test_data/E2E_TEST_DATA/dropzone/NGS/pmc_test.maf.gz b/test_data/E2E_TEST_DATA/dropzone/NGS/pmc_test.maf.gz deleted file mode 100644 index 6925db9414e78589129b26f8e09a8fe298a7866e..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 1656 zcmV-;28a0{iwFqjFmhc018{9)Uvy=2bS`aSW&rIL*^Z+|@VWLYBIPBA$L6$QR*Hl% zU=)K1&CKpTwHVX3NiYz=qfEX&)qs!jc+W(em&KT_>guYluCD9*M^@Eok$)!*mG1)2 zoDxA0rlvtT>~YwbzMAhh1tm|rhav+jZz}dTby&`*n}EskhE?F3QfPy9S(T5te^4r%`+VJ-J?O%d_#k6 zT(iRjqomkZD|RlzW6icmfUJs&!PM}mon|M7>bW`Z?%df`TegI`b5B<7x9q-@Y>TRa z2wkCGRcYB!TxI=*yZZax0}5|w!pdfgW5w2VRqXQy;W%r0IPdBC89hIv$5}?%dd*f% z4ZZVl&jp*I9jfC<8}4Ujk=OmPU^Vj%S{|I$oRJd}R7Jzm{4`Q&bkSH|#8NIgxAn&~Ca?uZzEbl_3&mG_7CYid+ z=v`oeY4}UuHH`(Bs1jH{LB=;OAwjr|APOySx_~?14UEv_VS~2>#I+*`itH)aGK|5u zOCMZ63UQb%J;U`07&g8k<%8D^#OH%$kaoChCrO7HMn~dbbK&}wxDF7M6uBk@cT>+o z2Utd~O?|_&dN5cLn7huLx_*ar)5Khc7B%OF?^xh^fnlPiuK6q68=-6X5j7VEArO&8 z>n?O@+r+{pbIV_X=lZVa5{o;GVYW(jBfjAY_D(RK=Lp-be}sM8I>O6neuVMF;8?yz zyA5r35m_OuvpD~QU+qI#!(*SP(&PyKxsS8c_Q~%(jIV!zKBF9U?ojU;deXc_Ps+FG zN&gl-9ic_Zfw$BD)iPSzpv#&FLvTWKD{HXYRuV{KMUzxjJOMw0m)14IprD5Y0+>s| z#Ge6q@yIfOL(MRZQ_Vma-cw6j=8I=F6ct%Ndw=bqOM4aHk4j}2Dc)}x-#b6f*?xT zMA7u2*a_H|HCD%XM>o7Y9;BRDWm!`-v13&%M0G5VCin$@XCqbD6+i=xL|vc2n~@34#u+u62tWk5 z5!+j7(^&ht)o?>7`vi)`72^orS)@&8;zUx1Vw=>?wfRk-o1y@47r3p?@=RA|aCUc) zU|Po}BrW#!55kXJ{6qYZJ<%lI#T(q=2Vtn|`j5Tm0Om^KMi#|&Yb<5;Z9r6RtHE}+ zEAfn5R^0~CJQO8W8Q&6Hmab9;!5R&p6uUUhZvz`O8*2kzT;$oUG#m>qFeZU-YFR&Y53?xGD-aqM(_-7>Hha94I<^5eBn7 z;cp+zX{*l_w|-yzzL(*8$POyD*=KQ6R8QDGY|;zXu#XW%()Gb>G{BuHJYnkY9DwJTPV+SMw1v@jxv1i${YcneLl8Z}L;4A;eIm1)& Date: Thu, 16 Dec 2021 15:55:30 +0100 Subject: [PATCH 25/39] Move new dataset to dropzone subfolder --- .../{ => E2E_TEST_DATA/dropzone}/alternative/clinic/RDP-IC.tsv | 0 .../dropzone}/alternative/clinic/RDP-IC.tsv.sha1 | 0 .../dropzone}/alternative/clinic/RDP-Patient.tsv | 0 .../dropzone}/alternative/clinic/RDP-Patient.tsv.sha1 | 0 .../dropzone}/alternative/clinic/RDP-Patient_codebook.tsv | 0 
.../dropzone}/alternative/clinic/RDP-Patient_codebook.tsv.sha1 | 0 .../dropzone}/alternative/laboratory/biomaterial.tsv | 0 .../dropzone}/alternative/laboratory/biomaterial.tsv.sha1 | 0 .../dropzone}/alternative/laboratory/biosource.tsv | 0 .../dropzone}/alternative/laboratory/biosource.tsv.sha1 | 0 .../dropzone}/alternative/laboratory/radiology.tsv | 0 .../dropzone}/alternative/laboratory/radiology.tsv.sha1 | 0 .../{ => E2E_TEST_DATA/dropzone}/alternative/studies/death.csv | 0 .../dropzone}/alternative/studies/death.csv.sha1 | 0 .../dropzone}/alternative/studies/death_codebook.tsv | 0 .../dropzone}/alternative/studies/death_codebook.tsv.sha1 | 0 .../dropzone}/alternative/studies/diagnosis.csv | 0 .../dropzone}/alternative/studies/diagnosis.csv.sha1 | 0 .../dropzone}/alternative/studies/diagnosis_codebook.tsv | 0 .../dropzone}/alternative/studies/diagnosis_codebook.tsv.sha1 | 0 .../dropzone}/alternative/studies/individual.csv | 0 .../dropzone}/alternative/studies/individual.csv.sha1 | 0 .../dropzone}/alternative/studies/individual_codebook.tsv | 0 .../dropzone}/alternative/studies/individual_codebook.tsv.sha1 | 0 .../dropzone}/alternative/studies/individual_study.csv | 0 .../dropzone}/alternative/studies/individual_study.csv.sha1 | 0 .../{ => E2E_TEST_DATA/dropzone}/alternative/studies/study.csv | 0 .../dropzone}/alternative/studies/study.csv.sha1 | 0 .../{ => E2E_TEST_DATA/dropzone}/full_dataset/clinic/RDP-IC.tsv | 0 .../dropzone}/full_dataset/clinic/RDP-IC.tsv.sha1 | 0 .../dropzone}/full_dataset/clinic/RDP-Patient.tsv | 0 .../dropzone}/full_dataset/clinic/RDP-Patient.tsv.sha1 | 0 .../dropzone}/full_dataset/clinic/RDP-Patient_codebook.tsv | 0 .../dropzone}/full_dataset/clinic/RDP-Patient_codebook.tsv.sha1 | 0 .../dropzone}/full_dataset/laboratory/biomaterial.tsv | 0 .../dropzone}/full_dataset/laboratory/biomaterial.tsv.sha1 | 0 .../dropzone}/full_dataset/laboratory/biosource.tsv | 0 .../dropzone}/full_dataset/laboratory/biosource.tsv.sha1 | 0 
.../dropzone}/full_dataset/laboratory/radiology.tsv | 0 .../dropzone}/full_dataset/laboratory/radiology.tsv.sha1 | 0 .../{ => E2E_TEST_DATA/dropzone}/full_dataset/studies/death.csv | 0 .../dropzone}/full_dataset/studies/death.csv.sha1 | 0 .../dropzone}/full_dataset/studies/death_codebook.tsv | 0 .../dropzone}/full_dataset/studies/death_codebook.tsv.sha1 | 0 .../dropzone}/full_dataset/studies/diagnosis.csv | 0 .../dropzone}/full_dataset/studies/diagnosis.csv.sha1 | 0 .../dropzone}/full_dataset/studies/diagnosis_codebook.tsv | 0 .../dropzone}/full_dataset/studies/diagnosis_codebook.tsv.sha1 | 0 .../dropzone}/full_dataset/studies/individual.csv | 0 .../dropzone}/full_dataset/studies/individual.csv.sha1 | 0 .../dropzone}/full_dataset/studies/individual_codebook.tsv | 0 .../dropzone}/full_dataset/studies/individual_codebook.tsv.sha1 | 0 .../dropzone}/full_dataset/studies/individual_study.csv | 0 .../dropzone}/full_dataset/studies/individual_study.csv.sha1 | 0 .../{ => E2E_TEST_DATA/dropzone}/full_dataset/studies/study.csv | 0 .../dropzone}/full_dataset/studies/study.csv.sha1 | 0 56 files changed, 0 insertions(+), 0 deletions(-) rename test_data/{ => E2E_TEST_DATA/dropzone}/alternative/clinic/RDP-IC.tsv (100%) rename test_data/{ => E2E_TEST_DATA/dropzone}/alternative/clinic/RDP-IC.tsv.sha1 (100%) rename test_data/{ => E2E_TEST_DATA/dropzone}/alternative/clinic/RDP-Patient.tsv (100%) rename test_data/{ => E2E_TEST_DATA/dropzone}/alternative/clinic/RDP-Patient.tsv.sha1 (100%) rename test_data/{ => E2E_TEST_DATA/dropzone}/alternative/clinic/RDP-Patient_codebook.tsv (100%) rename test_data/{ => E2E_TEST_DATA/dropzone}/alternative/clinic/RDP-Patient_codebook.tsv.sha1 (100%) rename test_data/{ => E2E_TEST_DATA/dropzone}/alternative/laboratory/biomaterial.tsv (100%) rename test_data/{ => E2E_TEST_DATA/dropzone}/alternative/laboratory/biomaterial.tsv.sha1 (100%) rename test_data/{ => E2E_TEST_DATA/dropzone}/alternative/laboratory/biosource.tsv (100%) rename test_data/{ => 
E2E_TEST_DATA/dropzone}/alternative/laboratory/biosource.tsv.sha1 (100%) rename test_data/{ => E2E_TEST_DATA/dropzone}/alternative/laboratory/radiology.tsv (100%) rename test_data/{ => E2E_TEST_DATA/dropzone}/alternative/laboratory/radiology.tsv.sha1 (100%) rename test_data/{ => E2E_TEST_DATA/dropzone}/alternative/studies/death.csv (100%) rename test_data/{ => E2E_TEST_DATA/dropzone}/alternative/studies/death.csv.sha1 (100%) rename test_data/{ => E2E_TEST_DATA/dropzone}/alternative/studies/death_codebook.tsv (100%) rename test_data/{ => E2E_TEST_DATA/dropzone}/alternative/studies/death_codebook.tsv.sha1 (100%) rename test_data/{ => E2E_TEST_DATA/dropzone}/alternative/studies/diagnosis.csv (100%) rename test_data/{ => E2E_TEST_DATA/dropzone}/alternative/studies/diagnosis.csv.sha1 (100%) rename test_data/{ => E2E_TEST_DATA/dropzone}/alternative/studies/diagnosis_codebook.tsv (100%) rename test_data/{ => E2E_TEST_DATA/dropzone}/alternative/studies/diagnosis_codebook.tsv.sha1 (100%) rename test_data/{ => E2E_TEST_DATA/dropzone}/alternative/studies/individual.csv (100%) rename test_data/{ => E2E_TEST_DATA/dropzone}/alternative/studies/individual.csv.sha1 (100%) rename test_data/{ => E2E_TEST_DATA/dropzone}/alternative/studies/individual_codebook.tsv (100%) rename test_data/{ => E2E_TEST_DATA/dropzone}/alternative/studies/individual_codebook.tsv.sha1 (100%) rename test_data/{ => E2E_TEST_DATA/dropzone}/alternative/studies/individual_study.csv (100%) rename test_data/{ => E2E_TEST_DATA/dropzone}/alternative/studies/individual_study.csv.sha1 (100%) rename test_data/{ => E2E_TEST_DATA/dropzone}/alternative/studies/study.csv (100%) rename test_data/{ => E2E_TEST_DATA/dropzone}/alternative/studies/study.csv.sha1 (100%) rename test_data/{ => E2E_TEST_DATA/dropzone}/full_dataset/clinic/RDP-IC.tsv (100%) rename test_data/{ => E2E_TEST_DATA/dropzone}/full_dataset/clinic/RDP-IC.tsv.sha1 (100%) rename test_data/{ => E2E_TEST_DATA/dropzone}/full_dataset/clinic/RDP-Patient.tsv (100%) 
rename test_data/{ => E2E_TEST_DATA/dropzone}/full_dataset/clinic/RDP-Patient.tsv.sha1 (100%) rename test_data/{ => E2E_TEST_DATA/dropzone}/full_dataset/clinic/RDP-Patient_codebook.tsv (100%) rename test_data/{ => E2E_TEST_DATA/dropzone}/full_dataset/clinic/RDP-Patient_codebook.tsv.sha1 (100%) rename test_data/{ => E2E_TEST_DATA/dropzone}/full_dataset/laboratory/biomaterial.tsv (100%) rename test_data/{ => E2E_TEST_DATA/dropzone}/full_dataset/laboratory/biomaterial.tsv.sha1 (100%) rename test_data/{ => E2E_TEST_DATA/dropzone}/full_dataset/laboratory/biosource.tsv (100%) rename test_data/{ => E2E_TEST_DATA/dropzone}/full_dataset/laboratory/biosource.tsv.sha1 (100%) rename test_data/{ => E2E_TEST_DATA/dropzone}/full_dataset/laboratory/radiology.tsv (100%) rename test_data/{ => E2E_TEST_DATA/dropzone}/full_dataset/laboratory/radiology.tsv.sha1 (100%) rename test_data/{ => E2E_TEST_DATA/dropzone}/full_dataset/studies/death.csv (100%) rename test_data/{ => E2E_TEST_DATA/dropzone}/full_dataset/studies/death.csv.sha1 (100%) rename test_data/{ => E2E_TEST_DATA/dropzone}/full_dataset/studies/death_codebook.tsv (100%) rename test_data/{ => E2E_TEST_DATA/dropzone}/full_dataset/studies/death_codebook.tsv.sha1 (100%) rename test_data/{ => E2E_TEST_DATA/dropzone}/full_dataset/studies/diagnosis.csv (100%) rename test_data/{ => E2E_TEST_DATA/dropzone}/full_dataset/studies/diagnosis.csv.sha1 (100%) rename test_data/{ => E2E_TEST_DATA/dropzone}/full_dataset/studies/diagnosis_codebook.tsv (100%) rename test_data/{ => E2E_TEST_DATA/dropzone}/full_dataset/studies/diagnosis_codebook.tsv.sha1 (100%) rename test_data/{ => E2E_TEST_DATA/dropzone}/full_dataset/studies/individual.csv (100%) rename test_data/{ => E2E_TEST_DATA/dropzone}/full_dataset/studies/individual.csv.sha1 (100%) rename test_data/{ => E2E_TEST_DATA/dropzone}/full_dataset/studies/individual_codebook.tsv (100%) rename test_data/{ => E2E_TEST_DATA/dropzone}/full_dataset/studies/individual_codebook.tsv.sha1 (100%) rename 
test_data/{ => E2E_TEST_DATA/dropzone}/full_dataset/studies/individual_study.csv (100%) rename test_data/{ => E2E_TEST_DATA/dropzone}/full_dataset/studies/individual_study.csv.sha1 (100%) rename test_data/{ => E2E_TEST_DATA/dropzone}/full_dataset/studies/study.csv (100%) rename test_data/{ => E2E_TEST_DATA/dropzone}/full_dataset/studies/study.csv.sha1 (100%) diff --git a/test_data/alternative/clinic/RDP-IC.tsv b/test_data/E2E_TEST_DATA/dropzone/alternative/clinic/RDP-IC.tsv similarity index 100% rename from test_data/alternative/clinic/RDP-IC.tsv rename to test_data/E2E_TEST_DATA/dropzone/alternative/clinic/RDP-IC.tsv diff --git a/test_data/alternative/clinic/RDP-IC.tsv.sha1 b/test_data/E2E_TEST_DATA/dropzone/alternative/clinic/RDP-IC.tsv.sha1 similarity index 100% rename from test_data/alternative/clinic/RDP-IC.tsv.sha1 rename to test_data/E2E_TEST_DATA/dropzone/alternative/clinic/RDP-IC.tsv.sha1 diff --git a/test_data/alternative/clinic/RDP-Patient.tsv b/test_data/E2E_TEST_DATA/dropzone/alternative/clinic/RDP-Patient.tsv similarity index 100% rename from test_data/alternative/clinic/RDP-Patient.tsv rename to test_data/E2E_TEST_DATA/dropzone/alternative/clinic/RDP-Patient.tsv diff --git a/test_data/alternative/clinic/RDP-Patient.tsv.sha1 b/test_data/E2E_TEST_DATA/dropzone/alternative/clinic/RDP-Patient.tsv.sha1 similarity index 100% rename from test_data/alternative/clinic/RDP-Patient.tsv.sha1 rename to test_data/E2E_TEST_DATA/dropzone/alternative/clinic/RDP-Patient.tsv.sha1 diff --git a/test_data/alternative/clinic/RDP-Patient_codebook.tsv b/test_data/E2E_TEST_DATA/dropzone/alternative/clinic/RDP-Patient_codebook.tsv similarity index 100% rename from test_data/alternative/clinic/RDP-Patient_codebook.tsv rename to test_data/E2E_TEST_DATA/dropzone/alternative/clinic/RDP-Patient_codebook.tsv diff --git a/test_data/alternative/clinic/RDP-Patient_codebook.tsv.sha1 b/test_data/E2E_TEST_DATA/dropzone/alternative/clinic/RDP-Patient_codebook.tsv.sha1 similarity index 100% 
rename from test_data/alternative/clinic/RDP-Patient_codebook.tsv.sha1 rename to test_data/E2E_TEST_DATA/dropzone/alternative/clinic/RDP-Patient_codebook.tsv.sha1 diff --git a/test_data/alternative/laboratory/biomaterial.tsv b/test_data/E2E_TEST_DATA/dropzone/alternative/laboratory/biomaterial.tsv similarity index 100% rename from test_data/alternative/laboratory/biomaterial.tsv rename to test_data/E2E_TEST_DATA/dropzone/alternative/laboratory/biomaterial.tsv diff --git a/test_data/alternative/laboratory/biomaterial.tsv.sha1 b/test_data/E2E_TEST_DATA/dropzone/alternative/laboratory/biomaterial.tsv.sha1 similarity index 100% rename from test_data/alternative/laboratory/biomaterial.tsv.sha1 rename to test_data/E2E_TEST_DATA/dropzone/alternative/laboratory/biomaterial.tsv.sha1 diff --git a/test_data/alternative/laboratory/biosource.tsv b/test_data/E2E_TEST_DATA/dropzone/alternative/laboratory/biosource.tsv similarity index 100% rename from test_data/alternative/laboratory/biosource.tsv rename to test_data/E2E_TEST_DATA/dropzone/alternative/laboratory/biosource.tsv diff --git a/test_data/alternative/laboratory/biosource.tsv.sha1 b/test_data/E2E_TEST_DATA/dropzone/alternative/laboratory/biosource.tsv.sha1 similarity index 100% rename from test_data/alternative/laboratory/biosource.tsv.sha1 rename to test_data/E2E_TEST_DATA/dropzone/alternative/laboratory/biosource.tsv.sha1 diff --git a/test_data/alternative/laboratory/radiology.tsv b/test_data/E2E_TEST_DATA/dropzone/alternative/laboratory/radiology.tsv similarity index 100% rename from test_data/alternative/laboratory/radiology.tsv rename to test_data/E2E_TEST_DATA/dropzone/alternative/laboratory/radiology.tsv diff --git a/test_data/alternative/laboratory/radiology.tsv.sha1 b/test_data/E2E_TEST_DATA/dropzone/alternative/laboratory/radiology.tsv.sha1 similarity index 100% rename from test_data/alternative/laboratory/radiology.tsv.sha1 rename to test_data/E2E_TEST_DATA/dropzone/alternative/laboratory/radiology.tsv.sha1 
diff --git a/test_data/alternative/studies/death.csv b/test_data/E2E_TEST_DATA/dropzone/alternative/studies/death.csv similarity index 100% rename from test_data/alternative/studies/death.csv rename to test_data/E2E_TEST_DATA/dropzone/alternative/studies/death.csv diff --git a/test_data/alternative/studies/death.csv.sha1 b/test_data/E2E_TEST_DATA/dropzone/alternative/studies/death.csv.sha1 similarity index 100% rename from test_data/alternative/studies/death.csv.sha1 rename to test_data/E2E_TEST_DATA/dropzone/alternative/studies/death.csv.sha1 diff --git a/test_data/alternative/studies/death_codebook.tsv b/test_data/E2E_TEST_DATA/dropzone/alternative/studies/death_codebook.tsv similarity index 100% rename from test_data/alternative/studies/death_codebook.tsv rename to test_data/E2E_TEST_DATA/dropzone/alternative/studies/death_codebook.tsv diff --git a/test_data/alternative/studies/death_codebook.tsv.sha1 b/test_data/E2E_TEST_DATA/dropzone/alternative/studies/death_codebook.tsv.sha1 similarity index 100% rename from test_data/alternative/studies/death_codebook.tsv.sha1 rename to test_data/E2E_TEST_DATA/dropzone/alternative/studies/death_codebook.tsv.sha1 diff --git a/test_data/alternative/studies/diagnosis.csv b/test_data/E2E_TEST_DATA/dropzone/alternative/studies/diagnosis.csv similarity index 100% rename from test_data/alternative/studies/diagnosis.csv rename to test_data/E2E_TEST_DATA/dropzone/alternative/studies/diagnosis.csv diff --git a/test_data/alternative/studies/diagnosis.csv.sha1 b/test_data/E2E_TEST_DATA/dropzone/alternative/studies/diagnosis.csv.sha1 similarity index 100% rename from test_data/alternative/studies/diagnosis.csv.sha1 rename to test_data/E2E_TEST_DATA/dropzone/alternative/studies/diagnosis.csv.sha1 diff --git a/test_data/alternative/studies/diagnosis_codebook.tsv b/test_data/E2E_TEST_DATA/dropzone/alternative/studies/diagnosis_codebook.tsv similarity index 100% rename from test_data/alternative/studies/diagnosis_codebook.tsv rename to 
test_data/E2E_TEST_DATA/dropzone/alternative/studies/diagnosis_codebook.tsv diff --git a/test_data/alternative/studies/diagnosis_codebook.tsv.sha1 b/test_data/E2E_TEST_DATA/dropzone/alternative/studies/diagnosis_codebook.tsv.sha1 similarity index 100% rename from test_data/alternative/studies/diagnosis_codebook.tsv.sha1 rename to test_data/E2E_TEST_DATA/dropzone/alternative/studies/diagnosis_codebook.tsv.sha1 diff --git a/test_data/alternative/studies/individual.csv b/test_data/E2E_TEST_DATA/dropzone/alternative/studies/individual.csv similarity index 100% rename from test_data/alternative/studies/individual.csv rename to test_data/E2E_TEST_DATA/dropzone/alternative/studies/individual.csv diff --git a/test_data/alternative/studies/individual.csv.sha1 b/test_data/E2E_TEST_DATA/dropzone/alternative/studies/individual.csv.sha1 similarity index 100% rename from test_data/alternative/studies/individual.csv.sha1 rename to test_data/E2E_TEST_DATA/dropzone/alternative/studies/individual.csv.sha1 diff --git a/test_data/alternative/studies/individual_codebook.tsv b/test_data/E2E_TEST_DATA/dropzone/alternative/studies/individual_codebook.tsv similarity index 100% rename from test_data/alternative/studies/individual_codebook.tsv rename to test_data/E2E_TEST_DATA/dropzone/alternative/studies/individual_codebook.tsv diff --git a/test_data/alternative/studies/individual_codebook.tsv.sha1 b/test_data/E2E_TEST_DATA/dropzone/alternative/studies/individual_codebook.tsv.sha1 similarity index 100% rename from test_data/alternative/studies/individual_codebook.tsv.sha1 rename to test_data/E2E_TEST_DATA/dropzone/alternative/studies/individual_codebook.tsv.sha1 diff --git a/test_data/alternative/studies/individual_study.csv b/test_data/E2E_TEST_DATA/dropzone/alternative/studies/individual_study.csv similarity index 100% rename from test_data/alternative/studies/individual_study.csv rename to test_data/E2E_TEST_DATA/dropzone/alternative/studies/individual_study.csv diff --git 
a/test_data/alternative/studies/individual_study.csv.sha1 b/test_data/E2E_TEST_DATA/dropzone/alternative/studies/individual_study.csv.sha1 similarity index 100% rename from test_data/alternative/studies/individual_study.csv.sha1 rename to test_data/E2E_TEST_DATA/dropzone/alternative/studies/individual_study.csv.sha1 diff --git a/test_data/alternative/studies/study.csv b/test_data/E2E_TEST_DATA/dropzone/alternative/studies/study.csv similarity index 100% rename from test_data/alternative/studies/study.csv rename to test_data/E2E_TEST_DATA/dropzone/alternative/studies/study.csv diff --git a/test_data/alternative/studies/study.csv.sha1 b/test_data/E2E_TEST_DATA/dropzone/alternative/studies/study.csv.sha1 similarity index 100% rename from test_data/alternative/studies/study.csv.sha1 rename to test_data/E2E_TEST_DATA/dropzone/alternative/studies/study.csv.sha1 diff --git a/test_data/full_dataset/clinic/RDP-IC.tsv b/test_data/E2E_TEST_DATA/dropzone/full_dataset/clinic/RDP-IC.tsv similarity index 100% rename from test_data/full_dataset/clinic/RDP-IC.tsv rename to test_data/E2E_TEST_DATA/dropzone/full_dataset/clinic/RDP-IC.tsv diff --git a/test_data/full_dataset/clinic/RDP-IC.tsv.sha1 b/test_data/E2E_TEST_DATA/dropzone/full_dataset/clinic/RDP-IC.tsv.sha1 similarity index 100% rename from test_data/full_dataset/clinic/RDP-IC.tsv.sha1 rename to test_data/E2E_TEST_DATA/dropzone/full_dataset/clinic/RDP-IC.tsv.sha1 diff --git a/test_data/full_dataset/clinic/RDP-Patient.tsv b/test_data/E2E_TEST_DATA/dropzone/full_dataset/clinic/RDP-Patient.tsv similarity index 100% rename from test_data/full_dataset/clinic/RDP-Patient.tsv rename to test_data/E2E_TEST_DATA/dropzone/full_dataset/clinic/RDP-Patient.tsv diff --git a/test_data/full_dataset/clinic/RDP-Patient.tsv.sha1 b/test_data/E2E_TEST_DATA/dropzone/full_dataset/clinic/RDP-Patient.tsv.sha1 similarity index 100% rename from test_data/full_dataset/clinic/RDP-Patient.tsv.sha1 rename to 
test_data/E2E_TEST_DATA/dropzone/full_dataset/clinic/RDP-Patient.tsv.sha1 diff --git a/test_data/full_dataset/clinic/RDP-Patient_codebook.tsv b/test_data/E2E_TEST_DATA/dropzone/full_dataset/clinic/RDP-Patient_codebook.tsv similarity index 100% rename from test_data/full_dataset/clinic/RDP-Patient_codebook.tsv rename to test_data/E2E_TEST_DATA/dropzone/full_dataset/clinic/RDP-Patient_codebook.tsv diff --git a/test_data/full_dataset/clinic/RDP-Patient_codebook.tsv.sha1 b/test_data/E2E_TEST_DATA/dropzone/full_dataset/clinic/RDP-Patient_codebook.tsv.sha1 similarity index 100% rename from test_data/full_dataset/clinic/RDP-Patient_codebook.tsv.sha1 rename to test_data/E2E_TEST_DATA/dropzone/full_dataset/clinic/RDP-Patient_codebook.tsv.sha1 diff --git a/test_data/full_dataset/laboratory/biomaterial.tsv b/test_data/E2E_TEST_DATA/dropzone/full_dataset/laboratory/biomaterial.tsv similarity index 100% rename from test_data/full_dataset/laboratory/biomaterial.tsv rename to test_data/E2E_TEST_DATA/dropzone/full_dataset/laboratory/biomaterial.tsv diff --git a/test_data/full_dataset/laboratory/biomaterial.tsv.sha1 b/test_data/E2E_TEST_DATA/dropzone/full_dataset/laboratory/biomaterial.tsv.sha1 similarity index 100% rename from test_data/full_dataset/laboratory/biomaterial.tsv.sha1 rename to test_data/E2E_TEST_DATA/dropzone/full_dataset/laboratory/biomaterial.tsv.sha1 diff --git a/test_data/full_dataset/laboratory/biosource.tsv b/test_data/E2E_TEST_DATA/dropzone/full_dataset/laboratory/biosource.tsv similarity index 100% rename from test_data/full_dataset/laboratory/biosource.tsv rename to test_data/E2E_TEST_DATA/dropzone/full_dataset/laboratory/biosource.tsv diff --git a/test_data/full_dataset/laboratory/biosource.tsv.sha1 b/test_data/E2E_TEST_DATA/dropzone/full_dataset/laboratory/biosource.tsv.sha1 similarity index 100% rename from test_data/full_dataset/laboratory/biosource.tsv.sha1 rename to test_data/E2E_TEST_DATA/dropzone/full_dataset/laboratory/biosource.tsv.sha1 diff --git 
a/test_data/full_dataset/laboratory/radiology.tsv b/test_data/E2E_TEST_DATA/dropzone/full_dataset/laboratory/radiology.tsv similarity index 100% rename from test_data/full_dataset/laboratory/radiology.tsv rename to test_data/E2E_TEST_DATA/dropzone/full_dataset/laboratory/radiology.tsv diff --git a/test_data/full_dataset/laboratory/radiology.tsv.sha1 b/test_data/E2E_TEST_DATA/dropzone/full_dataset/laboratory/radiology.tsv.sha1 similarity index 100% rename from test_data/full_dataset/laboratory/radiology.tsv.sha1 rename to test_data/E2E_TEST_DATA/dropzone/full_dataset/laboratory/radiology.tsv.sha1 diff --git a/test_data/full_dataset/studies/death.csv b/test_data/E2E_TEST_DATA/dropzone/full_dataset/studies/death.csv similarity index 100% rename from test_data/full_dataset/studies/death.csv rename to test_data/E2E_TEST_DATA/dropzone/full_dataset/studies/death.csv diff --git a/test_data/full_dataset/studies/death.csv.sha1 b/test_data/E2E_TEST_DATA/dropzone/full_dataset/studies/death.csv.sha1 similarity index 100% rename from test_data/full_dataset/studies/death.csv.sha1 rename to test_data/E2E_TEST_DATA/dropzone/full_dataset/studies/death.csv.sha1 diff --git a/test_data/full_dataset/studies/death_codebook.tsv b/test_data/E2E_TEST_DATA/dropzone/full_dataset/studies/death_codebook.tsv similarity index 100% rename from test_data/full_dataset/studies/death_codebook.tsv rename to test_data/E2E_TEST_DATA/dropzone/full_dataset/studies/death_codebook.tsv diff --git a/test_data/full_dataset/studies/death_codebook.tsv.sha1 b/test_data/E2E_TEST_DATA/dropzone/full_dataset/studies/death_codebook.tsv.sha1 similarity index 100% rename from test_data/full_dataset/studies/death_codebook.tsv.sha1 rename to test_data/E2E_TEST_DATA/dropzone/full_dataset/studies/death_codebook.tsv.sha1 diff --git a/test_data/full_dataset/studies/diagnosis.csv b/test_data/E2E_TEST_DATA/dropzone/full_dataset/studies/diagnosis.csv similarity index 100% rename from test_data/full_dataset/studies/diagnosis.csv 
rename to test_data/E2E_TEST_DATA/dropzone/full_dataset/studies/diagnosis.csv diff --git a/test_data/full_dataset/studies/diagnosis.csv.sha1 b/test_data/E2E_TEST_DATA/dropzone/full_dataset/studies/diagnosis.csv.sha1 similarity index 100% rename from test_data/full_dataset/studies/diagnosis.csv.sha1 rename to test_data/E2E_TEST_DATA/dropzone/full_dataset/studies/diagnosis.csv.sha1 diff --git a/test_data/full_dataset/studies/diagnosis_codebook.tsv b/test_data/E2E_TEST_DATA/dropzone/full_dataset/studies/diagnosis_codebook.tsv similarity index 100% rename from test_data/full_dataset/studies/diagnosis_codebook.tsv rename to test_data/E2E_TEST_DATA/dropzone/full_dataset/studies/diagnosis_codebook.tsv diff --git a/test_data/full_dataset/studies/diagnosis_codebook.tsv.sha1 b/test_data/E2E_TEST_DATA/dropzone/full_dataset/studies/diagnosis_codebook.tsv.sha1 similarity index 100% rename from test_data/full_dataset/studies/diagnosis_codebook.tsv.sha1 rename to test_data/E2E_TEST_DATA/dropzone/full_dataset/studies/diagnosis_codebook.tsv.sha1 diff --git a/test_data/full_dataset/studies/individual.csv b/test_data/E2E_TEST_DATA/dropzone/full_dataset/studies/individual.csv similarity index 100% rename from test_data/full_dataset/studies/individual.csv rename to test_data/E2E_TEST_DATA/dropzone/full_dataset/studies/individual.csv diff --git a/test_data/full_dataset/studies/individual.csv.sha1 b/test_data/E2E_TEST_DATA/dropzone/full_dataset/studies/individual.csv.sha1 similarity index 100% rename from test_data/full_dataset/studies/individual.csv.sha1 rename to test_data/E2E_TEST_DATA/dropzone/full_dataset/studies/individual.csv.sha1 diff --git a/test_data/full_dataset/studies/individual_codebook.tsv b/test_data/E2E_TEST_DATA/dropzone/full_dataset/studies/individual_codebook.tsv similarity index 100% rename from test_data/full_dataset/studies/individual_codebook.tsv rename to test_data/E2E_TEST_DATA/dropzone/full_dataset/studies/individual_codebook.tsv diff --git 
a/test_data/full_dataset/studies/individual_codebook.tsv.sha1 b/test_data/E2E_TEST_DATA/dropzone/full_dataset/studies/individual_codebook.tsv.sha1 similarity index 100% rename from test_data/full_dataset/studies/individual_codebook.tsv.sha1 rename to test_data/E2E_TEST_DATA/dropzone/full_dataset/studies/individual_codebook.tsv.sha1 diff --git a/test_data/full_dataset/studies/individual_study.csv b/test_data/E2E_TEST_DATA/dropzone/full_dataset/studies/individual_study.csv similarity index 100% rename from test_data/full_dataset/studies/individual_study.csv rename to test_data/E2E_TEST_DATA/dropzone/full_dataset/studies/individual_study.csv diff --git a/test_data/full_dataset/studies/individual_study.csv.sha1 b/test_data/E2E_TEST_DATA/dropzone/full_dataset/studies/individual_study.csv.sha1 similarity index 100% rename from test_data/full_dataset/studies/individual_study.csv.sha1 rename to test_data/E2E_TEST_DATA/dropzone/full_dataset/studies/individual_study.csv.sha1 diff --git a/test_data/full_dataset/studies/study.csv b/test_data/E2E_TEST_DATA/dropzone/full_dataset/studies/study.csv similarity index 100% rename from test_data/full_dataset/studies/study.csv rename to test_data/E2E_TEST_DATA/dropzone/full_dataset/studies/study.csv diff --git a/test_data/full_dataset/studies/study.csv.sha1 b/test_data/E2E_TEST_DATA/dropzone/full_dataset/studies/study.csv.sha1 similarity index 100% rename from test_data/full_dataset/studies/study.csv.sha1 rename to test_data/E2E_TEST_DATA/dropzone/full_dataset/studies/study.csv.sha1 From 05fab5247dd6cbf11c25492bf22d5c949371d94c Mon Sep 17 00:00:00 2001 From: Alessia Peviani Date: Thu, 16 Dec 2021 15:59:11 +0100 Subject: [PATCH 26/39] Move test data scripts to test data folder --- generate_sha1sum.py => test_data/generate_sha1sum.py | 0 requirements.txt => test_data/requirements.txt | 0 validate_data.py => test_data/validate_data.py | 0 3 files changed, 0 insertions(+), 0 deletions(-) rename generate_sha1sum.py => 
test_data/generate_sha1sum.py (100%) rename requirements.txt => test_data/requirements.txt (100%) rename validate_data.py => test_data/validate_data.py (100%) diff --git a/generate_sha1sum.py b/test_data/generate_sha1sum.py similarity index 100% rename from generate_sha1sum.py rename to test_data/generate_sha1sum.py diff --git a/requirements.txt b/test_data/requirements.txt similarity index 100% rename from requirements.txt rename to test_data/requirements.txt diff --git a/validate_data.py b/test_data/validate_data.py similarity index 100% rename from validate_data.py rename to test_data/validate_data.py From bac221de64595dd85b47e992ace72155cc23d08c Mon Sep 17 00:00:00 2001 From: Alessia Peviani Date: Thu, 16 Dec 2021 16:00:06 +0100 Subject: [PATCH 27/39] Add test data README --- test_data/README.md | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) create mode 100644 test_data/README.md diff --git a/test_data/README.md b/test_data/README.md new file mode 100644 index 0000000..3200df6 --- /dev/null +++ b/test_data/README.md @@ -0,0 +1,38 @@ +# PMC E2E TESTDATA + +A repository complete with utilities to help create CSR-compatible test datasets for E2E testing. + + +### How to use this repo + +1. Update `requirements.txt` to point to the latest release of `python_csr2transmart` you would + like your test data to be compatible with (default `master` branch). + + +2. Create a Python virtual environment (check the latest Python version supported by + `python_csr2transmart`) and install the requirements: + ``` + pip install -r requirements.txt + ``` + + +3. Create a new branch and start working on a new version of `test_data/`, and corresponding + changes to the configuration files in `config/`. We recommend using two sub-folders, one for the + complete dataset (e.g. `full_dataset/`) and one with an alternative version, obtained for + example by removing or swapping the gender of one patient (e.g. `alternative/`). + + +4. 
Check that the dataset can be parsed by `sources2csr` and `csr2transmart` by running: + ``` + python validate_data.py <path/to/test_data_subfolder> + ``` + where the provided path should point to a specific test data subfolder (e.g. `full_dataset/`). + The output will be written to the (git-ignored) `validation_results/` folder, should you need + to inspect it. + + +5. Generate sha1sum files for all test data by running: + ``` + python generate_sha1sum.py + ``` + This automatically traverses the whole `test_data/` folder, no need to provide the path. From c0a2da715bfd022e397fa043aaad2db5c5508aa3 Mon Sep 17 00:00:00 2001 From: Alessia Peviani Date: Thu, 16 Dec 2021 16:24:38 +0100 Subject: [PATCH 28/39] Remove redundant logging.cfg file --- test_data/E2E_TEST_DATA/config/logging.cfg | 32 ---------------------- 1 file changed, 32 deletions(-) delete mode 100644 test_data/E2E_TEST_DATA/config/logging.cfg diff --git a/test_data/E2E_TEST_DATA/config/logging.cfg b/test_data/E2E_TEST_DATA/config/logging.cfg deleted file mode 100644 index ffc9d5d..0000000 --- a/test_data/E2E_TEST_DATA/config/logging.cfg +++ /dev/null @@ -1,32 +0,0 @@ -[loggers] -keys=root - -[handlers] -keys=consoleHandler,fileHandler - -[formatters] -keys=consoleFormatter,fileFormatter - -[logger_root] -level=NOTSET -handlers=consoleHandler,fileHandler - -[handler_consoleHandler] -class=StreamHandler -level=INFO -formatter=consoleFormatter -args=(sys.stdout,) - -[handler_fileHandler] -class=FileHandler -level=DEBUG -formatter=fileFormatter -args=('python.log','w') - -[formatter_consoleFormatter] -format=%(asctime)s; %(levelname)-7s %(name)-25s - %(message)s -datefmt=%Y-%m-%d %H:%M:%S - -[formatter_fileFormatter] -format=%(asctime)s %(levelname)-7s %(name)-25s %(message)s -datefmt=%Y-%m-%d %H:%M:%S \ No newline at end of file From f1eb771eeb486d12fb5506cf51dec04e4024d8c4 Mon Sep 17 00:00:00 2001 From: Alessia Peviani Date: Thu, 16 Dec 2021 16:29:14 +0100 Subject: [PATCH 29/39] Rename test data folders --- README.md | 2 +- {test_data => 
test_data_e2e}/README.md | 0 .../current}/config/ontology_config.json | 0 .../current}/config/sources_config.json | 0 .../current}/dropzone/alternative/clinic/RDP-IC.tsv | 0 .../dropzone/alternative/clinic/RDP-IC.tsv.sha1 | 0 .../dropzone/alternative/clinic/RDP-Patient.tsv | 0 .../alternative/clinic/RDP-Patient.tsv.sha1 | 0 .../alternative/clinic/RDP-Patient_codebook.tsv | 0 .../clinic/RDP-Patient_codebook.tsv.sha1 | 0 .../dropzone/alternative/laboratory/biomaterial.tsv | 0 .../alternative/laboratory/biomaterial.tsv.sha1 | 0 .../dropzone/alternative/laboratory/biosource.tsv | 0 .../alternative/laboratory/biosource.tsv.sha1 | 0 .../dropzone/alternative/laboratory/radiology.tsv | 0 .../alternative/laboratory/radiology.tsv.sha1 | 0 .../current}/dropzone/alternative/studies/death.csv | 0 .../dropzone/alternative/studies/death.csv.sha1 | 0 .../dropzone/alternative/studies/death_codebook.tsv | 0 .../alternative/studies/death_codebook.tsv.sha1 | 0 .../dropzone/alternative/studies/diagnosis.csv | 0 .../dropzone/alternative/studies/diagnosis.csv.sha1 | 0 .../alternative/studies/diagnosis_codebook.tsv | 0 .../alternative/studies/diagnosis_codebook.tsv.sha1 | 0 .../dropzone/alternative/studies/individual.csv | 0 .../alternative/studies/individual.csv.sha1 | 0 .../alternative/studies/individual_codebook.tsv | 0 .../studies/individual_codebook.tsv.sha1 | 0 .../alternative/studies/individual_study.csv | 0 .../alternative/studies/individual_study.csv.sha1 | 0 .../current}/dropzone/alternative/studies/study.csv | 0 .../dropzone/alternative/studies/study.csv.sha1 | 0 .../dropzone/full_dataset/clinic/RDP-IC.tsv | 0 .../dropzone/full_dataset/clinic/RDP-IC.tsv.sha1 | 0 .../dropzone/full_dataset/clinic/RDP-Patient.tsv | 0 .../full_dataset/clinic/RDP-Patient.tsv.sha1 | 0 .../full_dataset/clinic/RDP-Patient_codebook.tsv | 0 .../clinic/RDP-Patient_codebook.tsv.sha1 | 0 .../full_dataset/laboratory/biomaterial.tsv | 0 .../full_dataset/laboratory/biomaterial.tsv.sha1 | 0 
.../dropzone/full_dataset/laboratory/biosource.tsv | 0 .../full_dataset/laboratory/biosource.tsv.sha1 | 0 .../dropzone/full_dataset/laboratory/radiology.tsv | 0 .../full_dataset/laboratory/radiology.tsv.sha1 | 0 .../dropzone/full_dataset/studies/death.csv | 0 .../dropzone/full_dataset/studies/death.csv.sha1 | 0 .../full_dataset/studies/death_codebook.tsv | 0 .../full_dataset/studies/death_codebook.tsv.sha1 | 0 .../dropzone/full_dataset/studies/diagnosis.csv | 0 .../full_dataset/studies/diagnosis.csv.sha1 | 0 .../full_dataset/studies/diagnosis_codebook.tsv | 0 .../studies/diagnosis_codebook.tsv.sha1 | 0 .../dropzone/full_dataset/studies/individual.csv | 0 .../full_dataset/studies/individual.csv.sha1 | 0 .../full_dataset/studies/individual_codebook.tsv | 0 .../studies/individual_codebook.tsv.sha1 | 0 .../full_dataset/studies/individual_study.csv | 0 .../full_dataset/studies/individual_study.csv.sha1 | 0 .../dropzone/full_dataset/studies/study.csv | 0 .../dropzone/full_dataset/studies/study.csv.sha1 | 0 {test_data => test_data_e2e}/generate_sha1sum.py | 2 +- {test_data => test_data_e2e}/requirements.txt | 0 {test_data => test_data_e2e}/validate_data.py | 0 .../xx_archive}/test_data_NGS/README.md | 0 .../test_data_NGS/config/ontology_config.json | 0 .../test_data_NGS/config/sources_config.json | 0 .../dropzone/alternative/NGS/pmc_test1_WGS.maf.gz | Bin .../alternative/NGS/pmc_test1_WGS.maf.gz.sha1 | 0 .../dropzone/alternative/NGS/pmc_test2_WGS.maf.gz | Bin .../alternative/NGS/pmc_test2_WGS.maf.gz.sha1 | 0 .../dropzone/alternative/NGS/pmc_test_WXS.seg | 0 .../dropzone/alternative/NGS/pmc_test_WXS.seg.sha1 | 0 .../NGS/pmc_test_WXS_all_data_by_genes.txt | 0 .../NGS/pmc_test_WXS_all_data_by_genes.txt.sha1 | 0 .../NGS/pmc_test_WXS_all_thresholded.by_genes.txt | 0 .../pmc_test_WXS_all_thresholded.by_genes.txt.sha1 | 0 .../dropzone/alternative/clinic/RDP-IC.tab | 0 .../dropzone/alternative/clinic/RDP-IC.tab.sha1 | 0 .../dropzone/alternative/clinic/RDP-Patient.tab | 0 
.../alternative/clinic/RDP-Patient.tab.sha1 | 0 .../alternative/clinic/RDP-Patient_codebook.txt | 0 .../clinic/RDP-Patient_codebook.txt.sha1 | 0 .../dropzone/alternative/laboratory/biomaterial.txt | 0 .../alternative/laboratory/biomaterial.txt.sha1 | 0 .../dropzone/alternative/laboratory/biosource.txt | 0 .../alternative/laboratory/biosource.txt.sha1 | 0 .../dropzone/alternative/studies/death.txt | 0 .../dropzone/alternative/studies/death.txt.sha1 | 0 .../dropzone/alternative/studies/death_codebook.txt | 0 .../alternative/studies/death_codebook.txt.sha1 | 0 .../dropzone/alternative/studies/diagnosis.txt | 0 .../dropzone/alternative/studies/diagnosis.txt.sha1 | 0 .../alternative/studies/diagnosis_codebook.txt | 0 .../alternative/studies/diagnosis_codebook.txt.sha1 | 0 .../dropzone/alternative/studies/individual.txt | 0 .../alternative/studies/individual.txt.sha1 | 0 .../alternative/studies/individual_codebook.txt | 0 .../studies/individual_codebook.txt.sha1 | 0 .../alternative/studies/individual_study.txt | 0 .../alternative/studies/individual_study.txt.sha1 | 0 .../dropzone/alternative/studies/study.txt | 0 .../dropzone/alternative/studies/study.txt.sha1 | 0 .../dropzone/full_dataset/NGS/pmc_test1_WGS.maf.gz | Bin .../full_dataset/NGS/pmc_test1_WGS.maf.gz.sha1 | 0 .../dropzone/full_dataset/NGS/pmc_test2_WGS.maf.gz | Bin .../full_dataset/NGS/pmc_test2_WGS.maf.gz.sha1 | 0 .../dropzone/full_dataset/NGS/pmc_test_WXS.seg | 0 .../dropzone/full_dataset/NGS/pmc_test_WXS.seg.sha1 | 0 .../NGS/pmc_test_WXS_all_data_by_genes.txt | 0 .../NGS/pmc_test_WXS_all_data_by_genes.txt.sha1 | 0 .../NGS/pmc_test_WXS_all_thresholded.by_genes.txt | 0 .../pmc_test_WXS_all_thresholded.by_genes.txt.sha1 | 0 .../dropzone/full_dataset/clinic/RDP-IC.tab | 0 .../dropzone/full_dataset/clinic/RDP-IC.tab.sha1 | 0 .../dropzone/full_dataset/clinic/RDP-Patient.tab | 0 .../full_dataset/clinic/RDP-Patient.tab.sha1 | 0 .../full_dataset/clinic/RDP-Patient_codebook.txt | 0 
.../clinic/RDP-Patient_codebook.txt.sha1 | 0 .../full_dataset/laboratory/biomaterial.txt | 0 .../full_dataset/laboratory/biomaterial.txt.sha1 | 0 .../dropzone/full_dataset/laboratory/biosource.txt | 0 .../full_dataset/laboratory/biosource.txt.sha1 | 0 .../dropzone/full_dataset/studies/death.txt | 0 .../dropzone/full_dataset/studies/death.txt.sha1 | 0 .../full_dataset/studies/death_codebook.txt | 0 .../full_dataset/studies/death_codebook.txt.sha1 | 0 .../dropzone/full_dataset/studies/diagnosis.txt | 0 .../full_dataset/studies/diagnosis.txt.sha1 | 0 .../full_dataset/studies/diagnosis_codebook.txt | 0 .../studies/diagnosis_codebook.txt.sha1 | 0 .../dropzone/full_dataset/studies/individual.txt | 0 .../full_dataset/studies/individual.txt.sha1 | 0 .../full_dataset/studies/individual_codebook.txt | 0 .../studies/individual_codebook.txt.sha1 | 0 .../full_dataset/studies/individual_study.txt | 0 .../full_dataset/studies/individual_study.txt.sha1 | 0 .../dropzone/full_dataset/studies/study.txt | 0 .../dropzone/full_dataset/studies/study.txt.sha1 | 0 138 files changed, 2 insertions(+), 2 deletions(-) rename {test_data => test_data_e2e}/README.md (100%) rename {test_data/E2E_TEST_DATA => test_data_e2e/current}/config/ontology_config.json (100%) rename {test_data/E2E_TEST_DATA => test_data_e2e/current}/config/sources_config.json (100%) rename {test_data/E2E_TEST_DATA => test_data_e2e/current}/dropzone/alternative/clinic/RDP-IC.tsv (100%) rename {test_data/E2E_TEST_DATA => test_data_e2e/current}/dropzone/alternative/clinic/RDP-IC.tsv.sha1 (100%) rename {test_data/E2E_TEST_DATA => test_data_e2e/current}/dropzone/alternative/clinic/RDP-Patient.tsv (100%) rename {test_data/E2E_TEST_DATA => test_data_e2e/current}/dropzone/alternative/clinic/RDP-Patient.tsv.sha1 (100%) rename {test_data/E2E_TEST_DATA => test_data_e2e/current}/dropzone/alternative/clinic/RDP-Patient_codebook.tsv (100%) rename {test_data/E2E_TEST_DATA => 
test_data_e2e/current}/dropzone/alternative/clinic/RDP-Patient_codebook.tsv.sha1 (100%) rename {test_data/E2E_TEST_DATA => test_data_e2e/current}/dropzone/alternative/laboratory/biomaterial.tsv (100%) rename {test_data/E2E_TEST_DATA => test_data_e2e/current}/dropzone/alternative/laboratory/biomaterial.tsv.sha1 (100%) rename {test_data/E2E_TEST_DATA => test_data_e2e/current}/dropzone/alternative/laboratory/biosource.tsv (100%) rename {test_data/E2E_TEST_DATA => test_data_e2e/current}/dropzone/alternative/laboratory/biosource.tsv.sha1 (100%) rename {test_data/E2E_TEST_DATA => test_data_e2e/current}/dropzone/alternative/laboratory/radiology.tsv (100%) rename {test_data/E2E_TEST_DATA => test_data_e2e/current}/dropzone/alternative/laboratory/radiology.tsv.sha1 (100%) rename {test_data/E2E_TEST_DATA => test_data_e2e/current}/dropzone/alternative/studies/death.csv (100%) rename {test_data/E2E_TEST_DATA => test_data_e2e/current}/dropzone/alternative/studies/death.csv.sha1 (100%) rename {test_data/E2E_TEST_DATA => test_data_e2e/current}/dropzone/alternative/studies/death_codebook.tsv (100%) rename {test_data/E2E_TEST_DATA => test_data_e2e/current}/dropzone/alternative/studies/death_codebook.tsv.sha1 (100%) rename {test_data/E2E_TEST_DATA => test_data_e2e/current}/dropzone/alternative/studies/diagnosis.csv (100%) rename {test_data/E2E_TEST_DATA => test_data_e2e/current}/dropzone/alternative/studies/diagnosis.csv.sha1 (100%) rename {test_data/E2E_TEST_DATA => test_data_e2e/current}/dropzone/alternative/studies/diagnosis_codebook.tsv (100%) rename {test_data/E2E_TEST_DATA => test_data_e2e/current}/dropzone/alternative/studies/diagnosis_codebook.tsv.sha1 (100%) rename {test_data/E2E_TEST_DATA => test_data_e2e/current}/dropzone/alternative/studies/individual.csv (100%) rename {test_data/E2E_TEST_DATA => test_data_e2e/current}/dropzone/alternative/studies/individual.csv.sha1 (100%) rename {test_data/E2E_TEST_DATA => 
test_data_e2e/current}/dropzone/alternative/studies/individual_codebook.tsv (100%) rename {test_data/E2E_TEST_DATA => test_data_e2e/current}/dropzone/alternative/studies/individual_codebook.tsv.sha1 (100%) rename {test_data/E2E_TEST_DATA => test_data_e2e/current}/dropzone/alternative/studies/individual_study.csv (100%) rename {test_data/E2E_TEST_DATA => test_data_e2e/current}/dropzone/alternative/studies/individual_study.csv.sha1 (100%) rename {test_data/E2E_TEST_DATA => test_data_e2e/current}/dropzone/alternative/studies/study.csv (100%) rename {test_data/E2E_TEST_DATA => test_data_e2e/current}/dropzone/alternative/studies/study.csv.sha1 (100%) rename {test_data/E2E_TEST_DATA => test_data_e2e/current}/dropzone/full_dataset/clinic/RDP-IC.tsv (100%) rename {test_data/E2E_TEST_DATA => test_data_e2e/current}/dropzone/full_dataset/clinic/RDP-IC.tsv.sha1 (100%) rename {test_data/E2E_TEST_DATA => test_data_e2e/current}/dropzone/full_dataset/clinic/RDP-Patient.tsv (100%) rename {test_data/E2E_TEST_DATA => test_data_e2e/current}/dropzone/full_dataset/clinic/RDP-Patient.tsv.sha1 (100%) rename {test_data/E2E_TEST_DATA => test_data_e2e/current}/dropzone/full_dataset/clinic/RDP-Patient_codebook.tsv (100%) rename {test_data/E2E_TEST_DATA => test_data_e2e/current}/dropzone/full_dataset/clinic/RDP-Patient_codebook.tsv.sha1 (100%) rename {test_data/E2E_TEST_DATA => test_data_e2e/current}/dropzone/full_dataset/laboratory/biomaterial.tsv (100%) rename {test_data/E2E_TEST_DATA => test_data_e2e/current}/dropzone/full_dataset/laboratory/biomaterial.tsv.sha1 (100%) rename {test_data/E2E_TEST_DATA => test_data_e2e/current}/dropzone/full_dataset/laboratory/biosource.tsv (100%) rename {test_data/E2E_TEST_DATA => test_data_e2e/current}/dropzone/full_dataset/laboratory/biosource.tsv.sha1 (100%) rename {test_data/E2E_TEST_DATA => test_data_e2e/current}/dropzone/full_dataset/laboratory/radiology.tsv (100%) rename {test_data/E2E_TEST_DATA => 
test_data_e2e/current}/dropzone/full_dataset/laboratory/radiology.tsv.sha1 (100%) rename {test_data/E2E_TEST_DATA => test_data_e2e/current}/dropzone/full_dataset/studies/death.csv (100%) rename {test_data/E2E_TEST_DATA => test_data_e2e/current}/dropzone/full_dataset/studies/death.csv.sha1 (100%) rename {test_data/E2E_TEST_DATA => test_data_e2e/current}/dropzone/full_dataset/studies/death_codebook.tsv (100%) rename {test_data/E2E_TEST_DATA => test_data_e2e/current}/dropzone/full_dataset/studies/death_codebook.tsv.sha1 (100%) rename {test_data/E2E_TEST_DATA => test_data_e2e/current}/dropzone/full_dataset/studies/diagnosis.csv (100%) rename {test_data/E2E_TEST_DATA => test_data_e2e/current}/dropzone/full_dataset/studies/diagnosis.csv.sha1 (100%) rename {test_data/E2E_TEST_DATA => test_data_e2e/current}/dropzone/full_dataset/studies/diagnosis_codebook.tsv (100%) rename {test_data/E2E_TEST_DATA => test_data_e2e/current}/dropzone/full_dataset/studies/diagnosis_codebook.tsv.sha1 (100%) rename {test_data/E2E_TEST_DATA => test_data_e2e/current}/dropzone/full_dataset/studies/individual.csv (100%) rename {test_data/E2E_TEST_DATA => test_data_e2e/current}/dropzone/full_dataset/studies/individual.csv.sha1 (100%) rename {test_data/E2E_TEST_DATA => test_data_e2e/current}/dropzone/full_dataset/studies/individual_codebook.tsv (100%) rename {test_data/E2E_TEST_DATA => test_data_e2e/current}/dropzone/full_dataset/studies/individual_codebook.tsv.sha1 (100%) rename {test_data/E2E_TEST_DATA => test_data_e2e/current}/dropzone/full_dataset/studies/individual_study.csv (100%) rename {test_data/E2E_TEST_DATA => test_data_e2e/current}/dropzone/full_dataset/studies/individual_study.csv.sha1 (100%) rename {test_data/E2E_TEST_DATA => test_data_e2e/current}/dropzone/full_dataset/studies/study.csv (100%) rename {test_data/E2E_TEST_DATA => test_data_e2e/current}/dropzone/full_dataset/studies/study.csv.sha1 (100%) rename {test_data => test_data_e2e}/generate_sha1sum.py (96%) rename {test_data => 
test_data_e2e}/requirements.txt (100%) rename {test_data => test_data_e2e}/validate_data.py (100%) rename {test_data => test_data_e2e/xx_archive}/test_data_NGS/README.md (100%) rename {test_data => test_data_e2e/xx_archive}/test_data_NGS/config/ontology_config.json (100%) rename {test_data => test_data_e2e/xx_archive}/test_data_NGS/config/sources_config.json (100%) rename {test_data => test_data_e2e/xx_archive}/test_data_NGS/dropzone/alternative/NGS/pmc_test1_WGS.maf.gz (100%) rename {test_data => test_data_e2e/xx_archive}/test_data_NGS/dropzone/alternative/NGS/pmc_test1_WGS.maf.gz.sha1 (100%) rename {test_data => test_data_e2e/xx_archive}/test_data_NGS/dropzone/alternative/NGS/pmc_test2_WGS.maf.gz (100%) rename {test_data => test_data_e2e/xx_archive}/test_data_NGS/dropzone/alternative/NGS/pmc_test2_WGS.maf.gz.sha1 (100%) rename {test_data => test_data_e2e/xx_archive}/test_data_NGS/dropzone/alternative/NGS/pmc_test_WXS.seg (100%) rename {test_data => test_data_e2e/xx_archive}/test_data_NGS/dropzone/alternative/NGS/pmc_test_WXS.seg.sha1 (100%) rename {test_data => test_data_e2e/xx_archive}/test_data_NGS/dropzone/alternative/NGS/pmc_test_WXS_all_data_by_genes.txt (100%) rename {test_data => test_data_e2e/xx_archive}/test_data_NGS/dropzone/alternative/NGS/pmc_test_WXS_all_data_by_genes.txt.sha1 (100%) rename {test_data => test_data_e2e/xx_archive}/test_data_NGS/dropzone/alternative/NGS/pmc_test_WXS_all_thresholded.by_genes.txt (100%) rename {test_data => test_data_e2e/xx_archive}/test_data_NGS/dropzone/alternative/NGS/pmc_test_WXS_all_thresholded.by_genes.txt.sha1 (100%) rename {test_data => test_data_e2e/xx_archive}/test_data_NGS/dropzone/alternative/clinic/RDP-IC.tab (100%) rename {test_data => test_data_e2e/xx_archive}/test_data_NGS/dropzone/alternative/clinic/RDP-IC.tab.sha1 (100%) rename {test_data => test_data_e2e/xx_archive}/test_data_NGS/dropzone/alternative/clinic/RDP-Patient.tab (100%) rename {test_data => 
test_data_e2e/xx_archive}/test_data_NGS/dropzone/alternative/clinic/RDP-Patient.tab.sha1 (100%) rename {test_data => test_data_e2e/xx_archive}/test_data_NGS/dropzone/alternative/clinic/RDP-Patient_codebook.txt (100%) rename {test_data => test_data_e2e/xx_archive}/test_data_NGS/dropzone/alternative/clinic/RDP-Patient_codebook.txt.sha1 (100%) rename {test_data => test_data_e2e/xx_archive}/test_data_NGS/dropzone/alternative/laboratory/biomaterial.txt (100%) rename {test_data => test_data_e2e/xx_archive}/test_data_NGS/dropzone/alternative/laboratory/biomaterial.txt.sha1 (100%) rename {test_data => test_data_e2e/xx_archive}/test_data_NGS/dropzone/alternative/laboratory/biosource.txt (100%) rename {test_data => test_data_e2e/xx_archive}/test_data_NGS/dropzone/alternative/laboratory/biosource.txt.sha1 (100%) rename {test_data => test_data_e2e/xx_archive}/test_data_NGS/dropzone/alternative/studies/death.txt (100%) rename {test_data => test_data_e2e/xx_archive}/test_data_NGS/dropzone/alternative/studies/death.txt.sha1 (100%) rename {test_data => test_data_e2e/xx_archive}/test_data_NGS/dropzone/alternative/studies/death_codebook.txt (100%) rename {test_data => test_data_e2e/xx_archive}/test_data_NGS/dropzone/alternative/studies/death_codebook.txt.sha1 (100%) rename {test_data => test_data_e2e/xx_archive}/test_data_NGS/dropzone/alternative/studies/diagnosis.txt (100%) rename {test_data => test_data_e2e/xx_archive}/test_data_NGS/dropzone/alternative/studies/diagnosis.txt.sha1 (100%) rename {test_data => test_data_e2e/xx_archive}/test_data_NGS/dropzone/alternative/studies/diagnosis_codebook.txt (100%) rename {test_data => test_data_e2e/xx_archive}/test_data_NGS/dropzone/alternative/studies/diagnosis_codebook.txt.sha1 (100%) rename {test_data => test_data_e2e/xx_archive}/test_data_NGS/dropzone/alternative/studies/individual.txt (100%) rename {test_data => test_data_e2e/xx_archive}/test_data_NGS/dropzone/alternative/studies/individual.txt.sha1 (100%) rename {test_data => 
test_data_e2e/xx_archive}/test_data_NGS/dropzone/alternative/studies/individual_codebook.txt (100%) rename {test_data => test_data_e2e/xx_archive}/test_data_NGS/dropzone/alternative/studies/individual_codebook.txt.sha1 (100%) rename {test_data => test_data_e2e/xx_archive}/test_data_NGS/dropzone/alternative/studies/individual_study.txt (100%) rename {test_data => test_data_e2e/xx_archive}/test_data_NGS/dropzone/alternative/studies/individual_study.txt.sha1 (100%) rename {test_data => test_data_e2e/xx_archive}/test_data_NGS/dropzone/alternative/studies/study.txt (100%) rename {test_data => test_data_e2e/xx_archive}/test_data_NGS/dropzone/alternative/studies/study.txt.sha1 (100%) rename {test_data => test_data_e2e/xx_archive}/test_data_NGS/dropzone/full_dataset/NGS/pmc_test1_WGS.maf.gz (100%) rename {test_data => test_data_e2e/xx_archive}/test_data_NGS/dropzone/full_dataset/NGS/pmc_test1_WGS.maf.gz.sha1 (100%) rename {test_data => test_data_e2e/xx_archive}/test_data_NGS/dropzone/full_dataset/NGS/pmc_test2_WGS.maf.gz (100%) rename {test_data => test_data_e2e/xx_archive}/test_data_NGS/dropzone/full_dataset/NGS/pmc_test2_WGS.maf.gz.sha1 (100%) rename {test_data => test_data_e2e/xx_archive}/test_data_NGS/dropzone/full_dataset/NGS/pmc_test_WXS.seg (100%) rename {test_data => test_data_e2e/xx_archive}/test_data_NGS/dropzone/full_dataset/NGS/pmc_test_WXS.seg.sha1 (100%) rename {test_data => test_data_e2e/xx_archive}/test_data_NGS/dropzone/full_dataset/NGS/pmc_test_WXS_all_data_by_genes.txt (100%) rename {test_data => test_data_e2e/xx_archive}/test_data_NGS/dropzone/full_dataset/NGS/pmc_test_WXS_all_data_by_genes.txt.sha1 (100%) rename {test_data => test_data_e2e/xx_archive}/test_data_NGS/dropzone/full_dataset/NGS/pmc_test_WXS_all_thresholded.by_genes.txt (100%) rename {test_data => test_data_e2e/xx_archive}/test_data_NGS/dropzone/full_dataset/NGS/pmc_test_WXS_all_thresholded.by_genes.txt.sha1 (100%) rename {test_data => 
test_data_e2e/xx_archive}/test_data_NGS/dropzone/full_dataset/clinic/RDP-IC.tab (100%) rename {test_data => test_data_e2e/xx_archive}/test_data_NGS/dropzone/full_dataset/clinic/RDP-IC.tab.sha1 (100%) rename {test_data => test_data_e2e/xx_archive}/test_data_NGS/dropzone/full_dataset/clinic/RDP-Patient.tab (100%) rename {test_data => test_data_e2e/xx_archive}/test_data_NGS/dropzone/full_dataset/clinic/RDP-Patient.tab.sha1 (100%) rename {test_data => test_data_e2e/xx_archive}/test_data_NGS/dropzone/full_dataset/clinic/RDP-Patient_codebook.txt (100%) rename {test_data => test_data_e2e/xx_archive}/test_data_NGS/dropzone/full_dataset/clinic/RDP-Patient_codebook.txt.sha1 (100%) rename {test_data => test_data_e2e/xx_archive}/test_data_NGS/dropzone/full_dataset/laboratory/biomaterial.txt (100%) rename {test_data => test_data_e2e/xx_archive}/test_data_NGS/dropzone/full_dataset/laboratory/biomaterial.txt.sha1 (100%) rename {test_data => test_data_e2e/xx_archive}/test_data_NGS/dropzone/full_dataset/laboratory/biosource.txt (100%) rename {test_data => test_data_e2e/xx_archive}/test_data_NGS/dropzone/full_dataset/laboratory/biosource.txt.sha1 (100%) rename {test_data => test_data_e2e/xx_archive}/test_data_NGS/dropzone/full_dataset/studies/death.txt (100%) rename {test_data => test_data_e2e/xx_archive}/test_data_NGS/dropzone/full_dataset/studies/death.txt.sha1 (100%) rename {test_data => test_data_e2e/xx_archive}/test_data_NGS/dropzone/full_dataset/studies/death_codebook.txt (100%) rename {test_data => test_data_e2e/xx_archive}/test_data_NGS/dropzone/full_dataset/studies/death_codebook.txt.sha1 (100%) rename {test_data => test_data_e2e/xx_archive}/test_data_NGS/dropzone/full_dataset/studies/diagnosis.txt (100%) rename {test_data => test_data_e2e/xx_archive}/test_data_NGS/dropzone/full_dataset/studies/diagnosis.txt.sha1 (100%) rename {test_data => test_data_e2e/xx_archive}/test_data_NGS/dropzone/full_dataset/studies/diagnosis_codebook.txt (100%) rename {test_data => 
test_data_e2e/xx_archive}/test_data_NGS/dropzone/full_dataset/studies/diagnosis_codebook.txt.sha1 (100%) rename {test_data => test_data_e2e/xx_archive}/test_data_NGS/dropzone/full_dataset/studies/individual.txt (100%) rename {test_data => test_data_e2e/xx_archive}/test_data_NGS/dropzone/full_dataset/studies/individual.txt.sha1 (100%) rename {test_data => test_data_e2e/xx_archive}/test_data_NGS/dropzone/full_dataset/studies/individual_codebook.txt (100%) rename {test_data => test_data_e2e/xx_archive}/test_data_NGS/dropzone/full_dataset/studies/individual_codebook.txt.sha1 (100%) rename {test_data => test_data_e2e/xx_archive}/test_data_NGS/dropzone/full_dataset/studies/individual_study.txt (100%) rename {test_data => test_data_e2e/xx_archive}/test_data_NGS/dropzone/full_dataset/studies/individual_study.txt.sha1 (100%) rename {test_data => test_data_e2e/xx_archive}/test_data_NGS/dropzone/full_dataset/studies/study.txt (100%) rename {test_data => test_data_e2e/xx_archive}/test_data_NGS/dropzone/full_dataset/studies/study.txt.sha1 (100%) diff --git a/README.md b/README.md index 0c29fb2..01d9064 100644 --- a/README.md +++ b/README.md @@ -135,7 +135,7 @@ As sha1 hashes 40 characters long the rest of the file gets ignored: `1625be750dab24057c4c82d62d27298236ebb04c diagnosis.txt` For more information, see the [CSR data model](https://github.com/thehyve/python_csr2transmart#data-model) description -and an example of [input data files](test_data/E2E_TEST_DATA/dropzone). +and an example of [input data files](test_data_e2e/current/dropzone). 
## Usage diff --git a/test_data/README.md b/test_data_e2e/README.md similarity index 100% rename from test_data/README.md rename to test_data_e2e/README.md diff --git a/test_data/E2E_TEST_DATA/config/ontology_config.json b/test_data_e2e/current/config/ontology_config.json similarity index 100% rename from test_data/E2E_TEST_DATA/config/ontology_config.json rename to test_data_e2e/current/config/ontology_config.json diff --git a/test_data/E2E_TEST_DATA/config/sources_config.json b/test_data_e2e/current/config/sources_config.json similarity index 100% rename from test_data/E2E_TEST_DATA/config/sources_config.json rename to test_data_e2e/current/config/sources_config.json diff --git a/test_data/E2E_TEST_DATA/dropzone/alternative/clinic/RDP-IC.tsv b/test_data_e2e/current/dropzone/alternative/clinic/RDP-IC.tsv similarity index 100% rename from test_data/E2E_TEST_DATA/dropzone/alternative/clinic/RDP-IC.tsv rename to test_data_e2e/current/dropzone/alternative/clinic/RDP-IC.tsv diff --git a/test_data/E2E_TEST_DATA/dropzone/alternative/clinic/RDP-IC.tsv.sha1 b/test_data_e2e/current/dropzone/alternative/clinic/RDP-IC.tsv.sha1 similarity index 100% rename from test_data/E2E_TEST_DATA/dropzone/alternative/clinic/RDP-IC.tsv.sha1 rename to test_data_e2e/current/dropzone/alternative/clinic/RDP-IC.tsv.sha1 diff --git a/test_data/E2E_TEST_DATA/dropzone/alternative/clinic/RDP-Patient.tsv b/test_data_e2e/current/dropzone/alternative/clinic/RDP-Patient.tsv similarity index 100% rename from test_data/E2E_TEST_DATA/dropzone/alternative/clinic/RDP-Patient.tsv rename to test_data_e2e/current/dropzone/alternative/clinic/RDP-Patient.tsv diff --git a/test_data/E2E_TEST_DATA/dropzone/alternative/clinic/RDP-Patient.tsv.sha1 b/test_data_e2e/current/dropzone/alternative/clinic/RDP-Patient.tsv.sha1 similarity index 100% rename from test_data/E2E_TEST_DATA/dropzone/alternative/clinic/RDP-Patient.tsv.sha1 rename to test_data_e2e/current/dropzone/alternative/clinic/RDP-Patient.tsv.sha1 diff --git 
a/test_data/E2E_TEST_DATA/dropzone/alternative/clinic/RDP-Patient_codebook.tsv b/test_data_e2e/current/dropzone/alternative/clinic/RDP-Patient_codebook.tsv similarity index 100% rename from test_data/E2E_TEST_DATA/dropzone/alternative/clinic/RDP-Patient_codebook.tsv rename to test_data_e2e/current/dropzone/alternative/clinic/RDP-Patient_codebook.tsv diff --git a/test_data/E2E_TEST_DATA/dropzone/alternative/clinic/RDP-Patient_codebook.tsv.sha1 b/test_data_e2e/current/dropzone/alternative/clinic/RDP-Patient_codebook.tsv.sha1 similarity index 100% rename from test_data/E2E_TEST_DATA/dropzone/alternative/clinic/RDP-Patient_codebook.tsv.sha1 rename to test_data_e2e/current/dropzone/alternative/clinic/RDP-Patient_codebook.tsv.sha1 diff --git a/test_data/E2E_TEST_DATA/dropzone/alternative/laboratory/biomaterial.tsv b/test_data_e2e/current/dropzone/alternative/laboratory/biomaterial.tsv similarity index 100% rename from test_data/E2E_TEST_DATA/dropzone/alternative/laboratory/biomaterial.tsv rename to test_data_e2e/current/dropzone/alternative/laboratory/biomaterial.tsv diff --git a/test_data/E2E_TEST_DATA/dropzone/alternative/laboratory/biomaterial.tsv.sha1 b/test_data_e2e/current/dropzone/alternative/laboratory/biomaterial.tsv.sha1 similarity index 100% rename from test_data/E2E_TEST_DATA/dropzone/alternative/laboratory/biomaterial.tsv.sha1 rename to test_data_e2e/current/dropzone/alternative/laboratory/biomaterial.tsv.sha1 diff --git a/test_data/E2E_TEST_DATA/dropzone/alternative/laboratory/biosource.tsv b/test_data_e2e/current/dropzone/alternative/laboratory/biosource.tsv similarity index 100% rename from test_data/E2E_TEST_DATA/dropzone/alternative/laboratory/biosource.tsv rename to test_data_e2e/current/dropzone/alternative/laboratory/biosource.tsv diff --git a/test_data/E2E_TEST_DATA/dropzone/alternative/laboratory/biosource.tsv.sha1 b/test_data_e2e/current/dropzone/alternative/laboratory/biosource.tsv.sha1 similarity index 100% rename from 
test_data/E2E_TEST_DATA/dropzone/alternative/laboratory/biosource.tsv.sha1 rename to test_data_e2e/current/dropzone/alternative/laboratory/biosource.tsv.sha1 diff --git a/test_data/E2E_TEST_DATA/dropzone/alternative/laboratory/radiology.tsv b/test_data_e2e/current/dropzone/alternative/laboratory/radiology.tsv similarity index 100% rename from test_data/E2E_TEST_DATA/dropzone/alternative/laboratory/radiology.tsv rename to test_data_e2e/current/dropzone/alternative/laboratory/radiology.tsv diff --git a/test_data/E2E_TEST_DATA/dropzone/alternative/laboratory/radiology.tsv.sha1 b/test_data_e2e/current/dropzone/alternative/laboratory/radiology.tsv.sha1 similarity index 100% rename from test_data/E2E_TEST_DATA/dropzone/alternative/laboratory/radiology.tsv.sha1 rename to test_data_e2e/current/dropzone/alternative/laboratory/radiology.tsv.sha1 diff --git a/test_data/E2E_TEST_DATA/dropzone/alternative/studies/death.csv b/test_data_e2e/current/dropzone/alternative/studies/death.csv similarity index 100% rename from test_data/E2E_TEST_DATA/dropzone/alternative/studies/death.csv rename to test_data_e2e/current/dropzone/alternative/studies/death.csv diff --git a/test_data/E2E_TEST_DATA/dropzone/alternative/studies/death.csv.sha1 b/test_data_e2e/current/dropzone/alternative/studies/death.csv.sha1 similarity index 100% rename from test_data/E2E_TEST_DATA/dropzone/alternative/studies/death.csv.sha1 rename to test_data_e2e/current/dropzone/alternative/studies/death.csv.sha1 diff --git a/test_data/E2E_TEST_DATA/dropzone/alternative/studies/death_codebook.tsv b/test_data_e2e/current/dropzone/alternative/studies/death_codebook.tsv similarity index 100% rename from test_data/E2E_TEST_DATA/dropzone/alternative/studies/death_codebook.tsv rename to test_data_e2e/current/dropzone/alternative/studies/death_codebook.tsv diff --git a/test_data/E2E_TEST_DATA/dropzone/alternative/studies/death_codebook.tsv.sha1 b/test_data_e2e/current/dropzone/alternative/studies/death_codebook.tsv.sha1 
similarity index 100% rename from test_data/E2E_TEST_DATA/dropzone/alternative/studies/death_codebook.tsv.sha1 rename to test_data_e2e/current/dropzone/alternative/studies/death_codebook.tsv.sha1 diff --git a/test_data/E2E_TEST_DATA/dropzone/alternative/studies/diagnosis.csv b/test_data_e2e/current/dropzone/alternative/studies/diagnosis.csv similarity index 100% rename from test_data/E2E_TEST_DATA/dropzone/alternative/studies/diagnosis.csv rename to test_data_e2e/current/dropzone/alternative/studies/diagnosis.csv diff --git a/test_data/E2E_TEST_DATA/dropzone/alternative/studies/diagnosis.csv.sha1 b/test_data_e2e/current/dropzone/alternative/studies/diagnosis.csv.sha1 similarity index 100% rename from test_data/E2E_TEST_DATA/dropzone/alternative/studies/diagnosis.csv.sha1 rename to test_data_e2e/current/dropzone/alternative/studies/diagnosis.csv.sha1 diff --git a/test_data/E2E_TEST_DATA/dropzone/alternative/studies/diagnosis_codebook.tsv b/test_data_e2e/current/dropzone/alternative/studies/diagnosis_codebook.tsv similarity index 100% rename from test_data/E2E_TEST_DATA/dropzone/alternative/studies/diagnosis_codebook.tsv rename to test_data_e2e/current/dropzone/alternative/studies/diagnosis_codebook.tsv diff --git a/test_data/E2E_TEST_DATA/dropzone/alternative/studies/diagnosis_codebook.tsv.sha1 b/test_data_e2e/current/dropzone/alternative/studies/diagnosis_codebook.tsv.sha1 similarity index 100% rename from test_data/E2E_TEST_DATA/dropzone/alternative/studies/diagnosis_codebook.tsv.sha1 rename to test_data_e2e/current/dropzone/alternative/studies/diagnosis_codebook.tsv.sha1 diff --git a/test_data/E2E_TEST_DATA/dropzone/alternative/studies/individual.csv b/test_data_e2e/current/dropzone/alternative/studies/individual.csv similarity index 100% rename from test_data/E2E_TEST_DATA/dropzone/alternative/studies/individual.csv rename to test_data_e2e/current/dropzone/alternative/studies/individual.csv diff --git 
a/test_data/E2E_TEST_DATA/dropzone/alternative/studies/individual.csv.sha1 b/test_data_e2e/current/dropzone/alternative/studies/individual.csv.sha1 similarity index 100% rename from test_data/E2E_TEST_DATA/dropzone/alternative/studies/individual.csv.sha1 rename to test_data_e2e/current/dropzone/alternative/studies/individual.csv.sha1 diff --git a/test_data/E2E_TEST_DATA/dropzone/alternative/studies/individual_codebook.tsv b/test_data_e2e/current/dropzone/alternative/studies/individual_codebook.tsv similarity index 100% rename from test_data/E2E_TEST_DATA/dropzone/alternative/studies/individual_codebook.tsv rename to test_data_e2e/current/dropzone/alternative/studies/individual_codebook.tsv diff --git a/test_data/E2E_TEST_DATA/dropzone/alternative/studies/individual_codebook.tsv.sha1 b/test_data_e2e/current/dropzone/alternative/studies/individual_codebook.tsv.sha1 similarity index 100% rename from test_data/E2E_TEST_DATA/dropzone/alternative/studies/individual_codebook.tsv.sha1 rename to test_data_e2e/current/dropzone/alternative/studies/individual_codebook.tsv.sha1 diff --git a/test_data/E2E_TEST_DATA/dropzone/alternative/studies/individual_study.csv b/test_data_e2e/current/dropzone/alternative/studies/individual_study.csv similarity index 100% rename from test_data/E2E_TEST_DATA/dropzone/alternative/studies/individual_study.csv rename to test_data_e2e/current/dropzone/alternative/studies/individual_study.csv diff --git a/test_data/E2E_TEST_DATA/dropzone/alternative/studies/individual_study.csv.sha1 b/test_data_e2e/current/dropzone/alternative/studies/individual_study.csv.sha1 similarity index 100% rename from test_data/E2E_TEST_DATA/dropzone/alternative/studies/individual_study.csv.sha1 rename to test_data_e2e/current/dropzone/alternative/studies/individual_study.csv.sha1 diff --git a/test_data/E2E_TEST_DATA/dropzone/alternative/studies/study.csv b/test_data_e2e/current/dropzone/alternative/studies/study.csv similarity index 100% rename from 
test_data/E2E_TEST_DATA/dropzone/alternative/studies/study.csv rename to test_data_e2e/current/dropzone/alternative/studies/study.csv diff --git a/test_data/E2E_TEST_DATA/dropzone/alternative/studies/study.csv.sha1 b/test_data_e2e/current/dropzone/alternative/studies/study.csv.sha1 similarity index 100% rename from test_data/E2E_TEST_DATA/dropzone/alternative/studies/study.csv.sha1 rename to test_data_e2e/current/dropzone/alternative/studies/study.csv.sha1 diff --git a/test_data/E2E_TEST_DATA/dropzone/full_dataset/clinic/RDP-IC.tsv b/test_data_e2e/current/dropzone/full_dataset/clinic/RDP-IC.tsv similarity index 100% rename from test_data/E2E_TEST_DATA/dropzone/full_dataset/clinic/RDP-IC.tsv rename to test_data_e2e/current/dropzone/full_dataset/clinic/RDP-IC.tsv diff --git a/test_data/E2E_TEST_DATA/dropzone/full_dataset/clinic/RDP-IC.tsv.sha1 b/test_data_e2e/current/dropzone/full_dataset/clinic/RDP-IC.tsv.sha1 similarity index 100% rename from test_data/E2E_TEST_DATA/dropzone/full_dataset/clinic/RDP-IC.tsv.sha1 rename to test_data_e2e/current/dropzone/full_dataset/clinic/RDP-IC.tsv.sha1 diff --git a/test_data/E2E_TEST_DATA/dropzone/full_dataset/clinic/RDP-Patient.tsv b/test_data_e2e/current/dropzone/full_dataset/clinic/RDP-Patient.tsv similarity index 100% rename from test_data/E2E_TEST_DATA/dropzone/full_dataset/clinic/RDP-Patient.tsv rename to test_data_e2e/current/dropzone/full_dataset/clinic/RDP-Patient.tsv diff --git a/test_data/E2E_TEST_DATA/dropzone/full_dataset/clinic/RDP-Patient.tsv.sha1 b/test_data_e2e/current/dropzone/full_dataset/clinic/RDP-Patient.tsv.sha1 similarity index 100% rename from test_data/E2E_TEST_DATA/dropzone/full_dataset/clinic/RDP-Patient.tsv.sha1 rename to test_data_e2e/current/dropzone/full_dataset/clinic/RDP-Patient.tsv.sha1 diff --git a/test_data/E2E_TEST_DATA/dropzone/full_dataset/clinic/RDP-Patient_codebook.tsv b/test_data_e2e/current/dropzone/full_dataset/clinic/RDP-Patient_codebook.tsv similarity index 100% rename from 
test_data/E2E_TEST_DATA/dropzone/full_dataset/clinic/RDP-Patient_codebook.tsv rename to test_data_e2e/current/dropzone/full_dataset/clinic/RDP-Patient_codebook.tsv diff --git a/test_data/E2E_TEST_DATA/dropzone/full_dataset/clinic/RDP-Patient_codebook.tsv.sha1 b/test_data_e2e/current/dropzone/full_dataset/clinic/RDP-Patient_codebook.tsv.sha1 similarity index 100% rename from test_data/E2E_TEST_DATA/dropzone/full_dataset/clinic/RDP-Patient_codebook.tsv.sha1 rename to test_data_e2e/current/dropzone/full_dataset/clinic/RDP-Patient_codebook.tsv.sha1 diff --git a/test_data/E2E_TEST_DATA/dropzone/full_dataset/laboratory/biomaterial.tsv b/test_data_e2e/current/dropzone/full_dataset/laboratory/biomaterial.tsv similarity index 100% rename from test_data/E2E_TEST_DATA/dropzone/full_dataset/laboratory/biomaterial.tsv rename to test_data_e2e/current/dropzone/full_dataset/laboratory/biomaterial.tsv diff --git a/test_data/E2E_TEST_DATA/dropzone/full_dataset/laboratory/biomaterial.tsv.sha1 b/test_data_e2e/current/dropzone/full_dataset/laboratory/biomaterial.tsv.sha1 similarity index 100% rename from test_data/E2E_TEST_DATA/dropzone/full_dataset/laboratory/biomaterial.tsv.sha1 rename to test_data_e2e/current/dropzone/full_dataset/laboratory/biomaterial.tsv.sha1 diff --git a/test_data/E2E_TEST_DATA/dropzone/full_dataset/laboratory/biosource.tsv b/test_data_e2e/current/dropzone/full_dataset/laboratory/biosource.tsv similarity index 100% rename from test_data/E2E_TEST_DATA/dropzone/full_dataset/laboratory/biosource.tsv rename to test_data_e2e/current/dropzone/full_dataset/laboratory/biosource.tsv diff --git a/test_data/E2E_TEST_DATA/dropzone/full_dataset/laboratory/biosource.tsv.sha1 b/test_data_e2e/current/dropzone/full_dataset/laboratory/biosource.tsv.sha1 similarity index 100% rename from test_data/E2E_TEST_DATA/dropzone/full_dataset/laboratory/biosource.tsv.sha1 rename to test_data_e2e/current/dropzone/full_dataset/laboratory/biosource.tsv.sha1 diff --git 
a/test_data/E2E_TEST_DATA/dropzone/full_dataset/laboratory/radiology.tsv b/test_data_e2e/current/dropzone/full_dataset/laboratory/radiology.tsv similarity index 100% rename from test_data/E2E_TEST_DATA/dropzone/full_dataset/laboratory/radiology.tsv rename to test_data_e2e/current/dropzone/full_dataset/laboratory/radiology.tsv diff --git a/test_data/E2E_TEST_DATA/dropzone/full_dataset/laboratory/radiology.tsv.sha1 b/test_data_e2e/current/dropzone/full_dataset/laboratory/radiology.tsv.sha1 similarity index 100% rename from test_data/E2E_TEST_DATA/dropzone/full_dataset/laboratory/radiology.tsv.sha1 rename to test_data_e2e/current/dropzone/full_dataset/laboratory/radiology.tsv.sha1 diff --git a/test_data/E2E_TEST_DATA/dropzone/full_dataset/studies/death.csv b/test_data_e2e/current/dropzone/full_dataset/studies/death.csv similarity index 100% rename from test_data/E2E_TEST_DATA/dropzone/full_dataset/studies/death.csv rename to test_data_e2e/current/dropzone/full_dataset/studies/death.csv diff --git a/test_data/E2E_TEST_DATA/dropzone/full_dataset/studies/death.csv.sha1 b/test_data_e2e/current/dropzone/full_dataset/studies/death.csv.sha1 similarity index 100% rename from test_data/E2E_TEST_DATA/dropzone/full_dataset/studies/death.csv.sha1 rename to test_data_e2e/current/dropzone/full_dataset/studies/death.csv.sha1 diff --git a/test_data/E2E_TEST_DATA/dropzone/full_dataset/studies/death_codebook.tsv b/test_data_e2e/current/dropzone/full_dataset/studies/death_codebook.tsv similarity index 100% rename from test_data/E2E_TEST_DATA/dropzone/full_dataset/studies/death_codebook.tsv rename to test_data_e2e/current/dropzone/full_dataset/studies/death_codebook.tsv diff --git a/test_data/E2E_TEST_DATA/dropzone/full_dataset/studies/death_codebook.tsv.sha1 b/test_data_e2e/current/dropzone/full_dataset/studies/death_codebook.tsv.sha1 similarity index 100% rename from test_data/E2E_TEST_DATA/dropzone/full_dataset/studies/death_codebook.tsv.sha1 rename to 
test_data_e2e/current/dropzone/full_dataset/studies/death_codebook.tsv.sha1 diff --git a/test_data/E2E_TEST_DATA/dropzone/full_dataset/studies/diagnosis.csv b/test_data_e2e/current/dropzone/full_dataset/studies/diagnosis.csv similarity index 100% rename from test_data/E2E_TEST_DATA/dropzone/full_dataset/studies/diagnosis.csv rename to test_data_e2e/current/dropzone/full_dataset/studies/diagnosis.csv diff --git a/test_data/E2E_TEST_DATA/dropzone/full_dataset/studies/diagnosis.csv.sha1 b/test_data_e2e/current/dropzone/full_dataset/studies/diagnosis.csv.sha1 similarity index 100% rename from test_data/E2E_TEST_DATA/dropzone/full_dataset/studies/diagnosis.csv.sha1 rename to test_data_e2e/current/dropzone/full_dataset/studies/diagnosis.csv.sha1 diff --git a/test_data/E2E_TEST_DATA/dropzone/full_dataset/studies/diagnosis_codebook.tsv b/test_data_e2e/current/dropzone/full_dataset/studies/diagnosis_codebook.tsv similarity index 100% rename from test_data/E2E_TEST_DATA/dropzone/full_dataset/studies/diagnosis_codebook.tsv rename to test_data_e2e/current/dropzone/full_dataset/studies/diagnosis_codebook.tsv diff --git a/test_data/E2E_TEST_DATA/dropzone/full_dataset/studies/diagnosis_codebook.tsv.sha1 b/test_data_e2e/current/dropzone/full_dataset/studies/diagnosis_codebook.tsv.sha1 similarity index 100% rename from test_data/E2E_TEST_DATA/dropzone/full_dataset/studies/diagnosis_codebook.tsv.sha1 rename to test_data_e2e/current/dropzone/full_dataset/studies/diagnosis_codebook.tsv.sha1 diff --git a/test_data/E2E_TEST_DATA/dropzone/full_dataset/studies/individual.csv b/test_data_e2e/current/dropzone/full_dataset/studies/individual.csv similarity index 100% rename from test_data/E2E_TEST_DATA/dropzone/full_dataset/studies/individual.csv rename to test_data_e2e/current/dropzone/full_dataset/studies/individual.csv diff --git a/test_data/E2E_TEST_DATA/dropzone/full_dataset/studies/individual.csv.sha1 b/test_data_e2e/current/dropzone/full_dataset/studies/individual.csv.sha1 similarity 
index 100% rename from test_data/E2E_TEST_DATA/dropzone/full_dataset/studies/individual.csv.sha1 rename to test_data_e2e/current/dropzone/full_dataset/studies/individual.csv.sha1 diff --git a/test_data/E2E_TEST_DATA/dropzone/full_dataset/studies/individual_codebook.tsv b/test_data_e2e/current/dropzone/full_dataset/studies/individual_codebook.tsv similarity index 100% rename from test_data/E2E_TEST_DATA/dropzone/full_dataset/studies/individual_codebook.tsv rename to test_data_e2e/current/dropzone/full_dataset/studies/individual_codebook.tsv diff --git a/test_data/E2E_TEST_DATA/dropzone/full_dataset/studies/individual_codebook.tsv.sha1 b/test_data_e2e/current/dropzone/full_dataset/studies/individual_codebook.tsv.sha1 similarity index 100% rename from test_data/E2E_TEST_DATA/dropzone/full_dataset/studies/individual_codebook.tsv.sha1 rename to test_data_e2e/current/dropzone/full_dataset/studies/individual_codebook.tsv.sha1 diff --git a/test_data/E2E_TEST_DATA/dropzone/full_dataset/studies/individual_study.csv b/test_data_e2e/current/dropzone/full_dataset/studies/individual_study.csv similarity index 100% rename from test_data/E2E_TEST_DATA/dropzone/full_dataset/studies/individual_study.csv rename to test_data_e2e/current/dropzone/full_dataset/studies/individual_study.csv diff --git a/test_data/E2E_TEST_DATA/dropzone/full_dataset/studies/individual_study.csv.sha1 b/test_data_e2e/current/dropzone/full_dataset/studies/individual_study.csv.sha1 similarity index 100% rename from test_data/E2E_TEST_DATA/dropzone/full_dataset/studies/individual_study.csv.sha1 rename to test_data_e2e/current/dropzone/full_dataset/studies/individual_study.csv.sha1 diff --git a/test_data/E2E_TEST_DATA/dropzone/full_dataset/studies/study.csv b/test_data_e2e/current/dropzone/full_dataset/studies/study.csv similarity index 100% rename from test_data/E2E_TEST_DATA/dropzone/full_dataset/studies/study.csv rename to test_data_e2e/current/dropzone/full_dataset/studies/study.csv diff --git 
a/test_data/E2E_TEST_DATA/dropzone/full_dataset/studies/study.csv.sha1 b/test_data_e2e/current/dropzone/full_dataset/studies/study.csv.sha1 similarity index 100% rename from test_data/E2E_TEST_DATA/dropzone/full_dataset/studies/study.csv.sha1 rename to test_data_e2e/current/dropzone/full_dataset/studies/study.csv.sha1 diff --git a/test_data/generate_sha1sum.py b/test_data_e2e/generate_sha1sum.py similarity index 96% rename from test_data/generate_sha1sum.py rename to test_data_e2e/generate_sha1sum.py index 339a018..1db64bb 100644 --- a/test_data/generate_sha1sum.py +++ b/test_data_e2e/generate_sha1sum.py @@ -30,4 +30,4 @@ def traverse(top_dir, algorithm: str = 'sha1'): if __name__ == '__main__': - traverse('test_data') + traverse('test_data_e2e') diff --git a/test_data/requirements.txt b/test_data_e2e/requirements.txt similarity index 100% rename from test_data/requirements.txt rename to test_data_e2e/requirements.txt diff --git a/test_data/validate_data.py b/test_data_e2e/validate_data.py similarity index 100% rename from test_data/validate_data.py rename to test_data_e2e/validate_data.py diff --git a/test_data/test_data_NGS/README.md b/test_data_e2e/xx_archive/test_data_NGS/README.md similarity index 100% rename from test_data/test_data_NGS/README.md rename to test_data_e2e/xx_archive/test_data_NGS/README.md diff --git a/test_data/test_data_NGS/config/ontology_config.json b/test_data_e2e/xx_archive/test_data_NGS/config/ontology_config.json similarity index 100% rename from test_data/test_data_NGS/config/ontology_config.json rename to test_data_e2e/xx_archive/test_data_NGS/config/ontology_config.json diff --git a/test_data/test_data_NGS/config/sources_config.json b/test_data_e2e/xx_archive/test_data_NGS/config/sources_config.json similarity index 100% rename from test_data/test_data_NGS/config/sources_config.json rename to test_data_e2e/xx_archive/test_data_NGS/config/sources_config.json diff --git 
a/test_data/test_data_NGS/dropzone/alternative/NGS/pmc_test1_WGS.maf.gz b/test_data_e2e/xx_archive/test_data_NGS/dropzone/alternative/NGS/pmc_test1_WGS.maf.gz similarity index 100% rename from test_data/test_data_NGS/dropzone/alternative/NGS/pmc_test1_WGS.maf.gz rename to test_data_e2e/xx_archive/test_data_NGS/dropzone/alternative/NGS/pmc_test1_WGS.maf.gz diff --git a/test_data/test_data_NGS/dropzone/alternative/NGS/pmc_test1_WGS.maf.gz.sha1 b/test_data_e2e/xx_archive/test_data_NGS/dropzone/alternative/NGS/pmc_test1_WGS.maf.gz.sha1 similarity index 100% rename from test_data/test_data_NGS/dropzone/alternative/NGS/pmc_test1_WGS.maf.gz.sha1 rename to test_data_e2e/xx_archive/test_data_NGS/dropzone/alternative/NGS/pmc_test1_WGS.maf.gz.sha1 diff --git a/test_data/test_data_NGS/dropzone/alternative/NGS/pmc_test2_WGS.maf.gz b/test_data_e2e/xx_archive/test_data_NGS/dropzone/alternative/NGS/pmc_test2_WGS.maf.gz similarity index 100% rename from test_data/test_data_NGS/dropzone/alternative/NGS/pmc_test2_WGS.maf.gz rename to test_data_e2e/xx_archive/test_data_NGS/dropzone/alternative/NGS/pmc_test2_WGS.maf.gz diff --git a/test_data/test_data_NGS/dropzone/alternative/NGS/pmc_test2_WGS.maf.gz.sha1 b/test_data_e2e/xx_archive/test_data_NGS/dropzone/alternative/NGS/pmc_test2_WGS.maf.gz.sha1 similarity index 100% rename from test_data/test_data_NGS/dropzone/alternative/NGS/pmc_test2_WGS.maf.gz.sha1 rename to test_data_e2e/xx_archive/test_data_NGS/dropzone/alternative/NGS/pmc_test2_WGS.maf.gz.sha1 diff --git a/test_data/test_data_NGS/dropzone/alternative/NGS/pmc_test_WXS.seg b/test_data_e2e/xx_archive/test_data_NGS/dropzone/alternative/NGS/pmc_test_WXS.seg similarity index 100% rename from test_data/test_data_NGS/dropzone/alternative/NGS/pmc_test_WXS.seg rename to test_data_e2e/xx_archive/test_data_NGS/dropzone/alternative/NGS/pmc_test_WXS.seg diff --git a/test_data/test_data_NGS/dropzone/alternative/NGS/pmc_test_WXS.seg.sha1 
b/test_data_e2e/xx_archive/test_data_NGS/dropzone/alternative/NGS/pmc_test_WXS.seg.sha1 similarity index 100% rename from test_data/test_data_NGS/dropzone/alternative/NGS/pmc_test_WXS.seg.sha1 rename to test_data_e2e/xx_archive/test_data_NGS/dropzone/alternative/NGS/pmc_test_WXS.seg.sha1 diff --git a/test_data/test_data_NGS/dropzone/alternative/NGS/pmc_test_WXS_all_data_by_genes.txt b/test_data_e2e/xx_archive/test_data_NGS/dropzone/alternative/NGS/pmc_test_WXS_all_data_by_genes.txt similarity index 100% rename from test_data/test_data_NGS/dropzone/alternative/NGS/pmc_test_WXS_all_data_by_genes.txt rename to test_data_e2e/xx_archive/test_data_NGS/dropzone/alternative/NGS/pmc_test_WXS_all_data_by_genes.txt diff --git a/test_data/test_data_NGS/dropzone/alternative/NGS/pmc_test_WXS_all_data_by_genes.txt.sha1 b/test_data_e2e/xx_archive/test_data_NGS/dropzone/alternative/NGS/pmc_test_WXS_all_data_by_genes.txt.sha1 similarity index 100% rename from test_data/test_data_NGS/dropzone/alternative/NGS/pmc_test_WXS_all_data_by_genes.txt.sha1 rename to test_data_e2e/xx_archive/test_data_NGS/dropzone/alternative/NGS/pmc_test_WXS_all_data_by_genes.txt.sha1 diff --git a/test_data/test_data_NGS/dropzone/alternative/NGS/pmc_test_WXS_all_thresholded.by_genes.txt b/test_data_e2e/xx_archive/test_data_NGS/dropzone/alternative/NGS/pmc_test_WXS_all_thresholded.by_genes.txt similarity index 100% rename from test_data/test_data_NGS/dropzone/alternative/NGS/pmc_test_WXS_all_thresholded.by_genes.txt rename to test_data_e2e/xx_archive/test_data_NGS/dropzone/alternative/NGS/pmc_test_WXS_all_thresholded.by_genes.txt diff --git a/test_data/test_data_NGS/dropzone/alternative/NGS/pmc_test_WXS_all_thresholded.by_genes.txt.sha1 b/test_data_e2e/xx_archive/test_data_NGS/dropzone/alternative/NGS/pmc_test_WXS_all_thresholded.by_genes.txt.sha1 similarity index 100% rename from test_data/test_data_NGS/dropzone/alternative/NGS/pmc_test_WXS_all_thresholded.by_genes.txt.sha1 rename to 
test_data_e2e/xx_archive/test_data_NGS/dropzone/alternative/NGS/pmc_test_WXS_all_thresholded.by_genes.txt.sha1 diff --git a/test_data/test_data_NGS/dropzone/alternative/clinic/RDP-IC.tab b/test_data_e2e/xx_archive/test_data_NGS/dropzone/alternative/clinic/RDP-IC.tab similarity index 100% rename from test_data/test_data_NGS/dropzone/alternative/clinic/RDP-IC.tab rename to test_data_e2e/xx_archive/test_data_NGS/dropzone/alternative/clinic/RDP-IC.tab diff --git a/test_data/test_data_NGS/dropzone/alternative/clinic/RDP-IC.tab.sha1 b/test_data_e2e/xx_archive/test_data_NGS/dropzone/alternative/clinic/RDP-IC.tab.sha1 similarity index 100% rename from test_data/test_data_NGS/dropzone/alternative/clinic/RDP-IC.tab.sha1 rename to test_data_e2e/xx_archive/test_data_NGS/dropzone/alternative/clinic/RDP-IC.tab.sha1 diff --git a/test_data/test_data_NGS/dropzone/alternative/clinic/RDP-Patient.tab b/test_data_e2e/xx_archive/test_data_NGS/dropzone/alternative/clinic/RDP-Patient.tab similarity index 100% rename from test_data/test_data_NGS/dropzone/alternative/clinic/RDP-Patient.tab rename to test_data_e2e/xx_archive/test_data_NGS/dropzone/alternative/clinic/RDP-Patient.tab diff --git a/test_data/test_data_NGS/dropzone/alternative/clinic/RDP-Patient.tab.sha1 b/test_data_e2e/xx_archive/test_data_NGS/dropzone/alternative/clinic/RDP-Patient.tab.sha1 similarity index 100% rename from test_data/test_data_NGS/dropzone/alternative/clinic/RDP-Patient.tab.sha1 rename to test_data_e2e/xx_archive/test_data_NGS/dropzone/alternative/clinic/RDP-Patient.tab.sha1 diff --git a/test_data/test_data_NGS/dropzone/alternative/clinic/RDP-Patient_codebook.txt b/test_data_e2e/xx_archive/test_data_NGS/dropzone/alternative/clinic/RDP-Patient_codebook.txt similarity index 100% rename from test_data/test_data_NGS/dropzone/alternative/clinic/RDP-Patient_codebook.txt rename to test_data_e2e/xx_archive/test_data_NGS/dropzone/alternative/clinic/RDP-Patient_codebook.txt diff --git 
a/test_data/test_data_NGS/dropzone/alternative/clinic/RDP-Patient_codebook.txt.sha1 b/test_data_e2e/xx_archive/test_data_NGS/dropzone/alternative/clinic/RDP-Patient_codebook.txt.sha1 similarity index 100% rename from test_data/test_data_NGS/dropzone/alternative/clinic/RDP-Patient_codebook.txt.sha1 rename to test_data_e2e/xx_archive/test_data_NGS/dropzone/alternative/clinic/RDP-Patient_codebook.txt.sha1 diff --git a/test_data/test_data_NGS/dropzone/alternative/laboratory/biomaterial.txt b/test_data_e2e/xx_archive/test_data_NGS/dropzone/alternative/laboratory/biomaterial.txt similarity index 100% rename from test_data/test_data_NGS/dropzone/alternative/laboratory/biomaterial.txt rename to test_data_e2e/xx_archive/test_data_NGS/dropzone/alternative/laboratory/biomaterial.txt diff --git a/test_data/test_data_NGS/dropzone/alternative/laboratory/biomaterial.txt.sha1 b/test_data_e2e/xx_archive/test_data_NGS/dropzone/alternative/laboratory/biomaterial.txt.sha1 similarity index 100% rename from test_data/test_data_NGS/dropzone/alternative/laboratory/biomaterial.txt.sha1 rename to test_data_e2e/xx_archive/test_data_NGS/dropzone/alternative/laboratory/biomaterial.txt.sha1 diff --git a/test_data/test_data_NGS/dropzone/alternative/laboratory/biosource.txt b/test_data_e2e/xx_archive/test_data_NGS/dropzone/alternative/laboratory/biosource.txt similarity index 100% rename from test_data/test_data_NGS/dropzone/alternative/laboratory/biosource.txt rename to test_data_e2e/xx_archive/test_data_NGS/dropzone/alternative/laboratory/biosource.txt diff --git a/test_data/test_data_NGS/dropzone/alternative/laboratory/biosource.txt.sha1 b/test_data_e2e/xx_archive/test_data_NGS/dropzone/alternative/laboratory/biosource.txt.sha1 similarity index 100% rename from test_data/test_data_NGS/dropzone/alternative/laboratory/biosource.txt.sha1 rename to test_data_e2e/xx_archive/test_data_NGS/dropzone/alternative/laboratory/biosource.txt.sha1 diff --git 
a/test_data/test_data_NGS/dropzone/alternative/studies/death.txt b/test_data_e2e/xx_archive/test_data_NGS/dropzone/alternative/studies/death.txt similarity index 100% rename from test_data/test_data_NGS/dropzone/alternative/studies/death.txt rename to test_data_e2e/xx_archive/test_data_NGS/dropzone/alternative/studies/death.txt diff --git a/test_data/test_data_NGS/dropzone/alternative/studies/death.txt.sha1 b/test_data_e2e/xx_archive/test_data_NGS/dropzone/alternative/studies/death.txt.sha1 similarity index 100% rename from test_data/test_data_NGS/dropzone/alternative/studies/death.txt.sha1 rename to test_data_e2e/xx_archive/test_data_NGS/dropzone/alternative/studies/death.txt.sha1 diff --git a/test_data/test_data_NGS/dropzone/alternative/studies/death_codebook.txt b/test_data_e2e/xx_archive/test_data_NGS/dropzone/alternative/studies/death_codebook.txt similarity index 100% rename from test_data/test_data_NGS/dropzone/alternative/studies/death_codebook.txt rename to test_data_e2e/xx_archive/test_data_NGS/dropzone/alternative/studies/death_codebook.txt diff --git a/test_data/test_data_NGS/dropzone/alternative/studies/death_codebook.txt.sha1 b/test_data_e2e/xx_archive/test_data_NGS/dropzone/alternative/studies/death_codebook.txt.sha1 similarity index 100% rename from test_data/test_data_NGS/dropzone/alternative/studies/death_codebook.txt.sha1 rename to test_data_e2e/xx_archive/test_data_NGS/dropzone/alternative/studies/death_codebook.txt.sha1 diff --git a/test_data/test_data_NGS/dropzone/alternative/studies/diagnosis.txt b/test_data_e2e/xx_archive/test_data_NGS/dropzone/alternative/studies/diagnosis.txt similarity index 100% rename from test_data/test_data_NGS/dropzone/alternative/studies/diagnosis.txt rename to test_data_e2e/xx_archive/test_data_NGS/dropzone/alternative/studies/diagnosis.txt diff --git a/test_data/test_data_NGS/dropzone/alternative/studies/diagnosis.txt.sha1 b/test_data_e2e/xx_archive/test_data_NGS/dropzone/alternative/studies/diagnosis.txt.sha1 
similarity index 100% rename from test_data/test_data_NGS/dropzone/alternative/studies/diagnosis.txt.sha1 rename to test_data_e2e/xx_archive/test_data_NGS/dropzone/alternative/studies/diagnosis.txt.sha1 diff --git a/test_data/test_data_NGS/dropzone/alternative/studies/diagnosis_codebook.txt b/test_data_e2e/xx_archive/test_data_NGS/dropzone/alternative/studies/diagnosis_codebook.txt similarity index 100% rename from test_data/test_data_NGS/dropzone/alternative/studies/diagnosis_codebook.txt rename to test_data_e2e/xx_archive/test_data_NGS/dropzone/alternative/studies/diagnosis_codebook.txt diff --git a/test_data/test_data_NGS/dropzone/alternative/studies/diagnosis_codebook.txt.sha1 b/test_data_e2e/xx_archive/test_data_NGS/dropzone/alternative/studies/diagnosis_codebook.txt.sha1 similarity index 100% rename from test_data/test_data_NGS/dropzone/alternative/studies/diagnosis_codebook.txt.sha1 rename to test_data_e2e/xx_archive/test_data_NGS/dropzone/alternative/studies/diagnosis_codebook.txt.sha1 diff --git a/test_data/test_data_NGS/dropzone/alternative/studies/individual.txt b/test_data_e2e/xx_archive/test_data_NGS/dropzone/alternative/studies/individual.txt similarity index 100% rename from test_data/test_data_NGS/dropzone/alternative/studies/individual.txt rename to test_data_e2e/xx_archive/test_data_NGS/dropzone/alternative/studies/individual.txt diff --git a/test_data/test_data_NGS/dropzone/alternative/studies/individual.txt.sha1 b/test_data_e2e/xx_archive/test_data_NGS/dropzone/alternative/studies/individual.txt.sha1 similarity index 100% rename from test_data/test_data_NGS/dropzone/alternative/studies/individual.txt.sha1 rename to test_data_e2e/xx_archive/test_data_NGS/dropzone/alternative/studies/individual.txt.sha1 diff --git a/test_data/test_data_NGS/dropzone/alternative/studies/individual_codebook.txt b/test_data_e2e/xx_archive/test_data_NGS/dropzone/alternative/studies/individual_codebook.txt similarity index 100% rename from 
test_data/test_data_NGS/dropzone/alternative/studies/individual_codebook.txt rename to test_data_e2e/xx_archive/test_data_NGS/dropzone/alternative/studies/individual_codebook.txt diff --git a/test_data/test_data_NGS/dropzone/alternative/studies/individual_codebook.txt.sha1 b/test_data_e2e/xx_archive/test_data_NGS/dropzone/alternative/studies/individual_codebook.txt.sha1 similarity index 100% rename from test_data/test_data_NGS/dropzone/alternative/studies/individual_codebook.txt.sha1 rename to test_data_e2e/xx_archive/test_data_NGS/dropzone/alternative/studies/individual_codebook.txt.sha1 diff --git a/test_data/test_data_NGS/dropzone/alternative/studies/individual_study.txt b/test_data_e2e/xx_archive/test_data_NGS/dropzone/alternative/studies/individual_study.txt similarity index 100% rename from test_data/test_data_NGS/dropzone/alternative/studies/individual_study.txt rename to test_data_e2e/xx_archive/test_data_NGS/dropzone/alternative/studies/individual_study.txt diff --git a/test_data/test_data_NGS/dropzone/alternative/studies/individual_study.txt.sha1 b/test_data_e2e/xx_archive/test_data_NGS/dropzone/alternative/studies/individual_study.txt.sha1 similarity index 100% rename from test_data/test_data_NGS/dropzone/alternative/studies/individual_study.txt.sha1 rename to test_data_e2e/xx_archive/test_data_NGS/dropzone/alternative/studies/individual_study.txt.sha1 diff --git a/test_data/test_data_NGS/dropzone/alternative/studies/study.txt b/test_data_e2e/xx_archive/test_data_NGS/dropzone/alternative/studies/study.txt similarity index 100% rename from test_data/test_data_NGS/dropzone/alternative/studies/study.txt rename to test_data_e2e/xx_archive/test_data_NGS/dropzone/alternative/studies/study.txt diff --git a/test_data/test_data_NGS/dropzone/alternative/studies/study.txt.sha1 b/test_data_e2e/xx_archive/test_data_NGS/dropzone/alternative/studies/study.txt.sha1 similarity index 100% rename from test_data/test_data_NGS/dropzone/alternative/studies/study.txt.sha1 
rename to test_data_e2e/xx_archive/test_data_NGS/dropzone/alternative/studies/study.txt.sha1 diff --git a/test_data/test_data_NGS/dropzone/full_dataset/NGS/pmc_test1_WGS.maf.gz b/test_data_e2e/xx_archive/test_data_NGS/dropzone/full_dataset/NGS/pmc_test1_WGS.maf.gz similarity index 100% rename from test_data/test_data_NGS/dropzone/full_dataset/NGS/pmc_test1_WGS.maf.gz rename to test_data_e2e/xx_archive/test_data_NGS/dropzone/full_dataset/NGS/pmc_test1_WGS.maf.gz diff --git a/test_data/test_data_NGS/dropzone/full_dataset/NGS/pmc_test1_WGS.maf.gz.sha1 b/test_data_e2e/xx_archive/test_data_NGS/dropzone/full_dataset/NGS/pmc_test1_WGS.maf.gz.sha1 similarity index 100% rename from test_data/test_data_NGS/dropzone/full_dataset/NGS/pmc_test1_WGS.maf.gz.sha1 rename to test_data_e2e/xx_archive/test_data_NGS/dropzone/full_dataset/NGS/pmc_test1_WGS.maf.gz.sha1 diff --git a/test_data/test_data_NGS/dropzone/full_dataset/NGS/pmc_test2_WGS.maf.gz b/test_data_e2e/xx_archive/test_data_NGS/dropzone/full_dataset/NGS/pmc_test2_WGS.maf.gz similarity index 100% rename from test_data/test_data_NGS/dropzone/full_dataset/NGS/pmc_test2_WGS.maf.gz rename to test_data_e2e/xx_archive/test_data_NGS/dropzone/full_dataset/NGS/pmc_test2_WGS.maf.gz diff --git a/test_data/test_data_NGS/dropzone/full_dataset/NGS/pmc_test2_WGS.maf.gz.sha1 b/test_data_e2e/xx_archive/test_data_NGS/dropzone/full_dataset/NGS/pmc_test2_WGS.maf.gz.sha1 similarity index 100% rename from test_data/test_data_NGS/dropzone/full_dataset/NGS/pmc_test2_WGS.maf.gz.sha1 rename to test_data_e2e/xx_archive/test_data_NGS/dropzone/full_dataset/NGS/pmc_test2_WGS.maf.gz.sha1 diff --git a/test_data/test_data_NGS/dropzone/full_dataset/NGS/pmc_test_WXS.seg b/test_data_e2e/xx_archive/test_data_NGS/dropzone/full_dataset/NGS/pmc_test_WXS.seg similarity index 100% rename from test_data/test_data_NGS/dropzone/full_dataset/NGS/pmc_test_WXS.seg rename to test_data_e2e/xx_archive/test_data_NGS/dropzone/full_dataset/NGS/pmc_test_WXS.seg diff --git 
a/test_data/test_data_NGS/dropzone/full_dataset/NGS/pmc_test_WXS.seg.sha1 b/test_data_e2e/xx_archive/test_data_NGS/dropzone/full_dataset/NGS/pmc_test_WXS.seg.sha1 similarity index 100% rename from test_data/test_data_NGS/dropzone/full_dataset/NGS/pmc_test_WXS.seg.sha1 rename to test_data_e2e/xx_archive/test_data_NGS/dropzone/full_dataset/NGS/pmc_test_WXS.seg.sha1 diff --git a/test_data/test_data_NGS/dropzone/full_dataset/NGS/pmc_test_WXS_all_data_by_genes.txt b/test_data_e2e/xx_archive/test_data_NGS/dropzone/full_dataset/NGS/pmc_test_WXS_all_data_by_genes.txt similarity index 100% rename from test_data/test_data_NGS/dropzone/full_dataset/NGS/pmc_test_WXS_all_data_by_genes.txt rename to test_data_e2e/xx_archive/test_data_NGS/dropzone/full_dataset/NGS/pmc_test_WXS_all_data_by_genes.txt diff --git a/test_data/test_data_NGS/dropzone/full_dataset/NGS/pmc_test_WXS_all_data_by_genes.txt.sha1 b/test_data_e2e/xx_archive/test_data_NGS/dropzone/full_dataset/NGS/pmc_test_WXS_all_data_by_genes.txt.sha1 similarity index 100% rename from test_data/test_data_NGS/dropzone/full_dataset/NGS/pmc_test_WXS_all_data_by_genes.txt.sha1 rename to test_data_e2e/xx_archive/test_data_NGS/dropzone/full_dataset/NGS/pmc_test_WXS_all_data_by_genes.txt.sha1 diff --git a/test_data/test_data_NGS/dropzone/full_dataset/NGS/pmc_test_WXS_all_thresholded.by_genes.txt b/test_data_e2e/xx_archive/test_data_NGS/dropzone/full_dataset/NGS/pmc_test_WXS_all_thresholded.by_genes.txt similarity index 100% rename from test_data/test_data_NGS/dropzone/full_dataset/NGS/pmc_test_WXS_all_thresholded.by_genes.txt rename to test_data_e2e/xx_archive/test_data_NGS/dropzone/full_dataset/NGS/pmc_test_WXS_all_thresholded.by_genes.txt diff --git a/test_data/test_data_NGS/dropzone/full_dataset/NGS/pmc_test_WXS_all_thresholded.by_genes.txt.sha1 b/test_data_e2e/xx_archive/test_data_NGS/dropzone/full_dataset/NGS/pmc_test_WXS_all_thresholded.by_genes.txt.sha1 similarity index 100% rename from 
test_data/test_data_NGS/dropzone/full_dataset/NGS/pmc_test_WXS_all_thresholded.by_genes.txt.sha1 rename to test_data_e2e/xx_archive/test_data_NGS/dropzone/full_dataset/NGS/pmc_test_WXS_all_thresholded.by_genes.txt.sha1 diff --git a/test_data/test_data_NGS/dropzone/full_dataset/clinic/RDP-IC.tab b/test_data_e2e/xx_archive/test_data_NGS/dropzone/full_dataset/clinic/RDP-IC.tab similarity index 100% rename from test_data/test_data_NGS/dropzone/full_dataset/clinic/RDP-IC.tab rename to test_data_e2e/xx_archive/test_data_NGS/dropzone/full_dataset/clinic/RDP-IC.tab diff --git a/test_data/test_data_NGS/dropzone/full_dataset/clinic/RDP-IC.tab.sha1 b/test_data_e2e/xx_archive/test_data_NGS/dropzone/full_dataset/clinic/RDP-IC.tab.sha1 similarity index 100% rename from test_data/test_data_NGS/dropzone/full_dataset/clinic/RDP-IC.tab.sha1 rename to test_data_e2e/xx_archive/test_data_NGS/dropzone/full_dataset/clinic/RDP-IC.tab.sha1 diff --git a/test_data/test_data_NGS/dropzone/full_dataset/clinic/RDP-Patient.tab b/test_data_e2e/xx_archive/test_data_NGS/dropzone/full_dataset/clinic/RDP-Patient.tab similarity index 100% rename from test_data/test_data_NGS/dropzone/full_dataset/clinic/RDP-Patient.tab rename to test_data_e2e/xx_archive/test_data_NGS/dropzone/full_dataset/clinic/RDP-Patient.tab diff --git a/test_data/test_data_NGS/dropzone/full_dataset/clinic/RDP-Patient.tab.sha1 b/test_data_e2e/xx_archive/test_data_NGS/dropzone/full_dataset/clinic/RDP-Patient.tab.sha1 similarity index 100% rename from test_data/test_data_NGS/dropzone/full_dataset/clinic/RDP-Patient.tab.sha1 rename to test_data_e2e/xx_archive/test_data_NGS/dropzone/full_dataset/clinic/RDP-Patient.tab.sha1 diff --git a/test_data/test_data_NGS/dropzone/full_dataset/clinic/RDP-Patient_codebook.txt b/test_data_e2e/xx_archive/test_data_NGS/dropzone/full_dataset/clinic/RDP-Patient_codebook.txt similarity index 100% rename from test_data/test_data_NGS/dropzone/full_dataset/clinic/RDP-Patient_codebook.txt rename to 
test_data_e2e/xx_archive/test_data_NGS/dropzone/full_dataset/clinic/RDP-Patient_codebook.txt diff --git a/test_data/test_data_NGS/dropzone/full_dataset/clinic/RDP-Patient_codebook.txt.sha1 b/test_data_e2e/xx_archive/test_data_NGS/dropzone/full_dataset/clinic/RDP-Patient_codebook.txt.sha1 similarity index 100% rename from test_data/test_data_NGS/dropzone/full_dataset/clinic/RDP-Patient_codebook.txt.sha1 rename to test_data_e2e/xx_archive/test_data_NGS/dropzone/full_dataset/clinic/RDP-Patient_codebook.txt.sha1 diff --git a/test_data/test_data_NGS/dropzone/full_dataset/laboratory/biomaterial.txt b/test_data_e2e/xx_archive/test_data_NGS/dropzone/full_dataset/laboratory/biomaterial.txt similarity index 100% rename from test_data/test_data_NGS/dropzone/full_dataset/laboratory/biomaterial.txt rename to test_data_e2e/xx_archive/test_data_NGS/dropzone/full_dataset/laboratory/biomaterial.txt diff --git a/test_data/test_data_NGS/dropzone/full_dataset/laboratory/biomaterial.txt.sha1 b/test_data_e2e/xx_archive/test_data_NGS/dropzone/full_dataset/laboratory/biomaterial.txt.sha1 similarity index 100% rename from test_data/test_data_NGS/dropzone/full_dataset/laboratory/biomaterial.txt.sha1 rename to test_data_e2e/xx_archive/test_data_NGS/dropzone/full_dataset/laboratory/biomaterial.txt.sha1 diff --git a/test_data/test_data_NGS/dropzone/full_dataset/laboratory/biosource.txt b/test_data_e2e/xx_archive/test_data_NGS/dropzone/full_dataset/laboratory/biosource.txt similarity index 100% rename from test_data/test_data_NGS/dropzone/full_dataset/laboratory/biosource.txt rename to test_data_e2e/xx_archive/test_data_NGS/dropzone/full_dataset/laboratory/biosource.txt diff --git a/test_data/test_data_NGS/dropzone/full_dataset/laboratory/biosource.txt.sha1 b/test_data_e2e/xx_archive/test_data_NGS/dropzone/full_dataset/laboratory/biosource.txt.sha1 similarity index 100% rename from test_data/test_data_NGS/dropzone/full_dataset/laboratory/biosource.txt.sha1 rename to 
test_data_e2e/xx_archive/test_data_NGS/dropzone/full_dataset/laboratory/biosource.txt.sha1 diff --git a/test_data/test_data_NGS/dropzone/full_dataset/studies/death.txt b/test_data_e2e/xx_archive/test_data_NGS/dropzone/full_dataset/studies/death.txt similarity index 100% rename from test_data/test_data_NGS/dropzone/full_dataset/studies/death.txt rename to test_data_e2e/xx_archive/test_data_NGS/dropzone/full_dataset/studies/death.txt diff --git a/test_data/test_data_NGS/dropzone/full_dataset/studies/death.txt.sha1 b/test_data_e2e/xx_archive/test_data_NGS/dropzone/full_dataset/studies/death.txt.sha1 similarity index 100% rename from test_data/test_data_NGS/dropzone/full_dataset/studies/death.txt.sha1 rename to test_data_e2e/xx_archive/test_data_NGS/dropzone/full_dataset/studies/death.txt.sha1 diff --git a/test_data/test_data_NGS/dropzone/full_dataset/studies/death_codebook.txt b/test_data_e2e/xx_archive/test_data_NGS/dropzone/full_dataset/studies/death_codebook.txt similarity index 100% rename from test_data/test_data_NGS/dropzone/full_dataset/studies/death_codebook.txt rename to test_data_e2e/xx_archive/test_data_NGS/dropzone/full_dataset/studies/death_codebook.txt diff --git a/test_data/test_data_NGS/dropzone/full_dataset/studies/death_codebook.txt.sha1 b/test_data_e2e/xx_archive/test_data_NGS/dropzone/full_dataset/studies/death_codebook.txt.sha1 similarity index 100% rename from test_data/test_data_NGS/dropzone/full_dataset/studies/death_codebook.txt.sha1 rename to test_data_e2e/xx_archive/test_data_NGS/dropzone/full_dataset/studies/death_codebook.txt.sha1 diff --git a/test_data/test_data_NGS/dropzone/full_dataset/studies/diagnosis.txt b/test_data_e2e/xx_archive/test_data_NGS/dropzone/full_dataset/studies/diagnosis.txt similarity index 100% rename from test_data/test_data_NGS/dropzone/full_dataset/studies/diagnosis.txt rename to test_data_e2e/xx_archive/test_data_NGS/dropzone/full_dataset/studies/diagnosis.txt diff --git 
a/test_data/test_data_NGS/dropzone/full_dataset/studies/diagnosis.txt.sha1 b/test_data_e2e/xx_archive/test_data_NGS/dropzone/full_dataset/studies/diagnosis.txt.sha1 similarity index 100% rename from test_data/test_data_NGS/dropzone/full_dataset/studies/diagnosis.txt.sha1 rename to test_data_e2e/xx_archive/test_data_NGS/dropzone/full_dataset/studies/diagnosis.txt.sha1 diff --git a/test_data/test_data_NGS/dropzone/full_dataset/studies/diagnosis_codebook.txt b/test_data_e2e/xx_archive/test_data_NGS/dropzone/full_dataset/studies/diagnosis_codebook.txt similarity index 100% rename from test_data/test_data_NGS/dropzone/full_dataset/studies/diagnosis_codebook.txt rename to test_data_e2e/xx_archive/test_data_NGS/dropzone/full_dataset/studies/diagnosis_codebook.txt diff --git a/test_data/test_data_NGS/dropzone/full_dataset/studies/diagnosis_codebook.txt.sha1 b/test_data_e2e/xx_archive/test_data_NGS/dropzone/full_dataset/studies/diagnosis_codebook.txt.sha1 similarity index 100% rename from test_data/test_data_NGS/dropzone/full_dataset/studies/diagnosis_codebook.txt.sha1 rename to test_data_e2e/xx_archive/test_data_NGS/dropzone/full_dataset/studies/diagnosis_codebook.txt.sha1 diff --git a/test_data/test_data_NGS/dropzone/full_dataset/studies/individual.txt b/test_data_e2e/xx_archive/test_data_NGS/dropzone/full_dataset/studies/individual.txt similarity index 100% rename from test_data/test_data_NGS/dropzone/full_dataset/studies/individual.txt rename to test_data_e2e/xx_archive/test_data_NGS/dropzone/full_dataset/studies/individual.txt diff --git a/test_data/test_data_NGS/dropzone/full_dataset/studies/individual.txt.sha1 b/test_data_e2e/xx_archive/test_data_NGS/dropzone/full_dataset/studies/individual.txt.sha1 similarity index 100% rename from test_data/test_data_NGS/dropzone/full_dataset/studies/individual.txt.sha1 rename to test_data_e2e/xx_archive/test_data_NGS/dropzone/full_dataset/studies/individual.txt.sha1 diff --git 
a/test_data/test_data_NGS/dropzone/full_dataset/studies/individual_codebook.txt b/test_data_e2e/xx_archive/test_data_NGS/dropzone/full_dataset/studies/individual_codebook.txt similarity index 100% rename from test_data/test_data_NGS/dropzone/full_dataset/studies/individual_codebook.txt rename to test_data_e2e/xx_archive/test_data_NGS/dropzone/full_dataset/studies/individual_codebook.txt diff --git a/test_data/test_data_NGS/dropzone/full_dataset/studies/individual_codebook.txt.sha1 b/test_data_e2e/xx_archive/test_data_NGS/dropzone/full_dataset/studies/individual_codebook.txt.sha1 similarity index 100% rename from test_data/test_data_NGS/dropzone/full_dataset/studies/individual_codebook.txt.sha1 rename to test_data_e2e/xx_archive/test_data_NGS/dropzone/full_dataset/studies/individual_codebook.txt.sha1 diff --git a/test_data/test_data_NGS/dropzone/full_dataset/studies/individual_study.txt b/test_data_e2e/xx_archive/test_data_NGS/dropzone/full_dataset/studies/individual_study.txt similarity index 100% rename from test_data/test_data_NGS/dropzone/full_dataset/studies/individual_study.txt rename to test_data_e2e/xx_archive/test_data_NGS/dropzone/full_dataset/studies/individual_study.txt diff --git a/test_data/test_data_NGS/dropzone/full_dataset/studies/individual_study.txt.sha1 b/test_data_e2e/xx_archive/test_data_NGS/dropzone/full_dataset/studies/individual_study.txt.sha1 similarity index 100% rename from test_data/test_data_NGS/dropzone/full_dataset/studies/individual_study.txt.sha1 rename to test_data_e2e/xx_archive/test_data_NGS/dropzone/full_dataset/studies/individual_study.txt.sha1 diff --git a/test_data/test_data_NGS/dropzone/full_dataset/studies/study.txt b/test_data_e2e/xx_archive/test_data_NGS/dropzone/full_dataset/studies/study.txt similarity index 100% rename from test_data/test_data_NGS/dropzone/full_dataset/studies/study.txt rename to test_data_e2e/xx_archive/test_data_NGS/dropzone/full_dataset/studies/study.txt diff --git 
a/test_data/test_data_NGS/dropzone/full_dataset/studies/study.txt.sha1 b/test_data_e2e/xx_archive/test_data_NGS/dropzone/full_dataset/studies/study.txt.sha1 similarity index 100% rename from test_data/test_data_NGS/dropzone/full_dataset/studies/study.txt.sha1 rename to test_data_e2e/xx_archive/test_data_NGS/dropzone/full_dataset/studies/study.txt.sha1 From 6134943dc818865930d9a9bae00255434fc4dae4 Mon Sep 17 00:00:00 2001 From: Alessia Peviani Date: Thu, 16 Dec 2021 16:37:32 +0100 Subject: [PATCH 30/39] Reuse checksum function from pmc-conversion scripts --- test_data_e2e/generate_sha1sum.py | 21 ++++----------------- 1 file changed, 4 insertions(+), 17 deletions(-) diff --git a/test_data_e2e/generate_sha1sum.py b/test_data_e2e/generate_sha1sum.py index 1db64bb..d1b1064 100644 --- a/test_data_e2e/generate_sha1sum.py +++ b/test_data_e2e/generate_sha1sum.py @@ -1,23 +1,10 @@ -import hashlib import os +from scripts.checksum import compute_checksum -def compute_checksum(path, algorithm: str) -> str: - """ Generates a hex digest using specified algorithm for a file. 
""" - buffer_size = 65536 - hash_builder = hashlib.new(algorithm) - with open(path, 'rb') as f: - while True: - data = f.read(buffer_size) - if not data: - break - hash_builder.update(data) - - return hash_builder.hexdigest() - - -def traverse(top_dir, algorithm: str = 'sha1'): +def compute_checksum_in_folder(top_dir, algorithm: str): + """Traverse folder and write checksum file for each file found.""" for root, d_names, f_names in os.walk(top_dir): for f_name in f_names: if f_name.endswith(algorithm): @@ -30,4 +17,4 @@ def traverse(top_dir, algorithm: str = 'sha1'): if __name__ == '__main__': - traverse('test_data_e2e') + compute_checksum_in_folder('test_data_e2e/current', 'sha1') From e10db60f5e575c98c1d7409a18af66d040d2db19 Mon Sep 17 00:00:00 2001 From: Alessia Peviani Date: Thu, 16 Dec 2021 17:33:31 +0100 Subject: [PATCH 31/39] Update execution of sha1sum files generator --- test_data_e2e/README.md | 7 ++++--- .../{generate_sha1sum.py => generate_sha1sum_files.py} | 2 +- 2 files changed, 5 insertions(+), 4 deletions(-) rename test_data_e2e/{generate_sha1sum.py => generate_sha1sum_files.py} (89%) diff --git a/test_data_e2e/README.md b/test_data_e2e/README.md index 3200df6..d0f130b 100644 --- a/test_data_e2e/README.md +++ b/test_data_e2e/README.md @@ -31,8 +31,9 @@ A repository complete with utilities to help create CSR-compatible test datasets to inspect it. -5. Generate sha1sum files for all test data by running: +5. Generate sha1sum files for all test data by moving to the root folder and running: ``` - python generate_sha1sum.py + python -m test_data_e2e.generate_sha1sum_files ``` - This automatically traverses the whole `test_data/` folder, no need to provide the path. + This automatically traverses the whole `test_data_e2e/current/dropzone` folder, no need to + provide the path. 
diff --git a/test_data_e2e/generate_sha1sum.py b/test_data_e2e/generate_sha1sum_files.py similarity index 89% rename from test_data_e2e/generate_sha1sum.py rename to test_data_e2e/generate_sha1sum_files.py index d1b1064..ed64069 100644 --- a/test_data_e2e/generate_sha1sum.py +++ b/test_data_e2e/generate_sha1sum_files.py @@ -17,4 +17,4 @@ def compute_checksum_in_folder(top_dir, algorithm: str): if __name__ == '__main__': - compute_checksum_in_folder('test_data_e2e/current', 'sha1') + compute_checksum_in_folder('test_data_e2e/current/dropzone', 'sha1') From 9f0a458ad2bfa4d5fb0f5b34246a5ab1ca2d2aeb Mon Sep 17 00:00:00 2001 From: Alessia Peviani Date: Thu, 16 Dec 2021 18:26:05 +0100 Subject: [PATCH 32/39] Update test data readme --- test_data_e2e/README.md | 54 ++++++++++++++++++++++++++--------------- 1 file changed, 35 insertions(+), 19 deletions(-) diff --git a/test_data_e2e/README.md b/test_data_e2e/README.md index d0f130b..6dab644 100644 --- a/test_data_e2e/README.md +++ b/test_data_e2e/README.md @@ -1,34 +1,50 @@ -# PMC E2E TESTDATA +# E2E TESTDATA -A repository complete with utilities to help create CSR-compatible test datasets for E2E testing. +This folder is complete with utilities to help create CSR-compatible test datasets for E2E testing. +Test data updates should coordinate with +[csr2transmart](https://github.com/thehyve/python_csr2transmart) developments. -### How to use this repo +### Folder structure -1. Update `requirements.txt` to point to the latest release of `python_csr2transmart` you would - like your test data to be compatible with (default `master` branch). +`current/` contains the latest test dataset compatible with `csr2transmart`. Inside this folder, +you will find: + - `config/` for configuration files (ontology and sources) + - `dropzone/` for the actual data +Inside `dropzone`, you should have at least two test dataset versions (e.g. `full_dataset` and +`alternative`). 
This allows to quickly switch between them, which is handy to trigger a new ETL +pipeline run. -2. Create a Python virtual environment (check the latest Python version supported by - `python_csr2transmart`) and install the requirements: - ``` - pip install -r requirements.txt - ``` + `xx_achive/` can be used to store old datasets for future reference. -3. Create a new branch and start working on a new version of `test_data/`, and corresponding - changes to the configuration files in `config/`. We recommend using two sub-folders, one for the - complete dataset (e.g. `full_dataset/`) and one with an alternative version, obtained for - example by removing or swapping the gender of one patient (e.g. `alternative/`). +### How to update E2E test data +1. Create a Python virtual environment (make sure the Python version is compatible with the + `csr2transmart` version you want to use). -4. Check that the dataset can be parsed by `sources2csr` and `csr2transmart` by running: + +2. Install the desired version of `csr2transmart` from PyPI, e.g.: + ``` + pip install csr2transmart==0.1.0 + ``` + Alternatively, install a development version from the `python_csr2transmart` repo by pointing + to a specific tag or branch, e.g.: + ``` + pip install git+https://github.com/thehyve/python_csr2transmart.git@my-dev-branch + ``` + + +3. Create a new branch and start updating your test data and configuration files. + + +4. Check that all of your new dataset versions can be parsed by `sources2csr` and + `csr2transmart` by running (e.g. from inside `current/`): ``` - python validate_data.py + sources2csr dropzone/full_dataset /tmp/csr_test config + csr2transmart /tmp/csr_test /tmp/transmart_test config ``` - where the provided path should point to a specific test data subfolder (e.g. `full_dataset/`). - The output will be written to the (git-ignored) `validation_results/` folder, should you need - to inspect it. 5. 
Generate sha1sum files for all test data by moving to the root folder and running: From 19cafac150ec2e8623ceef3d286498d41b4eceff Mon Sep 17 00:00:00 2001 From: Alessia Peviani Date: Thu, 16 Dec 2021 18:27:38 +0100 Subject: [PATCH 33/39] Remove unused test data scripts --- test_data_e2e/requirements.txt | 1 - test_data_e2e/validate_data.py | 33 --------------------------------- 2 files changed, 34 deletions(-) delete mode 100644 test_data_e2e/requirements.txt delete mode 100644 test_data_e2e/validate_data.py diff --git a/test_data_e2e/requirements.txt b/test_data_e2e/requirements.txt deleted file mode 100644 index 3ea167d..0000000 --- a/test_data_e2e/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -git+https://github.com/thehyve/python_csr2transmart.git@master \ No newline at end of file diff --git a/test_data_e2e/validate_data.py b/test_data_e2e/validate_data.py deleted file mode 100644 index 65f779c..0000000 --- a/test_data_e2e/validate_data.py +++ /dev/null @@ -1,33 +0,0 @@ -import os -import shutil -import subprocess - -import click - - -config_dir = 'config' -output_folder = 'validation_results' -out_csr = os.path.join(output_folder, 'sources2csr') -out_tm = os.path.join(output_folder, 'csr2transmart') - - -def cleanup(dir_path: str): - if os.path.exists(dir_path): - shutil.rmtree(dir_path) - - -@click.command() -@click.argument('top_folder', type=click.Path(file_okay=False, exists=True, readable=True)) -def validate(top_folder: str): - - print('Validating SOURCES to CSR') - cleanup(out_csr) - subprocess.run(f'sources2csr {top_folder} {out_csr} {config_dir}', shell=True) - - print('Validating CSR to TRANSMART') - cleanup(out_tm) - subprocess.run(f'csr2transmart {out_csr} {out_tm} {config_dir}', shell=True) - - -if __name__ == '__main__': - validate() From f95f6dc0c59557c397671f40898c46bd0121c237 Mon Sep 17 00:00:00 2001 From: Alessia Peviani Date: Thu, 16 Dec 2021 18:52:59 +0100 Subject: [PATCH 34/39] Add to readme where to deploy test data once ready --- 
test_data_e2e/README.md | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/test_data_e2e/README.md b/test_data_e2e/README.md index 6dab644..caee763 100644 --- a/test_data_e2e/README.md +++ b/test_data_e2e/README.md @@ -2,15 +2,16 @@ This folder is complete with utilities to help create CSR-compatible test datasets for E2E testing. Test data updates should coordinate with -[csr2transmart](https://github.com/thehyve/python_csr2transmart) developments. +[csr2transmart](https://github.com/thehyve/python_csr2transmart) developments. Please refer to +that documentation for details about expected configuration files and source data format. ### Folder structure `current/` contains the latest test dataset compatible with `csr2transmart`. Inside this folder, you will find: - - `config/` for configuration files (ontology and sources) - - `dropzone/` for the actual data + - `config/` for configuration files (ontology and sources). + - `dropzone/` for the actual data. Inside `dropzone`, you should have at least two test dataset versions (e.g. `full_dataset` and `alternative`). This allows to quickly switch between them, which is handy to trigger a new ETL @@ -53,3 +54,11 @@ pipeline run. ``` This automatically traverses the whole `test_data_e2e/current/dropzone` folder, no need to provide the path. + + +6. Once a new release of `pmc-conversion` is deployed, copy the data to the appropriate folders: + - `dropzone/` contents to the same folder in the `drop` user home + - `config/` contents to the same folder in the `pmc` user home + See [here](https://github.com/thehyve/pmc-conversion#create-required-directories) for details. + Then you are ready to execute an end-to-end test! 
+ From d7d3e051a49606a4b2249036a5c9a540f16af70c Mon Sep 17 00:00:00 2001 From: Alessia Peviani Date: Thu, 16 Dec 2021 22:10:13 +0100 Subject: [PATCH 35/39] Add test data description --- test_data_e2e/current/README.md | 36 +++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) create mode 100644 test_data_e2e/current/README.md diff --git a/test_data_e2e/current/README.md b/test_data_e2e/current/README.md new file mode 100644 index 0000000..36c405a --- /dev/null +++ b/test_data_e2e/current/README.md @@ -0,0 +1,36 @@ +# Test data description + +Two versions of the data are provided: + - `full_dataset`, composed of 17 patients (11 males, 6 females) + - `alternative`, from which patient PAT2 (male) and all associated data (diagnosis, + biosource, biomaterial, radiology) has been removed. + +The dataset is designed to allow testing of various subselections in the query builder of Glowing +Bear. More details about specific entities are provided below. + +### Diagnosis entity + + +| | Full dataset | | | Alternative | | | +|------------|--------------|-----|-------|-------------|-----|--------| +| tumor_type | M | F | TOTAL | M | F | TOTAL | +| ---------- | ------------ | --- | ------| ------------| --- | ------ | +| TC only | 1 | 4 | 5 | 1 | 4 | 5 | +| NH only | 8 | 2 | 11 | 8 | 2 | 10 | +| both | 2 | 0 | 2 | 1 | 0 | 1 | +| ---------- | ------------ | --- | ------| ------------| --- | ------ | +| TOTAL | 11 | 6 | 17 | 10 | 6 | 16 | + + +### Radiology entity + + +| | Full dataset | | | Alternative | | | +|------------|--------------|-----|--------|--------------|-----|--------| +| body_part | M | F | TOTAL | M | F | TOTAL | +| ---------- | ------------ | --- | ------ | ------------ | --- | ------ | +| legs only | 4 | 2 | 6 | 4 | 2 | 6 | +| torso only | 3 | 1 | 4 | 3 | 1 | 4 | +| both | 2 | 0 | 2 | 1 | 0 | 1 | +| ---------- | ------------ | --- | ------ | ------------ | --- | ------ | +| TOTAL | 9 | 3 | 12 | 8 | 3 | 11 | From 
bef147a261153151aaa3132b8f19a970f421b1d5 Mon Sep 17 00:00:00 2001 From: Alessia Peviani Date: Mon, 20 Dec 2021 10:09:06 +0100 Subject: [PATCH 36/39] Test data description corrections (table) --- test_data_e2e/current/README.md | 32 ++++++++++++++------------------ 1 file changed, 14 insertions(+), 18 deletions(-) diff --git a/test_data_e2e/current/README.md b/test_data_e2e/current/README.md index 36c405a..75086d8 100644 --- a/test_data_e2e/current/README.md +++ b/test_data_e2e/current/README.md @@ -11,26 +11,22 @@ Bear. More details about specific entities are provided below. ### Diagnosis entity -| | Full dataset | | | Alternative | | | -|------------|--------------|-----|-------|-------------|-----|--------| -| tumor_type | M | F | TOTAL | M | F | TOTAL | -| ---------- | ------------ | --- | ------| ------------| --- | ------ | -| TC only | 1 | 4 | 5 | 1 | 4 | 5 | -| NH only | 8 | 2 | 11 | 8 | 2 | 10 | -| both | 2 | 0 | 2 | 1 | 0 | 1 | -| ---------- | ------------ | --- | ------| ------------| --- | ------ | -| TOTAL | 11 | 6 | 17 | 10 | 6 | 16 | +| | Full dataset | | | Alternative | | | +|----------------|--------------|-------|-----------|-------------|-------|-----------| +| **tumor_type** | **M** | **F** | **TOTAL** | **M** | **F** | **TOTAL** | +| TC only | 1 | 4 | 5 | 1 | 4 | 5 | +| NH only | 8 | 2 | 10 | 8 | 2 | 10 | +| both | 2 | 0 | 2 | 1 | 0 | 1 | +| **TOTAL** | 11 | 6 | 17 | 10 | 6 | 16 | ### Radiology entity -| | Full dataset | | | Alternative | | | -|------------|--------------|-----|--------|--------------|-----|--------| -| body_part | M | F | TOTAL | M | F | TOTAL | -| ---------- | ------------ | --- | ------ | ------------ | --- | ------ | -| legs only | 4 | 2 | 6 | 4 | 2 | 6 | -| torso only | 3 | 1 | 4 | 3 | 1 | 4 | -| both | 2 | 0 | 2 | 1 | 0 | 1 | -| ---------- | ------------ | --- | ------ | ------------ | --- | ------ | -| TOTAL | 9 | 3 | 12 | 8 | 3 | 11 | +| | Full dataset | | | Alternative | | | 
+|---------------|--------------|-------|-----------|-------------|-------|------------| +| **body_part** | **M** | **F** | **TOTAL** | **M** | **F** | **TOTAL** | +| legs only | 4 | 2 | 6 | 4 | 2 | 6 | +| torso only | 3 | 1 | 4 | 3 | 1 | 4 | +| both | 2 | 0 | 2 | 1 | 0 | 1 | +| **TOTAL** | 9 | 3 | 12 | 8 | 3 | 11 | From 9c009b43d57083b71e6d7f0998f1c203bbad456f Mon Sep 17 00:00:00 2001 From: Alessia Peviani Date: Mon, 20 Dec 2021 10:12:01 +0100 Subject: [PATCH 37/39] Add tumor acronym --- test_data_e2e/current/README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/test_data_e2e/current/README.md b/test_data_e2e/current/README.md index 75086d8..f4ad091 100644 --- a/test_data_e2e/current/README.md +++ b/test_data_e2e/current/README.md @@ -19,6 +19,8 @@ Bear. More details about specific entities are provided below. | both | 2 | 0 | 2 | 1 | 0 | 1 | | **TOTAL** | 11 | 6 | 17 | 10 | 6 | 16 | +_TC = angioimmunoblastic T-cell lymphoma, NH = malignant non-Hodgkin lymphoma_ + ### Radiology entity From 4d34828180cf516de86b549b3b94504eae015875 Mon Sep 17 00:00:00 2001 From: Alessia Peviani Date: Tue, 11 Jan 2022 14:34:39 +0100 Subject: [PATCH 38/39] sha1sum script fixes --- test_data_e2e/generate_sha1sum_files.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/test_data_e2e/generate_sha1sum_files.py b/test_data_e2e/generate_sha1sum_files.py index ed64069..5389461 100644 --- a/test_data_e2e/generate_sha1sum_files.py +++ b/test_data_e2e/generate_sha1sum_files.py @@ -1,14 +1,18 @@ import os from scripts.checksum import compute_checksum +from scripts.sync import is_hidden_file -def compute_checksum_in_folder(top_dir, algorithm: str): - """Traverse folder and write checksum file for each file found.""" +def compute_checksum_in_folder(top_dir): + """Traverse folder and write sha1 checksum file for each file found.""" + algorithm = 'sha1' for root, d_names, f_names in os.walk(top_dir): for f_name in f_names: if f_name.endswith(algorithm): continue 
+ if is_hidden_file(f_name): + continue f_in = os.path.join(root, f_name) f_out = f_in + '.' + algorithm checksum = compute_checksum(f_in, algorithm) @@ -17,4 +21,4 @@ def compute_checksum_in_folder(top_dir, algorithm: str): if __name__ == '__main__': - compute_checksum_in_folder('test_data_e2e/current/dropzone', 'sha1') + compute_checksum_in_folder('test_data_e2e/current/dropzone') From d1d4dfce1dc32c6aba7016b8f402a6a4a0f79e04 Mon Sep 17 00:00:00 2001 From: Alessia Peviani Date: Wed, 12 Jan 2022 10:30:51 +0100 Subject: [PATCH 39/39] Update csr2transmart version in requirements --- requirements/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements/requirements.txt b/requirements/requirements.txt index a29a6bc..6cef243 100644 --- a/requirements/requirements.txt +++ b/requirements/requirements.txt @@ -1,5 +1,5 @@ click>=7.0,<8.0 -csr2transmart==0.1.0 +csr2transmart==0.2.0 gitpython>=2.1.15,<2.2 luigi>=2.8.10,<2.9.0 python-daemon==2.1.2