From f96984cebd34673f8df3039c261d8521548a43b2 Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Mon, 13 Jan 2025 22:18:05 +0100 Subject: [PATCH 1/3] Add Python test data files. --- python/data/v1/README.md | 9 + python/data/v1/cohort.json | 251 ++++++++++++++++++++++++++ python/data/v1/cohort.pb | 80 ++++++++ python/data/v1/family.json | 268 +++++++++++++++++++++++++++ python/data/v1/family.pb | 83 +++++++++ python/data/v1/phenopacket.json | 189 +++++++++++++++++++ python/data/v1/phenopacket.pb | 60 ++++++ python/data/v2/cohort.json | 294 ++++++++++++++++++++++++++++++ python/data/v2/cohort.pb | Bin 0 -> 3099 bytes python/data/v2/family.json | 311 ++++++++++++++++++++++++++++++++ python/data/v2/family.pb | Bin 0 -> 3130 bytes python/data/v2/phenopacket.json | 220 ++++++++++++++++++++++ python/data/v2/phenopacket.pb | 71 ++++++++ python/tests/conftest.py | 21 +++ python/tests/test_io.py | 120 ++++++++++++ 15 files changed, 1977 insertions(+) create mode 100644 python/data/v1/README.md create mode 100644 python/data/v1/cohort.json create mode 100644 python/data/v1/cohort.pb create mode 100644 python/data/v1/family.json create mode 100644 python/data/v1/family.pb create mode 100644 python/data/v1/phenopacket.json create mode 100644 python/data/v1/phenopacket.pb create mode 100644 python/data/v2/cohort.json create mode 100644 python/data/v2/cohort.pb create mode 100644 python/data/v2/family.json create mode 100644 python/data/v2/family.pb create mode 100644 python/data/v2/phenopacket.json create mode 100644 python/data/v2/phenopacket.pb create mode 100644 python/tests/test_io.py diff --git a/python/data/v1/README.md b/python/data/v1/README.md new file mode 100644 index 0000000..f312473 --- /dev/null +++ b/python/data/v1/README.md @@ -0,0 +1,9 @@ +# README + +The files in this folder correspond to comprehensive, albeit medically invalid, phenopacket elements: +- phenopacket +- family, or +- cohort. + +The content corresponds to the output of `TestData.V1.comprehensive*()` as of Oct 27th, 2022 +(see [TestData](https://github.com/phenopackets/phenopacket-tools/blob/main/phenopacket-tools-test/src/main/java/org/phenopackets/phenopackettools/test/TestData.java) in phenopacket-tools). diff --git a/python/data/v1/cohort.json b/python/data/v1/cohort.json new file mode 100644 index 0000000..17ab00c --- /dev/null +++ b/python/data/v1/cohort.json @@ -0,0 +1,251 @@ +{ + "id": "comprehensive-cohort-id", + "description": "A description of the example cohort.", + "members": [{ + "id": "comprehensive-phenopacket-id", + "subject": { + "id": "14 year-old boy", + "alternateIds": ["boy", "patient", "proband"], + "dateOfBirth": "1970-01-02T10:17:36.000000100Z", + "ageAtCollection": { + "age": "P14Y" + }, + "sex": "MALE", + "karyotypicSex": "XY", + "taxonomy": { + "id": "NCBITaxon:9606", + "label": "homo sapiens" + } + }, + "phenotypicFeatures": [{ + "type": { + "id": "HP:0001558", + "label": "Decreased fetal movement" + }, + "classOfOnset": { + "id": "HP:0011461", + "label": "Fetal onset" + }, + "evidence": [{ + "evidenceCode": { + "id": "ECO:0000033", + "label": "author statement supported by traceable reference" + }, + "reference": { + "id": "PMID:30808312", + "description": "COL6A1 mutation leading to Bethlem myopathy with recurrent hematuria: a case report." + } + }] + }, { + "type": { + "id": "HP:0031910", + "label": "Abnormal cranial nerve physiology" + }, + "negated": true, + "evidence": [{ + "evidenceCode": { + "id": "ECO:0000033", + "label": "author statement supported by traceable reference" + }, + "reference": { + "id": "PMID:30808312", + "description": "COL6A1 mutation leading to Bethlem myopathy with recurrent hematuria: a case report." + } + }] + }, { + "type": { + "id": "HP:0011463", + "label": "Macroscopic hematuria" + }, + "modifiers": [{ + "id": "HP:0031796", + "label": "Recurrent" + }], + "ageOfOnset": { + "age": "P14Y" + }, + "evidence": [{ + "evidenceCode": { + "id": "ECO:0000033", + "label": "author statement supported by traceable reference" + }, + "reference": { + "id": "PMID:30808312", + "description": "COL6A1 mutation leading to Bethlem myopathy with recurrent hematuria: a case report." + } + }] + }, { + "type": { + "id": "HP:0001270", + "label": "Motor delay" + }, + "severity": { + "id": "HP:0012825", + "label": "Mild" + }, + "classOfOnset": { + "id": "HP:0011463", + "label": "Childhood onset" + } + }], + "biosamples": [{ + "id": "biosample-id", + "individualId": "14 year-old boy", + "description": "Muscle biopsy of 14 year-old boy", + "sampledTissue": { + "id": "UBERON:0003403", + "label": "skin of forearm" + }, + "taxonomy": { + "id": "NCBITaxon:9606", + "label": "homo sapiens" + }, + "ageOfIndividualAtCollection": { + "age": "P14Y" + }, + "histologicalDiagnosis": { + "id": "NCIT:C38757", + "label": "Negative Finding" + }, + "tumorProgression": { + "id": "NCIT:C3677", + "label": "Benign Neoplasm" + }, + "tumorGrade": { + "id": "NCIT:C28076", + "label": "Disease Grade Qualifier" + }, + "diagnosticMarkers": [{ + "id": "NCIT:C68748", + "label": "HER2/Neu Positive" + }] + }], + "genes": [{ + "id": "HGNC1:3688", + "symbol": "FGFR1" + }], + "variants": [{ + "hgvsAllele": { + "hgvs": "NM_001848.2:c.877G\u003eA" + }, + "zygosity": { + "id": "GENO:0000135", + "label": "heterozygous" + } + }], + "diseases": [{ + "term": { + "id": "OMIM:101600", + "label": "PFEIFFER SYNDROME" + }, + "classOfOnset": { + "id": "HP:0003577", + "label": "Congenital onset" + } + }], + "htsFiles": [{ + "uri": "file://data/genomes/P000001C", + "description": "Whole genome sequencing VCF output", + "htsFormat": "VCF", + "genomeAssembly": "GRCh38.p13", + "individualToSampleIdentifiers": { + "14 year-old boy": "P000001C" + } + }], + "metaData": { + "created": "2022-10-03T16:39:04.000123456Z", + "createdBy": "Peter R.", + "submittedBy": "PhenopacketLab", + "resources": [{ + "id": "hp", + "name": "human phenotype ontology", + "url": "http://purl.obolibrary.org/obo/hp.owl", + "version": "2018-03-08", + "namespacePrefix": "HP", + "iriPrefix": "http://purl.obolibrary.org/obo/HP_" + }, { + "id": "geno", + "name": "Genotype Ontology", + "url": "http://purl.obolibrary.org/obo/geno.owl", + "version": "19-03-2018", + "namespacePrefix": "GENO", + "iriPrefix": "http://purl.obolibrary.org/obo/GENO_" + }, { + "id": "pubmed", + "name": "PubMed", + "namespacePrefix": "PMID", + "iriPrefix": "https://www.ncbi.nlm.nih.gov/pubmed/" + }, { + "id": "ncit", + "name": "NCI Thesaurus", + "url": "http://purl.obolibrary.org/obo/ncit.owl", + "version": "20-03-2020", + "namespacePrefix": "NCIT", + "iriPrefix": "http://purl.obolibrary.org/obo/NCIT_" + }], + "phenopacketSchemaVersion": "1.0.0", + "externalReferences": [{ + "id": "PMID:30808312", + "description": "COL6A1 mutation leading to Bethlem myopathy with recurrent hematuria: a case report." + }] + } + }, { + "subject": { + "id": "MOTHER", + "sex": "FEMALE" + } + }, { + "subject": { + "id": "FATHER", + "sex": "MALE" + } + }], + "htsFiles": [{ + "uri": "file://data/genomes/FAM000001", + "description": "Whole genome sequencing VCF output", + "htsFormat": "VCF", + "genomeAssembly": "GRCh38.p13", + "individualToSampleIdentifiers": { + "14 year-old boy": "P000001C", + "MOTHER": "P000001M", + "FATHER": "P000001F" + } + }], + "metaData": { + "created": "2022-10-03T16:39:04.000123456Z", + "createdBy": "Peter R.", + "submittedBy": "PhenopacketLab", + "resources": [{ + "id": "hp", + "name": "human phenotype ontology", + "url": "http://purl.obolibrary.org/obo/hp.owl", + "version": "2018-03-08", + "namespacePrefix": "HP", + "iriPrefix": "http://purl.obolibrary.org/obo/HP_" + }, { + "id": "geno", + "name": "Genotype Ontology", + "url": "http://purl.obolibrary.org/obo/geno.owl", + "version": "19-03-2018", + "namespacePrefix": "GENO", + "iriPrefix": "http://purl.obolibrary.org/obo/GENO_" + }, { + "id": "pubmed", + "name": "PubMed", + "namespacePrefix": "PMID", + "iriPrefix": "https://www.ncbi.nlm.nih.gov/pubmed/" + }, { + "id": "ncit", + "name": "NCI Thesaurus", + "url": "http://purl.obolibrary.org/obo/ncit.owl", + "version": "20-03-2020", + "namespacePrefix": "NCIT", + "iriPrefix": "http://purl.obolibrary.org/obo/NCIT_" + }], + "phenopacketSchemaVersion": "1.0.0", + "externalReferences": [{ + "id": "PMID:30808312", + "description": "COL6A1 mutation leading to Bethlem myopathy with recurrent hematuria: a case report." + }] + } +} \ No newline at end of file diff --git a/python/data/v1/cohort.pb b/python/data/v1/cohort.pb new file mode 100644 index 0000000..5c57f5e --- /dev/null +++ b/python/data/v1/cohort.pb @@ -0,0 +1,80 @@ + +comprehensive-cohort-id$A description of the example cohort.ò +comprehensive-phenopacket-id\ +14 year-old boyboypatientprobandÀÄd" +P14Y08B +NCBITaxon:9606 homo sapiensï& + +HP:0001558Decreased fetal movementJ + +HP:0011461 Fetal onsetR© +@ + ECO:00000331author statement supported by traceable referencee + PMID:30808312TCOL6A1 mutation leading to Bethlem myopathy with recurrent hematuria: a case report.ß/ + +HP:0031910!Abnormal cranial nerve physiologyR© +@ + ECO:00000331author statement supported by traceable referencee + PMID:30808312TCOL6A1 mutation leading to Bethlem myopathy with recurrent hematuria: a case report.ò# + +HP:0011463Macroscopic hematuria* + +HP:0031796 Recurrent2 +P14YR© +@ + ECO:00000331author statement supported by traceable referencee + PMID:30808312TCOL6A1 mutation leading to Bethlem myopathy with recurrent hematuria: a case report.N + +HP:0001270 Motor delay" + +HP:0012825MildJ + +HP:0011463Childhood onset"– + biosample-id14 year-old boy Muscle biopsy of 14 year-old boy"! +UBERON:0003403skin of forearm2 +NCBITaxon:9606 homo sapiens: +P14YJ + NCIT:C38757Negative FindingR + +NCIT:C3677Benign NeoplasmZ& + NCIT:C28076Disease Grade Qualifierb + NCIT:C68748HER2/Neu Positive* + +HGNC1:3688FGFR126NM_001848.2:c.877G>A2 + GENO:0000135 heterozygous:B + + OMIM:101600PFEIFFER SYNDROME" + +HP:0003577Congenital onsetBm +file://data/genomes/P000001C"Whole genome sequencing VCF output" +GRCh38.p13* +14 year-old boyP000001CJÁ + +¨ ì™ÀÄPeter R.PhenopacketLab"y +hphuman phenotype ontology%http://purl.obolibrary.org/obo/hp.owl" +2018-03-08*HP2"http://purl.obolibrary.org/obo/HP_"z +genoGenotype Ontology'http://purl.obolibrary.org/obo/geno.owl" +19-03-2018*GENO2$http://purl.obolibrary.org/obo/GENO_"< +pubmedPubMed*PMID2$https://www.ncbi.nlm.nih.gov/pubmed/"v +ncit NCI Thesaurus'http://purl.obolibrary.org/obo/ncit.owl" +20-03-2020*NCIT2$http://purl.obolibrary.org/obo/NCIT_21.0.0:e + PMID:30808312TCOL6A1 mutation leading to Bethlem myopathy with recurrent hematuria: a case report.  + +MOTHER0  + +FATHER0"– +file://data/genomes/FAM000001"Whole genome sequencing VCF output" +GRCh38.p13* +14 year-old boyP000001C* +MOTHERP000001M* +FATHERP000001F*Á + +¨ ì™ÀÄPeter R.PhenopacketLab"y +hphuman phenotype ontology%http://purl.obolibrary.org/obo/hp.owl" +2018-03-08*HP2"http://purl.obolibrary.org/obo/HP_"z +genoGenotype Ontology'http://purl.obolibrary.org/obo/geno.owl" +19-03-2018*GENO2$http://purl.obolibrary.org/obo/GENO_"< +pubmedPubMed*PMID2$https://www.ncbi.nlm.nih.gov/pubmed/"v +ncit NCI Thesaurus'http://purl.obolibrary.org/obo/ncit.owl" +20-03-2020*NCIT2$http://purl.obolibrary.org/obo/NCIT_21.0.0:e + PMID:30808312TCOL6A1 mutation leading to Bethlem myopathy with recurrent hematuria: a case report. \ No newline at end of file diff --git a/python/data/v1/family.json b/python/data/v1/family.json new file mode 100644 index 0000000..b7ad2d7 --- /dev/null +++ b/python/data/v1/family.json @@ -0,0 +1,268 @@ +{ + "id": "comprehensive-family-id", + "proband": { + "id": "comprehensive-phenopacket-id", + "subject": { + "id": "14 year-old boy", + "alternateIds": ["boy", "patient", "proband"], + "dateOfBirth": "1970-01-02T10:17:36.000000100Z", + "ageAtCollection": { + "age": "P14Y" + }, + "sex": "MALE", + "karyotypicSex": "XY", + "taxonomy": { + "id": "NCBITaxon:9606", + "label": "homo sapiens" + } + }, + "phenotypicFeatures": [{ + "type": { + "id": "HP:0001558", + "label": "Decreased fetal movement" + }, + "classOfOnset": { + "id": "HP:0011461", + "label": "Fetal onset" + }, + "evidence": [{ + "evidenceCode": { + "id": "ECO:0000033", + "label": "author statement supported by traceable reference" + }, + "reference": { + "id": "PMID:30808312", + "description": "COL6A1 mutation leading to Bethlem myopathy with recurrent hematuria: a case report." + } + }] + }, { + "type": { + "id": "HP:0031910", + "label": "Abnormal cranial nerve physiology" + }, + "negated": true, + "evidence": [{ + "evidenceCode": { + "id": "ECO:0000033", + "label": "author statement supported by traceable reference" + }, + "reference": { + "id": "PMID:30808312", + "description": "COL6A1 mutation leading to Bethlem myopathy with recurrent hematuria: a case report." + } + }] + }, { + "type": { + "id": "HP:0011463", + "label": "Macroscopic hematuria" + }, + "modifiers": [{ + "id": "HP:0031796", + "label": "Recurrent" + }], + "ageOfOnset": { + "age": "P14Y" + }, + "evidence": [{ + "evidenceCode": { + "id": "ECO:0000033", + "label": "author statement supported by traceable reference" + }, + "reference": { + "id": "PMID:30808312", + "description": "COL6A1 mutation leading to Bethlem myopathy with recurrent hematuria: a case report." + } + }] + }, { + "type": { + "id": "HP:0001270", + "label": "Motor delay" + }, + "severity": { + "id": "HP:0012825", + "label": "Mild" + }, + "classOfOnset": { + "id": "HP:0011463", + "label": "Childhood onset" + } + }], + "biosamples": [{ + "id": "biosample-id", + "individualId": "14 year-old boy", + "description": "Muscle biopsy of 14 year-old boy", + "sampledTissue": { + "id": "UBERON:0003403", + "label": "skin of forearm" + }, + "taxonomy": { + "id": "NCBITaxon:9606", + "label": "homo sapiens" + }, + "ageOfIndividualAtCollection": { + "age": "P14Y" + }, + "histologicalDiagnosis": { + "id": "NCIT:C38757", + "label": "Negative Finding" + }, + "tumorProgression": { + "id": "NCIT:C3677", + "label": "Benign Neoplasm" + }, + "tumorGrade": { + "id": "NCIT:C28076", + "label": "Disease Grade Qualifier" + }, + "diagnosticMarkers": [{ + "id": "NCIT:C68748", + "label": "HER2/Neu Positive" + }] + }], + "genes": [{ + "id": "HGNC1:3688", + "symbol": "FGFR1" + }], + "variants": [{ + "hgvsAllele": { + "hgvs": "NM_001848.2:c.877G\u003eA" + }, + "zygosity": { + "id": "GENO:0000135", + "label": "heterozygous" + } + }], + "diseases": [{ + "term": { + "id": "OMIM:101600", + "label": "PFEIFFER SYNDROME" + }, + "classOfOnset": { + "id": "HP:0003577", + "label": "Congenital onset" + } + }], + "htsFiles": [{ + "uri": "file://data/genomes/P000001C", + "description": "Whole genome sequencing VCF output", + "htsFormat": "VCF", + "genomeAssembly": "GRCh38.p13", + "individualToSampleIdentifiers": { + "14 year-old boy": "P000001C" + } + }], + "metaData": { + "created": "2022-10-03T16:39:04.000123456Z", + "createdBy": "Peter R.", + "submittedBy": "PhenopacketLab", + "resources": [{ + "id": "hp", + "name": "human phenotype ontology", + "url": "http://purl.obolibrary.org/obo/hp.owl", + "version": "2018-03-08", + "namespacePrefix": "HP", + "iriPrefix": "http://purl.obolibrary.org/obo/HP_" + }, { + "id": "geno", + "name": "Genotype Ontology", + "url": "http://purl.obolibrary.org/obo/geno.owl", + "version": "19-03-2018", + "namespacePrefix": "GENO", + "iriPrefix": "http://purl.obolibrary.org/obo/GENO_" + }, { + "id": "pubmed", + "name": "PubMed", + "namespacePrefix": "PMID", + "iriPrefix": "https://www.ncbi.nlm.nih.gov/pubmed/" + }, { + "id": "ncit", + "name": "NCI Thesaurus", + "url": "http://purl.obolibrary.org/obo/ncit.owl", + "version": "20-03-2020", + "namespacePrefix": "NCIT", + "iriPrefix": "http://purl.obolibrary.org/obo/NCIT_" + }], + "phenopacketSchemaVersion": "1.0.0", + "externalReferences": [{ + "id": "PMID:30808312", + "description": "COL6A1 mutation leading to Bethlem myopathy with recurrent hematuria: a case report." + }] + } + }, + "relatives": [{ + "subject": { + "id": "MOTHER", + "sex": "FEMALE" + } + }, { + "subject": { + "id": "FATHER", + "sex": "MALE" + } + }], + "pedigree": { + "persons": [{ + "individualId": "14 year-old boy", + "paternalId": "FATHER", + "maternalId": "MOTHER", + "sex": "MALE", + "affectedStatus": "AFFECTED" + }, { + "individualId": "MOTHER", + "sex": "FEMALE", + "affectedStatus": "UNAFFECTED" + }, { + "individualId": "FATHER", + "sex": "MALE", + "affectedStatus": "UNAFFECTED" + }] + }, + "htsFiles": [{ + "uri": "file://data/genomes/FAM000001", + "description": "Whole genome sequencing VCF output", + "htsFormat": "VCF", + "genomeAssembly": "GRCh38.p13", + "individualToSampleIdentifiers": { + "14 year-old boy": "P000001C", + "MOTHER": "P000001M", + "FATHER": "P000001F" + } + }], + "metaData": { + "created": "2022-10-03T16:39:04.000123456Z", + "createdBy": "Peter R.", + "submittedBy": "PhenopacketLab", + "resources": [{ + "id": "hp", + "name": "human phenotype ontology", + "url": "http://purl.obolibrary.org/obo/hp.owl", + "version": "2018-03-08", + "namespacePrefix": "HP", + "iriPrefix": "http://purl.obolibrary.org/obo/HP_" + }, { + "id": "geno", + "name": "Genotype Ontology", + "url": "http://purl.obolibrary.org/obo/geno.owl", + "version": "19-03-2018", + "namespacePrefix": "GENO", + "iriPrefix": "http://purl.obolibrary.org/obo/GENO_" + }, { + "id": "pubmed", + "name": "PubMed", + "namespacePrefix": "PMID", + "iriPrefix": "https://www.ncbi.nlm.nih.gov/pubmed/" + }, { + "id": "ncit", + "name": "NCI Thesaurus", + "url": "http://purl.obolibrary.org/obo/ncit.owl", + "version": "20-03-2020", + "namespacePrefix": "NCIT", + "iriPrefix": "http://purl.obolibrary.org/obo/NCIT_" + }], + "phenopacketSchemaVersion": "1.0.0", + "externalReferences": [{ + "id": "PMID:30808312", + "description": "COL6A1 mutation leading to Bethlem myopathy with recurrent hematuria: a case report." + }] + } +} \ No newline at end of file diff --git a/python/data/v1/family.pb b/python/data/v1/family.pb new file mode 100644 index 0000000..fb13105 --- /dev/null +++ b/python/data/v1/family.pb @@ -0,0 +1,83 @@ + +comprehensive-family-idò +comprehensive-phenopacket-id\ +14 year-old boyboypatientprobandÀÄd" +P14Y08B +NCBITaxon:9606 homo sapiensï& + +HP:0001558Decreased fetal movementJ + +HP:0011461 Fetal onsetR© +@ + ECO:00000331author statement supported by traceable referencee + PMID:30808312TCOL6A1 mutation leading to Bethlem myopathy with recurrent hematuria: a case report.ß/ + +HP:0031910!Abnormal cranial nerve physiologyR© +@ + ECO:00000331author statement supported by traceable referencee + PMID:30808312TCOL6A1 mutation leading to Bethlem myopathy with recurrent hematuria: a case report.ò# + +HP:0011463Macroscopic hematuria* + +HP:0031796 Recurrent2 +P14YR© +@ + ECO:00000331author statement supported by traceable referencee + PMID:30808312TCOL6A1 mutation leading to Bethlem myopathy with recurrent hematuria: a case report.N + +HP:0001270 Motor delay" + +HP:0012825MildJ + +HP:0011463Childhood onset"– + biosample-id14 year-old boy Muscle biopsy of 14 year-old boy"! +UBERON:0003403skin of forearm2 +NCBITaxon:9606 homo sapiens: +P14YJ + NCIT:C38757Negative FindingR + +NCIT:C3677Benign NeoplasmZ& + NCIT:C28076Disease Grade Qualifierb + NCIT:C68748HER2/Neu Positive* + +HGNC1:3688FGFR126NM_001848.2:c.877G>A2 + GENO:0000135 heterozygous:B + + OMIM:101600PFEIFFER SYNDROME" + +HP:0003577Congenital onsetBm +file://data/genomes/P000001C"Whole genome sequencing VCF output" +GRCh38.p13* +14 year-old boyP000001CJÁ + +¨ ì™ÀÄPeter R.PhenopacketLab"y +hphuman phenotype ontology%http://purl.obolibrary.org/obo/hp.owl" +2018-03-08*HP2"http://purl.obolibrary.org/obo/HP_"z +genoGenotype Ontology'http://purl.obolibrary.org/obo/geno.owl" +19-03-2018*GENO2$http://purl.obolibrary.org/obo/GENO_"< +pubmedPubMed*PMID2$https://www.ncbi.nlm.nih.gov/pubmed/"v +ncit NCI Thesaurus'http://purl.obolibrary.org/obo/ncit.owl" +20-03-2020*NCIT2$http://purl.obolibrary.org/obo/NCIT_21.0.0:e + PMID:30808312TCOL6A1 mutation leading to Bethlem myopathy with recurrent hematuria: a case report.  + +MOTHER0  + +FATHER0"C +%14 year-old boyFATHER"MOTHER(0 + MOTHER(0 + FATHER(0*– +file://data/genomes/FAM000001"Whole genome sequencing VCF output" +GRCh38.p13* +14 year-old boyP000001C* +MOTHERP000001M* +FATHERP000001F2Á + +¨ ì™ÀÄPeter R.PhenopacketLab"y +hphuman phenotype ontology%http://purl.obolibrary.org/obo/hp.owl" +2018-03-08*HP2"http://purl.obolibrary.org/obo/HP_"z +genoGenotype Ontology'http://purl.obolibrary.org/obo/geno.owl" +19-03-2018*GENO2$http://purl.obolibrary.org/obo/GENO_"< +pubmedPubMed*PMID2$https://www.ncbi.nlm.nih.gov/pubmed/"v +ncit NCI Thesaurus'http://purl.obolibrary.org/obo/ncit.owl" +20-03-2020*NCIT2$http://purl.obolibrary.org/obo/NCIT_21.0.0:e + PMID:30808312TCOL6A1 mutation leading to Bethlem myopathy with recurrent hematuria: a case report. \ No newline at end of file diff --git a/python/data/v1/phenopacket.json b/python/data/v1/phenopacket.json new file mode 100644 index 0000000..e6848a9 --- /dev/null +++ b/python/data/v1/phenopacket.json @@ -0,0 +1,189 @@ +{ + "id": "comprehensive-phenopacket-id", + "subject": { + "id": "14 year-old boy", + "alternateIds": ["boy", "patient", "proband"], + "dateOfBirth": "1970-01-02T10:17:36.000000100Z", + "ageAtCollection": { + "age": "P14Y" + }, + "sex": "MALE", + "karyotypicSex": "XY", + "taxonomy": { + "id": "NCBITaxon:9606", + "label": "homo sapiens" + } + }, + "phenotypicFeatures": [{ + "type": { + "id": "HP:0001558", + "label": "Decreased fetal movement" + }, + "classOfOnset": { + "id": "HP:0011461", + "label": "Fetal onset" + }, + "evidence": [{ + "evidenceCode": { + "id": "ECO:0000033", + "label": "author statement supported by traceable reference" + }, + "reference": { + "id": "PMID:30808312", + "description": "COL6A1 mutation leading to Bethlem myopathy with recurrent hematuria: a case report." + } + }] + }, { + "type": { + "id": "HP:0031910", + "label": "Abnormal cranial nerve physiology" + }, + "negated": true, + "evidence": [{ + "evidenceCode": { + "id": "ECO:0000033", + "label": "author statement supported by traceable reference" + }, + "reference": { + "id": "PMID:30808312", + "description": "COL6A1 mutation leading to Bethlem myopathy with recurrent hematuria: a case report." + } + }] + }, { + "type": { + "id": "HP:0011463", + "label": "Macroscopic hematuria" + }, + "modifiers": [{ + "id": "HP:0031796", + "label": "Recurrent" + }], + "ageOfOnset": { + "age": "P14Y" + }, + "evidence": [{ + "evidenceCode": { + "id": "ECO:0000033", + "label": "author statement supported by traceable reference" + }, + "reference": { + "id": "PMID:30808312", + "description": "COL6A1 mutation leading to Bethlem myopathy with recurrent hematuria: a case report." + } + }] + }, { + "type": { + "id": "HP:0001270", + "label": "Motor delay" + }, + "severity": { + "id": "HP:0012825", + "label": "Mild" + }, + "classOfOnset": { + "id": "HP:0011463", + "label": "Childhood onset" + } + }], + "biosamples": [{ + "id": "biosample-id", + "individualId": "14 year-old boy", + "description": "Muscle biopsy of 14 year-old boy", + "sampledTissue": { + "id": "UBERON:0003403", + "label": "skin of forearm" + }, + "taxonomy": { + "id": "NCBITaxon:9606", + "label": "homo sapiens" + }, + "ageOfIndividualAtCollection": { + "age": "P14Y" + }, + "histologicalDiagnosis": { + "id": "NCIT:C38757", + "label": "Negative Finding" + }, + "tumorProgression": { + "id": "NCIT:C3677", + "label": "Benign Neoplasm" + }, + "tumorGrade": { + "id": "NCIT:C28076", + "label": "Disease Grade Qualifier" + }, + "diagnosticMarkers": [{ + "id": "NCIT:C68748", + "label": "HER2/Neu Positive" + }] + }], + "genes": [{ + "id": "HGNC1:3688", + "symbol": "FGFR1" + }], + "variants": [{ + "hgvsAllele": { + "hgvs": "NM_001848.2:c.877G\u003eA" + }, + "zygosity": { + "id": "GENO:0000135", + "label": "heterozygous" + } + }], + "diseases": [{ + "term": { + "id": "OMIM:101600", + "label": "PFEIFFER SYNDROME" + }, + "classOfOnset": { + "id": "HP:0003577", + "label": "Congenital onset" + } + }], + "htsFiles": [{ + "uri": "file://data/genomes/P000001C", + "description": "Whole genome sequencing VCF output", + "htsFormat": "VCF", + "genomeAssembly": "GRCh38.p13", + "individualToSampleIdentifiers": { + "14 year-old boy": "P000001C" + } + }], + "metaData": { + "created": "2022-10-03T16:39:04.000123456Z", + "createdBy": "Peter R.", + "submittedBy": "PhenopacketLab", + "resources": [{ + "id": "hp", + "name": "human phenotype ontology", + "url": "http://purl.obolibrary.org/obo/hp.owl", + "version": "2018-03-08", + "namespacePrefix": "HP", + "iriPrefix": "http://purl.obolibrary.org/obo/HP_" + }, { + "id": "geno", + "name": "Genotype Ontology", + "url": "http://purl.obolibrary.org/obo/geno.owl", + "version": "19-03-2018", + "namespacePrefix": "GENO", + "iriPrefix": "http://purl.obolibrary.org/obo/GENO_" + }, { + "id": "pubmed", + "name": "PubMed", + "namespacePrefix": "PMID", + "iriPrefix": "https://www.ncbi.nlm.nih.gov/pubmed/" + }, { + "id": "ncit", + "name": "NCI Thesaurus", + "url": "http://purl.obolibrary.org/obo/ncit.owl", + "version": "20-03-2020", + "namespacePrefix": "NCIT", + "iriPrefix": "http://purl.obolibrary.org/obo/NCIT_" + }], + "phenopacketSchemaVersion": "1.0.0", + "externalReferences": [{ + "id": "PMID:30808312", + "description": "COL6A1 mutation leading to Bethlem myopathy with recurrent hematuria: a case report." + }] + } +} \ No newline at end of file diff --git a/python/data/v1/phenopacket.pb b/python/data/v1/phenopacket.pb new file mode 100644 index 0000000..8fe66cc --- /dev/null +++ b/python/data/v1/phenopacket.pb @@ -0,0 +1,60 @@ + +comprehensive-phenopacket-id\ +14 year-old boyboypatientprobandÀÄd" +P14Y08B +NCBITaxon:9606 homo sapiensï& + +HP:0001558Decreased fetal movementJ + +HP:0011461 Fetal onsetR© +@ + ECO:00000331author statement supported by traceable referencee + PMID:30808312TCOL6A1 mutation leading to Bethlem myopathy with recurrent hematuria: a case report.ß/ + +HP:0031910!Abnormal cranial nerve physiologyR© +@ + ECO:00000331author statement supported by traceable referencee + PMID:30808312TCOL6A1 mutation leading to Bethlem myopathy with recurrent hematuria: a case report.ò# + +HP:0011463Macroscopic hematuria* + +HP:0031796 Recurrent2 +P14YR© +@ + ECO:00000331author statement supported by traceable referencee + PMID:30808312TCOL6A1 mutation leading to Bethlem myopathy with recurrent hematuria: a case report.N + +HP:0001270 Motor delay" + +HP:0012825MildJ + +HP:0011463Childhood onset"– + biosample-id14 year-old boy Muscle biopsy of 14 year-old boy"! +UBERON:0003403skin of forearm2 +NCBITaxon:9606 homo sapiens: +P14YJ + NCIT:C38757Negative FindingR + +NCIT:C3677Benign NeoplasmZ& + NCIT:C28076Disease Grade Qualifierb + NCIT:C68748HER2/Neu Positive* + +HGNC1:3688FGFR126NM_001848.2:c.877G>A2 + GENO:0000135 heterozygous:B + + OMIM:101600PFEIFFER SYNDROME" + +HP:0003577Congenital onsetBm +file://data/genomes/P000001C"Whole genome sequencing VCF output" +GRCh38.p13* +14 year-old boyP000001CJÁ + +¨ ì™ÀÄPeter R.PhenopacketLab"y +hphuman phenotype ontology%http://purl.obolibrary.org/obo/hp.owl" +2018-03-08*HP2"http://purl.obolibrary.org/obo/HP_"z +genoGenotype Ontology'http://purl.obolibrary.org/obo/geno.owl" +19-03-2018*GENO2$http://purl.obolibrary.org/obo/GENO_"< +pubmedPubMed*PMID2$https://www.ncbi.nlm.nih.gov/pubmed/"v +ncit NCI Thesaurus'http://purl.obolibrary.org/obo/ncit.owl" +20-03-2020*NCIT2$http://purl.obolibrary.org/obo/NCIT_21.0.0:e + PMID:30808312TCOL6A1 mutation leading to Bethlem myopathy with recurrent hematuria: a case report. \ No newline at end of file diff --git a/python/data/v2/cohort.json b/python/data/v2/cohort.json new file mode 100644 index 0000000..e59170a --- /dev/null +++ b/python/data/v2/cohort.json @@ -0,0 +1,294 @@ +{ + "id": "comprehensive-cohort-id", + "description": "A description of the example cohort.", + "members": [{ + "id": "comprehensive-phenopacket-id", + "subject": { + "id": "14 year-old boy", + "alternateIds": ["boy", "patient", "proband"], + "dateOfBirth": "1970-01-02T10:17:36.000000100Z", + "timeAtLastEncounter": { + "age": { + "iso8601duration": "P14Y" + } + }, + "sex": "MALE", + "karyotypicSex": "XY", + "taxonomy": { + "id": "NCBITaxon:9606", + "label": "homo sapiens" + } + }, + "phenotypicFeatures": [{ + "type": { + "id": "HP:0001558", + "label": "Decreased fetal movement" + }, + "onset": { + "ontologyClass": { + "id": "HP:0011461", + "label": "Fetal onset" + } + }, + "evidence": [{ + "evidenceCode": { + "id": "ECO:0000033", + "label": "author statement supported by traceable reference" + }, + "reference": { + "id": "PMID:30808312", + "description": "COL6A1 mutation leading to Bethlem myopathy with recurrent hematuria: a case report." + } + }] + }, { + "type": { + "id": "HP:0031910", + "label": "Abnormal cranial nerve physiology" + }, + "excluded": true, + "evidence": [{ + "evidenceCode": { + "id": "ECO:0000033", + "label": "author statement supported by traceable reference" + }, + "reference": { + "id": "PMID:30808312", + "description": "COL6A1 mutation leading to Bethlem myopathy with recurrent hematuria: a case report." + } + }] + }, { + "type": { + "id": "HP:0011463", + "label": "Macroscopic hematuria" + }, + "modifiers": [{ + "id": "HP:0031796", + "label": "Recurrent" + }], + "onset": { + "age": { + "iso8601duration": "P14Y" + } + }, + "evidence": [{ + "evidenceCode": { + "id": "ECO:0000033", + "label": "author statement supported by traceable reference" + }, + "reference": { + "id": "PMID:30808312", + "description": "COL6A1 mutation leading to Bethlem myopathy with recurrent hematuria: a case report." + } + }] + }, { + "type": { + "id": "HP:0001270", + "label": "Motor delay" + }, + "severity": { + "id": "HP:0012825", + "label": "Mild" + }, + "onset": { + "ontologyClass": { + "id": "HP:0011463", + "label": "Childhood onset" + } + } + }], + "biosamples": [{ + "id": "biosample-id", + "individualId": "14 year-old boy", + "description": "Muscle biopsy of 14 year-old boy", + "sampledTissue": { + "id": "UBERON:0003403", + "label": "skin of forearm" + }, + "taxonomy": { + "id": "NCBITaxon:9606", + "label": "homo sapiens" + }, + "timeOfCollection": { + "age": { + "iso8601duration": "P14Y" + } + }, + "histologicalDiagnosis": { + "id": "NCIT:C38757", + "label": "Negative Finding" + }, + "tumorProgression": { + "id": "NCIT:C3677", + "label": "Benign Neoplasm" + }, + "tumorGrade": { + "id": "NCIT:C28076", + "label": "Disease Grade Qualifier" + }, + "diagnosticMarkers": [{ + "id": "NCIT:C68748", + "label": "HER2/Neu Positive" + }], + "materialSample": { + "id": "EFO:0009655", + "label": "abnormal sample" + } + }], + "interpretations": [{ + "id": "comprehensive-phenopacket-id", + "progressStatus": "SOLVED", + "diagnosis": { + "disease": { + "id": "OMIM:101600", + "label": "PFEIFFER SYNDROME" + }, + "genomicInterpretations": [{ + "subjectOrBiosampleId": "14 year-old boy", + "interpretationStatus": "CAUSATIVE", + "variantInterpretation": { + "variationDescriptor": { + "expressions": [{ + "syntax": "hgvs", + "value": "NM_001848.2:c.877G\u003eA" + }], + "allelicState": { + "id": "GENO:0000135", + "label": "heterozygous" + } + } + } + }] + } + }], + "diseases": [{ + "term": { + "id": "OMIM:101600", + "label": "PFEIFFER SYNDROME" + }, + "onset": { + "ontologyClass": { + "id": "HP:0003577", + "label": "Congenital onset" + } + } + }], + "files": [{ + "uri": "file://data/genomes/P000001C", + "individualToFileIdentifiers": { + "14 year-old boy": "P000001C" + }, + "fileAttributes": { + "genomeAssembly": "GRCh38.p13", + "fileFormat": "vcf", + "description": "Whole genome sequencing VCF output" + } + }], + "metaData": { + "created": "2022-10-03T16:39:04.000123456Z", + "createdBy": "Peter R.", + "submittedBy": "PhenopacketLab", + "resources": [{ + "id": "hp", + "name": "human phenotype ontology", + "url": "http://purl.obolibrary.org/obo/hp.owl", + "version": "2018-03-08", + "namespacePrefix": "HP", + "iriPrefix": "http://purl.obolibrary.org/obo/HP_" + }, { + "id": "geno", + "name": "Genotype Ontology", + "url": "http://purl.obolibrary.org/obo/geno.owl", + "version": "19-03-2018", + "namespacePrefix": "GENO", + "iriPrefix": "http://purl.obolibrary.org/obo/GENO_" + }, { + "id": "pubmed", + "name": "PubMed", + "namespacePrefix": "PMID", + "iriPrefix": "https://www.ncbi.nlm.nih.gov/pubmed/" + }, { + "id": "ncit", + "name": "NCI Thesaurus", + "url": "http://purl.obolibrary.org/obo/ncit.owl", + "version": "20-03-2020", + "namespacePrefix": "NCIT", + "iriPrefix": "http://purl.obolibrary.org/obo/NCIT_" + }], + "phenopacketSchemaVersion": "2.0.0", + "externalReferences": [{ + "id": "PMID:30808312", + "description": "COL6A1 mutation leading to Bethlem myopathy with recurrent hematuria: a case report." + }] + } + }, { + "subject": { + "id": "MOTHER", + "dateOfBirth": "1970-01-01T00:00:00Z", + "timeAtLastEncounter": { + }, + "sex": "FEMALE", + "taxonomy": { + } + } + }, { + "subject": { + "id": "FATHER", + "dateOfBirth": "1970-01-01T00:00:00Z", + "timeAtLastEncounter": { + }, + "sex": "MALE", + "taxonomy": { + } + } + }], + "files": [{ + "uri": "file://data/genomes/FAM000001", + "individualToFileIdentifiers": { + "14 year-old boy": "P000001C", + "MOTHER": "P000001M", + "FATHER": "P000001F" + }, + "fileAttributes": { + "genomeAssembly": "GRCh38.p13", + "fileFormat": "vcf", + "description": "Whole genome sequencing VCF output" + } + }], + "metaData": { + "created": "2022-10-03T16:39:04.000123456Z", + "createdBy": "Peter R.", + "submittedBy": "PhenopacketLab", + "resources": [{ + "id": "hp", + "name": "human phenotype ontology", + "url": "http://purl.obolibrary.org/obo/hp.owl", + "version": "2018-03-08", + "namespacePrefix": "HP", + "iriPrefix": "http://purl.obolibrary.org/obo/HP_" + }, { + "id": "geno", + "name": "Genotype Ontology", + "url": "http://purl.obolibrary.org/obo/geno.owl", + "version": "19-03-2018", + "namespacePrefix": "GENO", + "iriPrefix": "http://purl.obolibrary.org/obo/GENO_" + }, { + "id": "pubmed", + "name": "PubMed", + "namespacePrefix": "PMID", + "iriPrefix": "https://www.ncbi.nlm.nih.gov/pubmed/" + }, { + "id": "ncit", + "name": "NCI Thesaurus", + "url": "http://purl.obolibrary.org/obo/ncit.owl", + "version": "20-03-2020", + "namespacePrefix": "NCIT", + "iriPrefix": "http://purl.obolibrary.org/obo/NCIT_" + }], + "phenopacketSchemaVersion": "2.0.0", + "externalReferences": [{ + "id": "PMID:30808312", + "description": "COL6A1 mutation leading to Bethlem myopathy with recurrent hematuria: a case report." + }] + } +} \ No newline at end of file diff --git a/python/data/v2/cohort.pb b/python/data/v2/cohort.pb new file mode 100644 index 0000000000000000000000000000000000000000..0679cfa09cdfea17ce246282ad8dae72315c188f GIT binary patch literal 3099 zcmeHJ-EJF26pmAqhDn<^BT&mWZKq2qlBKNoC$=|MRpB^xe$=&%8$j&^(d_QnyL5MF z*`0N|z2q&*4Tvjv1+GBig16uWxIy9pn6cyZ4{EL|RqBo7%$#$+Ip3V`8-q&@4daA( zBuf1;*>tE!6SnEQ@MaUaBy|!$W}QV4hWjYDbf$?VB`4;F)u}9>^vls zGGBr?G>8}F31;Bp4p{lA@t(x%iE^!iqQ{ti_VK_h` z9g~nF=~tu=P9rsKtERz~&SalPDPcQ56~HH8rQPa@LaJ&QP{SGHWT2E`HYtG8ERH!l zyt17mmSBfqn{$znAxTK&5Jd)2qT9##P(bTPDj#VBuC+YA z>{05X$yk?vUIZ6xpQaPl3R%CXMj2UI>hLM%QE{3Joxh;h@(r;1*-m@V>k4TYTPlZ1 zANmtZ7*Z~^B%G(x!P#{@xDHmjt%IXR%P{NJIxKa`h%1B7YR8X+GHl+^X+W)Bhif|| z@<$Qs5*i0M4ZpfI%hye{UW1o*{Zxn@?IqYH=+g`b{?I4MHw82cteN#K6Rz*K2YRJT zGSsK3FA%>iT;qc8Oa#AOt5)F}o~dX`m;R*iYI7@{lTHA~=~)L>LsPYys>1btr+v`r zv}4U2xbg8uSRNDJCz}|c^pVF{%=?V9Bv7bL1K&o@15ilAfpHdu-?J0vb#>Swk^+;UMoLDTG%_j_`3qC}4ED^Ub7rErnkw;RTC0Q!XJGID}u{Axlu9k~JwsTJ{f{yifbVXIv#%p>nAuNF`00+aCs;XMOZo4X4bdF-jO_vjiwfqlm_gXSOoLVr&y^`2@veOkxt+1QKv@*gM>7 z8LFw8hNeqL?ZLgqj)sDiC-G>Ae1aV>oFGQgE@7@u0u*GtUChnUiN{=?%uZvTjUkr= zn5D6YTL>eY7sU4jydwQpfVI;~3~gIe;f)xYI#}tq z506@H!)(@@u+k?J&I~@QT`v^Ouy{eI4vl6LuI-Z0n}n!OXyoH0`21ELubXPK0Wa-& zi6A@LkFi70M=AEbu}9*s3MhAMn9VH{t{-$pdaX}VG^B|q5Wg;5vs;NYSOXrxH1!>13;J#5JtD7q26?4|(@ zx$yBqSQZgpCz}bNbkk)l;&n!8>?_owzGuZa&J-F?YJ6F9BZZ#$GSK-&Y^uhlYF3K} zLtS1Fb}&4aAA^#p0j}@osDm8!{RQD7ZjP&M3siw#Es1;A-&+tWoQ~z&pd6)EKpa>e zrdE$Q)slFE=FthS_vGY632n<$LO)PK&s8RL%9|iUYVs76c*hyM$SH@8T$124P7`U- zsYOiQt@){SwOSIiU3hBYbgaLjE2^TlUebFBVFi?XgCi~h(vrNS7Va)dGh27(%vQX+ zB)7rM=k!r3&zUpnb$5DmQoWE<5auU6<5^GG Z<^X55?#tkw{+fILB^Ix__doxu{snF*zsdjr literal 0 HcmV?d00001 diff --git a/python/data/v2/phenopacket.json b/python/data/v2/phenopacket.json new file mode 100644 index 0000000..89d29db --- /dev/null +++ b/python/data/v2/phenopacket.json @@ -0,0 +1,220 @@ +{ + "id": "comprehensive-phenopacket-id", + "subject": { + "id": "14 year-old boy", + "alternateIds": ["boy", "patient", "proband"], + "dateOfBirth": "1970-01-02T10:17:36.000000100Z", + "timeAtLastEncounter": { + "age": { + "iso8601duration": "P14Y" + } + }, + "sex": "MALE", + "karyotypicSex": "XY", + "taxonomy": { + "id": "NCBITaxon:9606", + "label": "homo sapiens" + } + }, + "phenotypicFeatures": [{ + "type": { + "id": "HP:0001558", + "label": "Decreased fetal movement" + }, + "onset": { + "ontologyClass": { + "id": "HP:0011461", + "label": "Fetal onset" + } + }, + "evidence": [{ + "evidenceCode": { + "id": "ECO:0000033", + "label": "author statement supported by traceable reference" + }, + "reference": { + "id": "PMID:30808312", + "description": "COL6A1 mutation leading to Bethlem myopathy with recurrent hematuria: a case report." + } + }] + }, { + "type": { + "id": "HP:0031910", + "label": "Abnormal cranial nerve physiology" + }, + "excluded": true, + "evidence": [{ + "evidenceCode": { + "id": "ECO:0000033", + "label": "author statement supported by traceable reference" + }, + "reference": { + "id": "PMID:30808312", + "description": "COL6A1 mutation leading to Bethlem myopathy with recurrent hematuria: a case report." + } + }] + }, { + "type": { + "id": "HP:0011463", + "label": "Macroscopic hematuria" + }, + "modifiers": [{ + "id": "HP:0031796", + "label": "Recurrent" + }], + "onset": { + "age": { + "iso8601duration": "P14Y" + } + }, + "evidence": [{ + "evidenceCode": { + "id": "ECO:0000033", + "label": "author statement supported by traceable reference" + }, + "reference": { + "id": "PMID:30808312", + "description": "COL6A1 mutation leading to Bethlem myopathy with recurrent hematuria: a case report." + } + }] + }, { + "type": { + "id": "HP:0001270", + "label": "Motor delay" + }, + "severity": { + "id": "HP:0012825", + "label": "Mild" + }, + "onset": { + "ontologyClass": { + "id": "HP:0011463", + "label": "Childhood onset" + } + } + }], + "biosamples": [{ + "id": "biosample-id", + "individualId": "14 year-old boy", + "description": "Muscle biopsy of 14 year-old boy", + "sampledTissue": { + "id": "UBERON:0003403", + "label": "skin of forearm" + }, + "taxonomy": { + "id": "NCBITaxon:9606", + "label": "homo sapiens" + }, + "timeOfCollection": { + "age": { + "iso8601duration": "P14Y" + } + }, + "histologicalDiagnosis": { + "id": "NCIT:C38757", + "label": "Negative Finding" + }, + "tumorProgression": { + "id": "NCIT:C3677", + "label": "Benign Neoplasm" + }, + "tumorGrade": { + "id": "NCIT:C28076", + "label": "Disease Grade Qualifier" + }, + "diagnosticMarkers": [{ + "id": "NCIT:C68748", + "label": "HER2/Neu Positive" + }], + "materialSample": { + "id": "EFO:0009655", + "label": "abnormal sample" + } + }], + "interpretations": [{ + "id": "comprehensive-phenopacket-id", + "progressStatus": "SOLVED", + "diagnosis": { + "disease": { + "id": "OMIM:101600", + "label": "PFEIFFER SYNDROME" + }, + "genomicInterpretations": [{ + "subjectOrBiosampleId": "14 year-old boy", + "interpretationStatus": "CAUSATIVE", + "variantInterpretation": { + "variationDescriptor": { + "expressions": [{ + "syntax": "hgvs", + "value": "NM_001848.2:c.877G\u003eA" + }], + "allelicState": { + "id": "GENO:0000135", + "label": "heterozygous" + } + } + } + }] + } + }], + "diseases": [{ + "term": { + "id": "OMIM:101600", + "label": "PFEIFFER SYNDROME" + }, + "onset": { + "ontologyClass": { + "id": "HP:0003577", + "label": "Congenital onset" + } + } + }], + "files": [{ + "uri": "file://data/genomes/P000001C", + "individualToFileIdentifiers": { + "14 year-old boy": "P000001C" + }, + "fileAttributes": { + "genomeAssembly": "GRCh38.p13", + "fileFormat": "vcf", + "description": "Whole genome sequencing VCF output" + } + }], + "metaData": { + "created": "2022-10-03T16:39:04.000123456Z", + "createdBy": "Peter R.", + "submittedBy": "PhenopacketLab", + "resources": [{ + "id": "hp", + "name": "human phenotype ontology", + "url": "http://purl.obolibrary.org/obo/hp.owl", + "version": "2018-03-08", + "namespacePrefix": "HP", + "iriPrefix": "http://purl.obolibrary.org/obo/HP_" + }, { + "id": "geno", + "name": "Genotype Ontology", + "url": "http://purl.obolibrary.org/obo/geno.owl", + "version": "19-03-2018", + "namespacePrefix": "GENO", + "iriPrefix": "http://purl.obolibrary.org/obo/GENO_" + }, { + "id": "pubmed", + "name": "PubMed", + "namespacePrefix": "PMID", + "iriPrefix": "https://www.ncbi.nlm.nih.gov/pubmed/" + }, { + "id": "ncit", + "name": "NCI Thesaurus", + "url": "http://purl.obolibrary.org/obo/ncit.owl", + "version": "20-03-2020", + "namespacePrefix": "NCIT", + "iriPrefix": "http://purl.obolibrary.org/obo/NCIT_" + }], + "phenopacketSchemaVersion": "2.0.0", + "externalReferences": [{ + "id": "PMID:30808312", + "description": "COL6A1 mutation leading to Bethlem myopathy with recurrent hematuria: a case report." + }] + } +} \ No newline at end of file diff --git a/python/data/v2/phenopacket.pb b/python/data/v2/phenopacket.pb new file mode 100644 index 0000000..3acd16a --- /dev/null +++ b/python/data/v2/phenopacket.pb @@ -0,0 +1,71 @@ + +comprehensive-phenopacket-id^ +14 year-old boyboypatientprobandÀÄd" + +P14Y08J +NCBITaxon:9606 homo sapiensñ& + +HP:0001558Decreased fetal movement2 + +HP:0011461 Fetal onsetB© +@ + ECO:00000331author statement supported by traceable referencee + PMID:30808312TCOL6A1 mutation leading to Bethlem myopathy with recurrent hematuria: a case report.ß/ + +HP:0031910!Abnormal cranial nerve physiologyB© +@ + ECO:00000331author statement supported by traceable referencee + PMID:30808312TCOL6A1 mutation leading to Bethlem myopathy with recurrent hematuria: a case report.ô# + +HP:0011463Macroscopic hematuria* + +HP:0031796 Recurrent2 + +P14YB© +@ + ECO:00000331author statement supported by traceable referencee + PMID:30808312TCOL6A1 mutation leading to Bethlem myopathy with recurrent hematuria: a case report.P + +HP:0001270 Motor delay" + +HP:0012825Mild2 + +HP:0011463Childhood onset*º + biosample-id14 year-old boy" Muscle biopsy of 14 year-old boy*! +UBERON:0003403skin of forearmJ +NCBITaxon:9606 homo sapiensR + +P14YZ + NCIT:C38757Negative Findingb + +NCIT:C3677Benign Neoplasmj& + NCIT:C28076Disease Grade Qualifier‚ + NCIT:C68748HER2/Neu Positiveš + EFO:0009655abnormal sample2™ +comprehensive-phenopacket-idw + + OMIM:101600PFEIFFER SYNDROMES +14 year-old boy"><2 +hgvsNM_001848.2:c.877G>Ar + GENO:0000135 heterozygous:D + + OMIM:101600PFEIFFER SYNDROME  + +HP:0003577Congenital onsetRŸ +file://data/genomes/P000001C +14 year-old boyP000001C +genomeAssembly +GRCh38.p13 + +fileFormatvcf1 + description"Whole genome sequencing VCF outputZÁ + +¨ ì™ÀÄPeter R.PhenopacketLab"y +hphuman phenotype ontology%http://purl.obolibrary.org/obo/hp.owl" +2018-03-08*HP2"http://purl.obolibrary.org/obo/HP_"z +genoGenotype Ontology'http://purl.obolibrary.org/obo/geno.owl" +19-03-2018*GENO2$http://purl.obolibrary.org/obo/GENO_"< +pubmedPubMed*PMID2$https://www.ncbi.nlm.nih.gov/pubmed/"v +ncit NCI Thesaurus'http://purl.obolibrary.org/obo/ncit.owl" +20-03-2020*NCIT2$http://purl.obolibrary.org/obo/NCIT_22.0.0:e + PMID:30808312TCOL6A1 mutation leading to Bethlem myopathy with recurrent hematuria: a case report. \ No newline at end of file diff --git a/python/tests/conftest.py b/python/tests/conftest.py index d27777b..99be183 100644 --- a/python/tests/conftest.py +++ b/python/tests/conftest.py @@ -3,8 +3,29 @@ import pytest +@pytest.fixture(scope="session") +def fpath_python_src_dir( + fpath_test_dir: str, +) -> str: + return os.path.realpath( + os.path.join( + fpath_test_dir, + os.pardir, + ) + ) + + @pytest.fixture(scope='session') def fpath_test_dir() -> str: # When running `pytest` from the top-level Python folder (`phenopacket-schema/python`) # this path will evaluate to the `tests` folder. return os.path.dirname(__file__) + + +@pytest.fixture(scope="session") +def fpath_test_data_dir( + fpath_python_src_dir: str, +) -> str: + # When running `pytest` from the top-level Python folder (`phenopacket-schema/python`) + # this path will evaluate to the `data` folder. + return os.path.join(fpath_python_src_dir, "data") diff --git a/python/tests/test_io.py b/python/tests/test_io.py new file mode 100644 index 0000000..d9d814a --- /dev/null +++ b/python/tests/test_io.py @@ -0,0 +1,120 @@ +import os + +import pytest + + +from google.protobuf.json_format import Parse + + +class TestV1IO: + @pytest.fixture(scope="class") + def fpath_v1_data( + self, + fpath_test_data_dir: str, + ) -> str: + return os.path.join( + fpath_test_data_dir, + "v1", + ) + + def test_phenopacket( + self, + fpath_v1_data: str, + ): + from phenopackets.schema.v1.phenopackets_pb2 import Phenopacket + + phenopacket_pb = Phenopacket() + with open(os.path.join(fpath_v1_data, "phenopacket.pb"), "rb") as fh: + phenopacket_pb.ParseFromString(fh.read()) + + with open(os.path.join(fpath_v1_data, "phenopacket.json"), "rb") as fh: + phenopacket_json = Parse(fh.read(), message=Phenopacket()) + + assert phenopacket_pb == phenopacket_json + + def test_family( + self, + fpath_v1_data: str, + ): + from phenopackets.schema.v1.phenopackets_pb2 import Family + + family_pb = Family() + with open(os.path.join(fpath_v1_data, "family.pb"), "rb") as fh: + family_pb.ParseFromString(fh.read()) + + with open(os.path.join(fpath_v1_data, "family.json"), "rb") as fh: + family_json = Parse(fh.read(), message=Family()) + + assert family_pb == family_json + + def test_cohort( + self, + fpath_v1_data: str, + ): + from phenopackets.schema.v1.phenopackets_pb2 import Cohort + + cohort_pb = Cohort() + with open(os.path.join(fpath_v1_data, "cohort.pb"), "rb") as fh: + cohort_pb.ParseFromString(fh.read()) + + with open(os.path.join(fpath_v1_data, "cohort.json"), "rb") as fh: + cohort_json = Parse(fh.read(), message=Cohort()) + + assert cohort_pb == cohort_json + + +class TestV2IO: + @pytest.fixture(scope="class") + def fpath_v2_data( + self, + fpath_test_data_dir: str, + ) -> str: + return os.path.join( + fpath_test_data_dir, + "v2", + ) + + def test_phenopacket( + self, + fpath_v2_data: str, + ): + from phenopackets.schema.v2.phenopackets_pb2 import Phenopacket + + phenopacket_pb = Phenopacket() + with open(os.path.join(fpath_v2_data, "phenopacket.pb"), "rb") as fh: + phenopacket_pb.ParseFromString(fh.read()) + + with open(os.path.join(fpath_v2_data, "phenopacket.json"), "rb") as fh: + phenopacket_json = Parse(fh.read(), message=Phenopacket()) + + assert phenopacket_pb == phenopacket_json + + def test_family( + self, + fpath_v2_data: str, + ): + from phenopackets.schema.v2.phenopackets_pb2 import Family + + family_pb = Family() + with open(os.path.join(fpath_v2_data, "family.pb"), "rb") as fh: + family_pb.ParseFromString(fh.read()) + + with open(os.path.join(fpath_v2_data, "family.json"), "rb") as fh: + family_json = Parse(fh.read(), message=Family()) + + assert family_pb == family_json + + def test_cohort( + self, + fpath_v2_data: str, + ): + from phenopackets.schema.v2.phenopackets_pb2 import Cohort + + cohort_pb = Cohort() + with open(os.path.join(fpath_v2_data, "cohort.pb"), "rb") as fh: + cohort_pb.ParseFromString(fh.read()) + + with open(os.path.join(fpath_v2_data, "cohort.json"), "rb") as fh: + cohort_json = Parse(fh.read(), message=Cohort()) + + assert cohort_pb == cohort_json From b4ff6779255f075d46bfa8315d984e163f6c282f Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Mon, 13 Jan 2025 23:35:04 +0100 Subject: [PATCH 2/3] Allow using protobuf `<=5.2.9` in Python. --- python/pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/pyproject.toml b/python/pyproject.toml index f3cab23..6825873 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -33,7 +33,7 @@ keywords = [ ] dependencies = [ - "protobuf>=3.20.2,<4.0.0", + "protobuf>=3.20.2,<=5.29.3", # Last tested Python version ] [project.optional-dependencies] From 9007275d67e3341f5e343948edc8ceb92f2e0a25 Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Tue, 22 Jul 2025 12:46:12 +0200 Subject: [PATCH 3/3] Allow using protobuf `<7.0.0`. --- python/pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/pyproject.toml b/python/pyproject.toml index 6825873..2d93b9e 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -33,7 +33,7 @@ keywords = [ ] dependencies = [ - "protobuf>=3.20.2,<=5.29.3", # Last tested Python version + "protobuf>=3.20.2,<7.0.0", # Last tested Python version ] [project.optional-dependencies]