Skip to content

Commit

Permalink
Refactored code for logging, optimization and checking if valid gene …
Browse files Browse the repository at this point in the history
…symbol
  • Loading branch information
GabSal123 committed Mar 3, 2024
1 parent 247c627 commit d05f025
Show file tree
Hide file tree
Showing 10 changed files with 14,451 additions and 18 deletions.
3 changes: 3 additions & 0 deletions .idea/.gitignore

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions .idea/.name

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 6 additions & 0 deletions .idea/inspectionProfiles/profiles_settings.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

14 changes: 14 additions & 0 deletions .idea/kath.iml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 7 additions & 0 deletions .idea/misc.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 8 additions & 0 deletions .idea/modules.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 6 additions & 0 deletions .idea/vcs.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

110 changes: 110 additions & 0 deletions data_collection/data/A1BG.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
### LOVD-version 3000-290 ### Full data download ### To import, do not remove or alter this header ###
## Filter: (gene_public = A1BG)
# charset = UTF-8

## Genes ## Do not remove or alter this header ##
## Count = 1
"{{id}}" "{{name}}" "{{chromosome}}" "{{chrom_band}}" "{{imprinting}}" "{{refseq_genomic}}" "{{refseq_UD}}" "{{reference}}" "{{url_homepage}}" "{{url_external}}" "{{allow_download}}" "{{id_hgnc}}" "{{id_entrez}}" "{{id_omim}}" "{{show_hgmd}}" "{{show_genecards}}" "{{show_genetests}}" "{{show_orphanet}}" "{{note_index}}" "{{note_listing}}" "{{refseq}}" "{{refseq_url}}" "{{disclaimer}}" "{{disclaimer_text}}" "{{header}}" "{{header_align}}" "{{footer}}" "{{footer_align}}" "{{created_by}}" "{{created_date}}" "{{edited_by}}" "{{edited_date}}" "{{updated_by}}" "{{updated_date}}"
"A1BG" "alpha-1-B glycoprotein" "19" "q13.43" "unknown" "NC_000019.9" "UD_132611092394" "" "http://www.LOVD.nl/A1BG" "" "1" "5" "1" "138670" "1" "1" "1" "1" "Establishment of this gene variant database (LSDB) was supported by the <a href=\'http://www.LUMC.nl\' target=\'_blank\'>Leiden University Medical Center (LUMC)</a>, Leiden, Nederland." "" "g" "http://databases.lovd.nl/shared/refseq/A1BG_codingDNA.html" "1" "" "" "-1" "" "-1" "00000" "2012-09-13 00:00:00" "00006" "2015-12-13 09:24:24" "00006" "2022-01-23 11:30:18"


## Transcripts ## Do not remove or alter this header ##
## Count = 1
"{{id}}" "{{geneid}}" "{{name}}" "{{id_mutalyzer}}" "{{id_ncbi}}" "{{id_ensembl}}" "{{id_protein_ncbi}}" "{{id_protein_ensembl}}" "{{id_protein_uniprot}}" "{{remarks}}" "{{position_c_mrna_start}}" "{{position_c_mrna_end}}" "{{position_c_cds_end}}" "{{position_g_mrna_start}}" "{{position_g_mrna_end}}" "{{created_by}}" "{{created_date}}" "{{edited_by}}" "{{edited_date}}"
"00001331" "A1BG" "alpha-1-B glycoprotein" "001" "NM_130786.3" "" "NP_570602.2" "" "" "" "-62" "1704" "1488" "58858172" "58864865" "00000" "2012-09-13 13:27:02" "" ""


## Diseases ## Do not remove or alter this header ##
## Count = 2
"{{id}}" "{{symbol}}" "{{name}}" "{{inheritance}}" "{{id_omim}}" "{{tissues}}" "{{features}}" "{{remarks}}" "{{created_by}}" "{{created_date}}" "{{edited_by}}" "{{edited_date}}"
"01157" "CHTE" "Hypothyroidism, central, testicular enlargement (CHTE)" "XLR" "300888" "" "" "" "00006" "2014-09-25 23:29:40" "00006" "2021-12-10 21:51:32"
"04254" "CLP" "cleft lip, cleft palate (CLP)" "" "" "" "" "" "00006" "2015-05-08 09:59:28" "00006" "2015-05-08 10:00:15"


## Genes_To_Diseases ## Do not remove or alter this header ##
## Count = 0


## Individuals ## Do not remove or alter this header ##
## Count = 3
"{{id}}" "{{fatherid}}" "{{motherid}}" "{{panelid}}" "{{panel_size}}" "{{license}}" "{{owned_by}}" "{{Individual/Reference}}" "{{Individual/Remarks}}" "{{Individual/Gender}}" "{{Individual/Consanguinity}}" "{{Individual/Origin/Geographic}}" "{{Individual/Age_of_death}}" "{{Individual/VIP}}" "{{Individual/Data_av}}" "{{Individual/Treatment}}" "{{Individual/Origin/Population}}" "{{Individual/Individual_ID}}"
"00000208" "" "" "" "1" "" "00037" "{PMID:Sun 2011:23143598}, {DOI:Sun 2011:10.1038/ng.2453}" "" "M" "no" "Netherlands" "" "0" "" "" "" ""
"00000209" "" "" "" "1" "" "00037" "{PMID:Sun 2011:23143598}, {DOI:Sun 2011:10.1038/ng.2453}" "" "M" "no" "Netherlands" "" "0" "" "" "" ""
"00231413" "" "" "" "3" "" "03278" "{PMID:Cox 2019:31215115}" "3-generation family, 3 affected (2F, M)" "F;M" "" "Colombia" "" "0" "" "" "" "Fam22"


## Individuals_To_Diseases ## Do not remove or alter this header ##
## Count = 3
"{{individualid}}" "{{diseaseid}}"
"00000208" "01157"
"00000209" "01157"
"00231413" "04254"


## Phenotypes ## Do not remove or alter this header ##
## Note: Only showing Phenotype columns active for Diseases 01157, 04254
## Count = 3
"{{id}}" "{{diseaseid}}" "{{individualid}}" "{{owned_by}}" "{{Phenotype/Inheritance}}" "{{Phenotype/Age}}" "{{Phenotype/Additional}}" "{{Phenotype/Age/Onset}}" "{{Phenotype/Age/Diagnosis}}" "{{Phenotype/Onset}}" "{{Phenotype/Protein}}" "{{Phenotype/Enzyme/CPK}}" "{{Phenotype/Heart/Myocardium}}" "{{Phenotype/Lung}}" "{{Phenotype/Diagnosis/Definite}}" "{{Phenotype/Diagnosis/Initial}}"
"0000038983" "01157" "00000208" "00006" "Familial, X-linked recessive" "" "central hypothyroidism (FT4 0.50-0.99of lower limit normal), no prolactin deficiency, age sonographic determination testicular volume 17.64y, testicular volume right/left 21/20 (7.3–16ml)" "" "3w" "" "" "" "" "" "" ""
"0000038984" "01157" "00000209" "00006" "Familial, X-linked recessive" "" "central hypothyroidism (FT4 0.50-0.99of lower limit normal), prolactin deficiency, age sonographic determination testicular volume 21.36y, testicular volume right/left 30/26 (8.5–18.3ml)" "" "07y04m" "" "" "" "" "" "" ""
"0000173805" "04254" "00231413" "03278" "Familial, autosomal dominant" "" "" "" "" "" "" "" "" "" "" ""


## Screenings ## Do not remove or alter this header ##
## Count = 3
"{{id}}" "{{individualid}}" "{{variants_found}}" "{{owned_by}}" "{{created_by}}" "{{created_date}}" "{{edited_by}}" "{{edited_date}}" "{{Screening/Technique}}" "{{Screening/Template}}" "{{Screening/Tissue}}" "{{Screening/Remarks}}"
"0000000209" "00000208" "1" "00037" "00001" "2012-09-13 12:02:03" "" "" "SEQ-NG-I" "DNA" "" ""
"0000000210" "00000209" "1" "00037" "00001" "2012-09-13 12:09:36" "" "" "SEQ-NG-I" "DNA" "" ""
"0000232512" "00231413" "1" "03278" "03278" "2019-05-02 18:06:52" "" "" "SEQ-NG-I" "DNA" "" ""


## Screenings_To_Genes ## Do not remove or alter this header ##
## Count = 1
"{{screeningid}}" "{{geneid}}"
"0000232512" "FST"


## Variants_On_Genome ## Do not remove or alter this header ##
## Please note that not necessarily all variants found in the given individuals are shown. This output is restricted to variants in the selected gene.
## Count = 11
"{{id}}" "{{allele}}" "{{effectid}}" "{{chromosome}}" "{{position_g_start}}" "{{position_g_end}}" "{{type}}" "{{average_frequency}}" "{{owned_by}}" "{{VariantOnGenome/DBID}}" "{{VariantOnGenome/DNA}}" "{{VariantOnGenome/Frequency}}" "{{VariantOnGenome/Reference}}" "{{VariantOnGenome/Restriction_site}}" "{{VariantOnGenome/Published_as}}" "{{VariantOnGenome/Remarks}}" "{{VariantOnGenome/Genetic_origin}}" "{{VariantOnGenome/Segregation}}" "{{VariantOnGenome/dbSNP}}" "{{VariantOnGenome/VIP}}" "{{VariantOnGenome/Methylation}}" "{{VariantOnGenome/ISCN}}" "{{VariantOnGenome/DNA/hg38}}" "{{VariantOnGenome/ClinVar}}" "{{VariantOnGenome/ClinicalClassification}}" "{{VariantOnGenome/ClinicalClassification/Method}}"
"0000002698" "3" "50" "19" "58858614" "58858614" "del" "0" "00037" "A1BG_000004" "g.58858614del" "" "" "" "" "" "Germline" "" "" "" "" "" "g.58347248del" "" "VUS" ""
"0000010704" "3" "50" "19" "58858614" "58858614" "del" "0" "00037" "A1BG_000004" "g.58858614del" "" "" "" "" "" "Germline" "" "" "" "" "" "g.58347248del" "" "VUS" ""
"0000013878" "3" "50" "19" "58858676" "58858676" "subst" "0.922338" "00037" "A1BG_000003" "g.58858676A>G" "" "" "" "" "" "Germline" "" "" "" "" "" "g.58347310A>G" "" "VUS" ""
"0000193885" "3" "50" "19" "58858719" "58858719" "subst" "0" "00006" "A1BG_000005" "g.58858719C>T" "" "" "" "" "" "Germline" "" "" "0" "" "" "g.58347353C>T" "" "VUS" ""
"0000193887" "3" "50" "19" "58858719" "58858719" "subst" "0" "00006" "A1BG_000005" "g.58858719C>T" "" "" "" "" "" "Germline" "" "" "0" "" "" "g.58347353C>T" "" "VUS" ""
"0000193889" "3" "50" "19" "58858719" "58858719" "subst" "0" "00006" "A1BG_000005" "g.58858719C>T" "" "" "" "" "" "Germline" "" "" "0" "" "" "g.58347353C>T" "" "VUS" ""
"0000193891" "3" "50" "19" "58858719" "58858719" "subst" "0" "00006" "A1BG_000005" "g.58858719C>T" "" "" "" "" "" "Germline" "" "" "0" "" "" "g.58347353C>T" "" "VUS" ""
"0000193895" "3" "50" "19" "58858719" "58858719" "subst" "0" "00006" "A1BG_000005" "g.58858719C>T" "" "" "" "" "" "Germline" "" "" "0" "" "" "g.58347353C>T" "" "VUS" ""
"0000193897" "3" "50" "19" "58858719" "58858719" "subst" "0" "00006" "A1BG_000005" "g.58858719C>T" "" "" "" "" "" "Germline" "" "" "0" "" "" "g.58347353C>T" "" "VUS" ""
"0000480548" "0" "70" "19" "58858937" "58858937" "subst" "0" "03278" "A1BG_000007" "g.58858937A>G" "" "{PMID:Cox 2019:31215115}" "" "" "" "Germline" "" "" "0" "" "" "g.58347571A>G" "" "VUS" ""
"0000568608" "0" "30" "19" "58868716" "58868716" "subst" "0.00112904" "01804" "ZNF497_000004" "g.58868716G>C" "" "" "" "ZNF497(NM_001207009.1):c.286C>G (p.(Arg96Gly))" "VKGL data sharing initiative Nederland" "CLASSIFICATION record" "" "" "" "" "" "g.58357350G>C" "" "likely benign" ""


## Variants_On_Transcripts ## Do not remove or alter this header ##
## Please note that not necessarily all variants found in the given individuals are shown. This output is restricted to variants in the selected gene.
## Note: Only showing Variants_On_Transcript columns active for Genes A1BG
## Count = 11
"{{id}}" "{{transcriptid}}" "{{effectid}}" "{{position_c_start}}" "{{position_c_start_intron}}" "{{position_c_end}}" "{{position_c_end_intron}}" "{{VariantOnTranscript/DNA}}" "{{VariantOnTranscript/RNA}}" "{{VariantOnTranscript/Protein}}" "{{VariantOnTranscript/Exon}}"
"0000002698" "00001331" "50" "1480" "105" "1480" "105" "c.1480+105del" "r.(=)" "p.(=)" ""
"0000010704" "00001331" "50" "1480" "105" "1480" "105" "c.1480+105del" "r.(=)" "p.(=)" ""
"0000013878" "00001331" "50" "1480" "43" "1480" "43" "c.1480+43T>C" "r.(=)" "p.(=)" ""
"0000193885" "00001331" "50" "1480" "0" "1480" "0" "c.1480G>A" "r.1490g>a" "p.(Glu494Lys)" ""
"0000193887" "00001331" "50" "1480" "0" "1480" "0" "c.1480G>A" "r.(?)" "p.(Glu494Lys)" ""
"0000193889" "00001331" "50" "1480" "0" "1480" "0" "c.1480G>A" "r.1490g>a" "p.(Glu494Lys)" ""
"0000193891" "00001331" "50" "1480" "0" "1480" "0" "c.1480G>A" "r.(?)" "p.(Glu494Lys)" ""
"0000193895" "00001331" "50" "1480" "0" "1480" "0" "c.1480G>A" "r.1490g>a" "p.(Glu494Lys)" ""
"0000193897" "00001331" "50" "1480" "0" "1480" "0" "c.1480G>A" "r.(?)" "p.(Glu494Lys)" ""
"0000480548" "00001331" "70" "1262" "0" "1262" "0" "c.1262T>C" "r.(?)" "p.(Leu421Pro)" ""
"0000568608" "00001331" "30" "-3913" "0" "-3913" "0" "c.-3913C>G" "r.(?)" "p.(=)" ""


## Screenings_To_Variants ## Do not remove or alter this header ##
## Count = 4
"{{screeningid}}" "{{variantid}}"
"0000000209" "0000002698"
"0000000210" "0000010704"
"0000000210" "0000013878"
"0000232512" "0000480548"


Loading

0 comments on commit d05f025

Please sign in to comment.