Skip to content

Commit

Permalink
Merge pull request #59 from Strexas/main
Browse files Browse the repository at this point in the history
Merge `main` into `JTE/PKFE-46`
  • Loading branch information
mantvydasdeltuva authored Sep 10, 2024
2 parents 9e0e201 + 1c7e137 commit e2e9c48
Show file tree
Hide file tree
Showing 31 changed files with 1,658 additions and 4,071 deletions.
3 changes: 2 additions & 1 deletion api/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,5 +54,6 @@
# Functions for refactoring data
set_lovd_dtypes,
parse_lovd,
from_clinvar_name_to_cdna_position
from_clinvar_name_to_cdna_position,
save_lovd_as_vcf,
)
65 changes: 65 additions & 0 deletions api/data/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
"""
Package for data collection purposes that provides both collection and refactoring functionality.
Data from the LOVD, ClinVar and gnomAD databases can be downloaded using this package. gnomAD and
ClinVar downloads are limited to the EYS gene, but LOVD data can be downloaded for any gene.
All necessary functionality can be imported directly from data without
specifying the module.
The data collection pipeline example is established for the project's specific usage.
"""

# CONSTANTS IMPORT
from .constants import (
# URLs for LOVD database
LOVD_URL, LOVD_URL_EYS, LOVD_FILE_URL, LOVD_FILE_URL_EYS,

# URLs for gnomAD database
GNOMAD_URL, GNOMAD_URL_EYS, GNOMAD_FILE_URL_EYS,

# URLs for ClinVar database
CLINVAR_URL, CLINVAR_URL_EYS, CLINVAR_FILE_URL_EYS,

# Paths for data storage
DATA_PATH, LOVD_PATH, GNOMAD_PATH, CLINVAR_PATH,

# Data types for tables
LOVD_TABLES_DATA_TYPES,

# Paths for database downloads
DATABASES_DOWNLOAD_PATHS,

GNOMAD_PATH,
)

# DATA COLLECTION IMPORT
from .collection import (
# Custom exceptions
BadResponseException,
DownloadError,

# Custom utility functions
get_file_from_url,

# Functions for downloading databases
download_lovd_database_for_eys_gene,
download_genes_lovd,
download_database_for_eys_gene,

# Functions for storing databases
store_database_for_eys_gene
)

# DATA REFACTORING IMPORT
from .refactoring import (
# Functions for refactoring data
set_lovd_dtypes,
parse_lovd,
from_clinvar_name_to_cdna_position,
save_lovd_as_vcf,
request_gnomad_api_data,
merge_gnomad_lovd,
parse_gnomad,
set_gnomad_dtypes,
)
147 changes: 147 additions & 0 deletions api/data/refactoring.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,15 @@
import os
import logging

import requests

import pandas as pd
from pandas import DataFrame

from .constants import LOVD_TABLES_DATA_TYPES, LOVD_PATH, GNOMAD_TABLES_DATA_TYPES, GNOMAD_PATH



def set_lovd_dtypes(df_dict):
"""
Convert data from LOVD format table to desired data format based on specified data types.
Expand Down Expand Up @@ -242,3 +245,147 @@ def save_lovd_as_vcf(data, save_to="./lovd.vcf"):

f.write("\t".join(record))
f.write("\n")


def prepare_popmax_calculation(df, pop_data, name, pop_ids, index):
    """
    Fills the per-population allele count (ac) and allele number (an) cells of one variant row.

    Every population listed in ``pop_ids`` is first set to zero in the columns
    ``{name}_ac_{id}`` and ``{name}_an_{id}``. When ``pop_data`` is a list, each of its
    entries then overwrites (or creates) the two columns named after the entry's ``id``.
    The DataFrame is modified in place and nothing is returned.

    :param DataFrame df: DataFrame containing gnomAD data
    :param pop_data: list of dicts with the keys ``id``, ``ac`` and ``an``;
        any value that is not a list leaves the zeros untouched
    :param str name: column prefix of the data source (the caller passes 'exome' or 'genome')
    :param list[str] pop_ids: list of population ids that must always receive a value
    :param int index: index label of the variant row to fill
    """

    # Zero the expected populations first so the row has no gaps
    # even when the source reports nothing for this variant.
    for expected_id in pop_ids:
        for metric in ('ac', 'an'):
            df.loc[index, f'{name}_{metric}_{expected_id}'] = 0

    # Anything other than a list means there is no population data to copy.
    if not isinstance(pop_data, list):
        return

    for entry in pop_data:
        reported_id = entry['id']
        df.loc[index, f'{name}_ac_{reported_id}'] = entry['ac']
        df.loc[index, f'{name}_an_{reported_id}'] = entry['an']


def request_gnomad_api_data(gene_name):
    """
    Requests gnomAD API for data about a specific gene and summarises every variant.

    The returned table holds one row per variant with the columns:
    - Popmax
    - Popmax population
    - Homozygote Count
    - Allele Frequency
    - gnomAD ID (the variant_id reported by the API)
    - HGVS Consequence (cDNA change)
    - Protein Consequence (protein change)

    :param str gene_name: name of gene
    :returns: DataFrame from gnomAD API
    :rtype: DataFrame
    :raises requests.exceptions.HTTPError: if the API answers with a status other than 200
    :raises ValueError: if the response contains no data for the requested gene
    """

    url = 'https://gnomad.broadinstitute.org/api'
    query = f"""
    query{{
      gene(gene_symbol: "{gene_name}", reference_genome: GRCh38) {{
        variants(dataset: gnomad_r4)
        {{
          variant_id
          chrom
          pos
          ref
          hgvsc
          hgvsp
          alt
          exome {{
            ac
            an
            ac_hom
            populations
            {{
              id
              ac
              an
            }}
          }}
          genome
          {{
            ac
            an
            ac_hom
            populations
            {{
              id
              ac
              an
            }}
          }}
        }}
      }}
    }}
    """

    response = requests.post(url, json={'query': query}, timeout=300)  # timeout set to 5 minutes

    # Stop here on a failed request instead of failing later on a body that is not the expected JSON.
    if response.status_code != 200:
        logging.error('gnomAD API request for gene %s failed with status %s',
                      gene_name, response.status_code)
        raise requests.exceptions.HTTPError(
            f'gnomAD API responded with status {response.status_code}', response=response)

    # A GraphQL error (e.g. unknown gene) comes back without a gene object.
    gene = (response.json().get('data') or {}).get('gene')
    if gene is None:
        raise ValueError(f'gnomAD API returned no data for gene {gene_name}')
    variants = gene['variants']

    # Columns kept in the result, in output order; 'variant_id' is renamed on return.
    output_columns = ['Popmax', 'Popmax population', 'Homozygote Count', 'Allele Frequency',
                      'variant_id', 'HGVS Consequence', 'Protein Consequence']
    renamed_columns = {'variant_id': 'gnomAD ID'}

    # Without variants json_normalize yields a frame with no columns at all,
    # so return an empty table that still has the documented layout.
    if not variants:
        return pd.DataFrame(columns=output_columns).rename(columns=renamed_columns)

    df = pd.json_normalize(variants)

    # A variant may be missing from the exome or the genome data; count the absent side as zero.
    df['total_ac'] = df['exome.ac'].fillna(0) + df['genome.ac'].fillna(0)
    df['total_an'] = df['exome.an'].fillna(0) + df['genome.an'].fillna(0)

    df['HGVS Consequence'] = df['hgvsc'].fillna(0)  # cDNA change
    df['Protein Consequence'] = df['hgvsp'].fillna(0)  # Protein change

    df['Allele Frequency'] = df['total_ac'] / df['total_an']
    df['Homozygote Count'] = df['exome.ac_hom'].fillna(0) + df['genome.ac_hom'].fillna(0)

    population_ids = ['afr', 'eas', 'asj', 'sas', 'nfe', 'fin', 'mid', 'amr', 'ami', 'remaining']

    # Spread the nested population lists into flat '<source>_ac_<id>' / '<source>_an_<id>' columns.
    for row_label, exome_pop, genome_pop in zip(df.index,
                                                df['exome.populations'],
                                                df['genome.populations']):
        prepare_popmax_calculation(df, exome_pop, 'exome', population_ids, row_label)
        prepare_popmax_calculation(df, genome_pop, 'genome', population_ids, row_label)

    for population_id in population_ids:
        combined_ac = (df[f'exome_ac_{population_id}'].fillna(0)
                       + df[f'genome_ac_{population_id}'].fillna(0))
        combined_an = (df[f'exome_an_{population_id}'].fillna(0)
                       + df[f'genome_an_{population_id}'].fillna(0))
        df[f'Allele_Frequency_{population_id}'] = combined_ac / combined_an

    population_mapping = {
        'afr': 'African/African American',
        'eas': 'East Asian',
        'asj': 'Ashkenazi Jew',
        'sas': 'South Asian',
        'nfe': 'European (non-Finnish)',
        'fin': 'European (Finnish)',
        'mid': 'Middle Eastern',
        'amr': 'Admixed American',
        'ami': "Amish",
        'remaining': 'Remaining',
        '': ''
    }

    # Popmax is the highest per-population frequency. A 0/0 frequency (NaN) can never win,
    # so it is treated as 0; idxmax returns the first maximum, which keeps the earliest
    # population in population_ids on ties.
    frequencies = df[[f'Allele_Frequency_{population_id}' for population_id in population_ids]].fillna(0)
    frequencies.columns = population_ids
    popmax = frequencies.max(axis=1)
    # A variant whose frequency is zero everywhere has no popmax population.
    popmax_ids = frequencies.idxmax(axis=1).where(popmax > 0, '')
    df['Popmax'] = popmax
    df['Popmax population'] = popmax_ids.map(population_mapping)

    df = df.filter(output_columns, axis="columns")

    # rename returns a new frame, so its result has to be the value handed back.
    return df.rename(columns=renamed_columns)
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,7 @@ export const EditorView: React.FC = () => {
const { totalRows, header, rows } = fileContentResponse;

if (!header) {
fileStateUpdate(undefined, { columns: [], rows: [], aggregations: {} }, undefined);
fileStateUpdate(undefined, { columns: [], rows: [], aggregations: fileContent.aggregations }, undefined);
return;
}

Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
import { List, useTheme } from '@mui/material';
import { Box, List, useTheme } from '@mui/material';

export interface ToolbarGroupProps {
children: React.ReactNode;
params: React.ReactNode;
buttons: React.ReactNode;
}

/**
Expand All @@ -25,24 +26,34 @@ export interface ToolbarGroupProps {
* @param {React.ReactNode} children - The child elements to be displayed inside the list.
* @returns {JSX.Element} The rendered List component.
*/
export const ToolbarGroup: React.FC<ToolbarGroupProps> = ({ children }) => {
export const ToolbarGroup: React.FC<ToolbarGroupProps> = ({ params, buttons }) => {
const Theme = useTheme();

return (
<List
<Box
sx={{
height: '75%',
bgcolor: Theme.palette.background.paper,
px: '1rem',
display: 'flex',
flexDirection: 'row',
flexWrap: 'wrap',
gap: '1rem',
width: '100%',
height: '100%',
display: 'grid',
gridTemplateColumns: '30% 70%',
overflow: 'auto',
alignContent: 'flex-start',
bgcolor: Theme.palette.background.paper,
}}
>
{children}
</List>
<Box sx={{ borderRight: `solid 2px ${Theme.palette.action.selected}` }}>{params}</Box>
<List
sx={{
pl: '1rem',
display: 'flex',
flexDirection: 'row',
flexWrap: 'wrap',
gap: '1rem',
overflow: 'auto',
alignContent: 'flex-start',
}}
>
{buttons}
</List>
</Box>
);
};
Loading

0 comments on commit e2e9c48

Please sign in to comment.