Skip to content

Commit

Permalink
Changed datatype function
Browse files Browse the repository at this point in the history
  • Loading branch information
KajusC committed Mar 4, 2024
1 parent 55d1f9b commit 4d2835c
Show file tree
Hide file tree
Showing 2 changed files with 2 additions and 31 deletions.
4 changes: 2 additions & 2 deletions data_collection/pipeline.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import pandas as pd
from pandas import DataFrame, Series
from tools import get_file_from_url, from_lovd_to_pandas, from_clinvar_name_to_DNA, convert_lovd_to_datatype, \
from tools import get_file_from_url, from_lovd_to_pandas, from_clinvar_name_to_DNA, convert_lovd_to_datatypes, \
LOVD_DATA_TYPES

# CONSTANTS
Expand Down Expand Up @@ -68,7 +68,7 @@ def calculate_max_frequency(row):
clinvar_data = pd.read_csv(CLINVAR_PATH + "/clinvar_data.txt", sep='\t')

# Convert LOVD data types
convert_lovd_to_datatype(from_lovd_to_pandas(LOVD_PATH + "/lovd_data.txt"))
convert_lovd_to_datatypes(lovd_data)

# renaming databases' columns
gnomad_data.columns += "(gnomad)"
Expand Down
29 changes: 0 additions & 29 deletions data_collection/tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -253,35 +253,6 @@ def convert_lovd_to_datatypes(table):
print(f"Error: {e}")


def convert_lovd_to_datatype(table):
"""
Convert data from LOVD format table to desired data format based on specified data types.
:param dict table: Dictionary of tables where each table is represented by its name
and contains a tuple with a DataFrame and a list of notes.
"""

for constant_table_name, attributes in LOVD_DATA_TYPES.items():
frame, notes = table[constant_table_name]
for column, data_type in attributes.items():
if column not in frame.columns:
continue

match [data_type]:
case ["Date"]:
frame[column] = pd.to_datetime(frame[column], errors='coerce')
case ["Boolean"]:
frame[column] = (frame[column] != 0).astype('bool')
case ["String"]:
frame[column] = frame[column].astype('string')
case ["Integer"]:
frame[column] = pd.to_numeric(frame[column], errors='coerce').astype('Int64')
case ["Double"]:
frame[column] = pd.to_numeric(frame[column], errors='coerce').astype('float')
case _:
continue


def from_lovd_to_pandas(path):
"""
Converts data from text file with LOVD format to dictionary of tables. \
Expand Down

0 comments on commit 4d2835c

Please sign in to comment.