From 4d2835c6e3f04486d96d0151f888e18ed4185f3d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kajus=20=C4=8Cerniauskas?= <kajeliukasc@gmail.com>
Date: Mon, 4 Mar 2024 20:19:27 +0200
Subject: [PATCH] Remove convert_lovd_to_datatype in favor of convert_lovd_to_datatypes

---
 data_collection/pipeline.py |  4 ++--
 data_collection/tools.py    | 29 -----------------------------
 2 files changed, 2 insertions(+), 31 deletions(-)

diff --git a/data_collection/pipeline.py b/data_collection/pipeline.py
index 5cf52ef..45d0dbb 100644
--- a/data_collection/pipeline.py
+++ b/data_collection/pipeline.py
@@ -1,6 +1,6 @@
 import pandas as pd
 from pandas import DataFrame, Series
-from tools import get_file_from_url, from_lovd_to_pandas, from_clinvar_name_to_DNA, convert_lovd_to_datatype, \
+from tools import get_file_from_url, from_lovd_to_pandas, from_clinvar_name_to_DNA, convert_lovd_to_datatypes, \
     LOVD_DATA_TYPES
 
 # CONSTANTS
@@ -68,7 +68,7 @@ def calculate_max_frequency(row):
 clinvar_data = pd.read_csv(CLINVAR_PATH + "/clinvar_data.txt", sep='\t')
 
 # Convert LOVD data types
-convert_lovd_to_datatype(from_lovd_to_pandas(LOVD_PATH + "/lovd_data.txt"))
+convert_lovd_to_datatypes(lovd_data)
 
 # renaming databases' columns
 gnomad_data.columns += "(gnomad)"
diff --git a/data_collection/tools.py b/data_collection/tools.py
index aa81520..cb67ff5 100644
--- a/data_collection/tools.py
+++ b/data_collection/tools.py
@@ -253,35 +253,6 @@ def convert_lovd_to_datatypes(table):
         print(f"Error: {e}")
 
 
-def convert_lovd_to_datatype(table):
-    """
-    Convert data from LOVD format table to desired data format based on specified data types.
-
-    :param dict table: Dictionary of tables where each table is represented by its name
-     and contains a tuple with a DataFrame and a list of notes.
-    """
-
-    for constant_table_name, attributes in LOVD_DATA_TYPES.items():
-        frame, notes = table[constant_table_name]
-        for column, data_type in attributes.items():
-            if column not in frame.columns:
-                continue
-
-            match [data_type]:
-                case ["Date"]:
-                    frame[column] = pd.to_datetime(frame[column], errors='coerce')
-                case ["Boolean"]:
-                    frame[column] = (frame[column] != 0).astype('bool')
-                case ["String"]:
-                    frame[column] = frame[column].astype('string')
-                case ["Integer"]:
-                    frame[column] = pd.to_numeric(frame[column], errors='coerce').astype('Int64')
-                case ["Double"]:
-                    frame[column] = pd.to_numeric(frame[column], errors='coerce').astype('float')
-                case _:
-                    continue
-
-
 def from_lovd_to_pandas(path):
     """
     Converts data from text file with LOVD format to dictionary of tables. \