From 6f8bb6da1695a0ed76d9436c934a4910988886ff Mon Sep 17 00:00:00 2001 From: Mantvydas Deltuva Date: Sun, 14 Apr 2024 17:20:34 +0300 Subject: [PATCH] Strexas requested changes --- data_collection/pipeline.py | 4 ++++ data_collection/refactoring.py | 15 +++++---------- 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/data_collection/pipeline.py b/data_collection/pipeline.py index 9acff9c..7fe5426 100644 --- a/data_collection/pipeline.py +++ b/data_collection/pipeline.py @@ -1,6 +1,7 @@ """ Module executes general pipeline for data collection """ import pandas as pd +import logging from .collection import store_database_for_eys_gene from .refactoring import parse_lovd, convert_lovd_to_datatype, from_clinvar_name_to_cdna_position @@ -9,6 +10,9 @@ GNOMAD_PATH, CLINVAR_PATH) +# Configure logging +logging.basicConfig(level=logging.DEBUG, format="%(asctime)s - %(levelname)s - %(message)s", + datefmt="%Y-%m-%d %H:%M:%S") def calculate_max_frequency(row): """ diff --git a/data_collection/refactoring.py b/data_collection/refactoring.py index 8cf1d69..9329a62 100644 --- a/data_collection/refactoring.py +++ b/data_collection/refactoring.py @@ -1,18 +1,13 @@ """ Module dedicated for refactoring collected data for further processing """ import os -import logging as log +import logging import pandas as pd from pandas import DataFrame from .constants import LOVD_TABLES_DATA_TYPES -# Configure logging -log.basicConfig(level=log.INFO, format="%(asctime)s - %(levelname)s - %(message)s", - datefmt="%Y-%m-%d %H:%M:%S") - - def convert_lovd_to_datatype(df_dict): """ Convert data from LOVD format table to desired data format based on specified data types. @@ -46,7 +41,7 @@ def parse_lovd(path): Converts data from text file with LOVD format to dictionary of tables. Key is name of table, value is data saved as pandas DataFrame. - Notes for each tables are displayed with log. + Notes for each table are displayed with log. **IMPORTANT:** It doesn't provide types for data inside. Use convert_lovd_to_datatype for this. @@ -66,7 +61,7 @@ def parse_lovd(path): [f.readline() for _ in range(4)] # pylint: disable=expression-not-assigned # Notify about parsing in log - log.info("Parsing file %s using parse_lovd.", path) + logging.info("Parsing file %s using parse_lovd.", path) while True: line = f.readline() @@ -81,13 +76,13 @@ def parse_lovd(path): i = 1 line = f.readline() while line.startswith("##"): - notes += (f"\n - Note {i}: {line[3:-1]}") + notes += f"\n - Note {i}: {line[3:-1]}" i += 1 line = f.readline() # Log notes for each table if notes: - log.info("[%s]%s", table_name, notes) + logging.info("[%s]%s", table_name, notes) table_header = [column[3:-3] for column in line[:-1].split('\t')] frame = DataFrame([], columns=table_header)