Skip to content

Commit

Permalink
Strexas requested changes
Browse files Browse the repository at this point in the history
  • Loading branch information
mantvydasdeltuva committed Apr 14, 2024
1 parent 7e6a90f commit 6f8bb6d
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 10 deletions.
4 changes: 4 additions & 0 deletions data_collection/pipeline.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
""" Module executes general pipeline for data collection """

import pandas as pd
+import logging

from .collection import store_database_for_eys_gene
from .refactoring import parse_lovd, convert_lovd_to_datatype, from_clinvar_name_to_cdna_position
Expand All @@ -9,6 +10,9 @@
GNOMAD_PATH,
CLINVAR_PATH)

+# Configure logging
+logging.basicConfig(level=logging.DEBUG, format="%(asctime)s - %(levelname)s - %(message)s",
+datefmt="%Y-%m-%d %H:%M:%S")

def calculate_max_frequency(row):
"""
Expand Down
15 changes: 5 additions & 10 deletions data_collection/refactoring.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,13 @@
""" Module dedicated for refactoring collected data for further processing """

import os
-import logging as log
+import logging

import pandas as pd
from pandas import DataFrame

from .constants import LOVD_TABLES_DATA_TYPES

-# Configure logging
-log.basicConfig(level=log.INFO, format="%(asctime)s - %(levelname)s - %(message)s",
-datefmt="%Y-%m-%d %H:%M:%S")


def convert_lovd_to_datatype(df_dict):
"""
Convert data from LOVD format table to desired data format based on specified data types.
Expand Down Expand Up @@ -46,7 +41,7 @@ def parse_lovd(path):
Converts data from text file with LOVD format to dictionary of tables.
Key is name of table, value is data saved as pandas DataFrame.
-Notes for each tables are displayed with log.
+Notes for each table are displayed with log.
**IMPORTANT:** It doesn't provide types for data inside. Use convert_lovd_to_datatype for this.
Expand All @@ -66,7 +61,7 @@ def parse_lovd(path):
[f.readline() for _ in range(4)] # pylint: disable=expression-not-assigned

# Notify about parsing in log
-log.info("Parsing file %s using parse_lovd.", path)
+logging.info("Parsing file %s using parse_lovd.", path)

while True:
line = f.readline()
Expand All @@ -81,13 +76,13 @@ def parse_lovd(path):
i = 1
line = f.readline()
while line.startswith("##"):
-notes += (f"\n - Note {i}: {line[3:-1]}")
+notes += f"\n - Note {i}: {line[3:-1]}"
i += 1
line = f.readline()

# Log notes for each table
if notes:
-log.info("[%s]%s", table_name, notes)
+logging.info("[%s]%s", table_name, notes)

table_header = [column[3:-3] for column in line[:-1].split('\t')]
frame = DataFrame([], columns=table_header)
Expand Down

0 comments on commit 6f8bb6d

Please sign in to comment.