-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #217 from MannLabs/development
Release 1.7.0
- Loading branch information
Showing
45 changed files
with
2,432 additions
and
1,751 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,3 @@ | ||
#!python | ||
|
||
__version__ = "1.6.2" | ||
__version__ = "1.7.0" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,81 @@ | ||
import logging | ||
import os | ||
import pandas as pd | ||
|
||
logger = logging.getLogger() | ||
supported_formats = ["parquet", "tsv"] | ||
|
||
|
||
def read_df(path_no_format, file_format="parquet"):
    """Read a dataframe from disk using the chosen file format.

    Parameters
    ----------
    path_no_format: str
        Path of the file to read, without the file format extension
    file_format: str, default = 'parquet'
        File format for loading the file. Available options: ['parquet', 'tsv']

    Returns
    -------
    pd.DataFrame
        loaded dataframe from disk

    Raises
    ------
    ValueError
        If file_format is not one of the supported formats
    FileNotFoundError
        If no file exists at "<path_no_format>.<file_format>"
    """

    # Validate the format before touching the filesystem. Otherwise an
    # unsupported format is reported as a missing file, which hides the
    # actual mistake from the caller.
    if file_format not in supported_formats:
        raise ValueError(
            f"Provided unknown file format: {file_format}, supported_formats: {supported_formats}"
        )

    file_path = f"{path_no_format}.{file_format}"

    if not os.path.exists(file_path):
        raise FileNotFoundError(f"Can't load file as file was not found: {file_path}")

    logger.info(f"Reading {file_path} from disk")

    if file_format == "parquet":
        return pd.read_parquet(file_path)

    if file_format == "tsv":
        return pd.read_csv(file_path, sep="\t")

    # Only reachable if supported_formats gains an entry without a matching
    # reader branch above.
    raise ValueError(f"No reader implemented for file format: {file_format}")
|
||
|
||
def write_df(df, path_no_format, file_format="parquet"):
    """Write a dataframe to disk using the chosen file format.

    The dataframe index is not written for either format. For 'tsv',
    floats are formatted with six decimal places.

    Parameters
    ----------
    df: pd.DataFrame
        Dataframe to save to disk
    path_no_format: str
        Path for file without format; the format is appended as extension
    file_format: str, default = 'parquet'
        File format for saving the file. Available options: ['parquet', 'tsv']

    Raises
    ------
    ValueError
        If file_format is not one of the supported formats
    """

    # Reject unsupported formats before anything is logged or written.
    if not (file_format in supported_formats):
        raise ValueError(
            f"Provided unknown file format: {file_format}, supported_formats: {supported_formats}"
        )

    target = f"{path_no_format}.{file_format}"
    logger.info(f"Saving {target} to disk")

    if file_format == "parquet":
        df.to_parquet(target, index=False)
        return

    if file_format == "tsv":
        df.to_csv(target, sep="\t", index=False, float_format="%.6f")
        return

    # A format listed in supported_formats but without a writer above.
    raise ValueError("I don't know how you ended up here")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
import numpy as np | ||
import logging | ||
|
||
logger = logging.getLogger() | ||
|
||
|
||
def log_stats(rt_values: np.ndarray, cycle: np.ndarray) -> None:
    """Log raw file statistics.

    Logs the retention time range and duration, the cycle length in scans
    and seconds, the number of cycles and the MS2 m/z range.

    Parameters
    ----------
    rt_values: np.ndarray
        retention time values in seconds for all frames
    cycle: np.ndarray
        DIA cycle object describing the msms pattern. The size of its second
        axis is used as the cycle length, and its positive entries are used
        for the MS2 range.
    """

    logger.info("============ Raw file stats ============")

    rt_min_sec = rt_values.min()
    rt_max_sec = rt_values.max()
    rt_duration_sec = rt_max_sec - rt_min_sec
    rt_duration_min = rt_duration_sec / 60

    logger.info(f"{'RT (min)':<20}: {rt_min_sec / 60:.1f} - {rt_max_sec / 60:.1f}")
    logger.info(f"{'RT duration (sec)':<20}: {rt_duration_sec:.1f}")
    logger.info(f"{'RT duration (min)':<20}: {rt_duration_min:.1f}")

    cycle_length = cycle.shape[1]
    # Every cycle_length-th retention time marks the start of a cycle.
    cycle_starts = rt_values[::cycle_length]
    # A duration needs at least two cycle starts. Report NaN directly rather
    # than taking the mean of an empty array, which emits a RuntimeWarning.
    if len(cycle_starts) > 1:
        cycle_duration = np.diff(cycle_starts).mean()
    else:
        cycle_duration = np.nan
    cycle_number = len(rt_values) // cycle_length

    logger.info(f"{'Cycle len (scans)':<20}: {cycle_length:.0f}")
    logger.info(f"{'Cycle len (sec)':<20}: {cycle_duration:.2f}")
    logger.info(f"{'Number of cycles':<20}: {cycle_number:.0f}")

    # Only positive entries contribute to the MS2 range.
    flat_cycle = cycle.flatten()
    flat_cycle = flat_cycle[flat_cycle > 0]

    if flat_cycle.size > 0:
        logger.info(
            f"{'MS2 range (m/z)':<20}: {flat_cycle.min():.1f} - {flat_cycle.max():.1f}"
        )
    else:
        # No positive entries: min()/max() would raise, and a logging helper
        # should not abort the caller over missing statistics.
        logger.info(f"{'MS2 range (m/z)':<20}: n/a")

    logger.info("========================================")
Oops, something went wrong.