Skip to content

Commit

Permalink
Use python 3.10
Browse files Browse the repository at this point in the history
  • Loading branch information
JoshLoecker committed Mar 8, 2023
1 parent 92c3e02 commit a3d59b3
Show file tree
Hide file tree
Showing 4 changed files with 169 additions and 83 deletions.
2 changes: 1 addition & 1 deletion environment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ dependencies:
- pandas==1.*
- pytest==7.*
- python-libsbml==5.*
- python==3.11.*
- python==3.10.*
- r-biocmanager==1.*
- r-devtools==2.*
- r-ggrepel==0.*
Expand Down
2 changes: 1 addition & 1 deletion main/config.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ taxon_id = "human" # accepts a bioDBnet taxon id, "human", or "
create_counts_matrix = true # set to false if using a pregenerated matrix file
gene_format = "Ensembl" # accepts "Entrez", "Ensembl", and "Symbol"
preprocess_mode = "provide-matrix" # "create-matrix" or "provide-matrix"

matrix_filename = "" # This is required if preprocess_mode is "create-matrix"

[rna_seq_generation]
trnaseq_config_file = "trnaseq_data_inputs_auto.xlsx"
Expand Down
201 changes: 143 additions & 58 deletions main/py/project.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,14 @@
#!/usr/bin/python3

import os
import tomllib
import toml
from pathlib import Path
from datetime import datetime
from dataclasses import dataclass, field
from typing import Literal

from async_bioservices.input_database import InputDatabase

@dataclass
class general:
taxon_id: str
Expand All @@ -14,13 +17,14 @@ class general:
@dataclass
class rnaseq_preprocess:
create_counts_matrix: bool
gene_format: str
gene_format: InputDatabase
preprocess_mode: Literal["provide-matrix", "create-matrix"]
matrix_filename: Path

@dataclass
class rna_seq_generation:
trnaseq_config_file: str
mrnaseq_config_file: str
trnaseq_config_filepath: Path
mrnaseq_config_filepath: Path
technique: str
rep_ratio: float
group_ratio: float
Expand All @@ -31,7 +35,7 @@ class rna_seq_generation:

@dataclass
class proteomics_analysis:
proteomics_config_file: str
proteomics_config_file: Path
rep_ratio: float
batch_ratio: float
high_rep_ratio: float
Expand All @@ -53,11 +57,11 @@ class model_creation:
high_threshold: int
output_filetypes: str
objective_dict: dict
general_model_file: str
general_model_file: Path
solver: str
boundary_reactions_filename: str
force_reactions_filename: str
exclude_reactions_filename: str
boundary_reactions_filepath: Path
force_reactions_filepath: Path
exclude_reactions_filepath: Path
recon_algorithms: list[str] = field(default_factory=list[str])

@dataclass
Expand All @@ -68,7 +72,7 @@ class disease_analysis:
@dataclass
class drug_repurposing:
sovler: str
drug_raw_file: str
drug_raw_filepath: Path

@dataclass
class about:
Expand All @@ -79,8 +83,8 @@ def __post_init__(self):
# Set self.date to today's date in the format "Month Day, Year"
self.date = datetime.now().strftime("%B %d, %Y")

# Get "VERSION" from the environment
self.version = os.environ["COMO_VERSION"]
# EXAMPLE: get "v1.0.0-BRANCH" from "refs/tags/v1.0.0-BRANCH"
self.version = os.environ["COMO_VERSION"].split("/")[-1]


class Configs:
Expand All @@ -103,94 +107,173 @@ def __init__(self, projectdir):
self.drug_repurposing: drug_repurposing = self._get_drug_repurposing()
self.about: about = self._get_about()

def _read_from_toml(self):
    """Parse ``config.toml`` located directly under ``self.rootdir``.

    Returns:
        The value produced by ``toml.load`` for that file.
    """
    toml_file: str = os.path.join(self.rootdir, "config.toml")
    # The third-party `toml` parser works on text: handing it a binary stream
    # (the mode `tomllib.load` requires) makes it raise TypeError. The file is
    # therefore opened in text mode; TOML documents are UTF-8 by specification.
    with open(toml_file, "r", encoding="utf-8") as i_stream:
        data = toml.load(i_stream)
    return data

def _get_general(self) -> general:
    """Check the ``[general]`` table of the parsed config and wrap it in ``general``.

    Raises:
        ValueError: if ``taxon_id`` is a string other than "human" or "mouse"
            (compared case-insensitively), or if ``context_names`` is not a list.
    """
    section = self._toml_data["general"]
    taxon = section["taxon_id"]
    names: list[str] = section["context_names"]

    # Only string taxon ids are checked; values of any other type pass through as-is.
    if isinstance(taxon, str) and taxon.lower() not in ("human", "mouse"):
        raise ValueError("The taxon_id setting under 'general' must be either 'human' or 'mouse'.\nPlease edit `config.toml`")

    if not isinstance(names, list):
        raise ValueError("The context_names setting under 'general' must be a list, such as `['type1', 'type2']`.\nPlease edit `config.toml`")

    return general(taxon_id=taxon, context_names=names)

def _get_rnaseq_preprocess(self) -> rnaseq_preprocess:
    """Validate the ``[rnaseq_preprocess]`` table and wrap it in ``rnaseq_preprocess``.

    ``gene_format`` is matched case-insensitively against a table of accepted
    spellings and converted to the corresponding ``InputDatabase`` member.

    Raises:
        ValueError: if ``create_counts_matrix`` is not a bool, ``gene_format``
            is not one of the accepted spellings, ``preprocess_mode`` is not
            "provide-matrix" or "create-matrix", or ``matrix_filename`` is
            empty while ``preprocess_mode`` is "create-matrix".
    """
    section = self._toml_data["rnaseq_preprocess"]
    create_counts_matrix = section["create_counts_matrix"]
    gene_format = section["gene_format"]
    preprocess_mode = section["preprocess_mode"]
    matrix_filename = section["matrix_filename"]

    if not isinstance(create_counts_matrix, bool):
        raise ValueError("The create_counts_matrix setting under 'rnaseq_preprocess' must be either 'true' or 'false'.\nPlease edit `config.toml`")

    # Accepted spellings, all lower-case because the lookup key is lower-cased.
    gene_format_aliases = (
        (InputDatabase.ENSEMBL_GENE_ID, ("ensembl", "ensemble", "ensg", "ensmusg", "ensembl id", "ensembl gene id")),
        (InputDatabase.GENE_SYMBOL, ("hgnc symbol", "hugo", "hugo symbol", "symbol", "hgnc", "gene symbol")),
        (InputDatabase.GENE_ID, ("entrez", "entres", "entrez id", "entrez number", "gene id")),
    )
    gene_format_key = gene_format.lower() if isinstance(gene_format, str) else None
    gene_format_database = next(
        (database for database, aliases in gene_format_aliases if gene_format_key in aliases),
        None,
    )
    # Reject anything that matched no alias, so the value can never be left
    # unbound and valid spellings such as "Ensembl" are no longer refused.
    if gene_format_database is None:
        raise ValueError("The gene_format setting under 'rnaseq_preprocess' must be either 'Entrez', 'Ensembl', or 'Symbol'.\nPlease edit `config.toml`")

    if preprocess_mode.lower() not in ["provide-matrix", "create-matrix"]:
        raise ValueError("The preprocess_mode setting under 'rnaseq_preprocess' must be either 'provide-matrix' or 'create-matrix'.\nPlease edit `config.toml`")

    # NOTE(review): the mode name "provide-matrix" suggests that mode may be the
    # one needing a filename - confirm which mode really requires matrix_filename.
    if preprocess_mode.lower() == "create-matrix" and matrix_filename == "":
        raise ValueError("The matrix_filename setting under 'rnaseq_preprocess' must be set if the preprocess_mode is set to 'create-matrix'.\nPlease edit `config.toml`")

    data: rnaseq_preprocess = rnaseq_preprocess(
        create_counts_matrix=create_counts_matrix,
        gene_format=gene_format_database,
        preprocess_mode=preprocess_mode,
        matrix_filename=matrix_filename,
    )
    return data

def _get_rna_seq_generation(self) -> rna_seq_generation:
    """Collect the ``[rna_seq_generation]`` settings into ``rna_seq_generation``.

    The two config file names are joined onto ``self.configdir``; every other
    setting is forwarded exactly as read.

    Raises:
        ValueError: if ``technique`` (compared case-insensitively) is not
            "quantile", "zfpkm", or "cpm".
    """
    config_dir = self.configdir
    section = self._toml_data["rna_seq_generation"]

    settings = {
        "trnaseq_config_filepath": Path(config_dir, section["trnaseq_config_file"]),
        "mrnaseq_config_filepath": Path(config_dir, section["mrnaseq_config_file"]),
    }
    for key in ("technique", "rep_ratio", "group_ratio", "rep_ratio_h", "group_ratio_h", "quantile", "min_zfpkm"):
        settings[key] = section[key]

    if settings["technique"].lower() not in ("quantile", "zfpkm", "cpm"):
        raise ValueError("The technique setting under 'rna_seq_generation' must be either 'quantile', 'zfpkm', or 'cpm'.\nPlease edit `config.toml`")

    return rna_seq_generation(**settings)

def _get_proteomics_analysis(self) -> proteomics_analysis:
    """Collect the ``[proteomics_analysis]`` settings into ``proteomics_analysis``.

    ``proteomics_config_file`` is joined onto ``self.configdir``; the ratio and
    quantile settings are forwarded exactly as read, without validation.
    """
    config_dir = self.configdir
    section = self._toml_data["proteomics_analysis"]

    settings = {"proteomics_config_file": Path(config_dir, section["proteomics_config_file"])}
    for key in ("rep_ratio", "batch_ratio", "high_rep_ratio", "high_batch_ratio", "quantile"):
        settings[key] = section[key]

    return proteomics_analysis(**settings)

def _get_merge_xomics(self) -> merge_xomics:
    """Copy the ``[merge_xomics]`` settings into ``merge_xomics`` without validation."""
    section = self._toml_data["merge_xomics"]
    wanted = (
        "expression_requirement",
        "requirement_adjust",
        "total_rna_weight",
        "mrna_weight",
        "single_cell_weight",
        "proteomics_weight",
    )
    # Field names of the dataclass are the same as the keys of the config table.
    return merge_xomics(**{key: section[key] for key in wanted})

def _get_model_creation(self) -> model_creation:
    """Copy the ``[model_creation]`` settings into ``model_creation`` without validation.

    The three ``*_reactions_filename`` keys of the config table feed the
    ``*_reactions_filepath`` fields of the dataclass.
    """
    section = self._toml_data["model_creation"]
    # NOTE(review): `general_model_file` and the `*_filepath` fields are annotated
    # as Path on the dataclass, but the raw config values are forwarded unchanged
    # here - confirm whether a Path conversion is expected.
    return model_creation(
        low_threshold=section["low_threshold"],
        high_threshold=section["high_threshold"],
        output_filetypes=section["output_filetypes"],
        objective_dict=section["objective_dict"],
        general_model_file=section["general_model_file"],
        solver=section["solver"],
        boundary_reactions_filepath=section["boundary_reactions_filename"],
        force_reactions_filepath=section["force_reactions_filename"],
        exclude_reactions_filepath=section["exclude_reactions_filename"],
        recon_algorithms=section["recon_algorithms"],
    )

def _get_disease_analysis(self) -> disease_analysis:
    """Copy the ``[disease_analysis]`` settings into ``disease_analysis`` without validation."""
    section = self._toml_data["disease_analysis"]
    return disease_analysis(
        data_source=section["data_source"],
        disease_names=section["disease_names"],
    )

def _get_drug_repurposing(self) -> drug_repurposing:
    """Copy the ``[drug_repurposing]`` settings into ``drug_repurposing`` without validation.

    The config key ``drug_raw_file`` feeds the ``drug_raw_filepath`` field.
    """
    section = self._toml_data["drug_repurposing"]
    # NOTE(review): `drug_raw_filepath` is annotated as Path on the dataclass, but
    # the raw config value is forwarded unchanged - confirm whether a Path
    # conversion is expected.
    return drug_repurposing(
        sovler=section["sovler"],
        drug_raw_filepath=section["drug_raw_file"],
    )

def _get_about(self) -> about:
    """Return a new ``about`` instance built with no arguments."""
    info: about = about()
    return info

def _read_from_toml(self):
toml_file: str = os.path.join(self.rootdir, "config.toml")
with open(toml_file, "rb") as i_stream:
data = tomllib.load(i_stream)
return data


current_dir = os.getcwd()
Expand All @@ -210,3 +293,5 @@ def _read_from_toml(self):
# Add leading "/", as it will not exist right now
work_dir = os.path.join("/", work_dir)
configs = Configs(work_dir)
print(configs.about.version)
print(configs.about.date)
Loading

0 comments on commit a3d59b3

Please sign in to comment.