Skip to content

Commit

Permalink
fix: non-conservative merging + partial frag txs solution
Browse files Browse the repository at this point in the history
  • Loading branch information
alejandrogzi committed Apr 15, 2024
1 parent 6adfa96 commit c00f456
Showing 1 changed file with 3 additions and 6 deletions.
9 changes: 3 additions & 6 deletions modules/make_query_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,21 +57,20 @@ def query_table(path: str) -> pd.DataFrame:
score["transcripts"] = [f"{k}.{v}" for k,v in zip(score["gene"], score["chain"])]
score = score[["transcripts", "pred", "gene"]]

table = pd.merge(ortho_x_loss, score, left_on="transcript", right_on="transcripts", how="left")
table = pd.merge(ortho_x_loss, score, left_on="transcript", right_on="transcripts", how="outer")
table["helper"].fillna(table["gene"], inplace=True)

# Create a new column with a renamed orthology relationship
table["relation"] = table["orthology_class"].map(Constants.ORTHOLOGY_TYPE)
table["t_gene"].fillna(table["helper"].map(isoforms_dict), inplace=True)
table["transcripts"].fillna(table["transcript"], inplace=True)

# Add paralog probabilities
paralog = pd.merge(score, paralogs, on="transcripts").groupby("gene").agg({"pred": "max"}).reset_index()
paralog.columns = ["helper", "paralog_prob"]
table = pd.merge(table, paralog, on="helper", how="left")
table["paralog_prob"].fillna(0, inplace=True)

# # Merge quality data
# table = pd.merge(table, quality, left_on="transcripts", right_on="Projection_ID")

table = table[
[
"t_gene",
Expand All @@ -82,15 +81,13 @@ def query_table(path: str) -> pd.DataFrame:
"pred",
"q_gene",
"paralog_prob",
# "confidence_level",
]
]

info = [
f"found {len(table)} projections, {len(table['helper'].unique())} unique transcripts, {len(table['t_gene'].unique())} unique genes",
f"class stats: {table['class'].value_counts().to_dict()}",
f"relation stats: {table['relation'].value_counts().to_dict()}"
# f"confidence stats: {table['confidence_level'].value_counts().to_dict()}",
]

[log.record(i) for i in info]
Expand Down

0 comments on commit c00f456

Please sign in to comment.