Skip to content

Commit

Permalink
Treat Series input like dict for chromsizes and midpoints
Browse files Browse the repository at this point in the history
  • Loading branch information
nvictus authored Nov 7, 2023
1 parent baf3344 commit 3d2f347
Show file tree
Hide file tree
Showing 2 changed files with 47 additions and 29 deletions.
17 changes: 11 additions & 6 deletions bioframe/extras.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,15 +28,15 @@ def make_chromarms(
Parameters
----------
chromsizes : pandas.Dataframe or pandas.Series
If pandas.Series, a map from chromosomes to lengths in bp.
chromsizes : pandas.DataFrame or dict-like
If dict or pandas.Series, a map from chromosomes to lengths in bp.
If pandas.DataFrame, a dataframe with columns defined by cols_chroms.
If cols_chroms is a triplet (e.g. 'chrom','start','end'), then
values in chromsizes[cols_chroms[1]].values must all be zero.
midpoints : pandas.DataFrame or dict-like
Mapping of chromosomes to midpoint (aka centromere) locations.
If pandas.Series, a map from chromosomes to midpoints in bp.
If dict or pandas.Series, a map from chromosomes to midpoints in bp.
If pandas.DataFrame, a dataframe with columns defined by cols_mids.
cols_chroms : (str, str) or (str, str, str)
Expand All @@ -59,9 +59,13 @@ def make_chromarms(
elif len(cols_chroms) == 3:
ck1, sk1, ek1 = cols_chroms

if isinstance(chromsizes, pd.Series):
if isinstance(chromsizes, (pd.Series, dict)):
chromsizes = dict(chromsizes)
df_chroms = (
pd.DataFrame(chromsizes).reset_index().rename(columns={"index": ck1})
pd.DataFrame({
ck1: list(chromsizes.keys()),
"length": list(chromsizes.values()),
})
)
elif isinstance(chromsizes, pd.DataFrame):
df_chroms = chromsizes.copy()
Expand All @@ -83,7 +87,8 @@ def make_chromarms(
raise ValueError("invalid number of cols_chroms")

ck2, sk2 = cols_mids
if isinstance(midpoints, dict):
if isinstance(midpoints, (pd.Series, dict)):
midpoints = dict(midpoints)
df_mids = pd.DataFrame.from_dict(midpoints, orient="index", columns=[sk2])
df_mids.reset_index(inplace=True)
df_mids.rename(columns={"index": ck2}, inplace=True)
Expand Down
59 changes: 36 additions & 23 deletions tests/test_extras.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,46 +12,59 @@
def test_make_chromarms():

### test the case where columns have different names
df1 = pd.DataFrame(
df = pd.DataFrame(
[["chrX", 0, 8]],
columns=["chromosome", "lo", "hi"],
)

df2 = pd.DataFrame([["chrX", 4]], columns=["chromosome", "loc"])

df_result = pd.DataFrame(
mids = pd.DataFrame([["chrX", 4]], columns=["chromosome", "loc"])
arms = pd.DataFrame(
[
["chrX", 0, 4, "chrX_p"],
["chrX", 4, 8, "chrX_q"],
],
columns=["chromosome", "lo", "hi", "name"],
columns=["chrom", "start", "end", "name"],
)
arms = arms.astype({"start": pd.Int64Dtype(), "end": pd.Int64Dtype()})

# test passing 3 columns
result = bioframe.make_chromarms(
df,
mids,
cols_chroms=["chromosome", "lo", "hi"],
cols_mids=["chromosome", "loc"],
)
pd.testing.assert_frame_equal(
df_result.astype({"lo": pd.Int64Dtype(), "hi": pd.Int64Dtype()}),
bioframe.make_chromarms(
df1,
df2,
cols_chroms=["chromosome", "lo", "hi"],
cols_mids=["chromosome", "loc"],
),
result,
arms.rename(columns={"chrom": "chromosome", "start": "lo", "end": "hi"})
)

# test passing 2 columns
result = bioframe.make_chromarms(
df,
mids,
cols_chroms=["chromosome", "hi"],
cols_mids=["chromosome", "loc"],
)
pd.testing.assert_frame_equal(
df_result.astype({"lo": pd.Int64Dtype(), "hi": pd.Int64Dtype()}).rename(
columns={"lo": "start", "hi": "end"}
),
bioframe.make_chromarms(
df1,
df2,
cols_chroms=["chromosome", "hi"],
cols_mids=["chromosome", "loc"],
),
result,
arms.rename(columns={"chrom": "chromosome"}),
)

# todo: test for passing pd.series !
# test for passing Series or dict
# Each call's return value is bound to `result` so that every assertion
# checks the output of the call directly above it, not a stale frame from
# an earlier call.

# chromsizes as Series, midpoints as DataFrame
result = bioframe.make_chromarms(
    pd.Series({"chrX": 8}), mids, cols_mids=["chromosome", "loc"]
)
pd.testing.assert_frame_equal(arms, result)

# chromsizes as Series, midpoints as Series
result = bioframe.make_chromarms(pd.Series({"chrX": 8}), pd.Series({"chrX": 4}))
pd.testing.assert_frame_equal(arms, result)

# chromsizes as dict, midpoints as DataFrame
result = bioframe.make_chromarms(
    {"chrX": 8}, mids, cols_mids=["chromosome", "loc"]
)
pd.testing.assert_frame_equal(arms, result)

# chromsizes as dict, midpoints as Series
result = bioframe.make_chromarms({"chrX": 8}, pd.Series({"chrX": 4}))
pd.testing.assert_frame_equal(arms, result)

# chromsizes as dict, midpoints as dict
result = bioframe.make_chromarms({"chrX": 8}, {"chrX": 4})
pd.testing.assert_frame_equal(arms, result)


def test_binnify():
Expand Down

0 comments on commit 3d2f347

Please sign in to comment.