Skip to content

Commit

Permalink
Fix inconsistent results produced on different platforms
Browse files Browse the repository at this point in the history
Fix a bug that caused stripepy call to return different results when run
on different platforms (and potentially even when using different
versions of numpy and pandas).

The bug was caused by not forcing numpy and pandas to use stable
sorting algorithms: without a stable sort, the relative order of
elements with equal keys is not guaranteed.
As a result, macOS and Linux produced slightly different results
(Windows and Linux produced consistent results).
  • Loading branch information
robomics committed Jan 13, 2025
1 parent 53c5e5c commit 948c425
Show file tree
Hide file tree
Showing 5 changed files with 6 additions and 6 deletions.
2 changes: 1 addition & 1 deletion src/stripepy/cli/view.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ def _read_stripes(f: ResultFile, chrom: str) -> pd.DataFrame:
df1 = pd.concat([geo_lt, bio_lt], axis="columns")
df2 = pd.concat([geo_ut, bio_ut], axis="columns")

return pd.concat([df1, df2]).set_index("seed").sort_index()
return pd.concat([df1, df2]).set_index("seed").sort_index(kind="stable")
except Exception as e:
raise RuntimeError(f'failed to read stripes for chromosome "{chrom}": {e}')

Expand Down
4 changes: 2 additions & 2 deletions src/stripepy/plot.py
Original file line number Diff line number Diff line change
Expand Up @@ -521,10 +521,10 @@ def fetch(v: npt.NDArray[int], left_bound: int, right_bound: int) -> Tuple[npt.N

min_persistence = result.min_persistence
lt_idx, lt_seeds = fetch(
np.sort(TDA(pd_lt, min_persistence=min_persistence)[2]), start // resolution, end // resolution
np.sort(TDA(pd_lt, min_persistence=min_persistence)[2], stable=True), start // resolution, end // resolution
)
ut_idx, ut_seeds = fetch(
np.sort(TDA(pd_ut, min_persistence=min_persistence)[2]), start // resolution, end // resolution
np.sort(TDA(pd_ut, min_persistence=min_persistence)[2], stable=True), start // resolution, end // resolution
)

return {
Expand Down
2 changes: 1 addition & 1 deletion src/stripepy/utils/TDA.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ def TDA(marginal_pd, min_persistence=0):
)

# Sorting maximum points (and, as a consequence, the corresponding minimum points) w.r.t. persistence:
argsorting = np.argsort(list(zip(*filtered_max_points_and_persistence))[1]).tolist()
argsorting = np.argsort(list(zip(*filtered_max_points_and_persistence))[1], stable=True).tolist()

if len(filtered_min_points_and_persistence) == 0:
filtered_min_points = []
Expand Down
2 changes: 1 addition & 1 deletion src/stripepy/utils/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ def sort_based_on_arg0(*vectors: Sequence) -> Tuple[NDArray]:
if len(vectors[0]) == 0:
return tuple((np.array(v) for v in vectors)) # noqa

permutation = np.argsort(vectors[0])
permutation = np.argsort(vectors[0], stable=True)

return tuple((np.array(v)[permutation] for v in vectors)) # noqa

Expand Down
2 changes: 1 addition & 1 deletion src/stripepy/utils/persistence1d.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ def run_persistence(data, level_sets="lower"):

# Number of data to break ties (leftmost index comes first):
num_elements = len(data)
sorted_idx = np.argsort(data, kind="stable")[::-1] if level_sets == "upper" else np.argsort(data, kind="stable")
sorted_idx = np.argsort(data, stable=True)[::-1] if level_sets == "upper" else np.argsort(data, stable=True)

# Get a union find data structure:
uf = UnionFind(num_elements)
Expand Down

0 comments on commit 948c425

Please sign in to comment.