Skip to content

Commit ce48f8e

Browse files
Merge pull request #26 from PixelgenTechnologies/feature/exe-1177-fix-filtering-edgelist-bug
Fix filtering edgelist bug, and layout generation problem
2 parents bf50ce9 + 6e53f69 commit ce48f8e

File tree

5 files changed

+105
-7
lines changed

5 files changed

+105
-7
lines changed

CHANGELOG.md

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,15 @@ All notable changes to this project will be documented in this file.
55
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
66
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
77

8-
## [0.15.0]- 2023-10-16
8+
## [UNRELEASED]
9+
10+
### Fixed
11+
12+
* Fixed a bug in filtering a pixeldataset that caused it to return an edgelist with the wrong types.
13+
* Fixed a bug in graph layout generation due to incorrect data frame concatenation.
14+
15+
16+
## [0.15.0] - 2023-10-16
917

1018
### Added
1119

src/pixelator/graph/backends/implementations.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -325,6 +325,7 @@ def layout_coordinates(
325325
coordinates = pd.DataFrame(
326326
layout_inst.coords,
327327
columns=["x", "y"] if layout_inst.dim == 2 else ["x", "y", "z"],
328+
index=raw.vs["name"],
328329
)
329330

330331
# If we are doing a 3D layout we add the option of normalized
@@ -340,7 +341,7 @@ def layout_coordinates(
340341
# Added here to avoid circular imports
341342
from pixelator.graph.utils import create_node_markers_counts
342343

343-
node_marker_counts = create_node_markers_counts(raw)
344+
node_marker_counts = create_node_markers_counts(self._raw)
344345
df = pd.concat([coordinates, node_marker_counts], axis=1)
345346
else:
346347
df = coordinates

src/pixelator/pixeldataset.py

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -700,7 +700,7 @@ def graph(
700700
(
701701
self.edgelist_lazy.filter(pl.col("component") == component_id)
702702
.collect()
703-
.to_pandas(use_pyarrow_extension_array=True)
703+
.to_pandas()
704704
)
705705
)
706706
if potential_component.empty:
@@ -839,9 +839,7 @@ def _all_true_array(shape):
839839
else self.edgelist_lazy
840840
)
841841

842-
edgelist = _enforce_edgelist_types(
843-
edgelist_pred.collect().to_pandas(use_pyarrow_extension_array=True)
844-
)
842+
edgelist = _enforce_edgelist_types(edgelist_pred.collect().to_pandas())
845843

846844
if self.polarization is not None:
847845
polarization_mask = (
@@ -1299,6 +1297,11 @@ def _enforce_edgelist_types(edgelist: pd.DataFrame) -> pd.DataFrame:
12991297
if edgelist.shape[0] == 0:
13001298
edgelist = pd.DataFrame(columns=required_types.keys())
13011299

1300+
# If we have the optional sample column, this should be
1301+
# set to use a categorical type
1302+
if "sample" in edgelist.columns:
1303+
required_types["sample"] = "category"
1304+
13021305
# If all of the prescribed types are already set, just return the edgelist
13031306
type_dict = edgelist.dtypes.to_dict()
13041307
if all(type_dict[key] == type_ for key, type_ in required_types.items()):

tests/graph/test_graph_utils.py

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,76 @@ def test_build_graph_a_node_projected(full_graph_edgelist: pd.DataFrame):
9393
assert graph.vs.attributes() == ["name", "markers", "type", "pixel_type"]
9494

9595

96+
def test_layout_coordinates_all_pixels(full_graph_edgelist: pd.DataFrame):
97+
graph = Graph.from_edgelist(
98+
edgelist=full_graph_edgelist,
99+
add_marker_counts=True,
100+
simplify=True,
101+
use_full_bipartite=True,
102+
)
103+
result = graph.layout_coordinates(only_keep_a_pixels=False)
104+
assert result.shape == (100, 4)
105+
assert set(result.columns) == {"x", "y", "A", "B"}
106+
107+
108+
def test_layout_coordinates_3d_layout(full_graph_edgelist: pd.DataFrame):
109+
graph = Graph.from_edgelist(
110+
edgelist=full_graph_edgelist,
111+
add_marker_counts=True,
112+
simplify=True,
113+
use_full_bipartite=True,
114+
)
115+
result = graph.layout_coordinates(
116+
layout_algorithm="fruchterman_reingold_3d", only_keep_a_pixels=False
117+
)
118+
assert set(result.columns) == {
119+
"x",
120+
"y",
121+
"z",
122+
"x_norm",
123+
"y_norm",
124+
"z_norm",
125+
"A",
126+
"B",
127+
}
128+
assert result.shape == (100, 8)
129+
130+
131+
def test_layout_coordinates_only_a_pixels(full_graph_edgelist: pd.DataFrame):
132+
graph = Graph.from_edgelist(
133+
edgelist=full_graph_edgelist,
134+
add_marker_counts=True,
135+
simplify=True,
136+
use_full_bipartite=True,
137+
)
138+
result = graph.layout_coordinates(only_keep_a_pixels=True)
139+
assert result.shape == (50, 4)
140+
assert set(result.columns) == {"x", "y", "A", "B"}
141+
142+
143+
def test_layout_coordinates_3d_layout_only_a_pixels(full_graph_edgelist: pd.DataFrame):
144+
graph = Graph.from_edgelist(
145+
edgelist=full_graph_edgelist,
146+
add_marker_counts=True,
147+
simplify=True,
148+
use_full_bipartite=True,
149+
)
150+
result = graph.layout_coordinates(
151+
layout_algorithm="fruchterman_reingold_3d", only_keep_a_pixels=True
152+
)
153+
assert set(result.columns) == {
154+
"x",
155+
"y",
156+
"z",
157+
"x_norm",
158+
"y_norm",
159+
"z_norm",
160+
"A",
161+
"B",
162+
}
163+
assert result.shape == (50, 8)
164+
165+
96166
def test_components_metrics(full_graph_edgelist: pd.DataFrame):
97167
"""Test generating component metrics."""
98168
# test component metrics

tests/test_pixeldataset.py

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -516,7 +516,6 @@ def test_edgelist_to_anndata(
516516
assert set(adata.obs_names) == set(edgelist["component"].unique())
517517

518518

519-
@pytest.mark.test_this
520519
def test_simple_aggregate(setup_basic_pixel_dataset):
521520
"""test_simple_aggregate."""
522521
dataset_1, *_ = setup_basic_pixel_dataset
@@ -699,6 +698,23 @@ def test_simple_aggregate_ignore_edgelist(setup_basic_pixel_dataset):
699698
assert result.edgelist.shape == (0, 9)
700699

701700

701+
def test_filter_should_return_proper_typed_edgelist_data(setup_basic_pixel_dataset):
702+
# Test to check for bug EXE-1177
703+
# This bug was caused by filtering returning an incorrectly typed
704+
# edgelist, which in turn caused getting the graph to fail
705+
dataset_1, *_ = setup_basic_pixel_dataset
706+
dataset_2 = dataset_1.copy()
707+
708+
aggregated_data = simple_aggregate(
709+
sample_names=["sample1", "sample2"], datasets=[dataset_1, dataset_2]
710+
)
711+
712+
result = aggregated_data.filter(components=aggregated_data.adata.obs.index[:2])
713+
assert isinstance(result.edgelist["component"].dtype, pd.CategoricalDtype)
714+
# Running graph here to make sure it does not raise an exception
715+
result.graph(result.adata.obs.index[0])
716+
717+
702718
def test_copy(setup_basic_pixel_dataset):
703719
"""test_copy."""
704720
dataset_1, *_ = setup_basic_pixel_dataset

0 commit comments

Comments
 (0)