Skip to content

Commit 104a914

Browse files
authored
BUG: Fix groupby.apply() dropping metadata from subclassed DataFrames (#62134) (#62408)
1 parent 4e92c63 commit 104a914

File tree

4 files changed

+34
-4
lines changed

4 files changed

+34
-4
lines changed

doc/source/whatsnew/v3.0.0.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1068,6 +1068,7 @@ Groupby/resample/rolling
10681068
- Bug in :meth:`DataFrameGroupBy.agg` that raises ``AttributeError`` when there is dictionary input and duplicated columns, instead of returning a DataFrame with the aggregation of all duplicate columns. (:issue:`55041`)
10691069
- Bug in :meth:`DataFrameGroupBy.agg` where applying a user-defined function to an empty DataFrame returned a Series instead of an empty DataFrame. (:issue:`61503`)
10701070
- Bug in :meth:`DataFrameGroupBy.apply` and :meth:`SeriesGroupBy.apply` for an empty DataFrame with ``group_keys=False`` still creating output index using group keys. (:issue:`60471`)
1071+
- Bug in :meth:`DataFrameGroupBy.apply` and :meth:`SeriesGroupBy.apply` not preserving ``_metadata`` attributes from subclassed DataFrames and Series. (:issue:`62134`)
10711072
- Bug in :meth:`DataFrameGroupBy.apply` that was returning a completely empty DataFrame when all return values of ``func`` were ``None`` instead of returning an empty DataFrame with the original columns and dtypes. (:issue:`57775`)
10721073
- Bug in :meth:`DataFrameGroupBy.apply` with ``as_index=False`` that was returning :class:`MultiIndex` instead of returning :class:`Index`. (:issue:`58291`)
10731074
- Bug in :meth:`DataFrameGroupBy.cumsum` and :meth:`DataFrameGroupBy.cumprod` where ``numeric_only`` parameter was passed indirectly through kwargs instead of passing directly. (:issue:`58811`)

pandas/core/groupby/generic.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -621,7 +621,7 @@ def _wrap_applied_output(
621621
if not self.as_index and not_indexed_same:
622622
result = self._insert_inaxis_grouper(result)
623623
result.index = default_index(len(result))
624-
return result
624+
return result.__finalize__(self.obj, method="groupby")
625625
else:
626626
# GH #6265 #24880
627627
result = self.obj._constructor(
@@ -630,7 +630,7 @@ def _wrap_applied_output(
630630
if not self.as_index:
631631
result = self._insert_inaxis_grouper(result)
632632
result.index = default_index(len(result))
633-
return result
633+
return result.__finalize__(self.obj, method="groupby")
634634

635635
__examples_series_doc = dedent(
636636
"""
@@ -2169,7 +2169,7 @@ def _wrap_applied_output_series(
21692169
if not self.as_index:
21702170
result = self._insert_inaxis_grouper(result)
21712171

2172-
return result
2172+
return result.__finalize__(self.obj, method="groupby")
21732173

21742174
def _cython_transform(
21752175
self,

pandas/core/groupby/groupby.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1188,7 +1188,7 @@ def _concat_objects(
11881188
if isinstance(result, Series) and name is not None:
11891189
result.name = name
11901190

1191-
return result
1191+
return result.__finalize__(self.obj, method="groupby")
11921192

11931193
@final
11941194
def _set_result_index_ordered(

pandas/tests/groupby/test_groupby_subclass.py

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,35 @@ def func2(group):
9898
tm.assert_series_equal(result, expected)
9999

100100

101+
def test_groupby_apply_preserves_metadata():
102+
# GH#62134 - Test that apply() preserves metadata when returning DataFrames/Series
103+
custom_df = tm.SubclassedDataFrame({"a": [1, 2, 3], "b": [1, 1, 2], "c": [7, 8, 9]})
104+
custom_df.testattr = "hello"
105+
106+
def sum_func(group):
107+
assert isinstance(group, tm.SubclassedDataFrame)
108+
assert hasattr(group, "testattr")
109+
assert group.testattr == "hello"
110+
return group.sum()
111+
112+
result = custom_df.groupby("c").apply(sum_func)
113+
assert hasattr(result, "testattr"), "DataFrame apply() should preserve metadata"
114+
assert result.testattr == "hello"
115+
116+
custom_series = tm.SubclassedSeries([1, 2, 3])
117+
custom_series.testattr = "hello"
118+
119+
def sum_series_func(group):
120+
assert isinstance(group, tm.SubclassedSeries)
121+
assert hasattr(group, "testattr")
122+
assert group.testattr == "hello"
123+
return group.sum()
124+
125+
result = custom_series.groupby(custom_df["c"]).apply(sum_series_func)
126+
assert hasattr(result, "testattr"), "Series apply() should preserve metadata"
127+
assert result.testattr == "hello"
128+
129+
101130
@pytest.mark.parametrize("obj", [DataFrame, tm.SubclassedDataFrame])
102131
def test_groupby_resample_preserves_subclass(obj):
103132
# GH28330 -- preserve subclass through groupby.resample()

0 commit comments

Comments
 (0)