Skip to content

Commit 6ad23cf

Browse files
authored
REF: simplify _append_internal (#62351)
1 parent bc500f7 commit 6ad23cf

File tree

7 files changed

Lines changed: 51 additions & 167 deletions

pandas/core/frame.py

Lines changed: 25 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -10893,61 +10893,41 @@ def infer(x):
1089310893

1089410894
def _append_internal(
1089510895
self,
10896-
other,
10896+
other: Series,
1089710897
ignore_index: bool = False,
10898-
verify_integrity: bool = False,
10899-
sort: bool = False,
1090010898
) -> DataFrame:
10901-
if isinstance(other, (Series, dict)):
10902-
if isinstance(other, dict):
10903-
if not ignore_index:
10904-
raise TypeError("Can only append a dict if ignore_index=True")
10905-
other = Series(other)
10906-
if other.name is None and not ignore_index:
10907-
raise TypeError(
10908-
"Can only append a Series if ignore_index=True "
10909-
"or if the Series has a name"
10910-
)
10899+
assert isinstance(other, Series), type(other)
1091110900

10912-
index = Index(
10913-
[other.name],
10914-
name=(
10915-
self.index.names
10916-
if isinstance(self.index, MultiIndex)
10917-
else self.index.name
10918-
),
10901+
if other.name is None and not ignore_index:
10902+
raise TypeError(
10903+
"Can only append a Series if ignore_index=True "
10904+
"or if the Series has a name"
1091910905
)
10920-
row_df = other.to_frame().T
10921-
if isinstance(self.index.dtype, ExtensionDtype):
10922-
# GH#41626 retain e.g. CategoricalDtype if reached via
10923-
# df.loc[key] = item
10924-
row_df.index = self.index.array._cast_pointwise_result(
10925-
row_df.index._values
10926-
)
1092710906

10928-
# infer_objects is needed for
10929-
# test_append_empty_frame_to_series_with_dateutil_tz
10930-
other = row_df.infer_objects().rename_axis(index.names)
10931-
elif isinstance(other, list):
10932-
if not other:
10933-
pass
10934-
elif not isinstance(other[0], DataFrame):
10935-
other = DataFrame(other)
10936-
if self.index.name is not None and not ignore_index:
10937-
other.index.name = self.index.name
10907+
index = Index(
10908+
[other.name],
10909+
name=(
10910+
self.index.names
10911+
if isinstance(self.index, MultiIndex)
10912+
else self.index.name
10913+
),
10914+
)
1093810915

10939-
from pandas.core.reshape.concat import concat
10916+
row_df = other.to_frame().T
10917+
if isinstance(self.index.dtype, ExtensionDtype):
10918+
# GH#41626 retain e.g. CategoricalDtype if reached via
10919+
# df.loc[key] = item
10920+
row_df.index = self.index.array._cast_pointwise_result(row_df.index._values)
1094010921

10941-
if isinstance(other, (list, tuple)):
10942-
to_concat = [self, *other]
10943-
else:
10944-
to_concat = [self, other]
10922+
# infer_objects is needed for
10923+
# test_append_empty_frame_to_series_with_dateutil_tz
10924+
row_df = row_df.infer_objects().rename_axis(index.names)
10925+
10926+
from pandas.core.reshape.concat import concat
1094510927

1094610928
result = concat(
10947-
to_concat,
10929+
[self, row_df],
1094810930
ignore_index=ignore_index,
10949-
verify_integrity=verify_integrity,
10950-
sort=sort,
1095110931
)
1095210932
return result.__finalize__(self, method="append")
1095310933

pandas/core/reshape/pivot.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -504,7 +504,7 @@ def _add_margins(
504504
margin_dummy[cols] = margin_dummy[cols].apply(
505505
maybe_downcast_to_dtype, args=(dtype,)
506506
)
507-
result = result._append_internal(margin_dummy)
507+
result = concat([result, margin_dummy])
508508
result.index.names = row_names
509509

510510
return result

pandas/core/series.py

Lines changed: 2 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -2989,22 +2989,10 @@ def searchsorted( # type: ignore[override]
29892989
# -------------------------------------------------------------------
29902990
# Combination
29912991

2992-
def _append_internal(
2993-
self, to_append, ignore_index: bool = False, verify_integrity: bool = False
2994-
):
2992+
def _append_internal(self, to_append: Series, ignore_index: bool = False) -> Series:
29952993
from pandas.core.reshape.concat import concat
29962994

2997-
if isinstance(to_append, (list, tuple)):
2998-
to_concat = [self]
2999-
to_concat.extend(to_append)
3000-
else:
3001-
to_concat = [self, to_append]
3002-
if any(isinstance(x, (ABCDataFrame,)) for x in to_concat[1:]):
3003-
msg = "to_append should be a Series or list/tuple of Series, got DataFrame"
3004-
raise TypeError(msg)
3005-
return concat(
3006-
to_concat, ignore_index=ignore_index, verify_integrity=verify_integrity
3007-
)
2995+
return concat([self, to_append], ignore_index=ignore_index)
30082996

30092997
@doc(
30102998
_shared_docs["compare"],

pandas/tests/reshape/concat/test_append.py

Lines changed: 23 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -28,23 +28,23 @@ def test_append(self, sort, float_frame):
2828
begin_frame = float_frame.reindex(begin_index)
2929
end_frame = float_frame.reindex(end_index)
3030

31-
appended = begin_frame._append_internal(end_frame)
31+
appended = concat([begin_frame, end_frame])
3232
tm.assert_almost_equal(appended["A"], float_frame["A"])
3333

3434
del end_frame["A"]
35-
partial_appended = begin_frame._append_internal(end_frame, sort=sort)
35+
partial_appended = concat([begin_frame, end_frame], sort=sort)
3636
assert "A" in partial_appended
3737

38-
partial_appended = end_frame._append_internal(begin_frame, sort=sort)
38+
partial_appended = concat([end_frame, begin_frame], sort=sort)
3939
assert "A" in partial_appended
4040

4141
# mixed type handling
42-
appended = mixed_frame[:5]._append_internal(mixed_frame[5:])
42+
appended = concat([mixed_frame[:5], mixed_frame[5:]])
4343
tm.assert_frame_equal(appended, mixed_frame)
4444

4545
# what to test here
46-
mixed_appended = mixed_frame[:5]._append_internal(float_frame[5:], sort=sort)
47-
mixed_appended2 = float_frame[:5]._append_internal(mixed_frame[5:], sort=sort)
46+
mixed_appended = concat([mixed_frame[:5], float_frame[5:]], sort=sort)
47+
mixed_appended2 = concat([float_frame[:5], mixed_frame[5:]], sort=sort)
4848

4949
# all equal except 'foo' column
5050
tm.assert_frame_equal(
@@ -55,18 +55,18 @@ def test_append(self, sort, float_frame):
5555
def test_append_empty(self, float_frame):
5656
empty = DataFrame()
5757

58-
appended = float_frame._append_internal(empty)
58+
appended = concat([float_frame, empty])
5959
tm.assert_frame_equal(float_frame, appended)
6060
assert appended is not float_frame
6161

62-
appended = empty._append_internal(float_frame)
62+
appended = concat([empty, float_frame])
6363
tm.assert_frame_equal(float_frame, appended)
6464
assert appended is not float_frame
6565

6666
def test_append_overlap_raises(self, float_frame):
6767
msg = "Indexes have overlapping values"
6868
with pytest.raises(ValueError, match=msg):
69-
float_frame._append_internal(float_frame, verify_integrity=True)
69+
concat([float_frame, float_frame], verify_integrity=True)
7070

7171
def test_append_new_columns(self):
7272
# see gh-6129: new columns
@@ -85,7 +85,7 @@ def test_append_new_columns(self):
8585
def test_append_length0_frame(self, sort):
8686
df = DataFrame(columns=["A", "B", "C"])
8787
df3 = DataFrame(index=[0, 1], columns=["A", "B"])
88-
df5 = df._append_internal(df3, sort=sort)
88+
df5 = concat([df, df3], sort=sort)
8989

9090
expected = DataFrame(index=[0, 1], columns=["A", "B", "C"])
9191
tm.assert_frame_equal(df5, expected)
@@ -100,7 +100,7 @@ def test_append_records(self):
100100
df1 = DataFrame(arr1)
101101
df2 = DataFrame(arr2)
102102

103-
result = df1._append_internal(df2, ignore_index=True)
103+
result = concat([df1, df2], ignore_index=True)
104104
expected = DataFrame(np.concatenate((arr1, arr2)))
105105
tm.assert_frame_equal(result, expected)
106106

@@ -109,7 +109,7 @@ def test_append_sorts(self, sort):
109109
df1 = DataFrame({"a": [1, 2], "b": [1, 2]}, columns=["b", "a"])
110110
df2 = DataFrame({"a": [1, 2], "c": [3, 4]}, index=[2, 3])
111111

112-
result = df1._append_internal(df2, sort=sort)
112+
result = concat([df1, df2], sort=sort)
113113

114114
# for None / True
115115
expected = DataFrame(
@@ -133,36 +133,18 @@ def test_append_different_columns(self, sort):
133133
a = df[:5].loc[:, ["bools", "ints", "floats"]]
134134
b = df[5:].loc[:, ["strings", "ints", "floats"]]
135135

136-
appended = a._append_internal(b, sort=sort)
136+
appended = concat([a, b], sort=sort)
137137
assert isna(appended["strings"][0:4]).all()
138138
assert isna(appended["bools"][5:]).all()
139139

140-
def test_append_many(self, sort, float_frame):
141-
chunks = [
142-
float_frame[:5],
143-
float_frame[5:10],
144-
float_frame[10:15],
145-
float_frame[15:],
146-
]
147-
148-
result = chunks[0]._append_internal(chunks[1:])
149-
tm.assert_frame_equal(result, float_frame)
150-
151-
chunks[-1] = chunks[-1].copy()
152-
chunks[-1]["foo"] = "bar"
153-
result = chunks[0]._append_internal(chunks[1:], sort=sort)
154-
tm.assert_frame_equal(result.loc[:, float_frame.columns], float_frame)
155-
assert (result["foo"][15:] == "bar").all()
156-
assert result["foo"][:15].isna().all()
157-
158140
def test_append_preserve_index_name(self):
159141
# #980
160142
df1 = DataFrame(columns=["A", "B", "C"])
161143
df1 = df1.set_index(["A"])
162144
df2 = DataFrame(data=[[1, 4, 7], [2, 5, 8], [3, 6, 9]], columns=["A", "B", "C"])
163145
df2 = df2.set_index(["A"])
164146

165-
result = df1._append_internal(df2)
147+
result = concat([df1, df2])
166148
assert result.index.name == "A"
167149

168150
indexes_can_append = [
@@ -285,7 +267,7 @@ def test_append_dtype_coerce(self, sort):
285267
axis=1,
286268
sort=sort,
287269
)
288-
result = df1._append_internal(df2, ignore_index=True, sort=sort)
270+
result = concat([df1, df2], ignore_index=True, sort=sort)
289271
if sort:
290272
expected = expected[["end_time", "start_time"]]
291273
else:
@@ -297,7 +279,7 @@ def test_append_missing_column_proper_upcast(self, sort):
297279
df1 = DataFrame({"A": np.array([1, 2, 3, 4], dtype="i8")})
298280
df2 = DataFrame({"B": np.array([True, False, True, False], dtype=bool)})
299281

300-
appended = df1._append_internal(df2, ignore_index=True, sort=sort)
282+
appended = concat([df1, df2], sort=sort)
301283
assert appended["A"].dtype == "f8"
302284
assert appended["B"].dtype == "O"
303285

@@ -323,27 +305,20 @@ def test_append_empty_frame_to_series_with_dateutil_tz(self):
323305
result_b = result_a._append_internal(ser, ignore_index=True)
324306
tm.assert_frame_equal(result_b, expected)
325307

326-
result = df._append_internal([ser, ser], ignore_index=True)
327-
tm.assert_frame_equal(result, expected)
328-
329308
def test_append_empty_tz_frame_with_datetime64ns(self):
330309
# https://github.com/pandas-dev/pandas/issues/35460
331310
df = DataFrame(columns=["a"]).astype("datetime64[ns, UTC]")
332311

333-
# pd.NaT gets inferred as tz-naive, so append result is tz-naive
334-
result = df._append_internal({"a": pd.NaT}, ignore_index=True)
335-
expected = DataFrame({"a": [pd.NaT]}, dtype=object)
336-
tm.assert_frame_equal(result, expected)
337-
338312
# also test with typed value to append
339313
df = DataFrame(columns=["a"]).astype("datetime64[ns, UTC]")
340-
other = Series({"a": pd.NaT}, dtype="datetime64[ns]")
341-
result = df._append_internal(other, ignore_index=True)
314+
other = Series({"a": pd.NaT}, dtype="datetime64[ns]").to_frame().T
315+
result = concat([df, other], ignore_index=True)
316+
expected = DataFrame({"a": [pd.NaT]}, dtype=object)
342317
tm.assert_frame_equal(result, expected)
343318

344319
# mismatched tz
345-
other = Series({"a": pd.NaT}, dtype="datetime64[ns, US/Pacific]")
346-
result = df._append_internal(other, ignore_index=True)
320+
other = Series({"a": pd.NaT}, dtype="datetime64[ns, US/Pacific]").to_frame().T
321+
result = concat([df, other], ignore_index=True)
347322
expected = DataFrame({"a": [pd.NaT]}).astype(object)
348323
tm.assert_frame_equal(result, expected)
349324

@@ -356,7 +331,7 @@ def test_append_empty_frame_with_timedelta64ns_nat(self, dtype_str, val):
356331
df = DataFrame(columns=["a"]).astype(dtype_str)
357332

358333
other = DataFrame({"a": [np.timedelta64(val, "ns")]})
359-
result = df._append_internal(other, ignore_index=True)
334+
result = concat([df, other])
360335

361336
expected = other.astype(object)
362337
tm.assert_frame_equal(result, expected)
@@ -370,7 +345,7 @@ def test_append_frame_with_timedelta64ns_nat(self, dtype_str, val):
370345
df = DataFrame({"a": pd.array([1], dtype=dtype_str)})
371346

372347
other = DataFrame({"a": [np.timedelta64(val, "ns")]})
373-
result = df._append_internal(other, ignore_index=True)
348+
result = concat([df, other], ignore_index=True)
374349

375350
expected = DataFrame({"a": [df.iloc[0, 0], other.iloc[0, 0]]}, dtype=object)
376351
tm.assert_frame_equal(result, expected)

0 commit comments

Comments
 (0)