Skip to content

Commit 88affc3

Browse files
BUG: limit select_dtypes(object) back compat fix to default str dtype (#62402)
1 parent 4386a46 commit 88affc3

File tree

2 files changed

+30
-1
lines changed

2 files changed

+30
-1
lines changed

pandas/core/frame.py

Lines changed: 7 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -144,6 +144,7 @@
144144
TimedeltaArray,
145145
)
146146
from pandas.core.arrays.sparse import SparseFrameAccessor
147+
from pandas.core.arrays.string_ import StringDtype
147148
from pandas.core.construction import (
148149
ensure_wrapped_if_datetimelike,
149150
sanitize_array,
@@ -5157,7 +5158,12 @@ def dtype_predicate(dtype: DtypeObj, dtypes_set) -> bool:
51575158
and getattr(dtype, "_is_numeric", False)
51585159
and not is_bool_dtype(dtype)
51595160
)
5160-
or (dtype.type is str and np.object_ in dtypes_set)
5161+
# backwards compat for the default `str` dtype being selected by object
5162+
or (
5163+
isinstance(dtype, StringDtype)
5164+
and dtype.na_value is np.nan
5165+
and np.object_ in dtypes_set
5166+
)
51615167
)
51625168

51635169
def predicate(arr: ArrayLike) -> bool:

pandas/tests/frame/methods/test_select_dtypes.py

Lines changed: 23 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -485,3 +485,26 @@ def test_select_dtypes_no_view(self):
485485
result = df.select_dtypes(include=["number"])
486486
result.iloc[0, 0] = 0
487487
tm.assert_frame_equal(df, df_orig)
488+
489+
def test_select_dtype_object_and_str(self, using_infer_string):
490+
# https://github.com/pandas-dev/pandas/issues/61916
491+
df = DataFrame(
492+
{
493+
"a": ["a", "b", "c"],
494+
"b": [1, 2, 3],
495+
"c": pd.array(["a", "b", "c"], dtype="string"),
496+
}
497+
)
498+
499+
# with "object" -> only select the object or default str dtype column
500+
result = df.select_dtypes(include=["object"])
501+
expected = df[["a"]]
502+
tm.assert_frame_equal(result, expected)
503+
504+
# with "string" -> select both the default 'str' and the nullable 'string'
505+
result = df.select_dtypes(include=["string"])
506+
if using_infer_string:
507+
expected = df[["a", "c"]]
508+
else:
509+
expected = df[["c"]]
510+
tm.assert_frame_equal(result, expected)

0 commit comments

Comments
 (0)