Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,7 @@
TimedeltaArray,
)
from pandas.core.arrays.sparse import SparseFrameAccessor
from pandas.core.arrays.string_ import StringDtype
from pandas.core.construction import (
ensure_wrapped_if_datetimelike,
sanitize_array,
Expand Down Expand Up @@ -5157,7 +5158,12 @@ def dtype_predicate(dtype: DtypeObj, dtypes_set) -> bool:
and getattr(dtype, "_is_numeric", False)
and not is_bool_dtype(dtype)
)
or (dtype.type is str and np.object_ in dtypes_set)
# backwards compat for the default `str` dtype being selected by object
or (
isinstance(dtype, StringDtype)
and dtype.na_value is np.nan
and np.object_ in dtypes_set
)
)

def predicate(arr: ArrayLike) -> bool:
Expand Down
23 changes: 23 additions & 0 deletions pandas/tests/frame/methods/test_select_dtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -485,3 +485,26 @@ def test_select_dtypes_no_view(self):
result = df.select_dtypes(include=["number"])
result.iloc[0, 0] = 0
tm.assert_frame_equal(df, df_orig)

def test_select_dtype_object_and_str(self, using_infer_string):
    """Check which string-like columns ``select_dtypes`` picks up.

    Regression test for
    https://github.com/pandas-dev/pandas/issues/61916

    The frame holds a column of plain Python strings ("a", whose dtype is
    inferred), an integer column ("b") and a column explicitly built with
    ``dtype="string"`` ("c").

    Parameters
    ----------
    using_infer_string : bool
        Supplied by the test harness; when true, the inferred string
        column "a" is also expected to match ``include=["string"]``.
    """
    frame = DataFrame(
        {
            "a": ["a", "b", "c"],
            "b": [1, 2, 3],
            "c": pd.array(["a", "b", "c"], dtype="string"),
        }
    )

    # Selecting on "object" must return only the inferred column "a"
    # (object or default str dtype), never the explicit "string" column.
    by_object = frame.select_dtypes(include=["object"])
    tm.assert_frame_equal(by_object, frame[["a"]])

    # Selecting on "string" always returns the explicit "string" column;
    # the inferred column "a" joins it only when strings are inferred as
    # the default str dtype.
    string_columns = ["a", "c"] if using_infer_string else ["c"]
    by_string = frame.select_dtypes(include=["string"])
    tm.assert_frame_equal(by_string, frame[string_columns])
Loading