Commit c89a5fb

Merge branch 'main' into micol-altomare-replace-ensure-clean-store
2 parents e08de81 + b69fb3c commit c89a5fb

Showing 34 changed files with 460 additions and 227 deletions.

.github/workflows/unit-tests.yml

Lines changed: 1 addition & 1 deletion
@@ -216,7 +216,7 @@ jobs:
   Linux-32-bit:
     runs-on: ubuntu-24.04
     container:
-      image: quay.io/pypa/manylinux2014_i686
+      image: quay.io/pypa/manylinux_2_28_i686
       options: --platform linux/386
     steps:
       - name: Checkout pandas Repo

.gitignore

Lines changed: 1 addition & 0 deletions
@@ -21,6 +21,7 @@
 .noseids
 .ipynb_checkpoints
 .tags
+tags
 .cache/
 .vscode/

doc/source/whatsnew/v3.0.0.rst

Lines changed: 4 additions & 0 deletions
@@ -233,6 +233,9 @@ Other enhancements
 - Support reading Stata 102-format (Stata 1) dta files (:issue:`58978`)
 - Support reading Stata 110-format (Stata 7) dta files (:issue:`47176`)
 - Switched wheel upload to **PyPI Trusted Publishing** (OIDC) for release-tag pushes in ``wheels.yml``. (:issue:`61718`)
+- Added a new :meth:`DataFrame.from_arrow` method to import any Arrow-compatible
+  tabular data object into a pandas :class:`DataFrame` through the
+  `Arrow PyCapsule Protocol <https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html>`__ (:issue:`59631`)

 .. ---------------------------------------------------------------------------
 .. _whatsnew_300.notable_bug_fixes:

@@ -1243,6 +1246,7 @@ Groupby/resample/rolling
 - Bug in :meth:`.DataFrameGroupBy.groups` and :meth:`.SeriesGroupby.groups` that would not respect groupby argument ``dropna`` (:issue:`55919`)
 - Bug in :meth:`.DataFrameGroupBy.median` where nat values gave an incorrect result. (:issue:`57926`)
 - Bug in :meth:`.DataFrameGroupBy.quantile` when ``interpolation="nearest"`` is inconsistent with :meth:`DataFrame.quantile` (:issue:`47942`)
+- Bug in :meth:`.DataFrameGroupBy.sum` and :meth:`.SeriesGroupBy.sum` returning ``NaN`` on overflow. These methods now return ``inf`` or ``-inf`` on overflow. (:issue:`60303`)
 - Bug in :meth:`.DataFrameGroupBy` reductions where non-Boolean values were allowed for the ``numeric_only`` argument; passing a non-Boolean value will now raise (:issue:`62778`)
 - Bug in :meth:`.Resampler.interpolate` on a :class:`DataFrame` with non-uniform sampling and/or indices not aligning with the resulting resampled index would result in wrong interpolation (:issue:`21351`)
 - Bug in :meth:`.Series.rolling` when used with a :class:`.BaseIndexer` subclass and computing min/max (:issue:`46726`)
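
The ``DataFrame.from_arrow`` entry above refers to the Arrow PyCapsule Protocol, which lets pandas ingest tabular data from any Arrow-compatible producer without going through a library-specific conversion. A minimal sketch of the intended usage, assuming pyarrow is installed (any object exposing ``__arrow_c_stream__`` or ``__arrow_c_array__`` should work the same way):

```python
import pyarrow as pa

import pandas as pd

# pa.Table exports its data through the Arrow PyCapsule Protocol
# (__arrow_c_stream__), so DataFrame.from_arrow can import it directly.
tbl = pa.table({"a": [1, 2, 3], "b": ["x", "y", "z"]})

df = pd.DataFrame.from_arrow(tbl)
print(df.dtypes)
```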

pandas/_libs/groupby.pyx

Lines changed: 21 additions & 4 deletions
@@ -5,6 +5,7 @@ from cython cimport (
 )
 from libc.math cimport (
     NAN,
+    isfinite,
     sqrt,
 )
 from libc.stdlib cimport (

@@ -778,9 +779,9 @@ def group_sum(
                 if not isna_entry:
                     nobs[lab, j] += 1

-                    if sum_t is object:
+                    if sum_t is object or sum_t is int64_t or sum_t is uint64_t:
                         # NB: this does not use 'compensation' like the non-object
-                        # track does.
+                        # and non-integer track does.
                         if nobs[lab, j] == 1:
                             # i.e. we haven't added anything yet; avoid TypeError
                             # if e.g. val is a str and sumx[lab, j] is 0

@@ -793,13 +794,29 @@ def group_sum(
                         y = val - compensation[lab, j]
                         t = sumx[lab, j] + y
                         compensation[lab, j] = t - sumx[lab, j] - y
-                        if compensation[lab, j] != compensation[lab, j]:
-                            # GH#53606
+
+                        # Handle float overflow
+                        if (
+                            sum_t is float32_t or sum_t is float64_t
+                        ) and not isfinite(compensation[lab, j]):
+                            # GH#53606; GH#60303
                             # If val is +/- infinity compensation is NaN
                             # which would lead to results being NaN instead
                             # of +/- infinity. We cannot use util.is_nan
                             # because of no gil
                             compensation[lab, j] = 0
+
+                        # Handle complex overflow
+                        if (
+                            sum_t is complex64_t or sum_t is complex128_t
+                        ) and not isfinite(compensation[lab, j].real):
+                            compensation[lab, j].real = 0
+
+                        if (
+                            sum_t is complex64_t or sum_t is complex128_t
+                        ) and not isfinite(compensation[lab, j].imag):
+                            compensation[lab, j].imag = 0
+
                         sumx[lab, j] = t
                 elif not skipna:
                     if uses_mask:
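
The hunk above does two things: it routes ``int64``/``uint64`` sums onto the simple (non-Kahan) accumulation path alongside ``object``, and it resets the Kahan ``compensation`` term whenever it becomes non-finite, so a float or complex overflow propagates as ``inf``/``-inf`` rather than turning the whole group sum into ``NaN``. A minimal sketch of the resulting user-facing behavior; the expected output is taken from the GH#60303 whatsnew entry above, not run against this exact commit:

```python
import numpy as np

import pandas as pd

# Two float64 values whose true sum exceeds the representable maximum.
big = np.finfo(np.float64).max
df = pd.DataFrame({"key": ["a", "a"], "val": [big, big]})

result = df.groupby("key")["val"].sum()
print(result)
# Expected after GH#60303: the group sum overflows to inf instead of NaN.
```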

pandas/_libs/internals.pyi

Lines changed: 0 additions & 4 deletions
@@ -94,7 +94,3 @@ class BlockValuesRefs:
     def add_reference(self, blk: Block) -> None: ...
     def add_index_reference(self, index: Index) -> None: ...
     def has_reference(self) -> bool: ...
-
-class SetitemMixin:
-    def __setitem__(self, key, value) -> None: ...
-    def __delitem__(self, key) -> None: ...

pandas/_libs/internals.pyx

Lines changed: 0 additions & 50 deletions
@@ -1,9 +1,6 @@
 from collections import defaultdict
-import sys
-import warnings

 cimport cython
-from cpython cimport PY_VERSION_HEX
 from cpython.object cimport PyObject
 from cpython.pyport cimport PY_SSIZE_T_MAX
 from cpython.slice cimport PySlice_GetIndicesEx

@@ -23,9 +20,6 @@ from numpy cimport (
 cnp.import_array()

 from pandas._libs.algos import ensure_int64
-from pandas.compat import CHAINED_WARNING_DISABLED
-from pandas.errors import ChainedAssignmentError
-from pandas.errors.cow import _chained_assignment_msg

 from pandas._libs.util cimport (
     is_array,

@@ -1002,47 +996,3 @@ cdef class BlockValuesRefs:
                 return self._has_reference_maybe_locked()
         ELSE:
             return self._has_reference_maybe_locked()
-
-
-cdef extern from "Python.h":
-    """
-    // python version < 3.14
-    #if PY_VERSION_HEX < 0x030E0000
-    // This function is unused and is declared to avoid a build warning
-    int __Pyx_PyUnstable_Object_IsUniqueReferencedTemporary(PyObject *ref) {
-        return Py_REFCNT(ref) == 1;
-    }
-    #else
-    #define __Pyx_PyUnstable_Object_IsUniqueReferencedTemporary \
-        PyUnstable_Object_IsUniqueReferencedTemporary
-    #endif
-    """
-    int PyUnstable_Object_IsUniqueReferencedTemporary\
-        "__Pyx_PyUnstable_Object_IsUniqueReferencedTemporary"(object o) except -1
-
-
-# Python version compatibility for PyUnstable_Object_IsUniqueReferencedTemporary
-cdef inline bint _is_unique_referenced_temporary(object obj) except -1:
-    if PY_VERSION_HEX >= 0x030E0000:
-        # Python 3.14+ has PyUnstable_Object_IsUniqueReferencedTemporary
-        return PyUnstable_Object_IsUniqueReferencedTemporary(obj)
-    else:
-        # Fallback for older Python versions using sys.getrefcount
-        return sys.getrefcount(obj) <= 1
-
-
-cdef class SetitemMixin:
-    # class used in DataFrame and Series for checking for chained assignment
-
-    def __setitem__(self, key, value) -> None:
-        cdef bint is_unique = 0
-        if not CHAINED_WARNING_DISABLED:
-            is_unique = _is_unique_referenced_temporary(self)
-        if is_unique:
-            warnings.warn(
-                _chained_assignment_msg, ChainedAssignmentError, stacklevel=1
-            )
-        self._setitem(key, value)
-
-    def __delitem__(self, key) -> None:
-        self._delitem(key)
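
The deleted ``SetitemMixin`` warned about chained assignment by checking whether ``self`` was a uniquely referenced temporary, via ``PyUnstable_Object_IsUniqueReferencedTemporary`` on Python 3.14+ and a ``sys.getrefcount`` heuristic otherwise. A toy sketch of that idea in plain Python (not pandas code; exact counts are CPython implementation details):

```python
import sys


class RefcountProbe:
    """Toy stand-in for the removed refcount check; not pandas code."""

    def __setitem__(self, key, value) -> None:
        # sys.getrefcount counts its own argument as one extra reference,
        # so a small value here suggests nothing else holds this object.
        print("references seen inside __setitem__:", sys.getrefcount(self))


named = RefcountProbe()
named[0] = 1            # bound to a variable -> one extra reference
RefcountProbe()[0] = 1  # a temporary, as in df["a"][0] = 1 -> one fewer reference
```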

pandas/_libs/tslibs/timedeltas.pyx

Lines changed: 8 additions & 0 deletions
@@ -83,6 +83,7 @@ from pandas._libs.tslibs.offsets import Day

 from pandas._libs.tslibs.util cimport (
     is_array,
+    is_bool_object,
     is_float_object,
     is_integer_object,
 )

@@ -2311,6 +2312,13 @@ class Timedelta(_Timedelta):
                 return self.__mul__(item)
             return other * self.to_timedelta64()

+        elif is_bool_object(other):
+            # GH#62316
+            raise TypeError(
+                "Cannot multiply Timedelta by bool. "
+                "Explicitly cast to integer instead."
+            )
+
         return NotImplemented

     __rmul__ = __mul__
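
With this hunk, ``Timedelta.__mul__`` (and, via ``__rmul__``, the reflected form) rejects ``bool`` operands with an explicit error. A minimal sketch of the new behavior described by GH#62316:

```python
import pandas as pd

td = pd.Timedelta("1 hour")

try:
    td * True
except TypeError as exc:
    print(exc)  # Cannot multiply Timedelta by bool. Explicitly cast to integer instead.

# The workaround suggested by the error message: cast the flag first.
print(td * int(True))  # 0 days 01:00:00
```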

pandas/_testing/contexts.py

Lines changed: 3 additions & 14 deletions
@@ -12,10 +12,7 @@
 )
 import uuid

-from pandas.compat import (
-    CHAINED_WARNING_DISABLED,
-    CHAINED_WARNING_DISABLED_INPLACE_METHOD,
-)
+from pandas.compat import CHAINED_WARNING_DISABLED
 from pandas.errors import ChainedAssignmentError

 from pandas.io.common import get_handle

@@ -163,18 +160,10 @@ def with_csv_dialect(name: str, **kwargs) -> Generator[None]:
     csv.unregister_dialect(name)


-def raises_chained_assignment_error(
-    extra_warnings=(), extra_match=(), inplace_method=False
-):
+def raises_chained_assignment_error(extra_warnings=(), extra_match=()):
     from pandas._testing import assert_produces_warning

-    WARNING_DISABLED = (
-        CHAINED_WARNING_DISABLED_INPLACE_METHOD
-        if inplace_method
-        else CHAINED_WARNING_DISABLED
-    )
-
-    if WARNING_DISABLED:
+    if CHAINED_WARNING_DISABLED:
         if not extra_warnings:
             from contextlib import nullcontext
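
The helper keeps its usual shape as a context manager wrapping ``assert_produces_warning``. A hedged sketch of typical usage, assuming the import path of this module; what actually happens depends on whether chained-assignment warnings are enabled in the running pandas build:

```python
import pandas as pd

from pandas._testing.contexts import raises_chained_assignment_error

df = pd.DataFrame({"a": [1, 2, 3]})

# Chained assignment: df["a"] is an intermediate object, so the write below
# is expected to surface a ChainedAssignmentError warning when enabled; the
# helper degrades to a nullcontext when the warning is disabled.
with raises_chained_assignment_error():
    df["a"][0] = 10
```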

pandas/_typing.py

Lines changed: 40 additions & 0 deletions
@@ -533,4 +533,44 @@ def closed(self) -> bool:

 SliceType: TypeAlias = Hashable | None

+
+# Arrow PyCapsule Interface
+# from https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html#protocol-typehints
+
+
+class ArrowArrayExportable(Protocol):
+    """
+    An object with an ``__arrow_c_array__`` method.
+
+    This method indicates the object is an Arrow-compatible object implementing
+    the `Arrow PyCapsule Protocol`_ (exposing the `Arrow C Data Interface`_ in
+    Python), enabling zero-copy Arrow data interchange across libraries.
+
+    .. _Arrow PyCapsule Protocol: https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html
+    .. _Arrow C Data Interface: https://arrow.apache.org/docs/format/CDataInterface.html
+
+    """
+
+    def __arrow_c_array__(
+        self, requested_schema: object | None = None
+    ) -> tuple[object, object]: ...
+
+
+class ArrowStreamExportable(Protocol):
+    """
+    An object with an ``__arrow_c_stream__`` method.
+
+    This method indicates the object is an Arrow-compatible object implementing
+    the `Arrow PyCapsule Protocol`_ (exposing the `Arrow C Stream Interface`_
+    for streams in Python), enabling zero-copy Arrow data interchange across
+    libraries.
+
+    .. _Arrow PyCapsule Protocol: https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html
+    .. _Arrow C Stream Interface: https://arrow.apache.org/docs/format/CStreamInterface.html
+
+    """
+
+    def __arrow_c_stream__(self, requested_schema: object | None = None) -> object: ...
+
+
 __all__ = ["type_t"]
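
These ``Protocol`` classes are structural: any object that provides the matching dunder satisfies them without inheriting from pandas. A hedged sketch of using them to type an Arrow consumer, assuming a pyarrow version recent enough to build a ``RecordBatchReader`` from a PyCapsule stream exporter:

```python
from __future__ import annotations

import pyarrow as pa

from pandas._typing import ArrowStreamExportable


def column_names(data: ArrowStreamExportable) -> list[str]:
    # Consume the object through the PyCapsule stream interface just to
    # inspect its schema; pa.Table satisfies the protocol structurally.
    reader = pa.RecordBatchReader.from_stream(data)
    return reader.schema.names


print(column_names(pa.table({"a": [1], "b": [2]})))
```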

pandas/compat/__init__.py

Lines changed: 0 additions & 2 deletions
@@ -17,7 +17,6 @@

 from pandas.compat._constants import (
     CHAINED_WARNING_DISABLED,
-    CHAINED_WARNING_DISABLED_INPLACE_METHOD,
     IS64,
     ISMUSL,
     PY312,

@@ -154,7 +153,6 @@ def is_ci_environment() -> bool:

 __all__ = [
     "CHAINED_WARNING_DISABLED",
-    "CHAINED_WARNING_DISABLED_INPLACE_METHOD",
     "HAS_PYARROW",
     "IS64",
     "ISMUSL",
