Skip to content

Commit

Permalink
Update for numpy 2.0 deprecations (#13103)
Browse files Browse the repository at this point in the history
- Replace `np.trapz` with vendored `trapezoid` from scipy
- Replace `np.float_` with `np.float64`
  • Loading branch information
adrianeboyd authored Nov 6, 2023
1 parent 92f1d0a commit c096c5c
Show file tree
Hide file tree
Showing 3 changed files with 180 additions and 4 deletions.
42 changes: 42 additions & 0 deletions licenses/3rd_party_licenses.txt
Original file line number Diff line number Diff line change
Expand Up @@ -158,3 +158,45 @@ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.


SciPy
-----

* Files: scorer.py

The implementation of trapezoid() is adapted from SciPy, which is distributed
under the following license:

New BSD License

Copyright (c) 2001-2002 Enthought, Inc. 2003-2023, SciPy Developers.
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:

1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.

2. Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following
disclaimer in the documentation and/or other materials provided
with the distribution.

3. Neither the name of the copyright holder nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
138 changes: 136 additions & 2 deletions spacy/scorer.py
Original file line number Diff line number Diff line change
Expand Up @@ -802,6 +802,140 @@ def get_ner_prf(examples: Iterable[Example], **kwargs) -> Dict[str, Any]:
}


# The following implementation of trapezoid() is adapted from SciPy,
# which is distributed under the New BSD License.
# Copyright (c) 2001-2002 Enthought, Inc. 2003-2023, SciPy Developers.
# See licenses/3rd_party_licenses.txt
def trapezoid(y, x=None, dx=1.0, axis=-1):
    """
    Integrate along one axis with the composite trapezoidal rule.

    Each pair of neighbouring samples along `axis` contributes
    ``width * (left + right) / 2`` and the contributions are summed. The
    widths are the successive differences of `x` when it is given, and the
    constant `dx` otherwise. `x` is used in the order supplied (it is never
    sorted), so decreasing sample points yield a negated integral.

    Parameters
    ----------
    y : array_like
        Values to integrate.
    x : array_like, optional
        Sample points corresponding to `y`. A 1-d `x` is differenced and
        laid out along `axis` so that it broadcasts against `y`; an `x` with
        more dimensions is differenced along `axis` directly. If None
        (the default), samples are taken to be `dx` apart.
    dx : scalar, optional
        Spacing between samples, used only when `x` is None. Default is 1.
    axis : int, optional
        Axis of `y` along which to integrate. Default is the last axis.

    Returns
    -------
    trapezoid : float or ndarray
        Approximation of the definite integral of `y` along `axis`. For a
        1-d `y` the result is a scalar; for an n-d `y` it has one dimension
        fewer than `y`.

    References
    ----------
    .. [1] Wikipedia page: https://en.wikipedia.org/wiki/Trapezoidal_rule

    Examples
    --------
    Unit spacing by default:

    >>> trapezoid([1, 2, 3])
    4.0

    Spacing may be given through ``x`` or ``dx``:

    >>> trapezoid([1, 2, 3], x=[4, 6, 8])
    8.0
    >>> trapezoid([1, 2, 3], dx=2)
    8.0

    Decreasing ``x`` integrates in reverse:

    >>> trapezoid([1, 2, 3], x=[8, 6, 4])
    -8.0

    Integrating a 2-d array along either axis:

    >>> a = np.arange(6).reshape(2, 3)
    >>> trapezoid(a, axis=0)
    array([1.5, 2.5, 3.5])
    >>> trapezoid(a, axis=1)
    array([2., 8.])
    """
    values = np.asanyarray(y)

    if x is None:
        widths = dx
    else:
        points = np.asanyarray(x)
        if points.ndim == 1:
            widths = np.diff(points)
            # Place the interval widths on `axis` and give every other
            # dimension length 1 so they broadcast against `values`.
            broadcast_shape = [1] * values.ndim
            broadcast_shape[axis] = widths.shape[0]
            widths = widths.reshape(broadcast_shape)
        else:
            widths = np.diff(points, axis=axis)

    # Index tuples selecting, along `axis`, everything but the first sample
    # (right edge of each interval) and everything but the last (left edge).
    right_index = [slice(None)] * values.ndim
    left_index = list(right_index)
    right_index[axis] = slice(1, None)
    left_index[axis] = slice(None, -1)
    right_index = tuple(right_index)
    left_index = tuple(left_index)

    try:
        return (widths * (values[right_index] + values[left_index]) / 2.0).sum(axis)
    except ValueError:
        # The arithmetic failed on the inputs as given (asanyarray preserves
        # ndarray subclasses); retry on plain ndarrays.
        widths = np.asarray(widths)
        values = np.asarray(values)
        return np.add.reduce(
            widths * (values[right_index] + values[left_index]) / 2.0, axis
        )


# The following implementation of roc_auc_score() is adapted from
# scikit-learn, which is distributed under the New BSD License.
# Copyright (c) 2007–2019 The scikit-learn developers.
Expand Down Expand Up @@ -1024,9 +1158,9 @@ def _auc(x, y):
else:
raise ValueError(Errors.E164.format(x=x))

area = direction * np.trapz(y, x)
area = direction * trapezoid(y, x)
if isinstance(area, np.memmap):
# Reductions such as .sum used internally in np.trapz do not return a
# Reductions such as .sum used internally in trapezoid do not return a
# scalar by default for numpy.memmap instances contrary to
# regular numpy.ndarray instances.
area = area.dtype.type(area)
Expand Down
4 changes: 2 additions & 2 deletions spacy/tokens/doc.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ class Doc:
user_hooks: Dict[str, Callable[..., Any]]
user_token_hooks: Dict[str, Callable[..., Any]]
user_span_hooks: Dict[str, Callable[..., Any]]
tensor: np.ndarray[Any, np.dtype[np.float_]]
tensor: np.ndarray[Any, np.dtype[np.float64]]
user_data: Dict[str, Any]
has_unknown_spaces: bool
_context: Any
Expand Down Expand Up @@ -166,7 +166,7 @@ class Doc:
) -> Doc: ...
def to_array(
self, py_attr_ids: Union[int, str, List[Union[int, str]]]
) -> np.ndarray[Any, np.dtype[np.float_]]: ...
) -> np.ndarray[Any, np.dtype[np.float64]]: ...
@staticmethod
def from_docs(
docs: List[Doc],
Expand Down

0 comments on commit c096c5c

Please sign in to comment.