Skip to content

Commit 07ee82b

Browse files
Implement .dt.total_seconds (rapidsai#17659)
Fixes: rapidsai#16802. This PR implements `.dt.total_seconds`. Authors: - GALI PREM SAGAR (https://github.com/galipremsagar). Approvers: - Matthew Roeschke (https://github.com/mroeschke). URL: rapidsai#17659
1 parent 62d72df commit 07ee82b

File tree

4 files changed

+102
-11
lines changed

4 files changed

+102
-11
lines changed

python/cudf/cudf/core/column/timedelta.py

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,10 @@
1-
# Copyright (c) 2020-2024, NVIDIA CORPORATION.
1+
# Copyright (c) 2020-2025, NVIDIA CORPORATION.
22

33
from __future__ import annotations
44

55
import datetime
66
import functools
7+
import math
78
from typing import TYPE_CHECKING, cast
89

910
import numpy as np
@@ -263,7 +264,15 @@ def time_unit(self) -> str:
263264
return np.datetime_data(self.dtype)[0]
264265

265266
def total_seconds(self) -> ColumnBase:
266-
raise NotImplementedError("total_seconds is currently not implemented")
267+
conversion = _unit_to_nanoseconds_conversion[self.time_unit] / 1e9
268+
# Typecast to decimal128 to avoid floating point precision issues
269+
# https://github.com/rapidsai/cudf/issues/17664
270+
return (
271+
(self.astype("int64") * conversion)
272+
.astype(cudf.Decimal128Dtype(38, 9))
273+
.round(decimals=abs(int(math.log10(conversion))))
274+
.astype("float64")
275+
)
267276

268277
def ceil(self, freq: str) -> ColumnBase:
269278
raise NotImplementedError("ceil is currently not implemented")

python/cudf/cudf/core/index.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# Copyright (c) 2018-2024, NVIDIA CORPORATION.
1+
# Copyright (c) 2018-2025, NVIDIA CORPORATION.
22

33
from __future__ import annotations
44

@@ -842,22 +842,22 @@ def sort_values(
842842
@_performance_tracking
843843
def _gather(self, gather_map, nullify=False, check_bounds=True):
844844
gather_map = cudf.core.column.as_column(gather_map)
845-
return cudf.Index._from_column(
845+
return Index._from_column(
846846
self._column.take(gather_map, nullify, check_bounds),
847847
name=self.name,
848848
)
849849

850850
@_performance_tracking
851851
def _apply_boolean_mask(self, boolean_mask):
852-
return cudf.Index._from_column(
852+
return Index._from_column(
853853
self._column.apply_boolean_mask(boolean_mask), name=self.name
854854
)
855855

856856
def repeat(self, repeats, axis=None):
857857
return self._as_int_index().repeat(repeats, axis)
858858

859859
def _split(self, splits):
860-
return cudf.Index._from_column(
860+
return Index._from_column(
861861
self._as_int_index()._split(splits), name=self.name
862862
)
863863

@@ -1657,7 +1657,7 @@ def _clean_nulls_from_index(self) -> Index:
16571657
if isinstance(self, (DatetimeIndex, TimedeltaIndex))
16581658
else str(cudf.NA)
16591659
)
1660-
return cudf.Index._from_column(
1660+
return Index._from_column(
16611661
self._column.astype("str").fillna(fill_value),
16621662
name=self.name,
16631663
)
@@ -2964,13 +2964,13 @@ def median(self, *, skipna: bool = True, axis: int | None = 0):
29642964
def std(self, *, skipna: bool = True, axis: int | None = 0, ddof: int = 1):
29652965
return self._column.std(skipna=skipna, ddof=ddof)
29662966

2967-
def total_seconds(self) -> cupy.ndarray:
2967+
def total_seconds(self) -> Index:
29682968
"""
29692969
Return total duration of each element expressed in seconds.
29702970
29712971
The result is returned as an Index with a float64 dtype that keeps this index's name.
29722972
"""
2973-
return self._column.total_seconds().values
2973+
return Index._from_column(self._column.total_seconds(), name=self.name)
29742974

29752975
def ceil(self, freq: str) -> Self:
29762976
"""

python/cudf/cudf/core/series.py

Lines changed: 61 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# Copyright (c) 2018-2024, NVIDIA CORPORATION.
1+
# Copyright (c) 2018-2025, NVIDIA CORPORATION.
22

33
from __future__ import annotations
44

@@ -5183,6 +5183,66 @@ def components(self) -> cudf.DataFrame:
51835183
ca, index=self.series.index
51845184
)
51855185

5186+
def total_seconds(self) -> Series:
5187+
"""
5188+
Return total duration of each element expressed in seconds.
5189+
5190+
This method is available directly on TimedeltaIndex
5191+
and on Series containing timedelta values under the ``.dt`` namespace.
5192+
5193+
Returns
5194+
-------
5195+
Index or Series
5196+
When the calling object is a TimedeltaIndex,
5197+
the return type is an Index with a float64 dtype. When the calling object
5198+
is a Series, the return type is Series of type `float64` whose
5199+
index is the same as the original.
5200+
5201+
See Also
5202+
--------
5203+
datetime.timedelta.total_seconds : Standard library version
5204+
of this method.
5205+
TimedeltaIndex.components : Return a DataFrame with components of
5206+
each Timedelta.
5207+
5208+
Examples
5209+
--------
5210+
**Series**
5211+
5212+
>>> import cudf
5213+
>>> import pandas as pd
5214+
>>> import numpy as np
5215+
>>> s = cudf.Series(pd.to_timedelta(np.arange(5), unit="D"))
5216+
>>> s
5217+
0 0 days 00:00:00
5218+
1 1 days 00:00:00
5219+
2 2 days 00:00:00
5220+
3 3 days 00:00:00
5221+
4 4 days 00:00:00
5222+
dtype: timedelta64[ns]
5223+
5224+
>>> s.dt.total_seconds()
5225+
0 0.0
5226+
1 86400.0
5227+
2 172800.0
5228+
3 259200.0
5229+
4 345600.0
5230+
dtype: float64
5231+
5232+
**TimedeltaIndex**
5233+
5234+
>>> idx = cudf.from_pandas(pd.to_timedelta(np.arange(5), unit="D"))
5235+
>>> idx
5236+
TimedeltaIndex(['0 days', '1 days', '2 days', '3 days', '4 days'],
5237+
dtype='timedelta64[ns]', freq=None)
5238+
5239+
>>> idx.total_seconds()
5240+
Index([0.0, 86400.0, 172800.0, 259200.0, 345600.0], dtype='float64')
5241+
"""
5242+
return self._return_result_like_self(
5243+
self.series._column.total_seconds()
5244+
)
5245+
51865246

51875247
@_performance_tracking
51885248
def _align_indices(series_list, how="outer", allow_non_unique=False):

python/cudf/cudf/tests/test_timedelta.py

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# Copyright (c) 2020-2024, NVIDIA CORPORATION.
1+
# Copyright (c) 2020-2025, NVIDIA CORPORATION.
22

33
import datetime
44
import operator
@@ -1506,3 +1506,25 @@ def test_tdi_unit():
15061506
result = pd_tdi.unit
15071507
expected = cudf_tdi.unit
15081508
assert result == expected
1509+
1510+
1511+
@pytest.mark.parametrize("data", _TIMEDELTA_DATA)
1512+
@pytest.mark.parametrize("dtype", utils.TIMEDELTA_TYPES)
1513+
def test_timedelta_series_total_seconds(data, dtype):
1514+
gsr = cudf.Series(data, dtype=dtype)
1515+
psr = gsr.to_pandas()
1516+
1517+
expected = psr.dt.total_seconds()
1518+
actual = gsr.dt.total_seconds()
1519+
assert_eq(expected, actual)
1520+
1521+
1522+
@pytest.mark.parametrize("data", _TIMEDELTA_DATA)
1523+
@pytest.mark.parametrize("dtype", utils.TIMEDELTA_TYPES)
1524+
def test_timedelta_index_total_seconds(request, data, dtype):
1525+
gi = cudf.Index(data, dtype=dtype)
1526+
pi = gi.to_pandas()
1527+
1528+
expected = pi.total_seconds()
1529+
actual = gi.total_seconds()
1530+
assert_eq(expected, actual)

0 commit comments

Comments
 (0)