From e591c6eb27da389740d78bd11d12f9625583e738 Mon Sep 17 00:00:00 2001 From: Marshall Crumiller Date: Thu, 13 Nov 2025 18:45:12 -0500 Subject: [PATCH 1/6] Add num_samples and negative intervals --- .../src/dispatch/range/datetime_range.rs | 307 ++++- .../src/dispatch/range/time_range.rs | 4 +- .../polars-expr/src/dispatch/range/utils.rs | 208 +++- crates/polars-python/src/functions/range.rs | 108 +- crates/polars-time/src/date_range.rs | 353 ++++-- crates/polars-time/src/upsample.rs | 2 +- crates/polars-time/src/windows/test.rs | 12 +- .../tests/it/lazy/expressions/expand.rs | 7 +- .../polars/tests/it/lazy/group_by_dynamic.rs | 9 +- crates/polars/tests/it/time/date_range.rs | 15 +- .../expressions/column-selections.rs | 30 +- .../transformations/time-series/resampling.rs | 9 +- .../transformations/time-series/rolling.rs | 18 +- py-polars/src/polars/_plr.pyi | 26 +- .../src/polars/functions/range/date_range.py | 171 ++- .../polars/functions/range/datetime_range.py | 144 ++- .../unit/functions/range/test_date_range.py | 396 +++++++ .../unit/functions/range/test_date_ranges.py | 572 ++++++++- .../functions/range/test_datetime_range.py | 610 +++++++++- .../functions/range/test_datetime_ranges.py | 1047 +++++++++++++++++ .../unit/functions/range/test_time_range.py | 36 +- 21 files changed, 3774 insertions(+), 310 deletions(-) diff --git a/crates/polars-expr/src/dispatch/range/datetime_range.rs b/crates/polars-expr/src/dispatch/range/datetime_range.rs index c60a0eeb0af3..ab2ca165f8e2 100644 --- a/crates/polars-expr/src/dispatch/range/datetime_range.rs +++ b/crates/polars-expr/src/dispatch/range/datetime_range.rs @@ -3,15 +3,27 @@ use polars_core::prelude::time_zone::parse_time_zone; use polars_core::prelude::*; #[cfg(feature = "dtype-date")] use polars_plan::dsl::DateRangeArgs; -use polars_time::{ClosedWindow, Duration, datetime_range_impl}; +use polars_time::{ + ClosedWindow, Duration, datetime_range_impl_start_end_interval, + datetime_range_impl_start_end_samples, 
datetime_range_impl_start_interval_samples, +}; use super::utils::{ - ensure_items_contain_exactly_one_value, temporal_ranges_impl_broadcast, - temporal_series_to_i64_scalar, + ensure_items_contain_exactly_one_value, temporal_ranges_impl_broadcast_2args, + temporal_ranges_impl_broadcast_3args, temporal_series_to_i64_scalar, }; const CAPACITY_FACTOR: usize = 5; +// Swap left / right closure in the event of an interval inversion. +fn swap_closed_lr(closed: ClosedWindow) -> ClosedWindow { + match closed { + ClosedWindow::Left => ClosedWindow::Right, + ClosedWindow::Right => ClosedWindow::Left, + _ => closed, + } +} + #[cfg(feature = "dtype-date")] pub(super) fn date_range( s: &[Column], @@ -27,9 +39,20 @@ pub(super) fn date_range( interval.unwrap(), closed, ), - DateRangeArgs::StartEndSamples => todo!(), - DateRangeArgs::StartIntervalSamples => todo!(), - DateRangeArgs::EndIntervalSamples => todo!(), + DateRangeArgs::StartEndSamples => { + dt_range_start_end_samples(&s[0].cast(&dt_type)?, &s[1].cast(&dt_type)?, &s[2], closed) + }, + DateRangeArgs::StartIntervalSamples => { + dt_range_start_interval_samples(&s[0].cast(&dt_type)?, interval.unwrap(), &s[1], closed) + }, + // We negate the interval, start at the end, and then reverse. + DateRangeArgs::EndIntervalSamples => dt_range_start_interval_samples( + &s[0].cast(&dt_type)?, + -interval.unwrap(), + &s[1], + swap_closed_lr(closed), + ) + .map(|c| c.reverse()), } .map(|c| c.cast(&DataType::Date))? 
} @@ -49,9 +72,18 @@ pub(super) fn date_ranges( interval.unwrap(), closed, ), - DateRangeArgs::StartEndSamples => todo!(), - DateRangeArgs::StartIntervalSamples => todo!(), - DateRangeArgs::EndIntervalSamples => todo!(), + DateRangeArgs::StartEndSamples => { + dt_ranges_start_end_samples(&s[0].cast(&dt_type)?, &s[1].cast(&dt_type)?, &s[2], closed) + }, + DateRangeArgs::StartIntervalSamples => dt_ranges_start_interval_samples( + &s[0].cast(&dt_type)?, + interval.unwrap(), + &s[1], + closed, + ), + DateRangeArgs::EndIntervalSamples => { + dt_ranges_end_interval_samples(&s[0].cast(&dt_type)?, interval.unwrap(), &s[1], closed) + }, } .map(|c| c.cast(&DataType::List(Box::new(DataType::Date))))? } @@ -67,9 +99,17 @@ pub(super) fn datetime_range( DateRangeArgs::StartEndInterval => { dt_range_start_end_interval(&s[0], &s[1], interval.unwrap(), closed) }, - DateRangeArgs::StartEndSamples => todo!(), - DateRangeArgs::StartIntervalSamples => todo!(), - DateRangeArgs::EndIntervalSamples => todo!(), + DateRangeArgs::StartEndSamples => dt_range_start_end_samples(&s[0], &s[1], &s[2], closed), + DateRangeArgs::StartIntervalSamples => { + dt_range_start_interval_samples(&s[0], interval.unwrap(), &s[1], closed) + }, + DateRangeArgs::EndIntervalSamples => dt_range_start_interval_samples( + &s[0], + -interval.unwrap(), + &s[1], + swap_closed_lr(closed), + ) + .map(|c| c.reverse()), } } @@ -84,9 +124,13 @@ pub(super) fn datetime_ranges( DateRangeArgs::StartEndInterval => { dt_ranges_start_end_interval(&s[0], &s[1], interval.unwrap(), closed) }, - DateRangeArgs::StartEndSamples => todo!(), - DateRangeArgs::StartIntervalSamples => todo!(), - DateRangeArgs::EndIntervalSamples => todo!(), + DateRangeArgs::StartEndSamples => dt_ranges_start_end_samples(&s[0], &s[1], &s[2], closed), + DateRangeArgs::StartIntervalSamples => { + dt_ranges_start_interval_samples(&s[0], interval.unwrap(), &s[1], closed) + }, + DateRangeArgs::EndIntervalSamples => { + dt_ranges_end_interval_samples(&s[0], 
interval.unwrap(), &s[1], closed) + }, } } @@ -111,8 +155,15 @@ fn dt_range_start_end_interval( .ok_or_else(|| polars_err!(ComputeError: "start is an out-of-range time."))?; let end = temporal_series_to_i64_scalar(end) .ok_or_else(|| polars_err!(ComputeError: "end is an out-of-range time."))?; - let result = - datetime_range_impl(name.clone(), start, end, interval, closed, *tu, tz.as_ref())?; + let result = datetime_range_impl_start_end_interval( + name.clone(), + start, + end, + interval, + closed, + *tu, + tz.as_ref(), + )?; Ok(result.into_column()) } else { polars_bail!(ComputeError: "expected Datetime input, got {:?}", dtype); @@ -146,7 +197,7 @@ fn dt_ranges_start_end_interval( _ => None, }; let range_impl = |start, end, builder: &mut ListPrimitiveChunkedBuilder| { - let rng = datetime_range_impl( + let rng = datetime_range_impl_start_end_interval( PlSmallStr::EMPTY, start, end, @@ -159,7 +210,225 @@ fn dt_ranges_start_end_interval( Ok(()) }; - temporal_ranges_impl_broadcast(start, end, range_impl, &mut builder)? + temporal_ranges_impl_broadcast_2args(start, end, range_impl, &mut builder)? 
+ } else { + polars_bail!(ComputeError: "expected Datetime input, got {:?}", dtype); + }; + + let to_type = DataType::List(Box::new(dtype.clone())); + out.cast(&to_type) +} + +fn dt_range_start_end_samples( + start: &Column, + end: &Column, + num_samples: &Column, + closed: ClosedWindow, +) -> PolarsResult { + ensure_items_contain_exactly_one_value(&[start, end], &["start", "end"])?; + ensure_items_contain_exactly_one_value(&[start, end], &["start", "end"])?; + let dtype = start.dtype(); + + if let DataType::Datetime(tu, time_zone) = dtype { + let tz = match time_zone { + #[cfg(feature = "timezones")] + Some(tz) => Some(parse_time_zone(tz)?), + _ => None, + }; + + let name = start.name(); + let start = temporal_series_to_i64_scalar(start) + .ok_or_else(|| polars_err!(ComputeError: "start is an out-of-range time."))?; + let end = temporal_series_to_i64_scalar(end) + .ok_or_else(|| polars_err!(ComputeError: "end is an out-of-range time."))?; + let num_samples = num_samples.get(0).unwrap().extract::().unwrap(); + let result = datetime_range_impl_start_end_samples( + name.clone(), + start, + end, + num_samples, + closed, + *tu, + tz.as_ref(), + )?; + Ok(result.into_column()) + } else { + polars_bail!(ComputeError: "expected Datetime input, got {:?}", dtype); + } +} + +fn dt_ranges_start_end_samples( + start: &Column, + end: &Column, + num_samples: &Column, + closed: ClosedWindow, +) -> PolarsResult { + let dtype = start.dtype(); + let start = start.to_physical_repr(); + let start = start.i64()?; + let end = end.to_physical_repr(); + let end = end.i64()?; + let num_samples = num_samples.i64()?; + + let out = if let DataType::Datetime(tu, time_zone) = dtype { + let mut builder = ListPrimitiveChunkedBuilder::::new( + start.name().clone(), + start.len(), + start.len() * CAPACITY_FACTOR, + DataType::Int64, + ); + + let tz = match time_zone { + #[cfg(feature = "timezones")] + Some(tz) => Some(parse_time_zone(tz)?), + _ => None, + }; + let range_impl = + |start, end, 
num_samples, builder: &mut ListPrimitiveChunkedBuilder| { + let rng = datetime_range_impl_start_end_samples( + PlSmallStr::EMPTY, + start, + end, + num_samples, + closed, + *tu, + tz.as_ref(), + )?; + builder.append_slice(rng.physical().cont_slice().unwrap()); + Ok(()) + }; + + temporal_ranges_impl_broadcast_3args(start, end, num_samples, range_impl, &mut builder)? + } else { + polars_bail!(ComputeError: "expected Datetime input, got {:?}", dtype); + }; + + let to_type = DataType::List(Box::new(dtype.clone())); + out.cast(&to_type) +} + +fn dt_range_start_interval_samples( + start: &Column, + interval: Duration, + num_samples: &Column, + closed: ClosedWindow, +) -> PolarsResult { + ensure_items_contain_exactly_one_value(&[start, num_samples], &["start", "num_samples"])?; + let dtype = start.dtype(); + + if let DataType::Datetime(tu, time_zone) = dtype { + let tz = match time_zone { + #[cfg(feature = "timezones")] + Some(tz) => Some(parse_time_zone(tz)?), + _ => None, + }; + let name = start.name(); + let start = temporal_series_to_i64_scalar(start) + .ok_or_else(|| polars_err!(ComputeError: "start is an out-of-range time."))?; + let num_samples = num_samples.get(0).unwrap().extract::().unwrap(); + let result = datetime_range_impl_start_interval_samples( + name.clone(), + start, + interval, + num_samples, + closed, + *tu, + tz.as_ref(), + )?; + Ok(result.into_column()) + } else { + polars_bail!(ComputeError: "Expected Datetime input, got {:?}", dtype); + } +} + +fn dt_ranges_start_interval_samples( + start: &Column, + interval: Duration, + num_samples: &Column, + closed: ClosedWindow, +) -> PolarsResult { + let dtype = start.dtype(); + let start = start.to_physical_repr(); + let start = start.i64()?; + let num_samples = num_samples.i64()?; + + let out = if let DataType::Datetime(tu, time_zone) = dtype { + let mut builder = ListPrimitiveChunkedBuilder::::new( + start.name().clone(), + start.len(), + start.len() * CAPACITY_FACTOR, + DataType::Int64, + ); + + let tz = 
match time_zone { + #[cfg(feature = "timezones")] + Some(tz) => Some(parse_time_zone(tz)?), + _ => None, + }; + let range_impl = + |start, num_samples, builder: &mut ListPrimitiveChunkedBuilder| { + let rng = datetime_range_impl_start_interval_samples( + PlSmallStr::EMPTY, + start, + interval, + num_samples, + closed, + *tu, + tz.as_ref(), + )?; + builder.append_slice(rng.physical().cont_slice().unwrap()); + Ok(()) + }; + + temporal_ranges_impl_broadcast_2args(start, num_samples, range_impl, &mut builder)? + } else { + polars_bail!(ComputeError: "expected Datetime input, got {:?}", dtype); + }; + + let to_type = DataType::List(Box::new(dtype.clone())); + out.cast(&to_type) +} + +fn dt_ranges_end_interval_samples( + end: &Column, + interval: Duration, + num_samples: &Column, + closed: ClosedWindow, +) -> PolarsResult { + let dtype = end.dtype(); + let end = end.to_physical_repr(); + let end = end.i64()?; + let num_samples = num_samples.i64()?; + + let out = if let DataType::Datetime(tu, time_zone) = dtype { + let mut builder = ListPrimitiveChunkedBuilder::::new( + end.name().clone(), + end.len(), + end.len() * CAPACITY_FACTOR, + DataType::Int64, + ); + + let tz = match time_zone { + #[cfg(feature = "timezones")] + Some(tz) => Some(parse_time_zone(tz)?), + _ => None, + }; + let range_impl = + |end, num_samples, builder: &mut ListPrimitiveChunkedBuilder| { + let rng = datetime_range_impl_start_interval_samples( + PlSmallStr::EMPTY, + end, + -interval, + num_samples, + swap_closed_lr(closed), + *tu, + tz.as_ref(), + )?; + builder.append_slice(rng.physical().reverse().cont_slice().unwrap()); + Ok(()) + }; + + temporal_ranges_impl_broadcast_2args(end, num_samples, range_impl, &mut builder)? 
} else { polars_bail!(ComputeError: "expected Datetime input, got {:?}", dtype); }; diff --git a/crates/polars-expr/src/dispatch/range/time_range.rs b/crates/polars-expr/src/dispatch/range/time_range.rs index 0967d2fb2848..8ea1f2666b56 100644 --- a/crates/polars-expr/src/dispatch/range/time_range.rs +++ b/crates/polars-expr/src/dispatch/range/time_range.rs @@ -2,7 +2,7 @@ use polars_core::prelude::*; use polars_time::{ClosedWindow, Duration, time_range_impl}; use super::utils::{ - ensure_items_contain_exactly_one_value, temporal_ranges_impl_broadcast, + ensure_items_contain_exactly_one_value, temporal_ranges_impl_broadcast_2args, temporal_series_to_i64_scalar, }; @@ -59,7 +59,7 @@ pub(super) fn time_ranges( Ok(()) }; - let out = temporal_ranges_impl_broadcast(start, end, range_impl, &mut builder)?; + let out = temporal_ranges_impl_broadcast_2args(start, end, range_impl, &mut builder)?; let to_type = DataType::List(Box::new(DataType::Time)); out.cast(&to_type) diff --git a/crates/polars-expr/src/dispatch/range/utils.rs b/crates/polars-expr/src/dispatch/range/utils.rs index f39ecddac5fd..141a81627295 100644 --- a/crates/polars-expr/src/dispatch/range/utils.rs +++ b/crates/polars-expr/src/dispatch/range/utils.rs @@ -136,10 +136,10 @@ where Ok(out) } -/// Create a ranges column from the given start/end columns and a range function. -pub(super) fn temporal_ranges_impl_broadcast( - start: &ChunkedArray, - end: &ChunkedArray, +/// Create a ranges column from two columns and a range function. 
+pub(super) fn temporal_ranges_impl_broadcast_2args( + s1: &ChunkedArray, + s2: &ChunkedArray, range_impl: F, builder: &mut ListPrimitiveChunkedBuilder, ) -> PolarsResult @@ -149,44 +149,161 @@ where F: Fn(T::Native, T::Native, &mut ListPrimitiveChunkedBuilder) -> PolarsResult<()>, ListPrimitiveChunkedBuilder: ListBuilderTrait, { - match (start.len(), end.len()) { - (len_start, len_end) if len_start == len_end => { - build_temporal_ranges::<_, _, T, U, F>( - start.downcast_iter().flatten(), - end.downcast_iter().flatten(), + match (s1.len(), s2.len()) { + (len_s1, len_s2) if len_s1 == len_s2 => { + build_temporal_ranges_2args::<_, _, T, U, F>( + s1.downcast_iter().flatten(), + s2.downcast_iter().flatten(), range_impl, builder, )?; }, - (1, len_end) => { - let start_scalar = start.get(0); - match start_scalar { - Some(start) => build_temporal_ranges::<_, _, T, U, F>( - std::iter::repeat(Some(&start)), - end.downcast_iter().flatten(), + (1, len_s2) => { + let s1_scalar = s1.get(0); + match s1_scalar { + Some(s1) => build_temporal_ranges_2args::<_, _, T, U, F>( + std::iter::repeat(Some(&s1)), + s2.downcast_iter().flatten(), range_impl, builder, )?, - None => build_nulls(builder, len_end), + None => build_nulls(builder, len_s2), } }, - (len_start, 1) => { - let end_scalar = end.get(0); - match end_scalar { - Some(end) => build_temporal_ranges::<_, _, T, U, F>( - start.downcast_iter().flatten(), - std::iter::repeat(Some(&end)), + (len_s1, 1) => { + let s2_scalar = s2.get(0); + match s2_scalar { + Some(s2) => build_temporal_ranges_2args::<_, _, T, U, F>( + s1.downcast_iter().flatten(), + std::iter::repeat(Some(&s2)), range_impl, builder, )?, - None => build_nulls(builder, len_start), + None => build_nulls(builder, len_s1), + } + }, + (len_s1, len_s2) => { + polars_bail!( + ComputeError: + "lengths of `s1` ({}) and `s2` ({}) do not match", + len_s1, len_s2 + ) + }, + }; + let out = builder.finish().into_column(); + Ok(out) +} + +/// Create a ranges column from three columns 
and a range function. +pub(super) fn temporal_ranges_impl_broadcast_3args( + s1: &ChunkedArray, + s2: &ChunkedArray, + s3: &ChunkedArray, + range_impl: F, + builder: &mut ListPrimitiveChunkedBuilder, +) -> PolarsResult +where + T: PolarsIntegerType, + U: PolarsIntegerType, + F: Fn(T::Native, T::Native, T::Native, &mut ListPrimitiveChunkedBuilder) -> PolarsResult<()>, + ListPrimitiveChunkedBuilder: ListBuilderTrait, +{ + match (s1.len(), s2.len(), s3.len()) { + (len1, len2, len3) if len1 == len2 && len1 == len3 => { + build_temporal_ranges_3args::<_, _, _, T, U, F>( + s1.downcast_iter().flatten(), + s2.downcast_iter().flatten(), + s3.downcast_iter().flatten(), + range_impl, + builder, + )?; + }, + (len1, len2, 1) if (len1 == len2) => { + let s3_scalar = s3.get(0); + match s3_scalar { + Some(s3) => build_temporal_ranges_3args::<_, _, _, T, U, F>( + s1.downcast_iter().flatten(), + s2.downcast_iter().flatten(), + std::iter::repeat(Some(&s3)), + range_impl, + builder, + )?, + None => build_nulls(builder, len1), + } + }, + (len1, 1, len3) if (len1 == len3) => { + let s2_scalar = s2.get(0); + match s2_scalar { + Some(s2) => build_temporal_ranges_3args::<_, _, _, T, U, F>( + s1.downcast_iter().flatten(), + std::iter::repeat(Some(&s2)), + s3.downcast_iter().flatten(), + range_impl, + builder, + )?, + None => build_nulls(builder, len1), } }, - (len_start, len_end) => { + (1, len2, len3) if (len2 == len3) => { + let s1_scalar = s1.get(0); + match s1_scalar { + Some(s1) => build_temporal_ranges_3args::<_, _, _, T, U, F>( + std::iter::repeat(Some(&s1)), + s2.downcast_iter().flatten(), + s3.downcast_iter().flatten(), + range_impl, + builder, + )?, + None => build_nulls(builder, len2), + } + }, + (1, 1, len3) => { + let s1_scalar = s1.get(0); + let s2_scalar = s2.get(0); + match (s1_scalar, s2_scalar) { + (Some(s1), Some(s2)) => build_temporal_ranges_3args::<_, _, _, T, U, F>( + std::iter::repeat(Some(&s1)), + std::iter::repeat(Some(&s2)), + s3.downcast_iter().flatten(), + 
range_impl, + builder, + )?, + _ => build_nulls(builder, len3), + } + }, + (1, len2, 1) => { + let s1_scalar = s1.get(0); + let s3_scalar = s3.get(0); + match (s1_scalar, s3_scalar) { + (Some(s1), Some(s3)) => build_temporal_ranges_3args::<_, _, _, T, U, F>( + std::iter::repeat(Some(&s1)), + s2.downcast_iter().flatten(), + std::iter::repeat(Some(&s3)), + range_impl, + builder, + )?, + _ => build_nulls(builder, len2), + } + }, + (len1, 1, 1) => { + let s2_scalar = s2.get(0); + let s3_scalar = s3.get(0); + match (s2_scalar, s3_scalar) { + (Some(s2), Some(s3)) => build_temporal_ranges_3args::<_, _, _, T, U, F>( + s1.downcast_iter().flatten(), + std::iter::repeat(Some(&s2)), + std::iter::repeat(Some(&s3)), + range_impl, + builder, + )?, + _ => build_nulls(builder, len1), + } + }, + (len1, len2, len3) => { polars_bail!( ComputeError: - "lengths of `start` ({}) and `end` ({}) do not match", - len_start, len_end + "lengths of `s1` ({}), `s2` ({}), and `s3` ({}) do not match", + len1, len2, len3 ) }, }; @@ -220,10 +337,10 @@ where Ok(()) } -/// Iterate over a start and end column and create a range for each entry. -fn build_temporal_ranges<'a, I, J, T, U, F>( - start: I, - end: J, +/// Iterate over two columns and create a range for each entry. +fn build_temporal_ranges_2args<'a, I, J, T, U, F>( + s1: I, + s2: J, range_impl: F, builder: &mut ListPrimitiveChunkedBuilder, ) -> PolarsResult<()> @@ -235,9 +352,34 @@ where F: Fn(T::Native, T::Native, &mut ListPrimitiveChunkedBuilder) -> PolarsResult<()>, ListPrimitiveChunkedBuilder: ListBuilderTrait, { - for (start, end) in start.zip(end) { - match (start, end) { - (Some(start), Some(end)) => range_impl(*start, *end, builder)?, + for (s1, s2) in s1.zip(s2) { + match (s1, s2) { + (Some(s1), Some(s2)) => range_impl(*s1, *s2, builder)?, + _ => builder.append_null(), + } + } + Ok(()) +} +/// Iterate over three columns and create a range for each entry. 
+fn build_temporal_ranges_3args<'a, I, J, K, T, U, F>( + s1: I, + s2: J, + s3: K, + range_impl: F, + builder: &mut ListPrimitiveChunkedBuilder, +) -> PolarsResult<()> +where + I: Iterator>, + J: Iterator>, + K: Iterator>, + T: PolarsIntegerType, + U: PolarsIntegerType, + F: Fn(T::Native, T::Native, T::Native, &mut ListPrimitiveChunkedBuilder) -> PolarsResult<()>, + ListPrimitiveChunkedBuilder: ListBuilderTrait, +{ + for ((s1, s2), s3) in s1.zip(s2).zip(s3) { + match (s1, s2, s3) { + (Some(s1), Some(s2), Some(s3)) => range_impl(*s1, *s2, *s3, builder)?, _ => builder.append_null(), } } diff --git a/crates/polars-python/src/functions/range.rs b/crates/polars-python/src/functions/range.rs index f0a801364016..43e771d61fae 100644 --- a/crates/polars-python/src/functions/range.rs +++ b/crates/polars-python/src/functions/range.rs @@ -62,69 +62,72 @@ pub fn int_ranges( #[pyfunction] pub fn date_range( - start: PyExpr, - end: PyExpr, - interval: &str, + start: Option, + end: Option, + interval: Option<&str>, + num_samples: Option, closed: Wrap, ) -> PyResult { - let start = start.inner; - let end = end.inner; - let interval = Duration::try_parse(interval).map_err(PyPolarsErr::from)?; + let start = start.map(|x| x.inner); + let end = end.map(|x| x.inner); + let num_samples = num_samples.map(|x| x.inner); + let interval = match interval { + None => None, + Some(x) => Some(Duration::try_parse(x).map_err(PyPolarsErr::from)?), + }; let closed = closed.0; - let out = dsl::date_range( - Some(start), - Some(end), - Some(interval), - None, // TODO: num_samples - closed, - ) - .map_err(PyPolarsErr::from)?; + let out = + dsl::date_range(start, end, interval, num_samples, closed).map_err(PyPolarsErr::from)?; Ok(out.into()) } #[pyfunction] pub fn date_ranges( - start: PyExpr, - end: PyExpr, - interval: &str, + start: Option, + end: Option, + interval: Option<&str>, + num_samples: Option, closed: Wrap, ) -> PyResult { - let start = start.inner; - let end = end.inner; - let interval = 
Duration::try_parse(interval).map_err(PyPolarsErr::from)?; + let start = start.map(|x| x.inner); + let end = end.map(|x| x.inner); + let interval = match interval { + None => None, + Some(x) => Some(Duration::try_parse(x).map_err(PyPolarsErr::from)?), + }; + let num_samples = num_samples.map(|x| x.inner); let closed = closed.0; - let out = dsl::date_ranges( - Some(start), - Some(end), - Some(interval), - None, // TODO: num_samples - closed, - ) - .map_err(PyPolarsErr::from)?; + let out = + dsl::date_ranges(start, end, interval, num_samples, closed).map_err(PyPolarsErr::from)?; Ok(out.into()) } #[pyfunction] -#[pyo3(signature = (start, end, interval, closed, time_unit, time_zone))] +#[pyo3(signature = (start, end, interval, num_samples, closed, time_unit=None, time_zone=Wrap(None)))] pub fn datetime_range( - start: PyExpr, - end: PyExpr, - interval: &str, + start: Option, + end: Option, + interval: Option<&str>, + num_samples: Option, closed: Wrap, time_unit: Option>, time_zone: Wrap>, ) -> PyResult { - let start = start.inner; - let end = end.inner; - let interval = Duration::try_parse(interval).map_err(PyPolarsErr::from)?; + let start = start.map(|x| x.inner); + let end = end.map(|x| x.inner); + let interval = match interval { + None => None, + Some(x) => Some(Duration::try_parse(x).map_err(PyPolarsErr::from)?), + }; + let num_samples = num_samples.map(|x| x.inner); let closed = closed.0; let time_unit = time_unit.map(|x| x.0); let time_zone = time_zone.0; let out = dsl::datetime_range( - Some(start), - Some(end), - Some(interval), - None, // TODO: num_samples + start, + end, + interval, + num_samples, closed, time_unit, time_zone, @@ -134,26 +137,31 @@ pub fn datetime_range( } #[pyfunction] -#[pyo3(signature = (start, end, interval, closed, time_unit, time_zone))] +#[pyo3(signature = (start, end, interval, num_samples, closed, time_unit=None, time_zone=Wrap(None)))] pub fn datetime_ranges( - start: PyExpr, - end: PyExpr, - interval: &str, + start: Option, + end: 
Option, + interval: Option<&str>, + num_samples: Option, closed: Wrap, time_unit: Option>, time_zone: Wrap>, ) -> PyResult { - let start = start.inner; - let end = end.inner; - let interval = Duration::try_parse(interval).map_err(PyPolarsErr::from)?; + let start = start.map(|x| x.inner); + let end = end.map(|x| x.inner); + let interval = match interval { + None => None, + Some(x) => Some(Duration::try_parse(x).map_err(PyPolarsErr::from)?), + }; + let num_samples = num_samples.map(|x| x.inner); let closed = closed.0; let time_unit = time_unit.map(|x| x.0); let time_zone = time_zone.0; let out = dsl::datetime_ranges( - Some(start), - Some(end), - Some(interval), - None, // TODO: num_samples + start, + end, + interval, + num_samples, closed, time_unit, time_zone, diff --git a/crates/polars-time/src/date_range.rs b/crates/polars-time/src/date_range.rs index 5ad0a3d27b22..de0004732e70 100644 --- a/crates/polars-time/src/date_range.rs +++ b/crates/polars-time/src/date_range.rs @@ -1,5 +1,6 @@ use arrow::legacy::time_zone::Tz; use chrono::{Datelike, NaiveDateTime, NaiveTime}; +use num_traits::signum; use polars_core::chunked_array::temporal::time_to_time64ns; use polars_core::prelude::*; use polars_core::series::IsSorted; @@ -11,46 +12,120 @@ pub fn in_nanoseconds_window(ndt: &NaiveDateTime) -> bool { !(ndt.year() > 2554 || ndt.year() < 1386) } -/// Create a [`DatetimeChunked`] from a given `start` and `end` date and a given `interval`. +/// Create a [`DatetimeChunked`] from a given `start`, `end`, `interval`, and `num_samples`. +#[allow(clippy::too_many_arguments)] pub fn date_range( name: PlSmallStr, - start: NaiveDateTime, - end: NaiveDateTime, + start: Option, + end: Option, + interval: Option, + num_samples: Option, + closed: ClosedWindow, + tu: TimeUnit, + tz: Option<&Tz>, +) -> PolarsResult { + macro_rules! 
extract { + ($t:ident, $tu:ident) => { + match $tu { + TimeUnit::Nanoseconds => $t.and_utc().timestamp_nanos_opt().unwrap(), + TimeUnit::Microseconds => $t.and_utc().timestamp_micros(), + TimeUnit::Milliseconds => $t.and_utc().timestamp_millis(), + } + }; + } + + match (start, end, interval, num_samples) { + (Some(start), Some(end), Some(interval), None) => { + let start = extract!(start, tu); + let end = extract!(end, tu); + datetime_range_impl_start_end_interval(name, start, end, interval, closed, tu, tz) + }, + (Some(start), Some(end), None, Some(num_samples)) => { + let start = extract!(start, tu); + let end = extract!(end, tu); + datetime_range_impl_start_end_samples(name, start, end, num_samples, closed, tu, tz) + }, + (Some(start), None, Some(interval), Some(num_samples)) => { + let start = extract!(start, tu); + datetime_range_impl_start_interval_samples( + name, + start, + interval, + num_samples, + closed, + tu, + tz, + ) + }, + (None, Some(end), Some(interval), Some(num_samples)) => { + let end = extract!(end, tu); + let out = datetime_range_impl_start_interval_samples( + name, + end, + -interval, + num_samples, + closed, + tu, + tz, + )?; + let out = out.into_physical().reverse(); + match tz { + #[cfg(feature = "timezones")] + Some(tz) => Ok(out.into_datetime(tu, Some(TimeZone::from_chrono(tz)))), + _ => Ok(out.into_datetime(tu, None)), + } + }, + _ => { + polars_bail!(InvalidOperation: "Exactly three of 'start', 'end', 'interval', and 'num_samples' must be supplied."); + }, + } +} + +#[doc(hidden)] +pub fn datetime_range_impl_start_end_interval( + name: PlSmallStr, + start: i64, + end: i64, interval: Duration, closed: ClosedWindow, tu: TimeUnit, tz: Option<&Tz>, ) -> PolarsResult { - let (start, end) = match tu { - TimeUnit::Nanoseconds => ( - start.and_utc().timestamp_nanos_opt().unwrap(), - end.and_utc().timestamp_nanos_opt().unwrap(), - ), - TimeUnit::Microseconds => ( - start.and_utc().timestamp_micros(), - end.and_utc().timestamp_micros(), - ), - 
TimeUnit::Milliseconds => ( - start.and_utc().timestamp_millis(), - end.and_utc().timestamp_millis(), - ), + let values = if (end < start) != interval.negative { + // Interval is wrong direction, result is empty. + Vec::::new() + } else { + datetime_range_i64_start_end_interval(start, end, interval, closed, tu, tz)? + }; + let out = Int64Chunked::new_vec(name, values); + let mut out = match tz { + #[cfg(feature = "timezones")] + Some(tz) => out.into_datetime(tu, Some(TimeZone::from_chrono(tz))), + _ => out.into_datetime(tu, None), + }; + + let flag = if interval.negative { + IsSorted::Descending + } else { + IsSorted::Ascending }; - datetime_range_impl(name, start, end, interval, closed, tu, tz) + out.physical_mut().set_sorted_flag(flag); + Ok(out) } #[doc(hidden)] -pub fn datetime_range_impl( +pub fn datetime_range_impl_start_interval_samples( name: PlSmallStr, start: i64, - end: i64, interval: Duration, + num_samples: i64, closed: ClosedWindow, tu: TimeUnit, tz: Option<&Tz>, ) -> PolarsResult { let out = Int64Chunked::new_vec( name, - datetime_range_i64(start, end, interval, closed, tu, tz)?, + datetime_range_i64_start_interval_samples(start, interval, num_samples, closed, tu, tz)?, ); let mut out = match tz { #[cfg(feature = "timezones")] @@ -58,7 +133,69 @@ pub fn datetime_range_impl( _ => out.into_datetime(tu, None), }; - out.physical_mut().set_sorted_flag(IsSorted::Ascending); + let flag = if interval.negative { + IsSorted::Descending + } else { + IsSorted::Ascending + }; + out.physical_mut().set_sorted_flag(flag); + Ok(out) +} + +#[doc(hidden)] +pub fn datetime_range_impl_start_end_samples( + name: PlSmallStr, + start: i64, + end: i64, + num_samples: i64, + closed: ClosedWindow, + tu: TimeUnit, + tz: Option<&Tz>, +) -> PolarsResult { + let ascending = start >= end; + let values = if num_samples == 0 { + Vec::::new() + } else { + // The bin width depends on the interval closure. 
+ let divisor = match closed { + ClosedWindow::None => num_samples + 1, + ClosedWindow::Left => num_samples, + ClosedWindow::Right => num_samples, + ClosedWindow::Both => num_samples - 1, + }; + let bin_width = (end - start) as f64 / (divisor as f64); + + // For left-open intervals, increase the left by one interval. + let start = if closed == ClosedWindow::None || closed == ClosedWindow::Right { + start as f64 + bin_width + } else { + start as f64 + }; + + let mut values: Vec = (0..num_samples) + .map(|x| (x as f64 * bin_width + start) as i64) + .collect(); + + // For right-closed and fully-closed interval, ensure the last point is exact. + if closed == ClosedWindow::Right || closed == ClosedWindow::Both { + let last = values.len() - 1; + values[last] = end; + } + values + }; + let out = Int64Chunked::new_vec(name, values); + let mut out = match tz { + #[cfg(feature = "timezones")] + Some(tz) => out.into_datetime(tu, Some(TimeZone::from_chrono(tz))), + _ => out.into_datetime(tu, None), + }; + + let flag = if ascending { + IsSorted::Ascending + } else { + IsSorted::Descending + }; + out.physical_mut().set_sorted_flag(flag); Ok(out) } @@ -85,7 +222,14 @@ pub fn time_range_impl( ) -> PolarsResult { let mut out = Int64Chunked::new_vec( name, - datetime_range_i64(start, end, interval, closed, TimeUnit::Nanoseconds, None)?, + datetime_range_i64_start_end_interval( + start, + end, + interval, + closed, + TimeUnit::Nanoseconds, + None, + )?, ) .into_time(); @@ -94,27 +238,22 @@ pub fn time_range_impl( } /// vector of i64 representing temporal values -pub(crate) fn datetime_range_i64( - start: i64, - end: i64, +pub(crate) fn datetime_range_i64_start_end_interval( + mut start: i64, + mut end: i64, interval: Duration, closed: ClosedWindow, time_unit: TimeUnit, time_zone: Option<&Tz>, ) -> PolarsResult> { - if start > end { - return Ok(Vec::new()); - } - polars_ensure!( - !interval.negative && !interval.is_zero(), - ComputeError: "`interval` must be positive" - ); - - let 
duration = match time_unit { + let mut step = match time_unit { TimeUnit::Nanoseconds => interval.duration_ns(), TimeUnit::Microseconds => interval.duration_us(), TimeUnit::Milliseconds => interval.duration_ms(), }; + if interval.negative { + step = -step; + } let time_zone_opt: Option = match time_zone { #[cfg(feature = "timezones")] Some(tz) => Some(TimeZone::from_chrono(tz)), @@ -123,52 +262,134 @@ pub(crate) fn datetime_range_i64( if interval.is_constant_duration(time_zone_opt.as_ref()) { // Fast path! - let step: usize = duration.try_into().map_err( - |_err| polars_err!(ComputeError: "Could not convert {:?} to usize", duration), - )?; polars_ensure!( step != 0, - InvalidOperation: "interval {} is too small for time unit {} and was rounded down to zero", - interval, + InvalidOperation: "interval {} is too small for time unit {} and was rounded to zero", + if interval.negative { -interval } else { interval }, time_unit, ); - return match closed { - ClosedWindow::Both => Ok((start..=end).step_by(step).collect::>()), - ClosedWindow::None => Ok((start + duration..end).step_by(step).collect::>()), - ClosedWindow::Left => Ok((start..end).step_by(step).collect::>()), - ClosedWindow::Right => Ok((start + duration..=end).step_by(step).collect::>()), + + // Update end points based on interval closure. + if closed == ClosedWindow::Right || closed == ClosedWindow::None { + start += step; // This works whether step is negative or positive. }; + if closed == ClosedWindow::Left || closed == ClosedWindow::None { + end -= signum(step); // If our interval is negative, we increment the end + } + + let out = if step < 0 { + // Negative interval, we move backwards. + (end..=start) + .rev() + .step_by(-step as usize) + .collect::>() + } else { + // Positive interval, we move forwards. 
+ (start..=end).step_by(step as usize).collect::>() + }; + return Ok(out); } - let size = ((end - start) / duration + 1) as usize; + let size = ((end - start) / step + 1) as usize; let offset_fn = match time_unit { TimeUnit::Nanoseconds => Duration::add_ns, TimeUnit::Microseconds => Duration::add_us, TimeUnit::Milliseconds => Duration::add_ms, }; + let mut ts = Vec::with_capacity(size); - let mut i = match closed { - ClosedWindow::Both | ClosedWindow::Left => 0, - ClosedWindow::Right | ClosedWindow::None => 1, - }; - let mut t = offset_fn(&(interval * i), start, time_zone)?; - i += 1; - match closed { - ClosedWindow::Both | ClosedWindow::Right => { - while t <= end { - ts.push(t); - t = offset_fn(&(interval * i), start, time_zone)?; - i += 1; - } - }, - ClosedWindow::Left | ClosedWindow::None => { - while t < end { - ts.push(t); - t = offset_fn(&(interval * i), start, time_zone)?; - i += 1; - } - }, + + // Shift the left limit if we're right-closed or none + let mut t = start; + let mut i = 0; + if closed == ClosedWindow::Right || closed == ClosedWindow::None { + t = offset_fn(&interval, start, time_zone)?; + i += 1; + } + // Shift the right limit if we're left-closed or none + if closed == ClosedWindow::Left || closed == ClosedWindow::None { + end = offset_fn(&(-interval), end, time_zone)?; + } + + if step >= 0 { + while t <= end { + ts.push(t); + i += 1; + t = offset_fn(&(interval * i), start, time_zone)?; + } + } else { + while t >= end { + ts.push(t); + i += 1; + t = offset_fn(&(interval * i), start, time_zone)?; + } } debug_assert!(size >= ts.len()); Ok(ts) } + +pub(crate) fn datetime_range_i64_start_interval_samples( + mut start: i64, + interval: Duration, + num_samples: i64, + closed: ClosedWindow, + time_unit: TimeUnit, + time_zone: Option<&Tz>, +) -> PolarsResult> { + let time_zone_opt: Option = match time_zone { + #[cfg(feature = "timezones")] + Some(tz) => Some(TimeZone::from_chrono(tz)), + _ => None, + }; + if 
interval.is_constant_duration(time_zone_opt.as_ref()) { + // Fast path + let mut step = match time_unit { + TimeUnit::Nanoseconds => interval.duration_ns(), + TimeUnit::Microseconds => interval.duration_us(), + TimeUnit::Milliseconds => interval.duration_ms(), + }; + polars_ensure!( + step != 0, + InvalidOperation: "interval {} is too small for time unit {} and was rounded to zero", + if interval.negative { -interval } else { interval }, + time_unit, + ); + + if interval.negative { + step = -step; + } + + // If the interval is left-open, start one interval away. + if closed == ClosedWindow::Right || closed == ClosedWindow::None { + start += step; + } + + let out = if step < 0 { + // Negative interval, we move backwards. + (start + (step * num_samples) + 1..=start) + .rev() + .step_by((-step) as usize) + .collect::>() + } else { + // Positive interval, we move forwards. + (start..start + step * num_samples) + .step_by(step as usize) + .collect::>() + }; + return Ok(out); + } + + let offset_fn = match time_unit { + TimeUnit::Milliseconds => Duration::add_ms, + TimeUnit::Microseconds => Duration::add_us, + TimeUnit::Nanoseconds => Duration::add_ns, + }; + + // Start with one interval offset if we're not left-closed. 
+ let t0 = (closed == ClosedWindow::Right || closed == ClosedWindow::None) as i64; + let ts = (t0..t0 + num_samples) + .map(|t| offset_fn(&(interval * t), start, time_zone)) + .collect::>>()?; + debug_assert!(num_samples as usize == ts.len()); + Ok(ts) +} diff --git a/crates/polars-time/src/upsample.rs b/crates/polars-time/src/upsample.rs index 7c829ddafebb..3dd9166ca069 100644 --- a/crates/polars-time/src/upsample.rs +++ b/crates/polars-time/src/upsample.rs @@ -206,7 +206,7 @@ fn upsample_single_impl( Some(tz) => Some(parse_time_zone(tz)?), _ => None, }; - let range = datetime_range_impl( + let range = datetime_range_impl_start_end_interval( index_col_name.clone(), first, last, diff --git a/crates/polars-time/src/windows/test.rs b/crates/polars-time/src/windows/test.rs index 1fd71b0d0176..22fdb761a834 100644 --- a/crates/polars-time/src/windows/test.rs +++ b/crates/polars-time/src/windows/test.rs @@ -15,7 +15,7 @@ fn test_date_range() { .unwrap() .and_hms_opt(0, 0, 0) .unwrap(); - let dates = datetime_range_i64( + let dates = datetime_range_i64_start_end_interval( start.and_utc().timestamp_nanos_opt().unwrap(), end.and_utc().timestamp_nanos_opt().unwrap(), Duration::parse("1mo"), @@ -52,7 +52,7 @@ fn test_feb_date_range() { .unwrap() .and_hms_opt(0, 0, 0) .unwrap(); - let dates = datetime_range_i64( + let dates = datetime_range_i64_start_end_interval( start.and_utc().timestamp_nanos_opt().unwrap(), end.and_utc().timestamp_nanos_opt().unwrap(), Duration::parse("1mo"), @@ -194,7 +194,7 @@ fn test_boundaries() { .and_hms_opt(3, 0, 0) .unwrap(); - let ts = datetime_range_i64( + let ts = datetime_range_i64_start_end_interval( start.and_utc().timestamp_nanos_opt().unwrap(), stop.and_utc().timestamp_nanos_opt().unwrap(), Duration::parse("30m"), @@ -383,7 +383,7 @@ fn test_boundaries_2() { .and_hms_opt(4, 0, 0) .unwrap(); - let ts = datetime_range_i64( + let ts = datetime_range_i64_start_end_interval( start.and_utc().timestamp_nanos_opt().unwrap(), 
stop.and_utc().timestamp_nanos_opt().unwrap(), Duration::parse("30m"), @@ -517,7 +517,7 @@ fn test_boundaries_ms() { .and_hms_opt(3, 0, 0) .unwrap(); - let ts = datetime_range_i64( + let ts = datetime_range_i64_start_end_interval( start.and_utc().timestamp_millis(), stop.and_utc().timestamp_millis(), Duration::parse("30m"), @@ -706,7 +706,7 @@ fn test_rolling_lookback() { .unwrap() .and_hms_opt(4, 0, 0) .unwrap(); - let dates = datetime_range_i64( + let dates = datetime_range_i64_start_end_interval( start.and_utc().timestamp_millis(), end.and_utc().timestamp_millis(), Duration::parse("30m"), diff --git a/crates/polars/tests/it/lazy/expressions/expand.rs b/crates/polars/tests/it/lazy/expressions/expand.rs index c360941769d9..7c11f2bc58b4 100644 --- a/crates/polars/tests/it/lazy/expressions/expand.rs +++ b/crates/polars/tests/it/lazy/expressions/expand.rs @@ -14,9 +14,10 @@ fn test_expand_datetimes_3042() -> PolarsResult<()> { .unwrap(); let date_range = polars_time::date_range( "dt1".into(), - low, - high, - Duration::parse("1w"), + Some(low), + Some(high), + Some(Duration::parse("1w")), + None, ClosedWindow::Left, TimeUnit::Milliseconds, None, diff --git a/crates/polars/tests/it/lazy/group_by_dynamic.rs b/crates/polars/tests/it/lazy/group_by_dynamic.rs index e4c57a4a6ea1..74eb86192126 100644 --- a/crates/polars/tests/it/lazy/group_by_dynamic.rs +++ b/crates/polars/tests/it/lazy/group_by_dynamic.rs @@ -15,17 +15,16 @@ use super::*; fn test_group_by_dynamic_week_bounds() -> PolarsResult<()> { let start = NaiveDate::from_ymd_opt(2022, 2, 1) .unwrap() - .and_hms_opt(0, 0, 0) - .unwrap(); + .and_hms_opt(0, 0, 0); let stop = NaiveDate::from_ymd_opt(2022, 2, 14) .unwrap() - .and_hms_opt(0, 0, 0) - .unwrap(); + .and_hms_opt(0, 0, 0); let range = polars_time::date_range( "dt".into(), start, stop, - Duration::parse("1d"), + Some(Duration::parse("1d")), + None, ClosedWindow::Left, TimeUnit::Milliseconds, None, diff --git a/crates/polars/tests/it/time/date_range.rs 
b/crates/polars/tests/it/time/date_range.rs index 9754b3208d0c..8e2c216481e7 100644 --- a/crates/polars/tests/it/time/date_range.rs +++ b/crates/polars/tests/it/time/date_range.rs @@ -7,17 +7,16 @@ use polars::time::date_range; fn test_time_units_9413() { let start = NaiveDate::from_ymd_opt(2022, 1, 1) .unwrap() - .and_hms_opt(0, 0, 0) - .unwrap(); + .and_hms_opt(0, 0, 0); let stop = NaiveDate::from_ymd_opt(2022, 1, 5) .unwrap() - .and_hms_opt(0, 0, 0) - .unwrap(); + .and_hms_opt(0, 0, 0); let actual = date_range( "date".into(), start, stop, - Duration::parse("1d"), + Some(Duration::parse("1d")), + None, ClosedWindow::Both, TimeUnit::Milliseconds, None, @@ -38,7 +37,8 @@ Series: 'date' [datetime[ms]] "date".into(), start, stop, - Duration::parse("1d"), + Some(Duration::parse("1d")), + None, ClosedWindow::Both, TimeUnit::Microseconds, None, @@ -59,7 +59,8 @@ Series: 'date' [datetime[μs]] "date".into(), start, stop, - Duration::parse("1d"), + Some(Duration::parse("1d")), + None, ClosedWindow::Both, TimeUnit::Nanoseconds, None, diff --git a/docs/source/src/rust/user-guide/expressions/column-selections.rs b/docs/source/src/rust/user-guide/expressions/column-selections.rs index 32f5a4222517..c05caf03e6e3 100644 --- a/docs/source/src/rust/user-guide/expressions/column-selections.rs +++ b/docs/source/src/rust/user-guide/expressions/column-selections.rs @@ -7,14 +7,28 @@ fn main() -> Result<(), Box> { use polars::time::*; let df = df!( - "id" => &[9, 4, 2], - "place" => &["Mars", "Earth", "Saturn"], - "date" => date_range("date".into(), - NaiveDate::from_ymd_opt(2022, 1, 1).unwrap().and_hms_opt(0, 0, 0).unwrap(), NaiveDate::from_ymd_opt(2022, 1, 3).unwrap().and_hms_opt(0, 0, 0).unwrap(), Duration::parse("1d"),ClosedWindow::Both, TimeUnit::Milliseconds, None)?, - "sales" => &[33.4, 2142134.1, 44.7], - "has_people" => &[false, true, false], - "logged_at" => date_range("logged_at".into(), - NaiveDate::from_ymd_opt(2022, 1, 1).unwrap().and_hms_opt(0, 0, 0).unwrap(), 
NaiveDate::from_ymd_opt(2022, 1, 1).unwrap().and_hms_opt(0, 0, 2).unwrap(), Duration::parse("1s"),ClosedWindow::Both, TimeUnit::Milliseconds, None)?, + "id" => &[9, 4, 2], + "place" => &["Mars", "Earth", "Saturn"], + "date" => date_range( + "date".into(), + NaiveDate::from_ymd_opt(2022, 1, 1).unwrap().and_hms_opt(0, 0, 0), + NaiveDate::from_ymd_opt(2022, 1, 3).unwrap().and_hms_opt(0, 0, 0), + Some(Duration::parse("1d")), + None, + ClosedWindow::Both, + TimeUnit::Milliseconds, None + )?, + "sales" => &[33.4, 2142134.1, 44.7], + "has_people" => &[false, true, false], + "logged_at" => date_range( + "logged_at".into(), + NaiveDate::from_ymd_opt(2022, 1, 1).unwrap().and_hms_opt(0, 0, 0), + NaiveDate::from_ymd_opt(2022, 1, 1).unwrap().and_hms_opt(0, 0, 2), + Some(Duration::parse("1s")), + None, + ClosedWindow::Both, + TimeUnit::Milliseconds, None + )?, )? .with_row_index("index".into(), None)?; println!("{}", &df); diff --git a/docs/source/src/rust/user-guide/transformations/time-series/resampling.rs b/docs/source/src/rust/user-guide/transformations/time-series/resampling.rs index 49fc331e0af0..c135a728392c 100644 --- a/docs/source/src/rust/user-guide/transformations/time-series/resampling.rs +++ b/docs/source/src/rust/user-guide/transformations/time-series/resampling.rs @@ -9,13 +9,12 @@ fn main() -> Result<(), Box> { "time".into(), NaiveDate::from_ymd_opt(2021, 12, 16) .unwrap() - .and_hms_opt(0, 0, 0) - .unwrap(), + .and_hms_opt(0, 0, 0), NaiveDate::from_ymd_opt(2021, 12, 16) .unwrap() - .and_hms_opt(3, 0, 0) - .unwrap(), - Duration::parse("30m"), + .and_hms_opt(3, 0, 0), + Some(Duration::parse("30m")), + None, ClosedWindow::Both, TimeUnit::Milliseconds, None, diff --git a/docs/source/src/rust/user-guide/transformations/time-series/rolling.rs b/docs/source/src/rust/user-guide/transformations/time-series/rolling.rs index 0891ae39034b..fda3cbf9f205 100644 --- a/docs/source/src/rust/user-guide/transformations/time-series/rolling.rs +++ 
b/docs/source/src/rust/user-guide/transformations/time-series/rolling.rs @@ -47,13 +47,12 @@ fn main() -> Result<(), Box> { "time".into(), NaiveDate::from_ymd_opt(2021, 1, 1) .unwrap() - .and_hms_opt(0, 0, 0) - .unwrap(), + .and_hms_opt(0, 0, 0), NaiveDate::from_ymd_opt(2021, 12, 31) .unwrap() - .and_hms_opt(0, 0, 0) - .unwrap(), - Duration::parse("1d"), + .and_hms_opt(0, 0, 0), + Some(Duration::parse("1d")), + None, ClosedWindow::Both, TimeUnit::Milliseconds, None, @@ -106,13 +105,12 @@ fn main() -> Result<(), Box> { "time".into(), NaiveDate::from_ymd_opt(2021, 12, 16) .unwrap() - .and_hms_opt(0, 0, 0) - .unwrap(), + .and_hms_opt(0, 0, 0), NaiveDate::from_ymd_opt(2021, 12, 16) .unwrap() - .and_hms_opt(3, 0, 0) - .unwrap(), - Duration::parse("30m"), + .and_hms_opt(3, 0, 0), + Some(Duration::parse("30m")), + None, ClosedWindow::Both, TimeUnit::Milliseconds, None, diff --git a/py-polars/src/polars/_plr.pyi b/py-polars/src/polars/_plr.pyi index 2088bee0097d..77e1a76096da 100644 --- a/py-polars/src/polars/_plr.pyi +++ b/py-polars/src/polars/_plr.pyi @@ -2245,23 +2245,33 @@ def int_ranges( start: PyExpr, end: PyExpr, step: PyExpr, dtype: PyDataTypeExpr ) -> PyExpr: ... def date_range( - start: PyExpr, end: PyExpr, interval: str, closed: ClosedWindow + start: PyExpr | None, + end: PyExpr | None, + interval: str | None, + num_samples: PyExpr | None, + closed: ClosedWindow, ) -> PyExpr: ... def date_ranges( - start: PyExpr, end: PyExpr, interval: str, closed: ClosedWindow + start: PyExpr | None, + end: PyExpr | None, + interval: str | None, + num_samples: PyExpr | None, + closed: ClosedWindow, ) -> PyExpr: ... def datetime_range( - start: PyExpr, - end: PyExpr, - every: str, + start: PyExpr | None, + end: PyExpr | None, + interval: str | None, + num_samples: PyExpr | None, closed: ClosedWindow, time_unit: TimeUnit | None, time_zone: TimeZone | None, ) -> PyExpr: ... 
def datetime_ranges( - start: PyExpr, - end: PyExpr, - every: str, + start: PyExpr | None, + end: PyExpr | None, + interval: str | None, + num_samples: PyExpr | None, closed: ClosedWindow, time_unit: TimeUnit | None, time_zone: TimeZone | None, diff --git a/py-polars/src/polars/functions/range/date_range.py b/py-polars/src/polars/functions/range/date_range.py index 0941f7729511..04f96d7d65aa 100644 --- a/py-polars/src/polars/functions/range/date_range.py +++ b/py-polars/src/polars/functions/range/date_range.py @@ -6,6 +6,7 @@ from polars import functions as F from polars._utils.parse import parse_into_expression from polars._utils.wrap import wrap_expr +from polars.exceptions import InvalidOperationError from polars.functions.range._utils import parse_interval_argument with contextlib.suppress(ImportError): # Module not available when building docs @@ -21,9 +22,10 @@ @overload def date_range( - start: date | datetime | IntoExprColumn, - end: date | datetime | IntoExprColumn, - interval: str | timedelta = ..., + start: date | datetime | IntoExprColumn | None = None, + end: date | datetime | IntoExprColumn | None = None, + interval: str | timedelta | None = None, + num_samples: int | None = None, *, closed: ClosedInterval = ..., eager: Literal[False] = ..., @@ -32,9 +34,10 @@ def date_range( @overload def date_range( - start: date | datetime | IntoExprColumn, - end: date | datetime | IntoExprColumn, - interval: str | timedelta = ..., + start: date | datetime | IntoExprColumn | None = None, + end: date | datetime | IntoExprColumn | None = None, + interval: str | timedelta | None = None, + num_samples: int | None = None, *, closed: ClosedInterval = ..., eager: Literal[True], @@ -43,9 +46,10 @@ def date_range( @overload def date_range( - start: date | datetime | IntoExprColumn, - end: date | datetime | IntoExprColumn, - interval: str | timedelta = ..., + start: date | datetime | IntoExprColumn | None = None, + end: date | datetime | IntoExprColumn | None = None, + 
+ interval: str | timedelta | None = None, + num_samples: int | None = None, *, closed: ClosedInterval = ..., eager: bool, @@ -53,9 +57,10 @@ def date_range( def date_range( - start: date | datetime | IntoExprColumn, - end: date | datetime | IntoExprColumn, - interval: str | timedelta = "1d", + start: date | datetime | IntoExprColumn | None = None, + end: date | datetime | IntoExprColumn | None = None, + interval: str | timedelta | None = None, + num_samples: int | None = None, *, closed: ClosedInterval = "both", eager: bool = False, @@ -63,6 +68,10 @@ def date_range( """ Generate a date range. + A date range may be defined by any three of 'start', 'end', 'interval', and + 'num_samples'. If only two of `start`, `end`, and `num_samples` are provided, the + interval defaults to 1 day. + Parameters ---------- start @@ -73,8 +82,12 @@ def date_range( Interval of the range periods, specified as a Python `timedelta` object or using the Polars duration string language (see "Notes" section below). Must consist of full days. + num_samples + Number of periods in the date range. This corresponds to the number of points in + the output array, and is thus one more than the number of intervals. closed : {'both', 'left', 'right', 'none'} - Define which sides of the range are closed (inclusive). + Define which sides of the range are closed (inclusive). Only 'both' is + supported when `start`, `end`, and `num_samples` are all provided. eager Evaluate immediately and return a `Series`. If set to `False` (default), return an expression instead. @@ -111,9 +124,9 @@ def date_range( Using Polars duration string to specify the interval: >>> from datetime import date - >>> pl.date_range(date(2022, 1, 1), date(2022, 3, 1), "1mo", eager=True).alias( - ... "date" - ... ) + >>> pl.date_range( + ... start=date(2022, 1, 1), end=date(2022, 3, 1), interval="1mo", eager=True + ... ).alias("date") shape: (3,) Series: 'date' [date] [ @@ -126,9 +139,9 @@ def date_range( >>> from datetime import timedelta >>> pl.date_range( - ... 
date(1985, 1, 1), - ... date(1985, 1, 10), - ... timedelta(days=2), + ... start=date(1985, 1, 1), + ... end=date(1985, 1, 10), + ... interval=timedelta(days=2), ... eager=True, ... ).alias("date") shape: (5,) @@ -141,6 +154,38 @@ def date_range( 1985-01-09 ] + Using 'num_samples' to specify the number of periods: + + >>> pl.date_range( + ... start=date(1985, 1, 1), + ... end=date(1985, 1, 10), + ... num_samples=5, + ... eager=True, + ... ).alias("date") + shape: (5,) + Series: 'date' [date] + [ + 1985-01-01 + 1985-01-03 + 1985-01-05 + 1985-01-07 + 1985-01-10 + ] + >>> pl.date_range( + ... start=date(1985, 1, 1), + ... interval="3d", + ... num_samples=4, + ... eager=True, + ... ).alias("date") + shape: (4,) + Series: 'date' [date] + [ + 1985-01-01 + 1985-01-04 + 1985-01-07 + 1985-01-10 + ] + Omit `eager=True` if you want to use `date_range` as an expression: >>> df = pl.DataFrame( @@ -171,11 +216,27 @@ def date_range( │ two ┆ [2024-01-01, 2024-01-02, 2024-01-03] │ └─────┴──────────────────────────────────────┘ """ - interval = parse_interval_argument(interval) - - start_pyexpr = parse_into_expression(start) - end_pyexpr = parse_into_expression(end) - result = wrap_expr(plr.date_range(start_pyexpr, end_pyexpr, interval, closed)) + if interval is None and (num_samples is None or start is None or end is None): + interval = "1d" + interval = None if interval is None else parse_interval_argument(interval) + start_expr = None if start is None else parse_into_expression(start) + end_expr = None if end is None else parse_into_expression(end) + samples_expr = None if num_samples is None else parse_into_expression(num_samples) + if ( + start is not None + and end is not None + and num_samples is not None + and closed != "both" + ): + msg = ( + "date_range does not support 'left', 'right', or 'none' for the 'closed' " + "parameter when 'start', 'end', and 'num_samples' is provided." 
+ ) + raise InvalidOperationError(msg) + + result = wrap_expr( + plr.date_range(start_expr, end_expr, interval, samples_expr, closed) + ) if eager: return F.select(result).to_series() @@ -185,9 +246,10 @@ def date_range( @overload def date_ranges( - start: date | datetime | IntoExprColumn, - end: date | datetime | IntoExprColumn, + start: date | datetime | IntoExprColumn | None = None, + end: date | datetime | IntoExprColumn | None = None, interval: str | timedelta = ..., + num_samples: int | IntoExprColumn | None = None, *, closed: ClosedInterval = ..., eager: Literal[False] = ..., @@ -196,9 +258,10 @@ def date_ranges( @overload def date_ranges( - start: date | datetime | IntoExprColumn, - end: date | datetime | IntoExprColumn, - interval: str | timedelta = ..., + start: date | datetime | IntoExprColumn | None = None, + end: date | datetime | IntoExprColumn | None = None, + interval: str | timedelta | None = None, + num_samples: int | IntoExprColumn | None = None, *, closed: ClosedInterval = ..., eager: Literal[True], @@ -207,9 +270,10 @@ def date_ranges( @overload def date_ranges( - start: date | datetime | IntoExprColumn, - end: date | datetime | IntoExprColumn, - interval: str | timedelta = ..., + start: date | datetime | IntoExprColumn | None = None, + end: date | datetime | IntoExprColumn | None = None, + interval: str | timedelta | None = None, + num_samples: int | IntoExprColumn | None = None, *, closed: ClosedInterval = ..., eager: bool, @@ -217,9 +281,10 @@ def date_ranges( def date_ranges( - start: date | datetime | IntoExprColumn, - end: date | datetime | IntoExprColumn, - interval: str | timedelta = "1d", + start: date | datetime | IntoExprColumn | None = None, + end: date | datetime | IntoExprColumn | None = None, + interval: str | timedelta | None = None, + num_samples: int | IntoExprColumn | None = None, *, closed: ClosedInterval = "both", eager: bool = False, @@ -227,6 +292,10 @@ def date_ranges( """ Create a column of date ranges. 
+ Each date range may be defined by any three of 'start', 'end', 'interval', and + 'num_samples'. If only two of `start`, `end`, and `num_samples` are provided, the + interval defaults to 1 day. + Parameters ---------- start @@ -237,6 +306,9 @@ def date_ranges( Interval of the range periods, specified as a Python `timedelta` object or using the Polars duration string language (see "Notes" section below). Must consist of full days. + num_samples + Number of periods in the date range. This corresponds to the number of points in + the output array, and is thus one more than the number of intervals. closed : {'both', 'left', 'right', 'none'} Define which sides of the range are closed (inclusive). eager @@ -255,6 +327,9 @@ def date_ranges( Notes ----- + When `start`, `end`, and `num_samples` are provided, the `closed` parameter must be + 'both'. + `interval` is created according to the following string language: - 1d (1 calendar day) @@ -291,11 +366,27 @@ def date_ranges( │ 2022-01-02 ┆ 2022-01-03 ┆ [2022-01-02, 2022-01-03] │ └────────────┴────────────┴──────────────────────────────────────┘ """ - interval = parse_interval_argument(interval) - start_pyexpr = parse_into_expression(start) - end_pyexpr = parse_into_expression(end) - - result = wrap_expr(plr.date_ranges(start_pyexpr, end_pyexpr, interval, closed)) + if interval is None and (num_samples is None or start is None or end is None): + interval = "1d" + interval = None if interval is None else parse_interval_argument(interval) + start_expr = None if start is None else parse_into_expression(start) + end_expr = None if end is None else parse_into_expression(end) + samples_expr = None if num_samples is None else parse_into_expression(num_samples) + if ( + start is not None + and end is not None + and num_samples is not None + and closed != "both" + ): + msg = ( + "date_ranges does not support 'left', 'right', or 'none' for the 'closed' " + "parameter when 'start', 'end', and 'num_samples' is provided." 
+ ) + raise InvalidOperationError(msg) + + result = wrap_expr( + plr.date_ranges(start_expr, end_expr, interval, samples_expr, closed) + ) if eager: return F.select(result).to_series() diff --git a/py-polars/src/polars/functions/range/datetime_range.py b/py-polars/src/polars/functions/range/datetime_range.py index d4e825053102..a287f44890dd 100644 --- a/py-polars/src/polars/functions/range/datetime_range.py +++ b/py-polars/src/polars/functions/range/datetime_range.py @@ -21,9 +21,10 @@ @overload def datetime_range( - start: datetime | date | IntoExprColumn, - end: datetime | date | IntoExprColumn, - interval: str | timedelta = ..., + start: datetime | date | IntoExprColumn | None = None, + end: datetime | date | IntoExprColumn | None = None, + interval: str | timedelta | None = None, + num_samples: int | IntoExprColumn | None = None, *, closed: ClosedInterval = ..., time_unit: TimeUnit | None = ..., @@ -34,9 +35,10 @@ def datetime_range( @overload def datetime_range( - start: datetime | date | IntoExprColumn, - end: datetime | date | IntoExprColumn, - interval: str | timedelta = ..., + start: datetime | date | IntoExprColumn | None = None, + end: datetime | date | IntoExprColumn | None = None, + interval: str | timedelta | None = None, + num_samples: int | IntoExprColumn | None = None, *, closed: ClosedInterval = ..., time_unit: TimeUnit | None = ..., @@ -47,21 +49,23 @@ def datetime_range( @overload def datetime_range( - start: datetime | date | IntoExprColumn, - end: datetime | date | IntoExprColumn, - interval: str | timedelta = ..., + start: datetime | date | IntoExprColumn | None = None, + end: datetime | date | IntoExprColumn | None = None, + interval: str | timedelta | None = None, + num_samples: int | IntoExprColumn | None = None, *, closed: ClosedInterval = ..., time_unit: TimeUnit | None = ..., time_zone: str | None = ..., - eager: bool, + eager: bool = False, ) -> Series | Expr: ... 
def datetime_range( - start: datetime | date | IntoExprColumn, - end: datetime | date | IntoExprColumn, - interval: str | timedelta = "1d", + start: datetime | date | IntoExprColumn | None = None, + end: datetime | date | IntoExprColumn | None = None, + interval: str | timedelta | None = None, + num_samples: int | IntoExprColumn | None = None, *, closed: ClosedInterval = "both", time_unit: TimeUnit | None = None, @@ -71,6 +75,10 @@ def datetime_range( """ Generate a datetime range. + A datetime range may be defined by any three of 'start', 'end', 'interval', and + 'num_samples'. If only two of `start`, `end`, and `num_samples` are provided, the + interval defaults to 1 day. + Parameters ---------- start @@ -80,6 +88,9 @@ def datetime_range( interval Interval of the range periods, specified as a Python `timedelta` object or using the Polars duration string language (see "Notes" section below). + num_samples + Number of periods in the date range. This corresponds to the number of points in + the output array, and is thus one more than the number of intervals. closed : {'both', 'left', 'right', 'none'} Define which sides of the range are closed (inclusive). time_unit : {None, 'ns', 'us', 'ms'} @@ -178,6 +189,38 @@ def datetime_range( 2022-03-01 00:00:00 EST ] + Using 'num_samples' to specify the number of periods: + + >>> pl.datetime_range( + ... start=date(1985, 1, 1), + ... end=date(1985, 1, 10), + ... num_samples=5, + ... eager=True, + ... ).alias("date") + shape: (5,) + Series: 'date' [datetime[μs]] + [ + 1985-01-01 00:00:00 + 1985-01-03 06:00:00 + 1985-01-05 12:00:00 + 1985-01-07 18:00:00 + 1985-01-10 00:00:00 + ] + >>> pl.datetime_range( + ... start=date(1985, 1, 1), + ... interval="3d", + ... num_samples=4, + ... eager=True, + ... 
).alias("date") + shape: (4,) + Series: 'date' [datetime[μs]] + [ + 1985-01-01 00:00:00 + 1985-01-04 00:00:00 + 1985-01-07 00:00:00 + 1985-01-10 00:00:00 + ] + Omit `eager=True` if you want to use `datetime_range` as an expression: >>> df = pl.DataFrame( @@ -208,15 +251,24 @@ def datetime_range( │ two ┆ [2024-01-01 00:00:00, 2024-01-02 00:00:00, 2024-01-03 00:00:00] │ └─────┴─────────────────────────────────────────────────────────────────┘ """ - interval = parse_interval_argument(interval) - if time_unit is None and "ns" in interval: + if interval is None and (num_samples is None or start is None or end is None): + interval = "1d" + interval = None if interval is None else parse_interval_argument(interval) + if time_unit is None and interval is not None and "ns" in interval: time_unit = "ns" + start_expr = None if start is None else parse_into_expression(start) + end_expr = None if end is None else parse_into_expression(end) + samples_expr = None if num_samples is None else parse_into_expression(num_samples) - start_pyexpr = parse_into_expression(start) - end_pyexpr = parse_into_expression(end) result = wrap_expr( plr.datetime_range( - start_pyexpr, end_pyexpr, interval, closed, time_unit, time_zone + start_expr, + end_expr, + interval, + samples_expr, + closed, + time_unit, + time_zone, ) ) @@ -228,9 +280,10 @@ def datetime_range( @overload def datetime_ranges( - start: datetime | date | IntoExprColumn, - end: datetime | date | IntoExprColumn, - interval: str | timedelta = ..., + start: datetime | date | IntoExprColumn | None = None, + end: datetime | date | IntoExprColumn | None = None, + interval: str | timedelta | None = None, + num_samples: int | IntoExprColumn | None = None, *, closed: ClosedInterval = ..., time_unit: TimeUnit | None = ..., @@ -241,9 +294,9 @@ def datetime_ranges( @overload def datetime_ranges( - start: datetime | date | IntoExprColumn, - end: datetime | date | IntoExprColumn, - interval: str | timedelta = ..., + start: datetime | date | 
IntoExprColumn | None = None, + end: datetime | date | IntoExprColumn | None = None, + interval: str | timedelta | None = None, *, closed: ClosedInterval = ..., time_unit: TimeUnit | None = ..., @@ -254,21 +307,23 @@ def datetime_ranges( @overload def datetime_ranges( - start: datetime | date | IntoExprColumn, - end: datetime | date | IntoExprColumn, - interval: str | timedelta = ..., + start: datetime | date | IntoExprColumn | None = None, + end: datetime | date | IntoExprColumn | None = None, + interval: str | timedelta | None = None, + num_samples: int | IntoExprColumn | None = None, *, closed: ClosedInterval = ..., time_unit: TimeUnit | None = ..., time_zone: str | None = ..., - eager: bool, + eager: bool = False, ) -> Series | Expr: ... def datetime_ranges( - start: datetime | date | IntoExprColumn, - end: datetime | date | IntoExprColumn, - interval: str | timedelta = "1d", + start: datetime | date | IntoExprColumn | None = None, + end: datetime | date | IntoExprColumn | None = None, + interval: str | timedelta | None = None, + num_samples: int | IntoExprColumn | None = None, *, closed: ClosedInterval = "both", time_unit: TimeUnit | None = None, @@ -278,6 +333,10 @@ def datetime_ranges( """ Create a column of datetime ranges. + Each datetime range may be defined by any three of 'start', 'end', 'interval', and + 'num_samples'. If only two of `start`, `end`, and `num_samples` are provided, the + interval defaults to 1 day. + Parameters ---------- start @@ -287,6 +346,9 @@ def datetime_ranges( interval Interval of the range periods, specified as a Python `timedelta` object or using the Polars duration string language (see "Notes" section below). + num_samples + Number of periods in the date range. This corresponds to the number of points in + the output array, and is thus one more than the number of intervals. closed : {'both', 'left', 'right', 'none'} Define which sides of the range are closed (inclusive). 
time_unit : {None, 'ns', 'us', 'ms'} @@ -351,16 +413,24 @@ def datetime_ranges( │ [2022-01-02 00:00:00, 2022-01-03 00:00:00] │ └─────────────────────────────────────────────────────────────────┘ """ - interval = parse_interval_argument(interval) - if time_unit is None and "ns" in interval: + if interval is None and (num_samples is None or start is None or end is None): + interval = "1d" + interval = None if interval is None else parse_interval_argument(interval) + if time_unit is None and interval is not None and "ns" in interval: time_unit = "ns" - - start_pyexpr = parse_into_expression(start) - end_pyexpr = parse_into_expression(end) + start_expr = None if start is None else parse_into_expression(start) + end_expr = None if end is None else parse_into_expression(end) + samples_expr = None if num_samples is None else parse_into_expression(num_samples) result = wrap_expr( plr.datetime_ranges( - start_pyexpr, end_pyexpr, interval, closed, time_unit, time_zone + start_expr, + end_expr, + interval, + samples_expr, + closed, + time_unit, + time_zone, ) ) diff --git a/py-polars/tests/unit/functions/range/test_date_range.py b/py-polars/tests/unit/functions/range/test_date_range.py index f99d35bb4b08..4cb012c2933e 100644 --- a/py-polars/tests/unit/functions/range/test_date_range.py +++ b/py-polars/tests/unit/functions/range/test_date_range.py @@ -1,6 +1,7 @@ from __future__ import annotations from datetime import date, datetime +from typing import TYPE_CHECKING import pytest @@ -8,6 +9,9 @@ from polars.exceptions import ComputeError, InvalidOperationError, ShapeError from polars.testing import assert_frame_equal, assert_series_equal +if TYPE_CHECKING: + from polars._typing import ClosedInterval + def test_date_range() -> None: # if low/high are both date, range is also be date _iff_ the granularity is >= 1d @@ -180,6 +184,36 @@ def test_date_range_start_end_interval_forwards() -> None: ) +def test_date_range_start_end_interval_backwards() -> None: + start = date(2025, 1, 
10) + end = date(2025, 1, 1) + + assert_series_equal( + pl.date_range(start=start, end=end, interval="-3d", closed="left", eager=True), + pl.Series("literal", [date(2025, 1, 10), date(2025, 1, 7), date(2025, 1, 4)]), + ) + assert_series_equal( + pl.date_range(start=start, end=end, interval="-3d", closed="right", eager=True), + pl.Series("literal", [date(2025, 1, 7), date(2025, 1, 4), date(2025, 1, 1)]), + ) + assert_series_equal( + pl.date_range(start=start, end=end, interval="-3d", closed="none", eager=True), + pl.Series("literal", [date(2025, 1, 7), date(2025, 1, 4)]), + ) + assert_series_equal( + pl.date_range(start=start, end=end, interval="-3d", closed="both", eager=True), + pl.Series( + "literal", + [date(2025, 1, 10), date(2025, 1, 7), date(2025, 1, 4), date(2025, 1, 1)], + ), + ) + # test wrong direction is empty + assert_series_equal( + pl.date_range(start=end, end=start, interval="-3d", eager=True), + pl.Series("literal", [], dtype=pl.Date), + ) + + def test_date_range_expr_scalar() -> None: df = pl.DataFrame( { @@ -191,11 +225,373 @@ def test_date_range_expr_scalar() -> None: forward_start_end_interval=pl.date_range( start=pl.col("a").min(), end=pl.col("a").max(), interval="1d" ), + backward_start_end_interval=pl.date_range( + start=pl.col("a").max(), end=pl.col("a").min(), interval="-1d" + ), + forward_start_end_samples=pl.date_range( + start=pl.col("a").min(), + end=pl.col("a").max(), + num_samples=3, + ), + backward_start_end_samples=pl.date_range( + start=pl.col("a").max(), + end=pl.col("a").min(), + num_samples=3, + ), + forward_start_interval_samples=pl.date_range( + start=pl.col("a").min(), + interval="1d", + num_samples=3, + ), + backward_start_interval_samples=pl.date_range( + start=pl.col("a").max(), + interval="-1d", + num_samples=3, + ), + forward_end_interval_samples=pl.date_range( + end=pl.col("a").max(), + interval="1d", + num_samples=3, + ), + backward_end_interval_samples=pl.date_range( + end=pl.col("a").min(), + interval="-1d", + 
num_samples=3, + ), ) forward = [date(2025, 1, 1), date(2025, 1, 2), date(2025, 1, 3)] + backward = forward[-1::-1] expected = pl.DataFrame( { "forward_start_end_interval": forward, + "backward_start_end_interval": backward, + "forward_start_end_samples": forward, + "backward_start_end_samples": backward, + "forward_start_interval_samples": forward, + "backward_start_interval_samples": backward, + "forward_end_interval_samples": forward, + "backward_end_interval_samples": backward, } ) assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("closed", ["left", "right", "none"]) +def test_date_range_start_end_samples_invalid_closure(closed: ClosedInterval) -> None: + msg = ( + "date_range does not support 'left', 'right', or 'none' for the 'closed' " + "parameter when 'start', 'end', and 'num_samples' is provided." + ) + with pytest.raises(InvalidOperationError, match=msg): + pl.date_range( + start=date(2025, 1, 1), end=date(2025, 1, 10), num_samples=3, closed=closed + ) + + +def test_date_range_start_end_samples() -> None: + assert_series_equal( + pl.date_range( + start=date(2025, 1, 1), end=date(2025, 1, 10), num_samples=3, eager=True + ), + pl.Series("literal", [date(2025, 1, 1), date(2025, 1, 5), date(2025, 1, 10)]), + ) + + assert_series_equal( + pl.date_range( + start=date(2025, 1, 1), end=date(2025, 1, 5), num_samples=10, eager=True + ), + pl.Series( + "literal", + [ + date(2025, 1, 1), + date(2025, 1, 1), + date(2025, 1, 1), + date(2025, 1, 2), + date(2025, 1, 2), + date(2025, 1, 3), + date(2025, 1, 3), + date(2025, 1, 4), + date(2025, 1, 4), + date(2025, 1, 5), + ], + ), + ) + + assert_series_equal( + pl.date_range( + start=date(2025, 1, 10), end=date(2025, 1, 1), num_samples=3, eager=True + ), + pl.Series("literal", [date(2025, 1, 10), date(2025, 1, 5), date(2025, 1, 1)]), + ) + + assert_series_equal( + pl.date_range( + start=date(2025, 1, 5), end=date(2025, 1, 1), num_samples=10, eager=True + ), + pl.Series( + "literal", + [ + date(2025, 1, 5), 
+ date(2025, 1, 4), + date(2025, 1, 4), + date(2025, 1, 3), + date(2025, 1, 3), + date(2025, 1, 2), + date(2025, 1, 2), + date(2025, 1, 1), + date(2025, 1, 1), + date(2025, 1, 1), + ], + ), + ) + + assert_series_equal( + pl.date_range( + start=date(2025, 1, 1), end=date(2025, 1, 5), num_samples=0, eager=True + ), + pl.Series("literal", [], dtype=pl.Date), + ) + + +# -- start/interval/samples +@pytest.mark.parametrize( + ("closed", "expected"), + [ + ("both", [date(2025, 1, 1), date(2025, 1, 2), date(2025, 1, 3)]), + ("left", [date(2025, 1, 1), date(2025, 1, 2), date(2025, 1, 3)]), + ("right", [date(2025, 1, 2), date(2025, 1, 3), date(2025, 1, 4)]), + ("none", [date(2025, 1, 2), date(2025, 1, 3), date(2025, 1, 4)]), + ], +) +def test_date_range_start_interval_samples_forward_1d( + closed: ClosedInterval, expected: list[date] +) -> None: + result = pl.date_range( + start=date(2025, 1, 1), interval="1d", num_samples=3, closed=closed, eager=True + ) + assert_series_equal(result, pl.Series("literal", expected)) + + +@pytest.mark.parametrize( + ("closed", "expected"), + [ + ("both", [date(2025, 1, 3), date(2025, 1, 2), date(2025, 1, 1)]), + ("left", [date(2025, 1, 3), date(2025, 1, 2), date(2025, 1, 1)]), + ("right", [date(2025, 1, 2), date(2025, 1, 1), date(2024, 12, 31)]), + ("none", [date(2025, 1, 2), date(2025, 1, 1), date(2024, 12, 31)]), + ], +) +def test_date_range_start_interval_samples_backward_1d( + closed: ClosedInterval, expected: list[date] +) -> None: + result = pl.date_range( + start=date(2025, 1, 3), interval="-1d", num_samples=3, closed=closed, eager=True + ) + assert_series_equal(result, pl.Series("literal", expected)) + + +@pytest.mark.parametrize( + ("closed", "expected"), + [ + ("both", [date(2025, 1, 1), date(2025, 1, 3), date(2025, 1, 5)]), + ("left", [date(2025, 1, 1), date(2025, 1, 3), date(2025, 1, 5)]), + ("right", [date(2025, 1, 3), date(2025, 1, 5), date(2025, 1, 7)]), + ("none", [date(2025, 1, 3), date(2025, 1, 5), date(2025, 1, 7)]), + ], 
+) +def test_date_range_start_interval_samples_forward_2d( + closed: ClosedInterval, expected: list[date] +) -> None: + result = pl.date_range( + start=date(2025, 1, 1), interval="2d", num_samples=3, closed=closed, eager=True + ) + assert_series_equal(result, pl.Series("literal", expected)) + + +@pytest.mark.parametrize( + ("closed", "expected"), + [ + ("both", [date(2025, 1, 5), date(2025, 1, 3), date(2025, 1, 1)]), + ("left", [date(2025, 1, 5), date(2025, 1, 3), date(2025, 1, 1)]), + ("right", [date(2025, 1, 3), date(2025, 1, 1), date(2024, 12, 30)]), + ("none", [date(2025, 1, 3), date(2025, 1, 1), date(2024, 12, 30)]), + ], +) +def test_date_range_start_interval_samples_backward_2d( + closed: ClosedInterval, expected: list[date] +) -> None: + result = pl.date_range( + start=date(2025, 1, 5), interval="-2d", num_samples=3, closed=closed, eager=True + ) + assert_series_equal(result, pl.Series("literal", expected)) + + +@pytest.mark.parametrize( + ("closed", "expected"), + [ + ("both", [date(2025, 1, 31), date(2025, 2, 28), date(2025, 3, 31)]), + ("left", [date(2025, 1, 31), date(2025, 2, 28), date(2025, 3, 31)]), + ("right", [date(2025, 2, 28), date(2025, 3, 31), date(2025, 4, 30)]), + ("none", [date(2025, 2, 28), date(2025, 3, 31), date(2025, 4, 30)]), + ], +) +def test_date_range_start_interval_samples_forward_1mo( + closed: ClosedInterval, expected: list[date] +) -> None: + result = pl.date_range( + start=date(2025, 1, 31), + interval="1mo", + num_samples=3, + closed=closed, + eager=True, + ) + assert_series_equal(result, pl.Series("literal", expected)) + + +@pytest.mark.parametrize( + ("closed", "expected"), + [ + ("both", [date(2025, 3, 31), date(2025, 2, 28), date(2025, 1, 31)]), + ("left", [date(2025, 3, 31), date(2025, 2, 28), date(2025, 1, 31)]), + ("right", [date(2025, 2, 28), date(2025, 1, 31), date(2024, 12, 31)]), + ("none", [date(2025, 2, 28), date(2025, 1, 31), date(2024, 12, 31)]), + ], +) +def test_date_range_start_interval_samples_backward_1mo( + 
closed: ClosedInterval, expected: list[date] +) -> None: + result = pl.date_range( + start=date(2025, 3, 31), + interval="-1mo", + num_samples=3, + closed=closed, + eager=True, + ) + assert_series_equal(result, pl.Series("literal", expected)) + + +# -- end/interval/samples +@pytest.mark.parametrize( + ("closed", "expected"), + [ + ("both", [date(2025, 1, 2), date(2025, 1, 3), date(2025, 1, 4)]), + ("left", [date(2025, 1, 1), date(2025, 1, 2), date(2025, 1, 3)]), + ("right", [date(2025, 1, 2), date(2025, 1, 3), date(2025, 1, 4)]), + ("none", [date(2025, 1, 1), date(2025, 1, 2), date(2025, 1, 3)]), + ], +) +def test_date_range_end_interval_samples_forward_1d( + closed: ClosedInterval, expected: list[date] +) -> None: + result = pl.date_range( + end=date(2025, 1, 4), interval="1d", num_samples=3, closed=closed, eager=True + ) + assert_series_equal(result, pl.Series("literal", expected)) + + +@pytest.mark.parametrize( + ("closed", "expected"), + [ + ("both", [date(2025, 1, 3), date(2025, 1, 2), date(2025, 1, 1)]), + ("left", [date(2025, 1, 4), date(2025, 1, 3), date(2025, 1, 2)]), + ("right", [date(2025, 1, 3), date(2025, 1, 2), date(2025, 1, 1)]), + ("none", [date(2025, 1, 4), date(2025, 1, 3), date(2025, 1, 2)]), + ], +) +def test_date_range_end_interval_samples_backward_1d( + closed: ClosedInterval, expected: list[date] +) -> None: + result = pl.date_range( + end=date(2025, 1, 1), interval="-1d", num_samples=3, closed=closed, eager=True + ) + assert_series_equal(result, pl.Series("literal", expected)) + + +@pytest.mark.parametrize( + ("closed", "expected"), + [ + ("both", [date(2025, 1, 1), date(2025, 1, 3), date(2025, 1, 5)]), + ("left", [date(2024, 12, 30), date(2025, 1, 1), date(2025, 1, 3)]), + ("right", [date(2025, 1, 1), date(2025, 1, 3), date(2025, 1, 5)]), + ("none", [date(2024, 12, 30), date(2025, 1, 1), date(2025, 1, 3)]), + ], +) +def test_date_range_end_interval_samples_forward_2d( + closed: ClosedInterval, expected: list[date] +) -> None: + result = 
pl.date_range( + end=date(2025, 1, 5), interval="2d", num_samples=3, closed=closed, eager=True + ) + assert_series_equal(result, pl.Series("literal", expected)) + + +@pytest.mark.parametrize( + ("closed", "expected"), + [ + ("both", [date(2025, 1, 5), date(2025, 1, 3), date(2025, 1, 1)]), + ("left", [date(2025, 1, 7), date(2025, 1, 5), date(2025, 1, 3)]), + ("right", [date(2025, 1, 5), date(2025, 1, 3), date(2025, 1, 1)]), + ("none", [date(2025, 1, 7), date(2025, 1, 5), date(2025, 1, 3)]), + ], +) +def test_date_range_end_interval_samples_backward_2d( + closed: ClosedInterval, expected: list[date] +) -> None: + result = pl.date_range( + end=date(2025, 1, 1), interval="-2d", num_samples=3, closed=closed, eager=True + ) + assert_series_equal(result, pl.Series("literal", expected)) + + +@pytest.mark.parametrize( + ("closed", "expected"), + [ + ("both", [date(2025, 1, 31), date(2025, 2, 28), date(2025, 3, 31)]), + ("left", [date(2024, 12, 31), date(2025, 1, 31), date(2025, 2, 28)]), + ("right", [date(2025, 1, 31), date(2025, 2, 28), date(2025, 3, 31)]), + ("none", [date(2024, 12, 31), date(2025, 1, 31), date(2025, 2, 28)]), + ], +) +def test_date_range_end_interval_samples_forward_1mo( + closed: ClosedInterval, expected: list[date] +) -> None: + result = pl.date_range( + end=date(2025, 3, 31), interval="1mo", num_samples=3, closed=closed, eager=True + ) + assert_series_equal(result, pl.Series("literal", expected)) + + +@pytest.mark.parametrize( + ("closed", "expected"), + [ + ("both", [date(2025, 3, 31), date(2025, 2, 28), date(2025, 1, 31)]), + ("left", [date(2025, 4, 30), date(2025, 3, 31), date(2025, 2, 28)]), + ("right", [date(2025, 3, 31), date(2025, 2, 28), date(2025, 1, 31)]), + ("none", [date(2025, 4, 30), date(2025, 3, 31), date(2025, 2, 28)]), + ], +) +def test_date_range_end_interval_samples_backward_1mo( + closed: ClosedInterval, expected: list[date] +) -> None: + result = pl.date_range( + end=date(2025, 1, 31), + interval="-1mo", + num_samples=3, + 
closed=closed, + eager=True, + ) + assert_series_equal(result, pl.Series("literal", expected)) + + +@pytest.mark.parametrize("closed", ["left", "right", "none"]) +def test_date_range_start_end_samples_notclosed(closed: ClosedInterval) -> None: + with pytest.raises( + InvalidOperationError, + match=( + r"date_range does not support 'left', 'right', or 'none' for the 'closed' " + r"parameter when 'start', 'end', and 'num_samples' is provided." + ), + ): + pl.date_range( + start=date(2025, 1, 1), end=date(2025, 1, 2), num_samples=3, closed=closed + ) diff --git a/py-polars/tests/unit/functions/range/test_date_ranges.py b/py-polars/tests/unit/functions/range/test_date_ranges.py index 341a3962a376..b8d1d971415e 100644 --- a/py-polars/tests/unit/functions/range/test_date_ranges.py +++ b/py-polars/tests/unit/functions/range/test_date_ranges.py @@ -7,7 +7,7 @@ import pytest import polars as pl -from polars.exceptions import ComputeError +from polars.exceptions import ComputeError, InvalidOperationError from polars.testing import assert_frame_equal, assert_series_equal if TYPE_CHECKING: @@ -169,7 +169,7 @@ def test_date_ranges_broadcasting_fail() -> None: end = pl.Series([date(2021, 1, 2), date(2021, 1, 3)]) with pytest.raises( - ComputeError, match=r"lengths of `start` \(3\) and `end` \(2\) do not match" + ComputeError, match=r"lengths of `s1` \(3\) and `s2` \(2\) do not match" ): pl.date_ranges(start=start, end=end, eager=True) @@ -241,3 +241,571 @@ def test_date_ranges_start_end_interval_forwards( dates=pl.date_ranges(start="start", end="end", interval="2d", closed=closed) ) assert_frame_equal(result, pl.Series("dates", expected).to_frame()) + + +@pytest.mark.parametrize( + ("closed", "expected"), + [ + ( + "both", + [ + [ + date(2025, 1, 8), + date(2025, 1, 6), + date(2025, 1, 4), + date(2025, 1, 2), + ], + [ + date(2025, 1, 18), + date(2025, 1, 16), + date(2025, 1, 14), + date(2025, 1, 12), + ], + ], + ), + ( + "left", + [ + [ + date(2025, 1, 8), + date(2025, 1, 6), + 
date(2025, 1, 4), + date(2025, 1, 2), + ], + [ + date(2025, 1, 18), + date(2025, 1, 16), + date(2025, 1, 14), + date(2025, 1, 12), + ], + ], + ), + ( + "right", + [ + [ + date(2025, 1, 6), + date(2025, 1, 4), + date(2025, 1, 2), + ], + [ + date(2025, 1, 16), + date(2025, 1, 14), + date(2025, 1, 12), + ], + ], + ), + ( + "none", + [ + [ + date(2025, 1, 6), + date(2025, 1, 4), + date(2025, 1, 2), + ], + [ + date(2025, 1, 16), + date(2025, 1, 14), + date(2025, 1, 12), + ], + ], + ), + ], +) +def test_date_ranges_start_end_interval_backwards( + closed: ClosedInterval, + expected: list[date], +) -> None: + # backwards + df = pl.DataFrame( + { + "start": [date(2025, 1, 8), date(2025, 1, 18)], + "end": [date(2025, 1, 1), date(2025, 1, 11)], + } + ) + result = df.select( + dates=pl.date_ranges( + start="start", + end="end", + interval="-2d", + closed=closed, + ) + ) + assert_frame_equal(result, pl.Series("dates", expected).to_frame()) + + +def test_date_ranges_start_end_samples_forwards() -> None: + df = pl.DataFrame( + { + "start": [date(2025, 1, 1), date(2025, 1, 11)], + "end": [date(2025, 1, 10), date(2025, 1, 15)], + "samples": [5, 8], + } + ) + result = df.select( + dates=pl.date_ranges(start="start", end="end", num_samples="samples") + ) + expected = pl.Series( + "dates", + [ + [ + date(2025, 1, 1), + date(2025, 1, 3), + date(2025, 1, 5), + date(2025, 1, 7), + date(2025, 1, 10), + ], + [ + date(2025, 1, 11), + date(2025, 1, 11), + date(2025, 1, 12), + date(2025, 1, 12), + date(2025, 1, 13), + date(2025, 1, 13), + date(2025, 1, 14), + date(2025, 1, 15), + ], + ], + ).to_frame() + assert_frame_equal(result, expected) + + +def test_date_ranges_start_end_samples_backwards() -> None: + df = pl.DataFrame( + { + "start": [date(2025, 1, 10), date(2025, 1, 15)], + "end": [date(2025, 1, 1), date(2025, 1, 11)], + "samples": [5, 8], + } + ) + result = df.select( + dates=pl.date_ranges(start="start", end="end", num_samples="samples") + ) + expected = pl.Series( + "dates", + [ + [ 
+ date(2025, 1, 10), + date(2025, 1, 7), + date(2025, 1, 5), + date(2025, 1, 3), + date(2025, 1, 1), + ], + [ + date(2025, 1, 15), + date(2025, 1, 14), + date(2025, 1, 13), + date(2025, 1, 13), + date(2025, 1, 12), + date(2025, 1, 12), + date(2025, 1, 11), + date(2025, 1, 11), + ], + ], + ).to_frame() + assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + ("closed", "expected"), + [ + ( + "both", + [ + [date(2025, 1, 1), date(2025, 1, 3), date(2025, 1, 5)], + [ + date(2025, 1, 11), + date(2025, 1, 13), + date(2025, 1, 15), + date(2025, 1, 17), + ], + ], + ), + ( + "left", + [ + [date(2025, 1, 1), date(2025, 1, 3), date(2025, 1, 5)], + [ + date(2025, 1, 11), + date(2025, 1, 13), + date(2025, 1, 15), + date(2025, 1, 17), + ], + ], + ), + ( + "right", + [ + [date(2025, 1, 3), date(2025, 1, 5), date(2025, 1, 7)], + [ + date(2025, 1, 13), + date(2025, 1, 15), + date(2025, 1, 17), + date(2025, 1, 19), + ], + ], + ), + ( + "none", + [ + [date(2025, 1, 3), date(2025, 1, 5), date(2025, 1, 7)], + [ + date(2025, 1, 13), + date(2025, 1, 15), + date(2025, 1, 17), + date(2025, 1, 19), + ], + ], + ), + ], +) +def test_date_ranges_start_interval_samples_forward( + closed: ClosedInterval, expected: list[list[date]] +) -> None: + df = pl.DataFrame( + { + "start": [date(2025, 1, 1), date(2025, 1, 11)], + "samples": [3, 4], + } + ) + result = df.select( + dates=pl.date_ranges( + start="start", + num_samples="samples", + interval="2d", + closed=closed, + ) + ) + assert_frame_equal(result, pl.Series("dates", expected).to_frame()) + + +@pytest.mark.parametrize( + ("closed", "expected"), + [ + ( + "both", + [ + [date(2025, 1, 9), date(2025, 1, 7), date(2025, 1, 5)], + [ + date(2025, 1, 19), + date(2025, 1, 17), + date(2025, 1, 15), + date(2025, 1, 13), + ], + ], + ), + ( + "left", + [ + [date(2025, 1, 9), date(2025, 1, 7), date(2025, 1, 5)], + [ + date(2025, 1, 19), + date(2025, 1, 17), + date(2025, 1, 15), + date(2025, 1, 13), + ], + ], + ), + ( + "right", + [ + 
[date(2025, 1, 7), date(2025, 1, 5), date(2025, 1, 3)], + [ + date(2025, 1, 17), + date(2025, 1, 15), + date(2025, 1, 13), + date(2025, 1, 11), + ], + ], + ), + ( + "none", + [ + [date(2025, 1, 7), date(2025, 1, 5), date(2025, 1, 3)], + [ + date(2025, 1, 17), + date(2025, 1, 15), + date(2025, 1, 13), + date(2025, 1, 11), + ], + ], + ), + ], +) +def test_date_ranges_start_interval_samples_backward( + closed: ClosedInterval, expected: list[list[date]] +) -> None: + df = pl.DataFrame( + { + "start": [date(2025, 1, 9), date(2025, 1, 19)], + "samples": [3, 4], + } + ) + result = df.select( + dates=pl.date_ranges( + start="start", + num_samples="samples", + interval="-2d", + closed=closed, + ) + ) + assert_frame_equal(result, pl.Series("dates", expected).to_frame()) + + +@pytest.mark.parametrize( + ("closed", "expected"), + [ + ( + "both", + [ + [date(2025, 1, 5), date(2025, 1, 7), date(2025, 1, 9)], + [ + date(2025, 1, 13), + date(2025, 1, 15), + date(2025, 1, 17), + date(2025, 1, 19), + ], + ], + ), + ( + "left", + [ + [date(2025, 1, 3), date(2025, 1, 5), date(2025, 1, 7)], + [ + date(2025, 1, 11), + date(2025, 1, 13), + date(2025, 1, 15), + date(2025, 1, 17), + ], + ], + ), + ( + "right", + [ + [date(2025, 1, 5), date(2025, 1, 7), date(2025, 1, 9)], + [ + date(2025, 1, 13), + date(2025, 1, 15), + date(2025, 1, 17), + date(2025, 1, 19), + ], + ], + ), + ( + "none", + [ + [date(2025, 1, 3), date(2025, 1, 5), date(2025, 1, 7)], + [ + date(2025, 1, 11), + date(2025, 1, 13), + date(2025, 1, 15), + date(2025, 1, 17), + ], + ], + ), + ], +) +def test_date_ranges_end_interval_samples_forward( + closed: ClosedInterval, expected: list[list[date]] +) -> None: + df = pl.DataFrame( + { + "end": [date(2025, 1, 9), date(2025, 1, 19)], + "samples": [3, 4], + } + ) + result = df.select( + dates=pl.date_ranges( + end="end", + num_samples="samples", + interval="2d", + closed=closed, + ) + ) + assert_frame_equal(result, pl.Series("dates", expected).to_frame()) + + 
+@pytest.mark.parametrize( + ("closed", "expected"), + [ + ( + "both", + [ + [date(2025, 1, 5), date(2025, 1, 3), date(2025, 1, 1)], + [ + date(2025, 1, 17), + date(2025, 1, 15), + date(2025, 1, 13), + date(2025, 1, 11), + ], + ], + ), + ( + "left", + [ + [date(2025, 1, 7), date(2025, 1, 5), date(2025, 1, 3)], + [ + date(2025, 1, 19), + date(2025, 1, 17), + date(2025, 1, 15), + date(2025, 1, 13), + ], + ], + ), + ( + "right", + [ + [date(2025, 1, 5), date(2025, 1, 3), date(2025, 1, 1)], + [ + date(2025, 1, 17), + date(2025, 1, 15), + date(2025, 1, 13), + date(2025, 1, 11), + ], + ], + ), + ( + "none", + [ + [date(2025, 1, 7), date(2025, 1, 5), date(2025, 1, 3)], + [ + date(2025, 1, 19), + date(2025, 1, 17), + date(2025, 1, 15), + date(2025, 1, 13), + ], + ], + ), + ], +) +def test_date_ranges_end_interval_samples_backward( + closed: ClosedInterval, expected: list[list[date]] +) -> None: + df = pl.DataFrame( + { + "end": [date(2025, 1, 1), date(2025, 1, 11)], + "samples": [3, 4], + } + ) + result = df.select( + dates=pl.date_ranges( + end="end", + num_samples="samples", + interval="-2d", + closed=closed, + ) + ) + assert_frame_equal(result, pl.Series("dates", expected).to_frame()) + + +def test_date_ranges_lit_combinations_start_end_interval() -> None: + df = pl.DataFrame( + { + "start": [date(2025, 1, 1), date(2025, 1, 1)], + "end": [date(2025, 1, 3), date(2025, 1, 3)], + } + ) + + result = df.select( + start_lit=pl.date_ranges(start=date(2025, 1, 1), end="end", interval="1d"), + end_lit=pl.date_ranges(start="start", end=date(2025, 1, 3), interval="1d"), + ) + dt = [date(2025, 1, 1), date(2025, 1, 2), date(2025, 1, 3)] + expected = pl.DataFrame( + { + "start_lit": pl.Series([dt, dt]), + "end_lit": pl.Series([dt, dt]), + } + ) + assert_frame_equal(result, expected) + + +def test_date_ranges_lit_combinations_start_end_samples() -> None: + df = pl.DataFrame( + { + "start": [date(2025, 1, 1), date(2025, 1, 1)], + "end": [date(2025, 1, 3), date(2025, 1, 3)], + 
"samples": [3, 3], + } + ) + + result = df.select( + start_lit=pl.date_ranges( + start=date(2025, 1, 1), end="end", num_samples="samples" + ), + end_lit=pl.date_ranges( + start="start", end=date(2025, 1, 3), num_samples="samples" + ), + samples_lit=pl.date_ranges(start="start", end="end", num_samples=3), + ) + dt = [date(2025, 1, 1), date(2025, 1, 2), date(2025, 1, 3)] + expected = pl.DataFrame( + { + "start_lit": pl.Series([dt, dt]), + "end_lit": pl.Series([dt, dt]), + "samples_lit": pl.Series([dt, dt]), + } + ) + assert_frame_equal(result, expected) + + +def test_date_ranges_lit_combinations_start_interval_samples() -> None: + df = pl.DataFrame( + { + "start": [date(2025, 1, 1), date(2025, 1, 1)], + "samples": [3, 3], + } + ) + + result = df.select( + start_lit=pl.date_ranges( + start=date(2025, 1, 1), interval="1d", num_samples="samples" + ), + samples_lit=pl.date_ranges(start="start", interval="1d", num_samples=3), + ) + dt = [date(2025, 1, 1), date(2025, 1, 2), date(2025, 1, 3)] + expected = pl.DataFrame( + { + "start_lit": pl.Series([dt, dt]), + "samples_lit": pl.Series([dt, dt]), + } + ) + assert_frame_equal(result, expected) + + +def test_date_ranges_lit_combinations_end_interval_samples() -> None: + df = pl.DataFrame( + { + "end": [date(2025, 1, 3), date(2025, 1, 3)], + "samples": [3, 3], + } + ) + + result = df.select( + end_lit=pl.date_ranges( + end=date(2025, 1, 3), num_samples="samples", interval="1d" + ), + samples_lit=pl.date_ranges(end="end", num_samples=3, interval="1d"), + ) + dt = [date(2025, 1, 1), date(2025, 1, 2), date(2025, 1, 3)] + expected = pl.DataFrame( + { + "end_lit": pl.Series([dt, dt]), + "samples_lit": pl.Series([dt, dt]), + } + ) + assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("closed", ["left", "right", "none"]) +def test_date_ranges_start_end_samples_notclosed(closed: ClosedInterval) -> None: + with pytest.raises( + InvalidOperationError, + match=( + r"date_ranges does not support 'left', 'right', or 'none' 
for the 'closed' " + r"parameter when 'start', 'end', and 'num_samples' is provided." + ), + ): + pl.date_ranges( + start=date(2025, 1, 1), end=date(2025, 1, 2), num_samples=3, closed=closed + ) diff --git a/py-polars/tests/unit/functions/range/test_datetime_range.py b/py-polars/tests/unit/functions/range/test_datetime_range.py index 202defff5b20..f3def7297b9b 100644 --- a/py-polars/tests/unit/functions/range/test_datetime_range.py +++ b/py-polars/tests/unit/functions/range/test_datetime_range.py @@ -106,7 +106,7 @@ def test_datetime_range_interval_too_small() -> None: # start/end/interval with pytest.raises( InvalidOperationError, - match="interval 1ns is too small for time unit μs and was rounded down to zero", + match="interval 1ns is too small for time unit μs and was rounded to zero", ): pl.datetime_range( start=datetime(2025, 1, 1), @@ -502,6 +502,89 @@ def test_datetime_range_start_end_interval_forwards(dtype: PolarsDataType) -> No ) +@pytest.mark.parametrize( + "dtype", + [ + pl.Date, + pl.Datetime("ms"), + pl.Datetime("us"), + pl.Datetime("ns"), + pl.Datetime("ms", time_zone="Asia/Kathmandu"), + pl.Datetime("us", time_zone="Asia/Kathmandu"), + pl.Datetime("ns", time_zone="Asia/Kathmandu"), + ], +) +def test_datetime_range_start_end_interval_backwards(dtype: PolarsDataType) -> None: + start = date(2025, 1, 10) + end = date(2025, 1, 1) + tu: TimeUnit = dtype.time_unit if dtype == pl.Datetime else None # type: ignore[assignment, union-attr] + tz: str = dtype.time_zone if dtype == pl.Datetime else None # type: ignore[assignment, union-attr] + + assert_series_equal( + pl.datetime_range( + start=start, + end=end, + interval="-3d", + closed="left", + eager=True, + time_unit=tu, + time_zone=tz, + ), + to_expected([date(2025, 1, 10), date(2025, 1, 7), date(2025, 1, 4)], dtype), + ) + assert_series_equal( + pl.datetime_range( + start=start, + end=end, + interval="-3d", + closed="right", + eager=True, + time_unit=tu, + time_zone=tz, + ), + to_expected([date(2025, 1, 
7), date(2025, 1, 4), date(2025, 1, 1)], dtype), + ) + assert_series_equal( + pl.datetime_range( + start=start, + end=end, + interval="-3d", + closed="none", + eager=True, + time_unit=tu, + time_zone=tz, + ), + to_expected([date(2025, 1, 7), date(2025, 1, 4)], dtype), + ) + assert_series_equal( + pl.datetime_range( + start=start, + end=end, + interval="-3d", + closed="both", + eager=True, + time_unit=tu, + time_zone=tz, + ), + to_expected( + [date(2025, 1, 10), date(2025, 1, 7), date(2025, 1, 4), date(2025, 1, 1)], + dtype, + ), + ) + # test wrong direction is empty + assert_series_equal( + pl.datetime_range( + start=end, + end=start, + interval="-3d", + eager=True, + time_unit=tu, + time_zone=tz, + ), + to_expected([], dtype=dtype), + ) + + @pytest.mark.parametrize( "dtype", [ @@ -531,11 +614,536 @@ def test_datetime_range_expr_scalar(dtype: PolarsDataType) -> None: time_unit=tu, time_zone=tz, ), + backward_start_end_interval=pl.datetime_range( + start=pl.col("a").max(), + end=pl.col("a").min(), + interval="-1d", + time_unit=tu, + time_zone=tz, + ), + forward_start_end_samples=pl.datetime_range( + start=pl.col("a").min(), + end=pl.col("a").max(), + num_samples=3, + time_unit=tu, + time_zone=tz, + ), + backward_start_end_samples=pl.datetime_range( + start=pl.col("a").max(), + end=pl.col("a").min(), + num_samples=3, + time_unit=tu, + time_zone=tz, + ), + forward_start_interval_samples=pl.datetime_range( + start=pl.col("a").min(), + interval="1d", + num_samples=3, + time_unit=tu, + time_zone=tz, + ), + backward_start_interval_samples=pl.datetime_range( + start=pl.col("a").max(), + interval="-1d", + num_samples=3, + time_unit=tu, + time_zone=tz, + ), + forward_end_interval_samples=pl.datetime_range( + end=pl.col("a").max(), + interval="1d", + num_samples=3, + time_unit=tu, + time_zone=tz, + ), + backward_end_interval_samples=pl.datetime_range( + end=pl.col("a").min(), + interval="-1d", + num_samples=3, + time_unit=tu, + time_zone=tz, + ), ) forward = [date(2025, 1, 
1), date(2025, 1, 2), date(2025, 1, 3)] + backward = forward[-1::-1] expected = pl.DataFrame( { "forward_start_end_interval": to_expected(forward, dtype=dtype), + "backward_start_end_interval": to_expected(backward, dtype=dtype), + "forward_start_end_samples": to_expected(forward, dtype=dtype), + "backward_start_end_samples": to_expected(backward, dtype=dtype), + "forward_start_interval_samples": to_expected(forward, dtype=dtype), + "backward_start_interval_samples": to_expected(backward, dtype=dtype), + "forward_end_interval_samples": to_expected(forward, dtype=dtype), + "backward_end_interval_samples": to_expected(backward, dtype=dtype), } ) assert_frame_equal(result, expected) + + +# start/end/num_samples +@pytest.mark.parametrize( + "dtype", + [ + pl.Date, + pl.Datetime("ms"), + pl.Datetime("us"), + pl.Datetime("ns"), + pl.Datetime("ms", time_zone="Asia/Kathmandu"), + pl.Datetime("us", time_zone="Asia/Kathmandu"), + pl.Datetime("ns", time_zone="Asia/Kathmandu"), + ], +) +@pytest.mark.parametrize( + ("closed", "expected"), + [ + ( + "both", + [datetime(2025, 1, 1), datetime(2025, 1, 5, 12), datetime(2025, 1, 10)], + ), + ("left", [datetime(2025, 1, 1), datetime(2025, 1, 4), datetime(2025, 1, 7)]), + ("right", [datetime(2025, 1, 4), datetime(2025, 1, 7), datetime(2025, 1, 10)]), + ( + "none", + [ + datetime(2025, 1, 3, 6), + datetime(2025, 1, 5, 12), + datetime(2025, 1, 7, 18), + ], + ), + ], +) +def test_datetime_range_start_end_samples_forward( + closed: ClosedInterval, + expected: list[datetime], + dtype: PolarsDataType, +) -> None: + result = pl.datetime_range( + start=date(2025, 1, 1), + end=date(2025, 1, 10), + num_samples=3, + eager=True, + closed=closed, + time_unit=dtype.time_unit if dtype == pl.Datetime else None, # type: ignore[union-attr] + time_zone=dtype.time_zone if dtype == pl.Datetime else None, # type: ignore[union-attr] + ) + assert_series_equal(result, to_expected(expected, dtype)) + + +@pytest.mark.parametrize( + "dtype", + [ + pl.Date, + 
pl.Datetime("ms"), + pl.Datetime("us"), + pl.Datetime("ns"), + pl.Datetime("ms", time_zone="Asia/Kathmandu"), + pl.Datetime("us", time_zone="Asia/Kathmandu"), + pl.Datetime("ns", time_zone="Asia/Kathmandu"), + ], +) +@pytest.mark.parametrize( + ("closed", "expected"), + [ + ( + "both", + [datetime(2025, 1, 10), datetime(2025, 1, 5, 12), datetime(2025, 1, 1)], + ), + ("left", [datetime(2025, 1, 10), datetime(2025, 1, 7), datetime(2025, 1, 4)]), + ("right", [datetime(2025, 1, 7), datetime(2025, 1, 4), datetime(2025, 1, 1)]), + ( + "none", + [ + datetime(2025, 1, 7, 18), + datetime(2025, 1, 5, 12), + datetime(2025, 1, 3, 6), + ], + ), + ], +) +def test_datetime_range_start_end_samples_backward( + closed: ClosedInterval, + expected: list[datetime], + dtype: PolarsDataType, +) -> None: + result = pl.datetime_range( + start=date(2025, 1, 10), + end=date(2025, 1, 1), + num_samples=3, + closed=closed, + time_unit=dtype.time_unit if dtype == pl.Datetime else None, # type: ignore[union-attr] + time_zone=dtype.time_zone if dtype == pl.Datetime else None, # type: ignore[union-attr] + eager=True, + ) + assert_series_equal(result, to_expected(expected, dtype)) + + +# -- start/interval/samples +@pytest.mark.parametrize( + ("closed", "expected"), + [ + ( + "both", + [datetime(2025, 1, 1), datetime(2025, 1, 2, 12), datetime(2025, 1, 4)], + ), + ( + "left", + [datetime(2025, 1, 1), datetime(2025, 1, 2, 12), datetime(2025, 1, 4)], + ), + ( + "right", + [datetime(2025, 1, 2, 12), datetime(2025, 1, 4), datetime(2025, 1, 5, 12)], + ), + ( + "none", + [datetime(2025, 1, 2, 12), datetime(2025, 1, 4), datetime(2025, 1, 5, 12)], + ), + ], +) +@pytest.mark.parametrize( + "dtype", + [ + pl.Date, + pl.Datetime("ms"), + pl.Datetime("us"), + pl.Datetime("ns"), + pl.Datetime("ms", time_zone="Asia/Kathmandu"), + pl.Datetime("us", time_zone="Asia/Kathmandu"), + pl.Datetime("ns", time_zone="Asia/Kathmandu"), + ], +) +def test_datetime_range_start_interval_samples_forward_1d( + closed: 
ClosedInterval, + expected: list[date], + dtype: PolarsDataType, +) -> None: + result = pl.datetime_range( + start=date(2025, 1, 1), + interval="1d12h", + num_samples=3, + closed=closed, + eager=True, + time_unit=dtype.time_unit if dtype == pl.Datetime else None, # type: ignore[union-attr] + time_zone=dtype.time_zone if dtype == pl.Datetime else None, # type: ignore[union-attr] + ) + assert_series_equal(result, to_expected(expected, dtype)) + + +@pytest.mark.parametrize( + ("closed", "expected"), + [ + ( + "both", + [datetime(2025, 1, 3), datetime(2025, 1, 1, 12), datetime(2024, 12, 31)], + ), + ( + "left", + [datetime(2025, 1, 3), datetime(2025, 1, 1, 12), datetime(2024, 12, 31)], + ), + ( + "right", + [ + datetime(2025, 1, 1, 12), + datetime(2024, 12, 31), + datetime(2024, 12, 29, 12), + ], + ), + ( + "none", + [ + datetime(2025, 1, 1, 12), + datetime(2024, 12, 31), + datetime(2024, 12, 29, 12), + ], + ), + ], +) +@pytest.mark.parametrize( + "dtype", + [ + pl.Date, + pl.Datetime("ms"), + pl.Datetime("us"), + pl.Datetime("ns"), + pl.Datetime("ms", time_zone="Asia/Kathmandu"), + pl.Datetime("us", time_zone="Asia/Kathmandu"), + pl.Datetime("ns", time_zone="Asia/Kathmandu"), + ], +) +def test_datetime_range_start_interval_samples_backward_1d( + closed: ClosedInterval, + expected: list[date], + dtype: PolarsDataType, +) -> None: + result = pl.datetime_range( + start=date(2025, 1, 3), + interval="-1d12h", + num_samples=3, + closed=closed, + eager=True, + time_unit=dtype.time_unit if dtype == pl.Datetime else None, # type: ignore[union-attr] + time_zone=dtype.time_zone if dtype == pl.Datetime else None, # type: ignore[union-attr] + ) + assert_series_equal(result, to_expected(expected, dtype)) + + +@pytest.mark.parametrize( + ("closed", "expected"), + [ + ("both", [date(2025, 1, 31), date(2025, 2, 28), date(2025, 3, 31)]), + ("left", [date(2025, 1, 31), date(2025, 2, 28), date(2025, 3, 31)]), + ("right", [date(2025, 2, 28), date(2025, 3, 31), date(2025, 4, 30)]), + 
("none", [date(2025, 2, 28), date(2025, 3, 31), date(2025, 4, 30)]), + ], +) +@pytest.mark.parametrize( + "dtype", + [ + pl.Date, + pl.Datetime("ms"), + pl.Datetime("us"), + pl.Datetime("ns"), + pl.Datetime("ms", time_zone="Asia/Kathmandu"), + pl.Datetime("us", time_zone="Asia/Kathmandu"), + pl.Datetime("ns", time_zone="Asia/Kathmandu"), + ], +) +def test_datetime_range_start_interval_samples_forward_1mo( + closed: ClosedInterval, + expected: list[date], + dtype: PolarsDataType, +) -> None: + result = pl.datetime_range( + start=date(2025, 1, 31), + interval="1mo", + num_samples=3, + closed=closed, + eager=True, + time_unit=dtype.time_unit if dtype == pl.Datetime else None, # type: ignore[union-attr] + time_zone=dtype.time_zone if dtype == pl.Datetime else None, # type: ignore[union-attr] + ) + assert_series_equal(result, to_expected(expected, dtype)) + + +@pytest.mark.parametrize( + ("closed", "expected"), + [ + ("both", [date(2025, 3, 31), date(2025, 2, 28), date(2025, 1, 31)]), + ("left", [date(2025, 3, 31), date(2025, 2, 28), date(2025, 1, 31)]), + ("right", [date(2025, 2, 28), date(2025, 1, 31), date(2024, 12, 31)]), + ("none", [date(2025, 2, 28), date(2025, 1, 31), date(2024, 12, 31)]), + ], +) +@pytest.mark.parametrize( + "dtype", + [ + pl.Date, + pl.Datetime("ms"), + pl.Datetime("us"), + pl.Datetime("ns"), + pl.Datetime("ms", time_zone="Asia/Kathmandu"), + pl.Datetime("us", time_zone="Asia/Kathmandu"), + pl.Datetime("ns", time_zone="Asia/Kathmandu"), + ], +) +def test_datetime_range_start_interval_samples_backward_1mo( + closed: ClosedInterval, + expected: list[date], + dtype: PolarsDataType, +) -> None: + result = pl.datetime_range( + start=date(2025, 3, 31), + interval="-1mo", + num_samples=3, + closed=closed, + eager=True, + time_unit=dtype.time_unit if dtype == pl.Datetime else None, # type: ignore[union-attr] + time_zone=dtype.time_zone if dtype == pl.Datetime else None, # type: ignore[union-attr] + ) + assert_series_equal(result, to_expected(expected, 
dtype)) + + +# -- end/interval/samples +@pytest.mark.parametrize( + ("closed", "expected"), + [ + ( + "both", + [datetime(2025, 1, 1), datetime(2025, 1, 2, 12), datetime(2025, 1, 4)], + ), + ( + "left", + [ + datetime(2024, 12, 30, 12), + datetime(2025, 1, 1), + datetime(2025, 1, 2, 12), + ], + ), + ( + "right", + [datetime(2025, 1, 1), datetime(2025, 1, 2, 12), datetime(2025, 1, 4)], + ), + ( + "none", + [ + datetime(2024, 12, 30, 12), + datetime(2025, 1, 1), + datetime(2025, 1, 2, 12), + ], + ), + ], +) +@pytest.mark.parametrize( + "dtype", + [ + pl.Date, + pl.Datetime("ms"), + pl.Datetime("us"), + pl.Datetime("ns"), + pl.Datetime("ms", time_zone="Asia/Kathmandu"), + pl.Datetime("us", time_zone="Asia/Kathmandu"), + pl.Datetime("ns", time_zone="Asia/Kathmandu"), + ], +) +def test_datetime_range_end_interval_samples_forward_1d( + closed: ClosedInterval, + expected: list[date], + dtype: PolarsDataType, +) -> None: + result = pl.datetime_range( + end=date(2025, 1, 4), + interval="1d12h", + num_samples=3, + closed=closed, + eager=True, + time_unit=dtype.time_unit if dtype == pl.Datetime else None, # type: ignore[union-attr] + time_zone=dtype.time_zone if dtype == pl.Datetime else None, # type: ignore[union-attr] + ) + assert_series_equal(result, to_expected(expected, dtype)) + + +@pytest.mark.parametrize( + ("closed", "expected"), + [ + ( + "both", + [datetime(2025, 1, 4), datetime(2025, 1, 2, 12), datetime(2025, 1, 1)], + ), + ( + "left", + [datetime(2025, 1, 5, 12), datetime(2025, 1, 4), datetime(2025, 1, 2, 12)], + ), + ( + "right", + [datetime(2025, 1, 4), datetime(2025, 1, 2, 12), datetime(2025, 1, 1)], + ), + ( + "none", + [datetime(2025, 1, 5, 12), datetime(2025, 1, 4), datetime(2025, 1, 2, 12)], + ), + ], +) +@pytest.mark.parametrize( + "dtype", + [ + pl.Date, + pl.Datetime("ms"), + pl.Datetime("us"), + pl.Datetime("ns"), + pl.Datetime("ms", time_zone="Asia/Kathmandu"), + pl.Datetime("us", time_zone="Asia/Kathmandu"), + pl.Datetime("ns", 
time_zone="Asia/Kathmandu"), + ], +) +def test_datetime_range_end_interval_samples_backward_1d( + closed: ClosedInterval, + expected: list[date], + dtype: PolarsDataType, +) -> None: + result = pl.datetime_range( + end=date(2025, 1, 1), + interval="-1d12h", + num_samples=3, + closed=closed, + eager=True, + time_unit=dtype.time_unit if dtype == pl.Datetime else None, # type: ignore[union-attr] + time_zone=dtype.time_zone if dtype == pl.Datetime else None, # type: ignore[union-attr] + ) + assert_series_equal(result, to_expected(expected, dtype)) + + +@pytest.mark.parametrize( + ("closed", "expected"), + [ + ("both", [date(2025, 1, 31), date(2025, 2, 28), date(2025, 3, 31)]), + ("left", [date(2024, 12, 31), date(2025, 1, 31), date(2025, 2, 28)]), + ("right", [date(2025, 1, 31), date(2025, 2, 28), date(2025, 3, 31)]), + ("none", [date(2024, 12, 31), date(2025, 1, 31), date(2025, 2, 28)]), + ], +) +@pytest.mark.parametrize( + "dtype", + [ + pl.Date, + pl.Datetime("ms"), + pl.Datetime("us"), + pl.Datetime("ns"), + pl.Datetime("ms", time_zone="Asia/Kathmandu"), + pl.Datetime("us", time_zone="Asia/Kathmandu"), + pl.Datetime("ns", time_zone="Asia/Kathmandu"), + ], +) +def test_datetime_range_end_interval_samples_forward_1mo( + closed: ClosedInterval, + expected: list[date], + dtype: PolarsDataType, +) -> None: + result = pl.datetime_range( + end=date(2025, 3, 31), + interval="1mo", + num_samples=3, + closed=closed, + eager=True, + time_unit=dtype.time_unit if dtype == pl.Datetime else None, # type: ignore[union-attr] + time_zone=dtype.time_zone if dtype == pl.Datetime else None, # type: ignore[union-attr] + ) + assert_series_equal(result, to_expected(expected, dtype)) + + +@pytest.mark.parametrize( + ("closed", "expected"), + [ + ("both", [date(2025, 3, 31), date(2025, 2, 28), date(2025, 1, 31)]), + ("left", [date(2025, 4, 30), date(2025, 3, 31), date(2025, 2, 28)]), + ("right", [date(2025, 3, 31), date(2025, 2, 28), date(2025, 1, 31)]), + ("none", [date(2025, 4, 30), 
date(2025, 3, 31), date(2025, 2, 28)]), + ], +) +@pytest.mark.parametrize( + "dtype", + [ + pl.Date, + pl.Datetime("ms"), + pl.Datetime("us"), + pl.Datetime("ns"), + pl.Datetime("ms", time_zone="Asia/Kathmandu"), + pl.Datetime("us", time_zone="Asia/Kathmandu"), + pl.Datetime("ns", time_zone="Asia/Kathmandu"), + ], +) +def test_datetime_range_end_interval_samples_backward_1mo( + closed: ClosedInterval, + expected: list[date], + dtype: PolarsDataType, +) -> None: + result = pl.datetime_range( + end=date(2025, 1, 31), + interval="-1mo", + num_samples=3, + closed=closed, + eager=True, + time_unit=dtype.time_unit if dtype == pl.Datetime else None, # type: ignore[union-attr] + time_zone=dtype.time_zone if dtype == pl.Datetime else None, # type: ignore[union-attr] + ) + s_expected = to_expected(expected, dtype) + assert_series_equal(result, s_expected) diff --git a/py-polars/tests/unit/functions/range/test_datetime_ranges.py b/py-polars/tests/unit/functions/range/test_datetime_ranges.py index 32cc013cf8ed..cfd695ab11ce 100644 --- a/py-polars/tests/unit/functions/range/test_datetime_ranges.py +++ b/py-polars/tests/unit/functions/range/test_datetime_ranges.py @@ -411,6 +411,878 @@ def test_datetime_ranges_start_end_interval_forwards( assert_frame_equal(result, s_expected.to_frame()) +@pytest.mark.parametrize( + ("closed", "expected"), + [ + ( + "both", + [ + [ + datetime(2025, 1, 10), + datetime(2025, 1, 8, 12), + datetime(2025, 1, 7), + datetime(2025, 1, 5, 12), + datetime(2025, 1, 4), + datetime(2025, 1, 2, 12), + datetime(2025, 1, 1), + ], + [ + datetime(2025, 1, 17), + datetime(2025, 1, 15, 12), + datetime(2025, 1, 14), + datetime(2025, 1, 12, 12), + datetime(2025, 1, 11), + datetime(2025, 1, 9, 12), + datetime(2025, 1, 8), + ], + ], + ), + ( + "left", + [ + [ + datetime(2025, 1, 10), + datetime(2025, 1, 8, 12), + datetime(2025, 1, 7), + datetime(2025, 1, 5, 12), + datetime(2025, 1, 4), + datetime(2025, 1, 2, 12), + ], + [ + datetime(2025, 1, 17), + datetime(2025, 1, 
15, 12), + datetime(2025, 1, 14), + datetime(2025, 1, 12, 12), + datetime(2025, 1, 11), + datetime(2025, 1, 9, 12), + ], + ], + ), + ( + "right", + [ + [ + datetime(2025, 1, 8, 12), + datetime(2025, 1, 7), + datetime(2025, 1, 5, 12), + datetime(2025, 1, 4), + datetime(2025, 1, 2, 12), + datetime(2025, 1, 1), + ], + [ + datetime(2025, 1, 15, 12), + datetime(2025, 1, 14), + datetime(2025, 1, 12, 12), + datetime(2025, 1, 11), + datetime(2025, 1, 9, 12), + datetime(2025, 1, 8), + ], + ], + ), + ( + "none", + [ + [ + datetime(2025, 1, 8, 12), + datetime(2025, 1, 7), + datetime(2025, 1, 5, 12), + datetime(2025, 1, 4), + datetime(2025, 1, 2, 12), + ], + [ + datetime(2025, 1, 15, 12), + datetime(2025, 1, 14), + datetime(2025, 1, 12, 12), + datetime(2025, 1, 11), + datetime(2025, 1, 9, 12), + ], + ], + ), + ], +) +@pytest.mark.parametrize( + "dtype", + [ + pl.Date, + pl.Datetime("ms"), + pl.Datetime("us"), + pl.Datetime("ns"), + pl.Datetime("ms", time_zone="Asia/Kathmandu"), + pl.Datetime("us", time_zone="Asia/Kathmandu"), + pl.Datetime("ns", time_zone="Asia/Kathmandu"), + ], +) +def test_datetime_ranges_start_end_interval_backwards( + closed: ClosedInterval, + expected: list[list[datetime]], + dtype: PolarsDataType, +) -> None: + tu = dtype.time_unit if dtype == pl.Datetime else None # type: ignore[union-attr] + tz = dtype.time_zone if dtype == pl.Datetime else None # type: ignore[union-attr] + if tz is not None: + time_zone = ZoneInfo(tz) + expected = [[e.replace(tzinfo=time_zone) for e in x] for x in expected] + df = pl.DataFrame( + { + "start": [date(2025, 1, 10), date(2025, 1, 17)], + "end": [date(2025, 1, 1), date(2025, 1, 8)], + } + ) + result = df.select( + dates=pl.datetime_ranges( + start="start", + end="end", + interval="-1d12h", + closed=closed, + time_unit=tu, + time_zone=tz, + ) + ) + dt_out = pl.List(pl.Datetime("us")) if dtype == pl.Date else pl.List(dtype) + s_expected = pl.Series("dates", expected, dtype=dt_out) + assert_frame_equal(result, 
s_expected.to_frame()) + + +@pytest.mark.parametrize( + ("closed", "expected"), + [ + ( + "both", + [ + [ # (1, 10, 5) + datetime(2025, 1, 1), + datetime(2025, 1, 3, 6), + datetime(2025, 1, 5, 12), + datetime(2025, 1, 7, 18), + datetime(2025, 1, 10), + ], + [ # (10, 11, 6) + datetime(2025, 1, 10), + datetime(2025, 1, 10, 4, 48), + datetime(2025, 1, 10, 9, 36), + datetime(2025, 1, 10, 14, 24), + datetime(2025, 1, 10, 19, 12), + datetime(2025, 1, 11), + ], + ], + ), + ( + "left", + [ + [ # (1, 10, 5) + datetime(2025, 1, 1), + datetime(2025, 1, 2, 19, 12), + datetime(2025, 1, 4, 14, 24), + datetime(2025, 1, 6, 9, 36), + datetime(2025, 1, 8, 4, 48), + ], + [ # (10, 11, 6) + datetime(2025, 1, 10), + datetime(2025, 1, 10, 4), + datetime(2025, 1, 10, 8), + datetime(2025, 1, 10, 12), + datetime(2025, 1, 10, 16), + datetime(2025, 1, 10, 20), + ], + ], + ), + ( + "right", + [ + [ # (1, 10, 5) + datetime(2025, 1, 2, 19, 12), + datetime(2025, 1, 4, 14, 24), + datetime(2025, 1, 6, 9, 36), + datetime(2025, 1, 8, 4, 48), + datetime(2025, 1, 10), + ], + [ # (10, 11, 6) + datetime(2025, 1, 10, 4), + datetime(2025, 1, 10, 8), + datetime(2025, 1, 10, 12), + datetime(2025, 1, 10, 16), + datetime(2025, 1, 10, 20), + datetime(2025, 1, 11), + ], + ], + ), + ( + "none", + [ + [ # (1, 10, 5) + datetime(2025, 1, 2, 12), + datetime(2025, 1, 4), + datetime(2025, 1, 5, 12), + datetime(2025, 1, 7), + datetime(2025, 1, 8, 12), + ], + [ # (10, 11, 6) + datetime(2025, 1, 10, 3, 25, 42, 857142), + datetime(2025, 1, 10, 6, 51, 25, 714285), + datetime(2025, 1, 10, 10, 17, 8, 571428), + datetime(2025, 1, 10, 13, 42, 51, 428571), + datetime(2025, 1, 10, 17, 8, 34, 285714), + datetime(2025, 1, 10, 20, 34, 17, 142857), + ], + ], + ), + ], +) +@pytest.mark.parametrize( + "dtype", + [ + # we disable ns for this test, as it's hard to provide the expected value + pl.Date, + pl.Datetime("ms"), + pl.Datetime("us"), + # pl.Datetime("ns"), + pl.Datetime("ms", time_zone="Asia/Kathmandu"), + pl.Datetime("us", 
time_zone="Asia/Kathmandu"), + # pl.Datetime("ns", time_zone="Asia/Kathmandu"), + ], +) +def test_date_ranges_start_end_samples_forwards( + closed: ClosedInterval, + expected: list[list[datetime]], + dtype: PolarsDataType, +) -> None: + tu = dtype.time_unit if dtype == pl.Datetime else None # type: ignore[union-attr] + tz = dtype.time_zone if dtype == pl.Datetime else None # type: ignore[union-attr] + if tz is not None: + time_zone = ZoneInfo(tz) + expected = [[e.replace(tzinfo=time_zone) for e in x] for x in expected] + df = pl.DataFrame( + { + "start": [date(2025, 1, 1), date(2025, 1, 10)], + "end": [date(2025, 1, 10), date(2025, 1, 11)], + "samples": [5, 6], + } + ) + result = df.select( + dates=pl.datetime_ranges( + start="start", + end="end", + num_samples="samples", + closed=closed, + time_unit=tu, + time_zone=tz, + ) + ) + dt_out = pl.List(pl.Datetime("us")) if dtype == pl.Date else pl.List(dtype) + s_expected = pl.Series("dates", expected, dtype=dt_out) + assert_frame_equal(result, s_expected.to_frame()) + + +@pytest.mark.parametrize( + ("closed", "expected"), + [ + ( + "both", + [ + [ # (10, 1, 5) + datetime(2025, 1, 10), + datetime(2025, 1, 7, 18), + datetime(2025, 1, 5, 12), + datetime(2025, 1, 3, 6), + datetime(2025, 1, 1), + ], + [ # (11, 10, 6) + datetime(2025, 1, 11), + datetime(2025, 1, 10, 19, 12), + datetime(2025, 1, 10, 14, 24), + datetime(2025, 1, 10, 9, 36), + datetime(2025, 1, 10, 4, 48), + datetime(2025, 1, 10), + ], + ], + ), + ( + "left", + [ + [ # (10, 1, 5) + datetime(2025, 1, 10), + datetime(2025, 1, 8, 4, 48), + datetime(2025, 1, 6, 9, 36), + datetime(2025, 1, 4, 14, 24), + datetime(2025, 1, 2, 19, 12), + ], + [ # (11, 10, 6) + datetime(2025, 1, 11), + datetime(2025, 1, 10, 20), + datetime(2025, 1, 10, 16), + datetime(2025, 1, 10, 12), + datetime(2025, 1, 10, 8), + datetime(2025, 1, 10, 4), + ], + ], + ), + ( + "right", + [ + [ # (10, 1, 5) + datetime(2025, 1, 8, 4, 48), + datetime(2025, 1, 6, 9, 36), + datetime(2025, 1, 4, 14, 24), + 
datetime(2025, 1, 2, 19, 12), + datetime(2025, 1, 1), + ], + [ # (11, 10, 6) + datetime(2025, 1, 10, 20), + datetime(2025, 1, 10, 16), + datetime(2025, 1, 10, 12), + datetime(2025, 1, 10, 8), + datetime(2025, 1, 10, 4), + datetime(2025, 1, 10), + ], + ], + ), + ( + "none", + [ + [ # (10, 1, 5) + datetime(2025, 1, 8, 12), + datetime(2025, 1, 7), + datetime(2025, 1, 5, 12), + datetime(2025, 1, 4), + datetime(2025, 1, 2, 12), + ], + [ # (11, 10, 6) + datetime(2025, 1, 10, 20, 34, 17, 142857), + datetime(2025, 1, 10, 17, 8, 34, 285714), + datetime(2025, 1, 10, 13, 42, 51, 428571), + datetime(2025, 1, 10, 10, 17, 8, 571428), + datetime(2025, 1, 10, 6, 51, 25, 714285), + datetime(2025, 1, 10, 3, 25, 42, 857143), + ], + ], + ), + ], +) +@pytest.mark.parametrize( + "dtype", + [ + # we disable ns for this test, as it's hard to provide the expected value + pl.Date, + pl.Datetime("ms"), + pl.Datetime("us"), + # pl.Datetime("ns"), + pl.Datetime("ms", time_zone="Asia/Kathmandu"), + pl.Datetime("us", time_zone="Asia/Kathmandu"), + # pl.Datetime("ns", time_zone="Asia/Kathmandu"), + ], +) +def test_date_ranges_start_end_samples_backwards( + closed: ClosedInterval, + expected: list[list[datetime]], + dtype: PolarsDataType, +) -> None: + tu = dtype.time_unit if dtype == pl.Datetime else None # type: ignore[union-attr] + tz = dtype.time_zone if dtype == pl.Datetime else None # type: ignore[union-attr] + if tz is not None: + time_zone = ZoneInfo(tz) + expected = [[e.replace(tzinfo=time_zone) for e in x] for x in expected] + df = pl.DataFrame( + { + "start": [date(2025, 1, 10), date(2025, 1, 11)], + "end": [date(2025, 1, 1), date(2025, 1, 10)], + "samples": [5, 6], + } + ) + result = df.select( + dates=pl.datetime_ranges( + start="start", + end="end", + num_samples="samples", + closed=closed, + time_unit=tu, + time_zone=tz, + ) + ) + dt_out = pl.List(pl.Datetime("us")) if dtype == pl.Date else pl.List(dtype) + s_expected = pl.Series("dates", expected, dtype=dt_out) + 
assert_frame_equal(result, s_expected.to_frame()) + + +@pytest.mark.parametrize( + ("closed", "expected"), + [ + ( + "both", + [ + [ + datetime(2025, 1, 1), + datetime(2025, 1, 2, 12), + datetime(2025, 1, 4), + datetime(2025, 1, 5, 12), + ], + [ + datetime(2025, 1, 11), + datetime(2025, 1, 12, 12), + datetime(2025, 1, 14), + datetime(2025, 1, 15, 12), + datetime(2025, 1, 17), + ], + ], + ), + ( + "left", + [ + [ + datetime(2025, 1, 1), + datetime(2025, 1, 2, 12), + datetime(2025, 1, 4), + datetime(2025, 1, 5, 12), + ], + [ + datetime(2025, 1, 11), + datetime(2025, 1, 12, 12), + datetime(2025, 1, 14), + datetime(2025, 1, 15, 12), + datetime(2025, 1, 17), + ], + ], + ), + ( + "right", + [ + [ + datetime(2025, 1, 2, 12), + datetime(2025, 1, 4), + datetime(2025, 1, 5, 12), + datetime(2025, 1, 7), + ], + [ + datetime(2025, 1, 12, 12), + datetime(2025, 1, 14), + datetime(2025, 1, 15, 12), + datetime(2025, 1, 17), + datetime(2025, 1, 18, 12), + ], + ], + ), + ( + "none", + [ + [ + datetime(2025, 1, 2, 12), + datetime(2025, 1, 4), + datetime(2025, 1, 5, 12), + datetime(2025, 1, 7), + ], + [ + datetime(2025, 1, 12, 12), + datetime(2025, 1, 14), + datetime(2025, 1, 15, 12), + datetime(2025, 1, 17), + datetime(2025, 1, 18, 12), + ], + ], + ), + ], +) +@pytest.mark.parametrize( + "dtype", + [ + pl.Date, + pl.Datetime("ms"), + pl.Datetime("us"), + pl.Datetime("ns"), + pl.Datetime("ms", time_zone="Asia/Kathmandu"), + pl.Datetime("us", time_zone="Asia/Kathmandu"), + pl.Datetime("ns", time_zone="Asia/Kathmandu"), + ], +) +def test_datetime_ranges_start_interval_samples_forwards( + closed: ClosedInterval, + expected: list[list[datetime]], + dtype: PolarsDataType, +) -> None: + tu = dtype.time_unit if dtype == pl.Datetime else None # type: ignore[union-attr] + tz = dtype.time_zone if dtype == pl.Datetime else None # type: ignore[union-attr] + if tz is not None: + time_zone = ZoneInfo(tz) + expected = [[e.replace(tzinfo=time_zone) for e in x] for x in expected] + df = pl.DataFrame( + 
{ + "start": [date(2025, 1, 1), date(2025, 1, 11)], + "samples": [4, 5], + } + ) + result = df.select( + dates=pl.datetime_ranges( + start="start", + num_samples="samples", + interval="1d12h", + closed=closed, + time_unit=tu, + time_zone=tz, + ) + ) + dt_out = pl.List(pl.Datetime("us")) if dtype == pl.Date else pl.List(dtype) + s_expected = pl.Series("dates", expected, dtype=dt_out) + assert_frame_equal(result, s_expected.to_frame()) + + +@pytest.mark.parametrize( + ("closed", "expected"), + [ + ( + "both", + [ + [ + datetime(2025, 1, 1), + datetime(2024, 12, 30, 12), + datetime(2024, 12, 29), + datetime(2024, 12, 27, 12), + ], + [ + datetime(2025, 1, 11), + datetime(2025, 1, 9, 12), + datetime(2025, 1, 8), + datetime(2025, 1, 6, 12), + datetime(2025, 1, 5), + ], + ], + ), + ( + "left", + [ + [ + datetime(2025, 1, 1), + datetime(2024, 12, 30, 12), + datetime(2024, 12, 29), + datetime(2024, 12, 27, 12), + ], + [ + datetime(2025, 1, 11), + datetime(2025, 1, 9, 12), + datetime(2025, 1, 8), + datetime(2025, 1, 6, 12), + datetime(2025, 1, 5), + ], + ], + ), + ( + "right", + [ + [ + datetime(2024, 12, 30, 12), + datetime(2024, 12, 29), + datetime(2024, 12, 27, 12), + datetime(2024, 12, 26), + ], + [ + datetime(2025, 1, 9, 12), + datetime(2025, 1, 8), + datetime(2025, 1, 6, 12), + datetime(2025, 1, 5), + datetime(2025, 1, 3, 12), + ], + ], + ), + ( + "none", + [ + [ + datetime(2024, 12, 30, 12), + datetime(2024, 12, 29), + datetime(2024, 12, 27, 12), + datetime(2024, 12, 26), + ], + [ + datetime(2025, 1, 9, 12), + datetime(2025, 1, 8), + datetime(2025, 1, 6, 12), + datetime(2025, 1, 5), + datetime(2025, 1, 3, 12), + ], + ], + ), + ], +) +@pytest.mark.parametrize( + "dtype", + [ + pl.Date, + pl.Datetime("ms"), + pl.Datetime("us"), + pl.Datetime("ns"), + pl.Datetime("ms", time_zone="Asia/Kathmandu"), + pl.Datetime("us", time_zone="Asia/Kathmandu"), + pl.Datetime("ns", time_zone="Asia/Kathmandu"), + ], +) +def test_datetime_ranges_start_interval_samples_backwards( + closed: 
ClosedInterval, + expected: list[list[datetime]], + dtype: PolarsDataType, +) -> None: + tu = dtype.time_unit if dtype == pl.Datetime else None # type: ignore[union-attr] + tz = dtype.time_zone if dtype == pl.Datetime else None # type: ignore[union-attr] + if tz is not None: + time_zone = ZoneInfo(tz) + expected = [[e.replace(tzinfo=time_zone) for e in x] for x in expected] + df = pl.DataFrame( + { + "start": [date(2025, 1, 1), date(2025, 1, 11)], + "samples": [4, 5], + } + ) + result = df.select( + dates=pl.datetime_ranges( + start="start", + num_samples="samples", + interval="-1d12h", + closed=closed, + time_unit=tu, + time_zone=tz, + ) + ) + dt_out = pl.List(pl.Datetime("us")) if dtype == pl.Date else pl.List(dtype) + s_expected = pl.Series("dates", expected, dtype=dt_out) + assert_frame_equal(result, s_expected.to_frame()) + + +@pytest.mark.parametrize( + ("closed", "expected"), + [ + ( + "both", + [ + [ + datetime(2024, 12, 27, 12), + datetime(2024, 12, 29), + datetime(2024, 12, 30, 12), + datetime(2025, 1, 1), + ], + [ + datetime(2025, 1, 5), + datetime(2025, 1, 6, 12), + datetime(2025, 1, 8), + datetime(2025, 1, 9, 12), + datetime(2025, 1, 11), + ], + ], + ), + ( + "left", + [ + [ + datetime(2024, 12, 26), + datetime(2024, 12, 27, 12), + datetime(2024, 12, 29), + datetime(2024, 12, 30, 12), + ], + [ + datetime(2025, 1, 3, 12), + datetime(2025, 1, 5), + datetime(2025, 1, 6, 12), + datetime(2025, 1, 8), + datetime(2025, 1, 9, 12), + ], + ], + ), + ( + "right", + [ + [ + datetime(2024, 12, 27, 12), + datetime(2024, 12, 29), + datetime(2024, 12, 30, 12), + datetime(2025, 1, 1), + ], + [ + datetime(2025, 1, 5), + datetime(2025, 1, 6, 12), + datetime(2025, 1, 8), + datetime(2025, 1, 9, 12), + datetime(2025, 1, 11), + ], + ], + ), + ( + "none", + [ + [ + datetime(2024, 12, 26), + datetime(2024, 12, 27, 12), + datetime(2024, 12, 29), + datetime(2024, 12, 30, 12), + ], + [ + datetime(2025, 1, 3, 12), + datetime(2025, 1, 5), + datetime(2025, 1, 6, 12), + 
datetime(2025, 1, 8), + datetime(2025, 1, 9, 12), + ], + ], + ), + ], +) +@pytest.mark.parametrize( + "dtype", + [ + pl.Date, + pl.Datetime("ms"), + pl.Datetime("us"), + pl.Datetime("ns"), + pl.Datetime("ms", time_zone="Asia/Kathmandu"), + pl.Datetime("us", time_zone="Asia/Kathmandu"), + pl.Datetime("ns", time_zone="Asia/Kathmandu"), + ], +) +def test_datetime_ranges_end_interval_samples_forwards( + closed: ClosedInterval, + expected: list[list[datetime]], + dtype: PolarsDataType, +) -> None: + tu = dtype.time_unit if dtype == pl.Datetime else None # type: ignore[union-attr] + tz = dtype.time_zone if dtype == pl.Datetime else None # type: ignore[union-attr] + if tz is not None: + time_zone = ZoneInfo(tz) + expected = [[e.replace(tzinfo=time_zone) for e in x] for x in expected] + df = pl.DataFrame( + { + "end": [date(2025, 1, 1), date(2025, 1, 11)], + "samples": [4, 5], + } + ) + result = df.select( + dates=pl.datetime_ranges( + end="end", + num_samples="samples", + interval="1d12h", + closed=closed, + time_unit=tu, + time_zone=tz, + ) + ) + dt_out = pl.List(pl.Datetime("us")) if dtype == pl.Date else pl.List(dtype) + s_expected = pl.Series("dates", expected, dtype=dt_out) + assert_frame_equal(result, s_expected.to_frame()) + + +@pytest.mark.parametrize( + ("closed", "expected"), + [ + ( + "both", + [ + [ + datetime(2025, 1, 5, 12), + datetime(2025, 1, 4), + datetime(2025, 1, 2, 12), + datetime(2025, 1, 1), + ], + [ + datetime(2025, 1, 17), + datetime(2025, 1, 15, 12), + datetime(2025, 1, 14), + datetime(2025, 1, 12, 12), + datetime(2025, 1, 11), + ], + ], + ), + ( + "left", + [ + [ + datetime(2025, 1, 7), + datetime(2025, 1, 5, 12), + datetime(2025, 1, 4), + datetime(2025, 1, 2, 12), + ], + [ + datetime(2025, 1, 18, 12), + datetime(2025, 1, 17), + datetime(2025, 1, 15, 12), + datetime(2025, 1, 14), + datetime(2025, 1, 12, 12), + ], + ], + ), + ( + "right", + [ + [ + datetime(2025, 1, 5, 12), + datetime(2025, 1, 4), + datetime(2025, 1, 2, 12), + datetime(2025, 1, 
1), + ], + [ + datetime(2025, 1, 17), + datetime(2025, 1, 15, 12), + datetime(2025, 1, 14), + datetime(2025, 1, 12, 12), + datetime(2025, 1, 11), + ], + ], + ), + ( + "none", + [ + [ + datetime(2025, 1, 7), + datetime(2025, 1, 5, 12), + datetime(2025, 1, 4), + datetime(2025, 1, 2, 12), + ], + [ + datetime(2025, 1, 18, 12), + datetime(2025, 1, 17), + datetime(2025, 1, 15, 12), + datetime(2025, 1, 14), + datetime(2025, 1, 12, 12), + ], + ], + ), + ], +) +@pytest.mark.parametrize( + "dtype", + [ + pl.Date, + pl.Datetime("ms"), + pl.Datetime("us"), + pl.Datetime("ns"), + pl.Datetime("ms", time_zone="Asia/Kathmandu"), + pl.Datetime("us", time_zone="Asia/Kathmandu"), + pl.Datetime("ns", time_zone="Asia/Kathmandu"), + ], +) +def test_datetime_ranges_end_interval_samples_backwards( + closed: ClosedInterval, + expected: list[list[datetime]], + dtype: PolarsDataType, +) -> None: + tu = dtype.time_unit if dtype == pl.Datetime else None # type: ignore[union-attr] + tz = dtype.time_zone if dtype == pl.Datetime else None # type: ignore[union-attr] + if tz is not None: + time_zone = ZoneInfo(tz) + expected = [[e.replace(tzinfo=time_zone) for e in x] for x in expected] + df = pl.DataFrame( + { + "end": [date(2025, 1, 1), date(2025, 1, 11)], + "samples": [4, 5], + } + ) + result = df.select( + dates=pl.datetime_ranges( + end="end", + num_samples="samples", + interval="-1d12h", + closed=closed, + time_unit=tu, + time_zone=tz, + ) + ) + dt_out = pl.List(pl.Datetime("us")) if dtype == pl.Date else pl.List(dtype) + s_expected = pl.Series("dates", expected, dtype=dt_out) + assert_frame_equal(result, s_expected.to_frame()) + + def test_datetime_ranges_lit_combinations_start_end_interval() -> None: df = pl.DataFrame( { @@ -449,3 +1321,178 @@ def test_datetime_ranges_null_lit_combinations_start_end_interval() -> None: s = pl.Series([None, None], dtype=pl.List(pl.Datetime("us"))) expected = pl.DataFrame({"start_lit": s, "end_lit": s, "all_lit": s}) assert_frame_equal(result, expected) + + 
+def test_datetime_ranges_lit_combinations_start_end_samples() -> None: + df = pl.DataFrame( + { + "start": [date(2025, 1, 1), date(2025, 1, 1)], + "end": [date(2025, 1, 3), date(2025, 1, 3)], + "samples": [3, 3], + } + ) + start = date(2025, 1, 1) + end = date(2025, 1, 3) + result = df.select( + start_lit=pl.datetime_ranges(start=start, end="end", num_samples="samples"), + end_lit=pl.datetime_ranges(start="start", end=end, num_samples="samples"), + samples_lit=pl.datetime_ranges(start="start", end="end", num_samples=3), + start_end_lit=pl.datetime_ranges(start=start, end=end, num_samples="samples"), + start_samples_lit=pl.datetime_ranges(start=start, end="end", num_samples=3), + end_samples_lit=pl.datetime_ranges(start="start", end=end, num_samples=3), + all_lit=pl.datetime_ranges(start=start, end=end, num_samples=3), + ) + dt = [datetime(2025, 1, 1), datetime(2025, 1, 2), datetime(2025, 1, 3)] + s = pl.Series([dt, dt], dtype=pl.List(pl.Datetime("us"))) + expected = pl.DataFrame( + { + "start_lit": s, + "end_lit": s, + "samples_lit": s, + "start_end_lit": s, + "start_samples_lit": s, + "end_samples_lit": s, + "all_lit": s, + } + ) + assert_frame_equal(result, expected) + + +def test_datetime_ranges_null_lit_combinations_start_end_samples() -> None: + df = pl.DataFrame( + { + "start": [date(2025, 1, 1), date(2025, 1, 1)], + "end": [date(2025, 1, 3), date(2025, 1, 3)], + "samples": [3, 3], + } + ) + lit_dt = pl.lit(None, dtype=pl.Date) + lit_n = pl.lit(None, dtype=pl.Int64) + result = df.select( + start_lit=pl.datetime_ranges(start=lit_dt, end="end", num_samples="samples"), + end_lit=pl.datetime_ranges(start="start", end=lit_dt, num_samples="samples"), + samples_lit=pl.datetime_ranges(start="start", end="end", num_samples=lit_n), + start_end_lit=pl.datetime_ranges( + start=lit_dt, end=lit_dt, num_samples="samples" + ), + start_samples_lit=pl.datetime_ranges( + start=lit_dt, end="end", num_samples=lit_n + ), + end_samples_lit=pl.datetime_ranges( + start="start", 
end=lit_dt, num_samples=lit_n + ), + all_lit=pl.datetime_ranges(start=lit_dt, end=lit_dt, num_samples=lit_n), + ) + s = pl.Series([None, None], dtype=pl.List(pl.Datetime("us"))) + expected = pl.DataFrame( + { + "start_lit": s, + "end_lit": s, + "samples_lit": s, + "start_end_lit": s, + "start_samples_lit": s, + "end_samples_lit": s, + "all_lit": s, + } + ) + assert_frame_equal(result, expected) + + +def test_datetime_ranges_lit_combinations_start_interval_samples() -> None: + df = pl.DataFrame( + { + "start": [date(2025, 1, 1), date(2025, 1, 1)], + "samples": [3, 3], + } + ) + result = df.select( + start_lit=pl.datetime_ranges( + start=date(2025, 1, 1), interval="1d", num_samples="samples" + ), + samples_lit=pl.datetime_ranges(start="start", interval="1d", num_samples=3), + ) + dt = [datetime(2025, 1, 1), datetime(2025, 1, 2), datetime(2025, 1, 3)] + s = pl.Series([dt, dt], dtype=pl.List(pl.Datetime("us"))) + expected = pl.DataFrame( + { + "start_lit": s, + "samples_lit": s, + } + ) + assert_frame_equal(result, expected) + + +def test_datetime_ranges_null_lit_combinations_start_interval_samples() -> None: + df = pl.DataFrame( + { + "start": [date(2025, 1, 1), date(2025, 1, 1)], + "samples": [3, 3], + } + ) + lit_dt = pl.lit(None, dtype=pl.Date) + lit_n = pl.lit(None, dtype=pl.Int64) + result = df.select( + start_lit=pl.datetime_ranges( + start=lit_dt, interval="1d", num_samples="samples" + ), + samples_lit=pl.datetime_ranges(start="start", interval="1d", num_samples=lit_n), + all_lit=pl.datetime_ranges(start=lit_dt, interval="1d", num_samples=lit_n), + ) + s = pl.Series([None, None], dtype=pl.List(pl.Datetime("us"))) + expected = pl.DataFrame( + { + "start_lit": s, + "samples_lit": s, + "all_lit": s, + } + ) + assert_frame_equal(result, expected) + + +def test_datetime_ranges_lit_combinations_end_interval_samples() -> None: + df = pl.DataFrame( + { + "end": [date(2025, 1, 3), date(2025, 1, 3)], + "samples": [3, 3], + } + ) + result = df.select( + 
end_lit=pl.datetime_ranges( + end=date(2025, 1, 3), num_samples="samples", interval="1d" + ), + samples_lit=pl.datetime_ranges(end="end", num_samples=3, interval="1d"), + ) + dt = [datetime(2025, 1, 1), datetime(2025, 1, 2), datetime(2025, 1, 3)] + s = pl.Series([dt, dt], dtype=pl.List(pl.Datetime("us"))) + expected = pl.DataFrame( + { + "end_lit": s, + "samples_lit": s, + } + ) + assert_frame_equal(result, expected) + + +def test_datetime_ranges_null_lit_combinations_end_interval_samples() -> None: + df = pl.DataFrame( + { + "end": [date(2025, 1, 3), date(2025, 1, 3)], + "samples": [3, 3], + } + ) + lit_dt = pl.lit(None, dtype=pl.Date) + lit_n = pl.lit(None, dtype=pl.Int64) + result = df.select( + end_lit=pl.datetime_ranges(end=lit_dt, num_samples="samples", interval="1d"), + samples_lit=pl.datetime_ranges(end="end", num_samples=lit_n, interval="1d"), + all_lit=pl.datetime_ranges(end=lit_dt, num_samples=lit_n, interval="1d"), + ) + s = pl.Series([None, None], dtype=pl.List(pl.Datetime("us"))) + expected = pl.DataFrame( + { + "end_lit": s, + "samples_lit": s, + "all_lit": s, + } + ) + assert_frame_equal(result, expected) diff --git a/py-polars/tests/unit/functions/range/test_time_range.py b/py-polars/tests/unit/functions/range/test_time_range.py index 43c7b7405f22..3a32ca328de6 100644 --- a/py-polars/tests/unit/functions/range/test_time_range.py +++ b/py-polars/tests/unit/functions/range/test_time_range.py @@ -6,7 +6,7 @@ import pytest import polars as pl -from polars.exceptions import ComputeError, ShapeError +from polars.exceptions import ShapeError from polars.testing import assert_frame_equal, assert_series_equal if TYPE_CHECKING: @@ -92,12 +92,6 @@ def test_time_range_start_later_than_end() -> None: assert_series_equal(result, expected) -@pytest.mark.parametrize("interval", [timedelta(0), timedelta(minutes=-10)]) -def test_time_range_invalid_step(interval: timedelta) -> None: - with pytest.raises(ComputeError, match="`interval` must be positive"): - 
pl.time_range(time(11), time(12), interval=interval, eager=True) - - def test_time_range_lit_lazy() -> None: tm = pl.select( pl.time_range( @@ -265,3 +259,31 @@ def test_time_ranges_mismatched_chunks() -> None: ], ) assert_series_equal(result, expected) + + +def test_time_range_negative_interval() -> None: + expected = pl.Series("literal", [time(12), time(11, 40), time(11, 20), time(11)]) + + # left + result = pl.time_range( + time(12), time(11), interval=timedelta(minutes=-20), eager=True, closed="left" + ) + assert_series_equal(result, expected[:-1]) + + # right + result = pl.time_range( + time(12), time(11), interval=timedelta(minutes=-20), eager=True, closed="right" + ) + assert_series_equal(result, expected[1:]) + + # both + result = pl.time_range( + time(12), time(11), interval=timedelta(minutes=-20), eager=True, closed="both" + ) + assert_series_equal(result, expected) + + # none + result = pl.time_range( + time(12), time(11), interval=timedelta(minutes=-20), eager=True, closed="none" + ) + assert_series_equal(result, expected[1:-1]) From 3f3d417707952594bebe4bc4a9d72e2c269955d0 Mon Sep 17 00:00:00 2001 From: Marshall Crumiller Date: Thu, 27 Nov 2025 12:55:05 -0500 Subject: [PATCH 2/6] Make num_samples keyword-only --- .../src/polars/functions/range/date_range.py | 16 ++++++++-------- .../src/polars/functions/range/datetime_range.py | 15 ++++++++------- 2 files changed, 16 insertions(+), 15 deletions(-) diff --git a/py-polars/src/polars/functions/range/date_range.py b/py-polars/src/polars/functions/range/date_range.py index 04f96d7d65aa..9e3ac4c86954 100644 --- a/py-polars/src/polars/functions/range/date_range.py +++ b/py-polars/src/polars/functions/range/date_range.py @@ -25,8 +25,8 @@ def date_range( start: date | datetime | IntoExprColumn | None = None, end: date | datetime | IntoExprColumn | None = None, interval: str | timedelta | None = None, - num_samples: int | None = None, *, + num_samples: int | None = None, closed: ClosedInterval = ..., eager: 
Literal[False] = ..., ) -> Expr: ... @@ -37,8 +37,8 @@ def date_range( start: date | datetime | IntoExprColumn | None = None, end: date | datetime | IntoExprColumn | None = None, interval: str | timedelta | None = None, - num_samples: int | None = None, *, + num_samples: int | None = None, closed: ClosedInterval = ..., eager: Literal[True], ) -> Series: ... @@ -49,8 +49,8 @@ def date_range( start: date | datetime | IntoExprColumn | None = None, end: date | datetime | IntoExprColumn | None = None, interval: str | timedelta | None = None, - num_samples: int | None = None, *, + num_samples: int | None = None, closed: ClosedInterval = ..., eager: bool, ) -> Series | Expr: ... @@ -60,8 +60,8 @@ def date_range( start: date | datetime | IntoExprColumn | None = None, end: date | datetime | IntoExprColumn | None = None, interval: str | timedelta | None = None, - num_samples: int | None = None, *, + num_samples: int | None = None, closed: ClosedInterval = "both", eager: bool = False, ) -> Series | Expr: @@ -249,8 +249,8 @@ def date_ranges( start: date | datetime | IntoExprColumn | None = None, end: date | datetime | IntoExprColumn | None = None, interval: str | timedelta = ..., - num_samples: int | IntoExprColumn | None = None, *, + num_samples: int | IntoExprColumn | None = None, closed: ClosedInterval = ..., eager: Literal[False] = ..., ) -> Expr: ... @@ -261,8 +261,8 @@ def date_ranges( start: date | datetime | IntoExprColumn | None = None, end: date | datetime | IntoExprColumn | None = None, interval: str | timedelta | None = None, - num_samples: int | IntoExprColumn | None = None, *, + num_samples: int | IntoExprColumn | None = None, closed: ClosedInterval = ..., eager: Literal[True], ) -> Series: ... 
@@ -273,8 +273,8 @@ def date_ranges( start: date | datetime | IntoExprColumn | None = None, end: date | datetime | IntoExprColumn | None = None, interval: str | timedelta | None = None, - num_samples: int | IntoExprColumn | None = None, *, + num_samples: int | IntoExprColumn | None = None, closed: ClosedInterval = ..., eager: bool, ) -> Series | Expr: ... @@ -284,8 +284,8 @@ def date_ranges( start: date | datetime | IntoExprColumn | None = None, end: date | datetime | IntoExprColumn | None = None, interval: str | timedelta | None = None, - num_samples: int | IntoExprColumn | None = None, *, + num_samples: int | IntoExprColumn | None = None, closed: ClosedInterval = "both", eager: bool = False, ) -> Series | Expr: diff --git a/py-polars/src/polars/functions/range/datetime_range.py b/py-polars/src/polars/functions/range/datetime_range.py index a287f44890dd..1d5fbd3ce62e 100644 --- a/py-polars/src/polars/functions/range/datetime_range.py +++ b/py-polars/src/polars/functions/range/datetime_range.py @@ -24,8 +24,8 @@ def datetime_range( start: datetime | date | IntoExprColumn | None = None, end: datetime | date | IntoExprColumn | None = None, interval: str | timedelta | None = None, - num_samples: int | IntoExprColumn | None = None, *, + num_samples: int | IntoExprColumn | None = None, closed: ClosedInterval = ..., time_unit: TimeUnit | None = ..., time_zone: str | None = ..., @@ -38,8 +38,8 @@ def datetime_range( start: datetime | date | IntoExprColumn | None = None, end: datetime | date | IntoExprColumn | None = None, interval: str | timedelta | None = None, - num_samples: int | IntoExprColumn | None = None, *, + num_samples: int | IntoExprColumn | None = None, closed: ClosedInterval = ..., time_unit: TimeUnit | None = ..., time_zone: str | None = ..., @@ -52,8 +52,8 @@ def datetime_range( start: datetime | date | IntoExprColumn | None = None, end: datetime | date | IntoExprColumn | None = None, interval: str | timedelta | None = None, - num_samples: int | 
IntoExprColumn | None = None, *, + num_samples: int | IntoExprColumn | None = None, closed: ClosedInterval = ..., time_unit: TimeUnit | None = ..., time_zone: str | None = ..., @@ -65,8 +65,8 @@ def datetime_range( start: datetime | date | IntoExprColumn | None = None, end: datetime | date | IntoExprColumn | None = None, interval: str | timedelta | None = None, - num_samples: int | IntoExprColumn | None = None, *, + num_samples: int | IntoExprColumn | None = None, closed: ClosedInterval = "both", time_unit: TimeUnit | None = None, time_zone: str | None = None, @@ -283,8 +283,8 @@ def datetime_ranges( start: datetime | date | IntoExprColumn | None = None, end: datetime | date | IntoExprColumn | None = None, interval: str | timedelta | None = None, - num_samples: int | IntoExprColumn | None = None, *, + num_samples: int | IntoExprColumn | None = None, closed: ClosedInterval = ..., time_unit: TimeUnit | None = ..., time_zone: str | None = ..., @@ -298,6 +298,7 @@ def datetime_ranges( end: datetime | date | IntoExprColumn | None = None, interval: str | timedelta | None = None, *, + num_samples: int | IntoExprColumn | None = None, closed: ClosedInterval = ..., time_unit: TimeUnit | None = ..., time_zone: str | None = ..., @@ -310,8 +311,8 @@ def datetime_ranges( start: datetime | date | IntoExprColumn | None = None, end: datetime | date | IntoExprColumn | None = None, interval: str | timedelta | None = None, - num_samples: int | IntoExprColumn | None = None, *, + num_samples: int | IntoExprColumn | None = None, closed: ClosedInterval = ..., time_unit: TimeUnit | None = ..., time_zone: str | None = ..., @@ -323,8 +324,8 @@ def datetime_ranges( start: datetime | date | IntoExprColumn | None = None, end: datetime | date | IntoExprColumn | None = None, interval: str | timedelta | None = None, - num_samples: int | IntoExprColumn | None = None, *, + num_samples: int | IntoExprColumn | None = None, closed: ClosedInterval = "both", time_unit: TimeUnit | None = None, time_zone: 
str | None = None, From 8269a9549ac87a426f3ee743e27a9018c8fefccd Mon Sep 17 00:00:00 2001 From: Marshall Crumiller Date: Thu, 27 Nov 2025 13:03:57 -0500 Subject: [PATCH 3/6] Update docstring to not mention 'periods' --- crates/polars-time/src/date_range.rs | 2 +- py-polars/src/polars/functions/range/date_range.py | 6 ++---- py-polars/src/polars/functions/range/datetime_range.py | 6 ++---- 3 files changed, 5 insertions(+), 9 deletions(-) diff --git a/crates/polars-time/src/date_range.rs b/crates/polars-time/src/date_range.rs index de0004732e70..2d4e54e67bc5 100644 --- a/crates/polars-time/src/date_range.rs +++ b/crates/polars-time/src/date_range.rs @@ -173,7 +173,7 @@ pub fn datetime_range_impl_start_end_samples( }; let mut values: Vec = (0..num_samples) - .map(|x| (x as f64 * bin_width + start) as i64) + .map(move |x| (x as f64 * bin_width + start) as i64) .collect(); // For right-closed and fully-closed interval, ensure the last point is exact. diff --git a/py-polars/src/polars/functions/range/date_range.py b/py-polars/src/polars/functions/range/date_range.py index 9e3ac4c86954..87909a0a73b6 100644 --- a/py-polars/src/polars/functions/range/date_range.py +++ b/py-polars/src/polars/functions/range/date_range.py @@ -83,8 +83,7 @@ def date_range( or using the Polars duration string language (see "Notes" section below). Must consist of full days. num_samples - Number of periods in the date range. This corresponds to the number of points in - the output array, and is thus one more than the number of intervals. + Number of samples in the output. closed : {'both', 'left', 'right', 'none'} Define which sides of the range are closed (inclusive). Only 'closed' is supported for `date_range`. @@ -307,8 +306,7 @@ def date_ranges( or using the Polars duration string language (see "Notes" section below). Must consist of full days. num_samples - Number of periods in the date range. 
This corresponds to the number of points in - the output array, and is thus one more than the number of intervals. + Number of samples in the output. closed : {'both', 'left', 'right', 'none'} Define which sides of the range are closed (inclusive). eager diff --git a/py-polars/src/polars/functions/range/datetime_range.py b/py-polars/src/polars/functions/range/datetime_range.py index 1d5fbd3ce62e..3588e4b79b47 100644 --- a/py-polars/src/polars/functions/range/datetime_range.py +++ b/py-polars/src/polars/functions/range/datetime_range.py @@ -89,8 +89,7 @@ def datetime_range( Interval of the range periods, specified as a Python `timedelta` object or using the Polars duration string language (see "Notes" section below). num_samples - Number of periods in the date range. This corresponds to the number of points in - the output array, and is thus one more than the number of intervals. + Number of samples in the output. closed : {'both', 'left', 'right', 'none'} Define which sides of the range are closed (inclusive). time_unit : {None, 'ns', 'us', 'ms'} @@ -348,8 +347,7 @@ def datetime_ranges( Interval of the range periods, specified as a Python `timedelta` object or using the Polars duration string language (see "Notes" section below). num_samples - Number of periods in the date range. This corresponds to the number of points in - the output array, and is thus one more than the number of intervals. + Number of samples in the output. closed : {'both', 'left', 'right', 'none'} Define which sides of the range are closed (inclusive). 
time_unit : {None, 'ns', 'us', 'ms'} From 8a68bb3e86291476fd66abf5568970dafe7efbca Mon Sep 17 00:00:00 2001 From: Marshall Crumiller Date: Fri, 28 Nov 2025 15:39:32 -0500 Subject: [PATCH 4/6] Remove duplicate enum and use linear_space for start/end/num_samples --- .../src/dispatch/range/linear_space.rs | 12 ++- .../polars-ops/src/series/ops/linear_space.rs | 52 +++++++++++++ .../src/dsl/function_expr/range.rs | 14 ++-- .../src/plans/aexpr/function_expr/range.rs | 14 ++-- crates/polars-python/src/conversion/mod.rs | 17 ----- crates/polars-time/src/date_range.rs | 75 +++++-------------- crates/polars-time/src/lib.rs | 2 +- crates/polars-time/src/windows/group_by.rs | 12 +-- .../polars/tests/it/lazy/group_by_dynamic.rs | 2 +- 9 files changed, 97 insertions(+), 103 deletions(-) diff --git a/crates/polars-expr/src/dispatch/range/linear_space.rs b/crates/polars-expr/src/dispatch/range/linear_space.rs index a405e8e681e8..2bb6993797e3 100644 --- a/crates/polars-expr/src/dispatch/range/linear_space.rs +++ b/crates/polars-expr/src/dispatch/range/linear_space.rs @@ -1,6 +1,8 @@ use arrow::temporal_conversions::MICROSECONDS_IN_DAY; use polars_core::prelude::*; -use polars_ops::series::{ClosedInterval, new_linear_space_f32, new_linear_space_f64}; +use polars_ops::series::{ + ClosedInterval, new_linear_space_f32, new_linear_space_f64, new_linear_space_i64, +}; use super::utils::{build_nulls, ensure_items_contain_exactly_one_value}; @@ -46,8 +48,12 @@ pub(super) fn linear_space(s: &[Column], closed: ClosedInterval) -> PolarsResult end *= MICROSECONDS_IN_DAY; dt = DataType::Datetime(TimeUnit::Microseconds, None); } - new_linear_space_f64(start as f64, end as f64, num_samples, closed, name.clone()) - .map(|s| s.cast(&dt).unwrap().into_column()) + Ok( + new_linear_space_i64(start, end, num_samples as i64, closed, name.clone()) + .cast(&dt) + .unwrap() + .into_column(), + ) }, (dt1, dt2) if !dt1.is_primitive_numeric() || !dt2.is_primitive_numeric() => { Err(PolarsError::ComputeError( 
diff --git a/crates/polars-ops/src/series/ops/linear_space.rs b/crates/polars-ops/src/series/ops/linear_space.rs index 16789f2aacab..95b540119b77 100644 --- a/crates/polars-ops/src/series/ops/linear_space.rs +++ b/crates/polars-ops/src/series/ops/linear_space.rs @@ -16,6 +16,58 @@ pub enum ClosedInterval { None, } +// The enumeration for a moving window is identical to ClosedInterval. +pub type ClosedWindow = ClosedInterval; + +// Primarily used for date/datetime logical types. +pub fn new_linear_space_i64( + start: i64, + end: i64, + n: i64, + closed: ClosedInterval, + name: PlSmallStr, +) -> Int64Chunked { + let values = if n == 0 { + Vec::::new() + } else { + // The bin width depends on the interval closure. + let divisor = match closed { + ClosedInterval::None => n + 1, + ClosedInterval::Left => n, + ClosedInterval::Right => n, + ClosedInterval::Both => n - 1, + }; + let bin_width = (end - start) as f64 / (divisor as f64); + + // For left-open intervals, increase the left by one interval. + let start = if closed == ClosedInterval::None || closed == ClosedInterval::Right { + start as f64 + bin_width + } else { + start as f64 + }; + + let right_closed = closed == ClosedInterval::Right || closed == ClosedInterval::Both; + let n = if right_closed { n - 1 } else { n }; + let values = (0..n).map(move |x| (x as f64 * bin_width + start) as i64); + + // For right-closed and fully-closed interval, ensure the last point is exact. 
+ if right_closed { + // ensures floating point accuracy of final value + values.chain(std::iter::once(end)).collect() + } else { + values.collect() + } + }; + let mut ca = Int64Chunked::new_vec(name, values); + let is_sorted = if end < start { + IsSorted::Descending + } else { + IsSorted::Ascending + }; + ca.set_sorted_flag(is_sorted); + ca +} + pub fn new_linear_space_f32( start: f32, end: f32, diff --git a/crates/polars-plan/src/dsl/function_expr/range.rs b/crates/polars-plan/src/dsl/function_expr/range.rs index befc1437cbab..639200b76fab 100644 --- a/crates/polars-plan/src/dsl/function_expr/range.rs +++ b/crates/polars-plan/src/dsl/function_expr/range.rs @@ -3,7 +3,7 @@ use std::fmt; use polars_core::prelude::*; use polars_ops::series::ClosedInterval; #[cfg(feature = "temporal")] -use polars_time::{ClosedWindow, Duration}; +use polars_time::Duration; #[cfg(feature = "serde")] use serde::{Deserialize, Serialize}; @@ -72,19 +72,19 @@ pub enum RangeFunction { #[cfg(feature = "dtype-date")] DateRange { interval: Option, - closed: ClosedWindow, + closed: ClosedInterval, arg_type: DateRangeArgs, }, #[cfg(feature = "dtype-date")] DateRanges { interval: Option, - closed: ClosedWindow, + closed: ClosedInterval, arg_type: DateRangeArgs, }, #[cfg(feature = "dtype-datetime")] DatetimeRange { interval: Option, - closed: ClosedWindow, + closed: ClosedInterval, time_unit: Option, time_zone: Option, arg_type: DateRangeArgs, @@ -92,7 +92,7 @@ pub enum RangeFunction { #[cfg(feature = "dtype-datetime")] DatetimeRanges { interval: Option, - closed: ClosedWindow, + closed: ClosedInterval, time_unit: Option, time_zone: Option, arg_type: DateRangeArgs, @@ -100,12 +100,12 @@ pub enum RangeFunction { #[cfg(feature = "dtype-time")] TimeRange { interval: Duration, - closed: ClosedWindow, + closed: ClosedInterval, }, #[cfg(feature = "dtype-time")] TimeRanges { interval: Duration, - closed: ClosedWindow, + closed: ClosedInterval, }, } diff --git 
a/crates/polars-plan/src/plans/aexpr/function_expr/range.rs b/crates/polars-plan/src/plans/aexpr/function_expr/range.rs index 6470979aa65e..d11ef2b14546 100644 --- a/crates/polars-plan/src/plans/aexpr/function_expr/range.rs +++ b/crates/polars-plan/src/plans/aexpr/function_expr/range.rs @@ -3,7 +3,7 @@ use std::fmt::{Display, Formatter}; use polars_core::prelude::*; use polars_ops::series::ClosedInterval; #[cfg(feature = "temporal")] -use polars_time::{ClosedWindow, Duration}; +use polars_time::Duration; use super::{FunctionOptions, IRFunctionExpr}; #[cfg(any(feature = "dtype-date", feature = "dtype-datetime"))] @@ -31,19 +31,19 @@ pub enum IRRangeFunction { #[cfg(feature = "dtype-date")] DateRange { interval: Option, - closed: ClosedWindow, + closed: ClosedInterval, arg_type: DateRangeArgs, }, #[cfg(feature = "dtype-date")] DateRanges { interval: Option, - closed: ClosedWindow, + closed: ClosedInterval, arg_type: DateRangeArgs, }, #[cfg(feature = "dtype-datetime")] DatetimeRange { interval: Option, - closed: ClosedWindow, + closed: ClosedInterval, time_unit: Option, time_zone: Option, arg_type: DateRangeArgs, @@ -51,7 +51,7 @@ pub enum IRRangeFunction { #[cfg(feature = "dtype-datetime")] DatetimeRanges { interval: Option, - closed: ClosedWindow, + closed: ClosedInterval, time_unit: Option, time_zone: Option, arg_type: DateRangeArgs, @@ -59,12 +59,12 @@ pub enum IRRangeFunction { #[cfg(feature = "dtype-time")] TimeRange { interval: Duration, - closed: ClosedWindow, + closed: ClosedInterval, }, #[cfg(feature = "dtype-time")] TimeRanges { interval: Duration, - closed: ClosedWindow, + closed: ClosedInterval, }, } diff --git a/crates/polars-python/src/conversion/mod.rs b/crates/polars-python/src/conversion/mod.rs index aae546418871..c057a6868591 100644 --- a/crates/polars-python/src/conversion/mod.rs +++ b/crates/polars-python/src/conversion/mod.rs @@ -1210,23 +1210,6 @@ impl<'py> FromPyObject<'py> for Wrap { } } -impl<'py> FromPyObject<'py> for Wrap { - fn 
extract_bound(ob: &Bound<'py, PyAny>) -> PyResult { - let parsed = match &*ob.extract::()? { - "both" => ClosedInterval::Both, - "left" => ClosedInterval::Left, - "right" => ClosedInterval::Right, - "none" => ClosedInterval::None, - v => { - return Err(PyValueError::new_err(format!( - "`closed` must be one of {{'both', 'left', 'right', 'none'}}, got {v}", - ))); - }, - }; - Ok(Wrap(parsed)) - } -} - impl<'py> FromPyObject<'py> for Wrap { fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult { let parsed = match &*ob.extract::()? { diff --git a/crates/polars-time/src/date_range.rs b/crates/polars-time/src/date_range.rs index 2d4e54e67bc5..82aa14f6af20 100644 --- a/crates/polars-time/src/date_range.rs +++ b/crates/polars-time/src/date_range.rs @@ -4,6 +4,7 @@ use num_traits::signum; use polars_core::chunked_array::temporal::time_to_time64ns; use polars_core::prelude::*; use polars_core::series::IsSorted; +use polars_ops::series::{ClosedInterval, new_linear_space_i64}; use crate::prelude::*; @@ -20,7 +21,7 @@ pub fn date_range( end: Option, interval: Option, num_samples: Option, - closed: ClosedWindow, + closed: ClosedInterval, tu: TimeUnit, tz: Option<&Tz>, ) -> PolarsResult { @@ -87,7 +88,7 @@ pub fn datetime_range_impl_start_end_interval( start: i64, end: i64, interval: Duration, - closed: ClosedWindow, + closed: ClosedInterval, tu: TimeUnit, tz: Option<&Tz>, ) -> PolarsResult { @@ -119,7 +120,7 @@ pub fn datetime_range_impl_start_interval_samples( start: i64, interval: Duration, num_samples: i64, - closed: ClosedWindow, + closed: ClosedInterval, tu: TimeUnit, tz: Option<&Tz>, ) -> PolarsResult { @@ -148,54 +149,16 @@ pub fn datetime_range_impl_start_end_samples( start: i64, end: i64, num_samples: i64, - closed: ClosedWindow, + closed: ClosedInterval, tu: TimeUnit, tz: Option<&Tz>, ) -> PolarsResult { - let ascending = start >= end; - let values = if num_samples == 0 { - Vec::::new() - } else { - // The bin width depends on the interval closure. 
- let divisor = match closed { - ClosedWindow::None => num_samples + 1, - ClosedWindow::Left => num_samples, - ClosedWindow::Right => num_samples, - ClosedWindow::Both => num_samples - 1, - }; - let bin_width = (end - start) as f64 / (divisor as f64); - - // For left-open intervals, increase the left by one interval. - let start = if closed == ClosedWindow::None || closed == ClosedWindow::Right { - start as f64 + bin_width - } else { - start as f64 - }; - - let mut values: Vec = (0..num_samples) - .map(move |x| (x as f64 * bin_width + start) as i64) - .collect(); - - // For right-closed and fully-closed interval, ensure the last point is exact. - if closed == ClosedWindow::Right || closed == ClosedWindow::Both { - let last = values.len() - 1; - values[last] = end; - } - values - }; - let out = Int64Chunked::new_vec(name, values); - let mut out = match tz { + let s = new_linear_space_i64(start, end, num_samples, closed, name); + let out = match tz { #[cfg(feature = "timezones")] - Some(tz) => out.into_datetime(tu, Some(TimeZone::from_chrono(tz))), - _ => out.into_datetime(tu, None), - }; - - let flag = if ascending { - IsSorted::Ascending - } else { - IsSorted::Descending + Some(tz) => s.into_datetime(tu, Some(TimeZone::from_chrono(tz))), + _ => s.into_datetime(tu, None), }; - out.physical_mut().set_sorted_flag(flag); Ok(out) } @@ -205,7 +168,7 @@ pub fn time_range( start: NaiveTime, end: NaiveTime, interval: Duration, - closed: ClosedWindow, + closed: ClosedInterval, ) -> PolarsResult { let start = time_to_time64ns(&start); let end = time_to_time64ns(&end); @@ -218,7 +181,7 @@ pub fn time_range_impl( start: i64, end: i64, interval: Duration, - closed: ClosedWindow, + closed: ClosedInterval, ) -> PolarsResult { let mut out = Int64Chunked::new_vec( name, @@ -242,7 +205,7 @@ pub(crate) fn datetime_range_i64_start_end_interval( mut start: i64, mut end: i64, interval: Duration, - closed: ClosedWindow, + closed: ClosedInterval, time_unit: TimeUnit, time_zone: 
Option<&Tz>, ) -> PolarsResult> { @@ -270,10 +233,10 @@ pub(crate) fn datetime_range_i64_start_end_interval( ); // Update end points based on interval closure. - if closed == ClosedWindow::Right || closed == ClosedWindow::None { + if closed == ClosedInterval::Right || closed == ClosedInterval::None { start += step; // This works whether step is negative or positive. }; - if closed == ClosedWindow::Left || closed == ClosedWindow::None { + if closed == ClosedInterval::Left || closed == ClosedInterval::None { end -= signum(step); // If our interval is negative, we increment the end } @@ -302,12 +265,12 @@ pub(crate) fn datetime_range_i64_start_end_interval( // Shift the left limit if we're right-closed or none let mut t = start; let mut i = 0; - if closed == ClosedWindow::Right || closed == ClosedWindow::None { + if closed == ClosedInterval::Right || closed == ClosedInterval::None { t = offset_fn(&interval, start, time_zone)?; i += 1; } // Shift the right limit if we're right-closed or none - if closed == ClosedWindow::Left || closed == ClosedWindow::None { + if closed == ClosedInterval::Left || closed == ClosedInterval::None { end = offset_fn(&(-interval), end, time_zone)?; } @@ -332,7 +295,7 @@ pub(crate) fn datetime_range_i64_start_interval_samples( mut start: i64, interval: Duration, num_samples: i64, - closed: ClosedWindow, + closed: ClosedInterval, time_unit: TimeUnit, time_zone: Option<&Tz>, ) -> PolarsResult> { @@ -360,7 +323,7 @@ pub(crate) fn datetime_range_i64_start_interval_samples( } // If the interval is left-open, start one interval away. - if closed == ClosedWindow::Right || closed == ClosedWindow::None { + if closed == ClosedInterval::Right || closed == ClosedInterval::None { start += step; } @@ -386,7 +349,7 @@ pub(crate) fn datetime_range_i64_start_interval_samples( }; // Start with one interval offset if we're not left-closed. 
- let t0 = (closed == ClosedWindow::Right || closed == ClosedWindow::None) as i64; + let t0 = (closed == ClosedInterval::Right || closed == ClosedInterval::None) as i64; let ts = (t0..t0 + num_samples) .map(|t| offset_fn(&(interval * t), start, time_zone)) .collect::>>()?; diff --git a/crates/polars-time/src/lib.rs b/crates/polars-time/src/lib.rs index e99257421ef0..102a2f0d01df 100644 --- a/crates/polars-time/src/lib.rs +++ b/crates/polars-time/src/lib.rs @@ -51,5 +51,5 @@ pub use upsample::*; #[cfg(feature = "timezones")] pub use utils::known_timezones; pub use windows::duration::Duration; -pub use windows::group_by::ClosedWindow; +pub use windows::group_by::{ClosedInterval, ClosedWindow}; pub use windows::window::Window; diff --git a/crates/polars-time/src/windows/group_by.rs b/crates/polars-time/src/windows/group_by.rs index 884363bcb1cb..e710bdb5e613 100644 --- a/crates/polars-time/src/windows/group_by.rs +++ b/crates/polars-time/src/windows/group_by.rs @@ -13,6 +13,7 @@ use polars_core::POOL; use polars_core::prelude::*; use polars_core::utils::_split_offsets; use polars_core::utils::flatten::flatten_par; +pub use polars_ops::series::{ClosedInterval, ClosedWindow}; use rayon::prelude::*; #[cfg(feature = "serde")] use serde::{Deserialize, Serialize}; @@ -20,17 +21,6 @@ use strum_macros::IntoStaticStr; use crate::prelude::*; -#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, IntoStaticStr)] -#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] -#[cfg_attr(feature = "dsl-schema", derive(schemars::JsonSchema))] -#[strum(serialize_all = "snake_case")] -pub enum ClosedWindow { - Left, - Right, - Both, - None, -} - #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, IntoStaticStr)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "dsl-schema", derive(schemars::JsonSchema))] diff --git a/crates/polars/tests/it/lazy/group_by_dynamic.rs b/crates/polars/tests/it/lazy/group_by_dynamic.rs index 74eb86192126..a71cfdcb3c2e 100644 
--- a/crates/polars/tests/it/lazy/group_by_dynamic.rs +++ b/crates/polars/tests/it/lazy/group_by_dynamic.rs @@ -25,7 +25,7 @@ fn test_group_by_dynamic_week_bounds() -> PolarsResult<()> { stop, Some(Duration::parse("1d")), None, - ClosedWindow::Left, + ClosedInterval::Left, TimeUnit::Milliseconds, None, )? From 621a7459acedbe7bff691a33868a30e57edafadd Mon Sep 17 00:00:00 2001 From: Marshall Crumiller Date: Fri, 28 Nov 2025 15:58:41 -0500 Subject: [PATCH 5/6] format --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index cf5ed8e92b91..86b67b86442f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -37,6 +37,7 @@ base64 = "0.22.0" bigdecimal = "0.4.8" bincode = { version = "2.0", features = ["serde", "std"] } bitflags = "2" +blake3 = { version = "1.8.2" } boxcar = "0.2.12" bytemuck = { version = "1.22", features = ["derive", "extern_crate_alloc"] } bytes = { version = "1.10" } @@ -107,7 +108,6 @@ uuid = { version = "1.15.1", features = ["v4"] } version_check = "0.9.4" xxhash-rust = { version = "0.8.6", features = ["xxh3"] } zstd = "0.13" -blake3 = { version = "1.8.2" } polars = { version = "0.52.0", path = "crates/polars", default-features = false } polars-compute = { version = "0.52.0", path = "crates/polars-compute", default-features = false } From 93df94e0650ce7f8c905c7f93aee304914b2636b Mon Sep 17 00:00:00 2001 From: Marshall Crumiller Date: Fri, 28 Nov 2025 16:02:27 -0500 Subject: [PATCH 6/6] Schema hashes --- crates/polars-plan/dsl-schema-hashes.json | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/crates/polars-plan/dsl-schema-hashes.json b/crates/polars-plan/dsl-schema-hashes.json index aef305e86811..1e0473bddb14 100644 --- a/crates/polars-plan/dsl-schema-hashes.json +++ b/crates/polars-plan/dsl-schema-hashes.json @@ -19,7 +19,6 @@ "CategoricalPhysical": "7107ef2de35ebb480b9c69c20056f88ec3d2c2429e3dd3e5e08f2c70defd981b", "ChildFieldOverwrites": 
"9f1a70f965bd2b80625133617c0ce56488d57e5d6c6405b30541df3d96fa59cf", "ClosedInterval": "8cddc5ec69eb608bd920361aba3f048af98ba1f35558dbe9cdfde638043bc408", - "ClosedWindow": "20dacadcd82ed2d320b9be7d66902774d6353f64411fc229ad28739b7b8c4244", "CloudConfig": "e1492d4c3ec6f64b6cb901e8bbacdf0dc9a91f005dd98f68a48d137ef4c92eb4", "CloudOptions": "618edb468fee35d4907cce1c729f3a14fd8f243e2cb35a38d083979f3cf4f15a", "CloudScheme": "14bf9988c903046841626c9d22a711971908a67aeaa7714db4c3ff245fde264e", @@ -47,12 +46,12 @@ "Duration": "44999d59023085cbb592ce94b30d34f9b983081fc72bd6435a49bdf0869c0074", "DynListLiteralValue": "2266a553cb4a943f7097f24539eaa802453cf8742675996215235bd682dec0e8", "DynLiteralValue": "47dc404f42bef5ab71659b9e10a97413202a61bfa3ac9fc66fff4a176653f7fe", - "DynamicGroupOptions": "ce5acc65b2e124dba5783f7bdfb37f3aa51c6fd7891db024ad66026394a4a510", + "DynamicGroupOptions": "82701b4fe996b61b4a502c6b8a054fabfe912540fb59608f108e0b5b6a392cd7", "EWMOptions": "3997323cf1a48491ab48ed491cabf768954175970f83c0e7899490a58d310322", "Either_PythonObject_or_Schema_for_DataType": "6232a29ef51626d332177544fe80084dbc5451e45087aacafae633c93526ee6e", "EvalVariant": "6f3f2249f963d4b89339a93beace83e0be41310b4779af62ace5d4240013d7d8", "ExplodeOptions": "46ef78ccb0ca3a84a96dc69c4bba22790e9adc50a2862a68fa8c58c793c660bf", - "Expr": "a3b713cacabb85744b30f1808ad47bdcb97f83445e6a517abb2e5b7f2cd4ab0f", + "Expr": "f900b72960459c219ca631c726f16f591e69b55ac72a05ce726394dab2957db1", "ExtensionFunction": "71c0d75cd439c60a5c304faba11dacceb7aeb02d146c6b9f0b34fe9aa1558391", "ExtraColumnsPolicy": "eb81efadce58eb148e658db4f2b5c1f38155d617431b81121043e9f9c21acd30", "Field": "dd95c2b6d7aa44004b900ef31fcf18e70f862d97488ef46c67b7c64c226b50d8", @@ -123,7 +122,7 @@ "QuantileMethod": "dc652061779e61c57da55126eba9439c15aa7d283d2bdac00d3d07726c29f11c", "QuoteStyle": "be86ae062d16fca3258876ecd98e6825fcaa5f8459f1ac7a932b72513e08f9db", "RandomMethod": 
"5fecdac8c404504bc9112dbf3449a8f3c78d8eb35ade71b46d8fdc4c3e09ae18", - "RangeFunction": "73ab8a04c554de53902cf0a679949f55a2808ae9f7e9daeb0b858a02b3cc52b5", + "RangeFunction": "d53f8f7edd23ba7852a44de9c234b29c8a46263089f163f0e4a604c3d4d6e570", "RangeLiteralValue": "96fb5d61ffab9c471a69d92137959ee31bfe64064db05b46e070b7be095c0303", "RankMethod": "8e867af76bfafd1c0dcb0c97e640d7ea1798adf678ad2388dcfe04e7e1c04784", "RankOptions": "0b6a2f139bc177cf714244d846eb4c44489ac639af03667d13ab3d7d9448d9cd", @@ -135,8 +134,8 @@ "RollingFnParams": "9727bce3a05420c18a5932e5f45c4f7a45c05eb82c38b22f151a7d67b02156ca", "RollingFunction": "109fbfc02e6fc0524a43795c861bf8360c41869ec0e915dddbe03ac637d62250", "RollingFunctionBy": "646ef7028bb9396391c42fabcd66d2ace69a8fcc121411d959594b856026f7df", - "RollingGroupOptions": "895857e3abcb7199dd7625b57b9627924a05a68d0a94fd3755b27d714765ce46", - "RollingOptionsDynamicWindow": "aecd759cc2ed5c0d8f12477187471e5d65cbc003c74a64723747712523f969a6", + "RollingGroupOptions": "12acb61fa8815baca189ccca9ab8b097ec8fd8afa100985b7e5b18ccb7a88139", + "RollingOptionsDynamicWindow": "d80a526813914e3e2109e838b66b3bc67f792f733ea107fd1a00cc278e2fae9a", "RollingOptionsFixedWindow": "623fec123558aef4fed6737a6dbf24572e561f5704bb96b096c3552094c23b29", "RollingQuantileParams": "a8e56a35fd9c8468d539e19bb7a8bc1e6f4ae0907bfd63ac33e0c5025601e0e7", "RollingRankMethod": "52084b557622afd3074d37a1dea7f6175158696913bab7b1b3d4d9797ec83a6a",