Skip to content
Open
89 changes: 87 additions & 2 deletions crates/polars-core/src/series/any_value.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ use std::fmt::Write;

use arrow::bitmap::MutableBitmap;
use num_traits::AsPrimitive;
use polars_compute::cast::SerPrimitive;

#[cfg(feature = "dtype-categorical")]
use crate::chunked_array::builder::CategoricalChunkedBuilder;
Expand Down Expand Up @@ -308,17 +309,101 @@ fn any_values_to_string(values: &[AnyValue], strict: bool) -> PolarsResult<Strin
Ok(builder.finish())
}
fn any_values_to_string_nonstrict(values: &[AnyValue]) -> StringChunked {
/// Appends the non-strict string form of `av` to `buffer`.
///
/// Strings are appended verbatim, floats are serialized through [`SerPrimitive`]
/// instead of `Display`, and structs / arrays / lists are written recursively as
/// `{a,b}` / `[a,b]` with `,` between children. Every other variant falls back to
/// its `Display` output.
///
/// `float_buf` is scratch space for float serialization; it is cleared before each
/// float is written, so callers may reuse one buffer across calls.
fn _write_any_value(av: &AnyValue<'_>, buffer: &mut String, float_buf: &mut Vec<u8>) {
    /// Serializes `value` into `scratch` and appends the resulting text to `out`.
    fn write_f64(value: f64, out: &mut String, scratch: &mut Vec<u8>) {
        scratch.clear();
        SerPrimitive::write(scratch, value);
        let text = std::str::from_utf8(scratch.as_slice())
            .expect("serialized float must be valid UTF-8");
        out.push_str(text);
    }

    match av {
        AnyValue::String(s) => buffer.push_str(s),
        // Handled explicitly so owned strings (e.g. inside a `StructOwned` payload) are
        // written exactly like borrowed ones instead of going through the `Display`
        // fallback; the top-level loop also treats both string variants alike.
        AnyValue::StringOwned(s) => buffer.push_str(s.as_str()),
        AnyValue::Float64(f) => write_f64(*f, buffer, float_buf),
        // NOTE(review): the value is widened to f64 before serialization, so the
        // serializer sees the widened value and may print more digits than the f32
        // itself needs. Kept as-is to match the top-level Float32 branch — confirm
        // whether serializing the f32 directly is preferable in both places.
        AnyValue::Float32(f) => write_f64(f64::from(*f), buffer, float_buf),
        #[cfg(feature = "dtype-struct")]
        AnyValue::StructOwned(payload) => {
            buffer.push('{');
            for (idx, child) in payload.0.iter().enumerate() {
                if idx > 0 {
                    buffer.push(',');
                }
                _write_any_value(child, buffer, float_buf);
            }
            buffer.push('}');
        },
        #[cfg(feature = "dtype-struct")]
        AnyValue::Struct(_, _, flds) => {
            // Borrowed structs have to be materialized into per-field values first.
            let mut fields = Vec::with_capacity(flds.len());
            av._materialize_struct_av(&mut fields);

            buffer.push('{');
            for (idx, child) in fields.iter().enumerate() {
                if idx > 0 {
                    buffer.push(',');
                }
                _write_any_value(child, buffer, float_buf);
            }
            buffer.push('}');
        },
        #[cfg(feature = "dtype-array")]
        AnyValue::Array(vals, _) => {
            buffer.push('[');
            for (idx, child) in vals.iter().enumerate() {
                if idx > 0 {
                    buffer.push(',');
                }
                _write_any_value(&child, buffer, float_buf);
            }
            buffer.push(']');
        },
        AnyValue::List(vals) => {
            buffer.push('[');
            for (idx, child) in vals.iter().enumerate() {
                if idx > 0 {
                    buffer.push(',');
                }
                _write_any_value(&child, buffer, float_buf);
            }
            buffer.push(']');
        },
        // Everything else (ints, bools, nulls, temporals, ...) uses `Display`.
        other => {
            write!(buffer, "{other}").unwrap();
        },
    }
}

let mut builder = StringChunkedBuilder::new(PlSmallStr::EMPTY, values.len());
let mut owned = String::new(); // Amortize allocations.
let mut float_buf = vec![];
for av in values {
owned.clear();
float_buf.clear();

match av {
AnyValue::String(s) => builder.append_value(s),
AnyValue::StringOwned(s) => builder.append_value(s),
AnyValue::Null => builder.append_null(),
AnyValue::Binary(_) | AnyValue::BinaryOwned(_) => builder.append_null(),
AnyValue::Float64(f) => {
SerPrimitive::write(&mut float_buf, *f);
let s = std::str::from_utf8(&float_buf).unwrap();
builder.append_value(s);
},
AnyValue::Float32(f) => {
SerPrimitive::write(&mut float_buf, *f as f64); // promote to f64 for serialization
let s = std::str::from_utf8(&float_buf).unwrap();
builder.append_value(s);
},
av => {
owned.clear();
write!(owned, "{av}").unwrap();
_write_any_value(av, &mut owned, &mut float_buf);
builder.append_value(&owned);
},
}
Expand Down
42 changes: 42 additions & 0 deletions py-polars/tests/unit/constructors/test_any_value_fallbacks.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from typing import TYPE_CHECKING, Any

import pytest
from numpy import array

import polars as pl
from polars._plr import PySeries
Expand Down Expand Up @@ -408,3 +409,44 @@ def test_categorical_lit_18874() -> None:
]
),
)


@pytest.mark.parametrize(
    ("values", "expected"),
    [
        # Float64 should have ~17; Float32 ~6 digits of precision preserved
        pytest.param(
            [0.123, 0.123456789],
            ["0.123", "0.123456789"],
            id="basic_floats",
        ),
        pytest.param(
            [[0.123, 0.123456789]],
            ["[0.123,0.123456789]"],
            id="nested_list",
        ),
        pytest.param(
            [array([0.123, 0.123456789])],
            ["[0.123,0.123456789]"],
            id="nested_array",
        ),
        pytest.param(
            [{"a": 0.123, "b": 0.123456789}],
            ["{0.123,0.123456789}"],
            id="basic_struct",
        ),
        pytest.param(
            [[{"a": 0.123, "b": 0.123456789}]],
            ["[{0.123,0.123456789}]"],
            id="list_of_structs",
        ),
        pytest.param(
            [{"x": [0.1, 0.2]}, [{"y": 0.3}]],
            ["{[0.1,0.2]}", "[{0.3}]"],
            id="nested_mixed",
        ),
        pytest.param(
            [None, {"a": None, "b": 1.0}, [None, 2.0]],
            [None, "{null,1.0}", "[null,2.0]"],
            id="mixed_nulls",
        ),
        pytest.param([[], {}], ["[]", "{}"], id="empty_containers"),
        pytest.param([[0.5]], ["[0.5]"], id="single_element_list"),
        pytest.param([{"a": 0.5}], ["{0.5}"], id="single_element_struct"),
    ],
)
def test_float_to_string_precision_25257(
    values: list[Any], expected: list[Any]
) -> None:
    """Non-strict construction of a String Series keeps full float precision.

    Each case builds a String Series from floats (bare or nested in lists,
    arrays and structs) and checks the exact text produced for every element.
    """
    # verify the conversion is decoupled from Display formatting
    with pl.Config(float_precision=1):
        s = pl.Series(values, strict=False, dtype=pl.String)

    assert s.dtype == pl.String
    # Compare as plain Python lists: `(s == other).all()` ignores null comparison
    # results, so a mismatch at a position where `expected` is None would go
    # unnoticed (see the "mixed_nulls" case).
    assert s.to_list() == expected
Loading