Skip to content

Commit

Permalink
Merge branch 'main' into ianna/update_to_mac_os_12
Browse files Browse the repository at this point in the history
  • Loading branch information
ianna authored Jun 25, 2024
2 parents 04cdd4e + db6cece commit fb15373
Show file tree
Hide file tree
Showing 10 changed files with 160 additions and 14 deletions.
33 changes: 29 additions & 4 deletions src/awkward/_connect/pyarrow/table_conv.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
# BSD 3-Clause License; see https://github.com/scikit-hep/awkward/blob/main/LICENSE

from __future__ import annotations

import json
Expand Down Expand Up @@ -128,11 +130,34 @@ def native_arrow_field_to_akarraytype(
fields = _fields_of_strg_type(storage_type)
if len(fields) > 0:
# We need to replace storage_type with one that contains AwkwardArrowTypes.
awkwardized_fields = [
native_arrow_field_to_akarraytype(field, meta) # Recurse
for field, meta in zip(fields, metadata["subfield_metadata"])
]
sub_meta = metadata["subfield_metadata"]
awkwardized_fields = None # Temporary
if len(sub_meta) == len(fields):
awkwardized_fields = [
native_arrow_field_to_akarraytype(field, meta) # Recurse
for field, meta in zip(fields, metadata["subfield_metadata"])
]
elif len(fields) < len(sub_meta):
# If a user has read a partial column, we can have fewer Arrow fields than the original.
sub_meta_dict = {sm["field_name"]: sm for sm in sub_meta}
awkwardized_fields = []
for field in fields:
if field.name in sub_meta_dict:
awkwardized_fields.append(
native_arrow_field_to_akarraytype(
field, sub_meta_dict[field.name]
)
)
else:
raise ValueError(
f"Cannot find Awkward metadata for sub-field {field.name}"
)
else:
raise ValueError(
f"Not enough fields in Awkward metadata. Have {len(sub_meta)} need at least {len(fields)}."
)
storage_type = _make_pyarrow_type_like(storage_type, awkwardized_fields)

ak_type = AwkwardArrowType._from_metadata_object(storage_type, metadata)
return pyarrow.field(ntv_field.name, type=ak_type, nullable=ntv_field.nullable)

Expand Down
12 changes: 11 additions & 1 deletion src/awkward/_layout.py
Original file line number Diff line number Diff line change
Expand Up @@ -250,7 +250,12 @@ def maybe_highlevel_to_lowlevel(obj):
return obj


def from_arraylib(array, regulararray, recordarray):
def from_arraylib(
array,
regulararray,
recordarray,
primitive_policy: Literal["error", "promote", "pass-through"] = "promote",
):
from awkward.contents import (
ByteMaskedArray,
ListArray,
Expand Down Expand Up @@ -341,6 +346,11 @@ def attach(x):
if array.dtype == np.dtype("O"):
raise TypeError("Awkward Array does not support arrays with object dtypes.")

if primitive_policy == "error" and array.ndim == 0:
raise TypeError(
f"Encountered a scalar ({type(array).__name__}), but scalar conversion/promotion is disabled"
)

if isinstance(array, numpy.ma.MaskedArray):
mask = numpy.ma.getmask(array)
array = numpy.ma.getdata(array)
Expand Down
12 changes: 10 additions & 2 deletions src/awkward/operations/ak_from_cupy.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,15 @@


@high_level_function()
def from_cupy(array, *, regulararray=False, highlevel=True, behavior=None, attrs=None):
def from_cupy(
array,
*,
regulararray=False,
highlevel=True,
behavior=None,
primitive_policy="error",
attrs=None,
):
"""
Args:
array (cp.ndarray): The CuPy array to convert into an Awkward Array.
Expand All @@ -36,7 +44,7 @@ def from_cupy(array, *, regulararray=False, highlevel=True, behavior=None, attrs
See also #ak.to_cupy, #ak.from_numpy and #ak.from_jax.
"""
return wrap_layout(
from_arraylib(array, regulararray, False),
from_arraylib(array, regulararray, False, primitive_policy=primitive_policy),
highlevel=highlevel,
behavior=behavior,
)
3 changes: 2 additions & 1 deletion src/awkward/operations/ak_from_dlpack.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ def from_dlpack(
regulararray=False,
highlevel=True,
behavior=None,
primitive_policy="error",
attrs=None,
):
"""
Expand Down Expand Up @@ -77,7 +78,7 @@ def from_dlpack(

array = nplike.from_dlpack(array)
return wrap_layout(
from_arraylib(array, regulararray, False),
from_arraylib(array, regulararray, False, primitive_policy=primitive_policy),
highlevel=highlevel,
behavior=behavior,
)
12 changes: 10 additions & 2 deletions src/awkward/operations/ak_from_jax.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,15 @@


@high_level_function()
def from_jax(array, *, regulararray=False, highlevel=True, behavior=None, attrs=None):
def from_jax(
array,
*,
regulararray=False,
highlevel=True,
behavior=None,
attrs=None,
primitive_policy="error",
):
"""
Args:
array (jax.numpy.DeviceArray): The JAX DeviceArray to convert into an Awkward Array.
Expand Down Expand Up @@ -38,7 +46,7 @@ def from_jax(array, *, regulararray=False, highlevel=True, behavior=None, attrs=
"""
jax.assert_registered()
return wrap_layout(
from_arraylib(array, regulararray, False),
from_arraylib(array, regulararray, False, primitive_policy=primitive_policy),
highlevel=highlevel,
behavior=behavior,
)
5 changes: 4 additions & 1 deletion src/awkward/operations/ak_from_numpy.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ def from_numpy(
recordarray=True,
highlevel=True,
behavior=None,
primitive_policy="error",
attrs=None,
):
"""
Expand Down Expand Up @@ -52,7 +53,9 @@ def from_numpy(
See also #ak.to_numpy and #ak.from_cupy.
"""
return wrap_layout(
from_arraylib(array, regulararray, recordarray),
from_arraylib(
array, regulararray, recordarray, primitive_policy=primitive_policy
),
highlevel=highlevel,
behavior=behavior,
)
16 changes: 13 additions & 3 deletions src/awkward/operations/ak_to_layout.py
Original file line number Diff line number Diff line change
Expand Up @@ -179,20 +179,27 @@ def _impl(
regulararray=regulararray,
recordarray=True,
highlevel=False,
primitive_policy=primitive_policy,
)
return _handle_array_like(
obj, promoted_layout, primitive_policy=primitive_policy
)
elif Cupy.is_own_array(obj):
promoted_layout = ak.operations.from_cupy(
obj, regulararray=regulararray, highlevel=False
obj,
regulararray=regulararray,
highlevel=False,
primitive_policy=primitive_policy,
)
return _handle_array_like(
obj, promoted_layout, primitive_policy=primitive_policy
)
elif Jax.is_own_array(obj):
promoted_layout = ak.operations.from_jax(
obj, regulararray=regulararray, highlevel=False
obj,
regulararray=regulararray,
highlevel=False,
primitive_policy=primitive_policy,
)
return _handle_array_like(
obj, promoted_layout, primitive_policy=primitive_policy
Expand All @@ -215,14 +222,17 @@ def _impl(
elif ak._util.in_module(obj, "pyarrow"):
return ak.operations.from_arrow(obj, highlevel=False)
elif hasattr(obj, "__dlpack__") and hasattr(obj, "__dlpack_device__"):
return ak.operations.from_dlpack(obj, highlevel=False)
return ak.operations.from_dlpack(
obj, highlevel=False, primitive_policy=primitive_policy
)
# Typed scalars
elif isinstance(obj, np.generic):
promoted_layout = ak.operations.from_numpy(
numpy.asarray(obj),
regulararray=regulararray,
recordarray=True,
highlevel=False,
primitive_policy=primitive_policy,
)
return _handle_array_like(
obj, promoted_layout, primitive_policy=primitive_policy
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# BSD 3-Clause License; see https://github.com/scikit-hep/awkward/blob/main/LICENSE

from __future__ import annotations

import numpy as np
import pytest

import awkward as ak


def test_akarray_from_zero_dim_nparray():
    """Check that a zero-dimensional NumPy array is rejected with TypeError.

    Both the `ak.Array` constructor and `ak.from_numpy` are expected to raise
    rather than silently promote the scalar-like input to a length-1 array.
    """
    np_scalar = np.array(2.7)  # A kind of scalar in numpy.
    assert np_scalar.ndim == 0 and np_scalar.shape == ()

    # Historical (bugged) behaviour, kept for context: the conversion used to
    # succeed and yield <Array [2.7] type='1 * int64'>; converting that back
    # with ak.to_numpy gave array([2.7]), whose shape no longer matched
    # np_scalar.shape.
    #
    # Only the call that must raise is placed inside each `pytest.raises`
    # block. Any statement after it would never run when the test passes, and
    # a TypeError raised by a later statement could mask a conversion that
    # wrongly succeeded.
    with pytest.raises(TypeError):
        # Conversion to ak.Array ought to throw here:
        ak.Array(np_scalar)

    with pytest.raises(TypeError):
        ak.from_numpy(np_scalar)
17 changes: 17 additions & 0 deletions tests/test_2772_parquet_extn_array_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

from __future__ import annotations

import io
import os

import numpy as np
Expand Down Expand Up @@ -137,6 +138,22 @@ def test_array_conversions(akarray, as_dict):
rt_array = ak.from_arrow(as_extn, highlevel=True)
assert to_list(rt_array) == to_list(akarray)

# Deeper test of types
akarray_high = ak.Array(akarray)
if akarray_high.type.content.parameters.get("__categorical__", False) == as_dict:
# as_dict is supposed to go hand-in-hand with __categorical__: True, and if it
# does not, we do not round-trip perfectly. So only test when this is set correctly.
assert rt_array.type == akarray_high.type

ak_type_str_orig = io.StringIO()
ak_type_str_rtrp = io.StringIO()
akarray_high.type.show(stream=ak_type_str_orig)
rt_array.type.show(stream=ak_type_str_rtrp)
if ak_type_str_orig.getvalue() != ak_type_str_rtrp.getvalue():
print(" Original type:", ak_type_str_orig.getvalue())
print(" Rnd-trip type:", ak_type_str_rtrp.getvalue())
assert ak_type_str_orig.getvalue() == ak_type_str_rtrp.getvalue()


def test_table_conversion():
ak_tbl_like = ak.Array(
Expand Down
40 changes: 40 additions & 0 deletions tests/test_3154_parquet_subcolumn_select.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
# BSD 3-Clause License; see https://github.com/scikit-hep/awkward/blob/main/LICENSE
# ruff: noqa: E402

from __future__ import annotations

import os

import pytest

import awkward as ak

pa = pytest.importorskip("pyarrow")
pq = pytest.importorskip("pyarrow.parquet")


def test_parquet_subcolumn_select(tmp_path):
    """Write a nested table to Parquet and read back a sub-column selection.

    The table has a record column "a" (fields "lbl", "idx", "ids") and a
    nested-list column "b". Reading with columns=["a.ids", "b"] is expected
    to return "a" records holding only the "ids" field, and "b" unchanged.
    """
    column_a = [
        {"lbl": "item 1", "idx": 11, "ids": [1, 2, 3]},
        {"lbl": "item 2", "idx": 12, "ids": [51, 52]},
        {"lbl": "item 3", "idx": 13, "ids": [61, 62, 63, 64]},
    ]
    column_b = [
        [[111, 112], [121, 122]],
        [[211, 212], [221, 222]],
        [[311, 312], [321, 322]],
    ]
    ak_tbl = ak.Array({"a": column_a, "b": column_b})

    parquet_file = os.path.join(tmp_path, "test_3514.parquet")
    ak.to_parquet(ak_tbl, parquet_file)

    # Select one sub-field of the record column plus the whole of column "b".
    selection = ak.from_parquet(parquet_file, columns=["a.ids", "b"])

    expected_a = [
        {"ids": [1, 2, 3]},
        {"ids": [51, 52]},
        {"ids": [61, 62, 63, 64]},
    ]
    assert selection["a"].to_list() == expected_a
    assert selection["b"].to_list() == ak_tbl["b"].to_list()

0 comments on commit fb15373

Please sign in to comment.