From 862ae355904b6bed658365eac300b26f78e817f0 Mon Sep 17 00:00:00 2001 From: Jim Pivarski Date: Thu, 19 Dec 2019 14:21:07 -0600 Subject: [PATCH] Creating a demo for Coffea will motivate improvements. (#33) Closing off this PR (even though I'm not done with the demo) because the changes are enormous: 7000 lines in 100 files added or removed. In this PR, Types became const-members of arrays (don't have to worry about them changing) and eventually Identities should also become const. Code style was made uniform: `override` instead of `virtual`, all implementations in .cpp files, and non-numeric function arguments passed as const-references, rather than by value. Removed the visibility of RapidJSON to downstream projects (it is now an implementation detail). Now distributing statically linked libraries for downstream projects to include in their projects: can compile as ```bash g++ -Iinclude -Lawkward1 tests/test_PR019_use_json_library.cpp -lawkward-static -lawkward-cpu-kernels-static ``` Now let's see if it still deploys! * [WIP] Creating a demo for Coffea will motivate improvements. * Oops; remove QUICKSTART.md. * Start demo notebook. * Writing notebook. * Writing demo. * [skip ci] Made the ArrayType schema look sane in a demo; unit tests are probably broken. * [skip ci] Fixed segfault; working on tests * [skip ci] Fixed many of the bugs, but not all. * Fixed all bugs. ArrayType no longer appears in layout types. * Remove all 'inner' and 'nolength' calls on Types * [skip ci] Made 'type_' an immutable member of all Contents; need to update tests. * [skip ci] Working through test failures; good up to (and including PR026). * [skip ci] Should be up through PR02*. * [skip ci] It is up through PR02*. * Tests pass once again; now types are immutable members of arrays. * Replace 'virtual' keyword with 'override' when overriding methods. * Replace checks of 'size() == 0' and 'size() != 0' with 'empty()' and '!empty()', respectively. 
* Replaced 'new' operator with 'std::make_shared' everywhere that it is possible (not arrays). * Pass non-numeric arguments by const-ref, rather than by ref or by value, if possible. * Writing demo. * Writing demo. * Writing demo. * Fixed a bug in FillableUnion, but still haven't found a dataset without nulls or unions. * Writing demo. * Implementing deep copies, will use in high-level Identity interface. * Iterator and Slices have been EXTERNAL_SYMBOLed. * Try to satisfy MacOS and Windows warnings. * That broke MacOS. Try EXPORT_SYMBOL of the template instantiation in the cpp. * EXPORT_SYMBOLS for Index and std::vector. * We don't need to EXPORT_SYMBOLS if downstream users statically link against them. * Moving all implementations out of header files anyway (would have been needed for EXPORT_SYMBOLS). * Moved implementations to cpp for most classes. Only json.h is left. * If this passes, we should close this PR so that other projects can use these updates. * Fix 32-bit error and try to find those .a files on Windows. * Windows should find the statically-linked library now. 
--- .ci/azure-buildtest-awkward.yml | 3 + VERSION_INFO | 2 +- awkward1/__init__.py | 11 + awkward1/_numba/array/listarray.py | 20 +- awkward1/_numba/array/listoffsetarray.py | 20 +- awkward1/_numba/array/numpyarray.py | 12 +- awkward1/_numba/array/recordarray.py | 22 +- awkward1/_numba/array/regulararray.py | 20 +- awkward1/_numba/types.py | 6 - awkward1/_util.py | 162 +- awkward1/behavior/string.py | 6 +- awkward1/highlevel.py | 255 +--- awkward1/operations/convert.py | 19 +- awkward1/operations/describe.py | 3 + docs/demos/2019-12-20-coffea-demo.ipynb | 1506 +++++++++++++++++++ include/awkward/Content.h | 27 +- include/awkward/Identity.h | 54 +- include/awkward/Index.h | 24 +- include/awkward/Iterator.h | 14 +- include/awkward/Slice.h | 134 +- include/awkward/array/EmptyArray.h | 70 +- include/awkward/array/ListArray.h | 74 +- include/awkward/array/ListOffsetArray.h | 71 +- include/awkward/array/NumpyArray.h | 112 +- include/awkward/array/RawArray.h | 316 ++-- include/awkward/array/Record.h | 97 +- include/awkward/array/RecordArray.h | 100 +- include/awkward/array/RegularArray.h | 73 +- include/awkward/fillable/BoolFillable.h | 44 +- include/awkward/fillable/Fillable.h | 8 +- include/awkward/fillable/FillableArray.h | 27 +- include/awkward/fillable/FillableOptions.h | 7 +- include/awkward/fillable/Float64Fillable.h | 44 +- include/awkward/fillable/GrowableBuffer.h | 95 +- include/awkward/fillable/Int64Fillable.h | 47 +- include/awkward/fillable/ListFillable.h | 44 +- include/awkward/fillable/OptionFillable.h | 44 +- include/awkward/fillable/RecordFillable.h | 56 +- include/awkward/fillable/StringFillable.h | 47 +- include/awkward/fillable/TupleFillable.h | 52 +- include/awkward/fillable/UnionFillable.h | 48 +- include/awkward/fillable/UnknownFillable.h | 44 +- include/awkward/io/json.h | 157 +- include/awkward/type/ArrayType.h | 29 +- include/awkward/type/ListType.h | 27 +- include/awkward/type/OptionType.h | 27 +- include/awkward/type/PrimitiveType.h | 29 +- 
include/awkward/type/RecordType.h | 56 +- include/awkward/type/RegularType.h | 30 +- include/awkward/type/Type.h | 44 +- include/awkward/type/UnionType.h | 27 +- include/awkward/type/UnknownType.h | 25 +- include/awkward/util.h | 4 +- setup.py | 12 +- src/libawkward/Content.cpp | 56 +- src/libawkward/Identity.cpp | 62 +- src/libawkward/Index.cpp | 41 +- src/libawkward/Iterator.cpp | 16 +- src/libawkward/Slice.cpp | 208 ++- src/libawkward/array/EmptyArray.cpp | 49 +- src/libawkward/array/ListArray.cpp | 124 +- src/libawkward/array/ListOffsetArray.cpp | 118 +- src/libawkward/array/NumpyArray.cpp | 439 +++--- src/libawkward/array/Record.cpp | 103 +- src/libawkward/array/RecordArray.cpp | 266 ++-- src/libawkward/array/RegularArray.cpp | 112 +- src/libawkward/fillable/BoolFillable.cpp | 16 +- src/libawkward/fillable/Fillable.cpp | 11 + src/libawkward/fillable/FillableArray.cpp | 41 +- src/libawkward/fillable/FillableOptions.cpp | 17 + src/libawkward/fillable/Float64Fillable.cpp | 18 +- src/libawkward/fillable/GrowableBuffer.cpp | 115 ++ src/libawkward/fillable/Int64Fillable.cpp | 22 +- src/libawkward/fillable/ListFillable.cpp | 22 +- src/libawkward/fillable/OptionFillable.cpp | 19 +- src/libawkward/fillable/RecordFillable.cpp | 90 +- src/libawkward/fillable/StringFillable.cpp | 35 +- src/libawkward/fillable/TupleFillable.cpp | 60 +- src/libawkward/fillable/UnionFillable.cpp | 22 +- src/libawkward/fillable/UnknownFillable.cpp | 18 +- src/libawkward/io/json.cpp | 324 ++++ src/libawkward/io/root.cpp | 5 +- src/libawkward/type/ArrayType.cpp | 27 +- src/libawkward/type/ListType.cpp | 24 +- src/libawkward/type/OptionType.cpp | 24 +- src/libawkward/type/PrimitiveType.cpp | 24 +- src/libawkward/type/RecordType.cpp | 71 +- src/libawkward/type/RegularType.cpp | 25 +- src/libawkward/type/Type.cpp | 39 +- src/libawkward/type/UnionType.cpp | 28 +- src/libawkward/type/UnknownType.cpp | 23 +- src/libawkward/util.cpp | 4 +- src/pyawkward.cpp | 92 +- tests/test_PR021_emptyarray.py | 22 
+- tests/test_PR022_fillablearray_in_numba.py | 4 +- tests/test_PR023_regular_array.py | 2 +- tests/test_PR025_record_array.py | 24 +- tests/test_PR028_add_dressed_types.py | 77 +- tests/test_PR031_types_in_numba.py | 36 +- tests/test_PR032_replace_dressedtype.py | 19 +- 100 files changed, 4890 insertions(+), 2411 deletions(-) create mode 100644 docs/demos/2019-12-20-coffea-demo.ipynb create mode 100644 src/libawkward/fillable/Fillable.cpp create mode 100644 src/libawkward/fillable/FillableOptions.cpp create mode 100644 src/libawkward/fillable/GrowableBuffer.cpp diff --git a/.ci/azure-buildtest-awkward.yml b/.ci/azure-buildtest-awkward.yml index c82290da85..ba5e113ace 100644 --- a/.ci/azure-buildtest-awkward.yml +++ b/.ci/azure-buildtest-awkward.yml @@ -78,6 +78,7 @@ jobs: - script: | python setup.py build + ls awkward1 python -m pytest -vv tests displayName: "Build and test" @@ -129,6 +130,7 @@ jobs: - script: | python setup.py build + ls awkward1 python -m pytest -vv tests displayName: "Build and test" @@ -189,5 +191,6 @@ jobs: - script: | python setup.py build + ls awkward1 python -m pytest -vv tests displayName: "Build and test" diff --git a/VERSION_INFO b/VERSION_INFO index 28d0075395..50140e3536 100644 --- a/VERSION_INFO +++ b/VERSION_INFO @@ -1 +1 @@ -0.1.32 +0.1.33 diff --git a/awkward1/__init__.py b/awkward1/__init__.py index 225c666f78..9fde77b27a 100644 --- a/awkward1/__init__.py +++ b/awkward1/__init__.py @@ -3,7 +3,18 @@ namespace = {} import awkward1.layout +from awkward1.layout import Type +from awkward1.layout import UnknownType +from awkward1.layout import PrimitiveType +from awkward1.layout import ListType +from awkward1.layout import RegularType +from awkward1.layout import RecordType +from awkward1.layout import OptionType +from awkward1.layout import UnionType +from awkward1.layout import ArrayType + import awkward1._numba + import awkward1.highlevel from awkward1.highlevel import Array from awkward1.highlevel import Record diff --git 
a/awkward1/_numba/array/listarray.py b/awkward1/_numba/array/listarray.py index b24e9c8ede..3d635c0ff6 100644 --- a/awkward1/_numba/array/listarray.py +++ b/awkward1/_numba/array/listarray.py @@ -156,11 +156,9 @@ def unbox(tpe, obj, c): proxyout.id = c.pyapi.to_native_value(tpe.idtpe, id_obj).value c.pyapi.decref(id_obj) if tpe.typetpe != numba.none: - type1_obj = c.pyapi.object_getattr_string(obj, "type") - type2_obj = c.pyapi.object_getattr_string(type1_obj, "type") - proxyout.type = c.pyapi.to_native_value(tpe.typetpe, type2_obj).value - c.pyapi.decref(type1_obj) - c.pyapi.decref(type2_obj) + type_obj = c.pyapi.object_getattr_string(obj, "type") + proxyout.type = c.pyapi.to_native_value(tpe.typetpe, type_obj).value + c.pyapi.decref(type_obj) is_error = numba.cgutils.is_not_null(c.builder, c.pyapi.err_occurred()) return numba.extending.NativeValue(proxyout._getvalue(), is_error) @@ -191,14 +189,18 @@ def box(tpe, val, c): args.append(c.pyapi.from_native_value(tpe.idtpe, proxyin.id, c.env_manager)) else: args.append(c.pyapi.make_none()) - if tpe.typetpe != numba.none: - args.append(c.pyapi.from_native_value(tpe.typetpe, proxyin.type, c.env_manager)) - else: - args.append(c.pyapi.make_none()) out = c.pyapi.call_function_objargs(ListArray_obj, args) for x in args: c.pyapi.decref(x) c.pyapi.decref(ListArray_obj) + if tpe.typetpe != numba.none: + old = out + astype_obj = c.pyapi.object_getattr_string(out, "astype") + t = c.pyapi.from_native_value(tpe.typetpe, proxyin.type, c.env_manager) + out = c.pyapi.call_function_objargs(astype_obj, (t,)) + c.pyapi.decref(old) + c.pyapi.decref(astype_obj) + c.pyapi.decref(t) return out @numba.extending.lower_builtin(len, ListArrayType) diff --git a/awkward1/_numba/array/listoffsetarray.py b/awkward1/_numba/array/listoffsetarray.py index 66ac3c9bfd..10f2b7a912 100644 --- a/awkward1/_numba/array/listoffsetarray.py +++ b/awkward1/_numba/array/listoffsetarray.py @@ -147,11 +147,9 @@ def unbox(tpe, obj, c): proxyout.id = 
c.pyapi.to_native_value(tpe.idtpe, id_obj).value c.pyapi.decref(id_obj) if tpe.typetpe != numba.none: - type1_obj = c.pyapi.object_getattr_string(obj, "type") - type2_obj = c.pyapi.object_getattr_string(type1_obj, "type") - proxyout.type = c.pyapi.to_native_value(tpe.typetpe, type2_obj).value - c.pyapi.decref(type1_obj) - c.pyapi.decref(type2_obj) + type_obj = c.pyapi.object_getattr_string(obj, "type") + proxyout.type = c.pyapi.to_native_value(tpe.typetpe, type_obj).value + c.pyapi.decref(type_obj) is_error = numba.cgutils.is_not_null(c.builder, c.pyapi.err_occurred()) return numba.extending.NativeValue(proxyout._getvalue(), is_error) @@ -179,14 +177,18 @@ def box(tpe, val, c): args.append(c.pyapi.from_native_value(tpe.idtpe, proxyin.id, c.env_manager)) else: args.append(c.pyapi.make_none()) - if tpe.typetpe != numba.none: - args.append(c.pyapi.from_native_value(tpe.typetpe, proxyin.type, c.env_manager)) - else: - args.append(c.pyapi.make_none()) out = c.pyapi.call_function_objargs(ListOffsetArray_obj, args) for x in args: c.pyapi.decref(x) c.pyapi.decref(ListOffsetArray_obj) + if tpe.typetpe != numba.none: + old = out + astype_obj = c.pyapi.object_getattr_string(out, "astype") + t = c.pyapi.from_native_value(tpe.typetpe, proxyin.type, c.env_manager) + out = c.pyapi.call_function_objargs(astype_obj, (t,)) + c.pyapi.decref(old) + c.pyapi.decref(astype_obj) + c.pyapi.decref(t) return out @numba.extending.lower_builtin(len, ListOffsetArrayType) diff --git a/awkward1/_numba/array/numpyarray.py b/awkward1/_numba/array/numpyarray.py index 34c160da27..b458cf7916 100644 --- a/awkward1/_numba/array/numpyarray.py +++ b/awkward1/_numba/array/numpyarray.py @@ -123,7 +123,17 @@ def box(tpe, val, c): else: args.append(c.pyapi.make_none()) if tpe.typetpe != numba.none: - args.append(c.pyapi.unserialize(c.pyapi.serialize_object(tpe.typetpe.literal_type))) + RegularType_obj = c.pyapi.unserialize(c.pyapi.serialize_object(awkward1.layout.RegularType)) + t = 
c.pyapi.unserialize(c.pyapi.serialize_object(tpe.typetpe.literal_type)) + arrayval = numba.targets.arrayobj.make_array(tpe.arraytpe)(c.context, c.builder, proxyin.array) + arrayshape = arrayval.shape + for i in range(tpe.arraytpe.ndim - 1, 0, -1): + size_val = c.builder.extract_value(arrayshape, i) + size_obj = c.pyapi.from_native_value(numba.intp, size_val, c.env_manager) + t = c.pyapi.call_function_objargs(RegularType_obj, (t, size_obj)) + c.pyapi.decref(size_obj) + c.pyapi.decref(RegularType_obj) + args.append(t) else: args.append(c.pyapi.make_none()) out = c.pyapi.call_function_objargs(NumpyArray_obj, args) diff --git a/awkward1/_numba/array/recordarray.py b/awkward1/_numba/array/recordarray.py index b85735db6c..21ca8d7ac0 100644 --- a/awkward1/_numba/array/recordarray.py +++ b/awkward1/_numba/array/recordarray.py @@ -186,11 +186,9 @@ def unbox(tpe, obj, c): proxyout.id = c.pyapi.to_native_value(tpe.idtpe, id_obj).value c.pyapi.decref(id_obj) if tpe.typetpe != numba.none: - type1_obj = c.pyapi.object_getattr_string(obj, "type") - type2_obj = c.pyapi.object_getattr_string(type1_obj, "type") - proxyout.type = c.pyapi.to_native_value(tpe.typetpe, type2_obj).value - c.pyapi.decref(type1_obj) - c.pyapi.decref(type2_obj) + type_obj = c.pyapi.object_getattr_string(obj, "type") + proxyout.type = c.pyapi.to_native_value(tpe.typetpe, type_obj).value + c.pyapi.decref(type_obj) is_error = numba.cgutils.is_not_null(c.builder, c.pyapi.err_occurred()) return numba.extending.NativeValue(proxyout._getvalue(), is_error) @@ -217,10 +215,6 @@ def box(tpe, val, c): args.append(id_obj) else: args.append(c.pyapi.make_none()) - if tpe.typetpe != numba.none: - args.append(c.pyapi.from_native_value(tpe.typetpe, proxyin.type, c.env_manager)) - else: - args.append(c.pyapi.make_none()) if len(tpe.contenttpes) == 0: RecordArray_obj = c.pyapi.unserialize(c.pyapi.serialize_object(awkward1.layout.RecordArray)) @@ -266,6 +260,16 @@ def box(tpe, val, c): for x in args: c.pyapi.decref(x) + + if 
tpe.typetpe != numba.none: + old = out + astype_obj = c.pyapi.object_getattr_string(out, "astype") + t = c.pyapi.from_native_value(tpe.typetpe, proxyin.type, c.env_manager) + out = c.pyapi.call_function_objargs(astype_obj, (t,)) + c.pyapi.decref(old) + c.pyapi.decref(astype_obj) + c.pyapi.decref(t) + return out @numba.extending.box(RecordType) diff --git a/awkward1/_numba/array/regulararray.py b/awkward1/_numba/array/regulararray.py index df42ed13a0..186870a215 100644 --- a/awkward1/_numba/array/regulararray.py +++ b/awkward1/_numba/array/regulararray.py @@ -128,11 +128,9 @@ def unbox(tpe, obj, c): proxyout.id = c.pyapi.to_native_value(tpe.idtpe, id_obj).value c.pyapi.decref(id_obj) if tpe.typetpe != numba.none: - type1_obj = c.pyapi.object_getattr_string(obj, "type") - type2_obj = c.pyapi.object_getattr_string(type1_obj, "type") - proxyout.type = c.pyapi.to_native_value(tpe.typetpe, type2_obj).value - c.pyapi.decref(type1_obj) - c.pyapi.decref(type2_obj) + type_obj = c.pyapi.object_getattr_string(obj, "type") + proxyout.type = c.pyapi.to_native_value(tpe.typetpe, type_obj).value + c.pyapi.decref(type_obj) is_error = numba.cgutils.is_not_null(c.builder, c.pyapi.err_occurred()) return numba.extending.NativeValue(proxyout._getvalue(), is_error) @@ -147,14 +145,18 @@ def box(tpe, val, c): args.append(c.pyapi.from_native_value(tpe.idtpe, proxyin.id, c.env_manager)) else: args.append(c.pyapi.make_none()) - if tpe.typetpe != numba.none: - args.append(c.pyapi.from_native_value(tpe.typetpe, proxyin.type, c.env_manager)) - else: - args.append(c.pyapi.make_none()) out = c.pyapi.call_function_objargs(RegularArray_obj, args) for x in args: c.pyapi.decref(x) c.pyapi.decref(RegularArray_obj) + if tpe.typetpe != numba.none: + old = out + astype_obj = c.pyapi.object_getattr_string(out, "astype") + t = c.pyapi.from_native_value(tpe.typetpe, proxyin.type, c.env_manager) + out = c.pyapi.call_function_objargs(astype_obj, (t,)) + c.pyapi.decref(old) + c.pyapi.decref(astype_obj) + 
c.pyapi.decref(t) return out @numba.extending.lower_builtin(len, RegularArrayType) diff --git a/awkward1/_numba/types.py b/awkward1/_numba/types.py index f51d504abe..407373dbf1 100644 --- a/awkward1/_numba/types.py +++ b/awkward1/_numba/types.py @@ -28,14 +28,8 @@ def box(tpe, val, c): return c.pyapi.unserialize(c.pyapi.serialize_object(tpe.literal_type)) def typeof_literaltype(literal_type): - if isinstance(literal_type, awkward1.layout.ArrayType): - literal_type = literal_type.type return LiteralTypeType(literal_type) -@numba.extending.typeof_impl.register(awkward1.layout.ArrayType) -def typeof_ArrayType(val, c): - return numba.typeof(val.type) - @numba.extending.typeof_impl.register(awkward1.layout.UnknownType) def typeof_UnknownType(val, c): return UnknownTypeType(val.parameters) diff --git a/awkward1/_util.py b/awkward1/_util.py index 7999c15251..fecb2892b2 100644 --- a/awkward1/_util.py +++ b/awkward1/_util.py @@ -7,14 +7,18 @@ def wrap(content, namespace): import awkward1.layout - if isinstance(content, (awkward1.layout.Content, awkward1.layout.Record)): - t = content.type.nolength() - if t.parameters.get("__class__") in namespace: - return namespace[t.parameters["__class__"]](content, namespace=namespace) - elif isinstance(content, awkward1.layout.Record): - return awkward1.Record(content) - else: - return awkward1.Array(content) + if isinstance(content, awkward1.layout.Content): + cls = namespace.get(content.type.parameters.get("__class__")) + if cls is None: + cls = awkward1.Array + return cls(content, namespace=namespace) + + elif isinstance(content, awkward1.layout.Record): + cls = namespace.get(content.type.parameters.get("__class__")) + if cls is None: + cls = awkward1.Record + return cls(content, namespace=namespace) + else: return content @@ -35,3 +39,145 @@ def field2index(lookup, numfields, key): return attempt field2index._pattern = re.compile(r"^[1-9][0-9]*$") + +def minimally_touching_string(limit_length, layout, namespace): + import 
awkward1.layout + + if len(layout) == 0: + return "[]" + + def forward(x, space, brackets=True, wrap=True): + done = False + if wrap and isinstance(x, awkward1.layout.Content): + cls = namespace.get(x.type.parameters.get("__class__")) + if cls is not None: + y = cls(x, namespace=namespace) + if "__repr__" in type(y).__dict__: + yield space + repr(y) + done = True + if wrap and isinstance(x, awkward1.layout.Record): + cls = namespace.get(x.type.parameters.get("__class__")) + if cls is not None: + y = cls(x, namespace=namespace) + if "__repr__" in type(y).__dict__: + yield space + repr(y) + done = True + if not done: + if isinstance(x, awkward1.layout.Content): + if brackets: + yield space + "[" + sp = "" + for i in range(len(x)): + for token in forward(x[i], sp): + yield token + sp = ", " + if brackets: + yield "]" + elif isinstance(x, awkward1.layout.Record): + yield space + "{" + sp = "" + for k in x.keys(): + key = sp + k + ": " + for token in forward(x[k], ""): + yield key + token + key = "" + sp = ", " + yield "}" + elif isinstance(x, (float, numpy.floating)): + yield space + "{0:.3g}".format(x) + else: + yield space + repr(x) + + def backward(x, space, brackets=True, wrap=True): + done = False + if wrap and isinstance(x, awkward1.layout.Content): + cls = namespace.get(x.type.parameters.get("__class__")) + if cls is not None: + y = cls(x, namespace=namespace) + if "__repr__" in type(y).__dict__: + yield repr(y) + space + done = True + if wrap and isinstance(x, awkward1.layout.Record): + cls = namespace.get(x.type.parameters.get("__class__")) + if cls is not None: + y = cls(x, namespace=namespace) + if "__repr__" in type(y).__dict__: + yield repr(y) + space + done = True + if not done: + if isinstance(x, awkward1.layout.Content): + if brackets: + yield "]" + space + sp = "" + for i in range(len(x) - 1, -1, -1): + for token in backward(x[i], sp): + yield token + sp = ", " + if brackets: + yield "[" + elif isinstance(x, awkward1.layout.Record): + yield "}" + space 
+ keys = x.keys() + for i in range(len(keys) - 1, -1, -1): + last = None + for token in backward(x[keys[i]], ""): + if last is not None: + yield last + last = token + if last is not None: + yield keys[i] + ": " + last + if i != 0: + yield ", " + yield "{" + elif isinstance(x, (float, numpy.floating)): + yield "{0:.3g}".format(x) + space + else: + yield repr(x) + space + + def forever(iterable): + for token in iterable: + yield token + while True: + yield None + + halfway = len(layout) // 2 + left, right = ["["], ["]"] + leftlen, rightlen = 1, 1 + leftgen = forever(forward(layout[:halfway], "", brackets=False, wrap=False)) + rightgen = forever(backward(layout[halfway:], "", brackets=False, wrap=False)) + while True: + l = next(leftgen) + if l is not None: + if leftlen + rightlen + len(l) + (2 if l is None and r is None else 6) > limit_length: + break + left.append(l) + leftlen += len(l) + + r = next(rightgen) + if r is not None: + if leftlen + rightlen + len(r) + (2 if l is None and r is None else 6) > limit_length: + break + right.append(r) + rightlen += len(r) + + if l is None and r is None: + break + + while len(left) > 1 and (left[-1] == "[" or left[-1] == ", [" or left[-1] == "{" or left[-1] == ", {" or left[-1] == ", "): + left.pop() + l = "" + while len(right) > 1 and (right[-1] == "]" or right[-1] == "], " or right[-1] == "}" or right[-1] == "}, " or right[-1] == ", "): + right.pop() + r = "" + if l is None and r is None: + if left == ["["]: + return "[" + "".join(reversed(right)).lstrip(" ") + else: + return "".join(left).rstrip(" ") + ", " + "".join(reversed(right)).lstrip(" ") + else: + if left == ["["] and right == ["]"]: + return "[...]" + elif left == ["["]: + return "[... " + "".join(reversed(right)).lstrip(" ") + else: + return "".join(left).rstrip(" ") + ", ... 
" + "".join(reversed(right)).lstrip(" ") diff --git a/awkward1/behavior/string.py b/awkward1/behavior/string.py index f2c71dcbc0..2242e259e0 100644 --- a/awkward1/behavior/string.py +++ b/awkward1/behavior/string.py @@ -11,14 +11,14 @@ def __bytes__(self): return numpy.asarray(self.layout).tostring() def __str__(self): - encoding = self.type.nolength().parameters.get("encoding") + encoding = self.layout.type.parameters.get("encoding") if encoding is None: return str(self.__bytes__()) else: return self.__bytes__().decode(encoding) def __repr__(self): - encoding = self.type.nolength().parameters.get("encoding") + encoding = self.layout.type.parameters.get("encoding") if encoding is None: return repr(self.__bytes__()) else: @@ -34,7 +34,7 @@ def __iter__(self): class StringBehavior(awkward1.highlevel.Array): def __iter__(self): - if self.type.nolength().inner().parameters.get("encoding") is None: + if self.layout.type.type.parameters.get("encoding") is None: for x in super(StringBehavior, self).__iter__(): yield x.__bytes__() else: diff --git a/awkward1/highlevel.py b/awkward1/highlevel.py index bd6fe8ea02..0f2360067b 100644 --- a/awkward1/highlevel.py +++ b/awkward1/highlevel.py @@ -19,14 +19,27 @@ def __init__(self, data, type=None, namespace=None): layout = awkward1.operations.convert.fromiter(data).layout if not isinstance(layout, awkward1.layout.Content): raise TypeError("could not convert data into an awkward1.Array") - self.layout = layout + self.namespace = namespace - if type is not None: - self.type = type + + if type is None: + self._type = None else: - t = self.layout.type.nolength() - if t.parameters.get("__class__") in self._namespace: - self.__class__ = self._namespace[t.parameters["__class__"]] + if not isinstance(type, awkward1.layout.ArrayType): + raise TypeError("type must be an awkward1.layout.ArrayType") + if type.length > len(layout): + raise TypeError("ArrayType length ({0}) is greater than layout length {1}".format(type.length, len(layout))) + 
if type.length < len(layout): + layout = layout[:type.length] + cls = self._namespace.get(type.parameters.get("__class__")) + if cls is not None: + if not isinstance(cls, __builtins__["type"]) or not issubclass(cls, Array): + raise TypeError("type.parameters['__class__'] = {0} must be a subclass of awkward1.Array".format(repr(type.parameters["__class__"]))) + self.__class__ = cls + layout = layout.astype(type.type) + self._type = type + + self.layout = layout @property def layout(self): @@ -38,23 +51,6 @@ def layout(self, layout): raise TypeError("layout must be a subclass of awkward1.layout.Content") self._layout = layout - @property - def type(self): - return self._layout.type - - @type.setter - def type(self, type): - if not isinstance(type, awkward1.layout.Type): - raise TypeError("type must be a subclass of awkward1.layout.Type") - t = type.nolength() - if t.parameters.get("__class__") in self._namespace: - self.__class__ = self._namespace[t.parameters["__class__"]] - self._layout.type = type - - @property - def baretype(self): - return self._layout.baretype - @property def namespace(self): return self._namespace @@ -66,161 +62,56 @@ def namespace(self, namespace): else: self._namespace = namespace + @property + def type(self): + if self._type is None: + return awkward1.layout.ArrayType(self._layout.type, len(self._layout)) + else: + return self._type + + def __len__(self): + return len(self.layout) + def __iter__(self): for x in self.layout: yield awkward1._util.wrap(x, self._namespace) + def __getitem__(self, where): + return awkward1._util.wrap(self.layout[where], self._namespace) + def __str__(self, limit_value=85): - if len(self) == 0: - return "[]" - - def forward(x, space, brackets=True, wrap=True): - done = False - if wrap and isinstance(x, awkward1.layout.Content): - t = x.type.nolength() - if t.parameters.get("__class__") in self._namespace: - y = self._namespace[t.parameters["__class__"]](x, namespace=self._namespace) - if "__repr__" in 
type(y).__dict__: - yield space + repr(y) - done = True - if not done: - if isinstance(x, awkward1.layout.Content): - if brackets: - yield space + "[" - sp = "" - for i in range(len(x)): - for token in forward(x[i], sp): - yield token - sp = ", " - if brackets: - yield "]" - elif isinstance(x, awkward1.layout.Record): - yield space + "{" - sp = "" - for k in x.keys(): - key = sp + k + ": " - for token in forward(x[k], ""): - yield key + token - key = "" - sp = ", " - yield "}" - elif isinstance(x, (float, numpy.floating)): - yield space + "{0:.3g}".format(x) - else: - yield space + repr(x) - - def backward(x, space, brackets=True, wrap=True): - done = False - if wrap and isinstance(x, awkward1.layout.Content): - t = x.type.nolength() - if t.parameters.get("__class__") in self._namespace: - y = self._namespace[t.parameters["__class__"]](x, namespace=self._namespace) - if "__repr__" in type(y).__dict__: - yield repr(y) + space - done = True - if not done: - if isinstance(x, awkward1.layout.Content): - if brackets: - yield "]" + space - sp = "" - for i in range(len(x) - 1, -1, -1): - for token in backward(x[i], sp): - yield token - sp = ", " - if brackets: - yield "[" - elif isinstance(x, awkward1.layout.Record): - yield "}" + space - keys = x.keys() - for i in range(len(keys) - 1, -1, -1): - last = None - for token in backward(x[keys[i]], ""): - if last is not None: - yield last - last = token - if last is not None: - yield keys[i] + ": " + last - if i != 0: - yield ", " - yield "{" - elif isinstance(x, (float, numpy.floating)): - yield "{0:.3g}".format(x) + space - else: - yield repr(x) + space - - def forever(iterable): - for token in iterable: - yield token - while True: - yield None - - halfway = len(self.layout) // 2 - left, right = ["["], ["]"] - leftlen, rightlen = 1, 1 - leftgen = forever(forward(self.layout[:halfway], "", brackets=False, wrap=False)) - rightgen = forever(backward(self.layout[halfway:], "", brackets=False, wrap=False)) - while True: - l = 
next(leftgen) - if l is not None: - if leftlen + rightlen + len(l) + (2 if l is None and r is None else 6) > limit_value: - break - left.append(l) - leftlen += len(l) - - r = next(rightgen) - if r is not None: - if leftlen + rightlen + len(r) + (2 if l is None and r is None else 6) > limit_value: - break - right.append(r) - rightlen += len(r) - - if l is None and r is None: - break - - while len(left) > 1 and (left[-1] == "[" or left[-1] == ", [" or left[-1] == "{" or left[-1] == ", {" or left[-1] == ", "): - left.pop() - l = "" - while len(right) > 1 and (right[-1] == "]" or right[-1] == "], " or right[-1] == "}" or right[-1] == "}, " or right[-1] == ", "): - right.pop() - r = "" - if l is None and r is None: - if len(left) == 0: - return "".join(reversed(right)).lstrip(" ") - else: - return "".join(left).rstrip(" ") + ", " + "".join(reversed(right)).lstrip(" ") - else: - if len(left) == 0 and len(right) == 0: - return "..." - elif len(left) == 0: - return "... " + "".join(reversed(right)).lstrip(" ") - else: - return "".join(left).rstrip(" ") + ", ... " + "".join(reversed(right)).lstrip(" ") + return awkward1._util.minimally_touching_string(limit_value, self._layout, self._namespace) def __repr__(self, limit_value=40, limit_total=85): - value = self.__str__(limit_value=limit_value) + value = awkward1._util.minimally_touching_string(limit_value, self._layout, self._namespace) limit_type = limit_total - len(value) - len("") - type = repr(str(self.layout.type)) + type = repr(str(self.type)) if len(type) > limit_type: type = type[:(limit_type - 4)] + "..." 
+ type[-1] return "".format(value, type) - def __len__(self): - return len(self.layout) - - def __getitem__(self, where): - return awkward1._util.wrap(self.layout[where], self._namespace) - class Record(object): - def __init__(self, data, type=None): + def __init__(self, data, type=None, namespace=None): # FIXME: more checks here layout = data if not isinstance(layout, awkward1.layout.Record): raise TypeError("could not convert data into an awkward1.Record") - self.layout = layout + + self.namespace = namespace + if type is not None: - self.type = type + if not isinstance(type, awkward1.layout.RecordType): + raise TypeError("type must be an awkward1.layout.RecordType") + cls = self._namespace.get(type.parameters.get("__class__")) + if cls is not None: + if not isinstance(cls, __builtins__["type"]) or not issubclass(cls, Record): + raise TypeError("type.parameters['__class__'] = {0} must be a subclass of awkward1.Record".format(repr(type.parameters["__class__"]))) + self.__class__ = cls + layout = layout.astype(type) + + self.layout = layout @property def layout(self): @@ -232,36 +123,42 @@ def layout(self, layout): raise TypeError("layout must be a subclass of awkward1.layout.Record") self._layout = layout + @property + def namespace(self): + return self._namespace + + @namespace.setter + def namespace(self, namespace): + if namespace is None: + self._namespace = awkward1.namespace + else: + self._namespace = namespace + @property def type(self): return self._layout.type - @type.setter - def type(self, type): - if not isinstance(type, awkward1.layout.Type): - raise TypeError("type must be a subclass of awkward1.layout.Type") - t = type.nolength() - if t.parameters.get("__class__") in self._namespace: - self.__class__ = self._namespace[t.parameters["__class__"]] - self._layout.type = type + def __getitem__(self, where): + return awkward1._util.wrap(self.layout[where], self._namespace) + + def __str__(self, limit_value=85): + return 
awkward1._util.minimally_touching_string(limit_value, self._layout, self._namespace) - @property - def baretype(self): - return self._layout.baretype + def __repr__(self, limit_value=40, limit_total=85): + value = awkward1._util.minimally_touching_string(limit_value, self._layout, self._namespace) + + limit_type = limit_total - len(value) - len("") + type = repr(str(self.layout.type)) + if len(type) > limit_type: + type = type[:(limit_type - 4)] + "..." + type[-1] + + return "".format(value, type) class FillableArray(object): def __init__(self, namespace=None): self._fillablearray = awkward1.layout.FillableArray() self.namespace = namespace - @property - def type(self): - return self._fillablearray.type - - @property - def baretype(self): - return self._fillablearray.snapshot().baretype - @property def namespace(self): return self._namespace @@ -273,6 +170,10 @@ def namespace(self, namespace): else: self._namespace = namespace + @property + def type(self): + return awkward1.layout.ArrayType(self._fillablearray.type, len(self._fillablearray)) + def __len__(self): return len(self._fillablearray) diff --git a/awkward1/operations/convert.py b/awkward1/operations/convert.py index 3f56a96a2c..c6dbb3d098 100644 --- a/awkward1/operations/convert.py +++ b/awkward1/operations/convert.py @@ -42,7 +42,7 @@ def tolist(array): return array elif isinstance(array, awkward1.behavior.string.CharBehavior): - if array.type.parameters.get("encoding") is None: + if array.layout.type.parameters.get("encoding") is None: return array.__bytes__() else: return array.__str__() @@ -50,9 +50,12 @@ def tolist(array): elif isinstance(array, awkward1.highlevel.Array): return [tolist(x) for x in array] - elif isinstance(array, awkward1.Record): + elif isinstance(array, awkward1.highlevel.Record): return tolist(array.layout) + elif isinstance(array, awkward1.highlevel.FillableArray): + return tolist(array.snapshot()) + elif isinstance(array, awkward1.layout.Record) and array.istuple: return 
tuple(tolist(x) for x in array.fields()) @@ -63,7 +66,7 @@ def tolist(array): return array.tolist() elif isinstance(array, awkward1.layout.FillableArray): - return [tolist(x) for x in array] + return [tolist(x) for x in array.snapshot()] elif isinstance(array, awkward1.layout.NumpyArray): return numpy.asarray(array).tolist() @@ -81,17 +84,23 @@ def tojson(array, destination=None, pretty=False, maxdecimals=None, buffersize=6 return json.dumps(array) elif isinstance(array, awkward1.highlevel.Array): - return tojson(array.layout, destination=destination, pretty=pretty, maxdecimals=maxdecimals, buffersize=buffersize) + out = array.layout elif isinstance(array, awkward1.highlevel.Record): - return tojson(array.layout, destination=destination, pretty=pretty, maxdecimals=maxdecimals, buffersize=buffersize) + out = array.layout + + elif isinstance(array, awkward1.highlevel.FillableArray): + out = array.snapshot().layout elif isinstance(array, awkward1.layout.Record): out = array + elif isinstance(array, numpy.ndarray): out = awkward1.layout.NumpyArray(array) + elif isinstance(array, awkward1.layout.FillableArray): out = array.snapshot() + elif isinstance(array, awkward1.layout.Content): out = array diff --git a/awkward1/operations/describe.py b/awkward1/operations/describe.py index 50fa7bcfba..45f3c2f920 100644 --- a/awkward1/operations/describe.py +++ b/awkward1/operations/describe.py @@ -25,6 +25,9 @@ def typeof(array): elif isinstance(array, numpy.generic): raise ValueError("cannot describe {0} as a PrimitiveType".format(type(array))) + elif isinstance(array, (awkward1.highlevel.Array, awkward1.highlevel.Record, awkward1.highlevel.FillableArray)): + return array.type + elif isinstance(array, awkward1.layout.Record): return array.type diff --git a/docs/demos/2019-12-20-coffea-demo.ipynb b/docs/demos/2019-12-20-coffea-demo.ipynb new file mode 100644 index 0000000000..b22bc9ea14 --- /dev/null +++ b/docs/demos/2019-12-20-coffea-demo.ipynb @@ -0,0 +1,1506 @@ +{ + "cells": [ 
+ { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# 2019-12-20-coffea-demo\n", + "\n", + "This demo of the new Awkward Array was presented on December 20, 2019, before the final 1.0 version was released. Some interfaces may have changed. To run this notebook, make sure you have version 0.1.33 ([GitHub](https://github.com/scikit-hep/awkward-1.0/releases/tag/0.1.33), [pip](https://pypi.org/project/awkward1/0.1.33/)) by installing\n", + "\n", + "```bash\n", + "pip install 'awkward1==0.1.33'\n", + "```\n", + "\n", + "The basic concepts of Awkward arrays are presented on the [old Awkward README](https://github.com/scikit-hep/awkward-array/tree/0.12.17#readme) and the motivation for a 1.0 rewrite is presented on the [new Awkward README](https://github.com/scikit-hep/awkward-1.0/tree/0.1.32#readme)." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "# The base of the GitHub repo is two levels up from this notebook.\n", + "import sys\n", + "import os\n", + "sys.path.insert(0, os.path.join(os.getcwd(), \"..\", \"..\"))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## High-level array class\n", + "\n", + "The biggest user-facing change is that, instead of mixing NumPy arrays and `JaggedArray` objects, the new Awkward has a single `Array` class." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import numpy as np\n", + "import awkward1 as ak\n", + "\n", + "array1 = ak.Array([[1.1, 2.2, 3.3], [], [4.4, 5.5]])\n", + "array1" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "array2 = ak.Array([{\"x\": 0, \"y\": []}, {\"x\": 1, \"y\": [1.1]}, {\"x\": 2, \"y\": [1.1, 2.2]}])\n", + "array2" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The same `Array` class is used for all data structures, such as the array of lists in `array1` and the array of records in `array2`.\n", + "\n", + "There won't be any user-level functions that apply to some data types and not others. The result of an operation is likely type-dependent, but its accessibility is not. 
(At this time, the only existing operations are conversions and descriptions.)\n", + "\n", + "(Incidentally, the width of that string representation is exactly large enough to fit into GitHub and StackOverflow text boxes without scrolling.)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[[1.1, 2.2, 3.3], [], [4.4, 5.5]]" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ak.tolist(array1)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'[[1.1,2.2,3.3],[],[4.4,5.5]]'" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ak.tojson(array1)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[{'x': 0, 'y': []}, {'x': 1, 'y': [1.1]}, {'x': 2, 'y': [1.1, 2.2]}]" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ak.tolist(array2)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'[{\"x\":0,\"y\":[]},{\"x\":1,\"y\":[1.1]},{\"x\":2,\"y\":[1.1,2.2]}]'" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ak.tojson(array2)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "3 * var * float64" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ak.typeof(array1)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "3 * {\"x\": int64, \"y\": var * float64}" + ] + }, + "execution_count": 9, + "metadata": {}, + 
"output_type": "execute_result" + } + ], + "source": [ + "ak.typeof(array2)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "(Data types are described using the [datashape language](https://datashape.readthedocs.io/en/latest/). Some Awkward features are [not expressible](https://github.com/blaze/datashape/issues/237) in the current datashape specification, so they're expressed in an extension of the language using the same style of syntax.)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The next major change in interface is that operations on arrays, such as `ak.tolist` and `ak.typeof` above, are free-standing functions, rather than class methods. This is because it's desirable to put domain specific (e.g. physics) methods on the array object itself; using free-standing functions for array manipulations avoids conflicts. For example,\n", + "\n", + " * `ak.cross(array1, array2)` is an array-manipulation function (the cross-join of `array1` and `array2`)\n", + " * `array1.cross(array2)` could be a user-defined method, such as the 3D cross-product, if `array1` and `array2` represent (arrays of) 3D vectors.\n", + " * `array1.somefield` is a shortcut for `array1[\"somefield\"]`." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Low-level array classes\n", + "\n", + "The old `JaggedArray` and `Table` are still available, but you have to ask for them explicitly with `layout`. They're not \"private\" or \"internal implementations\" (there's no underscore in `layout`): they're public for frameworks like Coffea but hidden from data analysts.\n", + "\n", + "As such, their string representations have more low-level detail: the contents of indexes, rather than what they mean as high-level types. 
(The XML formatting is just an elaboration on Python's angle-bracket convention for `repr` and the fact that we need to denote nesting.)" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "\n", + " var * float64\n", + " \n", + " \n", + " float64\n", + " \n", + "" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "array1.layout" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "\n", + " {\"x\": int64, \"y\": var * float64}\n", + " \n", + " \n", + " int64\n", + " \n", + " \n", + " \n", + " \n", + " var * float64\n", + " \n", + " \n", + " float64\n", + " \n", + " \n", + " \n", + "" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "array2.layout" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "These classes are defined in C++ and wrapped by pybind11. The `awkward1.Array` class is pure Python. Many of the same operations work for layout classes, though less attention has been paid to their interfaces." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "3 * var * float64" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ak.typeof(array1)" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "var * float64" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ak.typeof(array1.layout)" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'[[1.1,2.2,3.3],[],[4.4,5.5]]'" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ak.tojson(array1)" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'[[1.1,2.2,3.3],[],[4.4,5.5]]'" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ak.tojson(array1.layout)" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'[[1.1,2.2,3.3],[],[4.4,5.5]]'" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "array1.layout.tojson()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Behavioral mix-ins\n", + "\n", + "The primary use of Awkward arrays so far has been to represent arrays or jagged arrays of physics objects with physics methods on the array objects themselves. 
In Awkward 0.x, this was implemented with Python multiple inheritance, but that's a Python-only solution that can't be passed into C++ (and it was brittle: easy for an array component to lose its methods).\n", + "\n", + "Now behavioral mix-ins are a \"first class citizen,\" built into Awkward 1.0's type system." + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [], + "source": [ + "class PointClass(ak.Record):\n", + " def __repr__(self):\n", + " return \"\".format(self[\"x\"], self[\"y\"])\n", + " \n", + " def mag(self):\n", + " return abs(np.sqrt(self[\"x\"]**2 + self[\"y\"]**2))\n", + "\n", + "ak.namespace[\"Point\"] = PointClass" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "array3 = ak.Array([{\"x\": 1, \"y\": 1.1}, {\"x\": 2, \"y\": 2.2}, {\"x\": 3, \"y\": 3.3}])\n", + "array3" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{\"x\": int64, \"y\": float64}" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "array3.layout.type" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Types can have arbitrary parameters, which modify their meaning. These types are JSON-encoded and passed through C++ or wherever the arrays get sent." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "struct[[\"x\", \"y\"], [int64, float64], parameters={\"__class__\": \"Point\"}]" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pointtype = array3.layout.type\n", + "pointtype[\"__class__\"] = \"Point\"\n", + "pointtype" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "PointType[int64, float64]" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pointtype[\"__str__\"] = \"PointType[{}, {}]\".format(pointtype.field(\"x\"), pointtype.field(\"y\"))\n", + "pointtype" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + ", ... ] type='3 * PointType[int64, float64]'>" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# There will be a better interface for assigning types...\n", + "array4 = ak.Array(array3.layout, type=ak.ArrayType(pointtype, len(array3.layout)))\n", + "array4" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[1.4866068747318506, 2.973213749463701, 4.459820624195552]" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "[x.mag() for x in array4]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The elements of this array are `PointClass` instances because the `__class__` parameter is `\"Point\"`, a name that is recognized in Awkward's class namespace." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'char': awkward1.behavior.string.CharBehavior,\n", + " 'string': awkward1.behavior.string.StringBehavior,\n", + " 'Point': __main__.PointClass}" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ak.namespace" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "As you can see, arrays of characters and variable-length strings are implemented as mix-ins. Apart from this type annotation, a string is just a jagged array of 8-bit integers." + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "array5 = ak.Array([\"Daisy\", \"Daisy\", \"give\", \"me\", \"your\", \"answer\", \"do.\"])\n", + "array5" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "\n", + " string\n", + " \n", + " \n", + " utf8\n", + " \n", + "" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "array5.layout" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[[68, 97, 105, 115, 121],\n", + " [68, 97, 105, 115, 121],\n", + " [103, 105, 118, 101],\n", + " [109, 101],\n", + " [121, 111, 117, 114],\n", + " [97, 110, 115, 119, 101, 114],\n", + " [100, 111, 46]]" + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ak.tolist(array5.layout)" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[[97, 105, 115, 121],\n", + " [97, 105, 115, 121],\n", + " [105, 
118, 101],\n", + " [101],\n", + " [111, 117, 114],\n", + " [110, 115, 119, 101, 114],\n", + " [111, 46]]" + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ak.tolist(array5.layout[:, 1:])" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "array5[:, 1:]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The string interpretation is _only_ applied to the high-level `Array` and _not_ to the layout classes. Thus,\n", + "\n", + " * superclass-based mix-ins don't have to be captured and passed on through all operations,\n", + " * mix-ins can pass through C++ because they are only JSON-encoded type parameters, not a Python class,\n", + " * mix-in classes don't have to be dynamically generated (`PointClass` has a \"fixed address\" for pickling),\n", + " * the mechanism for array mix-ins (e.g. `string`) is the same as for producing objects (e.g. `PointClass`); there is no need to introduce an `ObjectArray`,\n", + " * unlike old Awkward's `ObjectArray`, these records remain Awkward data structures when instantiated." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "array4[2]" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "\n", + " \n", + " PointType[int64, float64]\n", + " \n", + " \n", + " int64\n", + " \n", + " \n", + " \n", + " \n", + " float64\n", + " \n", + " \n", + " \n", + "" + ] + }, + "execution_count": 31, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "array4[2].layout" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Creating arrays\n", + "\n", + "A few of the examples above create arrays by passing them to the `Array` constructor. This is like old Awkward's `fromiter` function. In fact, new Awkward has a `fromiter` function, but it's implicitly called by the `Array` constructor." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 32, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Calls ak.fromiter, which converts rowwise → columnar data.\n", + "ak.Array([[1.1, 2.2, 3.3], [], [4.4, 5.5]])" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 33, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Calls ak.fromjson, which deserializes.\n", + "ak.Array(\"[[1.1, 2.2, 3.3], [], [4.4, 5.5]]\")" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 34, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Calls ak.fromnumpy, which views.\n", + "nparray = np.array([[1.1, 2.2, 3.3], [4.4, 5.5, 6.6]])\n", + "akarray = ak.Array(nparray)\n", + "akarray" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 35, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "nparray[0, 1] = 999\n", + "akarray" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "You can also build these manually from the layouts, but it's a lot of work!" 
+ ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "\n", + " \n", + " \n", + "" + ] + }, + "execution_count": 36, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "content = ak.layout.NumpyArray(np.array([1.1, 2.2, 3.3, 4.4, 5.5]))\n", + "offsets = ak.layout.Index64(np.array([0, 3, 3, 5], dtype=np.int64)) # match 64-bit to avoid copy\n", + "listoffsetarray = ak.layout.ListOffsetArray64(offsets, content)\n", + "listoffsetarray" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 37, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ak.Array(listoffsetarray)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## FillableArray\n", + "\n", + "The `fromiter` algorithm has been expanded into a builder interface, so that you can accumulate Awkward arrays." + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 38, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "builder = ak.FillableArray()\n", + "\n", + "for i in range(10):\n", + " builder.beginrecord()\n", + " builder.field(\"x\")\n", + " builder.real(np.random.normal())\n", + " builder.field(\"y\")\n", + " builder.beginlist()\n", + " for j in range(np.random.poisson(2.5)):\n", + " builder.integer(np.random.randint(0, 10))\n", + " builder.endlist()\n", + " builder.endrecord()\n", + "\n", + "builder" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This is not a regular array, but you can `snapshot` it to get one (and keep filling the `builder`). A `snapshot` does not copy array data: if you take several snapshots while filling, they might share data." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 39, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 39, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "array6 = builder.snapshot()\n", + "array6" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[{'x': -2.823186977247066, 'y': [2, 6]},\n", + " {'x': 0.5318433745958818, 'y': [4, 4, 3]},\n", + " {'x': 1.1538269293945211, 'y': [5, 1, 6]},\n", + " {'x': -0.8541769769933868, 'y': [3, 7]},\n", + " {'x': 0.41427084771072303, 'y': [9, 1, 3, 0]},\n", + " {'x': 0.9580185172841779, 'y': [1]},\n", + " {'x': 1.7301223467797593, 'y': [9]},\n", + " {'x': 0.2735825444022002, 'y': [9, 2, 1]},\n", + " {'x': 1.2176011137273288, 'y': [7, 7, 8, 0, 2, 4]},\n", + " {'x': 0.6948803862812712, 'y': [0, 5, 2]}]" + ] + }, + "execution_count": 40, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ak.tolist(array6)" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "10 * {\"x\": float64, \"y\": var * int64}" + ] + }, + "execution_count": 41, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ak.typeof(array6)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The array that you produce can have nested structure, as shown above. The structure was determined by the order in which `builder` methods were called.\n", + "\n", + "You can write algorithms that build arrays as if you were printing out JSON:\n", + "\n", + " * call `beginlist()` instead of printing `\"[\"`,\n", + " * call `endlist()` instead of printing `\"]\"`,\n", + " * call `beginrecord()` instead of printing `\"{\"`,\n", + " * call `endrecord()` instead of printing `\"}\"`,\n", + " * call `field(key)` instead of printing `\"key\":`, etc." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 42, + "metadata": {}, + "outputs": [], + "source": [ + "deepbuilder = ak.FillableArray()\n", + "\n", + "def deepnesting(depth):\n", + " if depth == 0:\n", + " deepbuilder.integer(np.random.randint(0, 10))\n", + " else:\n", + " deepbuilder.beginlist()\n", + " for j in range(np.random.poisson(2.5)):\n", + " deepnesting(depth - 1)\n", + " deepbuilder.endlist()\n", + "\n", + "deepnesting(5)" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[[[[[[8, 2, 2, 3, 0, 4], [5, 4, 4], [0, 8]], [[6, 5, 7, 1], [7, 8, 5]]],\n", + " [[[3]],\n", + " [[6, 5, 8], [3, 4, 5, 8, 2, 8, 1], [6, 9, 5], [8, 5, 7, 3, 4], [3]]]]]]" + ] + }, + "execution_count": 43, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ak.tolist(deepbuilder.snapshot())" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "1 * var * var * var * var * var * int64" + ] + }, + "execution_count": 44, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ak.typeof(deepbuilder)" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "\n", + " var * var * var * var * var * int64\n", + " \n", + " \n", + " var * var * var * var * int64\n", + " \n", + " \n", + " var * var * var * int64\n", + " \n", + " \n", + " var * var * int64\n", + " \n", + " \n", + " var * int64\n", + " \n", + " \n", + " int64\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "" + ] + }, + "execution_count": 45, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "deepbuilder.snapshot().layout" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Both `fromiter` and `fromjson` are implemented using `FillableArray`, the latter using the RapidJSON C++ library for 
deserialization." + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "metadata": {}, + "outputs": [], + "source": [ + "# !wget https://scikit-hep.org/uproot/examples/HZZ.json" + ] + }, + { + "cell_type": "code", + "execution_count": 47, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 47, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "hzz = ak.fromjson(\"HZZ.json\")\n", + "hzz" + ] + }, + { + "cell_type": "code", + "execution_count": 48, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "jets 2421 * var * {\"px\": float64, \"py\": float64, \"pz\": float64, \"E\": float64, \"id\": bool}\n", + "muons 2421 * var * {\"px\": float64, \"py\": float64, \"pz\": float64, \"E\": float64, \"q\": int64, \"iso\": float64}\n", + "electrons 2421 * var * {\"px\": float64, \"py\": float64, \"pz\": float64, \"E\": float64, \"q\": int64, \"iso\": float64}\n", + "photons 2421 * var * {\"px\": float64, \"py\": float64, \"pz\": float64, \"E\": float64, \"iso\": float64}\n", + "MET 2421 * {\"x\": float64, \"y\": float64}\n", + "MC_hadronic_b 2421 * {\"px\": float64, \"py\": float64, \"pz\": float64}\n", + "MC_leptonic_b 2421 * {\"px\": float64, \"py\": float64, \"pz\": float64}\n", + "MC_hadronicW_q 2421 * {\"px\": float64, \"py\": float64, \"pz\": float64}\n", + "MC_hadronicW_qbar 2421 * {\"px\": float64, \"py\": float64, \"pz\": float64}\n", + "MC_lepton 2421 * {\"px\": float64, \"py\": float64, \"pz\": float64, \"pdgid\": int64}\n", + "MC_neutrino 2421 * {\"px\": float64, \"py\": float64, \"pz\": float64}\n", + "num_PV 2421 * int64\n", + "trigger_isomu24 2421 * bool\n", + "weight 2421 * float64\n" + ] + } + ], + "source": [ + "for key in hzz.layout.keys():\n", + " print(\"{:18s} {}\".format(key, hzz[key].type))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "As such, it's about 10× faster than the old version." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 49, + "metadata": {}, + "outputs": [], + "source": [ + "import awkward as oldawkward\n", + "import json\n", + "asjson = open(\"HZZ.json\").read()\n", + "aslist = json.load(open(\"HZZ.json\"))" + ] + }, + { + "cell_type": "code", + "execution_count": 50, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "453 ms ± 43.6 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n" + ] + } + ], + "source": [ + "%%timeit\n", + "\n", + "oldawkward.fromiter(aslist)" + ] + }, + { + "cell_type": "code", + "execution_count": 51, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "44.3 ms ± 2.1 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)\n" + ] + } + ], + "source": [ + "%%timeit\n", + "\n", + "ak.fromiter(aslist)" + ] + }, + { + "cell_type": "code", + "execution_count": 52, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "526 ms ± 33.3 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n" + ] + } + ], + "source": [ + "%%timeit\n", + "\n", + "oldawkward.fromiter(json.loads(asjson))" + ] + }, + { + "cell_type": "code", + "execution_count": 53, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "26.1 ms ± 2 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)\n" + ] + } + ], + "source": [ + "%%timeit\n", + "\n", + "ak.fromjson(asjson)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Awkward arrays in Numba\n", + "\n", + "One of the motivating goals of the Awkward re-write was to incorporate Numba on the same footing." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Awkward arrays in C++\n", + "\n", + "Since everything has been implemented in C++, it can be used in C++ programs. 
More importantly, we will (someday) be able to create Awkward arrays in C++ and access them in Python or vice-versa.\n", + "\n", + "Here is a standalone example from the unit tests." + ] + }, + { + "cell_type": "code", + "execution_count": 54, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[38;5;66m// BSD 3-Clause License; see https://github.com/jpivarski/awkward-1.0/blob/master/LICENSE\u001b[39m\n", + "\n", + "\u001b[38;5;136m#\u001b[39m\u001b[38;5;136minclude\u001b[39m \u001b[38;5;66m\"awkward/Slice.h\"\u001b[39m\n", + "\u001b[38;5;136m#\u001b[39m\u001b[38;5;136minclude\u001b[39m \u001b[38;5;66m\"awkward/fillable/FillableArray.h\"\u001b[39m\n", + "\u001b[38;5;136m#\u001b[39m\u001b[38;5;136minclude\u001b[39m \u001b[38;5;66m\"awkward/fillable/FillableOptions.h\"\u001b[39m\n", + "\n", + "\u001b[38;5;28;01mnamespace\u001b[39;00m ak \u001b[38;5;241m=\u001b[39m awkward;\n", + "\n", + "\u001b[38;5;125mint\u001b[39m \u001b[38;5;21mmain\u001b[39m(\u001b[38;5;125mint\u001b[39m, \u001b[38;5;125mchar\u001b[39m\u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39m) {\n", + " std\u001b[38;5;241m:\u001b[39m\u001b[38;5;241m:\u001b[39mvector\u001b[38;5;241m<\u001b[39mstd\u001b[38;5;241m:\u001b[39m\u001b[38;5;241m:\u001b[39mvector\u001b[38;5;241m<\u001b[39mstd\u001b[38;5;241m:\u001b[39m\u001b[38;5;241m:\u001b[39mvector\u001b[38;5;241m<\u001b[39m\u001b[38;5;125mdouble\u001b[39m\u001b[38;5;241m>\u001b[39m\u001b[38;5;241m>\u001b[39m\u001b[38;5;241m>\u001b[39m vector \u001b[38;5;241m=\u001b[39m\n", + " {{{\u001b[38;5;241m0.0\u001b[39m, \u001b[38;5;241m1.1\u001b[39m, \u001b[38;5;241m2.2\u001b[39m}, {}, {\u001b[38;5;241m3.3\u001b[39m, \u001b[38;5;241m4.4\u001b[39m}}, {{\u001b[38;5;241m5.5\u001b[39m}}, {}, {{\u001b[38;5;241m6.6\u001b[39m, \u001b[38;5;241m7.7\u001b[39m, \u001b[38;5;241m8.8\u001b[39m, \u001b[38;5;241m9.9\u001b[39m}}};\n", + "\n", + " ak\u001b[38;5;241m:\u001b[39m\u001b[38;5;241m:\u001b[39mFillableArray 
builder(ak\u001b[38;5;241m:\u001b[39m\u001b[38;5;241m:\u001b[39mFillableOptions(\u001b[38;5;241m1024\u001b[39m, \u001b[38;5;241m2.0\u001b[39m));\n", + " \u001b[38;5;28;01mfor\u001b[39;00m (\u001b[38;5;28;01mauto\u001b[39;00m \u001b[38;5;142mx\u001b[39m : vector) builder.fill(x);\n", + " std\u001b[38;5;241m:\u001b[39m\u001b[38;5;241m:\u001b[39mshared_ptr\u001b[38;5;241m<\u001b[39mak\u001b[38;5;241m:\u001b[39m\u001b[38;5;241m:\u001b[39mContent\u001b[38;5;241m>\u001b[39m array \u001b[38;5;241m=\u001b[39m builder.snapshot();\n", + "\n", + " ak\u001b[38;5;241m:\u001b[39m\u001b[38;5;241m:\u001b[39mSlice slice;\n", + " slice.append(ak\u001b[38;5;241m:\u001b[39m\u001b[38;5;241m:\u001b[39mSliceRange(ak\u001b[38;5;241m:\u001b[39m\u001b[38;5;241m:\u001b[39mSlice\u001b[38;5;241m:\u001b[39m\u001b[38;5;241m:\u001b[39mnone(), ak\u001b[38;5;241m:\u001b[39m\u001b[38;5;241m:\u001b[39mSlice\u001b[38;5;241m:\u001b[39m\u001b[38;5;241m:\u001b[39mnone(), \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1\u001b[39m));\n", + " slice.append(ak\u001b[38;5;241m:\u001b[39m\u001b[38;5;241m:\u001b[39mSliceRange(ak\u001b[38;5;241m:\u001b[39m\u001b[38;5;241m:\u001b[39mSlice\u001b[38;5;241m:\u001b[39m\u001b[38;5;241m:\u001b[39mnone(), ak\u001b[38;5;241m:\u001b[39m\u001b[38;5;241m:\u001b[39mSlice\u001b[38;5;241m:\u001b[39m\u001b[38;5;241m:\u001b[39mnone(), \u001b[38;5;241m2\u001b[39m));\n", + " slice.append(ak\u001b[38;5;241m:\u001b[39m\u001b[38;5;241m:\u001b[39mSliceRange(\u001b[38;5;241m1\u001b[39m, ak\u001b[38;5;241m:\u001b[39m\u001b[38;5;241m:\u001b[39mSlice\u001b[38;5;241m:\u001b[39m\u001b[38;5;241m:\u001b[39mnone(), ak\u001b[38;5;241m:\u001b[39m\u001b[38;5;241m:\u001b[39mSlice\u001b[38;5;241m:\u001b[39m\u001b[38;5;241m:\u001b[39mnone()));\n", + "\n", + " \u001b[38;5;28;01mif\u001b[39;00m (array.get()\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39mgetitem(slice).get()\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39mtojson(\u001b[38;5;28mfalse\u001b[39m, \u001b[38;5;241m1\u001b[39m) 
\u001b[38;5;241m!\u001b[39m\u001b[38;5;241m=\u001b[39m\n", + " \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m[[[7.7,8.8,9.9]],[],[[]],[[1.1,2.2],[4.4]]]\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n", + " \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1\u001b[39m;\n", + " \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;241m0\u001b[39m;\n", + "}\n", + "\n" + ] + } + ], + "source": [ + "import pygments.formatters\n", + "import pygments.lexers.c_cpp\n", + "\n", + "cpp_code = open(os.path.join(os.getcwd(), \"..\", \"..\", \"tests\", \"test_PR019_use_json_library.cpp\")).read()\n", + "print(pygments.highlight(cpp_code, pygments.lexers.c_cpp.CppLexer(),\n", + " pygments.formatters.Terminal256Formatter()))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Below is the same thing in Python, demonstrating equivalence." + ] + }, + { + "cell_type": "code", + "execution_count": 55, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'[[[7.7,8.8,9.9]],[],[[]],[[1.1,2.2],[4.4]]]'" + ] + }, + "execution_count": 55, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "vector = [[[0.0, 1.1, 2.2], [], [3.3, 4.4]], [[5.5]], [], [[6.6, 7.7, 8.8, 9.9]]]\n", + "\n", + "builder = ak.layout.FillableArray()\n", + "for x in vector: builder.fill(x)\n", + "array = builder.snapshot()\n", + "\n", + "array[::-1, ::2, 1::].tojson()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Since the `FillableArray` is available on the C++ side, conventional for loops will be able to make arbitrarily complex Awkward arrays, then send them over to Python for analysis." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Identities: database-like index for arrays\n", + "\n", + "In the [PartiQL toy language](https://github.com/jpivarski/PartiQL#readme), it became apparent that set operations, in which unique records are identified by reference, rather than by value, are important. They provide such operations as joins and lossless unions.\n", + "\n", + "No set operations have been implemented, but implementing them will require an index that tracks particle identities through all other operations. This concept of an index is the primary distinction between an array library like NumPy and a relational library like Pandas. In Awkward, this index is called an `Identity` and can optionally be attached to arrays." + ] + }, + { + "cell_type": "code", + "execution_count": 56, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 56, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "hzz = ak.fromjson(\"HZZ.json\")\n", + "hzz" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.3" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/include/awkward/Content.h b/include/awkward/Content.h index b9fd0bab37..d6bb2e8269 100644 --- a/include/awkward/Content.h +++ b/include/awkward/Content.h @@ -14,21 +14,20 @@ namespace awkward { class Content { public: - Content(std::shared_ptr id, std::shared_ptr type): id_(id), type_(type) { } - virtual ~Content() { } + Content(const std::shared_ptr& id, const std::shared_ptr& type); + virtual ~Content(); virtual bool isscalar() const; virtual const std::string classname() const = 0; 
virtual const std::shared_ptr id() const; virtual void setid() = 0; - virtual void setid(const std::shared_ptr id) = 0; - virtual const std::string tostring_part(const std::string indent, const std::string pre, const std::string post) const = 0; + virtual void setid(const std::shared_ptr& id) = 0; + virtual bool isbare() const; + virtual bool istypeptr(Type* pointer) const; + virtual const std::shared_ptr type() const = 0; + virtual const std::shared_ptr astype(const std::shared_ptr& type) const = 0; + virtual const std::string tostring_part(const std::string& indent, const std::string& pre, const std::string& post) const = 0; virtual void tojson_part(ToJson& builder) const = 0; - virtual const std::shared_ptr innertype(bool bare) const = 0; - virtual const std::shared_ptr type() const; - virtual void settype(const std::shared_ptr type); - virtual void settype_part(const std::shared_ptr type) = 0; - virtual bool accepts(const std::shared_ptr type) = 0; virtual int64_t length() const = 0; virtual const std::shared_ptr shallow_copy() const = 0; virtual void check_for_iteration() const = 0; @@ -40,7 +39,7 @@ namespace awkward { virtual const std::shared_ptr getitem_field(const std::string& key) const = 0; virtual const std::shared_ptr getitem_fields(const std::vector& keys) const = 0; virtual const std::shared_ptr getitem(const Slice& where) const; - virtual const std::shared_ptr getitem_next(const std::shared_ptr head, const Slice& tail, const Index64& advanced) const; + virtual const std::shared_ptr getitem_next(const std::shared_ptr& head, const Slice& tail, const Index64& advanced) const; virtual const std::shared_ptr carry(const Index64& carry) const = 0; virtual const std::pair minmax_depth() const = 0; virtual int64_t numfields() const = 0; @@ -51,13 +50,13 @@ namespace awkward { virtual const std::vector keyaliases(const std::string& key) const = 0; virtual const std::vector keys() const = 0; - bool isbare() const; - const std::shared_ptr baretype() const; 
const std::string tostring() const; const std::string tojson(bool pretty, int64_t maxdecimals) const; void tojson(FILE* destination, bool pretty, int64_t maxdecimals, int64_t buffersize) const; protected: + virtual void checktype() const = 0; + virtual const std::shared_ptr getitem_next(const SliceAt& at, const Slice& tail, const Index64& advanced) const = 0; virtual const std::shared_ptr getitem_next(const SliceRange& range, const Slice& tail, const Index64& advanced) const = 0; virtual const std::shared_ptr getitem_next(const SliceEllipsis& ellipsis, const Slice& tail, const Index64& advanced) const; @@ -66,11 +65,11 @@ namespace awkward { virtual const std::shared_ptr getitem_next(const SliceField& field, const Slice& tail, const Index64& advanced) const; virtual const std::shared_ptr getitem_next(const SliceFields& fields, const Slice& tail, const Index64& advanced) const; - const std::shared_ptr getitem_next_array_wrap(const std::shared_ptr outcontent, const std::vector& shape) const; + const std::shared_ptr getitem_next_array_wrap(const std::shared_ptr& outcontent, const std::vector& shape) const; protected: std::shared_ptr id_; - std::shared_ptr type_; + const std::shared_ptr type_; }; } diff --git a/include/awkward/Identity.h b/include/awkward/Identity.h index 9e1bd9f7e2..fcfdd1022e 100644 --- a/include/awkward/Identity.h +++ b/include/awkward/Identity.h @@ -17,25 +17,19 @@ namespace awkward { typedef std::vector> FieldLoc; static Ref newref(); - static std::shared_ptr none() { return std::shared_ptr(nullptr); } + static std::shared_ptr none(); - Identity(const Ref ref, const FieldLoc fieldloc, int64_t offset, int64_t width, int64_t length) - : ref_(ref) - , fieldloc_(fieldloc) - , offset_(offset) - , width_(width) - , length_(length) { } - - const Ref ref() const { return ref_; } - const FieldLoc fieldloc() const { return fieldloc_; } - const int64_t offset() const { return offset_; } - const int64_t width() const { return width_; } - const int64_t 
length() const { return length_; } + Identity(const Ref ref, const FieldLoc& fieldloc, int64_t offset, int64_t width, int64_t length); + const Ref ref() const; + const FieldLoc fieldloc() const; + const int64_t offset() const; + const int64_t width() const; + const int64_t length() const; virtual const std::string classname() const = 0; virtual const std::string location_at(int64_t where) const = 0; virtual const std::shared_ptr to64() const = 0; - virtual const std::string tostring_part(const std::string indent, const std::string pre, const std::string post) const = 0; + virtual const std::string tostring_part(const std::string& indent, const std::string& pre, const std::string& post) const = 0; virtual const std::shared_ptr getitem_range_nowrap(int64_t start, int64_t stop) const = 0; virtual const std::shared_ptr shallow_copy() const = 0; virtual const std::shared_ptr getitem_carry_64(const Index64& carry) const = 0; @@ -55,24 +49,20 @@ namespace awkward { template class IdentityOf: public Identity { public: - IdentityOf(const Ref ref, const FieldLoc fieldloc, int64_t width, int64_t length) - : Identity(ref, fieldloc, 0, width, length) - , ptr_(std::shared_ptr(length*width == 0 ? 
nullptr : new T[(size_t)(length*width)], awkward::util::array_deleter())) { } - IdentityOf(const Ref ref, const FieldLoc fieldloc, int64_t offset, int64_t width, int64_t length, const std::shared_ptr ptr) - : Identity(ref, fieldloc, offset, width, length) - , ptr_(ptr) { } - - const std::shared_ptr ptr() const { return ptr_; } - - virtual const std::string classname() const; - virtual const std::string location_at(int64_t at) const; - virtual const std::shared_ptr to64() const; - virtual const std::string tostring_part(const std::string indent, const std::string pre, const std::string post) const; - virtual const std::shared_ptr getitem_range_nowrap(int64_t start, int64_t stop) const; - virtual const std::shared_ptr shallow_copy() const; - virtual const std::shared_ptr getitem_carry_64(const Index64& carry) const; - virtual const std::shared_ptr withfieldloc(const FieldLoc& fieldloc) const; - virtual int64_t value(int64_t row, int64_t col) const; + IdentityOf(const Ref ref, const FieldLoc& fieldloc, int64_t width, int64_t length); + IdentityOf(const Ref ref, const FieldLoc& fieldloc, int64_t offset, int64_t width, int64_t length, const std::shared_ptr ptr); + + const std::shared_ptr ptr() const; + + const std::string classname() const override; + const std::string location_at(int64_t at) const override; + const std::shared_ptr to64() const override; + const std::string tostring_part(const std::string& indent, const std::string& pre, const std::string& post) const override; + const std::shared_ptr getitem_range_nowrap(int64_t start, int64_t stop) const override; + const std::shared_ptr shallow_copy() const override; + const std::shared_ptr getitem_carry_64(const Index64& carry) const override; + const std::shared_ptr withfieldloc(const FieldLoc& fieldloc) const override; + int64_t value(int64_t row, int64_t col) const override; const std::vector getitem_at(int64_t at) const; const std::vector getitem_at_nowrap(int64_t at) const; diff --git a/include/awkward/Index.h 
b/include/awkward/Index.h index 40f7a59c4f..6efa9f3fec 100644 --- a/include/awkward/Index.h +++ b/include/awkward/Index.h @@ -12,33 +12,29 @@ namespace awkward { class Index { virtual const std::shared_ptr shallow_copy() const = 0; + virtual const std::shared_ptr deep_copy() const = 0; }; template class IndexOf: public Index { public: - IndexOf(int64_t length) - : ptr_(std::shared_ptr(length == 0 ? nullptr : new T[(size_t)length], awkward::util::array_deleter())) - , offset_(0) - , length_(length) { } - IndexOf(const std::shared_ptr ptr, int64_t offset, int64_t length) - : ptr_(ptr) - , offset_(offset) - , length_(length) { } - - const std::shared_ptr ptr() const { return ptr_; } - int64_t offset() const { return offset_; } - int64_t length() const { return length_; } + IndexOf(int64_t length); + IndexOf(const std::shared_ptr& ptr, int64_t offset, int64_t length); + + const std::shared_ptr ptr() const; + int64_t offset() const; + int64_t length() const; const std::string classname() const; const std::string tostring() const; - const std::string tostring_part(const std::string indent, const std::string pre, const std::string post) const; + const std::string tostring_part(const std::string& indent, const std::string& pre, const std::string& post) const; T getitem_at(int64_t at) const; T getitem_at_nowrap(int64_t at) const; void setitem_at_nowrap(int64_t at, T value) const; IndexOf getitem_range(int64_t start, int64_t stop) const; IndexOf getitem_range_nowrap(int64_t start, int64_t stop) const; - virtual const std::shared_ptr shallow_copy() const; + const std::shared_ptr shallow_copy() const override; + const std::shared_ptr deep_copy() const override; private: const std::shared_ptr ptr_; diff --git a/include/awkward/Iterator.h b/include/awkward/Iterator.h index 4d11460fea..cc9ace2163 100644 --- a/include/awkward/Iterator.h +++ b/include/awkward/Iterator.h @@ -9,19 +9,13 @@ namespace awkward { class Iterator { public: - Iterator(const std::shared_ptr content) - : 
content_(content) - , at_(0) { - content.get()->check_for_iteration(); - } - - const std::shared_ptr content() const { return content_; } - const int64_t at() const { return at_; } + Iterator(const std::shared_ptr& content); + const std::shared_ptr content() const; + const int64_t at() const; const bool isdone() const; const std::shared_ptr next(); - - const std::string tostring_part(const std::string indent, const std::string pre, const std::string post) const; + const std::string tostring_part(const std::string& indent, const std::string& pre, const std::string& post) const; const std::string tostring() const; private: diff --git a/include/awkward/Slice.h b/include/awkward/Slice.h index 50cd3f8a5d..a2ad465caf 100644 --- a/include/awkward/Slice.h +++ b/include/awkward/Slice.h @@ -15,10 +15,8 @@ namespace awkward { class SliceItem { public: - static int64_t none() { return kSliceNone; } - - virtual ~SliceItem() { } - + static int64_t none(); + virtual ~SliceItem(); virtual const std::shared_ptr shallow_copy() const = 0; virtual const std::string tostring() const = 0; virtual bool preserves_type(const std::shared_ptr& type, const Index64& advanced) const = 0; @@ -26,36 +24,26 @@ namespace awkward { class SliceAt: public SliceItem { public: - SliceAt(int64_t at): at_(at) { } - int64_t at() const { return at_; } - virtual const std::shared_ptr shallow_copy() const { - return std::shared_ptr(new SliceAt(at_)); - } - virtual const std::string tostring() const; - virtual bool preserves_type(const std::shared_ptr& type, const Index64& advanced) const { - return false; - } + SliceAt(int64_t at); + int64_t at() const; + const std::shared_ptr shallow_copy() const override; + const std::string tostring() const override; + bool preserves_type(const std::shared_ptr& type, const Index64& advanced) const override; private: const int64_t at_; }; class SliceRange: public SliceItem { public: - SliceRange(int64_t start, int64_t stop, int64_t step): start_(start), stop_(stop), 
step_(step == none() ? 1 : step) { - assert(step_ != 0); - } - int64_t start() const { return start_; } - int64_t stop() const { return stop_; } - int64_t step() const { return step_; } - bool hasstart() const { return start_ != none(); } - bool hasstop() const { return stop_ != none(); } - virtual const std::shared_ptr shallow_copy() const { - return std::shared_ptr(new SliceRange(start_, stop_, step_)); - } - virtual const std::string tostring() const; - virtual bool preserves_type(const std::shared_ptr& type, const Index64& advanced) const { - return true; - } + SliceRange(int64_t start, int64_t stop, int64_t step); + int64_t start() const; + int64_t stop() const; + int64_t step() const; + bool hasstart() const; + bool hasstop() const; + const std::shared_ptr shallow_copy() const override; + const std::string tostring() const override; + bool preserves_type(const std::shared_ptr& type, const Index64& advanced) const override; private: const int64_t start_; const int64_t stop_; @@ -64,47 +52,32 @@ namespace awkward { class SliceEllipsis: public SliceItem { public: - SliceEllipsis() { } - virtual const std::shared_ptr shallow_copy() const { - return std::shared_ptr(new SliceEllipsis()); - } - virtual const std::string tostring() const; - virtual bool preserves_type(const std::shared_ptr& type, const Index64& advanced) const { - return true; - } + SliceEllipsis(); + const std::shared_ptr shallow_copy() const override; + const std::string tostring() const override; + bool preserves_type(const std::shared_ptr& type, const Index64& advanced) const override; }; class SliceNewAxis: public SliceItem { public: - SliceNewAxis() { } - virtual const std::shared_ptr shallow_copy() const { - return std::shared_ptr(new SliceNewAxis()); - } - virtual const std::string tostring() const; - virtual bool preserves_type(const std::shared_ptr& type, const Index64& advanced) const { - return false; - } + SliceNewAxis(); + const std::shared_ptr shallow_copy() const override; + const 
std::string tostring() const override; + bool preserves_type(const std::shared_ptr& type, const Index64& advanced) const override; }; template class SliceArrayOf: public SliceItem { public: - SliceArrayOf(const IndexOf& index, const std::vector& shape, const std::vector& strides): index_(index), shape_(shape), strides_(strides) { - assert(shape_.size() != 0); - assert(shape_.size() == strides_.size()); - } - const IndexOf index() const { return index_; } - const int64_t length() const { return shape_[0]; } - const std::vector shape() const { return shape_; } - const std::vector strides() const { return strides_; } - int64_t ndim() const { return (int64_t)shape_.size(); } - virtual const std::shared_ptr shallow_copy() const { - return std::shared_ptr(new SliceArrayOf(index_, shape_, strides_)); - } - virtual const std::string tostring() const; - virtual bool preserves_type(const std::shared_ptr& type, const Index64& advanced) const { - return advanced.length() == 0; - } + SliceArrayOf(const IndexOf& index, const std::vector& shape, const std::vector& strides); + const IndexOf index() const; + const int64_t length() const; + const std::vector shape() const; + const std::vector strides() const; + int64_t ndim() const; + const std::shared_ptr shallow_copy() const override; + const std::string tostring() const override; + bool preserves_type(const std::shared_ptr& type, const Index64& advanced) const override; const std::string tostring_part() const; const IndexOf ravel() const; private: @@ -117,44 +90,35 @@ namespace awkward { class SliceField: public SliceItem { public: - SliceField(const std::string& key): key_(key) { } - const std::string key() const { return key_; } - virtual const std::shared_ptr shallow_copy() const { - return std::shared_ptr(new SliceField(key_)); - } - virtual const std::string tostring() const; - virtual bool preserves_type(const std::shared_ptr& type, const Index64& advanced) const { - return false; - } + SliceField(const std::string& key); + 
const std::string key() const; + const std::shared_ptr shallow_copy() const override; + const std::string tostring() const override; + bool preserves_type(const std::shared_ptr& type, const Index64& advanced) const override; private: const std::string key_; }; class SliceFields: public SliceItem { public: - SliceFields(const std::vector& keys): keys_(keys) { } - const std::vector keys() const { return keys_; } - virtual const std::shared_ptr shallow_copy() const { - return std::shared_ptr(new SliceFields(keys_)); - } - virtual const std::string tostring() const; - virtual bool preserves_type(const std::shared_ptr& type, const Index64& advanced) const { - return type.get() != nullptr && type.get()->numfields() != -1 && util::subset(keys_, type.get()->keys()); - } + SliceFields(const std::vector& keys); + const std::vector keys() const; + const std::shared_ptr shallow_copy() const override; + const std::string tostring() const override; + bool preserves_type(const std::shared_ptr& type, const Index64& advanced) const override; private: const std::vector keys_; }; class Slice { public: - static int64_t none() { return SliceItem::none(); } - - Slice(): items_(std::vector>()), sealed_(false) { } - Slice(const std::vector> items): items_(items), sealed_(false) { } - Slice(const std::vector> items, bool sealed): items_(items), sealed_(sealed) { } - const std::vector> items() const { return items_; } - bool sealed() const { return sealed_; } + static int64_t none(); + Slice(); + Slice(const std::vector>& items); + Slice(const std::vector>& items, bool sealed); + const std::vector> items() const; + bool sealed() const; int64_t length() const; int64_t dimlength() const; const std::shared_ptr head() const; diff --git a/include/awkward/array/EmptyArray.h b/include/awkward/array/EmptyArray.h index fa15aa309b..74dec52f2a 100644 --- a/include/awkward/array/EmptyArray.h +++ b/include/awkward/array/EmptyArray.h @@ -15,43 +15,43 @@ namespace awkward { class EmptyArray: public 
Content { public: - EmptyArray(const std::shared_ptr id, const std::shared_ptr type) - : Content(id, type) { } - - virtual const std::string classname() const; - virtual void setid(); - virtual void setid(const std::shared_ptr id); - virtual const std::string tostring_part(const std::string indent, const std::string pre, const std::string post) const; - virtual void tojson_part(ToJson& builder) const; - virtual const std::shared_ptr innertype(bool bare) const; - virtual void settype_part(const std::shared_ptr type); - virtual bool accepts(const std::shared_ptr type); - virtual int64_t length() const; - virtual const std::shared_ptr shallow_copy() const; - virtual void check_for_iteration() const; - virtual const std::shared_ptr getitem_nothing() const; - virtual const std::shared_ptr getitem_at(int64_t at) const; - virtual const std::shared_ptr getitem_at_nowrap(int64_t at) const; - virtual const std::shared_ptr getitem_range(int64_t start, int64_t stop) const; - virtual const std::shared_ptr getitem_range_nowrap(int64_t start, int64_t stop) const; - virtual const std::shared_ptr getitem_field(const std::string& key) const; - virtual const std::shared_ptr getitem_fields(const std::vector& keys) const; - virtual const std::shared_ptr carry(const Index64& carry) const; - virtual const std::pair minmax_depth() const; - virtual int64_t numfields() const; - virtual int64_t fieldindex(const std::string& key) const; - virtual const std::string key(int64_t fieldindex) const; - virtual bool haskey(const std::string& key) const; - virtual const std::vector keyaliases(int64_t fieldindex) const; - virtual const std::vector keyaliases(const std::string& key) const; - virtual const std::vector keys() const; + EmptyArray(const std::shared_ptr& id, const std::shared_ptr& type); + + const std::string classname() const override; + void setid() override; + void setid(const std::shared_ptr& id) override; + const std::shared_ptr type() const override; + const std::shared_ptr 
astype(const std::shared_ptr& type) const override; + const std::string tostring_part(const std::string& indent, const std::string& pre, const std::string& post) const override; + void tojson_part(ToJson& builder) const override; + int64_t length() const override; + const std::shared_ptr shallow_copy() const override; + void check_for_iteration() const override; + const std::shared_ptr getitem_nothing() const override; + const std::shared_ptr getitem_at(int64_t at) const override; + const std::shared_ptr getitem_at_nowrap(int64_t at) const override; + const std::shared_ptr getitem_range(int64_t start, int64_t stop) const override; + const std::shared_ptr getitem_range_nowrap(int64_t start, int64_t stop) const override; + const std::shared_ptr getitem_field(const std::string& key) const override; + const std::shared_ptr getitem_fields(const std::vector& keys) const override; + const std::shared_ptr carry(const Index64& carry) const override; + const std::pair minmax_depth() const override; + int64_t numfields() const override; + int64_t fieldindex(const std::string& key) const override; + const std::string key(int64_t fieldindex) const override; + bool haskey(const std::string& key) const override; + const std::vector keyaliases(int64_t fieldindex) const override; + const std::vector keyaliases(const std::string& key) const override; + const std::vector keys() const override; protected: - virtual const std::shared_ptr getitem_next(const SliceAt& at, const Slice& tail, const Index64& advanced) const; - virtual const std::shared_ptr getitem_next(const SliceRange& range, const Slice& tail, const Index64& advanced) const; - virtual const std::shared_ptr getitem_next(const SliceArray64& array, const Slice& tail, const Index64& advanced) const; - virtual const std::shared_ptr getitem_next(const SliceField& field, const Slice& tail, const Index64& advanced) const; - virtual const std::shared_ptr getitem_next(const SliceFields& fields, const Slice& tail, const Index64& 
advanced) const; + void checktype() const override; + + const std::shared_ptr getitem_next(const SliceAt& at, const Slice& tail, const Index64& advanced) const override; + const std::shared_ptr getitem_next(const SliceRange& range, const Slice& tail, const Index64& advanced) const override; + const std::shared_ptr getitem_next(const SliceArray64& array, const Slice& tail, const Index64& advanced) const override; + const std::shared_ptr getitem_next(const SliceField& field, const Slice& tail, const Index64& advanced) const override; + const std::shared_ptr getitem_next(const SliceFields& fields, const Slice& tail, const Index64& advanced) const override; }; } diff --git a/include/awkward/array/ListArray.h b/include/awkward/array/ListArray.h index 148d0437bf..c6137888f0 100644 --- a/include/awkward/array/ListArray.h +++ b/include/awkward/array/ListArray.h @@ -14,48 +14,44 @@ namespace awkward { template class ListArrayOf: public Content { public: - ListArrayOf(const std::shared_ptr id, const std::shared_ptr type, const IndexOf starts, const IndexOf stops, const std::shared_ptr content) - : Content(id, type) - , starts_(starts) - , stops_(stops) - , content_(content) { } + ListArrayOf(const std::shared_ptr& id, const std::shared_ptr& type, const IndexOf& starts, const IndexOf& stops, const std::shared_ptr& content); + const IndexOf starts() const; + const IndexOf stops() const; + const std::shared_ptr content() const; - const IndexOf starts() const { return starts_; } - const IndexOf stops() const { return stops_; } - const std::shared_ptr content() const { return content_; } - - virtual const std::string classname() const; - virtual void setid(); - virtual void setid(const std::shared_ptr id); - virtual const std::string tostring_part(const std::string indent, const std::string pre, const std::string post) const; - virtual void tojson_part(ToJson& builder) const; - virtual const std::shared_ptr innertype(bool bare) const; - virtual void settype_part(const 
std::shared_ptr type); - virtual bool accepts(const std::shared_ptr type); - virtual int64_t length() const; - virtual const std::shared_ptr shallow_copy() const; - virtual void check_for_iteration() const; - virtual const std::shared_ptr getitem_nothing() const; - virtual const std::shared_ptr getitem_at(int64_t at) const; - virtual const std::shared_ptr getitem_at_nowrap(int64_t at) const; - virtual const std::shared_ptr getitem_range(int64_t start, int64_t stop) const; - virtual const std::shared_ptr getitem_range_nowrap(int64_t start, int64_t stop) const; - virtual const std::shared_ptr getitem_field(const std::string& key) const; - virtual const std::shared_ptr getitem_fields(const std::vector& keys) const; - virtual const std::shared_ptr carry(const Index64& carry) const; - virtual const std::pair minmax_depth() const; - virtual int64_t numfields() const; - virtual int64_t fieldindex(const std::string& key) const; - virtual const std::string key(int64_t fieldindex) const; - virtual bool haskey(const std::string& key) const; - virtual const std::vector keyaliases(int64_t fieldindex) const; - virtual const std::vector keyaliases(const std::string& key) const; - virtual const std::vector keys() const; + const std::string classname() const override; + void setid() override; + void setid(const std::shared_ptr& id) override; + const std::shared_ptr type() const override; + const std::shared_ptr astype(const std::shared_ptr& type) const override; + const std::string tostring_part(const std::string& indent, const std::string& pre, const std::string& post) const override; + void tojson_part(ToJson& builder) const override; + int64_t length() const override; + const std::shared_ptr shallow_copy() const override; + void check_for_iteration() const override; + const std::shared_ptr getitem_nothing() const override; + const std::shared_ptr getitem_at(int64_t at) const override; + const std::shared_ptr getitem_at_nowrap(int64_t at) const override; + const std::shared_ptr 
getitem_range(int64_t start, int64_t stop) const override; + const std::shared_ptr getitem_range_nowrap(int64_t start, int64_t stop) const override; + const std::shared_ptr getitem_field(const std::string& key) const override; + const std::shared_ptr getitem_fields(const std::vector& keys) const override; + const std::shared_ptr carry(const Index64& carry) const override; + const std::pair minmax_depth() const override; + int64_t numfields() const override; + int64_t fieldindex(const std::string& key) const override; + const std::string key(int64_t fieldindex) const override; + bool haskey(const std::string& key) const override; + const std::vector keyaliases(int64_t fieldindex) const override; + const std::vector keyaliases(const std::string& key) const override; + const std::vector keys() const override; protected: - virtual const std::shared_ptr getitem_next(const SliceAt& at, const Slice& tail, const Index64& advanced) const; - virtual const std::shared_ptr getitem_next(const SliceRange& range, const Slice& tail, const Index64& advanced) const; - virtual const std::shared_ptr getitem_next(const SliceArray64& array, const Slice& tail, const Index64& advanced) const; + void checktype() const override; + + const std::shared_ptr getitem_next(const SliceAt& at, const Slice& tail, const Index64& advanced) const override; + const std::shared_ptr getitem_next(const SliceRange& range, const Slice& tail, const Index64& advanced) const override; + const std::shared_ptr getitem_next(const SliceArray64& array, const Slice& tail, const Index64& advanced) const override; private: const IndexOf starts_; diff --git a/include/awkward/array/ListOffsetArray.h b/include/awkward/array/ListOffsetArray.h index 6bfdcc0c9f..2765cc5088 100644 --- a/include/awkward/array/ListOffsetArray.h +++ b/include/awkward/array/ListOffsetArray.h @@ -14,46 +14,43 @@ namespace awkward { template class ListOffsetArrayOf: public Content { public: - ListOffsetArrayOf(const std::shared_ptr id, const 
std::shared_ptr type, const IndexOf offsets, const std::shared_ptr content) - : Content(id, type) - , offsets_(offsets) - , content_(content) { } + ListOffsetArrayOf(const std::shared_ptr& id, const std::shared_ptr& type, const IndexOf& offsets, const std::shared_ptr& content); + const IndexOf offsets() const; + const std::shared_ptr content() const; - const IndexOf offsets() const { return offsets_; } - const std::shared_ptr content() const { return content_; } - - virtual const std::string classname() const; - virtual void setid(); - virtual void setid(const std::shared_ptr id); - virtual const std::string tostring_part(const std::string indent, const std::string pre, const std::string post) const; - virtual void tojson_part(ToJson& builder) const; - virtual const std::shared_ptr innertype(bool bare) const; - virtual void settype_part(const std::shared_ptr type); - virtual bool accepts(const std::shared_ptr type); - virtual int64_t length() const; - virtual const std::shared_ptr shallow_copy() const; - virtual void check_for_iteration() const; - virtual const std::shared_ptr getitem_nothing() const; - virtual const std::shared_ptr getitem_at(int64_t at) const; - virtual const std::shared_ptr getitem_at_nowrap(int64_t at) const; - virtual const std::shared_ptr getitem_range(int64_t start, int64_t stop) const; - virtual const std::shared_ptr getitem_range_nowrap(int64_t start, int64_t stop) const; - virtual const std::shared_ptr getitem_field(const std::string& key) const; - virtual const std::shared_ptr getitem_fields(const std::vector& keys) const; - virtual const std::shared_ptr carry(const Index64& carry) const; - virtual const std::pair minmax_depth() const; - virtual int64_t numfields() const; - virtual int64_t fieldindex(const std::string& key) const; - virtual const std::string key(int64_t fieldindex) const; - virtual bool haskey(const std::string& key) const; - virtual const std::vector keyaliases(int64_t fieldindex) const; - virtual const std::vector 
keyaliases(const std::string& key) const; - virtual const std::vector keys() const; + const std::string classname() const override; + void setid() override; + void setid(const std::shared_ptr& id) override; + const std::shared_ptr type() const override; + const std::shared_ptr astype(const std::shared_ptr& type) const override; + const std::string tostring_part(const std::string& indent, const std::string& pre, const std::string& post) const override; + void tojson_part(ToJson& builder) const override; + int64_t length() const override; + const std::shared_ptr shallow_copy() const override; + void check_for_iteration() const override; + const std::shared_ptr getitem_nothing() const override; + const std::shared_ptr getitem_at(int64_t at) const override; + const std::shared_ptr getitem_at_nowrap(int64_t at) const override; + const std::shared_ptr getitem_range(int64_t start, int64_t stop) const override; + const std::shared_ptr getitem_range_nowrap(int64_t start, int64_t stop) const override; + const std::shared_ptr getitem_field(const std::string& key) const override; + const std::shared_ptr getitem_fields(const std::vector& keys) const override; + const std::shared_ptr carry(const Index64& carry) const override; + const std::pair minmax_depth() const override; + int64_t numfields() const override; + int64_t fieldindex(const std::string& key) const override; + const std::string key(int64_t fieldindex) const override; + bool haskey(const std::string& key) const override; + const std::vector keyaliases(int64_t fieldindex) const override; + const std::vector keyaliases(const std::string& key) const override; + const std::vector keys() const override; protected: - virtual const std::shared_ptr getitem_next(const SliceAt& at, const Slice& tail, const Index64& advanced) const; - virtual const std::shared_ptr getitem_next(const SliceRange& range, const Slice& tail, const Index64& advanced) const; - virtual const std::shared_ptr getitem_next(const SliceArray64& array, 
const Slice& tail, const Index64& advanced) const; + void checktype() const override; + + const std::shared_ptr getitem_next(const SliceAt& at, const Slice& tail, const Index64& advanced) const override; + const std::shared_ptr getitem_next(const SliceRange& range, const Slice& tail, const Index64& advanced) const override; + const std::shared_ptr getitem_next(const SliceArray64& array, const Slice& tail, const Index64& advanced) const override; private: const IndexOf offsets_; diff --git a/include/awkward/array/NumpyArray.h b/include/awkward/array/NumpyArray.h index 02b5ed6454..0e0eb2620b 100644 --- a/include/awkward/array/NumpyArray.h +++ b/include/awkward/array/NumpyArray.h @@ -15,23 +15,16 @@ namespace awkward { class NumpyArray: public Content { public: - NumpyArray(const std::shared_ptr id, const std::shared_ptr type, const std::shared_ptr ptr, const std::vector shape, const std::vector strides, ssize_t byteoffset, ssize_t itemsize, const std::string format) - : Content(id, type) - , ptr_(ptr) - , shape_(shape) - , strides_(strides) - , byteoffset_(byteoffset) - , itemsize_(itemsize) - , format_(format) { - assert(shape_.size() == strides_.size()); - } + static const std::shared_ptr unwrap_regulartype(const std::shared_ptr& type, const std::vector& shape); - const std::shared_ptr ptr() const { return ptr_; } - const std::vector shape() const { return shape_; } - const std::vector strides() const { return strides_; } - ssize_t byteoffset() const { return byteoffset_; } - ssize_t itemsize() const { return itemsize_; } - const std::string format() const { return format_; } + NumpyArray(const std::shared_ptr& id, const std::shared_ptr& type, const std::shared_ptr& ptr, const std::vector& shape, const std::vector& strides, ssize_t byteoffset, ssize_t itemsize, const std::string format); + + const std::shared_ptr ptr() const; + const std::vector shape() const; + const std::vector strides() const; + ssize_t byteoffset() const; + ssize_t itemsize() const; + const 
std::string format() const; ssize_t ndim() const; bool isempty() const; @@ -40,66 +33,57 @@ namespace awkward { ssize_t bytelength() const; uint8_t getbyte(ssize_t at) const; - virtual bool isscalar() const; - virtual const std::string classname() const; - virtual void setid(); - virtual void setid(const std::shared_ptr id); - virtual const std::string tostring_part(const std::string indent, const std::string pre, const std::string post) const; - virtual void tojson_part(ToJson& builder) const; - virtual const std::shared_ptr innertype(bool bare) const; - virtual const std::shared_ptr type() const; - virtual void settype_part(const std::shared_ptr type); - virtual bool accepts(const std::shared_ptr type); - virtual int64_t length() const; - virtual const std::shared_ptr shallow_copy() const; - virtual void check_for_iteration() const; - virtual const std::shared_ptr getitem_nothing() const; - virtual const std::shared_ptr getitem_at(int64_t at) const; - virtual const std::shared_ptr getitem_at_nowrap(int64_t at) const; - virtual const std::shared_ptr getitem_range(int64_t start, int64_t stop) const; - virtual const std::shared_ptr getitem_range_nowrap(int64_t start, int64_t stop) const; - virtual const std::shared_ptr getitem_field(const std::string& key) const; - virtual const std::shared_ptr getitem_fields(const std::vector& keys) const; - virtual const std::shared_ptr getitem(const Slice& where) const; - virtual const std::shared_ptr getitem_next(const std::shared_ptr head, const Slice& tail, const Index64& advanced) const; - virtual const std::shared_ptr carry(const Index64& carry) const; - virtual const std::pair minmax_depth() const; - virtual int64_t numfields() const; - virtual int64_t fieldindex(const std::string& key) const; - virtual const std::string key(int64_t fieldindex) const; - virtual bool haskey(const std::string& key) const; - virtual const std::vector keyaliases(int64_t fieldindex) const; - virtual const std::vector keyaliases(const 
std::string& key) const; - virtual const std::vector keys() const; + bool isscalar() const override; + const std::string classname() const override; + void setid() override; + void setid(const std::shared_ptr& id) override; + bool istypeptr(Type* pointer) const override; + const std::shared_ptr type() const override; + const std::shared_ptr astype(const std::shared_ptr& type) const override; + const std::string tostring_part(const std::string& indent, const std::string& pre, const std::string& post) const override; + void tojson_part(ToJson& builder) const override; + int64_t length() const override; + const std::shared_ptr shallow_copy() const override; + void check_for_iteration() const override; + const std::shared_ptr getitem_nothing() const override; + const std::shared_ptr getitem_at(int64_t at) const override; + const std::shared_ptr getitem_at_nowrap(int64_t at) const override; + const std::shared_ptr getitem_range(int64_t start, int64_t stop) const override; + const std::shared_ptr getitem_range_nowrap(int64_t start, int64_t stop) const override; + const std::shared_ptr getitem_field(const std::string& key) const override; + const std::shared_ptr getitem_fields(const std::vector& keys) const override; + const std::shared_ptr getitem(const Slice& where) const override; + const std::shared_ptr getitem_next(const std::shared_ptr& head, const Slice& tail, const Index64& advanced) const override; + const std::shared_ptr carry(const Index64& carry) const override; + const std::pair minmax_depth() const override; + int64_t numfields() const override; + int64_t fieldindex(const std::string& key) const override; + const std::string key(int64_t fieldindex) const override; + bool haskey(const std::string& key) const override; + const std::vector keyaliases(int64_t fieldindex) const override; + const std::vector keyaliases(const std::string& key) const override; + const std::vector keys() const override; bool iscontiguous() const; void become_contiguous(); const 
NumpyArray contiguous() const; protected: - virtual const std::shared_ptr getitem_next(const SliceAt& at, const Slice& tail, const Index64& advanced) const { - throw std::runtime_error("NumpyArray has its own getitem_next system"); - } - virtual const std::shared_ptr getitem_next(const SliceRange& range, const Slice& tail, const Index64& advanced) const { - throw std::runtime_error("NumpyArray has its own getitem_next system"); - } - virtual const std::shared_ptr getitem_next(const SliceArray64& array, const Slice& tail, const Index64& advanced) const { - throw std::runtime_error("NumpyArray has its own getitem_next system"); - } - virtual const std::shared_ptr getitem_next(const SliceField& field, const Slice& tail, const Index64& advanced) const { - throw std::runtime_error("NumpyArray has its own getitem_next system"); - } - virtual const std::shared_ptr getitem_next(const SliceFields& fields, const Slice& tail, const Index64& advanced) const { - throw std::runtime_error("NumpyArray has its own getitem_next system"); - } + void checktype() const override; + + const std::shared_ptr getitem_next(const SliceAt& at, const Slice& tail, const Index64& advanced) const override; + const std::shared_ptr getitem_next(const SliceRange& range, const Slice& tail, const Index64& advanced) const override; + const std::shared_ptr getitem_next(const SliceArray64& array, const Slice& tail, const Index64& advanced) const override; + const std::shared_ptr getitem_next(const SliceField& field, const Slice& tail, const Index64& advanced) const override; + const std::shared_ptr getitem_next(const SliceFields& fields, const Slice& tail, const Index64& advanced) const override; - const NumpyArray contiguous_next(Index64 bytepos) const; + const NumpyArray contiguous_next(const Index64& bytepos) const; const NumpyArray getitem_bystrides(const std::shared_ptr& head, const Slice& tail, int64_t length) const; const NumpyArray getitem_bystrides(const SliceAt& at, const Slice& tail, int64_t 
length) const; const NumpyArray getitem_bystrides(const SliceRange& range, const Slice& tail, int64_t length) const; const NumpyArray getitem_bystrides(const SliceEllipsis& ellipsis, const Slice& tail, int64_t length) const; const NumpyArray getitem_bystrides(const SliceNewAxis& newaxis, const Slice& tail, int64_t length) const; - const NumpyArray getitem_next(const std::shared_ptr head, const Slice& tail, const Index64& carry, const Index64& advanced, int64_t length, int64_t stride, bool first) const; + const NumpyArray getitem_next(const std::shared_ptr& head, const Slice& tail, const Index64& carry, const Index64& advanced, int64_t length, int64_t stride, bool first) const; const NumpyArray getitem_next(const SliceAt& at, const Slice& tail, const Index64& carry, const Index64& advanced, int64_t length, int64_t stride, bool first) const; const NumpyArray getitem_next(const SliceRange& range, const Slice& tail, const Index64& carry, const Index64& advanced, int64_t length, int64_t stride, bool first) const; const NumpyArray getitem_next(const SliceEllipsis& ellipsis, const Slice& tail, const Index64& carry, const Index64& advanced, int64_t length, int64_t stride, bool first) const; diff --git a/include/awkward/array/RawArray.h b/include/awkward/array/RawArray.h index e0b68b346d..02a2fafe2c 100644 --- a/include/awkward/array/RawArray.h +++ b/include/awkward/array/RawArray.h @@ -44,66 +44,142 @@ namespace awkward { template class RawArrayOf: public Content { public: - RawArrayOf(const std::shared_ptr id, const std::shared_ptr type, const std::shared_ptr ptr, const int64_t offset, const int64_t length, const int64_t itemsize) + RawArrayOf(const std::shared_ptr& id, const std::shared_ptr& type, const std::shared_ptr& ptr, const int64_t offset, const int64_t length, const int64_t itemsize) : Content(id, type) , ptr_(ptr) , offset_(offset) , length_(length) , itemsize_(itemsize) { - assert(sizeof(T) == itemsize); - } + if (sizeof(T) != itemsize) { + throw 
std::runtime_error("sizeof(T) != itemsize"); + } + } - RawArrayOf(const std::shared_ptr id, const std::shared_ptr type, const std::shared_ptr ptr, const int64_t length) + RawArrayOf(const std::shared_ptr& id, const std::shared_ptr& type, const std::shared_ptr& ptr, const int64_t length) : Content(id, type) , ptr_(ptr) , offset_(0) , length_(length) , itemsize_(sizeof(T)) { } - RawArrayOf(const std::shared_ptr id, const std::shared_ptr type, const int64_t length) + RawArrayOf(const std::shared_ptr& id, const std::shared_ptr& type, const int64_t length) : Content(id, type) , ptr_(std::shared_ptr(new T[(size_t)length], awkward::util::array_deleter())) , offset_(0) , length_(length) , itemsize_(sizeof(T)) { } - const std::shared_ptr ptr() const { return ptr_; } - const int64_t offset() const { return offset_; } - const int64_t itemsize() const { return itemsize_; } + const std::shared_ptr ptr() const { + return ptr_; + } + + const int64_t offset() const { + return offset_; + } + + const int64_t itemsize() const { + return itemsize_; + } - bool isempty() const { return length_ == 0; } - ssize_t byteoffset() const { return (ssize_t)itemsize_*(ssize_t)offset_; } - uint8_t* byteptr() const { return reinterpret_cast(reinterpret_cast(ptr_.get()) + byteoffset()); } - ssize_t bytelength() const { return (ssize_t)itemsize_*(ssize_t)length_; } - uint8_t getbyte(ssize_t at) const { return *reinterpret_cast(reinterpret_cast(ptr_.get()) + (ssize_t)(byteoffset() + at)); } + bool isempty() const { + return length_ == 0; + } + + ssize_t byteoffset() const { + return (ssize_t)itemsize_*(ssize_t)offset_; + } + + uint8_t* byteptr() const { + return reinterpret_cast(reinterpret_cast(ptr_.get()) + byteoffset()); + } + ssize_t bytelength() const { + return (ssize_t)itemsize_*(ssize_t)length_; + } + uint8_t getbyte(ssize_t at) const { + return *reinterpret_cast(reinterpret_cast(ptr_.get()) + (ssize_t)(byteoffset() + at)); + } - T* borrow(int64_t at) const { return 
reinterpret_cast(reinterpret_cast(ptr_.get()) + (ssize_t)itemsize_*(ssize_t)(offset_ + at)); } + T* borrow(int64_t at) const { + return reinterpret_cast(reinterpret_cast(ptr_.get()) + (ssize_t)itemsize_*(ssize_t)(offset_ + at)); + } - virtual const std::string classname() const { return std::string("RawArrayOf<") + std::string(typeid(T).name()) + std::string(">"); } + const std::string classname() const override { + return std::string("RawArrayOf<") + std::string(typeid(T).name()) + std::string(">"); + } - virtual void setid() { + void setid() override { if (length() <= kMaxInt32) { - Identity32* rawid = new Identity32(Identity::newref(), Identity::FieldLoc(), 1, length()); - std::shared_ptr newid(rawid); + std::shared_ptr newid = std::make_shared(Identity::newref(), Identity::FieldLoc(), 1, length()); + Identity32* rawid = reinterpret_cast(newid.get()); awkward_new_identity32(rawid->ptr().get(), length()); setid(newid); } else { - Identity64* rawid = new Identity64(Identity::newref(), Identity::FieldLoc(), 1, length()); - std::shared_ptr newid(rawid); + std::shared_ptr newid = std::make_shared(Identity::newref(), Identity::FieldLoc(), 1, length()); + Identity64* rawid = reinterpret_cast(newid.get()); awkward_new_identity64(rawid->ptr().get(), length()); setid(newid); } } - virtual void setid(const std::shared_ptr id) { + + void setid(const std::shared_ptr& id) override { if (id.get() != nullptr && length() != id.get()->length()) { throw std::invalid_argument("content and its id must have the same length"); } id_ = id; } - const std::string tostring() { return tostring_part("", "", ""); } - virtual const std::string tostring_part(const std::string indent, const std::string pre, const std::string post) const { + const std::shared_ptr type() const override { + if (type_.get() != nullptr) { + return type_; + } + else if (std::is_same::value) { + return std::make_shared(Type::Parameters(), PrimitiveType::float64); + } + else if (std::is_same::value) { + return 
std::make_shared(Type::Parameters(), PrimitiveType::float32); + } + else if (std::is_same::value) { + return std::make_shared(Type::Parameters(), PrimitiveType::int64); + } + else if (std::is_same::value) { + return std::make_shared(Type::Parameters(), PrimitiveType::uint64); + } + else if (std::is_same::value) { + return std::make_shared(Type::Parameters(), PrimitiveType::int32); + } + else if (std::is_same::value) { + return std::make_shared(Type::Parameters(), PrimitiveType::uint32); + } + else if (std::is_same::value) { + return std::make_shared(Type::Parameters(), PrimitiveType::int16); + } + else if (std::is_same::value) { + return std::make_shared(Type::Parameters(), PrimitiveType::uint16); + } + else if (std::is_same::value) { + return std::make_shared(Type::Parameters(), PrimitiveType::int8); + } + else if (std::is_same::value) { + return std::make_shared(Type::Parameters(), PrimitiveType::uint8); + } + else if (std::is_same::value) { + return std::make_shared(Type::Parameters(), PrimitiveType::boolean); + } + else { + throw std::invalid_argument(std::string("RawArrayOf<") + typeid(T).name() + std::string("> does not have a known type")); + } + } + + const std::shared_ptr astype(const std::shared_ptr& type) const override { + return std::make_shared>(id_, type, ptr_, offset_, length_, itemsize_); + } + + const std::string tostring() { + return tostring_part("", "", ""); + } + + const std::string tostring_part(const std::string& indent, const std::string& pre, const std::string& post) const override { std::stringstream out; out << indent << pre << "::value) { tojson_real(builder, reinterpret_cast(byteptr()), length()); } @@ -182,109 +258,25 @@ namespace awkward { } } - virtual const std::shared_ptr innertype(bool bare) const { - if (std::is_same::value) { - return std::shared_ptr(new PrimitiveType(Type::Parameters(), PrimitiveType::float64)); - } - else if (std::is_same::value) { - return std::shared_ptr(new PrimitiveType(Type::Parameters(), 
PrimitiveType::float32)); - } - else if (std::is_same::value) { - return std::shared_ptr(new PrimitiveType(Type::Parameters(), PrimitiveType::int64)); - } - else if (std::is_same::value) { - return std::shared_ptr(new PrimitiveType(Type::Parameters(), PrimitiveType::uint64)); - } - else if (std::is_same::value) { - return std::shared_ptr(new PrimitiveType(Type::Parameters(), PrimitiveType::int32)); - } - else if (std::is_same::value) { - return std::shared_ptr(new PrimitiveType(Type::Parameters(), PrimitiveType::uint32)); - } - else if (std::is_same::value) { - return std::shared_ptr(new PrimitiveType(Type::Parameters(), PrimitiveType::int16)); - } - else if (std::is_same::value) { - return std::shared_ptr(new PrimitiveType(Type::Parameters(), PrimitiveType::uint16)); - } - else if (std::is_same::value) { - return std::shared_ptr(new PrimitiveType(Type::Parameters(), PrimitiveType::int8)); - } - else if (std::is_same::value) { - return std::shared_ptr(new PrimitiveType(Type::Parameters(), PrimitiveType::uint8)); - } - else if (std::is_same::value) { - return std::shared_ptr(new PrimitiveType(Type::Parameters(), PrimitiveType::boolean)); - } - else { - throw std::invalid_argument(std::string("RawArrayOf<") + typeid(T).name() + std::string("> cannot be expressed as a PrimitiveType")); - } + int64_t length() const override { + return length_; } - virtual void settype_part(const std::shared_ptr type) { - if (accepts(type)) { - type_ = type; - } - else { - throw std::invalid_argument(std::string("provided type is incompatible with array: ") + type.get()->compare(baretype())); - } + const std::shared_ptr shallow_copy() const override { + return std::make_shared>(id_, type_, ptr_, offset_, length_, itemsize_); } - virtual bool accepts(const std::shared_ptr type) { - std::shared_ptr check = type.get()->level(); - if (std::is_same::value) { - return check.get()->equal(std::shared_ptr(new PrimitiveType(Type::Parameters(), PrimitiveType::float64)), false); - } - else if 
(std::is_same::value) { - return check.get()->equal(std::shared_ptr(new PrimitiveType(Type::Parameters(), PrimitiveType::float32)), false); - } - else if (std::is_same::value) { - return check.get()->equal(std::shared_ptr(new PrimitiveType(Type::Parameters(), PrimitiveType::int64)), false); - } - else if (std::is_same::value) { - return check.get()->equal(std::shared_ptr(new PrimitiveType(Type::Parameters(), PrimitiveType::uint64)), false); - } - else if (std::is_same::value) { - return check.get()->equal(std::shared_ptr(new PrimitiveType(Type::Parameters(), PrimitiveType::int32)), false); - } - else if (std::is_same::value) { - return check.get()->equal(std::shared_ptr(new PrimitiveType(Type::Parameters(), PrimitiveType::uint32)), false); - } - else if (std::is_same::value) { - return check.get()->equal(std::shared_ptr(new PrimitiveType(Type::Parameters(), PrimitiveType::int16)), false); - } - else if (std::is_same::value) { - return check.get()->equal(std::shared_ptr(new PrimitiveType(Type::Parameters(), PrimitiveType::uint16)), false); - } - else if (std::is_same::value) { - return check.get()->equal(std::shared_ptr(new PrimitiveType(Type::Parameters(), PrimitiveType::int8)), false); - } - else if (std::is_same::value) { - return check.get()->equal(std::shared_ptr(new PrimitiveType(Type::Parameters(), PrimitiveType::uint8)), false); - } - else if (std::is_same::value) { - return check.get()->equal(std::shared_ptr(new PrimitiveType(Type::Parameters(), PrimitiveType::boolean)), false); - } - else { - return false; - } - } - - virtual int64_t length() const { return length_; } - - virtual const std::shared_ptr shallow_copy() const { return std::shared_ptr(new RawArrayOf(id_, type_, ptr_, offset_, length_, itemsize_)); } - - virtual void check_for_iteration() const { + void check_for_iteration() const override { if (id_.get() != nullptr && id_.get()->length() < length_) { util::handle_error(failure("len(id) < len(array)", kSliceNone, kSliceNone), 
id_.get()->classname(), nullptr); } } - virtual const std::shared_ptr getitem_nothing() const { + const std::shared_ptr getitem_nothing() const override { return getitem_range_nowrap(0, 0); } - virtual const std::shared_ptr getitem_at(int64_t at) const { + const std::shared_ptr getitem_at(int64_t at) const override { int64_t regular_at = at; if (regular_at < 0) { regular_at += length_; @@ -295,11 +287,11 @@ namespace awkward { return getitem_at_nowrap(regular_at); } - virtual const std::shared_ptr getitem_at_nowrap(int64_t at) const { + const std::shared_ptr getitem_at_nowrap(int64_t at) const override { return getitem_range_nowrap(at, at + 1); } - virtual const std::shared_ptr getitem_range(int64_t start, int64_t stop) const { + const std::shared_ptr getitem_range(int64_t start, int64_t stop) const override { int64_t regular_start = start; int64_t regular_stop = stop; awkward_regularize_rangeslice(&regular_start, &regular_stop, true, start != Slice::none(), stop != Slice::none(), length_); @@ -309,37 +301,37 @@ namespace awkward { return getitem_range_nowrap(regular_start, regular_stop); } - virtual const std::shared_ptr getitem_range_nowrap(int64_t start, int64_t stop) const { + const std::shared_ptr getitem_range_nowrap(int64_t start, int64_t stop) const override { std::shared_ptr id(nullptr); if (id_.get() != nullptr) { id = id_.get()->getitem_range_nowrap(start, stop); } - return std::shared_ptr(new RawArrayOf(id, type_, ptr_, offset_ + start, stop - start, itemsize_)); + return std::make_shared>(id, type_, ptr_, offset_ + start, stop - start, itemsize_); } - virtual const std::shared_ptr getitem_field(const std::string& key) const { + const std::shared_ptr getitem_field(const std::string& key) const override { throw std::invalid_argument(std::string("cannot slice ") + classname() + std::string(" by field name")); } - virtual const std::shared_ptr getitem_fields(const std::vector& keys) const 
override { throw std::invalid_argument(std::string("cannot slice ") + classname() + std::string(" by field name")); } - virtual const std::shared_ptr getitem(const Slice& where) const { + const std::shared_ptr getitem(const Slice& where) const override { std::shared_ptr nexthead = where.head(); Slice nexttail = where.tail(); Index64 nextadvanced(0); return getitem_next(nexthead, nexttail, nextadvanced); } - virtual const std::shared_ptr getitem_next(const std::shared_ptr head, const Slice& tail, const Index64& advanced) const { + const std::shared_ptr getitem_next(const std::shared_ptr& head, const Slice& tail, const Index64& advanced) const override { if (tail.length() != 0) { throw std::invalid_argument("too many indexes for array"); } return Content::getitem_next(head, tail, advanced); } - virtual const std::shared_ptr carry(const Index64& carry) const { + const std::shared_ptr carry(const Index64& carry) const override { std::shared_ptr ptr(new T[(size_t)carry.length()], awkward::util::array_deleter()); struct Error err = awkward_numpyarray_getitem_next_null_64( reinterpret_cast(ptr.get()), @@ -355,45 +347,89 @@ namespace awkward { id = id_.get()->getitem_carry_64(carry); } - return std::shared_ptr(new RawArrayOf(id, type_, ptr, 0, carry.length(), itemsize_)); + return std::make_shared>(id, type_, ptr, 0, carry.length(), itemsize_); } - virtual const std::pair minmax_depth() const { + const std::pair minmax_depth() const override { return std::pair(1, 1); } - virtual int64_t numfields() const { return -1; } + int64_t numfields() const override { + return -1; + } - virtual int64_t fieldindex(const std::string& key) const { + int64_t fieldindex(const std::string& key) const override { throw std::invalid_argument("array contains no Records"); } - virtual const std::string key(int64_t fieldindex) const { + const std::string key(int64_t fieldindex) const override { throw std::invalid_argument("array contains no Records"); } - virtual bool haskey(const std::string& 
key) const { + bool haskey(const std::string& key) const override { throw std::invalid_argument("array contains no Records"); } - virtual const std::vector keyaliases(int64_t fieldindex) const { + const std::vector keyaliases(int64_t fieldindex) const override { throw std::invalid_argument("array contains no Records"); } - virtual const std::vector keyaliases(const std::string& key) const { + const std::vector keyaliases(const std::string& key) const override { throw std::invalid_argument("array contains no Records"); } - virtual const std::vector keys() const { + const std::vector keys() const override { throw std::invalid_argument("array contains no Records"); } protected: - virtual const std::shared_ptr getitem_next(const SliceAt& at, const Slice& tail, const Index64& advanced) const { + void checktype() const override { + bool okay = false; + if (PrimitiveType* raw = dynamic_cast(type_.get())) { + if (std::is_same::value) { + okay = (raw->dtype() == PrimitiveType::float64); + } + else if (std::is_same::value) { + okay = (raw->dtype() == PrimitiveType::float32); + } + else if (std::is_same::value) { + okay = (raw->dtype() == PrimitiveType::int64); + } + else if (std::is_same::value) { + okay = (raw->dtype() == PrimitiveType::uint64); + } + else if (std::is_same::value) { + okay = (raw->dtype() == PrimitiveType::int32); + } + else if (std::is_same::value) { + okay = (raw->dtype() == PrimitiveType::uint32); + } + else if (std::is_same::value) { + okay = (raw->dtype() == PrimitiveType::int16); + } + else if (std::is_same::value) { + okay = (raw->dtype() == PrimitiveType::uint16); + } + else if (std::is_same::value) { + okay = (raw->dtype() == PrimitiveType::int8); + } + else if (std::is_same::value) { + okay = (raw->dtype() == PrimitiveType::uint8); + } + else if (std::is_same::value) { + okay = (raw->dtype() == PrimitiveType::boolean); + } + } + if (!okay) { + throw std::invalid_argument(std::string("cannot assign type ") + type_.get()->tostring() + std::string(" 
to ") + classname()); + } + } + + const std::shared_ptr getitem_next(const SliceAt& at, const Slice& tail, const Index64& advanced) const override { return getitem_at(at.at()); } - virtual const std::shared_ptr getitem_next(const SliceRange& range, const Slice& tail, const Index64& advanced) const { + const std::shared_ptr getitem_next(const SliceRange& range, const Slice& tail, const Index64& advanced) const override { if (range.step() == Slice::none() || range.step() == 1) { return getitem_range(range.start(), range.stop()); } @@ -409,8 +445,8 @@ namespace awkward { } awkward_regularize_rangeslice(&start, &stop, step > 0, range.hasstart(), range.hasstop(), length_); - int64_t numer = abs(start - stop); - int64_t denom = abs(step); + int64_t numer = std::abs(start - stop); + int64_t denom = std::abs(step); int64_t d = numer / denom; int64_t m = numer % denom; int64_t lenhead = d + (m != 0 ? 1 : 0); @@ -425,7 +461,7 @@ namespace awkward { } } - virtual const std::shared_ptr getitem_next(const SliceArray64& array, const Slice& tail, const Index64& advanced) const { + const std::shared_ptr getitem_next(const SliceArray64& array, const Slice& tail, const Index64& advanced) const override { assert(advanced.length() == 0); if (array.shape().size() != 1) { throw std::runtime_error("array.ndim != 1"); @@ -439,11 +475,11 @@ namespace awkward { return carry(flathead); } - virtual const std::shared_ptr getitem_next(const SliceField& field, const Slice& tail, const Index64& advanced) const { + const std::shared_ptr getitem_next(const SliceField& field, const Slice& tail, const Index64& advanced) const override { throw std::invalid_argument(field.tostring() + std::string(" is not a valid slice type for ") + classname()); } - virtual const std::shared_ptr getitem_next(const SliceFields& fields, const Slice& tail, const Index64& advanced) const { + const std::shared_ptr getitem_next(const SliceFields& fields, const Slice& tail, const Index64& advanced) const override { throw 
std::invalid_argument(fields.tostring() + std::string(" is not a valid slice type for ") + classname()); } diff --git a/include/awkward/array/Record.h b/include/awkward/array/Record.h index 94b6cdc778..9e0d5802aa 100644 --- a/include/awkward/array/Record.h +++ b/include/awkward/array/Record.h @@ -8,55 +8,44 @@ namespace awkward { class Record: public Content { public: - Record(const RecordArray& array, int64_t at) - : Content(Identity::none(), Type::none()) - , array_(array) - , at_(at) { } + Record(const RecordArray& array, int64_t at); + const std::shared_ptr array() const; + int64_t at() const; + const std::vector> contents() const; + const std::shared_ptr lookup() const; + const std::shared_ptr reverselookup() const; + bool istuple() const; - const std::shared_ptr array() const { return array_.shallow_copy(); } - int64_t at() const { return at_; } - const std::vector> contents() const { - std::vector> out; - for (auto item : array_.contents()) { - out.push_back(item.get()->getitem_at_nowrap(at_)); - } - return out; - } - const std::shared_ptr lookup() const { return array_.lookup(); } - const std::shared_ptr reverselookup() const { return array_.reverselookup(); } - bool istuple() const { return lookup().get() == nullptr; } - - virtual bool isscalar() const; - virtual const std::string classname() const; - virtual const std::shared_ptr id() const; - virtual void setid(); - virtual void setid(const std::shared_ptr id); - virtual const std::string tostring_part(const std::string indent, const std::string pre, const std::string post) const; - virtual void tojson_part(ToJson& builder) const; - virtual const std::shared_ptr innertype(bool bare) const; - virtual const std::shared_ptr type() const; - virtual void settype(const std::shared_ptr type); - virtual void settype_part(const std::shared_ptr type); - virtual bool accepts(const std::shared_ptr type); - virtual int64_t length() const; - virtual const std::shared_ptr shallow_copy() const; - virtual void 
check_for_iteration() const; - virtual const std::shared_ptr getitem_nothing() const; - virtual const std::shared_ptr getitem_at(int64_t at) const; - virtual const std::shared_ptr getitem_at_nowrap(int64_t at) const; - virtual const std::shared_ptr getitem_range(int64_t start, int64_t stop) const; - virtual const std::shared_ptr getitem_range_nowrap(int64_t start, int64_t stop) const; - virtual const std::shared_ptr getitem_field(const std::string& key) const; - virtual const std::shared_ptr getitem_fields(const std::vector& keys) const; - virtual const std::shared_ptr carry(const Index64& carry) const; - virtual const std::pair minmax_depth() const; - virtual int64_t numfields() const; - virtual int64_t fieldindex(const std::string& key) const; - virtual const std::string key(int64_t fieldindex) const; - virtual bool haskey(const std::string& key) const; - virtual const std::vector keyaliases(int64_t fieldindex) const; - virtual const std::vector keyaliases(const std::string& key) const; - virtual const std::vector keys() const; + bool isscalar() const override; + const std::string classname() const override; + const std::shared_ptr id() const override; + void setid() override; + void setid(const std::shared_ptr& id) override; + bool isbare() const override; + bool istypeptr(Type* pointer) const override; + const std::shared_ptr type() const override; + const std::shared_ptr astype(const std::shared_ptr& type) const override; + const std::string tostring_part(const std::string& indent, const std::string& pre, const std::string& post) const override; + void tojson_part(ToJson& builder) const override; + int64_t length() const override; + const std::shared_ptr shallow_copy() const override; + void check_for_iteration() const override; + const std::shared_ptr getitem_nothing() const override; + const std::shared_ptr getitem_at(int64_t at) const override; + const std::shared_ptr getitem_at_nowrap(int64_t at) const override; + const std::shared_ptr 
getitem_range(int64_t start, int64_t stop) const override; + const std::shared_ptr getitem_range_nowrap(int64_t start, int64_t stop) const override; + const std::shared_ptr getitem_field(const std::string& key) const override; + const std::shared_ptr getitem_fields(const std::vector& keys) const override; + const std::shared_ptr carry(const Index64& carry) const override; + const std::pair minmax_depth() const override; + int64_t numfields() const override; + int64_t fieldindex(const std::string& key) const override; + const std::string key(int64_t fieldindex) const override; + bool haskey(const std::string& key) const override; + const std::vector keyaliases(int64_t fieldindex) const override; + const std::vector keyaliases(const std::string& key) const override; + const std::vector keys() const override; const std::shared_ptr field(int64_t fieldindex) const; const std::shared_ptr field(const std::string& key) const; @@ -65,11 +54,13 @@ namespace awkward { const Record astuple() const; protected: - virtual const std::shared_ptr getitem_next(const SliceAt& at, const Slice& tail, const Index64& advanced) const; - virtual const std::shared_ptr getitem_next(const SliceRange& range, const Slice& tail, const Index64& advanced) const; - virtual const std::shared_ptr getitem_next(const SliceArray64& array, const Slice& tail, const Index64& advanced) const; - virtual const std::shared_ptr getitem_next(const SliceField& field, const Slice& tail, const Index64& advanced) const; - virtual const std::shared_ptr getitem_next(const SliceFields& fields, const Slice& tail, const Index64& advanced) const; + void checktype() const override; + + const std::shared_ptr getitem_next(const SliceAt& at, const Slice& tail, const Index64& advanced) const override; + const std::shared_ptr getitem_next(const SliceRange& range, const Slice& tail, const Index64& advanced) const override; + const std::shared_ptr getitem_next(const SliceArray64& array, const Slice& tail, const Index64& advanced) 
const override; + const std::shared_ptr getitem_next(const SliceField& field, const Slice& tail, const Index64& advanced) const override; + const std::shared_ptr getitem_next(const SliceFields& fields, const Slice& tail, const Index64& advanced) const override; private: RecordArray array_; diff --git a/include/awkward/array/RecordArray.h b/include/awkward/array/RecordArray.h index f9e16a2812..abff128979 100644 --- a/include/awkward/array/RecordArray.h +++ b/include/awkward/array/RecordArray.h @@ -17,62 +17,42 @@ namespace awkward { typedef std::unordered_map Lookup; typedef std::vector ReverseLookup; - RecordArray(const std::shared_ptr id, const std::shared_ptr type, const std::vector>& contents, const std::shared_ptr& lookup, const std::shared_ptr& reverselookup) - : Content(id, type) - , contents_(contents) - , lookup_(lookup) - , reverselookup_(reverselookup) - , length_(0) { - assert(contents.size() != 0); - } - RecordArray(const std::shared_ptr id, const std::shared_ptr type, const std::vector>& contents) - : Content(id, type) - , contents_(contents) - , lookup_(nullptr) - , reverselookup_(nullptr) - , length_(0) { - assert(contents.size() != 0); - } - RecordArray(const std::shared_ptr id, const std::shared_ptr type, int64_t length, bool istuple) - : Content(id, type) - , contents_() - , lookup_(istuple ? nullptr : new Lookup) - , reverselookup_(istuple ? 
nullptr : new ReverseLookup) - , length_(length) { } + RecordArray(const std::shared_ptr& id, const std::shared_ptr& type, const std::vector>& contents, const std::shared_ptr& lookup, const std::shared_ptr& reverselookup); + RecordArray(const std::shared_ptr& id, const std::shared_ptr& type, const std::vector>& contents); + RecordArray(const std::shared_ptr& id, const std::shared_ptr& type, int64_t length, bool istuple); - const std::vector> contents() const { return contents_; } - const std::shared_ptr lookup() const { return lookup_; } - const std::shared_ptr reverselookup() const { return reverselookup_; } - bool istuple() const { return lookup_.get() == nullptr; } + const std::vector> contents() const; + const std::shared_ptr lookup() const; + const std::shared_ptr reverselookup() const; + bool istuple() const; - virtual const std::string classname() const; - virtual void setid(); - virtual void setid(const std::shared_ptr id); - virtual const std::string tostring_part(const std::string indent, const std::string pre, const std::string post) const; - virtual void tojson_part(ToJson& builder) const; - virtual const std::shared_ptr innertype(bool bare) const; - virtual void settype_part(const std::shared_ptr type); - virtual bool accepts(const std::shared_ptr type); - virtual int64_t length() const; - virtual const std::shared_ptr shallow_copy() const; - virtual void check_for_iteration() const; - virtual const std::shared_ptr getitem_nothing() const; - virtual const std::shared_ptr getitem_at(int64_t at) const; - virtual const std::shared_ptr getitem_at_nowrap(int64_t at) const; - virtual const std::shared_ptr getitem_range(int64_t start, int64_t stop) const; - virtual const std::shared_ptr getitem_range_nowrap(int64_t start, int64_t stop) const; - virtual const std::shared_ptr getitem_field(const std::string& key) const; - virtual const std::shared_ptr getitem_fields(const std::vector& keys) const; - virtual const std::shared_ptr getitem_next(const 
std::shared_ptr head, const Slice& tail, const Index64& advanced) const; - virtual const std::shared_ptr carry(const Index64& carry) const; - virtual const std::pair minmax_depth() const; - virtual int64_t numfields() const; - virtual int64_t fieldindex(const std::string& key) const; - virtual const std::string key(int64_t fieldindex) const; - virtual bool haskey(const std::string& key) const; - virtual const std::vector keyaliases(int64_t fieldindex) const; - virtual const std::vector keyaliases(const std::string& key) const; - virtual const std::vector keys() const; + const std::string classname() const override; + void setid() override; + void setid(const std::shared_ptr& id) override; + const std::string tostring_part(const std::string& indent, const std::string& pre, const std::string& post) const override; + const std::shared_ptr type() const override; + const std::shared_ptr astype(const std::shared_ptr& type) const override; + void tojson_part(ToJson& builder) const override; + int64_t length() const override; + const std::shared_ptr shallow_copy() const override; + void check_for_iteration() const override; + const std::shared_ptr getitem_nothing() const override; + const std::shared_ptr getitem_at(int64_t at) const override; + const std::shared_ptr getitem_at_nowrap(int64_t at) const override; + const std::shared_ptr getitem_range(int64_t start, int64_t stop) const override; + const std::shared_ptr getitem_range_nowrap(int64_t start, int64_t stop) const override; + const std::shared_ptr getitem_field(const std::string& key) const override; + const std::shared_ptr getitem_fields(const std::vector& keys) const override; + const std::shared_ptr getitem_next(const std::shared_ptr& head, const Slice& tail, const Index64& advanced) const override; + const std::shared_ptr carry(const Index64& carry) const override; + const std::pair minmax_depth() const override; + int64_t numfields() const override; + int64_t fieldindex(const std::string& key) const override; + 
const std::string key(int64_t fieldindex) const override; + bool haskey(const std::string& key) const override; + const std::vector keyaliases(int64_t fieldindex) const override; + const std::vector keyaliases(const std::string& key) const override; + const std::vector keys() const override; const std::shared_ptr field(int64_t fieldindex) const; const std::shared_ptr field(const std::string& key) const; @@ -85,11 +65,13 @@ namespace awkward { void setkey(int64_t fieldindex, const std::string& key); protected: - virtual const std::shared_ptr getitem_next(const SliceAt& at, const Slice& tail, const Index64& advanced) const; - virtual const std::shared_ptr getitem_next(const SliceRange& range, const Slice& tail, const Index64& advanced) const; - virtual const std::shared_ptr getitem_next(const SliceArray64& array, const Slice& tail, const Index64& advanced) const; - virtual const std::shared_ptr getitem_next(const SliceField& field, const Slice& tail, const Index64& advanced) const; - virtual const std::shared_ptr getitem_next(const SliceFields& fields, const Slice& tail, const Index64& advanced) const; + void checktype() const override; + + const std::shared_ptr getitem_next(const SliceAt& at, const Slice& tail, const Index64& advanced) const override; + const std::shared_ptr getitem_next(const SliceRange& range, const Slice& tail, const Index64& advanced) const override; + const std::shared_ptr getitem_next(const SliceArray64& array, const Slice& tail, const Index64& advanced) const override; + const std::shared_ptr getitem_next(const SliceField& field, const Slice& tail, const Index64& advanced) const override; + const std::shared_ptr getitem_next(const SliceFields& fields, const Slice& tail, const Index64& advanced) const override; private: std::vector> contents_; diff --git a/include/awkward/array/RegularArray.h b/include/awkward/array/RegularArray.h index d88a7e33e5..61a2d1eb49 100644 --- a/include/awkward/array/RegularArray.h +++ 
b/include/awkward/array/RegularArray.h @@ -15,46 +15,43 @@ namespace awkward { class RegularArray: public Content { public: - RegularArray(const std::shared_ptr id, const std::shared_ptr type, const std::shared_ptr content, int64_t size) - : Content(id, type) - , content_(content) - , size_(size) { } - - const std::shared_ptr content() const { return content_; } - int64_t size() const { return size_; } - - virtual const std::string classname() const; - virtual void setid(); - virtual void setid(const std::shared_ptr id); - virtual const std::string tostring_part(const std::string indent, const std::string pre, const std::string post) const; - virtual void tojson_part(ToJson& builder) const; - virtual const std::shared_ptr innertype(bool bare) const; - virtual void settype_part(const std::shared_ptr type); - virtual bool accepts(const std::shared_ptr type); - virtual int64_t length() const; - virtual const std::shared_ptr shallow_copy() const; - virtual void check_for_iteration() const; - virtual const std::shared_ptr getitem_nothing() const; - virtual const std::shared_ptr getitem_at(int64_t at) const; - virtual const std::shared_ptr getitem_at_nowrap(int64_t at) const; - virtual const std::shared_ptr getitem_range(int64_t start, int64_t stop) const; - virtual const std::shared_ptr getitem_range_nowrap(int64_t start, int64_t stop) const; - virtual const std::shared_ptr getitem_field(const std::string& key) const; - virtual const std::shared_ptr getitem_fields(const std::vector& keys) const; - virtual const std::shared_ptr carry(const Index64& carry) const; - virtual const std::pair minmax_depth() const; - virtual int64_t numfields() const; - virtual int64_t fieldindex(const std::string& key) const; - virtual const std::string key(int64_t fieldindex) const; - virtual bool haskey(const std::string& key) const; - virtual const std::vector keyaliases(int64_t fieldindex) const; - virtual const std::vector keyaliases(const std::string& key) const; - virtual const 
std::vector keys() const; + RegularArray(const std::shared_ptr& id, const std::shared_ptr& type, const std::shared_ptr& content, int64_t size); + const std::shared_ptr content() const; + int64_t size() const; + + const std::string classname() const override; + void setid() override; + void setid(const std::shared_ptr& id) override; + const std::shared_ptr type() const override; + const std::shared_ptr astype(const std::shared_ptr& type) const override; + const std::string tostring_part(const std::string& indent, const std::string& pre, const std::string& post) const override; + void tojson_part(ToJson& builder) const override; + int64_t length() const override; + const std::shared_ptr shallow_copy() const override; + void check_for_iteration() const override; + const std::shared_ptr getitem_nothing() const override; + const std::shared_ptr getitem_at(int64_t at) const override; + const std::shared_ptr getitem_at_nowrap(int64_t at) const override; + const std::shared_ptr getitem_range(int64_t start, int64_t stop) const override; + const std::shared_ptr getitem_range_nowrap(int64_t start, int64_t stop) const override; + const std::shared_ptr getitem_field(const std::string& key) const override; + const std::shared_ptr getitem_fields(const std::vector& keys) const override; + const std::shared_ptr carry(const Index64& carry) const override; + const std::pair minmax_depth() const override; + int64_t numfields() const override; + int64_t fieldindex(const std::string& key) const override; + const std::string key(int64_t fieldindex) const override; + bool haskey(const std::string& key) const override; + const std::vector keyaliases(int64_t fieldindex) const override; + const std::vector keyaliases(const std::string& key) const override; + const std::vector keys() const override; protected: - virtual const std::shared_ptr getitem_next(const SliceAt& at, const Slice& tail, const Index64& advanced) const; - virtual const std::shared_ptr getitem_next(const SliceRange& range, 
const Slice& tail, const Index64& advanced) const; - virtual const std::shared_ptr getitem_next(const SliceArray64& array, const Slice& tail, const Index64& advanced) const; + void checktype() const override; + + const std::shared_ptr getitem_next(const SliceAt& at, const Slice& tail, const Index64& advanced) const override; + const std::shared_ptr getitem_next(const SliceRange& range, const Slice& tail, const Index64& advanced) const override; + const std::shared_ptr getitem_next(const SliceArray64& array, const Slice& tail, const Index64& advanced) const override; private: const std::shared_ptr content_; diff --git a/include/awkward/fillable/BoolFillable.h b/include/awkward/fillable/BoolFillable.h index cb0a1fe856..3b73041a87 100644 --- a/include/awkward/fillable/BoolFillable.h +++ b/include/awkward/fillable/BoolFillable.h @@ -11,30 +11,30 @@ namespace awkward { class BoolFillable: public Fillable { public: - BoolFillable(const FillableOptions& options, const GrowableBuffer& buffer): options_(options), buffer_(buffer) { } - static const std::shared_ptr fromempty(const FillableOptions& options); - virtual const std::string classname() const { return "BoolFillable"; }; - virtual int64_t length() const; - virtual void clear(); - virtual const std::shared_ptr type() const; - virtual const std::shared_ptr snapshot() const; - - virtual bool active() const; - virtual const std::shared_ptr null(); - virtual const std::shared_ptr boolean(bool x); - virtual const std::shared_ptr integer(int64_t x); - virtual const std::shared_ptr real(double x); - virtual const std::shared_ptr string(const char* x, int64_t length, const char* encoding); - virtual const std::shared_ptr beginlist(); - virtual const std::shared_ptr endlist(); - virtual const std::shared_ptr begintuple(int64_t numfields); - virtual const std::shared_ptr index(int64_t index); - virtual const std::shared_ptr endtuple(); - virtual const std::shared_ptr beginrecord(const char* name, bool check); - virtual const 
std::shared_ptr field(const char* key, bool check); - virtual const std::shared_ptr endrecord(); + BoolFillable(const FillableOptions& options, const GrowableBuffer& buffer); + + const std::string classname() const override; + int64_t length() const override; + void clear() override; + const std::shared_ptr type() const override; + const std::shared_ptr snapshot(const std::shared_ptr& type) const override; + + bool active() const override; + const std::shared_ptr null() override; + const std::shared_ptr boolean(bool x) override; + const std::shared_ptr integer(int64_t x) override; + const std::shared_ptr real(double x) override; + const std::shared_ptr string(const char* x, int64_t length, const char* encoding) override; + const std::shared_ptr beginlist() override; + const std::shared_ptr endlist() override; + const std::shared_ptr begintuple(int64_t numfields) override; + const std::shared_ptr index(int64_t index) override; + const std::shared_ptr endtuple() override; + const std::shared_ptr beginrecord(const char* name, bool check) override; + const std::shared_ptr field(const char* key, bool check) override; + const std::shared_ptr endrecord() override; private: const FillableOptions options_; diff --git a/include/awkward/fillable/Fillable.h b/include/awkward/fillable/Fillable.h index c5a09b50aa..435ecb3f07 100644 --- a/include/awkward/fillable/Fillable.h +++ b/include/awkward/fillable/Fillable.h @@ -13,13 +13,13 @@ namespace awkward { class Fillable { public: - virtual ~Fillable() { } + virtual ~Fillable(); virtual const std::string classname() const = 0; virtual int64_t length() const = 0; virtual void clear() = 0; virtual const std::shared_ptr type() const = 0; - virtual const std::shared_ptr snapshot() const = 0; + virtual const std::shared_ptr snapshot(const std::shared_ptr& type) const = 0; virtual bool active() const = 0; virtual const std::shared_ptr null() = 0; @@ -36,9 +36,7 @@ namespace awkward { virtual const std::shared_ptr field(const char* key, 
bool check) = 0; virtual const std::shared_ptr endrecord() = 0; - void setthat(const std::shared_ptr& that) { - that_ = that; - } + void setthat(const std::shared_ptr& that); protected: std::shared_ptr that_; diff --git a/include/awkward/fillable/FillableArray.h b/include/awkward/fillable/FillableArray.h index 111c2afe1f..e273c5ec49 100644 --- a/include/awkward/fillable/FillableArray.h +++ b/include/awkward/fillable/FillableArray.h @@ -13,7 +13,7 @@ namespace awkward { class FillableArray { public: - FillableArray(const FillableOptions& options): fillable_(UnknownFillable::fromempty(options)) { } + FillableArray(const FillableOptions& options); const std::string tostring() const; int64_t length() const; @@ -32,14 +32,10 @@ namespace awkward { void real(double x); void bytestring(const char* x); void bytestring(const char* x, int64_t length); - void bytestring(const std::string& x) { - bytestring(x.c_str(), (int64_t)x.length()); - } + void bytestring(const std::string& x); void string(const char* x); void string(const char* x, int64_t length); - void string(const std::string& x) { - string(x.c_str(), (int64_t)x.length()); - } + void string(const std::string& x); void beginlist(); void endlist(); void begintuple(int64_t numfields); @@ -48,14 +44,10 @@ namespace awkward { void beginrecord(); void beginrecord_fast(const char* name); void beginrecord_check(const char* name); - void beginrecord_check(const std::string& name) { - beginrecord_check(name.c_str()); - } + void beginrecord_check(const std::string& name); void field_fast(const char* key); void field_check(const char* key); - void field_check(const std::string& key) { - field_check(key.c_str()); - } + void field_check(const std::string& key); void endrecord(); template @@ -66,10 +58,11 @@ namespace awkward { } endlist(); } - void fill(int64_t x) { integer(x); } - void fill(double x) { real(x); } - void fill(const char* x) { bytestring(x); } - void fill(const std::string& x) { bytestring(x.c_str()); } + + void 
fill(int64_t x); + void fill(double x); + void fill(const char* x); + void fill(const std::string& x); private: void maybeupdate(const std::shared_ptr& tmp); diff --git a/include/awkward/fillable/FillableOptions.h b/include/awkward/fillable/FillableOptions.h index eb7242fc73..ea3c4adc08 100644 --- a/include/awkward/fillable/FillableOptions.h +++ b/include/awkward/fillable/FillableOptions.h @@ -11,10 +11,9 @@ namespace awkward { class FillableOptions { public: - FillableOptions(int64_t initial, double resize): initial_(initial), resize_(resize) { } - - int64_t initial() const { return initial_; } - double resize() const { return resize_; } + FillableOptions(int64_t initial, double resize); + int64_t initial() const; + double resize() const; private: int64_t initial_; diff --git a/include/awkward/fillable/Float64Fillable.h b/include/awkward/fillable/Float64Fillable.h index a25a3056df..6c968f3d26 100644 --- a/include/awkward/fillable/Float64Fillable.h +++ b/include/awkward/fillable/Float64Fillable.h @@ -11,31 +11,31 @@ namespace awkward { class Float64Fillable: public Fillable { public: - Float64Fillable(const FillableOptions& options, const GrowableBuffer& buffer): options_(options), buffer_(buffer) { } - static const std::shared_ptr fromempty(const FillableOptions& options); static const std::shared_ptr fromint64(const FillableOptions& options, GrowableBuffer old); - virtual const std::string classname() const { return "Float64Fillable"; }; - virtual int64_t length() const; - virtual void clear(); - virtual const std::shared_ptr type() const; - virtual const std::shared_ptr snapshot() const; - - virtual bool active() const; - virtual const std::shared_ptr null(); - virtual const std::shared_ptr boolean(bool x); - virtual const std::shared_ptr integer(int64_t x); - virtual const std::shared_ptr real(double x); - virtual const std::shared_ptr string(const char* x, int64_t length, const char* encoding); - virtual const std::shared_ptr beginlist(); - virtual const 
std::shared_ptr endlist(); - virtual const std::shared_ptr begintuple(int64_t numfields); - virtual const std::shared_ptr index(int64_t index); - virtual const std::shared_ptr endtuple(); - virtual const std::shared_ptr beginrecord(const char* name, bool check); - virtual const std::shared_ptr field(const char* key, bool check); - virtual const std::shared_ptr endrecord(); + Float64Fillable(const FillableOptions& options, const GrowableBuffer& buffer); + + const std::string classname() const override; + int64_t length() const override; + void clear() override; + const std::shared_ptr type() const override; + const std::shared_ptr snapshot(const std::shared_ptr& type) const override; + + bool active() const override; + const std::shared_ptr null() override; + const std::shared_ptr boolean(bool x) override; + const std::shared_ptr integer(int64_t x) override; + const std::shared_ptr real(double x) override; + const std::shared_ptr string(const char* x, int64_t length, const char* encoding) override; + const std::shared_ptr beginlist() override; + const std::shared_ptr endlist() override; + const std::shared_ptr begintuple(int64_t numfields) override; + const std::shared_ptr index(int64_t index) override; + const std::shared_ptr endtuple() override; + const std::shared_ptr beginrecord(const char* name, bool check) override; + const std::shared_ptr field(const char* key, bool check) override; + const std::shared_ptr endrecord() override; private: const FillableOptions options_; diff --git a/include/awkward/fillable/GrowableBuffer.h b/include/awkward/fillable/GrowableBuffer.h index ff4d539736..3c0411ee12 100644 --- a/include/awkward/fillable/GrowableBuffer.h +++ b/include/awkward/fillable/GrowableBuffer.h @@ -15,86 +15,21 @@ namespace awkward { template class GrowableBuffer { public: - GrowableBuffer(const FillableOptions& options, std::shared_ptr ptr, int64_t length, int64_t reserved): options_(options), ptr_(ptr), length_(length), reserved_(reserved) { } - 
GrowableBuffer(const FillableOptions& options): GrowableBuffer(options, std::shared_ptr(new T[(size_t)options.initial()], awkward::util::array_deleter()), 0, options.initial()) { } - - static GrowableBuffer empty(const FillableOptions& options) { - return GrowableBuffer::empty(options, 0); - } - - static GrowableBuffer empty(const FillableOptions& options, int64_t minreserve) { - size_t actual = (size_t)options.initial(); - if (actual < (size_t)minreserve) { - actual = (size_t)minreserve; - } - std::shared_ptr ptr(new T[actual], awkward::util::array_deleter()); - return GrowableBuffer(options, ptr, 0, (int64_t)actual); - } - - static GrowableBuffer full(const FillableOptions& options, T value, int64_t length) { - GrowableBuffer out = empty(options, length); - T* rawptr = out.ptr().get(); - for (int64_t i = 0; i < length; i++) { - rawptr[i] = value; - } - return GrowableBuffer(options, out.ptr(), length, out.reserved()); - } - - static GrowableBuffer arange(const FillableOptions& options, int64_t length) { - size_t actual = (size_t)options.initial(); - if (actual < (size_t)length) { - actual = (size_t)length; - } - T* rawptr = new T[(size_t)actual]; - std::shared_ptr ptr(rawptr, awkward::util::array_deleter()); - for (int64_t i = 0; i < length; i++) { - rawptr[i] = (T)i; - } - return GrowableBuffer(options, ptr, length, (int64_t)actual); - } - - const std::shared_ptr ptr() const { return ptr_; } - - int64_t length() const { return length_; } - void set_length(int64_t newlength) { - if (newlength > reserved_) { - set_reserved(newlength); - } - length_ = newlength; - } - - int64_t reserved() const { return reserved_; } - void set_reserved(int64_t minreserved) { - if (minreserved > reserved_) { - std::shared_ptr ptr(new T[(size_t)minreserved], awkward::util::array_deleter()); - memcpy(ptr.get(), ptr_.get(), (size_t)(length_ * sizeof(T))); - ptr_ = ptr; - reserved_ = minreserved; - } - } - - void clear() { - length_ = 0; - reserved_ = options_.initial(); - ptr_ = 
std::shared_ptr(new T[(size_t)options_.initial()], awkward::util::array_deleter()); - } - - void append(T datum) { - assert(length_ <= reserved_); - if (length_ == reserved_) { - set_reserved((int64_t)ceil(reserved_ * options_.resize())); - } - ptr_.get()[length_] = datum; - length_++; - } - - T getitem_at_nowrap(int64_t at) const { - return ptr_.get()[at]; - } - - IndexOf toindex() const { - return IndexOf(ptr_, 0, length_); - } + static GrowableBuffer empty(const FillableOptions& options); + static GrowableBuffer empty(const FillableOptions& options, int64_t minreserve); + static GrowableBuffer full(const FillableOptions& options, T value, int64_t length); + static GrowableBuffer arange(const FillableOptions& options, int64_t length); + + GrowableBuffer(const FillableOptions& options, std::shared_ptr ptr, int64_t length, int64_t reserved); + GrowableBuffer(const FillableOptions& options); + const std::shared_ptr ptr() const; + int64_t length() const; + void set_length(int64_t newlength); + int64_t reserved() const; + void set_reserved(int64_t minreserved); + void clear(); + void append(T datum); + T getitem_at_nowrap(int64_t at) const; private: const FillableOptions options_; diff --git a/include/awkward/fillable/Int64Fillable.h b/include/awkward/fillable/Int64Fillable.h index 41967a8bc7..b2e867b79a 100644 --- a/include/awkward/fillable/Int64Fillable.h +++ b/include/awkward/fillable/Int64Fillable.h @@ -11,32 +11,31 @@ namespace awkward { class Int64Fillable: public Fillable { public: - Int64Fillable(const FillableOptions& options, const GrowableBuffer& buffer): options_(options), buffer_(buffer) { } - static const std::shared_ptr fromempty(const FillableOptions& options); - virtual const std::string classname() const { return "Int64Fillable"; }; - virtual int64_t length() const; - virtual void clear(); - virtual const std::shared_ptr type() const; - virtual const std::shared_ptr snapshot() const; - - virtual bool active() const; - virtual const std::shared_ptr 
null(); - virtual const std::shared_ptr boolean(bool x); - virtual const std::shared_ptr integer(int64_t x); - virtual const std::shared_ptr real(double x); - virtual const std::shared_ptr string(const char* x, int64_t length, const char* encoding); - virtual const std::shared_ptr beginlist(); - virtual const std::shared_ptr endlist(); - virtual const std::shared_ptr begintuple(int64_t numfields); - virtual const std::shared_ptr index(int64_t index); - virtual const std::shared_ptr endtuple(); - virtual const std::shared_ptr beginrecord(const char* name, bool check); - virtual const std::shared_ptr field(const char* key, bool check); - virtual const std::shared_ptr endrecord(); - - const GrowableBuffer buffer() const { return buffer_; } + Int64Fillable(const FillableOptions& options, const GrowableBuffer& buffer); + const GrowableBuffer buffer() const; + + const std::string classname() const override; + int64_t length() const override; + void clear() override; + const std::shared_ptr type() const override; + const std::shared_ptr snapshot(const std::shared_ptr& type) const override; + + bool active() const override; + const std::shared_ptr null() override; + const std::shared_ptr boolean(bool x) override; + const std::shared_ptr integer(int64_t x) override; + const std::shared_ptr real(double x) override; + const std::shared_ptr string(const char* x, int64_t length, const char* encoding) override; + const std::shared_ptr beginlist() override; + const std::shared_ptr endlist() override; + const std::shared_ptr begintuple(int64_t numfields) override; + const std::shared_ptr index(int64_t index) override; + const std::shared_ptr endtuple() override; + const std::shared_ptr beginrecord(const char* name, bool check) override; + const std::shared_ptr field(const char* key, bool check) override; + const std::shared_ptr endrecord() override; private: const FillableOptions options_; diff --git a/include/awkward/fillable/ListFillable.h 
b/include/awkward/fillable/ListFillable.h index 60fded2729..bb53dcbaf0 100644 --- a/include/awkward/fillable/ListFillable.h +++ b/include/awkward/fillable/ListFillable.h @@ -14,30 +14,30 @@ namespace awkward { class ListFillable: public Fillable { public: - ListFillable(const FillableOptions& options, const GrowableBuffer& offsets, std::shared_ptr content, bool begun): options_(options), offsets_(offsets), content_(std::shared_ptr(content)), begun_(begun) { } - static const std::shared_ptr fromempty(const FillableOptions& options); - virtual const std::string classname() const { return "ListFillable"; }; - virtual int64_t length() const; - virtual void clear(); - virtual const std::shared_ptr type() const; - virtual const std::shared_ptr snapshot() const; - - virtual bool active() const; - virtual const std::shared_ptr null(); - virtual const std::shared_ptr boolean(bool x); - virtual const std::shared_ptr integer(int64_t x); - virtual const std::shared_ptr real(double x); - virtual const std::shared_ptr string(const char* x, int64_t length, const char* encoding); - virtual const std::shared_ptr beginlist(); - virtual const std::shared_ptr endlist(); - virtual const std::shared_ptr begintuple(int64_t numfields); - virtual const std::shared_ptr index(int64_t index); - virtual const std::shared_ptr endtuple(); - virtual const std::shared_ptr beginrecord(const char* name, bool check); - virtual const std::shared_ptr field(const char* key, bool check); - virtual const std::shared_ptr endrecord(); + ListFillable(const FillableOptions& options, const GrowableBuffer& offsets, std::shared_ptr content, bool begun); + + const std::string classname() const override; + int64_t length() const override; + void clear() override; + const std::shared_ptr type() const override; + const std::shared_ptr snapshot(const std::shared_ptr& type) const override; + + bool active() const override; + const std::shared_ptr null() override; + const std::shared_ptr boolean(bool x) override; + 
const std::shared_ptr integer(int64_t x) override; + const std::shared_ptr real(double x) override; + const std::shared_ptr string(const char* x, int64_t length, const char* encoding) override; + const std::shared_ptr beginlist() override; + const std::shared_ptr endlist() override; + const std::shared_ptr begintuple(int64_t numfields) override; + const std::shared_ptr index(int64_t index) override; + const std::shared_ptr endtuple() override; + const std::shared_ptr beginrecord(const char* name, bool check) override; + const std::shared_ptr field(const char* key, bool check) override; + const std::shared_ptr endrecord() override; private: const FillableOptions options_; diff --git a/include/awkward/fillable/OptionFillable.h b/include/awkward/fillable/OptionFillable.h index c3ce89e1df..d2feef14ba 100644 --- a/include/awkward/fillable/OptionFillable.h +++ b/include/awkward/fillable/OptionFillable.h @@ -13,31 +13,31 @@ namespace awkward { class OptionFillable: public Fillable { public: - OptionFillable(const FillableOptions& options, const GrowableBuffer& offsets, std::shared_ptr content): options_(options), offsets_(offsets), content_(content) { } - static const std::shared_ptr fromnulls(const FillableOptions& options, int64_t nullcount, std::shared_ptr content); static const std::shared_ptr fromvalids(const FillableOptions& options, std::shared_ptr content); - virtual const std::string classname() const { return "OptionFillable"; }; - virtual int64_t length() const; - virtual void clear(); - virtual const std::shared_ptr type() const; - virtual const std::shared_ptr snapshot() const; - - virtual bool active() const; - virtual const std::shared_ptr null(); - virtual const std::shared_ptr boolean(bool x); - virtual const std::shared_ptr integer(int64_t x); - virtual const std::shared_ptr real(double x); - virtual const std::shared_ptr string(const char* x, int64_t length, const char* encoding); - virtual const std::shared_ptr beginlist(); - virtual const 
std::shared_ptr endlist(); - virtual const std::shared_ptr begintuple(int64_t numfields); - virtual const std::shared_ptr index(int64_t index); - virtual const std::shared_ptr endtuple(); - virtual const std::shared_ptr beginrecord(const char* name, bool check); - virtual const std::shared_ptr field(const char* key, bool check); - virtual const std::shared_ptr endrecord(); + OptionFillable(const FillableOptions& options, const GrowableBuffer& offsets, std::shared_ptr content); + + const std::string classname() const override; + int64_t length() const override; + void clear() override; + const std::shared_ptr type() const override; + const std::shared_ptr snapshot(const std::shared_ptr& type) const override; + + bool active() const override; + const std::shared_ptr null() override; + const std::shared_ptr boolean(bool x) override; + const std::shared_ptr integer(int64_t x) override; + const std::shared_ptr real(double x) override; + const std::shared_ptr string(const char* x, int64_t length, const char* encoding) override; + const std::shared_ptr beginlist() override; + const std::shared_ptr endlist() override; + const std::shared_ptr begintuple(int64_t numfields) override; + const std::shared_ptr index(int64_t index) override; + const std::shared_ptr endtuple() override; + const std::shared_ptr beginrecord(const char* name, bool check) override; + const std::shared_ptr field(const char* key, bool check) override; + const std::shared_ptr endrecord() override; private: const FillableOptions options_; diff --git a/include/awkward/fillable/RecordFillable.h b/include/awkward/fillable/RecordFillable.h index 2558cabde0..0679de6b38 100644 --- a/include/awkward/fillable/RecordFillable.h +++ b/include/awkward/fillable/RecordFillable.h @@ -14,43 +14,33 @@ namespace awkward { class RecordFillable: public Fillable { public: - RecordFillable(const FillableOptions& options, const std::vector>& contents, const std::vector& keys, const std::vector& pointers, const std::string& 
name, const char* nameptr, int64_t length, bool begun, int64_t nextindex, int64_t nexttotry) - : options_(options) - , contents_(contents) - , keys_(keys) - , pointers_(pointers) - , name_(name) - , nameptr_(nameptr) - , length_(length) - , begun_(begun) - , nextindex_(nextindex) - , nexttotry_(nexttotry) { } - static const std::shared_ptr fromempty(const FillableOptions& options); - virtual const std::string classname() const { return "RecordFillable"; }; - virtual int64_t length() const; - virtual void clear(); - virtual const std::shared_ptr type() const; - virtual const std::shared_ptr snapshot() const; + RecordFillable(const FillableOptions& options, const std::vector>& contents, const std::vector& keys, const std::vector& pointers, const std::string& name, const char* nameptr, int64_t length, bool begun, int64_t nextindex, int64_t nexttotry); + + const std::string name() const; + const char* nameptr() const; - virtual bool active() const; - virtual const std::shared_ptr null(); - virtual const std::shared_ptr boolean(bool x); - virtual const std::shared_ptr integer(int64_t x); - virtual const std::shared_ptr real(double x); - virtual const std::shared_ptr string(const char* x, int64_t length, const char* encoding); - virtual const std::shared_ptr beginlist(); - virtual const std::shared_ptr endlist(); - virtual const std::shared_ptr begintuple(int64_t numfields); - virtual const std::shared_ptr index(int64_t index); - virtual const std::shared_ptr endtuple(); - virtual const std::shared_ptr beginrecord(const char* name, bool check); - virtual const std::shared_ptr field(const char* key, bool check); - virtual const std::shared_ptr endrecord(); + const std::string classname() const override; + int64_t length() const override; + void clear() override; + const std::shared_ptr type() const override; + const std::shared_ptr snapshot(const std::shared_ptr& type) const override; - const std::string name() const { return name_; } - const char* nameptr() const { 
return nameptr_; } + bool active() const override; + const std::shared_ptr null() override; + const std::shared_ptr boolean(bool x) override; + const std::shared_ptr integer(int64_t x) override; + const std::shared_ptr real(double x) override; + const std::shared_ptr string(const char* x, int64_t length, const char* encoding) override; + const std::shared_ptr beginlist() override; + const std::shared_ptr endlist() override; + const std::shared_ptr begintuple(int64_t numfields) override; + const std::shared_ptr index(int64_t index) override; + const std::shared_ptr endtuple() override; + const std::shared_ptr beginrecord(const char* name, bool check) override; + const std::shared_ptr field(const char* key, bool check) override; + const std::shared_ptr endrecord() override; private: const std::shared_ptr field_fast(const char* key); diff --git a/include/awkward/fillable/StringFillable.h b/include/awkward/fillable/StringFillable.h index 7962be0552..87acee5b00 100644 --- a/include/awkward/fillable/StringFillable.h +++ b/include/awkward/fillable/StringFillable.h @@ -11,32 +11,31 @@ namespace awkward { class StringFillable: public Fillable { public: - StringFillable(const FillableOptions& options, const GrowableBuffer& offsets, GrowableBuffer& content, const char* encoding): options_(options), offsets_(offsets), content_(content), encoding_(encoding) { } - static const std::shared_ptr fromempty(const FillableOptions& options, const char* encoding); - virtual const std::string classname() const { return "StringFillable"; }; - virtual int64_t length() const; - virtual void clear(); - virtual const std::shared_ptr type() const; - virtual const std::shared_ptr snapshot() const; - - virtual bool active() const; - virtual const std::shared_ptr null(); - virtual const std::shared_ptr boolean(bool x); - virtual const std::shared_ptr integer(int64_t x); - virtual const std::shared_ptr real(double x); - virtual const std::shared_ptr string(const char* x, int64_t length, const 
char* encoding); - virtual const std::shared_ptr beginlist(); - virtual const std::shared_ptr endlist(); - virtual const std::shared_ptr begintuple(int64_t numfields); - virtual const std::shared_ptr index(int64_t index); - virtual const std::shared_ptr endtuple(); - virtual const std::shared_ptr beginrecord(const char* name, bool check); - virtual const std::shared_ptr field(const char* key, bool check); - virtual const std::shared_ptr endrecord(); - - const char* encoding() const { return encoding_; } + StringFillable(const FillableOptions& options, const GrowableBuffer& offsets, GrowableBuffer& content, const char* encoding); + const char* encoding() const; + + const std::string classname() const override; + int64_t length() const override; + void clear() override; + const std::shared_ptr type() const override; + const std::shared_ptr snapshot(const std::shared_ptr& type) const override; + + bool active() const override; + const std::shared_ptr null() override; + const std::shared_ptr boolean(bool x) override; + const std::shared_ptr integer(int64_t x) override; + const std::shared_ptr real(double x) override; + const std::shared_ptr string(const char* x, int64_t length, const char* encoding) override; + const std::shared_ptr beginlist() override; + const std::shared_ptr endlist() override; + const std::shared_ptr begintuple(int64_t numfields) override; + const std::shared_ptr index(int64_t index) override; + const std::shared_ptr endtuple() override; + const std::shared_ptr beginrecord(const char* name, bool check) override; + const std::shared_ptr field(const char* key, bool check) override; + const std::shared_ptr endrecord() override; private: const FillableOptions options_; diff --git a/include/awkward/fillable/TupleFillable.h b/include/awkward/fillable/TupleFillable.h index 71dd012f2f..e18f7ce7f2 100644 --- a/include/awkward/fillable/TupleFillable.h +++ b/include/awkward/fillable/TupleFillable.h @@ -14,37 +14,31 @@ namespace awkward { class TupleFillable: 
public Fillable { public: - TupleFillable(const FillableOptions& options, const std::vector>& contents, int64_t length, bool begun, size_t nextindex) - : options_(options) - , contents_(contents) - , length_(length) - , begun_(begun) - , nextindex_(nextindex) { } - static const std::shared_ptr fromempty(const FillableOptions& options); - virtual const std::string classname() const { return "TupleFillable"; }; - virtual int64_t length() const; - virtual void clear(); - virtual const std::shared_ptr type() const; - virtual const std::shared_ptr snapshot() const; - - virtual bool active() const; - virtual const std::shared_ptr null(); - virtual const std::shared_ptr boolean(bool x); - virtual const std::shared_ptr integer(int64_t x); - virtual const std::shared_ptr real(double x); - virtual const std::shared_ptr string(const char* x, int64_t length, const char* encoding); - virtual const std::shared_ptr beginlist(); - virtual const std::shared_ptr endlist(); - virtual const std::shared_ptr begintuple(int64_t numfields); - virtual const std::shared_ptr index(int64_t index); - virtual const std::shared_ptr endtuple(); - virtual const std::shared_ptr beginrecord(const char* name, bool check); - virtual const std::shared_ptr field(const char* key, bool check); - virtual const std::shared_ptr endrecord(); - - int64_t numfields() const { return (int64_t)contents_.size(); } + TupleFillable(const FillableOptions& options, const std::vector>& contents, int64_t length, bool begun, size_t nextindex); + int64_t numfields() const; + + const std::string classname() const override; + int64_t length() const override; + void clear() override; + const std::shared_ptr type() const override; + const std::shared_ptr snapshot(const std::shared_ptr& type) const override; + + bool active() const override; + const std::shared_ptr null() override; + const std::shared_ptr boolean(bool x) override; + const std::shared_ptr integer(int64_t x) override; + const std::shared_ptr real(double x) 
override; + const std::shared_ptr string(const char* x, int64_t length, const char* encoding) override; + const std::shared_ptr beginlist() override; + const std::shared_ptr endlist() override; + const std::shared_ptr begintuple(int64_t numfields) override; + const std::shared_ptr index(int64_t index) override; + const std::shared_ptr endtuple() override; + const std::shared_ptr beginrecord(const char* name, bool check) override; + const std::shared_ptr field(const char* key, bool check) override; + const std::shared_ptr endrecord() override; private: const FillableOptions options_; diff --git a/include/awkward/fillable/UnionFillable.h b/include/awkward/fillable/UnionFillable.h index 4a83935000..3d2e6c1259 100644 --- a/include/awkward/fillable/UnionFillable.h +++ b/include/awkward/fillable/UnionFillable.h @@ -16,30 +16,30 @@ namespace awkward { class UnionFillable: public Fillable { public: - UnionFillable(const FillableOptions& options, const GrowableBuffer& types, const GrowableBuffer& offsets, std::vector> contents): options_(options), types_(types), offsets_(offsets), contents_(contents), current_(-1) { } - - static const std::shared_ptr fromsingle(const FillableOptions& options, const std::shared_ptr firstcontent); - - virtual const std::string classname() const { return "UnionFillable"; }; - virtual int64_t length() const; - virtual void clear(); - virtual const std::shared_ptr type() const; - virtual const std::shared_ptr snapshot() const; - - virtual bool active() const; - virtual const std::shared_ptr null(); - virtual const std::shared_ptr boolean(bool x); - virtual const std::shared_ptr integer(int64_t x); - virtual const std::shared_ptr real(double x); - virtual const std::shared_ptr string(const char* x, int64_t length, const char* encoding); - virtual const std::shared_ptr beginlist(); - virtual const std::shared_ptr endlist(); - virtual const std::shared_ptr begintuple(int64_t numfields); - virtual const std::shared_ptr index(int64_t index); - 
virtual const std::shared_ptr endtuple(); - virtual const std::shared_ptr beginrecord(const char* name, bool check); - virtual const std::shared_ptr field(const char* key, bool check); - virtual const std::shared_ptr endrecord(); + static const std::shared_ptr fromsingle(const FillableOptions& options, const std::shared_ptr& firstcontent); + + UnionFillable(const FillableOptions& options, const GrowableBuffer& types, const GrowableBuffer& offsets, std::vector>& contents); + + const std::string classname() const override; + int64_t length() const override; + void clear() override; + const std::shared_ptr type() const override; + const std::shared_ptr snapshot(const std::shared_ptr& type) const override; + + bool active() const override; + const std::shared_ptr null() override; + const std::shared_ptr boolean(bool x) override; + const std::shared_ptr integer(int64_t x) override; + const std::shared_ptr real(double x) override; + const std::shared_ptr string(const char* x, int64_t length, const char* encoding) override; + const std::shared_ptr beginlist() override; + const std::shared_ptr endlist() override; + const std::shared_ptr begintuple(int64_t numfields) override; + const std::shared_ptr index(int64_t index) override; + const std::shared_ptr endtuple() override; + const std::shared_ptr beginrecord(const char* name, bool check) override; + const std::shared_ptr field(const char* key, bool check) override; + const std::shared_ptr endrecord() override; private: const FillableOptions options_; diff --git a/include/awkward/fillable/UnknownFillable.h b/include/awkward/fillable/UnknownFillable.h index f050bc26ee..6fea0aa1db 100644 --- a/include/awkward/fillable/UnknownFillable.h +++ b/include/awkward/fillable/UnknownFillable.h @@ -12,30 +12,30 @@ namespace awkward { class UnknownFillable: public Fillable { public: - UnknownFillable(const FillableOptions& options, int64_t nullcount): options_(options), nullcount_(nullcount) { } - static const std::shared_ptr 
fromempty(const FillableOptions& options); - virtual const std::string classname() const { return "UnknownFillable"; }; - virtual int64_t length() const; - virtual void clear(); - virtual const std::shared_ptr type() const; - virtual const std::shared_ptr snapshot() const; - - virtual bool active() const; - virtual const std::shared_ptr null(); - virtual const std::shared_ptr boolean(bool x); - virtual const std::shared_ptr integer(int64_t x); - virtual const std::shared_ptr real(double x); - virtual const std::shared_ptr string(const char* x, int64_t length, const char* encoding); - virtual const std::shared_ptr beginlist(); - virtual const std::shared_ptr endlist(); - virtual const std::shared_ptr begintuple(int64_t numfields); - virtual const std::shared_ptr index(int64_t index); - virtual const std::shared_ptr endtuple(); - virtual const std::shared_ptr beginrecord(const char* name, bool check); - virtual const std::shared_ptr field(const char* key, bool check); - virtual const std::shared_ptr endrecord(); + UnknownFillable(const FillableOptions& options, int64_t nullcount); + + const std::string classname() const override; + int64_t length() const override; + void clear() override; + const std::shared_ptr type() const override; + const std::shared_ptr snapshot(const std::shared_ptr& type) const override; + + bool active() const override; + const std::shared_ptr null() override; + const std::shared_ptr boolean(bool x) override; + const std::shared_ptr integer(int64_t x) override; + const std::shared_ptr real(double x) override; + const std::shared_ptr string(const char* x, int64_t length, const char* encoding) override; + const std::shared_ptr beginlist() override; + const std::shared_ptr endlist() override; + const std::shared_ptr begintuple(int64_t numfields) override; + const std::shared_ptr index(int64_t index) override; + const std::shared_ptr endtuple() override; + const std::shared_ptr beginrecord(const char* name, bool check) override; + const 
std::shared_ptr field(const char* key, bool check) override; + const std::shared_ptr endrecord() override; private: const FillableOptions options_; diff --git a/include/awkward/io/json.h b/include/awkward/io/json.h index 7f28f914e0..60976b98d0 100644 --- a/include/awkward/io/json.h +++ b/include/awkward/io/json.h @@ -6,20 +6,10 @@ #include #include -#include "rapidjson/reader.h" -#include "rapidjson/writer.h" -#include "rapidjson/prettywriter.h" -#include "rapidjson/stringbuffer.h" -#include "rapidjson/filereadstream.h" -#include "rapidjson/filewritestream.h" -#include "rapidjson/error/en.h" - #include "awkward/fillable/FillableOptions.h" #include "awkward/cpu-kernels/util.h" #include "awkward/util.h" -namespace rj = rapidjson; - namespace awkward { class Content; @@ -42,112 +32,81 @@ namespace awkward { class ToJsonString: public ToJson { public: - ToJsonString(int64_t maxdecimals): buffer_(), writer_(buffer_) { - if (maxdecimals >= 0) { - writer_.SetMaxDecimalPlaces((int)maxdecimals); - } - } - - virtual void null() { writer_.Null(); } - virtual void boolean(bool x) { writer_.Bool(x); } - virtual void integer(int64_t x) { writer_.Int64(x); } - virtual void real(double x) { writer_.Double(x); } - virtual void string(const char* x, int64_t length) { writer_.String(x, (rj::SizeType)length); } - virtual void beginlist() { writer_.StartArray(); } - virtual void endlist() { writer_.EndArray(); } - virtual void beginrecord() { writer_.StartObject(); } - virtual void field(const char* x) { writer_.Key(x); } - virtual void endrecord() { writer_.EndObject(); } - - std::string tostring() { - return std::string(buffer_.GetString()); - } - + ToJsonString(int64_t maxdecimals); + ~ToJsonString(); + void null() override; + void boolean(bool x) override; + void integer(int64_t x) override; + void real(double x) override; + void string(const char* x, int64_t length) override; + void beginlist() override; + void endlist() override; + void beginrecord() override; + void field(const 
char* x) override; + void endrecord() override; + const std::string tostring(); private: - rj::StringBuffer buffer_; - rj::Writer writer_; + class Impl; + Impl* impl_; }; class ToJsonPrettyString: public ToJson { public: - ToJsonPrettyString(int64_t maxdecimals): buffer_(), writer_(buffer_) { - if (maxdecimals >= 0) { - writer_.SetMaxDecimalPlaces((int)maxdecimals); - } - } - - virtual void null() { writer_.Null(); } - virtual void boolean(bool x) { writer_.Bool(x); } - virtual void integer(int64_t x) { writer_.Int64(x); } - virtual void real(double x) { writer_.Double(x); } - virtual void string(const char* x, int64_t length) { writer_.String(x, (rj::SizeType)length); } - virtual void beginlist() { writer_.StartArray(); } - virtual void endlist() { writer_.EndArray(); } - virtual void beginrecord() { writer_.StartObject(); } - virtual void field(const char* x) { writer_.Key(x); } - virtual void endrecord() { writer_.EndObject(); } - - std::string tostring() { - return std::string(buffer_.GetString()); - } - + ToJsonPrettyString(int64_t maxdecimals); + ~ToJsonPrettyString(); + void null() override; + void boolean(bool x) override; + void integer(int64_t x) override; + void real(double x) override; + void string(const char* x, int64_t length) override; + void beginlist() override; + void endlist() override; + void beginrecord() override; + void field(const char* x) override; + void endrecord() override; + const std::string tostring(); private: - rj::StringBuffer buffer_; - rj::PrettyWriter writer_; + class Impl; + Impl* impl_; }; class ToJsonFile: public ToJson { public: - ToJsonFile(FILE* destination, int64_t maxdecimals, int64_t buffersize): buffer_(new char[(size_t)buffersize], awkward::util::array_deleter()), stream_(destination, buffer_.get(), ((size_t)buffersize)*sizeof(char)), writer_(stream_) { - if (maxdecimals >= 0) { - writer_.SetMaxDecimalPlaces((int)maxdecimals); - } - } - - virtual void null() { writer_.Null(); } - virtual void boolean(bool x) { 
writer_.Bool(x); } - virtual void integer(int64_t x) { writer_.Int64(x); } - virtual void real(double x) { writer_.Double(x); } - virtual void string(const char* x, int64_t length) { writer_.String(x, (rj::SizeType)length); } - virtual void beginlist() { writer_.StartArray(); } - virtual void endlist() { writer_.EndArray(); } - virtual void beginrecord() { writer_.StartObject(); } - virtual void field(const char* x) { writer_.Key(x); } - virtual void endrecord() { writer_.EndObject(); } - + ToJsonFile(FILE* destination, int64_t maxdecimals, int64_t buffersize); + ~ToJsonFile(); + void null() override; + void boolean(bool x) override; + void integer(int64_t x) override; + void real(double x) override; + void string(const char* x, int64_t length) override; + void beginlist() override; + void endlist() override; + void beginrecord() override; + void field(const char* x) override; + void endrecord() override; private: - std::shared_ptr buffer_; - rj::FileWriteStream stream_; - rj::Writer writer_; + class Impl; + Impl* impl_; }; class ToJsonPrettyFile: public ToJson { public: - ToJsonPrettyFile(FILE* destination, int64_t maxdecimals, int64_t buffersize): buffer_(new char[(size_t)buffersize], awkward::util::array_deleter()), stream_(destination, buffer_.get(), ((size_t)buffersize)*sizeof(char)), writer_(stream_) { - if (maxdecimals >= 0) { - writer_.SetMaxDecimalPlaces((int)maxdecimals); - } - } - - virtual void null() { writer_.Null(); } - virtual void boolean(bool x) { writer_.Bool(x); } - virtual void integer(int64_t x) { writer_.Int64(x); } - virtual void real(double x) { writer_.Double(x); } - virtual void string(const char* x, int64_t length) { writer_.String(x, (rj::SizeType)length); } - virtual void beginlist() { writer_.StartArray(); } - virtual void endlist() { writer_.EndArray(); } - virtual void beginrecord() { writer_.StartObject(); } - virtual void field(const char* x) { writer_.Key(x); } - virtual void endrecord() { writer_.EndObject(); } - + 
ToJsonPrettyFile(FILE* destination, int64_t maxdecimals, int64_t buffersize); + ~ToJsonPrettyFile(); + void null() override; + void boolean(bool x) override; + void integer(int64_t x) override; + void real(double x) override; + void string(const char* x, int64_t length) override; + void beginlist() override; + void endlist() override; + void beginrecord() override; + void field(const char* x) override; + void endrecord() override; private: - std::shared_ptr buffer_; - rj::FileWriteStream stream_; - rj::PrettyWriter writer_; + class Impl; + Impl* impl_; }; - } #endif // AWKWARD_IO_JSON_H_ - -// , rj::UTF8<>, rj::UTF8<>, rj::CrtAllocator<>, rj::kWriteNanAndInfFlag, rj::UTF8<>, rj::UTF8<>, rj::CrtAllocator<>, rj::kWriteNanAndInfFlag, rj::UTF8<>, rj::UTF8<>, rj::CrtAllocator<>, rj::kWriteNanAndInfFlag, rj::UTF8<>, rj::UTF8<>, rj::CrtAllocator<>, rj::kWriteNanAndInfFlag diff --git a/include/awkward/type/ArrayType.h b/include/awkward/type/ArrayType.h index e0dd32aa2b..bbf5b9402a 100644 --- a/include/awkward/type/ArrayType.h +++ b/include/awkward/type/ArrayType.h @@ -8,25 +8,18 @@ namespace awkward { class ArrayType: public Type { public: - ArrayType(const Parameters& parameters, const std::shared_ptr type, int64_t length) - : Type(parameters) - , type_(type) - , length_(length) { } + ArrayType(const Parameters& parameters, const std::shared_ptr& type, int64_t length); - virtual std::string tostring_part(std::string indent, std::string pre, std::string post) const; - virtual const std::shared_ptr shallow_copy() const; - virtual bool equal(const std::shared_ptr other, bool check_parameters) const; - virtual std::shared_ptr nolength() const; - virtual std::shared_ptr level() const; - virtual std::shared_ptr inner() const; - virtual std::shared_ptr inner(const std::string& key) const; - virtual int64_t numfields() const; - virtual int64_t fieldindex(const std::string& key) const; - virtual const std::string key(int64_t fieldindex) const; - virtual bool haskey(const 
std::string& key) const; - virtual const std::vector keyaliases(int64_t fieldindex) const; - virtual const std::vector keyaliases(const std::string& key) const; - virtual const std::vector keys() const; + std::string tostring_part(const std::string& indent, const std::string& pre, const std::string& post) const override; + const std::shared_ptr shallow_copy() const override; + bool equal(const std::shared_ptr& other, bool check_parameters) const override; + int64_t numfields() const override; + int64_t fieldindex(const std::string& key) const override; + const std::string key(int64_t fieldindex) const override; + bool haskey(const std::string& key) const override; + const std::vector keyaliases(int64_t fieldindex) const override; + const std::vector keyaliases(const std::string& key) const override; + const std::vector keys() const override; const std::shared_ptr type() const; int64_t length() const; diff --git a/include/awkward/type/ListType.h b/include/awkward/type/ListType.h index fb568914fd..88e321b22a 100644 --- a/include/awkward/type/ListType.h +++ b/include/awkward/type/ListType.h @@ -8,23 +8,18 @@ namespace awkward { class ListType: public Type { public: - ListType(const Parameters& parameters, const std::shared_ptr type) - : Type(parameters) - , type_(type) { } + ListType(const Parameters& parameters, const std::shared_ptr& type); - virtual std::string tostring_part(std::string indent, std::string pre, std::string post) const; - virtual const std::shared_ptr shallow_copy() const; - virtual bool equal(const std::shared_ptr other, bool check_parameters) const; - virtual std::shared_ptr level() const; - virtual std::shared_ptr inner() const; - virtual std::shared_ptr inner(const std::string& key) const; - virtual int64_t numfields() const; - virtual int64_t fieldindex(const std::string& key) const; - virtual const std::string key(int64_t fieldindex) const; - virtual bool haskey(const std::string& key) const; - virtual const std::vector keyaliases(int64_t 
fieldindex) const; - virtual const std::vector keyaliases(const std::string& key) const; - virtual const std::vector keys() const; + std::string tostring_part(const std::string& indent, const std::string& pre, const std::string& post) const override; + const std::shared_ptr shallow_copy() const override; + bool equal(const std::shared_ptr& other, bool check_parameters) const override; + int64_t numfields() const override; + int64_t fieldindex(const std::string& key) const override; + const std::string key(int64_t fieldindex) const override; + bool haskey(const std::string& key) const override; + const std::vector keyaliases(int64_t fieldindex) const override; + const std::vector keyaliases(const std::string& key) const override; + const std::vector keys() const override; const std::shared_ptr type() const; diff --git a/include/awkward/type/OptionType.h b/include/awkward/type/OptionType.h index adfd2a2c1a..1b8b1afd89 100644 --- a/include/awkward/type/OptionType.h +++ b/include/awkward/type/OptionType.h @@ -8,23 +8,18 @@ namespace awkward { class OptionType: public Type { public: - OptionType(const Parameters& parameters, const std::shared_ptr type) - : Type(parameters) - , type_(type) { } + OptionType(const Parameters& parameters, const std::shared_ptr& type); - virtual std::string tostring_part(std::string indent, std::string pre, std::string post) const; - virtual const std::shared_ptr shallow_copy() const; - virtual bool equal(const std::shared_ptr other, bool check_parameters) const; - virtual std::shared_ptr level() const; - virtual std::shared_ptr inner() const; - virtual std::shared_ptr inner(const std::string& key) const; - virtual int64_t numfields() const; - virtual int64_t fieldindex(const std::string& key) const; - virtual const std::string key(int64_t fieldindex) const; - virtual bool haskey(const std::string& key) const; - virtual const std::vector keyaliases(int64_t fieldindex) const; - virtual const std::vector keyaliases(const std::string& key) 
const; - virtual const std::vector keys() const; + std::string tostring_part(const std::string& indent, const std::string& pre, const std::string& post) const override; + const std::shared_ptr shallow_copy() const override; + bool equal(const std::shared_ptr& other, bool check_parameters) const override; + int64_t numfields() const override; + int64_t fieldindex(const std::string& key) const override; + const std::string key(int64_t fieldindex) const override; + bool haskey(const std::string& key) const override; + const std::vector keyaliases(int64_t fieldindex) const override; + const std::vector keyaliases(const std::string& key) const override; + const std::vector keys() const override; const std::shared_ptr type() const; diff --git a/include/awkward/type/PrimitiveType.h b/include/awkward/type/PrimitiveType.h index ff165648ee..ae685bd35e 100644 --- a/include/awkward/type/PrimitiveType.h +++ b/include/awkward/type/PrimitiveType.h @@ -23,23 +23,18 @@ namespace awkward { numtypes }; - PrimitiveType(const Parameters& parameters, DType dtype) - : Type(parameters) - , dtype_(dtype) { } - - virtual std::string tostring_part(std::string indent, std::string pre, std::string post) const; - virtual const std::shared_ptr shallow_copy() const; - virtual bool equal(const std::shared_ptr other, bool check_parameters) const; - virtual std::shared_ptr level() const; - virtual std::shared_ptr inner() const; - virtual std::shared_ptr inner(const std::string& key) const; - virtual int64_t numfields() const; - virtual int64_t fieldindex(const std::string& key) const; - virtual const std::string key(int64_t fieldindex) const; - virtual bool haskey(const std::string& key) const; - virtual const std::vector keyaliases(int64_t fieldindex) const; - virtual const std::vector keyaliases(const std::string& key) const; - virtual const std::vector keys() const; + PrimitiveType(const Parameters& parameters, DType dtype); + + std::string tostring_part(const std::string& indent, const 
std::string& pre, const std::string& post) const override; + const std::shared_ptr shallow_copy() const override; + bool equal(const std::shared_ptr& other, bool check_parameters) const override; + int64_t numfields() const override; + int64_t fieldindex(const std::string& key) const override; + const std::string key(int64_t fieldindex) const override; + bool haskey(const std::string& key) const override; + const std::vector keyaliases(int64_t fieldindex) const override; + const std::vector keyaliases(const std::string& key) const override; + const std::vector keys() const override; const DType dtype() const; diff --git a/include/awkward/type/RecordType.h b/include/awkward/type/RecordType.h index f19e45547d..8d0feb5a8d 100644 --- a/include/awkward/type/RecordType.h +++ b/include/awkward/type/RecordType.h @@ -15,44 +15,38 @@ namespace awkward { typedef std::unordered_map Lookup; typedef std::vector ReverseLookup; - RecordType(const Parameters& parameters, const std::vector>& types, const std::shared_ptr& lookup, const std::shared_ptr& reverselookup) - : Type(parameters) - , types_(types) - , lookup_(lookup) - , reverselookup_(reverselookup) { } - RecordType(const Parameters& parameters, const std::vector>& types) - : Type(parameters) - , types_(types) - , lookup_(nullptr) - , reverselookup_(nullptr) { } - - const std::vector> types() const { return types_; }; - const std::shared_ptr lookup() const { return lookup_; } - const std::shared_ptr reverselookup() const { return reverselookup_; } - - virtual std::string tostring_part(std::string indent, std::string pre, std::string post) const; - virtual const std::shared_ptr shallow_copy() const; - virtual bool equal(const std::shared_ptr other, bool check_parameters) const; - virtual std::shared_ptr level() const; - virtual std::shared_ptr inner() const; - virtual std::shared_ptr inner(const std::string& key) const; - virtual int64_t numfields() const; - virtual int64_t fieldindex(const std::string& key) const; - virtual 
const std::string key(int64_t fieldindex) const; - virtual bool haskey(const std::string& key) const; - virtual const std::vector keyaliases(int64_t fieldindex) const; - virtual const std::vector keyaliases(const std::string& key) const; - virtual const std::vector keys() const; + RecordType(const Parameters& parameters, const std::vector>& types, const std::shared_ptr& lookup, const std::shared_ptr& reverselookup); + RecordType(const Parameters& parameters, const std::vector>& types); + + const std::vector> types() const; + const std::shared_ptr lookup() const; + const std::shared_ptr reverselookup() const; + bool istuple() const; + + std::string tostring_part(const std::string& indent, const std::string& pre, const std::string& post) const override; + const std::shared_ptr shallow_copy() const override; + bool equal(const std::shared_ptr& other, bool check_parameters) const override; + int64_t numfields() const override; + int64_t fieldindex(const std::string& key) const override; + const std::string key(int64_t fieldindex) const override; + bool haskey(const std::string& key) const override; + const std::vector keyaliases(int64_t fieldindex) const override; + const std::vector keyaliases(const std::string& key) const override; + const std::vector keys() const override; const std::shared_ptr field(int64_t fieldindex) const; const std::shared_ptr field(const std::string& key) const; const std::vector> fields() const; const std::vector>> fielditems() const; + const std::shared_ptr astuple() const; + + void append(const std::shared_ptr& type); + void setkey(int64_t fieldindex, const std::string& key); private: - const std::vector> types_; - const std::shared_ptr lookup_; - const std::shared_ptr reverselookup_; + std::vector> types_; + std::shared_ptr lookup_; + std::shared_ptr reverselookup_; }; } diff --git a/include/awkward/type/RegularType.h b/include/awkward/type/RegularType.h index 9f8af20713..c9312f68f2 100644 --- a/include/awkward/type/RegularType.h +++ 
b/include/awkward/type/RegularType.h @@ -10,24 +10,18 @@ namespace awkward { class RegularType: public Type { public: - RegularType(const Parameters& parameters, const std::shared_ptr type, int64_t size) - : Type(parameters) - , type_(type) - , size_(size) { } - - virtual std::string tostring_part(std::string indent, std::string pre, std::string post) const; - virtual const std::shared_ptr shallow_copy() const; - virtual bool equal(const std::shared_ptr other, bool check_parameters) const; - virtual std::shared_ptr level() const; - virtual std::shared_ptr inner() const; - virtual std::shared_ptr inner(const std::string& key) const; - virtual int64_t numfields() const; - virtual int64_t fieldindex(const std::string& key) const; - virtual const std::string key(int64_t fieldindex) const; - virtual bool haskey(const std::string& key) const; - virtual const std::vector keyaliases(int64_t fieldindex) const; - virtual const std::vector keyaliases(const std::string& key) const; - virtual const std::vector keys() const; + RegularType(const Parameters& parameters, const std::shared_ptr& type, int64_t size); + + std::string tostring_part(const std::string& indent, const std::string& pre, const std::string& post) const override; + const std::shared_ptr shallow_copy() const override; + bool equal(const std::shared_ptr& other, bool check_parameters) const override; + int64_t numfields() const override; + int64_t fieldindex(const std::string& key) const override; + const std::string key(int64_t fieldindex) const override; + bool haskey(const std::string& key) const override; + const std::vector keyaliases(int64_t fieldindex) const override; + const std::vector keyaliases(const std::string& key) const override; + const std::vector keys() const override; const std::shared_ptr type() const; int64_t size() const; diff --git a/include/awkward/type/Type.h b/include/awkward/type/Type.h index bc6875c024..95f0b50131 100644 --- a/include/awkward/type/Type.h +++ 
b/include/awkward/type/Type.h @@ -14,18 +14,14 @@ namespace awkward { public: typedef std::map Parameters; - Type(const Parameters& parameters): parameters_(parameters) { } - virtual ~Type() { } + static std::shared_ptr none(); - static std::shared_ptr none() { return std::shared_ptr(nullptr); } + Type(const Parameters& parameters); + virtual ~Type(); - virtual std::string tostring_part(std::string indent, std::string pre, std::string post) const = 0; + virtual std::string tostring_part(const std::string& indent, const std::string& pre, const std::string& post) const = 0; virtual const std::shared_ptr shallow_copy() const = 0; - virtual bool equal(const std::shared_ptr other, bool check_parameters) const = 0; - virtual std::shared_ptr nolength() const; - virtual std::shared_ptr level() const = 0; - virtual std::shared_ptr inner() const = 0; - virtual std::shared_ptr inner(const std::string& key) const = 0; + virtual bool equal(const std::shared_ptr& other, bool check_parameters) const = 0; virtual int64_t numfields() const = 0; virtual int64_t fieldindex(const std::string& key) const = 0; virtual const std::string key(int64_t fieldindex) const = 0; @@ -34,30 +30,12 @@ namespace awkward { virtual const std::vector keyaliases(const std::string& key) const = 0; virtual const std::vector keys() const = 0; - const Parameters parameters() const { - return parameters_; - } - void setparameters(const Parameters& parameters) { - parameters_ = parameters; - } - std::string parameter(const std::string& key) { - return parameters_[key]; - } - void setparameter(const std::string& key, const std::string& value) { - parameters_[key] = value; - } - bool parameter_equals(const std::string& key, const std::string& value) { - auto item = parameters_.find(key); - if (item == parameters_.end()) { - return false; - } - else { - return item->second == value; - } - } - std::string tostring() const { - return tostring_part("", "", ""); - }; + const Parameters parameters() const; + void 
setparameters(const Parameters& parameters); + std::string parameter(const std::string& key); + void setparameter(const std::string& key, const std::string& value); + bool parameter_equals(const std::string& key, const std::string& value); + std::string tostring() const; const std::string compare(std::shared_ptr supertype); protected: diff --git a/include/awkward/type/UnionType.h b/include/awkward/type/UnionType.h index 0b63a0b274..46021dc610 100644 --- a/include/awkward/type/UnionType.h +++ b/include/awkward/type/UnionType.h @@ -10,21 +10,18 @@ namespace awkward { class UnionType: public Type { public: - UnionType(const Parameters& parameters, const std::vector>& types): Type(parameters), types_(types) { } - - virtual std::string tostring_part(std::string indent, std::string pre, std::string post) const; - virtual const std::shared_ptr shallow_copy() const; - virtual bool equal(const std::shared_ptr other, bool check_parameters) const; - virtual std::shared_ptr level() const; - virtual std::shared_ptr inner() const; - virtual std::shared_ptr inner(const std::string& key) const; - virtual int64_t numfields() const; - virtual int64_t fieldindex(const std::string& key) const; - virtual const std::string key(int64_t fieldindex) const; - virtual bool haskey(const std::string& key) const; - virtual const std::vector keyaliases(int64_t fieldindex) const; - virtual const std::vector keyaliases(const std::string& key) const; - virtual const std::vector keys() const; + UnionType(const Parameters& parameters, const std::vector>& types); + + std::string tostring_part(const std::string& indent, const std::string& pre, const std::string& post) const override; + const std::shared_ptr shallow_copy() const override; + bool equal(const std::shared_ptr& other, bool check_parameters) const override; + int64_t numfields() const override; + int64_t fieldindex(const std::string& key) const override; + const std::string key(int64_t fieldindex) const override; + bool haskey(const 
std::string& key) const override; + const std::vector keyaliases(int64_t fieldindex) const override; + const std::vector keyaliases(const std::string& key) const override; + const std::vector keys() const override; int64_t numtypes() const; const std::vector> types() const; diff --git a/include/awkward/type/UnknownType.h b/include/awkward/type/UnknownType.h index 272657c653..cd8943dd66 100644 --- a/include/awkward/type/UnknownType.h +++ b/include/awkward/type/UnknownType.h @@ -8,21 +8,18 @@ namespace awkward { class UnknownType: public Type { public: - UnknownType(const Parameters& parameters): Type(parameters) { } + UnknownType(const Parameters& parameters); - virtual std::string tostring_part(std::string indent, std::string pre, std::string post) const; - virtual const std::shared_ptr shallow_copy() const; - virtual bool equal(const std::shared_ptr other, bool check_parameters) const; - virtual std::shared_ptr level() const; - virtual std::shared_ptr inner() const; - virtual std::shared_ptr inner(const std::string& key) const; - virtual int64_t numfields() const; - virtual int64_t fieldindex(const std::string& key) const; - virtual const std::string key(int64_t fieldindex) const; - virtual bool haskey(const std::string& key) const; - virtual const std::vector keyaliases(int64_t fieldindex) const; - virtual const std::vector keyaliases(const std::string& key) const; - virtual const std::vector keys() const; + std::string tostring_part(const std::string& indent, const std::string& pre, const std::string& post) const override; + const std::shared_ptr shallow_copy() const override; + bool equal(const std::shared_ptr& other, bool check_parameters) const override; + int64_t numfields() const override; + int64_t fieldindex(const std::string& key) const override; + const std::string key(int64_t fieldindex) const override; + bool haskey(const std::string& key) const override; + const std::vector keyaliases(int64_t fieldindex) const override; + const std::vector 
keyaliases(const std::string& key) const override; + const std::vector keys() const override; private: }; diff --git a/include/awkward/util.h b/include/awkward/util.h index a933e73e43..fdecf8d294 100644 --- a/include/awkward/util.h +++ b/include/awkward/util.h @@ -12,7 +12,7 @@ namespace awkward { class Identity; namespace util { - void handle_error(const struct Error& err, const std::string classname, const Identity* id); + void handle_error(const struct Error& err, const std::string& classname, const Identity* id); template class array_deleter { @@ -28,7 +28,7 @@ namespace awkward { void operator()(T const *p) { } }; - std::string quote(std::string x, bool doublequote); + std::string quote(const std::string& x, bool doublequote); bool subset(const std::vector& super, const std::vector& sub); template diff --git a/setup.py b/setup.py index 2e32e8a9e3..554b54c046 100644 --- a/setup.py +++ b/setup.py @@ -56,13 +56,21 @@ def build_extension(self, ext): subprocess.check_call(["cmake", "--build", "."] + build_args, cwd=self.build_temp) subprocess.check_call(["ctest", "--output-on-failure"], cwd=self.build_temp) - for lib in (glob.glob(os.path.join(os.path.join(extdir, "awkward1"), "*.so")) + + for lib in (glob.glob(os.path.join(os.path.join(extdir, "awkward1"), "libawkward-cpu-kernels-static.*")) + + glob.glob(os.path.join(os.path.join(extdir, "awkward1"), "libawkward-static.*")) + + glob.glob(os.path.join(os.path.join(extdir, "awkward1"), "*.so")) + glob.glob(os.path.join(os.path.join(extdir, "awkward1"), "*.dylib")) + glob.glob(os.path.join(os.path.join(extdir, "awkward1"), "*.dll")) + + glob.glob(os.path.join(os.path.join(extdir, "awkward1"), "*.exp")) + glob.glob(os.path.join(os.path.join(extdir, "awkward1"), "*.pyd"))): if os.path.exists(lib): os.remove(lib) + for lib in os.listdir(self.build_temp): + if lib.startswith("libawkward-cpu-kernels-static.") or lib.startswith("libawkward-static."): + shutil.copy(os.path.join(self.build_temp, lib), "awkward1") + 
shutil.move(os.path.join(self.build_temp, lib), os.path.join(extdir, "awkward1")) + for lib in os.listdir(extdir): if lib.endswith(".so") or lib.endswith(".dylib") or lib.endswith(".dll") or lib.endswith(".pyd"): shutil.copy(os.path.join(extdir, lib), "awkward1") @@ -70,7 +78,7 @@ def build_extension(self, ext): if platform.system() == "Windows": for lib in os.listdir(os.path.join(self.build_temp, cfg)): - if lib.endswith(".dll") or lib.endswith(".pyd"): + if lib.startswith("awkward-cpu-kernels-static.") or lib.startswith("awkward-static.") or lib.endswith(".dll") or lib.endswith(".exp") or lib.endswith(".pyd"): shutil.copy(os.path.join(os.path.join(self.build_temp, cfg), lib), "awkward1") shutil.move(os.path.join(os.path.join(self.build_temp, cfg), lib), os.path.join(extdir, "awkward1")) diff --git a/src/libawkward/Content.cpp b/src/libawkward/Content.cpp index bfca83ddd6..d04eeb9bbd 100644 --- a/src/libawkward/Content.cpp +++ b/src/libawkward/Content.cpp @@ -8,6 +8,12 @@ #include "awkward/Content.h" namespace awkward { + Content::Content(const std::shared_ptr& id, const std::shared_ptr& type) + : id_(id) + , type_(type) { } + + Content::~Content() { } + bool Content::isscalar() const { return false; } @@ -16,42 +22,12 @@ namespace awkward { return id_; } - const std::shared_ptr Content::type() const { - if (type_.get() == nullptr) { - if (isscalar()) { - return innertype(false); - } - else { - return std::shared_ptr(new ArrayType(Type::Parameters(), innertype(false), length())); - } - } - else { - return std::shared_ptr(new ArrayType(Type::Parameters(), type_, length())); - } - } - - void Content::settype(const std::shared_ptr type) { - std::shared_ptr toset = type; - if (ArrayType* raw = dynamic_cast(type.get())) { - if (raw->length() != length()) { - throw std::invalid_argument(std::string("provided ArrayType is incompatible with length of array: ") + std::to_string(raw->length()) + std::string(" versus ") + std::to_string(length())); - } - toset = raw->type(); 
- } - settype_part(toset); - } - bool Content::isbare() const { return type_.get() == nullptr; } - const std::shared_ptr Content::baretype() const { - if (isscalar()) { - return innertype(true); - } - else { - return std::shared_ptr(new ArrayType(Type::Parameters(), innertype(true), length())); - } + bool Content::istypeptr(Type* pointer) const { + return type_.get() == pointer; } const std::string Content::tostring() const { @@ -87,7 +63,7 @@ namespace awkward { } const std::shared_ptr Content::getitem(const Slice& where) const { - std::shared_ptr next(new RegularArray(Identity::none(), Type::none(), shallow_copy(), length())); + std::shared_ptr next = std::make_shared(Identity::none(), Type::none(), shallow_copy(), length()); std::shared_ptr nexthead = where.head(); Slice nexttail = where.tail(); @@ -102,7 +78,7 @@ namespace awkward { } } - const std::shared_ptr Content::getitem_next(const std::shared_ptr head, const Slice& tail, const Index64& advanced) const { + const std::shared_ptr Content::getitem_next(const std::shared_ptr& head, const Slice& tail, const Index64& advanced) const { if (head.get() == nullptr) { return shallow_copy(); } @@ -147,9 +123,9 @@ namespace awkward { } else { std::vector> tailitems = tail.items(); - std::vector> items = { std::shared_ptr(new SliceEllipsis()) }; + std::vector> items = { std::make_shared() }; items.insert(items.end(), tailitems.begin(), tailitems.end()); - std::shared_ptr nexthead(new SliceRange(Slice::none(), Slice::none(), 1)); + std::shared_ptr nexthead = std::make_shared(Slice::none(), Slice::none(), 1); Slice nexttail(items); return getitem_next(nexthead, nexttail, advanced); } @@ -158,7 +134,7 @@ namespace awkward { const std::shared_ptr Content::getitem_next(const SliceNewAxis& newaxis, const Slice& tail, const Index64& advanced) const { std::shared_ptr nexthead = tail.head(); Slice nexttail = tail.tail(); - return std::shared_ptr(new RegularArray(Identity::none(), Type::none(), getitem_next(nexthead, nexttail, 
advanced), 1)); + return std::make_shared(Identity::none(), Type::none(), getitem_next(nexthead, nexttail, advanced), 1); } const std::shared_ptr Content::getitem_next(const SliceField& field, const Slice& tail, const Index64& advanced) const { @@ -173,10 +149,10 @@ namespace awkward { return getitem_fields(fields.keys()).get()->getitem_next(nexthead, nexttail, advanced); } - const std::shared_ptr Content::getitem_next_array_wrap(const std::shared_ptr outcontent, const std::vector& shape) const { - std::shared_ptr out(new RegularArray(Identity::none(), Type::none(), outcontent, (int64_t)shape[shape.size() - 1])); + const std::shared_ptr Content::getitem_next_array_wrap(const std::shared_ptr& outcontent, const std::vector& shape) const { + std::shared_ptr out = std::make_shared(Identity::none(), Type::none(), outcontent, (int64_t)shape[shape.size() - 1]); for (int64_t i = (int64_t)shape.size() - 2; i >= 0; i--) { - out = std::shared_ptr(new RegularArray(Identity::none(), Type::none(), out, (int64_t)shape[(size_t)i])); + out = std::make_shared(Identity::none(), Type::none(), out, (int64_t)shape[(size_t)i]); } return out; } diff --git a/src/libawkward/Identity.cpp b/src/libawkward/Identity.cpp index b713ee71d3..96a133b5bd 100644 --- a/src/libawkward/Identity.cpp +++ b/src/libawkward/Identity.cpp @@ -19,6 +19,52 @@ namespace awkward { return numrefs++; } + std::shared_ptr Identity::none() { + return std::shared_ptr(nullptr); + } + + Identity::Identity(const Ref ref, const FieldLoc& fieldloc, int64_t offset, int64_t width, int64_t length) + : ref_(ref) + , fieldloc_(fieldloc) + , offset_(offset) + , width_(width) + , length_(length) { } + + const Identity::Ref Identity::ref() const { + return ref_; + } + + const Identity::FieldLoc Identity::fieldloc() const { + return fieldloc_; + } + + const int64_t Identity::offset() const { + return offset_; + } + + const int64_t Identity::width() const { + return width_; + } + + const int64_t Identity::length() const { + return 
length_; + } + + template + IdentityOf::IdentityOf(const Ref ref, const FieldLoc& fieldloc, int64_t width, int64_t length) + : Identity(ref, fieldloc, 0, width, length) + , ptr_(std::shared_ptr(length*width == 0 ? nullptr : new T[(size_t)(length*width)], awkward::util::array_deleter())) { } + + template + IdentityOf::IdentityOf(const Ref ref, const FieldLoc& fieldloc, int64_t offset, int64_t width, int64_t length, const std::shared_ptr ptr) + : Identity(ref, fieldloc, offset, width, length) + , ptr_(ptr) { } + + template + const std::shared_ptr IdentityOf::ptr() const { + return ptr_; + } + template const std::string IdentityOf::classname() const { if (std::is_same::value) { @@ -55,15 +101,15 @@ namespace awkward { return shallow_copy(); } else if (std::is_same::value) { - Identity64* raw = new Identity64(ref_, fieldloc_, width_, length_); - std::shared_ptr out(raw); + std::shared_ptr out = std::make_shared(ref_, fieldloc_, width_, length_); + Identity64* raw = reinterpret_cast(out.get()); awkward_identity32_to_identity64(raw->ptr().get(), reinterpret_cast(ptr_.get()), length_, width_); return out; } } template - const std::string IdentityOf::tostring_part(const std::string indent, const std::string pre, const std::string post) const { + const std::string IdentityOf::tostring_part(const std::string& indent, const std::string& pre, const std::string& post) const { std::stringstream out; std::string name = "Unrecognized Identity"; if (std::is_same::value) { @@ -87,18 +133,18 @@ namespace awkward { template const std::shared_ptr IdentityOf::getitem_range_nowrap(int64_t start, int64_t stop) const { assert(0 <= start && start < length_ && 0 <= stop && stop < length_); - return std::shared_ptr(new IdentityOf(ref_, fieldloc_, offset_ + width_*start*(start != stop), width_, (stop - start), ptr_)); + return std::make_shared>(ref_, fieldloc_, offset_ + width_*start*(start != stop), width_, (stop - start), ptr_); } template const std::shared_ptr IdentityOf::shallow_copy() 
const { - return std::shared_ptr(new IdentityOf(ref_, fieldloc_, offset_, width_, length_, ptr_)); + return std::make_shared>(ref_, fieldloc_, offset_, width_, length_, ptr_); } template const std::shared_ptr IdentityOf::getitem_carry_64(const Index64& carry) const { - IdentityOf* rawout = new IdentityOf(ref_, fieldloc_, width_, carry.length()); - std::shared_ptr out(rawout); + std::shared_ptr out = std::make_shared>(ref_, fieldloc_, width_, carry.length()); + IdentityOf* rawout = reinterpret_cast*>(out.get()); if (std::is_same::value) { struct Error err = awkward_identity32_getitem_carry_64( @@ -135,7 +181,7 @@ namespace awkward { template const std::shared_ptr IdentityOf::withfieldloc(const FieldLoc& fieldloc) const { - return std::shared_ptr(new IdentityOf(ref_, fieldloc, offset_, width_, length_, ptr_)); + return std::make_shared>(ref_, fieldloc, offset_, width_, length_, ptr_); } template diff --git a/src/libawkward/Index.cpp b/src/libawkward/Index.cpp index 46350fa1c5..bff120ed92 100644 --- a/src/libawkward/Index.cpp +++ b/src/libawkward/Index.cpp @@ -1,5 +1,6 @@ // BSD 3-Clause License; see https://github.com/jpivarski/awkward-1.0/blob/master/LICENSE +#include #include #include #include @@ -9,6 +10,33 @@ #include "awkward/Index.h" namespace awkward { + template + IndexOf::IndexOf(int64_t length) + : ptr_(std::shared_ptr(length == 0 ? 
nullptr : new T[(size_t)length], awkward::util::array_deleter())) + , offset_(0) + , length_(length) { } + + template + IndexOf::IndexOf(const std::shared_ptr& ptr, int64_t offset, int64_t length) + : ptr_(ptr) + , offset_(offset) + , length_(length) { } + + template + const std::shared_ptr IndexOf::ptr() const { + return ptr_; + } + + template + int64_t IndexOf::offset() const { + return offset_; + } + + template + int64_t IndexOf::length() const { + return length_; + } + template const std::string IndexOf::classname() const { if (std::is_same::value) { @@ -37,7 +65,7 @@ namespace awkward { } template - const std::string IndexOf::tostring_part(const std::string indent, const std::string pre, const std::string post) const { + const std::string IndexOf::tostring_part(const std::string& indent, const std::string& pre, const std::string& post) const { std::stringstream out; out << indent << pre << "<" << classname() << " i=\"["; if (length_ <= 10) { @@ -108,7 +136,16 @@ namespace awkward { template const std::shared_ptr IndexOf::shallow_copy() const { - return std::shared_ptr(new IndexOf(ptr_, offset_, length_)); + return std::make_shared>(ptr_, offset_, length_); + } + + template + const std::shared_ptr IndexOf::deep_copy() const { + std::shared_ptr ptr(length_ == 0 ? 
nullptr : new T[(size_t)length_], awkward::util::array_deleter()); + if (length_ != 0) { + memcpy(ptr.get(), &ptr_.get()[(size_t)offset_], sizeof(T)*((size_t)length_)); + } + return std::make_shared>(ptr, 0, length_); } template class IndexOf; diff --git a/src/libawkward/Iterator.cpp b/src/libawkward/Iterator.cpp index a7342ef8d3..577684f12b 100644 --- a/src/libawkward/Iterator.cpp +++ b/src/libawkward/Iterator.cpp @@ -5,6 +5,20 @@ #include "awkward/Iterator.h" namespace awkward { + Iterator::Iterator(const std::shared_ptr& content) + : content_(content) + , at_(0) { + content.get()->check_for_iteration(); + } + + const std::shared_ptr Iterator::content() const { + return content_; + } + + const int64_t Iterator::at() const { + return at_; + } + const bool Iterator::isdone() const { return at_ >= content_.get()->length(); } @@ -13,7 +27,7 @@ namespace awkward { return content_.get()->getitem_at_nowrap(at_++); } - const std::string Iterator::tostring_part(const std::string indent, const std::string pre, const std::string post) const { + const std::string Iterator::tostring_part(const std::string& indent, const std::string& pre, const std::string& post) const { std::stringstream out; out << indent << pre << "\n"; out << content_.get()->tostring_part(indent + std::string(" "), "", "\n"); diff --git a/src/libawkward/Slice.cpp b/src/libawkward/Slice.cpp index 710e279e52..348565b273 100644 --- a/src/libawkward/Slice.cpp +++ b/src/libawkward/Slice.cpp @@ -7,10 +7,68 @@ #include "awkward/Slice.h" namespace awkward { + int64_t SliceItem::none() { + return kSliceNone; + } + + SliceItem::~SliceItem() { } + + /////////////////////////////////////////////////////// SliceAt + + SliceAt::SliceAt(int64_t at) + : at_(at) { } + + int64_t SliceAt::at() const { + return at_; + } + + const std::shared_ptr SliceAt::shallow_copy() const { + return std::make_shared(at_); + } + const std::string SliceAt::tostring() const { return std::to_string(at_); } + bool SliceAt::preserves_type(const 
std::shared_ptr& type, const Index64& advanced) const { + return false; + } + + /////////////////////////////////////////////////////// SliceRange + + SliceRange::SliceRange(int64_t start, int64_t stop, int64_t step) + : start_(start) + , stop_(stop) + , step_(step == none() ? 1 : step) { + if (step_ == 0) { + throw std::runtime_error("step must not be zero"); + } + } + + int64_t SliceRange::start() const { + return start_; + } + + int64_t SliceRange::stop() const { + return stop_; + } + + int64_t SliceRange::step() const { + return step_; + } + + bool SliceRange::hasstart() const { + return start_ != none(); + } + + bool SliceRange::hasstop() const { + return stop_ != none(); + } + + const std::shared_ptr SliceRange::shallow_copy() const { + return std::make_shared(start_, stop_, step_); + } + const std::string SliceRange::tostring() const { std::stringstream out; if (hasstart()) { @@ -26,14 +84,87 @@ namespace awkward { return out.str(); } + bool SliceRange::preserves_type(const std::shared_ptr& type, const Index64& advanced) const { + return true; + } + + /////////////////////////////////////////////////////// SliceEllipsis + + SliceEllipsis::SliceEllipsis() { } + + const std::shared_ptr SliceEllipsis::shallow_copy() const { + return std::make_shared(); + } + const std::string SliceEllipsis::tostring() const { return std::string("..."); } + bool SliceEllipsis::preserves_type(const std::shared_ptr& type, const Index64& advanced) const { + return true; + } + + /////////////////////////////////////////////////////// SliceNewAxis + + SliceNewAxis::SliceNewAxis() { } + + const std::shared_ptr SliceNewAxis::shallow_copy() const { + return std::make_shared(); + } + const std::string SliceNewAxis::tostring() const { return std::string("newaxis"); } + bool SliceNewAxis::preserves_type(const std::shared_ptr& type, const Index64& advanced) const { + return false; + } + + /////////////////////////////////////////////////////// SliceArrayOf + + template + 
SliceArrayOf::SliceArrayOf(const IndexOf& index, const std::vector& shape, const std::vector& strides) + : index_(index) + , shape_(shape) + , strides_(strides) { + if (shape_.empty()) { + throw std::runtime_error("shape must not be zero-dimensional"); + } + if (shape_.size() != strides_.size()) { + throw std::runtime_error("shape must have the same number of dimensions as strides"); + } + } + + template + const IndexOf SliceArrayOf::index() const { + return index_; + } + + template + const int64_t SliceArrayOf::length() const { + return shape_[0]; + } + + template + const std::vector SliceArrayOf::shape() const { + return shape_; + } + + template + const std::vector SliceArrayOf::strides() const { + return strides_; + } + + template + int64_t SliceArrayOf::ndim() const { + return (int64_t)shape_.size(); + } + + template + const std::shared_ptr SliceArrayOf::shallow_copy() const { + return std::make_shared>(index_, shape_, strides_); + } + template const std::string SliceArrayOf::tostring() const { return std::string("array(") + tostring_part() + std::string(")"); @@ -105,6 +236,11 @@ namespace awkward { return out.str(); } + template + bool SliceArrayOf::preserves_type(const std::shared_ptr& type, const Index64& advanced) const { + return advanced.length() == 0; + } + template const IndexOf SliceArrayOf::ravel() const { int64_t length = 1; @@ -125,10 +261,40 @@ namespace awkward { template class SliceArrayOf; + /////////////////////////////////////////////////////// SliceField + + SliceField::SliceField(const std::string& key) + : key_(key) { } + + const std::string SliceField::key() const { + return key_; + } + + const std::shared_ptr SliceField::shallow_copy() const { + return std::make_shared(key_); + } + const std::string SliceField::tostring() const { return util::quote(key_, true); } + bool SliceField::preserves_type(const std::shared_ptr& type, const Index64& advanced) const { + return false; + } + + /////////////////////////////////////////////////////// 
SliceFields + + SliceFields::SliceFields(const std::vector& keys) + : keys_(keys) { } + + const std::vector SliceFields::keys() const { + return keys_; + } + + const std::shared_ptr SliceFields::shallow_copy() const { + return std::make_shared(keys_); + } + const std::string SliceFields::tostring() const { std::stringstream out; out << "["; @@ -142,6 +308,36 @@ namespace awkward { return out.str(); } + bool SliceFields::preserves_type(const std::shared_ptr& type, const Index64& advanced) const { + return type.get() != nullptr && type.get()->numfields() != -1 && util::subset(keys_, type.get()->keys()); + } + + /////////////////////////////////////////////////////// Slice + + int64_t Slice::none() { + return SliceItem::none(); + } + + Slice::Slice() + : items_(std::vector>()) + , sealed_(false) { } + + Slice::Slice(const std::vector>& items) + : items_(items) + , sealed_(false) { } + + Slice::Slice(const std::vector>& items, bool sealed) + : items_(items) + , sealed_(sealed) { } + + const std::vector> Slice::items() const { + return items_; + } + + bool Slice::sealed() const { + return sealed_; + } + int64_t Slice::length() const { return (int64_t)items_.size(); } @@ -163,7 +359,7 @@ namespace awkward { } const std::shared_ptr Slice::head() const { - if (items_.size() != 0) { + if (!items_.empty()) { return items_[0]; } else { @@ -173,7 +369,7 @@ namespace awkward { const Slice Slice::tail() const { std::vector> items; - if (items_.size() != 0) { + if (!items_.empty()) { items.insert(items.end(), items_.begin() + 1, items_.end()); } return Slice(items, true); @@ -224,7 +420,7 @@ namespace awkward { std::vector shape; for (size_t i = 0; i < items_.size(); i++) { if (SliceArray64* array = dynamic_cast(items_[i].get())) { - if (shape.size() == 0) { + if (shape.empty()) { shape = array->shape(); } else if (shape.size() != array->ndim()) { @@ -244,7 +440,7 @@ namespace awkward { } } - if (shape.size() != 0) { + if (!shape.empty()) { for (size_t i = 0; i < items_.size(); 
i++) { if (SliceAt* at = dynamic_cast(items_[i].get())) { Index64 index(1); @@ -253,7 +449,7 @@ namespace awkward { for (size_t j = 0; j < shape.size(); j++) { strides.push_back(0); } - items_[i] = std::shared_ptr(new SliceArray64(index, shape, strides)); + items_[i] = std::make_shared(index, shape, strides); } else if (SliceArray64* array = dynamic_cast(items_[i].get())) { std::vector arrayshape = array->shape(); @@ -270,7 +466,7 @@ namespace awkward { throw std::invalid_argument("cannot broadcast arrays in slice"); } } - items_[i] = std::shared_ptr(new SliceArray64(array->index(), shape, strides)); + items_[i] = std::make_shared(array->index(), shape, strides); } } diff --git a/src/libawkward/array/EmptyArray.cpp b/src/libawkward/array/EmptyArray.cpp index f4dbf4fcc3..e662d24f55 100644 --- a/src/libawkward/array/EmptyArray.cpp +++ b/src/libawkward/array/EmptyArray.cpp @@ -10,11 +10,18 @@ #include "awkward/array/EmptyArray.h" namespace awkward { + EmptyArray::EmptyArray(const std::shared_ptr& id, const std::shared_ptr& type) + : Content(id, type) { + if (type_.get() != nullptr) { + checktype(); + } + } + const std::string EmptyArray::classname() const { return "EmptyArray"; } - void EmptyArray::setid(const std::shared_ptr id) { + void EmptyArray::setid(const std::shared_ptr& id) { if (id.get() != nullptr && length() != id.get()->length()) { util::handle_error(failure("content and its id must have the same length", kSliceNone, kSliceNone), classname(), id_.get()); } @@ -23,19 +30,34 @@ namespace awkward { void EmptyArray::setid() { } - const std::string EmptyArray::tostring_part(const std::string indent, const std::string pre, const std::string post) const { + const std::shared_ptr EmptyArray::type() const { + if (type_.get() != nullptr) { + return type_; + } + else { + return std::make_shared(Type::Parameters()); + } + } + + const std::shared_ptr EmptyArray::astype(const std::shared_ptr& type) const { + return std::make_shared(id_, type); + } + + const std::string 
EmptyArray::tostring_part(const std::string& indent, const std::string& pre, const std::string& post) const { std::stringstream out; out << indent << pre << "<" << classname(); if (id_.get() == nullptr && type_.get() == nullptr) { out << "/>" << post; } else { + out << ">\n"; if (id_.get() != nullptr) { - out << ">\n" << id_.get()->tostring_part(indent + std::string(" "), "", "\n") << indent << "" << post; + out << id_.get()->tostring_part(indent + std::string(" "), "", "\n") << indent << "" << post; } if (type_.get() != nullptr) { out << indent << " " + type().get()->tostring() + "\n"; } + out << indent << "" << post; } return out.str(); } @@ -45,29 +67,12 @@ namespace awkward { builder.endlist(); } - const std::shared_ptr EmptyArray::innertype(bool bare) const { - return std::shared_ptr(new UnknownType(Type::Parameters())); - } - - void EmptyArray::settype_part(const std::shared_ptr type) { - if (accepts(type)) { - type_ = type; - } - else { - throw std::invalid_argument(std::string("provided type is incompatible with array: ") + ArrayType(Type::Parameters(), type, length()).compare(baretype())); - } - } - - bool EmptyArray::accepts(const std::shared_ptr type) { - return dynamic_cast(type.get()->level().get()) != nullptr; - } - int64_t EmptyArray::length() const { return 0; } const std::shared_ptr EmptyArray::shallow_copy() const { - return std::shared_ptr(new EmptyArray(id_, type_)); + return std::make_shared(id_, type_); } void EmptyArray::check_for_iteration() const { } @@ -136,6 +141,8 @@ namespace awkward { throw std::invalid_argument("array contains no Records"); } + void EmptyArray::checktype() const { } + const std::shared_ptr EmptyArray::getitem_next(const SliceAt& at, const Slice& tail, const Index64& advanced) const { util::handle_error(failure("too many dimensions in slice", kSliceNone, kSliceNone), classname(), id_.get()); return std::shared_ptr(nullptr); // make Windows compiler happy diff --git a/src/libawkward/array/ListArray.cpp 
b/src/libawkward/array/ListArray.cpp index 7100d24b2e..de43c3ccd0 100644 --- a/src/libawkward/array/ListArray.cpp +++ b/src/libawkward/array/ListArray.cpp @@ -15,6 +15,32 @@ #include "awkward/array/ListArray.h" namespace awkward { + template + ListArrayOf::ListArrayOf(const std::shared_ptr& id, const std::shared_ptr& type, const IndexOf& starts, const IndexOf& stops, const std::shared_ptr& content) + : Content(id, type) + , starts_(starts) + , stops_(stops) + , content_(content) { + if (type_.get() != nullptr) { + checktype(); + } + } + + template + const IndexOf ListArrayOf::starts() const { + return starts_; + } + + template + const IndexOf ListArrayOf::stops() const { + return stops_; + } + + template + const std::shared_ptr ListArrayOf::content() const { + return content_; + } + template const std::string ListArrayOf::classname() const { if (std::is_same::value) { @@ -32,7 +58,7 @@ namespace awkward { } template <> - void ListArrayOf::setid(const std::shared_ptr id) { + void ListArrayOf::setid(const std::shared_ptr& id) { if (id.get() == nullptr) { content_.get()->setid(id); } @@ -45,8 +71,8 @@ namespace awkward { bigid = id.get()->to64(); } if (Identity32* rawid = dynamic_cast(bigid.get())) { - Identity32* rawsubid = new Identity32(Identity::newref(), rawid->fieldloc(), rawid->width() + 1, content_.get()->length()); - std::shared_ptr subid(rawsubid); + std::shared_ptr subid = std::make_shared(Identity::newref(), rawid->fieldloc(), rawid->width() + 1, content_.get()->length()); + Identity32* rawsubid = reinterpret_cast(subid.get()); struct Error err = awkward_identity32_from_listarray32( rawsubid->ptr().get(), rawid->ptr().get(), @@ -62,8 +88,8 @@ namespace awkward { content_.get()->setid(subid); } else if (Identity64* rawid = dynamic_cast(bigid.get())) { - Identity64* rawsubid = new Identity64(Identity::newref(), rawid->fieldloc(), rawid->width() + 1, content_.get()->length()); - std::shared_ptr subid(rawsubid); + std::shared_ptr subid = 
std::make_shared(Identity::newref(), rawid->fieldloc(), rawid->width() + 1, content_.get()->length()); + Identity64* rawsubid = reinterpret_cast(subid.get()); struct Error err = awkward_identity64_from_listarray32( rawsubid->ptr().get(), rawid->ptr().get(), @@ -86,7 +112,7 @@ namespace awkward { } template - void ListArrayOf::setid(const std::shared_ptr id) { + void ListArrayOf::setid(const std::shared_ptr& id) { if (id.get() == nullptr) { content_.get()->setid(id); } @@ -96,8 +122,8 @@ namespace awkward { } std::shared_ptr bigid = id.get()->to64(); if (Identity64* rawid = dynamic_cast(bigid.get())) { - Identity64* rawsubid = new Identity64(Identity::newref(), rawid->fieldloc(), rawid->width() + 1, content_.get()->length()); - std::shared_ptr subid(rawsubid); + std::shared_ptr subid = std::make_shared(Identity::newref(), rawid->fieldloc(), rawid->width() + 1, content_.get()->length()); + Identity64* rawsubid = reinterpret_cast(subid.get()); struct Error err = util::awkward_identity64_from_listarray( rawsubid->ptr().get(), rawid->ptr().get(), @@ -122,15 +148,15 @@ namespace awkward { template void ListArrayOf::setid() { if (length() <= kMaxInt32) { - Identity32* rawid = new Identity32(Identity::newref(), Identity::FieldLoc(), 1, length()); - std::shared_ptr newid(rawid); + std::shared_ptr newid = std::make_shared(Identity::newref(), Identity::FieldLoc(), 1, length()); + Identity32* rawid = reinterpret_cast(newid.get()); struct Error err = awkward_new_identity32(rawid->ptr().get(), length()); util::handle_error(err, classname(), id_.get()); setid(newid); } else { - Identity64* rawid = new Identity64(Identity::newref(), Identity::FieldLoc(), 1, length()); - std::shared_ptr newid(rawid); + std::shared_ptr newid = std::make_shared(Identity::newref(), Identity::FieldLoc(), 1, length()); + Identity64* rawid = reinterpret_cast(newid.get()); struct Error err = awkward_new_identity64(rawid->ptr().get(), length()); util::handle_error(err, classname(), id_.get()); 
setid(newid); @@ -138,7 +164,28 @@ namespace awkward { } template - const std::string ListArrayOf::tostring_part(const std::string indent, const std::string pre, const std::string post) const { + const std::shared_ptr ListArrayOf::type() const { + if (type_.get() != nullptr) { + return type_; + } + else { + return std::make_shared(Type::Parameters(), content_.get()->type()); + } + } + + template + const std::shared_ptr ListArrayOf::astype(const std::shared_ptr& type) const { + std::shared_ptr inner = type; + if (inner.get() != nullptr) { + if (ListType* raw = dynamic_cast(inner.get())) { + inner = raw->type(); + } + } + return std::make_shared>(id_, type, starts_, stops_, content_.get()->astype(inner)); + } + + template + const std::string ListArrayOf::tostring_part(const std::string& indent, const std::string& pre, const std::string& post) const { std::stringstream out; out << indent << pre << "<" << classname() << ">\n"; if (id_.get() != nullptr) { @@ -164,32 +211,6 @@ namespace awkward { builder.endlist(); } - template - const std::shared_ptr ListArrayOf::innertype(bool bare) const { - if (bare || content_.get()->isbare()) { - return std::shared_ptr(new ListType(Type::Parameters(), content_.get()->innertype(bare))); - } - else { - return std::shared_ptr(new ListType(Type::Parameters(), content_.get()->type().get()->nolength())); - } - } - - template - void ListArrayOf::settype_part(const std::shared_ptr type) { - if (accepts(type)) { - content_.get()->settype_part(type.get()->inner()); - type_ = type; - } - else { - throw std::invalid_argument(std::string("provided type is incompatible with array: ") + ArrayType(Type::Parameters(), type, length()).compare(baretype())); - } - } - - template - bool ListArrayOf::accepts(const std::shared_ptr type) { - return dynamic_cast(type.get()->level().get()) != nullptr; - } - template int64_t ListArrayOf::length() const { return starts_.length(); @@ -197,7 +218,7 @@ namespace awkward { template const std::shared_ptr 
ListArrayOf::shallow_copy() const { - return std::shared_ptr(new ListArrayOf(id_, type_, starts_, stops_, content_)); + return std::make_shared>(id_, type_, starts_, stops_, content_); } template @@ -270,12 +291,12 @@ namespace awkward { if (id_.get() != nullptr) { id = id_.get()->getitem_range_nowrap(start, stop); } - return std::shared_ptr(new ListArrayOf(id, type_, starts_.getitem_range_nowrap(start, stop), stops_.getitem_range_nowrap(start, stop), content_)); + return std::make_shared>(id, type_, starts_.getitem_range_nowrap(start, stop), stops_.getitem_range_nowrap(start, stop), content_); } template const std::shared_ptr ListArrayOf::getitem_field(const std::string& key) const { - return std::shared_ptr(new ListArrayOf(id_, Type::none(), starts_, stops_, content_.get()->getitem_field(key))); + return std::make_shared>(id_, Type::none(), starts_, stops_, content_.get()->getitem_field(key)); } template @@ -284,7 +305,7 @@ namespace awkward { if (SliceFields(keys).preserves_type(type_, Index64(0))) { type = type_; } - return std::shared_ptr(new ListArrayOf(id_, type, starts_, stops_, content_.get()->getitem_fields(keys))); + return std::make_shared>(id_, type, starts_, stops_, content_.get()->getitem_fields(keys)); } template @@ -310,7 +331,7 @@ namespace awkward { if (id_.get() != nullptr) { id = id_.get()->getitem_carry_64(carry); } - return std::shared_ptr(new ListArrayOf(id, type_, nextstarts, nextstops, content_)); + return std::make_shared>(id, type_, nextstarts, nextstops, content_); } template @@ -354,6 +375,17 @@ namespace awkward { return content_.get()->keys(); } + template + void ListArrayOf::checktype() const { + bool okay = false; + if (ListType* raw = dynamic_cast(type_.get())) { + okay = (raw->type().get() == content_.get()->type().get()); + } + if (!okay) { + throw std::invalid_argument(std::string("cannot assign type ") + type_.get()->tostring() + std::string(" to ") + classname()); + } + } + template const std::shared_ptr 
ListArrayOf::getitem_next(const SliceAt& at, const Slice& tail, const Index64& advanced) const { int64_t lenstarts = starts_.length(); @@ -424,7 +456,7 @@ namespace awkward { std::shared_ptr nextcontent = content_.get()->carry(nextcarry); if (advanced.length() == 0) { - return std::shared_ptr(new ListOffsetArrayOf(id_, type_, nextoffsets, nextcontent.get()->getitem_next(nexthead, nexttail, advanced))); + return std::make_shared>(id_, type_, nextoffsets, nextcontent.get()->getitem_next(nexthead, nexttail, advanced)); } else { int64_t total; @@ -440,7 +472,7 @@ namespace awkward { nextoffsets.ptr().get(), lenstarts); util::handle_error(err2, classname(), id_.get()); - return std::shared_ptr(new ListOffsetArrayOf(id_, type_, nextoffsets, nextcontent.get()->getitem_next(nexthead, nexttail, nextadvanced))); + return std::make_shared>(id_, type_, nextoffsets, nextcontent.get()->getitem_next(nexthead, nexttail, nextadvanced)); } } diff --git a/src/libawkward/array/ListOffsetArray.cpp b/src/libawkward/array/ListOffsetArray.cpp index 4810e15570..0e116c26d9 100644 --- a/src/libawkward/array/ListOffsetArray.cpp +++ b/src/libawkward/array/ListOffsetArray.cpp @@ -15,6 +15,26 @@ #include "awkward/array/ListOffsetArray.h" namespace awkward { + template + ListOffsetArrayOf::ListOffsetArrayOf(const std::shared_ptr& id, const std::shared_ptr& type, const IndexOf& offsets, const std::shared_ptr& content) + : Content(id, type) + , offsets_(offsets) + , content_(content) { + if (type_.get() != nullptr) { + checktype(); + } + } + + template + const IndexOf ListOffsetArrayOf::offsets() const { + return offsets_; + } + + template + const std::shared_ptr ListOffsetArrayOf::content() const { + return content_; + } + template const std::string ListOffsetArrayOf::classname() const { if (std::is_same::value) { @@ -42,7 +62,7 @@ namespace awkward { } template <> - void ListOffsetArrayOf::setid(const std::shared_ptr id) { + void ListOffsetArrayOf::setid(const std::shared_ptr& id) { if (id.get() 
== nullptr) { content_.get()->setid(id); } @@ -55,8 +75,8 @@ namespace awkward { bigid = id.get()->to64(); } if (Identity32* rawid = dynamic_cast(bigid.get())) { - Identity32* rawsubid = new Identity32(Identity::newref(), rawid->fieldloc(), rawid->width() + 1, content_.get()->length()); - std::shared_ptr subid(rawsubid); + std::shared_ptr subid = std::make_shared(Identity::newref(), rawid->fieldloc(), rawid->width() + 1, content_.get()->length()); + Identity32* rawsubid = reinterpret_cast(subid.get()); struct Error err = awkward_identity32_from_listoffsetarray32( rawsubid->ptr().get(), rawid->ptr().get(), @@ -70,8 +90,8 @@ namespace awkward { content_.get()->setid(subid); } else if (Identity64* rawid = dynamic_cast(bigid.get())) { - Identity64* rawsubid = new Identity64(Identity::newref(), rawid->fieldloc(), rawid->width() + 1, content_.get()->length()); - std::shared_ptr subid(rawsubid); + std::shared_ptr subid = std::make_shared(Identity::newref(), rawid->fieldloc(), rawid->width() + 1, content_.get()->length()); + Identity64* rawsubid = reinterpret_cast(subid.get()); struct Error err = awkward_identity64_from_listoffsetarray32( rawsubid->ptr().get(), rawid->ptr().get(), @@ -92,7 +112,7 @@ namespace awkward { } template - void ListOffsetArrayOf::setid(const std::shared_ptr id) { + void ListOffsetArrayOf::setid(const std::shared_ptr& id) { if (id.get() == nullptr) { content_.get()->setid(id); } @@ -102,8 +122,8 @@ namespace awkward { } std::shared_ptr bigid = id.get()->to64(); if (Identity64* rawid = dynamic_cast(bigid.get())) { - Identity64* rawsubid = new Identity64(Identity::newref(), rawid->fieldloc(), rawid->width() + 1, content_.get()->length()); - std::shared_ptr subid(rawsubid); + std::shared_ptr subid = std::make_shared(Identity::newref(), rawid->fieldloc(), rawid->width() + 1, content_.get()->length()); + Identity64* rawsubid = reinterpret_cast(subid.get()); struct Error err = util::awkward_identity64_from_listoffsetarray( rawsubid->ptr().get(), 
rawid->ptr().get(), @@ -126,15 +146,15 @@ namespace awkward { template void ListOffsetArrayOf::setid() { if (length() <= kMaxInt32) { - Identity32* rawid = new Identity32(Identity::newref(), Identity::FieldLoc(), 1, length()); - std::shared_ptr newid(rawid); + std::shared_ptr newid = std::make_shared(Identity::newref(), Identity::FieldLoc(), 1, length()); + Identity32* rawid = reinterpret_cast(newid.get()); struct Error err = awkward_new_identity32(rawid->ptr().get(), length()); util::handle_error(err, classname(), id_.get()); setid(newid); } else { - Identity64* rawid = new Identity64(Identity::newref(), Identity::FieldLoc(), 1, length()); - std::shared_ptr newid(rawid); + std::shared_ptr newid = std::make_shared(Identity::newref(), Identity::FieldLoc(), 1, length()); + Identity64* rawid = reinterpret_cast(newid.get()); struct Error err = awkward_new_identity64(rawid->ptr().get(), length()); util::handle_error(err, classname(), id_.get()); setid(newid); @@ -142,7 +162,28 @@ namespace awkward { } template - const std::string ListOffsetArrayOf::tostring_part(const std::string indent, const std::string pre, const std::string post) const { + const std::shared_ptr ListOffsetArrayOf::type() const { + if (type_.get() != nullptr) { + return type_; + } + else { + return std::make_shared(Type::Parameters(), content_.get()->type()); + } + } + + template + const std::shared_ptr ListOffsetArrayOf::astype(const std::shared_ptr& type) const { + std::shared_ptr inner = type; + if (inner.get() != nullptr) { + if (ListType* raw = dynamic_cast(inner.get())) { + inner = raw->type(); + } + } + return std::make_shared>(id_, type, offsets_, content_.get()->astype(inner)); + } + + template + const std::string ListOffsetArrayOf::tostring_part(const std::string& indent, const std::string& pre, const std::string& post) const { std::stringstream out; out << indent << pre << "<" << classname() << ">\n"; if (id_.get() != nullptr) { @@ -167,32 +208,6 @@ namespace awkward { builder.endlist(); } 
- template - const std::shared_ptr ListOffsetArrayOf::innertype(bool bare) const { - if (bare || content_.get()->isbare()) { - return std::shared_ptr(new ListType(Type::Parameters(), content_.get()->innertype(bare))); - } - else { - return std::shared_ptr(new ListType(Type::Parameters(), content_.get()->type().get()->nolength())); - } - } - - template - void ListOffsetArrayOf::settype_part(const std::shared_ptr type) { - if (accepts(type)) { - content_.get()->settype_part(type.get()->inner()); - type_ = type; - } - else { - throw std::invalid_argument(std::string("provided type is incompatible with array: ") + ArrayType(Type::Parameters(), type, length()).compare(baretype())); - } - } - - template - bool ListOffsetArrayOf::accepts(const std::shared_ptr type) { - return dynamic_cast(type.get()->level().get()) != nullptr; - } - template int64_t ListOffsetArrayOf::length() const { return offsets_.length() - 1; @@ -200,7 +215,7 @@ namespace awkward { template const std::shared_ptr ListOffsetArrayOf::shallow_copy() const { - return std::shared_ptr(new ListOffsetArrayOf(id_, type_, offsets_, content_)); + return std::make_shared>(id_, type_, offsets_, content_); } template @@ -264,12 +279,12 @@ namespace awkward { if (id_.get() != nullptr) { id = id_.get()->getitem_range_nowrap(start, stop); } - return std::shared_ptr(new ListOffsetArrayOf(id, type_, offsets_.getitem_range_nowrap(start, stop + 1), content_)); + return std::make_shared>(id, type_, offsets_.getitem_range_nowrap(start, stop + 1), content_); } template const std::shared_ptr ListOffsetArrayOf::getitem_field(const std::string& key) const { - return std::shared_ptr(new ListOffsetArrayOf(id_, Type::none(), offsets_, content_.get()->getitem_field(key))); + return std::make_shared>(id_, Type::none(), offsets_, content_.get()->getitem_field(key)); } template @@ -278,7 +293,7 @@ namespace awkward { if (SliceFields(keys).preserves_type(type_, Index64(0))) { type = type_; } - return std::shared_ptr(new 
ListOffsetArrayOf(id_, type, offsets_, content_.get()->getitem_fields(keys))); + return std::make_shared>(id_, type, offsets_, content_.get()->getitem_fields(keys)); } template @@ -302,7 +317,7 @@ namespace awkward { if (id_.get() != nullptr) { id = id_.get()->getitem_carry_64(carry); } - return std::shared_ptr(new ListArrayOf(id, type_, nextstarts, nextstops, content_)); + return std::make_shared>(id, type_, nextstarts, nextstops, content_); } template @@ -346,6 +361,17 @@ namespace awkward { return content_.get()->keys(); } + template + void ListOffsetArrayOf::checktype() const { + bool okay = false; + if (ListType* raw = dynamic_cast(type_.get())) { + okay = (raw->type().get() == content_.get()->type().get()); + } + if (!okay) { + throw std::invalid_argument(std::string("cannot assign type ") + type_.get()->tostring() + std::string(" to ") + classname()); + } + } + template const std::shared_ptr ListOffsetArrayOf::getitem_next(const SliceAt& at, const Slice& tail, const Index64& advanced) const { assert(advanced.length() == 0); @@ -412,7 +438,7 @@ namespace awkward { std::shared_ptr nextcontent = content_.get()->carry(nextcarry); if (advanced.length() == 0) { - return std::shared_ptr(new ListOffsetArrayOf(id_, type_, nextoffsets, nextcontent.get()->getitem_next(nexthead, nexttail, advanced))); + return std::make_shared>(id_, type_, nextoffsets, nextcontent.get()->getitem_next(nexthead, nexttail, advanced)); } else { int64_t total; @@ -428,7 +454,7 @@ namespace awkward { nextoffsets.ptr().get(), lenstarts); util::handle_error(err2, classname(), id_.get()); - return std::shared_ptr(new ListOffsetArrayOf(id_, type_, nextoffsets, nextcontent.get()->getitem_next(nexthead, nexttail, nextadvanced))); + return std::make_shared>(id_, type_, nextoffsets, nextcontent.get()->getitem_next(nexthead, nexttail, nextadvanced)); } } diff --git a/src/libawkward/array/NumpyArray.cpp b/src/libawkward/array/NumpyArray.cpp index 484ebfd1c8..7152fec03e 100644 --- 
a/src/libawkward/array/NumpyArray.cpp +++ b/src/libawkward/array/NumpyArray.cpp @@ -14,6 +14,67 @@ #include "awkward/array/NumpyArray.h" namespace awkward { + const std::shared_ptr NumpyArray::unwrap_regulartype(const std::shared_ptr& type, const std::vector& shape) { + if (type.get() == nullptr) { + return type; + } + std::shared_ptr out = type; + for (size_t i = 1; i < shape.size(); i++) { + if (RegularType* raw = dynamic_cast(out.get())) { + if (raw->size() == (int64_t)shape[i]) { + out = raw->type(); + } + else { + throw std::invalid_argument(std::string("cannot assign type ") + type.get()->tostring() + std::string(" to NumpyArray")); + } + } + else { + throw std::invalid_argument(std::string("cannot assign type ") + type.get()->tostring() + std::string(" to NumpyArray")); + } + } + return out; + } + + NumpyArray::NumpyArray(const std::shared_ptr& id, const std::shared_ptr& type, const std::shared_ptr& ptr, const std::vector& shape, const std::vector& strides, ssize_t byteoffset, ssize_t itemsize, const std::string format) + : Content(id, type) + , ptr_(ptr) + , shape_(shape) + , strides_(strides) + , byteoffset_(byteoffset) + , itemsize_(itemsize) + , format_(format) { + if (shape_.size() != strides_.size()) { + throw std::runtime_error("len(shape) must be equal to len(strides)"); + } + if (type_.get() != nullptr) { + checktype(); + } + } + + const std::shared_ptr NumpyArray::ptr() const { + return ptr_; + } + + const std::vector NumpyArray::shape() const { + return shape_; + } + + const std::vector NumpyArray::strides() const { + return strides_; + } + + ssize_t NumpyArray::byteoffset() const { + return byteoffset_; + } + + ssize_t NumpyArray::itemsize() const { + return itemsize_; + } + + const std::string NumpyArray::format() const { + return format_; + } + ssize_t NumpyArray::ndim() const { return shape_.size(); } @@ -56,7 +117,7 @@ namespace awkward { return "NumpyArray"; } - void NumpyArray::setid(const std::shared_ptr id) { + void 
NumpyArray::setid(const std::shared_ptr& id) { if (id.get() != nullptr && length() != id.get()->length()) { util::handle_error(failure("content and its id must have the same length", kSliceNone, kSliceNone), classname(), id_.get()); } @@ -66,15 +127,15 @@ namespace awkward { void NumpyArray::setid() { assert(!isscalar()); if (length() <= kMaxInt32) { - Identity32* rawid = new Identity32(Identity::newref(), Identity::FieldLoc(), 1, length()); - std::shared_ptr newid(rawid); + std::shared_ptr newid = std::make_shared(Identity::newref(), Identity::FieldLoc(), 1, length()); + Identity32* rawid = reinterpret_cast(newid.get()); struct Error err = awkward_new_identity32(rawid->ptr().get(), length()); util::handle_error(err, classname(), id_.get()); setid(newid); } else { - Identity64* rawid = new Identity64(Identity::newref(), Identity::FieldLoc(), 1, length()); - std::shared_ptr newid(rawid); + std::shared_ptr newid = std::make_shared(Identity::newref(), Identity::FieldLoc(), 1, length()); + Identity64* rawid = reinterpret_cast(newid.get()); struct Error err = awkward_new_identity64(rawid->ptr().get(), length()); util::handle_error(err, classname(), id_.get()); setid(newid); @@ -123,7 +184,89 @@ namespace awkward { } } - const std::string NumpyArray::tostring_part(const std::string indent, const std::string pre, const std::string post) const { + bool NumpyArray::istypeptr(Type* pointer) const { + Type* ptr = pointer; + if (ptr != nullptr) { + for (size_t i = 1; i < shape_.size(); i++) { + if (RegularType* raw = dynamic_cast(ptr)) { + ptr = raw->type().get(); + } + else { + return false; + } + } + } + return ptr == type_.get(); + } + + const std::shared_ptr NumpyArray::type() const { + std::shared_ptr out; + if (type_.get() != nullptr) { + out = type_; + } + else if (format_.compare("d") == 0) { + out = std::make_shared(Type::Parameters(), PrimitiveType::float64); + } + else if (format_.compare("f") == 0) { + out = std::make_shared(Type::Parameters(), 
PrimitiveType::float32); + } +#ifdef _MSC_VER + else if (format_.compare("q") == 0) { +#else + else if (format_.compare("l") == 0) { +#endif + out = std::make_shared(Type::Parameters(), PrimitiveType::int64); + } +#ifdef _MSC_VER + else if (format_.compare("Q") == 0) { +#else + else if (format_.compare("L") == 0) { +#endif + out = std::make_shared(Type::Parameters(), PrimitiveType::uint64); + } +#ifdef _MSC_VER + else if (format_.compare("l") == 0) { +#else + else if (format_.compare("i") == 0) { +#endif + out = std::make_shared(Type::Parameters(), PrimitiveType::int32); + } +#ifdef _MSC_VER + else if (format_.compare("L") == 0) { +#else + else if (format_.compare("I") == 0) { +#endif + out = std::make_shared(Type::Parameters(), PrimitiveType::uint32); + } + else if (format_.compare("h") == 0) { + out = std::make_shared(Type::Parameters(), PrimitiveType::int16); + } + else if (format_.compare("H") == 0) { + out = std::make_shared(Type::Parameters(), PrimitiveType::uint16); + } + else if (format_.compare("b") == 0) { + out = std::make_shared(Type::Parameters(), PrimitiveType::int8); + } + else if (format_.compare("B") == 0 || format_.compare("c") == 0) { + out = std::make_shared(Type::Parameters(), PrimitiveType::uint8); + } + else if (format_.compare("?") == 0) { + out = std::make_shared(Type::Parameters(), PrimitiveType::boolean); + } + else { + throw std::invalid_argument(std::string("Numpy format \"") + format_ + std::string("\" cannot be expressed as a PrimitiveType")); + } + for (ssize_t i = shape_.size() - 1; i > 0; i--) { + out = std::make_shared(Type::Parameters(), out, (int64_t)shape_[i]); + } + return out; + } + + const std::shared_ptr NumpyArray::astype(const std::shared_ptr& type) const { + return std::make_shared(id_, unwrap_regulartype(type, shape_), ptr_, shape_, strides_, byteoffset_, itemsize_, format_); + } + + const std::string NumpyArray::tostring_part(const std::string& indent, const std::string& pre, const std::string& post) const { 
assert(!isscalar()); std::stringstream out; out << indent << pre << "<" << classname() << " format=" << util::quote(format_, true) << " shape=\""; @@ -271,176 +414,6 @@ namespace awkward { } } - const std::shared_ptr NumpyArray::innertype(bool bare) const { - if (ndim() == 1) { - if (format_.compare("d") == 0) { - return std::shared_ptr(new PrimitiveType(Type::Parameters(), PrimitiveType::float64)); - } - else if (format_.compare("f") == 0) { - return std::shared_ptr(new PrimitiveType(Type::Parameters(), PrimitiveType::float32)); - } -#ifdef _MSC_VER - else if (format_.compare("q") == 0) { -#else - else if (format_.compare("l") == 0) { -#endif - return std::shared_ptr(new PrimitiveType(Type::Parameters(), PrimitiveType::int64)); - } -#ifdef _MSC_VER - else if (format_.compare("Q") == 0) { -#else - else if (format_.compare("L") == 0) { -#endif - return std::shared_ptr(new PrimitiveType(Type::Parameters(), PrimitiveType::uint64)); - } -#ifdef _MSC_VER - else if (format_.compare("l") == 0) { -#else - else if (format_.compare("i") == 0) { -#endif - return std::shared_ptr(new PrimitiveType(Type::Parameters(), PrimitiveType::int32)); - } -#ifdef _MSC_VER - else if (format_.compare("L") == 0) { -#else - else if (format_.compare("I") == 0) { -#endif - return std::shared_ptr(new PrimitiveType(Type::Parameters(), PrimitiveType::uint32)); - } - else if (format_.compare("h") == 0) { - return std::shared_ptr(new PrimitiveType(Type::Parameters(), PrimitiveType::int16)); - } - else if (format_.compare("H") == 0) { - return std::shared_ptr(new PrimitiveType(Type::Parameters(), PrimitiveType::uint16)); - } - else if (format_.compare("b") == 0) { - return std::shared_ptr(new PrimitiveType(Type::Parameters(), PrimitiveType::int8)); - } - else if (format_.compare("B") == 0 || format_.compare("c") == 0) { - return std::shared_ptr(new PrimitiveType(Type::Parameters(), PrimitiveType::uint8)); - } - else if (format_.compare("?") == 0) { - return std::shared_ptr(new 
PrimitiveType(Type::Parameters(), PrimitiveType::boolean)); - } - else { - throw std::invalid_argument(std::string("Numpy format \"") + format_ + std::string("\" cannot be expressed as a PrimitiveType")); - } - } - else { - NumpyArray tmp(id_, type_, ptr_, std::vector({ 1 }), std::vector({ itemsize_ }), byteoffset_, itemsize_, format_); - std::shared_ptr out = tmp.innertype(bare); - for (ssize_t i = shape_.size() - 1; i > 0; i--) { - out = std::shared_ptr(new RegularType(Type::Parameters(), out, (int64_t)shape_[i])); - } - return out; - } - } - - const std::shared_ptr NumpyArray::type() const { - if (type_.get() == nullptr) { - if (isscalar()) { - return innertype(false); - } - else { - return std::shared_ptr(new ArrayType(Type::Parameters(), innertype(false), length())); - } - } - else { - std::shared_ptr out = type_; - for (ssize_t i = shape_.size() - 1; i > 0; i--) { - out = std::shared_ptr(new RegularType(Type::Parameters(), out, (int64_t)shape_[i])); - } - return std::shared_ptr(new ArrayType(Type::Parameters(), out, length())); - } - } - - void NumpyArray::settype_part(const std::shared_ptr type) { - if (accepts(type)) { - std::shared_ptr t = type; - while (RegularType* raw = dynamic_cast(t.get())) { - t = raw->type(); - } - type_ = t; - } - else { - throw std::invalid_argument(std::string("provided type is incompatible with array: ") + ArrayType(Type::Parameters(), type, length()).compare(baretype())); - } - } - - bool NumpyArray::accepts(const std::shared_ptr type) { - std::shared_ptr test = type->level(); - for (size_t i = 1; i < shape_.size(); i++) { - if (RegularType* raw = dynamic_cast(test.get())) { - if (raw->size() == (int64_t)shape_[i]) { - test = raw->inner()->level(); - } - else { - return false; - } - } - else { - return false; - } - } - if (PrimitiveType* raw = dynamic_cast(test.get())) { - if (format_.compare("d") == 0) { - return raw->dtype() == PrimitiveType::float64; - } - else if (format_.compare("f") == 0) { - return raw->dtype() == 
PrimitiveType::float32; - } -#ifdef _MSC_VER - else if (format_.compare("q") == 0) { -#else - else if (format_.compare("l") == 0) { -#endif - return raw->dtype() == PrimitiveType::int64; - } -#ifdef _MSC_VER - else if (format_.compare("Q") == 0) { -#else - else if (format_.compare("L") == 0) { -#endif - return raw->dtype() == PrimitiveType::uint64; - } -#ifdef _MSC_VER - else if (format_.compare("l") == 0) { -#else - else if (format_.compare("i") == 0) { -#endif - return raw->dtype() == PrimitiveType::int32; - } -#ifdef _MSC_VER - else if (format_.compare("L") == 0) { -#else - else if (format_.compare("I") == 0) { -#endif - return raw->dtype() == PrimitiveType::uint32; - } - else if (format_.compare("h") == 0) { - return raw->dtype() == PrimitiveType::int16; - } - else if (format_.compare("H") == 0) { - return raw->dtype() == PrimitiveType::uint16; - } - else if (format_.compare("b") == 0) { - return raw->dtype() == PrimitiveType::int8; - } - else if (format_.compare("B") == 0 || format_.compare("c") == 0) { - return raw->dtype() == PrimitiveType::uint8; - } - else if (format_.compare("?") == 0) { - return raw->dtype() == PrimitiveType::boolean; - } - else { - return false; - } - } - else { - return false; - } - } - int64_t NumpyArray::length() const { if (isscalar()) { return -1; // just like Record, which is also a scalar @@ -451,7 +424,7 @@ namespace awkward { } const std::shared_ptr NumpyArray::shallow_copy() const { - return std::shared_ptr(new NumpyArray(id_, type_, ptr_, shape_, strides_, byteoffset_, itemsize_, format_)); + return std::make_shared(id_, type_, ptr_, shape_, strides_, byteoffset_, itemsize_, format_); } void NumpyArray::check_for_iteration() const { @@ -467,7 +440,7 @@ namespace awkward { if (id_.get() != nullptr) { id = id_.get()->getitem_range_nowrap(0, 0); } - return std::shared_ptr(new NumpyArray(id, type_, ptr_, shape, strides, byteoffset_, itemsize_, format_)); + return std::make_shared(id, type_, ptr_, shape, strides, byteoffset_, 
itemsize_, format_); } const std::shared_ptr NumpyArray::getitem_at(int64_t at) const { @@ -493,7 +466,7 @@ namespace awkward { } id = id_.get()->getitem_range_nowrap(at, at + 1); } - return std::shared_ptr(new NumpyArray(id, type_, ptr_, shape, strides, byteoffset, itemsize_, format_)); + return std::make_shared(id, type_, ptr_, shape, strides, byteoffset, itemsize_, format_); } const std::shared_ptr NumpyArray::getitem_range(int64_t start, int64_t stop) const { @@ -516,7 +489,7 @@ namespace awkward { } id = id_.get()->getitem_range_nowrap(start, stop); } - return std::shared_ptr(new NumpyArray(id, type_, ptr_, shape, strides_, byteoffset, itemsize_, format_)); + return std::make_shared(id, type_, ptr_, shape, strides_, byteoffset, itemsize_, format_); } const std::shared_ptr NumpyArray::getitem_field(const std::string& key) const { @@ -543,7 +516,7 @@ namespace awkward { std::vector outshape(out.shape_.begin() + 1, out.shape_.end()); std::vector outstrides(out.strides_.begin() + 1, out.strides_.end()); - return std::shared_ptr(new NumpyArray(out.id_, out.type_, out.ptr_, outshape, outstrides, out.byteoffset_, itemsize_, format_)); + return std::make_shared(out.id_, out.type_, out.ptr_, outshape, outstrides, out.byteoffset_, itemsize_, format_); } else { @@ -564,11 +537,11 @@ namespace awkward { std::vector outshape(out.shape_.begin() + 1, out.shape_.end()); std::vector outstrides(out.strides_.begin() + 1, out.strides_.end()); - return std::shared_ptr(new NumpyArray(out.id_, out.type_, out.ptr_, outshape, outstrides, out.byteoffset_, itemsize_, format_)); + return std::make_shared(out.id_, out.type_, out.ptr_, outshape, outstrides, out.byteoffset_, itemsize_, format_); } } - const std::shared_ptr NumpyArray::getitem_next(const std::shared_ptr head, const Slice& tail, const Index64& advanced) const { + const std::shared_ptr NumpyArray::getitem_next(const std::shared_ptr& head, const Slice& tail, const Index64& advanced) const { assert(!isscalar()); Index64 
carry(shape_[0]); struct Error err = awkward_carry_arange_64(carry.ptr().get(), shape_[0]); @@ -596,7 +569,7 @@ namespace awkward { std::vector shape = { (ssize_t)carry.length() }; shape.insert(shape.end(), shape_.begin() + 1, shape_.end()); - return std::shared_ptr(new NumpyArray(id, type_, ptr, shape, strides_, 0, itemsize_, format_)); + return std::make_shared(id, type_, ptr, shape, strides_, 0, itemsize_, format_); } const std::pair NumpyArray::minmax_depth() const { @@ -629,6 +602,84 @@ namespace awkward { throw std::invalid_argument("array contains no Records"); } + void NumpyArray::checktype() const { + bool okay = false; + if (PrimitiveType* raw = dynamic_cast(type_.get())) { + if (format_.compare("d") == 0) { + okay = (raw->dtype() == PrimitiveType::float64); + } + else if (format_.compare("f") == 0) { + okay = (raw->dtype() == PrimitiveType::float32); + } +#ifdef _MSC_VER + else if (format_.compare("q") == 0) { +#else + else if (format_.compare("l") == 0) { +#endif + okay = (raw->dtype() == PrimitiveType::int64); + } +#ifdef _MSC_VER + else if (format_.compare("Q") == 0) { +#else + else if (format_.compare("L") == 0) { +#endif + okay = (raw->dtype() == PrimitiveType::uint64); + } +#ifdef _MSC_VER + else if (format_.compare("l") == 0) { +#else + else if (format_.compare("i") == 0) { +#endif + okay = (raw->dtype() == PrimitiveType::int32); + } +#ifdef _MSC_VER + else if (format_.compare("L") == 0) { +#else + else if (format_.compare("I") == 0) { +#endif + okay = (raw->dtype() == PrimitiveType::uint32); + } + else if (format_.compare("h") == 0) { + okay = (raw->dtype() == PrimitiveType::int16); + } + else if (format_.compare("H") == 0) { + okay = (raw->dtype() == PrimitiveType::uint16); + } + else if (format_.compare("b") == 0) { + okay = (raw->dtype() == PrimitiveType::int8); + } + else if (format_.compare("B") == 0 || format_.compare("c") == 0) { + okay = (raw->dtype() == PrimitiveType::uint8); + } + else if (format_.compare("?") == 0) { + okay = 
(raw->dtype() == PrimitiveType::boolean); + } + } + if (!okay) { + throw std::invalid_argument(std::string("cannot assign type ") + type_.get()->tostring() + std::string(" to ") + classname()); + } + } + + const std::shared_ptr NumpyArray::getitem_next(const SliceAt& at, const Slice& tail, const Index64& advanced) const { + throw std::runtime_error("NumpyArray has its own getitem_next system"); + } + + const std::shared_ptr NumpyArray::getitem_next(const SliceRange& range, const Slice& tail, const Index64& advanced) const { + throw std::runtime_error("NumpyArray has its own getitem_next system"); + } + + const std::shared_ptr NumpyArray::getitem_next(const SliceArray64& array, const Slice& tail, const Index64& advanced) const { + throw std::runtime_error("NumpyArray has its own getitem_next system"); + } + + const std::shared_ptr NumpyArray::getitem_next(const SliceField& field, const Slice& tail, const Index64& advanced) const { + throw std::runtime_error("NumpyArray has its own getitem_next system"); + } + + const std::shared_ptr NumpyArray::getitem_next(const SliceFields& fields, const Slice& tail, const Index64& advanced) const { + throw std::runtime_error("NumpyArray has its own getitem_next system"); + } + const std::vector flatten_shape(const std::vector shape) { if (shape.size() == 1) { return std::vector(); @@ -681,7 +732,7 @@ namespace awkward { } } - const NumpyArray NumpyArray::contiguous_next(Index64 bytepos) const { + const NumpyArray NumpyArray::contiguous_next(const Index64& bytepos) const { if (iscontiguous()) { std::shared_ptr ptr(new uint8_t[(size_t)(bytepos.length()*strides_[0])], awkward::util::array_deleter()); struct Error err = awkward_numpyarray_contiguous_copy_64( @@ -791,8 +842,8 @@ namespace awkward { } awkward_regularize_rangeslice(&start, &stop, step > 0, range.hasstart(), range.hasstop(), (int64_t)shape_[1]); - int64_t numer = abs(start - stop); - int64_t denom = abs(step); + int64_t numer = std::abs(start - stop); + int64_t denom = 
std::abs(step); int64_t d = numer / denom; int64_t m = numer % denom; int64_t lenhead = d + (m != 0 ? 1 : 0); @@ -823,10 +874,10 @@ namespace awkward { } else { std::vector> tailitems = tail.items(); - std::vector> items = { std::shared_ptr(new SliceEllipsis()) }; + std::vector> items = { std::make_shared() }; items.insert(items.end(), tailitems.begin(), tailitems.end()); - std::shared_ptr nexthead(new SliceRange(Slice::none(), Slice::none(), 1)); + std::shared_ptr nexthead = std::make_shared(Slice::none(), Slice::none(), 1); Slice nexttail(items); return getitem_bystrides(nexthead, nexttail, length); } @@ -844,7 +895,7 @@ namespace awkward { return NumpyArray(out.id_, out.type_, out.ptr_, outshape, outstrides, out.byteoffset_, itemsize_, format_); } - const NumpyArray NumpyArray::getitem_next(const std::shared_ptr head, const Slice& tail, const Index64& carry, const Index64& advanced, int64_t length, int64_t stride, bool first) const { + const NumpyArray NumpyArray::getitem_next(const std::shared_ptr& head, const Slice& tail, const Index64& carry, const Index64& advanced, int64_t length, int64_t stride, bool first) const { if (head.get() == nullptr) { std::shared_ptr ptr(new uint8_t[(size_t)(carry.length()*stride)], awkward::util::array_deleter()); struct Error err = awkward_numpyarray_getitem_next_null_64( @@ -943,8 +994,8 @@ namespace awkward { } awkward_regularize_rangeslice(&start, &stop, step > 0, range.hasstart(), range.hasstop(), (int64_t)shape_[1]); - int64_t numer = abs(start - stop); - int64_t denom = abs(step); + int64_t numer = std::abs(start - stop); + int64_t denom = std::abs(step); int64_t d = numer / denom; int64_t m = numer % denom; int64_t lenhead = d + (m != 0 ? 
1 : 0); @@ -1009,9 +1060,9 @@ namespace awkward { } else { std::vector> tailitems = tail.items(); - std::vector> items = { std::shared_ptr(new SliceEllipsis()) }; + std::vector> items = { std::make_shared() }; items.insert(items.end(), tailitems.begin(), tailitems.end()); - std::shared_ptr nexthead(new SliceRange(Slice::none(), Slice::none(), 1)); + std::shared_ptr nexthead = std::make_shared(Slice::none(), Slice::none(), 1); Slice nexttail(items); return getitem_next(nexthead, nexttail, carry, advanced, length, stride, false); } diff --git a/src/libawkward/array/Record.cpp b/src/libawkward/array/Record.cpp index 8ab68640eb..9456b2a6bc 100644 --- a/src/libawkward/array/Record.cpp +++ b/src/libawkward/array/Record.cpp @@ -10,6 +10,43 @@ #include "awkward/array/Record.h" namespace awkward { + Record::Record(const RecordArray& array, int64_t at) + : Content(Identity::none(), Type::none()) + , array_(array) + , at_(at) { + if (type_.get() != nullptr) { + checktype(); + } + } + + const std::shared_ptr Record::array() const { + return array_.shallow_copy(); + } + + int64_t Record::at() const { + return at_; + } + + const std::vector> Record::contents() const { + std::vector> out; + for (auto item : array_.contents()) { + out.push_back(item.get()->getitem_at_nowrap(at_)); + } + return out; + } + + const std::shared_ptr Record::lookup() const { + return array_.lookup(); + } + + const std::shared_ptr Record::reverselookup() const { + return array_.reverselookup(); + } + + bool Record::istuple() const { + return lookup().get() == nullptr; + } + bool Record::isscalar() const { return true; } @@ -28,19 +65,48 @@ namespace awkward { } } - const std::shared_ptr Record::type() const { - return array_.type().get()->inner(); - } - void Record::setid() { throw std::runtime_error("undefined operation: Record::setid"); } - void Record::setid(const std::shared_ptr id) { + void Record::setid(const std::shared_ptr& id) { throw std::runtime_error("undefined operation: Record::setid"); } - 
const std::string Record::tostring_part(const std::string indent, const std::string pre, const std::string post) const { + bool Record::isbare() const { + return array_.isbare(); + } + + bool Record::istypeptr(Type* pointer) const { + return array_.istypeptr(pointer); + } + + const std::shared_ptr Record::type() const { + return array_.type(); + } + + const std::shared_ptr Record::astype(const std::shared_ptr& type) const { + if (type.get() == nullptr) { + if (array_.numfields() == 0) { + return std::make_shared(RecordArray(array_.id(), type, array_.length(), array_.istuple()), at_); + } + else { + return std::make_shared(RecordArray(array_.id(), type, array_.contents(), array_.lookup(), array_.reverselookup()), at_); + } + } + else { + std::shared_ptr record = array_.astype(type); + RecordArray* raw = dynamic_cast(record.get()); + if (raw->numfields() == 0) { + return std::make_shared(RecordArray(raw->id(), raw->type(), raw->length(), raw->istuple()), at_); + } + else { + return std::make_shared(RecordArray(raw->id(), raw->type(), raw->contents(), raw->lookup(), raw->reverselookup()), at_); + } + } + } + + const std::string Record::tostring_part(const std::string& indent, const std::string& pre, const std::string& post) const { std::stringstream out; out << indent << pre << "<" << classname() << " at=\"" << at_ << "\">\n"; out << array_.tostring_part(indent + std::string(" "), "", "\n"); @@ -52,7 +118,7 @@ namespace awkward { size_t cols = (size_t)numfields(); std::shared_ptr keys = array_.reverselookup(); if (istuple()) { - keys = std::shared_ptr(new RecordArray::ReverseLookup); + keys = std::make_shared(); for (size_t j = 0; j < cols; j++) { keys.get()->push_back(std::to_string(j)); } @@ -66,31 +132,12 @@ namespace awkward { builder.endrecord(); } - const std::shared_ptr Record::innertype(bool bare) const { - return array_.innertype(bare); - } - - void Record::settype(const std::shared_ptr type) { - if (dynamic_cast(type.get())) { - throw 
std::invalid_argument("provided ArrayType is incompatible with Record because Record is a scalar"); - } - array_.settype_part(type); - } - - void Record::settype_part(const std::shared_ptr type) { - array_.settype_part(type); - } - - bool Record::accepts(const std::shared_ptr type) { - return array_.accepts(type); - } - int64_t Record::length() const { return -1; // just like NumpyArray with ndim == 0, which is also a scalar } const std::shared_ptr Record::shallow_copy() const { - return std::shared_ptr(new Record(array_, at_)); + return std::make_shared(array_, at_); } void Record::check_for_iteration() const { @@ -217,6 +264,8 @@ namespace awkward { return Record(array_.astuple(), at_); } + void Record::checktype() const { } + const std::shared_ptr Record::getitem_next(const SliceAt& at, const Slice& tail, const Index64& advanced) const { throw std::runtime_error("undefined operation: Record::getitem_next(at)"); } diff --git a/src/libawkward/array/RecordArray.cpp b/src/libawkward/array/RecordArray.cpp index 72258cd8e0..a83faf0465 100644 --- a/src/libawkward/array/RecordArray.cpp +++ b/src/libawkward/array/RecordArray.cpp @@ -11,6 +11,66 @@ #include "awkward/array/RecordArray.h" namespace awkward { + RecordArray::RecordArray(const std::shared_ptr& id, const std::shared_ptr& type, const std::vector>& contents, const std::shared_ptr& lookup, const std::shared_ptr& reverselookup) + : Content(id, type) + , contents_(contents) + , lookup_(lookup) + , reverselookup_(reverselookup) + , length_(0) { + if (reverselookup_.get() == nullptr && lookup_.get() == nullptr) { } + else if (reverselookup_.get() != nullptr && lookup_.get() != nullptr) { } + else { + throw std::runtime_error("either 'lookup' and 'reverselookup' should both be None or neither should be"); + } + if (contents_.empty()) { + throw std::runtime_error("this constructor can only be used with non-empty contents"); + } + if (type_.get() != nullptr) { + checktype(); + } + } + + RecordArray::RecordArray(const 
std::shared_ptr& id, const std::shared_ptr& type, const std::vector>& contents) + : Content(id, type) + , contents_(contents) + , lookup_(nullptr) + , reverselookup_(nullptr) + , length_(0) { + if (contents_.empty()) { + throw std::runtime_error("this constructor can only be used with non-empty contents"); + } + if (type_.get() != nullptr) { + checktype(); + } + } + + RecordArray::RecordArray(const std::shared_ptr& id, const std::shared_ptr& type, int64_t length, bool istuple) + : Content(id, type) + , contents_() + , lookup_(istuple ? nullptr : new Lookup) + , reverselookup_(istuple ? nullptr : new ReverseLookup) + , length_(length) { + if (type_.get() != nullptr) { + checktype(); + } + } + + const std::vector> RecordArray::contents() const { + return contents_; + } + + const std::shared_ptr RecordArray::lookup() const { + return lookup_; + } + + const std::shared_ptr RecordArray::reverselookup() const { + return reverselookup_; + } + + bool RecordArray::istuple() const { + return lookup_.get() == nullptr; + } + const std::string RecordArray::classname() const { return "RecordArray"; } @@ -18,22 +78,22 @@ namespace awkward { void RecordArray::setid() { int64_t len = length(); if (len <= kMaxInt32) { - Identity32* rawid = new Identity32(Identity::newref(), Identity::FieldLoc(), 1, len); - std::shared_ptr newid(rawid); + std::shared_ptr newid = std::make_shared(Identity::newref(), Identity::FieldLoc(), 1, len); + Identity32* rawid = reinterpret_cast(newid.get()); struct Error err = awkward_new_identity32(rawid->ptr().get(), len); util::handle_error(err, classname(), id_.get()); setid(newid); } else { - Identity64* rawid = new Identity64(Identity::newref(), Identity::FieldLoc(), 1, len); - std::shared_ptr newid(rawid); + std::shared_ptr newid = std::make_shared(Identity::newref(), Identity::FieldLoc(), 1, len); + Identity64* rawid = reinterpret_cast(newid.get()); struct Error err = awkward_new_identity64(rawid->ptr().get(), len); util::handle_error(err, classname(), 
id_.get()); setid(newid); } } - void RecordArray::setid(const std::shared_ptr id) { + void RecordArray::setid(const std::shared_ptr& id) { if (id.get() == nullptr) { for (auto content : contents_) { content.get()->setid(id); @@ -62,10 +122,58 @@ namespace awkward { id_ = id; } - const std::string RecordArray::tostring_part(const std::string indent, const std::string pre, const std::string post) const { + const std::shared_ptr RecordArray::type() const { + if (type_.get() != nullptr) { + return type_; + } + else { + std::vector> types; + for (auto item : contents_) { + types.push_back(item.get()->type()); + } + return std::make_shared(Type::Parameters(), types, lookup_, reverselookup_); + } + } + + const std::shared_ptr RecordArray::astype(const std::shared_ptr& type) const { + if (type.get() == nullptr || dynamic_cast(type.get()) == nullptr) { + if (contents_.empty()) { + return std::make_shared(id_, type, length(), istuple()); + } + else { + return std::make_shared(id_, type, contents_, lookup_, reverselookup_); + } + } + RecordType* raw = dynamic_cast(type.get()); + std::vector> contents; + if (raw->reverselookup().get() == nullptr) { + for (int64_t i = 0; i < raw->numfields(); i++) { + if (i >= numfields()) { + throw std::invalid_argument(std::string("cannot assign type ") + type_.get()->tostring() + std::string(" to ") + classname()); + } + contents.push_back(contents_[(size_t)i].get()->astype(raw->field(i))); + } + } + else { + for (auto key : raw->keys()) { + if (!haskey(key)) { + throw std::invalid_argument(std::string("cannot assign type ") + type_.get()->tostring() + std::string(" to ") + classname()); + } + contents.push_back(contents_[(size_t)fieldindex(key)].get()->astype(raw->field(key))); + } + } + if (contents.empty()) { + return std::make_shared(id_, type, length(), istuple()); + } + else { + return std::make_shared(id_, type, contents, raw->lookup(), raw->reverselookup()); + } + } + + const std::string RecordArray::tostring_part(const std::string& 
indent, const std::string& pre, const std::string& post) const { std::stringstream out; out << indent << pre << "<" << classname(); - if (contents_.size() == 0) { + if (contents_.empty()) { out << " length=\"" << length_ << "\""; } out << ">\n"; @@ -101,7 +209,7 @@ namespace awkward { size_t cols = contents_.size(); std::shared_ptr keys = reverselookup_; if (istuple()) { - keys = std::shared_ptr(new ReverseLookup); + keys = std::make_shared(); for (size_t j = 0; j < cols; j++) { keys.get()->push_back(std::to_string(j)); } @@ -118,63 +226,8 @@ namespace awkward { builder.endlist(); } - const std::shared_ptr RecordArray::innertype(bool bare) const { - std::vector> types; - for (auto item : contents_) { - types.push_back(item.get()->innertype(bare)); - } - return std::shared_ptr(new RecordType(Type::Parameters(), types, lookup_, reverselookup_)); - } - - void RecordArray::settype_part(const std::shared_ptr type) { - if (accepts(type)) { - std::shared_ptr level = type.get()->level(); - RecordType* raw = dynamic_cast(level.get()); - if (reverselookup_.get() == nullptr) { - for (int64_t i = 0; i < numfields(); i++) { - field(i).get()->settype_part(raw->field(i)); - } - } - else { - for (auto key : raw->keys()) { - field(key).get()->settype_part(raw->field(key)); - } - } - type_ = type; - } - else { - throw std::invalid_argument(std::string("provided type is incompatible with array: ") + ArrayType(Type::Parameters(), type, length()).compare(baretype())); - } - } - - bool RecordArray::accepts(const std::shared_ptr type) { - std::shared_ptr check = type.get()->level(); - if (RecordType* raw = dynamic_cast(check.get())) { - if (reverselookup_.get() == nullptr) { - if (raw->reverselookup().get() != nullptr) { - return false; - } - return numfields() == raw->numfields(); - } - else { - if (raw->reverselookup().get() == nullptr) { - return false; - } - for (auto key : raw->keys()) { - if (!haskey(key)) { - return false; - } - } - return true; - } - } - else { - return false; - 
} - } - int64_t RecordArray::length() const { - if (contents_.size() == 0) { + if (contents_.empty()) { return length_; } else { @@ -190,11 +243,11 @@ namespace awkward { } const std::shared_ptr RecordArray::shallow_copy() const { - if (contents_.size() == 0) { - return std::shared_ptr(new RecordArray(id_, type_, length(), istuple())); + if (contents_.empty()) { + return std::make_shared(id_, type_, length(), istuple()); } else { - return std::shared_ptr(new RecordArray(id_, type_, contents_, lookup_, reverselookup_)); + return std::make_shared(id_, type_, contents_, lookup_, reverselookup_); } } @@ -221,35 +274,35 @@ namespace awkward { } const std::shared_ptr RecordArray::getitem_at_nowrap(int64_t at) const { - return std::shared_ptr(new Record(*this, at)); + return std::make_shared(*this, at); } const std::shared_ptr RecordArray::getitem_range(int64_t start, int64_t stop) const { - if (contents_.size() == 0) { + if (contents_.empty()) { int64_t regular_start = start; int64_t regular_stop = stop; awkward_regularize_rangeslice(®ular_start, ®ular_stop, true, start != Slice::none(), stop != Slice::none(), length()); - return std::shared_ptr(new RecordArray(id_, type_, regular_stop - regular_start, istuple())); + return std::make_shared(id_, type_, regular_stop - regular_start, istuple()); } else { std::vector> contents; for (auto content : contents_) { contents.push_back(content.get()->getitem_range(start, stop)); } - return std::shared_ptr(new RecordArray(id_, type_, contents, lookup_, reverselookup_)); + return std::make_shared(id_, type_, contents, lookup_, reverselookup_); } } const std::shared_ptr RecordArray::getitem_range_nowrap(int64_t start, int64_t stop) const { - if (contents_.size() == 0) { - return std::shared_ptr(new RecordArray(id_, type_, stop - start, istuple())); + if (contents_.empty()) { + return std::make_shared(id_, type_, stop - start, istuple()); } else { std::vector> contents; for (auto content : contents_) { 
contents.push_back(content.get()->getitem_range_nowrap(start, stop)); } - return std::shared_ptr(new RecordArray(id_, type_, contents, lookup_, reverselookup_)); + return std::make_shared(id_, type_, contents, lookup_, reverselookup_); } } @@ -258,11 +311,7 @@ namespace awkward { } const std::shared_ptr RecordArray::getitem_fields(const std::vector& keys) const { - std::shared_ptr type = Type::none(); - if (type_.get() != nullptr && type_.get()->numfields() != -1 && util::subset(keys, type_.get()->keys())) { - type = type_; - } - RecordArray out(id_, type, length(), istuple()); + RecordArray out(id_, type_, length(), istuple()); if (istuple()) { for (auto key : keys) { out.append(field(key).get()->getitem_range_nowrap(0, length())); @@ -277,12 +326,12 @@ namespace awkward { } const std::shared_ptr RecordArray::carry(const Index64& carry) const { - if (contents_.size() == 0) { + if (contents_.empty()) { std::shared_ptr id(nullptr); if (id_.get() != nullptr) { id = id_.get()->getitem_carry_64(carry); } - return std::shared_ptr(new RecordArray(id, type_, carry.length(), istuple())); + return std::make_shared(id, type_, carry.length(), istuple()); } else { std::vector> contents; @@ -293,12 +342,12 @@ namespace awkward { if (id_.get() != nullptr) { id = id_.get()->getitem_carry_64(carry); } - return std::shared_ptr(new RecordArray(id, type_, contents, lookup_, reverselookup_)); + return std::make_shared(id, type_, contents, lookup_, reverselookup_); } } const std::pair RecordArray::minmax_depth() const { - if (contents_.size() == 0) { + if (contents_.empty()) { return std::pair(0, 0); } int64_t min = kMaxInt64; @@ -437,17 +486,24 @@ namespace awkward { } const RecordArray RecordArray::astuple() const { - RecordArray out(id_, Type::none(), contents_); - if (type_.get() != nullptr && type_.get()->numfields() != -1 && util::subset(out.keys(), type_.get()->keys())) { - out.type_ = type_; + if (type_.get() == nullptr) { + return RecordArray(id_, Type::none(), contents_); + } 
+ else { + RecordType* raw = dynamic_cast(type_.get()); + return RecordArray(id_, raw->astuple(), contents_); } - return out; } void RecordArray::append(const std::shared_ptr& content, const std::string& key) { size_t j = contents_.size(); append(content); setkey(j, key); + if (type_.get() != nullptr) { + if (RecordType* raw = dynamic_cast(type_.get())) { + raw->setkey(j, key); + } + } } void RecordArray::append(const std::shared_ptr& content) { @@ -455,12 +511,17 @@ namespace awkward { reverselookup_.get()->push_back(std::to_string(contents_.size())); } contents_.push_back(content); + if (type_.get() != nullptr) { + if (RecordType* raw = dynamic_cast(type_.get())) { + raw->append(content.get()->type()); + } + } } void RecordArray::setkey(int64_t fieldindex, const std::string& fieldname) { if (istuple()) { - lookup_ = std::shared_ptr(new Lookup); - reverselookup_ = std::shared_ptr(new ReverseLookup); + lookup_ = std::make_shared(); + reverselookup_ = std::make_shared(); for (size_t j = 0; j < contents_.size(); j++) { reverselookup_.get()->push_back(std::to_string(j)); } @@ -469,7 +530,30 @@ namespace awkward { (*reverselookup_.get())[(size_t)fieldindex] = fieldname; } - const std::shared_ptr RecordArray::getitem_next(const std::shared_ptr head, const Slice& tail, const Index64& advanced) const { + void RecordArray::checktype() const { + bool okay = false; + if (RecordType* raw = dynamic_cast(type_.get())) { + if (raw->lookup().get() != nullptr && lookup_.get() != nullptr && raw->reverselookup().get() != nullptr && reverselookup_.get() != nullptr) { + okay = *(raw->lookup().get()) == *(lookup_.get()) && *(raw->reverselookup().get()) == *(reverselookup_.get()); + } + else { + okay = (raw->numfields() == numfields()); + } + if (okay) { + for (size_t i = 0; i < contents_.size(); i++) { + if (!contents_[i].get()->istypeptr(raw->field((int64_t)i).get())) { + okay = false; + break; + } + } + } + } + if (!okay) { + throw std::invalid_argument(std::string("cannot assign 
type ") + type_.get()->tostring() + std::string(" to ") + classname()); + } + } + + const std::shared_ptr RecordArray::getitem_next(const std::shared_ptr& head, const Slice& tail, const Index64& advanced) const { std::shared_ptr nexthead = tail.head(); Slice nexttail = tail.tail(); Slice emptytail; @@ -486,7 +570,7 @@ namespace awkward { std::shared_ptr out = getitem_next(*fields, emptytail, advanced); return out.get()->getitem_next(nexthead, nexttail, advanced); } - else if (contents_.size() == 0) { + else if (contents_.empty()) { RecordArray out(Identity::none(), type_, length(), istuple()); return out.getitem_next(nexthead, nexttail, advanced); } diff --git a/src/libawkward/array/RegularArray.cpp b/src/libawkward/array/RegularArray.cpp index 09efd3b799..7d3a6321d7 100644 --- a/src/libawkward/array/RegularArray.cpp +++ b/src/libawkward/array/RegularArray.cpp @@ -13,11 +13,28 @@ #include "awkward/array/RegularArray.h" namespace awkward { + RegularArray::RegularArray(const std::shared_ptr& id, const std::shared_ptr& type, const std::shared_ptr& content, int64_t size) + : Content(id, type) + , content_(content) + , size_(size) { + if (type_.get() != nullptr) { + checktype(); + } + } + + const std::shared_ptr RegularArray::content() const { + return content_; + } + + int64_t RegularArray::size() const { + return size_; + } + const std::string RegularArray::classname() const { return "RegularArray"; } - void RegularArray::setid(const std::shared_ptr id) { + void RegularArray::setid(const std::shared_ptr& id) { if (id.get() == nullptr) { content_.get()->setid(id); } @@ -30,8 +47,8 @@ namespace awkward { bigid = id.get()->to64(); } if (Identity32* rawid = dynamic_cast(bigid.get())) { - Identity32* rawsubid = new Identity32(Identity::newref(), rawid->fieldloc(), rawid->width() + 1, content_.get()->length()); - std::shared_ptr subid(rawsubid); + std::shared_ptr subid = std::make_shared(Identity::newref(), rawid->fieldloc(), rawid->width() + 1, content_.get()->length()); + 
Identity32* rawsubid = reinterpret_cast(subid.get()); struct Error err = awkward_identity32_from_regulararray( rawsubid->ptr().get(), rawid->ptr().get(), @@ -44,8 +61,8 @@ namespace awkward { content_.get()->setid(subid); } else if (Identity64* rawid = dynamic_cast(bigid.get())) { - Identity64* rawsubid = new Identity64(Identity::newref(), rawid->fieldloc(), rawid->width() + 1, content_.get()->length()); - std::shared_ptr subid(rawsubid); + std::shared_ptr subid = std::make_shared(Identity::newref(), rawid->fieldloc(), rawid->width() + 1, content_.get()->length()); + Identity64* rawsubid = reinterpret_cast(subid.get()); struct Error err = awkward_identity64_from_regulararray( rawsubid->ptr().get(), rawid->ptr().get(), @@ -66,22 +83,41 @@ namespace awkward { void RegularArray::setid() { if (length() < kMaxInt32) { - Identity32* rawid = new Identity32(Identity::newref(), Identity::FieldLoc(), 1, length()); - std::shared_ptr newid(rawid); + std::shared_ptr newid = std::make_shared(Identity::newref(), Identity::FieldLoc(), 1, length()); + Identity32* rawid = reinterpret_cast(newid.get()); struct Error err = awkward_new_identity32(rawid->ptr().get(), length()); util::handle_error(err, classname(), id_.get()); setid(newid); } else { - Identity64* rawid = new Identity64(Identity::newref(), Identity::FieldLoc(), 1, length()); - std::shared_ptr newid(rawid); + std::shared_ptr newid = std::make_shared(Identity::newref(), Identity::FieldLoc(), 1, length()); + Identity64* rawid = reinterpret_cast(newid.get()); struct Error err = awkward_new_identity64(rawid->ptr().get(), length()); util::handle_error(err, classname(), id_.get()); setid(newid); } } - const std::string RegularArray::tostring_part(const std::string indent, const std::string pre, const std::string post) const { + const std::shared_ptr RegularArray::type() const { + if (type_.get() != nullptr) { + return type_; + } + else { + return std::make_shared(Type::Parameters(), content_.get()->type(), size_); + } + } + + 
const std::shared_ptr RegularArray::astype(const std::shared_ptr& type) const { + std::shared_ptr inner = type; + if (inner.get() != nullptr) { + if (RegularType* raw = dynamic_cast(inner.get())) { + inner = raw->type(); + } + } + return std::make_shared(id_, type, content_.get()->astype(inner), size_); + } + + const std::string RegularArray::tostring_part(const std::string& indent, const std::string& pre, const std::string& post) const { std::stringstream out; out << indent << pre << "<" << classname() << " size=\"" << size_ << "\">\n"; if (id_.get() != nullptr) { @@ -104,40 +140,12 @@ namespace awkward { builder.endlist(); } - const std::shared_ptr RegularArray::innertype(bool bare) const { - if (bare || content_.get()->isbare()) { - return std::shared_ptr(new RegularType(Type::Parameters(), content_.get()->innertype(bare), size_)); - } - else { - return std::shared_ptr(new RegularType(Type::Parameters(), content_.get()->type().get()->nolength(), size_)); - } - } - - void RegularArray::settype_part(const std::shared_ptr type) { - if (accepts(type)) { - content_.get()->settype_part(type.get()->inner()); - type_ = type; - } - else { - throw std::invalid_argument(std::string("provided type is incompatible with array: ") + ArrayType(Type::Parameters(), type, length()).compare(baretype())); - } - } - - bool RegularArray::accepts(const std::shared_ptr type) { - if (RegularType* raw = dynamic_cast(type.get()->level().get())) { - return raw->size() == size_; - } - else { - return false; - } - } - int64_t RegularArray::length() const { return size_ == 0 ? 
0 : content_.get()->length() / size_; // floor of length / size } const std::shared_ptr RegularArray::shallow_copy() const { - return std::shared_ptr(new RegularArray(id_, type_, content_, size_)); + return std::make_shared(id_, type_, content_, size_); } void RegularArray::check_for_iteration() const { @@ -181,11 +189,11 @@ namespace awkward { if (id_.get() != nullptr) { id = id_.get()->getitem_range_nowrap(start, stop); } - return std::shared_ptr(new RegularArray(id_, type_, content_.get()->getitem_range_nowrap(start*size_, stop*size_), size_)); + return std::make_shared(id_, type_, content_.get()->getitem_range_nowrap(start*size_, stop*size_), size_); } const std::shared_ptr RegularArray::getitem_field(const std::string& key) const { - return std::shared_ptr(new RegularArray(id_, Type::none(), content_.get()->getitem_field(key), size_)); + return std::make_shared(id_, Type::none(), content_.get()->getitem_field(key), size_); } const std::shared_ptr RegularArray::getitem_fields(const std::vector& keys) const { @@ -193,7 +201,7 @@ namespace awkward { if (SliceFields(keys).preserves_type(type_, Index64(0))) { type = type_; } - return std::shared_ptr(new RegularArray(id_, type, content_.get()->getitem_fields(keys), size_)); + return std::make_shared(id_, type, content_.get()->getitem_fields(keys), size_); } const std::shared_ptr RegularArray::carry(const Index64& carry) const { @@ -210,7 +218,7 @@ namespace awkward { if (id_.get() != nullptr) { id = id_.get()->getitem_carry_64(carry); } - return std::shared_ptr(new RegularArray(id, type_, content_.get()->carry(nextcarry), size_)); + return std::make_shared(id, type_, content_.get()->carry(nextcarry), size_); } const std::pair RegularArray::minmax_depth() const { @@ -246,6 +254,16 @@ namespace awkward { return content_.get()->keys(); } + void RegularArray::checktype() const { + bool okay = false; + if (RegularType* raw = dynamic_cast(type_.get())) { + okay = (raw->type().get() == content_.get()->type().get() && 
raw->size() == size_); + } + if (!okay) { + throw std::invalid_argument(std::string("cannot assign type ") + type_.get()->tostring() + std::string(" to ") + classname()); + } + } + const std::shared_ptr RegularArray::getitem_next(const SliceAt& at, const Slice& tail, const Index64& advanced) const { assert(advanced.length() == 0); @@ -273,7 +291,7 @@ namespace awkward { assert(range.step() != 0); int64_t regular_start = range.start(); int64_t regular_stop = range.stop(); - int64_t regular_step = abs(range.step()); + int64_t regular_step = std::abs(range.step()); awkward_regularize_rangeslice(&regular_start, &regular_stop, range.step() > 0, range.start() != Slice::none(), range.stop() != Slice::none(), size_); int64_t nextsize = 0; if (range.step() > 0 && regular_stop - regular_start > 0) { @@ -307,11 +325,11 @@ namespace awkward { std::shared_ptr outtype = Type::none(); if (type_.get() != nullptr) { RegularType* raw = dynamic_cast(type_.get()); - outtype = std::shared_ptr(new RegularType(Type::Parameters(), raw->type(), nextsize)); + outtype = std::make_shared(Type::Parameters(), raw->type(), nextsize); } if (advanced.length() == 0) { - return std::shared_ptr(new RegularArray(id_, outtype, nextcontent.get()->getitem_next(nexthead, nexttail, advanced), nextsize)); + return std::make_shared(id_, outtype, nextcontent.get()->getitem_next(nexthead, nexttail, advanced), nextsize); } else { Index64 nextadvanced(len*nextsize); @@ -323,7 +341,7 @@ namespace awkward { nextsize); util::handle_error(err, classname(), id_.get()); - return std::shared_ptr(new RegularArray(id_, outtype, nextcontent.get()->getitem_next(nexthead, nexttail, nextadvanced), nextsize)); + return std::make_shared(id_, outtype, nextcontent.get()->getitem_next(nexthead, nexttail, nextadvanced), nextsize); } } diff --git a/src/libawkward/fillable/BoolFillable.cpp b/src/libawkward/fillable/BoolFillable.cpp index 6b39f16799..b024f6be61 100644 --- a/src/libawkward/fillable/BoolFillable.cpp +++ 
b/src/libawkward/fillable/BoolFillable.cpp @@ -10,11 +10,19 @@ namespace awkward { const std::shared_ptr BoolFillable::fromempty(const FillableOptions& options) { - std::shared_ptr out(new BoolFillable(options, GrowableBuffer::empty(options))); + std::shared_ptr out = std::make_shared(options, GrowableBuffer::empty(options)); out.get()->setthat(out); return out; } + BoolFillable::BoolFillable(const FillableOptions& options, const GrowableBuffer& buffer) + : options_(options) + , buffer_(buffer) { } + + const std::string BoolFillable::classname() const { + return "BoolFillable"; + }; + int64_t BoolFillable::length() const { return buffer_.length(); } @@ -24,13 +32,13 @@ namespace awkward { } const std::shared_ptr BoolFillable::type() const { - return std::shared_ptr(new PrimitiveType(Type::Parameters(), PrimitiveType::boolean)); + return std::make_shared(Type::Parameters(), PrimitiveType::boolean); } - const std::shared_ptr BoolFillable::snapshot() const { + const std::shared_ptr BoolFillable::snapshot(const std::shared_ptr& type) const { std::vector shape = { (ssize_t)buffer_.length() }; std::vector strides = { (ssize_t)sizeof(bool) }; - return std::shared_ptr(new NumpyArray(Identity::none(), Type::none(), buffer_.ptr(), shape, strides, 0, sizeof(bool), "?")); + return std::make_shared(Identity::none(), type, buffer_.ptr(), shape, strides, 0, sizeof(bool), "?"); } bool BoolFillable::active() const { diff --git a/src/libawkward/fillable/Fillable.cpp b/src/libawkward/fillable/Fillable.cpp new file mode 100644 index 0000000000..622207382f --- /dev/null +++ b/src/libawkward/fillable/Fillable.cpp @@ -0,0 +1,11 @@ +// BSD 3-Clause License; see https://github.com/jpivarski/awkward-1.0/blob/master/LICENSE + +#include "awkward/fillable/Fillable.h" + +namespace awkward { + Fillable::~Fillable() { } + + void Fillable::setthat(const std::shared_ptr& that) { + that_ = that; + } +} diff --git a/src/libawkward/fillable/FillableArray.cpp b/src/libawkward/fillable/FillableArray.cpp 
index 014407b00a..1100e2d9c0 100644 --- a/src/libawkward/fillable/FillableArray.cpp +++ b/src/libawkward/fillable/FillableArray.cpp @@ -2,11 +2,12 @@ #include -#include "awkward/type/ArrayType.h" - #include "awkward/fillable/FillableArray.h" namespace awkward { + FillableArray::FillableArray(const FillableOptions& options) + : fillable_(UnknownFillable::fromempty(options)) { } + const std::string FillableArray::tostring() const { std::stringstream out; out << "tostring() << "\"/>"; @@ -22,11 +23,11 @@ namespace awkward { } const std::shared_ptr FillableArray::type() const { - return std::shared_ptr(new ArrayType(Type::Parameters(), fillable_.get()->type(), fillable_.get()->length())); + return fillable_.get()->type(); } const std::shared_ptr FillableArray::snapshot() const { - return fillable_.get()->snapshot(); + return fillable_.get()->snapshot(type()); } const std::shared_ptr FillableArray::getitem_at(int64_t at) const { @@ -73,6 +74,10 @@ namespace awkward { maybeupdate(fillable_.get()->string(x, length, no_encoding)); } + void FillableArray::bytestring(const std::string& x) { + bytestring(x.c_str(), (int64_t)x.length()); + } + void FillableArray::string(const char* x) { maybeupdate(fillable_.get()->string(x, -1, utf8_encoding)); } @@ -81,6 +86,10 @@ namespace awkward { maybeupdate(fillable_.get()->string(x, length, utf8_encoding)); } + void FillableArray::string(const std::string& x) { + string(x.c_str(), (int64_t)x.length()); + } + void FillableArray::beginlist() { maybeupdate(fillable_.get()->beginlist()); } @@ -117,6 +126,10 @@ namespace awkward { maybeupdate(fillable_.get()->beginrecord(name, true)); } + void FillableArray::beginrecord_check(const std::string& name) { + beginrecord_check(name.c_str()); + } + void FillableArray::field_fast(const char* key) { maybeupdate(fillable_.get()->field(key, false)); } @@ -125,10 +138,30 @@ namespace awkward { maybeupdate(fillable_.get()->field(key, true)); } + void FillableArray::field_check(const std::string& key) { 
+ field_check(key.c_str()); + } + void FillableArray::endrecord() { maybeupdate(fillable_.get()->endrecord()); } + void FillableArray::fill(int64_t x) { + integer(x); + } + + void FillableArray::fill(double x) { + real(x); + } + + void FillableArray::fill(const char* x) { + bytestring(x); + } + + void FillableArray::fill(const std::string& x) { + bytestring(x.c_str()); + } + void FillableArray::maybeupdate(const std::shared_ptr& tmp) { if (tmp.get() != fillable_.get()) { fillable_ = tmp; diff --git a/src/libawkward/fillable/FillableOptions.cpp b/src/libawkward/fillable/FillableOptions.cpp new file mode 100644 index 0000000000..4c98193817 --- /dev/null +++ b/src/libawkward/fillable/FillableOptions.cpp @@ -0,0 +1,17 @@ +// BSD 3-Clause License; see https://github.com/jpivarski/awkward-1.0/blob/master/LICENSE + +#include "awkward/fillable/FillableOptions.h" + +namespace awkward { + FillableOptions::FillableOptions(int64_t initial, double resize) + : initial_(initial) + , resize_(resize) { } + + int64_t FillableOptions::initial() const { + return initial_; + } + + double FillableOptions::resize() const { + return resize_; + } +} diff --git a/src/libawkward/fillable/Float64Fillable.cpp b/src/libawkward/fillable/Float64Fillable.cpp index 70395eb8bb..c58d63e02c 100644 --- a/src/libawkward/fillable/Float64Fillable.cpp +++ b/src/libawkward/fillable/Float64Fillable.cpp @@ -10,7 +10,7 @@ namespace awkward { const std::shared_ptr Float64Fillable::fromempty(const FillableOptions& options) { - std::shared_ptr out(new Float64Fillable(options, GrowableBuffer::empty(options))); + std::shared_ptr out = std::make_shared(options, GrowableBuffer::empty(options)); out.get()->setthat(out); return out; } @@ -23,11 +23,19 @@ namespace awkward { newraw[i] = (double)oldraw[i]; } buffer.set_length(old.length()); - std::shared_ptr out(new Float64Fillable(options, buffer)); + std::shared_ptr out = std::make_shared(options, buffer); out.get()->setthat(out); return out; } + 
Float64Fillable::Float64Fillable(const FillableOptions& options, const GrowableBuffer& buffer) + : options_(options) + , buffer_(buffer) { } + + const std::string Float64Fillable::classname() const { + return "Float64Fillable"; + } + int64_t Float64Fillable::length() const { return buffer_.length(); } @@ -37,13 +45,13 @@ namespace awkward { } const std::shared_ptr Float64Fillable::type() const { - return std::shared_ptr(new PrimitiveType(Type::Parameters(), PrimitiveType::float64)); + return std::make_shared(Type::Parameters(), PrimitiveType::float64); } - const std::shared_ptr Float64Fillable::snapshot() const { + const std::shared_ptr Float64Fillable::snapshot(const std::shared_ptr& type) const { std::vector shape = { (ssize_t)buffer_.length() }; std::vector strides = { (ssize_t)sizeof(double) }; - return std::shared_ptr(new NumpyArray(Identity::none(), Type::none(), buffer_.ptr(), shape, strides, 0, sizeof(double), "d")); + return std::make_shared(Identity::none(), type, buffer_.ptr(), shape, strides, 0, sizeof(double), "d"); } bool Float64Fillable::active() const { diff --git a/src/libawkward/fillable/GrowableBuffer.cpp b/src/libawkward/fillable/GrowableBuffer.cpp new file mode 100644 index 0000000000..241ea7a84d --- /dev/null +++ b/src/libawkward/fillable/GrowableBuffer.cpp @@ -0,0 +1,115 @@ +// BSD 3-Clause License; see https://github.com/jpivarski/awkward-1.0/blob/master/LICENSE + +#include "awkward/fillable/GrowableBuffer.h" + +namespace awkward { + template + GrowableBuffer GrowableBuffer::empty(const FillableOptions& options) { + return GrowableBuffer::empty(options, 0); + } + + template + GrowableBuffer GrowableBuffer::empty(const FillableOptions& options, int64_t minreserve) { + size_t actual = (size_t)options.initial(); + if (actual < (size_t)minreserve) { + actual = (size_t)minreserve; + } + std::shared_ptr ptr(new T[actual], awkward::util::array_deleter()); + return GrowableBuffer(options, ptr, 0, (int64_t)actual); + } + + template + GrowableBuffer 
GrowableBuffer::full(const FillableOptions& options, T value, int64_t length) { + GrowableBuffer out = empty(options, length); + T* rawptr = out.ptr().get(); + for (int64_t i = 0; i < length; i++) { + rawptr[i] = value; + } + return GrowableBuffer(options, out.ptr(), length, out.reserved()); + } + + template + GrowableBuffer GrowableBuffer::arange(const FillableOptions& options, int64_t length) { + size_t actual = (size_t)options.initial(); + if (actual < (size_t)length) { + actual = (size_t)length; + } + T* rawptr = new T[(size_t)actual]; + std::shared_ptr ptr(rawptr, awkward::util::array_deleter()); + for (int64_t i = 0; i < length; i++) { + rawptr[i] = (T)i; + } + return GrowableBuffer(options, ptr, length, (int64_t)actual); + } + + template + GrowableBuffer::GrowableBuffer(const FillableOptions& options, std::shared_ptr ptr, int64_t length, int64_t reserved) + : options_(options) + , ptr_(ptr) + , length_(length) + , reserved_(reserved) { } + + template + GrowableBuffer::GrowableBuffer(const FillableOptions& options) + : GrowableBuffer(options, std::shared_ptr(new T[(size_t)options.initial()], awkward::util::array_deleter()), 0, options.initial()) { } + + template + const std::shared_ptr GrowableBuffer::ptr() const { + return ptr_; + } + + template + int64_t GrowableBuffer::length() const { + return length_; + } + + template + void GrowableBuffer::set_length(int64_t newlength) { + if (newlength > reserved_) { + set_reserved(newlength); + } + length_ = newlength; + } + + template + int64_t GrowableBuffer::reserved() const { + return reserved_; + } + + template + void GrowableBuffer::set_reserved(int64_t minreserved) { + if (minreserved > reserved_) { + std::shared_ptr ptr(new T[(size_t)minreserved], awkward::util::array_deleter()); + memcpy(ptr.get(), ptr_.get(), (size_t)(length_ * sizeof(T))); + ptr_ = ptr; + reserved_ = minreserved; + } + } + + template + void GrowableBuffer::clear() { + length_ = 0; + reserved_ = options_.initial(); + ptr_ = 
std::shared_ptr(new T[(size_t)options_.initial()], awkward::util::array_deleter()); + } + + template + void GrowableBuffer::append(T datum) { + assert(length_ <= reserved_); + if (length_ == reserved_) { + set_reserved((int64_t)ceil(reserved_ * options_.resize())); + } + ptr_.get()[length_] = datum; + length_++; + } + + template + T GrowableBuffer::getitem_at_nowrap(int64_t at) const { + return ptr_.get()[at]; + } + + template class GrowableBuffer; + template class GrowableBuffer; + template class GrowableBuffer; + template class GrowableBuffer; +} diff --git a/src/libawkward/fillable/Int64Fillable.cpp b/src/libawkward/fillable/Int64Fillable.cpp index 2e48988ff6..66635bb575 100644 --- a/src/libawkward/fillable/Int64Fillable.cpp +++ b/src/libawkward/fillable/Int64Fillable.cpp @@ -11,11 +11,23 @@ namespace awkward { const std::shared_ptr Int64Fillable::fromempty(const FillableOptions& options) { - std::shared_ptr out(new Int64Fillable(options, GrowableBuffer::empty(options))); + std::shared_ptr out = std::make_shared(options, GrowableBuffer::empty(options)); out.get()->setthat(out); return out; } + Int64Fillable::Int64Fillable(const FillableOptions& options, const GrowableBuffer& buffer) + : options_(options) + , buffer_(buffer) { } + + const GrowableBuffer Int64Fillable::buffer() const { + return buffer_; + } + + const std::string Int64Fillable::classname() const { + return "Int64Fillable"; + }; + int64_t Int64Fillable::length() const { return buffer_.length(); } @@ -25,16 +37,16 @@ namespace awkward { } const std::shared_ptr Int64Fillable::type() const { - return std::shared_ptr(new PrimitiveType(Type::Parameters(), PrimitiveType::int64)); + return std::make_shared(Type::Parameters(), PrimitiveType::int64); } - const std::shared_ptr Int64Fillable::snapshot() const { + const std::shared_ptr Int64Fillable::snapshot(const std::shared_ptr& type) const { std::vector shape = { (ssize_t)buffer_.length() }; std::vector strides = { (ssize_t)sizeof(int64_t) }; #ifdef 
_MSC_VER - return std::shared_ptr(new NumpyArray(Identity::none(), Type::none(), buffer_.ptr(), shape, strides, 0, sizeof(int64_t), "q")); + return std::make_shared(Identity::none(), type, buffer_.ptr(), shape, strides, 0, sizeof(int64_t), "q"); #else - return std::shared_ptr(new NumpyArray(Identity::none(), Type::none(), buffer_.ptr(), shape, strides, 0, sizeof(int64_t), "l")); + return std::make_shared(Identity::none(), type, buffer_.ptr(), shape, strides, 0, sizeof(int64_t), "l"); #endif } diff --git a/src/libawkward/fillable/ListFillable.cpp b/src/libawkward/fillable/ListFillable.cpp index 81bea2053d..fec126ef61 100644 --- a/src/libawkward/fillable/ListFillable.cpp +++ b/src/libawkward/fillable/ListFillable.cpp @@ -15,11 +15,21 @@ namespace awkward { const std::shared_ptr ListFillable::fromempty(const FillableOptions& options) { GrowableBuffer offsets = GrowableBuffer::empty(options); offsets.append(0); - std::shared_ptr out(new ListFillable(options, offsets, UnknownFillable::fromempty(options), false)); + std::shared_ptr out = std::make_shared(options, offsets, UnknownFillable::fromempty(options), false); out.get()->setthat(out); return out; } + ListFillable::ListFillable(const FillableOptions& options, const GrowableBuffer& offsets, std::shared_ptr content, bool begun) + : options_(options) + , offsets_(offsets) + , content_(content) + , begun_(begun) { } + + const std::string ListFillable::classname() const { + return "ListFillable"; + }; + int64_t ListFillable::length() const { return offsets_.length() - 1; } @@ -31,12 +41,16 @@ namespace awkward { } const std::shared_ptr ListFillable::type() const { - return std::shared_ptr(new ListType(Type::Parameters(), content_.get()->type())); + return std::make_shared(Type::Parameters(), content_.get()->type()); } - const std::shared_ptr ListFillable::snapshot() const { + const std::shared_ptr ListFillable::snapshot(const std::shared_ptr& type) const { Index64 offsets(offsets_.ptr(), 0, offsets_.length()); - return 
std::shared_ptr(new ListOffsetArray64(Identity::none(), Type::none(), offsets, content_.get()->snapshot())); + std::shared_ptr innertype = Type::none(); + if (ListType* raw = dynamic_cast(type.get())) { + innertype = raw->type(); + } + return std::make_shared(Identity::none(), type, offsets, content_.get()->snapshot(innertype)); } bool ListFillable::active() const { diff --git a/src/libawkward/fillable/OptionFillable.cpp b/src/libawkward/fillable/OptionFillable.cpp index c8e6c9ac97..99d6696f66 100644 --- a/src/libawkward/fillable/OptionFillable.cpp +++ b/src/libawkward/fillable/OptionFillable.cpp @@ -11,18 +11,27 @@ namespace awkward { const std::shared_ptr OptionFillable::fromnulls(const FillableOptions& options, int64_t nullcount, std::shared_ptr content) { GrowableBuffer offsets = GrowableBuffer::full(options, -1, nullcount); - std::shared_ptr out(new OptionFillable(options, offsets, content)); + std::shared_ptr out = std::make_shared(options, offsets, content); out.get()->setthat(out); return out; } const std::shared_ptr OptionFillable::fromvalids(const FillableOptions& options, std::shared_ptr content) { GrowableBuffer offsets = GrowableBuffer::arange(options, content->length()); - std::shared_ptr out(new OptionFillable(options, offsets, content)); + std::shared_ptr out = std::make_shared(options, offsets, content); out.get()->setthat(out); return out; } + OptionFillable::OptionFillable(const FillableOptions& options, const GrowableBuffer& offsets, std::shared_ptr content) + : options_(options) + , offsets_(offsets) + , content_(content) { } + + const std::string OptionFillable::classname() const { + return "OptionFillable"; + }; + int64_t OptionFillable::length() const { return offsets_.length(); } @@ -34,11 +43,11 @@ namespace awkward { const std::shared_ptr OptionFillable::type() const { Index64 offsets(offsets_.ptr(), 0, offsets_.length()); - return std::shared_ptr(new OptionType(Type::Parameters(), content_.get()->type())); + return 
std::make_shared(Type::Parameters(), content_.get()->type()); } - const std::shared_ptr OptionFillable::snapshot() const { - throw std::runtime_error("OptionFillable::snapshot() needs OptionArray"); + const std::shared_ptr OptionFillable::snapshot(const std::shared_ptr& type) const { + throw std::runtime_error("OptionFillable::snapshot needs OptionArray"); } bool OptionFillable::active() const { diff --git a/src/libawkward/fillable/RecordFillable.cpp b/src/libawkward/fillable/RecordFillable.cpp index acc2dc42a4..06d5599d82 100644 --- a/src/libawkward/fillable/RecordFillable.cpp +++ b/src/libawkward/fillable/RecordFillable.cpp @@ -18,11 +18,35 @@ namespace awkward { const std::shared_ptr RecordFillable::fromempty(const FillableOptions& options) { - std::shared_ptr out(new RecordFillable(options, std::vector>(), std::vector(), std::vector(), "", nullptr, -1, false, -1, -1)); + std::shared_ptr out = std::make_shared(options, std::vector>(), std::vector(), std::vector(), "", nullptr, -1, false, -1, -1); out.get()->setthat(out); return out; } + RecordFillable::RecordFillable(const FillableOptions& options, const std::vector>& contents, const std::vector& keys, const std::vector& pointers, const std::string& name, const char* nameptr, int64_t length, bool begun, int64_t nextindex, int64_t nexttotry) + : options_(options) + , contents_(contents) + , keys_(keys) + , pointers_(pointers) + , name_(name) + , nameptr_(nameptr) + , length_(length) + , begun_(begun) + , nextindex_(nextindex) + , nexttotry_(nexttotry) { } + + const std::string RecordFillable::name() const { + return name_; + } + + const char* RecordFillable::nameptr() const { + return nameptr_; + } + + const std::string RecordFillable::classname() const { + return "RecordFillable"; + }; + int64_t RecordFillable::length() const { return length_; } @@ -43,12 +67,12 @@ namespace awkward { const std::shared_ptr RecordFillable::type() const { if (length_ == -1) { - return std::shared_ptr(new 
UnknownType(Type::Parameters())); + return std::make_shared(Type::Parameters()); } else { std::vector> types; - std::shared_ptr lookup(new RecordType::Lookup); - std::shared_ptr reverselookup(new RecordType::ReverseLookup); + std::shared_ptr lookup = std::make_shared(); + std::shared_ptr reverselookup = std::make_shared(); for (size_t i = 0; i < contents_.size(); i++) { types.push_back(contents_[i].get()->type()); (*lookup.get())[keys_[i]] = i; @@ -58,40 +82,35 @@ namespace awkward { if (nameptr_ != nullptr) { parameters["__class__"] = util::quote(name_, true); } - return std::shared_ptr(new RecordType(parameters, types, lookup, reverselookup)); - return std::shared_ptr(new RecordType(parameters, types)); + return std::make_shared(parameters, types, lookup, reverselookup); } } - const std::shared_ptr RecordFillable::snapshot() const { + const std::shared_ptr RecordFillable::snapshot(const std::shared_ptr& type) const { if (length_ == -1) { - return std::shared_ptr(new EmptyArray(Identity::none(), Type::none())); + return std::make_shared(Identity::none(), type); } - else if (contents_.size() == 0) { - std::shared_ptr out(new RecordArray(Identity::none(), Type::none(), length_, false)); - if (nameptr_ != nullptr) { - std::shared_ptr type = out.get()->type(); - type.get()->nolength().get()->setparameter("__class__", util::quote(name_, true)); - out.get()->settype(type); + + RecordType* raw = dynamic_cast(type.get()); + std::vector> contents; + std::shared_ptr lookup = std::make_shared(); + std::shared_ptr reverselookup = std::make_shared(); + for (size_t i = 0; i < contents_.size(); i++) { + if (raw == nullptr) { + contents.push_back(contents_[i].get()->snapshot(Type::none())); } - return out; + else { + contents.push_back(contents_[i].get()->snapshot(raw->field((int64_t)i))); + } + (*lookup.get())[keys_[i]] = i; + reverselookup.get()->push_back(keys_[i]); + } + + if (contents.empty()) { + return std::make_shared(Identity::none(), type, length_, false); } else { - 
std::vector> contents; - std::shared_ptr lookup(new RecordArray::Lookup); - std::shared_ptr reverselookup(new RecordArray::ReverseLookup); - for (size_t i = 0; i < contents_.size(); i++) { - contents.push_back(contents_[i].get()->snapshot()); - (*lookup.get())[keys_[i]] = i; - reverselookup.get()->push_back(keys_[i]); - } - std::shared_ptr out(new RecordArray(Identity::none(), Type::none(), contents, lookup, reverselookup)); - if (nameptr_ != nullptr) { - std::shared_ptr type = out.get()->type(); - type.get()->nolength().get()->setparameter("__class__", util::quote(name_, true)); - out.get()->settype(type); - } - return out; + return std::make_shared(Identity::none(), type, contents, lookup, reverselookup); } } @@ -390,13 +409,12 @@ namespace awkward { throw std::invalid_argument("called 'endrecord' without 'beginrecord' at the same level before it"); } else if (nextindex_ == -1 || !contents_[(size_t)nextindex_].get()->active()) { - int64_t i = 0; - for (auto content : contents_) { - if (content.get()->length() == length_) { - maybeupdate(i, content.get()->null()); + for (size_t i = 0; i < contents_.size(); i++) { + if (contents_[i].get()->length() == length_) { + maybeupdate((int64_t)i, contents_[i].get()->null()); } - if (content.get()->length() != length_ + 1) { - throw std::invalid_argument(std::string("record field ") + util::quote(keys_[(size_t)i], true) + std::string(" filled more than once")); + if (contents_[i].get()->length() != length_ + 1) { + throw std::invalid_argument(std::string("record field ") + util::quote(keys_[i], true) + std::string(" filled more than once")); } i++; } diff --git a/src/libawkward/fillable/StringFillable.cpp b/src/libawkward/fillable/StringFillable.cpp index 378a490aaf..130fb09f47 100644 --- a/src/libawkward/fillable/StringFillable.cpp +++ b/src/libawkward/fillable/StringFillable.cpp @@ -15,11 +15,25 @@ namespace awkward { GrowableBuffer offsets = GrowableBuffer::empty(options); offsets.append(0); GrowableBuffer content = 
GrowableBuffer::empty(options); - std::shared_ptr out(new StringFillable(options, offsets, content, encoding)); + std::shared_ptr out = std::make_shared(options, offsets, content, encoding); out.get()->setthat(out); return out; } + StringFillable::StringFillable(const FillableOptions& options, const GrowableBuffer& offsets, GrowableBuffer& content, const char* encoding) + : options_(options) + , offsets_(offsets) + , content_(content) + , encoding_(encoding) { } + + const std::string StringFillable::classname() const { + return "StringFillable"; + }; + + const char* StringFillable::encoding() const { + return encoding_; + } + int64_t StringFillable::length() const { return offsets_.length() - 1; } @@ -56,19 +70,22 @@ namespace awkward { char_parameters["encoding"] = std::string(quoted); } - return std::shared_ptr(new ListType(string_parameters, std::shared_ptr(new PrimitiveType(char_parameters, PrimitiveType::uint8)))); + return std::make_shared(string_parameters, std::make_shared(char_parameters, PrimitiveType::uint8)); } - const std::shared_ptr StringFillable::snapshot() const { - std::shared_ptr stringtype = type(); - + const std::shared_ptr StringFillable::snapshot(const std::shared_ptr& type) const { + ListType* raw = dynamic_cast(type.get()); Index64 offsets(offsets_.ptr(), 0, offsets_.length()); - std::vector shape = { (ssize_t)content_.length() }; std::vector strides = { (ssize_t)sizeof(uint8_t) }; - std::shared_ptr content(new NumpyArray(Identity::none(), stringtype.get()->inner(), content_.ptr(), shape, strides, 0, sizeof(uint8_t), "B")); - - return std::shared_ptr(new ListOffsetArray64(Identity::none(), stringtype, offsets, content)); + std::shared_ptr content; + if (raw == nullptr) { + content = std::make_shared(Identity::none(), Type::none(), content_.ptr(), shape, strides, 0, sizeof(uint8_t), "B"); + } + else { + content = std::make_shared(Identity::none(), raw->type(), content_.ptr(), shape, strides, 0, sizeof(uint8_t), "B"); + } + return 
std::make_shared(Identity::none(), type, offsets, content); } bool StringFillable::active() const { diff --git a/src/libawkward/fillable/TupleFillable.cpp b/src/libawkward/fillable/TupleFillable.cpp index ff8b09b76a..bbc7095ea7 100644 --- a/src/libawkward/fillable/TupleFillable.cpp +++ b/src/libawkward/fillable/TupleFillable.cpp @@ -15,11 +15,26 @@ namespace awkward { const std::shared_ptr TupleFillable::fromempty(const FillableOptions& options) { - std::shared_ptr out(new TupleFillable(options, std::vector>(), -1, false, -1)); + std::shared_ptr out = std::make_shared(options, std::vector>(), -1, false, -1); out.get()->setthat(out); return out; } + TupleFillable::TupleFillable(const FillableOptions& options, const std::vector>& contents, int64_t length, bool begun, size_t nextindex) + : options_(options) + , contents_(contents) + , length_(length) + , begun_(begun) + , nextindex_(nextindex) { } + + int64_t TupleFillable::numfields() const { + return (int64_t)contents_.size(); + } + + const std::string TupleFillable::classname() const { + return "TupleFillable"; + }; + int64_t TupleFillable::length() const { return length_; } @@ -35,30 +50,38 @@ namespace awkward { const std::shared_ptr TupleFillable::type() const { if (length_ == -1) { - return std::shared_ptr(new UnknownType(Type::Parameters())); + return std::make_shared(Type::Parameters()); } else { std::vector> types; - for (auto content : contents_) { - types.push_back(content.get()->type()); + for (size_t i = 0; i < contents_.size(); i++) { + types.push_back(contents_[i].get()->type()); } - return std::shared_ptr(new RecordType(Type::Parameters(), types)); + return std::make_shared(Type::Parameters(), types); } } - const std::shared_ptr TupleFillable::snapshot() const { + const std::shared_ptr TupleFillable::snapshot(const std::shared_ptr& type) const { if (length_ == -1) { - return std::shared_ptr(new EmptyArray(Identity::none(), Type::none())); + return std::make_shared(Identity::none(), type); + } + + 
RecordType* raw = dynamic_cast(type.get()); + std::vector> contents; + for (size_t i = 0; i < contents_.size(); i++) { + if (raw == nullptr) { + contents.push_back(contents_[i].get()->snapshot(Type::none())); + } + else { + contents.push_back(contents_[i].get()->snapshot(raw->field((int64_t)i))); + } } - else if (contents_.size() == 0) { - return std::shared_ptr(new RecordArray(Identity::none(), Type::none(), length_, true)); + + if (contents.empty()) { + return std::make_shared(Identity::none(), type, length_, true); } else { - std::vector> contents; - for (auto content : contents_) { - contents.push_back(content.get()->snapshot()); - } - return std::shared_ptr(new RecordArray(Identity::none(), Type::none(), contents)); + return std::make_shared(Identity::none(), type, contents); } } @@ -234,12 +257,11 @@ namespace awkward { throw std::invalid_argument("called 'endtuple' without 'begintuple' at the same level before it"); } else if (nextindex_ == -1 || !contents_[(size_t)nextindex_].get()->active()) { - int64_t i = 0; - for (auto content : contents_) { - if (content.get()->length() == length_) { - maybeupdate(i, content.get()->null()); + for (size_t i = 0; i < contents_.size(); i++) { + if (contents_[i].get()->length() == length_) { + maybeupdate(i, contents_[i].get()->null()); } - if (content.get()->length() != length_ + 1) { + if (contents_[i].get()->length() != length_ + 1) { throw std::invalid_argument(std::string("tuple index ") + std::to_string(i) + std::string(" filled more than once")); } i++; diff --git a/src/libawkward/fillable/UnionFillable.cpp b/src/libawkward/fillable/UnionFillable.cpp index e45c15e594..457add078c 100644 --- a/src/libawkward/fillable/UnionFillable.cpp +++ b/src/libawkward/fillable/UnionFillable.cpp @@ -17,15 +17,26 @@ #include "awkward/fillable/UnionFillable.h" namespace awkward { - const std::shared_ptr UnionFillable::fromsingle(const FillableOptions& options, const std::shared_ptr firstcontent) { + const std::shared_ptr 
UnionFillable::fromsingle(const FillableOptions& options, const std::shared_ptr& firstcontent) { GrowableBuffer types = GrowableBuffer::full(options, 0, firstcontent->length()); GrowableBuffer offsets = GrowableBuffer::arange(options, firstcontent->length()); std::vector> contents({ firstcontent }); - std::shared_ptr out(new UnionFillable(options, types, offsets, contents)); + std::shared_ptr out = std::make_shared(options, types, offsets, contents); out.get()->setthat(out); return out; } + UnionFillable::UnionFillable(const FillableOptions& options, const GrowableBuffer& types, const GrowableBuffer& offsets, std::vector>& contents) + : options_(options) + , types_(types) + , offsets_(offsets) + , contents_(contents) + , current_(-1) { } + + const std::string UnionFillable::classname() const { + return "UnionFillable"; + }; + int64_t UnionFillable::length() const { return types_.length(); } @@ -43,13 +54,13 @@ namespace awkward { for (auto x : contents_) { types.push_back(x.get()->type()); } - return std::shared_ptr(new UnionType(Type::Parameters(), types)); + return std::make_shared(Type::Parameters(), types); } - const std::shared_ptr UnionFillable::snapshot() const { + const std::shared_ptr UnionFillable::snapshot(const std::shared_ptr& type) const { Index8 types(types_.ptr(), 0, types_.length()); Index64 offsets(offsets_.ptr(), 0, offsets_.length()); - throw std::runtime_error("UnionFillable::snapshot() needs UnionArray"); + throw std::runtime_error("UnionFillable::snapshot needs UnionArray"); } bool UnionFillable::active() const { @@ -204,6 +215,7 @@ namespace awkward { contents_.push_back(tofill); } tofill->beginlist(); + current_ = i; } else { contents_[(size_t)current_].get()->beginlist(); diff --git a/src/libawkward/fillable/UnknownFillable.cpp b/src/libawkward/fillable/UnknownFillable.cpp index 4cc19b0e89..4f6e3cdfd5 100644 --- a/src/libawkward/fillable/UnknownFillable.cpp +++ b/src/libawkward/fillable/UnknownFillable.cpp @@ -19,11 +19,19 @@ namespace 
awkward { const std::shared_ptr UnknownFillable::fromempty(const FillableOptions& options) { - std::shared_ptr out(new UnknownFillable(options, 0)); + std::shared_ptr out = std::make_shared(options, 0); out.get()->setthat(out); return out; } + UnknownFillable::UnknownFillable(const FillableOptions& options, int64_t nullcount) + : options_(options) + , nullcount_(nullcount) { } + + const std::string UnknownFillable::classname() const { + return "UnknownFillable"; + }; + int64_t UnknownFillable::length() const { return nullcount_; } @@ -33,15 +41,15 @@ namespace awkward { } const std::shared_ptr UnknownFillable::type() const { - return std::shared_ptr(new UnknownType(Type::Parameters())); + return std::make_shared(Type::Parameters()); } - const std::shared_ptr UnknownFillable::snapshot() const { + const std::shared_ptr UnknownFillable::snapshot(const std::shared_ptr& type) const { if (nullcount_ == 0) { - return std::shared_ptr(new EmptyArray(Identity::none(), Type::none())); + return std::make_shared(Identity::none(), type); } else { - throw std::runtime_error("UnknownFillable::snapshot() needs OptionArray"); + throw std::runtime_error("UnknownFillable::snapshot needs OptionArray"); } } diff --git a/src/libawkward/io/json.cpp b/src/libawkward/io/json.cpp index b141fe2d1e..65eb31a85d 100644 --- a/src/libawkward/io/json.cpp +++ b/src/libawkward/io/json.cpp @@ -1,11 +1,327 @@ // BSD 3-Clause License; see https://github.com/jpivarski/awkward-1.0/blob/master/LICENSE +#include "rapidjson/reader.h" +#include "rapidjson/writer.h" +#include "rapidjson/prettywriter.h" +#include "rapidjson/stringbuffer.h" +#include "rapidjson/filereadstream.h" +#include "rapidjson/filewritestream.h" +#include "rapidjson/error/en.h" + #include "awkward/fillable/FillableArray.h" #include "awkward/Content.h" #include "awkward/io/json.h" +namespace rj = rapidjson; + namespace awkward { + /////////////////////////////////////////////////////// writing to JSON + + class ToJsonString::Impl { + 
public: + Impl(int64_t maxdecimals): buffer_(), writer_(buffer_) { + if (maxdecimals >= 0) { + writer_.SetMaxDecimalPlaces((int)maxdecimals); + } + } + void null() { writer_.Null(); } + void boolean(bool x) { writer_.Bool(x); } + void integer(int64_t x) { writer_.Int64(x); } + void real(double x) { writer_.Double(x); } + void string(const char* x, int64_t length) { writer_.String(x, (rj::SizeType)length); } + void beginlist() { writer_.StartArray(); } + void endlist() { writer_.EndArray(); } + void beginrecord() { writer_.StartObject(); } + void field(const char* x) { writer_.Key(x); } + void endrecord() { writer_.EndObject(); } + const std::string tostring() { + return std::string(buffer_.GetString()); + } + private: + rj::StringBuffer buffer_; + rj::Writer writer_; + // FIXME: rj::UTF8<>, rj::UTF8<>, rj::CrtAllocator<>, rj::kWriteNanAndInfFlag, rj::UTF8<>, rj::UTF8<>, rj::CrtAllocator<>, rj::kWriteNanAndInfFlag, rj::UTF8<>, rj::UTF8<>, rj::CrtAllocator<>, rj::kWriteNanAndInfFlag, rj::UTF8<>, rj::UTF8<>, rj::CrtAllocator<>, rj::kWriteNanAndInfFlag + }; + + ToJsonString::ToJsonString(int64_t maxdecimals) + : impl_(new ToJsonString::Impl(maxdecimals)) { } + + ToJsonString::~ToJsonString() { + delete impl_; + } + + void ToJsonString::null() { + impl_->null(); + } + + void ToJsonString::boolean(bool x) { + impl_->boolean(x); + } + + void ToJsonString::integer(int64_t x) { + impl_->integer(x); + } + + void ToJsonString::real(double x) { + impl_->real(x); + } + + void ToJsonString::string(const char* x, int64_t length) { + impl_->string(x, length); + } + + void ToJsonString::beginlist() { + impl_->beginlist(); + } + + void ToJsonString::endlist() { + impl_->endlist(); + } + + void ToJsonString::beginrecord() { + impl_->beginrecord(); + } + + void ToJsonString::field(const char* x) { + impl_->field(x); + } + + void ToJsonString::endrecord() { + impl_->endrecord(); + } + + const std::string ToJsonString::tostring() { + return impl_->tostring(); + } + + class 
ToJsonPrettyString::Impl { + public: + Impl(int64_t maxdecimals): buffer_(), writer_(buffer_) { + if (maxdecimals >= 0) { + writer_.SetMaxDecimalPlaces((int)maxdecimals); + } + } + void null() { writer_.Null(); } + void boolean(bool x) { writer_.Bool(x); } + void integer(int64_t x) { writer_.Int64(x); } + void real(double x) { writer_.Double(x); } + void string(const char* x, int64_t length) { writer_.String(x, (rj::SizeType)length); } + void beginlist() { writer_.StartArray(); } + void endlist() { writer_.EndArray(); } + void beginrecord() { writer_.StartObject(); } + void field(const char* x) { writer_.Key(x); } + void endrecord() { writer_.EndObject(); } + const std::string tostring() { + return std::string(buffer_.GetString()); + } + private: + rj::StringBuffer buffer_; + rj::PrettyWriter writer_; + // FIXME: rj::UTF8<>, rj::UTF8<>, rj::CrtAllocator<>, rj::kWriteNanAndInfFlag, rj::UTF8<>, rj::UTF8<>, rj::CrtAllocator<>, rj::kWriteNanAndInfFlag, rj::UTF8<>, rj::UTF8<>, rj::CrtAllocator<>, rj::kWriteNanAndInfFlag, rj::UTF8<>, rj::UTF8<>, rj::CrtAllocator<>, rj::kWriteNanAndInfFlag + }; + + ToJsonPrettyString::ToJsonPrettyString(int64_t maxdecimals) + : impl_(new ToJsonPrettyString::Impl(maxdecimals)) { } + + ToJsonPrettyString::~ToJsonPrettyString() { + delete impl_; + } + + void ToJsonPrettyString::null() { + impl_->null(); + } + + void ToJsonPrettyString::boolean(bool x) { + impl_->boolean(x); + } + + void ToJsonPrettyString::integer(int64_t x) { + impl_->integer(x); + } + + void ToJsonPrettyString::real(double x) { + impl_->real(x); + } + + void ToJsonPrettyString::string(const char* x, int64_t length) { + impl_->string(x, length); + } + + void ToJsonPrettyString::beginlist() { + impl_->beginlist(); + } + + void ToJsonPrettyString::endlist() { + impl_->endlist(); + } + + void ToJsonPrettyString::beginrecord() { + impl_->beginrecord(); + } + + void ToJsonPrettyString::field(const char* x) { + impl_->field(x); + } + + void ToJsonPrettyString::endrecord() { + 
impl_->endrecord(); + } + + const std::string ToJsonPrettyString::tostring() { + return impl_->tostring(); + } + + class ToJsonFile::Impl { + public: + Impl(FILE* destination, int64_t maxdecimals, int64_t buffersize) + : buffer_(new char[(size_t)buffersize], awkward::util::array_deleter()) + , stream_(destination, buffer_.get(), ((size_t)buffersize)*sizeof(char)) + , writer_(stream_) { + if (maxdecimals >= 0) { + writer_.SetMaxDecimalPlaces((int)maxdecimals); + } + } + void null() { writer_.Null(); } + void boolean(bool x) { writer_.Bool(x); } + void integer(int64_t x) { writer_.Int64(x); } + void real(double x) { writer_.Double(x); } + void string(const char* x, int64_t length) { writer_.String(x, (rj::SizeType)length); } + void beginlist() { writer_.StartArray(); } + void endlist() { writer_.EndArray(); } + void beginrecord() { writer_.StartObject(); } + void field(const char* x) { writer_.Key(x); } + void endrecord() { writer_.EndObject(); } + private: + std::shared_ptr buffer_; + rj::FileWriteStream stream_; + rj::Writer writer_ ; + // FIXME: rj::UTF8<>, rj::UTF8<>, rj::CrtAllocator<>, rj::kWriteNanAndInfFlag, rj::UTF8<>, rj::UTF8<>, rj::CrtAllocator<>, rj::kWriteNanAndInfFlag, rj::UTF8<>, rj::UTF8<>, rj::CrtAllocator<>, rj::kWriteNanAndInfFlag, rj::UTF8<>, rj::UTF8<>, rj::CrtAllocator<>, rj::kWriteNanAndInfFlag + }; + + ToJsonFile::ToJsonFile(FILE* destination, int64_t maxdecimals, int64_t buffersize) + : impl_(new ToJsonFile::Impl(destination, maxdecimals, buffersize)) { } + + ToJsonFile::~ToJsonFile() { + delete impl_; + } + + void ToJsonFile::null() { + impl_->null(); + } + + void ToJsonFile::boolean(bool x) { + impl_->boolean(x); + } + + void ToJsonFile::integer(int64_t x) { + impl_->integer(x); + } + + void ToJsonFile::real(double x) { + impl_->real(x); + } + + void ToJsonFile::string(const char* x, int64_t length) { + impl_->string(x, length); + } + + void ToJsonFile::beginlist() { + impl_->beginlist(); + } + + void ToJsonFile::endlist() { + 
impl_->endlist(); + } + + void ToJsonFile::beginrecord() { + impl_->beginrecord(); + } + + void ToJsonFile::field(const char* x) { + impl_->field(x); + } + + void ToJsonFile::endrecord() { + impl_->endrecord(); + } + + class ToJsonPrettyFile::Impl { + public: + Impl(FILE* destination, int64_t maxdecimals, int64_t buffersize) + : buffer_(new char[(size_t)buffersize], awkward::util::array_deleter()) + , stream_(destination, buffer_.get(), ((size_t)buffersize)*sizeof(char)) + , writer_(stream_) { + if (maxdecimals >= 0) { + writer_.SetMaxDecimalPlaces((int)maxdecimals); + } + } + void null() { writer_.Null(); } + void boolean(bool x) { writer_.Bool(x); } + void integer(int64_t x) { writer_.Int64(x); } + void real(double x) { writer_.Double(x); } + void string(const char* x, int64_t length) { writer_.String(x, (rj::SizeType)length); } + void beginlist() { writer_.StartArray(); } + void endlist() { writer_.EndArray(); } + void beginrecord() { writer_.StartObject(); } + void field(const char* x) { writer_.Key(x); } + void endrecord() { writer_.EndObject(); } + private: + std::shared_ptr buffer_; + rj::FileWriteStream stream_; + rj::PrettyWriter writer_; + // FIXME: rj::UTF8<>, rj::UTF8<>, rj::CrtAllocator<>, rj::kWriteNanAndInfFlag, rj::UTF8<>, rj::UTF8<>, rj::CrtAllocator<>, rj::kWriteNanAndInfFlag, rj::UTF8<>, rj::UTF8<>, rj::CrtAllocator<>, rj::kWriteNanAndInfFlag, rj::UTF8<>, rj::UTF8<>, rj::CrtAllocator<>, rj::kWriteNanAndInfFlag + }; + + ToJsonPrettyFile::ToJsonPrettyFile(FILE* destination, int64_t maxdecimals, int64_t buffersize) + : impl_(new ToJsonPrettyFile::Impl(destination, maxdecimals, buffersize)) { } + + ToJsonPrettyFile::~ToJsonPrettyFile() { + delete impl_; + } + + void ToJsonPrettyFile::null() { + impl_->null(); + } + + void ToJsonPrettyFile::boolean(bool x) { + impl_->boolean(x); + } + + void ToJsonPrettyFile::integer(int64_t x) { + impl_->integer(x); + } + + void ToJsonPrettyFile::real(double x) { + impl_->real(x); + } + + void 
ToJsonPrettyFile::string(const char* x, int64_t length) { + impl_->string(x, length); + } + + void ToJsonPrettyFile::beginlist() { + impl_->beginlist(); + } + + void ToJsonPrettyFile::endlist() { + impl_->endlist(); + } + + void ToJsonPrettyFile::beginrecord() { + impl_->beginrecord(); + } + + void ToJsonPrettyFile::field(const char* x) { + impl_->field(x); + } + + void ToJsonPrettyFile::endrecord() { + impl_->endrecord(); + } + + /////////////////////////////////////////////////////// reading from JSON + class Handler: public rj::BaseReaderHandler, Handler> { public: Handler(const FillableOptions& options): array_(options), depth_(0) { } @@ -43,11 +359,19 @@ namespace awkward { } bool StartObject() { + if (depth_ == 0) { + array_.beginlist(); + } + depth_++; array_.beginrecord(); return true; } bool EndObject(rj::SizeType numfields) { + depth_--; array_.endrecord(); + if (depth_ == 0) { + array_.endlist(); + } return true; } bool Key(const char* str, rj::SizeType length, bool copy) { diff --git a/src/libawkward/io/root.cpp b/src/libawkward/io/root.cpp index f5ffabbe80..b38f15e3b5 100644 --- a/src/libawkward/io/root.cpp +++ b/src/libawkward/io/root.cpp @@ -66,10 +66,11 @@ namespace awkward { std::vector shape = { (ssize_t)bytepos_tocopy.length() }; std::vector strides = { (ssize_t)itemsize }; - std::shared_ptr out(new NumpyArray(Identity::none(), Type::none(), ptr, shape, strides, 0, (ssize_t)itemsize, format)); + std::shared_ptr out = std::make_shared(Identity::none(), Type::none(), ptr, shape, strides, 0, (ssize_t)itemsize, format); for (int64_t i = depth - 1; i >= 0; i--) { - out = std::shared_ptr(new ListOffsetArray64(Identity::none(), Type::none(), levels[(size_t)i].toindex(), out)); + Index64 index(levels[(size_t)i].ptr(), 0, levels[(size_t)i].length()); + out = std::make_shared(Identity::none(), Type::none(), index, out); } return out; } diff --git a/src/libawkward/type/ArrayType.cpp b/src/libawkward/type/ArrayType.cpp index 4ed95d16f6..971c46fa3f 100644 --- 
a/src/libawkward/type/ArrayType.cpp +++ b/src/libawkward/type/ArrayType.cpp @@ -5,7 +5,12 @@ #include "awkward/type/ArrayType.h" namespace awkward { - std::string ArrayType::tostring_part(std::string indent, std::string pre, std::string post) const { + ArrayType::ArrayType(const Type::Parameters& parameters, const std::shared_ptr& type, int64_t length) + : Type(parameters) + , type_(type) + , length_(length) { } + + std::string ArrayType::tostring_part(const std::string& indent, const std::string& pre, const std::string& post) const { std::string typestr; if (get_typestr(typestr)) { return typestr; @@ -15,10 +20,10 @@ namespace awkward { } const std::shared_ptr ArrayType::shallow_copy() const { - return std::shared_ptr(new ArrayType(parameters_, type_, length_)); + return std::make_shared(parameters_, type_, length_); } - bool ArrayType::equal(const std::shared_ptr other, bool check_parameters) const { + bool ArrayType::equal(const std::shared_ptr& other, bool check_parameters) const { if (ArrayType* t = dynamic_cast(other.get())) { if (check_parameters && !equal_parameters(other.get()->parameters())) { return false; @@ -30,22 +35,6 @@ namespace awkward { } } - std::shared_ptr ArrayType::nolength() const { - return type_; - } - - std::shared_ptr ArrayType::level() const { - return shallow_copy(); - } - - std::shared_ptr ArrayType::inner() const { - return type_; - } - - std::shared_ptr ArrayType::inner(const std::string& key) const { - throw std::runtime_error("FIXME: ArrayType::inner(key)"); - } - int64_t ArrayType::numfields() const { return type_.get()->numfields(); } diff --git a/src/libawkward/type/ListType.cpp b/src/libawkward/type/ListType.cpp index 08f0e25a13..9042875152 100644 --- a/src/libawkward/type/ListType.cpp +++ b/src/libawkward/type/ListType.cpp @@ -9,14 +9,18 @@ #include "awkward/type/ListType.h" namespace awkward { - std::string ListType::tostring_part(std::string indent, std::string pre, std::string post) const { + ListType::ListType(const 
Type::Parameters& parameters, const std::shared_ptr& type) + : Type(parameters) + , type_(type) { } + + std::string ListType::tostring_part(const std::string& indent, const std::string& pre, const std::string& post) const { std::string typestr; if (get_typestr(typestr)) { return typestr; } std::stringstream out; - if (parameters_.size() == 0) { + if (parameters_.empty()) { out << indent << pre << "var * " << type_.get()->tostring_part(indent, "", "") << post; } else { @@ -26,10 +30,10 @@ namespace awkward { } const std::shared_ptr ListType::shallow_copy() const { - return std::shared_ptr(new ListType(parameters_, type_)); + return std::make_shared(parameters_, type_); } - bool ListType::equal(const std::shared_ptr other, bool check_parameters) const { + bool ListType::equal(const std::shared_ptr& other, bool check_parameters) const { if (ListType* t = dynamic_cast(other.get())) { if (check_parameters && !equal_parameters(other.get()->parameters())) { return false; @@ -41,18 +45,6 @@ namespace awkward { } } - std::shared_ptr ListType::level() const { - return shallow_copy(); - } - - std::shared_ptr ListType::inner() const { - return type_; - } - - std::shared_ptr ListType::inner(const std::string& key) const { - throw std::runtime_error("FIXME: ListType::inner(key)"); - } - int64_t ListType::numfields() const { return type_.get()->numfields(); } diff --git a/src/libawkward/type/OptionType.cpp b/src/libawkward/type/OptionType.cpp index 0831624b73..9b786ef102 100644 --- a/src/libawkward/type/OptionType.cpp +++ b/src/libawkward/type/OptionType.cpp @@ -10,14 +10,18 @@ #include "awkward/type/OptionType.h" namespace awkward { - std::string OptionType::tostring_part(std::string indent, std::string pre, std::string post) const { + OptionType::OptionType(const Type::Parameters& parameters, const std::shared_ptr& type) + : Type(parameters) + , type_(type) { } + + std::string OptionType::tostring_part(const std::string& indent, const std::string& pre, const std::string& post) 
const { std::string typestr; if (get_typestr(typestr)) { return typestr; } std::stringstream out; - if (parameters_.size() == 0) { + if (parameters_.empty()) { if (dynamic_cast(type_.get()) != nullptr || dynamic_cast(type_.get()) != nullptr) { out << indent << pre << "option[" << type_.get()->tostring_part(indent, "", "") << "]" << post; @@ -33,10 +37,10 @@ namespace awkward { } const std::shared_ptr OptionType::shallow_copy() const { - return std::shared_ptr(new OptionType(parameters_, type_)); + return std::make_shared(parameters_, type_); } - bool OptionType::equal(const std::shared_ptr other, bool check_parameters) const { + bool OptionType::equal(const std::shared_ptr& other, bool check_parameters) const { if (OptionType* t = dynamic_cast(other.get())) { if (check_parameters && !equal_parameters(other.get()->parameters())) { return false; @@ -48,18 +52,6 @@ namespace awkward { } } - std::shared_ptr OptionType::level() const { - return type_.get()->level(); - } - - std::shared_ptr OptionType::inner() const { - return type_.get()->inner(); - } - - std::shared_ptr OptionType::inner(const std::string& key) const { - throw std::runtime_error("FIXME: OptionType::inner(key)"); - } - int64_t OptionType::numfields() const { return type_.get()->numfields(); } diff --git a/src/libawkward/type/PrimitiveType.cpp b/src/libawkward/type/PrimitiveType.cpp index 5164a5c00d..7aee81b8c0 100644 --- a/src/libawkward/type/PrimitiveType.cpp +++ b/src/libawkward/type/PrimitiveType.cpp @@ -9,7 +9,11 @@ #include "awkward/type/PrimitiveType.h" namespace awkward { - std::string PrimitiveType::tostring_part(std::string indent, std::string pre, std::string post) const { + PrimitiveType::PrimitiveType(const Type::Parameters& parameters, DType dtype) + : Type(parameters) + , dtype_(dtype) { } + + std::string PrimitiveType::tostring_part(const std::string& indent, const std::string& pre, const std::string& post) const { std::string typestr; if (get_typestr(typestr)) { return typestr; @@ -31,7 
+35,7 @@ namespace awkward { case float64: s = "float64"; break; default: assert(dtype_ < numtypes); } - if (parameters_.size() == 0) { + if (parameters_.empty()) { out << indent << pre << s << post; } else { @@ -41,10 +45,10 @@ namespace awkward { } const std::shared_ptr PrimitiveType::shallow_copy() const { - return std::shared_ptr(new PrimitiveType(parameters_, dtype_)); + return std::make_shared(parameters_, dtype_); } - bool PrimitiveType::equal(const std::shared_ptr other, bool check_parameters) const { + bool PrimitiveType::equal(const std::shared_ptr& other, bool check_parameters) const { if (PrimitiveType* t = dynamic_cast(other.get())) { if (check_parameters && !equal_parameters(other.get()->parameters())) { return false; @@ -56,18 +60,6 @@ namespace awkward { } } - std::shared_ptr PrimitiveType::level() const { - return shallow_copy(); - } - - std::shared_ptr PrimitiveType::inner() const { - throw std::invalid_argument("PrimitiveType has no inner type"); - } - - std::shared_ptr PrimitiveType::inner(const std::string& key) const { - throw std::invalid_argument("PrimitiveType has no inner type"); - } - int64_t PrimitiveType::numfields() const { return -1; } diff --git a/src/libawkward/type/RecordType.cpp b/src/libawkward/type/RecordType.cpp index 3c1d6e98d7..a3d346ab80 100644 --- a/src/libawkward/type/RecordType.cpp +++ b/src/libawkward/type/RecordType.cpp @@ -10,14 +10,42 @@ #include "awkward/type/RecordType.h" namespace awkward { - std::string RecordType::tostring_part(std::string indent, std::string pre, std::string post) const { + RecordType::RecordType(const Type::Parameters& parameters, const std::vector>& types, const std::shared_ptr& lookup, const std::shared_ptr& reverselookup) + : Type(parameters) + , types_(types) + , lookup_(lookup) + , reverselookup_(reverselookup) { } + + RecordType::RecordType(const Type::Parameters& parameters, const std::vector>& types) + : Type(parameters) + , types_(types) + , lookup_(nullptr) + , reverselookup_(nullptr) 
{ } + + const std::vector> RecordType::types() const { + return types_; + }; + + const std::shared_ptr RecordType::lookup() const { + return lookup_; + } + + const std::shared_ptr RecordType::reverselookup() const { + return reverselookup_; + } + + bool RecordType::istuple() const { + return lookup_.get() == nullptr; + } + + std::string RecordType::tostring_part(const std::string& indent, const std::string& pre, const std::string& post) const { std::string typestr; if (get_typestr(typestr)) { return typestr; } std::stringstream out; - if (parameters_.size() == 0) { + if (parameters_.empty()) { if (reverselookup_.get() == nullptr) { out << "("; for (size_t j = 0; j < types_.size(); j++) { @@ -72,10 +100,10 @@ namespace awkward { } const std::shared_ptr RecordType::shallow_copy() const { - return std::shared_ptr(new RecordType(parameters_, types_, lookup_, reverselookup_)); + return std::make_shared(parameters_, types_, lookup_, reverselookup_); } - bool RecordType::equal(const std::shared_ptr other, bool check_parameters) const { + bool RecordType::equal(const std::shared_ptr& other, bool check_parameters) const { if (RecordType* t = dynamic_cast(other.get())) { if (check_parameters && !equal_parameters(other.get()->parameters())) { return false; @@ -121,18 +149,6 @@ namespace awkward { } } - std::shared_ptr RecordType::level() const { - return shallow_copy(); - } - - std::shared_ptr RecordType::inner() const { - throw std::invalid_argument("RecordType has no inner type without a key"); - } - - std::shared_ptr RecordType::inner(const std::string& key) const { - return field(key); - } - int64_t RecordType::numfields() const { return (int64_t)types_.size(); } @@ -253,4 +269,27 @@ namespace awkward { } return out; } + + const std::shared_ptr RecordType::astuple() const { + return std::make_shared(parameters_, types_, std::shared_ptr(nullptr), std::shared_ptr(nullptr)); + } + + void RecordType::append(const std::shared_ptr& type) { + if (!istuple()) { + 
reverselookup_.get()->push_back(std::to_string(types_.size())); + } + types_.push_back(type); + } + + void RecordType::setkey(int64_t fieldindex, const std::string& fieldname) { + if (istuple()) { + lookup_ = std::make_shared(); + reverselookup_ = std::make_shared(); + for (size_t j = 0; j < types_.size(); j++) { + reverselookup_.get()->push_back(std::to_string(j)); + } + } + (*lookup_.get())[fieldname] = (size_t)fieldindex; + (*reverselookup_.get())[(size_t)fieldindex] = fieldname; + } } diff --git a/src/libawkward/type/RegularType.cpp b/src/libawkward/type/RegularType.cpp index 9683b285d7..c92c9699c8 100644 --- a/src/libawkward/type/RegularType.cpp +++ b/src/libawkward/type/RegularType.cpp @@ -9,14 +9,19 @@ #include "awkward/type/RegularType.h" namespace awkward { - std::string RegularType::tostring_part(std::string indent, std::string pre, std::string post) const { + RegularType::RegularType(const Type::Parameters& parameters, const std::shared_ptr& type, int64_t size) + : Type(parameters) + , type_(type) + , size_(size) { } + + std::string RegularType::tostring_part(const std::string& indent, const std::string& pre, const std::string& post) const { std::string typestr; if (get_typestr(typestr)) { return typestr; } std::stringstream out; - if (parameters_.size() == 0) { + if (parameters_.empty()) { out << indent << pre << size_ << " * " << type_.get()->tostring_part(indent, "", "") << post; } else { @@ -26,10 +31,10 @@ namespace awkward { } const std::shared_ptr RegularType::shallow_copy() const { - return std::shared_ptr(new RegularType(parameters_, type_, size_)); + return std::make_shared(parameters_, type_, size_); } - bool RegularType::equal(const std::shared_ptr other, bool check_parameters) const { + bool RegularType::equal(const std::shared_ptr& other, bool check_parameters) const { if (RegularType* t = dynamic_cast(other.get())) { if (check_parameters && !equal_parameters(other.get()->parameters())) { return false; @@ -41,18 +46,6 @@ namespace awkward { 
} } - std::shared_ptr RegularType::level() const { - return shallow_copy(); - } - - std::shared_ptr RegularType::inner() const { - return type_; - } - - std::shared_ptr RegularType::inner(const std::string& key) const { - throw std::runtime_error("FIXME: RegularType::inner(key)"); - } - int64_t RegularType::numfields() const { return type_.get()->numfields(); } diff --git a/src/libawkward/type/Type.cpp b/src/libawkward/type/Type.cpp index 73269c9d11..49108fb54b 100644 --- a/src/libawkward/type/Type.cpp +++ b/src/libawkward/type/Type.cpp @@ -12,10 +12,47 @@ namespace rj = rapidjson; namespace awkward { - std::shared_ptr Type::nolength() const { - return shallow_copy(); + std::shared_ptr Type::none() { + return std::shared_ptr(nullptr); } + Type::Type(const Type::Parameters& parameters) + : parameters_(parameters) { } + + Type::~Type() { } + + const Type::Parameters Type::parameters() const { + return parameters_; + } + + void Type::setparameters(const Type::Parameters& parameters) { + parameters_ = parameters; + } + + std::string Type::parameter(const std::string& key) { + // find() rather than operator[]: reading a missing key returns "" without inserting an empty entry into parameters_ + auto item = parameters_.find(key); + return item == parameters_.end() ? std::string() : item->second; + } + + void Type::setparameter(const std::string& key, const std::string& value) { + parameters_[key] = value; + } + + bool Type::parameter_equals(const std::string& key, const std::string& value) { + auto item = parameters_.find(key); + if (item == parameters_.end()) { + return false; + } + else { + return item->second == value; + } + } + + std::string Type::tostring() const { + return tostring_part("", "", ""); + }; + const std::string Type::compare(std::shared_ptr supertype) { // FIXME: better side-by-side comparison return tostring() + std::string(" versus ") + supertype.get()->tostring(); diff --git a/src/libawkward/type/UnionType.cpp b/src/libawkward/type/UnionType.cpp index 560a99a82b..15bd4ceaf6 100644 --- a/src/libawkward/type/UnionType.cpp +++ b/src/libawkward/type/UnionType.cpp @@ -9,7 +9,11 @@ #include "awkward/type/UnionType.h" namespace awkward { -
std::string UnionType::tostring_part(std::string indent, std::string pre, std::string post) const { + UnionType::UnionType(const Type::Parameters& parameters, const std::vector>& types) + : Type(parameters) + , types_(types) { } + + std::string UnionType::tostring_part(const std::string& indent, const std::string& pre, const std::string& post) const { std::string typestr; if (get_typestr(typestr)) { return typestr; @@ -23,7 +27,7 @@ namespace awkward { } out << type(i).get()->tostring_part(indent, "", ""); } - if (parameters_.size() != 0) { + if (!parameters_.empty()) { out << ", " << string_parameters(); } out << "]" << post; @@ -31,10 +35,10 @@ namespace awkward { } const std::shared_ptr UnionType::shallow_copy() const { - return std::shared_ptr(new UnionType(parameters_, types_)); + return std::make_shared(parameters_, types_); } - bool UnionType::equal(const std::shared_ptr other, bool check_parameters) const { + bool UnionType::equal(const std::shared_ptr& other, bool check_parameters) const { if (UnionType* t = dynamic_cast(other.get())) { if (check_parameters && !equal_parameters(other.get()->parameters())) { return false; @@ -58,22 +62,6 @@ namespace awkward { return (int64_t)types_.size(); } - std::shared_ptr UnionType::level() const { - std::vector> types; - for (auto t : types_) { - types.push_back(t.get()->level()); - } - return std::shared_ptr(new UnionType(Type::Parameters(), types)); - } - - std::shared_ptr UnionType::inner() const { - throw std::runtime_error("FIXME: UnionType::inner()"); - } - - std::shared_ptr UnionType::inner(const std::string& key) const { - throw std::runtime_error("FIXME: UnionType::inner(key)"); - } - int64_t UnionType::numfields() const { throw std::runtime_error("FIXME: UnionType::numfields"); } diff --git a/src/libawkward/type/UnknownType.cpp b/src/libawkward/type/UnknownType.cpp index dae16d03ab..d068478eef 100644 --- a/src/libawkward/type/UnknownType.cpp +++ b/src/libawkward/type/UnknownType.cpp @@ -6,14 +6,17 @@ 
#include "awkward/type/UnknownType.h" namespace awkward { - std::string UnknownType::tostring_part(std::string indent, std::string pre, std::string post) const { + UnknownType::UnknownType(const Parameters& parameters) + : Type(parameters) { } + + std::string UnknownType::tostring_part(const std::string& indent, const std::string& pre, const std::string& post) const { std::string typestr; if (get_typestr(typestr)) { return typestr; } std::stringstream out; - if (parameters_.size() == 0) { + if (parameters_.empty()) { out << indent << pre << "unknown" << post; } else { @@ -23,10 +26,10 @@ namespace awkward { } const std::shared_ptr UnknownType::shallow_copy() const { - return std::shared_ptr(new UnknownType(parameters_)); + return std::make_shared(parameters_); } - bool UnknownType::equal(const std::shared_ptr other, bool check_parameters) const { + bool UnknownType::equal(const std::shared_ptr& other, bool check_parameters) const { if (UnknownType* t = dynamic_cast(other.get())) { if (check_parameters && !equal_parameters(other.get()->parameters())) { return false; @@ -38,18 +41,6 @@ namespace awkward { } } - std::shared_ptr UnknownType::level() const { - return shallow_copy(); - } - - std::shared_ptr UnknownType::inner() const { - return shallow_copy(); - } - - std::shared_ptr UnknownType::inner(const std::string& key) const { - return shallow_copy(); - } - int64_t UnknownType::numfields() const { return -1; } diff --git a/src/libawkward/util.cpp b/src/libawkward/util.cpp index f11a044495..01cf79aa9f 100644 --- a/src/libawkward/util.cpp +++ b/src/libawkward/util.cpp @@ -11,7 +11,7 @@ namespace awkward { namespace util { - void handle_error(const struct Error& err, const std::string classname, const Identity* id) { + void handle_error(const struct Error& err, const std::string& classname, const Identity* id) { if (err.str != nullptr) { std::stringstream out; out << "in " << classname; @@ -32,7 +32,7 @@ namespace awkward { } } - std::string quote(std::string x, bool 
doublequote) { + std::string quote(const std::string& x, bool doublequote) { // TODO: escape characters, possibly using RapidJSON. if (doublequote) { return std::string("\"") + x + std::string("\""); diff --git a/src/pyawkward.cpp b/src/pyawkward.cpp index 11137811e1..6531fcbb32 100644 --- a/src/pyawkward.cpp +++ b/src/pyawkward.cpp @@ -439,7 +439,7 @@ py::class_> make_IdentityOf(py::handle m, std::string name) { void toslice_part(ak::Slice& slice, py::object obj) { if (py::isinstance(obj)) { // FIXME: what happens if you give this a Numpy integer? a Numpy 0-dimensional array? - slice.append(std::shared_ptr(new ak::SliceAt(obj.cast()))); + slice.append(std::make_shared(obj.cast())); } else if (py::isinstance(obj)) { @@ -461,21 +461,21 @@ void toslice_part(ak::Slice& slice, py::object obj) { if (step == 0) { throw std::invalid_argument("slice step must not be 0"); } - slice.append(std::shared_ptr(new ak::SliceRange(start, stop, step))); + slice.append(std::make_shared(start, stop, step)); } #if PY_MAJOR_VERSION >= 3 else if (py::isinstance(obj)) { - slice.append(std::shared_ptr(new ak::SliceEllipsis())); + slice.append(std::make_shared()); } #endif else if (obj.is(py::module::import("numpy").attr("newaxis"))) { - slice.append(std::shared_ptr(new ak::SliceNewAxis())); + slice.append(std::make_shared()); } else if (py::isinstance(obj)) { - slice.append(std::shared_ptr(new ak::SliceField(obj.cast()))); + slice.append(std::make_shared(obj.cast())); } else if (py::isinstance(obj)) { @@ -491,8 +491,8 @@ void toslice_part(ak::Slice& slice, py::object obj) { } } - if (all_strings && strings.size() != 0) { - slice.append(std::shared_ptr(new ak::SliceFields(strings))); + if (all_strings && !strings.empty()) { + slice.append(std::make_shared(strings)); } else { py::object objarray = py::module::import("numpy").attr("asarray")(obj); @@ -518,7 +518,7 @@ void toslice_part(ak::Slice& slice, py::object obj) { strides.push_back((int64_t)intinfo.strides[i] / sizeof(int64_t)); } 
ak::Index64 index(std::shared_ptr(reinterpret_cast(intinfo.ptr), pyobject_deleter(intarray.ptr())), 0, shape[0]); - slice.append(std::shared_ptr(new ak::SliceArray64(index, shape, strides))); + slice.append(std::make_shared(index, shape, strides)); } } @@ -550,7 +550,7 @@ void toslice_part(ak::Slice& slice, py::object obj) { strides.push_back((int64_t)intinfo.strides[i] / (int64_t)sizeof(int64_t)); } ak::Index64 index(std::shared_ptr(reinterpret_cast(intinfo.ptr), pyobject_deleter(intarray.ptr())), 0, shape[0]); - slice.append(std::shared_ptr(new ak::SliceArray64(index, shape, strides))); + slice.append(std::make_shared(index, shape, strides)); } } @@ -646,7 +646,7 @@ py::object getitem(T& self, py::object obj) { break; } } - if (all_strings && strings.size() != 0) { + if (all_strings && !strings.empty()) { return box(self.getitem_fields(strings)); } // NOTE: control flow can pass through here; don't make the last line an 'else'! @@ -759,21 +759,6 @@ py::class_ make_FillableArray(py::handle m, std::string name) /////////////////////////////////////////////////////////////// Type -template -std::shared_ptr inner(T& self) { - return self.inner(); -} - -template -std::shared_ptr inner_key(T& self, std::string key) { - return self.inner(key); -} - -template -py::dict emptydict(T& self) { - return py::dict(); -} - ak::Type::Parameters dict2parameters(py::object in) { ak::Type::Parameters out; if (in.is(py::none())) { @@ -817,10 +802,15 @@ void setparameters(T& self, py::object parameters) { template py::class_ type_methods(py::class_, ak::Type>& x) { return x.def("__repr__", &T::tostring) - .def("nolength", &T::nolength) - .def("level", &T::level) - .def("inner", &inner) - .def("inner", &inner_key) + .def("__getitem__", [](T& self, const std::string& key) -> py::object { + std::string cppvalue = self.parameter(key); + py::str pyvalue(PyUnicode_DecodeUTF8(cppvalue.data(), cppvalue.length(), "surrogateescape")); + return py::module::import("json").attr("loads")(pyvalue); 
+ }) + .def("__setitem__", [](T& self, const std::string& key, py::object value) -> void { + py::object valuestr = py::module::import("json").attr("dumps")(value); + self.setparameter(key, valuestr.cast()); + }) .def_property_readonly("numfields", &T::numfields) .def("fieldindex", &T::fieldindex) .def("key", &T::key) @@ -1086,8 +1076,8 @@ py::class_, ak::Type> make_Recor return ak::RecordType(dict2parameters(parameters), out, std::shared_ptr(nullptr), std::shared_ptr(nullptr)); }), py::arg("types"), py::arg("parameters") = py::none()) .def(py::init([](py::dict types, py::object parameters) -> ak::RecordType { - std::shared_ptr lookup(new ak::RecordType::Lookup); - std::shared_ptr reverselookup(new ak::RecordType::ReverseLookup); + std::shared_ptr lookup = std::make_shared(); + std::shared_ptr reverselookup = std::make_shared(); std::vector> out; for (auto x : types) { std::string key = x.first.cast(); @@ -1102,8 +1092,8 @@ py::class_, ak::Type> make_Recor for (auto x : types) { out.push_back(unbox_type(x)); } - std::shared_ptr lookup(new ak::RecordType::Lookup); - std::shared_ptr reverselookup(new ak::RecordType::ReverseLookup); + std::shared_ptr lookup = std::make_shared(); + std::shared_ptr reverselookup = std::make_shared(); from_lookup(lookup, reverselookup, pylookup, pyreverselookup, (int64_t)out.size()); return ak::RecordType(dict2parameters(parameters), out, lookup, reverselookup); }, py::arg("types"), py::arg("lookup"), py::arg("reverselookup") = py::none(), py::arg("parameters") = py::none()) @@ -1160,8 +1150,8 @@ py::class_, ak::Type> make_Recor for (auto x : state[1]) { fields.push_back(unbox_type(x)); } - std::shared_ptr lookup(new ak::RecordType::Lookup); - std::shared_ptr reverselookup(new ak::RecordType::ReverseLookup); + std::shared_ptr lookup = std::make_shared(); + std::shared_ptr reverselookup = std::make_shared(); from_lookup(lookup, reverselookup, state[2].cast(), state[3], (int64_t)fields.size()); return 
ak::RecordType(dict2parameters(state[0]), fields, lookup, reverselookup); })) @@ -1223,15 +1213,12 @@ py::class_, ak::Content> content_methods(py::class_ void { self.setid(); }) - .def_property_readonly("baretype", &ak::Content::baretype) - .def_property_readonly("isbare", &ak::Content::isbare) - .def_property("type", [](T& self) -> py::object { + .def_property_readonly("isbare", &T::isbare) + .def_property_readonly("type", [](T& self) -> py::object { return box(self.type()); - }, [](T& self, py::object type) -> void { - self.settype(unbox_type(type)); }) - .def("accepts", [](T& self, py::object type) -> bool { - return self.accepts(unbox_type(type)); + .def("astype", [](T& self, std::shared_ptr& type) -> py::object { + return box(self.astype(type)); }) .def("__len__", &len) .def("__getitem__", &getitem) @@ -1279,7 +1266,7 @@ py::class_, ak::Content> make_Nu if (info.shape.size() != info.ndim || info.strides.size() != info.ndim) { throw std::invalid_argument("NumpyArray len(shape) != ndim or len(strides) != ndim"); } - return ak::NumpyArray(unbox_id_none(id), unbox_type_none(type), std::shared_ptr( + return ak::NumpyArray(unbox_id_none(id), ak::NumpyArray::unwrap_regulartype(unbox_type_none(type), info.shape), std::shared_ptr( reinterpret_cast(info.ptr), pyobject_deleter(array.ptr())), info.shape, info.strides, @@ -1359,8 +1346,8 @@ py::class_, ak::Content> mak py::class_, ak::Content> make_RecordArray(py::handle m, std::string name) { return content_methods(py::class_, ak::Content>(m, name.c_str()) .def(py::init([](py::dict contents, py::object id, py::object type) -> ak::RecordArray { - std::shared_ptr lookup(new ak::RecordArray::Lookup); - std::shared_ptr reverselookup(new ak::RecordArray::ReverseLookup); + std::shared_ptr lookup = std::make_shared(); + std::shared_ptr reverselookup = std::make_shared(); std::vector> out; for (auto x : contents) { std::string key = x.first.cast(); @@ -1368,7 +1355,7 @@ py::class_, ak::Content> make_ 
reverselookup.get()->push_back(key); out.push_back(unbox_content(x.second)); } - if (out.size() == 0) { + if (out.empty()) { throw std::invalid_argument("construct RecordArrays without fields using RecordArray(length) where length is an integer"); } return ak::RecordArray(unbox_id_none(id), unbox_type_none(type), out, lookup, reverselookup); @@ -1378,7 +1365,7 @@ py::class_, ak::Content> make_ for (auto x : contents) { out.push_back(unbox_content(x)); } - if (out.size() == 0) { + if (out.empty()) { throw std::invalid_argument("construct RecordArrays without fields using RecordArray(length) where length is an integer"); } return ak::RecordArray(unbox_id_none(id), unbox_type_none(type), out, std::shared_ptr(nullptr), std::shared_ptr(nullptr)); @@ -1391,11 +1378,11 @@ py::class_, ak::Content> make_ for (auto x : contents) { out.push_back(unbox_content(x)); } - if (out.size() == 0) { + if (out.empty()) { throw std::invalid_argument("construct RecordArrays without fields using RecordArray(length) where length is an integer"); } - std::shared_ptr lookup(new ak::RecordArray::Lookup); - std::shared_ptr reverselookup(new ak::RecordArray::ReverseLookup); + std::shared_ptr lookup = std::make_shared(); + std::shared_ptr reverselookup = std::make_shared(); from_lookup(lookup, reverselookup, pylookup, pyreverselookup, (int64_t)out.size()); return ak::RecordArray(unbox_id_none(id), unbox_type_none(type), out, lookup, reverselookup); }, py::arg("contents"), py::arg("lookup"), py::arg("reverselookup") = py::none(), py::arg("id") = py::none(), py::arg("type") = py::none()) @@ -1452,14 +1439,15 @@ py::class_> make_Record(py::handle m, st .def("__repr__", &repr) .def_property_readonly("id", [](ak::Record& self) -> py::object { return box(self.id()); }) .def("__getitem__", &getitem) - .def_property_readonly("baretype", &ak::Record::baretype) .def_property_readonly("isbare", &ak::Record::isbare) .def_property_readonly("type", [](ak::Record& self) -> py::object { return box(self.type()); 
}) + .def("astype", [](ak::Record& self, std::shared_ptr& type) -> py::object { + return box(self.astype(type)); + }) .def("tojson", &tojson_string, py::arg("pretty") = false, py::arg("maxdecimals") = py::none()) .def("tojson", &tojson_file, py::arg("destination"), py::arg("pretty") = false, py::arg("maxdecimals") = py::none(), py::arg("buffersize") = 65536) - .def_property_readonly("type", &ak::Content::type) .def_property_readonly("array", [](ak::Record& self) -> py::object { return box(self.array()); }) .def_property_readonly("at", &ak::Record::at) diff --git a/tests/test_PR021_emptyarray.py b/tests/test_PR021_emptyarray.py index bb9b1989e4..898df3ac78 100644 --- a/tests/test_PR021_emptyarray.py +++ b/tests/test_PR021_emptyarray.py @@ -12,14 +12,14 @@ def test_unknown(): a = awkward1.fromjson("[[], [], []]").layout assert awkward1.tolist(a) == [[], [], []] - assert str(awkward1.typeof(a)) == "3 * var * unknown" - assert awkward1.typeof(a) == awkward1.layout.ArrayType(awkward1.layout.ListType(awkward1.layout.UnknownType()), 3) - assert not awkward1.typeof(a) == awkward1.layout.ArrayType(awkward1.layout.PrimitiveType("float64"), 3) + assert str(awkward1.typeof(a)) == "var * unknown" + assert awkward1.typeof(a) == awkward1.layout.ListType(awkward1.layout.UnknownType()) + assert not awkward1.typeof(a) == awkward1.layout.PrimitiveType("float64") a = awkward1.fromjson("[[], [[], []], [[], [], []]]").layout assert awkward1.tolist(a) == [[], [[], []], [[], [], []]] - assert str(awkward1.typeof(a)) == "3 * var * var * unknown" - assert awkward1.typeof(a) == awkward1.layout.ArrayType(awkward1.layout.ListType(awkward1.layout.ListType(awkward1.layout.UnknownType())), 3) + assert str(awkward1.typeof(a)) == "var * var * unknown" + assert awkward1.typeof(a) == awkward1.layout.ListType(awkward1.layout.ListType(awkward1.layout.UnknownType())) a = awkward1.layout.FillableArray() a.beginlist() @@ -29,15 +29,15 @@ def test_unknown(): a.beginlist() a.endlist() assert 
awkward1.tolist(a) == [[], [], []] - assert str(awkward1.typeof(a)) == "3 * var * unknown" - assert awkward1.typeof(a) == awkward1.layout.ArrayType(awkward1.layout.ListType(awkward1.layout.UnknownType()), 3) - assert not awkward1.typeof(a) == awkward1.layout.ArrayType(awkward1.layout.PrimitiveType("float64"), 3) + assert str(awkward1.typeof(a)) == "var * unknown" + assert awkward1.typeof(a) == awkward1.layout.ListType(awkward1.layout.UnknownType()) + assert not awkward1.typeof(a) == awkward1.layout.PrimitiveType("float64") a = a.snapshot() assert awkward1.tolist(a) == [[], [], []] - assert str(awkward1.typeof(a)) == "3 * var * unknown" - assert awkward1.typeof(a) == awkward1.layout.ArrayType(awkward1.layout.ListType(awkward1.layout.UnknownType()), 3) - assert not awkward1.typeof(a) == awkward1.layout.ArrayType(awkward1.layout.PrimitiveType("float64"), 3) + assert str(awkward1.typeof(a)) == "var * unknown" + assert awkward1.typeof(a) == awkward1.layout.ListType(awkward1.layout.UnknownType()) + assert not awkward1.typeof(a) == awkward1.layout.PrimitiveType("float64") def test_getitem(): a = awkward1.fromjson("[[], [[], []], [[], [], []]]") diff --git a/tests/test_PR022_fillablearray_in_numba.py b/tests/test_PR022_fillablearray_in_numba.py index c534f334b9..47e0d7512a 100644 --- a/tests/test_PR022_fillablearray_in_numba.py +++ b/tests/test_PR022_fillablearray_in_numba.py @@ -34,7 +34,9 @@ def f2(q): b = f2(a) assert sys.getrefcount(a) == 3 - assert str(b.snapshot()) == "" + assert str(b.snapshot()) == """ + unknown +""" def test_simple(): @numba.njit diff --git a/tests/test_PR023_regular_array.py b/tests/test_PR023_regular_array.py index b698027f83..1679c5ba0e 100644 --- a/tests/test_PR023_regular_array.py +++ b/tests/test_PR023_regular_array.py @@ -17,7 +17,7 @@ listarray = awkward1.layout.ListArray64(starts, stops, regulararray) def test_type(): - assert str(awkward1.typeof(regulararray)) == "3 * 2 * var * float64" + assert str(awkward1.typeof(regulararray)) == "2 * 
var * float64" def test_iteration(): assert awkward1.tolist(regulararray) == [[[0.0, 1.1, 2.2], []], [[3.3, 4.4], [5.5]], [[6.6, 7.7, 8.8, 9.9], []]] diff --git a/tests/test_PR025_record_array.py b/tests/test_PR025_record_array.py index 7626521b46..561fe075fc 100644 --- a/tests/test_PR025_record_array.py +++ b/tests/test_PR025_record_array.py @@ -98,19 +98,19 @@ def test_type(): recordarray = awkward1.layout.RecordArray(0, True) recordarray.append(content1) recordarray.append(listoffsetarray) - assert str(awkward1.typeof(recordarray)) == '5 * (int64, var * float64)' + assert str(awkward1.typeof(recordarray)) == '(int64, var * float64)' assert recordarray.lookup is None - assert awkward1.typeof(recordarray) == awkward1.layout.ArrayType(awkward1.layout.RecordType(( + assert awkward1.typeof(recordarray) == awkward1.layout.RecordType(( awkward1.layout.PrimitiveType("int64"), - awkward1.layout.ListType(awkward1.layout.PrimitiveType("float64")))), 5) + awkward1.layout.ListType(awkward1.layout.PrimitiveType("float64")))) assert awkward1.typeof(recordarray[2]) == awkward1.layout.RecordType( (awkward1.layout.PrimitiveType("int64"), awkward1.layout.ListType(awkward1.layout.PrimitiveType("float64")))) recordarray.setkey(0, "one") recordarray.setkey(1, "two") - assert str(awkward1.typeof(recordarray)) in ('5 * {"one": int64, "two": var * float64}', '5 * {"two": var * float64, "one": int64}') + assert str(awkward1.typeof(recordarray)) in ('{"one": int64, "two": var * float64}', '{"two": var * float64, "one": int64}') assert recordarray.lookup == {"one": 0, "two": 1} assert str(awkward1.layout.RecordType( @@ -121,9 +121,9 @@ def test_type(): {"one": awkward1.layout.PrimitiveType("int32"), "two": awkward1.layout.PrimitiveType("float64")})) in ('{"one": int32, "two": float64}', '{"two": float64, "one": int32}') - assert awkward1.typeof(recordarray) == awkward1.layout.ArrayType(awkward1.layout.RecordType({ + assert awkward1.typeof(recordarray) == awkward1.layout.RecordType({ "one": 
awkward1.layout.PrimitiveType("int64"), - "two": awkward1.layout.ListType(awkward1.layout.PrimitiveType("float64"))}), 5) + "two": awkward1.layout.ListType(awkward1.layout.PrimitiveType("float64"))}) assert awkward1.typeof(recordarray[2]) == awkward1.layout.RecordType({ "one": awkward1.layout.PrimitiveType("int64"), "two": awkward1.layout.ListType(awkward1.layout.PrimitiveType("float64"))}) @@ -276,7 +276,7 @@ def test_setid(): def test_fillable_tuple(): fillable = awkward1.layout.FillableArray() - assert str(fillable.type) == '0 * unknown' + assert str(fillable.type) == 'unknown' assert awkward1.tolist(fillable.snapshot()) == [] fillable.begintuple(0) @@ -288,7 +288,7 @@ def test_fillable_tuple(): fillable.begintuple(0) fillable.endtuple() - assert str(fillable.type) == '3 * ()' + assert str(fillable.type) == '()' assert awkward1.tolist(fillable.snapshot()) == [(), (), ()] fillable = awkward1.layout.FillableArray() @@ -329,12 +329,12 @@ def test_fillable_tuple(): fillable.boolean(True) fillable.endtuple() - assert str(fillable.type) == '3 * (bool, var * int64, float64)' + assert str(fillable.type) == '(bool, var * int64, float64)' assert awkward1.tolist(fillable.snapshot()) == [(True, [1], 1.1), (False, [2, 2], 2.2), (True, [3, 3, 3], 3.3)] def test_fillable_record(): fillable = awkward1.layout.FillableArray() - assert str(fillable.type) == '0 * unknown' + assert str(fillable.type) == 'unknown' assert awkward1.tolist(fillable.snapshot()) == [] fillable.beginrecord() @@ -346,7 +346,7 @@ def test_fillable_record(): fillable.beginrecord() fillable.endrecord() - assert str(fillable.type) == '3 * {}' + assert str(fillable.type) == '{}' assert awkward1.tolist(fillable.snapshot()) == [{}, {}, {}] fillable = awkward1.layout.FillableArray() @@ -372,7 +372,7 @@ def test_fillable_record(): fillable.real(3.3) fillable.endrecord() - assert str(fillable.type) == '3 * {"one": int64, "two": float64}' + assert str(fillable.type) == '{"one": int64, "two": float64}' assert 
awkward1.tolist(fillable.snapshot()) == [{"one": 1, "two": 1.1}, {"one": 2, "two": 2.2}, {"one": 3, "two": 3.3}] def test_fromiter(): diff --git a/tests/test_PR028_add_dressed_types.py b/tests/test_PR028_add_dressed_types.py index 0e1a7f348d..c47d829285 100644 --- a/tests/test_PR028_add_dressed_types.py +++ b/tests/test_PR028_add_dressed_types.py @@ -58,11 +58,9 @@ def test_string2(): assert repr(a[0]) == "" assert repr(a[1]) == "" assert repr(a[2]) == "" - - a.type = awkward1.layout.ArrayType(awkward1.string, 3) - + + a = awkward1.Array(listoffsetarray.astype(awkward1.string)) assert isinstance(a, awkward1.Array) - assert isinstance(a, awkward1.behavior.string.StringBehavior) assert awkward1.tolist(a) == ['hey', '', 'there'] assert repr(a.type) == "3 * string" @@ -82,15 +80,13 @@ def test_string2(): assert repr(a[2]) == "'there'" def test_accepts(): - content = awkward1.layout.NumpyArray(numpy.array([1.1, 2.2, 3.3, 4.4, 5.5], dtype=numpy.float64)) - listoffsetarray = awkward1.layout.ListOffsetArray64(awkward1.layout.Index64(numpy.array([0, 3, 3, 5])), content) - dressed1 = awkward1.layout.ListType(awkward1.layout.PrimitiveType("float64"), {"__class__": "Dummy"}) - listoffsetarray.type = dressed1 + content = awkward1.layout.NumpyArray(numpy.array([1.1, 2.2, 3.3, 4.4, 5.5], dtype=numpy.float64)) + listoffsetarray = awkward1.layout.ListOffsetArray64(awkward1.layout.Index64(numpy.array([0, 3, 3, 5])), content).astype(dressed1) dressed2 = awkward1.layout.PrimitiveType("float64", {"__class__": "Dummy"}) with pytest.raises(ValueError): - listoffsetarray.type = dressed2 + awkward1.layout.ListOffsetArray64(awkward1.layout.Index64(numpy.array([0, 3, 3, 5])), content).astype(dressed2) class D(awkward1.highlevel.Array): @staticmethod @@ -108,36 +104,35 @@ def test_type_propagation(): drec = awkward1.layout.RecordType(collections.OrderedDict([("one", dint64), ("two", dvarfloat64)]), {"__class__": "D", "__str__": "D[{\"one\": D[int64], \"two\": D[var * D[float64]]}]"}) dvarrec 
= awkward1.layout.ListType(drec, {"__class__": "D", "__str__": "D[var * D[{\"one\": D[int64], \"two\": D[var * D[float64]]}]]"}) - array.layout.type = awkward1.layout.ArrayType(dvarrec, 3) - assert array.layout.type == awkward1.layout.ArrayType(dvarrec, 3) - assert array.layout.content.type == awkward1.layout.ArrayType(drec, 5) - assert array.layout.content.field("one").type == awkward1.layout.ArrayType(dint64, 5) - assert array.layout.content.field("two").type == awkward1.layout.ArrayType(dvarfloat64, 5) - assert array.layout.content.field("two").content.type == awkward1.layout.ArrayType(dfloat64, 10) - - assert array.layout[-1].type == awkward1.layout.ArrayType(drec, 2) - assert array.layout[-1]["one"].type == awkward1.layout.ArrayType(dint64, 2) - assert array.layout[-1]["two"].type == awkward1.layout.ArrayType(dvarfloat64, 2) - assert array.layout[-1]["two"][1].type == awkward1.layout.ArrayType(dfloat64, 4) - assert array.layout[-1, "one"].type == awkward1.layout.ArrayType(dint64, 2) - assert array.layout[-1, "two"].type == awkward1.layout.ArrayType(dvarfloat64, 2) - assert array.layout[-1, "two", 1].type == awkward1.layout.ArrayType(dfloat64, 4) - assert array.layout["one", -1].type == awkward1.layout.ArrayType(dint64, 2) - assert array.layout["two", -1].type == awkward1.layout.ArrayType(dvarfloat64, 2) - assert array.layout["two", -1, 1].type == awkward1.layout.ArrayType(dfloat64, 4) - - assert array.layout[1:].type == awkward1.layout.ArrayType(dvarrec, 2) - assert array.layout[1:, "one"].type == awkward1.layout.ArrayType(awkward1.layout.ListType(dint64), 2) - assert array.layout["one", 1:].type == awkward1.layout.ArrayType(awkward1.layout.ListType(dint64), 2) - - assert array.layout[[2, 1]].type == awkward1.layout.ArrayType(dvarrec, 2) - assert array.layout[[2, 1], "one"].type == awkward1.layout.ArrayType(awkward1.layout.ListType(dint64), 2) - - array2 = awkward1.layout.NumpyArray(numpy.arange(2*3*5, dtype=numpy.int64).reshape(2, 3, 5)) - array2.type = 
awkward1.layout.ArrayType(awkward1.layout.RegularType(awkward1.layout.RegularType(dint64, 5), 3), 2) - - assert repr(array2.baretype) == "2 * 3 * 5 * int64" - assert repr(array2.type) == "2 * 3 * 5 * D[int64]" - assert repr(array2[0].type) == "3 * 5 * D[int64]" - assert repr(array2[0, 0].type) == "5 * D[int64]" + array = awkward1.Array(array.layout.astype(dvarrec)) + + assert array.layout.type == dvarrec + assert array.layout.content.type == drec + assert array.layout.content.field("one").type == dint64 + assert array.layout.content.field("two").type == dvarfloat64 + assert array.layout.content.field("two").content.type == dfloat64 + + assert array.layout[-1].type == drec + assert array.layout[-1]["one"].type == dint64 + assert array.layout[-1]["two"].type == dvarfloat64 + assert array.layout[-1]["two"][1].type == dfloat64 + assert array.layout[-1, "one"].type == dint64 + assert array.layout[-1, "two"].type == dvarfloat64 + assert array.layout[-1, "two", 1].type == dfloat64 + assert array.layout["one", -1].type == dint64 + assert array.layout["two", -1].type == dvarfloat64 + assert array.layout["two", -1, 1].type == dfloat64 + + assert array.layout[1:].type == dvarrec + assert array.layout[1:, "one"].type == awkward1.layout.ListType(dint64) + assert array.layout["one", 1:].type == awkward1.layout.ListType(dint64) + + assert array.layout[[2, 1]].type == dvarrec + assert array.layout[[2, 1], "one"].type == awkward1.layout.ListType(dint64) + + array2 = awkward1.layout.NumpyArray(numpy.arange(2*3*5, dtype=numpy.int64).reshape(2, 3, 5)).astype(awkward1.layout.RegularType(awkward1.layout.RegularType(dint64, 5), 3)) + + assert repr(array2.type) == "3 * 5 * D[int64]" + assert repr(array2[0].type) == "5 * D[int64]" + assert repr(array2[0, 0].type) == "D[int64]" assert array2[-1, -1, -1] == 29 diff --git a/tests/test_PR031_types_in_numba.py b/tests/test_PR031_types_in_numba.py index 32c8510ade..cec3bd0259 100644 --- a/tests/test_PR031_types_in_numba.py +++ 
b/tests/test_PR031_types_in_numba.py @@ -103,9 +103,8 @@ def typestr(baretype, parameters): return "D[{0}]".format(baretype) def test_numpyarray(): - array1 = awkward1.layout.NumpyArray(numpy.arange(2*3*5, dtype=numpy.int64).reshape(2, 3, 5)) dint64 = awkward1.layout.PrimitiveType("int64", {"__class__": "D", "__str__": "D[int64]"}) - array1.type = awkward1.layout.ArrayType(awkward1.layout.RegularType(awkward1.layout.RegularType(dint64, 5), 3), 2) + array1 = awkward1.layout.NumpyArray(numpy.arange(2*3*5, dtype=numpy.int64).reshape(2, 3, 5), type=awkward1.layout.RegularType(awkward1.layout.RegularType(dint64, 5), 3)) @numba.njit def f1(q): @@ -113,16 +112,14 @@ def f1(q): array2 = f1(array1) - assert repr(array2.baretype) == "2 * 3 * 5 * int64" - assert repr(array2.type) == "2 * 3 * 5 * D[int64]" - assert repr(array2[0].type) == "3 * 5 * D[int64]" - assert repr(array2[0, 0].type) == "5 * D[int64]" + assert repr(array2.type) == "3 * 5 * D[int64]" + assert repr(array2[0].type) == "5 * D[int64]" + assert repr(array2[0, 0].type) == "D[int64]" assert array2[-1, -1, -1] == 29 def test_regulararray(): - array1 = awkward1.layout.RegularArray(awkward1.layout.NumpyArray(numpy.arange(10, dtype=numpy.int64)), 5) dregint64 = awkward1.layout.RegularType(awkward1.layout.PrimitiveType("int64"), 5, {"__class__": "D", "__str__": "D[5 * int64]"}) - array1.type = awkward1.layout.ArrayType(dregint64, 2) + array1 = awkward1.layout.RegularArray(awkward1.layout.NumpyArray(numpy.arange(10, dtype=numpy.int64)), 5).astype(dregint64) @numba.njit def f1(q): @@ -130,13 +127,11 @@ def f1(q): array2 = f1(array1) - assert repr(array2.baretype) == "2 * 5 * int64" - assert repr(array2.type) == "2 * D[5 * int64]" + assert repr(array2.type) == "D[5 * int64]" def test_listoffsetarray(): - array1 = awkward1.layout.ListOffsetArray64(awkward1.layout.Index64(numpy.array([0, 3, 3, 5], dtype=numpy.int64)), awkward1.layout.NumpyArray(numpy.array([1, 2, 3, 4, 5], dtype=numpy.int64))) dvarint64 = 
awkward1.layout.ListType(awkward1.layout.PrimitiveType("int64"), {"__class__": "D", "__str__": "D[var * int64]"}) - array1.type = awkward1.layout.ArrayType(dvarint64, 3) + array1 = awkward1.layout.ListOffsetArray64(awkward1.layout.Index64(numpy.array([0, 3, 3, 5], dtype=numpy.int64)), awkward1.layout.NumpyArray(numpy.array([1, 2, 3, 4, 5], dtype=numpy.int64))).astype(dvarint64) @numba.njit def f1(q): @@ -144,13 +139,11 @@ def f1(q): array2 = f1(array1) - assert repr(array2.baretype) == "3 * var * int64" - assert repr(array2.type) == "3 * D[var * int64]" + assert repr(array2.type) == "D[var * int64]" def test_listarray(): - array1 = awkward1.layout.ListArray64(awkward1.layout.Index64(numpy.array([0, 3, 3], dtype=numpy.int64)), awkward1.layout.Index64(numpy.array([3, 3, 5], dtype=numpy.int64)), awkward1.layout.NumpyArray(numpy.array([1, 2, 3, 4, 5], dtype=numpy.int64))) dvarint64 = awkward1.layout.ListType(awkward1.layout.PrimitiveType("int64"), {"__class__": "D", "__str__": "D[var * int64]"}) - array1.type = awkward1.layout.ArrayType(dvarint64, 3) + array1 = awkward1.layout.ListArray64(awkward1.layout.Index64(numpy.array([0, 3, 3], dtype=numpy.int64)), awkward1.layout.Index64(numpy.array([3, 3, 5], dtype=numpy.int64)), awkward1.layout.NumpyArray(numpy.array([1, 2, 3, 4, 5], dtype=numpy.int64))).astype(dvarint64) @numba.njit def f1(q): @@ -158,13 +151,11 @@ def f1(q): array2 = f1(array1) - assert repr(array2.baretype) == "3 * var * int64" - assert repr(array2.type) == "3 * D[var * int64]" + assert repr(array2.type) == "D[var * int64]" def test_recordarray(): - array1 = awkward1.Array([{"one": 1, "two": 1.1}, {"one": 2, "two": 2.2}, {"one": 3, "two": 3.3}]).layout dvarrec = awkward1.layout.RecordType({"one": awkward1.layout.PrimitiveType("int64"), "two": awkward1.layout.PrimitiveType("float64")}, {"__class__": "D", "__str__": "D[{\"one\": int64, \"two\": float64}]"}) - array1.type = awkward1.layout.ArrayType(dvarrec, 3) + array1 = awkward1.Array([{"one": 1, "two": 
1.1}, {"one": 2, "two": 2.2}, {"one": 3, "two": 3.3}]).layout.astype(dvarrec) @numba.njit def f1(q): @@ -172,8 +163,5 @@ def f1(q): array2 = f1(array1) - assert repr(array2.baretype) in ('3 * {"one": int64, "two": float64}', '3 * {"two": float64, "one": int64}') - assert repr(array2.type) in ('3 * D[{"one": int64, "two": float64}]', '3 * D[{"two": float64, "one": int64}]') - - assert repr(array2[0].baretype) in ('{"one": int64, "two": float64}', '{"two": float64, "one": int64}') + assert repr(array2.type) in ('D[{"one": int64, "two": float64}]', ' D[{"two": float64, "one": int64}]') assert repr(array2[0].type) in ('D[{"one": int64, "two": float64}]', 'D[{"two": float64, "one": int64}]') diff --git a/tests/test_PR032_replace_dressedtype.py b/tests/test_PR032_replace_dressedtype.py index 93c2f7a450..99810e12ed 100644 --- a/tests/test_PR032_replace_dressedtype.py +++ b/tests/test_PR032_replace_dressedtype.py @@ -40,12 +40,11 @@ def __repr__(self): return "".format(str(self)) ns = {"Dummy": Dummy} - t = awkward1.layout.PrimitiveType("float64", {"__class__": "Dummy"}) - x = awkward1.layout.NumpyArray(numpy.array([1.1, 2.2, 3.3, 4.4, 5.5]), type=t) - a = awkward1.Array(x, namespace=ns) + x = awkward1.layout.NumpyArray(numpy.array([1.1, 2.2, 3.3, 4.4, 5.5])) + a = awkward1.Array(x, type=awkward1.layout.ArrayType(x.type, 5, {"__class__": "Dummy", "__str__": "D[5 * float64]"}), namespace=ns) assert repr(a) == "" - x2 = awkward1.layout.ListOffsetArray64(awkward1.layout.Index64(numpy.array([0, 3, 3, 5], dtype=numpy.int64)), x) + x2 = awkward1.layout.ListOffsetArray64(awkward1.layout.Index64(numpy.array([0, 3, 3, 5], dtype=numpy.int64)), awkward1.layout.NumpyArray(numpy.array([1.1, 2.2, 3.3, 4.4, 5.5]), type=awkward1.layout.PrimitiveType("float64", {"__class__": "Dummy"}))) a2 = awkward1.Array(x2, namespace=ns) assert repr(a2) == ", ... 
] type='3 * var * float64[parameters={\"__cl...'>" assert repr(a2[0]) == "" @@ -60,7 +59,7 @@ def test_typestr(): assert repr(t2) == "var * something" def test_record_name(): - fillable = awkward1.FillableArray() + fillable = awkward1.layout.FillableArray() fillable.beginrecord("Dummy") fillable.field("one") @@ -77,8 +76,8 @@ def test_record_name(): fillable.endrecord() a = fillable.snapshot() - assert repr(a.type) == '2 * struct[["one", "two"], [int64, float64], parameters={"__class__": "Dummy"}]' - assert a.type.nolength().parameters == {"__class__": "Dummy"} + assert repr(a.type) == 'struct[["one", "two"], [int64, float64], parameters={"__class__": "Dummy"}]' + assert a.type.parameters == {"__class__": "Dummy"} def test_fillable_string(): fillable = awkward1.FillableArray() @@ -150,7 +149,7 @@ def test_fromiter_fromjson(): assert awkward1.tolist(awkward1.fromjson('["one", "two", "three"]')) == ["one", "two", "three"] assert awkward1.tolist(awkward1.fromjson('[["one", "two", "three"], [], ["four", "five"]]')) == [["one", "two", "three"], [], ["four", "five"]] - + numba = pytest.importorskip("numba") def test_record_name_numba(): @@ -174,8 +173,8 @@ def f1(fillable): f1(fillable) a = fillable.snapshot() - assert repr(a.type) == '2 * struct[["one", "two"], [int64, float64], parameters={"__class__": "Dummy"}]' - assert a.type.nolength().parameters == {"__class__": "Dummy"} + assert repr(a.type) == 'struct[["one", "two"], [int64, float64], parameters={"__class__": "Dummy"}]' + assert a.type.parameters == {"__class__": "Dummy"} def test_boxing(): @numba.njit