From 293184c65b26634dde160fb8ba4ef7137dc2a66f Mon Sep 17 00:00:00 2001 From: Jim Pivarski Date: Wed, 27 Nov 2019 16:59:35 -0600 Subject: [PATCH] Start writing RecordArray (C++ and Fillable, but not Numba). (#25) Includes zero-field `RecordArray` (with non-zero length) and `tojson`/`fromjson`, but not Numba. * [WIP] Start writing RecordArray (full scope to be determined). * Skeleton for RecordArray. * Modifiers compile. * Tests pass. * String representation of RecordArray is done. * fields, key, and aliases. * tojson_part is more sane; fixed FIXME in NumpyArray and added RecordArray::tojson_part. * Some more helper functions compile. * RecordArray utility functions work. * Stubs for Record (scalar of RecordArray). * Can produce Records from RecordArray. * Record::tojson_part. * tolist(Record). * RecordType stub compiles. * util::quote as a TODO stub for proper string-escapes. * Fixed Windows errors and warnings. * Actually test before committing. * RecordType::equals. * Done with RecordType (if Python dict order is okay). * Content returns the right type for scalar and non-scalar data. * Defined slices for a string key and a list of string keys. * Stubbed out slicing of fields. * Stubbed all Content::getitem_field and getitem_fields. * Ready to start implementing getitem_field(s). * Record and RecordArray::getitem_* except for all getitem_nexts. * Better constructor for RecordArray. * Ready to implement *::getitem_next(field?) and Record*::getitem_next(*). * Implemented and tested *::getitem_field?. * *::getitem_next(field?) can be defined in general, in terms of *::getitem_field?. * Record*::getitem_next(*) are done. * Slicing within records (if part of a chain, not on a single Record object). * [skip ci] Save work (because the setid is actually functional; tracing down another bug). * RecordArray::setid is done * Fixed some Identity bugs and introduced location at Python-level. * Fixed 32-bit warnings and added Record*::withoutkeys. * Stubs for filling records. 
* Fillable::beginrec should know the set of keys, even though this will be hard to provide from JSON. * Formal Slots object. * A fillable record type is defined by its slotsid; field names only taken once. * TupleFillable is a simpler version of RecordFillable. * First working version of FillableArray with tuples (RecordArray). * Don't let any fields get ahead of the tuple. * All tuple methods implemented, though only the main path has been tested. * Updated stubs for RecordFillable, but I forgot about the nested case. * Sketch out whole fillable algorithm in Python before implementing in C++. * Repertoire of (mock) Content types for testing the Fillable algorithm. * Stubbed out Fillable implementations. * Filling in Fillable implementations. * Filling in Fillable implementations (part 2). * Filling in Fillable implementations (part 3; nested tuples are working). * Filling in Fillable implementations (part 4; missing fields are working). * Filling in Fillable implementations (part 5; nullptr returns are still needed in lists). * Filling in Fillable implementations (part 6; working on nested lists: there's an error). * Temporarily start a clean slate to try out this new 'active' idea. * Building up new structure: 'active' obviates the need for a special nullptr interpretation. * Back up to the previous level, but simpler now. * Put all checks into the same order. * Started on unions. * Seems I have 5 cases left... * Studies of fillable-2 have full coverage (except error conditions and some printing). * Move fillable-2 into place as the only fillable study. * [skip ci] Cleared out all implementations to start again (tests will fail). * [skip ci] Don't pass down FillableArray* because we don't need it. * [skip ci] Uniform Fillable constructors. * [skip ci] Copied implementations into UnknownFillable and OptionFillable. * [skip ci] Copied implementations into UnionFillable. * [skip ci] Copied implementations into ListFillable. 
* [skip ci] Copied implementations into TupleFillable. * [skip ci] Copied implementations into primitive Fillables. * All tests pass with the new Fillable implementations. * Ready to fill in implementations for RecordFillable. * Implemented RecordFillable but have not tested it yet. * fromiter with tuples. * fromiter with records. * Done with this PR. The next PR will put RecordArrays in Numba. * RecordArray with zero fields and non-zero length is now possible. * RecordArray now supports tojson/fromjson. --- README.md | 10 +- VERSION_INFO | 2 +- awkward1/_numba/array/numpyarray.py | 34 - awkward1/operations/convert.py | 9 + awkward1/operations/describe.py | 3 + include/awkward/Content.h | 9 +- include/awkward/Identity.h | 8 +- include/awkward/Iterator.h | 6 +- include/awkward/Slice.h | 24 + include/awkward/array/EmptyArray.h | 4 + include/awkward/array/ListArray.h | 2 + include/awkward/array/ListOffsetArray.h | 2 + include/awkward/array/NumpyArray.h | 16 +- include/awkward/array/RawArray.h | 16 + include/awkward/array/Record.h | 66 ++ include/awkward/array/RecordArray.h | 102 +++ include/awkward/array/RegularArray.h | 2 + include/awkward/fillable/BoolFillable.h | 14 +- include/awkward/fillable/Fillable.h | 12 + include/awkward/fillable/FillableArray.h | 13 +- include/awkward/fillable/Float64Fillable.h | 13 +- include/awkward/fillable/Int64Fillable.h | 16 +- include/awkward/fillable/ListFillable.h | 17 +- include/awkward/fillable/OptionFillable.h | 20 +- include/awkward/fillable/RecordFillable.h | 70 ++ include/awkward/fillable/TupleFillable.h | 61 ++ include/awkward/fillable/UnionFillable.h | 23 +- include/awkward/fillable/UnknownFillable.h | 17 +- include/awkward/io/json.h | 10 +- include/awkward/type/RecordType.h | 55 ++ include/awkward/type/UnionType.h | 4 +- include/awkward/util.h | 2 + src/libawkward/Content.cpp | 37 +- src/libawkward/Identity.cpp | 32 +- src/libawkward/Iterator.cpp | 6 +- src/libawkward/Slice.cpp | 38 +- src/libawkward/array/EmptyArray.cpp | 
18 +- src/libawkward/array/ListArray.cpp | 14 +- src/libawkward/array/ListOffsetArray.cpp | 14 +- src/libawkward/array/NumpyArray.cpp | 213 +++-- src/libawkward/array/Record.cpp | 216 +++++ src/libawkward/array/RecordArray.cpp | 468 ++++++++++ src/libawkward/array/RegularArray.cpp | 18 +- src/libawkward/fillable/BoolFillable.cpp | 82 +- src/libawkward/fillable/FillableArray.cpp | 44 + src/libawkward/fillable/Float64Fillable.cpp | 74 +- src/libawkward/fillable/Int64Fillable.cpp | 83 +- src/libawkward/fillable/ListFillable.cpp | 179 +++- src/libawkward/fillable/OptionFillable.cpp | 153 +++- src/libawkward/fillable/RecordFillable.cpp | 404 +++++++++ src/libawkward/fillable/TupleFillable.cpp | 336 +++++++ src/libawkward/fillable/UnionFillable.cpp | 286 +++++- src/libawkward/fillable/UnknownFillable.cpp | 110 ++- src/libawkward/io/json.cpp | 22 +- src/libawkward/type/OptionType.cpp | 10 +- src/libawkward/type/RecordType.cpp | 256 ++++++ src/libawkward/type/UnionType.cpp | 4 +- src/libawkward/type/UnknownType.cpp | 2 +- src/libawkward/util.cpp | 12 +- src/pyawkward.cpp | 478 ++++++++-- studies/fillable.py | 940 ++++++++++++++++++++ tests/test_PR008_slices_and_getitem.py | 25 +- tests/test_PR018_fromiter_fillable.py | 2 +- tests/test_PR019_use_json_library.py | 11 + tests/test_PR021_emptyarray.py | 8 +- tests/test_PR025_record_array.py | 407 +++++++++ 66 files changed, 5262 insertions(+), 402 deletions(-) create mode 100644 include/awkward/array/Record.h create mode 100644 include/awkward/array/RecordArray.h create mode 100644 include/awkward/fillable/RecordFillable.h create mode 100644 include/awkward/fillable/TupleFillable.h create mode 100644 include/awkward/type/RecordType.h create mode 100644 src/libawkward/array/Record.cpp create mode 100644 src/libawkward/array/RecordArray.cpp create mode 100644 src/libawkward/fillable/RecordFillable.cpp create mode 100644 src/libawkward/fillable/TupleFillable.cpp create mode 100644 src/libawkward/type/RecordType.cpp create mode 
100644 studies/fillable.py create mode 100644 tests/test_PR025_record_array.py diff --git a/README.md b/README.md index 2864c6b874..886b9358d5 100644 --- a/README.md +++ b/README.md @@ -58,11 +58,11 @@ Completed items are ☑check-marked. See [closed PRs](https://github.com/scikit- * [X] Reproduce all of the above as Numba extensions (make `NumpyArray`, `ListArray`, and `ListOffsetArray` usable in Numba-compiled functions). * [X] Error messages with location-of-failure information if the array has an `Identity` (except in Numba). * [X] Fully implement `__getitem__` for int/slice/intarray/boolarray/tuple (placeholders for newaxis/ellipsis), with perfect agreement with [Numpy basic/advanced indexing](https://docs.scipy.org/doc/numpy/reference/arrays.indexing.html), to all levels of depth. - * [ ] Appendable arrays (a distinct phase from readable arrays, when the type is still in flux) to implement `awkward.fromiter` in C++. + * [X] Appendable arrays (a distinct phase from readable arrays, when the type is still in flux) to implement `awkward.fromiter` in C++. * [X] Implemented all types but records; tested all primitives and lists. * [X] Expose appendable arrays to Numba. - * [ ] Implement appendable records. - * [ ] Test all (requires array types for all). + * [X] Implement appendable records. + * [X] Test all (tested in mock [studies/fillable.py](tree/master/studies/fillable.py)). * [X] JSON → Awkward via header-only [RapidJSON](https://rapidjson.org) and `awkward.fromiter`. * [ ] Explicit broadcasting functions for jagged and non-jagged arrays and scalars. * [ ] Structure-preserving ufunc-like operation on the C++ side that applies a lambda function to inner data. The Python `__array_ufunc__` implementation will _call_ this to preserve structure. @@ -74,7 +74,8 @@ Completed items are ☑check-marked. See [closed PRs](https://github.com/scikit- * [X] `ListArray`: the new `JaggedArray`, based on `starts` and `stops` (i.e. fully general). 
* [X] `ListOffsetArray`: the `JaggedArray` case with no unreachable data between reachable data (gaps). * [X] `RegularArray`: for building rectilinear, N-dimensional arrays of arbitrary contents, e.g. putting jagged dimensions inside fixed dimensions. - * [ ] `RecordArray`: the new `Table` _without_ lazy-slicing. + * [X] `RecordArray`: the new `Table` _without_ lazy-slicing. + * [ ] Implement it in Numba as well. * [ ] `MaskedArray`, `BitMaskedArray`, `IndexedMaskedArray`: same as the old versions. * [ ] `UnionArray`: same as the old version; `SparseUnionArray`: the additional case found in Apache Arrow. * [ ] `IndexedArray`: same as the old version. @@ -89,6 +90,7 @@ Completed items are ☑check-marked. See [closed PRs](https://github.com/scikit- * [ ] `PyVirtualArray`: takes a Python lambda (which gets carried into `VirtualArray`). * [ ] `PyObjectArray`: same as the old version. * [X] Describe high-level types using [datashape](https://datashape.readthedocs.io/en/latest/) and possibly also an in-house schema. (Emit datashape _strings_ from C++.) + * [ ] Type compatibility: option to treat nonexistent record fields as nullable data. * [ ] Describe mid-level "persistence types" with no lengths, somewhat minimal JSON, optional dtypes/compression. * [ ] Describe low-level layouts independently of filled arrays (JSON or something)? 
* [ ] Layer 1 interface `Array`: diff --git a/VERSION_INFO b/VERSION_INFO index 5a48b6be2a..0e7400f186 100644 --- a/VERSION_INFO +++ b/VERSION_INFO @@ -1 +1 @@ -0.1.24 +0.1.25 diff --git a/awkward1/_numba/array/numpyarray.py b/awkward1/_numba/array/numpyarray.py index 82f88f8ae0..aabf275519 100644 --- a/awkward1/_numba/array/numpyarray.py +++ b/awkward1/_numba/array/numpyarray.py @@ -123,40 +123,6 @@ def lower_len(context, builder, sig, args): proxyin = numba.cgutils.create_struct_proxy(tpe)(context, builder, value=val) return numba.targets.arrayobj.array_len(context, builder, numba.intp(tpe.arraytpe), (proxyin.array,)) -# def lower_getitem_nothing(context, builder, tpe, val): -# import awkward1._numba.identity -# -# proxyin = numba.cgutils.create_struct_proxy(tpe)(context, builder, value=val) -# -# proxyslice = numba.cgutils.create_struct_proxy(numba.types.slice2_type)(context, builder) -# proxyslice.start = context.get_constant(numba.intp, 0) -# proxyslice.stop = context.get_constant(numba.intp, 0) -# proxyslice.step = context.get_constant(numba.intp, 1) -# emptyslice = proxyslice._getvalue() -# emptyarray = numba.targets.arrayobj.getitem_arraynd_intp(context, builder, tpe.arraytpe(tpe.arraytpe, numba.types.slice2_type), (proxyin.array, emptyslice)) -# -# if tpe.arraytpe.ndim > 1: -# shapetpe = numba.types.Tuple((numba.intp,) * tpe.arraytpe.ndim) -# shapeval = numba.targets.arrayobj.make_array(tpe.arraytpe)(context, builder, proxyin.array).shape -# -# newshapetpe = numba.types.Tuple((numba.intp,) * (tpe.arraytpe.ndim - 1)) -# newshapeval = context.make_tuple(builder, newshapetpe, tuple(builder.extract_value(shapeval, i) for i in range(tpe.arraytpe.ndim - 1))) -# -# arraytpe = numba.types.Array(tpe.arraytpe.dtype, tpe.arraytpe.ndim - 1, tpe.arraytpe.layout) -# arrayval = numba.targets.arrayobj.array_reshape(context, builder, arraytpe(tpe.arraytpe, newshapetpe), (proxyin.array, newshapeval)) -# -# else: -# arraytpe = tpe.arraytpe -# arrayval = emptyarray -# -# 
outtpe = NumpyArrayType(arraytpe, tpe.idtpe) -# proxyout = numba.cgutils.create_struct_proxy(outtpe)(context, builder) -# proxyout.array = arrayval -# if tpe.idtpe != numba.none: -# proxyout.id = awkward1._numba.identity.lower_getitem_any(context, builder, tpe.idtpe, numba.types.slice2_type, proxyin.id, emptyslice) -# -# return proxyout._getvalue() - @numba.extending.lower_builtin(operator.getitem, NumpyArrayType, numba.types.Integer) @numba.extending.lower_builtin(operator.getitem, NumpyArrayType, numba.types.SliceType) @numba.extending.lower_builtin(operator.getitem, NumpyArrayType, numba.types.Array) diff --git a/awkward1/operations/convert.py b/awkward1/operations/convert.py index b6e4963258..ee5fd11684 100644 --- a/awkward1/operations/convert.py +++ b/awkward1/operations/convert.py @@ -22,6 +22,12 @@ def tolist(array): if array is None or isinstance(array, (bool, str, bytes, numbers.Number)): return array + elif isinstance(array, awkward1.layout.Record) and array.istuple: + return tuple(tolist(x) for x in array.values()) + + elif isinstance(array, awkward1.layout.Record): + return {n: tolist(x) for n, x in array.items()} + elif isinstance(array, numpy.ndarray): return array.tolist() @@ -43,6 +49,9 @@ def tojson(array, *args, **kwargs): if array is None or isinstance(array, (bool, str, bytes, numbers.Number)): return json.dumps(array) + elif isinstance(array, awkward1.layout.Record): + return array.tojson(*args, **kwargs) + elif isinstance(array, numpy.ndarray): return awkward1.layout.NumpyArray(array).tojson(*args, **kwargs) diff --git a/awkward1/operations/describe.py b/awkward1/operations/describe.py index 831a9286a2..50fa7bcfba 100644 --- a/awkward1/operations/describe.py +++ b/awkward1/operations/describe.py @@ -25,6 +25,9 @@ def typeof(array): elif isinstance(array, numpy.generic): raise ValueError("cannot describe {0} as a PrimitiveType".format(type(array))) + elif isinstance(array, awkward1.layout.Record): + return array.type + elif isinstance(array, 
numpy.ndarray): if len(array.shape) == 0: return typeof(array.reshape((1,))[0]) diff --git a/include/awkward/Content.h b/include/awkward/Content.h index 6f32e71588..c546399bcc 100644 --- a/include/awkward/Content.h +++ b/include/awkward/Content.h @@ -9,13 +9,14 @@ #include "awkward/Identity.h" #include "awkward/Slice.h" #include "awkward/io/json.h" -#include "awkward/type/ArrayType.h" +#include "awkward/type/Type.h" namespace awkward { class Content { public: virtual ~Content() { } + virtual bool isscalar() const; virtual const std::string classname() const = 0; virtual const std::shared_ptr id() const = 0; virtual void setid() = 0; @@ -31,12 +32,14 @@ namespace awkward { virtual const std::shared_ptr getitem_at_nowrap(int64_t at) const = 0; virtual const std::shared_ptr getitem_range(int64_t start, int64_t stop) const = 0; virtual const std::shared_ptr getitem_range_nowrap(int64_t start, int64_t stop) const = 0; + virtual const std::shared_ptr getitem_field(const std::string& key) const = 0; + virtual const std::shared_ptr getitem_fields(const std::vector& keys) const = 0; virtual const std::shared_ptr getitem(const Slice& where) const; virtual const std::shared_ptr getitem_next(const std::shared_ptr head, const Slice& tail, const Index64& advanced) const; virtual const std::shared_ptr carry(const Index64& carry) const = 0; virtual const std::pair minmax_depth() const = 0; - const ArrayType type() const; + const std::shared_ptr type() const; const std::string tostring() const; const std::string tojson(bool pretty, int64_t maxdecimals) const; void tojson(FILE* destination, bool pretty, int64_t maxdecimals, int64_t buffersize) const; @@ -47,6 +50,8 @@ namespace awkward { virtual const std::shared_ptr getitem_next(const SliceEllipsis& ellipsis, const Slice& tail, const Index64& advanced) const; virtual const std::shared_ptr getitem_next(const SliceNewAxis& newaxis, const Slice& tail, const Index64& advanced) const; virtual const std::shared_ptr getitem_next(const 
SliceArray64& array, const Slice& tail, const Index64& advanced) const = 0; + virtual const std::shared_ptr getitem_next(const SliceField& field, const Slice& tail, const Index64& advanced) const; + virtual const std::shared_ptr getitem_next(const SliceFields& fields, const Slice& tail, const Index64& advanced) const; const std::shared_ptr getitem_next_array_wrap(const std::shared_ptr outcontent, const std::vector& shape) const; diff --git a/include/awkward/Identity.h b/include/awkward/Identity.h index f751f84b04..9e1bd9f7e2 100644 --- a/include/awkward/Identity.h +++ b/include/awkward/Identity.h @@ -33,12 +33,14 @@ namespace awkward { const int64_t length() const { return length_; } virtual const std::string classname() const = 0; - virtual const std::string location(int64_t where) const = 0; + virtual const std::string location_at(int64_t where) const = 0; virtual const std::shared_ptr to64() const = 0; virtual const std::string tostring_part(const std::string indent, const std::string pre, const std::string post) const = 0; virtual const std::shared_ptr getitem_range_nowrap(int64_t start, int64_t stop) const = 0; virtual const std::shared_ptr shallow_copy() const = 0; virtual const std::shared_ptr getitem_carry_64(const Index64& carry) const = 0; + virtual const std::shared_ptr withfieldloc(const FieldLoc& fieldloc) const = 0; + virtual int64_t value(int64_t row, int64_t col) const = 0; const std::string tostring() const; @@ -63,12 +65,14 @@ namespace awkward { const std::shared_ptr ptr() const { return ptr_; } virtual const std::string classname() const; - virtual const std::string location(int64_t where) const; + virtual const std::string location_at(int64_t at) const; virtual const std::shared_ptr to64() const; virtual const std::string tostring_part(const std::string indent, const std::string pre, const std::string post) const; virtual const std::shared_ptr getitem_range_nowrap(int64_t start, int64_t stop) const; virtual const std::shared_ptr shallow_copy() 
const; virtual const std::shared_ptr getitem_carry_64(const Index64& carry) const; + virtual const std::shared_ptr withfieldloc(const FieldLoc& fieldloc) const; + virtual int64_t value(int64_t row, int64_t col) const; const std::vector getitem_at(int64_t at) const; const std::vector getitem_at_nowrap(int64_t at) const; diff --git a/include/awkward/Iterator.h b/include/awkward/Iterator.h index 235aaac19c..4d11460fea 100644 --- a/include/awkward/Iterator.h +++ b/include/awkward/Iterator.h @@ -11,12 +11,12 @@ namespace awkward { public: Iterator(const std::shared_ptr content) : content_(content) - , where_(0) { + , at_(0) { content.get()->check_for_iteration(); } const std::shared_ptr content() const { return content_; } - const int64_t where() const { return where_; } + const int64_t at() const { return at_; } const bool isdone() const; const std::shared_ptr next(); @@ -26,7 +26,7 @@ namespace awkward { private: const std::shared_ptr content_; - int64_t where_; + int64_t at_; }; } diff --git a/include/awkward/Slice.h b/include/awkward/Slice.h index 3014767ce2..4a2afbb530 100644 --- a/include/awkward/Slice.h +++ b/include/awkward/Slice.h @@ -98,6 +98,30 @@ namespace awkward { typedef SliceArrayOf SliceArray64; + class SliceField: public SliceItem { + public: + SliceField(const std::string& key): key_(key) { } + const std::string key() const { return key_; } + virtual const std::shared_ptr shallow_copy() const { + return std::shared_ptr(new SliceField(key_)); + } + virtual const std::string tostring() const; + private: + const std::string key_; + }; + + class SliceFields: public SliceItem { + public: + SliceFields(const std::vector& keys): keys_(keys) { } + const std::vector keys() const { return keys_; } + virtual const std::shared_ptr shallow_copy() const { + return std::shared_ptr(new SliceFields(keys_)); + } + virtual const std::string tostring() const; + private: + const std::vector keys_; + }; + class Slice { public: static int64_t none() { return 
SliceItem::none(); } diff --git a/include/awkward/array/EmptyArray.h b/include/awkward/array/EmptyArray.h index 5a159c8745..d556f68525 100644 --- a/include/awkward/array/EmptyArray.h +++ b/include/awkward/array/EmptyArray.h @@ -32,6 +32,8 @@ namespace awkward { virtual const std::shared_ptr getitem_at_nowrap(int64_t at) const; virtual const std::shared_ptr getitem_range(int64_t start, int64_t stop) const; virtual const std::shared_ptr getitem_range_nowrap(int64_t start, int64_t stop) const; + virtual const std::shared_ptr getitem_field(const std::string& key) const; + virtual const std::shared_ptr getitem_fields(const std::vector& keys) const; virtual const std::shared_ptr carry(const Index64& carry) const; virtual const std::pair minmax_depth() const; @@ -39,6 +41,8 @@ namespace awkward { virtual const std::shared_ptr getitem_next(const SliceAt& at, const Slice& tail, const Index64& advanced) const; virtual const std::shared_ptr getitem_next(const SliceRange& range, const Slice& tail, const Index64& advanced) const; virtual const std::shared_ptr getitem_next(const SliceArray64& array, const Slice& tail, const Index64& advanced) const; + virtual const std::shared_ptr getitem_next(const SliceField& field, const Slice& tail, const Index64& advanced) const; + virtual const std::shared_ptr getitem_next(const SliceFields& fields, const Slice& tail, const Index64& advanced) const; private: std::shared_ptr id_; diff --git a/include/awkward/array/ListArray.h b/include/awkward/array/ListArray.h index 9f96863277..62cade38fc 100644 --- a/include/awkward/array/ListArray.h +++ b/include/awkward/array/ListArray.h @@ -39,6 +39,8 @@ namespace awkward { virtual const std::shared_ptr getitem_at_nowrap(int64_t at) const; virtual const std::shared_ptr getitem_range(int64_t start, int64_t stop) const; virtual const std::shared_ptr getitem_range_nowrap(int64_t start, int64_t stop) const; + virtual const std::shared_ptr getitem_field(const std::string& key) const; + virtual const 
std::shared_ptr getitem_fields(const std::vector& keys) const; virtual const std::shared_ptr carry(const Index64& carry) const; virtual const std::pair minmax_depth() const; diff --git a/include/awkward/array/ListOffsetArray.h b/include/awkward/array/ListOffsetArray.h index 980c0ad7b1..3578e1f1f0 100644 --- a/include/awkward/array/ListOffsetArray.h +++ b/include/awkward/array/ListOffsetArray.h @@ -37,6 +37,8 @@ namespace awkward { virtual const std::shared_ptr getitem_at_nowrap(int64_t at) const; virtual const std::shared_ptr getitem_range(int64_t start, int64_t stop) const; virtual const std::shared_ptr getitem_range_nowrap(int64_t start, int64_t stop) const; + virtual const std::shared_ptr getitem_field(const std::string& key) const; + virtual const std::shared_ptr getitem_fields(const std::vector& keys) const; virtual const std::shared_ptr carry(const Index64& carry) const; virtual const std::pair minmax_depth() const; diff --git a/include/awkward/array/NumpyArray.h b/include/awkward/array/NumpyArray.h index 87ffd861e9..09c8049779 100644 --- a/include/awkward/array/NumpyArray.h +++ b/include/awkward/array/NumpyArray.h @@ -34,13 +34,13 @@ namespace awkward { const std::string format() const { return format_; } ssize_t ndim() const; - bool isscalar() const; bool isempty() const; void* byteptr() const; void* byteptr(ssize_t at) const; ssize_t bytelength() const; uint8_t getbyte(ssize_t at) const; + virtual bool isscalar() const; virtual const std::string classname() const; virtual const std::shared_ptr id() const { return id_; } virtual void setid(); @@ -56,6 +56,8 @@ namespace awkward { virtual const std::shared_ptr getitem_at_nowrap(int64_t at) const; virtual const std::shared_ptr getitem_range(int64_t start, int64_t stop) const; virtual const std::shared_ptr getitem_range_nowrap(int64_t start, int64_t stop) const; + virtual const std::shared_ptr getitem_field(const std::string& key) const; + virtual const std::shared_ptr getitem_fields(const std::vector& keys) 
const; virtual const std::shared_ptr getitem(const Slice& where) const; virtual const std::shared_ptr getitem_next(const std::shared_ptr head, const Slice& tail, const Index64& advanced) const; virtual const std::shared_ptr carry(const Index64& carry) const; @@ -75,6 +77,12 @@ namespace awkward { virtual const std::shared_ptr getitem_next(const SliceArray64& array, const Slice& tail, const Index64& advanced) const { throw std::runtime_error("NumpyArray has its own getitem_next system"); } + virtual const std::shared_ptr getitem_next(const SliceField& field, const Slice& tail, const Index64& advanced) const { + throw std::runtime_error("NumpyArray has its own getitem_next system"); + } + virtual const std::shared_ptr getitem_next(const SliceFields& fields, const Slice& tail, const Index64& advanced) const { + throw std::runtime_error("NumpyArray has its own getitem_next system"); + } const NumpyArray contiguous_next(Index64 bytepos) const; const NumpyArray getitem_bystrides(const std::shared_ptr& head, const Slice& tail, int64_t length) const; @@ -89,6 +97,12 @@ namespace awkward { const NumpyArray getitem_next(const SliceNewAxis& newaxis, const Slice& tail, const Index64& carry, const Index64& advanced, int64_t length, int64_t stride, bool first) const; const NumpyArray getitem_next(const SliceArray64& array, const Slice& tail, const Index64& carry, const Index64& advanced, int64_t length, int64_t stride, bool first) const; + void tojson_boolean(ToJson& builder) const; + template + void tojson_integer(ToJson& builder) const; + template + void tojson_real(ToJson& builder) const; + private: std::shared_ptr id_; std::shared_ptr ptr_; diff --git a/include/awkward/array/RawArray.h b/include/awkward/array/RawArray.h index 95df4044c4..2494cc6737 100644 --- a/include/awkward/array/RawArray.h +++ b/include/awkward/array/RawArray.h @@ -263,6 +263,14 @@ namespace awkward { return std::shared_ptr(new RawArrayOf(id, ptr_, offset_ + start, stop - start, itemsize_)); } + virtual 
const std::shared_ptr getitem_field(const std::string& key) const { + throw std::invalid_argument(std::string("cannot slice ") + classname() + std::string(" by field name")); + } + + virtual const std::shared_ptr getitem_fields(const std::vector& keys) const { + throw std::invalid_argument(std::string("cannot slice ") + classname() + std::string(" by field name")); + } + virtual const std::shared_ptr getitem(const Slice& where) const { std::shared_ptr nexthead = where.head(); Slice nexttail = where.tail(); @@ -349,6 +357,14 @@ namespace awkward { return carry(flathead); } + virtual const std::shared_ptr getitem_next(const SliceField& field, const Slice& tail, const Index64& advanced) const { + throw std::invalid_argument(field.tostring() + std::string(" is not a valid slice type for ") + classname()); + } + + virtual const std::shared_ptr getitem_next(const SliceFields& fields, const Slice& tail, const Index64& advanced) const { + throw std::invalid_argument(fields.tostring() + std::string(" is not a valid slice type for ") + classname()); + } + private: std::shared_ptr id_; const std::shared_ptr ptr_; diff --git a/include/awkward/array/Record.h b/include/awkward/array/Record.h new file mode 100644 index 0000000000..fc8020c6f6 --- /dev/null +++ b/include/awkward/array/Record.h @@ -0,0 +1,66 @@ +// BSD 3-Clause License; see https://github.com/jpivarski/awkward-1.0/blob/master/LICENSE + +#ifndef AWKWARD_RECORD_H_ +#define AWKWARD_RECORD_H_ + +#include "awkward/array/RecordArray.h" + +namespace awkward { + class Record: public Content { + public: + Record(const RecordArray& recordarray, int64_t at) + : recordarray_(recordarray) + , at_(at) { } + + const std::shared_ptr recordarray() const { return recordarray_.shallow_copy(); } + int64_t at() const { return at_; } + bool istuple() const { return recordarray_.istuple(); } + + virtual bool isscalar() const; + virtual const std::string classname() const; + virtual const std::shared_ptr id() const; + virtual void setid(); 
+ virtual void setid(const std::shared_ptr id); + virtual const std::string tostring_part(const std::string indent, const std::string pre, const std::string post) const; + virtual void tojson_part(ToJson& builder) const; + virtual const std::shared_ptr type_part() const; + virtual int64_t length() const; + virtual const std::shared_ptr shallow_copy() const; + virtual void check_for_iteration() const; + virtual const std::shared_ptr getitem_nothing() const; + virtual const std::shared_ptr getitem_at(int64_t at) const; + virtual const std::shared_ptr getitem_at_nowrap(int64_t at) const; + virtual const std::shared_ptr getitem_range(int64_t start, int64_t stop) const; + virtual const std::shared_ptr getitem_range_nowrap(int64_t start, int64_t stop) const; + virtual const std::shared_ptr getitem_field(const std::string& key) const; + virtual const std::shared_ptr getitem_fields(const std::vector& keys) const; + virtual const std::shared_ptr carry(const Index64& carry) const; + virtual const std::pair minmax_depth() const; + + int64_t numfields() const; + int64_t index(const std::string& key) const; + const std::string key(int64_t index) const; + bool has(const std::string& key) const; + const std::vector aliases(int64_t index) const; + const std::vector aliases(const std::string& key) const; + const std::shared_ptr field(int64_t index) const; + const std::shared_ptr field(const std::string& key) const; + const std::vector keys() const; + const std::vector> values() const; + const std::vector>> items() const; + const Record withoutkeys() const; + + protected: + virtual const std::shared_ptr getitem_next(const SliceAt& at, const Slice& tail, const Index64& advanced) const; + virtual const std::shared_ptr getitem_next(const SliceRange& range, const Slice& tail, const Index64& advanced) const; + virtual const std::shared_ptr getitem_next(const SliceArray64& array, const Slice& tail, const Index64& advanced) const; + virtual const std::shared_ptr getitem_next(const 
SliceField& field, const Slice& tail, const Index64& advanced) const; + virtual const std::shared_ptr getitem_next(const SliceFields& fields, const Slice& tail, const Index64& advanced) const; + + private: + const RecordArray recordarray_; + int64_t at_; + }; +} + +#endif // AWKWARD_RECORD_H_ diff --git a/include/awkward/array/RecordArray.h b/include/awkward/array/RecordArray.h new file mode 100644 index 0000000000..858b67f62f --- /dev/null +++ b/include/awkward/array/RecordArray.h @@ -0,0 +1,102 @@ +// BSD 3-Clause License; see https://github.com/jpivarski/awkward-1.0/blob/master/LICENSE + +#ifndef AWKWARD_RECORDARRAY_H_ +#define AWKWARD_RECORDARRAY_H_ + +#include +#include +#include + +#include "awkward/cpu-kernels/util.h" +#include "awkward/Identity.h" +#include "awkward/Content.h" + +namespace awkward { + class RecordArray: public Content { + public: + typedef std::unordered_map Lookup; + typedef std::vector ReverseLookup; + + RecordArray(const std::shared_ptr id, const std::vector>& contents, const std::shared_ptr& lookup, const std::shared_ptr& reverselookup) + : id_(id) + , contents_(contents) + , lookup_(lookup) + , reverselookup_(reverselookup) + , length_(0) { + assert(contents.size() != 0); + } + RecordArray(const std::shared_ptr id, const std::vector>& contents) + : id_(id) + , contents_(contents) + , lookup_(nullptr) + , reverselookup_(nullptr) + , length_(0) { + assert(contents.size() != 0); + } + RecordArray(const std::shared_ptr id, int64_t length, bool istuple) + : id_(id) + , contents_() + , lookup_(istuple ? nullptr : new Lookup) + , reverselookup_(istuple ? 
nullptr : new ReverseLookup) + , length_(length) { } + + const std::vector> contents() const { return contents_; } + const std::shared_ptr lookup() const { return lookup_; } + const std::shared_ptr reverselookup() const { return reverselookup_; } + bool istuple() const { return lookup_.get() == nullptr; } + + virtual const std::string classname() const; + virtual const std::shared_ptr id() const { return id_; } + virtual void setid(); + virtual void setid(const std::shared_ptr id); + virtual const std::string tostring_part(const std::string indent, const std::string pre, const std::string post) const; + virtual void tojson_part(ToJson& builder) const; + virtual const std::shared_ptr type_part() const; + virtual int64_t length() const; + virtual const std::shared_ptr shallow_copy() const; + virtual void check_for_iteration() const; + virtual const std::shared_ptr getitem_nothing() const; + virtual const std::shared_ptr getitem_at(int64_t at) const; + virtual const std::shared_ptr getitem_at_nowrap(int64_t at) const; + virtual const std::shared_ptr getitem_range(int64_t start, int64_t stop) const; + virtual const std::shared_ptr getitem_range_nowrap(int64_t start, int64_t stop) const; + virtual const std::shared_ptr getitem_field(const std::string& key) const; + virtual const std::shared_ptr getitem_fields(const std::vector& keys) const; + virtual const std::shared_ptr getitem_next(const std::shared_ptr head, const Slice& tail, const Index64& advanced) const; + virtual const std::shared_ptr carry(const Index64& carry) const; + virtual const std::pair minmax_depth() const; + + int64_t numfields() const; + int64_t index(const std::string& key) const; + const std::string key(int64_t index) const; + bool has(const std::string& key) const; + const std::vector aliases(int64_t index) const; + const std::vector aliases(const std::string& key) const; + const std::shared_ptr field(int64_t index) const; + const std::shared_ptr field(const std::string& key) const; + const 
std::vector keys() const; + const std::vector> values() const; + const std::vector>> items() const; + const RecordArray withoutkeys() const; + + void append(const std::shared_ptr& content, const std::string& key); + void append(const std::shared_ptr& content); + void setkey(int64_t index, const std::string& key); + + protected: + virtual const std::shared_ptr getitem_next(const SliceAt& at, const Slice& tail, const Index64& advanced) const; + virtual const std::shared_ptr getitem_next(const SliceRange& range, const Slice& tail, const Index64& advanced) const; + virtual const std::shared_ptr getitem_next(const SliceArray64& array, const Slice& tail, const Index64& advanced) const; + virtual const std::shared_ptr getitem_next(const SliceField& field, const Slice& tail, const Index64& advanced) const; + virtual const std::shared_ptr getitem_next(const SliceFields& fields, const Slice& tail, const Index64& advanced) const; + + private: + std::shared_ptr id_; + std::vector> contents_; + std::shared_ptr lookup_; + std::shared_ptr reverselookup_; + int64_t length_; + }; +} + +#endif // AWKWARD_RECORDARRAY_H_ diff --git a/include/awkward/array/RegularArray.h b/include/awkward/array/RegularArray.h index afa0bf9540..8e9eb18827 100644 --- a/include/awkward/array/RegularArray.h +++ b/include/awkward/array/RegularArray.h @@ -38,6 +38,8 @@ namespace awkward { virtual const std::shared_ptr getitem_at_nowrap(int64_t at) const; virtual const std::shared_ptr getitem_range(int64_t start, int64_t stop) const; virtual const std::shared_ptr getitem_range_nowrap(int64_t start, int64_t stop) const; + virtual const std::shared_ptr getitem_field(const std::string& key) const; + virtual const std::shared_ptr getitem_fields(const std::vector& keys) const; virtual const std::shared_ptr carry(const Index64& carry) const; virtual const std::pair minmax_depth() const; diff --git a/include/awkward/fillable/BoolFillable.h b/include/awkward/fillable/BoolFillable.h index 58008d1429..ea473d4e59 100644 
--- a/include/awkward/fillable/BoolFillable.h +++ b/include/awkward/fillable/BoolFillable.h @@ -11,19 +11,31 @@ namespace awkward { class BoolFillable: public Fillable { public: - BoolFillable(const FillableOptions& options): options_(options), buffer_(options) { } + BoolFillable(const FillableOptions& options, const GrowableBuffer& buffer): options_(options), buffer_(buffer) { } + + static BoolFillable* fromempty(const FillableOptions& options) { + return new BoolFillable(options, GrowableBuffer::empty(options)); + } virtual int64_t length() const; virtual void clear(); virtual const std::shared_ptr type() const; virtual const std::shared_ptr snapshot() const; + virtual bool active() const; virtual Fillable* null(); virtual Fillable* boolean(bool x); virtual Fillable* integer(int64_t x); virtual Fillable* real(double x); virtual Fillable* beginlist(); virtual Fillable* endlist(); + virtual Fillable* begintuple(int64_t numfields); + virtual Fillable* index(int64_t index); + virtual Fillable* endtuple(); + virtual Fillable* beginrecord(int64_t disambiguator); + virtual Fillable* field_fast(const char* key); + virtual Fillable* field_check(const char* key); + virtual Fillable* endrecord(); private: const FillableOptions options_; diff --git a/include/awkward/fillable/Fillable.h b/include/awkward/fillable/Fillable.h index 5ee56209eb..ef51473593 100644 --- a/include/awkward/fillable/Fillable.h +++ b/include/awkward/fillable/Fillable.h @@ -3,6 +3,9 @@ #ifndef AWKWARD_FILLABLE_H_ #define AWKWARD_FILLABLE_H_ +#include +#include + #include "awkward/cpu-kernels/util.h" #include "awkward/Content.h" #include "awkward/type/Type.h" @@ -17,12 +20,21 @@ namespace awkward { virtual const std::shared_ptr type() const = 0; virtual const std::shared_ptr snapshot() const = 0; + virtual bool active() const = 0; virtual Fillable* null() = 0; virtual Fillable* boolean(bool x) = 0; virtual Fillable* integer(int64_t x) = 0; virtual Fillable* real(double x) = 0; virtual Fillable* 
beginlist() = 0; virtual Fillable* endlist() = 0; + virtual Fillable* begintuple(int64_t numfields) = 0; + virtual Fillable* index(int64_t index) = 0; + virtual Fillable* endtuple() = 0; + virtual Fillable* beginrecord(int64_t disambiguator) = 0; + virtual Fillable* field_fast(const char* key) = 0; + virtual Fillable* field_check(const char* key) = 0; + virtual Fillable* endrecord() = 0; + }; } diff --git a/include/awkward/fillable/FillableArray.h b/include/awkward/fillable/FillableArray.h index 9c5d7e6734..7b0e0cb1f8 100644 --- a/include/awkward/fillable/FillableArray.h +++ b/include/awkward/fillable/FillableArray.h @@ -13,7 +13,7 @@ namespace awkward { class FillableArray { public: - FillableArray(const FillableOptions& options): fillable_(new UnknownFillable(options)) { } + FillableArray(const FillableOptions& options): fillable_(UnknownFillable::fromempty(options)) { } const std::string tostring() const; int64_t length() const; @@ -22,14 +22,25 @@ namespace awkward { const std::shared_ptr snapshot() const; const std::shared_ptr getitem_at(int64_t at) const; const std::shared_ptr getitem_range(int64_t start, int64_t stop) const; + const std::shared_ptr getitem_field(const std::string& key) const; + const std::shared_ptr getitem_fields(const std::vector& keys) const; const std::shared_ptr getitem(const Slice& where) const; + bool active() const; void null(); void boolean(bool x); void integer(int64_t x); void real(double x); void beginlist(); void endlist(); + void begintuple(int64_t numfields); + void index(int64_t index); + void endtuple(); + void beginrecord(); + void beginrecord(int64_t disambiguator); + void field_fast(const char* key); + void field_check(const char* key); + void endrecord(); template void fill(const std::vector& vector) { diff --git a/include/awkward/fillable/Float64Fillable.h b/include/awkward/fillable/Float64Fillable.h index c570580633..2a68b8dbc1 100644 --- a/include/awkward/fillable/Float64Fillable.h +++ 
b/include/awkward/fillable/Float64Fillable.h @@ -11,9 +11,12 @@ namespace awkward { class Float64Fillable: public Fillable { public: - Float64Fillable(const FillableOptions& options): options_(options), buffer_(options) { } Float64Fillable(const FillableOptions& options, const GrowableBuffer& buffer): options_(options), buffer_(buffer) { } + static Float64Fillable* fromempty(const FillableOptions& options) { + return new Float64Fillable(options, GrowableBuffer::empty(options)); + } + static Float64Fillable* fromint64(const FillableOptions& options, GrowableBuffer old) { GrowableBuffer buffer = GrowableBuffer::empty(options, old.reserved()); int64_t* oldraw = old.ptr().get(); @@ -30,12 +33,20 @@ namespace awkward { virtual const std::shared_ptr type() const; virtual const std::shared_ptr snapshot() const; + virtual bool active() const; virtual Fillable* null(); virtual Fillable* boolean(bool x); virtual Fillable* integer(int64_t x); virtual Fillable* real(double x); virtual Fillable* beginlist(); virtual Fillable* endlist(); + virtual Fillable* begintuple(int64_t numfields); + virtual Fillable* index(int64_t index); + virtual Fillable* endtuple(); + virtual Fillable* beginrecord(int64_t disambiguator); + virtual Fillable* field_fast(const char* key); + virtual Fillable* field_check(const char* key); + virtual Fillable* endrecord(); private: const FillableOptions options_; diff --git a/include/awkward/fillable/Int64Fillable.h b/include/awkward/fillable/Int64Fillable.h index e42b1f4e97..7ac1abfb1a 100644 --- a/include/awkward/fillable/Int64Fillable.h +++ b/include/awkward/fillable/Int64Fillable.h @@ -11,19 +11,33 @@ namespace awkward { class Int64Fillable: public Fillable { public: - Int64Fillable(const FillableOptions& options): options_(options), buffer_(options) { } + Int64Fillable(const FillableOptions& options, const GrowableBuffer& buffer): options_(options), buffer_(buffer) { } + + static Int64Fillable* fromempty(const FillableOptions& options) { + return new 
Int64Fillable(options, GrowableBuffer::empty(options)); + } virtual int64_t length() const; virtual void clear(); virtual const std::shared_ptr type() const; virtual const std::shared_ptr snapshot() const; + virtual bool active() const; virtual Fillable* null(); virtual Fillable* boolean(bool x); virtual Fillable* integer(int64_t x); virtual Fillable* real(double x); virtual Fillable* beginlist(); virtual Fillable* endlist(); + virtual Fillable* begintuple(int64_t numfields); + virtual Fillable* index(int64_t index); + virtual Fillable* endtuple(); + virtual Fillable* beginrecord(int64_t disambiguator); + virtual Fillable* field_fast(const char* key); + virtual Fillable* field_check(const char* key); + virtual Fillable* endrecord(); + + const GrowableBuffer buffer() const { return buffer_; } private: const FillableOptions options_; diff --git a/include/awkward/fillable/ListFillable.h b/include/awkward/fillable/ListFillable.h index 084ee19c63..bd8ac40cef 100644 --- a/include/awkward/fillable/ListFillable.h +++ b/include/awkward/fillable/ListFillable.h @@ -14,22 +14,33 @@ namespace awkward { class ListFillable: public Fillable { public: - ListFillable(const FillableOptions& options): options_(options), offsets_(options), content_(new UnknownFillable(options)), begun_(false) { - offsets_.append(0); - } ListFillable(const FillableOptions& options, const GrowableBuffer& offsets, Fillable* content, bool begun): options_(options), offsets_(offsets), content_(std::shared_ptr(content)), begun_(begun) { } + static ListFillable* fromempty(const FillableOptions& options) { + GrowableBuffer offsets = GrowableBuffer::empty(options); + offsets.append(0); + return new ListFillable(options, offsets, UnknownFillable::fromempty(options), false); + } + virtual int64_t length() const; virtual void clear(); virtual const std::shared_ptr type() const; virtual const std::shared_ptr snapshot() const; + virtual bool active() const; virtual Fillable* null(); virtual Fillable* boolean(bool 
x); virtual Fillable* integer(int64_t x); virtual Fillable* real(double x); virtual Fillable* beginlist(); virtual Fillable* endlist(); + virtual Fillable* begintuple(int64_t numfields); + virtual Fillable* index(int64_t index); + virtual Fillable* endtuple(); + virtual Fillable* beginrecord(int64_t disambiguator); + virtual Fillable* field_fast(const char* key); + virtual Fillable* field_check(const char* key); + virtual Fillable* endrecord(); private: const FillableOptions options_; diff --git a/include/awkward/fillable/OptionFillable.h b/include/awkward/fillable/OptionFillable.h index 99f225a595..68ac3c3606 100644 --- a/include/awkward/fillable/OptionFillable.h +++ b/include/awkward/fillable/OptionFillable.h @@ -13,16 +13,16 @@ namespace awkward { class OptionFillable: public Fillable { public: - OptionFillable(const FillableOptions& options, const GrowableBuffer& index, Fillable* content): options_(options), index_(index), content_(content) { } + OptionFillable(const FillableOptions& options, const GrowableBuffer& offsets, Fillable* content): options_(options), offsets_(offsets), content_(content) { } static OptionFillable* fromnulls(const FillableOptions& options, int64_t nullcount, Fillable* content) { - GrowableBuffer index = GrowableBuffer::full(options, -1, nullcount); - return new OptionFillable(options, index, content); + GrowableBuffer offsets = GrowableBuffer::full(options, -1, nullcount); + return new OptionFillable(options, offsets, content); } static OptionFillable* fromvalids(const FillableOptions& options, Fillable* content) { - GrowableBuffer index = GrowableBuffer::arange(options, content->length()); - return new OptionFillable(options, index, content); + GrowableBuffer offsets = GrowableBuffer::arange(options, content->length()); + return new OptionFillable(options, offsets, content); } virtual int64_t length() const; @@ -30,16 +30,24 @@ namespace awkward { virtual const std::shared_ptr type() const; virtual const std::shared_ptr snapshot() 
const; + virtual bool active() const; virtual Fillable* null(); virtual Fillable* boolean(bool x); virtual Fillable* integer(int64_t x); virtual Fillable* real(double x); virtual Fillable* beginlist(); virtual Fillable* endlist(); + virtual Fillable* begintuple(int64_t numfields); + virtual Fillable* index(int64_t index); + virtual Fillable* endtuple(); + virtual Fillable* beginrecord(int64_t disambiguator); + virtual Fillable* field_fast(const char* key); + virtual Fillable* field_check(const char* key); + virtual Fillable* endrecord(); private: const FillableOptions options_; - GrowableBuffer index_; + GrowableBuffer offsets_; std::shared_ptr content_; void maybeupdate(Fillable* tmp); diff --git a/include/awkward/fillable/RecordFillable.h b/include/awkward/fillable/RecordFillable.h new file mode 100644 index 0000000000..a61d1307b7 --- /dev/null +++ b/include/awkward/fillable/RecordFillable.h @@ -0,0 +1,70 @@ +// BSD 3-Clause License; see https://github.com/jpivarski/awkward-1.0/blob/master/LICENSE + +#ifndef AWKWARD_RECORDFILLABLE_H_ +#define AWKWARD_RECORDFILLABLE_H_ + +#include + +#include "awkward/cpu-kernels/util.h" +#include "awkward/fillable/FillableOptions.h" +#include "awkward/fillable/GrowableBuffer.h" +#include "awkward/fillable/Fillable.h" +#include "awkward/fillable/UnknownFillable.h" + +namespace awkward { + class RecordFillable: public Fillable { + public: + RecordFillable(const FillableOptions& options, const std::vector>& contents, const std::vector& keys, const std::vector& pointers, int64_t disambiguator, int64_t length, bool begun, int64_t nextindex, int64_t nexttotry) + : options_(options) + , contents_(contents) + , keys_(keys) + , pointers_(pointers) + , disambiguator_(disambiguator) + , length_(length) + , begun_(begun) + , nextindex_(nextindex) + , nexttotry_(nexttotry) { } + + static RecordFillable* fromempty(const FillableOptions& options) { + return new RecordFillable(options, std::vector>(), std::vector(), std::vector(), 0, -1, false, 
-1, -1); + } + + + virtual int64_t length() const; + virtual void clear(); + virtual const std::shared_ptr type() const; + virtual const std::shared_ptr snapshot() const; + + virtual bool active() const; + virtual Fillable* null(); + virtual Fillable* boolean(bool x); + virtual Fillable* integer(int64_t x); + virtual Fillable* real(double x); + virtual Fillable* beginlist(); + virtual Fillable* endlist(); + virtual Fillable* begintuple(int64_t numfields); + virtual Fillable* index(int64_t index); + virtual Fillable* endtuple(); + virtual Fillable* beginrecord(int64_t disambiguator); + virtual Fillable* field_fast(const char* key); + virtual Fillable* field_check(const char* key); + virtual Fillable* endrecord(); + + int64_t disambiguator() const { return disambiguator_; } + + private: + const FillableOptions options_; + std::vector> contents_; + std::vector keys_; + std::vector pointers_; + int64_t disambiguator_; + int64_t length_; + bool begun_; + int64_t nextindex_; + int64_t nexttotry_; + + void maybeupdate(int64_t i, Fillable* tmp); + }; +} + +#endif // AWKWARD_RECORDFILLABLE_H_ diff --git a/include/awkward/fillable/TupleFillable.h b/include/awkward/fillable/TupleFillable.h new file mode 100644 index 0000000000..e2fb11968c --- /dev/null +++ b/include/awkward/fillable/TupleFillable.h @@ -0,0 +1,61 @@ +// BSD 3-Clause License; see https://github.com/jpivarski/awkward-1.0/blob/master/LICENSE + +#ifndef AWKWARD_TUPLEFILLABLE_H_ +#define AWKWARD_TUPLEFILLABLE_H_ + +#include + +#include "awkward/cpu-kernels/util.h" +#include "awkward/fillable/FillableOptions.h" +#include "awkward/fillable/GrowableBuffer.h" +#include "awkward/fillable/Fillable.h" +#include "awkward/fillable/UnknownFillable.h" + +namespace awkward { + class TupleFillable: public Fillable { + public: + TupleFillable(const FillableOptions& options, const std::vector>& contents, int64_t length, bool begun, size_t nextindex) + : options_(options) + , contents_(contents) + , length_(length) + , 
begun_(begun) + , nextindex_(nextindex) { } + + static TupleFillable* fromempty(const FillableOptions& options) { + return new TupleFillable(options, std::vector>(), -1, false, -1); + } + + virtual int64_t length() const; + virtual void clear(); + virtual const std::shared_ptr type() const; + virtual const std::shared_ptr snapshot() const; + + virtual bool active() const; + virtual Fillable* null(); + virtual Fillable* boolean(bool x); + virtual Fillable* integer(int64_t x); + virtual Fillable* real(double x); + virtual Fillable* beginlist(); + virtual Fillable* endlist(); + virtual Fillable* begintuple(int64_t numfields); + virtual Fillable* index(int64_t index); + virtual Fillable* endtuple(); + virtual Fillable* beginrecord(int64_t disambiguator); + virtual Fillable* field_fast(const char* key); + virtual Fillable* field_check(const char* key); + virtual Fillable* endrecord(); + + int64_t numfields() const { return (int64_t)contents_.size(); } + + private: + const FillableOptions options_; + std::vector> contents_; + int64_t length_; + bool begun_; + int64_t nextindex_; + + void maybeupdate(int64_t i, Fillable* tmp); + }; +} + +#endif // AWKWARD_TUPLEFILLABLE_H_ diff --git a/include/awkward/fillable/UnionFillable.h b/include/awkward/fillable/UnionFillable.h index d153e6a414..a0ab1e5c66 100644 --- a/include/awkward/fillable/UnionFillable.h +++ b/include/awkward/fillable/UnionFillable.h @@ -11,9 +11,12 @@ #include "awkward/fillable/Fillable.h" namespace awkward { + class TupleFillable; + class RecordFillable; + class UnionFillable: public Fillable { public: - UnionFillable(const FillableOptions& options, const GrowableBuffer& types, const GrowableBuffer& offsets, std::vector> contents): options_(options), types_(types), offsets_(offsets), contents_(contents) { } + UnionFillable(const FillableOptions& options, const GrowableBuffer& types, const GrowableBuffer& offsets, std::vector> contents): options_(options), types_(types), offsets_(offsets), contents_(contents), 
current_(-1) { } static UnionFillable* fromsingle(const FillableOptions& options, Fillable* firstcontent) { GrowableBuffer types = GrowableBuffer::full(options, 0, firstcontent->length()); @@ -27,27 +30,27 @@ namespace awkward { virtual const std::shared_ptr type() const; virtual const std::shared_ptr snapshot() const; + virtual bool active() const; virtual Fillable* null(); virtual Fillable* boolean(bool x); virtual Fillable* integer(int64_t x); virtual Fillable* real(double x); virtual Fillable* beginlist(); virtual Fillable* endlist(); + virtual Fillable* begintuple(int64_t numfields); + virtual Fillable* index(int64_t index); + virtual Fillable* endtuple(); + virtual Fillable* beginrecord(int64_t disambiguator); + virtual Fillable* field_fast(const char* key); + virtual Fillable* field_check(const char* key); + virtual Fillable* endrecord(); private: const FillableOptions options_; GrowableBuffer types_; GrowableBuffer offsets_; std::vector> contents_; - - template - T* findfillable(int8_t& type); - template - T* maybenew(T* fillable, int64_t& length); - template - Fillable* get1(int8_t& type, int64_t& length); - template - Fillable* get2(int8_t& type, int64_t& length); + int8_t current_; }; } diff --git a/include/awkward/fillable/UnknownFillable.h b/include/awkward/fillable/UnknownFillable.h index c1c34c03fc..0da621800b 100644 --- a/include/awkward/fillable/UnknownFillable.h +++ b/include/awkward/fillable/UnknownFillable.h @@ -12,26 +12,35 @@ namespace awkward { class UnknownFillable: public Fillable { public: - UnknownFillable(const FillableOptions& options): options_(options), nullcount_(0) { } + UnknownFillable(const FillableOptions& options, int64_t nullcount): options_(options), nullcount_(nullcount) { } + + static UnknownFillable* fromempty(const FillableOptions& options) { + return new UnknownFillable(options, 0); + } virtual int64_t length() const; virtual void clear(); virtual const std::shared_ptr type() const; virtual const std::shared_ptr 
snapshot() const; + virtual bool active() const; virtual Fillable* null(); virtual Fillable* boolean(bool x); virtual Fillable* integer(int64_t x); virtual Fillable* real(double x); virtual Fillable* beginlist(); virtual Fillable* endlist(); + virtual Fillable* begintuple(int64_t numfields); + virtual Fillable* index(int64_t index); + virtual Fillable* endtuple(); + virtual Fillable* beginrecord(int64_t disambiguator); + virtual Fillable* field_fast(const char* key); + virtual Fillable* field_check(const char* key); + virtual Fillable* endrecord(); private: const FillableOptions options_; int64_t nullcount_; - - template - Fillable* prepare() const; }; } diff --git a/include/awkward/io/json.h b/include/awkward/io/json.h index 07dad54ea6..2510ee39b2 100644 --- a/include/awkward/io/json.h +++ b/include/awkward/io/json.h @@ -36,7 +36,7 @@ namespace awkward { virtual void endlist() = 0; virtual void beginrec() = 0; virtual void endrec() = 0; - virtual void fieldname(const char* x) = 0; + virtual void fieldkey(const char* x) = 0; virtual void string(const char* x) = 0; }; @@ -56,7 +56,7 @@ namespace awkward { virtual void endlist() { writer_.EndArray(); } virtual void beginrec() { writer_.StartObject(); } virtual void endrec() { writer_.EndObject(); } - virtual void fieldname(const char* x) { writer_.Key(x); } + virtual void fieldkey(const char* x) { writer_.Key(x); } virtual void string(const char* x) { writer_.String(x); } std::string tostring() { @@ -84,7 +84,7 @@ namespace awkward { virtual void endlist() { writer_.EndArray(); } virtual void beginrec() { writer_.StartObject(); } virtual void endrec() { writer_.EndObject(); } - virtual void fieldname(const char* x) { writer_.Key(x); } + virtual void fieldkey(const char* x) { writer_.Key(x); } virtual void string(const char* x) { writer_.String(x); } std::string tostring() { @@ -112,7 +112,7 @@ namespace awkward { virtual void endlist() { writer_.EndArray(); } virtual void beginrec() { writer_.StartObject(); } virtual 
void endrec() { writer_.EndObject(); } - virtual void fieldname(const char* x) { writer_.Key(x); } + virtual void fieldkey(const char* x) { writer_.Key(x); } virtual void string(const char* x) { writer_.String(x); } private: @@ -137,7 +137,7 @@ namespace awkward { virtual void endlist() { writer_.EndArray(); } virtual void beginrec() { writer_.StartObject(); } virtual void endrec() { writer_.EndObject(); } - virtual void fieldname(const char* x) { writer_.Key(x); } + virtual void fieldkey(const char* x) { writer_.Key(x); } virtual void string(const char* x) { writer_.String(x); } private: diff --git a/include/awkward/type/RecordType.h b/include/awkward/type/RecordType.h new file mode 100644 index 0000000000..985d86ba6f --- /dev/null +++ b/include/awkward/type/RecordType.h @@ -0,0 +1,55 @@ +// BSD 3-Clause License; see https://github.com/jpivarski/awkward-1.0/blob/master/LICENSE + +#ifndef AWKWARD_RECORDTYPE_H_ +#define AWKWARD_RECORDTYPE_H_ + +#include +#include +#include + +#include "awkward/type/Type.h" + +namespace awkward { + class RecordType: public Type { + public: + typedef std::unordered_map Lookup; + typedef std::vector ReverseLookup; + + RecordType(const std::vector>& types, const std::shared_ptr& lookup, const std::shared_ptr& reverselookup) + : types_(types) + , lookup_(lookup) + , reverselookup_(reverselookup) { } + RecordType(const std::vector>& types) + : types_(types) + , lookup_(nullptr) + , reverselookup_(nullptr) { } + + const std::vector> types() const { return types_; }; + const std::shared_ptr lookup() const { return lookup_; } + const std::shared_ptr reverselookup() const { return reverselookup_; } + + virtual std::string tostring_part(std::string indent, std::string pre, std::string post) const; + virtual const std::shared_ptr shallow_copy() const; + virtual bool equal(std::shared_ptr other) const; + virtual bool compatible(std::shared_ptr other, bool bool_is_int, bool int_is_float, bool ignore_null, bool unknown_is_anything) const; + + 
int64_t numfields() const; + int64_t index(const std::string& key) const; + const std::string key(int64_t index) const; + bool has(const std::string& key) const; + const std::vector aliases(int64_t index) const; + const std::vector aliases(const std::string& key) const; + const std::shared_ptr field(int64_t index) const; + const std::shared_ptr field(const std::string& key) const; + const std::vector keys() const; + const std::vector> values() const; + const std::vector>> items() const; + + private: + const std::vector> types_; + const std::shared_ptr lookup_; + const std::shared_ptr reverselookup_; + }; +} + +#endif // AWKWARD_RECORDTYPE_H_ diff --git a/include/awkward/type/UnionType.h b/include/awkward/type/UnionType.h index 7cf13f7d33..f86e2778c8 100644 --- a/include/awkward/type/UnionType.h +++ b/include/awkward/type/UnionType.h @@ -10,7 +10,7 @@ namespace awkward { class UnionType: public Type { public: - UnionType(const std::vector> types): types_(types) { } + UnionType(const std::vector>& types): types_(types) { } virtual std::string tostring_part(std::string indent, std::string pre, std::string post) const; virtual const std::shared_ptr shallow_copy() const; @@ -19,7 +19,7 @@ namespace awkward { int64_t numtypes() const; const std::vector> types() const; - const std::shared_ptr type(int64_t i) const; + const std::shared_ptr type(int64_t index) const; private: const std::vector> types_; diff --git a/include/awkward/util.h b/include/awkward/util.h index 4ba327fa2b..083d3741bd 100644 --- a/include/awkward/util.h +++ b/include/awkward/util.h @@ -27,6 +27,8 @@ namespace awkward { void operator()(T const *p) { } }; + std::string quote(std::string x, bool doublequote); + template Error awkward_identity64_from_listoffsetarray(int64_t* toptr, const int64_t* fromptr, const T* fromoffsets, int64_t fromptroffset, int64_t offsetsoffset, int64_t tolength, int64_t fromlength, int64_t fromwidth); template diff --git a/src/libawkward/Content.cpp b/src/libawkward/Content.cpp 
index 12c562583b..15aee92814 100644 --- a/src/libawkward/Content.cpp +++ b/src/libawkward/Content.cpp @@ -8,8 +8,17 @@ #include "awkward/Content.h" namespace awkward { - const ArrayType Content::type() const { - return ArrayType(type_part(), length()); + bool Content::isscalar() const { + return false; + } + + const std::shared_ptr Content::type() const { + if (isscalar()) { + return type_part(); + } + else { + return std::shared_ptr(new ArrayType(type_part(), length())); + } } const std::string Content::tostring() const { @@ -19,16 +28,12 @@ namespace awkward { const std::string Content::tojson(bool pretty, int64_t maxdecimals) const { if (pretty) { ToJsonPrettyString builder(maxdecimals); - builder.beginlist(); tojson_part(builder); - builder.endlist(); return builder.tostring(); } else { ToJsonString builder(maxdecimals); - builder.beginlist(); tojson_part(builder); - builder.endlist(); return builder.tostring(); } } @@ -83,6 +88,12 @@ namespace awkward { else if (SliceArray64* array = dynamic_cast(head.get())) { return getitem_next(*array, tail, advanced); } + else if (SliceField* field = dynamic_cast(head.get())) { + return getitem_next(*field, tail, advanced); + } + else if (SliceFields* fields = dynamic_cast(head.get())) { + return getitem_next(*fields, tail, advanced); + } else { throw std::runtime_error("unrecognized slice type"); } @@ -117,10 +128,22 @@ namespace awkward { return std::shared_ptr(new RegularArray(Identity::none(), getitem_next(nexthead, nexttail, advanced), 1)); } + const std::shared_ptr Content::getitem_next(const SliceField& field, const Slice& tail, const Index64& advanced) const { + std::shared_ptr nexthead = tail.head(); + Slice nexttail = tail.tail(); + return getitem_field(field.key()).get()->getitem_next(nexthead, nexttail, advanced); + } + + const std::shared_ptr Content::getitem_next(const SliceFields& fields, const Slice& tail, const Index64& advanced) const { + std::shared_ptr nexthead = tail.head(); + Slice nexttail = 
tail.tail(); + return getitem_fields(fields.keys()).get()->getitem_next(nexthead, nexttail, advanced); + } + const std::shared_ptr Content::getitem_next_array_wrap(const std::shared_ptr outcontent, const std::vector& shape) const { std::shared_ptr out(new RegularArray(Identity::none(), outcontent, (int64_t)shape[shape.size() - 1])); for (int64_t i = (int64_t)shape.size() - 2; i >= 0; i--) { - out = std::shared_ptr(new RegularArray(Identity::none(), out, (int64_t)shape[i])); + out = std::shared_ptr(new RegularArray(Identity::none(), out, (int64_t)shape[(size_t)i])); } return out; } diff --git a/src/libawkward/Identity.cpp b/src/libawkward/Identity.cpp index 3167fe0686..b713ee71d3 100644 --- a/src/libawkward/Identity.cpp +++ b/src/libawkward/Identity.cpp @@ -33,21 +33,17 @@ namespace awkward { } template - const std::string IdentityOf::location(int64_t where) const { + const std::string IdentityOf::location_at(int64_t at) const { std::stringstream out; - int64_t fieldi = 0; - int64_t widthi = 0; - for (int64_t bothi = 0; bothi < (int64_t)fieldloc_.size() + width_; bothi++) { - if (bothi != 0) { + for (int64_t i = 0; i < width_; i++) { + if (i != 0) { out << ", "; } - if (fieldi < (int64_t)fieldloc_.size() && fieldloc_[(size_t)fieldi].first == bothi) { - out << "\"" << fieldloc_[(size_t)fieldi].second << "\""; - fieldi++; - } - else { - out << ptr_.get()[offset_ + where*width_ + widthi]; - widthi++; + out << ptr_.get()[offset_ + at*width_ + i]; + for (auto pair : fieldloc_) { + if (pair.first == i) { + out << ", " << util::quote(pair.second, true); + } } } return out.str(); @@ -81,7 +77,7 @@ namespace awkward { if (i != 0) { out << " "; } - out << "(" << fieldloc_[i].first << ", '" << fieldloc_[i].second << "')"; + out << "(" << fieldloc_[i].first << ", " << util::quote(fieldloc_[i].second, false) << ")"; } out << "]\" width=\"" << width_ << "\" offset=\"" << offset_ << "\" length=\"" << length_ << "\" at=\"0x"; out << std::hex << std::setw(12) << std::setfill('0') << 
reinterpret_cast(ptr_.get()) << "\"/>" << post; @@ -137,6 +133,16 @@ namespace awkward { return tostring_part("", "", ""); } + template + const std::shared_ptr IdentityOf::withfieldloc(const FieldLoc& fieldloc) const { + return std::shared_ptr(new IdentityOf(ref_, fieldloc, offset_, width_, length_, ptr_)); + } + + template + int64_t IdentityOf::value(int64_t row, int64_t col) const { + return (int64_t)ptr_.get()[offset_ + row*width_ + col]; + } + template const std::vector IdentityOf::getitem_at(int64_t at) const { int64_t regular_at = at; diff --git a/src/libawkward/Iterator.cpp b/src/libawkward/Iterator.cpp index d3d7194782..a7342ef8d3 100644 --- a/src/libawkward/Iterator.cpp +++ b/src/libawkward/Iterator.cpp @@ -6,16 +6,16 @@ namespace awkward { const bool Iterator::isdone() const { - return where_ >= content_.get()->length(); + return at_ >= content_.get()->length(); } const std::shared_ptr Iterator::next() { - return content_.get()->getitem_at_nowrap(where_++); + return content_.get()->getitem_at_nowrap(at_++); } const std::string Iterator::tostring_part(const std::string indent, const std::string pre, const std::string post) const { std::stringstream out; - out << indent << pre << "\n"; + out << indent << pre << "\n"; out << content_.get()->tostring_part(indent + std::string(" "), "", "\n"); out << indent << "" << post; return out.str(); diff --git a/src/libawkward/Slice.cpp b/src/libawkward/Slice.cpp index ce4975c372..710e279e52 100644 --- a/src/libawkward/Slice.cpp +++ b/src/libawkward/Slice.cpp @@ -12,9 +12,18 @@ namespace awkward { } const std::string SliceRange::tostring() const { - return (hasstart() ? std::to_string(start_) : std::string("")) + std::string(":") + - (hasstop() ? std::to_string(stop_) : std::string("")) + std::string(":") + - (step_ != 1 ? 
std::to_string(step_) : std::string("")); + std::stringstream out; + if (hasstart()) { + out << start_; + } + out << ":"; + if (hasstop()) { + out << stop_; + } + if (step_ != 1) { + out << ":" << step_; + } + return out.str(); } const std::string SliceEllipsis::tostring() const { @@ -116,6 +125,23 @@ namespace awkward { template class SliceArrayOf; + const std::string SliceField::tostring() const { + return util::quote(key_, true); + } + + const std::string SliceFields::tostring() const { + std::stringstream out; + out << "["; + for (size_t i = 0; i < keys_.size(); i++) { + if (i != 0) { + out << ", "; + } + out << util::quote(keys_[i], true); + } + out << "]"; + return out.str(); + } + int64_t Slice::length() const { return (int64_t)items_.size(); } @@ -265,6 +291,12 @@ namespace awkward { else if (dynamic_cast(items_[i].get()) != nullptr) { types.push_back('A'); } + else if (dynamic_cast(items_[i].get()) != nullptr) { + types.push_back('"'); + } + else if (dynamic_cast(items_[i].get()) != nullptr) { + types.push_back('['); + } } if (std::count(types.begin(), types.end(), '.') > 1) { diff --git a/src/libawkward/array/EmptyArray.cpp b/src/libawkward/array/EmptyArray.cpp index 5961ec8611..c0beaa39a6 100644 --- a/src/libawkward/array/EmptyArray.cpp +++ b/src/libawkward/array/EmptyArray.cpp @@ -35,7 +35,8 @@ namespace awkward { } void EmptyArray::tojson_part(ToJson& builder) const { - // Do nothing (builder.beginlist() and builder.endlist() are called outside of tojson_part). 
+ builder.beginlist(); + builder.endlist(); } const std::shared_ptr EmptyArray::type_part() const { @@ -74,6 +75,13 @@ namespace awkward { return shallow_copy(); } + const std::shared_ptr EmptyArray::getitem_field(const std::string& key) const { + throw std::invalid_argument(std::string("cannot slice ") + classname() + std::string(" by field name")); + } + + const std::shared_ptr EmptyArray::getitem_fields(const std::vector& keys) const { + throw std::invalid_argument(std::string("cannot slice ") + classname() + std::string(" by field name")); + } const std::shared_ptr EmptyArray::carry(const Index64& carry) const { return shallow_copy(); @@ -98,4 +106,12 @@ namespace awkward { return std::shared_ptr(nullptr); // make Windows compiler happy } + const std::shared_ptr EmptyArray::getitem_next(const SliceField& field, const Slice& tail, const Index64& advanced) const { + throw std::invalid_argument(field.tostring() + std::string(" is not a valid slice type for ") + classname()); + } + + const std::shared_ptr EmptyArray::getitem_next(const SliceFields& fields, const Slice& tail, const Index64& advanced) const { + throw std::invalid_argument(fields.tostring() + std::string(" is not a valid slice type for ") + classname()); + } + } diff --git a/src/libawkward/array/ListArray.cpp b/src/libawkward/array/ListArray.cpp index 72d635fbbe..3fc9a8802f 100644 --- a/src/libawkward/array/ListArray.cpp +++ b/src/libawkward/array/ListArray.cpp @@ -152,11 +152,11 @@ namespace awkward { template void ListArrayOf::tojson_part(ToJson& builder) const { int64_t len = length(); + builder.beginlist(); for (int64_t i = 0; i < len; i++) { - builder.beginlist(); getitem_at_nowrap(i).get()->tojson_part(builder); - builder.endlist(); } + builder.endlist(); } template @@ -247,6 +247,16 @@ namespace awkward { return std::shared_ptr(new ListArrayOf(id, starts_.getitem_range_nowrap(start, stop), stops_.getitem_range_nowrap(start, stop), content_)); } + template + const std::shared_ptr 
ListArrayOf::getitem_field(const std::string& key) const { + return std::shared_ptr(new ListArrayOf(id_, starts_, stops_, content_.get()->getitem_field(key))); + } + + template + const std::shared_ptr ListArrayOf::getitem_fields(const std::vector& keys) const { + return std::shared_ptr(new ListArrayOf(id_, starts_, stops_, content_.get()->getitem_fields(keys))); + } + template const std::shared_ptr ListArrayOf::carry(const Index64& carry) const { int64_t lenstarts = starts_.length(); diff --git a/src/libawkward/array/ListOffsetArray.cpp b/src/libawkward/array/ListOffsetArray.cpp index 8c0711f60c..62ad268449 100644 --- a/src/libawkward/array/ListOffsetArray.cpp +++ b/src/libawkward/array/ListOffsetArray.cpp @@ -155,11 +155,11 @@ namespace awkward { template void ListOffsetArrayOf::tojson_part(ToJson& builder) const { int64_t len = length(); + builder.beginlist(); for (int64_t i = 0; i < len; i++) { - builder.beginlist(); getitem_at_nowrap(i).get()->tojson_part(builder); - builder.endlist(); } + builder.endlist(); } template @@ -241,6 +241,16 @@ namespace awkward { return std::shared_ptr(new ListOffsetArrayOf(id, offsets_.getitem_range_nowrap(start, stop + 1), content_)); } + template + const std::shared_ptr ListOffsetArrayOf::getitem_field(const std::string& key) const { + return std::shared_ptr(new ListOffsetArrayOf(id_, offsets_, content_.get()->getitem_field(key))); + } + + template + const std::shared_ptr ListOffsetArrayOf::getitem_fields(const std::vector& keys) const { + return std::shared_ptr(new ListOffsetArrayOf(id_, offsets_, content_.get()->getitem_fields(keys))); + } + template const std::shared_ptr ListOffsetArrayOf::carry(const Index64& carry) const { IndexOf starts = make_starts(offsets_); diff --git a/src/libawkward/array/NumpyArray.cpp b/src/libawkward/array/NumpyArray.cpp index 3330704682..55949e9700 100644 --- a/src/libawkward/array/NumpyArray.cpp +++ b/src/libawkward/array/NumpyArray.cpp @@ -8,6 +8,7 @@ #include "awkward/cpu-kernels/getitem.h" 
#include "awkward/type/PrimitiveType.h" #include "awkward/type/RegularType.h" +#include "awkward/util.h" #include "awkward/array/NumpyArray.h" @@ -16,10 +17,6 @@ namespace awkward { return shape_.size(); } - bool NumpyArray::isscalar() const { - return ndim() == 0; - } - bool NumpyArray::isempty() const { for (auto x : shape_) { if (x == 0) { @@ -50,6 +47,10 @@ namespace awkward { return *reinterpret_cast(reinterpret_cast(ptr_.get()) + byteoffset_ + at); } + bool NumpyArray::isscalar() const { + return ndim() == 0; + } + const std::string NumpyArray::classname() const { return "NumpyArray"; } @@ -109,7 +110,7 @@ namespace awkward { const std::string NumpyArray::tostring_part(const std::string indent, const std::string pre, const std::string post) const { assert(!isscalar()); std::stringstream out; - out << indent << pre << "<" << classname() << " format=\"" << format_ << "\" shape=\""; + out << indent << pre << "<" << classname() << " format=" << util::quote(format_, true) << " shape=\""; for (ssize_t i = 0; i < ndim(); i++) { if (i != 0) { out << " "; @@ -187,90 +188,58 @@ namespace awkward { return out.str(); } - // FIXME: turn each of these three functions into (builder, array, offset, length) - // so that the can be called once per multidimensional array (no getitem_at_nowrap; - // do it internally in tojson_boolean/integer/real). 
- - void tojson_boolean(ToJson& builder, bool* array, int64_t length) { - for (int i = 0; i < length; i++) { - builder.boolean(array[i]); - } - } - - template - void tojson_integer(ToJson& builder, T* array, int64_t length) { - for (int i = 0; i < length; i++) { - builder.integer(array[i]); + void NumpyArray::tojson_part(ToJson& builder) const { + if (format_.compare("d") == 0) { + tojson_real(builder); } - } - - template - void tojson_real(ToJson& builder, T* array, int64_t length) { - for (int i = 0; i < length; i++) { - builder.real(array[i]); + else if (format_.compare("f") == 0) { + tojson_real(builder); } - } - - void NumpyArray::tojson_part(ToJson& builder) const { - assert(!isscalar()); - if (ndim() == 1) { - if (format_.compare("d") == 0) { - tojson_real(builder, reinterpret_cast(byteptr()), length()); - } - else if (format_.compare("f") == 0) { - tojson_real(builder, reinterpret_cast(byteptr()), length()); - } #ifdef _MSC_VER - else if (format_.compare("q") == 0) { + else if (format_.compare("q") == 0) { #else - else if (format_.compare("l") == 0) { + else if (format_.compare("l") == 0) { #endif - tojson_integer(builder, reinterpret_cast(byteptr()), length()); - } + tojson_integer(builder); + } #ifdef _MSC_VER - else if (format_.compare("Q") == 0) { + else if (format_.compare("Q") == 0) { #else - else if (format_.compare("L") == 0) { + else if (format_.compare("L") == 0) { #endif - tojson_integer(builder, reinterpret_cast(byteptr()), length()); - } + tojson_integer(builder); + } #ifdef _MSC_VER else if (format_.compare("l") == 0) { #else else if (format_.compare("i") == 0) { #endif - tojson_integer(builder, reinterpret_cast(byteptr()), length()); - } + tojson_integer(builder); + } #ifdef _MSC_VER - else if (format_.compare("L") == 0) { + else if (format_.compare("L") == 0) { #else - else if (format_.compare("I") == 0) { + else if (format_.compare("I") == 0) { #endif - tojson_integer(builder, reinterpret_cast(byteptr()), length()); - } - else if 
(format_.compare("h") == 0) { - tojson_real(builder, reinterpret_cast(byteptr()), length()); - } - else if (format_.compare("H") == 0) { - tojson_real(builder, reinterpret_cast(byteptr()), length()); - } - else if (format_.compare("b") == 0) { - tojson_real(builder, reinterpret_cast(byteptr()), length()); - } - else if (format_.compare("B") == 0 || format_.compare("c") == 0) { - tojson_real(builder, reinterpret_cast(byteptr()), length()); - } - else { - throw std::invalid_argument(std::string("cannot convert Numpy format \"") + format_ + std::string("\" into JSON")); - } + tojson_integer(builder); + } + else if (format_.compare("h") == 0) { + tojson_real(builder); + } + else if (format_.compare("H") == 0) { + tojson_real(builder); + } + else if (format_.compare("b") == 0) { + tojson_real(builder); + } + else if (format_.compare("B") == 0) { + tojson_real(builder); + } + else if (format_.compare("?") == 0) { + tojson_boolean(builder); } else { - int64_t len = length(); - for (int64_t i = 0; i < len; i++) { - builder.beginlist(); - getitem_at_nowrap(i).get()->tojson_part(builder); - builder.endlist(); - } + throw std::invalid_argument(std::string("cannot convert Numpy format \"") + format_ + std::string("\" into JSON")); } } @@ -338,7 +307,7 @@ namespace awkward { int64_t NumpyArray::length() const { if (isscalar()) { - return -1; + return -1; // just like Record, which is also a scalar } else { return (int64_t)shape_[0]; @@ -414,6 +383,14 @@ namespace awkward { return std::shared_ptr(new NumpyArray(id, ptr_, shape, strides_, byteoffset, itemsize_, format_)); } + const std::shared_ptr NumpyArray::getitem_field(const std::string& key) const { + throw std::invalid_argument(std::string("cannot slice ") + classname() + std::string(" by field name")); + } + + const std::shared_ptr NumpyArray::getitem_fields(const std::vector& keys) const { + throw std::invalid_argument(std::string("cannot slice ") + classname() + std::string(" by field name")); + } + const std::shared_ptr 
NumpyArray::getitem(const Slice& where) const { assert(!isscalar()); @@ -605,6 +582,12 @@ namespace awkward { else if (SliceNewAxis* newaxis = dynamic_cast(head.get())) { return getitem_bystrides(*newaxis, tail, length); } + else if (SliceField* field = dynamic_cast(head.get())) { + throw std::invalid_argument(field->tostring() + std::string(" is not a valid slice type for ") + classname()); + } + else if (SliceFields* fields = dynamic_cast(head.get())) { + throw std::invalid_argument(fields->tostring() + std::string(" is not a valid slice type for ") + classname()); + } else { throw std::runtime_error("unrecognized slice item type"); } @@ -738,6 +721,12 @@ namespace awkward { else if (SliceArray64* array = dynamic_cast(head.get())) { return getitem_next(*array, tail, carry, advanced, length, stride, first); } + else if (SliceField* field = dynamic_cast(head.get())) { + throw std::invalid_argument(field->tostring() + std::string(" is not a valid slice type for ") + classname()); + } + else if (SliceFields* fields = dynamic_cast(head.get())) { + throw std::invalid_argument(fields->tostring() + std::string(" is not a valid slice type for ") + classname()); + } else { throw std::runtime_error("unrecognized slice item type"); } @@ -942,4 +931,84 @@ namespace awkward { } } + void NumpyArray::tojson_boolean(ToJson& builder) const { + if (ndim() == 0) { + bool* array = reinterpret_cast(byteptr()); + builder.boolean(array[0]); + } + else if (ndim() == 1) { + bool* array = reinterpret_cast(byteptr()); + builder.beginlist(); + for (int64_t i = 0; i < length(); i++) { + builder.boolean(array[i]); + } + builder.endlist(); + } + else { + const std::vector shape(shape_.begin() + 1, shape_.end()); + const std::vector strides(strides_.begin() + 1, strides_.end()); + builder.beginlist(); + for (int64_t i = 0; i < length(); i++) { + ssize_t byteoffset = byteoffset_ + strides_[0]*((ssize_t)i); + NumpyArray numpy(Identity::none(), ptr_, shape, strides, byteoffset, itemsize_, format_); 
+ numpy.tojson_boolean(builder); + } + builder.endlist(); + } + } + + template + void NumpyArray::tojson_integer(ToJson& builder) const { + if (ndim() == 0) { + T* array = reinterpret_cast(byteptr()); + builder.integer(array[0]); + } + else if (ndim() == 1) { + T* array = reinterpret_cast(byteptr()); + builder.beginlist(); + for (int64_t i = 0; i < length(); i++) { + builder.integer(array[i]); + } + builder.endlist(); + } + else { + const std::vector shape(shape_.begin() + 1, shape_.end()); + const std::vector strides(strides_.begin() + 1, strides_.end()); + builder.beginlist(); + for (int64_t i = 0; i < length(); i++) { + ssize_t byteoffset = byteoffset_ + strides_[0]*((ssize_t)i); + NumpyArray numpy(Identity::none(), ptr_, shape, strides, byteoffset, itemsize_, format_); + numpy.tojson_integer(builder); + } + builder.endlist(); + } + } + + template + void NumpyArray::tojson_real(ToJson& builder) const { + if (ndim() == 0) { + T* array = reinterpret_cast(byteptr()); + builder.real(array[0]); + } + else if (ndim() == 1) { + T* array = reinterpret_cast(byteptr()); + builder.beginlist(); + for (int64_t i = 0; i < length(); i++) { + builder.real(array[i]); + } + builder.endlist(); + } + else { + const std::vector shape(shape_.begin() + 1, shape_.end()); + const std::vector strides(strides_.begin() + 1, strides_.end()); + builder.beginlist(); + for (int64_t i = 0; i < length(); i++) { + ssize_t byteoffset = byteoffset_ + strides_[0]*((ssize_t)i); + NumpyArray numpy(Identity::none(), ptr_, shape, strides, byteoffset, itemsize_, format_); + numpy.tojson_real(builder); + } + builder.endlist(); + } + } + } diff --git a/src/libawkward/array/Record.cpp b/src/libawkward/array/Record.cpp new file mode 100644 index 0000000000..9f1a4dc501 --- /dev/null +++ b/src/libawkward/array/Record.cpp @@ -0,0 +1,216 @@ +// BSD 3-Clause License; see https://github.com/jpivarski/awkward-1.0/blob/master/LICENSE + +#include + +#include "awkward/cpu-kernels/identity.h" +#include 
"awkward/cpu-kernels/getitem.h" +#include "awkward/type/RecordType.h" + +#include "awkward/array/Record.h" + +namespace awkward { + bool Record::isscalar() const { + return true; + } + + const std::string Record::classname() const { + return "Record"; + } + + const std::shared_ptr Record::id() const { + std::shared_ptr recid = recordarray_.id(); + if (recid.get() == nullptr) { + return recid; + } + else { + return recid.get()->getitem_range_nowrap(at_, at_ + 1); + } + } + + void Record::setid() { + throw std::runtime_error("undefined operation: Record::setid"); + } + + void Record::setid(const std::shared_ptr id) { + throw std::runtime_error("undefined operation: Record::setid"); + } + + const std::string Record::tostring_part(const std::string indent, const std::string pre, const std::string post) const { + std::stringstream out; + out << indent << pre << "<" << classname() << " at=\"" << at_ << "\">\n"; + out << recordarray_.tostring_part(indent + std::string(" "), "", "\n"); + out << indent << "" << post; + return out.str(); + } + + void Record::tojson_part(ToJson& builder) const { + size_t cols = (size_t)numfields(); + std::shared_ptr keys = recordarray_.reverselookup(); + if (istuple()) { + keys = std::shared_ptr(new RecordArray::ReverseLookup); + for (size_t j = 0; j < cols; j++) { + keys.get()->push_back(std::to_string(j)); + } + } + std::vector> contents = recordarray_.contents(); + builder.beginrec(); + for (size_t j = 0; j < cols; j++) { + builder.fieldkey(keys.get()->at(j).c_str()); + contents[j].get()->getitem_at_nowrap(at_).get()->tojson_part(builder); + } + builder.endrec(); + } + + const std::shared_ptr Record::type_part() const { + return recordarray_.type_part(); + } + + int64_t Record::length() const { + return -1; // just like NumpyArray with ndim == 0, which is also a scalar + } + + const std::shared_ptr Record::shallow_copy() const { + return std::shared_ptr(new Record(recordarray_, at_)); + } + + void Record::check_for_iteration() const { + if 
(recordarray_.id().get() != nullptr && recordarray_.id().get()->length() != 1) { + util::handle_error(failure("len(id) != 1 for scalar Record", kSliceNone, kSliceNone), recordarray_.id().get()->classname(), nullptr); + } + } + + const std::shared_ptr Record::getitem_nothing() const { + throw std::runtime_error("undefined operation: Record::getitem_nothing"); + } + + const std::shared_ptr Record::getitem_at(int64_t at) const { + throw std::invalid_argument(std::string("scalar Record can only be sliced by field name (string); try ") + util::quote(std::to_string(at), true)); + } + + const std::shared_ptr Record::getitem_at_nowrap(int64_t at) const { + throw std::invalid_argument(std::string("scalar Record can only be sliced by field name (string); try ") + util::quote(std::to_string(at), true)); + } + + const std::shared_ptr Record::getitem_range(int64_t start, int64_t stop) const { + throw std::invalid_argument("scalar Record can only be sliced by field name (string)"); + } + + const std::shared_ptr Record::getitem_range_nowrap(int64_t start, int64_t stop) const { + throw std::invalid_argument("scalar Record can only be sliced by field name (string)"); + } + + const std::shared_ptr Record::getitem_field(const std::string& key) const { + return recordarray_.field(key).get()->getitem_at_nowrap(at_); + } + + const std::shared_ptr Record::getitem_fields(const std::vector& keys) const { + RecordArray out(recordarray_.id(), length(), istuple()); + if (istuple()) { + for (auto key : keys) { + out.append(recordarray_.field(key)); + } + } + else { + for (auto key : keys) { + out.append(recordarray_.field(key), key); + } + } + return out.getitem_at_nowrap(at_); + } + + const std::shared_ptr Record::carry(const Index64& carry) const { + throw std::runtime_error("undefined operation: Record::carry"); + } + + const std::pair Record::minmax_depth() const { + return recordarray_.minmax_depth(); + } + + int64_t Record::numfields() const { + return recordarray_.numfields(); + } + + 
int64_t Record::index(const std::string& key) const { + return recordarray_.index(key); + } + + const std::string Record::key(int64_t index) const { + return recordarray_.key(index); + } + + bool Record::has(const std::string& key) const { + return recordarray_.has(key); + } + + const std::vector Record::aliases(int64_t index) const { + return recordarray_.aliases(index); + } + + const std::vector Record::aliases(const std::string& key) const { + return recordarray_.aliases(key); + } + + const std::shared_ptr Record::field(int64_t index) const { + return recordarray_.field(index).get()->getitem_at_nowrap(at_); + } + + const std::shared_ptr Record::field(const std::string& key) const { + return recordarray_.field(key).get()->getitem_at_nowrap(at_); + } + + const std::vector Record::keys() const { + return recordarray_.keys(); + } + + const std::vector> Record::values() const { + std::vector> out; + int64_t cols = numfields(); + for (int64_t j = 0; j < cols; j++) { + out.push_back(recordarray_.field(j).get()->getitem_at_nowrap(at_)); + } + return out; + } + + const std::vector>> Record::items() const { + std::vector>> out; + std::shared_ptr keys = recordarray_.reverselookup(); + if (istuple()) { + int64_t cols = numfields(); + for (int64_t j = 0; j < cols; j++) { + out.push_back(std::pair>(std::to_string(j), recordarray_.field(j).get()->getitem_at_nowrap(at_))); + } + } + else { + int64_t cols = numfields(); + for (int64_t j = 0; j < cols; j++) { + out.push_back(std::pair>(keys.get()->at((size_t)j), recordarray_.field(j).get()->getitem_at_nowrap(at_))); + } + } + return out; + } + + const Record Record::withoutkeys() const { + return Record(recordarray_.withoutkeys(), at_); + } + + const std::shared_ptr Record::getitem_next(const SliceAt& at, const Slice& tail, const Index64& advanced) const { + throw std::runtime_error("undefined operation: Record::getitem_next(at)"); + } + + const std::shared_ptr Record::getitem_next(const SliceRange& range, const Slice& tail, 
const Index64& advanced) const { + throw std::runtime_error("undefined operation: Record::getitem_next(range)"); + } + + const std::shared_ptr Record::getitem_next(const SliceArray64& array, const Slice& tail, const Index64& advanced) const { + throw std::runtime_error("undefined operation: Record::getitem_next(array)"); + } + + const std::shared_ptr Record::getitem_next(const SliceField& field, const Slice& tail, const Index64& advanced) const { + throw std::runtime_error("undefined operation: Record::getitem_next(field)"); + } + + const std::shared_ptr Record::getitem_next(const SliceFields& fields, const Slice& tail, const Index64& advanced) const { + throw std::runtime_error("undefined operation: Record::getitem_next(fields)"); + } + +} diff --git a/src/libawkward/array/RecordArray.cpp b/src/libawkward/array/RecordArray.cpp new file mode 100644 index 0000000000..04ab51e871 --- /dev/null +++ b/src/libawkward/array/RecordArray.cpp @@ -0,0 +1,468 @@ +// BSD 3-Clause License; see https://github.com/jpivarski/awkward-1.0/blob/master/LICENSE + +#include + +#include "awkward/cpu-kernels/identity.h" +#include "awkward/cpu-kernels/getitem.h" +#include "awkward/type/RecordType.h" +#include "awkward/array/Record.h" + +#include "awkward/array/RecordArray.h" + +namespace awkward { + const std::string RecordArray::classname() const { + return "RecordArray"; + } + + void RecordArray::setid() { + int64_t len = length(); + if (len <= kMaxInt32) { + Identity32* rawid = new Identity32(Identity::newref(), Identity::FieldLoc(), 1, len); + std::shared_ptr newid(rawid); + struct Error err = awkward_new_identity32(rawid->ptr().get(), len); + util::handle_error(err, classname(), id_.get()); + setid(newid); + } + else { + Identity64* rawid = new Identity64(Identity::newref(), Identity::FieldLoc(), 1, len); + std::shared_ptr newid(rawid); + struct Error err = awkward_new_identity64(rawid->ptr().get(), len); + util::handle_error(err, classname(), id_.get()); + setid(newid); + } + } + + 
void RecordArray::setid(const std::shared_ptr id) { + if (id.get() == nullptr) { + for (auto content : contents_) { + content.get()->setid(id); + } + } + else { + if (length() != id.get()->length()) { + util::handle_error(failure("content and its id must have the same length", kSliceNone, kSliceNone), classname(), id_.get()); + } + if (istuple()) { + for (size_t j = 0; j < contents_.size(); j++) { + Identity::FieldLoc fieldloc(id.get()->fieldloc().begin(), id.get()->fieldloc().end()); + fieldloc.push_back(std::pair(id.get()->width() - 1, std::to_string(j))); + contents_[j].get()->setid(id.get()->withfieldloc(fieldloc)); + } + } + else { + Identity::FieldLoc original = id.get()->fieldloc(); + for (size_t j = 0; j < contents_.size(); j++) { + Identity::FieldLoc fieldloc(original.begin(), original.end()); + fieldloc.push_back(std::pair(id.get()->width() - 1, reverselookup_.get()->at(j))); + contents_[j].get()->setid(id.get()->withfieldloc(fieldloc)); + } + } + } + id_ = id; + } + + const std::string RecordArray::tostring_part(const std::string indent, const std::string pre, const std::string post) const { + std::stringstream out; + out << indent << pre << "<" << classname(); + if (contents_.size() == 0) { + out << " length=\"" << length_ << "\""; + } + out << ">\n"; + if (id_.get() != nullptr) { + out << id_.get()->tostring_part(indent + std::string(" "), "", "\n"); + } + for (size_t j = 0; j < contents_.size(); j++) { + out << indent << " at(j) << "\">"; + for (auto pair : *lookup_.get()) { + if (pair.second == j && pair.first != reverselookup_.get()->at(j)) { + out << "" << pair.first << ""; + } + } + } + else { + out << ">"; + } + out << "\n"; + out << contents_[j].get()->tostring_part(indent + std::string(" "), "", "\n"); + out << indent << " \n"; + } + out << indent << "" << post; + return out.str(); + } + + void RecordArray::tojson_part(ToJson& builder) const { + int64_t rows = length(); + size_t cols = contents_.size(); + std::shared_ptr keys = reverselookup_; 
+ if (istuple()) { + keys = std::shared_ptr(new ReverseLookup); + for (size_t j = 0; j < cols; j++) { + keys.get()->push_back(std::to_string(j)); + } + } + builder.beginlist(); + for (int64_t i = 0; i < rows; i++) { + builder.beginrec(); + for (size_t j = 0; j < cols; j++) { + builder.fieldkey(keys.get()->at(j).c_str()); + contents_[j].get()->getitem_at_nowrap(i).get()->tojson_part(builder); + } + builder.endrec(); + } + builder.endlist(); + } + + const std::shared_ptr RecordArray::type_part() const { + std::vector> types; + for (auto item : contents_) { + types.push_back(item.get()->type_part()); + } + return std::shared_ptr(new RecordType(types, lookup_, reverselookup_)); + } + + int64_t RecordArray::length() const { + if (contents_.size() == 0) { + return length_; + } + else { + int64_t out = -1; + for (auto x : contents_) { + int64_t len = x.get()->length(); + if (out < 0 || out > len) { + out = len; + } + } + return out; + } + } + + const std::shared_ptr RecordArray::shallow_copy() const { + if (contents_.size() == 0) { + return std::shared_ptr(new RecordArray(id_, length(), istuple())); + } + else { + return std::shared_ptr(new RecordArray(id_, contents_, lookup_, reverselookup_)); + } + } + + void RecordArray::check_for_iteration() const { + if (id_.get() != nullptr && id_.get()->length() < length()) { + util::handle_error(failure("len(id) < len(array)", kSliceNone, kSliceNone), id_.get()->classname(), nullptr); + } + } + + const std::shared_ptr RecordArray::getitem_nothing() const { + return getitem_range_nowrap(0, 0); + } + + const std::shared_ptr RecordArray::getitem_at(int64_t at) const { + int64_t regular_at = at; + int64_t len = length(); + if (regular_at < 0) { + regular_at += len; + } + if (!(0 <= regular_at && regular_at < len)) { + util::handle_error(failure("index out of range", kSliceNone, at), classname(), id_.get()); + } + return getitem_at_nowrap(regular_at); + } + + const std::shared_ptr RecordArray::getitem_at_nowrap(int64_t at) const { + 
return std::shared_ptr(new Record(*this, at)); + } + + const std::shared_ptr RecordArray::getitem_range(int64_t start, int64_t stop) const { + if (contents_.size() == 0) { + int64_t regular_start = start; + int64_t regular_stop = stop; + awkward_regularize_rangeslice(®ular_start, ®ular_stop, true, start != Slice::none(), stop != Slice::none(), length()); + return std::shared_ptr(new RecordArray(id_, regular_stop - regular_start, istuple())); + } + else { + std::vector> contents; + for (auto content : contents_) { + contents.push_back(content.get()->getitem_range(start, stop)); + } + return std::shared_ptr(new RecordArray(id_, contents, lookup_, reverselookup_)); + } + } + + const std::shared_ptr RecordArray::getitem_range_nowrap(int64_t start, int64_t stop) const { + if (contents_.size() == 0) { + return std::shared_ptr(new RecordArray(id_, stop - start, istuple())); + } + else { + std::vector> contents; + for (auto content : contents_) { + contents.push_back(content.get()->getitem_range_nowrap(start, stop)); + } + return std::shared_ptr(new RecordArray(id_, contents, lookup_, reverselookup_)); + } + } + + const std::shared_ptr RecordArray::getitem_field(const std::string& key) const { + return field(key).get()->getitem_range_nowrap(0, length()); + } + + const std::shared_ptr RecordArray::getitem_fields(const std::vector& keys) const { + RecordArray out(id_, length(), istuple()); + if (istuple()) { + for (auto key : keys) { + out.append(field(key).get()->getitem_range_nowrap(0, length())); + } + } + else { + for (auto key : keys) { + out.append(field(key).get()->getitem_range_nowrap(0, length()), key); + } + } + return out.shallow_copy(); + } + + const std::shared_ptr RecordArray::carry(const Index64& carry) const { + if (contents_.size() == 0) { + std::shared_ptr id(nullptr); + if (id_.get() != nullptr) { + id = id_.get()->getitem_carry_64(carry); + } + return std::shared_ptr(new RecordArray(id, carry.length(), istuple())); + } + else { + std::vector> contents; + 
for (auto content : contents_) { + contents.push_back(content.get()->carry(carry)); + } + std::shared_ptr id(nullptr); + if (id_.get() != nullptr) { + id = id_.get()->getitem_carry_64(carry); + } + return std::shared_ptr(new RecordArray(id, contents, lookup_, reverselookup_)); + } + } + + const std::pair RecordArray::minmax_depth() const { + if (contents_.size() == 0) { + return std::pair(0, 0); + } + int64_t min = kMaxInt64; + int64_t max = 0; + for (auto content : contents_) { + std::pair minmax = content.get()->minmax_depth(); + if (minmax.first < min) { + min = minmax.first; + } + if (minmax.second > max) { + max = minmax.second; + } + } + return std::pair(min, max); + } + + int64_t RecordArray::numfields() const { + return (int64_t)contents_.size(); + } + + /* Maps a key to a field position: the name lookup is tried first (non-tuples only), then the key is parsed as a decimal position. Unknown, unparsable, negative or too-large keys throw std::invalid_argument. */ int64_t RecordArray::index(const std::string& key) const { + int64_t out = -1; + if (!istuple()) { + try { + out = (int64_t)lookup_.get()->at(key); + } + catch (const std::out_of_range&) { } /* not a known name: fall through to the numeric interpretation */ + if (out != -1 && out >= numfields()) { + throw std::invalid_argument(std::string("key \"") + key + std::string("\" points to tuple index ") + std::to_string(out) + std::string(" for RecordArray with only " + std::to_string(numfields()) + std::string(" fields"))); + } + } + if (out == -1) { + try { + out = (int64_t)std::stoi(key); + } + catch (const std::logic_error&) { /* std::stoi raises std::invalid_argument or std::out_of_range; both derive from std::logic_error */ + throw std::invalid_argument(std::string("key \"") + key + std::string("\" is not in RecordArray")); + } + if (out < 0 || out >= numfields()) { /* a key such as "-1" parses successfully but is not a valid position */ + throw std::invalid_argument(std::string("key interpreted as index ") + key + std::string(" for RecordArray with only " + std::to_string(numfields()) + std::string(" fields"))); + } + } + return out; + } + + /* Returns the key stored for a field position; positions outside [0, numfields()) are rejected before any lookup. */ const std::string RecordArray::key(int64_t index) const { + if (index < 0 || index >= numfields()) { + throw std::invalid_argument(std::string("index ") + std::to_string(index) + std::string(" for RecordArray with only " + std::to_string(numfields()) + std::string(" fields"))); + } + if (!istuple()) { + return
reverselookup_.get()->at((size_t)index); + } + else { + return std::to_string(index); + } + } + + /* True when index(key) resolves the key. std::logic_error is caught because it is the common base of std::invalid_argument and of the std::out_of_range that std::stoi can raise inside index(). */ bool RecordArray::has(const std::string& key) const { + try { + index(key); + } + catch (const std::logic_error&) { + return false; + } + return true; + } + + const std::vector RecordArray::aliases(int64_t index) const { + std::vector out; + std::string _default = std::to_string(index); + bool has_default = false; + if (!istuple()) { + for (auto pair : *lookup_.get()) { + if (pair.second == index) { + out.push_back(pair.first); + if (pair.first == _default) { + has_default = true; + } + } + } + } + if (!has_default) { + out.push_back(_default); + } + return out; + } + + const std::vector RecordArray::aliases(const std::string& key) const { + return aliases(index(key)); + } + + /* Returns the content at a field position; throws std::invalid_argument for positions outside [0, numfields()). */ const std::shared_ptr RecordArray::field(int64_t index) const { + if (index < 0 || index >= numfields()) { + throw std::invalid_argument(std::string("index ") + std::to_string(index) + std::string(" for RecordArray with only " + std::to_string(numfields()) + std::string(" fields"))); + } + return contents_[(size_t)index]; + } + + /* Resolves the key, then delegates to field(int64_t) so the resolved position is bounds-checked before contents_ is indexed. */ const std::shared_ptr RecordArray::field(const std::string& key) const { + return field(index(key)); + } + + const std::vector RecordArray::keys() const { + std::vector out; + if (istuple()) { + int64_t cols = numfields(); + for (int64_t j = 0; j < cols; j++) { + out.push_back(std::to_string(j)); + } + } + else { + out.insert(out.end(), reverselookup_.get()->begin(), reverselookup_.get()->end()); + } + return out; + } + + const std::vector> RecordArray::values() const { + return std::vector>(contents_); + } + + const std::vector>> RecordArray::items() const { + std::vector>> out; + if (istuple()) { + size_t cols = contents_.size(); + for (size_t j = 0; j < cols; j++) { + out.push_back(std::pair>(std::to_string(j), contents_[j])); + } + } + else { + size_t cols = contents_.size(); + for (size_t j = 0; j < cols; j++) { +
out.push_back(std::pair>(reverselookup_.get()->at(j), contents_[j])); + } + } + return out; + } + + const RecordArray RecordArray::withoutkeys() const { + return RecordArray(id_, contents_); + } + + void RecordArray::append(const std::shared_ptr& content, const std::string& key) { + size_t j = contents_.size(); + append(content); + setkey(j, key); + } + + void RecordArray::append(const std::shared_ptr& content) { + if (!istuple()) { + reverselookup_.get()->push_back(std::to_string(contents_.size())); + } + contents_.push_back(content); + } + + void RecordArray::setkey(int64_t index, const std::string& fieldname) { + if (istuple()) { + lookup_ = std::shared_ptr(new Lookup); + reverselookup_ = std::shared_ptr(new ReverseLookup); + for (size_t j = 0; j < contents_.size(); j++) { + reverselookup_.get()->push_back(std::to_string(j)); + } + } + (*lookup_.get())[fieldname] = (size_t)index; + (*reverselookup_.get())[(size_t)index] = fieldname; + } + + const std::shared_ptr RecordArray::getitem_next(const std::shared_ptr head, const Slice& tail, const Index64& advanced) const { + std::shared_ptr nexthead = tail.head(); + Slice nexttail = tail.tail(); + Slice emptytail; + emptytail.become_sealed(); + + if (head.get() == nullptr) { + return shallow_copy(); + } + else if (SliceField* field = dynamic_cast(head.get())) { + std::shared_ptr out = getitem_next(*field, emptytail, advanced); + return out.get()->getitem_next(nexthead, nexttail, advanced); + } + else if (SliceFields* fields = dynamic_cast(head.get())) { + std::shared_ptr out = getitem_next(*fields, emptytail, advanced); + return out.get()->getitem_next(nexthead, nexttail, advanced); + } + else if (contents_.size() == 0) { + RecordArray out(Identity::none(), length(), istuple()); + return out.getitem_next(nexthead, nexttail, advanced); + } + else { + std::vector> contents; + for (auto content : contents_) { + contents.push_back(content.get()->getitem_next(head, emptytail, advanced)); + } + RecordArray 
out(Identity::none(), contents, lookup_, reverselookup_); + return out.getitem_next(nexthead, nexttail, advanced); + } + } + + const std::shared_ptr RecordArray::getitem_next(const SliceAt& at, const Slice& tail, const Index64& advanced) const { + throw std::invalid_argument(std::string("undefined operation: RecordArray::getitem_next(at)")); + } + + const std::shared_ptr RecordArray::getitem_next(const SliceRange& range, const Slice& tail, const Index64& advanced) const { + throw std::invalid_argument(std::string("undefined operation: RecordArray::getitem_next(range)")); + } + + const std::shared_ptr RecordArray::getitem_next(const SliceArray64& array, const Slice& tail, const Index64& advanced) const { + throw std::invalid_argument(std::string("undefined operation: RecordArray::getitem_next(array)")); + } + + const std::shared_ptr RecordArray::getitem_next(const SliceField& field, const Slice& tail, const Index64& advanced) const { + std::shared_ptr nexthead = tail.head(); + Slice nexttail = tail.tail(); + return getitem_field(field.key()).get()->getitem_next(nexthead, nexttail, advanced); + } + + const std::shared_ptr RecordArray::getitem_next(const SliceFields& fields, const Slice& tail, const Index64& advanced) const { + std::shared_ptr nexthead = tail.head(); + Slice nexttail = tail.tail(); + return getitem_fields(fields.keys()).get()->getitem_next(nexthead, nexttail, advanced); + } + +} diff --git a/src/libawkward/array/RegularArray.cpp b/src/libawkward/array/RegularArray.cpp index f7f3c8e1b4..53190cc846 100644 --- a/src/libawkward/array/RegularArray.cpp +++ b/src/libawkward/array/RegularArray.cpp @@ -92,11 +92,11 @@ namespace awkward { void RegularArray::tojson_part(ToJson& builder) const { int64_t len = length(); + builder.beginlist(); for (int64_t i = 0; i < len; i++) { - builder.beginlist(); getitem_at_nowrap(i).get()->tojson_part(builder); - builder.endlist(); } + builder.endlist(); } const std::shared_ptr RegularArray::type_part() const { @@ -111,7 
+111,11 @@ namespace awkward { return std::shared_ptr(new RegularArray(id_, content_, size_)); } - void RegularArray::check_for_iteration() const { } + void RegularArray::check_for_iteration() const { + if (id_.get() != nullptr && id_.get()->length() < length()) { + util::handle_error(failure("len(id) < len(array)", kSliceNone, kSliceNone), id_.get()->classname(), nullptr); + } + } const std::shared_ptr RegularArray::getitem_nothing() const { return content_.get()->getitem_range_nowrap(0, 0); @@ -151,6 +155,14 @@ namespace awkward { return std::shared_ptr(new RegularArray(id_, content_.get()->getitem_range_nowrap(start*size_, stop*size_), size_)); } + const std::shared_ptr RegularArray::getitem_field(const std::string& key) const { + return std::shared_ptr(new RegularArray(id_, content_.get()->getitem_field(key), size_)); + } + + const std::shared_ptr RegularArray::getitem_fields(const std::vector& keys) const { + return std::shared_ptr(new RegularArray(id_, content_.get()->getitem_fields(keys), size_)); + } + const std::shared_ptr RegularArray::carry(const Index64& carry) const { Index64 nextcarry(carry.length()*size_); diff --git a/src/libawkward/fillable/BoolFillable.cpp b/src/libawkward/fillable/BoolFillable.cpp index 5e64dbd7a5..bf214906a7 100644 --- a/src/libawkward/fillable/BoolFillable.cpp +++ b/src/libawkward/fillable/BoolFillable.cpp @@ -27,9 +27,19 @@ namespace awkward { return std::shared_ptr(new NumpyArray(Identity::none(), buffer_.ptr(), shape, strides, 0, sizeof(bool), "?")); } + bool BoolFillable::active() const { + return false; + } + Fillable* BoolFillable::null() { Fillable* out = OptionFillable::fromvalids(options_, this); - out->null(); + try { + out->null(); + } + catch (...) { + delete out; + throw; + } return out; } @@ -40,24 +50,86 @@ namespace awkward { Fillable* BoolFillable::integer(int64_t x) { Fillable* out = UnionFillable::fromsingle(options_, this); - out->integer(x); + try { + out->integer(x); + } + catch (...) 
{ + delete out; + throw; + } return out; } Fillable* BoolFillable::real(double x) { Fillable* out = UnionFillable::fromsingle(options_, this); - out->real(x); + try { + out->real(x); + } + catch (...) { + delete out; + throw; + } return out; } Fillable* BoolFillable::beginlist() { Fillable* out = UnionFillable::fromsingle(options_, this); - out->beginlist(); + try { + out->beginlist(); + } + catch (...) { + delete out; + throw; + } return out; } Fillable* BoolFillable::endlist() { - return nullptr; + throw std::invalid_argument("called 'endlist' without 'beginlist' at the same level before it"); + } + + Fillable* BoolFillable::begintuple(int64_t numfields) { + Fillable* out = UnionFillable::fromsingle(options_, this); + try { + out->begintuple(numfields); + } + catch (...) { + delete out; + throw; + } + return out; + } + + Fillable* BoolFillable::index(int64_t index) { + throw std::invalid_argument("called 'index' without 'begintuple' at the same level before it"); + } + + Fillable* BoolFillable::endtuple() { + throw std::invalid_argument("called 'endtuple' without 'begintuple' at the same level before it"); + } + + Fillable* BoolFillable::beginrecord(int64_t disambiguator) { + Fillable* out = UnionFillable::fromsingle(options_, this); + try { + out->beginrecord(disambiguator); + } + catch (...) 
{ + delete out; + throw; + } + return out; + } + + Fillable* BoolFillable::field_fast(const char* key) { + throw std::invalid_argument("called 'field_fast' without 'beginrecord' at the same level before it"); + } + + Fillable* BoolFillable::field_check(const char* key) { + throw std::invalid_argument("called 'field_check' without 'beginrecord' at the same level before it"); + } + + Fillable* BoolFillable::endrecord() { + throw std::invalid_argument("called 'endrecord' without 'beginrecord' at the same level before it"); } } diff --git a/src/libawkward/fillable/FillableArray.cpp b/src/libawkward/fillable/FillableArray.cpp index 4c84b5503d..5551a3c492 100644 --- a/src/libawkward/fillable/FillableArray.cpp +++ b/src/libawkward/fillable/FillableArray.cpp @@ -37,10 +37,22 @@ namespace awkward { return snapshot().get()->getitem_range(start, stop); } + const std::shared_ptr FillableArray::getitem_field(const std::string& key) const { + return snapshot().get()->getitem_field(key); + } + + const std::shared_ptr FillableArray::getitem_fields(const std::vector& keys) const { + return snapshot().get()->getitem_fields(keys); + } + const std::shared_ptr FillableArray::getitem(const Slice& where) const { return snapshot().get()->getitem(where); } + bool FillableArray::active() const { + return fillable_.get()->active(); + } + void FillableArray::null() { maybeupdate(fillable_.get()->null()); } @@ -69,6 +81,38 @@ namespace awkward { maybeupdate(tmp); } + void FillableArray::begintuple(int64_t numfields) { + maybeupdate(fillable_.get()->begintuple(numfields)); + } + + void FillableArray::index(int64_t index) { + maybeupdate(fillable_.get()->index(index)); + } + + void FillableArray::endtuple() { + maybeupdate(fillable_.get()->endtuple()); + } + + void FillableArray::beginrecord() { + beginrecord(0); + } + + void FillableArray::beginrecord(int64_t disambiguator) { + maybeupdate(fillable_.get()->beginrecord(disambiguator)); + } + + void FillableArray::field_fast(const char* key) { + 
maybeupdate(fillable_.get()->field_fast(key)); + } + + void FillableArray::field_check(const char* key) { + maybeupdate(fillable_.get()->field_check(key)); + } + + void FillableArray::endrecord() { + maybeupdate(fillable_.get()->endrecord()); + } + void FillableArray::maybeupdate(Fillable* tmp) { if (tmp != fillable_.get() && tmp != nullptr) { fillable_ = std::shared_ptr(tmp); diff --git a/src/libawkward/fillable/Float64Fillable.cpp b/src/libawkward/fillable/Float64Fillable.cpp index 049b26128c..9b477508a0 100644 --- a/src/libawkward/fillable/Float64Fillable.cpp +++ b/src/libawkward/fillable/Float64Fillable.cpp @@ -27,15 +27,31 @@ namespace awkward { return std::shared_ptr(new NumpyArray(Identity::none(), buffer_.ptr(), shape, strides, 0, sizeof(double), "d")); } + bool Float64Fillable::active() const { + return false; + } + Fillable* Float64Fillable::null() { Fillable* out = OptionFillable::fromvalids(options_, this); - out->null(); + try { + out->null(); + } + catch (...) { + delete out; + throw; + } return out; } Fillable* Float64Fillable::boolean(bool x) { Fillable* out = UnionFillable::fromsingle(options_, this); - out->boolean(x); + try { + out->boolean(x); + } + catch (...) { + delete out; + throw; + } return out; } @@ -51,12 +67,62 @@ namespace awkward { Fillable* Float64Fillable::beginlist() { Fillable* out = UnionFillable::fromsingle(options_, this); - out->beginlist(); + try { + out->beginlist(); + } + catch (...) { + delete out; + throw; + } return out; } Fillable* Float64Fillable::endlist() { - return nullptr; + throw std::invalid_argument("called 'endlist' without 'beginlist' at the same level before it"); + } + + Fillable* Float64Fillable::begintuple(int64_t numfields) { + Fillable* out = UnionFillable::fromsingle(options_, this); + try { + out->begintuple(numfields); + } + catch (...) 
{ + delete out; + throw; + } + return out; + } + + Fillable* Float64Fillable::index(int64_t index) { + throw std::invalid_argument("called 'index' without 'begintuple' at the same level before it"); + } + + Fillable* Float64Fillable::endtuple() { + throw std::invalid_argument("called 'endtuple' without 'begintuple' at the same level before it"); + } + + Fillable* Float64Fillable::beginrecord(int64_t disambiguator) { + Fillable* out = UnionFillable::fromsingle(options_, this); + try { + out->beginrecord(disambiguator); + } + catch (...) { + delete out; + throw; + } + return out; + } + + Fillable* Float64Fillable::field_fast(const char* key) { + throw std::invalid_argument("called 'field_fast' without 'beginrecord' at the same level before it"); + } + + Fillable* Float64Fillable::field_check(const char* key) { + throw std::invalid_argument("called 'field_check' without 'beginrecord' at the same level before it"); + } + + Fillable* Float64Fillable::endrecord() { + throw std::invalid_argument("called 'endrecord' without 'beginrecord' at the same level before it"); } } diff --git a/src/libawkward/fillable/Int64Fillable.cpp b/src/libawkward/fillable/Int64Fillable.cpp index fabe483499..1fc2affac0 100644 --- a/src/libawkward/fillable/Int64Fillable.cpp +++ b/src/libawkward/fillable/Int64Fillable.cpp @@ -32,15 +32,31 @@ namespace awkward { #endif } + bool Int64Fillable::active() const { + return false; + } + Fillable* Int64Fillable::null() { Fillable* out = OptionFillable::fromvalids(options_, this); - out->null(); + try { + out->null(); + } + catch (...) { + delete out; + throw; + } return out; } Fillable* Int64Fillable::boolean(bool x) { Fillable* out = UnionFillable::fromsingle(options_, this); - out->boolean(x); + try { + out->boolean(x); + } + catch (...) 
{ + delete out; + throw; + } return out; } @@ -51,17 +67,74 @@ namespace awkward { Fillable* Int64Fillable::real(double x) { Float64Fillable* out = Float64Fillable::fromint64(options_, buffer_); - out->real(x); + try { + out->real(x); + } + catch (...) { + delete out; + throw; + } return out; } Fillable* Int64Fillable::beginlist() { Fillable* out = UnionFillable::fromsingle(options_, this); - out->beginlist(); + try { + out->beginlist(); + } + catch (...) { + delete out; + throw; + } return out; } Fillable* Int64Fillable::endlist() { - return nullptr; + throw std::invalid_argument("called 'endlist' without 'beginlist' at the same level before it"); + } + + Fillable* Int64Fillable::begintuple(int64_t numfields) { + Fillable* out = UnionFillable::fromsingle(options_, this); + try { + out->begintuple(numfields); + } + catch (...) { + delete out; + throw; + } + return out; } + + Fillable* Int64Fillable::index(int64_t index) { + throw std::invalid_argument("called 'index' without 'begintuple' at the same level before it"); + } + + Fillable* Int64Fillable::endtuple() { + throw std::invalid_argument("called 'endtuple' without 'begintuple' at the same level before it"); + } + + Fillable* Int64Fillable::beginrecord(int64_t disambiguator) { + Fillable* out = UnionFillable::fromsingle(options_, this); + try { + out->beginrecord(disambiguator); + } + catch (...) 
{ + delete out; + throw; + } + return out; + } + + Fillable* Int64Fillable::field_fast(const char* key) { + throw std::invalid_argument("called 'field_fast' without 'beginrecord' at the same level before it"); + } + + Fillable* Int64Fillable::field_check(const char* key) { + throw std::invalid_argument("called 'field_check' without 'beginrecord' at the same level before it"); + } + + Fillable* Int64Fillable::endrecord() { + throw std::invalid_argument("called 'endrecord' without 'beginrecord' at the same level before it"); + } + } diff --git a/src/libawkward/fillable/ListFillable.cpp b/src/libawkward/fillable/ListFillable.cpp index d81385ea5c..2edc1f9d08 100644 --- a/src/libawkward/fillable/ListFillable.cpp +++ b/src/libawkward/fillable/ListFillable.cpp @@ -30,79 +30,194 @@ namespace awkward { return std::shared_ptr(new ListOffsetArray64(Identity::none(), offsets, content_.get()->snapshot())); } + bool ListFillable::active() const { + return begun_; + } + Fillable* ListFillable::null() { - if (begun_) { - return maybeupdate(content_.get()->null()); - } - else { + if (!begun_) { Fillable* out = OptionFillable::fromvalids(options_, this); - out->null(); + try { + out->null(); + } + catch (...) { + delete out; + throw; + } return out; } + else { + maybeupdate(content_.get()->null()); + return this; + } } Fillable* ListFillable::boolean(bool x) { - if (begun_) { - return maybeupdate(content_.get()->boolean(x)); - } - else { + if (!begun_) { Fillable* out = UnionFillable::fromsingle(options_, this); - out->boolean(x); + try { + out->boolean(x); + } + catch (...) { + delete out; + throw; + } return out; } + else { + maybeupdate(content_.get()->boolean(x)); + return this; + } } Fillable* ListFillable::integer(int64_t x) { - if (begun_) { - return maybeupdate(content_.get()->integer(x)); + if (!begun_) { + Fillable* out = UnionFillable::fromsingle(options_, this); + try { + out->integer(x); + } + catch (...) 
{ + delete out; + throw; + } + return out; } else { + maybeupdate(content_.get()->integer(x)); + return this; + } + } + + Fillable* ListFillable::real(double x) { + if (!begun_) { Fillable* out = UnionFillable::fromsingle(options_, this); - out->integer(x); + try { + out->real(x); + } + catch (...) { + delete out; + throw; + } return out; } + else { + maybeupdate(content_.get()->real(x)); + return this; + } } - Fillable* ListFillable::real(double x) { - if (begun_) { - return maybeupdate(content_.get()->real(x)); + Fillable* ListFillable::beginlist() { + if (!begun_) { + begun_ = true; + } + else { + maybeupdate(content_.get()->beginlist()); + } + return this; + } + + Fillable* ListFillable::endlist() { + if (!begun_) { + throw std::invalid_argument("called 'endlist' without 'beginlist' at the same level before it"); + } + else if (!content_.get()->active()) { + offsets_.append(content_.get()->length()); + begun_ = false; } else { + maybeupdate(content_.get()->endlist()); + } + return this; + } + + Fillable* ListFillable::begintuple(int64_t numfields) { + if (!begun_) { Fillable* out = UnionFillable::fromsingle(options_, this); - out->real(x); + try { + out->begintuple(numfields); + } + catch (...) 
{ + delete out; + throw; + } return out; } + else { + maybeupdate(content_.get()->begintuple(numfields)); + return this; + } } - Fillable* ListFillable::beginlist() { - if (begun_) { - return maybeupdate(content_.get()->beginlist()); + Fillable* ListFillable::index(int64_t index) { + if (!begun_) { + throw std::invalid_argument("called 'index' without 'begintuple' at the same level before it"); } else { - begun_ = true; + content_.get()->index(index); return this; } } - Fillable* ListFillable::endlist() { - if (begun_) { - Fillable* tmp = content_.get()->endlist(); - if (tmp == nullptr) { - offsets_.append(content_.get()->length()); - begun_ = false; - return this; + Fillable* ListFillable::endtuple() { + if (!begun_) { + throw std::invalid_argument("called 'endtuple' without 'begintuple' at the same level before it"); + } + else { + content_.get()->endtuple(); + return this; + } + } + + Fillable* ListFillable::beginrecord(int64_t disambiguator) { + if (!begun_) { + Fillable* out = UnionFillable::fromsingle(options_, this); + try { + out->beginrecord(disambiguator); } - else { - return maybeupdate(tmp); + catch (...) 
{ + delete out; + throw; } + return out; + } + else { + maybeupdate(content_.get()->beginrecord(disambiguator)); + return this; + } + } + + Fillable* ListFillable::field_fast(const char* key) { + if (!begun_) { + throw std::invalid_argument("called 'field_fast' without 'beginrecord' at the same level before it"); + } + else { + content_.get()->field_fast(key); + return this; + } + } + + Fillable* ListFillable::field_check(const char* key) { + if (!begun_) { + throw std::invalid_argument("called 'field_check' without 'beginrecord' at the same level before it"); + } + else { + content_.get()->field_check(key); + return this; + } + } + + Fillable* ListFillable::endrecord() { + if (!begun_) { + throw std::invalid_argument("called 'endrecord' without 'beginrecord' at the same level before it"); } else { - return nullptr; + content_.get()->endrecord(); + return this; } } Fillable* ListFillable::maybeupdate(Fillable* tmp) { - if (tmp != content_.get() && tmp != nullptr) { + if (tmp != content_.get()) { content_ = std::shared_ptr(tmp); } return this; diff --git a/src/libawkward/fillable/OptionFillable.cpp b/src/libawkward/fillable/OptionFillable.cpp index 7f2c66b2c4..f0780604c5 100644 --- a/src/libawkward/fillable/OptionFillable.cpp +++ b/src/libawkward/fillable/OptionFillable.cpp @@ -3,67 +3,184 @@ #include #include "awkward/Identity.h" +#include "awkward/Index.h" #include "awkward/type/OptionType.h" #include "awkward/fillable/OptionFillable.h" namespace awkward { int64_t OptionFillable::length() const { - return index_.length(); + return offsets_.length(); } void OptionFillable::clear() { - index_.clear(); + offsets_.clear(); content_.get()->clear(); } const std::shared_ptr OptionFillable::type() const { + Index64 offsets(offsets_.ptr(), 0, offsets_.length()); return std::shared_ptr(new OptionType(content_.get()->type())); } const std::shared_ptr OptionFillable::snapshot() const { - throw std::runtime_error("OptionFillable::snapshot() needs MaskedArray"); + throw 
std::runtime_error("OptionFillable::snapshot() needs OptionArray"); + } + + bool OptionFillable::active() const { + return content_.get()->active(); } Fillable* OptionFillable::null() { - index_.append(-1); + if (!content_.get()->active()) { + offsets_.append(-1); + } + else { + content_.get()->null(); + } return this; } Fillable* OptionFillable::boolean(bool x) { - int64_t length = content_.get()->length(); - maybeupdate(content_.get()->boolean(x)); - index_.append(length); + if (!content_.get()->active()) { + int64_t length = content_.get()->length(); + maybeupdate(content_.get()->boolean(x)); + offsets_.append(length); + } + else { + content_.get()->boolean(x); + } return this; } Fillable* OptionFillable::integer(int64_t x) { - int64_t length = content_.get()->length(); - maybeupdate(content_.get()->integer(x)); - index_.append(length); + if (!content_.get()->active()) { + int64_t length = content_.get()->length(); + maybeupdate(content_.get()->integer(x)); + offsets_.append(length); + } + else { + content_.get()->integer(x); + } return this; } Fillable* OptionFillable::real(double x) { - int64_t length = content_.get()->length(); - maybeupdate(content_.get()->real(x)); - index_.append(length); + if (!content_.get()->active()) { + int64_t length = content_.get()->length(); + maybeupdate(content_.get()->real(x)); + offsets_.append(length); + } + else { + content_.get()->real(x); + } return this; } Fillable* OptionFillable::beginlist() { - int64_t length = content_.get()->length(); - maybeupdate(content_.get()->beginlist()); - index_.append(length); + if (!content_.get()->active()) { + maybeupdate(content_.get()->beginlist()); + } + else { + content_.get()->beginlist(); + } return this; } Fillable* OptionFillable::endlist() { - return nullptr; + if (!content_.get()->active()) { + throw std::invalid_argument("called 'endlist' without 'beginlist' at the same level before it"); + } + else { + int64_t length = content_.get()->length(); + content_.get()->endlist(); + 
if (length != content_.get()->length()) { + offsets_.append(length); + } + } + return this; + } + + Fillable* OptionFillable::begintuple(int64_t numfields) { + if (!content_.get()->active()) { + maybeupdate(content_.get()->begintuple(numfields)); + } + else { + content_.get()->begintuple(numfields); + } + return this; + } + + Fillable* OptionFillable::index(int64_t index) { + if (!content_.get()->active()) { + throw std::invalid_argument("called 'index' without 'begintuple' at the same level before it"); + } + else { + content_.get()->index(index); + } + return this; + } + + Fillable* OptionFillable::endtuple() { + if (!content_.get()->active()) { + throw std::invalid_argument("called 'endtuple' without 'begintuple' at the same level before it"); + } + else { + int64_t length = content_.get()->length(); + content_.get()->endtuple(); + if (length != content_.get()->length()) { + offsets_.append(length); + } + } + return this; + } + + Fillable* OptionFillable::beginrecord(int64_t disambiguator) { + if (!content_.get()->active()) { + maybeupdate(content_.get()->beginrecord(disambiguator)); + } + else { + content_.get()->beginrecord(disambiguator); + } + return this; + } + + Fillable* OptionFillable::field_fast(const char* key) { + if (!content_.get()->active()) { + throw std::invalid_argument("called 'field_fast' without 'beginrecord' at the same level before it"); + } + else { + content_.get()->field_fast(key); + } + return this; + } + + Fillable* OptionFillable::field_check(const char* key) { + if (!content_.get()->active()) { + throw std::invalid_argument("called 'field_check' without 'beginrecord' at the same level before it"); + } + else { + content_.get()->field_check(key); + } + return this; + } + + Fillable* OptionFillable::endrecord() { + if (!content_.get()->active()) { + throw std::invalid_argument("called 'endrecord' without 'beginrecord' at the same level before it"); + } + else { + int64_t length = content_.get()->length(); + content_.get()->endrecord(); 
+ if (length != content_.get()->length()) { + offsets_.append(length); + } + } + return this; } void OptionFillable::maybeupdate(Fillable* tmp) { - if (tmp != content_.get() && tmp != nullptr) { + if (tmp != content_.get()) { content_ = std::shared_ptr(tmp); } } diff --git a/src/libawkward/fillable/RecordFillable.cpp b/src/libawkward/fillable/RecordFillable.cpp new file mode 100644 index 0000000000..28dd0251c8 --- /dev/null +++ b/src/libawkward/fillable/RecordFillable.cpp @@ -0,0 +1,404 @@ +// BSD 3-Clause License; see https://github.com/jpivarski/awkward-1.0/blob/master/LICENSE + +#include + +#include "awkward/Identity.h" +#include "awkward/Index.h" + +#include "awkward/Identity.h" +#include "awkward/Index.h" +#include "awkward/array/RecordArray.h" +#include "awkward/array/EmptyArray.h" +#include "awkward/type/RecordType.h" +#include "awkward/type/UnknownType.h" +#include "awkward/fillable/OptionFillable.h" +#include "awkward/fillable/UnionFillable.h" + +#include "awkward/fillable/RecordFillable.h" + +namespace awkward { + int64_t RecordFillable::length() const { + return length_; + } + + void RecordFillable::clear() { + for (auto x : contents_) { + x.get()->clear(); + } + keys_.clear(); + pointers_.clear(); + disambiguator_ = 0; + length_ = -1; + begun_ = false; + nextindex_ = -1; + nexttotry_ = 0; + } + + const std::shared_ptr RecordFillable::type() const { + if (length_ == -1) { + return std::shared_ptr(new UnknownType); + } + else { + std::vector> types; + std::shared_ptr lookup(new RecordType::Lookup); + std::shared_ptr reverselookup(new RecordType::ReverseLookup); + for (size_t i = 0; i < contents_.size(); i++) { + types.push_back(contents_[i].get()->type()); + (*lookup.get())[keys_[i]] = i; + reverselookup.get()->push_back(keys_[i]); + } + return std::shared_ptr(new RecordType(types, lookup, reverselookup)); + return std::shared_ptr(new RecordType(types)); + } + } + + const std::shared_ptr RecordFillable::snapshot() const { + if (length_ == -1) { + return 
std::shared_ptr(new EmptyArray(Identity::none())); + } + else if (contents_.size() == 0) { + return std::shared_ptr(new RecordArray(Identity::none(), length_, false)); + } + else { + std::vector> contents; + std::shared_ptr lookup(new RecordArray::Lookup); + std::shared_ptr reverselookup(new RecordArray::ReverseLookup); + for (size_t i = 0; i < contents_.size(); i++) { + contents.push_back(contents_[i].get()->snapshot()); + (*lookup.get())[keys_[i]] = i; + reverselookup.get()->push_back(keys_[i]); + } + return std::shared_ptr(new RecordArray(Identity::none(), contents, lookup, reverselookup)); + } + } + + bool RecordFillable::active() const { + return begun_; + } + + Fillable* RecordFillable::null() { + if (!begun_) { + Fillable* out = OptionFillable::fromvalids(options_, this); + try { + out->null(); + } + catch (...) { + delete out; + throw; + } + return out; + } + else if (nextindex_ == -1) { + throw std::invalid_argument("called 'null' immediately after 'beginrecord'; needs 'index' or 'endrecord'"); + } + else if (!contents_[(size_t)nextindex_].get()->active()) { + maybeupdate(nextindex_, contents_[(size_t)nextindex_].get()->null()); + } + else { + contents_[(size_t)nextindex_].get()->null(); + } + return this; + } + + Fillable* RecordFillable::boolean(bool x) { + if (!begun_) { + Fillable* out = UnionFillable::fromsingle(options_, this); + try { + out->boolean(x); + } + catch (...) 
{ + delete out; + throw; + } + return out; + } + else if (nextindex_ == -1) { + throw std::invalid_argument("called 'boolean' immediately after 'beginrecord'; needs 'index' or 'endrecord'"); + } + else if (!contents_[(size_t)nextindex_].get()->active()) { + maybeupdate(nextindex_, contents_[(size_t)nextindex_].get()->boolean(x)); + } + else { + contents_[(size_t)nextindex_].get()->boolean(x); + } + return this; + } + + Fillable* RecordFillable::integer(int64_t x) { + if (!begun_) { + Fillable* out = UnionFillable::fromsingle(options_, this); + try { + out->integer(x); + } + catch (...) { + delete out; + throw; + } + return out; + } + else if (nextindex_ == -1) { + throw std::invalid_argument("called 'integer' immediately after 'beginrecord'; needs 'index' or 'endrecord'"); + } + else if (!contents_[(size_t)nextindex_].get()->active()) { + maybeupdate(nextindex_, contents_[(size_t)nextindex_].get()->integer(x)); + } + else { + contents_[(size_t)nextindex_].get()->integer(x); + } + return this; + } + + Fillable* RecordFillable::real(double x) { + if (!begun_) { + Fillable* out = UnionFillable::fromsingle(options_, this); + try { + out->real(x); + } + catch (...) { + delete out; + throw; + } + return out; + } + else if (nextindex_ == -1) { + throw std::invalid_argument("called 'real' immediately after 'beginrecord'; needs 'index' or 'endrecord'"); + } + else if (!contents_[(size_t)nextindex_].get()->active()) { + maybeupdate(nextindex_, contents_[(size_t)nextindex_].get()->real(x)); + } + else { + contents_[(size_t)nextindex_].get()->real(x); + } + return this; + } + + Fillable* RecordFillable::beginlist() { + if (!begun_) { + Fillable* out = UnionFillable::fromsingle(options_, this); + try { + out->beginlist(); + } + catch (...) 
{ + delete out; + throw; + } + return out; + } + else if (nextindex_ == -1) { + throw std::invalid_argument("called 'beginlist' immediately after 'beginrecord'; needs 'index' or 'endrecord'"); + } + else if (!contents_[(size_t)nextindex_].get()->active()) { + maybeupdate(nextindex_, contents_[(size_t)nextindex_].get()->beginlist()); + } + else { + contents_[(size_t)nextindex_].get()->beginlist(); + } + return this; + } + + Fillable* RecordFillable::endlist() { + if (!begun_) { + throw std::invalid_argument("called 'endlist' without 'beginlist' at the same level before it"); + } + else if (nextindex_ == -1) { + throw std::invalid_argument("called 'endlist' immediately after 'beginrecord'; needs 'index' or 'endrecord' and then 'beginlist'"); + } + else { + contents_[(size_t)nextindex_].get()->endlist(); + } + return this; + } + + Fillable* RecordFillable::begintuple(int64_t numfields) { + if (!begun_) { + Fillable* out = UnionFillable::fromsingle(options_, this); + try { + out->begintuple(numfields); + } + catch (...) 
{ + delete out; + throw; + } + return out; + } + else if (nextindex_ == -1) { + throw std::invalid_argument("called 'begintuple' immediately after 'beginrecord'; needs 'field_fast', 'field_check', or 'endrecord'"); + } + else if (!contents_[(size_t)nextindex_].get()->active()) { + maybeupdate(nextindex_, contents_[(size_t)nextindex_].get()->begintuple(numfields)); + } + else { + contents_[(size_t)nextindex_].get()->begintuple(numfields); + } + return this; + } + + Fillable* RecordFillable::index(int64_t index) { + if (!begun_) { + throw std::invalid_argument("called 'index' without 'begintuple' at the same level before it"); + } + else if (nextindex_ == -1) { + throw std::invalid_argument("called 'index' immediately after 'beginrecord'; needs 'field_fast', 'field_check' or 'endrecord' and then 'begintuple'"); + } + else { + contents_[(size_t)nextindex_].get()->index(index); + } + return this; + } + + Fillable* RecordFillable::endtuple() { + if (!begun_) { + throw std::invalid_argument("called 'endtuple' without 'begintuple' at the same level before it"); + } + else if (nextindex_ == -1) { + throw std::invalid_argument("called 'endtuple' immediately after 'beginrecord'; needs 'field_fast', 'field_check', or 'endrecord' and then 'begintuple'"); + } + else { + contents_[(size_t)nextindex_].get()->endtuple(); + } + return this; + } + + Fillable* RecordFillable::beginrecord(int64_t disambiguator) { + if (length_ == -1) { + disambiguator_ = disambiguator; + length_ = 0; + } + + if (!begun_ && disambiguator == disambiguator_) { + begun_ = true; + nextindex_ = -1; + nexttotry_ = 0; + } + else if (!begun_) { + Fillable* out = UnionFillable::fromsingle(options_, this); + try { + out->beginrecord(disambiguator); + } + catch (...) 
{ + delete out; + throw; + } + return out; + } + else if (nextindex_ == -1) { + throw std::invalid_argument("called 'beginrecord' immediately after 'beginrecord'; needs 'field_fast', 'field_check', or 'endrecord'"); + } + else if (!contents_[(size_t)nextindex_].get()->active()) { + maybeupdate(nextindex_, contents_[(size_t)nextindex_].get()->beginrecord(disambiguator)); + } + else { + contents_[(size_t)nextindex_].get()->beginrecord(disambiguator); + } + return this; + } + + Fillable* RecordFillable::field_fast(const char* key) { + if (!begun_) { + throw std::invalid_argument("called 'field_fast' without 'beginrecord' at the same level before it"); + } + else if (nextindex_ == -1 || !contents_[(size_t)nextindex_].get()->active()) { + int64_t wrap_around = (int64_t)pointers_.size(); + int64_t i = nexttotry_; + do { + if (i >= wrap_around) { + i = 0; + if (i == nexttotry_) { + break; + } + } + if (pointers_[(size_t)i] == key) { + nextindex_ = i; + nexttotry_ = i + 1; + return this; + } + i++; + } while (i != nexttotry_); + nextindex_ = wrap_around; + nexttotry_ = 0; + if (length_ == 0) { + contents_.push_back(std::shared_ptr(UnknownFillable::fromempty(options_))); + } + else { + contents_.push_back(std::shared_ptr(OptionFillable::fromnulls(options_, length_, UnknownFillable::fromempty(options_)))); + } + keys_.push_back(std::string(key)); + pointers_.push_back(key); + return this; + } + else { + contents_[(size_t)nextindex_].get()->field_fast(key); + return this; + } + } + + Fillable* RecordFillable::field_check(const char* key) { + if (!begun_) { + throw std::invalid_argument("called 'field_check' without 'beginrecord' at the same level before it"); + } + else if (nextindex_ == -1 || !contents_[(size_t)nextindex_].get()->active()) { + int64_t wrap_around = (int64_t)keys_.size(); + int64_t i = nexttotry_; + do { + if (i >= wrap_around) { + i = 0; + if (i == nexttotry_) { + break; + } + } + if (keys_[(size_t)i].compare(key) == 0) { + nextindex_ = i; + nexttotry_ = i + 
1; + return this; + } + i++; + } while (i != nexttotry_); + nextindex_ = wrap_around; + nexttotry_ = 0; + if (length_ == 0) { + contents_.push_back(std::shared_ptr(UnknownFillable::fromempty(options_))); + } + else { + contents_.push_back(std::shared_ptr(OptionFillable::fromnulls(options_, length_, UnknownFillable::fromempty(options_)))); + } + keys_.push_back(std::string(key)); + pointers_.push_back(nullptr); + return this; + } + else { + contents_[(size_t)nextindex_].get()->field_check(key); + return this; + } + } + + Fillable* RecordFillable::endrecord() { + if (!begun_) { + throw std::invalid_argument("called 'endrecord' without 'beginrecord' at the same level before it"); + } + else if (nextindex_ == -1 || !contents_[(size_t)nextindex_].get()->active()) { + int64_t i = 0; + for (auto content : contents_) { + if (content.get()->length() == length_) { + maybeupdate(i, content.get()->null()); + } + if (content.get()->length() != length_ + 1) { + throw std::invalid_argument(std::string("record field ") + util::quote(keys_[(size_t)i], true) + std::string(" filled more than once")); + } + i++; + } + length_++; + begun_ = false; + } + else { + contents_[(size_t)nextindex_].get()->endrecord(); + } + return this; + } + + void RecordFillable::maybeupdate(int64_t i, Fillable* tmp) { + if (tmp != contents_[(size_t)i].get()) { + contents_[(size_t)i] = std::shared_ptr(tmp); + } + } + +} diff --git a/src/libawkward/fillable/TupleFillable.cpp b/src/libawkward/fillable/TupleFillable.cpp new file mode 100644 index 0000000000..19497e4d2a --- /dev/null +++ b/src/libawkward/fillable/TupleFillable.cpp @@ -0,0 +1,336 @@ +// BSD 3-Clause License; see https://github.com/jpivarski/awkward-1.0/blob/master/LICENSE + +#include + +#include "awkward/Identity.h" +#include "awkward/Index.h" +#include "awkward/array/RecordArray.h" +#include "awkward/array/EmptyArray.h" +#include "awkward/type/RecordType.h" +#include "awkward/type/UnknownType.h" +#include "awkward/fillable/OptionFillable.h" 
+#include "awkward/fillable/UnionFillable.h"
+
+#include "awkward/fillable/TupleFillable.h"
+
+// TupleFillable accumulates tuples one field at a time.
+//
+// State used below:
+//   contents_   one Fillable per tuple field
+//   length_     number of completed tuples, or -1 before the first begintuple
+//   begun_      true between a begintuple and its matching endtuple
+//   nextindex_  field selected by the last index() call, or -1 if none yet
+//
+// Most fill methods follow the same four-way dispatch:
+//   1. not begun_                -> the value is not a field of this tuple, so
+//                                   a new wrapping Fillable is created, given
+//                                   the value, and returned in place of this
+//   2. begun_, nextindex_ == -1  -> error: no field has been selected
+//   3. selected field inactive   -> the value goes to that field, which may be
+//                                   replaced by whatever Fillable it returns
+//   4. selected field active     -> the call is forwarded to the nested
+//                                   Fillable and its return value is ignored
+namespace awkward {
+  // Number of completed tuples (-1 if begintuple has never been called).
+  int64_t TupleFillable::length() const {
+    return length_;
+  }
+
+  // Clears every field and returns to the "never begun" state.
+  // NOTE(review): contents_ itself is not emptied, and begintuple appends
+  // numfields new entries whenever length_ == -1, so a begintuple after
+  // clear() appears to see a doubled field count -- confirm intent.
+  void TupleFillable::clear() {
+    for (auto x : contents_) {
+      x.get()->clear();
+    }
+    length_ = -1;
+    begun_ = false;
+    nextindex_ = -1;
+  }
+
+  // UnknownType before the first begintuple; otherwise a RecordType built
+  // from the type of each field, in field order.
+  const std::shared_ptr<Type> TupleFillable::type() const {
+    if (length_ == -1) {
+      return std::shared_ptr<Type>(new UnknownType);
+    }
+    else {
+      std::vector<std::shared_ptr<Type>> types;
+      for (auto content : contents_) {
+        types.push_back(content.get()->type());
+      }
+      return std::shared_ptr<Type>(new RecordType(types));
+    }
+  }
+
+  // EmptyArray before the first begintuple; a RecordArray constructed from
+  // length_ alone for zero-field tuples; otherwise a RecordArray of the
+  // snapshot of each field.
+  const std::shared_ptr<Content> TupleFillable::snapshot() const {
+    if (length_ == -1) {
+      return std::shared_ptr<Content>(new EmptyArray(Identity::none()));
+    }
+    else if (contents_.size() == 0) {
+      return std::shared_ptr<Content>(new RecordArray(Identity::none(), length_, true));
+    }
+    else {
+      std::vector<std::shared_ptr<Content>> contents;
+      for (auto content : contents_) {
+        contents.push_back(content.get()->snapshot());
+      }
+      return std::shared_ptr<Content>(new RecordArray(Identity::none(), contents));
+    }
+  }
+
+  // True while a tuple is open (between begintuple and endtuple).
+  bool TupleFillable::active() const {
+    return begun_;
+  }
+
+  // Appends a null. Outside a tuple, this is wrapped with
+  // OptionFillable::fromvalids and the wrapper is returned.
+  // Throws std::invalid_argument if no field has been selected yet.
+  Fillable* TupleFillable::null() {
+    if (!begun_) {
+      Fillable* out = OptionFillable::fromvalids(options_, this);
+      try {
+        out->null();
+      }
+      catch (...) {
+        // the wrapper is never handed to the caller, so release it here
+        delete out;
+        throw;
+      }
+      return out;
+    }
+    else if (nextindex_ == -1) {
+      throw std::invalid_argument("called 'null' immediately after 'begintuple'; needs 'index' or 'endtuple'");
+    }
+    else if (!contents_[(size_t)nextindex_].get()->active()) {
+      maybeupdate(nextindex_, contents_[(size_t)nextindex_].get()->null());
+    }
+    else {
+      contents_[(size_t)nextindex_].get()->null();
+    }
+    return this;
+  }
+
+  // Appends a boolean. Outside a tuple, this is wrapped with
+  // UnionFillable::fromsingle and the wrapper is returned.
+  // Throws std::invalid_argument if no field has been selected yet.
+  Fillable* TupleFillable::boolean(bool x) {
+    if (!begun_) {
+      Fillable* out = UnionFillable::fromsingle(options_, this);
+      try {
+        out->boolean(x);
+      }
+      catch (...) {
+        delete out;
+        throw;
+      }
+      return out;
+    }
+    else if (nextindex_ == -1) {
+      throw std::invalid_argument("called 'boolean' immediately after 'begintuple'; needs 'index' or 'endtuple'");
+    }
+    else if (!contents_[(size_t)nextindex_].get()->active()) {
+      maybeupdate(nextindex_, contents_[(size_t)nextindex_].get()->boolean(x));
+    }
+    else {
+      contents_[(size_t)nextindex_].get()->boolean(x);
+    }
+    return this;
+  }
+
+  // Appends an integer; same dispatch as boolean().
+  Fillable* TupleFillable::integer(int64_t x) {
+    if (!begun_) {
+      Fillable* out = UnionFillable::fromsingle(options_, this);
+      try {
+        out->integer(x);
+      }
+      catch (...) {
+        delete out;
+        throw;
+      }
+      return out;
+    }
+    else if (nextindex_ == -1) {
+      throw std::invalid_argument("called 'integer' immediately after 'begintuple'; needs 'index' or 'endtuple'");
+    }
+    else if (!contents_[(size_t)nextindex_].get()->active()) {
+      maybeupdate(nextindex_, contents_[(size_t)nextindex_].get()->integer(x));
+    }
+    else {
+      contents_[(size_t)nextindex_].get()->integer(x);
+    }
+    return this;
+  }
+
+  // Appends a real number; same dispatch as boolean().
+  Fillable* TupleFillable::real(double x) {
+    if (!begun_) {
+      Fillable* out = UnionFillable::fromsingle(options_, this);
+      try {
+        out->real(x);
+      }
+      catch (...) {
+        delete out;
+        throw;
+      }
+      return out;
+    }
+    else if (nextindex_ == -1) {
+      throw std::invalid_argument("called 'real' immediately after 'begintuple'; needs 'index' or 'endtuple'");
+    }
+    else if (!contents_[(size_t)nextindex_].get()->active()) {
+      maybeupdate(nextindex_, contents_[(size_t)nextindex_].get()->real(x));
+    }
+    else {
+      contents_[(size_t)nextindex_].get()->real(x);
+    }
+    return this;
+  }
+
+  // Opens a list; same dispatch as boolean().
+  Fillable* TupleFillable::beginlist() {
+    if (!begun_) {
+      Fillable* out = UnionFillable::fromsingle(options_, this);
+      try {
+        out->beginlist();
+      }
+      catch (...) {
+        delete out;
+        throw;
+      }
+      return out;
+    }
+    else if (nextindex_ == -1) {
+      throw std::invalid_argument("called 'beginlist' immediately after 'begintuple'; needs 'index' or 'endtuple'");
+    }
+    else if (!contents_[(size_t)nextindex_].get()->active()) {
+      maybeupdate(nextindex_, contents_[(size_t)nextindex_].get()->beginlist());
+    }
+    else {
+      contents_[(size_t)nextindex_].get()->beginlist();
+    }
+    return this;
+  }
+
+  // Closes a list inside the selected field. A tuple cannot end a list
+  // itself, so this throws std::invalid_argument unless a tuple is open and
+  // a field has been selected.
+  Fillable* TupleFillable::endlist() {
+    if (!begun_) {
+      throw std::invalid_argument("called 'endlist' without 'beginlist' at the same level before it");
+    }
+    else if (nextindex_ == -1) {
+      throw std::invalid_argument("called 'endlist' immediately after 'begintuple'; needs 'index' or 'endtuple' and then 'beginlist'");
+    }
+    else {
+      contents_[(size_t)nextindex_].get()->endlist();
+    }
+    return this;
+  }
+
+  // Opens a tuple with numfields fields.
+  //
+  // The first call ever (length_ == -1) creates numfields fields from
+  // UnknownFillable::fromempty. When no tuple is open and numfields matches
+  // the field count, this tuple is opened; when it does not match, this is
+  // wrapped with UnionFillable::fromsingle and the wrapper is returned.
+  // While a tuple is already open, the call goes to the selected field.
+  Fillable* TupleFillable::begintuple(int64_t numfields) {
+    if (length_ == -1) {
+      for (int64_t i = 0; i < numfields; i++) {
+        contents_.push_back(std::shared_ptr<Fillable>(UnknownFillable::fromempty(options_)));
+      }
+      length_ = 0;
+    }
+
+    if (!begun_ && numfields == (int64_t)contents_.size()) {
+      begun_ = true;
+      nextindex_ = -1;
+    }
+    else if (!begun_) {
+      Fillable* out = UnionFillable::fromsingle(options_, this);
+      try {
+        out->begintuple(numfields);
+      }
+      catch (...) {
+        delete out;
+        throw;
+      }
+      return out;
+    }
+    else if (nextindex_ == -1) {
+      throw std::invalid_argument("called 'begintuple' immediately after 'begintuple'; needs 'index' or 'endtuple'");
+    }
+    else if (!contents_[(size_t)nextindex_].get()->active()) {
+      maybeupdate(nextindex_, contents_[(size_t)nextindex_].get()->begintuple(numfields));
+    }
+    else {
+      contents_[(size_t)nextindex_].get()->begintuple(numfields);
+    }
+    return this;
+  }
+
+  // Selects which field the next value fills. If the currently selected
+  // field is active, the call is forwarded to it instead.
+  // Throws std::invalid_argument if no tuple is open or if index is not a
+  // valid position in contents_.
+  Fillable* TupleFillable::index(int64_t index) {
+    if (!begun_) {
+      throw std::invalid_argument("called 'index' without 'begintuple' at the same level before it");
+    }
+    else if (nextindex_ == -1 || !contents_[(size_t)nextindex_].get()->active()) {
+      // nextindex_ is used unchecked as a subscript into contents_ by every
+      // other method, so reject out-of-range values here.
+      if (index < 0 || index >= (int64_t)contents_.size()) {
+        throw std::invalid_argument(std::string("called 'index' with ") + std::to_string(index) + std::string(", which is out of range for a tuple with ") + std::to_string(contents_.size()) + std::string(" fields"));
+      }
+      nextindex_ = index;
+    }
+    else {
+      contents_[(size_t)nextindex_].get()->index(index);
+    }
+    return this;
+  }
+
+  // Closes the open tuple, or forwards to the selected field if that field
+  // is active.
+  //
+  // On close, every field must end up with exactly length_ + 1 entries: a
+  // field still at length_ was not filled in this tuple and receives a
+  // null; any other length throws std::invalid_argument.
+  Fillable* TupleFillable::endtuple() {
+    if (!begun_) {
+      throw std::invalid_argument("called 'endtuple' without 'begintuple' at the same level before it");
+    }
+    else if (nextindex_ == -1 || !contents_[(size_t)nextindex_].get()->active()) {
+      // Read contents_[i] afresh for the length check: maybeupdate may have
+      // just replaced the field with the Fillable returned by null(), and a
+      // copy taken before that would still report the old length.
+      for (size_t i = 0; i < contents_.size(); i++) {
+        if (contents_[i].get()->length() == length_) {
+          maybeupdate((int64_t)i, contents_[i].get()->null());
+        }
+        if (contents_[i].get()->length() != length_ + 1) {
+          throw std::invalid_argument(std::string("tuple index ") + std::to_string(i) + std::string(" filled more than once"));
+        }
+      }
+      length_++;
+      begun_ = false;
+    }
+    else {
+      contents_[(size_t)nextindex_].get()->endtuple();
+    }
+    return this;
+  }
+
+  // Opens a record; same dispatch as boolean().
+  Fillable* TupleFillable::beginrecord(int64_t disambiguator) {
+    if (!begun_) {
+      Fillable* out = UnionFillable::fromsingle(options_, this);
+      try {
+        out->beginrecord(disambiguator);
+      }
+      catch (...) {
+        delete out;
+        throw;
+      }
+      return out;
+    }
+    else if (nextindex_ == -1) {
+      throw std::invalid_argument("called 'beginrecord' immediately after 'begintuple'; needs 'index' or 'endtuple'");
+    }
+    else if (!contents_[(size_t)nextindex_].get()->active()) {
+      maybeupdate(nextindex_, contents_[(size_t)nextindex_].get()->beginrecord(disambiguator));
+    }
+    else {
+      contents_[(size_t)nextindex_].get()->beginrecord(disambiguator);
+    }
+    return this;
+  }
+
+  // Record-only call: forwarded to the selected field. Throws
+  // std::invalid_argument unless a tuple is open and a field is selected.
+  Fillable* TupleFillable::field_fast(const char* key) {
+    if (!begun_) {
+      throw std::invalid_argument("called 'field_fast' without 'beginrecord' at the same level before it");
+    }
+    else if (nextindex_ == -1) {
+      throw std::invalid_argument("called 'field_fast' immediately after 'begintuple'; needs 'index' or 'endtuple' and then 'beginrecord'");
+    }
+    else {
+      contents_[(size_t)nextindex_].get()->field_fast(key);
+    }
+    return this;
+  }
+
+  // Record-only call: forwarded to the selected field. Throws
+  // std::invalid_argument unless a tuple is open and a field is selected.
+  Fillable* TupleFillable::field_check(const char* key) {
+    if (!begun_) {
+      throw std::invalid_argument("called 'field_check' without 'beginrecord' at the same level before it");
+    }
+    else if (nextindex_ == -1) {
+      throw std::invalid_argument("called 'field_check' immediately after 'begintuple'; needs 'index' or 'endtuple' and then 'beginrecord'");
+    }
+    else {
+      contents_[(size_t)nextindex_].get()->field_check(key);
+    }
+    return this;
+  }
+
+  // Record-only call: forwarded to the selected field. Throws
+  // std::invalid_argument unless a tuple is open and a field is selected.
+  Fillable* TupleFillable::endrecord() {
+    if (!begun_) {
+      throw std::invalid_argument("called 'endrecord' without 'beginrecord' at the same level before it");
+    }
+    else if (nextindex_ == -1) {
+      throw std::invalid_argument("called 'endrecord' immediately after 'begintuple'; needs 'index' or 'endtuple' and then 'beginrecord'");
+    }
+    else {
+      contents_[(size_t)nextindex_].get()->endrecord();
+    }
+    return this;
+  }
+
+  // Stores tmp as field i when it is a different object from the Fillable
+  // already held there; a field that returned itself is left untouched.
+  void TupleFillable::maybeupdate(int64_t i, Fillable* tmp) {
+    if (tmp != contents_[(size_t)i].get()) {
+      contents_[(size_t)i] = std::shared_ptr<Fillable>(tmp);
+    }
+  }
+}
diff --git a/src/libawkward/fillable/UnionFillable.cpp
b/src/libawkward/fillable/UnionFillable.cpp index 2c93c8f674..f6f0364d5f 100644 --- a/src/libawkward/fillable/UnionFillable.cpp +++ b/src/libawkward/fillable/UnionFillable.cpp @@ -3,12 +3,15 @@ #include #include "awkward/Identity.h" +#include "awkward/Index.h" #include "awkward/type/UnionType.h" #include "awkward/fillable/OptionFillable.h" #include "awkward/fillable/BoolFillable.h" #include "awkward/fillable/Int64Fillable.h" #include "awkward/fillable/Float64Fillable.h" #include "awkward/fillable/ListFillable.h" +#include "awkward/fillable/TupleFillable.h" +#include "awkward/fillable/RecordFillable.h" #include "awkward/fillable/UnionFillable.h" @@ -34,91 +37,276 @@ namespace awkward { } const std::shared_ptr UnionFillable::snapshot() const { + Index8 types(types_.ptr(), 0, types_.length()); + Index64 offsets(offsets_.ptr(), 0, offsets_.length()); throw std::runtime_error("UnionFillable::snapshot() needs UnionArray"); } + bool UnionFillable::active() const { + throw std::runtime_error("FIXME: UnionFillable::active"); + } + Fillable* UnionFillable::null() { - Fillable* out = OptionFillable::fromvalids(options_, this); - out->null(); - return out; + if (current_ == -1) { + Fillable* out = OptionFillable::fromvalids(options_, this); + try { + out->null(); + } + catch (...) 
{ + delete out; + throw; + } + return out; + } + else { + contents_[(size_t)current_].get()->null(); + return this; + } } Fillable* UnionFillable::boolean(bool x) { - int8_t type; - int64_t length; - get1(type, length)->boolean(x); - offsets_.append(length); - types_.append(type); + if (current_ == -1) { + Fillable* tofill = nullptr; + int8_t i = 0; + for (auto content : contents_) { + if (dynamic_cast(content.get()) != nullptr) { + tofill = content.get(); + break; + } + i++; + } + if (tofill == nullptr) { + tofill = BoolFillable::fromempty(options_); + contents_.push_back(std::shared_ptr(tofill)); + } + int64_t length = tofill->length(); + tofill->boolean(x); + types_.append(i); + offsets_.append(length); + } + else { + contents_[(size_t)current_].get()->boolean(x); + } return this; } Fillable* UnionFillable::integer(int64_t x) { - int8_t type; - int64_t length; - get2(type, length)->integer(x); - offsets_.append(length); - types_.append(type); + if (current_ == -1) { + Fillable* tofill = nullptr; + int8_t i = 0; + for (auto content : contents_) { + if (dynamic_cast(content.get()) != nullptr) { + tofill = content.get(); + break; + } + i++; + } + if (tofill == nullptr) { + tofill = Int64Fillable::fromempty(options_); + contents_.push_back(std::shared_ptr(tofill)); + } + int64_t length = tofill->length(); + tofill->integer(x); + types_.append(i); + offsets_.append(length); + } + else { + contents_[(size_t)current_].get()->integer(x); + } return this; } Fillable* UnionFillable::real(double x) { - int8_t type; - int64_t length; - get2(type, length)->real(x); - offsets_.append(length); - types_.append(type); + if (current_ == -1) { + Fillable* tofill = nullptr; + int8_t i = 0; + for (auto content : contents_) { + if (dynamic_cast(content.get()) != nullptr) { + tofill = content.get(); + break; + } + i++; + } + if (tofill == nullptr) { + i = 0; + for (auto content : contents_) { + if (dynamic_cast(content.get()) != nullptr) { + tofill = content.get(); + break; + } + i++; 
+ } + if (tofill != nullptr) { + tofill = Float64Fillable::fromint64(options_, dynamic_cast(tofill)->buffer()); + contents_[(size_t)i] = std::shared_ptr(tofill); + } + else { + tofill = Float64Fillable::fromempty(options_); + contents_.push_back(std::shared_ptr(tofill)); + } + } + int64_t length = tofill->length(); + tofill->real(x); + types_.append(i); + offsets_.append(length); + } + else { + contents_[(size_t)current_].get()->real(x); + } return this; } Fillable* UnionFillable::beginlist() { - int8_t type; - int64_t length; - get1(type, length)->beginlist(); - offsets_.append(length); - types_.append(type); + if (current_ == -1) { + Fillable* tofill = nullptr; + int8_t i = 0; + for (auto content : contents_) { + if (dynamic_cast(content.get()) != nullptr) { + tofill = content.get(); + break; + } + i++; + } + if (tofill == nullptr) { + tofill = ListFillable::fromempty(options_); + contents_.push_back(std::shared_ptr(tofill)); + } + tofill->beginlist(); + } + else { + contents_[(size_t)current_].get()->beginlist(); + } return this; } Fillable* UnionFillable::endlist() { - return nullptr; + if (current_ == -1) { + throw std::invalid_argument("called 'endlist' without 'beginlist' at the same level before it"); + } + else { + int64_t length = contents_[(size_t)current_].get()->length(); + contents_[(size_t)current_].get()->endlist(); + if (length != contents_[(size_t)current_].get()->length()) { + types_.append(current_); + offsets_.append(length); + current_ = -1; + } + } + return this; } - template - T* UnionFillable::findfillable(int8_t& type) { - type = 0; - for (auto x : contents_) { - if (T* raw = dynamic_cast(x.get())) { - return raw; + Fillable* UnionFillable::begintuple(int64_t numfields) { + if (current_ == -1) { + Fillable* tofill = nullptr; + int8_t i = 0; + for (auto content : contents_) { + if (TupleFillable* raw = dynamic_cast(content.get())) { + if (raw->length() == -1 || raw->numfields() == numfields) { + tofill = content.get(); + break; + } + } + 
i++; + } + if (tofill == nullptr) { + tofill = TupleFillable::fromempty(options_); + contents_.push_back(std::shared_ptr(tofill)); } - type++; + tofill->begintuple(numfields); + current_ = i; } - return nullptr; + else { + contents_[(size_t)current_].get()->begintuple(numfields); + } + return this; + } + + Fillable* UnionFillable::index(int64_t index) { + if (current_ == -1) { + throw std::invalid_argument("called 'index' without 'begintuple' at the same level before it"); + } + else { + contents_[(size_t)current_].get()->index(index); + } + return this; + } + + Fillable* UnionFillable::endtuple() { + if (current_ == -1) { + throw std::invalid_argument("called 'endtuple' without 'begintuple' at the same level before it"); + } + else { + int64_t length = contents_[(size_t)current_].get()->length(); + contents_[(size_t)current_].get()->endtuple(); + if (length != contents_[(size_t)current_].get()->length()) { + types_.append(current_); + offsets_.append(length); + current_ = -1; + } + } + return this; + } + + Fillable* UnionFillable::beginrecord(int64_t disambiguator) { + if (current_ == -1) { + Fillable* tofill = nullptr; + int8_t i = 0; + for (auto content : contents_) { + if (RecordFillable* raw = dynamic_cast(content.get())) { + if (raw->length() == -1 || raw->disambiguator() == disambiguator) { + tofill = content.get(); + break; + } + } + i++; + } + if (tofill == nullptr) { + tofill = RecordFillable::fromempty(options_); + contents_.push_back(std::shared_ptr(tofill)); + } + tofill->beginrecord(disambiguator); + current_ = i; + } + else { + contents_[(size_t)current_].get()->beginrecord(disambiguator); + } + return this; } - template - T* UnionFillable::maybenew(T* fillable, int64_t& length) { - if (fillable == nullptr) { - fillable = new T(options_); - contents_.push_back(std::shared_ptr(fillable)); + Fillable* UnionFillable::field_fast(const char* key) { + if (current_ == -1) { + throw std::invalid_argument("called 'field_fast' without 'beginrecord' at the same 
level before it"); + } + else { + contents_[(size_t)current_].get()->field_fast(key); } - length = fillable->length(); - return fillable; + return this; } - template - Fillable* UnionFillable::get1(int8_t& type, int64_t& length) { - return maybenew(findfillable(type), length); + Fillable* UnionFillable::field_check(const char* key) { + if (current_ == -1) { + throw std::invalid_argument("called 'field_check' without 'beginrecord' at the same level before it"); + } + else { + contents_[(size_t)current_].get()->field_check(key); + } + return this; } - template - Fillable* UnionFillable::get2(int8_t& type, int64_t& length) { - Fillable* fillable = findfillable(type); - if (fillable == nullptr) { - return maybenew(findfillable(type), length); + Fillable* UnionFillable::endrecord() { + if (current_ == -1) { + throw std::invalid_argument("called 'endrecord' without 'beginrecord' at the same level before it"); } else { - length = fillable->length(); - return fillable; + int64_t length = contents_[(size_t)current_].get()->length(); + contents_[(size_t)current_].get()->endrecord(); + if (length != contents_[(size_t)current_].get()->length()) { + types_.append(current_); + offsets_.append(length); + current_ = -1; + } } + return this; } + } diff --git a/src/libawkward/fillable/UnknownFillable.cpp b/src/libawkward/fillable/UnknownFillable.cpp index 56e5e511e7..5e25573576 100644 --- a/src/libawkward/fillable/UnknownFillable.cpp +++ b/src/libawkward/fillable/UnknownFillable.cpp @@ -3,6 +3,7 @@ #include #include "awkward/Identity.h" +#include "awkward/Index.h" #include "awkward/array/EmptyArray.h" #include "awkward/type/UnknownType.h" #include "awkward/fillable/OptionFillable.h" @@ -10,6 +11,8 @@ #include "awkward/fillable/Int64Fillable.h" #include "awkward/fillable/Float64Fillable.h" #include "awkward/fillable/ListFillable.h" +#include "awkward/fillable/TupleFillable.h" +#include "awkward/fillable/RecordFillable.h" #include "awkward/fillable/UnknownFillable.h" @@ -31,50 +34,131 
@@ namespace awkward { return std::shared_ptr(new EmptyArray(Identity::none())); } else { - throw std::runtime_error("UnknownFillable::snapshot() needs MaskedArray"); + throw std::runtime_error("UnknownFillable::snapshot() needs OptionArray"); } } + bool UnknownFillable::active() const { + return false; + } + Fillable* UnknownFillable::null() { nullcount_++; return this; } Fillable* UnknownFillable::boolean(bool x) { - Fillable* out = prepare(); - out->boolean(x); + Fillable* out = BoolFillable::fromempty(options_); + if (nullcount_ != 0) { + out = OptionFillable::fromnulls(options_, nullcount_, out); + } + try { + out->boolean(x); + } + catch (...) { + delete out; + throw; + } return out; } Fillable* UnknownFillable::integer(int64_t x) { - Fillable* out = prepare(); - out->integer(x); + Fillable* out = Int64Fillable::fromempty(options_); + if (nullcount_ != 0) { + out = OptionFillable::fromnulls(options_, nullcount_, out); + } + try { + out->integer(x); + } + catch (...) { + delete out; + throw; + } return out; } Fillable* UnknownFillable::real(double x) { - Fillable* out = prepare(); - out->real(x); + Fillable* out = Float64Fillable::fromempty(options_); + if (nullcount_ != 0) { + out = OptionFillable::fromnulls(options_, nullcount_, out); + } + try { + out->real(x); + } + catch (...) { + delete out; + throw; + } return out; } Fillable* UnknownFillable::beginlist() { - Fillable* out = prepare(); - out->beginlist(); + Fillable* out = ListFillable::fromempty(options_); + if (nullcount_ != 0) { + out = OptionFillable::fromnulls(options_, nullcount_, out); + } + try { + out->beginlist(); + } + catch (...) 
{ + delete out; + throw; + } return out; } Fillable* UnknownFillable::endlist() { - return nullptr; + throw std::invalid_argument("called 'endlist' without 'beginlist' at the same level before it"); } - template - Fillable* UnknownFillable::prepare() const { - Fillable* out = new T(options_); + Fillable* UnknownFillable::begintuple(int64_t numfields) { + Fillable* out = TupleFillable::fromempty(options_); if (nullcount_ != 0) { out = OptionFillable::fromnulls(options_, nullcount_, out); } + try { + out->begintuple(numfields); + } + catch (...) { + delete out; + throw; + } return out; } + Fillable* UnknownFillable::index(int64_t index) { + throw std::invalid_argument("called 'index' without 'begintuple' at the same level before it"); + } + + Fillable* UnknownFillable::endtuple() { + throw std::invalid_argument("called 'endtuple' without 'begintuple' at the same level before it"); + } + + Fillable* UnknownFillable::beginrecord(int64_t disambiguator) { + Fillable* out = RecordFillable::fromempty(options_); + if (nullcount_ != 0) { + out = OptionFillable::fromnulls(options_, nullcount_, out); + } + try { + out->beginrecord(disambiguator); + } + catch (...) 
{ + delete out; + throw; + } + return out; + } + + Fillable* UnknownFillable::field_fast(const char* key) { + throw std::invalid_argument("called 'field_fast' without 'beginrecord' at the same level before it"); + } + + Fillable* UnknownFillable::field_check(const char* key) { + throw std::invalid_argument("called 'field_check' without 'beginrecord' at the same level before it"); + } + + Fillable* UnknownFillable::endrecord() { + throw std::invalid_argument("called 'endrecord' without 'beginrecord' at the same level before it"); + } + } diff --git a/src/libawkward/io/json.cpp b/src/libawkward/io/json.cpp index f2543ca2b2..72003ff35f 100644 --- a/src/libawkward/io/json.cpp +++ b/src/libawkward/io/json.cpp @@ -14,13 +14,13 @@ namespace awkward { return array_.snapshot(); } - bool Null() { array_.null(); return true; } - bool Bool(bool x) { array_.boolean(x); return true; } - bool Int(int x) { array_.integer((int64_t)x); return true; } + bool Null() { array_.null(); return true; } + bool Bool(bool x) { array_.boolean(x); return true; } + bool Int(int x) { array_.integer((int64_t)x); return true; } bool Uint(unsigned int x) { array_.integer((int64_t)x); return true; } - bool Int64(int64_t x) { array_.integer(x); return true; } - bool Uint64(uint64_t x) { array_.integer((int64_t)x); return true; } - bool Double(double x) { array_.real(x); return true; } + bool Int64(int64_t x) { array_.integer(x); return true; } + bool Uint64(uint64_t x) { array_.integer((int64_t)x); return true; } + bool Double(double x) { array_.real(x); return true; } bool StartArray() { if (depth_ != 0) { @@ -38,14 +38,18 @@ namespace awkward { } bool StartObject() { - throw std::runtime_error("not implemented: Handler::StartObject"); + array_.beginrecord(); + return true; } bool EndObject(rj::SizeType numfields) { - throw std::runtime_error("not implemented: Handler::EndObject"); + array_.endrecord(); + return true; } bool Key(const char* str, rj::SizeType length, bool copy) { - throw 
std::runtime_error("not implemented: Handler::Key"); + array_.field_check(str); + return true; } + bool String(const char* str, rj::SizeType length, bool copy) { throw std::runtime_error("not implemented: Handler::String"); } diff --git a/src/libawkward/type/OptionType.cpp b/src/libawkward/type/OptionType.cpp index fcf8e6aa95..446f7bd1d2 100644 --- a/src/libawkward/type/OptionType.cpp +++ b/src/libawkward/type/OptionType.cpp @@ -3,17 +3,19 @@ #include #include "awkward/type/UnknownType.h" -#include "awkward/type/PrimitiveType.h" +#include "awkward/type/ListType.h" +#include "awkward/type/RegularType.h" #include "awkward/type/OptionType.h" namespace awkward { std::string OptionType::tostring_part(std::string indent, std::string pre, std::string post) const { - if (dynamic_cast(type_.get()) != nullptr) { - return indent + pre + "?" + type_.get()->tostring_part("", "", "") + post; + if (dynamic_cast(type_.get()) != nullptr || + dynamic_cast(type_.get()) != nullptr) { + return indent + pre + "option[" + type().get()->tostring_part(indent, "", "") + "]" + post; } else { - return indent + pre + "option[" + type().get()->tostring_part(indent, "", "") + "]" + post; + return indent + pre + "?" 
+ type_.get()->tostring_part("", "", "") + post; } } diff --git a/src/libawkward/type/RecordType.cpp b/src/libawkward/type/RecordType.cpp new file mode 100644 index 0000000000..74e91d9b5c --- /dev/null +++ b/src/libawkward/type/RecordType.cpp @@ -0,0 +1,256 @@ +// BSD 3-Clause License; see https://github.com/jpivarski/awkward-1.0/blob/master/LICENSE + +#include +#include + +#include "awkward/type/UnknownType.h" +#include "awkward/type/OptionType.h" +#include "awkward/util.h" + +#include "awkward/type/RecordType.h" + +namespace awkward { + std::string RecordType::tostring_part(std::string indent, std::string pre, std::string post) const { + std::stringstream out; + if (reverselookup_.get() == nullptr) { + out << "("; + for (size_t j = 0; j < types_.size(); j++) { + if (j != 0) { + out << ", "; + } + out << types_[j].get()->tostring_part("", "", ""); + } + out << ")"; + } + else { + out << "{"; + for (size_t j = 0; j < types_.size(); j++) { + if (j != 0) { + out << ", "; + } + out << util::quote(reverselookup_.get()->at(j), true) << ": "; + out << types_[j].get()->tostring_part("", "", ""); + } + out << "}"; + } + return out.str(); + } + + const std::shared_ptr RecordType::shallow_copy() const { + return std::shared_ptr(new RecordType(types_, lookup_, reverselookup_)); + } + + bool RecordType::equal(std::shared_ptr other) const { + if (RecordType* t = dynamic_cast(other.get())) { + if (numfields() != t->numfields()) { + return false; + } + if (reverselookup_.get() == nullptr) { + if (t->reverselookup().get() != nullptr) { + return false; + } + for (int64_t j = 0; j < numfields(); j++) { + if (!field(j).get()->equal(t->field(j))) { + return false; + } + } + return true; + } + else { + if (t->reverselookup().get() == nullptr) { + return false; + } + if (lookup_.get()->size() != t->lookup().get()->size()) { + return false; + } + for (auto pair : *lookup_.get()) { + int64_t otherindex; + try { + otherindex = (int64_t)t->lookup().get()->at(pair.first); + } + catch 
(std::out_of_range err) { + return false; + } + if (!field((int64_t)pair.second).get()->equal(t->field(otherindex))) { + return false; + } + } + return true; + } + } + else { + return false; + } + } + + bool RecordType::compatible(std::shared_ptr other, bool bool_is_int, bool int_is_float, bool ignore_null, bool unknown_is_anything) const { + if (unknown_is_anything && dynamic_cast(other.get())) { + return true; + } + else if (ignore_null && dynamic_cast(other.get())) { + return compatible(dynamic_cast(other.get())->type(), bool_is_int, int_is_float, ignore_null, unknown_is_anything); + } + else if (RecordType* t = dynamic_cast(other.get())) { + if (numfields() != t->numfields()) { + return false; + } + if (reverselookup_.get() == nullptr) { + if (t->reverselookup().get() != nullptr) { + return false; + } + for (int64_t j = 0; j < numfields(); j++) { + if (!field(j).get()->compatible(t->field(j), bool_is_int, int_is_float, ignore_null, unknown_is_anything)) { + return false; + } + } + return true; + } + else { + if (t->reverselookup().get() == nullptr) { + return false; + } + if (lookup_.get()->size() != t->lookup().get()->size()) { + return false; + } + for (auto pair : *lookup_.get()) { + int64_t otherindex; + try { + otherindex = (int64_t)t->lookup().get()->at(pair.first); + } + catch (std::out_of_range err) { + return false; + } + if (!field((int64_t)pair.second).get()->compatible(t->field(otherindex), bool_is_int, int_is_float, ignore_null, unknown_is_anything)) { + return false; + } + } + return true; + } + } + else { + return false; + } + } + + int64_t RecordType::numfields() const { + return (int64_t)types_.size(); + } + + int64_t RecordType::index(const std::string& key) const { + int64_t out = -1; + if (lookup_.get() != nullptr) { + try { + out = (int64_t)lookup_.get()->at(key); + } + catch (std::out_of_range err) { } + if (out != -1 && out >= numfields()) { + throw std::invalid_argument(std::string("key \"") + key + std::string("\" points to tuple index 
") + std::to_string(out) + std::string(" for RecordType with only " + std::to_string(numfields()) + std::string(" fields"))); + } + } + if (out == -1) { + try { + out = (int64_t)std::stoi(key); + } + catch (std::invalid_argument err) { + throw std::invalid_argument(std::string("key \"") + key + std::string("\" is not in RecordType")); + } + if (out >= numfields()) { + throw std::invalid_argument(std::string("key interpreted as index ") + key + std::string(" for RecordType with only " + std::to_string(numfields()) + std::string(" fields"))); + } + } + return out; + } + + const std::string RecordType::key(int64_t index) const { + if (index >= numfields()) { + throw std::invalid_argument(std::string("index ") + std::to_string(index) + std::string(" for RecordType with only " + std::to_string(numfields()) + std::string(" fields"))); + } + if (reverselookup_.get() != nullptr) { + return reverselookup_.get()->at((size_t)index); + } + else { + return std::to_string(index); + } + } + + bool RecordType::has(const std::string& key) const { + try { + index(key); + } + catch (std::invalid_argument err) { + return false; + } + return true; + } + + const std::vector RecordType::aliases(int64_t index) const { + std::vector out; + std::string _default = std::to_string(index); + bool has_default = false; + if (lookup_.get() != nullptr) { + for (auto pair : *lookup_.get()) { + if (pair.second == index) { + out.push_back(pair.first); + if (pair.first == _default) { + has_default = true; + } + } + } + } + if (!has_default) { + out.push_back(_default); + } + return out; + } + + const std::vector RecordType::aliases(const std::string& key) const { + return aliases(index(key)); + } + + const std::shared_ptr RecordType::field(int64_t index) const { + if (index >= numfields()) { + throw std::invalid_argument(std::string("index ") + std::to_string(index) + std::string(" for RecordType with only " + std::to_string(numfields()) + std::string(" fields"))); + } + return types_[(size_t)index]; + 
} + + const std::shared_ptr RecordType::field(const std::string& key) const { + return types_[(size_t)index(key)]; + } + + const std::vector RecordType::keys() const { + std::vector out; + if (reverselookup_.get() == nullptr) { + int64_t cols = numfields(); + for (int64_t j = 0; j < cols; j++) { + out.push_back(std::to_string(j)); + } + } + else { + out.insert(out.end(), reverselookup_.get()->begin(), reverselookup_.get()->end()); + } + return out; + } + + const std::vector> RecordType::values() const { + return std::vector>(types_); + } + + const std::vector>> RecordType::items() const { + std::vector>> out; + if (reverselookup_.get() == nullptr) { + size_t cols = types_.size(); + for (size_t j = 0; j < cols; j++) { + out.push_back(std::pair>(std::to_string(j), types_[j])); + } + } + else { + size_t cols = types_.size(); + for (size_t j = 0; j < cols; j++) { + out.push_back(std::pair>(reverselookup_.get()->at(j), types_[j])); + } + } + return out; + } + +} diff --git a/src/libawkward/type/UnionType.cpp b/src/libawkward/type/UnionType.cpp index 85eebd1f17..91a8754991 100644 --- a/src/libawkward/type/UnionType.cpp +++ b/src/libawkward/type/UnionType.cpp @@ -95,7 +95,7 @@ namespace awkward { return types_; } - const std::shared_ptr UnionType::type(int64_t i) const { - return types_[(size_t)i]; + const std::shared_ptr UnionType::type(int64_t index) const { + return types_[(size_t)index]; } } diff --git a/src/libawkward/type/UnknownType.cpp b/src/libawkward/type/UnknownType.cpp index 1ce844f9b3..17ed564a67 100644 --- a/src/libawkward/type/UnknownType.cpp +++ b/src/libawkward/type/UnknownType.cpp @@ -6,7 +6,7 @@ namespace awkward { std::string UnknownType::tostring_part(std::string indent, std::string pre, std::string post) const { - return indent + pre + "???" 
+ post; + return indent + pre + "unknown" + post; } const std::shared_ptr UnknownType::shallow_copy() const { diff --git a/src/libawkward/util.cpp b/src/libawkward/util.cpp index ae48336797..24b0be8ea1 100644 --- a/src/libawkward/util.cpp +++ b/src/libawkward/util.cpp @@ -18,7 +18,7 @@ namespace awkward { if (err.location != kSliceNone && id != nullptr) { assert(err.location > 0); if (0 <= err.location && err.location < id->length()) { - out << " at id[" << id->location(err.location) << "]"; + out << " at id[" << id->location_at(err.location) << "]"; } else { out << " at id[???]"; @@ -32,6 +32,16 @@ namespace awkward { } } + std::string quote(std::string x, bool doublequote) { + // TODO: escape characters, possibly using RapidJSON. + if (doublequote) { + return std::string("\"") + x + std::string("\""); + } + else { + return std::string("'") + x + std::string("'"); + } + } + template <> Error awkward_identity64_from_listoffsetarray(int64_t* toptr, const int64_t* fromptr, const uint32_t* fromoffsets, int64_t fromptroffset, int64_t offsetsoffset, int64_t tolength, int64_t fromlength, int64_t fromwidth) { return awkward_identity64_from_listoffsetarrayU32(toptr, fromptr, fromoffsets, fromptroffset, offsetsoffset, tolength, fromlength, fromwidth); diff --git a/src/pyawkward.cpp b/src/pyawkward.cpp index 53d1066f54..4caf4592ce 100644 --- a/src/pyawkward.cpp +++ b/src/pyawkward.cpp @@ -17,6 +17,8 @@ #include "awkward/array/ListOffsetArray.h" #include "awkward/array/EmptyArray.h" #include "awkward/array/RegularArray.h" +#include "awkward/array/RecordArray.h" +#include "awkward/array/Record.h" #include "awkward/fillable/FillableOptions.h" #include "awkward/fillable/FillableArray.h" #include "awkward/type/Type.h" @@ -27,6 +29,7 @@ #include "awkward/type/ListType.h" #include "awkward/type/OptionType.h" #include "awkward/type/UnionType.h" +#include "awkward/type/RecordType.h" #include "awkward/io/json.h" #include "awkward/io/root.h" @@ -65,6 +68,9 @@ py::object 
box(std::shared_ptr t) { else if (ak::UnionType* raw = dynamic_cast(t.get())) { return py::cast(*raw); } + else if (ak::RecordType* raw = dynamic_cast(t.get())) { + return py::cast(*raw); + } else if (ak::UnknownType* raw = dynamic_cast(t.get())) { return py::cast(*raw); } @@ -113,6 +119,12 @@ py::object box(std::shared_ptr content) { else if (ak::RegularArray* raw = dynamic_cast(content.get())) { return py::cast(*raw); } + else if (ak::Record* raw = dynamic_cast(content.get())) { + return py::cast(*raw); + } + else if (ak::RecordArray* raw = dynamic_cast(content.get())) { + return py::cast(*raw); + } else { throw std::runtime_error("missing boxer for Content subtype"); } @@ -162,10 +174,14 @@ std::shared_ptr unbox_type(py::handle obj) { return obj.cast()->shallow_copy(); } catch (py::cast_error err) { } + try { + return obj.cast()->shallow_copy(); + } + catch (py::cast_error err) { } throw std::invalid_argument("argument must be a Type subtype"); } -std::shared_ptr unbox_content(py::object obj) { +std::shared_ptr unbox_content(py::handle obj) { try { return obj.cast()->shallow_copy(); } @@ -202,12 +218,21 @@ std::shared_ptr unbox_content(py::object obj) { return obj.cast()->shallow_copy(); } catch (py::cast_error err) { } + try { + obj.cast(); + throw std::invalid_argument("content argument must be a Content subtype (excluding Record)"); + } + catch (py::cast_error err) { } + try { + return obj.cast()->shallow_copy(); + } + catch (py::cast_error err) { } throw std::invalid_argument("content argument must be a Content subtype"); } -std::shared_ptr unbox_id(py::object id) { +std::shared_ptr unbox_id(py::handle id) { if (id.is(py::none())) { - return std::shared_ptr(nullptr); + return ak::Identity::none(); } try { return id.cast()->shallow_copy(); @@ -269,6 +294,46 @@ py::class_> make_IndexOf(py::handle m, std::string name) { /////////////////////////////////////////////////////////////// Identity +template +py::tuple location(const T& self) { + if (self.id().get() 
== nullptr) { + throw std::invalid_argument(self.classname() + std::string(" instance has no associated id (use 'setid' to assign one to the array it is in)")); + } + ak::Identity::FieldLoc fieldloc = self.id().get()->fieldloc(); + if (self.isscalar()) { + py::tuple out((size_t)(self.id().get()->width()) + fieldloc.size()); + size_t j = 0; + for (int64_t i = 0; i < self.id().get()->width(); i++) { + out[j] = py::cast(self.id().get()->value(0, i)); + j++; + for (auto pair : fieldloc) { + if (pair.first == i) { + out[j] = py::cast(pair.second); + j++; + } + } + } + return out; + } + else { + py::tuple out((size_t)(self.id().get()->width() - 1) + fieldloc.size()); + size_t j = 0; + for (int64_t i = 0; i < self.id().get()->width(); i++) { + if (i < self.id().get()->width() - 1) { + out[j] = py::cast(self.id().get()->value(0, i)); + j++; + } + for (auto pair : fieldloc) { + if (pair.first == i) { + out[j] = py::cast(pair.second); + j++; + } + } + } + return out; + } +} + template py::object getid(T& self) { return box(self.id()); @@ -327,6 +392,23 @@ py::class_> make_IdentityOf(py::handle m, std::string name) { .def_property_readonly("array", [](py::buffer& self) -> py::array { return py::array(self); }) + .def("location_at_str", &ak::IdentityOf::location_at) + .def("location_at", [](const ak::Identity& self, int64_t at) -> py::tuple { + ak::Identity::FieldLoc fieldloc = self.fieldloc(); + py::tuple out((size_t)self.width() + fieldloc.size()); + size_t j = 0; + for (int64_t i = 0; i < self.width(); i++) { + out[j] = py::cast(self.value(at, i)); + j++; + for (auto pair : fieldloc) { + if (pair.first == i) { + out[j] = py::cast(pair.second); + j++; + } + } + } + return out; + }) ); } @@ -338,6 +420,7 @@ void toslice_part(ak::Slice& slice, py::object obj) { // FIXME: what happens if you give this a Numpy integer? a Numpy 0-dimensional array? 
slice.append(std::shared_ptr(new ak::SliceAt(obj.cast()))); } + else if (py::isinstance(obj)) { py::object pystart = obj.attr("start"); py::object pystop = obj.attr("stop"); @@ -359,74 +442,99 @@ void toslice_part(ak::Slice& slice, py::object obj) { } slice.append(std::shared_ptr(new ak::SliceRange(start, stop, step))); } + #if PY_MAJOR_VERSION >= 3 else if (py::isinstance(obj)) { slice.append(std::shared_ptr(new ak::SliceEllipsis())); } #endif + else if (obj.is(py::module::import("numpy").attr("newaxis"))) { slice.append(std::shared_ptr(new ak::SliceNewAxis())); } + + else if (py::isinstance(obj)) { + slice.append(std::shared_ptr(new ak::SliceField(obj.cast()))); + } + else if (py::isinstance(obj)) { - py::object objarray = py::module::import("numpy").attr("asarray")(obj); - if (!py::isinstance(objarray)) { - throw std::invalid_argument("iterable cannot be cast as an array"); + std::vector strings; + bool all_strings = true; + for (auto x : obj) { + if (py::isinstance(x)) { + strings.push_back(x.cast()); + } + else { + all_strings = false; + break; + } } - py::array array = objarray.cast(); - if (array.ndim() == 0) { - throw std::invalid_argument("arrays used as an index must have at least one dimension"); + + if (all_strings && strings.size() != 0) { + slice.append(std::shared_ptr(new ak::SliceFields(strings))); } + else { + py::object objarray = py::module::import("numpy").attr("asarray")(obj); + if (!py::isinstance(objarray)) { + throw std::invalid_argument("iterable cannot be cast as an array"); + } + py::array array = objarray.cast(); + if (array.ndim() == 0) { + throw std::invalid_argument("arrays used as an index must have at least one dimension"); + } + + py::buffer_info info = array.request(); + if (info.format.compare("?") == 0) { + py::object nonzero_tuple = py::module::import("numpy").attr("nonzero")(array); + for (auto x : nonzero_tuple.cast()) { + py::object intarray_object = py::module::import("numpy").attr("asarray")(x.cast(), 
py::module::import("numpy").attr("int64")); + py::array intarray = intarray_object.cast(); + py::buffer_info intinfo = intarray.request(); + std::vector shape; + std::vector strides; + for (ssize_t i = 0; i < intinfo.ndim; i++) { + shape.push_back((int64_t)intinfo.shape[i]); + strides.push_back((int64_t)intinfo.strides[i] / sizeof(int64_t)); + } + ak::Index64 index(std::shared_ptr(reinterpret_cast(intinfo.ptr), pyobject_deleter(intarray.ptr())), 0, shape[0]); + slice.append(std::shared_ptr(new ak::SliceArray64(index, shape, strides))); + } + } + + else { + std::string format(info.format); + format.erase(0, format.find_first_not_of("@=<>!")); + if (py::isinstance(obj) && + format.compare("c") != 0 && + format.compare("b") != 0 && + format.compare("B") != 0 && + format.compare("h") != 0 && + format.compare("H") != 0 && + format.compare("i") != 0 && + format.compare("I") != 0 && + format.compare("l") != 0 && + format.compare("L") != 0 && + format.compare("q") != 0 && + format.compare("Q") != 0) { + throw std::invalid_argument("arrays used as an index must be integer or boolean"); + } - py::buffer_info info = array.request(); - if (info.format.compare("?") == 0) { - py::object nonzero_tuple = py::module::import("numpy").attr("nonzero")(array); - for (auto x : nonzero_tuple.cast()) { - py::object intarray_object = py::module::import("numpy").attr("asarray")(x.cast(), py::module::import("numpy").attr("int64")); + py::object intarray_object = py::module::import("numpy").attr("asarray")(array, py::module::import("numpy").attr("int64")); py::array intarray = intarray_object.cast(); py::buffer_info intinfo = intarray.request(); std::vector shape; std::vector strides; for (ssize_t i = 0; i < intinfo.ndim; i++) { shape.push_back((int64_t)intinfo.shape[i]); - strides.push_back((int64_t)intinfo.strides[i] / sizeof(int64_t)); + strides.push_back((int64_t)intinfo.strides[i] / (int64_t)sizeof(int64_t)); } ak::Index64 index(std::shared_ptr(reinterpret_cast(intinfo.ptr), 
pyobject_deleter(intarray.ptr())), 0, shape[0]); slice.append(std::shared_ptr(new ak::SliceArray64(index, shape, strides))); } } - else { - std::string format(info.format); - format.erase(0, format.find_first_not_of("@=<>!")); - if (py::isinstance(obj) && - format.compare("c") != 0 && - format.compare("b") != 0 && - format.compare("B") != 0 && - format.compare("h") != 0 && - format.compare("H") != 0 && - format.compare("i") != 0 && - format.compare("I") != 0 && - format.compare("l") != 0 && - format.compare("L") != 0 && - format.compare("q") != 0 && - format.compare("Q") != 0) { - throw std::invalid_argument("arrays used as an index must be integer or boolean"); - } - - py::object intarray_object = py::module::import("numpy").attr("asarray")(array, py::module::import("numpy").attr("int64")); - py::array intarray = intarray_object.cast(); - py::buffer_info intinfo = intarray.request(); - std::vector shape; - std::vector strides; - for (ssize_t i = 0; i < intinfo.ndim; i++) { - shape.push_back((int64_t)intinfo.shape[i]); - strides.push_back((int64_t)intinfo.strides[i] / (int64_t)sizeof(int64_t)); - } - ak::Index64 index(std::shared_ptr(reinterpret_cast(intinfo.ptr), pyobject_deleter(intarray.ptr())), 0, shape[0]); - slice.append(std::shared_ptr(new ak::SliceArray64(index, shape, strides))); - } - } + else { throw std::invalid_argument("only integers, slices (`:`), ellipsis (`...`), numpy.newaxis (`None`), and integer or boolean arrays (possibly jagged) are valid indices"); } @@ -502,11 +610,34 @@ py::object getitem(T& self, py::object obj) { } // NOTE: control flow can pass through here; don't make the last line an 'else'! 
} + if (py::isinstance(obj)) { + return box(self.getitem_field(obj.cast())); + } + if (!py::isinstance(obj) && py::isinstance(obj)) { + std::vector strings; + bool all_strings = true; + for (auto x : obj) { + if (py::isinstance(x)) { + strings.push_back(x.cast()); + } + else { + all_strings = false; + break; + } + } + if (all_strings && strings.size() != 0) { + return box(self.getitem_fields(strings)); + } + // NOTE: control flow can pass through here; don't make the last line an 'else'! + } return box(self.getitem(toslice(obj))); } -void fillable_fill(ak::FillableArray& self, py::object obj) { - if (py::isinstance(obj)) { +void fillable_fill(ak::FillableArray& self, py::handle obj) { + if (obj.is(py::none())) { + self.null(); + } + else if (py::isinstance(obj)) { self.boolean(obj.cast()); } else if (py::isinstance(obj)) { @@ -515,7 +646,29 @@ void fillable_fill(ak::FillableArray& self, py::object obj) { else if (py::isinstance(obj)) { self.real(obj.cast()); } - // FIXME: strings, dicts... 
+ else if (py::isinstance(obj)) { + py::tuple tup = obj.cast(); + self.begintuple(tup.size()); + for (size_t i = 0; i < tup.size(); i++) { + self.index((int64_t)i); + fillable_fill(self, tup[i]); + } + self.endtuple(); + } + else if (py::isinstance(obj)) { + py::dict dict = obj.cast(); + self.beginrecord(dict.size()); + for (auto pair : dict) { + if (!py::isinstance(pair.first)) { + throw std::invalid_argument("keys of dicts in 'fromiter' must all be strings"); + } + std::string key = pair.first.cast(); + self.field_check(key.c_str()); + fillable_fill(self, pair.second); + } + self.endrecord(); + } + // FIXME: strings else if (py::isinstance(obj)) { py::sequence seq = obj.cast(); self.beginlist(); @@ -523,7 +676,6 @@ void fillable_fill(ak::FillableArray& self, py::object obj) { fillable_fill(self, x); } self.endlist(); - return; } else { throw std::invalid_argument(std::string("cannot convert ") + obj.attr("__repr__")().cast() + std::string(" to an array element")); @@ -547,11 +699,20 @@ py::class_ make_FillableArray(py::handle m, std::string name) .def("__iter__", [](ak::FillableArray& self) -> ak::Iterator { return ak::Iterator(self.snapshot()); }) + .def("null", &ak::FillableArray::null) .def("boolean", &ak::FillableArray::boolean) .def("integer", &ak::FillableArray::integer) .def("real", &ak::FillableArray::real) .def("beginlist", &ak::FillableArray::beginlist) .def("endlist", &ak::FillableArray::endlist) + .def("begintuple", &ak::FillableArray::begintuple) + .def("index", &ak::FillableArray::index) + .def("endtuple", &ak::FillableArray::endtuple) + .def("beginrecord", [](ak::FillableArray& self, int64_t disambiguator) -> void { + self.beginrecord(disambiguator); + }, py::arg("disambiguator") = 0) + .def("field", &ak::FillableArray::field_check) + .def("endrecord", &ak::FillableArray::endrecord) .def("fill", &fillable_fill) ); } @@ -672,6 +833,10 @@ py::class_, ak::Type> make_UnionTy } return ak::UnionType(types); })) + .def("__repr__", 
&ak::UnionType::tostring) + .def("__eq__", &ak::UnionType::equal) + .def("compatible", &ak::UnionType::compatible, py::arg("other"), py::arg("bool_is_int") = false, py::arg("int_is_float") = false, py::arg("ignore_null") = true, py::arg("unknown_is_anything") = true) + .def_property_readonly("numtypes", &ak::UnionType::numtypes) .def_property_readonly("types", [](ak::UnionType& self) -> py::tuple { py::tuple types((size_t)self.numtypes()); @@ -681,9 +846,72 @@ py::class_, ak::Type> make_UnionTy return types; }) .def("type", &ak::UnionType::type) - .def("__repr__", &ak::UnionType::tostring) - .def("__eq__", &ak::UnionType::equal) - .def("compatible", &ak::UnionType::compatible, py::arg("other"), py::arg("bool_is_int") = false, py::arg("int_is_float") = false, py::arg("ignore_null") = true, py::arg("unknown_is_anything") = true) + + ); +} + +py::class_, ak::Type> make_RecordType(py::handle m, std::string name) { + return (py::class_, ak::Type>(m, name.c_str()) + .def(py::init([](py::args args) -> ak::RecordType { + std::vector> types; + for (auto x : args) { + types.push_back(unbox_type(x)); + } + return ak::RecordType(types, std::shared_ptr(nullptr), std::shared_ptr(nullptr)); + })) + .def(py::init([](py::kwargs kwargs) -> ak::RecordType { + std::shared_ptr lookup(new ak::RecordType::Lookup); + std::shared_ptr reverselookup(new ak::RecordType::ReverseLookup); + std::vector> types; + for (auto x : kwargs) { + std::string key = x.first.cast(); + (*lookup.get())[key] = types.size(); + reverselookup.get()->push_back(key); + types.push_back(unbox_type(x.second)); + } + return ak::RecordType(types, lookup, reverselookup); + })) + .def("__repr__", &ak::RecordType::tostring) + .def("__eq__", &ak::RecordType::equal) + .def("compatible", &ak::RecordType::compatible, py::arg("other"), py::arg("bool_is_int") = false, py::arg("int_is_float") = false, py::arg("ignore_null") = true, py::arg("unknown_is_anything") = true) + + .def_property_readonly("numfields", 
&ak::RecordType::numfields) + .def("index", &ak::RecordType::index) + .def("key", &ak::RecordType::key) + .def("__contains__", &ak::RecordType::has) + .def("aliases", [](ak::RecordType& self, int64_t index) -> std::vector { + return self.aliases(index); + }) + .def("aliases", [](ak::RecordType& self, std::string key) -> std::vector { + return self.aliases(key); + }) + .def("__getitem__", [](ak::RecordType& self, int64_t index) -> py::object { + return box(self.field(index)); + }) + .def("__getitem__", [](ak::RecordType& self, std::string key) -> py::object { + return box(self.field(key)); + }) + .def("keys", &ak::RecordType::keys) + .def("values", [](ak::RecordType& self) -> py::object { + py::list out; + for (auto item : self.values()) { + out.append(box(item)); + } + return out; + }) + .def("items", [](ak::RecordType& self) -> py::object { + py::list out; + for (auto item : self.items()) { + py::str key(item.first); + py::object val(box(item.second)); + py::tuple pair(2); + pair[0] = key; + pair[1] = val; + out.append(pair); + } + return out; + }) + ); } @@ -747,7 +975,10 @@ py::class_ content(py::class_& x) { .def("__iter__", &iter) .def("tojson", &tojson_string, py::arg("pretty") = false, py::arg("maxdecimals") = py::none()) .def("tojson", &tojson_file, py::arg("destination"), py::arg("pretty") = false, py::arg("maxdecimals") = py::none(), py::arg("buffersize") = 65536) - .def_property_readonly("type", &ak::Content::type); + .def_property_readonly("type", &ak::Content::type) + .def_property_readonly("location", &location) + + ; } py::class_ make_Content(py::handle m, std::string name) { @@ -857,6 +1088,141 @@ py::class_ make_RegularArray(py::handle m, std::s ); } +/////////////////////////////////////////////////////////////// RecordArray + +py::class_ make_RecordArray(py::handle m, std::string name) { + return content(py::class_(m, name.c_str()) + .def(py::init([](py::dict contents, py::object id) -> ak::RecordArray { + std::shared_ptr lookup(new 
ak::RecordArray::Lookup); + std::shared_ptr reverselookup(new ak::RecordArray::ReverseLookup); + std::vector> out; + for (auto x : contents) { + std::string key = x.first.cast(); + (*lookup.get())[key] = out.size(); + reverselookup.get()->push_back(key); + out.push_back(unbox_content(x.second)); + } + return ak::RecordArray(unbox_id(id), out, lookup, reverselookup); + }), py::arg("contents"), py::arg("id") = py::none()) + .def(py::init([](py::iterable contents, py::object id) -> ak::RecordArray { + std::vector> out; + for (auto x : contents) { + out.push_back(unbox_content(x)); + } + return ak::RecordArray(unbox_id(id), out, std::shared_ptr(nullptr), std::shared_ptr(nullptr)); + }), py::arg("contents"), py::arg("id") = py::none()) + .def(py::init([](int64_t length, bool istuple, py::object id) -> ak::RecordArray { + return ak::RecordArray(unbox_id(id), length, istuple); + }), py::arg("length"), py::arg("istuple") = false, py::arg("id") = py::none()) + + .def_property_readonly("istuple", &ak::RecordArray::istuple) + .def_property_readonly("numfields", &ak::RecordArray::numfields) + .def("index", &ak::RecordArray::index) + .def("key", &ak::RecordArray::key) + .def("has", &ak::RecordArray::has) + .def("aliases", [](ak::RecordArray& self, int64_t index) -> std::vector { + return self.aliases(index); + }) + .def("aliases", [](ak::RecordArray& self, std::string key) -> std::vector { + return self.aliases(key); + }) + .def("field", [](ak::RecordArray& self, int64_t index) -> py::object { + return box(self.field(index)); + }) + .def("field", [](ak::RecordArray& self, std::string key) -> py::object { + return box(self.field(key)); + }) + .def("keys", &ak::RecordArray::keys) + .def("values", [](ak::RecordArray& self) -> py::object { + py::list out; + for (auto item : self.values()) { + out.append(box(item)); + } + return out; + }) + .def("items", [](ak::RecordArray& self) -> py::object { + py::list out; + for (auto item : self.items()) { + py::str key(item.first); + 
py::object val(box(item.second)); + py::tuple pair(2); + pair[0] = key; + pair[1] = val; + out.append(pair); + } + return out; + }) + .def_property_readonly("withoutkeys", [](ak::RecordArray& self) -> py::object { + return box(self.withoutkeys().shallow_copy()); + }) + + .def("append", [](ak::RecordArray& self, py::object content, py::object key) -> void { + if (key.is(py::none())) { + self.append(unbox_content(content)); + } + else { + self.append(unbox_content(content), key.cast()); + } + }, py::arg("content"), py::arg("key") = py::none()) + .def("setkey", &ak::RecordArray::setkey) + + ); +} + +py::class_ make_Record(py::handle m, std::string name) { + return py::class_(m, name.c_str()) + .def("__repr__", &repr) + .def_property_readonly("id", [](ak::Record& self) -> py::object { return box(self.id()); }) + .def("__getitem__", &getitem) + .def("tojson", &tojson_string, py::arg("pretty") = false, py::arg("maxdecimals") = py::none()) + .def("tojson", &tojson_file, py::arg("destination"), py::arg("pretty") = false, py::arg("maxdecimals") = py::none(), py::arg("buffersize") = 65536) + .def_property_readonly("type", &ak::Content::type) + + .def_property_readonly("istuple", &ak::Record::istuple) + .def_property_readonly("numfields", &ak::Record::numfields) + .def("index", &ak::Record::index) + .def("key", &ak::Record::key) + .def("has", &ak::Record::has) + .def("aliases", [](ak::Record& self, int64_t index) -> std::vector { + return self.aliases(index); + }) + .def("aliases", [](ak::Record& self, std::string key) -> std::vector { + return self.aliases(key); + }) + .def("field", [](ak::Record& self, int64_t index) -> py::object { + return box(self.field(index)); + }) + .def("field", [](ak::Record& self, std::string key) -> py::object { + return box(self.field(key)); + }) + .def("keys", &ak::Record::keys) + .def("values", [](ak::Record& self) -> py::object { + py::list out; + for (auto item : self.values()) { + out.append(box(item)); + } + return out; + }) + .def("items", 
[](ak::Record& self) -> py::object { + py::list out; + for (auto item : self.items()) { + py::str key(item.first); + py::object val(box(item.second)); + py::tuple pair(2); + pair[0] = key; + pair[1] = val; + out.append(pair); + } + return out; + }) + .def_property_readonly("withoutkeys", [](ak::Record& self) -> py::object { + return box(self.withoutkeys().shallow_copy()); + }) + .def_property_readonly("location", &location) + + ; +} + /////////////////////////////////////////////////////////////// module PYBIND11_MODULE(layout, m) { @@ -889,6 +1255,7 @@ PYBIND11_MODULE(layout, m) { make_ListType(m, "ListType"); make_OptionType(m, "OptionType"); make_UnionType(m, "UnionType"); + make_RecordType(m, "RecordType"); make_Content(m, "Content"); @@ -906,6 +1273,9 @@ PYBIND11_MODULE(layout, m) { make_RegularArray(m, "RegularArray"); + make_RecordArray(m, "RecordArray"); + make_Record(m, "Record"); + m.def("fromjson", [](std::string source, int64_t initial, double resize, int64_t buffersize) -> py::object { bool isarray = false; for (char const &x: source) { diff --git a/studies/fillable.py b/studies/fillable.py new file mode 100644 index 0000000000..71f115e243 --- /dev/null +++ b/studies/fillable.py @@ -0,0 +1,940 @@ +################################################################ Contents + +class Content: + def __iter__(self): + def convert(x): + if isinstance(x, Content): + return list(x) + elif isinstance(x, tuple): + return tuple(convert(y) for y in x) + else: + return x + + for i in range(len(self)): + yield convert(self[i]) + + def __repr__(self): + return self.tostring_part("", "", "").rstrip() + +class FloatArray(Content): + def __init__(self, data): + assert isinstance(data, list) + self.data = data + + def __len__(self): + return len(self.data) + + def __getitem__(self, where): + if isinstance(where, int): + return self.data[where] + else: + return FloatArray(self.data[where]) + + def tostring_part(self, indent, pre, post): + out = indent + pre + "\n" +
out += indent + " " + " ".join(repr(x) for x in self.data) + "\n" + out += indent + "" + post + return out + +class ListArray(Content): + def __init__(self, offsets, content): + assert isinstance(offsets, list) + assert isinstance(content, Content) + self.offsets = offsets + self.content = content + + def __len__(self): + return len(self.offsets) - 1 + + def __getitem__(self, where): + if isinstance(where, int): + return self.content[self.offsets[where]:self.offsets[where + 1]] + else: + start = where.start + stop = where.stop + 1 + return ListArray(self.offsets[start:stop], self.content) + + def tostring_part(self, indent, pre, post): + out = indent + pre + "\n" + out += indent + " " + " ".join(repr(x) for x in self.offsets) + "\n" + out += self.content.tostring_part(indent + " ", "", "\n") + out += indent + "" + post + return out + +class UnionArray(Content): + def __init__(self, tags, offsets, contents): + assert isinstance(tags, list) + assert isinstance(offsets, list) + assert all(isinstance(x, Content) for x in contents) + self.tags = tags + self.offsets = offsets + self.contents = contents + + def __len__(self): + return len(self.tags) + + def __getitem__(self, where): + if isinstance(where, int): + return self.contents[self.tags[where]][self.offsets[where]] + else: + return UnionArray(self.tags[where], self.offsets[where], self.contents) + + def tostring_part(self, indent, pre, post): + out = indent + pre + "\n" + out += indent + " " + " ".join(repr(x) for x in self.tags) + "\n" + out += indent + " " + " ".join(repr(x) for x in self.offsets) + "\n" + for i, content in enumerate(self.contents): + out += content.tostring_part(indent + " ", "".format(i), "\n") + out += indent + "" + post + return out + +class OptionArray(Content): + def __init__(self, offsets, content): + assert isinstance(offsets, list) + assert isinstance(content, Content) + self.offsets = offsets + self.content = content + + def __len__(self): + return len(self.offsets) + + def 
__getitem__(self, where): + if isinstance(where, int): + if self.offsets[where] == -1: + return None + else: + return self.content[self.offsets[where]] + else: + return OptionArray(self.offsets[where], self.content) + + def tostring_part(self, indent, pre, post): + out = indent + pre + "\n" + out += indent + " " + " ".join(repr(x) for x in self.offsets) + "\n" + out += self.content.tostring_part(indent + " ", "", "\n") + out += indent + "" + post + return out + +class TupleArray(Content): + def __init__(self, contents): + assert len(contents) != 0 + assert all(isinstance(x, Content) for x in contents) + assert all(isinstance(x, EmptyArray) or len(contents[0]) == len(x) for x in contents) + self.contents = contents + + def __len__(self): + return len(self.contents[0]) + + def __getitem__(self, where): + if isinstance(where, int): + return tuple(x[where] for x in self.contents) + else: + return TupleArray([x[where] for x in self.contents]) + + def tostring_part(self, indent, pre, post): + out = indent + pre + "\n" + for i, content in enumerate(self.contents): + out += content.tostring_part(indent + " ", "".format(i), "\n") + out += indent + "" + post + return out + +class EmptyArray(Content): + def __init__(self): + pass + + def __len__(self): + return 0 + + def __getitem__(self, where): + if isinstance(where, int): + [][where] + else: + return EmptyArray() + def tostring_part(self, indent, pre, post): + return indent + pre + "" + post + +class EmptyTupleArray(Content): + def __init__(self, length): + self.length = length + + def __len__(self): + return self.length + + def __getitem__(self, where): + if isinstance(where, int): + if 0 <= where < self.length: + return () + else: + [][where] + else: + return EmptyTupleArray(len(range(*where.indices(self.length)))) + def tostring_part(self, indent, pre, post): + return indent + pre + "" + post + +################################################################ Content tests + +one = OptionArray([0, -1, 1, -1, 2, -1, 3, -1, 4, -1, 5, -1, 6],
UnionArray([1, 0, 1, 0, 1, 0, 1], [0, 0, 1, 1, 2, 2, 3], [FloatArray([100, 200, 300]), ListArray([0, 1, 4, 4, 5], ListArray([0, 3, 3, 5, 6, 9], FloatArray([1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8, 9.9])))])) +# print(one) +# print(list(one)) +assert list(one) == [[[1.1, 2.2, 3.3]], None, 100, None, [[], [4.4, 5.5], [6.6]], None, 200, None, [], None, 300, None, [[7.7, 8.8, 9.9]]] +# print() + +two = ListArray([0, 2, 2, 2, 3], TupleArray([FloatArray([1, 2, 3]), ListArray([0, 3, 3, 5], FloatArray([1.1, 2.2, 3.3, 4.4, 5.5]))])) +# print(two) +# print(list(two)) +assert list(two) == [[(1, [1.1, 2.2, 3.3]), (2, [])], [], [], [(3, [4.4, 5.5])]] +# print() + +################################################################ Fillables + +class FillableArray: + def __init__(self): + self.fillable = UnknownFillable.fromempty() + + def fill(self, x): + if x is None: + self._maybeupdate(self.fillable.null()) + elif isinstance(x, (int, float)): + self._maybeupdate(self.fillable.real(x)) + elif isinstance(x, list): + self._maybeupdate(self.fillable.beginlist()) + for y in x: + self.fill(y) + self._maybeupdate(self.fillable.endlist()) + elif isinstance(x, tuple): + self._maybeupdate(self.fillable.begintuple(len(x))) + for i, y in enumerate(x): + self.fillable.index(i) + self.fill(y) + self._maybeupdate(self.fillable.endtuple()) + else: + raise AssertionError(x) + + def active(self): + return self.fillable.active() + + def null(self): + self._maybeupdate(self.fillable.null()) + + def real(self, x): + self._maybeupdate(self.fillable.real(x)) + + def beginlist(self): + self._maybeupdate(self.fillable.beginlist()) + + def endlist(self): + self._maybeupdate(self.fillable.endlist()) + + def begintuple(self, numfields): + self._maybeupdate(self.fillable.begintuple(numfields)) + + def index(self, i): + self._maybeupdate(self.fillable.index(i)) + + def endtuple(self): + self._maybeupdate(self.fillable.endtuple()) + + def _maybeupdate(self, fillable): + assert fillable is not None + if 
fillable is not self.fillable: + self.fillable = fillable + + def snapshot(self): + return self.fillable.snapshot() + +class Fillable: + pass + +class UnknownFillable(Fillable): + def __init__(self, nullcount): + assert isinstance(nullcount, int) + self.nullcount = nullcount + + @classmethod + def fromempty(cls): + return UnknownFillable(0) + + def snapshot(self): + if self.nullcount == 0: + return EmptyArray() + else: + return OptionArray([-1] * self.nullcount, EmptyArray()) + + def __len__(self): + return self.nullcount + + def active(self): + return False + + def null(self): + self.nullcount += 1 + return self + + def real(self, x): + if self.nullcount == 0: + out = FloatFillable.fromempty() + else: + out = OptionFillable.fromnulls(self.nullcount, FloatFillable.fromempty()) + out.real(x) + return out + + def beginlist(self): + if self.nullcount == 0: + out = ListFillable.fromempty() + else: + out = OptionFillable.fromnulls(self.nullcount, ListFillable.fromempty()) + out.beginlist() + return out + + def endlist(self): + raise ValueError("called 'endlist' without corresponding 'beginlist'") + + def begintuple(self, numfields): + if self.nullcount == 0: + out = TupleFillable.fromempty() + else: + out = OptionFillable.fromnulls(self.nullcount, TupleFillable.fromempty()) + out.begintuple(numfields) + return out + + def index(self, i): + raise ValueError("called 'index' without corresponding 'begintuple'") + + def endtuple(self): + raise ValueError("called 'endtuple' without corresponding 'begintuple'") + +class OptionFillable(Fillable): + def __init__(self, offsets, content): + assert isinstance(offsets, list) + assert isinstance(content, Fillable) + self.offsets = offsets + self.content = content + + @classmethod + def fromnulls(cls, nullcount, content): + return cls([-1] * nullcount, content) + + @classmethod + def fromvalids(cls, content): + return cls(list(range(len(content))), content) + + def snapshot(self): + return OptionArray(list(self.offsets), 
self.content.snapshot()) + + def __len__(self): + return len(self.offsets) + + def active(self): + return self.content.active() + + def null(self): + if not self.content.active(): + self.offsets.append(-1) + else: + self.content.null() + return self + + def real(self, x): + if not self.content.active(): + length = len(self.content) + self._maybeupdate(self.content.real(x)) + self.offsets.append(length) + else: + self.content.real(x) + return self + + def beginlist(self): + if not self.content.active(): + self._maybeupdate(self.content.beginlist()) + else: + self.content.beginlist() + return self + + def endlist(self): + if not self.content.active(): + raise ValueError("'endlist' without corresponding 'beginlist'") + else: + length = len(self.content) + self.content.endlist() + if length != len(self.content): + self.offsets.append(length) + return self + + def begintuple(self, numfields): + if not self.content.active(): + self._maybeupdate(self.content.begintuple(numfields)) + else: + self.content.begintuple(numfields) + return self + + def index(self, i): + if not self.content.active(): + raise ValueError("'index' without corresponding 'begintuple'") + else: + self.content.index(i) + return self + + def endtuple(self): + if not self.content.active(): + raise ValueError("'endtuple' without corresponding 'begintuple'") + else: + length = len(self.content) + self.content.endtuple() + if length != len(self.content): + self.offsets.append(length) + return self + + def _maybeupdate(self, fillable): + assert fillable is not None + if fillable is not self.content: + self.content = fillable + +class UnionFillable(Fillable): + def __init__(self, tags, offsets, contents): + assert isinstance(tags, list) + assert isinstance(offsets, list) + assert all(isinstance(x, Fillable) for x in contents) + self.tags = tags + self.offsets = offsets + self.contents = contents + self.current = -1 + + @classmethod + def fromsingle(cls, firstcontent): + return UnionFillable([0] * 
len(firstcontent), + list(range(len(firstcontent))), + [firstcontent]) + + def snapshot(self): + return UnionArray(list(self.tags), list(self.offsets), [x.snapshot() for x in self.contents]) + + def __len__(self): + return len(self.tags) + + def active(self): + return self.current != -1 + + def null(self): + if self.current == -1: + out = OptionFillable.fromvalids(self) + out.null() + return out + + else: + self.contents[self.current].null() + return self + + def real(self, x): + if self.current == -1: + for i in range(len(self.contents)): + if isinstance(self.contents[i], FloatFillable): + break + else: + i = len(self.contents) + self.contents.append(FloatFillable.fromempty()) + length = len(self.contents[i]) + self.contents[i].real(x) + assert length + 1 == len(self.contents[i]) + self.tags.append(i) + self.offsets.append(length) + return self + + else: + self.contents[self.current].real(x) + return self + + def beginlist(self): + if self.current == -1: + for i in range(len(self.contents)): + if isinstance(self.contents[i], ListFillable): + break + else: + i = len(self.contents) + self.contents.append(ListFillable.fromempty()) + self.contents[i].beginlist() + self.current = i + return self + + else: + self.contents[self.current].beginlist() + return self + + def endlist(self): + if self.current == -1: + raise ValueError("'endlist' called without corresponding 'beginlist'") + + else: + length = len(self.contents[self.current]) + self.contents[self.current].endlist() + if length != len(self.contents[self.current]): + self.tags.append(self.current) + self.offsets.append(length) + self.current = -1 + return self + + def begintuple(self, numfields): + if self.current == -1: + for i in range(len(self.contents)): + if isinstance(self.contents[i], TupleFillable) and (self.contents[i].length == -1 or len(self.contents[i].contents) == numfields): + break + else: + i = len(self.contents) + self.contents.append(TupleFillable.fromempty()) + 
self.contents[i].begintuple(numfields) + self.current = i + return self + + else: + self.contents[self.current].begintuple(numfields) + return self + + def index(self, i): + if self.current == -1: + raise ValueError("'index' called without corresponding 'begintuple'") + + else: + self.contents[self.current].index(i) + return self + + def endtuple(self): + if self.current == -1: + raise ValueError("'endtuple' called without corresponding 'begintuple'") + + else: + length = len(self.contents[self.current]) + self.contents[self.current].endtuple() + if length != len(self.contents[self.current]): + self.tags.append(self.current) + self.offsets.append(length) + self.current = -1 + return self + +class ListFillable(Fillable): + def __init__(self, offsets, content): + assert isinstance(offsets, list) + assert isinstance(content, Fillable) + self.offsets = offsets + self.content = content + self.begun = False + + @classmethod + def fromempty(cls): + return ListFillable([0], UnknownFillable.fromempty()) + + def snapshot(self): + return ListArray(list(self.offsets), self.content.snapshot()) + + def __len__(self): + return len(self.offsets) - 1 + + def active(self): + return self.begun + + def null(self): + if not self.begun: + out = OptionFillable.fromvalids(self) + out.null() + return out + else: + self._maybeupdate(self.content.null()) + return self + + def real(self, x): + if not self.begun: + out = UnionFillable.fromsingle(self) + out.real(x) + return out + else: + self._maybeupdate(self.content.real(x)) + return self + + def beginlist(self): + if not self.begun: + self.begun = True + else: + self._maybeupdate(self.content.beginlist()) + return self + + def endlist(self): + if not self.begun: + raise ValueError("called 'endlist' without corresponding 'beginlist'") + elif not self.content.active(): + self.offsets.append(len(self.content)) + self.begun = False + else: + self._maybeupdate(self.content.endlist()) + return self + + def begintuple(self, numfields): + if not 
self.begun: + out = UnionFillable.fromsingle(self) + out.begintuple(numfields) + return out + else: + self._maybeupdate(self.content.begintuple(numfields)) + return self + + def index(self, i): + if not self.begun: + raise ValueError("called 'index' without corresponding 'begintuple'") + else: + self.content.index(i) + return self + + def endtuple(self): + if not self.begun: + raise ValueError("called 'endtuple' without corresponding 'begintuple'") + else: + self.content.endtuple() + return self + + def _maybeupdate(self, fillable): + assert fillable is not None + if fillable is not self.content: + self.content = fillable + +class TupleFillable(Fillable): + def __init__(self): + self.length = -1 + self.begun = False + + @classmethod + def fromempty(cls): + return TupleFillable() + + def snapshot(self): + assert self.length != -1 + if len(self.contents) == 0: + return EmptyTupleArray(self.length) + else: + return TupleArray([x.snapshot() for x in self.contents]) + + def __len__(self): + return self.length + + def active(self): + return self.begun + + def null(self): + assert self.length != -1 + + if not self.begun: + out = OptionFillable.fromvalids(self) + out.null() + return out + + elif self.nextindex == -1: + raise ValueError("'null' called immediately after 'begintuple'; needs 'index' or 'endtuple'") + + elif not self.contents[self.nextindex].active(): + self._maybeupdate(self.nextindex, self.contents[self.nextindex].null()) + + else: + self.contents[self.nextindex].null() + + return self + + def real(self, x): + assert self.length != -1 + + if not self.begun: + out = UnionFillable.fromsingle(self) + out.real(x) + return out + + elif self.nextindex == -1: + raise ValueError("'real' called immediately after 'begintuple'; needs 'index' or 'endtuple'") + + elif not self.contents[self.nextindex].active(): + self._maybeupdate(self.nextindex, self.contents[self.nextindex].real(x)) + + else: + self.contents[self.nextindex].real(x) + + return self + + def 
beginlist(self): + assert self.length != -1 + + if not self.begun: + out = UnionFillable.fromsingle(self) + out.beginlist() + return out + + elif self.nextindex == -1: + raise ValueError("'beginlist' called immediately after 'begintuple'; needs 'index' or 'endtuple'") + + elif not self.contents[self.nextindex].active(): + self._maybeupdate(self.nextindex, self.contents[self.nextindex].beginlist()) + + else: + self.contents[self.nextindex].beginlist() + + return self + + def endlist(self): + assert self.length != -1 + + if not self.begun: + raise ValueError("'endlist' called without a corresponding 'beginlist'") + + elif self.nextindex == -1: + raise ValueError("'endlist' called immediately after 'begintuple'; needs 'index' or 'endtuple'") + + else: + self.contents[self.nextindex].endlist() + + return self + + def begintuple(self, numfields): + if self.length == -1: + self.contents = [UnknownFillable.fromempty() for i in range(numfields)] + self.length = 0 + + if not self.begun and numfields == len(self.contents): + self.begun = True + self.nextindex = -1 + + elif not self.begun: + out = UnionFillable.fromsingle(self) + out.begintuple(numfields) + return out + + elif self.nextindex == -1: + raise ValueError("'begintuple' called immediately after 'begintuple'; needs 'index' or 'endtuple'") + + elif not self.contents[self.nextindex].active(): + self._maybeupdate(self.nextindex, self.contents[self.nextindex].begintuple(numfields)) + + else: + self.contents[self.nextindex].begintuple(numfields) + + return self + + def index(self, i): + assert self.length != -1 + + if not self.begun: + raise ValueError("'index' called without corresponding 'begintuple'") + + elif self.nextindex == -1 or not self.contents[self.nextindex].active(): + self.nextindex = i + + else: + self.contents[self.nextindex].index(i) + + return self + + def endtuple(self): + assert self.length != -1 + + if not self.begun: + raise ValueError("'endtuple' called without corresponding 'begintuple'") + + elif 
self.nextindex == -1 or not self.contents[self.nextindex].active(): + for i in range(len(self.contents)): + if len(self.contents[i]) == self.length: + self._maybeupdate(i, self.contents[i].null()) + if len(self.contents[i]) != self.length + 1: + raise ValueError("tuple index {} filled more than once".format(i)) + self.length += 1 + self.begun = False + + else: + self.contents[self.nextindex].endtuple() + + return self + + def _maybeupdate(self, index, fillable): + assert fillable is not None + if fillable is not self.contents[index]: + self.contents[index] = fillable + +class FloatFillable(Fillable): + def __init__(self, data): + assert isinstance(data, list) + self.data = data + + @classmethod + def fromempty(cls): + return FloatFillable([]) + + def snapshot(self): + return FloatArray(list(self.data)) + + def __len__(self): + return len(self.data) + + def active(self): + return False + + def null(self): + out = OptionFillable.fromvalids(self) + out.null() + return out + + def real(self, x): + self.data.append(x) + return self + + def beginlist(self): + out = UnionFillable.fromsingle(self) + out.beginlist() + return out + + def endlist(self): + raise ValueError("'endlist' called without corresponding 'beginlist'") + + def begintuple(self, numfields): + out = UnionFillable.fromsingle(self) + out.begintuple(numfields) + return out + + def index(self, i): + raise ValueError("'index' called without corresponding 'begintuple'") + + def endtuple(self): + raise ValueError("'endtuple' called without corresponding 'begintuple'") + +################################################################ Fillable tests + +# underfilling tuple field is okay (becomes None) +fillable = FillableArray() +assert list(fillable.snapshot()) == [] +fillable.begintuple(2) +fillable.index(0) +fillable.real(1.1) +fillable.endtuple() +assert list(fillable.snapshot()) == [(1.1, None)] +fillable.begintuple(2) +fillable.index(0) +fillable.real(2.2) +fillable.endtuple() +assert 
list(fillable.snapshot()) == [(1.1, None), (2.2, None)] +fillable.begintuple(2) +fillable.index(1) +fillable.real(3.3) +fillable.endtuple() +assert list(fillable.snapshot()) == [(1.1, None), (2.2, None), (None, 3.3)] + +# overfilling tuple field is bad (raises error) +fillable = FillableArray() +fillable.begintuple(2) +fillable.index(0) +fillable.real(1.1) +fillable.index(1) +fillable.real(2.2) +fillable.real(3.3) +try: + fillable.endtuple() +except ValueError: + pass +else: + raise AssertionError + +datasets = [ + [], + [None], + [None, None, None], + [1.1, 2.2, 3.3], + [None, 1.1, 2.2, 3.3], + [1.1, None, 2.2, 3.3], + [None, 1.1, None, 2.2, 3.3], + [1.1, 2.2, 3.3, None], + [1.1, 2.2, None, 3.3, None], + [None, 1.1, 2.2, 3.3, None], + [(1, 1.1), (2, 2.2), (3, 3.3)], + [(1, (2, 3)), (10, (20, 30)), (100, (200, 300))], + [(1, (2, 3, 4)), (10, (20, 30, 40)), (100, (200, 300, 400))], + [((1, 2), (3, 4)), ((10, 20), (30, 40)), ((100, 200), (300, 400))], + [((1, 2, 3), (4, 5)), ((10, 20, 30), (40, 50)), ((100, 200, 300), (400, 500))], + [((1, 2, 3), (4, 5, 6)), ((10, 20, 30), (40, 50, 60)), ((100, 200, 300), (400, 500, 600))], + [(1, (2, (3, 4))), (10, (20, (30, 40))), (100, (200, (300, 400)))], + [(1, ((2, 3), 4)), (10, ((20, 30), 40)), (100, ((200, 300), 400))], + [[1.1], [1.1, 2.2], [1.1, 2.2, 3.3]], + [None, [1.1], [1.1, 2.2], [1.1, 2.2, 3.3]], + [[1.1], None, [1.1, 2.2], [1.1, 2.2, 3.3]], + [None, [1.1], None, [1.1, 2.2], [1.1, 2.2, 3.3]], + [[1.1], [1.1, 2.2], [1.1, 2.2, 3.3], None], + [[1.1], None, [1.1, 2.2], [1.1, 2.2, 3.3], None], + [None, [1.1], [1.1, 2.2], [1.1, 2.2, 3.3], None], + [[1.1], [1.1, 2.2], [1.1, None, 3.3]], + [None, [1.1], [1.1, 2.2], [1.1, None, 3.3]], + [[1.1, 2.2, 3.3], [], [4.4, 5.5]], + [None, [1.1, 2.2, 3.3], [], [4.4, 5.5]], + [[1.1, 2.2, 3.3], None, [], [4.4, 5.5]], + [None, [1.1, 2.2, 3.3], None, [], [4.4, 5.5]], + [[1.1, 2.2, 3.3], [], [4.4, 5.5], None], + [[1.1, None, 3.3], [], [4.4, 5.5]], + [[1.1, 2.2, 3.3], [], [None, 5.5]], + 
[None, [1.1, None, 3.3], [], [4.4, 5.5]], + [None, [1.1, 2.2, 3.3], [], [None, 5.5]], + [[1.1, None, 3.3], None, [], [4.4, 5.5]], + [[1.1, 2.2, 3.3], None, [], [None, 5.5]], + [None, [1.1, None, 3.3], [], [4.4, 5.5]], + [None, [1.1, 2.2, 3.3], [], [None, 5.5]], + [[(1, 1.1)], [], [(2, 2.2), (3, 3.3)]], + [None, [(1, 1.1)], [], [(2, 2.2), (3, 3.3)]], + [[(1, 1.1)], None, [], [(2, 2.2), (3, 3.3)]], + [None, [(1, 1.1)], None, [], [(2, 2.2), (3, 3.3)]], + [[(1, 1.1)], [], [(2, None), (3, 3.3)]], + [[(None, 1.1)], [], [(2, 2.2), (3, 3.3)]], + [None, [(None, 1.1)], [], [(2, 2.2), (3, 3.3)]], + [(1, []), (2, [1.1]), (3, [2.2, 3.3])], + [None, (1, []), (2, [1.1]), (3, [2.2, 3.3])], + [(1, []), None, (2, [1.1]), (3, [2.2, 3.3])], + [None, (1, []), None, (2, [1.1]), (3, [2.2, 3.3])], + [(1, []), (2, [1.1]), (3, [None, 3.3])], + [None, (1, []), (2, [1.1]), (3, [None, 3.3])], + [(1, []), None, (2, [1.1]), (3, [None, 3.3])], + [1.1, [], 2.2], + [1.1, [2.2], 3.3], + [None, 1.1, [2.2], 3.3], + [1.1, None, [2.2], 3.3], + [1.1, [2.2], None, 3.3], + [None, 1.1, [2.2], None, 3.3], + [1.1, [2.2, None], 3.3], + [1.1, [None, 2.2], 3.3], + [None, 1.1, [2.2, None], 3.3], + [None, 1.1, [None, 2.2], 3.3], + [1.1, None, [2.2, None], 3.3], + [1.1, None, [None, 2.2], 3.3], + [1.1, [2.2, None], None, 3.3], + [1.1, [None, 2.2], None, 3.3], + [None, 1.1, [2.2, None], None, 3.3], + [None, 1.1, [None, 2.2], None, 3.3], + [1.1, (2, 2.2), 3.3], + [None, 1.1, (2, 2.2), 3.3], + [1.1, None, (2, 2.2), 3.3], + [1.1, (2, 2.2), None, 3.3], + [1.1, (2, None), 3.3], + [None, 1.1, (2, None), 3.3], + [1.1, None, (2, None), 3.3], + [1.1, (2, None), None, 3.3], + [[1.1, 2.2], (3.3, 4.4), [5.5]], + [None, [1.1, 2.2], (3.3, 4.4), [5.5]], + [[1.1, 2.2], None, (3.3, 4.4), [5.5]], + [[1.1, 2.2], 3.3, [4.4]], + [None, [1.1, 2.2], 3.3, [4.4]], + [[1.1, 2.2], None, 3.3, [4.4]], + [(1, 2.2), 3.3, [5.5]], + [None, (1, 2.2), 3.3, [5.5]], + [(1, 2.2), None, 3.3, [5.5]], + [(1, 2.2), [3.3], 5.5], + [None, (1, 2.2), [3.3], 
5.5], + [(1, 2.2), [3.3], 5.5], + [(1, 1.1), (2, 2.2, 200), (3, 3.3)], + [None, (1, 1.1), (2, 2.2, 200), (3, 3.3)], + [(1, 1.1), None, (2, 2.2, 200), (3, 3.3)], + [1.1, [2.2, 3.3], [[4.4], [5.5, 6.6]]], + [1.1, (2.2, 3.3), ((4.4,), (5.5, 6.6))], + [(1.1, [2.2, 3.3]), (100, [200, 300])], + ] + +for dataset in datasets: + fillable = FillableArray() + for x in dataset: + fillable.fill(x) + if list(fillable.snapshot()) != dataset: + print(dataset) + print(list(fillable.snapshot())) + raise AssertionError diff --git a/tests/test_PR008_slices_and_getitem.py b/tests/test_PR008_slices_and_getitem.py index 5d827d800c..3371bf8b2c 100644 --- a/tests/test_PR008_slices_and_getitem.py +++ b/tests/test_PR008_slices_and_getitem.py @@ -12,10 +12,10 @@ def test_slice(): assert repr(awkward1.layout.Slice(3)) == "[3]" - assert repr(awkward1.layout.Slice(slice(None))) == "[::]" - assert repr(awkward1.layout.Slice(slice(10))) == "[:10:]" - assert repr(awkward1.layout.Slice(slice(1, 2))) == "[1:2:]" - assert repr(awkward1.layout.Slice(slice(1, None))) == "[1::]" + assert repr(awkward1.layout.Slice(slice(None))) == "[:]" + assert repr(awkward1.layout.Slice(slice(10))) == "[:10]" + assert repr(awkward1.layout.Slice(slice(1, 2))) == "[1:2]" + assert repr(awkward1.layout.Slice(slice(1, None))) == "[1:]" assert repr(awkward1.layout.Slice(slice(None, None, 2))) == "[::2]" assert repr(awkward1.layout.Slice(slice(1, 2, 3))) == "[1:2:3]" if not py27: @@ -34,16 +34,15 @@ def test_slice(): assert repr(awkward1.layout.Slice(())) == "[]" assert repr(awkward1.layout.Slice((3,))) == "[3]" assert repr(awkward1.layout.Slice((3, slice(1, 2, 3)))) == "[3, 1:2:3]" - assert repr(awkward1.layout.Slice((slice(None), [1, 2, 3]))) == "[::, array([1, 2, 3])]" - assert repr(awkward1.layout.Slice(([1, 2, 3], slice(None)))) == "[array([1, 2, 3]), ::]" - assert repr(awkward1.layout.Slice((slice(None), [True, True, False, False, True]))) == "[::, array([0, 1, 4])]" - assert repr(awkward1.layout.Slice((slice(None), 
[[True, True], [False, False], [True, False]]))) == "[::, array([0, 0, 2]), array([0, 1, 0])]" - assert repr(awkward1.layout.Slice(([[True, True], [False, False], [True, False]], slice(None)))) == "[array([0, 0, 2]), array([0, 1, 0]), ::]" + assert repr(awkward1.layout.Slice((slice(None), [1, 2, 3]))) == "[:, array([1, 2, 3])]" + assert repr(awkward1.layout.Slice(([1, 2, 3], slice(None)))) == "[array([1, 2, 3]), :]" + assert repr(awkward1.layout.Slice((slice(None), [True, True, False, False, True]))) == "[:, array([0, 1, 4])]" + assert repr(awkward1.layout.Slice((slice(None), [[True, True], [False, False], [True, False]]))) == "[:, array([0, 0, 2]), array([0, 1, 0])]" + assert repr(awkward1.layout.Slice(([[True, True], [False, False], [True, False]], slice(None)))) == "[array([0, 0, 2]), array([0, 1, 0]), :]" with pytest.raises(ValueError): awkward1.layout.Slice(numpy.array([1.1, 2.2, 3.3])) - with pytest.raises(ValueError): - awkward1.layout.Slice(numpy.array(["one", "two", "three"])) + assert repr(awkward1.layout.Slice(numpy.array(["one", "two", "three"]))) == '[["one", "two", "three"]]' with pytest.raises(ValueError): awkward1.layout.Slice(numpy.array([1, 2, 3, None, 4, 5])) @@ -60,8 +59,8 @@ def test_slice(): awkward1.layout.Slice((slice(None), 3, slice(None), [[1], [2], [3]], slice(None))) with pytest.raises(ValueError): awkward1.layout.Slice((slice(None), [[1, 2, 3, 4]], slice(None), [[1], [2], [3]], slice(None))) - assert repr(awkward1.layout.Slice((slice(None), 3, [[1], [2], [3]], slice(None)))) == "[::, array([[3], [3], [3]]), array([[1], [2], [3]]), ::]" - assert repr(awkward1.layout.Slice((slice(None), [[1, 2, 3, 4]], [[1], [2], [3]], slice(None)))) == "[::, array([[1, 2, 3, 4], [1, 2, 3, 4], [1, 2, 3, 4]]), array([[1, 1, 1, 1], [2, 2, 2, 2], [3, 3, 3, 3]]), ::]" + assert repr(awkward1.layout.Slice((slice(None), 3, [[1], [2], [3]], slice(None)))) == "[:, array([[3], [3], [3]]), array([[1], [2], [3]]), :]" + assert repr(awkward1.layout.Slice((slice(None), 
[[1, 2, 3, 4]], [[1], [2], [3]], slice(None)))) == "[:, array([[1, 2, 3, 4], [1, 2, 3, 4], [1, 2, 3, 4]]), array([[1, 1, 1, 1], [2, 2, 2, 2], [3, 3, 3, 3]]), :]" def test_numpyarray_getitem_bystrides(): a = numpy.arange(10) diff --git a/tests/test_PR018_fromiter_fillable.py b/tests/test_PR018_fromiter_fillable.py index 9cdf2fecaf..fca7d5086a 100644 --- a/tests/test_PR018_fromiter_fillable.py +++ b/tests/test_PR018_fromiter_fillable.py @@ -17,7 +17,7 @@ def test_types(): t4b = awkward1.layout.ListType(awkward1.layout.PrimitiveType("int32")) t5 = awkward1.layout.ListType(t4) t6 = awkward1.layout.OptionType(t4) - assert repr(t0) == "???" + assert repr(t0) == "unknown" assert repr(t1) == "int32" assert repr(t2) == "?int32" assert repr(t3) == "union[int32, float64]" diff --git a/tests/test_PR019_use_json_library.py b/tests/test_PR019_use_json_library.py index 46c2c24191..a37a782556 100644 --- a/tests/test_PR019_use_json_library.py +++ b/tests/test_PR019_use_json_library.py @@ -83,3 +83,14 @@ def test_fromiter(): assert awkward1.tolist(awkward1.fromiter([5, 4, 3.14, 2.22, 1.23])) == [5.0, 4.0, 3.14, 2.22, 1.23] assert awkward1.tolist(awkward1.fromiter([[1.1, 2.2, 3.3], [], [4.4, 5.5]])) == [[1.1, 2.2, 3.3], [], [4.4, 5.5]] assert awkward1.tolist(awkward1.fromiter([[[1.1, 2.2, 3.3], []], [[4.4, 5.5]], [], [[6.6], [7.7, 8.8, 9.9]]])) == [[[1.1, 2.2, 3.3], []], [[4.4, 5.5]], [], [[6.6], [7.7, 8.8, 9.9]]] + +def test_numpy(): + a = awkward1.layout.NumpyArray(numpy.arange(2*3*5).reshape(2, 3, 5)) + assert awkward1.tolist(a) == [[[0, 1, 2, 3, 4], [5, 6, 7, 8, 9], [10, 11, 12, 13, 14]], [[15, 16, 17, 18, 19], [20, 21, 22, 23, 24], [25, 26, 27, 28, 29]]] + assert awkward1.tojson(a) == "[[[0,1,2,3,4],[5,6,7,8,9],[10,11,12,13,14]],[[15,16,17,18,19],[20,21,22,23,24],[25,26,27,28,29]]]" + + b = awkward1.layout.NumpyArray(numpy.array([[[1.1, 2.2, 3.3], [4.4, 5.5, 6.6]], [[10.1, 20.2, 30.3], [40.4, 50.5, 60.6]]])) + assert awkward1.tojson(b) == 
"[[[1.1,2.2,3.3],[4.4,5.5,6.6]],[[10.1,20.2,30.3],[40.4,50.5,60.6]]]" + + c = awkward1.layout.NumpyArray(numpy.array([[True, False, True], [False, False, True]])) + assert awkward1.tojson(c) == "[[true,false,true],[false,false,true]]" diff --git a/tests/test_PR021_emptyarray.py b/tests/test_PR021_emptyarray.py index 357e84fab3..6f96013dac 100644 --- a/tests/test_PR021_emptyarray.py +++ b/tests/test_PR021_emptyarray.py @@ -12,14 +12,14 @@ def test_unknown(): a = awkward1.fromjson("[[], [], []]") assert awkward1.tolist(a) == [[], [], []] - assert str(awkward1.typeof(a)) == "3 * var * ???" + assert str(awkward1.typeof(a)) == "3 * var * unknown" assert awkward1.typeof(a).compatible(awkward1.layout.ArrayType(awkward1.layout.ListType(awkward1.layout.UnknownType()), 3)) assert awkward1.typeof(a).compatible(awkward1.layout.ArrayType(awkward1.layout.ListType(awkward1.layout.PrimitiveType("float64")), 3)) assert not awkward1.typeof(a).compatible(awkward1.layout.ArrayType(awkward1.layout.PrimitiveType("float64"), 3)) a = awkward1.fromjson("[[], [[], []], [[], [], []]]") assert awkward1.tolist(a) == [[], [[], []], [[], [], []]] - assert str(awkward1.typeof(a)) == "3 * var * var * ???" + assert str(awkward1.typeof(a)) == "3 * var * var * unknown" assert awkward1.typeof(a).compatible(awkward1.layout.ArrayType(awkward1.layout.ListType(awkward1.layout.ListType(awkward1.layout.UnknownType())), 3)) a = awkward1.layout.FillableArray() @@ -30,14 +30,14 @@ def test_unknown(): a.beginlist() a.endlist() assert awkward1.tolist(a) == [[], [], []] - assert str(awkward1.typeof(a)) == "3 * var * ???" 
+ assert str(awkward1.typeof(a)) == "3 * var * unknown" assert awkward1.typeof(a).compatible(awkward1.layout.ArrayType(awkward1.layout.ListType(awkward1.layout.UnknownType()), 3)) assert awkward1.typeof(a).compatible(awkward1.layout.ArrayType(awkward1.layout.ListType(awkward1.layout.PrimitiveType("float64")), 3)) assert not awkward1.typeof(a).compatible(awkward1.layout.ArrayType(awkward1.layout.PrimitiveType("float64"), 3)) a = a.snapshot() assert awkward1.tolist(a) == [[], [], []] - assert str(awkward1.typeof(a)) == "3 * var * ???" + assert str(awkward1.typeof(a)) == "3 * var * unknown" assert awkward1.typeof(a).compatible(awkward1.layout.ArrayType(awkward1.layout.ListType(awkward1.layout.UnknownType()), 3)) assert awkward1.typeof(a).compatible(awkward1.layout.ArrayType(awkward1.layout.ListType(awkward1.layout.PrimitiveType("float64")), 3)) assert not awkward1.typeof(a).compatible(awkward1.layout.ArrayType(awkward1.layout.PrimitiveType("float64"), 3)) diff --git a/tests/test_PR025_record_array.py b/tests/test_PR025_record_array.py new file mode 100644 index 0000000000..0bbc7b9b04 --- /dev/null +++ b/tests/test_PR025_record_array.py @@ -0,0 +1,407 @@ +# BSD 3-Clause License; see https://github.com/jpivarski/awkward-1.0/blob/master/LICENSE + +import sys +import itertools + +import pytest +import numpy + +import awkward1 + +def test_basic(): + content1 = awkward1.layout.NumpyArray(numpy.array([1, 2, 3, 4, 5])) + content2 = awkward1.layout.NumpyArray(numpy.array([1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8, 9.9])) + offsets = awkward1.layout.Index64(numpy.array([0, 3, 3, 5, 6, 9])) + listoffsetarray = awkward1.layout.ListOffsetArray64(offsets, content2) + recordarray = awkward1.layout.RecordArray(0) + recordarray.append(content1, "one") + recordarray.append(listoffsetarray, "two") + recordarray.append(content2) + recordarray.setkey(0, "wonky") + assert awkward1.tolist(recordarray.field(0)) == [1, 2, 3, 4, 5] + assert awkward1.tolist(recordarray.field("two")) == [[1.1, 2.2, 
3.3], [], [4.4, 5.5], [6.6], [7.7, 8.8, 9.9]] + assert awkward1.tolist(recordarray.field("wonky")) == [1, 2, 3, 4, 5] + + str(recordarray) + assert awkward1.tojson(recordarray) == '[{"wonky":1,"two":[1.1,2.2,3.3],"2":1.1},{"wonky":2,"two":[],"2":2.2},{"wonky":3,"two":[4.4,5.5],"2":3.3},{"wonky":4,"two":[6.6],"2":4.4},{"wonky":5,"two":[7.7,8.8,9.9],"2":5.5}]' + + assert len(recordarray) == 5 + assert recordarray.key(0) == "wonky" + assert recordarray.key(1) == "two" + assert recordarray.key(2) == "2" + assert recordarray.index("wonky") == 0 + assert recordarray.index("one") == 0 + assert recordarray.index("0") == 0 + assert recordarray.index("two") == 1 + assert recordarray.index("1") == 1 + assert recordarray.index("2") == 2 + assert recordarray.has("wonky") + assert recordarray.has("one") + assert recordarray.has("0") + assert recordarray.has("two") + assert recordarray.has("1") + assert recordarray.has("2") + assert set(recordarray.aliases(0)) == set(["wonky", "one", "0"]) + assert set(recordarray.aliases("wonky")) == set(["wonky", "one", "0"]) + assert set(recordarray.aliases("one")) == set(["wonky", "one", "0"]) + assert set(recordarray.aliases("0")) == set(["wonky", "one", "0"]) + assert set(recordarray.aliases(1)) == set(["two", "1"]) + assert set(recordarray.aliases("two")) == set(["two", "1"]) + assert set(recordarray.aliases("1")) == set(["two", "1"]) + assert set(recordarray.aliases(2)) == set(["2"]) + assert set(recordarray.aliases("2")) == set(["2"]) + + assert recordarray.keys() == ["wonky", "two", "2"] + assert [awkward1.tolist(x) for x in recordarray.values()] == [[1, 2, 3, 4, 5], [[1.1, 2.2, 3.3], [], [4.4, 5.5], [6.6], [7.7, 8.8, 9.9]], [1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8, 9.9]] + pairs = recordarray.items() + assert pairs[0][0] == "wonky" + assert pairs[1][0] == "two" + assert pairs[2][0] == "2" + assert awkward1.tolist(pairs[0][1]) == [1, 2, 3, 4, 5] + assert awkward1.tolist(pairs[1][1]) == [[1.1, 2.2, 3.3], [], [4.4, 5.5], [6.6], [7.7, 8.8, 
9.9]] + assert awkward1.tolist(pairs[2][1]) == [1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8, 9.9] + + assert awkward1.tojson(recordarray.withoutkeys) == '[{"0":1,"1":[1.1,2.2,3.3],"2":1.1},{"0":2,"1":[],"2":2.2},{"0":3,"1":[4.4,5.5],"2":3.3},{"0":4,"1":[6.6],"2":4.4},{"0":5,"1":[7.7,8.8,9.9],"2":5.5}]' + +def test_scalar_record(): + content1 = awkward1.layout.NumpyArray(numpy.array([1, 2, 3, 4, 5])) + content2 = awkward1.layout.NumpyArray(numpy.array([1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8, 9.9])) + offsets = awkward1.layout.Index64(numpy.array([0, 3, 3, 5, 6, 9])) + listoffsetarray = awkward1.layout.ListOffsetArray64(offsets, content2) + recordarray = awkward1.layout.RecordArray(0) + recordarray.append(content1, "one") + recordarray.append(listoffsetarray, "two") + + str(recordarray) + str(recordarray[2]) + assert awkward1.tojson(recordarray[2]) == '{"one":3,"two":[4.4,5.5]}' + + assert recordarray[2].keys() == ["one", "two"] + assert [awkward1.tolist(x) for x in recordarray[2].values()] == [3, [4.4, 5.5]] + pairs = recordarray[2].items() + assert pairs[0][0] == "one" + assert pairs[1][0] == "two" + assert pairs[0][1] == 3 + assert awkward1.tolist(pairs[1][1]) == [4.4, 5.5] + assert awkward1.tolist(recordarray[2]) == {"one": 3, "two": [4.4, 5.5]} + +def test_type(): + content1 = awkward1.layout.NumpyArray(numpy.array([1, 2, 3, 4, 5], dtype=numpy.int64)) + content2 = awkward1.layout.NumpyArray(numpy.array([1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8, 9.9], dtype=numpy.float64)) + offsets = awkward1.layout.Index64(numpy.array([0, 3, 3, 5, 6, 9])) + listoffsetarray = awkward1.layout.ListOffsetArray64(offsets, content2) + recordarray = awkward1.layout.RecordArray(0, True) + recordarray.append(content1) + recordarray.append(listoffsetarray) + assert str(awkward1.typeof(recordarray)) == '5 * (int64, var * float64)' + + assert awkward1.typeof(recordarray) == awkward1.layout.ArrayType(awkward1.layout.RecordType( + awkward1.layout.PrimitiveType("int64"), + 
awkward1.layout.ListType(awkward1.layout.PrimitiveType("float64"))), 5) + assert awkward1.typeof(recordarray).compatible(awkward1.layout.ArrayType(awkward1.layout.RecordType( + awkward1.layout.PrimitiveType("int64"), + awkward1.layout.ListType(awkward1.layout.PrimitiveType("float64"))), 5)) + assert awkward1.typeof(recordarray[2]) == awkward1.layout.RecordType( + awkward1.layout.PrimitiveType("int64"), + awkward1.layout.ListType(awkward1.layout.PrimitiveType("float64"))) + assert awkward1.typeof(recordarray[2]).compatible(awkward1.layout.RecordType( + awkward1.layout.PrimitiveType("int64"), + awkward1.layout.ListType(awkward1.layout.PrimitiveType("float64")))) + + recordarray.setkey(0, "one") + recordarray.setkey(1, "two") + assert str(awkward1.typeof(recordarray)) in ('5 * {"one": int64, "two": var * float64}', '5 * {"two": var * float64, "one": int64}') + + assert str(awkward1.layout.RecordType( + awkward1.layout.PrimitiveType("int32"), + awkward1.layout.PrimitiveType("float64"))) == '(int32, float64)' + + assert str(awkward1.layout.RecordType( + one=awkward1.layout.PrimitiveType("int32"), + two=awkward1.layout.PrimitiveType("float64"))) in ('{"one": int32, "two": float64}', '{"two": float64, "one": int32}') + + assert awkward1.typeof(recordarray) == awkward1.layout.ArrayType(awkward1.layout.RecordType( + one=awkward1.layout.PrimitiveType("int64"), + two=awkward1.layout.ListType(awkward1.layout.PrimitiveType("float64"))), 5) + assert awkward1.typeof(recordarray).compatible(awkward1.layout.ArrayType(awkward1.layout.RecordType( + one=awkward1.layout.PrimitiveType("int64"), + two=awkward1.layout.ListType(awkward1.layout.PrimitiveType("float64"))), 5)) + assert awkward1.typeof(recordarray[2]) == awkward1.layout.RecordType( + one=awkward1.layout.PrimitiveType("int64"), + two=awkward1.layout.ListType(awkward1.layout.PrimitiveType("float64"))) + assert awkward1.typeof(recordarray[2]).compatible(awkward1.layout.RecordType( + one=awkward1.layout.PrimitiveType("int64"), + 
two=awkward1.layout.ListType(awkward1.layout.PrimitiveType("float64")))) + +def test_getitem(): + assert str(awkward1.layout.Slice((1, 2, [3], "four", ["five", "six"], slice(7, 8, 9)))) == '[array([1]), array([2]), array([3]), "four", ["five", "six"], 7:8:9]' + + content1 = awkward1.layout.NumpyArray(numpy.array([1, 2, 3, 4, 5], dtype=numpy.int64)) + content2 = awkward1.layout.NumpyArray(numpy.array([1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8, 9.9], dtype=numpy.float64)) + offsets = awkward1.layout.Index64(numpy.array([0, 3, 3, 5, 6, 9])) + listoffsetarray = awkward1.layout.ListOffsetArray64(offsets, content2) + recordarray = awkward1.layout.RecordArray([content1, listoffsetarray, content2]) + assert recordarray.istuple + + assert awkward1.tolist(recordarray["2"]) == [1.1, 2.2, 3.3, 4.4, 5.5] + assert awkward1.tolist(recordarray[["0", "1"]]) == [(1, [1.1, 2.2, 3.3]), (2, []), (3, [4.4, 5.5]), (4, [6.6]), (5, [7.7, 8.8, 9.9])] + assert awkward1.tolist(recordarray[["1", "0"]]) == [([1.1, 2.2, 3.3], 1), ([], 2), ([4.4, 5.5], 3), ([6.6], 4), ([7.7, 8.8, 9.9], 5)] + assert awkward1.tolist(recordarray[1:-1]) == [(2, [], 2.2), (3, [4.4, 5.5], 3.3), (4, [6.6], 4.4)] + assert awkward1.tolist(recordarray[2]) == (3, [4.4, 5.5], 3.3) + assert awkward1.tolist(recordarray[2]["1"]) == [4.4, 5.5] + assert awkward1.tolist(recordarray[2][["0", "1"]]) == (3, [4.4, 5.5]) + assert awkward1.tolist(recordarray[2][["1", "0"]]) == ([4.4, 5.5], 3) + + recordarray = awkward1.layout.RecordArray({"one": content1, "two": listoffsetarray, "three": content2}) + assert not recordarray.istuple + + assert awkward1.tolist(recordarray["three"]) == [1.1, 2.2, 3.3, 4.4, 5.5] + assert awkward1.tolist(recordarray[["one", "two"]]) == [{"one": 1, "two": [1.1, 2.2, 3.3]}, {"one": 2, "two": []}, {"one": 3, "two": [4.4, 5.5]}, {"one": 4, "two": [6.6]}, {"one": 5, "two": [7.7, 8.8, 9.9]}] + assert awkward1.tolist(recordarray[["two", "one"]]) == [{"one": 1, "two": [1.1, 2.2, 3.3]}, {"one": 2, "two": []}, {"one": 3, 
"two": [4.4, 5.5]}, {"one": 4, "two": [6.6]}, {"one": 5, "two": [7.7, 8.8, 9.9]}] + assert awkward1.tolist(recordarray[1:-1]) == [{"one": 2, "two": [], "three": 2.2}, {"one": 3, "two": [4.4, 5.5], "three": 3.3}, {"one": 4, "two": [6.6], "three": 4.4}] + assert awkward1.tolist(recordarray[2]) == {"one": 3, "two": [4.4, 5.5], "three": 3.3} + assert awkward1.tolist(recordarray[2]["two"]) == [4.4, 5.5] + assert awkward1.tolist(recordarray[2][["one", "two"]]) == {"one": 3, "two": [4.4, 5.5]} + assert awkward1.tolist(recordarray[2][["two", "one"]]) == {"one": 3, "two": [4.4, 5.5]} + +def test_getitem_other_types(): + content1 = awkward1.layout.NumpyArray(numpy.array([1, 2, 3, 4, 5], dtype=numpy.int64)) + content2 = awkward1.layout.NumpyArray(numpy.array([1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8, 9.9], dtype=numpy.float64)) + offsets1 = awkward1.layout.Index64(numpy.array([0, 3, 3, 5, 6, 9])) + listoffsetarray1 = awkward1.layout.ListOffsetArray64(offsets1, content2) + recordarray = awkward1.layout.RecordArray({"one": content1, "two": listoffsetarray1, "three": content2}) + + offsets2 = awkward1.layout.Index64(numpy.array([0, 3, 3, 5])) + listoffsetarray2 = awkward1.layout.ListOffsetArray64(offsets2, recordarray) + assert awkward1.tolist(listoffsetarray2["one"]) == [[1, 2, 3], [], [4, 5]] + assert awkward1.tolist(listoffsetarray2["two"]) == [[[1.1, 2.2, 3.3], [], [4.4, 5.5]], [], [[6.6], [7.7, 8.8, 9.9]]] + assert awkward1.tolist(listoffsetarray2["three"]) == [[1.1, 2.2, 3.3], [], [4.4, 5.5]] + assert awkward1.tolist(listoffsetarray2[["two", "three"]]) == [[{"two": [1.1, 2.2, 3.3], "three": 1.1}, {"two": [], "three": 2.2}, {"two": [4.4, 5.5], "three": 3.3}], [], [{"two": [6.6], "three": 4.4}, {"two": [7.7, 8.8, 9.9], "three": 5.5}]] + + starts2 = awkward1.layout.Index64(numpy.array([0, 3, 3])) + stops2 = awkward1.layout.Index64(numpy.array([3, 3, 5])) + listarray2 = awkward1.layout.ListArray64(starts2, stops2, recordarray) + assert awkward1.tolist(listarray2["one"]) == [[1, 
2, 3], [], [4, 5]] + assert awkward1.tolist(listarray2["two"]) == [[[1.1, 2.2, 3.3], [], [4.4, 5.5]], [], [[6.6], [7.7, 8.8, 9.9]]] + assert awkward1.tolist(listarray2["three"]) == [[1.1, 2.2, 3.3], [], [4.4, 5.5]] + assert awkward1.tolist(listarray2[["two", "three"]]) == [[{"two": [1.1, 2.2, 3.3], "three": 1.1}, {"two": [], "three": 2.2}, {"two": [4.4, 5.5], "three": 3.3}], [], [{"two": [6.6], "three": 4.4}, {"two": [7.7, 8.8, 9.9], "three": 5.5}]] + + regulararray2 = awkward1.layout.RegularArray(recordarray, 1) + assert awkward1.tolist(regulararray2["one"]) == [[1], [2], [3], [4], [5]] + assert awkward1.tolist(regulararray2["two"]) == [[[1.1, 2.2, 3.3]], [[]], [[4.4, 5.5]], [[6.6]], [[7.7, 8.8, 9.9]]] + assert awkward1.tolist(regulararray2["three"]) == [[1.1], [2.2], [3.3], [4.4], [5.5]] + assert awkward1.tolist(regulararray2[["two", "three"]]) == [[{"two": [1.1, 2.2, 3.3], "three": 1.1}], [{"two": [], "three": 2.2}], [{"two": [4.4, 5.5], "three": 3.3}], [{"two": [6.6], "three": 4.4}], [{"two": [7.7, 8.8, 9.9], "three": 5.5}]] + +def test_getitem_next(): + content1 = awkward1.layout.NumpyArray(numpy.array([1, 2, 3, 4, 5], dtype=numpy.int64)) + content2 = awkward1.layout.NumpyArray(numpy.array([1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8, 9.9], dtype=numpy.float64)) + content3 = awkward1.layout.NumpyArray(numpy.array([1, 2, 3, 4, 5, 6, 7, 8, 9], dtype=numpy.float64)) + offsets1 = awkward1.layout.Index64(numpy.array([0, 3, 3, 5, 6, 9])) + listoffsetarray1 = awkward1.layout.ListOffsetArray64(offsets1, content2) + listoffsetarray3 = awkward1.layout.ListOffsetArray64(offsets1, content3) + recordarray = awkward1.layout.RecordArray({"one": content1, "two": listoffsetarray1, "three": content2, "four": listoffsetarray3}) + offsets2 = awkward1.layout.Index64(numpy.array([0, 3, 3, 5])) + listoffsetarray2 = awkward1.layout.ListOffsetArray64(offsets2, recordarray) + + assert awkward1.tolist(listoffsetarray2[2, "one"]) == [4, 5] + assert awkward1.tolist(listoffsetarray2[2, "two"]) 
== [[6.6], [7.7, 8.8, 9.9]] + assert awkward1.tolist(listoffsetarray2[2, "three"]) == [4.4, 5.5] + assert awkward1.tolist(listoffsetarray2[2, ["two", "three"]]) == [{"two": [6.6], "three": 4.4}, {"two": [7.7, 8.8, 9.9], "three": 5.5}] + + assert awkward1.tolist(listoffsetarray2[2, 1]) == {"one": 5, "two": [7.7, 8.8, 9.9], "three": 5.5, "four": [7, 8, 9]} + with pytest.raises(ValueError): + listoffsetarray2[2, 1, 0] + assert listoffsetarray2[2, 1, "one"] == 5 + assert awkward1.tolist(listoffsetarray2[2, 1, "two"]) == [7.7, 8.8, 9.9] + assert listoffsetarray2[2, 1, "two", 1] == 8.8 + assert awkward1.tolist(listoffsetarray2[2, 1, ["two", "four"], 1]) == {"two": 8.8, "four": 8} + assert awkward1.tolist(listoffsetarray2[2, 1, ["two", "four"], 1:]) == {"two": [8.8, 9.9], "four": [8, 9]} + +def test_setid(): + content1 = awkward1.layout.NumpyArray(numpy.array([1, 2, 3, 4, 5])) + content2 = awkward1.layout.NumpyArray(numpy.array([1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8, 9.9])) + offsets = awkward1.layout.Index64(numpy.array([0, 3, 3, 5, 6, 9])) + listoffsetarray = awkward1.layout.ListOffsetArray64(offsets, content2) + + recordarray = awkward1.layout.RecordArray([content1, listoffsetarray]) + recordarray.setid() + + recordarray = awkward1.layout.RecordArray({"one": content1, "two": listoffsetarray}) + recordarray.setid() + assert recordarray["one"].id.fieldloc == [(0, "one")] + assert recordarray["two"].id.fieldloc == [(0, "two")] + assert recordarray["one", 1] == 2 + assert recordarray[1, "one"] == 2 + assert recordarray["two", 2, 1] == 5.5 + assert recordarray[2, "two", 1] == 5.5 + + recordarray = awkward1.layout.RecordArray({"one": content1, "two": listoffsetarray}) + recordarray2 = awkward1.layout.RecordArray({"outer": recordarray}) + recordarray2.setid() + assert recordarray2["outer"].id.fieldloc == [(0, "outer")] + assert recordarray2["outer", "one"].id.fieldloc == [(0, "outer"), (0, "one")] + assert recordarray2["outer", "two"].id.fieldloc == [(0, "outer"), (0, 
"two")] + assert recordarray2["outer", "one", 1] == 2 + assert recordarray2["outer", 1, "one"] == 2 + assert recordarray2[1, "outer", "one"] == 2 + assert recordarray2["outer", "two", 2, 1] == 5.5 + assert recordarray2["outer", 2, "two", 1] == 5.5 + assert recordarray2[2, "outer", "two", 1] == 5.5 + with pytest.raises(ValueError) as excinfo: + recordarray2["outer", "two", 0, 99] + assert str(excinfo.value) == 'in ListArray64 at id[0, "outer", "two"] attempting to get 99, index out of range' + assert recordarray2.location == () + assert recordarray2[2].location == (2,) + assert recordarray2[2, "outer"].location == (2, "outer") + assert recordarray2[2, "outer", "two"].location == (2, "outer", "two") + + recordarray = awkward1.layout.RecordArray({"one": content1, "two": listoffsetarray}) + recordarray2 = awkward1.layout.RecordArray({"outer": awkward1.layout.RegularArray(recordarray, 1)}) + recordarray2.setid() + assert recordarray2["outer"].id.fieldloc == [(0, "outer")] + assert recordarray2["outer", 0, "one"].id.fieldloc == [(0, "outer"), (1, "one")] + assert recordarray2["outer", 0, "two"].id.fieldloc == [(0, "outer"), (1, "two")] + assert recordarray2["outer", "one", 0].id.fieldloc == [(0, "outer"), (1, "one")] + assert recordarray2["outer", "two", 0].id.fieldloc == [(0, "outer"), (1, "two")] + assert recordarray2["outer", "one", 1, 0] == 2 + assert recordarray2["outer", 1, "one", 0] == 2 + assert recordarray2["outer", 1, 0, "one"] == 2 + assert recordarray2[1, "outer", "one", 0] == 2 + assert recordarray2[1, "outer", 0, "one"] == 2 + assert recordarray2[1, 0, "outer", "one"] == 2 + + with pytest.raises(ValueError) as excinfo: + recordarray2["outer", 2, "two", 0, 99] + assert str(excinfo.value) == 'in ListArray64 at id[2, "outer", 0, "two"] attempting to get 99, index out of range' + assert recordarray2.location == () + assert recordarray2[2].location == (2,) + assert recordarray2[2, "outer"].location == (2, "outer") + assert recordarray2[2, "outer", 0].location == 
(2, "outer", 0) + assert recordarray2[2, "outer", 0, "two"].location == (2, "outer", 0, "two") + +def test_fillable_tuple(): + fillable = awkward1.layout.FillableArray() + assert str(fillable.type) == '0 * unknown' + assert awkward1.tolist(fillable.snapshot()) == [] + + fillable.begintuple(0) + fillable.endtuple() + + fillable.begintuple(0) + fillable.endtuple() + + fillable.begintuple(0) + fillable.endtuple() + + assert str(fillable.type) == '3 * ()' + assert awkward1.tolist(fillable.snapshot()) == [(), (), ()] + + fillable = awkward1.layout.FillableArray() + + fillable.begintuple(3) + fillable.index(0) + fillable.boolean(True) + fillable.index(1) + fillable.beginlist() + fillable.integer(1) + fillable.endlist() + fillable.index(2) + fillable.real(1.1) + fillable.endtuple() + + fillable.begintuple(3) + fillable.index(1) + fillable.beginlist() + fillable.integer(2) + fillable.integer(2) + fillable.endlist() + fillable.index(2) + fillable.real(2.2) + fillable.index(0) + fillable.boolean(False) + fillable.endtuple() + + fillable.begintuple(3) + fillable.index(2) + fillable.real(3.3) + fillable.index(1) + fillable.beginlist() + fillable.integer(3) + fillable.integer(3) + fillable.integer(3) + fillable.endlist() + fillable.index(0) + fillable.boolean(True) + fillable.endtuple() + + assert str(fillable.type) == '3 * (bool, var * int64, float64)' + assert awkward1.tolist(fillable.snapshot()) == [(True, [1], 1.1), (False, [2, 2], 2.2), (True, [3, 3, 3], 3.3)] + +def test_fillable_record(): + fillable = awkward1.layout.FillableArray() + assert str(fillable.type) == '0 * unknown' + assert awkward1.tolist(fillable.snapshot()) == [] + + fillable.beginrecord() + fillable.endrecord() + + fillable.beginrecord() + fillable.endrecord() + + fillable.beginrecord() + fillable.endrecord() + + assert str(fillable.type) == '3 * {}' + assert awkward1.tolist(fillable.snapshot()) == [{}, {}, {}] + + fillable = awkward1.layout.FillableArray() + + fillable.beginrecord() + 
fillable.field("one") + fillable.integer(1) + fillable.field("two") + fillable.real(1.1) + fillable.endrecord() + + fillable.beginrecord() + fillable.field("two") + fillable.real(2.2) + fillable.field("one") + fillable.integer(2) + fillable.endrecord() + + fillable.beginrecord() + fillable.field("one") + fillable.integer(3) + fillable.field("two") + fillable.real(3.3) + fillable.endrecord() + + assert str(fillable.type) == '3 * {"one": int64, "two": float64}' + assert awkward1.tolist(fillable.snapshot()) == [{"one": 1, "two": 1.1}, {"one": 2, "two": 2.2}, {"one": 3, "two": 3.3}] + +def test_fromiter(): + dataset = [ + [(1, 1.1), (2, 2.2), (3, 3.3)], + [(1, [1.1, 2.2, 3.3]), (2, []), (3, [4.4, 5.5])], + [[(1, 1.1), (2, 2.2), (3, 3.3)], [], [(4, 4.4), (5, 5.5)]], + [((1, 1), 1.1), ((2, 2), 2.2), ((3, 3), 3.3)], + [({"x": 1, "y": 1}, 1.1), ({"x": 2, "y": 2}, 2.2), ({"x": 3, "y": 3}, 3.3)], + [{"one": 1, "two": 1.1}, {"one": 2, "two": 2.2}, {"one": 3, "two": 3.3}], + [{"one": 1, "two": [1.1, 2.2, 3.3]}, {"one": 2, "two": []}, {"one": 3, "two": [4.4, 5.5]}], + [[{"one": 1, "two": 1.1}, {"one": 2, "two": 2.2}, {"one": 3, "two": 3.3}], [], [{"one": 4, "two": 4.4}, {"one": 5, "two": 5.5}]], + [{"one": {"x": 1, "y": 1}, "two": 1.1}, {"one": {"x": 2, "y": 2}, "two": 2.2}, {"one": {"x": 3, "y": 3}, "two": 3.3}], + [{"one": (1, 1), "two": 1.1}, {"one": (2, 2), "two": 2.2}, {"one": (3, 3), "two": 3.3}], + ] + for datum in dataset: + assert awkward1.tolist(awkward1.fromiter(datum)) == datum + +def test_json(): + dataset = [ + '[{"one":1,"two":1.1},{"one":2,"two":2.2},{"one":3,"two":3.3}]', + '[{"one":1,"two":[1.1,2.2,3.3]},{"one":2,"two":[]},{"one":3,"two":[4.4,5.5]}]', + '[[{"one":1,"two":1.1},{"one":2,"two":2.2},{"one":3,"two":3.3}],[],[{"one":4,"two":4.4},{"one":5,"two":5.5}]]', + '[{"one":{"x":1,"y":1},"two":1.1},{"one":{"x":2,"y":2},"two":2.2},{"one":{"x":3,"y":3},"two":3.3}]', + ] + for datum in dataset: + assert awkward1.tojson(awkward1.fromjson(datum)) == datum