From 5f73d230bc099a857fd2f9496e40a6484c1faa90 Mon Sep 17 00:00:00 2001 From: Jim Pivarski Date: Thu, 26 Dec 2019 16:32:24 -0600 Subject: [PATCH] Replace 'lookup' and 'reverselookup' with a single property (that acts like 'reverselookup'). (#37) * [WIP] Replace 'lookup' and 'reverselookup' with a single property (that acts like 'reverselookup'). * [skip ci] Starting transitioning... * [skip ci] Transitioned, but have not compiled yet... * [skip ci] Compiles... * [skip ci] test_PR025 succeeds... * [skip ci] test_PR026 succeeds... * Done. --- VERSION_INFO | 2 +- awkward1/_numba/array/recordarray.py | 51 ++--- awkward1/_numba/types.py | 39 +--- awkward1/_util.py | 19 +- include/awkward/Content.h | 2 - include/awkward/array/EmptyArray.h | 2 - include/awkward/array/ListArray.h | 2 - include/awkward/array/ListOffsetArray.h | 2 - include/awkward/array/NumpyArray.h | 2 - include/awkward/array/RawArray.h | 8 - include/awkward/array/Record.h | 5 +- include/awkward/array/RecordArray.h | 14 +- include/awkward/array/RegularArray.h | 2 - include/awkward/type/ArrayType.h | 2 - include/awkward/type/ListType.h | 2 - include/awkward/type/OptionType.h | 2 - include/awkward/type/PrimitiveType.h | 2 - include/awkward/type/RecordType.h | 17 +- include/awkward/type/RegularType.h | 2 - include/awkward/type/Type.h | 2 - include/awkward/type/UnionType.h | 2 - include/awkward/type/UnknownType.h | 2 - include/awkward/util.h | 8 + src/libawkward/array/EmptyArray.cpp | 8 - src/libawkward/array/ListArray.cpp | 10 - src/libawkward/array/ListOffsetArray.cpp | 10 - src/libawkward/array/NumpyArray.cpp | 8 - src/libawkward/array/Record.cpp | 28 +-- src/libawkward/array/RecordArray.cpp | 175 +++++------------ src/libawkward/array/RegularArray.cpp | 8 - src/libawkward/fillable/RecordFillable.cpp | 16 +- src/libawkward/type/ArrayType.cpp | 8 - src/libawkward/type/ListType.cpp | 8 - src/libawkward/type/OptionType.cpp | 8 - src/libawkward/type/PrimitiveType.cpp | 8 - src/libawkward/type/RecordType.cpp | 189 ++++++------------- src/libawkward/type/RegularType.cpp | 8 - src/libawkward/type/UnionType.cpp | 8 - src/libawkward/type/UnknownType.cpp | 8 - src/libawkward/util.cpp | 68 +++++++ src/pyawkward.cpp | 210 +++++++-------------- tests/test_PR025_record_array.py | 40 ++-- tests/test_PR031_types_in_numba.py | 17 -- 43 files changed, 313 insertions(+), 721 deletions(-) diff --git a/VERSION_INFO b/VERSION_INFO index 072d0fa39e..9f42295fc2 100644 --- a/VERSION_INFO +++ b/VERSION_INFO @@ -1 +1 @@ -0.1.36 +0.1.37 diff --git a/awkward1/_numba/array/recordarray.py b/awkward1/_numba/array/recordarray.py index 21ca8d7ac0..30a690c0d6 100644 --- a/awkward1/_numba/array/recordarray.py +++ b/awkward1/_numba/array/recordarray.py @@ -11,24 +11,23 @@ @numba.extending.typeof_impl.register(awkward1.layout.RecordArray) def typeof(val, c): - return RecordArrayType([numba.typeof(x) for x in val.fields()], val.lookup, val.reverselookup, numba.typeof(val.id), numba.none if val.isbare else numba.typeof(val.type)) + return RecordArrayType([numba.typeof(x) for x in val.fields()], None if val.istuple else tuple(val.keys()), numba.typeof(val.id), numba.none if val.isbare else numba.typeof(val.type)) @numba.extending.typeof_impl.register(awkward1.layout.Record) def typeof(val, c): return RecordType(numba.typeof(val.array)) class RecordArrayType(content.ContentType): - def __init__(self, contenttpes, lookup, reverselookup, idtpe, typetpe): - super(RecordArrayType, self).__init__(name="ak::RecordArrayType([{0}], {1}, {2}, id={3}, type={4})".format(", ".join(x.name for x in contenttpes), lookup, reverselookup, idtpe.name, typetpe.name)) + def __init__(self, contenttpes, keys, idtpe, typetpe): + super(RecordArrayType, self).__init__(name="ak::RecordArrayType([{0}], {1}, id={2}, type={3})".format(", ".join(x.name for x in contenttpes), keys, idtpe.name, typetpe.name)) self.contenttpes = contenttpes - self.lookup = lookup - self.reverselookup = reverselookup + self.keys = keys self.idtpe = idtpe self.typetpe = typetpe @property def istuple(self): - return self.lookup is None + return self.keys is None @property def numfields(self): @@ -45,7 +44,7 @@ def getitem_range(self): return self def getitem_str(self, key): - return self.contenttpes[awkward1._util.field2index(self.lookup, self.numfields, key)] + return self.contenttpes[awkward1._util.key2index(self.keys, key)] def getitem_tuple(self, wheretpe): import awkward1._numba.array.regulararray @@ -60,19 +59,19 @@ def getitem_next(self, wheretpe, isadvanced): tailtpe = numba.types.Tuple(wheretpe.types[1:]) if isinstance(headtpe, numba.types.StringLiteral): - index = awkward1._util.field2index(self.lookup, self.numfields, headtpe.literal_value) + index = awkward1._util.key2index(self.keys, headtpe.literal_value) nexttpe = self.contenttpes[index] else: contenttpes = [] for t in self.contenttpes: contenttpes.append(t.getitem_next(numba.types.Tuple((headtpe,)), isadvanced)) - nexttpe = RecordArrayType(contenttpes, self.lookup, self.reverselookup, numba.none, numba.none) + nexttpe = RecordArrayType(contenttpes, self.keys, numba.none, numba.none) return nexttpe.getitem_next(tailtpe, isadvanced) def carry(self): - return RecordArrayType([x.carry() for x in self.contenttpes], self.lookup, self.reverselookup, self.idtpe, self.typetpe) + return RecordArrayType([x.carry() for x in self.contenttpes], self.keys, self.idtpe, self.typetpe) @property def lower_len(self): @@ -227,36 +226,16 @@ def box(tpe, val, c): else: RecordArray_obj = c.pyapi.unserialize(c.pyapi.serialize_object(awkward1.layout.RecordArray)) - from_lookup_obj = c.pyapi.object_getattr_string(RecordArray_obj, "from_lookup") - if tpe.lookup is None: - lookup_obj = c.pyapi.make_none() - else: - lookup_obj = c.pyapi.dict_new(len(tpe.lookup)) - for key, fieldindex in tpe.lookup.items(): - key_obj = c.pyapi.unserialize(c.pyapi.serialize_object(key)) - fieldindex_obj = c.pyapi.unserialize(c.pyapi.serialize_object(fieldindex)) - c.pyapi.dict_setitem(lookup_obj, key_obj, fieldindex_obj) - c.pyapi.decref(key_obj) - c.pyapi.decref(fieldindex_obj) - if tpe.reverselookup is None: - reverselookup_obj = c.pyapi.make_none() - else: - reverselookup_obj = c.pyapi.list_new(c.context.get_constant(numba.intp, 0)) - for key in tpe.reverselookup: - key_obj = c.pyapi.unserialize(c.pyapi.serialize_object(key)) - c.pyapi.list_append(reverselookup_obj, key_obj) - c.pyapi.decref(key_obj) contents_obj = c.pyapi.list_new(c.context.get_constant(numba.intp, 0)) for i, t in enumerate(tpe.contenttpes): x_obj = c.pyapi.from_native_value(t, getattr(proxyin, field(i)), c.env_manager) c.pyapi.list_append(contents_obj, x_obj) c.pyapi.decref(x_obj) - out = c.pyapi.call_function_objargs(from_lookup_obj, [contents_obj, lookup_obj, reverselookup_obj] + args) + keys_obj = c.pyapi.unserialize(c.pyapi.serialize_object(tpe.keys)) + out = c.pyapi.call_function_objargs(RecordArray_obj, [contents_obj, keys_obj] + args) c.pyapi.decref(RecordArray_obj) - c.pyapi.decref(from_lookup_obj) - c.pyapi.decref(lookup_obj) - c.pyapi.decref(reverselookup_obj) c.pyapi.decref(contents_obj) + c.pyapi.decref(keys_obj) for x in args: c.pyapi.decref(x) @@ -337,7 +316,7 @@ def lower_getitem_range(context, builder, sig, args): def lower_getitem_str(context, builder, sig, args): rettpe, (tpe, wheretpe) = sig.return_type, sig.args val, whereval = args - index = awkward1._util.field2index(tpe.lookup, tpe.numfields, wheretpe.literal_value) + index = awkward1._util.key2index(tpe.keys, wheretpe.literal_value) proxyin = numba.cgutils.create_struct_proxy(tpe)(context, builder, value=val) @@ -393,12 +372,12 @@ def lower_getitem_next(context, builder, arraytpe, wheretpe, arrayval, whereval, proxyin = numba.cgutils.create_struct_proxy(arraytpe)(context, builder, value=arrayval) if isinstance(headtpe, numba.types.StringLiteral): - index = awkward1._util.field2index(arraytpe.lookup, arraytpe.numfields, headtpe.literal_value) + index = awkward1._util.key2index(arraytpe.keys, headtpe.literal_value) nexttpe = arraytpe.contenttpes[index] nextval = getattr(proxyin, field(index)) else: - nexttpe = RecordArrayType([t.getitem_next(numba.types.Tuple((headtpe,)), advanced is not None) for t in arraytpe.contenttpes], arraytpe.lookup, arraytpe.reverselookup, numba.none, numba.none) # FIXME: Type::none() # arraytpe.typetpe if util.preserves_type(headtpe, advanced is not None) else + nexttpe = RecordArrayType([t.getitem_next(numba.types.Tuple((headtpe,)), advanced is not None) for t in arraytpe.contenttpes], arraytpe.keys, numba.none, numba.none) # FIXME: Type::none() # arraytpe.typetpe if util.preserves_type(headtpe, advanced is not None) else proxyout = numba.cgutils.create_struct_proxy(nexttpe)(context, builder) proxyout.length = proxyin.length wrappedheadtpe = numba.types.Tuple((headtpe,)) diff --git a/awkward1/_numba/types.py b/awkward1/_numba/types.py index 407373dbf1..24e26a73cb 100644 --- a/awkward1/_numba/types.py +++ b/awkward1/_numba/types.py @@ -56,7 +56,7 @@ def typeof_UnionType(val, c): @numba.extending.typeof_impl.register(awkward1.layout.RecordType) def typeof_RecordType(val, c): - return RecordTypeType([numba.typeof(x) for x in val.types], val.lookup, val.reverselookup, val.parameters) + return RecordTypeType([numba.typeof(x) for x in val.types], None if val.istuple else tuple(val.keys()), val.parameters) class TypeType(numba.types.Type): pass @@ -97,11 +97,10 @@ def __init__(self, typetpes, parameters): self.parameters = parameters class RecordTypeType(TypeType): - def __init__(self, typetpes, lookup, reverselookup, parameters): - super(RecordTypeType, self).__init__(name="ak::RecordTypeType([{0}], {1}, {2}, parameters={3})".format(", ".join(x.name for x in typetpes), repr(lookup), repr(reverselookup), json.dumps(parameters))) + def __init__(self, typetpes, keys, parameters): + super(RecordTypeType, self).__init__(name="ak::RecordTypeType([{0}], {1}, parameters={2})".format(", ".join(x.name for x in typetpes), repr(keys), json.dumps(parameters))) self.typetpes = typetpes - self.lookup = lookup - self.reverselookup = reverselookup + self.keys = keys self.parameters = parameters @numba.extending.register_model(UnknownTypeType) @@ -307,35 +306,11 @@ def box_RecordType(tpe, val, c): c.pyapi.tuple_setitem(types_obj, i, x_obj) parameters_obj = box_parameters(tpe.parameters, c) - if tpe.lookup is None: - out = c.pyapi.call_function_objargs(class_obj, (types_obj, parameters_obj)) - - else: - from_lookup_obj = c.pyapi.object_getattr_string(class_obj, "from_lookup") - if tpe.lookup is None: - lookup_obj = c.pyapi.make_none() - else: - lookup_obj = c.pyapi.dict_new(len(tpe.lookup)) - for key, fieldindex in tpe.lookup.items(): - key_obj = c.pyapi.unserialize(c.pyapi.serialize_object(key)) - fieldindex_obj = c.pyapi.unserialize(c.pyapi.serialize_object(fieldindex)) - c.pyapi.dict_setitem(lookup_obj, key_obj, fieldindex_obj) - c.pyapi.decref(key_obj) - c.pyapi.decref(fieldindex_obj) - if tpe.reverselookup is None: - reverselookup_obj = c.pyapi.make_none() - else: - reverselookup_obj = c.pyapi.list_new(c.context.get_constant(numba.intp, 0)) - for key in tpe.reverselookup: - key_obj = c.pyapi.unserialize(c.pyapi.serialize_object(key)) - c.pyapi.list_append(reverselookup_obj, key_obj) - c.pyapi.decref(key_obj) - out = c.pyapi.call_function_objargs(from_lookup_obj, (types_obj, lookup_obj, reverselookup_obj, parameters_obj)) - c.pyapi.decref(from_lookup_obj) - c.pyapi.decref(lookup_obj) - c.pyapi.decref(reverselookup_obj) + keys_obj = c.pyapi.unserialize(c.pyapi.serialize_object(tpe.keys)) + out = c.pyapi.call_function_objargs(class_obj, (types_obj, keys_obj, parameters_obj)) c.pyapi.decref(class_obj) c.pyapi.decref(types_obj) + c.pyapi.decref(keys_obj) c.pyapi.decref(parameters_obj) return out diff --git a/awkward1/_util.py b/awkward1/_util.py index fecb2892b2..550c5c9ca6 100644 --- a/awkward1/_util.py +++ b/awkward1/_util.py @@ -22,23 +22,26 @@ def wrap(content, namespace): else: return content -def field2index(lookup, numfields, key): - if isinstance(key, (int, numbers.Integral, numpy.integer)): - attempt = key +def key2index(keys, key): + if keys is None: + attempt = None else: - attempt = None if lookup is None else lookup.get(key) + try: + attempt = keys.index(key) + except ValueError: + attempt = None if attempt is None: - m = field2index._pattern.match(key) + m = key2index._pattern.match(key) if m is not None: attempt = m.group(0) - if attempt is None or attempt >= numfields: - raise ValueError("key {0} not found in Record".format(repr(key))) + if attempt is None: + raise ValueError("key {0} not found in record".format(repr(key))) else: return attempt -field2index._pattern = re.compile(r"^[1-9][0-9]*$") +key2index._pattern = re.compile(r"^[1-9][0-9]*$") def minimally_touching_string(limit_length, layout, namespace): import awkward1.layout diff --git a/include/awkward/Content.h b/include/awkward/Content.h index d6bb2e8269..8848f3855b 100644 --- a/include/awkward/Content.h +++ b/include/awkward/Content.h @@ -46,8 +46,6 @@ namespace awkward { virtual int64_t fieldindex(const std::string& key) const = 0; virtual const std::string key(int64_t fieldindex) const = 0; virtual bool haskey(const std::string& key) const = 0; - virtual const std::vector keyaliases(int64_t fieldindex) const = 0; - virtual const std::vector keyaliases(const std::string& key) const = 0; virtual const std::vector keys() const = 0; const std::string tostring() const; diff --git a/include/awkward/array/EmptyArray.h b/include/awkward/array/EmptyArray.h index 74dec52f2a..814d11651b 100644 --- a/include/awkward/array/EmptyArray.h +++ b/include/awkward/array/EmptyArray.h @@ -40,8 +40,6 @@ namespace awkward { int64_t fieldindex(const std::string& key) const override; const std::string key(int64_t fieldindex) const override; bool haskey(const std::string& key) const override; - const std::vector keyaliases(int64_t fieldindex) const override; - const std::vector keyaliases(const std::string& key) const override; const std::vector keys() const override; protected: diff --git a/include/awkward/array/ListArray.h b/include/awkward/array/ListArray.h index c6137888f0..b0baed6715 100644 --- a/include/awkward/array/ListArray.h +++ b/include/awkward/array/ListArray.h @@ -42,8 +42,6 @@ namespace awkward { int64_t fieldindex(const std::string& key) const override; const std::string key(int64_t fieldindex) const override; bool haskey(const std::string& key) const override; - const std::vector keyaliases(int64_t fieldindex) const override; - const std::vector keyaliases(const std::string& key) const override; const std::vector keys() const override; protected: diff --git a/include/awkward/array/ListOffsetArray.h b/include/awkward/array/ListOffsetArray.h index 2765cc5088..fbd0a16457 100644 --- a/include/awkward/array/ListOffsetArray.h +++ b/include/awkward/array/ListOffsetArray.h @@ -41,8 +41,6 @@ namespace awkward { int64_t fieldindex(const std::string& key) const override; const std::string key(int64_t fieldindex) const override; bool haskey(const std::string& key) const override; - const std::vector keyaliases(int64_t fieldindex) const override; - const std::vector keyaliases(const std::string& key) const override; const std::vector keys() const override; protected: diff --git a/include/awkward/array/NumpyArray.h b/include/awkward/array/NumpyArray.h index 0e0eb2620b..632fcdd0bd 100644 --- a/include/awkward/array/NumpyArray.h +++ b/include/awkward/array/NumpyArray.h @@ -60,8 +60,6 @@ namespace awkward { int64_t fieldindex(const std::string& key) const override; const std::string key(int64_t fieldindex) const override; bool haskey(const std::string& key) const override; - const std::vector keyaliases(int64_t fieldindex) const override; - const std::vector keyaliases(const std::string& key) const override; const std::vector keys() const override; bool iscontiguous() const; diff --git a/include/awkward/array/RawArray.h b/include/awkward/array/RawArray.h index 02a2fafe2c..7a1fd7c654 100644 --- a/include/awkward/array/RawArray.h +++ b/include/awkward/array/RawArray.h @@ -370,14 +370,6 @@ namespace awkward { throw std::invalid_argument("array contains no Records"); } - const std::vector keyaliases(int64_t fieldindex) const override { - throw std::invalid_argument("array contains no Records"); - } - - const std::vector keyaliases(const std::string& key) const override { - throw std::invalid_argument("array contains no Records"); - } - const std::vector keys() const override { throw std::invalid_argument("array contains no Records"); } diff --git a/include/awkward/array/Record.h b/include/awkward/array/Record.h index 9e0d5802aa..a3489c2c7e 100644 --- a/include/awkward/array/Record.h +++ b/include/awkward/array/Record.h @@ -12,8 +12,7 @@ namespace awkward { const std::shared_ptr array() const; int64_t at() const; const std::vector> contents() const; - const std::shared_ptr lookup() const; - const std::shared_ptr reverselookup() const; + const std::shared_ptr recordlookup() const; bool istuple() const; bool isscalar() const override; @@ -43,8 +42,6 @@ namespace awkward { int64_t fieldindex(const std::string& key) const override; const std::string key(int64_t fieldindex) const override; bool haskey(const std::string& key) const override; - const std::vector keyaliases(int64_t fieldindex) const override; - const std::vector keyaliases(const std::string& key) const override; const std::vector keys() const override; const std::shared_ptr field(int64_t fieldindex) const; diff --git a/include/awkward/array/RecordArray.h b/include/awkward/array/RecordArray.h index abff128979..fa35898b55 100644 --- a/include/awkward/array/RecordArray.h +++ b/include/awkward/array/RecordArray.h @@ -14,16 +14,12 @@ namespace awkward { class RecordArray: public Content { public: - typedef std::unordered_map Lookup; - typedef std::vector ReverseLookup; - - RecordArray(const std::shared_ptr& id, const std::shared_ptr& type, const std::vector>& contents, const std::shared_ptr& lookup, const std::shared_ptr& reverselookup); + RecordArray(const std::shared_ptr& id, const std::shared_ptr& type, const std::vector>& contents, const std::shared_ptr& recordlookup); RecordArray(const std::shared_ptr& id, const std::shared_ptr& type, const std::vector>& contents); RecordArray(const std::shared_ptr& id, const std::shared_ptr& type, int64_t length, bool istuple); const std::vector> contents() const; - const std::shared_ptr lookup() const; - const std::shared_ptr reverselookup() const; + const std::shared_ptr recordlookup() const; bool istuple() const; const std::string classname() const override; @@ -50,8 +46,6 @@ namespace awkward { int64_t fieldindex(const std::string& key) const override; const std::string key(int64_t fieldindex) const override; bool haskey(const std::string& key) const override; - const std::vector keyaliases(int64_t fieldindex) const override; - const std::vector keyaliases(const std::string& key) const override; const std::vector keys() const override; const std::shared_ptr field(int64_t fieldindex) const; @@ -62,7 +56,6 @@ namespace awkward { void append(const std::shared_ptr& content, const std::string& key); void append(const std::shared_ptr& content); - void setkey(int64_t fieldindex, const std::string& key); protected: void checktype() const override; @@ -75,8 +68,7 @@ namespace awkward { private: std::vector> contents_; - std::shared_ptr lookup_; - std::shared_ptr reverselookup_; + std::shared_ptr recordlookup_; int64_t length_; }; } diff --git a/include/awkward/array/RegularArray.h b/include/awkward/array/RegularArray.h index 61a2d1eb49..99871d0271 100644 --- a/include/awkward/array/RegularArray.h +++ b/include/awkward/array/RegularArray.h @@ -42,8 +42,6 @@ namespace awkward { int64_t fieldindex(const std::string& key) const override; const std::string key(int64_t fieldindex) const override; bool haskey(const std::string& key) const override; - const std::vector keyaliases(int64_t fieldindex) const override; - const std::vector keyaliases(const std::string& key) const override; const std::vector keys() const override; protected: diff --git a/include/awkward/type/ArrayType.h b/include/awkward/type/ArrayType.h index bbf5b9402a..933fe3be53 100644 --- a/include/awkward/type/ArrayType.h +++ b/include/awkward/type/ArrayType.h @@ -17,8 +17,6 @@ namespace awkward { int64_t fieldindex(const std::string& key) const override; const std::string key(int64_t fieldindex) const override; bool haskey(const std::string& key) const override; - const std::vector keyaliases(int64_t fieldindex) const override; - const std::vector keyaliases(const std::string& key) const override; const std::vector keys() const override; const std::shared_ptr type() const; diff --git a/include/awkward/type/ListType.h b/include/awkward/type/ListType.h index 88e321b22a..e2cc38980a 100644 --- a/include/awkward/type/ListType.h +++ b/include/awkward/type/ListType.h @@ -17,8 +17,6 @@ namespace awkward { int64_t fieldindex(const std::string& key) const override; const std::string key(int64_t fieldindex) const override; bool haskey(const std::string& key) const override; - const std::vector keyaliases(int64_t fieldindex) const override; - const std::vector keyaliases(const std::string& key) const override; const std::vector keys() const override; const std::shared_ptr type() const; diff --git a/include/awkward/type/OptionType.h b/include/awkward/type/OptionType.h index 1b8b1afd89..f2cb2723ad 100644 --- a/include/awkward/type/OptionType.h +++ b/include/awkward/type/OptionType.h @@ -17,8 +17,6 @@ namespace awkward { int64_t fieldindex(const std::string& key) const override; const std::string key(int64_t fieldindex) const override; bool haskey(const std::string& key) const override; - const std::vector keyaliases(int64_t fieldindex) const override; - const std::vector keyaliases(const std::string& key) const override; const std::vector keys() const override; const std::shared_ptr type() const; diff --git a/include/awkward/type/PrimitiveType.h b/include/awkward/type/PrimitiveType.h index ae685bd35e..80645e7952 100644 --- a/include/awkward/type/PrimitiveType.h +++ b/include/awkward/type/PrimitiveType.h @@ -32,8 +32,6 @@ namespace awkward { int64_t fieldindex(const std::string& key) const override; const std::string key(int64_t fieldindex) const override; bool haskey(const std::string& key) const override; - const std::vector keyaliases(int64_t fieldindex) const override; - const std::vector keyaliases(const std::string& key) const override; const std::vector keys() const override; const DType dtype() const; diff --git a/include/awkward/type/RecordType.h b/include/awkward/type/RecordType.h index 8d0feb5a8d..6071372d8f 100644 --- a/include/awkward/type/RecordType.h +++ b/include/awkward/type/RecordType.h @@ -7,20 +7,18 @@ #include #include +#include "awkward/util.h" + #include "awkward/type/Type.h" namespace awkward { class RecordType: public Type { public: - typedef std::unordered_map Lookup; - typedef std::vector ReverseLookup; - - RecordType(const Parameters& parameters, const std::vector>& types, const std::shared_ptr& lookup, const std::shared_ptr& reverselookup); + RecordType(const Parameters& parameters, const std::vector>& types, const std::shared_ptr& recordlookup); RecordType(const Parameters& parameters, const std::vector>& types); const std::vector> types() const; - const std::shared_ptr lookup() const; - const std::shared_ptr reverselookup() const; + const std::shared_ptr recordlookup() const; bool istuple() const; std::string tostring_part(const std::string& indent, const std::string& pre, const std::string& post) const override; @@ -30,8 +28,6 @@ namespace awkward { int64_t fieldindex(const std::string& key) const override; const std::string key(int64_t fieldindex) const override; bool haskey(const std::string& key) const override; - const std::vector keyaliases(int64_t fieldindex) const override; - const std::vector keyaliases(const std::string& key) const override; const std::vector keys() const override; const std::shared_ptr field(int64_t fieldindex) const; @@ -40,13 +36,12 @@ namespace awkward { const std::vector>> fielditems() const; const std::shared_ptr astuple() const; + void append(const std::shared_ptr& type, const std::string& key); void append(const std::shared_ptr& type); - void setkey(int64_t fieldindex, const std::string& key); private: std::vector> types_; - std::shared_ptr lookup_; - std::shared_ptr reverselookup_; + std::shared_ptr recordlookup_; }; } diff --git a/include/awkward/type/RegularType.h b/include/awkward/type/RegularType.h index c9312f68f2..20fde5d707 100644 --- a/include/awkward/type/RegularType.h +++ b/include/awkward/type/RegularType.h @@ -19,8 +19,6 @@ namespace awkward { int64_t fieldindex(const std::string& key) const override; const std::string key(int64_t fieldindex) const override; bool haskey(const std::string& key) const override; - const std::vector keyaliases(int64_t fieldindex) const override; - const std::vector keyaliases(const std::string& key) const override; const std::vector keys() const override; const std::shared_ptr type() const; diff --git a/include/awkward/type/Type.h b/include/awkward/type/Type.h index 95f0b50131..d985b62f78 100644 --- a/include/awkward/type/Type.h +++ b/include/awkward/type/Type.h @@ -26,8 +26,6 @@ namespace awkward { virtual int64_t fieldindex(const std::string& key) const = 0; virtual const std::string key(int64_t fieldindex) const = 0; virtual bool haskey(const std::string& key) const = 0; - virtual const std::vector keyaliases(int64_t fieldindex) const = 0; - virtual const std::vector keyaliases(const std::string& key) const = 0; virtual const std::vector keys() const = 0; const Parameters parameters() const; diff --git a/include/awkward/type/UnionType.h b/include/awkward/type/UnionType.h index 46021dc610..6107348a07 100644 --- a/include/awkward/type/UnionType.h +++ b/include/awkward/type/UnionType.h @@ -19,8 +19,6 @@ namespace awkward { int64_t fieldindex(const std::string& key) const override; const std::string key(int64_t fieldindex) const override; bool haskey(const std::string& key) const override; - const std::vector keyaliases(int64_t fieldindex) const override; - const std::vector keyaliases(const std::string& key) const override; const std::vector keys() const override; int64_t numtypes() const; diff --git a/include/awkward/type/UnknownType.h b/include/awkward/type/UnknownType.h index cd8943dd66..1836ce5867 100644 --- a/include/awkward/type/UnknownType.h +++ b/include/awkward/type/UnknownType.h @@ -17,8 +17,6 @@ namespace awkward { int64_t fieldindex(const std::string& key) const override; const std::string key(int64_t fieldindex) const override; bool haskey(const std::string& key) const override; - const std::vector keyaliases(int64_t fieldindex) const override; - const std::vector keyaliases(const std::string& key) const override; const std::vector keys() const override; private: diff --git a/include/awkward/util.h b/include/awkward/util.h index d0306becb3..a9d98a86f6 100644 --- a/include/awkward/util.h +++ b/include/awkward/util.h @@ -5,6 +5,7 @@ #include #include +#include #include "awkward/cpu-kernels/util.h" @@ -12,6 +13,13 @@ namespace awkward { class Identity; namespace util { + typedef std::vector RecordLookup; + std::shared_ptr init_recordlookup(int64_t numfields); + int64_t fieldindex(const std::shared_ptr& recordlookup, const std::string& key, int64_t numfields); + const std::string key(const std::shared_ptr& recordlookup, int64_t fieldindex, int64_t numfields); + bool haskey(const std::shared_ptr& recordlookup, const std::string& key, int64_t numfields); + const std::vector keys(const std::shared_ptr& recordlookup, int64_t numfields); + void handle_error(const struct Error& err, const std::string& classname, const Identity* id); template diff --git a/src/libawkward/array/EmptyArray.cpp b/src/libawkward/array/EmptyArray.cpp index e662d24f55..eeef6d54ab 100644 --- a/src/libawkward/array/EmptyArray.cpp +++ b/src/libawkward/array/EmptyArray.cpp @@ -129,14 +129,6 @@ namespace awkward { throw std::invalid_argument("array contains no Records"); } - const std::vector EmptyArray::keyaliases(int64_t fieldindex) const { - throw std::invalid_argument("array contains no Records"); - } - - const std::vector EmptyArray::keyaliases(const std::string& key) const { - throw std::invalid_argument("array contains no Records"); - } - const std::vector EmptyArray::keys() const { throw std::invalid_argument("array contains no Records"); } diff --git a/src/libawkward/array/ListArray.cpp b/src/libawkward/array/ListArray.cpp index de43c3ccd0..b7f0511266 100644 --- a/src/libawkward/array/ListArray.cpp +++ b/src/libawkward/array/ListArray.cpp @@ -360,16 +360,6 @@ namespace awkward { return content_.get()->haskey(key); } - template - const std::vector ListArrayOf::keyaliases(int64_t fieldindex) const { - return content_.get()->keyaliases(fieldindex); - } - - template - const std::vector ListArrayOf::keyaliases(const std::string& key) const { - return content_.get()->keyaliases(key); - } - template const std::vector ListArrayOf::keys() const { return content_.get()->keys(); diff --git a/src/libawkward/array/ListOffsetArray.cpp b/src/libawkward/array/ListOffsetArray.cpp index 0e116c26d9..96673e6561 100644 --- a/src/libawkward/array/ListOffsetArray.cpp +++ b/src/libawkward/array/ListOffsetArray.cpp @@ -346,16 +346,6 @@ namespace awkward { return content_.get()->haskey(key); } - template - const std::vector ListOffsetArrayOf::keyaliases(int64_t fieldindex) const { - return content_.get()->keyaliases(fieldindex); - } - - template - const std::vector ListOffsetArrayOf::keyaliases(const std::string& key) const { - return content_.get()->keyaliases(key); - } - template const std::vector ListOffsetArrayOf::keys() const { return content_.get()->keys(); diff --git a/src/libawkward/array/NumpyArray.cpp b/src/libawkward/array/NumpyArray.cpp index 7152fec03e..870f42bd42 100644 --- a/src/libawkward/array/NumpyArray.cpp +++ b/src/libawkward/array/NumpyArray.cpp @@ -590,14 +590,6 @@ namespace awkward { throw std::invalid_argument("array contains no Records"); } - const std::vector NumpyArray::keyaliases(int64_t fieldindex) const { - throw std::invalid_argument("array contains no Records"); - } - - const std::vector NumpyArray::keyaliases(const std::string& key) const { - throw std::invalid_argument("array contains no Records"); - } - const std::vector NumpyArray::keys() const { throw std::invalid_argument("array contains no Records"); } diff --git a/src/libawkward/array/Record.cpp b/src/libawkward/array/Record.cpp index 9456b2a6bc..2645960d00 100644 --- a/src/libawkward/array/Record.cpp +++ b/src/libawkward/array/Record.cpp @@ -35,16 +35,12 @@ namespace awkward { return out; } - const std::shared_ptr Record::lookup() const { - return array_.lookup(); - } - - const std::shared_ptr Record::reverselookup() const { - return array_.reverselookup(); + const std::shared_ptr Record::recordlookup() const { + return array_.recordlookup(); } bool Record::istuple() const { - return lookup().get() == nullptr; + return array_.istuple(); } bool Record::isscalar() const { @@ -91,7 +87,7 @@ namespace awkward { return std::make_shared(RecordArray(array_.id(), type, array_.length(), array_.istuple()), at_); } else { - return std::make_shared(RecordArray(array_.id(), type, array_.contents(), array_.lookup(), array_.reverselookup()), at_); + return std::make_shared(RecordArray(array_.id(), type, array_.contents(), array_.recordlookup()), at_); } } else { @@ -101,7 +97,7 @@ namespace awkward { return std::make_shared(RecordArray(raw->id(), raw->type(), raw->length(), raw->istuple()), at_); } else { - return std::make_shared(RecordArray(raw->id(), raw->type(), raw->contents(), raw->lookup(), raw->reverselookup()), at_); + return std::make_shared(RecordArray(raw->id(), raw->type(), raw->contents(), raw->recordlookup()), at_); } } } @@ -116,9 +112,9 @@ namespace awkward { void Record::tojson_part(ToJson& builder) const { size_t cols = (size_t)numfields(); - std::shared_ptr keys = array_.reverselookup(); + std::shared_ptr keys = array_.recordlookup(); if (istuple()) { - keys = std::make_shared(); + keys = std::make_shared(); for (size_t j = 0; j < cols; j++) { keys.get()->push_back(std::to_string(j)); } @@ -213,14 +209,6 @@ namespace awkward { return array_.haskey(key); } - const std::vector Record::keyaliases(int64_t fieldindex) const { - return array_.keyaliases(fieldindex); - } - - const std::vector Record::keyaliases(const std::string& key) const { - return array_.keyaliases(key); - } - const std::vector Record::keys() const { return array_.keys(); } @@ -244,7 +232,7 @@ namespace awkward { const std::vector>> Record::fielditems() const { std::vector>> out; - std::shared_ptr keys = array_.reverselookup(); + std::shared_ptr keys = array_.recordlookup(); if (istuple()) { int64_t cols = numfields(); for (int64_t j = 0; j < cols; j++) { diff --git a/src/libawkward/array/RecordArray.cpp b/src/libawkward/array/RecordArray.cpp index a83faf0465..840ab04a82 100644 --- a/src/libawkward/array/RecordArray.cpp +++ b/src/libawkward/array/RecordArray.cpp @@ -11,20 +11,17 @@ #include "awkward/array/RecordArray.h" namespace awkward { - RecordArray::RecordArray(const std::shared_ptr& id, const std::shared_ptr& type, const std::vector>& contents, const std::shared_ptr& lookup, const std::shared_ptr& reverselookup) + RecordArray::RecordArray(const std::shared_ptr& id, const std::shared_ptr& type, const std::vector>& contents, const std::shared_ptr& recordlookup) : Content(id, type) , contents_(contents) - , lookup_(lookup) - , reverselookup_(reverselookup) + , recordlookup_(recordlookup) , length_(0) { - if (reverselookup_.get() == nullptr && lookup_.get() == nullptr) { } - else if (reverselookup_.get() != nullptr && lookup_.get() != nullptr) { } - else { - throw std::runtime_error("either 'lookup' and 'reverselookup' should both be None or neither should be"); - } if (contents_.empty()) { throw std::runtime_error("this constructor can only be used with non-empty contents"); } + if (recordlookup_.get() != nullptr && recordlookup_.get()->size() != contents_.size()) { + throw std::runtime_error("recordlookup and contents must have the same length"); + } if (type_.get() != nullptr) { checktype(); } @@ -33,8 +30,7 @@ namespace awkward { RecordArray::RecordArray(const std::shared_ptr& id, const std::shared_ptr& type, const std::vector>& contents) : Content(id, type) , contents_(contents) - , lookup_(nullptr) - , reverselookup_(nullptr) + , recordlookup_(nullptr) , length_(0) { if (contents_.empty()) { throw std::runtime_error("this constructor can only be used with non-empty contents"); @@ -47,8 +43,7 @@ namespace awkward { RecordArray::RecordArray(const std::shared_ptr& id, const std::shared_ptr& type, int64_t length, bool istuple) : Content(id, type) , contents_() - , lookup_(istuple ? nullptr : new Lookup) - , reverselookup_(istuple ? nullptr : new ReverseLookup) + , recordlookup_(istuple ? nullptr : new util::RecordLookup) , length_(length) { if (type_.get() != nullptr) { checktype(); @@ -59,16 +54,12 @@ namespace awkward { return contents_; } - const std::shared_ptr RecordArray::lookup() const { - return lookup_; - } - - const std::shared_ptr RecordArray::reverselookup() const { - return reverselookup_; + const std::shared_ptr RecordArray::recordlookup() const { + return recordlookup_; } bool RecordArray::istuple() const { - return lookup_.get() == nullptr; + return recordlookup_.get() == nullptr; } const std::string RecordArray::classname() const { @@ -114,7 +105,7 @@ namespace awkward { Identity::FieldLoc original = id.get()->fieldloc(); for (size_t j = 0; j < contents_.size(); j++) { Identity::FieldLoc fieldloc(original.begin(), original.end()); - fieldloc.push_back(std::pair(id.get()->width() - 1, reverselookup_.get()->at(j))); + fieldloc.push_back(std::pair(id.get()->width() - 1, recordlookup_.get()->at(j))); contents_[j].get()->setid(id.get()->withfieldloc(fieldloc)); } } @@ -131,7 +122,7 @@ namespace awkward { for (auto item : contents_) { types.push_back(item.get()->type()); } - return std::make_shared(Type::Parameters(), types, lookup_, reverselookup_); + return std::make_shared(Type::Parameters(), types, recordlookup_); } } @@ -141,12 +132,12 @@ namespace awkward { return std::make_shared(id_, type, length(), istuple()); } else { - return std::make_shared(id_, type, contents_, lookup_, reverselookup_); + return std::make_shared(id_, type, contents_, recordlookup_); } } RecordType* raw = dynamic_cast(type.get()); std::vector> contents; - if (raw->reverselookup().get() == nullptr) { + if (raw->recordlookup().get() == nullptr) { for (int64_t i = 0; i < raw->numfields(); i++) { if (i >= numfields()) { throw std::invalid_argument(std::string("cannot assign type ") + type_.get()->tostring() + std::string(" to ") + classname()); @@ -166,7 +157,7 @@ namespace awkward { return std::make_shared(id_, type, length(), istuple()); } else { - return std::make_shared(id_, type, contents, raw->lookup(), raw->reverselookup()); + return std::make_shared(id_, type, contents, raw->recordlookup()); } } @@ -186,12 +177,7 @@ namespace awkward { for (size_t j = 0; j < contents_.size(); j++) { out << indent << " at(j) << "\">"; - for (auto pair : *lookup_.get()) { - if (pair.second == j && pair.first != reverselookup_.get()->at(j)) { - out << "" << pair.first << ""; - } - } + out << " key=\"" << recordlookup_.get()->at(j) << "\">"; } else { out << ">"; @@ -207,9 +193,9 @@ namespace awkward { void RecordArray::tojson_part(ToJson& builder) const { int64_t rows = length(); size_t cols = contents_.size(); - std::shared_ptr keys = reverselookup_; + std::shared_ptr keys = recordlookup_; if (istuple()) { - keys = std::make_shared(); + keys = std::make_shared(); for (size_t j = 0; j < cols; j++) { keys.get()->push_back(std::to_string(j)); } @@ -247,7 +233,7 @@ namespace awkward { return std::make_shared(id_, type_, length(), istuple()); } else { - return std::make_shared(id_, type_, contents_, lookup_, reverselookup_); + return std::make_shared(id_, type_, contents_, recordlookup_); } } @@ -289,7 +275,7 @@ namespace awkward { for (auto content : contents_) { contents.push_back(content.get()->getitem_range(start, stop)); } - return std::make_shared(id_, type_, contents, lookup_, reverselookup_); + return std::make_shared(id_, type_, contents, recordlookup_); } } @@ -302,7 +288,7 @@ namespace awkward { for (auto content : contents_) { contents.push_back(content.get()->getitem_range_nowrap(start, stop)); } - return std::make_shared(id_, type_, contents, lookup_, reverselookup_); + return std::make_shared(id_, type_, contents, recordlookup_); } } @@ -342,7 +328,7 @@ namespace awkward { if (id_.get() != nullptr) { id = id_.get()->getitem_carry_64(carry); } - return std::make_shared(id, type_, contents, lookup_, reverselookup_); + return std::make_shared(id, type_, contents, recordlookup_); } } @@ -369,93 +355,24 @@ namespace awkward { } int64_t RecordArray::fieldindex(const std::string& key) const { - int64_t out = -1; - if (!istuple()) { - try { - out = (int64_t)lookup_.get()->at(key); - } - catch (std::out_of_range err) { } - if (out != -1 && out >= numfields()) { - throw std::invalid_argument(std::string("key \"") + key + std::string("\" points to fieldindex ") + std::to_string(out) + std::string(" for RecordArray with only " + std::to_string(numfields()) + std::string(" fields"))); - } - } - if (out == -1) { - try { - out = (int64_t)std::stoi(key); - } - catch (std::invalid_argument err) { - throw std::invalid_argument(std::string("key \"") + key + std::string("\" is not in RecordArray")); - } - if (out >= numfields()) { - throw std::invalid_argument(std::string("key interpreted as fieldindex ") + key + std::string(" for RecordArray with only " + std::to_string(numfields()) + std::string(" fields"))); - } - } - return out; + return util::fieldindex(recordlookup_, key, numfields()); } const std::string RecordArray::key(int64_t fieldindex) const { - if (fieldindex >= numfields()) { - throw std::invalid_argument(std::string("fieldindex ") + std::to_string(fieldindex) + std::string(" for RecordArray with only " + std::to_string(numfields()) + std::string(" fields"))); - } - if (!istuple()) { - return reverselookup_.get()->at((size_t)fieldindex); - } - else { - return std::to_string(fieldindex); - } + return util::key(recordlookup_, fieldindex, numfields()); } bool RecordArray::haskey(const std::string& key) const { - try { - fieldindex(key); - } - catch (std::invalid_argument err) { - return false; - } - return true; - } - - const std::vector RecordArray::keyaliases(int64_t fieldindex) const { - std::vector out; - std::string _default = std::to_string(fieldindex); - bool has_default = false; - if (!istuple()) { - for (auto pair : *lookup_.get()) { - if (pair.second == fieldindex) { - out.push_back(pair.first); - if (pair.first == _default) { - has_default = true; - } - } - } - } - if (!has_default) { - out.push_back(_default); - } - return out; - } - - const std::vector RecordArray::keyaliases(const std::string& key) const { - return keyaliases(fieldindex(key)); + return util::haskey(recordlookup_, key, numfields()); } const std::vector RecordArray::keys() const { - std::vector out; - if (istuple()) { - int64_t cols = numfields(); - for (int64_t j = 0; j < cols; j++) { - out.push_back(std::to_string(j)); - } - } - else { - out.insert(out.end(), reverselookup_.get()->begin(), reverselookup_.get()->end()); - } - return out; + return util::keys(recordlookup_, numfields()); } const std::shared_ptr RecordArray::field(int64_t fieldindex) const { if (fieldindex >= numfields()) { - throw std::invalid_argument(std::string("fieldindex ") + std::to_string(fieldindex) + std::string(" for RecordArray with only " + std::to_string(numfields()) + std::string(" fields"))); + throw std::invalid_argument(std::string("fieldindex ") + std::to_string(fieldindex) + std::string(" for record with only " + std::to_string(numfields()) + std::string(" fields"))); } return contents_[(size_t)fieldindex]; } @@ -479,7 +396,7 @@ namespace awkward { else { size_t cols = contents_.size(); for (size_t j = 0; j < cols; j++) { - out.push_back(std::pair>(reverselookup_.get()->at(j), contents_[j])); + out.push_back(std::pair>(recordlookup_.get()->at(j), contents_[j])); } } return out; @@ -496,21 +413,25 @@ namespace awkward { } void RecordArray::append(const std::shared_ptr& content, const std::string& key) { - size_t j = contents_.size(); - append(content); - setkey(j, key); + if (recordlookup_.get() == nullptr) { + recordlookup_ = util::init_recordlookup(numfields()); + } + contents_.push_back(content); + recordlookup_.get()->push_back(key); if (type_.get() != nullptr) { if (RecordType* raw = dynamic_cast(type_.get())) { - raw->setkey(j, key); + raw->append(content.get()->type(), key); } } } void RecordArray::append(const std::shared_ptr& content) { - if (!istuple()) { - reverselookup_.get()->push_back(std::to_string(contents_.size())); + if (recordlookup_.get() == nullptr) { + contents_.push_back(content); + } + else { + append(content, std::to_string(numfields())); } - contents_.push_back(content); if (type_.get() != nullptr) { if (RecordType* raw = dynamic_cast(type_.get())) { raw->append(content.get()->type()); @@ -518,23 +439,11 @@ namespace awkward { } } - void RecordArray::setkey(int64_t fieldindex, const std::string& fieldname) { - if (istuple()) { - lookup_ = std::make_shared(); - reverselookup_ = std::make_shared(); - for (size_t j = 0; j < contents_.size(); j++) { - reverselookup_.get()->push_back(std::to_string(j)); - } - } - (*lookup_.get())[fieldname] = (size_t)fieldindex; - (*reverselookup_.get())[(size_t)fieldindex] = fieldname; - } - void RecordArray::checktype() const { bool okay = false; if (RecordType* raw = dynamic_cast(type_.get())) { - if (raw->lookup().get() != nullptr && lookup_.get() != nullptr && raw->reverselookup().get() != nullptr && reverselookup_.get() != nullptr) { - okay = *(raw->lookup().get()) == *(lookup_.get()) && *(raw->reverselookup().get()) == *(reverselookup_.get()); + if (raw->recordlookup().get() != nullptr && recordlookup_.get() != nullptr) { + okay = *(raw->recordlookup().get()) == *(recordlookup_.get()); } else { okay = (raw->numfields() == numfields()); @@ -583,7 +492,7 @@ namespace awkward { if (head.get()->preserves_type(type_, advanced)) { type = type_; } - RecordArray out(Identity::none(), type, contents, lookup_, reverselookup_); + RecordArray out(Identity::none(), type, contents, recordlookup_); return out.getitem_next(nexthead, nexttail, advanced); } } diff --git a/src/libawkward/array/RegularArray.cpp b/src/libawkward/array/RegularArray.cpp index 7d3a6321d7..cfad86f474 100644 --- a/src/libawkward/array/RegularArray.cpp +++ b/src/libawkward/array/RegularArray.cpp @@ -242,14 +242,6 @@ namespace awkward { return content_.get()->haskey(key); } - const std::vector RegularArray::keyaliases(int64_t fieldindex) const { - return content_.get()->keyaliases(fieldindex); - } - - const std::vector RegularArray::keyaliases(const std::string& key) const { - return content_.get()->keyaliases(key); - } - const std::vector RegularArray::keys() const { return content_.get()->keys(); } diff --git a/src/libawkward/fillable/RecordFillable.cpp b/src/libawkward/fillable/RecordFillable.cpp index 06d5599d82..7cdc146108 100644 --- a/src/libawkward/fillable/RecordFillable.cpp +++ b/src/libawkward/fillable/RecordFillable.cpp @@ -71,18 +71,16 @@ namespace awkward { } else { std::vector> types; - std::shared_ptr lookup = std::make_shared(); - std::shared_ptr reverselookup = std::make_shared(); + std::shared_ptr recordlookup = std::make_shared(); for (size_t i = 0; i < contents_.size(); i++) { types.push_back(contents_[i].get()->type()); - (*lookup.get())[keys_[i]] = i; - reverselookup.get()->push_back(keys_[i]); + recordlookup.get()->push_back(keys_[i]); } Type::Parameters parameters; if (nameptr_ != nullptr) { parameters["__class__"] = util::quote(name_, true); } - return std::make_shared(parameters, types, lookup, reverselookup); + return std::make_shared(parameters, types, recordlookup); } } @@ -93,8 +91,7 @@ namespace awkward { RecordType* raw = dynamic_cast(type.get()); std::vector> contents; - std::shared_ptr lookup = std::make_shared(); - std::shared_ptr reverselookup = std::make_shared(); + std::shared_ptr recordlookup = std::make_shared(); for (size_t i = 0; i < contents_.size(); i++) { if (raw == nullptr) { contents.push_back(contents_[i].get()->snapshot(Type::none())); @@ -102,15 +99,14 @@ namespace awkward { else { contents.push_back(contents_[i].get()->snapshot(raw->field((int64_t)i))); } - (*lookup.get())[keys_[i]] = i; - reverselookup.get()->push_back(keys_[i]); + recordlookup.get()->push_back(keys_[i]); } if (contents.empty()) { return std::make_shared(Identity::none(), type, length_, false); } else { - return std::make_shared(Identity::none(), type, contents, lookup, reverselookup); + return std::make_shared(Identity::none(), type, contents, recordlookup); } } diff --git a/src/libawkward/type/ArrayType.cpp b/src/libawkward/type/ArrayType.cpp index 971c46fa3f..975e7492c9 100644 --- a/src/libawkward/type/ArrayType.cpp +++ b/src/libawkward/type/ArrayType.cpp @@ -51,14 +51,6 @@ namespace awkward { return type_.get()->haskey(key); } - const std::vector ArrayType::keyaliases(int64_t fieldindex) const { - return type_.get()->keyaliases(fieldindex); - } - - const std::vector ArrayType::keyaliases(const std::string& key) const { - return type_.get()->keyaliases(key); - } - const std::vector ArrayType::keys() const { return type_.get()->keys(); } diff --git a/src/libawkward/type/ListType.cpp b/src/libawkward/type/ListType.cpp index 9042875152..2548a5b031 100644 --- a/src/libawkward/type/ListType.cpp +++ b/src/libawkward/type/ListType.cpp @@ -61,14 +61,6 @@ namespace awkward { return type_.get()->haskey(key); } - const std::vector ListType::keyaliases(int64_t fieldindex) const { - return type_.get()->keyaliases(fieldindex); - } - - const std::vector ListType::keyaliases(const std::string& key) const { - return type_.get()->keyaliases(key); - } - const std::vector ListType::keys() const { return type_.get()->keys(); } diff --git a/src/libawkward/type/OptionType.cpp b/src/libawkward/type/OptionType.cpp index 9b786ef102..edeebcd39e 100644 --- a/src/libawkward/type/OptionType.cpp +++ b/src/libawkward/type/OptionType.cpp @@ -68,14 +68,6 @@ namespace awkward { return type_.get()->haskey(key); } - const std::vector OptionType::keyaliases(int64_t fieldindex) const { - return type_.get()->keyaliases(fieldindex); - } - - const std::vector OptionType::keyaliases(const std::string& key) const { - return type_.get()->keyaliases(key); - } - const std::vector OptionType::keys() const { return type_.get()->keys(); } diff --git a/src/libawkward/type/PrimitiveType.cpp b/src/libawkward/type/PrimitiveType.cpp index 7aee81b8c0..e2f9fce0ac 100644 --- a/src/libawkward/type/PrimitiveType.cpp +++ b/src/libawkward/type/PrimitiveType.cpp @@ -76,14 +76,6 @@ namespace awkward { throw std::invalid_argument("type contains no Records"); } - const std::vector PrimitiveType::keyaliases(int64_t fieldindex) const { - throw std::invalid_argument("type contains no Records"); - } - - const std::vector PrimitiveType::keyaliases(const std::string& key) const { - throw std::invalid_argument("type contains no Records"); - } - const std::vector PrimitiveType::keys() const { throw std::invalid_argument("type contains no Records"); } diff --git a/src/libawkward/type/RecordType.cpp b/src/libawkward/type/RecordType.cpp index a3d346ab80..e0a6350f60 100644 --- a/src/libawkward/type/RecordType.cpp +++ b/src/libawkward/type/RecordType.cpp @@ -10,32 +10,30 @@ #include "awkward/type/RecordType.h" namespace awkward { - RecordType::RecordType(const Type::Parameters& parameters, const std::vector>& types, const std::shared_ptr& lookup, const std::shared_ptr& reverselookup) + RecordType::RecordType(const Type::Parameters& parameters, const std::vector>& types, const std::shared_ptr& recordlookup) : Type(parameters) , types_(types) - , lookup_(lookup) - , reverselookup_(reverselookup) { } + , recordlookup_(recordlookup) { + if (recordlookup_.get() != nullptr && recordlookup_.get()->size() != types_.size()) { + throw std::runtime_error("recordlookup and types must have the same length"); + } + } RecordType::RecordType(const Type::Parameters& parameters, const std::vector>& types) : Type(parameters) , types_(types) - , lookup_(nullptr) - , reverselookup_(nullptr) { } + , recordlookup_(nullptr) { } const std::vector> RecordType::types() const { return types_; }; - const std::shared_ptr RecordType::lookup() const { - return lookup_; - } - - const std::shared_ptr RecordType::reverselookup() const { - return reverselookup_; + const std::shared_ptr RecordType::recordlookup() const { + return recordlookup_; } bool RecordType::istuple() const { - return lookup_.get() == nullptr; + return recordlookup_.get() == nullptr; } std::string RecordType::tostring_part(const std::string& indent, const std::string& pre, const std::string& post) const { @@ -46,47 +44,47 @@ namespace awkward { std::stringstream out; if (parameters_.empty()) { - if (reverselookup_.get() == nullptr) { - out << "("; + if (recordlookup_.get() != nullptr) { + out << "{"; for (size_t j = 0; j < types_.size(); j++) { if (j != 0) { out << ", "; } + out << util::quote(recordlookup_.get()->at(j), true) << ": "; out << types_[j].get()->tostring_part("", "", ""); } - out << ")"; + out << "}"; } else { - out << "{"; + out << "("; for (size_t j = 0; j < types_.size(); j++) { if (j != 0) { out << ", "; } - out << util::quote(reverselookup_.get()->at(j), true) << ": "; out << types_[j].get()->tostring_part("", "", ""); } - out << "}"; + out << ")"; } } else { - if (reverselookup_.get() == nullptr) { - out << "tuple[["; + if (recordlookup_.get() != nullptr) { + out << "struct[["; for (size_t j = 0; j < types_.size(); j++) { if (j != 0) { out << ", "; } - out << types_[j].get()->tostring_part("", "", ""); + out << util::quote(recordlookup_.get()->at(j), true); } - } - else { - out << "struct[["; + out << "], ["; for (size_t j = 0; j < types_.size(); j++) { if (j != 0) { out << ", "; } - out << util::quote(reverselookup_.get()->at(j), true); + out << types_[j].get()->tostring_part("", "", ""); } - out << "], ["; + } + else { + out << "tuple[["; for (size_t j = 0; j < types_.size(); j++) { if (j != 0) { out << ", "; @@ -100,7 +98,7 @@ namespace awkward { } const std::shared_ptr RecordType::shallow_copy() const { - return std::make_shared(parameters_, types_, lookup_, reverselookup_); + return std::make_shared(parameters_, types_, recordlookup_); } bool RecordType::equal(const std::shared_ptr& other, bool check_parameters) const { @@ -111,33 +109,26 @@ namespace awkward { if (numfields() != t->numfields()) { return false; } - if (reverselookup_.get() == nullptr) { - if (t->reverselookup().get() != nullptr) { + if (recordlookup_.get() != nullptr) { + if (t->istuple()) { return false; } - for (int64_t j = 0; j < numfields(); j++) { - if (!field(j).get()->equal(t->field(j), check_parameters)) { + for (auto key : keys()) { + if (!t->haskey(key)) { + return false; + } + if (!field(key).get()->equal(t->field(key), check_parameters)) { return false; } } return true; } else { - if (t->reverselookup().get() == nullptr) { + if (!t->istuple()) { return false; } - if (lookup_.get()->size() != t->lookup().get()->size()) { - return false; - } - for (auto pair : *lookup_.get()) { - int64_t otherindex; - try { - otherindex = (int64_t)t->lookup().get()->at(pair.first); - } - catch (std::out_of_range err) { - return false; - } - if (!field((int64_t)pair.second).get()->equal(t->field(otherindex), check_parameters)) { + for (int64_t j = 0; j < numfields(); j++) { + if (!field(j).get()->equal(t->field(j), check_parameters)) { return false; } } @@ -154,93 +145,24 @@ namespace awkward { } int64_t RecordType::fieldindex(const std::string& key) const { - int64_t out = -1; - if (lookup_.get() != nullptr) { - try { - out = (int64_t)lookup_.get()->at(key); - } - catch (std::out_of_range err) { } - if (out != -1 && out >= numfields()) { - throw std::invalid_argument(std::string("key \"") + key + std::string("\" points to fieldindex ") + std::to_string(out) + std::string(" for RecordType with only " + std::to_string(numfields()) + std::string(" fields"))); - } - } - if (out == -1) { - try { - out = (int64_t)std::stoi(key); - } - catch (std::invalid_argument err) { - throw std::invalid_argument(std::string("key \"") + key + std::string("\" is not in RecordType")); - } - if (out >= numfields()) { - throw std::invalid_argument(std::string("key interpreted as fieldindex ") + key + std::string(" for RecordType with only " + std::to_string(numfields()) + std::string(" fields"))); - } - } - return out; + return util::fieldindex(recordlookup_, key, numfields()); } const std::string RecordType::key(int64_t fieldindex) const { - if (fieldindex >= numfields()) { - throw std::invalid_argument(std::string("fieldindex ") + std::to_string(fieldindex) + std::string(" for RecordType with only " + std::to_string(numfields()) + std::string(" fields"))); - } - if (reverselookup_.get() != nullptr) { - return reverselookup_.get()->at((size_t)fieldindex); - } - else { - return std::to_string(fieldindex); - } + return util::key(recordlookup_, fieldindex, numfields()); } bool RecordType::haskey(const std::string& key) const { - try { - fieldindex(key); - } - catch (std::invalid_argument err) { - return false; - } - return true; - } - - const std::vector RecordType::keyaliases(int64_t fieldindex) const { - std::vector out; - std::string _default = std::to_string(fieldindex); - bool has_default = false; - if (lookup_.get() != nullptr) { - for (auto pair : *lookup_.get()) { - if (pair.second == fieldindex) { - out.push_back(pair.first); - if (pair.first == _default) { - has_default = true; - } - } - } - } - if (!has_default) { - out.push_back(_default); - } - return out; - } - - const std::vector RecordType::keyaliases(const std::string& key) const { - return keyaliases(fieldindex(key)); + return util::haskey(recordlookup_, key, numfields()); } const std::vector RecordType::keys() const { - std::vector out; - if (reverselookup_.get() == nullptr) { - int64_t cols = numfields(); - for (int64_t j = 0; j < cols; j++) { - out.push_back(std::to_string(j)); - } - } - else { - out.insert(out.end(), reverselookup_.get()->begin(), reverselookup_.get()->end()); - } - return out; + return util::keys(recordlookup_, numfields()); } const std::shared_ptr RecordType::field(int64_t fieldindex) const { if (fieldindex >= numfields()) { - throw std::invalid_argument(std::string("fieldindex ") + std::to_string(fieldindex) + std::string(" for RecordType with only " + std::to_string(numfields()) + std::string(" fields"))); + throw std::invalid_argument(std::string("fieldindex ") + std::to_string(fieldindex) + std::string(" for record with only " + std::to_string(numfields()) + std::string(" fields"))); } return types_[(size_t)fieldindex]; } @@ -255,41 +177,40 @@ namespace awkward { const std::vector>> RecordType::fielditems() const { std::vector>> out; - if (reverselookup_.get() == nullptr) { + if (recordlookup_.get() != nullptr) { size_t cols = types_.size(); for (size_t j = 0; j < cols; j++) { - out.push_back(std::pair>(std::to_string(j), types_[j])); + out.push_back(std::pair>(recordlookup_.get()->at(j), types_[j])); } } else { size_t cols = types_.size(); for (size_t j = 0; j < cols; j++) { - out.push_back(std::pair>(reverselookup_.get()->at(j), types_[j])); + out.push_back(std::pair>(std::to_string(j), types_[j])); } } return out; } const std::shared_ptr RecordType::astuple() const { - return std::make_shared(parameters_, types_, std::shared_ptr(nullptr), std::shared_ptr(nullptr)); + return std::make_shared(parameters_, types_, std::shared_ptr(nullptr)); } - void RecordType::append(const std::shared_ptr& type) { - if (!istuple()) { - reverselookup_.get()->push_back(std::to_string(types_.size())); + void RecordType::append(const std::shared_ptr& type, const std::string& key) { + if (recordlookup_.get() == nullptr) { + recordlookup_ = util::init_recordlookup(numfields()); } types_.push_back(type); + recordlookup_.get()->push_back(key); } - void RecordType::setkey(int64_t fieldindex, const std::string& fieldname) { - if (istuple()) { - lookup_ = std::make_shared(); - reverselookup_ = std::make_shared(); - for (size_t j = 0; j < types_.size(); j++) { - reverselookup_.get()->push_back(std::to_string(j)); - } + void RecordType::append(const std::shared_ptr& type) { + if (recordlookup_.get() == nullptr) { + types_.push_back(type); + } + else { + append(type, std::to_string(numfields())); } - (*lookup_.get())[fieldname] = (size_t)fieldindex; - (*reverselookup_.get())[(size_t)fieldindex] = fieldname; } + } diff --git a/src/libawkward/type/RegularType.cpp b/src/libawkward/type/RegularType.cpp index c92c9699c8..08cc383efd 100644 --- a/src/libawkward/type/RegularType.cpp +++ b/src/libawkward/type/RegularType.cpp @@ -62,14 +62,6 @@ namespace awkward { return type_.get()->haskey(key); } - const std::vector RegularType::keyaliases(int64_t fieldindex) const { - return type_.get()->keyaliases(fieldindex); - } - - const std::vector RegularType::keyaliases(const std::string& key) const { - return type_.get()->keyaliases(key); - } - const std::vector RegularType::keys() const { return type_.get()->keys(); } diff --git a/src/libawkward/type/UnionType.cpp b/src/libawkward/type/UnionType.cpp index 15bd4ceaf6..173f7b6f01 100644 --- a/src/libawkward/type/UnionType.cpp +++ b/src/libawkward/type/UnionType.cpp @@ -78,14 +78,6 @@ namespace awkward { throw std::runtime_error("FIXME: UnionType::haskey(key)"); } - const std::vector UnionType::keyaliases(int64_t fieldindex) const { - throw std::runtime_error("FIXME: UnionType::keyaliases(fieldindex)"); - } - - const std::vector UnionType::keyaliases(const std::string& key) const { - throw std::runtime_error("FIXME: UnionType::keyaliases(key)"); - } - const std::vector UnionType::keys() const { throw std::runtime_error("FIXME: UnionType::keys"); } diff --git a/src/libawkward/type/UnknownType.cpp b/src/libawkward/type/UnknownType.cpp index d068478eef..4ddf1088da 100644 --- a/src/libawkward/type/UnknownType.cpp +++ b/src/libawkward/type/UnknownType.cpp @@ -57,14 +57,6 @@ namespace awkward { throw std::invalid_argument("type contains no Records"); } - const std::vector UnknownType::keyaliases(int64_t fieldindex) const { - throw std::invalid_argument("type contains no Records"); - } - - const std::vector UnknownType::keyaliases(const std::string& key) const { - throw std::invalid_argument("type contains no Records"); - } - const std::vector UnknownType::keys() const { throw std::invalid_argument("type contains no Records"); } diff --git a/src/libawkward/util.cpp b/src/libawkward/util.cpp index 01cf79aa9f..3711186e61 100644 --- a/src/libawkward/util.cpp +++ b/src/libawkward/util.cpp @@ -11,6 +11,74 @@ namespace awkward { namespace util { + std::shared_ptr init_recordlookup(int64_t numfields) { + std::shared_ptr out = std::make_shared(); + for (int64_t i = 0; i < numfields; i++) { + out.get()->push_back(std::to_string(i)); + } + return out; + } + + int64_t fieldindex(const std::shared_ptr& recordlookup, const std::string& key, int64_t numfields) { + int64_t out = -1; + if (recordlookup.get() != nullptr) { + for (size_t i = 0; i < recordlookup.get()->size(); i++) { + if (recordlookup.get()->at(i) == key) { + out = (int64_t)i; + break; + } + } + } + if (out == -1) { + try { + out = (int64_t)std::stoi(key); + } + catch (std::invalid_argument err) { + throw std::invalid_argument(std::string("key \"") + key + std::string("\" is not in Record")); + } + if (out >= numfields) { + throw std::invalid_argument(std::string("key interpreted as fieldindex ") + key + std::string(" for record with only " + std::to_string(numfields) + std::string(" fields"))); + } + } + return out; + } + + const std::string key(const std::shared_ptr& recordlookup, int64_t fieldindex, int64_t numfields) { + if (fieldindex >= numfields) { + throw std::invalid_argument(std::string("fieldindex ") + std::to_string(fieldindex) + std::string(" for record with only " + std::to_string(numfields) + std::string(" fields"))); + } + if (recordlookup.get() != nullptr) { + return recordlookup.get()->at((size_t)fieldindex); + } + else { + return std::to_string(fieldindex); + } + } + + bool haskey(const std::shared_ptr& recordlookup, const std::string& key, int64_t numfields) { + try { + fieldindex(recordlookup, key, numfields); + } + catch (std::invalid_argument err) { + return false; + } + return true; + } + + const std::vector keys(const std::shared_ptr& recordlookup, int64_t numfields) { + std::vector out; + if (recordlookup.get() != nullptr) { + out.insert(out.end(), recordlookup.get()->begin(), recordlookup.get()->end()); + } + else { + int64_t cols = numfields; + for (int64_t j = 0; j < cols; j++) { + out.push_back(std::to_string(j)); + } + } + return out; + } + void handle_error(const struct Error& err, const std::string& classname, const Identity* id) { if (err.str != nullptr) { std::stringstream out; diff --git a/src/pyawkward.cpp b/src/pyawkward.cpp index 6531fcbb32..7b250aced7 100644 --- a/src/pyawkward.cpp +++ b/src/pyawkward.cpp @@ -815,12 +815,6 @@ py::class_ type_methods(py::class_, ak::Type> .def("fieldindex", &T::fieldindex) .def("key", &T::key) .def("haskey", &T::haskey) - .def("keyaliases", [](T& self, int64_t fieldindex) -> std::vector { - return self.keyaliases(fieldindex); - }) - .def("keyaliases", [](T& self, std::string key) -> std::vector { - return self.keyaliases(key); - }) .def("keys", &T::keys) ; } @@ -1002,107 +996,46 @@ py::class_, ak::Type> make_UnionTy ); } -template -py::object lookup(const T& self) { - std::shared_ptr lookup = self.lookup(); - if (lookup.get() == nullptr) { - return py::none(); +ak::RecordType iterable_to_RecordType(py::iterable types, py::object keys, py::object parameters) { + std::vector> out; + for (auto x : types) { + out.push_back(unbox_type(x)); } - else { - py::dict out; - for (auto pair : *lookup.get()) { - std::string cppkey = pair.first; - py::str pykey(PyUnicode_DecodeUTF8(cppkey.data(), cppkey.length(), "surrogateescape")); - out[pykey] = py::cast(pair.second); - } - return out; - } -} - -template -py::object reverselookup(const T& self) { - std::shared_ptr reverselookup = self.reverselookup(); - if (reverselookup.get() == nullptr) { - return py::none(); + if (keys.is(py::none())) { + return ak::RecordType(dict2parameters(parameters), out, std::shared_ptr(nullptr)); } else { - py::list out; - for (auto item : *reverselookup.get()) { - std::string cppkey = item; - py::str pykey(PyUnicode_DecodeUTF8(cppkey.data(), cppkey.length(), "surrogateescape")); - out.append(pykey); + std::shared_ptr recordlookup = std::make_shared(); + for (auto x : keys.cast()) { + recordlookup.get()->push_back(x.cast()); } - return out; - } -} - -template -void from_lookup(std::shared_ptr lookup, std::shared_ptr reverselookup, py::dict pylookup, py::object pyreverselookup, int64_t numfields) { - for (auto x : pylookup) { - std::string key = x.first.cast(); - (*lookup)[key] = (size_t)x.second.cast(); - } - if (pyreverselookup.is(py::none())) { - for (int64_t i = 0; i < numfields; i++) { - reverselookup.get()->push_back(std::to_string(i)); - } - for (auto x : *lookup.get()) { - if ((int64_t)x.second > numfields) { - throw std::invalid_argument(std::string("lookup[") + ak::util::quote(x.first, true) + std::string("] is ") + std::to_string(x.second) + std::string(" but there are only ") + std::to_string(numfields) + std::string(" fields")); - } - (*reverselookup)[x.second] = x.first; - } - } - else { - if (!py::isinstance(pyreverselookup)) { - throw std::invalid_argument("reverselookup must be iterable"); - } - for (auto x : pyreverselookup.cast()) { - if (!py::isinstance(x)) { - throw std::invalid_argument("elements of reverselookup must all be strings"); - } - reverselookup.get()->push_back(x.cast()); + if (out.size() != recordlookup.get()->size()) { + throw std::invalid_argument("if provided, 'keys' must have the same length as 'types'"); } + return ak::RecordType(dict2parameters(parameters), out, recordlookup); } } py::class_, ak::Type> make_RecordType(py::handle m, std::string name) { return type_methods(py::class_, ak::Type>(m, name.c_str()) - .def(py::init([](py::tuple types, py::object parameters) -> ak::RecordType { - std::vector> out; - for (auto x : types) { - out.push_back(unbox_type(x)); - } - return ak::RecordType(dict2parameters(parameters), out, std::shared_ptr(nullptr), std::shared_ptr(nullptr)); - }), py::arg("types"), py::arg("parameters") = py::none()) .def(py::init([](py::dict types, py::object parameters) -> ak::RecordType { - std::shared_ptr lookup = std::make_shared(); - std::shared_ptr reverselookup = std::make_shared(); + std::shared_ptr recordlookup = std::make_shared(); std::vector> out; for (auto x : types) { std::string key = x.first.cast(); - (*lookup.get())[key] = out.size(); - reverselookup.get()->push_back(key); + recordlookup.get()->push_back(key); out.push_back(unbox_type(x.second)); } - return ak::RecordType(dict2parameters(parameters), out, lookup, reverselookup); + return ak::RecordType(dict2parameters(parameters), out, recordlookup); }), py::arg("types"), py::arg("parameters") = py::none()) - .def_static("from_lookup", [](py::iterable types, py::dict pylookup, py::object pyreverselookup, py::object parameters) -> ak::RecordType { - std::vector> out; - for (auto x : types) { - out.push_back(unbox_type(x)); - } - std::shared_ptr lookup = std::make_shared(); - std::shared_ptr reverselookup = std::make_shared(); - from_lookup(lookup, reverselookup, pylookup, pyreverselookup, (int64_t)out.size()); - return ak::RecordType(dict2parameters(parameters), out, lookup, reverselookup); - }, py::arg("types"), py::arg("lookup"), py::arg("reverselookup") = py::none(), py::arg("parameters") = py::none()) + .def(py::init(&iterable_to_RecordType), py::arg("types"), py::arg("keys") = py::none(), py::arg("parameters") = py::none()) .def("__getitem__", [](ak::RecordType& self, int64_t fieldindex) -> py::object { return box(self.field(fieldindex)); }) .def("__getitem__", [](ak::RecordType& self, std::string key) -> py::object { return box(self.field(key)); }) + .def_property_readonly("istuple", &ak::RecordType::istuple) .def_property_readonly("types", [](ak::RecordType& self) -> py::object { std::vector> types = self.types(); py::tuple pytypes(types.size()); @@ -1136,24 +1069,33 @@ py::class_, ak::Type> make_Recor } return out; }) - .def_property_readonly("lookup", &lookup) - .def_property_readonly("reverselookup", &reverselookup) + .def("append", [](ak::RecordType& self, py::object type, py::object key) -> void { + if (key.is(py::none())) { + self.append(unbox_type(type)); + } + else { + self.append(unbox_type(type), key.cast()); + } + }, py::arg("type"), py::arg("key") = py::none()) .def_property("parameters", &getparameters, &setparameters) .def(py::pickle([](const ak::RecordType& self) { - py::tuple fields((size_t)self.numfields()); + py::tuple pytypes((size_t)self.numfields()); for (int64_t i = 0; i < self.numfields(); i++) { - fields[(size_t)i] = box(self.field(i)); + pytypes[(size_t)i] = box(self.field(i)); } - return py::make_tuple(parameters2dict(self.parameters()), fields, lookup(self), reverselookup(self)); - }, [](py::tuple state) { - std::vector> fields; - for (auto x : state[1]) { - fields.push_back(unbox_type(x)); + std::shared_ptr recordlookup = self.recordlookup(); + if (recordlookup.get() == nullptr) { + return py::make_tuple(pytypes, py::none(), parameters2dict(self.parameters())); } - std::shared_ptr lookup = std::make_shared(); - std::shared_ptr reverselookup = std::make_shared(); - from_lookup(lookup, reverselookup, state[2].cast(), state[3], (int64_t)fields.size()); - return ak::RecordType(dict2parameters(state[0]), fields, lookup, reverselookup); + else { + py::tuple pyrecordlookup((size_t)self.numfields()); + for (size_t i = 0; i < (size_t)self.numfields(); i++) { + pyrecordlookup[i] = py::cast(recordlookup.get()->at(i)); + } + return py::make_tuple(pytypes, pyrecordlookup, parameters2dict(self.parameters())); + } + }, [](py::tuple state) { + return iterable_to_RecordType(state[0].cast(), state[1], state[2]); })) ); } @@ -1230,12 +1172,6 @@ py::class_, ak::Content> content_methods(py::class_ std::vector { - return self.keyaliases(fieldindex); - }) - .def("keyaliases", [](T& self, std::string key) -> std::vector { - return self.keyaliases(key); - }) .def("keys", &T::keys) ; } @@ -1343,49 +1279,48 @@ py::class_, ak::Content> mak /////////////////////////////////////////////////////////////// RecordArray +ak::RecordArray iterable_to_RecordArray(py::iterable contents, py::object keys, py::object id, py::object type) { + std::vector> out; + for (auto x : contents) { + out.push_back(unbox_content(x)); + } + if (out.empty()) { + throw std::invalid_argument("construct RecordArrays without fields using RecordArray(length) where length is an integer"); + } + if (keys.is(py::none())) { + return ak::RecordArray(unbox_id_none(id), unbox_type_none(type), out, std::shared_ptr(nullptr)); + } + else { + std::shared_ptr recordlookup = std::make_shared(); + for (auto x : keys.cast()) { + recordlookup.get()->push_back(x.cast()); + } + if (out.size() != recordlookup.get()->size()) { + throw std::invalid_argument("if provided, 'keys' must have the same length as 'types'"); + } + return ak::RecordArray(unbox_id_none(id), unbox_type_none(type), out, recordlookup); + } +} + py::class_, ak::Content> make_RecordArray(py::handle m, std::string name) { return content_methods(py::class_, ak::Content>(m, name.c_str()) .def(py::init([](py::dict contents, py::object id, py::object type) -> ak::RecordArray { - std::shared_ptr lookup = std::make_shared(); - std::shared_ptr reverselookup = std::make_shared(); + std::shared_ptr recordlookup = std::make_shared(); std::vector> out; for (auto x : contents) { std::string key = x.first.cast(); - (*lookup.get())[key] = out.size(); - reverselookup.get()->push_back(key); + recordlookup.get()->push_back(key); out.push_back(unbox_content(x.second)); } if (out.empty()) { throw std::invalid_argument("construct RecordArrays without fields using RecordArray(length) where length is an integer"); } - return ak::RecordArray(unbox_id_none(id), unbox_type_none(type), out, lookup, reverselookup); - }), py::arg("contents"), py::arg("id") = py::none(), py::arg("type") = py::none()) - .def(py::init([](py::iterable contents, py::object id, py::object type) -> ak::RecordArray { - std::vector> out; - for (auto x : contents) { - out.push_back(unbox_content(x)); - } - if (out.empty()) { - throw std::invalid_argument("construct RecordArrays without fields using RecordArray(length) where length is an integer"); - } - return ak::RecordArray(unbox_id_none(id), unbox_type_none(type), out, std::shared_ptr(nullptr), std::shared_ptr(nullptr)); + return ak::RecordArray(unbox_id_none(id), unbox_type_none(type), out, recordlookup); }), py::arg("contents"), py::arg("id") = py::none(), py::arg("type") = py::none()) + .def(py::init(&iterable_to_RecordArray), py::arg("contents"), py::arg("keys") = py::none(), py::arg("id") = py::none(), py::arg("type") = py::none()) .def(py::init([](int64_t length, bool istuple, py::object id, py::object type) -> ak::RecordArray { return ak::RecordArray(unbox_id_none(id), unbox_type_none(type), length, istuple); }), py::arg("length"), py::arg("istuple") = false, py::arg("id") = py::none(), py::arg("type") = py::none()) - .def_static("from_lookup", [](py::iterable contents, py::dict pylookup, py::object pyreverselookup, py::object id, py::object type) -> ak::RecordArray { - std::vector> out; - for (auto x : contents) { - out.push_back(unbox_content(x)); - } - if (out.empty()) { - throw std::invalid_argument("construct RecordArrays without fields using RecordArray(length) where length is an integer"); - } - std::shared_ptr lookup = std::make_shared(); - std::shared_ptr reverselookup = std::make_shared(); - from_lookup(lookup, reverselookup, pylookup, pyreverselookup, (int64_t)out.size()); - return ak::RecordArray(unbox_id_none(id), unbox_type_none(type), out, lookup, reverselookup); - }, py::arg("contents"), py::arg("lookup"), py::arg("reverselookup") = py::none(), py::arg("id") = py::none(), py::arg("type") = py::none()) .def_property_readonly("istuple", &ak::RecordArray::istuple) .def_property_readonly("contents", &ak::RecordArray::contents) @@ -1414,8 +1349,6 @@ py::class_, ak::Content> make_ } return out; }) - .def_property_readonly("lookup", &lookup) - .def_property_readonly("reverselookup", &reverselookup) .def_property_readonly("astuple", [](ak::RecordArray& self) -> py::object { return box(self.astuple().shallow_copy()); }) @@ -1427,8 +1360,7 @@ py::class_, ak::Content> make_ else { self.append(unbox_content(content), key.cast()); } - }, py::arg("content"), py::arg("key") = py::none()) - .def("setkey", &ak::RecordArray::setkey) + }, py::arg("type"), py::arg("key") = py::none()) ); } @@ -1456,12 +1388,6 @@ py::class_> make_Record(py::handle m, st .def("fieldindex", &ak::Record::fieldindex) .def("key", &ak::Record::key) .def("haskey", &ak::Record::haskey) - .def("keyaliases", [](ak::Record& self, int64_t fieldindex) -> std::vector { - return self.keyaliases(fieldindex); - }) - .def("keyaliases", [](ak::Record& self, std::string key) -> std::vector { - return self.keyaliases(key); - }) .def("keys", &ak::Record::keys) .def("field", [](ak::Record& self, int64_t fieldindex) -> py::object { return box(self.field(fieldindex)); @@ -1488,8 +1414,6 @@ py::class_> make_Record(py::handle m, st } return out; }) - .def_property_readonly("lookup", &lookup) - .def_property_readonly("reverselookup", &reverselookup) .def_property_readonly("astuple", [](ak::Record& self) -> py::object { return box(self.astuple().shallow_copy()); }) diff --git a/tests/test_PR025_record_array.py b/tests/test_PR025_record_array.py index 561fe075fc..869b3f64d9 100644 --- a/tests/test_PR025_record_array.py +++ b/tests/test_PR025_record_array.py @@ -17,19 +17,20 @@ def test_basic(): recordarray.append(content1, "one") recordarray.append(listoffsetarray, "two") recordarray.append(content2) - recordarray.setkey(0, "wonky") + recordarray.append(content1, "wonky") assert awkward1.tolist(recordarray.field(0)) == [1, 2, 3, 4, 5] assert awkward1.tolist(recordarray.field("two")) == [[1.1, 2.2, 3.3], [], [4.4, 5.5], [6.6], [7.7, 8.8, 9.9]] assert awkward1.tolist(recordarray.field("wonky")) == [1, 2, 3, 4, 5] str(recordarray) - assert awkward1.tojson(recordarray) == '[{"wonky":1,"two":[1.1,2.2,3.3],"2":1.1},{"wonky":2,"two":[],"2":2.2},{"wonky":3,"two":[4.4,5.5],"2":3.3},{"wonky":4,"two":[6.6],"2":4.4},{"wonky":5,"two":[7.7,8.8,9.9],"2":5.5}]' + assert awkward1.tojson(recordarray) == '[{"one":1,"two":[1.1,2.2,3.3],"2":1.1,"wonky":1},{"one":2,"two":[],"2":2.2,"wonky":2},{"one":3,"two":[4.4,5.5],"2":3.3,"wonky":3},{"one":4,"two":[6.6],"2":4.4,"wonky":4},{"one":5,"two":[7.7,8.8,9.9],"2":5.5,"wonky":5}]' assert len(recordarray) == 5 - assert recordarray.key(0) == "wonky" + assert recordarray.key(0) == "one" assert recordarray.key(1) == "two" assert recordarray.key(2) == "2" - assert recordarray.fieldindex("wonky") == 0 + assert recordarray.key(3) == "wonky" + assert recordarray.fieldindex("wonky") == 3 assert recordarray.fieldindex("one") == 0 assert recordarray.fieldindex("0") == 0 assert recordarray.fieldindex("two") == 1 @@ -41,30 +42,20 @@ def test_basic(): assert recordarray.haskey("two") assert recordarray.haskey("1") assert recordarray.haskey("2") - assert set(recordarray.keyaliases(0)) == set(["wonky", "one", "0"]) - assert set(recordarray.keyaliases("wonky")) == set(["wonky", "one", "0"]) - assert set(recordarray.keyaliases("one")) == set(["wonky", "one", "0"]) - assert set(recordarray.keyaliases("0")) == set(["wonky", "one", "0"]) - assert set(recordarray.keyaliases(1)) == set(["two", "1"]) - assert set(recordarray.keyaliases("two")) == set(["two", "1"]) - assert set(recordarray.keyaliases("1")) == set(["two", "1"]) - assert set(recordarray.keyaliases(2)) == set(["2"]) - assert set(recordarray.keyaliases("2")) == set(["2"]) - - assert recordarray.keys() == ["wonky", "two", "2"] - assert [awkward1.tolist(x) for x in recordarray.fields()] == [[1, 2, 3, 4, 5], [[1.1, 2.2, 3.3], [], [4.4, 5.5], [6.6], [7.7, 8.8, 9.9]], [1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8, 9.9]] + + assert recordarray.keys() == ["one", "two", "2", "wonky"] + assert [awkward1.tolist(x) for x in recordarray.fields()] == [[1, 2, 3, 4, 5], [[1.1, 2.2, 3.3], [], [4.4, 5.5], [6.6], [7.7, 8.8, 9.9]], [1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8, 9.9], [1, 2, 3, 4, 5]] pairs = recordarray.fielditems() - assert pairs[0][0] == "wonky" + assert pairs[0][0] == "one" assert pairs[1][0] == "two" assert pairs[2][0] == "2" + assert pairs[3][0] == "wonky" assert awkward1.tolist(pairs[0][1]) == [1, 2, 3, 4, 5] assert awkward1.tolist(pairs[1][1]) == [[1.1, 2.2, 3.3], [], [4.4, 5.5], [6.6], [7.7, 8.8, 9.9]] assert awkward1.tolist(pairs[2][1]) == [1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8, 9.9] + assert awkward1.tolist(pairs[3][1]) == [1, 2, 3, 4, 5] - assert awkward1.tojson(recordarray.astuple) == '[{"0":1,"1":[1.1,2.2,3.3],"2":1.1},{"0":2,"1":[],"2":2.2},{"0":3,"1":[4.4,5.5],"2":3.3},{"0":4,"1":[6.6],"2":4.4},{"0":5,"1":[7.7,8.8,9.9],"2":5.5}]' - - assert recordarray.lookup == {"one": 0, "two": 1, "wonky": 0} - assert recordarray.astuple.lookup is None + assert awkward1.tojson(recordarray.astuple) == '[{"0":1,"1":[1.1,2.2,3.3],"2":1.1,"3":1},{"0":2,"1":[],"2":2.2,"3":2},{"0":3,"1":[4.4,5.5],"2":3.3,"3":3},{"0":4,"1":[6.6],"2":4.4,"3":4},{"0":5,"1":[7.7,8.8,9.9],"2":5.5,"3":5}]' def test_scalar_record(): content1 = awkward1.layout.NumpyArray(numpy.array([1, 2, 3, 4, 5])) @@ -99,7 +90,6 @@ def test_type(): recordarray.append(content1) recordarray.append(listoffsetarray) assert str(awkward1.typeof(recordarray)) == '(int64, var * float64)' - assert recordarray.lookup is None assert awkward1.typeof(recordarray) == awkward1.layout.RecordType(( awkward1.layout.PrimitiveType("int64"), @@ -108,10 +98,10 @@ def test_type(): (awkward1.layout.PrimitiveType("int64"), awkward1.layout.ListType(awkward1.layout.PrimitiveType("float64")))) - recordarray.setkey(0, "one") - recordarray.setkey(1, "two") + recordarray = awkward1.layout.RecordArray(0, True) + recordarray.append(content1, "one") + recordarray.append(listoffsetarray, "two") assert str(awkward1.typeof(recordarray)) in ('{"one": int64, "two": var * float64}', '{"two": var * float64, "one": int64}') - assert recordarray.lookup == {"one": 0, "two": 1} assert str(awkward1.layout.RecordType( (awkward1.layout.PrimitiveType("int32"), diff --git a/tests/test_PR031_types_in_numba.py b/tests/test_PR031_types_in_numba.py index cec3bd0259..07b237142c 100644 --- a/tests/test_PR031_types_in_numba.py +++ b/tests/test_PR031_types_in_numba.py @@ -14,23 +14,6 @@ if sys.version_info[0] < 3: pytest.skip("pybind11 pickle, and hence numba serialization with types, only works in Python 3", allow_module_level=True) -def test_from_lookup(): - r = awkward1.layout.RecordArray.from_lookup([awkward1.layout.EmptyArray(), awkward1.layout.EmptyArray()], {"one": 0, "two": 1}) - assert r.lookup == {"one": 0, "two": 1} - assert r.reverselookup == ["one", "two"] - - r = awkward1.layout.RecordArray.from_lookup([awkward1.layout.EmptyArray(), awkward1.layout.EmptyArray()], {"one": 0, "two": 1}, ["uno", "dos"]) - assert r.lookup == {"one": 0, "two": 1} - assert r.reverselookup == ["uno", "dos"] - - r = awkward1.layout.RecordType.from_lookup([awkward1.layout.UnknownType(), awkward1.layout.UnknownType()], {"one": 0, "two": 1}) - assert r.lookup == {"one": 0, "two": 1} - assert r.reverselookup == ["one", "two"] - - r = awkward1.layout.RecordType.from_lookup([awkward1.layout.UnknownType(), awkward1.layout.UnknownType()], {"one": 0, "two": 1}, ["uno", "dos"]) - assert r.lookup == {"one": 0, "two": 1} - assert r.reverselookup == ["uno", "dos"] - def test_pickle(): t = awkward1.layout.UnknownType(); assert pickle.loads(pickle.dumps(t)) == t t = awkward1.layout.PrimitiveType("int32"); assert pickle.loads(pickle.dumps(t)) == t