diff --git a/README.md b/README.md index 3189c448f8..950fce1ea1 100644 --- a/README.md +++ b/README.md @@ -55,6 +55,7 @@ The following features of awkward 0.x will be features of awkward 1.x. * 2019-08-30 (PR [#6](../../pull/6)): added iteration to both C++ and Numba, as well as the first "operation," `awkward1.tolist`, which turns an awkward array into Python lists (and eventually dicts, etc.). * 2019-09-02 (PR [#7](../../pull/7)): refactored `Index`, `Identity`, and `ListOffsetArray` (and any other array types with `Index`, which is nearly all of them) to have a 32-bit and a 64-bit version. My original plan to only support 64-bit in "chunked arrays" with 32-bit everywhere else is hereby scrapped—both bit widths will be supported on all indexes. Non-native endian, non-trivial strides, and multidimensional `Index`/`Identity` are not supported, though all of these features are allowed for `NumpyArray` (which is _content_, not an _index_). The only limitation on `NumpyArray` is that data must be C-ordered, not Fortran-ordered. * 2019-09-21 (PR [#8](../../pull/8)): C++ NumpyArray::getitem is done, setting the pattern for other classes (external C functions). The Numba and Identity extensions are not done, which would be necessary to fully set the pattern. This involved a lot of investigation (see [studies/getitem.py](https://github.com/jpivarski/awkward-1.0/blob/master/studies/getitem.py)). + * 2019-09-21 (PR [#9](../../pull/9)): `Identity` is correctly passed through `NumpyArray` slices and `__getitem__` uses `get`, `slice`, or the full `getitem`, depending on argument complexity. ## Roadmap diff --git a/include/awkward/Identity.h b/include/awkward/Identity.h index 7ecbd49247..83170043fb 100644 --- a/include/awkward/Identity.h +++ b/include/awkward/Identity.h @@ -8,6 +8,7 @@ #include #include "awkward/cpu-kernels/util.h" +#include "awkward/Index.h" namespace awkward { class Identity { @@ -34,8 +35,9 @@ namespace awkward { virtual const std::string tostring_part(const std::string indent, const std::string pre, const std::string post) const = 0; virtual const std::shared_ptr slice(int64_t start, int64_t stop) const = 0; virtual const std::shared_ptr shallow_copy() const = 0; + virtual const std::shared_ptr getitem_carry_64(Index64& carry) const = 0; - private: + protected: const Ref ref_; const FieldLoc fieldloc_; int64_t offset_; @@ -58,6 +60,7 @@ namespace awkward { virtual const std::string tostring_part(const std::string indent, const std::string pre, const std::string post) const; virtual const std::shared_ptr slice(int64_t start, int64_t stop) const; virtual const std::shared_ptr shallow_copy() const; + virtual const std::shared_ptr getitem_carry_64(Index64& carry) const; const std::string tostring() const; const std::vector get(int64_t at) const; diff --git a/include/awkward/RawArray.h b/include/awkward/RawArray.h index 05b8367ae2..69d53070f3 100644 --- a/include/awkward/RawArray.h +++ b/include/awkward/RawArray.h @@ -60,8 +60,7 @@ namespace awkward { virtual void setid() { Identity32* id32 = new Identity32(Identity::newref(), Identity::FieldLoc(), 1, length()); std::shared_ptr newid(id32); - Error err = awkward_identity_new32(length(), id32->ptr().get()); - HANDLE_ERROR(err); + awkward_identity_new32(length(), id32->ptr().get()); setid(newid); } virtual void setid(const std::shared_ptr id) { id_ = id; } diff --git a/include/awkward/cpu-kernels/getitem.h b/include/awkward/cpu-kernels/getitem.h index 8ac74605b2..78e1a1b299 100644 --- a/include/awkward/cpu-kernels/getitem.h +++ b/include/awkward/cpu-kernels/getitem.h @@ -11,10 +11,14 @@ extern "C" { void awkward_slicearray_ravel_64(int64_t* toptr, const int64_t* fromptr, int64_t ndim, const int64_t* shape, const int64_t* strides); + Error awkward_identity32_getitem_carry_64(int32_t* newidentityptr, const int32_t* identityptr, const int64_t* carryptr, int64_t lencarry, int64_t offset, int64_t width, int64_t length); + Error awkward_identity64_getitem_carry_64(int64_t* newidentityptr, const int64_t* identityptr, const int64_t* carryptr, int64_t lencarry, int64_t offset, int64_t width, int64_t length); + void awkward_numpyarray_contiguous_init_64(int64_t* toptr, int64_t skip, int64_t stride); void awkward_numpyarray_contiguous_copy_64(uint8_t* toptr, const uint8_t* fromptr, int64_t len, int64_t stride, int64_t offset, const int64_t* pos); void awkward_numpyarray_contiguous_next_64(int64_t* topos, const int64_t* frompos, int64_t len, int64_t skip, int64_t stride); void awkward_numpyarray_getitem_next_null_64(uint8_t* toptr, const uint8_t* fromptr, int64_t len, int64_t stride, int64_t offset, const int64_t* pos); + void awkward_numpyarray_getitem_next_at_64(int64_t* nextcarryptr, const int64_t* carryptr, int64_t lencarry, int64_t skip, int64_t at); void awkward_numpyarray_getitem_next_slice_64(int64_t* nextcarryptr, const int64_t* carryptr, int64_t lencarry, int64_t lenhead, int64_t skip, int64_t start, int64_t step); void awkward_numpyarray_getitem_next_slice_advanced_64(int64_t* nextcarryptr, int64_t* nextadvancedptr, const int64_t* carryptr, const int64_t* advancedptr, int64_t lencarry, int64_t lenhead, int64_t skip, int64_t start, int64_t step); void awkward_numpyarray_getitem_next_array_64(int64_t* nextcarryptr, int64_t* nextadvancedptr, const int64_t* carryptr, const int64_t* flatheadptr, int64_t lencarry, int64_t lenflathead, int64_t skip); diff --git a/include/awkward/cpu-kernels/identity.h b/include/awkward/cpu-kernels/identity.h index 8e36b8c884..064b1a71d5 100644 --- a/include/awkward/cpu-kernels/identity.h +++ b/include/awkward/cpu-kernels/identity.h @@ -6,11 +6,11 @@ #include "awkward/cpu-kernels/util.h" extern "C" { - Error awkward_identity_new32(int64_t length, int32_t* to); - Error awkward_identity_new64(int64_t length, int32_t* to); - Error awkward_identity_32to64(int64_t length, int32_t* from, int64_t* to); - Error awkward_identity_from_listfoffsets32(int64_t length, int64_t width, int32_t* offsets, int32_t* from, int64_t tolength, int32_t* to); - Error awkward_identity_from_listfoffsets64(int64_t length, int64_t width, int64_t* offsets, int64_t* from, int64_t tolength, int64_t* to); + void awkward_identity_new32(int64_t length, int32_t* to); + void awkward_identity_new64(int64_t length, int32_t* to); + void awkward_identity_32to64(int64_t length, int32_t* from, int64_t* to); + void awkward_identity_from_listfoffsets32(int64_t length, int64_t width, int32_t* offsets, int32_t* from, int64_t tolength, int32_t* to); + void awkward_identity_from_listfoffsets64(int64_t length, int64_t width, int64_t* offsets, int64_t* from, int64_t tolength, int64_t* to); } #endif // AWKWARDCPU_IDENTITY_H_ diff --git a/src/cpu-kernels/getitem.cpp b/src/cpu-kernels/getitem.cpp index 542e48c26b..06603cc4f2 100644 --- a/src/cpu-kernels/getitem.cpp +++ b/src/cpu-kernels/getitem.cpp @@ -65,6 +65,25 @@ void awkward_slicearray_ravel_64(int64_t* toptr, const int64_t* fromptr, int64_t awkward_slicearray_ravel(toptr, fromptr, ndim, shape, strides); } +template +Error awkward_identity_getitem_carry(ID* newidentityptr, const ID* identityptr, const T* carryptr, int64_t lencarry, int64_t offset, int64_t width, int64_t length) { + for (int64_t i = 0; i < lencarry; i++) { + if (carryptr[i] >= length) { + return "index out of range for identity"; + } + for (int64_t j = 0; j < width; j++) { + newidentityptr[width*i + j] = identityptr[offset + width*carryptr[i] + j]; + } + } + return kNoError; +} +Error awkward_identity32_getitem_carry_64(int32_t* newidentityptr, const int32_t* identityptr, const int64_t* carryptr, int64_t lencarry, int64_t offset, int64_t width, int64_t length) { + return awkward_identity_getitem_carry(newidentityptr, identityptr, carryptr, lencarry, offset, width, length); +} +Error awkward_identity64_getitem_carry_64(int64_t* newidentityptr, const int64_t* identityptr, const int64_t* carryptr, int64_t lencarry, int64_t offset, int64_t width, int64_t length) { + return awkward_identity_getitem_carry(newidentityptr, identityptr, carryptr, lencarry, offset, width, length); +} + template void awkward_numpyarray_contiguous_init(T* toptr, int64_t skip, int64_t stride) { for (int64_t i = 0; i < skip; i++) { @@ -107,6 +126,16 @@ void awkward_numpyarray_getitem_next_null_64(uint8_t* toptr, const uint8_t* from awkward_numpyarray_getitem_next_null(toptr, fromptr, len, stride, offset, pos); } +template +void awkward_numpyarray_getitem_next_at(T* nextcarryptr, const T* carryptr, int64_t lencarry, int64_t skip, int64_t at) { + for (int64_t i = 0; i < lencarry; i++) { + nextcarryptr[i] = skip*carryptr[i] + at; + } +} +void awkward_numpyarray_getitem_next_at_64(int64_t* nextcarryptr, const int64_t* carryptr, int64_t lencarry, int64_t skip, int64_t at) { + awkward_numpyarray_getitem_next_at(nextcarryptr, carryptr, lencarry, skip, at); +} + template void awkward_numpyarray_getitem_next_slice(T* nextcarryptr, const T* carryptr, int64_t lencarry, int64_t lenhead, int64_t skip, int64_t start, int64_t step) { for (int64_t i = 0; i < lencarry; i++) { diff --git a/src/cpu-kernels/identity.cpp b/src/cpu-kernels/identity.cpp index 9d1d6415a6..e76377c39b 100644 --- a/src/cpu-kernels/identity.cpp +++ b/src/cpu-kernels/identity.cpp @@ -3,28 +3,26 @@ #include "awkward/cpu-kernels/identity.h" template -Error awkward_identity_new(int64_t length, T* to) { +void awkward_identity_new(int64_t length, T* to) { for (T i = 0; i < length; i++) { to[i] = i; } - return kNoError; } -Error awkward_identity_new32(int64_t length, int32_t* to) { - return awkward_identity_new(length, to); +void awkward_identity_new32(int64_t length, int32_t* to) { + awkward_identity_new(length, to); } -Error awkward_identity_new64(int64_t length, int64_t* to) { - return awkward_identity_new(length, to); +void awkward_identity_new64(int64_t length, int64_t* to) { + awkward_identity_new(length, to); } -Error awkward_identity_32to64(int64_t length, int32_t* from, int64_t* to) { +void awkward_identity_32to64(int64_t length, int32_t* from, int64_t* to) { for (int64_t i = 0; i < length; i++) { to[i]= (int64_t)from[i]; } - return kNoError; } template -Error awkward_identity_from_listfoffsets(int64_t length, int64_t width, T* offsets, T* from, int64_t tolength, T* to) { +void awkward_identity_from_listfoffsets(int64_t length, int64_t width, T* offsets, T* from, int64_t tolength, T* to) { int64_t k = 0; for (int64_t i = 0; i < length; i++) { for (T subi = 0; subi < offsets[i + 1] - offsets[i]; subi++) { @@ -35,11 +33,10 @@ Error awkward_identity_from_listfoffsets(int64_t length, int64_t width, T* offse k++; } } - return kNoError; } -Error awkward_identity_from_listfoffsets32(int64_t length, int64_t width, int32_t* offsets, int32_t* from, int64_t tolength, int32_t* to) { - return awkward_identity_from_listfoffsets(length, width, offsets, from, tolength, to); +void awkward_identity_from_listfoffsets32(int64_t length, int64_t width, int32_t* offsets, int32_t* from, int64_t tolength, int32_t* to) { + awkward_identity_from_listfoffsets(length, width, offsets, from, tolength, to); } -Error awkward_identity_from_listfoffsets64(int64_t length, int64_t width, int64_t* offsets, int64_t* from, int64_t tolength, int64_t* to) { - return awkward_identity_from_listfoffsets(length, width, offsets, from, tolength, to); +void awkward_identity_from_listfoffsets64(int64_t length, int64_t width, int64_t* offsets, int64_t* from, int64_t tolength, int64_t* to) { + awkward_identity_from_listfoffsets(length, width, offsets, from, tolength, to); } diff --git a/src/libawkward/Identity.cpp b/src/libawkward/Identity.cpp index bb0661a52c..f520714a45 100644 --- a/src/libawkward/Identity.cpp +++ b/src/libawkward/Identity.cpp @@ -5,7 +5,8 @@ #include #include #include -// #include + +#include "awkward/cpu-kernels/getitem.h" #include "awkward/Identity.h" @@ -54,6 +55,40 @@ const std::shared_ptr IdentityOf::shallow_copy() const { return std::shared_ptr(new IdentityOf(ref(), fieldloc(), offset(), width(), length(), ptr_)); } +template +const std::shared_ptr IdentityOf::getitem_carry_64(Index64& carry) const { + IdentityOf* rawout = new IdentityOf(ref_, fieldloc_, width_, carry.length()); + std::shared_ptr out(rawout); + + Error assign_err = kNoError; + if (std::is_same::value) { + assign_err = awkward_identity32_getitem_carry_64( + reinterpret_cast(rawout->ptr().get()), + reinterpret_cast(ptr_.get()), + carry.ptr().get(), + carry.length(), + offset_, + width_, + length_); + } + else if (std::is_same::value) { + assign_err = awkward_identity64_getitem_carry_64( + reinterpret_cast(rawout->ptr().get()), + reinterpret_cast(ptr_.get()), + carry.ptr().get(), + carry.length(), + offset_, + width_, + length_); + } + else { + throw std::runtime_error("unrecognized identity"); + } + HANDLE_ERROR(assign_err) + + return out; +} + template const std::vector IdentityOf::get(int64_t at) const { std::vector out; diff --git a/src/libawkward/ListOffsetArray.cpp b/src/libawkward/ListOffsetArray.cpp index 8db5e5fb0f..e59a59cbe7 100644 --- a/src/libawkward/ListOffsetArray.cpp +++ b/src/libawkward/ListOffsetArray.cpp @@ -14,8 +14,7 @@ template void ListOffsetArrayOf::setid() { Identity32* rawid = new Identity32(Identity::newref(), Identity::FieldLoc(), 1, length()); std::shared_ptr newid(rawid); - Error err = awkward_identity_new32(length(), rawid->ptr().get()); - HANDLE_ERROR(err); + awkward_identity_new32(length(), rawid->ptr().get()); setid(newid); } @@ -30,8 +29,7 @@ void ListOffsetArrayOf::setid(const std::shared_ptr id) { if (rawid32 && std::is_same::value) { Identity32* rawsubid = new Identity32(Identity::newref(), rawid32->fieldloc(), rawid32->width() + 1, content_.get()->length()); std::shared_ptr newsubid(rawsubid); - Error err = awkward_identity_from_listfoffsets32(length(), rawid32->width(), reinterpret_cast(offsets_.ptr().get()), rawid32->ptr().get(), content_.get()->length(), rawsubid->ptr().get()); - HANDLE_ERROR(err); + awkward_identity_from_listfoffsets32(length(), rawid32->width(), reinterpret_cast(offsets_.ptr().get()), rawid32->ptr().get(), content_.get()->length(), rawsubid->ptr().get()); content_.get()->setid(newsubid); } else { diff --git a/src/libawkward/NumpyArray.cpp b/src/libawkward/NumpyArray.cpp index e6806fd12d..502804c288 100644 --- a/src/libawkward/NumpyArray.cpp +++ b/src/libawkward/NumpyArray.cpp @@ -51,8 +51,7 @@ void NumpyArray::setid() { assert(!isscalar()); Identity32* id32 = new Identity32(Identity::newref(), Identity::FieldLoc(), 1, length()); std::shared_ptr newid(id32); - Error err = awkward_identity_new32(length(), id32->ptr().get()); - HANDLE_ERROR(err); + awkward_identity_new32(length(), id32->ptr().get()); setid(newid); } @@ -178,23 +177,46 @@ const std::shared_ptr NumpyArray::shallow_copy() const { } const std::shared_ptr NumpyArray::get(int64_t at) const { - return getitem(Slice(std::vector>({ std::shared_ptr(new SliceAt(at)) }), true)); + assert(!isscalar()); + // return getitem(Slice(std::vector>({ std::shared_ptr(new SliceAt(at)) }), true)); + int64_t regular_at = at; + if (regular_at < 0) { + regular_at += shape_[0]; + } + if (regular_at < 0 || regular_at >= shape_[0]) { + throw std::invalid_argument("index out of range"); + } + ssize_t byteoffset = byteoffset_ + strides_[0]*((ssize_t)regular_at); + const std::vector shape(shape_.begin() + 1, shape_.end()); + const std::vector strides(strides_.begin() + 1, strides_.end()); + std::shared_ptr id; + if (id_.get() != nullptr) { + if (regular_at >= id_.get()->length()) { + throw std::invalid_argument("index out of range for identity"); + } + id = id_.get()->slice(regular_at, regular_at + 1); + } + return std::shared_ptr(new NumpyArray(id, ptr_, shape, strides, byteoffset, itemsize_, format_)); } const std::shared_ptr NumpyArray::slice(int64_t start, int64_t stop) const { - return getitem(Slice(std::vector>({ std::shared_ptr(new SliceRange(start, stop, 1)) }), true)); - - // FIXME: id should be propagated through the new getitem - // assert(!isscalar()); - // ssize_t byteoffset = byteoffset_ + strides_[0]*((ssize_t)start); - // std::vector shape; - // shape.push_back((ssize_t)(stop - start)); - // shape.insert(shape.end(), shape_.begin() + 1, shape_.end()); - // std::shared_ptr id(nullptr); - // if (id_.get() != nullptr) { - // id = id_.get()->slice(start, stop); - // } - // return std::shared_ptr(new NumpyArray(id, ptr_, shape, strides_, byteoffset, itemsize_, format_)); + assert(!isscalar()); + // return getitem(Slice(std::vector>({ std::shared_ptr(new SliceRange(start, stop, 1)) }), true)); + int64_t regular_start = start; + int64_t regular_stop = stop; + awkward_regularize_rangeslice(regular_start, regular_stop, true, start != Slice::none(), stop != Slice::none(), shape_[0]); + ssize_t byteoffset = byteoffset_ + strides_[0]*((ssize_t)regular_start); + std::vector shape; + shape.push_back((ssize_t)(regular_stop - regular_start)); + shape.insert(shape.end(), shape_.begin() + 1, shape_.end()); + std::shared_ptr id; + if (id_.get() != nullptr) { + if (regular_stop > id_.get()->length()) { + throw std::invalid_argument("index out of range for identity"); + } + id = id_.get()->slice(regular_start, regular_stop); + } + return std::shared_ptr(new NumpyArray(id, ptr_, shape, strides_, byteoffset, itemsize_, format_)); } const std::pair NumpyArray::minmax_depth() const { @@ -299,7 +321,7 @@ const NumpyArray NumpyArray::contiguous_next(Index64 bytepos) const { const std::shared_ptr NumpyArray::getitem(const Slice& where) const { assert(!isscalar()); - if (!where.isadvanced()) { + if (!where.isadvanced() && id_.get() == nullptr) { std::vector nextshape = { 1 }; nextshape.insert(nextshape.end(), shape_.begin(), shape_.end()); std::vector nextstrides = { shape_[0]*strides_[0] }; @@ -342,7 +364,7 @@ const NumpyArray NumpyArray::getitem_bystrides(const std::shared_ptr& return NumpyArray(id_, ptr_, shape_, strides_, byteoffset_, itemsize_, format_); } - if (SliceAt* at = dynamic_cast(head.get())) { + else if (SliceAt* at = dynamic_cast(head.get())) { if (ndim() < 2) { throw std::invalid_argument("too many indexes for array"); } @@ -362,7 +384,7 @@ const NumpyArray NumpyArray::getitem_bystrides(const std::shared_ptr& std::vector outshape = { (ssize_t)length }; outshape.insert(outshape.end(), out.shape_.begin() + 1, out.shape_.end()); - return NumpyArray(id_, ptr_, outshape, out.strides_, out.byteoffset_, itemsize_, format_); + return NumpyArray(out.id_, out.ptr_, outshape, out.strides_, out.byteoffset_, itemsize_, format_); } else if (SliceRange* range = dynamic_cast(head.get())) { @@ -392,7 +414,7 @@ const NumpyArray NumpyArray::getitem_bystrides(const std::shared_ptr& outshape.insert(outshape.end(), out.shape_.begin() + 1, out.shape_.end()); std::vector outstrides = { strides_[0], strides_[1]*((ssize_t)step) }; outstrides.insert(outstrides.end(), out.strides_.begin() + 1, out.strides_.end()); - return NumpyArray(id_, ptr_, outshape, outstrides, out.byteoffset_, itemsize_, format_); + return NumpyArray(out.id_, out.ptr_, outshape, outstrides, out.byteoffset_, itemsize_, format_); } else if (SliceEllipsis* ellipsis = dynamic_cast(head.get())) { @@ -444,14 +466,46 @@ const NumpyArray NumpyArray::getitem_next(const std::shared_ptr head, byteoffset_, carry.ptr().get()); + std::shared_ptr id(nullptr); + if (id_.get() != nullptr) { + id = id_.get()->getitem_carry_64(carry); + } + std::vector shape = { (ssize_t)carry.length() }; shape.insert(shape.end(), shape_.begin() + 1, shape_.end()); std::vector strides = { (ssize_t)stride }; strides.insert(strides.end(), strides_.begin() + 1, strides_.end()); - return NumpyArray(id_, ptr, shape, strides, 0, itemsize_, format_); + return NumpyArray(id, ptr, shape, strides, 0, itemsize_, format_); + } + + else if (SliceAt* at = dynamic_cast(head.get())) { + if (ndim() < 2) { + throw std::invalid_argument("too many indexes for array"); + } + + NumpyArray next(id_, ptr_, flatten_shape(shape_), flatten_strides(strides_), byteoffset_, itemsize_, format_); + std::shared_ptr nexthead = tail.head(); + Slice nexttail = tail.tail(); + + // if we had any array slices, this int would become an array + assert(advanced.length() == 0); + + Index64 nextcarry(carry.length()); + awkward_numpyarray_getitem_next_at_64( + nextcarry.ptr().get(), + carry.ptr().get(), + carry.length(), + shape_[1], // because this is contiguous + at->at()); + + NumpyArray out = next.getitem_next(nexthead, nexttail, nextcarry, advanced, length, next.strides_[0]); + + std::vector outshape = { (ssize_t)length }; + outshape.insert(outshape.end(), out.shape_.begin() + 1, out.shape_.end()); + return NumpyArray(out.id_, out.ptr_, outshape, out.strides_, out.byteoffset_, itemsize_, format_); } - if (SliceRange* range = dynamic_cast(head.get())) { + else if (SliceRange* range = dynamic_cast(head.get())) { if (ndim() < 2) { throw std::invalid_argument("too many indexes for array"); } diff --git a/src/pyawkward.cpp b/src/pyawkward.cpp index ba3ce5bcbd..d15e0c9ec5 100644 --- a/src/pyawkward.cpp +++ b/src/pyawkward.cpp @@ -423,7 +423,26 @@ py::class_ make_NumpyArray(py::handle m, std::string name) { .def("become_contiguous", &ak::NumpyArray::become_contiguous) .def("__len__", &ak::NumpyArray::length) + .def("__getitem__", [](ak::NumpyArray& self, int64_t at) -> py::object { + return unwrap(self.get(at)); + }) .def("__getitem__", [](ak::NumpyArray& self, py::object pyslice) -> py::object { + if (py::isinstance(pyslice)) { + py::object pystep = pyslice.attr("step"); + if ((py::isinstance(pystep) && pystep.cast() == 1) || pystep.is(py::none())) { + int64_t start = ak::Slice::none(); + int64_t stop = ak::Slice::none(); + py::object pystart = pyslice.attr("start"); + py::object pystop = pyslice.attr("stop"); + if (!pystart.is(py::none())) { + start = pystart.cast(); + } + if (!pystop.is(py::none())) { + stop = pystop.cast(); + } + return unwrap(self.slice(start, stop)); + } + } return unwrap(self.getitem(toslice(pyslice))); }) .def("__iter__", [](ak::NumpyArray& self) -> ak::Iterator { diff --git a/tests/test_PR4_design_surrogate_key.py b/tests/test_PR4_design_surrogate_key.py index 43e602f319..a275e4870c 100644 --- a/tests/test_PR4_design_surrogate_key.py +++ b/tests/test_PR4_design_surrogate_key.py @@ -81,13 +81,12 @@ def test_listoffsetarray_setid(): assert numpy.asarray(jagged.id).tolist() == [[0], [1], [2], [3]] assert numpy.asarray(jagged.content.id).tolist() == [[0, 0], [0, 1], [0, 2], [2, 0], [2, 1], [3, 0], [3, 1], [3, 2], [3, 3], [3, 4]] - # FIXME: id should be propagated through the new getitem - # assert numpy.asarray(jagged.content[3:7].id).tolist() == [[2, 0], [2, 1], [3, 0], [3, 1]] - # assert numpy.asarray(jagged[0].id).tolist() == [[0, 0], [0, 1], [0, 2]] - # assert numpy.asarray(jagged[1].id).tolist() == [] - # assert numpy.asarray(jagged[2].id).tolist() == [[2, 0], [2, 1]] - # assert numpy.asarray(jagged[3].id).tolist() == [[3, 0], [3, 1], [3, 2], [3, 3], [3, 4]] - # assert numpy.asarray(jagged[1:3].id).tolist() == [[1], [2]] + assert numpy.asarray(jagged.content[3:7].id).tolist() == [[2, 0], [2, 1], [3, 0], [3, 1]] + assert numpy.asarray(jagged[0].id).tolist() == [[0, 0], [0, 1], [0, 2]] + assert numpy.asarray(jagged[1].id).tolist() == [] + assert numpy.asarray(jagged[2].id).tolist() == [[2, 0], [2, 1]] + assert numpy.asarray(jagged[3].id).tolist() == [[3, 0], [3, 1], [3, 2], [3, 3], [3, 4]] + assert numpy.asarray(jagged[1:3].id).tolist() == [[1], [2]] def test_setid_none(): offsets = awkward1.layout.Index32(numpy.array([0, 2, 2, 3], "i4")) diff --git a/tests/test_PR9_identity_and_getitem.py b/tests/test_PR9_identity_and_getitem.py new file mode 100644 index 0000000000..c057170827 --- /dev/null +++ b/tests/test_PR9_identity_and_getitem.py @@ -0,0 +1,22 @@ +# BSD 3-Clause License; see https://github.com/jpivarski/awkward-1.0/blob/master/LICENSE + +import pytest +import numpy + +import awkward1 + +def test_identity(): + a = numpy.arange(10) + b = awkward1.layout.NumpyArray(a) + b.setid() + assert numpy.array(b.id).tolist() == numpy.arange(10).reshape(-1, 1).tolist() + + assert numpy.array(b[3]) == a[3] + assert numpy.array(b[3:7].id).tolist() == numpy.arange(10).reshape(-1, 1)[3:7].tolist() + assert numpy.array(b[[7, 3, 3, -4]].id).tolist() == numpy.arange(10).reshape(-1, 1)[[7, 3, 3, -4]].tolist() + assert numpy.array(b[[True, True, True, False, False, False, True, False, True, False]].id).tolist() == numpy.arange(10).reshape(-1, 1)[[True, True, True, False, False, False, True, False, True, False]].tolist() + + assert numpy.array(b[1:][3]) == a[1:][3] + assert numpy.array(b[1:][3:7].id).tolist() == numpy.arange(10).reshape(-1, 1)[1:][3:7].tolist() + assert numpy.array(b[1:][[7, 3, 3, -4]].id).tolist() == numpy.arange(10).reshape(-1, 1)[1:][[7, 3, 3, -4]].tolist() + assert numpy.array(b[1:][[True, True, False, False, False, True, False, True, False]].id).tolist() == numpy.arange(10).reshape(-1, 1)[1:][[True, True, False, False, False, True, False, True, False]].tolist()