Skip to content

Commit

Permalink
Creating a demo for Coffea will motivate improvements. (#33)
Browse files Browse the repository at this point in the history
Closing off this PR (even though I'm not done with the demo) because the changes are enormous: about 7000 lines added or removed across 100 files.

In this PR, Types became const members of arrays (so we don't have to worry about them changing), and eventually Identities should also become const. Code style was made uniform: `override` instead of `virtual` when overriding methods, all implementations in .cpp files, and non-numeric function arguments passed by const reference rather than by value. RapidJSON is no longer visible to downstream projects (it is now an implementation detail). Statically linked libraries are now distributed so that downstream projects can link against them; for example, a downstream test can be compiled with

```bash
g++ -Iinclude -Lawkward1 tests/test_PR019_use_json_library.cpp -lawkward-static -lawkward-cpu-kernels-static
```

Now let's see if it still deploys!

* [WIP] Creating a demo for Coffea will motivate improvements.

* Oops; remove QUICKSTART.md.

* Start demo notebook.

* Writing notebook.

* Writing demo.

* [skip ci] Made the ArrayType schema look sane in a demo; unit tests are probably broken.

* [skip ci] Fixed segfault; working on tests

* [skip ci] Fixed many of the bugs, but not all.

* Fixed all bugs. ArrayType no longer appears in layout types.

* Remove all 'inner' and 'nolength' calls on Types

* [skip ci] Made 'type_' an immutable member of all Contents; need to update tests.

* [skip ci] Working through test failures; good up to and including PR026.

* [skip ci] Should be up through PR02*.

* [skip ci] It is up through PR02*.

* Tests pass once again; now types are immutable members of arrays.

* Replace 'virtual' keyword with 'override' when overriding methods.

* Replace checks of 'size() == 0' and 'size() != 0' with 'empty()' and '!empty()', respectively.

* Replaced 'new' operator with 'std::make_shared' everywhere that it is possible (not arrays).

* Pass non-numeric arguments by const-ref, rather than by ref or by value, if possible.

* Writing demo.

* Writing demo.

* Writing demo.

* Fixed a bug in FillableUnion, but still haven't found a dataset without nulls or unions.

* Writing demo.

* Implementing deep copies, will use in high-level Identity interface.

* Iterator and Slices have been marked with EXPORT_SYMBOL.

* Try to satisfy MacOS and Windows warnings.

* That broke MacOS. Try EXPORT_SYMBOL of the template instantiation in the cpp.

* EXPORT_SYMBOLS for Index and std::vector<int64_t>.

* We don't need to EXPORT_SYMBOLS if downstream users statically link against them.

* Moving all implementations out of header files anyway (would have been needed for EXPORT_SYMBOLS).

* Moved implementations to cpp for most classes. Only json.h is left.

* If this passes, we should close this PR so that other projects can use these updates.

* Fix 32-bit error and try to find those .a files on Windows.

* Windows should find the statically-linked library now.
  • Loading branch information
jpivarski authored Dec 19, 2019
1 parent 6b9eedb commit 862ae35
Show file tree
Hide file tree
Showing 100 changed files with 4,890 additions and 2,411 deletions.
3 changes: 3 additions & 0 deletions .ci/azure-buildtest-awkward.yml
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ jobs:
- script: |
python setup.py build
ls awkward1
python -m pytest -vv tests
displayName: "Build and test"
Expand Down Expand Up @@ -129,6 +130,7 @@ jobs:
- script: |
python setup.py build
ls awkward1
python -m pytest -vv tests
displayName: "Build and test"
Expand Down Expand Up @@ -189,5 +191,6 @@ jobs:
- script: |
python setup.py build
ls awkward1
python -m pytest -vv tests
displayName: "Build and test"
2 changes: 1 addition & 1 deletion VERSION_INFO
Original file line number Diff line number Diff line change
@@ -1 +1 @@
0.1.32
0.1.33
11 changes: 11 additions & 0 deletions awkward1/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,18 @@
namespace = {}

import awkward1.layout
from awkward1.layout import Type
from awkward1.layout import UnknownType
from awkward1.layout import PrimitiveType
from awkward1.layout import ListType
from awkward1.layout import RegularType
from awkward1.layout import RecordType
from awkward1.layout import OptionType
from awkward1.layout import UnionType
from awkward1.layout import ArrayType

import awkward1._numba

import awkward1.highlevel
from awkward1.highlevel import Array
from awkward1.highlevel import Record
Expand Down
20 changes: 11 additions & 9 deletions awkward1/_numba/array/listarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,11 +156,9 @@ def unbox(tpe, obj, c):
proxyout.id = c.pyapi.to_native_value(tpe.idtpe, id_obj).value
c.pyapi.decref(id_obj)
if tpe.typetpe != numba.none:
type1_obj = c.pyapi.object_getattr_string(obj, "type")
type2_obj = c.pyapi.object_getattr_string(type1_obj, "type")
proxyout.type = c.pyapi.to_native_value(tpe.typetpe, type2_obj).value
c.pyapi.decref(type1_obj)
c.pyapi.decref(type2_obj)
type_obj = c.pyapi.object_getattr_string(obj, "type")
proxyout.type = c.pyapi.to_native_value(tpe.typetpe, type_obj).value
c.pyapi.decref(type_obj)
is_error = numba.cgutils.is_not_null(c.builder, c.pyapi.err_occurred())
return numba.extending.NativeValue(proxyout._getvalue(), is_error)

Expand Down Expand Up @@ -191,14 +189,18 @@ def box(tpe, val, c):
args.append(c.pyapi.from_native_value(tpe.idtpe, proxyin.id, c.env_manager))
else:
args.append(c.pyapi.make_none())
if tpe.typetpe != numba.none:
args.append(c.pyapi.from_native_value(tpe.typetpe, proxyin.type, c.env_manager))
else:
args.append(c.pyapi.make_none())
out = c.pyapi.call_function_objargs(ListArray_obj, args)
for x in args:
c.pyapi.decref(x)
c.pyapi.decref(ListArray_obj)
if tpe.typetpe != numba.none:
old = out
astype_obj = c.pyapi.object_getattr_string(out, "astype")
t = c.pyapi.from_native_value(tpe.typetpe, proxyin.type, c.env_manager)
out = c.pyapi.call_function_objargs(astype_obj, (t,))
c.pyapi.decref(old)
c.pyapi.decref(astype_obj)
c.pyapi.decref(t)
return out

@numba.extending.lower_builtin(len, ListArrayType)
Expand Down
20 changes: 11 additions & 9 deletions awkward1/_numba/array/listoffsetarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,11 +147,9 @@ def unbox(tpe, obj, c):
proxyout.id = c.pyapi.to_native_value(tpe.idtpe, id_obj).value
c.pyapi.decref(id_obj)
if tpe.typetpe != numba.none:
type1_obj = c.pyapi.object_getattr_string(obj, "type")
type2_obj = c.pyapi.object_getattr_string(type1_obj, "type")
proxyout.type = c.pyapi.to_native_value(tpe.typetpe, type2_obj).value
c.pyapi.decref(type1_obj)
c.pyapi.decref(type2_obj)
type_obj = c.pyapi.object_getattr_string(obj, "type")
proxyout.type = c.pyapi.to_native_value(tpe.typetpe, type_obj).value
c.pyapi.decref(type_obj)
is_error = numba.cgutils.is_not_null(c.builder, c.pyapi.err_occurred())
return numba.extending.NativeValue(proxyout._getvalue(), is_error)

Expand Down Expand Up @@ -179,14 +177,18 @@ def box(tpe, val, c):
args.append(c.pyapi.from_native_value(tpe.idtpe, proxyin.id, c.env_manager))
else:
args.append(c.pyapi.make_none())
if tpe.typetpe != numba.none:
args.append(c.pyapi.from_native_value(tpe.typetpe, proxyin.type, c.env_manager))
else:
args.append(c.pyapi.make_none())
out = c.pyapi.call_function_objargs(ListOffsetArray_obj, args)
for x in args:
c.pyapi.decref(x)
c.pyapi.decref(ListOffsetArray_obj)
if tpe.typetpe != numba.none:
old = out
astype_obj = c.pyapi.object_getattr_string(out, "astype")
t = c.pyapi.from_native_value(tpe.typetpe, proxyin.type, c.env_manager)
out = c.pyapi.call_function_objargs(astype_obj, (t,))
c.pyapi.decref(old)
c.pyapi.decref(astype_obj)
c.pyapi.decref(t)
return out

@numba.extending.lower_builtin(len, ListOffsetArrayType)
Expand Down
12 changes: 11 additions & 1 deletion awkward1/_numba/array/numpyarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,17 @@ def box(tpe, val, c):
else:
args.append(c.pyapi.make_none())
if tpe.typetpe != numba.none:
args.append(c.pyapi.unserialize(c.pyapi.serialize_object(tpe.typetpe.literal_type)))
RegularType_obj = c.pyapi.unserialize(c.pyapi.serialize_object(awkward1.layout.RegularType))
t = c.pyapi.unserialize(c.pyapi.serialize_object(tpe.typetpe.literal_type))
arrayval = numba.targets.arrayobj.make_array(tpe.arraytpe)(c.context, c.builder, proxyin.array)
arrayshape = arrayval.shape
for i in range(tpe.arraytpe.ndim - 1, 0, -1):
size_val = c.builder.extract_value(arrayshape, i)
size_obj = c.pyapi.from_native_value(numba.intp, size_val, c.env_manager)
t = c.pyapi.call_function_objargs(RegularType_obj, (t, size_obj))
c.pyapi.decref(size_obj)
c.pyapi.decref(RegularType_obj)
args.append(t)
else:
args.append(c.pyapi.make_none())
out = c.pyapi.call_function_objargs(NumpyArray_obj, args)
Expand Down
22 changes: 13 additions & 9 deletions awkward1/_numba/array/recordarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -186,11 +186,9 @@ def unbox(tpe, obj, c):
proxyout.id = c.pyapi.to_native_value(tpe.idtpe, id_obj).value
c.pyapi.decref(id_obj)
if tpe.typetpe != numba.none:
type1_obj = c.pyapi.object_getattr_string(obj, "type")
type2_obj = c.pyapi.object_getattr_string(type1_obj, "type")
proxyout.type = c.pyapi.to_native_value(tpe.typetpe, type2_obj).value
c.pyapi.decref(type1_obj)
c.pyapi.decref(type2_obj)
type_obj = c.pyapi.object_getattr_string(obj, "type")
proxyout.type = c.pyapi.to_native_value(tpe.typetpe, type_obj).value
c.pyapi.decref(type_obj)
is_error = numba.cgutils.is_not_null(c.builder, c.pyapi.err_occurred())
return numba.extending.NativeValue(proxyout._getvalue(), is_error)

Expand All @@ -217,10 +215,6 @@ def box(tpe, val, c):
args.append(id_obj)
else:
args.append(c.pyapi.make_none())
if tpe.typetpe != numba.none:
args.append(c.pyapi.from_native_value(tpe.typetpe, proxyin.type, c.env_manager))
else:
args.append(c.pyapi.make_none())

if len(tpe.contenttpes) == 0:
RecordArray_obj = c.pyapi.unserialize(c.pyapi.serialize_object(awkward1.layout.RecordArray))
Expand Down Expand Up @@ -266,6 +260,16 @@ def box(tpe, val, c):

for x in args:
c.pyapi.decref(x)

if tpe.typetpe != numba.none:
old = out
astype_obj = c.pyapi.object_getattr_string(out, "astype")
t = c.pyapi.from_native_value(tpe.typetpe, proxyin.type, c.env_manager)
out = c.pyapi.call_function_objargs(astype_obj, (t,))
c.pyapi.decref(old)
c.pyapi.decref(astype_obj)
c.pyapi.decref(t)

return out

@numba.extending.box(RecordType)
Expand Down
20 changes: 11 additions & 9 deletions awkward1/_numba/array/regulararray.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,11 +128,9 @@ def unbox(tpe, obj, c):
proxyout.id = c.pyapi.to_native_value(tpe.idtpe, id_obj).value
c.pyapi.decref(id_obj)
if tpe.typetpe != numba.none:
type1_obj = c.pyapi.object_getattr_string(obj, "type")
type2_obj = c.pyapi.object_getattr_string(type1_obj, "type")
proxyout.type = c.pyapi.to_native_value(tpe.typetpe, type2_obj).value
c.pyapi.decref(type1_obj)
c.pyapi.decref(type2_obj)
type_obj = c.pyapi.object_getattr_string(obj, "type")
proxyout.type = c.pyapi.to_native_value(tpe.typetpe, type_obj).value
c.pyapi.decref(type_obj)
is_error = numba.cgutils.is_not_null(c.builder, c.pyapi.err_occurred())
return numba.extending.NativeValue(proxyout._getvalue(), is_error)

Expand All @@ -147,14 +145,18 @@ def box(tpe, val, c):
args.append(c.pyapi.from_native_value(tpe.idtpe, proxyin.id, c.env_manager))
else:
args.append(c.pyapi.make_none())
if tpe.typetpe != numba.none:
args.append(c.pyapi.from_native_value(tpe.typetpe, proxyin.type, c.env_manager))
else:
args.append(c.pyapi.make_none())
out = c.pyapi.call_function_objargs(RegularArray_obj, args)
for x in args:
c.pyapi.decref(x)
c.pyapi.decref(RegularArray_obj)
if tpe.typetpe != numba.none:
old = out
astype_obj = c.pyapi.object_getattr_string(out, "astype")
t = c.pyapi.from_native_value(tpe.typetpe, proxyin.type, c.env_manager)
out = c.pyapi.call_function_objargs(astype_obj, (t,))
c.pyapi.decref(old)
c.pyapi.decref(astype_obj)
c.pyapi.decref(t)
return out

@numba.extending.lower_builtin(len, RegularArrayType)
Expand Down
6 changes: 0 additions & 6 deletions awkward1/_numba/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,14 +28,8 @@ def box(tpe, val, c):
return c.pyapi.unserialize(c.pyapi.serialize_object(tpe.literal_type))

def typeof_literaltype(literal_type):
if isinstance(literal_type, awkward1.layout.ArrayType):
literal_type = literal_type.type
return LiteralTypeType(literal_type)

@numba.extending.typeof_impl.register(awkward1.layout.ArrayType)
def typeof_ArrayType(val, c):
return numba.typeof(val.type)

@numba.extending.typeof_impl.register(awkward1.layout.UnknownType)
def typeof_UnknownType(val, c):
return UnknownTypeType(val.parameters)
Expand Down
Loading

0 comments on commit 862ae35

Please sign in to comment.