Skip to content

Commit

Permalink
Closed #2651: series_test.py conversion for new framework (#2671)
Browse files Browse the repository at this point in the history
* conversion of series test to new framework

* update to prob_size parameterization

* fix prob_size scaling issue

* left over prob_size variable

* requested changes, clean up

---------

Co-authored-by: jaketrookman <jaketrookman@users.noreply.github.com>
  • Loading branch information
jaketrookman and jaketrookman committed Sep 6, 2023
1 parent 5d7f91f commit 2c8061b
Show file tree
Hide file tree
Showing 3 changed files with 191 additions and 2 deletions.
187 changes: 187 additions & 0 deletions PROTO_tests/tests/series_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,187 @@
import numpy as np
import pandas as pd
import pytest
from pandas.testing import assert_frame_equal

import arkouda as ak

DTYPES = [ak.int64, ak.uint64, ak.bool, ak.float64, ak.bigint, ak.str_]
NO_STRING = [ak.int64, ak.uint64, ak.bool, ak.float64, ak.bigint]
NUMERICAL_TYPES = [ak.int64, ak.uint64, ak.float64, ak.bigint]
INTEGRAL_TYPES = [ak.int64, ak.uint64, ak.bool, ak.bigint]


class TestSeries:
@pytest.mark.parametrize("dtype", DTYPES)
def test_series_creation(self, dtype):
idx = ak.arange(3, dtype=dtype)
for val in idx, ak.array(["A", "B", "C"]):
ans = ak.Series(data=val, index=idx).to_list()
for series in (
ak.Series(data=val, index=idx),
ak.Series(data=val),
ak.Series(val, idx),
ak.Series(val),
ak.Series((idx, val)),
):
assert isinstance(series, ak.Series)
assert isinstance(series.index, ak.Index)
assert series.to_list() == ans

with pytest.raises(TypeError):
ak.Series(index=idx)

with pytest.raises(TypeError):
ak.Series((ak.arange(3),))

with pytest.raises(TypeError):
ak.Series()

with pytest.raises(ValueError):
ak.Series(data=ak.arange(3), index=ak.arange(6))

@pytest.mark.parametrize("dtype", INTEGRAL_TYPES)
@pytest.mark.parametrize("dtype_index", [ak.int64, ak.uint64])
def test_lookup(self, dtype, dtype_index):
pda = ak.arange(3, dtype=dtype)
for val in pda, ak.array(["A", "B", "C"]):
s = ak.Series(data=val, index=ak.arange(3, dtype=dtype_index))

for key in (
1,
ak.Index(ak.array([1], dtype=dtype_index)),
ak.Index(ak.array([0, 2], dtype=dtype_index)),
):
lk = s.locate(key)
assert isinstance(lk, ak.Series)
key = ak.array(key) if not isinstance(key, int) else key
assert (lk.index == s.index[key]).all()
assert (lk.values == s.values[key]).all()

# testing multi-index lookup
mi = ak.MultiIndex([pda, pda[::-1]])
s = ak.Series(data=val, index=mi)
lk = s.locate(mi[0])
assert isinstance(lk, ak.Series)
assert lk.index.index == mi[0].index
assert lk.values[0] == val[0]

# ensure error with scalar and multi-index
with pytest.raises(TypeError):
s.locate(0)

with pytest.raises(TypeError):
s.locate([0, 2])

@pytest.mark.parametrize("prob_size", pytest.prob_size)
@pytest.mark.parametrize("dtype", NO_STRING)
def test_add(self, prob_size, dtype):
added = ak.Series(ak.arange(prob_size // 2, dtype=dtype)).add(
ak.Series(
data=ak.arange(prob_size // 2, prob_size, dtype=dtype),
index=ak.arange(prob_size // 2, prob_size),
)
)
assert (added.index == ak.arange(prob_size)).all()
assert (added.values == ak.arange(prob_size, dtype=dtype)).all()

@pytest.mark.parametrize("dtype", [ak.int64, ak.uint64, ak.float64])
def test_topn(self, dtype):
top = ak.Series(ak.arange(100, dtype=dtype)).topn(50)
assert top.values.to_list() == list(range(99, 49, -1))
assert top.index.to_list() == list(range(99, 49, -1))

@pytest.mark.parametrize("dtype", NUMERICAL_TYPES)
@pytest.mark.parametrize("dtype_index", NUMERICAL_TYPES)
def test_sort(self, dtype, dtype_index):
def gen_perm(n):
idx_left = set(range(n))
perm = np.arange(n)
while len(idx_left) >= 2:
inds = np.random.choice(list(idx_left), 2, replace=False)
tmp = perm[inds[0]]
perm[inds[0]] = perm[inds[1]]
perm[inds[1]] = tmp
idx_left.remove(inds[0])
idx_left.remove(inds[1])
return perm

ordered = ak.arange(100, dtype=dtype)
perm = ak.array(gen_perm(100), dtype=dtype_index)

idx_sort = ak.Series(data=ordered, index=perm).sort_index()
assert idx_sort.index.to_list() == ordered.to_list()
assert idx_sort.values.to_list() == perm.to_list()

val_sort = ak.Series(data=perm, index=ordered).sort_values()
assert val_sort.index.to_pandas().tolist() == perm.to_list()
assert val_sort.values.to_list() == ordered.to_list()

@pytest.mark.parametrize("dtype", DTYPES)
def test_head_tail(self, dtype):
n = 10
s = ak.Series(ak.arange(n, dtype=dtype))
for i in range(n):

head = s.head(i)
assert head.index.to_list() == list(range(i))
assert head.values.to_list() == ak.arange(i, dtype=dtype).to_list()

tail = s.tail(i)
assert tail.index.to_list() == ak.arange(n)[-i:n].to_list()
assert tail.values.to_list() == ak.arange(n, dtype=dtype)[-i:n].to_list()

def test_value_counts(self):
s = ak.Series(ak.array([1, 2, 0, 2, 0]))

c = s.value_counts()
assert c.index.to_list() == [0, 2, 1]
assert c.values.to_list() == [2, 2, 1]

c = s.value_counts(sort=False)
assert c.index.to_list() == list(range(3))
assert c.values.to_list() == [2, 1, 2]

def test_concat(self):
s = ak.Series(ak.arange(5))
s2 = ak.Series(ak.arange(5, 11), ak.arange(5, 11))
s3 = ak.Series(ak.arange(5, 10), ak.arange(5, 10))

df = ak.Series.concat([s, s2], axis=1)
assert isinstance(df, ak.DataFrame)

ref_df = pd.DataFrame(
{
"idx": list(range(11)),
"val_0": [0, 1, 2, 3, 4, 0, 0, 0, 0, 0, 0],
"val_1": [0, 0, 0, 0, 0, 5, 6, 7, 8, 9, 10],
}
)
assert_frame_equal(ref_df, df.to_pandas())

def list_helper(arr):
return arr.to_list() if isinstance(arr, (ak.pdarray, ak.Index)) else arr.tolist()

for fname in "concat", "pdconcat":
func = getattr(ak.Series, fname)
c = func([s, s2])
assert list_helper(c.index) == list(range(11))
assert list_helper(c.values) == list(range(11))

df = func([s, s3], axis=1)
if fname == "concat":
ref_df = pd.DataFrame(
{"idx": [0, 1, 2, 3, 4], "val_0": [0, 1, 2, 3, 4], "val_1": [5, 6, 7, 8, 9]}
)
assert isinstance(df, ak.DataFrame)
assert_frame_equal(ref_df, df.to_pandas())
else:
ref_df = pd.DataFrame({0: [0, 1, 2, 3, 4], 1: [5, 6, 7, 8, 9]})
assert isinstance(df, pd.DataFrame)
assert_frame_equal(ref_df, df)

def test_index_as_index_compat(self):
# added to validate functionality for issue #1506
df = ak.DataFrame({"a": ak.arange(10), "b": ak.arange(10), "c": ak.arange(10)})
g = df.groupby(["a", "b"])
g.broadcast(g.sum("c"))
5 changes: 4 additions & 1 deletion arkouda/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from arkouda.dtypes import float64 as akfloat64
from arkouda.dtypes import int64 as akint64
from arkouda.groupbyclass import GroupBy, unique
from arkouda.numeric import cast as akcast
from arkouda.pdarrayclass import RegistrationError, pdarray
from arkouda.pdarraycreation import arange, array, create_pdarray, ones
from arkouda.pdarraysetops import argsort, in1d
Expand Down Expand Up @@ -806,5 +807,7 @@ def lookup(self, key):
raise TypeError("MultiIndex lookup failure")
# if individual vals convert to pdarrays
if not isinstance(key[0], pdarray):
key = [array([x]) for x in key]
dt = self.values[0].dtype if isinstance(self.values[0], pdarray) else akint64
key = [akcast(array([x]), dt) for x in key]

return in1d(self.index, key)
1 change: 0 additions & 1 deletion src/KExtremeMsg.chpl
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,6 @@ module KExtremeMsg

}
}

otherwise {
var errorMsg = notImplementedError("maxk",gEnt.dtype);
keLogger.error(getModuleName(),getRoutineName(),getLineNumber(),errorMsg);
Expand Down

0 comments on commit 2c8061b

Please sign in to comment.