Commit 00c47b4

Add unit tests for some utils functions used in data package
1 parent f64f0e0 commit 00c47b4

1 file changed: 127 additions, 0 deletions


tests/cyclops/data/test_utils.py

@@ -4,6 +4,16 @@
 import numpy as np
 import pandas as pd
 import pytest
+from datasets import Dataset
+
+from datasets.features import (
+    Array2D,
+    Array3D,
+    Array4D,
+    Array5D,
+    Sequence,
+    Value,
+)
 
 from cyclops.data.utils import (
     create_indicator_variables,
@@ -12,9 +22,126 @@
     has_range_index,
     is_timestamp_series,
     to_range_index,
+    feature_is_numeric,
+    feature_is_datetime,
+    check_required_columns,
+    get_columns_as_numpy_array,
 )
 
 
+def test_get_columns_as_numpy_array():
+    """Test get_columns_as_numpy_array fn."""
+    # Mock dataset creation
+    data = {
+        "column1": [1, 2, 3],
+        "column2": [4, 5, 6],
+    }
+    mock_dataset = Dataset.from_dict(data)
+
+    # Scenario 1: Valid Dataset object with single column
+    result = get_columns_as_numpy_array(mock_dataset, "column1")
+    np.testing.assert_array_equal(result, np.array([1, 2, 3]))  # single column -> 1-D array
+
+    # Scenario 2: Valid Dataset object with multiple columns
+    result = get_columns_as_numpy_array(mock_dataset, ["column1", "column2"])
+    np.testing.assert_array_equal(result, np.array([[1, 4], [2, 5], [3, 6]]))
+
+    # Scenario 3: Valid dictionary dataset with single column
+    result = get_columns_as_numpy_array(data, "column1")
+    np.testing.assert_array_equal(result, np.array([1, 2, 3]))  # single column -> 1-D array
+
+    # Scenario 4: Valid dictionary dataset with multiple columns
+    result = get_columns_as_numpy_array(data, ["column1", "column2"])
+    np.testing.assert_array_equal(result, np.array([[1, 4], [2, 5], [3, 6]]))
+
+    # Scenario 5: Invalid dataset type
+    with pytest.raises(TypeError):
+        get_columns_as_numpy_array(["not", "a", "dataset"], "column1")
+
+
+def test_check_required_columns():
+    """Test check_required_columns fn."""
+    # Scenario 1: All required columns are present
+    dataset_columns = ["name", "age", "email"]
+    check_required_columns(dataset_columns, "name", ["age", "email"])
+    check_required_columns(dataset_columns, "age")
+    check_required_columns(dataset_columns, ["name"])
+
+    # Scenario 2: Some required columns are missing
+    with pytest.raises(ValueError) as excinfo:
+        check_required_columns(dataset_columns, "name", "gender")
+    assert "gender" in str(excinfo.value)
+
+    with pytest.raises(ValueError) as excinfo:
+        check_required_columns(dataset_columns, ["name", "gender"])
+    assert "gender" in str(excinfo.value)
+
+    # Scenario 3: All required columns are missing
+    with pytest.raises(ValueError) as excinfo:
+        check_required_columns(dataset_columns, "height", "weight")
+    assert "height" in str(excinfo.value) and "weight" in str(excinfo.value)
+
+    # Scenario 4: Handling of None in required columns
+    check_required_columns(dataset_columns, None, "name")
+    check_required_columns(dataset_columns, "name", None)
+    check_required_columns(dataset_columns, None)
+
+    # Scenario 5: No required columns provided (should not raise an error)
+    check_required_columns(dataset_columns)
+
+
+def test_feature_is_numeric():
+    """Test feature_is_numeric fn."""
+    numeric_features = [
+        Value("int32"), Value("float64"),
+        Array2D(shape=(10, 10), dtype="int32"),
+        Array3D(shape=(10, 10, 10), dtype="float64"),
+        Array4D(shape=(10, 10, 10, 10), dtype="int32"),
+        Array5D(shape=(10, 10, 10, 10, 10), dtype="float64"),
+        Sequence([Value("int32"), Value("float64")]),
+        Value("bool"),
+    ]
+    for feature in numeric_features:
+        assert feature_is_numeric(feature), f"Failed for {type(feature)} with dtype {feature.dtype}"
+
+    non_numeric_features = [
+        Value("string"),
+        Array2D(shape=(10, 10), dtype="string"),
+    ]
+    for feature in non_numeric_features:
+        assert not feature_is_numeric(feature), f"Failed for {type(feature)} with dtype {feature.dtype}"
+
+    invalid_features = [None, 123, "invalid", [1, 2, 3], {"key": "value"}]
+    for feature in invalid_features:
+        with pytest.raises(AttributeError):
+            feature_is_numeric(feature)
+
+
+def test_feature_is_datetime():
+    """Test feature_is_datetime fn."""
+    datetime_features = [
+        Value("timestamp[s]"),
+        Array2D(shape=(10, 10), dtype="time[s]"),
+        Sequence([Value("timestamp[ns]")]),
+    ]
+    for feature in datetime_features:
+        assert feature_is_datetime(feature), f"Failed for {type(feature)} with dtype {feature.dtype}"
+
+    non_datetime_features = [
+        Value("string"),
+        Array2D(shape=(10, 10), dtype="string"),
+        Value("int32"),
+        Array2D(shape=(10, 10), dtype="int32"),
+    ]
+    for feature in non_datetime_features:
+        assert not feature_is_datetime(feature), f"Failed for {type(feature)} with dtype {feature.dtype}"
+
+    invalid_features = [None, 123, "invalid", [1, 2, 3], {"key": "value"}]
+    for feature in invalid_features:
+        with pytest.raises(AttributeError):
+            feature_is_datetime(feature)
+
+
 def test_create_indicator_variables():
     """Test create_indicator_variables fn."""
     features = pd.DataFrame([[np.nan, 1], [3, np.nan]], columns=["A", "B"])
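
This commit adds tests only; the bodies of the helpers under test live in cyclops.data.utils and are not part of the diff. As a rough reference for the behavior the new tests pin down, a minimal sketch of the two column helpers could look roughly like the following. The function names and call signatures come from the test calls above, but everything else here is an assumption and may differ from the actual cyclops implementation.

# Illustrative sketch only -- not the actual cyclops.data.utils code.
import numpy as np
from datasets import Dataset


def check_required_columns(dataset_columns, *required_columns):
    """Raise ValueError naming any required column missing from dataset_columns."""
    required = []
    for arg in required_columns:
        if arg is None:  # None entries are ignored (Scenario 4)
            continue
        required.extend([arg] if isinstance(arg, str) else arg)
    missing = [col for col in required if col not in dataset_columns]
    if missing:
        raise ValueError(f"Missing required columns: {missing}.")


def get_columns_as_numpy_array(dataset, columns):
    """Return one column as a 1-D array, or several columns stacked column-wise."""
    if isinstance(dataset, Dataset):
        data = dataset[:]  # slice a Dataset into a dict of column name -> list
    elif isinstance(dataset, dict):
        data = dataset
    else:
        raise TypeError("dataset must be a datasets.Dataset or a dict of columns.")
    if isinstance(columns, str):
        return np.asarray(data[columns])
    return np.stack([np.asarray(data[col]) for col in columns], axis=1)

Either way it is written, the behavior the tests require is the same: a single column name yields a 1-D array, a list of names yields a (rows, columns) array (Scenario 2 above gives shape (3, 2)), an unsupported dataset type raises TypeError, and missing required columns are reported by name in the ValueError message.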
