Skip to content

Commit 35a5e86

Browse files
committed
Add bounds check for empty tensors and enhance tests for numpy slicing
1 parent 97a70dd commit 35a5e86

File tree

2 files changed

+150
-1
lines changed

2 files changed

+150
-1
lines changed

cpp/arcticdb/entity/native_tensor.hpp

+9-1
Original file line numberDiff line numberDiff line change
@@ -237,9 +237,17 @@ struct TypedTensor : public NativeTensor {
237237
// The new column shape * the column stride tells us how far to move the data pointer from the origin
238238

239239
ptr = reinterpret_cast<const uint8_t*>(tensor.data()) + (slice_num * stride_offset);
240-
util::check(ptr < static_cast<const uint8_t*>(tensor.ptr) + std::abs(tensor.extent(0)),
240+
if (tensor.extent(0) == 0) {
241+
// For empty tensors, we can't perform the normal bounds check
242+
// An empty tensor has no elements to point into, so only slice 0 (a zero offset) is valid
243+
util::check(slice_num == 0,
244+
"Cannot put slice pointer at position {} in an empty tensor",
245+
slice_num);
246+
} else {
247+
util::check(ptr < static_cast<const uint8_t*>(tensor.ptr) + std::abs(tensor.extent(0)),
241248
"Tensor overflow, cannot put slice pointer at byte {} in a tensor of {} bytes",
242249
slice_num * stride_offset, tensor.extent(0));
250+
}
243251
}
244252
};
245253
template<typename T>

python/tests/unit/arcticdb/version_store/test_normalization.py

+141
Original file line numberDiff line numberDiff line change
@@ -858,3 +858,144 @@ def test_throws_correct_exceptions(returns_expected, method_to_test, lmdb_versio
858858
args = [MagicMock()] * non_default_arg_count
859859
with pytest.raises(expected):
860860
method_to_test(*args)
861+
862+
863+
def test_numpy_none_slice(lmdb_version_store):
864+
lib = lmdb_version_store
865+
866+
dat = np.array([1.0, 2.0, 3.0, 4.0])
867+
idx = pd.DatetimeIndex(["2020-01-01"], name="date")
868+
columns_names = ["A", "B", "C", "D"]
869+
870+
# This is a view, not a copy
871+
# it adds a leading axis (no transpose), so the shape is (1, 4) instead of (4,)
872+
sl = dat[None, :]
873+
df = pd.DataFrame(sl, index=idx, columns=columns_names)
874+
875+
lib.write("df_none_slice", df)
876+
877+
result = lib.read("df_none_slice").data
878+
pd.testing.assert_frame_equal(result, df)
879+
880+
881+
def test_numpy_newaxis_slice(lmdb_version_store):
882+
lib = lmdb_version_store
883+
884+
dat = np.array([1.0, 2.0, 3.0, 4.0])
885+
idx = pd.DatetimeIndex(["2020-01-01"], name="date")
886+
columns_names = ["A", "B", "C", "D"]
887+
888+
# This is a view, not a copy
889+
# it adds a leading axis (no transpose), so the shape is (1, 4) instead of (4,)
890+
sl = dat[np.newaxis, :]
891+
df = pd.DataFrame(sl, index=idx, columns=columns_names)
892+
893+
lib.write("df_none_slice", df)
894+
895+
result = lib.read("df_none_slice").data
896+
pd.testing.assert_frame_equal(result, df)
897+
898+
899+
def test_view_with_reshape(lmdb_version_store):
900+
lib = lmdb_version_store
901+
902+
dat = np.array([1.0, 2.0, 3.0, 4.0, 5.0, 6.0])
903+
reshaped = dat.reshape(2, 3) # Creates a view with different strides
904+
idx = pd.DatetimeIndex(["2020-01-01", "2020-01-02"], name="date")
905+
columns_names = ["A", "B", "C"]
906+
df = pd.DataFrame(reshaped, index=idx, columns=columns_names)
907+
908+
lib.write("df_reshaped", df)
909+
910+
result = lib.read("df_reshaped").data
911+
pd.testing.assert_frame_equal(result, df)
912+
913+
914+
def test_view_with_transpose(lmdb_version_store):
915+
lib = lmdb_version_store
916+
917+
original = np.array([[1, 2, 3], [4, 5, 6]])
918+
transposed = original.T # Shape changes from (2,3) to (3,2)
919+
idx = pd.DatetimeIndex(["2020-01-01", "2020-01-02", "2020-01-03"], name="date")
920+
columns_names = ["A", "B"]
921+
922+
df = pd.DataFrame(transposed, index=idx, columns=columns_names)
923+
924+
lib.write("df_transposed", df)
925+
926+
result = lib.read("df_transposed").data
927+
pd.testing.assert_frame_equal(result, df)
928+
929+
def test_view_with_fancy_indexing(lmdb_version_store):
930+
lib = lmdb_version_store
931+
932+
original = np.array([[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12]])
933+
934+
indices = np.array([0, 2])
935+
view = original[indices] # Selects rows 0 and 2
936+
937+
idx = pd.DatetimeIndex(["2020-01-01", "2020-01-02"], name="date")
938+
columns_names = ["A", "B", "C", "D"]
939+
940+
df = pd.DataFrame(view, index=idx, columns=columns_names)
941+
942+
lib.write("df_fancy_idx", df)
943+
944+
result = lib.read("df_fancy_idx").data
945+
pd.testing.assert_frame_equal(result, df)
946+
947+
948+
def test_view_with_boolean_masking(lmdb_version_store):
949+
lib = lmdb_version_store
950+
951+
original = np.array([[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12]])
952+
953+
mask = np.array([True, False, True])
954+
view = original[mask] # Selects rows 0 and 2
955+
956+
idx = pd.DatetimeIndex(["2020-01-01", "2020-01-02"], name="date")
957+
columns_names = ["A", "B", "C", "D"]
958+
959+
df = pd.DataFrame(view, index=idx, columns=columns_names)
960+
961+
lib.write("df_bool_mask", df)
962+
963+
result = lib.read("df_bool_mask").data
964+
pd.testing.assert_frame_equal(result, df)
965+
966+
967+
def test_view_with_slice(lmdb_version_store):
968+
lib = lmdb_version_store
969+
970+
# Create a 2D array
971+
original = np.array([[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12]])
972+
view = original[0:2, 1:3] # Select rows 0-1 and columns 1-2
973+
idx = pd.DatetimeIndex(["2020-01-01", "2020-01-02"], name="date")
974+
columns_names = ["B", "C"]
975+
976+
df = pd.DataFrame(view, index=idx, columns=columns_names)
977+
978+
lib.write("df_slice", df)
979+
980+
result = lib.read("df_slice").data
981+
pd.testing.assert_frame_equal(result, df)
982+
983+
984+
def test_empty_dimension(lmdb_version_store):
985+
lib = lmdb_version_store
986+
987+
# 0 rows, 3 columns
988+
zero_dim_array = np.zeros((0, 3))
989+
columns_names = ["A", "B", "C"]
990+
991+
# Empty index
992+
# N.B. Make sure not to pass a name to the index
993+
# as we don't keep names for empty indices
994+
# and pandas does
995+
idx = pd.DatetimeIndex([])
996+
df = pd.DataFrame(zero_dim_array, index=idx, columns=columns_names)
997+
998+
lib.write("df_zero_dim", df)
999+
1000+
result = lib.read("df_zero_dim").data
1001+
pd.testing.assert_frame_equal(result, df)

0 commit comments

Comments
 (0)