|
12 | 12 | from arcticdb.exceptions import MissingDataException
|
13 | 13 | from arcticdb_ext.storage import KeyType
|
14 | 14 |
|
| 15 | +from arcticdb.util.venv import CurrentVersion |
15 | 16 |
|
16 | 17 | @pytest.mark.parametrize("batch", (True, False))
|
17 | 18 | def test_read_incompletes_with_indexed_data(lmdb_version_store_v1, batch):
|
@@ -80,3 +81,86 @@ def test_read_incompletes_no_chunking(lmdb_version_store_tiny_segment):
|
80 | 81 |
|
81 | 82 | ref_keys = lib_tool.find_keys_for_symbol(KeyType.APPEND_REF, sym)
|
82 | 83 | assert len(ref_keys) == 1
|
| 84 | + |
@pytest.mark.parametrize("dynamic_schema", [True, False])
def test_read_incompletes_columns_filter(version_store_factory, dynamic_schema):
    """Read incomplete data with a ``columns`` filter, before and after compaction.

    The first ten rows of a twenty-row frame are appended as three incomplete
    chunks and read back with several column selections and date ranges. The
    incompletes are then compacted, the remaining ten rows are appended as two
    more incomplete chunks, and the filtered reads are repeated.
    """
    lib = version_store_factory(dynamic_schema=dynamic_schema)
    lib_tool = lib.library_tool()
    sym = "sym"

    num_rows = 20
    df = pd.DataFrame(
        {
            "col": np.arange(num_rows),
            "float_col": np.arange(num_rows, dtype=np.float64),
            "str_col": [f"str_{i}" for i in range(num_rows)],
        },
        pd.date_range("2024-01-01", periods=num_rows),
    )

    def append_chunks(bounds):
        # Each (start, stop) pair becomes one incomplete append of df.iloc[start:stop].
        for start, stop in bounds:
            lib_tool.append_incomplete(sym, df.iloc[start:stop])

    def check(columns, expected_rows, date_range=(None, None)):
        # Read with incomplete=True and compare against the same columns/rows of df.
        result = lib.read(sym, date_range=date_range, incomplete=True, columns=columns).data
        assert_frame_equal(result, df[columns].iloc[expected_rows])

    append_chunks([(0, 5), (5, 8), (8, 10)])

    first_ten = slice(None, 10)
    check(["col"], first_ten)
    check(["float_col"], first_ten)
    check(["float_col", "str_col"], first_ten)
    check(
        ["float_col", "str_col"],
        slice(2, 8),
        date_range=(pd.Timestamp(2024, 1, 3), pd.Timestamp(2024, 1, 8)),
    )

    # Compact and add the rest of the df
    lib.compact_incomplete(sym, append=True, convert_int_to_float=False, via_iteration=False)
    append_chunks([(10, 17), (17, num_rows)])

    all_rows = slice(None)
    check(["float_col"], all_rows)
    check(["float_col", "str_col"], all_rows)

    # Only incomplete range
    check(
        ["float_col", "str_col"],
        slice(11, 18),
        date_range=(pd.Timestamp(2024, 1, 12), pd.Timestamp(2024, 1, 18)),
    )
| 129 | + |
| 130 | + |
def test_read_incompletes_dynamic(lmdb_version_store_dynamic_schema_v1):
    """Read incomplete data whose chunks have different column sets.

    Three frames with overlapping but different columns are appended as
    incompletes around a compaction step. Each read is compared against the
    ``pd.concat`` of the frames expected to be visible at that point.
    """
    lib = lmdb_version_store_dynamic_schema_v1
    lib_tool = lib.library_tool()
    sym = "sym"

    def day(offset_days):
        return pd.Timestamp(0) + pd.DateOffset(days=offset_days)

    def daily_index(first_day, length):
        return pd.date_range(day(first_day), periods=length, freq="d")

    def read_incomplete(date_range=(None, None), **read_kwargs):
        return lib.read(sym, date_range=date_range, incomplete=True, **read_kwargs).data

    df_1 = pd.DataFrame(
        {"col_1": [1., 2., 3.], "col_2": [1., 2., 3.]},
        index=daily_index(0, 3),
    )
    df_2 = pd.DataFrame(
        {"col_2": [4., 5.], "col_3": [1., 2.]},
        index=daily_index(3, 2),
    )
    df_3 = pd.DataFrame(
        {"col_3": [3., 4.], "col_4": [1., 2.]},
        index=daily_index(5, 2),
    )

    for frame in (df_1, df_2):
        lib_tool.append_incomplete(sym, frame)

    first_two = pd.concat([df_1, df_2])
    assert_frame_equal(read_incomplete(), first_two)

    # If reading just a single incomplete we will get the result in its own schema
    assert_frame_equal(read_incomplete(date_range=(day(3), None)), df_2)

    lib.compact_incomplete(sym, append=True, convert_int_to_float=False, via_iteration=False)

    # The same data is expected back once the incompletes have been compacted.
    assert_frame_equal(read_incomplete(), pd.concat([df_1, df_2]))

    lib_tool.append_incomplete(sym, df_3)
    all_three = pd.concat([df_1, df_2, df_3])
    assert_frame_equal(read_incomplete(), all_three)

    filtered = read_incomplete(columns=["col_2", "col_4"])
    assert_frame_equal(filtered, pd.concat([df_1, df_2, df_3])[["col_2", "col_4"]])
0 commit comments