 import time
 import pandas as pd

-from typing import Generator, Tuple
+from typing import Generator, List, Tuple
+from arcticdb.encoding_version import EncodingVersion
+from arcticdb.options import LibraryOptions
 from arcticdb.util.test import get_sample_dataframe, random_string
+from arcticdb.util.utils import DFGenerator
 from arcticdb.version_store.library import Library, ReadRequest
 from arcticdb.version_store.processing import QueryBuilder
 from arcticdb.version_store._store import NativeVersionStore
-from tests.util.mark import MACOS, SLOW_TESTS_MARK, WINDOWS, MEMRAY_SUPPORTED, MEMRAY_TESTS_MARK, SKIP_CONDA_MARK
+from arcticdb_ext.version_store import PythonVersionStoreReadOptions
+from tests.util.mark import LINUX, MACOS, SLOW_TESTS_MARK, WINDOWS, MEMRAY_SUPPORTED, MEMRAY_TESTS_MARK, SKIP_CONDA_MARK


 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger("Memory_tests")

+## IMPORTANT !!!
+##
+## All memory tests MUST use fixtures that return a Library object,
+## not a NativeVersionStore. The latter is a thick wrapper that can hide
+## some potential problems, so all tests have to exercise what the
+## customer actually works with.
+
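(For illustration only: a minimal sketch of the pattern this note calls for, reusing names from this diff; the test name and the leak limit are made up, not tuned values.)

    @MEMRAY_TESTS_MARK
    @pytest.mark.limit_leaks(location_limit="25 KB", filter_fn=is_relevant)
    def test_mem_leak_example(lmdb_library):
        # Operate only on the customer-facing Library API,
        # never on the underlying NativeVersionStore
        lib: Library = lmdb_library
        lib.write("sym", get_sample_dataframe())
        df = lib.read("sym").data
        del df
        gc.collect()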

 # region HELPER functions for non-memray tests

@@ -590,6 +601,30 @@ def is_relevant(stack: Stack) -> bool:
         # do something to check if we need this to be added
         # as mem leak
         # print(f"SAMPLE >>> {frame.filename}:{frame.function}[{frame.lineno}]")
+        frame_info_str = f"{frame.filename}:{frame.function}:[{frame.lineno}]"
+
+        if "folly::CPUThreadPoolExecutor::CPUTask" in frame_info_str:
+            logger.warning(f"Frame excluded : {frame_info_str}")
+            logger.warning(f'''Explanation : These are on purpose, and they come from the interaction of
+                multi-threading and forking. When Python forks, the task scheduler has a linked list
+                of tasks to execute, but there is a global lock held that protects the thread-local state.
+                We can't free the list without accessing the global thread-local storage singleton,
+                and that is protected by a lock which is now held by a thread that is in a different
+                process, so it will never be unlocked in the child. As a workaround we intentionally
+                leak the task scheduler and replace it with a new one, in this method:
+                https://github.com/man-group/ArcticDB/blob/master/cpp/arcticdb/async/task_scheduler.cpp#L34
+
+                It's actually due to a bug in folly, because the lock around the thread-local
+                storage manager has a compile-time token that should be used to differentiate it
+                from other locks, but it has been constructed with type void, as have other locks.
+                It's possible they might fix it at some point, in which case we can free the memory.
+                We also have a vague intention to move away from folly for async if we
+                find something better.
+
+                It is great that this is being caught, as it's the one case in the whole project where
+                we know for certain that it does leak memory (and only because there's no alternative).''')
+            return False
+
         pass
     return True

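(How the filter above is consumed, as seen elsewhere in this file: pytest-memray invokes `filter_fn` on each allocation's `Stack`, and a False return excludes that allocation from the leak check. The limit value and test below are illustrative.)

    @pytest.mark.limit_leaks(location_limit="30 KB", filter_fn=is_relevant)
    def test_some_memray_checked_path(lmdb_library):
        ...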
@@ -727,3 +762,127 @@ def test_mem_leak_read_all_arctic_lib_memray(library_with_big_symbol_):
     logger.info(f"Test took : {time.time() - st}")

     gc.collect()
+
+@pytest.fixture
+def lmdb_library(lmdb_storage, lib_name, request) -> Generator[Library, None, None]:
+    """
+    Allows passing library creation parameters from the test or another fixture.
+    Example:
+
+        @pytest.mark.parametrize("lmdb_library", [
+            {'library_options': LibraryOptions(rows_per_segment=100, columns_per_segment=100)}
+        ], indirect=True)
+        def test_my_test(lmdb_library):
+            .....
+    """
+    params = request.param if hasattr(request, 'param') else {}
+    yield lmdb_storage.create_arctic().create_library(name=lib_name, **params)
+
+
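(A note on the design choice above: `indirect=True` is what routes each parametrize entry into the fixture's `request.param` rather than into the test function itself, which is how the `library_options` dictionary reaches `create_library`.)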
+@pytest.fixture
+def prepare_head_tails_symbol(lmdb_library):
+    """
+    This fixture is used by the test `test_mem_leak_head_tail_memray`.
+
+    It creates a symbol with several versions and a snapshot for each version.
+    It inserts dataframes which are large relative to the library's segment size,
+    and if dynamic schema is used, each version has more columns than the previous one.
+
+    Should not be reused.
+    """
+    lib: Library = lmdb_library
+    opts = lib.options()
+
+    total_number_columns = 1002
+    symbol = "asdf12345"
+    num_rows_list = [279, 199, 1, 350, 999, 0, 1001]
+    snapshot_names = []
+    for rows in num_rows_list:
+        st = time.time()
+        df = DFGenerator.generate_wide_dataframe(num_rows=rows, num_cols=total_number_columns,
+                                                 num_string_cols=25, start_time=pd.Timestamp(0), seed=64578)
+        lib.write(symbol, df)
+        snap = f"{symbol}_{rows}"
+        lib.snapshot(snap)
+        snapshot_names.append(snap)
+        logger.info(f"Generated {rows} rows in {time.time() - st} sec")
+        if opts.dynamic_schema:
+            # Dynamic-schema libraries can grow their column set over time,
+            # so the test should cover that characteristic
+            total_number_columns += 20
+            logger.info(f"Total number of columns increased to {total_number_columns}")
+
+    all_columns = df.columns.to_list()
+    yield (lib, symbol, num_rows_list, snapshot_names, all_columns)
+    lib.delete(symbol=symbol)
+
+
+@MEMRAY_TESTS_MARK
+@SLOW_TESTS_MARK
+## Linux reports quite a large allocation at a single location; a separate issue will investigate why
+@pytest.mark.limit_leaks(location_limit="1000 KB" if LINUX else "52 KB", filter_fn=is_relevant)
+@pytest.mark.parametrize("lmdb_library", [
+    {'library_options': LibraryOptions(rows_per_segment=233, columns_per_segment=197, dynamic_schema=True, encoding_version=EncodingVersion.V2)},
+    {'library_options': LibraryOptions(rows_per_segment=99, columns_per_segment=99, dynamic_schema=False, encoding_version=EncodingVersion.V1)}
+], indirect=True)
+def test_mem_leak_head_tail_memray(prepare_head_tails_symbol):
+    """
+    This test checks whether the `head` and `tail` functions leak memory.
+    The initial symbol (the test environment precondition) is created in a dedicated
+    fixture so that memray does not track allocations there, as that would slow
+    the process down many times over.
+    """
+
+    store: Library
+    symbol: str
+    num_rows_list: List[int]
+    snapshot_names: List[str]
+    all_columns: List[str]
+    (store, symbol, num_rows_list, snapshot_names, all_columns) = prepare_head_tails_symbol
+
+    start_test: float = time.time()
+    max_rows: int = max(num_rows_list)
+
+    np.random.seed(959034)
+    # construct a list of head and tail row counts to be selected
+    num_rows_to_select = []
+    important_values = [0, 1, -1, 2, -2, max_rows, -max_rows]  # some boundary cases
+    num_rows_to_select.extend(important_values)
+    num_rows_to_select.extend(np.random.randint(low=5, high=99, size=7))  # add 7 more random values
+    # the number of iterations is the length of that list
+    iterations = len(num_rows_to_select)
+    # construct a random list of snapshot names, one per iteration
+    snapshots_list: List[str] = np.random.choice(snapshot_names, iterations)
+    # construct a random list of version numbers, one per iteration
+    versions_list: List[int] = np.random.randint(0, len(num_rows_list) - 1, iterations)
+    # construct a random list of the number of columns to select, one per iteration
+    number_columns_for_selection_list: List[int] = np.random.randint(0, len(all_columns) - 1, iterations)
+
+    count: int = 0
+    # Execute all head/tail operations several times, each time with a specific number of columns.
+    # The selected columns are a random sample; the row counts include the boundary cases defined above.
+    for rows in num_rows_to_select:
+        selected_columns: List[str] = np.random.choice(all_columns, number_columns_for_selection_list[count], replace=False).tolist()
+        snap: str = snapshots_list[count]
+        ver: int = int(versions_list[count])
+        logger.info(f"rows {rows} / snapshot {snap}")
+        df1: pd.DataFrame = store.head(n=rows, as_of=snap, symbol=symbol).data
+        df2: pd.DataFrame = store.tail(n=rows, as_of=snap, symbol=symbol).data
+        df3: pd.DataFrame = store.head(n=rows, as_of=ver, symbol=symbol, columns=selected_columns).data
+        difference = list(set(df3.columns.to_list()).difference(set(selected_columns)))
+        assert len(difference) == 0, f"Columns not included : {difference}"
+        df4: pd.DataFrame = store.tail(n=rows, as_of=ver, symbol=symbol, columns=selected_columns).data
+        difference = list(set(df4.columns.to_list()).difference(set(selected_columns)))
+        assert len(difference) == 0, f"Columns not included : {difference}"
+
+        logger.info(f"Iteration {count} / {iterations} completed")
+        count += 1
+        del selected_columns, df1, df2, df3, df4
+
+    del store, symbol, num_rows_list, snapshot_names, all_columns
+    del num_rows_to_select, important_values, snapshots_list, versions_list, number_columns_for_selection_list
+    gc.collect()
+    time.sleep(10)  # collection is not immediate
+    logger.info(f"Test completed in {time.time() - start_test}")
+
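(For reference: the `limit_leaks` marks only take effect when the pytest-memray plugin is enabled, e.g. `python -m pytest --memray <path-to-this-test-file> -k head_tail`; the exact path is omitted here.)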