* add xgb
* fix usage of only the first 4 GBM models
* remove hypex tests
* tox: add a py311 config; fix Python support for versions >=3.11.1
* GH Actions: switch the macOS runner from ARM to x86-64
Commit d8635a2, 1 parent (8eb1a18). 19 changed files with 864 additions and 236 deletions.
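The "add xgb" item indicates a new boosting algorithm becomes selectable in the tabular presets. A minimal usage sketch follows; `TabularAutoML`, `Task`, and the `use_algos` entry of `general_params` are existing LightAutoML API, but the `"xgb"` key name is an assumption inferred from the commit message and is not confirmed by this diff.

from lightautoml.automl.presets.tabular_presets import TabularAutoML
from lightautoml.tasks import Task

# Sketch only: select the boosting algorithms explicitly. "lgb" is an existing
# algorithm key; "xgb" is assumed to be the key introduced by this commit and
# may differ in the actual implementation.
automl = TabularAutoML(
    task=Task("binary"),
    general_params={"use_algos": [["lgb", "xgb"]]},
)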
First changed file shown (hypex AATest unit tests):

@@ -1,72 +1,72 @@
-import pandas as pd
-import pytest
+# import pandas as pd
+# import pytest

-from lightautoml.addons.hypex.ABTesting.ab_tester import AATest
-from lightautoml.addons.hypex.utils.tutorial_data_creation import create_test_data
+# from lightautoml.addons.hypex.ABTesting.ab_tester import AATest
+# from lightautoml.addons.hypex.utils.tutorial_data_creation import create_test_data


-@pytest.fixture
-def data():
-    return create_test_data(rs=52)
+# @pytest.fixture
+# def data():
+#     return create_test_data(rs=52)


-@pytest.fixture
-def iterations():
-    return 20
+# @pytest.fixture
+# def iterations():
+#     return 20


-@pytest.fixture
-def info_col():
-    return "user_id"
+# @pytest.fixture
+# def info_col():
+#     return "user_id"


-def test_aa_simple(data, iterations, info_col):
-    model = AATest(target_fields=["pre_spends", "post_spends"], info_cols=info_col)
-    res, datas_dict = model.search_dist_uniform_sampling(data, iterations=iterations)
+# def test_aa_simple(data, iterations, info_col):
+#     model = AATest(target_fields=["pre_spends", "post_spends"], info_cols=info_col)
+#     res, datas_dict = model.search_dist_uniform_sampling(data, iterations=iterations)

-    assert isinstance(res, pd.DataFrame), "Metrics are not dataframes"
-    assert res.shape[0] == iterations, (
-        "Metrics dataframe contains more or less rows with random states " "(#rows should be equal #of experiments"
-    )
-    assert isinstance(datas_dict, dict), "Result is not dict"
-    assert len(datas_dict) == iterations, "# of dataframes is not equal # of iterations"
-    assert all(data.columns) == all(
-        datas_dict[0].drop(columns=["group"]).columns
-    ), "Columns in the result are not the same as columns in initial data "
+#     assert isinstance(res, pd.DataFrame), "Metrics are not dataframes"
+#     assert res.shape[0] == iterations, (
+#         "Metrics dataframe contains more or less rows with random states " "(#rows should be equal #of experiments"
+#     )
+#     assert isinstance(datas_dict, dict), "Result is not dict"
+#     assert len(datas_dict) == iterations, "# of dataframes is not equal # of iterations"
+#     assert all(data.columns) == all(
+#         datas_dict[0].drop(columns=["group"]).columns
+#     ), "Columns in the result are not the same as columns in initial data "


-def test_aa_group(data, iterations, info_col):
-    group_cols = "industry"
+# def test_aa_group(data, iterations, info_col):
+#     group_cols = "industry"

-    model = AATest(target_fields=["pre_spends", "post_spends"], info_cols=info_col, group_cols=group_cols)
-    res, datas_dict = model.search_dist_uniform_sampling(data, iterations=iterations)
+#     model = AATest(target_fields=["pre_spends", "post_spends"], info_cols=info_col, group_cols=group_cols)
+#     res, datas_dict = model.search_dist_uniform_sampling(data, iterations=iterations)

-    assert isinstance(res, pd.DataFrame), "Metrics are not dataframes"
-    assert res.shape[0] == iterations, (
-        "Metrics dataframe contains more or less rows with random states " "(#rows should be equal #of experiments"
-    )
-    assert isinstance(datas_dict, dict), "Result is not dict"
-    assert len(datas_dict) == iterations, "# of dataframes is not equal # of iterations"
-    assert all(data.columns) == all(datas_dict[0].drop(columns=["group"]).columns), (
-        "Columns in the result are not " "the same as columns in initial " "data "
-    )
+#     assert isinstance(res, pd.DataFrame), "Metrics are not dataframes"
+#     assert res.shape[0] == iterations, (
+#         "Metrics dataframe contains more or less rows with random states " "(#rows should be equal #of experiments"
+#     )
+#     assert isinstance(datas_dict, dict), "Result is not dict"
+#     assert len(datas_dict) == iterations, "# of dataframes is not equal # of iterations"
+#     assert all(data.columns) == all(datas_dict[0].drop(columns=["group"]).columns), (
+#         "Columns in the result are not " "the same as columns in initial " "data "
+#     )


-def test_aa_quantfields(data, iterations, info_col):
-    group_cols = "industry"
-    quant_field = "gender"
+# def test_aa_quantfields(data, iterations, info_col):
+#     group_cols = "industry"
+#     quant_field = "gender"

-    model = AATest(
-        target_fields=["pre_spends", "post_spends"], info_cols=info_col, group_cols=group_cols, quant_field=quant_field
-    )
-    res, datas_dict = model.search_dist_uniform_sampling(data, iterations=iterations)
+#     model = AATest(
+#         target_fields=["pre_spends", "post_spends"], info_cols=info_col, group_cols=group_cols, quant_field=quant_field
+#     )
+#     res, datas_dict = model.search_dist_uniform_sampling(data, iterations=iterations)

-    assert isinstance(res, pd.DataFrame), "Metrics are not dataframes"
-    assert res.shape[0] == iterations, (
-        "Metrics dataframe contains more or less rows with random states " "(#rows should be equal #of experiments"
-    )
-    assert isinstance(datas_dict, dict), "Result is not dict"
-    assert len(datas_dict) == iterations, "# of dataframes is not equal # of iterations"
-    assert all(data.columns) == all(datas_dict[0].drop(columns=["group"]).columns), (
-        "Columns in the result are not " "the same as columns in initial " "data "
-    )
+#     assert isinstance(res, pd.DataFrame), "Metrics are not dataframes"
+#     assert res.shape[0] == iterations, (
+#         "Metrics dataframe contains more or less rows with random states " "(#rows should be equal #of experiments"
+#     )
+#     assert isinstance(datas_dict, dict), "Result is not dict"
+#     assert len(datas_dict) == iterations, "# of dataframes is not equal # of iterations"
+#     assert all(data.columns) == all(datas_dict[0].drop(columns=["group"]).columns), (
+#         "Columns in the result are not " "the same as columns in initial " "data "
+#     )
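The commit disables the AA tests by commenting out every line. As a hedged alternative sketch (not what this commit does), the same effect could be achieved with a module-level skip marker, which keeps the file intact and is trivial to revert; `pytest.mark.skip` and `pytestmark` are standard pytest, and the reason string is illustrative only.

import pytest

# Sketch only: skip the whole test module instead of commenting it out.
# Re-enabling the tests then means deleting this single line.
pytestmark = pytest.mark.skip(reason="hypex AA tests temporarily disabled")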
Second changed file shown (hypex ABTest unit tests):

@@ -1,92 +1,92 @@
-from lightautoml.addons.hypex.ABTesting.ab_tester import ABTest
+# from lightautoml.addons.hypex.ABTesting.ab_tester import ABTest

-import pytest
-import pandas as pd
-import numpy as np
+# import pytest
+# import pandas as pd
+# import numpy as np

-DATA_SIZE = 100
+# DATA_SIZE = 100


-@pytest.fixture
-def ab_test():
-    return ABTest()
+# @pytest.fixture
+# def ab_test():
+#     return ABTest()


-@pytest.fixture
-def data():
-    # Generate synthetic data for group A
-    group_a_data = np.random.normal(loc=10, scale=2, size=DATA_SIZE)
-    # Generate synthetic data for group B
-    group_b_data = np.random.normal(loc=12, scale=2, size=DATA_SIZE)
-    group_bp_data = np.random.normal(loc=10, scale=2, size=DATA_SIZE * 2)
-    return pd.DataFrame(
-        {
-            "group": ["control"] * len(group_a_data) + ["test"] * len(group_b_data),
-            "value": list(group_a_data) + list(group_b_data),
-            "previous_value": group_bp_data,
-        }
-    )
+# @pytest.fixture
+# def data():
+#     # Generate synthetic data for group A
+#     group_a_data = np.random.normal(loc=10, scale=2, size=DATA_SIZE)
+#     # Generate synthetic data for group B
+#     group_b_data = np.random.normal(loc=12, scale=2, size=DATA_SIZE)
+#     group_bp_data = np.random.normal(loc=10, scale=2, size=DATA_SIZE * 2)
+#     return pd.DataFrame(
+#         {
+#             "group": ["control"] * len(group_a_data) + ["test"] * len(group_b_data),
+#             "value": list(group_a_data) + list(group_b_data),
+#             "previous_value": group_bp_data,
+#         }
+#     )


-@pytest.fixture
-def target_field():
-    return "value"
+# @pytest.fixture
+# def target_field():
+#     return "value"


-@pytest.fixture
-def group_field():
-    return "group"
+# @pytest.fixture
+# def group_field():
+#     return "group"


-@pytest.fixture
-def previous_value():
-    return "previous_value"
+# @pytest.fixture
+# def previous_value():
+#     return "previous_value"


-@pytest.fixture
-def alpha():
-    return 0.05
+# @pytest.fixture
+# def alpha():
+#     return 0.05


-def test_split_ab(ab_test, data, group_field):
-    result = ab_test.split_ab(data, group_field)
-    assert len(result["test"]) == DATA_SIZE
-    assert len(result["control"]) == DATA_SIZE
+# def test_split_ab(ab_test, data, group_field):
+#     result = ab_test.split_ab(data, group_field)
+#     assert len(result["test"]) == DATA_SIZE
+#     assert len(result["control"]) == DATA_SIZE


-def test_calc_difference(ab_test, data, group_field, target_field, previous_value):
-    splitted_data = ab_test.split_ab(data, group_field)
-    result = ab_test.calc_difference(splitted_data, target_field, previous_value)
-    assert 1 < result["ate"] < 3
-    assert 1 < result["cuped"] < 3
-    assert 1 < result["diff_in_diff"] < 3
+# def test_calc_difference(ab_test, data, group_field, target_field, previous_value):
+#     splitted_data = ab_test.split_ab(data, group_field)
+#     result = ab_test.calc_difference(splitted_data, target_field, previous_value)
+#     assert 1 < result["ate"] < 3
+#     assert 1 < result["cuped"] < 3
+#     assert 1 < result["diff_in_diff"] < 3


-def test_calc_difference_with_previous_value(ab_test, data, group_field, target_field, previous_value):
-    ab_test.calc_difference_method = "ate"
-    splitted_data = ab_test.split_ab(data, group_field)
-    result = ab_test.calc_difference(splitted_data, previous_value)
-    assert -1 < result["ate"] < 1
+# def test_calc_difference_with_previous_value(ab_test, data, group_field, target_field, previous_value):
+#     ab_test.calc_difference_method = "ate"
+#     splitted_data = ab_test.split_ab(data, group_field)
+#     result = ab_test.calc_difference(splitted_data, previous_value)
+#     assert -1 < result["ate"] < 1


-def test_calc_p_value(ab_test, data, group_field, target_field, previous_value, alpha):
-    splitted_data = ab_test.split_ab(data, group_field)
-    result = ab_test.calc_p_value(splitted_data, target_field)
-    assert result["t_test"] < alpha
-    assert result["mann_whitney"] < alpha
+# def test_calc_p_value(ab_test, data, group_field, target_field, previous_value, alpha):
+#     splitted_data = ab_test.split_ab(data, group_field)
+#     result = ab_test.calc_p_value(splitted_data, target_field)
+#     assert result["t_test"] < alpha
+#     assert result["mann_whitney"] < alpha

-    result = ab_test.calc_p_value(splitted_data, previous_value)
-    assert result["t_test"] > alpha
-    assert result["mann_whitney"] > alpha
+#     result = ab_test.calc_p_value(splitted_data, previous_value)
+#     assert result["t_test"] > alpha
+#     assert result["mann_whitney"] > alpha


-def test_execute(ab_test, data, group_field, target_field, previous_value, alpha):
-    result = ab_test.execute(data, target_field, group_field, previous_value)
-    print(result)
-    assert result["size"]["test"] == DATA_SIZE
-    assert result["size"]["control"] == DATA_SIZE
-    assert 1 < result["difference"]["ate"] < 3
-    assert 1 < result["difference"]["cuped"] < 3
-    assert 1 < result["difference"]["diff_in_diff"] < 3
-    assert result["p_value"]["t_test"] < alpha
-    assert result["p_value"]["mann_whitney"] < alpha
+# def test_execute(ab_test, data, group_field, target_field, previous_value, alpha):
+#     result = ab_test.execute(data, target_field, group_field, previous_value)
+#     print(result)
+#     assert result["size"]["test"] == DATA_SIZE
+#     assert result["size"]["control"] == DATA_SIZE
+#     assert 1 < result["difference"]["ate"] < 3
+#     assert 1 < result["difference"]["cuped"] < 3
+#     assert 1 < result["difference"]["diff_in_diff"] < 3
+#     assert result["p_value"]["t_test"] < alpha
+#     assert result["p_value"]["mann_whitney"] < alpha
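Similarly, a hedged alternative for the ABTest module (again, not what this commit does) would be to skip at collection time only when the hypex addon cannot be imported; `pytest.importorskip` is standard pytest, and the module path is taken from the diff above.

import pytest

# Sketch only: skip this module when the hypex addon is unavailable,
# otherwise expose ABTest exactly as the original import did.
ab_tester = pytest.importorskip("lightautoml.addons.hypex.ABTesting.ab_tester")
ABTest = ab_tester.ABTest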