From 62ea326372ff77ce4963232ed7e2ae5713d6ef86 Mon Sep 17 00:00:00 2001 From: Patrick Bloebaum Date: Tue, 14 May 2024 07:26:15 -0700 Subject: [PATCH] Replacing numpy repmat with tile NumPy matlib has been deprecated. Replacing the usage accordingly. Signed-off-by: Patrick Bloebaum --- dowhy/gcm/anomaly.py | 5 +++-- dowhy/gcm/distribution_change.py | 3 +-- dowhy/gcm/influence.py | 7 ++++--- dowhy/gcm/model_evaluation.py | 5 ++--- tests/gcm/test_stats.py | 3 +-- 5 files changed, 11 insertions(+), 12 deletions(-) diff --git a/dowhy/gcm/anomaly.py b/dowhy/gcm/anomaly.py index fab334bad7..46f67e6de4 100644 --- a/dowhy/gcm/anomaly.py +++ b/dowhy/gcm/anomaly.py @@ -2,7 +2,6 @@ import numpy as np import pandas as pd -from numpy.matlib import repmat from tqdm import tqdm from dowhy.gcm import config @@ -40,7 +39,9 @@ def conditional_anomaly_scores( result = np.zeros(parent_samples.shape[0]) for i in range(parent_samples.shape[0]): - samples_from_conditional = causal_mechanism.draw_samples(repmat(parent_samples[i], num_samples_conditional, 1)) + samples_from_conditional = causal_mechanism.draw_samples( + np.tile(parent_samples[i], (num_samples_conditional, 1)) + ) anomaly_scorer = anomaly_scorer_factory() anomaly_scorer.fit(samples_from_conditional) result[i] = anomaly_scorer.score(target_samples[i])[0] diff --git a/dowhy/gcm/distribution_change.py b/dowhy/gcm/distribution_change.py index 28b608ad4f..acba09333f 100644 --- a/dowhy/gcm/distribution_change.py +++ b/dowhy/gcm/distribution_change.py @@ -6,7 +6,6 @@ import networkx as nx import numpy as np import pandas as pd -from numpy.matlib import repmat from statsmodels.stats.multitest import multipletests from tqdm import tqdm @@ -496,7 +495,7 @@ def _estimate_distribution_change_score( for joint_parent_sample in joint_parent_samples: old_result = result - samples = repmat(joint_parent_sample, num_joint_samples, 1) + samples = np.tile(joint_parent_sample, (num_joint_samples, 1)) result += difference_estimation_func( causal_model_original.draw_samples(samples), causal_model_new.draw_samples(samples) ) diff --git a/dowhy/gcm/influence.py b/dowhy/gcm/influence.py index 4934007f01..cff0c95a27 100644 --- a/dowhy/gcm/influence.py +++ b/dowhy/gcm/influence.py @@ -7,7 +7,6 @@ import numpy as np import pandas as pd from joblib import Parallel, delayed -from numpy.matlib import repmat import dowhy.gcm.auto as auto from dowhy.gcm import feature_relevance_sample @@ -175,7 +174,7 @@ def _estimate_direct_strength( average_difference_result = 0 converged_run = 0 for run, sample in enumerate(distribution_samples): - tmp_samples = repmat(sample, num_samples_conditional, 1) + tmp_samples = np.tile(sample, (num_samples_conditional, 1)) rnd_permutation = np.random.choice(distribution_samples.shape[0], num_samples_conditional, replace=False) @@ -436,7 +435,9 @@ def icc_set_function(subset: np.ndarray) -> Union[np.ndarray, float]: # In case of the empty subset (all are jointly randomize), it boils down to taking the average over all # predictions, seeing that the randomization yields the same values for each sample of interest (none of the # samples of interest are used to replace a (jointly) 'randomized' sample). - predictions = repmat(np.mean(prediction_method(noise_samples), axis=0), baseline_noise_samples.shape[0], 1) + predictions = np.tile( + np.mean(prediction_method(noise_samples), axis=0), (baseline_noise_samples.shape[0], 1) + ) else: predictions = marginal_expectation( prediction_method, diff --git a/dowhy/gcm/model_evaluation.py b/dowhy/gcm/model_evaluation.py index 09558b6a62..f3d714c9ca 100644 --- a/dowhy/gcm/model_evaluation.py +++ b/dowhy/gcm/model_evaluation.py @@ -6,7 +6,6 @@ import numpy as np import pandas as pd from joblib import Parallel, delayed -from numpy.matlib import repmat from scipy.stats import mode from sklearn.metrics import f1_score, mean_squared_error, r2_score from sklearn.model_selection import KFold @@ -730,7 +729,7 @@ def empirical_crps(generated_Y, observed_y): all_classes = np.unique(Y) for x, y in zip(X, Y): - samples = conditional_sampling_method(repmat(x, num_conditional_samples, 1)) + samples = conditional_sampling_method(np.tile(x, (num_conditional_samples, 1))) sample_categorical_crps = [] for cat in all_classes: @@ -749,7 +748,7 @@ def empirical_crps(generated_Y, observed_y): for x, y in zip(X, Y): crps_values.append( - empirical_crps(conditional_sampling_method(repmat(x, num_conditional_samples, 1)) / std_Y, y / std_Y) + empirical_crps(conditional_sampling_method(np.tile(x, (num_conditional_samples, 1))) / std_Y, y / std_Y) ) return float(np.mean(np.array(crps_values))) diff --git a/tests/gcm/test_stats.py b/tests/gcm/test_stats.py index 3221192af1..b588a863eb 100644 --- a/tests/gcm/test_stats.py +++ b/tests/gcm/test_stats.py @@ -1,7 +1,6 @@ import numpy as np import pytest from flaky import flaky -from numpy.matlib import repmat from pytest import approx from dowhy.gcm.ml import ( @@ -433,7 +432,7 @@ def test_given_nonlinear_categorical_data_when_evaluate_marginal_expectation_the def test_given_different_batch_sizes_when_estimating_marginal_expectation_then_returns_correct_marginal_expectations(): X = np.random.normal(0, 1, (34, 3)) feature_samples = np.random.normal(0, 1, (123, 3)) - expected_non_aggregated = np.array([repmat(X[i, :], feature_samples.shape[0], 1) for i in range(X.shape[0])]) + expected_non_aggregated = np.array([np.tile(X[i, :], (feature_samples.shape[0], 1)) for i in range(X.shape[0])]) def my_pred_func(X: np.ndarray) -> np.ndarray: return X.copy()