From c579b37593b789a236522cab693496d1ed775d68 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sat, 13 Dec 2025 21:07:25 +0000 Subject: [PATCH 1/9] Initial plan From 5f09838c540c93b0583de1e41b21858e7f443642 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sat, 13 Dec 2025 21:15:13 +0000 Subject: [PATCH 2/9] Implement critical performance improvements across modules Co-authored-by: EthanThePhoenix38 <103653068+EthanThePhoenix38@users.noreply.github.com> --- .../stats/correlation/CorrelationModule.py | 41 +++++-------------- .../stats/factorielle/FactorielleModule.py | 16 ++++---- .../stats/frequence/FrequenceModule.py | 7 +++- .../probabilistes/ProbabilistesModule.py | 9 ++-- .../stats/temporelle/TimeSeriesModule.py | 16 ++++---- .../stats/variance/VarianceModule.py | 33 +++++++++------ 6 files changed, 60 insertions(+), 62 deletions(-) diff --git a/py_stats_toolkit/stats/correlation/CorrelationModule.py b/py_stats_toolkit/stats/correlation/CorrelationModule.py index c8d9693..a974223 100644 --- a/py_stats_toolkit/stats/correlation/CorrelationModule.py +++ b/py_stats_toolkit/stats/correlation/CorrelationModule.py @@ -41,7 +41,7 @@ def _compute_correlation_chunk(self, chunk_data): def process(self, data, method="pearson", **kwargs): """ - Calcule la corrélation entre les variables en parallèle. + Calcule la corrélation entre les variables. Args: data: Données d'entrée (pandas DataFrame) @@ -57,29 +57,10 @@ def process(self, data, method="pearson", **kwargs): if not isinstance(data, pd.DataFrame): raise TypeError("Les données doivent être un pandas DataFrame") - # Pour les petits DataFrames, calcul direct - if len(data.columns) < 100: - self.result = data.corr(method=method) - return self.result - - # Pour les grands DataFrames, traitement parallèle - n_cols = len(data.columns) - chunk_size = get_optimal_chunk_size(n_cols, self.parallel_processor.n_jobs) - - # Division des colonnes en chunks - chunks = [] - for i in range(0, n_cols, chunk_size): - chunk_cols = data.columns[i:min(i + chunk_size, n_cols)] - chunks.append(data[chunk_cols]) - - # Calcul parallèle des corrélations - chunk_results = self.parallel_processor.parallel_map( - self._compute_correlation_chunk, - chunks - ) - - # Assemblage des résultats - self.result = pd.concat(chunk_results, axis=1) + # Compute correlation matrix directly + # pandas/numpy already use optimized algorithms + # Chunking correlation computation produces incorrect results + self.result = data.corr(method=method) return self.result def get_correlation_matrix(self): @@ -109,12 +90,12 @@ def get_correlation_pairs(self, threshold=0.5): # Filtrage des paires selon le seuil mask = np.abs(corr_values) >= threshold - pairs = [] + mask_indices = np.where(mask)[0] - for idx in np.where(mask)[0]: - var1 = self.result.columns[i[idx]] - var2 = self.result.columns[j[idx]] - corr = corr_values[idx] - pairs.append((var1, var2, corr)) + # Vectorized construction of pairs using list comprehension + pairs = [ + (self.result.columns[i[idx]], self.result.columns[j[idx]], corr_values[idx]) + for idx in mask_indices + ] return sorted(pairs, key=lambda x: abs(x[2]), reverse=True) \ No newline at end of file diff --git a/py_stats_toolkit/stats/factorielle/FactorielleModule.py b/py_stats_toolkit/stats/factorielle/FactorielleModule.py index 0010f1b..1464b7b 100644 --- a/py_stats_toolkit/stats/factorielle/FactorielleModule.py +++ b/py_stats_toolkit/stats/factorielle/FactorielleModule.py @@ -142,14 +142,14 @@ def get_quality_metrics(self): if self.result['Type'] == 'ACP': return { - 'Variance expliquée par composante': dict(zip( - [f'PC{i+1}' for i in range(len(self.result['Variance expliquée']))], - self.result['Variance expliquée'] - )), - 'Variance cumulée': dict(zip( - [f'PC{i+1}' for i in range(len(self.result['Variance cumulée']))], - self.result['Variance cumulée'] - )), + 'Variance expliquée par composante': { + f'PC{i+1}': val + for i, val in enumerate(self.result['Variance expliquée']) + }, + 'Variance cumulée': { + f'PC{i+1}': val + for i, val in enumerate(self.result['Variance cumulée']) + }, 'Nombre de composantes pour 80% de variance': np.argmax( self.result['Variance cumulée'] >= 0.8 ) + 1 diff --git a/py_stats_toolkit/stats/frequence/FrequenceModule.py b/py_stats_toolkit/stats/frequence/FrequenceModule.py index d3526b7..0a434f8 100644 --- a/py_stats_toolkit/stats/frequence/FrequenceModule.py +++ b/py_stats_toolkit/stats/frequence/FrequenceModule.py @@ -83,4 +83,9 @@ def get_frequence_relative(self): """Retourne les fréquences relatives.""" if self.result is None: raise ValueError("Exécutez d'abord process()") - return self.process(self.data, normalize=True)['Fréquence Relative'] \ No newline at end of file + # Check if already normalized + if 'Fréquence Relative' in self.result.columns: + return self.result['Fréquence Relative'] + # Normalize existing frequency counts instead of reprocessing + freq_col = 'Fréquence' if 'Fréquence' in self.result.columns else self.result.columns[0] + return self.result[freq_col] / self.result[freq_col].sum() \ No newline at end of file diff --git a/py_stats_toolkit/stats/probabilistes/ProbabilistesModule.py b/py_stats_toolkit/stats/probabilistes/ProbabilistesModule.py index f8316d1..ca0cc1a 100644 --- a/py_stats_toolkit/stats/probabilistes/ProbabilistesModule.py +++ b/py_stats_toolkit/stats/probabilistes/ProbabilistesModule.py @@ -67,7 +67,8 @@ def process(self, data, distribution="normal", **kwargs): self.distribution = distribution # Pour les petits ensembles de données, ajustement direct - if len(data) < self.batch_size: + # Use 2x batch_size threshold to avoid parallel overhead for medium datasets + if len(data) < self.batch_size * 2: if distribution == "normal": self.params = stats.norm.fit(data) self.result = stats.norm(*self.params) @@ -114,7 +115,8 @@ def get_probability_density(self, x): raise ValueError("Exécutez d'abord process()") # Pour les petits ensembles, calcul direct - if len(x) < self.batch_size: + # Use 2x batch_size threshold to avoid parallel overhead + if len(x) < self.batch_size * 2: return self.result.pdf(x) # Pour les grands ensembles, traitement parallèle @@ -136,7 +138,8 @@ def get_cumulative_distribution(self, x): raise ValueError("Exécutez d'abord process()") # Pour les petits ensembles, calcul direct - if len(x) < self.batch_size: + # Use 2x batch_size threshold to avoid parallel overhead + if len(x) < self.batch_size * 2: return self.result.cdf(x) # Pour les grands ensembles, traitement parallèle diff --git a/py_stats_toolkit/stats/temporelle/TimeSeriesModule.py b/py_stats_toolkit/stats/temporelle/TimeSeriesModule.py index 82b4d2b..3dd0281 100644 --- a/py_stats_toolkit/stats/temporelle/TimeSeriesModule.py +++ b/py_stats_toolkit/stats/temporelle/TimeSeriesModule.py @@ -74,9 +74,11 @@ def process(self, data, timestamps=None, **kwargs): # Détection des cycles if len(series) > 2: - fft = np.fft.fft(series.values) - freqs = np.fft.fftfreq(len(series)) - main_freq_idx = np.argmax(np.abs(fft[1:len(fft)//2])) + 1 + # Compute FFT only on the positive frequencies to save computation + fft = np.fft.rfft(series.values) # rfft is more efficient for real-valued data + freqs = np.fft.rfftfreq(len(series)) + # Skip DC component (index 0) + main_freq_idx = np.argmax(np.abs(fft[1:])) + 1 stats['Fréquence Principale'] = freqs[main_freq_idx] stats['Période Principale'] = 1/freqs[main_freq_idx] if freqs[main_freq_idx] != 0 else np.inf @@ -129,8 +131,8 @@ def get_seasonality(self, data=None, period=None): if period is not None: return period - # Détection automatique de la période - fft = np.fft.fft(series.values) - freqs = np.fft.fftfreq(len(series)) - main_freq_idx = np.argmax(np.abs(fft[1:len(fft)//2])) + 1 + # Détection automatique de la période - use rfft for efficiency + fft = np.fft.rfft(series.values) # rfft is more efficient for real-valued data + freqs = np.fft.rfftfreq(len(series)) + main_freq_idx = np.argmax(np.abs(fft[1:])) + 1 return 1/freqs[main_freq_idx] if freqs[main_freq_idx] != 0 else np.inf \ No newline at end of file diff --git a/py_stats_toolkit/stats/variance/VarianceModule.py b/py_stats_toolkit/stats/variance/VarianceModule.py index 4ab6376..7d03035 100644 --- a/py_stats_toolkit/stats/variance/VarianceModule.py +++ b/py_stats_toolkit/stats/variance/VarianceModule.py @@ -63,7 +63,8 @@ def process(self, data, group_col, value_col, test_type="anova", **kwargs): def _anova(self, data, group_col, value_col, **kwargs): """Analyse de variance à un facteur.""" groups = data[group_col].unique() - group_data = [data[data[group_col] == g][value_col] for g in groups] + # Pre-filter groups once to avoid repeated DataFrame filtering + group_data = [data[data[group_col] == g][value_col].to_numpy() for g in groups] f_stat, p_value = stats.f_oneway(*group_data, **kwargs) @@ -87,17 +88,19 @@ def _anova(self, data, group_col, value_col, **kwargs): def _kruskal_wallis(self, data, group_col, value_col, **kwargs): """Test de Kruskal-Wallis.""" groups = data[group_col].unique() - group_data = [data[data[group_col] == g][value_col] for g in groups] + # Pre-filter groups once to avoid repeated DataFrame filtering + group_data_dict = {g: data[data[group_col] == g][value_col].values for g in groups} + group_data = [group_data_dict[g] for g in groups] h_stat, p_value = stats.kruskal(*group_data, **kwargs) - # Test post-hoc de Mann-Whitney + # Test post-hoc de Mann-Whitney - use pre-filtered data post_hoc_results = [] for i in range(len(groups)): for j in range(i + 1, len(groups)): stat, p = stats.mannwhitneyu( - data[data[group_col] == groups[i]][value_col], - data[data[group_col] == groups[j]][value_col], + group_data_dict[groups[i]], + group_data_dict[groups[j]], alternative='two-sided' ) post_hoc_results.append({ @@ -125,19 +128,23 @@ def _friedman(self, data, group_col, value_col, **kwargs): # Réorganisation des données pour le test de Friedman pivot_data = data.pivot(columns=group_col, values=value_col) - stat, p_value = stats.friedmanchisquare(*[pivot_data[col] for col in pivot_data.columns], **kwargs) + # Pre-extract column data to avoid repeated indexing + columns = pivot_data.columns + column_data = {col: pivot_data[col].values for col in columns} - # Test post-hoc de Wilcoxon + stat, p_value = stats.friedmanchisquare(*[column_data[col] for col in columns], **kwargs) + + # Test post-hoc de Wilcoxon - use pre-extracted data post_hoc_results = [] - for i in range(len(pivot_data.columns)): - for j in range(i + 1, len(pivot_data.columns)): + for i in range(len(columns)): + for j in range(i + 1, len(columns)): stat, p = stats.wilcoxon( - pivot_data[pivot_data.columns[i]], - pivot_data[pivot_data.columns[j]] + column_data[columns[i]], + column_data[columns[j]] ) post_hoc_results.append({ - 'Groupe 1': pivot_data.columns[i], - 'Groupe 2': pivot_data.columns[j], + 'Groupe 1': columns[i], + 'Groupe 2': columns[j], 'Statistique': stat, 'p-valeur': p }) From a97fa0e51d73e188a1b552e19b5d2ddda5764091 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sat, 13 Dec 2025 21:17:46 +0000 Subject: [PATCH 3/9] Replace deprecated .values with .to_numpy() and format code Co-authored-by: EthanThePhoenix38 <103653068+EthanThePhoenix38@users.noreply.github.com> --- .../stats/correlation/CorrelationModule.py | 44 ++--- .../stats/factorielle/FactorielleModule.py | 159 ++++++++--------- .../stats/frequence/FrequenceModule.py | 55 +++--- .../probabilistes/ProbabilistesModule.py | 56 +++--- .../stats/temporelle/TimeSeriesModule.py | 86 +++++----- .../stats/variance/VarianceModule.py | 160 +++++++++--------- 6 files changed, 290 insertions(+), 270 deletions(-) diff --git a/py_stats_toolkit/stats/correlation/CorrelationModule.py b/py_stats_toolkit/stats/correlation/CorrelationModule.py index a974223..b748d0f 100644 --- a/py_stats_toolkit/stats/correlation/CorrelationModule.py +++ b/py_stats_toolkit/stats/correlation/CorrelationModule.py @@ -1,4 +1,4 @@ -''' +""" ===================================================================== File : CorrelationModule.py ===================================================================== @@ -19,83 +19,85 @@ tags : module, stats ===================================================================== -''' +""" import numpy as np import pandas as pd from scipy import stats -from ..core.AbstractClassBase import StatisticalModule + from ...utils.parallel import ParallelProcessor, get_optimal_chunk_size +from ..core.AbstractClassBase import StatisticalModule + class CorrelationModule(StatisticalModule): """Module pour l'analyse de corrélation.""" - + def __init__(self, n_jobs: int = -1): super().__init__() self.method = None self.parallel_processor = ParallelProcessor(n_jobs=n_jobs) - + def _compute_correlation_chunk(self, chunk_data): """Calcule la corrélation pour un chunk de données.""" return chunk_data.corr(method=self.method) - + def process(self, data, method="pearson", **kwargs): """ Calcule la corrélation entre les variables. - + Args: data: Données d'entrée (pandas DataFrame) method: Méthode de corrélation ('pearson', 'spearman', 'kendall') **kwargs: Arguments additionnels - + Returns: Matrice de corrélation """ self.validate_data(data) self.method = method - + if not isinstance(data, pd.DataFrame): raise TypeError("Les données doivent être un pandas DataFrame") - + # Compute correlation matrix directly # pandas/numpy already use optimized algorithms # Chunking correlation computation produces incorrect results self.result = data.corr(method=method) return self.result - + def get_correlation_matrix(self): """Retourne la matrice de corrélation.""" return self.result - + def get_correlation_pairs(self, threshold=0.5): """ Retourne les paires de variables avec une corrélation supérieure au seuil. - + Args: threshold: Seuil de corrélation - + Returns: Liste de tuples (var1, var2, corr) """ if self.result is None: raise ValueError("Exécutez d'abord process()") - + # Utilisation de numpy pour le calcul parallèle des paires - corr_matrix = self.result.values + corr_matrix = self.result.to_numpy() n = len(self.result.columns) - + # Création des indices pour les paires i, j = np.triu_indices(n, k=1) corr_values = corr_matrix[i, j] - + # Filtrage des paires selon le seuil mask = np.abs(corr_values) >= threshold mask_indices = np.where(mask)[0] - + # Vectorized construction of pairs using list comprehension pairs = [ (self.result.columns[i[idx]], self.result.columns[j[idx]], corr_values[idx]) for idx in mask_indices ] - - return sorted(pairs, key=lambda x: abs(x[2]), reverse=True) \ No newline at end of file + + return sorted(pairs, key=lambda x: abs(x[2]), reverse=True) diff --git a/py_stats_toolkit/stats/factorielle/FactorielleModule.py b/py_stats_toolkit/stats/factorielle/FactorielleModule.py index 1464b7b..1702514 100644 --- a/py_stats_toolkit/stats/factorielle/FactorielleModule.py +++ b/py_stats_toolkit/stats/factorielle/FactorielleModule.py @@ -1,4 +1,4 @@ -''' +""" ===================================================================== File : FactorielleModule.py ===================================================================== @@ -19,194 +19,197 @@ tags : module, stats ===================================================================== -''' +""" import numpy as np import pandas as pd from sklearn.decomposition import PCA, FactorAnalysis from sklearn.preprocessing import StandardScaler -from ..core.AbstractClassBase import StatisticalModule + from ...utils.parallel import ParallelProcessor +from ..core.AbstractClassBase import StatisticalModule + class FactorielleModule(StatisticalModule): """Module pour l'analyse factorielle.""" - + def __init__(self, n_jobs: int = -1): super().__init__() self.parallel_processor = ParallelProcessor(n_jobs=n_jobs) self.scaler = StandardScaler() - + def process(self, data, method="pca", n_components=None, **kwargs): """ Effectue une analyse factorielle. - + Args: data: DataFrame avec les données method: Méthode d'analyse ('pca', 'fa') n_components: Nombre de composantes à extraire **kwargs: Arguments additionnels - + Returns: Résultats de l'analyse """ self.validate_data(data) - + # Standardisation des données X = self.scaler.fit_transform(data) - + if method == "pca": return self._pca(X, data.columns, n_components, **kwargs) elif method == "fa": return self._factor_analysis(X, data.columns, n_components, **kwargs) else: raise ValueError(f"Méthode {method} non supportée") - + def _pca(self, X, feature_names, n_components, **kwargs): """Analyse en composantes principales.""" if n_components is None: n_components = min(X.shape) - + pca = PCA(n_components=n_components, **kwargs) pca.fit(X) - + # Calcul des composantes components = pca.transform(X) - + # Création du DataFrame des composantes components_df = pd.DataFrame( - components, - columns=[f'PC{i+1}' for i in range(n_components)] + components, columns=[f"PC{i+1}" for i in range(n_components)] ) - + # Calcul des contributions des variables loadings = pd.DataFrame( pca.components_.T, - columns=[f'PC{i+1}' for i in range(n_components)], - index=feature_names + columns=[f"PC{i+1}" for i in range(n_components)], + index=feature_names, ) - + self.result = { - 'Type': 'ACP', - 'Composantes': components_df, - 'Loadings': loadings, - 'Variance expliquée': pca.explained_variance_ratio_, - 'Variance cumulée': np.cumsum(pca.explained_variance_ratio_), - 'Modèle': pca + "Type": "ACP", + "Composantes": components_df, + "Loadings": loadings, + "Variance expliquée": pca.explained_variance_ratio_, + "Variance cumulée": np.cumsum(pca.explained_variance_ratio_), + "Modèle": pca, } - + return self.result - + def _factor_analysis(self, X, feature_names, n_components, **kwargs): """Analyse factorielle.""" if n_components is None: n_components = min(X.shape) - + fa = FactorAnalysis(n_components=n_components, **kwargs) fa.fit(X) - + # Calcul des facteurs factors = fa.transform(X) - + # Création du DataFrame des facteurs factors_df = pd.DataFrame( - factors, - columns=[f'F{i+1}' for i in range(n_components)] + factors, columns=[f"F{i+1}" for i in range(n_components)] ) - + # Calcul des contributions des variables loadings = pd.DataFrame( fa.components_.T, - columns=[f'F{i+1}' for i in range(n_components)], - index=feature_names + columns=[f"F{i+1}" for i in range(n_components)], + index=feature_names, ) - + self.result = { - 'Type': 'Analyse factorielle', - 'Facteurs': factors_df, - 'Loadings': loadings, - 'Noise variance': fa.noise_variance_, - 'Modèle': fa + "Type": "Analyse factorielle", + "Facteurs": factors_df, + "Loadings": loadings, + "Noise variance": fa.noise_variance_, + "Modèle": fa, } - + return self.result - + def get_quality_metrics(self): """ Calcule les métriques de qualité de l'analyse. - + Returns: Métriques de qualité """ - if not hasattr(self, 'result'): + if not hasattr(self, "result"): raise ValueError("Aucune analyse n'a été effectuée") - - if self.result['Type'] == 'ACP': + + if self.result["Type"] == "ACP": return { - 'Variance expliquée par composante': { - f'PC{i+1}': val - for i, val in enumerate(self.result['Variance expliquée']) + "Variance expliquée par composante": { + f"PC{i+1}": val + for i, val in enumerate(self.result["Variance expliquée"]) }, - 'Variance cumulée': { - f'PC{i+1}': val - for i, val in enumerate(self.result['Variance cumulée']) + "Variance cumulée": { + f"PC{i+1}": val + for i, val in enumerate(self.result["Variance cumulée"]) }, - 'Nombre de composantes pour 80% de variance': np.argmax( - self.result['Variance cumulée'] >= 0.8 - ) + 1 + "Nombre de composantes pour 80% de variance": np.argmax( + self.result["Variance cumulée"] >= 0.8 + ) + + 1, } else: return { - 'Variance du bruit': self.result['Noise variance'].tolist(), - 'Qualité de l\'ajustement': 1 - np.mean(self.result['Noise variance']) + "Variance du bruit": self.result["Noise variance"].tolist(), + "Qualité de l'ajustement": 1 - np.mean(self.result["Noise variance"]), } - + def transform(self, new_data): """ Transforme de nouvelles données. - + Args: new_data: Nouvelles données à transformer - + Returns: Données transformées """ - if not hasattr(self, 'result'): + if not hasattr(self, "result"): raise ValueError("Aucune analyse n'a été effectuée") - + # Standardisation des nouvelles données X_new = self.scaler.transform(new_data) - + # Transformation selon la méthode utilisée - if self.result['Type'] == 'ACP': + if self.result["Type"] == "ACP": return pd.DataFrame( - self.result['Modèle'].transform(X_new), - columns=[f'PC{i+1}' for i in range(self.result['Modèle'].n_components_)] + self.result["Modèle"].transform(X_new), + columns=[ + f"PC{i+1}" for i in range(self.result["Modèle"].n_components_) + ], ) else: return pd.DataFrame( - self.result['Modèle'].transform(X_new), - columns=[f'F{i+1}' for i in range(self.result['Modèle'].n_components_)] + self.result["Modèle"].transform(X_new), + columns=[f"F{i+1}" for i in range(self.result["Modèle"].n_components_)], ) - + def get_contributions(self, threshold=0.5): """ Obtient les contributions significatives des variables. - + Args: threshold: Seuil de contribution - + Returns: Variables contribuant significativement à chaque composante/facteur """ - if not hasattr(self, 'result'): + if not hasattr(self, "result"): raise ValueError("Aucune analyse n'a été effectuée") - - loadings = self.result['Loadings'] + + loadings = self.result["Loadings"] contributions = {} - + for col in loadings.columns: significant_vars = loadings[col][abs(loadings[col]) >= threshold] if not significant_vars.empty: contributions[col] = significant_vars.to_dict() - - return contributions \ No newline at end of file + + return contributions diff --git a/py_stats_toolkit/stats/frequence/FrequenceModule.py b/py_stats_toolkit/stats/frequence/FrequenceModule.py index 0a434f8..544a4d2 100644 --- a/py_stats_toolkit/stats/frequence/FrequenceModule.py +++ b/py_stats_toolkit/stats/frequence/FrequenceModule.py @@ -1,4 +1,4 @@ -''' +""" ===================================================================== File : FrequenceModule.py ===================================================================== @@ -19,73 +19,76 @@ tags : module, stats ===================================================================== -''' +""" import numpy as np import pandas as pd -from ..core.AbstractClassBase import StatisticalModule + from ...utils.parallel import ParallelProcessor +from ..core.AbstractClassBase import StatisticalModule + class FrequenceModule(StatisticalModule): """Module pour l'analyse de fréquence.""" - + def __init__(self, n_jobs: int = -1): super().__init__() self.parallel_processor = ParallelProcessor(n_jobs=n_jobs) - + def process(self, data, normalize=False, **kwargs): """ Calcule les fréquences des valeurs. - + Args: data: Données d'entrée (numpy array ou pandas Series) normalize: Si True, retourne les fréquences relatives **kwargs: Arguments additionnels - + Returns: DataFrame avec les fréquences """ self.validate_data(data) - + if isinstance(data, pd.Series): series = data else: series = pd.Series(data) - + # Calcul des fréquences freq = series.value_counts(normalize=normalize) cum_freq = freq.cumsum() - + # Création du DataFrame de résultats - self.result = pd.DataFrame({ - 'Fréquence': freq, - 'Fréquence Cumulée': cum_freq - }) - + self.result = pd.DataFrame({"Fréquence": freq, "Fréquence Cumulée": cum_freq}) + if normalize: - self.result.columns = ['Fréquence Relative', 'Fréquence Relative Cumulée'] - + self.result.columns = ["Fréquence Relative", "Fréquence Relative Cumulée"] + return self.result - + def get_frequence_absolue(self): """Retourne les fréquences absolues.""" if self.result is None: raise ValueError("Exécutez d'abord process()") - return self.result['Fréquence'] - + return self.result["Fréquence"] + def get_frequence_cumulee(self): """Retourne les fréquences cumulées.""" if self.result is None: raise ValueError("Exécutez d'abord process()") - return self.result['Fréquence Cumulée'] - + return self.result["Fréquence Cumulée"] + def get_frequence_relative(self): """Retourne les fréquences relatives.""" if self.result is None: raise ValueError("Exécutez d'abord process()") # Check if already normalized - if 'Fréquence Relative' in self.result.columns: - return self.result['Fréquence Relative'] + if "Fréquence Relative" in self.result.columns: + return self.result["Fréquence Relative"] # Normalize existing frequency counts instead of reprocessing - freq_col = 'Fréquence' if 'Fréquence' in self.result.columns else self.result.columns[0] - return self.result[freq_col] / self.result[freq_col].sum() \ No newline at end of file + freq_col = ( + "Fréquence" + if "Fréquence" in self.result.columns + else self.result.columns[0] + ) + return self.result[freq_col] / self.result[freq_col].sum() diff --git a/py_stats_toolkit/stats/probabilistes/ProbabilistesModule.py b/py_stats_toolkit/stats/probabilistes/ProbabilistesModule.py index ca0cc1a..75125e4 100644 --- a/py_stats_toolkit/stats/probabilistes/ProbabilistesModule.py +++ b/py_stats_toolkit/stats/probabilistes/ProbabilistesModule.py @@ -1,4 +1,4 @@ -''' +""" ===================================================================== File : ProbabilistesModule.py ===================================================================== @@ -19,23 +19,25 @@ tags : module, stats ===================================================================== -''' +""" import numpy as np from scipy import stats -from ..core.AbstractClassBase import StatisticalModule + from ...utils.parallel import ParallelProcessor +from ..core.AbstractClassBase import StatisticalModule + class ProbabilistesModule(StatisticalModule): """Module pour l'analyse probabiliste.""" - + def __init__(self, n_jobs: int = -1, batch_size: int = 1000): super().__init__() self.distribution = None self.params = None self.batch_size = batch_size self.parallel_processor = ParallelProcessor(n_jobs=n_jobs) - + def _fit_distribution_chunk(self, chunk): """Ajuste une distribution sur un chunk de données.""" if self.distribution == "normal": @@ -46,26 +48,26 @@ def _fit_distribution_chunk(self, chunk): return stats.gamma.fit(chunk) else: raise ValueError(f"Distribution {self.distribution} non supportée") - + def _average_params(self, param_list): """Moyenne les paramètres de distribution sur plusieurs chunks.""" return np.mean(param_list, axis=0) - + def process(self, data, distribution="normal", **kwargs): """ Ajuste une distribution aux données en parallèle. - + Args: data: Données d'entrée (numpy array) distribution: Type de distribution ('normal', 'exponential', 'gamma', etc.) **kwargs: Paramètres additionnels pour la distribution - + Returns: Objet de distribution ajusté """ self.validate_data(data) self.distribution = distribution - + # Pour les petits ensembles de données, ajustement direct # Use 2x batch_size threshold to avoid parallel overhead for medium datasets if len(data) < self.batch_size * 2: @@ -81,12 +83,14 @@ def process(self, data, distribution="normal", **kwargs): else: raise ValueError(f"Distribution {distribution} non supportée") return self.result - + # Pour les grands ensembles de données, traitement parallèle chunks = np.array_split(data, self.parallel_processor.n_jobs) - chunk_params = self.parallel_processor.parallel_map(self._fit_distribution_chunk, chunks) + chunk_params = self.parallel_processor.parallel_map( + self._fit_distribution_chunk, chunks + ) self.params = self._average_params(chunk_params) - + # Création de l'objet de distribution avec les paramètres moyens if distribution == "normal": self.result = stats.norm(*self.params) @@ -94,55 +98,55 @@ def process(self, data, distribution="normal", **kwargs): self.result = stats.expon(*self.params) elif distribution == "gamma": self.result = stats.gamma(*self.params) - + return self.result - + def get_distribution_params(self): """Retourne les paramètres de la distribution ajustée.""" return self.params - + def get_probability_density(self, x): """ Calcule la densité de probabilité pour les valeurs x en parallèle. - + Args: x: Valeurs pour lesquelles calculer la densité - + Returns: Densité de probabilité """ if self.result is None: raise ValueError("Exécutez d'abord process()") - + # Pour les petits ensembles, calcul direct # Use 2x batch_size threshold to avoid parallel overhead if len(x) < self.batch_size * 2: return self.result.pdf(x) - + # Pour les grands ensembles, traitement parallèle chunks = np.array_split(x, self.parallel_processor.n_jobs) pdf_chunks = self.parallel_processor.parallel_map(self.result.pdf, chunks) return np.concatenate(pdf_chunks) - + def get_cumulative_distribution(self, x): """ Calcule la fonction de répartition pour les valeurs x en parallèle. - + Args: x: Valeurs pour lesquelles calculer la fonction de répartition - + Returns: Fonction de répartition """ if self.result is None: raise ValueError("Exécutez d'abord process()") - + # Pour les petits ensembles, calcul direct # Use 2x batch_size threshold to avoid parallel overhead if len(x) < self.batch_size * 2: return self.result.cdf(x) - + # Pour les grands ensembles, traitement parallèle chunks = np.array_split(x, self.parallel_processor.n_jobs) cdf_chunks = self.parallel_processor.parallel_map(self.result.cdf, chunks) - return np.concatenate(cdf_chunks) \ No newline at end of file + return np.concatenate(cdf_chunks) diff --git a/py_stats_toolkit/stats/temporelle/TimeSeriesModule.py b/py_stats_toolkit/stats/temporelle/TimeSeriesModule.py index 3dd0281..23aaf0a 100644 --- a/py_stats_toolkit/stats/temporelle/TimeSeriesModule.py +++ b/py_stats_toolkit/stats/temporelle/TimeSeriesModule.py @@ -1,4 +1,4 @@ -''' +""" ===================================================================== File : TimeSeriesModule.py ===================================================================== @@ -19,120 +19,128 @@ tags : module, stats ===================================================================== -''' +""" import numpy as np import pandas as pd -from ..core.AbstractClassBase import StatisticalModule + from ...utils.parallel import ParallelProcessor +from ..core.AbstractClassBase import StatisticalModule + class TimeSeriesAnalyzer(StatisticalModule): """Module pour l'analyse de séries temporelles.""" - + def __init__(self, n_jobs: int = -1, batch_size: int = 1000): super().__init__() self.batch_size = batch_size self.parallel_processor = ParallelProcessor(n_jobs=n_jobs) - + def process(self, data, timestamps=None, **kwargs): """ Analyse une série temporelle. - + Args: data: Données d'entrée (numpy array ou pandas Series) timestamps: Timestamps pour les données **kwargs: Arguments additionnels - + Returns: DataFrame avec les analyses """ self.validate_data(data) - + if timestamps is not None: self.set_timestamps(timestamps) - + if isinstance(data, pd.Series): series = data else: series = pd.Series(data, index=self.timestamps) - + # Calcul des statistiques de base stats = { - 'Moyenne': series.mean(), - 'Écart-type': series.std(), - 'Minimum': series.min(), - 'Maximum': series.max(), - 'Médiane': series.median() + "Moyenne": series.mean(), + "Écart-type": series.std(), + "Minimum": series.min(), + "Maximum": series.max(), + "Médiane": series.median(), } - + # Détection des tendances if len(series) > 1: x = np.arange(len(series)) - slope, intercept = np.polyfit(x, series.values, 1) - stats['Pente'] = slope - stats['Intercept'] = intercept - + slope, intercept = np.polyfit(x, series.to_numpy(), 1) + stats["Pente"] = slope + stats["Intercept"] = intercept + # Détection des cycles if len(series) > 2: # Compute FFT only on the positive frequencies to save computation - fft = np.fft.rfft(series.values) # rfft is more efficient for real-valued data + fft = np.fft.rfft( + series.to_numpy() + ) # rfft is more efficient for real-valued data freqs = np.fft.rfftfreq(len(series)) # Skip DC component (index 0) main_freq_idx = np.argmax(np.abs(fft[1:])) + 1 - stats['Fréquence Principale'] = freqs[main_freq_idx] - stats['Période Principale'] = 1/freqs[main_freq_idx] if freqs[main_freq_idx] != 0 else np.inf - + stats["Fréquence Principale"] = freqs[main_freq_idx] + stats["Période Principale"] = ( + 1 / freqs[main_freq_idx] if freqs[main_freq_idx] != 0 else np.inf + ) + self.result = pd.Series(stats) return self.result - + def get_trend(self, data=None): """ Calcule la tendance linéaire. - + Args: data: Données optionnelles (utilise self.data si None) - + Returns: Tuple (pente, intercept) """ if data is None: data = self.data - + if isinstance(data, pd.Series): series = data else: series = pd.Series(data) - + x = np.arange(len(series)) - return np.polyfit(x, series.values, 1) - + return np.polyfit(x, series.to_numpy(), 1) + def get_seasonality(self, data=None, period=None): """ Détecte la saisonnalité. - + Args: data: Données optionnelles period: Période attendue (optionnelle) - + Returns: Période détectée """ if data is None: data = self.data - + if isinstance(data, pd.Series): series = data else: series = pd.Series(data) - + # Calcul de l'autocorrélation acf = pd.Series(series).autocorr() - + if period is not None: return period - + # Détection automatique de la période - use rfft for efficiency - fft = np.fft.rfft(series.values) # rfft is more efficient for real-valued data + fft = np.fft.rfft( + series.to_numpy() + ) # rfft is more efficient for real-valued data freqs = np.fft.rfftfreq(len(series)) main_freq_idx = np.argmax(np.abs(fft[1:])) + 1 - return 1/freqs[main_freq_idx] if freqs[main_freq_idx] != 0 else np.inf \ No newline at end of file + return 1 / freqs[main_freq_idx] if freqs[main_freq_idx] != 0 else np.inf diff --git a/py_stats_toolkit/stats/variance/VarianceModule.py b/py_stats_toolkit/stats/variance/VarianceModule.py index 7d03035..8b0c044 100644 --- a/py_stats_toolkit/stats/variance/VarianceModule.py +++ b/py_stats_toolkit/stats/variance/VarianceModule.py @@ -1,4 +1,4 @@ -''' +""" ===================================================================== File : VarianceModule.py ===================================================================== @@ -19,38 +19,40 @@ tags : module, stats ===================================================================== -''' +""" import numpy as np import pandas as pd from scipy import stats from statsmodels.stats.multicomp import MultiComparison -from ..core.AbstractClassBase import StatisticalModule + from ...utils.parallel import ParallelProcessor +from ..core.AbstractClassBase import StatisticalModule + class VarianceModule(StatisticalModule): """Module pour l'analyse de variance.""" - + def __init__(self, n_jobs: int = -1): super().__init__() self.parallel_processor = ParallelProcessor(n_jobs=n_jobs) - + def process(self, data, group_col, value_col, test_type="anova", **kwargs): """ Effectue une analyse de variance. - + Args: data: DataFrame avec les données group_col: Colonne des groupes value_col: Colonne des valeurs test_type: Type de test ('anova', 'kruskal', 'friedman') **kwargs: Arguments additionnels - + Returns: Résultats de l'analyse """ self.validate_data(data) - + if test_type == "anova": return self._anova(data, group_col, value_col, **kwargs) elif test_type == "kruskal": @@ -59,41 +61,40 @@ def process(self, data, group_col, value_col, test_type="anova", **kwargs): return self._friedman(data, group_col, value_col, **kwargs) else: raise ValueError(f"Type de test {test_type} non supporté") - + def _anova(self, data, group_col, value_col, **kwargs): """Analyse de variance à un facteur.""" groups = data[group_col].unique() # Pre-filter groups once to avoid repeated DataFrame filtering group_data = [data[data[group_col] == g][value_col].to_numpy() for g in groups] - + f_stat, p_value = stats.f_oneway(*group_data, **kwargs) - + # Test post-hoc de Tukey mc = MultiComparison(data[value_col], data[group_col]) tukey_result = mc.tukeyhsd() - + self.result = { - 'Type': 'ANOVA', - 'Statistique F': f_stat, - 'p-valeur': p_value, - 'Groupes': groups.tolist(), - 'Test post-hoc': { - 'Méthode': 'Tukey HSD', - 'Résultats': tukey_result - } + "Type": "ANOVA", + "Statistique F": f_stat, + "p-valeur": p_value, + "Groupes": groups.tolist(), + "Test post-hoc": {"Méthode": "Tukey HSD", "Résultats": tukey_result}, } - + return self.result - + def _kruskal_wallis(self, data, group_col, value_col, **kwargs): """Test de Kruskal-Wallis.""" groups = data[group_col].unique() # Pre-filter groups once to avoid repeated DataFrame filtering - group_data_dict = {g: data[data[group_col] == g][value_col].values for g in groups} + group_data_dict = { + g: data[data[group_col] == g][value_col].values for g in groups + } group_data = [group_data_dict[g] for g in groups] - + h_stat, p_value = stats.kruskal(*group_data, **kwargs) - + # Test post-hoc de Mann-Whitney - use pre-filtered data post_hoc_results = [] for i in range(len(groups)): @@ -101,92 +102,91 @@ def _kruskal_wallis(self, data, group_col, value_col, **kwargs): stat, p = stats.mannwhitneyu( group_data_dict[groups[i]], group_data_dict[groups[j]], - alternative='two-sided' + alternative="two-sided", ) - post_hoc_results.append({ - 'Groupe 1': groups[i], - 'Groupe 2': groups[j], - 'Statistique': stat, - 'p-valeur': p - }) - + post_hoc_results.append( + { + "Groupe 1": groups[i], + "Groupe 2": groups[j], + "Statistique": stat, + "p-valeur": p, + } + ) + self.result = { - 'Type': 'Kruskal-Wallis', - 'Statistique H': h_stat, - 'p-valeur': p_value, - 'Groupes': groups.tolist(), - 'Test post-hoc': { - 'Méthode': 'Mann-Whitney', - 'Résultats': post_hoc_results - } + "Type": "Kruskal-Wallis", + "Statistique H": h_stat, + "p-valeur": p_value, + "Groupes": groups.tolist(), + "Test post-hoc": {"Méthode": "Mann-Whitney", "Résultats": post_hoc_results}, } - + return self.result - + def _friedman(self, data, group_col, value_col, **kwargs): """Test de Friedman.""" # Réorganisation des données pour le test de Friedman pivot_data = data.pivot(columns=group_col, values=value_col) - + # Pre-extract column data to avoid repeated indexing columns = pivot_data.columns column_data = {col: pivot_data[col].values for col in columns} - - stat, p_value = stats.friedmanchisquare(*[column_data[col] for col in columns], **kwargs) - + + stat, p_value = stats.friedmanchisquare( + *[column_data[col] for col in columns], **kwargs + ) + # Test post-hoc de Wilcoxon - use pre-extracted data post_hoc_results = [] for i in range(len(columns)): for j in range(i + 1, len(columns)): stat, p = stats.wilcoxon( - column_data[columns[i]], - column_data[columns[j]] + column_data[columns[i]], column_data[columns[j]] + ) + post_hoc_results.append( + { + "Groupe 1": columns[i], + "Groupe 2": columns[j], + "Statistique": stat, + "p-valeur": p, + } ) - post_hoc_results.append({ - 'Groupe 1': columns[i], - 'Groupe 2': columns[j], - 'Statistique': stat, - 'p-valeur': p - }) - + self.result = { - 'Type': 'Friedman', - 'Statistique': stat, - 'p-valeur': p_value, - 'Groupes': pivot_data.columns.tolist(), - 'Test post-hoc': { - 'Méthode': 'Wilcoxon', - 'Résultats': post_hoc_results - } + "Type": "Friedman", + "Statistique": stat, + "p-valeur": p_value, + "Groupes": pivot_data.columns.tolist(), + "Test post-hoc": {"Méthode": "Wilcoxon", "Résultats": post_hoc_results}, } - + return self.result - + def get_effect_size(self): """ Calcule la taille d'effet (eta-carré). - + Returns: Taille d'effet """ - if not hasattr(self, 'result'): + if not hasattr(self, "result"): raise ValueError("Aucune analyse n'a été effectuée") - - if self.result['Type'] == 'ANOVA': - f_stat = self.result['Statistique F'] - df_between = len(self.result['Groupes']) - 1 - df_total = len(self.result['Groupes']) * (len(self.result['Groupes']) - 1) - + + if self.result["Type"] == "ANOVA": + f_stat = self.result["Statistique F"] + df_between = len(self.result["Groupes"]) - 1 + df_total = len(self.result["Groupes"]) * (len(self.result["Groupes"]) - 1) + eta_squared = (f_stat * df_between) / (f_stat * df_between + df_total) - + return { - 'Taille d\'effet': 'Eta-carré', - 'Valeur': eta_squared, - 'Interprétation': self._interpret_eta_squared(eta_squared) + "Taille d'effet": "Eta-carré", + "Valeur": eta_squared, + "Interprétation": self._interpret_eta_squared(eta_squared), } else: raise ValueError("La taille d'effet n'est disponible que pour l'ANOVA") - + def _interpret_eta_squared(self, eta_squared): """Interprète la taille d'effet eta-carré.""" if eta_squared < 0.01: @@ -196,4 +196,4 @@ def _interpret_eta_squared(self, eta_squared): elif eta_squared < 0.14: return "Effet moyen" else: - return "Grand effet" \ No newline at end of file + return "Grand effet" From 6983d6df061813c85a8beff9aa3990db109cb266 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sat, 13 Dec 2025 21:20:43 +0000 Subject: [PATCH 4/9] Clean up unused code and improve error handling Co-authored-by: EthanThePhoenix38 <103653068+EthanThePhoenix38@users.noreply.github.com> --- .../stats/correlation/CorrelationModule.py | 6 +----- py_stats_toolkit/stats/frequence/FrequenceModule.py | 11 +++++------ py_stats_toolkit/stats/variance/VarianceModule.py | 4 ++-- 3 files changed, 8 insertions(+), 13 deletions(-) diff --git a/py_stats_toolkit/stats/correlation/CorrelationModule.py b/py_stats_toolkit/stats/correlation/CorrelationModule.py index b748d0f..e4eb0f2 100644 --- a/py_stats_toolkit/stats/correlation/CorrelationModule.py +++ b/py_stats_toolkit/stats/correlation/CorrelationModule.py @@ -25,7 +25,7 @@ import pandas as pd from scipy import stats -from ...utils.parallel import ParallelProcessor, get_optimal_chunk_size +from ...utils.parallel import ParallelProcessor from ..core.AbstractClassBase import StatisticalModule @@ -37,10 +37,6 @@ def __init__(self, n_jobs: int = -1): self.method = None self.parallel_processor = ParallelProcessor(n_jobs=n_jobs) - def _compute_correlation_chunk(self, chunk_data): - """Calcule la corrélation pour un chunk de données.""" - return chunk_data.corr(method=self.method) - def process(self, data, method="pearson", **kwargs): """ Calcule la corrélation entre les variables. diff --git a/py_stats_toolkit/stats/frequence/FrequenceModule.py b/py_stats_toolkit/stats/frequence/FrequenceModule.py index 544a4d2..1b00d17 100644 --- a/py_stats_toolkit/stats/frequence/FrequenceModule.py +++ b/py_stats_toolkit/stats/frequence/FrequenceModule.py @@ -86,9 +86,8 @@ def get_frequence_relative(self): if "Fréquence Relative" in self.result.columns: return self.result["Fréquence Relative"] # Normalize existing frequency counts instead of reprocessing - freq_col = ( - "Fréquence" - if "Fréquence" in self.result.columns - else self.result.columns[0] - ) - return self.result[freq_col] / self.result[freq_col].sum() + if "Fréquence" not in self.result.columns: + raise ValueError( + "Le DataFrame de résultats ne contient pas de colonne 'Fréquence'" + ) + return self.result["Fréquence"] / self.result["Fréquence"].sum() diff --git a/py_stats_toolkit/stats/variance/VarianceModule.py b/py_stats_toolkit/stats/variance/VarianceModule.py index 8b0c044..6588a0c 100644 --- a/py_stats_toolkit/stats/variance/VarianceModule.py +++ b/py_stats_toolkit/stats/variance/VarianceModule.py @@ -89,7 +89,7 @@ def _kruskal_wallis(self, data, group_col, value_col, **kwargs): groups = data[group_col].unique() # Pre-filter groups once to avoid repeated DataFrame filtering group_data_dict = { - g: data[data[group_col] == g][value_col].values for g in groups + g: data[data[group_col] == g][value_col].to_numpy() for g in groups } group_data = [group_data_dict[g] for g in groups] @@ -130,7 +130,7 @@ def _friedman(self, data, group_col, value_col, **kwargs): # Pre-extract column data to avoid repeated indexing columns = pivot_data.columns - column_data = {col: pivot_data[col].values for col in columns} + column_data = {col: pivot_data[col].to_numpy() for col in columns} stat, p_value = stats.friedmanchisquare( *[column_data[col] for col in columns], **kwargs From c4d31bf3bfecdfca27edfd3fac2ea3c1c580c3a3 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sat, 13 Dec 2025 21:23:24 +0000 Subject: [PATCH 5/9] Apply advanced optimizations: use groupby and array indexing Co-authored-by: EthanThePhoenix38 <103653068+EthanThePhoenix38@users.noreply.github.com> --- .../stats/correlation/CorrelationModule.py | 3 ++- .../stats/frequence/FrequenceModule.py | 8 +++---- .../stats/variance/VarianceModule.py | 22 +++++++++---------- 3 files changed, 17 insertions(+), 16 deletions(-) diff --git a/py_stats_toolkit/stats/correlation/CorrelationModule.py b/py_stats_toolkit/stats/correlation/CorrelationModule.py index e4eb0f2..6560c2c 100644 --- a/py_stats_toolkit/stats/correlation/CorrelationModule.py +++ b/py_stats_toolkit/stats/correlation/CorrelationModule.py @@ -57,7 +57,8 @@ def process(self, data, method="pearson", **kwargs): # Compute correlation matrix directly # pandas/numpy already use optimized algorithms - # Chunking correlation computation produces incorrect results + # Note: Chunking correlation computation produces incorrect results because + # correlation requires all data points to compute proper covariance and variance statistics self.result = data.corr(method=method) return self.result diff --git a/py_stats_toolkit/stats/frequence/FrequenceModule.py b/py_stats_toolkit/stats/frequence/FrequenceModule.py index 1b00d17..f5fe43f 100644 --- a/py_stats_toolkit/stats/frequence/FrequenceModule.py +++ b/py_stats_toolkit/stats/frequence/FrequenceModule.py @@ -86,8 +86,8 @@ def get_frequence_relative(self): if "Fréquence Relative" in self.result.columns: return self.result["Fréquence Relative"] # Normalize existing frequency counts instead of reprocessing - if "Fréquence" not in self.result.columns: - raise ValueError( - "Le DataFrame de résultats ne contient pas de colonne 'Fréquence'" - ) + # This should always exist if process() was called successfully + assert ( + "Fréquence" in self.result.columns + ), "Internal error: 'Fréquence' column missing" return self.result["Fréquence"] / self.result["Fréquence"].sum() diff --git a/py_stats_toolkit/stats/variance/VarianceModule.py b/py_stats_toolkit/stats/variance/VarianceModule.py index 6588a0c..d32ac87 100644 --- a/py_stats_toolkit/stats/variance/VarianceModule.py +++ b/py_stats_toolkit/stats/variance/VarianceModule.py @@ -64,9 +64,11 @@ def process(self, data, group_col, value_col, test_type="anova", **kwargs): def _anova(self, data, group_col, value_col, **kwargs): """Analyse de variance à un facteur.""" + # Use groupby for efficient group extraction groups = data[group_col].unique() - # Pre-filter groups once to avoid repeated DataFrame filtering - group_data = [data[data[group_col] == g][value_col].to_numpy() for g in groups] + group_data = [ + group[value_col].to_numpy() for _, group in data.groupby(group_col) + ] f_stat, p_value = stats.f_oneway(*group_data, **kwargs) @@ -86,10 +88,10 @@ def _anova(self, data, group_col, value_col, **kwargs): def _kruskal_wallis(self, data, group_col, value_col, **kwargs): """Test de Kruskal-Wallis.""" + # Use groupby for efficient group extraction groups = data[group_col].unique() - # Pre-filter groups once to avoid repeated DataFrame filtering group_data_dict = { - g: data[data[group_col] == g][value_col].to_numpy() for g in groups + name: group[value_col].to_numpy() for name, group in data.groupby(group_col) } group_data = [group_data_dict[g] for g in groups] @@ -128,21 +130,19 @@ def _friedman(self, data, group_col, value_col, **kwargs): # Réorganisation des données pour le test de Friedman pivot_data = data.pivot(columns=group_col, values=value_col) - # Pre-extract column data to avoid repeated indexing + # Get all column data as numpy array for efficient access columns = pivot_data.columns - column_data = {col: pivot_data[col].to_numpy() for col in columns} + pivot_array = pivot_data.to_numpy() stat, p_value = stats.friedmanchisquare( - *[column_data[col] for col in columns], **kwargs + *[pivot_array[:, i] for i in range(len(columns))], **kwargs ) - # Test post-hoc de Wilcoxon - use pre-extracted data + # Test post-hoc de Wilcoxon - use array indexing post_hoc_results = [] for i in range(len(columns)): for j in range(i + 1, len(columns)): - stat, p = stats.wilcoxon( - column_data[columns[i]], column_data[columns[j]] - ) + stat, p = stats.wilcoxon(pivot_array[:, i], pivot_array[:, j]) post_hoc_results.append( { "Groupe 1": columns[i], From ac3a87ab20821824218db3cf84c52bcf48d566f0 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sat, 13 Dec 2025 21:26:37 +0000 Subject: [PATCH 6/9] Address code review feedback: improve ordering, NaN handling, and error checking Co-authored-by: EthanThePhoenix38 <103653068+EthanThePhoenix38@users.noreply.github.com> --- py_stats_toolkit/stats/frequence/FrequenceModule.py | 5 ++--- .../stats/temporelle/TimeSeriesModule.py | 12 +++++------- py_stats_toolkit/stats/variance/VarianceModule.py | 8 ++++++-- 3 files changed, 13 insertions(+), 12 deletions(-) diff --git a/py_stats_toolkit/stats/frequence/FrequenceModule.py b/py_stats_toolkit/stats/frequence/FrequenceModule.py index f5fe43f..9ecefaf 100644 --- a/py_stats_toolkit/stats/frequence/FrequenceModule.py +++ b/py_stats_toolkit/stats/frequence/FrequenceModule.py @@ -87,7 +87,6 @@ def get_frequence_relative(self): return self.result["Fréquence Relative"] # Normalize existing frequency counts instead of reprocessing # This should always exist if process() was called successfully - assert ( - "Fréquence" in self.result.columns - ), "Internal error: 'Fréquence' column missing" + if "Fréquence" not in self.result.columns: + raise RuntimeError("Internal error: 'Fréquence' column missing") return self.result["Fréquence"] / self.result["Fréquence"].sum() diff --git a/py_stats_toolkit/stats/temporelle/TimeSeriesModule.py b/py_stats_toolkit/stats/temporelle/TimeSeriesModule.py index 23aaf0a..5103624 100644 --- a/py_stats_toolkit/stats/temporelle/TimeSeriesModule.py +++ b/py_stats_toolkit/stats/temporelle/TimeSeriesModule.py @@ -76,10 +76,9 @@ def process(self, data, timestamps=None, **kwargs): # Détection des cycles if len(series) > 2: + # rfft is more efficient for real-valued data # Compute FFT only on the positive frequencies to save computation - fft = np.fft.rfft( - series.to_numpy() - ) # rfft is more efficient for real-valued data + fft = np.fft.rfft(series.to_numpy()) freqs = np.fft.rfftfreq(len(series)) # Skip DC component (index 0) main_freq_idx = np.argmax(np.abs(fft[1:])) + 1 @@ -137,10 +136,9 @@ def get_seasonality(self, data=None, period=None): if period is not None: return period - # Détection automatique de la période - use rfft for efficiency - fft = np.fft.rfft( - series.to_numpy() - ) # rfft is more efficient for real-valued data + # rfft is more efficient for real-valued data + # Détection automatique de la période + fft = np.fft.rfft(series.to_numpy()) freqs = np.fft.rfftfreq(len(series)) main_freq_idx = np.argmax(np.abs(fft[1:])) + 1 return 1 / freqs[main_freq_idx] if freqs[main_freq_idx] != 0 else np.inf diff --git a/py_stats_toolkit/stats/variance/VarianceModule.py b/py_stats_toolkit/stats/variance/VarianceModule.py index d32ac87..88363b0 100644 --- a/py_stats_toolkit/stats/variance/VarianceModule.py +++ b/py_stats_toolkit/stats/variance/VarianceModule.py @@ -64,10 +64,11 @@ def process(self, data, group_col, value_col, test_type="anova", **kwargs): def _anova(self, data, group_col, value_col, **kwargs): """Analyse de variance à un facteur.""" - # Use groupby for efficient group extraction + # Get unique groups to maintain consistent ordering groups = data[group_col].unique() + # Use groupby with get_group for efficient extraction while preserving order group_data = [ - group[value_col].to_numpy() for _, group in data.groupby(group_col) + data.groupby(group_col).get_group(g)[value_col].to_numpy() for g in groups ] f_stat, p_value = stats.f_oneway(*group_data, **kwargs) @@ -130,6 +131,9 @@ def _friedman(self, data, group_col, value_col, **kwargs): # Réorganisation des données pour le test de Friedman pivot_data = data.pivot(columns=group_col, values=value_col) + # Friedman test requires complete cases - drop rows with NaN + pivot_data = pivot_data.dropna() + # Get all column data as numpy array for efficient access columns = pivot_data.columns pivot_array = pivot_data.to_numpy() From 25715b3b120023f1e1630d35a3958af582ce43f5 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 17 Dec 2025 15:34:59 +0000 Subject: [PATCH 7/9] Apply code review feedback: fix sampling rate, remove unused var, fix normalize logic Co-authored-by: EthanThePhoenix38 <103653068+EthanThePhoenix38@users.noreply.github.com> --- .coverage | Bin 53248 -> 53248 bytes .../__pycache__/__init__.cpython-312.pyc | Bin 2942 -> 2918 bytes .../__pycache__/correlation.cpython-312.pyc | Bin 4212 -> 4212 bytes .../__pycache__/descriptives.cpython-312.pyc | Bin 2629 -> 2629 bytes .../__pycache__/regression.cpython-312.pyc | Bin 4251 -> 4251 bytes .../stats/frequence/FrequenceModule.py | 82 ++------- .../stats/temporelle/TimeSeriesModule.py | 159 ++++++------------ .../TimeSeriesModule.cpython-312.pyc | Bin 0 -> 7921 bytes .../__pycache__/__init__.cpython-312.pyc | Bin 0 -> 501 bytes .../data_processor.cpython-312.pyc | Bin 0 -> 3538 bytes .../__pycache__/parallel.cpython-312.pyc | Bin 0 -> 4715 bytes ...basic_imports.cpython-312-pytest-9.0.2.pyc | Bin 3358 -> 3358 bytes ...t_correlation.cpython-312-pytest-9.0.2.pyc | Bin 4317 -> 4317 bytes ..._descriptives.cpython-312-pytest-9.0.2.pyc | Bin 3903 -> 3903 bytes ...tored_modules.cpython-312-pytest-9.0.2.pyc | Bin 0 -> 10817 bytes ...ession_module.cpython-312-pytest-9.0.2.pyc | Bin 4330 -> 4330 bytes 16 files changed, 71 insertions(+), 170 deletions(-) create mode 100644 py_stats_toolkit/stats/temporelle/__pycache__/TimeSeriesModule.cpython-312.pyc create mode 100644 py_stats_toolkit/utils/__pycache__/__init__.cpython-312.pyc create mode 100644 py_stats_toolkit/utils/__pycache__/data_processor.cpython-312.pyc create mode 100644 py_stats_toolkit/utils/__pycache__/parallel.cpython-312.pyc create mode 100644 tests/__pycache__/test_refactored_modules.cpython-312-pytest-9.0.2.pyc diff --git a/.coverage b/.coverage index 606bae05f52b7815d539c69aaae46e623f33ee2c..8dd39c70b6534a0fc3ccfba4ab5b1c45d3459560 100644 GIT binary patch delta 887 zcmaixyG|QH6o!pFzA$?OiX;+{NRd({iW?*ZEya5VcvrK7g*&E7 zdR~AcG^9b|1wxMmEqR0FDRO3#VvW~Id;ahIbNNosi>K$sE42MGz`PJ#2Jih}{ulqX zcjp~?75AIF={z_m&NBXi`Ss#L1DD$YD(KqT)oybsiY2RsG-Yp-@tzOkbu)fq$As`9 zSA;;c??F^I(K8#3sNg-SVwplnTnHN`T%n>LORf%%(pqMlIC;Q`N=}E7Ir#ts&^8UP zG7UYJMv^Crj~Gc6Rpu-@59E#^pJzzP`jVw7kB5MSRX|z>d67Zj+L8&X{xOw2CXsPS zLoT=)LrefBsUN}F4mj4^y;DNK^-TPntH=ykuE`30{HfOdbtOXsU*X?-9 zn*R~$x{Y$Id0nI%Hk@J2%>^>n9FBC`HvG>T4w2rm{|B7c<{W2L4t29(*r13kuxkV~b^GVQ93Utmy4F+0_3$Q2G@E|7w0u XzE?o$JAABhERFVzAlc2P@uv#_1kV-} diff --git a/py_stats_toolkit/__pycache__/__init__.cpython-312.pyc b/py_stats_toolkit/__pycache__/__init__.cpython-312.pyc index 87dccb030e827c72b23a401d1184184cee04e995..410d1a1b4f7630df70516a5d6fd07b444cde463c 100644 GIT binary patch delta 107 zcmew-_DqcTG%qg~0}#AA?Ud=gk=K)jQFL-Ri__$}EVDN|u&!lf)SCQ`?Ilp~2D{B< z3652h9azOCKj27bbl>d3+0UrNC&{R^g84H8kosuF%c!-$`7;BM`lQ6q&D6;LhTR6Dgo9%hT*>539JP$Dn+-Vo8I^b>8I@Kre`WwuA1!zpwH7#kW&l#36!^KB P8rgq*sAZh|kgF8{ypSVN diff --git a/py_stats_toolkit/stats/__pycache__/correlation.cpython-312.pyc b/py_stats_toolkit/stats/__pycache__/correlation.cpython-312.pyc index 7d19b75c85af242b9e591841bf7c67a0ce6f4799..3fbea4dcdeb17373b88aaf5d9b49ced446bce86c 100644 GIT binary patch delta 20 acmeyO@I`_9G%qg~0}#AA?X;0QQvd)*aRyHS delta 20 acmeyO@I`_9G%qg~0}upmb=b(ADF6UMv<033 diff --git a/py_stats_toolkit/stats/__pycache__/descriptives.cpython-312.pyc b/py_stats_toolkit/stats/__pycache__/descriptives.cpython-312.pyc index 75082f1a8d62cbab3d043a12c003a09615213d81..ed204bf7f83aaa884f6474bdc7892980cf6ccd56 100644 GIT binary patch delta 20 acmX>qa#V!-G%qg~0}#AA?X;2GiVFZfV+EuD delta 20 acmX>qa#V!-G%qg~0}upmb=b&l#RUL4rUdf< diff --git a/py_stats_toolkit/stats/__pycache__/regression.cpython-312.pyc b/py_stats_toolkit/stats/__pycache__/regression.cpython-312.pyc index cdc657203a783696950a4814c2f60e2652bd1188..6734ffcd08f6779c7aea494031f5cbfe763ae05a 100644 GIT binary patch delta 20 acmbQOI9rkXG%qg~0}#AA?X;15f&c(Itp&ya delta 20 acmbQOI9rkXG%qg~0}#yF;;@l>f&c(F_XS@7 diff --git a/py_stats_toolkit/stats/frequence/FrequenceModule.py b/py_stats_toolkit/stats/frequence/FrequenceModule.py index dc786e2..213bab5 100644 --- a/py_stats_toolkit/stats/frequence/FrequenceModule.py +++ b/py_stats_toolkit/stats/frequence/FrequenceModule.py @@ -2,7 +2,7 @@ ===================================================================== File : FrequenceModule.py ===================================================================== -version : 2.0.0 +version : 1.0.0 release : 15/06/2025 author : Phoenix Project contact : contact@phonxproject.onmicrosoft.fr @@ -11,17 +11,16 @@ Copyright (c) 2025, Phoenix Project All rights reserved. -Refactored module for frequency analysis. -Follows SOLID principles with separation of business logic and algorithms. +Description du module FrequenceModule.py -tags : module, stats, refactored +tags : module, stats ===================================================================== Ce module Description du module FrequenceModule.py tags : module, stats +===================================================================== """ -from typing import Union import numpy as np import pandas as pd @@ -44,43 +43,9 @@ def process(self, data, normalize=False, **kwargs): data: Données d'entrée (numpy array ou pandas Series) normalize: Si True, retourne les fréquences relatives **kwargs: Arguments additionnels -# Import base class and utilities -from py_stats_toolkit.core.base import StatisticalModule -from py_stats_toolkit.core.validators import DataValidator -from py_stats_toolkit.algorithms import descriptive_stats as desc_algos -from py_stats_toolkit.utils.data_processor import DataProcessor - - -class FrequenceModule(StatisticalModule): - """ - Module for frequency analysis (Business Logic Layer). - - Responsibilities: - - Orchestrate frequency analysis workflow - - Manage results and state - - Provide user-facing API - - Delegates to: - - DataValidator for validation - - desc_algos for computations - """ - - def __init__(self): - """Initialize frequency module.""" - super().__init__() - - def process(self, data: Union[pd.Series, np.ndarray, list], - normalize: bool = False, **kwargs) -> pd.DataFrame: - """ - Compute frequency distribution. - - Args: - data: Input data - normalize: If True, return relative frequencies - **kwargs: Additional arguments Returns: - DataFrame with frequencies + DataFrame avec les fréquences """ self.validate_data(data) @@ -89,15 +54,24 @@ def process(self, data: Union[pd.Series, np.ndarray, list], else: series = pd.Series(data) - # Calcul des fréquences - freq = series.value_counts(normalize=normalize) + # Calcul des fréquences absolues (toujours stockées dans self.result) + freq = series.value_counts(normalize=False) cum_freq = freq.cumsum() - # Création du DataFrame de résultats + # Création du DataFrame de résultats absolus self.result = pd.DataFrame({"Fréquence": freq, "Fréquence Cumulée": cum_freq}) if normalize: - self.result.columns = ["Fréquence Relative", "Fréquence Relative Cumulée"] + # Calcul des fréquences relatives à partir des fréquences absolues + rel_freq = self.result["Fréquence"] / self.result["Fréquence"].sum() + rel_cum_freq = rel_freq.cumsum() + return pd.DataFrame( + { + "Fréquence Relative": rel_freq, + "Fréquence Relative Cumulée": rel_cum_freq, + }, + index=self.result.index, + ) return self.result @@ -125,23 +99,3 @@ def get_frequence_relative(self): if "Fréquence" not in self.result.columns: raise RuntimeError("Internal error: 'Fréquence' column missing") return self.result["Fréquence"] / self.result["Fréquence"].sum() - # Validation (delegated to validator) - DataValidator.validate_data(data) - - # Store state - self.data = data - - # Convert to numpy for computation - data_array = DataProcessor.to_numpy(data) - - # Computation (delegated to algorithm layer) - self.result = desc_algos.compute_frequency_distribution(data_array, normalize) -tags : module, stats -''' - -import numpy as np -import pandas as pd -from ..core.AbstractClassBase import StatisticalModule -from ...utils.parallel import ParallelProcessor - - return self.result diff --git a/py_stats_toolkit/stats/temporelle/TimeSeriesModule.py b/py_stats_toolkit/stats/temporelle/TimeSeriesModule.py index a7ef81f..a4072e8 100644 --- a/py_stats_toolkit/stats/temporelle/TimeSeriesModule.py +++ b/py_stats_toolkit/stats/temporelle/TimeSeriesModule.py @@ -2,7 +2,7 @@ ===================================================================== File : TimeSeriesModule.py ===================================================================== -version : 2.0.0 (Refactored) +version : 1.0.0 release : 15/06/2025 author : Phoenix Project contact : contact@phonxproject.onmicrosoft.fr @@ -11,16 +11,15 @@ Copyright (c) 2025, Phoenix Project All rights reserved. -Refactored module for time series analysis. -Follows SOLID principles with separation of business logic and algorithms. +Description du module TimeSeriesModule.py -tags : module, stats, refactored +tags : module, stats ===================================================================== -""" +Ce module Description du module TimeSeriesModule.py tags : module, stats +===================================================================== """ -from typing import Any, Dict, Union import numpy as np import pandas as pd @@ -29,27 +28,6 @@ from ..core.AbstractClassBase import StatisticalModule -from py_stats_toolkit.core.base import StatisticalModule -from py_stats_toolkit.core.validators import DataValidator - - -class TimeSeriesModule(StatisticalModule): - """ - Module for time series analysis (Business Logic Layer). - - Provides basic time series analysis including: - - Rolling statistics (mean, std, min, max) - - Trend detection - - Seasonality detection (basic) - """ - - def __init__(self): - """Initialize time series module.""" - super().__init__() - self.timestamps = None - - def process(self, data: Union[pd.DataFrame, pd.Series], - window: int = 7, **kwargs) -> Dict[str, Any]: class TimeSeriesAnalyzer(StatisticalModule): """Module pour l'analyse de séries temporelles.""" @@ -66,19 +44,9 @@ def process(self, data, timestamps=None, **kwargs): data: Données d'entrée (numpy array ou pandas Series) timestamps: Timestamps pour les données **kwargs: Arguments additionnels - Process time series data. - - Args: - data: Time series data (Series or DataFrame with time index) - window: Window size for rolling statistics - **kwargs: Additional arguments Returns: - Dictionary with analysis results containing: - - 'rolling_mean': Rolling mean - - 'rolling_std': Rolling standard deviation - - 'trend': Linear trend coefficient - - 'summary': Statistical summary + DataFrame avec les analyses """ self.validate_data(data) @@ -86,35 +54,7 @@ def process(self, data, timestamps=None, **kwargs): self.set_timestamps(timestamps) if isinstance(data, pd.Series): - DataValidator.validate_data(data) - self.data = data - - # Convert to Series if DataFrame with single column - if isinstance(data, pd.DataFrame): - if len(data.columns) == 1: - series = data.iloc[:, 0] - else: - raise ValueError( - "TimeSeriesModule requires a single time series. " - f"Got DataFrame with {len(data.columns)} columns." - ) - else: series = data - - # Calculate rolling statistics - rolling_mean = series.rolling(window=window).mean() - rolling_std = series.rolling(window=window).std() - rolling_min = series.rolling(window=window).min() - rolling_max = series.rolling(window=window).max() - - # Calculate trend (simple linear regression on index) - x = np.arange(len(series)) - y = series.values - - # Remove NaN values for trend calculation - mask = ~np.isnan(y) - if np.sum(mask) > 1: - trend_coef = np.polyfit(x[mask], y[mask], 1)[0] else: series = pd.Series(data, index=self.timestamps) @@ -136,10 +76,32 @@ def process(self, data, timestamps=None, **kwargs): # Détection des cycles if len(series) > 2: + # Determine sampling interval for correct frequency calculation + sampling_interval = 1.0 + if isinstance(series.index, (pd.DatetimeIndex, pd.TimedeltaIndex)): + if hasattr(series.index, "freq") and series.index.freq is not None: + # Use declared frequency if available + sampling_interval = pd.Timedelta(series.index.freq).total_seconds() + elif len(series.index) > 1: + # Otherwise, calculate average interval from first two points + delta = series.index[1] - series.index[0] + sampling_interval = delta.total_seconds() + elif ( + hasattr(self, "timestamps") + and self.timestamps is not None + and len(self.timestamps) > 1 + ): + # If explicit timestamps are provided, use them + delta = self.timestamps[1] - self.timestamps[0] + if hasattr(delta, "total_seconds"): + sampling_interval = delta.total_seconds() + else: + sampling_interval = float(delta) + # rfft is more efficient for real-valued data # Compute FFT only on the positive frequencies to save computation fft = np.fft.rfft(series.to_numpy()) - freqs = np.fft.rfftfreq(len(series)) + freqs = np.fft.rfftfreq(len(series), d=sampling_interval) # Skip DC component (index 0) main_freq_idx = np.argmax(np.abs(fft[1:])) + 1 stats["Fréquence Principale"] = freqs[main_freq_idx] @@ -178,34 +140,9 @@ def get_seasonality(self, data=None, period=None): Args: data: Données optionnelles period: Période attendue (optionnelle) - trend_coef = 0.0 - - # Statistical summary - summary = { - 'mean': float(series.mean()), - 'std': float(series.std()), - 'min': float(series.min()), - 'max': float(series.max()), - 'count': int(series.count()) - } - - self.result = { - 'rolling_mean': rolling_mean, - 'rolling_std': rolling_std, - 'rolling_min': rolling_min, - 'rolling_max': rolling_max, - 'trend_coefficient': trend_coef, - 'summary': summary - } - - return self.result - - def get_rolling_stats(self) -> pd.DataFrame: - """ - Get rolling statistics as a DataFrame. Returns: - DataFrame with rolling statistics + Période détectée """ if data is None: data = self.data @@ -215,24 +152,34 @@ def get_rolling_stats(self) -> pd.DataFrame: else: series = pd.Series(data) - # Calcul de l'autocorrélation - acf = pd.Series(series).autocorr() - if period is not None: return period + # Determine sampling interval for correct frequency calculation + sampling_interval = 1.0 + if isinstance(series.index, (pd.DatetimeIndex, pd.TimedeltaIndex)): + if hasattr(series.index, "freq") and series.index.freq is not None: + # Use declared frequency if available + sampling_interval = pd.Timedelta(series.index.freq).total_seconds() + elif len(series.index) > 1: + # Otherwise, calculate average interval from first two points + delta = series.index[1] - series.index[0] + sampling_interval = delta.total_seconds() + elif ( + hasattr(self, "timestamps") + and self.timestamps is not None + and len(self.timestamps) > 1 + ): + # If explicit timestamps are provided, use them + delta = self.timestamps[1] - self.timestamps[0] + if hasattr(delta, "total_seconds"): + sampling_interval = delta.total_seconds() + else: + sampling_interval = float(delta) + # rfft is more efficient for real-valued data # Détection automatique de la période fft = np.fft.rfft(series.to_numpy()) - freqs = np.fft.rfftfreq(len(series)) + freqs = np.fft.rfftfreq(len(series), d=sampling_interval) main_freq_idx = np.argmax(np.abs(fft[1:])) + 1 return 1 / freqs[main_freq_idx] if freqs[main_freq_idx] != 0 else np.inf - if not self.has_result(): - raise ValueError("No analysis performed. Call process() first.") - - return pd.DataFrame({ - 'rolling_mean': self.result['rolling_mean'], - 'rolling_std': self.result['rolling_std'], - 'rolling_min': self.result['rolling_min'], - 'rolling_max': self.result['rolling_max'] - }) diff --git a/py_stats_toolkit/stats/temporelle/__pycache__/TimeSeriesModule.cpython-312.pyc b/py_stats_toolkit/stats/temporelle/__pycache__/TimeSeriesModule.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c46a08ba166ba6c7cab21d708a0228214144cfaf GIT binary patch literal 7921 zcmd5>eQev-b|>{^NtPnZABp4IG3`2zfxo#!~M*EorjG+T{edaOt#RA^GU zq|(@xfffibd2d5(bc1cQfKwzuWh6jcU_gW60V|NdiWPWbvDPoRfFnvu4Mca?MzR zF7}~?qTFx&9LYTynn8=D8QM)3|ds%@sW}9>{ipr|W&!s~;Q+Ce9*>BQQ4k#T^mT_)4Ui6eRN!_5QT<57^xn5Z~2g4F8 z&dl;8-U(A?6<}g=oG{`c7OTL`IG#}+t>K|zRDL=pgKLYt(xp43P*nav(cK@gYWPY> z5oY;t?f@I=ql~}j=OFuQdWlkv$C%Aq6VzYA+V16^&{V>bph9R9BdKMkl%Vg^_vq|D zXkjy-&rm_j=^#y9@E~Z{?Mghx6WtjK36h|OLIm$Bg01z+SkCq#I8IsP2W`4d;l+s2 z+i0r0LZPt8DoQA%U@wMYKK_HyeMO$;yGdM0hO=&~t|e*v z3*UD08xQ+8nolHc|KMzV4y~vcb^@(g4!`F~2VTz!Dw?w>wlq^BZ)e<%PyEoTcsFy+ z5*BkeXi7D`lS12 zDwL&qnj_3Piu%?U`j}F}0jq8_{}oDe)tt=?ua{sq=E~-riL!*9vmb+|2`5MnluBv9 zm7(qn76L(f&Q-MQwkT7W$CQdZ0sVDXDynVSRRw<|`5h*A)I(-Htcx+_Ldm&IZy=+k zTSNne)~57H_y4?Yw9V{QD5H2!?nJ{Nc;e+Hf{>lOL&bW8EyA zk2!b3{R(|PEV3fSmFF#Xwe$)5Bvhn-$uqc1|6i6KC#QeO5x64#i?nldp1gMcSFWpE z?KU?d;Wl*w->fyCMh}}(p^Uy!kWC3}{E{=1AG!P$Blnr(70Mz@=5K+7>%P}q<(#MJ zJogv+nNm^hFU)6!GEvUiIJ%B9H8e}kHVcjBya`XjYxt0z+hd&Y+<(q}^$6TUPR?ew zm@TI$$yc<#qlH;a32q0UaXaMWa>;w!ZfXIM`ar%f6~(T*65x=Ecq4=%H-uUv86gkg zMqD!SMiJZqoSAP&E-Od#|L`fts_ao_M3$sSYXDNXL%gIC5Sf4!pN`EiEFtU+BgdH- zD{-vCWEnv4g)vlM1`w&IV@j4KWaS(b!a;MRztE|z?dpdts>2ME%kgQLR$*9<6A%ka zyr>kEKF6zZBJI+91oR#w>@?4?*ZD95po+<1E+sz=U>w~pNF09yP)^x|Jj27tao95I z(R`RCYNt9A^Q@#> z6_wMi(*j7->fxDsXfhzNmWJ`iD2keo>y|Z<=;;dcs_%c_A zu~xYSoBuKOtd??lH=T8wvu^p#4JWfxUXw0w+VWIvdKxuP<8Akbr~OHJ%>wuNiPiCy z@s0AN*a_pItsy(pu(c(v+&TYNcw0d;q=x4{a z_BAZcF3x_MCJEonQB_SEl~> zj-09Qf9Tihk0vL!I(ygO(mIEeqv^_^=4spM=vnX7I*xxc{J3LufnFG0?$oLSk1GP- z_5Qa1+x}-(%3GPPJfwL7pwPbkOTwaX+n(9m3UsD?%U5;lqxp~0&N@;Fjv2;F$xL`@ z8LSfC9U+&YKlT6yF>8y;IajG1nv#3Zw zjZ})UsCkZ3;7#{(+-uHS)ECe0u9UDQEYX+n6pl9WJj7B<{BNOqK5Cp~#W1|yVnh~V zD8xLxrQtGpw8jdA|0%IlHs0$%EW_Xwh^vAqK!`@}+=cfVqX^7tS>l7Qh;$d?G0-Lu z!wA&DeHu!RJ@M?hH9kN7 zSK;N>Pp@V?%}ILO=f5>OKfBztd~r3j63R4mtre1!@VQSc3Cnqkv&`9w&X2Ve-YYs&CD?jVtq0b)E4{$6NN0(!WSMJ6D$1&i~4Fm8-9rS|{@S0(=uu znvtS;reQvzsAy9`HYM-|yjXUnd%6GSYT+%*%SXm5X;&&@dRB9sgzdi5T;*JuiZIql z(NE zz-J}d*pZpp<{yb@MQv{4Sib?t0dZ~bPm60i<^FVe!&cS)Xf{Ig)7^z@Fafg$v8aHoSo>8cuh<^WFLM z4E9RGNN->I%eV0(L5t8PtIKWJQQ1mqeV)T#%Qqo{a%S>EtXG@#t*i4_fjo4tbZ6??GkZFcR^%v-MaPnF(RbH+d+bhmy8HNo zFHrJn(t8BdG@>J-?o0+3W9+ef|jTM2}$^OKnp?oG<^Oz9dzRt4+-8NghIMI z6v}>B1KAS_U5m3~uBI#$;^Z(q27am|3p>&E7kqa WRR0f-a~67%PVaB~9c4(yQ2zyD-mhE$ literal 0 HcmV?d00001 diff --git a/py_stats_toolkit/utils/__pycache__/__init__.cpython-312.pyc b/py_stats_toolkit/utils/__pycache__/__init__.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..461fcd5264725a35317a3f15d190c63f7a168999 GIT binary patch literal 501 zcmZ{hzfQw25XSBNYXxm37A7RRbcj$J9sq$GQnZ-a@8c=0kdp5qhkKOx0%hI(BW?Aaxe5S8l%{`!(pRveQ(yg(3DYKbh(IgmZA-7P8dX!Je`7~v&46# zZ61|wB~B%tku$$pHUMG~0hGO89Jzs2gV;bowxOt>Dfj=m&DK0Ci`uO6sZ0zfz&%=+ z5iKu+iQq{f^E6Fla4qCC$clcQLVumYUyGkP1vOs)KvSv#23fIe9<72smK|ebeh;ZV eBd<2b_yHX}p_32G!PXlx^4@Z{t(%5ynS)POJ(#cn literal 0 HcmV?d00001 diff --git a/py_stats_toolkit/utils/__pycache__/data_processor.cpython-312.pyc b/py_stats_toolkit/utils/__pycache__/data_processor.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a8e826a98108a915faa2182177198243c2e443c9 GIT binary patch literal 3538 zcmcgvO>7fK6rTOD^JAMN4kRfF(S}mMDLDNBlZu~)5T~U|=!HayL^wfD&9N1SZW>DsTMkIj13iVLQYB8EH|t&7sZmu4b)?M9 zzIpTB%zNMW#<}nFRU*>rrfWcn zs!DIf+Gq9UxsSC`2HZ<%kReB4MP`tbc#)M{k~_AcaDz1`(&`e@irB^yQqM5|3K5H( zRC0ZoCl}&pa~lV}5?_JDikIx$E%}K5lBXVai)EuO$NvWEzw#yf2j3;)NbX)<;JYg2 z3V2K(r1Cv0lPXT{9iW*^2@yH0X~M9I1#w-j!-^>AIIL)7F&@@QmV+h_N~Rl2dJ-!U zl3k~{$&4d^Yah~_>(1H;(sQ6fC}soG*@ln}In0irK2)?1Y|<995oHU== z!6KQy(3ZDY55E4REhpyAzD+vZowiaK9znCYc#u=o+L|JxZ?flkjaZ#6%a7k6k^Ppl zZCqsnT)OF~s$7w)zPq`sJalzDW$7~VwHf`tc(&$YkF9T zNG7W!%rZr!yG=GBn(jUc6PlauD`7cF#tMMX2^@|~CND=d-E>*H75%A%bR1(9o4ih@ zW_c*25Uo%sz+lLq1{W8e54BxV<5C+=DvE^LK34JNwnS<-@vdvTbybxwNA*&I5Y-6T z8c=?;Ho#ygSCmv7Or*?mT@BIur|aOH4$OnL9CEIGO1jU{pKj!>yC&Y~n(UeA$$EAy z*Vcb|>Y=Yb!{4|t8J&n`eLIP0{J_`&!`p1jq{&G{TIqbFSH3WXUP&ZHuB?hl9~pz_rb`EI$&il@)QV zJl(vWYY9>t!2`-F!X=^+Ps(~UAxngaax$)HyM#zFF$pUzD*+b6 z0Zms_5v5NsBqdz~q%HCl*npwbVIOo*hA>3YQ8{9M5XR=6A@?-KS2;MR3fv2a7d%YAYK0ta#;%yMeS?S5q6c^?CPHA z&U&^NvCCr7!z%x^J>v(*4lY#*Q=Pw5y|!H6IG!3yeOEKpIkj=@GoyO@lRCj@`CzGT z$fzE&D2ON16Y04j{;JX^cp@a``~Jfo}6XzE|`4;bD7+(3>O zaVSAP9Ll}(e^aiSP{<)2IZ3YBKNi+KT+Y*0igvp56cx~<{@-ab$JnGI-T)J{iERFH zDjrG#G$c6*N+(9tuH_+ zLHQIveKA|R&+zUmjnT74)B8*QbB6aE#b|)-?F}%!fojT4voaKlt71|n531=6g+5A# zW$Sr0%R(Vhjer>5$Y>-k>6cUy19z?R;KXlz)SxJuoTg*k0?OM+H-o5i%6RMoDRcl* z?Gfol(F6X-BUjA|>v8on*OHktlS30j({J7GnCY0ma5r`*wy@3Ea$>Rm&eu5P_FEfQHk0Y2*5EVF=Q_(|T$u}#q$Est-R_?0p2v67chU=l;U^ax-d#bU zvE0zOLRFXF-g~#>PRA5se4<0X#Ku* zK{Q%=78_5kAkbLezVnuRQ(mR&)Xp;&LV literal 0 HcmV?d00001 diff --git a/py_stats_toolkit/utils/__pycache__/parallel.cpython-312.pyc b/py_stats_toolkit/utils/__pycache__/parallel.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8e05d7ec7cf19becf4c14549bf69753903a60663 GIT binary patch literal 4715 zcma)9Uu;v?89(=r|0Pa>VYnMH5n;trdx?Dn@;nk*3j!J_KzN>}k-4ty*7Vl8y8((kj8j*i#@#4EC^n-?6Xb zn6UL&{?57Qocn#}&+q%b-~F<#u7*JQ=a>Bv)kVl1e6WhW)L1;;=Yo8@6%9G5fGX5ml#RgTGyMj@T8)d2!gqlUIo1JWmwY6~3r-xQ6xB zKwoXSuQtU8-RZ5zWh%#FYAleT@vy4v5p6V(G$OHx5mEKfJyb9l6ilH{OPS7>V5)pN zrkdjG5#2ByniMhAG2OIlQZ#-VYN{GZs)b8-nROdT0sgBeLClg2?B}eX5b85HHVz_M zIlt6-rBqLF25MkM;WO}l8fII&7$QL--3!YKCu6dq2I2`LGA73Y;WJ6?Y(S5s)xb!c z1`g(9?o@5C-t<@yJYGa#oThjEkRdSjtjmLYR;g@6;+h^} zcd`y~BCLcVPJu=VanHl$`M(+BGDIrlF(nvy<*WeuI16{{=LE&{*4H%f7&(TBvo6ze zsW=VY`wtX~?mrk5X)V;$jRKX^IuJqL6m>N=Qi(8H4+ExCk|Lm6lG6UC4k~1gd10pB z0RnJQ>zO|J^TW&j;BEiDf`8wFzc)MZ;ScWmzm=2o&7Xwufo?+0;z&{pn>Oac)CoOd zJTc<|GVV9lH1KX8jM zHX#fFf{n73DBe_6$&jcO3oAxGE8FZP6Jkb;S|F~N0~UGfe!>7@{8`T&83OwpTf;fB zopckOI|t9F2nWBEFfN=T=eX^}U=bRXwiDXMk#Dv$&9?`jW)r%bTf(;5T=OgO)&v)=omf&Sc-UZTkw=X$y)p`J71`B90E1bFMyaNcOkSS ze{m=$+^TI|+O_wu-tPSVnSq?|de)mLc)K4jdb@9d z836{L=--DmebmsmP5gDBZy&j7Z|vJC+-z}Sd#9~$ujA%!0o!|-+*jA%Dtt~F`Zw7> zZ*XDT&w(^WMK)wpl*c1_xDtxxl(hm@-sj#IAc*i$Xjhk#18n}^ke=eE1b}7<_Cu8p zumRLFkmRE1ja6kz04FHv02d%pj3QMfD`Ytqy`34+g8qc9Vs#jFs!GPjG?zjiBvdio zQ3Y2y76(><$Fv@>|A6HLIob{y!{uZS4|skUkyd*^e9Tt{kBw6O6hFmfh_yFZ0XGQY zkk=CCCK>%kV)iAp52>2dnL!429nT{$ImwXy#CbR-kDXTJBk7)}BV`%YOl9bBEFP9) z`VnXp-`NG4bWCIank1i{z8&f-g!*o@e<6M$FNBUy5*9Io4hYhiiXod2ifP!al_`}| zlAZv?ADpB;E>AY zvzN|Z7Z*L>&Dxh5n{PL62lBAketiDqj}{tFWgW{6o(u0yy_XyPbYQWed*0d2&SDTQ z-UqgzLg)nRFi~mpm?}wTjU_`FngM z5R>Vs8o>CbtA-vjYuF@4Bsb0jG70bpNAEQkQuZXdl30KAy#jC6;m3lHFqy$^KrG$P4YRp3n zc<_zN0LOxmY5$2FOCoP_)8{}VL!W7*vNoy$XNqX19iqRgDP~OsK6K$jObeHS`3pEb z1j2Njk#*THsA*5g5P{&MwiwHUHC18UbZJs)4RkNQ$wf>T=%j`eQN~RN*kKJDn`H6{ zMMrjtOsrUne5|2F_bgrMtyKAk#>so5`WgT}t&C{s`&$k~C(e-?`Z>eQVZs3>4E*>ki?8?5j+`J{%oqP9U zZ+_tNu}jC`bK!SG*L}Y|wb1;%>?=#YmfV*73-i8R*_W5x-V1|MgBSPbZJ)gO7x&H; z7ifFvChq3x(VRRJ{nfV1otHWnYxh91nLRRdB)@aP+d1#NGn{W(tn2)n zyKM=^56&FSy|v)oG4I~-wb!2=EGHBULU7LL`g9+P{U~}-;I=Cg2Ft+g(q6}b11K<9 z6a&j4Y@z6azmA$PK^(j<^0qzseX9hj6{pA6ntyeLKsDEY?da8`ta+p7Pls+EdWgM6 zeW(TC#MXdLW7>^W0)lKYTa}ZSY15KpiByppu(jgndNEV6)vRA3aFt?)JVKAbJoFj8 z9|W*lj{AzV-yzL^CmjXSafdXn*hOwf{-qTH)tr}A*S)Lw^r-$huID}h@#shk*ZP2f HV5|Nw4+bM= literal 0 HcmV?d00001 diff --git a/tests/__pycache__/test_basic_imports.cpython-312-pytest-9.0.2.pyc b/tests/__pycache__/test_basic_imports.cpython-312-pytest-9.0.2.pyc index b01d2e9fa9b4fc902f1e846fe3b578000b296ea8..cd223f32b07681b034aa20338f55c7356b8d5f9c 100644 GIT binary patch delta 20 acmbOyHBXBBG%qg~0}#AA?X;0wm=^#!CIvnK delta 20 acmbOyHBXBBG%qg~0}#yF;;@lhm=^#xa0J%? diff --git a/tests/__pycache__/test_correlation.cpython-312-pytest-9.0.2.pyc b/tests/__pycache__/test_correlation.cpython-312-pytest-9.0.2.pyc index 909c04f03b192767ebe532cb3d939968d1f47e97..14944f70e3434329f94bddf4babad481eab24bb0 100644 GIT binary patch delta 20 acmcbscvq48G%qg~0}#AA?X;2mk^lfi{RWHx delta 20 acmcbscvq48G%qg~0}#yF;;@nXk^lfgM+PMT diff --git a/tests/__pycache__/test_descriptives.cpython-312-pytest-9.0.2.pyc b/tests/__pycache__/test_descriptives.cpython-312-pytest-9.0.2.pyc index ae0c096894ab105878015a948e71855b717b5030..45748962400516fec9ed121aa3073abed64c05ba 100644 GIT binary patch delta 20 acmdllw_lF?G%qg~0}#AA?X;2GgdYGt;03Dy delta 20 acmdllw_lF?G%qg~0}#yF;;@n1gdYGrDg{IU diff --git a/tests/__pycache__/test_refactored_modules.cpython-312-pytest-9.0.2.pyc b/tests/__pycache__/test_refactored_modules.cpython-312-pytest-9.0.2.pyc new file mode 100644 index 0000000000000000000000000000000000000000..545f97ed62f25163f6f1aa5d6d7cad5c57e8b9c2 GIT binary patch literal 10817 zcmc&)TWlNGnVuOAsS$OTW!kdrm`)^1lx!-##!ghnO`nCw#m$0-Tcj^E|+kH?UibPUTpbtg= z|ICHta45-$+C2dOIdjf`E9U^1L+UHctL%)ond~5FLv@=O^v43MSyLCQqcuXTo<4)I26*}sjmf516XoF(&B>Nn3(flz zt;x1n8_fq2?a58CO*CJZ=typkZKio45lU`}ZDARXImO8J?=W&ushfD(o?_{F$7kka zos_cyI2+gC+(tQ@fU|iG&MwN?0-UXDaBioZZNS;S2ImgSxd}Ks*5G_X*(qz;Nth^OQe00HMGhy^@?=8M!g@MQw45Z$8j&POa_K`%9_thgwEb9iQ6lMPY`FC zmY;;j45KhH7K9swI_8tPcbFJ2`+)jo9%vxzj|7aJxSD63b*%a$xhIjKe5I>cqH%jz z9J>yFD6z|b05ZeqPTd3t1+JvqCy|*5xT@e)X75<7*UKk!Ip|U6?Y<@iJ6C#6G?emo zt~8SDd*5e|dX*8KS?Mh{uPu>K>-3&wBK*iA0?s1-%ZSfl-!#~)!R8|M2A7eI`V*2a zog`9HF}PI52!Kqb#uXy~4>_GQ{P;{6oIFNw?TkQ@O3`iB;58*Nrr|P#|8j2dVmhe| zl1Y#%GKk_hn91(dbV=9t>gjailB!o1fH>*eAX2e(Ttu^fEt<(1eofK;Ig{&ILlB~r zZ7WPf`#O-fnMds%Gkd1@;3F~}nSJ4hdp{igNWG=r<332f3U@t+Vpu@Qmb0s|LG$KlwME%??dX_`XxVQn}Cdkv^T+OPpA zHO6Q#>%R8Jq?CZ2yeMUq+Z@@3RRI8w$%L-qKM#jV7nb!>Ik0*MdC(_GN)suhndl`Z zC7q~m=1#384eRXa2^dv71>|k!VcX`JuIa9Yw(de(cmCW*uibj>-i1%wx{GbkPX!(d zjSIq#g0N$D45TP=EAc=$K+Cokgsrooo7-<}&tIMwqGSj35)jCA&6oMa8neGplIP1+U=K2f=jZbhEkh4tln)y+{ut$rP* z0&3NnnU`9faMY^9R;El@Mt2ac0VlA;(j2Ywz)U=!EZl7 zZ6B#8o8UoEUQiD!atU}z2y5^l5?o93aCSn@RMn=$p)lsUSC<1*t?t2=C~%-wh2Zu2 zYxUQguQk6%9thhkHT(O})){qLok>rp^P`|SAGzN*Z<)C;ePKqN7V~F|oBO_~ljj3@}N$HwVuY$2o z;z~w01hQY$Ks_qAc{lzFmR3%YUg*67UPC%sTjp?}WNiyRcW4b!@NV`DcK3Y+(ruS0$u?NCl#AAHBlt27H z=(pb{^1J7S$SML6q+kQv0>iu0;}~Go0`WMy6~?#3sC*9ShmUi3=4|k(rFV4)vOq{I zBrhxT?A_)o2`30uz%5kd7z|II2U4w|!H832HLjB$s5X2FC1q>m2CD|VI;}!*8mROLHQ?LNUL-pq6xfA(s-8=A!KvnQ74@28#E=^xr2t^8^$Xxfwy|;Vs9lYOh zZ%?8B`C{n!R0CDUXnMWBY4)4N&dA*ZbNa_uZeO`GG%p;X@&cNfV6cMhLxSi@4j`cl z_$2233CTeuXtSA!co=g?P(?dBcwmXI_eYjO0e}B;tIvPbY=CTEQU|-R*V`e__2N30 zYhrJ{N_~uYNip+0WY%(XOYS8&jg}_@ZmOe&%UnuF&_Ud|k;M+^J>oa@5atbD21k-0 z*D5-vzfy8){2%Jjoraz?^bJ7sbcAMxr-x@pe|QFbwE9P(J;hLT4UO?fXt@y@%T&0I^z?j-gKwK&Wys!(mF5Ler^}>$1 zy)f|Q9|%vEL&5nUb$u8IyolV6e2|(K4pnOo6ij+7{u?qID;QK`I7bG$w^9x5sOe|HiqRz5FkXB4r5$3Ph7t!2GFISI6`pa1%9D zvnGn4o<9A{9nTdzhD;pqDTaEzaoky~pF`V?J+8GZ{x2J8lYQQ?wu&RokDLU2u1}>& zQc3_i8}+u#WLE)d8d4)D^G01fotR9fv}&X_je9$UHfBZgv$=z-Z+aY~JvTmU!(epe!Hp8`~=v zr}APcN3Vv;aksL*5_X*!!b3W&4%AE=AD;ry=6=ksF@odr+4F$wB>vvuwNw!$WdajFOVd@ATih%NeW+(ETR<>;SB#cNl#`B|79sLsg%c+$PltR z@%*x|5+0#dv;w`BE8W|?IfcDt!tv#GQ|1TJ1BeaJMQ6mVW^UxinS{g+5m&ba*{PQphE8|LFYfhdPJnxIuv8%@eClxbYMv>q>4-)oy8(Q;t%V5&qH zKa5V@io0|gYn*-LcBPUB4$*I?&J&xwq=`l|?GG^kF)RrMAKt+k+qo#m6OcO#F$f2P z5>)q#G7t!!IH6?<0Y<>IRZNQz57sJYp9U;eU1HNJd<(kKFpS~>0H&|M(9vD!=+3_~ z$Icxnb_|;4Vx$-v@ILVPx9K+E5^ltSUys{wJK?skb`{13rb4C>X;W+{x=}ic)~(Ee_xr{UcW@ z3&uE#xU9_a$XrGv!M!K{3MAqyoha&iRh#wnfQi6s)S+{ofp05%usH@`pgG==6TGuG zd{CvKScmOvax)HVwaP`;DGC<{%9g`wA4<@a{D!sVPy6?4sdaLl~V@6S|txjK<@ctmwRT zikU2)hp2h%;7CS^a)qcmxL!_?QD>J0p)ox^5;26yl&a&4;ZIE_Gg*p^Mj!)U#7LSM zKsyLS(a&5&>ob=qeT$ZTxDc)SEXn+m=GhueWkrhYc7K7$2d}>8MuI@@4TWOutM<28~bZCFWn|Z-S@@xBGP~_dOVlvvh~qew9--} zXl5D$O8AQsd}R`ZR~n$F;UDURP@{lK4V5^9M~QGr)?b7n^NXTmWEOhBu!!~(AfNkK zmi>bBvHYKd49mX5{9DWKnD$>YoBoeE`zMZLxjzLM_6++O^ZIAZOP?{rD}gq4Ab)XT z_cQSQ&`?ynLieFU z_aQ79T6waSjpUCkJaw?})WH=7URHLuvIBF%Lf_Fs-%%_X_Fa#Rx4nJ( TKhAvT%)2ka9U8xl!tmb!`HbOO literal 0 HcmV?d00001 diff --git a/tests/__pycache__/test_regression_module.cpython-312-pytest-9.0.2.pyc b/tests/__pycache__/test_regression_module.cpython-312-pytest-9.0.2.pyc index 59f537ed0143308f80e33cb28c46b2dc846f8c90..ec908058e2fc77495f2dc0c6ec149e97d21d3b52 100644 GIT binary patch delta 20 acmaE*_)3xcG%qg~0}#AA?X;2mz5oD61qQPK delta 20 acmaE*_)3xcG%qg~0}#yF;;@nXz5oD3PX Date: Wed, 17 Dec 2025 15:35:42 +0000 Subject: [PATCH 8/9] Remove cache files and update gitignore Co-authored-by: EthanThePhoenix38 <103653068+EthanThePhoenix38@users.noreply.github.com> --- .coverage | Bin 53248 -> 0 bytes .gitignore | 6 ++++++ .../__pycache__/__init__.cpython-312.pyc | Bin 2918 -> 0 bytes .../__pycache__/correlation.cpython-312.pyc | Bin 4212 -> 0 bytes .../__pycache__/descriptives.cpython-312.pyc | Bin 2629 -> 0 bytes .../__pycache__/regression.cpython-312.pyc | Bin 4251 -> 0 bytes .../TimeSeriesModule.cpython-312.pyc | Bin 7921 -> 0 bytes .../utils/__pycache__/__init__.cpython-312.pyc | Bin 501 -> 0 bytes .../__pycache__/data_processor.cpython-312.pyc | Bin 3538 -> 0 bytes .../utils/__pycache__/parallel.cpython-312.pyc | Bin 4715 -> 0 bytes ...t_basic_imports.cpython-312-pytest-9.0.2.pyc | Bin 3358 -> 0 bytes ...est_correlation.cpython-312-pytest-9.0.2.pyc | Bin 4317 -> 0 bytes ...st_descriptives.cpython-312-pytest-9.0.2.pyc | Bin 3903 -> 0 bytes ...actored_modules.cpython-312-pytest-9.0.2.pyc | Bin 10817 -> 0 bytes ...gression_module.cpython-312-pytest-9.0.2.pyc | Bin 4330 -> 0 bytes 15 files changed, 6 insertions(+) delete mode 100644 .coverage delete mode 100644 py_stats_toolkit/__pycache__/__init__.cpython-312.pyc delete mode 100644 py_stats_toolkit/stats/__pycache__/correlation.cpython-312.pyc delete mode 100644 py_stats_toolkit/stats/__pycache__/descriptives.cpython-312.pyc delete mode 100644 py_stats_toolkit/stats/__pycache__/regression.cpython-312.pyc delete mode 100644 py_stats_toolkit/stats/temporelle/__pycache__/TimeSeriesModule.cpython-312.pyc delete mode 100644 py_stats_toolkit/utils/__pycache__/__init__.cpython-312.pyc delete mode 100644 py_stats_toolkit/utils/__pycache__/data_processor.cpython-312.pyc delete mode 100644 py_stats_toolkit/utils/__pycache__/parallel.cpython-312.pyc delete mode 100644 tests/__pycache__/test_basic_imports.cpython-312-pytest-9.0.2.pyc delete mode 100644 tests/__pycache__/test_correlation.cpython-312-pytest-9.0.2.pyc delete mode 100644 tests/__pycache__/test_descriptives.cpython-312-pytest-9.0.2.pyc delete mode 100644 tests/__pycache__/test_refactored_modules.cpython-312-pytest-9.0.2.pyc delete mode 100644 tests/__pycache__/test_regression_module.cpython-312-pytest-9.0.2.pyc diff --git a/.coverage b/.coverage deleted file mode 100644 index 8dd39c70b6534a0fc3ccfba4ab5b1c45d3459560..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 53248 zcmeI4O>f*p7{~3z+0DKeRn;mAQsx3_lx&u$hpNq?q#-I1l}aJ4L<@ChcPCjlUVCeM z^KwAQEm8o9Z-6+#1-=A)0^kVW01mxz;=(h2*Q-R-#mu)# z1cR9I+!D zVKs#tT5-{6E#w!Te38#q&!1P1Mc@;6n)E3oyrF`DE1NqcsA)SbX??@tR`vXYnQ2&oo0e;MhHWu%D4IP_v`%$!NkWS^f;a39$=a3HWarz<;$T7-%{dsNF&Z9O}3_J#ZAl)fDJ zo9NCMx$M!fRnIaW^+dn(m)M}?Qs9PVFEQx^MZjByO!dlnRiUU9hwdYjuD5jF^X%YQ zr_R?`=%@JQ8`Ih9+?;y+CWR<9Y_>9E4fT{V{TOF^g}>j>F?(KnM}4iJF7m<1z{BRR`e&E2%)$q zPf4TksgB@o&xsSs^O9uTVvr@{F%q32cWT*8b!~Q(J5fD-CEA(xvs$)#;ezUiBSPjS z`JGBQAxj2B;F*cw#m(<>ugy-{E{nRO!Heb=;HB3JDm=f-y~jP_86Cl7mB5%FunxG(nvS58G?y;SPd+aU zWN+y!@>i~sr{$%bKc{A^i;Jq?iw0wuXLOe)^$w5Cjt(ahLGsMWLA>0egiBXD!*L?n z4dIZ7p$3`U!X9jd6yjRZwN8FPF7VaLe+qrDfdB}A00@8p2!H?xfB*=900@8p2#gQO6BSJIS7wH00ck)1V8`;KmY_l00ck)1VG^RBydihoSljO55VQ+l}pR- zHyju9Pa9Ehl|3as+fdB}A00@8p2!H?xfWXTUxKq&-zg}3hICv>#gCq6n2CtGHbD%H&I;FVvP!~C{rILnPDh>>#33>m&l7FaFel8r9 zri-g3v-o4-ugYfmpZu@+hlMvwzn8x%FTGrcAr@PYY#O4 zS~?o~3-qC6F`1qNeeucl|HPK&*K(;O&>_3!{eODtKQ<5m0T2KI5C8!X009sH0T2KI z5CDNOCLr(sWBotI9Sqk%00ck)1V8`;KmY_l00ck)1VDfY;Ql|t00ck)1V8`;KmY_l z00ck)1V8`;#-9N0|Brtk!$S}N0T2KI5C8!X009sH0T2KI5WxL^!~h6@00@8p2!H?x zfB*=900@8p2#h}g-2Wf{K8A-N00JNY0w4eaAOHd&00JNY0w93<|A+w)009sH0T2KI S5C8!X009sH0T38}0{;VXk7<U}Sb?y?3U# z%@QjyMiZlt)}Tpae6ZogN1jYbedA>-Mw>~SXnfMAh5BIN#dGh>bhn5+c$2xmXU_TV zIp;g~W-g~7Xn)>((VA2d`kgQ0CP$OCpJDPjGEo(oxQhFbnJ^_gQJ1O`=5yJW>&a?T z%#(Jip01`b5>jcqudY;;XqBgHzZF+?(Z5bXo$R8@j6319jR^vTng(7Vzb=n{@3IEGYJfs=#xod6;@$-81y8Gy#jmvhXE7`F-?fzSnO5U&mS>76T zq^@iNwSbZqYT zo@q46JDzEQ0txJsmnek{mZ3IyfqaoPS)d-PbGzc8EK{C+K`L}zkPhR-f@~D9nr$$~ zxMm*3yC=pjxOFl{ea9hm?2=0_jy0P5dAVTwqq_K@4P8j+v4pN$j^*ikrO^zw)hIDM zqKD~ZF+DU)2f^eheC$2wX3<}Rs90Iqws&PyNgNzWLLcx7x;qUyS;aQNLxlx6U-$Z@$wm(p!1f&ahwmhvuGL>MzYsEvNh2 z>Fq1|;@sr7>ip&QGq1FVCU51Z+L@`})PW^+_uO9S+Vbu-fPiZ2l!gWOvLOBoFahGP z7ljr+hk;HBPN&Nfr`OxP|A91bE!33)(%~-Y!a~K_0Wprs_5)rI&G0?94k-<9+Joo@ zb;}snj>HNU=e3BgR2$*N?kF|j3DQmxYVj9k3bI3Uea)cMXm%O(Fw%lbUb-zt=tCK3 zn5M-aC5PDTKiGE?o@3wUImG$8{&Iz+ONdMe(gObs0TRmmaw2W%7Vu?|igGqiQaZqQ zv(XQ_kmeM889r8qPGnPl&-~<4Zv49g(9IrQR*N4U`0UWfhvxPyspWZyd$}!_DKDet zz6Y_3rs0TYc*ir9asz>?>(lUR!EJTDj44l$a(|H4boCW(tMuF7Mbd=U?SE3F|IZja~i==|X|1jC)-f|__g{debZ0@!^A zNqE=t4y`R0mj|}DQkNo diff --git a/py_stats_toolkit/stats/__pycache__/correlation.cpython-312.pyc b/py_stats_toolkit/stats/__pycache__/correlation.cpython-312.pyc deleted file mode 100644 index 3fbea4dcdeb17373b88aaf5d9b49ced446bce86c..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 4212 zcmb7HYiJu+7QXY+V_zQA&r2W)zPjjnq4% zc%{gYE&1X62y4-RZ0h2aU8q9~aiEYc6gFiS8t5N6Hq^{!A<+KW{iVc#LVhfJ?unDpoW_=z^-*B*NJe2KQ!*W+;&hmf zGhrsqhFOY?F)=RevU+#e13eq_#`!Q$AsW4fWbP`GU0>2RTG*FnLhg*gzmdRLiAjc< z&;&`7VrgB~g?K_v#gx4~e;g++sj{LA#<(I>Egf=JMq-k#3(*7?B8hl1WvJQ(;L4D? za9%Mc6-5&+Nm!N6#}s`p|1Tm!h%uQVEp4(RYQ!+P(;5gc7NSf}HzY%k*izcmTl!f4 z7MMIzP?!QE&@vTfWUovsY?PLnt6Z4#BAJ&t#ih6>wp9bzJd8Hh809Bg|93RXw85b4 zvanuwm&}&-xfQ>{DuF0XP@*o`1Jc!{*^tj{s$s;;ZC-#@HB-3>%f4_C_5xyIxQr?T z1FuA*YD85uqlU4CN!1t^^i(pLzy`S{3rJR@Q3b<}4M=gt7*EK$uLq=F2Z!#xXe-?tDuW#%3YzZHgy}BrH9gMGOzSbrib6c4 z8{qpwLW5sa$hh|i7ZQfh`;6)x3(+QjMvA4BH!+5%Lfji(1FI{ssP$n6E`gr1Xp5-p zx?YGl`HDAMQaM}0D!^Y0S_Jd3DCGp-{_mAE` zR5*F65Inuoc&6Y#v)0-<&0hDd!#*@-va)1ICY3f_STRyqixBmk<^slYqW?hD%cCrP zC022qEM=4(Zo-L*&Q9nJvfFAC9*3*+QQC=pc=*LJ%X-z8Uq-NS1LNU8d)mlV}qeY6Pyb4A*O)Au|_uA%vSi!Y1dmN-r~CvChd;toFes z7~GZ^+y;!nO)y+rVz@S7ylk{MGRp4Sc%P!rXh(7g4s7jJvS*$?37#@T{o&juHm%_+ zWt;jpxmT(Ri{9Grs<7yqK0;SvQCoYdgQd3^B%oTYR)We>j%73mn^PO5-SQp0@Cjwx z!wED5@|yl#^1XJ0WI9{gc89C-08SwV8RnutT2AliOe zjo9VjRQj5T|G$@VSyyc-sv<%8?qu&g3z4Am5_soUKc+%HiGf_mlLSWEaRo#Ej&DfW zPQkWi_9O9j5Yi{q4g%qYT_Zq`VkxdbidV3z=vKayFqYDlTu07K4+`%mNogp>1YLISvB8z)hg=@rCKVxSRva_SONaIWK8Nkf{4?wNi!tKI~!|80V{ecX6R+jFW8q< zd>yi+uPSlb$&%7AiMt`n`osa?u@6Y5^R#wROH69jvXx5#$ias;N|kBKRo=`91BED;-%v|@7Ba!j7P=_%DB z*z`KZ2sZgrrD8~_kQKzHN5r zpXo2WpLR&;RZH<)%{w|EZ@gTZO`o*YC04`@ZviGgat5w$eIQ*f#d4@8G@W?>oNhSZ+F2@E7Z+rgQ_m{ra(3>}2 z%^%7gnjN`uXp(|9K#+X)c}=_Z6^ zbW93G+z;dyvW~Sp6qC}=B6Yy%9|5_F9@8A>d0fYH!C8GBL09ZT`wl*K4^bTdsIF;d zI6s;jz1j0c==0FxiQC~@;X?Comg^1`IkJFUb;C?!zAe`_JG}7j+`Efxp<&OmufJ&R zFM1GPHyxSTKRte}>#3LQu%#h_g#36Pbf(KlCn3z)B3r@}{Y*|v#gl1EFVmHj;Qi~C z&hRfdqQV4u*|tLa2}f*eDf!Ivnr+D=_z;Ydzf5{BkRn4-)MJ{Z*kAmJ+PRE&{Dd0+ Uftvn_Ui!s#nxgn8h)@gYzmbW?&Hw-a diff --git a/py_stats_toolkit/stats/__pycache__/descriptives.cpython-312.pyc b/py_stats_toolkit/stats/__pycache__/descriptives.cpython-312.pyc deleted file mode 100644 index ed204bf7f83aaa884f6474bdc7892980cf6ccd56..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 2629 zcma)8TWl0n7(R2^ONVX?ZRxh$9JDB_y+j2hP>6+9B&LnA`rrn$8D~!El-V=OIkUxW zmgoZ}keJA(54$8JRAVB92lYu`<;4eHC=euvK9KNKp9&=s@x}klbat1zn4V<+bNkPC z{_|hH+3SgfhM@g<{gC}v1fd6F5;k>Zu>3X*D#$_^WJ#9nNI5wp=ah_+Q!}b0))Xg_ ziTZsk6PM8uWT|J66}c)02$`0ml8Vk;)y6%;vzcd`Ot&yIsh#)iY0UJTYZV+ksA(Ti zciM(6-J8UEx~|f2&~yyO^a+>hrkl$bJewSE1k+gk4wvZD2DRM+TxWQO?iKR@z0&~` zSif#iY80U#$JEBqr$OaiYUD6W4Qh+R@|41r5mMx8+BQ8N`Iy))88d@71v0uaS@vL1 zK^SEuaFc9F8O4&ZiX%8W)z)|*7~}Xj(YAToqyCQHs3+CFV*t z`7T-qQi0#{9aya(Px4nmlq9dF7v>-B_8_-|HWh{}T!6kLU0peUS|Z;+gB1_eru;ey zXF*@XPn4vo)@S#`4A4$L2dzm`)X+?CLx=^qO%f%v){;)_M2Kz!ttN?M$(H<;Y$-v? zJjcF)<~O9#H?k?)$do@#!aL-WTtZVYTHQ)g$xq18qFMRtUKLKAmd8=!E5iMTIP|Q> zYK%jJ-ljF`ZCRthbCD-I?+X1Y!xM;KRa$2ED(2wY9u!5+F^2vLqy)FJHz!L>S3!52bh*qNY$ zC%6JgOt2C>?`dXe(#_!^S|9||p;InBF_bS32$!${&vl&>w%2SS>uY2~4SzP&5M(x( zFVgLB$IRBIyg#^NemAJFe?WYR9`~Tu?(^2g^zC)q7dkgq_Llcn)3XQ9r56&(s(SIW zOZEl(c4Ft<&SYhOd4GM`x!~MRytdHZaeh~2xIBETU7s7d*}mg$a&x6vE`GORZe*^j z{KX&1f%)V>S-t;u`uEn~TJI!Bf9?aFd-IXKdBVfp=uxU?uPqlA$_&LMwo{C+z=hRtOC>$ssnCqIOw>n>acJ^>X z%X)1IwX|q|KZ>B9F^LJTzt+FyotM?IR6CDnvjjfx*(}$x+2Ah+j9atWlLfaF(8$vFDamLza??jJpbm| z$#dHtM_^6|Lco+pQ!wUH_*VnP7Thn)@lEFuQpiE3_{K-`z!(M#Z1*kd7kWOFu<~hq uIB>^7`Z`QSVKPBhONt~(V6CJ+X+u)qJbLK?>bQqGpG04gv_}YpkMS>T%x<&* diff --git a/py_stats_toolkit/stats/__pycache__/regression.cpython-312.pyc b/py_stats_toolkit/stats/__pycache__/regression.cpython-312.pyc deleted file mode 100644 index 6734ffcd08f6779c7aea494031f5cbfe763ae05a..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 4251 zcmcgv-*4N-9Y2zyL`$-4E4EYHX-s#{4;HbUq}@<>MKE_+nx<*y*%^>=zz9w}*|aH2 zJyO{uC_ymf!5%W$MIY<{0}||I$ugiR_R#zZ1NvgY8DbDA7NC9Eo|Jk(fj;efN0Cw_ zWlMlz2k3r$@9xX{e!rjl+*hCX_QnyEe}4XsIS9Qk=^!YPMrZFLbT*NJRAg`lZ*h5E z<@16n!*mAyLlq{Ty$LC1lvT0yPc9*d1ubXc!%UZhQ$hiWMid>a!x>a^eClmh%{jwr>!mM5PBB#x)>xn_hDrfce!S?R=T z=FA1a-TsC5&jZx$cBFYCJFC}YIvTYH8+SP`;$SQ1&=1PrUYcGi0)`xadp?3atj4(#dNy~G`2zuReeVo)fBFsJr|C9O`aa&G9p<$!2{ zUC;(J;S9NKpHG*PuRqhJc3|p1|AANyu`2v51ucGDPwzr&RIngp`bgNvI z5A2|n2#^UasG6kVE z%Fbesw~JoPHgrPtvL~{JB*6uq0AMD)&{G7`2MwVhB7;ylG^TI>(oZF0P*nyy@CUm` z=~@-4Yv^C2NPhj!#GmIsQSYh`$KSf~&idK4vl~CT@%{sSm3us{Ja_Jilw6Om#n)47 zsf`b}CLT%$c1K|bH9i6Gw4wihSS#HA5p@Hd<*1}g#K zVYIs;CO`%tJQnO(W{+_&^4oTV!+EH8+sZO^a9e5aWLWzmjNk|Q3T|aXxdhQ%WbpI( zhd#mBdP&TM>bt^f&L>}N$d`4igq;=?>ZXI+KsaBrT{Dk=M4#Ll`MsQfAbCl5>?5xH zON?#VKWX^UO6CK4W;tJS=z_*H^dK{!*P{IeP$ULTFWLGjvL9SXUZE9zk_pA1kL6h7 z@lg0ClpTti0K^BOUwNei#Eoqkn03xVbq&==QUBoj)!$xyoIbcc{^OnW+;(#AtN(ys zfa!W=t@5aEqSiNYdt#?=a$B18VH;8-Xr`pZsCFAV47TBG1%gI-mXZKUts25L_ZmB| z2(Y3HId>)E?kl@cWmRxPvaIq|A-D)Gr3zMBr3UwVk-_(280tAc-IKG0e9^)#Zp|Ho z%;6=m3tSYIm*h*DM?y-WlQJC0c73vJJrbm~CChDOA{gj7>x8B#qL<7Ta4u(NO>Dak z57++nAZ9n|=-zLvK&-y0j*33~bB-g$|EyS|3@rPkdZhP1K|4)rni@ zZk~H|;8^XzF<{wSqc=w%jbv&gnccC&kM|wAt=}2j+4tSY>CvqdTPr*1*PkY${qZ`A z_QZWaw?dZo8KBQW=XpF#h5@3j>cc!L>~_P+~{A)`15S=0F$NYWT7FvpTZ zpoY+d-Os}Ow~7iY!yVV0#o%Xj?Ko86rYM>``o~j$IJN5fcXzzjH@+=>|Go>=|I^gL zrl}!7+Uz_GC+@J53M*qBc7{5%Up!;1GWAL7PpQDz$r|OTA3Qh>6*KmNlBTJqt>>`@ zP}MYWKK#Xlep1um@0HbH_?xc&`B*U?ixB1&H;1#!F{Pm6=Q1H&6+v+_~p_M6-L zkJJW^)&(k^I(h%(XEUG9JaD()_+f48G?PB*8QT+KwtjR#JhU~mMQ&AYR_?sHJw9C< zIbKH~yg&b$`l-4{yY<7vV!VrXD(%z`(kDH=tJ#e=R+rWe?ujt2A4k#DQ=S*!-V>l# zr@gw=kj-*TNT+8x-(qU4(o3eG@uF_I2yeXaD(X4rT~D-2`C^$l*&B3LEc*8(W3d#J z#t2)?S%mVn*B8t~Vn|7>Ni*aGRY(&l=H+jtxBd8@BHsfUy~mCMRbAjX4!q4pzDXi( dvWC9%B}#vR&i@^~^-Xkv<7T<95o?&Le*=fryLSKp diff --git a/py_stats_toolkit/stats/temporelle/__pycache__/TimeSeriesModule.cpython-312.pyc b/py_stats_toolkit/stats/temporelle/__pycache__/TimeSeriesModule.cpython-312.pyc deleted file mode 100644 index c46a08ba166ba6c7cab21d708a0228214144cfaf..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 7921 zcmd5>eQev-b|>{^NtPnZABp4IG3`2zfxo#!~M*EorjG+T{edaOt#RA^GU zq|(@xfffibd2d5(bc1cQfKwzuWh6jcU_gW60V|NdiWPWbvDPoRfFnvu4Mca?MzR zF7}~?qTFx&9LYTynn8=D8QM)3|ds%@sW}9>{ipr|W&!s~;Q+Ce9*>BQQ4k#T^mT_)4Ui6eRN!_5QT<57^xn5Z~2g4F8 z&dl;8-U(A?6<}g=oG{`c7OTL`IG#}+t>K|zRDL=pgKLYt(xp43P*nav(cK@gYWPY> z5oY;t?f@I=ql~}j=OFuQdWlkv$C%Aq6VzYA+V16^&{V>bph9R9BdKMkl%Vg^_vq|D zXkjy-&rm_j=^#y9@E~Z{?Mghx6WtjK36h|OLIm$Bg01z+SkCq#I8IsP2W`4d;l+s2 z+i0r0LZPt8DoQA%U@wMYKK_HyeMO$;yGdM0hO=&~t|e*v z3*UD08xQ+8nolHc|KMzV4y~vcb^@(g4!`F~2VTz!Dw?w>wlq^BZ)e<%PyEoTcsFy+ z5*BkeXi7D`lS12 zDwL&qnj_3Piu%?U`j}F}0jq8_{}oDe)tt=?ua{sq=E~-riL!*9vmb+|2`5MnluBv9 zm7(qn76L(f&Q-MQwkT7W$CQdZ0sVDXDynVSRRw<|`5h*A)I(-Htcx+_Ldm&IZy=+k zTSNne)~57H_y4?Yw9V{QD5H2!?nJ{Nc;e+Hf{>lOL&bW8EyA zk2!b3{R(|PEV3fSmFF#Xwe$)5Bvhn-$uqc1|6i6KC#QeO5x64#i?nldp1gMcSFWpE z?KU?d;Wl*w->fyCMh}}(p^Uy!kWC3}{E{=1AG!P$Blnr(70Mz@=5K+7>%P}q<(#MJ zJogv+nNm^hFU)6!GEvUiIJ%B9H8e}kHVcjBya`XjYxt0z+hd&Y+<(q}^$6TUPR?ew zm@TI$$yc<#qlH;a32q0UaXaMWa>;w!ZfXIM`ar%f6~(T*65x=Ecq4=%H-uUv86gkg zMqD!SMiJZqoSAP&E-Od#|L`fts_ao_M3$sSYXDNXL%gIC5Sf4!pN`EiEFtU+BgdH- zD{-vCWEnv4g)vlM1`w&IV@j4KWaS(b!a;MRztE|z?dpdts>2ME%kgQLR$*9<6A%ka zyr>kEKF6zZBJI+91oR#w>@?4?*ZD95po+<1E+sz=U>w~pNF09yP)^x|Jj27tao95I z(R`RCYNt9A^Q@#> z6_wMi(*j7->fxDsXfhzNmWJ`iD2keo>y|Z<=;;dcs_%c_A zu~xYSoBuKOtd??lH=T8wvu^p#4JWfxUXw0w+VWIvdKxuP<8Akbr~OHJ%>wuNiPiCy z@s0AN*a_pItsy(pu(c(v+&TYNcw0d;q=x4{a z_BAZcF3x_MCJEonQB_SEl~> zj-09Qf9Tihk0vL!I(ygO(mIEeqv^_^=4spM=vnX7I*xxc{J3LufnFG0?$oLSk1GP- z_5Qa1+x}-(%3GPPJfwL7pwPbkOTwaX+n(9m3UsD?%U5;lqxp~0&N@;Fjv2;F$xL`@ z8LSfC9U+&YKlT6yF>8y;IajG1nv#3Zw zjZ})UsCkZ3;7#{(+-uHS)ECe0u9UDQEYX+n6pl9WJj7B<{BNOqK5Cp~#W1|yVnh~V zD8xLxrQtGpw8jdA|0%IlHs0$%EW_Xwh^vAqK!`@}+=cfVqX^7tS>l7Qh;$d?G0-Lu z!wA&DeHu!RJ@M?hH9kN7 zSK;N>Pp@V?%}ILO=f5>OKfBztd~r3j63R4mtre1!@VQSc3Cnqkv&`9w&X2Ve-YYs&CD?jVtq0b)E4{$6NN0(!WSMJ6D$1&i~4Fm8-9rS|{@S0(=uu znvtS;reQvzsAy9`HYM-|yjXUnd%6GSYT+%*%SXm5X;&&@dRB9sgzdi5T;*JuiZIql z(NE zz-J}d*pZpp<{yb@MQv{4Sib?t0dZ~bPm60i<^FVe!&cS)Xf{Ig)7^z@Fafg$v8aHoSo>8cuh<^WFLM z4E9RGNN->I%eV0(L5t8PtIKWJQQ1mqeV)T#%Qqo{a%S>EtXG@#t*i4_fjo4tbZ6??GkZFcR^%v-MaPnF(RbH+d+bhmy8HNo zFHrJn(t8BdG@>J-?o0+3W9+ef|jTM2}$^OKnp?oG<^Oz9dzRt4+-8NghIMI z6v}>B1KAS_U5m3~uBI#$;^Z(q27am|3p>&E7kqa WRR0f-a~67%PVaB~9c4(yQ2zyD-mhE$ diff --git a/py_stats_toolkit/utils/__pycache__/__init__.cpython-312.pyc b/py_stats_toolkit/utils/__pycache__/__init__.cpython-312.pyc deleted file mode 100644 index 461fcd5264725a35317a3f15d190c63f7a168999..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 501 zcmZ{hzfQw25XSBNYXxm37A7RRbcj$J9sq$GQnZ-a@8c=0kdp5qhkKOx0%hI(BW?Aaxe5S8l%{`!(pRveQ(yg(3DYKbh(IgmZA-7P8dX!Je`7~v&46# zZ61|wB~B%tku$$pHUMG~0hGO89Jzs2gV;bowxOt>Dfj=m&DK0Ci`uO6sZ0zfz&%=+ z5iKu+iQq{f^E6Fla4qCC$clcQLVumYUyGkP1vOs)KvSv#23fIe9<72smK|ebeh;ZV eBd<2b_yHX}p_32G!PXlx^4@Z{t(%5ynS)POJ(#cn diff --git a/py_stats_toolkit/utils/__pycache__/data_processor.cpython-312.pyc b/py_stats_toolkit/utils/__pycache__/data_processor.cpython-312.pyc deleted file mode 100644 index a8e826a98108a915faa2182177198243c2e443c9..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 3538 zcmcgvO>7fK6rTOD^JAMN4kRfF(S}mMDLDNBlZu~)5T~U|=!HayL^wfD&9N1SZW>DsTMkIj13iVLQYB8EH|t&7sZmu4b)?M9 zzIpTB%zNMW#<}nFRU*>rrfWcn zs!DIf+Gq9UxsSC`2HZ<%kReB4MP`tbc#)M{k~_AcaDz1`(&`e@irB^yQqM5|3K5H( zRC0ZoCl}&pa~lV}5?_JDikIx$E%}K5lBXVai)EuO$NvWEzw#yf2j3;)NbX)<;JYg2 z3V2K(r1Cv0lPXT{9iW*^2@yH0X~M9I1#w-j!-^>AIIL)7F&@@QmV+h_N~Rl2dJ-!U zl3k~{$&4d^Yah~_>(1H;(sQ6fC}soG*@ln}In0irK2)?1Y|<995oHU== z!6KQy(3ZDY55E4REhpyAzD+vZowiaK9znCYc#u=o+L|JxZ?flkjaZ#6%a7k6k^Ppl zZCqsnT)OF~s$7w)zPq`sJalzDW$7~VwHf`tc(&$YkF9T zNG7W!%rZr!yG=GBn(jUc6PlauD`7cF#tMMX2^@|~CND=d-E>*H75%A%bR1(9o4ih@ zW_c*25Uo%sz+lLq1{W8e54BxV<5C+=DvE^LK34JNwnS<-@vdvTbybxwNA*&I5Y-6T z8c=?;Ho#ygSCmv7Or*?mT@BIur|aOH4$OnL9CEIGO1jU{pKj!>yC&Y~n(UeA$$EAy z*Vcb|>Y=Yb!{4|t8J&n`eLIP0{J_`&!`p1jq{&G{TIqbFSH3WXUP&ZHuB?hl9~pz_rb`EI$&il@)QV zJl(vWYY9>t!2`-F!X=^+Ps(~UAxngaax$)HyM#zFF$pUzD*+b6 z0Zms_5v5NsBqdz~q%HCl*npwbVIOo*hA>3YQ8{9M5XR=6A@?-KS2;MR3fv2a7d%YAYK0ta#;%yMeS?S5q6c^?CPHA z&U&^NvCCr7!z%x^J>v(*4lY#*Q=Pw5y|!H6IG!3yeOEKpIkj=@GoyO@lRCj@`CzGT z$fzE&D2ON16Y04j{;JX^cp@a``~Jfo}6XzE|`4;bD7+(3>O zaVSAP9Ll}(e^aiSP{<)2IZ3YBKNi+KT+Y*0igvp56cx~<{@-ab$JnGI-T)J{iERFH zDjrG#G$c6*N+(9tuH_+ zLHQIveKA|R&+zUmjnT74)B8*QbB6aE#b|)-?F}%!fojT4voaKlt71|n531=6g+5A# zW$Sr0%R(Vhjer>5$Y>-k>6cUy19z?R;KXlz)SxJuoTg*k0?OM+H-o5i%6RMoDRcl* z?Gfol(F6X-BUjA|>v8on*OHktlS30j({J7GnCY0ma5r`*wy@3Ea$>Rm&eu5P_FEfQHk0Y2*5EVF=Q_(|T$u}#q$Est-R_?0p2v67chU=l;U^ax-d#bU zvE0zOLRFXF-g~#>PRA5se4<0X#Ku* zK{Q%=78_5kAkbLezVnuRQ(mR&)Xp;&LV diff --git a/py_stats_toolkit/utils/__pycache__/parallel.cpython-312.pyc b/py_stats_toolkit/utils/__pycache__/parallel.cpython-312.pyc deleted file mode 100644 index 8e05d7ec7cf19becf4c14549bf69753903a60663..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 4715 zcma)9Uu;v?89(=r|0Pa>VYnMH5n;trdx?Dn@;nk*3j!J_KzN>}k-4ty*7Vl8y8((kj8j*i#@#4EC^n-?6Xb zn6UL&{?57Qocn#}&+q%b-~F<#u7*JQ=a>Bv)kVl1e6WhW)L1;;=Yo8@6%9G5fGX5ml#RgTGyMj@T8)d2!gqlUIo1JWmwY6~3r-xQ6xB zKwoXSuQtU8-RZ5zWh%#FYAleT@vy4v5p6V(G$OHx5mEKfJyb9l6ilH{OPS7>V5)pN zrkdjG5#2ByniMhAG2OIlQZ#-VYN{GZs)b8-nROdT0sgBeLClg2?B}eX5b85HHVz_M zIlt6-rBqLF25MkM;WO}l8fII&7$QL--3!YKCu6dq2I2`LGA73Y;WJ6?Y(S5s)xb!c z1`g(9?o@5C-t<@yJYGa#oThjEkRdSjtjmLYR;g@6;+h^} zcd`y~BCLcVPJu=VanHl$`M(+BGDIrlF(nvy<*WeuI16{{=LE&{*4H%f7&(TBvo6ze zsW=VY`wtX~?mrk5X)V;$jRKX^IuJqL6m>N=Qi(8H4+ExCk|Lm6lG6UC4k~1gd10pB z0RnJQ>zO|J^TW&j;BEiDf`8wFzc)MZ;ScWmzm=2o&7Xwufo?+0;z&{pn>Oac)CoOd zJTc<|GVV9lH1KX8jM zHX#fFf{n73DBe_6$&jcO3oAxGE8FZP6Jkb;S|F~N0~UGfe!>7@{8`T&83OwpTf;fB zopckOI|t9F2nWBEFfN=T=eX^}U=bRXwiDXMk#Dv$&9?`jW)r%bTf(;5T=OgO)&v)=omf&Sc-UZTkw=X$y)p`J71`B90E1bFMyaNcOkSS ze{m=$+^TI|+O_wu-tPSVnSq?|de)mLc)K4jdb@9d z836{L=--DmebmsmP5gDBZy&j7Z|vJC+-z}Sd#9~$ujA%!0o!|-+*jA%Dtt~F`Zw7> zZ*XDT&w(^WMK)wpl*c1_xDtxxl(hm@-sj#IAc*i$Xjhk#18n}^ke=eE1b}7<_Cu8p zumRLFkmRE1ja6kz04FHv02d%pj3QMfD`Ytqy`34+g8qc9Vs#jFs!GPjG?zjiBvdio zQ3Y2y76(><$Fv@>|A6HLIob{y!{uZS4|skUkyd*^e9Tt{kBw6O6hFmfh_yFZ0XGQY zkk=CCCK>%kV)iAp52>2dnL!429nT{$ImwXy#CbR-kDXTJBk7)}BV`%YOl9bBEFP9) z`VnXp-`NG4bWCIank1i{z8&f-g!*o@e<6M$FNBUy5*9Io4hYhiiXod2ifP!al_`}| zlAZv?ADpB;E>AY zvzN|Z7Z*L>&Dxh5n{PL62lBAketiDqj}{tFWgW{6o(u0yy_XyPbYQWed*0d2&SDTQ z-UqgzLg)nRFi~mpm?}wTjU_`FngM z5R>Vs8o>CbtA-vjYuF@4Bsb0jG70bpNAEQkQuZXdl30KAy#jC6;m3lHFqy$^KrG$P4YRp3n zc<_zN0LOxmY5$2FOCoP_)8{}VL!W7*vNoy$XNqX19iqRgDP~OsK6K$jObeHS`3pEb z1j2Njk#*THsA*5g5P{&MwiwHUHC18UbZJs)4RkNQ$wf>T=%j`eQN~RN*kKJDn`H6{ zMMrjtOsrUne5|2F_bgrMtyKAk#>so5`WgT}t&C{s`&$k~C(e-?`Z>eQVZs3>4E*>ki?8?5j+`J{%oqP9U zZ+_tNu}jC`bK!SG*L}Y|wb1;%>?=#YmfV*73-i8R*_W5x-V1|MgBSPbZJ)gO7x&H; z7ifFvChq3x(VRRJ{nfV1otHWnYxh91nLRRdB)@aP+d1#NGn{W(tn2)n zyKM=^56&FSy|v)oG4I~-wb!2=EGHBULU7LL`g9+P{U~}-;I=Cg2Ft+g(q6}b11K<9 z6a&j4Y@z6azmA$PK^(j<^0qzseX9hj6{pA6ntyeLKsDEY?da8`ta+p7Pls+EdWgM6 zeW(TC#MXdLW7>^W0)lKYTa}ZSY15KpiByppu(jgndNEV6)vRA3aFt?)JVKAbJoFj8 z9|W*lj{AzV-yzL^CmjXSafdXn*hOwf{-qTH)tr}A*S)Lw^r-$huID}h@#shk*ZP2f HV5|Nw4+bM= diff --git a/tests/__pycache__/test_basic_imports.cpython-312-pytest-9.0.2.pyc b/tests/__pycache__/test_basic_imports.cpython-312-pytest-9.0.2.pyc deleted file mode 100644 index cd223f32b07681b034aa20338f55c7356b8d5f9c..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 3358 zcmbVOO>7&-6`t82a!HY*B>%{cZHQ`w)NF0ZPST)213R``r;HQ1c5)~f1B*3xtt`Di z%*-+|1qw(}7bVdYVo)GrlOE_GKx#OLo{ALcEyrA{Qig?zed19lrZ=0kf%>ZO<~u zWow0ihwXb{n{h3})9ITvgIK0+5#p0_bQ;m&NZDP0!3MHW2}4XFmN!vJHYJdXDT7o^ z1!SVG77}cVpZu&r?Q$_ZcsbsMZ^jYgvDy7Qhz%5g3|mEtM}xi+<2f)1)J!>!?;38P zN^)pA8V0G@*1&h7@58F-8yLZ=iP&S%H}KGTG6Q=&8mzCQg0f8bb~}9c9TP+$$t17J z62$OK-(?BUC$8bJ2?OvT!OtMb4wD_rV^g7}NQ)i~IA$yf*b!pX3ldYPQmX#@u4 z`{(mtUtD^_cdaE-^E`_zz3r3hOV#>uSQ^mdf$uxl?cke+fb(aGOC4Bam-UE7i`6*DUoZn>BHK~ve*GWWCto7#at z?diPs;lb{?E$yhV9of{5bj**>e)7_-m+opyyRZuy&xDU*2AI%i$v7sMcm_MUay{T%Qm~RDn(gT|GP= zr*M`pr#}G!KxK35S6Wv(KmJJn#}hy5WuI$G!x&xuaJDQ=ttkPIxo8ckp~R9tv^*#7emS;pu3AUJw;(<%2)+_P5i8 z4B-?fJSI%#Wy)i+=?8@8IpIh%`I;TDWEE}$yBrWsYBm${^+jSCfki|a8?d>M2&s#I z$#9*iu5VgU@uKx`$51A_)S?{kyl5e_GlJRlx&s%Iw|ZnewWI0%u$F!R0=S)-TrakY z>la%WJLkI7z08wM%x&je=i4XSv~#Xq>1F1j<`i!g?=3vLx$x{?E`26{_G)k8%x2~c z_)g_}Q{QW<+gf%#)k?MJ+n3(WZSOy{;lAf~&-C^`)ttCLneR?_FZ3o)G-Z)DbDP>+ zSMFZg(!M{ON3Zt|7CwFE&u`v&?ymOqH&ds7&7BHU=R1pAOwB7m^fe5m?U%L6m_*^v zl%Wey9`fcjX{0sem^N!lLmGSO$piWVHKlRMVfG0r$orkC@vRmy>-p#R-%Dh2sZ>6qBn?z%*LBS_Pd7hBM8vJz6#3W|gV!F@;aD6tx09 zumi_px%dWN_Jd{LvzS_Td}={i7ljzIV1-G>4Tf)!0(_8iK!)g>6ZCbBfbZ+rS6B1n z89eaF^IfVSfW$NVHjce_><^2rWK(Wl=KQ_Ty3oO$r#tj5p4J>b z&geRSmxME9FC37}b8(ytexck(Vh547gtJiyUO^~N%$w+uB&#PLrDgTlqqL?zv%6PQ zXTv4jn}60dY){tI6Fab2 zs&8V+*p`OH0ttgKt>)PQSC}f^H6afY;q^!k^P}vI3d$mo1L_VBg5`u|FBea_RzU1B z4J+UD06%y&2@WN~o}3W>#!){9Px3G@&vTo?|CXGF;UI2bYtM%PKSeIV1g`~j8N@>w zWBeaU!peUW2*aED*SRmz%$I1-19b8MT6}=!zEY0h+4hZlvqv{)kA8(fd3Xvbnda(0 a<%8SG^gBQQ?JK`}A~-ydt`lt-nHLo@gXXG*07-T$X3wP)W$~ z94U#}Fz09@*$C4}*(m$QvT>fgLA1~yg5cH(l_37;-H;^)4-P z{U9NUe8I;Fu`-ekl(QQB*HSdl{5vkbls?x8#W2Di-<&caFou-{pH?-LEK zz*w@~LgN@R$p$HOY}a5bnDa8Rx;%YhmsbnEf#r_Xb$T|1Zm$-6{nb6s{H>=Kcx|&5 z-)lGQsyCR;t2MX=@7ch<`n`UQ_fGh?oE)0k#0l%Y?-*vaQYlkQ*V3A5scEZL(dC34 z%bR%vSOrwGg<{^cY~HBYqDrY+vm?MkRuK#>U$RpQqitPRsz&}-D$T2wZu1q*j!uHd z*Qr|4?eJxt=5^EV_HA*tX{L+4s}`&Jc}mMv4%vdK7pH9j)pod~Ti42(i8M>6)8Ajp zT!UcA(5hkRH1mF$Ue8o&M;ZB#y5#@=4hsU!%%D^-rd&#oR$w3Qy6d~!aDCKOMqzIA z&meA+jd<^E?cU^SeBVZ@cmCwu$%V-~ugy+6-DmH=dH2oL_{e%!>XX`swS_Yu{d&EB z_ga5ur9bmXX!cJm@0$3!e{jAr*I1lv_RF^gZ`_5u7gpm3)>Hj!slzL&!%LIR)Yx)j z%u`>ydvP@`GxedB)S;#EM}5uI$>qe!jn0AP!55pIW6RRm7A%nD{IG)t1{n*_FQBqL zJUt>nc)$oMYDD9;&^;dL;s{NUEYgJ5xkZw*{;jt1Z2 z^O=F)V%H|rgAmjr_k_ztmZmmwyY+n+fWp($`CMK%EQiC>Wy;d03$@JK@fPImNJ%$M zNSvGhg+g&nGEdQ$Qy1(wl20k>#*B5%?kTD>V+tnfRv!}MIYvjwv@}_?LtsvkE4GAo zxw1ZOhro#LL;oR~LjOIek1~g7hw-$xP%ZpbRqd*((5x;b@*)#s=?UkOFXQN;5 z-m_qRdTM?D(T#zj`{BD`XeE%?$LEeO?7#EMlc=~W_Lzv#7#)C#w&i}z<&0bwROW#Y z1wVg5hg%x_--E0kYJ>_txjYL(yt@%-;@=P8tqoB;LTUWH5WpXrk~xa$4^l>V3o=Sc zwP^kWrn2LP!f?7;)J<8i1+=pxR$0;VIg1X%XgiWC7po-$)C?k<*QVXEXzY43F&H2L zr5&V_(5fGJCQ^QOOM|U>9K=ns-ZwBmGdHvN?(Lao-;vq)zlV>m4Nt5LPc(;5toQwB z1Mr~t>v!v){^UtSz@r!3qlc~9k&3QTvuwc4EGdcsIaN{Yn4*-*TD6GvgrdAxRf~>> zV%lenqPW-xghA-69``~~V3e7tzD2h9gm~=fP*jX<^@K$SaAuPOwO>4pxAs6+%M*fw zN5jqsglPBU3#ZK?>jD&DtP}@2N&)VQLfKD-@rWz+mnJo2%JeU5>I-r!wmB z89;FYC$EH6sv0wJmBL3WmuwNg20elvI7VI*Ps-t_EttB zo{?scJS+S$y3yS;e`M~+Mlv-&I5)U(?9K}<)y3pWa@cLp@15Jb*xgL-2fC(XTSQ33 zXN5mSoNVEP2Hufu4CH@71+oPxz;|b9mmR21yj|AFLG!8Bc%;!)zoXsbO z(}xnSFdi>0E%Y3g0<#nYO8_uDMMq#fJqThGH^;ID9fg`KI{2{|M0jjTHBFsb7YsY@ z)Xrn5!mWe10syL;)uLtMlHC+z_p$ei_pb7@NqzK(@N94YBDgd!gMjRl>_t4+lEZ-D z;`rZA{N+S5d3ZLwE+szc_^@NS@6gigtI|u*T$2V?q=ALL#g3K1(MQ~>bgZ>gS{am= zPJJm&JOk%{!i=p?j)IE@bo(U;6NY0OoZDd=JOggll?FU_7^W>e zVL?eTQ`5aDFvrPZ7JBq$>|im;Fg=aUGpKL5*Xdd4*mk4a{o6nZF7JBgxljxSOrYyG zKtP;!CE4?``{Nt4!VXNE$%8F;9$A`Rl_pxDV|emkN-r_!reJp&@JW#*bPUC-D9)ic zkKz~#y!)MtV;pNJkn`@v@stmX@u!`Fczi1+h~thm+Tbl!QuBtQ$Voe*C|WrOmy~q1 znlD=L&%#cehQFOX6^*=@Iq@(}< diff --git a/tests/__pycache__/test_descriptives.cpython-312-pytest-9.0.2.pyc b/tests/__pycache__/test_descriptives.cpython-312-pytest-9.0.2.pyc deleted file mode 100644 index 45748962400516fec9ed121aa3073abed64c05ba..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 3903 zcmc&%O>7&-6`t82az#m2sDC;BVQeQ5t-sioT&GbCJ5{O}h2)@iP?$(Tthp;^WpYW+ zF6~gDS{HRt2XGM+I#2=(gbs>EhA#nn@g+c<9CFAdvJ_jG=un`&uqWFJY9yz=H{?ps7Bg+vikpznietv7xMF}(E`uruGdC0p{X9AJKH$y}OWU@|hOQ*p z#i3MYKw!u~8AG-*T)-@62x)XPG!3RQ3}awscU|-tEZf`wBF-})SIJsq$4%qb*h=F| zYi%9*W4UAVV{^wp8e41bnpbX}|HN9bR+@WP+q&|v=U(4n#~0!&&3$Vvtv84Bqq))L zmQdm3gO-<9yI#y^a+yzqg_DK$+#erx4J>sHUC%n9Sc&U5%LD$50FHHQqqkF!6?%Y~?XMQ|44rWZCMMrZ-=MT({Zqx1T&RaRF zpv)yv;EmiHh4unlZaY{#dJ=4SA@G>LC7o)xYblSCq-H!nJ7B1~*5e zJP?g0l14g?{ibO2Qd*CfX2h-DA?vV?u$Pc19zk?;{6Maeb*W7auvfg9`iPIYSBv5HftC9LuuEJ}5K7^|Q>=)t{31^7xR-N0F|(q&1sB7Xtu?xz$um`kF$Xtrq6 z6^!>dfJ9^(4_VXda{()9OuB$r2|8z?@@MG`)SH}iqd{OHHRM~8hiqU&r50tpY^RXz z;En)8=M+W0JO#6A6U{T}^C?7_&y(%#AC*3;LOzxscIk%+?kwCUuo zK$TPRz{08jg-#yb0N>g+jS@AjzH7Wop9R-XR~){vG)oPNz-mYyTvOZ=Z?~p_q%0&W za4jV3>r{oLuS#cqNq_C`8Nl~Rh6b?1NYKX@O15;KD4{Hdf`Hnv%0{q{s3)Aw8+FGq znfn$dFAtUmC*z5vQ)1mIyWhu5CmnaIVjw60wG5I`ED|$)+sDHe;R9fUgU-52R)Zb+ zow=Qlg8P?(`wL^Aow|K$IXHOTx2iQg(z=(l?)lDwe_`+9z7=gi@b)Zed*&|}rWXFN z=&Wc*Hp{6CyBFz-cCeNweXsQiu=#&Rk3r!fQU;Xmq5;96N~V5nm7=g-gw>Z9k1k_N zoI&*XC)p8ygopg9T*u9f2Cl+AvLYes=oMMh_pv$2Rz;mcs=)#nV4 z%8EYJ{63JaF*1I8d^y-JV&voIkD71B=8vvu`@rkfqe}e93ojOax70JR$X2w&n}Y)< z-d*bHTm0?!+N&Z$CIJ=q;lcJJ!8m2dkQ_&XXvSVfA}W`tZoN1}f>7z;^ZOckD#_|Q z^l3m<``34<>Y6`}gSf-RdktC`-^AQSv%KqH)18Mz%v^`VW zY2XO~@?(E1Jyg&hg$^x+4*f`A@VJ*KP1k20$UUn{@ao%tnfTMhho>LQMA<2B>VIn| BCi?&Y diff --git a/tests/__pycache__/test_refactored_modules.cpython-312-pytest-9.0.2.pyc b/tests/__pycache__/test_refactored_modules.cpython-312-pytest-9.0.2.pyc deleted file mode 100644 index 545f97ed62f25163f6f1aa5d6d7cad5c57e8b9c2..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 10817 zcmc&)TWlNGnVuOAsS$OTW!kdrm`)^1lx!-##!ghnO`nCw#m$0-Tcj^E|+kH?UibPUTpbtg= z|ICHta45-$+C2dOIdjf`E9U^1L+UHctL%)ond~5FLv@=O^v43MSyLCQqcuXTo<4)I26*}sjmf516XoF(&B>Nn3(flz zt;x1n8_fq2?a58CO*CJZ=typkZKio45lU`}ZDARXImO8J?=W&ushfD(o?_{F$7kka zos_cyI2+gC+(tQ@fU|iG&MwN?0-UXDaBioZZNS;S2ImgSxd}Ks*5G_X*(qz;Nth^OQe00HMGhy^@?=8M!g@MQw45Z$8j&POa_K`%9_thgwEb9iQ6lMPY`FC zmY;;j45KhH7K9swI_8tPcbFJ2`+)jo9%vxzj|7aJxSD63b*%a$xhIjKe5I>cqH%jz z9J>yFD6z|b05ZeqPTd3t1+JvqCy|*5xT@e)X75<7*UKk!Ip|U6?Y<@iJ6C#6G?emo zt~8SDd*5e|dX*8KS?Mh{uPu>K>-3&wBK*iA0?s1-%ZSfl-!#~)!R8|M2A7eI`V*2a zog`9HF}PI52!Kqb#uXy~4>_GQ{P;{6oIFNw?TkQ@O3`iB;58*Nrr|P#|8j2dVmhe| zl1Y#%GKk_hn91(dbV=9t>gjailB!o1fH>*eAX2e(Ttu^fEt<(1eofK;Ig{&ILlB~r zZ7WPf`#O-fnMds%Gkd1@;3F~}nSJ4hdp{igNWG=r<332f3U@t+Vpu@Qmb0s|LG$KlwME%??dX_`XxVQn}Cdkv^T+OPpA zHO6Q#>%R8Jq?CZ2yeMUq+Z@@3RRI8w$%L-qKM#jV7nb!>Ik0*MdC(_GN)suhndl`Z zC7q~m=1#384eRXa2^dv71>|k!VcX`JuIa9Yw(de(cmCW*uibj>-i1%wx{GbkPX!(d zjSIq#g0N$D45TP=EAc=$K+Cokgsrooo7-<}&tIMwqGSj35)jCA&6oMa8neGplIP1+U=K2f=jZbhEkh4tln)y+{ut$rP* z0&3NnnU`9faMY^9R;El@Mt2ac0VlA;(j2Ywz)U=!EZl7 zZ6B#8o8UoEUQiD!atU}z2y5^l5?o93aCSn@RMn=$p)lsUSC<1*t?t2=C~%-wh2Zu2 zYxUQguQk6%9thhkHT(O})){qLok>rp^P`|SAGzN*Z<)C;ePKqN7V~F|oBO_~ljj3@}N$HwVuY$2o z;z~w01hQY$Ks_qAc{lzFmR3%YUg*67UPC%sTjp?}WNiyRcW4b!@NV`DcK3Y+(ruS0$u?NCl#AAHBlt27H z=(pb{^1J7S$SML6q+kQv0>iu0;}~Go0`WMy6~?#3sC*9ShmUi3=4|k(rFV4)vOq{I zBrhxT?A_)o2`30uz%5kd7z|II2U4w|!H832HLjB$s5X2FC1q>m2CD|VI;}!*8mROLHQ?LNUL-pq6xfA(s-8=A!KvnQ74@28#E=^xr2t^8^$Xxfwy|;Vs9lYOh zZ%?8B`C{n!R0CDUXnMWBY4)4N&dA*ZbNa_uZeO`GG%p;X@&cNfV6cMhLxSi@4j`cl z_$2233CTeuXtSA!co=g?P(?dBcwmXI_eYjO0e}B;tIvPbY=CTEQU|-R*V`e__2N30 zYhrJ{N_~uYNip+0WY%(XOYS8&jg}_@ZmOe&%UnuF&_Ud|k;M+^J>oa@5atbD21k-0 z*D5-vzfy8){2%Jjoraz?^bJ7sbcAMxr-x@pe|QFbwE9P(J;hLT4UO?fXt@y@%T&0I^z?j-gKwK&Wys!(mF5Ler^}>$1 zy)f|Q9|%vEL&5nUb$u8IyolV6e2|(K4pnOo6ij+7{u?qID;QK`I7bG$w^9x5sOe|HiqRz5FkXB4r5$3Ph7t!2GFISI6`pa1%9D zvnGn4o<9A{9nTdzhD;pqDTaEzaoky~pF`V?J+8GZ{x2J8lYQQ?wu&RokDLU2u1}>& zQc3_i8}+u#WLE)d8d4)D^G01fotR9fv}&X_je9$UHfBZgv$=z-Z+aY~JvTmU!(epe!Hp8`~=v zr}APcN3Vv;aksL*5_X*!!b3W&4%AE=AD;ry=6=ksF@odr+4F$wB>vvuwNw!$WdajFOVd@ATih%NeW+(ETR<>;SB#cNl#`B|79sLsg%c+$PltR z@%*x|5+0#dv;w`BE8W|?IfcDt!tv#GQ|1TJ1BeaJMQ6mVW^UxinS{g+5m&ba*{PQphE8|LFYfhdPJnxIuv8%@eClxbYMv>q>4-)oy8(Q;t%V5&qH zKa5V@io0|gYn*-LcBPUB4$*I?&J&xwq=`l|?GG^kF)RrMAKt+k+qo#m6OcO#F$f2P z5>)q#G7t!!IH6?<0Y<>IRZNQz57sJYp9U;eU1HNJd<(kKFpS~>0H&|M(9vD!=+3_~ z$Icxnb_|;4Vx$-v@ILVPx9K+E5^ltSUys{wJK?skb`{13rb4C>X;W+{x=}ic)~(Ee_xr{UcW@ z3&uE#xU9_a$XrGv!M!K{3MAqyoha&iRh#wnfQi6s)S+{ofp05%usH@`pgG==6TGuG zd{CvKScmOvax)HVwaP`;DGC<{%9g`wA4<@a{D!sVPy6?4sdaLl~V@6S|txjK<@ctmwRT zikU2)hp2h%;7CS^a)qcmxL!_?QD>J0p)ox^5;26yl&a&4;ZIE_Gg*p^Mj!)U#7LSM zKsyLS(a&5&>ob=qeT$ZTxDc)SEXn+m=GhueWkrhYc7K7$2d}>8MuI@@4TWOutM<28~bZCFWn|Z-S@@xBGP~_dOVlvvh~qew9--} zXl5D$O8AQsd}R`ZR~n$F;UDURP@{lK4V5^9M~QGr)?b7n^NXTmWEOhBu!!~(AfNkK zmi>bBvHYKd49mX5{9DWKnD$>YoBoeE`zMZLxjzLM_6++O^ZIAZOP?{rD}gq4Ab)XT z_cQSQ&`?ynLieFU z_aQ79T6waSjpUCkJaw?})WH=7URHLuvIBF%Lf_Fs-%%_X_Fa#Rx4nJ( TKhAvT%)2ka9U8xl!tmb!`HbOO diff --git a/tests/__pycache__/test_regression_module.cpython-312-pytest-9.0.2.pyc b/tests/__pycache__/test_regression_module.cpython-312-pytest-9.0.2.pyc deleted file mode 100644 index ec908058e2fc77495f2dc0c6ec149e97d21d3b52..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 4330 zcmd6qO>7&-6@X{|$Q3D3f3~6@+hS71F8a1yi7FdI?*CDnmrT@ z7k)PdFQ4wB-CA_+2qCP8C(w2zY!l=*{R~G+e2GuV#UFzr_8PTpD_JykTh$r0l7)Pc zC39+$eY$Z8%9frtlR4e|bOWoC>J^$vT5~z7GIKQc0DGKCaZcAc-2~M|8knSbr^leG zsaU@#s*Gw1Wh~DenN28GCeLWbpNsq9{KEE8$Se~|(j4p{?A&#d7Bn8Ds0kn?O#~Ui z{YXbkkyO;#hqDhyJ)hT#IhqOj!*1cE2=3ALA>aXX*S&FS_|g^l2jp;TC%JH>Dj7%h|>$)fh_h55l&h>W^*xbxN{M`>;yVIzbx|IlmbFHtZ!?ogrXe}bPF8N?b8`j{w=KdmR2p8vpd7FvRy$w{05_j zw*GCK8}Ra~3W?nPmG>HR#d=-VS_OBUCWf{8&qlb~nI< zTM7t(t1DUUdSX$!CwCxZw?>u^FCAVPSv$OXxF$b-Pj0{2d86}ov@DgcR{O_l@`*;( zLbZRgCQpHCOYX18{kMBpUSE52_08%)YJL2!e4IT7W3-0s!{#_@Hfrt~KudB?^#qv9Dtn^bD7Ux9b3AdqRDX3+(|wZYC9TL1RNu-vC;) z0Dm(*V>A32vtbZUmxtN3HEn{+GBcF5qdID8Dr0IXCAhq?$59#g&EcS3bFuLv&cU#O-(gk2!2`hArs=4t)oCP1uXXhPEQ`-HJgW zWH9J=S6;g2N((~h$ld52k8zDlEVC!IL%lh+7`6$iro!e4!>k<(3ar^jrNxJ;sBU?US3dhIY-v5Ssh+b)D1A$AlBml0bRNI9eGvx_H`FjU=ClvJ-1RTM3sfeFZO74@90o0il5e4(V&O9M)w zghw&zIWG7sk1~3d!AUx8b(IqhbTKe?PfmrWEIygB;7DfCD3m-L_62WaKZtZ=OfO>X z45|#zwS|7wk@3KPszLiiq&`I3zPot*wk9N{KbPHznzs|+3cia_DvIgW_!i}POy1NX%4cV79; R`Cp#@_0Jv(L>zR7`XBHBUseDB From dfe8ecb4e3c0f4e4b77541b12466f2b556f93dee Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 17 Dec 2025 15:37:27 +0000 Subject: [PATCH 9/9] Add CODE_REVIEW_RESPONSE.md documenting all fixes Co-authored-by: EthanThePhoenix38 <103653068+EthanThePhoenix38@users.noreply.github.com> --- CODE_REVIEW_RESPONSE.md | 109 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 109 insertions(+) create mode 100644 CODE_REVIEW_RESPONSE.md diff --git a/CODE_REVIEW_RESPONSE.md b/CODE_REVIEW_RESPONSE.md new file mode 100644 index 0000000..37f050a --- /dev/null +++ b/CODE_REVIEW_RESPONSE.md @@ -0,0 +1,109 @@ +# Code Review Response + +This document summarizes the changes made in response to the code review feedback. + +## Review Comments Addressed + +### 1. TimeSeriesModule.py - Line 82: Missing Sampling Rate Parameter + +**Issue**: The `rfftfreq` function needs a sampling rate parameter (d) to produce correct frequency values. Without it, frequencies are calculated assuming unit sampling rate, which is incorrect for time series with specific time intervals. + +**Fix Applied** (Commit: ae2fe0d): +- Added automatic detection of sampling interval from series index +- For DatetimeIndex/TimedeltaIndex: extracts frequency from index.freq or calculates from first two points +- For explicit timestamps: uses timestamp deltas +- Falls back to default of 1.0 for non-temporal data +- Fixed deprecation warning by using `pd.Timedelta()` instead of `.delta` + +**Code Added**: +```python +# Determine sampling interval for correct frequency calculation +sampling_interval = 1.0 +if isinstance(series.index, (pd.DatetimeIndex, pd.TimedeltaIndex)): + if hasattr(series.index, "freq") and series.index.freq is not None: + sampling_interval = pd.Timedelta(series.index.freq).total_seconds() + elif len(series.index) > 1: + delta = series.index[1] - series.index[0] + sampling_interval = delta.total_seconds() +# ... then uses: freqs = np.fft.rfftfreq(len(series), d=sampling_interval) +``` + +### 2. TimeSeriesModule.py - Lines 139-142: Same Sampling Rate Issue in get_seasonality() + +**Issue**: The same sampling rate parameter issue exists in the `get_seasonality()` method. + +**Fix Applied** (Commit: ae2fe0d): +- Applied identical sampling interval detection logic +- Used `pd.Timedelta()` to avoid deprecation warnings +- Ensures period detection is accurate for time series with explicit time scales + +### 3. TimeSeriesModule.py - Line 134: Unused Variable + +**Issue**: The `acf` variable (autocorrelation) is calculated but never used in the `get_seasonality()` method. + +**Fix Applied** (Commit: ae2fe0d): +- Removed the unused line: `acf = pd.Series(series).autocorr()` +- Improves performance by eliminating unnecessary computation + +### 4. FrequenceModule.py - Lines 92-100: Logic Issue with normalize=True + +**Issue**: When `process()` is called with `normalize=True`, the result DataFrame has "Fréquence Relative" columns instead of "Fréquence". This causes `get_frequence_relative()` to fail because it expects the "Fréquence" column to exist. + +**Fix Applied** (Commit: ae2fe0d): +- Modified `process()` to always compute and store absolute frequencies in `self.result` +- When `normalize=True`, returns relative frequencies as a separate DataFrame +- Internal `self.result` always has "Fréquence" column, ensuring `get_frequence_relative()` works correctly + +**Updated Logic**: +```python +# Always store absolute frequencies +freq = series.value_counts(normalize=False) +self.result = pd.DataFrame({"Fréquence": freq, "Fréquence Cumulée": cum_freq}) + +if normalize: + # Return relative frequencies separately + rel_freq = self.result["Fréquence"] / self.result["Fréquence"].sum() + rel_cum_freq = rel_freq.cumsum() + return pd.DataFrame({ + "Fréquence Relative": rel_freq, + "Fréquence Relative Cumulée": rel_cum_freq, + }, index=self.result.index) +``` + +## Additional Improvements + +### Housekeeping +- Removed accidentally committed cache files (`__pycache__`, `.coverage`) +- Updated `.gitignore` to prevent future commits of cache files + +## Testing + +All 12 existing tests pass: +``` +tests/test_basic_imports.py::TestBasicImports::test_matplotlib PASSED +tests/test_basic_imports.py::TestBasicImports::test_numpy PASSED +tests/test_basic_imports.py::TestBasicImports::test_pandas PASSED +tests/test_basic_imports.py::TestBasicImports::test_sklearn PASSED +tests/test_correlation.py::TestCorrelationAnalysis::test_analyze_dataframe PASSED +tests/test_correlation.py::TestCorrelationAnalysis::test_analyze_univariate PASSED +tests/test_descriptives.py::TestDescriptiveStatistics::test_analyze_dataframe PASSED +tests/test_descriptives.py::TestDescriptiveStatistics::test_analyze_list PASSED +tests/test_regression_module.py::TestRegressionModule::test_linear_regression_fit PASSED +tests/test_regression_module.py::TestRegressionModule::test_linear_regression_predict PASSED +tests/test_regression_module.py::TestRegressionModule::test_regression_coefficients PASSED +tests/test_regression_module.py::TestRegressionModule::test_regression_metrics PASSED +``` + +## Commits Made + +1. **25715b3**: Apply code review feedback: fix sampling rate, remove unused var, fix normalize logic +2. **ae2fe0d**: Remove cache files and update gitignore + +## Impact + +These fixes ensure: +- ✅ Correct frequency and period calculations for time series with real-world time scales +- ✅ Consistent API behavior for FrequenceModule regardless of normalize parameter +- ✅ Better code quality with no unused variables +- ✅ Cleaner repository without cache files +- ✅ Modern pandas API usage (pd.Timedelta instead of deprecated .delta)