Fix code style with black util

NPodlozhniy · May 19, 2023 · commit bf252d5 · 1 parent 4e20a59

Showing 10 changed files with 601 additions and 465 deletions.
diff --git a/podlozhnyy_module/__init__.py b/podlozhnyy_module/__init__.py
@@ -1,7 +1,6 @@
 import numpy as np
 import pandas as pd
 import seaborn as sns
-
 from matplotlib import pyplot as plt
 
 from podlozhnyy_module import (
@@ -13,10 +12,10 @@
     permutation,
     regression,
     timeseries,
-    timetest
+    timetest,
 )
 
-print('Привет! Ты импортировал модуль созданный https://github.com/NPodlozhniy')
-print('В нем собраны часто востребованные в работе аналитика методы')
-print('Посмотреть полный cписок пакетов можно с помощью команды dir(<название библиотеки>)')
-print('Приятного использования!')
+print("Привет! Ты импортировал модуль созданный https://github.com/NPodlozhniy")
+print("В нем собраны часто востребованные в работе аналитика методы")
+print("Посмотреть полный cписок пакетов можно с помощью dir(<название библиотеки>)")
+print("Приятного использования!")
diff --git a/podlozhnyy_module/charts.py b/podlozhnyy_module/charts.py
@@ -1,8 +1,11 @@
-from podlozhnyy_module import np, pd, sns, plt
+from podlozhnyy_module import np, pd, plt, sns
 
-sns.set_style(rc = {'figure.facecolor': 'floralwhite'})
+sns.set_style(rc={"figure.facecolor": "floralwhite"})
 
-def plot_hist(df: pd.core.frame.DataFrame, feature: str, target: str, n: int = 10) -> None:
+
+def plot_hist(
+    df: pd.core.frame.DataFrame, feature: str, target: str, n: int = 10
+) -> None:
     """
     Строит приятную гистограмму распределения признака от целевой переменной
 
@@ -13,18 +16,15 @@ def plot_hist(df: pd.core.frame.DataFrame, feature: str, target: str, n: int = 1
     target: Целевая переменная для разбиения признака
     n: Кол-во bin-ов, default=10
     """
-    df2 = pd.melt(df[[feature, target]], id_vars=target,
-                  value_vars=[feature], value_name='target')
-    bins = np.linspace(df2['target'].min(), df2['target'].max(), n + 1)
-
+    df2 = pd.melt(
+        df[[feature, target]], id_vars=target, value_vars=[feature], value_name="target"
+    )
+    bins = np.linspace(df2["target"].min(), df2["target"].max(), n + 1)
+
     g = sns.FacetGrid(
-        df2,
-        col='variable',
-        hue=target,
-        palette='rainbow',
-        col_wrap=2,
-        height=10)
-    g.map(plt.hist, 'target', alpha=0.5, density=True, bins=bins, ec="k")
+        df2, col="variable", hue=target, palette="rainbow", col_wrap=2, height=10
+    )
+    g.map(plt.hist, "target", alpha=0.5, density=True, bins=bins, ec="k")
     g.axes[-1].legend()
     plt.show()
 
@@ -39,18 +39,16 @@ def plot_stacked_hist(df: pd.core.frame.DataFrame, feature: str, target: str) ->
     feature: Признак, распределение которго, требуется посмотреть
     target: Целевая переменная, будет на оси x графика
     """
-    overview = pd.crosstab(
-        df[target],
-        df[feature]).sort_values(
-        target,
-        ascending=True)
+    overview = pd.crosstab(df[target], df[feature]).sort_values(target, ascending=True)
     sum_series = overview.sum(axis=1)
     for col in list(overview.columns):
         overview[col] = overview[col] / sum_series
-    overview.plot(kind='bar', stacked=True)
+    overview.plot(kind="bar", stacked=True)
 
 
-def plot_dual_axis(data: pd.core.frame.DataFrame, col1: str, col2: str, title: str = None):
+def plot_dual_axis(
+    data: pd.core.frame.DataFrame, col1: str, col2: str, title: str = None
+):
     """
     Построение графика с двумя осями ординат
 
@@ -61,43 +59,18 @@ def plot_dual_axis(data: pd.core.frame.DataFrame, col1: str, col2: str, title: s
     col2: Название дополнительного признака (правая ось)
     title: Заголовок графика
     """
-    fig, ax1 = plt.subplots(
-        figsize=(12, 6)
-    )
+    fig, ax1 = plt.subplots(figsize=(12, 6))
     ax2 = ax1.twinx()
 
-    ax2.bar(
-        data.index,
-        data[col2],
-        alpha=0.15,
-        fill=True,
-        edgecolor='b'
-    )
-    ax1.plot(
-        data.index,
-        data[col1],
-        'go--',
-        linewidth=2
-    )
+    ax2.bar(data.index, data[col2], alpha=0.15, fill=True, edgecolor="b")
+    ax1.plot(data.index, data[col1], "go--", linewidth=2)
 
     def naming(name):
-        return ' '.join(
-            [x[0].upper() + x[1:]
-             for x in name.split('_')
-            ]
-        )
+        return " ".join([x[0].upper() + x[1:] for x in name.split("_")])
 
     if data.index.name:
-        ax1.set_xlabel(
-            naming(data.index.name)
-        )
-    ax1.set_ylabel(
-        naming(col1),
-        color='g'
-    )
-    ax2.set_ylabel(
-        naming(col2),
-        color='b'
-    )
+        ax1.set_xlabel(naming(data.index.name))
+    ax1.set_ylabel(naming(col1), color="g")
+    ax2.set_ylabel(naming(col2), color="b")
     plt.title(title)
     plt.show()
diff --git a/podlozhnyy_module/collocation.py b/podlozhnyy_module/collocation.py
@@ -1,4 +1,5 @@
 from itertools import combinations
+
 from nltk.corpus import stopwords
 
 from podlozhnyy_module import pd

diff --git a/podlozhnyy_module/correlation.py b/podlozhnyy_module/correlation.py
@@ -1,6 +1,7 @@
 from scipy.stats import t as student
 
-from podlozhnyy_module import pd, sns, plt
+from podlozhnyy_module import pd, plt, sns
+
 
 def plot_corr_matrix(
     df: pd.core.frame.DataFrame,
@@ -19,9 +20,9 @@ def plot_corr_matrix(
     if features is None:
         features = df.columns[df.dtypes != "object"]
     corr = df[features].corr(method=method)
-    plt.figure(figsize=(10, 10), facecolor='floralwhite')
-    sns.heatmap(corr, vmax=1, square=True, annot=True, cmap='cubehelix')
-    plt.title('Correlation between different features')
+    plt.figure(figsize=(10, 10), facecolor="floralwhite")
+    sns.heatmap(corr, vmax=1, square=True, annot=True, cmap="cubehelix")
+    plt.title("Correlation between different features")
     bottom, top = plt.ylim()
     plt.ylim([bottom + 0.05, top - 0.05])
     plt.show()

diff --git a/podlozhnyy_module/pareto.py b/podlozhnyy_module/pareto.py
@@ -1,5 +1,5 @@
 from scipy.optimize import minimize
-from scipy.stats import pareto, bernoulli
+from scipy.stats import bernoulli, pareto
 
 from podlozhnyy_module import np
 
@@ -51,7 +51,6 @@ def theoretical(x):
     return {"alpha": result.x[0], "loc": result.x[1], "scale": result.x[2]}
 
 
-
 class ParetoExtended:
     """
     Распределение Парето дополненное значением слева принимаемым с заданной вероятностью.

diff --git a/podlozhnyy_module/permutation.py b/podlozhnyy_module/permutation.py
@@ -1,22 +1,24 @@
-from itertools import product, combinations
+from itertools import combinations, product
 
 from podlozhnyy_module import np
 
 
 def permutation_t_stat(sample1, sample2):
     return np.mean(sample1) - np.mean(sample2)
 
-    
+
 def get_random_permutations(n, max_permutations):
-    return set([tuple(x) for x in 2 * np.random.randint(2, size = (max_permutations, n)) - 1])
+    return set(
+        [tuple(x) for x in 2 * np.random.randint(2, size=(max_permutations, n)) - 1]
+    )
 
 
 def permutation_zero_dist_one_samp(sample, mean, max_permutations):
     centered_sample = np.array(sample) - mean
     if max_permutations:
         signs_array = get_random_permutations(len(sample), max_permutations)
     else:
-        signs_array = product([-1, 1], repeat = len(sample))
+        signs_array = product([-1, 1], repeat=len(sample))
     return [np.mean(centered_sample * signs) for signs in signs_array]
 
 
@@ -36,12 +38,23 @@ def permutation_zero_dist_ind(sample1, sample2, max_combinations):
     if max_combinations:
         indices = get_random_combinations(n1, n2, max_combinations)
     else:
-        indices = [(list(index), filter(lambda i: i not in index, range(n)))
-                   for index in combinations(range(n), n1)]
-    return [joined_sample[list(i[0])].mean() - joined_sample[list(i[1])].mean() for i in indices]
-
-
-def permutation_test(test, control, kind: str = 'independent', max_permutations: int = None, alternative: str = 'two-sided'):
+        indices = [
+            (list(index), filter(lambda i: i not in index, range(n)))
+            for index in combinations(range(n), n1)
+        ]
+    return [
+        joined_sample[list(i[0])].mean() - joined_sample[list(i[1])].mean()
+        for i in indices
+    ]
+
+
+def permutation_test(
+    test,
+    control,
+    kind: str = "independent",
+    max_permutations: int = None,
+    alternative: str = "two-sided",
+):
     """
     Проводит одно- или двух- выборочный статистический тест, используя семейство перестановочных критериев
     Возвращает значение p-value для заданного типа альтернативы
@@ -59,37 +72,34 @@ def permutation_test(test, control, kind: str = 'independent', max_permutations:
     alternative: str
         Тип альтернативы: {'two-sided', 'less', 'greater'}, default='two-sided'
     """
-    if alternative not in ('two-sided', 'less', 'greater'):
-        raise ValueError("alternative not recognized, should be 'two-sided', 'less' or 'greater'")
-
-    if kind not in ('independent', 'related'):
+    if alternative not in ("two-sided", "less", "greater"):
+        raise ValueError(
+            "alternative not recognized, should be 'two-sided', 'less' or 'greater'"
+        )
+
+    if kind not in ("independent", "related"):
         raise ValueError("kind not recognized, should be 'related' or 'independent'")
-    
+
     if isinstance(control, int) or isinstance(control, float):
-        zero_distr = permutation_zero_dist_one_samp(
-            test, control, max_permutations
-        )
-    elif kind == 'related':
+        zero_distr = permutation_zero_dist_one_samp(test, control, max_permutations)
+    elif kind == "related":
         if len(test) != len(control):
             raise ValueError("related samples must have the same size")
         zero_distr = permutation_zero_dist_one_samp(
             np.array(test) - np.array(control), 0.0, max_permutations
         )
     else:
-        zero_distr = permutation_zero_dist_ind(
-            test, control, max_permutations
-        )
+        zero_distr = permutation_zero_dist_ind(test, control, max_permutations)
 
     t_stat = permutation_t_stat(test, control)
 
-    if alternative == 'two-sided':
-        return sum([1. if abs(x) >= abs(t_stat)
-                   else 0. for x in zero_distr]) / len(zero_distr)
+    if alternative == "two-sided":
+        return sum([1.0 if abs(x) >= abs(t_stat) else 0.0 for x in zero_distr]) / len(
+            zero_distr
+        )
 
-    if alternative == 'less':
-        return sum([1. if x <= t_stat else 0. for x in zero_distr]
-                   ) / len(zero_distr)
+    if alternative == "less":
+        return sum([1.0 if x <= t_stat else 0.0 for x in zero_distr]) / len(zero_distr)
 
-    if alternative == 'greater':
-        return sum([1. if x >= t_stat else 0. for x in zero_distr]
-                   ) / len(zero_distr)
+    if alternative == "greater":
+        return sum([1.0 if x >= t_stat else 0.0 for x in zero_distr]) / len(zero_distr)