Skip to content

Commit

Permalink
Fix code style with black util
Browse files Browse the repository at this point in the history
  • Loading branch information
NPodlozhniy committed May 19, 2023
1 parent 4e20a59 commit bf252d5
Show file tree
Hide file tree
Showing 10 changed files with 601 additions and 465 deletions.
11 changes: 5 additions & 6 deletions podlozhnyy_module/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import numpy as np
import pandas as pd
import seaborn as sns

from matplotlib import pyplot as plt

from podlozhnyy_module import (
Expand All @@ -13,10 +12,10 @@
permutation,
regression,
timeseries,
timetest
timetest,
)

print('Привет! Ты импортировал модуль созданный https://github.com/NPodlozhniy')
print('В нем собраны часто востребованные в работе аналитика методы')
print('Посмотреть полный cписок пакетов можно с помощью команды dir(<название библиотеки>)')
print('Приятного использования!')
print("Привет! Ты импортировал модуль созданный https://github.com/NPodlozhniy")
print("В нем собраны часто востребованные в работе аналитика методы")
print("Посмотреть полный cписок пакетов можно с помощью dir(<название библиотеки>)")
print("Приятного использования!")
79 changes: 26 additions & 53 deletions podlozhnyy_module/charts.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
from podlozhnyy_module import np, pd, sns, plt
from podlozhnyy_module import np, pd, plt, sns

sns.set_style(rc = {'figure.facecolor': 'floralwhite'})
sns.set_style(rc={"figure.facecolor": "floralwhite"})

def plot_hist(
    df: pd.core.frame.DataFrame, feature: str, target: str, n: int = 10
) -> None:
    """
    Draw overlaid, density-normalised histograms of a feature, one per target value.

    Parameters
    ----------
    df: DataFrame containing both the `feature` and the `target` columns
    feature: Name of the column whose distribution is plotted
    target: Name of the column used to split (and colour) the feature values
    n: Number of bins, default=10
    """
    # Reshape to long format: feature values land in the "target" value column,
    # while the column named by `target` is kept as the identifier.
    long_frame = pd.melt(
        df[[feature, target]],
        id_vars=target,
        value_vars=[feature],
        value_name="target",
    )
    values = long_frame["target"]
    # n + 1 equally spaced edges -> n bins shared by every target group,
    # so the overlaid histograms are directly comparable.
    bin_edges = np.linspace(values.min(), values.max(), n + 1)

    grid = sns.FacetGrid(
        long_frame,
        col="variable",
        hue=target,
        palette="rainbow",
        col_wrap=2,
        height=10,
    )
    grid.map(plt.hist, "target", alpha=0.5, density=True, bins=bin_edges, ec="k")
    grid.axes[-1].legend()
    plt.show()

Expand All @@ -39,18 +39,16 @@ def plot_stacked_hist(df: pd.core.frame.DataFrame, feature: str, target: str) ->
feature: Признак, распределение которго, требуется посмотреть
target: Целевая переменная, будет на оси x графика
"""
overview = pd.crosstab(
df[target],
df[feature]).sort_values(
target,
ascending=True)
overview = pd.crosstab(df[target], df[feature]).sort_values(target, ascending=True)
sum_series = overview.sum(axis=1)
for col in list(overview.columns):
overview[col] = overview[col] / sum_series
overview.plot(kind='bar', stacked=True)
overview.plot(kind="bar", stacked=True)


def plot_dual_axis(
    data: pd.core.frame.DataFrame, col1: str, col2: str, title: str = None
):
    """
    Plot two columns of a frame against its index on twin y-axes.

    `col1` is drawn as a green dashed line with round markers on the primary
    axis; `col2` is drawn as translucent bars on the twin (right) axis.

    Parameters
    ----------
    data: DataFrame whose index supplies the x values
    col1: Name of the main feature (primary axis)
    col2: Name of the additional feature (right axis)
    title: Plot title
    """
    _, ax1 = plt.subplots(figsize=(12, 6))
    ax2 = ax1.twinx()

    ax2.bar(data.index, data[col2], alpha=0.15, fill=True, edgecolor="b")
    ax1.plot(data.index, data[col1], "go--", linewidth=2)

    def naming(name):
        # Turn snake_case into "Title Words" without lower-casing the rest of
        # each word. x[:1] (not x[0]) keeps empty parts produced by leading,
        # trailing or doubled underscores from raising IndexError.
        return " ".join(x[:1].upper() + x[1:] for x in name.split("_"))

    # The x label is only set when the index actually carries a name.
    if data.index.name:
        ax1.set_xlabel(naming(data.index.name))
    ax1.set_ylabel(naming(col1), color="g")
    ax2.set_ylabel(naming(col2), color="b")
    plt.title(title)
    plt.show()
1 change: 1 addition & 0 deletions podlozhnyy_module/collocation.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from itertools import combinations

from nltk.corpus import stopwords

from podlozhnyy_module import pd
Expand Down
9 changes: 5 additions & 4 deletions podlozhnyy_module/correlation.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from scipy.stats import t as student

from podlozhnyy_module import pd, sns, plt
from podlozhnyy_module import pd, plt, sns


def plot_corr_matrix(
df: pd.core.frame.DataFrame,
Expand All @@ -19,9 +20,9 @@ def plot_corr_matrix(
if features is None:
features = df.columns[df.dtypes != "object"]
corr = df[features].corr(method=method)
plt.figure(figsize=(10, 10), facecolor='floralwhite')
sns.heatmap(corr, vmax=1, square=True, annot=True, cmap='cubehelix')
plt.title('Correlation between different features')
plt.figure(figsize=(10, 10), facecolor="floralwhite")
sns.heatmap(corr, vmax=1, square=True, annot=True, cmap="cubehelix")
plt.title("Correlation between different features")
bottom, top = plt.ylim()
plt.ylim([bottom + 0.05, top - 0.05])
plt.show()
Expand Down
3 changes: 1 addition & 2 deletions podlozhnyy_module/pareto.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from scipy.optimize import minimize
from scipy.stats import pareto, bernoulli
from scipy.stats import bernoulli, pareto

from podlozhnyy_module import np

Expand Down Expand Up @@ -51,7 +51,6 @@ def theoretical(x):
return {"alpha": result.x[0], "loc": result.x[1], "scale": result.x[2]}



class ParetoExtended:
"""
Распределение Парето дополненное значением слева принимаемым с заданной вероятностью.
Expand Down
72 changes: 41 additions & 31 deletions podlozhnyy_module/permutation.py
Original file line number Diff line number Diff line change
@@ -1,22 +1,24 @@
from itertools import product, combinations
from itertools import combinations, product

from podlozhnyy_module import np


def permutation_t_stat(sample1, sample2):
    """Return the mean of `sample1` minus the mean of `sample2`."""
    first_mean = np.mean(sample1)
    second_mean = np.mean(sample2)
    return first_mean - second_mean


def get_random_permutations(n, max_permutations):
    """
    Draw random sign vectors for a sign-flipping permutation test.

    Parameters
    ----------
    n: int
        Length of every sign vector
    max_permutations: int
        Number of vectors drawn; duplicates collapse, so the result may
        contain fewer elements

    Returns
    -------
    set of tuple
        Distinct length-`n` tuples whose entries are -1 or 1
    """
    # randint(2) yields 0/1; the map 2 * x - 1 turns that into -1/+1.
    signs = 2 * np.random.randint(2, size=(max_permutations, n)) - 1
    return {tuple(row) for row in signs}


def permutation_zero_dist_one_samp(sample, mean, max_permutations):
    """
    Build the null distribution of the mean for a one-sample sign-flip test.

    The sample is centred on `mean`, then every centred value is multiplied
    by -1 or 1 according to each sign vector and the result is averaged.

    Parameters
    ----------
    sample: sequence of numbers
    mean: value the sample is centred on
    max_permutations: when truthy, sign vectors come from
        `get_random_permutations`; otherwise all 2 ** len(sample)
        sign vectors are enumerated

    Returns
    -------
    list with one mean per sign vector
    """
    deviations = np.array(sample) - mean
    size = len(sample)
    sign_vectors = (
        get_random_permutations(size, max_permutations)
        if max_permutations
        else product([-1, 1], repeat=size)
    )
    null_means = []
    for sign_vector in sign_vectors:
        null_means.append(np.mean(deviations * sign_vector))
    return null_means


Expand All @@ -36,12 +38,23 @@ def permutation_zero_dist_ind(sample1, sample2, max_combinations):
if max_combinations:
indices = get_random_combinations(n1, n2, max_combinations)
else:
indices = [(list(index), filter(lambda i: i not in index, range(n)))
for index in combinations(range(n), n1)]
return [joined_sample[list(i[0])].mean() - joined_sample[list(i[1])].mean() for i in indices]


def permutation_test(
    test,
    control,
    kind: str = "independent",
    max_permutations: int = None,
    alternative: str = "two-sided",
):
    """
    Run a one- or two-sample permutation test and return its p-value.

    Parameters
    ----------
    test: sequence of numbers
        Test sample
    control: sequence of numbers or a single number
        Control sample. A scalar (Python or NumPy) switches to the
        one-sample test of `test` against that value
    kind: str
        Relation between the samples: {'independent', 'related'},
        default='independent'. Ignored when `control` is a scalar
    max_permutations: int
        Forwarded to the null-distribution helpers; when truthy they sample
        permutations instead of enumerating all of them, default=None
    alternative: str
        Type of alternative: {'two-sided', 'less', 'greater'},
        default='two-sided'

    Returns
    -------
    float
        Share of the null distribution at least as extreme as the observed
        difference of means, in the direction given by `alternative`

    Raises
    ------
    ValueError
        If `alternative` or `kind` is not recognized, or related samples
        differ in length
    """
    if alternative not in ("two-sided", "less", "greater"):
        raise ValueError(
            "alternative not recognized, should be 'two-sided', 'less' or 'greater'"
        )

    if kind not in ("independent", "related"):
        raise ValueError("kind not recognized, should be 'related' or 'independent'")

    # np.ndim is 0 for Python and NumPy scalars alike, so np.int64 / np.float32
    # values are routed to the one-sample test instead of the two-sample branch.
    if np.ndim(control) == 0:
        zero_distr = permutation_zero_dist_one_samp(test, control, max_permutations)
    elif kind == "related":
        if len(test) != len(control):
            raise ValueError("related samples must have the same size")
        # A paired test is a one-sample test on the pairwise differences.
        zero_distr = permutation_zero_dist_one_samp(
            np.array(test) - np.array(control), 0.0, max_permutations
        )
    else:
        zero_distr = permutation_zero_dist_ind(test, control, max_permutations)

    t_stat = permutation_t_stat(test, control)

    null_values = np.asarray(zero_distr, dtype=float)
    if alternative == "two-sided":
        extreme = np.abs(null_values) >= abs(t_stat)
    elif alternative == "less":
        extreme = null_values <= t_stat
    else:  # "greater": the only value left after validation above
        extreme = null_values >= t_stat

    # Integer count over len() keeps the original ZeroDivisionError on an
    # empty null distribution instead of silently returning NaN.
    return int(np.count_nonzero(extreme)) / len(zero_distr)
Loading

0 comments on commit bf252d5

Please sign in to comment.