-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathutil.py
80 lines (68 loc) · 2.14 KB
/
util.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
import snowflake.connector as sn
import pandas as pd
import tomli
with open(".streamlit/secrets.toml", mode="rb") as fp:
config = tomli.load(fp)
def connection(database=None):
return sn.connect(
user=config["SNOWFLAKE_USER"],
password=config["SNOWFLAKE_PASSWORD"],
account=config["SNOWFLAKE_ACCOUNT"],
warehouse=config["SNOWFLAKE_WH"],
role=config["SNOWFLAKE_ROLE"],
database=database or "tuva_project_demo",
)
def safe_to_pandas(conn, query):
cur = conn.cursor()
data = cur.execute(query).fetch_pandas_all()
cur.close()
lowercase = lambda x: str(x).lower()
rename_dict = {k: lowercase(k) for k in data}
data = data.rename(columns=rename_dict)
for col in [x for x in data if ("amount" in x) or (x == "member_month_count")]:
data[col] = pd.to_numeric(data[col])
return data
def human_format(num):
num = float("{:.3g}".format(num))
magnitude = 0
while abs(num) >= 1000:
magnitude += 1
num /= 1000.0
return "{}{}".format(
"{:f}".format(num).rstrip("0").rstrip("."), ["", "K", "M", "B", "T"][magnitude]
)
def format_df(df):
df = df.copy(deep=True)
# format column values
for c in df:
if "pct" in c:
df[c] = (
df[c]
.apply(lambda x: f"{round(x, 4) * 100}%" if x else "")
.replace("nan%", "")
)
elif "pmpm" in c:
df[c] = (
df[c]
.apply(lambda x: f"${human_format(x)}" if x else "")
.replace("$nan", "")
)
# format column headers
df.columns = [
c.replace("_", " ").title().replace("Pmpm", "PMPM").replace("Pct Change", "% Δ")
for c in df
]
return df
def group_for_pmpm(df, grouping_column):
grouped_df = (
df.groupby(grouping_column)[
["paid_amount_sum", "member_month_count", "row_count"]
]
.sum()
.query("row_count > 10")
.assign(
paid_amount_pmpm=lambda x: x["paid_amount_sum"] / x["member_month_count"]
)
.reset_index()
)
return grouped_df