-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.py
59 lines (42 loc) · 1.5 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
from wahoo_api import *
from gcp_bigquery import load_to_bigquery
from gcp_secret_manager import access_secret_version
import os
import pandas as pd
from dotenv import load_dotenv
# Run python-dotenv's loader before reading the settings below, so values it
# provides are visible through the process environment.
load_dotenv()

# Destination coordinates for the load step. Each value is None when the
# corresponding environment variable is unset.
PROJECTID = os.environ.get("PROJECTID")
DATASET = os.environ.get("DATASET")
TABLENAME = os.environ.get("TABLENAME")

# Dotted "<project>.<dataset>.<table>" identifier; unset parts render as "None".
TABLEID = ".".join(str(part) for part in (PROJECTID, DATASET, TABLENAME))
def extract_workout_data():
    """Fetch workouts from the Wahoo API and flatten them into a DataFrame.

    Credentials are read with ``access_secret_version``. The stored refresh
    token is exchanged through ``refreshtokens`` for a fresh access token,
    which is then passed to ``get_workouts``.

    Returns:
        The result of ``pd.json_normalize`` applied to the workouts payload.
    """
    client_id = access_secret_version("wahoo_client_id")
    client_secret = access_secret_version("wahoo_client_secret")
    refresh_token = access_secret_version("wahoo_refresh_token")

    # The "wahoo_access_token" secret is deliberately not read here: its value
    # was always overwritten by the refreshed token before being used.
    # NOTE(review): the refresh token returned by refreshtokens() is discarded.
    # If the provider invalidates a refresh token once it has been used, the
    # new one must be written back to the secret store - confirm.
    access_token, _rotated_refresh_token = refreshtokens(
        refresh_token, client_id, client_secret
    )

    workouts = get_workouts(access_token)
    return pd.json_normalize(workouts)
def transform_workout_data(df):
    """Drop unused workout fields, convert dtypes and flatten column names.

    Args:
        df: DataFrame of workouts whose nested fields appear as dotted column
            names (for example ``workout_summary.file.url``).

    Returns:
        A new DataFrame without the columns listed in ``columns_to_remove``,
        passed through ``DataFrame.convert_dtypes``, and with every ``.`` in a
        column name replaced by ``_``. The input frame is not modified.

    Raises:
        KeyError: If any column in ``columns_to_remove`` is missing from ``df``.
    """
    columns_to_remove = [
        "plan_id",
        "workout_summary.ascent_accum",
        "workout_summary.duration_paused_accum",
        "workout_summary.heart_rate_avg",
        "workout_summary.file.url",
        "workout_summary.files",
    ]
    df = df.drop(columns=columns_to_remove)
    df = df.convert_dtypes(infer_objects=True)
    # Must be a literal replacement: as a regex, "." matches any character and
    # would turn every column name into a run of underscores.
    # Presumably the destination table cannot take dotted column names - confirm.
    df.columns = df.columns.str.replace(".", "_", regex=False)
    return df
def load_workout_data(df):
    """Send the workout DataFrame to the table identified by ``TABLEID``.

    Args:
        df: DataFrame handed to ``load_to_bigquery`` as the data to load.
    """
    destination = TABLEID
    load_to_bigquery(destination, df)
    print("Completed loading dataset to BigQuery")
def main():
    """Run the extract, transform and load steps once, in that order."""
    df = extract_workout_data()
    df = transform_workout_data(df)
    # Use the load-step wrapper rather than calling load_to_bigquery directly,
    # so the completion message is printed and the wrapper is not dead code.
    load_workout_data(df)


if __name__ == "__main__":
    main()