diff --git a/data/clean/type_counts.csv b/data/clean/type_counts.csv new file mode 100644 index 0000000..3a05058 --- /dev/null +++ b/data/clean/type_counts.csv @@ -0,0 +1,11 @@ +type,n +calendar,1 +chart,1 +datalens,20 +dataset,2716 +file,185 +filter,25 +href,154 +map,353 +story,3 +visualization,4 diff --git a/src/transform.py b/src/transform.py index 74871ba..9dea4be 100644 --- a/src/transform.py +++ b/src/transform.py @@ -47,6 +47,13 @@ def transform(verbose: bool) -> None: subset=["id"], keep="first" ) + # Type counts + if verbose: + print("Type counts") + type_counts = latest_df.groupby("type").size() + type_counts.name = "n" + type_counts.to_csv(utils.DATA_DIR / "clean" / "type_counts.csv", header=True) + # Write it out as csv out_path = utils.DATA_DIR / "clean" / "latest.csv" if verbose: