import pandas as pd


# --- Originally Problem1.py ---

def categorize_products(activities: pd.DataFrame) -> pd.DataFrame:
    """Summarize the distinct products sold on each date.

    Args:
        activities: Frame with at least ``sell_date`` and ``product`` columns.

    Returns:
        One row per ``sell_date`` (in the sorted key order ``groupby``
        produces) with two extra columns:

        * ``num_sold`` -- number of distinct non-null products that day.
        * ``products`` -- those product names, sorted and comma-joined.
    """

    def _join_sorted_unique(names: pd.Series) -> str:
        # Drop nulls first so the joined list agrees with what ``nunique``
        # counts; a null left in the values would make ``str.join`` raise.
        return ",".join(sorted(names.dropna().unique()))

    return (
        activities.groupby("sell_date")
        .agg(
            num_sold=("product", "nunique"),
            products=("product", _join_sorted_unique),
        )
        .reset_index()
    )


# --- Originally Problem2.py ---

def daily_leads_and_partners(daily_sales: pd.DataFrame) -> pd.DataFrame:
    """Count distinct leads and partners for each (date, make) pair.

    Args:
        daily_sales: Frame with at least ``date_id``, ``make_name``,
            ``lead_id`` and ``partner_id`` columns.

    Returns:
        One row per (``date_id``, ``make_name``) group with the columns
        ``unique_leads`` and ``unique_partners`` holding the number of
        distinct ``lead_id`` / ``partner_id`` values in that group.
    """
    return (
        daily_sales.groupby(["date_id", "make_name"])
        .agg(
            unique_leads=("lead_id", "nunique"),
            unique_partners=("partner_id", "nunique"),
        )
        .reset_index()
    )


# --- Originally Problem3.py ---

def actors_and_directors(actor_director: pd.DataFrame) -> pd.DataFrame:
    """Return the (actor, director) pairs that appear at least three times.

    Args:
        actor_director: Frame with at least ``actor_id`` and ``director_id``
            columns; each row is one occurrence of the pair.

    Returns:
        Frame with only the ``actor_id`` and ``director_id`` columns,
        restricted to pairs occurring in three or more rows.
    """
    # ``size()`` counts rows per group directly, instead of counting one of
    # the grouping-key columns through a named aggregation.
    pair_counts = (
        actor_director.groupby(["actor_id", "director_id"])
        .size()
        .reset_index(name="coop")
    )
    frequent = pair_counts["coop"] >= 3
    return pair_counts.loc[frequent, ["actor_id", "director_id"]]