# Three independent pandas exercises, originally added as the separate files
# problem1, problem2 and problem3.
import pandas as pd


# --- problem1 ---------------------------------------------------------------
def categorize_products(activities: pd.DataFrame) -> pd.DataFrame:
    """Summarise the distinct products sold on each date.

    Args:
        activities: Frame with at least the columns ``sell_date`` and
            ``product``.

    Returns:
        One row per ``sell_date`` (ascending, as produced by ``groupby``)
        with the columns ``sell_date``, ``num_sold`` (number of distinct
        products) and ``products`` (the distinct product names sorted and
        joined with commas).
    """

    def _join_sorted_unique(names: pd.Series) -> str:
        # ``nunique`` ignores missing values, so drop them here as well;
        # otherwise ``sorted``/``str.join`` raise TypeError on None/NaN.
        return ','.join(sorted(names.dropna().unique()))

    return (
        activities.groupby('sell_date')['product']
        .agg(num_sold='nunique', products=_join_sorted_unique)
        .reset_index()
    )


# --- problem2 ---------------------------------------------------------------
def daily_leads_and_partners(daily_sales: pd.DataFrame) -> pd.DataFrame:
    """Count distinct leads and partners per ``(date_id, make_name)`` pair.

    Args:
        daily_sales: Frame with at least the columns ``date_id``,
            ``make_name``, ``lead_id`` and ``partner_id``.

    Returns:
        One row per ``(date_id, make_name)`` with the columns ``date_id``,
        ``make_name``, ``unique_leads`` and ``unique_partners``, ordered by
        ``date_id`` then ``make_name``.
    """
    group_keys = ['date_id', 'make_name']
    summary = (
        daily_sales.groupby(group_keys)
        .agg(
            unique_leads=('lead_id', 'nunique'),
            unique_partners=('partner_id', 'nunique'),
        )
        .reset_index()
    )
    # Kept explicit so the ordering contract is visible at the return site.
    return summary.sort_values(group_keys)


# --- problem3 ---------------------------------------------------------------
def actors_and_directors(
    actor_director: pd.DataFrame,
    *,
    min_cooperations: int = 3,
) -> pd.DataFrame:
    """Return the actor/director pairs that appear together often enough.

    Args:
        actor_director: Frame with at least the columns ``actor_id`` and
            ``director_id``; each row is counted as one cooperation.
        min_cooperations: Minimum number of rows a pair needs in order to
            be reported. Defaults to 3.

    Returns:
        Frame with the columns ``actor_id`` and ``director_id`` holding
        every pair that occurs at least ``min_cooperations`` times.
    """
    pair_counts = (
        actor_director.groupby(['actor_id', 'director_id'])
        .size()
        .reset_index(name='count')
    )
    frequent_pairs = pair_counts[pair_counts['count'] >= min_cooperations]
    return frequent_pairs[['actor_id', 'director_id']]