import pandas as pd


# --- Problem1.py ---------------------------------------------------------

def categorize_products(activities: pd.DataFrame) -> pd.DataFrame:
    """Summarize the distinct products sold on each date.

    Args:
        activities: Frame with at least the columns ``sell_date`` and
            ``product``.

    Returns:
        A frame with one row per ``sell_date`` (ascending) and the columns
        ``sell_date``, ``num_sold`` (number of distinct products) and
        ``products`` (the distinct product names, sorted and comma-joined).
    """
    distinct = activities.groupby('sell_date')['product'].unique()

    # Missing products are neither counted nor listed; leaving them in would
    # make sorted()/join() fail on a mix of str and None/NaN.
    cleaned = distinct.apply(
        lambda names: sorted(name for name in names if pd.notna(name))
    )

    # groupby() already yields the dates in sorted order, so no extra sort
    # of the result is required.
    return pd.DataFrame({
        'sell_date': cleaned.index,
        'num_sold': cleaned.apply(len).values,
        'products': cleaned.apply(','.join).values,
    })


# --- Problem2.py ---------------------------------------------------------

def daily_leads_and_partners(daily_sales: pd.DataFrame) -> pd.DataFrame:
    """Count distinct leads and partners per date and make.

    Args:
        daily_sales: Frame with at least the columns ``date_id``,
            ``make_name``, ``lead_id`` and ``partner_id``.

    Returns:
        A frame with one row per (``date_id``, ``make_name``) pair and the
        columns ``date_id``, ``make_name``, ``unique_leads`` and
        ``unique_partners``.
    """
    # Named aggregation ties every output column to its source column by
    # name instead of relying on positional column order.
    return (
        daily_sales.groupby(['date_id', 'make_name'])
        .agg(
            unique_leads=('lead_id', 'nunique'),
            unique_partners=('partner_id', 'nunique'),
        )
        .reset_index()
    )


# --- Problem3.py ---------------------------------------------------------

def actors_and_directors(
    actor_director: pd.DataFrame,
    min_collaborations: int = 3,
) -> pd.DataFrame:
    """Find (actor, director) pairs that appear together often enough.

    Args:
        actor_director: Frame with at least the columns ``actor_id`` and
            ``director_id``; each row is one occurrence of the pair.
        min_collaborations: Minimum number of rows a pair needs in order to
            be reported. Defaults to 3.

    Returns:
        A frame with the columns ``actor_id`` and ``director_id`` holding
        every pair that occurs at least ``min_collaborations`` times.
    """
    # size() counts the rows of each group directly, without having to
    # aggregate over one of the grouping key columns.
    pair_counts = (
        actor_director.groupby(['actor_id', 'director_id'])
        .size()
        .reset_index(name='count')
    )
    frequent = pair_counts['count'] >= min_collaborations
    return pair_counts.loc[frequent, ['actor_id', 'director_id']]