# Pandas group-by exercises, originally submitted as a three-part patch series
# (Radhika Tekade, 2025-03-15) creating:
#   150_group_sold_products_by_date.py
#   151_daily_leads_and_partners.py
#   152_actors_and_directors_cooperated.py

import pandas as pd


# --- 150_group_sold_products_by_date.py -------------------------------------

def _join_sorted_unique(names: pd.Series) -> str:
    """Return the distinct non-missing values of *names*, sorted and comma-joined."""
    # Missing values are dropped so this stays consistent with ``nunique``
    # (which ignores them) and so sorting never compares a str with NaN/None.
    return ",".join(sorted(set(names.dropna())))


def categorize_products(activities: pd.DataFrame) -> pd.DataFrame:
    """Summarise the distinct products sold on each date.

    ``agg()`` is used because two different aggregations are applied to the
    same grouping; unlike ``transform()`` it yields one row per group.

    Args:
        activities: Frame with at least the columns ``sell_date`` and
            ``product``.

    Returns:
        A frame with one row per ``sell_date`` and the columns ``sell_date``,
        ``num_sold`` (number of distinct products) and ``products`` (the
        distinct product names, sorted and joined with ``","``).
    """
    per_date = activities.groupby(["sell_date"]).agg(
        num_sold=("product", "nunique"),
        products=("product", _join_sorted_unique),
    )
    return per_date.reset_index()


# --- 151_daily_leads_and_partners.py ----------------------------------------

def daily_leads_and_partners(daily_sales: pd.DataFrame) -> pd.DataFrame:
    """Count distinct leads and partners per (date, make) pair.

    Args:
        daily_sales: Frame with at least the columns ``date_id``,
            ``make_name``, ``lead_id`` and ``partner_id``.

    Returns:
        A frame with one row per ``(date_id, make_name)`` and the columns
        ``date_id``, ``make_name``, ``unique_leads`` and ``unique_partners``.
    """
    grouped = daily_sales.groupby(["date_id", "make_name"])
    summary = grouped.agg(
        unique_leads=("lead_id", "nunique"),
        unique_partners=("partner_id", "nunique"),
    )
    return summary.reset_index()


# --- 152_actors_and_directors_cooperated.py ---------------------------------

def actors_and_directors(actor_director: pd.DataFrame) -> pd.DataFrame:
    """Return the (actor, director) pairs that cooperated at least three times.

    Args:
        actor_director: Frame with at least the columns ``actor_id`` and
            ``director_id``; each row is one cooperation.

    Returns:
        A frame with the columns ``actor_id`` and ``director_id`` containing
        only the pairs that occur in three or more rows.
    """
    # ``size()`` counts rows, so a cooperation whose ``timestamp`` is missing
    # is still counted (``count`` on that column would silently skip it).
    pair_counts = (
        actor_director.groupby(["actor_id", "director_id"])
        .size()
        .reset_index(name="count")
    )
    frequent = pair_counts["count"] >= 3
    return pair_counts.loc[frequent, ["actor_id", "director_id"]]