# Problem 1: Group Sold Products By The Date (GroupSoldProductsByTheDate.py).
#
# Two alternative solutions are provided. They used to share one function
# name, so the second definition silently replaced the first; the loop-based
# variant now has its own name and `categorize_products` stays bound to the
# groupby-based variant (the definition that was effective before).
import pandas as pd


def categorize_products_iterative(activities: pd.DataFrame) -> pd.DataFrame:
    """Summarise the distinct products sold on each date using a plain loop.

    Args:
        activities: Frame with a ``sell_date`` and a ``product`` column.

    Returns:
        A frame with columns ``sell_date``, ``num_sold`` (number of distinct
        products for that date) and ``products`` (the distinct product names,
        sorted and joined with commas), ordered by ``sell_date`` and carrying
        a fresh 0..n-1 index.
    """
    products_by_date = {}
    # Walk the column values positionally.  Looking rows up with
    # ``activities['sell_date'][i]`` is label-based and raises KeyError as
    # soon as the frame's index is not exactly 0..n-1.
    for sell_date, product in zip(activities['sell_date'], activities['product']):
        products_by_date.setdefault(sell_date, set()).add(product)

    rows = [
        [sell_date, len(products), ','.join(sorted(products))]
        for sell_date, products in products_by_date.items()
    ]
    summary = pd.DataFrame(rows, columns=['sell_date', 'num_sold', 'products'])
    # Reset the index after sorting so the result has the same shape as the
    # groupby-based variant below.
    return summary.sort_values('sell_date').reset_index(drop=True)


def categorize_products(activities: pd.DataFrame) -> pd.DataFrame:
    """Summarise the distinct products sold on each date using groupby.

    Args:
        activities: Frame with a ``sell_date`` and a ``product`` column.

    Returns:
        A frame with columns ``sell_date``, ``num_sold`` (number of distinct
        products for that date) and ``products`` (the distinct product names,
        sorted and joined with commas), one row per ``sell_date``.
    """
    return (
        activities.groupby('sell_date')
        .agg(
            num_sold=('product', 'nunique'),
            # drop_duplicates() keeps each product once; set(names) would
            # work equally well here.
            products=('product', lambda names: ','.join(sorted(names.drop_duplicates()))),
        )
        .reset_index()
    )
# Problem 2: Daily Leads and Partners (DailyLeadsAndPartners.py).
#
# Three alternative solutions are provided. They used to share one function
# name, so only the last definition was reachable; the alternatives now have
# their own names and `daily_leads_and_partners` stays bound to the
# groupby + nunique + rename variant (the definition that was effective
# before).
import pandas as pd


def daily_leads_and_partners_iterative(daily_sales: pd.DataFrame) -> pd.DataFrame:
    """Count distinct leads and partners per (date_id, make_name) with a loop.

    Args:
        daily_sales: Frame with ``date_id``, ``make_name``, ``lead_id`` and
            ``partner_id`` columns.

    Returns:
        A frame with columns ``date_id``, ``make_name``, ``unique_leads`` and
        ``unique_partners``; rows appear in first-seen order of each
        (date_id, make_name) pair.
    """
    seen = {}
    # Walk the column values positionally.  ``daily_sales['date_id'][i]`` is
    # a label lookup and raises KeyError when the index is not 0..n-1.
    for date_id, make_name, lead_id, partner_id in zip(
        daily_sales['date_id'],
        daily_sales['make_name'],
        daily_sales['lead_id'],
        daily_sales['partner_id'],
    ):
        key = (date_id, make_name)
        if key not in seen:
            seen[key] = (set(), set())
        leads, partners = seen[key]
        leads.add(lead_id)
        partners.add(partner_id)

    rows = [
        [date_id, make_name, len(leads), len(partners)]
        for (date_id, make_name), (leads, partners) in seen.items()
    ]
    return pd.DataFrame(
        rows, columns=['date_id', 'make_name', 'unique_leads', 'unique_partners']
    )


def daily_leads_and_partners_named_agg(daily_sales: pd.DataFrame) -> pd.DataFrame:
    """Count distinct leads and partners per group using named aggregation.

    Args:
        daily_sales: Frame with ``date_id``, ``make_name``, ``lead_id`` and
            ``partner_id`` columns.

    Returns:
        A frame with columns ``date_id``, ``make_name``, ``unique_leads`` and
        ``unique_partners``, one row per (date_id, make_name) group.
    """
    return (
        daily_sales.groupby(['date_id', 'make_name'])
        .agg(
            unique_leads=('lead_id', 'nunique'),
            unique_partners=('partner_id', 'nunique'),
        )
        .reset_index()
    )


def daily_leads_and_partners(daily_sales: pd.DataFrame) -> pd.DataFrame:
    """Count distinct leads and partners per (date_id, make_name) group.

    Applies ``nunique`` to every non-key column of the grouped frame and then
    renames ``lead_id``/``partner_id`` to ``unique_leads``/``unique_partners``.

    Args:
        daily_sales: Frame with ``date_id``, ``make_name``, ``lead_id`` and
            ``partner_id`` columns.

    Returns:
        A frame with ``date_id``, ``make_name``, ``unique_leads`` and
        ``unique_partners`` columns, one row per (date_id, make_name) group.
    """
    distinct_counts = daily_sales.groupby(['date_id', 'make_name']).nunique().reset_index()
    return distinct_counts.rename(
        columns={'lead_id': 'unique_leads', 'partner_id': 'unique_partners'}
    )
# Problem 3: Actors and Directors Who Cooperated At Least Three Times
# (ActorsAndDirectorsWhoCooperatedAtLeastThreeTimes.py).
#
# Two alternative solutions are provided. They used to share one function
# name, so the second definition silently replaced the first; the loop-based
# variant now has its own name and `actors_and_directors` stays bound to the
# groupby-based variant (the definition that was effective before).
from collections import Counter

import pandas as pd


def actors_and_directors_iterative(actor_director: pd.DataFrame) -> pd.DataFrame:
    """Find (actor_id, director_id) pairs occurring at least three times.

    Args:
        actor_director: Frame with ``actor_id`` and ``director_id`` columns;
            each row is one occurrence of the pair.

    Returns:
        A frame with columns ``actor_id`` and ``director_id`` holding every
        pair that appears in three or more rows, in first-seen order.
    """
    # Count pairs positionally.  ``actor_director['actor_id'][i]`` is a label
    # lookup and raises KeyError when the index is not 0..n-1.
    pair_counts = Counter(
        zip(actor_director['actor_id'], actor_director['director_id'])
    )
    rows = [
        [actor_id, director_id]
        for (actor_id, director_id), count in pair_counts.items()
        if count >= 3
    ]
    return pd.DataFrame(rows, columns=['actor_id', 'director_id'])


def actors_and_directors(actor_director: pd.DataFrame) -> pd.DataFrame:
    """Find (actor_id, director_id) pairs occurring at least three times.

    Args:
        actor_director: Frame with ``actor_id`` and ``director_id`` columns;
            each row is one occurrence of the pair.

    Returns:
        A frame with columns ``actor_id`` and ``director_id`` holding every
        pair whose group size is three or more.
    """
    pair_counts = (
        actor_director.groupby(['actor_id', 'director_id'])
        .size()
        .reset_index(name='cnt')
    )
    frequent = pair_counts[pair_counts['cnt'] >= 3]
    return frequent[['actor_id', 'director_id']]