"""Pandas solutions to three small group-and-aggregate problems.

Each section below was originally a separate file (named in the section
header) that offered several alternative implementations under one function
name, so only the last alternative was ever callable.  The alternatives now
have distinct names; the public names (``actors_and_directors``,
``daily_leads_and_partners``, ``categorize_products``) remain bound to the
implementation that was last -- and therefore effective -- in each file.
"""
from collections import Counter

import pandas as pd


# ---------------------------------------------------------------------------
# ActorsAndDirectorsWhoCooperatedAtLeastThreeTimes.py
# ---------------------------------------------------------------------------

# Method 1
def actors_and_directors_loop(actor_director: pd.DataFrame) -> pd.DataFrame:
    """Return (actor_id, director_id) pairs that occur at least three times.

    Pure-Python variant: counts the pairs with a ``Counter``.

    Args:
        actor_director: Frame with ``actor_id`` and ``director_id`` columns;
            every row is one cooperation.

    Returns:
        Frame with columns ``actor_id`` and ``director_id``, one row per
        qualifying pair, in order of first appearance in the input.
    """
    # Iterate the column values directly instead of ``column[i]``: integer
    # keys on a Series are index *labels*, so positional lookups break as
    # soon as the frame does not carry a default 0..n-1 index.
    pair_counts = Counter(
        zip(actor_director['actor_id'], actor_director['director_id'])
    )
    rows = [
        [actor, director]
        for (actor, director), count in pair_counts.items()
        if count >= 3
    ]
    return pd.DataFrame(rows, columns=['actor_id', 'director_id'])


# Method 2
def actors_and_directors(actor_director: pd.DataFrame) -> pd.DataFrame:
    """Return (actor_id, director_id) pairs that occur at least three times.

    Vectorised variant: group on the pair and keep groups of size >= 3.

    Args:
        actor_director: Frame with ``actor_id`` and ``director_id`` columns;
            every row is one cooperation.

    Returns:
        Frame with columns ``actor_id`` and ``director_id``, one row per
        qualifying pair, ordered by the group keys.
    """
    counts = (
        actor_director
        .groupby(['actor_id', 'director_id'])
        .size()
        .reset_index(name='cnt')
    )
    return counts[counts['cnt'] >= 3][['actor_id', 'director_id']]


# ---------------------------------------------------------------------------
# DailyLeadsAndPartners.py
# ---------------------------------------------------------------------------

# Method 1
def daily_leads_and_partners_loop(daily_sales: pd.DataFrame) -> pd.DataFrame:
    """Count distinct leads and partners per (date_id, make_name).

    Pure-Python variant: collects the distinct ids of each group in sets.

    Args:
        daily_sales: Frame with ``date_id``, ``make_name``, ``lead_id`` and
            ``partner_id`` columns.

    Returns:
        Frame with columns ``date_id``, ``make_name``, ``unique_leads`` and
        ``unique_partners``; groups appear in order of first appearance.
    """
    seen = {}  # (date_id, make_name) -> (lead ids, partner ids)
    columns = (
        daily_sales['date_id'],
        daily_sales['make_name'],
        daily_sales['lead_id'],
        daily_sales['partner_id'],
    )
    # zip over the columns walks the rows by position, independent of the
    # frame's index labels.
    for date_id, make_name, lead_id, partner_id in zip(*columns):
        leads, partners = seen.setdefault((date_id, make_name), (set(), set()))
        leads.add(lead_id)
        partners.add(partner_id)

    rows = [
        [date_id, make_name, len(leads), len(partners)]
        for (date_id, make_name), (leads, partners) in seen.items()
    ]
    return pd.DataFrame(
        rows,
        columns=['date_id', 'make_name', 'unique_leads', 'unique_partners'],
    )


# Method 2
def daily_leads_and_partners_agg(daily_sales: pd.DataFrame) -> pd.DataFrame:
    """Count distinct leads and partners per (date_id, make_name).

    Variant using named aggregation, so the output columns are named
    directly by the ``agg`` call.

    Args:
        daily_sales: Frame with ``date_id``, ``make_name``, ``lead_id`` and
            ``partner_id`` columns.

    Returns:
        Frame with columns ``date_id``, ``make_name``, ``unique_leads`` and
        ``unique_partners``, ordered by the group keys.
    """
    return (
        daily_sales
        .groupby(['date_id', 'make_name'])
        .agg(
            unique_leads=('lead_id', 'nunique'),
            unique_partners=('partner_id', 'nunique'),
        )
        .reset_index()
    )


# Method 3
def daily_leads_and_partners(daily_sales: pd.DataFrame) -> pd.DataFrame:
    """Count distinct leads and partners per (date_id, make_name).

    Variant using ``nunique`` on the grouped frame followed by a rename.

    Args:
        daily_sales: Frame with ``date_id``, ``make_name``, ``lead_id`` and
            ``partner_id`` columns.

    Returns:
        Frame with columns ``date_id``, ``make_name``, ``unique_leads`` and
        ``unique_partners``, ordered by the group keys.
    """
    # Select the two id columns explicitly: a bare ``groupby(...).nunique()``
    # counts every non-key column, so any extra input column would leak
    # into the result.
    counts = (
        daily_sales
        .groupby(['date_id', 'make_name'])[['lead_id', 'partner_id']]
        .nunique()
        .reset_index()
    )
    return counts.rename(
        columns={'lead_id': 'unique_leads', 'partner_id': 'unique_partners'}
    )


# ---------------------------------------------------------------------------
# GroupSoldProductsByTheDate.py
# ---------------------------------------------------------------------------

# Method 1
def categorize_products_loop(activities: pd.DataFrame) -> pd.DataFrame:
    """List the distinct products sold on each date.

    Pure-Python variant: gathers the products of each date in a set.

    Args:
        activities: Frame with ``sell_date`` and ``product`` columns.

    Returns:
        Frame with columns ``sell_date``, ``num_sold`` (number of distinct
        products) and ``products`` (the distinct product names, sorted and
        comma-joined), ordered by ``sell_date``.
    """
    products_by_date = {}
    # Walk the rows by position rather than by index label.
    for sell_date, product in zip(activities['sell_date'], activities['product']):
        products_by_date.setdefault(sell_date, set()).add(product)

    rows = [
        [sell_date, len(products), ','.join(sorted(products))]
        for sell_date, products in products_by_date.items()
    ]
    result = pd.DataFrame(rows, columns=['sell_date', 'num_sold', 'products'])
    # Reset the index after sorting so the result carries a clean 0..n-1
    # index, like the groupby-based variant does.
    return result.sort_values('sell_date').reset_index(drop=True)


# Method 2
def categorize_products(activities: pd.DataFrame) -> pd.DataFrame:
    """List the distinct products sold on each date.

    Vectorised variant using named aggregation.

    Args:
        activities: Frame with ``sell_date`` and ``product`` columns.

    Returns:
        Frame with columns ``sell_date``, ``num_sold`` (number of distinct
        products) and ``products`` (the distinct product names, sorted and
        comma-joined), ordered by ``sell_date``.
    """
    return (
        activities
        .groupby('sell_date')
        .agg(
            num_sold=('product', 'nunique'),
            # set() drops the duplicates; sorted() fixes the output order.
            products=('product', lambda names: ','.join(sorted(set(names)))),
        )
        .reset_index()
    )