import pandas as pd


# --- Actors and Directors who Cooperated At Least Three Times ---

def actors_and_directors(actor_director: pd.DataFrame) -> pd.DataFrame:
    """Return the (actor_id, director_id) pairs that occur in at least three rows.

    Args:
        actor_director: Frame with at least the columns ``actor_id`` and
            ``director_id``; every row counts as one occurrence of its pair.

    Returns:
        A frame with exactly the columns ``actor_id`` and ``director_id``,
        one row per pair whose row count is 3 or more.
    """
    # size() counts rows per pair; reset_index turns the group keys back
    # into ordinary columns next to the count.
    pair_counts = (
        actor_director.groupby(['actor_id', 'director_id'])
        .size()
        .reset_index(name='count')
    )
    frequent = pair_counts['count'] >= 3
    return pair_counts.loc[frequent, ['actor_id', 'director_id']]


# --- Daily Leads and Partners ---

def daily_leads_and_partners(daily_sales: pd.DataFrame) -> pd.DataFrame:
    """Count distinct leads and partners for each (date_id, make_name) pair.

    Args:
        daily_sales: Frame with the columns ``date_id``, ``make_name``,
            ``lead_id`` and ``partner_id``.

    Returns:
        A frame with the columns ``date_id``, ``make_name``,
        ``unique_leads`` and ``unique_partners``. An empty input yields an
        empty frame with those four columns.
    """
    if daily_sales.empty:
        # Nothing to aggregate: hand back the expected schema directly.
        return pd.DataFrame(
            columns=["date_id", "make_name", "unique_leads", "unique_partners"]
        )

    per_day_and_make = daily_sales.groupby(["date_id", "make_name"])
    summary = per_day_and_make.agg(
        unique_leads=("lead_id", "nunique"),
        unique_partners=("partner_id", "nunique"),
    )
    return summary.reset_index()


# --- Group Sold Products by the Date ---

def _join_sorted_unique(products: pd.Series) -> str:
    """Join the distinct non-null values of *products*, sorted, with commas."""
    # Missing values are dropped first: "nunique" (used for num_sold) skips
    # them too, and sorting/joining None or NaN with strings would raise
    # TypeError.
    return ",".join(sorted(products.dropna().unique()))


def categorize_products(activities: pd.DataFrame) -> pd.DataFrame:
    """Summarise the distinct products present for each sell_date.

    Args:
        activities: Frame with the columns ``sell_date`` and ``product``.

    Returns:
        A frame with the columns ``sell_date``, ``num_sold`` (number of
        distinct non-null products for that date) and ``products`` (those
        distinct products, sorted and joined with ``","``). An empty input
        yields an empty frame with those three columns.
    """
    if activities.empty:
        # Nothing to aggregate: hand back the expected schema directly.
        return pd.DataFrame(columns=["sell_date", "num_sold", "products"])

    per_date = activities.groupby("sell_date")
    summary = per_date.agg(
        num_sold=("product", "nunique"),
        products=("product", _join_sorted_unique),
    )
    return summary.reset_index()