Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 24 additions & 0 deletions Actors and Directors who Cooperated At Least Three Times
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
import pandas as pd

def actors_and_directors(actor_director: pd.DataFrame) -> pd.DataFrame:
    """Return the (actor_id, director_id) pairs that occur in at least three rows.

    Args:
        actor_director: Frame with ``actor_id`` and ``director_id`` columns.
            Every row counts as one occurrence of its pair.

    Returns:
        A frame with exactly the columns ``actor_id`` and ``director_id``,
        one row per pair whose row count is three or more. An empty input
        yields an empty frame that still has those two columns.
    """
    pair_columns = ["actor_id", "director_id"]

    # An empty frame may not even carry the key columns, in which case
    # groupby would raise KeyError; return the expected shape directly.
    if actor_director.empty:
        return pd.DataFrame(columns=pair_columns)

    # size() counts the rows in each group; reset_index turns the group keys
    # back into ordinary columns next to the count.
    pair_counts = (
        actor_director.groupby(pair_columns).size().reset_index(name="count")
    )
    return pair_counts.loc[pair_counts["count"] >= 3, pair_columns]


13 changes: 13 additions & 0 deletions Daily Leads and Partners
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
import pandas as pd

def daily_leads_and_partners(daily_sales: pd.DataFrame) -> pd.DataFrame:
    """Count distinct leads and partners for each (date_id, make_name) pair.

    Args:
        daily_sales: Frame with ``date_id``, ``make_name``, ``lead_id`` and
            ``partner_id`` columns.

    Returns:
        A frame with columns ``date_id``, ``make_name``, ``unique_leads`` and
        ``unique_partners``; the last two hold the number of distinct
        ``lead_id`` / ``partner_id`` values seen in each group. An empty
        input yields an empty frame with those four columns.
    """
    group_keys = ["date_id", "make_name"]

    # Nothing to group: hand back an empty frame that keeps the output schema.
    if daily_sales.empty:
        return pd.DataFrame(columns=[*group_keys, "unique_leads", "unique_partners"])

    # Named aggregations: output column -> (source column, reducer).
    distinct_counts = {
        "unique_leads": pd.NamedAgg(column="lead_id", aggfunc="nunique"),
        "unique_partners": pd.NamedAgg(column="partner_id", aggfunc="nunique"),
    }
    per_group = daily_sales.groupby(group_keys).agg(**distinct_counts)

    # Turn the group keys from index levels back into regular columns.
    return per_group.reset_index()
13 changes: 13 additions & 0 deletions Group Sold Products by the Date
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
import pandas as pd

def categorize_products(activities: pd.DataFrame) -> pd.DataFrame:
    """Summarise the distinct products listed for each sell_date.

    Args:
        activities: Frame with ``sell_date`` and ``product`` columns.

    Returns:
        A frame with columns ``sell_date``, ``num_sold`` and ``products``.
        ``num_sold`` is the number of distinct non-missing product names in
        the group, and ``products`` is those same names sorted and joined
        with commas. An empty input yields an empty frame with those three
        columns.
    """
    if activities.empty:
        return pd.DataFrame(columns=["sell_date", "num_sold", "products"])

    def _join_distinct(names: pd.Series) -> str:
        # "nunique" ignores missing values, so drop them here as well; this
        # keeps both columns in agreement and avoids the TypeError that
        # sorted()/str.join would raise on a None/NaN entry.
        return ",".join(sorted(names.dropna().unique()))

    per_date = activities.groupby("sell_date").agg(
        num_sold=("product", "nunique"),
        products=("product", _join_distinct),
    )
    # Turn sell_date from the group index back into a regular column.
    return per_date.reset_index()