Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions 150_group_sold_products_by_date.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Group the activities DF by 'sell_date'. agg() is used because we want to apply several aggregations
# to the same grouping: the number of unique products sold on each date, and the names of those unique
# products, sorted and joined into a single comma-separated string.
# Note: agg() with named aggregations directly returns a DF, not a Series like we get using transform()

import pandas as pd

def categorize_products(activities: pd.DataFrame) -> pd.DataFrame:
    """Summarise the distinct products sold on each sell date.

    Args:
        activities: Frame containing at least the 'sell_date' and 'product'
            columns.

    Returns:
        A frame with one row per 'sell_date' and the columns 'sell_date',
        'num_sold' (number of distinct non-missing products) and 'products'
        (those distinct product names, sorted and joined with ',').
    """
    def _join_sorted_unique(names: pd.Series) -> str:
        # 'nunique' already ignores missing values; drop them here as well so
        # both aggregations agree and sorted()/join() never see None/NaN
        # (which would raise TypeError).
        return ','.join(sorted(names.dropna().unique()))

    summary = activities.groupby(['sell_date']).agg(
        num_sold=('product', 'nunique'),
        products=('product', _join_sorted_unique),
    )
    return summary.reset_index()
11 changes: 11 additions & 0 deletions 151_daily_leads_and_partners.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Group the daily_sales DF using both 'date_id' and 'make_name' and return unique lead IDs and unique
# partner IDs associated with it.

import pandas as pd

def daily_leads_and_partners(daily_sales: pd.DataFrame) -> pd.DataFrame:
    """Count distinct leads and partners per ('date_id', 'make_name') pair.

    Args:
        daily_sales: Frame containing the 'date_id', 'make_name', 'lead_id'
            and 'partner_id' columns.

    Returns:
        A frame with one row per ('date_id', 'make_name') group and the
        columns 'date_id', 'make_name', 'unique_leads', 'unique_partners'.
    """
    per_day_and_make = daily_sales.groupby(['date_id', 'make_name'])
    distinct_counts = per_day_and_make.agg(
        unique_leads=pd.NamedAgg(column='lead_id', aggfunc='nunique'),
        unique_partners=pd.NamedAgg(column='partner_id', aggfunc='nunique'),
    )
    # Move the group keys out of the index and back into ordinary columns.
    return distinct_counts.reset_index()
10 changes: 10 additions & 0 deletions 152_actors_and_directors_cooperated.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# Group the DF by (actor_id, director_id) pairs and count the number of times each pair cooperated.
# Return only the actor_id and director_id columns for pairs whose count is greater than or equal to 3.

import pandas as pd

def actors_and_directors(actor_director: pd.DataFrame) -> pd.DataFrame:
    """Return the actor/director pairs that appear together at least 3 times.

    Args:
        actor_director: Frame containing the 'actor_id', 'director_id' and
            'timestamp' columns.

    Returns:
        A frame with only the 'actor_id' and 'director_id' columns, one row
        per pair whose count of 'timestamp' entries is 3 or more.
    """
    pair_columns = ['actor_id', 'director_id']
    tally = (
        actor_director.groupby(pair_columns)
        .agg(count=('timestamp', 'count'))
        .reset_index()
    )
    # Keep the qualifying rows and drop the helper 'count' column in one step.
    is_frequent = tally['count'] >= 3
    return tally.loc[is_frequent, pair_columns]