Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 29 additions & 0 deletions ActorsAndDirectorsWhoCooperatedAtLeastThreeTimes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Method 1
import pandas as pd

def actors_and_directors(actor_director: pd.DataFrame) -> pd.DataFrame:
    """Return the (actor_id, director_id) pairs that occur in at least three rows.

    Args:
        actor_director: Frame with ``actor_id`` and ``director_id`` columns;
            each row counts once towards its pair.

    Returns:
        A frame with columns ``actor_id`` and ``director_id`` containing one
        row per pair seen three or more times, ordered by the pair's first
        appearance in the input.
    """
    pair_counts = {}
    # Walk the column values directly instead of ``column[i]``: ``Series[i]``
    # is a label lookup, so it fails on any frame whose index is not the
    # default 0..n-1 (e.g. after filtering, sorting or concatenation).
    for pair in zip(actor_director['actor_id'], actor_director['director_id']):
        pair_counts[pair] = pair_counts.get(pair, 0) + 1

    frequent_pairs = [
        list(pair) for pair, count in pair_counts.items() if count >= 3
    ]
    return pd.DataFrame(frequent_pairs, columns=['actor_id', 'director_id'])


# Method 2
import pandas as pd

def actors_and_directors(actor_director: pd.DataFrame) -> pd.DataFrame:
    """Select the actor/director pairs that occur in three or more rows.

    Rows are grouped on (``actor_id``, ``director_id``), the group sizes are
    stored in a temporary ``cnt`` column, and only the two id columns of the
    groups with ``cnt >= 3`` are returned.
    """
    pair_counts = (
        actor_director
        .groupby(['actor_id', 'director_id'])
        .size()
        .reset_index(name='cnt')
    )
    is_frequent = pair_counts['cnt'] >= 3
    return pair_counts.loc[is_frequent, ['actor_id', 'director_id']]
42 changes: 42 additions & 0 deletions DailyLeadsAndPartners.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
# Method 1
import pandas as pd

def daily_leads_and_partners(daily_sales: pd.DataFrame) -> pd.DataFrame:
    """Count distinct leads and partners for every (date_id, make_name) pair.

    Args:
        daily_sales: Frame with ``date_id``, ``make_name``, ``lead_id`` and
            ``partner_id`` columns.

    Returns:
        A frame with columns ``date_id``, ``make_name``, ``unique_leads`` and
        ``unique_partners``; one row per (date_id, make_name) pair, ordered by
        the pair's first appearance in the input.
    """
    seen = {}
    # Iterate the column values in lockstep rather than indexing with
    # ``column[i]``: that form is a label lookup and raises KeyError when the
    # frame's index is not the default 0..n-1.
    rows = zip(
        daily_sales['date_id'],
        daily_sales['make_name'],
        daily_sales['lead_id'],
        daily_sales['partner_id'],
    )
    for date_id, make_name, lead_id, partner_id in rows:
        leads, partners = seen.setdefault((date_id, make_name), (set(), set()))
        leads.add(lead_id)
        partners.add(partner_id)

    result = [
        [date_id, make_name, len(leads), len(partners)]
        for (date_id, make_name), (leads, partners) in seen.items()
    ]
    return pd.DataFrame(
        result,
        columns=['date_id', 'make_name', 'unique_leads', 'unique_partners'],
    )


# Method 2
import pandas as pd

def daily_leads_and_partners(daily_sales: pd.DataFrame) -> pd.DataFrame:
    """Summarise distinct leads and partners per (date_id, make_name) group.

    The output has the two grouping keys as ordinary columns followed by
    ``unique_leads`` (distinct ``lead_id`` values) and ``unique_partners``
    (distinct ``partner_id`` values).
    """
    by_day_and_make = daily_sales.groupby(['date_id', 'make_name'])
    summary = by_day_and_make.agg(
        unique_leads=pd.NamedAgg(column='lead_id', aggfunc='nunique'),
        unique_partners=pd.NamedAgg(column='partner_id', aggfunc='nunique'),
    )
    # Turn the group keys back into regular columns.
    return summary.reset_index()


# Method 3
import pandas as pd

def daily_leads_and_partners(daily_sales: pd.DataFrame) -> pd.DataFrame:
    """Count distinct values per (date_id, make_name) group and relabel them.

    ``nunique`` is applied to every non-key column of the grouped frame; the
    resulting ``lead_id`` and ``partner_id`` columns are then renamed to
    ``unique_leads`` and ``unique_partners``.
    """
    new_names = {'lead_id': 'unique_leads', 'partner_id': 'unique_partners'}
    grouped = daily_sales.groupby(['date_id', 'make_name'])
    distinct_counts = grouped.nunique()
    flattened = distinct_counts.reset_index()
    return flattened.rename(columns=new_names)
31 changes: 31 additions & 0 deletions GroupSoldProductsByTheDate.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# Method 1
import pandas as pd

def categorize_products(activities: pd.DataFrame) -> pd.DataFrame:
    """List the distinct products sold on each date.

    Args:
        activities: Frame with ``sell_date`` and ``product`` columns; product
            values are joined as text, so they must be strings.

    Returns:
        A frame with columns ``sell_date``, ``num_sold`` (number of distinct
        products) and ``products`` (the distinct product names sorted and
        joined with commas), sorted by ``sell_date``.
    """
    products_by_date = {}
    # Iterate the column values directly rather than ``column[i]``: the
    # latter is a label lookup and breaks when the frame's index is not the
    # default 0..n-1.
    for sell_date, product in zip(activities['sell_date'], activities['product']):
        products_by_date.setdefault(sell_date, set()).add(product)

    result = [
        [sell_date, len(products), ','.join(sorted(products))]
        for sell_date, products in products_by_date.items()
    ]
    summary = pd.DataFrame(result, columns=['sell_date', 'num_sold', 'products'])
    return summary.sort_values('sell_date')


# Method 2
import pandas as pd

def categorize_products(activities: pd.DataFrame) -> pd.DataFrame:
    """Summarise the distinct products sold on each ``sell_date``.

    For every date the result holds ``num_sold`` (the number of distinct
    products) and ``products`` (the distinct product names, sorted and
    comma-joined). ``sell_date`` is returned as an ordinary column.
    """
    def join_distinct(names):
        # De-duplicate one date's product names, then sort and join them.
        return ','.join(sorted(names.drop_duplicates()))

    per_date = activities.groupby('sell_date')
    summary = per_date.agg(
        num_sold=pd.NamedAgg(column='product', aggfunc='nunique'),
        products=pd.NamedAgg(column='product', aggfunc=join_distinct),
    )
    return summary.reset_index()