from collections import Counter, defaultdict

import pandas as pd


# --- Actors_and_directors_cooperated3times.py ---

def actors_and_directors(actor_director: pd.DataFrame) -> pd.DataFrame:
    """Return the (actor, director) pairs that appear in at least three rows.

    Args:
        actor_director: Frame with ``actor_id`` and ``director_id`` columns.
            Any other columns are ignored.

    Returns:
        A frame with columns ``actor_id`` and ``director_id`` holding one row
        per pair that occurs three or more times, in order of first
        appearance in the input.
    """
    # Walk the column values positionally. ``series[i]`` is a *label* lookup
    # and fails (or reads the wrong row) when the index is not 0..n-1, e.g.
    # after the caller has filtered or sliced the frame.
    pair_counts = Counter(
        zip(actor_director['actor_id'], actor_director['director_id'])
    )
    frequent_pairs = [
        pair for pair, count in pair_counts.items() if count >= 3
    ]
    return pd.DataFrame(frequent_pairs, columns=['actor_id', 'director_id'])


# --- Daily_lead_and_partners.py ---

def daily_leads_and_partners(daily_sales: pd.DataFrame) -> pd.DataFrame:
    """Count distinct leads and partners per (date, make) combination.

    Args:
        daily_sales: Frame with ``date_id``, ``make_name``, ``lead_id`` and
            ``partner_id`` columns.

    Returns:
        A frame with columns ``date_id``, ``make_name``, ``unique_leads`` and
        ``unique_partners``; one row per distinct ``(date_id, make_name)``
        pair, in order of first appearance in the input.
    """
    # (date_id, make_name) -> (distinct lead ids, distinct partner ids)
    seen = defaultdict(lambda: (set(), set()))

    # Positional iteration: independent of the frame's index labels.
    columns = (
        daily_sales['date_id'],
        daily_sales['make_name'],
        daily_sales['lead_id'],
        daily_sales['partner_id'],
    )
    for date_id, make_name, lead_id, partner_id in zip(*columns):
        leads, partners = seen[(date_id, make_name)]
        leads.add(lead_id)
        partners.add(partner_id)

    rows = [
        (date_id, make_name, len(leads), len(partners))
        for (date_id, make_name), (leads, partners) in seen.items()
    ]
    return pd.DataFrame(
        rows,
        columns=['date_id', 'make_name', 'unique_leads', 'unique_partners'],
    )
from collections import defaultdict

import pandas as pd


# --- Sold_product_by_the_date.py ---

def categorize_products(activities: pd.DataFrame) -> pd.DataFrame:
    """Summarise the distinct products sold on each date.

    Args:
        activities: Frame with ``sell_date`` and ``product`` columns.
            ``product`` values must be strings, since they are joined
            with ``','``.

    Returns:
        A frame sorted by ``sell_date`` with columns ``sell_date``,
        ``num_sold`` (number of distinct products that day) and ``products``
        (the distinct product names, sorted and comma-joined).
    """
    # sell_date -> distinct products; a set gives O(1) de-duplication
    # instead of a linear ``in`` scan over a list for every row.
    products_by_date = defaultdict(set)

    # Walk the column values positionally. ``series[i]`` is a *label* lookup
    # and fails (or reads the wrong row) when the index is not 0..n-1.
    for sell_date, product in zip(activities['sell_date'], activities['product']):
        products_by_date[sell_date].add(product)

    rows = [
        (sell_date, len(products), ','.join(sorted(products)))
        for sell_date, products in products_by_date.items()
    ]
    summary = pd.DataFrame(rows, columns=['sell_date', 'num_sold', 'products'])
    return summary.sort_values('sell_date')