diff --git a/actors-and-directors-who-cooperated-at-least-three-times.py b/actors-and-directors-who-cooperated-at-least-three-times.py new file mode 100644 index 0000000..06f65d9 --- /dev/null +++ b/actors-and-directors-who-cooperated-at-least-three-times.py @@ -0,0 +1,58 @@ +""" + +Table: ActorDirector + ++-------------+---------+ +| Column Name | Type | ++-------------+---------+ +| actor_id | int | +| director_id | int | +| timestamp | int | ++-------------+---------+ +timestamp is the primary key (column with unique values) for this table. + + +Write a solution to find all the pairs (actor_id, director_id) where the actor has cooperated with the director at least three times. + +Return the result table in any order. + +The result format is in the following example. + + +Example 1: + +Input: +ActorDirector table: ++-------------+-------------+-------------+ +| actor_id | director_id | timestamp | ++-------------+-------------+-------------+ +| 1 | 1 | 0 | +| 1 | 1 | 1 | +| 1 | 1 | 2 | +| 1 | 2 | 3 | +| 1 | 2 | 4 | +| 2 | 1 | 5 | +| 2 | 1 | 6 | ++-------------+-------------+-------------+ +Output: ++-------------+-------------+ +| actor_id | director_id | ++-------------+-------------+ +| 1 | 1 | ++-------------+-------------+ +Explanation: The only pair is (1, 1) where they cooperated exactly 3 times. + +""" + + +# The approach groups the dataframe by 'actor_id' and 'director_id' to count the number of collaborations between each actor-director pair. +# It then keeps only the pairs that have collaborated at least three times. +# Finally, it returns only the 'actor_id' and 'director_id' columns for the qualifying pairs. 
import pandas as pd


def actors_and_directors(actor_director: pd.DataFrame) -> pd.DataFrame:
    """Return the (actor_id, director_id) pairs that occur in at least three rows.

    Args:
        actor_director: Frame with 'actor_id' and 'director_id' columns; each
            row counts as one cooperation between that actor and director.

    Returns:
        A frame holding only the 'actor_id' and 'director_id' columns, one row
        per qualifying pair. Rows keep their position index from the
        intermediate per-pair count table.
    """
    # One entry per distinct pair, valued with the number of rows in that pair.
    pair_sizes = actor_director.groupby(['actor_id', 'director_id']).size()
    tallied = pair_sizes.reset_index(name='counts')

    # Boolean mask marking the pairs that reach the three-cooperation threshold.
    is_frequent = tallied['counts'] >= 3
    return tallied.loc[is_frequent, ['actor_id', 'director_id']]


# diff --git a/daily-leads-and-partners.py b/daily-leads-and-partners.py
# new file mode 100644
# index 0000000..9b9969b
# --- /dev/null
# +++ b/daily-leads-and-partners.py
# @@ -0,0 +1,79 @@
# """
#
# Table: DailySales
#
# +-------------+---------+
# | Column Name | Type    |
# +-------------+---------+
# | date_id     | date    |
# | make_name   | varchar |
# | lead_id     | int     |
# | partner_id  | int     |
# +-------------+---------+
# There is no primary key (column with unique values) for this table. It may contain duplicates.
# This table contains the date and the name of the product sold and the IDs of the lead and partner it was sold to.
# The name consists of only lowercase English letters.
#
#
# For each date_id and make_name, find the number of distinct lead_id's and distinct partner_id's.
#
# Return the result table in any order.
#
# The result format is in the following example.
+ + +Example 1: + +Input: +DailySales table: ++-----------+-----------+---------+------------+ +| date_id | make_name | lead_id | partner_id | ++-----------+-----------+---------+------------+ +| 2020-12-8 | toyota | 0 | 1 | +| 2020-12-8 | toyota | 1 | 0 | +| 2020-12-8 | toyota | 1 | 2 | +| 2020-12-7 | toyota | 0 | 2 | +| 2020-12-7 | toyota | 0 | 1 | +| 2020-12-8 | honda | 1 | 2 | +| 2020-12-8 | honda | 2 | 1 | +| 2020-12-7 | honda | 0 | 1 | +| 2020-12-7 | honda | 1 | 2 | +| 2020-12-7 | honda | 2 | 1 | ++-----------+-----------+---------+------------+ +Output: ++-----------+-----------+--------------+-----------------+ +| date_id | make_name | unique_leads | unique_partners | ++-----------+-----------+--------------+-----------------+ +| 2020-12-8 | toyota | 2 | 3 | +| 2020-12-7 | toyota | 1 | 2 | +| 2020-12-8 | honda | 2 | 2 | +| 2020-12-7 | honda | 3 | 2 | ++-----------+-----------+--------------+-----------------+ + +Explanation: +For 2020-12-8, toyota gets leads = [0, 1] and partners = [0, 1, 2] while honda gets leads = [1, 2] and partners = [1, 2]. +For 2020-12-7, toyota gets leads = [0] and partners = [1, 2] while honda gets leads = [0, 1, 2] and partners = [1, 2]. + +""" + +# This function groups daily sales data by 'date_id' and 'make_name' to count unique leads and partners per group. +# It uses the `agg` function to apply `nunique` on 'lead_id' and 'partner_id', ensuring distinct counts. +# Finally, it renames the columns for clarity and returns the resulting DataFrame. 
import pandas as pd


def daily_leads_and_partners(daily_sales: pd.DataFrame) -> pd.DataFrame:
    """Count distinct leads and partners for every (date_id, make_name) group.

    Args:
        daily_sales: Frame with 'date_id', 'make_name', 'lead_id' and
            'partner_id' columns; duplicate rows are allowed.

    Returns:
        A frame with columns 'date_id', 'make_name', 'unique_leads' and
        'unique_partners', one row per (date_id, make_name) group.
    """
    per_make_day = daily_sales.groupby(['date_id', 'make_name'])

    # Named aggregation computes the distinct counts and assigns the output
    # column names in a single step.
    distinct_counts = per_make_day.agg(
        unique_leads=('lead_id', 'nunique'),
        unique_partners=('partner_id', 'nunique'),
    )

    # Turn the group keys back into ordinary columns.
    return distinct_counts.reset_index()


# diff --git a/group-sold-products-by-the-date.py b/group-sold-products-by-the-date.py
# new file mode 100644
# index 0000000..5f1c94e
# --- /dev/null
# +++ b/group-sold-products-by-the-date.py
# @@ -0,0 +1,71 @@
# """
#
# Table Activities:
#
# +-------------+---------+
# | Column Name | Type    |
# +-------------+---------+
# | sell_date   | date    |
# | product     | varchar |
# +-------------+---------+
# There is no primary key (column with unique values) for this table. It may contain duplicates.
# Each row of this table contains the product name and the date it was sold in a market.
#
#
# Write a solution to find for each date the number of different products sold and their names.
#
# The sold products names for each date should be sorted lexicographically.
#
# Return the result table ordered by sell_date.
#
# The result format is in the following example.
# Example 1:
#
# Input:
# Activities table:
# +------------+------------+
# | sell_date  | product    |
# +------------+------------+
# | 2020-05-30 | Headphone  |
# | 2020-06-01 | Pencil     |
# | 2020-06-02 | Mask       |
# | 2020-05-30 | Basketball |
# | 2020-06-01 | Bible      |
# | 2020-06-02 | Mask       |
# | 2020-05-30 | T-Shirt    |
# +------------+------------+
# Output:
# +------------+----------+------------------------------+
# | sell_date  | num_sold | products                     |
# +------------+----------+------------------------------+
# | 2020-05-30 | 3        | Basketball,Headphone,T-shirt |
# | 2020-06-01 | 2        | Bible,Pencil                 |
# | 2020-06-02 | 1        | Mask                         |
# +------------+----------+------------------------------+
#
# Explanation:
# For 2020-05-30, Sold items were (Headphone, Basketball, T-shirt), we sort them lexicographically and separate them by a comma.
# For 2020-06-01, Sold items were (Pencil, Bible), we sort them lexicographically and separate them by a comma.
# For 2020-06-02, the Sold item is (Mask), we just return it.
#
# """

# This function groups the products by their sell dates and calculates two statistics:
# the number of unique products sold on each date and a sorted, comma-separated list of those products.
# It then resets the index to return a structured DataFrame and ensures the output is sorted by sell date.

import pandas as pd


def categorize_products(activities: pd.DataFrame) -> pd.DataFrame:
    """Summarise, per sell date, how many distinct products were sold and which.

    Args:
        activities: Frame with 'sell_date' and 'product' columns; duplicate
            rows are allowed.

    Returns:
        A frame with columns 'sell_date', 'num_sold' and 'products', ordered
        by 'sell_date'. 'products' is the comma-separated, lexicographically
        sorted list of distinct product names for that date. Missing product
        values are ignored in both 'num_sold' and 'products'; a date whose
        products are all missing yields 0 and an empty string.
    """

    def _join_distinct(names: pd.Series) -> str:
        # Drop missing values first: 'nunique' already ignores them, and
        # leaving them in would make sorted() compare a non-string to a str.
        return ','.join(sorted(set(names.dropna())))

    stats = (
        activities.groupby('sell_date')
        .agg(
            num_sold=('product', 'nunique'),
            products=('product', _join_distinct),
        )
        .reset_index()
    )

    # Explicit ordering by date, returned as a new frame instead of mutating
    # the intermediate in place.
    return stats.sort_values('sell_date')