# File: actors-and-directors-who-cooperated-at-least-three-times.py
"""

Table: ActorDirector

+-------------+---------+
| Column Name | Type |
+-------------+---------+
| actor_id | int |
| director_id | int |
| timestamp | int |
+-------------+---------+
timestamp is the primary key (column with unique values) for this table.


Write a solution to find all the pairs (actor_id, director_id) where the actor has cooperated with the director at least three times.

Return the result table in any order.

The result format is in the following example.


Example 1:

Input:
ActorDirector table:
+-------------+-------------+-------------+
| actor_id | director_id | timestamp |
+-------------+-------------+-------------+
| 1 | 1 | 0 |
| 1 | 1 | 1 |
| 1 | 1 | 2 |
| 1 | 2 | 3 |
| 1 | 2 | 4 |
| 2 | 1 | 5 |
| 2 | 1 | 6 |
+-------------+-------------+-------------+
Output:
+-------------+-------------+
| actor_id | director_id |
+-------------+-------------+
| 1 | 1 |
+-------------+-------------+
Explanation: The only pair is (1, 1) where they cooperated exactly 3 times.

"""


# The approach groups the DataFrame by 'actor_id' and 'director_id' to count the number of collaborations for each actor-director pair.
# It then keeps only the pairs that have collaborated at least three times.
# Finally, it returns just the 'actor_id' and 'director_id' columns for the qualifying pairs.

import pandas as pd

def actors_and_directors(actor_director: pd.DataFrame) -> pd.DataFrame:
    # Count collaborations per (actor_id, director_id) pair
    cnts = actor_director.groupby(['actor_id', 'director_id']).size().reset_index(name='counts')
    # Keep only pairs with at least three collaborations
    return cnts[cnts['counts'] >= 3][['actor_id', 'director_id']]
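A quick sanity check of the groupby-and-filter approach on the example table from the problem statement (the counting logic is repeated inline so the snippet runs on its own):

```python
import pandas as pd

# Example data from the problem statement
df = pd.DataFrame({
    'actor_id':    [1, 1, 1, 1, 1, 2, 2],
    'director_id': [1, 1, 1, 2, 2, 1, 1],
    'timestamp':   [0, 1, 2, 3, 4, 5, 6],
})

# Count collaborations per (actor_id, director_id) pair ...
cnts = df.groupby(['actor_id', 'director_id']).size().reset_index(name='counts')
# ... and keep only pairs with three or more
out = cnts[cnts['counts'] >= 3][['actor_id', 'director_id']]
print(out)  # a single row: actor_id=1, director_id=1
```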

# File: daily-leads-and-partners.py
"""

Table: DailySales

+-------------+---------+
| Column Name | Type |
+-------------+---------+
| date_id | date |
| make_name | varchar |
| lead_id | int |
| partner_id | int |
+-------------+---------+
There is no primary key (column with unique values) for this table. It may contain duplicates.
This table contains the date and the name of the product sold and the IDs of the lead and partner it was sold to.
The name consists of only lowercase English letters.


For each date_id and make_name, find the number of distinct lead_id's and distinct partner_id's.

Return the result table in any order.

The result format is in the following example.


Example 1:

Input:
DailySales table:
+-----------+-----------+---------+------------+
| date_id | make_name | lead_id | partner_id |
+-----------+-----------+---------+------------+
| 2020-12-8 | toyota | 0 | 1 |
| 2020-12-8 | toyota | 1 | 0 |
| 2020-12-8 | toyota | 1 | 2 |
| 2020-12-7 | toyota | 0 | 2 |
| 2020-12-7 | toyota | 0 | 1 |
| 2020-12-8 | honda | 1 | 2 |
| 2020-12-8 | honda | 2 | 1 |
| 2020-12-7 | honda | 0 | 1 |
| 2020-12-7 | honda | 1 | 2 |
| 2020-12-7 | honda | 2 | 1 |
+-----------+-----------+---------+------------+
Output:
+-----------+-----------+--------------+-----------------+
| date_id | make_name | unique_leads | unique_partners |
+-----------+-----------+--------------+-----------------+
| 2020-12-8 | toyota | 2 | 3 |
| 2020-12-7 | toyota | 1 | 2 |
| 2020-12-8 | honda | 2 | 2 |
| 2020-12-7 | honda | 3 | 2 |
+-----------+-----------+--------------+-----------------+

Explanation:
For 2020-12-8, toyota gets leads = [0, 1] and partners = [0, 1, 2] while honda gets leads = [1, 2] and partners = [1, 2].
For 2020-12-7, toyota gets leads = [0] and partners = [1, 2] while honda gets leads = [0, 1, 2] and partners = [1, 2].

"""

# This function groups daily sales data by 'date_id' and 'make_name' to count unique leads and partners per group.
# It uses the `agg` function to apply `nunique` on 'lead_id' and 'partner_id', ensuring distinct counts.
# Finally, it renames the columns for clarity and returns the resulting DataFrame.

import pandas as pd

def daily_leads_and_partners(daily_sales: pd.DataFrame) -> pd.DataFrame:
    # Count distinct leads and partners per (date_id, make_name) group
    df = daily_sales.groupby(['date_id', 'make_name']).agg({
        'lead_id': 'nunique',
        'partner_id': 'nunique'
    }).reset_index()

    # Rename the aggregated columns to match the expected output
    df = df.rename(columns={
        'lead_id': 'unique_leads',
        'partner_id': 'unique_partners'
    })

    return df
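An equivalent one-step variant uses pandas named aggregation, which folds the rename into the `agg` call; the data below reproduces the example table:

```python
import pandas as pd

# Example data from the problem statement
df = pd.DataFrame({
    'date_id':    ['2020-12-8'] * 3 + ['2020-12-7'] * 2 + ['2020-12-8'] * 2 + ['2020-12-7'] * 3,
    'make_name':  ['toyota'] * 5 + ['honda'] * 5,
    'lead_id':    [0, 1, 1, 0, 0, 1, 2, 0, 1, 2],
    'partner_id': [1, 0, 2, 2, 1, 2, 1, 1, 2, 1],
})

# Named aggregation counts distinct values and names the output columns in one step
out = df.groupby(['date_id', 'make_name']).agg(
    unique_leads=('lead_id', 'nunique'),
    unique_partners=('partner_id', 'nunique'),
).reset_index()
print(out)  # four rows, e.g. (2020-12-8, toyota) -> 2 unique leads, 3 unique partners
```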

# File: group-sold-products-by-the-date.py
"""

Table Activities:

+-------------+---------+
| Column Name | Type |
+-------------+---------+
| sell_date | date |
| product | varchar |
+-------------+---------+
There is no primary key (column with unique values) for this table. It may contain duplicates.
Each row of this table contains the product name and the date it was sold in a market.


Write a solution to find, for each date, the number of different products sold and their names.

The sold products names for each date should be sorted lexicographically.

Return the result table ordered by sell_date.

The result format is in the following example.


Example 1:

Input:
Activities table:
+------------+------------+
| sell_date | product |
+------------+------------+
| 2020-05-30 | Headphone |
| 2020-06-01 | Pencil |
| 2020-06-02 | Mask |
| 2020-05-30 | Basketball |
| 2020-06-01 | Bible |
| 2020-06-02 | Mask |
| 2020-05-30 | T-Shirt |
+------------+------------+
Output:
+------------+----------+------------------------------+
| sell_date | num_sold | products |
+------------+----------+------------------------------+
| 2020-05-30 | 3 | Basketball,Headphone,T-shirt |
| 2020-06-01 | 2 | Bible,Pencil |
| 2020-06-02 | 1 | Mask |
+------------+----------+------------------------------+

Explanation:
For 2020-05-30, Sold items were (Headphone, Basketball, T-shirt), we sort them lexicographically and separate them by a comma.
For 2020-06-01, Sold items were (Pencil, Bible), we sort them lexicographically and separate them by a comma.
For 2020-06-02, the Sold item is (Mask), we just return it.

"""

# This function groups the products by their sell dates and calculates two statistics:
# the number of unique products sold on each date and a sorted, comma-separated list of those products.
# It then resets the index to return a structured DataFrame and ensures the output is sorted by sell date.

import pandas as pd

def categorize_products(activities: pd.DataFrame) -> pd.DataFrame:
    groups = activities.groupby('sell_date')

    # num_sold: distinct products per date; products: sorted, comma-separated names
    stats = groups.agg(
        num_sold=('product', 'nunique'),
        products=('product', lambda x: ','.join(sorted(set(x))))
    ).reset_index()

    # Order the result by sell_date as required
    stats.sort_values('sell_date', inplace=True)

    return stats
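A self-contained check of the aggregation on the example data; the logic is repeated inline so the snippet runs on its own (note the comma-joined list comes out already sorted because `sorted` is applied before the join):

```python
import pandas as pd

# Example data from the problem statement
df = pd.DataFrame({
    'sell_date': ['2020-05-30', '2020-06-01', '2020-06-02',
                  '2020-05-30', '2020-06-01', '2020-06-02', '2020-05-30'],
    'product':   ['Headphone', 'Pencil', 'Mask', 'Basketball',
                  'Bible', 'Mask', 'T-Shirt'],
})

out = df.groupby('sell_date').agg(
    num_sold=('product', 'nunique'),                          # distinct products per date
    products=('product', lambda s: ','.join(sorted(set(s)))), # deduplicated, sorted, joined
).reset_index().sort_values('sell_date')
print(out)
```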