update: add junda data

bigcode-project · May 8, 2024 · b379271 · b379271
2 parents 29c877a + 066b0ce
commit b379271
Show file tree

Hide file tree

Showing 79 changed files with 8,961 additions and 1 deletion.
diff --git a/data/raw/f_1706_junda_james.py b/data/raw/f_1706_junda_james.py
@@ -0,0 +1,167 @@
+import pandas as pd
+import numpy as np
+import itertools
+from datetime import datetime, timedelta
+import seaborn as sns
+
+def f_1706(df, fruits=None, days=None, seed=None, sales_lower_bound=1, sales_upper_bound=50):
+    """
+    Appends randomly generated sales data for specified fruits over a given range of days to a DataFrame,
+    and returns a seaborn boxplot of the sales.
+
+    One row is generated for every (fruit, day) pair, ordered fruit-first, each with a random integer
+    'Sales' value drawn from [sales_lower_bound, sales_upper_bound).
+
+    Parameters:
+    - df (pd.DataFrame): Initial Empty DataFrame to append sales data to. Must be empty.
+    - fruits (List[str], optional): List of fruits for sales data. Defaults to ['Apple', 'Banana', 'Cherry', 'Date', 'Elderberry'].
+    - days (List[datetime], optional): List of days for sales data. Defaults to the range from January 1, 2024, to January 7, 2024.
+    - seed (int, optional): Seed for the random number generator. When given, a private generator is seeded,
+      so NumPy's global random state is left untouched. Defaults to None (values are drawn from the global state).
+    - sales_lower_bound (int, optional): Inclusive lower bound for random sales values. Defaults to 1.
+    - sales_upper_bound (int, optional): Exclusive upper bound for random sales values. Defaults to 50.
+
+    Returns:
+    Tuple[pd.DataFrame, matplotlib.axes.Axes]: Updated DataFrame with columns 'Fruit', 'Day' and 'Sales',
+    and the Axes object holding the seaborn boxplot of sales per fruit.
+
+    Raises:
+    TypeError: If 'df' is not a pandas DataFrame.
+    ValueError: If 'df' is not empty or if 'sales_lower_bound' is not less than 'sales_upper_bound'.
+
+    Requirements:
+    - pandas
+    - numpy
+    - itertools
+    - datetime
+    - seaborn
+
+    Example:
+    >>> initial_df = pd.DataFrame()
+    >>> report_df, plot = f_1706(initial_df, seed=42)
+    >>> print(report_df.head())
+       Fruit        Day  Sales
+    0  Apple 2024-01-01     39
+    1  Apple 2024-01-02     29
+    2  Apple 2024-01-03     15
+    3  Apple 2024-01-04     43
+    4  Apple 2024-01-05      8
+    >>> plot.figure.show()
+
+    """
+    if not isinstance(df, pd.DataFrame):
+        raise TypeError("Input must be a pandas DataFrame")
+    if not df.empty:
+        raise ValueError("Input DataFrame must be empty")
+    if sales_lower_bound >= sales_upper_bound:
+        raise ValueError("sales_lower_bound must be less than sales_upper_bound")
+
+    if fruits is None:
+        fruits = ['Apple', 'Banana', 'Cherry', 'Date', 'Elderberry']
+    if days is None:
+        # Set days to range from January 1, 2024, to January 7, 2024
+        days = [datetime(2024, 1, 1) + timedelta(days=x) for x in range(7)]
+
+    # A seeded legacy RandomState yields exactly the stream that np.random.seed(seed)
+    # followed by np.random.randint would, without overwriting the caller's global RNG state.
+    # Without a seed, keep drawing from the global generator as before.
+    rng = np.random if seed is None else np.random.RandomState(seed)
+
+    data = list(itertools.product(fruits, days))
+    sales_data = pd.DataFrame(data, columns=['Fruit', 'Day'])
+    sales_data['Sales'] = rng.randint(sales_lower_bound, sales_upper_bound, size=len(data))
+
+    result_df = pd.concat([df, sales_data])
+    # sns.boxplot draws onto the current matplotlib Axes and returns that Axes.
+    plot = sns.boxplot(x='Fruit', y='Sales', data=result_df)
+
+    return result_df, plot
+
+import unittest
+import pandas as pd
+import numpy as np
+from datetime import datetime
+
+class TestCases(unittest.TestCase):
+    """Unit tests for f_1706: default and custom inputs, validation errors and seeding."""
+
+    def setUp(self):
+        # Default date range (January 1-7, 2024) that f_1706 falls back to.
+        # Built directly from day numbers so the tests need only `datetime`.
+        self.default_days = [datetime(2024, 1, day) for day in range(1, 8)]
+
+    def test_default_days_range(self):
+        """Test the default days range is correctly applied."""
+        initial_df = pd.DataFrame()
+        report_df, _ = f_1706(initial_df, seed=42)
+        unique_days = sorted(report_df['Day'].dt.date.unique())
+        expected_days = [day.date() for day in self.default_days]
+        self.assertEqual(len(unique_days), len(expected_days), "The number of unique days should match the default range.")
+        for day in unique_days:
+            self.assertIn(day, expected_days, "Each unique day should be within the default range.")
+
+    def test_custom_days_range(self):
+        """Test functionality with a custom days range."""
+        initial_df = pd.DataFrame()
+        custom_days = [datetime(2024, 1, 10), datetime(2024, 1, 11)]
+        report_df, _ = f_1706(initial_df, days=custom_days, seed=42)
+        unique_days = sorted(report_df['Day'].dt.date.unique())
+        expected_custom_days = [day.date() for day in custom_days]
+        self.assertEqual(len(unique_days), len(expected_custom_days), "The number of unique days should match the custom range.")
+        for day in unique_days:
+            self.assertIn(day, expected_custom_days, "Each unique day should be within the custom range.")
+
+    def test_sales_bounds(self):
+        """Test custom sales bounds are respected (lower inclusive, upper exclusive)."""
+        initial_df = pd.DataFrame()
+        report_df, _ = f_1706(initial_df, seed=42, sales_lower_bound=20, sales_upper_bound=30)
+        sales_values = report_df['Sales'].unique()
+        self.assertTrue(all(20 <= val < 30 for val in sales_values), "All sales values should be within the specified bounds.")
+
+    def test_invalid_sales_bounds(self):
+        """Test error handling for invalid sales bounds."""
+        with self.assertRaises(ValueError):
+            f_1706(pd.DataFrame(), sales_lower_bound=50, sales_upper_bound=10)
+
+    def test_with_non_dataframe_input(self):
+        """Test that providing a non-DataFrame input raises a TypeError."""
+        with self.assertRaises(TypeError):
+            f_1706("not_a_dataframe")
+
+    def test_reproducibility_with_seed(self):
+        """Test reproducibility of sales data generation with a fixed seed."""
+        initial_df = pd.DataFrame()
+        df1, _ = f_1706(initial_df, seed=42)
+        df2, _ = f_1706(initial_df, seed=42)
+        # assert_frame_equal has no message parameter; its third positional
+        # argument is check_dtype, so only the two frames are passed.
+        pd.testing.assert_frame_equal(df1, df2)
+
+    def test_with_custom_fruits_and_days(self):
+        """Test exact output for custom fruits and days with a fixed seed."""
+        fruits = ['Mango', 'Pineapple']
+        days = [pd.Timestamp('2023-01-01'), pd.Timestamp('2023-01-02')]
+        initial_df = pd.DataFrame()
+        report_df, plot = f_1706(initial_df, fruits=fruits, days=days, sales_lower_bound=1, sales_upper_bound=50, seed=42)
+
+        self.assertEqual(len(report_df['Fruit'].unique()), len(fruits), "Number of unique fruits should match the input")
+        self.assertEqual(len(report_df['Day'].unique()), len(days), "Number of unique days should match the input")
+        self.assertTrue(hasattr(plot, 'figure'), "Plot object should have a 'figure' attribute")
+
+        # Convert DataFrame to a list of strings for each row
+        df_list = report_df.apply(lambda row: ','.join(row.values.astype(str)), axis=1).tolist()
+
+        # Check if the converted list matches the expected output.
+        # assertEqual (not assertAlmostEqual) is required: the latter treats its third
+        # positional argument as `places` and cannot diff two lists of strings.
+        expect_output = ['Mango,2023-01-01 00:00:00,39', 'Mango,2023-01-02 00:00:00,29', 'Pineapple,2023-01-01 00:00:00,15', 'Pineapple,2023-01-02 00:00:00,43']
+        self.assertEqual(df_list, expect_output, "DataFrame contents should match the expected output")
+
+    def test_error_on_non_empty_dataframe(self):
+        """Test that a ValueError is raised if the input DataFrame is not empty."""
+        # Create a non-empty DataFrame
+        non_empty_df = pd.DataFrame({'A': [1, 2, 3]})
+
+        # Attempt to call f_1706 with a non-empty DataFrame and check for ValueError
+        with self.assertRaises(ValueError) as context:
+            f_1706(non_empty_df, seed=42)
+
+        # Check the error message to ensure it's for the non-empty DataFrame condition
+        self.assertIn("Input DataFrame must be empty", str(context.exception), "Function should raise ValueError for non-empty DataFrame input.")
+
+def run_tests():
+    """Collect every test defined on TestCases and execute it with the text runner."""
+    test_suite = unittest.TestLoader().loadTestsFromTestCase(TestCases)
+    unittest.TextTestRunner().run(test_suite)
+
+if __name__ == "__main__":
+    # Script entry point: check the docstring examples first, then run the unit tests.
+    from doctest import testmod
+
+    testmod()
+    run_tests()
diff --git a/data/raw/f_1718_junda_james.py b/data/raw/f_1718_junda_james.py
@@ -0,0 +1,145 @@
+import numpy as np
+import pandas as pd
+
+def f_1718(products, n_samples=100, sales_lower=50, sales_upper=200, profit_margin_min=0.1, profit_margin_max=0.5, random_seed=42):
+    """
+    Generate a sales report with randomly simulated sales and profit data for a given list of products.
+    The data is aggregated by product and sorted by total profit in descending order.
+
+    Each of the n_samples data points picks a random product, a random integer sales value and a random
+    profit margin; profit is sales multiplied by the margin.
+
+    Parameters:
+    - products (list of str): List of product names. An empty list yields an empty report.
+    - n_samples (int): The number of data points to generate for the report. Default is 100.
+    - sales_lower (int): The minimum sales value (inclusive) for the random generation. Default is 50.
+    - sales_upper (int): The maximum sales value (inclusive) for the random generation. Default is 200.
+    - profit_margin_min (float): The minimum profit margin as a fraction of sales. Default is 0.1.
+    - profit_margin_max (float): The maximum profit margin as a fraction of sales. Default is 0.5.
+    - random_seed (int): Seed for the random number generator to ensure reproducibility. Default is 42.
+      A private generator is seeded, so NumPy's global random state is left untouched.
+
+    Returns:
+    pd.DataFrame: A DataFrame with columns 'Product', 'Sales' and 'Profit' containing aggregated sales and
+    profit data for each sampled product, sorted by profit in descending order.
+
+    Raises:
+    TypeError: If products is not a list of strings (checked before anything else, including for falsy
+        values such as None).
+    ValueError: If n_samples is not a positive integer; if sales_lower or sales_upper is not an integer or
+        sales_lower is not less than sales_upper; or if profit_margin_min or profit_margin_max is not
+        numeric or profit_margin_min is not less than profit_margin_max. These checks are skipped when
+        products is empty.
+
+    Requirements:
+    - numpy
+    - pandas
+
+    Example:
+    >>> products = ["iPhone", "iPad", "Macbook", "Airpods", "Apple Watch"]
+    >>> report = f_1718(products, n_samples=50, sales_lower=100, sales_upper=150, profit_margin_min=0.2, profit_margin_max=0.4, random_seed=42)
+    >>> print(report)
+           Product  Sales      Profit
+    2      Macbook   1561  444.826709
+    3         iPad   1383  401.925334
+    0      Airpods   1297  381.482713
+    1  Apple Watch   1123  308.078536
+    4       iPhone    921  294.013887
+    """
+    # Validate the container type before the empty-list shortcut, so falsy non-list
+    # inputs (None, "", 0) raise TypeError instead of silently producing an empty report.
+    if not isinstance(products, list) or not all(isinstance(product, str) for product in products):
+        raise TypeError("products must be a list of strings.")
+
+    if not products:
+        return pd.DataFrame(columns=["Product", "Sales", "Profit"])
+
+    if not isinstance(n_samples, int) or n_samples <= 0:
+        raise ValueError("n_samples must be a positive integer.")
+    if not (isinstance(sales_lower, int) and isinstance(sales_upper, int)) or sales_lower >= sales_upper:
+        raise ValueError("sales_lower must be less than sales_upper and both must be integers.")
+    if not all(isinstance(x, (int, float)) for x in [profit_margin_min, profit_margin_max]) or profit_margin_min >= profit_margin_max:
+        raise ValueError("profit_margin_min must be less than profit_margin_max and both must be numeric.")
+
+    # A seeded legacy RandomState produces exactly the stream that np.random.seed(random_seed)
+    # would, without overwriting the caller's global RNG state.
+    rng = np.random.RandomState(random_seed)
+
+    data = []
+    for _ in range(n_samples):
+        # Draw order (product, sales, margin) is kept per sample so seeded output stays stable.
+        product = rng.choice(products)
+        sales = rng.randint(sales_lower, sales_upper + 1)  # +1 makes sales_upper inclusive
+        profit = sales * rng.uniform(profit_margin_min, profit_margin_max)
+        data.append([product, sales, profit])
+
+    df = pd.DataFrame(data, columns=["Product", "Sales", "Profit"])
+    df = df.groupby("Product", as_index=False).sum()
+
+    return df.sort_values("Profit", ascending=False)
+
+import pandas as pd
+import unittest
+
+class TestCases(unittest.TestCase):
+    """Unit tests for f_1718: reproducibility, aggregation, sorting and input validation."""
+
+    def test_random_reproducibility(self):
+        """Test that identical arguments and seed give identical reports."""
+        report1 = f_1718(["iPhone", "iPad"], n_samples=50, sales_lower=50, sales_upper=200, profit_margin_min=0.1, profit_margin_max=0.5, random_seed=42)
+        report2 = f_1718(["iPhone", "iPad"], n_samples=50, sales_lower=50, sales_upper=200, profit_margin_min=0.1, profit_margin_max=0.5, random_seed=42)
+        pd.testing.assert_frame_equal(report1, report2)
+
+    def test_number_of_rows(self):
+        """Test that the report has one aggregated row per distinct product."""
+        report = f_1718(["iPhone", "iPad"], n_samples=50, sales_lower=50, sales_upper=200)
+        self.assertEqual(len(report), len({"iPhone", "iPad"}))
+
+    def test_sorting_by_profit(self):
+        """Test that rows are ordered by profit, highest first."""
+        report = f_1718(["iPhone", "iPad"], sales_lower=50, sales_upper=200)
+        self.assertTrue(report["Profit"].is_monotonic_decreasing)
+
+    def test_custom_parameters(self):
+        """Test the report structure when every parameter is customised."""
+        products = ["iPhone", "iPad", "Macbook", "Airpods", "Apple Watch"]
+        report = f_1718(products, n_samples=50, sales_lower=100, sales_upper=150, profit_margin_min=0.2, profit_margin_max=0.4, random_seed=42)
+        self.assertTrue(len(report) > 0, "The report should contain aggregated sales and profit data.")
+        self.assertEqual(list(report.columns), ["Product", "Sales", "Profit"])
+        # Aggregation can only yield products that were offered, each at most once.
+        self.assertTrue(set(report["Product"]) <= set(products))
+        self.assertEqual(len(report), report["Product"].nunique())
+
+    def test_new_custom_parameters(self):
+        """Test the exact seeded output for the documented example."""
+        report1 = f_1718(["iPhone", "iPad", "Macbook", "Airpods", "Apple Watch"], n_samples=50, sales_lower=100, sales_upper=150, profit_margin_min=0.2, profit_margin_max=0.4, random_seed=42)
+        df_list = report1.apply(lambda row: ','.join(row.values.astype(str)), axis=1).tolist()
+        expect = ['Macbook,1561,444.82670855378143', 'iPad,1383,401.9253335536443', 'Airpods,1297,381.4827132170069', 'Apple Watch,1123,308.07853599252707', 'iPhone,921,294.0138866107959']
+
+        self.assertEqual(df_list, expect, "DataFrame contents should match the expected output")
+
+    def test_sales_bounds_validation(self):
+        """Test that an error is raised if sales_lower is greater than sales_upper."""
+        with self.assertRaises(ValueError):
+            f_1718(["Product1"], sales_lower=250, sales_upper=100)
+
+    def test_profit_margin_validation(self):
+        """Test that an error is raised if profit_margin_min is greater than or equal to profit_margin_max."""
+        with self.assertRaises(ValueError):
+            f_1718(["Product1"], profit_margin_min=0.6, profit_margin_max=0.5)
+
+    def test_product_list_validation(self):
+        """Test that an error is raised if the products list is not a list of strings."""
+        with self.assertRaises(TypeError):
+            f_1718([123, 456], n_samples=10)
+
+    def test_n_samples_validation(self):
+        """Test that an error is raised if n_samples is not a positive integer."""
+        with self.assertRaises(ValueError):
+            f_1718(["Product1"], n_samples=-10)
+
+    def test_empty_product_list(self):
+        """Test that the function can handle an empty product list."""
+        report = f_1718([], n_samples=10)
+        self.assertTrue(report.empty, "The report should be empty if no products are provided.")
+
+    def test_zero_samples(self):
+        """Test handling of zero samples."""
+        # Zero (not a negative count) is the boundary this test is named for.
+        with self.assertRaises(ValueError):
+            f_1718(["Product1"], n_samples=0)
+
+    def test_single_product_reproducibility(self):
+        """Test that the function generates consistent results for a single product across multiple runs."""
+        report1 = f_1718(["Product1"], n_samples=10, random_seed=42)
+        report2 = f_1718(["Product1"], n_samples=10, random_seed=42)
+        pd.testing.assert_frame_equal(report1, report2)
+
+
+def run_tests():
+    """Build a suite from TestCases and run it through unittest's text runner."""
+    cases = unittest.TestLoader().loadTestsFromTestCase(TestCases)
+    unittest.TextTestRunner().run(cases)
+
+if __name__ == "__main__":
+    # Script entry point: verify the docstring examples, then run the unit tests.
+    from doctest import testmod
+
+    testmod()
+    run_tests()