-
Notifications
You must be signed in to change notification settings - Fork 11
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing 79 changed files with 8,961 additions and 1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,167 @@ | ||
import pandas as pd | ||
import numpy as np | ||
import itertools | ||
from datetime import datetime, timedelta | ||
import seaborn as sns | ||
|
||
def f_1706(df, fruits=None, days=None, seed=None, sales_lower_bound=1, sales_upper_bound=50):
    """
    Appends randomly generated sales data for specified fruits over a given range of days to a DataFrame,
    and returns a seaborn boxplot of the sales.

    One row is generated for every (fruit, day) combination, in fruit-major order.

    Parameters:
    - df (pd.DataFrame): Initial Empty DataFrame to append sales data to. Must be empty.
    - fruits (List[str], optional): List of fruits for sales data. Defaults to ['Apple', 'Banana', 'Cherry', 'Date', 'Elderberry'].
    - days (List[datetime], optional): List of days for sales data. Defaults to the range from January 1, 2024, to January 7, 2024.
    - seed (int, optional): Seed for the random number generator. Defaults to None. When given, the
      global NumPy random state is seeded with it (np.random.seed).
    - sales_lower_bound (int, optional): Lower bound (inclusive) for random sales values. Defaults to 1.
    - sales_upper_bound (int, optional): Upper bound (exclusive) for random sales values. Defaults to 50.

    Returns:
    Tuple[pd.DataFrame, matplotlib.axes.Axes]: Updated DataFrame with columns 'Fruit', 'Day' and 'Sales',
    and the Axes object returned by seaborn's boxplot of 'Sales' grouped by 'Fruit'.

    Raises:
    TypeError: If 'df' is not a pandas DataFrame.
    ValueError: If 'df' is not empty or If 'sales_lower_bound' is not less than 'sales_upper_bound'.

    Requirements:
    - pandas
    - numpy
    - itertools
    - datetime
    - seaborn

    Example:
    >>> initial_df = pd.DataFrame()
    >>> report_df, plot = f_1706(initial_df, seed=42)
    >>> print(report_df.head())
       Fruit        Day  Sales
    0  Apple 2024-01-01     39
    1  Apple 2024-01-02     29
    2  Apple 2024-01-03     15
    3  Apple 2024-01-04     43
    4  Apple 2024-01-05      8
    >>> plot.figure.show()
    """
    if not isinstance(df, pd.DataFrame):
        raise TypeError("Input must be a pandas DataFrame")
    if not df.empty:
        raise ValueError("Input DataFrame must be empty")
    if sales_lower_bound >= sales_upper_bound:
        raise ValueError("sales_lower_bound must be less than sales_upper_bound")

    if fruits is None:
        fruits = ['Apple', 'Banana', 'Cherry', 'Date', 'Elderberry']
    if days is None:
        # Set days to range from January 1, 2024, to January 7, 2024
        days = [datetime(2024, 1, 1) + timedelta(days=x) for x in range(7)]

    if seed is not None:
        np.random.seed(seed)

    data = list(itertools.product(fruits, days))
    sales_data = pd.DataFrame(data, columns=['Fruit', 'Day'])
    # randint excludes the upper bound, so values fall in [sales_lower_bound, sales_upper_bound).
    sales_data['Sales'] = np.random.randint(sales_lower_bound, sales_upper_bound, size=len(data))

    # DataFrame.empty is also True for a frame that has index rows but no columns;
    # slicing to zero rows keeps any declared columns while preventing those rows
    # from being concatenated in as all-NaN records.
    result_df = pd.concat([df.iloc[:0], sales_data], ignore_index=True)
    plot = sns.boxplot(x='Fruit', y='Sales', data=result_df)

    return result_df, plot
|
||
import unittest | ||
import pandas as pd | ||
import numpy as np | ||
from datetime import datetime | ||
|
||
class TestCases(unittest.TestCase):
    """Unit tests for f_1706: defaults, custom inputs, bounds, validation and reproducibility."""

    def setUp(self):
        # Define the default date range for comparison in tests
        self.default_days = [datetime(2024, 1, 1) + timedelta(days=x) for x in range(7)]

    def test_default_days_range(self):
        """Test the default days range is correctly applied."""
        initial_df = pd.DataFrame()
        report_df, _ = f_1706(initial_df, seed=42)
        unique_days = sorted(report_df['Day'].dt.date.unique())
        expected_days = [day.date() for day in self.default_days]
        self.assertEqual(len(unique_days), len(expected_days), "The number of unique days should match the default range.")
        for day in unique_days:
            self.assertIn(day, expected_days, "Each unique day should be within the default range.")

    def test_custom_days_range(self):
        """Test functionality with a custom days range."""
        initial_df = pd.DataFrame()
        custom_days = [datetime(2024, 1, 10), datetime(2024, 1, 11)]
        report_df, _ = f_1706(initial_df, days=custom_days, seed=42)
        unique_days = sorted(report_df['Day'].dt.date.unique())
        expected_custom_days = [day.date() for day in custom_days]
        self.assertEqual(len(unique_days), len(expected_custom_days), "The number of unique days should match the custom range.")
        for day in unique_days:
            self.assertIn(day, expected_custom_days, "Each unique day should be within the custom range.")

    def test_sales_bounds(self):
        """Test custom sales bounds are respected."""
        initial_df = pd.DataFrame()
        report_df, _ = f_1706(initial_df, seed=42, sales_lower_bound=20, sales_upper_bound=30)
        sales_values = report_df['Sales'].unique()
        self.assertTrue(all(20 <= val < 30 for val in sales_values), "All sales values should be within the specified bounds.")

    def test_invalid_sales_bounds(self):
        """Test error handling for invalid sales bounds."""
        with self.assertRaises(ValueError):
            f_1706(pd.DataFrame(), sales_lower_bound=50, sales_upper_bound=10)

    def test_with_non_dataframe_input(self):
        """Test that providing a non-DataFrame input raises a TypeError."""
        with self.assertRaises(TypeError):
            f_1706("not_a_dataframe")

    def test_reproducibility_with_seed(self):
        """Test reproducibility of sales data generation with a fixed seed."""
        initial_df = pd.DataFrame()
        df1, _ = f_1706(initial_df, seed=42)
        df2, _ = f_1706(initial_df, seed=42)
        # DataFrames generated with the same seed should be identical.
        # (assert_frame_equal takes no message argument; its third positional
        # parameter is check_dtype, so no string is passed here.)
        pd.testing.assert_frame_equal(df1, df2)

    def test_with_custom_fruits_and_days(self):
        """Test custom fruits and days produce the expected rows and a plot object."""
        fruits = ['Mango', 'Pineapple']
        days = [pd.Timestamp('2023-01-01'), pd.Timestamp('2023-01-02')]
        initial_df = pd.DataFrame()
        report_df, plot = f_1706(initial_df, fruits=fruits, days=days, sales_lower_bound=1, sales_upper_bound=50, seed=42)

        self.assertEqual(len(report_df['Fruit'].unique()), len(fruits), "Number of unique fruits should match the input")
        self.assertEqual(len(report_df['Day'].unique()), len(days), "Number of unique days should match the input")
        self.assertTrue(hasattr(plot, 'figure'), "Plot object should have a 'figure' attribute")

        # Convert DataFrame to a list of strings for each row
        df_list = report_df.apply(lambda row: ','.join(row.values.astype(str)), axis=1).tolist()

        # Check if the converted list matches the expected output
        expect_output = ['Mango,2023-01-01 00:00:00,39', 'Mango,2023-01-02 00:00:00,29', 'Pineapple,2023-01-01 00:00:00,15', 'Pineapple,2023-01-02 00:00:00,43']
        # assertEqual, not assertAlmostEqual: the latter treats its third positional
        # argument as `places` and cannot subtract lists when they differ.
        self.assertEqual(df_list, expect_output, "DataFrame contents should match the expected output")

    def test_error_on_non_empty_dataframe(self):
        """Test that a ValueError is raised if the input DataFrame is not empty."""
        # Create a non-empty DataFrame
        non_empty_df = pd.DataFrame({'A': [1, 2, 3]})

        # Attempt to call f_1706 with a non-empty DataFrame and check for ValueError
        with self.assertRaises(ValueError) as context:
            f_1706(non_empty_df, seed=42)

        # Check the error message to ensure it's for the non-empty DataFrame condition
        self.assertIn("Input DataFrame must be empty", str(context.exception), "Function should raise ValueError for non-empty DataFrame input.")
|
||
def run_tests():
    """Collect every test defined on TestCases and execute it with a text runner."""
    test_suite = unittest.TestLoader().loadTestsFromTestCase(TestCases)
    unittest.TextTestRunner().run(test_suite)
|
||
if __name__ == "__main__":
    # Script entry point: check the docstring examples, then run the unit tests.
    import doctest

    doctest.testmod()
    run_tests()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,145 @@ | ||
import numpy as np | ||
import pandas as pd | ||
|
||
def f_1718(products, n_samples=100, sales_lower=50, sales_upper=200, profit_margin_min=0.1, profit_margin_max=0.5, random_seed=42):
    """
    Generate a sales report with randomly simulated sales and profit data for a given list of products.
    The data is aggregated by product and sorted by total profit in descending order.

    Each of the n_samples data points picks a random product, a random integer sales value and a
    random profit margin; sales and profit are then summed per product. Products that are never
    picked do not appear in the report.

    Parameters:
    - products (list of str): List of product names. An empty list yields an empty report.
    - n_samples (int): The number of data points to generate for the report. Default is 100.
    - sales_lower (int): The minimum sales value (inclusive) for the random generation. Default is 50.
    - sales_upper (int): The maximum sales value (inclusive) for the random generation. Default is 200.
    - profit_margin_min (float): The minimum profit margin as a fraction of sales. Default is 0.1.
    - profit_margin_max (float): The maximum profit margin as a fraction of sales. Default is 0.5.
    - random_seed (int): Seed for the random number generator to ensure reproducibility. Default is 42.
      The global NumPy random state is seeded with it (np.random.seed) before sampling.

    Returns:
    pd.DataFrame: A DataFrame with columns 'Product', 'Sales' and 'Profit' containing aggregated
    sales and profit data for each product, sorted by profit in descending order.

    Raises:
    TypeError: If products is not a list of strings.
    ValueError: If n_samples is not a positive integer; if sales_lower or sales_upper is not an
        integer or sales_lower is not less than sales_upper; or if profit_margin_min or
        profit_margin_max is not numeric or profit_margin_min is not less than profit_margin_max.

    Requirements:
    - numpy
    - pandas

    Example:
    >>> products = ["iPhone", "iPad", "Macbook", "Airpods", "Apple Watch"]
    >>> report = f_1718(products, n_samples=50, sales_lower=100, sales_upper=150, profit_margin_min=0.2, profit_margin_max=0.4, random_seed=42)
    >>> print(report)
           Product  Sales      Profit
    2      Macbook   1561  444.826709
    3         iPad   1383  401.925334
    0      Airpods   1297  381.482713
    1  Apple Watch   1123  308.078536
    4       iPhone    921  294.013887
    """
    # Check the container type before the emptiness shortcut so that falsy
    # non-list values (None, "", ()) are rejected instead of silently
    # producing an empty report.
    if not isinstance(products, list) or not all(isinstance(product, str) for product in products):
        raise TypeError("products must be a list of strings.")

    if not products:
        return pd.DataFrame(columns=["Product", "Sales", "Profit"])

    if not isinstance(n_samples, int) or n_samples <= 0:
        raise ValueError("n_samples must be a positive integer.")
    if not (isinstance(sales_lower, int) and isinstance(sales_upper, int)) or sales_lower >= sales_upper:
        raise ValueError("sales_lower must be less than sales_upper and both must be integers.")
    if not all(isinstance(x, (int, float)) for x in [profit_margin_min, profit_margin_max]) or profit_margin_min >= profit_margin_max:
        raise ValueError("profit_margin_min must be less than profit_margin_max and both must be numeric.")

    # Seed only once the arguments are known to be valid, so a rejected or
    # empty call leaves the global random state untouched.
    np.random.seed(random_seed)

    # The three draws are interleaved per sample; keeping this order preserves
    # the exact random stream (and therefore the values) for a given seed.
    data = []
    for _ in range(n_samples):
        product = np.random.choice(products)
        sales = np.random.randint(sales_lower, sales_upper + 1)  # +1 makes sales_upper inclusive
        profit = sales * np.random.uniform(profit_margin_min, profit_margin_max)
        data.append([product, sales, profit])

    df = pd.DataFrame(data, columns=["Product", "Sales", "Profit"])
    df = df.groupby("Product", as_index=False).sum()
    df.sort_values("Profit", ascending=False, inplace=True)

    return df
|
||
import pandas as pd | ||
import unittest | ||
|
||
class TestCases(unittest.TestCase):
    """Unit tests for f_1718: reproducibility, aggregation, sorting and argument validation."""

    def test_random_reproducibility(self):
        """Test that two calls with the same seed produce identical reports."""
        report1 = f_1718(["iPhone", "iPad"], n_samples=50, sales_lower=50, sales_upper=200, profit_margin_min=0.1, profit_margin_max=0.5, random_seed=42)
        report2 = f_1718(["iPhone", "iPad"], n_samples=50, sales_lower=50, sales_upper=200, profit_margin_min=0.1, profit_margin_max=0.5, random_seed=42)
        pd.testing.assert_frame_equal(report1, report2)

    def test_number_of_rows(self):
        """Test that the report has one row per distinct product."""
        report = f_1718(["iPhone", "iPad"], n_samples=50, sales_lower=50, sales_upper=200)
        self.assertEqual(len(report), len(set(["iPhone", "iPad"])))

    def test_sorting_by_profit(self):
        """Test that rows are ordered by descending profit."""
        report = f_1718(["iPhone", "iPad"], sales_lower=50, sales_upper=200)
        self.assertTrue(report["Profit"].is_monotonic_decreasing)

    def test_custom_parameters(self):
        """Test that custom parameters still yield a non-empty report."""
        report = f_1718(["iPhone", "iPad", "Macbook", "Airpods", "Apple Watch"], n_samples=50, sales_lower=100, sales_upper=150, profit_margin_min=0.2, profit_margin_max=0.4, random_seed=42)
        # This test needs to be adjusted based on the expected outcome of the custom parameters.
        # Specific checks on DataFrame contents should account for the randomness and reproducibility aspects.
        self.assertTrue(len(report) > 0, "The report should contain aggregated sales and profit data.")

    def test_new_custom_parameters(self):
        """Test the exact report contents for a fixed seed and custom parameters."""
        report1 = f_1718(["iPhone", "iPad", "Macbook", "Airpods", "Apple Watch"], n_samples=50, sales_lower=100, sales_upper=150, profit_margin_min=0.2, profit_margin_max=0.4, random_seed=42)
        df_list = report1.apply(lambda row: ','.join(row.values.astype(str)), axis=1).tolist()
        expect = ['Macbook,1561,444.82670855378143', 'iPad,1383,401.9253335536443', 'Airpods,1297,381.4827132170069', 'Apple Watch,1123,308.07853599252707', 'iPhone,921,294.0138866107959']

        self.assertEqual(df_list, expect, "DataFrame contents should match the expected output")

    def test_sales_bounds_validation(self):
        """Test that an error is raised if sales_lower is greater than sales_upper."""
        with self.assertRaises(ValueError):
            f_1718(["Product1"], sales_lower=250, sales_upper=100)

    def test_profit_margin_validation(self):
        """Test that an error is raised if profit_margin_min is greater than or equal to profit_margin_max."""
        with self.assertRaises(ValueError):
            f_1718(["Product1"], profit_margin_min=0.6, profit_margin_max=0.5)

    def test_product_list_validation(self):
        """Test that an error is raised if the products list is not a list of strings."""
        with self.assertRaises(TypeError):
            f_1718([123, 456], n_samples=10)

    def test_n_samples_validation(self):
        """Test that an error is raised if n_samples is not a positive integer."""
        with self.assertRaises(ValueError):
            f_1718(["Product1"], n_samples=-10)

    def test_empty_product_list(self):
        """Test that the function can handle an empty product list."""
        report = f_1718([], n_samples=10)
        self.assertTrue(report.empty, "The report should be empty if no products are provided.")

    def test_zero_samples(self):
        """Test handling of zero samples."""
        # Use exactly zero here; the negative case is already covered by
        # test_n_samples_validation.
        with self.assertRaises(ValueError):
            f_1718(["Product1"], n_samples=0)

    def test_single_product_reproducibility(self):
        """Test that the function generates consistent results for a single product across multiple runs."""
        report1 = f_1718(["Product1"], n_samples=10, random_seed=42)
        report2 = f_1718(["Product1"], n_samples=10, random_seed=42)
        pd.testing.assert_frame_equal(report1, report2)
|
||
|
||
def run_tests():
    """Collect every test defined on TestCases and execute it with a text runner."""
    test_suite = unittest.TestLoader().loadTestsFromTestCase(TestCases)
    unittest.TextTestRunner().run(test_suite)
|
||
if __name__ == "__main__":
    # Script entry point: check the docstring examples, then run the unit tests.
    import doctest

    doctest.testmod()
    run_tests()
Oops, something went wrong.