-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge branch 'main' of https://github.com/SonyShrestha/VBP_Joint_Project
- Loading branch information
Showing
11 changed files
with
10,103 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Binary file not shown.
Large diffs are not rendered by default.
Oops, something went wrong.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
import kaggle
import logging
import configparser
import os

# Configure logging
logging.basicConfig(level=logging.INFO)  # Set log level to INFO

# Create logger object
logger = logging.getLogger()

# Get the path to the parent parent directory (repo root relative to CWD)
config_dir = os.path.abspath(os.path.join(os.getcwd(), os.pardir, os.pardir, os.pardir))

# Specify the path to config file
config_file_path = os.path.join(config_dir, "config.ini")

config = configparser.ConfigParser()
config.read(config_file_path)


def download_kaggle_dataset(dataset_path, raw_data_dir):
    """Download and unzip the Supermart Grocery Sales dataset from Kaggle.

    The downloaded CSV is renamed to ``sm_retail_customers.csv`` inside
    ``raw_data_dir``.

    Args:
        dataset_path: Kaggle dataset identifier (``owner/dataset-name``),
            read from the ``SM_RETAIL_CUSTOMERS`` config section.
        raw_data_dir: Directory the archive is downloaded into and unzipped.

    Raises:
        FileNotFoundError: If the expected CSV is not present after download.
    """
    logger.info('-----------------------------------------------------')
    logger.info("Downloading customer data from kaggle dataset for Supermart Grocery Sales - Retail Analytics")
    kaggle.api.dataset_download_files(dataset=dataset_path, path=raw_data_dir, unzip=True)

    src = os.path.join(raw_data_dir, 'Supermart Grocery Sales - Retail Analytics Dataset.csv')
    dst = os.path.join(raw_data_dir, 'sm_retail_customers.csv')
    # Fail fast with a clear message rather than an opaque OSError from the rename.
    if not os.path.exists(src):
        raise FileNotFoundError(f"Expected downloaded file not found: {src}")
    # os.replace (unlike os.rename) overwrites an existing target on all platforms,
    # so re-running the script does not crash on Windows.
    os.replace(src, dst)


if __name__ == "__main__":
    dataset_path = config["SM_RETAIL_CUSTOMERS"]["dataset_path"]
    raw_data_dir = config["COMMON"]["raw_data_dir"]
    download_kaggle_dataset(dataset_path, raw_data_dir)
Binary file not shown.
68 changes: 68 additions & 0 deletions
68
landing_zone/synthetic/supermarket_products/supermarket_products.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,68 @@ | ||
import csv
import json
import random
import configparser
import os
import logging

# Configure logging: without basicConfig the root logger stays at WARNING with
# no handler, so the logger.info / logging.info calls below would be silently
# dropped. (Matches the sibling ingestion scripts in this project.)
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger()

# Load configuration (config.ini lives three directories above the CWD)
config_path = os.path.join(os.getcwd(), '../../..', 'config.ini')
config = configparser.ConfigParser()
config.read(config_path)
logging.info(f'Configuration loaded from {config_path}')

# Base directory for files
raw_data_dir = config.get('COMMON', 'raw_data_dir')

# Generation parameters
number_of_products = config.getint('SUPERMARKET_PRODUCT_PARAMS', 'number_of_products')
quantity_min = config.getint('SUPERMARKET_PRODUCT_PARAMS', 'quantity_min')
quantity_max = config.getint('SUPERMARKET_PRODUCT_PARAMS', 'quantity_max')

# Input/output file paths under the raw data directory
products_json = os.path.join(raw_data_dir, 'flipkarts_products.json')
stores_csv = os.path.join(raw_data_dir, 'establishments_catalonia.csv')
output_csv = os.path.join(raw_data_dir, 'assigned_products.csv')

# Load the product catalogue (a JSON list of product dicts)
with open(products_json, 'r') as file:
    products = json.load(file)

# Load establishments and keep only the supermarket rows
supermarkets = []
with open(stores_csv, 'r') as file:
    reader = csv.DictReader(file)
    for row in reader:
        if "supermercat" in row['Activity_description'].lower():
            supermarkets.append(row)

# Guard: random.sample raises a bare ValueError if asked for more products
# than exist; surface a message that points at the config instead.
if number_of_products > len(products):
    raise ValueError(
        f"number_of_products ({number_of_products}) exceeds the available "
        f"product catalogue size ({len(products)})"
    )

# Assign a random selection of products (with a random stock quantity) to
# every supermarket.
assigned_products = []
for supermarket in supermarkets:
    selected_products = random.sample(products, number_of_products)
    for product in selected_products:
        assigned_products.append({
            "store_id": supermarket['Id'],
            "store_name": supermarket['Commercial_name'],
            "product_id": product['product_id'],
            "product_name": product['name'],
            "manufacture_date": product['manufacturing_date'],
            "expiry_date": product['expiry_date'],
            # Random quantity within the configured [quantity_min, quantity_max] range
            "quantity": random.randint(quantity_min, quantity_max)
        })

# Output to CSV
try:
    with open(output_csv, 'w', newline='') as file:
        fieldnames = ['store_id', 'store_name', 'product_id', 'product_name', 'manufacture_date', 'expiry_date', 'quantity']
        writer = csv.DictWriter(file, fieldnames=fieldnames)
        writer.writeheader()
        # writerows: one C-level call instead of a per-row Python loop
        writer.writerows(assigned_products)
    logger.info(f"Data has been processed and output to {output_csv}.")
except Exception:
    logger.error("Failed to write data to CSV", exc_info=True)