diff --git a/.DS_Store b/.DS_Store index 6be7078..af6d004 100644 Binary files a/.DS_Store and b/.DS_Store differ diff --git a/config.ini b/config.ini index 599a8c0..ddc3f32 100644 --- a/config.ini +++ b/config.ini @@ -20,4 +20,10 @@ num_of_reviews=500 [FLIPKART] BASE_URL= https://www.flipkart.com URL= https://www.flipkart.com/grocery-supermart-store?marketplace=GROCERY -HEADER= ({'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36', 'Accept-Language': 'en, en-US, en;g=0.5'}) \ No newline at end of file +HEADER= ({'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36', 'Accept-Language': 'en, en-US, en;g=0.5'}) + + +[SUPERMARKET_PRODUCT_PARAMS] +number_of_products = 40 +quantity_min = 1 +quantity_max = 100 \ No newline at end of file diff --git a/data/.DS_Store b/data/.DS_Store index 1d5c55e..d567fc9 100644 Binary files a/data/.DS_Store and b/data/.DS_Store differ diff --git a/landing_zone/.DS_Store b/landing_zone/.DS_Store index d553391..22187bd 100644 Binary files a/landing_zone/.DS_Store and b/landing_zone/.DS_Store differ diff --git a/landing_zone/OCR/.DS_Store b/landing_zone/OCR/.DS_Store new file mode 100644 index 0000000..a3a9c54 Binary files /dev/null and b/landing_zone/OCR/.DS_Store differ diff --git a/landing_zone/collectors/.DS_Store b/landing_zone/collectors/.DS_Store index fbbb429..18a94d8 100644 Binary files a/landing_zone/collectors/.DS_Store and b/landing_zone/collectors/.DS_Store differ diff --git a/landing_zone/collectors/Flipkart/.DS_Store b/landing_zone/collectors/Flipkart/.DS_Store new file mode 100644 index 0000000..f7f1920 Binary files /dev/null and b/landing_zone/collectors/Flipkart/.DS_Store differ diff --git a/landing_zone/synthetic/.DS_Store b/landing_zone/synthetic/.DS_Store index eaaba62..b8579d6 100644 Binary files a/landing_zone/synthetic/.DS_Store and b/landing_zone/synthetic/.DS_Store 
"""Generate a synthetic product assortment for supermarkets.

Script path in the repository:
``landing_zone/synthetic/supermarket_products/supermarket_products.py``.

Steps performed at import/run time:

1. Read ``config.ini`` (``COMMON.raw_data_dir`` and the
   ``SUPERMARKET_PRODUCT_PARAMS`` section).
2. Load the product catalogue from ``flipkarts_products.json``.
3. Load ``establishments_catalonia.csv`` and keep the rows whose
   ``Activity_description`` contains ``"supermercat"`` (case-insensitive).
4. For every kept store, pick a random sample of products and give each one
   a random quantity in ``[quantity_min, quantity_max]``.
5. Write the result to ``assigned_products.csv`` in ``raw_data_dir``.
"""

import csv
import json
import random
import configparser
import os
import logging

# The original code relied on logging's implicit default configuration, whose
# WARNING threshold hides the INFO progress messages emitted below.
# basicConfig() does nothing if the root logger already has handlers, so an
# application that configured logging beforehand is not affected.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Load configuration.
# The path relative to the current working directory is tried first (this is
# the historical behaviour and only works when the script is launched from its
# own folder); otherwise fall back to the same relative location anchored at
# this file, so the script can be started from any directory.
config_path = os.path.join(os.getcwd(), '../../..', 'config.ini')
if not os.path.isfile(config_path):
    config_path = os.path.join(
        os.path.dirname(os.path.abspath(__file__)), '../../..', 'config.ini'
    )

config = configparser.ConfigParser()
# ConfigParser.read() silently skips files it cannot open and returns the list
# of files actually parsed; an empty list means nothing was loaded.
if not config.read(config_path):
    raise FileNotFoundError(f'Configuration file not found: {config_path}')
logger.info('Configuration loaded from %s', config_path)

# Base directory for input and output files
raw_data_dir = config.get('COMMON', 'raw_data_dir')

# Generation parameters
number_of_products = config.getint('SUPERMARKET_PRODUCT_PARAMS', 'number_of_products')
quantity_min = config.getint('SUPERMARKET_PRODUCT_PARAMS', 'quantity_min')
quantity_max = config.getint('SUPERMARKET_PRODUCT_PARAMS', 'quantity_max')

# File names are fixed; only their parent directory comes from the config file
products_json = os.path.join(raw_data_dir, 'flipkarts_products.json')
stores_csv = os.path.join(raw_data_dir, 'establishments_catalonia.csv')
output_csv = os.path.join(raw_data_dir, 'assigned_products.csv')

# Load the product catalogue (JSON text is UTF-8 by specification).
# NOTE(review): the code below indexes each entry by 'product_id', 'name',
# 'manufacturing_date' and 'expiry_date', so this is presumably a list of
# dicts - confirm against the producer of this file.
with open(products_json, 'r', encoding='utf-8') as file:
    products = json.load(file)

# Load the establishments and keep only supermarkets.
# newline='' is what the csv module expects for files it parses itself.
# NOTE(review): assumes the CSV is UTF-8 encoded - confirm.
with open(stores_csv, 'r', newline='', encoding='utf-8') as file:
    supermarkets = [
        row
        for row in csv.DictReader(file)
        # DictReader fills missing trailing columns with None; treat that as
        # an empty description instead of crashing on None.lower().
        if "supermercat" in (row['Activity_description'] or '').lower()
    ]

# random.sample() raises ValueError when asked for more items than exist, so
# never request more products than the catalogue contains.
sample_size = min(number_of_products, len(products))
if sample_size < number_of_products:
    logger.warning(
        "Only %d products available; assigning %d per store instead of %d",
        len(products), sample_size, number_of_products,
    )

# Assign products randomly
assigned_products = []
for supermarket in supermarkets:
    selected_products = random.sample(products, sample_size)
    for product in selected_products:
        assigned_products.append({
            "store_id": supermarket['Id'],
            "store_name": supermarket['Commercial_name'],
            "product_id": product['product_id'],
            "product_name": product['name'],
            "manufacture_date": product['manufacturing_date'],
            "expiry_date": product['expiry_date'],
            # Random stock level, inclusive bounds taken from config.ini
            "quantity": random.randint(quantity_min, quantity_max),
        })

# Output to CSV
fieldnames = ['store_id', 'store_name', 'product_id', 'product_name',
              'manufacture_date', 'expiry_date', 'quantity']
try:
    with open(output_csv, 'w', newline='', encoding='utf-8') as file:
        writer = csv.DictWriter(file, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(assigned_products)
except Exception:
    # Best-effort, as before: a failed write is logged with its traceback
    # rather than propagated.
    logger.exception("Failed to write data to CSV")
else:
    logger.info("Data has been processed and output to %s.", output_csv)