Skip to content

Commit

Permalink
added supermarket_product.py file to generate B2C data
Browse files Browse the repository at this point in the history
  • Loading branch information
Pratistha authored and Pratistha committed Apr 6, 2024
1 parent b95e62b commit d173b1f
Show file tree
Hide file tree
Showing 9 changed files with 75 additions and 1 deletion.
Binary file modified .DS_Store
Binary file not shown.
8 changes: 7 additions & 1 deletion config.ini
Original file line number Diff line number Diff line change
Expand Up @@ -20,4 +20,10 @@ num_of_reviews=500
[FLIPKART]
BASE_URL= https://www.flipkart.com
URL= https://www.flipkart.com/grocery-supermart-store?marketplace=GROCERY
HEADER= ({'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36', 'Accept-Language': 'en, en-US, en;g=0.5'})
HEADER= ({'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36', 'Accept-Language': 'en, en-US, en;g=0.5'})


[SUPERMARKET_PRODUCT_PARAMS]
number_of_products = 40
quantity_min = 1
quantity_max = 100
Binary file modified data/.DS_Store
Binary file not shown.
Binary file modified landing_zone/.DS_Store
Binary file not shown.
Binary file added landing_zone/OCR/.DS_Store
Binary file not shown.
Binary file modified landing_zone/collectors/.DS_Store
Binary file not shown.
Binary file added landing_zone/collectors/Flipkart/.DS_Store
Binary file not shown.
Binary file modified landing_zone/synthetic/.DS_Store
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
import csv
import json
import random
import configparser
import os
import logging



# Root logger, shared by the whole script.
logger = logging.getLogger()
# Install the default stderr handler when the root logger has none yet. The
# module-level logging.info() call used here before did this implicitly;
# basicConfig() is a no-op if a handler is already configured.
logging.basicConfig()

# Load configuration
# NOTE: the path is resolved against the current working directory, so the
# script must be launched from a directory three levels below config.ini.
config_path = os.path.join(os.getcwd(), '../../..', 'config.ini')
config = configparser.ConfigParser()
# ConfigParser.read() silently skips files it cannot open and returns the list
# of files it actually parsed, so an empty result means nothing was loaded.
if config.read(config_path):
    logger.info('Configuration loaded from %s', config_path)
else:
    logger.warning('Configuration file not found or unreadable: %s', config_path)


# Directory that holds the input files and receives the generated CSV.
raw_data_dir = config.get('COMMON', 'raw_data_dir')

# Generation parameters, read as integers in the order they are listed.
number_of_products, quantity_min, quantity_max = (
    config.getint('SUPERMARKET_PRODUCT_PARAMS', option)
    for option in ('number_of_products', 'quantity_min', 'quantity_max')
)

# The file names are fixed; only their parent directory comes from the config.
products_json, stores_csv, output_csv = (
    os.path.join(raw_data_dir, file_name)
    for file_name in (
        'flipkarts_products.json',
        'establishments_catalonia.csv',
        'assigned_products.csv',
    )
)

# Load the product catalogue from the JSON file.
# JSON text is UTF-8 by specification, so the encoding is pinned here instead
# of depending on the platform's locale default.
with open(products_json, 'r', encoding='utf-8') as file:
    products = json.load(file)

# Load the establishments CSV and keep only the rows whose activity
# description mentions "supermercat" (case-insensitive).
# newline='' is what the csv module asks for so that quoted fields containing
# line breaks are parsed correctly; the encoding is pinned rather than left to
# the platform's locale default.
# NOTE(review): assumes the file is UTF-8 encoded - confirm against the data source.
with open(stores_csv, 'r', newline='', encoding='utf-8') as file:
    supermarkets = [
        row
        for row in csv.DictReader(file)
        # DictReader fills missing trailing columns with None, hence `or ''`.
        if "supermercat" in (row['Activity_description'] or '').lower()
    ]

# Assign a random selection of products to every supermarket.
# random.sample() raises ValueError when asked for more items than the
# population holds, so the request is capped at the catalogue size.
products_per_store = min(number_of_products, len(products))
if products_per_store < number_of_products:
    logger.warning(
        "Only %d products available; assigning %d per store instead of %d",
        len(products), products_per_store, number_of_products,
    )

assigned_products = []
for supermarket in supermarkets:
    # A fresh sample per store: no product repeats within one store.
    for product in random.sample(products, products_per_store):
        assigned_products.append({
            "store_id": supermarket['Id'],
            "store_name": supermarket['Commercial_name'],
            "product_id": product['product_id'],
            "product_name": product['name'],
            "manufacture_date": product['manufacturing_date'],
            "expiry_date": product['expiry_date'],
            # Random quantity within the configured bounds (both inclusive).
            "quantity": random.randint(quantity_min, quantity_max),
        })

# Write the assignments to the output CSV, one row per (store, product) pair.
fieldnames = ['store_id', 'store_name', 'product_id', 'product_name', 'manufacture_date', 'expiry_date', 'quantity']
try:
    # Encoding pinned to UTF-8 so the output does not depend on the platform's
    # locale default; newline='' lets the csv module control line endings.
    with open(output_csv, 'w', newline='', encoding='utf-8') as file:
        writer = csv.DictWriter(file, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(assigned_products)
except Exception:
    # Top-level boundary of the script: record the failure together with its
    # traceback and keep the original best-effort behaviour (no re-raise).
    logger.exception("Failed to write data to CSV")
else:
    # Reported only after the file has been flushed and closed successfully.
    logger.info("Data has been processed and output to %s.", output_csv)

0 comments on commit d173b1f

Please sign in to comment.