The Objective is to develop queries that analyze and identify the top customers, products, and markets for a given business. The project involves querying a database containing transactional data to derive insights that can inform strategic decision-making and improve business performance.
The objective of the project is to develop SQL queries that analyze transactional data to identify the top customers, products, and markets for a given business. This involves writing queries to calculate relevant metrics such as total purchase value, sales volume, and market share, and ranking customers, products, and markets accordingly. Additionally, stored procedures and views are created to encapsulate the query logic and provide a simplified interface for accessing the results. The project aims to derive insights that inform strategic decision-making and improve business performance.
SELECT
s.date,
s.product_code,
p.product,
p.variant,
s.sold_quantity,
g.gross_price as gross_price_per_item,
ROUND(s.sold_quantity*g.gross_price,2) as gross_price_total,
pre.pre_invoice_discount_pct
FROM fact_sales_monthly s
JOIN dim_product p
ON s.product_code=p.product_code
JOIN fact_gross_price g
ON g.fiscal_year=get_fiscal_year(s.date)
AND g.product_code=s.product_code
JOIN fact_pre_invoice_deductions as pre
ON pre.customer_code = s.customer_code AND
pre.fiscal_year=get_fiscal_year(s.date)
WHERE
s.customer_code=90002002 AND
get_fiscal_year(s.date)=2021
LIMIT 1000000;
SELECT
s.date,
s.product_code,
p.product,
p.variant,
s.sold_quantity,
g.gross_price as gross_price_per_item,
ROUND(s.sold_quantity*g.gross_price,2) as gross_price_total,
pre.pre_invoice_discount_pct
FROM fact_sales_monthly s
JOIN dim_product p
ON s.product_code=p.product_code
JOIN fact_gross_price g
ON g.fiscal_year=get_fiscal_year(s.date)
AND g.product_code=s.product_code
JOIN fact_pre_invoice_deductions as pre
ON pre.customer_code = s.customer_code AND
pre.fiscal_year=get_fiscal_year(s.date)
WHERE
get_fiscal_year(s.date)=2021
LIMIT 1000000;
addition to the SQL queries and report creation, optimizing performance is crucial for handling large datasets efficiently and ensuring that queries execute quickly.
-- creating dim_date and joining with this table and avoid using the function 'get_fiscal_year()' to reduce the amount of time taking to run the query
SELECT
s.date,
s.customer_code,
s.product_code,
p.product, p.variant,
s.sold_quantity,
g.gross_price as gross_price_per_item,
ROUND(s.sold_quantity*g.gross_price,2) as gross_price_total,
pre.pre_invoice_discount_pct
FROM fact_sales_monthly s
JOIN dim_date dt
ON dt.calendar_date = s.date
JOIN dim_product p
ON s.product_code=p.product_code
JOIN fact_gross_price g
ON g.fiscal_year=dt.fiscal_year
AND g.product_code=s.product_code
JOIN fact_pre_invoice_deductions as pre
ON pre.customer_code = s.customer_code AND
pre.fiscal_year=dt.fiscal_year
WHERE
dt.fiscal_year=2021
LIMIT 1500000;
SELECT
s.date,
s.customer_code,
s.product_code,
p.product, p.variant,
s.sold_quantity,
g.gross_price as gross_price_per_item,
ROUND(s.sold_quantity*g.gross_price,2) as gross_price_total,
pre.pre_invoice_discount_pct
FROM fact_sales_monthly s
JOIN dim_product p
ON s.product_code=p.product_code
JOIN fact_gross_price g
ON g.fiscal_year=s.fiscal_year
AND g.product_code=s.product_code
JOIN fact_pre_invoice_deductions as pre
ON pre.customer_code = s.customer_code AND
pre.fiscal_year=s.fiscal_year
WHERE
s.fiscal_year=2021
LIMIT 1500000;
WITH cte1 AS (
SELECT
s.date,
s.customer_code,
s.product_code,
p.product, p.variant,
s.sold_quantity,
g.gross_price as gross_price_per_item,
ROUND(s.sold_quantity*g.gross_price,2) as gross_price_total,
pre.pre_invoice_discount_pct
FROM fact_sales_monthly s
JOIN dim_product p
ON s.product_code=p.product_code
JOIN fact_gross_price g
ON g.fiscal_year=s.fiscal_year
AND g.product_code=s.product_code
JOIN fact_pre_invoice_deductions as pre
ON pre.customer_code = s.customer_code AND
pre.fiscal_year=s.fiscal_year
WHERE
s.fiscal_year=2021)
SELECT
*,
(gross_price_total-pre_invoice_discount_pct*gross_price_total) as net_invoice_sales
FROM cte1
LIMIT 1500000;
Creating views can be a beneficial approach to simplify complex queries, encapsulate frequently used logic, and enhance query performance. Here's how you can incorporate views into the SQL Project:
CREATE VIEW `sales_preinv_discount` AS
SELECT
s.date,
s.fiscal_year,
s.customer_code,
c.market,
s.product_code,
p.product,
p.variant,
s.sold_quantity,
g.gross_price as gross_price_per_item,
ROUND(s.sold_quantity*g.gross_price,2) as gross_price_total,
pre.pre_invoice_discount_pct
FROM fact_sales_monthly s
JOIN dim_customer c
ON s.customer_code = c.customer_code
JOIN dim_product p
ON s.product_code=p.product_code
JOIN fact_gross_price g
ON g.fiscal_year=s.fiscal_year
AND g.product_code=s.product_code
JOIN fact_pre_invoice_deductions as pre
ON pre.customer_code = s.customer_code AND
pre.fiscal_year=s.fiscal_year
SELECT
*,
(gross_price_total-pre_invoice_discount_pct*gross_price_total) as net_invoice_sales
FROM gdb0041.sales_preinv_discount
CREATE VIEW `sales_post_inv_discount` AS
SELECT
s.date, s.fiscal_year,
s.customer_code, s.market,
s.product_code, s.product, s.variant,
s.sold_quantity, s.gross_price_total,
s.pre_invoice_discount_pct,
(s.gross_price_total-s.pre_invoice_discount_pct*s.gross_price_total) as net_invoice_sales,
(po.discounts_pct+po.other_deductions_pct) as post_invoice_discount_pct
FROM sales_preinv_discount s
JOIN fact_post_invoice_deductions po
ON po.customer_code = s.customer_code AND
po.product_code = s.product_code AND
po.date = s.date;
SELECT
*,
net_invoice_sales*(1-post_invoice_discount_pct) as net_sales
FROM gdb0041.sales_postinv_discount;
-- Finally creating the view net_sales
which inbuiltly use/include all the previous created view and gives the final result
CREATE VIEW `net_sales` AS
SELECT
*,
net_invoice_sales*(1-post_invoice_discount_pct) as net_sales
FROM gdb0041.sales_postinv_discount;
SELECT
market,
round(sum(net_sales)/1000000,2) as net_sales_mln
FROM gdb0041.net_sales
where fiscal_year=2021
group by market
order by net_sales_mln desc
limit 5
-- Stored proc to get top n markets by net sales for a given year
CREATE PROCEDURE get_top_n_markets_by_net_sales
(
in_fiscal_year INT,
in_top_n INT
)
BEGIN
SELECT
market,
round(sum(net_sales)/1000000,2) as net_sales_mln
FROM net_sales
where fiscal_year=in_fiscal_year
group by market
order by net_sales_mln desc
limit in_top_n;
END
-- Created a stored procedure that takes market, fiscal_year and top n as an input and returns top n customers by net sales in that given fiscal year and market
CREATE PROCEDURE `get_top_n_customers_by_net_sales`(
in_market VARCHAR(45),
in_fiscal_year INT,
in_top_n INT
)
BEGIN
select
customer,
round(sum(net_sales)/1000000,2) as net_sales_mln
from net_sales s
join dim_customer c
on s.customer_code=c.customer_code
where
s.fiscal_year=in_fiscal_year
and s.market=in_market
group by customer
order by net_sales_mln desc
limit in_top_n;
END