From 25ab007d3cf019ab0ab9805d2728f459a0c8c1e1 Mon Sep 17 00:00:00 2001 From: Isaias Gutierrez Cruz Date: Sat, 7 Sep 2024 14:02:15 -0600 Subject: [PATCH] feat: add first implementation of query 8 --- tpch/execute/q8.py | 39 +++++++++++++++++++++++++++++++++++ tpch/queries/q8.py | 51 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 90 insertions(+) create mode 100644 tpch/execute/q8.py create mode 100644 tpch/queries/q8.py diff --git a/tpch/execute/q8.py b/tpch/execute/q8.py new file mode 100644 index 000000000..f1a8677ff --- /dev/null +++ b/tpch/execute/q8.py @@ -0,0 +1,39 @@ +from queries import q8 + +from . import IO_FUNCS +from . import customer +from . import lineitem +from . import nation +from . import orders +from . import part +from . import region +from . import supplier + +tool = "pandas[pyarrow]" +fn = IO_FUNCS[tool] +print( + q8.query( + fn(part), + fn(supplier), + fn(lineitem), + fn(orders), + fn(customer), + fn(nation), + fn(region), + ) +) + + +tool = "polars[lazy]" +fn = IO_FUNCS[tool] +print( + q8.query( + fn(part), + fn(supplier), + fn(lineitem), + fn(orders), + fn(customer), + fn(nation), + fn(region), + ).collect() +) diff --git a/tpch/queries/q8.py b/tpch/queries/q8.py new file mode 100644 index 000000000..3fba96313 --- /dev/null +++ b/tpch/queries/q8.py @@ -0,0 +1,51 @@ +from datetime import date + +import narwhals as nw +from narwhals.typing import FrameT + + +@nw.narwhalify +def query( + part_ds: FrameT, + supplier_ds: FrameT, + line_item_ds: FrameT, + orders_ds: FrameT, + customer_ds: FrameT, + nation_ds: FrameT, + region_ds: FrameT, +) -> FrameT: + nation = "BRAZIL" + region = "AMERICA" + type = "ECONOMY ANODIZED STEEL" + date1 = date(1995, 1, 1) + date2 = date(1996, 12, 31) + + n1 = nation_ds.select("n_nationkey", "n_regionkey") + n2 = nation_ds.select("n_nationkey", "n_name") + + return ( + part_ds.join(line_item_ds, left_on="p_partkey", right_on="l_partkey") + .join(supplier_ds, left_on="l_suppkey", right_on="s_suppkey") + .join(orders_ds, left_on="l_orderkey", right_on="o_orderkey") + .join(customer_ds, left_on="o_custkey", right_on="c_custkey") + .join(n1, left_on="c_nationkey", right_on="n_nationkey") + .join(region_ds, left_on="n_regionkey", right_on="r_regionkey") + .filter(nw.col("r_name") == region) + .join(n2, left_on="s_nationkey", right_on="n_nationkey") + .filter(nw.col("o_orderdate").is_between(date1, date2)) + .filter(nw.col("p_type") == type) + .select( + nw.col("o_orderdate").dt.year().alias("o_year"), + (nw.col("l_extendedprice") * (1 - nw.col("l_discount"))).alias("volume"), + nw.col("n_name").alias("nation"), + ) + .with_columns( + nw.when(nw.col("nation") == nation) + .then(nw.col("volume")) + .otherwise(0) + .alias("_tmp") + ) + .group_by("o_year") + .agg((nw.sum("_tmp") / nw.sum("volume")).round(2).alias("mkt_share")) + .sort("o_year") + )