Skip to content

Commit

Permalink
feat: add first implementation of query 8
Browse files Browse the repository at this point in the history
  • Loading branch information
IsaiasGutierrezCruz committed Sep 7, 2024
1 parent 956274c commit 25ab007
Show file tree
Hide file tree
Showing 2 changed files with 90 additions and 0 deletions.
39 changes: 39 additions & 0 deletions tpch/execute/q8.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
from queries import q8

from . import IO_FUNCS
from . import customer
from . import lineitem
from . import nation
from . import orders
from . import part
from . import region
from . import supplier

tool = "pandas[pyarrow]"
fn = IO_FUNCS[tool]
print(
q8.query(
fn(part),
fn(supplier),
fn(lineitem),
fn(orders),
fn(customer),
fn(nation),
fn(region),
)
)


tool = "polars[lazy]"
fn = IO_FUNCS[tool]
print(
q8.query(
fn(part),
fn(supplier),
fn(lineitem),
fn(orders),
fn(customer),
fn(nation),
fn(region),
).collect()
)
51 changes: 51 additions & 0 deletions tpch/queries/q8.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
from datetime import date

import narwhals as nw
from narwhals.typing import FrameT


@nw.narwhalify
def query(
part_ds: FrameT,
supplier_ds: FrameT,
line_item_ds: FrameT,
orders_ds: FrameT,
customer_ds: FrameT,
nation_ds: FrameT,
region_ds: FrameT,
) -> FrameT:
nation = "BRAZIL"
region = "AMERICA"
type = "ECONOMY ANODIZED STEEL"
date1 = date(1995, 1, 1)
date2 = date(1996, 12, 31)

n1 = nation_ds.select("n_nationkey", "n_regionkey")
n2 = nation_ds.select("n_nationkey", "n_name")

return (
part_ds.join(line_item_ds, left_on="p_partkey", right_on="l_partkey")
.join(supplier_ds, left_on="l_suppkey", right_on="s_suppkey")
.join(orders_ds, left_on="l_orderkey", right_on="o_orderkey")
.join(customer_ds, left_on="o_custkey", right_on="c_custkey")
.join(n1, left_on="c_nationkey", right_on="n_nationkey")
.join(region_ds, left_on="n_regionkey", right_on="r_regionkey")
.filter(nw.col("r_name") == region)
.join(n2, left_on="s_nationkey", right_on="n_nationkey")
.filter(nw.col("o_orderdate").is_between(date1, date2))
.filter(nw.col("p_type") == type)
.select(
nw.col("o_orderdate").dt.year().alias("o_year"),
(nw.col("l_extendedprice") * (1 - nw.col("l_discount"))).alias("volume"),
nw.col("n_name").alias("nation"),
)
.with_columns(
nw.when(nw.col("nation") == nation)
.then(nw.col("volume"))
.otherwise(0)
.alias("_tmp")
)
.group_by("o_year")
.agg((nw.sum("_tmp") / nw.sum("volume")).round(2).alias("mkt_share"))
.sort("o_year")
)

0 comments on commit 25ab007

Please sign in to comment.