Skip to content

Commit

Permalink
feat: add q3 implementation
Browse files Browse the repository at this point in the history
  • Loading branch information
montanarograziano committed Aug 31, 2024
1 parent d9975f6 commit eafb5b2
Show file tree
Hide file tree
Showing 2 changed files with 82 additions and 0 deletions.
37 changes: 37 additions & 0 deletions tpch/execute/q3.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
from pathlib import Path

import pandas as pd
import polars as pl
from queries import q3

pd.options.mode.copy_on_write = True
pd.options.future.infer_string = True

customer = Path("data") / "customer.parquet"
line_item = Path("data") / "lineitem.parquet"
orders = Path("data") / "orders.parquet"

IO_FUNCS = {
"pandas": lambda x: pd.read_parquet(x, engine="pyarrow"),
"pandas[pyarrow]": lambda x: pd.read_parquet(
x, engine="pyarrow", dtype_backend="pyarrow"
),
"polars[eager]": lambda x: pl.read_parquet(x),
"polars[lazy]": lambda x: pl.scan_parquet(x),
}

tool = "pandas"
fn = IO_FUNCS[tool]
print(q3.query(fn(customer), fn(line_item), fn(orders)))

tool = "pandas[pyarrow]"
fn = IO_FUNCS[tool]
print(q3.query(fn(customer), fn(line_item), fn(orders)))

tool = "polars[eager]"
fn = IO_FUNCS[tool]
print(q3.query(fn(customer), fn(line_item), fn(orders)))

tool = "polars[lazy]"
fn = IO_FUNCS[tool]
print(q3.query(fn(customer), fn(line_item), fn(orders)).collect())
45 changes: 45 additions & 0 deletions tpch/queries/q3.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
from datetime import datetime

import narwhals as nw
from narwhals.typing import FrameT


@nw.narwhalify
def query(
customer_ds_raw: FrameT,
line_item_ds_raw: FrameT,
orders_ds_raw: FrameT,
) -> FrameT:
var_1 = var_2 = datetime(1995, 3, 15)
var_3 = "BUILDING"

customer_ds = nw.from_native(customer_ds_raw)
line_item_ds = nw.from_native(line_item_ds_raw)
orders_ds = nw.from_native(orders_ds_raw)

q_final = (
customer_ds.filter(nw.col("c_mktsegment") == var_3)
.join(orders_ds, left_on="c_custkey", right_on="o_custkey")
.join(line_item_ds, left_on="o_orderkey", right_on="l_orderkey")
.filter(
nw.col("o_orderdate") < var_2,
nw.col("l_shipdate") > var_1,
)
.with_columns(
(nw.col("l_extendedprice") * (1 - nw.col("l_discount"))).alias("revenue")
)
.group_by(["o_orderkey", "o_orderdate", "o_shippriority"])
.agg([nw.sum("revenue")])
.select(
[
nw.col("o_orderkey").alias("l_orderkey"),
"revenue",
"o_orderdate",
"o_shippriority",
]
)
.sort(by=["revenue", "o_orderdate"], descending=[True, False])
.head(10)
)

return nw.to_native(q_final)

0 comments on commit eafb5b2

Please sign in to comment.