Skip to content
This repository was archived by the owner on Sep 17, 2024. It is now read-only.

Commit cff354d

Browse files
authored
Merge pull request #3 from marenwestermann/benchmark-q10
feat: query 10 implementation
2 parents eb21bab + c6d3ce1 commit cff354d

File tree

2 files changed

+118
-0
lines changed

2 files changed

+118
-0
lines changed

execute/q10.py

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
from queries import q10
2+
import os
3+
4+
import pandas as pd
5+
import polars as pl
6+
7+
pd.options.mode.copy_on_write = True
8+
pd.options.future.infer_string = True
9+
10+
customer = os.path.join("data", "customer.parquet")
11+
nation = os.path.join("data", "nation.parquet")
12+
lineitem = os.path.join("data", "lineitem.parquet")
13+
orders = os.path.join("data", "orders.parquet")
14+
15+
IO_FUNCS = {
16+
'pandas': lambda x: pd.read_parquet(x, engine='pyarrow'),
17+
'pandas[pyarrow]': lambda x: pd.read_parquet(x, engine='pyarrow', dtype_backend='pyarrow'),
18+
'polars[eager]': lambda x: pl.read_parquet(x),
19+
'polars[lazy]': lambda x: pl.scan_parquet(x),
20+
}
21+
22+
# Run query 10 once per backend, in a fixed order, and print each result.
for tool in ("pandas", "pandas[pyarrow]", "polars[eager]", "polars[lazy]"):
    fn = IO_FUNCS[tool]
    result = q10.query(
        fn(customer),
        fn(nation),
        fn(lineitem),
        fn(orders),
    )
    # Only the lazy polars run needs an explicit collect() before printing.
    if tool == "polars[lazy]":
        result = result.collect()
    print(result)

queries/q10.py

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
from typing import Any
2+
from datetime import datetime
3+
import narwhals as nw
4+
5+
def query(
    customer_ds_raw: Any,
    nation_ds_raw: Any,
    lineitem_ds_raw: Any,
    orders_ds_raw: Any,
) -> Any:
    """Return the 20 customers with the highest revenue from returned items.

    The four inputs are wrapped with ``nw.from_native`` and joined
    customer -> orders -> lineitem -> nation.  Rows are kept when
    ``o_orderdate`` falls between 1993-10-01 and 1994-01-01
    (``closed="left"``) and ``l_returnflag`` equals ``"R"``.  Revenue is
    ``l_extendedprice * (1 - l_discount)``, summed per customer.

    Args:
        customer_ds_raw: Native frame holding the customer table.
        nation_ds_raw: Native frame holding the nation table.
        lineitem_ds_raw: Native frame holding the lineitem table.
        orders_ds_raw: Native frame holding the orders table.

    Returns:
        The result converted back with ``nw.to_native``: the first 20 rows
        after sorting by ``revenue`` in descending order.
    """
    nations = nw.from_native(nation_ds_raw)
    line_items = nw.from_native(lineitem_ds_raw)
    order_rows = nw.from_native(orders_ds_raw)
    customers = nw.from_native(customer_ds_raw)

    window_start = datetime(1993, 10, 1)
    window_end = datetime(1994, 1, 1)

    group_keys = (
        "c_custkey",
        "c_name",
        "c_acctbal",
        "c_phone",
        "n_name",
        "c_address",
        "c_comment",
    )
    output_columns = (
        "c_custkey",
        "c_name",
        "revenue",
        "c_acctbal",
        "n_name",
        "c_address",
        "c_phone",
        "c_comment",
    )

    # Step 1: stitch the four tables together on their key columns.
    with_orders = customers.join(
        order_rows, left_on="c_custkey", right_on="o_custkey"
    )
    with_items = with_orders.join(
        line_items, left_on="o_orderkey", right_on="l_orderkey"
    )
    joined = with_items.join(
        nations, left_on="c_nationkey", right_on="n_nationkey"
    )

    # Step 2: keep orders inside the date window, then returned items only.
    in_window = nw.col("o_orderdate").is_between(
        window_start, window_end, closed="left"
    )
    is_returned = nw.col("l_returnflag") == "R"
    filtered = joined.filter(in_window).filter(is_returned)

    # Step 3: compute per-line revenue and total it per customer.
    line_revenue = nw.col("l_extendedprice") * (1 - nw.col("l_discount"))
    with_revenue = filtered.with_columns(line_revenue.alias("revenue"))
    per_customer = with_revenue.group_by(*group_keys).agg(nw.sum("revenue"))

    # Step 4: order the output columns and keep the 20 largest revenues.
    ordered = per_customer.select(*output_columns).sort(
        by="revenue", descending=True
    )
    top_rows = ordered.head(20)

    return nw.to_native(top_rows)

0 commit comments

Comments
 (0)