Skip to content

Commit

Permalink
Prepare reproducibility scripts for Q5
Browse files Browse the repository at this point in the history
  • Loading branch information
wagjamin authored and Benjamin Wagner committed Nov 5, 2023
1 parent 7309530 commit d2a89cf
Show file tree
Hide file tree
Showing 4 changed files with 60 additions and 9 deletions.
2 changes: 1 addition & 1 deletion reproduce/reproduce_duckdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ def set_up_schema(con):


def load_data(con):
tables = ['lineitem', 'customer', 'orders', 'part']
tables = ['lineitem', 'customer', 'orders', 'part', 'supplier', 'nation', 'region']
for table in tables:
print(f'Loading {table}')
con.execute(f"INSERT INTO {table} SELECT * FROM read_csv_auto('data/{table}.tbl', delim='|', header=False)")
Expand Down
2 changes: 1 addition & 1 deletion reproduce/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
duckdb==0.7.1
duckdb==0.9.1
tableauhyperapi
numpy
matplotlib
Expand Down
23 changes: 23 additions & 0 deletions reproduce/sql/q5.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
-- Q5 with the ORDER BY removed, so the order of result rows is unspecified.
-- Sums discounted line-item revenue per nation name, restricted to:
--   * customers and suppliers that share the same nation key,
--   * nations whose region is named 'ASIA',
--   * orders dated in the half-open range [1994-01-01, 1995-01-01).
select
    n.n_name,
    sum(l.l_extendedprice * (1 - l.l_discount)) as revenue
from customer as c
inner join orders as o
    on o.o_custkey = c.c_custkey
inner join lineitem as l
    on l.l_orderkey = o.o_orderkey
inner join supplier as s
    on s.s_suppkey = l.l_suppkey
    -- the supplier must sit in the customer's nation
    and s.s_nationkey = c.c_nationkey
inner join nation as n
    on n.n_nationkey = s.s_nationkey
inner join region as r
    on r.r_regionkey = n.n_regionkey
where
    r.r_name = 'ASIA'
    and o.o_orderdate >= date '1994-01-01'
    and o.o_orderdate < date '1994-01-01' + interval '1' year
group by
    n.n_name
42 changes: 35 additions & 7 deletions reproduce/sql/schema.sql
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
-- Align schema to allow for a fair comparison with InkFuse
-- Decimals are turned into DOUBLE PRECISION
-- Any VARCHARs/CHARs (apart from char(1)) are turned into TEXT
-- Removed primary keys so other systems can't use B-Trees.
-- InkFuse always does full scans.

create table customer (
c_custkey integer not null,
c_name text not null,
Expand Down Expand Up @@ -49,13 +51,39 @@ create table lineitem (

-- part: aligned with the schema-wide rules stated in this file's header:
-- decimals become DOUBLE PRECISION and VARCHAR/CHAR columns become TEXT.
-- Each column is declared exactly once; the earlier varchar/char/decimal
-- definitions of the same columns are dropped, since declaring a column
-- twice is invalid DDL.
create table part (
    p_partkey integer not null,
    p_name text not null,
    p_mfgr text not null,
    p_brand text not null,
    p_type text not null,
    p_size integer not null,
    p_container text not null,
    p_retailprice double precision not null,
    p_comment text not null
    -- primary key (p_partkey) is deliberately not declared
);

-- supplier: all columns are NOT NULL; string columns are TEXT and the
-- account balance is DOUBLE PRECISION.
-- The last column must not be followed by a comma: with the primary key
-- line commented out, a trailing comma before ')' is a syntax error in
-- standard SQL.
create table supplier (
    s_suppkey integer not null,
    s_name text not null,
    s_address text not null,
    s_nationkey integer not null,
    s_phone text not null,
    s_acctbal double precision not null,
    s_comment text not null
    -- primary key (s_suppkey) is deliberately not declared
);

-- nation: all columns are NOT NULL; n_regionkey is matched against
-- region.r_regionkey by queries (no foreign key is declared).
-- The last column must not be followed by a comma: with the primary key
-- line commented out, a trailing comma before ')' is a syntax error in
-- standard SQL.
create table nation (
    n_nationkey integer not null,
    n_name text not null,
    n_regionkey integer not null,
    n_comment text not null
    -- primary key (n_nationkey) is deliberately not declared
);

-- region: all columns are NOT NULL.
-- The last column must not be followed by a comma: with the primary key
-- line commented out, a trailing comma before ')' is a syntax error in
-- standard SQL.
create table region (
    r_regionkey integer not null,
    r_name text not null,
    r_comment text not null
    -- primary key (r_regionkey) is deliberately not declared
);

0 comments on commit d2a89cf

Please sign in to comment.