diff --git a/reproduce/reproduce_duckdb.py b/reproduce/reproduce_duckdb.py index fd6e509..32c5acb 100755 --- a/reproduce/reproduce_duckdb.py +++ b/reproduce/reproduce_duckdb.py @@ -19,7 +19,7 @@ def set_up_schema(con): def load_data(con): - tables = ['lineitem', 'customer', 'orders', 'part'] + tables = ['lineitem', 'customer', 'orders', 'part', 'supplier', 'nation', 'region'] for table in tables: print(f'Loading {table}') con.execute(f"INSERT INTO {table} SELECT * FROM read_csv_auto('data/{table}.tbl', delim='|', header=False)") diff --git a/reproduce/requirements.txt b/reproduce/requirements.txt index 199a6c4..3057779 100644 --- a/reproduce/requirements.txt +++ b/reproduce/requirements.txt @@ -1,4 +1,4 @@ -duckdb==0.7.1 +duckdb==0.9.1 tableauhyperapi numpy matplotlib diff --git a/reproduce/sql/q5.sql b/reproduce/sql/q5.sql new file mode 100644 index 0000000..dd8f9d6 --- /dev/null +++ b/reproduce/sql/q5.sql @@ -0,0 +1,23 @@ +-- Removed ORDER BY +select + n_name, + sum(l_extendedprice * (1 - l_discount)) as revenue +from + customer, + orders, + lineitem, + supplier, + nation, + region +where + c_custkey = o_custkey + and l_orderkey = o_orderkey + and l_suppkey = s_suppkey + and c_nationkey = s_nationkey + and s_nationkey = n_nationkey + and n_regionkey = r_regionkey + and r_name = 'ASIA' + and o_orderdate >= date '1994-01-01' + and o_orderdate < date '1994-01-01' + interval '1' year +group by + n_name diff --git a/reproduce/sql/schema.sql b/reproduce/sql/schema.sql index be8756f..2e4f601 100644 --- a/reproduce/sql/schema.sql +++ b/reproduce/sql/schema.sql @@ -1,7 +1,9 @@ +-- Align schema to allow for a fair comparison with InkFuse -- Decimals are turned into DOUBLE PRECISION -- Any VARCHARs/CHARs (apart from char(1)) are turned into TEXT -- Removed Primary key so other system's can't use B-Trees. -- InkFuse always does full scans. 
+ create table customer ( c_custkey integer not null, c_name text not null, @@ -49,13 +51,39 @@ create table lineitem ( create table part ( p_partkey integer not null, - p_name varchar(55) not null, - p_mfgr char(25) not null, - p_brand char(10) not null, - p_type varchar(25) not null, + p_name text not null, + p_mfgr text not null, + p_brand text not null, + p_type text not null, p_size integer not null, - p_container char(10) not null, - p_retailprice decimal(12,2) not null, - p_comment varchar(23) not null + p_container text not null, + p_retailprice double precision not null, + p_comment text not null -- primary key (p_partkey) ); + +create table supplier ( + s_suppkey integer not null, + s_name text not null, + s_address text not null, + s_nationkey integer not null, + s_phone text not null, + s_acctbal double precision not null, + s_comment text not null + -- primary key (s_suppkey) +); + +create table nation ( + n_nationkey integer not null, + n_name text not null, + n_regionkey integer not null, + n_comment text not null + -- primary key (n_nationkey) +); + +create table region ( + r_regionkey integer not null, + r_name text not null, + r_comment text not null + -- primary key (r_regionkey) +);