-
Notifications
You must be signed in to change notification settings - Fork 577
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
feat(frontend): support iceberg predicate pushdown #19228
Open
kwannoel
wants to merge
39
commits into
main
Choose a base branch
from
kwannoel/iceberg-predicate-pushdown
base: main
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
+553
−20
Open
Changes from all commits
Commits
Show all changes
39 commits
Select commit
Hold shift + click to select a range
a0656ff
match rw predicates
kwannoel a105859
mark place to add filter
kwannoel 7016979
pass in schema fields as a parameter
kwannoel b0c00c0
convert input ref to reference, datum to iceberg datum
kwannoel f07e235
convert rw expressions into iceberg predicates
kwannoel aa7e899
add support for more literals
kwannoel 9b9bac0
add predicate proto
kwannoel 31dff2f
interim commit: use iceberg proto
kwannoel 11d5930
change predicate_pushdown return
kwannoel f648af3
derive eq, hash for iceberg predicate
kwannoel eed2ed2
interim commit: add iceberg_predicate to batch
kwannoel ed6f73d
add fetch_parameters
kwannoel 5fb182b
Revert "derive eq, hash for iceberg predicate"
kwannoel 6dea2bc
Revert "change predicate_pushdown return"
kwannoel 33b6103
Revert "interim commit: use iceberg proto"
kwannoel b53d205
Revert "add predicate proto"
kwannoel 287b032
Revert "interim commit: add iceberg_predicate to batch"
kwannoel 060b6f1
use iceberg predicate in logical_iceberg_scan fields
kwannoel 1e23059
add to batch
kwannoel 34392c8
build with predicate
kwannoel 1dc7fd5
clean
kwannoel 28389ee
implement distill
kwannoel 57f0c33
fix warn
kwannoel 6f41c2e
add tests
kwannoel 0c9da00
no verbose for wget
kwannoel 693b608
more tests
kwannoel e76d65d
check results
kwannoel 5672443
fix bugs
kwannoel d742365
explain source plan, maybe schema malformed
kwannoel 519c115
fix tests
kwannoel f4e9633
fmt
kwannoel 32823b5
increase timeout
kwannoel 665b46f
no need double assert
kwannoel 0d2f661
docs
kwannoel a7c98a2
fmt
kwannoel f394302
increase timeout
kwannoel 115e2ef
use rule based predicate push down
kwannoel e60b4e4
prune BatchFilter if predicate always true
kwannoel 326cd17
test mix filter and predicate
kwannoel File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
138 changes: 138 additions & 0 deletions
138
e2e_test/iceberg/test_case/iceberg_predicate_pushdown.slt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,138 @@ | ||
statement ok | ||
set sink_decouple = false; | ||
|
||
statement ok | ||
set streaming_parallelism=4; | ||
|
||
statement ok | ||
drop table if exists s1 cascade; | ||
|
||
statement ok | ||
CREATE TABLE s1 (i1 int, i2 varchar, i3 varchar); | ||
|
||
statement ok | ||
insert into s1 select x, 'some str', 'another str' from generate_series(1, 500) t(x); | ||
|
||
statement ok | ||
insert into s1 select x, null as y, null as z from generate_series(501, 1000) t(x); | ||
|
||
statement ok | ||
flush; | ||
|
||
statement ok | ||
CREATE MATERIALIZED VIEW mv1 AS SELECT * FROM s1; | ||
|
||
statement ok | ||
CREATE SINK sink1 AS select * from mv1 WITH ( | ||
connector = 'iceberg', | ||
type = 'append-only', | ||
force_append_only = 'true', | ||
database.name = 'demo_db', | ||
table.name = 't1', | ||
catalog.name = 'demo', | ||
catalog.type = 'storage', | ||
warehouse.path = 's3a://icebergdata/demo', | ||
s3.endpoint = 'http://127.0.0.1:9301', | ||
s3.region = 'us-east-1', | ||
s3.access.key = 'hummockadmin', | ||
s3.secret.key = 'hummockadmin', | ||
commit_checkpoint_interval = 1, | ||
create_table_if_not_exists = 'true' | ||
); | ||
|
||
statement ok | ||
drop source if exists iceberg_t1_source; | ||
|
||
statement ok | ||
CREATE SOURCE iceberg_t1_source | ||
WITH ( | ||
connector = 'iceberg', | ||
s3.endpoint = 'http://127.0.0.1:9301', | ||
s3.region = 'us-east-1', | ||
s3.access.key = 'hummockadmin', | ||
s3.secret.key = 'hummockadmin', | ||
s3.path.style.access = 'true', | ||
catalog.type = 'storage', | ||
warehouse.path = 's3a://icebergdata/demo', | ||
database.name = 'demo_db', | ||
table.name = 't1', | ||
); | ||
|
||
statement ok | ||
flush; | ||
|
||
query I | ||
select * from iceberg_t1_source order by i1 limit 1; | ||
---- | ||
1 some str another str | ||
|
||
query I | ||
select count(*) from iceberg_t1_source; | ||
---- | ||
1000 | ||
|
||
query I | ||
select * from iceberg_t1_source where i1 > 990 order by i1; | ||
---- | ||
991 NULL NULL | ||
992 NULL NULL | ||
993 NULL NULL | ||
994 NULL NULL | ||
995 NULL NULL | ||
996 NULL NULL | ||
997 NULL NULL | ||
998 NULL NULL | ||
999 NULL NULL | ||
1000 NULL NULL | ||
|
||
query I | ||
explain select * from iceberg_t1_source where i1 > 500 and i1 < 600 and i1 >= 550 and i1 <= 590 and i1 != 570 and i1 = 580; | ||
---- | ||
BatchExchange { order: [], dist: Single } | ||
└─BatchIcebergScan { source: iceberg_t1_source, columns: [i1, i2, i3], predicate: (((((i1 = 580) AND (i1 > 500)) AND (i1 < 600)) AND (i1 >= 550)) AND (i1 <= 590)) AND (i1 != 570) } | ||
|
||
query I | ||
select i1 from iceberg_t1_source where i1 > 500 and i1 < 600 and i1 >= 550 and i1 <= 590 and i1 != 570 and i1 = 580; | ||
---- | ||
580 | ||
|
||
query I | ||
explain select * from iceberg_t1_source where i1 in (1, 2, 3, 4, 5); | ||
---- | ||
BatchExchange { order: [], dist: Single } | ||
└─BatchIcebergScan { source: iceberg_t1_source, columns: [i1, i2, i3], predicate: i1 IN (5, 4, 1, 3, 2) } | ||
|
||
query I | ||
select i1 from iceberg_t1_source where i1 in (1, 2, 3, 4, 5) order by i1; | ||
---- | ||
1 | ||
2 | ||
3 | ||
4 | ||
5 | ||
|
||
query I | ||
select count(*), i2, i3 from iceberg_t1_source where i2 = 'some str' and i3 = 'another str' group by i2, i3; | ||
---- | ||
500 some str another str | ||
|
||
query I | ||
explain select i1 from iceberg_t1_source where i1 > 500 and i2 = i3; | ||
---- | ||
BatchExchange { order: [], dist: Single } | ||
└─BatchProject { exprs: [i1] } | ||
└─BatchFilter { predicate: (i2 = i3) } | ||
└─BatchIcebergScan { source: iceberg_t1_source, columns: [i1, i2, i3], predicate: i1 > 500 } | ||
|
||
query I | ||
select i1 from iceberg_t1_source where i1 > 500 and i2 = i3; | ||
---- | ||
|
||
statement ok | ||
DROP SINK sink1; | ||
|
||
statement ok | ||
DROP SOURCE iceberg_t1_source; | ||
|
||
statement ok | ||
DROP TABLE s1 cascade; |
11 changes: 11 additions & 0 deletions
11
e2e_test/iceberg/test_case/iceberg_predicate_pushdown.toml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
init_sqls = [ | ||
'CREATE SCHEMA IF NOT EXISTS demo_db', | ||
'DROP TABLE IF EXISTS demo_db.t1', | ||
] | ||
|
||
slt = 'test_case/iceberg_predicate_pushdown.slt' | ||
|
||
drop_sqls = [ | ||
'DROP TABLE IF EXISTS demo_db.t1', | ||
'DROP SCHEMA IF EXISTS demo_db', | ||
] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -14,6 +14,8 @@ | |
|
||
use std::rc::Rc; | ||
|
||
use educe::Educe; | ||
use iceberg::expr::Predicate as IcebergPredicate; | ||
use pretty_xmlish::{Pretty, XmlNode}; | ||
use risingwave_pb::batch_plan::plan_node::NodeBody; | ||
use risingwave_pb::batch_plan::IcebergScanNode; | ||
|
@@ -29,10 +31,13 @@ use crate::error::Result; | |
use crate::optimizer::plan_node::expr_visitable::ExprVisitable; | ||
use crate::optimizer::property::{Distribution, Order}; | ||
|
||
#[derive(Debug, Clone, PartialEq, Eq, Hash)] | ||
#[derive(Educe, Debug, Clone, PartialEq)] | ||
#[educe(Eq, Hash)] | ||
pub struct BatchIcebergScan { | ||
pub base: PlanBase<Batch>, | ||
pub core: generic::Source, | ||
#[educe(Hash(ignore))] | ||
pub predicate: IcebergPredicate, | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Hash and Eq are only required for the streaming share plan, but down the road we may support a batch share plan. In that case every single batch plan node using |
||
} | ||
|
||
impl BatchIcebergScan { | ||
|
@@ -44,7 +49,11 @@ impl BatchIcebergScan { | |
Order::any(), | ||
); | ||
|
||
Self { base, core } | ||
Self { | ||
base, | ||
core, | ||
predicate: IcebergPredicate::AlwaysTrue, | ||
} | ||
} | ||
|
||
pub fn column_names(&self) -> Vec<&str> { | ||
|
@@ -62,6 +71,15 @@ impl BatchIcebergScan { | |
Self { | ||
base, | ||
core: self.core.clone(), | ||
predicate: self.predicate.clone(), | ||
} | ||
} | ||
|
||
/// Returns a new scan node that clones this node's `base` and `core` and uses the given `predicate` in place of the current one. | ||
pub fn clone_with_predicate(&self, predicate: IcebergPredicate) -> Self { | ||
Self { | ||
base: self.base.clone(), | ||
core: self.core.clone(), | ||
predicate, | ||
} | ||
} | ||
|
||
|
@@ -78,6 +96,7 @@ impl Distill for BatchIcebergScan { | |
let fields = vec![ | ||
("source", src), | ||
("columns", column_names_pretty(self.schema())), | ||
("predicate", Pretty::from(self.predicate.to_string())), | ||
]; | ||
childless_record("BatchIcebergScan", fields) | ||
} | ||
|
Oops, something went wrong.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm wondering whether it's reasonable to have 0 splits found, e.g. when querying an empty Iceberg table.