Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -990,7 +990,13 @@ protected void checkAndGenObject() throws HiveException {
} else {
if (!alw.hasRows()) {
hasEmpty = true;
alw.addRow(dummyObj[i]);
if (!isRightOfAntiJoin) {
alw.addRow(dummyObj[i]);
}
} else if (isRightOfAntiJoin && !needsPostEvaluation) {
// For an anti join the right side must be empty. In the needsPostEvaluation case we
// wait until evaluation is done; in all other cases we can return directly from here.
return;
} else if (!hasEmpty && alw.isSingleRow()) {
if (hasAnyFiltered(alias, alw.rowIter().first())) {
hasEmpty = true;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -428,7 +428,8 @@ private List<Byte> joinObject(int[] smallestPos, boolean clear) throws HiveExcep

private void putDummyOrEmpty(Byte i) {
// put an empty list or null
if (noOuterJoin) {
boolean isRightOfAntiJoin = (i != 0 && condn[i-1].getType() == JoinDesc.ANTI_JOIN);
if (noOuterJoin || isRightOfAntiJoin) {
storage[i] = emptyList;
} else {
storage[i] = dummyObjVectors[i];
Expand Down
22 changes: 22 additions & 0 deletions ql/src/test/queries/clientpositive/antijoin3.q
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
-- Anti join combined with a left outer join inside one n-way merge join.
-- The filter B.id is null on the left outer join to tabb turns that join into an anti join
-- (the expected plan shows Anti Join 0 to 1 and Left Outer Join 0 to 2 in one Merge Join Operator).
set hive.merge.nway.joins=true;

create table taba(id string);
create table tabb(id string);
create table tabc(id string);

-- tabb stays empty for now. tabc holds the value 2 twice, so id 2 matches two rows in C.
INSERT INTO TABLE taba VALUES ('1'),('2');
INSERT INTO TABLE tabc VALUES ('1'),('2'),('2');

-- Plans first, then the result with an empty tabb: every row of taba passes the anti join
-- (expected 3 rows, one for id 1 and two for id 2).
explain
select * from taba A left outer join tabb B on (A.id = B.id) left outer join tabc C on (C.id = A.id) where B.id is null;
explain cbo
select * from taba A left outer join tabb B on (A.id = B.id) left outer join tabc C on (C.id = A.id) where B.id is null;
select * from taba A left outer join tabb B on (A.id = B.id) left outer join tabc C on (C.id = A.id) where B.id is null;

-- tabb now contains 1, so id 1 is dropped (expected: only the two rows for id 2).
INSERT INTO TABLE tabb VALUES ('1');

select * from taba A left outer join tabb B on (A.id = B.id) left outer join tabc C on (C.id = A.id) where B.id is null;

-- tabb now contains 1 and 2, so no row of taba passes the anti join (expected: no rows).
INSERT INTO TABLE tabb VALUES ('2');

select * from taba A left outer join tabb B on (A.id = B.id) left outer join tabc C on (C.id = A.id) where B.id is null;
250 changes: 250 additions & 0 deletions ql/src/test/results/clientpositive/llap/antijoin3.q.out
Original file line number Diff line number Diff line change
@@ -0,0 +1,250 @@
PREHOOK: query: create table taba(id string)
PREHOOK: type: CREATETABLE
PREHOOK: Output: database:default
PREHOOK: Output: default@taba
POSTHOOK: query: create table taba(id string)
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: database:default
POSTHOOK: Output: default@taba
PREHOOK: query: create table tabb(id string)
PREHOOK: type: CREATETABLE
PREHOOK: Output: database:default
PREHOOK: Output: default@tabb
POSTHOOK: query: create table tabb(id string)
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: database:default
POSTHOOK: Output: default@tabb
PREHOOK: query: create table tabc(id string)
PREHOOK: type: CREATETABLE
PREHOOK: Output: database:default
PREHOOK: Output: default@tabc
POSTHOOK: query: create table tabc(id string)
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: database:default
POSTHOOK: Output: default@tabc
PREHOOK: query: INSERT INTO TABLE taba VALUES ('1'),('2')
PREHOOK: type: QUERY
PREHOOK: Input: _dummy_database@_dummy_table
PREHOOK: Output: default@taba
POSTHOOK: query: INSERT INTO TABLE taba VALUES ('1'),('2')
POSTHOOK: type: QUERY
POSTHOOK: Input: _dummy_database@_dummy_table
POSTHOOK: Output: default@taba
POSTHOOK: Lineage: taba.id SCRIPT []
PREHOOK: query: INSERT INTO TABLE tabc VALUES ('1'),('2'),('2')
PREHOOK: type: QUERY
PREHOOK: Input: _dummy_database@_dummy_table
PREHOOK: Output: default@tabc
POSTHOOK: query: INSERT INTO TABLE tabc VALUES ('1'),('2'),('2')
POSTHOOK: type: QUERY
POSTHOOK: Input: _dummy_database@_dummy_table
POSTHOOK: Output: default@tabc
POSTHOOK: Lineage: tabc.id SCRIPT []
PREHOOK: query: explain
select * from taba A left outer join tabb B on (A.id = B.id) left outer join tabc C on (C.id = A.id) where B.id is null
PREHOOK: type: QUERY
PREHOOK: Input: default@taba
PREHOOK: Input: default@tabb
PREHOOK: Input: default@tabc
#### A masked pattern was here ####
POSTHOOK: query: explain
select * from taba A left outer join tabb B on (A.id = B.id) left outer join tabc C on (C.id = A.id) where B.id is null
POSTHOOK: type: QUERY
POSTHOOK: Input: default@taba
POSTHOOK: Input: default@tabb
POSTHOOK: Input: default@tabc
#### A masked pattern was here ####
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1

STAGE PLANS:
Stage: Stage-1
Tez
#### A masked pattern was here ####
Edges:
Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
TableScan
alias: a
Statistics: Num rows: 2 Data size: 170 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: id (type: string)
outputColumnNames: _col0
Statistics: Num rows: 2 Data size: 170 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string)
null sort order: z
sort order: +
Map-reduce partition columns: _col0 (type: string)
Statistics: Num rows: 2 Data size: 170 Basic stats: COMPLETE Column stats: COMPLETE
Execution mode: vectorized, llap
LLAP IO: all inputs
Map 3
Map Operator Tree:
TableScan
alias: b
filterExpr: id is not null (type: boolean)
Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: id is not null (type: boolean)
Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: id (type: string)
outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
Group By Operator
keys: _col0 (type: string)
minReductionHashAggr: 0.99
mode: hash
outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string)
null sort order: z
sort order: +
Map-reduce partition columns: _col0 (type: string)
Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
Execution mode: vectorized, llap
LLAP IO: all inputs
Map 4
Map Operator Tree:
TableScan
alias: c
filterExpr: id is not null (type: boolean)
Statistics: Num rows: 3 Data size: 255 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: id is not null (type: boolean)
Statistics: Num rows: 3 Data size: 255 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: id (type: string)
outputColumnNames: _col0
Statistics: Num rows: 3 Data size: 255 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string)
null sort order: z
sort order: +
Map-reduce partition columns: _col0 (type: string)
Statistics: Num rows: 3 Data size: 255 Basic stats: COMPLETE Column stats: COMPLETE
Execution mode: vectorized, llap
LLAP IO: all inputs
Reducer 2
Execution mode: llap
Reduce Operator Tree:
Merge Join Operator
condition map:
Anti Join 0 to 1
Left Outer Join 0 to 2
keys:
0 _col0 (type: string)
1 _col0 (type: string)
2 _col0 (type: string)
outputColumnNames: _col0, _col1
Statistics: Num rows: 6 Data size: 561 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col0 (type: string), null (type: string), _col1 (type: string)
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 6 Data size: 561 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
Statistics: Num rows: 6 Data size: 561 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe

Stage: Stage-0
Fetch Operator
limit: -1
Processor Tree:
ListSink

PREHOOK: query: explain cbo
select * from taba A left outer join tabb B on (A.id = B.id) left outer join tabc C on (C.id = A.id) where B.id is null
PREHOOK: type: QUERY
PREHOOK: Input: default@taba
PREHOOK: Input: default@tabb
PREHOOK: Input: default@tabc
#### A masked pattern was here ####
POSTHOOK: query: explain cbo
select * from taba A left outer join tabb B on (A.id = B.id) left outer join tabc C on (C.id = A.id) where B.id is null
POSTHOOK: type: QUERY
POSTHOOK: Input: default@taba
POSTHOOK: Input: default@tabb
POSTHOOK: Input: default@tabc
#### A masked pattern was here ####
CBO PLAN:
HiveProject(id=[$1], id1=[null:VARCHAR(2147483647) CHARACTER SET "UTF-16LE"], id2=[$0])
HiveJoin(condition=[=($0, $1)], joinType=[right], algorithm=[none], cost=[not available])
HiveProject(id=[$0])
HiveFilter(condition=[IS NOT NULL($0)])
HiveTableScan(table=[[default, tabc]], table:alias=[c])
HiveAntiJoin(condition=[=($0, $1)], joinType=[anti])
HiveProject(id=[$0])
HiveTableScan(table=[[default, taba]], table:alias=[a])
HiveProject(id=[$0])
HiveFilter(condition=[IS NOT NULL($0)])
HiveTableScan(table=[[default, tabb]], table:alias=[b])

PREHOOK: query: select * from taba A left outer join tabb B on (A.id = B.id) left outer join tabc C on (C.id = A.id) where B.id is null
PREHOOK: type: QUERY
PREHOOK: Input: default@taba
PREHOOK: Input: default@tabb
PREHOOK: Input: default@tabc
#### A masked pattern was here ####
POSTHOOK: query: select * from taba A left outer join tabb B on (A.id = B.id) left outer join tabc C on (C.id = A.id) where B.id is null
POSTHOOK: type: QUERY
POSTHOOK: Input: default@taba
POSTHOOK: Input: default@tabb
POSTHOOK: Input: default@tabc
#### A masked pattern was here ####
1 NULL 1
2 NULL 2
2 NULL 2
PREHOOK: query: INSERT INTO TABLE tabb VALUES ('1')
PREHOOK: type: QUERY
PREHOOK: Input: _dummy_database@_dummy_table
PREHOOK: Output: default@tabb
POSTHOOK: query: INSERT INTO TABLE tabb VALUES ('1')
POSTHOOK: type: QUERY
POSTHOOK: Input: _dummy_database@_dummy_table
POSTHOOK: Output: default@tabb
POSTHOOK: Lineage: tabb.id SCRIPT []
PREHOOK: query: select * from taba A left outer join tabb B on (A.id = B.id) left outer join tabc C on (C.id = A.id) where B.id is null
PREHOOK: type: QUERY
PREHOOK: Input: default@taba
PREHOOK: Input: default@tabb
PREHOOK: Input: default@tabc
#### A masked pattern was here ####
POSTHOOK: query: select * from taba A left outer join tabb B on (A.id = B.id) left outer join tabc C on (C.id = A.id) where B.id is null
POSTHOOK: type: QUERY
POSTHOOK: Input: default@taba
POSTHOOK: Input: default@tabb
POSTHOOK: Input: default@tabc
#### A masked pattern was here ####
2 NULL 2
2 NULL 2
PREHOOK: query: INSERT INTO TABLE tabb VALUES ('2')
PREHOOK: type: QUERY
PREHOOK: Input: _dummy_database@_dummy_table
PREHOOK: Output: default@tabb
POSTHOOK: query: INSERT INTO TABLE tabb VALUES ('2')
POSTHOOK: type: QUERY
POSTHOOK: Input: _dummy_database@_dummy_table
POSTHOOK: Output: default@tabb
POSTHOOK: Lineage: tabb.id SCRIPT []
PREHOOK: query: select * from taba A left outer join tabb B on (A.id = B.id) left outer join tabc C on (C.id = A.id) where B.id is null
PREHOOK: type: QUERY
PREHOOK: Input: default@taba
PREHOOK: Input: default@tabb
PREHOOK: Input: default@tabc
#### A masked pattern was here ####
POSTHOOK: query: select * from taba A left outer join tabb B on (A.id = B.id) left outer join tabc C on (C.id = A.id) where B.id is null
POSTHOOK: type: QUERY
POSTHOOK: Input: default@taba
POSTHOOK: Input: default@tabb
POSTHOOK: Input: default@tabc
#### A masked pattern was here ####