-
Notifications
You must be signed in to change notification settings - Fork 64
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
perf: Add hash-join for semijoin and antijoin. Refactor join code.
- Loading branch information
Showing
9 changed files
with
183 additions
and
101 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
/**
 * Build a hash lookup table that maps each key to a single row index.
 *
 * The table is scanned once; for every visited row the key produced by
 * `hash` is stored together with that row index. When several rows
 * produce the same key, the row visited last wins because later
 * `Map.set` calls overwrite earlier entries.
 *
 * @param {object} table Object exposing `scan(callback)`, where the
 *  callback is invoked as `callback(row, data)`.
 * @param {function} hash Key function called as `hash(row, data)`.
 * @return {Map} Map from key value to row index.
 */
export function singleRowLookup(table, hash) {
  const lut = new Map();
  table.scan((row, data) => {
    const key = hash(row, data);
    // Skip null and undefined keys, and NaN (the only value for which
    // `key === key` is false), so such rows can never be matched.
    if (key != null && key === key) {
      lut.set(key, row);
    }
  });
  return lut;
}
|
||
/**
 * Build a hash lookup table that maps each key to every row index
 * producing that key.
 *
 * Rows are visited in the order they appear in `idx`, so each bucket
 * lists its rows in that same order.
 *
 * @param {ArrayLike<number>} idx Row indices to include in the table.
 * @param {*} data Value passed through unchanged as the second
 *  argument of `hash`.
 * @param {function} hash Key function called as `hash(row, data)`.
 * @return {Map} Map from key value to an array of row indices.
 */
export function multiRowLookup(idx, data, hash) {
  const lut = new Map();
  const n = idx.length;
  for (let i = 0; i < n; ++i) {
    const row = idx[i];
    const key = hash(row, data);

    // Skip null and undefined keys, and NaN (the only value for which
    // `key !== key` is true), so such rows can never be matched.
    if (key == null || key !== key) continue;

    // A single probe per row: buckets are always arrays, so an
    // undefined result means the key has not been seen yet.
    const rows = lut.get(key);
    if (rows === undefined) {
      lut.set(key, [row]);
    } else {
      rows.push(row);
    }
  }
  return lut;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,46 +1,33 @@ | ||
import { singleRowLookup } from './join/lookup'; | ||
import { aggregateGet } from './reduce/util'; | ||
import columnSet from '../table/column-set'; | ||
import NULL from '../util/null'; | ||
import concat from '../util/concat'; | ||
import unroll from '../util/unroll'; | ||
|
||
// NOTE(review): this span is a rendered diff hunk ("@@ -1,46 +1,33 @@"): removed and added rows are interleaved without +/- markers, so it is not runnable as shown. | ||
// The function receives two tables, a [keyL, keyR] pair and { names, exprs, ops }, and returns tableL.create(cols) with one added column per entry of names. | ||
export default function(tableL, tableR, [keyL, keyR], { names, exprs, ops }) { | ||
// instantiate output data | ||
const cols = columnSet(tableL); | ||
const total = tableL.totalRows(); | ||
// NOTE(review): two variants of the column allocation follow; only the second pre-fills each slot with NULL. Presumably the first is the removed row - confirm against the repository. | ||
names.forEach(name => cols.add(name, Array(total))); | ||
names.forEach(name => cols.add(name, Array(total).fill(NULL))); | ||
|
||
// build lookup table | ||
// NOTE(review): the inline Map construction below and the singleRowLookup call after it both declare "lut"; presumably the inline form is the removed code. | ||
const lut = new Map(); | ||
tableR.scan((row, data) => { | ||
const key = keyR(row, data); | ||
if (key != null && key === key) { | ||
lut.set(keyR(row, data), row); | ||
} | ||
}); | ||
const lut = singleRowLookup(tableR, keyR); | ||
|
||
// generate setter function for lookup match | ||
// The generated body assigns _[i][lr] = $[i](rr, data) for each index i of names; the column arrays and the aggregateGet result are the two value lists handed to unroll. | ||
const set = unroll( | ||
['lr', 'rr', 'data'], | ||
'{' + concat(names, (_, i) => `_[${i}][lr] = $[${i}](rr, data);`) + '}', | ||
names.map(name => cols.data[name]), | ||
aggregateGet(tableR, ops, exprs) | ||
); | ||
|
||
// find matching rows | ||
// NOTE(review): rowL/rowR/m buffer the matches for the per-column output loop further down; these buffers and that loop look like the removed implementation - confirm. | ||
const rowL = new Int32Array(tableL.numRows()); | ||
const rowR = new Int32Array(tableL.numRows()); | ||
let m = 0; | ||
// find matching rows, set values on match | ||
const dataR = tableR.data(); | ||
tableL.scan((lrow, data) => { | ||
const rrow = lut.get(keyL(lrow, data)); | ||
rowL[m] = lrow; | ||
rowR[m] = rrow == null ? -1 : rrow; | ||
++m; | ||
// Map.get yields undefined for a key with no entry, and "undefined >= 0" is false, so set is only called for matched rows. | ||
if (rrow >= 0) set(lrow, rrow, dataR); | ||
}); | ||
|
||
// output values for matching rows | ||
// NOTE(review): second declaration of "dataR" in this span (see the diff note at the top). | ||
const dataR = tableR.data(); | ||
const get = aggregateGet(tableR, ops, exprs); | ||
const n = get.length; | ||
|
||
for (let i = 0; i < n; ++i) { | ||
const column = cols.data[names[i]]; | ||
const getter = get[i]; | ||
for (let j = 0; j < m; ++j) { | ||
const rrow = rowR[j]; | ||
// rowR holds -1 for left rows without a match; those rows receive NULL. | ||
column[rowL[j]] = rrow >= 0 ? getter(rrow, dataR) : NULL; | ||
} | ||
} | ||
|
||
return tableL.create(cols); | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,25 +1,15 @@ | ||
import _join_filter from '../engine/join-filter'; | ||
import { inferKeys, keyPredicate } from './util/join-keys'; | ||
import parse from '../expression/parse'; | ||
import { inferKeys } from './join'; | ||
import parseKey from './util/parse-key'; | ||
import isArray from '../util/is-array'; | ||
import toArray from '../util/to-array'; | ||
|
||
// NOTE(review): this span is a rendered diff hunk ("@@ -1,25 +1,15 @@"): removed and added rows are interleaved without +/- markers, so it is not runnable as shown. | ||
// Both variants normalize the join condition with inferKeys and hand the resulting value to _join_filter together with the two tables and options. | ||
export default function(tableL, tableR, on, options) { | ||
on = inferKeys(tableL, tableR, on); | ||
// NOTE(review): this variant reassigns "on" and builds the predicate with the local toPredicate helper; presumably it is the removed code - confirm against the repository. | ||
on = isArray(on) | ||
? toPredicate( | ||
parseKey('join', tableL, on[0]), | ||
parseKey('join', tableR, on[1]) | ||
) | ||
: parse({ on }, { join: [tableL, tableR] }).exprs[0]; | ||
|
||
return _join_filter(tableL, tableR, on, options); | ||
} | ||
// NOTE(review): this variant keeps "on" intact and, for array conditions, passes each side through toArray before calling keyPredicate; non-array conditions are parsed as an expression over both tables. | ||
const predicate = isArray(on) | ||
? keyPredicate(tableL, tableR, ...on.map(toArray)) | ||
: parse({ on }, { join: [tableL, tableR] }).exprs[0]; | ||
|
||
// Returns a four-argument predicate that is true only when both keys are strictly equal and neither is null or undefined. | ||
function toPredicate(keyL, keyR) { | ||
return (rowL, dataL, rowR, dataR) => { | ||
const kl = keyL(rowL, dataL); | ||
const kr = keyR(rowR, dataR); | ||
return kl === kr && kl != null && kr != null; | ||
}; | ||
return _join_filter(tableL, tableR, predicate, options); | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
import parseKey from './parse-key'; | ||
import error from '../../util/error'; | ||
import intersect from '../../util/intersect'; | ||
import isArray from '../../util/is-array'; | ||
import isString from '../../util/is-string'; | ||
|
||
/**
 * Normalize a join condition into its expanded form.
 *
 * - No condition (falsy `on`): natural join. The column names shared by
 *   both tables are used for both sides; `error` is invoked when the
 *   tables share no column names.
 * - A string: the same value is used for both sides.
 * - A one-element array: its only element is used for both sides.
 * - Anything else is returned unchanged.
 *
 * @param {object} tableL Left table; must expose `columnNames()`.
 * @param {object} tableR Right table; must expose `columnNames()`.
 * @param {*} [on] The join condition supplied by the caller.
 * @return {*} A two-element array of left/right keys, or the original
 *  `on` value when no expansion applies.
 */
export function inferKeys(tableL, tableR, on) {
  if (!on) {
    // perform natural join if join condition not provided
    const isect = intersect(tableL.columnNames(), tableR.columnNames());
    if (!isect.length) error('Natural join requires shared column names.');
    return [isect, isect];
  }

  if (isString(on)) {
    return [on, on];
  }

  if (isArray(on) && on.length === 1) {
    return [on[0], on[0]];
  }

  return on;
}
|
||
/**
 * Validate a pair of join key lists and parse each against its table.
 *
 * Despite the name, the return value is not a single predicate function:
 * it is a two-element array holding the result of `parseKey` for the
 * left table and for the right table (presumably key functions - confirm
 * against `parseKey`).
 *
 * @param {object} tableL Left table, forwarded to `parseKey`.
 * @param {object} tableR Right table, forwarded to `parseKey`.
 * @param {Array} onL Join keys for the left table.
 * @param {Array} onR Join keys for the right table.
 * @return {Array} `[parseKey('join', tableL, onL), parseKey('join', tableR, onR)]`.
 */
export function keyPredicate(tableL, tableR, onL, onR) {
  // Both sides must supply the same number of keys.
  if (onL.length !== onR.length) {
    error('Mismatched number of join keys');
  }

  const keyL = parseKey('join', tableL, onL);
  const keyR = parseKey('join', tableR, onR);
  return [keyL, keyR];
}
Oops, something went wrong.