Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Initial implementation of HyperLogLog in Snowflake #2066

Merged
merged 10 commits into from
Jan 2, 2025
36 changes: 36 additions & 0 deletions packages/malloy/src/dialect/snowflake/dialect_functions.ts
Original file line number Diff line number Diff line change
@@ -10,10 +10,14 @@ import {
def,
DefinitionBlueprintMap,
OverloadedDefinitionBlueprint,
TypeDescBlueprint,
arg as a,
sql,
} from '../functions/util';

// Shorthand for the generic type parameter named 'T', so that blueprints can
// be written as {array: T} or {dimension: T}.
const T: TypeDescBlueprint = {generic: 'T'};

const order_by: AggregateOrderByNode = {
node: 'aggregate_order_by',
prefix: ' WITHIN GROUP(',
@@ -62,6 +66,38 @@ const string_agg_distinct: OverloadedDefinitionBlueprint = {
/**
 * Function blueprints for the Snowflake dialect, keyed by function name.
 *
 * Besides the string aggregates and the `repeat` / `reverse` helpers, this
 * map declares the HyperLogLog family (`hll_*`). Each HLL entry names its
 * implementing function via `impl.function`, which here is always the same
 * as the entry's own key.
 */
export const SNOWFLAKE_DIALECT_FUNCTIONS: DefinitionBlueprintMap = {
  string_agg,
  string_agg_distinct,

  // Takes a string or number dimension (generic T) and returns a
  // 'hyperloglog' measure. Written in the overloaded form with a single
  // `default` overload.
  hll_accumulate: {
    default: {
      takes: {value: {dimension: T}},
      returns: {measure: {sql_native: 'hyperloglog'}},
      generic: {T: ['string', 'number']},
      isSymmetric: true,
      impl: {function: 'hll_accumulate'},
    },
  },

  // Takes a 'hyperloglog' value and returns a 'hyperloglog' measure.
  hll_combine: {
    takes: {value: {sql_native: 'hyperloglog'}},
    returns: {measure: {sql_native: 'hyperloglog'}},
    isSymmetric: true,
    impl: {function: 'hll_combine'},
  },

  // Takes a 'hyperloglog' value and returns a number dimension.
  hll_estimate: {
    takes: {value: {sql_native: 'hyperloglog'}},
    returns: {dimension: 'number'},
    impl: {function: 'hll_estimate'},
  },

  // Takes a 'hyperloglog' value and returns a 'bytes' dimension.
  hll_export: {
    takes: {value: {sql_native: 'hyperloglog'}},
    returns: {dimension: {sql_native: 'bytes'}},
    impl: {function: 'hll_export'},
  },

  // Takes a 'bytes' value and returns a 'hyperloglog' dimension.
  hll_import: {
    takes: {value: {sql_native: 'bytes'}},
    returns: {dimension: {sql_native: 'hyperloglog'}},
    impl: {function: 'hll_import'},
  },

  ...def('repeat', {str: 'string', n: 'number'}, 'string'),
  ...def('reverse', {str: 'string'}, 'string'),
};
1 change: 1 addition & 0 deletions packages/malloy/src/dialect/snowflake/snowflake.ts
Original file line number Diff line number Diff line change
@@ -116,6 +116,7 @@ export class SnowflakeDialect extends Dialect {
supportsSafeCast = true;
supportsNesting = true;
defaultSampling = {rows: 50000};
supportsHyperLogLog = true;

// NOTE: safely setting all these to false for now
// more may be implemented in future
9 changes: 0 additions & 9 deletions test/src/databases/bigquery/malloy_query.spec.ts
Original file line number Diff line number Diff line change
@@ -750,15 +750,6 @@ describe('airport_tests', () => {
`).malloyResultMatches(model, {'pipe_turtle.total_airports': 1845});
});

// Two-stage query over the airports table: the first stage aggregates `code`
// with hll_accumulate into `code_hll`; the second stage applies hll_combine to
// `code_hll` and reads the result with hll_estimate. The expected `code_count`
// is 19799.
it('hyperloglog combine', async () => {
await expect(`run: bigquery.table('malloytest.airports')->{
aggregate: code_hll is hll_accumulate(code)
} -> {
aggregate: code_count is hll_estimate(hll_combine(code_hll))
}
`).malloyResultMatches(runtime, {code_count: 19799});
});

it.skip('crossjoined turtles', async () => {
// const result = await runQuery(model,`
// explore airports