Skip to content

Commit c461b96

Browse files
authored
Initial implementation of HyperLogLog in Snowflake (#2066)
* Initial implementation of HyperLogLog in Snowflake Signed-off-by: Jason Smart <jlsmart@meta.com> * Added a custom HyperLogLog test for Snowflake. This test is similar to BigQuery, but the two estimators return different values, reflecting the probabilistic nature of the algorithm. Thus we provide standalone tests for each. Signed-off-by: Jason Smart <jlsmart@meta.com> * Fixed formatting and added a comment explaining why we have a dialect-specific test here. Signed-off-by: Jason Smart <jlsmart@meta.com> * Cleaned up lint errors. Why does the linter care about EOF new lines? Signed-off-by: Jason Smart <jlsmart@meta.com> * More lint fixes Signed-off-by: Jason Smart <jlsmart@meta.com> * Added afterAll() to properly close databases after tests. Signed-off-by: Jason Smart <jlsmart@meta.com> * Added missing EOL Signed-off-by: Jason Smart <jlsmart@meta.com> * Implemented a better example query for HLL as a test, and fixed a bug in the db name specified in the test. Signed-off-by: Jason Smart <jlsmart@meta.com> * Removed dialect-specific HLL tests, as they are redundant and potentially brittle if the underlying implementations ever change. Signed-off-by: Jason Smart <jlsmart@meta.com> --------- Signed-off-by: Jason Smart <jlsmart@meta.com>
1 parent 33cff42 commit c461b96

File tree

3 files changed

+37
-9
lines changed

3 files changed

+37
-9
lines changed

packages/malloy/src/dialect/snowflake/dialect_functions.ts

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,10 +10,14 @@ import {
1010
def,
1111
DefinitionBlueprintMap,
1212
OverloadedDefinitionBlueprint,
13+
TypeDescBlueprint,
1314
arg as a,
1415
sql,
1516
} from '../functions/util';
1617

18+
// Cute shortcut so you can write things like: {array: T} and {dimension: T}
19+
const T: TypeDescBlueprint = {generic: 'T'};
20+
1721
const order_by: AggregateOrderByNode = {
1822
node: 'aggregate_order_by',
1923
prefix: ' WITHIN GROUP(',
@@ -62,6 +66,38 @@ const string_agg_distinct: OverloadedDefinitionBlueprint = {
6266
export const SNOWFLAKE_DIALECT_FUNCTIONS: DefinitionBlueprintMap = {
6367
string_agg,
6468
string_agg_distinct,
69+
hll_accumulate: {
70+
default: {
71+
takes: {'value': {dimension: T}},
72+
returns: {measure: {sql_native: 'hyperloglog'}},
73+
generic: {
74+
'T': ['string', 'number'],
75+
},
76+
isSymmetric: true,
77+
impl: {function: 'hll_accumulate'},
78+
},
79+
},
80+
hll_combine: {
81+
takes: {'value': {sql_native: 'hyperloglog'}},
82+
returns: {measure: {sql_native: 'hyperloglog'}},
83+
impl: {function: 'hll_combine'},
84+
isSymmetric: true,
85+
},
86+
hll_estimate: {
87+
takes: {'value': {sql_native: 'hyperloglog'}},
88+
returns: {dimension: 'number'},
89+
impl: {function: 'hll_estimate'},
90+
},
91+
hll_export: {
92+
takes: {'value': {sql_native: 'hyperloglog'}},
93+
returns: {dimension: {sql_native: 'bytes'}},
94+
impl: {function: 'hll_export'},
95+
},
96+
hll_import: {
97+
takes: {'value': {sql_native: 'bytes'}},
98+
returns: {dimension: {sql_native: 'hyperloglog'}},
99+
impl: {function: 'hll_import'},
100+
},
65101
...def('repeat', {'str': 'string', 'n': 'number'}, 'string'),
66102
...def('reverse', {'str': 'string'}, 'string'),
67103
};

packages/malloy/src/dialect/snowflake/snowflake.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,7 @@ export class SnowflakeDialect extends Dialect {
116116
supportsSafeCast = true;
117117
supportsNesting = true;
118118
defaultSampling = {rows: 50000};
119+
supportsHyperLogLog = true;
119120

120121
// NOTE: safely setting all these to false for now
121122
// more may be implemented in future

test/src/databases/bigquery/malloy_query.spec.ts

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -750,15 +750,6 @@ describe('airport_tests', () => {
750750
`).malloyResultMatches(model, {'pipe_turtle.total_airports': 1845});
751751
});
752752

753-
it('hyperloglog combine', async () => {
754-
await expect(`run: bigquery.table('malloytest.airports')->{
755-
aggregate: code_hll is hll_accumulate(code)
756-
} -> {
757-
aggregate: code_count is hll_estimate(hll_combine(code_hll))
758-
}
759-
`).malloyResultMatches(runtime, {code_count: 19799});
760-
});
761-
762753
it.skip('crossjoined turtles', async () => {
763754
// const result = await runQuery(model,`
764755
// explore airports

0 commit comments

Comments
 (0)