From 50bd0592b6d28a532ab5bd9eb58d2e3ada8edcdb Mon Sep 17 00:00:00 2001 From: Jason Smart Date: Sat, 21 Dec 2024 21:16:05 -0800 Subject: [PATCH 1/3] Added initial support for BQ HLL functions Signed-off-by: Jason Smart --- .../dialect/standardsql/dialect_functions.ts | 36 +++++++++++++++++++ .../src/dialect/standardsql/standardsql.ts | 1 + 2 files changed, 37 insertions(+) diff --git a/packages/malloy/src/dialect/standardsql/dialect_functions.ts b/packages/malloy/src/dialect/standardsql/dialect_functions.ts index b2f08dfd4..33be2308d 100644 --- a/packages/malloy/src/dialect/standardsql/dialect_functions.ts +++ b/packages/malloy/src/dialect/standardsql/dialect_functions.ts @@ -10,8 +10,12 @@ import { DefinitionBlueprint, DefinitionBlueprintMap, OverloadedDefinitionBlueprint, + TypeDescBlueprint, } from '../functions/util'; +// Cute shortcut so you can write things like: {array: T} and {dimension: T} +const T: TypeDescBlueprint = {generic: 'T'}; + const date_from_unix_date: DefinitionBlueprint = { takes: {'unix_date': 'number'}, returns: 'date', @@ -69,6 +73,38 @@ export const STANDARDSQL_DIALECT_FUNCTIONS: DefinitionBlueprintMap = { date_from_unix_date, string_agg, string_agg_distinct, + hll_accumulate: { + default: { + takes: {'value': {dimension: T}}, + returns: {measure: {sql_native: 'bytes'}}, + generic: { + 'T': ['string', 'number'], + }, + isSymmetric: true, + impl: {function: 'HLL_COUNT.INIT'}, + }, + }, + hll_combine: { + takes: {'value': {sql_native: 'bytes'}}, + returns: {measure: {sql_native: 'bytes'}}, + impl: {function: 'HLL_COUNT.MERGE_PARTIAL'}, + isSymmetric: true, + }, + hll_estimate: { + takes: {'value': {sql_native: 'bytes'}}, + returns: {dimension: 'number'}, + impl: {function: 'HLL_COUNT.EXTRACT'}, + }, + hll_export: { + takes: {'value': {sql_native: 'bytes'}}, + returns: {dimension: {sql_native: 'bytes'}}, + impl: {sql: 'CAST(${value} AS BYTES)'}, + }, + hll_import: { + takes: {'value': {sql_native: 'bytes'}}, + returns: {dimension: {sql_native: 
'bytes'}}, + impl: {sql: 'CAST(${value} AS BYTES)'}, + }, ...def('repeat', {'str': 'string', 'n': 'number'}, 'string'), ...def('reverse', {'str': 'string'}, 'string'), }; diff --git a/packages/malloy/src/dialect/standardsql/standardsql.ts b/packages/malloy/src/dialect/standardsql/standardsql.ts index 7ea2dcdff..309504cc0 100644 --- a/packages/malloy/src/dialect/standardsql/standardsql.ts +++ b/packages/malloy/src/dialect/standardsql/standardsql.ts @@ -126,6 +126,7 @@ export class StandardSQLDialect extends Dialect { cantPartitionWindowFunctionsOnExpressions = true; hasModOperator = false; nestedArrays = false; // Can't have an array of arrays for some reason + supportsHyperLogLog = true; quoteTablePath(tablePath: string): string { return `\`${tablePath}\``; From 2b6bc7cb9ec30e6213373df442bf7a30ca94a34b Mon Sep 17 00:00:00 2001 From: Jason Smart Date: Sun, 22 Dec 2024 17:06:45 -0800 Subject: [PATCH 2/3] Added a BQ-specific test. Signed-off-by: Jason Smart --- test/src/databases/bigquery/malloy_query.spec.ts | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/test/src/databases/bigquery/malloy_query.spec.ts b/test/src/databases/bigquery/malloy_query.spec.ts index 50cb83d77..18a300fcf 100644 --- a/test/src/databases/bigquery/malloy_query.spec.ts +++ b/test/src/databases/bigquery/malloy_query.spec.ts @@ -750,6 +750,15 @@ describe('airport_tests', () => { `).malloyResultMatches(model, {'pipe_turtle.total_airports': 1845}); }); + it(`hyperloglog combine`, async () => { + await expect(`run: bigquery.table('malloytest.airports')->{ + aggregate: code_hll is hll_accumulate(code) + } -> { + aggregate: code_count is hll_estimate(hll_combine(code_hll)) + } + `).malloyResultMatches(runtime, {code_count: 6}); + }); + it.skip('crossjoined turtles', async () => { // const result = await runQuery(model,` // explore airports From 539c26de866c85f428577fdcaa466d073623ff27 Mon Sep 17 00:00:00 2001 From: Jason Smart Date: Sun, 22 Dec 2024 17:26:26 -0800 Subject: [PATCH 3/3] 
Updated the expected test output to the correct number. Signed-off-by: Jason Smart --- test/src/databases/bigquery/malloy_query.spec.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/src/databases/bigquery/malloy_query.spec.ts b/test/src/databases/bigquery/malloy_query.spec.ts index 18a300fcf..8a870411d 100644 --- a/test/src/databases/bigquery/malloy_query.spec.ts +++ b/test/src/databases/bigquery/malloy_query.spec.ts @@ -756,7 +756,7 @@ describe('airport_tests', () => { } -> { aggregate: code_count is hll_estimate(hll_combine(code_hll)) } - `).malloyResultMatches(runtime, {code_count: 6}); + `).malloyResultMatches(runtime, {code_count: 19799}); }); it.skip('crossjoined turtles', async () => {