Skip to content

Commit

Permalink
Added some basic HyperLogLog functions
Browse files Browse the repository at this point in the history
  • Loading branch information
lloydtabb committed Dec 16, 2024
1 parent cdd3586 commit a21d11d
Show file tree
Hide file tree
Showing 2 changed files with 96 additions and 0 deletions.
69 changes: 69 additions & 0 deletions packages/malloy/src/dialect/trino/dialect_functions.ts
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,70 @@ const count_approx: DefinitionBlueprint = {
isSymmetric: true,
};

/**
 * Blueprint for the `hll_accumulate` aggregate.
 *
 * Both overloads are implemented by the SQL function `APPROX_SET`, accept a
 * dimension of generic type `T` (string, number, date, timestamp, boolean or
 * json) and are declared to return a `string` measure.
 *
 * - `default`: one argument, `value`.
 * - `with_percent`: `value` plus a numeric `accuracy` argument.
 *
 * NOTE(review): the overload name `with_percent` matches the naming used for
 * approximate counts; confirm that the target engine accepts a second
 * argument to `APPROX_SET` and what that argument means there.
 */
const hll_accumulate: OverloadedDefinitionBlueprint = {
  // Single-argument form.
  default: {
    impl: {function: 'APPROX_SET'},
    isSymmetric: true,
    generic: [
      'T',
      ['string', 'number', 'date', 'timestamp', 'boolean', 'json'],
    ],
    takes: {
      'value': {dimension: {generic: 'T'}},
    },
    returns: {measure: 'string'},
  },
  // Two-argument form; parameter order (value, accuracy) is significant.
  with_percent: {
    impl: {function: 'APPROX_SET'},
    isSymmetric: true,
    generic: [
      'T',
      ['string', 'number', 'date', 'timestamp', 'boolean', 'json'],
    ],
    takes: {
      'value': {dimension: {generic: 'T'}},
      'accuracy': 'number',
    },
    returns: {measure: 'string'},
  },
};

/**
 * Blueprint for the `hll_combine` aggregate.
 *
 * Implemented by the SQL function `MERGE`. Takes a single `value` argument
 * typed as `string` and is declared to return a `string` measure; the
 * definition is flagged as symmetric.
 */
const hll_combine: DefinitionBlueprint = {
  impl: {function: 'MERGE'},
  isSymmetric: true,
  takes: {'value': 'string'},
  returns: {measure: 'string'},
};

/**
 * Blueprint for `hll_estimate`.
 *
 * Implemented by the SQL function `CARDINALITY`. Takes a single `value`
 * argument typed as `string` and returns a `number` dimension.
 */
const hll_estimate: DefinitionBlueprint = {
  impl: {function: 'CARDINALITY'},
  takes: {'value': 'string'},
  returns: {dimension: 'number'},
};

/**
 * Blueprint for `hll_export`.
 *
 * Emits `CAST(<value> AS VARBINARY)`. Takes a single `value` argument typed
 * as `string` and is declared to return a `string` dimension.
 *
 * The `${value}` placeholder lives in a plain (non-template) string, so it is
 * not interpolated by JavaScript; presumably the blueprint expander
 * substitutes the argument SQL — verify against the expander.
 *
 * NOTE(review): the SQL result is VARBINARY but the declared return type is
 * `string`; confirm this is the intended stand-in type.
 */
const hll_export: DefinitionBlueprint = {
  impl: {sql: 'CAST(${value} AS VARBINARY)'},
  takes: {'value': 'string'},
  returns: {dimension: 'string'},
};

/**
 * Blueprint for `hll_import`.
 *
 * Emits `CAST(<value> AS HyperLogLog)`. Takes a single `value` argument typed
 * as `string` and is declared to return a `string` dimension.
 *
 * The `${value}` placeholder lives in a plain (non-template) string, so it is
 * not interpolated by JavaScript; presumably the blueprint expander
 * substitutes the argument SQL — verify against the expander.
 */
const hll_import: DefinitionBlueprint = {
  impl: {sql: 'CAST(${value} AS HyperLogLog)'},
  takes: {'value': 'string'},
  returns: {dimension: 'string'},
};

const max_by: DefinitionBlueprint = {
generic: ['T', ['string', 'number', 'date', 'timestamp', 'boolean', 'json']],
takes: {
Expand Down Expand Up @@ -294,6 +358,8 @@ export const TRINO_DIALECT_FUNCTIONS: DefinitionBlueprintMap = {
bool_or,
corr,
count_approx,
hll_accumulate,
hll_combine,
max_by,
min_by,
string_agg,
Expand All @@ -306,6 +372,9 @@ export const TRINO_DIALECT_FUNCTIONS: DefinitionBlueprintMap = {
date_format,
date_parse,
from_unixtime,
hll_estimate,
hll_export,
hll_import,
json_extract_scalar,
regexp_like,
regexp_replace,
Expand Down
27 changes: 27 additions & 0 deletions test/src/databases/presto-trino/presto-trino.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -285,6 +285,33 @@ describe.each(runtimes.runtimeList)(
]);
});

// Runs hll_accumulate over `state` and feeds the result straight into
// hll_estimate within one aggregate expression.
// The estimate is divided by 10 and floored before being compared with 5, so
// any estimate in [50, 60) passes — presumably to tolerate approximation error.
it(`hyperloglog basic - ${databaseName}`, async () => {
await expect(`run: ${databaseName}.table('malloytest.state_facts') -> {
aggregate:
m1 is floor(hll_estimate(hll_accumulate(state))/10)
}`).malloyResultMatches(runtime, {m1: 5});
});

// Two-stage query: the first stage builds one hll_accumulate result per
// `state` over `popular_name`; the second stage merges them with hll_combine
// and estimates the overall count with hll_estimate. Expects exactly 6.
it(`hyperloglog combine - ${databaseName}`, async () => {
await expect(`run: ${databaseName}.table('malloytest.state_facts') -> {
group_by: state
aggregate: names_hll is hll_accumulate(popular_name)
} -> {
aggregate: name_count is hll_estimate(hll_combine(names_hll))
}
`).malloyResultMatches(runtime, {name_count: 6});
});

// Same two-stage shape as the combine test, but the per-state result is
// passed through hll_export in the first stage and through hll_import in the
// second before being combined and estimated. Expects the same count, 6,
// i.e. the export/import round trip must not change the estimate.
it(`hyperloglog import/export - ${databaseName}`, async () => {
await expect(`run: ${databaseName}.table('malloytest.state_facts') -> {
group_by: state
aggregate: names_hll is hll_export(hll_accumulate(popular_name))
} -> {
aggregate: name_count is hll_estimate(hll_combine(hll_import(names_hll)))
}
`).malloyResultMatches(runtime, {name_count: 6});
});

it(`runs the url_extract functions - ${databaseName}`, async () => {
await expect(`
run: ${databaseName}.sql(
Expand Down

0 comments on commit a21d11d

Please sign in to comment.