Skip to content

Commit

Permalink
Handle nested schemas (#1695)
Browse files Browse the repository at this point in the history
  • Loading branch information
Will Scullin authored Apr 2, 2024
1 parent 3f176c7 commit 3939785
Show file tree
Hide file tree
Showing 2 changed files with 224 additions and 10 deletions.
136 changes: 136 additions & 0 deletions packages/malloy-db-trino/src/trino_connection.spec.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
/*
* Copyright 2024 Google LLC
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files
* (the "Software"), to deal in the Software without restriction,
* including without limitation the rights to use, copy, modify, merge,
* publish, distribute, sublicense, and/or sell copies of the Software,
* and to permit persons to whom the Software is furnished to do so,
* subject to the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/

import {TrinoConnection, TrinoExecutor} from '.';

const ARRAY_SCHEMA = 'array(integer)';
const STRUCT_SCHEMA = 'row(a double, b integer, c varchar(60))';
const DEEP_SCHEMA = 'array(row(a double, b integer, c varchar(60)))';

describe('Trino connection', () => {
describe('schema parser', () => {
it('parses arrays', () => {
const connection = new TrinoConnection(
'trino',
{},
TrinoExecutor.getConnectionOptionsFromEnv()
);
expect(connection.malloyTypeFromTrinoType('test', ARRAY_SCHEMA)).toEqual({
'name': 'test',
'type': 'struct',
'dialect': 'trino',
'structRelationship': {
'fieldName': 'test',
'isArray': true,
'type': 'nested',
},
'structSource': {
'type': 'nested',
},
'fields': [
{
'name': 'value',
'type': 'number',
'numberType': 'integer',
},
],
});
});

it('parses structs', () => {
const connection = new TrinoConnection(
'trino',
{},
TrinoExecutor.getConnectionOptionsFromEnv()
);
expect(connection.malloyTypeFromTrinoType('test', STRUCT_SCHEMA)).toEqual(
{
'name': 'test',
'type': 'struct',
'dialect': 'trino',
'structRelationship': {
'fieldName': 'test',
'isArray': false,
'type': 'nested',
},
'structSource': {
'type': 'nested',
},
'fields': [
{
'name': 'a',
'type': 'number',
'numberType': 'float',
},
{
'name': 'b',
'type': 'number',
'numberType': 'integer',
},
{
'name': 'c',
'type': 'string',
},
],
}
);
});

it('parses arrays of structs', () => {
const connection = new TrinoConnection(
'trino',
{},
TrinoExecutor.getConnectionOptionsFromEnv()
);
expect(connection.malloyTypeFromTrinoType('test', DEEP_SCHEMA)).toEqual({
'name': 'test',
'type': 'struct',
'dialect': 'trino',
'structRelationship': {
'fieldName': 'test',
'isArray': true,
'type': 'nested',
},
'structSource': {'type': 'nested'},
'fields': [
{
'name': 'value',
'type': 'struct',
'dialect': 'trino',
'structRelationship': {
'fieldName': 'test',
'isArray': false,
'type': 'nested',
},
'structSource': {'type': 'nested'},
'fields': [
{'name': 'a', 'numberType': 'float', 'type': 'number'},
{'name': 'b', 'numberType': 'integer', 'type': 'number'},
{'name': 'c', 'type': 'string'},
],
},
],
});
});
});
});
98 changes: 88 additions & 10 deletions packages/malloy-db-trino/src/trino_connection.ts
Original file line number Diff line number Diff line change
Expand Up @@ -72,8 +72,11 @@ export class TrinoConnection implements Connection, PersistSQLResults {
'integer': {type: 'number', numberType: 'integer'},
'bigint': {type: 'number', numberType: 'integer'},
'double': {type: 'number', numberType: 'float'},
'decimal': {type: 'number', numberType: 'float'},
'string': {type: 'string'},
'date': {type: 'date'},
'timestamp': {type: 'timestamp'},
'boolean': {type: 'boolean'},

// TODO(figutierrez0): cleanup.
/* 'INT64': {type: 'number', numberType: 'integer'},
Expand Down Expand Up @@ -451,6 +454,89 @@ export class TrinoConnection implements Connection, PersistSQLResults {
while (!(await result.next()).done);
}

malloyTypeFromTrinoType(
name: string,
trinoType: string
): FieldAtomicTypeDef | StructDef {
let malloyType: FieldAtomicTypeDef | StructDef;
// Arrays look like `array(type)`
const arrayMatch = trinoType.match(/^array\((.*)\)$/);

// Structs look like `row(name type, name type)`
const structMatch = trinoType.match(/^row\((.*)\)$/);

if (arrayMatch) {
const arrayType = arrayMatch[1];
const innerType = this.malloyTypeFromTrinoType(name, arrayType);
malloyType = {
type: 'struct',
name,
dialect: this.dialectName,
structSource: {type: 'nested'},
structRelationship: {
type: 'nested',
fieldName: name,
isArray: true,
},
fields: [{...innerType, name: 'value'} as FieldTypeDef],
};
} else if (structMatch) {
// TODO: Trino doesn't quote or escape commas in field names,
// so some magic is going to need to be applied before we get here
// to avoid confusion if a field name contains a comma
const innerTypes = structMatch[1].split(/,\s+/);
malloyType = {
type: 'struct',
name,
dialect: this.dialectName,
structSource: {type: 'nested'},
structRelationship: {
type: 'nested',
fieldName: name,
isArray: false,
},
fields: [],
};
for (let innerType of innerTypes) {
// TODO: Handle time zone type annotation, which is an
// exception to the types not containing spaces assumption
innerType = innerType.replace(/ with time zone$/, '');
const parts = innerType.match(/^(.*)\s(\S+)$/);
if (parts) {
const innerName = parts[1];
const innerTrinoType = parts[2];
const innerMalloyType = this.malloyTypeFromTrinoType(
innerName,
innerTrinoType
);
malloyType.fields.push({...innerMalloyType, name: innerName});
} else {
malloyType.fields.push({
name: 'unknown',
type: 'unsupported',
rawType: innerType.toLowerCase(),
});
}
}
} else {
malloyType = this.sqlToMalloyType(trinoType) ?? {
type: 'unsupported',
rawType: trinoType.toLowerCase(),
};
}
return malloyType;
}

structDefFromSchema(rows: string[][], structDef: StructDef): void {
for (const row of rows) {
const name = row[0];
const type = row[1] || row[4];
const malloyType = this.malloyTypeFromTrinoType(name, type);
// console.log('>', row, '\n<', malloyType);
structDef.fields.push({name, ...malloyType});
}
}

private async loadSchemaForSqlBlock(
sqlBlock: string,
structDef: StructDef,
Expand All @@ -470,16 +556,8 @@ export class TrinoConnection implements Connection, PersistSQLResults {
);
}

const rows = queryResult.value.data ?? [];
for (const row of rows) {
const fieldName = row[0];
const type = row[1];
const malloyType = this.sqlToMalloyType(type) ?? {
type: 'unsupported',
rawType: type.toLowerCase(),
};
structDef.fields.push({name: fieldName, ...malloyType} as FieldTypeDef);
}
const rows: string[][] = queryResult.value.data ?? [];
this.structDefFromSchema(rows, structDef);
} catch (e) {
throw new Error(`Could not fetch schema for ${element} ${e}`);
}
Expand Down

0 comments on commit 3939785

Please sign in to comment.