Skip to content

Commit

Permalink
Motherduck wasm (#1640)
Browse files Browse the repository at this point in the history
Add support for motherduck in wasm build. Add tests for node motherduck.
  • Loading branch information
Will Scullin authored Mar 12, 2024
1 parent 7a17884 commit 50b3db5
Show file tree
Hide file tree
Showing 11 changed files with 207 additions and 47 deletions.
29 changes: 29 additions & 0 deletions .github/workflows/db-motherduck.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
name: MotherDuck

on: [push]

jobs:
build:
runs-on: ubuntu-latest

strategy:
matrix:
node-version: [18.x]

steps:
- uses: actions/checkout@v4
with:
submodules: 'true'
- name: Use Node.js ${{ matrix.node-version }}
uses: actions/setup-node@v4
with:
node-version: ${{ matrix.node-version }}
- name: npm install, build, and test
run: |
npm ci --loglevel error
npm run build
npm run test-silent
env:
CI: true
MALLOY_DATABASES: motherduck
MOTHERDUCK_TOKEN: ${{ secrets.MOTHERDUCK_TOKEN }}
13 changes: 10 additions & 3 deletions jest.config.js
Original file line number Diff line number Diff line change
Expand Up @@ -23,14 +23,21 @@

process.env.TZ = 'America/Los_Angeles';

const transformIgnoreModules = [
'lit-html',
'lit-element',
'lit',
'@lit',
'@lit-labs',
'@motherduck/wasm-client',
].join('|');

module.exports = {
moduleFileExtensions: ['js', 'jsx', 'ts', 'tsx'],
setupFilesAfterEnv: ['jest-expect-message'],
testMatch: ['**/?(*.)spec.(ts|js)?(x)'],
testPathIgnorePatterns: ['/node_modules/', '/dist/', '/out/'],
transformIgnorePatterns: [
'node_modules/(?!(lit-html|lit-element|lit|@lit|@lit-labs)/)',
],
transformIgnorePatterns: [`node_modules/(?!(${transformIgnoreModules})/)`],
transform: {
'^.+\\.(ts|tsx)$': ['ts-jest', {tsconfig: '<rootDir>/tsconfig.json'}],
'^.+\\.(js|jsx)$': [
Expand Down
37 changes: 23 additions & 14 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 4 additions & 3 deletions packages/malloy-db-duckdb/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -40,9 +40,10 @@
"prepublishOnly": "npm run build"
},
"dependencies": {
"@malloydata/duckdb-wasm": "0.0.6",
"@malloydata/malloy": "^0.0.130",
"apache-arrow": "^13.0.0",
"@duckdb/duckdb-wasm": "1.28.1-dev106.0",
"@malloydata/malloy": "0.0.130",
"@motherduck/wasm-client": "^0.4.0",
"apache-arrow": "^14.0.0",
"duckdb": "0.9.2",
"web-worker": "^1.2.0"
}
Expand Down
3 changes: 3 additions & 0 deletions packages/malloy-db-duckdb/src/duckdb_common.ts
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,9 @@ const unquoteName = (name: string) => {
export abstract class DuckDBCommon
implements TestableConnection, PersistSQLResults, StreamingConnection
{
protected isMotherDuck = false;
protected motherDuckToken: string | undefined;

private readonly dialect = new DuckDBDialect();
static DEFAULT_QUERY_OPTIONS: DuckDBQueryOptions = {
rowLimit: 10,
Expand Down
2 changes: 0 additions & 2 deletions packages/malloy-db-duckdb/src/duckdb_connection.ts
Original file line number Diff line number Diff line change
Expand Up @@ -56,8 +56,6 @@ export class DuckDBConnection extends DuckDBCommon {
private additionalExtensions: string[] = [];
private databasePath = ':memory:';
private workingDirectory = '.';
private isMotherDuck = false;
private motherDuckToken: string | undefined;
private readOnly = false;

connecting: Promise<void>;
Expand Down
45 changes: 37 additions & 8 deletions packages/malloy-db-duckdb/src/duckdb_wasm_connection.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/

import * as duckdb from '@malloydata/duckdb-wasm';
import * as duckdb from '@duckdb/duckdb-wasm';
import Worker from 'web-worker';
import {
FetchSchemaOptions,
Expand All @@ -38,6 +38,8 @@ import {DuckDBCommon} from './duckdb_common';
const TABLE_MATCH = /FROM\s*('([^']*)'|"([^"]*)")/gi;
const TABLE_FUNCTION_MATCH = /FROM\s+[a-z0-9_]+\(('([^']*)'|"([^"]*)")/gi;

const FILE_EXTS = ['.csv', '.tsv', '.parquet'] as const;

/**
* Arrow's toJSON() doesn't really do what I'd expect, since
* it still includes Arrow objects like DecimalBigNums and Vectors,
Expand All @@ -47,7 +49,7 @@ const TABLE_FUNCTION_MATCH = /FROM\s+[a-z0-9_]+\(('([^']*)'|"([^"]*)")/gi;
* @return Vanilla Javascript value
*/
// eslint-disable-next-line @typescript-eslint/no-explicit-any
const unwrapArrow = (value: unknown): any => {
export const unwrapArrow = (value: unknown): any => {
if (value === null) {
return value;
} else if (value instanceof Vector) {
Expand All @@ -67,6 +69,12 @@ const unwrapArrow = (value: unknown): any => {
return Number(obj[Symbol.toPrimitive]());
} else if (Array.isArray(value)) {
return value.map(unwrapArrow);
} else if (obj['microseconds'] && obj['timezone'] === null) {
// Convert epoch µs to ms
return Number(obj['microseconds']) / 1000;
} else if (obj['days']) {
// Convert epoch day to Date
return new Date(obj['days'] * 8.64e7);
} else {
// eslint-disable-next-line @typescript-eslint/no-explicit-any
const result: Record<string | symbol, any> = {};
Expand All @@ -88,7 +96,7 @@ const unwrapArrow = (value: unknown): any => {
* For some reason a custom replacer only sees DecimalBigNums as
* strings, as well.
*/
const unwrapRow = (row: StructRow): QueryDataRow => {
export const unwrapRow = (row: StructRow): QueryDataRow => {
return unwrapArrow(row.toJSON());
};

Expand All @@ -106,10 +114,13 @@ type RemoteFileCallback = (
) => Promise<Uint8Array | undefined>;

export interface DuckDBWasmOptions extends ConnectionConfig {
additionalExtensions?: string[];
databasePath?: string;
motherDuckToken: string | undefined;
workingDirectory?: string;
}
export abstract class DuckDBWASMConnection extends DuckDBCommon {
private additionalExtensions: string[] = [];
public readonly name: string;
private databasePath: string | null = null;
protected workingDirectory = '/';
Expand Down Expand Up @@ -158,11 +169,21 @@ export abstract class DuckDBWASMConnection extends DuckDBCommon {
if (typeof arg.workingDirectory === 'string') {
this.workingDirectory = arg.workingDirectory;
}
if (typeof arg.motherDuckToken === 'string') {
this.motherDuckToken = arg.motherDuckToken;
}
if (Array.isArray(arg.additionalExtensions)) {
this.additionalExtensions = arg.additionalExtensions;
}
}
this.isMotherDuck =
this.databasePath?.startsWith('md:') ||
this.databasePath?.startsWith('motherduck:') ||
false;
this.connecting = this.init();
}

private async init(): Promise<void> {
protected async init(): Promise<void> {
// Select a bundle based on browser checks
const bundle = await duckdb.selectBundle(this.getBundles());

Expand Down Expand Up @@ -219,10 +240,11 @@ export abstract class DuckDBWASMConnection extends DuckDBCommon {
`SET FILE_SEARCH_PATH='${this.workingDirectory}'`
);
}
// Not quite ready for prime time
// for (const ext of ['json', 'httpfs', 'icu']) {
// await this.loadExtension(ext);
// }
const extensions = ['json', 'icu', ...this.additionalExtensions];

for (const ext of extensions) {
await this.loadExtension(ext);
}
const setupCmds = ["SET TimeZone='UTC'"];
for (const cmd of setupCmds) {
try {
Expand Down Expand Up @@ -327,6 +349,13 @@ export abstract class DuckDBWASMConnection extends DuckDBCommon {
await this.setup();

for (const tablePath of tables) {
if (
this.isMotherDuck &&
!tables.includes('/') &&
!FILE_EXTS.some(ext => tablePath.endsWith(ext))
) {
continue;
}
// http and s3 urls are handled by duckdb-wasm
if (tablePath.match(/^https?:\/\//)) {
continue;
Expand Down
85 changes: 83 additions & 2 deletions packages/malloy-db-duckdb/src/duckdb_wasm_connection_browser.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,14 +21,95 @@
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/

import * as duckdb from '@malloydata/duckdb-wasm';
import {DuckDBWASMConnection as DuckDBWASMConnectionBase} from './duckdb_wasm_connection';
import * as duckdb from '@duckdb/duckdb-wasm';
import {
DuckDBWASMConnection as DuckDBWASMConnectionBase,
unwrapArrow,
} from './duckdb_wasm_connection';
import {MDConnection} from '@motherduck/wasm-client';
import {QueryDataRow} from '@malloydata/malloy';

export class DuckDBWASMConnection extends DuckDBWASMConnectionBase {
protected _mdConnection: MDConnection | null = null;

getBundles(): duckdb.DuckDBBundles {
return duckdb.getJsDelivrBundles();
}

override async init(): Promise<void> {
if (this.isMotherDuck) {
if (!this.motherDuckToken) {
throw new Error('Please set your MotherDuck token');
}
const mdConnection = MDConnection.create({
mdToken: this.motherDuckToken,
});
await mdConnection.isInitialized();
this._mdConnection = mdConnection;
console.info('MotherDuck initialized');
} else {
await super.init();
}
}

override async setup(): Promise<void> {
if (this.isMotherDuck) {
const doSetup = async () => {
const setupCmds = ["SET TimeZone='UTC'"];
for (const cmd of setupCmds) {
try {
await this.runDuckDBQuery(cmd);
} catch (error) {
// eslint-disable-next-line no-console
console.error(`duckdb setup ${cmd} => ${error}`);
}
}
};
await this.connecting;
if (!this.isSetup) {
this.isSetup = doSetup();
}
await this.isSetup;
} else {
await super.setup();
}
}

protected override async runDuckDBQuery(
sql: string,
abortSignal?: AbortSignal
): Promise<{rows: QueryDataRow[]; totalRows: number}> {
if (this.isMotherDuck) {
if (this._mdConnection) {
const connection = this._mdConnection;
let queryId: string | undefined = undefined;
const cancel = () => {
if (queryId) {
connection.cancelQuery(queryId, 'Cancelled');
}
};
abortSignal?.addEventListener('abort', cancel);
queryId = connection.enqueueQuery(sql);
if (queryId) {
const result = await connection.evaluateQueuedQuery(queryId);
if (result?.data) {
const rows = unwrapArrow(result.data.toRows());
const totalRows = result.data.rowCount;
return {
rows,
totalRows,
};
}
throw new Error('No data');
}
throw new Error('Failed to enqueue query');
}
throw new Error('MotherDuck not initialized');
} else {
return super.runDuckDBQuery(sql, abortSignal);
}
}

async createHash(sqlCommand: string): Promise<string> {
const msgUint8 = new TextEncoder().encode(sqlCommand);
const hashBuffer = await crypto.subtle.digest('SHA-256', msgUint8);
Expand Down
8 changes: 4 additions & 4 deletions packages/malloy-db-duckdb/src/duckdb_wasm_connection_node.ts
Original file line number Diff line number Diff line change
Expand Up @@ -22,18 +22,18 @@
*/

import crypto from 'crypto';
import {DuckDBBundles} from '@malloydata/duckdb-wasm';
import {DuckDBBundles} from '@duckdb/duckdb-wasm';
import {DuckDBWASMConnection as DuckDBWASMConnectionBase} from './duckdb_wasm_connection';

export class DuckDBWASMConnection extends DuckDBWASMConnectionBase {
getBundles(): DuckDBBundles {
const resolvePath = require.resolve('@malloydata/duckdb-wasm');
const resolvePath = require.resolve('@duckdb/duckdb-wasm');
if (!resolvePath) {
throw new Error('Unable to resolve @malloydata/duckdb-wasm path');
throw new Error('Unable to resolve @duckdb/duckdb-wasm path');
}
const distMatch = resolvePath.match(/^.*\/dist\//);
if (!distMatch) {
throw new Error('Unable to resolve @malloydata/duckdb-wasm dist path');
throw new Error('Unable to resolve @duckdb/duckdb-wasm dist path');
}
const dist = distMatch[0];

Expand Down
Loading

0 comments on commit 50b3db5

Please sign in to comment.