From f7f02a8ae68dae49e88b2607b7e159be56eafe89 Mon Sep 17 00:00:00 2001 From: Conor McCarter Date: Thu, 6 Nov 2025 16:51:14 -0800 Subject: [PATCH] Add Databricks and benchmark results for most SQL warehouse options --- README.md | 1 - databricks/.env.example | 22 ++ databricks/NOTES.md | 4 + databricks/README.md | 47 ++++ databricks/benchmark.py | 361 +++++++++++++++++++++++++++++++ databricks/benchmark.sh | 22 ++ databricks/create.sql | 109 ++++++++++ databricks/queries.sql | 43 ++++ databricks/query.py | 88 ++++++++ databricks/results/2x-large.json | 56 +++++ databricks/results/2x-small.json | 56 +++++ databricks/results/4x-large.json | 56 +++++ databricks/results/large.json | 56 +++++ databricks/results/medium.json | 56 +++++ databricks/results/small.json | 56 +++++ databricks/results/x-large.json | 56 +++++ databricks/results/x-small.json | 56 +++++ databricks/run.sh | 15 ++ 18 files changed, 1159 insertions(+), 1 deletion(-) create mode 100644 databricks/.env.example create mode 100644 databricks/NOTES.md create mode 100644 databricks/README.md create mode 100755 databricks/benchmark.py create mode 100755 databricks/benchmark.sh create mode 100644 databricks/create.sql create mode 100644 databricks/queries.sql create mode 100755 databricks/query.py create mode 100644 databricks/results/2x-large.json create mode 100644 databricks/results/2x-small.json create mode 100644 databricks/results/4x-large.json create mode 100644 databricks/results/large.json create mode 100644 databricks/results/medium.json create mode 100644 databricks/results/small.json create mode 100644 databricks/results/x-large.json create mode 100644 databricks/results/x-small.json create mode 100755 databricks/run.sh diff --git a/README.md b/README.md index 60dd15462..2bf0295f1 100644 --- a/README.md +++ b/README.md @@ -222,7 +222,6 @@ Please help us add more systems and run the benchmarks on more types of VMs: - [ ] Azure Synapse - [ ] Boilingdata - [ ] CockroachDB Serverless -- [ ] Databricks - [ ] DolphinDB - [ ] Dremio (without publishing) - [ ] DuckDB operating like "Athena" on remote Parquet files diff --git a/databricks/.env.example b/databricks/.env.example new file mode 100644 index 000000000..a0b353aab --- /dev/null +++ b/databricks/.env.example @@ -0,0 +1,22 @@ +# Databricks Configuration +# Copy this file to .env and fill in your actual values + +# Your Databricks workspace hostname (e.g., dbc-xxxxxxxx-xxxx.cloud.databricks.com) +DATABRICKS_SERVER_HOSTNAME=your-workspace-hostname.cloud.databricks.com + +# SQL Warehouse HTTP path (found in your SQL Warehouse settings) +# Uncomment the warehouse size you want to use +DATABRICKS_HTTP_PATH=/sql/1.0/warehouses/your-warehouse-id + +# Instance type name for results file naming & results machine type label +databricks_instance_type=Large + +# Your Databricks personal access token +DATABRICKS_TOKEN=your-databricks-token + +# Unity Catalog and Schema names +DATABRICKS_CATALOG=clickbench_catalog +DATABRICKS_SCHEMA=clickbench_schema + +# Parquet data location +DATABRICKS_PARQUET_LOCATION=s3://some/path/hits.parquet diff --git a/databricks/NOTES.md b/databricks/NOTES.md new file mode 100644 index 000000000..d29fb021e --- /dev/null +++ b/databricks/NOTES.md @@ -0,0 +1,4 @@ +I created each warehouse in the Databricks UI. +Besides the warehouse size, the only other change I made to default settings was to set the sleep time to 5 minutes to save money (the 4x large warehouse is very expensive). 
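+
+For reference, the same setup can be done without the UI. The sketch below is hedged: it assumes the Databricks SQL Warehouses REST endpoint (`POST /api/2.0/sql/warehouses`) and the `cluster_size` / `auto_stop_mins` field names as I understand them, the warehouse name is a placeholder, and the snippet is not part of the benchmark scripts — verify against the current Databricks docs before relying on it.
+
+```python
+# Hypothetical helper: create a SQL warehouse with a 5-minute auto-stop,
+# mirroring the manual UI setup described above. Endpoint and field names
+# are assumptions based on the Databricks SQL Warehouses REST API.
+import os
+import requests
+
+host = os.environ["DATABRICKS_SERVER_HOSTNAME"]  # same variables as in .env.example
+token = os.environ["DATABRICKS_TOKEN"]
+
+resp = requests.post(
+    f"https://{host}/api/2.0/sql/warehouses",
+    headers={"Authorization": f"Bearer {token}"},
+    json={
+        "name": "clickbench-large",  # placeholder warehouse name
+        "cluster_size": "Large",     # e.g. 2X-Small ... 4X-Large, matching the sizes benchmarked here
+        "auto_stop_mins": 5,         # the only setting changed from defaults in these runs
+    },
+    timeout=30,
+)
+resp.raise_for_status()
+warehouse_id = resp.json()["id"]
+
+# The HTTP path to put in .env is then /sql/1.0/warehouses/<warehouse_id>
+print(f"/sql/1.0/warehouses/{warehouse_id}")
+```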
+ +Once the warehouse was created, I'd save the warehouse path to use in the .env file for each run. \ No newline at end of file diff --git a/databricks/README.md b/databricks/README.md new file mode 100644 index 000000000..4b4bc47b3 --- /dev/null +++ b/databricks/README.md @@ -0,0 +1,47 @@ +## Setup + +1. Create a Databricks workspace and SQL Warehouse +2. Generate a personal access token from your Databricks workspace +3. Copy `.env.example` to `.env` and fill in your values: + +```bash +cp .env.example .env +# Edit .env with your actual credentials +``` + +Required environment variables: +- `DATABRICKS_SERVER_HOSTNAME`: Your workspace hostname (e.g., `dbc-xxxxxxxx-xxxx.cloud.databricks.com`) +- `DATABRICKS_HTTP_PATH`: SQL Warehouse path (e.g., `/sql/1.0/warehouses/your-warehouse-id`) +- `DATABRICKS_TOKEN`: Your personal access token +- `databricks_instance_type`: Instance type name for results file naming, e.g., "2X-Large" +- `DATABRICKS_CATALOG`: Unity Catalog name +- `DATABRICKS_SCHEMA`: Schema name +- `DATABRICKS_PARQUET_LOCATION`: S3 path to the parquet file + +## Running the Benchmark + +```bash +./benchmark.sh +``` + +## How It Works + +1. **benchmark.sh**: Entry point that installs dependencies via `uv` and runs the benchmark +2. **benchmark.py**: Orchestrates the full benchmark: + - Creates the catalog and schema + - Creates the `hits` table with explicit schema (including TIMESTAMP conversion) + - Loads data from the parquet file using `INSERT INTO` with type conversions + - Runs all queries via `run.sh` + - Collects timing metrics from Databricks REST API + - Outputs results to JSON in the `results/` directory +3. **run.sh**: Iterates through queries.sql and executes each query +4. **query.py**: Executes individual queries and retrieves execution times from Databricks REST API (`/api/2.0/sql/history/queries/{query_id}`) +5. 
**queries.sql**: Contains the 43 benchmark queries + +## Notes + +- Query execution times are pulled from the Databricks REST API, which provides server-side metrics +- The data is loaded from a parquet file with explicit type conversions (Unix timestamps → TIMESTAMP, Unix dates → DATE) +- The benchmark uses Databricks SQL Connector for Python +- Results include load time, data size, and individual query execution times (3 runs per query) +- Results are saved to `results/{instance_type}.json` diff --git a/databricks/benchmark.py b/databricks/benchmark.py new file mode 100755 index 000000000..497fee222 --- /dev/null +++ b/databricks/benchmark.py @@ -0,0 +1,361 @@ +#!/usr/bin/env python3 + +from databricks import sql +import json +import os +import sys +import subprocess +import time +import requests + +def write_result_to_file(run_metadata, query_results): + # Ensure results directory exists + os.makedirs('results', exist_ok=True) + + # Get instance type and convert to lowercase for filename + instance_type = os.getenv('databricks_instance_type') + if not instance_type: + raise Exception("Missing required environment variable: databricks_instance_type") + filename = os.path.join('results', instance_type.lower() + ".json") + with open(filename, 'w') as f: + print("{", file=f) + for key in run_metadata: + print(f'\t"{key}": {json.dumps(run_metadata[key])},', file=f) + + print('\t"result": [', file=f) + num_lines = len(query_results) + for i in range(num_lines): + print(f"\t\t{query_results[i]}", end='', file=f) + print("," if i < num_lines - 1 else "", file=f) + + print("\t]\n}", file=f) + +def load_data(run_metadata): + server_hostname = os.getenv('DATABRICKS_SERVER_HOSTNAME') + http_path = os.getenv('DATABRICKS_HTTP_PATH') + access_token = os.getenv('DATABRICKS_TOKEN') + catalog = os.getenv('DATABRICKS_CATALOG') + schema = os.getenv('DATABRICKS_SCHEMA') + parquet_location = os.getenv('DATABRICKS_PARQUET_LOCATION') + + if not all([server_hostname, http_path, access_token, catalog, schema, parquet_location]): + raise Exception("Missing required environment variables: DATABRICKS_SERVER_HOSTNAME, DATABRICKS_HTTP_PATH, DATABRICKS_TOKEN, DATABRICKS_CATALOG, DATABRICKS_SCHEMA, DATABRICKS_PARQUET_LOCATION") + + print(f'Connecting to Databricks; loading the data into {catalog}.{schema}', file=sys.stderr) + + connection = sql.connect( + server_hostname=server_hostname, + http_path=http_path, + access_token=access_token + ) + + cursor = connection.cursor() + + # Create catalog and schema if they don't exist + cursor.execute(f'CREATE CATALOG IF NOT EXISTS {catalog}') + cursor.execute(f'USE CATALOG {catalog}') + cursor.execute(f'CREATE SCHEMA IF NOT EXISTS {schema}') + cursor.execute(f'USE SCHEMA {schema}') + + print(f'Creating table and loading data from {parquet_location}...', file=sys.stderr) + + # Drop table if exists + cursor.execute(f'DROP TABLE IF EXISTS {catalog}.{schema}.hits') + + # Create table with explicit schema (EventTime as TIMESTAMP) + create_query = f""" + CREATE TABLE {catalog}.{schema}.hits ( + WatchID BIGINT NOT NULL, + JavaEnable SMALLINT NOT NULL, + Title STRING, + GoodEvent SMALLINT NOT NULL, + EventTime TIMESTAMP NOT NULL, + EventDate DATE NOT NULL, + CounterID INT NOT NULL, + ClientIP INT NOT NULL, + RegionID INT NOT NULL, + UserID BIGINT NOT NULL, + CounterClass SMALLINT NOT NULL, + OS SMALLINT NOT NULL, + UserAgent SMALLINT NOT NULL, + URL STRING, + Referer STRING, + IsRefresh SMALLINT NOT NULL, + RefererCategoryID SMALLINT NOT NULL, + RefererRegionID INT NOT NULL, + 
URLCategoryID SMALLINT NOT NULL, + URLRegionID INT NOT NULL, + ResolutionWidth SMALLINT NOT NULL, + ResolutionHeight SMALLINT NOT NULL, + ResolutionDepth SMALLINT NOT NULL, + FlashMajor SMALLINT NOT NULL, + FlashMinor SMALLINT NOT NULL, + FlashMinor2 STRING, + NetMajor SMALLINT NOT NULL, + NetMinor SMALLINT NOT NULL, + UserAgentMajor SMALLINT NOT NULL, + UserAgentMinor STRING NOT NULL, + CookieEnable SMALLINT NOT NULL, + JavascriptEnable SMALLINT NOT NULL, + IsMobile SMALLINT NOT NULL, + MobilePhone SMALLINT NOT NULL, + MobilePhoneModel STRING, + Params STRING, + IPNetworkID INT NOT NULL, + TraficSourceID SMALLINT NOT NULL, + SearchEngineID SMALLINT NOT NULL, + SearchPhrase STRING, + AdvEngineID SMALLINT NOT NULL, + IsArtifical SMALLINT NOT NULL, + WindowClientWidth SMALLINT NOT NULL, + WindowClientHeight SMALLINT NOT NULL, + ClientTimeZone SMALLINT NOT NULL, + ClientEventTime TIMESTAMP NOT NULL, + SilverlightVersion1 SMALLINT NOT NULL, + SilverlightVersion2 SMALLINT NOT NULL, + SilverlightVersion3 INT NOT NULL, + SilverlightVersion4 SMALLINT NOT NULL, + PageCharset STRING, + CodeVersion INT NOT NULL, + IsLink SMALLINT NOT NULL, + IsDownload SMALLINT NOT NULL, + IsNotBounce SMALLINT NOT NULL, + FUniqID BIGINT NOT NULL, + OriginalURL STRING, + HID INT NOT NULL, + IsOldCounter SMALLINT NOT NULL, + IsEvent SMALLINT NOT NULL, + IsParameter SMALLINT NOT NULL, + DontCountHits SMALLINT NOT NULL, + WithHash SMALLINT NOT NULL, + HitColor STRING NOT NULL, + LocalEventTime TIMESTAMP NOT NULL, + Age SMALLINT NOT NULL, + Sex SMALLINT NOT NULL, + Income SMALLINT NOT NULL, + Interests SMALLINT NOT NULL, + Robotness SMALLINT NOT NULL, + RemoteIP INT NOT NULL, + WindowName INT NOT NULL, + OpenerName INT NOT NULL, + HistoryLength SMALLINT NOT NULL, + BrowserLanguage STRING, + BrowserCountry STRING, + SocialNetwork STRING, + SocialAction STRING, + HTTPError SMALLINT NOT NULL, + SendTiming INT NOT NULL, + DNSTiming INT NOT NULL, + ConnectTiming INT NOT NULL, + ResponseStartTiming INT NOT NULL, + ResponseEndTiming INT NOT NULL, + FetchTiming INT NOT NULL, + SocialSourceNetworkID SMALLINT NOT NULL, + SocialSourcePage STRING, + ParamPrice BIGINT NOT NULL, + ParamOrderID STRING, + ParamCurrency STRING, + ParamCurrencyID SMALLINT NOT NULL, + OpenstatServiceName STRING, + OpenstatCampaignID STRING, + OpenstatAdID STRING, + OpenstatSourceID STRING, + UTMSource STRING, + UTMMedium STRING, + UTMCampaign STRING, + UTMContent STRING, + UTMTerm STRING, + FromTag STRING, + HasGCLID SMALLINT NOT NULL, + RefererHash BIGINT NOT NULL, + URLHash BIGINT NOT NULL, + CLID INT NOT NULL + ) + """ + cursor.execute(create_query) + + # Insert data from parquet file with type conversions + load_query = f""" + INSERT INTO {catalog}.{schema}.hits + SELECT + WatchID, + JavaEnable, + Title, + GoodEvent, + CAST(FROM_UNIXTIME(EventTime) AS TIMESTAMP) AS EventTime, + DATE_FROM_UNIX_DATE(EventDate) AS EventDate, + CounterID, + ClientIP, + RegionID, + UserID, + CounterClass, + OS, + UserAgent, + URL, + Referer, + IsRefresh, + RefererCategoryID, + RefererRegionID, + URLCategoryID, + URLRegionID, + ResolutionWidth, + ResolutionHeight, + ResolutionDepth, + FlashMajor, + FlashMinor, + FlashMinor2, + NetMajor, + NetMinor, + UserAgentMajor, + UserAgentMinor, + CookieEnable, + JavascriptEnable, + IsMobile, + MobilePhone, + MobilePhoneModel, + Params, + IPNetworkID, + TraficSourceID, + SearchEngineID, + SearchPhrase, + AdvEngineID, + IsArtifical, + WindowClientWidth, + WindowClientHeight, + ClientTimeZone, + CAST(FROM_UNIXTIME(ClientEventTime) AS 
TIMESTAMP) AS ClientEventTime, + SilverlightVersion1, + SilverlightVersion2, + SilverlightVersion3, + SilverlightVersion4, + PageCharset, + CodeVersion, + IsLink, + IsDownload, + IsNotBounce, + FUniqID, + OriginalURL, + HID, + IsOldCounter, + IsEvent, + IsParameter, + DontCountHits, + WithHash, + HitColor, + CAST(FROM_UNIXTIME(LocalEventTime) AS TIMESTAMP) AS LocalEventTime, + Age, + Sex, + Income, + Interests, + Robotness, + RemoteIP, + WindowName, + OpenerName, + HistoryLength, + BrowserLanguage, + BrowserCountry, + SocialNetwork, + SocialAction, + HTTPError, + SendTiming, + DNSTiming, + ConnectTiming, + ResponseStartTiming, + ResponseEndTiming, + FetchTiming, + SocialSourceNetworkID, + SocialSourcePage, + ParamPrice, + ParamOrderID, + ParamCurrency, + ParamCurrencyID, + OpenstatServiceName, + OpenstatCampaignID, + OpenstatAdID, + OpenstatSourceID, + UTMSource, + UTMMedium, + UTMCampaign, + UTMContent, + UTMTerm, + FromTag, + HasGCLID, + RefererHash, + URLHash, + CLID + FROM parquet.`{parquet_location}` + """ + + cursor.execute(load_query) + load_query_id = cursor.query_id + + # Get load time from REST API + print(f"Getting load time for query {load_query_id}...", file=sys.stderr) + max_retries = 3 + + for retry in range(max_retries): + time.sleep(2) + + url = f"https://{server_hostname}/api/2.0/sql/history/queries/{load_query_id}" + headers = { + "Authorization": f"Bearer {access_token}", + "Content-Type": "application/json" + } + + try: + response = requests.get(url, headers=headers, timeout=10) + if response.status_code == 200: + data = response.json() + if 'duration' in data: + load_duration = round(data['duration'] / 1000.0, 3) + run_metadata["load_time"] = load_duration + print(f"Table created successfully in {load_duration}s", file=sys.stderr) + break + except Exception as api_error: + print(f"API error on retry {retry + 1}: {api_error}", file=sys.stderr) + + # Get table size from DESCRIBE DETAIL + cursor.execute(f"DESCRIBE DETAIL {catalog}.{schema}.hits") + result = cursor.fetchone() + run_metadata["data_size"] = result[10] # sizeInBytes column + print(f"Table size: {run_metadata['data_size']} bytes", file=sys.stderr) + + print(f'Finished loading the data in {run_metadata["load_time"]}s; data size = {run_metadata["data_size"]} bytes', file=sys.stderr) + + cursor.close() + connection.close() + +def run_queries(): + # Run the benchmark script + result = subprocess.run( + ["./run.sh"], + stdout=subprocess.PIPE, + text=True, + timeout=3600, # 1 hour timeout + ) + + if result.returncode != 0: + raise Exception(f"Benchmark failed with return code {result.returncode}") + + return result.stdout + +if __name__ == "__main__": + instance_type = os.getenv('databricks_instance_type') + if not instance_type: + raise Exception("Missing required environment variable: databricks_instance_type") + + run_metadata = { + "system": "Databricks", + "date": time.strftime("%Y-%m-%d"), + "machine": f"Databricks: {instance_type}", + "cluster_size": "", + "proprietary": "yes", + "tuned": "no", + "tags": ["managed", "column-oriented"], + } + + load_data(run_metadata) + + query_output = run_queries() + + write_result_to_file(run_metadata, query_output.strip().split('\n')) diff --git a/databricks/benchmark.sh b/databricks/benchmark.sh new file mode 100755 index 000000000..6928ba071 --- /dev/null +++ b/databricks/benchmark.sh @@ -0,0 +1,22 @@ +#!/bin/bash + +# Load environment variables +if [ -f .env ]; then + set -a + source .env + set +a +else + echo "Error: .env file not found. 
Please copy .env.example to .env and fill in your credentials." + exit 1 +fi + +# Create virtual environment if it doesn't exist +if [ ! -d ".venv" ]; then + uv venv +fi + +# Install dependencies +uv pip install databricks-sql-connector + +# Run benchmark +uv run python ./benchmark.py 2>&1 diff --git a/databricks/create.sql b/databricks/create.sql new file mode 100644 index 000000000..729e40370 --- /dev/null +++ b/databricks/create.sql @@ -0,0 +1,109 @@ +-- This is not used in the setup script, but is included here for reference. +-- The actual table is created in benchmark.py +CREATE OR REPLACE TABLE hits ( + WatchID BIGINT NOT NULL, + JavaEnable SMALLINT NOT NULL, + Title STRING, + GoodEvent SMALLINT NOT NULL, + EventTime TIMESTAMP NOT NULL, + EventDate DATE NOT NULL, + CounterID INT NOT NULL, + ClientIP INT NOT NULL, + RegionID INT NOT NULL, + UserID BIGINT NOT NULL, + CounterClass SMALLINT NOT NULL, + OS SMALLINT NOT NULL, + UserAgent SMALLINT NOT NULL, + URL STRING, + Referer STRING, + IsRefresh SMALLINT NOT NULL, + RefererCategoryID SMALLINT NOT NULL, + RefererRegionID INT NOT NULL, + URLCategoryID SMALLINT NOT NULL, + URLRegionID INT NOT NULL, + ResolutionWidth SMALLINT NOT NULL, + ResolutionHeight SMALLINT NOT NULL, + ResolutionDepth SMALLINT NOT NULL, + FlashMajor SMALLINT NOT NULL, + FlashMinor SMALLINT NOT NULL, + FlashMinor2 STRING, + NetMajor SMALLINT NOT NULL, + NetMinor SMALLINT NOT NULL, + UserAgentMajor SMALLINT NOT NULL, + UserAgentMinor STRING NOT NULL, + CookieEnable SMALLINT NOT NULL, + JavascriptEnable SMALLINT NOT NULL, + IsMobile SMALLINT NOT NULL, + MobilePhone SMALLINT NOT NULL, + MobilePhoneModel STRING, + Params STRING, + IPNetworkID INT NOT NULL, + TraficSourceID SMALLINT NOT NULL, + SearchEngineID SMALLINT NOT NULL, + SearchPhrase STRING, + AdvEngineID SMALLINT NOT NULL, + IsArtifical SMALLINT NOT NULL, + WindowClientWidth SMALLINT NOT NULL, + WindowClientHeight SMALLINT NOT NULL, + ClientTimeZone SMALLINT NOT NULL, + ClientEventTime TIMESTAMP NOT NULL, + SilverlightVersion1 SMALLINT NOT NULL, + SilverlightVersion2 SMALLINT NOT NULL, + SilverlightVersion3 INT NOT NULL, + SilverlightVersion4 SMALLINT NOT NULL, + PageCharset STRING, + CodeVersion INT NOT NULL, + IsLink SMALLINT NOT NULL, + IsDownload SMALLINT NOT NULL, + IsNotBounce SMALLINT NOT NULL, + FUniqID BIGINT NOT NULL, + OriginalURL STRING, + HID INT NOT NULL, + IsOldCounter SMALLINT NOT NULL, + IsEvent SMALLINT NOT NULL, + IsParameter SMALLINT NOT NULL, + DontCountHits SMALLINT NOT NULL, + WithHash SMALLINT NOT NULL, + HitColor STRING NOT NULL, + LocalEventTime TIMESTAMP NOT NULL, + Age SMALLINT NOT NULL, + Sex SMALLINT NOT NULL, + Income SMALLINT NOT NULL, + Interests SMALLINT NOT NULL, + Robotness SMALLINT NOT NULL, + RemoteIP INT NOT NULL, + WindowName INT NOT NULL, + OpenerName INT NOT NULL, + HistoryLength SMALLINT NOT NULL, + BrowserLanguage STRING, + BrowserCountry STRING, + SocialNetwork STRING, + SocialAction STRING, + HTTPError SMALLINT NOT NULL, + SendTiming INT NOT NULL, + DNSTiming INT NOT NULL, + ConnectTiming INT NOT NULL, + ResponseStartTiming INT NOT NULL, + ResponseEndTiming INT NOT NULL, + FetchTiming INT NOT NULL, + SocialSourceNetworkID SMALLINT NOT NULL, + SocialSourcePage STRING, + ParamPrice BIGINT NOT NULL, + ParamOrderID STRING, + ParamCurrency STRING, + ParamCurrencyID SMALLINT NOT NULL, + OpenstatServiceName STRING, + OpenstatCampaignID STRING, + OpenstatAdID STRING, + OpenstatSourceID STRING, + UTMSource STRING, + UTMMedium STRING, + UTMCampaign STRING, + UTMContent STRING, 
+ UTMTerm STRING, + FromTag STRING, + HasGCLID SMALLINT NOT NULL, + RefererHash BIGINT NOT NULL, + URLHash BIGINT NOT NULL, + CLID INT NOT NULL +); \ No newline at end of file diff --git a/databricks/queries.sql b/databricks/queries.sql new file mode 100644 index 000000000..8fafcbcf9 --- /dev/null +++ b/databricks/queries.sql @@ -0,0 +1,43 @@ +SELECT COUNT(*) FROM hits; +SELECT COUNT(*) FROM hits WHERE AdvEngineID <> 0; +SELECT SUM(AdvEngineID), COUNT(*), AVG(ResolutionWidth) FROM hits; +SELECT AVG(UserID) FROM hits; +SELECT COUNT(DISTINCT UserID) FROM hits; +SELECT COUNT(DISTINCT SearchPhrase) FROM hits; +SELECT MIN(EventDate), MAX(EventDate) FROM hits; +SELECT AdvEngineID, COUNT(*) FROM hits WHERE AdvEngineID <> 0 GROUP BY AdvEngineID ORDER BY COUNT(*) DESC; +SELECT RegionID, COUNT(DISTINCT UserID) AS u FROM hits GROUP BY RegionID ORDER BY u DESC LIMIT 10; +SELECT RegionID, SUM(AdvEngineID), COUNT(*) AS c, AVG(ResolutionWidth), COUNT(DISTINCT UserID) FROM hits GROUP BY RegionID ORDER BY c DESC LIMIT 10; +SELECT MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM hits WHERE MobilePhoneModel <> '' GROUP BY MobilePhoneModel ORDER BY u DESC LIMIT 10; +SELECT MobilePhone, MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM hits WHERE MobilePhoneModel <> '' GROUP BY MobilePhone, MobilePhoneModel ORDER BY u DESC LIMIT 10; +SELECT SearchPhrase, COUNT(*) AS c FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; +SELECT SearchPhrase, COUNT(DISTINCT UserID) AS u FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10; +SELECT SearchEngineID, SearchPhrase, COUNT(*) AS c FROM hits WHERE SearchPhrase <> '' GROUP BY SearchEngineID, SearchPhrase ORDER BY c DESC LIMIT 10; +SELECT UserID, COUNT(*) FROM hits GROUP BY UserID ORDER BY COUNT(*) DESC LIMIT 10; +SELECT UserID, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, SearchPhrase ORDER BY COUNT(*) DESC LIMIT 10; +SELECT UserID, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, SearchPhrase LIMIT 10; +SELECT UserID, extract(minute FROM EventTime) AS m, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, m, SearchPhrase ORDER BY COUNT(*) DESC LIMIT 10; +SELECT UserID FROM hits WHERE UserID = 435090932899640449; +SELECT COUNT(*) FROM hits WHERE URL LIKE '%google%'; +SELECT SearchPhrase, MIN(URL), COUNT(*) AS c FROM hits WHERE URL LIKE '%google%' AND SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; +SELECT SearchPhrase, MIN(URL), MIN(Title), COUNT(*) AS c, COUNT(DISTINCT UserID) FROM hits WHERE Title LIKE '%Google%' AND URL NOT LIKE '%.google.%' AND SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; +SELECT * FROM hits WHERE URL LIKE '%google%' ORDER BY EventTime LIMIT 10; +SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY EventTime LIMIT 10; +SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY SearchPhrase LIMIT 10; +SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY EventTime, SearchPhrase LIMIT 10; +SELECT CounterID, AVG(length(URL)) AS l, COUNT(*) AS c FROM hits WHERE URL <> '' GROUP BY CounterID HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25; +SELECT REGEXP_REPLACE(Referer, '^https?://(?:www\.)?([^/]+)/.*$', '$1') AS k, AVG(length(Referer)) AS l, COUNT(*) AS c, MIN(Referer) FROM hits WHERE Referer <> '' GROUP BY k HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25; +SELECT SUM(ResolutionWidth), SUM(ResolutionWidth + 1), SUM(ResolutionWidth + 2), SUM(ResolutionWidth + 3), SUM(ResolutionWidth + 4), SUM(ResolutionWidth + 5), 
SUM(ResolutionWidth + 6), SUM(ResolutionWidth + 7), SUM(ResolutionWidth + 8), SUM(ResolutionWidth + 9), SUM(ResolutionWidth + 10), SUM(ResolutionWidth + 11), SUM(ResolutionWidth + 12), SUM(ResolutionWidth + 13), SUM(ResolutionWidth + 14), SUM(ResolutionWidth + 15), SUM(ResolutionWidth + 16), SUM(ResolutionWidth + 17), SUM(ResolutionWidth + 18), SUM(ResolutionWidth + 19), SUM(ResolutionWidth + 20), SUM(ResolutionWidth + 21), SUM(ResolutionWidth + 22), SUM(ResolutionWidth + 23), SUM(ResolutionWidth + 24), SUM(ResolutionWidth + 25), SUM(ResolutionWidth + 26), SUM(ResolutionWidth + 27), SUM(ResolutionWidth + 28), SUM(ResolutionWidth + 29), SUM(ResolutionWidth + 30), SUM(ResolutionWidth + 31), SUM(ResolutionWidth + 32), SUM(ResolutionWidth + 33), SUM(ResolutionWidth + 34), SUM(ResolutionWidth + 35), SUM(ResolutionWidth + 36), SUM(ResolutionWidth + 37), SUM(ResolutionWidth + 38), SUM(ResolutionWidth + 39), SUM(ResolutionWidth + 40), SUM(ResolutionWidth + 41), SUM(ResolutionWidth + 42), SUM(ResolutionWidth + 43), SUM(ResolutionWidth + 44), SUM(ResolutionWidth + 45), SUM(ResolutionWidth + 46), SUM(ResolutionWidth + 47), SUM(ResolutionWidth + 48), SUM(ResolutionWidth + 49), SUM(ResolutionWidth + 50), SUM(ResolutionWidth + 51), SUM(ResolutionWidth + 52), SUM(ResolutionWidth + 53), SUM(ResolutionWidth + 54), SUM(ResolutionWidth + 55), SUM(ResolutionWidth + 56), SUM(ResolutionWidth + 57), SUM(ResolutionWidth + 58), SUM(ResolutionWidth + 59), SUM(ResolutionWidth + 60), SUM(ResolutionWidth + 61), SUM(ResolutionWidth + 62), SUM(ResolutionWidth + 63), SUM(ResolutionWidth + 64), SUM(ResolutionWidth + 65), SUM(ResolutionWidth + 66), SUM(ResolutionWidth + 67), SUM(ResolutionWidth + 68), SUM(ResolutionWidth + 69), SUM(ResolutionWidth + 70), SUM(ResolutionWidth + 71), SUM(ResolutionWidth + 72), SUM(ResolutionWidth + 73), SUM(ResolutionWidth + 74), SUM(ResolutionWidth + 75), SUM(ResolutionWidth + 76), SUM(ResolutionWidth + 77), SUM(ResolutionWidth + 78), SUM(ResolutionWidth + 79), SUM(ResolutionWidth + 80), SUM(ResolutionWidth + 81), SUM(ResolutionWidth + 82), SUM(ResolutionWidth + 83), SUM(ResolutionWidth + 84), SUM(ResolutionWidth + 85), SUM(ResolutionWidth + 86), SUM(ResolutionWidth + 87), SUM(ResolutionWidth + 88), SUM(ResolutionWidth + 89) FROM hits; +SELECT SearchEngineID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits WHERE SearchPhrase <> '' GROUP BY SearchEngineID, ClientIP ORDER BY c DESC LIMIT 10; +SELECT WatchID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits WHERE SearchPhrase <> '' GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10; +SELECT WatchID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10; +SELECT URL, COUNT(*) AS c FROM hits GROUP BY URL ORDER BY c DESC LIMIT 10; +SELECT 1, URL, COUNT(*) AS c FROM hits GROUP BY 1, URL ORDER BY c DESC LIMIT 10; +SELECT ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3, COUNT(*) AS c FROM hits GROUP BY ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3 ORDER BY c DESC LIMIT 10; +SELECT URL, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND DontCountHits = 0 AND IsRefresh = 0 AND URL <> '' GROUP BY URL ORDER BY PageViews DESC LIMIT 10; +SELECT Title, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND DontCountHits = 0 AND IsRefresh = 0 AND Title <> '' GROUP BY Title ORDER BY PageViews DESC LIMIT 10; 
+SELECT URL, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND IsLink <> 0 AND IsDownload = 0 GROUP BY URL ORDER BY PageViews DESC LIMIT 10 OFFSET 1000; +SELECT TraficSourceID, SearchEngineID, AdvEngineID, CASE WHEN (SearchEngineID = 0 AND AdvEngineID = 0) THEN Referer ELSE '' END AS Src, URL AS Dst, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 GROUP BY TraficSourceID, SearchEngineID, AdvEngineID, Src, Dst ORDER BY PageViews DESC LIMIT 10 OFFSET 1000; +SELECT URLHash, EventDate, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND TraficSourceID IN (-1, 6) AND RefererHash = 3594120000172545465 GROUP BY URLHash, EventDate ORDER BY PageViews DESC LIMIT 10 OFFSET 100; +SELECT WindowClientWidth, WindowClientHeight, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND DontCountHits = 0 AND URLHash = 2868770270353813622 GROUP BY WindowClientWidth, WindowClientHeight ORDER BY PageViews DESC LIMIT 10 OFFSET 10000; +SELECT DATE_TRUNC('minute', EventTime) AS M, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-14' AND EventDate <= '2013-07-15' AND IsRefresh = 0 AND DontCountHits = 0 GROUP BY DATE_TRUNC('minute', EventTime) ORDER BY DATE_TRUNC('minute', EventTime) LIMIT 10 OFFSET 1000; diff --git a/databricks/query.py b/databricks/query.py new file mode 100755 index 000000000..0ec552c36 --- /dev/null +++ b/databricks/query.py @@ -0,0 +1,88 @@ +#!/usr/bin/env python3 + +from databricks import sql +import os +import sys +import time +import requests + +query = sys.stdin.read() +print(f"running {query}", file=sys.stderr) + +# Get connection parameters from environment variables +server_hostname = os.getenv('DATABRICKS_SERVER_HOSTNAME') +http_path = os.getenv('DATABRICKS_HTTP_PATH') +access_token = os.getenv('DATABRICKS_TOKEN') +catalog = os.getenv('DATABRICKS_CATALOG', 'main') +schema = os.getenv('DATABRICKS_SCHEMA', 'clickbench') + +if not all([server_hostname, http_path, access_token]): + print("Error: Missing required environment variables:", file=sys.stderr) + print(" DATABRICKS_SERVER_HOSTNAME", file=sys.stderr) + print(" DATABRICKS_HTTP_PATH", file=sys.stderr) + print(" DATABRICKS_TOKEN", file=sys.stderr) + sys.exit(1) + +connection = sql.connect( + server_hostname=server_hostname, + http_path=http_path, + access_token=access_token, + catalog=catalog, + schema=schema +) + +print('[', end='') + +for try_num in range(3): + if try_num > 0: + print(',', end='') + + try: + cursor = connection.cursor() + + # Execute the query + cursor.execute(query) + results = cursor.fetchall() + query_id = cursor.query_id + + # Get execution time from REST API + duration = None + max_retries = 3 + + for retry in range(max_retries): + # Wait a moment for query to complete and be available + time.sleep(1 if retry == 0 else 2) + + # Call the query history API + url = f"https://{server_hostname}/api/2.0/sql/history/queries/{query_id}" + headers = { + "Authorization": f"Bearer {access_token}", + "Content-Type": "application/json" + } + + try: + response = requests.get(url, headers=headers, timeout=10) + if response.status_code == 200: + data = response.json() + if 'duration' in data: + # Duration is in milliseconds, convert to seconds + duration = 
round(data['duration'] / 1000.0, 3) + break + except Exception as api_error: + print(f"API error on retry {retry + 1}: {api_error}", file=sys.stderr) + + if duration is None: + # Fallback: if metrics aren't available after retries, use null + duration = 'null' + print(f"Could not retrieve metrics for query_id {query_id} after {max_retries} retries", file=sys.stderr) + + print(duration if isinstance(duration, str) else duration, end='') + + cursor.close() + except Exception as e: + print('null', end='') + print(f"query <{query.strip()}> errored out on attempt <{try_num+1}>: {e}", file=sys.stderr) + +print(']') + +connection.close() diff --git a/databricks/results/2x-large.json b/databricks/results/2x-large.json new file mode 100644 index 000000000..0c4709633 --- /dev/null +++ b/databricks/results/2x-large.json @@ -0,0 +1,56 @@ +{ + "system": "Databricks", + "date": "2025-11-06", + "machine": "Databricks: 2X-Large", + "cluster_size": 64, + "proprietary": "yes", + "tuned": "no", + "tags": ["managed", "column-oriented"], + "load_time": 25.978, + "data_size": 10219802927, + "result": [ + [0.747, 0.108, 0.098], + [2.372, 0.107, 0.109], + [0.601, 0.095, 0.459], + [0.606, 0.11, 0.123], + [0.806, 0.115, 0.108], + [0.934, 0.108, 0.112], + [0.72, 0.107, 0.114], + [0.444, 0.099, 0.111], + [1.023, 0.103, 0.115], + [1.094, 0.108, 0.113], + [0.674, 0.097, 0.101], + [0.688, 0.103, 0.104], + [0.755, 0.104, 0.098], + [0.874, 0.104, 0.102], + [0.844, 0.096, 0.097], + [0.669, 0.101, 0.112], + [0.894, 0.096, 0.102], + [0.614, 0.095, 0.094], + [1.254, 0.121, 0.111], + [0.382, 0.092, 0.094], + [0.908, 0.095, 0.091], + [0.617, 0.113, 0.1], + [1.029, 0.115, 0.104], + [1.392, 0.1, 0.114], + [0.389, 0.108, 0.1], + [0.359, 0.1, 0.09], + [0.386, 0.1, 0.096], + [0.616, 0.102, 0.099], + [3.37, 0.103, 0.108], + [0.639, 0.15, 0.136], + [0.643, 0.095, 0.096], + [0.688, 0.104, 0.098], + [0.914, 0.094, 0.109], + [1.573, 0.107, 0.098], + [1.605, 0.108, 0.109], + [0.646, 0.108, 0.099], + [0.66, 0.112, 0.104], + [0.426, 0.093, 0.095], + [0.546, 0.108, 0.101], + [1.081, 0.102, 0.129], + [0.53, 0.103, 0.101], + [0.418, 0.131, 0.103], + [0.443, 0.105, 0.107] + ] +} diff --git a/databricks/results/2x-small.json b/databricks/results/2x-small.json new file mode 100644 index 000000000..9de0aa0a3 --- /dev/null +++ b/databricks/results/2x-small.json @@ -0,0 +1,56 @@ +{ + "system": "Databricks", + "date": "2025-11-06", + "machine": "Databricks: 2X-Small", + "cluster_size": 1, + "proprietary": "yes", + "tuned": "no", + "tags": ["managed", "column-oriented"], + "load_time": 125.99, + "data_size": 10219802927, + "result": [ + [0.714, 0.128, 0.13], + [1.382, 0.129, 0.128], + [0.883, 0.129, 0.117], + [0.984, 0.12, 0.127], + [1.447, 0.119, 0.121], + [2.362, 0.117, 0.101], + [0.848, 0.113, 0.103], + [0.509, 0.104, 0.108], + [2.435, 0.108, 0.107], + [3.127, 0.118, 0.108], + [1.063, 0.104, 0.109], + [1.015, 0.12, 0.103], + [2.953, 0.121, 0.098], + [3.39, 0.125, 0.115], + [3.569, 0.101, 0.135], + [2.122, 0.12, 0.102], + [5.774, 0.098, 0.132], + [3.587, 0.104, 0.103], + [9.929, 0.147, 0.101], + [0.389, 0.104, 0.099], + [3.301, 0.102, 0.099], + [2.708, 0.104, 0.099], + [5.225, 0.113, 0.133], + [13.016, 0.105, 0.106], + [1.329, 0.103, 0.107], + [0.903, 0.099, 0.097], + [1.249, 0.13, 0.093], + [2.484, 0.108, 0.109], + [20.366, 0.119, 0.104], + [1.298, 0.154, 0.147], + [1.915, 0.096, 0.11], + [2.117, 0.095, 0.104], + [7.453, 0.119, 0.092], + [11.578, 0.095, 0.093], + [10.26, 0.113, 0.097], + [2.217, 0.101, 0.095], + [0.664, 0.11, 0.101], + [0.441, 
0.092, 0.106], + [0.683, 0.111, 0.106], + [1.11, 0.106, 0.114], + [0.539, 0.101, 0.095], + [0.434, 0.361, 0.096], + [0.483, 0.097, 0.105] + ] +} diff --git a/databricks/results/4x-large.json b/databricks/results/4x-large.json new file mode 100644 index 000000000..62224ed5c --- /dev/null +++ b/databricks/results/4x-large.json @@ -0,0 +1,56 @@ +{ + "system": "Databricks", + "date": "2025-11-06", + "machine": "Databricks: 4X-Large", + "cluster_size": 256, + "proprietary": "yes", + "tuned": "no", + "tags": ["managed", "column-oriented"], + "load_time": 38.981, + "data_size": 10219802927, + "result": [ + [0.597, 0.158, 0.116], + [3.236, 0.113, 0.103], + [0.619, 0.105, 0.101], + [0.681, 0.132, 0.107], + [1.405, 0.108, 0.099], + [1.105, 0.103, 0.095], + [0.719, 0.101, 0.1], + [3.267, 0.104, 0.104], + [4.144, 0.101, 0.115], + [1.431, 0.108, 0.114], + [2.427, 0.101, 0.119], + [6.409, 0.11, 0.101], + [4.826, 0.096, 0.103], + [1.097, 0.102, 0.108], + [1.116, 0.103, 0.103], + [0.794, 0.109, 0.14], + [0.962, 0.095, 0.096], + [0.626, 0.096, 0.096], + [1.732, 0.125, 0.102], + [0.319, 0.092, 0.105], + [0.981, 0.096, 0.123], + [0.614, 0.103, 0.111], + [1.069, 0.1, 0.104], + [1.419, 0.105, 0.107], + [0.382, 0.091, 0.105], + [0.321, 0.104, 0.099], + [0.368, 0.108, 0.09], + [0.71, 0.099, 0.095], + [3.437, 0.109, 0.112], + [0.685, 0.157, 0.166], + [0.743, 0.099, 0.159], + [1.045, 0.094, 0.097], + [1.117, 0.095, 0.102], + [1.796, 0.093, 0.099], + [1.855, 0.097, 0.112], + [0.746, 0.104, 0.097], + [0.67, 0.104, 0.099], + [0.432, 0.094, 0.094], + [0.596, 0.095, 0.125], + [1.126, 0.105, 0.104], + [0.522, 0.103, 0.103], + [0.453, 0.126, 0.107], + [0.454, 0.106, 0.106] + ] +} diff --git a/databricks/results/large.json b/databricks/results/large.json new file mode 100644 index 000000000..afc2cabdc --- /dev/null +++ b/databricks/results/large.json @@ -0,0 +1,56 @@ +{ + "system": "Databricks", + "date": "2025-11-06", + "machine": "Databricks: Large", + "cluster_size": 16, + "proprietary": "yes", + "tuned": "no", + "tags": ["managed", "column-oriented"], + "load_time": 20.207, + "data_size": 10219802927, + "result": [ + [0.606, 0.113, 0.122], + [2.986, 0.111, 0.108], + [0.592, 0.108, 0.105], + [0.704, 0.103, 0.103], + [0.814, 0.105, 0.108], + [0.897, 0.1, 0.115], + [0.776, 0.1, 0.104], + [0.437, 0.105, 0.105], + [1.137, 0.104, 0.1], + [1.104, 0.111, 0.115], + [0.796, 0.105, 0.105], + [0.702, 0.113, 0.104], + [0.669, 0.099, 0.104], + [0.935, 0.102, 0.101], + [0.816, 0.106, 0.11], + [0.602, 0.103, 0.092], + [0.86, 0.11, 0.107], + [0.875, 0.1, 0.095], + [1.851, 0.116, 0.107], + [0.328, 0.096, 0.093], + [1.007, 0.11, 0.11], + [0.615, 0.108, 0.1], + [1.385, 0.109, 0.124], + [1.913, 0.108, 0.102], + [0.403, 0.092, 0.098], + [0.336, 0.09, 0.097], + [0.392, 0.271, 0.107], + [0.724, 0.103, 0.105], + [3.35, 0.1, 0.103], + [0.621, 0.147, 0.137], + [0.695, 0.094, 0.09], + [0.744, 0.092, 0.093], + [1.463, 0.106, 0.095], + [1.776, 0.107, 0.098], + [1.821, 0.112, 0.115], + [0.597, 0.106, 0.102], + [0.843, 0.118, 0.114], + [0.47, 0.095, 0.18], + [0.544, 0.109, 0.109], + [1.071, 0.119, 0.102], + [0.501, 0.114, 0.095], + [0.424, 0.12, 0.095], + [0.43, 0.101, 0.112] + ] +} diff --git a/databricks/results/medium.json b/databricks/results/medium.json new file mode 100644 index 000000000..f2fb57124 --- /dev/null +++ b/databricks/results/medium.json @@ -0,0 +1,56 @@ +{ + "system": "Databricks", + "date": "2025-11-06", + "machine": "Databricks: Medium", + "cluster_size": 1, + "proprietary": "yes", + "tuned": "no", + "tags": ["managed", 
"column-oriented"], + "load_time": 58.818, + "data_size": 10219802927, + "result": [ + [0.655, 0.125, 0.12], + [2.588, 0.11, 0.114], + [0.872, 0.122, 0.109], + [0.895, 0.103, 0.117], + [0.829, 0.11, 0.108], + [1.106, 0.106, 0.108], + [0.934, 0.106, 0.101], + [0.607, 0.103, 0.11], + [1.303, 0.124, 0.109], + [1.28, 0.119, 0.143], + [0.836, 0.109, 0.103], + [0.818, 0.112, 0.103], + [0.796, 0.108, 0.105], + [0.941, 0.104, 0.111], + [1.004, 0.221, 0.105], + [0.72, 0.101, 0.107], + [1.32, 0.097, 0.106], + [0.872, 0.108, 0.1], + [2.514, 0.12, 0.097], + [0.374, 0.096, 0.096], + [1.302, 0.102, 0.095], + [0.784, 0.113, 0.106], + [1.676, 0.114, 0.103], + [2.723, 0.105, 0.117], + [0.499, 0.098, 0.095], + [0.407, 0.094, 0.108], + [0.482, 0.107, 0.102], + [0.792, 0.115, 0.111], + [3.748, 0.105, 0.113], + [0.695, 0.158, 0.15], + [0.702, 0.169, 0.12], + [0.818, 0.098, 0.11], + [2.375, 0.147, 0.099], + [2.13, 0.101, 0.099], + [2.07, 0.104, 0.108], + [0.729, 0.099, 0.101], + [0.699, 0.112, 0.112], + [0.488, 0.095, 0.097], + [0.62, 0.103, 0.106], + [1.167, 0.106, 0.109], + [0.525, 0.107, 0.109], + [0.454, 0.141, 0.119], + [0.477, 0.122, 0.101] + ] +} diff --git a/databricks/results/small.json b/databricks/results/small.json new file mode 100644 index 000000000..7a9721053 --- /dev/null +++ b/databricks/results/small.json @@ -0,0 +1,56 @@ +{ + "system": "Databricks", + "date": "2025-11-06", + "machine": "Databricks: Small", + "cluster_size": 4, + "proprietary": "yes", + "tuned": "no", + "tags": ["managed", "column-oriented"], + "load_time": 57.949, + "data_size": 10219802927, + "result": [ + [0.61, 0.104, 0.114], + [1.338, 0.1, 0.108], + [0.627, 0.098, 0.112], + [0.752, 0.106, 0.097], + [0.9, 0.109, 0.095], + [1.183, 0.102, 0.099], + [0.782, 0.098, 0.098], + [0.427, 0.108, 0.116], + [1.279, 0.102, 0.106], + [1.459, 0.112, 0.113], + [0.799, 0.11, 0.108], + [0.795, 0.109, 0.097], + [1.054, 0.101, 0.102], + [1.199, 0.102, 0.108], + [1.138, 0.101, 0.1], + [0.841, 0.098, 0.097], + [3.005, 0.12, 0.096], + [1.282, 0.094, 0.09], + [4.636, 0.105, 0.131], + [0.385, 0.087, 0.099], + [1.598, 0.092, 0.096], + [1.028, 0.102, 0.105], + [1.797, 0.108, 0.101], + [4.426, 0.102, 0.105], + [0.746, 0.096, 0.114], + [0.452, 0.103, 0.095], + [0.576, 0.122, 0.103], + [0.984, 0.102, 0.107], + [6.114, 0.105, 0.112], + [0.751, 0.204, 0.13], + [0.827, 0.104, 0.093], + [1.169, 0.101, 0.091], + [4.099, 0.095, 0.092], + [3.929, 0.106, 0.09], + [3.93, 0.095, 0.099], + [0.971, 0.1, 0.096], + [0.692, 0.106, 0.097], + [0.457, 0.1, 0.099], + [0.571, 0.097, 0.107], + [1.181, 0.113, 0.099], + [0.498, 0.156, 0.11], + [0.467, 0.104, 0.1], + [0.436, 0.104, 0.115] + ] +} diff --git a/databricks/results/x-large.json b/databricks/results/x-large.json new file mode 100644 index 000000000..c2207e129 --- /dev/null +++ b/databricks/results/x-large.json @@ -0,0 +1,56 @@ +{ + "system": "Databricks", + "date": "2025-11-06", + "machine": "Databricks: X-Large", + "cluster_size": 32, + "proprietary": "yes", + "tuned": "no", + "tags": ["managed", "column-oriented"], + "load_time": 30.201, + "data_size": 10219802927, + "result": [ + [0.625, 0.126, 0.119], + [1.28, 0.115, 0.11], + [0.765, 0.107, 0.111], + [0.678, 0.11, 0.128], + [0.771, 0.109, 0.106], + [0.879, 0.107, 0.114], + [0.714, 0.113, 0.105], + [2.3, 0.108, 0.111], + [3.976, 0.114, 0.106], + [1.284, 0.116, 0.114], + [0.716, 0.107, 0.103], + [0.738, 0.113, 0.106], + [0.778, 0.12, 0.108], + [0.818, 0.105, 0.116], + [0.839, 0.102, 0.124], + [2.784, 0.124, 0.099], + [1.041, 0.097, 0.099], + [2.169, 0.11, 0.094], 
+ [3.134, 0.122, 0.098], + [0.492, 0.093, 0.093], + [1.983, 0.102, 0.092], + [0.629, 0.107, 0.103], + [1.217, 0.1, 0.101], + [1.853, 0.11, 0.123], + [0.455, 0.105, 0.09], + [0.335, 0.099, 0.1], + [0.431, 0.094, 0.098], + [0.668, 0.096, 0.105], + [3.355, 0.109, 0.113], + [0.679, 0.157, 0.143], + [0.629, 0.097, 0.106], + [0.827, 0.11, 0.111], + [1.064, 0.102, 0.116], + [1.913, 0.106, 0.094], + [1.906, 0.111, 0.106], + [0.665, 0.108, 0.106], + [0.653, 0.1, 0.111], + [0.422, 0.102, 0.097], + [0.555, 0.113, 0.108], + [1.054, 0.103, 0.114], + [0.497, 0.105, 0.103], + [0.402, 0.139, 0.102], + [0.446, 0.108, 0.106] + ] +} diff --git a/databricks/results/x-small.json b/databricks/results/x-small.json new file mode 100644 index 000000000..65fe38aad --- /dev/null +++ b/databricks/results/x-small.json @@ -0,0 +1,56 @@ +{ + "system": "Databricks", + "date": "2025-11-06", + "machine": "Databricks: X-Small", + "cluster_size": 2, + "proprietary": "yes", + "tuned": "no", + "tags": ["managed", "column-oriented"], + "load_time": 78.857, + "data_size": 10219802927, + "result": [ + [0.737, 0.228, 0.15], + [1.412, 0.131, 0.131], + [0.807, 0.123, 0.116], + [0.932, 0.115, 0.125], + [1.119, 0.116, 0.116], + [1.743, 0.158, 0.114], + [0.887, 0.152, 0.105], + [0.515, 0.102, 0.124], + [1.793, 0.104, 0.108], + [1.905, 0.132, 0.113], + [0.884, 0.104, 0.103], + [0.867, 0.125, 0.106], + [1.453, 0.117, 0.107], + [1.863, 0.108, 0.122], + [2.464, 0.126, 0.101], + [1.315, 0.103, 0.102], + [3.63, 0.102, 0.125], + [2.14, 0.114, 0.099], + [6.03, 0.11, 0.1], + [0.374, 0.106, 0.098], + [1.982, 0.105, 0.101], + [1.621, 0.121, 0.1], + [2.877, 0.107, 0.102], + [6.877, 0.112, 0.108], + [0.833, 0.099, 0.097], + [0.573, 0.097, 0.114], + [0.851, 0.105, 0.107], + [1.522, 0.11, 0.11], + [11.223, 0.126, 0.11], + [0.95, 0.142, 0.129], + [1.263, 0.102, 0.097], + [1.902, 0.103, 0.111], + [5.577, 0.113, 0.099], + [5.908, 0.103, 0.095], + [6.459, 0.122, 0.121], + [1.433, 0.111, 0.104], + [0.837, 0.098, 0.1], + [0.475, 0.102, 0.103], + [0.708, 0.109, 0.108], + [1.196, 0.109, 0.113], + [0.473, 0.114, 0.101], + [0.409, 0.128, 0.113], + [0.443, 0.11, 0.212] + ] +} diff --git a/databricks/run.sh b/databricks/run.sh new file mode 100755 index 000000000..b335f912b --- /dev/null +++ b/databricks/run.sh @@ -0,0 +1,15 @@ +#!/bin/bash + +# Determine the directory of this script +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" + +# Use venv python if available, otherwise system python +if [ -f "$SCRIPT_DIR/.venv/bin/python3" ]; then + PYTHON="$SCRIPT_DIR/.venv/bin/python3" +else + PYTHON="python3" +fi + +cat queries.sql | while read -r query; do + $PYTHON ./query.py <<< "${query}" +done