Skip to content

Latest commit

 

History

History
61 lines (47 loc) · 1.65 KB

README.md

File metadata and controls

61 lines (47 loc) · 1.65 KB

subframe

Subframe is a POC project implementing python dataframe API on top of substrait. It aims to expose an API identical to ibis (maybe, roughly, eventually), but use substrait directly as an IR. It also aims to generate SQL in various dialects directly from substrait.

tldr: ibis w/o ibis IR or sqlglot

import subframe

orders = subframe.table(
    [
        ("order_id", "int64"),
        ("fk_store_id", "int64"),
        ("fk_customer_id", "int64"),
        ("description", "string"),
        ("order_total", "float"),
    ],
    name="orders",
)

table = orders.select(
    "order_id",
    "fk_store_id",
    "description",
    orders["fk_store_id"] + orders["fk_customer_id"],
    subframe.literal(1).name("one"),
    two=subframe.literal(2),
)

table = table.filter(table["order_id"] > table["fk_store_id"])

# No dialect support yet, tested with duckdb
sql = subframe.to_sql(table)
print(sql)

Unlike ibis, subframe will probably not have a concept of a Backend, it will integrate with adbc instead.

import adbc_driver_duckdb.dbapi
import pyarrow
import subframe

data = pyarrow.record_batch(
    [[1, 2, 3, 4], ["a", "b", "c", "d"]],
    names=["ints", "strs"],
)

with adbc_driver_duckdb.dbapi.connect(":memory:") as conn:
    with conn.cursor() as cur:
        cur.adbc_ingest("AnswerToEverything", data)

        cur.executescript("INSTALL substrait;")
        cur.executescript("LOAD substrait;")

        table = subframe.named_table("AnswerToEverything", conn)
        table = table.select((table["ints"] + subframe.literal(100)).name("col"))

        cur.execute(table.to_substrait().SerializeToString())
        print(cur.fetch_arrow_table())