From 98046edfb273e9292a91424f4bee8aa03e84dc00 Mon Sep 17 00:00:00 2001 From: Nezar Abdennur Date: Thu, 12 Feb 2026 23:18:34 -0500 Subject: [PATCH 1/3] Unnest and prune documentation --- docs/dialect/aggregation-operators.rst | 49 +---- docs/dialect/distance-operators.rst | 62 +------ docs/dialect/index.rst | 41 +++- docs/dialect/quantifiers.rst | 53 +----- docs/dialect/spatial-operators.rst | 75 +------- docs/guides/index.rst | 26 --- docs/guides/quickstart.rst | 175 ------------------ docs/index.rst | 42 +++-- .../syntax-reference.rst => quickstart.rst} | 120 +++++++++--- .../{advanced-queries.rst => advanced.rst} | 4 - docs/recipes/bedtools-migration.rst | 8 +- ...{clustering-queries.rst => clustering.rst} | 8 +- .../{distance-queries.rst => distance.rst} | 8 +- docs/recipes/index.rst | 43 +---- .../{intersect-queries.rst => intersect.rst} | 8 +- docs/transpilation/index.rst | 6 + .../{guides => transpilation}/performance.rst | 165 ++++++++++------- .../schema-mapping.rst | 37 ---- 18 files changed, 290 insertions(+), 640 deletions(-) delete mode 100644 docs/guides/index.rst delete mode 100644 docs/guides/quickstart.rst rename docs/{dialect/syntax-reference.rst => quickstart.rst} (58%) rename docs/recipes/{advanced-queries.rst => advanced.rst} (99%) rename docs/recipes/{clustering-queries.rst => clustering.rst} (98%) rename docs/recipes/{distance-queries.rst => distance.rst} (98%) rename docs/recipes/{intersect-queries.rst => intersect.rst} (99%) rename docs/{guides => transpilation}/performance.rst (65%) rename docs/{guides => transpilation}/schema-mapping.rst (85%) diff --git a/docs/dialect/aggregation-operators.rst b/docs/dialect/aggregation-operators.rst index cc3d5ec..585cdc4 100644 --- a/docs/dialect/aggregation-operators.rst +++ b/docs/dialect/aggregation-operators.rst @@ -5,10 +5,6 @@ Aggregation operators combine and cluster genomic intervals. 
These operators are essential for reducing complex interval data into summarized regions, such as merging overlapping peaks or identifying clusters of related features. -.. contents:: - :local: - :depth: 1 - .. _cluster-operator: CLUSTER @@ -149,26 +145,6 @@ Find regions with multiple overlapping features: INNER JOIN cluster_sizes s ON c.cluster_id = s.cluster_id WHERE s.size >= 3 -Backend Compatibility -~~~~~~~~~~~~~~~~~~~~~ - -.. list-table:: - :header-rows: 1 - :widths: 20 20 60 - - * - Backend - - Support - - Notes - * - DuckDB - - Full - - Efficient window function implementation - * - SQLite - - Full - - - * - PostgreSQL - - Planned - - - Performance Notes ~~~~~~~~~~~~~~~~~ @@ -342,31 +318,10 @@ Calculate the total base pairs covered after merging: SELECT SUM(end - start) AS total_coverage FROM merged -Backend Compatibility -~~~~~~~~~~~~~~~~~~~~~ - -.. list-table:: - :header-rows: 1 - :widths: 20 20 60 - - * - Backend - - Support - - Notes - * - DuckDB - - Full - - - * - SQLite - - Full - - - * - PostgreSQL - - Planned - - - -Performance Notes -~~~~~~~~~~~~~~~~~ +Notes +~~~~~ - MERGE is an aggregate operation that processes all matching rows -- For very large datasets, consider filtering by chromosome first - The operation sorts data internally, so pre-sorting is not required Related Operators diff --git a/docs/dialect/distance-operators.rst b/docs/dialect/distance-operators.rst index 216bdcb..773965a 100644 --- a/docs/dialect/distance-operators.rst +++ b/docs/dialect/distance-operators.rst @@ -1,14 +1,10 @@ -Distance and Proximity +Distance and Neighbors ====================== Distance and proximity operators calculate genomic distances and find nearest features. These operators are essential for proximity analysis, such as finding genes near regulatory elements or variants near transcription start sites. -.. contents:: - :local: - :depth: 1 - .. 
_distance-operator: DISTANCE @@ -97,33 +93,12 @@ Distinguish between overlapping and nearby features: CROSS JOIN genes g WHERE p.chrom = g.chrom -Backend Compatibility -~~~~~~~~~~~~~~~~~~~~~ - -.. list-table:: - :header-rows: 1 - :widths: 20 20 60 - - * - Backend - - Support - - Notes - * - DuckDB - - Full - - - * - SQLite - - Full - - - * - PostgreSQL - - Planned - - - -Performance Notes -~~~~~~~~~~~~~~~~~ +Notes +~~~~~ - Always include ``WHERE a.chrom = b.chrom`` to avoid unnecessary cross-chromosome comparisons - For large datasets, consider pre-filtering by region before calculating distances -- Create indexes on chromosome and position columns for better performance Related Operators ~~~~~~~~~~~~~~~~~ @@ -332,39 +307,12 @@ Find nearby same-strand features within distance constraints: WHERE nearest.distance BETWEEN -10000 AND 10000 ORDER BY peaks.name, ABS(nearest.distance) -Backend Compatibility -~~~~~~~~~~~~~~~~~~~~~ - -.. list-table:: - :header-rows: 1 - :widths: 20 20 60 - - * - Backend - - Support - - Notes - * - DuckDB - - Full - - Efficient lateral join support - * - SQLite - - Partial - - Works but slower for large k values - * - PostgreSQL - - Planned - - - -Performance Notes -~~~~~~~~~~~~~~~~~ +Notes +~~~~~ - **Chromosome pre-filtering**: NEAREST automatically filters by chromosome for efficiency - **Use max_distance**: Specifying a maximum distance reduces the search space significantly - **Limit k**: Only request as many neighbors as you actually need -- **Create indexes**: Add indexes on ``(chrom, start, "end")`` for better performance - -.. 
code-block:: sql - - -- Create indexes for better NEAREST performance - CREATE INDEX idx_genes_position - ON genes (chrom, start, "end") Related Operators ~~~~~~~~~~~~~~~~~ diff --git a/docs/dialect/index.rst b/docs/dialect/index.rst index 48e7bb2..3d6167f 100644 --- a/docs/dialect/index.rst +++ b/docs/dialect/index.rst @@ -1,5 +1,5 @@ -Operators -========= +The GIQL Dialect +================ GIQL extends SQL with operators specifically designed for genomic interval queries. These operators enable powerful spatial reasoning over genomic coordinates without @@ -9,6 +9,29 @@ Operators are organized by functionality. All operators work across supported database backends (DuckDB, SQLite, with PostgreSQL planned). Each operator page includes a compatibility table showing backend support status. +Logical genomic range columns +----------------------------- + +GIQL allows you to reference *logical* genomic range columns in queries. Such logical +columns do not need to exist explicitly or materially in any of your data sources, +and no specialized composite data types are needed. Rather, a logical genomic range +column or "pseudo-column" can be mapped to physical columns in your source table that +contain the required information and use conventional data types (reference sequence name, +start and end coordinates, optional strand, etc.). + +In GIQL queries, you reference a logical genomic range column using a designated name +like ``interval``: + +.. code-block:: sql + + SELECT * FROM variants WHERE interval INTERSECTS 'chr1:1000-2000' + +By providing a :doc:`schema mapping <../transpilation/schema-mapping>` for the genomic +range columns of each of the tables in a GIQL query, the GIQL transpiler can translate +range operations into standard SQL expressions to be consumed by a general-purpose +query engine. Alternatively, a GIQL-aware query engine could use the schema mapping +directly for optimization. 
+ Spatial Relationship Operators ------------------------------ @@ -97,11 +120,11 @@ Apply operators to multiple ranges simultaneously. See :doc:`quantifiers` for detailed documentation. -.. toctree:: - :maxdepth: 2 - :hidden: +.. .. toctree:: +.. :maxdepth: 2 +.. :hidden: - spatial-operators - distance-operators - aggregation-operators - quantifiers +.. spatial-operators +.. distance-operators +.. aggregation-operators +.. quantifiers diff --git a/docs/dialect/quantifiers.rst b/docs/dialect/quantifiers.rst index b10a38b..956714f 100644 --- a/docs/dialect/quantifiers.rst +++ b/docs/dialect/quantifiers.rst @@ -5,10 +5,6 @@ Set quantifiers extend spatial operators to work with multiple ranges simultaneo They allow you to test whether a genomic position matches any or all of a set of specified ranges in a single query. -.. contents:: - :local: - :depth: 1 - .. _any-quantifier: ANY @@ -112,32 +108,11 @@ Query across different chromosomes efficiently: 'chrX:100000-200000' ) -Backend Compatibility -~~~~~~~~~~~~~~~~~~~~~ - -.. list-table:: - :header-rows: 1 - :widths: 20 20 60 - - * - Backend - - Support - - Notes - * - DuckDB - - Full - - - * - SQLite - - Full - - - * - PostgreSQL - - Planned - - - -Performance Notes -~~~~~~~~~~~~~~~~~ +Notes +~~~~~ - ``ANY`` expands to multiple OR conditions in the generated SQL - For very large sets of ranges, consider using a separate table and JOIN instead -- The optimizer may benefit from indexes on chromosome and position columns Related ~~~~~~~ @@ -238,28 +213,8 @@ features in the intersection of multiple regions): -- This finds features that overlap BOTH ranges -- (i.e., features in the intersection: chr1:1500-2000) -Backend Compatibility -~~~~~~~~~~~~~~~~~~~~~ - -.. 
list-table:: - :header-rows: 1 - :widths: 20 20 60 - - * - Backend - - Support - - Notes - * - DuckDB - - Full - - - * - SQLite - - Full - - - * - PostgreSQL - - Planned - - - -Performance Notes -~~~~~~~~~~~~~~~~~ +Notes +~~~~~ - ``ALL`` expands to multiple AND conditions in the generated SQL - Queries with ``ALL`` may be more restrictive, potentially reducing result sets diff --git a/docs/dialect/spatial-operators.rst b/docs/dialect/spatial-operators.rst index fa1c7be..6bf4433 100644 --- a/docs/dialect/spatial-operators.rst +++ b/docs/dialect/spatial-operators.rst @@ -1,14 +1,10 @@ -Spatial Relationships -===================== +Spatial Operators +================= Spatial relationship operators test positional relationships between genomic ranges. These are the core operators for determining whether genomic intervals overlap, contain, or are contained within other intervals. -.. contents:: - :local: - :depth: 1 - .. _intersects-operator: INTERSECTS @@ -103,33 +99,6 @@ Find all variants, with gene information where available: FROM variants v LEFT JOIN genes g ON v.interval INTERSECTS g.interval -Backend Compatibility -~~~~~~~~~~~~~~~~~~~~~ - -.. list-table:: - :header-rows: 1 - :widths: 20 20 60 - - * - Backend - - Support - - Notes - * - DuckDB - - Full - - - * - SQLite - - Full - - - * - PostgreSQL - - Planned - - Targeted for future release - -Performance Notes -~~~~~~~~~~~~~~~~~ - -- Create indexes on ``(chrom, start, "end")`` for better join performance -- When joining large tables, consider filtering by chromosome first -- The generated SQL uses efficient range comparison predicates - Related Operators ~~~~~~~~~~~~~~~~~ @@ -220,26 +189,6 @@ Find variants that are completely within gene boundaries: FROM variants v INNER JOIN genes g ON g.interval CONTAINS v.interval -Backend Compatibility -~~~~~~~~~~~~~~~~~~~~~ - -.. 
list-table:: - :header-rows: 1 - :widths: 20 20 60 - - * - Backend - - Support - - Notes - * - DuckDB - - Full - - - * - SQLite - - Full - - - * - PostgreSQL - - Planned - - - Related Operators ~~~~~~~~~~~~~~~~~ @@ -316,26 +265,6 @@ Find exons that are completely within their parent gene: FROM exons e INNER JOIN genes g ON e.interval WITHIN g.interval -Backend Compatibility -~~~~~~~~~~~~~~~~~~~~~ - -.. list-table:: - :header-rows: 1 - :widths: 20 20 60 - - * - Backend - - Support - - Notes - * - DuckDB - - Full - - - * - SQLite - - Full - - - * - PostgreSQL - - Planned - - - Related Operators ~~~~~~~~~~~~~~~~~ diff --git a/docs/guides/index.rst b/docs/guides/index.rst deleted file mode 100644 index b7644d1..0000000 --- a/docs/guides/index.rst +++ /dev/null @@ -1,26 +0,0 @@ -Guides -====== - -Task-oriented guides for working with GIQL. These guides cover common workflows -and best practices for using GIQL effectively. - -.. toctree:: - :maxdepth: 2 - :hidden: - - schema-mapping - engine - performance - -:doc:`schema-mapping` - Learn how to configure GIQL to work with your genomic data, including - table configuration and mapping logical genomic columns. - -:doc:`engine` - Understand how to use GIQL's transpiled SQL with different - execution engines like DuckDB, SQLite, and PostgreSQL. - -:doc:`performance` - Optimize your GIQL queries for better performance with indexing strategies, - query patterns, and backend-specific tips. - diff --git a/docs/guides/quickstart.rst b/docs/guides/quickstart.rst deleted file mode 100644 index ef7c3ae..0000000 --- a/docs/guides/quickstart.rst +++ /dev/null @@ -1,175 +0,0 @@ -Quick Start -=========== - -GIQL provides a familiar SQL syntax for bioinformatics workflows, allowing -you to express complex genomic range operations without writing intricate -SQL expressions. GIQL queries read naturally, making your analysis code -easier to review and share. 
GIQL operators follow established conventions -around genomic spatial relationships, so the semantics are familiar and -predictable. - -- **Spatial operators**: INTERSECTS, CONTAINS, WITHIN for range relationships -- **Distance operators**: DISTANCE, NEAREST for proximity queries -- **Aggregation operators**: CLUSTER, MERGE for combining intervals -- **Set quantifiers**: ANY, ALL for multi-range queries -- **Range parsing**: Understands genomic range strings and coordinate systems -- **Transpilation**: Converts GIQL to standard SQL-92 compatible output for execution on any backend - -Installation ------------- - -Install GIQL using pip: - -.. code-block:: bash - - pip install giql - -Basic Usage ------------ - -Table Configuration -~~~~~~~~~~~~~~~~~~~ - -GIQL works with genomic data stored in tables with separate columns for chromosome, -start position, and end position. The default column names are: - -* **chrom**: Chromosome identifier (e.g., 'chr1', 'chr2', 'chrX') -* **start**: Start position of the genomic interval (0-based, inclusive) -* **end**: End position of the genomic interval (0-based, exclusive, half-open) -* **strand** (optional): Strand orientation ('+', '-', or '.') - -If your table uses the default column names, you can pass just the table name -as a string. For custom column names, use a ``Table`` object: - -.. code-block:: python - - from giql import Table, transpile - - # Default column names (chrom, start, end, strand) - sql = transpile(query, tables=["peaks"]) - - # Custom column names - sql = transpile( - query, - tables=[ - Table( - "variants", - genomic_col="interval", - chrom_col="chromosome", - start_col="start_pos", - end_col="end_pos", - ) - ], - ) - -After configuration, you can use the genomic pseudo-column (default: ``interval``) -in your GIQL queries, and the transpiler will automatically expand it to the -physical column comparisons. - -Query with DuckDB -~~~~~~~~~~~~~~~~~ - -.. 
code-block:: python - - import duckdb - from giql import transpile - - sql = transpile( - """ - SELECT * FROM variants - WHERE interval INTERSECTS 'chr1:1000-2000' - """, - tables=["variants"], - ) - - conn = duckdb.connect() - conn.execute("CREATE TABLE variants AS SELECT * FROM read_csv('variants.csv')") - df = conn.execute(sql).fetchdf() - -Query with SQLite -~~~~~~~~~~~~~~~~~ - -.. code-block:: python - - import sqlite3 - from giql import transpile - - sql = transpile( - """ - SELECT * FROM variants - WHERE interval INTERSECTS 'chr1:1000-2000' - """, - tables=["variants"], - ) - - conn = sqlite3.connect("data.db") - cursor = conn.execute(sql) - for row in cursor: - print(row) - -Spatial Operators ------------------ - -INTERSECTS -~~~~~~~~~~ - -Check if genomic ranges overlap: - -.. code-block:: sql - - SELECT * FROM variants - WHERE interval INTERSECTS 'chr1:1000-2000' - -CONTAINS -~~~~~~~~ - -Check if a range contains a point or another range: - -.. code-block:: sql - - SELECT * FROM variants - WHERE interval CONTAINS 'chr1:1500' - -WITHIN -~~~~~~ - -Check if a range is within another range: - -.. code-block:: sql - - SELECT * FROM variants - WHERE interval WITHIN 'chr1:1000-5000' - -Set Quantifiers ---------------- - -ANY -~~~ - -Match any of the specified ranges: - -.. code-block:: sql - - SELECT * FROM variants - WHERE interval INTERSECTS ANY('chr1:1000-2000', 'chr1:5000-6000') - -ALL -~~~ - -Match all of the specified ranges: - -.. code-block:: sql - - SELECT * FROM variants - WHERE interval CONTAINS ALL('chr1:1500', 'chr1:1600') - -Column-to-Column Joins ----------------------- - -Join tables on genomic position: - -.. 
code-block:: sql - - SELECT v.*, g.name - FROM variants v - INNER JOIN genes g ON v.interval INTERSECTS g.interval diff --git a/docs/index.rst b/docs/index.rst index 417faad..83b2834 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -5,11 +5,11 @@ Genomic Interval Query Language (GIQL) :hidden: Home - guides/quickstart + quickstart **GIQL** is an extended SQL dialect that allows you to declaratively express genomic interval operations. -See the :doc:`guides/quickstart` to get started. +See the :doc:`quickstart` to get started. Dialect ------- @@ -19,37 +19,51 @@ than how. Whether you're filtering variants by genomic region, finding overlapping features, or calculating distances between intervals, GIQL makes these operations intuitive and portable. +See the :doc:`GIQL dialect ` docs. + .. toctree:: - :maxdepth: 1 + :hidden: :caption: Dialect - dialect/index - dialect/syntax-reference + Overview + dialect/spatial-operators + dialect/distance-operators + dialect/aggregation-operators + dialect/quantifiers Transpilation ------------- The ``giql`` package *transpiles* queries written in GIQL to regular SQL -for use in existing database systems and analytics engines. +for use in existing database systems (like SQLite or PostgreSQL), data +warehouses, or analytics engines (like Polars and DuckDB). + +See the :doc:`GIQL transpiler ` docs. .. toctree:: - :maxdepth: 1 + :hidden: :caption: Transpilation - transpilation/index + Overview + transpilation/schema-mapping transpilation/execution + transpilation/performance transpilation/api-reference Learn more ---------- -See the following guides to learn how to use GIQL effectively: +See the following :doc:`recipes ` to learn how to use GIQL effectively. .. 
toctree:: - :maxdepth: 1 - :caption: Guides and Recipes - - guides/index - recipes/index + :hidden: + :caption: Recipes + + Overview + recipes/intersect + recipes/distance + recipes/clustering + recipes/advanced + recipes/bedtools-migration Indices and tables diff --git a/docs/dialect/syntax-reference.rst b/docs/quickstart.rst similarity index 58% rename from docs/dialect/syntax-reference.rst rename to docs/quickstart.rst index 0082e26..32f292f 100644 --- a/docs/dialect/syntax-reference.rst +++ b/docs/quickstart.rst @@ -1,40 +1,112 @@ -Syntax Reference -================ +Quick Start +=========== -Quick reference for GIQL syntax and operators. +GIQL provides a familiar SQL syntax for bioinformatics workflows, allowing +you to express complex genomic range operations without writing intricate +SQL expressions. GIQL queries read naturally, making your analysis code +easier to review and share. GIQL operators follow established conventions +around genomic spatial relationships, so the semantics are familiar and +predictable. -.. contents:: - :local: - :depth: 1 +- **Spatial operators**: INTERSECTS, CONTAINS, WITHIN for range relationships +- **Distance operators**: DISTANCE, NEAREST for proximity queries +- **Aggregation operators**: CLUSTER, MERGE for combining intervals +- **Set quantifiers**: ANY, ALL for multi-range queries +- **Range parsing**: Understands genomic range strings and coordinate systems +- **Transpilation**: Converts GIQL to standard SQL-92 compatible output for execution on any backend -Genomic Range Literals ----------------------- +Installation +------------ -Format -~~~~~~ +Install GIQL using pip: -Genomic ranges are specified as string literals: +.. code-block:: bash -.. code-block:: text + pip install giql - 'chromosome:start-end' +Basic Usage +----------- -Examples -~~~~~~~~ +Table Configuration +~~~~~~~~~~~~~~~~~~~ -.. 
code-block:: sql +GIQL works with genomic data stored in tables with separate columns for chromosome, +start position, and end position. The default column names are: + +* **chrom**: Chromosome identifier (e.g., 'chr1', 'chr2', 'chrX') +* **start**: Start position of the genomic interval (0-based, inclusive) +* **end**: End position of the genomic interval (0-based, exclusive, half-open) +* **strand** (optional): Strand orientation ('+', '-', or '.') + +If your table uses the default column names, you can pass just the table name +as a string. For custom column names, use a ``Table`` object: - 'chr1:1000-2000' -- Range on chr1 from 1000 to 2000 - 'chr1:1000' -- Point at position 1000 - 'chrX:50000-100000' -- Range on chrX - 'chr1:0-1000000' -- First megabase of chr1 +.. code-block:: python -Coordinate System + from giql import Table, transpile + + # Default column names (chrom, start, end, strand) + sql = transpile(query, tables=["peaks"]) + + # Custom column names + sql = transpile( + query, + tables=[ + Table( + "variants", + genomic_col="interval", + chrom_col="chromosome", + start_col="start_pos", + end_col="end_pos", + ) + ], + ) + +After configuration, you can use the genomic pseudo-column (default: ``interval``) +in your GIQL queries, and the transpiler will automatically expand it to the +physical column comparisons. + +Query with DuckDB ~~~~~~~~~~~~~~~~~ -- **0-based start**: First base is position 0 -- **Half-open interval**: [start, end) - start inclusive, end exclusive -- Range ``chr1:100-200`` covers bases 100 through 199 +.. code-block:: python + + import duckdb + from giql import transpile + + sql = transpile( + """ + SELECT * FROM variants + WHERE interval INTERSECTS 'chr1:1000-2000' + """, + tables=["variants"], + ) + + conn = duckdb.connect() + conn.execute("CREATE TABLE variants AS SELECT * FROM read_csv('variants.csv')") + df = conn.execute(sql).fetchdf() + +Query with SQLite +~~~~~~~~~~~~~~~~~ + +.. 
code-block:: python + + import sqlite3 + from giql import transpile + + sql = transpile( + """ + SELECT * FROM variants + WHERE interval INTERSECTS 'chr1:1000-2000' + """, + tables=["variants"], + ) + + conn = sqlite3.connect("data.db") + cursor = conn.execute(sql) + for row in cursor: + print(row) + Spatial Operators ----------------- diff --git a/docs/recipes/advanced-queries.rst b/docs/recipes/advanced.rst similarity index 99% rename from docs/recipes/advanced-queries.rst rename to docs/recipes/advanced.rst index 62147f6..4205bc1 100644 --- a/docs/recipes/advanced-queries.rst +++ b/docs/recipes/advanced.rst @@ -4,10 +4,6 @@ Advanced Queries This section covers advanced query patterns including multi-range matching, complex filtering, aggregate statistics, and multi-table workflows. -.. contents:: - :local: - :depth: 2 - Multi-Range Matching -------------------- diff --git a/docs/recipes/bedtools-migration.rst b/docs/recipes/bedtools-migration.rst index 4a00011..8e7c691 100644 --- a/docs/recipes/bedtools-migration.rst +++ b/docs/recipes/bedtools-migration.rst @@ -5,10 +5,6 @@ This guide maps bedtools commands to their GIQL equivalents. If you're familiar with bedtools and want to replicate specific operations in GIQL, use this reference to find the corresponding query patterns. -.. contents:: - :local: - :depth: 2 - Quick Reference Table --------------------- @@ -634,8 +630,8 @@ Key Differences from Bedtools 4. **Built-in aggregation**: SQL's GROUP BY and aggregate functions (COUNT, AVG, SUM, etc.) are available directly, without needing separate post-processing. -5. **Database integration**: GIQL queries run against database tables, enabling - integration with other data and persistence of results. +5. **Database integration**: GIQL queries can be run against database tables, + enabling integration with other data and persistence of results. 6. 
**Multi-backend support**: The same GIQL query can run on DuckDB, SQLite, or other supported backends without modification. diff --git a/docs/recipes/clustering-queries.rst b/docs/recipes/clustering.rst similarity index 98% rename from docs/recipes/clustering-queries.rst rename to docs/recipes/clustering.rst index 3dbd682..fc613e9 100644 --- a/docs/recipes/clustering-queries.rst +++ b/docs/recipes/clustering.rst @@ -1,13 +1,9 @@ -Clustering and Merging Queries -============================== +Clustering and Merging +====================== This section covers patterns for clustering overlapping intervals and merging them into unified regions using GIQL's aggregation operators. -.. contents:: - :local: - :depth: 2 - Basic Clustering ---------------- diff --git a/docs/recipes/distance-queries.rst b/docs/recipes/distance.rst similarity index 98% rename from docs/recipes/distance-queries.rst rename to docs/recipes/distance.rst index c71a4ee..2f1f352 100644 --- a/docs/recipes/distance-queries.rst +++ b/docs/recipes/distance.rst @@ -1,13 +1,9 @@ -Distance and Proximity Queries -============================== +Distance and Neighbors +====================== This section covers patterns for calculating genomic distances and finding nearest features using GIQL's distance operators. -.. contents:: - :local: - :depth: 2 - Calculating Distances --------------------- diff --git a/docs/recipes/index.rst b/docs/recipes/index.rst index 5597846..cc97e47 100644 --- a/docs/recipes/index.rst +++ b/docs/recipes/index.rst @@ -4,45 +4,22 @@ Recipes This section provides practical examples and patterns for common genomic analysis tasks using GIQL. Each recipe focuses on a specific use case with ready-to-use query patterns. -.. contents:: - :local: - :depth: 1 - -Getting Started with Recipes ----------------------------- - -All recipes show GIQL queries that you can transpile and execute on your database. -Setup: - -.. 
code-block:: python - - from giql import transpile - - # Transpile any GIQL query to SQL - sql = transpile( - "... GIQL query from the recipes below ...", - tables=["features_a", "features_b"], - ) - - # Then execute the SQL on your database connection - # e.g., conn.execute(sql) - Recipe Categories ----------------- -:doc:`intersect-queries` +:doc:`intersect` Finding overlapping features, filtering by overlap, counting overlaps, strand-specific operations, and join patterns. -:doc:`distance-queries` +:doc:`distance` Calculating distances between features, finding nearest neighbors, distance-constrained searches, and directional queries. -:doc:`clustering-queries` +:doc:`clustering` Clustering overlapping intervals, distance-based clustering, merging intervals, and aggregating cluster statistics. -:doc:`advanced-queries` +:doc:`advanced` Multi-range matching, complex filtering with joins, aggregate statistics, window expansions, and multi-table queries. @@ -51,14 +28,4 @@ Coming from Bedtools? If you're familiar with bedtools and want to replicate specific commands in GIQL, see the :doc:`bedtools-migration` guide for a complete mapping of bedtools -operations to GIQL equivalents. - -.. toctree:: - :maxdepth: 2 - :hidden: - - intersect-queries - distance-queries - clustering-queries - advanced-queries - bedtools-migration +operations to GIQL equivalents. \ No newline at end of file diff --git a/docs/recipes/intersect-queries.rst b/docs/recipes/intersect.rst similarity index 99% rename from docs/recipes/intersect-queries.rst rename to docs/recipes/intersect.rst index ef7c022..2872ab7 100644 --- a/docs/recipes/intersect-queries.rst +++ b/docs/recipes/intersect.rst @@ -1,13 +1,9 @@ -Intersection Queries -==================== +Intersection +============ This section covers common patterns for finding overlapping genomic features using GIQL's spatial operators. -.. 
contents:: - :local: - :depth: 2 - Finding Overlapping Features ---------------------------- diff --git a/docs/transpilation/index.rst b/docs/transpilation/index.rst index e5e743b..14b58b6 100644 --- a/docs/transpilation/index.rst +++ b/docs/transpilation/index.rst @@ -32,6 +32,12 @@ The result is a standard SQL query that can be consumed by an execution engine t SELECT * FROM variants WHERE "chrom" = 'chr1' AND "start" < 2000 AND "end" > 1000 +Notably, the transpiler expands logical genomic range columns into physical column comparisons. + +The ``Table`` configuration of ``"variants"`` tells GIQL which physical columns correspond to +the logical ``interval`` column. The above example simply maps to the default column names: +``chrom``, ``start``, ``end``. + Examples -------- diff --git a/docs/guides/performance.rst b/docs/transpilation/performance.rst similarity index 65% rename from docs/guides/performance.rst rename to docs/transpilation/performance.rst index 019416e..028a990 100644 --- a/docs/guides/performance.rst +++ b/docs/transpilation/performance.rst @@ -1,43 +1,118 @@ -Performance Guide -================= - -This guide covers strategies for optimizing GIQL query performance, including -indexing, query patterns, and backend-specific optimizations. - -.. contents:: - :local: - :depth: 1 - -Understanding Query Performance -------------------------------- - -How GIQL Queries Execute -~~~~~~~~~~~~~~~~~~~~~~~~ +Performance +----------- When you use GIQL: 1. GIQL parses the query and identifies genomic operators 2. Operators are expanded into SQL predicates -3. You execute the SQL on your database backend -4. The database executes the query using its optimizer - -Performance depends on both the generated SQL and how the database executes it. +3. You execute the SQL on your database backend or analytics engine +4. 
The system optimizes the query and executes it -Common Performance Bottlenecks -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Performance depends on both the generated SQL and how the target data system +plans, optimizes, and executes it. Some common performance bottlenecks include: - **Full table scans**: No indexes to speed up filtering - **Cartesian products**: Large cross joins without early filtering - **Missing chromosome filters**: Comparing features across all chromosomes - **Inefficient join order**: Small tables should drive joins -Indexing Strategies -------------------- +Streaming +--------- + +Analytics engines like DuckDB and Polars support streaming data sources +in sequences of small "record batches", enabling parallel processing and +out-of-core workflows on files that may be much larger than memory. + +For delimited text files, you can use native APIs: + +**DuckDB:** + +.. code-block:: python + + import duckdb + from giql import transpile + + conn = duckdb.connect() + + # DuckDB can query CSV/TSV files directly + conn.execute(""" + CREATE VIEW peaks AS + SELECT * FROM read_csv('peaks.bed', delim='\t', + columns={'chrom': 'VARCHAR', 'start': 'INTEGER', + 'end': 'INTEGER', 'name': 'VARCHAR', + 'score': 'INTEGER', 'strand': 'VARCHAR'}) + """) -Creating Indexes -~~~~~~~~~~~~~~~~ + sql = transpile( + "SELECT * FROM peaks WHERE interval INTERSECTS 'chr1:1000-2000'", + tables=["peaks"], + ) + + df = conn.execute(sql).fetchdf() + +**Polars:** + +.. code-block:: python + + import polars as pl + from giql import transpile -Create indexes on genomic columns for faster queries: + lf = pl.scan_csv("peaks.bed", separator="\t", + new_columns=["chrom", "start", "end", "name", "score", "strand"]) + + sql = transpile( + "SELECT * FROM peaks WHERE interval INTERSECTS 'chr1:1000-2000'", + tables=["peaks"], + ) + + ctx = pl.SQLContext(peaks=lf) + df = ctx.execute(sql).collect() + +For specialized NGS formats, you can supply streaming data using the +`oxbow `_ package: + +.. 
code-block:: python
+
+    import duckdb
+    import oxbow as ox
+    from giql import transpile
+
+    conn = duckdb.connect()
+
+    # Load a streaming data source as a DuckDB relation
+    peaks = ox.read_bed("peaks.bed").to_duckdb(conn, "peaks")
+
+    sql = transpile(
+        "SELECT * FROM peaks WHERE interval INTERSECTS 'chr1:1000-2000'",
+        tables=["peaks"],
+    )
+
+    df = conn.execute(sql).fetchdf()
+
+.. code-block:: python
+
+    import polars as pl
+    import oxbow as ox
+    from giql import transpile
+
+    # Load a streaming data source as a Polars LazyFrame
+    lf = ox.read_bed("peaks.bed").pl(lazy=True)
+
+    sql = transpile(
+        """SELECT *, CLUSTER(interval) AS cluster_id
+           FROM peaks
+           ORDER BY chrom, start
+        """,
+        tables=["peaks"],
+    )
+    ctx = pl.SQLContext(peaks=lf)
+    ctx.execute(sql).sink_parquet("peaks_clustered.parquet")
+
+Indexing
+--------
+
+If your data source is a database table, you can create indexes on
+genomic columns for faster queries:
 
 .. code-block:: sql
 
@@ -45,9 +120,6 @@ Create indexes on genomic columns for faster queries:
     CREATE INDEX idx_features_position
     ON features (chrom, start, "end")
 
-Recommended Index Patterns
-~~~~~~~~~~~~~~~~~~~~~~~~~~
-
 **For single-table queries (filtering):**
 
 .. code-block:: sql
 
@@ -68,12 +140,9 @@ Recommended Index Patterns
     CREATE INDEX idx_features_strand
     ON features (chrom, strand, start, "end")
 
-When to Create Indexes
-~~~~~~~~~~~~~~~~~~~~~~
-
 Create indexes when:
 
-- Tables have more than ~10,000 rows
+- Tables are very large
 - You're running repeated queries on the same tables
 - Join queries are slow
 - Filtering by genomic position is common
@@ -142,9 +211,6 @@ DISTINCT can be expensive. Only use when necessary:
         WHERE a.interval INTERSECTS b.interval
     )
 
-NEAREST Query Optimization
---------------------------
-
 Optimizing K-NN Queries
 ~~~~~~~~~~~~~~~~~~~~~~~
 
@@ -179,9 +245,6 @@ The NEAREST operator can be expensive for large datasets. 
Optimize with: CREATE INDEX idx_genes_position ON genes (chrom, start, "end") -Merge and Cluster Optimization ------------------------------- - Efficient Clustering ~~~~~~~~~~~~~~~~~~~~ @@ -273,27 +336,3 @@ SQLite Optimizations .. code-block:: sql ANALYZE features - -Performance Checklist ---------------------- - -Before running large queries, check: - -.. code-block:: text - - - Indexes created on genomic columns - - Chromosome filtering included in joins - - Selective filters applied early - - LIMIT used for exploration - - Only necessary columns selected - - NEAREST queries use max_distance - - Results streamed instead of fetched all at once - -Quick Wins -~~~~~~~~~~ - -1. **Add indexes** - Usually the biggest performance improvement -2. **Filter by chromosome** - Reduces join complexity significantly -3. **Use max_distance with NEAREST** - Limits search space -4. **Stream results** - Reduces memory pressure -5. **Use DuckDB** - Generally faster for analytical queries diff --git a/docs/guides/schema-mapping.rst b/docs/transpilation/schema-mapping.rst similarity index 85% rename from docs/guides/schema-mapping.rst rename to docs/transpilation/schema-mapping.rst index 43c580c..453c289 100644 --- a/docs/guides/schema-mapping.rst +++ b/docs/transpilation/schema-mapping.rst @@ -4,21 +4,11 @@ Schema Mapping This guide explains how to configure GIQL to work with your genomic data by defining table configurations that map logical genomic columns to physical columns. -.. contents:: - :local: - :depth: 1 - -Understanding Schema Mapping ----------------------------- - GIQL needs to know how your genomic data is structured in order to translate genomic operators into SQL. This is done through ``Table`` objects, which map a logical "genomic column" (used in your queries) to the physical columns in your files, data frames, or database tables. -The Core Concept -~~~~~~~~~~~~~~~~ - In GIQL queries, you use a logical genomic column name like ``interval``: .. 
code-block:: sql @@ -95,9 +85,6 @@ the mapping: ], ) -Multiple Tables ---------------- - Configuring Multiple Tables ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -197,27 +184,3 @@ For point features (like SNPs), create an interval of length 1: # 0-based interval: [999, 1000) start = 999 end = 1000 - - -Troubleshooting ---------------- - -Common Issues -~~~~~~~~~~~~~ - -**"Unknown column" errors:** - -- Ensure the table is included in the ``tables`` parameter -- Check that the genomic column name in your query matches the configured name -- Verify column names in the ``Table`` object match actual table columns - -**Incorrect results:** - -- Verify your coordinate system (0-based vs 1-based) -- Check that start < end for all intervals -- Ensure chromosome names match between tables (e.g., 'chr1' vs '1') - -**Performance issues:** - -- See the :doc:`performance` guide for optimization tips -- Consider adding indexes on genomic columns \ No newline at end of file From c3dfc4bd11f4df251310f067979a9d9e9a943104 Mon Sep 17 00:00:00 2001 From: Nezar Abdennur Date: Thu, 12 Feb 2026 23:52:27 -0500 Subject: [PATCH 2/3] Update demo notebook --- demo.ipynb | 2892 +++++++++++++++++++++++++--------------------------- 1 file changed, 1382 insertions(+), 1510 deletions(-) diff --git a/demo.ipynb b/demo.ipynb index e7f02c3..5a6705e 100644 --- a/demo.ipynb +++ b/demo.ipynb @@ -4,18 +4,17 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# GIQL operators demo using DuckDB query engine\n", + "# GIQL operators demo\n", "\n", "This notebook demonstrates the 7 core GIQL operators:\n", "- **Binary predicates**: INTERSECTS, WITHIN, CONTAINS\n", "- **Aggregation operators**: MERGE, CLUSTER\n", - "- **UDF operators**: DISTANCE\n", - "- **Table-valued functions**: NEAREST\n", + "- **Distance operators**: DISTANCE, NEAREST\n", "\n", "For each operator, we:\n", - "1. Create a GIQL query\n", - "2. Transpile it to standard SQL using GIQL\n", - "3. Execute the SQL using DuckDB" + "1. 
Write a GIQL query\n", + "2. Transpile it to standard SQL\n", + "3. Execute the SQL with DuckDB" ] }, { @@ -24,10 +23,11 @@ "metadata": {}, "outputs": [], "source": [ - "import polars as pl\n", + "import duckdb\n", + "import sqlglot\n", "import sqlparse\n", "\n", - "from giql import GIQLEngine" + "from giql import transpile" ] }, { @@ -36,7 +36,7 @@ "source": [ "## Load BED files\n", "\n", - "Load the two ENCODE ChIP-seq peak files as Polars DataFrames." + "Load two ENCODE ChIP-seq peak files into DuckDB." ] }, { @@ -49,36 +49,126 @@ "output_type": "stream", "text": [ "Features A: 60893 intervals\n", - "Features B: 46196 intervals\n", - "\n", - "Features A preview:\n" + "Features B: 46196 intervals\n" ] }, { "data": { "text/html": [ - "
\n", - "shape: (5, 10)
chromosomestart_posend_posnamescorestrandsignal_valuep_valueq_valuepeak
stri64i64stri64strf64f64f64i64
"chr17"2738103227381306"."1000"."461.10586-1.05.08726136
"chr7"2556593225566199"."1000"."457.60096-1.05.08726133
"chr7"143215117143215391"."1000"."443.34712-1.05.08726139
"chr2"164600030164600306"."1000"."442.64446-1.05.08726137
"chr15"5624604656246359"."1000"."438.56423-1.05.08726154
" + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
chromstartendnamescorestrandsignal_valuep_valueq_valuepeak
0chr172738103227381306.1000.461.10586-1.05.08726136
1chr72556593225566199.1000.457.60096-1.05.08726133
2chr7143215117143215391.1000.443.34712-1.05.08726139
3chr2164600030164600306.1000.442.64446-1.05.08726137
4chr155624604656246359.1000.438.56423-1.05.08726154
\n", + "" ], "text/plain": [ - "shape: (5, 10)\n", - "┌────────────┬───────────┬───────────┬──────┬───┬──────────────┬─────────┬─────────┬──────┐\n", - "│ chromosome ┆ start_pos ┆ end_pos ┆ name ┆ … ┆ signal_value ┆ p_value ┆ q_value ┆ peak │\n", - "│ --- ┆ --- ┆ --- ┆ --- ┆ ┆ --- ┆ --- ┆ --- ┆ --- │\n", - "│ str ┆ i64 ┆ i64 ┆ str ┆ ┆ f64 ┆ f64 ┆ f64 ┆ i64 │\n", - "╞════════════╪═══════════╪═══════════╪══════╪═══╪══════════════╪═════════╪═════════╪══════╡\n", - "│ chr17 ┆ 27381032 ┆ 27381306 ┆ . ┆ … ┆ 461.10586 ┆ -1.0 ┆ 5.08726 ┆ 136 │\n", - "│ chr7 ┆ 25565932 ┆ 25566199 ┆ . ┆ … ┆ 457.60096 ┆ -1.0 ┆ 5.08726 ┆ 133 │\n", - "│ chr7 ┆ 143215117 ┆ 143215391 ┆ . ┆ … ┆ 443.34712 ┆ -1.0 ┆ 5.08726 ┆ 139 │\n", - "│ chr2 ┆ 164600030 ┆ 164600306 ┆ . ┆ … ┆ 442.64446 ┆ -1.0 ┆ 5.08726 ┆ 137 │\n", - "│ chr15 ┆ 56246046 ┆ 56246359 ┆ . ┆ … ┆ 438.56423 ┆ -1.0 ┆ 5.08726 ┆ 154 │\n", - "└────────────┴───────────┴───────────┴──────┴───┴──────────────┴─────────┴─────────┴──────┘" + " chrom start end name score strand signal_value p_value \\\n", + "0 chr17 27381032 27381306 . 1000 . 461.10586 -1.0 \n", + "1 chr7 25565932 25566199 . 1000 . 457.60096 -1.0 \n", + "2 chr7 143215117 143215391 . 1000 . 443.34712 -1.0 \n", + "3 chr2 164600030 164600306 . 1000 . 442.64446 -1.0 \n", + "4 chr15 56246046 56246359 . 1000 . 
438.56423 -1.0 \n", + "\n", + " q_value peak \n", + "0 5.08726 136 \n", + "1 5.08726 133 \n", + "2 5.08726 139 \n", + "3 5.08726 137 \n", + "4 5.08726 154 " ] }, "execution_count": 2, @@ -87,84 +177,28 @@ } ], "source": [ - "# Load BED files with standard BED column names\n", - "bed_schema = {\n", - " \"chromosome\": pl.Utf8,\n", - " \"start_pos\": pl.Int64,\n", - " \"end_pos\": pl.Int64,\n", - " \"name\": pl.Utf8,\n", - " \"score\": pl.Int64,\n", - " \"strand\": pl.Utf8,\n", - " \"signal_value\": pl.Float64,\n", - " \"p_value\": pl.Float64,\n", - " \"q_value\": pl.Float64,\n", - " \"peak\": pl.Int64,\n", - "}\n", - "\n", - "# Load first BED file\n", - "features_a = pl.read_csv(\n", - " \".ENCFF199YFA.bed\", separator=\"\\t\", has_header=False, schema=bed_schema\n", - ")\n", - "\n", - "# Load second BED file\n", - "features_b = pl.read_csv(\n", - " \".ENCFF205OKL.bed\", separator=\"\\t\", has_header=False, schema=bed_schema\n", - ")\n", - "\n", - "print(f\"Features A: {len(features_a)} intervals\")\n", - "print(f\"Features B: {len(features_b)} intervals\")\n", - "print(\"\\nFeatures A preview:\")\n", - "features_a.head()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Setup GIQL engine\n", - "\n", - "Create a GIQL engine and register the table schemas." 
- ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "GIQL engine configured successfully\n" - ] - } - ], - "source": [ - "# Create GIQL engine with DuckDB backend\n", - "engine = GIQLEngine(target_dialect=\"duckdb\", verbose=False)\n", - "\n", - "# Register DataFrames using Arrow (preferred for DuckDB)\n", - "engine.conn.register(\"features_a\", features_a.to_arrow())\n", - "engine.conn.register(\"features_b\", features_b.to_arrow())\n", - "\n", - "# Register schemas with genomic columns\n", - "schema_dict = {\n", - " \"chromosome\": \"VARCHAR\",\n", - " \"start_pos\": \"BIGINT\",\n", - " \"end_pos\": \"BIGINT\",\n", - " \"name\": \"VARCHAR\",\n", - " \"score\": \"BIGINT\",\n", - " \"strand\": \"VARCHAR\",\n", - " \"signal_value\": \"DOUBLE\",\n", - " \"p_value\": \"DOUBLE\",\n", - " \"q_value\": \"DOUBLE\",\n", - " \"peak\": \"BIGINT\",\n", - "}\n", - "\n", - "for table in [\"features_a\", \"features_b\"]:\n", - " engine.register_table_schema(table, schema_dict, genomic_column=\"interval\")\n", - "\n", - "print(\"GIQL engine configured successfully\")" + "conn = duckdb.connect()\n", + "\n", + "# Load BED files with standard column names\n", + "bed_columns = \"\"\"columns={\n", + " 'chrom': 'VARCHAR', 'start': 'INTEGER', 'end': 'INTEGER',\n", + " 'name': 'VARCHAR', 'score': 'INTEGER', 'strand': 'VARCHAR',\n", + " 'signal_value': 'DOUBLE', 'p_value': 'DOUBLE',\n", + " 'q_value': 'DOUBLE', 'peak': 'INTEGER'\n", + "}\"\"\"\n", + "\n", + "for table_name, file_path in [\n", + " (\"features_a\", \".ENCFF199YFA.bed\"),\n", + " (\"features_b\", \".ENCFF205OKL.bed\"),\n", + "]:\n", + " conn.execute(f\"\"\"\n", + " CREATE TABLE {table_name} AS\n", + " SELECT * FROM read_csv('{file_path}', delim='\\t', header=false, {bed_columns})\n", + " \"\"\")\n", + "\n", + "print(\"Features A:\", conn.execute(\"SELECT COUNT(*) FROM features_a\").fetchone()[0], \"intervals\")\n", + 
"print(\"Features B:\", conn.execute(\"SELECT COUNT(*) FROM features_b\").fetchone()[0], \"intervals\")\n", + "conn.execute(\"SELECT * FROM features_a LIMIT 5\").fetchdf()" ] }, { @@ -182,7 +216,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 3, "metadata": {}, "outputs": [ { @@ -194,9 +228,9 @@ "FROM\n", " features_a AS a,\n", " features_b AS b\n", - "WHERE (a.\"chromosome\" = b.\"chromosome\"\n", - " AND a.\"start_pos\" < b.\"end_pos\"\n", - " AND a.\"end_pos\" > b.\"start_pos\")\n", + "WHERE (a.\"chrom\" = b.\"chrom\"\n", + " AND a.\"start\" < b.\"end\"\n", + " AND a.\"end\" > b.\"start\")\n", "\n", "================================================================================\n", "\n", @@ -224,9 +258,9 @@ " \n", " \n", " \n", - " chromosome\n", - " start_pos\n", - " end_pos\n", + " chrom\n", + " start\n", + " end\n", " name\n", " score\n", " strand\n", @@ -239,165 +273,165 @@ " \n", " \n", " 0\n", - " chr11\n", - " 34359322\n", - " 34359560\n", + " chr22\n", + " 37933998\n", + " 37934242\n", " .\n", " 1000\n", " .\n", - " 220.19056\n", + " 283.80615\n", " -1.0\n", " 5.08726\n", - " 117\n", + " 123\n", " \n", " \n", " 1\n", - " chr17\n", - " 18625675\n", - " 18625936\n", + " chr20\n", + " 267021\n", + " 267251\n", " .\n", " 1000\n", " .\n", - " 218.02293\n", + " 258.60836\n", " -1.0\n", " 5.08726\n", - " 133\n", + " 126\n", " \n", " \n", " 2\n", - " chr16\n", - " 67883801\n", - " 67884000\n", + " chr20\n", + " 63537989\n", + " 63538200\n", " .\n", " 1000\n", " .\n", - " 219.30369\n", + " 272.24062\n", " -1.0\n", " 5.08726\n", - " 102\n", + " 98\n", " \n", " \n", " 3\n", " chr14\n", - " 94036611\n", - " 94036828\n", + " 25042978\n", + " 25043164\n", " .\n", " 1000\n", " .\n", - " 220.16568\n", + " 259.47262\n", " -1.0\n", " 5.08726\n", - " 116\n", + " 106\n", " \n", " \n", " 4\n", - " chr8\n", - " 110060410\n", - " 110060629\n", + " chr15\n", + " 64728285\n", + " 64728479\n", " .\n", " 1000\n", " .\n", - " 219.42024\n", + " 
234.42653\n", " -1.0\n", " 5.08726\n", - " 106\n", + " 96\n", " \n", " \n", " 5\n", - " chr9\n", - " 135967630\n", - " 135967829\n", + " chr20\n", + " 34129746\n", + " 34129928\n", " .\n", " 1000\n", " .\n", - " 218.66102\n", + " 243.36217\n", " -1.0\n", " 5.08726\n", - " 92\n", + " 101\n", " \n", " \n", " 6\n", - " chr14\n", - " 94136789\n", - " 94136976\n", + " chr20\n", + " 48657805\n", + " 48658001\n", " .\n", " 1000\n", " .\n", - " 218.30192\n", + " 257.44302\n", " -1.0\n", " 5.08726\n", - " 99\n", + " 93\n", " \n", " \n", " 7\n", - " chr20\n", - " 24918423\n", - " 24918598\n", + " chr14\n", + " 38721933\n", + " 38722118\n", " .\n", " 1000\n", " .\n", - " 218.92314\n", + " 239.09691\n", " -1.0\n", " 5.08726\n", - " 94\n", + " 98\n", " \n", " \n", " 8\n", - " chr2\n", - " 202178449\n", - " 202178624\n", + " chr14\n", + " 23632038\n", + " 23632231\n", " .\n", " 1000\n", " .\n", - " 217.36688\n", + " 255.61314\n", " -1.0\n", " 5.08726\n", - " 90\n", + " 91\n", " \n", " \n", " 9\n", - " chr7\n", - " 120987697\n", - " 120987871\n", + " chr16\n", + " 11532213\n", + " 11532470\n", " .\n", " 1000\n", " .\n", - " 217.34347\n", + " 339.88370\n", " -1.0\n", " 5.08726\n", - " 85\n", + " 124\n", " \n", " \n", "\n", "" ], "text/plain": [ - " chromosome start_pos end_pos name score strand signal_value p_value \\\n", - "0 chr11 34359322 34359560 . 1000 . 220.19056 -1.0 \n", - "1 chr17 18625675 18625936 . 1000 . 218.02293 -1.0 \n", - "2 chr16 67883801 67884000 . 1000 . 219.30369 -1.0 \n", - "3 chr14 94036611 94036828 . 1000 . 220.16568 -1.0 \n", - "4 chr8 110060410 110060629 . 1000 . 219.42024 -1.0 \n", - "5 chr9 135967630 135967829 . 1000 . 218.66102 -1.0 \n", - "6 chr14 94136789 94136976 . 1000 . 218.30192 -1.0 \n", - "7 chr20 24918423 24918598 . 1000 . 218.92314 -1.0 \n", - "8 chr2 202178449 202178624 . 1000 . 217.36688 -1.0 \n", - "9 chr7 120987697 120987871 . 1000 . 
217.34347 -1.0 \n", + " chrom start end name score strand signal_value p_value \\\n", + "0 chr22 37933998 37934242 . 1000 . 283.80615 -1.0 \n", + "1 chr20 267021 267251 . 1000 . 258.60836 -1.0 \n", + "2 chr20 63537989 63538200 . 1000 . 272.24062 -1.0 \n", + "3 chr14 25042978 25043164 . 1000 . 259.47262 -1.0 \n", + "4 chr15 64728285 64728479 . 1000 . 234.42653 -1.0 \n", + "5 chr20 34129746 34129928 . 1000 . 243.36217 -1.0 \n", + "6 chr20 48657805 48658001 . 1000 . 257.44302 -1.0 \n", + "7 chr14 38721933 38722118 . 1000 . 239.09691 -1.0 \n", + "8 chr14 23632038 23632231 . 1000 . 255.61314 -1.0 \n", + "9 chr16 11532213 11532470 . 1000 . 339.88370 -1.0 \n", "\n", " q_value peak \n", - "0 5.08726 117 \n", - "1 5.08726 133 \n", - "2 5.08726 102 \n", - "3 5.08726 116 \n", - "4 5.08726 106 \n", - "5 5.08726 92 \n", - "6 5.08726 99 \n", - "7 5.08726 94 \n", - "8 5.08726 90 \n", - "9 5.08726 85 " + "0 5.08726 123 \n", + "1 5.08726 126 \n", + "2 5.08726 98 \n", + "3 5.08726 106 \n", + "4 5.08726 96 \n", + "5 5.08726 101 \n", + "6 5.08726 93 \n", + "7 5.08726 98 \n", + "8 5.08726 91 \n", + "9 5.08726 124 " ] }, - "execution_count": 4, + "execution_count": 3, "metadata": {}, "output_type": "execute_result" } @@ -411,7 +445,7 @@ "\"\"\"\n", "\n", "# Transpile to SQL\n", - "sql = engine.transpile(giql_query)\n", + "sql = transpile(giql_query)\n", "print(\"Transpiled SQL:\")\n", "print(\n", " sqlparse.format(\n", @@ -421,7 +455,7 @@ "print(\"\\n\" + \"=\" * 80 + \"\\n\")\n", "\n", "# Execute with DuckDB via GIQL engine\n", - "cursor = engine.conn.execute(sql)\n", + "cursor = conn.execute(sql)\n", "result = cursor.df() # Get result as pandas DataFrame\n", "print(f\"Result: {len(result)} overlapping intervals from A\")\n", "result.head(10)" @@ -438,7 +472,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 4, "metadata": {}, "outputs": [ { @@ -446,33 +480,17 @@ "output_type": "stream", "text": [ "Transpiled SQL:\n", - "WITH overlap_counts AS\n", - " 
(SELECT\n", - " a.chromosome,\n", - " a.start_pos,\n", - " a.end_pos,\n", - " a.signal_value,\n", - " COUNT(*) AS depth\n", - " FROM\n", - " features_a AS a,\n", - " features_b AS b\n", - " WHERE (a.\"chromosome\" = b.\"chromosome\"\n", - " AND a.\"start_pos\" < b.\"end_pos\"\n", - " AND a.\"end_pos\" > b.\"start_pos\")\n", - " GROUP BY\n", - " a.chromosome,\n", - " a.start_pos,\n", - " a.end_pos,\n", - " a.signal_value)\n", - "SELECT *\n", - "FROM overlap_counts\n", - "WHERE depth > 2\n", - "ORDER BY depth DESC\n", + "SELECT DISTINCT\n", + " a.*\n", + "FROM features_a AS a, features_b AS b\n", + "WHERE\n", + " (\n", + " a.\"chrom\" = b.\"chrom\" AND a.\"start\" < b.\"end\" AND a.\"end\" > b.\"start\"\n", + " )\n", "\n", "================================================================================\n", "\n", - "Result: 2 intervals with overlap depth >= 5\n", - "Max overlap depth: 3\n" + "Result: 42616 overlapping intervals from A\n" ] }, { @@ -496,80 +514,198 @@ " \n", " \n", " \n", - " chromosome\n", - " start_pos\n", - " end_pos\n", + " chrom\n", + " start\n", + " end\n", + " name\n", + " score\n", + " strand\n", " signal_value\n", - " depth\n", + " p_value\n", + " q_value\n", + " peak\n", " \n", " \n", " \n", " \n", " 0\n", - " chr14\n", - " 105999349\n", - " 105999944\n", - " 240.51649\n", - " 3\n", + " chr19\n", + " 45642140\n", + " 45642400\n", + " .\n", + " 1000\n", + " .\n", + " 297.57428\n", + " -1.0\n", + " 5.08726\n", + " 128\n", " \n", " \n", " 1\n", + " chr11\n", + " 518674\n", + " 518971\n", + " .\n", + " 1000\n", + " .\n", + " 315.46469\n", + " -1.0\n", + " 5.08726\n", + " 149\n", + " \n", + " \n", + " 2\n", + " chr20\n", + " 50794798\n", + " 50795030\n", + " .\n", + " 1000\n", + " .\n", + " 260.64664\n", + " -1.0\n", + " 5.08726\n", + " 115\n", + " \n", + " \n", + " 3\n", + " chr19\n", + " 46395603\n", + " 46395857\n", + " .\n", + " 1000\n", + " .\n", + " 335.49048\n", + " -1.0\n", + " 5.08726\n", + " 134\n", + " \n", + " \n", + " 4\n", + " 
chr9\n", + " 34646905\n", + " 34647110\n", + " .\n", + " 1000\n", + " .\n", + " 235.66436\n", + " -1.0\n", + " 5.08726\n", + " 100\n", + " \n", + " \n", + " 5\n", " chr14\n", - " 105999349\n", - " 105999944\n", - " 108.06722\n", - " 3\n", + " 53929955\n", + " 53930179\n", + " .\n", + " 1000\n", + " .\n", + " 340.46918\n", + " -1.0\n", + " 5.08726\n", + " 115\n", + " \n", + " \n", + " 6\n", + " chr12\n", + " 6323309\n", + " 6323518\n", + " .\n", + " 1000\n", + " .\n", + " 262.07653\n", + " -1.0\n", + " 5.08726\n", + " 99\n", + " \n", + " \n", + " 7\n", + " chr14\n", + " 32073682\n", + " 32073873\n", + " .\n", + " 1000\n", + " .\n", + " 278.18167\n", + " -1.0\n", + " 5.08726\n", + " 89\n", + " \n", + " \n", + " 8\n", + " chr20\n", + " 59087928\n", + " 59088111\n", + " .\n", + " 1000\n", + " .\n", + " 234.98679\n", + " -1.0\n", + " 5.08726\n", + " 92\n", + " \n", + " \n", + " 9\n", + " chr15\n", + " 79596083\n", + " 79596368\n", + " .\n", + " 1000\n", + " .\n", + " 347.14815\n", + " -1.0\n", + " 5.08726\n", + " 144\n", " \n", " \n", "\n", "" ], "text/plain": [ - " chromosome start_pos end_pos signal_value depth\n", - "0 chr14 105999349 105999944 240.51649 3\n", - "1 chr14 105999349 105999944 108.06722 3" + " chrom start end name score strand signal_value p_value \\\n", + "0 chr19 45642140 45642400 . 1000 . 297.57428 -1.0 \n", + "1 chr11 518674 518971 . 1000 . 315.46469 -1.0 \n", + "2 chr20 50794798 50795030 . 1000 . 260.64664 -1.0 \n", + "3 chr19 46395603 46395857 . 1000 . 335.49048 -1.0 \n", + "4 chr9 34646905 34647110 . 1000 . 235.66436 -1.0 \n", + "5 chr14 53929955 53930179 . 1000 . 340.46918 -1.0 \n", + "6 chr12 6323309 6323518 . 1000 . 262.07653 -1.0 \n", + "7 chr14 32073682 32073873 . 1000 . 278.18167 -1.0 \n", + "8 chr20 59087928 59088111 . 1000 . 234.98679 -1.0 \n", + "9 chr15 79596083 79596368 . 1000 . 
347.14815 -1.0 \n", + "\n", + " q_value peak \n", + "0 5.08726 128 \n", + "1 5.08726 149 \n", + "2 5.08726 115 \n", + "3 5.08726 134 \n", + "4 5.08726 100 \n", + "5 5.08726 115 \n", + "6 5.08726 99 \n", + "7 5.08726 89 \n", + "8 5.08726 92 \n", + "9 5.08726 144 " ] }, - "execution_count": 5, + "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "# Define GIQL query to find high-depth overlaps\n", "giql_query = \"\"\"\n", - " WITH overlap_counts AS (\n", - " SELECT \n", - " a.chromosome,\n", - " a.start_pos,\n", - " a.end_pos,\n", - " a.signal_value,\n", - " COUNT(*) as depth\n", - " FROM features_a a, features_b b\n", - " WHERE a.interval INTERSECTS b.interval\n", - " GROUP BY a.chromosome, a.start_pos, a.end_pos, a.signal_value\n", - " )\n", - " SELECT *\n", - " FROM overlap_counts\n", - " WHERE depth > 2\n", - " ORDER BY depth DESC\n", - "\"\"\"\n", - "\n", - "# Transpile to SQL\n", - "sql = engine.transpile(giql_query)\n", + " SELECT DISTINCT a.*\n", + " FROM features_a a, features_b b\n", + " WHERE a.interval INTERSECTS b.interval\n", + "\"\"\"\n", + "\n", + "sql = transpile(giql_query, tables=[\"features_a\", \"features_b\"])\n", "print(\"Transpiled SQL:\")\n", - "print(\n", - " sqlparse.format(\n", - " sql, reindent=True, keyword_case=\"upper\", indent_columns=True, indent_width=2\n", - " )\n", - ")\n", + "print(sqlglot.transpile(sql, pretty=True)[0])\n", "print(\"\\n\" + \"=\" * 80 + \"\\n\")\n", "\n", - "# Execute with DuckDB via GIQL engine\n", - "cursor = engine.conn.execute(sql)\n", - "result = cursor.df() # Get result as pandas DataFrame\n", - "print(f\"Result: {len(result)} intervals with overlap depth >= 5\")\n", - "print(f\"Max overlap depth: {result['depth'].max() if len(result) > 0 else 0}\")\n", + "result = conn.execute(sql).fetchdf()\n", + "print(f\"Result: {len(result)} overlapping intervals from A\")\n", "result.head(10)" ] }, @@ -586,6 +722,151 @@ "**GIQL Query**: `SELECT a.* FROM features_a a, features_b b 
WHERE a.interval WITHIN b.interval`" ] }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Transpiled SQL:\n", + "WITH overlap_counts AS (\n", + " SELECT\n", + " a.chrom,\n", + " a.start,\n", + " a.\"end\",\n", + " a.signal_value,\n", + " COUNT(*) AS depth\n", + " FROM features_a AS a, features_b AS b\n", + " WHERE\n", + " (\n", + " a.\"chrom\" = b.\"chrom\" AND a.\"start\" < b.\"end\" AND a.\"end\" > b.\"start\"\n", + " )\n", + " GROUP BY\n", + " a.chrom,\n", + " a.start,\n", + " a.\"end\",\n", + " a.signal_value\n", + ")\n", + "SELECT\n", + " *\n", + "FROM overlap_counts\n", + "WHERE\n", + " depth > 2\n", + "ORDER BY\n", + " depth DESC\n", + "\n", + "================================================================================\n", + "\n", + "Result: 2 intervals with overlap depth > 2\n", + "Max overlap depth: 3\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
chromstartendsignal_valuedepth
0chr14105999349105999944240.516493
1chr14105999349105999944108.067223
\n", + "
" + ], + "text/plain": [ + " chrom start end signal_value depth\n", + "0 chr14 105999349 105999944 240.51649 3\n", + "1 chr14 105999349 105999944 108.06722 3" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "giql_query = \"\"\"\n", + " WITH overlap_counts AS (\n", + " SELECT\n", + " a.chrom,\n", + " a.start,\n", + " a.\"end\",\n", + " a.signal_value,\n", + " COUNT(*) as depth\n", + " FROM features_a a, features_b b\n", + " WHERE a.interval INTERSECTS b.interval\n", + " GROUP BY a.chrom, a.start, a.\"end\", a.signal_value\n", + " )\n", + " SELECT *\n", + " FROM overlap_counts\n", + " WHERE depth > 2\n", + " ORDER BY depth DESC\n", + "\"\"\"\n", + "\n", + "sql = transpile(giql_query, tables=[\"features_a\", \"features_b\"])\n", + "print(\"Transpiled SQL:\")\n", + "print(sqlglot.transpile(sql, pretty=True)[0])\n", + "print(\"\\n\" + \"=\" * 80 + \"\\n\")\n", + "\n", + "result = conn.execute(sql).fetchdf()\n", + "print(f\"Result: {len(result)} intervals with overlap depth > 2\")\n", + "print(f\"Max overlap depth: {result['depth'].max() if len(result) > 0 else 0}\")\n", + "result.head(10)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "---\n", + "\n", + "## 3. 
CONTAINS operator\n", + "\n", + "Find features in A that **completely contain** features in B.\n", + "\n", + "**GIQL Query**: `SELECT a.* FROM features_a a, features_b b WHERE a.interval CONTAINS b.interval`" + ] + }, { "cell_type": "code", "execution_count": 6, @@ -599,12 +880,11 @@ "SELECT DISTINCT\n", " a.*,\n", " b.*\n", - "FROM\n", - " features_a AS a,\n", - " features_b AS b\n", - "WHERE (a.\"chromosome\" = b.\"chromosome\"\n", - " AND a.\"start_pos\" >= b.\"start_pos\"\n", - " AND a.\"end_pos\" <= b.\"end_pos\")\n", + "FROM features_a AS a, features_b AS b\n", + "WHERE\n", + " (\n", + " a.\"chrom\" = b.\"chrom\" AND a.\"start\" >= b.\"start\" AND a.\"end\" <= b.\"end\"\n", + " )\n", "\n", "================================================================================\n", "\n", @@ -632,9 +912,9 @@ " \n", " \n", " \n", - " chromosome\n", - " start_pos\n", - " end_pos\n", + " chrom\n", + " start\n", + " end\n", " name\n", " score\n", " strand\n", @@ -642,9 +922,9 @@ " p_value\n", " q_value\n", " peak\n", - " chromosome_1\n", - " start_pos_1\n", - " end_pos_1\n", + " chrom_1\n", + " start_1\n", + " end_1\n", " name_1\n", " score_1\n", " strand_1\n", @@ -657,274 +937,274 @@ " \n", " \n", " 0\n", - " chr14\n", - " 24216011\n", - " 24216203\n", + " chr19\n", + " 43774483\n", + " 43774712\n", " .\n", " 1000\n", " .\n", - " 251.87452\n", + " 252.61723\n", " -1.0\n", " 5.08726\n", - " 95\n", - " chr14\n", - " 24215977\n", - " 24216234\n", + " 103\n", + " chr19\n", + " 43774432\n", + " 43774742\n", " .\n", - " 1000\n", + " 951\n", " .\n", - " 209.90753\n", + " 28.41530\n", " -1.0\n", " 5.10518\n", - " 125\n", + " 155\n", " \n", " \n", " 1\n", - " chr14\n", - " 33792757\n", - " 33792965\n", + " chr17\n", + " 75615499\n", + " 75615705\n", " .\n", " 1000\n", " .\n", - " 252.08812\n", + " 244.31776\n", " -1.0\n", " 5.08726\n", - " 105\n", - " chr14\n", - " 33792735\n", - " 33793013\n", + " 96\n", + " chr17\n", + " 75615479\n", + " 75615789\n", " .\n", " 1000\n", " .\n", 
- " 232.27970\n", + " 26.62768\n", " -1.0\n", " 5.10518\n", - " 134\n", + " 155\n", " \n", " \n", " 2\n", - " chr6\n", - " 43769540\n", - " 43769748\n", + " chr11\n", + " 397249\n", + " 397474\n", " .\n", " 1000\n", " .\n", - " 250.99653\n", + " 237.92944\n", " -1.0\n", " 5.08726\n", - " 104\n", - " chr6\n", - " 43769499\n", - " 43769760\n", + " 114\n", + " chr11\n", + " 397230\n", + " 397540\n", " .\n", " 1000\n", " .\n", - " 154.83893\n", + " 31.74873\n", " -1.0\n", " 5.10518\n", - " 141\n", + " 155\n", " \n", " \n", " 3\n", - " chr17\n", - " 49861200\n", - " 49861405\n", + " chr20\n", + " 44116742\n", + " 44116950\n", " .\n", " 1000\n", " .\n", - " 250.16995\n", + " 292.51551\n", " -1.0\n", " 5.08726\n", - " 93\n", - " chr17\n", - " 49861195\n", - " 49861409\n", + " 106\n", + " chr20\n", + " 44116709\n", + " 44116975\n", " .\n", " 1000\n", " .\n", - " 78.03419\n", + " 201.73737\n", " -1.0\n", " 5.10518\n", - " 86\n", + " 132\n", " \n", " \n", " 4\n", - " chr2\n", - " 219476345\n", - " 219476531\n", + " chr16\n", + " 23434388\n", + " 23434589\n", " .\n", " 1000\n", " .\n", - " 248.18364\n", + " 267.80248\n", " -1.0\n", " 5.08726\n", - " 102\n", - " chr2\n", - " 219476292\n", - " 219476557\n", + " 106\n", + " chr16\n", + " 23434384\n", + " 23434621\n", " .\n", " 1000\n", " .\n", - " 160.44163\n", + " 148.78159\n", " -1.0\n", " 5.10518\n", - " 125\n", + " 119\n", " \n", " \n", " 5\n", - " chr11\n", - " 57527230\n", - " 57527419\n", + " chr17\n", + " 35804783\n", + " 35805037\n", " .\n", " 1000\n", " .\n", - " 247.38897\n", + " 314.27261\n", " -1.0\n", " 5.08726\n", - " 104\n", - " chr11\n", - " 57527209\n", - " 57527456\n", + " 134\n", + " chr17\n", + " 35804768\n", + " 35805048\n", " .\n", " 1000\n", " .\n", - " 121.89660\n", + " 113.42031\n", " -1.0\n", " 5.10518\n", - " 134\n", + " 149\n", " \n", " \n", " 6\n", - " chr2\n", - " 152025859\n", - " 152026122\n", + " chr4\n", + " 147866849\n", + " 147867113\n", " .\n", " 1000\n", " .\n", - " 247.10786\n", + " 
395.15579\n", " -1.0\n", " 5.08726\n", - " 146\n", - " chr2\n", - " 152025676\n", - " 152026161\n", + " 133\n", + " chr4\n", + " 147866840\n", + " 147867128\n", " .\n", " 1000\n", " .\n", - " 181.47229\n", + " 191.86545\n", " -1.0\n", " 5.10518\n", - " 304\n", + " 141\n", " \n", " \n", " 7\n", - " chr2\n", - " 112259523\n", - " 112259722\n", + " chr10\n", + " 97759515\n", + " 97759702\n", " .\n", " 1000\n", " .\n", - " 249.77072\n", + " 236.46727\n", " -1.0\n", " 5.08726\n", - " 95\n", - " chr2\n", - " 112259464\n", - " 112259743\n", + " 97\n", + " chr10\n", + " 97759495\n", + " 97759724\n", " .\n", " 1000\n", " .\n", - " 219.16901\n", + " 140.81146\n", " -1.0\n", " 5.10518\n", - " 141\n", + " 104\n", " \n", " \n", " 8\n", - " chr21\n", - " 43810840\n", - " 43811034\n", + " chr12\n", + " 53701870\n", + " 53702056\n", " .\n", " 1000\n", " .\n", - " 201.96817\n", + " 253.82117\n", " -1.0\n", " 5.08726\n", - " 102\n", - " chr21\n", - " 43810814\n", - " 43811039\n", + " 96\n", + " chr12\n", + " 53701848\n", + " 53702069\n", " .\n", " 1000\n", " .\n", - " 98.62279\n", + " 131.82871\n", " -1.0\n", " 5.10518\n", - " 131\n", + " 97\n", " \n", " \n", " 9\n", - " chr17\n", - " 44331388\n", - " 44331569\n", + " chr12\n", + " 115398399\n", + " 115398644\n", " .\n", " 1000\n", " .\n", - " 200.30254\n", + " 296.60121\n", " -1.0\n", " 5.08726\n", - " 100\n", - " chr17\n", - " 44331387\n", - " 44331617\n", + " 110\n", + " chr12\n", + " 115398396\n", + " 115398671\n", " .\n", " 1000\n", " .\n", - " 100.15563\n", + " 135.44415\n", " -1.0\n", " 5.10518\n", - " 116\n", + " 138\n", " \n", " \n", "\n", "" ], "text/plain": [ - " chromosome start_pos end_pos name score strand signal_value p_value \\\n", - "0 chr14 24216011 24216203 . 1000 . 251.87452 -1.0 \n", - "1 chr14 33792757 33792965 . 1000 . 252.08812 -1.0 \n", - "2 chr6 43769540 43769748 . 1000 . 250.99653 -1.0 \n", - "3 chr17 49861200 49861405 . 1000 . 250.16995 -1.0 \n", - "4 chr2 219476345 219476531 . 1000 . 
248.18364 -1.0 \n", - "5 chr11 57527230 57527419 . 1000 . 247.38897 -1.0 \n", - "6 chr2 152025859 152026122 . 1000 . 247.10786 -1.0 \n", - "7 chr2 112259523 112259722 . 1000 . 249.77072 -1.0 \n", - "8 chr21 43810840 43811034 . 1000 . 201.96817 -1.0 \n", - "9 chr17 44331388 44331569 . 1000 . 200.30254 -1.0 \n", + " chrom start end name score strand signal_value p_value \\\n", + "0 chr19 43774483 43774712 . 1000 . 252.61723 -1.0 \n", + "1 chr17 75615499 75615705 . 1000 . 244.31776 -1.0 \n", + "2 chr11 397249 397474 . 1000 . 237.92944 -1.0 \n", + "3 chr20 44116742 44116950 . 1000 . 292.51551 -1.0 \n", + "4 chr16 23434388 23434589 . 1000 . 267.80248 -1.0 \n", + "5 chr17 35804783 35805037 . 1000 . 314.27261 -1.0 \n", + "6 chr4 147866849 147867113 . 1000 . 395.15579 -1.0 \n", + "7 chr10 97759515 97759702 . 1000 . 236.46727 -1.0 \n", + "8 chr12 53701870 53702056 . 1000 . 253.82117 -1.0 \n", + "9 chr12 115398399 115398644 . 1000 . 296.60121 -1.0 \n", "\n", - " q_value peak chromosome_1 start_pos_1 end_pos_1 name_1 score_1 \\\n", - "0 5.08726 95 chr14 24215977 24216234 . 1000 \n", - "1 5.08726 105 chr14 33792735 33793013 . 1000 \n", - "2 5.08726 104 chr6 43769499 43769760 . 1000 \n", - "3 5.08726 93 chr17 49861195 49861409 . 1000 \n", - "4 5.08726 102 chr2 219476292 219476557 . 1000 \n", - "5 5.08726 104 chr11 57527209 57527456 . 1000 \n", - "6 5.08726 146 chr2 152025676 152026161 . 1000 \n", - "7 5.08726 95 chr2 112259464 112259743 . 1000 \n", - "8 5.08726 102 chr21 43810814 43811039 . 1000 \n", - "9 5.08726 100 chr17 44331387 44331617 . 1000 \n", + " q_value peak chrom_1 start_1 end_1 name_1 score_1 strand_1 \\\n", + "0 5.08726 103 chr19 43774432 43774742 . 951 . \n", + "1 5.08726 96 chr17 75615479 75615789 . 1000 . \n", + "2 5.08726 114 chr11 397230 397540 . 1000 . \n", + "3 5.08726 106 chr20 44116709 44116975 . 1000 . \n", + "4 5.08726 106 chr16 23434384 23434621 . 1000 . \n", + "5 5.08726 134 chr17 35804768 35805048 . 1000 . 
\n", + "6 5.08726 133 chr4 147866840 147867128 . 1000 . \n", + "7 5.08726 97 chr10 97759495 97759724 . 1000 . \n", + "8 5.08726 96 chr12 53701848 53702069 . 1000 . \n", + "9 5.08726 110 chr12 115398396 115398671 . 1000 . \n", "\n", - " strand_1 signal_value_1 p_value_1 q_value_1 peak_1 \n", - "0 . 209.90753 -1.0 5.10518 125 \n", - "1 . 232.27970 -1.0 5.10518 134 \n", - "2 . 154.83893 -1.0 5.10518 141 \n", - "3 . 78.03419 -1.0 5.10518 86 \n", - "4 . 160.44163 -1.0 5.10518 125 \n", - "5 . 121.89660 -1.0 5.10518 134 \n", - "6 . 181.47229 -1.0 5.10518 304 \n", - "7 . 219.16901 -1.0 5.10518 141 \n", - "8 . 98.62279 -1.0 5.10518 131 \n", - "9 . 100.15563 -1.0 5.10518 116 " + " signal_value_1 p_value_1 q_value_1 peak_1 \n", + "0 28.41530 -1.0 5.10518 155 \n", + "1 26.62768 -1.0 5.10518 155 \n", + "2 31.74873 -1.0 5.10518 155 \n", + "3 201.73737 -1.0 5.10518 132 \n", + "4 148.78159 -1.0 5.10518 119 \n", + "5 113.42031 -1.0 5.10518 149 \n", + "6 191.86545 -1.0 5.10518 141 \n", + "7 140.81146 -1.0 5.10518 104 \n", + "8 131.82871 -1.0 5.10518 97 \n", + "9 135.44415 -1.0 5.10518 138 " ] }, "execution_count": 6, @@ -933,26 +1213,18 @@ } ], "source": [ - "# Define GIQL query\n", "giql_query = \"\"\"\n", " SELECT DISTINCT a.*, b.*\n", " FROM features_a a, features_b b\n", " WHERE a.interval WITHIN b.interval\n", "\"\"\"\n", "\n", - "# Transpile to SQL\n", - "sql = engine.transpile(giql_query)\n", + "sql = transpile(giql_query, tables=[\"features_a\", \"features_b\"])\n", "print(\"Transpiled SQL:\")\n", - "print(\n", - " sqlparse.format(\n", - " sql, reindent=True, keyword_case=\"upper\", indent_columns=True, indent_width=2\n", - " )\n", - ")\n", + "print(sqlglot.transpile(sql, pretty=True)[0])\n", "print(\"\\n\" + \"=\" * 80 + \"\\n\")\n", "\n", - "# Execute with DuckDB via GIQL engine\n", - "cursor = engine.conn.execute(sql)\n", - "result = cursor.df() # Get result as pandas DataFrame\n", + "result = conn.execute(sql).fetchdf()\n", "print(f\"Result: {len(result)} intervals from 
A contained within B\")\n", "result.head(10)" ] @@ -963,11 +1235,13 @@ "source": [ "---\n", "\n", - "## 3. CONTAINS operator\n", + "## 4. MERGE operator\n", "\n", - "Find features in A that **completely contain** features in B.\n", + "**Combine overlapping intervals** from features_a into merged regions.\n", "\n", - "**GIQL Query**: `SELECT a.* FROM features_a a, features_b b WHERE a.interval CONTAINS b.interval`" + "Similar to `bedtools merge`, this collapses overlapping genomic intervals.\n", + "\n", + "**GIQL Query**: `SELECT MERGE(interval) FROM features_a`" ] }, { @@ -980,13 +1254,13 @@ "output_type": "stream", "text": [ "Transpiled SQL:\n", - "SELECT DISTINCT a.*\n", - "FROM\n", - " features_a AS a,\n", - " features_b AS b\n", - "WHERE (a.\"chromosome\" = b.\"chromosome\"\n", - " AND a.\"start_pos\" <= b.\"start_pos\"\n", - " AND a.\"end_pos\" >= b.\"end_pos\")\n", + "SELECT DISTINCT\n", + " a.*\n", + "FROM features_a AS a, features_b AS b\n", + "WHERE\n", + " (\n", + " a.\"chrom\" = b.\"chrom\" AND a.\"start\" <= b.\"start\" AND a.\"end\" >= b.\"end\"\n", + " )\n", "\n", "================================================================================\n", "\n", @@ -1014,9 +1288,9 @@ " \n", " \n", " \n", - " chromosome\n", - " start_pos\n", - " end_pos\n", + " chrom\n", + " start\n", + " end\n", " name\n", " score\n", " strand\n", @@ -1029,162 +1303,162 @@ " \n", " \n", " 0\n", - " chr4\n", - " 102431434\n", - " 102431659\n", + " chr22\n", + " 22559162\n", + " 22559461\n", " .\n", " 1000\n", " .\n", - " 299.13707\n", + " 378.39470\n", " -1.0\n", " 5.08726\n", - " 111\n", + " 143\n", " \n", " \n", " 1\n", - " chr14\n", - " 49586912\n", - " 49587154\n", + " chr19\n", + " 45642140\n", + " 45642400\n", " .\n", " 1000\n", " .\n", - " 303.26119\n", + " 297.57428\n", " -1.0\n", " 5.08726\n", - " 126\n", + " 128\n", " \n", " \n", " 2\n", - " chr5\n", - " 134039564\n", - " 134039867\n", + " chr19\n", + " 46395603\n", + " 46395857\n", " .\n", " 1000\n", " .\n", - " 
317.39854\n", + " 335.49048\n", " -1.0\n", " 5.08726\n", - " 157\n", + " 134\n", " \n", " \n", " 3\n", - " chr18\n", - " 77900138\n", - " 77900404\n", + " chr12\n", + " 6323309\n", + " 6323518\n", " .\n", " 1000\n", " .\n", - " 314.56958\n", + " 262.07653\n", " -1.0\n", " 5.08726\n", - " 127\n", + " 99\n", " \n", " \n", " 4\n", - " chrX\n", - " 123960372\n", - " 123960620\n", + " chr12\n", + " 546987\n", + " 547224\n", " .\n", " 1000\n", " .\n", - " 291.07538\n", + " 273.49144\n", " -1.0\n", " 5.08726\n", - " 123\n", + " 126\n", " \n", " \n", " 5\n", - " chr1\n", - " 157311233\n", - " 157311386\n", + " chr6\n", + " 34154821\n", + " 34155022\n", " .\n", " 1000\n", " .\n", - " 188.57350\n", + " 258.93713\n", " -1.0\n", " 5.08726\n", - " 80\n", + " 95\n", " \n", " \n", " 6\n", - " chr21\n", - " 32403252\n", - " 32403390\n", + " chr5\n", + " 181184125\n", + " 181184440\n", " .\n", " 1000\n", " .\n", - " 132.27311\n", + " 277.71855\n", " -1.0\n", " 5.08726\n", - " 80\n", + " 138\n", " \n", " \n", " 7\n", - " chr14\n", - " 104564131\n", - " 104564266\n", + " chr16\n", + " 11532213\n", + " 11532470\n", " .\n", " 1000\n", " .\n", - " 131.97784\n", + " 339.88370\n", " -1.0\n", " 5.08726\n", - " 68\n", + " 124\n", " \n", " \n", " 8\n", - " chr10\n", - " 132204836\n", - " 132204945\n", + " chr5\n", + " 95781099\n", + " 95781372\n", " .\n", " 1000\n", " .\n", - " 114.77888\n", + " 255.82894\n", " -1.0\n", " 5.08726\n", - " 59\n", + " 132\n", " \n", " \n", " 9\n", - " chrX\n", - " 54453024\n", - " 54453130\n", + " chr12\n", + " 111396988\n", + " 111397250\n", " .\n", " 1000\n", " .\n", - " 102.02848\n", + " 406.49219\n", " -1.0\n", " 5.08726\n", - " 65\n", + " 130\n", " \n", " \n", "\n", "" ], "text/plain": [ - " chromosome start_pos end_pos name score strand signal_value p_value \\\n", - "0 chr4 102431434 102431659 . 1000 . 299.13707 -1.0 \n", - "1 chr14 49586912 49587154 . 1000 . 303.26119 -1.0 \n", - "2 chr5 134039564 134039867 . 1000 . 
317.39854 -1.0 \n", - "3 chr18 77900138 77900404 . 1000 . 314.56958 -1.0 \n", - "4 chrX 123960372 123960620 . 1000 . 291.07538 -1.0 \n", - "5 chr1 157311233 157311386 . 1000 . 188.57350 -1.0 \n", - "6 chr21 32403252 32403390 . 1000 . 132.27311 -1.0 \n", - "7 chr14 104564131 104564266 . 1000 . 131.97784 -1.0 \n", - "8 chr10 132204836 132204945 . 1000 . 114.77888 -1.0 \n", - "9 chrX 54453024 54453130 . 1000 . 102.02848 -1.0 \n", + " chrom start end name score strand signal_value p_value \\\n", + "0 chr22 22559162 22559461 . 1000 . 378.39470 -1.0 \n", + "1 chr19 45642140 45642400 . 1000 . 297.57428 -1.0 \n", + "2 chr19 46395603 46395857 . 1000 . 335.49048 -1.0 \n", + "3 chr12 6323309 6323518 . 1000 . 262.07653 -1.0 \n", + "4 chr12 546987 547224 . 1000 . 273.49144 -1.0 \n", + "5 chr6 34154821 34155022 . 1000 . 258.93713 -1.0 \n", + "6 chr5 181184125 181184440 . 1000 . 277.71855 -1.0 \n", + "7 chr16 11532213 11532470 . 1000 . 339.88370 -1.0 \n", + "8 chr5 95781099 95781372 . 1000 . 255.82894 -1.0 \n", + "9 chr12 111396988 111397250 . 1000 . 
406.49219 -1.0 \n", "\n", " q_value peak \n", - "0 5.08726 111 \n", - "1 5.08726 126 \n", - "2 5.08726 157 \n", - "3 5.08726 127 \n", - "4 5.08726 123 \n", - "5 5.08726 80 \n", - "6 5.08726 80 \n", - "7 5.08726 68 \n", - "8 5.08726 59 \n", - "9 5.08726 65 " + "0 5.08726 143 \n", + "1 5.08726 128 \n", + "2 5.08726 134 \n", + "3 5.08726 99 \n", + "4 5.08726 126 \n", + "5 5.08726 95 \n", + "6 5.08726 138 \n", + "7 5.08726 124 \n", + "8 5.08726 132 \n", + "9 5.08726 130 " ] }, "execution_count": 7, @@ -1193,26 +1467,18 @@ } ], "source": [ - "# Define GIQL query\n", "giql_query = \"\"\"\n", " SELECT DISTINCT a.*\n", " FROM features_a a, features_b b\n", " WHERE a.interval CONTAINS b.interval\n", "\"\"\"\n", "\n", - "# Transpile to SQL\n", - "sql = engine.transpile(giql_query)\n", + "sql = transpile(giql_query, tables=[\"features_a\", \"features_b\"])\n", "print(\"Transpiled SQL:\")\n", - "print(\n", - " sqlparse.format(\n", - " sql, reindent=True, keyword_case=\"upper\", indent_columns=True, indent_width=2\n", - " )\n", - ")\n", + "print(sqlglot.transpile(sql, pretty=True)[0])\n", "print(\"\\n\" + \"=\" * 80 + \"\\n\")\n", "\n", - "# Execute with DuckDB via GIQL engine\n", - "cursor = engine.conn.execute(sql)\n", - "result = cursor.df() # Get result as pandas DataFrame\n", + "result = conn.execute(sql).fetchdf()\n", "print(f\"Result: {len(result)} intervals from A that contain B\")\n", "result.head(10)" ] @@ -1221,15 +1487,9 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "---\n", - "\n", - "## 4. MERGE operator\n", - "\n", - "**Combine overlapping intervals** from features_a into merged regions.\n", - "\n", - "Similar to `bedtools merge`, this collapses overlapping genomic intervals.\n", + "### MERGE with aggregation CTE\n", "\n", - "**GIQL Query**: `SELECT MERGE(interval) FROM features_a`" + "Compute statistics on merged intervals and filter for intervals that merged multiple features." 
] }, { @@ -1243,33 +1503,34 @@ "text": [ "Transpiled SQL:\n", "SELECT\n", - " \"chromosome\",\n", - " MIN(\"start_pos\") AS start_pos,\n", - " MAX(\"end_pos\") AS end_pos\n", - "FROM\n", - " (SELECT\n", - " *,\n", - " SUM(is_new_cluster) OVER (PARTITION BY \"chromosome\"\n", - " ORDER BY \"start_pos\" NULLS LAST) AS __giql_cluster_id\n", - " FROM\n", - " (SELECT\n", - " *,\n", - " CASE\n", - " WHEN LAG(\"end_pos\") OVER (PARTITION BY \"chromosome\"\n", - " ORDER BY \"start_pos\" NULLS LAST) >= \"start_pos\" THEN 0\n", - " ELSE 1\n", - " END AS is_new_cluster\n", - " FROM features_b) AS lag_calc) AS clustered\n", + " \"chrom\",\n", + " MIN(\"start\") AS start,\n", + " MAX(\"end\") AS end\n", + "FROM (\n", + " SELECT\n", + " *,\n", + " SUM(is_new_cluster) OVER (PARTITION BY \"chrom\" ORDER BY \"start\" NULLS LAST) AS __giql_cluster_id\n", + " FROM (\n", + " SELECT\n", + " *,\n", + " CASE\n", + " WHEN LAG(\"end\") OVER (PARTITION BY \"chrom\" ORDER BY \"start\" NULLS LAST) >= \"start\"\n", + " THEN 0\n", + " ELSE 1\n", + " END AS is_new_cluster\n", + " FROM features_b\n", + " ) AS lag_calc\n", + ") AS clustered\n", "GROUP BY\n", - " chromosome,\n", + " chrom,\n", " __giql_cluster_id\n", "ORDER BY\n", - " \"chromosome\" NULLS LAST,\n", - " \"start_pos\" NULLS LAST\n", + " \"chrom\" NULLS LAST,\n", + " \"start\" NULLS LAST\n", "\n", "================================================================================\n", "\n", - "Result: 45630 merged intervals (from 60893 original)\n" + "Result: 45630 merged intervals (from 46196 original)\n" ] }, { @@ -1293,9 +1554,9 @@ " \n", " \n", " \n", - " chromosome\n", - " start_pos\n", - " end_pos\n", + " chrom\n", + " start\n", + " end\n", " \n", " \n", " \n", @@ -1364,17 +1625,17 @@ "" ], "text/plain": [ - " chromosome start_pos end_pos\n", - "0 chr1 186654 186964\n", - "1 chr1 267979 268128\n", - "2 chr1 850424 850734\n", - "3 chr1 869711 870021\n", - "4 chr1 912864 913174\n", - "5 chr1 919635 919945\n", - "6 chr1 931626 
931936\n", - "7 chr1 938195 938352\n", - "8 chr1 951414 951724\n", - "9 chr1 976080 976390" + " chrom start end\n", + "0 chr1 186654 186964\n", + "1 chr1 267979 268128\n", + "2 chr1 850424 850734\n", + "3 chr1 869711 870021\n", + "4 chr1 912864 913174\n", + "5 chr1 919635 919945\n", + "6 chr1 931626 931936\n", + "7 chr1 938195 938352\n", + "8 chr1 951414 951724\n", + "9 chr1 976080 976390" ] }, "execution_count": 8, @@ -1383,26 +1644,19 @@ } ], "source": [ - "# Define GIQL query - basic merge\n", "giql_query = \"\"\"\n", " SELECT MERGE(interval)\n", " FROM features_b\n", "\"\"\"\n", "\n", - "# Transpile to SQL\n", - "sql = engine.transpile(giql_query)\n", + "sql = transpile(giql_query, tables=[\"features_b\"])\n", "print(\"Transpiled SQL:\")\n", - "print(\n", - " sqlparse.format(\n", - " sql, reindent=True, keyword_case=\"upper\", indent_columns=True, indent_width=2\n", - " )\n", - ")\n", + "print(sqlglot.transpile(sql, pretty=True)[0])\n", "print(\"\\n\" + \"=\" * 80 + \"\\n\")\n", "\n", - "# Execute with DuckDB via GIQL engine\n", - "cursor = engine.conn.execute(sql)\n", - "result = cursor.df() # Get result as pandas DataFrame\n", - "print(f\"Result: {len(result)} merged intervals (from {len(features_a)} original)\")\n", + "n_original = conn.execute(\"SELECT COUNT(*) FROM features_b\").fetchone()[0]\n", + "result = conn.execute(sql).fetchdf()\n", + "print(f\"Result: {len(result)} merged intervals (from {n_original} original)\")\n", "result.head(10)" ] }, @@ -1410,9 +1664,9 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### MERGE with aggregation CTE\n", + "### MERGE with distance parameter\n", "\n", - "Compute statistics on merged intervals and filter for intervals that merged multiple features." + "Merge intervals within 1000bp of each other." 
] }, { @@ -1425,46 +1679,44 @@ "output_type": "stream", "text": [ "Transpiled SQL:\n", - "WITH merged_intervals AS\n", - " (SELECT\n", - " \"chromosome\",\n", - " MIN(\"start_pos\") AS start_pos,\n", - " MAX(\"end_pos\") AS end_pos,\n", - " COUNT(*) AS interval_count,\n", - " AVG(signal_value) AS avg_signal\n", - " FROM\n", - " (SELECT\n", + "WITH merged_intervals AS (\n", + " SELECT\n", + " \"chrom\",\n", + " MIN(\"start\") AS start,\n", + " MAX(\"end\") AS end,\n", + " COUNT(*) AS interval_count,\n", + " AVG(signal_value) AS avg_signal\n", + " FROM (\n", + " SELECT\n", + " *,\n", + " SUM(is_new_cluster) OVER (PARTITION BY \"chrom\" ORDER BY \"start\" NULLS LAST) AS __giql_cluster_id\n", + " FROM (\n", + " SELECT\n", " *,\n", - " SUM(is_new_cluster) OVER (PARTITION BY \"chromosome\"\n", - " ORDER BY \"start_pos\" NULLS LAST) AS __giql_cluster_id\n", - " FROM\n", - " (SELECT\n", - " *,\n", - " CASE\n", - " WHEN LAG(\"end_pos\") OVER (PARTITION BY \"chromosome\"\n", - " ORDER BY \"start_pos\" NULLS LAST) >= \"start_pos\" THEN 0\n", - " ELSE 1\n", - " END AS is_new_cluster\n", - " FROM features_b) AS lag_calc) AS clustered\n", - " GROUP BY\n", - " chromosome,\n", - " __giql_cluster_id\n", - " ORDER BY\n", - " \"chromosome\" NULLS LAST,\n", - " \"start_pos\" NULLS LAST)\n", - "SELECT *\n", + " CASE\n", + " WHEN LAG(\"end\") OVER (PARTITION BY \"chrom\" ORDER BY \"start\" NULLS LAST) >= \"start\"\n", + " THEN 0\n", + " ELSE 1\n", + " END AS is_new_cluster\n", + " FROM features_b\n", + " ) AS lag_calc\n", + " ) AS clustered\n", + " GROUP BY\n", + " chrom,\n", + " __giql_cluster_id\n", + " ORDER BY\n", + " \"chrom\" NULLS LAST,\n", + " \"start\" NULLS LAST\n", + ")\n", + "SELECT\n", + " *\n", "FROM merged_intervals\n", - "WHERE interval_count > 1\n", + "WHERE\n", + " interval_count > 1\n", "\n", "================================================================================\n", - "\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Result: 
559 merged intervals with more than 1 original interval\n" + "\n", + "Result: 559 merged intervals with more than 1 original interval\n" ] }, { @@ -1488,9 +1740,9 @@ " \n", " \n", " \n", - " chromosome\n", - " start_pos\n", - " end_pos\n", + " chrom\n", + " start\n", + " end\n", " interval_count\n", " avg_signal\n", " \n", @@ -1581,17 +1833,17 @@ "" ], "text/plain": [ - " chromosome start_pos end_pos interval_count avg_signal\n", - "0 chr1 1300073 1300592 2 14.844510\n", - "1 chr1 8374912 8375330 2 28.608395\n", - "2 chr1 9717307 9717819 2 13.833490\n", - "3 chr1 14782921 14783438 2 7.107920\n", - "4 chr1 18362141 18362677 2 9.312525\n", - "5 chr1 19445773 19446283 2 16.590025\n", - "6 chr1 23958874 23959385 2 10.616965\n", - "7 chr1 24157968 24158482 2 11.985625\n", - "8 chr1 27392817 27393359 2 11.789830\n", - "9 chr1 30106143 30106664 2 31.694800" + " chrom start end interval_count avg_signal\n", + "0 chr1 1300073 1300592 2 14.844510\n", + "1 chr1 8374912 8375330 2 28.608395\n", + "2 chr1 9717307 9717819 2 13.833490\n", + "3 chr1 14782921 14783438 2 7.107920\n", + "4 chr1 18362141 18362677 2 9.312525\n", + "5 chr1 19445773 19446283 2 16.590025\n", + "6 chr1 23958874 23959385 2 10.616965\n", + "7 chr1 24157968 24158482 2 11.985625\n", + "8 chr1 27392817 27393359 2 11.789830\n", + "9 chr1 30106143 30106664 2 31.694800" ] }, "execution_count": 9, @@ -1600,7 +1852,6 @@ } ], "source": [ - "# Define GIQL query with aggregations in a CTE\n", "giql_query = \"\"\"\n", " WITH merged_intervals AS (\n", " SELECT\n", @@ -1614,19 +1865,12 @@ " WHERE interval_count > 1\n", "\"\"\"\n", "\n", - "# Transpile to SQL\n", - "sql = engine.transpile(giql_query)\n", + "sql = transpile(giql_query, tables=[\"features_b\"])\n", "print(\"Transpiled SQL:\")\n", - "print(\n", - " sqlparse.format(\n", - " sql, reindent=True, keyword_case=\"upper\", indent_columns=True, indent_width=2\n", - " )\n", - ")\n", + "print(sqlglot.transpile(sql, pretty=True)[0])\n", "print(\"\\n\" + \"=\" * 80 + 
\"\\n\")\n", "\n", - "# Execute with DuckDB via GIQL engine\n", - "cursor = engine.conn.execute(sql)\n", - "result = cursor.df() # Get result as pandas DataFrame\n", + "result = conn.execute(sql).fetchdf()\n", "print(f\"Result: {len(result)} merged intervals with more than 1 original interval\")\n", "result.head(10)" ] @@ -1635,9 +1879,15 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### MERGE with distance parameter\n", + "---\n", "\n", - "Merge intervals within 1000bp of each other." + "## 5. CLUSTER operator\n", + "\n", + "**Assign cluster IDs** to overlapping intervals from features_a.\n", + "\n", + "Similar to `bedtools cluster`, this groups overlapping genomic intervals.\n", + "\n", + "**GIQL Query**: `SELECT *, CLUSTER(interval) AS cluster_id FROM features_a`" ] }, { @@ -1651,34 +1901,35 @@ "text": [ "Transpiled SQL:\n", "SELECT\n", - " \"chromosome\",\n", - " MIN(\"start_pos\") AS start_pos,\n", - " MAX(\"end_pos\") AS end_pos\n", - "FROM\n", - " (SELECT\n", - " *,\n", - " SUM(is_new_cluster) OVER (PARTITION BY \"chromosome\"\n", - " ORDER BY \"start_pos\" NULLS LAST) AS __giql_cluster_id\n", - " FROM\n", - " (SELECT\n", - " *,\n", - " CASE\n", - " WHEN LAG(\"end_pos\") OVER (PARTITION BY \"chromosome\"\n", - " ORDER BY \"start_pos\" NULLS LAST) + 10000 >= \"start_pos\" THEN 0\n", - " ELSE 1\n", - " END AS is_new_cluster\n", - " FROM features_b) AS lag_calc) AS clustered\n", + " \"chrom\",\n", + " MIN(\"start\") AS start,\n", + " MAX(\"end\") AS end\n", + "FROM (\n", + " SELECT\n", + " *,\n", + " SUM(is_new_cluster) OVER (PARTITION BY \"chrom\" ORDER BY \"start\" NULLS LAST) AS __giql_cluster_id\n", + " FROM (\n", + " SELECT\n", + " *,\n", + " CASE\n", + " WHEN LAG(\"end\") OVER (PARTITION BY \"chrom\" ORDER BY \"start\" NULLS LAST) + 10000 >= \"start\"\n", + " THEN 0\n", + " ELSE 1\n", + " END AS is_new_cluster\n", + " FROM features_b\n", + " ) AS lag_calc\n", + ") AS clustered\n", "GROUP BY\n", - " chromosome,\n", + " chrom,\n", " 
__giql_cluster_id\n", "ORDER BY\n", - " \"chromosome\" NULLS LAST,\n", - " \"start_pos\" NULLS LAST\n", + " \"chrom\" NULLS LAST,\n", + " \"start\" NULLS LAST\n", "\n", "================================================================================\n", "\n", - "Result: 34097 merged intervals (1000bp distance)\n", - "Compared to: 60893 original intervals\n" + "Result: 34097 merged intervals (10000bp distance)\n", + "Compared to: 46196 original intervals\n" ] }, { @@ -1702,9 +1953,9 @@ " \n", " \n", " \n", - " chromosome\n", - " start_pos\n", - " end_pos\n", + " chrom\n", + " start\n", + " end\n", " \n", " \n", " \n", @@ -1773,17 +2024,17 @@ "" ], "text/plain": [ - " chromosome start_pos end_pos\n", - "0 chr1 186654 186964\n", - "1 chr1 267979 268128\n", - "2 chr1 850424 850734\n", - "3 chr1 869711 870021\n", - "4 chr1 912864 919945\n", - "5 chr1 931626 938352\n", - "6 chr1 951414 951724\n", - "7 chr1 976080 984477\n", - "8 chr1 1013987 1015656\n", - "9 chr1 1063787 1064097" + " chrom start end\n", + "0 chr1 186654 186964\n", + "1 chr1 267979 268128\n", + "2 chr1 850424 850734\n", + "3 chr1 869711 870021\n", + "4 chr1 912864 919945\n", + "5 chr1 931626 938352\n", + "6 chr1 951414 951724\n", + "7 chr1 976080 984477\n", + "8 chr1 1013987 1015656\n", + "9 chr1 1063787 1064097" ] }, "execution_count": 10, @@ -1792,27 +2043,20 @@ } ], "source": [ - "# Define GIQL query - merge with distance parameter\n", "giql_query = \"\"\"\n", " SELECT MERGE(interval, 10000)\n", " FROM features_b\n", "\"\"\"\n", "\n", - "# Transpile to SQL\n", - "sql = engine.transpile(giql_query)\n", + "sql = transpile(giql_query, tables=[\"features_b\"])\n", "print(\"Transpiled SQL:\")\n", - "print(\n", - " sqlparse.format(\n", - " sql, reindent=True, keyword_case=\"upper\", indent_columns=True, indent_width=2\n", - " )\n", - ")\n", + "print(sqlglot.transpile(sql, pretty=True)[0])\n", "print(\"\\n\" + \"=\" * 80 + \"\\n\")\n", "\n", - "# Execute with DuckDB via GIQL engine\n", - "cursor = 
engine.conn.execute(sql)\n", - "result = cursor.df() # Get result as pandas DataFrame\n", - "print(f\"Result: {len(result)} merged intervals (1000bp distance)\")\n", - "print(f\"Compared to: {len(features_a)} original intervals\")\n", + "n_original = conn.execute(\"SELECT COUNT(*) FROM features_b\").fetchone()[0]\n", + "result = conn.execute(sql).fetchdf()\n", + "print(f\"Result: {len(result)} merged intervals (10000bp distance)\")\n", + "print(f\"Compared to: {n_original} original intervals\")\n", "result.head(10)" ] }, @@ -1820,15 +2064,9 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "---\n", - "\n", - "## 5. CLUSTER operator\n", - "\n", - "**Assign cluster IDs** to overlapping intervals from features_a.\n", - "\n", - "Similar to `bedtools cluster`, this groups overlapping genomic intervals.\n", + "### CLUSTER with distance parameter\n", "\n", - "**GIQL Query**: `SELECT *, CLUSTER(interval) AS cluster_id FROM features_a`" + "Cluster intervals within 1000bp of each other." ] }, { @@ -1842,27 +2080,27 @@ "text": [ "Transpiled SQL:\n", "SELECT\n", - " chromosome,\n", - " start_pos,\n", - " end_pos,\n", + " chrom,\n", + " start,\n", + " \"end\",\n", " signal_value,\n", - " SUM(is_new_cluster) OVER (PARTITION BY \"chromosome\"\n", - " ORDER BY \"start_pos\" NULLS LAST) AS cluster_id\n", - "FROM\n", - " (SELECT\n", - " chromosome,\n", - " start_pos,\n", - " end_pos,\n", - " signal_value,\n", - " CASE\n", - " WHEN LAG(\"end_pos\") OVER (PARTITION BY \"chromosome\"\n", - " ORDER BY \"start_pos\" NULLS LAST) >= \"start_pos\" THEN 0\n", - " ELSE 1\n", - " END AS is_new_cluster\n", - " FROM features_a) AS lag_calc\n", + " SUM(is_new_cluster) OVER (PARTITION BY \"chrom\" ORDER BY \"start\" NULLS LAST) AS cluster_id\n", + "FROM (\n", + " SELECT\n", + " chrom,\n", + " start,\n", + " \"end\",\n", + " signal_value,\n", + " CASE\n", + " WHEN LAG(\"end\") OVER (PARTITION BY \"chrom\" ORDER BY \"start\" NULLS LAST) >= \"start\"\n", + " THEN 0\n", + " ELSE 1\n", + " END 
AS is_new_cluster\n", + " FROM features_a\n", + ") AS lag_calc\n", "ORDER BY\n", - " chromosome,\n", - " start_pos\n", + " chrom,\n", + " start\n", "\n", "================================================================================\n", "\n", @@ -1891,9 +2129,9 @@ " \n", " \n", " \n", - " chromosome\n", - " start_pos\n", - " end_pos\n", + " chrom\n", + " start\n", + " end\n", " signal_value\n", " cluster_id\n", " \n", @@ -1984,17 +2222,17 @@ "" ], "text/plain": [ - " chromosome start_pos end_pos signal_value cluster_id\n", - "0 chr1 181368 181564 27.54796 1.0\n", - "1 chr1 186650 186846 19.93098 2.0\n", - "2 chr1 267909 268105 88.00863 3.0\n", - "3 chr1 586106 586302 35.02027 4.0\n", - "4 chr1 729261 729457 23.29415 5.0\n", - "5 chr1 778812 779008 56.97663 6.0\n", - "6 chr1 850473 850669 27.08243 7.0\n", - "7 chr1 858056 858252 15.55869 8.0\n", - "8 chr1 869860 869991 168.87294 9.0\n", - "9 chr1 904689 904883 167.56897 10.0" + " chrom start end signal_value cluster_id\n", + "0 chr1 181368 181564 27.54796 1.0\n", + "1 chr1 186650 186846 19.93098 2.0\n", + "2 chr1 267909 268105 88.00863 3.0\n", + "3 chr1 586106 586302 35.02027 4.0\n", + "4 chr1 729261 729457 23.29415 5.0\n", + "5 chr1 778812 779008 56.97663 6.0\n", + "6 chr1 850473 850669 27.08243 7.0\n", + "7 chr1 858056 858252 15.55869 8.0\n", + "8 chr1 869860 869991 168.87294 9.0\n", + "9 chr1 904689 904883 167.56897 10.0" ] }, "execution_count": 11, @@ -2003,31 +2241,23 @@ } ], "source": [ - "# Define GIQL query - basic clustering\n", "giql_query = \"\"\"\n", " SELECT\n", - " chromosome,\n", - " start_pos,\n", - " end_pos,\n", + " chrom,\n", + " start,\n", + " \"end\",\n", " signal_value,\n", " CLUSTER(interval) AS cluster_id\n", " FROM features_a\n", - " ORDER BY chromosome, start_pos\n", + " ORDER BY chrom, start\n", "\"\"\"\n", "\n", - "# Transpile to SQL\n", - "sql = engine.transpile(giql_query)\n", + "sql = transpile(giql_query, tables=[\"features_a\"])\n", "print(\"Transpiled SQL:\")\n", - "print(\n", - 
" sqlparse.format(\n", - " sql, reindent=True, keyword_case=\"upper\", indent_columns=True, indent_width=2\n", - " )\n", - ")\n", + "print(sqlglot.transpile(sql, pretty=True)[0])\n", "print(\"\\n\" + \"=\" * 80 + \"\\n\")\n", "\n", - "# Execute with DuckDB via GIQL engine\n", - "cursor = engine.conn.execute(sql)\n", - "result = cursor.df() # Get result as pandas DataFrame\n", + "result = conn.execute(sql).fetchdf()\n", "print(f\"Result: {len(result)} intervals with cluster assignments\")\n", "print(f\"Number of unique clusters: {result['cluster_id'].nunique()}\")\n", "result.head(10)" @@ -2037,9 +2267,15 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### CLUSTER with distance parameter\n", + "---\n", "\n", - "Cluster intervals within 1000bp of each other." + "## 6. DISTANCE operator\n", + "\n", + "**Calculate genomic distances** between intervals from `features_a` and `features_b`.\n", + "\n", + "Similar to `bedtools closest -d`, this calculates the distance in base pairs between genomic intervals.\n", + "\n", + "**GIQL Query**: `SELECT a.*, b.*, DISTANCE(a.interval, b.interval) AS distance FROM features_a a, features_b b`" ] }, { @@ -2053,31 +2289,31 @@ "text": [ "Transpiled SQL:\n", "SELECT\n", - " chromosome,\n", - " start_pos,\n", - " end_pos,\n", + " chrom,\n", + " start,\n", + " \"end\",\n", " signal_value,\n", - " SUM(is_new_cluster) OVER (PARTITION BY \"chromosome\"\n", - " ORDER BY \"start_pos\" NULLS LAST) AS cluster_id\n", - "FROM\n", - " (SELECT\n", - " chromosome,\n", - " start_pos,\n", - " end_pos,\n", - " signal_value,\n", - " CASE\n", - " WHEN LAG(\"end_pos\") OVER (PARTITION BY \"chromosome\"\n", - " ORDER BY \"start_pos\" NULLS LAST) + 50000 >= \"start_pos\" THEN 0\n", - " ELSE 1\n", - " END AS is_new_cluster\n", - " FROM features_a) AS lag_calc\n", + " SUM(is_new_cluster) OVER (PARTITION BY \"chrom\" ORDER BY \"start\" NULLS LAST) AS cluster_id\n", + "FROM (\n", + " SELECT\n", + " chrom,\n", + " start,\n", + " \"end\",\n", + " 
signal_value,\n", + " CASE\n", + " WHEN LAG(\"end\") OVER (PARTITION BY \"chrom\" ORDER BY \"start\" NULLS LAST) + 50000 >= \"start\"\n", + " THEN 0\n", + " ELSE 1\n", + " END AS is_new_cluster\n", + " FROM features_a\n", + ") AS lag_calc\n", "ORDER BY\n", - " chromosome,\n", - " start_pos\n", + " chrom,\n", + " start\n", "\n", "================================================================================\n", "\n", - "Result: 60893 intervals with cluster assignments (1000bp distance)\n", + "Result: 60893 intervals with cluster assignments (50000bp distance)\n", "Number of unique clusters: 1376\n" ] }, @@ -2102,9 +2338,9 @@ " \n", " \n", " \n", - " chromosome\n", - " start_pos\n", - " end_pos\n", + " chrom\n", + " start\n", + " end\n", " signal_value\n", " cluster_id\n", " \n", @@ -2195,17 +2431,17 @@ "" ], "text/plain": [ - " chromosome start_pos end_pos signal_value cluster_id\n", - "0 chr1 181368 181564 27.54796 1.0\n", - "1 chr1 186650 186846 19.93098 1.0\n", - "2 chr1 267909 268105 88.00863 2.0\n", - "3 chr1 586106 586302 35.02027 3.0\n", - "4 chr1 729261 729457 23.29415 4.0\n", - "5 chr1 778812 779008 56.97663 4.0\n", - "6 chr1 850473 850669 27.08243 5.0\n", - "7 chr1 858056 858252 15.55869 5.0\n", - "8 chr1 869860 869991 168.87294 5.0\n", - "9 chr1 904689 904883 167.56897 5.0" + " chrom start end signal_value cluster_id\n", + "0 chr1 181368 181564 27.54796 1.0\n", + "1 chr1 186650 186846 19.93098 1.0\n", + "2 chr1 267909 268105 88.00863 2.0\n", + "3 chr1 586106 586302 35.02027 3.0\n", + "4 chr1 729261 729457 23.29415 4.0\n", + "5 chr1 778812 779008 56.97663 4.0\n", + "6 chr1 850473 850669 27.08243 5.0\n", + "7 chr1 858056 858252 15.55869 5.0\n", + "8 chr1 869860 869991 168.87294 5.0\n", + "9 chr1 904689 904883 167.56897 5.0" ] }, "execution_count": 12, @@ -2214,32 +2450,24 @@ } ], "source": [ - "# Define GIQL query with distance parameter\n", "giql_query = \"\"\"\n", " SELECT\n", - " chromosome,\n", - " start_pos,\n", - " end_pos,\n", + " chrom,\n", + " 
start,\n", + " \"end\",\n", " signal_value,\n", " CLUSTER(interval, 50000) AS cluster_id\n", " FROM features_a\n", - " ORDER BY chromosome, start_pos\n", + " ORDER BY chrom, start\n", "\"\"\"\n", "\n", - "# Transpile to SQL\n", - "sql = engine.transpile(giql_query)\n", + "sql = transpile(giql_query, tables=[\"features_a\"])\n", "print(\"Transpiled SQL:\")\n", - "print(\n", - " sqlparse.format(\n", - " sql, reindent=True, keyword_case=\"upper\", indent_columns=True, indent_width=2\n", - " )\n", - ")\n", + "print(sqlglot.transpile(sql, pretty=True)[0])\n", "print(\"\\n\" + \"=\" * 80 + \"\\n\")\n", "\n", - "# Execute with DuckDB via GIQL engine\n", - "cursor = engine.conn.execute(sql)\n", - "result = cursor.df() # Get result as pandas DataFrame\n", - "print(f\"Result: {len(result)} intervals with cluster assignments (1000bp distance)\")\n", + "result = conn.execute(sql).fetchdf()\n", + "print(f\"Result: {len(result)} intervals with cluster assignments (50000bp distance)\")\n", "print(f\"Number of unique clusters: {result['cluster_id'].nunique()}\")\n", "result.head(10)" ] @@ -2248,15 +2476,9 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "---\n", - "\n", - "## 6. DISTANCE operator\n", - "\n", - "**Calculate genomic distances** between intervals from `features_a` and `features_b`.\n", - "\n", - "Similar to `bedtools closest -d`, this calculates the distance in base pairs between genomic intervals.\n", + "### Signed Distance (Directional)\n", "\n", - "**GIQL Query**: `SELECT a.*, b.*, DISTANCE(a.interval, b.interval) AS distance FROM features_a a, features_b b`" + "Calculate **directional distances** where negative values indicate upstream features and positive values indicate downstream features, similar to `bedtools closest -D ref`." 
] }, { @@ -2269,49 +2491,68 @@ "output_type": "stream", "text": [ "Transpiled SQL:\n", - "WITH distances AS\n", - " (SELECT\n", - " a.chromosome,\n", - " a.start_pos AS a_start,\n", - " a.end_pos AS a_end,\n", - " b.start_pos AS b_start,\n", - " b.end_pos AS b_end,\n", - " CASE\n", - " WHEN a.\"chromosome\" != b.\"chromosome\" THEN NULL\n", - " WHEN a.\"start_pos\" < b.\"end_pos\"\n", - " AND a.\"end_pos\" > b.\"start_pos\" THEN 0\n", - " WHEN a.\"end_pos\" <= b.\"start_pos\" THEN (b.\"start_pos\" - a.\"end_pos\")\n", - " ELSE (a.\"start_pos\" - b.\"end_pos\")\n", - " END AS signed_distance,\n", - " ROW_NUMBER() OVER (PARTITION BY a.chromosome, a.start_pos, a.end_pos\n", - " ORDER BY ABS(CASE\n", - " WHEN a.\"chromosome\" != b.\"chromosome\" THEN NULL\n", - " WHEN a.\"start_pos\" < b.\"end_pos\"\n", - " AND a.\"end_pos\" > b.\"start_pos\" THEN 0\n", - " WHEN a.\"end_pos\" <= b.\"start_pos\" THEN (b.\"start_pos\" - a.\"end_pos\")\n", - " ELSE (a.\"start_pos\" - b.\"end_pos\")\n", - " END)) AS rank\n", - " FROM features_a AS a\n", - " CROSS JOIN features_b AS b\n", - " WHERE a.chromosome = b.chromosome\n", - " AND a.chromosome = 'chr1'\n", - " AND a.start_pos < 500000)\n", + "WITH distances AS (\n", + " SELECT\n", + " a.chrom,\n", + " a.start AS a_start,\n", + " a.\"end\" AS a_end,\n", + " b.start AS b_start,\n", + " b.\"end\" AS b_end,\n", + " CASE\n", + " WHEN a.\"chrom\" <> b.\"chrom\"\n", + " THEN NULL\n", + " WHEN a.\"start\" < b.\"end\" AND a.\"end\" > b.\"start\"\n", + " THEN 0\n", + " WHEN a.\"end\" <= b.\"start\"\n", + " THEN (\n", + " b.\"start\" - a.\"end\"\n", + " )\n", + " ELSE -(\n", + " a.\"start\" - b.\"end\"\n", + " )\n", + " END AS signed_distance,\n", + " ROW_NUMBER() OVER (\n", + " PARTITION BY a.chrom, a.start, a.\"end\"\n", + " ORDER BY ABS(\n", + " CASE\n", + " WHEN a.\"chrom\" <> b.\"chrom\"\n", + " THEN NULL\n", + " WHEN a.\"start\" < b.\"end\" AND a.\"end\" > b.\"start\"\n", + " THEN 0\n", + " WHEN a.\"end\" <= b.\"start\"\n", + " THEN (\n", 
+ " b.\"start\" - a.\"end\"\n", + " )\n", + " ELSE -(\n", + " a.\"start\" - b.\"end\"\n", + " )\n", + " END\n", + " )\n", + " ) AS rank\n", + " FROM features_a AS a\n", + " CROSS JOIN features_b AS b\n", + " WHERE\n", + " a.chrom = b.chrom AND a.chrom = 'chr1' AND a.start < 500000\n", + ")\n", "SELECT\n", - " chromosome,\n", + " chrom,\n", " a_start,\n", " a_end,\n", " b_start,\n", " b_end,\n", " signed_distance,\n", " CASE\n", - " WHEN signed_distance < 0 THEN 'upstream'\n", - " WHEN signed_distance > 0 THEN 'downstream'\n", - " ELSE 'overlap'\n", + " WHEN signed_distance < 0\n", + " THEN 'upstream'\n", + " WHEN signed_distance > 0\n", + " THEN 'downstream'\n", + " ELSE 'overlap'\n", " END AS direction\n", "FROM distances\n", - "WHERE rank = 1\n", + "WHERE\n", + " rank = 1\n", "ORDER BY\n", - " chromosome,\n", + " chrom,\n", " a_start\n", "\n", "================================================================================\n", @@ -2343,7 +2584,7 @@ " \n", " \n", " \n", - " chromosome\n", + " chrom\n", " a_start\n", " a_end\n", " b_start\n", @@ -2388,10 +2629,10 @@ "" ], "text/plain": [ - " chromosome a_start a_end b_start b_end signed_distance direction\n", - "0 chr1 181368 181564 186654 186964 5090 downstream\n", - "1 chr1 186650 186846 186654 186964 0 overlap\n", - "2 chr1 267909 268105 267979 268128 0 overlap" + " chrom a_start a_end b_start b_end signed_distance direction\n", + "0 chr1 181368 181564 186654 186964 5090 downstream\n", + "1 chr1 186650 186846 186654 186964 0 overlap\n", + "2 chr1 267909 268105 267979 268128 0 overlap" ] }, "execution_count": 13, @@ -2400,58 +2641,48 @@ } ], "source": [ - "# Define GIQL query with signed distance\n", - "# Signed distance (signed=true) returns negative values for upstream features and positive for downstream\n", - "# This is similar to bedtools closest -D ref\n", "giql_query = \"\"\"\n", " WITH distances AS (\n", " SELECT\n", - " a.chromosome,\n", - " a.start_pos AS a_start,\n", - " a.end_pos AS a_end,\n", - " 
b.start_pos AS b_start,\n", - " b.end_pos AS b_end,\n", + " a.chrom,\n", + " a.start AS a_start,\n", + " a.\"end\" AS a_end,\n", + " b.start AS b_start,\n", + " b.\"end\" AS b_end,\n", " DISTANCE(a.interval, b.interval, signed=true) AS signed_distance,\n", " ROW_NUMBER() OVER (\n", - " PARTITION BY a.chromosome, a.start_pos, a.end_pos\n", + " PARTITION BY a.chrom, a.start, a.\"end\"\n", " ORDER BY ABS(DISTANCE(a.interval, b.interval, signed=true))\n", " ) AS rank\n", " FROM features_a a\n", " CROSS JOIN features_b b\n", - " WHERE a.chromosome = b.chromosome\n", - " AND a.chromosome = 'chr1'\n", - " AND a.start_pos < 500000\n", + " WHERE a.chrom = b.chrom\n", + " AND a.chrom = 'chr1'\n", + " AND a.start < 500000\n", " )\n", - " SELECT \n", - " chromosome,\n", + " SELECT\n", + " chrom,\n", " a_start,\n", " a_end,\n", " b_start,\n", " b_end,\n", " signed_distance,\n", - " CASE \n", + " CASE\n", " WHEN signed_distance < 0 THEN 'upstream'\n", " WHEN signed_distance > 0 THEN 'downstream'\n", " ELSE 'overlap'\n", " END AS direction\n", " FROM distances\n", " WHERE rank = 1\n", - " ORDER BY chromosome, a_start\n", + " ORDER BY chrom, a_start\n", "\"\"\"\n", "\n", - "# Transpile to SQL\n", - "sql = engine.transpile(giql_query)\n", + "sql = transpile(giql_query, tables=[\"features_a\", \"features_b\"])\n", "print(\"Transpiled SQL:\")\n", - "print(\n", - " sqlparse.format(\n", - " sql, reindent=True, keyword_case=\"upper\", indent_columns=True, indent_width=2\n", - " )\n", - ")\n", + "print(sqlglot.transpile(sql, pretty=True)[0])\n", "print(\"\\n\" + \"=\" * 80 + \"\\n\")\n", "\n", - "# Execute with DuckDB via GIQL engine\n", - "cursor = engine.conn.execute(sql)\n", - "result = cursor.df() # Get result as pandas DataFrame\n", + "result = conn.execute(sql).fetchdf()\n", "print(f\"Result: {len(result)} features with directional distances\")\n", "print(f\"Upstream features: {(result['signed_distance'] < 0).sum()}\")\n", "print(f\"Downstream features: {(result['signed_distance'] 
> 0).sum()}\")\n", @@ -2463,9 +2694,26 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Signed Distance (Directional)\n", + "---\n", "\n", - "Calculate **directional distances** where negative values indicate upstream features and positive values indicate downstream features, similar to `bedtools closest -D ref`." + "## 7. NEAREST operator\n", + "\n", + "**Find the k-nearest genomic features** using the NEAREST operator.\n", + "\n", + "Similar to `bedtools closest`, this finds the closest features to query intervals. The NEAREST operator eliminates the need to write complex window functions for k-nearest neighbor queries.\n", + "\n", + "**Key Features**:\n", + "- Find k-nearest features (k=1 for closest, k>1 for multiple neighbors)\n", + "- Directional queries with `signed=true` (upstream vs downstream)\n", + "- Distance-constrained queries with `max_distance`\n", + "- Strand-specific queries with `stranded=true`\n", + "- Implicit reference resolution in LATERAL joins (automatically uses outer table's interval)\n", + "\n", + "**Database Support**:\n", + "- **DuckDB**: Full support (LATERAL joins)\n", + "- **SQLite**: Standalone mode only (no LATERAL support)\n", + "\n", + "**GIQL Query**: `SELECT * FROM peaks CROSS JOIN LATERAL NEAREST(genes, k=3)`" ] }, { @@ -2479,28 +2727,33 @@ "text": [ "Transpiled SQL:\n", "SELECT\n", - " a.chromosome,\n", - " a.start_pos AS a_start,\n", - " a.end_pos AS a_end,\n", - " b.start_pos AS b_start,\n", - " b.end_pos AS b_end,\n", + " a.chrom,\n", + " a.start AS a_start,\n", + " a.\"end\" AS a_end,\n", + " b.start AS b_start,\n", + " b.\"end\" AS b_end,\n", " CASE\n", - " WHEN a.\"chromosome\" != b.\"chromosome\" THEN NULL\n", - " WHEN a.\"start_pos\" < b.\"end_pos\"\n", - " AND a.\"end_pos\" > b.\"start_pos\" THEN 0\n", - " WHEN a.\"end_pos\" <= b.\"start_pos\" THEN (b.\"start_pos\" - a.\"end_pos\")\n", - " ELSE (a.\"start_pos\" - b.\"end_pos\")\n", + " WHEN a.\"chrom\" <> b.\"chrom\"\n", + " THEN NULL\n", + " WHEN 
a.\"start\" < b.\"end\" AND a.\"end\" > b.\"start\"\n", + " THEN 0\n", + " WHEN a.\"end\" <= b.\"start\"\n", + " THEN (\n", + " b.\"start\" - a.\"end\"\n", + " )\n", + " ELSE (\n", + " a.\"start\" - b.\"end\"\n", + " )\n", " END AS distance\n", - "FROM\n", - " features_a AS a,\n", - " features_b AS b\n", - "WHERE a.chromosome = b.chromosome\n", - " AND a.chromosome = 'chr1'\n", - " AND a.start_pos BETWEEN 180000 AND 190000\n", - " AND b.start_pos BETWEEN 180000 AND 200000\n", + "FROM features_a AS a, features_b AS b\n", + "WHERE\n", + " a.chrom = b.chrom\n", + " AND a.chrom = 'chr1'\n", + " AND a.start BETWEEN 180000 AND 190000\n", + " AND b.start BETWEEN 180000 AND 200000\n", "ORDER BY\n", - " a.start_pos,\n", - " b.start_pos\n", + " a.start,\n", + " b.start\n", "\n", "================================================================================\n", "\n", @@ -2530,7 +2783,7 @@ " \n", " \n", " \n", - " chromosome\n", + " chrom\n", " a_start\n", " a_end\n", " b_start\n", @@ -2562,9 +2815,9 @@ "" ], "text/plain": [ - " chromosome a_start a_end b_start b_end distance\n", - "0 chr1 181368 181564 186654 186964 5090\n", - "1 chr1 186650 186846 186654 186964 0" + " chrom a_start a_end b_start b_end distance\n", + "0 chr1 181368 181564 186654 186964 5090\n", + "1 chr1 186650 186846 186654 186964 0" ] }, "execution_count": 14, @@ -2573,38 +2826,28 @@ } ], "source": [ - "# Define GIQL query - basic distance calculation\n", - "# DISTANCE() returns 0 for overlapping intervals, positive integers for gaps between intervals\n", - "# Filters to a small region on chr1 for demonstration\n", "giql_query = \"\"\"\n", - " SELECT \n", - " a.chromosome,\n", - " a.start_pos AS a_start,\n", - " a.end_pos AS a_end,\n", - " b.start_pos AS b_start,\n", - " b.end_pos AS b_end,\n", + " SELECT\n", + " a.chrom,\n", + " a.start AS a_start,\n", + " a.\"end\" AS a_end,\n", + " b.start AS b_start,\n", + " b.\"end\" AS b_end,\n", " DISTANCE(a.interval, b.interval) AS distance\n", " FROM features_a 
a, features_b b\n", - " WHERE a.chromosome = b.chromosome \n", - " AND a.chromosome = 'chr1'\n", - " AND a.start_pos BETWEEN 180000 AND 190000\n", - " AND b.start_pos BETWEEN 180000 AND 200000\n", - " ORDER BY a.start_pos, b.start_pos\n", + " WHERE a.chrom = b.chrom\n", + " AND a.chrom = 'chr1'\n", + " AND a.start BETWEEN 180000 AND 190000\n", + " AND b.start BETWEEN 180000 AND 200000\n", + " ORDER BY a.start, b.start\n", "\"\"\"\n", "\n", - "# Transpile to SQL\n", - "sql = engine.transpile(giql_query)\n", + "sql = transpile(giql_query, tables=[\"features_a\", \"features_b\"])\n", "print(\"Transpiled SQL:\")\n", - "print(\n", - " sqlparse.format(\n", - " sql, reindent=True, keyword_case=\"upper\", indent_columns=True, indent_width=2\n", - " )\n", - ")\n", + "print(sqlglot.transpile(sql, pretty=True)[0])\n", "print(\"\\n\" + \"=\" * 80 + \"\\n\")\n", "\n", - "# Execute with DuckDB via GIQL engine\n", - "cursor = engine.conn.execute(sql)\n", - "result = cursor.df() # Get result as pandas DataFrame\n", + "result = conn.execute(sql).fetchdf()\n", "print(f\"Result: {len(result)} distance calculations\")\n", "print(\"\\nExample distances (0 = overlap, positive = gap in base pairs):\")\n", "result.head(10)" @@ -2614,26 +2857,9 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "---\n", - "\n", - "## 7. NEAREST operator\n", - "\n", - "**Find the k-nearest genomic features** using the NEAREST operator.\n", - "\n", - "Similar to `bedtools closest`, this finds the closest features to query intervals. 
The NEAREST operator eliminates the need to write complex window functions for k-nearest neighbor queries.\n", - "\n", - "**Key Features**:\n", - "- Find k-nearest features (k=1 for closest, k>1 for multiple neighbors)\n", - "- Directional queries with `signed=true` (upstream vs downstream)\n", - "- Distance-constrained queries with `max_distance`\n", - "- Strand-specific queries with `stranded=true`\n", - "- Implicit reference resolution in LATERAL joins (automatically uses outer table's interval)\n", - "\n", - "**Database Support**:\n", - "- **DuckDB**: Full support (LATERAL joins)\n", - "- **SQLite**: Standalone mode only (no LATERAL support)\n", + "### Finding the Nearest Feature\n", "\n", - "**GIQL Query**: `SELECT * FROM peaks CROSS JOIN LATERAL NEAREST(genes, k=3)`" + "Find the **closest feature** in B for each feature in A using `k=1`." ] }, { @@ -2647,41 +2873,56 @@ "output_type": "stream", "text": [ "Transpiled SQL:\n", "SELECT\n", - " a.chromosome,\n", - " a.start_pos AS a_start,\n", - " a.end_pos AS a_end,\n", + " a.chrom,\n", + " a.start AS a_start,\n", + " a.\"end\" AS a_end,\n", " a.signal_value AS a_signal,\n", - " b.start_pos AS b_start,\n", - " b.end_pos AS b_end,\n", + " b.start AS b_start,\n", + " b.\"end\" AS b_end,\n", " b.signal_value AS b_signal,\n", " distance\n", - "FROM\n", - " features_a AS a,\n", - "\n", - " (SELECT\n", - " features_b.*,\n", - " CASE\n", - " WHEN a.\"chromosome\" != features_b.\"chromosome\" THEN NULL\n", - " WHEN a.\"start_pos\" < features_b.\"end_pos\"\n", - " AND 
a.\"end_pos\" > features_b.\"start_pos\" THEN 0\n", - " WHEN a.\"end_pos\" <= features_b.\"start_pos\" THEN (features_b.\"start_pos\" - a.\"end_pos\")\n", - " ELSE (a.\"start_pos\" - features_b.\"end_pos\")\n", - " END)\n", - " LIMIT 1) AS b\n", - "WHERE a.chromosome = 'chr1'\n", - " AND a.start_pos < 1000000\n", + "FROM features_a AS a, (\n", + " SELECT\n", + " features_b.*,\n", + " CASE\n", + " WHEN a.\"chrom\" <> features_b.\"chrom\"\n", + " THEN NULL\n", + " WHEN a.\"start\" < features_b.\"end\" AND a.\"end\" > features_b.\"start\"\n", + " THEN 0\n", + " WHEN a.\"end\" <= features_b.\"start\"\n", + " THEN (\n", + " features_b.\"start\" - a.\"end\"\n", + " )\n", + " ELSE (\n", + " a.\"start\" - features_b.\"end\"\n", + " )\n", + " END AS distance\n", + " FROM features_b\n", + " WHERE\n", + " a.\"chrom\" = features_b.\"chrom\"\n", + " ORDER BY\n", + " ABS(\n", + " CASE\n", + " WHEN a.\"chrom\" <> features_b.\"chrom\"\n", + " THEN NULL\n", + " WHEN a.\"start\" < features_b.\"end\" AND a.\"end\" > features_b.\"start\"\n", + " THEN 0\n", + " WHEN a.\"end\" <= features_b.\"start\"\n", + " THEN (\n", + " features_b.\"start\" - a.\"end\"\n", + " )\n", + " ELSE (\n", + " a.\"start\" - features_b.\"end\"\n", + " )\n", + " END\n", + " )\n", + " LIMIT 1\n", + ") AS b\n", + "WHERE\n", + " a.chrom = 'chr1' AND a.start < 1000000\n", "ORDER BY\n", - " a.chromosome,\n", - " a.start_pos\n", + " a.chrom,\n", + " a.start\n", "\n", "================================================================================\n", "\n", @@ -2711,7 +2952,7 @@ " \n", " \n", " \n", - " chromosome\n", + " chrom\n", " a_start\n", " a_end\n", " a_signal\n", @@ -2837,17 +3078,17 @@ "" ], "text/plain": [ - " chromosome a_start a_end a_signal b_start b_end b_signal distance\n", - "0 chr1 181368 181564 27.54796 186654 186964 14.61908 5090\n", - "1 chr1 186650 186846 19.93098 186654 186964 14.61908 0\n", - "2 chr1 267909 268105 88.00863 267979 268128 49.39366 0\n", - "3 chr1 586106 586302 35.02027 850424 
850734 15.07029 264122\n", - "4 chr1 729261 729457 23.29415 850424 850734 15.07029 120967\n", - "5 chr1 778812 779008 56.97663 850424 850734 15.07029 71416\n", - "6 chr1 850473 850669 27.08243 850424 850734 15.07029 0\n", - "7 chr1 858056 858252 15.55869 850424 850734 15.07029 7322\n", - "8 chr1 869860 869991 168.87294 869711 870021 12.42156 0\n", - "9 chr1 904689 904883 167.56897 912864 913174 31.07865 7981" + " chrom a_start a_end a_signal b_start b_end b_signal distance\n", + "0 chr1 181368 181564 27.54796 186654 186964 14.61908 5090\n", + "1 chr1 186650 186846 19.93098 186654 186964 14.61908 0\n", + "2 chr1 267909 268105 88.00863 267979 268128 49.39366 0\n", + "3 chr1 586106 586302 35.02027 850424 850734 15.07029 264122\n", + "4 chr1 729261 729457 23.29415 850424 850734 15.07029 120967\n", + "5 chr1 778812 779008 56.97663 850424 850734 15.07029 71416\n", + "6 chr1 850473 850669 27.08243 850424 850734 15.07029 0\n", + "7 chr1 858056 858252 15.55869 850424 850734 15.07029 7322\n", + "8 chr1 869860 869991 168.87294 869711 870021 12.42156 0\n", + "9 chr1 904689 904883 167.56897 912864 913174 31.07865 7981" ] }, "execution_count": 15, @@ -2856,38 +3097,28 @@ } ], "source": [ - "# Define GIQL query - find closest feature in B for each feature in A\n", - "# This replicates bedtools closest -d functionality using NEAREST(target, k=1)\n", - "# The reference interval is automatically inferred from the outer table (features_a)\n", "giql_query = \"\"\"\n", - " SELECT \n", - " a.chromosome,\n", - " a.start_pos AS a_start,\n", - " a.end_pos AS a_end,\n", + " SELECT\n", + " a.chrom,\n", + " a.start AS a_start,\n", + " a.\"end\" AS a_end,\n", " a.signal_value AS a_signal,\n", - " b.start_pos AS b_start,\n", - " b.end_pos AS b_end,\n", + " b.start AS b_start,\n", + " b.\"end\" AS b_end,\n", " b.signal_value AS b_signal,\n", " distance\n", " FROM features_a a, NEAREST(features_b, k=1) b\n", - " WHERE a.chromosome = 'chr1'\n", - " AND a.start_pos < 1000000\n", - " ORDER BY 
a.chromosome, a.start_pos\n", + " WHERE a.chrom = 'chr1'\n", + " AND a.start < 1000000\n", + " ORDER BY a.chrom, a.start\n", "\"\"\"\n", "\n", - "# Transpile to SQL\n", - "sql = engine.transpile(giql_query)\n", + "sql = transpile(giql_query, tables=[\"features_a\", \"features_b\"])\n", "print(\"Transpiled SQL:\")\n", - "print(\n", - " sqlparse.format(\n", - " sql, reindent=True, keyword_case=\"upper\", indent_columns=True, indent_width=2\n", - " )\n", - ")\n", + "print(sqlglot.transpile(sql, pretty=True)[0])\n", "print(\"\\n\" + \"=\" * 80 + \"\\n\")\n", "\n", - "# Execute with DuckDB via GIQL engine\n", - "cursor = engine.conn.execute(sql)\n", - "result = cursor.df() # Get result as pandas DataFrame\n", + "result = conn.execute(sql).fetchdf()\n", "print(f\"Result: {len(result)} features from A with their closest feature in B\")\n", "print(f\"Average distance to closest feature: {result['distance'].mean():.1f} bp\")\n", "print(f\"Features overlapping with closest: {(result['distance'] == 0).sum()}\")\n", @@ -2898,9 +3129,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Finding k-Nearest Features\n", + "### Finding k-Nearest Features\n", "\n", - "Find the **3 closest features** in B for each feature in A using `k=3`." + "Find the **3 closest features** in B for each feature in A using `k=3`.\n", + "\n", + "Use `k` to control how many neighbors are returned for each query interval."
] }, { @@ -2914,41 +3147,56 @@ "text": [ "Transpiled SQL:\n", "SELECT\n", - " a.chromosome,\n", - " a.start_pos AS a_start,\n", - " a.end_pos AS a_end,\n", + " a.chrom,\n", + " a.start AS a_start,\n", + " a.\"end\" AS a_end,\n", " a.signal_value AS a_signal,\n", - " b.start_pos AS b_start,\n", - " b.end_pos AS b_end,\n", + " b.start AS b_start,\n", + " b.\"end\" AS b_end,\n", " b.signal_value AS b_signal,\n", " distance\n", - "FROM\n", - " features_a AS a,\n", - "\n", - " (SELECT\n", - " features_b.*,\n", - " CASE\n", - " WHEN a.\"chromosome\" != features_b.\"chromosome\" THEN NULL\n", - " WHEN a.\"start_pos\" < features_b.\"end_pos\"\n", - " AND a.\"end_pos\" > features_b.\"start_pos\" THEN 0\n", - " WHEN a.\"end_pos\" <= features_b.\"start_pos\" THEN (features_b.\"start_pos\" - a.\"end_pos\")\n", - " ELSE (a.\"start_pos\" - features_b.\"end_pos\")\n", - " END AS distance\n", - " FROM features_b\n", - " WHERE a.\"chromosome\" = features_b.\"chromosome\"\n", - " ORDER BY ABS(CASE\n", - " WHEN a.\"chromosome\" != features_b.\"chromosome\" THEN NULL\n", - " WHEN a.\"start_pos\" < features_b.\"end_pos\"\n", - " AND a.\"end_pos\" > features_b.\"start_pos\" THEN 0\n", - " WHEN a.\"end_pos\" <= features_b.\"start_pos\" THEN (features_b.\"start_pos\" - a.\"end_pos\")\n", - " ELSE (a.\"start_pos\" - features_b.\"end_pos\")\n", - " END)\n", - " LIMIT 3) AS b\n", - "WHERE a.chromosome = 'chr1'\n", - " AND a.start_pos < 500000\n", + "FROM features_a AS a, (\n", + " SELECT\n", + " features_b.*,\n", + " CASE\n", + " WHEN a.\"chrom\" <> features_b.\"chrom\"\n", + " THEN NULL\n", + " WHEN a.\"start\" < features_b.\"end\" AND a.\"end\" > features_b.\"start\"\n", + " THEN 0\n", + " WHEN a.\"end\" <= features_b.\"start\"\n", + " THEN (\n", + " features_b.\"start\" - a.\"end\"\n", + " )\n", + " ELSE (\n", + " a.\"start\" - features_b.\"end\"\n", + " )\n", + " END AS distance\n", + " FROM features_b\n", + " WHERE\n", + " a.\"chrom\" = features_b.\"chrom\"\n", + " ORDER BY\n", + " 
ABS(\n", + " CASE\n", + " WHEN a.\"chrom\" <> features_b.\"chrom\"\n", + " THEN NULL\n", + " WHEN a.\"start\" < features_b.\"end\" AND a.\"end\" > features_b.\"start\"\n", + " THEN 0\n", + " WHEN a.\"end\" <= features_b.\"start\"\n", + " THEN (\n", + " features_b.\"start\" - a.\"end\"\n", + " )\n", + " ELSE (\n", + " a.\"start\" - features_b.\"end\"\n", + " )\n", + " END\n", + " )\n", + " LIMIT 3\n", + ") AS b\n", + "WHERE\n", + " a.chrom = 'chr1' AND a.start < 500000\n", "ORDER BY\n", - " a.chromosome,\n", - " a.start_pos,\n", + " a.chrom,\n", + " a.start,\n", " distance\n", "\n", "================================================================================\n", @@ -2979,7 +3227,7 @@ " \n", " \n", " \n", - " chromosome\n", + " chrom\n", " a_start\n", " a_end\n", " a_signal\n", @@ -3094,16 +3342,16 @@ "" ], "text/plain": [ - " chromosome a_start a_end a_signal b_start b_end b_signal distance\n", - "0 chr1 181368 181564 27.54796 186654 186964 14.61908 5090\n", - "1 chr1 181368 181564 27.54796 267979 268128 49.39366 86415\n", - "2 chr1 181368 181564 27.54796 850424 850734 15.07029 668860\n", - "3 chr1 186650 186846 19.93098 186654 186964 14.61908 0\n", - "4 chr1 186650 186846 19.93098 267979 268128 49.39366 81133\n", - "5 chr1 186650 186846 19.93098 850424 850734 15.07029 663578\n", - "6 chr1 267909 268105 88.00863 267979 268128 49.39366 0\n", - "7 chr1 267909 268105 88.00863 186654 186964 14.61908 80945\n", - "8 chr1 267909 268105 88.00863 850424 850734 15.07029 582319" + " chrom a_start a_end a_signal b_start b_end b_signal distance\n", + "0 chr1 181368 181564 27.54796 186654 186964 14.61908 5090\n", + "1 chr1 181368 181564 27.54796 267979 268128 49.39366 86415\n", + "2 chr1 181368 181564 27.54796 850424 850734 15.07029 668860\n", + "3 chr1 186650 186846 19.93098 186654 186964 14.61908 0\n", + "4 chr1 186650 186846 19.93098 267979 268128 49.39366 81133\n", + "5 chr1 186650 186846 19.93098 850424 850734 15.07029 663578\n", + "6 chr1 267909 268105 88.00863 267979 
268128 49.39366 0\n", + "7 chr1 267909 268105 88.00863 186654 186964 14.61908 80945\n", + "8 chr1 267909 268105 88.00863 850424 850734 15.07029 582319" ] }, "execution_count": 16, @@ -3112,54 +3360,45 @@ } ], "source": [ - "# Define GIQL query - find 3 nearest features in B for each feature in A\n", "giql_query = \"\"\"\n", - " SELECT \n", - " a.chromosome,\n", - " a.start_pos AS a_start,\n", - " a.end_pos AS a_end,\n", + " SELECT\n", + " a.chrom,\n", + " a.start AS a_start,\n", + " a.\"end\" AS a_end,\n", " a.signal_value AS a_signal,\n", - " b.start_pos AS b_start,\n", - " b.end_pos AS b_end,\n", + " b.start AS b_start,\n", + " b.\"end\" AS b_end,\n", " b.signal_value AS b_signal,\n", " distance\n", " FROM features_a a, NEAREST(features_b, k=3) b\n", - " WHERE a.chromosome = 'chr1'\n", - " AND a.start_pos < 500000\n", - " ORDER BY a.chromosome, a.start_pos, distance\n", + " WHERE a.chrom = 'chr1'\n", + " AND a.start < 500000\n", + " ORDER BY a.chrom, a.start, distance\n", "\"\"\"\n", "\n", - "# Transpile to SQL\n", - "sql = engine.transpile(giql_query)\n", + "sql = transpile(giql_query, tables=[\"features_a\", \"features_b\"])\n", "print(\"Transpiled SQL:\")\n", - "print(\n", - " sqlparse.format(\n", - " sql, reindent=True, keyword_case=\"upper\", indent_columns=True, indent_width=2\n", - " )\n", - ")\n", + "print(sqlglot.transpile(sql, pretty=True)[0])\n", "print(\"\\n\" + \"=\" * 80 + \"\\n\")\n", "\n", - "# Execute with DuckDB via GIQL engine\n", - "cursor = engine.conn.execute(sql)\n", - "result = cursor.df() # Get result as pandas DataFrame\n", + "result = conn.execute(sql).fetchdf()\n", "\n", - "# Group by feature A to show k neighbors per feature\n", "grouped = result.groupby([\"a_start\", \"a_end\"]).size()\n", "print(f\"Result: {len(result)} total rows (up to 3 neighbors per feature in A)\")\n", "print(f\"Number of features from A: {len(grouped)}\")\n", "print(f\"Average neighbors per feature: {grouped.mean():.1f}\")\n", - "result.head(15) # Show first 5 
features with their 3 neighbors each" + "result.head(15)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Directional Queries with signed=true\n", + "### Directional Queries with signed=true\n", "\n", - "Find the **3 nearest upstream or downstream features** using `signed=true`.\n", + "Find the **3 nearest upstream or downstream features** using `signed=true`.\n", "\n", - "Negative distances indicate upstream features (B is before A), positive distances indicate downstream features (B is after A)." + "Negative distances indicate upstream features (B is before A), positive distances indicate downstream features (B is after A)." ] }, { @@ -3173,51 +3412,67 @@ "output_type": "stream", "text": [ "Transpiled SQL:\n", "SELECT\n", - " a.chromosome,\n", - " a.start_pos AS a_start,\n", - " a.end_pos AS a_end,\n", - " b.start_pos AS b_start,\n", - " b.end_pos AS b_end,\n", + " a.chrom,\n", + " a.start AS a_start,\n", + " a.\"end\" AS a_end,\n", + " b.start AS b_start,\n", + " b.\"end\" AS b_end,\n", " distance,\n", " CASE\n", - " WHEN distance < 0 THEN 'upstream'\n", - " WHEN distance > 0 THEN 'downstream'\n", - " ELSE 'overlap'\n", + " WHEN distance < 0\n", + " THEN 'upstream'\n", + " WHEN distance > 0\n", + " THEN 'downstream'\n", + " ELSE 'overlap'\n", " END AS direction\n", - "FROM\n", - " features_a AS a,\n", - "\n", - " (SELECT\n", - " features_b.*,\n", - " CASE\n", - " WHEN a.\"chromosome\" != features_b.\"chromosome\" THEN NULL\n", - " WHEN a.\"start_pos\" < features_b.\"end_pos\"\n", - " AND a.\"end_pos\" > 
features_b.\"start_pos\" THEN 0\n", - " WHEN a.\"end_pos\" <= features_b.\"start_pos\" THEN (features_b.\"start_pos\" - a.\"end_pos\")\n", - " ELSE (a.\"start_pos\" - features_b.\"end_pos\")\n", - " END)\n", - " LIMIT 3) AS b\n", - "WHERE a.chromosome = 'chr1'\n", - " AND a.start_pos < 500000\n", - " AND direction = 'upstream'\n", + "FROM features_a AS a, (\n", + " SELECT\n", + " features_b.*,\n", + " CASE\n", + " WHEN a.\"chrom\" <> features_b.\"chrom\"\n", + " THEN NULL\n", + " WHEN a.\"start\" < features_b.\"end\" AND a.\"end\" > features_b.\"start\"\n", + " THEN 0\n", + " WHEN a.\"end\" <= features_b.\"start\"\n", + " THEN (\n", + " features_b.\"start\" - a.\"end\"\n", + " )\n", + " ELSE -(\n", + " a.\"start\" - features_b.\"end\"\n", + " )\n", + " END AS distance\n", + " FROM features_b\n", + " WHERE\n", + " a.\"chrom\" = features_b.\"chrom\"\n", + " ORDER BY\n", + " ABS(\n", + " CASE\n", + " WHEN a.\"chrom\" <> features_b.\"chrom\"\n", + " THEN NULL\n", + " WHEN a.\"start\" < features_b.\"end\" AND a.\"end\" > features_b.\"start\"\n", + " THEN 0\n", + " WHEN a.\"end\" <= features_b.\"start\"\n", + " THEN (\n", + " features_b.\"start\" - a.\"end\"\n", + " )\n", + " ELSE -(\n", + " a.\"start\" - features_b.\"end\"\n", + " )\n", + " END\n", + " )\n", + " LIMIT 3\n", + ") AS b\n", + "WHERE\n", + " a.chrom = 'chr1' AND a.start < 500000 AND direction = 'upstream'\n", "ORDER BY\n", - " a.chromosome,\n", - " a.start_pos,\n", + " a.chrom,\n", + " a.start,\n", " ABS(distance)\n", "\n", "================================================================================\n", "\n", - "Result: 0 total nearest features\n", - "Upstream features (distance < 0): 0\n", + "Result: 1 total nearest features\n", + "Upstream features (distance < 0): 1\n", "Downstream features (distance > 0): 0\n", "Overlapping features (distance = 0): 0\n" ] @@ -3243,7 +3498,7 @@ " \n", " \n", " \n", - " chromosome\n", + " chrom\n", " a_start\n", " a_end\n", " b_start\n", @@ -3253,14 +3508,23 @@ " \n", 
" \n", " \n", + " \n", + " 0\n", + " chr1\n", + " 267909\n", + " 268105\n", + " 186654\n", + " 186964\n", + " -80945\n", + " upstream\n", + " \n", " \n", "\n", "" ], "text/plain": [ - "Empty DataFrame\n", - "Columns: [chromosome, a_start, a_end, b_start, b_end, distance, direction]\n", - "Index: []" + " chrom a_start a_end b_start b_end distance direction\n", + "0 chr1 267909 268105 186654 186964 -80945 upstream" ] }, "execution_count": 17, @@ -3269,15 +3533,13 @@ } ], "source": [ - "# Define GIQL query - find 3 nearest features with directional distances\n", - "# signed=true returns negative for upstream (B before A) and positive for downstream (B after A)\n", "giql_query = \"\"\"\n", - " SELECT \n", - " a.chromosome,\n", - " a.start_pos AS a_start,\n", - " a.end_pos AS a_end,\n", - " b.start_pos AS b_start,\n", - " b.end_pos AS b_end,\n", + " SELECT\n", + " a.chrom,\n", + " a.start AS a_start,\n", + " a.\"end\" AS a_end,\n", + " b.start AS b_start,\n", + " b.\"end\" AS b_end,\n", " distance,\n", " CASE\n", " WHEN distance < 0 THEN 'upstream'\n", @@ -3285,27 +3547,19 @@ " ELSE 'overlap'\n", " END AS direction\n", " FROM features_a a, NEAREST(features_b, k=3, signed=true) b\n", - " WHERE a.chromosome = 'chr1'\n", - " AND a.start_pos < 500000\n", + " WHERE a.chrom = 'chr1'\n", + " AND a.start < 500000\n", " AND direction = 'upstream'\n", - " ORDER BY a.chromosome, a.start_pos, ABS(distance)\n", + " ORDER BY a.chrom, a.start, ABS(distance)\n", "\"\"\"\n", "\n", - "# Transpile to SQL\n", - "sql = engine.transpile(giql_query)\n", + "sql = transpile(giql_query, tables=[\"features_a\", \"features_b\"])\n", "print(\"Transpiled SQL:\")\n", - "print(\n", - " sqlparse.format(\n", - " sql, reindent=True, keyword_case=\"upper\", indent_columns=True, indent_width=2\n", - " )\n", - ")\n", + "print(sqlglot.transpile(sql, pretty=True)[0])\n", "print(\"\\n\" + \"=\" * 80 + \"\\n\")\n", "\n", - "# Execute with DuckDB via GIQL engine\n", - "cursor = engine.conn.execute(sql)\n", - 
"result = cursor.df() # Get result as pandas DataFrame\n", + "result = conn.execute(sql).fetchdf()\n", "\n", - "# Analyze directionality\n", "upstream = (result[\"distance\"] < 0).sum()\n", "downstream = (result[\"distance\"] > 0).sum()\n", "overlap = (result[\"distance\"] == 0).sum()\n", @@ -3317,315 +3571,6 @@ "result.head(15)" ] }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Distance-Constrained Queries with max_distance\n", - "\n", - "Find up to **5 nearest features within 50kb** using `max_distance=50000`.\n", - "\n", - "This is useful for finding nearby regulatory elements or associated genes within a biologically relevant distance." - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Transpiled SQL:\n", - "SELECT\n", - " a.chromosome,\n", - " a.start_pos AS a_start,\n", - " a.end_pos AS a_end,\n", - " b.start_pos AS b_start,\n", - " b.end_pos AS b_end,\n", - " distance\n", - "FROM\n", - " features_a AS a,\n", - "\n", - " (SELECT\n", - " features_b.*,\n", - " CASE\n", - " WHEN a.\"chromosome\" != features_b.\"chromosome\" THEN NULL\n", - " WHEN a.\"start_pos\" < features_b.\"end_pos\"\n", - " AND a.\"end_pos\" > features_b.\"start_pos\" THEN 0\n", - " WHEN a.\"end_pos\" <= features_b.\"start_pos\" THEN (features_b.\"start_pos\" - a.\"end_pos\")\n", - " ELSE (a.\"start_pos\" - features_b.\"end_pos\")\n", - " END AS distance\n", - " FROM features_b\n", - " WHERE a.\"chromosome\" = features_b.\"chromosome\"\n", - " AND (ABS(CASE\n", - " WHEN a.\"chromosome\" != features_b.\"chromosome\" THEN NULL\n", - " WHEN a.\"start_pos\" < features_b.\"end_pos\"\n", - " AND a.\"end_pos\" > features_b.\"start_pos\" THEN 0\n", - " WHEN a.\"end_pos\" <= features_b.\"start_pos\" THEN (features_b.\"start_pos\" - a.\"end_pos\")\n", - " ELSE (a.\"start_pos\" - features_b.\"end_pos\")\n", - " END)) <= 1000000\n", - " ORDER BY ABS(CASE\n", - " WHEN 
a.\"chromosome\" != features_b.\"chromosome\" THEN NULL\n", - " WHEN a.\"start_pos\" < features_b.\"end_pos\"\n", - " AND a.\"end_pos\" > features_b.\"start_pos\" THEN 0\n", - " WHEN a.\"end_pos\" <= features_b.\"start_pos\" THEN (features_b.\"start_pos\" - a.\"end_pos\")\n", - " ELSE (a.\"start_pos\" - features_b.\"end_pos\")\n", - " END)\n", - " LIMIT 5) AS b\n", - "WHERE a.chromosome = 'chr1'\n", - " AND a.start_pos < 500000\n", - "ORDER BY\n", - " a.chromosome,\n", - " a.start_pos,\n", - " distance\n", - "\n", - "================================================================================\n", - "\n", - "Result: 15 total rows (up to 5 neighbors within 50kb per feature in A)\n", - "Number of features from A: 3\n", - "Average neighbors per feature: 5.0\n", - "Max distance found: 731300 bp (should be <= 50000)\n", - "Features with at least one neighbor: 3\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
chromosomea_starta_endb_startb_enddistance
0chr11813681815641866541869645090
1chr118136818156426797926812886415
2chr1181368181564850424850734668860
3chr1181368181564869711870021688147
4chr1181368181564912864913174731300
5chr11866501868461866541869640
6chr118665018684626797926812881133
7chr1186650186846850424850734663578
8chr1186650186846869711870021682865
9chr1186650186846912864913174726018
10chr12679092681052679792681280
11chr126790926810518665418696480945
12chr1267909268105850424850734582319
13chr1267909268105869711870021601606
14chr1267909268105912864913174644759
\n", - "
" - ], - "text/plain": [ - " chromosome a_start a_end b_start b_end distance\n", - "0 chr1 181368 181564 186654 186964 5090\n", - "1 chr1 181368 181564 267979 268128 86415\n", - "2 chr1 181368 181564 850424 850734 668860\n", - "3 chr1 181368 181564 869711 870021 688147\n", - "4 chr1 181368 181564 912864 913174 731300\n", - "5 chr1 186650 186846 186654 186964 0\n", - "6 chr1 186650 186846 267979 268128 81133\n", - "7 chr1 186650 186846 850424 850734 663578\n", - "8 chr1 186650 186846 869711 870021 682865\n", - "9 chr1 186650 186846 912864 913174 726018\n", - "10 chr1 267909 268105 267979 268128 0\n", - "11 chr1 267909 268105 186654 186964 80945\n", - "12 chr1 267909 268105 850424 850734 582319\n", - "13 chr1 267909 268105 869711 870021 601606\n", - "14 chr1 267909 268105 912864 913174 644759" - ] - }, - "execution_count": 18, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Define GIQL query - find up to 5 nearest features within 50kb\n", - "# max_distance=1000000 filters out features farther than 50kb\n", - "giql_query = \"\"\"\n", - " SELECT \n", - " a.chromosome,\n", - " a.start_pos AS a_start,\n", - " a.end_pos AS a_end,\n", - " b.start_pos AS b_start,\n", - " b.end_pos AS b_end,\n", - " distance\n", - " FROM features_a a, NEAREST(features_b, k=5, max_distance=1000000) b\n", - " WHERE a.chromosome = 'chr1'\n", - " AND a.start_pos < 500000\n", - " ORDER BY a.chromosome, a.start_pos, distance\n", - "\"\"\"\n", - "\n", - "# Transpile to SQL\n", - "sql = engine.transpile(giql_query)\n", - "print(\"Transpiled SQL:\")\n", - "print(\n", - " sqlparse.format(\n", - " sql, reindent=True, keyword_case=\"upper\", indent_columns=True, indent_width=2\n", - " )\n", - ")\n", - "print(\"\\n\" + \"=\" * 80 + \"\\n\")\n", - "\n", - "# Execute with DuckDB via GIQL engine\n", - "cursor = engine.conn.execute(sql)\n", - "result = cursor.df() # Get result as pandas DataFrame\n", - "\n", - "# Analyze distance constraints\n", - "grouped = 
result.groupby([\"a_start\", \"a_end\"]).size()\n", - "print(\n", - " f\"Result: {len(result)} total rows (up to 5 neighbors within 50kb per feature in A)\"\n", - ")\n", - "print(f\"Number of features from A: {len(grouped)}\")\n", - "print(f\"Average neighbors per feature: {grouped.mean():.1f}\")\n", - "print(f\"Max distance found: {result['distance'].max()} bp (should be <= 50000)\")\n", - "print(f\"Features with at least one neighbor: {len(grouped)}\")\n", - "result.head(15)" - ] - }, { "cell_type": "markdown", "metadata": {}, @@ -3651,79 +3596,6 @@ "\n", "This demonstrates how GIQL provides a high-level, intuitive syntax for genomic interval operations while maintaining compatibility with standard SQL engines through transpilation." ] - }, - { - "cell_type": "code", - "execution_count": 19, - "metadata": {}, - "outputs": [], - "source": [ - "# Clean up\n", - "# engine.close()" - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "id": "9q2y97xvfyj", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Pandas dtypes:\n", - "chromosome str\n", - "start_pos int64\n", - "end_pos int64\n", - "name str\n", - "score int64\n", - "strand str\n", - "signal_value float64\n", - "p_value float64\n", - "q_value float64\n", - "peak int64\n", - "dtype: object\n", - "\n", - "First few rows:\n", - " chromosome start_pos end_pos name score strand signal_value p_value \\\n", - "0 chr17 27381032 27381306 . 1000 . 461.10586 -1.0 \n", - "1 chr7 25565932 25566199 . 1000 . 
457.60096 -1.0 \n", - "\n", - " q_value peak \n", - "0 5.08726 136 \n", - "1 5.08726 133 \n" - ] - } - ], - "source": [ - "import polars as pl\n", - "\n", - "# Load first BED file to check dtypes\n", - "bed_schema = {\n", - " \"chromosome\": pl.Utf8,\n", - " \"start_pos\": pl.Int64,\n", - " \"end_pos\": pl.Int64,\n", - " \"name\": pl.Utf8,\n", - " \"score\": pl.Int64,\n", - " \"strand\": pl.Utf8,\n", - " \"signal_value\": pl.Float64,\n", - " \"p_value\": pl.Float64,\n", - " \"q_value\": pl.Float64,\n", - " \"peak\": pl.Int64,\n", - "}\n", - "\n", - "features_a = pl.read_csv(\n", - " \".ENCFF199YFA.bed\", separator=\"\\t\", has_header=False, schema=bed_schema\n", - ")\n", - "\n", - "# Check pandas conversion\n", - "adf = features_a.to_pandas()\n", - "print(\"Pandas dtypes:\")\n", - "print(adf.dtypes)\n", - "print(\"\\nFirst few rows:\")\n", - "print(adf.head(2))" - ] } ], "metadata": { @@ -3742,7 +3614,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.13.3" + "version": "3.13.2" } }, "nbformat": 4, From d894f9de82229c358099e6e18cb2b3afd9e6e7db Mon Sep 17 00:00:00 2001 From: Nezar Abdennur Date: Fri, 13 Feb 2026 00:02:57 -0500 Subject: [PATCH 3/3] Add repo button to docs --- docs/conf.py | 8 ++++++++ docs/index.rst | 6 +++--- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index 9a28ad8..5e28805 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -72,3 +72,11 @@ html_theme = "sphinx_book_theme" # html_static_path = ['_static'] # Uncomment when you have custom static files +html_theme_options = { + # "logo": { + # "image_light": "_static/logo-light.svg", + # "image_dark": "_static/logo-dark.svg", + # }, + "repository_url": "https://github.com/abdenlab/giql", + "use_repository_button": True, +} \ No newline at end of file diff --git a/docs/index.rst b/docs/index.rst index 83b2834..91c98d9 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -4,16 +4,16 @@ Genomic Interval Query 
Language (GIQL) .. toctree:: :hidden: - Home + GIQL quickstart -**GIQL** is an extended SQL dialect that allows you to declaratively express genomic interval operations. +**GIQL** (pronounced "JEE-quel", rhymes with "sequel") is an extended SQL dialect that allows you to declaratively express genomic interval operations. See the :doc:`quickstart` to get started. Dialect ------- -GIQL extends the SQL query language with dedicated constructs for these +GIQL extends the SQL query language with dedicated constructs for common tasks, allowing you to declare *what* you want to compute rather than how. Whether you're filtering variants by genomic region, finding overlapping features, or calculating distances between intervals, GIQL