From c5da3eec85fe9b6484312fd60f294b84a259d54c Mon Sep 17 00:00:00 2001 From: Ivan Ogasawara Date: Fri, 14 Nov 2025 16:03:20 +0000 Subject: [PATCH 1/2] feat: Add support for int32 for llvm with apache arrow --- src/irx/builders/base.py | 12 +- src/irx/builders/llvmlite_arrow.py | 230 +++++++++++++++++++++++++++++ 2 files changed, 238 insertions(+), 4 deletions(-) create mode 100644 src/irx/builders/llvmlite_arrow.py diff --git a/src/irx/builders/base.py b/src/irx/builders/base.py index c835f4d..78e7f9b 100644 --- a/src/irx/builders/base.py +++ b/src/irx/builders/base.py @@ -11,7 +11,7 @@ import astx -from irx.tools.typing import typechecked +from irx.tools.typing import skip_unused, typechecked @typechecked @@ -45,6 +45,7 @@ def translate(self, expr: astx.AST) -> str: self.visit(expr) return str(self.result) """ + skip_unused(expr) raise Exception("Not implemented yet.") @@ -75,9 +76,9 @@ def module(self) -> astx.Module: """Create a new ASTx Module.""" return astx.Module() - def translate(self, expr: astx.AST) -> str: + def translate(self, node: astx.AST) -> str: """Transpile ASTx to LLVM-IR.""" - return self.translator.translate(expr) + return self.translator.translate(node) @abstractmethod def build( @@ -86,8 +87,11 @@ def build( output_file: str, # noqa: F841, RUF100 ) -> None: """Transpile ASTx to LLVM-IR and build an executable file.""" - ... + skip_unused(expr, output_file) def run(self) -> str: """Run the generated executable.""" + if not self.output_file: + raise RuntimeError("No built output to run.") + return run_command([self.output_file]) diff --git a/src/irx/builders/llvmlite_arrow.py b/src/irx/builders/llvmlite_arrow.py new file mode 100644 index 0000000..f2feaa1 --- /dev/null +++ b/src/irx/builders/llvmlite_arrow.py @@ -0,0 +1,230 @@ +""" +LLVM-IR builder with Arrow C Data Interface (experimental). + +This backend lowers literals to Arrow-compatible shapes using ONLY emitted +LLVM IR (no external C/C++ shims). For now, we model a scalar as an ArrowArray +of length 1 (C Data Interface). + +""" + +from __future__ import annotations + +import tempfile + +from typing import Any, Callable + +import astx +import xh + +from llvmlite import binding as llvm +from llvmlite import ir +from plum import dispatch +from public import public + +from irx.builders.base import Builder +from irx.builders.llvmliteir import LLVMLiteIRVisitor + + +class LLVMLiteArrowIRVisitor(LLVMLiteIRVisitor): + """IR visitor that lowers literals to Arrow C Data Interface objects.""" + + _arrow_array_ty: ir.IdentifiedStructType + + def __init__(self) -> None: + super().__init__() + self._init_arrow_types() + + # C Data Interface: ArrowArray + # struct ArrowArray { + # int64_t length; + # int64_t null_count; + # int64_t offset; + # int64_t n_buffers; + # int64_t n_children; + # const void** buffers; + # struct ArrowArray** children; + # void* dictionary; // we model as i8* + # void (*release)(struct ArrowArray*); + # void* private_data; + # }; + def _init_arrow_types(self) -> None: + ctx = ir.global_context + self._arrow_array_ty = ctx.get_identified_type("struct.ArrowArray") + + i64 = ir.IntType(64) + i8p = ir.IntType(8).as_pointer() + + arr_ptr = self._arrow_array_ty.as_pointer() + # Function pointer type: void (*release)(ArrowArray*) + release_fn_ty = ir.FunctionType(ir.VoidType(), [arr_ptr]).as_pointer() + + # buffers: i8** (const void**) + buffers_ptr_ty = i8p.as_pointer() + # children: ArrowArray** (we won't use it yet; set to null) + children_ptr_ty = arr_ptr.as_pointer() + + self._arrow_array_ty.set_body( + i64, # length + i64, # null_count + i64, # offset + i64, # n_buffers + i64, # n_children + buffers_ptr_ty, # buffers + children_ptr_ty, # children + i8p, # dictionary (opaque) + release_fn_ty, # release + i8p, # private_data + ) + + def _entry_alloca(self, ty: ir.Type, name: str) -> ir.Instruction: + """Allocate in the function entry block (mem2reg-friendly).""" + ib = self._llvm.ir_builder + cur = ib.block + ib.position_at_start(ib.function.entry_basic_block) + slot = ib.alloca(ty, name=name) + ib.position_at_end(cur) + return slot + + @dispatch + def visit(self, node: astx.AST) -> None: + """Define the Generic visit method for AST node.""" + raise Exception("Not implemented.") + + @dispatch # type: ignore[no-redef] + def visit(self, node: astx.LiteralInt32) -> None: + """ + Lower LiteralInt32 to an ArrowArray(length=1). + + Layout (C Data Interface): + - length = 1 + - null_count = 0 + - offset = 0 + - n_buffers = 2 (validity bitmap, values) + - n_children = 0 + - buffers[0] = &validity_byte (i8*, bit 0 set to 1) + - buffers[1] = &value_i32 (i8* to 4-byte i32 storage) + - children = null + - dictionary = null + - release = null (stack lifetime only) + - private_data= null + """ + ib = self._llvm.ir_builder + i8 = ir.IntType(8) + i8p = i8.as_pointer() + i32 = self._llvm.INT32_TYPE + i64 = ir.IntType(64) + + arr_ptr = self._entry_alloca( + self._arrow_array_ty, name="arrow.i32.scalar" + ) + + # Allocate buffers array [2 x i8*] in entry block. + buffers_arr_ty = ir.ArrayType(i8p, 2) + buffers_slot = self._entry_alloca(buffers_arr_ty, name="arrow.buffers") + + # Allocate and initialize validity byte (bitmap) on stack: + # bit 0 = 1 (valid) + valid_slot = self._entry_alloca(i8, name="arrow.valid") + ib.store(ir.Constant(i8, 1), valid_slot) # 0000_0001 + + # Allocate and initialize 4-byte value on stack + value_slot = self._entry_alloca(i32, name="arrow.i32.value") + ib.store(ir.Constant(i32, node.value), value_slot) + + # Compute i8* pointers for buffers[0] and buffers[1] + valid_i8p = ib.bitcast(valid_slot, i8p, name="valid_i8p") + value_i8p = ib.bitcast(value_slot, i8p, name="value_i8p") + + # Fill buffers array + i32_ty = ir.IntType(32) + buf0_ptr = ib.gep( + buffers_slot, + [ir.Constant(i32_ty, 0), ir.Constant(i32_ty, 0)], + inbounds=True, + ) + buf1_ptr = ib.gep( + buffers_slot, + [ir.Constant(i32_ty, 0), ir.Constant(i32_ty, 1)], + inbounds=True, + ) + ib.store(valid_i8p, buf0_ptr) + ib.store(value_i8p, buf1_ptr) + + # Pointer-to-first element: i8** (const void**) + buffers_i8pp = ib.gep( + buffers_slot, + [ir.Constant(i32_ty, 0), ir.Constant(i32_ty, 0)], + inbounds=True, + ) + + # Set ArrowArray fields + # GEP helpers for fields [0..9] + def fld(idx: int): + return ib.gep( + arr_ptr, + [ir.Constant(i32_ty, 0), ir.Constant(i32_ty, idx)], + inbounds=True, + ) + + ib.store(ir.Constant(i64, 1), fld(0)) # length + ib.store(ir.Constant(i64, 0), fld(1)) # null_count + ib.store(ir.Constant(i64, 0), fld(2)) # offset + ib.store(ir.Constant(i64, 2), fld(3)) # n_buffers + ib.store(ir.Constant(i64, 0), fld(4)) # n_children + ib.store(buffers_i8pp, fld(5)) # buffers + # children = null + children_ty = self._arrow_array_ty.as_pointer().as_pointer() + ib.store(ir.Constant(children_ty, None), fld(6)) + # dictionary = null + ib.store(ir.Constant(i8p, None), fld(7)) + # release = null (stack lifetime; do not export) + rel_fn_ptr_ty = ir.FunctionType( + ir.VoidType(), [self._arrow_array_ty.as_pointer()] + ).as_pointer() + ib.store(ir.Constant(rel_fn_ptr_ty, None), fld(8)) + # private_data = null + ib.store(ir.Constant(i8p, None), fld(9)) + + # Result: %ArrowArray* (stack-allocated) + self.result_stack.append(arr_ptr) + + +@public +class LLVMLiteArrowIR(Builder): + """LLVM-IR transpiler that uses LLVMLiteArrowIRVisitor.""" + + def __init__(self) -> None: + super().__init__() + self.translator: LLVMLiteArrowIRVisitor = LLVMLiteArrowIRVisitor() + self.output_file = "" + self.tmp_path = "" + + def build(self, node: astx.AST, output_file: str) -> None: + """ + Transpile ASTx to LLVM-IR and build an executable via clang. + + NOTE: + - no extra libs linked. + """ + # Fresh visitor per build (mirrors your LLVMLiteIR) + self.translator = LLVMLiteArrowIRVisitor() + ir_text = self.translator.translate(node) + + mod = llvm.parse_assembly(ir_text) + obj = self.translator.target_machine.emit_object(mod) + + with tempfile.NamedTemporaryFile(suffix="", delete=False) as temp_file: + self.tmp_path = temp_file.name + + obj_path = f"{self.tmp_path}.o" + with open(obj_path, "wb") as f: + f.write(obj) + + self.output_file = output_file + + clang: Callable[..., Any] = xh.clang + clang(obj_path, "-o", self.output_file) + + import os + + os.chmod(self.output_file, 0o755) From 488f49571daa19102bb8a66b66d6937e22c872c6 Mon Sep 17 00:00:00 2001 From: yuvi-mittal Date: Sat, 20 Dec 2025 09:06:08 +0530 Subject: [PATCH 2/2] Fix llvmlite arrow builder handling --- src/irx/builders/llvmlite_arrow.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/irx/builders/llvmlite_arrow.py b/src/irx/builders/llvmlite_arrow.py index f2feaa1..d893d13 100644 --- a/src/irx/builders/llvmlite_arrow.py +++ b/src/irx/builders/llvmlite_arrow.py @@ -47,7 +47,9 @@ def __init__(self) -> None: # void (*release)(struct ArrowArray*); # void* private_data; # }; + def _init_arrow_types(self) -> None: + """Initialize Arrow C Data Interface types once per global context.""" ctx = ir.global_context self._arrow_array_ty = ctx.get_identified_type("struct.ArrowArray")