From 382806c1b771d6f2359c9dac0d3e0e853ba713b9 Mon Sep 17 00:00:00 2001 From: "STARGA Inc." Date: Thu, 15 Jan 2026 00:13:08 -0800 Subject: [PATCH] Add Mind programming language Mind (Machine Intelligence Native Design) is a systems programming language designed for AI/ML and numerical computing. Features include: - Static tensor types with compile-time shape checking - Reverse-mode automatic differentiation - MLIR/LLVM backend for optimized code generation - GPU device placement with `on(gpu)` blocks Sample files (3,490 lines) are from production projects: - cputer/nikolachess - NNUE chess engine - cputer/mind-ray - GPU path tracer - cputer/fractal-voyager - WebGPU fractal explorer - cputer/mind-spec - Language specification Website: https://mindlang.dev --- .gitmodules | 3 + grammars.yml | 2 + lib/linguist/languages.yml | 7 + samples/Mind/camera.mind | 247 ++++++++++ samples/Mind/complex.mind | 190 ++++++++ samples/Mind/conv2d.mind | 171 +++++++ samples/Mind/fractal.mind | 418 +++++++++++++++++ samples/Mind/material.mind | 343 ++++++++++++++ samples/Mind/mlp_backward.mind | 129 +++++ samples/Mind/render.mind | 333 +++++++++++++ samples/Mind/search.mind | 829 +++++++++++++++++++++++++++++++++ samples/Mind/tensor_board.mind | 540 +++++++++++++++++++++ samples/Mind/vec3.mind | 290 ++++++++++++ vendor/grammars/mind-grammar | 1 + 14 files changed, 3503 insertions(+) create mode 100644 samples/Mind/camera.mind create mode 100644 samples/Mind/complex.mind create mode 100644 samples/Mind/conv2d.mind create mode 100644 samples/Mind/fractal.mind create mode 100644 samples/Mind/material.mind create mode 100644 samples/Mind/mlp_backward.mind create mode 100644 samples/Mind/render.mind create mode 100644 samples/Mind/search.mind create mode 100644 samples/Mind/tensor_board.mind create mode 100644 samples/Mind/vec3.mind create mode 160000 vendor/grammars/mind-grammar diff --git a/.gitmodules b/.gitmodules index 6cbadcbda9..194a12c06c 100644 --- a/.gitmodules +++ b/.gitmodules @@ 
-1581,3 +1581,6 @@ [submodule "vendor/grammars/zephir-sublime"] path = vendor/grammars/zephir-sublime url = https://github.com/phalcon/zephir-sublime +[submodule "vendor/grammars/mind-grammar"] + path = vendor/grammars/mind-grammar + url = https://github.com/cputer/mind-grammar.git diff --git a/grammars.yml b/grammars.yml index 13979f0c14..4ca798246d 100644 --- a/grammars.yml +++ b/grammars.yml @@ -907,6 +907,8 @@ vendor/grammars/mediawiki.tmbundle: - text.html.mediawiki vendor/grammars/mercury-tmlanguage: - source.mercury +vendor/grammars/mind-grammar: +- source.mind vendor/grammars/mint-vscode: - source.mint vendor/grammars/mlir-grammar: diff --git a/lib/linguist/languages.yml b/lib/linguist/languages.yml index 7ea73879c5..b94cf16fdf 100644 --- a/lib/linguist/languages.yml +++ b/lib/linguist/languages.yml @@ -4720,6 +4720,13 @@ MiniZinc Data: tm_scope: source.mzn ace_mode: text language_id: 938193433 +Mind: + type: programming + color: "#7b2ff7" + extensions: + - ".mind" + tm_scope: source.mind + ace_mode: text Mint: type: programming extensions: diff --git a/samples/Mind/camera.mind b/samples/Mind/camera.mind new file mode 100644 index 0000000000..c404ddbd2e --- /dev/null +++ b/samples/Mind/camera.mind @@ -0,0 +1,247 @@ +// camera.mind - Camera Model for Mind Ray Path Tracer +// +// This module implements a virtual pinhole camera for ray generation. +// The camera uses a look-at model with configurable field of view +// and aspect ratio. 
+// +// Coordinate System: +// - Right-handed coordinate system with Y-up +// - +X: Right, +Y: Up, +Z: Toward camera (out of screen) +// - Camera looks along negative Z by default +// - Image plane is at distance 1 from the camera origin +// +// Camera Model: +// - Pinhole camera (no depth of field by default) +// - Rays originate from a single point (camera origin) +// - Image plane defines the viewing frustum +// +// The camera generates rays for each pixel, with optional jitter for +// anti-aliasing (handled in the render module). +// +// GPU Compatibility: +// - All fields are f32 for GPU alignment +// - No dynamic allocations +// - Pure functions (no side effects) +// +// Author: STARGA Inc. +// License: MIT + +// ============================================================================= +// Constants +// ============================================================================= + +/// Pi constant for degree-to-radian conversion +const PI: f32 = 3.14159265358979323846; + +/// Degrees to radians conversion factor +const DEG_TO_RAD: f32 = 0.017453292519943295; // PI / 180 + +/// Epsilon for near-zero vector detection +const CAMERA_EPSILON: f32 = 0.0001; + +// ============================================================================= +// Camera Type +// ============================================================================= + +/// Virtual camera for ray generation +/// +/// Stores precomputed values for efficient ray generation. The camera +/// defines a viewing frustum with the image plane at unit distance. 
+/// +/// # Coordinate Frame +/// - origin: camera position in world space +/// - horizontal: vector spanning image width +/// - vertical: vector spanning image height +/// - lower_left: position of bottom-left corner of image plane +pub struct Camera { + /// Camera position in world space + origin: Vec3, + /// Bottom-left corner of the image plane + lower_left: Vec3, + /// Vector spanning the full image width + horizontal: Vec3, + /// Vector spanning the full image height + vertical: Vec3, +} + +// ============================================================================= +// Camera Construction +// ============================================================================= + +/// Create a new camera with look-at positioning +/// +/// Constructs a camera positioned at `lookfrom`, looking toward `lookat`, +/// with the specified field of view and aspect ratio. +/// +/// # Arguments +/// * `lookfrom` - Camera position in world space +/// * `lookat` - Point the camera is aimed at +/// * `vup` - World up vector (typically (0, 1, 0)) +/// * `vfov` - Vertical field of view in degrees (typically 30-90) +/// * `aspect_ratio` - Image width / height ratio +/// +/// # Returns +/// * Configured Camera ready for ray generation +/// +/// # Camera Axes +/// - w: points backward (opposite to viewing direction) +/// - u: points right (perpendicular to w and vup) +/// - v: points up (perpendicular to w and u, may differ from vup if tilted) +/// +/// # Example +/// ```mind +/// let cam = camera_new( +/// vec3(0, 1, 5), // lookfrom: slightly above, behind origin +/// vec3(0, 0, 0), // lookat: origin +/// vec3(0, 1, 0), // vup: Y is up +/// 60.0, // 60 degree vertical FOV +/// 16.0 / 9.0 // 16:9 aspect ratio +/// ); +/// ``` +pub fn camera_new( + lookfrom: Vec3, + lookat: Vec3, + vup: Vec3, + vfov: f32, + aspect_ratio: f32 +) -> Camera { + // Convert vertical FOV from degrees to radians + // Clamp vfov to sensible range (0.1 to 179.0) to avoid tan(0) or tan(pi/2) + let 
clamped_vfov = f32_clamp(vfov, 0.1, 179.0); + let theta = clamped_vfov * DEG_TO_RAD; + // Half-height of viewport at distance 1 + let h = f32_tan(theta / 2.0); + let viewport_height = 2.0 * h; + // Ensure aspect_ratio is positive + let safe_aspect = if aspect_ratio <= 0.0 { 1.0 } else { aspect_ratio }; + let viewport_width = safe_aspect * viewport_height; + + // Build orthonormal camera basis vectors + // w: points backward (from lookat to lookfrom) + let view_dir = v3_sub(lookfrom, lookat); + + // Handle degenerate case: lookfrom == lookat + // Default to looking along -Z (standard forward direction) + let w = if v3_length_sq(view_dir) < CAMERA_EPSILON * CAMERA_EPSILON { + vec3(0.0, 0.0, 1.0) // Default backward direction (+Z) + } else { + v3_normalize(view_dir) + }; + + // u: points right (perpendicular to vup and w) + let cross_result = v3_cross(vup, w); + + // Handle degenerate case: vup parallel to w + // Use an alternate up vector to construct valid basis + let u = if v3_length_sq(cross_result) < CAMERA_EPSILON * CAMERA_EPSILON { + // vup is parallel to w, use alternate up vector + let alt_up = if f32_abs(w.y) < 0.9 { + vec3(0.0, 1.0, 0.0) // Y-up + } else { + vec3(1.0, 0.0, 0.0) // X-up fallback + }; + v3_normalize(v3_cross(alt_up, w)) + } else { + v3_normalize(cross_result) + }; + + // v: points up (perpendicular to w and u) + let v = v3_cross(w, u); + + let origin = lookfrom; + // Scale basis vectors to viewport dimensions + let horizontal = v3_mul(u, viewport_width); + let vertical = v3_mul(v, viewport_height); + + // Compute lower-left corner of image plane + // Image plane is at distance 1 from camera (along -w) + let lower_left = v3_sub( + v3_sub(v3_sub(origin, v3_div(horizontal, 2.0)), v3_div(vertical, 2.0)), + w + ); + + Camera { + origin: origin, + lower_left: lower_left, + horizontal: horizontal, + vertical: vertical, + } +} + +// ============================================================================= +// Ray Generation +// 
============================================================================= + +/// Generate a ray from the camera through a point on the image plane +/// +/// Given normalized image coordinates (u, v), generates a ray from the +/// camera origin through the corresponding point on the image plane. +/// +/// # Arguments +/// * `cam` - Camera configuration +/// * `u` - Horizontal image coordinate (0 = left edge, 1 = right edge) +/// * `v` - Vertical image coordinate (0 = bottom edge, 1 = top edge) +/// +/// # Returns +/// * Ray from camera origin through the specified image plane point +/// +/// # Coordinate Mapping +/// For proper pixel coordinates: +/// - u = (x + 0.5) / width for pixel center +/// - v = (y + 0.5) / height for pixel center +/// Add random offset within [0, 1/width] x [0, 1/height] for anti-aliasing. +/// +/// # Edge Cases +/// - u, v outside [0, 1] produces rays outside the normal frustum +/// - Values are not clamped to allow overscan/bleeding effects +/// +/// # Performance +/// ~15 float operations (no branches, no function calls except normalize) +#[inline] +pub fn camera_get_ray(cam: Camera, u: f32, v: f32) -> Ray { + // Point on image plane = lower_left + u*horizontal + v*vertical + // Direction = point - origin + let direction = v3_sub( + v3_add( + v3_add(cam.lower_left, v3_mul(cam.horizontal, u)), + v3_mul(cam.vertical, v) + ), + cam.origin + ); + + // Create normalized ray + ray_new(cam.origin, v3_normalize(direction)) +} + +/// Validate camera configuration +/// +/// Checks that the camera parameters are valid for ray generation. 
+/// +/// # Arguments +/// * `cam` - Camera to validate +/// +/// # Returns +/// * true if camera is valid, false otherwise +#[inline] +pub fn camera_is_valid(cam: &Camera) -> bool { + // Check that vectors are not degenerate (near-zero length) + let h_len_sq = v3_length_sq(cam.horizontal); + let v_len_sq = v3_length_sq(cam.vertical); + + // Minimum valid vector length squared (1e-6) + let min_len_sq = 0.000001; + + h_len_sq > min_len_sq && v_len_sq > min_len_sq +} + +// ============================================================================= +// External Dependencies +// ============================================================================= +// Math utilities imported from math_stubs.mind: +// - f32_tan: Tangent function for FOV calculation +// +// Vector utilities imported from vec3.mind: +// - v3_normalize, v3_sub, v3_add, v3_mul, v3_div, v3_cross +// +// Ray utilities imported from ray.mind: +// - ray_new: Ray constructor diff --git a/samples/Mind/complex.mind b/samples/Mind/complex.mind new file mode 100644 index 0000000000..9603df89e8 --- /dev/null +++ b/samples/Mind/complex.mind @@ -0,0 +1,190 @@ +// complex.mind - Complex Number Operations for Fractal Voyager +// +// Complex numbers are represented as Tensor where: +// - index 0 = real part +// - index 1 = imaginary part +// +// For batched operations (computing many pixels): +// - Tensor for N complex numbers +// - Tensor for a 2D grid of complex numbers +// +// All operations use MIND's tensor semantics with broadcasting. +// +// Author: STARGA Inc. 
+// License: MIT +// Specification reference: spec/v1.0/language.md + +import tensor::zeros; +import tensor::ones; +import math::sqrt; +import math::log; + +// ============================================================================= +// Complex Number Creation +// ============================================================================= + +// Create complex number from real and imaginary parts +// Returns: Tensor representing (real + imag*i) +fn complex(real: f32, imag: f32) -> Tensor { + [real, imag] +} + +// Complex zero: 0 + 0i +fn complex_zero() -> Tensor { + [0.0, 0.0] +} + +// Complex one: 1 + 0i +fn complex_one() -> Tensor { + [1.0, 0.0] +} + +// Complex i: 0 + 1i +fn complex_i() -> Tensor { + [0.0, 1.0] +} + +// ============================================================================= +// Complex Arithmetic (Single Complex Numbers) +// ============================================================================= + +// Complex addition: (a + bi) + (c + di) = (a+c) + (b+d)i +fn c_add(z1: Tensor, z2: Tensor) -> Tensor { + z1 + z2 +} + +// Complex subtraction: (a + bi) - (c + di) = (a-c) + (b-d)i +fn c_sub(z1: Tensor, z2: Tensor) -> Tensor { + z1 - z2 +} + +// Complex multiplication: (a + bi) * (c + di) = (ac - bd) + (ad + bc)i +fn c_mul(z1: Tensor, z2: Tensor) -> Tensor { + let a = z1[0]; + let b = z1[1]; + let c = z2[0]; + let d = z2[1]; + [a * c - b * d, a * d + b * c] +} + +// Complex square: z^2 = (x + yi)^2 = (x^2 - y^2) + 2xyi +// Optimized special case of c_mul(z, z) +fn c_sq(z: Tensor) -> Tensor { + let x = z[0]; + let y = z[1]; + [x * x - y * y, 2.0 * x * y] +} + +// Complex conjugate: conj(a + bi) = a - bi +fn c_conj(z: Tensor) -> Tensor { + [z[0], -z[1]] +} + +// Complex magnitude squared: |z|^2 = x^2 + y^2 +// Avoids sqrt for escape radius checks +fn c_abs_sq(z: Tensor) -> f32 { + z[0] * z[0] + z[1] * z[1] +} + +// Complex magnitude: |z| = sqrt(x^2 + y^2) +fn c_abs(z: Tensor) -> f32 { + sqrt(c_abs_sq(z)) +} + +// Scalar multiplication: 
t * (a + bi) = ta + tbi +fn c_scale(z: Tensor, t: f32) -> Tensor { + z * t +} + +// Component-wise absolute value (for Burning Ship fractal) +fn c_abs_components(z: Tensor) -> Tensor { + [abs(z[0]), abs(z[1])] +} + +// ============================================================================= +// Batched Complex Operations (for GPU parallelism) +// ============================================================================= + +// Batched complex multiplication: compute many multiplications at once +// Shape: [N, 2] @ [N, 2] -> [N, 2] +fn c_mul_batch(z1: Tensor, z2: Tensor) -> Tensor { + let a = z1[:, 0]; // Shape [N] + let b = z1[:, 1]; // Shape [N] + let c = z2[:, 0]; // Shape [N] + let d = z2[:, 1]; // Shape [N] + + let real = a * c - b * d; // Shape [N] + let imag = a * d + b * c; // Shape [N] + + stack([real, imag], axis=1) // Shape [N, 2] +} + +// Batched complex square: z^2 for many complex numbers +// Shape: [N, 2] -> [N, 2] +fn c_sq_batch(z: Tensor) -> Tensor { + let x = z[:, 0]; // Shape [N] + let y = z[:, 1]; // Shape [N] + + let real = x * x - y * y; + let imag = 2.0 * x * y; + + stack([real, imag], axis=1) +} + +// Batched magnitude squared: |z|^2 for many complex numbers +// Shape: [N, 2] -> [N] +fn c_abs_sq_batch(z: Tensor) -> Tensor { + let x = z[:, 0]; + let y = z[:, 1]; + x * x + y * y +} + +// Batched complex addition +// Shape: [N, 2] + [N, 2] -> [N, 2] +fn c_add_batch(z1: Tensor, z2: Tensor) -> Tensor { + z1 + z2 +} + +// Batched component-wise absolute value (Burning Ship) +// Shape: [N, 2] -> [N, 2] +fn c_abs_components_batch(z: Tensor) -> Tensor { + abs(z) +} + +// Batched conjugate (Tricorn) +// Shape: [N, 2] -> [N, 2] +fn c_conj_batch(z: Tensor) -> Tensor { + let real = z[:, 0]; + let imag = -z[:, 1]; + stack([real, imag], axis=1) +} + +// ============================================================================= +// 2D Grid Complex Operations (for image rendering) +// 
============================================================================= + +// Complex square for 2D grid of complex numbers +// Shape: [H, W, 2] -> [H, W, 2] +fn c_sq_grid(z: Tensor) -> Tensor { + let x = z[:, :, 0]; // Shape [H, W] + let y = z[:, :, 1]; // Shape [H, W] + + let real = x * x - y * y; + let imag = 2.0 * x * y; + + stack([real, imag], axis=2) +} + +// Magnitude squared for 2D grid +// Shape: [H, W, 2] -> [H, W] +fn c_abs_sq_grid(z: Tensor) -> Tensor { + let x = z[:, :, 0]; + let y = z[:, :, 1]; + x * x + y * y +} + +// Add two grids of complex numbers +// Shape: [H, W, 2] + [H, W, 2] -> [H, W, 2] +fn c_add_grid(z1: Tensor, z2: Tensor) -> Tensor { + z1 + z2 +} diff --git a/samples/Mind/conv2d.mind b/samples/Mind/conv2d.mind new file mode 100644 index 0000000000..666ea26793 --- /dev/null +++ b/samples/Mind/conv2d.mind @@ -0,0 +1,171 @@ +// MIND Language Example: 2D Convolution +// Specification reference: spec/v1.0/ir.md#linear-and-tensor-algebra (Conv2d) +// +// This example demonstrates Conv2d operations with various +// stride and padding configurations. MIND uses NHWC format. 
+ +import tensor::zeros; +import tensor::ones; + +// Example 1: Basic convolution (valid padding, stride 1) +// Reference: spec/v1.0/ir.md#linear-and-tensor-algebra +// +// Input format: NHWC [batch, height, width, channels_in] +// Filter format: HWCF [kernel_h, kernel_w, channels_in, channels_out] +// Output: [batch, out_h, out_w, channels_out] +// +// Expected output shape: [1, 3, 3, 2] (5-3+1 = 3 with valid padding) +fn basic_conv2d() -> Tensor { + // Input: 1 image, 5x5, 3 channels + let input: Tensor = ones([1, 5, 5, 3]); + + // Filter: 3x3 kernel, 3 input channels, 2 output channels + let filter: Tensor = ones([3, 3, 3, 2]); + + // Conv2d with valid padding (no padding) and stride 1 + // Output height: (5 - 3) / 1 + 1 = 3 + // Output width: (5 - 3) / 1 + 1 = 3 + let output = conv2d(input, filter, strides=[1, 1], padding=Valid); + + output +} + +// Example 2: Convolution with 'same' padding +// Reference: spec/v1.0/ir.md#linear-and-tensor-algebra +// Same padding preserves spatial dimensions (with stride 1) +// +// Expected output shape: [1, 5, 5, 4] +fn same_padding_conv2d() -> Tensor { + // Input: 1 image, 5x5, 3 channels + let input: Tensor = ones([1, 5, 5, 3]); + + // Filter: 3x3 kernel, 3 input channels, 4 output channels + let filter: Tensor = ones([3, 3, 3, 4]); + + // Same padding: output has same spatial dims as input + // Padding is computed automatically: pad = (kernel_size - 1) / 2 + let output = conv2d(input, filter, strides=[1, 1], padding=Same); + + output // Shape [1, 5, 5, 4] +} + +// Example 3: Strided convolution +// Reference: spec/v1.0/ir.md#linear-and-tensor-algebra +// Stride reduces output spatial dimensions +// +// Expected output shape: [1, 2, 2, 8] +fn strided_conv2d() -> Tensor { + // Input: 1 image, 6x6, 3 channels + let input: Tensor = ones([1, 6, 6, 3]); + + // Filter: 3x3 kernel + let filter: Tensor = ones([3, 3, 3, 8]); + + // Stride 2 in both dimensions + // Output height: (6 - 3) / 2 + 1 = 2 + // Output width: (6 - 3) / 2 + 1 
= 2 + let output = conv2d(input, filter, strides=[2, 2], padding=Valid); + + output +} + +// Example 4: Custom padding +// Reference: spec/v1.0/ir.md#linear-and-tensor-algebra +// Explicit padding values: [[pad_top, pad_bottom], [pad_left, pad_right]] +// +// Expected output shape: [1, 7, 9, 4] +fn custom_padding_conv2d() -> Tensor { + // Input: 1 image, 5x5, 3 channels + let input: Tensor = ones([1, 5, 5, 3]); + + // Filter: 3x3 kernel + let filter: Tensor = ones([3, 3, 3, 4]); + + // Custom padding: [[top=2, bottom=2], [left=3, right=3]] + // Padded input: 5+2+2=9 height, 5+3+3=11 width + // Output: (9-3)/1+1=7 height, (11-3)/1+1=9 width + let output = conv2d( + input, filter, + strides=[1, 1], + padding=Custom([[2, 2], [3, 3]]) + ); + + output +} + +// Example 5: Batched convolution +// Reference: spec/v1.0/ir.md#linear-and-tensor-algebra +// Same filter applied to each image in batch +// +// Expected output shape: [batch=4, 6, 6, 16] +fn batched_conv2d() -> Tensor { + // Batch of 4 images + let input: Tensor = ones([4, 8, 8, 3]); + + // Single filter (applied to all batch elements) + let filter: Tensor = ones([3, 3, 3, 16]); + + // Each image in batch processed independently + let output = conv2d(input, filter, strides=[1, 1], padding=Valid); + + output +} + +// Example 6: 1x1 convolution (pointwise) +// Reference: spec/v1.0/ir.md#linear-and-tensor-algebra +// 1x1 convolutions mix channels without spatial context +// +// Expected output shape: [1, 8, 8, 64] +fn pointwise_conv() -> Tensor { + // Input with 32 channels + let input: Tensor = ones([1, 8, 8, 32]); + + // 1x1 filter: channel mixing only + let filter: Tensor = ones([1, 1, 32, 64]); + + // No spatial reduction with 1x1 kernel + let output = conv2d(input, filter, strides=[1, 1], padding=Valid); + + output +} + +// Example 7: Depthwise convolution pattern +// Reference: spec/v1.0/ir.md#linear-and-tensor-algebra +// Each input channel convolved separately (groups = channels) +// +// Note: True depthwise 
conv may require specialized op or grouping +fn depthwise_pattern() -> Tensor { + let input: Tensor = ones([1, 8, 8, 3]); + + // Depthwise: 1 filter per input channel + // For true depthwise, use channels_out = channels_in + // This is approximate; real depthwise needs groups parameter + let filter: Tensor = ones([3, 3, 3, 3]); + + let output = conv2d(input, filter, strides=[1, 1], padding=Valid); + + output +} + +// Example 8: Conv2d output shape formula +// Reference: spec/v1.0/ir.md#linear-and-tensor-algebra +// H_out = floor((H_in + 2*pad_h - kernel_h) / stride_h) + 1 +fn output_shape_demo() { + // Demonstrating shape calculation + // + // Input: [batch=1, H=28, W=28, C=1] + // Filter: [H_k=5, W_k=5, C_in=1, C_out=32] + // Stride: [2, 2] + // Padding: Valid (pad=0) + // + // H_out = floor((28 + 0 - 5) / 2) + 1 = floor(23/2) + 1 = 11 + 1 = 12 + // W_out = floor((28 + 0 - 5) / 2) + 1 = 12 + // + // Output shape: [1, 12, 12, 32] + + let input: Tensor = zeros([1, 28, 28, 1]); + let filter: Tensor = zeros([5, 5, 1, 32]); + + let output = conv2d(input, filter, strides=[2, 2], padding=Valid); + // output has shape [1, 12, 12, 32] +} diff --git a/samples/Mind/fractal.mind b/samples/Mind/fractal.mind new file mode 100644 index 0000000000..5c13fb6fcc --- /dev/null +++ b/samples/Mind/fractal.mind @@ -0,0 +1,418 @@ +// fractal.mind - Fractal Iteration Algorithms for Fractal Voyager +// +// Implements classic fractal iteration algorithms using MIND tensor operations: +// - Mandelbrot: z_{n+1} = z_n^2 + c (z_0 = 0) +// - Julia: z_{n+1} = z_n^2 + c (z_0 varies, c fixed) +// - Burning Ship: z_{n+1} = (|Re(z)| + i|Im(z)|)^2 + c +// - Tricorn: z_{n+1} = conj(z)^2 + c +// +// All algorithms compute smooth iteration counts for banding-free coloring. +// Operations are vectorized over entire images for GPU parallelism. +// +// Author: STARGA Inc. 
+// License: MIT +// Specification reference: spec/v1.0/language.md + +import tensor::zeros; +import tensor::ones; +import math::sqrt; +import math::log; +import math::abs; +import math::max; +import math::min; + +// ============================================================================= +// Constants +// ============================================================================= + +// Escape radius squared (|z|^2 > 4 means point escaped) +fn escape_radius_sq() -> f32 { + 4.0 +} + +// Log(2) for smooth coloring calculations +fn log2() -> f32 { + 0.693147180559945 +} + +// ============================================================================= +// Fractal Type Codes +// ============================================================================= +// 0 = Mandelbrot +// 1 = Julia +// 2 = Burning Ship +// 3 = Tricorn + +// ============================================================================= +// Single-Point Fractal Iteration +// ============================================================================= + +// Mandelbrot iteration for a single point +// c: complex coordinate to test [real, imag] +// max_iter: maximum iterations +// Returns: [smooth_iter, escaped] where escaped is 1.0 if escaped, 0.0 if in set +fn mandelbrot_point(c: Tensor, max_iter: i32) -> Tensor { + let z = [0.0, 0.0]; // z_0 = 0 + let escape_r_sq = escape_radius_sq(); + let log_2 = log2(); + + // Iterative computation + let result = iterate(max_iter, z, fn(i: i32, z: Tensor) -> Tensor { + let z_sq = z[0] * z[0] + z[1] * z[1]; + + // Check escape condition + select(z_sq > escape_r_sq, z, + // z = z^2 + c + [z[0] * z[0] - z[1] * z[1] + c[0], + 2.0 * z[0] * z[1] + c[1]]) + }); + + // Compute smooth iteration count + let z_final = result; + let z_sq_final = z_final[0] * z_final[0] + z_final[1] * z_final[1]; + let escaped = z_sq_final > escape_r_sq; + + let smooth_iter = select(escaped, + // Smooth coloring formula: i + 1 - log(log|z|)/log(2) + cast(count_iterations(c, 
max_iter), f32) + 1.0 - + log(log(sqrt(z_sq_final)) / log_2) / log_2, + cast(max_iter, f32)); + + [smooth_iter, select(escaped, 1.0, 0.0)] +} + +// Julia iteration for a single point +// z0: initial z value (screen coordinate) +// c: Julia constant +// max_iter: maximum iterations +fn julia_point(z0: Tensor, c: Tensor, max_iter: i32) -> Tensor { + let z = z0; + let escape_r_sq = escape_radius_sq(); + let log_2 = log2(); + + let result = iterate(max_iter, z, fn(i: i32, z: Tensor) -> Tensor { + let z_sq = z[0] * z[0] + z[1] * z[1]; + + select(z_sq > escape_r_sq, z, + [z[0] * z[0] - z[1] * z[1] + c[0], + 2.0 * z[0] * z[1] + c[1]]) + }); + + let z_sq_final = result[0] * result[0] + result[1] * result[1]; + let escaped = z_sq_final > escape_r_sq; + + let smooth_iter = select(escaped, + cast(count_julia_iterations(z0, c, max_iter), f32) + 1.0 - + log(log(sqrt(z_sq_final)) / log_2) / log_2, + cast(max_iter, f32)); + + [smooth_iter, select(escaped, 1.0, 0.0)] +} + +// ============================================================================= +// Vectorized Fractal Computation (GPU-Parallel) +// ============================================================================= + +// Compute Mandelbrot for entire image grid +// c_grid: Tensor - complex coordinates for each pixel +// max_iter: maximum iterations +// Returns: Tensor - smooth iteration counts +fn mandelbrot_grid(c_grid: Tensor, max_iter: i32) -> Tensor { + let escape_r_sq = escape_radius_sq(); + let log_2 = log2(); + + // Initialize z to zeros for all pixels + let z_real = zeros([H, W]); + let z_imag = zeros([H, W]); + let iterations = zeros([H, W]); + let escaped = zeros([H, W]); // 0 = not escaped, 1 = escaped + + // Extract c components + let c_real = c_grid[:, :, 0]; // [H, W] + let c_imag = c_grid[:, :, 1]; // [H, W] + + // Iterate all pixels in parallel + let state = [z_real, z_imag, iterations, escaped]; + + let final_state = fold(range(max_iter), state, + fn(i: i32, state: Tensor) -> Tensor { + let z_r = 
state[0]; + let z_i = state[1]; + let iter = state[2]; + let esc = state[3]; + + // Magnitude squared: |z|^2 + let z_sq = z_r * z_r + z_i * z_i; + + // Check which pixels should continue + let should_continue = (z_sq <= escape_r_sq) * (1.0 - esc); + + // z^2 = (x^2 - y^2) + 2xyi + let new_z_r = z_r * z_r - z_i * z_i + c_real; + let new_z_i = 2.0 * z_r * z_i + c_imag; + + // Update only pixels that haven't escaped + let z_r_next = select(should_continue > 0.5, new_z_r, z_r); + let z_i_next = select(should_continue > 0.5, new_z_i, z_i); + + // Increment iteration count for non-escaped pixels + let iter_next = iter + should_continue; + + // Mark newly escaped pixels + let z_sq_next = z_r_next * z_r_next + z_i_next * z_i_next; + let just_escaped = (z_sq_next > escape_r_sq) * should_continue; + let esc_next = esc + just_escaped; + + stack([z_r_next, z_i_next, iter_next, esc_next], axis=0) + }); + + // Extract final state + let z_r_final = final_state[0]; + let z_i_final = final_state[1]; + let iter_final = final_state[2]; + let esc_final = final_state[3]; + + // Compute smooth iteration count + let z_sq_final = z_r_final * z_r_final + z_i_final * z_i_final; + + // Smooth coloring: iter + 1 - log(log|z|)/log(2) + let log_zn = log(z_sq_final) / 2.0; + let nu = log(log_zn / log_2) / log_2; + let smooth = iter_final + 1.0 - nu; + + // Return max_iter for points still in set + select(esc_final > 0.5, smooth, cast(max_iter, f32) * ones([H, W])) +} + +// Compute Julia set for entire image grid +// z_grid: Tensor - initial z values (screen coordinates) +// c: Julia constant [real, imag] +// max_iter: maximum iterations +fn julia_grid(z_grid: Tensor, c: Tensor, max_iter: i32) -> Tensor { + let escape_r_sq = escape_radius_sq(); + let log_2 = log2(); + + // Initialize z from grid + let z_real = z_grid[:, :, 0]; + let z_imag = z_grid[:, :, 1]; + let iterations = zeros([H, W]); + let escaped = zeros([H, W]); + + // Julia constant (broadcast to all pixels) + let c_real = c[0]; + let 
c_imag = c[1]; + + let state = stack([z_real, z_imag, iterations, escaped], axis=0); + + let final_state = fold(range(max_iter), state, + fn(i: i32, state: Tensor) -> Tensor { + let z_r = state[0]; + let z_i = state[1]; + let iter = state[2]; + let esc = state[3]; + + let z_sq = z_r * z_r + z_i * z_i; + let should_continue = (z_sq <= escape_r_sq) * (1.0 - esc); + + let new_z_r = z_r * z_r - z_i * z_i + c_real; + let new_z_i = 2.0 * z_r * z_i + c_imag; + + let z_r_next = select(should_continue > 0.5, new_z_r, z_r); + let z_i_next = select(should_continue > 0.5, new_z_i, z_i); + let iter_next = iter + should_continue; + + let z_sq_next = z_r_next * z_r_next + z_i_next * z_i_next; + let just_escaped = (z_sq_next > escape_r_sq) * should_continue; + let esc_next = esc + just_escaped; + + stack([z_r_next, z_i_next, iter_next, esc_next], axis=0) + }); + + let z_r_final = final_state[0]; + let z_i_final = final_state[1]; + let iter_final = final_state[2]; + let esc_final = final_state[3]; + + let z_sq_final = z_r_final * z_r_final + z_i_final * z_i_final; + let log_zn = log(z_sq_final) / 2.0; + let nu = log(log_zn / log_2) / log_2; + let smooth = iter_final + 1.0 - nu; + + select(esc_final > 0.5, smooth, cast(max_iter, f32) * ones([H, W])) +} + +// Compute Burning Ship for entire image grid +// Uses |Re(z)| and |Im(z)| before squaring +fn burning_ship_grid(c_grid: Tensor, max_iter: i32) -> Tensor { + let escape_r_sq = escape_radius_sq(); + let log_2 = log2(); + + let z_real = zeros([H, W]); + let z_imag = zeros([H, W]); + let iterations = zeros([H, W]); + let escaped = zeros([H, W]); + + let c_real = c_grid[:, :, 0]; + let c_imag = c_grid[:, :, 1]; + + let state = stack([z_real, z_imag, iterations, escaped], axis=0); + + let final_state = fold(range(max_iter), state, + fn(i: i32, state: Tensor) -> Tensor { + let z_r = state[0]; + let z_i = state[1]; + let iter = state[2]; + let esc = state[3]; + + let z_sq = z_r * z_r + z_i * z_i; + let should_continue = (z_sq <= 
escape_r_sq) * (1.0 - esc); + + // Burning Ship: take absolute values before squaring + let z_r_abs = abs(z_r); + let z_i_abs = abs(z_i); + + let new_z_r = z_r_abs * z_r_abs - z_i_abs * z_i_abs + c_real; + let new_z_i = 2.0 * z_r_abs * z_i_abs + c_imag; + + let z_r_next = select(should_continue > 0.5, new_z_r, z_r); + let z_i_next = select(should_continue > 0.5, new_z_i, z_i); + let iter_next = iter + should_continue; + + let z_sq_next = z_r_next * z_r_next + z_i_next * z_i_next; + let just_escaped = (z_sq_next > escape_r_sq) * should_continue; + let esc_next = esc + just_escaped; + + stack([z_r_next, z_i_next, iter_next, esc_next], axis=0) + }); + + let z_r_final = final_state[0]; + let z_i_final = final_state[1]; + let iter_final = final_state[2]; + let esc_final = final_state[3]; + + let z_sq_final = z_r_final * z_r_final + z_i_final * z_i_final; + let log_zn = log(z_sq_final) / 2.0; + let nu = log(log_zn / log_2) / log_2; + let smooth = iter_final + 1.0 - nu; + + select(esc_final > 0.5, smooth, cast(max_iter, f32) * ones([H, W])) +} + +// Compute Tricorn (Mandelbar) for entire image grid +// Uses conjugate: z_{n+1} = conj(z_n)^2 + c +fn tricorn_grid(c_grid: Tensor, max_iter: i32) -> Tensor { + let escape_r_sq = escape_radius_sq(); + let log_2 = log2(); + + let z_real = zeros([H, W]); + let z_imag = zeros([H, W]); + let iterations = zeros([H, W]); + let escaped = zeros([H, W]); + + let c_real = c_grid[:, :, 0]; + let c_imag = c_grid[:, :, 1]; + + let state = stack([z_real, z_imag, iterations, escaped], axis=0); + + let final_state = fold(range(max_iter), state, + fn(i: i32, state: Tensor) -> Tensor { + let z_r = state[0]; + let z_i = state[1]; + let iter = state[2]; + let esc = state[3]; + + let z_sq = z_r * z_r + z_i * z_i; + let should_continue = (z_sq <= escape_r_sq) * (1.0 - esc); + + // Tricorn: conj(z)^2 = (x - yi)^2 = (x^2 - y^2) - 2xyi + let new_z_r = z_r * z_r - z_i * z_i + c_real; + let new_z_i = -2.0 * z_r * z_i + c_imag; // Note: negative for 
conjugate + + let z_r_next = select(should_continue > 0.5, new_z_r, z_r); + let z_i_next = select(should_continue > 0.5, new_z_i, z_i); + let iter_next = iter + should_continue; + + let z_sq_next = z_r_next * z_r_next + z_i_next * z_i_next; + let just_escaped = (z_sq_next > escape_r_sq) * should_continue; + let esc_next = esc + just_escaped; + + stack([z_r_next, z_i_next, iter_next, esc_next], axis=0) + }); + + let z_r_final = final_state[0]; + let z_i_final = final_state[1]; + let iter_final = final_state[2]; + let esc_final = final_state[3]; + + let z_sq_final = z_r_final * z_r_final + z_i_final * z_i_final; + let log_zn = log(z_sq_final) / 2.0; + let nu = log(log_zn / log_2) / log_2; + let smooth = iter_final + 1.0 - nu; + + select(esc_final > 0.5, smooth, cast(max_iter, f32) * ones([H, W])) +} + +// ============================================================================= +// Unified Interface +// ============================================================================= + +// Compute fractal for image grid using specified type +// fractal_type: 0=Mandelbrot, 1=Julia, 2=Burning Ship, 3=Tricorn +// c_grid: complex coordinates [H, W, 2] +// julia_c: Julia constant (only used when fractal_type=1) +// max_iter: maximum iterations +fn compute_fractal_grid( + fractal_type: i32, + c_grid: Tensor, + julia_c: Tensor, + max_iter: i32 +) -> Tensor { + select(fractal_type == 0, mandelbrot_grid(c_grid, max_iter), + select(fractal_type == 1, julia_grid(c_grid, julia_c, max_iter), + select(fractal_type == 2, burning_ship_grid(c_grid, max_iter), + tricorn_grid(c_grid, max_iter)))) +} + +// ============================================================================= +// Coordinate Mapping +// ============================================================================= + +// Generate complex coordinate grid from screen parameters +// width, height: screen dimensions +// center: view center in complex plane [real, imag] +// zoom: zoom level (higher = more zoomed in) +fn 
make_coordinate_grid( + width: i32, + height: i32, + center: Tensor, + zoom: f32 +) -> Tensor { + let aspect = cast(width, f32) / cast(height, f32); + let scale = 3.5 / zoom; + + // Generate pixel coordinates + let x_coords = linspace(0.0, cast(width - 1, f32), width); // [W] + let y_coords = linspace(0.0, cast(height - 1, f32), height); // [H] + + // Create meshgrid + let x_grid = broadcast(x_coords, [height, width]); // [H, W] + let y_grid = broadcast(expand_dims(y_coords, 1), [height, width]); // [H, W] + + // Map to [-0.5, 0.5] range + let u = x_grid / cast(width, f32) - 0.5; + let v = y_grid / cast(height, f32) - 0.5; + + // Apply zoom, aspect ratio, and center offset + let real = u * scale * aspect + center[0]; + let imag = v * scale + center[1]; + + stack([real, imag], axis=2) +} + +// Calculate adaptive iteration count based on zoom level +fn adaptive_iterations(zoom: f32, base_iter: i32, max_allowed: i32) -> i32 { + let log_2 = log2(); + let zoom_factor = log(max(1.0, zoom)) / log_2; + let iter = base_iter + cast(zoom_factor * 50.0, i32); + min(iter, max_allowed) +} diff --git a/samples/Mind/material.mind b/samples/Mind/material.mind new file mode 100644 index 0000000000..0a0407ba2a --- /dev/null +++ b/samples/Mind/material.mind @@ -0,0 +1,343 @@ +// material.mind - Material System for Mind Ray Path Tracer +// +// This module implements physically-based materials for path tracing. +// Each material type defines how light interacts with a surface: +// +// Material Types: +// - Diffuse: Lambertian (matte) surfaces that scatter light uniformly +// - Metal: Reflective surfaces with optional roughness/fuzz +// - Dielectric: Transparent materials (glass, water) with refraction +// - Emissive: Light-emitting surfaces for area lights +// +// All scattering functions return both the scattered ray and an updated +// RNG state, enabling deterministic rendering. 
+// +// References: +// - "Physically Based Rendering" by Pharr, Jakob, Humphreys +// - "Ray Tracing in One Weekend" by Peter Shirley +// +// Author: STARGA Inc. +// License: MIT + +// ============================================================================= +// Material Data Types +// ============================================================================= + +/// Diffuse (Lambertian) material +/// +/// Scatters light uniformly in all directions according to Lambert's cosine law. +/// The albedo represents the fraction of light reflected (per channel). +pub struct MaterialDiffuse { + /// Surface color/reflectance (RGB, each in [0, 1]) + albedo: Vec3, +} + +/// Metallic material with optional roughness +/// +/// Reflects light like a mirror, with optional fuzz/roughness that +/// perturbs the reflection direction. +pub struct MaterialMetal { + /// Surface reflectance (RGB, each in [0, 1]) + albedo: Vec3, + /// Surface roughness (0 = perfect mirror, 1 = very fuzzy) + roughness: f32, +} + +/// Dielectric (glass-like) material +/// +/// Transmits and reflects light according to Snell's law and +/// Fresnel equations. Supports total internal reflection. +pub struct MaterialDielectric { + /// Tint applied to scattered light (scatter_dielectric uses it as attenuation for both reflected and refracted rays) + albedo: Vec3, + /// Index of refraction (1.0 = air, 1.5 = glass, 1.33 = water) + ior: f32, +} + +/// Emissive (light-emitting) material +/// +/// Emits light of the specified color and strength. +/// Used for area lights in the scene. +pub struct MaterialEmissive { + /// Emission color (RGB) + color: Vec3, + /// Emission strength multiplier + strength: f32, +} + +/// Sum type for all material variants +/// +/// Each geometry primitive references a material by index into the scene's +/// material array. The material enum determines scattering behavior. 
+pub enum Material { + /// Matte/Lambertian surface + Diffuse(MaterialDiffuse), + /// Reflective metal surface + Metal(MaterialMetal), + /// Transparent glass-like surface + Dielectric(MaterialDielectric), + /// Light-emitting surface + Emissive(MaterialEmissive), +} + +// ============================================================================= +// Scatter Result +// ============================================================================= + +/// Result of a material scattering event +/// +/// Contains the scattered ray direction and the attenuation (color filter) +/// applied to the light traveling along that path. +pub struct ScatterResult { + /// The scattered ray (origin at hit point, direction from material) + scattered: Ray, + /// Color attenuation applied to light (RGB multiplier) + attenuation: Vec3, + /// Whether scattering occurred (false = ray absorbed) + did_scatter: bool, +} + +// ============================================================================= +// Scattering Functions +// ============================================================================= + +/// Scatter light from a diffuse (Lambertian) surface +/// +/// Generates a random scatter direction in the hemisphere around the normal, +/// following Lambert's cosine law for diffuse reflection. +/// +/// # Arguments +/// * `mat` - Diffuse material properties +/// * `r_in` - Incoming ray (unused for diffuse, included for API consistency) +/// * `hit_point` - Point of intersection +/// * `normal` - Surface normal at hit point +/// * `rng` - Random number generator state +/// +/// # Returns +/// * Tuple of (ScatterResult, updated_rng_state) +/// +/// # Algorithm +/// 1. Generate random unit vector +/// 2. Add to normal (cosine-weighted hemisphere sampling) +/// 3. 
Handle degenerate case where sum is near-zero +pub fn scatter_diffuse( + mat: MaterialDiffuse, + r_in: Ray, + hit_point: Vec3, + normal: Vec3, + rng: RngState +) -> (ScatterResult, RngState) { + let (random_vec, new_rng) = rng_vec3_unit_vector(rng); + let scatter_dir_raw = v3_add(normal, random_vec); + + // Handle degenerate case where random vector is opposite to normal + let scatter_dir = if v3_near_zero(scatter_dir_raw) { + normal + } else { + scatter_dir_raw + }; + + let scattered = ray_new(hit_point, v3_normalize(scatter_dir)); + + let result = ScatterResult { + scattered: scattered, + attenuation: mat.albedo, + did_scatter: true, + }; + + (result, new_rng) +} + +/// Scatter light from a metallic surface +/// +/// Reflects the incoming ray about the normal with optional roughness. +/// Roughness adds random perturbation to simulate brushed or imperfect metals. +/// +/// # Arguments +/// * `mat` - Metal material properties +/// * `r_in` - Incoming ray +/// * `hit_point` - Point of intersection +/// * `normal` - Surface normal at hit point +/// * `rng` - Random number generator state +/// +/// # Returns +/// * Tuple of (ScatterResult, updated_rng_state) +/// * did_scatter will be false if the fuzzed ray goes below the surface +pub fn scatter_metal( + mat: MaterialMetal, + r_in: Ray, + hit_point: Vec3, + normal: Vec3, + rng: RngState +) -> (ScatterResult, RngState) { + // Perfect reflection direction + let reflected = v3_reflect(r_in.direction, normal); + + // Add roughness/fuzz by perturbing the reflection + let (fuzz, new_rng) = rng_vec3_unit_sphere(rng); + let fuzzed = v3_add(reflected, v3_mul(fuzz, mat.roughness)); + + // Handle degenerate case where fuzzed vector is near-zero + let safe_fuzzed = if v3_near_zero(fuzzed) { + reflected // Fall back to perfect reflection + } else { + fuzzed + }; + + let scattered = ray_new(hit_point, v3_normalize(safe_fuzzed)); + + // Reject if fuzzed ray points into the surface + let did_scatter = v3_dot(safe_fuzzed, normal) > 
0.0; + + let result = ScatterResult { + scattered: scattered, + attenuation: mat.albedo, + did_scatter: did_scatter, + }; + + (result, new_rng) +} + +/// Scatter light from a dielectric (glass-like) surface +/// +/// Handles both reflection and refraction using Snell's law. +/// Uses Schlick's approximation for Fresnel reflectance. +/// Correctly handles total internal reflection. +/// +/// # Arguments +/// * `mat` - Dielectric material properties (includes IOR) +/// * `r_in` - Incoming ray +/// * `hit_point` - Point of intersection +/// * `normal` - Surface normal (always points outward) +/// * `front_face` - True if ray hit front face, false for back face +/// * `rng` - Random number generator state +/// +/// # Returns +/// * Tuple of (ScatterResult, updated_rng_state) +/// +/// # Physics +/// - Snell's law: n1 * sin(theta1) = n2 * sin(theta2) +/// - Total internal reflection when sin(theta2) > 1 +/// - Fresnel effect: more reflection at grazing angles +pub fn scatter_dielectric( + mat: MaterialDielectric, + r_in: Ray, + hit_point: Vec3, + normal: Vec3, + front_face: bool, + rng: RngState +) -> (ScatterResult, RngState) { + // Determine refraction ratio based on which side we're hitting + // front_face: entering material (air -> glass) + // back_face: exiting material (glass -> air) + let refraction_ratio = if front_face { 1.0 / mat.ior } else { mat.ior }; + + let unit_direction = v3_normalize(r_in.direction); + let cos_theta = f32_min(-v3_dot(unit_direction, normal), 1.0); + let sin_theta = f32_sqrt(1.0 - cos_theta * cos_theta); + + // Check for total internal reflection + let cannot_refract = refraction_ratio * sin_theta > 1.0; + + // Use Schlick's approximation for Fresnel reflectance + let (reflectance_val, rng1) = rng_f32(rng); + let use_reflect = cannot_refract || (reflectance(cos_theta, refraction_ratio) > reflectance_val); + + let direction = if use_reflect { + v3_reflect(unit_direction, normal) + } else { + v3_refract(unit_direction, normal, 
refraction_ratio) + }; + + let scattered = ray_new(hit_point, direction); + + let result = ScatterResult { + scattered: scattered, + attenuation: mat.albedo, + did_scatter: true, + }; + + (result, rng1) +} + +// ============================================================================= +// Fresnel Approximation +// ============================================================================= + +/// Schlick's approximation for Fresnel reflectance +/// +/// Approximates the fraction of light that is reflected vs transmitted +/// at a dielectric interface. More light is reflected at grazing angles. +/// +/// # Arguments +/// * `cosine` - Cosine of the incident angle +/// * `ref_idx` - Ratio of refractive indices (n1/n2) +/// +/// # Returns +/// * Reflectance coefficient in [0, 1] +/// +/// # Formula +/// R(theta) = R0 + (1 - R0) * (1 - cos(theta))^5 +/// where R0 = ((n1 - n2) / (n1 + n2))^2 +#[inline] +fn reflectance(cosine: f32, ref_idx: f32) -> f32 { + // R0: reflectance at normal incidence + let r0 = (1.0 - ref_idx) / (1.0 + ref_idx); + let r0_sq = r0 * r0; + + // Schlick's polynomial approximation of Fresnel + let one_minus_cos = 1.0 - cosine; + let power = one_minus_cos * one_minus_cos * one_minus_cos * one_minus_cos * one_minus_cos; + r0_sq + (1.0 - r0_sq) * power +} + +// ============================================================================= +// Emissive Material Functions +// ============================================================================= + +/// Handle scattering for emissive materials +/// +/// Emissive materials do not scatter light - they only emit it. +/// This function always returns did_scatter = false. 
+/// +/// # Arguments +/// * `mat` - Emissive material properties +/// * `hit_point` - Point of intersection +/// * `normal` - Surface normal at hit point +/// * `rng` - Random number generator state +/// +/// # Returns +/// * Tuple of (ScatterResult with did_scatter=false, unchanged_rng_state) +pub fn scatter_emissive( + mat: MaterialEmissive, + hit_point: Vec3, + normal: Vec3, + rng: RngState +) -> (ScatterResult, RngState) { + // Emissive materials do not scatter - they only emit light + // The scattered ray is arbitrary since did_scatter is false + let result = ScatterResult { + scattered: ray_new(hit_point, normal), + attenuation: vec3_zero(), + did_scatter: false, + }; + + (result, rng) +} + +/// Get the emission color from an emissive material +/// +/// Returns the emission color multiplied by strength for emissive materials, +/// or zero for all other material types. +/// +/// # Arguments +/// * `mat` - Material to query +/// +/// # Returns +/// * Emission color (Vec3) or zero if material is not emissive +pub fn material_emitted(mat: &Material) -> Vec3 { + match mat { + Material::Emissive(e) => v3_mul(e.color, e.strength), + _ => vec3_zero(), + } +} diff --git a/samples/Mind/mlp_backward.mind b/samples/Mind/mlp_backward.mind new file mode 100644 index 0000000000..8280b87775 --- /dev/null +++ b/samples/Mind/mlp_backward.mind @@ -0,0 +1,129 @@ +// MIND Language Example: MLP Backpropagation +// Specification reference: spec/v1.0/autodiff.md +// +// This example demonstrates automatic differentiation through +// a simple multi-layer perceptron (MLP) for training. 
+ +import diff::grad; +import tensor::zeros; +import tensor::randn; + +// MLP structure: input -> hidden -> output +// Reference: spec/v1.0/autodiff.md (composition of differentiable ops) + +// Layer weights (would be parameters in real training) +struct MLPParams { + w1: Tensor, + b1: Tensor, + w2: Tensor, + b2: Tensor, +} + +// Forward pass through MLP +// Reference: spec/v1.0/ir.md#linear-and-tensor-algebra (MatMul) +// Reference: spec/v1.0/ir.md#activation-and-elementwise-unary-operations (Relu) +fn mlp_forward( + x: Tensor, + params: MLPParams +) -> Tensor { + // Layer 1: linear + ReLU + // z1 = x @ W1 + b1 + let z1 = matmul(x, params.w1) + params.b1; // Broadcasting b1 + + // Activation: ReLU(z1) + // Reference: spec/v1.0/ir.md#activation-and-elementwise-unary-operations + let h = relu(z1); + + // Layer 2: linear (no activation for output) + // z2 = h @ W2 + b2 + let output = matmul(h, params.w2) + params.b2; + + output +} + +// Mean squared error loss +// Reference: spec/v1.0/ir.md#reductions (Mean) +fn mse_loss( + predictions: Tensor, + targets: Tensor +) -> f32 { + let diff = predictions - targets; + let squared = diff * diff; + mean(squared, axes=[], keepdims=false) +} + +// Combined forward + loss for gradient computation +fn loss_fn( + x: Tensor, + targets: Tensor, + params: MLPParams +) -> f32 { + let predictions = mlp_forward(x, params); + mse_loss(predictions, targets) +} + +// Compute gradients for all parameters +// Reference: spec/v1.0/autodiff.md +// Returns gradients with same shapes as parameters +fn compute_gradients( + x: Tensor, + targets: Tensor, + params: MLPParams +) -> MLPParams { + // Gradient of loss with respect to parameters + // The autodiff system traces through: + // 1. Forward pass (matmul, add, relu, matmul, add) + // 2. 
Loss computation (sub, mul, mean) + // Then applies chain rule backward + + let grad_params = grad(|p| loss_fn(x, targets, p))(params); + + grad_params +} + +// Single training step with gradient descent +fn train_step( + x: Tensor, + targets: Tensor, + params: MLPParams, + learning_rate: f32 +) -> MLPParams { + // Compute gradients + let grads = compute_gradients(x, targets, params); + + // Update parameters: param = param - lr * grad + // Reference: spec/v1.0/shapes.md#broadcasting (scalar broadcast) + let new_params = MLPParams { + w1: params.w1 - learning_rate * grads.w1, + b1: params.b1 - learning_rate * grads.b1, + w2: params.w2 - learning_rate * grads.w2, + b2: params.b2 - learning_rate * grads.b2, + }; + + new_params +} + +// Example usage +fn main() { + // Initialize parameters + let params = MLPParams { + w1: randn([4, 8]), // 4 inputs, 8 hidden + b1: zeros([8]), + w2: randn([8, 2]), // 8 hidden, 2 outputs + b2: zeros([2]), + }; + + // Example batch + let x: Tensor = randn([16, 4]); + let targets: Tensor = randn([16, 2]); + + // Training loop + let lr: f32 = 0.01; + let mut current_params = params; + + // Would typically loop here + current_params = train_step(x, targets, current_params, lr); + + // Compute final loss + let final_loss = loss_fn(x, targets, current_params); +} diff --git a/samples/Mind/render.mind b/samples/Mind/render.mind new file mode 100644 index 0000000000..db1ec31408 --- /dev/null +++ b/samples/Mind/render.mind @@ -0,0 +1,333 @@ +// render.mind - Core Path Tracing Renderer +// +// This module implements the Monte Carlo path tracing algorithm. Each pixel +// is sampled multiple times with random ray directions, and the results are +// averaged to produce a smooth, noise-reduced image. 
+// +// Key Components: +// - ray_color: Traces a single ray through the scene, accumulating light +// - render_pixel: Samples a pixel with anti-aliasing and returns final color +// - tonemap: Reinhard operator for HDR to LDR conversion +// - gamma_correct: sRGB gamma encoding (2.2) +// +// Path Tracing Algorithm: +// 1. Cast primary ray from camera through pixel +// 2. Find closest intersection with scene geometry +// 3. If hit: evaluate material, accumulate emission, scatter new ray +// 4. If miss: accumulate background/sky color and terminate +// 5. Repeat steps 2-4 until max bounces or Russian roulette termination +// +// Thread Safety: All functions are pure (no global state mutation) and can +// be safely called from multiple threads with independent RNG states. +// +// Author: STARGA Inc. +// License: MIT + +// ============================================================================= +// Constants +// ============================================================================= + +/// Minimum ray hit distance (prevents self-intersection artifacts) +const RAY_T_MIN: f32 = 0.001; + +/// Maximum ray travel distance +const RAY_T_MAX: f32 = 1000000.0; + +/// Minimum bounce count before Russian roulette is applied +const RR_MIN_BOUNCES: i32 = 3; + +/// Maximum Russian roulette survival probability +const RR_MAX_SURVIVAL: f32 = 0.95; + +// ============================================================================= +// Path Tracing +// ============================================================================= + +/// Trace a ray through the scene and compute accumulated radiance +/// +/// Implements unbiased Monte Carlo path tracing with Russian roulette +/// termination. Handles diffuse, metallic, dielectric, and emissive materials. +/// +/// # Algorithm +/// 1. Cast ray into scene +/// 2. On hit: accumulate emission, scatter ray based on material +/// 3. On miss: accumulate background color, terminate +/// 4. 
Apply Russian roulette after 3+ bounces for efficiency +/// +/// # Arguments +/// * `r` - Initial ray to trace +/// * `scene` - Scene containing geometry and materials +/// * `max_bounces` - Maximum path length (prevents infinite loops) +/// * `rng` - Random number generator state +/// +/// # Returns +/// * Tuple of (accumulated_color, updated_rng_state) +/// +/// # Energy Conservation +/// The path maintains a throughput multiplier that ensures energy conservation. +/// Russian roulette adjusts throughput to maintain unbiased estimates. +fn ray_color( + r: Ray, + scene: Scene, + max_bounces: i32, + rng: RngState +) -> (Vec3, RngState) { + let mut current_ray = r; + let mut current_rng = rng; + let mut throughput = vec3_ones(); + let mut accumulated_light = vec3_zero(); + + for bounce in 0..max_bounces { + let hit = scene_hit(scene, current_ray, RAY_T_MIN, RAY_T_MAX); + + if !hit.did_hit { + // Hit sky/background + let sky_color = scene.background; + let contribution = v3_hadamard(throughput, sky_color); + accumulated_light = v3_add(accumulated_light, contribution); + break; + } + + // Get material + let mat = scene.materials[hit.material_idx]; + + // Handle emissive materials + match mat { + Material::Emissive(emissive) => { + let emission = v3_mul(emissive.color, emissive.strength); + let contribution = v3_hadamard(throughput, emission); + accumulated_light = v3_add(accumulated_light, contribution); + // Don't continue bouncing from lights + break; + }, + _ => { + // Non-emissive material, continue bouncing + } + } + + // Scatter ray based on material type + let (scatter_result, new_rng) = match mat { + Material::Diffuse(diff) => { + scatter_diffuse(diff, current_ray, hit.point, hit.normal, current_rng) + }, + Material::Metal(metal) => { + scatter_metal(metal, current_ray, hit.point, hit.normal, current_rng) + }, + Material::Dielectric(diel) => { + scatter_dielectric(diel, current_ray, hit.point, hit.normal, hit.front_face, current_rng) + }, + 
Material::Emissive(_) => { + // Already handled above + let no_scatter = ScatterResult { + scattered: current_ray, + attenuation: vec3_zero(), + did_scatter: false, + }; + (no_scatter, current_rng) + }, + }; + + current_rng = new_rng; + + if !scatter_result.did_scatter { + break; + } + + // Update throughput and ray + throughput = v3_hadamard(throughput, scatter_result.attenuation); + current_ray = scatter_result.scattered; + + // Russian roulette path termination after RR_MIN_BOUNCES + // This provides unbiased early termination for dim paths + if bounce >= RR_MIN_BOUNCES { + let max_component = f32_max(f32_max(throughput.x, throughput.y), throughput.z); + let survival_prob = f32_min(max_component, RR_MAX_SURVIVAL); + + // Terminate if survival probability is zero (prevents division by zero) + if survival_prob <= 0.0 { + break; + } + + let (rr_sample, rng_after_rr) = rng_f32(current_rng); + current_rng = rng_after_rr; + + if rr_sample > survival_prob { + break; + } + + // Boost throughput to maintain energy + throughput = v3_div(throughput, survival_prob); + } + } + + (accumulated_light, current_rng) +} + +// ============================================================================= +// Pixel Rendering +// ============================================================================= + +/// Render a single pixel with multi-sample anti-aliasing +/// +/// Casts multiple rays through slightly offset positions within the pixel +/// to achieve sub-pixel anti-aliasing. Each sample uses the full path tracing +/// algorithm. Results are averaged and post-processed. +/// +/// # Algorithm +/// 1. Initialize deterministic RNG based on pixel coordinates and seed +/// 2. For each sample: +/// a. Add random offset within pixel for AA +/// b. Generate camera ray +/// c. Trace ray through scene +/// d. Accumulate color +/// 3. Average accumulated color +/// 4. Apply tone mapping (HDR -> LDR) +/// 5. Apply gamma correction (linear -> sRGB) +/// 6. 
Clamp to [0, 1] range +/// +/// # Arguments +/// * `x` - Pixel X coordinate (0 to width-1) +/// * `y` - Pixel Y coordinate (0 to height-1) +/// * `width` - Image width in pixels +/// * `height` - Image height in pixels +/// * `spp` - Samples per pixel (higher = less noise) +/// * `max_bounces` - Maximum path length per sample +/// * `camera` - Camera configuration for ray generation +/// * `scene` - Scene to render +/// * `seed` - Base RNG seed for deterministic rendering +/// +/// # Returns +/// * Final pixel color in [0, 1] range (ready for output) +/// +/// # Determinism +/// The same (x, y, seed) inputs will always produce the same output color, +/// enabling reproducible renders and distributed rendering. +fn render_pixel( + x: i32, + y: i32, + width: i32, + height: i32, + spp: i32, + max_bounces: i32, + camera: Camera, + scene: Scene, + seed: i32 +) -> Vec3 { + let pixel_idx = y * width + x; + let base_seed = seed ^ (pixel_idx * 747796405); + + let mut rng = rng_init(base_seed); + let mut color = vec3_zero(); + + let aspect = (width as f32) / (height as f32); + + for _sample in 0..spp { + // Anti-aliasing offset + let (offset_x, rng1) = rng_f32(rng); + let (offset_y, rng2) = rng_f32(rng1); + + // Use width/height for proper [0,1) mapping without division by zero + let u = ((x as f32) + offset_x) / (width as f32); + let v = ((y as f32) + offset_y) / (height as f32); + + let ray = camera_get_ray(camera, u, 1.0 - v); // Flip v for image coords + + let (sample_color, new_rng) = ray_color(ray, scene, max_bounces, rng2); + + color = v3_add(color, sample_color); + rng = new_rng; + } + + // Average samples (guard against spp=0) + if spp > 0 { + color = v3_div(color, spp as f32); + } + + // Tone mapping and gamma correction + color = tonemap(color); + color = gamma_correct(color); + + v3_clamp01(color) +} + +// ============================================================================= +// Post-Processing +// 
============================================================================= + +/// sRGB gamma value for encoding +const GAMMA: f32 = 2.2; + +/// Inverse gamma for decoding (pre-computed for efficiency) +const INV_GAMMA: f32 = 0.4545454545; // 1.0 / 2.2 + +/// Apply Reinhard tone mapping to HDR color +/// +/// Maps high dynamic range values to [0, 1] display range. +/// The Reinhard operator is a simple but effective global tone mapper +/// that preserves relative luminance while compressing bright values. +/// +/// Formula: L_out = L_in / (1 + L_in) +/// +/// # Arguments +/// * `color` - HDR color (can have values > 1.0) +/// +/// # Returns +/// * Tone-mapped color in [0, 1] range +/// +/// # Properties +/// - Preserves 0: tonemap(0) = 0 +/// - Asymptotes to 1: tonemap(inf) -> 1 +/// - Monotonic: brighter inputs always produce brighter outputs +#[inline] +fn tonemap(color: Vec3) -> Vec3 { + vec3( + tonemap_component(color.x), + tonemap_component(color.y), + tonemap_component(color.z) + ) +} + +/// Reinhard tone mapping for a single color component +/// +/// # Arguments +/// * `x` - Input value (clamped to non-negative) +/// +/// # Returns +/// * Tone-mapped value in [0, 1) +#[inline] +fn tonemap_component(x: f32) -> f32 { + let clamped = f32_max(0.0, x); + clamped / (1.0 + clamped) +} + +/// Apply sRGB gamma correction to linear color +/// +/// Converts linear-space color to gamma-encoded sRGB for display. +/// Most displays expect sRGB-encoded values. +/// +/// Formula: c_out = c_in ^ (1/2.2) +/// +/// # Arguments +/// * `color` - Linear-space color (should be in [0, 1] after tonemapping) +/// +/// # Returns +/// * Gamma-encoded sRGB color +/// +/// # Note +/// For accurate sRGB, a piecewise function should be used for values < 0.0031308. +/// This simplified gamma curve is sufficient for path tracer output. 
+#[inline] +fn gamma_correct(color: Vec3) -> Vec3 { + vec3( + f32_pow(color.x, INV_GAMMA), + f32_pow(color.y, INV_GAMMA), + f32_pow(color.z, INV_GAMMA) + ) +} + +// ============================================================================= +// External Dependencies +// ============================================================================= +// Math utilities imported from math_stubs.mind: +// - f32_pow: Power function for gamma correction +// - f32_max: Maximum of two floats diff --git a/samples/Mind/search.mind b/samples/Mind/search.mind new file mode 100644 index 0000000000..20e624b8e4 --- /dev/null +++ b/samples/Mind/search.mind @@ -0,0 +1,829 @@ +// NikolaChess v3.17 - Draw-First Search +// Copyright (c) 2025 STARGA, Inc. All rights reserved. +// Search algorithm optimized for finding draws, not wins +// Key insight: Repetitions are GOOD, not bad! + +import std.tensor; +import std.cuda; +import std.time; +import std.mem; + +// ============================================================================ +// SEARCH RESULT +// ============================================================================ + +struct SearchResult { + best_move: Move, + draw_score: f32, // Draw probability [0, 1] - higher = better + depth: i32, + nodes: i64, + time_ms: i64, + pv: Vec, // Principal variation + draw_type: str, // "book", "tablebase", "repetition", "search" +} + +fn default_result() -> SearchResult { + return SearchResult { + best_move: MOVE_NULL, + draw_score: 0.0, + depth: 0, + nodes: 0, + time_ms: 0, + pv: Vec.new(), + draw_type: "none", + }; +} + +// ============================================================================ +// TRANSPOSITION TABLE +// ============================================================================ + +struct TTEntry { + hash: u64, + depth: i32, + draw_score: f32, + best_move: Move, + flag: i32, // EXACT, LOWER, UPPER + age: i32, // For replacement +} + +const TT_EXACT: i32 = 0; +const TT_LOWER: i32 = 1; // Alpha bound +const 
TT_UPPER: i32 = 2; // Beta bound + +// Default TT size (16M entries). NOTE(review): create_tt() passes this entry count to create_tt_with_size(), whose parameter is size_mb (megabytes) — unit mismatch; confirm intended default +const TT_DEFAULT_SIZE: u64 = 1 << 24; + +// Global TT size (set by Hash option) +static mut TT_SIZE: u64 = TT_DEFAULT_SIZE; + +struct TranspositionTable { + entries: Vec, // Dynamic size instead of fixed tensor + size: u64, // Current size + age: i32, + use_gpu: bool, // Whether GPU is available +} + +fn create_tt() -> TranspositionTable { + return create_tt_with_size(TT_DEFAULT_SIZE); +} + +fn create_tt_with_size(size_mb: u64) -> TranspositionTable { + // Calculate number of entries from MB + let entry_size: u64 = 32; // sizeof(TTEntry) approximately + let num_entries = (size_mb * 1024 * 1024) / entry_size; + // Round down to power of 2 for fast modulo + let mut actual_size: u64 = 1; + while actual_size * 2 <= num_entries { + actual_size *= 2; + } + + let use_gpu = cuda.is_available(); + + if use_gpu { + // GPU allocation + on(gpu0) { + let entries = Vec.with_capacity(actual_size as usize); + for _ in 0..actual_size { + entries.push(TTEntry { + hash: 0, depth: 0, draw_score: 0.0, + best_move: MOVE_NULL, flag: 0, age: 0 + }); + } + return TranspositionTable { + entries: entries, + size: actual_size, + age: 0, + use_gpu: true, + }; + } + } else { + // CPU fallback + let entries = Vec.with_capacity(actual_size as usize); + for _ in 0..actual_size { + entries.push(TTEntry { + hash: 0, depth: 0, draw_score: 0.0, + best_move: MOVE_NULL, flag: 0, age: 0 + }); + } + return TranspositionTable { + entries: entries, + size: actual_size, + age: 0, + use_gpu: false, + }; + } +} + +fn tt_resize(tt: &mut TranspositionTable, size_mb: u64) { + // Resize TT based on Hash UCI option + let new_tt = create_tt_with_size(size_mb); + *tt = new_tt; +} + +fn tt_clear(tt: &mut TranspositionTable) { + // Fast clear using memset instead of loop + // FIX: Handle GPU/CPU memory correctly + let entry_size = std::mem::size_of::(); + if tt.use_gpu { + on(gpu0) { + 
mem.memset(tt.entries.as_mut_ptr(), 0, tt.size as usize * entry_size); + } + } else { + mem.memset(tt.entries.as_mut_ptr(), 0, tt.size as usize * entry_size); + } + tt.age = 0; +} + +fn tt_probe(tt: &TranspositionTable, hash: u64) -> Option { + let idx = (hash % tt.size) as usize; + // FIX: Access GPU memory in correct context + if tt.use_gpu { + on(gpu0) { + let entry = tt.entries[idx]; + if entry.hash == hash { + return Some(entry); + } + } + } else { + let entry = tt.entries[idx]; + if entry.hash == hash { + return Some(entry); + } + } + return None; +} + +fn tt_store(tt: &mut TranspositionTable, hash: u64, depth: i32, draw_score: f32, best_move: Move, flag: i32) { + let idx = (hash % tt.size) as usize; + + // FIX: Access GPU memory in correct context + if tt.use_gpu { + on(gpu0) { + let existing = tt.entries[idx]; + // Replace if: empty, older, or deeper search + if existing.hash == 0 || existing.age < tt.age || depth >= existing.depth { + tt.entries[idx] = TTEntry { + hash: hash, + depth: depth, + draw_score: draw_score, + best_move: best_move, + flag: flag, + age: tt.age, + }; + } + } + } else { + let existing = tt.entries[idx]; + // Replace if: empty, older, or deeper search + if existing.hash == 0 || existing.age < tt.age || depth >= existing.depth { + tt.entries[idx] = TTEntry { + hash: hash, + depth: depth, + draw_score: draw_score, + best_move: best_move, + flag: flag, + age: tt.age, + }; + } + } +} + +// ============================================================================ +// DRAW-FIRST SEARCH ENGINE +// ============================================================================ + +struct DrawSearch { + net: DrawNetwork, + tt: TranspositionTable, + book: OpeningBook, + tb: Tablebase, + nodes: i64, + max_depth: i32, + start_time: i64, + time_limit: i64, + stop: bool, + + // Draw-seeking statistics + repetitions_found: i64, + fortresses_found: i64, + perpetuals_found: i64, +} + +fn create_search(net: DrawNetwork, book: OpeningBook, tb: Tablebase) 
-> DrawSearch { + return DrawSearch { + net: net, + tt: create_tt(), + book: book, + tb: tb, + nodes: 0, + max_depth: 64, + start_time: 0, + time_limit: 0, + stop: false, + repetitions_found: 0, + fortresses_found: 0, + perpetuals_found: 0, + }; +} + +// ============================================================================ +// MAIN SEARCH FUNCTION +// ============================================================================ + +fn search(s: &mut DrawSearch, board: Board, depth: i32, time_ms: i64) -> SearchResult { + s.nodes = 0; + s.start_time = time.now_ms(); + s.time_limit = time_ms; + s.stop = false; + s.tt.age += 1; + + // 1. Check opening book first + if let Some(book_move) = s.book.probe(board) { + return SearchResult { + best_move: book_move, + draw_score: 1.0, // Book moves are pre-verified draws + depth: 0, + nodes: 1, + time_ms: 0, + pv: vec![book_move], + draw_type: "book", + }; + } + + // 2. Check tablebase + if let Some(tb_result) = s.tb.probe(board) { + if tb_result.is_draw() { + return SearchResult { + best_move: tb_result.best_move, + draw_score: 1.0, + depth: 0, + nodes: 1, + time_ms: 0, + pv: vec![tb_result.best_move], + draw_type: "tablebase", + }; + } + } + + // 3. Check for immediate draw claims + let (can_draw, draw_reason) = can_claim_draw(board, &board.history); + if can_draw { + return SearchResult { + best_move: MOVE_NULL, + draw_score: 1.0, + depth: 0, + nodes: 1, + time_ms: 0, + pv: vec![], + draw_type: draw_reason, + }; + } + + // 4. 
Iterative deepening search + let mut best_result = default_result(); // FIX: Made mutable + + for d in 1..=depth { + if s.stop { + break; + } + + let mut pv = Vec.new(); + let result = draw_negamax(s, board, d, 0.0, 1.0, &mut pv, 0); + + if !s.stop { + best_result = result; + best_result.pv = pv; + best_result.depth = d; + + // Print UCI info + let elapsed = time.now_ms() - s.start_time; + print_uci_info(d, best_result.draw_score, s.nodes, elapsed, &best_result.pv); + } + + // If we found a guaranteed draw, stop searching + // Draw probability threshold + if best_result.draw_score >= 0.99 { + break; + } + + // Time management + if time.now_ms() - s.start_time > time_ms / 2 { + break; + } + } + + best_result.nodes = s.nodes; + best_result.time_ms = time.now_ms() - s.start_time; + return best_result; +} + +// ============================================================================ +// DRAW-FOCUSED NEGAMAX WITH ALPHA-BETA +// ============================================================================ + +// Returns draw probability from perspective of side to move +// Higher = better (more likely to draw = GOOD!) +fn draw_negamax( + s: &mut DrawSearch, + board: Board, + depth: i32, + mut alpha: f32, // FIX: Made mutable + mut beta: f32, // FIX: Made mutable + pv: &mut Vec, + ply: i32 +) -> SearchResult { + s.nodes += 1; + + // Time check - check every 1024 nodes for more responsive stopping + if s.nodes % 1024 == 0 { + if time.now_ms() - s.start_time > s.time_limit { + s.stop = true; + return default_result(); + } + } + + let hash = board.hash; + let mut local_pv = Vec.new(); + + // ======================================== + // DRAW DETECTION (This is GOOD for us!) 
+ // Emphasize repetitions + // ======================================== + + // Check for draw by repetition + // FIX: Include side-to-move in repetition check by using board.hash + // which already incorporates side-to-move via Zobrist hashing + let rep_count = count_repetitions(&board.history, hash); + if rep_count >= 2 { + s.repetitions_found += 1; + return SearchResult { + best_move: MOVE_NULL, + draw_score: 1.0, // Guaranteed draw! + depth: depth, + nodes: 1, + time_ms: 0, + pv: pv.clone(), + draw_type: "repetition", + }; + } + + // Check 50-move rule + if board.halfmove >= 100 { + return SearchResult { + best_move: MOVE_NULL, + draw_score: 1.0, + depth: depth, + nodes: 1, + time_ms: 0, + pv: pv.clone(), + draw_type: "fifty_move", + }; + } + + // Check insufficient material + if is_insufficient_material(board) { + return SearchResult { + best_move: MOVE_NULL, + draw_score: 1.0, + depth: depth, + nodes: 1, + time_ms: 0, + pv: pv.clone(), + draw_type: "insufficient", + }; + } + + // ======================================== + // TRANSPOSITION TABLE + // ======================================== + + let mut tt_move = MOVE_NULL; // FIX: Made mutable + if let Some(entry) = tt_probe(&s.tt, hash) { + tt_move = entry.best_move; + + if entry.depth >= depth { + match entry.flag { + TT_EXACT => { + pv.push(entry.best_move); + return SearchResult { + best_move: entry.best_move, + draw_score: entry.draw_score, + depth: depth, + nodes: 1, + time_ms: 0, + pv: pv.clone(), + draw_type: "tt", + }; + }, + TT_LOWER => { + alpha = max_f32(alpha, entry.draw_score); + }, + TT_UPPER => { + beta = min_f32(beta, entry.draw_score); + }, + } + + if alpha >= beta { + return SearchResult { + best_move: entry.best_move, + draw_score: entry.draw_score, + depth: depth, + nodes: 1, + time_ms: 0, + pv: pv.clone(), + draw_type: "tt_cutoff", + }; + } + } + } + + // ======================================== + // TABLEBASE PROBE (Endgame) + // ======================================== + + if let 
Some(tb_result) = s.tb.probe(board) { + if tb_result.is_draw() { + return SearchResult { + best_move: tb_result.best_move, + draw_score: 1.0, + depth: depth, + nodes: 1, + time_ms: 0, + pv: vec![tb_result.best_move], + draw_type: "tablebase", + }; + } + } + + // ======================================== + // LEAF NODE EVALUATION + // ======================================== + + if depth <= 0 { + let score = quiescence(s, board, alpha, beta); + return SearchResult { + best_move: MOVE_NULL, + draw_score: score, + depth: 0, + nodes: 1, + time_ms: 0, + pv: pv.clone(), + draw_type: "eval", + }; + } + + // ======================================== + // MOVE GENERATION AND ORDERING + // ======================================== + + let moves = generate_moves(board); + + if moves.is_empty() { + // No legal moves - checkmate or stalemate + if is_in_check(board) { + // Checkmate - worst outcome (we lost) + return SearchResult { + best_move: MOVE_NULL, + draw_score: 0.0, // Not a draw - we lost + depth: depth, + nodes: 1, + time_ms: 0, + pv: pv.clone(), + draw_type: "checkmate", + }; + } else { + // Stalemate - PERFECT! It's a draw! 
+ return SearchResult { + best_move: MOVE_NULL, + draw_score: 1.0, + depth: depth, + nodes: 1, + time_ms: 0, + pv: pv.clone(), + draw_type: "stalemate", + }; + } + } + + // Order moves for draw-seeking + order_moves_for_draw(&mut moves, board, &s.tt, hash, tt_move); + + // ======================================== + // MAIN SEARCH LOOP + // ======================================== + + let mut best_score: f32 = -1.0; // Start below valid range (0-1) so any move improves + let mut best_move = moves[0]; // Default to first move + let mut flag = TT_UPPER; // FIX: Made mutable + + for (i, m) in moves.iter().enumerate() { + let new_board = make_move(board, *m); + let mut child_pv = Vec.new(); + + // Principal Variation Search + let result = if i == 0 { + draw_negamax(s, new_board, depth - 1, alpha, beta, &mut child_pv, ply + 1) + } else { + // Null window search + let null_result = draw_negamax(s, new_board, depth - 1, alpha, alpha + 0.01, &mut child_pv, ply + 1); + + if null_result.draw_score > alpha && null_result.draw_score < beta { + // Re-search with full window + child_pv.clear(); + draw_negamax(s, new_board, depth - 1, alpha, beta, &mut child_pv, ply + 1) + } else { + null_result + } + }; + + if s.stop { + break; + } + + // We want HIGH draw scores (draws are good!) 
+ if result.draw_score > best_score { + best_score = result.draw_score; + best_move = *m; + local_pv.clear(); + local_pv.push(*m); + local_pv.extend(child_pv); + flag = TT_EXACT; + } + + alpha = max_f32(alpha, best_score); + + if alpha >= beta { + flag = TT_LOWER; + break; + } + } + + // Store in transposition table + tt_store(&mut s.tt, hash, depth, best_score, best_move, flag); + + *pv = local_pv; + + return SearchResult { + best_move: best_move, + draw_score: best_score, + depth: depth, + nodes: s.nodes, + time_ms: 0, + pv: pv.clone(), + draw_type: "search", + }; +} + +// ============================================================================ +// QUIESCENCE SEARCH +// ============================================================================ + +fn quiescence(s: &mut DrawSearch, board: Board, mut alpha: f32, beta: f32) -> f32 { + s.nodes += 1; + + // Stand pat evaluation + let board_tensor = to_tensor_16ch(board); + let features = extract_features(board); + let stand_pat = forward(s.net, board_tensor, features); + + if stand_pat >= beta { + return beta; + } + + alpha = max(alpha, stand_pat); + + // Only search captures + let captures = generate_captures(board); + order_moves_for_draw(&mut captures, board, &s.tt, board.hash, MOVE_NULL); + + for m in captures { + let new_board = make_move(board, m); + let score = quiescence(s, new_board, alpha, beta); + + if score >= beta { + return beta; + } + alpha = max(alpha, score); + } + + return alpha; +} + +// ============================================================================ +// MOVE ORDERING (Draw-Seeking Priority) +// ============================================================================ + +fn order_moves_for_draw( + moves: &mut Vec, + board: Board, + tt: &TranspositionTable, + hash: u64, + tt_move: Move +) { + // Sort by draw priority (descending) + moves.sort_by(|a, b| { + let score_a = move_draw_priority(*a, board, tt, hash, tt_move); + let score_b = move_draw_priority(*b, board, tt, hash, 
tt_move); + score_b.partial_cmp(&score_a).unwrap() + }); +} + +fn move_draw_priority(m: Move, board: Board, tt: &TranspositionTable, hash: u64, tt_move: Move) -> f32 { + let mut score = 0.0; + + // Priority order: + // 1. TT move (highest priority) + // 2. Moves that cause repetition + // 3. Moves toward tablebase territory (captures/trades) + // 4. Perpetual check moves + // 5. Moves that maintain material balance + // 6. Defensive moves + + // TT move gets highest priority + if m.data == tt_move.data && tt_move.data != 0 { + score += 100000.0; + } + + let new_board = make_move(board, m); + let new_hash = new_board.hash; + + // Repetition moves are EXCELLENT! + // (In normal engines these are bad, but we WANT draws) + let rep_count = count_repetitions(&board.history, new_hash); + if rep_count >= 1 { + score += 50000.0; // Potential threefold! + } + if rep_count >= 2 { + score += 100000.0; // Guaranteed draw! + } + + // Moves that reduce material (toward tablebase) + if is_capture(m) { + let material_before = total_material(board); + let material_after = total_material(new_board); + let reduction = (material_before - material_after) as f32; + score += reduction * 10.0; + + // Equal trades are especially good + let capture_value = piece_value(move_capture(m)); + let moved_value = piece_value(move_piece(m) % 6); + if capture_value >= moved_value { + score += 1000.0; + } + } + + // Checks that might lead to perpetual + if gives_check(m, board) { + score += 500.0; + + // If we've been giving checks, perpetual is likely + // (Check frequency feature from draw_eval) + score += 200.0; + } + + // Moves that maintain material balance + let balance_before = abs(material_balance(board)); + let balance_after = abs(material_balance(new_board)); + if balance_after <= balance_before { + score += 200.0; + } + + // Moves toward opposite-colored bishops (drawish) + if detect_opposite_colored_bishops(new_board) && !detect_opposite_colored_bishops(board) { + score += 300.0; + } + + // 
Pawn symmetry preservation + let sym_before = calc_pawn_symmetry(board); + let sym_after = calc_pawn_symmetry(new_board); + if sym_after >= sym_before { + score += 100.0; + } + + return score; +} + +fn piece_value(piece: i32) -> i32 { + match piece % 6 { + 0 => 100, // Pawn + 1 => 320, // Knight + 2 => 330, // Bishop + 3 => 500, // Rook + 4 => 900, // Queen + 5 => 0, // King (not capturable) + _ => 0, + } +} + +// ============================================================================ +// UTILITY FUNCTIONS +// ============================================================================ + +fn count_repetitions(history: &Vec, hash: u64) -> i32 { + let mut count: i32 = 0; // FIX: Made mutable + for h in history { + if *h == hash { + count += 1; + } + } + return count; +} + +fn print_uci_info(depth: i32, draw_prob: f32, nodes: i64, time_ms: i64, pv: &Vec) { + // Convert draw probability to centipawn-like score + // 1.0 = draw = 0cp, 0.0 = losing = -10000cp + let cp = ((draw_prob - 0.5) * 200.0) as i32; + + let nps = if time_ms > 0 { (nodes * 1000) / time_ms } else { 0 }; + + let pv_str = pv.iter().map(|m| move_to_uci(*m)).collect::>().join(" "); + + print("info depth", depth, + "score cp", cp, + "drawprob", (draw_prob * 1000.0) as i32, + "nodes", nodes, + "time", time_ms, + "nps", nps, + "pv", pv_str); +} + +fn move_to_uci(m: Move) -> str { + let from = move_from(m); + let to = move_to(m); + let promo = move_promo(m); + + let from_file = ('a' as i32 + from % 8) as char; + let from_rank = ('1' as i32 + from / 8) as char; + let to_file = ('a' as i32 + to % 8) as char; + let to_rank = ('1' as i32 + to / 8) as char; + + let mut uci = format!("{}{}{}{}", from_file, from_rank, to_file, to_rank); // FIX: Made mutable + + if promo != 0 { + let promo_char = match promo % 6 { + 1 => 'n', + 2 => 'b', + 3 => 'r', + 4 => 'q', + _ => ' ', + }; + uci = format!("{}{}", uci, promo_char); + } + + return uci; +} + +// 
============================================================================ +// ASPIRATION WINDOWS (Draw-focused) +// Narrow window around 0.5 +// ============================================================================ + +fn aspiration_search(s: &mut DrawSearch, board: Board, depth: i32, prev_score: f32) -> SearchResult { + // Start with narrow window around previous score + // For draw-seeking: window around 0.5 (draw probability) + let mut delta: f32 = 0.05; // FIX: Made mutable + let mut alpha = max_f32(0.0, prev_score - delta); + let mut beta = min_f32(1.0, prev_score + delta); + + loop { + let mut pv = Vec.new(); + let result = draw_negamax(s, board, depth, alpha, beta, &mut pv, 0); + + if s.stop { + return result; + } + + // Check if we need to re-search with wider window + if result.draw_score <= alpha { + // Fail low - widen alpha + alpha = max_f32(0.0, alpha - delta * 2.0); + delta *= 2.0; + } else if result.draw_score >= beta { + // Fail high - widen beta + beta = min_f32(1.0, beta + delta * 2.0); + delta *= 2.0; + } else { + // Search succeeded within window + return result; + } + + // Full window search if delta is too large + if delta > 0.5 { + alpha = 0.0; + beta = 1.0; + } + } +} + +// ============================================================================ +// HELPER FUNCTIONS +// ============================================================================ + +fn max_f32(a: f32, b: f32) -> f32 { + if a > b { a } else { b } +} + +fn min_f32(a: f32, b: f32) -> f32 { + if a < b { a } else { b } +} diff --git a/samples/Mind/tensor_board.mind b/samples/Mind/tensor_board.mind new file mode 100644 index 0000000000..e8510a2d1d --- /dev/null +++ b/samples/Mind/tensor_board.mind @@ -0,0 +1,540 @@ +// NikolaChess v3.17 - GPU-Accelerated TensorBoard +// Copyright (c) 2025 STARGA, Inc. All rights reserved. 
+// 16-channel packed tensor for RTX Tensor Core acceleration + +import std.tensor; +import std.cuda; +import std.atomic; +import std.simd; +import std.mem; + +// ============================================================================ +// TENSORBOARD CONSTANTS +// ============================================================================ + +const BOARD_SIZE: i32 = 8; +const NUM_PIECES: i32 = 12; +const NUM_CHANNELS: i32 = 16; + +// Channel layout: +// 0-5: White pieces (P, N, B, R, Q, K) +// 6-11: Black pieces (p, n, b, r, q, k) +// 12: All white occupancy (precomputed) +// 13: All black occupancy (precomputed) +// 14: Attack map (GPU-computed) +// 15: Draw probability cache + +const CH_WHITE_PAWN: i32 = 0; +const CH_WHITE_KNIGHT: i32 = 1; +const CH_WHITE_BISHOP: i32 = 2; +const CH_WHITE_ROOK: i32 = 3; +const CH_WHITE_QUEEN: i32 = 4; +const CH_WHITE_KING: i32 = 5; +const CH_BLACK_PAWN: i32 = 6; +const CH_BLACK_KNIGHT: i32 = 7; +const CH_BLACK_BISHOP: i32 = 8; +const CH_BLACK_ROOK: i32 = 9; +const CH_BLACK_QUEEN: i32 = 10; +const CH_BLACK_KING: i32 = 11; +const CH_WHITE_OCC: i32 = 12; +const CH_BLACK_OCC: i32 = 13; +const CH_ATTACKS: i32 = 14; +const CH_DRAW_CACHE: i32 = 15; + +// GPU cache sizes +const HASH_CACHE_SIZE: i32 = 65536; +const ATTACK_BATCH_SIZE: i32 = 4096; + +// ============================================================================ +// TENSORBOARD STRUCTURE +// ============================================================================ + +struct TensorBoard { + // Primary 16x8x8 tensor (f16 for RTX Tensor Core acceleration) + // Using f16 gives 2x throughput on RTX 4090/4080 + data: tensor, + + // GPU-side persistent caches (stay in VRAM) + @cuda.device hash_cache: tensor, + @cuda.device draw_cache: tensor, + @cuda.device attack_cache: tensor, + + // Current game state (mirrored on CPU for validation) + stm: i32, // Side to move (0=white, 1=black) + ep_square: i32, // En passant square (-1 if none) + castling: u8, // Castling rights (4 
bits) + halfmove: u8, // Halfmove clock for 50-move rule + fullmove: u16, // Full move counter + + // Zobrist hash (64-bit for strong collision resistance) + hash: u64, + + // Incremental update tracking + @atomic dirty: u32, // Dirty flag for GPU sync + last_draw_score: f32, // Cached draw probability + + // History for repetition detection + history: Vec, +} + +// ============================================================================ +// ZOBRIST HASHING TABLES +// ============================================================================ + +// GPU-resident Zobrist keys for fast hashing +@cuda.constant ZOBRIST_PIECES: tensor; +@cuda.constant ZOBRIST_CASTLING: tensor; +@cuda.constant ZOBRIST_EP: tensor; +@cuda.constant ZOBRIST_STM: u64; + +fn init_zobrist() { + // Initialize with random 64-bit keys + let mut rng = create_rng(0xDEADBEEF); + + on(gpu0) { + for piece in 0..12 { + for sq in 0..64 { + ZOBRIST_PIECES[piece, sq] = rng.next_u64(); + } + } + + for castle in 0..16 { + ZOBRIST_CASTLING[castle] = rng.next_u64(); + } + + for file in 0..8 { + ZOBRIST_EP[file] = rng.next_u64(); + } + + ZOBRIST_STM = rng.next_u64(); + } +} + +// ============================================================================ +// TENSORBOARD CREATION +// ============================================================================ + +fn create_tensor_board() -> TensorBoard { + return TensorBoard { + data: tensor.zeros[f16, (NUM_CHANNELS, BOARD_SIZE, BOARD_SIZE)], + hash_cache: tensor.zeros[u64, (HASH_CACHE_SIZE,)], + draw_cache: tensor.zeros[f16, (HASH_CACHE_SIZE,)], + attack_cache: tensor.zeros[f16, (ATTACK_BATCH_SIZE, 8, 8)], + stm: 0, + ep_square: -1, + castling: 0x0F, // All castling rights + halfmove: 0, + fullmove: 1, + hash: 0, + dirty: atomic.new(0), + last_draw_score: 0.5, + history: Vec.new(), + }; +} + +fn starting_tensor_board() -> TensorBoard { + let mut board = create_tensor_board(); + + // Set up starting position + // White pieces + board.data[CH_WHITE_PAWN, 1, 
0] = 1.0; // a2 + board.data[CH_WHITE_PAWN, 1, 1] = 1.0; // b2 + board.data[CH_WHITE_PAWN, 1, 2] = 1.0; // c2 + board.data[CH_WHITE_PAWN, 1, 3] = 1.0; // d2 + board.data[CH_WHITE_PAWN, 1, 4] = 1.0; // e2 + board.data[CH_WHITE_PAWN, 1, 5] = 1.0; // f2 + board.data[CH_WHITE_PAWN, 1, 6] = 1.0; // g2 + board.data[CH_WHITE_PAWN, 1, 7] = 1.0; // h2 + + board.data[CH_WHITE_ROOK, 0, 0] = 1.0; // a1 + board.data[CH_WHITE_KNIGHT, 0, 1] = 1.0; // b1 + board.data[CH_WHITE_BISHOP, 0, 2] = 1.0; // c1 + board.data[CH_WHITE_QUEEN, 0, 3] = 1.0; // d1 + board.data[CH_WHITE_KING, 0, 4] = 1.0; // e1 + board.data[CH_WHITE_BISHOP, 0, 5] = 1.0; // f1 + board.data[CH_WHITE_KNIGHT, 0, 6] = 1.0; // g1 + board.data[CH_WHITE_ROOK, 0, 7] = 1.0; // h1 + + // Black pieces + board.data[CH_BLACK_PAWN, 6, 0] = 1.0; // a7 + board.data[CH_BLACK_PAWN, 6, 1] = 1.0; // b7 + board.data[CH_BLACK_PAWN, 6, 2] = 1.0; // c7 + board.data[CH_BLACK_PAWN, 6, 3] = 1.0; // d7 + board.data[CH_BLACK_PAWN, 6, 4] = 1.0; // e7 + board.data[CH_BLACK_PAWN, 6, 5] = 1.0; // f7 + board.data[CH_BLACK_PAWN, 6, 6] = 1.0; // g7 + board.data[CH_BLACK_PAWN, 6, 7] = 1.0; // h7 + + board.data[CH_BLACK_ROOK, 7, 0] = 1.0; // a8 + board.data[CH_BLACK_KNIGHT, 7, 1] = 1.0; // b8 + board.data[CH_BLACK_BISHOP, 7, 2] = 1.0; // c8 + board.data[CH_BLACK_QUEEN, 7, 3] = 1.0; // d8 + board.data[CH_BLACK_KING, 7, 4] = 1.0; // e8 + board.data[CH_BLACK_BISHOP, 7, 5] = 1.0; // f8 + board.data[CH_BLACK_KNIGHT, 7, 6] = 1.0; // g8 + board.data[CH_BLACK_ROOK, 7, 7] = 1.0; // h8 + + // Compute occupancy channels + update_occupancy(&mut board); + + // Compute initial hash + board.hash = compute_hash(&board); + + return board; +} + +// ============================================================================ +// GPU KERNELS +// ============================================================================ + +// Kernel: Update occupancy channels +@cuda_kernel +fn update_occupancy_kernel(data: &mut tensor) { + let rank = cuda.thread_idx() / 8; + let file = 
cuda.thread_idx() % 8; + + // Sum white pieces + let mut white_occ: f16 = 0.0; + for ch in 0..6 { + white_occ += data[ch, rank, file]; + } + data[CH_WHITE_OCC, rank, file] = if white_occ > 0.0 { 1.0 } else { 0.0 }; + + // Sum black pieces + let mut black_occ: f16 = 0.0; + for ch in 6..12 { + black_occ += data[ch, rank, file]; + } + data[CH_BLACK_OCC, rank, file] = if black_occ > 0.0 { 1.0 } else { 0.0 }; +} + +fn update_occupancy(board: &mut TensorBoard) { + on(gpu0) { + update_occupancy_kernel<<<1, 64>>>(&mut board.data); + } +} + +// Kernel: Compute attack maps (for all squares simultaneously) +@cuda_kernel +fn compute_attacks_kernel( + data: tensor, + stm: i32, + attacks: &mut tensor +) { + let sq = cuda.thread_idx(); + let rank = sq / 8; + let file = sq % 8; + + let mut is_attacked: f16 = 0.0; + + // Check knight attacks + let knight_ch = if stm == 0 { CH_WHITE_KNIGHT } else { CH_BLACK_KNIGHT }; + let knight_offsets = [ + (-2, -1), (-2, 1), (-1, -2), (-1, 2), + (1, -2), (1, 2), (2, -1), (2, 1) + ]; + for (dr, df) in knight_offsets.iter() { + let nr = rank + *dr; + let nf = file + *df; + if nr >= 0 && nr < 8 && nf >= 0 && nf < 8 { + is_attacked += data[knight_ch, nr, nf]; + } + } + + // Check pawn attacks + let pawn_ch = if stm == 0 { CH_WHITE_PAWN } else { CH_BLACK_PAWN }; + let pawn_dir = if stm == 0 { -1 } else { 1 }; + let pr = rank + pawn_dir; + if pr >= 0 && pr < 8 { + if file > 0 { is_attacked += data[pawn_ch, pr, file - 1]; } + if file < 7 { is_attacked += data[pawn_ch, pr, file + 1]; } + } + + // Check king attacks (adjacent squares) + let king_ch = if stm == 0 { CH_WHITE_KING } else { CH_BLACK_KING }; + for dr in -1..2 { + for df in -1..2 { + if dr == 0 && df == 0 { continue; } + let nr = rank + dr; + let nf = file + df; + if nr >= 0 && nr < 8 && nf >= 0 && nf < 8 { + is_attacked += data[king_ch, nr, nf]; + } + } + } + + // Sliding pieces (simplified - full implementation uses bitboard rays) + // This is a simplified version for the kernel + + 
attacks[rank, file] = if is_attacked > 0.0 { 1.0 } else { 0.0 }; +} + +fn compute_attacks(board: &mut TensorBoard) { + on(gpu0) { + let mut attacks = tensor.zeros[f16, (8, 8)]; + compute_attacks_kernel<<<1, 64>>>(board.data, board.stm, &mut attacks); + + // Copy to attack channel + for rank in 0..8 { + for file in 0..8 { + board.data[CH_ATTACKS, rank, file] = attacks[rank, file]; + } + } + } +} + +// ============================================================================ +// HASH COMPUTATION +// ============================================================================ + +fn compute_hash(board: &TensorBoard) -> u64 { + let mut hash: u64 = 0; + + // Piece positions + for ch in 0..12 { + for rank in 0..8 { + for file in 0..8 { + if board.data[ch, rank, file] > 0.0 { + let sq = rank * 8 + file; + hash ^= ZOBRIST_PIECES[ch, sq]; + } + } + } + } + + // Castling rights + hash ^= ZOBRIST_CASTLING[board.castling as i32]; + + // En passant + if board.ep_square >= 0 { + let ep_file = board.ep_square % 8; + hash ^= ZOBRIST_EP[ep_file]; + } + + // Side to move + if board.stm == 1 { + hash ^= ZOBRIST_STM; + } + + return hash; +} + +// Incremental hash update (for make_move) +fn update_hash_move(board: &mut TensorBoard, m: Move) { + // Remove piece from source + let from_sq = m.from; + let from_rank = from_sq / 8; + let from_file = from_sq % 8; + board.hash ^= ZOBRIST_PIECES[m.piece, from_sq]; + + // Add piece to destination + let to_sq = m.to; + let to_rank = to_sq / 8; + let to_file = to_sq % 8; + + if m.promotion != 0 { + board.hash ^= ZOBRIST_PIECES[m.promotion, to_sq]; + } else { + board.hash ^= ZOBRIST_PIECES[m.piece, to_sq]; + } + + // Remove captured piece + if m.capture != 0 { + board.hash ^= ZOBRIST_PIECES[m.capture, to_sq]; + } + + // Update castling (if changed) + // ... (simplified) + + // Update en passant + // ... 
(simplified) + + // Flip side to move + board.hash ^= ZOBRIST_STM; +} + +// ============================================================================ +// MOVE MAKING +// ============================================================================ + +fn make_tensor_move(board: &mut TensorBoard, m: Move) -> TensorBoard { + let mut new_board = board.clone(); + + let from_rank = m.from / 8; + let from_file = m.from % 8; + let to_rank = m.to / 8; + let to_file = m.to % 8; + + // Clear source square + new_board.data[m.piece, from_rank, from_file] = 0.0; + + // Clear destination square (capture) + if m.capture != 0 { + new_board.data[m.capture, to_rank, to_file] = 0.0; + } + + // Set destination square + if m.promotion != 0 { + new_board.data[m.promotion, to_rank, to_file] = 1.0; + } else { + new_board.data[m.piece, to_rank, to_file] = 1.0; + } + + // Update state + new_board.stm = 1 - new_board.stm; + new_board.halfmove += 1; + if new_board.stm == 0 { + new_board.fullmove += 1; + } + + // Reset halfmove on pawn move or capture + if m.piece == CH_WHITE_PAWN || m.piece == CH_BLACK_PAWN || m.capture != 0 { + new_board.halfmove = 0; + } + + // Update occupancy + update_occupancy(&mut new_board); + + // Update hash + update_hash_move(&mut new_board, m); + + // Add to history + new_board.history.push(new_board.hash); + + return new_board; +} + +// ============================================================================ +// DRAW PROBABILITY CACHE +// ============================================================================ + +fn cache_draw_probability(board: &mut TensorBoard, prob: f32) { + let cache_idx = (board.hash as i32) % HASH_CACHE_SIZE; + + on(gpu0) { + board.hash_cache[cache_idx] = board.hash; + board.draw_cache[cache_idx] = prob as f16; + } + + board.last_draw_score = prob; + + // Update draw channel in tensor + let draw_val = prob as f16; + for rank in 0..8 { + for file in 0..8 { + board.data[CH_DRAW_CACHE, rank, file] = draw_val; + } + } +} + +fn 
lookup_draw_probability(board: &TensorBoard) -> (bool, f32) { + let cache_idx = (board.hash as i32) % HASH_CACHE_SIZE; + + let cached_hash = board.hash_cache[cache_idx]; + if cached_hash == board.hash { + let cached_prob = board.draw_cache[cache_idx] as f32; + return (true, cached_prob); + } + + return (false, 0.5); +} + +// ============================================================================ +// BATCH OPERATIONS +// ============================================================================ + +const TENSOR_BATCH_SIZE: i32 = 256; + +fn evaluate_batch(boards: &[TensorBoard], network: &DrawNetwork) -> Vec { + let n = boards.len(); + let mut results = Vec.with_capacity(n); + + // Stack boards into batch tensor + let mut batch = tensor.zeros[f16, (TENSOR_BATCH_SIZE, NUM_CHANNELS, 8, 8)]; + + for (i, board) in boards.iter().enumerate() { + if i >= TENSOR_BATCH_SIZE { break; } + batch[i] = board.data; + } + + // GPU batch evaluation + on(gpu0) { + let outputs = network_forward_batch(network, batch, n as i32); + + for i in 0..n { + results.push(outputs[i] as f32); + } + } + + return results; +} + +// ============================================================================ +// CONVERSION UTILITIES +// ============================================================================ + +fn tensor_to_bitboard(board: &TensorBoard, channel: i32) -> u64 { + let mut bb: u64 = 0; + + for rank in 0..8 { + for file in 0..8 { + if board.data[channel, rank, file] > 0.0 { + let sq = rank * 8 + file; + bb |= 1u64 << sq; + } + } + } + + return bb; +} + +fn bitboard_to_tensor(bb: u64, tensor: &mut tensor) { + for sq in 0..64 { + let rank = sq / 8; + let file = sq % 8; + tensor[rank, file] = if (bb & (1u64 << sq)) != 0 { 1.0 } else { 0.0 }; + } +} + +// ============================================================================ +// STATISTICS +// ============================================================================ + +fn tensor_board_stats(board: &TensorBoard) { + 
println!("=== TensorBoard Statistics ==="); + println!("Hash: 0x{:016X}", board.hash); + println!("Side to move: {}", if board.stm == 0 { "White" } else { "Black" }); + println!("Halfmove: {}", board.halfmove); + println!("Fullmove: {}", board.fullmove); + println!("Castling: 0x{:02X}", board.castling); + println!("EP square: {}", board.ep_square); + println!("History length: {}", board.history.len()); + println!("Cached draw score: {:.3}", board.last_draw_score); + + // Count pieces + let mut white_count = 0; + let mut black_count = 0; + + for ch in 0..6 { + for rank in 0..8 { + for file in 0..8 { + if board.data[ch, rank, file] > 0.0 { + white_count += 1; + } + } + } + } + + for ch in 6..12 { + for rank in 0..8 { + for file in 0..8 { + if board.data[ch, rank, file] > 0.0 { + black_count += 1; + } + } + } + } + + println!("White pieces: {}", white_count); + println!("Black pieces: {}", black_count); +} diff --git a/samples/Mind/vec3.mind b/samples/Mind/vec3.mind new file mode 100644 index 0000000000..ff4e2bf072 --- /dev/null +++ b/samples/Mind/vec3.mind @@ -0,0 +1,290 @@ +// vec3.mind - 3D Vector Mathematics for Mind Ray +// +// This module provides 3D vector types and operations essential for ray tracing. +// All operations are designed to be GPU-friendly with no heap allocation. +// +// Design Principles: +// - All functions are pure (no side effects) +// - No heap allocation (fixed-size struct) +// - Numerically stable implementations +// - Consistent naming: v3_* prefix for operations +// +// Coordinate System: +// - Right-handed coordinate system +// - Y-axis points up +// - Z-axis points toward the viewer (camera looks toward -Z) +// +// Author: STARGA Inc. 
// License: MIT

// =============================================================================
// Constants
// =============================================================================

/// Small epsilon for near-zero comparisons
const VEC3_EPSILON: f32 = 0.00001;

/// Minimum length for safe normalization
const NORMALIZE_MIN_LEN: f32 = 0.0001;

// =============================================================================
// Vector Type
// =============================================================================

/// A 3-component single-precision vector.
///
/// Single type serving three roles in the ray tracer:
/// - world-space positions
/// - directions (kept unit-length by convention)
/// - RGB colors (usually in [0, 1]; HDR values may exceed 1)
pub struct Vec3 {
    /// X component (red for colors, right for directions)
    x: f32,
    /// Y component (green for colors, up for directions)
    y: f32,
    /// Z component (blue for colors, forward for directions)
    z: f32,
}

// =============================================================================
// Constructors
// =============================================================================

/// Build a Vec3 from its three components.
///
/// # Arguments
/// * `x` - X component
/// * `y` - Y component
/// * `z` - Z component
///
/// # Returns
/// * The assembled vector
#[inline]
pub fn vec3(x: f32, y: f32, z: f32) -> Vec3 {
    Vec3 { x: x, y: y, z: z }
}

/// The zero vector (0, 0, 0).
///
/// Typical uses: accumulator seed, "no light" color, and the
/// sentinel returned by guarded operations (see v3_div, v3_normalize).
#[inline]
pub fn vec3_zero() -> Vec3 {
    vec3(0.0, 0.0, 0.0)
}

/// The all-ones vector (1, 1, 1).
///
/// Typical uses: initial path-tracing throughput, white color, and the
/// identity element for the Hadamard (component-wise) product.
#[inline]
pub fn vec3_ones() -> Vec3 {
    vec3(1.0, 1.0, 1.0)
}

// =============================================================================
// Arithmetic Operations
// =============================================================================

/// Component-wise sum: a + b
#[inline]
pub fn v3_add(a: Vec3, b: Vec3) -> Vec3 {
    vec3(a.x + b.x, a.y + b.y, a.z + b.z)
}

/// Component-wise difference: a - b
#[inline]
pub fn v3_sub(a: Vec3, b: Vec3) -> Vec3 {
    vec3(a.x - b.x, a.y - b.y, a.z - b.z)
}

/// Uniform scale: v * t
#[inline]
pub fn v3_mul(v: Vec3, t: f32) -> Vec3 {
    vec3(v.x * t, v.y * t, v.z * t)
}

/// Scale by the reciprocal: v / t
///
/// Multiplies by 1/t so only one division is paid instead of three.
/// A divisor within VEC3_EPSILON of zero yields the zero vector
/// instead of producing infinities or NaNs.
///
/// # Arguments
/// * `v` - Vector to divide
/// * `t` - Scalar divisor
///
/// # Returns
/// * v scaled by 1/t, or the zero vector when t is near zero
#[inline]
pub fn v3_div(v: Vec3, t: f32) -> Vec3 {
    if f32_abs(t) < VEC3_EPSILON {
        vec3_zero()
    } else {
        v3_mul(v, 1.0 / t)
    }
}

/// Hadamard (component-wise) product: a * b
///
/// The workhorse of color attenuation in path tracing; each channel
/// is scaled independently.
#[inline]
pub fn v3_hadamard(a: Vec3, b: Vec3) -> Vec3 {
    vec3(a.x * b.x, a.y * b.y, a.z * b.z)
}

/// Component-wise negation: -v
#[inline]
pub fn v3_neg(v: Vec3) -> Vec3 {
    vec3(-v.x, -v.y, -v.z)
}

// =============================================================================
// Geometric Operations
// =============================================================================

/// Dot product: a . b
///
/// Geometrically equals |a| * |b| * cos(angle between a and b).
#[inline]
pub fn v3_dot(a: Vec3, b: Vec3) -> f32 {
    a.x * b.x + a.y * b.y + a.z * b.z
}

/// Cross product: a x b
///
/// Returns a vector perpendicular to both a and b.
/// The magnitude equals |a| * |b| * sin(angle).
/// Follows right-hand rule for direction.
#[inline]
pub fn v3_cross(a: Vec3, b: Vec3) -> Vec3 {
    let cx = a.y * b.z - a.z * b.y;
    let cy = a.z * b.x - a.x * b.z;
    let cz = a.x * b.y - a.y * b.x;
    vec3(cx, cy, cz)
}

/// Squared magnitude: |v|^2
///
/// Cheaper than v3_length when only relative comparisons are needed,
/// since it skips the square root.
#[inline]
pub fn v3_length_sq(v: Vec3) -> f32 {
    v3_dot(v, v)
}

/// Magnitude: |v|
#[inline]
pub fn v3_length(v: Vec3) -> f32 {
    f32_sqrt(v3_length_sq(v))
}

/// Scale v to unit length.
///
/// Inputs whose length does not exceed NORMALIZE_MIN_LEN map to the
/// zero vector rather than amplifying noise through a near-zero
/// division.
///
/// # Arguments
/// * `v` - Vector to normalize
///
/// # Returns
/// * A unit vector along v, or the zero vector for degenerate input
#[inline]
pub fn v3_normalize(v: Vec3) -> Vec3 {
    let len = v3_length(v);
    if len > NORMALIZE_MIN_LEN {
        return v3_div(v, len);
    }
    vec3_zero()
}

/// Mirror incident direction v about unit normal n: v - 2 (v . n) n
///
/// # Arguments
/// * `v` - Incident direction (pointing toward surface)
/// * `n` - Surface normal (unit vector, pointing outward)
///
/// # Returns
/// * The reflected direction
#[inline]
pub fn v3_reflect(v: Vec3, n: Vec3) -> Vec3 {
    v3_sub(v, v3_mul(n, 2.0 * v3_dot(v, n)))
}

/// Bend a direction through a material boundary via Snell's law.
///
/// Decomposes the refracted ray into components perpendicular and
/// parallel to the normal.
///
/// # Arguments
/// * `uv` - Incident direction (unit vector, pointing toward surface)
/// * `n` - Surface normal (unit vector, pointing into incident medium)
/// * `etai_over_etat` - Ratio of refractive indices (n1/n2)
///
/// # Returns
/// * The refracted direction, or the reflected direction when total
///   internal reflection occurs
///
/// # Total Internal Reflection
/// Refraction is impossible once sin(theta_t) would exceed 1, which
/// shows up here as 1 - |r_out_perp|^2 going negative; the ray is
/// reflected instead.
#[inline]
pub fn v3_refract(uv: Vec3, n: Vec3, etai_over_etat: f32) -> Vec3 {
    let cos_theta = f32_min(-v3_dot(uv, n), 1.0);
    let r_out_perp = v3_mul(v3_add(uv, v3_mul(n, cos_theta)), etai_over_etat);
    let k = 1.0 - v3_length_sq(r_out_perp);

    if k < 0.0 {
        // Total internal reflection: fall back to a mirror bounce.
        return v3_reflect(uv, n);
    }

    let r_out_parallel = v3_mul(n, -f32_sqrt(k));
    v3_add(r_out_perp, r_out_parallel)
}

// =============================================================================
// Utility Functions
// =============================================================================

/// True when every component lies within VEC3_EPSILON of zero.
///
/// Guards against degenerate directions, e.g. the cross product of
/// (nearly) parallel vectors.
#[inline]
pub fn v3_near_zero(v: Vec3) -> bool {
    (f32_abs(v.x) < VEC3_EPSILON) && (f32_abs(v.y) < VEC3_EPSILON) && (f32_abs(v.z) < VEC3_EPSILON)
}

/// Clamp all components to [0, 1] range
///
/// Used for final color output to prevent overflow in image files.
#[inline]
pub fn v3_clamp01(v: Vec3) -> Vec3 {
    // Clamp each channel independently before image output.
    let r = f32_clamp(v.x, 0.0, 1.0);
    let g = f32_clamp(v.y, 0.0, 1.0);
    let b = f32_clamp(v.z, 0.0, 1.0);
    vec3(r, g, b)
}

// =============================================================================
// External Dependencies
// =============================================================================
// Math utilities imported from math_stubs.mind:
// - f32_sqrt: Square root
// - f32_abs: Absolute value
// - f32_min: Minimum of two floats
// - f32_max: Maximum of two floats
// - f32_clamp: Clamp to range
diff --git a/vendor/grammars/mind-grammar b/vendor/grammars/mind-grammar
new file mode 160000
index 0000000000..91e24a6b18
--- /dev/null
+++ b/vendor/grammars/mind-grammar
@@ -0,0 +1 @@
+Subproject commit 91e24a6b1862c50d8cbe91c38c82d9454f9562ca