From 382806c1b771d6f2359c9dac0d3e0e853ba713b9 Mon Sep 17 00:00:00 2001 From: "STARGA Inc." Date: Thu, 15 Jan 2026 00:13:08 -0800 Subject: [PATCH] Add Mind programming language Mind (Machine Intelligence Native Design) is a systems programming language designed for AI/ML and numerical computing. Features include: - Static tensor types with compile-time shape checking - Reverse-mode automatic differentiation - MLIR/LLVM backend for optimized code generation - GPU device placement with `on(gpu)` blocks Sample files (3,490 lines) are from production projects: - cputer/nikolachess - NNUE chess engine - cputer/mind-ray - GPU path tracer - cputer/fractal-voyager - WebGPU fractal explorer - cputer/mind-spec - Language specification Website: https://mindlang.dev --- .gitmodules | 3 + grammars.yml | 2 + lib/linguist/languages.yml | 7 + samples/Mind/camera.mind | 247 ++++++++++ samples/Mind/complex.mind | 190 ++++++++ samples/Mind/conv2d.mind | 171 +++++++ samples/Mind/fractal.mind | 418 +++++++++++++++++ samples/Mind/material.mind | 343 ++++++++++++++ samples/Mind/mlp_backward.mind | 129 +++++ samples/Mind/render.mind | 333 +++++++++++++ samples/Mind/search.mind | 829 +++++++++++++++++++++++++++++++++ samples/Mind/tensor_board.mind | 540 +++++++++++++++++++++ samples/Mind/vec3.mind | 290 ++++++++++++ vendor/grammars/mind-grammar | 1 + 14 files changed, 3503 insertions(+) create mode 100644 samples/Mind/camera.mind create mode 100644 samples/Mind/complex.mind create mode 100644 samples/Mind/conv2d.mind create mode 100644 samples/Mind/fractal.mind create mode 100644 samples/Mind/material.mind create mode 100644 samples/Mind/mlp_backward.mind create mode 100644 samples/Mind/render.mind create mode 100644 samples/Mind/search.mind create mode 100644 samples/Mind/tensor_board.mind create mode 100644 samples/Mind/vec3.mind create mode 160000 vendor/grammars/mind-grammar diff --git a/.gitmodules b/.gitmodules index 6cbadcbda9..194a12c06c 100644 --- a/.gitmodules +++ b/.gitmodules @@ 
-1581,3 +1581,6 @@ [submodule "vendor/grammars/zephir-sublime"] path = vendor/grammars/zephir-sublime url = https://github.com/phalcon/zephir-sublime +[submodule "vendor/grammars/mind-grammar"] + path = vendor/grammars/mind-grammar + url = https://github.com/cputer/mind-grammar.git diff --git a/grammars.yml b/grammars.yml index 13979f0c14..4ca798246d 100644 --- a/grammars.yml +++ b/grammars.yml @@ -907,6 +907,8 @@ vendor/grammars/mediawiki.tmbundle: - text.html.mediawiki vendor/grammars/mercury-tmlanguage: - source.mercury +vendor/grammars/mind-grammar: +- source.mind vendor/grammars/mint-vscode: - source.mint vendor/grammars/mlir-grammar: diff --git a/lib/linguist/languages.yml b/lib/linguist/languages.yml index 7ea73879c5..b94cf16fdf 100644 --- a/lib/linguist/languages.yml +++ b/lib/linguist/languages.yml @@ -4720,6 +4720,13 @@ MiniZinc Data: tm_scope: source.mzn ace_mode: text language_id: 938193433 +Mind: + type: programming + color: "#7b2ff7" + extensions: + - ".mind" + tm_scope: source.mind + ace_mode: text Mint: type: programming extensions: diff --git a/samples/Mind/camera.mind b/samples/Mind/camera.mind new file mode 100644 index 0000000000..c404ddbd2e --- /dev/null +++ b/samples/Mind/camera.mind @@ -0,0 +1,247 @@ +// camera.mind - Camera Model for Mind Ray Path Tracer +// +// This module implements a virtual pinhole camera for ray generation. +// The camera uses a look-at model with configurable field of view +// and aspect ratio. 
+// +// Coordinate System: +// - Right-handed coordinate system with Y-up +// - +X: Right, +Y: Up, +Z: Toward camera (out of screen) +// - Camera looks along negative Z by default +// - Image plane is at distance 1 from the camera origin +// +// Camera Model: +// - Pinhole camera (no depth of field by default) +// - Rays originate from a single point (camera origin) +// - Image plane defines the viewing frustum +// +// The camera generates rays for each pixel, with optional jitter for +// anti-aliasing (handled in the render module). +// +// GPU Compatibility: +// - All fields are f32 for GPU alignment +// - No dynamic allocations +// - Pure functions (no side effects) +// +// Author: STARGA Inc. +// License: MIT + +// ============================================================================= +// Constants +// ============================================================================= + +/// Pi constant for degree-to-radian conversion +const PI: f32 = 3.14159265358979323846; + +/// Degrees to radians conversion factor +const DEG_TO_RAD: f32 = 0.017453292519943295; // PI / 180 + +/// Epsilon for near-zero vector detection +const CAMERA_EPSILON: f32 = 0.0001; + +// ============================================================================= +// Camera Type +// ============================================================================= + +/// Virtual camera for ray generation +/// +/// Stores precomputed values for efficient ray generation. The camera +/// defines a viewing frustum with the image plane at unit distance. 
+/// +/// # Coordinate Frame +/// - origin: camera position in world space +/// - horizontal: vector spanning image width +/// - vertical: vector spanning image height +/// - lower_left: position of bottom-left corner of image plane +pub struct Camera { + /// Camera position in world space + origin: Vec3, + /// Bottom-left corner of the image plane + lower_left: Vec3, + /// Vector spanning the full image width + horizontal: Vec3, + /// Vector spanning the full image height + vertical: Vec3, +} + +// ============================================================================= +// Camera Construction +// ============================================================================= + +/// Create a new camera with look-at positioning +/// +/// Constructs a camera positioned at `lookfrom`, looking toward `lookat`, +/// with the specified field of view and aspect ratio. +/// +/// # Arguments +/// * `lookfrom` - Camera position in world space +/// * `lookat` - Point the camera is aimed at +/// * `vup` - World up vector (typically (0, 1, 0)) +/// * `vfov` - Vertical field of view in degrees (typically 30-90) +/// * `aspect_ratio` - Image width / height ratio +/// +/// # Returns +/// * Configured Camera ready for ray generation +/// +/// # Camera Axes +/// - w: points backward (opposite to viewing direction) +/// - u: points right (perpendicular to w and vup) +/// - v: points up (perpendicular to w and u, may differ from vup if tilted) +/// +/// # Example +/// ```mind +/// let cam = camera_new( +/// vec3(0, 1, 5), // lookfrom: slightly above, behind origin +/// vec3(0, 0, 0), // lookat: origin +/// vec3(0, 1, 0), // vup: Y is up +/// 60.0, // 60 degree vertical FOV +/// 16.0 / 9.0 // 16:9 aspect ratio +/// ); +/// ``` +pub fn camera_new( + lookfrom: Vec3, + lookat: Vec3, + vup: Vec3, + vfov: f32, + aspect_ratio: f32 +) -> Camera { + // Convert vertical FOV from degrees to radians + // Clamp vfov to sensible range (0.1 to 179.0) to avoid tan(0) or tan(pi/2) + let 
clamped_vfov = f32_clamp(vfov, 0.1, 179.0); + let theta = clamped_vfov * DEG_TO_RAD; + // Half-height of viewport at distance 1 + let h = f32_tan(theta / 2.0); + let viewport_height = 2.0 * h; + // Ensure aspect_ratio is positive + let safe_aspect = if aspect_ratio <= 0.0 { 1.0 } else { aspect_ratio }; + let viewport_width = safe_aspect * viewport_height; + + // Build orthonormal camera basis vectors + // w: points backward (from lookat to lookfrom) + let view_dir = v3_sub(lookfrom, lookat); + + // Handle degenerate case: lookfrom == lookat + // Default to looking along -Z (standard forward direction) + let w = if v3_length_sq(view_dir) < CAMERA_EPSILON * CAMERA_EPSILON { + vec3(0.0, 0.0, 1.0) // Default backward direction (+Z) + } else { + v3_normalize(view_dir) + }; + + // u: points right (perpendicular to vup and w) + let cross_result = v3_cross(vup, w); + + // Handle degenerate case: vup parallel to w + // Use an alternate up vector to construct valid basis + let u = if v3_length_sq(cross_result) < CAMERA_EPSILON * CAMERA_EPSILON { + // vup is parallel to w, use alternate up vector + let alt_up = if f32_abs(w.y) < 0.9 { + vec3(0.0, 1.0, 0.0) // Y-up + } else { + vec3(1.0, 0.0, 0.0) // X-up fallback + }; + v3_normalize(v3_cross(alt_up, w)) + } else { + v3_normalize(cross_result) + }; + + // v: points up (perpendicular to w and u) + let v = v3_cross(w, u); + + let origin = lookfrom; + // Scale basis vectors to viewport dimensions + let horizontal = v3_mul(u, viewport_width); + let vertical = v3_mul(v, viewport_height); + + // Compute lower-left corner of image plane + // Image plane is at distance 1 from camera (along -w) + let lower_left = v3_sub( + v3_sub(v3_sub(origin, v3_div(horizontal, 2.0)), v3_div(vertical, 2.0)), + w + ); + + Camera { + origin: origin, + lower_left: lower_left, + horizontal: horizontal, + vertical: vertical, + } +} + +// ============================================================================= +// Ray Generation +// 
============================================================================= + +/// Generate a ray from the camera through a point on the image plane +/// +/// Given normalized image coordinates (u, v), generates a ray from the +/// camera origin through the corresponding point on the image plane. +/// +/// # Arguments +/// * `cam` - Camera configuration +/// * `u` - Horizontal image coordinate (0 = left edge, 1 = right edge) +/// * `v` - Vertical image coordinate (0 = bottom edge, 1 = top edge) +/// +/// # Returns +/// * Ray from camera origin through the specified image plane point +/// +/// # Coordinate Mapping +/// For proper pixel coordinates: +/// - u = (x + 0.5) / width for pixel center +/// - v = (y + 0.5) / height for pixel center +/// Add random offset within [0, 1/width] x [0, 1/height] for anti-aliasing. +/// +/// # Edge Cases +/// - u, v outside [0, 1] produces rays outside the normal frustum +/// - Values are not clamped to allow overscan/bleeding effects +/// +/// # Performance +/// ~15 float operations (no branches, no function calls except normalize) +#[inline] +pub fn camera_get_ray(cam: Camera, u: f32, v: f32) -> Ray { + // Point on image plane = lower_left + u*horizontal + v*vertical + // Direction = point - origin + let direction = v3_sub( + v3_add( + v3_add(cam.lower_left, v3_mul(cam.horizontal, u)), + v3_mul(cam.vertical, v) + ), + cam.origin + ); + + // Create normalized ray + ray_new(cam.origin, v3_normalize(direction)) +} + +/// Validate camera configuration +/// +/// Checks that the camera parameters are valid for ray generation. 
+/// +/// # Arguments +/// * `cam` - Camera to validate +/// +/// # Returns +/// * true if camera is valid, false otherwise +#[inline] +pub fn camera_is_valid(cam: &Camera) -> bool { + // Check that vectors are not degenerate (near-zero length) + let h_len_sq = v3_length_sq(cam.horizontal); + let v_len_sq = v3_length_sq(cam.vertical); + + // Minimum valid vector length squared (1e-6) + let min_len_sq = 0.000001; + + h_len_sq > min_len_sq && v_len_sq > min_len_sq +} + +// ============================================================================= +// External Dependencies +// ============================================================================= +// Math utilities imported from math_stubs.mind: +// - f32_tan: Tangent function for FOV calculation +// +// Vector utilities imported from vec3.mind: +// - v3_normalize, v3_sub, v3_add, v3_mul, v3_div, v3_cross +// +// Ray utilities imported from ray.mind: +// - ray_new: Ray constructor diff --git a/samples/Mind/complex.mind b/samples/Mind/complex.mind new file mode 100644 index 0000000000..9603df89e8 --- /dev/null +++ b/samples/Mind/complex.mind @@ -0,0 +1,190 @@ +// complex.mind - Complex Number Operations for Fractal Voyager +// +// Complex numbers are represented as Tensor where: +// - index 0 = real part +// - index 1 = imaginary part +// +// For batched operations (computing many pixels): +// - Tensor for N complex numbers +// - Tensor for a 2D grid of complex numbers +// +// All operations use MIND's tensor semantics with broadcasting. +// +// Author: STARGA Inc. 
+// License: MIT +// Specification reference: spec/v1.0/language.md + +import tensor::zeros; +import tensor::ones; +import math::sqrt; +import math::log; + +// ============================================================================= +// Complex Number Creation +// ============================================================================= + +// Create complex number from real and imaginary parts +// Returns: Tensor representing (real + imag*i) +fn complex(real: f32, imag: f32) -> Tensor { + [real, imag] +} + +// Complex zero: 0 + 0i +fn complex_zero() -> Tensor { + [0.0, 0.0] +} + +// Complex one: 1 + 0i +fn complex_one() -> Tensor { + [1.0, 0.0] +} + +// Complex i: 0 + 1i +fn complex_i() -> Tensor { + [0.0, 1.0] +} + +// ============================================================================= +// Complex Arithmetic (Single Complex Numbers) +// ============================================================================= + +// Complex addition: (a + bi) + (c + di) = (a+c) + (b+d)i +fn c_add(z1: Tensor, z2: Tensor) -> Tensor { + z1 + z2 +} + +// Complex subtraction: (a + bi) - (c + di) = (a-c) + (b-d)i +fn c_sub(z1: Tensor, z2: Tensor) -> Tensor { + z1 - z2 +} + +// Complex multiplication: (a + bi) * (c + di) = (ac - bd) + (ad + bc)i +fn c_mul(z1: Tensor, z2: Tensor) -> Tensor { + let a = z1[0]; + let b = z1[1]; + let c = z2[0]; + let d = z2[1]; + [a * c - b * d, a * d + b * c] +} + +// Complex square: z^2 = (x + yi)^2 = (x^2 - y^2) + 2xyi +// Optimized special case of c_mul(z, z) +fn c_sq(z: Tensor) -> Tensor { + let x = z[0]; + let y = z[1]; + [x * x - y * y, 2.0 * x * y] +} + +// Complex conjugate: conj(a + bi) = a - bi +fn c_conj(z: Tensor) -> Tensor { + [z[0], -z[1]] +} + +// Complex magnitude squared: |z|^2 = x^2 + y^2 +// Avoids sqrt for escape radius checks +fn c_abs_sq(z: Tensor) -> f32 { + z[0] * z[0] + z[1] * z[1] +} + +// Complex magnitude: |z| = sqrt(x^2 + y^2) +fn c_abs(z: Tensor) -> f32 { + sqrt(c_abs_sq(z)) +} + +// Scalar multiplication: 
t * (a + bi) = ta + tbi +fn c_scale(z: Tensor, t: f32) -> Tensor { + z * t +} + +// Component-wise absolute value (for Burning Ship fractal) +fn c_abs_components(z: Tensor) -> Tensor { + [abs(z[0]), abs(z[1])] +} + +// ============================================================================= +// Batched Complex Operations (for GPU parallelism) +// ============================================================================= + +// Batched complex multiplication: compute many multiplications at once +// Shape: [N, 2] @ [N, 2] -> [N, 2] +fn c_mul_batch(z1: Tensor, z2: Tensor) -> Tensor { + let a = z1[:, 0]; // Shape [N] + let b = z1[:, 1]; // Shape [N] + let c = z2[:, 0]; // Shape [N] + let d = z2[:, 1]; // Shape [N] + + let real = a * c - b * d; // Shape [N] + let imag = a * d + b * c; // Shape [N] + + stack([real, imag], axis=1) // Shape [N, 2] +} + +// Batched complex square: z^2 for many complex numbers +// Shape: [N, 2] -> [N, 2] +fn c_sq_batch(z: Tensor) -> Tensor { + let x = z[:, 0]; // Shape [N] + let y = z[:, 1]; // Shape [N] + + let real = x * x - y * y; + let imag = 2.0 * x * y; + + stack([real, imag], axis=1) +} + +// Batched magnitude squared: |z|^2 for many complex numbers +// Shape: [N, 2] -> [N] +fn c_abs_sq_batch(z: Tensor) -> Tensor { + let x = z[:, 0]; + let y = z[:, 1]; + x * x + y * y +} + +// Batched complex addition +// Shape: [N, 2] + [N, 2] -> [N, 2] +fn c_add_batch(z1: Tensor, z2: Tensor) -> Tensor { + z1 + z2 +} + +// Batched component-wise absolute value (Burning Ship) +// Shape: [N, 2] -> [N, 2] +fn c_abs_components_batch(z: Tensor) -> Tensor { + abs(z) +} + +// Batched conjugate (Tricorn) +// Shape: [N, 2] -> [N, 2] +fn c_conj_batch(z: Tensor) -> Tensor { + let real = z[:, 0]; + let imag = -z[:, 1]; + stack([real, imag], axis=1) +} + +// ============================================================================= +// 2D Grid Complex Operations (for image rendering) +// 
============================================================================= + +// Complex square for 2D grid of complex numbers +// Shape: [H, W, 2] -> [H, W, 2] +fn c_sq_grid(z: Tensor) -> Tensor { + let x = z[:, :, 0]; // Shape [H, W] + let y = z[:, :, 1]; // Shape [H, W] + + let real = x * x - y * y; + let imag = 2.0 * x * y; + + stack([real, imag], axis=2) +} + +// Magnitude squared for 2D grid +// Shape: [H, W, 2] -> [H, W] +fn c_abs_sq_grid(z: Tensor) -> Tensor { + let x = z[:, :, 0]; + let y = z[:, :, 1]; + x * x + y * y +} + +// Add two grids of complex numbers +// Shape: [H, W, 2] + [H, W, 2] -> [H, W, 2] +fn c_add_grid(z1: Tensor, z2: Tensor) -> Tensor { + z1 + z2 +} diff --git a/samples/Mind/conv2d.mind b/samples/Mind/conv2d.mind new file mode 100644 index 0000000000..666ea26793 --- /dev/null +++ b/samples/Mind/conv2d.mind @@ -0,0 +1,171 @@ +// MIND Language Example: 2D Convolution +// Specification reference: spec/v1.0/ir.md#linear-and-tensor-algebra (Conv2d) +// +// This example demonstrates Conv2d operations with various +// stride and padding configurations. MIND uses NHWC format. 
+ +import tensor::zeros; +import tensor::ones; + +// Example 1: Basic convolution (valid padding, stride 1) +// Reference: spec/v1.0/ir.md#linear-and-tensor-algebra +// +// Input format: NHWC [batch, height, width, channels_in] +// Filter format: HWCF [kernel_h, kernel_w, channels_in, channels_out] +// Output: [batch, out_h, out_w, channels_out] +// +// Expected output shape: [1, 3, 3, 2] (5-3+1 = 3 with valid padding) +fn basic_conv2d() -> Tensor { + // Input: 1 image, 5x5, 3 channels + let input: Tensor = ones([1, 5, 5, 3]); + + // Filter: 3x3 kernel, 3 input channels, 2 output channels + let filter: Tensor = ones([3, 3, 3, 2]); + + // Conv2d with valid padding (no padding) and stride 1 + // Output height: (5 - 3) / 1 + 1 = 3 + // Output width: (5 - 3) / 1 + 1 = 3 + let output = conv2d(input, filter, strides=[1, 1], padding=Valid); + + output +} + +// Example 2: Convolution with 'same' padding +// Reference: spec/v1.0/ir.md#linear-and-tensor-algebra +// Same padding preserves spatial dimensions (with stride 1) +// +// Expected output shape: [1, 5, 5, 4] +fn same_padding_conv2d() -> Tensor { + // Input: 1 image, 5x5, 3 channels + let input: Tensor = ones([1, 5, 5, 3]); + + // Filter: 3x3 kernel, 3 input channels, 4 output channels + let filter: Tensor = ones([3, 3, 3, 4]); + + // Same padding: output has same spatial dims as input + // Padding is computed automatically: pad = (kernel_size - 1) / 2 + let output = conv2d(input, filter, strides=[1, 1], padding=Same); + + output // Shape [1, 5, 5, 4] +} + +// Example 3: Strided convolution +// Reference: spec/v1.0/ir.md#linear-and-tensor-algebra +// Stride reduces output spatial dimensions +// +// Expected output shape: [1, 2, 2, 8] +fn strided_conv2d() -> Tensor { + // Input: 1 image, 6x6, 3 channels + let input: Tensor = ones([1, 6, 6, 3]); + + // Filter: 3x3 kernel + let filter: Tensor = ones([3, 3, 3, 8]); + + // Stride 2 in both dimensions + // Output height: (6 - 3) / 2 + 1 = 2 + // Output width: (6 - 3) / 2 + 1 
= 2 + let output = conv2d(input, filter, strides=[2, 2], padding=Valid); + + output +} + +// Example 4: Custom padding +// Reference: spec/v1.0/ir.md#linear-and-tensor-algebra +// Explicit padding values: [[pad_top, pad_bottom], [pad_left, pad_right]] +// +// Expected output shape: [1, 7, 9, 4] +fn custom_padding_conv2d() -> Tensor { + // Input: 1 image, 5x5, 3 channels + let input: Tensor = ones([1, 5, 5, 3]); + + // Filter: 3x3 kernel + let filter: Tensor = ones([3, 3, 3, 4]); + + // Custom padding: [[top=2, bottom=2], [left=3, right=3]] + // Padded input: 5+2+2=9 height, 5+3+3=11 width + // Output: (9-3)/1+1=7 height, (11-3)/1+1=9 width + let output = conv2d( + input, filter, + strides=[1, 1], + padding=Custom([[2, 2], [3, 3]]) + ); + + output +} + +// Example 5: Batched convolution +// Reference: spec/v1.0/ir.md#linear-and-tensor-algebra +// Same filter applied to each image in batch +// +// Expected output shape: [batch=4, 6, 6, 16] +fn batched_conv2d() -> Tensor { + // Batch of 4 images + let input: Tensor = ones([4, 8, 8, 3]); + + // Single filter (applied to all batch elements) + let filter: Tensor = ones([3, 3, 3, 16]); + + // Each image in batch processed independently + let output = conv2d(input, filter, strides=[1, 1], padding=Valid); + + output +} + +// Example 6: 1x1 convolution (pointwise) +// Reference: spec/v1.0/ir.md#linear-and-tensor-algebra +// 1x1 convolutions mix channels without spatial context +// +// Expected output shape: [1, 8, 8, 64] +fn pointwise_conv() -> Tensor { + // Input with 32 channels + let input: Tensor = ones([1, 8, 8, 32]); + + // 1x1 filter: channel mixing only + let filter: Tensor = ones([1, 1, 32, 64]); + + // No spatial reduction with 1x1 kernel + let output = conv2d(input, filter, strides=[1, 1], padding=Valid); + + output +} + +// Example 7: Depthwise convolution pattern +// Reference: spec/v1.0/ir.md#linear-and-tensor-algebra +// Each input channel convolved separately (groups = channels) +// +// Note: True depthwise 
conv may require specialized op or grouping +fn depthwise_pattern() -> Tensor { + let input: Tensor = ones([1, 8, 8, 3]); + + // Depthwise: 1 filter per input channel + // For true depthwise, use channels_out = channels_in + // This is approximate; real depthwise needs groups parameter + let filter: Tensor = ones([3, 3, 3, 3]); + + let output = conv2d(input, filter, strides=[1, 1], padding=Valid); + + output +} + +// Example 8: Conv2d output shape formula +// Reference: spec/v1.0/ir.md#linear-and-tensor-algebra +// H_out = floor((H_in + 2*pad_h - kernel_h) / stride_h) + 1 +fn output_shape_demo() { + // Demonstrating shape calculation + // + // Input: [batch=1, H=28, W=28, C=1] + // Filter: [H_k=5, W_k=5, C_in=1, C_out=32] + // Stride: [2, 2] + // Padding: Valid (pad=0) + // + // H_out = floor((28 + 0 - 5) / 2) + 1 = floor(23/2) + 1 = 11 + 1 = 12 + // W_out = floor((28 + 0 - 5) / 2) + 1 = 12 + // + // Output shape: [1, 12, 12, 32] + + let input: Tensor = zeros([1, 28, 28, 1]); + let filter: Tensor = zeros([5, 5, 1, 32]); + + let output = conv2d(input, filter, strides=[2, 2], padding=Valid); + // output has shape [1, 12, 12, 32] +} diff --git a/samples/Mind/fractal.mind b/samples/Mind/fractal.mind new file mode 100644 index 0000000000..5c13fb6fcc --- /dev/null +++ b/samples/Mind/fractal.mind @@ -0,0 +1,418 @@ +// fractal.mind - Fractal Iteration Algorithms for Fractal Voyager +// +// Implements classic fractal iteration algorithms using MIND tensor operations: +// - Mandelbrot: z_{n+1} = z_n^2 + c (z_0 = 0) +// - Julia: z_{n+1} = z_n^2 + c (z_0 varies, c fixed) +// - Burning Ship: z_{n+1} = (|Re(z)| + i|Im(z)|)^2 + c +// - Tricorn: z_{n+1} = conj(z)^2 + c +// +// All algorithms compute smooth iteration counts for banding-free coloring. +// Operations are vectorized over entire images for GPU parallelism. +// +// Author: STARGA Inc. 
+// License: MIT +// Specification reference: spec/v1.0/language.md + +import tensor::zeros; +import tensor::ones; +import math::sqrt; +import math::log; +import math::abs; +import math::max; +import math::min; + +// ============================================================================= +// Constants +// ============================================================================= + +// Escape radius squared (|z|^2 > 4 means point escaped) +fn escape_radius_sq() -> f32 { + 4.0 +} + +// Log(2) for smooth coloring calculations +fn log2() -> f32 { + 0.693147180559945 +} + +// ============================================================================= +// Fractal Type Codes +// ============================================================================= +// 0 = Mandelbrot +// 1 = Julia +// 2 = Burning Ship +// 3 = Tricorn + +// ============================================================================= +// Single-Point Fractal Iteration +// ============================================================================= + +// Mandelbrot iteration for a single point +// c: complex coordinate to test [real, imag] +// max_iter: maximum iterations +// Returns: [smooth_iter, escaped] where escaped is 1.0 if escaped, 0.0 if in set +fn mandelbrot_point(c: Tensor, max_iter: i32) -> Tensor { + let z = [0.0, 0.0]; // z_0 = 0 + let escape_r_sq = escape_radius_sq(); + let log_2 = log2(); + + // Iterative computation + let result = iterate(max_iter, z, fn(i: i32, z: Tensor) -> Tensor { + let z_sq = z[0] * z[0] + z[1] * z[1]; + + // Check escape condition + select(z_sq > escape_r_sq, z, + // z = z^2 + c + [z[0] * z[0] - z[1] * z[1] + c[0], + 2.0 * z[0] * z[1] + c[1]]) + }); + + // Compute smooth iteration count + let z_final = result; + let z_sq_final = z_final[0] * z_final[0] + z_final[1] * z_final[1]; + let escaped = z_sq_final > escape_r_sq; + + let smooth_iter = select(escaped, + // Smooth coloring formula: i + 1 - log(log|z|)/log(2) + cast(count_iterations(c, 
max_iter), f32) + 1.0 - + log(log(sqrt(z_sq_final)) / log_2) / log_2, + cast(max_iter, f32)); + + [smooth_iter, select(escaped, 1.0, 0.0)] +} + +// Julia iteration for a single point +// z0: initial z value (screen coordinate) +// c: Julia constant +// max_iter: maximum iterations +fn julia_point(z0: Tensor, c: Tensor, max_iter: i32) -> Tensor { + let z = z0; + let escape_r_sq = escape_radius_sq(); + let log_2 = log2(); + + let result = iterate(max_iter, z, fn(i: i32, z: Tensor) -> Tensor { + let z_sq = z[0] * z[0] + z[1] * z[1]; + + select(z_sq > escape_r_sq, z, + [z[0] * z[0] - z[1] * z[1] + c[0], + 2.0 * z[0] * z[1] + c[1]]) + }); + + let z_sq_final = result[0] * result[0] + result[1] * result[1]; + let escaped = z_sq_final > escape_r_sq; + + let smooth_iter = select(escaped, + cast(count_julia_iterations(z0, c, max_iter), f32) + 1.0 - + log(log(sqrt(z_sq_final)) / log_2) / log_2, + cast(max_iter, f32)); + + [smooth_iter, select(escaped, 1.0, 0.0)] +} + +// ============================================================================= +// Vectorized Fractal Computation (GPU-Parallel) +// ============================================================================= + +// Compute Mandelbrot for entire image grid +// c_grid: Tensor - complex coordinates for each pixel +// max_iter: maximum iterations +// Returns: Tensor - smooth iteration counts +fn mandelbrot_grid(c_grid: Tensor, max_iter: i32) -> Tensor { + let escape_r_sq = escape_radius_sq(); + let log_2 = log2(); + + // Initialize z to zeros for all pixels + let z_real = zeros([H, W]); + let z_imag = zeros([H, W]); + let iterations = zeros([H, W]); + let escaped = zeros([H, W]); // 0 = not escaped, 1 = escaped + + // Extract c components + let c_real = c_grid[:, :, 0]; // [H, W] + let c_imag = c_grid[:, :, 1]; // [H, W] + + // Iterate all pixels in parallel + let state = [z_real, z_imag, iterations, escaped]; + + let final_state = fold(range(max_iter), state, + fn(i: i32, state: Tensor) -> Tensor { + let z_r = 
state[0]; + let z_i = state[1]; + let iter = state[2]; + let esc = state[3]; + + // Magnitude squared: |z|^2 + let z_sq = z_r * z_r + z_i * z_i; + + // Check which pixels should continue + let should_continue = (z_sq <= escape_r_sq) * (1.0 - esc); + + // z^2 = (x^2 - y^2) + 2xyi + let new_z_r = z_r * z_r - z_i * z_i + c_real; + let new_z_i = 2.0 * z_r * z_i + c_imag; + + // Update only pixels that haven't escaped + let z_r_next = select(should_continue > 0.5, new_z_r, z_r); + let z_i_next = select(should_continue > 0.5, new_z_i, z_i); + + // Increment iteration count for non-escaped pixels + let iter_next = iter + should_continue; + + // Mark newly escaped pixels + let z_sq_next = z_r_next * z_r_next + z_i_next * z_i_next; + let just_escaped = (z_sq_next > escape_r_sq) * should_continue; + let esc_next = esc + just_escaped; + + stack([z_r_next, z_i_next, iter_next, esc_next], axis=0) + }); + + // Extract final state + let z_r_final = final_state[0]; + let z_i_final = final_state[1]; + let iter_final = final_state[2]; + let esc_final = final_state[3]; + + // Compute smooth iteration count + let z_sq_final = z_r_final * z_r_final + z_i_final * z_i_final; + + // Smooth coloring: iter + 1 - log(log|z|)/log(2) + let log_zn = log(z_sq_final) / 2.0; + let nu = log(log_zn / log_2) / log_2; + let smooth = iter_final + 1.0 - nu; + + // Return max_iter for points still in set + select(esc_final > 0.5, smooth, cast(max_iter, f32) * ones([H, W])) +} + +// Compute Julia set for entire image grid +// z_grid: Tensor - initial z values (screen coordinates) +// c: Julia constant [real, imag] +// max_iter: maximum iterations +fn julia_grid(z_grid: Tensor, c: Tensor, max_iter: i32) -> Tensor { + let escape_r_sq = escape_radius_sq(); + let log_2 = log2(); + + // Initialize z from grid + let z_real = z_grid[:, :, 0]; + let z_imag = z_grid[:, :, 1]; + let iterations = zeros([H, W]); + let escaped = zeros([H, W]); + + // Julia constant (broadcast to all pixels) + let c_real = c[0]; + let 
c_imag = c[1]; + + let state = stack([z_real, z_imag, iterations, escaped], axis=0); + + let final_state = fold(range(max_iter), state, + fn(i: i32, state: Tensor) -> Tensor { + let z_r = state[0]; + let z_i = state[1]; + let iter = state[2]; + let esc = state[3]; + + let z_sq = z_r * z_r + z_i * z_i; + let should_continue = (z_sq <= escape_r_sq) * (1.0 - esc); + + let new_z_r = z_r * z_r - z_i * z_i + c_real; + let new_z_i = 2.0 * z_r * z_i + c_imag; + + let z_r_next = select(should_continue > 0.5, new_z_r, z_r); + let z_i_next = select(should_continue > 0.5, new_z_i, z_i); + let iter_next = iter + should_continue; + + let z_sq_next = z_r_next * z_r_next + z_i_next * z_i_next; + let just_escaped = (z_sq_next > escape_r_sq) * should_continue; + let esc_next = esc + just_escaped; + + stack([z_r_next, z_i_next, iter_next, esc_next], axis=0) + }); + + let z_r_final = final_state[0]; + let z_i_final = final_state[1]; + let iter_final = final_state[2]; + let esc_final = final_state[3]; + + let z_sq_final = z_r_final * z_r_final + z_i_final * z_i_final; + let log_zn = log(z_sq_final) / 2.0; + let nu = log(log_zn / log_2) / log_2; + let smooth = iter_final + 1.0 - nu; + + select(esc_final > 0.5, smooth, cast(max_iter, f32) * ones([H, W])) +} + +// Compute Burning Ship for entire image grid +// Uses |Re(z)| and |Im(z)| before squaring +fn burning_ship_grid(c_grid: Tensor, max_iter: i32) -> Tensor { + let escape_r_sq = escape_radius_sq(); + let log_2 = log2(); + + let z_real = zeros([H, W]); + let z_imag = zeros([H, W]); + let iterations = zeros([H, W]); + let escaped = zeros([H, W]); + + let c_real = c_grid[:, :, 0]; + let c_imag = c_grid[:, :, 1]; + + let state = stack([z_real, z_imag, iterations, escaped], axis=0); + + let final_state = fold(range(max_iter), state, + fn(i: i32, state: Tensor) -> Tensor { + let z_r = state[0]; + let z_i = state[1]; + let iter = state[2]; + let esc = state[3]; + + let z_sq = z_r * z_r + z_i * z_i; + let should_continue = (z_sq <= 
escape_r_sq) * (1.0 - esc); + + // Burning Ship: take absolute values before squaring + let z_r_abs = abs(z_r); + let z_i_abs = abs(z_i); + + let new_z_r = z_r_abs * z_r_abs - z_i_abs * z_i_abs + c_real; + let new_z_i = 2.0 * z_r_abs * z_i_abs + c_imag; + + let z_r_next = select(should_continue > 0.5, new_z_r, z_r); + let z_i_next = select(should_continue > 0.5, new_z_i, z_i); + let iter_next = iter + should_continue; + + let z_sq_next = z_r_next * z_r_next + z_i_next * z_i_next; + let just_escaped = (z_sq_next > escape_r_sq) * should_continue; + let esc_next = esc + just_escaped; + + stack([z_r_next, z_i_next, iter_next, esc_next], axis=0) + }); + + let z_r_final = final_state[0]; + let z_i_final = final_state[1]; + let iter_final = final_state[2]; + let esc_final = final_state[3]; + + let z_sq_final = z_r_final * z_r_final + z_i_final * z_i_final; + let log_zn = log(z_sq_final) / 2.0; + let nu = log(log_zn / log_2) / log_2; + let smooth = iter_final + 1.0 - nu; + + select(esc_final > 0.5, smooth, cast(max_iter, f32) * ones([H, W])) +} + +// Compute Tricorn (Mandelbar) for entire image grid +// Uses conjugate: z_{n+1} = conj(z_n)^2 + c +fn tricorn_grid(c_grid: Tensor, max_iter: i32) -> Tensor { + let escape_r_sq = escape_radius_sq(); + let log_2 = log2(); + + let z_real = zeros([H, W]); + let z_imag = zeros([H, W]); + let iterations = zeros([H, W]); + let escaped = zeros([H, W]); + + let c_real = c_grid[:, :, 0]; + let c_imag = c_grid[:, :, 1]; + + let state = stack([z_real, z_imag, iterations, escaped], axis=0); + + let final_state = fold(range(max_iter), state, + fn(i: i32, state: Tensor) -> Tensor { + let z_r = state[0]; + let z_i = state[1]; + let iter = state[2]; + let esc = state[3]; + + let z_sq = z_r * z_r + z_i * z_i; + let should_continue = (z_sq <= escape_r_sq) * (1.0 - esc); + + // Tricorn: conj(z)^2 = (x - yi)^2 = (x^2 - y^2) - 2xyi + let new_z_r = z_r * z_r - z_i * z_i + c_real; + let new_z_i = -2.0 * z_r * z_i + c_imag; // Note: negative for 
conjugate + + let z_r_next = select(should_continue > 0.5, new_z_r, z_r); + let z_i_next = select(should_continue > 0.5, new_z_i, z_i); + let iter_next = iter + should_continue; + + let z_sq_next = z_r_next * z_r_next + z_i_next * z_i_next; + let just_escaped = (z_sq_next > escape_r_sq) * should_continue; + let esc_next = esc + just_escaped; + + stack([z_r_next, z_i_next, iter_next, esc_next], axis=0) + }); + + let z_r_final = final_state[0]; + let z_i_final = final_state[1]; + let iter_final = final_state[2]; + let esc_final = final_state[3]; + + let z_sq_final = z_r_final * z_r_final + z_i_final * z_i_final; + let log_zn = log(z_sq_final) / 2.0; + let nu = log(log_zn / log_2) / log_2; + let smooth = iter_final + 1.0 - nu; + + select(esc_final > 0.5, smooth, cast(max_iter, f32) * ones([H, W])) +} + +// ============================================================================= +// Unified Interface +// ============================================================================= + +// Compute fractal for image grid using specified type +// fractal_type: 0=Mandelbrot, 1=Julia, 2=Burning Ship, 3=Tricorn +// c_grid: complex coordinates [H, W, 2] +// julia_c: Julia constant (only used when fractal_type=1) +// max_iter: maximum iterations +fn compute_fractal_grid( + fractal_type: i32, + c_grid: Tensor, + julia_c: Tensor, + max_iter: i32 +) -> Tensor { + select(fractal_type == 0, mandelbrot_grid(c_grid, max_iter), + select(fractal_type == 1, julia_grid(c_grid, julia_c, max_iter), + select(fractal_type == 2, burning_ship_grid(c_grid, max_iter), + tricorn_grid(c_grid, max_iter)))) +} + +// ============================================================================= +// Coordinate Mapping +// ============================================================================= + +// Generate complex coordinate grid from screen parameters +// width, height: screen dimensions +// center: view center in complex plane [real, imag] +// zoom: zoom level (higher = more zoomed in) +fn 
make_coordinate_grid( + width: i32, + height: i32, + center: Tensor, + zoom: f32 +) -> Tensor { + let aspect = cast(width, f32) / cast(height, f32); + let scale = 3.5 / zoom; + + // Generate pixel coordinates + let x_coords = linspace(0.0, cast(width - 1, f32), width); // [W] + let y_coords = linspace(0.0, cast(height - 1, f32), height); // [H] + + // Create meshgrid + let x_grid = broadcast(x_coords, [height, width]); // [H, W] + let y_grid = broadcast(expand_dims(y_coords, 1), [height, width]); // [H, W] + + // Map to [-0.5, 0.5] range + let u = x_grid / cast(width, f32) - 0.5; + let v = y_grid / cast(height, f32) - 0.5; + + // Apply zoom, aspect ratio, and center offset + let real = u * scale * aspect + center[0]; + let imag = v * scale + center[1]; + + stack([real, imag], axis=2) +} + +// Calculate adaptive iteration count based on zoom level +fn adaptive_iterations(zoom: f32, base_iter: i32, max_allowed: i32) -> i32 { + let log_2 = log2(); + let zoom_factor = log(max(1.0, zoom)) / log_2; + let iter = base_iter + cast(zoom_factor * 50.0, i32); + min(iter, max_allowed) +} diff --git a/samples/Mind/material.mind b/samples/Mind/material.mind new file mode 100644 index 0000000000..0a0407ba2a --- /dev/null +++ b/samples/Mind/material.mind @@ -0,0 +1,343 @@ +// material.mind - Material System for Mind Ray Path Tracer +// +// This module implements physically-based materials for path tracing. +// Each material type defines how light interacts with a surface: +// +// Material Types: +// - Diffuse: Lambertian (matte) surfaces that scatter light uniformly +// - Metal: Reflective surfaces with optional roughness/fuzz +// - Dielectric: Transparent materials (glass, water) with refraction +// - Emissive: Light-emitting surfaces for area lights +// +// All scattering functions return both the scattered ray and an updated +// RNG state, enabling deterministic rendering. 
+// +// References: +// - "Physically Based Rendering" by Pharr, Jakob, Humphreys +// - "Ray Tracing in One Weekend" by Peter Shirley +// +// Author: STARGA Inc. +// License: MIT + +// ============================================================================= +// Material Data Types +// ============================================================================= + +/// Diffuse (Lambertian) material +/// +/// Scatters light uniformly in all directions according to Lambert's cosine law. +/// The albedo represents the fraction of light reflected (per channel). +pub struct MaterialDiffuse { + /// Surface color/reflectance (RGB, each in [0, 1]) + albedo: Vec3, +} + +/// Metallic material with optional roughness +/// +/// Reflects light like a mirror, with optional fuzz/roughness that +/// perturbs the reflection direction. +pub struct MaterialMetal { + /// Surface reflectance (RGB, each in [0, 1]) + albedo: Vec3, + /// Surface roughness (0 = perfect mirror, 1 = very fuzzy) + roughness: f32, +} + +/// Dielectric (glass-like) material +/// +/// Transmits and reflects light according to Snell's law and +/// Fresnel equations. Supports total internal reflection. +pub struct MaterialDielectric { + /// Tint applied to scattered light (scatter_dielectric uses it as attenuation for both reflected and refracted rays) + albedo: Vec3, + /// Index of refraction (1.0 = air, 1.5 = glass, 1.33 = water) + ior: f32, +} + +/// Emissive (light-emitting) material +/// +/// Emits light of the specified color and strength. +/// Used for area lights in the scene. +pub struct MaterialEmissive { + /// Emission color (RGB) + color: Vec3, + /// Emission strength multiplier + strength: f32, +} + +/// Sum type for all material variants +/// +/// Each geometry primitive references a material by index into the scene's +/// material array. The material enum determines scattering behavior. 
+pub enum Material { + /// Matte/Lambertian surface + Diffuse(MaterialDiffuse), + /// Reflective metal surface + Metal(MaterialMetal), + /// Transparent glass-like surface + Dielectric(MaterialDielectric), + /// Light-emitting surface + Emissive(MaterialEmissive), +} + +// ============================================================================= +// Scatter Result +// ============================================================================= + +/// Result of a material scattering event +/// +/// Contains the scattered ray direction and the attenuation (color filter) +/// applied to the light traveling along that path. +pub struct ScatterResult { + /// The scattered ray (origin at hit point, direction from material) + scattered: Ray, + /// Color attenuation applied to light (RGB multiplier) + attenuation: Vec3, + /// Whether scattering occurred (false = ray absorbed) + did_scatter: bool, +} + +// ============================================================================= +// Scattering Functions +// ============================================================================= + +/// Scatter light from a diffuse (Lambertian) surface +/// +/// Generates a random scatter direction in the hemisphere around the normal, +/// following Lambert's cosine law for diffuse reflection. +/// +/// # Arguments +/// * `mat` - Diffuse material properties +/// * `r_in` - Incoming ray (unused for diffuse, included for API consistency) +/// * `hit_point` - Point of intersection +/// * `normal` - Surface normal at hit point +/// * `rng` - Random number generator state +/// +/// # Returns +/// * Tuple of (ScatterResult, updated_rng_state) +/// +/// # Algorithm +/// 1. Generate random unit vector +/// 2. Add to normal (cosine-weighted hemisphere sampling) +/// 3. 
Handle degenerate case where sum is near-zero +pub fn scatter_diffuse( + mat: MaterialDiffuse, + r_in: Ray, + hit_point: Vec3, + normal: Vec3, + rng: RngState +) -> (ScatterResult, RngState) { + let (random_vec, new_rng) = rng_vec3_unit_vector(rng); + let scatter_dir_raw = v3_add(normal, random_vec); + + // Handle degenerate case where random vector is opposite to normal + let scatter_dir = if v3_near_zero(scatter_dir_raw) { + normal + } else { + scatter_dir_raw + }; + + let scattered = ray_new(hit_point, v3_normalize(scatter_dir)); + + let result = ScatterResult { + scattered: scattered, + attenuation: mat.albedo, + did_scatter: true, + }; + + (result, new_rng) +} + +/// Scatter light from a metallic surface +/// +/// Reflects the incoming ray about the normal with optional roughness. +/// Roughness adds random perturbation to simulate brushed or imperfect metals. +/// +/// # Arguments +/// * `mat` - Metal material properties +/// * `r_in` - Incoming ray +/// * `hit_point` - Point of intersection +/// * `normal` - Surface normal at hit point +/// * `rng` - Random number generator state +/// +/// # Returns +/// * Tuple of (ScatterResult, updated_rng_state) +/// * did_scatter will be false if the fuzzed ray goes below the surface +pub fn scatter_metal( + mat: MaterialMetal, + r_in: Ray, + hit_point: Vec3, + normal: Vec3, + rng: RngState +) -> (ScatterResult, RngState) { + // Perfect reflection direction + let reflected = v3_reflect(r_in.direction, normal); + + // Add roughness/fuzz by perturbing the reflection + let (fuzz, new_rng) = rng_vec3_unit_sphere(rng); + let fuzzed = v3_add(reflected, v3_mul(fuzz, mat.roughness)); + + // Handle degenerate case where fuzzed vector is near-zero + let safe_fuzzed = if v3_near_zero(fuzzed) { + reflected // Fall back to perfect reflection + } else { + fuzzed + }; + + let scattered = ray_new(hit_point, v3_normalize(safe_fuzzed)); + + // Reject if fuzzed ray points into the surface + let did_scatter = v3_dot(safe_fuzzed, normal) > 
0.0; + + let result = ScatterResult { + scattered: scattered, + attenuation: mat.albedo, + did_scatter: did_scatter, + }; + + (result, new_rng) +} + +/// Scatter light from a dielectric (glass-like) surface +/// +/// Handles both reflection and refraction using Snell's law. +/// Uses Schlick's approximation for Fresnel reflectance. +/// Correctly handles total internal reflection. +/// +/// # Arguments +/// * `mat` - Dielectric material properties (includes IOR) +/// * `r_in` - Incoming ray +/// * `hit_point` - Point of intersection +/// * `normal` - Surface normal (always points outward) +/// * `front_face` - True if ray hit front face, false for back face +/// * `rng` - Random number generator state +/// +/// # Returns +/// * Tuple of (ScatterResult, updated_rng_state) +/// +/// # Physics +/// - Snell's law: n1 * sin(theta1) = n2 * sin(theta2) +/// - Total internal reflection when sin(theta2) > 1 +/// - Fresnel effect: more reflection at grazing angles +pub fn scatter_dielectric( + mat: MaterialDielectric, + r_in: Ray, + hit_point: Vec3, + normal: Vec3, + front_face: bool, + rng: RngState +) -> (ScatterResult, RngState) { + // Determine refraction ratio based on which side we're hitting + // front_face: entering material (air -> glass) + // back_face: exiting material (glass -> air) + let refraction_ratio = if front_face { 1.0 / mat.ior } else { mat.ior }; + + let unit_direction = v3_normalize(r_in.direction); + let cos_theta = f32_min(-v3_dot(unit_direction, normal), 1.0); + let sin_theta = f32_sqrt(1.0 - cos_theta * cos_theta); + + // Check for total internal reflection + let cannot_refract = refraction_ratio * sin_theta > 1.0; + + // Use Schlick's approximation for Fresnel reflectance + let (reflectance_val, rng1) = rng_f32(rng); + let use_reflect = cannot_refract || (reflectance(cos_theta, refraction_ratio) > reflectance_val); + + let direction = if use_reflect { + v3_reflect(unit_direction, normal) + } else { + v3_refract(unit_direction, normal, 
refraction_ratio) + }; + + let scattered = ray_new(hit_point, direction); + + let result = ScatterResult { + scattered: scattered, + attenuation: mat.albedo, + did_scatter: true, + }; + + (result, rng1) +} + +// ============================================================================= +// Fresnel Approximation +// ============================================================================= + +/// Schlick's approximation for Fresnel reflectance +/// +/// Approximates the fraction of light that is reflected vs transmitted +/// at a dielectric interface. More light is reflected at grazing angles. +/// +/// # Arguments +/// * `cosine` - Cosine of the incident angle +/// * `ref_idx` - Ratio of refractive indices (n1/n2) +/// +/// # Returns +/// * Reflectance coefficient in [0, 1] +/// +/// # Formula +/// R(theta) = R0 + (1 - R0) * (1 - cos(theta))^5 +/// where R0 = ((n1 - n2) / (n1 + n2))^2 +#[inline] +fn reflectance(cosine: f32, ref_idx: f32) -> f32 { + // R0: reflectance at normal incidence + let r0 = (1.0 - ref_idx) / (1.0 + ref_idx); + let r0_sq = r0 * r0; + + // Schlick's polynomial approximation of Fresnel + let one_minus_cos = 1.0 - cosine; + let power = one_minus_cos * one_minus_cos * one_minus_cos * one_minus_cos * one_minus_cos; + r0_sq + (1.0 - r0_sq) * power +} + +// ============================================================================= +// Emissive Material Functions +// ============================================================================= + +/// Handle scattering for emissive materials +/// +/// Emissive materials do not scatter light - they only emit it. +/// This function always returns did_scatter = false. 
+/// +/// # Arguments +/// * `mat` - Emissive material properties +/// * `hit_point` - Point of intersection +/// * `normal` - Surface normal at hit point +/// * `rng` - Random number generator state +/// +/// # Returns +/// * Tuple of (ScatterResult with did_scatter=false, unchanged_rng_state) +pub fn scatter_emissive( + mat: MaterialEmissive, + hit_point: Vec3, + normal: Vec3, + rng: RngState +) -> (ScatterResult, RngState) { + // Emissive materials do not scatter - they only emit light + // The scattered ray is arbitrary since did_scatter is false + let result = ScatterResult { + scattered: ray_new(hit_point, normal), + attenuation: vec3_zero(), + did_scatter: false, + }; + + (result, rng) +} + +/// Get the emission color from an emissive material +/// +/// Returns the emission color multiplied by strength for emissive materials, +/// or zero for all other material types. +/// +/// # Arguments +/// * `mat` - Material to query +/// +/// # Returns +/// * Emission color (Vec3) or zero if material is not emissive +pub fn material_emitted(mat: &Material) -> Vec3 { + match mat { + Material::Emissive(e) => v3_mul(e.color, e.strength), + _ => vec3_zero(), + } +} diff --git a/samples/Mind/mlp_backward.mind b/samples/Mind/mlp_backward.mind new file mode 100644 index 0000000000..8280b87775 --- /dev/null +++ b/samples/Mind/mlp_backward.mind @@ -0,0 +1,129 @@ +// MIND Language Example: MLP Backpropagation +// Specification reference: spec/v1.0/autodiff.md +// +// This example demonstrates automatic differentiation through +// a simple multi-layer perceptron (MLP) for training. 
+ +import diff::grad; +import tensor::zeros; +import tensor::randn; + +// MLP structure: input -> hidden -> output +// Reference: spec/v1.0/autodiff.md (composition of differentiable ops) + +// Layer weights (would be parameters in real training) +struct MLPParams { + w1: Tensor, + b1: Tensor, + w2: Tensor, + b2: Tensor, +} + +// Forward pass through MLP +// Reference: spec/v1.0/ir.md#linear-and-tensor-algebra (MatMul) +// Reference: spec/v1.0/ir.md#activation-and-elementwise-unary-operations (Relu) +fn mlp_forward( + x: Tensor, + params: MLPParams +) -> Tensor { + // Layer 1: linear + ReLU + // z1 = x @ W1 + b1 + let z1 = matmul(x, params.w1) + params.b1; // Broadcasting b1 + + // Activation: ReLU(z1) + // Reference: spec/v1.0/ir.md#activation-and-elementwise-unary-operations + let h = relu(z1); + + // Layer 2: linear (no activation for output) + // z2 = h @ W2 + b2 + let output = matmul(h, params.w2) + params.b2; + + output +} + +// Mean squared error loss +// Reference: spec/v1.0/ir.md#reductions (Mean) +fn mse_loss( + predictions: Tensor, + targets: Tensor +) -> f32 { + let diff = predictions - targets; + let squared = diff * diff; + mean(squared, axes=[], keepdims=false) +} + +// Combined forward + loss for gradient computation +fn loss_fn( + x: Tensor, + targets: Tensor, + params: MLPParams +) -> f32 { + let predictions = mlp_forward(x, params); + mse_loss(predictions, targets) +} + +// Compute gradients for all parameters +// Reference: spec/v1.0/autodiff.md +// Returns gradients with same shapes as parameters +fn compute_gradients( + x: Tensor, + targets: Tensor, + params: MLPParams +) -> MLPParams { + // Gradient of loss with respect to parameters + // The autodiff system traces through: + // 1. Forward pass (matmul, add, relu, matmul, add) + // 2. 
Loss computation (sub, mul, mean) + // Then applies chain rule backward + + let grad_params = grad(|p| loss_fn(x, targets, p))(params); + + grad_params +} + +// Single training step with gradient descent +fn train_step( + x: Tensor, + targets: Tensor, + params: MLPParams, + learning_rate: f32 +) -> MLPParams { + // Compute gradients + let grads = compute_gradients(x, targets, params); + + // Update parameters: param = param - lr * grad + // Reference: spec/v1.0/shapes.md#broadcasting (scalar broadcast) + let new_params = MLPParams { + w1: params.w1 - learning_rate * grads.w1, + b1: params.b1 - learning_rate * grads.b1, + w2: params.w2 - learning_rate * grads.w2, + b2: params.b2 - learning_rate * grads.b2, + }; + + new_params +} + +// Example usage +fn main() { + // Initialize parameters + let params = MLPParams { + w1: randn([4, 8]), // 4 inputs, 8 hidden + b1: zeros([8]), + w2: randn([8, 2]), // 8 hidden, 2 outputs + b2: zeros([2]), + }; + + // Example batch + let x: Tensor = randn([16, 4]); + let targets: Tensor = randn([16, 2]); + + // Training loop + let lr: f32 = 0.01; + let mut current_params = params; + + // Would typically loop here + current_params = train_step(x, targets, current_params, lr); + + // Compute final loss + let final_loss = loss_fn(x, targets, current_params); +} diff --git a/samples/Mind/render.mind b/samples/Mind/render.mind new file mode 100644 index 0000000000..db1ec31408 --- /dev/null +++ b/samples/Mind/render.mind @@ -0,0 +1,333 @@ +// render.mind - Core Path Tracing Renderer +// +// This module implements the Monte Carlo path tracing algorithm. Each pixel +// is sampled multiple times with random ray directions, and the results are +// averaged to produce a smooth, noise-reduced image. 
+// +// Key Components: +// - ray_color: Traces a single ray through the scene, accumulating light +// - render_pixel: Samples a pixel with anti-aliasing and returns final color +// - tonemap: Reinhard operator for HDR to LDR conversion +// - gamma_correct: sRGB gamma encoding (2.2) +// +// Path Tracing Algorithm: +// 1. Cast primary ray from camera through pixel +// 2. Find closest intersection with scene geometry +// 3. If hit: evaluate material, accumulate emission, scatter new ray +// 4. If miss: accumulate background/sky color and terminate +// 5. Repeat steps 2-4 until max bounces or Russian roulette termination +// +// Thread Safety: All functions are pure (no global state mutation) and can +// be safely called from multiple threads with independent RNG states. +// +// Author: STARGA Inc. +// License: MIT + +// ============================================================================= +// Constants +// ============================================================================= + +/// Minimum ray hit distance (prevents self-intersection artifacts) +const RAY_T_MIN: f32 = 0.001; + +/// Maximum ray travel distance +const RAY_T_MAX: f32 = 1000000.0; + +/// Minimum bounce count before Russian roulette is applied +const RR_MIN_BOUNCES: i32 = 3; + +/// Maximum Russian roulette survival probability +const RR_MAX_SURVIVAL: f32 = 0.95; + +// ============================================================================= +// Path Tracing +// ============================================================================= + +/// Trace a ray through the scene and compute accumulated radiance +/// +/// Implements unbiased Monte Carlo path tracing with Russian roulette +/// termination. Handles diffuse, metallic, dielectric, and emissive materials. +/// +/// # Algorithm +/// 1. Cast ray into scene +/// 2. On hit: accumulate emission, scatter ray based on material +/// 3. On miss: accumulate background color, terminate +/// 4. 
Apply Russian roulette after 3+ bounces for efficiency +/// +/// # Arguments +/// * `r` - Initial ray to trace +/// * `scene` - Scene containing geometry and materials +/// * `max_bounces` - Maximum path length (prevents infinite loops) +/// * `rng` - Random number generator state +/// +/// # Returns +/// * Tuple of (accumulated_color, updated_rng_state) +/// +/// # Energy Conservation +/// The path maintains a throughput multiplier that ensures energy conservation. +/// Russian roulette adjusts throughput to maintain unbiased estimates. +fn ray_color( + r: Ray, + scene: Scene, + max_bounces: i32, + rng: RngState +) -> (Vec3, RngState) { + let mut current_ray = r; + let mut current_rng = rng; + let mut throughput = vec3_ones(); + let mut accumulated_light = vec3_zero(); + + for bounce in 0..max_bounces { + let hit = scene_hit(scene, current_ray, RAY_T_MIN, RAY_T_MAX); + + if !hit.did_hit { + // Hit sky/background + let sky_color = scene.background; + let contribution = v3_hadamard(throughput, sky_color); + accumulated_light = v3_add(accumulated_light, contribution); + break; + } + + // Get material + let mat = scene.materials[hit.material_idx]; + + // Handle emissive materials + match mat { + Material::Emissive(emissive) => { + let emission = v3_mul(emissive.color, emissive.strength); + let contribution = v3_hadamard(throughput, emission); + accumulated_light = v3_add(accumulated_light, contribution); + // Don't continue bouncing from lights + break; + }, + _ => { + // Non-emissive material, continue bouncing + } + } + + // Scatter ray based on material type + let (scatter_result, new_rng) = match mat { + Material::Diffuse(diff) => { + scatter_diffuse(diff, current_ray, hit.point, hit.normal, current_rng) + }, + Material::Metal(metal) => { + scatter_metal(metal, current_ray, hit.point, hit.normal, current_rng) + }, + Material::Dielectric(diel) => { + scatter_dielectric(diel, current_ray, hit.point, hit.normal, hit.front_face, current_rng) + }, + 
Material::Emissive(_) => { + // Already handled above + let no_scatter = ScatterResult { + scattered: current_ray, + attenuation: vec3_zero(), + did_scatter: false, + }; + (no_scatter, current_rng) + }, + }; + + current_rng = new_rng; + + if !scatter_result.did_scatter { + break; + } + + // Update throughput and ray + throughput = v3_hadamard(throughput, scatter_result.attenuation); + current_ray = scatter_result.scattered; + + // Russian roulette path termination after RR_MIN_BOUNCES + // This provides unbiased early termination for dim paths + if bounce >= RR_MIN_BOUNCES { + let max_component = f32_max(f32_max(throughput.x, throughput.y), throughput.z); + let survival_prob = f32_min(max_component, RR_MAX_SURVIVAL); + + // Terminate if survival probability is zero (prevents division by zero) + if survival_prob <= 0.0 { + break; + } + + let (rr_sample, rng_after_rr) = rng_f32(current_rng); + current_rng = rng_after_rr; + + if rr_sample > survival_prob { + break; + } + + // Boost throughput to maintain energy + throughput = v3_div(throughput, survival_prob); + } + } + + (accumulated_light, current_rng) +} + +// ============================================================================= +// Pixel Rendering +// ============================================================================= + +/// Render a single pixel with multi-sample anti-aliasing +/// +/// Casts multiple rays through slightly offset positions within the pixel +/// to achieve sub-pixel anti-aliasing. Each sample uses the full path tracing +/// algorithm. Results are averaged and post-processed. +/// +/// # Algorithm +/// 1. Initialize deterministic RNG based on pixel coordinates and seed +/// 2. For each sample: +/// a. Add random offset within pixel for AA +/// b. Generate camera ray +/// c. Trace ray through scene +/// d. Accumulate color +/// 3. Average accumulated color +/// 4. Apply tone mapping (HDR -> LDR) +/// 5. Apply gamma correction (linear -> sRGB) +/// 6. 
Clamp to [0, 1] range +/// +/// # Arguments +/// * `x` - Pixel X coordinate (0 to width-1) +/// * `y` - Pixel Y coordinate (0 to height-1) +/// * `width` - Image width in pixels +/// * `height` - Image height in pixels +/// * `spp` - Samples per pixel (higher = less noise) +/// * `max_bounces` - Maximum path length per sample +/// * `camera` - Camera configuration for ray generation +/// * `scene` - Scene to render +/// * `seed` - Base RNG seed for deterministic rendering +/// +/// # Returns +/// * Final pixel color in [0, 1] range (ready for output) +/// +/// # Determinism +/// The same (x, y, seed) inputs will always produce the same output color, +/// enabling reproducible renders and distributed rendering. +fn render_pixel( + x: i32, + y: i32, + width: i32, + height: i32, + spp: i32, + max_bounces: i32, + camera: Camera, + scene: Scene, + seed: i32 +) -> Vec3 { + let pixel_idx = y * width + x; + let base_seed = seed ^ (pixel_idx * 747796405); + + let mut rng = rng_init(base_seed); + let mut color = vec3_zero(); + + let aspect = (width as f32) / (height as f32); + + for _sample in 0..spp { + // Anti-aliasing offset + let (offset_x, rng1) = rng_f32(rng); + let (offset_y, rng2) = rng_f32(rng1); + + // Use width/height for proper [0,1) mapping without division by zero + let u = ((x as f32) + offset_x) / (width as f32); + let v = ((y as f32) + offset_y) / (height as f32); + + let ray = camera_get_ray(camera, u, 1.0 - v); // Flip v for image coords + + let (sample_color, new_rng) = ray_color(ray, scene, max_bounces, rng2); + + color = v3_add(color, sample_color); + rng = new_rng; + } + + // Average samples (guard against spp=0) + if spp > 0 { + color = v3_div(color, spp as f32); + } + + // Tone mapping and gamma correction + color = tonemap(color); + color = gamma_correct(color); + + v3_clamp01(color) +} + +// ============================================================================= +// Post-Processing +// 
============================================================================= + +/// sRGB gamma value for encoding +const GAMMA: f32 = 2.2; + +/// Inverse gamma for decoding (pre-computed for efficiency) +const INV_GAMMA: f32 = 0.4545454545; // 1.0 / 2.2 + +/// Apply Reinhard tone mapping to HDR color +/// +/// Maps high dynamic range values to [0, 1] display range. +/// The Reinhard operator is a simple but effective global tone mapper +/// that preserves relative luminance while compressing bright values. +/// +/// Formula: L_out = L_in / (1 + L_in) +/// +/// # Arguments +/// * `color` - HDR color (can have values > 1.0) +/// +/// # Returns +/// * Tone-mapped color in [0, 1] range +/// +/// # Properties +/// - Preserves 0: tonemap(0) = 0 +/// - Asymptotes to 1: tonemap(inf) -> 1 +/// - Monotonic: brighter inputs always produce brighter outputs +#[inline] +fn tonemap(color: Vec3) -> Vec3 { + vec3( + tonemap_component(color.x), + tonemap_component(color.y), + tonemap_component(color.z) + ) +} + +/// Reinhard tone mapping for a single color component +/// +/// # Arguments +/// * `x` - Input value (clamped to non-negative) +/// +/// # Returns +/// * Tone-mapped value in [0, 1) +#[inline] +fn tonemap_component(x: f32) -> f32 { + let clamped = f32_max(0.0, x); + clamped / (1.0 + clamped) +} + +/// Apply sRGB gamma correction to linear color +/// +/// Converts linear-space color to gamma-encoded sRGB for display. +/// Most displays expect sRGB-encoded values. +/// +/// Formula: c_out = c_in ^ (1/2.2) +/// +/// # Arguments +/// * `color` - Linear-space color (should be in [0, 1] after tonemapping) +/// +/// # Returns +/// * Gamma-encoded sRGB color +/// +/// # Note +/// For accurate sRGB, a piecewise function should be used for values < 0.0031308. +/// This simplified gamma curve is sufficient for path tracer output. 
+#[inline] +fn gamma_correct(color: Vec3) -> Vec3 { + vec3( + f32_pow(color.x, INV_GAMMA), + f32_pow(color.y, INV_GAMMA), + f32_pow(color.z, INV_GAMMA) + ) +} + +// ============================================================================= +// External Dependencies +// ============================================================================= +// Math utilities imported from math_stubs.mind: +// - f32_pow: Power function for gamma correction +// - f32_max: Maximum of two floats diff --git a/samples/Mind/search.mind b/samples/Mind/search.mind new file mode 100644 index 0000000000..20e624b8e4 --- /dev/null +++ b/samples/Mind/search.mind @@ -0,0 +1,829 @@ +// NikolaChess v3.17 - Draw-First Search +// Copyright (c) 2025 STARGA, Inc. All rights reserved. +// Search algorithm optimized for finding draws, not wins +// Key insight: Repetitions are GOOD, not bad! + +import std.tensor; +import std.cuda; +import std.time; +import std.mem; + +// ============================================================================ +// SEARCH RESULT +// ============================================================================ + +struct SearchResult { + best_move: Move, + draw_score: f32, // Draw probability [0, 1] - higher = better + depth: i32, + nodes: i64, + time_ms: i64, + pv: Vec, // Principal variation + draw_type: str, // "book", "tablebase", "repetition", "search" +} + +fn default_result() -> SearchResult { + return SearchResult { + best_move: MOVE_NULL, + draw_score: 0.0, + depth: 0, + nodes: 0, + time_ms: 0, + pv: Vec.new(), + draw_type: "none", + }; +} + +// ============================================================================ +// TRANSPOSITION TABLE +// ============================================================================ + +struct TTEntry { + hash: u64, + depth: i32, + draw_score: f32, + best_move: Move, + flag: i32, // EXACT, LOWER, UPPER + age: i32, // For replacement +} + +const TT_EXACT: i32 = 0; +const TT_LOWER: i32 = 1; // Alpha bound +const 
TT_UPPER: i32 = 2; // Beta bound + +// Default TT size (16M entries). NOTE(review): create_tt() passes this entry count to create_tt_with_size(), whose parameter is size_mb (megabytes) — unit mismatch; confirm intended default +const TT_DEFAULT_SIZE: u64 = 1 << 24; + +// Global TT size (set by Hash option) +static mut TT_SIZE: u64 = TT_DEFAULT_SIZE; + +struct TranspositionTable { + entries: Vec, // Dynamic size instead of fixed tensor + size: u64, // Current size + age: i32, + use_gpu: bool, // Whether GPU is available +} + +fn create_tt() -> TranspositionTable { + return create_tt_with_size(TT_DEFAULT_SIZE); +} + +fn create_tt_with_size(size_mb: u64) -> TranspositionTable { + // Calculate number of entries from MB + let entry_size: u64 = 32; // sizeof(TTEntry) approximately + let num_entries = (size_mb * 1024 * 1024) / entry_size; + // Round down to power of 2 for fast modulo + let mut actual_size: u64 = 1; + while actual_size * 2 <= num_entries { + actual_size *= 2; + } + + let use_gpu = cuda.is_available(); + + if use_gpu { + // GPU allocation + on(gpu0) { + let entries = Vec.with_capacity(actual_size as usize); + for _ in 0..actual_size { + entries.push(TTEntry { + hash: 0, depth: 0, draw_score: 0.0, + best_move: MOVE_NULL, flag: 0, age: 0 + }); + } + return TranspositionTable { + entries: entries, + size: actual_size, + age: 0, + use_gpu: true, + }; + } + } else { + // CPU fallback + let entries = Vec.with_capacity(actual_size as usize); + for _ in 0..actual_size { + entries.push(TTEntry { + hash: 0, depth: 0, draw_score: 0.0, + best_move: MOVE_NULL, flag: 0, age: 0 + }); + } + return TranspositionTable { + entries: entries, + size: actual_size, + age: 0, + use_gpu: false, + }; + } +} + +fn tt_resize(tt: &mut TranspositionTable, size_mb: u64) { + // Resize TT based on Hash UCI option + let new_tt = create_tt_with_size(size_mb); + *tt = new_tt; +} + +fn tt_clear(tt: &mut TranspositionTable) { + // Fast clear using memset instead of loop + // FIX: Handle GPU/CPU memory correctly + let entry_size = std::mem::size_of::(); + if tt.use_gpu { + on(gpu0) { + 
mem.memset(tt.entries.as_mut_ptr(), 0, tt.size as usize * entry_size); + } + } else { + mem.memset(tt.entries.as_mut_ptr(), 0, tt.size as usize * entry_size); + } + tt.age = 0; +} + +fn tt_probe(tt: &TranspositionTable, hash: u64) -> Option { + let idx = (hash % tt.size) as usize; + // FIX: Access GPU memory in correct context + if tt.use_gpu { + on(gpu0) { + let entry = tt.entries[idx]; + if entry.hash == hash { + return Some(entry); + } + } + } else { + let entry = tt.entries[idx]; + if entry.hash == hash { + return Some(entry); + } + } + return None; +} + +fn tt_store(tt: &mut TranspositionTable, hash: u64, depth: i32, draw_score: f32, best_move: Move, flag: i32) { + let idx = (hash % tt.size) as usize; + + // FIX: Access GPU memory in correct context + if tt.use_gpu { + on(gpu0) { + let existing = tt.entries[idx]; + // Replace if: empty, older, or deeper search + if existing.hash == 0 || existing.age < tt.age || depth >= existing.depth { + tt.entries[idx] = TTEntry { + hash: hash, + depth: depth, + draw_score: draw_score, + best_move: best_move, + flag: flag, + age: tt.age, + }; + } + } + } else { + let existing = tt.entries[idx]; + // Replace if: empty, older, or deeper search + if existing.hash == 0 || existing.age < tt.age || depth >= existing.depth { + tt.entries[idx] = TTEntry { + hash: hash, + depth: depth, + draw_score: draw_score, + best_move: best_move, + flag: flag, + age: tt.age, + }; + } + } +} + +// ============================================================================ +// DRAW-FIRST SEARCH ENGINE +// ============================================================================ + +struct DrawSearch { + net: DrawNetwork, + tt: TranspositionTable, + book: OpeningBook, + tb: Tablebase, + nodes: i64, + max_depth: i32, + start_time: i64, + time_limit: i64, + stop: bool, + + // Draw-seeking statistics + repetitions_found: i64, + fortresses_found: i64, + perpetuals_found: i64, +} + +fn create_search(net: DrawNetwork, book: OpeningBook, tb: Tablebase) 
-> DrawSearch { + return DrawSearch { + net: net, + tt: create_tt(), + book: book, + tb: tb, + nodes: 0, + max_depth: 64, + start_time: 0, + time_limit: 0, + stop: false, + repetitions_found: 0, + fortresses_found: 0, + perpetuals_found: 0, + }; +} + +// ============================================================================ +// MAIN SEARCH FUNCTION +// ============================================================================ + +fn search(s: &mut DrawSearch, board: Board, depth: i32, time_ms: i64) -> SearchResult { + s.nodes = 0; + s.start_time = time.now_ms(); + s.time_limit = time_ms; + s.stop = false; + s.tt.age += 1; + + // 1. Check opening book first + if let Some(book_move) = s.book.probe(board) { + return SearchResult { + best_move: book_move, + draw_score: 1.0, // Book moves are pre-verified draws + depth: 0, + nodes: 1, + time_ms: 0, + pv: vec![book_move], + draw_type: "book", + }; + } + + // 2. Check tablebase + if let Some(tb_result) = s.tb.probe(board) { + if tb_result.is_draw() { + return SearchResult { + best_move: tb_result.best_move, + draw_score: 1.0, + depth: 0, + nodes: 1, + time_ms: 0, + pv: vec![tb_result.best_move], + draw_type: "tablebase", + }; + } + } + + // 3. Check for immediate draw claims + let (can_draw, draw_reason) = can_claim_draw(board, &board.history); + if can_draw { + return SearchResult { + best_move: MOVE_NULL, + draw_score: 1.0, + depth: 0, + nodes: 1, + time_ms: 0, + pv: vec![], + draw_type: draw_reason, + }; + } + + // 4. 
Iterative deepening search + let mut best_result = default_result(); // FIX: Made mutable + + for d in 1..=depth { + if s.stop { + break; + } + + let mut pv = Vec.new(); + let result = draw_negamax(s, board, d, 0.0, 1.0, &mut pv, 0); + + if !s.stop { + best_result = result; + best_result.pv = pv; + best_result.depth = d; + + // Print UCI info + let elapsed = time.now_ms() - s.start_time; + print_uci_info(d, best_result.draw_score, s.nodes, elapsed, &best_result.pv); + } + + // If we found a guaranteed draw, stop searching + // Draw probability threshold + if best_result.draw_score >= 0.99 { + break; + } + + // Time management + if time.now_ms() - s.start_time > time_ms / 2 { + break; + } + } + + best_result.nodes = s.nodes; + best_result.time_ms = time.now_ms() - s.start_time; + return best_result; +} + +// ============================================================================ +// DRAW-FOCUSED NEGAMAX WITH ALPHA-BETA +// ============================================================================ + +// Returns draw probability from perspective of side to move +// Higher = better (more likely to draw = GOOD!) +fn draw_negamax( + s: &mut DrawSearch, + board: Board, + depth: i32, + mut alpha: f32, // FIX: Made mutable + mut beta: f32, // FIX: Made mutable + pv: &mut Vec, + ply: i32 +) -> SearchResult { + s.nodes += 1; + + // Time check - check every 1024 nodes for more responsive stopping + if s.nodes % 1024 == 0 { + if time.now_ms() - s.start_time > s.time_limit { + s.stop = true; + return default_result(); + } + } + + let hash = board.hash; + let mut local_pv = Vec.new(); + + // ======================================== + // DRAW DETECTION (This is GOOD for us!) 
+ // Emphasize repetitions + // ======================================== + + // Check for draw by repetition + // FIX: Include side-to-move in repetition check by using board.hash + // which already incorporates side-to-move via Zobrist hashing + let rep_count = count_repetitions(&board.history, hash); + if rep_count >= 2 { + s.repetitions_found += 1; + return SearchResult { + best_move: MOVE_NULL, + draw_score: 1.0, // Guaranteed draw! + depth: depth, + nodes: 1, + time_ms: 0, + pv: pv.clone(), + draw_type: "repetition", + }; + } + + // Check 50-move rule + if board.halfmove >= 100 { + return SearchResult { + best_move: MOVE_NULL, + draw_score: 1.0, + depth: depth, + nodes: 1, + time_ms: 0, + pv: pv.clone(), + draw_type: "fifty_move", + }; + } + + // Check insufficient material + if is_insufficient_material(board) { + return SearchResult { + best_move: MOVE_NULL, + draw_score: 1.0, + depth: depth, + nodes: 1, + time_ms: 0, + pv: pv.clone(), + draw_type: "insufficient", + }; + } + + // ======================================== + // TRANSPOSITION TABLE + // ======================================== + + let mut tt_move = MOVE_NULL; // FIX: Made mutable + if let Some(entry) = tt_probe(&s.tt, hash) { + tt_move = entry.best_move; + + if entry.depth >= depth { + match entry.flag { + TT_EXACT => { + pv.push(entry.best_move); + return SearchResult { + best_move: entry.best_move, + draw_score: entry.draw_score, + depth: depth, + nodes: 1, + time_ms: 0, + pv: pv.clone(), + draw_type: "tt", + }; + }, + TT_LOWER => { + alpha = max_f32(alpha, entry.draw_score); + }, + TT_UPPER => { + beta = min_f32(beta, entry.draw_score); + }, + } + + if alpha >= beta { + return SearchResult { + best_move: entry.best_move, + draw_score: entry.draw_score, + depth: depth, + nodes: 1, + time_ms: 0, + pv: pv.clone(), + draw_type: "tt_cutoff", + }; + } + } + } + + // ======================================== + // TABLEBASE PROBE (Endgame) + // ======================================== + + if let 
Some(tb_result) = s.tb.probe(board) { + if tb_result.is_draw() { + return SearchResult { + best_move: tb_result.best_move, + draw_score: 1.0, + depth: depth, + nodes: 1, + time_ms: 0, + pv: vec![tb_result.best_move], + draw_type: "tablebase", + }; + } + } + + // ======================================== + // LEAF NODE EVALUATION + // ======================================== + + if depth <= 0 { + let score = quiescence(s, board, alpha, beta); + return SearchResult { + best_move: MOVE_NULL, + draw_score: score, + depth: 0, + nodes: 1, + time_ms: 0, + pv: pv.clone(), + draw_type: "eval", + }; + } + + // ======================================== + // MOVE GENERATION AND ORDERING + // ======================================== + + let moves = generate_moves(board); + + if moves.is_empty() { + // No legal moves - checkmate or stalemate + if is_in_check(board) { + // Checkmate - worst outcome (we lost) + return SearchResult { + best_move: MOVE_NULL, + draw_score: 0.0, // Not a draw - we lost + depth: depth, + nodes: 1, + time_ms: 0, + pv: pv.clone(), + draw_type: "checkmate", + }; + } else { + // Stalemate - PERFECT! It's a draw! 
+ return SearchResult { + best_move: MOVE_NULL, + draw_score: 1.0, + depth: depth, + nodes: 1, + time_ms: 0, + pv: pv.clone(), + draw_type: "stalemate", + }; + } + } + + // Order moves for draw-seeking + order_moves_for_draw(&mut moves, board, &s.tt, hash, tt_move); + + // ======================================== + // MAIN SEARCH LOOP + // ======================================== + + let mut best_score: f32 = -1.0; // Start below valid range (0-1) so any move improves + let mut best_move = moves[0]; // Default to first move + let mut flag = TT_UPPER; // FIX: Made mutable + + for (i, m) in moves.iter().enumerate() { + let new_board = make_move(board, *m); + let mut child_pv = Vec.new(); + + // Principal Variation Search + let result = if i == 0 { + draw_negamax(s, new_board, depth - 1, alpha, beta, &mut child_pv, ply + 1) + } else { + // Null window search + let null_result = draw_negamax(s, new_board, depth - 1, alpha, alpha + 0.01, &mut child_pv, ply + 1); + + if null_result.draw_score > alpha && null_result.draw_score < beta { + // Re-search with full window + child_pv.clear(); + draw_negamax(s, new_board, depth - 1, alpha, beta, &mut child_pv, ply + 1) + } else { + null_result + } + }; + + if s.stop { + break; + } + + // We want HIGH draw scores (draws are good!) 
+ if result.draw_score > best_score { + best_score = result.draw_score; + best_move = *m; + local_pv.clear(); + local_pv.push(*m); + local_pv.extend(child_pv); + flag = TT_EXACT; + } + + alpha = max_f32(alpha, best_score); + + if alpha >= beta { + flag = TT_LOWER; + break; + } + } + + // Store in transposition table + tt_store(&mut s.tt, hash, depth, best_score, best_move, flag); + + *pv = local_pv; + + return SearchResult { + best_move: best_move, + draw_score: best_score, + depth: depth, + nodes: s.nodes, + time_ms: 0, + pv: pv.clone(), + draw_type: "search", + }; +} + +// ============================================================================ +// QUIESCENCE SEARCH +// ============================================================================ + +fn quiescence(s: &mut DrawSearch, board: Board, mut alpha: f32, beta: f32) -> f32 { + s.nodes += 1; + + // Stand pat evaluation + let board_tensor = to_tensor_16ch(board); + let features = extract_features(board); + let stand_pat = forward(s.net, board_tensor, features); + + if stand_pat >= beta { + return beta; + } + + alpha = max(alpha, stand_pat); + + // Only search captures + let captures = generate_captures(board); + order_moves_for_draw(&mut captures, board, &s.tt, board.hash, MOVE_NULL); + + for m in captures { + let new_board = make_move(board, m); + let score = quiescence(s, new_board, alpha, beta); + + if score >= beta { + return beta; + } + alpha = max(alpha, score); + } + + return alpha; +} + +// ============================================================================ +// MOVE ORDERING (Draw-Seeking Priority) +// ============================================================================ + +fn order_moves_for_draw( + moves: &mut Vec, + board: Board, + tt: &TranspositionTable, + hash: u64, + tt_move: Move +) { + // Sort by draw priority (descending) + moves.sort_by(|a, b| { + let score_a = move_draw_priority(*a, board, tt, hash, tt_move); + let score_b = move_draw_priority(*b, board, tt, hash, 
tt_move); + score_b.partial_cmp(&score_a).unwrap() + }); +} + +fn move_draw_priority(m: Move, board: Board, tt: &TranspositionTable, hash: u64, tt_move: Move) -> f32 { + let mut score = 0.0; + + // Priority order: + // 1. TT move (highest priority) + // 2. Moves that cause repetition + // 3. Moves toward tablebase territory (captures/trades) + // 4. Perpetual check moves + // 5. Moves that maintain material balance + // 6. Defensive moves + + // TT move gets highest priority + if m.data == tt_move.data && tt_move.data != 0 { + score += 100000.0; + } + + let new_board = make_move(board, m); + let new_hash = new_board.hash; + + // Repetition moves are EXCELLENT! + // (In normal engines these are bad, but we WANT draws) + let rep_count = count_repetitions(&board.history, new_hash); + if rep_count >= 1 { + score += 50000.0; // Potential threefold! + } + if rep_count >= 2 { + score += 100000.0; // Guaranteed draw! + } + + // Moves that reduce material (toward tablebase) + if is_capture(m) { + let material_before = total_material(board); + let material_after = total_material(new_board); + let reduction = (material_before - material_after) as f32; + score += reduction * 10.0; + + // Equal trades are especially good + let capture_value = piece_value(move_capture(m)); + let moved_value = piece_value(move_piece(m) % 6); + if capture_value >= moved_value { + score += 1000.0; + } + } + + // Checks that might lead to perpetual + if gives_check(m, board) { + score += 500.0; + + // If we've been giving checks, perpetual is likely + // (Check frequency feature from draw_eval) + score += 200.0; + } + + // Moves that maintain material balance + let balance_before = abs(material_balance(board)); + let balance_after = abs(material_balance(new_board)); + if balance_after <= balance_before { + score += 200.0; + } + + // Moves toward opposite-colored bishops (drawish) + if detect_opposite_colored_bishops(new_board) && !detect_opposite_colored_bishops(board) { + score += 300.0; + } + + // 
Pawn symmetry preservation + let sym_before = calc_pawn_symmetry(board); + let sym_after = calc_pawn_symmetry(new_board); + if sym_after >= sym_before { + score += 100.0; + } + + return score; +} + +fn piece_value(piece: i32) -> i32 { + match piece % 6 { + 0 => 100, // Pawn + 1 => 320, // Knight + 2 => 330, // Bishop + 3 => 500, // Rook + 4 => 900, // Queen + 5 => 0, // King (not capturable) + _ => 0, + } +} + +// ============================================================================ +// UTILITY FUNCTIONS +// ============================================================================ + +fn count_repetitions(history: &Vec, hash: u64) -> i32 { + let mut count: i32 = 0; // FIX: Made mutable + for h in history { + if *h == hash { + count += 1; + } + } + return count; +} + +fn print_uci_info(depth: i32, draw_prob: f32, nodes: i64, time_ms: i64, pv: &Vec) { + // Convert draw probability to centipawn-like score + // 1.0 = draw = 0cp, 0.0 = losing = -10000cp + let cp = ((draw_prob - 0.5) * 200.0) as i32; + + let nps = if time_ms > 0 { (nodes * 1000) / time_ms } else { 0 }; + + let pv_str = pv.iter().map(|m| move_to_uci(*m)).collect::>().join(" "); + + print("info depth", depth, + "score cp", cp, + "drawprob", (draw_prob * 1000.0) as i32, + "nodes", nodes, + "time", time_ms, + "nps", nps, + "pv", pv_str); +} + +fn move_to_uci(m: Move) -> str { + let from = move_from(m); + let to = move_to(m); + let promo = move_promo(m); + + let from_file = ('a' as i32 + from % 8) as char; + let from_rank = ('1' as i32 + from / 8) as char; + let to_file = ('a' as i32 + to % 8) as char; + let to_rank = ('1' as i32 + to / 8) as char; + + let mut uci = format!("{}{}{}{}", from_file, from_rank, to_file, to_rank); // FIX: Made mutable + + if promo != 0 { + let promo_char = match promo % 6 { + 1 => 'n', + 2 => 'b', + 3 => 'r', + 4 => 'q', + _ => ' ', + }; + uci = format!("{}{}", uci, promo_char); + } + + return uci; +} + +// 
============================================================================ +// ASPIRATION WINDOWS (Draw-focused) +// Narrow window around 0.5 +// ============================================================================ + +fn aspiration_search(s: &mut DrawSearch, board: Board, depth: i32, prev_score: f32) -> SearchResult { + // Start with narrow window around previous score + // For draw-seeking: window around 0.5 (draw probability) + let mut delta: f32 = 0.05; // FIX: Made mutable + let mut alpha = max_f32(0.0, prev_score - delta); + let mut beta = min_f32(1.0, prev_score + delta); + + loop { + let mut pv = Vec.new(); + let result = draw_negamax(s, board, depth, alpha, beta, &mut pv, 0); + + if s.stop { + return result; + } + + // Check if we need to re-search with wider window + if result.draw_score <= alpha { + // Fail low - widen alpha + alpha = max_f32(0.0, alpha - delta * 2.0); + delta *= 2.0; + } else if result.draw_score >= beta { + // Fail high - widen beta + beta = min_f32(1.0, beta + delta * 2.0); + delta *= 2.0; + } else { + // Search succeeded within window + return result; + } + + // Full window search if delta is too large + if delta > 0.5 { + alpha = 0.0; + beta = 1.0; + } + } +} + +// ============================================================================ +// HELPER FUNCTIONS +// ============================================================================ + +fn max_f32(a: f32, b: f32) -> f32 { + if a > b { a } else { b } +} + +fn min_f32(a: f32, b: f32) -> f32 { + if a < b { a } else { b } +} diff --git a/samples/Mind/tensor_board.mind b/samples/Mind/tensor_board.mind new file mode 100644 index 0000000000..e8510a2d1d --- /dev/null +++ b/samples/Mind/tensor_board.mind @@ -0,0 +1,540 @@ +// NikolaChess v3.17 - GPU-Accelerated TensorBoard +// Copyright (c) 2025 STARGA, Inc. All rights reserved. 
+// 16-channel packed tensor for RTX Tensor Core acceleration + +import std.tensor; +import std.cuda; +import std.atomic; +import std.simd; +import std.mem; + +// ============================================================================ +// TENSORBOARD CONSTANTS +// ============================================================================ + +const BOARD_SIZE: i32 = 8; +const NUM_PIECES: i32 = 12; +const NUM_CHANNELS: i32 = 16; + +// Channel layout: +// 0-5: White pieces (P, N, B, R, Q, K) +// 6-11: Black pieces (p, n, b, r, q, k) +// 12: All white occupancy (precomputed) +// 13: All black occupancy (precomputed) +// 14: Attack map (GPU-computed) +// 15: Draw probability cache + +const CH_WHITE_PAWN: i32 = 0; +const CH_WHITE_KNIGHT: i32 = 1; +const CH_WHITE_BISHOP: i32 = 2; +const CH_WHITE_ROOK: i32 = 3; +const CH_WHITE_QUEEN: i32 = 4; +const CH_WHITE_KING: i32 = 5; +const CH_BLACK_PAWN: i32 = 6; +const CH_BLACK_KNIGHT: i32 = 7; +const CH_BLACK_BISHOP: i32 = 8; +const CH_BLACK_ROOK: i32 = 9; +const CH_BLACK_QUEEN: i32 = 10; +const CH_BLACK_KING: i32 = 11; +const CH_WHITE_OCC: i32 = 12; +const CH_BLACK_OCC: i32 = 13; +const CH_ATTACKS: i32 = 14; +const CH_DRAW_CACHE: i32 = 15; + +// GPU cache sizes +const HASH_CACHE_SIZE: i32 = 65536; +const ATTACK_BATCH_SIZE: i32 = 4096; + +// ============================================================================ +// TENSORBOARD STRUCTURE +// ============================================================================ + +struct TensorBoard { + // Primary 16x8x8 tensor (f16 for RTX Tensor Core acceleration) + // Using f16 gives 2x throughput on RTX 4090/4080 + data: tensor, + + // GPU-side persistent caches (stay in VRAM) + @cuda.device hash_cache: tensor, + @cuda.device draw_cache: tensor, + @cuda.device attack_cache: tensor, + + // Current game state (mirrored on CPU for validation) + stm: i32, // Side to move (0=white, 1=black) + ep_square: i32, // En passant square (-1 if none) + castling: u8, // Castling rights (4 
bits) + halfmove: u8, // Halfmove clock for 50-move rule + fullmove: u16, // Full move counter + + // Zobrist hash (64-bit for strong collision resistance) + hash: u64, + + // Incremental update tracking + @atomic dirty: u32, // Dirty flag for GPU sync + last_draw_score: f32, // Cached draw probability + + // History for repetition detection + history: Vec, +} + +// ============================================================================ +// ZOBRIST HASHING TABLES +// ============================================================================ + +// GPU-resident Zobrist keys for fast hashing +@cuda.constant ZOBRIST_PIECES: tensor; +@cuda.constant ZOBRIST_CASTLING: tensor; +@cuda.constant ZOBRIST_EP: tensor; +@cuda.constant ZOBRIST_STM: u64; + +fn init_zobrist() { + // Initialize with random 64-bit keys + let mut rng = create_rng(0xDEADBEEF); + + on(gpu0) { + for piece in 0..12 { + for sq in 0..64 { + ZOBRIST_PIECES[piece, sq] = rng.next_u64(); + } + } + + for castle in 0..16 { + ZOBRIST_CASTLING[castle] = rng.next_u64(); + } + + for file in 0..8 { + ZOBRIST_EP[file] = rng.next_u64(); + } + + ZOBRIST_STM = rng.next_u64(); + } +} + +// ============================================================================ +// TENSORBOARD CREATION +// ============================================================================ + +fn create_tensor_board() -> TensorBoard { + return TensorBoard { + data: tensor.zeros[f16, (NUM_CHANNELS, BOARD_SIZE, BOARD_SIZE)], + hash_cache: tensor.zeros[u64, (HASH_CACHE_SIZE,)], + draw_cache: tensor.zeros[f16, (HASH_CACHE_SIZE,)], + attack_cache: tensor.zeros[f16, (ATTACK_BATCH_SIZE, 8, 8)], + stm: 0, + ep_square: -1, + castling: 0x0F, // All castling rights + halfmove: 0, + fullmove: 1, + hash: 0, + dirty: atomic.new(0), + last_draw_score: 0.5, + history: Vec.new(), + }; +} + +fn starting_tensor_board() -> TensorBoard { + let mut board = create_tensor_board(); + + // Set up starting position + // White pieces + board.data[CH_WHITE_PAWN, 1, 
0] = 1.0; // a2 + board.data[CH_WHITE_PAWN, 1, 1] = 1.0; // b2 + board.data[CH_WHITE_PAWN, 1, 2] = 1.0; // c2 + board.data[CH_WHITE_PAWN, 1, 3] = 1.0; // d2 + board.data[CH_WHITE_PAWN, 1, 4] = 1.0; // e2 + board.data[CH_WHITE_PAWN, 1, 5] = 1.0; // f2 + board.data[CH_WHITE_PAWN, 1, 6] = 1.0; // g2 + board.data[CH_WHITE_PAWN, 1, 7] = 1.0; // h2 + + board.data[CH_WHITE_ROOK, 0, 0] = 1.0; // a1 + board.data[CH_WHITE_KNIGHT, 0, 1] = 1.0; // b1 + board.data[CH_WHITE_BISHOP, 0, 2] = 1.0; // c1 + board.data[CH_WHITE_QUEEN, 0, 3] = 1.0; // d1 + board.data[CH_WHITE_KING, 0, 4] = 1.0; // e1 + board.data[CH_WHITE_BISHOP, 0, 5] = 1.0; // f1 + board.data[CH_WHITE_KNIGHT, 0, 6] = 1.0; // g1 + board.data[CH_WHITE_ROOK, 0, 7] = 1.0; // h1 + + // Black pieces + board.data[CH_BLACK_PAWN, 6, 0] = 1.0; // a7 + board.data[CH_BLACK_PAWN, 6, 1] = 1.0; // b7 + board.data[CH_BLACK_PAWN, 6, 2] = 1.0; // c7 + board.data[CH_BLACK_PAWN, 6, 3] = 1.0; // d7 + board.data[CH_BLACK_PAWN, 6, 4] = 1.0; // e7 + board.data[CH_BLACK_PAWN, 6, 5] = 1.0; // f7 + board.data[CH_BLACK_PAWN, 6, 6] = 1.0; // g7 + board.data[CH_BLACK_PAWN, 6, 7] = 1.0; // h7 + + board.data[CH_BLACK_ROOK, 7, 0] = 1.0; // a8 + board.data[CH_BLACK_KNIGHT, 7, 1] = 1.0; // b8 + board.data[CH_BLACK_BISHOP, 7, 2] = 1.0; // c8 + board.data[CH_BLACK_QUEEN, 7, 3] = 1.0; // d8 + board.data[CH_BLACK_KING, 7, 4] = 1.0; // e8 + board.data[CH_BLACK_BISHOP, 7, 5] = 1.0; // f8 + board.data[CH_BLACK_KNIGHT, 7, 6] = 1.0; // g8 + board.data[CH_BLACK_ROOK, 7, 7] = 1.0; // h8 + + // Compute occupancy channels + update_occupancy(&mut board); + + // Compute initial hash + board.hash = compute_hash(&board); + + return board; +} + +// ============================================================================ +// GPU KERNELS +// ============================================================================ + +// Kernel: Update occupancy channels +@cuda_kernel +fn update_occupancy_kernel(data: &mut tensor) { + let rank = cuda.thread_idx() / 8; + let file = 
cuda.thread_idx() % 8; + + // Sum white pieces + let mut white_occ: f16 = 0.0; + for ch in 0..6 { + white_occ += data[ch, rank, file]; + } + data[CH_WHITE_OCC, rank, file] = if white_occ > 0.0 { 1.0 } else { 0.0 }; + + // Sum black pieces + let mut black_occ: f16 = 0.0; + for ch in 6..12 { + black_occ += data[ch, rank, file]; + } + data[CH_BLACK_OCC, rank, file] = if black_occ > 0.0 { 1.0 } else { 0.0 }; +} + +fn update_occupancy(board: &mut TensorBoard) { + on(gpu0) { + update_occupancy_kernel<<<1, 64>>>(&mut board.data); + } +} + +// Kernel: Compute attack maps (for all squares simultaneously) +@cuda_kernel +fn compute_attacks_kernel( + data: tensor, + stm: i32, + attacks: &mut tensor +) { + let sq = cuda.thread_idx(); + let rank = sq / 8; + let file = sq % 8; + + let mut is_attacked: f16 = 0.0; + + // Check knight attacks + let knight_ch = if stm == 0 { CH_WHITE_KNIGHT } else { CH_BLACK_KNIGHT }; + let knight_offsets = [ + (-2, -1), (-2, 1), (-1, -2), (-1, 2), + (1, -2), (1, 2), (2, -1), (2, 1) + ]; + for (dr, df) in knight_offsets.iter() { + let nr = rank + *dr; + let nf = file + *df; + if nr >= 0 && nr < 8 && nf >= 0 && nf < 8 { + is_attacked += data[knight_ch, nr, nf]; + } + } + + // Check pawn attacks + let pawn_ch = if stm == 0 { CH_WHITE_PAWN } else { CH_BLACK_PAWN }; + let pawn_dir = if stm == 0 { -1 } else { 1 }; + let pr = rank + pawn_dir; + if pr >= 0 && pr < 8 { + if file > 0 { is_attacked += data[pawn_ch, pr, file - 1]; } + if file < 7 { is_attacked += data[pawn_ch, pr, file + 1]; } + } + + // Check king attacks (adjacent squares) + let king_ch = if stm == 0 { CH_WHITE_KING } else { CH_BLACK_KING }; + for dr in -1..2 { + for df in -1..2 { + if dr == 0 && df == 0 { continue; } + let nr = rank + dr; + let nf = file + df; + if nr >= 0 && nr < 8 && nf >= 0 && nf < 8 { + is_attacked += data[king_ch, nr, nf]; + } + } + } + + // Sliding pieces (simplified - full implementation uses bitboard rays) + // This is a simplified version for the kernel + + 
attacks[rank, file] = if is_attacked > 0.0 { 1.0 } else { 0.0 }; +} + +fn compute_attacks(board: &mut TensorBoard) { + on(gpu0) { + let mut attacks = tensor.zeros[f16, (8, 8)]; + compute_attacks_kernel<<<1, 64>>>(board.data, board.stm, &mut attacks); + + // Copy to attack channel + for rank in 0..8 { + for file in 0..8 { + board.data[CH_ATTACKS, rank, file] = attacks[rank, file]; + } + } + } +} + +// ============================================================================ +// HASH COMPUTATION +// ============================================================================ + +fn compute_hash(board: &TensorBoard) -> u64 { + let mut hash: u64 = 0; + + // Piece positions + for ch in 0..12 { + for rank in 0..8 { + for file in 0..8 { + if board.data[ch, rank, file] > 0.0 { + let sq = rank * 8 + file; + hash ^= ZOBRIST_PIECES[ch, sq]; + } + } + } + } + + // Castling rights + hash ^= ZOBRIST_CASTLING[board.castling as i32]; + + // En passant + if board.ep_square >= 0 { + let ep_file = board.ep_square % 8; + hash ^= ZOBRIST_EP[ep_file]; + } + + // Side to move + if board.stm == 1 { + hash ^= ZOBRIST_STM; + } + + return hash; +} + +// Incremental hash update (for make_move) +fn update_hash_move(board: &mut TensorBoard, m: Move) { + // Remove piece from source + let from_sq = m.from; + let from_rank = from_sq / 8; + let from_file = from_sq % 8; + board.hash ^= ZOBRIST_PIECES[m.piece, from_sq]; + + // Add piece to destination + let to_sq = m.to; + let to_rank = to_sq / 8; + let to_file = to_sq % 8; + + if m.promotion != 0 { + board.hash ^= ZOBRIST_PIECES[m.promotion, to_sq]; + } else { + board.hash ^= ZOBRIST_PIECES[m.piece, to_sq]; + } + + // Remove captured piece + if m.capture != 0 { + board.hash ^= ZOBRIST_PIECES[m.capture, to_sq]; + } + + // Update castling (if changed) + // ... (simplified) + + // Update en passant + // ... 
(simplified) + + // Flip side to move + board.hash ^= ZOBRIST_STM; +} + +// ============================================================================ +// MOVE MAKING +// ============================================================================ + +fn make_tensor_move(board: &mut TensorBoard, m: Move) -> TensorBoard { + let mut new_board = board.clone(); + + let from_rank = m.from / 8; + let from_file = m.from % 8; + let to_rank = m.to / 8; + let to_file = m.to % 8; + + // Clear source square + new_board.data[m.piece, from_rank, from_file] = 0.0; + + // Clear destination square (capture) + if m.capture != 0 { + new_board.data[m.capture, to_rank, to_file] = 0.0; + } + + // Set destination square + if m.promotion != 0 { + new_board.data[m.promotion, to_rank, to_file] = 1.0; + } else { + new_board.data[m.piece, to_rank, to_file] = 1.0; + } + + // Update state + new_board.stm = 1 - new_board.stm; + new_board.halfmove += 1; + if new_board.stm == 0 { + new_board.fullmove += 1; + } + + // Reset halfmove on pawn move or capture + if m.piece == CH_WHITE_PAWN || m.piece == CH_BLACK_PAWN || m.capture != 0 { + new_board.halfmove = 0; + } + + // Update occupancy + update_occupancy(&mut new_board); + + // Update hash + update_hash_move(&mut new_board, m); + + // Add to history + new_board.history.push(new_board.hash); + + return new_board; +} + +// ============================================================================ +// DRAW PROBABILITY CACHE +// ============================================================================ + +fn cache_draw_probability(board: &mut TensorBoard, prob: f32) { + let cache_idx = (board.hash as i32) % HASH_CACHE_SIZE; + + on(gpu0) { + board.hash_cache[cache_idx] = board.hash; + board.draw_cache[cache_idx] = prob as f16; + } + + board.last_draw_score = prob; + + // Update draw channel in tensor + let draw_val = prob as f16; + for rank in 0..8 { + for file in 0..8 { + board.data[CH_DRAW_CACHE, rank, file] = draw_val; + } + } +} + +fn 
lookup_draw_probability(board: &TensorBoard) -> (bool, f32) { + let cache_idx = (board.hash as i32) % HASH_CACHE_SIZE; + + let cached_hash = board.hash_cache[cache_idx]; + if cached_hash == board.hash { + let cached_prob = board.draw_cache[cache_idx] as f32; + return (true, cached_prob); + } + + return (false, 0.5); +} + +// ============================================================================ +// BATCH OPERATIONS +// ============================================================================ + +const TENSOR_BATCH_SIZE: i32 = 256; + +fn evaluate_batch(boards: &[TensorBoard], network: &DrawNetwork) -> Vec { + let n = boards.len(); + let mut results = Vec.with_capacity(n); + + // Stack boards into batch tensor + let mut batch = tensor.zeros[f16, (TENSOR_BATCH_SIZE, NUM_CHANNELS, 8, 8)]; + + for (i, board) in boards.iter().enumerate() { + if i >= TENSOR_BATCH_SIZE { break; } + batch[i] = board.data; + } + + // GPU batch evaluation + on(gpu0) { + let outputs = network_forward_batch(network, batch, n as i32); + + for i in 0..n { + results.push(outputs[i] as f32); + } + } + + return results; +} + +// ============================================================================ +// CONVERSION UTILITIES +// ============================================================================ + +fn tensor_to_bitboard(board: &TensorBoard, channel: i32) -> u64 { + let mut bb: u64 = 0; + + for rank in 0..8 { + for file in 0..8 { + if board.data[channel, rank, file] > 0.0 { + let sq = rank * 8 + file; + bb |= 1u64 << sq; + } + } + } + + return bb; +} + +fn bitboard_to_tensor(bb: u64, tensor: &mut tensor) { + for sq in 0..64 { + let rank = sq / 8; + let file = sq % 8; + tensor[rank, file] = if (bb & (1u64 << sq)) != 0 { 1.0 } else { 0.0 }; + } +} + +// ============================================================================ +// STATISTICS +// ============================================================================ + +fn tensor_board_stats(board: &TensorBoard) { + 
println!("=== TensorBoard Statistics ==="); + println!("Hash: 0x{:016X}", board.hash); + println!("Side to move: {}", if board.stm == 0 { "White" } else { "Black" }); + println!("Halfmove: {}", board.halfmove); + println!("Fullmove: {}", board.fullmove); + println!("Castling: 0x{:02X}", board.castling); + println!("EP square: {}", board.ep_square); + println!("History length: {}", board.history.len()); + println!("Cached draw score: {:.3}", board.last_draw_score); + + // Count pieces + let mut white_count = 0; + let mut black_count = 0; + + for ch in 0..6 { + for rank in 0..8 { + for file in 0..8 { + if board.data[ch, rank, file] > 0.0 { + white_count += 1; + } + } + } + } + + for ch in 6..12 { + for rank in 0..8 { + for file in 0..8 { + if board.data[ch, rank, file] > 0.0 { + black_count += 1; + } + } + } + } + + println!("White pieces: {}", white_count); + println!("Black pieces: {}", black_count); +} diff --git a/samples/Mind/vec3.mind b/samples/Mind/vec3.mind new file mode 100644 index 0000000000..ff4e2bf072 --- /dev/null +++ b/samples/Mind/vec3.mind @@ -0,0 +1,290 @@ +// vec3.mind - 3D Vector Mathematics for Mind Ray +// +// This module provides 3D vector types and operations essential for ray tracing. +// All operations are designed to be GPU-friendly with no heap allocation. +// +// Design Principles: +// - All functions are pure (no side effects) +// - No heap allocation (fixed-size struct) +// - Numerically stable implementations +// - Consistent naming: v3_* prefix for operations +// +// Coordinate System: +// - Right-handed coordinate system +// - Y-axis points up +// - Z-axis points toward the viewer (camera looks toward -Z) +// +// Author: STARGA Inc. 
// License: MIT

// =============================================================================
// Constants
// =============================================================================

/// Small epsilon for near-zero comparisons
const VEC3_EPSILON: f32 = 0.00001;

/// Minimum length for safe normalization
const NORMALIZE_MIN_LEN: f32 = 0.0001;

// =============================================================================
// Vector Type
// =============================================================================

/// A 3-component single-precision vector.
///
/// Single type serving three roles in the ray tracer:
/// - world-space positions
/// - directions (kept unit-length by convention)
/// - RGB colors (usually in [0, 1]; HDR values may exceed 1)
pub struct Vec3 {
    /// X component (red for colors, right for directions)
    x: f32,
    /// Y component (green for colors, up for directions)
    y: f32,
    /// Z component (blue for colors, forward for directions)
    z: f32,
}

// =============================================================================
// Constructors
// =============================================================================

/// Build a Vec3 from its three components.
///
/// # Arguments
/// * `x` - X component
/// * `y` - Y component
/// * `z` - Z component
///
/// # Returns
/// * The assembled vector
#[inline]
pub fn vec3(x: f32, y: f32, z: f32) -> Vec3 {
    Vec3 { x: x, y: y, z: z }
}

/// The zero vector (0, 0, 0).
///
/// Typical uses: accumulator seed, "no light" color, and the
/// sentinel returned by guarded operations (see v3_div, v3_normalize).
#[inline]
pub fn vec3_zero() -> Vec3 {
    vec3(0.0, 0.0, 0.0)
}

/// The all-ones vector (1, 1, 1).
///
/// Typical uses: initial path-tracing throughput, white color, and the
/// identity element for the Hadamard (component-wise) product.
#[inline]
pub fn vec3_ones() -> Vec3 {
    vec3(1.0, 1.0, 1.0)
}

// =============================================================================
// Arithmetic Operations
// =============================================================================

/// Component-wise sum: a + b
#[inline]
pub fn v3_add(a: Vec3, b: Vec3) -> Vec3 {
    vec3(a.x + b.x, a.y + b.y, a.z + b.z)
}

/// Component-wise difference: a - b
#[inline]
pub fn v3_sub(a: Vec3, b: Vec3) -> Vec3 {
    vec3(a.x - b.x, a.y - b.y, a.z - b.z)
}

/// Uniform scale: v * t
#[inline]
pub fn v3_mul(v: Vec3, t: f32) -> Vec3 {
    vec3(v.x * t, v.y * t, v.z * t)
}

/// Scale by the reciprocal: v / t
///
/// Multiplies by 1/t so only one division is paid instead of three.
/// A divisor within VEC3_EPSILON of zero yields the zero vector
/// instead of producing infinities or NaNs.
///
/// # Arguments
/// * `v` - Vector to divide
/// * `t` - Scalar divisor
///
/// # Returns
/// * v scaled by 1/t, or the zero vector when t is near zero
#[inline]
pub fn v3_div(v: Vec3, t: f32) -> Vec3 {
    if f32_abs(t) < VEC3_EPSILON {
        vec3_zero()
    } else {
        v3_mul(v, 1.0 / t)
    }
}

/// Hadamard (component-wise) product: a * b
///
/// The workhorse of color attenuation in path tracing; each channel
/// is scaled independently.
#[inline]
pub fn v3_hadamard(a: Vec3, b: Vec3) -> Vec3 {
    vec3(a.x * b.x, a.y * b.y, a.z * b.z)
}

/// Component-wise negation: -v
#[inline]
pub fn v3_neg(v: Vec3) -> Vec3 {
    vec3(-v.x, -v.y, -v.z)
}

// =============================================================================
// Geometric Operations
// =============================================================================

/// Dot product: a . b
///
/// Geometrically equals |a| * |b| * cos(angle between a and b).
#[inline]
pub fn v3_dot(a: Vec3, b: Vec3) -> f32 {
    a.x * b.x + a.y * b.y + a.z * b.z
}

/// Cross product: a x b
///
/// Returns a vector perpendicular to both a and b.
/// The magnitude equals |a| * |b| * sin(angle).
/// Follows right-hand rule for direction.
#[inline]
pub fn v3_cross(a: Vec3, b: Vec3) -> Vec3 {
    let cx = a.y * b.z - a.z * b.y;
    let cy = a.z * b.x - a.x * b.z;
    let cz = a.x * b.y - a.y * b.x;
    vec3(cx, cy, cz)
}

/// Squared magnitude: |v|^2
///
/// Cheaper than v3_length when only relative comparisons are needed,
/// since it skips the square root.
#[inline]
pub fn v3_length_sq(v: Vec3) -> f32 {
    v3_dot(v, v)
}

/// Magnitude: |v|
#[inline]
pub fn v3_length(v: Vec3) -> f32 {
    f32_sqrt(v3_length_sq(v))
}

/// Scale v to unit length.
///
/// Inputs whose length does not exceed NORMALIZE_MIN_LEN map to the
/// zero vector rather than amplifying noise through a near-zero
/// division.
///
/// # Arguments
/// * `v` - Vector to normalize
///
/// # Returns
/// * A unit vector along v, or the zero vector for degenerate input
#[inline]
pub fn v3_normalize(v: Vec3) -> Vec3 {
    let len = v3_length(v);
    if len > NORMALIZE_MIN_LEN {
        return v3_div(v, len);
    }
    vec3_zero()
}

/// Mirror incident direction v about unit normal n: v - 2 (v . n) n
///
/// # Arguments
/// * `v` - Incident direction (pointing toward surface)
/// * `n` - Surface normal (unit vector, pointing outward)
///
/// # Returns
/// * The reflected direction
#[inline]
pub fn v3_reflect(v: Vec3, n: Vec3) -> Vec3 {
    v3_sub(v, v3_mul(n, 2.0 * v3_dot(v, n)))
}

/// Bend a direction through a material boundary via Snell's law.
///
/// Decomposes the refracted ray into components perpendicular and
/// parallel to the normal.
///
/// # Arguments
/// * `uv` - Incident direction (unit vector, pointing toward surface)
/// * `n` - Surface normal (unit vector, pointing into incident medium)
/// * `etai_over_etat` - Ratio of refractive indices (n1/n2)
///
/// # Returns
/// * The refracted direction, or the reflected direction when total
///   internal reflection occurs
///
/// # Total Internal Reflection
/// Refraction is impossible once sin(theta_t) would exceed 1, which
/// shows up here as 1 - |r_out_perp|^2 going negative; the ray is
/// reflected instead.
#[inline]
pub fn v3_refract(uv: Vec3, n: Vec3, etai_over_etat: f32) -> Vec3 {
    let cos_theta = f32_min(-v3_dot(uv, n), 1.0);
    let r_out_perp = v3_mul(v3_add(uv, v3_mul(n, cos_theta)), etai_over_etat);
    let k = 1.0 - v3_length_sq(r_out_perp);

    if k < 0.0 {
        // Total internal reflection: fall back to a mirror bounce.
        return v3_reflect(uv, n);
    }

    let r_out_parallel = v3_mul(n, -f32_sqrt(k));
    v3_add(r_out_perp, r_out_parallel)
}

// =============================================================================
// Utility Functions
// =============================================================================

/// True when every component lies within VEC3_EPSILON of zero.
///
/// Guards against degenerate directions, e.g. the cross product of
/// (nearly) parallel vectors.
#[inline]
pub fn v3_near_zero(v: Vec3) -> bool {
    (f32_abs(v.x) < VEC3_EPSILON) && (f32_abs(v.y) < VEC3_EPSILON) && (f32_abs(v.z) < VEC3_EPSILON)
}

/// Clamp all components to [0, 1] range
///
/// Used for final color output to prevent overflow in image files.
#[inline]
pub fn v3_clamp01(v: Vec3) -> Vec3 {
    // Clamp each channel independently before image output.
    let r = f32_clamp(v.x, 0.0, 1.0);
    let g = f32_clamp(v.y, 0.0, 1.0);
    let b = f32_clamp(v.z, 0.0, 1.0);
    vec3(r, g, b)
}

// =============================================================================
// External Dependencies
// =============================================================================
// Math utilities imported from math_stubs.mind:
// - f32_sqrt: Square root
// - f32_abs: Absolute value
// - f32_min: Minimum of two floats
// - f32_max: Maximum of two floats
// - f32_clamp: Clamp to range
diff --git a/vendor/grammars/mind-grammar b/vendor/grammars/mind-grammar
new file mode 160000
index 0000000000..91e24a6b18
--- /dev/null
+++ b/vendor/grammars/mind-grammar
@@ -0,0 +1 @@
+Subproject commit 91e24a6b1862c50d8cbe91c38c82d9454f9562ca