fragment.shd


// ---------------------------------------------------------
// Ray marching distance fields plus shading with IBL and AO
// ---------------------------------------------------------

uniform float in_time;
uniform float in_screen_wdh;
uniform float in_screen_hgt;

uniform samplerCube env_reflection;
uniform samplerCube env_cos_1;
uniform samplerCube env_cos_8;
uniform samplerCube env_cos_64;
uniform samplerCube env_cos_512;

uniform sampler1D cornell_geom;

out vec4 frag_color;

// http://iquilezles.org/www/articles/distfunctions/distfunctions.htm
float de_sphere(vec3 pos, float r)
{
    return length(pos) - r;
}
float de_torus(vec3 pos, float torus_size, float torus_r)
{
    vec2 q = vec2(length(pos.xy) - torus_size, pos.z);
    return length(q) - torus_r;
}
float de_rounded_box(vec3 pos, vec3 box, float r)
{
    return length(max(abs(pos) - box, 0.0)) - r;
}
float de_cone(vec3 pos, vec2 c)
{
    // c must be normalized
    float q = length(pos.xz);
    return dot(c, vec2(q, pos.y));
}

// http://en.wikipedia.org/wiki/Spherical_coordinate_system
void cartesian_to_spherical(vec3 p, out float r, out float theta, out float phi)
{
    r     = length(p);
    theta = acos(p.z / r);
    phi   = atan(p.y, p.x);
}
vec3 spherical_to_cartesian(float r, float theta, float phi)
{
    return r * vec3(sin(theta) * cos(phi), sin(theta) * sin(phi), cos(theta));
}

vec3 triplex_pow(vec3 w, float power)
{
    // General pow() for our triplex numbers
    //
    // http://blog.hvidtfeldts.net/index.php/2011/09/
    //     distance-estimated-3d-fractals-iv-the-holy-grail/
    //
    // http://blog.hvidtfeldts.net/index.php/2011/09/
    //     distance-estimated-3d-fractals-v-the-mandelbulb-different-de-approximations/

    float r, theta, phi;
    cartesian_to_spherical(w, r, theta, phi);

    // Scale and rotate the point
    float zr = pow(r, power);
    theta    = theta * power;
    phi      = phi * power;

    return spherical_to_cartesian(zr, theta, phi);
}

vec3 triplex_pow8(vec3 w)
{
    // Optimized pow(x, 8) for our triplex numbers (special case without transcendentals)
    //
    // http://www.iquilezles.org/www/articles/mandelbulb/mandelbulb.htm
    //
    // (modified so the Mandelbulb has the same orientation as the general triplex_pow() one)
    //
    // TODO: Have specialized versions of all the integer powers, i.e.
    //       http://www.fractalforums.com/index.php?action=dlattach;topic=742.0;attach=429;image
    //       http://en.wikipedia.org/wiki/Mandelbulb

    float x = w.x; float x2 = x*x; float x4 = x2*x2;
    float y = w.y; float y2 = y*y; float y4 = y2*y2;
    float z = w.z; float z2 = z*z; float z4 = z2*z2;

    float k3 = y2 + x2;
    float k2 = inversesqrt( k3*k3*k3*k3*k3*k3*k3 );
    float k1 = y4 + z4 + x4 - 6.0*z2*x2 - 6.0*y2*z2 + 2.0*x2*y2;
    float k4 = y2 - z2 + x2;

    return vec3( -8.0*z*k4*(y4*y4 - 28.0*y4*y2*x2 + 70.0*y4*x4 - 28.0*y2*x2*x4 + x4*x4)*k1*k2
               , 64.0*y*z*x*(y2-x2)*k4*(y4-6.0*y2*x2+x4)*k1*k2
               , -16.0*z2*k3*k4*k4 + k1*k1
               );
}

float de_mandelbulb(vec3 pos)
{
    // References
    //
    // http://www.skytopia.com/project/fractal/mandelbulb.html
    // http://www.bugman123.com/Hypercomplex/index.html
    // http://blog.hvidtfeldts.net/index.php/2011/09/
    //     distance-estimated-3d-fractals-v-the-mandelbulb-different-de-approximations/
    //
    // TODO: Understand and try out some of the other DE methods from the link above

#ifdef POWER8
    float power = 8;
#else
    // Animate power
    float pow_offs = mod(in_time / 2, 9);
    if (pow_offs > 4.5)
        pow_offs = 9 - pow_offs;
    float power = pow_offs + 2;
#endif
    const float bailout    = 4;
    const int   iterations = 25;

    // Swap some axis so our Mandelbulb is upright instead of lying on the side
    pos = pos.zxy;

    // Iterate. This is pretty much what we'd do for a Mandelbrot set, except that instead of
    // complex numbers we have triplex numbers with a special power operation that rotates
    // and scales in spherical coordinates
    vec3  w  = pos;
    float dr = 1.0;
    float r  = 0.0;
    // vec3 trap = abs(w);
    for (int i=0; i<iterations; i++)
    {
        // TODO: Re-use length(w) term for cartesian_to_spherical() in triplex_pow()
        r = length(w);
        if (r > bailout)
            break;
#ifdef POWER8
        w = triplex_pow8(w);
#else
        w = triplex_pow(w, power);
#endif
        w += pos;

        // Running scalar derivative
        dr = pow(r, power - 1.0) * power * dr + 1.0;

        // Use the three coordinate system axis-aligned planes as orbit traps
        // trap = min(trap, abs(w));
    }

    // surf_col = trap;

    // Distance estimate from running derivative and escape radius
    return 0.5 * log(r) * r / dr;
}

// Generalized Distance Functions
//
// http://www.pouet.net/topic.php?which=7931&page=1#c365231
// http://www.viz.tamu.edu/faculty/ergun/research/implicitmodeling/papers/sm99.pdf
//
const vec3 gd_n1  = vec3( 1.000,  0.000,  0.000);
const vec3 gd_n2  = vec3( 0.000,  1.000,  0.000);
const vec3 gd_n3  = vec3( 0.000,  0.000,  1.000);
const vec3 gd_n4  = vec3( 0.577,  0.577,  0.577);
const vec3 gd_n5  = vec3(-0.577,  0.577,  0.577);
const vec3 gd_n6  = vec3( 0.577, -0.577,  0.577);
const vec3 gd_n7  = vec3( 0.577,  0.577, -0.577);
const vec3 gd_n8  = vec3( 0.000,  0.357,  0.934);
const vec3 gd_n9  = vec3( 0.000, -0.357,  0.934);
const vec3 gd_n10 = vec3( 0.934,  0.000,  0.357);
const vec3 gd_n11 = vec3(-0.934,  0.000,  0.357);
const vec3 gd_n12 = vec3( 0.357,  0.934,  0.000);
const vec3 gd_n13 = vec3(-0.357,  0.934,  0.000);
const vec3 gd_n14 = vec3( 0.000,  0.851,  0.526);
const vec3 gd_n15 = vec3( 0.000, -0.851,  0.526);
const vec3 gd_n16 = vec3( 0.526,  0.000,  0.851);
const vec3 gd_n17 = vec3(-0.526,  0.000,  0.851);
const vec3 gd_n18 = vec3( 0.851,  0.526,  0.000);
const vec3 gd_n19 = vec3(-0.851,  0.526,  0.000);
float de_octahedral(vec3 p, float e, float r)
{
    float s = pow(abs(dot(p, gd_n4)), e);
    s += pow(abs(dot(p, gd_n5)), e);
    s += pow(abs(dot(p, gd_n6)), e);
    s += pow(abs(dot(p, gd_n7)), e);
    s = pow(s, 1.0 / e);
    return s - r;
}
float de_dodecahedral(vec3 p, float e, float r)
{
    float s = pow(abs(dot(p, gd_n14)), e);
    s += pow(abs(dot(p, gd_n15)), e);
    s += pow(abs(dot(p, gd_n16)), e);
    s += pow(abs(dot(p, gd_n17)), e);
    s += pow(abs(dot(p, gd_n18)), e);
    s += pow(abs(dot(p, gd_n19)), e);
    s = pow(s, 1.0 / e);
    return s - r;
}
float de_icosahedral(vec3 p, float e, float r)
{
    float s = pow(abs(dot(p, gd_n4)), e);
    s += pow(abs(dot(p, gd_n5 )), e);
    s += pow(abs(dot(p, gd_n6 )), e);
    s += pow(abs(dot(p, gd_n7 )), e);
    s += pow(abs(dot(p, gd_n8 )), e);
    s += pow(abs(dot(p, gd_n9 )), e);
    s += pow(abs(dot(p, gd_n10)), e);
    s += pow(abs(dot(p, gd_n11)), e);
    s += pow(abs(dot(p, gd_n12)), e);
    s += pow(abs(dot(p, gd_n13)), e);
    s = pow(s, 1.0 / e);
    return s - r;
}
float de_toctahedral(vec3 p, float e, float r)
{
    float s = pow(abs(dot(p, gd_n1)), e);
    s += pow(abs(dot(p, gd_n2)), e);
    s += pow(abs(dot(p, gd_n3)), e);
    s += pow(abs(dot(p, gd_n4)), e);
    s += pow(abs(dot(p, gd_n5)), e);
    s += pow(abs(dot(p, gd_n6)), e);
    s += pow(abs(dot(p, gd_n7)), e);
    s = pow(s, 1.0 / e);
    return s - r;
}
float de_ticosahedral(vec3 p, float e, float r)
{
    float s = pow(abs(dot(p, gd_n4)), e);
    s += pow(abs(dot(p, gd_n5 )), e);
    s += pow(abs(dot(p, gd_n6 )), e);
    s += pow(abs(dot(p, gd_n7 )), e);
    s += pow(abs(dot(p, gd_n8 )), e);
    s += pow(abs(dot(p, gd_n9 )), e);
    s += pow(abs(dot(p, gd_n10)), e);
    s += pow(abs(dot(p, gd_n11)), e);
    s += pow(abs(dot(p, gd_n12)), e);
    s += pow(abs(dot(p, gd_n13)), e);
    s += pow(abs(dot(p, gd_n14)), e);
    s += pow(abs(dot(p, gd_n15)), e);
    s += pow(abs(dot(p, gd_n16)), e);
    s += pow(abs(dot(p, gd_n17)), e);
    s += pow(abs(dot(p, gd_n18)), e);
    s += pow(abs(dot(p, gd_n19)), e);
    s = pow(s, 1.0 / e);
    return s - r;
}

bool intersect_triangle( vec3 orig
                       , vec3 dir
                       , vec3 vert0
                       , vec3 vert1
                       , vec3 vert2
                       , out float t
                       , out float u
                       , out float v
                       )
{
    // Fast, Minimum Storage Ray-Triangle Intersection
    //
    // Tomas Möller and Ben Trumbore. Fast, minimum storage ray-triangle intersection.
    // Journal of graphics tools, 2(1):21-28, 1997
    //
    // http://www.jcenligne.fr/download/little3d/
    //     jgt%20Fast,%20Minumum%20Storage%20Ray-Triangle%20Intersection.htm

    const float JGT_RAYTRI_EPSILON = 0.000001;

    vec3 edge1, edge2, tvec, pvec, qvec;
    float det, inv_det;

    // Find vectors for two edges sharing vert0
    edge1 = vert1 - vert0;
    edge2 = vert2 - vert0;

    // Begin calculating determinant - also used to calculate U parameter
    pvec = cross(dir, edge2);

    // If determinant is near zero, ray lies in plane of triangle
    det = dot(edge1, pvec);

    if (det > -JGT_RAYTRI_EPSILON && det < JGT_RAYTRI_EPSILON)
        return false;
    inv_det = 1.0 / det;

    // Calculate distance from vert0 to ray origin
    tvec = orig - vert0;

    // Calculate U parameter and test bounds
    u = dot(tvec, pvec) * inv_det;
    if (u < 0.0 || u > 1.0)
        return false;

    // Prepare to test V parameter
    qvec = cross(tvec, edge1);

    // Calculate V parameter and test bounds
    v = dot(dir, qvec) * inv_det;
    if (v < 0.0 || u + v > 1.0)
        return false;

    // Calculate t, ray intersects triangle
    t = dot(edge2, qvec) * inv_det;

    return true;
}

float line_seg_min_dist_sq(vec3 a, vec3 b, vec3 p)
{
    // Squared distance to the closest point from p on the line segment a b
    vec3  ab = b - a;
    float len_sq = dot(ab, ab);
    float t = dot(p - a, ab) / len_sq;
    t = clamp(t, 0, 1);
    vec3 proj = a + t * ab;
    return dot(p-proj, p-proj);
}

bool compute_barycentric(vec3 pos, vec3 v0, vec3 v1, vec3 v2, out float u, out float v)
{
    // Compute the barycentric coordinates of a point, return if the point is inside
    // the triangle, or more accurate, inside its triangular prism
    //
    // Source: http://www.blackpawn.com/texts/pointinpoly/

    vec3 e0 = v2 - v0;
    vec3 e1 = v1 - v0;
    vec3 e2 = pos - v0;

    float dot00 = dot(e0, e0);
    float dot01 = dot(e0, e1);
    float dot02 = dot(e0, e2);
    float dot11 = dot(e1, e1);
    float dot12 = dot(e1, e2);

    float inv_denom = 1 / (dot00 * dot11 - dot01 * dot01);
    u = (dot11 * dot02 - dot01 * dot12) * inv_denom;
    v = (dot00 * dot12 - dot01 * dot02) * inv_denom;

    // Check if point is in triangle
    return (u >= 0) && (v >= 0) && (u + v < 1);
}

float de_triangle(vec3 pos, vec3 v0, vec3 v1, vec3 v2)
{
    // Compute the distance between a point and a triangle. This is either the closest
    // point on the plane (if it is inside the triangle), or the closest point on any of
    // the three edges. Note that if we remove the 'inside triangle' case we get a DE for
    // the edges only, allowing us to produce a wireframe rendering
    //
    // TODO: Explore some other, potentially faster methods of computing this
    //       http://www-compsci.swan.ac.uk/~csmark/PDFS/dist.pdf
    //       http://www.ann.jussieu.fr/~frey/papers/divers/
    //           Jones%20M.W.,%203d%20distance%20fields,%20a%20survey.pdf

    float u, v;
    if (compute_barycentric(pos, v0, v1, v2, u, v))
    {
        vec3 point_on_plane = v2 * u + v1 * v + v0 * (1 - (u + v));
        return distance(pos, point_on_plane);
    }
    else
    {
        return sqrt(min(line_seg_min_dist_sq(v0, v1, pos),
                    min(line_seg_min_dist_sq(v0, v2, pos),
                        line_seg_min_dist_sq(v1, v2, pos))));
    }
}

float de_cornell_box(vec3 pos)
{
    // Trying to store the array with the Cornell Box geometry as literal data caused issues.
    // The compiler tries to unroll the entire loop, inlining the de_triangle() function 32
    // times. This will stall glUseProgram(), where the actual code generation / optimization
    // happens, for a very long time (see
    // http://lists.apple.com/archives/mac-opengl/2008/Nov/msg00003.html). Also, in general
    // indexing into a large constant array seems to be something that doesn't map well to
    // shader hardware, so here we store the vertices in a floating-point texture instead
    //
    // TODO: It would be useful to explore acceleration structures for triangle meshes.
    //
    //       Here's a survey with a section on that subject:
    //
    //       http://www.ann.jussieu.fr/~frey/papers/divers/
    //           Jones%20M.W.,%203d%20distance%20fields,%20a%20survey.pdf
    //
    //       This approach seems promising:
    //
    //       https://graphics.stanford.edu/courses/cs468-03-fall/Papers/
    //           completeDistanceFieldRep.pdf
    //
    //       Rather memory and pre-computation intensive, but the final representation is
    //       very GPU traversal friendly and there is no loss of precision as with
    //       the voxelization approaches
    //
    float dist = 999;
    for (int i=0; i<32; i++)
    {
        // TODO: We could just compare squared distance and take the sqrt() at the end
        dist = min(dist, de_triangle(pos, texelFetch(cornell_geom, i * 3 + 0, 0).xyz,
                                          texelFetch(cornell_geom, i * 3 + 1, 0).xyz,
                                          texelFetch(cornell_geom, i * 3 + 2, 0).xyz));
    }
    // dist = min(dist, de_toctahedral(pos + vec3(-0.17, -0.28, -0.06), 40, 0.175));
    // dist = min(dist, de_ticosahedral(pos + vec3(0.2, 0.05, 0.2), 40, 0.175));
    return dist;
}

float smin(float a, float b, float k)
{
    // http://iquilezles.org/www/articles/smin/smin.htm
    float res = exp(-k * a) + exp(-k * b);
    return -log(res) / k;
}

float distance_estimator(vec3 pos)
{
#if defined(MANDELBULB_SCENE)
    return de_mandelbulb(pos);
#elif defined(CORNELL_BOX_SCENE)
    return de_cornell_box(pos);
#else
    // float offset =
    //  0.03*sin(20.0*pos.x+in_time)*sin(20.0*pos.y+in_time)*sin(20.0*pos.z+in_time);
    // return de_triangle(pos, vec3(-0.25, -0.25, 0), vec3(0.25, -0.25, 0), vec3(0, 0.25, 0));
    /*
    return smin(de_rounded_box(pos, vec3(0.05, 0.85, 0.05), 0.05),
             smin(de_rounded_box(pos, vec3(0.1, 0.1, 0.85), 0.05),
               smin(de_sphere(pos, 0.3),
                 de_torus(pos, 0.8, 0.2),
                   32), 32), 64);
    */
    // return de_cone(pos + vec3(0, -1, 0), normalize(vec2(0.2, 0.1)));
    /*
    float min_dist = 999;
    min_dist = min(min_dist, de_octahedral(pos + vec3(-0.5, -0.5, 0), 30, 0.20));
    min_dist = min(min_dist, de_dodecahedral(pos + vec3(-0.5, 0.5, 0), 50, 0.25));
    min_dist = min(min_dist, de_icosahedral(pos + vec3(0.5, 0.5, 0), 50, 0.25));
    min_dist = min(min_dist, de_toctahedral(pos + vec3(0.5, -0.5, 0), 50, 0.25));
    min_dist = min(min_dist, de_ticosahedral(pos + vec3(0, 0, 0), 50, 0.25));
    return min_dist;
    */
    float d_sphere = de_sphere(pos, 0.4);
    float d_torus = smin(smin(
                      de_torus(pos, 0.85, 0.1),
                        de_torus(pos.zxy, 0.85, 0.1), 64),
                          de_torus(pos.yzx, 0.85, 0.1), 64);
    float d_box = smin(smin(
                    de_rounded_box(pos, vec3(0.8, 0.06, 0.06), 0.03),
                      de_rounded_box(pos, vec3(0.06, 0.8, 0.06), 0.03), 64),
                        de_rounded_box(pos, vec3(0.06, 0.06, 0.8), 0.03), 64);
    return smin(d_box, min(d_sphere, d_torus), 64);
#endif
}

// http://en.wikipedia.org/wiki/Finite_difference#Forward.2C_backward.2C_and_central_differences
// http://blog.hvidtfeldts.net/index.php/2011/08/
//     distance-estimated-3d-fractals-ii-lighting-and-coloring/
vec3 normal_backward_difference(vec3 pos)
{
    float c = distance_estimator(pos);
    const float eps = 0.00001;
    return normalize(vec3(c - distance_estimator(pos - vec3(eps, 0.0, 0.0)),
                          c - distance_estimator(pos - vec3(0.0, eps, 0.0)),
                          c - distance_estimator(pos - vec3(0.0, 0.0, eps))));
}
vec3 normal_central_difference(vec3 pos)
{
    const float eps = 0.00001;
    const vec3 epsX = vec3(eps, 0.0, 0.0);
    const vec3 epsY = vec3(0.0, eps, 0.0);
    const vec3 epsZ = vec3(0.0, 0.0, eps);
    return normalize(vec3(distance_estimator(pos + epsX) - distance_estimator(pos - epsX),
                          distance_estimator(pos + epsY) - distance_estimator(pos - epsY),
                          distance_estimator(pos + epsZ) - distance_estimator(pos - epsZ)));
}

// Compute the world-space surface normal from the screen-space partial derivatives
// of the intersection distance (depth) and the camera transform
vec3 normal_screen_space_depth(float dx, float dy, mat4x4 camera)
{
    // TODO: This is wrong, use normal_screen_space_isec()
    return (camera * vec4(normalize(vec3(dx, dy, sqrt(dx*dx + dy*dy))), 0)).xyz;
}

// Normal from position through screen-space partial derivatives
vec3 normal_screen_space_isec(vec3 p)
{
    return cross(normalize(dFdx(p)), normalize(dFdy(p)));
}

// Distance AO based on the following references:
//
// http://www.iquilezles.org/www/material/nvscene2008/rwwtt.pdf
// http://www.mazapan.se/news/2010/07/15/gpu-ray-marching-with-distance-fields/
//
//               5    1
// ao = 1 - k *  E   ---  (i * d - distfield(p + n * i * d))
//              i=1  2^i
//
// The above never really seemed to work properly, though. At the very least it
// seems to be required to divide the 'd - distfield' term by d to have it normalized.
//
// Then, there are still errors due to the distance at p not being zero, which makes
// sense as the ray marcher will stop at a min. distance. A cheap fix is to simply clamp
// the term. There's also some kind of surface acne problem that can be mitigated by back
// stepping on the ray like for the normal computation. The deltas are also poorly setup,
// with some spheres contributing little more than artifacts or a constant occlusion
//
float distance_ao_old(vec3 p, vec3 n)
{
    float weight = 0.5;
    float occl_sum = 0.0;

    for (int i=0; i<5; i++)
    {
        // Test progressively larger spheres further away along the surface normal
        float delta = pow(i + 1.0, 4.0) * 0.001; // i = 0..4, delta = 0.001..0.625

        // Check sphere occlusion. The back stepping epsilon seems fairly large, but
        // anything smaller causes issues. The change in position in combination with
        // the min. distance at which the ray marcher stops will cause the occlusion
        // term to leave its range, for now we fix this by simply clamping it instead
        // of trying to account for these errors
        occl_sum += weight * clamp(
            1.0 - distance_estimator((p + n * 0.001) + n * delta) / delta, 0.0, 1.0);

        // More distant, outer spheres contribute exponentially less to the occlusion sum
        weight *= 0.5;
    }

    // Magic fudge factor to make dark parts darker and bright parts brighter
    occl_sum = (clamp((occl_sum * 2 - 1) * 1.65, -1, 1) + 1) * 0.5;
    return pow(1.0 - occl_sum, 8.0);
}

// Faster, simpler, more stable, less artifacts version of distance_ao_old()
float distance_ao(vec3 p, vec3 n)
{
#ifndef CORNELL_BOX_SCENE
    float occl_sum = 0.0;
    float weight, delta;

    weight = 0.5;
    delta = 0.016;
    occl_sum += weight * clamp(1.0 - distance_estimator(p + n * delta) / delta, 0.0, 1.0);

    weight = 0.25;
    delta = 0.081;
    occl_sum += weight * clamp(1.0 - distance_estimator(p + n * delta) / delta, 0.0, 1.0);

    // Magic fudge factor to make dark parts darker and bright parts brighter
    occl_sum  = 1 - occl_sum;
    occl_sum -= 0.29;
    occl_sum *= 3.5;
    occl_sum *= occl_sum;
    occl_sum  = clamp(occl_sum, 0, 1);
    return occl_sum;

    //occl_sum = (clamp((occl_sum * 2 - 1) * 2.0, -1, 1) + 1) * 0.5;
    //occl_sum = 1 - occl_sum;
    //return occl_sum * occl_sum;
#else
    float occl_sum = 0.0;
    float weight, delta;

    weight = 0.1;
    delta = 0.1;
    occl_sum += weight * clamp(1.0 - distance_estimator(p + n * delta) / delta, 0.0, 1.0);

    weight = 0.2;
    delta = 0.2;
    occl_sum += weight * clamp(1.0 - distance_estimator(p + n * delta) / delta, 0.0, 1.0);

    weight = 0.125;
    delta = 0.4;
    occl_sum += weight * clamp(1.0 - distance_estimator(p + n * delta) / delta, 0.0, 1.0);

    weight = 0.0625;
    delta = 0.5;
    occl_sum += weight * clamp(1.0 - distance_estimator(p + n * delta) / delta, 0.0, 1.0);

    // Magic fudge factor to make dark parts darker and bright parts brighter
    occl_sum = 1 - occl_sum;
    return occl_sum;
#endif
}

// TODO: Could try implementing SSS based on the distance_ao() function

bool ray_sphere( vec3 origin
               , vec3 dir
               , vec3 spherePos
               , float sphereR
               , out float tmin
               , out float tmax
               )
{
    vec3 rs  = spherePos - origin;
    float t  = dot(dir, rs);
    float a  = dot(rs, rs) - t * t;
    float r2 = sphereR * sphereR;

    if (a > r2)
        return false;

    float h  = sqrt(r2 - a);
    tmin     = t - h;
    tmax     = t + h;

    return true;
}

bool ray_march( vec3 origin
              , vec3 dir
              , out float t             // Intersection T along the ray
              , out float step_gradient // Step count based gradient (for cheap fake AO)
              )
{
    // Ray march till we come close enough to a surface or exceed the iteration count
    //
    // References:
    //
    // http://blog.hvidtfeldts.net/index.php/2011/06/distance-estimated-3d-fractals-part-i/
    // http://www.iquilezles.org/www/material/nvscene2008/rwwtt.pdf

    // TODO: Adjust ray marching MIN_DIST, FD normal epsilon and ray step back
    //       based screen projection, like in https://www.shadertoy.com/view/MdfGRr

    const int   MAX_STEPS = 128;
    const float MIN_DIST  = 0.001;

    // First intersect with a bounding sphere. Helps quickly reject rays which can't
    // possibly intersect with the scene and brings our starting point closer
    // to the surface (DEs get very imprecise when we're starting to far away)
    const float b_sphere_r =
#ifdef MANDELBULB_SCENE
    #ifdef POWER8
        1.15;
    #else
        1.5;
    #endif
#else
        1.0;
#endif
    float tspheremin, tspheremax;
    if (!ray_sphere(origin, dir, vec3(0,0,0), b_sphere_r, tspheremin, tspheremax))
        return false;
    t = tspheremin;

    // Ignore intersections behind the origin, might otherwise render scene with flipped
    // ray direction if we're looking away from it
    t = max(0, t);

    for (int steps=0; steps<MAX_STEPS; steps++)
    {
        vec3 pos = origin + t * dir;
        float dist = distance_estimator(pos);
        t += dist;

        if (t > tspheremax) // Left bounding sphere?
            return false;

        if (dist < MIN_DIST) // Close enough to surface?
        {
            step_gradient = 1.0 - float(steps) / float(MAX_STEPS);
            return true;
        }
    }

    return false;
}

vec3 soft_lam(vec3 n, vec3 light, vec3 surface_col)
{
    vec3  warm_col  = vec3(0.9 , 0.9 , 0.7);
    vec3  cool_col  = vec3(0.07, 0.07, 0.1);
    float diff_warm = 0.35;
    float diff_cool = 0.25;

    float ndotl     = (dot(light, n) + 1.0) * 0.5;

    vec3  kcool     = min((cool_col + diff_cool) * surface_col, 1.0);
    vec3  kwarm     = min((warm_col + diff_warm) * surface_col, 1.0);
    vec3  kfinal    = mix(kcool, kwarm, ndotl);

    return min(kfinal, 1.0);
}

float fresnel_conductor( float cosi // Cosine between normal and incident ray
                       , float eta  // Index of refraction
                       , float k    // Absorption coefficient
                       )
{
    // Compute Fresnel term for a conductor, PBRT 1st edition p422

    // Material | Eta   | K
    // ------------------------
    // Gold     | 0.370 | 2.820
    // Silver   | 0.177 | 3.638
    // Copper   | 0.617 | 2.63
    // Steel    | 2.485 | 3.433

    // TODO: Fresnel for dielectrics

    float tmp = (eta * eta + k * k) * cosi * cosi;
    float r_parallel_2 =
        (tmp - (2.0 * eta * cosi) + 1.0) /
        (tmp + (2.0 * eta * cosi) + 1.0);
    float tmp_f = eta * eta + k * k;
    float r_perpend_2 =
        (tmp_f - (2.0 * eta * cosi) + cosi * cosi) /
        (tmp_f + (2.0 * eta * cosi) + cosi * cosi);
    return (r_parallel_2 + r_perpend_2) / 2.0;
}

float normalize_phong_lobe(float power)
{
    return (power + 2) / 2;
}

vec3 render_ray(vec3 origin, vec3 dir, mat4x4 camera)
{
    // Ray march
    float t, step_gradient;
    bool hit = ray_march(origin, dir, t, step_gradient);

    // Can use the iteration count to add a snowy/foggy/glow type effect
    //
    // http://www.fractalforums.com/mandelbulb-implementation/faked-ambient-occlusion/
    //     msg10526/#msg10526
    //
    // vec3 glow = (1.0 - pow((clamp((step_gradient * 2 - 1) * 1.5, -1, 1) + 1) * 0.5, 8.0))
    //             * vec3(0.2, 0.3, 0.3);

    if (hit)
    {
        // Compute intersection
        vec3 isec_pos = origin + dir * t;

        // Step back from the surface a bit before computing the normal
        //
        // Experiments with trying to step back along the surface normal (cheaply computed
        // in screen-space) did not improve results. Not having the step back also works
        // reasonably well, except in a few corner cases like infinitely thin surfaces
        //
        vec3 isec_n = normal_backward_difference(isec_pos - dir * 0.00001);
        // vec3 isec_n = normal_screen_space_isec(isec_pos);

        // TODO: We can fix some numerical problems when computing normals by switching to
        //       screen-space normals when very thin, fin-like surfaces causes errors. This is
        //       most noticeable for some of the lower powers of the mandelbulb, but
        //       unfortunately those surfaces are so disjoint that they also causes issues for
        //       our distanced based AO computations
        //
        // vec3 isec_n_ss = normal_screen_space_isec(isec_pos);
        // if (dot(-dir, isec_n) < 0.0) // Clearly wrong normal?
        //     isec_n = isec_n_ss; // Switch to screen space normal

        // TODO: Better IBL + AO by doing occlusion for a number of cosine lobes to get
        //       directional visibility information
#define DISTANCE_AO
#ifdef DISTANCE_AO
        float ao = distance_ao(isec_pos, isec_n);
#else
        float ao = pow((clamp((step_gradient * 2 - 1) * 1.25, -1, 1) + 1) * 0.5, 8.0);
#endif

        // Shading
        vec3 color;

        //if (gl_FragCoord.x < in_screen_wdh / 2)

        //color = vec3(((isec_n + 1) * 0.5) * ao);
        //color = soft_lam(isec_n, normalize(vec3(1, 1, 1)), vec3(ao));
        //color = ((dot(isec_n, (camera * vec4(0, 0, 1, 0)).xyz) +1) * 0.5 + 0.5) * vec3(ao);
        /*color = clamp(dot(isec_n, vec3(0,0,1)), 0, 1) * vec3(1,0,0) +
                clamp(dot(isec_n, vec3(0,0,-1)), 0, 1) * vec3(0,1,0);*/
        //color = (isec_n + 1) * 0.5;
        //color = vec3(ao);
        /*
        color = ( vec3(max(0, 0.2+dot(isec_n, normalize(vec3(1, 1, 1))))) * vec3(1,0.75,0.75) +
                  vec3(max(0, 0.2+dot(isec_n, normalize(vec3(-1, -1, -1))))) * vec3(0.75,1.0,1.0)
                ) * ao;
        */
        /*
        color =
        (
          max(0.2+dot(isec_n, (camera * vec4(0, 0, 1, 0)).xyz),0)*vec3(0.2)+
          vec3(max(0, pow(dot(reflect(dir,isec_n), normalize(vec3(1,0,1))),5))) * vec3(1,0.4,0)*2 +
          vec3(max(0, pow(dot(reflect(dir,isec_n), normalize(vec3(1,-1,0))),5))) * vec3(0,.51,.51)*2
        ) * ao;
        */

        float fresnel     = fresnel_conductor(dot(-dir, isec_n), 0.4, 0.8);
        //fresnel = 1;
        float diff_weight = 0.5;
        vec3  diff_col    = vec3(1, 0.8, 0.8);
        vec3  spec_col    = vec3(0.8, 0.8, 1);
        float spec_weight = 1.0 - diff_weight;
        color =
        (
          texture(env_cos_1, isec_n).xyz * diff_col * diff_weight
          + texture(env_cos_8, reflect(dir, isec_n)).xyz * spec_col * normalize_phong_lobe(8) * fresnel * spec_weight
          + texture(env_reflection, reflect(dir, isec_n)).xyz * spec_weight * fresnel * 0.1
        ) * 3.0 * ao;
        //color = vec3(fresnel);
        //color = vec3(ao);
        //color = vec3(1,0,1);
        //color = (isec_n + 1) * 0.5;

        return color;
    }
    else
#define BG_GRADIENT
#ifdef BG_GRADIENT
        //return mix(vec3(1, 0.4, 0), vec3(0, 0.51, 0.51), gl_FragCoord.y / in_screen_hgt);
        //return mix(vec3(1), vec3(0), gl_FragCoord.y / in_screen_hgt);
        return texture(env_reflection, dir).xyz;
#else
        return vec3(0);
#endif
}

mat4x4 lookat(vec3 eye, vec3 focus, vec3 up)
{
    vec3 zaxis = normalize(eye - focus);
    vec3 xaxis = normalize(cross(up, zaxis));
    vec3 yaxis = cross(zaxis, xaxis);
    return mat4x4(xaxis.x, xaxis.y, xaxis.z, 0.0,
                  yaxis.x, yaxis.y, yaxis.z, 0.0,
                  zaxis.x, zaxis.y, zaxis.z, 0.0,
                  eye.x  , eye.y  , eye.z  , 1.0);
}

void generate_ray( mat4x4 camera       // Camera transform
                 , vec2 sample_offs    // Sample offset [-.5, +.5]
                 , bool ortho          // Orthographic or perspective camera?
                 , float width_or_hfov // Width of ortho viewing volume or horizontal FOV degrees
                 , out vec3 origin
                 , out vec3 dir
                 )
{
    // Convert fragment coordinates and sample offset to NDC [-1, 1]
    vec2 ndc = (gl_FragCoord.xy + sample_offs) / vec2(in_screen_wdh, in_screen_hgt) * 2.0 - 1.0;

    // Generate ray from NDC and camera transform
    float aspect = in_screen_wdh / in_screen_hgt;
    if (ortho)
    {
        // Orthographic projection. Frame [-w/2, w/2] on X,
        // center interval on Y while keeping aspect
        float width  = width_or_hfov;
        float height = width / aspect;
        origin       = (camera * vec4(ndc * vec2(width / 2.0, height / 2.0), 0, 1)).xyz;
        dir          = mat3(camera) * vec3(0, 0, -1);
    }
    else
    {
        // Perspective projection. Unlike the usual vertical FOV we deal with a horizontal
        // one, just like the orthographic camera defined by its width
        float hfov   = radians(width_or_hfov);
        float fov_xs = tan(hfov / 2);
        origin       = (camera * vec4(0, 0, 0, 1)).xyz;
        dir          = mat3(camera) * normalize(vec3(ndc.x*fov_xs, ndc.y*fov_xs / aspect, -1.0));
    }
}

void main()
{
    // TODO: Move transformations into vertex shader, like here:
    //       http://blog.hvidtfeldts.net/index.php/2014/01/combining-ray-tracing-and-polygons/

    // TODO: Consider a hierarchical Z like setup where we first ray march 4x4 pixel blocks
    //       till we get close to the surface and then start off there at pixel resolution
    //       Also see
    //       http://www.fractalforums.com/mandelbulb-implementation/major-raymarching-optimization/

    // Orbit camera
    vec3 cam_pos = vec3(0,0,2);
#define AUTO_ROTATION
#ifdef AUTO_ROTATION
    #ifdef CORNELL_BOX_SCENE
        cam_pos.x = sin(in_time / 2) * 0.4;
        cam_pos.z = -2;
        cam_pos.y = cos(in_time / 2) * 0.4;
    #else
        cam_pos.x = sin(in_time / 3.0);
        cam_pos.z = cos(in_time / 3.0);
        cam_pos.y = cos(in_time / 4.0);
        // Keep a constant distance. Distance is so that a width = 2 orthographic projection
        // matches up with a HFOV = 45 perspective projection as close as possible
        cam_pos = normalize(cam_pos) * 2.414213562373095;
    #endif
#endif

    // Camera transform. Look at center, orbit around it
    mat4x4 camera = lookat(cam_pos, vec3(0,0,0), vec3(0,1,0));

    // Generate camera ray
    vec3 origin, dir;
//#define CAM_ORTHO
#ifdef CAM_ORTHO
    generate_ray(camera, vec2(0, 0), true, 2.0, origin, dir);
#else
    generate_ray(camera, vec2(0, 0), false, 45.0 * 1.5, origin, dir);
#endif

    // Trace and shade
    vec3 color;
//#define RAY_TRACING_TEST
#ifdef RAY_TRACING_TEST
    // Ray trace Cornell Box
    float mint = 999;
    vec3 n;

    for (int i=0; i<32; i++)
    {
        vec3 v0 = texelFetch(cornell_geom, i * 3 + 0, 0).xyz;
        vec3 v1 = texelFetch(cornell_geom, i * 3 + 1, 0).xyz;
        vec3 v2 = texelFetch(cornell_geom, i * 3 + 2, 0).xyz;

        float t, u, v;
        if (intersect_triangle(origin, dir, v0, v1, v2, t, u, v))
            if (t < mint)
            {
                n    = normalize(cross(v1 - v0, v2 - v0));
                mint = t;
            }
    }

    color = (mint == 999) ? texture(env_reflection, dir).xyz : (n + 1) * 0.5;
#else
    color = render_ray(origin, dir, camera);
#endif

    // Use screen-space derivatives to check the contrast between neighbouring pixels,
    // keep shooting more rays till it passes below a threshold. Works OK from an image
    // quality standpoint, but performance is fairly poor due to the heavy cost of
    // divergence, probably not worth it in practice compared to the naive super sampling
    // we have on the frame buffer level
#ifdef ADAPTIVE_SAMPLING
    float weight = 1.0;
    while (fwidth(pow(color.r / weight, 1.0 / 2.2) /* gamma */) > 0.3 /* threshold*/ && weight < 32)
    {
        // <shoot next ray>
        // weight += 1;
    }
    color /= weight;
#endif

#define GAMMA_CORRECT
#ifdef GAMMA_CORRECT
    // Gamma correct and output
    vec3 gamma = pow(color, vec3(1.0 / 2.2));
    frag_color = vec4(gamma, 1);
#else
    frag_color = vec4(color, 1);
#endif

    // TODO: Add some form of tone mapping to the output, exposure controls
}