// https://mathworld.wolfram.com/BarthDecic.html

// The rendered image has far fewer artifacts with Halley's method than Newton's,
// but is still not completely perfect.
// The 3rd-order Householder's method doesn't improve things much in this implementation, but
// it might be suffering from a lack of numerical precision.

// https://en.wikipedia.org/wiki/Householder%27s_method

// Related: https://www.shadertoy.com/view/DlfXzB

#define AA              2
#define USE_NEWTON      0
#define USE_HALLEY      1
#define USE_HOUSEHOLDER 0

const float circumradius = sqrt((5. + sqrt(5.)) / 2.);

// Sphere intersection function from IQ
vec2 sphIntersect( in vec3 ro, in vec3 rd, in vec3 ce, float ra )
{
    vec3 oc = ro - ce;
    float b = dot( oc, rd );
    vec3 qc = oc - b*rd;
    float h = ra*ra - dot( qc, qc );
    if( h<0.0 ) return vec2(-1.0); // no intersection
    h = sqrt( h );
    return vec2( -b-h, -b+h );
}

// Extended dual number
struct Dual
{
   float  f; // f
   vec3   d; // df/dt, d²f/dt², d³f/dt³
};

const Dual w = Dual(1., vec3(0));
const Dual phi = Dual((1. + sqrt(5.)) / 2., vec3(0));

Dual dMul(Dual a, Dual b)
{
    Dual res;
    res.f = a.f * b.f;
    res.d = vec3(a.d.x * b.f + a.f * b.d.x, // 1st-order product rule
                 a.f * b.d.y + a.d.y * b.f + 2. * a.d.x * b.d.x, // 2nd-order product rule
                 a.f * b.d.z + a.d.z * b.f + 3. * (a.d.y * b.d.x + a.d.x * b.d.y) // 3rd-order product rule
                 );
    return res;
}

Dual dSqr(Dual a)
{
    return dMul(a, a);
}

Dual dAdd(Dual a, Dual b)
{
   return Dual(a.f + b.f, a.d + b.d);
}

Dual dSub(Dual a, Dual b)
{
   return Dual(a.f - b.f, a.d - b.d);
}

Dual dConst(float x)
{
    return Dual(x, vec3(0));
}

Dual dLinear(float x)
{
    return Dual(0., vec3(x, 0., 0.));
}

Dual dPow(Dual a, int n)
{
    Dual r = Dual(0., vec3(0));
    Dual b = a;
    for(int i = 0; i < 4; ++i)
    {
        if((n & (1 << i)) != 0)
            r = dAdd(r, b);
        b = dMul(b, a);
    }
    return r;
}

Dual dNeg(Dual a)
{
    return Dual(-a.f, -a.d);
}

#define GENERATE_OVERLOADS(op) \
    Dual op(Dual a, Dual b, Dual c) { return op(op(a, b), c); } \
    Dual op(Dual a, Dual b, Dual c, Dual d) { return op(op(a, b, c), d); } \
    Dual op(Dual a, Dual b, Dual c, Dual d, Dual e) { return op(op(a, b, c, d), e); } \
    Dual op(Dual a, Dual b, Dual c, Dual d, Dual e, Dual f) { return op(op(a, b, c, d, e), f); }
    
GENERATE_OVERLOADS(dAdd)
GENERATE_OVERLOADS(dSub)
GENERATE_OVERLOADS(dMul)

Dual dBarthDecic(Dual x, Dual y, Dual z)
{
    // 8(x²-f4y²)(y²-f4z²)(z²-f4x²)(x4+y4+z4-2x²y²-2x²z²-2y²z²)+(3+5f)(x²+y²+z²-w²)[x²+y²+z²-(2-f)w²]²w²

    Dual x2 = dSqr(x);
    Dual y2 = dSqr(y);
    Dual z2 = dSqr(z);
    Dual w2 = dSqr(w);
    Dual phi2 = dSqr(phi);
    
    Dual x4 = dSqr(x2);
    Dual y4 = dSqr(y2);
    Dual z4 = dSqr(z2);
    Dual phi4 = dSqr(phi2);
    
    Dual a = dMul(dConst(8.), dMul(dSub(x2, dMul(phi4, y2)), dSub(y2, dMul(phi4, z2)), dSub(z2, dMul(phi4, x2)),
            dAdd(x4, y4, z4, dMul(dConst(-2.), dAdd(dMul(x2, y2), dMul(x2, z2), dMul(y2, z2))))));
    
    Dual b = dMul(dAdd(dConst(3.), dMul(dConst(5.), phi)), dSqr(dAdd(x2, y2, z2, dNeg(w2))),
                dSqr(dAdd(x2, y2, z2, dMul(dConst(phi.f - 2.), w2))), w2);
                
    return dAdd(a, b);
}

mat3 rotX(float a)
{
    return mat3(1., 0., 0., 0., cos(a), sin(a),  0., -sin(a), cos(a));
}

mat3 rotY(float a)
{
    return mat3(cos(a), 0., sin(a), 0., 1., 0., -sin(a), 0., cos(a));
}

vec3 traceRay(vec2 fragCoord)
{
    // Set up ray.
    vec3 ro = vec3(0., 0., circumradius * 2.);
    vec3 rd = normalize(vec3((fragCoord.xy - iResolution.xy / 2.) / iResolution.y, -1.));

    mat3 m = mat3(1.);        
            
    if(iMouse.z > 0.)
        m = rotY((iMouse.x / iResolution.x * 2. - 1.) * 2.) *
            rotX((iMouse.y / iResolution.y * 2. - 1.) * 2.);
    else
        m = rotY(-sin(iTime / 2.)) * rotX(-sin(iTime / 3.));

    ro = m * ro;
    rd = m * rd;

    float t = 1e5;

    // Use ray-bounding-sphere intersection test to find initial estimate for root-finding.
    vec2 s = sphIntersect(ro, rd, vec3(0), circumradius);

    bool hit = false;
    
    if(s.x > 0. && s.x < s.y)
    {
        t = s.x;
        
        for(int i = 0; i < 70; ++i)
        {
            vec3 rp = ro + rd * t;
            // Instead of the usual "divide by length of gradient", here the gradient along
            // the ray is computed directly.
            Dual res = dBarthDecic(dAdd(dConst(rp.x), dLinear(rd.x)),
                                   dAdd(dConst(rp.y), dLinear(rd.y)),
                                   dAdd(dConst(rp.z), dLinear(rd.z)));
            if(t >= s.y)
                break;
                
            if(res.f < 1e-4)
            {
                hit = true;
                break;
            }
#if USE_NEWTON
            // Newton: x = x - f(x) / f'(x)
            t += min(.125, abs((res.f) / (res.d.x)));
#elif USE_HALLEY
            // Halley: x = x - 2f(x)f'(x) / (2(f'(x)²) - f(x)f''(x))
            t += min(.125, abs(2. * res.f * res.d.x / (2. * res.d.x * res.d.x - res.f * res.d.y)));
#elif USE_HOUSEHOLDER
            // 3rd-order Householder:
            float num = 3. * (2. * res.f * pow(res.d.x, 2.) - pow(res.f, 2.) * res.d.y);
            float den = 6. * (pow(res.d.x, 3.) - res.f * res.d.x * res.d.y) + pow(res.f, 2.) * res.d.z;
            t += min(.125, abs(num / den));
#endif
        }
    }
    
    vec3 col = vec3(.06);

    if(hit)
    {
        vec3 rp = ro + rd * t;
        
        // Surface normal from gradient of scalar field.
        Dual res_x = dBarthDecic(dAdd(dConst(rp.x), dLinear(1.)),
                                 dAdd(dConst(rp.y), dLinear(0.)),
                                 dAdd(dConst(rp.z), dLinear(0.)));
        Dual res_y = dBarthDecic(dAdd(dConst(rp.x), dLinear(0.)),
                                 dAdd(dConst(rp.y), dLinear(1.)),
                                 dAdd(dConst(rp.z), dLinear(0.)));
        Dual res_z = dBarthDecic(dAdd(dConst(rp.x), dLinear(0.)),
                                 dAdd(dConst(rp.y), dLinear(0.)),
                                 dAdd(dConst(rp.z), dLinear(1.)));
        vec3 n = -normalize(vec3(res_x.d.x, res_y.d.x, res_z.d.x));

        // Base surface colour
        vec3 diff = mix(vec3(.4, 1., .7).brg, vec3(1., 1., .7), smoothstep(1., 2.2, length(rp)));
        diff = mix(diff, diff * .75, mod(floor(rp.x * 3.) + floor(rp.y * 3.) + floor(rp.z * 3.), 2.));
        
        
        // Basic lighting
        
        float fr = mix(.01, 1., pow(1. - clamp(dot(rd, n), 0., 1.), 2.));
        vec3 r = reflect(-rd, n);

        vec3 l = normalize(vec3(-2,1,1));
        col = diff * (dot(l, -n) * .5 + .5) * (1. - fr) * vec3(1., 1., .9).brg;
        col += diff * mix(.25, 1., (dot(vec3(0,-1,0), -n) * .5 + .5) * (1. - fr)) * vec3(.2,.2,.4).brg;
        col += smoothstep(.5, 1., max(0., dot(-r, l))) * fr * 1.1;
    }
    
    return clamp(col, 0., 1.);
}

void mainImage( out vec4 fragColor, in vec2 fragCoord )
{
    vec3 col = vec3(0);
    
    for(int y = 0; y < AA; ++y)
        for(int x = 0; x < AA; ++x)
        {
            col += traceRay(fragCoord + vec2(x, y) / float(AA));
        }

    col /= float(AA) * float(AA);

    fragColor = vec4(sqrt(col), 1.);
}
