//----------------------------------------------------------------------------
// Anti-Grain Geometry - Version 2.4
// Copyright (C) 2002-2005 Maxim Shemanarev (http://www.antigrain.com)
//
// Permission to copy, use, modify, sell and distribute this software 
// is granted provided this copyright notice appears in all copies. 
// This software is provided "as is" without express or implied
// warranty, and with no claim as to its suitability for any purpose.
//
//----------------------------------------------------------------------------
// Contact: mcseem@antigrain.com
//          mcseemagg@yahoo.com
//          http://www.antigrain.com
//----------------------------------------------------------------------------
//
// The Stack Blur Algorithm was invented by Mario Klingemann, 
// mario@quasimondo.com and described here:
// http://incubator.quasimondo.com/processing/fast_blur_deluxe.php
// (search phrase "Stackblur: Fast But Goodlooking"). 
// The major improvement is that there's no more division table
// that was very expensive to create for large blur radii. Instead, 
// for 8-bit per channel and radius not exceeding 254 the division is 
// replaced by multiplication and shift. 
//
//----------------------------------------------------------------------------

#ifndef AGG_BLUR_INCLUDED
#define AGG_BLUR_INCLUDED

#include "agg_array.h"
#include "agg_pixfmt_base.h"
#include "agg_pixfmt_transposer.h"

namespace agg
{

    // Fixed-point reciprocal tables used by the 8-bit stack blur code in
    // this header. Both arrays are indexed by the blur radius (0..254):
    // a weighted sum is normalized as (sum * mul[radius]) >> shr[radius]
    // in place of a division by (radius + 1) * (radius + 1).
    // The template parameter is not used by the members; the code in this
    // header always refers to the tables as stack_blur_tables<int>.
    template<class Unused> struct stack_blur_tables
    {
        static const int16u g_stack_blur8_mul[255];
        static const int8u  g_stack_blur8_shr[255];
    };

    //------------------------------------------------------------------------
    // Multiplier half of the multiply-and-shift normalization, one entry
    // per blur radius 0..254 (255 entries). Entry [r] is meant to be used
    // together with g_stack_blur8_shr[r]: (sum * mul) >> shr takes the place
    // of sum / ((r + 1) * (r + 1)); e.g. r = 1 gives 512 >> 11, i.e. 1/4.
    template<class T> 
    int16u const stack_blur_tables<T>::g_stack_blur8_mul[255] = 
    {
        512,512,456,512,328,456,335,512,405,328,271,456,388,335,292,512,
        454,405,364,328,298,271,496,456,420,388,360,335,312,292,273,512,
        482,454,428,405,383,364,345,328,312,298,284,271,259,496,475,456,
        437,420,404,388,374,360,347,335,323,312,302,292,282,273,265,512,
        497,482,468,454,441,428,417,405,394,383,373,364,354,345,337,328,
        320,312,305,298,291,284,278,271,265,259,507,496,485,475,465,456,
        446,437,428,420,412,404,396,388,381,374,367,360,354,347,341,335,
        329,323,318,312,307,302,297,292,287,282,278,273,269,265,261,512,
        505,497,489,482,475,468,461,454,447,441,435,428,422,417,411,405,
        399,394,389,383,378,373,368,364,359,354,350,345,341,337,332,328,
        324,320,316,312,309,305,301,298,294,291,287,284,281,278,274,271,
        268,265,262,259,257,507,501,496,491,485,480,475,470,465,460,456,
        451,446,442,437,433,428,424,420,416,412,408,404,400,396,392,388,
        385,381,377,374,370,367,363,360,357,354,350,347,344,341,338,335,
        332,329,326,323,320,318,315,312,310,307,304,302,299,297,294,292,
        289,287,285,282,280,278,275,273,271,269,267,265,263,261,259
    };

    //------------------------------------------------------------------------
    // Right-shift half of the multiply-and-shift normalization, one entry
    // per blur radius 0..254 (255 entries). Entry [r] is the shift count
    // applied after multiplying a sum by g_stack_blur8_mul[r].
    template<class T> 
    int8u const stack_blur_tables<T>::g_stack_blur8_shr[255] = 
    {
          9, 11, 12, 13, 13, 14, 14, 15, 15, 15, 15, 16, 16, 16, 16, 17, 
         17, 17, 17, 17, 17, 17, 18, 18, 18, 18, 18, 18, 18, 18, 18, 19, 
         19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 20, 20, 20,
         20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 21,
         21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
         21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 22, 22, 22, 22, 22, 22, 
         22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22,
         22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 23, 
         23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23,
         23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23,
         23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 
         23, 23, 23, 23, 23, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 
         24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
         24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
         24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
         24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24
    };



    //==============================================================stack_blur
    // Generic stack blur working on any image type that offers width(),
    // height(), pixel(x, y) and copy_color_hspan(x, y, len, colors).
    //
    //   ColorT      - color type held in the scratch buffers; must provide
    //                 base_mask, the maximum component value.
    //   CalculatorT - accumulator providing clear(), add(), sub() and both
    //                 calc_pix() overloads (see stack_blur_calc_rgba/rgb/gray).
    //
    // The scratch buffers are data members and are (re)allocated by every
    // blur_x() call that actually blurs something.
    template<class ColorT, class CalculatorT> class stack_blur
    {
    public:
        typedef ColorT      color_type;
        typedef CalculatorT calculator_type;

        //--------------------------------------------------------------------
        // Blurs each row of img horizontally, in place.
        // A radius of 0, or an image with zero width or height, is a no-op.
        // Positions beyond the left/right edge are filled with the nearest
        // edge pixel.
        template<class Img> void blur_x(Img& img, unsigned radius)
        {
            if(radius < 1) return;

            unsigned x, y, xp, i;
            unsigned stack_ptr;
            unsigned stack_start;

            color_type      pix;
            color_type*     stack_pix;
            calculator_type sum;      // weighted sum over the whole window
            calculator_type sum_in;   // plain sum of the pixels right of center
            calculator_type sum_out;  // plain sum of center + pixels left of it

            unsigned w   = img.width();
            unsigned h   = img.height();

            // Empty image: nothing to blur. Without this check "w - 1" below
            // wraps around, pixel(0, y) is requested from rows that hold no
            // pixels and &m_buf[0] is taken on a zero-length buffer.
            if(w == 0 || h == 0) return;

            unsigned wm  = w - 1;
            unsigned div = radius * 2 + 1;   // number of pixels in the stack

            // Sum of the triangular weights 1..radius+1..1.
            unsigned div_sum = (radius + 1) * (radius + 1);
            unsigned mul_sum = 0;
            unsigned shr_sum = 0;
            unsigned max_val = color_type::base_mask;

            // For components of at most 8 bits and radii covered by the
            // tables, normalize by multiply-and-shift instead of dividing.
            // mul_sum == 0 selects the division path below.
            if(max_val <= 255 && radius < 255)
            {
                mul_sum = stack_blur_tables<int>::g_stack_blur8_mul[radius];
                shr_sum = stack_blur_tables<int>::g_stack_blur8_shr[radius];
            }

            m_buf.allocate(w, 128);
            m_stack.allocate(div, 32);

            for(y = 0; y < h; y++)
            {
                sum.clear();
                sum_in.clear();
                sum_out.clear();

                // Left half of the window (including the center): the first
                // pixel of the row repeated, with weights 1..radius+1.
                pix = img.pixel(0, y);
                for(i = 0; i <= radius; i++)
                {
                    m_stack[i] = pix;
                    sum.add(pix, i + 1);
                    sum_out.add(pix);
                }
                // Right half: pixels 1..radius (clamped to the last column),
                // with weights radius..1.
                for(i = 1; i <= radius; i++)
                {
                    pix = img.pixel((i > wm) ? wm : i, y);
                    m_stack[i + radius] = pix;
                    sum.add(pix, radius + 1 - i);
                    sum_in.add(pix);
                }

                stack_ptr = radius;
                for(x = 0; x < w; x++)
                {
                    // Emit the normalized result into the row buffer; the
                    // image itself is only overwritten after the whole row
                    // has been processed.
                    if(mul_sum) sum.calc_pix(m_buf[x], mul_sum, shr_sum);
                    else        sum.calc_pix(m_buf[x], div_sum);

                    sum.sub(sum_out);
           
                    // Slot of the oldest pixel in the circular stack; it is
                    // replaced by the pixel entering on the right.
                    stack_start = stack_ptr + div - radius;
                    if(stack_start >= div) stack_start -= div;
                    stack_pix = &m_stack[stack_start];

                    sum_out.sub(*stack_pix);

                    xp = x + radius + 1;
                    if(xp > wm) xp = wm;
                    pix = img.pixel(xp, y);
            
                    *stack_pix = pix;
            
                    sum_in.add(pix);
                    sum.add(sum_in);
            
                    // Advance the center: the new center pixel moves from
                    // the incoming sum to the outgoing sum.
                    ++stack_ptr;
                    if(stack_ptr >= div) stack_ptr = 0;
                    stack_pix = &m_stack[stack_ptr];

                    sum_out.add(*stack_pix);
                    sum_in.sub(*stack_pix);
                }
                img.copy_color_hspan(0, y, w, &m_buf[0]);
            }
        }

        //--------------------------------------------------------------------
        // Blurs img along the other axis by running blur_x() on a
        // pixfmt_transposer view of it (presumably a view with x and y
        // swapped - see agg_pixfmt_transposer.h).
        template<class Img> void blur_y(Img& img, unsigned radius)
        {
            pixfmt_transposer<Img> img2(img);
            blur_x(img2, radius);
        }

        //--------------------------------------------------------------------
        // Applies blur_x() to img and then to its pixfmt_transposer view,
        // using the same radius for both passes.
        template<class Img> void blur(Img& img, unsigned radius)
        {
            blur_x(img, radius);
            pixfmt_transposer<Img> img2(img);
            blur_x(img2, radius);
        }

    private:
        pod_vector<color_type> m_buf;     // one output row
        pod_vector<color_type> m_stack;   // circular stack of 2*radius+1 pixels
    };

    //====================================================stack_blur_calc_rgba
    // Four-channel (r, g, b, a) accumulator for stack_blur.
    // T is the type of each running sum. The sums are not initialized on
    // construction; call clear() before accumulating.
    template<class T=unsigned> struct stack_blur_calc_rgba
    {
        typedef T value_type;
        value_type r;
        value_type g;
        value_type b;
        value_type a;

        // Resets all four sums to zero.
        AGG_INLINE void clear() 
        { 
            r = 0;
            g = 0;
            b = 0;
            a = 0;
        }

        // Adds the channels of src (anything exposing r, g, b, a).
        template<class ArgT> AGG_INLINE void add(const ArgT& src)
        {
            r += src.r;
            g += src.g;
            b += src.b;
            a += src.a;
        }

        // Adds the channels of src, each multiplied by weight.
        template<class ArgT> AGG_INLINE void add(const ArgT& src, unsigned weight)
        {
            r += src.r * weight;
            g += src.g * weight;
            b += src.b * weight;
            a += src.a * weight;
        }

        // Subtracts the channels of src.
        template<class ArgT> AGG_INLINE void sub(const ArgT& src)
        {
            r -= src.r;
            g -= src.g;
            b -= src.b;
            a -= src.a;
        }

        // Writes sum / divisor into each channel of dst, converted to
        // dst's component type.
        template<class ArgT> AGG_INLINE void calc_pix(ArgT& dst, unsigned divisor)
        {
            typedef typename ArgT::value_type component_type;
            dst.r = static_cast<component_type>(r / divisor);
            dst.g = static_cast<component_type>(g / divisor);
            dst.b = static_cast<component_type>(b / divisor);
            dst.a = static_cast<component_type>(a / divisor);
        }

        // Writes (sum * factor) >> shift into each channel of dst,
        // converted to dst's component type.
        template<class ArgT> 
        AGG_INLINE void calc_pix(ArgT& dst, unsigned factor, unsigned shift)
        {
            typedef typename ArgT::value_type component_type;
            dst.r = static_cast<component_type>((r * factor) >> shift);
            dst.g = static_cast<component_type>((g * factor) >> shift);
            dst.b = static_cast<component_type>((b * factor) >> shift);
            dst.a = static_cast<component_type>((a * factor) >> shift);
        }
    };


    //=====================================================stack_blur_calc_rgb
    // Three-channel (r, g, b) accumulator for stack_blur.
    // T is the type of each running sum. The sums are not initialized on
    // construction; call clear() before accumulating.
    template<class T=unsigned> struct stack_blur_calc_rgb
    {
        typedef T value_type;
        value_type r;
        value_type g;
        value_type b;

        // Resets all three sums to zero.
        AGG_INLINE void clear() 
        { 
            r = 0;
            g = 0;
            b = 0;
        }

        // Adds the channels of src (anything exposing r, g, b).
        template<class ArgT> AGG_INLINE void add(const ArgT& src)
        {
            r += src.r;
            g += src.g;
            b += src.b;
        }

        // Adds the channels of src, each multiplied by weight.
        template<class ArgT> AGG_INLINE void add(const ArgT& src, unsigned weight)
        {
            r += src.r * weight;
            g += src.g * weight;
            b += src.b * weight;
        }

        // Subtracts the channels of src.
        template<class ArgT> AGG_INLINE void sub(const ArgT& src)
        {
            r -= src.r;
            g -= src.g;
            b -= src.b;
        }

        // Writes sum / divisor into each channel of dst, converted to
        // dst's component type.
        template<class ArgT> AGG_INLINE void calc_pix(ArgT& dst, unsigned divisor)
        {
            typedef typename ArgT::value_type component_type;
            dst.r = static_cast<component_type>(r / divisor);
            dst.g = static_cast<component_type>(g / divisor);
            dst.b = static_cast<component_type>(b / divisor);
        }

        // Writes (sum * factor) >> shift into each channel of dst,
        // converted to dst's component type.
        template<class ArgT> 
        AGG_INLINE void calc_pix(ArgT& dst, unsigned factor, unsigned shift)
        {
            typedef typename ArgT::value_type component_type;
            dst.r = static_cast<component_type>((r * factor) >> shift);
            dst.g = static_cast<component_type>((g * factor) >> shift);
            dst.b = static_cast<component_type>((b * factor) >> shift);
        }
    };


    //====================================================stack_blur_calc_gray
    // Single-channel accumulator for stack_blur; works with colors that
    // expose their value as a member named v.
    // T is the type of the running sum. The sum is not initialized on
    // construction; call clear() before accumulating.
    template<class T=unsigned> struct stack_blur_calc_gray
    {
        typedef T value_type;
        value_type v;

        // Resets the sum to zero.
        AGG_INLINE void clear() 
        { 
            v = 0; 
        }

        // Adds the value of src.
        template<class ArgT> AGG_INLINE void add(const ArgT& src)
        {
            v += src.v;
        }

        // Adds the value of src multiplied by weight.
        template<class ArgT> AGG_INLINE void add(const ArgT& src, unsigned weight)
        {
            v += src.v * weight;
        }

        // Subtracts the value of src.
        template<class ArgT> AGG_INLINE void sub(const ArgT& src)
        {
            v -= src.v;
        }

        // Writes sum / divisor into dst.v, converted to dst's component type.
        template<class ArgT> AGG_INLINE void calc_pix(ArgT& dst, unsigned divisor)
        {
            typedef typename ArgT::value_type component_type;
            dst.v = static_cast<component_type>(v / divisor);
        }

        // Writes (sum * factor) >> shift into dst.v, converted to dst's
        // component type.
        template<class ArgT> 
        AGG_INLINE void calc_pix(ArgT& dst, unsigned factor, unsigned shift)
        {
            typedef typename ArgT::value_type component_type;
            dst.v = static_cast<component_type>((v * factor) >> shift);
        }
    };



    //========================================================stack_blur_gray8
    // In-place stack blur for images with one 8-bit component per pixel.
    //
    //   img - image accessed through width(), height(), stride(),
    //         pix_ptr(x, y) and the constant Img::pix_width (byte step
    //         between horizontally adjacent pixels).
    //   rx  - horizontal radius; 0 skips the horizontal pass.
    //   ry  - vertical radius; 0 skips the vertical pass.
    //
    // Radii above 254 are clamped to 254, the last entry of the
    // multiply/shift tables. Positions beyond an edge are filled with the
    // nearest edge pixel. An image with zero width or height is left alone.
    template<class Img> 
    void stack_blur_gray8(Img& img, unsigned rx, unsigned ry)
    {
        unsigned x, y, xp, yp, i;
        unsigned stack_ptr;
        unsigned stack_start;

        const int8u* src_pix_ptr;
              int8u* dst_pix_ptr;
        unsigned pix;
        unsigned stack_pix;
        unsigned sum;       // weighted sum over the whole window
        unsigned sum_in;    // plain sum of the pixels ahead of the center
        unsigned sum_out;   // plain sum of the center and the pixels behind it

        unsigned w   = img.width();
        unsigned h   = img.height();

        // Empty image: nothing to blur. Without this check "w - 1" / "h - 1"
        // wrap around and the first pixel of a row or column that does not
        // exist is dereferenced.
        if(w == 0 || h == 0) return;

        unsigned wm  = w - 1;
        unsigned hm  = h - 1;

        unsigned div;
        unsigned mul_sum;
        unsigned shr_sum;

        pod_vector<int8u> stack;

        // Horizontal pass.
        if(rx > 0)
        {
            if(rx > 254) rx = 254;
            div = rx * 2 + 1;
            mul_sum = stack_blur_tables<int>::g_stack_blur8_mul[rx];
            shr_sum = stack_blur_tables<int>::g_stack_blur8_shr[rx];
            stack.allocate(div);

            for(y = 0; y < h; y++)
            {
                sum = sum_in = sum_out = 0;

                // Left half of the window (including the center): the first
                // pixel of the row repeated, with weights 1..rx+1.
                src_pix_ptr = img.pix_ptr(0, y);
                pix = *src_pix_ptr;
                for(i = 0; i <= rx; i++)
                {
                    stack[i] = pix;
                    sum     += pix * (i + 1);
                    sum_out += pix;
                }
                // Right half: pixels 1..rx with weights rx..1; the pointer
                // stops advancing at the last column.
                for(i = 1; i <= rx; i++)
                {
                    if(i <= wm) src_pix_ptr += Img::pix_width; 
                    pix = *src_pix_ptr; 
                    stack[i + rx] = pix;
                    sum    += pix * (rx + 1 - i);
                    sum_in += pix;
                }

                stack_ptr = rx;
                xp = rx;
                if(xp > wm) xp = wm;
                src_pix_ptr = img.pix_ptr(xp, y);
                dst_pix_ptr = img.pix_ptr(0, y);
                for(x = 0; x < w; x++)
                {
                    // The result is written straight into the image; the
                    // source pointer never trails the destination pointer.
                    *dst_pix_ptr = int8u((sum * mul_sum) >> shr_sum);
                    dst_pix_ptr += Img::pix_width;

                    sum -= sum_out;
       
                    // Slot of the oldest pixel in the circular stack.
                    stack_start = stack_ptr + div - rx;
                    if(stack_start >= div) stack_start -= div;
                    sum_out -= stack[stack_start];

                    // Fetch the next pixel; once the last column has been
                    // reached, pix keeps its previous value.
                    if(xp < wm) 
                    {
                        src_pix_ptr += Img::pix_width;
                        pix = *src_pix_ptr;
                        ++xp;
                    }
        
                    stack[stack_start] = pix;
        
                    sum_in += pix;
                    sum    += sum_in;
        
                    // Advance the center: the new center pixel moves from
                    // the incoming sum to the outgoing sum.
                    ++stack_ptr;
                    if(stack_ptr >= div) stack_ptr = 0;
                    stack_pix = stack[stack_ptr];

                    sum_out += stack_pix;
                    sum_in  -= stack_pix;
                }
            }
        }

        // Vertical pass: same scheme, stepping by the row stride.
        if(ry > 0)
        {
            if(ry > 254) ry = 254;
            div = ry * 2 + 1;
            mul_sum = stack_blur_tables<int>::g_stack_blur8_mul[ry];
            shr_sum = stack_blur_tables<int>::g_stack_blur8_shr[ry];
            stack.allocate(div);

            int stride = img.stride();
            for(x = 0; x < w; x++)
            {
                sum = sum_in = sum_out = 0;

                src_pix_ptr = img.pix_ptr(x, 0);
                pix = *src_pix_ptr;
                for(i = 0; i <= ry; i++)
                {
                    stack[i] = pix;
                    sum     += pix * (i + 1);
                    sum_out += pix;
                }
                for(i = 1; i <= ry; i++)
                {
                    if(i <= hm) src_pix_ptr += stride; 
                    pix = *src_pix_ptr; 
                    stack[i + ry] = pix;
                    sum    += pix * (ry + 1 - i);
                    sum_in += pix;
                }

                stack_ptr = ry;
                yp = ry;
                if(yp > hm) yp = hm;
                src_pix_ptr = img.pix_ptr(x, yp);
                dst_pix_ptr = img.pix_ptr(x, 0);
                for(y = 0; y < h; y++)
                {
                    *dst_pix_ptr = int8u((sum * mul_sum) >> shr_sum);
                    dst_pix_ptr += stride;

                    sum -= sum_out;
       
                    stack_start = stack_ptr + div - ry;
                    if(stack_start >= div) stack_start -= div;
                    sum_out -= stack[stack_start];

                    if(yp < hm) 
                    {
                        src_pix_ptr += stride;
                        pix = *src_pix_ptr;
                        ++yp;
                    }
        
                    stack[stack_start] = pix;
        
                    sum_in += pix;
                    sum    += sum_in;
        
                    ++stack_ptr;
                    if(stack_ptr >= div) stack_ptr = 0;
                    stack_pix = stack[stack_ptr];

                    sum_out += stack_pix;
                    sum_in  -= stack_pix;
                }
            }
        }
    }



    //========================================================stack_blur_rgb24
    // In-place stack blur for images with three 8-bit components per pixel.
    //
    //   img - image accessed through width(), height(), stride(),
    //         pix_ptr(x, y) and the constant Img::pix_width (byte step
    //         between horizontally adjacent pixels). Img::order_type gives
    //         the byte offsets R, G, B inside a pixel and Img::color_type
    //         is used for the entries of the pixel stack.
    //   rx  - horizontal radius; 0 skips the horizontal pass.
    //   ry  - vertical radius; 0 skips the vertical pass.
    //
    // Radii above 254 are clamped to 254, the last entry of the
    // multiply/shift tables. Positions beyond an edge are filled with the
    // nearest edge pixel. An image with zero width or height is left alone.
    template<class Img> 
    void stack_blur_rgb24(Img& img, unsigned rx, unsigned ry)
    {
        typedef typename Img::color_type color_type;
        typedef typename Img::order_type order_type;
        enum order_e 
        { 
            R = order_type::R, 
            G = order_type::G, 
            B = order_type::B 
        };

        unsigned x, y, xp, yp, i;
        unsigned stack_ptr;
        unsigned stack_start;

        const int8u* src_pix_ptr;
              int8u* dst_pix_ptr;
        color_type*  stack_pix_ptr;

        // Per channel: sum_* is the weighted sum over the whole window,
        // sum_in_* the plain sum of the pixels ahead of the center and
        // sum_out_* the plain sum of the center and the pixels behind it.
        unsigned sum_r;
        unsigned sum_g;
        unsigned sum_b;
        unsigned sum_in_r;
        unsigned sum_in_g;
        unsigned sum_in_b;
        unsigned sum_out_r;
        unsigned sum_out_g;
        unsigned sum_out_b;

        unsigned w   = img.width();
        unsigned h   = img.height();

        // Empty image: nothing to blur. Without this check "w - 1" / "h - 1"
        // wrap around and the first pixel of a row or column that does not
        // exist is dereferenced.
        if(w == 0 || h == 0) return;

        unsigned wm  = w - 1;
        unsigned hm  = h - 1;

        unsigned div;
        unsigned mul_sum;
        unsigned shr_sum;

        pod_vector<color_type> stack;

        // Horizontal pass.
        if(rx > 0)
        {
            if(rx > 254) rx = 254;
            div = rx * 2 + 1;
            mul_sum = stack_blur_tables<int>::g_stack_blur8_mul[rx];
            shr_sum = stack_blur_tables<int>::g_stack_blur8_shr[rx];
            stack.allocate(div);

            for(y = 0; y < h; y++)
            {
                sum_r = 
                sum_g = 
                sum_b = 
                sum_in_r = 
                sum_in_g = 
                sum_in_b = 
                sum_out_r = 
                sum_out_g = 
                sum_out_b = 0;

                // Left half of the window (including the center): the first
                // pixel of the row repeated, with weights 1..rx+1.
                src_pix_ptr = img.pix_ptr(0, y);
                for(i = 0; i <= rx; i++)
                {
                    stack_pix_ptr    = &stack[i];
                    stack_pix_ptr->r = src_pix_ptr[R];
                    stack_pix_ptr->g = src_pix_ptr[G];
                    stack_pix_ptr->b = src_pix_ptr[B];
                    sum_r           += src_pix_ptr[R] * (i + 1);
                    sum_g           += src_pix_ptr[G] * (i + 1);
                    sum_b           += src_pix_ptr[B] * (i + 1);
                    sum_out_r       += src_pix_ptr[R];
                    sum_out_g       += src_pix_ptr[G];
                    sum_out_b       += src_pix_ptr[B];
                }
                // Right half: pixels 1..rx with weights rx..1; the pointer
                // stops advancing at the last column.
                for(i = 1; i <= rx; i++)
                {
                    if(i <= wm) src_pix_ptr += Img::pix_width; 
                    stack_pix_ptr = &stack[i + rx];
                    stack_pix_ptr->r = src_pix_ptr[R];
                    stack_pix_ptr->g = src_pix_ptr[G];
                    stack_pix_ptr->b = src_pix_ptr[B];
                    sum_r           += src_pix_ptr[R] * (rx + 1 - i);
                    sum_g           += src_pix_ptr[G] * (rx + 1 - i);
                    sum_b           += src_pix_ptr[B] * (rx + 1 - i);
                    sum_in_r        += src_pix_ptr[R];
                    sum_in_g        += src_pix_ptr[G];
                    sum_in_b        += src_pix_ptr[B];
                }

                stack_ptr = rx;
                xp = rx;
                if(xp > wm) xp = wm;
                src_pix_ptr = img.pix_ptr(xp, y);
                dst_pix_ptr = img.pix_ptr(0, y);
                for(x = 0; x < w; x++)
                {
                    // The result is written straight into the image; the
                    // source pointer never trails the destination pointer.
                    dst_pix_ptr[R] = int8u((sum_r * mul_sum) >> shr_sum);
                    dst_pix_ptr[G] = int8u((sum_g * mul_sum) >> shr_sum);
                    dst_pix_ptr[B] = int8u((sum_b * mul_sum) >> shr_sum);
                    dst_pix_ptr   += Img::pix_width;

                    sum_r -= sum_out_r;
                    sum_g -= sum_out_g;
                    sum_b -= sum_out_b;
       
                    // Slot of the oldest pixel in the circular stack; it is
                    // replaced by the pixel entering the window.
                    stack_start = stack_ptr + div - rx;
                    if(stack_start >= div) stack_start -= div;
                    stack_pix_ptr = &stack[stack_start];

                    sum_out_r -= stack_pix_ptr->r;
                    sum_out_g -= stack_pix_ptr->g;
                    sum_out_b -= stack_pix_ptr->b;

                    // Step to the next source pixel, stopping at the last
                    // column.
                    if(xp < wm) 
                    {
                        src_pix_ptr += Img::pix_width;
                        ++xp;
                    }
        
                    stack_pix_ptr->r = src_pix_ptr[R];
                    stack_pix_ptr->g = src_pix_ptr[G];
                    stack_pix_ptr->b = src_pix_ptr[B];
        
                    sum_in_r += src_pix_ptr[R];
                    sum_in_g += src_pix_ptr[G];
                    sum_in_b += src_pix_ptr[B];
                    sum_r    += sum_in_r;
                    sum_g    += sum_in_g;
                    sum_b    += sum_in_b;
        
                    // Advance the center: the new center pixel moves from
                    // the incoming sums to the outgoing sums.
                    ++stack_ptr;
                    if(stack_ptr >= div) stack_ptr = 0;
                    stack_pix_ptr = &stack[stack_ptr];

                    sum_out_r += stack_pix_ptr->r;
                    sum_out_g += stack_pix_ptr->g;
                    sum_out_b += stack_pix_ptr->b;
                    sum_in_r  -= stack_pix_ptr->r;
                    sum_in_g  -= stack_pix_ptr->g;
                    sum_in_b  -= stack_pix_ptr->b;
                }
            }
        }

        // Vertical pass: same scheme, stepping by the row stride.
        if(ry > 0)
        {
            if(ry > 254) ry = 254;
            div = ry * 2 + 1;
            mul_sum = stack_blur_tables<int>::g_stack_blur8_mul[ry];
            shr_sum = stack_blur_tables<int>::g_stack_blur8_shr[ry];
            stack.allocate(div);

            int stride = img.stride();
            for(x = 0; x < w; x++)
            {
                sum_r = 
                sum_g = 
                sum_b = 
                sum_in_r = 
                sum_in_g = 
                sum_in_b = 
                sum_out_r = 
                sum_out_g = 
                sum_out_b = 0;

                src_pix_ptr = img.pix_ptr(x, 0);
                for(i = 0; i <= ry; i++)
                {
                    stack_pix_ptr    = &stack[i];
                    stack_pix_ptr->r = src_pix_ptr[R];
                    stack_pix_ptr->g = src_pix_ptr[G];
                    stack_pix_ptr->b = src_pix_ptr[B];
                    sum_r           += src_pix_ptr[R] * (i + 1);
                    sum_g           += src_pix_ptr[G] * (i + 1);
                    sum_b           += src_pix_ptr[B] * (i + 1);
                    sum_out_r       += src_pix_ptr[R];
                    sum_out_g       += src_pix_ptr[G];
                    sum_out_b       += src_pix_ptr[B];
                }
                for(i = 1; i <= ry; i++)
                {
                    if(i <= hm) src_pix_ptr += stride; 
                    stack_pix_ptr = &stack[i + ry];
                    stack_pix_ptr->r = src_pix_ptr[R];
                    stack_pix_ptr->g = src_pix_ptr[G];
                    stack_pix_ptr->b = src_pix_ptr[B];
                    sum_r           += src_pix_ptr[R] * (ry + 1 - i);
                    sum_g           += src_pix_ptr[G] * (ry + 1 - i);
                    sum_b           += src_pix_ptr[B] * (ry + 1 - i);
                    sum_in_r        += src_pix_ptr[R];
                    sum_in_g        += src_pix_ptr[G];
                    sum_in_b        += src_pix_ptr[B];
                }

                stack_ptr = ry;
                yp = ry;
                if(yp > hm) yp = hm;
                src_pix_ptr = img.pix_ptr(x, yp);
                dst_pix_ptr = img.pix_ptr(x, 0);
                for(y = 0; y < h; y++)
                {
                    dst_pix_ptr[R] = int8u((sum_r * mul_sum) >> shr_sum);
                    dst_pix_ptr[G] = int8u((sum_g * mul_sum) >> shr_sum);
                    dst_pix_ptr[B] = int8u((sum_b * mul_sum) >> shr_sum);
                    dst_pix_ptr += stride;

                    sum_r -= sum_out_r;
                    sum_g -= sum_out_g;
                    sum_b -= sum_out_b;
       
                    stack_start = stack_ptr + div - ry;
                    if(stack_start >= div) stack_start -= div;

                    stack_pix_ptr = &stack[stack_start];
                    sum_out_r -= stack_pix_ptr->r;
                    sum_out_g -= stack_pix_ptr->g;
                    sum_out_b -= stack_pix_ptr->b;

                    if(yp < hm) 
                    {
                        src_pix_ptr += stride;
                        ++yp;
                    }
        
                    stack_pix_ptr->r = src_pix_ptr[R];
                    stack_pix_ptr->g = src_pix_ptr[G];
                    stack_pix_ptr->b = src_pix_ptr[B];
        
                    sum_in_r += src_pix_ptr[R];
                    sum_in_g += src_pix_ptr[G];
                    sum_in_b += src_pix_ptr[B];
                    sum_r    += sum_in_r;
                    sum_g    += sum_in_g;
                    sum_b    += sum_in_b;
        
                    ++stack_ptr;
                    if(stack_ptr >= div) stack_ptr = 0;
                    stack_pix_ptr = &stack[stack_ptr];

                    sum_out_r += stack_pix_ptr->r;
                    sum_out_g += stack_pix_ptr->g;
                    sum_out_b += stack_pix_ptr->b;
                    sum_in_r  -= stack_pix_ptr->r;
                    sum_in_g  -= stack_pix_ptr->g;
                    sum_in_b  -= stack_pix_ptr->b;
                }
            }
        }
    }



    //=======================================================stack_blur_rgba32
    template<class Img> 
    void stack_blur_rgba32(Img& img, unsigned rx, unsigned ry)
    {
        typedef typename Img::color_type color_type;
        typedef typename Img::order_type order_type;
        enum order_e 
        { 
            R = order_type::R, 
            G = order_type::G, 
            B = order_type::B,
            A = order_type::A 
        };

        unsigned x, y, xp, yp, i;
        unsigned stack_ptr;
        unsigned stack_start;

        const int8u* src_pix_ptr;
              int8u* dst_pix_ptr;
        color_type*  stack_pix_ptr;

        unsigned sum_r;
        unsigned sum_g;
        unsigned sum_b;
        unsigned sum_a;
        unsigned sum_in_r;
        unsigned sum_in_g;
        unsigned sum_in_b;
        unsigned sum_in_a;
        unsigned sum_out_r;
        unsigned sum_out_g;
        unsigned sum_out_b;
        unsigned sum_out_a;

        unsigned w   = img.width();
        unsigned h   = img.height();
        unsigned wm  = w - 1;
        unsigned hm  = h - 1;

        unsigned div;
        unsigned mul_sum;
        unsigned shr_sum;

        pod_vector<color_type> stack;

        if(rx > 0)
        {
            if(rx > 254) rx = 254;
            div = rx * 2 + 1;
            mul_sum = stack_blur_tables<int>::g_stack_blur8_mul[rx];
            shr_sum = stack_blur_tables<int>::g_stack_blur8_shr[rx];
            stack.allocate(div);

            for(y = 0; y < h; y++)
            {
                sum_r = 
                sum_g = 
                sum_b = 
                sum_a = 
                sum_in_r = 
                sum_in_g = 
                sum_in_b = 
                sum_in_a = 
                sum_out_r = 
                sum_out_g = 
                sum_out_b = 
                sum_out_a = 0;

                src_pix_ptr = img.pix_ptr(0, y);
                for(i = 0; i <= rx; i++)
                {
                    stack_pix_ptr    = &stack[i];
                    stack_pix_ptr->r = src_pix_ptr[R];
                    stack_pix_ptr->g = src_pix_ptr[G];
                    stack_pix_ptr->b = src_pix_ptr[B];
                    stack_pix_ptr->a = src_pix_ptr[A];
                    sum_r           += src_pix_ptr[R] * (i + 1);
                    sum_g           += src_pix_ptr[G] * (i + 1);
                    sum_b           += src_pix_ptr[B] * (i + 1);
                    sum_a           += src_pix_ptr[A] * (i + 1);
                    sum_out_r       += src_pix_ptr[R];
                    sum_out_g       += src_pix_ptr[G];
                    sum_out_b       += src_pix_ptr[B];
                    sum_out_a       += src_pix_ptr[A];
                }
                for(i = 1; i <= rx; i++)
                {
                    if(i <= wm) src_pix_ptr += Img::pix_width; 
                    stack_pix_ptr = &stack[i + rx];
                    stack_pix_ptr->r = src_pix_ptr[R];
                    stack_pix_ptr->g = src_pix_ptr[G];
                    stack_pix_ptr->b = src_pix_ptr[B];
                    stack_pix_ptr->a = src_pix_ptr[A];
                    sum_r           += src_pix_ptr[R] * (rx + 1 - i);
                    sum_g           += src_pix_ptr[G] * (rx + 1 - i);
                    sum_b           += src_pix_ptr[B] * (rx + 1 - i);
                    sum_a           += src_pix_ptr[A] * (rx + 1 - i);
                    sum_in_r        += src_pix_ptr[R];
                    sum_in_g        += src_pix_ptr[G];
                    sum_in_b        += src_pix_ptr[B];
                    sum_in_a        += src_pix_ptr[A];
                }

                stack_ptr = rx;
                xp = rx;
                if(xp > wm) xp = wm;
                src_pix_ptr = img.pix_ptr(xp, y);
                dst_pix_ptr = img.pix_ptr(0, y);
                for(x = 0; x < w; x++)
                {
                    dst_pix_ptr[R] = (sum_r * mul_sum) >> shr_sum;
                    dst_pix_ptr[G] = (sum_g * mul_sum) >> shr_sum;
                    dst_pix_ptr[B] = (sum_b * mul_sum) >> shr_sum;
                    dst_pix_ptr[A] = (sum_a * mul_sum) >> shr_sum;
                    dst_pix_ptr += Img::pix_width;

                    sum_r -= sum_out_r;
                    sum_g -= sum_out_g;
                    sum_b -= sum_out_b;
                    sum_a -= sum_out_a;
       
                    stack_start = stack_ptr + div - rx;
                    if(stack_start >= div) stack_start -= div;
                    stack_pix_ptr = &stack[stack_start];

                    sum_out_r -= stack_pix_ptr->r;
                    sum_out_g -= stack_pix_ptr->g;
                    sum_out_b -= stack_pix_ptr->b;
                    sum_out_a -= stack_pix_ptr->a;

                    if(xp < wm) 
                    {
                        src_pix_ptr += Img::pix_width;
                        ++xp;
                    }
        
                    stack_pix_ptr->r = src_pix_ptr[R];
                    stack_pix_ptr->g = src_pix_ptr[G];
                    stack_pix_ptr->b = src_pix_ptr[B];
                    stack_pix_ptr->a = src_pix_ptr[A];
        
                    sum_in_r += src_pix_ptr[R];
                    sum_in_g += src_pix_ptr[G];
                    sum_in_b += src_pix_ptr[B];
                    sum_in_a += src_pix_ptr[A];
                    sum_r    += sum_in_r;
                    sum_g    += sum_in_g;
                    sum_b    += sum_in_b;
                    sum_a    += sum_in_a;
        
                    ++stack_ptr;
                    if(stack_ptr >= div) stack_ptr = 0;
                    stack_pix_ptr = &stack[stack_ptr];

                    sum_out_r += stack_pix_ptr->r;
                    sum_out_g += stack_pix_ptr->g;
                    sum_out_b += stack_pix_ptr->b;
                    sum_out_a += stack_pix_ptr->a;
                    sum_in_r  -= stack_pix_ptr->r;
                    sum_in_g  -= stack_pix_ptr->g;
                    sum_in_b  -= stack_pix_ptr->b;
                    sum_in_a  -= stack_pix_ptr->a;
                }
            }
        }

        if(ry > 0)
        {
            if(ry > 254) ry = 254;
            div = ry * 2 + 1;
            mul_sum = stack_blur_tables<int>::g_stack_blur8_mul[ry];
            shr_sum = stack_blur_tables<int>::g_stack_blur8_shr[ry];
            stack.allocate(div);

            int stride = img.stride();
            for(x = 0; x < w; x++)
            {
                sum_r = 
                sum_g = 
                sum_b = 
                sum_a = 
                sum_in_r = 
                sum_in_g = 
                sum_in_b = 
                sum_in_a = 
                sum_out_r = 
                sum_out_g = 
                sum_out_b = 
                sum_out_a = 0;

                src_pix_ptr = img.pix_ptr(x, 0);
                for(i = 0; i <= ry; i++)
                {
                    stack_pix_ptr    = &stack[i];
                    stack_pix_ptr->r = src_pix_ptr[R];
                    stack_pix_ptr->g = src_pix_ptr[G];
                    stack_pix_ptr->b = src_pix_ptr[B];
                    stack_pix_ptr->a = src_pix_ptr[A];
                    sum_r           += src_pix_ptr[R] * (i + 1);
                    sum_g           += src_pix_ptr[G] * (i + 1);
                    sum_b           += src_pix_ptr[B] * (i + 1);
                    sum_a           += src_pix_ptr[A] * (i + 1);
                    sum_out_r       += src_pix_ptr[R];
                    sum_out_g       += src_pix_ptr[G];
                    sum_out_b       += src_pix_ptr[B];
                    sum_out_a       += src_pix_ptr[A];
                }
                for(i = 1; i <= ry; i++)
                {
                    if(i <= hm) src_pix_ptr += stride; 
                    stack_pix_ptr = &stack[i + ry];
                    stack_pix_ptr->r = src_pix_ptr[R];
                    stack_pix_ptr->g = src_pix_ptr[G];
                    stack_pix_ptr->b = src_pix_ptr[B];
                    stack_pix_ptr->a = src_pix_ptr[A];
                    sum_r           += src_pix_ptr[R] * (ry + 1 - i);
                    sum_g           += src_pix_ptr[G] * (ry + 1 - i);
                    sum_b           += src_pix_ptr[B] * (ry + 1 - i);
                    sum_a           += src_pix_ptr[A] * (ry + 1 - i);
                    sum_in_r        += src_pix_ptr[R];
                    sum_in_g        += src_pix_ptr[G];
                    sum_in_b        += src_pix_ptr[B];
                    sum_in_a        += src_pix_ptr[A];
                }

                stack_ptr = ry;
                yp = ry;
                if(yp > hm) yp = hm;
                src_pix_ptr = img.pix_ptr(x, yp);
                dst_pix_ptr = img.pix_ptr(x, 0);
                for(y = 0; y < h; y++)
                {
                    dst_pix_ptr[R] = (sum_r * mul_sum) >> shr_sum;
                    dst_pix_ptr[G] = (sum_g * mul_sum) >> shr_sum;
                    dst_pix_ptr[B] = (sum_b * mul_sum) >> shr_sum;
                    dst_pix_ptr[A] = (sum_a * mul_sum) >> shr_sum;
                    dst_pix_ptr += stride;

                    sum_r -= sum_out_r;
                    sum_g -= sum_out_g;
                    sum_b -= sum_out_b;
                    sum_a -= sum_out_a;
       
                    stack_start = stack_ptr + div - ry;
                    if(stack_start >= div) stack_start -= div;

                    stack_pix_ptr = &stack[stack_start];
                    sum_out_r -= stack_pix_ptr->r;
                    sum_out_g -= stack_pix_ptr->g;
                    sum_out_b -= stack_pix_ptr->b;
                    sum_out_a -= stack_pix_ptr->a;

                    if(yp < hm) 
                    {
                        src_pix_ptr += stride;
                        ++yp;
                    }
        
                    stack_pix_ptr->r = src_pix_ptr[R];
                    stack_pix_ptr->g = src_pix_ptr[G];
                    stack_pix_ptr->b = src_pix_ptr[B];
                    stack_pix_ptr->a = src_pix_ptr[A];
        
                    sum_in_r += src_pix_ptr[R];
                    sum_in_g += src_pix_ptr[G];
                    sum_in_b += src_pix_ptr[B];
                    sum_in_a += src_pix_ptr[A];
                    sum_r    += sum_in_r;
                    sum_g    += sum_in_g;
                    sum_b    += sum_in_b;
                    sum_a    += sum_in_a;
        
                    ++stack_ptr;
                    if(stack_ptr >= div) stack_ptr = 0;
                    stack_pix_ptr = &stack[stack_ptr];

                    sum_out_r += stack_pix_ptr->r;
                    sum_out_g += stack_pix_ptr->g;
                    sum_out_b += stack_pix_ptr->b;
                    sum_out_a += stack_pix_ptr->a;
                    sum_in_r  -= stack_pix_ptr->r;
                    sum_in_g  -= stack_pix_ptr->g;
                    sum_in_b  -= stack_pix_ptr->b;
                    sum_in_a  -= stack_pix_ptr->a;
                }
            }
        }
    }



    //===========================================================recursive_blur
    template<class ColorT, class CalculatorT> class recursive_blur
    {
    public:
        typedef ColorT color_type;
        typedef CalculatorT calculator_type;
        typedef typename color_type::value_type value_type;
        typedef typename calculator_type::value_type calc_type;

        //--------------------------------------------------------------------
        template<class Img> void blur_x(Img& img, double radius)
        {
            if(radius < 0.62) return;
            if(img.width() < 3) return;

            calc_type s = calc_type(radius * 0.5);
            calc_type q = calc_type((s < 2.5) ?
                                    3.97156 - 4.14554 * sqrt(1 - 0.26891 * s) :
                                    0.98711 * s - 0.96330);

            calc_type q2 = calc_type(q * q);
            calc_type q3 = calc_type(q2 * q);

            calc_type b0 = calc_type(1.0 / (1.578250 + 
                                            2.444130 * q + 
                                            1.428100 * q2 + 
                                            0.422205 * q3));

            calc_type b1 = calc_type( 2.44413 * q + 
                                      2.85619 * q2 + 
                                      1.26661 * q3);

            calc_type b2 = calc_type(-1.42810 * q2 + 
                                     -1.26661 * q3);

            calc_type b3 = calc_type(0.422205 * q3);

            calc_type b  = calc_type(1 - (b1 + b2 + b3) * b0);

            b1 *= b0;
            b2 *= b0;
            b3 *= b0;

            int w = img.width();
            int h = img.height();
            int wm = w-1;
            int x, y;

            m_sum1.allocate(w);
            m_sum2.allocate(w);
            m_buf.allocate(w);

            for(y = 0; y < h; y++)
            {
                calculator_type c;
                c.from_pix(img.pixel(0, y));
                m_sum1[0].calc(b, b1, b2, b3, c, c, c, c);
                c.from_pix(img.pixel(1, y));
                m_sum1[1].calc(b, b1, b2, b3, c, m_sum1[0], m_sum1[0], m_sum1[0]);
                c.from_pix(img.pixel(2, y));
                m_sum1[2].calc(b, b1, b2, b3, c, m_sum1[1], m_sum1[0], m_sum1[0]);

                for(x = 3; x < w; ++x)
                {
                    c.from_pix(img.pixel(x, y));
                    m_sum1[x].calc(b, b1, b2, b3, c, m_sum1[x-1], m_sum1[x-2], m_sum1[x-3]);
                }
    
                m_sum2[wm  ].calc(b, b1, b2, b3, m_sum1[wm  ], m_sum1[wm  ], m_sum1[wm], m_sum1[wm]);
                m_sum2[wm-1].calc(b, b1, b2, b3, m_sum1[wm-1], m_sum2[wm  ], m_sum2[wm], m_sum2[wm]);
                m_sum2[wm-2].calc(b, b1, b2, b3, m_sum1[wm-2], m_sum2[wm-1], m_sum2[wm], m_sum2[wm]);
                m_sum2[wm  ].to_pix(m_buf[wm  ]);
                m_sum2[wm-1].to_pix(m_buf[wm-1]);
                m_sum2[wm-2].to_pix(m_buf[wm-2]);

                for(x = wm-3; x >= 0; --x)
                {
                    m_sum2[x].calc(b, b1, b2, b3, m_sum1[x], m_sum2[x+1], m_sum2[x+2], m_sum2[x+3]);
                    m_sum2[x].to_pix(m_buf[x]);
                }
                img.copy_color_hspan(0, y, w, &m_buf[0]);
            }
        }

        //--------------------------------------------------------------------
        template<class Img> void blur_y(Img& img, double radius)
        {
            pixfmt_transposer<Img> img2(img);
            blur_x(img2, radius);
        }

        //--------------------------------------------------------------------
        template<class Img> void blur(Img& img, double radius)
        {
            blur_x(img, radius);
            pixfmt_transposer<Img> img2(img);
            blur_x(img2, radius);
        }

    private:
        agg::pod_vector<calculator_type> m_sum1;
        agg::pod_vector<calculator_type> m_sum2;
        agg::pod_vector<color_type>      m_buf;
    };


    //=================================================recursive_blur_calc_rgba
    // Accumulator for pixels with r, g, b and a channels. It provides the
    // from_pix() / calc() / to_pix() operations that recursive_blur invokes
    // on its calculator type. T is the arithmetic type of the channel sums.
    template<class T=double> struct recursive_blur_calc_rgba
    {
        typedef T value_type;
        typedef recursive_blur_calc_rgba<T> self_type;

        value_type r,g,b,a;

        // Copies the four channels of c into this accumulator.
        template<class ColorT> 
        AGG_INLINE void from_pix(const ColorT& c)
        {
            r = c.r;
            g = c.g;
            b = c.b;
            a = c.a;
        }

        // Sets every channel to b1*c1 + b2*c2 + b3*c3 + b4*c4, evaluated
        // independently per channel.
        AGG_INLINE void calc(value_type b1, 
                             value_type b2, 
                             value_type b3, 
                             value_type b4,
                             const self_type& c1, 
                             const self_type& c2, 
                             const self_type& c3, 
                             const self_type& c4)
        {
            r = weighted_sum(b1, c1.r, b2, c2.r, b3, c3.r, b4, c4.r);
            g = weighted_sum(b1, c1.g, b2, c2.g, b3, c3.g, b4, c4.g);
            b = weighted_sum(b1, c1.b, b2, c2.b, b3, c3.b, b4, c4.b);
            a = weighted_sum(b1, c1.a, b2, c2.a, b3, c3.a, b4, c4.a);
        }

        // Writes the channels to c, converting each one to the color's
        // value_type with a plain cast (no rounding or clamping).
        template<class ColorT> 
        AGG_INLINE void to_pix(ColorT& c) const
        {
            typedef typename ColorT::value_type channel_type;
            c.r = channel_type(r);
            c.g = channel_type(g);
            c.b = channel_type(b);
            c.a = channel_type(a);
        }

    private:
        // Four-term weighted sum, accumulated left to right.
        static value_type weighted_sum(value_type w1, value_type v1,
                                       value_type w2, value_type v2,
                                       value_type w3, value_type v3,
                                       value_type w4, value_type v4)
        {
            return w1*v1 + w2*v2 + w3*v3 + w4*v4;
        }
    };


    //=================================================recursive_blur_calc_rgb
    // Accumulator for pixels with r, g and b channels (no alpha). It
    // provides the from_pix() / calc() / to_pix() operations that
    // recursive_blur invokes on its calculator type. T is the arithmetic
    // type of the channel sums.
    template<class T=double> struct recursive_blur_calc_rgb
    {
        typedef T value_type;
        typedef recursive_blur_calc_rgb<T> self_type;

        value_type r,g,b;

        // Copies the three channels of c into this accumulator.
        template<class ColorT> 
        AGG_INLINE void from_pix(const ColorT& c)
        {
            r = c.r;
            g = c.g;
            b = c.b;
        }

        // Sets every channel to b1*c1 + b2*c2 + b3*c3 + b4*c4, evaluated
        // independently per channel.
        AGG_INLINE void calc(value_type b1, 
                             value_type b2, 
                             value_type b3, 
                             value_type b4,
                             const self_type& c1, 
                             const self_type& c2, 
                             const self_type& c3, 
                             const self_type& c4)
        {
            r = weighted_sum(b1, c1.r, b2, c2.r, b3, c3.r, b4, c4.r);
            g = weighted_sum(b1, c1.g, b2, c2.g, b3, c3.g, b4, c4.g);
            b = weighted_sum(b1, c1.b, b2, c2.b, b3, c3.b, b4, c4.b);
        }

        // Writes the channels to c, converting each one to the color's
        // value_type with a plain cast (no rounding or clamping).
        template<class ColorT> 
        AGG_INLINE void to_pix(ColorT& c) const
        {
            typedef typename ColorT::value_type channel_type;
            c.r = channel_type(r);
            c.g = channel_type(g);
            c.b = channel_type(b);
        }

    private:
        // Four-term weighted sum, accumulated left to right.
        static value_type weighted_sum(value_type w1, value_type v1,
                                       value_type w2, value_type v2,
                                       value_type w3, value_type v3,
                                       value_type w4, value_type v4)
        {
            return w1*v1 + w2*v2 + w3*v3 + w4*v4;
        }
    };


    //================================================recursive_blur_calc_gray
    // Accumulator for single-channel pixels (member v). It provides the
    // from_pix() / calc() / to_pix() operations that recursive_blur invokes
    // on its calculator type. T is the arithmetic type of the sum.
    template<class T=double> struct recursive_blur_calc_gray
    {
        typedef T value_type;
        typedef recursive_blur_calc_gray<T> self_type;

        value_type v;

        // Copies the value of c into this accumulator.
        template<class ColorT> 
        AGG_INLINE void from_pix(const ColorT& c)
        {
            v = c.v;
        }

        // Sets the value to b1*c1 + b2*c2 + b3*c3 + b4*c4.
        AGG_INLINE void calc(value_type b1, 
                             value_type b2, 
                             value_type b3, 
                             value_type b4,
                             const self_type& c1, 
                             const self_type& c2, 
                             const self_type& c3, 
                             const self_type& c4)
        {
            v = weighted_sum(b1, c1.v, b2, c2.v, b3, c3.v, b4, c4.v);
        }

        // Writes the value to c, converting it to the color's value_type
        // with a plain cast (no rounding or clamping).
        template<class ColorT> 
        AGG_INLINE void to_pix(ColorT& c) const
        {
            typedef typename ColorT::value_type channel_type;
            c.v = channel_type(v);
        }

    private:
        // Four-term weighted sum, accumulated left to right.
        static value_type weighted_sum(value_type w1, value_type v1,
                                       value_type w2, value_type v2,
                                       value_type w3, value_type v3,
                                       value_type w4, value_type v4)
        {
            return w1*v1 + w2*v2 + w3*v3 + w4*v4;
        }
    };

    //================================================slight_blur
    // Special-purpose filter for applying a Gaussian blur with a radius small enough 
    // that the blur only affects adjacent pixels. A Gaussian curve with a standard
    // deviation of r/2 is used, as per the HTML/CSS spec. At 3 standard deviations, 
    // the contribution drops to less than 0.005, i.e. less than half a percent, 
    // therefore the radius can be at least 1.33 before errors become significant.
    // This filter is useful for smoothing artifacts caused by detail rendered 
    // at the pixel scale, e.g. single-pixel lines. Note that the filter should 
    // only be used with premultiplied pixel formats (or those without alpha).
    // See the "line_thickness" example for a demonstration.
    template<class PixFmt>
    class slight_blur
    {
    public:
        typedef typename PixFmt::pixel_type pixel_type;
        typedef typename PixFmt::value_type value_type;
        typedef typename PixFmt::order_type order_type;

        // Creates the filter and computes its weights for radius r.
        slight_blur(double r = 1.33)
        {
            radius(r);
        }

        // Recomputes the 3-tap kernel (m_g1, m_g0, m_g1) for radius r.
        // A non-positive r yields the identity kernel (1 for the center,
        // 0 for the neighbors).
        void radius(double r)
        {
            if (r > 0)
            {
                // With a standard deviation of r/2, the adjacent pixel lies
                // n = 2/r standard deviations from the center. The curve is
                // sampled at 0 (center tap) and at n (neighbor taps).
                // The 1/sqrt(2*pi) factor cancels out in the normalization
                // below, so the precision of pi does not affect the result.
                // NOTE(review): the neighbor weight uses exp(-n*n) while the
                // normal density is exp(-n*n/2); kept as is to preserve the
                // existing output - confirm whether this is intended.
                double pi = 3.14159;
                double n = 2 / r;
                m_g0 = 1 / sqrt(2 * pi);
                m_g1 = m_g0 * exp(-n * n);

                // Normalize so that the three taps sum to 1.
                double sum = m_g0 + 2 * m_g1;
                m_g0 /= sum;
                m_g1 /= sum;
            }
            else
            {
                m_g0 = 1;
                m_g1 = 0;
            }
        }

        // Blurs, in place, the part of img covered by bounds (inclusive
        // coordinates). The rectangle is clipped to the image first; nothing
        // is done when the clipped area is narrower or shorter than 3 pixels.
        // Edge pixels are handled by duplicating the outermost row/column.
        void blur(PixFmt& img, rect_i bounds)
        {
            // Make sure we stay within the image area.
            bounds.clip(rect_i(0, 0, img.width() - 1, img.height() - 1));

            int w = bounds.x2 - bounds.x1 + 1;
            int h = bounds.y2 - bounds.y1 + 1;

            if (w < 3 || h < 3) return;

            // Allocate 3 rows of buffer space.
            m_buf.allocate(w * 3);

            // Set up row pointers: r0 = row above, r1 = current row,
            // r2 = row below. The three rows rotate through the buffer.
            pixel_type * begin = &m_buf[0];
            pixel_type * r0 = begin;
            pixel_type * r1 = r0 + w;
            pixel_type * r2 = r1 + w;
            pixel_type * end = r2 + w;

            // Horizontally blur the first input row and duplicate it so it
            // also stands in for the (missing) row above the top edge.
            calc_row(img, bounds.x1, bounds.y1, w, r0);
            memcpy(r1, r0, w * sizeof(pixel_type));

            for (int y = 0; ; )
            {
                // Get pointer to first pixel of the destination row. The
                // last argument is the span length, the same value that
                // calc_row() passes for this span.
                pixel_type* p = img.pix_value_ptr(bounds.x1, bounds.y1 + y, w);

                // Horizontally blur the row below. This reads row y + 1
                // before row y is overwritten, so the source is unmodified.
                if (y + 1 < h)
                {
                    calc_row(img, bounds.x1, bounds.y1 + y + 1, w, r2);
                }
                else
                {
                    memcpy(r2, r1, w * sizeof(pixel_type)); // duplicate bottom row
                }

                // Combine blurred rows into destination. The destination is
                // stepped with next(), exactly as calc_row() steps through
                // the source pixels.
                for (int x = 0; x < w; ++x)
                {
                    calc_pixel(*r0++, *r1++, *r2++, *p);
                    p = p->next();
                }

                if (++y >= h) break;

                // Exactly one of the row pointers has just run off the end
                // of the buffer; wrap it back to the start.
                if (r2 == end) r2 = begin;
                else if (r1 == end) r1 = begin;
                else if (r0 == end) r0 = begin;
            }
        }

    private:
        // Horizontally blurs w pixels of image row y starting at column x
        // and stores the result in row[0..w-1]. The image is not modified.
        void calc_row(PixFmt& img, int x, int y, int w, pixel_type* row)
        {
            const int wm = w - 1;

            pixel_type* p = img.pix_value_ptr(x, y, w);

            // Three-slot ring holding the previous, current and next pixel.
            pixel_type c[3];
            pixel_type* p0 = c;
            pixel_type* p1 = c + 1;
            pixel_type* p2 = c + 2;
            pixel_type* end = c + 3;

            // The left neighbor of the first pixel is the pixel itself.
            *p0 = *p1 = *p;

            for (int i = 0; i < wm; ++i)
            {
                *p2 = *(p = p->next());

                calc_pixel(*p0++, *p1++, *p2++, *row++);

                // Wrap whichever slot pointer ran past the ring.
                if (p0 == end) p0 = c;
                else if (p1 == end) p1 = c;
                else if (p2 == end) p2 = c;
            }

            // Last pixel: it is used as its own right neighbor.
            calc_pixel(*p0, *p1, *p1, *row);
        }

        // Blends three neighboring pixels into x, dispatching on the pixel
        // format category to pick the channels to process.
        void calc_pixel(
            pixel_type const & c1,
            pixel_type const & c2,
            pixel_type const & c3,
            pixel_type & x)
        {
            calc_pixel(c1, c2, c3, x, PixFmt::pixfmt_category());
        }

        // Gray formats: component 0 only.
        void calc_pixel(
            pixel_type const & c1,
            pixel_type const & c2,
            pixel_type const & c3,
            pixel_type & x,
            pixfmt_gray_tag)
        {
            x.c[0] = calc_value(c1.c[0], c2.c[0], c3.c[0]);
        }

        // RGB formats: R, G and B at the positions given by order_type.
        void calc_pixel(
            pixel_type const & c1,
            pixel_type const & c2,
            pixel_type const & c3,
            pixel_type & x,
            pixfmt_rgb_tag)
        {
            enum { R = order_type::R, G = order_type::G, B = order_type::B };
            x.c[R] = calc_value(c1.c[R], c2.c[R], c3.c[R]);
            x.c[G] = calc_value(c1.c[G], c2.c[G], c3.c[G]);
            x.c[B] = calc_value(c1.c[B], c2.c[B], c3.c[B]);
        }

        // RGBA formats: R, G, B and A at the positions given by order_type.
        void calc_pixel(
            pixel_type const & c1,
            pixel_type const & c2,
            pixel_type const & c3,
            pixel_type & x,
            pixfmt_rgba_tag)
        {
            enum { R = order_type::R, G = order_type::G, B = order_type::B, A = order_type::A };
            x.c[R] = calc_value(c1.c[R], c2.c[R], c3.c[R]);
            x.c[G] = calc_value(c1.c[G], c2.c[G], c3.c[G]);
            x.c[B] = calc_value(c1.c[B], c2.c[B], c3.c[B]);
            x.c[A] = calc_value(c1.c[A], c2.c[A], c3.c[A]);
        }

        // Applies the kernel to one component: neighbors weighted by m_g1,
        // center by m_g0. The result is converted with a plain cast.
        value_type calc_value(value_type v1, value_type v2, value_type v3)
        {
            return value_type(m_g1 * v1 + m_g0 * v2 + m_g1 * v3);
        }

        double m_g0, m_g1;            // center and neighbor kernel weights
        pod_vector<pixel_type> m_buf; // three-row scratch buffer used by blur()
    };

    // Helper functions for applying blur to a surface without having to create an intermediate object.

    template<class PixFmt>
    void apply_slight_blur(PixFmt& img, const rect_i& bounds, double r = 1)
    {
        if (r > 0) slight_blur<PixFmt>(r).blur(img, bounds);
    }

    template<class PixFmt>
    void apply_slight_blur(PixFmt& img, double r = 1)
    {
        if (r > 0) slight_blur<PixFmt>(r).blur(img, rect_i(0, 0, img.width() - 1, img.height() - 1));
    }

    // Applies a slight blur of radius r to the given bounds of the pixel
    // format behind a renderer_base. Does nothing unless r is greater
    // than zero.
    template<class PixFmt>
    void apply_slight_blur(renderer_base<PixFmt>& img, const rect_i& bounds, double r = 1)
    {
        if (!(r > 0)) return;

        slight_blur<PixFmt> filter(r);
        filter.blur(img.ren(), bounds);
    }

    // Applies a slight blur of radius r to the clip box of a renderer_base,
    // operating on its underlying pixel format. Does nothing unless r is
    // greater than zero.
    template<class PixFmt>
    void apply_slight_blur(renderer_base<PixFmt>& img, double r = 1)
    {
        if (!(r > 0)) return;

        slight_blur<PixFmt> filter(r);
        filter.blur(img.ren(), img.clip_box());
    }
}




#endif