/*
 * a64 video encoder - multicolor modes
 * Copyright (c) 2009 Tobias Bindhammer
 *
 * This file is part of Libav.
 *
 * Libav is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * Libav is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with Libav; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/**
 * @file
 * a64 video encoder - multicolor modes
 */

#include "a64enc.h"
#include "a64colors.h"
#include "a64tables.h"
#include "elbg.h"
#include "internal.h"
#include "libavutil/intreadwrite.h"

#define DITHERSTEPS   8
#define CHARSET_CHARS 256
#define INTERLACED    1
#define CROP_SCREENS  1

/* gray gradient */
static const int mc_colors[5]={0x0,0xb,0xc,0xf,0x1};

/* other possible gradients - to be tested */
//static const int mc_colors[5]={0x0,0x8,0xa,0xf,0x7};
//static const int mc_colors[5]={0x0,0x9,0x8,0xa,0x3};

static void to_meta_with_crop(AVCodecContext *avctx, AVFrame *p, int *dest)
{
    int blockx, blocky, x, y;
    int luma = 0;
    int height = FFMIN(avctx->height, C64YRES);
    int width  = FFMIN(avctx->width , C64XRES);
    uint8_t *src = p->data[0];

    for (blocky = 0; blocky < C64YRES; blocky += 8) {
        for (blockx = 0; blockx < C64XRES; blockx += 8) {
            for (y = blocky; y < blocky + 8 && y < C64YRES; y++) {
                for (x = blockx; x < blockx + 8 && x < C64XRES; x += 2) {
                    if(x < width && y < height) {
                        /* build average over 2 pixels */
                        luma = (src[(x + 0 + y * p->linesize[0])] +
                                src[(x + 1 + y * p->linesize[0])]) / 2;
                        /* write blocks as linear data now so they are suitable for elbg */
                        dest[0] = luma;
                    }
                    dest++;
                }
            }
        }
    }
}

static void render_charset(AVCodecContext *avctx, uint8_t *charset,
                           uint8_t *colrammap)
{
    A64Context *c = avctx->priv_data;
    uint8_t row1, row2;
    int charpos, x, y;
    int a, b;
    uint8_t pix;
    int lowdiff, highdiff;
    int *best_cb = c->mc_best_cb;
    static uint8_t index1[256];
    static uint8_t index2[256];
    static uint8_t dither[256];
    int i;
    int distance;

    /* generate lookup-tables for dither and index before looping */
    i = 0;
    for (a=0; a < 256; a++) {
        if(i < c->mc_pal_size -1 && a == c->mc_luma_vals[i + 1]) {
            distance = c->mc_luma_vals[i + 1] - c->mc_luma_vals[i];
            for(b = 0; b <= distance; b++) {
                  dither[c->mc_luma_vals[i] + b] = b * (DITHERSTEPS - 1) / distance;
            }
            i++;
        }
        if(i >= c->mc_pal_size - 1) dither[a] = 0;
        index1[a] = i;
        index2[a] = FFMIN(i + 1, c->mc_pal_size - 1);
    }

    /* and render charset */
    for (charpos = 0; charpos < CHARSET_CHARS; charpos++) {
        lowdiff  = 0;
        highdiff = 0;
        for (y = 0; y < 8; y++) {
            row1 = 0; row2 = 0;
            for (x = 0; x < 4; x++) {
                pix = best_cb[y * 4 + x];

                /* accumulate error for brightest/darkest color */
                if (index1[pix] >= 3)
                    highdiff += pix - c->mc_luma_vals[3];
                if (index1[pix] < 1)
                    lowdiff += c->mc_luma_vals[1] - pix;

                row1 <<= 2;

                if (INTERLACED) {
                    row2 <<= 2;
                    if (interlaced_dither_patterns[dither[pix]][(y & 3) * 2 + 0][x & 3])
                        row1 |= 3-(index2[pix] & 3);
                    else
                        row1 |= 3-(index1[pix] & 3);

                    if (interlaced_dither_patterns[dither[pix]][(y & 3) * 2 + 1][x & 3])
                        row2 |= 3-(index2[pix] & 3);
                    else
                        row2 |= 3-(index1[pix] & 3);
                }
                else {
                    if (multi_dither_patterns[dither[pix]][(y & 3)][x & 3])
                        row1 |= 3-(index2[pix] & 3);
                    else
                        row1 |= 3-(index1[pix] & 3);
                }
            }
            charset[y+0x000] = row1;
            if (INTERLACED) charset[y+0x800] = row2;
        }
        /* do we need to adjust pixels? */
        if (highdiff > 0 && lowdiff > 0 && c->mc_use_5col) {
            if (lowdiff > highdiff) {
                for (x = 0; x < 32; x++)
                    best_cb[x] = FFMIN(c->mc_luma_vals[3], best_cb[x]);
            } else {
                for (x = 0; x < 32; x++)
                    best_cb[x] = FFMAX(c->mc_luma_vals[1], best_cb[x]);
            }
            charpos--;          /* redo now adjusted char */
        /* no adjustment needed, all fine */
        } else {
            /* advance pointers */
            best_cb += 32;
            charset += 8;

            /* remember colorram value */
            colrammap[charpos] = (highdiff > 0);
        }
    }
}

static av_cold int a64multi_close_encoder(AVCodecContext *avctx)
{
    A64Context *c = avctx->priv_data;
    av_free(c->mc_meta_charset);
    av_free(c->mc_best_cb);
    av_free(c->mc_charset);
    av_free(c->mc_charmap);
    av_free(c->mc_colram);
    return 0;
}

static av_cold int a64multi_init_encoder(AVCodecContext *avctx)
{
    A64Context *c = avctx->priv_data;
    int a;
    av_lfg_init(&c->randctx, 1);

    if (avctx->global_quality < 1) {
        c->mc_lifetime = 4;
    } else {
        c->mc_lifetime = avctx->global_quality /= FF_QP2LAMBDA;
    }

    av_log(avctx, AV_LOG_INFO, "charset lifetime set to %d frame(s)\n", c->mc_lifetime);

    c->mc_frame_counter = 0;
    c->mc_use_5col      = avctx->codec->id == CODEC_ID_A64_MULTI5;
    c->mc_pal_size      = 4 + c->mc_use_5col;

    /* precalc luma values for later use */
    for (a = 0; a < c->mc_pal_size; a++) {
        c->mc_luma_vals[a]=a64_palette[mc_colors[a]][0] * 0.30 +
                           a64_palette[mc_colors[a]][1] * 0.59 +
                           a64_palette[mc_colors[a]][2] * 0.11;
    }

    if (!(c->mc_meta_charset = av_malloc(32000 * c->mc_lifetime * sizeof(int))) ||
       !(c->mc_best_cb       = av_malloc(CHARSET_CHARS * 32 * sizeof(int)))     ||
       !(c->mc_charmap       = av_mallocz(1000 * c->mc_lifetime * sizeof(int))) ||
       !(c->mc_colram        = av_mallocz(CHARSET_CHARS * sizeof(uint8_t)))     ||
       !(c->mc_charset       = av_malloc(0x800 * (INTERLACED+1) * sizeof(uint8_t)))) {
        av_log(avctx, AV_LOG_ERROR, "Failed to allocate buffer memory.\n");
        return AVERROR(ENOMEM);
    }

    /* set up extradata */
    if (!(avctx->extradata = av_mallocz(8 * 4 + FF_INPUT_BUFFER_PADDING_SIZE))) {
        av_log(avctx, AV_LOG_ERROR, "Failed to allocate memory for extradata.\n");
        return AVERROR(ENOMEM);
    }
    avctx->extradata_size = 8 * 4;
    AV_WB32(avctx->extradata, c->mc_lifetime);
    AV_WB32(avctx->extradata + 16, INTERLACED);

    avcodec_get_frame_defaults(&c->picture);
    avctx->coded_frame            = &c->picture;
    avctx->coded_frame->pict_type = AV_PICTURE_TYPE_I;
    avctx->coded_frame->key_frame = 1;
    if (!avctx->codec_tag)
         avctx->codec_tag = AV_RL32("a64m");

    c->next_pts = AV_NOPTS_VALUE;

    return 0;
}

static void a64_compress_colram(unsigned char *buf, int *charmap, uint8_t *colram)
{
    int a;
    uint8_t temp;
    /* only needs to be done in 5col mode */
    /* XXX could be squeezed to 0x80 bytes */
    for (a = 0; a < 256; a++) {
        temp  = colram[charmap[a + 0x000]] << 0;
        temp |= colram[charmap[a + 0x100]] << 1;
        temp |= colram[charmap[a + 0x200]] << 2;
        if (a < 0xe8) temp |= colram[charmap[a + 0x300]] << 3;
        buf[a] = temp << 2;
    }
}

static int a64multi_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
                                 const AVFrame *pict, int *got_packet)
{
    A64Context *c = avctx->priv_data;
    AVFrame *const p = &c->picture;

    int frame;
    int x, y;
    int b_height;
    int b_width;

    int req_size, ret;
    uint8_t *buf;

    int *charmap     = c->mc_charmap;
    uint8_t *colram  = c->mc_colram;
    uint8_t *charset = c->mc_charset;
    int *meta        = c->mc_meta_charset;
    int *best_cb     = c->mc_best_cb;

    int charset_size = 0x800 * (INTERLACED + 1);
    int colram_size  = 0x100 * c->mc_use_5col;
    int screen_size;

    if(CROP_SCREENS) {
        b_height = FFMIN(avctx->height,C64YRES) >> 3;
        b_width  = FFMIN(avctx->width ,C64XRES) >> 3;
        screen_size = b_width * b_height;
    } else {
        b_height = C64YRES >> 3;
        b_width  = C64XRES >> 3;
        screen_size = 0x400;
    }

    /* no data, means end encoding asap */
    if (!pict) {
        /* all done, end encoding */
        if (!c->mc_lifetime) return 0;
        /* no more frames in queue, prepare to flush remaining frames */
        if (!c->mc_frame_counter) {
            c->mc_lifetime = 0;
        }
        /* still frames in queue so limit lifetime to remaining frames */
        else c->mc_lifetime = c->mc_frame_counter;
    /* still new data available */
    } else {
        /* fill up mc_meta_charset with data until lifetime exceeds */
        if (c->mc_frame_counter < c->mc_lifetime) {
            *p = *pict;
            p->pict_type = AV_PICTURE_TYPE_I;
            p->key_frame = 1;
            to_meta_with_crop(avctx, p, meta + 32000 * c->mc_frame_counter);
            c->mc_frame_counter++;
            if (c->next_pts == AV_NOPTS_VALUE)
                c->next_pts = pict->pts;
            /* lifetime is not reached so wait for next frame first */
            return 0;
        }
    }

    /* lifetime reached so now convert X frames at once */
    if (c->mc_frame_counter == c->mc_lifetime) {
        req_size = 0;
        /* any frames to encode? */
        if (c->mc_lifetime) {
            req_size = charset_size + c->mc_lifetime*(screen_size + colram_size);
            if ((ret = ff_alloc_packet(pkt, req_size)) < 0) {
                av_log(avctx, AV_LOG_ERROR, "Error getting output packet of size %d.\n", req_size);
                return ret;
            }
            buf = pkt->data;

            /* calc optimal new charset + charmaps */
            ff_init_elbg(meta, 32, 1000 * c->mc_lifetime, best_cb, CHARSET_CHARS, 50, charmap, &c->randctx);
            ff_do_elbg  (meta, 32, 1000 * c->mc_lifetime, best_cb, CHARSET_CHARS, 50, charmap, &c->randctx);

            /* create colorram map and a c64 readable charset */
            render_charset(avctx, charset, colram);

            /* copy charset to buf */
            memcpy(buf, charset, charset_size);

            /* advance pointers */
            buf      += charset_size;
            charset  += charset_size;
        }

        /* write x frames to buf */
        for (frame = 0; frame < c->mc_lifetime; frame++) {
            /* copy charmap to buf. buf is uchar*, charmap is int*, so no memcpy here, sorry */
            for (y = 0; y < b_height; y++) {
                for (x = 0; x < b_width; x++) {
                    buf[y * b_width + x] = charmap[y * b_width + x];
                }
            }
            /* advance pointers */
            buf += screen_size;
            req_size += screen_size;

            /* compress and copy colram to buf */
            if (c->mc_use_5col) {
                a64_compress_colram(buf, charmap, colram);
                /* advance pointers */
                buf += colram_size;
                req_size += colram_size;
            }

            /* advance to next charmap */
            charmap += 1000;
        }

        AV_WB32(avctx->extradata + 4,  c->mc_frame_counter);
        AV_WB32(avctx->extradata + 8,  charset_size);
        AV_WB32(avctx->extradata + 12, screen_size + colram_size);

        /* reset counter */
        c->mc_frame_counter = 0;

        pkt->pts = pkt->dts = c->next_pts;
        c->next_pts         = AV_NOPTS_VALUE;

        pkt->size   = req_size;
        pkt->flags |= AV_PKT_FLAG_KEY;
        *got_packet = !!req_size;
    }
    return 0;
}

AVCodec ff_a64multi_encoder = {
    .name           = "a64multi",
    .type           = AVMEDIA_TYPE_VIDEO,
    .id             = CODEC_ID_A64_MULTI,
    .priv_data_size = sizeof(A64Context),
    .init           = a64multi_init_encoder,
    .encode2        = a64multi_encode_frame,
    .close          = a64multi_close_encoder,
    .pix_fmts       = (const enum PixelFormat[]) {PIX_FMT_GRAY8, PIX_FMT_NONE},
    .long_name      = NULL_IF_CONFIG_SMALL("Multicolor charset for Commodore 64"),
    .capabilities   = CODEC_CAP_DELAY,
};

AVCodec ff_a64multi5_encoder = {
    .name           = "a64multi5",
    .type           = AVMEDIA_TYPE_VIDEO,
    .id             = CODEC_ID_A64_MULTI5,
    .priv_data_size = sizeof(A64Context),
    .init           = a64multi_init_encoder,
    .encode2        = a64multi_encode_frame,
    .close          = a64multi_close_encoder,
    .pix_fmts       = (const enum PixelFormat[]) {PIX_FMT_GRAY8, PIX_FMT_NONE},
    .long_name      = NULL_IF_CONFIG_SMALL("Multicolor charset for Commodore 64, extended with 5th color (colram)"),
    .capabilities   = CODEC_CAP_DELAY,
};