/*
 * JPEG-LS encoder and decoder
 * Copyright (c) 2003 Michael Niedermayer
 * Copyright (c) 2006 Konstantin Shishkov
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "golomb.h"

/**
 * @file jpeg_ls.c
 * JPEG-LS encoder and decoder.
 */

typedef struct JpeglsContext{
    AVCodecContext *avctx;
    AVFrame picture;
}JpeglsContext;

typedef struct JLSState{
    int T1, T2, T3;
    int A[367], B[367], C[365], N[367];
    int limit, reset, bpp, qbpp, maxval, range;
    int near, twonear;
    int run_index[3];
}JLSState;

static const uint8_t log2_run[32]={
 0, 0, 0, 0, 1, 1, 1, 1,
 2, 2, 2, 2, 3, 3, 3, 3,
 4, 4, 5, 5, 6, 6, 7, 7,
 8, 9,10,11,12,13,14,15
};

/*
* Uncomment this to significantly speed up decoding of broken JPEG-LS
* (or test broken JPEG-LS decoder) and slow down ordinary decoding a bit.
*
* There is no Golomb code with length >= 32 bits possible, so check and
* avoid situation of 32 zeros, FFmpeg Golomb decoder is painfully slow
* on this errors.
*/
//#define JLS_BROKEN

/********** Functions for both encoder and decoder **********/

/**
 * Calculate initial JPEG-LS parameters
 */
static void ls_init_state(JLSState *state){
    int i;

    state->twonear = state->near * 2 + 1;
    state->range = ((state->maxval + state->twonear - 1) / state->twonear) + 1;

    // QBPP = ceil(log2(RANGE))
    for(state->qbpp = 0; (1 << state->qbpp) < state->range; state->qbpp++);

    if(state->bpp < 8)
        state->limit = 16 + 2 * state->bpp - state->qbpp;
    else
        state->limit = (4 * state->bpp) - state->qbpp;

    for(i = 0; i < 367; i++) {
        state->A[i] = (state->range + 32) >> 6;
        if(state->A[i] < 2)
            state->A[i] = 2;
        state->N[i] = 1;
    }

}

/**
 * Calculate quantized gradient value, used for context determination
 */
static inline int quantize(JLSState *s, int v){ //FIXME optimize
    if(v==0) return 0;
    if(v < 0){
        if(v <= -s->T3) return -4;
        if(v <= -s->T2) return -3;
        if(v <= -s->T1) return -2;
        if(v <  -s->near) return -1;
        return 0;
    }else{
        if(v <= s->near) return 0;
        if(v <  s->T1) return 1;
        if(v <  s->T2) return 2;
        if(v <  s->T3) return 3;
        return 4;
    }
}

/**
 * Custom value clipping function used in T1, T2, T3 calculation
 */
static inline int iso_clip(int v, int vmin, int vmax){
    if(v > vmax || v < vmin) return vmin;
    else                     return v;
}

/**
 * Calculate JPEG-LS codec values
 */
static void reset_ls_coding_parameters(JLSState *s, int reset_all){
    const int basic_t1= 3;
    const int basic_t2= 7;
    const int basic_t3= 21;
    int factor;

    if(s->maxval==0 || reset_all) s->maxval= (1 << s->bpp) - 1;

    if(s->maxval >=128){
        factor= (FFMIN(s->maxval, 4095) + 128)>>8;

        if(s->T1==0     || reset_all)
            s->T1= iso_clip(factor*(basic_t1-2) + 2 + 3*s->near, s->near+1, s->maxval);
        if(s->T2==0     || reset_all)
            s->T2= iso_clip(factor*(basic_t2-3) + 3 + 5*s->near, s->T1, s->maxval);
        if(s->T3==0     || reset_all)
            s->T3= iso_clip(factor*(basic_t3-4) + 4 + 7*s->near, s->T2, s->maxval);
    }else{
        factor= 256 / (s->maxval + 1);

        if(s->T1==0     || reset_all)
            s->T1= iso_clip(FFMAX(2, basic_t1/factor + 3*s->near), s->near+1, s->maxval);
        if(s->T2==0     || reset_all)
            s->T2= iso_clip(FFMAX(3, basic_t2/factor + 5*s->near), s->T1, s->maxval);
        if(s->T3==0     || reset_all)
            s->T3= iso_clip(FFMAX(4, basic_t3/factor + 6*s->near), s->T2, s->maxval);
    }

    if(s->reset==0  || reset_all) s->reset= 64;
//    av_log(NULL, AV_LOG_DEBUG, "[JPEG-LS RESET] T=%i,%i,%i\n", s->T1, s->T2, s->T3);
}


/********** Decoder-specific functions **********/

/**
 * Decode LSE block with initialization parameters
 */
static int decode_lse(MJpegDecodeContext *s)
{
    int len, id;

    /* XXX: verify len field validity */
    len = get_bits(&s->gb, 16);
    id = get_bits(&s->gb, 8);

    switch(id){
    case 1:
        s->maxval= get_bits(&s->gb, 16);
        s->t1= get_bits(&s->gb, 16);
        s->t2= get_bits(&s->gb, 16);
        s->t3= get_bits(&s->gb, 16);
        s->reset= get_bits(&s->gb, 16);

//        reset_ls_coding_parameters(s, 0);
        //FIXME quant table?
        break;
    case 2:
    case 3:
        av_log(s->avctx, AV_LOG_ERROR, "palette not supported\n");
        return -1;
    case 4:
        av_log(s->avctx, AV_LOG_ERROR, "oversize image not supported\n");
        return -1;
    default:
        av_log(s->avctx, AV_LOG_ERROR, "invalid id %d\n", id);
        return -1;
    }
//    av_log(s->avctx, AV_LOG_DEBUG, "ID=%i, T=%i,%i,%i\n", id, s->t1, s->t2, s->t3);

    return 0;
}


/**
 * Get context-dependent Golomb code, decode it and update context
 */
static inline int ls_get_code_regular(GetBitContext *gb, JLSState *state, int Q){
    int k, ret;

    for(k = 0; (state->N[Q] << k) < state->A[Q]; k++);

#ifdef JLS_BROKEN
    if(!show_bits_long(gb, 32))return -1;
#endif
    ret = get_ur_golomb_jpegls(gb, k, state->limit, state->qbpp);

    /* decode mapped error */
    if(ret & 1)
        ret = -((ret + 1) >> 1);
    else
        ret >>= 1;

    /* for NEAR=0, k=0 and 2*B[Q] <= - N[Q] mapping is reversed */
    if(!state->near && !k && (2 * state->B[Q] <= -state->N[Q]))
        ret = -(ret + 1);

    state->A[Q] += ABS(ret);
    ret *= state->twonear;
    state->B[Q] += ret;

    if(state->N[Q] == state->reset) {
        state->A[Q] >>= 1;
        state->B[Q] >>= 1;
        state->N[Q] >>= 1;
    }
    state->N[Q]++;

    if(state->B[Q] <= -state->N[Q]) {
        state->B[Q] += state->N[Q];
        if(state->C[Q] > -128)
            state->C[Q]--;
        if(state->B[Q] <= -state->N[Q])
            state->B[Q] = -state->N[Q] + 1;
    }else if(state->B[Q] > 0){
        state->B[Q] -= state->N[Q];
        if(state->C[Q] < 127)
            state->C[Q]++;
        if(state->B[Q] > 0)
            state->B[Q] = 0;
    }

    return ret;
}

/**
 * Get Golomb code, decode it and update state for run termination
 */
static inline int ls_get_code_runterm(GetBitContext *gb, JLSState *state, int RItype, int limit_add){
    int k, ret, temp, map;
    int Q = 365 + RItype;

    if(!RItype)
        temp = state->A[Q];
    else
        temp = state->A[Q] + (state->N[Q] >> 1);

    for(k = 0; (state->N[Q] << k) < temp; k++);

#ifdef JLS_BROKEN
    if(!show_bits_long(gb, 32))return -1;
#endif
    ret = get_ur_golomb_jpegls(gb, k, state->limit - limit_add - 1, state->qbpp);

    /* decode mapped error */
    map = 0;
    if(!k && (RItype || ret) && (2 * state->B[Q] < state->N[Q]))
        map = 1;
    ret += RItype + map;

    if(ret & 1){
        ret = map - ((ret + 1) >> 1);
        state->B[Q]++;
    } else {
        ret = ret >> 1;
    }

    /* update state */
    state->A[Q] += ABS(ret) - RItype;
    ret *= state->twonear;
    if(state->N[Q] == state->reset){
        state->A[Q] >>=1;
        state->B[Q] >>=1;
        state->N[Q] >>=1;
    }
    state->N[Q]++;

    return ret;
}

/**
 * Decode one line of image
 */
static inline void ls_decode_line(JLSState *state, MJpegDecodeContext *s, uint8_t *last, uint8_t *dst, int last2, int w, int stride, int comp){
    int i, x = 0;
    int Ra, Rb, Rc, Rd;
    int D0, D1, D2;

    while(x < w) {
        int err, pred;

        /* compute gradients */
        Ra = x ? dst[x - stride] : last[x];
        Rb = last[x];
        Rc = x ? last[x - stride] : last2;
        Rd = (x >= w - stride) ? last[x] : last[x + stride];
        D0 = Rd - Rb;
        D1 = Rb - Rc;
        D2 = Rc - Ra;
        /* run mode */
        if((ABS(D0) <= state->near) && (ABS(D1) <= state->near) && (ABS(D2) <= state->near)) {
            int r;
            int RItype;

            /* decode full runs while available */
            while(get_bits1(&s->gb)) {
                int r;
                r = 1 << log2_run[state->run_index[comp]];
                if(x + r * stride > w) {
                    r = (w - x) / stride;
                }
                for(i = 0; i < r; i++) {
                    dst[x] = Ra;
                    x += stride;
                }
                /* if EOL reached, we stop decoding */
                if(r != (1 << log2_run[state->run_index[comp]]))
                    return;
                if(state->run_index[comp] < 31)
                    state->run_index[comp]++;
                if(x + stride > w)
                    return;
            }
            /* decode aborted run */
            r = log2_run[state->run_index[comp]];
            if(r)
                r = get_bits_long(&s->gb, r);
            for(i = 0; i < r; i++) {
                dst[x] = Ra;
                x += stride;
            }

            /* decode run termination value */
            Rb = last[x];
            RItype = (ABS(Ra - Rb) <= state->near) ? 1 : 0;
            err = ls_get_code_runterm(&s->gb, state, RItype, log2_run[state->run_index[comp]]);
            if(state->run_index[comp])
                state->run_index[comp]--;

            if(state->near && RItype){
                pred = Ra + err;
            } else {
                if(Rb < Ra)
                    pred = Rb - err;
                else
                    pred = Rb + err;
            }

            if(state->near){
                if(pred < -state->near)
                    pred += state->range * state->twonear;
                else if(pred > state->maxval + state->near)
                    pred -= state->range * state->twonear;
                pred = clip(pred, 0, state->maxval);
            }

            dst[x] = pred;
            x += stride;
        } else { /* regular mode */
            int context, sign;

            context = quantize(state, D0) * 81 + quantize(state, D1) * 9 + quantize(state, D2);
            pred = mid_pred(Ra, Ra + Rb - Rc, Rb);

            if(context < 0){
                context = -context;
                sign = 1;
            }else{
                sign = 0;
            }

            if(sign){
                pred = clip(pred - state->C[context], 0, state->maxval);
                err = -ls_get_code_regular(&s->gb, state, context);
            } else {
                pred = clip(pred + state->C[context], 0, state->maxval);
                err = ls_get_code_regular(&s->gb, state, context);
            }

            /* we have to do something more for near-lossless coding */
            pred += err;
            if(state->near) {
                if(pred < -state->near)
                    pred += state->range * state->twonear;
                else if(pred > state->maxval + state->near)
                    pred -= state->range * state->twonear;
                pred = clip(pred, 0, state->maxval);
            }

            dst[x] = pred;
            x += stride;
        }
    }
}

static int ls_decode_picture(MJpegDecodeContext *s, int near, int point_transform, int ilv){
    int i, t = 0;
    uint8_t *zero, *last, *cur;
    JLSState *state;
    int off, stride, width;

    zero = av_mallocz(s->picture.linesize[0]);
    last = zero;
    cur = s->picture.data[0];

    state = av_mallocz(sizeof(JLSState));
    /* initialize JPEG-LS state from JPEG parameters */
    state->near = near;
    state->bpp = (s->bits < 2) ? 2 : s->bits;
    state->maxval = s->maxval;
    state->T1 = s->t1;
    state->T2 = s->t2;
    state->T3 = s->t3;
    state->reset = s->reset;
    reset_ls_coding_parameters(state, 0);
    ls_init_state(state);

//    av_log(s->avctx, AV_LOG_DEBUG, "JPEG-LS params: %ix%i NEAR=%i MV=%i T(%i,%i,%i) RESET=%i, LIMIT=%i, qbpp=%i, RANGE=%i\n",s->width,s->height,state->near,state->maxval,state->T1,state->T2,state->T3,state->reset,state->limit,state->qbpp, state->range);
//    av_log(s->avctx, AV_LOG_DEBUG, "JPEG params: ILV=%i Pt=%i BPP=%i, scan = %i\n", ilv, point_transform, s->bits, s->cur_scan);
    if(ilv == 0) { /* separate planes */
        off = s->cur_scan - 1;
        stride = (s->nb_components > 1) ? 3 : 1;
        width = s->width * stride;
        cur += off;
        for(i = 0; i < s->height; i++) {
            ls_decode_line(state, s, last, cur, t, width, stride, off);
            t = last[0];
            last = cur;
            cur += s->picture.linesize[0];

            if (s->restart_interval && !--s->restart_count) {
                align_get_bits(&s->gb);
                skip_bits(&s->gb, 16); /* skip RSTn */
            }
        }
    } else if(ilv == 1) { /* line interleaving */
        int j;
        int Rc[3] = {0, 0, 0};
        memset(cur, 0, s->picture.linesize[0]);
        width = s->width * 3;
        for(i = 0; i < s->height; i++) {
            for(j = 0; j < 3; j++) {
                ls_decode_line(state, s, last + j, cur + j, Rc[j], width, 3, j);
                Rc[j] = last[j];

                if (s->restart_interval && !--s->restart_count) {
                    align_get_bits(&s->gb);
                    skip_bits(&s->gb, 16); /* skip RSTn */
                }
            }
            last = cur;
            cur += s->picture.linesize[0];
        }
    } else if(ilv == 2) { /* sample interleaving */
        av_log(s->avctx, AV_LOG_ERROR, "Sample interleaved images are not supported.\n");
        return -1;
    }

    av_free(state);
    av_free(zero);

    return 0;
}

#if defined(CONFIG_ENCODERS) && defined(CONFIG_JPEGLS_ENCODER)
/********** Encoder-specific functions **********/

/**
 * Encode error from regular symbol
 */
static inline void ls_encode_regular(JLSState *state, PutBitContext *pb, int Q, int err){
    int k;
    int val;
    int map;

    for(k = 0; (state->N[Q] << k) < state->A[Q]; k++);

    map = !state->near && !k && (2 * state->B[Q] <= -state->N[Q]);

    if(err < 0)
        err += state->range;
    if(err >= ((state->range + 1) >> 1)) {
        err -= state->range;
        val = 2 * ABS(err) - 1 - map;
    } else
        val = 2 * err + map;

    set_ur_golomb_jpegls(pb, val, k, state->limit, state->qbpp);

    state->A[Q] += ABS(err);
    state->B[Q] += err * state->twonear;

    if(state->N[Q] == state->reset) {
        state->A[Q] >>= 1;
        state->B[Q] >>= 1;
        state->N[Q] >>= 1;
    }
    state->N[Q]++;

    if(state->B[Q] <= -state->N[Q]) {
        state->B[Q] += state->N[Q];
        if(state->C[Q] > -128)
            state->C[Q]--;
        if(state->B[Q] <= -state->N[Q])
            state->B[Q] = -state->N[Q] + 1;
    }else if(state->B[Q] > 0){
        state->B[Q] -= state->N[Q];
        if(state->C[Q] < 127)
            state->C[Q]++;
        if(state->B[Q] > 0)
            state->B[Q] = 0;
    }
}

/**
 * Encode error from run termination
 */
static inline void ls_encode_runterm(JLSState *state, PutBitContext *pb, int RItype, int err, int limit_add){
    int k;
    int val, map;
    int Q = 365 + RItype;
    int temp;

    temp = state->A[Q];
    if(RItype)
        temp += state->N[Q] >> 1;
    for(k = 0; (state->N[Q] << k) < temp; k++);
    map = 0;
    if(!k && err && (2 * state->B[Q] < state->N[Q]))
        map = 1;

    if(err < 0)
        val = - (2 * err) - 1 - RItype + map;
    else
        val = 2 * err - RItype - map;
    set_ur_golomb_jpegls(pb, val, k, state->limit - limit_add - 1, state->qbpp);

    if(err < 0)
        state->B[Q]++;
    state->A[Q] += (val + 1 - RItype) >> 1;

    if(state->N[Q] == state->reset) {
        state->A[Q] >>= 1;
        state->B[Q] >>= 1;
        state->N[Q] >>= 1;
    }
    state->N[Q]++;
}

/**
 * Encode run value as specified by JPEG-LS standard
 */
static inline void ls_encode_run(JLSState *state, PutBitContext *pb, int run, int comp, int trail){
    while(run >= (1 << log2_run[state->run_index[comp]])){
        put_bits(pb, 1, 1);
        run -= 1 << log2_run[state->run_index[comp]];
        if(state->run_index[comp] < 31)
            state->run_index[comp]++;
    }
    /* if hit EOL, encode another full run, else encode aborted run */
    if(!trail && run) {
        put_bits(pb, 1, 1);
    }else if(trail){
        put_bits(pb, 1, 0);
        if(log2_run[state->run_index[comp]])
            put_bits(pb, log2_run[state->run_index[comp]], run);
    }
}

/**
 * Encode one line of image
 */
static inline void ls_encode_line(JLSState *state, PutBitContext *pb, uint8_t *last, uint8_t *cur, int last2, int w, int stride, int comp){
    int x = 0;
    int Ra, Rb, Rc, Rd;
    int D0, D1, D2;

    while(x < w) {
        int err, pred, sign;

        /* compute gradients */
        Ra = x ? cur[x - stride] : last[x];
        Rb = last[x];
        Rc = x ? last[x - stride] : last2;
        Rd = (x >= w - stride) ? last[x] : last[x + stride];
        D0 = Rd - Rb;
        D1 = Rb - Rc;
        D2 = Rc - Ra;

        /* run mode */
        if((ABS(D0) <= state->near) && (ABS(D1) <= state->near) && (ABS(D2) <= state->near)) {
            int RUNval, RItype, run;

            run = 0;
            RUNval = Ra;
            while(x < w && (ABS(cur[x] - RUNval) <= state->near)){
                run++;
                cur[x] = Ra;
                x += stride;
            }
            ls_encode_run(state, pb, run, comp, x < w);
            if(x >= w)
                return;
            Rb = last[x];
            RItype = (ABS(Ra - Rb) <= state->near);
            pred = RItype ? Ra : Rb;
            err = cur[x] - pred;

            if(!RItype && Ra > Rb)
                err = -err;

            if(state->near){
                if(err > 0)
                    err = (state->near + err) / state->twonear;
                else
                    err = -(state->near - err) / state->twonear;

                if(RItype || (Rb >= Ra))
                    Ra = clip(pred + err * state->twonear, 0, state->maxval);
                else
                    Ra = clip(pred - err * state->twonear, 0, state->maxval);
                cur[x] = Ra;
            }
            if(err < 0)
                err += state->range;
            if(err >= ((state->range + 1) >> 1))
                err -= state->range;

            ls_encode_runterm(state, pb, RItype, err, log2_run[state->run_index[comp]]);

            if(state->run_index[comp] > 0)
                state->run_index[comp]--;
            x += stride;
        } else { /* regular mode */
            int context;

            context = quantize(state, D0) * 81 + quantize(state, D1) * 9 + quantize(state, D2);
            pred = mid_pred(Ra, Ra + Rb - Rc, Rb);

            if(context < 0){
                context = -context;
                sign = 1;
                pred = clip(pred - state->C[context], 0, state->maxval);
                err = pred - cur[x];
            }else{
                sign = 0;
                pred = clip(pred + state->C[context], 0, state->maxval);
                err = cur[x] - pred;
            }

            if(state->near){
                if(err > 0)
                    err = (state->near + err) / state->twonear;
                else
                    err = -(state->near - err) / state->twonear;
                if(!sign)
                    Ra = clip(pred + err * state->twonear, 0, state->maxval);
                else
                    Ra = clip(pred - err * state->twonear, 0, state->maxval);
                cur[x] = Ra;
            }

            ls_encode_regular(state, pb, context, err);
            x += stride;
        }
    }
}

static void ls_store_lse(JLSState *state, PutBitContext *pb){
    /* Test if we have default params and don't need to store LSE */
    JLSState state2;
    memset(&state2, 0, sizeof(JLSState));
    state2.bpp = 8;
    state2.near = state->near;
    reset_ls_coding_parameters(&state2, 1);
    if(state->T1 == state2.T1 && state->T2 == state2.T2 && state->T3 == state2.T3 && state->reset == state2.reset)
        return;
    /* store LSE type 1 */
    put_marker(pb, LSE);
    put_bits(pb, 16, 13);
    put_bits(pb, 8,   1);
    put_bits(pb, 16, state->maxval);
    put_bits(pb, 16, state->T1);
    put_bits(pb, 16, state->T2);
    put_bits(pb, 16, state->T3);
    put_bits(pb, 16, state->reset);
}

static int encode_picture_ls(AVCodecContext *avctx, unsigned char *buf, int buf_size, void *data){
    JpeglsContext * const s = avctx->priv_data;
    AVFrame *pict = data;
    AVFrame * const p= (AVFrame*)&s->picture;
    const int near = avctx->prediction_method;
    PutBitContext pb, pb2;
    GetBitContext gb;
    uint8_t *buf2, *zero, *cur, *last;
    JLSState *state;
    int i, size;
    int comps;

    buf2 = av_malloc(buf_size);

    init_put_bits(&pb, buf, buf_size);
    init_put_bits(&pb2, buf2, buf_size);

    *p = *pict;
    p->pict_type= FF_I_TYPE;
    p->key_frame= 1;

    comps = (avctx->pix_fmt == PIX_FMT_GRAY8) ? 1 : 3;

    /* write our own JPEG header, can't use mjpeg_picture_header */
    put_marker(&pb, SOI);
    put_marker(&pb, SOF48);
    put_bits(&pb, 16, 8 + comps * 3); // header size depends on components
    put_bits(&pb,  8, 8);             // bpp
    put_bits(&pb, 16, avctx->height);
    put_bits(&pb, 16, avctx->width);
    put_bits(&pb,  8, comps);         // components
    for(i = 1; i <= comps; i++) {
        put_bits(&pb,  8, i);    // component ID
        put_bits(&pb,  8, 0x11); // subsampling: none
        put_bits(&pb,  8, 0);    // Tiq, used by JPEG-LS ext
    }

    put_marker(&pb, SOS);
    put_bits(&pb, 16, 6 + comps * 2);
    put_bits(&pb,  8, comps);
    for(i = 1; i <= comps; i++) {
        put_bits(&pb,  8, i);  // component ID
        put_bits(&pb,  8, 0);  // mapping index: none
    }
    put_bits(&pb,  8, near);
    put_bits(&pb,  8, (comps > 1) ? 1 : 0); // interleaving: 0 - plane, 1 - line
    put_bits(&pb,  8, 0); // point transform: none

    state = av_mallocz(sizeof(JLSState));
    /* initialize JPEG-LS state from JPEG parameters */
    state->near = near;
    state->bpp = 8;
    reset_ls_coding_parameters(state, 0);
    ls_init_state(state);

    ls_store_lse(state, &pb);

    zero = av_mallocz(p->linesize[0]);
    last = zero;
    cur = p->data[0];
    if(avctx->pix_fmt == PIX_FMT_GRAY8){
        int t = 0;

        for(i = 0; i < avctx->height; i++) {
            ls_encode_line(state, &pb2, last, cur, t, avctx->width, 1, 0);
            t = last[0];
            last = cur;
            cur += p->linesize[0];
        }
    }else if(avctx->pix_fmt == PIX_FMT_RGB24){
        int j, width;
        int Rc[3] = {0, 0, 0};

        width = avctx->width * 3;
        for(i = 0; i < avctx->height; i++) {
            for(j = 0; j < 3; j++) {
                ls_encode_line(state, &pb2, last + j, cur + j, Rc[j], width, 3, j);
                Rc[j] = last[j];
            }
            last = cur;
            cur += s->picture.linesize[0];
        }
    }else if(avctx->pix_fmt == PIX_FMT_BGR24){
        int j, width;
        int Rc[3] = {0, 0, 0};

        width = avctx->width * 3;
        for(i = 0; i < avctx->height; i++) {
            for(j = 2; j >= 0; j--) {
                ls_encode_line(state, &pb2, last + j, cur + j, Rc[j], width, 3, j);
                Rc[j] = last[j];
            }
            last = cur;
            cur += s->picture.linesize[0];
        }
    }

    av_free(zero);
    av_free(state);

    flush_put_bits(&pb2);
    /* do escape coding */
    size = put_bits_count(&pb2) >> 3;
    init_get_bits(&gb, buf2, size);
    while(get_bits_count(&gb) < size * 8){
        int v;
        v = get_bits(&gb, 8);
        put_bits(&pb, 8, v);
        if(v == 0xFF){
            v = get_bits(&gb, 7);
            put_bits(&pb, 8, v);
        }
    }
    align_put_bits(&pb);
    av_free(buf2);

    /* End of image */
    put_marker(&pb, EOI);
    flush_put_bits(&pb);

    emms_c();

    return put_bits_count(&pb) >> 3;
}

static int encode_init_ls(AVCodecContext *ctx) {
    JpeglsContext *c = (JpeglsContext*)ctx->priv_data;

    c->avctx = ctx;
    ctx->coded_frame = &c->picture;

    if(ctx->pix_fmt != PIX_FMT_GRAY8 && ctx->pix_fmt != PIX_FMT_RGB24 && ctx->pix_fmt != PIX_FMT_BGR24){
        av_log(ctx, AV_LOG_ERROR, "Only grayscale and RGB24/BGR24 images are supported\n");
        return -1;
    }
    return 0;
}

AVCodec jpegls_encoder = { //FIXME avoid MPV_* lossless jpeg shouldnt need them
    "jpegls",
    CODEC_TYPE_VIDEO,
    CODEC_ID_JPEGLS,
    sizeof(JpeglsContext),
    encode_init_ls,
    encode_picture_ls,
    NULL,
    .pix_fmts= (enum PixelFormat[]){PIX_FMT_BGR24, PIX_FMT_RGB24, PIX_FMT_GRAY8, -1},
};
#endif