diff options
author | Fabrice Bellard <fabrice@bellard.org> | 2000-12-20 00:02:47 +0000 |
---|---|---|
committer | Fabrice Bellard <fabrice@bellard.org> | 2000-12-20 00:02:47 +0000 |
commit | 9aeeeb63f7e1ab7b0b7bb839a5f258667a2d2d78 (patch) | |
tree | 133769894d45da35e05ded6ea39d33bb81e7ae18 /libav | |
parent | 77bb6835ba752bb9335d208963a53227bbb1bc63 (diff) | |
download | ffmpeg-9aeeeb63f7e1ab7b0b7bb839a5f258667a2d2d78.tar.gz |
Initial revision
Originally committed as revision 2 to svn://svn.ffmpeg.org/ffmpeg/trunk
Diffstat (limited to 'libav')
-rw-r--r-- | libav/Makefile | 17 | ||||
-rw-r--r-- | libav/ac3enc.c | 1460 | ||||
-rw-r--r-- | libav/ac3enc.h | 32 | ||||
-rw-r--r-- | libav/ac3tab.h | 180 | ||||
-rw-r--r-- | libav/avcodec.h | 79 | ||||
-rw-r--r-- | libav/common.c | 174 | ||||
-rw-r--r-- | libav/common.h | 68 | ||||
-rw-r--r-- | libav/h263data.h | 151 | ||||
-rw-r--r-- | libav/h263enc.c | 229 | ||||
-rw-r--r-- | libav/jfdctfst.c | 224 | ||||
-rw-r--r-- | libav/jrevdct.c | 1584 | ||||
-rw-r--r-- | libav/mjpegenc.c | 416 | ||||
-rw-r--r-- | libav/mpegaudio.c | 754 | ||||
-rw-r--r-- | libav/mpegaudio.h | 31 | ||||
-rw-r--r-- | libav/mpegaudiotab.h | 310 | ||||
-rw-r--r-- | libav/mpegencodevlc.h | 311 | ||||
-rw-r--r-- | libav/mpegvideo.c | 1098 | ||||
-rw-r--r-- | libav/mpegvideo.h | 94 | ||||
-rw-r--r-- | libav/resample.c | 245 |
19 files changed, 7457 insertions, 0 deletions
diff --git a/libav/Makefile b/libav/Makefile new file mode 100644 index 0000000000..6664e870cb --- /dev/null +++ b/libav/Makefile @@ -0,0 +1,17 @@ +CFLAGS= -O2 -Wall -g +LDFLAGS= -g + +OBJS= common.o mpegvideo.o h263enc.o jrevdct.o jfdctfst.o \ + mpegaudio.o ac3enc.o mjpegenc.o resample.o +LIB= libav.a + +all: $(LIB) + +$(LIB): $(OBJS) + ar rcs $@ $(OBJS) + +%.o: %.c + gcc $(CFLAGS) -c -o $@ $< + +clean: + rm -f *.o *~ *.a diff --git a/libav/ac3enc.c b/libav/ac3enc.c new file mode 100644 index 0000000000..b1126c4943 --- /dev/null +++ b/libav/ac3enc.c @@ -0,0 +1,1460 @@ +/* + * The simplest AC3 encoder + * Copyright (c) 2000 Gerard Lantau. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+ */ +#include <stdlib.h> +#include <stdio.h> +#include <netinet/in.h> +#include <math.h> +#include "avcodec.h" + +#include "ac3enc.h" +#include "ac3tab.h" + +//#define DEBUG +//#define DEBUG_BITALLOC +#define NDEBUG +#include <assert.h> + +#define MDCT_NBITS 9 +#define N (1 << MDCT_NBITS) +#define NB_BLOCKS 6 /* number of PCM blocks inside an AC3 frame */ + +/* new exponents are sent if their Norm 1 exceed this number */ +#define EXP_DIFF_THRESHOLD 1000 + +/* exponent encoding strategy */ +#define EXP_REUSE 0 +#define EXP_NEW 1 + +#define EXP_D15 1 +#define EXP_D25 2 +#define EXP_D45 3 + +static void fft_init(int ln); +static void ac3_crc_init(void); + +static inline INT16 fix15(float a) +{ + int v; + v = (int)(a * (float)(1 << 15)); + if (v < -32767) + v = -32767; + else if (v > 32767) + v = 32767; + return v; +} + +static inline int calc_lowcomp1(int a, int b0, int b1) +{ + if ((b0 + 256) == b1) { + a = 384 ; + } else if (b0 > b1) { + a = a - 64; + if (a < 0) a=0; + } + return a; +} + +static inline int calc_lowcomp(int a, int b0, int b1, int bin) +{ + if (bin < 7) { + if ((b0 + 256) == b1) { + a = 384 ; + } else if (b0 > b1) { + a = a - 64; + if (a < 0) a=0; + } + } else if (bin < 20) { + if ((b0 + 256) == b1) { + a = 320 ; + } else if (b0 > b1) { + a= a - 64; + if (a < 0) a=0; + } + } else { + a = a - 128; + if (a < 0) a=0; + } + return a; +} + +/* AC3 bit allocation. The algorithm is the one described in the AC3 + spec with some optimizations because of our simplified encoding + assumptions. 
*/ +void parametric_bit_allocation(AC3EncodeContext *s, UINT8 *bap, + INT8 *exp, int start, int end, + int snroffset, int fgain) +{ + int bin,i,j,k,end1,v,v1,bndstrt,bndend,lowcomp,begin; + int fastleak,slowleak,address,tmp; + INT16 psd[256]; /* scaled exponents */ + INT16 bndpsd[50]; /* interpolated exponents */ + INT16 excite[50]; /* excitation */ + INT16 mask[50]; /* masking value */ + + /* exponent mapping to PSD */ + for(bin=start;bin<end;bin++) { + psd[bin]=(3072 - (exp[bin] << 7)); + } + + /* PSD integration */ + j=start; + k=masktab[start]; + do { + v=psd[j]; + j++; + end1=bndtab[k+1]; + if (end1 > end) end1=end; + for(i=j;i<end1;i++) { + int c,adr; + /* logadd */ + v1=psd[j]; + c=v-v1; + if (c >= 0) { + adr=c >> 1; + if (adr > 255) adr=255; + v=v + latab[adr]; + } else { + adr=(-c) >> 1; + if (adr > 255) adr=255; + v=v1 + latab[adr]; + } + j++; + } + bndpsd[k]=v; + k++; + } while (end > bndtab[k]); + + /* excitation function */ + bndstrt = masktab[start]; + bndend = masktab[end-1] + 1; + + lowcomp = 0; + lowcomp = calc_lowcomp1(lowcomp, bndpsd[0], bndpsd[1]) ; + excite[0] = bndpsd[0] - fgain - lowcomp ; + lowcomp = calc_lowcomp1(lowcomp, bndpsd[1], bndpsd[2]) ; + excite[1] = bndpsd[1] - fgain - lowcomp ; + begin = 7 ; + for (bin = 2; bin < 7; bin++) { + lowcomp = calc_lowcomp1(lowcomp, bndpsd[bin], bndpsd[bin+1]) ; + fastleak = bndpsd[bin] - fgain ; + slowleak = bndpsd[bin] - s->sgain ; + excite[bin] = fastleak - lowcomp ; + if (bndpsd[bin] <= bndpsd[bin+1]) { + begin = bin + 1 ; + break ; + } + } + + end1=bndend; + if (end1 > 22) end1=22; + + for (bin = begin; bin < end1; bin++) { + lowcomp = calc_lowcomp(lowcomp, bndpsd[bin], bndpsd[bin+1], bin) ; + + fastleak -= s->fdecay ; + v = bndpsd[bin] - fgain; + if (fastleak < v) fastleak = v; + + slowleak -= s->sdecay ; + v = bndpsd[bin] - s->sgain; + if (slowleak < v) slowleak = v; + + v=fastleak - lowcomp; + if (slowleak > v) v=slowleak; + + excite[bin] = v; + } + + for (bin = 22; bin < bndend; bin++) { + 
fastleak -= s->fdecay ; + v = bndpsd[bin] - fgain; + if (fastleak < v) fastleak = v; + slowleak -= s->sdecay ; + v = bndpsd[bin] - s->sgain; + if (slowleak < v) slowleak = v; + + v=fastleak; + if (slowleak > v) v = slowleak; + excite[bin] = v; + } + + /* compute masking curve */ + + for (bin = bndstrt; bin < bndend; bin++) { + v1 = excite[bin]; + tmp = s->dbknee - bndpsd[bin]; + if (tmp > 0) { + v1 += tmp >> 2; + } + v=hth[bin >> s->halfratecod][s->fscod]; + if (v1 > v) v=v1; + mask[bin] = v; + } + + /* compute bit allocation */ + + i = start ; + j = masktab[start] ; + do { + v=mask[j]; + v -= snroffset ; + v -= s->floor ; + if (v < 0) v = 0; + v &= 0x1fe0 ; + v += s->floor ; + + end1=bndtab[j] + bndsz[j]; + if (end1 > end) end1=end; + + for (k = i; k < end1; k++) { + address = (psd[i] - v) >> 5 ; + if (address < 0) address=0; + else if (address > 63) address=63; + bap[i] = baptab[address]; + i++; + } + } while (end > bndtab[j++]) ; +} + +typedef struct IComplex { + short re,im; +} IComplex; + +static void fft_init(int ln) +{ + int i, j, m, n; + float alpha; + + n = 1 << ln; + + for(i=0;i<(n/2);i++) { + alpha = 2 * M_PI * (float)i / (float)n; + costab[i] = fix15(cos(alpha)); + sintab[i] = fix15(sin(alpha)); + } + + for(i=0;i<n;i++) { + m=0; + for(j=0;j<ln;j++) { + m |= ((i >> j) & 1) << (ln-j-1); + } + fft_rev[i]=m; + } +} + +/* butter fly op */ +#define BF(pre, pim, qre, qim, pre1, pim1, qre1, qim1) \ +{\ + int ax, ay, bx, by;\ + bx=pre1;\ + by=pim1;\ + ax=qre1;\ + ay=qim1;\ + pre = (bx + ax) >> 1;\ + pim = (by + ay) >> 1;\ + qre = (bx - ax) >> 1;\ + qim = (by - ay) >> 1;\ +} + +#define MUL16(a,b) ((a) * (b)) + +#define CMUL(pre, pim, are, aim, bre, bim) \ +{\ + pre = (MUL16(are, bre) - MUL16(aim, bim)) >> 15;\ + pim = (MUL16(are, bim) + MUL16(bre, aim)) >> 15;\ +} + + +/* do a 2^n point complex fft on 2^ln points. 
*/ +static void fft(IComplex *z, int ln) +{ + int j, l, np, np2; + int nblocks, nloops; + register IComplex *p,*q; + int tmp_re, tmp_im; + + np = 1 << ln; + + /* reverse */ + for(j=0;j<np;j++) { + int k; + IComplex tmp; + k = fft_rev[j]; + if (k < j) { + tmp = z[k]; + z[k] = z[j]; + z[j] = tmp; + } + } + + /* pass 0 */ + + p=&z[0]; + j=(np >> 1); + do { + BF(p[0].re, p[0].im, p[1].re, p[1].im, + p[0].re, p[0].im, p[1].re, p[1].im); + p+=2; + } while (--j != 0); + + /* pass 1 */ + + p=&z[0]; + j=np >> 2; + do { + BF(p[0].re, p[0].im, p[2].re, p[2].im, + p[0].re, p[0].im, p[2].re, p[2].im); + BF(p[1].re, p[1].im, p[3].re, p[3].im, + p[1].re, p[1].im, p[3].im, -p[3].re); + p+=4; + } while (--j != 0); + + /* pass 2 .. ln-1 */ + + nblocks = np >> 3; + nloops = 1 << 2; + np2 = np >> 1; + do { + p = z; + q = z + nloops; + for (j = 0; j < nblocks; ++j) { + + BF(p->re, p->im, q->re, q->im, + p->re, p->im, q->re, q->im); + + p++; + q++; + for(l = nblocks; l < np2; l += nblocks) { + CMUL(tmp_re, tmp_im, costab[l], -sintab[l], q->re, q->im); + BF(p->re, p->im, q->re, q->im, + p->re, p->im, tmp_re, tmp_im); + p++; + q++; + } + p += nloops; + q += nloops; + } + nblocks = nblocks >> 1; + nloops = nloops << 1; + } while (nblocks != 0); +} + +/* do a 512 point mdct */ +static void mdct512(INT32 *out, INT16 *in) +{ + int i, re, im, re1, im1; + INT16 rot[N]; + IComplex x[N/4]; + + /* shift to simplify computations */ + for(i=0;i<N/4;i++) + rot[i] = -in[i + 3*N/4]; + for(i=N/4;i<N;i++) + rot[i] = in[i - N/4]; + + /* pre rotation */ + for(i=0;i<N/4;i++) { + re = ((int)rot[2*i] - (int)rot[N-1-2*i]) >> 1; + im = -((int)rot[N/2+2*i] - (int)rot[N/2-1-2*i]) >> 1; + CMUL(x[i].re, x[i].im, re, im, -xcos1[i], xsin1[i]); + } + + fft(x, MDCT_NBITS - 2); + + /* post rotation */ + for(i=0;i<N/4;i++) { + re = x[i].re; + im = x[i].im; + CMUL(re1, im1, re, im, xsin1[i], xcos1[i]); + out[2*i] = im1; + out[N/2-1-2*i] = re1; + } +} + +/* XXX: use another norm ? 
*/ +static int calc_exp_diff(UINT8 *exp1, UINT8 *exp2, int n) +{ + int sum, i; + sum = 0; + for(i=0;i<n;i++) { + sum += abs(exp1[i] - exp2[i]); + } + return sum; +} + +static void compute_exp_strategy(UINT8 exp_strategy[NB_BLOCKS][AC3_MAX_CHANNELS], + UINT8 exp[NB_BLOCKS][AC3_MAX_CHANNELS][N/2], + int ch) +{ + int i, j; + int exp_diff; + + /* estimate if the exponent variation & decide if they should be + reused in the next frame */ + exp_strategy[0][ch] = EXP_NEW; + for(i=1;i<NB_BLOCKS;i++) { + exp_diff = calc_exp_diff(exp[i][ch], exp[i-1][ch], N/2); +#ifdef DEBUG + printf("exp_diff=%d\n", exp_diff); +#endif + if (exp_diff > EXP_DIFF_THRESHOLD) + exp_strategy[i][ch] = EXP_NEW; + else + exp_strategy[i][ch] = EXP_REUSE; + } + /* now select the encoding strategy type : if exponents are often + recoded, we use a coarse encoding */ + i = 0; + while (i < NB_BLOCKS) { + j = i + 1; + while (j < NB_BLOCKS && exp_strategy[j][ch] == EXP_REUSE) + j++; + switch(j - i) { + case 1: + exp_strategy[i][ch] = EXP_D45; + break; + case 2: + case 3: + exp_strategy[i][ch] = EXP_D25; + break; + default: + exp_strategy[i][ch] = EXP_D15; + break; + } + i = j; + } +} + +/* set exp[i] to min(exp[i], exp1[i]) */ +static void exponent_min(UINT8 exp[N/2], UINT8 exp1[N/2], int n) +{ + int i; + + for(i=0;i<n;i++) { + if (exp1[i] < exp[i]) + exp[i] = exp1[i]; + } +} + +/* update the exponents so that they are the ones the decoder will + decode. 
Return the number of bits used to code the exponents */ +static int encode_exp(UINT8 encoded_exp[N/2], + UINT8 exp[N/2], + int nb_exps, + int exp_strategy) +{ + int group_size, nb_groups, i, j, k, recurse, exp_min, delta; + UINT8 exp1[N/2]; + + switch(exp_strategy) { + case EXP_D15: + group_size = 1; + break; + case EXP_D25: + group_size = 2; + break; + default: + case EXP_D45: + group_size = 4; + break; + } + nb_groups = ((nb_exps + (group_size * 3) - 4) / (3 * group_size)) * 3; + + /* for each group, compute the minimum exponent */ + exp1[0] = exp[0]; /* DC exponent is handled separately */ + k = 1; + for(i=1;i<=nb_groups;i++) { + exp_min = exp[k]; + assert(exp_min >= 0 && exp_min <= 24); + for(j=1;j<group_size;j++) { + if (exp[k+j] < exp_min) + exp_min = exp[k+j]; + } + exp1[i] = exp_min; + k += group_size; + } + + /* constraint for DC exponent */ + if (exp1[0] > 15) + exp1[0] = 15; + + /* Iterate until the delta constraints between each groups are + satisfyed. I'm sure it is possible to find a better algorithm, + but I am lazy */ + do { + recurse = 0; + for(i=1;i<=nb_groups;i++) { + delta = exp1[i] - exp1[i-1]; + if (delta > 2) { + /* if delta too big, we encode a smaller exponent */ + exp1[i] = exp1[i-1] + 2; + } else if (delta < -2) { + /* if delta is too small, we must decrease the previous + exponent, which means we must recurse */ + recurse = 1; + exp1[i-1] = exp1[i] + 2; + } + } + } while (recurse); + + /* now we have the exponent values the decoder will see */ + encoded_exp[0] = exp1[0]; + k = 1; + for(i=1;i<=nb_groups;i++) { + for(j=0;j<group_size;j++) { + encoded_exp[k+j] = exp1[i]; + } + k += group_size; + } + +#if defined(DEBUG) + printf("exponents: strategy=%d\n", exp_strategy); + for(i=0;i<=nb_groups * group_size;i++) { + printf("%d ", encoded_exp[i]); + } + printf("\n"); +#endif + + return 4 + (nb_groups / 3) * 7; +} + +/* return the size in bits taken by the mantissa */ +int compute_mantissa_size(AC3EncodeContext *s, UINT8 *m, int nb_coefs) +{ + 
int bits, mant, i; + + bits = 0; + for(i=0;i<nb_coefs;i++) { + mant = m[i]; + switch(mant) { + case 0: + /* nothing */ + break; + case 1: + /* 3 mantissa in 5 bits */ + if (s->mant1_cnt == 0) + bits += 5; + if (++s->mant1_cnt == 3) + s->mant1_cnt = 0; + break; + case 2: + /* 3 mantissa in 7 bits */ + if (s->mant2_cnt == 0) + bits += 7; + if (++s->mant2_cnt == 3) + s->mant2_cnt = 0; + break; + case 3: + bits += 3; + break; + case 4: + /* 2 mantissa in 7 bits */ + if (s->mant4_cnt == 0) + bits += 7; + if (++s->mant4_cnt == 2) + s->mant4_cnt = 0; + break; + case 14: + bits += 14; + break; + case 15: + bits += 16; + break; + default: + bits += mant - 1; + break; + } + } + return bits; +} + + +static int bit_alloc(AC3EncodeContext *s, + UINT8 bap[NB_BLOCKS][AC3_MAX_CHANNELS][N/2], + UINT8 encoded_exp[NB_BLOCKS][AC3_MAX_CHANNELS][N/2], + UINT8 exp_strategy[NB_BLOCKS][AC3_MAX_CHANNELS], + int frame_bits, int csnroffst, int fsnroffst) +{ + int i, ch; + + /* compute size */ + for(i=0;i<NB_BLOCKS;i++) { + s->mant1_cnt = 0; + s->mant2_cnt = 0; + s->mant4_cnt = 0; + for(ch=0;ch<s->nb_channels;ch++) { + parametric_bit_allocation(s, bap[i][ch], encoded_exp[i][ch], + 0, s->nb_coefs[ch], + (((csnroffst-15) << 4) + + fsnroffst) << 2, + fgaintab[s->fgaincod[ch]]); + frame_bits += compute_mantissa_size(s, bap[i][ch], + s->nb_coefs[ch]); + } + } +#if 0 + printf("csnr=%d fsnr=%d frame_bits=%d diff=%d\n", + csnroffst, fsnroffst, frame_bits, + 16 * s->frame_size - ((frame_bits + 7) & ~7)); +#endif + return 16 * s->frame_size - frame_bits; +} + +#define SNR_INC1 4 + +static int compute_bit_allocation(AC3EncodeContext *s, + UINT8 bap[NB_BLOCKS][AC3_MAX_CHANNELS][N/2], + UINT8 encoded_exp[NB_BLOCKS][AC3_MAX_CHANNELS][N/2], + UINT8 exp_strategy[NB_BLOCKS][AC3_MAX_CHANNELS], + int frame_bits) +{ + int i, ch; + int csnroffst, fsnroffst; + UINT8 bap1[NB_BLOCKS][AC3_MAX_CHANNELS][N/2]; + + /* init default parameters */ + s->sdecaycod = 2; + s->fdecaycod = 1; + s->sgaincod = 1; + s->dbkneecod = 
2; + s->floorcod = 4; + for(ch=0;ch<s->nb_channels;ch++) + s->fgaincod[ch] = 4; + + /* compute real values */ + s->sdecay = sdecaytab[s->sdecaycod] >> s->halfratecod; + s->fdecay = fdecaytab[s->fdecaycod] >> s->halfratecod; + s->sgain = sgaintab[s->sgaincod]; + s->dbknee = dbkneetab[s->dbkneecod]; + s->floor = floortab[s->floorcod]; + + /* header size */ + frame_bits += 65; + if (s->acmod == 2) + frame_bits += 2; + + /* audio blocks */ + for(i=0;i<NB_BLOCKS;i++) { + frame_bits += s->nb_channels * 2 + 2; + if (s->acmod == 2) + frame_bits++; + frame_bits += 2 * s->nb_channels; + for(ch=0;ch<s->nb_channels;ch++) { + if (exp_strategy[i][ch] != EXP_REUSE) + frame_bits += 6 + 2; + } + frame_bits++; /* baie */ + frame_bits++; /* snr */ + frame_bits += 2; /* delta / skip */ + } + frame_bits++; /* cplinu for block 0 */ + /* bit alloc info */ + frame_bits += 2*4 + 3 + 6 + s->nb_channels * (4 + 3); + + /* CRC */ + frame_bits += 16; + + /* now the big work begins : do the bit allocation. Modify the snr + offset until we can pack everything in the requested frame size */ + + csnroffst = s->csnroffst; + while (csnroffst >= 0 && + bit_alloc(s, bap, encoded_exp, exp_strategy, frame_bits, csnroffst, 0) < 0) + csnroffst -= SNR_INC1; + if (csnroffst < 0) { + fprintf(stderr, "Error !!!\n"); + return -1; + } + while ((csnroffst + SNR_INC1) <= 63 && + bit_alloc(s, bap1, encoded_exp, exp_strategy, frame_bits, + csnroffst + SNR_INC1, 0) >= 0) { + csnroffst += SNR_INC1; + memcpy(bap, bap1, sizeof(bap1)); + } + while ((csnroffst + 1) <= 63 && + bit_alloc(s, bap1, encoded_exp, exp_strategy, frame_bits, csnroffst + 1, 0) >= 0) { + csnroffst++; + memcpy(bap, bap1, sizeof(bap1)); + } + + fsnroffst = 0; + while ((fsnroffst + SNR_INC1) <= 15 && + bit_alloc(s, bap1, encoded_exp, exp_strategy, frame_bits, + csnroffst, fsnroffst + SNR_INC1) >= 0) { + fsnroffst += SNR_INC1; + memcpy(bap, bap1, sizeof(bap1)); + } + while ((fsnroffst + 1) <= 15 && + bit_alloc(s, bap1, encoded_exp, exp_strategy, 
frame_bits, + csnroffst, fsnroffst + 1) >= 0) { + fsnroffst++; + memcpy(bap, bap1, sizeof(bap1)); + } + + s->csnroffst = csnroffst; + for(ch=0;ch<s->nb_channels;ch++) + s->fsnroffst[ch] = fsnroffst; +#if defined(DEBUG_BITALLOC) + { + int j; + + for(i=0;i<6;i++) { + for(ch=0;ch<s->nb_channels;ch++) { + printf("Block #%d Ch%d:\n", i, ch); + printf("bap="); + for(j=0;j<s->nb_coefs[ch];j++) { + printf("%d ",bap[i][ch][j]); + } + printf("\n"); + } + } + } +#endif + return 0; +} + +static int AC3_encode_init(AVEncodeContext *avctx) +{ + int freq = avctx->rate; + int bitrate = avctx->bit_rate; + int channels = avctx->channels; + AC3EncodeContext *s = avctx->priv_data; + int i, j, k, l, ch, v; + float alpha; + static unsigned short freqs[3] = { 48000, 44100, 32000 }; + + avctx->frame_size = AC3_FRAME_SIZE; + avctx->key_frame = 1; /* always key frame */ + + /* number of channels */ + if (channels == 1) + s->acmod = 1; + else if (channels == 2) + s->acmod = 2; + else + return -1; + s->nb_channels = channels; + + /* frequency */ + for(i=0;i<3;i++) { + for(j=0;j<3;j++) + if ((freqs[j] >> i) == freq) + goto found; + } + return -1; + found: + s->sample_rate = freq; + s->halfratecod = i; + s->fscod = j; + s->bsid = 8 + s->halfratecod; + s->bsmod = 0; /* complete main audio service */ + + /* bitrate & frame size */ + bitrate /= 1000; + for(i=0;i<19;i++) { + if ((bitratetab[i] >> s->halfratecod) == bitrate) + break; + } + if (i == 19) + return -1; + s->bit_rate = bitrate; + s->frmsizecod = i << 1; + s->frame_size_min = (bitrate * 1000 * AC3_FRAME_SIZE) / (freq * 16); + /* for now we do not handle fractional sizes */ + s->frame_size = s->frame_size_min; + + /* bit allocation init */ + for(ch=0;ch<s->nb_channels;ch++) { + /* bandwidth for each channel */ + /* XXX: should compute the bandwidth according to the frame + size, so that we avoid anoying high freq artefacts */ + s->chbwcod[ch] = 50; /* sample bandwidth as mpeg audio layer 2 table 0 */ + s->nb_coefs[ch] = ((s->chbwcod[ch] + 
12) * 3) + 37; + } + /* initial snr offset */ + s->csnroffst = 40; + + /* compute bndtab and masktab from bandsz */ + k = 0; + l = 0; + for(i=0;i<50;i++) { + bndtab[i] = l; + v = bndsz[i]; + for(j=0;j<v;j++) masktab[k++]=i; + l += v; + } + bndtab[50] = 0; + + /* mdct init */ + fft_init(MDCT_NBITS - 2); + for(i=0;i<N/4;i++) { + alpha = 2 * M_PI * (i + 1.0 / 8.0) / (float)N; + xcos1[i] = fix15(-cos(alpha)); + xsin1[i] = fix15(-sin(alpha)); + } + + ac3_crc_init(); + + return 0; +} + +/* output the AC3 frame header */ +static void output_frame_header(AC3EncodeContext *s, unsigned char *frame) +{ + init_put_bits(&s->pb, frame, AC3_MAX_CODED_FRAME_SIZE, NULL, NULL); + + put_bits(&s->pb, 16, 0x0b77); /* frame header */ + put_bits(&s->pb, 16, 0); /* crc1: will be filled later */ + put_bits(&s->pb, 2, s->fscod); + put_bits(&s->pb, 6, s->frmsizecod + (s->frame_size - s->frame_size_min)); + put_bits(&s->pb, 5, s->bsid); + put_bits(&s->pb, 3, s->bsmod); + put_bits(&s->pb, 3, s->acmod); + if (s->acmod == 2) { + put_bits(&s->pb, 2, 0); /* surround not indicated */ + } + put_bits(&s->pb, 1, 0); /* no LFE */ + put_bits(&s->pb, 5, 31); /* dialog norm: -31 db */ + put_bits(&s->pb, 1, 0); /* no compression control word */ + put_bits(&s->pb, 1, 0); /* no lang code */ + put_bits(&s->pb, 1, 0); /* no audio production info */ + put_bits(&s->pb, 1, 0); /* no copyright */ + put_bits(&s->pb, 1, 1); /* original bitstream */ + put_bits(&s->pb, 1, 0); /* no time code 1 */ + put_bits(&s->pb, 1, 0); /* no time code 2 */ + put_bits(&s->pb, 1, 0); /* no addtional bit stream info */ +} + +/* symetric quantization on 'levels' levels */ +static inline int sym_quant(int c, int e, int levels) +{ + int v; + + if (c >= 0) { + v = (levels * (c << e)) >> 25; + v = (levels >> 1) + v; + } else { + v = (levels * ((-c) << e)) >> 25; + v = (levels >> 1) - v; + } + assert (v >= 0 && v < levels); + return v; +} + +/* asymetric quantization on 2^qbits levels */ +static inline int asym_quant(int c, int e, int 
qbits) +{ + int lshift, m, v; + + lshift = e + qbits - 24; + if (lshift >= 0) + v = c << lshift; + else + v = c >> (-lshift); + /* rounding */ + v = (v + 1) >> 1; + m = (1 << (qbits-1)); + if (v >= m) + v = m - 1; + assert(v >= -m); + return v & ((1 << qbits)-1); +} + +/* Output one audio block. There are NB_BLOCKS audio blocks in one AC3 + frame */ +static void output_audio_block(AC3EncodeContext *s, + UINT8 exp_strategy[AC3_MAX_CHANNELS], + UINT8 encoded_exp[AC3_MAX_CHANNELS][N/2], + UINT8 bap[AC3_MAX_CHANNELS][N/2], + INT32 mdct_coefs[AC3_MAX_CHANNELS][N/2], + INT8 global_exp[AC3_MAX_CHANNELS], + int block_num) +{ + int ch, nb_groups, group_size, i, baie; + UINT8 *p; + UINT16 qmant[AC3_MAX_CHANNELS][N/2]; + int exp0, exp1; + int mant1_cnt, mant2_cnt, mant4_cnt; + UINT16 *qmant1_ptr, *qmant2_ptr, *qmant4_ptr; + int delta0, delta1, delta2; + + for(ch=0;ch<s->nb_channels;ch++) + put_bits(&s->pb, 1, 0); /* 512 point MDCT */ + for(ch=0;ch<s->nb_channels;ch++) + put_bits(&s->pb, 1, 1); /* no dither */ + put_bits(&s->pb, 1, 0); /* no dynamic range */ + if (block_num == 0) { + /* for block 0, even if no coupling, we must say it. 
This is a + waste of bit :-) */ + put_bits(&s->pb, 1, 1); /* coupling strategy present */ + put_bits(&s->pb, 1, 0); /* no coupling strategy */ + } else { + put_bits(&s->pb, 1, 0); /* no new coupling strategy */ + } + + if (s->acmod == 2) { + put_bits(&s->pb, 1, 0); /* no matrixing (but should be used in the future) */ + } + +#if defined(DEBUG) + { + static int count = 0; + printf("Block #%d (%d)\n", block_num, count++); + } +#endif + /* exponent strategy */ + for(ch=0;ch<s->nb_channels;ch++) { + put_bits(&s->pb, 2, exp_strategy[ch]); + } + + for(ch=0;ch<s->nb_channels;ch++) { + if (exp_strategy[ch] != EXP_REUSE) + put_bits(&s->pb, 6, s->chbwcod[ch]); + } + + /* exponents */ + for (ch = 0; ch < s->nb_channels; ch++) { + switch(exp_strategy[ch]) { + case EXP_REUSE: + continue; + case EXP_D15: + group_size = 1; + break; + case EXP_D25: + group_size = 2; + break; + default: + case EXP_D45: + group_size = 4; + break; + } + nb_groups = (s->nb_coefs[ch] + (group_size * 3) - 4) / (3 * group_size); + p = encoded_exp[ch]; + + /* first exponent */ + exp1 = *p++; + put_bits(&s->pb, 4, exp1); + + /* next ones are delta encoded */ + for(i=0;i<nb_groups;i++) { + /* merge three delta in one code */ + exp0 = exp1; + exp1 = p[0]; + p += group_size; + delta0 = exp1 - exp0 + 2; + + exp0 = exp1; + exp1 = p[0]; + p += group_size; + delta1 = exp1 - exp0 + 2; + + exp0 = exp1; + exp1 = p[0]; + p += group_size; + delta2 = exp1 - exp0 + 2; + + put_bits(&s->pb, 7, ((delta0 * 5 + delta1) * 5) + delta2); + } + + put_bits(&s->pb, 2, 0); /* no gain range info */ + } + + /* bit allocation info */ + baie = (block_num == 0); + put_bits(&s->pb, 1, baie); + if (baie) { + put_bits(&s->pb, 2, s->sdecaycod); + put_bits(&s->pb, 2, s->fdecaycod); + put_bits(&s->pb, 2, s->sgaincod); + put_bits(&s->pb, 2, s->dbkneecod); + put_bits(&s->pb, 3, s->floorcod); + } + + /* snr offset */ + put_bits(&s->pb, 1, baie); /* always present with bai */ + if (baie) { + put_bits(&s->pb, 6, s->csnroffst); + 
for(ch=0;ch<s->nb_channels;ch++) { + put_bits(&s->pb, 4, s->fsnroffst[ch]); + put_bits(&s->pb, 3, s->fgaincod[ch]); + } + } + + put_bits(&s->pb, 1, 0); /* no delta bit allocation */ + put_bits(&s->pb, 1, 0); /* no data to skip */ + + /* mantissa encoding : we use two passes to handle the grouping. A + one pass method may be faster, but it would necessitate to + modify the output stream. */ + + /* first pass: quantize */ + mant1_cnt = mant2_cnt = mant4_cnt = 0; + qmant1_ptr = qmant2_ptr = qmant4_ptr = NULL; + + for (ch = 0; ch < s->nb_channels; ch++) { + int b, c, e, v; + + for(i=0;i<s->nb_coefs[ch];i++) { + c = mdct_coefs[ch][i]; + e = encoded_exp[ch][i] - global_exp[ch]; + b = bap[ch][i]; + switch(b) { + case 0: + v = 0; + break; + case 1: + v = sym_quant(c, e, 3); + switch(mant1_cnt) { + case 0: + qmant1_ptr = &qmant[ch][i]; + v = 9 * v; + mant1_cnt = 1; + break; + case 1: + *qmant1_ptr += 3 * v; + mant1_cnt = 2; + v = 128; + break; + default: + *qmant1_ptr += v; + mant1_cnt = 0; + v = 128; + break; + } + break; + case 2: + v = sym_quant(c, e, 5); + switch(mant2_cnt) { + case 0: + qmant2_ptr = &qmant[ch][i]; + v = 25 * v; + mant2_cnt = 1; + break; + case 1: + *qmant2_ptr += 5 * v; + mant2_cnt = 2; + v = 128; + break; + default: + *qmant2_ptr += v; + mant2_cnt = 0; + v = 128; + break; + } + break; + case 3: + v = sym_quant(c, e, 7); + break; + case 4: + v = sym_quant(c, e, 11); + switch(mant4_cnt) { + case 0: + qmant4_ptr = &qmant[ch][i]; + v = 11 * v; + mant4_cnt = 1; + break; + default: + *qmant4_ptr += v; + mant4_cnt = 0; + v = 128; + break; + } + break; + case 5: + v = sym_quant(c, e, 15); + break; + case 14: + v = asym_quant(c, e, 14); + break; + case 15: + v = asym_quant(c, e, 16); + break; + default: + v = asym_quant(c, e, b - 1); + break; + } + qmant[ch][i] = v; + } + } + + /* second pass : output the values */ + for (ch = 0; ch < s->nb_channels; ch++) { + int b, q; + + for(i=0;i<s->nb_coefs[ch];i++) { + q = qmant[ch][i]; + b = bap[ch][i]; + switch(b) { + 
case 0: + break; + case 1: + if (q != 128) + put_bits(&s->pb, 5, q); + break; + case 2: + if (q != 128) + put_bits(&s->pb, 7, q); + break; + case 3: + put_bits(&s->pb, 3, q); + break; + case 4: + if (q != 128) + put_bits(&s->pb, 7, q); + break; + case 14: + put_bits(&s->pb, 14, q); + break; + case 15: + put_bits(&s->pb, 16, q); + break; + default: + put_bits(&s->pb, b - 1, q); + break; + } + } + } +} + +/* compute the ac3 crc */ + +#define CRC16_POLY ((1 << 0) | (1 << 2) | (1 << 15) | (1 << 16)) + +static void ac3_crc_init(void) +{ + unsigned int c, n, k; + + for(n=0;n<256;n++) { + c = n << 8; + for (k = 0; k < 8; k++) { + if (c & (1 << 15)) + c = ((c << 1) & 0xffff) ^ (CRC16_POLY & 0xffff); + else + c = c << 1; + } + crc_table[n] = c; + } +} + +static unsigned int ac3_crc(UINT8 *data, int n, unsigned int crc) +{ + int i; + for(i=0;i<n;i++) { + crc = (crc_table[data[i] ^ (crc >> 8)] ^ (crc << 8)) & 0xffff; + } + return crc; +} + +static unsigned int mul_poly(unsigned int a, unsigned int b, unsigned int poly) +{ + unsigned int c; + + c = 0; + while (a) { + if (a & 1) + c ^= b; + a = a >> 1; + b = b << 1; + if (b & (1 << 16)) + b ^= poly; + } + return c; +} + +static unsigned int pow_poly(unsigned int a, unsigned int n, unsigned int poly) +{ + unsigned int r; + r = 1; + while (n) { + if (n & 1) + r = mul_poly(r, a, poly); + a = mul_poly(a, a, poly); + n >>= 1; + } + return r; +} + + +/* compute log2(max(abs(tab[]))) */ +static int log2_tab(INT16 *tab, int n) +{ + int i, v; + + v = 0; + for(i=0;i<n;i++) { + v |= abs(tab[i]); + } + return log2(v); +} + +static void lshift_tab(INT16 *tab, int n, int lshift) +{ + int i; + + if (lshift > 0) { + for(i=0;i<n;i++) { + tab[i] <<= lshift; + } + } else if (lshift < 0) { + lshift = -lshift; + for(i=0;i<n;i++) { + tab[i] >>= lshift; + } + } +} + +/* fill the end of the frame and compute the two crcs */ +static int output_frame_end(AC3EncodeContext *s) +{ + int frame_size, frame_size_58, n, crc1, crc2, crc_inv; + UINT8 *frame; + + 
frame_size = s->frame_size; /* frame size in words */ + /* align to 8 bits */ + flush_put_bits(&s->pb); + /* add zero bytes to reach the frame size */ + frame = s->pb.buf; + n = 2 * s->frame_size - (s->pb.buf_ptr - frame) - 2; + assert(n >= 0); + memset(s->pb.buf_ptr, 0, n); + + /* Now we must compute both crcs : this is not so easy for crc1 + because it is at the beginning of the data... */ + frame_size_58 = (frame_size >> 1) + (frame_size >> 3); + crc1 = ac3_crc(frame + 4, (2 * frame_size_58) - 4, 0); + /* XXX: could precompute crc_inv */ + crc_inv = pow_poly((CRC16_POLY >> 1), (16 * frame_size_58) - 16, CRC16_POLY); + crc1 = mul_poly(crc_inv, crc1, CRC16_POLY); + frame[2] = crc1 >> 8; + frame[3] = crc1; + + crc2 = ac3_crc(frame + 2 * frame_size_58, (frame_size - frame_size_58) * 2 - 2, 0); + frame[2*frame_size - 2] = crc2 >> 8; + frame[2*frame_size - 1] = crc2; + + // printf("n=%d frame_size=%d\n", n, frame_size); + return frame_size * 2; +} + +int AC3_encode_frame(AVEncodeContext *avctx, + unsigned char *frame, int buf_size, void *data) +{ + AC3EncodeContext *s = avctx->priv_data; + short *samples = data; + int i, j, k, v, ch; + INT16 input_samples[N]; + INT32 mdct_coef[NB_BLOCKS][AC3_MAX_CHANNELS][N/2]; + UINT8 exp[NB_BLOCKS][AC3_MAX_CHANNELS][N/2]; + UINT8 exp_strategy[NB_BLOCKS][AC3_MAX_CHANNELS]; + UINT8 encoded_exp[NB_BLOCKS][AC3_MAX_CHANNELS][N/2]; + UINT8 bap[NB_BLOCKS][AC3_MAX_CHANNELS][N/2]; + INT8 exp_samples[NB_BLOCKS][AC3_MAX_CHANNELS]; + int frame_bits; + + frame_bits = 0; + for(ch=0;ch<s->nb_channels;ch++) { + /* fixed mdct to the six sub blocks & exponent computation */ + for(i=0;i<NB_BLOCKS;i++) { + INT16 *sptr; + int sinc; + + /* compute input samples */ + memcpy(input_samples, s->last_samples[ch], N/2 * sizeof(INT16)); + sinc = s->nb_channels; + sptr = samples + (sinc * (N/2) * i) + ch; + for(j=0;j<N/2;j++) { + v = *sptr; + input_samples[j + N/2] = v; + s->last_samples[ch][j] = v; + sptr += sinc; + } + + /* apply the MDCT window */ + 
for(j=0;j<N/2;j++) { + input_samples[j] = MUL16(input_samples[j], + ac3_window[j]) >> 15; + input_samples[N-j-1] = MUL16(input_samples[N-j-1], + ac3_window[j]) >> 15; + } + + /* Normalize the samples to use the maximum available + precision */ + v = 14 - log2_tab(input_samples, N); + if (v < 0) + v = 0; + exp_samples[i][ch] = v - 8; + lshift_tab(input_samples, N, v); + + /* do the MDCT */ + mdct512(mdct_coef[i][ch], input_samples); + + /* compute "exponents". We take into account the + normalization there */ + for(j=0;j<N/2;j++) { + int e; + v = abs(mdct_coef[i][ch][j]); + if (v == 0) + e = 24; + else { + e = 23 - log2(v) + exp_samples[i][ch]; + if (e >= 24) { + e = 24; + mdct_coef[i][ch][j] = 0; + } + } + exp[i][ch][j] = e; + } + } + + compute_exp_strategy(exp_strategy, exp, ch); + + /* compute the exponents as the decoder will see them. The + EXP_REUSE case must be handled carefully : we select the + min of the exponents */ + i = 0; + while (i < NB_BLOCKS) { + j = i + 1; + while (j < NB_BLOCKS && exp_strategy[j][ch] == EXP_REUSE) { + exponent_min(exp[i][ch], exp[j][ch], s->nb_coefs[ch]); + j++; + } + frame_bits += encode_exp(encoded_exp[i][ch], + exp[i][ch], s->nb_coefs[ch], + exp_strategy[i][ch]); + /* copy encoded exponents for reuse case */ + for(k=i+1;k<j;k++) { + memcpy(encoded_exp[k][ch], encoded_exp[i][ch], + s->nb_coefs[ch] * sizeof(UINT8)); + } + i = j; + } + } + + compute_bit_allocation(s, bap, encoded_exp, exp_strategy, frame_bits); + /* everything is known... 
let's output the frame */ + output_frame_header(s, frame); + + for(i=0;i<NB_BLOCKS;i++) { + output_audio_block(s, exp_strategy[i], encoded_exp[i], + bap[i], mdct_coef[i], exp_samples[i], i); + } + return output_frame_end(s); +} + +#if 0 +/*************************************************************************/ +/* TEST */ + +#define FN (N/4) + +void fft_test(void) +{ + IComplex in[FN], in1[FN]; + int k, n, i; + float sum_re, sum_im, a; + + /* FFT test */ + + for(i=0;i<FN;i++) { + in[i].re = random() % 65535 - 32767; + in[i].im = random() % 65535 - 32767; + in1[i] = in[i]; + } + fft(in, 7); + + /* do it by hand */ + for(k=0;k<FN;k++) { + sum_re = 0; + sum_im = 0; + for(n=0;n<FN;n++) { + a = -2 * M_PI * (n * k) / FN; + sum_re += in1[n].re * cos(a) - in1[n].im * sin(a); + sum_im += in1[n].re * sin(a) + in1[n].im * cos(a); + } + printf("%3d: %6d,%6d %6.0f,%6.0f\n", + k, in[k].re, in[k].im, sum_re / FN, sum_im / FN); + } +} + +void mdct_test(void) +{ + INT16 input[N]; + INT32 output[N/2]; + float input1[N]; + float output1[N/2]; + float s, a, err, e, emax; + int i, k, n; + + for(i=0;i<N;i++) { + input[i] = (random() % 65535 - 32767) * 9 / 10; + input1[i] = input[i]; + } + + mdct512(output, input); + + /* do it by hand */ + for(k=0;k<N/2;k++) { + s = 0; + for(n=0;n<N;n++) { + a = (2*M_PI*(2*n+1+N/2)*(2*k+1) / (4 * N)); + s += input1[n] * cos(a); + } + output1[k] = -2 * s / N; + } + + err = 0; + emax = 0; + for(i=0;i<N/2;i++) { + printf("%3d: %7d %7.0f\n", i, output[i], output1[i]); + e = output[i] - output1[i]; + if (e > emax) + emax = e; + err += e * e; + } + printf("err2=%f emax=%f\n", err / (N/2), emax); +} + +void test_ac3(void) +{ + AC3EncodeContext ctx; + unsigned char frame[AC3_MAX_CODED_FRAME_SIZE]; + short samples[AC3_FRAME_SIZE]; + int ret, i; + + AC3_encode_init(&ctx, 44100, 64000, 1); + + fft_test(); + mdct_test(); + + for(i=0;i<AC3_FRAME_SIZE;i++) + samples[i] = (int)(sin(2*M_PI*i*1000.0/44100) * 10000); + ret = AC3_encode_frame(&ctx, frame, samples); + 
printf("ret=%d\n", ret); +} +#endif + +AVEncoder ac3_encoder = { + "ac3", + CODEC_TYPE_AUDIO, + CODEC_ID_AC3, + sizeof(AC3EncodeContext), + AC3_encode_init, + AC3_encode_frame, + NULL, +}; diff --git a/libav/ac3enc.h b/libav/ac3enc.h new file mode 100644 index 0000000000..40cc53aced --- /dev/null +++ b/libav/ac3enc.h @@ -0,0 +1,32 @@ + +#define AC3_FRAME_SIZE (6*256) +#define AC3_MAX_CODED_FRAME_SIZE 3840 /* in bytes */ +#define AC3_MAX_CHANNELS 2 /* we handle at most two channels, although + AC3 allows 6 channels */ + +typedef struct AC3EncodeContext { + PutBitContext pb; + int nb_channels; + int bit_rate; + int sample_rate; + int bsid; + int frame_size_min; /* minimum frame size in case rounding is necessary */ + int frame_size; /* current frame size in words */ + int halfratecod; + int frmsizecod; + int fscod; /* frequency */ + int acmod; + int bsmod; + short last_samples[AC3_MAX_CHANNELS][256]; + int chbwcod[AC3_MAX_CHANNELS]; + int nb_coefs[AC3_MAX_CHANNELS]; + + /* bitrate allocation control */ + int sgaincod, sdecaycod, fdecaycod, dbkneecod, floorcod; + int sgain, sdecay, fdecay, dbknee, floor; + int csnroffst; + int fgaincod[AC3_MAX_CHANNELS]; + int fsnroffst[AC3_MAX_CHANNELS]; + /* mantissa encoding */ + int mant1_cnt, mant2_cnt, mant4_cnt; +} AC3EncodeContext; diff --git a/libav/ac3tab.h b/libav/ac3tab.h new file mode 100644 index 0000000000..2d379f0404 --- /dev/null +++ b/libav/ac3tab.h @@ -0,0 +1,180 @@ +/* tables taken directly from AC3 spec */ + +/* possible bitrates */ +static const UINT16 bitratetab[19] = { + 32, 40, 48, 56, 64, 80, 96, 112, 128, + 160, 192, 224, 256, 320, 384, 448, 512, 576, 640 +}; + +/* AC3 MDCT window */ + +/* MDCT window */ +static const INT16 ac3_window[256]= { + 4, 7, 12, 16, 21, 28, 34, 42, + 51, 61, 72, 84, 97, 111, 127, 145, + 164, 184, 207, 231, 257, 285, 315, 347, + 382, 419, 458, 500, 544, 591, 641, 694, + 750, 810, 872, 937, 1007, 1079, 1155, 1235, + 1318, 1406, 1497, 1593, 1692, 1796, 1903, 2016, + 2132, 2253, 2379, 
2509, 2644, 2783, 2927, 3076, + 3230, 3389, 3552, 3721, 3894, 4072, 4255, 4444, + 4637, 4835, 5038, 5246, 5459, 5677, 5899, 6127, + 6359, 6596, 6837, 7083, 7334, 7589, 7848, 8112, + 8380, 8652, 8927, 9207, 9491, 9778,10069,10363, +10660,10960,11264,11570,11879,12190,12504,12820, +13138,13458,13780,14103,14427,14753,15079,15407, +15735,16063,16392,16720,17049,17377,17705,18032, +18358,18683,19007,19330,19651,19970,20287,20602, +20914,21225,21532,21837,22139,22438,22733,23025, +23314,23599,23880,24157,24430,24699,24964,25225, +25481,25732,25979,26221,26459,26691,26919,27142, +27359,27572,27780,27983,28180,28373,28560,28742, +28919,29091,29258,29420,29577,29729,29876,30018, +30155,30288,30415,30538,30657,30771,30880,30985, +31086,31182,31274,31363,31447,31528,31605,31678, +31747,31814,31877,31936,31993,32046,32097,32145, +32190,32232,32272,32310,32345,32378,32409,32438, +32465,32490,32513,32535,32556,32574,32592,32608, +32623,32636,32649,32661,32671,32681,32690,32698, +32705,32712,32718,32724,32729,32733,32737,32741, +32744,32747,32750,32752,32754,32756,32757,32759, +32760,32761,32762,32763,32764,32764,32765,32765, +32766,32766,32766,32766,32767,32767,32767,32767, +32767,32767,32767,32767,32767,32767,32767,32767, +32767,32767,32767,32767,32767,32767,32767,32767, +}; + +static UINT8 masktab[253]; + +static const UINT8 latab[260]= { +0x0040,0x003f,0x003e,0x003d,0x003c,0x003b,0x003a,0x0039,0x0038,0x0037, +0x0036,0x0035,0x0034,0x0034,0x0033,0x0032,0x0031,0x0030,0x002f,0x002f, +0x002e,0x002d,0x002c,0x002c,0x002b,0x002a,0x0029,0x0029,0x0028,0x0027, +0x0026,0x0026,0x0025,0x0024,0x0024,0x0023,0x0023,0x0022,0x0021,0x0021, +0x0020,0x0020,0x001f,0x001e,0x001e,0x001d,0x001d,0x001c,0x001c,0x001b, +0x001b,0x001a,0x001a,0x0019,0x0019,0x0018,0x0018,0x0017,0x0017,0x0016, +0x0016,0x0015,0x0015,0x0015,0x0014,0x0014,0x0013,0x0013,0x0013,0x0012, +0x0012,0x0012,0x0011,0x0011,0x0011,0x0010,0x0010,0x0010,0x000f,0x000f, +0x000f,0x000e,0x000e,0x000e,0x000d,0x000d,0x000d,0x000d,0x000c,0x000c, 
+0x000c,0x000c,0x000b,0x000b,0x000b,0x000b,0x000a,0x000a,0x000a,0x000a, +0x000a,0x0009,0x0009,0x0009,0x0009,0x0009,0x0008,0x0008,0x0008,0x0008, +0x0008,0x0008,0x0007,0x0007,0x0007,0x0007,0x0007,0x0007,0x0006,0x0006, +0x0006,0x0006,0x0006,0x0006,0x0006,0x0006,0x0005,0x0005,0x0005,0x0005, +0x0005,0x0005,0x0005,0x0005,0x0004,0x0004,0x0004,0x0004,0x0004,0x0004, +0x0004,0x0004,0x0004,0x0004,0x0004,0x0003,0x0003,0x0003,0x0003,0x0003, +0x0003,0x0003,0x0003,0x0003,0x0003,0x0003,0x0003,0x0003,0x0003,0x0002, +0x0002,0x0002,0x0002,0x0002,0x0002,0x0002,0x0002,0x0002,0x0002,0x0002, +0x0002,0x0002,0x0002,0x0002,0x0002,0x0002,0x0002,0x0002,0x0001,0x0001, +0x0001,0x0001,0x0001,0x0001,0x0001,0x0001,0x0001,0x0001,0x0001,0x0001, +0x0001,0x0001,0x0001,0x0001,0x0001,0x0001,0x0001,0x0001,0x0001,0x0001, +0x0001,0x0001,0x0001,0x0001,0x0001,0x0001,0x0001,0x0001,0x0001,0x0001, +0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000, +0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000, +0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000, +0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000, +0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000, +}; + +static const UINT16 hth[50][3]= { +{ 0x04d0,0x04f0,0x0580 }, +{ 0x04d0,0x04f0,0x0580 }, +{ 0x0440,0x0460,0x04b0 }, +{ 0x0400,0x0410,0x0450 }, +{ 0x03e0,0x03e0,0x0420 }, +{ 0x03c0,0x03d0,0x03f0 }, +{ 0x03b0,0x03c0,0x03e0 }, +{ 0x03b0,0x03b0,0x03d0 }, +{ 0x03a0,0x03b0,0x03c0 }, +{ 0x03a0,0x03a0,0x03b0 }, +{ 0x03a0,0x03a0,0x03b0 }, +{ 0x03a0,0x03a0,0x03b0 }, +{ 0x03a0,0x03a0,0x03a0 }, +{ 0x0390,0x03a0,0x03a0 }, +{ 0x0390,0x0390,0x03a0 }, +{ 0x0390,0x0390,0x03a0 }, +{ 0x0380,0x0390,0x03a0 }, +{ 0x0380,0x0380,0x03a0 }, +{ 0x0370,0x0380,0x03a0 }, +{ 0x0370,0x0380,0x03a0 }, +{ 0x0360,0x0370,0x0390 }, +{ 0x0360,0x0370,0x0390 }, +{ 0x0350,0x0360,0x0390 }, +{ 0x0350,0x0360,0x0390 }, +{ 0x0340,0x0350,0x0380 }, +{ 0x0340,0x0350,0x0380 }, +{ 0x0330,0x0340,0x0380 }, +{ 
0x0320,0x0340,0x0370 }, +{ 0x0310,0x0320,0x0360 }, +{ 0x0300,0x0310,0x0350 }, +{ 0x02f0,0x0300,0x0340 }, +{ 0x02f0,0x02f0,0x0330 }, +{ 0x02f0,0x02f0,0x0320 }, +{ 0x02f0,0x02f0,0x0310 }, +{ 0x0300,0x02f0,0x0300 }, +{ 0x0310,0x0300,0x02f0 }, +{ 0x0340,0x0320,0x02f0 }, +{ 0x0390,0x0350,0x02f0 }, +{ 0x03e0,0x0390,0x0300 }, +{ 0x0420,0x03e0,0x0310 }, +{ 0x0460,0x0420,0x0330 }, +{ 0x0490,0x0450,0x0350 }, +{ 0x04a0,0x04a0,0x03c0 }, +{ 0x0460,0x0490,0x0410 }, +{ 0x0440,0x0460,0x0470 }, +{ 0x0440,0x0440,0x04a0 }, +{ 0x0520,0x0480,0x0460 }, +{ 0x0800,0x0630,0x0440 }, +{ 0x0840,0x0840,0x0450 }, +{ 0x0840,0x0840,0x04e0 }, +}; + +static const UINT8 baptab[64]= { + 0, 1, 1, 1, 1, 1, 2, 2, 3, 3, + 3, 4, 4, 5, 5, 6, 6, 6, 6, 7, + 7, 7, 7, 8, 8, 8, 8, 9, 9, 9, + 9, 10, 10, 10, 10, 11, 11, 11, 11, 12, + 12, 12, 12, 13, 13, 13, 13, 14, 14, 14, + 14, 14, 14, 14, 14, 15, 15, 15, 15, 15, + 15, 15, 15, 15, +}; + +static const UINT8 sdecaytab[4]={ + 0x0f, 0x11, 0x13, 0x15, +}; + +static const UINT8 fdecaytab[4]={ + 0x3f, 0x53, 0x67, 0x7b, +}; + +static const UINT16 sgaintab[4]= { + 0x540, 0x4d8, 0x478, 0x410, +}; + +static const UINT16 dbkneetab[4]= { + 0x000, 0x700, 0x900, 0xb00, +}; + +static const UINT16 floortab[8]= { + 0x2f0, 0x2b0, 0x270, 0x230, 0x1f0, 0x170, 0x0f0, 0xf800, +}; + +static const UINT16 fgaintab[8]= { + 0x080, 0x100, 0x180, 0x200, 0x280, 0x300, 0x380, 0x400, +}; + +static const UINT8 bndsz[50]={ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, + 3, 6, 6, 6, 6, 6, 6, 12, 12, 12, 12, 24, 24, 24, 24, 24 +}; + +static UINT8 bndtab[51]; + +/* fft & mdct sin cos tables */ +static INT16 costab[64]; +static INT16 sintab[64]; +static INT16 fft_rev[512]; +static INT16 xcos1[128]; +static INT16 xsin1[128]; + +static UINT16 crc_table[256]; diff --git a/libav/avcodec.h b/libav/avcodec.h new file mode 100644 index 0000000000..299f81ab32 --- /dev/null +++ b/libav/avcodec.h @@ -0,0 +1,79 @@ +#include "common.h" + +enum CodecID { 
+ CODEC_ID_NONE, + CODEC_ID_MPEG1VIDEO, + CODEC_ID_H263, + CODEC_ID_RV10, + CODEC_ID_MP2, + CODEC_ID_AC3, + CODEC_ID_MJPEG, +}; + +enum CodecType { + CODEC_TYPE_VIDEO, + CODEC_TYPE_AUDIO, +}; + +typedef struct AVEncodeContext { + int bit_rate; + int rate; /* frames per sec or samples per sec */ + + /* video only */ + int width, height; + int gop_size; /* 0 = intra only */ + + /* audio only */ + int channels; + + /* the following data should not be initialized */ + int frame_size; /* in samples, initialized when calling 'init' */ + int frame_number; /* audio or video frame number */ + int key_frame; /* true if the previous compressed frame was + a key frame (intra, or seekable) */ + struct AVEncoder *codec; + void *priv_data; /* codec private context, e.g. an AC3EncodeContext for ac3_encoder */ +} AVEncodeContext; + +typedef struct AVEncoder { + char *name; /* short codec name, e.g. "ac3" */ + int type; /* an enum CodecType value */ + int id; /* an enum CodecID value */ + int priv_data_size; /* sizeof() of the codec private context */ + int (*init)(AVEncodeContext *); + int (*encode)(AVEncodeContext *, UINT8 *buf, int buf_size, void *data); + int (*close)(AVEncodeContext *); + struct AVEncoder *next; +} AVEncoder; + +extern AVEncoder ac3_encoder; +extern AVEncoder mp2_encoder; +extern AVEncoder mpeg1video_encoder; +extern AVEncoder h263_encoder; +extern AVEncoder rv10_encoder; +extern AVEncoder mjpeg_encoder; + +/* resample.c */ + +typedef struct { + /* fractional resampling */ + UINT32 incr; /* fractional increment */ + UINT32 frac; + int last_sample; + /* integer down sample */ + int iratio; /* integer division ratio */ + int icount, isum; + int inv; +} ReSampleChannelContext; + +typedef struct { + ReSampleChannelContext channel_ctx[2]; + float ratio; + /* channel convert */ + int input_channels, output_channels; +} ReSampleContext; + +int audio_resample_init(ReSampleContext *s, + int output_channels, int input_channels, + int output_rate, int input_rate); +int audio_resample(ReSampleContext *s, short *output, short *input, int nb_samples); diff --git a/libav/common.c b/libav/common.c new file mode 100644 index 0000000000..e60b0dd85b --- /dev/null +++ b/libav/common.c
@@ -0,0 +1,174 @@ +/* + * Common bit/dsp utils + * Copyright (c) 2000 Gerard Lantau. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <netinet/in.h> +#include <math.h> +#include "common.h" + +#define NDEBUG +#include <assert.h> + +void init_put_bits(PutBitContext *s, + UINT8 *buffer, int buffer_size, + void *opaque, + void (*write_data)(void *, UINT8 *, int)) /* set up s to write into buffer[0..buffer_size); write_data may be NULL, otherwise flush_buffer() calls it with (opaque, data, size) */ +{ + s->buf = buffer; + s->buf_ptr = s->buf; + s->buf_end = s->buf + buffer_size; + s->bit_cnt=0; + s->bit_buf=0; + s->data_out_size = 0; + s->write_data = write_data; + s->opaque = opaque; +} + +static void flush_buffer(PutBitContext *s) /* if a write_data callback is set: hand it the bytes stored so far, rewind buf_ptr to buf and add their count to data_out_size; without a callback this does nothing */ +{ + int size; + if (s->write_data) { + size = s->buf_ptr - s->buf; + if (size > 0) + s->write_data(s->opaque, s->buf, size); + s->buf_ptr = s->buf; + s->data_out_size += size; + } +} + +void put_bits(PutBitContext *s, int n, unsigned int value) /* append the n-bit value to the stream, most significant bit first; each completed 32-bit word is stored through htonl() */ +{ + unsigned int bit_buf; + int bit_cnt; + + assert(n == 32 || value < (1U << n)); + + bit_buf = s->bit_buf; + bit_cnt = s->bit_cnt; + + // printf("n=%d value=%x cnt=%d buf=%x\n", n, value, bit_cnt, bit_buf); + /* XXX: optimize */ + if (n < (32-bit_cnt)) { + bit_buf |= value << (32 - n - bit_cnt); + bit_cnt+=n; + } else { + bit_buf |= value >> (n + bit_cnt - 32); + *(UINT32 *)s->buf_ptr = htonl(bit_buf); +
//printf("bitbuf = %08x\n", bit_buf); + s->buf_ptr+=4; + if (s->buf_ptr >= s->buf_end) + flush_buffer(s); + bit_cnt=bit_cnt + n - 32; + if (bit_cnt == 0) { + bit_buf = 0; + } else { + bit_buf = value << (32 - bit_cnt); + } + } + + s->bit_buf = bit_buf; + s->bit_cnt = bit_cnt; +} + +/* return the number of bits output so far: (bytes already flushed + bytes in the buffer) * 8 + pending bits */ +long long get_bit_count(PutBitContext *s) +{ + return (s->buf_ptr - s->buf + s->data_out_size) * 8 + (long long)s->bit_cnt; +} + +void align_put_bits(PutBitContext *s) /* write zero bits until bit_cnt is a multiple of 8. NOTE(review): when bit_cnt == 0 this calls put_bits(s, 0, 0), which evaluates value << 32 on a 32-bit unsigned -- undefined in C; consider returning early when no padding is needed */ +{ + put_bits(s,(8 - s->bit_cnt) & 7,0); +} + +/* pad the end of the output stream with zeros up to a byte boundary, then flush the buffer */ +void flush_put_bits(PutBitContext *s) +{ + while (s->bit_cnt > 0) { + /* XXX: should test end of buffer */ + *s->buf_ptr++=s->bit_buf >> 24; + s->bit_buf<<=8; + s->bit_cnt-=8; + } + flush_buffer(s); + s->bit_cnt=0; + s->bit_buf=0; +} + +/* for jpeg : escape 0xff with 0x00 after it */ +void jput_bits(PutBitContext *s, int n, unsigned int value) +{ + unsigned int bit_buf, b; + int bit_cnt, i; + + assert(n == 32 || value < (1U << n)); + + bit_buf = s->bit_buf; + bit_cnt = s->bit_cnt; + + //printf("n=%d value=%x cnt=%d buf=%x\n", n, value, bit_cnt, bit_buf); + /* XXX: optimize */ + if (n < (32-bit_cnt)) { + bit_buf |= value << (32 - n - bit_cnt); + bit_cnt+=n; + } else { + bit_buf |= value >> (n + bit_cnt - 32); + /* handle escape */ + for(i=0;i<4;i++) { + b = (bit_buf >> 24); + *(s->buf_ptr++) = b; + if (b == 0xff) + *(s->buf_ptr++) = 0; + bit_buf <<= 8; + } + /* we flush the buffer sooner to handle worst case */ + if (s->buf_ptr >= (s->buf_end - 8)) + flush_buffer(s); + + bit_cnt=bit_cnt + n - 32; + if (bit_cnt == 0) { + bit_buf = 0; + } else { + bit_buf = value << (32 - bit_cnt); + } + } + + s->bit_buf = bit_buf; + s->bit_cnt = bit_cnt; +} + +/* pad the end of the output stream with zeros */ +void jflush_put_bits(PutBitContext *s) +{ + unsigned int b; + + while (s->bit_cnt > 0) { + b = s->bit_buf >> 24; + *s->buf_ptr++ = b; + if (b == 0xff) + *s->buf_ptr++ = 0; + s->bit_buf<<=8; +
s->bit_cnt-=8; + } + flush_buffer(s); + s->bit_cnt=0; + s->bit_buf=0; +} + diff --git a/libav/common.h b/libav/common.h new file mode 100644 index 0000000000..18473eb8e8 --- /dev/null +++ b/libav/common.h @@ -0,0 +1,68 @@ +#ifndef COMMON_H +#define COMMON_H + +typedef unsigned char UINT8; +typedef unsigned short UINT16; +typedef unsigned int UINT32; +typedef signed char INT8; +typedef signed short INT16; +typedef signed int INT32; + +/* bit I/O */ + +struct PutBitContext; + +typedef void (*WriteDataFunc)(void *, UINT8 *, int); + +typedef struct PutBitContext { + UINT8 *buf, *buf_ptr, *buf_end; /* buffer start, current write position, end of buffer */ + int bit_cnt; /* number of pending bits held in bit_buf */ + UINT32 bit_buf; /* pending bits, filled from the most significant bit down */ + long long data_out_size; /* in bytes; total already handed to write_data */ + void *opaque; /* passed back as first argument of write_data */ + WriteDataFunc write_data; /* optional output callback, may be NULL */ +} PutBitContext; + +void init_put_bits(PutBitContext *s, + UINT8 *buffer, int buffer_size, + void *opaque, + void (*write_data)(void *, UINT8 *, int)); +void put_bits(PutBitContext *s, int n, unsigned int value); +long long get_bit_count(PutBitContext *s); +void align_put_bits(PutBitContext *s); +void flush_put_bits(PutBitContext *s); + +/* jpeg specific put_bits */ +void jput_bits(PutBitContext *s, int n, unsigned int value); +void jflush_put_bits(PutBitContext *s); + +/* misc math functions */ + +extern inline int log2(unsigned int v) /* integer base-2 logarithm: index of the highest set bit of v, i.e. floor(log2(v)); returns 0 for v == 0 as well as for v == 1 */ +{ + int n; + + n = 0; + if (v & 0xffff0000) { + v >>= 16; + n += 16; + } + if (v & 0xff00) { + v >>= 8; + n += 8; + } + if (v & 0xf0) { + v >>= 4; + n += 4; + } + if (v & 0xc) { + v >>= 2; + n += 2; + } + if (v & 0x2) { + n++; + } + return n; +} + +#endif diff --git a/libav/h263data.h b/libav/h263data.h new file mode 100644 index 0000000000..1cf6f4d802 --- /dev/null +++ b/libav/h263data.h @@ -0,0 +1,151 @@ +/* DCT coefficients. Four tables, two for last = 0, two for last = 1. + the sign bit must be added afterwards. */ + +/* first part of coeffs for last = 0.
Indexed by [run][level-1] */ + +static const UINT8 coeff_tab0[2][12][2] = +{ + /* run = 0 */ + { + {0x02, 2}, {0x0f, 4}, {0x15, 6}, {0x17, 7}, + {0x1f, 8}, {0x25, 9}, {0x24, 9}, {0x21,10}, + {0x20,10}, {0x07,11}, {0x06,11}, {0x20,11} + }, + /* run = 1 */ + { + {0x06, 3}, {0x14, 6}, {0x1e, 8}, {0x0f,10}, + {0x21,11}, {0x50,12}, {0x00, 0}, {0x00, 0}, + {0x00, 0}, {0x00, 0}, {0x00, 0}, {0x00, 0} + } +}; + +/* rest of coeffs for last = 0. indexing by [run-2][level-1] */ + +static const UINT8 coeff_tab1[25][4][2] = +{ + /* run = 2 */ + { + {0x0e, 4}, {0x1d, 8}, {0x0e,10}, {0x51,12} + }, + /* run = 3 */ + { + {0x0d, 5}, {0x23, 9}, {0x0d,10}, {0x00, 0} + }, + /* run = 4-26 */ + { + {0x0c, 5}, {0x22, 9}, {0x52,12}, {0x00, 0} + }, + { + {0x0b, 5}, {0x0c,10}, {0x53,12}, {0x00, 0} + }, + { + {0x13, 6}, {0x0b,10}, {0x54,12}, {0x00, 0} + }, + { + {0x12, 6}, {0x0a,10}, {0x00, 0}, {0x00, 0} + }, + { + {0x11, 6}, {0x09,10}, {0x00, 0}, {0x00, 0} + }, + { + {0x10, 6}, {0x08,10}, {0x00, 0}, {0x00, 0} + }, + { + {0x16, 7}, {0x55,12}, {0x00, 0}, {0x00, 0} + }, + { + {0x15, 7}, {0x00, 0}, {0x00, 0}, {0x00, 0} + }, + { + {0x14, 7}, {0x00, 0}, {0x00, 0}, {0x00, 0} + }, + { + {0x1c, 8}, {0x00, 0}, {0x00, 0}, {0x00, 0} + }, + { + {0x1b, 8}, {0x00, 0}, {0x00, 0}, {0x00, 0} + }, + { + {0x21, 9}, {0x00, 0}, {0x00, 0}, {0x00, 0} + }, + { + {0x20, 9}, {0x00, 0}, {0x00, 0}, {0x00, 0} + }, + { + {0x1f, 9}, {0x00, 0}, {0x00, 0}, {0x00, 0} + }, + { + {0x1e, 9}, {0x00, 0}, {0x00, 0}, {0x00, 0} + }, + { + {0x1d, 9}, {0x00, 0}, {0x00, 0}, {0x00, 0} + }, + { + {0x1c, 9}, {0x00, 0}, {0x00, 0}, {0x00, 0} + }, + { + {0x1b, 9}, {0x00, 0}, {0x00, 0}, {0x00, 0} + }, + { + {0x1a, 9}, {0x00, 0}, {0x00, 0}, {0x00, 0} + }, + { + {0x22,11}, {0x00, 0}, {0x00, 0}, {0x00, 0} + }, + { + {0x23,11}, {0x00, 0}, {0x00, 0}, {0x00, 0} + }, + { + {0x56,12}, {0x00, 0}, {0x00, 0}, {0x00, 0} + }, + { + {0x57,12}, {0x00, 0}, {0x00, 0}, {0x00, 0} + } +}; + +/* first coeffs of last = 1. 
indexing by [run][level-1] */ + +static const UINT8 coeff_tab2[2][3][2] = +{ + /* run = 0 */ + { + {0x07, 4}, {0x19, 9}, {0x05,11} + }, + /* run = 1 */ + { + {0x0f, 6}, {0x04,11}, {0x00, 0} + } +}; + +/* rest of coeffs for last = 1. indexing by [run-2] */ + +static const UINT8 coeff_tab3[40][2] = +{ + {0x0e, 6}, {0x0d, 6}, {0x0c, 6}, + {0x13, 7}, {0x12, 7}, {0x11, 7}, {0x10, 7}, + {0x1a, 8}, {0x19, 8}, {0x18, 8}, {0x17, 8}, + {0x16, 8}, {0x15, 8}, {0x14, 8}, {0x13, 8}, + {0x18, 9}, {0x17, 9}, {0x16, 9}, {0x15, 9}, + {0x14, 9}, {0x13, 9}, {0x12, 9}, {0x11, 9}, + {0x07,10}, {0x06,10}, {0x05,10}, {0x04,10}, + {0x24,11}, {0x25,11}, {0x26,11}, {0x27,11}, + {0x58,12}, {0x59,12}, {0x5a,12}, {0x5b,12}, + {0x5c,12}, {0x5d,12}, {0x5e,12}, {0x5f,12}, + {0x00, 0} +}; + +/* intra MCBPC, mb_type = 3 */ +static UINT8 intra_MCBPC_code[4] = { 1, 1, 2, 3 }; +static UINT8 intra_MCBPC_bits[4] = { 1, 3, 3, 3 }; + +/* inter MCBPC, mb_type = 0 then 3 */ +static UINT8 inter_MCBPC_code[8] = { 1, 3, 2, 5, 3, 4, 3, 3 }; +static UINT8 inter_MCBPC_bits[8] = { 1, 4, 4, 6, 5, 8, 8, 7 }; + +static UINT8 cbpy_tab[16][2] = +{ + {3,4}, {5,5}, {4,5}, {9,4}, {3,5}, {7,4}, {2,6}, {11,4}, + {2,5}, {3,6}, {5,4}, {10,4}, {4,4}, {8,4}, {6,4}, {3,2} +}; + + diff --git a/libav/h263enc.c b/libav/h263enc.c new file mode 100644 index 0000000000..59db1ee512 --- /dev/null +++ b/libav/h263enc.c @@ -0,0 +1,229 @@ +/* + * H263 backend for ffmpeg encoder + * Copyright (c) 2000 Gerard Lantau. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ +#include <stdlib.h> +#include <stdio.h> +#include <netinet/in.h> +#include "common.h" +#include "mpegvideo.h" +#include "h263data.h" + +void h263_picture_header(MpegEncContext *s, int picture_number) /* write the picture header to s->pb. NOTE(review): the picture_number argument is never read; the 8-bit field below is computed from s->picture_number instead */ +{ + int format; + + align_put_bits(&s->pb); + put_bits(&s->pb, 22, 0x20); + put_bits(&s->pb, 8, ((s->picture_number * 30) / s->frame_rate) & 0xff); + + put_bits(&s->pb, 1, 1); /* marker */ + put_bits(&s->pb, 1, 0); /* h263 id */ + put_bits(&s->pb, 1, 0); /* split screen off */ + put_bits(&s->pb, 1, 0); /* camera off */ + put_bits(&s->pb, 1, 0); /* freeze picture release off */ + + if (s->width == 128 && s->height == 96) + format = 1; + else if (s->width == 176 && s->height == 144) + format = 2; + else if (s->width == 352 && s->height == 288) + format = 3; + else if (s->width == 704 && s->height == 576) + format = 4; + else if (s->width == 1408 && s->height == 1152) + format = 5; + else + abort(); /* only the five picture sizes tested above are accepted; any other size terminates the process */ + + put_bits(&s->pb, 3, format); + + put_bits(&s->pb, 1, (s->pict_type == P_TYPE)); + + put_bits(&s->pb, 1, 0); /* unrestricted motion vector: off */ + + put_bits(&s->pb, 1, 0); /* SAC: off */ + + put_bits(&s->pb, 1, 0); /* advanced prediction mode: off */ + + put_bits(&s->pb, 1, 0); /* not PB frame */ + + put_bits(&s->pb, 5, s->qscale); + + put_bits(&s->pb, 1, 0); /* Continuous Presence Multipoint mode: off */ + + put_bits(&s->pb, 1, 0); /* no PEI */ +} + +static void h263_encode_block(MpegEncContext *s, DCTELEM *block, + int n); + +void h263_encode_mb(MpegEncContext *s, + DCTELEM block[6][64], + int motion_x, int motion_y) +{ + int cbpc, cbpy, i, cbp; + + if (!s->mb_intra) { + /* compute cbp */ + cbp = 0; + for(i=0;i<6;i++) { + if (s->block_last_index[i] >= 0) + cbp |= 1 << (5 - i); + } + if ((cbp | motion_x | motion_y) == 0) { + /* skip macroblock */ + put_bits(&s->pb,
1, 1); + return; + } + + put_bits(&s->pb, 1, 0); /* mb coded */ + cbpc = cbp & 3; + put_bits(&s->pb, + inter_MCBPC_bits[cbpc], + inter_MCBPC_code[cbpc]); + cbpy = cbp >> 2; + cbpy ^= 0xf; + put_bits(&s->pb, cbpy_tab[cbpy][1], cbpy_tab[cbpy][0]); + + /* motion vectors: zero. NOTE(review): motion_x/motion_y are only read by the skip test; a coded macroblock always gets these two fixed 1-bit codes, whatever the vector is */ + put_bits(&s->pb, 1, 1); + put_bits(&s->pb, 1, 1); + + } else { + /* compute cbp (intra: the DC coefficient is always sent, so a block only counts when its last index is >= 1) */ + cbp = 0; + for(i=0;i<6;i++) { + if (s->block_last_index[i] >= 1) + cbp |= 1 << (5 - i); + } + + cbpc = cbp & 3; + if (s->pict_type == I_TYPE) { + put_bits(&s->pb, + intra_MCBPC_bits[cbpc], + intra_MCBPC_code[cbpc]); + } else { + put_bits(&s->pb, 1, 0); /* mb coded */ + put_bits(&s->pb, + inter_MCBPC_bits[cbpc + 4], + inter_MCBPC_code[cbpc + 4]); + } + cbpy = cbp >> 2; + put_bits(&s->pb, cbpy_tab[cbpy][1], cbpy_tab[cbpy][0]); + } + + /* encode each block */ + for(i=0;i<6;i++) { + h263_encode_block(s, block[i], i); + } +} + +static void h263_encode_block(MpegEncContext *s, DCTELEM *block, int n) +{ + int level, run, last, i, j, last_index, last_non_zero, sign, alevel; + int code, len; + + if (s->mb_intra) { + /* DC coef */ + level = block[0]; + if (level == 128) + put_bits(&s->pb, 8, 0xff); + else + put_bits(&s->pb, 8, level & 0xff); + i = 1; + } else { + i = 0; + } + + /* AC coefs */ + last_index = s->block_last_index[n]; + last_non_zero = i - 1; + for(;i<=last_index;i++) { + j = zigzag_direct[i]; + level = block[j]; + if (level) { + run = i - last_non_zero - 1; + last = (i == last_index); + sign = 0; + alevel = level; + if (level < 0) { + sign = 1; + alevel = -level; + } + len = 0; + code = 0; /* only to disable warning */ + if (last == 0) { + if (run < 2 && alevel < 13 ) { + len = coeff_tab0[run][alevel-1][1]; + code = coeff_tab0[run][alevel-1][0]; + } else if (run >= 2 && run < 27 && alevel < 5) { + len = coeff_tab1[run-2][alevel-1][1]; + code = coeff_tab1[run-2][alevel-1][0]; + } + } else { + if (run < 2 && alevel < 4) { + len = coeff_tab2[run][alevel-1][1]; + code = coeff_tab2[run][alevel-1][0]; + } else
if (run >= 2 && run < 42 && alevel == 1) { + len = coeff_tab3[run-2][1]; + code = coeff_tab3[run-2][0]; + } + } + + if (len != 0) { + code = (code << 1) | sign; + put_bits(&s->pb, len + 1, code); + } else { + /* escape */ + put_bits(&s->pb, 7, 3); + put_bits(&s->pb, 1, last); + put_bits(&s->pb, 6, run); + put_bits(&s->pb, 8, level & 0xff); + } + + last_non_zero = i; + } + } +} + +/* write RV 1.0 compatible frame header */ +void rv10_encode_picture_header(MpegEncContext *s, int picture_number) +{ + align_put_bits(&s->pb); + + put_bits(&s->pb, 1, 1); /* marker */ + + put_bits(&s->pb, 1, (s->pict_type == P_TYPE)); + + put_bits(&s->pb, 1, 0); /* not PB frame */ + + put_bits(&s->pb, 5, s->qscale); + + if (s->pict_type == I_TYPE) { + /* specific MPEG like DC coding not used */ + } + + /* if multiple packets per frame are sent, the position at which + to display the macro blocks is coded here */ + put_bits(&s->pb, 6, 0); /* mb_x */ + put_bits(&s->pb, 6, 0); /* mb_y */ + put_bits(&s->pb, 12, s->mb_width * s->mb_height); + + put_bits(&s->pb, 3, 0); /* ignored */ +} + diff --git a/libav/jfdctfst.c b/libav/jfdctfst.c new file mode 100644 index 0000000000..620a03078c --- /dev/null +++ b/libav/jfdctfst.c @@ -0,0 +1,224 @@ +/* + * jfdctfst.c + * + * Copyright (C) 1994-1996, Thomas G. Lane. + * This file is part of the Independent JPEG Group's software. + * For conditions of distribution and use, see the accompanying README file. + * + * This file contains a fast, not so accurate integer implementation of the + * forward DCT (Discrete Cosine Transform). + * + * A 2-D DCT can be done by 1-D DCT on each row followed by 1-D DCT + * on each column. Direct algorithms are also available, but they are + * much more complex and seem not to be any faster when reduced to code. + * + * This implementation is based on Arai, Agui, and Nakajima's algorithm for + * scaled DCT. Their original paper (Trans. 
IEICE E-71(11):1095) is in + * Japanese, but the algorithm is described in the Pennebaker & Mitchell + * JPEG textbook (see REFERENCES section in file README). The following code + * is based directly on figure 4-8 in P&M. + * While an 8-point DCT cannot be done in less than 11 multiplies, it is + * possible to arrange the computation so that many of the multiplies are + * simple scalings of the final outputs. These multiplies can then be + * folded into the multiplications or divisions by the JPEG quantization + * table entries. The AA&N method leaves only 5 multiplies and 29 adds + * to be done in the DCT itself. + * The primary disadvantage of this method is that with fixed-point math, + * accuracy is lost due to imprecise representation of the scaled + * quantization values. The smaller the quantization table entry, the less + * precise the scaled value, so this implementation does worse with high- + * quality-setting files than with low-quality ones. + */ + +#include <stdlib.h> +#include <stdio.h> +#include "common.h" +#include "mpegvideo.h" + +#define DCTSIZE 8 +#define GLOBAL(x) x +#define RIGHT_SHIFT(x, n) ((x) >> (n)) +#define SHIFT_TEMPS + +/* + * This module is specialized to the case DCTSIZE = 8. + */ + +#if DCTSIZE != 8 + Sorry, this code only copes with 8x8 DCTs. /* deliberate syntax err */ +#endif + + +/* Scaling decisions are generally the same as in the LL&M algorithm; + * see jfdctint.c for more details. However, we choose to descale + * (right shift) multiplication products as soon as they are formed, + * rather than carrying additional fractional bits into subsequent additions. + * This compromises accuracy slightly, but it lets us save a few shifts. + * More importantly, 16-bit arithmetic is then adequate (for 8-bit samples) + * everywhere except in the multiplications proper; this saves a good deal + * of work on 16-bit-int machines. 
+ * + * Again to save a few shifts, the intermediate results between pass 1 and + * pass 2 are not upscaled, but are represented only to integral precision. + * + * A final compromise is to represent the multiplicative constants to only + * 8 fractional bits, rather than 13. This saves some shifting work on some + * machines, and may also reduce the cost of multiplication (since there + * are fewer one-bits in the constants). + */ + +#define CONST_BITS 8 + + +/* Some C compilers fail to reduce "FIX(constant)" at compile time, thus + * causing a lot of useless floating-point operations at run time. + * To get around this we use the following pre-calculated constants. + * If you change CONST_BITS you may want to add appropriate values. + * (With a reasonable C compiler, you can just rely on the FIX() macro...) + */ + +#if CONST_BITS == 8 +#define FIX_0_382683433 ((INT32) 98) /* FIX(0.382683433) */ +#define FIX_0_541196100 ((INT32) 139) /* FIX(0.541196100) */ +#define FIX_0_707106781 ((INT32) 181) /* FIX(0.707106781) */ +#define FIX_1_306562965 ((INT32) 334) /* FIX(1.306562965) */ +#else +#define FIX_0_382683433 FIX(0.382683433) +#define FIX_0_541196100 FIX(0.541196100) +#define FIX_0_707106781 FIX(0.707106781) +#define FIX_1_306562965 FIX(1.306562965) +#endif + + +/* We can gain a little more speed, with a further compromise in accuracy, + * by omitting the addition in a descaling shift. This yields an incorrectly + * rounded result half the time... + */ + +#ifndef USE_ACCURATE_ROUNDING +#undef DESCALE +#define DESCALE(x,n) RIGHT_SHIFT(x, n) +#endif + + +/* Multiply a DCTELEM variable by an INT32 constant, and immediately + * descale to yield a DCTELEM result. + */ + +#define MULTIPLY(var,const) ((DCTELEM) DESCALE((var) * (const), CONST_BITS)) + + +/* + * Perform the forward DCT on one block of samples. 
+ */ + +GLOBAL(void) +jpeg_fdct_ifast (DCTELEM * data) +{ + DCTELEM tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; + DCTELEM tmp10, tmp11, tmp12, tmp13; + DCTELEM z1, z2, z3, z4, z5, z11, z13; + DCTELEM *dataptr; + int ctr; + SHIFT_TEMPS + + /* Pass 1: process rows. */ + + dataptr = data; + for (ctr = DCTSIZE-1; ctr >= 0; ctr--) { + tmp0 = dataptr[0] + dataptr[7]; + tmp7 = dataptr[0] - dataptr[7]; + tmp1 = dataptr[1] + dataptr[6]; + tmp6 = dataptr[1] - dataptr[6]; + tmp2 = dataptr[2] + dataptr[5]; + tmp5 = dataptr[2] - dataptr[5]; + tmp3 = dataptr[3] + dataptr[4]; + tmp4 = dataptr[3] - dataptr[4]; + + /* Even part */ + + tmp10 = tmp0 + tmp3; /* phase 2 */ + tmp13 = tmp0 - tmp3; + tmp11 = tmp1 + tmp2; + tmp12 = tmp1 - tmp2; + + dataptr[0] = tmp10 + tmp11; /* phase 3 */ + dataptr[4] = tmp10 - tmp11; + + z1 = MULTIPLY(tmp12 + tmp13, FIX_0_707106781); /* c4 */ + dataptr[2] = tmp13 + z1; /* phase 5 */ + dataptr[6] = tmp13 - z1; + + /* Odd part */ + + tmp10 = tmp4 + tmp5; /* phase 2 */ + tmp11 = tmp5 + tmp6; + tmp12 = tmp6 + tmp7; + + /* The rotator is modified from fig 4-8 to avoid extra negations. */ + z5 = MULTIPLY(tmp10 - tmp12, FIX_0_382683433); /* c6 */ + z2 = MULTIPLY(tmp10, FIX_0_541196100) + z5; /* c2-c6 */ + z4 = MULTIPLY(tmp12, FIX_1_306562965) + z5; /* c2+c6 */ + z3 = MULTIPLY(tmp11, FIX_0_707106781); /* c4 */ + + z11 = tmp7 + z3; /* phase 5 */ + z13 = tmp7 - z3; + + dataptr[5] = z13 + z2; /* phase 6 */ + dataptr[3] = z13 - z2; + dataptr[1] = z11 + z4; + dataptr[7] = z11 - z4; + + dataptr += DCTSIZE; /* advance pointer to next row */ + } + + /* Pass 2: process columns. 
*/ + + dataptr = data; + for (ctr = DCTSIZE-1; ctr >= 0; ctr--) { + tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*7]; + tmp7 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*7]; + tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*6]; + tmp6 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*6]; + tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*5]; + tmp5 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*5]; + tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*4]; + tmp4 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*4]; + + /* Even part */ + + tmp10 = tmp0 + tmp3; /* phase 2 */ + tmp13 = tmp0 - tmp3; + tmp11 = tmp1 + tmp2; + tmp12 = tmp1 - tmp2; + + dataptr[DCTSIZE*0] = tmp10 + tmp11; /* phase 3 */ + dataptr[DCTSIZE*4] = tmp10 - tmp11; + + z1 = MULTIPLY(tmp12 + tmp13, FIX_0_707106781); /* c4 */ + dataptr[DCTSIZE*2] = tmp13 + z1; /* phase 5 */ + dataptr[DCTSIZE*6] = tmp13 - z1; + + /* Odd part */ + + tmp10 = tmp4 + tmp5; /* phase 2 */ + tmp11 = tmp5 + tmp6; + tmp12 = tmp6 + tmp7; + + /* The rotator is modified from fig 4-8 to avoid extra negations. */ + z5 = MULTIPLY(tmp10 - tmp12, FIX_0_382683433); /* c6 */ + z2 = MULTIPLY(tmp10, FIX_0_541196100) + z5; /* c2-c6 */ + z4 = MULTIPLY(tmp12, FIX_1_306562965) + z5; /* c2+c6 */ + z3 = MULTIPLY(tmp11, FIX_0_707106781); /* c4 */ + + z11 = tmp7 + z3; /* phase 5 */ + z13 = tmp7 - z3; + + dataptr[DCTSIZE*5] = z13 + z2; /* phase 6 */ + dataptr[DCTSIZE*3] = z13 - z2; + dataptr[DCTSIZE*1] = z11 + z4; + dataptr[DCTSIZE*7] = z11 - z4; + + dataptr++; /* advance pointer to next column */ + } +} diff --git a/libav/jrevdct.c b/libav/jrevdct.c new file mode 100644 index 0000000000..26715b0b18 --- /dev/null +++ b/libav/jrevdct.c @@ -0,0 +1,1584 @@ +/* + * jrevdct.c + * + * Copyright (C) 1991, 1992, Thomas G. Lane. + * This file is part of the Independent JPEG Group's software. + * For conditions of distribution and use, see the accompanying README file. + * + * This file contains the basic inverse-DCT transformation subroutine. 
+ * + * This implementation is based on an algorithm described in + * C. Loeffler, A. Ligtenberg and G. Moschytz, "Practical Fast 1-D DCT + * Algorithms with 11 Multiplications", Proc. Int'l. Conf. on Acoustics, + * Speech, and Signal Processing 1989 (ICASSP '89), pp. 988-991. + * The primary algorithm described there uses 11 multiplies and 29 adds. + * We use their alternate method with 12 multiplies and 32 adds. + * The advantage of this method is that no data path contains more than one + * multiplication; this allows a very simple and accurate implementation in + * scaled fixed-point arithmetic, with a minimal number of shifts. + * + * I've made lots of modifications to attempt to take advantage of the + * sparse nature of the DCT matrices we're getting. Although the logic + * is cumbersome, it's straightforward and the resulting code is much + * faster. + * + * A better way to do this would be to pass in the DCT block as a sparse + * matrix, perhaps with the difference cases encoded. + */ + +typedef int INT32; + +/* Definition of Contant integer scale factor. */ +#define CONST_BITS 13 + +/* Misc DCT definitions */ +#define DCTSIZE 8 /* The basic DCT block is 8x8 samples */ +#define DCTSIZE2 64 /* DCTSIZE squared; # of elements in a block */ + +#define GLOBAL /* a function referenced thru EXTERNs */ + +typedef int DCTELEM; +typedef DCTELEM DCTBLOCK[DCTSIZE2]; + +void j_rev_dct (DCTELEM *data); + + +#define GLOBAL /* a function referenced thru EXTERNs */ +#define ORIG_DCT 1 + +/* We assume that right shift corresponds to signed division by 2 with + * rounding towards minus infinity. This is correct for typical "arithmetic + * shift" instructions that shift in copies of the sign bit. But some + * C compilers implement >> with an unsigned shift. For these machines you + * must define RIGHT_SHIFT_IS_UNSIGNED. + * RIGHT_SHIFT provides a proper signed right shift of an INT32 quantity. + * It is only applied with constant shift counts. 
SHIFT_TEMPS must be + * included in the variables of any routine using RIGHT_SHIFT. + */ + +#ifdef RIGHT_SHIFT_IS_UNSIGNED +#define SHIFT_TEMPS INT32 shift_temp; +#define RIGHT_SHIFT(x,shft) \ + ((shift_temp = (x)) < 0 ? \ + (shift_temp >> (shft)) | ((~((INT32) 0)) << (32-(shft))) : \ + (shift_temp >> (shft))) +#else +#define SHIFT_TEMPS +#define RIGHT_SHIFT(x,shft) ((x) >> (shft)) +#endif + +/* + * This routine is specialized to the case DCTSIZE = 8. + */ + +#if DCTSIZE != 8 + Sorry, this code only copes with 8x8 DCTs. /* deliberate syntax err */ +#endif + + +/* + * A 2-D IDCT can be done by 1-D IDCT on each row followed by 1-D IDCT + * on each column. Direct algorithms are also available, but they are + * much more complex and seem not to be any faster when reduced to code. + * + * The poop on this scaling stuff is as follows: + * + * Each 1-D IDCT step produces outputs which are a factor of sqrt(N) + * larger than the true IDCT outputs. The final outputs are therefore + * a factor of N larger than desired; since N=8 this can be cured by + * a simple right shift at the end of the algorithm. The advantage of + * this arrangement is that we save two multiplications per 1-D IDCT, + * because the y0 and y4 inputs need not be divided by sqrt(N). + * + * We have to do addition and subtraction of the integer inputs, which + * is no problem, and multiplication by fractional constants, which is + * a problem to do in integer arithmetic. We multiply all the constants + * by CONST_SCALE and convert them to integer constants (thus retaining + * CONST_BITS bits of precision in the constants). After doing a + * multiplication we have to divide the product by CONST_SCALE, with proper + * rounding, to produce the correct output. This division can be done + * cheaply as a right shift of CONST_BITS bits. We postpone shifting + * as long as possible so that partial sums can be added together with + * full fractional precision. 
+ * + * The outputs of the first pass are scaled up by PASS1_BITS bits so that + * they are represented to better-than-integral precision. These outputs + * require BITS_IN_JSAMPLE + PASS1_BITS + 3 bits; this fits in a 16-bit word + * with the recommended scaling. (To scale up 12-bit sample data further, an + * intermediate INT32 array would be needed.) + * + * To avoid overflow of the 32-bit intermediate results in pass 2, we must + * have BITS_IN_JSAMPLE + CONST_BITS + PASS1_BITS <= 26. Error analysis + * shows that the values given below are the most effective. + */ + +#ifdef EIGHT_BIT_SAMPLES +#define PASS1_BITS 2 +#else +#define PASS1_BITS 1 /* lose a little precision to avoid overflow */ +#endif + +#define ONE ((INT32) 1) + +#define CONST_SCALE (ONE << CONST_BITS) + +/* Convert a positive real constant to an integer scaled by CONST_SCALE. + * IMPORTANT: if your compiler doesn't do this arithmetic at compile time, + * you will pay a significant penalty in run time. In that case, figure + * the correct integer constant values and insert them by hand. + */ + +#define FIX(x) ((INT32) ((x) * CONST_SCALE + 0.5)) + +/* Descale and correctly round an INT32 value that's scaled by N bits. + * We assume RIGHT_SHIFT rounds towards minus infinity, so adding + * the fudge factor is correct for either sign of X. + */ + +#define DESCALE(x,n) RIGHT_SHIFT((x) + (ONE << ((n)-1)), n) +#define SCALE(x,n) ((INT32)(x) << n) + +/* Multiply an INT32 variable by an INT32 constant to yield an INT32 result. + * For 8-bit samples with the recommended scaling, all the variable + * and constant values involved are no more than 16 bits wide, so a + * 16x16->32 bit multiply can be used instead of a full 32x32 multiply; + * this provides a useful speedup on many machines. + * There is no way to specify a 16x16->32 multiply in portable C, but + * some C compilers will do the right thing if you provide the correct + * combination of casts. 
+ * NB: for 12-bit samples, a full 32-bit multiplication will be needed. + */ + +#ifdef EIGHT_BIT_SAMPLES +#ifdef SHORTxSHORT_32 /* may work if 'int' is 32 bits */ +#define MULTIPLY(var,const) (((INT16) (var)) * ((INT16) (const))) +#endif +#ifdef SHORTxLCONST_32 /* known to work with Microsoft C 6.0 */ +#define MULTIPLY(var,const) (((INT16) (var)) * ((INT32) (const))) +#endif +#endif + +#if 0 +/* force a multiplication for x86 where a multiply is fast). We + force the non constant operand to be in a register because + otherwise it may be a 16 bit memory reference, which is not allowed + by imull */ +#define MULTIPLY(a,b) \ +({\ + int res;\ + asm("imull %2,%1,%0" : "=r" (res) : "r" ((int)(a)), "i" (b));\ + res;\ +}) +#endif + +#ifndef MULTIPLY /* default definition */ +#define MULTIPLY(var,const) ((var) * (const)) +#endif + + +#ifndef ORIG_DCT + +#undef SSMUL +#define SSMUL(var1,var2) ((INT16)(var1) * (INT32)(INT16)(var2)) + +/* Precomputed idct value arrays. */ + +STATIC DCTELEM PreIDCT[64][64]; + +/* Pre compute singleton coefficient IDCT values. */ +void init_pre_idct() { + int i; + + for (i = 0; i < 64; i++) { + memset ((char *) PreIDCT[i], 0, 64 * sizeof(DCTELEM)); + PreIDCT[i][i] = 2048; + j_rev_dct (PreIDCT[i]); + } +} + +/* + * Perform the inverse DCT on one block of coefficients. + */ + +void j_rev_dct_sparse (data, pos) + DCTBLOCK data; + int pos; +{ + register DCTELEM *dataptr; + short int val; + DCTELEM *ndataptr; + int coeff, rr; + + /* If DC Coefficient. */ + + if (pos == 0) { + register INT32 *dp; + register INT32 v; + + dp = (INT32*)data; + v = *data; + /* Compute 32 bit value to assign. 
+ * This speeds things up a bit */ + if (v < 0) + val = (short)((v - 3) >> 3); + else + val = (short)((v + 4) >> 3); + v = val | ((INT32)val << 16); + dp[0] = v; dp[1] = v; dp[2] = v; dp[3] = v; + dp[4] = v; dp[5] = v; dp[6] = v; dp[7] = v; + dp[8] = v; dp[9] = v; dp[10] = v; dp[11] = v; + dp[12] = v; dp[13] = v; dp[14] = v; dp[15] = v; + dp[16] = v; dp[17] = v; dp[18] = v; dp[19] = v; + dp[20] = v; dp[21] = v; dp[22] = v; dp[23] = v; + dp[24] = v; dp[25] = v; dp[26] = v; dp[27] = v; + dp[28] = v; dp[29] = v; dp[30] = v; dp[31] = v; + return; + } + + /* Some other coefficient. */ + dataptr = (DCTELEM *)data; + coeff = dataptr[pos]; + ndataptr = PreIDCT[pos]; + + for (rr = 0; rr < 4; rr++) { + dataptr[0] = (DCTELEM)(SSMUL (ndataptr[0] , coeff) >> (CONST_BITS-2)); + dataptr[1] = (DCTELEM)(SSMUL (ndataptr[1] , coeff) >> (CONST_BITS-2)); + dataptr[2] = (DCTELEM)(SSMUL (ndataptr[2] , coeff) >> (CONST_BITS-2)); + dataptr[3] = (DCTELEM)(SSMUL (ndataptr[3] , coeff) >> (CONST_BITS-2)); + dataptr[4] = (DCTELEM)(SSMUL (ndataptr[4] , coeff) >> (CONST_BITS-2)); + dataptr[5] = (DCTELEM)(SSMUL (ndataptr[5] , coeff) >> (CONST_BITS-2)); + dataptr[6] = (DCTELEM)(SSMUL (ndataptr[6] , coeff) >> (CONST_BITS-2)); + dataptr[7] = (DCTELEM)(SSMUL (ndataptr[7] , coeff) >> (CONST_BITS-2)); + dataptr[8] = (DCTELEM)(SSMUL (ndataptr[8] , coeff) >> (CONST_BITS-2)); + dataptr[9] = (DCTELEM)(SSMUL (ndataptr[9] , coeff) >> (CONST_BITS-2)); + dataptr[10] = (DCTELEM)(SSMUL (ndataptr[10], coeff) >> (CONST_BITS-2)); + dataptr[11] = (DCTELEM)(SSMUL (ndataptr[11], coeff) >> (CONST_BITS-2)); + dataptr[12] = (DCTELEM)(SSMUL (ndataptr[12], coeff) >> (CONST_BITS-2)); + dataptr[13] = (DCTELEM)(SSMUL (ndataptr[13], coeff) >> (CONST_BITS-2)); + dataptr[14] = (DCTELEM)(SSMUL (ndataptr[14], coeff) >> (CONST_BITS-2)); + dataptr[15] = (DCTELEM)(SSMUL (ndataptr[15], coeff) >> (CONST_BITS-2)); + dataptr += 16; + ndataptr += 16; + } +} + + +void j_rev_dct (data) + DCTBLOCK data; +{ + INT32 tmp0, tmp1, tmp2, tmp3; + 
INT32 tmp10, tmp11, tmp12, tmp13; + INT32 z1, z2, z3, z4, z5; + int d0, d1, d2, d3, d4, d5, d6, d7; + register DCTELEM *dataptr; + int rowctr; + SHIFT_TEMPS; + + /* Pass 1: process rows. */ + /* Note results are scaled up by sqrt(8) compared to a true IDCT; */ + /* furthermore, we scale the results by 2**PASS1_BITS. */ + + dataptr = data; + + for (rowctr = DCTSIZE - 1; rowctr >= 0; rowctr--) { + /* Due to quantization, we will usually find that many of the input + * coefficients are zero, especially the AC terms. We can exploit this + * by short-circuiting the IDCT calculation for any row in which all + * the AC terms are zero. In that case each output is equal to the + * DC coefficient (with scale factor as needed). + * With typical images and quantization tables, half or more of the + * row DCT calculations can be simplified this way. + * + * The test and the stores below go through idataptr, an INT32 view of + * the row: d1 is checked directly and idataptr[1..3] are OR-ed together, + * then idataptr[0..3] receive the DC value packed twice (low and high + * 16 bits). + * NOTE(review): that covers all eight coefficients only if a DCTELEM is + * half as wide as an INT32. This file declares both DCTELEM and INT32 + * as int; in that case elements 4..7 are neither tested nor written and + * elements 0..3 receive the packed word. Confirm which DCTELEM width is + * intended. This function also sits inside "#ifndef ORIG_DCT" while + * ORIG_DCT is #defined earlier in the file, so it is compiled out. + */ + + register INT32 *idataptr = (INT32*)dataptr; + d0 = dataptr[0]; + d1 = dataptr[1]; + if ((d1 == 0) && (idataptr[1] | idataptr[2] | idataptr[3]) == 0) { + /* AC terms all zero */ + if (d0) { + /* Compute a 32 bit value to assign. */ + DCTELEM dcval = (DCTELEM) (d0 << PASS1_BITS); + register INT32 v = (dcval & 0xffff) | + (((INT32)dcval << 16) & 0xffff0000L); + + idataptr[0] = v; + idataptr[1] = v; + idataptr[2] = v; + idataptr[3] = v; + } + + dataptr += DCTSIZE; /* advance pointer to next row */ + continue; + } + d2 = dataptr[2]; + d3 = dataptr[3]; + d4 = dataptr[4]; + d5 = dataptr[5]; + d6 = dataptr[6]; + d7 = dataptr[7]; + + /* Even part: reverse the even part of the forward DCT. */ + /* The rotator is sqrt(2)*c(-6). 
*/ + if (d6) { + if (d4) { + if (d2) { + if (d0) { + /* d0 != 0, d2 != 0, d4 != 0, d6 != 0 */ + z1 = MULTIPLY(d2 + d6, FIX(0.541196100)); + tmp2 = z1 + MULTIPLY(d6, - FIX(1.847759065)); + tmp3 = z1 + MULTIPLY(d2, FIX(0.765366865)); + + tmp0 = SCALE (d0 + d4, CONST_BITS); + tmp1 = SCALE (d0 - d4, CONST_BITS); + + tmp10 = tmp0 + tmp3; + tmp13 = tmp0 - tmp3; + tmp11 = tmp1 + tmp2; + tmp12 = tmp1 - tmp2; + } else { + /* d0 == 0, d2 != 0, d4 != 0, d6 != 0 */ + z1 = MULTIPLY(d2 + d6, FIX(0.541196100)); + tmp2 = z1 + MULTIPLY(d6, - FIX(1.847759065)); + tmp3 = z1 + MULTIPLY(d2, FIX(0.765366865)); + + tmp0 = SCALE (d4, CONST_BITS); + + tmp10 = tmp0 + tmp3; + tmp13 = tmp0 - tmp3; + tmp11 = tmp2 - tmp0; + tmp12 = -(tmp0 + tmp2); + } + } else { + if (d0) { + /* d0 != 0, d2 == 0, d4 != 0, d6 != 0 */ + tmp2 = MULTIPLY(d6, - FIX(1.306562965)); + tmp3 = MULTIPLY(d6, FIX(0.541196100)); + + tmp0 = SCALE (d0 + d4, CONST_BITS); + tmp1 = SCALE (d0 - d4, CONST_BITS); + + tmp10 = tmp0 + tmp3; + tmp13 = tmp0 - tmp3; + tmp11 = tmp1 + tmp2; + tmp12 = tmp1 - tmp2; + } else { + /* d0 == 0, d2 == 0, d4 != 0, d6 != 0 */ + tmp2 = MULTIPLY(d6, -FIX(1.306562965)); + tmp3 = MULTIPLY(d6, FIX(0.541196100)); + + tmp0 = SCALE (d4, CONST_BITS); + + tmp10 = tmp0 + tmp3; + tmp13 = tmp0 - tmp3; + tmp11 = tmp2 - tmp0; + tmp12 = -(tmp0 + tmp2); + } + } + } else { + if (d2) { + if (d0) { + /* d0 != 0, d2 != 0, d4 == 0, d6 != 0 */ + z1 = MULTIPLY(d2 + d6, FIX(0.541196100)); + tmp2 = z1 + MULTIPLY(d6, - FIX(1.847759065)); + tmp3 = z1 + MULTIPLY(d2, FIX(0.765366865)); + + tmp0 = SCALE (d0, CONST_BITS); + + tmp10 = tmp0 + tmp3; + tmp13 = tmp0 - tmp3; + tmp11 = tmp0 + tmp2; + tmp12 = tmp0 - tmp2; + } else { + /* d0 == 0, d2 != 0, d4 == 0, d6 != 0 */ + z1 = MULTIPLY(d2 + d6, FIX(0.541196100)); + tmp2 = z1 + MULTIPLY(d6, - FIX(1.847759065)); + tmp3 = z1 + MULTIPLY(d2, FIX(0.765366865)); + + tmp10 = tmp3; + tmp13 = -tmp3; + tmp11 = tmp2; + tmp12 = -tmp2; + } + } else { + if (d0) { + /* d0 != 0, d2 == 0, d4 == 0, d6 
!= 0 */ + tmp2 = MULTIPLY(d6, - FIX(1.306562965)); + tmp3 = MULTIPLY(d6, FIX(0.541196100)); + + tmp0 = SCALE (d0, CONST_BITS); + + tmp10 = tmp0 + tmp3; + tmp13 = tmp0 - tmp3; + tmp11 = tmp0 + tmp2; + tmp12 = tmp0 - tmp2; + } else { + /* d0 == 0, d2 == 0, d4 == 0, d6 != 0 */ + tmp2 = MULTIPLY(d6, - FIX(1.306562965)); + tmp3 = MULTIPLY(d6, FIX(0.541196100)); + + tmp10 = tmp3; + tmp13 = -tmp3; + tmp11 = tmp2; + tmp12 = -tmp2; + } + } + } + } else { + if (d4) { + if (d2) { + if (d0) { + /* d0 != 0, d2 != 0, d4 != 0, d6 == 0 */ + tmp2 = MULTIPLY(d2, FIX(0.541196100)); + tmp3 = MULTIPLY(d2, FIX(1.306562965)); + + tmp0 = SCALE (d0 + d4, CONST_BITS); + tmp1 = SCALE (d0 - d4, CONST_BITS); + + tmp10 = tmp0 + tmp3; + tmp13 = tmp0 - tmp3; + tmp11 = tmp1 + tmp2; + tmp12 = tmp1 - tmp2; + } else { + /* d0 == 0, d2 != 0, d4 != 0, d6 == 0 */ + tmp2 = MULTIPLY(d2, FIX(0.541196100)); + tmp3 = MULTIPLY(d2, FIX(1.306562965)); + + tmp0 = SCALE (d4, CONST_BITS); + + tmp10 = tmp0 + tmp3; + tmp13 = tmp0 - tmp3; + tmp11 = tmp2 - tmp0; + tmp12 = -(tmp0 + tmp2); + } + } else { + if (d0) { + /* d0 != 0, d2 == 0, d4 != 0, d6 == 0 */ + tmp10 = tmp13 = SCALE (d0 + d4, CONST_BITS); + tmp11 = tmp12 = SCALE (d0 - d4, CONST_BITS); + } else { + /* d0 == 0, d2 == 0, d4 != 0, d6 == 0 */ + tmp10 = tmp13 = SCALE (d4, CONST_BITS); + tmp11 = tmp12 = -tmp10; + } + } + } else { + if (d2) { + if (d0) { + /* d0 != 0, d2 != 0, d4 == 0, d6 == 0 */ + tmp2 = MULTIPLY(d2, FIX(0.541196100)); + tmp3 = MULTIPLY(d2, FIX(1.306562965)); + + tmp0 = SCALE (d0, CONST_BITS); + + tmp10 = tmp0 + tmp3; + tmp13 = tmp0 - tmp3; + tmp11 = tmp0 + tmp2; + tmp12 = tmp0 - tmp2; + } else { + /* d0 == 0, d2 != 0, d4 == 0, d6 == 0 */ + tmp2 = MULTIPLY(d2, FIX(0.541196100)); + tmp3 = MULTIPLY(d2, FIX(1.306562965)); + + tmp10 = tmp3; + tmp13 = -tmp3; + tmp11 = tmp2; + tmp12 = -tmp2; + } + } else { + if (d0) { + /* d0 != 0, d2 == 0, d4 == 0, d6 == 0 */ + tmp10 = tmp13 = tmp11 = tmp12 = SCALE (d0, CONST_BITS); + } else { + /* d0 == 0, d2 == 0, 
d4 == 0, d6 == 0 */ + tmp10 = tmp13 = tmp11 = tmp12 = 0; + } + } + } + } + + + /* Odd part per figure 8; the matrix is unitary and hence its + * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively. + */ + + if (d7) { + if (d5) { + if (d3) { + if (d1) { + /* d1 != 0, d3 != 0, d5 != 0, d7 != 0 */ + z1 = d7 + d1; + z2 = d5 + d3; + z3 = d7 + d3; + z4 = d5 + d1; + z5 = MULTIPLY(z3 + z4, FIX(1.175875602)); + + tmp0 = MULTIPLY(d7, FIX(0.298631336)); + tmp1 = MULTIPLY(d5, FIX(2.053119869)); + tmp2 = MULTIPLY(d3, FIX(3.072711026)); + tmp3 = MULTIPLY(d1, FIX(1.501321110)); + z1 = MULTIPLY(z1, - FIX(0.899976223)); + z2 = MULTIPLY(z2, - FIX(2.562915447)); + z3 = MULTIPLY(z3, - FIX(1.961570560)); + z4 = MULTIPLY(z4, - FIX(0.390180644)); + + z3 += z5; + z4 += z5; + + tmp0 += z1 + z3; + tmp1 += z2 + z4; + tmp2 += z2 + z3; + tmp3 += z1 + z4; + } else { + /* d1 == 0, d3 != 0, d5 != 0, d7 != 0 */ + z1 = d7; + z2 = d5 + d3; + z3 = d7 + d3; + z5 = MULTIPLY(z3 + d5, FIX(1.175875602)); + + tmp0 = MULTIPLY(d7, FIX(0.298631336)); + tmp1 = MULTIPLY(d5, FIX(2.053119869)); + tmp2 = MULTIPLY(d3, FIX(3.072711026)); + z1 = MULTIPLY(d7, - FIX(0.899976223)); + z2 = MULTIPLY(z2, - FIX(2.562915447)); + z3 = MULTIPLY(z3, - FIX(1.961570560)); + z4 = MULTIPLY(d5, - FIX(0.390180644)); + + z3 += z5; + z4 += z5; + + tmp0 += z1 + z3; + tmp1 += z2 + z4; + tmp2 += z2 + z3; + tmp3 = z1 + z4; + } + } else { + if (d1) { + /* d1 != 0, d3 == 0, d5 != 0, d7 != 0 */ + z1 = d7 + d1; + z2 = d5; + z3 = d7; + z4 = d5 + d1; + z5 = MULTIPLY(z3 + z4, FIX(1.175875602)); + + tmp0 = MULTIPLY(d7, FIX(0.298631336)); + tmp1 = MULTIPLY(d5, FIX(2.053119869)); + tmp3 = MULTIPLY(d1, FIX(1.501321110)); + z1 = MULTIPLY(z1, - FIX(0.899976223)); + z2 = MULTIPLY(d5, - FIX(2.562915447)); + z3 = MULTIPLY(d7, - FIX(1.961570560)); + z4 = MULTIPLY(z4, - FIX(0.390180644)); + + z3 += z5; + z4 += z5; + + tmp0 += z1 + z3; + tmp1 += z2 + z4; + tmp2 = z2 + z3; + tmp3 += z1 + z4; + } else { + /* d1 == 0, d3 == 0, d5 != 0, d7 != 0 */ + 
tmp0 = MULTIPLY(d7, - FIX(0.601344887)); + z1 = MULTIPLY(d7, - FIX(0.899976223)); + z3 = MULTIPLY(d7, - FIX(1.961570560)); + tmp1 = MULTIPLY(d5, - FIX(0.509795578)); + z2 = MULTIPLY(d5, - FIX(2.562915447)); + z4 = MULTIPLY(d5, - FIX(0.390180644)); + z5 = MULTIPLY(d5 + d7, FIX(1.175875602)); + + z3 += z5; + z4 += z5; + + tmp0 += z3; + tmp1 += z4; + tmp2 = z2 + z3; + tmp3 = z1 + z4; + } + } + } else { + if (d3) { + if (d1) { + /* d1 != 0, d3 != 0, d5 == 0, d7 != 0 */ + z1 = d7 + d1; + z3 = d7 + d3; + z5 = MULTIPLY(z3 + d1, FIX(1.175875602)); + + tmp0 = MULTIPLY(d7, FIX(0.298631336)); + tmp2 = MULTIPLY(d3, FIX(3.072711026)); + tmp3 = MULTIPLY(d1, FIX(1.501321110)); + z1 = MULTIPLY(z1, - FIX(0.899976223)); + z2 = MULTIPLY(d3, - FIX(2.562915447)); + z3 = MULTIPLY(z3, - FIX(1.961570560)); + z4 = MULTIPLY(d1, - FIX(0.390180644)); + + z3 += z5; + z4 += z5; + + tmp0 += z1 + z3; + tmp1 = z2 + z4; + tmp2 += z2 + z3; + tmp3 += z1 + z4; + } else { + /* d1 == 0, d3 != 0, d5 == 0, d7 != 0 */ + z3 = d7 + d3; + + tmp0 = MULTIPLY(d7, - FIX(0.601344887)); + z1 = MULTIPLY(d7, - FIX(0.899976223)); + tmp2 = MULTIPLY(d3, FIX(0.509795579)); + z2 = MULTIPLY(d3, - FIX(2.562915447)); + z5 = MULTIPLY(z3, FIX(1.175875602)); + z3 = MULTIPLY(z3, - FIX(0.785694958)); + + tmp0 += z3; + tmp1 = z2 + z5; + tmp2 += z3; + tmp3 = z1 + z5; + } + } else { + if (d1) { + /* d1 != 0, d3 == 0, d5 == 0, d7 != 0 */ + z1 = d7 + d1; + z5 = MULTIPLY(z1, FIX(1.175875602)); + + z1 = MULTIPLY(z1, FIX(0.275899379)); + z3 = MULTIPLY(d7, - FIX(1.961570560)); + tmp0 = MULTIPLY(d7, - FIX(1.662939224)); + z4 = MULTIPLY(d1, - FIX(0.390180644)); + tmp3 = MULTIPLY(d1, FIX(1.111140466)); + + tmp0 += z1; + tmp1 = z4 + z5; + tmp2 = z3 + z5; + tmp3 += z1; + } else { + /* d1 == 0, d3 == 0, d5 == 0, d7 != 0 */ + tmp0 = MULTIPLY(d7, - FIX(1.387039845)); + tmp1 = MULTIPLY(d7, FIX(1.175875602)); + tmp2 = MULTIPLY(d7, - FIX(0.785694958)); + tmp3 = MULTIPLY(d7, FIX(0.275899379)); + } + } + } + } else { + if (d5) { + if (d3) { + if (d1) 
{ + /* d1 != 0, d3 != 0, d5 != 0, d7 == 0 */ + z2 = d5 + d3; + z4 = d5 + d1; + z5 = MULTIPLY(d3 + z4, FIX(1.175875602)); + + tmp1 = MULTIPLY(d5, FIX(2.053119869)); + tmp2 = MULTIPLY(d3, FIX(3.072711026)); + tmp3 = MULTIPLY(d1, FIX(1.501321110)); + z1 = MULTIPLY(d1, - FIX(0.899976223)); + z2 = MULTIPLY(z2, - FIX(2.562915447)); + z3 = MULTIPLY(d3, - FIX(1.961570560)); + z4 = MULTIPLY(z4, - FIX(0.390180644)); + + z3 += z5; + z4 += z5; + + tmp0 = z1 + z3; + tmp1 += z2 + z4; + tmp2 += z2 + z3; + tmp3 += z1 + z4; + } else { + /* d1 == 0, d3 != 0, d5 != 0, d7 == 0 */ + z2 = d5 + d3; + + z5 = MULTIPLY(z2, FIX(1.175875602)); + tmp1 = MULTIPLY(d5, FIX(1.662939225)); + z4 = MULTIPLY(d5, - FIX(0.390180644)); + z2 = MULTIPLY(z2, - FIX(1.387039845)); + tmp2 = MULTIPLY(d3, FIX(1.111140466)); + z3 = MULTIPLY(d3, - FIX(1.961570560)); + + tmp0 = z3 + z5; + tmp1 += z2; + tmp2 += z2; + tmp3 = z4 + z5; + } + } else { + if (d1) { + /* d1 != 0, d3 == 0, d5 != 0, d7 == 0 */ + z4 = d5 + d1; + + z5 = MULTIPLY(z4, FIX(1.175875602)); + z1 = MULTIPLY(d1, - FIX(0.899976223)); + tmp3 = MULTIPLY(d1, FIX(0.601344887)); + tmp1 = MULTIPLY(d5, - FIX(0.509795578)); + z2 = MULTIPLY(d5, - FIX(2.562915447)); + z4 = MULTIPLY(z4, FIX(0.785694958)); + + tmp0 = z1 + z5; + tmp1 += z4; + tmp2 = z2 + z5; + tmp3 += z4; + } else { + /* d1 == 0, d3 == 0, d5 != 0, d7 == 0 */ + tmp0 = MULTIPLY(d5, FIX(1.175875602)); + tmp1 = MULTIPLY(d5, FIX(0.275899380)); + tmp2 = MULTIPLY(d5, - FIX(1.387039845)); + tmp3 = MULTIPLY(d5, FIX(0.785694958)); + } + } + } else { + if (d3) { + if (d1) { + /* d1 != 0, d3 != 0, d5 == 0, d7 == 0 */ + z5 = d1 + d3; + tmp3 = MULTIPLY(d1, FIX(0.211164243)); + tmp2 = MULTIPLY(d3, - FIX(1.451774981)); + z1 = MULTIPLY(d1, FIX(1.061594337)); + z2 = MULTIPLY(d3, - FIX(2.172734803)); + z4 = MULTIPLY(z5, FIX(0.785694958)); + z5 = MULTIPLY(z5, FIX(1.175875602)); + + tmp0 = z1 - z4; + tmp1 = z2 + z4; + tmp2 += z5; + tmp3 += z5; + } else { + /* d1 == 0, d3 != 0, d5 == 0, d7 == 0 */ + tmp0 = MULTIPLY(d3, 
- FIX(0.785694958)); + tmp1 = MULTIPLY(d3, - FIX(1.387039845)); + tmp2 = MULTIPLY(d3, - FIX(0.275899379)); + tmp3 = MULTIPLY(d3, FIX(1.175875602)); + } + } else { + if (d1) { + /* d1 != 0, d3 == 0, d5 == 0, d7 == 0 */ + tmp0 = MULTIPLY(d1, FIX(0.275899379)); + tmp1 = MULTIPLY(d1, FIX(0.785694958)); + tmp2 = MULTIPLY(d1, FIX(1.175875602)); + tmp3 = MULTIPLY(d1, FIX(1.387039845)); + } else { + /* d1 == 0, d3 == 0, d5 == 0, d7 == 0 */ + tmp0 = tmp1 = tmp2 = tmp3 = 0; + } + } + } + } + + /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */ + + dataptr[0] = (DCTELEM) DESCALE(tmp10 + tmp3, CONST_BITS-PASS1_BITS); + dataptr[7] = (DCTELEM) DESCALE(tmp10 - tmp3, CONST_BITS-PASS1_BITS); + dataptr[1] = (DCTELEM) DESCALE(tmp11 + tmp2, CONST_BITS-PASS1_BITS); + dataptr[6] = (DCTELEM) DESCALE(tmp11 - tmp2, CONST_BITS-PASS1_BITS); + dataptr[2] = (DCTELEM) DESCALE(tmp12 + tmp1, CONST_BITS-PASS1_BITS); + dataptr[5] = (DCTELEM) DESCALE(tmp12 - tmp1, CONST_BITS-PASS1_BITS); + dataptr[3] = (DCTELEM) DESCALE(tmp13 + tmp0, CONST_BITS-PASS1_BITS); + dataptr[4] = (DCTELEM) DESCALE(tmp13 - tmp0, CONST_BITS-PASS1_BITS); + + dataptr += DCTSIZE; /* advance pointer to next row */ + } + + /* Pass 2: process columns. */ + /* Note that we must descale the results by a factor of 8 == 2**3, */ + /* and also undo the PASS1_BITS scaling. */ + + dataptr = data; + for (rowctr = DCTSIZE-1; rowctr >= 0; rowctr--) { + /* Columns of zeroes can be exploited in the same way as we did with rows. + * However, the row calculation has created many nonzero AC terms, so the + * simplification applies less often (typically 5% to 10% of the time). + * On machines with very fast multiplication, it's possible that the + * test takes more time than it's worth. In that case this section + * may be commented out. 
+ */ + + d0 = dataptr[DCTSIZE*0]; + d1 = dataptr[DCTSIZE*1]; + d2 = dataptr[DCTSIZE*2]; + d3 = dataptr[DCTSIZE*3]; + d4 = dataptr[DCTSIZE*4]; + d5 = dataptr[DCTSIZE*5]; + d6 = dataptr[DCTSIZE*6]; + d7 = dataptr[DCTSIZE*7]; + + /* Even part: reverse the even part of the forward DCT. */ + /* The rotator is sqrt(2)*c(-6). */ + if (d6) { + if (d4) { + if (d2) { + if (d0) { + /* d0 != 0, d2 != 0, d4 != 0, d6 != 0 */ + z1 = MULTIPLY(d2 + d6, FIX(0.541196100)); + tmp2 = z1 + MULTIPLY(d6, - FIX(1.847759065)); + tmp3 = z1 + MULTIPLY(d2, FIX(0.765366865)); + + tmp0 = SCALE (d0 + d4, CONST_BITS); + tmp1 = SCALE (d0 - d4, CONST_BITS); + + tmp10 = tmp0 + tmp3; + tmp13 = tmp0 - tmp3; + tmp11 = tmp1 + tmp2; + tmp12 = tmp1 - tmp2; + } else { + /* d0 == 0, d2 != 0, d4 != 0, d6 != 0 */ + z1 = MULTIPLY(d2 + d6, FIX(0.541196100)); + tmp2 = z1 + MULTIPLY(d6, - FIX(1.847759065)); + tmp3 = z1 + MULTIPLY(d2, FIX(0.765366865)); + + tmp0 = SCALE (d4, CONST_BITS); + + tmp10 = tmp0 + tmp3; + tmp13 = tmp0 - tmp3; + tmp11 = tmp2 - tmp0; + tmp12 = -(tmp0 + tmp2); + } + } else { + if (d0) { + /* d0 != 0, d2 == 0, d4 != 0, d6 != 0 */ + tmp2 = MULTIPLY(d6, - FIX(1.306562965)); + tmp3 = MULTIPLY(d6, FIX(0.541196100)); + + tmp0 = SCALE (d0 + d4, CONST_BITS); + tmp1 = SCALE (d0 - d4, CONST_BITS); + + tmp10 = tmp0 + tmp3; + tmp13 = tmp0 - tmp3; + tmp11 = tmp1 + tmp2; + tmp12 = tmp1 - tmp2; + } else { + /* d0 == 0, d2 == 0, d4 != 0, d6 != 0 */ + tmp2 = MULTIPLY(d6, -FIX(1.306562965)); + tmp3 = MULTIPLY(d6, FIX(0.541196100)); + + tmp0 = SCALE (d4, CONST_BITS); + + tmp10 = tmp0 + tmp3; + tmp13 = tmp0 - tmp3; + tmp11 = tmp2 - tmp0; + tmp12 = -(tmp0 + tmp2); + } + } + } else { + if (d2) { + if (d0) { + /* d0 != 0, d2 != 0, d4 == 0, d6 != 0 */ + z1 = MULTIPLY(d2 + d6, FIX(0.541196100)); + tmp2 = z1 + MULTIPLY(d6, - FIX(1.847759065)); + tmp3 = z1 + MULTIPLY(d2, FIX(0.765366865)); + + tmp0 = SCALE (d0, CONST_BITS); + + tmp10 = tmp0 + tmp3; + tmp13 = tmp0 - tmp3; + tmp11 = tmp0 + tmp2; + tmp12 = tmp0 - tmp2; + 
} else { + /* d0 == 0, d2 != 0, d4 == 0, d6 != 0 */ + z1 = MULTIPLY(d2 + d6, FIX(0.541196100)); + tmp2 = z1 + MULTIPLY(d6, - FIX(1.847759065)); + tmp3 = z1 + MULTIPLY(d2, FIX(0.765366865)); + + tmp10 = tmp3; + tmp13 = -tmp3; + tmp11 = tmp2; + tmp12 = -tmp2; + } + } else { + if (d0) { + /* d0 != 0, d2 == 0, d4 == 0, d6 != 0 */ + tmp2 = MULTIPLY(d6, - FIX(1.306562965)); + tmp3 = MULTIPLY(d6, FIX(0.541196100)); + + tmp0 = SCALE (d0, CONST_BITS); + + tmp10 = tmp0 + tmp3; + tmp13 = tmp0 - tmp3; + tmp11 = tmp0 + tmp2; + tmp12 = tmp0 - tmp2; + } else { + /* d0 == 0, d2 == 0, d4 == 0, d6 != 0 */ + tmp2 = MULTIPLY(d6, - FIX(1.306562965)); + tmp3 = MULTIPLY(d6, FIX(0.541196100)); + + tmp10 = tmp3; + tmp13 = -tmp3; + tmp11 = tmp2; + tmp12 = -tmp2; + } + } + } + } else { + if (d4) { + if (d2) { + if (d0) { + /* d0 != 0, d2 != 0, d4 != 0, d6 == 0 */ + tmp2 = MULTIPLY(d2, FIX(0.541196100)); + tmp3 = MULTIPLY(d2, FIX(1.306562965)); + + tmp0 = SCALE (d0 + d4, CONST_BITS); + tmp1 = SCALE (d0 - d4, CONST_BITS); + + tmp10 = tmp0 + tmp3; + tmp13 = tmp0 - tmp3; + tmp11 = tmp1 + tmp2; + tmp12 = tmp1 - tmp2; + } else { + /* d0 == 0, d2 != 0, d4 != 0, d6 == 0 */ + tmp2 = MULTIPLY(d2, FIX(0.541196100)); + tmp3 = MULTIPLY(d2, FIX(1.306562965)); + + tmp0 = SCALE (d4, CONST_BITS); + + tmp10 = tmp0 + tmp3; + tmp13 = tmp0 - tmp3; + tmp11 = tmp2 - tmp0; + tmp12 = -(tmp0 + tmp2); + } + } else { + if (d0) { + /* d0 != 0, d2 == 0, d4 != 0, d6 == 0 */ + tmp10 = tmp13 = SCALE (d0 + d4, CONST_BITS); + tmp11 = tmp12 = SCALE (d0 - d4, CONST_BITS); + } else { + /* d0 == 0, d2 == 0, d4 != 0, d6 == 0 */ + tmp10 = tmp13 = SCALE (d4, CONST_BITS); + tmp11 = tmp12 = -tmp10; + } + } + } else { + if (d2) { + if (d0) { + /* d0 != 0, d2 != 0, d4 == 0, d6 == 0 */ + tmp2 = MULTIPLY(d2, FIX(0.541196100)); + tmp3 = MULTIPLY(d2, FIX(1.306562965)); + + tmp0 = SCALE (d0, CONST_BITS); + + tmp10 = tmp0 + tmp3; + tmp13 = tmp0 - tmp3; + tmp11 = tmp0 + tmp2; + tmp12 = tmp0 - tmp2; + } else { + /* d0 == 0, d2 != 0, d4 == 0, d6 
== 0 */ + tmp2 = MULTIPLY(d2, FIX(0.541196100)); + tmp3 = MULTIPLY(d2, FIX(1.306562965)); + + tmp10 = tmp3; + tmp13 = -tmp3; + tmp11 = tmp2; + tmp12 = -tmp2; + } + } else { + if (d0) { + /* d0 != 0, d2 == 0, d4 == 0, d6 == 0 */ + tmp10 = tmp13 = tmp11 = tmp12 = SCALE (d0, CONST_BITS); + } else { + /* d0 == 0, d2 == 0, d4 == 0, d6 == 0 */ + tmp10 = tmp13 = tmp11 = tmp12 = 0; + } + } + } + } + + /* Odd part per figure 8; the matrix is unitary and hence its + * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively. + */ + if (d7) { + if (d5) { + if (d3) { + if (d1) { + /* d1 != 0, d3 != 0, d5 != 0, d7 != 0 */ + z1 = d7 + d1; + z2 = d5 + d3; + z3 = d7 + d3; + z4 = d5 + d1; + z5 = MULTIPLY(z3 + z4, FIX(1.175875602)); + + tmp0 = MULTIPLY(d7, FIX(0.298631336)); + tmp1 = MULTIPLY(d5, FIX(2.053119869)); + tmp2 = MULTIPLY(d3, FIX(3.072711026)); + tmp3 = MULTIPLY(d1, FIX(1.501321110)); + z1 = MULTIPLY(z1, - FIX(0.899976223)); + z2 = MULTIPLY(z2, - FIX(2.562915447)); + z3 = MULTIPLY(z3, - FIX(1.961570560)); + z4 = MULTIPLY(z4, - FIX(0.390180644)); + + z3 += z5; + z4 += z5; + + tmp0 += z1 + z3; + tmp1 += z2 + z4; + tmp2 += z2 + z3; + tmp3 += z1 + z4; + } else { + /* d1 == 0, d3 != 0, d5 != 0, d7 != 0 */ + z1 = d7; + z2 = d5 + d3; + z3 = d7 + d3; + z5 = MULTIPLY(z3 + d5, FIX(1.175875602)); + + tmp0 = MULTIPLY(d7, FIX(0.298631336)); + tmp1 = MULTIPLY(d5, FIX(2.053119869)); + tmp2 = MULTIPLY(d3, FIX(3.072711026)); + z1 = MULTIPLY(d7, - FIX(0.899976223)); + z2 = MULTIPLY(z2, - FIX(2.562915447)); + z3 = MULTIPLY(z3, - FIX(1.961570560)); + z4 = MULTIPLY(d5, - FIX(0.390180644)); + + z3 += z5; + z4 += z5; + + tmp0 += z1 + z3; + tmp1 += z2 + z4; + tmp2 += z2 + z3; + tmp3 = z1 + z4; + } + } else { + if (d1) { + /* d1 != 0, d3 == 0, d5 != 0, d7 != 0 */ + z1 = d7 + d1; + z2 = d5; + z3 = d7; + z4 = d5 + d1; + z5 = MULTIPLY(z3 + z4, FIX(1.175875602)); + + tmp0 = MULTIPLY(d7, FIX(0.298631336)); + tmp1 = MULTIPLY(d5, FIX(2.053119869)); + tmp3 = MULTIPLY(d1, FIX(1.501321110)); + z1 = 
MULTIPLY(z1, - FIX(0.899976223)); + z2 = MULTIPLY(d5, - FIX(2.562915447)); + z3 = MULTIPLY(d7, - FIX(1.961570560)); + z4 = MULTIPLY(z4, - FIX(0.390180644)); + + z3 += z5; + z4 += z5; + + tmp0 += z1 + z3; + tmp1 += z2 + z4; + tmp2 = z2 + z3; + tmp3 += z1 + z4; + } else { + /* d1 == 0, d3 == 0, d5 != 0, d7 != 0 */ + tmp0 = MULTIPLY(d7, - FIX(0.601344887)); + z1 = MULTIPLY(d7, - FIX(0.899976223)); + z3 = MULTIPLY(d7, - FIX(1.961570560)); + tmp1 = MULTIPLY(d5, - FIX(0.509795578)); + z2 = MULTIPLY(d5, - FIX(2.562915447)); + z4 = MULTIPLY(d5, - FIX(0.390180644)); + z5 = MULTIPLY(d5 + d7, FIX(1.175875602)); + + z3 += z5; + z4 += z5; + + tmp0 += z3; + tmp1 += z4; + tmp2 = z2 + z3; + tmp3 = z1 + z4; + } + } + } else { + if (d3) { + if (d1) { + /* d1 != 0, d3 != 0, d5 == 0, d7 != 0 */ + z1 = d7 + d1; + z3 = d7 + d3; + z5 = MULTIPLY(z3 + d1, FIX(1.175875602)); + + tmp0 = MULTIPLY(d7, FIX(0.298631336)); + tmp2 = MULTIPLY(d3, FIX(3.072711026)); + tmp3 = MULTIPLY(d1, FIX(1.501321110)); + z1 = MULTIPLY(z1, - FIX(0.899976223)); + z2 = MULTIPLY(d3, - FIX(2.562915447)); + z3 = MULTIPLY(z3, - FIX(1.961570560)); + z4 = MULTIPLY(d1, - FIX(0.390180644)); + + z3 += z5; + z4 += z5; + + tmp0 += z1 + z3; + tmp1 = z2 + z4; + tmp2 += z2 + z3; + tmp3 += z1 + z4; + } else { + /* d1 == 0, d3 != 0, d5 == 0, d7 != 0 */ + z3 = d7 + d3; + + tmp0 = MULTIPLY(d7, - FIX(0.601344887)); + z1 = MULTIPLY(d7, - FIX(0.899976223)); + tmp2 = MULTIPLY(d3, FIX(0.509795579)); + z2 = MULTIPLY(d3, - FIX(2.562915447)); + z5 = MULTIPLY(z3, FIX(1.175875602)); + z3 = MULTIPLY(z3, - FIX(0.785694958)); + + tmp0 += z3; + tmp1 = z2 + z5; + tmp2 += z3; + tmp3 = z1 + z5; + } + } else { + if (d1) { + /* d1 != 0, d3 == 0, d5 == 0, d7 != 0 */ + z1 = d7 + d1; + z5 = MULTIPLY(z1, FIX(1.175875602)); + + z1 = MULTIPLY(z1, FIX(0.275899379)); + z3 = MULTIPLY(d7, - FIX(1.961570560)); + tmp0 = MULTIPLY(d7, - FIX(1.662939224)); + z4 = MULTIPLY(d1, - FIX(0.390180644)); + tmp3 = MULTIPLY(d1, FIX(1.111140466)); + + tmp0 += z1; + tmp1 = z4 + 
z5; + tmp2 = z3 + z5; + tmp3 += z1; + } else { + /* d1 == 0, d3 == 0, d5 == 0, d7 != 0 */ + tmp0 = MULTIPLY(d7, - FIX(1.387039845)); + tmp1 = MULTIPLY(d7, FIX(1.175875602)); + tmp2 = MULTIPLY(d7, - FIX(0.785694958)); + tmp3 = MULTIPLY(d7, FIX(0.275899379)); + } + } + } + } else { + if (d5) { + if (d3) { + if (d1) { + /* d1 != 0, d3 != 0, d5 != 0, d7 == 0 */ + z2 = d5 + d3; + z4 = d5 + d1; + z5 = MULTIPLY(d3 + z4, FIX(1.175875602)); + + tmp1 = MULTIPLY(d5, FIX(2.053119869)); + tmp2 = MULTIPLY(d3, FIX(3.072711026)); + tmp3 = MULTIPLY(d1, FIX(1.501321110)); + z1 = MULTIPLY(d1, - FIX(0.899976223)); + z2 = MULTIPLY(z2, - FIX(2.562915447)); + z3 = MULTIPLY(d3, - FIX(1.961570560)); + z4 = MULTIPLY(z4, - FIX(0.390180644)); + + z3 += z5; + z4 += z5; + + tmp0 = z1 + z3; + tmp1 += z2 + z4; + tmp2 += z2 + z3; + tmp3 += z1 + z4; + } else { + /* d1 == 0, d3 != 0, d5 != 0, d7 == 0 */ + z2 = d5 + d3; + + z5 = MULTIPLY(z2, FIX(1.175875602)); + tmp1 = MULTIPLY(d5, FIX(1.662939225)); + z4 = MULTIPLY(d5, - FIX(0.390180644)); + z2 = MULTIPLY(z2, - FIX(1.387039845)); + tmp2 = MULTIPLY(d3, FIX(1.111140466)); + z3 = MULTIPLY(d3, - FIX(1.961570560)); + + tmp0 = z3 + z5; + tmp1 += z2; + tmp2 += z2; + tmp3 = z4 + z5; + } + } else { + if (d1) { + /* d1 != 0, d3 == 0, d5 != 0, d7 == 0 */ + z4 = d5 + d1; + + z5 = MULTIPLY(z4, FIX(1.175875602)); + z1 = MULTIPLY(d1, - FIX(0.899976223)); + tmp3 = MULTIPLY(d1, FIX(0.601344887)); + tmp1 = MULTIPLY(d5, - FIX(0.509795578)); + z2 = MULTIPLY(d5, - FIX(2.562915447)); + z4 = MULTIPLY(z4, FIX(0.785694958)); + + tmp0 = z1 + z5; + tmp1 += z4; + tmp2 = z2 + z5; + tmp3 += z4; + } else { + /* d1 == 0, d3 == 0, d5 != 0, d7 == 0 */ + tmp0 = MULTIPLY(d5, FIX(1.175875602)); + tmp1 = MULTIPLY(d5, FIX(0.275899380)); + tmp2 = MULTIPLY(d5, - FIX(1.387039845)); + tmp3 = MULTIPLY(d5, FIX(0.785694958)); + } + } + } else { + if (d3) { + if (d1) { + /* d1 != 0, d3 != 0, d5 == 0, d7 == 0 */ + z5 = d1 + d3; + tmp3 = MULTIPLY(d1, FIX(0.211164243)); + tmp2 = MULTIPLY(d3, - 
FIX(1.451774981)); + z1 = MULTIPLY(d1, FIX(1.061594337)); + z2 = MULTIPLY(d3, - FIX(2.172734803)); + z4 = MULTIPLY(z5, FIX(0.785694958)); + z5 = MULTIPLY(z5, FIX(1.175875602)); + + tmp0 = z1 - z4; + tmp1 = z2 + z4; + tmp2 += z5; + tmp3 += z5; + } else { + /* d1 == 0, d3 != 0, d5 == 0, d7 == 0 */ + tmp0 = MULTIPLY(d3, - FIX(0.785694958)); + tmp1 = MULTIPLY(d3, - FIX(1.387039845)); + tmp2 = MULTIPLY(d3, - FIX(0.275899379)); + tmp3 = MULTIPLY(d3, FIX(1.175875602)); + } + } else { + if (d1) { + /* d1 != 0, d3 == 0, d5 == 0, d7 == 0 */ + tmp0 = MULTIPLY(d1, FIX(0.275899379)); + tmp1 = MULTIPLY(d1, FIX(0.785694958)); + tmp2 = MULTIPLY(d1, FIX(1.175875602)); + tmp3 = MULTIPLY(d1, FIX(1.387039845)); + } else { + /* d1 == 0, d3 == 0, d5 == 0, d7 == 0 */ + tmp0 = tmp1 = tmp2 = tmp3 = 0; + } + } + } + } + + /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */ + + dataptr[DCTSIZE*0] = (DCTELEM) DESCALE(tmp10 + tmp3, + CONST_BITS+PASS1_BITS+3); + dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp10 - tmp3, + CONST_BITS+PASS1_BITS+3); + dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp11 + tmp2, + CONST_BITS+PASS1_BITS+3); + dataptr[DCTSIZE*6] = (DCTELEM) DESCALE(tmp11 - tmp2, + CONST_BITS+PASS1_BITS+3); + dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(tmp12 + tmp1, + CONST_BITS+PASS1_BITS+3); + dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp12 - tmp1, + CONST_BITS+PASS1_BITS+3); + dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp13 + tmp0, + CONST_BITS+PASS1_BITS+3); + dataptr[DCTSIZE*4] = (DCTELEM) DESCALE(tmp13 - tmp0, + CONST_BITS+PASS1_BITS+3); + + dataptr++; /* advance pointer to next column */ + } +} + +#else + +/*---- debugging/tracing macros ----*/ + +#if _MSC_VER +#pragma optimize("",on) +#if _MSC_VER > 700 +/*#pragma optimize("l",off)*/ +#endif +#endif + +#define idct_single_pos0() +#define idct_zero_col_stat() +#define idct_zero_row_stat() +#define idct_nonzero_col_stat() +#define idct_nonzero_row_stat() +#define DUMP_COEFS(p) +#define TRACE(args) +#define FAST_DCTPTRS 1 + +#if 0 /* to count 
cases */ +void idct_single_pos0 (void) { static int count; count++; } +void idct_zero_col_stat (void) { static int count; count++; } +void idct_zero_row_stat (void) { static int count; count++; } +void idct_nonzero_col_stat (void) { static int count; count++; } +void idct_nonzero_row_stat (void) { static int count; count++; } +#undef idct_single_pos0 +#undef idct_zero_col_stat +#undef idct_zero_row_stat +#undef idct_nonzero_col_stat +#undef idct_nonzero_row_stat +#endif + +void init_pre_idct (void) { } + +void j_rev_dct_sparse (DCTBLOCK data, int pos) +{ + /* If just DC Coefficient. */ + + if (pos == 0) { + register DCTELEM *dp, *dq; + DCTELEM dcval; + + idct_single_pos0(); + + dp = data; + dcval = dp[0]; + if (dcval < 0) + dcval = (short)((dcval - 3) >> 3); + else + dcval = (short)((dcval + 4) >> 3); + + if (dcval) { + for (dq = dp + 64; dp < dq; dp += 8) { + dp[3] = dp[2] = dp[1] = dp[0] = dcval; + dp[7] = dp[6] = dp[5] = dp[4] = dcval; + } + } + return; + } + + /* Some other coeff */ + j_rev_dct (data); +} + +#ifndef OPTIMIZE_ASM +void j_rev_dct (DCTBLOCK data) +{ + INT32 tmp0, tmp1, tmp2, tmp3; + INT32 tmp10, tmp11, tmp12, tmp13; + INT32 z1, z2, z3, z4, z5; + register DCTELEM *dp; + int rowctr; + SHIFT_TEMPS; + + /* Pass 1: process rows. */ + /* Note results are scaled up by sqrt(8) compared to a true IDCT; */ + /* furthermore, we scale the results by 2**PASS1_BITS. */ + + DUMP_COEFS(data); + + dp = data; + for (rowctr = DCTSIZE-1; rowctr >= 0; rowctr--, dp += DCTSIZE) { + /* Due to quantization, we will usually find that many of the input + * coefficients are zero, especially the AC terms. We can exploit this + * by short-circuiting the IDCT calculation for any row in which all + * the AC terms are zero. In that case each output is equal to the + * DC coefficient (with scale factor as needed). + * With typical images and quantization tables, half or more of the + * row DCT calculations can be simplified this way. 
/*
 * In-place 8x8 inverse DCT on fixed-point integers.
 *
 * data: block of DCTSIZE x DCTSIZE coefficients, overwritten with the
 *       reconstructed samples.
 *
 * The transform is separable and done in two passes over the same buffer:
 * pass 1 walks the rows and leaves its results scaled up by 2**PASS1_BITS,
 * pass 2 walks the columns and removes that scaling together with the
 * factor 8 (DESCALE by CONST_BITS+PASS1_BITS+3).  Both passes skip the
 * butterfly when all seven AC inputs of the row/column are zero.
 *
 * Inside each loop d0..d7 name the eight inputs of the current row or
 * column: with FAST_DCTPTRS they are macros aliasing dp[...] directly,
 * otherwise they are local int copies.
 */
void j_rev_dct (DCTBLOCK data)
{
  INT32 tmp0, tmp1, tmp2, tmp3;
  INT32 tmp10, tmp11, tmp12, tmp13;
  INT32 z1, z2, z3, z4, z5;
  register DCTELEM *dp;
  int rowctr;
  SHIFT_TEMPS;

  /* Pass 1: process rows. */
  /* Note results are scaled up by sqrt(8) compared to a true IDCT; */
  /* furthermore, we scale the results by 2**PASS1_BITS. */

  DUMP_COEFS(data);

  dp = data;
  for (rowctr = DCTSIZE-1; rowctr >= 0; rowctr--, dp += DCTSIZE) {
    /* Due to quantization, we will usually find that many of the input
     * coefficients are zero, especially the AC terms.  We can exploit this
     * by short-circuiting the IDCT calculation for any row in which all
     * the AC terms are zero.  In that case each output is equal to the
     * DC coefficient (with scale factor as needed).
     * With typical images and quantization tables, half or more of the
     * row DCT calculations can be simplified this way.
     */

#if FAST_DCTPTRS
#define d0 dp[0]
#define d1 dp[1]
#define d2 dp[2]
#define d3 dp[3]
#define d4 dp[4]
#define d5 dp[5]
#define d6 dp[6]
#define d7 dp[7]
#else
    int d0 = dp[0];
    int d1 = dp[1];
    int d2 = dp[2];
    int d3 = dp[3];
    int d4 = dp[4];
    int d5 = dp[5];
    int d6 = dp[6];
    int d7 = dp[7];
#endif

#ifndef NO_ZERO_ROW_TEST
    if ((d1 | d2 | d3 | d4 | d5 | d6 | d7) == 0) {
      /* AC terms all zero */
      DCTELEM dcval = (DCTELEM) (d0 << PASS1_BITS);

      /* if d0 is zero too the row is already all zero: nothing to store */
      if (d0) {
          dp[0] = dcval;
          dp[1] = dcval;
          dp[2] = dcval;
          dp[3] = dcval;
          dp[4] = dcval;
          dp[5] = dcval;
          dp[6] = dcval;
          dp[7] = dcval;
      }
      idct_zero_row_stat();
      continue;
    }
#endif

    idct_nonzero_row_stat();

    /* Even part: reverse the even part of the forward DCT. */
    /* The rotator is sqrt(2)*c(-6). */

    z1 = MULTIPLY(d2 + d6, FIX(0.541196100));
    tmp2 = z1 + MULTIPLY(d6, - FIX(1.847759065));
    tmp3 = z1 + MULTIPLY(d2, FIX(0.765366865));

    /* d0/d4 are brought to the same CONST_BITS scale as the products above */
    tmp0 = SCALE (d0 + d4, CONST_BITS);
    tmp1 = SCALE (d0 - d4, CONST_BITS);

    tmp10 = tmp0 + tmp3;
    tmp13 = tmp0 - tmp3;
    tmp11 = tmp1 + tmp2;
    tmp12 = tmp1 - tmp2;

    /* Odd part per figure 8; the matrix is unitary and hence its
     * transpose is its inverse.  i0..i3 are y7,y5,y3,y1 respectively.
     */

    z1 = d7 + d1;
    z2 = d5 + d3;
    z3 = d7 + d3;
    z4 = d5 + d1;
    z5 = MULTIPLY(z3 + z4, FIX(1.175875602)); /* sqrt(2) * c3 */

    tmp0 = MULTIPLY(d7, FIX(0.298631336)); /* sqrt(2) * (-c1+c3+c5-c7) */
    tmp1 = MULTIPLY(d5, FIX(2.053119869)); /* sqrt(2) * ( c1+c3-c5+c7) */
    tmp2 = MULTIPLY(d3, FIX(3.072711026)); /* sqrt(2) * ( c1+c3+c5-c7) */
    tmp3 = MULTIPLY(d1, FIX(1.501321110)); /* sqrt(2) * ( c1+c3-c5-c7) */
    z1 = MULTIPLY(z1, - FIX(0.899976223)); /* sqrt(2) * (c7-c3) */
    z2 = MULTIPLY(z2, - FIX(2.562915447)); /* sqrt(2) * (-c1-c3) */
    z3 = MULTIPLY(z3, - FIX(1.961570560)); /* sqrt(2) * (-c3-c5) */
    z4 = MULTIPLY(z4, - FIX(0.390180644)); /* sqrt(2) * (c5-c3) */

    z3 += z5;
    z4 += z5;

    tmp0 += z1 + z3;
    tmp1 += z2 + z4;
    tmp2 += z2 + z3;
    tmp3 += z1 + z4;

    /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
    /* (only CONST_BITS-PASS1_BITS are removed, which leaves the row
     * results scaled up by 2**PASS1_BITS for the column pass) */

    dp[0] = (DCTELEM) DESCALE(tmp10 + tmp3, CONST_BITS-PASS1_BITS);
    dp[7] = (DCTELEM) DESCALE(tmp10 - tmp3, CONST_BITS-PASS1_BITS);
    dp[1] = (DCTELEM) DESCALE(tmp11 + tmp2, CONST_BITS-PASS1_BITS);
    dp[6] = (DCTELEM) DESCALE(tmp11 - tmp2, CONST_BITS-PASS1_BITS);
    dp[2] = (DCTELEM) DESCALE(tmp12 + tmp1, CONST_BITS-PASS1_BITS);
    dp[5] = (DCTELEM) DESCALE(tmp12 - tmp1, CONST_BITS-PASS1_BITS);
    dp[3] = (DCTELEM) DESCALE(tmp13 + tmp0, CONST_BITS-PASS1_BITS);
    dp[4] = (DCTELEM) DESCALE(tmp13 - tmp0, CONST_BITS-PASS1_BITS);
  }
#if FAST_DCTPTRS
#undef d0
#undef d1
#undef d2
#undef d3
#undef d4
#undef d5
#undef d6
#undef d7
#endif

  /* Pass 2: process columns. */
  /* Note that we must descale the results by a factor of 8 == 2**3, */
  /* and also undo the PASS1_BITS scaling. */

  dp = data;
  /* rowctr is reused as the column counter here; dp advances by one
   * element per iteration and d0..d7 are DCTSIZE elements apart */
  for (rowctr = DCTSIZE-1; rowctr >= 0; rowctr--, dp++) {
    /* Columns of zeroes can be exploited in the same way as we did with rows.
     * However, the row calculation has created many nonzero AC terms, so the
     * simplification applies less often (typically 5% to 10% of the time).
     * On machines with very fast multiplication, it's possible that the
     * test takes more time than it's worth.  In that case this section
     * may be commented out.
     */

#if FAST_DCTPTRS
#define d0 dp[DCTSIZE*0]
#define d1 dp[DCTSIZE*1]
#define d2 dp[DCTSIZE*2]
#define d3 dp[DCTSIZE*3]
#define d4 dp[DCTSIZE*4]
#define d5 dp[DCTSIZE*5]
#define d6 dp[DCTSIZE*6]
#define d7 dp[DCTSIZE*7]
#else
    int d0 = dp[DCTSIZE*0];
    int d1 = dp[DCTSIZE*1];
    int d2 = dp[DCTSIZE*2];
    int d3 = dp[DCTSIZE*3];
    int d4 = dp[DCTSIZE*4];
    int d5 = dp[DCTSIZE*5];
    int d6 = dp[DCTSIZE*6];
    int d7 = dp[DCTSIZE*7];
#endif

#ifndef NO_ZERO_COLUMN_TEST
    if ((d1 | d2 | d3 | d4 | d5 | d6 | d7) == 0) {
      /* AC terms all zero */
      DCTELEM dcval = (DCTELEM) DESCALE((INT32) d0, PASS1_BITS+3);

      /* a zero d0 means the whole column is already zero */
      if (d0) {
          dp[DCTSIZE*0] = dcval;
          dp[DCTSIZE*1] = dcval;
          dp[DCTSIZE*2] = dcval;
          dp[DCTSIZE*3] = dcval;
          dp[DCTSIZE*4] = dcval;
          dp[DCTSIZE*5] = dcval;
          dp[DCTSIZE*6] = dcval;
          dp[DCTSIZE*7] = dcval;
      }
      idct_zero_col_stat();
      continue;
    }
#endif

    idct_nonzero_col_stat();

    /* Even part: reverse the even part of the forward DCT. */
    /* The rotator is sqrt(2)*c(-6). */

    z1 = MULTIPLY(d2 + d6, FIX(0.541196100));
    tmp2 = z1 + MULTIPLY(d6, - FIX(1.847759065));
    tmp3 = z1 + MULTIPLY(d2, FIX(0.765366865));

    tmp0 = SCALE (d0 + d4, CONST_BITS);
    tmp1 = SCALE (d0 - d4, CONST_BITS);

    tmp10 = tmp0 + tmp3;
    tmp13 = tmp0 - tmp3;
    tmp11 = tmp1 + tmp2;
    tmp12 = tmp1 - tmp2;

    /* Odd part per figure 8; the matrix is unitary and hence its
     * transpose is its inverse.  i0..i3 are y7,y5,y3,y1 respectively.
     */

    z1 = d7 + d1;
    z2 = d5 + d3;
    z3 = d7 + d3;
    z4 = d5 + d1;
    z5 = MULTIPLY(z3 + z4, FIX(1.175875602)); /* sqrt(2) * c3 */

    tmp0 = MULTIPLY(d7, FIX(0.298631336)); /* sqrt(2) * (-c1+c3+c5-c7) */
    tmp1 = MULTIPLY(d5, FIX(2.053119869)); /* sqrt(2) * ( c1+c3-c5+c7) */
    tmp2 = MULTIPLY(d3, FIX(3.072711026)); /* sqrt(2) * ( c1+c3+c5-c7) */
    tmp3 = MULTIPLY(d1, FIX(1.501321110)); /* sqrt(2) * ( c1+c3-c5-c7) */
    z1 = MULTIPLY(z1, - FIX(0.899976223)); /* sqrt(2) * (c7-c3) */
    z2 = MULTIPLY(z2, - FIX(2.562915447)); /* sqrt(2) * (-c1-c3) */
    z3 = MULTIPLY(z3, - FIX(1.961570560)); /* sqrt(2) * (-c3-c5) */
    z4 = MULTIPLY(z4, - FIX(0.390180644)); /* sqrt(2) * (c5-c3) */

    z3 += z5;
    z4 += z5;

    tmp0 += z1 + z3;
    tmp1 += z2 + z4;
    tmp2 += z2 + z3;
    tmp3 += z1 + z4;

    /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */

    dp[DCTSIZE*0] = (DCTELEM)DESCALE(tmp10 + tmp3, CONST_BITS+PASS1_BITS+3);
    dp[DCTSIZE*7] = (DCTELEM)DESCALE(tmp10 - tmp3, CONST_BITS+PASS1_BITS+3);
    dp[DCTSIZE*1] = (DCTELEM)DESCALE(tmp11 + tmp2, CONST_BITS+PASS1_BITS+3);
    dp[DCTSIZE*6] = (DCTELEM)DESCALE(tmp11 - tmp2, CONST_BITS+PASS1_BITS+3);
    dp[DCTSIZE*2] = (DCTELEM)DESCALE(tmp12 + tmp1, CONST_BITS+PASS1_BITS+3);
    dp[DCTSIZE*5] = (DCTELEM)DESCALE(tmp12 - tmp1, CONST_BITS+PASS1_BITS+3);
    dp[DCTSIZE*3] = (DCTELEM)DESCALE(tmp13 + tmp0, CONST_BITS+PASS1_BITS+3);
    dp[DCTSIZE*4] = (DCTELEM)DESCALE(tmp13 - tmp0, CONST_BITS+PASS1_BITS+3);
  }
#if FAST_DCTPTRS
#undef d0
#undef d1
#undef d2
#undef d3
#undef d4
#undef d5
#undef d6
#undef d7
#endif
}
/*
 * Per-encoder Huffman lookup tables for the MJPEG encoder.
 *
 * Every table is indexed by the symbol to encode and is filled by
 * build_huffman_codes() from mjpeg_init(): huff_size_* holds the code
 * length in bits, huff_code_* the code value.
 *
 * The DC tables are indexed by the bit count of the DC difference
 * (the standard DC value tables list symbols 0..11, hence 12 entries).
 * The AC tables are indexed by (run << 4) | nbits, as built in
 * encode_block(), plus the special symbols 0x00 (end of block) and
 * 0xf0 (run of 16 zeros).
 */
typedef struct MJpegContext {
    UINT8 huff_size_dc_luminance[12];     /* code length, luma DC */
    UINT16 huff_code_dc_luminance[12];    /* code value, luma DC */
    UINT8 huff_size_dc_chrominance[12];   /* code length, chroma DC */
    UINT16 huff_code_dc_chrominance[12];  /* code value, chroma DC */

    UINT8 huff_size_ac_luminance[256];    /* code length, luma AC */
    UINT16 huff_code_ac_luminance[256];   /* code value, luma AC */
    UINT8 huff_size_ac_chrominance[256];  /* code length, chroma AC */
    UINT16 huff_code_ac_chrominance[256]; /* code value, chroma AC */
} MJpegContext;
+ */ +static const unsigned char std_luminance_quant_tbl[64] = { + 16, 11, 10, 16, 24, 40, 51, 61, + 12, 12, 14, 19, 26, 58, 60, 55, + 14, 13, 16, 24, 40, 57, 69, 56, + 14, 17, 22, 29, 51, 87, 80, 62, + 18, 22, 37, 56, 68, 109, 103, 77, + 24, 35, 55, 64, 81, 104, 113, 92, + 49, 64, 78, 87, 103, 121, 120, 101, + 72, 92, 95, 98, 112, 100, 103, 99 +}; +static const unsigned char std_chrominance_quant_tbl[64] = { + 17, 18, 24, 47, 99, 99, 99, 99, + 18, 21, 26, 66, 99, 99, 99, 99, + 24, 26, 56, 99, 99, 99, 99, 99, + 47, 66, 99, 99, 99, 99, 99, 99, + 99, 99, 99, 99, 99, 99, 99, 99, + 99, 99, 99, 99, 99, 99, 99, 99, + 99, 99, 99, 99, 99, 99, 99, 99, + 99, 99, 99, 99, 99, 99, 99, 99 +}; +#endif + +/* Set up the standard Huffman tables (cf. JPEG standard section K.3) */ +/* IMPORTANT: these are only valid for 8-bit data precision! */ +static const UINT8 bits_dc_luminance[17] = +{ /* 0-base */ 0, 0, 1, 5, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0 }; +static const UINT8 val_dc_luminance[] = +{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 }; + +static const UINT8 bits_dc_chrominance[17] = +{ /* 0-base */ 0, 0, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0 }; +static const UINT8 val_dc_chrominance[] = +{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 }; + +static const UINT8 bits_ac_luminance[17] = +{ /* 0-base */ 0, 0, 2, 1, 3, 3, 2, 4, 3, 5, 5, 4, 4, 0, 0, 1, 0x7d }; +static const UINT8 val_ac_luminance[] = +{ 0x01, 0x02, 0x03, 0x00, 0x04, 0x11, 0x05, 0x12, + 0x21, 0x31, 0x41, 0x06, 0x13, 0x51, 0x61, 0x07, + 0x22, 0x71, 0x14, 0x32, 0x81, 0x91, 0xa1, 0x08, + 0x23, 0x42, 0xb1, 0xc1, 0x15, 0x52, 0xd1, 0xf0, + 0x24, 0x33, 0x62, 0x72, 0x82, 0x09, 0x0a, 0x16, + 0x17, 0x18, 0x19, 0x1a, 0x25, 0x26, 0x27, 0x28, + 0x29, 0x2a, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, + 0x3a, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, + 0x4a, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, + 0x5a, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, + 0x6a, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, + 0x7a, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, + 
0x8a, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, + 0x99, 0x9a, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, + 0xa8, 0xa9, 0xaa, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, + 0xb7, 0xb8, 0xb9, 0xba, 0xc2, 0xc3, 0xc4, 0xc5, + 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xd2, 0xd3, 0xd4, + 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xe1, 0xe2, + 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea, + 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, + 0xf9, 0xfa +}; + +static const UINT8 bits_ac_chrominance[17] = +{ /* 0-base */ 0, 0, 2, 1, 2, 4, 4, 3, 4, 7, 5, 4, 4, 0, 1, 2, 0x77 }; + +static const UINT8 val_ac_chrominance[] = +{ 0x00, 0x01, 0x02, 0x03, 0x11, 0x04, 0x05, 0x21, + 0x31, 0x06, 0x12, 0x41, 0x51, 0x07, 0x61, 0x71, + 0x13, 0x22, 0x32, 0x81, 0x08, 0x14, 0x42, 0x91, + 0xa1, 0xb1, 0xc1, 0x09, 0x23, 0x33, 0x52, 0xf0, + 0x15, 0x62, 0x72, 0xd1, 0x0a, 0x16, 0x24, 0x34, + 0xe1, 0x25, 0xf1, 0x17, 0x18, 0x19, 0x1a, 0x26, + 0x27, 0x28, 0x29, 0x2a, 0x35, 0x36, 0x37, 0x38, + 0x39, 0x3a, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, + 0x49, 0x4a, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, + 0x59, 0x5a, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, + 0x69, 0x6a, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, + 0x79, 0x7a, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, + 0x88, 0x89, 0x8a, 0x92, 0x93, 0x94, 0x95, 0x96, + 0x97, 0x98, 0x99, 0x9a, 0xa2, 0xa3, 0xa4, 0xa5, + 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xb2, 0xb3, 0xb4, + 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xc2, 0xc3, + 0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xd2, + 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, + 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, + 0xea, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, + 0xf9, 0xfa +}; + + +/* isn't this function nicer than the one in the libjpeg ? 
*/ +static void build_huffman_codes(UINT8 *huff_size, UINT16 *huff_code, + const UINT8 *bits_table, const UINT8 *val_table) +{ + int i, j, k,nb, code, sym; + + code = 0; + k = 0; + for(i=1;i<=16;i++) { + nb = bits_table[i]; + for(j=0;j<nb;j++) { + sym = val_table[k++]; + huff_size[sym] = i; + huff_code[sym] = code; + code++; + } + code <<= 1; + } +} + +int mjpeg_init(MpegEncContext *s) +{ + MJpegContext *m; + + m = malloc(sizeof(MJpegContext)); + if (!m) + return -1; + + /* build all the huffman tables */ + build_huffman_codes(m->huff_size_dc_luminance, + m->huff_code_dc_luminance, + bits_dc_luminance, + val_dc_luminance); + build_huffman_codes(m->huff_size_dc_chrominance, + m->huff_code_dc_chrominance, + bits_dc_chrominance, + val_dc_chrominance); + build_huffman_codes(m->huff_size_ac_luminance, + m->huff_code_ac_luminance, + bits_ac_luminance, + val_ac_luminance); + build_huffman_codes(m->huff_size_ac_chrominance, + m->huff_code_ac_chrominance, + bits_ac_chrominance, + val_ac_chrominance); + + s->mjpeg_ctx = m; + return 0; +} + +void mjpeg_close(MpegEncContext *s) +{ + free(s->mjpeg_ctx); +} + +static inline void put_marker(PutBitContext *p, int code) +{ + put_bits(p, 8, 0xff); + put_bits(p, 8, code); +} + +/* table_class: 0 = DC coef, 1 = AC coefs */ +static int put_huffman_table(MpegEncContext *s, int table_class, int table_id, + const UINT8 *bits_table, const UINT8 *value_table) +{ + PutBitContext *p = &s->pb; + int n, i; + + put_bits(p, 4, table_class); + put_bits(p, 4, table_id); + + n = 0; + for(i=1;i<=16;i++) { + n += bits_table[i]; + put_bits(p, 8, bits_table[i]); + } + + for(i=0;i<n;i++) + put_bits(p, 8, value_table[i]); + + return n + 17; +} + +static void jpeg_table_header(MpegEncContext *s) +{ + PutBitContext *p = &s->pb; + int i, size; + UINT8 *ptr; + + /* quant matrixes */ + put_marker(p, DQT); + put_bits(p, 16, 2 + 1 * (1 + 64)); + put_bits(p, 4, 0); /* 8 bit precision */ + put_bits(p, 4, 0); /* table 0 */ + for(i=0;i<64;i++) { + put_bits(p, 8, 
s->init_intra_matrix[i]); + } +#if 0 + put_bits(p, 4, 0); /* 8 bit precision */ + put_bits(p, 4, 1); /* table 1 */ + for(i=0;i<64;i++) { + put_bits(p, 8, m->chrominance_matrix[i]); + } +#endif + + /* huffman table */ + put_marker(p, DHT); + flush_put_bits(p); + ptr = p->buf_ptr; + put_bits(p, 16, 0); /* patched later */ + size = 2; + size += put_huffman_table(s, 0, 0, bits_dc_luminance, val_dc_luminance); + size += put_huffman_table(s, 0, 1, bits_dc_chrominance, val_dc_chrominance); + + size += put_huffman_table(s, 1, 0, bits_ac_luminance, val_ac_luminance); + size += put_huffman_table(s, 1, 1, bits_ac_chrominance, val_ac_chrominance); + ptr[0] = size >> 8; + ptr[1] = size; +} + +void mjpeg_picture_header(MpegEncContext *s) +{ + put_marker(&s->pb, SOI); + + jpeg_table_header(s); + + put_marker(&s->pb, SOF0); + + put_bits(&s->pb, 16, 17); + put_bits(&s->pb, 8, 8); /* 8 bits/component */ + put_bits(&s->pb, 16, s->height); + put_bits(&s->pb, 16, s->width); + put_bits(&s->pb, 8, 3); /* 3 components */ + + /* Y component */ + put_bits(&s->pb, 8, 1); /* component number */ + put_bits(&s->pb, 4, 2); /* H factor */ + put_bits(&s->pb, 4, 2); /* V factor */ + put_bits(&s->pb, 8, 0); /* select matrix */ + + /* Cb component */ + put_bits(&s->pb, 8, 2); /* component number */ + put_bits(&s->pb, 4, 1); /* H factor */ + put_bits(&s->pb, 4, 1); /* V factor */ + put_bits(&s->pb, 8, 0); /* select matrix */ + + /* Cr component */ + put_bits(&s->pb, 8, 3); /* component number */ + put_bits(&s->pb, 4, 1); /* H factor */ + put_bits(&s->pb, 4, 1); /* V factor */ + put_bits(&s->pb, 8, 0); /* select matrix */ + + /* scan header */ + put_marker(&s->pb, SOS); + put_bits(&s->pb, 16, 12); /* length */ + put_bits(&s->pb, 8, 3); /* 3 components */ + + /* Y component */ + put_bits(&s->pb, 8, 1); /* index */ + put_bits(&s->pb, 4, 0); /* DC huffman table index */ + put_bits(&s->pb, 4, 0); /* AC huffman table index */ + + /* Cb component */ + put_bits(&s->pb, 8, 2); /* index */ + put_bits(&s->pb, 4, 
1); /* DC huffman table index */ + put_bits(&s->pb, 4, 1); /* AC huffman table index */ + + /* Cr component */ + put_bits(&s->pb, 8, 3); /* index */ + put_bits(&s->pb, 4, 1); /* DC huffman table index */ + put_bits(&s->pb, 4, 1); /* AC huffman table index */ + + put_bits(&s->pb, 8, 0); /* Ss (not used) */ + put_bits(&s->pb, 8, 63); /* Se (not used) */ + put_bits(&s->pb, 8, 0); /* (not used) */ +} + +void mjpeg_picture_trailer(MpegEncContext *s) +{ + jflush_put_bits(&s->pb); + put_marker(&s->pb, EOI); +} + +static inline void encode_dc(MpegEncContext *s, int val, + UINT8 *huff_size, UINT16 *huff_code) +{ + int mant, nbits; + + if (val == 0) { + jput_bits(&s->pb, huff_size[0], huff_code[0]); + } else { + mant = val; + if (val < 0) { + val = -val; + mant--; + } + + /* compute the log (XXX: optimize) */ + nbits = 0; + while (val != 0) { + val = val >> 1; + nbits++; + } + + jput_bits(&s->pb, huff_size[nbits], huff_code[nbits]); + + jput_bits(&s->pb, nbits, mant & ((1 << nbits) - 1)); + } +} + +static void encode_block(MpegEncContext *s, DCTELEM *block, int n) +{ + int mant, nbits, code, i, j; + int component, dc, run, last_index, val; + MJpegContext *m = s->mjpeg_ctx; + UINT8 *huff_size_ac; + UINT16 *huff_code_ac; + + /* DC coef */ + component = (n <= 3 ? 
0 : n - 4 + 1); + dc = block[0]; /* overflow is impossible */ + val = dc - s->last_dc[component]; + if (n < 4) { + encode_dc(s, val, m->huff_size_dc_luminance, m->huff_code_dc_luminance); + huff_size_ac = m->huff_size_ac_luminance; + huff_code_ac = m->huff_code_ac_luminance; + } else { + encode_dc(s, val, m->huff_size_dc_chrominance, m->huff_code_dc_chrominance); + huff_size_ac = m->huff_size_ac_chrominance; + huff_code_ac = m->huff_code_ac_chrominance; + } + s->last_dc[component] = dc; + + /* AC coefs */ + + run = 0; + last_index = s->block_last_index[n]; + for(i=1;i<=last_index;i++) { + j = zigzag_direct[i]; + val = block[j]; + if (val == 0) { + run++; + } else { + while (run >= 16) { + jput_bits(&s->pb, huff_size_ac[0xf0], huff_code_ac[0xf0]); + run -= 16; + } + mant = val; + if (val < 0) { + val = -val; + mant--; + } + + /* compute the log (XXX: optimize) */ + nbits = 0; + while (val != 0) { + val = val >> 1; + nbits++; + } + code = (run << 4) | nbits; + + jput_bits(&s->pb, huff_size_ac[code], huff_code_ac[code]); + + jput_bits(&s->pb, nbits, mant & ((1 << nbits) - 1)); + run = 0; + } + } + + /* output EOB only if not already 64 values */ + if (last_index < 63 || run != 0) + jput_bits(&s->pb, huff_size_ac[0], huff_code_ac[0]); +} + +void mjpeg_encode_mb(MpegEncContext *s, + DCTELEM block[6][64]) +{ + int i; + for(i=0;i<6;i++) { + encode_block(s, block[i], i); + } +} diff --git a/libav/mpegaudio.c b/libav/mpegaudio.c new file mode 100644 index 0000000000..50ffc3c200 --- /dev/null +++ b/libav/mpegaudio.c @@ -0,0 +1,754 @@ +/* + * The simplest mpeg audio layer 2 encoder + * Copyright (c) 2000 Gerard Lantau. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <netinet/in.h> +#include <math.h> +#include "avcodec.h" +#include "mpegaudio.h" + +#define NDEBUG +#include <assert.h> + +/* define it to use floats in quantization (I don't like floats !) */ +//#define USE_FLOATS + +#define MPA_STEREO 0 +#define MPA_JSTEREO 1 +#define MPA_DUAL 2 +#define MPA_MONO 3 + +#include "mpegaudiotab.h" + +int MPA_encode_init(AVEncodeContext *avctx) +{ + MpegAudioContext *s = avctx->priv_data; + int freq = avctx->rate; + int bitrate = avctx->bit_rate; + int channels = avctx->channels; + int i, v, table; + float a; + + if (channels != 1) + return -1; + + bitrate = bitrate / 1000; + s->freq = freq; + s->bit_rate = bitrate * 1000; + avctx->frame_size = MPA_FRAME_SIZE; + avctx->key_frame = 1; /* always key frame */ + + /* encoding freq */ + s->lsf = 0; + for(i=0;i<3;i++) { + if (freq_tab[i] == freq) + break; + if ((freq_tab[i] / 2) == freq) { + s->lsf = 1; + break; + } + } + if (i == 3) + return -1; + s->freq_index = i; + + /* encoding bitrate & frequency */ + for(i=0;i<15;i++) { + if (bitrate_tab[1-s->lsf][i] == bitrate) + break; + } + if (i == 15) + return -1; + s->bitrate_index = i; + + /* compute total header size & pad bit */ + + a = (float)(bitrate * 1000 * MPA_FRAME_SIZE) / (freq * 8.0); + s->frame_size = ((int)a) * 8; + + /* frame fractional size to compute padding */ + s->frame_frac = 0; + s->frame_frac_incr = (int)((a - floor(a)) * 65536.0); + + /* select the right allocation table */ + if (!s->lsf) { + if ((freq 
/*
 * 32 point IDCT on integers (fixed point: every multiplication goes
 * through the MUL()/FIX() macros, no floating point variable is used).
 *
 * out:        receives the 32 results
 * tab:        the 32 input values; used as work area and destroyed
 * sblimit:    not referenced by the body
 * left_shift: each result is shifted left by this amount when it is
 *             stored in out
 *
 * The coefficients are read sequentially from costab32 (2 + 4 + 8 + 16
 * entries over the four butterfly stages) and the results are stored in
 * the order given by bitinv32.
 */
static void idct32(int *out, int *tab, int sblimit, int left_shift)
{
    int i, j;
    int *t, *t1, xr;
    const int *xp = costab32;

    /* three in-place accumulation passes over the odd entries, then over
       pairs 4 apart, then over groups of four 8 apart (top down) */
    for(j=31;j>=3;j-=2) tab[j] += tab[j - 2];

    t = tab + 30;
    t1 = tab + 2;
    do {
        t[0] += t[-4];
        t[1] += t[1 - 4];
        t -= 4;
    } while (t != t1);

    t = tab + 28;
    t1 = tab + 4;
    do {
        t[0] += t[-8];
        t[1] += t[1-8];
        t[2] += t[2-8];
        t[3] += t[3-8];
        t -= 8;
    } while (t != t1);

    /* sign changes, applied to both halves of 16 entries */
    t = tab;
    t1 = tab + 32;
    do {
        t[ 3] = -t[ 3];
        t[ 6] = -t[ 6];

        t[11] = -t[11];
        t[12] = -t[12];
        t[13] = -t[13];
        t[15] = -t[15];
        t += 16;
    } while (t != t1);


    /* first stage: 8 groups made of entries t, t+8, t+16, t+24;
       uses xp[0] and xp[1] */
    t = tab;
    t1 = tab + 8;
    do {
        int x1, x2, x3, x4;

        x3 = MUL(t[16], FIX(SQRT2*0.5));
        x4 = t[0] - x3;
        x3 = t[0] + x3;

        x2 = MUL(-(t[24] + t[8]), FIX(SQRT2*0.5));
        x1 = MUL((t[8] - x2), xp[0]);
        x2 = MUL((t[8] + x2), xp[1]);

        t[ 0] = x3 + x1;
        t[ 8] = x4 - x2;
        t[16] = x4 + x2;
        t[24] = x3 - x1;
        t++;
    } while (t != t1);

    /* second stage: 4 iterations, butterflies between entries 4 apart;
       uses the next 4 coefficients */
    xp += 2;
    t = tab;
    t1 = tab + 4;
    do {
        xr = MUL(t[28],xp[0]);
        t[28] = (t[0] - xr);
        t[0] = (t[0] + xr);

        xr = MUL(t[4],xp[1]);
        t[ 4] = (t[24] - xr);
        t[24] = (t[24] + xr);

        xr = MUL(t[20],xp[2]);
        t[20] = (t[8] - xr);
        t[ 8] = (t[8] + xr);

        xr = MUL(t[12],xp[3]);
        t[12] = (t[16] - xr);
        t[16] = (t[16] + xr);
        t++;
    } while (t != t1);
    xp += 4;

    /* third stage: 2 coefficients per iteration, 8 in total */
    for (i = 0; i < 4; i++) {
        xr = MUL(tab[30-i*4],xp[0]);
        tab[30-i*4] = (tab[i*4] - xr);
        tab[   i*4] = (tab[i*4] + xr);

        xr = MUL(tab[ 2+i*4],xp[1]);
        tab[ 2+i*4] = (tab[28-i*4] - xr);
        tab[28-i*4] = (tab[28-i*4] + xr);

        xr = MUL(tab[31-i*4],xp[0]);
        tab[31-i*4] = (tab[1+i*4] - xr);
        tab[ 1+i*4] = (tab[1+i*4] + xr);

        xr = MUL(tab[ 3+i*4],xp[1]);
        tab[ 3+i*4] = (tab[29-i*4] - xr);
        tab[29-i*4] = (tab[29-i*4] + xr);

        xp += 2;
    }

    /* last stage: 16 butterflies pairing an even entry (t, going down
       from 30 to 0) with an odd one (t1, going up from 1 to 31), one
       coefficient each.  The loop ends when t has stepped below tab. */
    t = tab + 30;
    t1 = tab + 1;
    do {
        xr = MUL(t1[0], *xp);
        t1[0] = (t[0] - xr);
        t[0] = (t[0] + xr);
        t -= 2;
        t1 += 2;
        xp++;
    } while (t >= tab);

    /* reorder and scale the results */
    for(i=0;i<32;i++) {
        out[i] = tab[bitinv32[i]] << left_shift;
    }
}
{ + int x1, x2, x3, x4; + + x3 = MUL(t[16], FIX(SQRT2*0.5)); + x4 = t[0] - x3; + x3 = t[0] + x3; + + x2 = MUL(-(t[24] + t[8]), FIX(SQRT2*0.5)); + x1 = MUL((t[8] - x2), xp[0]); + x2 = MUL((t[8] + x2), xp[1]); + + t[ 0] = x3 + x1; + t[ 8] = x4 - x2; + t[16] = x4 + x2; + t[24] = x3 - x1; + t++; + } while (t != t1); + + xp += 2; + t = tab; + t1 = tab + 4; + do { + xr = MUL(t[28],xp[0]); + t[28] = (t[0] - xr); + t[0] = (t[0] + xr); + + xr = MUL(t[4],xp[1]); + t[ 4] = (t[24] - xr); + t[24] = (t[24] + xr); + + xr = MUL(t[20],xp[2]); + t[20] = (t[8] - xr); + t[ 8] = (t[8] + xr); + + xr = MUL(t[12],xp[3]); + t[12] = (t[16] - xr); + t[16] = (t[16] + xr); + t++; + } while (t != t1); + xp += 4; + + for (i = 0; i < 4; i++) { + xr = MUL(tab[30-i*4],xp[0]); + tab[30-i*4] = (tab[i*4] - xr); + tab[ i*4] = (tab[i*4] + xr); + + xr = MUL(tab[ 2+i*4],xp[1]); + tab[ 2+i*4] = (tab[28-i*4] - xr); + tab[28-i*4] = (tab[28-i*4] + xr); + + xr = MUL(tab[31-i*4],xp[0]); + tab[31-i*4] = (tab[1+i*4] - xr); + tab[ 1+i*4] = (tab[1+i*4] + xr); + + xr = MUL(tab[ 3+i*4],xp[1]); + tab[ 3+i*4] = (tab[29-i*4] - xr); + tab[29-i*4] = (tab[29-i*4] + xr); + + xp += 2; + } + + t = tab + 30; + t1 = tab + 1; + do { + xr = MUL(t1[0], *xp); + t1[0] = (t[0] - xr); + t[0] = (t[0] + xr); + t -= 2; + t1 += 2; + xp++; + } while (t >= tab); + + for(i=0;i<32;i++) { + out[i] = tab[bitinv32[i]] << left_shift; + } +} +

/*
 * Analysis filter bank for one frame.
 *
 * Consumes 36 groups of 32 input samples (36 * 32 = 1152 samples) from
 * 'samples' and writes 36 * 32 subband values, in order, starting at
 * s->sb_samples[0][0][0].
 *
 * s->samples_buf is used as a sliding history: each new group of 32
 * samples is stored in reverse order at s->samples_offset, the offset
 * then moves down by 32, and when it would become negative the 480 most
 * recent samples are moved to the top of the buffer.
 */
static void filter(MpegAudioContext *s, short *samples)
{
    short *p, *q;
    int sum, offset, i, j, norm, n;
    short tmp[64];
    int tmp1[32];
    int *out;

    offset = s->samples_offset;
    out = &s->sb_samples[0][0][0];
    for (j = 0; j < 36; j++) {
        /* 32 samples at once, newest sample at the lowest index */
        for (i = 0; i < 32; i++)
            s->samples_buf[offset + (31 - i)] = samples[i];

        /* windowing: each of the 64 outputs is the sum of 8 products
           taken 64 entries apart in both the history and filter_bank */
        p = s->samples_buf + offset;
        q = filter_bank;
        /* maxsum = 23169 */
        for (i = 0; i < 64; i++) {
            sum  = p[0*64] * q[0*64];
            sum += p[1*64] * q[1*64];
            sum += p[2*64] * q[2*64];
            sum += p[3*64] * q[3*64];
            sum += p[4*64] * q[4*64];
            sum += p[5*64] * q[5*64];
            sum += p[6*64] * q[6*64];
            sum += p[7*64] * q[7*64];
            tmp[i] = sum >> 14;
            p++;
            q++;
        }

        /* fold the 64 windowed values into the 32 inputs of the IDCT */
        tmp1[0] = tmp[16];
        for (i = 1; i <= 16; i++)
            tmp1[i] = tmp[i + 16] + tmp[16 - i];
        for (i = 17; i <= 31; i++)
            tmp1[i] = tmp[i + 16] - tmp[80 - i];

        /* integer IDCT 32 with normalization. XXX: There may be some
           overflow left */
        norm = 0;
        for (i = 0; i < 32; i++)
            norm |= abs(tmp1[i]);

        /* NOTE(review): log2() is declared outside this chunk; it is used
           here as an integer "position of the MSB" - confirm. */
        n = log2(norm) - 12;
        if (n > 0) {
            /* pre-scale so the IDCT input fits; n is handed to idct32() */
            for (i = 0; i < 32; i++)
                tmp1[i] >>= n;
        } else {
            n = 0;
        }

        idct32(out, tmp1, s->sblimit, n);

        /* advance of 32 samples */
        samples += 32;
        offset -= 32;
        out += 32;
        /* handle the wrap around */
        if (offset < 0) {
            /* size expressed with sizeof instead of a hard-coded 2 */
            memmove(s->samples_buf + SAMPLES_BUF_SIZE - (512 - 32),
                    s->samples_buf,
                    (512 - 32) * sizeof(s->samples_buf[0]));
            offset = SAMPLES_BUF_SIZE - 512;
        }
    }
    s->samples_offset = offset;
}

/*
 * Compute, for each of the first 'sblimit' subbands, the three scale
 * factor indices (one per group of 12 samples) and the scale code that
 * says how many of them are transmitted.
 *
 * scale_code[j]       receives the 2 bit code (0..3) of subband j.
 * scale_factors[j][i] receives the index (0..63) of part i of subband j;
 *                     entries may be overwritten with a neighbouring
 *                     value when the code merges scale factors.
 * sb_samples          subband samples produced by filter().
 */
static void compute_scale_factors(unsigned char scale_code[SBLIMIT],
                                  unsigned char scale_factors[SBLIMIT][3],
                                  int sb_samples[3][12][SBLIMIT],
                                  int sblimit)
{
    int *p, vmax, v, n, i, j, k, code;
    int index, d1, d2;
    unsigned char *sf = &scale_factors[0][0];

    for (j = 0; j < sblimit; j++) {
        for (i = 0; i < 3; i++) {
            /* find the max absolute value */
            p = &sb_samples[i][0][j];
            vmax = abs(*p);
            for (k = 1; k < 12; k++) {
                p += SBLIMIT;
                v = abs(*p);
                if (v > vmax)
                    vmax = v;
            }
            /* compute the scale factor index using log 2 computations */
            if (vmax > 0) {
                n = log2(vmax);
                /* n is the position of the MSB of vmax. now
                   use at most 2 compares to find the index */
                index = (21 - n) * 3 - 3;
                if (index >= 0) {
                    /* scale_factor_table has 64 entries: never read
                       entry index + 1 == 64 */
                    while (index < 63 &&
                           vmax <= scale_factor_table[index + 1])
                        index++;
                } else {
                    index = 0; /* very unlikely case of overflow */
                }
            } else {
                /* NOTE(review): MPEG audio layer 2 appears to define
                   scale factor indices 0..62 only - confirm whether 63
                   may be emitted for an all-zero group. */
                index = 63;
            }

#if 0
            printf("%2d:%d in=%x %x %d\n",
                   j, i, vmax, scale_factor_table[index], index);
#endif
            /* store the scale factor */
            assert(index >= 0 && index <= 63);
            sf[i] = index;
        }

        /* compute the transmission factor : look if the scale factors
           are close enough to each other */
        d1 = scale_diff_table[sf[0] - sf[1] + 64];
        d2 = scale_diff_table[sf[1] - sf[2] + 64];

        /* handle the 25 cases */
        switch (d1 * 5 + d2) {
        case 0*5+0:
        case 0*5+4:
        case 3*5+4:
        case 4*5+0:
        case 4*5+4:
            code = 0;
            break;
        case 0*5+1:
        case 0*5+2:
        case 4*5+1:
        case 4*5+2:
            code = 3;
            sf[2] = sf[1];
            break;
        case 0*5+3:
        case 4*5+3:
            code = 3;
            sf[1] = sf[2];
            break;
        case 1*5+0:
        case 1*5+4:
        case 2*5+4:
            code = 1;
            sf[1] = sf[0];
            break;
        case 1*5+1:
        case 1*5+2:
        case 2*5+0:
        case 2*5+1:
        case 2*5+2:
            code = 2;
            sf[1] = sf[2] = sf[0];
            break;
        case 2*5+3:
        case 3*5+3:
            code = 2;
            sf[0] = sf[1] = sf[2];
            break;
        case 3*5+0:
        case 3*5+1:
        case 3*5+2:
            code = 2;
            sf[0] = sf[2] = sf[1];
            break;
        case 1*5+3:
            code = 2;
            if (sf[0] > sf[2])
                sf[0] = sf[2];
            sf[1] = sf[2] = sf[0];
            break;
        default:
            abort();
        }

#if 0
        printf("%d: %2d %2d %2d %d %d -> %d\n", j,
               sf[0], sf[1], sf[2], d1, d2, code);
#endif
        scale_code[j] = code;
        sf += 3;
    }
}

/* The most important function : psycho acoustic module. In this
   encoder there is basically none, so this is the worst you can do,
   but also this is the simpler. */
/*
 * Fill smr[0 .. s->sblimit - 1] with the constant table fixed_smr[]
 * multiplied by 10 and truncated to an integer.
 */
static void psycho_acoustic_model(MpegAudioContext *s, short smr[SBLIMIT])
{
    int i;

    for (i = 0; i < s->sblimit; i++) {
        smr[i] = (int)(fixed_smr[i] * 10);
    }
}


#define SB_NOTALLOCATED  0
#define SB_ALLOCATED     1
#define SB_NOMORE        2

/* Try to maximize the smr while using a number of bits inferior to
   the frame size. I tried to make the code simpler, faster and
   smaller than other encoders :-) */
/*
 * s          encoder state; frame_frac and do_padding are updated here.
 * smr1       signal to mask ratio per subband (read only).
 * bit_alloc  receives the allocation index of each subband (0 = none).
 * padding    receives the number of bits left unused in the frame.
 */
static void compute_bit_allocation(MpegAudioContext *s,
                                   short smr1[SBLIMIT],
                                   unsigned char bit_alloc[SBLIMIT],
                                   int *padding)
{
    int i, b, max_smr, max_sb, current_frame_size, max_frame_size;
    int incr;
    short smr[SBLIMIT];
    unsigned char subband_status[SBLIMIT];
    const unsigned char *alloc;

    memcpy(smr, smr1, sizeof(short) * s->sblimit);
    memset(subband_status, SB_NOTALLOCATED, s->sblimit);
    memset(bit_alloc, 0, s->sblimit);

    /* compute frame size and padding */
    max_frame_size = s->frame_size;
    s->frame_frac += s->frame_frac_incr;
    if (s->frame_frac >= 65536) {
        s->frame_frac -= 65536;
        s->do_padding = 1;
        max_frame_size += 8;
    } else {
        s->do_padding = 0;
    }

    /* compute the header + bit alloc size */
    current_frame_size = 32;
    alloc = s->alloc_table;
    for (i = 0; i < s->sblimit; i++) {
        incr = alloc[0];
        current_frame_size += incr;
        alloc += 1 << incr;
    }

    for (;;) {
        /* look for the subband with the largest signal to mask ratio */
        max_sb = -1;
        /* smallest int, written so that no out-of-range constant has to
           be converted to int */
        max_smr = -0x7fffffff - 1;
        for (i = 0; i < s->sblimit; i++) {
            if (smr[i] > max_smr && subband_status[i] != SB_NOMORE) {
                max_smr = smr[i];
                max_sb = i;
            }
        }
#if 0
        printf("current=%d max=%d max_sb=%d alloc=%d\n",
               current_frame_size, max_frame_size, max_sb,
               bit_alloc[max_sb]);
#endif
        if (max_sb < 0)
            break;

        /* find alloc table entry (XXX: not optimal, should use
           pointer table) */
        alloc = s->alloc_table;
        for (i = 0; i < max_sb; i++) {
            alloc += 1 << alloc[0];
        }

        if (subband_status[max_sb] == SB_NOTALLOCATED) {
            /* nothing was coded for this band: add the necessary bits */
            incr = 2 + nb_scale_factors[s->scale_code[max_sb]] * 6;
            incr += total_quant_bits[alloc[1]];
        } else {
            /* increments bit allocation */
            b = bit_alloc[max_sb];
            incr = total_quant_bits[alloc[b + 1]] -
                total_quant_bits[alloc[b]];
        }

        if (current_frame_size + incr <= max_frame_size) {
            /* can increase size */
            b = ++bit_alloc[max_sb];
            current_frame_size += incr;
            /* decrease smr by the resolution we added */
            smr[max_sb] = smr1[max_sb] - quant_snr[alloc[b]];
            /* max allocation size reached ? */
            if (b == ((1 << alloc[0]) - 1))
                subband_status[max_sb] = SB_NOMORE;
            else
                subband_status[max_sb] = SB_ALLOCATED;
        } else {
            /* cannot increase the size of this subband */
            subband_status[max_sb] = SB_NOMORE;
        }
    }
    *padding = max_frame_size - current_frame_size;
    assert(*padding >= 0);

#if 0
    for(i=0;i<s->sblimit;i++) {
        printf("%d ", bit_alloc[i]);
    }
    printf("\n");
#endif
}

/*
 * Output the mpeg audio layer 2 frame. Note how the code is small
 * compared to other encoders :-)
 *
 * Writes, through s->pb: the 32 bit header, the bit allocation of every
 * subband, the scale codes and scale factors of the allocated subbands,
 * the quantized samples, 'padding' zero bits, and finally flushes.
 */
static void encode_frame(MpegAudioContext *s,
                         unsigned char bit_alloc[SBLIMIT],
                         int padding)
{
    int i, j, k, l, bit_alloc_bits, b;
    unsigned char *sf;
    int q[3];
    PutBitContext *p = &s->pb;

    /* header */

    put_bits(p, 12, 0xfff);
    put_bits(p, 1, 1 - s->lsf); /* 1 = mpeg1 ID, 0 = mpeg2 lsf ID */
    put_bits(p, 2, 4-2);  /* layer 2 */
    put_bits(p, 1, 1); /* no error protection */
    put_bits(p, 4, s->bitrate_index);
    put_bits(p, 2, s->freq_index);
    put_bits(p, 1, s->do_padding); /* use padding */
    put_bits(p, 1, 0);             /* private_bit */
    put_bits(p, 2, MPA_MONO);
    put_bits(p, 2, 0); /* mode_ext */
    put_bits(p, 1, 0); /* no copyright */
    put_bits(p, 1, 1); /* original */
    put_bits(p, 2, 0); /* no emphasis */

    /* bit allocation */
    j = 0;
    for (i = 0; i < s->sblimit; i++) {
        bit_alloc_bits = s->alloc_table[j];
        put_bits(p, bit_alloc_bits, bit_alloc[i]);
        j += 1 << bit_alloc_bits;
    }

    /* scale codes */
    for (i = 0; i < s->sblimit; i++) {
        if (bit_alloc[i])
            put_bits(p, 2, s->scale_code[i]);
    }

    /* scale factors */
    sf = &s->scale_factors[0][0];
    for (i = 0; i < s->sblimit; i++) {
        if (bit_alloc[i]) {
            switch (s->scale_code[i]) {
            case 0:
                put_bits(p, 6, sf[0]);
                put_bits(p, 6, sf[1]);
                put_bits(p, 6, sf[2]);
                break;
            case 3:
            case 1:
                put_bits(p, 6, sf[0]);
                put_bits(p, 6, sf[2]);
                break;
            case 2:
                put_bits(p, 6, sf[0]);
                break;
            default:
                break;
            }
        }
        sf += 3;
    }

    /* quantization & write sub band samples */

    for (k = 0; k < 3; k++) {
        for (l = 0; l < 12; l += 3) {
            j = 0;
            for (i = 0; i < s->sblimit; i++) {
                bit_alloc_bits = s->alloc_table[j];
                b = bit_alloc[i];
                if (b) {
                    int qindex, steps, m, sample, bits;
                    /* we encode 3 sub band samples of the same sub band at a time */
                    qindex = s->alloc_table[j+b];
                    steps = quant_steps[qindex];
                    for (m = 0; m < 3; m++) {
                        sample = s->sb_samples[k][l + m][i];
                        /* divide by scale factor */
#ifdef USE_FLOATS
                        {
                            float a;
                            a = (float)sample * scale_factor_inv_table[s->scale_factors[i][k]];
                            q[m] = (int)((a + 1.0) * steps * 0.5);
                        }
#else
                        {
                            int q1, e, shift, mult;
                            e = s->scale_factors[i][k];
                            shift = scale_factor_shift[e];
                            mult = scale_factor_mult[e];

                            /* normalize to P bits */
                            if (shift < 0) {
                                /* multiply instead of '<<': left shifting
                                   a negative sample is undefined */
                                q1 = sample * (1 << (-shift));
                            } else {
                                q1 = sample >> shift;
                            }
                            q1 = (q1 * mult) >> P;
                            q[m] = ((q1 + (1 << P)) * steps) >> (P + 1);
                        }
#endif
                        if (q[m] >= steps)
                            q[m] = steps - 1;
                        assert(q[m] >= 0 && q[m] < steps);
                    }
                    bits = quant_bits[qindex];
                    if (bits < 0) {
                        /* group the 3 values to save bits */
                        put_bits(p, -bits,
                                 q[0] + steps * (q[1] + steps * q[2]));
#if 0
                        printf("%d: gr1 %d\n",
                               i, q[0] + steps * (q[1] + steps * q[2]));
#endif
                    } else {
#if 0
                        printf("%d: gr3 %d %d %d\n",
                               i, q[0], q[1], q[2]);
#endif
                        put_bits(p, bits, q[0]);
                        put_bits(p, bits, q[1]);
                        put_bits(p, bits, q[2]);
                    }
                }
                /* next subband in alloc table */
                j += 1 << bit_alloc_bits;
            }
        }
    }

    /* padding */
    for (i = 0; i < padding; i++)
        put_bits(p, 1, 0);

    /* flush */
    flush_put_bits(p);
}

/*
 * Encode one frame of MPA_FRAME_SIZE samples.
 *
 * avctx     encoder context; avctx->priv_data is the MpegAudioContext.
 * frame     output buffer for the coded frame.
 * buf_size  size of 'frame'.
 *           NOTE(review): buf_size is not used; the bit writer is always
 *           set up with MPA_MAX_CODED_FRAME_SIZE, so the caller has to
 *           provide at least that many bytes - confirm against callers.
 * data      input samples, read as an array of short.
 *
 * Returns the distance between s->pb.buf_ptr and s->pb.buf after the
 * frame has been written and flushed.
 */
int MPA_encode_frame(AVEncodeContext *avctx,
                     unsigned char *frame, int buf_size, void *data)
{
    MpegAudioContext *s = avctx->priv_data;
    short *samples = data;
    short smr[SBLIMIT];
    unsigned char bit_alloc[SBLIMIT];
    int padding;

    filter(s, samples);
    compute_scale_factors(s->scale_code, s->scale_factors,
                          s->sb_samples, s->sblimit);
    psycho_acoustic_model(s, smr);
    compute_bit_allocation(s, smr, bit_alloc, &padding);

    init_put_bits(&s->pb, frame, MPA_MAX_CODED_FRAME_SIZE, NULL, NULL);

    encode_frame(s, bit_alloc, padding);

    s->nb_samples += MPA_FRAME_SIZE;
    return s->pb.buf_ptr - s->pb.buf;
}


/* encoder description for the "mp2" codec */
AVEncoder mp2_encoder = {
    "mp2",
    CODEC_TYPE_AUDIO,
    CODEC_ID_MP2,
    sizeof(MpegAudioContext),
    MPA_encode_init,
    MPA_encode_frame,
    NULL,
};
diff --git a/libav/mpegaudio.h b/libav/mpegaudio.h new file mode 100644 index 0000000000..0734d3466b --- /dev/null +++ b/libav/mpegaudio.h @@ -0,0 +1,31 @@ + +/* max compressed frame size */ +#define MPA_MAX_CODED_FRAME_SIZE 1200 + +#define MPA_FRAME_SIZE 1152 + +#define SAMPLES_BUF_SIZE 4096 +#define SBLIMIT 32 /* number of subbands */ +#define DCT_BITS 14 /* number of bits for the DCT */ +#define MUL(a,b) (((a) * (b)) >> DCT_BITS) +#define FIX(a) ((int)((a) * (1 << DCT_BITS))) + +typedef struct MpegAudioContext { + PutBitContext pb; + int freq, bit_rate; + int lsf; /* 1 if mpeg2 low bitrate selected */ + int bitrate_index; /* bit rate */ + int freq_index; + int frame_size; /* frame size, in bits, without padding */ + long long nb_samples; /* total number of samples encoded */ + /* padding computation */ + int frame_frac, frame_frac_incr, do_padding; + short samples_buf[SAMPLES_BUF_SIZE]; /* buffer for filter */ + int samples_offset; /* offset in samples_buf */ + int sb_samples[3][12][SBLIMIT]; + unsigned char scale_factors[SBLIMIT][3]; /* scale factors */ + unsigned char scale_code[SBLIMIT]; /* code to group 3 scale factors */ + int
sblimit; /* number of used subbands */ + const unsigned char *alloc_table; +} MpegAudioContext; + diff --git a/libav/mpegaudiotab.h b/libav/mpegaudiotab.h new file mode 100644 index 0000000000..05bdb9eea1 --- /dev/null +++ b/libav/mpegaudiotab.h @@ -0,0 +1,310 @@ +/* + * mpeg audio layer 2 tables. Most of them come from the mpeg audio + * specification. + * + * Copyright (c) 2000 Gerard Lantau. + * + * The licence of this code is contained in file LICENCE found in the + * same archive + */ + +static const unsigned short bitrate_tab[2][15] = { + {0,8,16,24,32,40,48,56,64,80,96,112,128,144,160}, /* mpeg2 lsf */ + {0,32,48,56,64,80,96,112,128,160,192,224,256,320,384}, /* mpeg1 */ +}; + +static const unsigned short freq_tab[3] = { 44100, 48000, 32000 }; + +#define SQRT2 1.41421356237309514547 + +static const int costab32[30] = { + FIX(0.54119610014619701222), + FIX(1.3065629648763763537), + + FIX(0.50979557910415917998), + FIX(2.5629154477415054814), + FIX(0.89997622313641556513), + FIX(0.60134488693504528634), + + FIX(0.5024192861881556782), + FIX(5.1011486186891552563), + FIX(0.78815462345125020249), + FIX(0.64682178335999007679), + FIX(0.56694403481635768927), + FIX(1.0606776859903470633), + FIX(1.7224470982383341955), + FIX(0.52249861493968885462), + + FIX(10.19000812354803287), + FIX(0.674808341455005678), + FIX(1.1694399334328846596), + FIX(0.53104259108978413284), + FIX(2.0577810099534108446), + FIX(0.58293496820613388554), + FIX(0.83934964541552681272), + FIX(0.50547095989754364798), + FIX(3.4076084184687189804), + FIX(0.62250412303566482475), + FIX(0.97256823786196078263), + FIX(0.51544730992262455249), + FIX(1.4841646163141661852), + FIX(0.5531038960344445421), + FIX(0.74453627100229857749), + FIX(0.5006029982351962726), +}; + +static const int bitinv32[32] = { + 0, 16, 8, 24, 4, 20, 12, 28, + 2, 18, 10, 26, 6, 22, 14, 30, + 1, 17, 9, 25, 5, 21, 13, 29, + 3, 19, 11, 27, 7, 23, 15, 31 +}; + + +static short filter_bank[512]; + +static const double enwindow[512] 
= {0.000000000, + -0.000000477, -0.000000477, -0.000000477, -0.000000477, -0.000000477, -0.000000477, -0.000000954, -0.000000954, + -0.000000954, -0.000000954, -0.000001431, -0.000001431, -0.000001907, -0.000001907, -0.000002384, -0.000002384, + -0.000002861, -0.000003338, -0.000003338, -0.000003815, -0.000004292, -0.000004768, -0.000005245, -0.000006199, + -0.000006676, -0.000007629, -0.000008106, -0.000009060, -0.000010014, -0.000011444, -0.000012398, -0.000013828, + -0.000014782, -0.000016689, -0.000018120, -0.000019550, -0.000021458, -0.000023365, -0.000025272, -0.000027657, + -0.000030041, -0.000032425, -0.000034809, -0.000037670, -0.000040531, -0.000043392, -0.000046253, -0.000049591, + -0.000052929, -0.000055790, -0.000059605, -0.000062943, -0.000066280, -0.000070095, -0.000073433, -0.000076771, + -0.000080585, -0.000083923, -0.000087261, -0.000090599, -0.000093460, -0.000096321, -0.000099182, 0.000101566, + 0.000103951, 0.000105858, 0.000107288, 0.000108242, 0.000108719, 0.000108719, 0.000108242, 0.000106812, + 0.000105381, 0.000102520, 0.000099182, 0.000095367, 0.000090122, 0.000084400, 0.000077724, 0.000069618, + 0.000060558, 0.000050545, 0.000039577, 0.000027180, 0.000013828, -0.000000954, -0.000017166, -0.000034332, + -0.000052929, -0.000072956, -0.000093937, -0.000116348, -0.000140190, -0.000165462, -0.000191212, -0.000218868, + -0.000247478, -0.000277042, -0.000307560, -0.000339031, -0.000371456, -0.000404358, -0.000438213, -0.000472546, + -0.000507355, -0.000542164, -0.000576973, -0.000611782, -0.000646591, -0.000680923, -0.000714302, -0.000747204, + -0.000779152, -0.000809669, -0.000838757, -0.000866413, -0.000891685, -0.000915051, -0.000935555, -0.000954151, + -0.000968933, -0.000980854, -0.000989437, -0.000994205, -0.000995159, -0.000991821, -0.000983715, 0.000971317, + 0.000953674, 0.000930786, 0.000902653, 0.000868797, 0.000829220, 0.000783920, 0.000731945, 0.000674248, + 0.000610352, 0.000539303, 0.000462532, 0.000378609, 0.000288486, 
0.000191689, 0.000088215, -0.000021458, + -0.000137329, -0.000259876, -0.000388145, -0.000522137, -0.000661850, -0.000806808, -0.000956535, -0.001111031, + -0.001269817, -0.001432419, -0.001597881, -0.001766682, -0.001937389, -0.002110004, -0.002283096, -0.002457142, + -0.002630711, -0.002803326, -0.002974033, -0.003141880, -0.003306866, -0.003467083, -0.003622532, -0.003771782, + -0.003914356, -0.004048824, -0.004174709, -0.004290581, -0.004395962, -0.004489899, -0.004570484, -0.004638195, + -0.004691124, -0.004728317, -0.004748821, -0.004752159, -0.004737377, -0.004703045, -0.004649162, -0.004573822, + -0.004477024, -0.004357815, -0.004215240, -0.004049301, -0.003858566, -0.003643036, -0.003401756, 0.003134727, + 0.002841473, 0.002521515, 0.002174854, 0.001800537, 0.001399517, 0.000971317, 0.000515938, 0.000033379, + -0.000475883, -0.001011848, -0.001573563, -0.002161503, -0.002774239, -0.003411293, -0.004072189, -0.004756451, + -0.005462170, -0.006189346, -0.006937027, -0.007703304, -0.008487225, -0.009287834, -0.010103703, -0.010933399, + -0.011775017, -0.012627602, -0.013489246, -0.014358521, -0.015233517, -0.016112804, -0.016994476, -0.017876148, + -0.018756866, -0.019634247, -0.020506859, -0.021372318, -0.022228718, -0.023074150, -0.023907185, -0.024725437, + -0.025527000, -0.026310921, -0.027073860, -0.027815342, -0.028532982, -0.029224873, -0.029890060, -0.030526638, + -0.031132698, -0.031706810, -0.032248020, -0.032754898, -0.033225536, -0.033659935, -0.034055710, -0.034412861, + -0.034730434, -0.035007000, -0.035242081, -0.035435200, -0.035586357, -0.035694122, -0.035758972, 0.035780907, + 0.035758972, 0.035694122, 0.035586357, 0.035435200, 0.035242081, 0.035007000, 0.034730434, 0.034412861, + 0.034055710, 0.033659935, 0.033225536, 0.032754898, 0.032248020, 0.031706810, 0.031132698, 0.030526638, + 0.029890060, 0.029224873, 0.028532982, 0.027815342, 0.027073860, 0.026310921, 0.025527000, 0.024725437, + 0.023907185, 0.023074150, 0.022228718, 0.021372318, 
0.020506859, 0.019634247, 0.018756866, 0.017876148, + 0.016994476, 0.016112804, 0.015233517, 0.014358521, 0.013489246, 0.012627602, 0.011775017, 0.010933399, + 0.010103703, 0.009287834, 0.008487225, 0.007703304, 0.006937027, 0.006189346, 0.005462170, 0.004756451, + 0.004072189, 0.003411293, 0.002774239, 0.002161503, 0.001573563, 0.001011848, 0.000475883, -0.000033379, + -0.000515938, -0.000971317, -0.001399517, -0.001800537, -0.002174854, -0.002521515, -0.002841473, 0.003134727, + 0.003401756, 0.003643036, 0.003858566, 0.004049301, 0.004215240, 0.004357815, 0.004477024, 0.004573822, + 0.004649162, 0.004703045, 0.004737377, 0.004752159, 0.004748821, 0.004728317, 0.004691124, 0.004638195, + 0.004570484, 0.004489899, 0.004395962, 0.004290581, 0.004174709, 0.004048824, 0.003914356, 0.003771782, + 0.003622532, 0.003467083, 0.003306866, 0.003141880, 0.002974033, 0.002803326, 0.002630711, 0.002457142, + 0.002283096, 0.002110004, 0.001937389, 0.001766682, 0.001597881, 0.001432419, 0.001269817, 0.001111031, + 0.000956535, 0.000806808, 0.000661850, 0.000522137, 0.000388145, 0.000259876, 0.000137329, 0.000021458, + -0.000088215, -0.000191689, -0.000288486, -0.000378609, -0.000462532, -0.000539303, -0.000610352, -0.000674248, + -0.000731945, -0.000783920, -0.000829220, -0.000868797, -0.000902653, -0.000930786, -0.000953674, 0.000971317, + 0.000983715, 0.000991821, 0.000995159, 0.000994205, 0.000989437, 0.000980854, 0.000968933, 0.000954151, + 0.000935555, 0.000915051, 0.000891685, 0.000866413, 0.000838757, 0.000809669, 0.000779152, 0.000747204, + 0.000714302, 0.000680923, 0.000646591, 0.000611782, 0.000576973, 0.000542164, 0.000507355, 0.000472546, + 0.000438213, 0.000404358, 0.000371456, 0.000339031, 0.000307560, 0.000277042, 0.000247478, 0.000218868, + 0.000191212, 0.000165462, 0.000140190, 0.000116348, 0.000093937, 0.000072956, 0.000052929, 0.000034332, + 0.000017166, 0.000000954, -0.000013828, -0.000027180, -0.000039577, -0.000050545, -0.000060558, -0.000069618, + 
-0.000077724, -0.000084400, -0.000090122, -0.000095367, -0.000099182, -0.000102520, -0.000105381, -0.000106812, + -0.000108242, -0.000108719, -0.000108719, -0.000108242, -0.000107288, -0.000105858, -0.000103951, 0.000101566, + 0.000099182, 0.000096321, 0.000093460, 0.000090599, 0.000087261, 0.000083923, 0.000080585, 0.000076771, + 0.000073433, 0.000070095, 0.000066280, 0.000062943, 0.000059605, 0.000055790, 0.000052929, 0.000049591, + 0.000046253, 0.000043392, 0.000040531, 0.000037670, 0.000034809, 0.000032425, 0.000030041, 0.000027657, + 0.000025272, 0.000023365, 0.000021458, 0.000019550, 0.000018120, 0.000016689, 0.000014782, 0.000013828, + 0.000012398, 0.000011444, 0.000010014, 0.000009060, 0.000008106, 0.000007629, 0.000006676, 0.000006199, + 0.000005245, 0.000004768, 0.000004292, 0.000003815, 0.000003338, 0.000003338, 0.000002861, 0.000002384, + 0.000002384, 0.000001907, 0.000001907, 0.000001431, 0.000001431, 0.000000954, 0.000000954, 0.000000954, + 0.000000954, 0.000000477, 0.000000477, 0.000000477, 0.000000477, 0.000000477, 0.000000477 + }; + +static int scale_factor_table[64]; +#ifdef USE_FLOATS +static float scale_factor_inv_table[64]; +#else +static INT8 scale_factor_shift[64]; +static unsigned short scale_factor_mult[64]; +#endif +static unsigned char scale_diff_table[128]; + +static const int sblimit_table[5] = { 27 , 30 , 8, 12 , 30 }; + +static const int quant_steps[17] = { + 3, 5, 7, 9, 15, + 31, 63, 127, 255, 511, + 1023, 2047, 4095, 8191, 16383, + 32767, 65535 +}; + +/* we use a negative value if grouped */ +static const int quant_bits[17] = { + -5, -7, 3, -10, 4, + 5, 6, 7, 8, 9, + 10, 11, 12, 13, 14, + 15, 16 +}; + +/* signal to noise ratio of each quantification step (could be + computed from quant_steps[]). 
The values are dB multiplied by 10 +*/ +static unsigned short quant_snr[17] = { + 70, 110, 160, 208, + 253, 316, 378, 439, + 499, 559, 620, 680, + 740, 800, 861, 920, + 980 +}; + + +/* total number of bits per allocation group */ +static unsigned short total_quant_bits[17]; + +/* encoding tables which give the quantization index. Note how it is + possible to store them efficiently ! */ +static const unsigned char alloc_table_0[] = { + 4, 0, 2, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, + 4, 0, 2, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, + 4, 0, 2, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, + 4, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 16, + 4, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 16, + 4, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 16, + 4, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 16, + 4, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 16, + 4, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 16, + 4, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 16, + 4, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 16, + 3, 0, 1, 2, 3, 4, 5, 16, + 3, 0, 1, 2, 3, 4, 5, 16, + 3, 0, 1, 2, 3, 4, 5, 16, + 3, 0, 1, 2, 3, 4, 5, 16, + 3, 0, 1, 2, 3, 4, 5, 16, + 3, 0, 1, 2, 3, 4, 5, 16, + 3, 0, 1, 2, 3, 4, 5, 16, + 3, 0, 1, 2, 3, 4, 5, 16, + 3, 0, 1, 2, 3, 4, 5, 16, + 3, 0, 1, 2, 3, 4, 5, 16, + 3, 0, 1, 2, 3, 4, 5, 16, + 3, 0, 1, 2, 3, 4, 5, 16, + 2, 0, 1, 16, + 2, 0, 1, 16, + 2, 0, 1, 16, + 2, 0, 1, 16, +}; + +static const unsigned char alloc_table_1[] = { + 4, 0, 2, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, + 4, 0, 2, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, + 4, 0, 2, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, + 4, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 16, + 4, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 16, + 4, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 16, + 4, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 16, + 4, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 16, + 4, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 16, + 4, 0, 1, 
2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 16, + 4, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 16, + 3, 0, 1, 2, 3, 4, 5, 16, + 3, 0, 1, 2, 3, 4, 5, 16, + 3, 0, 1, 2, 3, 4, 5, 16, + 3, 0, 1, 2, 3, 4, 5, 16, + 3, 0, 1, 2, 3, 4, 5, 16, + 3, 0, 1, 2, 3, 4, 5, 16, + 3, 0, 1, 2, 3, 4, 5, 16, + 3, 0, 1, 2, 3, 4, 5, 16, + 3, 0, 1, 2, 3, 4, 5, 16, + 3, 0, 1, 2, 3, 4, 5, 16, + 3, 0, 1, 2, 3, 4, 5, 16, + 3, 0, 1, 2, 3, 4, 5, 16, + 2, 0, 1, 16, + 2, 0, 1, 16, + 2, 0, 1, 16, + 2, 0, 1, 16, + 2, 0, 1, 16, + 2, 0, 1, 16, + 2, 0, 1, 16, +}; + +static const unsigned char alloc_table_2[] = { + 4, 0, 1, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 4, 0, 1, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 3, 0, 1, 3, 4, 5, 6, 7, + 3, 0, 1, 3, 4, 5, 6, 7, + 3, 0, 1, 3, 4, 5, 6, 7, + 3, 0, 1, 3, 4, 5, 6, 7, + 3, 0, 1, 3, 4, 5, 6, 7, + 3, 0, 1, 3, 4, 5, 6, 7, +}; + +static const unsigned char alloc_table_3[] = { + 4, 0, 1, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 4, 0, 1, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 3, 0, 1, 3, 4, 5, 6, 7, + 3, 0, 1, 3, 4, 5, 6, 7, + 3, 0, 1, 3, 4, 5, 6, 7, + 3, 0, 1, 3, 4, 5, 6, 7, + 3, 0, 1, 3, 4, 5, 6, 7, + 3, 0, 1, 3, 4, 5, 6, 7, + 3, 0, 1, 3, 4, 5, 6, 7, + 3, 0, 1, 3, 4, 5, 6, 7, + 3, 0, 1, 3, 4, 5, 6, 7, + 3, 0, 1, 3, 4, 5, 6, 7, +}; + +static const unsigned char alloc_table_4[] = { + 4, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, + 4, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, + 4, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, + 4, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, + 3, 0, 1, 3, 4, 5, 6, 7, + 3, 0, 1, 3, 4, 5, 6, 7, + 3, 0, 1, 3, 4, 5, 6, 7, + 3, 0, 1, 3, 4, 5, 6, 7, + 3, 0, 1, 3, 4, 5, 6, 7, + 3, 0, 1, 3, 4, 5, 6, 7, + 3, 0, 1, 3, 4, 5, 6, 7, + 2, 0, 1, 3, + 2, 0, 1, 3, + 2, 0, 1, 3, + 2, 0, 1, 3, + 2, 0, 1, 3, + 2, 0, 1, 3, + 2, 0, 1, 3, + 2, 0, 1, 3, + 2, 0, 1, 3, + 2, 0, 1, 3, + 2, 0, 1, 3, + 2, 0, 1, 3, + 2, 0, 1, 3, + 2, 0, 1, 3, + 2, 0, 1, 3, + 2, 0, 1, 3, + 2, 0, 1, 3, + 2, 0, 1, 3, + 2, 0, 1, 3, 
+}; + +const unsigned char *alloc_tables[5] = +{ alloc_table_0, alloc_table_1, alloc_table_2, alloc_table_3, alloc_table_4, }; + +/* fixed psycho acoustic model. Values of SNR taken from the 'toolame' + project */ +const float fixed_smr[SBLIMIT] = { + 30, 17, 16, 10, 3, 12, 8, 2.5, + 5, 5, 6, 6, 5, 6, 10, 6, + -4, -10, -21, -30, -42, -55, -68, -75, + -75, -75, -75, -75, -91, -107, -110, -108 +}; + +const unsigned char nb_scale_factors[4] = { 3, 2, 1, 2 }; diff --git a/libav/mpegencodevlc.h b/libav/mpegencodevlc.h new file mode 100644 index 0000000000..3952fd0472 --- /dev/null +++ b/libav/mpegencodevlc.h @@ -0,0 +1,311 @@ +/* + * RV 1.0 compatible encoder. + * Copyright (c) 2000 Gerard Lantau. + * + * The licence of this code is contained in file LICENCE found in the + * same archive + */ + +const unsigned char vlc_dc_table[256] = { + 0, 1, 2, 2, + 3, 3, 3, 3, + 4, 4, 4, 4, 4, 4, 4, 4, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, +}; + +const unsigned char vlc_dc_lum_code[9] = { + 0x4, 0x0, 0x1, 0x5, 0x6, 0xe, 0x1e, 0x3e, 0x7e, +}; +const unsigned char vlc_dc_lum_bits[9] = { + 3, 2, 2, 3, 3, 4, 5, 6, 7, +}; + +const unsigned char vlc_dc_chroma_code[9] = { + 0x0, 0x1, 0x2, 0x6, 0xe, 0x1e, 0x3e, 0x7e, 0xfe, +}; +const unsigned char vlc_dc_chroma_bits[9] = { + 2, 2, 2, 3, 
4, 5, 6, 7, 8, +}; + +/* + * Copyright (c) 1995 The Regents of the University of California. + * All rights reserved. + * + * Permission to use, copy, modify, and distribute this software and its + * documentation for any purpose, without fee, and without written agreement is + * hereby granted, provided that the above copyright notice and the following + * two paragraphs appear in all copies of this software. + * + * IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT + * OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF THE UNIVERSITY OF + * CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * THE UNIVERSITY OF CALIFORNIA SPECIFICALLY DISCLAIMS ANY WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY + * AND FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE UNIVERSITY OF CALIFORNIA HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. 
+ */ + +#define HUFF_MAXRUN 32 +#define HUFF_MAXLEVEL 41 + +static const int huff_maxlevel[HUFF_MAXRUN] = { 41, 19, 6, 5, 4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 }; + +static const UINT8 huff_table0[41] = { 0x0, 0x6, 0x8, 0xa, 0xc, 0x4c, 0x42, 0x14, 0x3a, 0x30, 0x26, 0x20, 0x34, 0x32, 0x30, 0x2e, 0x3e, 0x3c, 0x3a, 0x38, 0x36, 0x34, 0x32, 0x30, 0x2e, 0x2c, 0x2a, 0x28, 0x26, 0x24, 0x22, 0x20, 0x30, 0x2e, 0x2c, 0x2a, 0x28, 0x26, 0x24, 0x22, 0x20 }; +static const UINT8 huff_bits0[41] = { 0, 3, 5, 6, 8, 9, 9, 11, 13, 13, 13, 13, 14, 14, 14, 14, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 16, 16, 16, 16, 16, 16, 16, 16, 16 }; + +static const UINT8 huff_table1[19] = { 0x0, 0x6, 0xc, 0x4a, 0x18, 0x36, 0x2c, 0x2a, 0x3e, 0x3c, 0x3a, 0x38, 0x36, 0x34, 0x32, 0x26, 0x24, 0x22, 0x20 }; +static const UINT8 huff_bits1[19] = { 0, 4, 7, 9, 11, 13, 14, 14, 16, 16, 16, 16, 16, 16, 16, 17, 17, 17, 17 }; + +static const UINT8 huff_table2[6] = { 0x0, 0xa, 0x8, 0x16, 0x28, 0x28 }; +static const UINT8 huff_bits2[6] = { 0, 5, 8, 11, 13, 14 }; + +static const UINT8 huff_table3[5] = { 0x0, 0xe, 0x48, 0x38, 0x26 }; +static const UINT8 huff_bits3[5] = { 0, 6, 9, 13, 14 }; + +static const UINT8 huff_table4[4] = { 0x0, 0xc, 0x1e, 0x24 }; +static const UINT8 huff_bits4[4] = { 0, 6, 11, 13 }; + +static const UINT8 huff_table5[4] = { 0x0, 0xe, 0x12, 0x24 }; +static const UINT8 huff_bits5[4] = { 0, 7, 11, 14 }; + +static const UINT8 huff_table6[4] = { 0x0, 0xa, 0x3c, 0x28 }; +static const UINT8 huff_bits6[4] = { 0, 7, 13, 17 }; + +static const UINT8 huff_table7[3] = { 0x0, 0x8, 0x2a }; +static const UINT8 huff_bits7[3] = { 0, 7, 13 }; + +static const UINT8 huff_table8[3] = { 0x0, 0xe, 0x22 }; +static const UINT8 huff_bits8[3] = { 0, 8, 13 }; + +static const UINT8 huff_table9[3] = { 0x0, 0xa, 0x22 }; +static const UINT8 huff_bits9[3] = { 0, 8, 14 }; + +static const UINT8 huff_table10[3] = { 0x0, 0x4e, 0x20 }; +static const UINT8 
huff_bits10[3] = { 0, 9, 14 }; + +static const UINT8 huff_table11[3] = { 0x0, 0x46, 0x34 }; +static const UINT8 huff_bits11[3] = { 0, 9, 17 }; + +static const UINT8 huff_table12[3] = { 0x0, 0x44, 0x32 }; +static const UINT8 huff_bits12[3] = { 0, 9, 17 }; + +static const UINT8 huff_table13[3] = { 0x0, 0x40, 0x30 }; +static const UINT8 huff_bits13[3] = { 0, 9, 17 }; + +static const UINT8 huff_table14[3] = { 0x0, 0x1c, 0x2e }; +static const UINT8 huff_bits14[3] = { 0, 11, 17 }; + +static const UINT8 huff_table15[3] = { 0x0, 0x1a, 0x2c }; +static const UINT8 huff_bits15[3] = { 0, 11, 17 }; + +static const UINT8 huff_table16[3] = { 0x0, 0x10, 0x2a }; +static const UINT8 huff_bits16[3] = { 0, 11, 17 }; + +static const UINT8 huff_table17[2] = { 0x0, 0x3e }; +static const UINT8 huff_bits17[2] = { 0, 13 }; + +static const UINT8 huff_table18[2] = { 0x0, 0x34 }; +static const UINT8 huff_bits18[2] = { 0, 13 }; + +static const UINT8 huff_table19[2] = { 0x0, 0x32 }; +static const UINT8 huff_bits19[2] = { 0, 13 }; + +static const UINT8 huff_table20[2] = { 0x0, 0x2e }; +static const UINT8 huff_bits20[2] = { 0, 13 }; + +static const UINT8 huff_table21[2] = { 0x0, 0x2c }; +static const UINT8 huff_bits21[2] = { 0, 13 }; + +static const UINT8 huff_table22[2] = { 0x0, 0x3e }; +static const UINT8 huff_bits22[2] = { 0, 14 }; + +static const UINT8 huff_table23[2] = { 0x0, 0x3c }; +static const UINT8 huff_bits23[2] = { 0, 14 }; + +static const UINT8 huff_table24[2] = { 0x0, 0x3a }; +static const UINT8 huff_bits24[2] = { 0, 14 }; + +static const UINT8 huff_table25[2] = { 0x0, 0x38 }; +static const UINT8 huff_bits25[2] = { 0, 14 }; + +static const UINT8 huff_table26[2] = { 0x0, 0x36 }; +static const UINT8 huff_bits26[2] = { 0, 14 }; + +static const UINT8 huff_table27[2] = { 0x0, 0x3e }; +static const UINT8 huff_bits27[2] = { 0, 17 }; + +static const UINT8 huff_table28[2] = { 0x0, 0x3c }; +static const UINT8 huff_bits28[2] = { 0, 17 }; + +static const UINT8 huff_table29[2] = { 0x0, 0x3a }; 
+static const UINT8 huff_bits29[2] = { 0, 17 }; + +static const UINT8 huff_table30[2] = { 0x0, 0x38 }; +static const UINT8 huff_bits30[2] = { 0, 17 }; + +static const UINT8 huff_table31[2] = { 0x0, 0x36 }; +static const UINT8 huff_bits31[2] = { 0, 17 }; + +static const UINT8 *huff_table[32] = { huff_table0, huff_table1, huff_table2, huff_table3, huff_table4, huff_table5, huff_table6, huff_table7, huff_table8, huff_table9, huff_table10, huff_table11, huff_table12, huff_table13, huff_table14, huff_table15, huff_table16, huff_table17, huff_table18, huff_table19, huff_table20, huff_table21, huff_table22, huff_table23, huff_table24, huff_table25, huff_table26, huff_table27, huff_table28, huff_table29, huff_table30, huff_table31 }; + +static const UINT8 *huff_bits[32] = { huff_bits0, huff_bits1, huff_bits2, huff_bits3, huff_bits4, huff_bits5, huff_bits6, huff_bits7, huff_bits8, huff_bits9, huff_bits10, huff_bits11, huff_bits12, huff_bits13, huff_bits14, huff_bits15, huff_bits16, huff_bits17, huff_bits18, huff_bits19, huff_bits20, huff_bits21, huff_bits22, huff_bits23, huff_bits24, huff_bits25, huff_bits26, huff_bits27, huff_bits28, huff_bits29, huff_bits30, huff_bits31 }; + +static const UINT8 mbAddrIncrTable[][2] = { + {0x0, 0}, + {0x1, 1}, + {0x3, 3}, + {0x2, 3}, + {0x3, 4}, + {0x2, 4}, + {0x3, 5}, + {0x2, 5}, + {0x7, 7}, + {0x6, 7}, + {0xb, 8}, + {0xa, 8}, + {0x9, 8}, + {0x8, 8}, + {0x7, 8}, + {0x6, 8}, + {0x17, 10}, + {0x16, 10}, + {0x15, 10}, + {0x14, 10}, + {0x13, 10}, + {0x12, 10}, + {0x23, 11}, + {0x22, 11}, + {0x21, 11}, + {0x20, 11}, + {0x1f, 11}, + {0x1e, 11}, + {0x1d, 11}, + {0x1c, 11}, + {0x1b, 11}, + {0x1a, 11}, + {0x19, 11}, + {0x18, 11}}; + +static const UINT8 mbPatTable[][2] = { + {0x0, 0}, + {0xb, 5}, + {0x9, 5}, + {0xd, 6}, + {0xd, 4}, + {0x17, 7}, + {0x13, 7}, + {0x1f, 8}, + {0xc, 4}, + {0x16, 7}, + {0x12, 7}, + {0x1e, 8}, + {0x13, 5}, + {0x1b, 8}, + {0x17, 8}, + {0x13, 8}, + {0xb, 4}, + {0x15, 7}, + {0x11, 7}, + {0x1d, 8}, + {0x11, 5}, + {0x19, 8}, + 
{0x15, 8}, + {0x11, 8}, + {0xf, 6}, + {0xf, 8}, + {0xd, 8}, + {0x3, 9}, + {0xf, 5}, + {0xb, 8}, + {0x7, 8}, + {0x7, 9}, + {0xa, 4}, + {0x14, 7}, + {0x10, 7}, + {0x1c, 8}, + {0xe, 6}, + {0xe, 8}, + {0xc, 8}, + {0x2, 9}, + {0x10, 5}, + {0x18, 8}, + {0x14, 8}, + {0x10, 8}, + {0xe, 5}, + {0xa, 8}, + {0x6, 8}, + {0x6, 9}, + {0x12, 5}, + {0x1a, 8}, + {0x16, 8}, + {0x12, 8}, + {0xd, 5}, + {0x9, 8}, + {0x5, 8}, + {0x5, 9}, + {0xc, 5}, + {0x8, 8}, + {0x4, 8}, + {0x4, 9}, + {0x7, 3}, + {0xa, 5}, /* grrr... 61, 62, 63 added - Kevin */ + {0x8, 5}, + {0xc, 6} +}; + +const UINT8 zigzag_direct[64] = { + 0, 1, 8, 16, 9, 2, 3, 10, + 17, 24, 32, 25, 18, 11, 4, 5, + 12, 19, 26, 33, 40, 48, 41, 34, + 27, 20, 13, 6, 7, 14, 21, 28, + 35, 42, 49, 56, 57, 50, 43, 36, + 29, 22, 15, 23, 30, 37, 44, 51, + 58, 59, 52, 45, 38, 31, 39, 46, + 53, 60, 61, 54, 47, 55, 62, 63 +}; + +static unsigned char const default_intra_matrix[64] = { + 8, 16, 19, 22, 26, 27, 29, 34, + 16, 16, 22, 24, 27, 29, 34, 37, + 19, 22, 26, 27, 29, 34, 34, 38, + 22, 22, 26, 27, 29, 34, 37, 40, + 22, 26, 27, 29, 32, 35, 40, 48, + 26, 27, 29, 32, 35, 40, 48, 58, + 26, 27, 29, 34, 38, 46, 56, 69, + 27, 29, 35, 38, 46, 56, 69, 83 +}; + +/* XXX: could hardcode this matrix */ +static unsigned char const default_non_intra_matrix[64] = { + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, +}; + +static unsigned char const frame_rate_tab[9] = { + 0, 24, 24, 25, 30, 30, 50, 60, 60, +}; diff --git a/libav/mpegvideo.c b/libav/mpegvideo.c new file mode 100644 index 0000000000..4987b38af4 --- /dev/null +++ b/libav/mpegvideo.c @@ -0,0 +1,1098 @@ +/* + * The simplest mpeg encoder + * Copyright (c) 2000 Gerard Lantau. 
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ +#include <stdlib.h> +#include <stdio.h> +#include <netinet/in.h> +#include <math.h> +#include "avcodec.h" +#include "mpegvideo.h" + +//#define DEBUG + +/* depends on JPEG librarie */ +extern void jpeg_fdct_ifast (DCTELEM * data); + +/* depends on mpeg */ +extern void j_rev_dct (DCTELEM *data); + +/* for jpeg fast DCT */ +#define CONST_BITS 14 + +static const unsigned short aanscales[64] = { + /* precomputed values scaled up by 14 bits */ + 16384, 22725, 21407, 19266, 16384, 12873, 8867, 4520, + 22725, 31521, 29692, 26722, 22725, 17855, 12299, 6270, + 21407, 29692, 27969, 25172, 21407, 16819, 11585, 5906, + 19266, 26722, 25172, 22654, 19266, 15137, 10426, 5315, + 16384, 22725, 21407, 19266, 16384, 12873, 8867, 4520, + 12873, 17855, 16819, 15137, 12873, 10114, 6967, 3552, + 8867, 12299, 11585, 10426, 8867, 6967, 4799, 2446, + 4520, 6270, 5906, 5315, 4520, 3552, 2446, 1247 +}; + +static UINT8 cropTbl[256 + 2 * MAX_NEG_CROP]; +static UINT32 squareTbl[512]; + +static void encode_picture(MpegEncContext *s, int picture_number); +static void rate_control_init(MpegEncContext *s); +static int rate_estimate_qscale(MpegEncContext *s); +static void mpeg1_skip_picture(MpegEncContext *s, int pict_num); + +#include "mpegencodevlc.h" + +static void put_header(MpegEncContext 
*s, int header) +{ + align_put_bits(&s->pb); + put_bits(&s->pb, 32, header); +} + +static void convert_matrix(int *qmat, const UINT8 *quant_matrix, int qscale) +{ + int i; + + for(i=0;i<64;i++) { + qmat[i] = (int)((1 << 22) * 16384.0 / (aanscales[i] * qscale * quant_matrix[i])); + } +} + + +int MPV_encode_init(AVEncodeContext *avctx) +{ + MpegEncContext *s = avctx->priv_data; + int pict_size, c_size, i; + UINT8 *pict; + + s->bit_rate = avctx->bit_rate; + s->frame_rate = avctx->rate; + s->width = avctx->width; + s->height = avctx->height; + s->gop_size = avctx->gop_size; + if (s->gop_size <= 1) { + s->intra_only = 1; + s->gop_size = 12; + } else { + s->intra_only = 0; + } + + switch(avctx->codec->id) { + case CODEC_ID_MPEG1VIDEO: + s->out_format = FMT_MPEG1; + break; + case CODEC_ID_MJPEG: + s->out_format = FMT_MJPEG; + s->intra_only = 1; /* force intra only for jpeg */ + if (mjpeg_init(s) < 0) + return -1; + break; + case CODEC_ID_H263: + s->out_format = FMT_H263; + break; + case CODEC_ID_RV10: + s->out_format = FMT_H263; + s->h263_rv10 = 1; + break; + default: + return -1; + } + + switch(s->frame_rate) { + case 24: + s->frame_rate_index = 2; + break; + case 25: + s->frame_rate_index = 3; + break; + case 30: + s->frame_rate_index = 5; + break; + case 50: + s->frame_rate_index = 6; + break; + case 60: + s->frame_rate_index = 8; + break; + default: + /* we accept lower frame rates than 24 for low bit rate mpeg */ + if (s->frame_rate >= 1 && s->frame_rate < 24) { + s->frame_rate_index = 2; + } else { + return -1; + } + break; + } + + /* init */ + s->mb_width = s->width / 16; + s->mb_height = s->height / 16; + + c_size = s->width * s->height; + pict_size = (c_size * 3) / 2; + pict = malloc(pict_size); + if (pict == NULL) + return -1; + s->last_picture[0] = pict; + s->last_picture[1] = pict + c_size; + s->last_picture[2] = pict + c_size + (c_size / 4); + + pict = malloc(pict_size); + if (pict == NULL) + return -1; + s->last_picture[0] = pict; + s->last_picture[1] = pict 
+ c_size; + s->last_picture[2] = pict + c_size + (c_size / 4); + + pict = malloc(pict_size); + if (pict == NULL) { + free(s->last_picture[0]); + return -1; + } + s->current_picture[0] = pict; + s->current_picture[1] = pict + c_size; + s->current_picture[2] = pict + c_size + (c_size / 4); + + for(i=0;i<256;i++) cropTbl[i + MAX_NEG_CROP] = i; + for(i=0;i<MAX_NEG_CROP;i++) { + cropTbl[i] = 0; + cropTbl[i + MAX_NEG_CROP + 256] = 255; + } + + for(i=0;i<512;i++) { + squareTbl[i] = (i - 256) * (i - 256); + } + + /* rate control init */ + rate_control_init(s); + + s->picture_number = 0; + s->fake_picture_number = 0; + + return 0; +} + +int MPV_encode_end(AVEncodeContext *avctx) +{ + MpegEncContext *s = avctx->priv_data; +#if 0 + /* end of sequence */ + if (s->out_format == FMT_MPEG1) { + put_header(s, SEQ_END_CODE); + } + + if (!s->flush_frames) + flush_put_bits(&s->pb); +#endif + free(s->last_picture[0]); + free(s->current_picture[0]); + if (s->out_format == FMT_MJPEG) + mjpeg_close(s); + return 0; +} + +int MPV_encode_picture(AVEncodeContext *avctx, + unsigned char *buf, int buf_size, void *data) +{ + MpegEncContext *s = avctx->priv_data; + int i; + + memcpy(s->new_picture, data, 3 * sizeof(UINT8 *)); + + init_put_bits(&s->pb, buf, buf_size, NULL, NULL); + + /* group of picture */ + if (s->out_format == FMT_MPEG1) { + unsigned int vbv_buffer_size; + unsigned int time_code, fps, n; + + if ((s->picture_number % s->gop_size) == 0) { + /* mpeg1 header repeated every gop */ + put_header(s, SEQ_START_CODE); + + put_bits(&s->pb, 12, s->width); + put_bits(&s->pb, 12, s->height); + put_bits(&s->pb, 4, 1); /* 1/1 aspect ratio */ + put_bits(&s->pb, 4, s->frame_rate_index); + put_bits(&s->pb, 18, 0x3ffff); + put_bits(&s->pb, 1, 1); /* marker */ + /* vbv buffer size: slightly greater than an I frame. 
We add + some margin just in case */ + vbv_buffer_size = (3 * s->I_frame_bits) / (2 * 8); + put_bits(&s->pb, 10, (vbv_buffer_size + 16383) / 16384); + put_bits(&s->pb, 1, 1); /* constrained parameter flag */ + put_bits(&s->pb, 1, 0); /* no custom intra matrix */ + put_bits(&s->pb, 1, 0); /* no custom non intra matrix */ + + put_header(s, GOP_START_CODE); + put_bits(&s->pb, 1, 0); /* do drop frame */ + /* time code : we must convert from the real frame rate to a + fake mpeg frame rate in case of low frame rate */ + fps = frame_rate_tab[s->frame_rate_index]; + time_code = s->fake_picture_number; + s->gop_picture_number = time_code; + put_bits(&s->pb, 5, (time_code / (fps * 3600)) % 24); + put_bits(&s->pb, 6, (time_code / (fps * 60)) % 60); + put_bits(&s->pb, 1, 1); + put_bits(&s->pb, 6, (time_code / fps) % 60); + put_bits(&s->pb, 6, (time_code % fps)); + put_bits(&s->pb, 1, 1); /* closed gop */ + put_bits(&s->pb, 1, 0); /* broken link */ + } + + if (s->frame_rate < 24 && s->picture_number > 0) { + /* insert empty P pictures to slow down to the desired + frame rate. 
Each fake pictures takes about 20 bytes */ + fps = frame_rate_tab[s->frame_rate_index]; + n = ((s->picture_number * fps) / s->frame_rate) - 1; + while (s->fake_picture_number < n) { + mpeg1_skip_picture(s, s->fake_picture_number - + s->gop_picture_number); + s->fake_picture_number++; + } + + } + s->fake_picture_number++; + } + + + if (!s->intra_only) { + /* first picture of GOP is intra */ + if ((s->picture_number % s->gop_size) == 0) + s->pict_type = I_TYPE; + else + s->pict_type = P_TYPE; + } else { + s->pict_type = I_TYPE; + } + avctx->key_frame = (s->pict_type == I_TYPE); + + encode_picture(s, s->picture_number); + + /* swap current and last picture */ + for(i=0;i<3;i++) { + UINT8 *tmp; + + tmp = s->last_picture[i]; + s->last_picture[i] = s->current_picture[i]; + s->current_picture[i] = tmp; + } + s->picture_number++; + + if (s->out_format == FMT_MJPEG) + mjpeg_picture_trailer(s); + + flush_put_bits(&s->pb); + s->total_bits += (s->pb.buf_ptr - s->pb.buf) * 8; + return s->pb.buf_ptr - s->pb.buf; +} + +/* insert a fake P picture */ +static void mpeg1_skip_picture(MpegEncContext *s, int pict_num) +{ + unsigned int mb_incr; + + /* mpeg1 picture header */ + put_header(s, PICTURE_START_CODE); + /* temporal reference */ + put_bits(&s->pb, 10, pict_num & 0x3ff); + + put_bits(&s->pb, 3, P_TYPE); + put_bits(&s->pb, 16, 0xffff); /* non constant bit rate */ + + put_bits(&s->pb, 1, 1); /* integer coordinates */ + put_bits(&s->pb, 3, 1); /* forward_f_code */ + + put_bits(&s->pb, 1, 0); /* extra bit picture */ + + /* only one slice */ + put_header(s, SLICE_MIN_START_CODE); + put_bits(&s->pb, 5, 1); /* quantizer scale */ + put_bits(&s->pb, 1, 0); /* slice extra information */ + + mb_incr = 1; + put_bits(&s->pb, mbAddrIncrTable[mb_incr][1], + mbAddrIncrTable[mb_incr][0]); + + /* empty macroblock */ + put_bits(&s->pb, 3, 1); /* motion only */ + + /* zero motion x & y */ + put_bits(&s->pb, 1, 1); + put_bits(&s->pb, 1, 1); + + /* output a number of empty slice */ + mb_incr = 
s->mb_width * s->mb_height - 1; + while (mb_incr > 33) { + put_bits(&s->pb, 11, 0x008); + mb_incr -= 33; + } + put_bits(&s->pb, mbAddrIncrTable[mb_incr][1], + mbAddrIncrTable[mb_incr][0]); + + /* empty macroblock */ + put_bits(&s->pb, 3, 1); /* motion only */ + + /* zero motion x & y */ + put_bits(&s->pb, 1, 1); + put_bits(&s->pb, 1, 1); +} + +static int pix_sum(UINT8 *pix, int line_size) +{ + int s, i, j; + + s = 0; + for(i=0;i<16;i++) { + for(j=0;j<16;j+=8) { + s += pix[0]; + s += pix[1]; + s += pix[2]; + s += pix[3]; + s += pix[4]; + s += pix[5]; + s += pix[6]; + s += pix[7]; + pix += 8; + } + pix += line_size - 16; + } + return s; +} + +static int pix_norm1(UINT8 *pix, int line_size) +{ + int s, i, j; + UINT32 *sq = squareTbl + 256; + + s = 0; + for(i=0;i<16;i++) { + for(j=0;j<16;j+=8) { + s += sq[pix[0]]; + s += sq[pix[1]]; + s += sq[pix[2]]; + s += sq[pix[3]]; + s += sq[pix[4]]; + s += sq[pix[5]]; + s += sq[pix[6]]; + s += sq[pix[7]]; + pix += 8; + } + pix += line_size - 16; + } + return s; +} + +static int pix_norm(UINT8 *pix1, UINT8 *pix2, int line_size) +{ + int s, i, j; + UINT32 *sq = squareTbl + 256; + + s = 0; + for(i=0;i<16;i++) { + for(j=0;j<16;j+=8) { + s += sq[pix1[0] - pix2[0]]; + s += sq[pix1[1] - pix2[1]]; + s += sq[pix1[2] - pix2[2]]; + s += sq[pix1[3] - pix2[3]]; + s += sq[pix1[4] - pix2[4]]; + s += sq[pix1[5] - pix2[5]]; + s += sq[pix1[6] - pix2[6]]; + s += sq[pix1[7] - pix2[7]]; + pix1 += 8; + pix2 += 8; + } + pix1 += line_size - 16; + pix2 += line_size - 16; + } + return s; +} + + +static int estimate_motion(MpegEncContext *s, + int mb_x, int mb_y, + int *mx_ptr, int *my_ptr) +{ + UINT8 *pix, *ppix; + int sum, varc, vard; + + pix = s->new_picture[0] + (mb_y * 16 * s->width) + mb_x * 16; + ppix = s->last_picture[0] + (mb_y * 16 * s->width) + mb_x * 16; + + sum = pix_sum(pix, s->width); + varc = pix_norm1(pix, s->width); + vard = pix_norm(pix, ppix, s->width); + + vard = vard >> 8; + sum = sum >> 8; + varc = (varc >> 8) - sum * sum; + + 
*mx_ptr = 0; + *my_ptr = 0; + if (vard <= 64) { + return 0; + } else if (vard < varc) { + return 0; + } else { + return 1; + } +} + +static void get_pixels(DCTELEM *block, const UINT8 *pixels, int line_size); +static void put_pixels(const DCTELEM *block, UINT8 *pixels, int line_size); +static void sub_pixels(DCTELEM *block, const UINT8 *pixels, int line_size); +static void add_pixels(DCTELEM *block, const UINT8 *pixels, int line_size); +static int dct_quantize(MpegEncContext *s, DCTELEM *block, int qscale); +static void encode_block(MpegEncContext *s, + DCTELEM *block, + int component); +static void dct_unquantize(MpegEncContext *s, DCTELEM *block, int qscale); +static void mpeg1_encode_mb(MpegEncContext *s, int mb_x, int mb_y, + DCTELEM block[6][64], + int motion_x, int motion_y); + +static void encode_picture(MpegEncContext *s, int picture_number) +{ + int mb_x, mb_y; + UINT8 *ptr; + DCTELEM block[6][64]; + int i, motion_x, motion_y; + + s->picture_number = picture_number; + s->qscale = rate_estimate_qscale(s); + + /* precompute matrix */ + if (s->out_format == FMT_MJPEG) { + /* for mjpeg, we do include qscale in the matrix */ + s->init_intra_matrix[0] = default_intra_matrix[0]; + for(i=1;i<64;i++) + s->init_intra_matrix[i] = (default_intra_matrix[i] * s->qscale) >> 3; + convert_matrix(s->intra_matrix, s->init_intra_matrix, 8); + } else { + convert_matrix(s->intra_matrix, default_intra_matrix, s->qscale); + convert_matrix(s->non_intra_matrix, default_non_intra_matrix, s->qscale); + } + + switch(s->out_format) { + case FMT_MJPEG: + mjpeg_picture_header(s); + break; + case FMT_H263: + if (s->h263_rv10) + rv10_encode_picture_header(s, picture_number); + else + h263_picture_header(s, picture_number); + break; + case FMT_MPEG1: + /* mpeg1 picture header */ + put_header(s, PICTURE_START_CODE); + /* temporal reference */ + put_bits(&s->pb, 10, (s->fake_picture_number - + s->gop_picture_number) & 0x3ff); + + put_bits(&s->pb, 3, s->pict_type); + put_bits(&s->pb, 16, 
0xffff); /* non constant bit rate */ + + if (s->pict_type == P_TYPE) { + put_bits(&s->pb, 1, 1); /* integer coordinates */ + put_bits(&s->pb, 3, 1); /* forward_f_code */ + } + + put_bits(&s->pb, 1, 0); /* extra bit picture */ + + /* only one slice */ + put_header(s, SLICE_MIN_START_CODE); + put_bits(&s->pb, 5, s->qscale); /* quantizer scale */ + put_bits(&s->pb, 1, 0); /* slice extra information */ + break; + } + + /* init last dc values */ + /* XXX: quant matrix value is implied here */ + s->last_dc[0] = 128; + s->last_dc[1] = 128; + s->last_dc[2] = 128; + s->mb_incr = 1; + + for(mb_y=0; mb_y < s->mb_height; mb_y++) { + for(mb_x=0; mb_x < s->mb_width; mb_x++) { + /* compute motion vector and macro block type (intra or non intra) */ + motion_x = 0; + motion_y = 0; + if (s->pict_type == P_TYPE) { + s->mb_intra = estimate_motion(s, mb_x, mb_y, + &motion_x, + &motion_y); + } else { + s->mb_intra = 1; + } + + /* reset intra predictors if non intra mb */ + if (!s->mb_intra) { + s->last_dc[0] = 128; + s->last_dc[1] = 128; + s->last_dc[2] = 128; + } + + /* get the pixels */ + ptr = s->new_picture[0] + (mb_y * 16 * s->width) + mb_x * 16; + get_pixels(block[0], ptr, s->width); + get_pixels(block[1], ptr + 8, s->width); + get_pixels(block[2], ptr + 8 * s->width, s->width); + get_pixels(block[3], ptr + 8 * s->width + 8, s->width); + ptr = s->new_picture[1] + (mb_y * 8 * (s->width >> 1)) + mb_x * 8; + get_pixels(block[4],ptr, s->width >> 1); + + ptr = s->new_picture[2] + (mb_y * 8 * (s->width >> 1)) + mb_x * 8; + get_pixels(block[5],ptr, s->width >> 1); + + /* subtract previous frame if non intra */ + if (!s->mb_intra) { + ptr = s->last_picture[0] + + ((mb_y * 16 + motion_y) * s->width) + (mb_x * 16 + motion_x); + + sub_pixels(block[0], ptr, s->width); + sub_pixels(block[1], ptr + 8, s->width); + sub_pixels(block[2], ptr + s->width * 8, s->width); + sub_pixels(block[3], ptr + 8 + s->width * 8, s->width); + ptr = s->last_picture[1] + + ((mb_y * 8 + (motion_y >> 1)) * (s->width 
>> 1)) + + (mb_x * 8 + (motion_x >> 1)); + sub_pixels(block[4], ptr, s->width >> 1); + ptr = s->last_picture[2] + + ((mb_y * 8 + (motion_y >> 1)) * (s->width >> 1)) + + (mb_x * 8 + (motion_x >> 1)); + sub_pixels(block[5], ptr, s->width >> 1); + } + + /* DCT & quantize */ + for(i=0;i<6;i++) { + int last_index; + last_index = dct_quantize(s, block[i], s->qscale); + s->block_last_index[i] = last_index; + } + + /* huffman encode */ + switch(s->out_format) { + case FMT_MPEG1: + mpeg1_encode_mb(s, mb_x, mb_y, block, motion_x, motion_y); + break; + case FMT_H263: + h263_encode_mb(s, block, motion_x, motion_y); + break; + case FMT_MJPEG: + mjpeg_encode_mb(s, block); + break; + } + + /* decompress blocks so that we keep the state of the decoder */ + if (!s->intra_only) { + for(i=0;i<6;i++) { + if (s->block_last_index[i] >= 0) { + dct_unquantize(s, block[i], s->qscale); + } + } + + if (!s->mb_intra) { + ptr = s->last_picture[0] + + ((mb_y * 16 + motion_y) * s->width) + (mb_x * 16 + motion_x); + + add_pixels(block[0], ptr, s->width); + add_pixels(block[1], ptr + 8, s->width); + add_pixels(block[2], ptr + s->width * 8, s->width); + add_pixels(block[3], ptr + 8 + s->width * 8, s->width); + ptr = s->last_picture[1] + + ((mb_y * 8 + (motion_y >> 1)) * (s->width >> 1)) + + (mb_x * 8 + (motion_x >> 1)); + add_pixels(block[4], ptr, s->width >> 1); + ptr = s->last_picture[2] + + ((mb_y * 8 + (motion_y >> 1)) * (s->width >> 1)) + + (mb_x * 8 + (motion_x >> 1)); + add_pixels(block[5], ptr, s->width >> 1); + } + + /* write the pixels */ + ptr = s->current_picture[0] + (mb_y * 16 * s->width) + mb_x * 16; + put_pixels(block[0], ptr, s->width); + put_pixels(block[1], ptr + 8, s->width); + put_pixels(block[2], ptr + 8 * s->width, s->width); + put_pixels(block[3], ptr + 8 * s->width + 8, s->width); + ptr = s->current_picture[1] + (mb_y * 8 * (s->width >> 1)) + mb_x * 8; + put_pixels(block[4],ptr, s->width >> 1); + + ptr = s->current_picture[2] + (mb_y * 8 * (s->width >> 1)) + mb_x * 8; + 
put_pixels(block[5],ptr, s->width >> 1); + } + } + } +} + +static void mpeg1_encode_mb(MpegEncContext *s, int mb_x, int mb_y, + DCTELEM block[6][64], + int motion_x, int motion_y) +{ + int mb_incr, i, cbp; + + /* compute cbp */ + cbp = 0; + for(i=0;i<6;i++) { + if (s->block_last_index[i] >= 0) + cbp |= 1 << (5 - i); + } + + /* skip macroblock, except if first or last macroblock of a slice */ + if ((cbp | motion_x | motion_y) == 0 && + (!((mb_x | mb_y) == 0 || + (mb_x == s->mb_width - 1 && mb_y == s->mb_height - 1)))) { + s->mb_incr++; + } else { + /* output mb incr */ + mb_incr = s->mb_incr; + + while (mb_incr > 33) { + put_bits(&s->pb, 11, 0x008); + mb_incr -= 33; + } + put_bits(&s->pb, mbAddrIncrTable[mb_incr][1], + mbAddrIncrTable[mb_incr][0]); + + if (s->pict_type == I_TYPE) { + put_bits(&s->pb, 1, 1); /* macroblock_type : macroblock_quant = 0 */ + } else { + if (s->mb_intra) { + put_bits(&s->pb, 5, 0x03); + } else { + if (motion_x == 0 && motion_y == 0) { + if (cbp != 0) { + put_bits(&s->pb, 2, 1); /* macroblock_pattern only */ + put_bits(&s->pb, mbPatTable[cbp][1], mbPatTable[cbp][0]); + } else { + put_bits(&s->pb, 3, 1); /* motion only & zero motion vectors */ + /* zero motion x & y */ + put_bits(&s->pb, 1, 1); + put_bits(&s->pb, 1, 1); + } + } else { + /* XXX: not used yet */ + put_bits(&s->pb, mbPatTable[cbp][1], mbPatTable[cbp][0]); + } + } + + } + + for(i=0;i<6;i++) { + if (cbp & (1 << (5 - i))) { + encode_block(s, block[i], i); + } + } + s->mb_incr = 1; + } +} + +static void get_pixels(DCTELEM *block, const UINT8 *pixels, int line_size) +{ + DCTELEM *p; + const UINT8 *pix; + int i; + + /* read the pixels */ + p = block; + pix = pixels; + for(i=0;i<8;i++) { + p[0] = pix[0]; + p[1] = pix[1]; + p[2] = pix[2]; + p[3] = pix[3]; + p[4] = pix[4]; + p[5] = pix[5]; + p[6] = pix[6]; + p[7] = pix[7]; + pix += line_size; + p += 8; + } +} + +static void put_pixels(const DCTELEM *block, UINT8 *pixels, int line_size) +{ + const DCTELEM *p; + UINT8 *pix; + int i; + 
UINT8 *cm = cropTbl + MAX_NEG_CROP; + + /* read the pixels */ + p = block; + pix = pixels; + for(i=0;i<8;i++) { + pix[0] = cm[p[0]]; + pix[1] = cm[p[1]]; + pix[2] = cm[p[2]]; + pix[3] = cm[p[3]]; + pix[4] = cm[p[4]]; + pix[5] = cm[p[5]]; + pix[6] = cm[p[6]]; + pix[7] = cm[p[7]]; + pix += line_size; + p += 8; + } +} + +static void sub_pixels(DCTELEM *block, const UINT8 *pixels, int line_size) +{ + DCTELEM *p; + const UINT8 *pix; + int i; + + /* read the pixels */ + p = block; + pix = pixels; + for(i=0;i<8;i++) { + p[0] -= pix[0]; + p[1] -= pix[1]; + p[2] -= pix[2]; + p[3] -= pix[3]; + p[4] -= pix[4]; + p[5] -= pix[5]; + p[6] -= pix[6]; + p[7] -= pix[7]; + pix += line_size; + p += 8; + } +} + +static void add_pixels(DCTELEM *block, const UINT8 *pixels, int line_size) +{ + DCTELEM *p; + const UINT8 *pix; + int i; + + /* read the pixels */ + p = block; + pix = pixels; + for(i=0;i<8;i++) { + p[0] += pix[0]; + p[1] += pix[1]; + p[2] += pix[2]; + p[3] += pix[3]; + p[4] += pix[4]; + p[5] += pix[5]; + p[6] += pix[6]; + p[7] += pix[7]; + pix += line_size; + p += 8; + } +} + +#define USE_FAST_MUL + +static int dct_quantize(MpegEncContext *s, + DCTELEM *block, + int qscale) +{ + int i, j, level, last_non_zero; +#ifdef USE_FAST_MUL + const int *qmat; +#else + const UINT8 *qmat; +#endif + + jpeg_fdct_ifast (block); + + if (s->mb_intra) { + block[0] = (block[0] + 4 * 8) >> 6; + i = 1; + last_non_zero = 0; + if (s->out_format == FMT_H263) { +#ifdef USE_FAST_MUL + qmat = s->non_intra_matrix; +#else + qmat = default_non_intra_matrix; +#endif + } else { +#ifdef USE_FAST_MUL + qmat = s->intra_matrix; +#else + qmat = default_intra_matrix; +#endif + } + } else { + i = 0; + last_non_zero = -1; +#ifdef USE_FAST_MUL + qmat = s->non_intra_matrix; +#else + qmat = default_non_intra_matrix; +#endif + } + + for(;i<64;i++) { + j = zigzag_direct[i]; + level = block[j]; +#ifdef USE_FAST_MUL + level = (level * qmat[j]) / (1 << 22); +#else + /* post dct normalization */ + level = (level << 11) / 
aanscales[j]; + /* quantification */ + level = (8 * level) / (qscale * qmat[j]); +#endif + block[j] = level; + if (level) + last_non_zero = i; + } + return last_non_zero; +} + +static void dct_unquantize(MpegEncContext *s, + DCTELEM *block, int qscale) +{ + int i, level, coeff; + const UINT8 *quant_matrix; + + if (s->mb_intra) { + block[0] = block[0] << 3; + if (s->out_format == FMT_H263) { + i = 1; + goto unquant_even; + } + quant_matrix = default_intra_matrix; + for(i=1;i<64;i++) { + block[i] = (block[i] * qscale * quant_matrix[i]) >> 3; + } + } else { + i = 0; + unquant_even: + quant_matrix = default_non_intra_matrix; + for(;i<64;i++) { + level = block[i]; + if (level) { + if (level < 0) { + coeff = (((level << 1) - 1) * qscale * + ((int) (quant_matrix[i]))) >> 4; + coeff += (coeff & 1); + } else { + coeff = (((level << 1) + 1) * qscale * + ((int) (quant_matrix[i]))) >> 4; + coeff -= (coeff & 1); + } + block[i] = coeff; + } + } + } + + j_rev_dct(block); +} + + +static inline void encode_dc(MpegEncContext *s, int diff, int component) +{ + int adiff, index; + + // printf("dc=%d c=%d\n", diff, component); + adiff = abs(diff); + index = vlc_dc_table[adiff]; + if (component == 0) { + put_bits(&s->pb, vlc_dc_lum_bits[index], vlc_dc_lum_code[index]); + } else { + put_bits(&s->pb, vlc_dc_chroma_bits[index], vlc_dc_chroma_code[index]); + } + if (diff > 0) { + put_bits(&s->pb, index, (diff & ((1 << index) - 1))); + } else if (diff < 0) { + put_bits(&s->pb, index, ((diff - 1) & ((1 << index) - 1))); + } +} + +static void encode_block(MpegEncContext *s, + DCTELEM *block, + int n) +{ + int alevel, level, last_non_zero, dc, diff, i, j, run, last_index; + int code, nbits, component; + + last_index = s->block_last_index[n]; + + /* DC coef */ + if (s->mb_intra) { + component = (n <= 3 ? 
0 : n - 4 + 1); + dc = block[0]; /* overflow is impossible */ + diff = dc - s->last_dc[component]; + encode_dc(s, diff, component); + s->last_dc[component] = dc; + i = 1; + } else { + /* encode the first coefficient : needs to be done here because + it is handled slightly differently */ + level = block[0]; + if (abs(level) == 1) { + code = ((UINT32)level >> 31); /* the sign bit */ + put_bits(&s->pb, 2, code | 0x02); + i = 1; + } else { + i = 0; + last_non_zero = -1; + goto next_coef; + } + } + + /* now quantify & encode AC coefs */ + last_non_zero = i - 1; + for(;i<=last_index;i++) { + j = zigzag_direct[i]; + level = block[j]; + next_coef: +#if 0 + if (level != 0) + printf("level[%d]=%d\n", i, level); +#endif + /* encode using VLC */ + if (level != 0) { + run = i - last_non_zero - 1; + alevel = abs(level); + // printf("run=%d level=%d\n", run, level); + if ( (run < HUFF_MAXRUN) && (alevel < huff_maxlevel[run])) { + /* encode using the Huffman tables */ + code = (huff_table[run])[alevel]; + nbits = (huff_bits[run])[alevel]; + code |= ((UINT32)level >> 31); /* the sign bit */ + + put_bits(&s->pb, nbits, code); + } else { + /* escape: only clip in this case */ + if (level > 255) + level = 255; + else if (level < -255) + level = -255; + put_bits(&s->pb, 6, 0x1); + put_bits(&s->pb, 6, run); + if (alevel < 128) { + put_bits(&s->pb, 8, level & 0xff); + } else { + if (level < 0) { + put_bits(&s->pb, 16, 0x8001 + level + 255); + } else { + put_bits(&s->pb, 16, level & 0xffff); + } + } + } + last_non_zero = i; + } + } + /* end of block */ + put_bits(&s->pb, 2, 0x2); +} + + +/* rate control */ + +/* an I frame is I_FRAME_SIZE_RATIO bigger than a P frame */ +#define I_FRAME_SIZE_RATIO 1.5 +#define QSCALE_K 20 + +static void rate_control_init(MpegEncContext *s) +{ + s->wanted_bits = 0; + + if (s->intra_only) { + s->I_frame_bits = s->bit_rate / s->frame_rate; + s->P_frame_bits = s->I_frame_bits; + } else { + s->P_frame_bits = (int) ((float)(s->gop_size * s->bit_rate) / + 
(float)(s->frame_rate * (I_FRAME_SIZE_RATIO + s->gop_size - 1))); + s->I_frame_bits = (int)(s->P_frame_bits * I_FRAME_SIZE_RATIO); + } + +#if defined(DEBUG) + printf("I_frame_size=%d P_frame_size=%d\n", + s->I_frame_bits, s->P_frame_bits); +#endif +} + + +/* + * This heuristic is rather poor, but at least we do not have to + * change the qscale at every macroblock. + */ +static int rate_estimate_qscale(MpegEncContext *s) +{ + long long total_bits = s->total_bits; + float q; + int qscale, diff; + + if (s->pict_type == I_TYPE) { + s->wanted_bits += s->I_frame_bits; + } else { + s->wanted_bits += s->P_frame_bits; + } + diff = s->wanted_bits - total_bits; + q = 31.0 - (float)diff / (QSCALE_K * s->mb_height * s->mb_width); + /* adjust for I frame */ + if (s->pict_type == I_TYPE && !s->intra_only) { + q /= I_FRAME_SIZE_RATIO; + } + + if (q < 1) + q = 1; + else if (q > 31) + q = 31; + qscale = (int)(q + 0.5); +#if defined(DEBUG) + printf("%d: total=%Ld br=%0.1f diff=%d qest=%0.1f\n", + s->picture_number, + total_bits, (float)s->frame_rate * total_bits / s->picture_number, + diff, q); +#endif + return qscale; +} + +AVEncoder mpeg1video_encoder = { + "mpeg1video", + CODEC_TYPE_VIDEO, + CODEC_ID_MPEG1VIDEO, + sizeof(MpegEncContext), + MPV_encode_init, + MPV_encode_picture, + MPV_encode_end, +}; + +AVEncoder h263_encoder = { + "h263", + CODEC_TYPE_VIDEO, + CODEC_ID_H263, + sizeof(MpegEncContext), + MPV_encode_init, + MPV_encode_picture, + MPV_encode_end, +}; + +AVEncoder rv10_encoder = { + "rv10", + CODEC_TYPE_VIDEO, + CODEC_ID_RV10, + sizeof(MpegEncContext), + MPV_encode_init, + MPV_encode_picture, + MPV_encode_end, +}; + +AVEncoder mjpeg_encoder = { + "mjpeg", + CODEC_TYPE_VIDEO, + CODEC_ID_MJPEG, + sizeof(MpegEncContext), + MPV_encode_init, + MPV_encode_picture, + MPV_encode_end, +}; diff --git a/libav/mpegvideo.h b/libav/mpegvideo.h new file mode 100644 index 0000000000..e1fbe044a0 --- /dev/null +++ b/libav/mpegvideo.h @@ -0,0 +1,94 @@ +/* mpegencode.c */ + +/* Start 
codes. */ +#define SEQ_END_CODE 0x000001b7 +#define SEQ_START_CODE 0x000001b3 +#define GOP_START_CODE 0x000001b8 +#define PICTURE_START_CODE 0x00000100 +#define SLICE_MIN_START_CODE 0x00000101 +#define SLICE_MAX_START_CODE 0x000001af +#define EXT_START_CODE 0x000001b5 +#define USER_START_CODE 0x000001b2 + +/* Macros for picture code type. */ +#define I_TYPE 1 +#define P_TYPE 2 +#define B_TYPE 3 + +typedef int DCTELEM; + +enum OutputFormat { + FMT_MPEG1, + FMT_H263, + FMT_MJPEG, +}; + +#define MAX_NEG_CROP 384 + +#define MPEG_BUF_SIZE (16 * 1024) + +typedef struct MpegEncContext { + /* the following parameters must be initialized before encoding */ + int width, height; /* picture size. must be a multiple of 16 */ + int gop_size; + int frame_rate; /* number of frames per second */ + int intra_only; /* if true, only intra pictures are generated */ + int bit_rate; /* wanted bit rate */ + enum OutputFormat out_format; /* output format */ + int h263_rv10; /* use RV10 variation for H263 */ + + /* the following fields are managed internally by the encoder */ + + /* bit output */ + PutBitContext pb; + + /* sequence parameters */ + int picture_number; + int fake_picture_number; /* picture number at the bitstream frame rate */ + int gop_picture_number; /* index of the first picture of a GOP */ + int mb_width, mb_height; + UINT8 *new_picture[3]; /* picture to be compressed */ + UINT8 *last_picture[3]; /* previous picture */ + UINT8 *current_picture[3]; /* buffer to store the decompressed current picture */ + int last_dc[3]; + int qscale; + int pict_type; + int frame_rate_index; + /* macroblock layer */ + int mb_incr; + int mb_intra; + /* matrix transmitted in the bitstream */ + UINT8 init_intra_matrix[64]; + /* precomputed matrix (combine qscale and DCT renorm) */ + int intra_matrix[64]; + int non_intra_matrix[64]; + int block_last_index[6]; /* last non zero coefficient in block */ + + void *opaque; /* private data for the user */ + + /* bit rate control */ + int I_frame_bits; 
/* wanted number of bits per I frame */ + int P_frame_bits; /* same for P frame */ + long long wanted_bits; + long long total_bits; + struct MJpegContext *mjpeg_ctx; +} MpegEncContext; + +extern const UINT8 zigzag_direct[64]; + +/* h263enc.c */ + +void h263_encode_mb(MpegEncContext *s, + DCTELEM block[6][64], + int motion_x, int motion_y); +void h263_picture_header(MpegEncContext *s, int picture_number); +void rv10_encode_picture_header(MpegEncContext *s, int picture_number); + +/* mjpegenc.c */ + +int mjpeg_init(MpegEncContext *s); +void mjpeg_close(MpegEncContext *s); +void mjpeg_encode_mb(MpegEncContext *s, + DCTELEM block[6][64]); +void mjpeg_picture_header(MpegEncContext *s); +void mjpeg_picture_trailer(MpegEncContext *s); diff --git a/libav/resample.c b/libav/resample.c new file mode 100644 index 0000000000..008153b0d6 --- /dev/null +++ b/libav/resample.c @@ -0,0 +1,245 @@ +/* + * Sample rate convertion for both audio and video + * Copyright (c) 2000 Gerard Lantau. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+ */ +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <netinet/in.h> +#include <math.h> +#include "avcodec.h" + +#define NDEBUG +#include <assert.h> + +#define FRAC_BITS 16 +#define FRAC (1 << FRAC_BITS) + +static void init_mono_resample(ReSampleChannelContext *s, float ratio) +{ + ratio = 1.0 / ratio; + s->iratio = (int)floor(ratio); + if (s->iratio == 0) + s->iratio = 1; + s->incr = (int)((ratio / s->iratio) * FRAC); + s->frac = 0; + s->last_sample = 0; + s->icount = s->iratio; + s->isum = 0; + s->inv = (FRAC / s->iratio); +} + +/* fractional audio resampling */ +static int fractional_resample(ReSampleChannelContext *s, short *output, short *input, int nb_samples) +{ + unsigned int frac, incr; + int l0, l1; + short *q, *p, *pend; + + l0 = s->last_sample; + incr = s->incr; + frac = s->frac; + + p = input; + pend = input + nb_samples; + q = output; + + l1 = *p++; + for(;;) { + /* interpolate */ + *q++ = (l0 * (FRAC - frac) + l1 * frac) >> FRAC_BITS; + frac = frac + s->incr; + while (frac >= FRAC) { + if (p >= pend) + goto the_end; + frac -= FRAC; + l0 = l1; + l1 = *p++; + } + } + the_end: + s->last_sample = l1; + s->frac = frac; + return q - output; +} + +static int integer_downsample(ReSampleChannelContext *s, short *output, short *input, int nb_samples) +{ + short *q, *p, *pend; + int c, sum; + + p = input; + pend = input + nb_samples; + q = output; + + c = s->icount; + sum = s->isum; + + for(;;) { + sum += *p++; + if (--c == 0) { + *q++ = (sum * s->inv) >> FRAC_BITS; + c = s->iratio; + sum = 0; + } + if (p >= pend) + break; + } + s->isum = sum; + s->icount = c; + return q - output; +} + +/* n1: number of samples */ +static void stereo_to_mono(short *output, short *input, int n1) +{ + short *p, *q; + int n = n1; + + p = input; + q = output; + while (n >= 4) { + q[0] = (p[0] + p[1]) >> 1; + q[1] = (p[2] + p[3]) >> 1; + q[2] = (p[4] + p[5]) >> 1; + q[3] = (p[6] + p[7]) >> 1; + q += 4; + p += 8; + n -= 4; + } + while (n > 0) { + q[0] = (p[0] + 
p[1]) >> 1; + q++; + p += 2; + n--; + } +} + +/* XXX: should use more abstract 'N' channels system */ +static void stereo_split(short *output1, short *output2, short *input, int n) +{ + int i; + + for(i=0;i<n;i++) { + *output1++ = *input++; + *output2++ = *input++; + } +} + +static void stereo_mux(short *output, short *input1, short *input2, int n) +{ + int i; + + for(i=0;i<n;i++) { + *output++ = *input1++; + *output++ = *input2++; + } +} + +static int mono_resample(ReSampleChannelContext *s, short *output, short *input, int nb_samples) +{ + short buf1[nb_samples]; + short *buftmp; + + /* first downsample by an integer factor with averaging filter */ + if (s->iratio > 1) { + buftmp = buf1; + nb_samples = integer_downsample(s, buftmp, input, nb_samples); + } else { + buftmp = input; + } + + /* then do a fractional resampling with linear interpolation */ + if (s->incr != FRAC) { + nb_samples = fractional_resample(s, output, buftmp, nb_samples); + } else { + memcpy(output, buftmp, nb_samples * sizeof(short)); + } + return nb_samples; +} + +/* ratio = output_rate / input_rate */ +int audio_resample_init(ReSampleContext *s, + int output_channels, int input_channels, + int output_rate, int input_rate) +{ + int i; + + s->ratio = (float)output_rate / (float)input_rate; + + if (output_channels > 2 || input_channels > 2) + return -1; + s->input_channels = input_channels; + s->output_channels = output_channels; + + for(i=0;i<output_channels;i++) { + init_mono_resample(&s->channel_ctx[i], s->ratio); + } + return 0; +} + +/* resample audio. 'nb_samples' is the number of input samples */ +/* XXX: optimize it ! */ +/* XXX: do it with polyphase filters, since the quality here is + HORRIBLE. 
Return the number of samples available in output */ +int audio_resample(ReSampleContext *s, short *output, short *input, int nb_samples) +{ + int i, nb_samples1; + short buf[5][nb_samples]; + short *buftmp1, *buftmp2[2], *buftmp3[2]; + + if (s->input_channels == s->output_channels && s->ratio == 1.0) { + /* nothing to do */ + memcpy(output, input, nb_samples * s->input_channels * sizeof(short)); + return nb_samples; + } + + if (s->input_channels == 2 && + s->output_channels == 1) { + buftmp1 = buf[0]; + stereo_to_mono(buftmp1, input, nb_samples); + } else if (s->input_channels == 1 && + s->output_channels == 2) { + /* XXX: do it */ + abort(); + } else { + buftmp1 = input; + } + + if (s->output_channels == 2) { + buftmp2[0] = buf[1]; + buftmp2[1] = buf[2]; + buftmp3[0] = buf[3]; + buftmp3[1] = buf[4]; + stereo_split(buftmp2[0], buftmp2[1], buftmp1, nb_samples); + } else { + buftmp2[0] = buftmp1; + buftmp3[0] = output; + } + + /* resample each channel */ + nb_samples1 = 0; /* avoid warning */ + for(i=0;i<s->output_channels;i++) { + nb_samples1 = mono_resample(&s->channel_ctx[i], buftmp3[i], buftmp2[i], nb_samples); + } + + if (s->output_channels == 2) { + stereo_mux(output, buftmp3[0], buftmp3[1], nb_samples1); + } + + return nb_samples1; +} |