diff options
author | Marc Hoffman <mmhoffm@gmail.com> | 2007-06-12 19:13:50 +0000 |
---|---|---|
committer | Marc Hoffman <mmhoffm@gmail.com> | 2007-06-12 19:13:50 +0000 |
commit | 50ee61dda2b956bc09fa8238d54ba86620798b42 (patch) | |
tree | d2d1ccea5829a5a7561daa677c9e9f90803170dc | |
parent | ea6ad30ce9f1a5dbdb18748f4c3c24745f6513fe (diff) | |
download | ffmpeg-50ee61dda2b956bc09fa8238d54ba86620798b42.tar.gz |
Blackfin dct_quantize_bfin routine
2x performance boost in performing quantization for mpeg encoding
Originally committed as revision 9293 to svn://svn.ffmpeg.org/ffmpeg/trunk
-rw-r--r-- | libavcodec/bfin/dsputil_bfin.h | 68 | ||||
-rw-r--r-- | libavcodec/bfin/mpegvideo_bfin.c | 152 |
2 files changed, 220 insertions, 0 deletions
diff --git a/libavcodec/bfin/dsputil_bfin.h b/libavcodec/bfin/dsputil_bfin.h new file mode 100644 index 0000000000..d972e7faec --- /dev/null +++ b/libavcodec/bfin/dsputil_bfin.h @@ -0,0 +1,68 @@ +/* + * BlackFin DSPUTILS COMMON OPTIMIZATIONS HEADER + * + * Copyright (C) 2007 Marc Hoffman <mmh@pleasantst.com> + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + + +#ifndef DSPUTIL_BFIN_H +#define DSPUTIL_BFIN_H + +#define attribute_l1_text __attribute__ ((l1_text)) + +#ifdef BFIN_PROFILE + +static double Telem[16]; +static char *TelemNames[16]; +static int TelemCnt; + +#define PROF(lab,e) { int __e = e; char*__lab = lab; uint64_t _t0 = read_time(); +#define EPROF() _t0 = read_time()-_t0; Telem[__e] = Telem[__e] + _t0; TelemNames[__e] = __lab; } + +static void prof_report (void) +{ + int i; + double s = 0; + for (i=0;i<16;i++) { + double v; + if (TelemNames[i]) { + v = Telem[i]/TelemCnt; + av_log (NULL,AV_LOG_DEBUG,"%-20s: %12.4f\t%12.4f\n", TelemNames[i],v,v/64); + s = s + Telem[i]; + } + } + av_log (NULL,AV_LOG_DEBUG,"%-20s: %12.4f\t%12.4f\n%20.4f\t%d\n", + "total",s/TelemCnt,s/TelemCnt/64,s,TelemCnt); +} + +static void bfprof (void) +{ + static int init; + if (!init) atexit (prof_report); + init=1; + TelemCnt++; +} + +#else +#define PROF(a,b) +#define EPROF() +#define bfprof() +#endif + +#endif diff --git a/libavcodec/bfin/mpegvideo_bfin.c b/libavcodec/bfin/mpegvideo_bfin.c new file mode 100644 index 0000000000..9dd121bafd --- /dev/null +++ b/libavcodec/bfin/mpegvideo_bfin.c @@ -0,0 +1,152 @@ +/* + * BlackFin MPEGVIDEO OPTIMIZATIONS + * + * Copyright (C) 2007 Marc Hoffman <mmh@pleasantst.com> + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "dsputil.h" +#include "mpegvideo.h" +#include "avcodec.h" +#include "dsputil_bfin.h" + + +extern void ff_bfin_fdct (DCTELEM *block) attribute_l1_text; + + +static int dct_quantize_bfin (MpegEncContext *s, + DCTELEM *block, int n, + int qscale, int *overflow) +{ + int last_non_zero, q, start_i; + const short *qmat; + short *bias; + const uint8_t *scantable= s->intra_scantable.scantable; + short dc; + int max=0; + + PROF("fdct",0); + ff_bfin_fdct (block); + EPROF(); + + PROF("denoise",1); + if(s->dct_error_sum) + s->denoise_dct(s, block); + EPROF(); + + PROF("quant-init",2); + if (s->mb_intra) { + if (!s->h263_aic) { + if (n < 4) + q = s->y_dc_scale; + else + q = s->c_dc_scale; + q = q << 3; + } else + /* For AIC we skip quant/dequant of INTRADC */ + q = 1 << 3; + + /* note: block[0] is assumed to be positive */ + dc = block[0] = (block[0] + (q >> 1)) / q; + start_i = 1; + last_non_zero = 0; + bias = s->q_intra_matrix16[qscale][1]; + qmat = s->q_intra_matrix16[qscale][0]; + + } else { + start_i = 0; + last_non_zero = -1; + bias = s->q_inter_matrix16[qscale][1]; + qmat = s->q_inter_matrix16[qscale][0]; + + } + EPROF(); + + PROF("quantize",4); + + /* for(i=start_i; i<64; i++) { */ + /* sign = (block[i]>>15)|1; */ + /* level = ((abs(block[i])+bias[0])*qmat[i])>>16; */ + /* if (level < 0) level = 0; */ + /* max |= level; */ + /* level = level * sign; */ + /* block[i] = level; */ + /* } */ + + asm volatile + ("i2=%1;\n\t" + "r1=[%1++]; \n\t" + "r0=r1>>>15 (v); \n\t" + "lsetup (0f,1f) lc0=%3; \n\t" + "0: r0=r0|%4; \n\t" + " r1=abs r1 (v) || r2=[%2++];\n\t" + " r1=r1+|+%5; \n\t" + " r1=max(r1,%6) (v); \n\t" + " r1.h=(a1 =r1.h*r2.h), r1.l=(a0 =r1.l*r2.l) (tfu); \n\t" + " %0=%0|r1; \n\t" + " r0.h=(a1 =r1.h*r0.h), r0.l=(a0 =r1.l*r0.l) (is) || r1=[%1++];\n\t" + "1: r0=r1>>>15 (v) || [i2++]=r0;\n\t" + "r1=%0>>16; \n\t" + "%0=%0|r1; \n\t" + "%0.h=0; \n\t" + : "=&d" (max) + : "b" (block), "b" (qmat), "a" (32), "d" (0x00010001), "d" (bias[0]*0x10001), "d" (0) + : "R0","R1","R2", "I2"); + if (start_i == 1) block[0] = dc; + + EPROF(); + + + PROF("zzscan",5); + + asm volatile + ("r0=b[%1--] (x); \n\t" + "lsetup (0f,1f) lc0=%3; \n\t" /* for(i=63; i>=start_i; i--) { */ + "0: p0=r0; \n\t" /* j = scantable[i]; */ + " p0=%2+(p0<<1); \n\t" /* if (block[j]) { */ + " r0=w[p0]; \n\t" /* last_non_zero = i; */ + " cc=r0==0; \n\t" /* break; */ + " if !cc jump 2f; \n\t" /* } */ + "1: r0=b[%1--] (x); \n\t" /* } */ + " %0=%4; \n\t" + " jump 3f; \n\t" + "2: %0=lc0; \n\t" + "3:\n\t" + + : "=d" (last_non_zero) + : "a" (scantable+63), "a" (block), "a" (63), "d" (last_non_zero) + : "P0","R0"); + + EPROF(); + + *overflow= s->max_qcoeff < max; //overflow might have happened + + bfprof(); + + /* we need this permutation so that we correct the IDCT, we only permute the !=0 elements */ + if (s->dsp.idct_permutation_type != FF_NO_IDCT_PERM) + ff_block_permute(block, s->dsp.idct_permutation, scantable, last_non_zero); + + return last_non_zero; +} + +void MPV_common_init_bfin (MpegEncContext *s) +{ + s->dct_quantize= dct_quantize_bfin; +} + |