diff options
author | Christophe GISQUET <christophe.gisquet@gmail.com> | 2012-03-19 22:46:28 +0100 |
---|---|---|
committer | Ronald S. Bultje <rsbultje@gmail.com> | 2012-04-10 10:06:48 -0700 |
commit | 272b252c0110225188c7d7f31167941210aac197 (patch) | |
tree | 47bea5996c88057a418e8872a655bac8f261736e /libavcodec/x86/rv40dsp_init.c | |
parent | d3c59d5003a483f1a23e225fc71c19bd1116d11c (diff) | |
download | ffmpeg-272b252c0110225188c7d7f31167941210aac197.tar.gz |
rv40dsp: implement prescaled versions for biweight.
Quite often, the original weights are multiples of 512. By prescaling them
by 1/512 when they are computed (once per frame), no intermediate shifting
is needed, nor is any prescaling needed on each call.
The x86 code already used that trick.
Signed-off-by: Ronald S. Bultje <rsbultje@gmail.com>
Diffstat (limited to 'libavcodec/x86/rv40dsp_init.c')
-rw-r--r-- | libavcodec/x86/rv40dsp_init.c | 30 |
1 files changed, 20 insertions, 10 deletions
diff --git a/libavcodec/x86/rv40dsp_init.c b/libavcodec/x86/rv40dsp_init.c index 79c70f78c3..df468aa9e5 100644 --- a/libavcodec/x86/rv40dsp_init.c +++ b/libavcodec/x86/rv40dsp_init.c @@ -41,10 +41,14 @@ void ff_avg_rv40_chroma_mc4_3dnow(uint8_t *dst, uint8_t *src, int stride, int h, int x, int y); #define DECLARE_WEIGHT(opt) \ -void ff_rv40_weight_func_16_##opt(uint8_t *dst, uint8_t *src1, uint8_t *src2, \ - int w1, int w2, ptrdiff_t stride); \ -void ff_rv40_weight_func_8_##opt (uint8_t *dst, uint8_t *src1, uint8_t *src2, \ - int w1, int w2, ptrdiff_t stride); +void ff_rv40_weight_func_rnd_16_##opt(uint8_t *dst, uint8_t *src1, uint8_t *src2, \ + int w1, int w2, ptrdiff_t stride); \ +void ff_rv40_weight_func_rnd_8_##opt (uint8_t *dst, uint8_t *src1, uint8_t *src2, \ + int w1, int w2, ptrdiff_t stride); \ +void ff_rv40_weight_func_nornd_16_##opt(uint8_t *dst, uint8_t *src1, uint8_t *src2, \ + int w1, int w2, ptrdiff_t stride); \ +void ff_rv40_weight_func_nornd_8_##opt (uint8_t *dst, uint8_t *src1, uint8_t *src2, \ + int w1, int w2, ptrdiff_t stride); DECLARE_WEIGHT(mmx) DECLARE_WEIGHT(sse2) DECLARE_WEIGHT(ssse3) @@ -57,8 +61,10 @@ void ff_rv40dsp_init_x86(RV34DSPContext *c, DSPContext *dsp) if (mm_flags & AV_CPU_FLAG_MMX) { c->put_chroma_pixels_tab[0] = ff_put_rv40_chroma_mc8_mmx; c->put_chroma_pixels_tab[1] = ff_put_rv40_chroma_mc4_mmx; - c->rv40_weight_pixels_tab[0] = ff_rv40_weight_func_16_mmx; - c->rv40_weight_pixels_tab[1] = ff_rv40_weight_func_8_mmx; + c->rv40_weight_pixels_tab[0][0] = ff_rv40_weight_func_rnd_16_mmx; + c->rv40_weight_pixels_tab[0][1] = ff_rv40_weight_func_rnd_8_mmx; + c->rv40_weight_pixels_tab[1][0] = ff_rv40_weight_func_nornd_16_mmx; + c->rv40_weight_pixels_tab[1][1] = ff_rv40_weight_func_nornd_8_mmx; } if (mm_flags & AV_CPU_FLAG_MMX2) { c->avg_chroma_pixels_tab[0] = ff_avg_rv40_chroma_mc8_mmx2; @@ -68,12 +74,16 @@ void ff_rv40dsp_init_x86(RV34DSPContext *c, DSPContext *dsp) c->avg_chroma_pixels_tab[1] = ff_avg_rv40_chroma_mc4_3dnow; } if 
(mm_flags & AV_CPU_FLAG_SSE2) { - c->rv40_weight_pixels_tab[0] = ff_rv40_weight_func_16_sse2; - c->rv40_weight_pixels_tab[1] = ff_rv40_weight_func_8_sse2; + c->rv40_weight_pixels_tab[0][0] = ff_rv40_weight_func_rnd_16_sse2; + c->rv40_weight_pixels_tab[0][1] = ff_rv40_weight_func_rnd_8_sse2; + c->rv40_weight_pixels_tab[1][0] = ff_rv40_weight_func_nornd_16_sse2; + c->rv40_weight_pixels_tab[1][1] = ff_rv40_weight_func_nornd_8_sse2; } if (mm_flags & AV_CPU_FLAG_SSSE3) { - c->rv40_weight_pixels_tab[0] = ff_rv40_weight_func_16_ssse3; - c->rv40_weight_pixels_tab[1] = ff_rv40_weight_func_8_ssse3; + c->rv40_weight_pixels_tab[0][0] = ff_rv40_weight_func_rnd_16_ssse3; + c->rv40_weight_pixels_tab[0][1] = ff_rv40_weight_func_rnd_8_ssse3; + c->rv40_weight_pixels_tab[1][0] = ff_rv40_weight_func_nornd_16_ssse3; + c->rv40_weight_pixels_tab[1][1] = ff_rv40_weight_func_nornd_8_ssse3; } #endif } |