diff options
author | David Conrad <lessen42@gmail.com> | 2010-06-04 04:46:26 +0000 |
---|---|---|
committer | David Conrad <lessen42@gmail.com> | 2010-06-04 04:46:26 +0000 |
commit | 413abbe16465a7b49472ac110e42939e853e24a1 (patch) | |
tree | da789212b4e9bd916d42005ef41801db220424a8 /libavcodec/x86/dsputil_mmx_avg_template.c | |
parent | 784824a68c00d95dd81085483950b92203345a65 (diff) | |
download | ffmpeg-413abbe16465a7b49472ac110e42939e853e24a1.tar.gz |
Add bitexact versions of put_no_rnd_pixels8 _x2 and _y2 for vp3/theora
Originally committed as revision 23463 to svn://svn.ffmpeg.org/ffmpeg/trunk
Diffstat (limited to 'libavcodec/x86/dsputil_mmx_avg_template.c')
-rw-r--r-- | libavcodec/x86/dsputil_mmx_avg_template.c | 81 |
1 files changed, 81 insertions, 0 deletions
diff --git a/libavcodec/x86/dsputil_mmx_avg_template.c b/libavcodec/x86/dsputil_mmx_avg_template.c index 8220867328..69575e3ae7 100644 --- a/libavcodec/x86/dsputil_mmx_avg_template.c +++ b/libavcodec/x86/dsputil_mmx_avg_template.c @@ -586,6 +586,49 @@ static void DEF(put_no_rnd_pixels8_x2)(uint8_t *block, const uint8_t *pixels, in :"%"REG_a, "memory"); } +/* Horizontal half-pel copy without rounding, 8 pixels wide; the commit message describes it as the bitexact variant for vp3/theora. Each output byte is built from pixels[x] and pixels[x+1] of the same row: both inputs are complemented (pxor with the all-ones mm6), combined with PAVGB, and the result is complemented again. PAVGB is defined outside this hunk; presumably it is a round-up byte average, (a+b+1)>>1, in which case the sequence yields the truncating average (a+b)>>1 exactly (confirm against the macro definition). Operands: %0 = h (row counter), %1 = pixels (source), %2 = block (destination), %3 = line_size (byte stride applied to both pointers), %4 = 3*line_size. Each loop pass handles 4 rows and the counter is tested only after the pass, so at least 4 rows are always written; h is presumably a positive multiple of 4 (confirm with callers). Reads 9 bytes from every source row. Modifies mm0-mm3 and mm6, none of which are named in the clobber list. */ static void DEF(put_no_rnd_pixels8_x2_exact)(uint8_t *block, const uint8_t *pixels, int line_size, int h) +{ + __asm__ volatile ( + /* mm6 = all ones, used as the complement mask */ "pcmpeqb %%mm6, %%mm6 \n\t" + "1: \n\t" + /* rows 0 and 1: load 8 bytes at offset 0 and at offset 1 */ "movq (%1), %%mm0 \n\t" + "movq (%1, %3), %%mm2 \n\t" + "movq 1(%1), %%mm1 \n\t" + "movq 1(%1, %3), %%mm3 \n\t" + /* complement all four inputs */ "pxor %%mm6, %%mm0 \n\t" + "pxor %%mm6, %%mm2 \n\t" + "pxor %%mm6, %%mm1 \n\t" + "pxor %%mm6, %%mm3 \n\t" + /* combine each pixel with its right-hand neighbour */ PAVGB" %%mm1, %%mm0 \n\t" + PAVGB" %%mm3, %%mm2 \n\t" + /* complement the results back */ "pxor %%mm6, %%mm0 \n\t" + "pxor %%mm6, %%mm2 \n\t" + /* store output rows 0 and 1 */ "movq %%mm0, (%2) \n\t" + "movq %%mm2, (%2, %3) \n\t" + /* rows 2 and 3: same steps, row 3 addressed through %4 = 3*line_size */ "movq (%1, %3,2), %%mm0 \n\t" + "movq 1(%1, %3,2), %%mm1 \n\t" + "movq (%1, %4), %%mm2 \n\t" + "movq 1(%1, %4), %%mm3 \n\t" + "pxor %%mm6, %%mm0 \n\t" + "pxor %%mm6, %%mm1 \n\t" + "pxor %%mm6, %%mm2 \n\t" + "pxor %%mm6, %%mm3 \n\t" + PAVGB" %%mm1, %%mm0 \n\t" + PAVGB" %%mm3, %%mm2 \n\t" + "pxor %%mm6, %%mm0 \n\t" + "pxor %%mm6, %%mm2 \n\t" + "movq %%mm0, (%2, %3,2) \n\t" + "movq %%mm2, (%2, %4) \n\t" + /* advance source and destination by 4 rows */ "lea (%1, %3,4), %1 \n\t" + "lea (%2, %3,4), %2 \n\t" + /* 4 rows done; repeat while the remaining count is above zero */ "subl $4, %0 \n\t" + "jg 1b \n\t" + : "+g"(h), "+r"(pixels), "+r"(block) + : "r" ((x86_reg)line_size), "r"((x86_reg)3*line_size) + : "memory" + ); +} + static void DEF(put_pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h) { __asm__ volatile( @@ -650,6 +693,44 @@ static void DEF(put_no_rnd_pixels8_y2)(uint8_t *block, const uint8_t *pixels, in :"%"REG_a, "memory"); } +/* Vertical half-pel copy without rounding, 8 pixels wide; the commit message describes it as the bitexact variant for vp3/theora. Output row i is built from source rows i and i+1: both rows are complemented (pxor with the all-ones mm6), combined with PAVGB, and the result is complemented again. PAVGB is defined outside this hunk; presumably it is a round-up byte average, (a+b+1)>>1, in which case the sequence yields the truncating average (a+b)>>1 exactly (confirm against the macro definition). Operands: %0 = h (row counter), %1 = pixels (source), %2 = block (destination), %3 = line_size (byte stride applied to both pointers), %4 = 3*line_size. The prologue loads and complements row 0 into mm0 and steps pixels to row 1. Each loop pass then loads four more rows, writes four output rows, and leaves the complemented last-loaded row in mm0 for the next pass, so every source row is loaded only once and h+1 source rows are read when h is a multiple of 4. The counter is tested only after a pass, so at least 4 rows are always written; h is presumably a positive multiple of 4 (confirm with callers). Modifies mm0-mm2 and mm6, none of which are named in the clobber list. */ static void DEF(put_no_rnd_pixels8_y2_exact)(uint8_t *block, const uint8_t *pixels, int line_size, int h) +{ + __asm__ volatile ( + /* prologue: mm0 = row 0, mm6 = all ones, pixels moves to row 1, then mm0 is complemented */ "movq (%1), %%mm0 \n\t" + "pcmpeqb %%mm6, %%mm6 \n\t" + "add %3, %1 \n\t" + "pxor %%mm6, 
%%mm0 \n\t" + "1: \n\t" + "movq (%1), %%mm1 \n\t" + "movq (%1, %3), %%mm2 \n\t" + "pxor %%mm6, %%mm1 \n\t" + "pxor %%mm6, %%mm2 \n\t" + PAVGB" %%mm1, %%mm0 \n\t" + PAVGB" %%mm2, %%mm1 \n\t" + "pxor %%mm6, %%mm0 \n\t" + "pxor %%mm6, %%mm1 \n\t" + "movq %%mm0, (%2) \n\t" + "movq %%mm1, (%2, %3) \n\t" + "movq (%1, %3,2), %%mm1 \n\t" + "movq (%1, %4), %%mm0 \n\t" + "pxor %%mm6, %%mm1 \n\t" + "pxor %%mm6, %%mm0 \n\t" + PAVGB" %%mm1, %%mm2 \n\t" + PAVGB" %%mm0, %%mm1 \n\t" + "pxor %%mm6, %%mm2 \n\t" + "pxor %%mm6, %%mm1 \n\t" + "movq %%mm2, (%2, %3,2) \n\t" + "movq %%mm1, (%2, %4) \n\t" + "lea (%1, %3,4), %1 \n\t" + "lea (%2, %3,4), %2 \n\t" + "subl $4, %0 \n\t" + "jg 1b \n\t" + :"+g"(h), "+r"(pixels), "+r" (block) + :"r" ((x86_reg)line_size), "r"((x86_reg)3*line_size) + :"memory" + ); +} + static void DEF(avg_pixels8)(uint8_t *block, const uint8_t *pixels, int line_size, int h) { __asm__ volatile( |