diff options
author | Christophe Gisquet <christophe.gisquet@gmail.com> | 2014-05-22 17:48:20 +0000 |
---|---|---|
committer | Michael Niedermayer <michaelni@gmx.at> | 2014-05-24 03:45:17 +0200 |
commit | 9722a6a3f35c824d6809a54964900f2490cc82dd (patch) | |
tree | c1cd33a5a9942320a839ef58733a8b7c6682a6a8 /libavcodec | |
parent | f0aca50e0b21d7c97b091f8e551719e0da574e12 (diff) | |
download | ffmpeg-9722a6a3f35c824d6809a54964900f2490cc82dd.tar.gz |
x86: hpeldsp: implement SSE2 put_pixels16_xy2
This is equivalent to the avg version, minus the final averaging with the destination block.
3223(mmx) -> 2006(sse2)
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
Diffstat (limited to 'libavcodec')
-rw-r--r-- | libavcodec/x86/hpeldsp.asm | 21 | ||||
-rw-r--r-- | libavcodec/x86/hpeldsp_init.c | 3 |
2 files changed, 18 insertions, 6 deletions
diff --git a/libavcodec/x86/hpeldsp.asm b/libavcodec/x86/hpeldsp.asm index 1d26c4516e..4af423aee5 100644 --- a/libavcodec/x86/hpeldsp.asm +++ b/libavcodec/x86/hpeldsp.asm @@ -551,11 +551,11 @@ AVG_APPROX_PIXELS8_XY2 ; void ff_avg_pixels16_xy2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h) -%macro AVG_PIXELS_XY2 0 +%macro SET_PIXELS_XY2 1 %if cpuflag(sse2) -cglobal avg_pixels16_xy2, 4,5,8 +cglobal %1_pixels16_xy2, 4,5,8 %else -cglobal avg_pixels8_xy2, 4,5 +cglobal %1_pixels8_xy2, 4,5 %endif pxor m7, m7 mova m6, [pw_2] @@ -588,9 +588,13 @@ cglobal avg_pixels8_xy2, 4,5 paddusw m5, m1 psrlw m4, 2 psrlw m5, 2 +%ifidn %1, avg mova m3, [r0+r4] packuswb m4, m5 PAVGB m4, m3 +%else + packuswb m4, m5 +%endif mova [r0+r4], m4 add r4, r2 @@ -610,9 +614,13 @@ cglobal avg_pixels8_xy2, 4,5 paddusw m1, m5 psrlw m0, 2 psrlw m1, 2 +%ifidn %1, avg mova m3, [r0+r4] packuswb m0, m1 PAVGB m0, m3 +%else + packuswb m0, m1 +%endif mova [r0+r4], m0 add r4, r2 sub r3d, 2 @@ -621,8 +629,9 @@ cglobal avg_pixels8_xy2, 4,5 %endmacro INIT_MMX mmxext -AVG_PIXELS_XY2 +SET_PIXELS_XY2 avg INIT_MMX 3dnow -AVG_PIXELS_XY2 +SET_PIXELS_XY2 avg INIT_XMM sse2 -AVG_PIXELS_XY2 +SET_PIXELS_XY2 put +SET_PIXELS_XY2 avg diff --git a/libavcodec/x86/hpeldsp_init.c b/libavcodec/x86/hpeldsp_init.c index 05bd561f59..cda16dc722 100644 --- a/libavcodec/x86/hpeldsp_init.c +++ b/libavcodec/x86/hpeldsp_init.c @@ -48,6 +48,8 @@ void ff_put_pixels16_y2_sse2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h); void ff_avg_pixels16_y2_sse2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h); +void ff_put_pixels16_xy2_sse2(uint8_t *block, const uint8_t *pixels, + ptrdiff_t line_size, int h); void ff_avg_pixels16_xy2_sse2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h); void ff_put_no_rnd_pixels8_x2_mmxext(uint8_t *block, const uint8_t *pixels, @@ -296,6 +298,7 @@ static void hpeldsp_init_sse2(HpelDSPContext *c, int flags, int cpu_flags) 
c->put_no_rnd_pixels_tab[0][0] = ff_put_pixels16_sse2; c->put_pixels_tab[0][1] = ff_put_pixels16_x2_sse2; c->put_pixels_tab[0][2] = ff_put_pixels16_y2_sse2; + c->put_pixels_tab[0][3] = ff_put_pixels16_xy2_sse2; c->avg_pixels_tab[0][0] = ff_avg_pixels16_sse2; c->avg_pixels_tab[0][1] = ff_avg_pixels16_x2_sse2; c->avg_pixels_tab[0][2] = ff_avg_pixels16_y2_sse2; |