diff options
author | Christophe Gisquet <christophe.gisquet@gmail.com> | 2014-05-22 17:48:19 +0000 |
---|---|---|
committer | Michael Niedermayer <michaelni@gmx.at> | 2014-05-24 03:29:48 +0200 |
commit | f0aca50e0b21d7c97b091f8e551719e0da574e12 (patch) | |
tree | 11152fa7e47b24ce24513ea75ccafdde41f22c82 /libavcodec/x86/hpeldsp_init.c | |
parent | 9eaa8c22bc40ce3c5c6911e65f4e021587088881 (diff) | |
download | ffmpeg-f0aca50e0b21d7c97b091f8e551719e0da574e12.tar.gz |
x86: hpeldsp: implement SSE2 versions
Those are mostly used in codecs older than H.264, eg MPEG-2.
put16 versions:
mmx mmx2 sse2
x2: 1888 1185 552
y2: 1778 1092 510
avg16 xy2: 3509(mmx2) -> 2169(sse2)
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
Diffstat (limited to 'libavcodec/x86/hpeldsp_init.c')
-rw-r--r-- | libavcodec/x86/hpeldsp_init.c | 15 |
1 files changed, 15 insertions, 0 deletions
diff --git a/libavcodec/x86/hpeldsp_init.c b/libavcodec/x86/hpeldsp_init.c index 5e2ecb53a8..05bd561f59 100644 --- a/libavcodec/x86/hpeldsp_init.c +++ b/libavcodec/x86/hpeldsp_init.c @@ -40,6 +40,16 @@ void ff_put_pixels16_x2_mmxext(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h); void ff_put_pixels16_x2_3dnow(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h); +void ff_put_pixels16_x2_sse2(uint8_t *block, const uint8_t *pixels, + ptrdiff_t line_size, int h); +void ff_avg_pixels16_x2_sse2(uint8_t *block, const uint8_t *pixels, + ptrdiff_t line_size, int h); +void ff_put_pixels16_y2_sse2(uint8_t *block, const uint8_t *pixels, + ptrdiff_t line_size, int h); +void ff_avg_pixels16_y2_sse2(uint8_t *block, const uint8_t *pixels, + ptrdiff_t line_size, int h); +void ff_avg_pixels16_xy2_sse2(uint8_t *block, const uint8_t *pixels, + ptrdiff_t line_size, int h); void ff_put_no_rnd_pixels8_x2_mmxext(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h); void ff_put_no_rnd_pixels8_x2_3dnow(uint8_t *block, const uint8_t *pixels, @@ -284,7 +294,12 @@ static void hpeldsp_init_sse2(HpelDSPContext *c, int flags, int cpu_flags) // these functions are slower than mmx on AMD, but faster on Intel c->put_pixels_tab[0][0] = ff_put_pixels16_sse2; c->put_no_rnd_pixels_tab[0][0] = ff_put_pixels16_sse2; + c->put_pixels_tab[0][1] = ff_put_pixels16_x2_sse2; + c->put_pixels_tab[0][2] = ff_put_pixels16_y2_sse2; c->avg_pixels_tab[0][0] = ff_avg_pixels16_sse2; + c->avg_pixels_tab[0][1] = ff_avg_pixels16_x2_sse2; + c->avg_pixels_tab[0][2] = ff_avg_pixels16_y2_sse2; + c->avg_pixels_tab[0][3] = ff_avg_pixels16_xy2_sse2; } #endif /* HAVE_SSE2_EXTERNAL */ } |