diff options
author | Christophe Gisquet <christophe.gisquet@gmail.com> | 2014-05-28 21:57:38 +0200 |
---|---|---|
committer | Michael Niedermayer <michaelni@gmx.at> | 2014-05-30 15:20:36 +0200 |
commit | f743fa9c7f872a23672e634c39d61c7b0cd45fcf (patch) | |
tree | 1f49a1fd4e69084456150f8f9462352154fcdb8a /libavcodec | |
parent | 7be79c76d3f7711c6c7e0c6d0f3ade5632bfb591 (diff) | |
download | ffmpeg-f743fa9c7f872a23672e634c39d61c7b0cd45fcf.tar.gz |
x86: huffyuvdsp: add_hfyu_left_pred_bgr32
C MMX SSE2
Cycles: 3092 1053 578
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
Diffstat (limited to 'libavcodec')
-rw-r--r-- | libavcodec/x86/huffyuvdsp.asm | 39 | ||||
-rw-r--r-- | libavcodec/x86/huffyuvdsp_init.c | 10 |
2 files changed, 48 insertions, 1 deletions
diff --git a/libavcodec/x86/huffyuvdsp.asm b/libavcodec/x86/huffyuvdsp.asm
index 7ebb07c14e..73c476495a 100644
--- a/libavcodec/x86/huffyuvdsp.asm
+++ b/libavcodec/x86/huffyuvdsp.asm
@@ -1,6 +1,7 @@
 ;******************************************************************************
 ;* SIMD-optimized HuffYUV functions
 ;* Copyright (c) 2008 Loren Merritt
+;* Copyright (c) 2014 Christophe Gisquet
 ;*
 ;* This file is part of FFmpeg.
 ;*
@@ -222,3 +223,41 @@ INIT_MMX mmx
 ADD_BYTES
 INIT_XMM sse2
 ADD_BYTES
+
+; void add_hfyu_left_pred_bgr32(uint8_t *dst, const uint8_t *src,
+;                               intptr_t w, uint8_t *left)
+%macro LEFT_BGR32 0
+cglobal add_hfyu_left_pred_bgr32, 4,4,3, dst, src, w, left
+    shl           wq, 2
+    movd          m0, [leftq]
+    lea         dstq, [dstq + wq]
+    lea         srcq, [srcq + wq]
+    LSHIFT        m0, mmsize-4
+    neg           wq
+.loop:
+    movu          m1, [srcq+wq]
+    mova          m2, m1
+%if mmsize == 8
+    punpckhdq     m0, m0
+%endif
+    LSHIFT        m1, 4
+    paddb         m1, m2
+%if mmsize == 16
+    pshufd        m0, m0, q3333
+    mova          m2, m1
+    LSHIFT        m1, 8
+    paddb         m1, m2
+%endif
+    paddb         m0, m1
+    movu   [dstq+wq], m0
+    add           wq, mmsize
+    jl         .loop
+    movd          m0, [dstq-4]
+    movd     [leftq], m0
+    REP_RET
+%endmacro
+
+INIT_MMX mmx
+LEFT_BGR32
+INIT_XMM sse2
+LEFT_BGR32
diff --git a/libavcodec/x86/huffyuvdsp_init.c b/libavcodec/x86/huffyuvdsp_init.c
index 9628724090..7ea36c71b9 100644
--- a/libavcodec/x86/huffyuvdsp_init.c
+++ b/libavcodec/x86/huffyuvdsp_init.c
@@ -41,6 +41,11 @@ int ff_add_hfyu_left_pred_ssse3(uint8_t *dst, const uint8_t *src,
 int ff_add_hfyu_left_pred_sse4(uint8_t *dst, const uint8_t *src,
                                intptr_t w, int left);
 
+void ff_add_hfyu_left_pred_bgr32_mmx(uint8_t *dst, const uint8_t *src,
+                                     intptr_t w, uint8_t *left);
+void ff_add_hfyu_left_pred_bgr32_sse2(uint8_t *dst, const uint8_t *src,
+                                      intptr_t w, uint8_t *left);
+
 av_cold void ff_huffyuvdsp_init_x86(HuffYUVDSPContext *c)
 {
     int cpu_flags = av_get_cpu_flags();
@@ -50,8 +55,10 @@ av_cold void ff_huffyuvdsp_init_x86(HuffYUVDSPContext *c)
         c->add_hfyu_median_pred = ff_add_hfyu_median_pred_cmov;
 #endif
 
-    if (EXTERNAL_MMX(cpu_flags))
+    if (EXTERNAL_MMX(cpu_flags)) {
         c->add_bytes = ff_add_bytes_mmx;
+        c->add_hfyu_left_pred_bgr32 = ff_add_hfyu_left_pred_bgr32_mmx;
+    }
 
     if (EXTERNAL_MMXEXT(cpu_flags)) {
         /* slower than cmov version on AMD */
@@ -62,6 +69,7 @@ av_cold void ff_huffyuvdsp_init_x86(HuffYUVDSPContext *c)
     if (EXTERNAL_SSE2(cpu_flags)) {
         c->add_bytes = ff_add_bytes_sse2;
         c->add_hfyu_median_pred = ff_add_hfyu_median_pred_sse2;
+        c->add_hfyu_left_pred_bgr32 = ff_add_hfyu_left_pred_bgr32_sse2;
     }
 
     if (EXTERNAL_SSSE3(cpu_flags)) {