aboutsummaryrefslogtreecommitdiffstats
path: root/libavcodec
diff options
context:
space:
mode:
author: Christophe Gisquet <christophe.gisquet@gmail.com>, 2014-05-28 21:57:38 +0200
committer: Michael Niedermayer <michaelni@gmx.at>, 2014-05-30 15:20:36 +0200
commit: f743fa9c7f872a23672e634c39d61c7b0cd45fcf (patch)
tree: 1f49a1fd4e69084456150f8f9462352154fcdb8a /libavcodec
parent: 7be79c76d3f7711c6c7e0c6d0f3ade5632bfb591 (diff)
download: ffmpeg-f743fa9c7f872a23672e634c39d61c7b0cd45fcf.tar.gz
x86: huffyuvdsp: add_hfyu_left_pred_bgr32
Cycles: C = 3092, MMX = 1053, SSE2 = 578
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
Diffstat (limited to 'libavcodec')
-rw-r--r-- libavcodec/x86/huffyuvdsp.asm 39
-rw-r--r-- libavcodec/x86/huffyuvdsp_init.c 10
2 files changed, 48 insertions, 1 deletions
diff --git a/libavcodec/x86/huffyuvdsp.asm b/libavcodec/x86/huffyuvdsp.asm
index 7ebb07c14e..73c476495a 100644
--- a/libavcodec/x86/huffyuvdsp.asm
+++ b/libavcodec/x86/huffyuvdsp.asm
@@ -1,6 +1,7 @@
;******************************************************************************
;* SIMD-optimized HuffYUV functions
;* Copyright (c) 2008 Loren Merritt
+;* Copyright (c) 2014 Christophe Gisquet
;*
;* This file is part of FFmpeg.
;*
@@ -222,3 +223,41 @@ INIT_MMX mmx
ADD_BYTES
INIT_XMM sse2
ADD_BYTES
+
+; void add_hfyu_left_pred_bgr32(uint8_t *dst, const uint8_t *src,
+; intptr_t w, uint8_t *left)
+%macro LEFT_BGR32 0 ; body of add_hfyu_left_pred_bgr32: dst = running bytewise sum of the 4-byte pixels in src, seeded from *left
+cglobal add_hfyu_left_pred_bgr32, 4,4,3, dst, src, w, left
+ shl wq, 2 ; w *= 4: scale w to a byte count (4 bytes per pixel)
+ movd m0, [leftq] ; low dword of m0 = the 4 bytes at *left (seed pixel)
+ lea dstq, [dstq + wq] ; dst -> one past the last byte of the row
+ lea srcq, [srcq + wq] ; src -> one past the last byte of the row
+ LSHIFT m0, mmsize-4 ; LSHIFT is defined outside this hunk; presumably a byte-wise left shift that moves the seed into the top dword -- confirm
+ neg wq ; w becomes a negative byte offset from the row end, counting up towards 0
+.loop:
+ movu m1, [srcq+wq] ; load mmsize bytes of src (unaligned)
+ mova m2, m1 ; keep an unshifted copy
+%if mmsize == 8
+ punpckhdq m0, m0 ; 8-byte case: copy the high dword (previous pixel) into both dwords
+%endif
+ LSHIFT m1, 4
+ paddb m1, m2 ; each pixel += the pixel just before it in the register (per byte, wrapping)
+%if mmsize == 16
+ pshufd m0, m0, q3333 ; 16-byte case: q3333 presumably selects dword 3 for every lane, i.e. broadcasts the previous pixel -- confirm
+ mova m2, m1
+ LSHIFT m1, 8
+ paddb m1, m2 ; second shift+add step finishes the prefix sum over the 4 pixels in the register
+%endif
+ paddb m0, m1 ; add the carried-in previous pixel to every pixel
+ movu [dstq+wq], m0 ; store mmsize bytes of output (unaligned)
+ add wq, mmsize ; advance by one full register
+ jl .loop ; NOTE(review): the body runs at least once and always handles mmsize bytes, so a row whose byte length is not a multiple of mmsize (or w == 0) reads/writes past the row end -- confirm callers pad their buffers
+ movd m0, [dstq-4] ; reload the last pixel of the row from dst ...
+ movd [leftq], m0 ; ... and write it back to *left
+ REP_RET
+%endmacro
+
+INIT_MMX mmx
+LEFT_BGR32 ; instantiate under INIT_MMX mmx
+INIT_XMM sse2
+LEFT_BGR32 ; instantiate under INIT_XMM sse2
diff --git a/libavcodec/x86/huffyuvdsp_init.c b/libavcodec/x86/huffyuvdsp_init.c
index 9628724090..7ea36c71b9 100644
--- a/libavcodec/x86/huffyuvdsp_init.c
+++ b/libavcodec/x86/huffyuvdsp_init.c
@@ -41,6 +41,11 @@ int ff_add_hfyu_left_pred_ssse3(uint8_t *dst, const uint8_t *src,
int ff_add_hfyu_left_pred_sse4(uint8_t *dst, const uint8_t *src,
intptr_t w, int left);
+void ff_add_hfyu_left_pred_bgr32_mmx(uint8_t *dst, const uint8_t *src,
+ intptr_t w, uint8_t *left); /* presumably the INIT_MMX instantiation of LEFT_BGR32 in huffyuvdsp.asm -- confirm symbol naming */
+void ff_add_hfyu_left_pred_bgr32_sse2(uint8_t *dst, const uint8_t *src,
+ intptr_t w, uint8_t *left); /* presumably the INIT_XMM sse2 instantiation of LEFT_BGR32 in huffyuvdsp.asm -- confirm symbol naming */
+
av_cold void ff_huffyuvdsp_init_x86(HuffYUVDSPContext *c)
{
int cpu_flags = av_get_cpu_flags();
@@ -50,8 +55,10 @@ av_cold void ff_huffyuvdsp_init_x86(HuffYUVDSPContext *c)
c->add_hfyu_median_pred = ff_add_hfyu_median_pred_cmov;
#endif
- if (EXTERNAL_MMX(cpu_flags))
+ if (EXTERNAL_MMX(cpu_flags)) {
c->add_bytes = ff_add_bytes_mmx;
+ c->add_hfyu_left_pred_bgr32 = ff_add_hfyu_left_pred_bgr32_mmx;
+ }
if (EXTERNAL_MMXEXT(cpu_flags)) {
/* slower than cmov version on AMD */
@@ -62,6 +69,7 @@ av_cold void ff_huffyuvdsp_init_x86(HuffYUVDSPContext *c)
if (EXTERNAL_SSE2(cpu_flags)) {
c->add_bytes = ff_add_bytes_sse2;
c->add_hfyu_median_pred = ff_add_hfyu_median_pred_sse2;
+ c->add_hfyu_left_pred_bgr32 = ff_add_hfyu_left_pred_bgr32_sse2;
}
if (EXTERNAL_SSSE3(cpu_flags)) {