aboutsummaryrefslogtreecommitdiffstats
path: root/libavcodec
diff options
context:
space:
mode:
authorChristophe Gisquet <christophe.gisquet@gmail.com>2014-05-28 01:03:23 +0200
committerMichael Niedermayer <michaelni@gmx.at>2014-05-30 14:57:57 +0200
commit884078d2df0fecd769afdd916fac8727dbd7d632 (patch)
treea661a73d13aa2c9836fc89663d90748107502707 /libavcodec
parentb3dfebd6416058d5b849317a7d96b3d9085943fd (diff)
downloadffmpeg-884078d2df0fecd769afdd916fac8727dbd7d632.tar.gz
x86: huffyuvdsp: add SSE2 median prediction
From 5010c to 4566 on lagarith YUY2. Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
Diffstat (limited to 'libavcodec')
-rw-r--r--libavcodec/x86/huffyuvdsp.asm98
-rw-r--r--libavcodec/x86/huffyuvdsp_init.c4
2 files changed, 64 insertions, 38 deletions
diff --git a/libavcodec/x86/huffyuvdsp.asm b/libavcodec/x86/huffyuvdsp.asm
index a923e70e1e..7ebb07c14e 100644
--- a/libavcodec/x86/huffyuvdsp.asm
+++ b/libavcodec/x86/huffyuvdsp.asm
@@ -33,64 +33,86 @@ SECTION_TEXT
; void ff_add_hfyu_median_pred_mmxext(uint8_t *dst, const uint8_t *top,
; const uint8_t *diff, int w,
; int *left, int *left_top)
-INIT_MMX mmxext
-cglobal add_hfyu_median_pred, 6,6,0, dst, top, diff, w, left, left_top
- movq mm0, [topq]
- movq mm2, mm0
- movd mm4, [left_topq]
- psllq mm2, 8
- movq mm1, mm0
- por mm4, mm2
- movd mm3, [leftq]
- psubb mm0, mm4 ; t-tl
+%macro LSHIFT 2
+%if mmsize > 8
+ pslldq %1, %2
+%else
+ psllq %1, 8*(%2)
+%endif
+%endmacro
+
+%macro RSHIFT 2
+%if mmsize > 8
+ psrldq %1, %2
+%else
+ psrlq %1, 8*(%2)
+%endif
+%endmacro
+
+%macro HFYU_MEDIAN 0
+cglobal add_hfyu_median_pred, 6,6,8, dst, top, diff, w, left, left_top
+ movu m0, [topq]
+ mova m2, m0
+ movd m4, [left_topq]
+ LSHIFT m2, 1
+ mova m1, m0
+ por m4, m2
+ movd m3, [leftq]
+ psubb m0, m4 ; t-tl
add dstq, wq
add topq, wq
add diffq, wq
neg wq
jmp .skip
.loop:
- movq mm4, [topq+wq]
- movq mm0, mm4
- psllq mm4, 8
- por mm4, mm1
- movq mm1, mm0 ; t
- psubb mm0, mm4 ; t-tl
+ movu m4, [topq+wq]
+ mova m0, m4
+ LSHIFT m4, 1
+ por m4, m1
+ mova m1, m0 ; t
+ psubb m0, m4 ; t-tl
.skip:
- movq mm2, [diffq+wq]
+ movu m2, [diffq+wq]
%assign i 0
-%rep 8
- movq mm4, mm0
- paddb mm4, mm3 ; t-tl+l
- movq mm5, mm3
- pmaxub mm3, mm1
- pminub mm5, mm1
- pminub mm3, mm4
- pmaxub mm3, mm5 ; median
- paddb mm3, mm2 ; +residual
+%rep mmsize
+ mova m4, m0
+ paddb m4, m3 ; t-tl+l
+ mova m5, m3
+ pmaxub m3, m1
+ pminub m5, m1
+ pminub m3, m4
+ pmaxub m3, m5 ; median
+ paddb m3, m2 ; +residual
%if i==0
- movq mm7, mm3
- psllq mm7, 56
+ mova m7, m3
+ LSHIFT m7, mmsize-1
%else
- movq mm6, mm3
- psrlq mm7, 8
- psllq mm6, 56
- por mm7, mm6
+ mova m6, m3
+ RSHIFT m7, 1
+ LSHIFT m6, mmsize-1
+ por m7, m6
%endif
-%if i<7
- psrlq mm0, 8
- psrlq mm1, 8
- psrlq mm2, 8
+%if i<mmsize-1
+ RSHIFT m0, 1
+ RSHIFT m1, 1
+ RSHIFT m2, 1
%endif
%assign i i+1
%endrep
- movq [dstq+wq], mm7
- add wq, 8
+ movu [dstq+wq], m7
+ add wq, mmsize
jl .loop
movzx r2d, byte [dstq-1]
mov [leftq], r2d
movzx r2d, byte [topq-1]
mov [left_topq], r2d
RET
+%endmacro
+
+INIT_MMX mmxext
+HFYU_MEDIAN
+INIT_XMM sse2
+HFYU_MEDIAN
%macro ADD_HFYU_LEFT_LOOP 2 ; %1 = dst_is_aligned, %2 = src_is_aligned
diff --git a/libavcodec/x86/huffyuvdsp_init.c b/libavcodec/x86/huffyuvdsp_init.c
index 8a755e65b0..86e482ce70 100644
--- a/libavcodec/x86/huffyuvdsp_init.c
+++ b/libavcodec/x86/huffyuvdsp_init.c
@@ -32,6 +32,9 @@ void ff_add_hfyu_median_pred_cmov(uint8_t *dst, const uint8_t *top,
void ff_add_hfyu_median_pred_mmxext(uint8_t *dst, const uint8_t *top,
const uint8_t *diff, int w,
int *left, int *left_top);
+void ff_add_hfyu_median_pred_sse2(uint8_t *dst, const uint8_t *top,
+ const uint8_t *diff, int w,
+ int *left, int *left_top);
int ff_add_hfyu_left_pred_ssse3(uint8_t *dst, const uint8_t *src,
int w, int left);
@@ -58,6 +61,7 @@ av_cold void ff_huffyuvdsp_init_x86(HuffYUVDSPContext *c)
if (EXTERNAL_SSE2(cpu_flags)) {
c->add_bytes = ff_add_bytes_sse2;
+ c->add_hfyu_median_pred = ff_add_hfyu_median_pred_sse2;
}
if (EXTERNAL_SSSE3(cpu_flags)) {