diff options
author | Christophe Gisquet <christophe.gisquet@gmail.com> | 2014-05-28 01:03:23 +0200 |
---|---|---|
committer | Michael Niedermayer <michaelni@gmx.at> | 2014-05-30 14:57:57 +0200 |
commit | 884078d2df0fecd769afdd916fac8727dbd7d632 (patch) | |
tree | a661a73d13aa2c9836fc89663d90748107502707 /libavcodec | |
parent | b3dfebd6416058d5b849317a7d96b3d9085943fd (diff) | |
download | ffmpeg-884078d2df0fecd769afdd916fac8727dbd7d632.tar.gz |
x86: huffyuvdsp: add SSE2 median prediction
From 5010c to 4566 on lagarith YUY2.
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
Diffstat (limited to 'libavcodec')
-rw-r--r-- | libavcodec/x86/huffyuvdsp.asm | 98 | ||||
-rw-r--r-- | libavcodec/x86/huffyuvdsp_init.c | 4 |
2 files changed, 64 insertions, 38 deletions
diff --git a/libavcodec/x86/huffyuvdsp.asm b/libavcodec/x86/huffyuvdsp.asm index a923e70e1e..7ebb07c14e 100644 --- a/libavcodec/x86/huffyuvdsp.asm +++ b/libavcodec/x86/huffyuvdsp.asm @@ -33,64 +33,86 @@ SECTION_TEXT ; void ff_add_hfyu_median_pred_mmxext(uint8_t *dst, const uint8_t *top, ; const uint8_t *diff, int w, ; int *left, int *left_top) -INIT_MMX mmxext -cglobal add_hfyu_median_pred, 6,6,0, dst, top, diff, w, left, left_top - movq mm0, [topq] - movq mm2, mm0 - movd mm4, [left_topq] - psllq mm2, 8 - movq mm1, mm0 - por mm4, mm2 - movd mm3, [leftq] - psubb mm0, mm4 ; t-tl +%macro LSHIFT 2 +%if mmsize > 8 + pslldq %1, %2 +%else + psllq %1, 8*(%2) +%endif +%endmacro + +%macro RSHIFT 2 +%if mmsize > 8 + psrldq %1, %2 +%else + psrlq %1, 8*(%2) +%endif +%endmacro + +%macro HFYU_MEDIAN 0 +cglobal add_hfyu_median_pred, 6,6,8, dst, top, diff, w, left, left_top + movu m0, [topq] + mova m2, m0 + movd m4, [left_topq] + LSHIFT m2, 1 + mova m1, m0 + por m4, m2 + movd m3, [leftq] + psubb m0, m4 ; t-tl add dstq, wq add topq, wq add diffq, wq neg wq jmp .skip .loop: - movq mm4, [topq+wq] - movq mm0, mm4 - psllq mm4, 8 - por mm4, mm1 - movq mm1, mm0 ; t - psubb mm0, mm4 ; t-tl + movu m4, [topq+wq] + mova m0, m4 + LSHIFT m4, 1 + por m4, m1 + mova m1, m0 ; t + psubb m0, m4 ; t-tl .skip: - movq mm2, [diffq+wq] + movu m2, [diffq+wq] %assign i 0 -%rep 8 - movq mm4, mm0 - paddb mm4, mm3 ; t-tl+l - movq mm5, mm3 - pmaxub mm3, mm1 - pminub mm5, mm1 - pminub mm3, mm4 - pmaxub mm3, mm5 ; median - paddb mm3, mm2 ; +residual +%rep mmsize + mova m4, m0 + paddb m4, m3 ; t-tl+l + mova m5, m3 + pmaxub m3, m1 + pminub m5, m1 + pminub m3, m4 + pmaxub m3, m5 ; median + paddb m3, m2 ; +residual %if i==0 - movq mm7, mm3 - psllq mm7, 56 + mova m7, m3 + LSHIFT m7, mmsize-1 %else - movq mm6, mm3 - psrlq mm7, 8 - psllq mm6, 56 - por mm7, mm6 + mova m6, m3 + RSHIFT m7, 1 + LSHIFT m6, mmsize-1 + por m7, m6 %endif -%if i<7 - psrlq mm0, 8 - psrlq mm1, 8 - psrlq mm2, 8 +%if i<mmsize-1 + RSHIFT m0, 1 + RSHIFT m1, 1 + RSHIFT m2, 1 %endif %assign i i+1 %endrep - movq [dstq+wq], mm7 - add wq, 8 + movu [dstq+wq], m7 + add wq, mmsize jl .loop movzx r2d, byte [dstq-1] mov [leftq], r2d movzx r2d, byte [topq-1] mov [left_topq], r2d RET +%endmacro + +INIT_MMX mmxext +HFYU_MEDIAN +INIT_XMM sse2 +HFYU_MEDIAN %macro ADD_HFYU_LEFT_LOOP 2 ; %1 = dst_is_aligned, %2 = src_is_aligned diff --git a/libavcodec/x86/huffyuvdsp_init.c b/libavcodec/x86/huffyuvdsp_init.c index 8a755e65b0..86e482ce70 100644 --- a/libavcodec/x86/huffyuvdsp_init.c +++ b/libavcodec/x86/huffyuvdsp_init.c @@ -32,6 +32,9 @@ void ff_add_hfyu_median_pred_cmov(uint8_t *dst, const uint8_t *top, void ff_add_hfyu_median_pred_mmxext(uint8_t *dst, const uint8_t *top, const uint8_t *diff, int w, int *left, int *left_top); +void ff_add_hfyu_median_pred_sse2(uint8_t *dst, const uint8_t *top, + const uint8_t *diff, int w, + int *left, int *left_top); int ff_add_hfyu_left_pred_ssse3(uint8_t *dst, const uint8_t *src, int w, int left); @@ -58,6 +61,7 @@ av_cold void ff_huffyuvdsp_init_x86(HuffYUVDSPContext *c) if (EXTERNAL_SSE2(cpu_flags)) { c->add_bytes = ff_add_bytes_sse2; + c->add_hfyu_median_pred = ff_add_hfyu_median_pred_sse2; } if (EXTERNAL_SSSE3(cpu_flags)) { |