diff options
author | Michael Niedermayer <michaelni@gmx.at> | 2014-01-22 21:10:33 +0100 |
---|---|---|
committer | Michael Niedermayer <michaelni@gmx.at> | 2014-01-22 21:11:40 +0100 |
commit | fee97f25fa1275e2a35485cb16283a466c28aadc (patch) | |
tree | 91a539e05cf0b6934fe15e786dbaed200a3351da | |
parent | 631939bde6e29e29131a0ca389e5e8dea4c3d038 (diff) | |
download | ffmpeg-fee97f25fa1275e2a35485cb16283a466c28aadc.tar.gz |
avcodec/x86/lossless_videodsp: port add_hfyu_median_prediction_mmxext to 16bit
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
-rw-r--r-- | libavcodec/x86/lossless_videodsp.asm | 65 | ||||
-rw-r--r-- | libavcodec/x86/lossless_videodsp_init.c | 5 |
2 files changed, 70 insertions, 0 deletions
diff --git a/libavcodec/x86/lossless_videodsp.asm b/libavcodec/x86/lossless_videodsp.asm
index 37663d70df..531bf17b79 100644
--- a/libavcodec/x86/lossless_videodsp.asm
+++ b/libavcodec/x86/lossless_videodsp.asm
@@ -234,3 +234,68 @@ cglobal add_hfyu_left_prediction_int16, 4,4,8, dst, src, mask, w, left
     ADD_HFYU_LEFT_LOOP_INT16 0, 1
 .src_unaligned:
     ADD_HFYU_LEFT_LOOP_INT16 0, 0
+
+; void ff_add_hfyu_median_prediction_int16_mmxext(uint16_t *dst, const uint16_t *top, const uint16_t *diff, unsigned mask, int w, int *left, int *left_top)
+INIT_MMX mmxext
+cglobal add_hfyu_median_prediction_int16, 7,7,0, dst, top, diff, mask, w, left, left_top
+    add      wd, wd         ; w *= 2 (bytes per row); w is a 32-bit int, and the 32-bit add zero-extends into wq on x86-64
+    movd    mm6, maskd
+    SPLATW  mm6, mm6        ; mask replicated into every word lane
+    movq    mm0, [topq]     ; t: first four top samples
+    movq    mm2, mm0
+    movd    mm4, [left_topq]
+    psllq   mm2, 16
+    movq    mm1, mm0        ; keep an unshifted copy of t
+    por     mm4, mm2        ; tl: t moved up one lane, *left_top in lane 0 (assumes *left_top fits in 16 bits)
+    movd    mm3, [leftq]    ; l: running left sample, used from lane 0
+    psubw   mm0, mm4 ; t-tl
+    add    dstq, wq         ; pointers now address the end of the row ...
+    add    topq, wq
+    add   diffq, wq
+    neg      wq             ; ... and wq counts up from -2*w towards 0
+    jmp .skip
+.loop:
+    movq    mm4, [topq+wq]
+    movq    mm0, mm4
+    psllq   mm4, 16
+    por     mm4, mm1        ; lane 0 = last t of the previous group (mm1 was shifted right three times)
+    movq    mm1, mm0 ; t
+    psubw   mm0, mm4 ; t-tl
+.skip:
+    movq    mm2, [diffq+wq] ; four residuals
+%assign i 0
+%rep 4
+    movq    mm4, mm0        ; one sample per pass; only lane 0 of l/t/residual is consumed
+    paddw   mm4, mm3 ; t-tl+l
+    pand    mm4, mm6
+    movq    mm5, mm3
+    pmaxsw  mm3, mm1        ; max(l, t)
+    pminsw  mm5, mm1        ; min(l, t)
+    pminsw  mm3, mm4
+    pmaxsw  mm3, mm5 ; median
+    paddw   mm3, mm2 ; +residual
+    pand    mm3, mm6
+%if i==0
+    movq    mm7, mm3
+    psllq   mm7, 48         ; first result enters at the top lane
+%else
+    movq    mm4, mm3
+    psrlq   mm7, 16         ; earlier results move down one lane
+    psllq   mm4, 48
+    por     mm7, mm4        ; newest result enters at the top lane
+%endif
+%if i<3
+    psrlq   mm0, 16         ; bring the next sample's inputs into lane 0
+    psrlq   mm1, 16
+    psrlq   mm2, 16
+%endif
+%assign i i+1
+%endrep
+    movq    [dstq+wq], mm7  ; four output samples, in memory order
+    add      wq, 8
+    jl .loop
+    movzx   r2d, word [dstq-2] ; r2 (diff) is no longer needed; reuse it as scratch
+    mov     [leftq], r2d       ; *left = last written sample, zero-extended
+    movzx   r2d, word [topq-2]
+    mov     [left_topq], r2d   ; *left_top = last top sample, zero-extended
+    RET
diff --git a/libavcodec/x86/lossless_videodsp_init.c b/libavcodec/x86/lossless_videodsp_init.c
index 9927ca38f3..4eca2a11b1 100644
--- a/libavcodec/x86/lossless_videodsp_init.c
+++ b/libavcodec/x86/lossless_videodsp_init.c
@@ -27,6 +27,7 @@ void ff_diff_int16_mmx (uint16_t *dst, const uint16_t *src1, const uint16_t *src
 void ff_diff_int16_sse2(uint16_t *dst, const uint16_t *src1, const uint16_t *src2, unsigned mask, int w);
 int ff_add_hfyu_left_prediction_int16_ssse3(uint16_t *dst, const uint16_t *src, unsigned mask, int w, int acc);
 int ff_add_hfyu_left_prediction_int16_sse4(uint16_t *dst, const uint16_t *src, unsigned mask, int w, int acc);
+void ff_add_hfyu_median_prediction_int16_mmxext(uint16_t *dst, const uint16_t *top, const uint16_t *diff, unsigned mask, int w, int *left, int *left_top);
 
 void ff_llviddsp_init_x86(LLVidDSPContext *c)
 {
@@ -37,6 +38,10 @@ void ff_llviddsp_init_x86(LLVidDSPContext *c)
         c->diff_int16 = ff_diff_int16_mmx;
     }
 
+    if (EXTERNAL_MMXEXT(cpu_flags)) {
+        c->add_hfyu_median_prediction_int16 = ff_add_hfyu_median_prediction_int16_mmxext;
+    }
+
     if (EXTERNAL_SSE2(cpu_flags)) {
         c->add_int16 = ff_add_int16_sse2;
         c->diff_int16 = ff_diff_int16_sse2;