author    | James Almer <jamrial@gmail.com> | 2017-01-07 21:10:46 -0300
committer | James Almer <jamrial@gmail.com> | 2017-01-12 22:53:04 -0300
commit    | 30c1f27299d3fc2b0c0858c003066cc5e36a28af (patch)
tree      | 594a5c933605cd1386a66d5e2b595394e8991fab /libavcodec/x86/huffyuvencdsp.asm
parent    | 5ac1dd8e231987c022a860c6b1961b038a84b613 (diff)
download  | ffmpeg-30c1f27299d3fc2b0c0858c003066cc5e36a28af.tar.gz
huffyuvencdsp: move functions only used by huffyuv from lossless_videodsp
Signed-off-by: James Almer <jamrial@gmail.com>
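Within this file, the move brings in SIMD implementations of diff_int16 and sub_hfyu_median_pred_int16 (see the diff below). As a reference for what the first of these computes, here is a scalar C sketch of the masked 16-bit difference that the INT16_LOOP macro implements in its sub variant; the `_ref` name and prototype are illustrative, written from the asm's argument list rather than copied from FFmpeg's C fallback.

```c
#include <stdint.h>

/* Scalar sketch of what diff_int16 computes: per-sample 16-bit
 * difference, masked to the stream's bit depth.  The SIMD version
 * first peels off, one word at a time, whatever is left over beyond a
 * multiple of 2*mmsize bytes, then runs the vector loop; the SSE2
 * entry point additionally selects aligned or unaligned loads. */
static void diff_int16_ref(uint16_t *dst, const uint16_t *src1,
                           const uint16_t *src2, unsigned mask, int w)
{
    for (int i = 0; i < w; i++)
        dst[i] = (src1[i] - src2[i]) & mask;
}
```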
Diffstat (limited to 'libavcodec/x86/huffyuvencdsp.asm')
-rw-r--r-- | libavcodec/x86/huffyuvencdsp.asm | 113
1 file changed, 113 insertions, 0 deletions
```diff
diff --git a/libavcodec/x86/huffyuvencdsp.asm b/libavcodec/x86/huffyuvencdsp.asm
index a55a1de65d..78ad202249 100644
--- a/libavcodec/x86/huffyuvencdsp.asm
+++ b/libavcodec/x86/huffyuvencdsp.asm
@@ -148,3 +148,116 @@ DIFF_BYTES_PROLOGUE
 DIFF_BYTES_BODY   u, u
 %undef i
 %endif
+
+%macro INT16_LOOP 2 ; %1 = a/u (aligned/unaligned), %2 = add/sub
+    movd    m4, maskd
+    SPLATW  m4, m4
+    add     wd, wd
+    test    wq, 2*mmsize - 1
+    jz %%.tomainloop
+    push  tmpq
+%%.wordloop:
+    sub     wq, 2
+%ifidn %2, add
+    mov     tmpw, [srcq+wq]
+    add     tmpw, [dstq+wq]
+%else
+    mov     tmpw, [src1q+wq]
+    sub     tmpw, [src2q+wq]
+%endif
+    and     tmpw, maskw
+    mov     [dstq+wq], tmpw
+    test    wq, 2*mmsize - 1
+    jnz %%.wordloop
+    pop     tmpq
+%%.tomainloop:
+%ifidn %2, add
+    add     srcq, wq
+%else
+    add     src1q, wq
+    add     src2q, wq
+%endif
+    add     dstq, wq
+    neg     wq
+    jz      %%.end
+%%.loop:
+%ifidn %2, add
+    mov%1   m0, [srcq+wq]
+    mov%1   m1, [dstq+wq]
+    mov%1   m2, [srcq+wq+mmsize]
+    mov%1   m3, [dstq+wq+mmsize]
+%else
+    mov%1   m0, [src1q+wq]
+    mov%1   m1, [src2q+wq]
+    mov%1   m2, [src1q+wq+mmsize]
+    mov%1   m3, [src2q+wq+mmsize]
+%endif
+    p%2w    m0, m1
+    p%2w    m2, m3
+    pand    m0, m4
+    pand    m2, m4
+    mov%1   [dstq+wq]       , m0
+    mov%1   [dstq+wq+mmsize], m2
+    add     wq, 2*mmsize
+    jl %%.loop
+%%.end:
+    RET
+%endmacro
+
+%if ARCH_X86_32
+INIT_MMX mmx
+cglobal diff_int16, 5,5,5, dst, src1, src2, mask, w, tmp
+    INT16_LOOP a, sub
+%endif
+
+INIT_XMM sse2
+cglobal diff_int16, 5,5,5, dst, src1, src2, mask, w, tmp
+    test src1q, mmsize-1
+    jnz .unaligned
+    test src2q, mmsize-1
+    jnz .unaligned
+    test dstq, mmsize-1
+    jnz .unaligned
+    INT16_LOOP a, sub
+.unaligned:
+    INT16_LOOP u, sub
+
+INIT_MMX mmxext
+cglobal sub_hfyu_median_pred_int16, 7,7,0, dst, src1, src2, mask, w, left, left_top
+    add     wd, wd
+    movd    mm7, maskd
+    SPLATW  mm7, mm7
+    movq    mm0, [src1q]
+    movq    mm2, [src2q]
+    psllq   mm0, 16
+    psllq   mm2, 16
+    movd    mm6, [left_topq]
+    por     mm0, mm6
+    movd    mm6, [leftq]
+    por     mm2, mm6
+    xor     maskq, maskq
+.loop:
+    movq    mm1, [src1q + maskq]
+    movq    mm3, [src2q + maskq]
+    movq    mm4, mm2
+    psubw   mm2, mm0
+    paddw   mm2, mm1
+    pand    mm2, mm7
+    movq    mm5, mm4
+    pmaxsw  mm4, mm1
+    pminsw  mm1, mm5
+    pminsw  mm4, mm2
+    pmaxsw  mm4, mm1
+    psubw   mm3, mm4
+    pand    mm3, mm7
+    movq    [dstq + maskq], mm3
+    add     maskq, 8
+    movq    mm0, [src1q + maskq - 2]
+    movq    mm2, [src2q + maskq - 2]
+    cmp     maskq, wq
+    jb .loop
+    movzx maskd, word [src1q + wq - 2]
+    mov [left_topq], maskd
+    movzx maskd, word [src2q + wq - 2]
+    mov [leftq], maskd
+    RET
```
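The MMXEXT sub_hfyu_median_pred_int16 added above subtracts HuffYUV's median predictor from each sample of the current line and stores the masked residual, carrying the running left and left_top samples across calls. A scalar sketch of that behaviour follows, again with illustrative names (mid_pred written out inline, a `_ref` suffix); it mirrors, rather than reproduces, FFmpeg's C fallback.

```c
#include <stdint.h>

/* Median of three values, as used by the HuffYUV median predictor. */
static int mid_pred(int a, int b, int c)
{
    if (a > b) { int t = a; a = b; b = t; }  /* ensure a <= b       */
    if (b > c) b = c;                        /* b = min(b, c)       */
    return a > b ? a : b;                    /* median of the three */
}

/* Scalar sketch of sub_hfyu_median_pred_int16: predict each sample of
 * the current line (src2) from the median of (left, top, left + top -
 * top_left), store the masked residual, and hand the final left and
 * left_top values back to the caller for the next line. */
static void sub_hfyu_median_pred_int16_ref(uint16_t *dst,
                                           const uint16_t *src1, /* line above   */
                                           const uint16_t *src2, /* current line */
                                           unsigned mask, int w,
                                           int *left, int *left_top)
{
    int l = *left, lt = *left_top;
    for (int i = 0; i < w; i++) {
        const int pred = mid_pred(l, src1[i], (l + src1[i] - lt) & mask);
        lt     = src1[i];
        l      = src2[i];
        dst[i] = (l - pred) & mask;
    }
    *left     = l;
    *left_top = lt;
}
```

In the asm, the per-lane left and left_top values come from reloading src1/src2 offset back by one element (the [src1q + maskq - 2] and [src2q + maskq - 2] loads), with the very first lane seeded from *left and *left_top by the initial psllq/por sequence; the pmaxsw/pminsw chain is a branch-free median of the three predictors.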