diff options
author | Timothy Gu <timothygu99@gmail.com> | 2016-01-31 11:42:24 -0800 |
---|---|---|
committer | Timothy Gu <timothygu99@gmail.com> | 2016-02-01 17:01:11 -0800 |
commit | 838abfc1d711b42beaf401153b36ef80922b85b8 (patch) | |
tree | 14aa4ca3f18a1a65ea8d91ef4640175f10951a9f /libavcodec/x86/vc1dsp.asm | |
parent | b62825a480517eed151bfb105323c1549b325d00 (diff) | |
download | ffmpeg-838abfc1d711b42beaf401153b36ef80922b85b8.tar.gz |
x86: vc1dsp: Convert vc1_inv_trans_*_dc to NASM format
Diffstat (limited to 'libavcodec/x86/vc1dsp.asm')
-rw-r--r-- | libavcodec/x86/vc1dsp.asm | 98 |
1 files changed, 98 insertions, 0 deletions
diff --git a/libavcodec/x86/vc1dsp.asm b/libavcodec/x86/vc1dsp.asm
index 6415a836c0..91a1991851 100644
--- a/libavcodec/x86/vc1dsp.asm
+++ b/libavcodec/x86/vc1dsp.asm
@@ -395,3 +395,117 @@ cglobal vc1_put_ver_16b_shift2, 4,7,0, dst, src, stride
     jnz .loop
     REP_RET
 %endif ; HAVE_MMX_INLINE
+
+; DC-only inverse transforms: the functions below read block[0], rescale it,
+; and add the result to the pixels at dest with unsigned byte saturation.
+
+; blockd = (17 * blockd + bias) >> shift (arithmetic). Clobbers r3 and flags.
+%macro INV_TRANS_DC_MUL17 2 ; bias, shift
+    mov           r3d, blockd
+    shl        blockd, 4               ; 16 * dc
+    lea        blockd, [blockq+r3+%1]  ; 17 * dc + bias
+    sar        blockd, %2
+%endmacro
+
+; blockd = (3 * blockd + bias) >> shift (arithmetic). Clobbers flags.
+%macro INV_TRANS_DC_MUL3 2 ; bias, shift
+    lea        blockd, [blockq*3+%1]   ; 3 * dc + bias
+    sar        blockd, %2
+%endmacro
+
+; Prepare the constants used by INV_TRANS_PROCESS from the DC held in blockd:
+;   m0 = dc  in every byte, unsigned-saturated (0 when dc is negative)
+;   m1 = -dc in every byte, unsigned-saturated (0 when dc is positive)
+; Also widens linesize via movsxdifnidn and sets linesize3 = 3 * linesize.
+%macro INV_TRANS_INIT 0
+    movsxdifnidn linesizeq, linesized
+    movd       m0, blockd
+    SPLATW     m0, m0
+    pxor       m1, m1
+    psubw      m1, m0                  ; m1 = 0 - dc
+    packuswb   m0, m0
+    packuswb   m1, m1
+
+    ; The third argument is renamed from block to linesize3 here, so the DC
+    ; has to be copied out of blockd (the movd above) before this point.
+    DEFINE_ARGS dest, linesize, linesize3
+    lea    linesize3q, [linesizeq*3]
+%endmacro
+
+; Apply the DC set up by INV_TRANS_INIT to four consecutive rows at destq:
+; every byte becomes sat_u8(sat_u8(pixel + m0) - m1). The argument is the mov
+; suffix (h or a) and so selects how many bytes of each row are touched.
+; All four rows are loaded before any of them is written back.
+%macro INV_TRANS_PROCESS 1
+    mov%1      m2, [destq]
+    mov%1      m3, [destq+linesizeq]
+    mov%1      m4, [destq+linesizeq*2]
+    mov%1      m5, [destq+linesize3q]
+    paddusb    m2, m0
+    psubusb    m2, m1
+    paddusb    m3, m0
+    psubusb    m3, m1
+    paddusb    m4, m0
+    psubusb    m4, m1
+    paddusb    m5, m0
+    psubusb    m5, m1
+    mov%1      [destq], m2
+    mov%1      [destq+linesizeq], m3
+    mov%1      [destq+linesizeq*2], m4
+    mov%1      [destq+linesize3q], m5
+%endmacro
+
+; ff_vc1_inv_trans_?x?_dc_mmxext(uint8_t *dest, int linesize, int16_t *block)
+
+; 4x4: dc = (17 * dc + 4) >> 3, then dc = (17 * dc + 64) >> 7
+INIT_MMX mmxext
+cglobal vc1_inv_trans_4x4_dc, 3,4,0, dest, linesize, block
+    movsx      blockd, word [blockq]   ; dc = block[0]
+    INV_TRANS_DC_MUL17 4, 3
+    INV_TRANS_DC_MUL17 64, 7
+
+    INV_TRANS_INIT
+
+    INV_TRANS_PROCESS h
+    RET
+
+; 4x8: dc = (17 * dc + 4) >> 3, then dc = (12 * dc + 64) >> 7
+INIT_MMX mmxext
+cglobal vc1_inv_trans_4x8_dc, 3,4,0, dest, linesize, block
+    movsx      blockd, word [blockq]   ; dc = block[0]
+    INV_TRANS_DC_MUL17 4, 3
+    shl        blockd, 2               ; 4 * dc, so the step below gives 12 * dc
+    INV_TRANS_DC_MUL3 64, 7
+
+    INV_TRANS_INIT
+
+    INV_TRANS_PROCESS h
+    lea         destq, [destq+linesizeq*4] ; advance to the next four rows
+    INV_TRANS_PROCESS h
+    RET
+
+; 8x4: dc = (3 * dc + 1) >> 1, then dc = (17 * dc + 64) >> 7
+INIT_MMX mmxext
+cglobal vc1_inv_trans_8x4_dc, 3,4,0, dest, linesize, block
+    movsx      blockd, word [blockq]   ; dc = block[0]
+    INV_TRANS_DC_MUL3 1, 1
+    INV_TRANS_DC_MUL17 64, 7
+
+    INV_TRANS_INIT
+
+    INV_TRANS_PROCESS a
+    RET
+
+; 8x8: dc = (3 * dc + 1) >> 1, then dc = (3 * dc + 16) >> 5
+INIT_MMX mmxext
+cglobal vc1_inv_trans_8x8_dc, 3,3,0, dest, linesize, block
+    movsx      blockd, word [blockq]   ; dc = block[0]
+    INV_TRANS_DC_MUL3 1, 1
+    INV_TRANS_DC_MUL3 16, 5
+
+    INV_TRANS_INIT
+
+    INV_TRANS_PROCESS a
+    lea         destq, [destq+linesizeq*4] ; advance to the next four rows
+    INV_TRANS_PROCESS a
+    RET