aboutsummaryrefslogtreecommitdiffstats
path: root/libavcodec/x86/dsputil.asm
diff options
context:
space:
mode:
author Michael Niedermayer <michaelni@gmx.at> 2013-02-07 14:35:36 +0100
committer Michael Niedermayer <michaelni@gmx.at> 2013-02-07 14:35:49 +0100
commit 54d8322355c0b9dafe89f57fa9b2d4e5cf3b51af (patch)
tree 313c6d08aa1d0d3896471209085b1bb8e257d25e /libavcodec/x86/dsputil.asm
parent c7002e3d3dad3e026ec304066de65b5a21910067 (diff)
parent a1d36730342edf7281e5992a7f8aafabc2464ed0 (diff)
download ffmpeg-54d8322355c0b9dafe89f57fa9b2d4e5cf3b51af.tar.gz
Merge remote-tracking branch 'qatar/master'

* qatar/master:
  dsputil: x86: Fix compile error
  dsputil: x86: Convert h263 loop filter to yasm

Merged-by: Michael Niedermayer <michaelni@gmx.at>
Diffstat (limited to 'libavcodec/x86/dsputil.asm')
-rw-r--r-- libavcodec/x86/dsputil.asm 163
1 files changed, 163 insertions, 0 deletions
diff --git a/libavcodec/x86/dsputil.asm b/libavcodec/x86/dsputil.asm
index 587d5ee968..02bec2d516 100644
--- a/libavcodec/x86/dsputil.asm
+++ b/libavcodec/x86/dsputil.asm
@@ -22,6 +22,8 @@
%include "libavutil/x86/x86util.asm"
SECTION_RODATA
+cextern pb_FC
+cextern h263_loop_filter_strength
pb_f: times 16 db 15
pb_zzzzzzzz77777777: times 8 db -1
pb_7: times 8 db 7
@@ -648,3 +650,164 @@ BSWAP32_BUF
INIT_XMM ssse3
BSWAP32_BUF
+; H263_LOOP_FILTER a0, a1, a2, a3, t: %1-%4 are addresses of four pixel rows (both callers pass them in outer, inner, inner, outer order); %5 is a register whose low byte is broadcast as threshold t (both callers pass 2 * table entry).
+; Clobbers m0-m7 and stores nothing: on exit m5 = filtered [%1], m3 = filtered [%2], m4 = filtered [%3], m6 = filtered [%4].
+%macro H263_LOOP_FILTER 5
+ pxor m7, m7  ; m7 = 0, zero source for the byte->word unpacks
+ mova m0, [%1]
+ mova m1, [%1]
+ mova m2, [%4]
+ mova m3, [%4]
+ punpcklbw m0, m7  ; m0/m1 = low/high half of row %1 widened to words
+ punpckhbw m1, m7
+ punpcklbw m2, m7  ; m2/m3 = low/high half of row %4 widened to words
+ punpckhbw m3, m7
+ psubw m0, m2  ; m0/m1 = [%1] - [%4] (kept until the outer-row step below)
+ psubw m1, m3
+ mova m2, [%2]
+ mova m3, [%2]
+ mova m4, [%3]
+ mova m5, [%3]
+ punpcklbw m2, m7  ; m2/m3 = row %2 as words
+ punpckhbw m3, m7
+ punpcklbw m4, m7  ; m4/m5 = row %3 as words
+ punpckhbw m5, m7
+ psubw m4, m2  ; m4/m5 = [%3] - [%2]
+ psubw m5, m3
+ psllw m4, 2  ; ... times 4
+ psllw m5, 2
+ paddw m4, m0  ; m4/m5 = ([%1] - [%4]) + 4*([%3] - [%2])
+ paddw m5, m1
+ pxor m6, m6
+ pcmpgtw m6, m4  ; m6 = all-ones in words where the sum is negative
+ pcmpgtw m7, m5  ; same for the high half (m7 was still zero)
+ pxor m4, m6  ; xor + subtract of the mask = conditional negate -> |sum|
+ pxor m5, m7
+ psubw m4, m6
+ psubw m5, m7
+ psrlw m4, 3  ; |sum| >> 3
+ psrlw m5, 3
+ packuswb m4, m5  ; m4 = |d| as unsigned-saturated bytes
+ packsswb m6, m7  ; m6 = sign of d per byte (0x00 or 0xFF)
+ pxor m7, m7  ; m7 = 0 again, reused as a sign mask further down
+ movd m2, %5
+ punpcklbw m2, m2  ; three self-interleaves replicate the low byte of %5 into all 8 bytes
+ punpcklbw m2, m2
+ punpcklbw m2, m2
+ psubusb m2, m4  ; m2 = max(t - |d|, 0)
+ mova m3, m2
+ psubusb m3, m4  ; m3 = max(t - 2|d|, 0)
+ psubb m2, m3  ; m2 = |d1|: |d| while 2|d| <= t, t - |d| while |d| <= t < 2|d|, else 0
+ mova m3, [%2]
+ mova m4, [%3]
+ pxor m3, m6  ; complement the bytes where d is negative so one saturating op serves both signs
+ pxor m4, m6
+ paddusb m3, m2  ; row %2 moves by +d1, saturating
+ psubusb m4, m2  ; row %3 moves by -d1, saturating
+ pxor m3, m6  ; undo the complement: m3 / m4 = filtered rows %2 / %3
+ pxor m4, m6
+ paddusb m2, m2  ; m2 = 2*|d1| (saturating)
+ packsswb m0, m1  ; [%1] - [%4] packed to signed-saturated bytes
+ pcmpgtb m7, m0  ; m7 = per-byte sign mask of that difference
+ pxor m0, m7  ; conditional negate -> absolute value
+ psubb m0, m7
+ mova m1, m0
+ psubusb m0, m2  ; m0 = max(|diff| - 2|d1|, 0)
+ psubb m1, m0  ; m1 = min(|diff|, 2|d1|)
+ pand m1, [pb_FC]  ; pb_FC is external; presumably 0xFC in every byte so the word shift below cannot leak bits across bytes - confirm
+ psrlw m1, 2  ; >> 2, done as a word shift on the masked bytes
+ pxor m1, m7  ; re-apply the sign of the difference -> m1 = d2
+ psubb m1, m7
+ mova m5, [%1]
+ mova m6, [%4]
+ psubb m5, m1  ; m5 = [%1] - d2 (wrapping byte arithmetic)
+ paddb m6, m1  ; m6 = [%4] + d2 (wrapping byte arithmetic)
+%endmacro
+; Filters the four rows src-2*stride, src-stride, src and src+stride in place, one m-register (mova) width per row; threshold = 2 * h263_loop_filter_strength[qscale].
+INIT_MMX mmx
+; void h263_v_loop_filter(uint8_t *src, int stride, int qscale)
+cglobal h263_v_loop_filter, 3,5
+ movsxdifnidn r1, r1d  ; x86inc helper; presumably sign-extends the int stride to register width where needed
+ movsxdifnidn r2, r2d  ; same for qscale, used as a table index below
+
+ lea r4, [h263_loop_filter_strength]  ; r4 = base of the external strength table
+ movzx r3d, BYTE [r4+r2]  ; r3 = table[qscale]
+ movsx r2, r3b
+ shl r2, 1  ; r2 = 2 * table[qscale], passed to the macro as %5
+
+ mov r3, r0
+ sub r3, r1  ; r3 = src - stride
+ mov r4, r3
+ sub r4, r1  ; r4 = src - 2*stride
+ H263_LOOP_FILTER r4, r3, r0, r0+r1, r2d  ; results come back in m5, m3, m4, m6 (argument order)
+
+ mova [r3], m3  ; write back the two inner rows ...
+ mova [r0], m4
+ mova [r4], m5  ; ... and the two outer rows
+ mova [r0+r1], m6
+ RET
+; TRANSPOSE4X4 src, dst: loads 4 bytes from each of the rows src, src+r1, src+r1*2, src+r3 (so r3 must hold 3*r1) and stores the four byte columns as dwords at dst+0/8/16/24. Clobbers m0-m3.
+%macro TRANSPOSE4X4 2
+ movd m0, [%1]  ; row 0: a0 a1 a2 a3
+ movd m1, [%1+r1]  ; row 1: b0 b1 b2 b3
+ movd m2, [%1+r1*2]  ; row 2: c0 c1 c2 c3
+ movd m3, [%1+r3]  ; row 3: d0 d1 d2 d3
+ punpcklbw m0, m1  ; m0 = a0 b0 a1 b1 a2 b2 a3 b3
+ punpcklbw m2, m3  ; m2 = c0 d0 c1 d1 c2 d2 c3 d3
+ mova m1, m0
+ punpcklwd m0, m2  ; m0 = a0 b0 c0 d0 | a1 b1 c1 d1 (columns 0, 1)
+ punpckhwd m1, m2  ; m1 = a2 b2 c2 d2 | a3 b3 c3 d3 (columns 2, 3)
+ movd [%2+ 0], m0  ; column 0
+ punpckhdq m0, m0  ; bring the high dword down for the next movd
+ movd [%2+ 8], m0  ; column 1
+ movd [%2+16], m1  ; column 2
+ punpckhdq m1, m1
+ movd [%2+24], m1  ; column 3
+%endmacro
+; Applies H263_LOOP_FILTER across the four byte columns src-2 .. src+1 of eight consecutive rows, in place; threshold = 2 * h263_loop_filter_strength[qscale].
+; The 4x8 block is transposed into the 32-byte stack area requested by cglobal (four 8-byte rows), filtered there, then transposed back from registers and stored 4 bytes per row.
+; void h263_h_loop_filter(uint8_t *src, int stride, int qscale)
+INIT_MMX mmx
+cglobal h263_h_loop_filter, 3,5,0,32
+ movsxdifnidn r1, r1d  ; x86inc helper; presumably sign-extends the int stride to register width where needed
+ movsxdifnidn r2, r2d  ; same for qscale, used as a table index below
+
+ lea r4, [h263_loop_filter_strength]  ; r4 = base of the external strength table
+ movzx r3d, BYTE [r4+r2]  ; r3 = table[qscale]
+ movsx r2, r3b
+ shl r2, 1  ; r2 = 2 * table[qscale], passed to the macro as %5
+
+ sub r0, 2  ; r0 = src - 2: first of the four columns that get filtered
+ lea r3, [r1*3]  ; r3 = 3*stride, required by TRANSPOSE4X4 and the stores below
+
+ TRANSPOSE4X4 r0, rsp  ; rows 0-3 -> bytes 0-3 of each 8-byte buffer row
+ lea r4, [r0+r1*4]  ; r4 = address of row 4
+ TRANSPOSE4X4 r4, rsp+4  ; rows 4-7 -> bytes 4-7 of each 8-byte buffer row
+
+ H263_LOOP_FILTER rsp, rsp+8, rsp+16, rsp+24, r2d  ; out: m5 = col 0, m3 = col 1, m4 = col 2, m6 = col 3
+
+ mova m1, m5
+ mova m0, m4
+ punpcklbw m5, m3  ; m5 = col0/col1 byte pairs for rows 0-3
+ punpcklbw m4, m6  ; m4 = col2/col3 byte pairs for rows 0-3
+ punpckhbw m1, m3  ; m1 = col0/col1 byte pairs for rows 4-7
+ punpckhbw m0, m6  ; m0 = col2/col3 byte pairs for rows 4-7
+ mova m3, m5
+ mova m6, m1
+ punpcklwd m5, m4  ; m5 = row 0 | row 1 (4 bytes each)
+ punpcklwd m1, m0  ; m1 = row 4 | row 5
+ punpckhwd m3, m4  ; m3 = row 2 | row 3
+ punpckhwd m6, m0  ; m6 = row 6 | row 7
+ movd [r0], m5  ; row 0
+ punpckhdq m5, m5  ; bring the high dword down for the next movd
+ movd [r0+r1*1], m5  ; row 1
+ movd [r0+r1*2], m3  ; row 2
+ punpckhdq m3, m3
+ movd [r0+r3], m3  ; row 3
+ movd [r4], m1  ; row 4
+ punpckhdq m1, m1
+ movd [r4+r1*1], m1  ; row 5
+ movd [r4+r1*2], m6  ; row 6
+ punpckhdq m6, m6
+ movd [r4+r3], m6  ; row 7
+ RET