aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMichael Niedermayer <michaelni@gmx.at>2013-02-19 12:41:27 +0100
committerMichael Niedermayer <michaelni@gmx.at>2013-02-19 12:41:27 +0100
commitfa09ad5c9e68bda47efddd357ed4715f7dab563b (patch)
tree5b4d724baa255e7871c8c17712b651d2d29b16f6
parentcf10616cc0db272bab9206a2f0f921f965e379da (diff)
parent9acd23d655b5e3a3b56f9916480356fe0e48c70c (diff)
downloadffmpeg-fa09ad5c9e68bda47efddd357ed4715f7dab563b.tar.gz
Merge remote-tracking branch 'qatar/master'
* qatar/master: x86: dsputil: Fix h263 loop filter link error in some configurations Conflicts: libavcodec/x86/dsputil.asm Merged-by: Michael Niedermayer <michaelni@gmx.at>
-rw-r--r--libavcodec/x86/Makefile2
-rw-r--r--libavcodec/x86/dsputil.asm164
-rw-r--r--libavcodec/x86/h263_loopfilter.asm187
3 files changed, 189 insertions, 164 deletions
diff --git a/libavcodec/x86/Makefile b/libavcodec/x86/Makefile
index 30264d78c6..ff7ea776ef 100644
--- a/libavcodec/x86/Makefile
+++ b/libavcodec/x86/Makefile
@@ -52,6 +52,8 @@ YASM-OBJS-$(CONFIG_DIRAC_DECODER) += x86/diracdsp_mmx.o x86/diracdsp_yasm.o
x86/dwt_yasm.o
YASM-OBJS-$(CONFIG_ENCODERS) += x86/dsputilenc.o
YASM-OBJS-$(CONFIG_FFT) += x86/fft.o
+YASM-OBJS-$(CONFIG_H263_DECODER) += x86/h263_loopfilter.o
+YASM-OBJS-$(CONFIG_H263_ENCODER) += x86/h263_loopfilter.o
YASM-OBJS-$(CONFIG_H264CHROMA) += x86/h264_chromamc.o \
x86/h264_chromamc_10bit.o
YASM-OBJS-$(CONFIG_H264DSP) += x86/h264_deblock.o \
diff --git a/libavcodec/x86/dsputil.asm b/libavcodec/x86/dsputil.asm
index 19acd8fec9..9970c02b6a 100644
--- a/libavcodec/x86/dsputil.asm
+++ b/libavcodec/x86/dsputil.asm
@@ -24,8 +24,6 @@
%include "libavutil/x86/x86util.asm"
SECTION_RODATA
-cextern pb_FC
-cextern h263_loop_filter_strength
pb_f: times 16 db 15
pb_zzzzzzzz77777777: times 8 db -1
pb_7: times 8 db 7
@@ -652,165 +650,3 @@ BSWAP32_BUF
INIT_XMM ssse3
BSWAP32_BUF
-
-%if CONFIG_H263_DECODER || CONFIG_H263_ENCODER
-%macro H263_LOOP_FILTER 5
- pxor m7, m7
- mova m0, [%1]
- mova m1, [%1]
- mova m2, [%4]
- mova m3, [%4]
- punpcklbw m0, m7
- punpckhbw m1, m7
- punpcklbw m2, m7
- punpckhbw m3, m7
- psubw m0, m2
- psubw m1, m3
- mova m2, [%2]
- mova m3, [%2]
- mova m4, [%3]
- mova m5, [%3]
- punpcklbw m2, m7
- punpckhbw m3, m7
- punpcklbw m4, m7
- punpckhbw m5, m7
- psubw m4, m2
- psubw m5, m3
- psllw m4, 2
- psllw m5, 2
- paddw m4, m0
- paddw m5, m1
- pxor m6, m6
- pcmpgtw m6, m4
- pcmpgtw m7, m5
- pxor m4, m6
- pxor m5, m7
- psubw m4, m6
- psubw m5, m7
- psrlw m4, 3
- psrlw m5, 3
- packuswb m4, m5
- packsswb m6, m7
- pxor m7, m7
- movd m2, %5
- punpcklbw m2, m2
- punpcklbw m2, m2
- punpcklbw m2, m2
- psubusb m2, m4
- mova m3, m2
- psubusb m3, m4
- psubb m2, m3
- mova m3, [%2]
- mova m4, [%3]
- pxor m3, m6
- pxor m4, m6
- paddusb m3, m2
- psubusb m4, m2
- pxor m3, m6
- pxor m4, m6
- paddusb m2, m2
- packsswb m0, m1
- pcmpgtb m7, m0
- pxor m0, m7
- psubb m0, m7
- mova m1, m0
- psubusb m0, m2
- psubb m1, m0
- pand m1, [pb_FC]
- psrlw m1, 2
- pxor m1, m7
- psubb m1, m7
- mova m5, [%1]
- mova m6, [%4]
- psubb m5, m1
- paddb m6, m1
-%endmacro
-
-INIT_MMX mmx
-; void h263_v_loop_filter(uint8_t *src, int stride, int qscale)
-cglobal h263_v_loop_filter, 3,5
- movsxdifnidn r1, r1d
- movsxdifnidn r2, r2d
-
- lea r4, [h263_loop_filter_strength]
- movzx r3d, BYTE [r4+r2]
- movsx r2, r3b
- shl r2, 1
-
- mov r3, r0
- sub r3, r1
- mov r4, r3
- sub r4, r1
- H263_LOOP_FILTER r4, r3, r0, r0+r1, r2d
-
- mova [r3], m3
- mova [r0], m4
- mova [r4], m5
- mova [r0+r1], m6
- RET
-
-%macro TRANSPOSE4X4 2
- movd m0, [%1]
- movd m1, [%1+r1]
- movd m2, [%1+r1*2]
- movd m3, [%1+r3]
- punpcklbw m0, m1
- punpcklbw m2, m3
- mova m1, m0
- punpcklwd m0, m2
- punpckhwd m1, m2
- movd [%2+ 0], m0
- punpckhdq m0, m0
- movd [%2+ 8], m0
- movd [%2+16], m1
- punpckhdq m1, m1
- movd [%2+24], m1
-%endmacro
-
-
-; void h263_h_loop_filter(uint8_t *src, int stride, int qscale)
-INIT_MMX mmx
-cglobal h263_h_loop_filter, 3,5,0,32
- movsxdifnidn r1, r1d
- movsxdifnidn r2, r2d
-
- lea r4, [h263_loop_filter_strength]
- movzx r3d, BYTE [r4+r2]
- movsx r2, r3b
- shl r2, 1
-
- sub r0, 2
- lea r3, [r1*3]
-
- TRANSPOSE4X4 r0, rsp
- lea r4, [r0+r1*4]
- TRANSPOSE4X4 r4, rsp+4
-
- H263_LOOP_FILTER rsp, rsp+8, rsp+16, rsp+24, r2d
-
- mova m1, m5
- mova m0, m4
- punpcklbw m5, m3
- punpcklbw m4, m6
- punpckhbw m1, m3
- punpckhbw m0, m6
- mova m3, m5
- mova m6, m1
- punpcklwd m5, m4
- punpcklwd m1, m0
- punpckhwd m3, m4
- punpckhwd m6, m0
- movd [r0], m5
- punpckhdq m5, m5
- movd [r0+r1*1], m5
- movd [r0+r1*2], m3
- punpckhdq m3, m3
- movd [r0+r3], m3
- movd [r4], m1
- punpckhdq m1, m1
- movd [r4+r1*1], m1
- movd [r4+r1*2], m6
- punpckhdq m6, m6
- movd [r4+r3], m6
- RET
-%endif ; CONFIG_H263_DECODER || CONFIG_H263_ENCODER
diff --git a/libavcodec/x86/h263_loopfilter.asm b/libavcodec/x86/h263_loopfilter.asm
new file mode 100644
index 0000000000..a940aad07a
--- /dev/null
+++ b/libavcodec/x86/h263_loopfilter.asm
@@ -0,0 +1,187 @@
+;******************************************************************************
+;* MMX-optimized H.263 loop filter
+;*
+;* This file is part of Libav.
+;*
+;* Libav is free software; you can redistribute it and/or
+;* modify it under the terms of the GNU Lesser General Public
+;* License as published by the Free Software Foundation; either
+;* version 2.1 of the License, or (at your option) any later version.
+;*
+;* Libav is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+;* Lesser General Public License for more details.
+;*
+;* You should have received a copy of the GNU Lesser General Public
+;* License along with Libav; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;******************************************************************************
+
+%include "libavutil/x86/x86util.asm"
+
+SECTION_RODATA
+cextern pb_FC
+cextern h263_loop_filter_strength
+
+SECTION_TEXT
+; H263_LOOP_FILTER A, B, C, D, S2: A..D are the addresses of four pixel rows (args 1-4), S2 (arg 5) is moved in with movd and only its low byte is used (callers pass 2*strength). Results: m5 = new row A, m3 = new row B, m4 = new row C, m6 = new row D. Overwrites m0-m7 (x86inc register aliases; assumed to be the 64-bit MMX registers under INIT_MMX).
+%macro H263_LOOP_FILTER 5
+ pxor m7, m7 ; m7 = 0, the zero half used when widening bytes to words
+ mova m0, [%1] ; rows A and D are each loaded twice: one copy for the low half, one for the high half
+ mova m1, [%1]
+ mova m2, [%4]
+ mova m3, [%4]
+ punpcklbw m0, m7 ; m0 = low-half bytes of row A zero-extended to words
+ punpckhbw m1, m7 ; m1 = high-half bytes of row A as words
+ punpcklbw m2, m7 ; m2:m3 = row D widened the same way
+ punpckhbw m3, m7
+ psubw m0, m2 ; m0:m1 = A - D (signed words), kept until the outer-row step below
+ psubw m1, m3
+ mova m2, [%2]
+ mova m3, [%2]
+ mova m4, [%3]
+ mova m5, [%3]
+ punpcklbw m2, m7 ; m2:m3 = row B as words
+ punpckhbw m3, m7
+ punpcklbw m4, m7 ; m4:m5 = row C as words
+ punpckhbw m5, m7
+ psubw m4, m2 ; m4:m5 = C - B
+ psubw m5, m3
+ psllw m4, 2 ; m4:m5 = 4*(C - B)
+ psllw m5, 2
+ paddw m4, m0 ; m4:m5 = (A - D) + 4*(C - B)
+ paddw m5, m1
+ pxor m6, m6
+ pcmpgtw m6, m4 ; m6 = all-ones in word lanes where the low-half sum is negative
+ pcmpgtw m7, m5 ; m7 was still zero, so it becomes the sign mask of the high-half sum
+ pxor m4, m6 ; xor with the mask, then subtract the mask, negates exactly the negative lanes
+ pxor m5, m7
+ psubw m4, m6 ; m4:m5 = absolute value of the sum
+ psubw m5, m7
+ psrlw m4, 3 ; magnitude >> 3
+ psrlw m5, 3
+ packuswb m4, m5 ; m4 = the eight magnitudes as unsigned-saturated bytes (called d_abs below)
+ packsswb m6, m7 ; m6 = matching per-byte sign mask (0x00 or 0xFF)
+ pxor m7, m7 ; m7 = 0 again
+ movd m2, %5
+ punpcklbw m2, m2 ; three self-interleaves replicate the low byte of arg 5 ...
+ punpcklbw m2, m2
+ punpcklbw m2, m2 ; ... into all eight bytes of m2 (called S2 below)
+ psubusb m2, m4 ; m2 = max(S2 - d_abs, 0)
+ mova m3, m2
+ psubusb m3, m4 ; m3 = max(m2 - d_abs, 0)
+ psubb m2, m3 ; m2 = min(d_abs, max(S2 - d_abs, 0)) = magnitude of the inner-row correction
+ mova m3, [%2]
+ mova m4, [%3]
+ pxor m3, m6 ; complement bytes in negative lanes so one saturating add/sub serves both signs
+ pxor m4, m6
+ paddusb m3, m2 ; row B moves by the correction in the direction of the sign mask, clamped to 0..255
+ psubusb m4, m2 ; row C moves the opposite way, also clamped
+ pxor m3, m6 ; undo the complement: m3 = filtered row B
+ pxor m4, m6 ; m4 = filtered row C
+ paddusb m2, m2 ; m2 = 2 * inner correction magnitude (saturating)
+ packsswb m0, m1 ; m0 = A - D narrowed to signed-saturated bytes
+ pcmpgtb m7, m0 ; m7 = per-byte sign mask of A - D
+ pxor m0, m7
+ psubb m0, m7 ; m0 = magnitude of the (saturated) A - D per byte
+ mova m1, m0
+ psubusb m0, m2 ; m0 = max(m1 - m2, 0)
+ psubb m1, m0 ; m1 = min(magnitude of A - D, m2)
+ pand m1, [pb_FC] ; pb_FC is an external constant, presumably 0xFC in every byte: clears the two low bits so the word shift below cannot leak bits into the neighbouring byte -- confirm
+ psrlw m1, 2 ; effectively a per-byte >> 2 after the mask above
+ pxor m1, m7
+ psubb m1, m7 ; re-apply the sign of A - D: m1 = signed outer-row correction
+ mova m5, [%1]
+ mova m6, [%4]
+ psubb m5, m1 ; m5 = filtered row A (wrapping byte subtract, no saturation)
+ paddb m6, m1 ; m6 = filtered row D (wrapping byte add)
+%endmacro
+; h263_v_loop_filter: runs H263_LOOP_FILTER on the rows src-2*stride, src-stride, src and src+stride, then stores the four filtered rows back in place.
+INIT_MMX mmx
+; void h263_v_loop_filter(uint8_t *src, int stride, int qscale)
+cglobal h263_v_loop_filter, 3,5
+ movsxdifnidn r1, r1d ; x86inc helper (defined outside this file); presumably sign-extends the int stride to full register width where needed
+ movsxdifnidn r2, r2d ; same treatment for qscale
+; r2 = 2 * h263_loop_filter_strength[qscale]
+ lea r4, [h263_loop_filter_strength] ; base of the external strength table
+ movzx r3d, BYTE [r4+r2] ; r3 = table byte at index qscale
+ movsx r2, r3b
+ shl r2, 1 ; doubled, as expected by arg 5 of H263_LOOP_FILTER
+; r3 = src - stride, r4 = src - 2*stride
+ mov r3, r0
+ sub r3, r1
+ mov r4, r3
+ sub r4, r1
+ H263_LOOP_FILTER r4, r3, r0, r0+r1, r2d
+; write back; which register holds which row is fixed by H263_LOOP_FILTER
+ mova [r3], m3 ; row at src - stride
+ mova [r0], m4 ; row at src
+ mova [r4], m5 ; row at src - 2*stride
+ mova [r0+r1], m6 ; row at src + stride
+ RET
+; TRANSPOSE4X4 src, dst: loads 4 bytes from each of the rows src, src+r1, src+2*r1 and src+r3, and stores column k of that 4x4 byte block as 4 bytes at dst+8*k. Relies on the caller's r1 (row pitch) and r3 (the visible caller sets r3 = 3*r1). Overwrites m0-m3.
+%macro TRANSPOSE4X4 2
+ movd m0, [%1] ; row 0: a0 a1 a2 a3
+ movd m1, [%1+r1] ; row 1: b0 b1 b2 b3
+ movd m2, [%1+r1*2] ; row 2: c0 c1 c2 c3
+ movd m3, [%1+r3] ; row 3: d0 d1 d2 d3
+ punpcklbw m0, m1 ; m0 = a0 b0 a1 b1 a2 b2 a3 b3
+ punpcklbw m2, m3 ; m2 = c0 d0 c1 d1 c2 d2 c3 d3
+ mova m1, m0
+ punpcklwd m0, m2 ; m0 = a0 b0 c0 d0, a1 b1 c1 d1 (columns 0 and 1)
+ punpckhwd m1, m2 ; m1 = a2 b2 c2 d2, a3 b3 c3 d3 (columns 2 and 3)
+ movd [%2+ 0], m0 ; column 0
+ punpckhdq m0, m0 ; bring the upper dword down so movd can store it
+ movd [%2+ 8], m0 ; column 1
+ movd [%2+16], m1 ; column 2
+ punpckhdq m1, m1
+ movd [%2+24], m1 ; column 3
+%endmacro
+; h263_h_loop_filter: gathers the 4 bytes starting at src-2 from each of 8 rows into four column vectors in a stack buffer,
+; runs H263_LOOP_FILTER on those vectors, then transposes the result back and stores 4 bytes per row.
+; void h263_h_loop_filter(uint8_t *src, int stride, int qscale)
+INIT_MMX mmx
+cglobal h263_h_loop_filter, 3,5,0,32 ; trailing 32 is presumably the x86inc stack-size argument; the code below uses rsp+0 .. rsp+31 as scratch -- confirm
+ movsxdifnidn r1, r1d ; x86inc helper (defined outside this file); presumably sign-extends the int stride to full register width where needed
+ movsxdifnidn r2, r2d ; same treatment for qscale
+; r2 = 2 * h263_loop_filter_strength[qscale]
+ lea r4, [h263_loop_filter_strength] ; base of the external strength table
+ movzx r3d, BYTE [r4+r2] ; r3 = table byte at index qscale
+ movsx r2, r3b
+ shl r2, 1 ; doubled, as expected by arg 5 of H263_LOOP_FILTER
+; r0 -> first of the 4 bytes handled per row; r3 = 3*stride
+ sub r0, 2 ; start two bytes to the left of src
+ lea r3, [r1*3] ; needed by TRANSPOSE4X4 and by the row-3 / row-7 stores below
+; build column vectors at rsp+0, rsp+8, rsp+16, rsp+24
+ TRANSPOSE4X4 r0, rsp ; rows 0-3 fill bytes 0-3 of each vector
+ lea r4, [r0+r1*4] ; r4 = address of row 4
+ TRANSPOSE4X4 r4, rsp+4 ; rows 4-7 fill bytes 4-7 of each vector
+
+ H263_LOOP_FILTER rsp, rsp+8, rsp+16, rsp+24, r2d
+; transpose back: the macro leaves filtered columns 0/1/2/3 in m5/m3/m4/m6
+ mova m1, m5
+ mova m0, m4
+ punpcklbw m5, m3 ; rows 0-3: (column 0, column 1) byte pairs
+ punpcklbw m4, m6 ; rows 0-3: (column 2, column 3) byte pairs
+ punpckhbw m1, m3 ; rows 4-7: (column 0, column 1) byte pairs
+ punpckhbw m0, m6 ; rows 4-7: (column 2, column 3) byte pairs
+ mova m3, m5
+ mova m6, m1
+ punpcklwd m5, m4 ; m5 = row 0 in the low dword, row 1 in the high dword
+ punpcklwd m1, m0 ; m1 = row 4, row 5
+ punpckhwd m3, m4 ; m3 = row 2, row 3
+ punpckhwd m6, m0 ; m6 = row 6, row 7
+ movd [r0], m5 ; row 0
+ punpckhdq m5, m5 ; bring the upper dword down so movd can store it
+ movd [r0+r1*1], m5 ; row 1
+ movd [r0+r1*2], m3 ; row 2
+ punpckhdq m3, m3
+ movd [r0+r3], m3 ; row 3
+ movd [r4], m1 ; row 4
+ punpckhdq m1, m1
+ movd [r4+r1*1], m1 ; row 5
+ movd [r4+r1*2], m6 ; row 6
+ punpckhdq m6, m6
+ movd [r4+r3], m6 ; row 7
+ RET