about | summary | refs | log | tree | commit | diff | stats
path: root/libavcodec/x86
diff options
context:
space:
mode:
author: Ronald S. Bultje <rsbultje@gmail.com> 2013-03-12 07:28:11 -0700
committer: Martin Storsjö <martin@martin.st> 2013-04-10 11:04:05 +0300
commit: b93b27edb0455287f49182fbf11b68e575f59225 (patch)
tree: 2b28d3f9edeb10c7de0e0dc33bc780182306a945 /libavcodec/x86
parent: 85deb51a01f1ecc5ac5faa52ad8ea141c384e23a (diff)
download: ffmpeg-b93b27edb0455287f49182fbf11b68e575f59225.tar.gz
dsputil: Make dsputil selectable
Signed-off-by: Martin Storsjö <martin@martin.st>
Diffstat (limited to 'libavcodec/x86')
-rw-r--r-- libavcodec/x86/Makefile 18
-rw-r--r-- libavcodec/x86/constants.c 42
-rw-r--r-- libavcodec/x86/dsputil_mmx.c 25
-rw-r--r-- libavcodec/x86/dsputil_mmx.h 7
-rw-r--r-- libavcodec/x86/h264_chromamc.asm 2
-rw-r--r-- libavcodec/x86/h264_deblock.asm 2
-rw-r--r-- libavcodec/x86/vp8dsp.asm 13
7 files changed, 60 insertions, 49 deletions
diff --git a/libavcodec/x86/Makefile b/libavcodec/x86/Makefile
index cc60f2f0b3..61672b8041 100644
--- a/libavcodec/x86/Makefile
+++ b/libavcodec/x86/Makefile
@@ -1,4 +1,5 @@
-OBJS += x86/fmtconvert_init.o
+OBJS += x86/constants.o \
+ x86/fmtconvert_init.o \
OBJS-$(CONFIG_AAC_DECODER) += x86/sbrdsp_init.o
OBJS-$(CONFIG_AC3DSP) += x86/ac3dsp_init.o
@@ -29,7 +30,7 @@ OBJS-$(CONFIG_VP6_DECODER) += x86/vp56dsp_init.o
OBJS-$(CONFIG_VP8_DECODER) += x86/vp8dsp_init.o
OBJS-$(CONFIG_XMM_CLOBBER_TEST) += x86/w64xmmtest.o
-MMX-OBJS += x86/dsputil_mmx.o \
+MMX-OBJS-$(CONFIG_DSPUTIL) += x86/dsputil_mmx.o \
x86/fdct.o \
x86/idct_mmx_xvid.o \
x86/idct_sse2_xvid.o \
@@ -42,6 +43,10 @@ MMX-OBJS-$(CONFIG_VC1_DECODER) += x86/vc1dsp_mmx.o
YASM-OBJS-$(CONFIG_AAC_DECODER) += x86/sbrdsp.o
YASM-OBJS-$(CONFIG_AC3DSP) += x86/ac3dsp.o
YASM-OBJS-$(CONFIG_DCT) += x86/dct32.o
+YASM-OBJS-$(CONFIG_DSPUTIL) += x86/dsputil.o \
+ x86/hpeldsp.o \
+ x86/mpeg4qpel.o \
+ x86/qpel.o
YASM-OBJS-$(CONFIG_ENCODERS) += x86/dsputilenc.o
YASM-OBJS-$(CONFIG_FFT) += x86/fft.o
YASM-OBJS-$(CONFIG_H263_DECODER) += x86/h263_loopfilter.o
@@ -57,7 +62,8 @@ YASM-OBJS-$(CONFIG_H264DSP) += x86/h264_deblock.o \
YASM-OBJS-$(CONFIG_H264PRED) += x86/h264_intrapred.o \
x86/h264_intrapred_10bit.o
YASM-OBJS-$(CONFIG_H264QPEL) += x86/h264_qpel_8bit.o \
- x86/h264_qpel_10bit.o
+ x86/h264_qpel_10bit.o \
+ x86/qpel.o
YASM-OBJS-$(CONFIG_MPEGAUDIODSP) += x86/imdct36.o
YASM-OBJS-$(CONFIG_PNG_DECODER) += x86/pngdsp.o
YASM-OBJS-$(CONFIG_PRORES_DECODER) += x86/proresdsp.o
@@ -71,9 +77,5 @@ YASM-OBJS-$(CONFIG_VP3DSP) += x86/vp3dsp.o
YASM-OBJS-$(CONFIG_VP6_DECODER) += x86/vp56dsp.o
YASM-OBJS-$(CONFIG_VP8_DECODER) += x86/vp8dsp.o
-YASM-OBJS += x86/dsputil.o \
- x86/deinterlace.o \
+YASM-OBJS += x86/deinterlace.o \
x86/fmtconvert.o \
- x86/hpeldsp.o \
- x86/mpeg4qpel.o \
- x86/qpel.o \
diff --git a/libavcodec/x86/constants.c b/libavcodec/x86/constants.c
new file mode 100644
index 0000000000..f4d04729ae
--- /dev/null
+++ b/libavcodec/x86/constants.c
@@ -0,0 +1,42 @@
+/*
+ * MMX/SSE constants used across x86 dsp optimizations.
+ *
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/mem.h"
+#include "libavutil/x86/asm.h" // for xmm_reg
+
+DECLARE_ALIGNED(8, const uint64_t, ff_bone) = 0x0101010101010101ULL;
+DECLARE_ALIGNED(8, const uint64_t, ff_wtwo) = 0x0002000200020002ULL;
+
+DECLARE_ALIGNED(16, const xmm_reg, ff_pw_1) = { 0x0001000100010001ULL, 0x0001000100010001ULL };
+DECLARE_ALIGNED(16, const xmm_reg, ff_pw_2) = { 0x0002000200020002ULL, 0x0002000200020002ULL };
+DECLARE_ALIGNED(16, const xmm_reg, ff_pw_3) = { 0x0003000300030003ULL, 0x0003000300030003ULL };
+DECLARE_ALIGNED(16, const xmm_reg, ff_pw_4) = { 0x0004000400040004ULL, 0x0004000400040004ULL };
+DECLARE_ALIGNED(16, const xmm_reg, ff_pw_5) = { 0x0005000500050005ULL, 0x0005000500050005ULL };
+DECLARE_ALIGNED(16, const xmm_reg, ff_pw_8) = { 0x0008000800080008ULL, 0x0008000800080008ULL };
+DECLARE_ALIGNED(16, const xmm_reg, ff_pw_9) = { 0x0009000900090009ULL, 0x0009000900090009ULL };
+DECLARE_ALIGNED(16, const xmm_reg, ff_pw_16) = { 0x0010001000100010ULL, 0x0010001000100010ULL };
+DECLARE_ALIGNED(16, const xmm_reg, ff_pw_18) = { 0x0012001200120012ULL, 0x0012001200120012ULL };
+DECLARE_ALIGNED(16, const xmm_reg, ff_pw_32) = { 0x0020002000200020ULL, 0x0020002000200020ULL };
+DECLARE_ALIGNED(16, const xmm_reg, ff_pw_64) = { 0x0040004000400040ULL, 0x0040004000400040ULL };
+
+DECLARE_ALIGNED(16, const xmm_reg, ff_pb_0) = { 0x0000000000000000ULL, 0x0000000000000000ULL };
+DECLARE_ALIGNED(16, const xmm_reg, ff_pb_1) = { 0x0101010101010101ULL, 0x0101010101010101ULL };
+DECLARE_ALIGNED(16, const xmm_reg, ff_pb_3) = { 0x0303030303030303ULL, 0x0303030303030303ULL };
+DECLARE_ALIGNED(16, const xmm_reg, ff_pb_80) = { 0x8080808080808080ULL, 0x8080808080808080ULL };
diff --git a/libavcodec/x86/dsputil_mmx.c b/libavcodec/x86/dsputil_mmx.c
index aa7b3984aa..7af11b0e68 100644
--- a/libavcodec/x86/dsputil_mmx.c
+++ b/libavcodec/x86/dsputil_mmx.c
@@ -36,47 +36,22 @@
//#include <assert.h>
/* pixel operations */
-DECLARE_ALIGNED(8, const uint64_t, ff_bone) = 0x0101010101010101ULL;
-DECLARE_ALIGNED(8, const uint64_t, ff_wtwo) = 0x0002000200020002ULL;
-
-DECLARE_ALIGNED(16, const xmm_reg, ff_pw_1) = { 0x0001000100010001ULL, 0x0001000100010001ULL };
-DECLARE_ALIGNED(16, const xmm_reg, ff_pw_2) = { 0x0002000200020002ULL, 0x0002000200020002ULL };
-DECLARE_ALIGNED(16, const xmm_reg, ff_pw_3) = { 0x0003000300030003ULL, 0x0003000300030003ULL };
-DECLARE_ALIGNED(16, const xmm_reg, ff_pw_4) = { 0x0004000400040004ULL, 0x0004000400040004ULL };
-DECLARE_ALIGNED(16, const xmm_reg, ff_pw_5) = { 0x0005000500050005ULL, 0x0005000500050005ULL };
-DECLARE_ALIGNED(16, const xmm_reg, ff_pw_8) = { 0x0008000800080008ULL, 0x0008000800080008ULL };
-DECLARE_ALIGNED(16, const xmm_reg, ff_pw_9) = { 0x0009000900090009ULL, 0x0009000900090009ULL };
DECLARE_ALIGNED(8, const uint64_t, ff_pw_15) = 0x000F000F000F000FULL;
-DECLARE_ALIGNED(16, const xmm_reg, ff_pw_16) = { 0x0010001000100010ULL, 0x0010001000100010ULL };
DECLARE_ALIGNED(16, const xmm_reg, ff_pw_17) = { 0x0011001100110011ULL, 0x0011001100110011ULL };
-DECLARE_ALIGNED(16, const xmm_reg, ff_pw_18) = { 0x0012001200120012ULL, 0x0012001200120012ULL };
DECLARE_ALIGNED(8, const uint64_t, ff_pw_20) = 0x0014001400140014ULL;
-DECLARE_ALIGNED(16, const xmm_reg, ff_pw_27) = { 0x001B001B001B001BULL, 0x001B001B001B001BULL };
-DECLARE_ALIGNED(16, const xmm_reg, ff_pw_28) = { 0x001C001C001C001CULL, 0x001C001C001C001CULL };
-DECLARE_ALIGNED(16, const xmm_reg, ff_pw_32) = { 0x0020002000200020ULL, 0x0020002000200020ULL };
DECLARE_ALIGNED(8, const uint64_t, ff_pw_42) = 0x002A002A002A002AULL;
DECLARE_ALIGNED(8, const uint64_t, ff_pw_53) = 0x0035003500350035ULL;
-DECLARE_ALIGNED(16, const xmm_reg, ff_pw_63) = { 0x003F003F003F003FULL, 0x003F003F003F003FULL };
-DECLARE_ALIGNED(16, const xmm_reg, ff_pw_64) = { 0x0040004000400040ULL, 0x0040004000400040ULL };
DECLARE_ALIGNED(8, const uint64_t, ff_pw_96) = 0x0060006000600060ULL;
DECLARE_ALIGNED(8, const uint64_t, ff_pw_128) = 0x0080008000800080ULL;
DECLARE_ALIGNED(8, const uint64_t, ff_pw_255) = 0x00ff00ff00ff00ffULL;
DECLARE_ALIGNED(16, const xmm_reg, ff_pw_512) = { 0x0200020002000200ULL, 0x0200020002000200ULL };
DECLARE_ALIGNED(16, const xmm_reg, ff_pw_1019) = { 0x03FB03FB03FB03FBULL, 0x03FB03FB03FB03FBULL };
-DECLARE_ALIGNED(16, const xmm_reg, ff_pb_0) = { 0x0000000000000000ULL, 0x0000000000000000ULL };
-DECLARE_ALIGNED(16, const xmm_reg, ff_pb_1) = { 0x0101010101010101ULL, 0x0101010101010101ULL };
-DECLARE_ALIGNED(16, const xmm_reg, ff_pb_3) = { 0x0303030303030303ULL, 0x0303030303030303ULL };
-DECLARE_ALIGNED(16, const xmm_reg, ff_pb_4) = { 0x0404040404040404ULL, 0x0404040404040404ULL };
DECLARE_ALIGNED(8, const uint64_t, ff_pb_7) = 0x0707070707070707ULL;
DECLARE_ALIGNED(8, const uint64_t, ff_pb_1F) = 0x1F1F1F1F1F1F1F1FULL;
DECLARE_ALIGNED(8, const uint64_t, ff_pb_3F) = 0x3F3F3F3F3F3F3F3FULL;
-DECLARE_ALIGNED(16, const xmm_reg, ff_pb_80) = { 0x8080808080808080ULL, 0x8080808080808080ULL };
DECLARE_ALIGNED(8, const uint64_t, ff_pb_81) = 0x8181818181818181ULL;
-DECLARE_ALIGNED(16, const xmm_reg, ff_pb_A1) = { 0xA1A1A1A1A1A1A1A1ULL, 0xA1A1A1A1A1A1A1A1ULL };
-DECLARE_ALIGNED(16, const xmm_reg, ff_pb_F8) = { 0xF8F8F8F8F8F8F8F8ULL, 0xF8F8F8F8F8F8F8F8ULL };
DECLARE_ALIGNED(8, const uint64_t, ff_pb_FC) = 0xFCFCFCFCFCFCFCFCULL;
-DECLARE_ALIGNED(16, const xmm_reg, ff_pb_FE) = { 0xFEFEFEFEFEFEFEFEULL, 0xFEFEFEFEFEFEFEFEULL };
DECLARE_ALIGNED(16, const double, ff_pd_1)[2] = { 1.0, 1.0 };
DECLARE_ALIGNED(16, const double, ff_pd_2)[2] = { 2.0, 2.0 };
diff --git a/libavcodec/x86/dsputil_mmx.h b/libavcodec/x86/dsputil_mmx.h
index e4880c0976..d4a7754fc9 100644
--- a/libavcodec/x86/dsputil_mmx.h
+++ b/libavcodec/x86/dsputil_mmx.h
@@ -28,8 +28,6 @@
#include "libavcodec/dsputil.h"
#include "libavutil/x86/asm.h"
-typedef struct xmm_reg { uint64_t a, b; } xmm_reg;
-
extern const uint64_t ff_bone;
extern const uint64_t ff_wtwo;
@@ -41,12 +39,9 @@ extern const uint64_t ff_pw_15;
extern const xmm_reg ff_pw_16;
extern const xmm_reg ff_pw_18;
extern const uint64_t ff_pw_20;
-extern const xmm_reg ff_pw_27;
-extern const xmm_reg ff_pw_28;
extern const xmm_reg ff_pw_32;
extern const uint64_t ff_pw_42;
extern const uint64_t ff_pw_53;
-extern const xmm_reg ff_pw_63;
extern const xmm_reg ff_pw_64;
extern const uint64_t ff_pw_96;
extern const uint64_t ff_pw_128;
@@ -58,10 +53,8 @@ extern const uint64_t ff_pb_7;
extern const uint64_t ff_pb_1F;
extern const uint64_t ff_pb_3F;
extern const uint64_t ff_pb_81;
-extern const xmm_reg ff_pb_A1;
extern const xmm_reg ff_pb_F8;
extern const uint64_t ff_pb_FC;
-extern const xmm_reg ff_pb_FE;
extern const double ff_pd_1[2];
extern const double ff_pd_2[2];
diff --git a/libavcodec/x86/h264_chromamc.asm b/libavcodec/x86/h264_chromamc.asm
index 440a473c6f..b7b18e03f8 100644
--- a/libavcodec/x86/h264_chromamc.asm
+++ b/libavcodec/x86/h264_chromamc.asm
@@ -60,7 +60,7 @@ rnd_rv40_1d_tbl: times 4 dw 0
cextern pw_3
cextern pw_4
cextern pw_8
-cextern pw_28
+pw_28: times 8 dw 28
cextern pw_32
cextern pw_64
diff --git a/libavcodec/x86/h264_deblock.asm b/libavcodec/x86/h264_deblock.asm
index 67f9c02464..fc6c983052 100644
--- a/libavcodec/x86/h264_deblock.asm
+++ b/libavcodec/x86/h264_deblock.asm
@@ -28,6 +28,7 @@
SECTION_RODATA
+pb_A1: times 16 db 0xA1
pb_3_1: times 4 db 3, 1
SECTION .text
@@ -35,7 +36,6 @@ SECTION .text
cextern pb_0
cextern pb_1
cextern pb_3
-cextern pb_A1
; expands to [base],...,[base+7*stride]
%define PASS8ROWS(base, base3, stride, stride3) \
diff --git a/libavcodec/x86/vp8dsp.asm b/libavcodec/x86/vp8dsp.asm
index caf2cd679a..1d7aadc809 100644
--- a/libavcodec/x86/vp8dsp.asm
+++ b/libavcodec/x86/vp8dsp.asm
@@ -143,11 +143,15 @@ filter_h6_shuf1: db 0, 5, 1, 6, 2, 7, 3, 8, 4, 9, 5, 10, 6, 11, 7, 12
filter_h6_shuf2: db 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9
filter_h6_shuf3: db 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11
-pw_256: times 8 dw 256
-
+pw_27: times 8 dw 27
+pw_63: times 8 dw 63
+pw_256: times 8 dw 256
pw_20091: times 4 dw 20091
pw_17734: times 4 dw 17734
+pb_4: times 16 db 4
+pb_F8: times 16 db 0xF8
+pb_FE: times 16 db 0xFE
pb_27_63: times 8 db 27, 63
pb_18_63: times 8 db 18, 63
pb_9_63: times 8 db 9, 63
@@ -156,15 +160,10 @@ cextern pb_1
cextern pw_3
cextern pb_3
cextern pw_4
-cextern pb_4
cextern pw_9
cextern pw_18
-cextern pw_27
-cextern pw_63
cextern pw_64
cextern pb_80
-cextern pb_F8
-cextern pb_FE
SECTION .text