aboutsummaryrefslogtreecommitdiffstats
path: root/libavcodec/x86
diff options
context:
space:
mode:
author Michael Niedermayer <michaelni@gmx.at> 2014-07-07 15:04:05 +0200
committer Michael Niedermayer <michaelni@gmx.at> 2014-07-07 15:08:55 +0200
commit 462c6cdb8ed256d2063815b67ca4d14e62e25802 (patch)
tree 324761563d86a2e3e2e8a47dc28e53a407bf8857 /libavcodec/x86
parent 8324bd51867fdc8a79cbdb5850d36bfd0c741d3f (diff)
parent 8d686ca59db14900ad5c12b547fb8a7afc8b0b94 (diff)
download ffmpeg-462c6cdb8ed256d2063815b67ca4d14e62e25802.tar.gz
Merge commit '8d686ca59db14900ad5c12b547fb8a7afc8b0b94'
* commit '8d686ca59db14900ad5c12b547fb8a7afc8b0b94':
  dsputil: Split off *_8x8basis to a separate context

Conflicts:
  libavcodec/dsputil.c
  libavcodec/mpegvideo_enc.c
  libavcodec/x86/dsputilenc_mmx.c

Merged-by: Michael Niedermayer <michaelni@gmx.at>
Diffstat (limited to 'libavcodec/x86')
-rw-r--r-- libavcodec/x86/Makefile 3
-rw-r--r-- libavcodec/x86/dsputilenc_mmx.c 79
-rw-r--r-- libavcodec/x86/mpegvideoenc_qns_template.c (renamed from libavcodec/x86/dsputil_qns_template.c) 4
-rw-r--r-- libavcodec/x86/mpegvideoencdsp_init.c 125
4 files changed, 129 insertions, 82 deletions
diff --git a/libavcodec/x86/Makefile b/libavcodec/x86/Makefile
index d376bb73e1..f757be177d 100644
--- a/libavcodec/x86/Makefile
+++ b/libavcodec/x86/Makefile
@@ -28,7 +28,8 @@ OBJS-$(CONFIG_LPC) += x86/lpc.o
OBJS-$(CONFIG_MPEGAUDIODSP) += x86/mpegaudiodsp.o
OBJS-$(CONFIG_MPEGVIDEO) += x86/mpegvideo.o \
x86/mpegvideodsp.o
-OBJS-$(CONFIG_MPEGVIDEOENC) += x86/mpegvideoenc.o
+OBJS-$(CONFIG_MPEGVIDEOENC) += x86/mpegvideoenc.o \
+ x86/mpegvideoencdsp_init.o
OBJS-$(CONFIG_QPELDSP) += x86/qpeldsp_init.o
OBJS-$(CONFIG_VIDEODSP) += x86/videodsp_init.o
OBJS-$(CONFIG_VP3DSP) += x86/vp3dsp_init.o
diff --git a/libavcodec/x86/dsputilenc_mmx.c b/libavcodec/x86/dsputilenc_mmx.c
index 01dec6ec47..f235ad0a53 100644
--- a/libavcodec/x86/dsputilenc_mmx.c
+++ b/libavcodec/x86/dsputilenc_mmx.c
@@ -352,72 +352,6 @@ static int vsad16_mmxext(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
#undef SUM
-#define PHADDD(a, t) \
- "movq " #a ", " #t " \n\t" \
- "psrlq $32, " #a " \n\t" \
- "paddd " #t ", " #a " \n\t"
-
-/*
- * pmulhw: dst[0 - 15] = (src[0 - 15] * dst[0 - 15])[16 - 31]
- * pmulhrw: dst[0 - 15] = (src[0 - 15] * dst[0 - 15] + 0x8000)[16 - 31]
- * pmulhrsw: dst[0 - 15] = (src[0 - 15] * dst[0 - 15] + 0x4000)[15 - 30]
- */
-#define PMULHRW(x, y, s, o) \
- "pmulhw " #s ", " #x " \n\t" \
- "pmulhw " #s ", " #y " \n\t" \
- "paddw " #o ", " #x " \n\t" \
- "paddw " #o ", " #y " \n\t" \
- "psraw $1, " #x " \n\t" \
- "psraw $1, " #y " \n\t"
-#define DEF(x) x ## _mmx
-#define SET_RND MOVQ_WONE
-#define SCALE_OFFSET 1
-
-#include "dsputil_qns_template.c"
-
-#undef DEF
-#undef SET_RND
-#undef SCALE_OFFSET
-#undef PMULHRW
-
-#define DEF(x) x ## _3dnow
-#define SET_RND(x)
-#define SCALE_OFFSET 0
-#define PMULHRW(x, y, s, o) \
- "pmulhrw " #s ", " #x " \n\t" \
- "pmulhrw " #s ", " #y " \n\t"
-
-#include "dsputil_qns_template.c"
-
-#undef DEF
-#undef SET_RND
-#undef SCALE_OFFSET
-#undef PMULHRW
-
-#if HAVE_SSSE3_INLINE
-#undef PHADDD
-#define DEF(x) x ## _ssse3
-#define SET_RND(x)
-#define SCALE_OFFSET -1
-
-#define PHADDD(a, t) \
- "pshufw $0x0E, " #a ", " #t " \n\t" \
- /* faster than phaddd on core2 */ \
- "paddd " #t ", " #a " \n\t"
-
-#define PMULHRW(x, y, s, o) \
- "pmulhrsw " #s ", " #x " \n\t" \
- "pmulhrsw " #s ", " #y " \n\t"
-
-#include "dsputil_qns_template.c"
-
-#undef DEF
-#undef SET_RND
-#undef SCALE_OFFSET
-#undef PMULHRW
-#undef PHADDD
-#endif /* HAVE_SSSE3_INLINE */
-
#endif /* HAVE_INLINE_ASM */
av_cold void ff_dsputilenc_init_mmx(DSPContext *c, AVCodecContext *avctx,
@@ -448,16 +382,7 @@ av_cold void ff_dsputilenc_init_mmx(DSPContext *c, AVCodecContext *avctx,
if (!(avctx->flags & CODEC_FLAG_BITEXACT)) {
c->vsad[0] = vsad16_mmx;
- c->try_8x8basis = try_8x8basis_mmx;
- }
- c->add_8x8basis = add_8x8basis_mmx;
- }
-
- if (INLINE_AMD3DNOW(cpu_flags)) {
- if (!(avctx->flags & CODEC_FLAG_BITEXACT)) {
- c->try_8x8basis = try_8x8basis_3dnow;
}
- c->add_8x8basis = add_8x8basis_3dnow;
}
if (INLINE_MMXEXT(cpu_flags)) {
@@ -480,10 +405,6 @@ av_cold void ff_dsputilenc_init_mmx(DSPContext *c, AVCodecContext *avctx,
#if HAVE_SSSE3_INLINE
if (INLINE_SSSE3(cpu_flags)) {
- if (!(avctx->flags & CODEC_FLAG_BITEXACT)) {
- c->try_8x8basis = try_8x8basis_ssse3;
- }
- c->add_8x8basis = add_8x8basis_ssse3;
}
#endif
#endif /* HAVE_INLINE_ASM */
diff --git a/libavcodec/x86/dsputil_qns_template.c b/libavcodec/x86/mpegvideoenc_qns_template.c
index ebaad252f4..882d486205 100644
--- a/libavcodec/x86/dsputil_qns_template.c
+++ b/libavcodec/x86/mpegvideoenc_qns_template.c
@@ -1,5 +1,5 @@
/*
- * DSP utils : QNS functions are compiled 3 times for mmx/3dnow/ssse3
+ * QNS functions are compiled 3 times for MMX/3DNOW/SSSE3
* Copyright (c) 2004 Michael Niedermayer
*
* MMX optimization by Michael Niedermayer <michaelni@gmx.at>
@@ -22,9 +22,9 @@
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
-#include <assert.h>
#include <stdint.h>
+#include "libavutil/avassert.h"
#include "libavutil/common.h"
#include "libavutil/x86/asm.h"
diff --git a/libavcodec/x86/mpegvideoencdsp_init.c b/libavcodec/x86/mpegvideoencdsp_init.c
new file mode 100644
index 0000000000..d7650ec0e1
--- /dev/null
+++ b/libavcodec/x86/mpegvideoencdsp_init.c
@@ -0,0 +1,125 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/attributes.h"
+#include "libavutil/cpu.h"
+#include "libavutil/x86/cpu.h"
+#include "libavcodec/avcodec.h"
+#include "libavcodec/mpegvideoencdsp.h"
+
+#if HAVE_INLINE_ASM
+
+/*
+ * This section includes mpegvideoenc_qns_template.c up to three times, each
+ * time with a different set of helper macros (DEF, SET_RND, SCALE_OFFSET,
+ * PMULHRW, PHADDD). DEF() appends a per-variant suffix to the names the
+ * template defines, so each inclusion yields separately named functions
+ * (e.g. try_8x8basis_mmx / add_8x8basis_mmx for the first one).
+ *
+ * PHADDD(a, t): asm snippet that copies a into t, shifts a right by 32 bits
+ * and adds t back into a, leaving the sum of a's two original dwords in its
+ * low dword; t is scratch. (Operand order as in AT&T syntax, which the "$32"
+ * immediate suggests.) This generic version is shared by the MMX and 3DNow!
+ * instantiations.
+ */
+#define PHADDD(a, t) \
+ "movq " #a ", " #t " \n\t" \
+ "psrlq $32, " #a " \n\t" \
+ "paddd " #t ", " #a " \n\t"
+
+/*
+ * pmulhw: dst[0 - 15] = (src[0 - 15] * dst[0 - 15])[16 - 31]
+ * pmulhrw: dst[0 - 15] = (src[0 - 15] * dst[0 - 15] + 0x8000)[16 - 31]
+ * pmulhrsw: dst[0 - 15] = (src[0 - 15] * dst[0 - 15] + 0x4000)[15 - 30]
+ *
+ * The MMX PMULHRW below is assembled from pmulhw, an add of the o operand
+ * and an arithmetic shift right by one, applied to both x and y.
+ * NOTE(review): o is presumably the constant prepared by SET_RND, and
+ * SCALE_OFFSET presumably compensates for the different result scaling of
+ * the three multiply variants; both are consumed inside the template, which
+ * is not visible here -- confirm there.
+ */
+#define PMULHRW(x, y, s, o) \
+ "pmulhw " #s ", " #x " \n\t" \
+ "pmulhw " #s ", " #y " \n\t" \
+ "paddw " #o ", " #x " \n\t" \
+ "paddw " #o ", " #y " \n\t" \
+ "psraw $1, " #x " \n\t" \
+ "psraw $1, " #y " \n\t"
+/* Instantiation 1: _mmx suffix, SET_RND mapped to MOVQ_WONE, SCALE_OFFSET 1. */
+#define DEF(x) x ## _mmx
+#define SET_RND MOVQ_WONE
+#define SCALE_OFFSET 1
+
+#include "mpegvideoenc_qns_template.c"
+
+#undef DEF
+#undef SET_RND
+#undef SCALE_OFFSET
+#undef PMULHRW
+
+/*
+ * Instantiation 2: _3dnow suffix. SET_RND expands to nothing, SCALE_OFFSET
+ * is 0 and PMULHRW is just the pmulhrw instruction on x and y (the o operand
+ * is ignored). PHADDD is still the generic version defined above.
+ */
+#define DEF(x) x ## _3dnow
+#define SET_RND(x)
+#define SCALE_OFFSET 0
+#define PMULHRW(x, y, s, o) \
+ "pmulhrw " #s ", " #x " \n\t" \
+ "pmulhrw " #s ", " #y " \n\t"
+
+#include "mpegvideoenc_qns_template.c"
+
+#undef DEF
+#undef SET_RND
+#undef SCALE_OFFSET
+#undef PMULHRW
+
+#if HAVE_SSSE3_INLINE
+/*
+ * Instantiation 3 (only when HAVE_SSSE3_INLINE): _ssse3 suffix, empty
+ * SET_RND, SCALE_OFFSET -1, PMULHRW built on pmulhrsw, and the generic
+ * PHADDD replaced by a pshufw + paddd pair. PHADDD is undefined again at the
+ * end of this section; in builds without HAVE_SSSE3_INLINE the generic
+ * PHADDD simply stays defined.
+ */
+#undef PHADDD
+#define DEF(x) x ## _ssse3
+#define SET_RND(x)
+#define SCALE_OFFSET -1
+
+#define PHADDD(a, t) \
+ "pshufw $0x0E, " #a ", " #t " \n\t" \
+ /* faster than phaddd on core2 */ \
+ "paddd " #t ", " #a " \n\t"
+
+#define PMULHRW(x, y, s, o) \
+ "pmulhrsw " #s ", " #x " \n\t" \
+ "pmulhrsw " #s ", " #y " \n\t"
+
+#include "mpegvideoenc_qns_template.c"
+
+#undef DEF
+#undef SET_RND
+#undef SCALE_OFFSET
+#undef PMULHRW
+#undef PHADDD
+#endif /* HAVE_SSSE3_INLINE */
+
+#endif /* HAVE_INLINE_ASM */
+
+/**
+ * Hook up the x86 inline-assembly versions of the 8x8 basis helpers.
+ *
+ * Does nothing unless HAVE_INLINE_ASM is set. Otherwise the CPU flags are
+ * tested in the order MMX, 3DNow!, SSSE3 (the last only when
+ * HAVE_SSSE3_INLINE is set); each matching test overwrites whatever an
+ * earlier one installed, so the last match wins.
+ *
+ * @param c     context whose add_8x8basis / try_8x8basis pointers are set
+ * @param avctx codec context; when CODEC_FLAG_BITEXACT is set in its flags,
+ *              try_8x8basis is left untouched while add_8x8basis is still
+ *              replaced
+ */
+av_cold void ff_mpegvideoencdsp_init_x86(MpegvideoEncDSPContext *c,
+                                         AVCodecContext *avctx)
+{
+#if HAVE_INLINE_ASM
+    int cpu = av_get_cpu_flags();
+
+    if (INLINE_MMX(cpu)) {
+        c->add_8x8basis = add_8x8basis_mmx;
+        if ((avctx->flags & CODEC_FLAG_BITEXACT) == 0) {
+            c->try_8x8basis = try_8x8basis_mmx;
+        }
+    }
+
+    if (INLINE_AMD3DNOW(cpu)) {
+        c->add_8x8basis = add_8x8basis_3dnow;
+        if ((avctx->flags & CODEC_FLAG_BITEXACT) == 0) {
+            c->try_8x8basis = try_8x8basis_3dnow;
+        }
+    }
+
+#if HAVE_SSSE3_INLINE
+    if (INLINE_SSSE3(cpu)) {
+        c->add_8x8basis = add_8x8basis_ssse3;
+        if ((avctx->flags & CODEC_FLAG_BITEXACT) == 0) {
+            c->try_8x8basis = try_8x8basis_ssse3;
+        }
+    }
+#endif /* HAVE_SSSE3_INLINE */
+
+#endif /* HAVE_INLINE_ASM */
+}