about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Michael Niedermayer <michaelni@gmx.at> 2014-07-07 15:27:11 +0200
committer Michael Niedermayer <michaelni@gmx.at> 2014-07-07 15:36:58 +0200
commit 020865f557ccf06a41ecc461fd13ce6678817d04 (patch)
tree 657a0da279922b830200908b6e620be025e20b85
parent 462c6cdb8ed256d2063815b67ca4d14e62e25802 (diff)
parent c166148409fe8f0dbccef2fe684286a40ba1e37d (diff)
download ffmpeg-020865f557ccf06a41ecc461fd13ce6678817d04.tar.gz
Merge commit 'c166148409fe8f0dbccef2fe684286a40ba1e37d'
* commit 'c166148409fe8f0dbccef2fe684286a40ba1e37d':
  dsputil: Move pix_sum, pix_norm1, shrink function pointers to mpegvideoenc

Conflicts:
	libavcodec/dsputil.c
	libavcodec/mpegvideo_enc.c
	libavcodec/x86/dsputilenc.asm
	libavcodec/x86/dsputilenc_mmx.c

Merged-by: Michael Niedermayer <michaelni@gmx.at>
-rw-r--r-- libavcodec/arm/Makefile | 2
-rw-r--r-- libavcodec/arm/dsputil_armv6.S | 55
-rw-r--r-- libavcodec/arm/dsputil_init_armv6.c | 6
-rw-r--r-- libavcodec/arm/mpegvideoencdsp_armv6.S | 76
-rw-r--r-- libavcodec/arm/mpegvideoencdsp_init_arm.c | 38
-rw-r--r-- libavcodec/dnxhdenc.c | 5
-rw-r--r-- libavcodec/dsputil.c | 79
-rw-r--r-- libavcodec/dsputil.h | 6
-rw-r--r-- libavcodec/motion_est.c | 5
-rw-r--r-- libavcodec/mpegvideo_enc.c | 31
-rw-r--r-- libavcodec/mpegvideoencdsp.c | 83
-rw-r--r-- libavcodec/mpegvideoencdsp.h | 9
-rw-r--r-- libavcodec/ppc/Makefile | 1
-rw-r--r-- libavcodec/ppc/dsputil_altivec.c | 60
-rw-r--r-- libavcodec/ppc/mpegvideoencdsp.c | 103
-rw-r--r-- libavcodec/svq1enc.c | 1
-rw-r--r-- libavcodec/x86/Makefile | 1
-rw-r--r-- libavcodec/x86/dsputilenc.asm | 111
-rw-r--r-- libavcodec/x86/dsputilenc_mmx.c | 13
-rw-r--r-- libavcodec/x86/mpegvideoencdsp.asm | 137
-rw-r--r-- libavcodec/x86/mpegvideoencdsp_init.c | 23
21 files changed, 498 insertions, 347 deletions
diff --git a/libavcodec/arm/Makefile b/libavcodec/arm/Makefile
index 575e79fe5e..fbbd0696b7 100644
--- a/libavcodec/arm/Makefile
+++ b/libavcodec/arm/Makefile
@@ -22,6 +22,7 @@ OBJS-$(CONFIG_IDCTDSP) += arm/idctdsp_init_arm.o \
OBJS-$(CONFIG_LLAUDDSP) += arm/lossless_audiodsp_init_arm.o
OBJS-$(CONFIG_MPEGAUDIODSP) += arm/mpegaudiodsp_init_arm.o
OBJS-$(CONFIG_MPEGVIDEO) += arm/mpegvideo_arm.o
+OBJS-$(CONFIG_MPEGVIDEOENC) += arm/mpegvideoencdsp_init_arm.o
OBJS-$(CONFIG_NEON_CLOBBER_TEST) += arm/neontest.o
OBJS-$(CONFIG_VIDEODSP) += arm/videodsp_init_arm.o
OBJS-$(CONFIG_VP3DSP) += arm/vp3dsp_init_arm.o
@@ -61,6 +62,7 @@ ARMV6-OBJS-$(CONFIG_IDCTDSP) += arm/idctdsp_init_armv6.o \
arm/idctdsp_armv6.o \
arm/simple_idct_armv6.o
ARMV6-OBJS-$(CONFIG_MPEGAUDIODSP) += arm/mpegaudiodsp_fixed_armv6.o
+ARMV6-OBJS-$(CONFIG_MPEGVIDEOENC) += arm/mpegvideoencdsp_armv6.o
ARMV6-OBJS-$(CONFIG_MLP_DECODER) += arm/mlpdsp_armv6.o
ARMV6-OBJS-$(CONFIG_VC1_DECODER) += arm/startcode_armv6.o
diff --git a/libavcodec/arm/dsputil_armv6.S b/libavcodec/arm/dsputil_armv6.S
index 8876d5fa18..60232243e5 100644
--- a/libavcodec/arm/dsputil_armv6.S
+++ b/libavcodec/arm/dsputil_armv6.S
@@ -297,58 +297,3 @@ function ff_sse16_armv6, export=1
pop {r4-r9, pc}
endfunc
-
-function ff_pix_norm1_armv6, export=1
- push {r4-r6, lr}
- mov r12, #16
- mov lr, #0
-1:
- ldm r0, {r2-r5}
- uxtb16 r6, r2
- uxtb16 r2, r2, ror #8
- smlad lr, r6, r6, lr
- uxtb16 r6, r3
- smlad lr, r2, r2, lr
- uxtb16 r3, r3, ror #8
- smlad lr, r6, r6, lr
- uxtb16 r6, r4
- smlad lr, r3, r3, lr
- uxtb16 r4, r4, ror #8
- smlad lr, r6, r6, lr
- uxtb16 r6, r5
- smlad lr, r4, r4, lr
- uxtb16 r5, r5, ror #8
- smlad lr, r6, r6, lr
- subs r12, r12, #1
- add r0, r0, r1
- smlad lr, r5, r5, lr
- bgt 1b
-
- mov r0, lr
- pop {r4-r6, pc}
-endfunc
-
-function ff_pix_sum_armv6, export=1
- push {r4-r7, lr}
- mov r12, #16
- mov r2, #0
- mov r3, #0
- mov lr, #0
- ldr r4, [r0]
-1:
- subs r12, r12, #1
- ldr r5, [r0, #4]
- usada8 r2, r4, lr, r2
- ldr r6, [r0, #8]
- usada8 r3, r5, lr, r3
- ldr r7, [r0, #12]
- usada8 r2, r6, lr, r2
- beq 2f
- ldr_pre r4, r0, r1
- usada8 r3, r7, lr, r3
- bgt 1b
-2:
- usada8 r3, r7, lr, r3
- add r0, r2, r3
- pop {r4-r7, pc}
-endfunc
diff --git a/libavcodec/arm/dsputil_init_armv6.c b/libavcodec/arm/dsputil_init_armv6.c
index 57b90daa1e..1cfad42183 100644
--- a/libavcodec/arm/dsputil_init_armv6.c
+++ b/libavcodec/arm/dsputil_init_armv6.c
@@ -43,9 +43,6 @@ int ff_pix_abs8_armv6(MpegEncContext *s, uint8_t *blk1, uint8_t *blk2,
int ff_sse16_armv6(MpegEncContext *s, uint8_t *blk1, uint8_t *blk2,
int line_size, int h);
-int ff_pix_norm1_armv6(uint8_t *pix, int line_size);
-int ff_pix_sum_armv6(uint8_t *pix, int line_size);
-
av_cold void ff_dsputil_init_armv6(DSPContext *c, AVCodecContext *avctx,
unsigned high_bit_depth)
{
@@ -63,7 +60,4 @@ av_cold void ff_dsputil_init_armv6(DSPContext *c, AVCodecContext *avctx,
c->sad[1] = ff_pix_abs8_armv6;
c->sse[0] = ff_sse16_armv6;
-
- c->pix_norm1 = ff_pix_norm1_armv6;
- c->pix_sum = ff_pix_sum_armv6;
}
diff --git a/libavcodec/arm/mpegvideoencdsp_armv6.S b/libavcodec/arm/mpegvideoencdsp_armv6.S
new file mode 100644
index 0000000000..ab0dad7b18
--- /dev/null
+++ b/libavcodec/arm/mpegvideoencdsp_armv6.S
@@ -0,0 +1,76 @@
+/*
+ * Copyright (c) 2009 Mans Rullgard <mans@mansr.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/arm/asm.S"
+
+function ff_pix_norm1_armv6, export=1
+ push {r4-r6, lr}
+ mov r12, #16
+ mov lr, #0
+1:
+ ldm r0, {r2-r5}
+ uxtb16 r6, r2
+ uxtb16 r2, r2, ror #8
+ smlad lr, r6, r6, lr
+ uxtb16 r6, r3
+ smlad lr, r2, r2, lr
+ uxtb16 r3, r3, ror #8
+ smlad lr, r6, r6, lr
+ uxtb16 r6, r4
+ smlad lr, r3, r3, lr
+ uxtb16 r4, r4, ror #8
+ smlad lr, r6, r6, lr
+ uxtb16 r6, r5
+ smlad lr, r4, r4, lr
+ uxtb16 r5, r5, ror #8
+ smlad lr, r6, r6, lr
+ subs r12, r12, #1
+ add r0, r0, r1
+ smlad lr, r5, r5, lr
+ bgt 1b
+
+ mov r0, lr
+ pop {r4-r6, pc}
+endfunc
+
+function ff_pix_sum_armv6, export=1
+ push {r4-r7, lr}
+ mov r12, #16
+ mov r2, #0
+ mov r3, #0
+ mov lr, #0
+ ldr r4, [r0]
+1:
+ subs r12, r12, #1
+ ldr r5, [r0, #4]
+ usada8 r2, r4, lr, r2
+ ldr r6, [r0, #8]
+ usada8 r3, r5, lr, r3
+ ldr r7, [r0, #12]
+ usada8 r2, r6, lr, r2
+ beq 2f
+ ldr_pre r4, r0, r1
+ usada8 r3, r7, lr, r3
+ bgt 1b
+2:
+ usada8 r3, r7, lr, r3
+ add r0, r2, r3
+ pop {r4-r7, pc}
+endfunc
diff --git a/libavcodec/arm/mpegvideoencdsp_init_arm.c b/libavcodec/arm/mpegvideoencdsp_init_arm.c
new file mode 100644
index 0000000000..4bfe835684
--- /dev/null
+++ b/libavcodec/arm/mpegvideoencdsp_init_arm.c
@@ -0,0 +1,38 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stdint.h>
+
+#include "libavutil/cpu.h"
+#include "libavutil/arm/cpu.h"
+#include "libavcodec/avcodec.h"
+#include "libavcodec/mpegvideoencdsp.h"
+
+int ff_pix_norm1_armv6(uint8_t *pix, int line_size);
+int ff_pix_sum_armv6(uint8_t *pix, int line_size);
+
+av_cold void ff_mpegvideoencdsp_init_arm(MpegvideoEncDSPContext *c,
+ AVCodecContext *avctx)
+{
+ int cpu_flags = av_get_cpu_flags();
+
+ if (have_armv6(cpu_flags)) {
+ c->pix_norm1 = ff_pix_norm1_armv6;
+ c->pix_sum = ff_pix_sum_armv6;
+ }
+}
diff --git a/libavcodec/dnxhdenc.c b/libavcodec/dnxhdenc.c
index 850427e3fc..45b882da40 100644
--- a/libavcodec/dnxhdenc.c
+++ b/libavcodec/dnxhdenc.c
@@ -323,6 +323,7 @@ static av_cold int dnxhd_encode_init(AVCodecContext *avctx)
ff_blockdsp_init(&ctx->bdsp, avctx);
ff_idctdsp_init(&ctx->m.idsp, avctx);
+ ff_mpegvideoencdsp_init(&ctx->m.mpvencdsp, avctx);
ff_dct_common_init(&ctx->m);
ff_dct_encode_init(&ctx->m);
@@ -733,8 +734,8 @@ static int dnxhd_mb_var_thread(AVCodecContext *avctx, void *arg,
int varc;
if (!partial_last_row && mb_x * 16 <= avctx->width - 16) {
- sum = ctx->m.dsp.pix_sum(pix, ctx->m.linesize);
- varc = ctx->m.dsp.pix_norm1(pix, ctx->m.linesize);
+ sum = ctx->m.mpvencdsp.pix_sum(pix, ctx->m.linesize);
+ varc = ctx->m.mpvencdsp.pix_norm1(pix, ctx->m.linesize);
} else {
int bw = FFMIN(avctx->width - 16 * mb_x, 16);
int bh = FFMIN((avctx->height >> ctx->interlaced) - 16 * mb_y, 16);
diff --git a/libavcodec/dsputil.c b/libavcodec/dsputil.c
index fe3c135713..640a4bfa9c 100644
--- a/libavcodec/dsputil.c
+++ b/libavcodec/dsputil.c
@@ -26,7 +26,6 @@
*/
#include "libavutil/attributes.h"
-#include "libavutil/imgutils.h"
#include "libavutil/internal.h"
#include "avcodec.h"
#include "copy_block.h"
@@ -34,8 +33,6 @@
#include "dsputil.h"
#include "simple_idct.h"
#include "faandct.h"
-#include "imgconvert.h"
-#include "mathops.h"
#include "mpegvideo.h"
#include "config.h"
@@ -48,74 +45,6 @@ uint32_t ff_square_tab[512] = { 0, };
#define BIT_DEPTH 8
#include "dsputilenc_template.c"
-static int pix_sum_c(uint8_t *pix, int line_size)
-{
- int s = 0, i, j;
-
- for (i = 0; i < 16; i++) {
- for (j = 0; j < 16; j += 8) {
- s += pix[0];
- s += pix[1];
- s += pix[2];
- s += pix[3];
- s += pix[4];
- s += pix[5];
- s += pix[6];
- s += pix[7];
- pix += 8;
- }
- pix += line_size - 16;
- }
- return s;
-}
-
-static int pix_norm1_c(uint8_t *pix, int line_size)
-{
- int s = 0, i, j;
- uint32_t *sq = ff_square_tab + 256;
-
- for (i = 0; i < 16; i++) {
- for (j = 0; j < 16; j += 8) {
-#if 0
- s += sq[pix[0]];
- s += sq[pix[1]];
- s += sq[pix[2]];
- s += sq[pix[3]];
- s += sq[pix[4]];
- s += sq[pix[5]];
- s += sq[pix[6]];
- s += sq[pix[7]];
-#else
-#if HAVE_FAST_64BIT
- register uint64_t x = *(uint64_t *) pix;
- s += sq[x & 0xff];
- s += sq[(x >> 8) & 0xff];
- s += sq[(x >> 16) & 0xff];
- s += sq[(x >> 24) & 0xff];
- s += sq[(x >> 32) & 0xff];
- s += sq[(x >> 40) & 0xff];
- s += sq[(x >> 48) & 0xff];
- s += sq[(x >> 56) & 0xff];
-#else
- register uint32_t x = *(uint32_t *) pix;
- s += sq[x & 0xff];
- s += sq[(x >> 8) & 0xff];
- s += sq[(x >> 16) & 0xff];
- s += sq[(x >> 24) & 0xff];
- x = *(uint32_t *) (pix + 4);
- s += sq[x & 0xff];
- s += sq[(x >> 8) & 0xff];
- s += sq[(x >> 16) & 0xff];
- s += sq[(x >> 24) & 0xff];
-#endif
-#endif
- pix += 8;
- }
- pix += line_size - 16;
- }
- return s;
-}
-
static int sse4_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
int line_size, int h)
{
@@ -1094,9 +1023,6 @@ av_cold void ff_dsputil_init(DSPContext *c, AVCodecContext *avctx)
c->sum_abs_dctelem = sum_abs_dctelem_c;
- c->pix_sum = pix_sum_c;
- c->pix_norm1 = pix_norm1_c;
-
/* TODO [0] 16 [1] 8 */
c->pix_abs[0][0] = pix_abs16_c;
c->pix_abs[0][1] = pix_abs16_x2_c;
@@ -1141,11 +1067,6 @@ av_cold void ff_dsputil_init(DSPContext *c, AVCodecContext *avctx)
ff_dsputil_init_dwt(c);
#endif
- c->shrink[0] = av_image_copy_plane;
- c->shrink[1] = ff_shrink22;
- c->shrink[2] = ff_shrink44;
- c->shrink[3] = ff_shrink88;
-
c->draw_edges = draw_edges_8_c;
switch (avctx->bits_per_raw_sample) {
diff --git a/libavcodec/dsputil.h b/libavcodec/dsputil.h
index 8633c90056..8dafbbd9d7 100644
--- a/libavcodec/dsputil.h
+++ b/libavcodec/dsputil.h
@@ -72,9 +72,6 @@ typedef struct DSPContext {
int stride);
int (*sum_abs_dctelem)(int16_t *block /* align 16 */);
- int (*pix_sum)(uint8_t *pix, int line_size);
- int (*pix_norm1)(uint8_t *pix, int line_size);
-
me_cmp_func sad[6]; /* identical to pix_absAxA except additional void * */
me_cmp_func sse[6];
me_cmp_func hadamard8_diff[6];
@@ -108,9 +105,6 @@ typedef struct DSPContext {
#define EDGE_WIDTH 16
#define EDGE_TOP 1
#define EDGE_BOTTOM 2
-
- void (*shrink[4])(uint8_t *dst, int dst_wrap, const uint8_t *src,
- int src_wrap, int width, int height);
} DSPContext;
void ff_dsputil_static_init(void);
diff --git a/libavcodec/motion_est.c b/libavcodec/motion_est.c
index b093580e8f..6b3cd61e8a 100644
--- a/libavcodec/motion_est.c
+++ b/libavcodec/motion_est.c
@@ -903,8 +903,9 @@ void ff_estimate_p_frame_motion(MpegEncContext * s,
/* intra / predictive decision */
pix = c->src[0][0];
- sum = s->dsp.pix_sum(pix, s->linesize);
- varc = s->dsp.pix_norm1(pix, s->linesize) - (((unsigned)sum*sum)>>8) + 500;
+ sum = s->mpvencdsp.pix_sum(pix, s->linesize);
+ varc = s->mpvencdsp.pix_norm1(pix, s->linesize) -
+ (((unsigned) sum * sum) >> 8) + 500;
pic->mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
pic->mb_var [s->mb_stride * mb_y + mb_x] = (varc+128)>>8;
diff --git a/libavcodec/mpegvideo_enc.c b/libavcodec/mpegvideo_enc.c
index 4a453f3723..0d630df4ea 100644
--- a/libavcodec/mpegvideo_enc.c
+++ b/libavcodec/mpegvideo_enc.c
@@ -1010,7 +1010,7 @@ static int get_intra_count(MpegEncContext *s, uint8_t *src,
int offset = x + y * stride;
int sad = s->dsp.sad[0](NULL, src + offset, ref + offset, stride,
16);
- int mean = (s->dsp.pix_sum(src + offset, stride) + 128) >> 8;
+ int mean = (s->mpvencdsp.pix_sum(src + offset, stride) + 128) >> 8;
int sae = get_sae(src + offset, mean, stride);
acc += sae + 500 < sad;
@@ -1278,15 +1278,21 @@ static int estimate_best_b_count(MpegEncContext *s)
data[2] += INPLACE_OFFSET;
}
- s->dsp.shrink[scale](s->tmp_frames[i]->data[0], s->tmp_frames[i]->linesize[0],
- data[0], pre_input.f->linesize[0],
- c->width, c->height);
- s->dsp.shrink[scale](s->tmp_frames[i]->data[1], s->tmp_frames[i]->linesize[1],
- data[1], pre_input.f->linesize[1],
- c->width >> 1, c->height >> 1);
- s->dsp.shrink[scale](s->tmp_frames[i]->data[2], s->tmp_frames[i]->linesize[2],
- data[2], pre_input.f->linesize[2],
- c->width >> 1, c->height >> 1);
+ s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[0],
+ s->tmp_frames[i]->linesize[0],
+ data[0],
+ pre_input.f->linesize[0],
+ c->width, c->height);
+ s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[1],
+ s->tmp_frames[i]->linesize[1],
+ data[1],
+ pre_input.f->linesize[1],
+ c->width >> 1, c->height >> 1);
+ s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[2],
+ s->tmp_frames[i]->linesize[2],
+ data[2],
+ pre_input.f->linesize[2],
+ c->width >> 1, c->height >> 1);
}
}
@@ -2585,9 +2591,10 @@ static int mb_var_thread(AVCodecContext *c, void *arg){
int yy = mb_y * 16;
uint8_t *pix = s->new_picture.f->data[0] + (yy * s->linesize) + xx;
int varc;
- int sum = s->dsp.pix_sum(pix, s->linesize);
+ int sum = s->mpvencdsp.pix_sum(pix, s->linesize);
- varc = (s->dsp.pix_norm1(pix, s->linesize) - (((unsigned)sum*sum)>>8) + 500 + 128)>>8;
+ varc = (s->mpvencdsp.pix_norm1(pix, s->linesize) -
+ (((unsigned) sum * sum) >> 8) + 500 + 128) >> 8;
s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
diff --git a/libavcodec/mpegvideoencdsp.c b/libavcodec/mpegvideoencdsp.c
index c5e0b4874c..bde4345750 100644
--- a/libavcodec/mpegvideoencdsp.c
+++ b/libavcodec/mpegvideoencdsp.c
@@ -22,7 +22,10 @@
#include "config.h"
#include "libavutil/avassert.h"
#include "libavutil/attributes.h"
+#include "libavutil/imgutils.h"
#include "avcodec.h"
+#include "dsputil.h"
+#include "imgconvert.h"
#include "mpegvideoencdsp.h"
static int try_8x8basis_c(int16_t rem[64], int16_t weight[64],
@@ -54,12 +57,92 @@ static void add_8x8basis_c(int16_t rem[64], int16_t basis[64], int scale)
(BASIS_SHIFT - RECON_SHIFT);
}
+static int pix_sum_c(uint8_t *pix, int line_size)
+{
+ int s = 0, i, j;
+
+ for (i = 0; i < 16; i++) {
+ for (j = 0; j < 16; j += 8) {
+ s += pix[0];
+ s += pix[1];
+ s += pix[2];
+ s += pix[3];
+ s += pix[4];
+ s += pix[5];
+ s += pix[6];
+ s += pix[7];
+ pix += 8;
+ }
+ pix += line_size - 16;
+ }
+ return s;
+}
+
+static int pix_norm1_c(uint8_t *pix, int line_size)
+{
+ int s = 0, i, j;
+ uint32_t *sq = ff_square_tab + 256;
+
+ for (i = 0; i < 16; i++) {
+ for (j = 0; j < 16; j += 8) {
+#if 0
+ s += sq[pix[0]];
+ s += sq[pix[1]];
+ s += sq[pix[2]];
+ s += sq[pix[3]];
+ s += sq[pix[4]];
+ s += sq[pix[5]];
+ s += sq[pix[6]];
+ s += sq[pix[7]];
+#else
+#if HAVE_FAST_64BIT
+ register uint64_t x = *(uint64_t *) pix;
+ s += sq[x & 0xff];
+ s += sq[(x >> 8) & 0xff];
+ s += sq[(x >> 16) & 0xff];
+ s += sq[(x >> 24) & 0xff];
+ s += sq[(x >> 32) & 0xff];
+ s += sq[(x >> 40) & 0xff];
+ s += sq[(x >> 48) & 0xff];
+ s += sq[(x >> 56) & 0xff];
+#else
+ register uint32_t x = *(uint32_t *) pix;
+ s += sq[x & 0xff];
+ s += sq[(x >> 8) & 0xff];
+ s += sq[(x >> 16) & 0xff];
+ s += sq[(x >> 24) & 0xff];
+ x = *(uint32_t *) (pix + 4);
+ s += sq[x & 0xff];
+ s += sq[(x >> 8) & 0xff];
+ s += sq[(x >> 16) & 0xff];
+ s += sq[(x >> 24) & 0xff];
+#endif
+#endif
+ pix += 8;
+ }
+ pix += line_size - 16;
+ }
+ return s;
+}
+
av_cold void ff_mpegvideoencdsp_init(MpegvideoEncDSPContext *c,
AVCodecContext *avctx)
{
c->try_8x8basis = try_8x8basis_c;
c->add_8x8basis = add_8x8basis_c;
+ c->shrink[0] = av_image_copy_plane;
+ c->shrink[1] = ff_shrink22;
+ c->shrink[2] = ff_shrink44;
+ c->shrink[3] = ff_shrink88;
+
+ c->pix_sum = pix_sum_c;
+ c->pix_norm1 = pix_norm1_c;
+
+ if (ARCH_ARM)
+ ff_mpegvideoencdsp_init_arm(c, avctx);
+ if (ARCH_PPC)
+ ff_mpegvideoencdsp_init_ppc(c, avctx);
if (ARCH_X86)
ff_mpegvideoencdsp_init_x86(c, avctx);
}
diff --git a/libavcodec/mpegvideoencdsp.h b/libavcodec/mpegvideoencdsp.h
index a7bc2ae2c5..81e3fe67b0 100644
--- a/libavcodec/mpegvideoencdsp.h
+++ b/libavcodec/mpegvideoencdsp.h
@@ -31,10 +31,19 @@ typedef struct MpegvideoEncDSPContext {
int16_t basis[64], int scale);
void (*add_8x8basis)(int16_t rem[64], int16_t basis[64], int scale);
+ int (*pix_sum)(uint8_t *pix, int line_size);
+ int (*pix_norm1)(uint8_t *pix, int line_size);
+
+ void (*shrink[4])(uint8_t *dst, int dst_wrap, const uint8_t *src,
+ int src_wrap, int width, int height);
} MpegvideoEncDSPContext;
void ff_mpegvideoencdsp_init(MpegvideoEncDSPContext *c,
AVCodecContext *avctx);
+void ff_mpegvideoencdsp_init_arm(MpegvideoEncDSPContext *c,
+ AVCodecContext *avctx);
+void ff_mpegvideoencdsp_init_ppc(MpegvideoEncDSPContext *c,
+ AVCodecContext *avctx);
void ff_mpegvideoencdsp_init_x86(MpegvideoEncDSPContext *c,
AVCodecContext *avctx);
diff --git a/libavcodec/ppc/Makefile b/libavcodec/ppc/Makefile
index 88aaf2644a..c357dafbac 100644
--- a/libavcodec/ppc/Makefile
+++ b/libavcodec/ppc/Makefile
@@ -13,6 +13,7 @@ OBJS-$(CONFIG_IDCTDSP) += ppc/idctdsp.o
OBJS-$(CONFIG_MPEGAUDIODSP) += ppc/mpegaudiodsp_altivec.o
OBJS-$(CONFIG_MPEGVIDEO) += ppc/mpegvideo_altivec.o \
ppc/mpegvideodsp.o
+OBJS-$(CONFIG_MPEGVIDEOENC) += ppc/mpegvideoencdsp.o
OBJS-$(CONFIG_VIDEODSP) += ppc/videodsp_ppc.o
OBJS-$(CONFIG_VP3DSP) += ppc/vp3dsp_altivec.o
diff --git a/libavcodec/ppc/dsputil_altivec.c b/libavcodec/ppc/dsputil_altivec.c
index 20b15b019e..5ab1b51e2b 100644
--- a/libavcodec/ppc/dsputil_altivec.c
+++ b/libavcodec/ppc/dsputil_altivec.c
@@ -308,34 +308,6 @@ static int sad8_altivec(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
return s;
}
-static int pix_norm1_altivec(uint8_t *pix, int line_size)
-{
- int i, s = 0;
- const vector unsigned int zero =
- (const vector unsigned int) vec_splat_u32(0);
- vector unsigned char perm = vec_lvsl(0, pix);
- vector unsigned int sv = (vector unsigned int) vec_splat_u32(0);
- vector signed int sum;
-
- for (i = 0; i < 16; i++) {
- /* Read the potentially unaligned pixels. */
- vector unsigned char pixl = vec_ld(0, pix);
- vector unsigned char pixr = vec_ld(15, pix);
- vector unsigned char pixv = vec_perm(pixl, pixr, perm);
-
- /* Square the values, and add them to our sum. */
- sv = vec_msum(pixv, pixv, sv);
-
- pix += line_size;
- }
- /* Sum up the four partial sums, and put the result into s. */
- sum = vec_sums((vector signed int) sv, (vector signed int) zero);
- sum = vec_splat(sum, 3);
- vec_ste(sum, 0, &s);
-
- return s;
-}
-
/* Sum of Squared Errors for an 8x8 block, AltiVec-enhanced.
* It's the sad8_altivec code above w/ squaring added. */
static int sse8_altivec(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
@@ -430,35 +402,6 @@ static int sse16_altivec(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
return s;
}
-static int pix_sum_altivec(uint8_t *pix, int line_size)
-{
- int i, s;
- const vector unsigned int zero =
- (const vector unsigned int) vec_splat_u32(0);
- vector unsigned char perm = vec_lvsl(0, pix);
- vector unsigned int sad = (vector unsigned int) vec_splat_u32(0);
- vector signed int sumdiffs;
-
- for (i = 0; i < 16; i++) {
- /* Read the potentially unaligned 16 pixels into t1. */
- vector unsigned char pixl = vec_ld(0, pix);
- vector unsigned char pixr = vec_ld(15, pix);
- vector unsigned char t1 = vec_perm(pixl, pixr, perm);
-
- /* Add each 4 pixel group together and put 4 results into sad. */
- sad = vec_sum4s(t1, sad);
-
- pix += line_size;
- }
-
- /* Sum up the four partial sums, and put the result into s. */
- sumdiffs = vec_sums((vector signed int) sad, (vector signed int) zero);
- sumdiffs = vec_splat(sumdiffs, 3);
- vec_ste(sumdiffs, 0, &s);
-
- return s;
-}
-
static void get_pixels_altivec(int16_t *restrict block, const uint8_t *pixels,
int line_size)
{
@@ -911,9 +854,6 @@ av_cold void ff_dsputil_init_altivec(DSPContext *c, AVCodecContext *avctx,
c->sse[0] = sse16_altivec;
c->sse[1] = sse8_altivec;
- c->pix_norm1 = pix_norm1_altivec;
- c->pix_sum = pix_sum_altivec;
-
c->diff_pixels = diff_pixels_altivec;
if (!high_bit_depth) {
diff --git a/libavcodec/ppc/mpegvideoencdsp.c b/libavcodec/ppc/mpegvideoencdsp.c
new file mode 100644
index 0000000000..00ae2a6f30
--- /dev/null
+++ b/libavcodec/ppc/mpegvideoencdsp.c
@@ -0,0 +1,103 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config.h"
+#include <stdint.h>
+#if HAVE_ALTIVEC_H
+#include <altivec.h>
+#endif
+
+#include "libavutil/attributes.h"
+#include "libavutil/cpu.h"
+#include "libavutil/ppc/cpu.h"
+#include "libavutil/ppc/types_altivec.h"
+#include "libavutil/ppc/util_altivec.h"
+#include "libavcodec/mpegvideoencdsp.h"
+
+#if HAVE_ALTIVEC
+
+static int pix_norm1_altivec(uint8_t *pix, int line_size)
+{
+ int i, s = 0;
+ const vector unsigned int zero =
+ (const vector unsigned int) vec_splat_u32(0);
+ vector unsigned char perm = vec_lvsl(0, pix);
+ vector unsigned int sv = (vector unsigned int) vec_splat_u32(0);
+ vector signed int sum;
+
+ for (i = 0; i < 16; i++) {
+ /* Read the potentially unaligned pixels. */
+ vector unsigned char pixl = vec_ld(0, pix);
+ vector unsigned char pixr = vec_ld(15, pix);
+ vector unsigned char pixv = vec_perm(pixl, pixr, perm);
+
+ /* Square the values, and add them to our sum. */
+ sv = vec_msum(pixv, pixv, sv);
+
+ pix += line_size;
+ }
+ /* Sum up the four partial sums, and put the result into s. */
+ sum = vec_sums((vector signed int) sv, (vector signed int) zero);
+ sum = vec_splat(sum, 3);
+ vec_ste(sum, 0, &s);
+
+ return s;
+}
+
+static int pix_sum_altivec(uint8_t *pix, int line_size)
+{
+ int i, s;
+ const vector unsigned int zero =
+ (const vector unsigned int) vec_splat_u32(0);
+ vector unsigned char perm = vec_lvsl(0, pix);
+ vector unsigned int sad = (vector unsigned int) vec_splat_u32(0);
+ vector signed int sumdiffs;
+
+ for (i = 0; i < 16; i++) {
+ /* Read the potentially unaligned 16 pixels into t1. */
+ vector unsigned char pixl = vec_ld(0, pix);
+ vector unsigned char pixr = vec_ld(15, pix);
+ vector unsigned char t1 = vec_perm(pixl, pixr, perm);
+
+ /* Add each 4 pixel group together and put 4 results into sad. */
+ sad = vec_sum4s(t1, sad);
+
+ pix += line_size;
+ }
+
+ /* Sum up the four partial sums, and put the result into s. */
+ sumdiffs = vec_sums((vector signed int) sad, (vector signed int) zero);
+ sumdiffs = vec_splat(sumdiffs, 3);
+ vec_ste(sumdiffs, 0, &s);
+
+ return s;
+}
+
+#endif /* HAVE_ALTIVEC */
+
+av_cold void ff_mpegvideoencdsp_init_ppc(MpegvideoEncDSPContext *c,
+ AVCodecContext *avctx)
+{
+#if HAVE_ALTIVEC
+ if (!PPC_ALTIVEC(av_get_cpu_flags()))
+ return;
+
+ c->pix_norm1 = pix_norm1_altivec;
+ c->pix_sum = pix_sum_altivec;
+#endif /* HAVE_ALTIVEC */
+}
diff --git a/libavcodec/svq1enc.c b/libavcodec/svq1enc.c
index 6d1b397fd2..9ff690dce2 100644
--- a/libavcodec/svq1enc.c
+++ b/libavcodec/svq1enc.c
@@ -517,6 +517,7 @@ static av_cold int svq1_encode_init(AVCodecContext *avctx)
ff_dsputil_init(&s->dsp, avctx);
ff_hpeldsp_init(&s->hdsp, avctx->flags);
+ ff_mpegvideoencdsp_init(&s->m.mpvencdsp, avctx);
avctx->coded_frame = av_frame_alloc();
s->current_picture = av_frame_alloc();
diff --git a/libavcodec/x86/Makefile b/libavcodec/x86/Makefile
index f757be177d..ac336c7d86 100644
--- a/libavcodec/x86/Makefile
+++ b/libavcodec/x86/Makefile
@@ -109,6 +109,7 @@ YASM-OBJS-$(CONFIG_IDCTDSP) += x86/idctdsp.o
YASM-OBJS-$(CONFIG_LLAUDDSP) += x86/lossless_audiodsp.o
YASM-OBJS-$(CONFIG_LLVIDDSP) += x86/lossless_videodsp.o
YASM-OBJS-$(CONFIG_MPEGAUDIODSP) += x86/imdct36.o
+YASM-OBJS-$(CONFIG_MPEGVIDEOENC) += x86/mpegvideoencdsp.o
YASM-OBJS-$(CONFIG_QPELDSP) += x86/qpeldsp.o \
x86/fpel.o \
x86/qpel.o
diff --git a/libavcodec/x86/dsputilenc.asm b/libavcodec/x86/dsputilenc.asm
index 84cb7b363b..13682ba5d4 100644
--- a/libavcodec/x86/dsputilenc.asm
+++ b/libavcodec/x86/dsputilenc.asm
@@ -23,10 +23,6 @@
%include "libavutil/x86/x86util.asm"
-SECTION_RODATA
-
-cextern pw_1
-
SECTION .text
%macro DIFF_PIXELS_1 4
@@ -465,113 +461,6 @@ cglobal diff_pixels, 4, 5, 5
jne .loop
RET
-; int ff_pix_sum16_mmx(uint8_t *pix, int line_size)
-; %1 = number of xmm registers used
-; %2 = number of loops
-; %3 = number of GPRs used
-%macro PIX_SUM16 4
-cglobal pix_sum16, 2, %3, %1
- movsxdifnidn r1, r1d
- mov r2, %2
-%if cpuflag(xop)
- lea r3, [r1*3]
-%else
- pxor m5, m5
-%endif
- pxor m4, m4
-.loop:
-%if cpuflag(xop)
- vphaddubq m0, [r0]
- vphaddubq m1, [r0+r1]
- vphaddubq m2, [r0+r1*2]
- vphaddubq m3, [r0+r3]
-%else
- mova m0, [r0]
-%if mmsize == 8
- mova m1, [r0+8]
-%else
- mova m1, [r0+r1]
-%endif
- punpckhbw m2, m0, m5
- punpcklbw m0, m5
- punpckhbw m3, m1, m5
- punpcklbw m1, m5
-%endif ; cpuflag(xop)
- paddw m1, m0
- paddw m3, m2
- paddw m3, m1
- paddw m4, m3
-%if mmsize == 8
- add r0, r1
-%else
- lea r0, [r0+r1*%4]
-%endif
- dec r2
- jne .loop
-%if cpuflag(xop)
- pshufd m0, m4, q0032
- paddd m4, m0
-%else
- HADDW m4, m5
-%endif
- movd eax, m4
- RET
-%endmacro
-
-INIT_MMX mmx
-PIX_SUM16 0, 16, 3, 0
-INIT_XMM sse2
-PIX_SUM16 6, 8, 3, 2
-%if HAVE_XOP_EXTERNAL
-INIT_XMM xop
-PIX_SUM16 5, 4, 4, 4
-%endif
-
-; int ff_pix_norm1_mmx(uint8_t *pix, int line_size)
-; %1 = number of xmm registers used
-; %2 = number of loops
-%macro PIX_NORM1 2
-cglobal pix_norm1, 2, 3, %1
- movsxdifnidn r1, r1d
- mov r2, %2
- pxor m0, m0
- pxor m5, m5
-.loop:
- mova m2, [r0+0]
-%if mmsize == 8
- mova m3, [r0+8]
-%else
- mova m3, [r0+r1]
-%endif
- punpckhbw m1, m2, m0
- punpcklbw m2, m0
- punpckhbw m4, m3, m0
- punpcklbw m3, m0
- pmaddwd m1, m1
- pmaddwd m2, m2
- pmaddwd m3, m3
- pmaddwd m4, m4
- paddd m2, m1
- paddd m4, m3
- paddd m5, m2
- paddd m5, m4
-%if mmsize == 8
- add r0, r1
-%else
- lea r0, [r0+r1*2]
-%endif
- dec r2
- jne .loop
- HADDD m5, m1
- movd eax, m5
- RET
-%endmacro
-
-INIT_MMX mmx
-PIX_NORM1 0, 16
-INIT_XMM sse2
-PIX_NORM1 6, 8
-
;-----------------------------------------------
;int ff_sum_abs_dctelem(int16_t *block)
;-----------------------------------------------
diff --git a/libavcodec/x86/dsputilenc_mmx.c b/libavcodec/x86/dsputilenc_mmx.c
index f235ad0a53..9e3078b144 100644
--- a/libavcodec/x86/dsputilenc_mmx.c
+++ b/libavcodec/x86/dsputilenc_mmx.c
@@ -37,11 +37,6 @@ void ff_diff_pixels_mmx(int16_t *block, const uint8_t *s1, const uint8_t *s2,
int stride);
void ff_diff_pixels_sse2(int16_t *block, const uint8_t *s1, const uint8_t *s2,
int stride);
-int ff_pix_sum16_mmx(uint8_t *pix, int line_size);
-int ff_pix_sum16_sse2(uint8_t *pix, int line_size);
-int ff_pix_sum16_xop(uint8_t *pix, int line_size);
-int ff_pix_norm1_mmx(uint8_t *pix, int line_size);
-int ff_pix_norm1_sse2(uint8_t *pix, int line_size);
int ff_sum_abs_dctelem_mmx(int16_t *block);
int ff_sum_abs_dctelem_mmxext(int16_t *block);
int ff_sum_abs_dctelem_sse2(int16_t *block);
@@ -364,8 +359,6 @@ av_cold void ff_dsputilenc_init_mmx(DSPContext *c, AVCodecContext *avctx,
if (!high_bit_depth)
c->get_pixels = ff_get_pixels_mmx;
c->diff_pixels = ff_diff_pixels_mmx;
- c->pix_sum = ff_pix_sum16_mmx;
- c->pix_norm1 = ff_pix_norm1_mmx;
}
if (EXTERNAL_SSE2(cpu_flags))
@@ -431,8 +424,6 @@ av_cold void ff_dsputilenc_init_mmx(DSPContext *c, AVCodecContext *avctx,
c->sse[0] = ff_sse16_sse2;
c->sum_abs_dctelem = ff_sum_abs_dctelem_sse2;
c->diff_pixels = ff_diff_pixels_sse2;
- c->pix_sum = ff_pix_sum16_sse2;
- c->pix_norm1 = ff_pix_norm1_sse2;
#if HAVE_ALIGNED_STACK
c->hadamard8_diff[0] = ff_hadamard8_diff16_sse2;
@@ -448,9 +439,5 @@ av_cold void ff_dsputilenc_init_mmx(DSPContext *c, AVCodecContext *avctx,
#endif
}
- if (EXTERNAL_XOP(cpu_flags)) {
- c->pix_sum = ff_pix_sum16_xop;
- }
-
ff_dsputil_init_pix_mmx(c, avctx);
}
diff --git a/libavcodec/x86/mpegvideoencdsp.asm b/libavcodec/x86/mpegvideoencdsp.asm
new file mode 100644
index 0000000000..4fe6cfe5a6
--- /dev/null
+++ b/libavcodec/x86/mpegvideoencdsp.asm
@@ -0,0 +1,137 @@
+;*****************************************************************************
+;* SIMD-optimized MPEG encoding functions
+;*****************************************************************************
+;* Copyright (c) 2000, 2001 Fabrice Bellard
+;* Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
+;*
+;* This file is part of FFmpeg.
+;*
+;* FFmpeg is free software; you can redistribute it and/or
+;* modify it under the terms of the GNU Lesser General Public
+;* License as published by the Free Software Foundation; either
+;* version 2.1 of the License, or (at your option) any later version.
+;*
+;* FFmpeg is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+;* Lesser General Public License for more details.
+;*
+;* You should have received a copy of the GNU Lesser General Public
+;* License along with FFmpeg; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;*****************************************************************************
+
+%include "libavutil/x86/x86util.asm"
+
+SECTION_RODATA
+
+cextern pw_1
+
+SECTION .text
+; int ff_pix_sum16_mmx(uint8_t *pix, int line_size)
+; %1 = number of xmm registers used
+; %2 = number of loops
+; %3 = number of GPRs used
+%macro PIX_SUM16 4
+cglobal pix_sum16, 2, %3, %1
+ movsxdifnidn r1, r1d
+ mov r2, %2
+%if cpuflag(xop)
+ lea r3, [r1*3]
+%else
+ pxor m5, m5
+%endif
+ pxor m4, m4
+.loop:
+%if cpuflag(xop)
+ vphaddubq m0, [r0]
+ vphaddubq m1, [r0+r1]
+ vphaddubq m2, [r0+r1*2]
+ vphaddubq m3, [r0+r3]
+%else
+ mova m0, [r0]
+%if mmsize == 8
+ mova m1, [r0+8]
+%else
+ mova m1, [r0+r1]
+%endif
+ punpckhbw m2, m0, m5
+ punpcklbw m0, m5
+ punpckhbw m3, m1, m5
+ punpcklbw m1, m5
+%endif ; cpuflag(xop)
+ paddw m1, m0
+ paddw m3, m2
+ paddw m3, m1
+ paddw m4, m3
+%if mmsize == 8
+ add r0, r1
+%else
+ lea r0, [r0+r1*%4]
+%endif
+ dec r2
+ jne .loop
+%if cpuflag(xop)
+ pshufd m0, m4, q0032
+ paddd m4, m0
+%else
+ HADDW m4, m5
+%endif
+ movd eax, m4
+ RET
+%endmacro
+
+INIT_MMX mmx
+PIX_SUM16 0, 16, 3, 0
+INIT_XMM sse2
+PIX_SUM16 6, 8, 3, 2
+%if HAVE_XOP_EXTERNAL
+INIT_XMM xop
+PIX_SUM16 5, 4, 4, 4
+%endif
+
+; int ff_pix_norm1_mmx(uint8_t *pix, int line_size)
+; %1 = number of xmm registers used
+; %2 = number of loops
+%macro PIX_NORM1 2
+cglobal pix_norm1, 2, 3, %1
+ movsxdifnidn r1, r1d
+ mov r2, %2
+ pxor m0, m0
+ pxor m5, m5
+.loop:
+ mova m2, [r0+0]
+%if mmsize == 8
+ mova m3, [r0+8]
+%else
+ mova m3, [r0+r1]
+%endif
+ punpckhbw m1, m2, m0
+ punpcklbw m2, m0
+ punpckhbw m4, m3, m0
+ punpcklbw m3, m0
+ pmaddwd m1, m1
+ pmaddwd m2, m2
+ pmaddwd m3, m3
+ pmaddwd m4, m4
+ paddd m2, m1
+ paddd m4, m3
+ paddd m5, m2
+ paddd m5, m4
+%if mmsize == 8
+ add r0, r1
+%else
+ lea r0, [r0+r1*2]
+%endif
+ dec r2
+ jne .loop
+ HADDD m5, m1
+ movd eax, m5
+ RET
+%endmacro
+
+INIT_MMX mmx
+PIX_NORM1 0, 16
+INIT_XMM sse2
+PIX_NORM1 6, 8
+
diff --git a/libavcodec/x86/mpegvideoencdsp_init.c b/libavcodec/x86/mpegvideoencdsp_init.c
index d7650ec0e1..16841893a4 100644
--- a/libavcodec/x86/mpegvideoencdsp_init.c
+++ b/libavcodec/x86/mpegvideoencdsp_init.c
@@ -22,6 +22,12 @@
#include "libavcodec/avcodec.h"
#include "libavcodec/mpegvideoencdsp.h"
+int ff_pix_sum16_mmx(uint8_t *pix, int line_size);
+int ff_pix_sum16_sse2(uint8_t *pix, int line_size);
+int ff_pix_sum16_xop(uint8_t *pix, int line_size);
+int ff_pix_norm1_mmx(uint8_t *pix, int line_size);
+int ff_pix_norm1_sse2(uint8_t *pix, int line_size);
+
#if HAVE_INLINE_ASM
#define PHADDD(a, t) \
@@ -95,9 +101,24 @@
av_cold void ff_mpegvideoencdsp_init_x86(MpegvideoEncDSPContext *c,
AVCodecContext *avctx)
{
-#if HAVE_INLINE_ASM
int cpu_flags = av_get_cpu_flags();
+ if (EXTERNAL_MMX(cpu_flags)) {
+ c->pix_sum = ff_pix_sum16_mmx;
+ c->pix_norm1 = ff_pix_norm1_mmx;
+ }
+
+ if (EXTERNAL_SSE2(cpu_flags)) {
+ c->pix_sum = ff_pix_sum16_sse2;
+ c->pix_norm1 = ff_pix_norm1_sse2;
+ }
+
+ if (EXTERNAL_XOP(cpu_flags)) {
+ c->pix_sum = ff_pix_sum16_xop;
+ }
+
+#if HAVE_INLINE_ASM
+
if (INLINE_MMX(cpu_flags)) {
if (!(avctx->flags & CODEC_FLAG_BITEXACT)) {
c->try_8x8basis = try_8x8basis_mmx;