aboutsummaryrefslogtreecommitdiffstats
path: root/libavcodec
diff options
context:
space:
mode:
authorMichael Niedermayer <michaelni@gmx.at>2014-07-10 00:56:05 +0200
committerMichael Niedermayer <michaelni@gmx.at>2014-07-10 01:22:14 +0200
commit2d5e9451de3c7ab00cac6ec4aff290e12a2f190d (patch)
tree9934a09e3ad1f63796274ee59f216df19cd9ca02 /libavcodec
parent6cc1fec41263add956b35af96d7c4a81c9436a65 (diff)
parentf46bb608d9d76c543e4929dc8cffe36b84bd789e (diff)
downloadffmpeg-2d5e9451de3c7ab00cac6ec4aff290e12a2f190d.tar.gz
Merge commit 'f46bb608d9d76c543e4929dc8cffe36b84bd789e'
* commit 'f46bb608d9d76c543e4929dc8cffe36b84bd789e':
dsputil: Split off pixel block routines into their own context

Conflicts:
configure
libavcodec/dsputil.c
libavcodec/mpegvideo_enc.c
libavcodec/pixblockdsp_template.c
libavcodec/x86/dsputilenc.asm
libavcodec/x86/dsputilenc_mmx.c

Merged-by: Michael Niedermayer <michaelni@gmx.at>
Diffstat (limited to 'libavcodec')
-rw-r--r--libavcodec/Makefile1
-rw-r--r--libavcodec/arm/Makefile2
-rw-r--r--libavcodec/arm/dsputil_armv6.S55
-rw-r--r--libavcodec/arm/dsputil_init_armv6.c8
-rw-r--r--libavcodec/arm/pixblockdsp_armv6.S76
-rw-r--r--libavcodec/arm/pixblockdsp_init_arm.c42
-rw-r--r--libavcodec/asv.h4
-rw-r--r--libavcodec/asvenc.c14
-rw-r--r--libavcodec/dnxhdenc.c29
-rw-r--r--libavcodec/dsputil.c57
-rw-r--r--libavcodec/dsputil.h8
-rw-r--r--libavcodec/dvenc.c5
-rw-r--r--libavcodec/libavcodec.v1
-rw-r--r--libavcodec/mpegvideo.h2
-rw-r--r--libavcodec/mpegvideo_enc.c55
-rw-r--r--libavcodec/pixblockdsp.c80
-rw-r--r--libavcodec/pixblockdsp.h44
-rw-r--r--libavcodec/pixblockdsp_template.c (renamed from libavcodec/dsputilenc_template.c)11
-rw-r--r--libavcodec/ppc/Makefile1
-rw-r--r--libavcodec/ppc/dsputil_altivec.c105
-rw-r--r--libavcodec/ppc/pixblockdsp.c153
-rw-r--r--libavcodec/x86/Makefile2
-rw-r--r--libavcodec/x86/dsputilenc.asm109
-rw-r--r--libavcodec/x86/dsputilenc_mmx.c17
-rw-r--r--libavcodec/x86/pixblockdsp.asm135
-rw-r--r--libavcodec/x86/pixblockdsp_init.c50
26 files changed, 651 insertions, 415 deletions
diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index 3a2f5baa6b..0bbfa27e48 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -82,6 +82,7 @@ OBJS-$(CONFIG_MPEGVIDEO) += mpegvideo.o mpegvideodsp.o \
OBJS-$(CONFIG_MPEGVIDEOENC) += mpegvideo_enc.o mpeg12data.o \
motion_est.o ratecontrol.o \
mpegvideoencdsp.o
+OBJS-$(CONFIG_PIXBLOCKDSP) += pixblockdsp.o
OBJS-$(CONFIG_QPELDSP) += qpeldsp.o
OBJS-$(CONFIG_RANGECODER) += rangecoder.o
RDFT-OBJS-$(CONFIG_HARDCODED_TABLES) += sin_tables.o
diff --git a/libavcodec/arm/Makefile b/libavcodec/arm/Makefile
index fbbd0696b7..6b80de8a2b 100644
--- a/libavcodec/arm/Makefile
+++ b/libavcodec/arm/Makefile
@@ -24,6 +24,7 @@ OBJS-$(CONFIG_MPEGAUDIODSP) += arm/mpegaudiodsp_init_arm.o
OBJS-$(CONFIG_MPEGVIDEO) += arm/mpegvideo_arm.o
OBJS-$(CONFIG_MPEGVIDEOENC) += arm/mpegvideoencdsp_init_arm.o
OBJS-$(CONFIG_NEON_CLOBBER_TEST) += arm/neontest.o
+OBJS-$(CONFIG_PIXBLOCKDSP) += arm/pixblockdsp_init_arm.o
OBJS-$(CONFIG_VIDEODSP) += arm/videodsp_init_arm.o
OBJS-$(CONFIG_VP3DSP) += arm/vp3dsp_init_arm.o
@@ -63,6 +64,7 @@ ARMV6-OBJS-$(CONFIG_IDCTDSP) += arm/idctdsp_init_armv6.o \
arm/simple_idct_armv6.o
ARMV6-OBJS-$(CONFIG_MPEGAUDIODSP) += arm/mpegaudiodsp_fixed_armv6.o
ARMV6-OBJS-$(CONFIG_MPEGVIDEOENC) += arm/mpegvideoencdsp_armv6.o
+ARMV6-OBJS-$(CONFIG_PIXBLOCKDSP) += arm/pixblockdsp_armv6.o
ARMV6-OBJS-$(CONFIG_MLP_DECODER) += arm/mlpdsp_armv6.o
ARMV6-OBJS-$(CONFIG_VC1_DECODER) += arm/startcode_armv6.o
diff --git a/libavcodec/arm/dsputil_armv6.S b/libavcodec/arm/dsputil_armv6.S
index 60232243e5..fa5a82301e 100644
--- a/libavcodec/arm/dsputil_armv6.S
+++ b/libavcodec/arm/dsputil_armv6.S
@@ -20,61 +20,6 @@
#include "libavutil/arm/asm.S"
-function ff_get_pixels_armv6, export=1
- pld [r1, r2]
- push {r4-r8, lr}
- mov lr, #8
-1:
- ldrd_post r4, r5, r1, r2
- subs lr, lr, #1
- uxtb16 r6, r4
- uxtb16 r4, r4, ror #8
- uxtb16 r12, r5
- uxtb16 r8, r5, ror #8
- pld [r1, r2]
- pkhbt r5, r6, r4, lsl #16
- pkhtb r6, r4, r6, asr #16
- pkhbt r7, r12, r8, lsl #16
- pkhtb r12, r8, r12, asr #16
- stm r0!, {r5,r6,r7,r12}
- bgt 1b
-
- pop {r4-r8, pc}
-endfunc
-
-function ff_diff_pixels_armv6, export=1
- pld [r1, r3]
- pld [r2, r3]
- push {r4-r9, lr}
- mov lr, #8
-1:
- ldrd_post r4, r5, r1, r3
- ldrd_post r6, r7, r2, r3
- uxtb16 r8, r4
- uxtb16 r4, r4, ror #8
- uxtb16 r9, r6
- uxtb16 r6, r6, ror #8
- pld [r1, r3]
- ssub16 r9, r8, r9
- ssub16 r6, r4, r6
- uxtb16 r8, r5
- uxtb16 r5, r5, ror #8
- pld [r2, r3]
- pkhbt r4, r9, r6, lsl #16
- pkhtb r6, r6, r9, asr #16
- uxtb16 r9, r7
- uxtb16 r7, r7, ror #8
- ssub16 r9, r8, r9
- ssub16 r5, r5, r7
- subs lr, lr, #1
- pkhbt r8, r9, r5, lsl #16
- pkhtb r9, r5, r9, asr #16
- stm r0!, {r4,r6,r8,r9}
- bgt 1b
-
- pop {r4-r9, pc}
-endfunc
-
function ff_pix_abs16_armv6, export=1
ldr r0, [sp]
push {r4-r9, lr}
diff --git a/libavcodec/arm/dsputil_init_armv6.c b/libavcodec/arm/dsputil_init_armv6.c
index 1cfad42183..86d84f5744 100644
--- a/libavcodec/arm/dsputil_init_armv6.c
+++ b/libavcodec/arm/dsputil_init_armv6.c
@@ -26,10 +26,6 @@
#include "libavcodec/mpegvideo.h"
#include "dsputil_arm.h"
-void ff_get_pixels_armv6(int16_t *block, const uint8_t *pixels, int stride);
-void ff_diff_pixels_armv6(int16_t *block, const uint8_t *s1,
- const uint8_t *s2, int stride);
-
int ff_pix_abs16_armv6(MpegEncContext *s, uint8_t *blk1, uint8_t *blk2,
int line_size, int h);
int ff_pix_abs16_x2_armv6(MpegEncContext *s, uint8_t *blk1, uint8_t *blk2,
@@ -46,10 +42,6 @@ int ff_sse16_armv6(MpegEncContext *s, uint8_t *blk1, uint8_t *blk2,
av_cold void ff_dsputil_init_armv6(DSPContext *c, AVCodecContext *avctx,
unsigned high_bit_depth)
{
- if (!high_bit_depth)
- c->get_pixels = ff_get_pixels_armv6;
- c->diff_pixels = ff_diff_pixels_armv6;
-
c->pix_abs[0][0] = ff_pix_abs16_armv6;
c->pix_abs[0][1] = ff_pix_abs16_x2_armv6;
c->pix_abs[0][2] = ff_pix_abs16_y2_armv6;
diff --git a/libavcodec/arm/pixblockdsp_armv6.S b/libavcodec/arm/pixblockdsp_armv6.S
new file mode 100644
index 0000000000..b10ea78e88
--- /dev/null
+++ b/libavcodec/arm/pixblockdsp_armv6.S
@@ -0,0 +1,76 @@
+/*
+ * Copyright (c) 2009 Mans Rullgard <mans@mansr.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/arm/asm.S"
+
+function ff_get_pixels_armv6, export=1 /* widen 8 rows of 8 bytes into 16-bit values; assuming the usual AAPCS mapping of the C prototype: r0 = block, r1 = pixels, r2 = stride */
+ pld [r1, r2] /* preload hint for the address one stride ahead */
+ push {r4-r8, lr}
+ mov lr, #8 /* row counter: the loop body runs 8 times */
+1:
+ ldrd_post r4, r5, r1, r2 /* macro defined outside this file; presumably loads 8 bytes into r4/r5 and advances r1 by r2 - confirm in asm.S */
+ subs lr, lr, #1 /* sets the flags consumed by the bgt below; nothing in between writes flags */
+ uxtb16 r6, r4 /* bytes 0 and 2 of r4, zero-extended to halfwords */
+ uxtb16 r4, r4, ror #8 /* bytes 1 and 3 of r4, zero-extended to halfwords */
+ uxtb16 r12, r5 /* bytes 0 and 2 of r5 */
+ uxtb16 r8, r5, ror #8 /* bytes 1 and 3 of r5 */
+ pld [r1, r2]
+ pkhbt r5, r6, r4, lsl #16 /* repack as halfword pair (byte 0, byte 1) */
+ pkhtb r6, r4, r6, asr #16 /* repack as halfword pair (byte 2, byte 3) */
+ pkhbt r7, r12, r8, lsl #16 /* same for the second word: (byte 4, byte 5) */
+ pkhtb r12, r8, r12, asr #16 /* (byte 6, byte 7) */
+ stm r0!, {r5,r6,r7,r12} /* write 4 words (8 halfwords) and advance r0 by 16 bytes */
+ bgt 1b /* repeat while the row counter is still positive */
+
+ pop {r4-r8, pc} /* restore saved registers and return */
+endfunc
+
+function ff_diff_pixels_armv6, export=1 /* 16-bit differences of 8 rows of 8 bytes; assuming the usual AAPCS mapping of the C prototype: r0 = block, r1 = s1, r2 = s2, r3 = stride */
+ pld [r1, r3] /* preload hints one stride ahead in both sources */
+ pld [r2, r3]
+ push {r4-r9, lr}
+ mov lr, #8 /* row counter: the loop body runs 8 times */
+1:
+ ldrd_post r4, r5, r1, r3 /* macro defined outside this file; presumably loads 8 bytes of s1 into r4/r5 and advances r1 by r3 - confirm in asm.S */
+ ldrd_post r6, r7, r2, r3 /* likewise for s2 into r6/r7 */
+ uxtb16 r8, r4 /* s1 bytes 0 and 2 as halfwords */
+ uxtb16 r4, r4, ror #8 /* s1 bytes 1 and 3 */
+ uxtb16 r9, r6 /* s2 bytes 0 and 2 */
+ uxtb16 r6, r6, ror #8 /* s2 bytes 1 and 3 */
+ pld [r1, r3]
+ ssub16 r9, r8, r9 /* per-halfword s1 - s2 for bytes 0 and 2 */
+ ssub16 r6, r4, r6 /* per-halfword s1 - s2 for bytes 1 and 3 */
+ uxtb16 r8, r5 /* s1 bytes 4 and 6 */
+ uxtb16 r5, r5, ror #8 /* s1 bytes 5 and 7 */
+ pld [r2, r3]
+ pkhbt r4, r9, r6, lsl #16 /* repack differences as pair (0, 1) */
+ pkhtb r6, r6, r9, asr #16 /* repack differences as pair (2, 3) */
+ uxtb16 r9, r7 /* s2 bytes 4 and 6 */
+ uxtb16 r7, r7, ror #8 /* s2 bytes 5 and 7 */
+ ssub16 r9, r8, r9 /* differences for bytes 4 and 6 */
+ ssub16 r5, r5, r7 /* differences for bytes 5 and 7 */
+ subs lr, lr, #1 /* sets the flags consumed by the bgt below; nothing in between writes flags */
+ pkhbt r8, r9, r5, lsl #16 /* repack differences as pair (4, 5) */
+ pkhtb r9, r5, r9, asr #16 /* repack differences as pair (6, 7) */
+ stm r0!, {r4,r6,r8,r9} /* write 4 words (8 halfwords) and advance r0 by 16 bytes */
+ bgt 1b /* repeat while the row counter is still positive */
+
+ pop {r4-r9, pc} /* restore saved registers and return */
+endfunc
diff --git a/libavcodec/arm/pixblockdsp_init_arm.c b/libavcodec/arm/pixblockdsp_init_arm.c
new file mode 100644
index 0000000000..b77c523a6e
--- /dev/null
+++ b/libavcodec/arm/pixblockdsp_init_arm.c
@@ -0,0 +1,42 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stdint.h>
+
+#include "libavutil/attributes.h"
+#include "libavutil/cpu.h"
+#include "libavutil/arm/cpu.h"
+#include "libavcodec/avcodec.h"
+#include "libavcodec/pixblockdsp.h"
+
+void ff_get_pixels_armv6(int16_t *block, const uint8_t *pixels, int stride);
+void ff_diff_pixels_armv6(int16_t *block, const uint8_t *s1,
+ const uint8_t *s2, int stride);
+
+av_cold void ff_pixblockdsp_init_arm(PixblockDSPContext *c,
+ AVCodecContext *avctx,
+ unsigned high_bit_depth)
+{ /* Point c at the ARMv6 assembly routines when have_armv6(cpu_flags) holds; avctx is not read here. */
+ int cpu_flags = av_get_cpu_flags();
+
+ if (have_armv6(cpu_flags)) {
+ if (!high_bit_depth) /* braceless: guards only the get_pixels assignment on the next line */
+ c->get_pixels = ff_get_pixels_armv6;
+ c->diff_pixels = ff_diff_pixels_armv6; /* outside the high_bit_depth check: assigned whenever the ARMv6 test passes */
+ }
+}
diff --git a/libavcodec/asv.h b/libavcodec/asv.h
index e1f90e51ec..a0e8fef703 100644
--- a/libavcodec/asv.h
+++ b/libavcodec/asv.h
@@ -33,19 +33,19 @@
#include "avcodec.h"
#include "blockdsp.h"
#include "bswapdsp.h"
-#include "dsputil.h"
#include "fdctdsp.h"
#include "idctdsp.h"
#include "get_bits.h"
+#include "pixblockdsp.h"
#include "put_bits.h"
typedef struct ASV1Context{
AVCodecContext *avctx;
BlockDSPContext bdsp;
BswapDSPContext bbdsp;
- DSPContext dsp;
FDCTDSPContext fdsp;
IDCTDSPContext idsp;
+ PixblockDSPContext pdsp;
PutBitContext pb;
GetBitContext gb;
ScanTable scantable;
diff --git a/libavcodec/asvenc.c b/libavcodec/asvenc.c
index ae81953f30..02cf2db991 100644
--- a/libavcodec/asvenc.c
+++ b/libavcodec/asvenc.c
@@ -160,16 +160,16 @@ static inline void dct_get(ASV1Context *a, const AVFrame *frame,
uint8_t *ptr_cb = frame->data[1] + (mb_y * 8 * frame->linesize[1]) + mb_x * 8;
uint8_t *ptr_cr = frame->data[2] + (mb_y * 8 * frame->linesize[2]) + mb_x * 8;
- a->dsp.get_pixels(block[0], ptr_y , linesize);
- a->dsp.get_pixels(block[1], ptr_y + 8, linesize);
- a->dsp.get_pixels(block[2], ptr_y + 8*linesize , linesize);
- a->dsp.get_pixels(block[3], ptr_y + 8*linesize + 8, linesize);
+ a->pdsp.get_pixels(block[0], ptr_y, linesize);
+ a->pdsp.get_pixels(block[1], ptr_y + 8, linesize);
+ a->pdsp.get_pixels(block[2], ptr_y + 8 * linesize, linesize);
+ a->pdsp.get_pixels(block[3], ptr_y + 8 * linesize + 8, linesize);
for(i=0; i<4; i++)
a->fdsp.fdct(block[i]);
if(!(a->avctx->flags&CODEC_FLAG_GRAY)){
- a->dsp.get_pixels(block[4], ptr_cb, frame->linesize[1]);
- a->dsp.get_pixels(block[5], ptr_cr, frame->linesize[2]);
+ a->pdsp.get_pixels(block[4], ptr_cb, frame->linesize[1]);
+ a->pdsp.get_pixels(block[5], ptr_cr, frame->linesize[2]);
for(i=4; i<6; i++)
a->fdsp.fdct(block[i]);
}
@@ -282,8 +282,8 @@ static av_cold int encode_init(AVCodecContext *avctx){
const int scale= avctx->codec_id == AV_CODEC_ID_ASV1 ? 1 : 2;
ff_asv_common_init(avctx);
- ff_dsputil_init(&a->dsp, avctx);
ff_fdctdsp_init(&a->fdsp, avctx);
+ ff_pixblockdsp_init(&a->pdsp, avctx);
if(avctx->global_quality <= 0) avctx->global_quality= 4*FF_QUALITY_SCALE;
diff --git a/libavcodec/dnxhdenc.c b/libavcodec/dnxhdenc.c
index f6f9af833a..3ad625352a 100644
--- a/libavcodec/dnxhdenc.c
+++ b/libavcodec/dnxhdenc.c
@@ -30,10 +30,10 @@
#include "avcodec.h"
#include "blockdsp.h"
-#include "dsputil.h"
#include "fdctdsp.h"
#include "internal.h"
#include "mpegvideo.h"
+#include "pixblockdsp.h"
#include "dnxhdenc.h"
@@ -326,6 +326,7 @@ static av_cold int dnxhd_encode_init(AVCodecContext *avctx)
ff_fdctdsp_init(&ctx->m.fdsp, avctx);
ff_idctdsp_init(&ctx->m.idsp, avctx);
ff_mpegvideoencdsp_init(&ctx->m.mpvencdsp, avctx);
+ ff_pixblockdsp_init(&ctx->m.pdsp, avctx);
ff_dct_common_init(&ctx->m);
ff_dct_encode_init(&ctx->m);
@@ -561,12 +562,12 @@ void dnxhd_get_blocks(DNXHDEncContext *ctx, int mb_x, int mb_y)
((mb_y << 4) * ctx->m.uvlinesize) + (mb_x << bs);
const uint8_t *ptr_v = ctx->thread[0]->src[2] +
((mb_y << 4) * ctx->m.uvlinesize) + (mb_x << bs);
- DSPContext *dsp = &ctx->m.dsp;
+ PixblockDSPContext *pdsp = &ctx->m.pdsp;
- dsp->get_pixels(ctx->blocks[0], ptr_y, ctx->m.linesize);
- dsp->get_pixels(ctx->blocks[1], ptr_y + bw, ctx->m.linesize);
- dsp->get_pixels(ctx->blocks[2], ptr_u, ctx->m.uvlinesize);
- dsp->get_pixels(ctx->blocks[3], ptr_v, ctx->m.uvlinesize);
+ pdsp->get_pixels(ctx->blocks[0], ptr_y, ctx->m.linesize);
+ pdsp->get_pixels(ctx->blocks[1], ptr_y + bw, ctx->m.linesize);
+ pdsp->get_pixels(ctx->blocks[2], ptr_u, ctx->m.uvlinesize);
+ pdsp->get_pixels(ctx->blocks[3], ptr_v, ctx->m.uvlinesize);
if (mb_y + 1 == ctx->m.mb_height && ctx->m.avctx->height == 1080) {
if (ctx->interlaced) {
@@ -589,14 +590,14 @@ void dnxhd_get_blocks(DNXHDEncContext *ctx, int mb_x, int mb_y)
ctx->bdsp.clear_block(ctx->blocks[7]);
}
} else {
- dsp->get_pixels(ctx->blocks[4],
- ptr_y + ctx->dct_y_offset, ctx->m.linesize);
- dsp->get_pixels(ctx->blocks[5],
- ptr_y + ctx->dct_y_offset + bw, ctx->m.linesize);
- dsp->get_pixels(ctx->blocks[6],
- ptr_u + ctx->dct_uv_offset, ctx->m.uvlinesize);
- dsp->get_pixels(ctx->blocks[7],
- ptr_v + ctx->dct_uv_offset, ctx->m.uvlinesize);
+ pdsp->get_pixels(ctx->blocks[4],
+ ptr_y + ctx->dct_y_offset, ctx->m.linesize);
+ pdsp->get_pixels(ctx->blocks[5],
+ ptr_y + ctx->dct_y_offset + bw, ctx->m.linesize);
+ pdsp->get_pixels(ctx->blocks[6],
+ ptr_u + ctx->dct_uv_offset, ctx->m.uvlinesize);
+ pdsp->get_pixels(ctx->blocks[7],
+ ptr_v + ctx->dct_uv_offset, ctx->m.uvlinesize);
}
}
diff --git a/libavcodec/dsputil.c b/libavcodec/dsputil.c
index c68a70a79e..1cd9658ba6 100644
--- a/libavcodec/dsputil.c
+++ b/libavcodec/dsputil.c
@@ -36,13 +36,6 @@
uint32_t ff_square_tab[512] = { 0, };
-#define BIT_DEPTH 16
-#include "dsputilenc_template.c"
-#undef BIT_DEPTH
-
-#define BIT_DEPTH 8
-#include "dsputilenc_template.c"
-
static int sse4_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
int line_size, int h)
{
@@ -111,27 +104,6 @@ static int sse16_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
return s;
}
-static void diff_pixels_c(int16_t *av_restrict block, const uint8_t *s1,
- const uint8_t *s2, int stride)
-{
- int i;
-
- /* read the pixels */
- for (i = 0; i < 8; i++) {
- block[0] = s1[0] - s2[0];
- block[1] = s1[1] - s2[1];
- block[2] = s1[2] - s2[2];
- block[3] = s1[3] - s2[3];
- block[4] = s1[4] - s2[4];
- block[5] = s1[5] - s2[5];
- block[6] = s1[6] - s2[6];
- block[7] = s1[7] - s2[7];
- s1 += stride;
- s2 += stride;
- block += 8;
- }
-}
-
static int sum_abs_dctelem_c(int16_t *block)
{
int sum = 0, i;
@@ -586,7 +558,7 @@ static int dct_sad8x8_c(MpegEncContext *s, uint8_t *src1,
av_assert2(h == 8);
- s->dsp.diff_pixels(temp, src1, src2, stride);
+ s->pdsp.diff_pixels(temp, src1, src2, stride);
s->fdsp.fdct(temp);
return s->dsp.sum_abs_dctelem(temp);
}
@@ -626,7 +598,7 @@ static int dct264_sad8x8_c(MpegEncContext *s, uint8_t *src1,
int16_t dct[8][8];
int i, sum = 0;
- s->dsp.diff_pixels(dct[0], src1, src2, stride);
+ s->pdsp.diff_pixels(dct[0], src1, src2, stride);
#define SRC(x) dct[i][x]
#define DST(x, v) dct[i][x] = v
@@ -653,7 +625,7 @@ static int dct_max8x8_c(MpegEncContext *s, uint8_t *src1,
av_assert2(h == 8);
- s->dsp.diff_pixels(temp, src1, src2, stride);
+ s->pdsp.diff_pixels(temp, src1, src2, stride);
s->fdsp.fdct(temp);
for (i = 0; i < 64; i++)
@@ -672,7 +644,7 @@ static int quant_psnr8x8_c(MpegEncContext *s, uint8_t *src1,
av_assert2(h == 8);
s->mb_intra = 0;
- s->dsp.diff_pixels(temp, src1, src2, stride);
+ s->pdsp.diff_pixels(temp, src1, src2, stride);
memcpy(bak, temp, 64 * sizeof(int16_t));
@@ -703,7 +675,7 @@ static int rd8x8_c(MpegEncContext *s, uint8_t *src1, uint8_t *src2,
copy_block8(lsrc1, src1, 8, stride, 8);
copy_block8(lsrc2, src2, 8, stride, 8);
- s->dsp.diff_pixels(temp, lsrc1, lsrc2, 8);
+ s->pdsp.diff_pixels(temp, lsrc1, lsrc2, 8);
s->block_last_index[0 /* FIXME */] =
last =
@@ -775,7 +747,7 @@ static int bit8x8_c(MpegEncContext *s, uint8_t *src1, uint8_t *src2,
av_assert2(h == 8);
- s->dsp.diff_pixels(temp, src1, src2, stride);
+ s->pdsp.diff_pixels(temp, src1, src2, stride);
s->block_last_index[0 /* FIXME */] =
last =
@@ -971,8 +943,6 @@ av_cold void ff_dsputil_init(DSPContext *c, AVCodecContext *avctx)
ff_check_alignment();
- c->diff_pixels = diff_pixels_c;
-
c->sum_abs_dctelem = sum_abs_dctelem_c;
/* TODO [0] 16 [1] 8 */
@@ -1019,21 +989,6 @@ av_cold void ff_dsputil_init(DSPContext *c, AVCodecContext *avctx)
ff_dsputil_init_dwt(c);
#endif
- switch (avctx->bits_per_raw_sample) {
- case 9:
- case 10:
- case 12:
- case 14:
- c->get_pixels = get_pixels_16_c;
- break;
- default:
- if (avctx->bits_per_raw_sample<=8 || avctx->codec_type != AVMEDIA_TYPE_VIDEO) {
- c->get_pixels = get_pixels_8_c;
- }
- break;
- }
-
-
if (ARCH_ALPHA)
ff_dsputil_init_alpha(c, avctx);
if (ARCH_ARM)
diff --git a/libavcodec/dsputil.h b/libavcodec/dsputil.h
index ea5d13c72d..5f4ba349e7 100644
--- a/libavcodec/dsputil.h
+++ b/libavcodec/dsputil.h
@@ -62,14 +62,6 @@ typedef int (*me_cmp_func)(struct MpegEncContext *c,
* DSPContext.
*/
typedef struct DSPContext {
- /* pixel ops : interface with DCT */
- void (*get_pixels)(int16_t *block /* align 16 */,
- const uint8_t *pixels /* align 8 */,
- int line_size);
- void (*diff_pixels)(int16_t *block /* align 16 */,
- const uint8_t *s1 /* align 8 */,
- const uint8_t *s2 /* align 8 */,
- int stride);
int (*sum_abs_dctelem)(int16_t *block /* align 16 */);
me_cmp_func sad[6]; /* identical to pix_absAxA except additional void * */
diff --git a/libavcodec/dvenc.c b/libavcodec/dvenc.c
index a60b834dfe..aeb4a33259 100644
--- a/libavcodec/dvenc.c
+++ b/libavcodec/dvenc.c
@@ -31,6 +31,7 @@
#include "dsputil.h"
#include "fdctdsp.h"
#include "internal.h"
+#include "pixblockdsp.h"
#include "put_bits.h"
#include "dv.h"
#include "dv_tablegen.h"
@@ -41,6 +42,7 @@ static av_cold int dvvideo_encode_init(AVCodecContext *avctx)
DVVideoContext *s = avctx->priv_data;
DSPContext dsp;
FDCTDSPContext fdsp;
+ PixblockDSPContext pdsp;
int ret;
s->sys = avpriv_dv_codec_profile(avctx);
@@ -70,9 +72,10 @@ static av_cold int dvvideo_encode_init(AVCodecContext *avctx)
memset(&dsp,0, sizeof(dsp));
ff_dsputil_init(&dsp, avctx);
ff_fdctdsp_init(&fdsp, avctx);
+ ff_pixblockdsp_init(&pdsp, avctx);
ff_set_cmp(&dsp, dsp.ildct_cmp, avctx->ildct_cmp);
- s->get_pixels = dsp.get_pixels;
+ s->get_pixels = pdsp.get_pixels;
s->ildct_cmp = dsp.ildct_cmp[5];
s->fdct[0] = fdsp.fdct;
diff --git a/libavcodec/libavcodec.v b/libavcodec/libavcodec.v
index 5909dce46b..5a8c005b97 100644
--- a/libavcodec/libavcodec.v
+++ b/libavcodec/libavcodec.v
@@ -29,5 +29,6 @@ LIBAVCODEC_$MAJOR {
ff_dnxhd_cid_table;
ff_idctdsp_init;
ff_fdctdsp_init;
+ ff_pixblockdsp_init;
local: *;
};
diff --git a/libavcodec/mpegvideo.h b/libavcodec/mpegvideo.h
index f2d884bad5..94937e5cdc 100644
--- a/libavcodec/mpegvideo.h
+++ b/libavcodec/mpegvideo.h
@@ -40,6 +40,7 @@
#include "idctdsp.h"
#include "mpegvideodsp.h"
#include "mpegvideoencdsp.h"
+#include "pixblockdsp.h"
#include "put_bits.h"
#include "ratecontrol.h"
#include "parser.h"
@@ -371,6 +372,7 @@ typedef struct MpegEncContext {
IDCTDSPContext idsp;
MpegVideoDSPContext mdsp;
MpegvideoEncDSPContext mpvencdsp;
+ PixblockDSPContext pdsp;
QpelDSPContext qdsp;
VideoDSPContext vdsp;
H263DSPContext h263dsp;
diff --git a/libavcodec/mpegvideo_enc.c b/libavcodec/mpegvideo_enc.c
index 826f061eea..56867ccb85 100644
--- a/libavcodec/mpegvideo_enc.c
+++ b/libavcodec/mpegvideo_enc.c
@@ -37,7 +37,6 @@
#include "libavutil/timer.h"
#include "avcodec.h"
#include "dct.h"
-#include "dsputil.h"
#include "idctdsp.h"
#include "mpeg12.h"
#include "mpegvideo.h"
@@ -48,6 +47,7 @@
#include "mpegutils.h"
#include "mjpegenc.h"
#include "msmpeg4.h"
+#include "pixblockdsp.h"
#include "qpeldsp.h"
#include "faandct.h"
#include "thread.h"
@@ -820,6 +820,7 @@ av_cold int ff_MPV_encode_init(AVCodecContext *avctx)
ff_fdctdsp_init(&s->fdsp, avctx);
ff_mpegvideoencdsp_init(&s->mpvencdsp, avctx);
+ ff_pixblockdsp_init(&s->pdsp, avctx);
ff_qpeldsp_init(&s->qdsp);
s->avctx->coded_frame = s->current_picture.f;
@@ -2093,27 +2094,27 @@ static av_always_inline void encode_mb_internal(MpegEncContext *s,
}
}
- s->dsp.get_pixels(s->block[0], ptr_y , wrap_y);
- s->dsp.get_pixels(s->block[1], ptr_y + 8 , wrap_y);
- s->dsp.get_pixels(s->block[2], ptr_y + dct_offset , wrap_y);
- s->dsp.get_pixels(s->block[3], ptr_y + dct_offset + 8 , wrap_y);
+ s->pdsp.get_pixels(s->block[0], ptr_y, wrap_y);
+ s->pdsp.get_pixels(s->block[1], ptr_y + 8, wrap_y);
+ s->pdsp.get_pixels(s->block[2], ptr_y + dct_offset, wrap_y);
+ s->pdsp.get_pixels(s->block[3], ptr_y + dct_offset + 8, wrap_y);
if (s->flags & CODEC_FLAG_GRAY) {
skip_dct[4] = 1;
skip_dct[5] = 1;
} else {
- s->dsp.get_pixels(s->block[4], ptr_cb, wrap_c);
- s->dsp.get_pixels(s->block[5], ptr_cr, wrap_c);
+ s->pdsp.get_pixels(s->block[4], ptr_cb, wrap_c);
+ s->pdsp.get_pixels(s->block[5], ptr_cr, wrap_c);
if (!s->chroma_y_shift && s->chroma_x_shift) { /* 422 */
- s->dsp.get_pixels(s->block[6], ptr_cb + uv_dct_offset, wrap_c);
- s->dsp.get_pixels(s->block[7], ptr_cr + uv_dct_offset, wrap_c);
+ s->pdsp.get_pixels(s->block[6], ptr_cb + uv_dct_offset, wrap_c);
+ s->pdsp.get_pixels(s->block[7], ptr_cr + uv_dct_offset, wrap_c);
} else if (!s->chroma_y_shift && !s->chroma_x_shift) { /* 444 */
- s->dsp.get_pixels(s->block[6], ptr_cb + 8, wrap_c);
- s->dsp.get_pixels(s->block[7], ptr_cr + 8, wrap_c);
- s->dsp.get_pixels(s->block[8], ptr_cb + uv_dct_offset, wrap_c);
- s->dsp.get_pixels(s->block[9], ptr_cr + uv_dct_offset, wrap_c);
- s->dsp.get_pixels(s->block[10], ptr_cb + uv_dct_offset + 8, wrap_c);
- s->dsp.get_pixels(s->block[11], ptr_cr + uv_dct_offset + 8, wrap_c);
+ s->pdsp.get_pixels(s->block[ 6], ptr_cb + 8, wrap_c);
+ s->pdsp.get_pixels(s->block[ 7], ptr_cr + 8, wrap_c);
+ s->pdsp.get_pixels(s->block[ 8], ptr_cb + uv_dct_offset, wrap_c);
+ s->pdsp.get_pixels(s->block[ 9], ptr_cr + uv_dct_offset, wrap_c);
+ s->pdsp.get_pixels(s->block[10], ptr_cb + uv_dct_offset + 8, wrap_c);
+ s->pdsp.get_pixels(s->block[11], ptr_cr + uv_dct_offset + 8, wrap_c);
}
}
} else {
@@ -2180,24 +2181,24 @@ static av_always_inline void encode_mb_internal(MpegEncContext *s,
}
}
- s->dsp.diff_pixels(s->block[0], ptr_y, dest_y, wrap_y);
- s->dsp.diff_pixels(s->block[1], ptr_y + 8, dest_y + 8, wrap_y);
- s->dsp.diff_pixels(s->block[2], ptr_y + dct_offset,
- dest_y + dct_offset, wrap_y);
- s->dsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8,
- dest_y + dct_offset + 8, wrap_y);
+ s->pdsp.diff_pixels(s->block[0], ptr_y, dest_y, wrap_y);
+ s->pdsp.diff_pixels(s->block[1], ptr_y + 8, dest_y + 8, wrap_y);
+ s->pdsp.diff_pixels(s->block[2], ptr_y + dct_offset,
+ dest_y + dct_offset, wrap_y);
+ s->pdsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8,
+ dest_y + dct_offset + 8, wrap_y);
if (s->flags & CODEC_FLAG_GRAY) {
skip_dct[4] = 1;
skip_dct[5] = 1;
} else {
- s->dsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
- s->dsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
+ s->pdsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
+ s->pdsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
if (!s->chroma_y_shift) { /* 422 */
- s->dsp.diff_pixels(s->block[6], ptr_cb + uv_dct_offset,
- dest_cb + uv_dct_offset, wrap_c);
- s->dsp.diff_pixels(s->block[7], ptr_cr + uv_dct_offset,
- dest_cr + uv_dct_offset, wrap_c);
+ s->pdsp.diff_pixels(s->block[6], ptr_cb + uv_dct_offset,
+ dest_cb + uv_dct_offset, wrap_c);
+ s->pdsp.diff_pixels(s->block[7], ptr_cr + uv_dct_offset,
+ dest_cr + uv_dct_offset, wrap_c);
}
}
/* pre quantization */
diff --git a/libavcodec/pixblockdsp.c b/libavcodec/pixblockdsp.c
new file mode 100644
index 0000000000..a69948e43e
--- /dev/null
+++ b/libavcodec/pixblockdsp.c
@@ -0,0 +1,80 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stdint.h>
+
+#include "config.h"
+#include "libavutil/attributes.h"
+#include "avcodec.h"
+#include "pixblockdsp.h"
+
+#define BIT_DEPTH 16
+#include "pixblockdsp_template.c"
+#undef BIT_DEPTH
+
+#define BIT_DEPTH 8
+#include "pixblockdsp_template.c"
+
+static void diff_pixels_c(int16_t *av_restrict block, const uint8_t *s1,
+ const uint8_t *s2, int stride)
+{ /* Write s1[x] - s2[x] for an 8x8 area of bytes into block as 64 consecutive int16_t values. */
+ int i;
+
+ /* one row of 8 samples per iteration, 8 rows in total */
+ for (i = 0; i < 8; i++) {
+ block[0] = s1[0] - s2[0];
+ block[1] = s1[1] - s2[1];
+ block[2] = s1[2] - s2[2];
+ block[3] = s1[3] - s2[3];
+ block[4] = s1[4] - s2[4];
+ block[5] = s1[5] - s2[5];
+ block[6] = s1[6] - s2[6];
+ block[7] = s1[7] - s2[7];
+ s1 += stride; /* both inputs advance by the same row stride */
+ s2 += stride;
+ block += 8; /* output rows are stored back to back */
+ }
+}
+
+av_cold void ff_pixblockdsp_init(PixblockDSPContext *c, AVCodecContext *avctx)
+{ /* Install the C routines in c, then call the ARM, PPC and x86 init functions when their ARCH_* macro is non-zero. */
+ const unsigned high_bit_depth = avctx->bits_per_raw_sample > 8; /* forwarded to the architecture init functions */
+
+ c->diff_pixels = diff_pixels_c;
+
+ switch (avctx->bits_per_raw_sample) {
+ case 9:
+ case 10:
+ case 12:
+ case 14:
+ c->get_pixels = get_pixels_16_c; /* presumably the BIT_DEPTH 16 instance of pixblockdsp_template.c */
+ break;
+ default:
+ if (avctx->bits_per_raw_sample<=8 || avctx->codec_type != AVMEDIA_TYPE_VIDEO) {
+ c->get_pixels = get_pixels_8_c;
+ } /* NOTE(review): video with any other depth above 8 does not get get_pixels assigned here - confirm callers never reach that case */
+ break;
+ }
+
+ if (ARCH_ARM) /* runs after the C assignments above, so it can replace them */
+ ff_pixblockdsp_init_arm(c, avctx, high_bit_depth);
+ if (ARCH_PPC)
+ ff_pixblockdsp_init_ppc(c, avctx, high_bit_depth);
+ if (ARCH_X86)
+ ff_pixblockdsp_init_x86(c, avctx, high_bit_depth);
+}
diff --git a/libavcodec/pixblockdsp.h b/libavcodec/pixblockdsp.h
new file mode 100644
index 0000000000..a724ffbef0
--- /dev/null
+++ b/libavcodec/pixblockdsp.h
@@ -0,0 +1,44 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_PIXBLOCKDSP_H
+#define AVCODEC_PIXBLOCKDSP_H
+
+#include <stdint.h>
+
+#include "avcodec.h"
+
+typedef struct PixblockDSPContext { /* pixel-block function pointers, filled in by ff_pixblockdsp_init() */
+ void (*get_pixels)(int16_t *block /* align 16 */,
+ const uint8_t *pixels /* align 8 */,
+ int line_size); /* loads samples from pixels into block as int16_t; line_size is passed as the row stride */
+ void (*diff_pixels)(int16_t *block /* align 16 */,
+ const uint8_t *s1 /* align 8 */,
+ const uint8_t *s2 /* align 8 */,
+ int stride); /* stores s1 - s2 into block; both sources use the same stride */
+} PixblockDSPContext;
+
+void ff_pixblockdsp_init(PixblockDSPContext *c, AVCodecContext *avctx);
+void ff_pixblockdsp_init_arm(PixblockDSPContext *c, AVCodecContext *avctx,
+ unsigned high_bit_depth);
+void ff_pixblockdsp_init_ppc(PixblockDSPContext *c, AVCodecContext *avctx,
+ unsigned high_bit_depth);
+void ff_pixblockdsp_init_x86(PixblockDSPContext *c, AVCodecContext *avctx,
+ unsigned high_bit_depth);
+
+#endif /* AVCODEC_PIXBLOCKDSP_H */
diff --git a/libavcodec/dsputilenc_template.c b/libavcodec/pixblockdsp_template.c
index 711c404a97..3aeddf526c 100644
--- a/libavcodec/dsputilenc_template.c
+++ b/libavcodec/pixblockdsp_template.c
@@ -1,10 +1,4 @@
/*
- * DSP utils
- * Copyright (c) 2000, 2001 Fabrice Bellard
- * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
- *
- * gmc & q-pel & 32/64 bit based MC by Michael Niedermayer <michaelni@gmx.at>
- *
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
@@ -22,11 +16,6 @@
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
-/**
- * @file
- * DSP utils
- */
-
#include "bit_depth_template.c"
static void FUNCC(get_pixels)(int16_t *av_restrict block, const uint8_t *_pixels,
diff --git a/libavcodec/ppc/Makefile b/libavcodec/ppc/Makefile
index 8f0f0ff308..bfa4d05578 100644
--- a/libavcodec/ppc/Makefile
+++ b/libavcodec/ppc/Makefile
@@ -15,6 +15,7 @@ OBJS-$(CONFIG_MPEGAUDIODSP) += ppc/mpegaudiodsp_altivec.o
OBJS-$(CONFIG_MPEGVIDEO) += ppc/mpegvideo_altivec.o \
ppc/mpegvideodsp.o
OBJS-$(CONFIG_MPEGVIDEOENC) += ppc/mpegvideoencdsp.o
+OBJS-$(CONFIG_PIXBLOCKDSP) += ppc/pixblockdsp.o
OBJS-$(CONFIG_VIDEODSP) += ppc/videodsp_ppc.o
OBJS-$(CONFIG_VP3DSP) += ppc/vp3dsp_altivec.o
diff --git a/libavcodec/ppc/dsputil_altivec.c b/libavcodec/ppc/dsputil_altivec.c
index 5ab1b51e2b..4cce30ac58 100644
--- a/libavcodec/ppc/dsputil_altivec.c
+++ b/libavcodec/ppc/dsputil_altivec.c
@@ -402,105 +402,6 @@ static int sse16_altivec(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
return s;
}
-static void get_pixels_altivec(int16_t *restrict block, const uint8_t *pixels,
- int line_size)
-{
- int i;
- vector unsigned char perm = vec_lvsl(0, pixels);
- const vector unsigned char zero =
- (const vector unsigned char) vec_splat_u8(0);
-
- for (i = 0; i < 8; i++) {
- /* Read potentially unaligned pixels.
- * We're reading 16 pixels, and actually only want 8,
- * but we simply ignore the extras. */
- vector unsigned char pixl = vec_ld(0, pixels);
- vector unsigned char pixr = vec_ld(7, pixels);
- vector unsigned char bytes = vec_perm(pixl, pixr, perm);
-
- // Convert the bytes into shorts.
- vector signed short shorts = (vector signed short) vec_mergeh(zero,
- bytes);
-
- // Save the data to the block, we assume the block is 16-byte aligned.
- vec_st(shorts, i * 16, (vector signed short *) block);
-
- pixels += line_size;
- }
-}
-
-static void diff_pixels_altivec(int16_t *restrict block, const uint8_t *s1,
- const uint8_t *s2, int stride)
-{
- int i;
- vector unsigned char perm1 = vec_lvsl(0, s1);
- vector unsigned char perm2 = vec_lvsl(0, s2);
- const vector unsigned char zero =
- (const vector unsigned char) vec_splat_u8(0);
- vector signed short shorts1, shorts2;
-
- for (i = 0; i < 4; i++) {
- /* Read potentially unaligned pixels.
- * We're reading 16 pixels, and actually only want 8,
- * but we simply ignore the extras. */
- vector unsigned char pixl = vec_ld(0, s1);
- vector unsigned char pixr = vec_ld(15, s1);
- vector unsigned char bytes = vec_perm(pixl, pixr, perm1);
-
- // Convert the bytes into shorts.
- shorts1 = (vector signed short) vec_mergeh(zero, bytes);
-
- // Do the same for the second block of pixels.
- pixl = vec_ld(0, s2);
- pixr = vec_ld(15, s2);
- bytes = vec_perm(pixl, pixr, perm2);
-
- // Convert the bytes into shorts.
- shorts2 = (vector signed short) vec_mergeh(zero, bytes);
-
- // Do the subtraction.
- shorts1 = vec_sub(shorts1, shorts2);
-
- // Save the data to the block, we assume the block is 16-byte aligned.
- vec_st(shorts1, 0, (vector signed short *) block);
-
- s1 += stride;
- s2 += stride;
- block += 8;
-
- /* The code below is a copy of the code above...
- * This is a manual unroll. */
-
- /* Read potentially unaligned pixels.
- * We're reading 16 pixels, and actually only want 8,
- * but we simply ignore the extras. */
- pixl = vec_ld(0, s1);
- pixr = vec_ld(15, s1);
- bytes = vec_perm(pixl, pixr, perm1);
-
- // Convert the bytes into shorts.
- shorts1 = (vector signed short) vec_mergeh(zero, bytes);
-
- // Do the same for the second block of pixels.
- pixl = vec_ld(0, s2);
- pixr = vec_ld(15, s2);
- bytes = vec_perm(pixl, pixr, perm2);
-
- // Convert the bytes into shorts.
- shorts2 = (vector signed short) vec_mergeh(zero, bytes);
-
- // Do the subtraction.
- shorts1 = vec_sub(shorts1, shorts2);
-
- // Save the data to the block, we assume the block is 16-byte aligned.
- vec_st(shorts1, 0, (vector signed short *) block);
-
- s1 += stride;
- s2 += stride;
- block += 8;
- }
-}
-
static int hadamard8_diff8x8_altivec(MpegEncContext *s, uint8_t *dst,
uint8_t *src, int stride, int h)
{
@@ -854,12 +755,6 @@ av_cold void ff_dsputil_init_altivec(DSPContext *c, AVCodecContext *avctx,
c->sse[0] = sse16_altivec;
c->sse[1] = sse8_altivec;
- c->diff_pixels = diff_pixels_altivec;
-
- if (!high_bit_depth) {
- c->get_pixels = get_pixels_altivec;
- }
-
c->hadamard8_diff[0] = hadamard8_diff16_altivec;
c->hadamard8_diff[1] = hadamard8_diff8x8_altivec;
}
diff --git a/libavcodec/ppc/pixblockdsp.c b/libavcodec/ppc/pixblockdsp.c
new file mode 100644
index 0000000000..42c5be842e
--- /dev/null
+++ b/libavcodec/ppc/pixblockdsp.c
@@ -0,0 +1,153 @@
+/*
+ * Copyright (c) 2002 Brian Foley
+ * Copyright (c) 2002 Dieter Shirley
+ * Copyright (c) 2003-2004 Romain Dolbeau <romain@dolbeau.org>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config.h"
+#if HAVE_ALTIVEC_H
+#include <altivec.h>
+#endif
+
+#include "libavutil/attributes.h"
+#include "libavutil/cpu.h"
+#include "libavutil/ppc/cpu.h"
+#include "libavutil/ppc/types_altivec.h"
+#include "libavutil/ppc/util_altivec.h"
+#include "libavcodec/avcodec.h"
+#include "libavcodec/pixblockdsp.h"
+
+#if HAVE_ALTIVEC
+
+/**
+ * Copy an 8x8 block of 8-bit pixels into block, widening every pixel to a
+ * zero-extended 16-bit value (64 values, stored row after row).
+ *
+ * @param block     destination; written with vec_st, so it is assumed to be
+ *                  16-byte aligned
+ * @param pixels    top-left pixel of the source block, any alignment
+ * @param line_size distance in bytes between two consecutive source rows
+ */
+static void get_pixels_altivec(int16_t *restrict block, const uint8_t *pixels,
+                               int line_size)
+{
+    int i;
+    const vector unsigned char zero =
+        (const vector unsigned char) vec_splat_u8(0);
+
+    for (i = 0; i < 8; i++) {
+        /* The realignment permutation depends on the low four bits of the
+         * row address, which change from row to row unless line_size is a
+         * multiple of 16, so it has to be derived for every row. */
+        vector unsigned char perm = vec_lvsl(0, pixels);
+
+        /* Read potentially unaligned pixels.
+         * Only 8 pixels are wanted, so the second load targets the 16-byte
+         * line holding the last of them (offset 7); the permute output
+         * beyond the first 8 bytes is simply ignored. */
+        vector unsigned char pixl  = vec_ld(0, pixels);
+        vector unsigned char pixr  = vec_ld(7, pixels);
+        vector unsigned char bytes = vec_perm(pixl, pixr, perm);
+
+        // Convert the bytes into shorts by interleaving them with zeros.
+        vector signed short shorts = (vector signed short) vec_mergeh(zero,
+                                                                      bytes);
+
+        // Save the data to the block, we assume the block is 16-byte aligned.
+        vec_st(shorts, i * 16, (vector signed short *) block);
+
+        pixels += line_size;
+    }
+}
+
+/**
+ * Store the per-pixel difference s1 - s2 of two 8x8 blocks of 8-bit pixels
+ * as 64 signed 16-bit values in block (row after row).
+ *
+ * @param block  destination; written with vec_st, so it is assumed to be
+ *               16-byte aligned
+ * @param s1     top-left pixel of the minuend block, any alignment
+ * @param s2     top-left pixel of the subtrahend block, any alignment
+ * @param stride distance in bytes between two consecutive rows, applied to
+ *               both s1 and s2
+ */
+static void diff_pixels_altivec(int16_t *restrict block, const uint8_t *s1,
+                                const uint8_t *s2, int stride)
+{
+    int i;
+    const vector unsigned char zero =
+        (const vector unsigned char) vec_splat_u8(0);
+
+    for (i = 0; i < 8; i++) {
+        /* The realignment permutations depend on the low four bits of the
+         * row addresses, which change from row to row unless stride is a
+         * multiple of 16, so they have to be derived for every row. */
+        vector unsigned char perm1 = vec_lvsl(0, s1);
+        vector unsigned char perm2 = vec_lvsl(0, s2);
+
+        /* Read potentially unaligned pixels.
+         * Only 8 pixels are wanted, so the second load targets the 16-byte
+         * line holding the last of them (offset 7) instead of touching a
+         * further line that contributes nothing to the result. */
+        vector unsigned char pixl  = vec_ld(0, s1);
+        vector unsigned char pixr  = vec_ld(7, s1);
+        vector unsigned char bytes = vec_perm(pixl, pixr, perm1);
+
+        // Convert the bytes into shorts by interleaving them with zeros.
+        vector signed short shorts1 =
+            (vector signed short) vec_mergeh(zero, bytes);
+        vector signed short shorts2;
+
+        // Do the same for the second block of pixels.
+        pixl  = vec_ld(0, s2);
+        pixr  = vec_ld(7, s2);
+        bytes = vec_perm(pixl, pixr, perm2);
+
+        // Convert the bytes into shorts.
+        shorts2 = (vector signed short) vec_mergeh(zero, bytes);
+
+        // Do the subtraction.
+        shorts1 = vec_sub(shorts1, shorts2);
+
+        // Save the data to the block, we assume the block is 16-byte aligned.
+        vec_st(shorts1, 0, (vector signed short *) block);
+
+        s1    += stride;
+        s2    += stride;
+        block += 8;
+    }
+}
+
+#endif /* HAVE_ALTIVEC */
+
+/**
+ * Install the AltiVec pixel-block routines into c.
+ *
+ * Nothing is changed when the build lacks AltiVec support or when the CPU
+ * flags reported at run time do not include AltiVec. get_pixels is only
+ * replaced for non-high-bit-depth content; diff_pixels is replaced
+ * regardless of high_bit_depth. avctx is not used here.
+ */
+av_cold void ff_pixblockdsp_init_ppc(PixblockDSPContext *c,
+                                     AVCodecContext *avctx,
+                                     unsigned high_bit_depth)
+{
+#if HAVE_ALTIVEC
+    if (PPC_ALTIVEC(av_get_cpu_flags())) {
+        if (!high_bit_depth)
+            c->get_pixels = get_pixels_altivec;
+
+        c->diff_pixels = diff_pixels_altivec;
+    }
+#endif /* HAVE_ALTIVEC */
+}
diff --git a/libavcodec/x86/Makefile b/libavcodec/x86/Makefile
index 0843dcc774..44ccb2040f 100644
--- a/libavcodec/x86/Makefile
+++ b/libavcodec/x86/Makefile
@@ -31,6 +31,7 @@ OBJS-$(CONFIG_MPEGVIDEO) += x86/mpegvideo.o \
x86/mpegvideodsp.o
OBJS-$(CONFIG_MPEGVIDEOENC) += x86/mpegvideoenc.o \
x86/mpegvideoencdsp_init.o
+OBJS-$(CONFIG_PIXBLOCKDSP) += x86/pixblockdsp_init.o
OBJS-$(CONFIG_QPELDSP) += x86/qpeldsp_init.o
OBJS-$(CONFIG_VIDEODSP) += x86/videodsp_init.o
OBJS-$(CONFIG_VP3DSP) += x86/vp3dsp_init.o
@@ -110,6 +111,7 @@ YASM-OBJS-$(CONFIG_LLAUDDSP) += x86/lossless_audiodsp.o
YASM-OBJS-$(CONFIG_LLVIDDSP) += x86/lossless_videodsp.o
YASM-OBJS-$(CONFIG_MPEGAUDIODSP) += x86/imdct36.o
YASM-OBJS-$(CONFIG_MPEGVIDEOENC) += x86/mpegvideoencdsp.o
+YASM-OBJS-$(CONFIG_PIXBLOCKDSP) += x86/pixblockdsp.o
YASM-OBJS-$(CONFIG_QPELDSP) += x86/qpeldsp.o \
x86/fpel.o \
x86/qpel.o
diff --git a/libavcodec/x86/dsputilenc.asm b/libavcodec/x86/dsputilenc.asm
index 13682ba5d4..023f512edd 100644
--- a/libavcodec/x86/dsputilenc.asm
+++ b/libavcodec/x86/dsputilenc.asm
@@ -352,115 +352,6 @@ SUM_SQUARED_ERRORS 16
INIT_XMM sse2
SUM_SQUARED_ERRORS 16
-INIT_MMX mmx
-; void ff_get_pixels_mmx(int16_t *block, const uint8_t *pixels, int line_size)
-cglobal get_pixels, 3,4
- movsxdifnidn r2, r2d
- add r0, 128
- mov r3, -128
- pxor m7, m7
-.loop:
- mova m0, [r1]
- mova m2, [r1+r2]
- mova m1, m0
- mova m3, m2
- punpcklbw m0, m7
- punpckhbw m1, m7
- punpcklbw m2, m7
- punpckhbw m3, m7
- mova [r0+r3+ 0], m0
- mova [r0+r3+ 8], m1
- mova [r0+r3+16], m2
- mova [r0+r3+24], m3
- lea r1, [r1+r2*2]
- add r3, 32
- js .loop
- REP_RET
-
-INIT_XMM sse2
-cglobal get_pixels, 3, 4, 5
- movsxdifnidn r2, r2d
- lea r3, [r2*3]
- pxor m4, m4
- movh m0, [r1]
- movh m1, [r1+r2]
- movh m2, [r1+r2*2]
- movh m3, [r1+r3]
- lea r1, [r1+r2*4]
- punpcklbw m0, m4
- punpcklbw m1, m4
- punpcklbw m2, m4
- punpcklbw m3, m4
- mova [r0], m0
- mova [r0+0x10], m1
- mova [r0+0x20], m2
- mova [r0+0x30], m3
- movh m0, [r1]
- movh m1, [r1+r2*1]
- movh m2, [r1+r2*2]
- movh m3, [r1+r3]
- punpcklbw m0, m4
- punpcklbw m1, m4
- punpcklbw m2, m4
- punpcklbw m3, m4
- mova [r0+0x40], m0
- mova [r0+0x50], m1
- mova [r0+0x60], m2
- mova [r0+0x70], m3
- RET
-
-INIT_MMX mmx
-; void ff_diff_pixels_mmx(int16_t *block, const uint8_t *s1, const uint8_t *s2,
-; int stride);
-cglobal diff_pixels, 4,5
- movsxdifnidn r3, r3d
- pxor m7, m7
- add r0, 128
- mov r4, -128
-.loop:
- mova m0, [r1]
- mova m2, [r2]
- mova m1, m0
- mova m3, m2
- punpcklbw m0, m7
- punpckhbw m1, m7
- punpcklbw m2, m7
- punpckhbw m3, m7
- psubw m0, m2
- psubw m1, m3
- mova [r0+r4+0], m0
- mova [r0+r4+8], m1
- add r1, r3
- add r2, r3
- add r4, 16
- jne .loop
- REP_RET
-
-INIT_XMM sse2
-cglobal diff_pixels, 4, 5, 5
- movsxdifnidn r3, r3d
- pxor m4, m4
- add r0, 128
- mov r4, -128
-.loop:
- movh m0, [r1]
- movh m2, [r2]
- movh m1, [r1+r3]
- movh m3, [r2+r3]
- punpcklbw m0, m4
- punpcklbw m1, m4
- punpcklbw m2, m4
- punpcklbw m3, m4
- psubw m0, m2
- psubw m1, m3
- mova [r0+r4+0 ], m0
- mova [r0+r4+16], m1
- lea r1, [r1+r3*2]
- lea r2, [r2+r3*2]
- add r4, 32
- jne .loop
- RET
-
;-----------------------------------------------
;int ff_sum_abs_dctelem(int16_t *block)
;-----------------------------------------------
diff --git a/libavcodec/x86/dsputilenc_mmx.c b/libavcodec/x86/dsputilenc_mmx.c
index d0936595d0..5d48a78daa 100644
--- a/libavcodec/x86/dsputilenc_mmx.c
+++ b/libavcodec/x86/dsputilenc_mmx.c
@@ -30,12 +30,6 @@
#include "libavcodec/mpegvideo.h"
#include "dsputil_x86.h"
-void ff_get_pixels_mmx(int16_t *block, const uint8_t *pixels, int line_size);
-void ff_get_pixels_sse2(int16_t *block, const uint8_t *pixels, int line_size);
-void ff_diff_pixels_mmx(int16_t *block, const uint8_t *s1, const uint8_t *s2,
- int stride);
-void ff_diff_pixels_sse2(int16_t *block, const uint8_t *s1, const uint8_t *s2,
- int stride);
int ff_sum_abs_dctelem_mmx(int16_t *block);
int ff_sum_abs_dctelem_mmxext(int16_t *block);
int ff_sum_abs_dctelem_sse2(int16_t *block);
@@ -353,16 +347,6 @@ av_cold void ff_dsputilenc_init_mmx(DSPContext *c, AVCodecContext *avctx,
{
int cpu_flags = av_get_cpu_flags();
- if (EXTERNAL_MMX(cpu_flags)) {
- if (!high_bit_depth)
- c->get_pixels = ff_get_pixels_mmx;
- c->diff_pixels = ff_diff_pixels_mmx;
- }
-
- if (EXTERNAL_SSE2(cpu_flags))
- if (!high_bit_depth)
- c->get_pixels = ff_get_pixels_sse2;
-
#if HAVE_INLINE_ASM
if (INLINE_MMX(cpu_flags)) {
c->vsad[4] = vsad_intra16_mmx;
@@ -410,7 +394,6 @@ av_cold void ff_dsputilenc_init_mmx(DSPContext *c, AVCodecContext *avctx,
if (EXTERNAL_SSE2(cpu_flags)) {
c->sse[0] = ff_sse16_sse2;
c->sum_abs_dctelem = ff_sum_abs_dctelem_sse2;
- c->diff_pixels = ff_diff_pixels_sse2;
#if HAVE_ALIGNED_STACK
c->hadamard8_diff[0] = ff_hadamard8_diff16_sse2;
diff --git a/libavcodec/x86/pixblockdsp.asm b/libavcodec/x86/pixblockdsp.asm
new file mode 100644
index 0000000000..00ee9b4ac2
--- /dev/null
+++ b/libavcodec/x86/pixblockdsp.asm
@@ -0,0 +1,135 @@
+;*****************************************************************************
+;* SIMD-optimized pixel operations
+;*****************************************************************************
+;* Copyright (c) 2000, 2001 Fabrice Bellard
+;* Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
+;*
+;* This file is part of FFmpeg.
+;*
+;* FFmpeg is free software; you can redistribute it and/or
+;* modify it under the terms of the GNU Lesser General Public
+;* License as published by the Free Software Foundation; either
+;* version 2.1 of the License, or (at your option) any later version.
+;*
+;* FFmpeg is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+;* Lesser General Public License for more details.
+;*
+;* You should have received a copy of the GNU Lesser General Public
+;* License along with FFmpeg; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;*****************************************************************************
+
+%include "libavutil/x86/x86util.asm"
+
+SECTION .text
+
+INIT_MMX mmx
+; void ff_get_pixels_mmx(int16_t *block, const uint8_t *pixels, int line_size)
+;
+; Widens an 8x8 block of bytes (rows line_size bytes apart, starting at
+; pixels) into 64 zero-extended 16-bit words stored contiguously at block.
+; r0 = block, r1 = pixels, r2 = line_size, r3 = running output offset.
+cglobal get_pixels, 3,4
+ movsxdifnidn r2, r2d
+; r0 is moved to the end of the 128-byte output and indexed with a negative
+; offset in r3 that counts up towards zero, so the "add r3, 32" at the bottom
+; of the loop also produces the flag tested by js.
+ add r0, 128
+ mov r3, -128
+; m7 = 0, used as the high bytes when unpacking.
+ pxor m7, m7
+.loop:
+; Two source rows of 8 bytes per iteration.
+ mova m0, [r1]
+ mova m2, [r1+r2]
+ mova m1, m0
+ mova m3, m2
+; Interleave with zero: low 4 bytes -> m0/m2, high 4 bytes -> m1/m3.
+ punpcklbw m0, m7
+ punpckhbw m1, m7
+ punpcklbw m2, m7
+ punpckhbw m3, m7
+; 2 rows x 8 words = 32 output bytes.
+ mova [r0+r3+ 0], m0
+ mova [r0+r3+ 8], m1
+ mova [r0+r3+16], m2
+ mova [r0+r3+24], m3
+ lea r1, [r1+r2*2]
+; -128 -> 0 in steps of 32: four iterations, eight rows in total.
+ add r3, 32
+ js .loop
+ REP_RET
+
+INIT_XMM sse2
+; SSE2 variant of get_pixels with the same three arguments
+; (r0 = block, r1 = pixels, r2 = line_size).
+; Fully unrolled: eight rows of 8 bytes are loaded in two groups of four,
+; zero-extended to 16-bit words and written as one 16-byte vector per row
+; to block+0x00 .. block+0x70.
+; NOTE(review): the stores use mova, so block is presumably required to be
+; 16-byte aligned - confirm against the callers.
+cglobal get_pixels, 3, 4, 5
+ movsxdifnidn r2, r2d
+; r3 = 3 * line_size, used to address the fourth row of each group.
+ lea r3, [r2*3]
+; m4 = 0, used as the high bytes when unpacking.
+ pxor m4, m4
+; Rows 0-3.
+ movh m0, [r1]
+ movh m1, [r1+r2]
+ movh m2, [r1+r2*2]
+ movh m3, [r1+r3]
+; Advance the source pointer by four rows for the second group.
+ lea r1, [r1+r2*4]
+ punpcklbw m0, m4
+ punpcklbw m1, m4
+ punpcklbw m2, m4
+ punpcklbw m3, m4
+ mova [r0], m0
+ mova [r0+0x10], m1
+ mova [r0+0x20], m2
+ mova [r0+0x30], m3
+; Rows 4-7.
+ movh m0, [r1]
+ movh m1, [r1+r2*1]
+ movh m2, [r1+r2*2]
+ movh m3, [r1+r3]
+ punpcklbw m0, m4
+ punpcklbw m1, m4
+ punpcklbw m2, m4
+ punpcklbw m3, m4
+ mova [r0+0x40], m0
+ mova [r0+0x50], m1
+ mova [r0+0x60], m2
+ mova [r0+0x70], m3
+ RET
+
+INIT_MMX mmx
+; void ff_diff_pixels_mmx(int16_t *block, const uint8_t *s1, const uint8_t *s2,
+; int stride);
+;
+; Writes the 16-bit differences s1 - s2 of two 8x8 byte blocks (both with
+; rows stride bytes apart) as 64 words stored contiguously at block.
+; r0 = block, r1 = s1, r2 = s2, r3 = stride, r4 = running output offset.
+cglobal diff_pixels, 4,5
+ movsxdifnidn r3, r3d
+; m7 = 0, used as the high bytes when unpacking.
+ pxor m7, m7
+; r0 points at the end of the 128-byte output; r4 is a negative offset that
+; counts up to zero, so the final "add r4, 16" doubles as the loop test.
+ add r0, 128
+ mov r4, -128
+.loop:
+; One row of 8 bytes from each source per iteration.
+ mova m0, [r1]
+ mova m2, [r2]
+ mova m1, m0
+ mova m3, m2
+; Zero-extend bytes to words: low halves in m0/m2, high halves in m1/m3.
+ punpcklbw m0, m7
+ punpckhbw m1, m7
+ punpcklbw m2, m7
+ punpckhbw m3, m7
+; s1 - s2, word-wise.
+ psubw m0, m2
+ psubw m1, m3
+ mova [r0+r4+0], m0
+ mova [r0+r4+8], m1
+ add r1, r3
+ add r2, r3
+; -128 -> 0 in steps of 16: eight iterations, one row each.
+ add r4, 16
+ jne .loop
+ REP_RET
+
+INIT_XMM sse2
+; SSE2 variant of diff_pixels with the same four arguments
+; (r0 = block, r1 = s1, r2 = s2, r3 = stride); r4 is the running output offset.
+; Two rows are handled per iteration, each producing one 16-byte vector of
+; eight 16-bit differences s1 - s2.
+; NOTE(review): the stores use mova, so block is presumably required to be
+; 16-byte aligned - confirm against the callers.
+cglobal diff_pixels, 4, 5, 5
+ movsxdifnidn r3, r3d
+; m4 = 0, used as the high bytes when unpacking.
+ pxor m4, m4
+; r0 points at the end of the 128-byte output; r4 is a negative offset that
+; counts up to zero, so the final "add r4, 32" doubles as the loop test.
+ add r0, 128
+ mov r4, -128
+.loop:
+; 8 bytes from the current row (m0/m2) and the next row (m1/m3) of s1/s2.
+ movh m0, [r1]
+ movh m2, [r2]
+ movh m1, [r1+r3]
+ movh m3, [r2+r3]
+; Zero-extend the 8 bytes of each row to 8 words.
+ punpcklbw m0, m4
+ punpcklbw m1, m4
+ punpcklbw m2, m4
+ punpcklbw m3, m4
+; s1 - s2, word-wise.
+ psubw m0, m2
+ psubw m1, m3
+ mova [r0+r4+0 ], m0
+ mova [r0+r4+16], m1
+; Advance both sources by two rows.
+ lea r1, [r1+r3*2]
+ lea r2, [r2+r3*2]
+; -128 -> 0 in steps of 32: four iterations, two rows each.
+ add r4, 32
+ jne .loop
+ RET
diff --git a/libavcodec/x86/pixblockdsp_init.c b/libavcodec/x86/pixblockdsp_init.c
new file mode 100644
index 0000000000..4c31b802ff
--- /dev/null
+++ b/libavcodec/x86/pixblockdsp_init.c
@@ -0,0 +1,50 @@
+/*
+ * SIMD-optimized pixel operations
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/attributes.h"
+#include "libavutil/cpu.h"
+#include "libavutil/x86/cpu.h"
+#include "libavcodec/pixblockdsp.h"
+
+void ff_get_pixels_mmx(int16_t *block, const uint8_t *pixels, int line_size);
+void ff_get_pixels_sse2(int16_t *block, const uint8_t *pixels, int line_size);
+void ff_diff_pixels_mmx(int16_t *block, const uint8_t *s1, const uint8_t *s2,
+ int stride);
+void ff_diff_pixels_sse2(int16_t *block, const uint8_t *s1, const uint8_t *s2,
+ int stride);
+
+/**
+ * Install the x86 assembly pixel-block routines into c according to the
+ * CPU flags reported at run time.
+ *
+ * The SSE2 versions are assigned after the MMX ones and therefore take
+ * precedence when both are available. get_pixels is only replaced for
+ * non-high-bit-depth content; diff_pixels is replaced regardless of
+ * high_bit_depth. avctx is not used here.
+ */
+av_cold void ff_pixblockdsp_init_x86(PixblockDSPContext *c,
+                                     AVCodecContext *avctx,
+                                     unsigned high_bit_depth)
+{
+    const int flags        = av_get_cpu_flags();
+    const int low_bitdepth = !high_bit_depth;
+
+    if (EXTERNAL_MMX(flags)) {
+        c->diff_pixels = ff_diff_pixels_mmx;
+        if (low_bitdepth)
+            c->get_pixels = ff_get_pixels_mmx;
+    }
+
+    if (EXTERNAL_SSE2(flags)) {
+        c->diff_pixels = ff_diff_pixels_sse2;
+        if (low_bitdepth)
+            c->get_pixels = ff_get_pixels_sse2;
+    }
+}