diff options
author | Michael Niedermayer <michaelni@gmx.at> | 2012-03-13 01:56:33 +0100 |
---|---|---|
committer | Michael Niedermayer <michaelni@gmx.at> | 2012-03-13 01:56:33 +0100 |
commit | b25a265a5c921d2d223a8aeff2f918894d515934 (patch) | |
tree | 480f9648f685220520a344ac293f66e307abfc5c | |
parent | 2d38081b4f65f23077cb1b27f2d08c82c45afa05 (diff) | |
parent | bd3e07c82ae558c2cc3616115161827630826ec1 (diff) | |
download | ffmpeg-b25a265a5c921d2d223a8aeff2f918894d515934.tar.gz |
Merge remote-tracking branch 'qatar/master'
* qatar/master:
pcm-mpeg: convert to bytestream2 API
Revert "h264: clear trailing bits in partially parsed NAL units"
remove iwmmxt optimizations
mimic: do not continue if swap_buf_size is 0
mimic: convert to bytestream2 API
frwu: use MKTAG to check marker instead of AV_RL32
txd: port to bytestream2 API
c93: convert to bytestream2 API
iff: make .long_name more descriptive
FATE: add test for cdxl demuxer
rtsp: Fix a typo
Conflicts:
libavcodec/arm/dsputil_iwmmxt.c
libavcodec/arm/dsputil_iwmmxt_rnd_template.c
libavcodec/arm/mpegvideo_iwmmxt.c
libavcodec/c93.c
libavcodec/txd.c
libavutil/arm/cpu.c
tests/fate/demux.mak
Merged-by: Michael Niedermayer <michaelni@gmx.at>
-rwxr-xr-x | configure | 5 | ||||
-rw-r--r-- | libavcodec/arm/Makefile | 3 | ||||
-rw-r--r-- | libavcodec/arm/dsputil_arm.h | 1 | ||||
-rw-r--r-- | libavcodec/arm/dsputil_init_arm.c | 1 | ||||
-rw-r--r-- | libavcodec/arm/dsputil_iwmmxt.c | 210 | ||||
-rw-r--r-- | libavcodec/arm/dsputil_iwmmxt_rnd_template.c | 1114 | ||||
-rw-r--r-- | libavcodec/arm/mpegvideo_arm.c | 7 | ||||
-rw-r--r-- | libavcodec/arm/mpegvideo_arm.h | 1 | ||||
-rw-r--r-- | libavcodec/arm/mpegvideo_iwmmxt.c | 101 | ||||
-rw-r--r-- | libavcodec/c93.c | 52 | ||||
-rw-r--r-- | libavcodec/frwu.c | 3 | ||||
-rw-r--r-- | libavcodec/mimic.c | 22 | ||||
-rw-r--r-- | libavcodec/pcm-mpeg.c | 110 | ||||
-rw-r--r-- | libavcodec/s3tc.c | 22 | ||||
-rw-r--r-- | libavcodec/s3tc.h | 8 | ||||
-rw-r--r-- | libavcodec/txd.c | 68 | ||||
-rw-r--r-- | libavformat/iff.c | 2 | ||||
-rw-r--r-- | libavformat/rtsp.c | 2 | ||||
-rw-r--r-- | libavutil/Makefile | 1 | ||||
-rw-r--r-- | libavutil/arm/cpu.c | 25 | ||||
-rw-r--r-- | libavutil/cpu.c | 5 | ||||
-rw-r--r-- | libavutil/cpu.h | 2 |
22 files changed, 142 insertions, 1623 deletions
@@ -251,7 +251,6 @@ Advanced options (experts only): --disable-armv6 disable armv6 optimizations --disable-armv6t2 disable armv6t2 optimizations --disable-armvfp disable ARM VFP optimizations - --disable-iwmmxt disable iwmmxt optimizations --disable-mmi disable MMI optimizations --disable-neon disable NEON optimizations --disable-vis disable VIS optimizations @@ -1132,7 +1131,6 @@ ARCH_EXT_LIST=' armv6t2 armvfp avx - iwmmxt mmi mmx mmx2 @@ -1344,7 +1342,6 @@ armv5te_deps="arm" armv6_deps="arm" armv6t2_deps="arm" armvfp_deps="arm" -iwmmxt_deps="arm" neon_deps="arm" vfpv3_deps="armvfp" @@ -2884,7 +2881,6 @@ EOF enabled armv6 && check_asm armv6 '"sadd16 r0, r0, r0"' enabled armv6t2 && check_asm armv6t2 '"movt r0, #0"' enabled armvfp && check_asm armvfp '"fadds s0, s0, s0"' - enabled iwmmxt && check_asm iwmmxt '"wunpckelub wr6, wr4"' enabled neon && check_asm neon '"vadd.i16 q0, q0, q0"' enabled vfpv3 && check_asm vfpv3 '"vmov.f32 s0, #1.0"' @@ -3489,7 +3485,6 @@ if enabled arm; then echo "ARMv6 enabled ${armv6-no}" echo "ARMv6T2 enabled ${armv6t2-no}" echo "ARM VFP enabled ${armvfp-no}" - echo "IWMMXT enabled ${iwmmxt-no}" echo "NEON enabled ${neon-no}" fi if enabled mips; then diff --git a/libavcodec/arm/Makefile b/libavcodec/arm/Makefile index 0e45bc0a52..39852c4f4a 100644 --- a/libavcodec/arm/Makefile +++ b/libavcodec/arm/Makefile @@ -44,9 +44,6 @@ OBJS-$(HAVE_ARMVFP) += arm/dsputil_vfp.o \ arm/dsputil_init_vfp.o \ $(VFP-OBJS-yes) -OBJS-$(HAVE_IWMMXT) += arm/dsputil_iwmmxt.o \ - arm/mpegvideo_iwmmxt.o \ - NEON-OBJS-$(CONFIG_FFT) += arm/fft_neon.o \ arm/fft_fixed_neon.o \ diff --git a/libavcodec/arm/dsputil_arm.h b/libavcodec/arm/dsputil_arm.h index b333c70226..b7b5bdc0e6 100644 --- a/libavcodec/arm/dsputil_arm.h +++ b/libavcodec/arm/dsputil_arm.h @@ -28,6 +28,5 @@ void ff_dsputil_init_armv5te(DSPContext* c, AVCodecContext *avctx); void ff_dsputil_init_armv6(DSPContext* c, AVCodecContext *avctx); void ff_dsputil_init_vfp(DSPContext* c, AVCodecContext *avctx); void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx); -void ff_dsputil_init_iwmmxt(DSPContext* c, AVCodecContext *avctx); #endif /* AVCODEC_ARM_DSPUTIL_H */ diff --git a/libavcodec/arm/dsputil_init_arm.c b/libavcodec/arm/dsputil_init_arm.c index 58577dde9a..82af718b66 100644 --- a/libavcodec/arm/dsputil_init_arm.c +++ b/libavcodec/arm/dsputil_init_arm.c @@ -119,7 +119,6 @@ void ff_dsputil_init_arm(DSPContext* c, AVCodecContext *avctx) if (HAVE_ARMV5TE) ff_dsputil_init_armv5te(c, avctx); if (HAVE_ARMV6) ff_dsputil_init_armv6(c, avctx); - if (HAVE_IWMMXT) ff_dsputil_init_iwmmxt(c, avctx); if (HAVE_ARMVFP) ff_dsputil_init_vfp(c, avctx); if (HAVE_NEON) ff_dsputil_init_neon(c, avctx); } diff --git a/libavcodec/arm/dsputil_iwmmxt.c b/libavcodec/arm/dsputil_iwmmxt.c deleted file mode 100644 index 2837af119f..0000000000 --- a/libavcodec/arm/dsputil_iwmmxt.c +++ /dev/null @@ -1,210 +0,0 @@ -/* - * iWMMXt optimized DSP utils - * Copyright (c) 2004 AGAWA Koji - * - * This file is part of FFmpeg. - * - * FFmpeg is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * FFmpeg is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with FFmpeg; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#include "libavutil/cpu.h" -#include "libavcodec/dsputil.h" - -#define DEF(x, y) x ## _no_rnd_ ## y ##_iwmmxt -#define SET_RND(regd) __asm__ volatile ("mov r12, #1 \n\t tbcsth " #regd ", r12":::"r12"); -#define WAVG2B "wavg2b" -#include "dsputil_iwmmxt_rnd_template.c" -#undef DEF -#undef SET_RND -#undef WAVG2B - -#define DEF(x, y) x ## _ ## y ##_iwmmxt -#define SET_RND(regd) __asm__ volatile ("mov r12, #2 \n\t tbcsth " #regd ", r12":::"r12"); -#define WAVG2B "wavg2br" -#include "dsputil_iwmmxt_rnd_template.c" -#undef DEF -#undef SET_RND -#undef WAVG2BR - -// need scheduling -#define OP(AVG) \ - __asm__ volatile ( \ - /* alignment */ \ - "and r12, %[pixels], #7 \n\t" \ - "bic %[pixels], %[pixels], #7 \n\t" \ - "tmcr wcgr1, r12 \n\t" \ - \ - "wldrd wr0, [%[pixels]] \n\t" \ - "wldrd wr1, [%[pixels], #8] \n\t" \ - "add %[pixels], %[pixels], %[line_size] \n\t" \ - "walignr1 wr4, wr0, wr1 \n\t" \ - \ - "1: \n\t" \ - \ - "wldrd wr2, [%[pixels]] \n\t" \ - "wldrd wr3, [%[pixels], #8] \n\t" \ - "add %[pixels], %[pixels], %[line_size] \n\t" \ - "pld [%[pixels]] \n\t" \ - "walignr1 wr5, wr2, wr3 \n\t" \ - AVG " wr6, wr4, wr5 \n\t" \ - "wstrd wr6, [%[block]] \n\t" \ - "add %[block], %[block], %[line_size] \n\t" \ - \ - "wldrd wr0, [%[pixels]] \n\t" \ - "wldrd wr1, [%[pixels], #8] \n\t" \ - "add %[pixels], %[pixels], %[line_size] \n\t" \ - "walignr1 wr4, wr0, wr1 \n\t" \ - "pld [%[pixels]] \n\t" \ - AVG " wr6, wr4, wr5 \n\t" \ - "wstrd wr6, [%[block]] \n\t" \ - "add %[block], %[block], %[line_size] \n\t" \ - \ - "subs %[h], %[h], #2 \n\t" \ - "bne 1b \n\t" \ - : [block]"+r"(block), [pixels]"+r"(pixels), [h]"+r"(h) \ - : [line_size]"r"(line_size) \ - : "memory", "r12"); -void put_pixels8_y2_iwmmxt(uint8_t *block, const uint8_t *pixels, const int line_size, int h) -{ - OP("wavg2br"); -} -void put_no_rnd_pixels8_y2_iwmmxt(uint8_t *block, const uint8_t *pixels, const int line_size, int h) -{ - OP("wavg2b"); -} -#undef OP - -void add_pixels_clamped_iwmmxt(const DCTELEM *block, uint8_t *pixels, int line_size) -{ - uint8_t *pixels2 = pixels + line_size; - - __asm__ volatile ( - "mov r12, #4 \n\t" - "1: \n\t" - "pld [%[pixels], %[line_size2]] \n\t" - "pld [%[pixels2], %[line_size2]] \n\t" - "wldrd wr4, [%[pixels]] \n\t" - "wldrd wr5, [%[pixels2]] \n\t" - "pld [%[block], #32] \n\t" - "wunpckelub wr6, wr4 \n\t" - "wldrd wr0, [%[block]] \n\t" - "wunpckehub wr7, wr4 \n\t" - "wldrd wr1, [%[block], #8] \n\t" - "wunpckelub wr8, wr5 \n\t" - "wldrd wr2, [%[block], #16] \n\t" - "wunpckehub wr9, wr5 \n\t" - "wldrd wr3, [%[block], #24] \n\t" - "add %[block], %[block], #32 \n\t" - "waddhss wr10, wr0, wr6 \n\t" - "waddhss wr11, wr1, wr7 \n\t" - "waddhss wr12, wr2, wr8 \n\t" - "waddhss wr13, wr3, wr9 \n\t" - "wpackhus wr14, wr10, wr11 \n\t" - "wpackhus wr15, wr12, wr13 \n\t" - "wstrd wr14, [%[pixels]] \n\t" - "add %[pixels], %[pixels], %[line_size2] \n\t" - "subs r12, r12, #1 \n\t" - "wstrd wr15, [%[pixels2]] \n\t" - "add %[pixels2], %[pixels2], %[line_size2] \n\t" - "bne 1b \n\t" - : [block]"+r"(block), [pixels]"+r"(pixels), [pixels2]"+r"(pixels2) - : [line_size2]"r"(line_size << 1) - : "cc", "memory", "r12"); -} - -static void clear_blocks_iwmmxt(DCTELEM *blocks) -{ - __asm__ volatile( - "wzero wr0 \n\t" - "mov r1, #(128 * 6 / 32) \n\t" - "1: \n\t" - "wstrd wr0, [%0] \n\t" - "wstrd wr0, [%0, #8] \n\t" - "wstrd wr0, [%0, #16] \n\t" - "wstrd wr0, [%0, #24] \n\t" - "subs r1, r1, #1 \n\t" - "add %0, %0, #32 \n\t" - "bne 1b \n\t" - : "+r"(blocks) - : - : "r1" - ); -} - -static void nop(uint8_t *block, const uint8_t *pixels, int line_size, int h) -{ - return; -} - -/* A run time test is not simple. If this file is compiled in - * then we should install the functions - */ - -void ff_dsputil_init_iwmmxt(DSPContext* c, AVCodecContext *avctx) -{ - int mm_flags = AV_CPU_FLAG_IWMMXT; /* multimedia extension flags */ - const int high_bit_depth = avctx->bits_per_raw_sample > 8; - - if (avctx->dsp_mask) { - if (avctx->dsp_mask & AV_CPU_FLAG_FORCE) - mm_flags |= (avctx->dsp_mask & 0xffff); - else - mm_flags &= ~(avctx->dsp_mask & 0xffff); - } - - if (!(mm_flags & AV_CPU_FLAG_IWMMXT)) return; - - c->add_pixels_clamped = add_pixels_clamped_iwmmxt; - - if (!high_bit_depth) { - c->clear_blocks = clear_blocks_iwmmxt; - - c->put_pixels_tab[0][0] = put_pixels16_iwmmxt; - c->put_pixels_tab[0][1] = put_pixels16_x2_iwmmxt; - c->put_pixels_tab[0][2] = put_pixels16_y2_iwmmxt; - c->put_pixels_tab[0][3] = put_pixels16_xy2_iwmmxt; - c->put_no_rnd_pixels_tab[0][0] = put_pixels16_iwmmxt; - c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_iwmmxt; - c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_iwmmxt; - c->put_no_rnd_pixels_tab[0][3] = put_no_rnd_pixels16_xy2_iwmmxt; - - c->put_pixels_tab[1][0] = put_pixels8_iwmmxt; - c->put_pixels_tab[1][1] = put_pixels8_x2_iwmmxt; - c->put_pixels_tab[1][2] = put_pixels8_y2_iwmmxt; - c->put_pixels_tab[1][3] = put_pixels8_xy2_iwmmxt; - c->put_no_rnd_pixels_tab[1][0] = put_pixels8_iwmmxt; - c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_iwmmxt; - c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_iwmmxt; - c->put_no_rnd_pixels_tab[1][3] = put_no_rnd_pixels8_xy2_iwmmxt; - - c->avg_pixels_tab[0][0] = avg_pixels16_iwmmxt; - c->avg_pixels_tab[0][1] = avg_pixels16_x2_iwmmxt; - c->avg_pixels_tab[0][2] = avg_pixels16_y2_iwmmxt; - c->avg_pixels_tab[0][3] = avg_pixels16_xy2_iwmmxt; - c->avg_no_rnd_pixels_tab[0][0] = avg_pixels16_iwmmxt; - c->avg_no_rnd_pixels_tab[0][1] = avg_no_rnd_pixels16_x2_iwmmxt; - c->avg_no_rnd_pixels_tab[0][2] = avg_no_rnd_pixels16_y2_iwmmxt; - c->avg_no_rnd_pixels_tab[0][3] = avg_no_rnd_pixels16_xy2_iwmmxt; - - c->avg_pixels_tab[1][0] = avg_pixels8_iwmmxt; - c->avg_pixels_tab[1][1] = avg_pixels8_x2_iwmmxt; - c->avg_pixels_tab[1][2] = avg_pixels8_y2_iwmmxt; - c->avg_pixels_tab[1][3] = avg_pixels8_xy2_iwmmxt; - c->avg_no_rnd_pixels_tab[1][0] = avg_no_rnd_pixels8_iwmmxt; - c->avg_no_rnd_pixels_tab[1][1] = avg_no_rnd_pixels8_x2_iwmmxt; - c->avg_no_rnd_pixels_tab[1][2] = avg_no_rnd_pixels8_y2_iwmmxt; - c->avg_no_rnd_pixels_tab[1][3] = avg_no_rnd_pixels8_xy2_iwmmxt; - } -} diff --git a/libavcodec/arm/dsputil_iwmmxt_rnd_template.c b/libavcodec/arm/dsputil_iwmmxt_rnd_template.c deleted file mode 100644 index 35a5a9b8b4..0000000000 --- a/libavcodec/arm/dsputil_iwmmxt_rnd_template.c +++ /dev/null @@ -1,1114 +0,0 @@ -/* - * iWMMXt optimized DSP utils - * copyright (c) 2004 AGAWA Koji - * - * This file is part of FFmpeg. - * - * FFmpeg is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * FFmpeg is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with FFmpeg; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -void DEF(put, pixels8)(uint8_t *block, const uint8_t *pixels, const int line_size, int h) -{ - int stride = line_size; - __asm__ volatile ( - "and r12, %[pixels], #7 \n\t" - "bic %[pixels], %[pixels], #7 \n\t" - "tmcr wcgr1, r12 \n\t" - "add r4, %[pixels], %[line_size] \n\t" - "add r5, %[block], %[line_size] \n\t" - "mov %[line_size], %[line_size], lsl #1 \n\t" - "1: \n\t" - "wldrd wr0, [%[pixels]] \n\t" - "subs %[h], %[h], #2 \n\t" - "wldrd wr1, [%[pixels], #8] \n\t" - "add %[pixels], %[pixels], %[line_size] \n\t" - "wldrd wr3, [r4] \n\t" - "pld [%[pixels]] \n\t" - "pld [%[pixels], #32] \n\t" - "wldrd wr4, [r4, #8] \n\t" - "add r4, r4, %[line_size] \n\t" - "walignr1 wr8, wr0, wr1 \n\t" - "pld [r4] \n\t" - "pld [r4, #32] \n\t" - "walignr1 wr10, wr3, wr4 \n\t" - "wstrd wr8, [%[block]] \n\t" - "add %[block], %[block], %[line_size] \n\t" - "wstrd wr10, [r5] \n\t" - "add r5, r5, %[line_size] \n\t" - "bne 1b \n\t" - : [block]"+r"(block), [pixels]"+r"(pixels), [line_size]"+r"(stride), [h]"+r"(h) - : - : "memory", "r4", "r5", "r12"); -} - -void DEF(avg, pixels8)(uint8_t *block, const uint8_t *pixels, const int line_size, int h) -{ - int stride = line_size; - __asm__ volatile ( - "and r12, %[pixels], #7 \n\t" - "bic %[pixels], %[pixels], #7 \n\t" - "tmcr wcgr1, r12 \n\t" - "add r4, %[pixels], %[line_size] \n\t" - "add r5, %[block], %[line_size] \n\t" - "mov %[line_size], %[line_size], lsl #1 \n\t" - "1: \n\t" - "wldrd wr0, [%[pixels]] \n\t" - "subs %[h], %[h], #2 \n\t" - "wldrd wr1, [%[pixels], #8] \n\t" - "add %[pixels], %[pixels], %[line_size] \n\t" - "wldrd wr3, [r4] \n\t" - "pld [%[pixels]] \n\t" - "pld [%[pixels], #32] \n\t" - "wldrd wr4, [r4, #8] \n\t" - "add r4, r4, %[line_size] \n\t" - "walignr1 wr8, wr0, wr1 \n\t" - "wldrd wr0, [%[block]] \n\t" - "wldrd wr2, [r5] \n\t" - "pld [r4] \n\t" - "pld [r4, #32] \n\t" - "walignr1 wr10, wr3, wr4 \n\t" - WAVG2B" wr8, wr8, wr0 \n\t" - WAVG2B" wr10, wr10, wr2 \n\t" - "wstrd wr8, [%[block]] \n\t" - "add %[block], %[block], %[line_size] \n\t" - "wstrd wr10, [r5] \n\t" - "pld [%[block]] \n\t" - "pld [%[block], #32] \n\t" - "add r5, r5, %[line_size] \n\t" - "pld [r5] \n\t" - "pld [r5, #32] \n\t" - "bne 1b \n\t" - : [block]"+r"(block), [pixels]"+r"(pixels), [line_size]"+r"(stride), [h]"+r"(h) - : - : "memory", "r4", "r5", "r12"); -} - -void DEF(put, pixels16)(uint8_t *block, const uint8_t *pixels, const int line_size, int h) -{ - int stride = line_size; - __asm__ volatile ( - "and r12, %[pixels], #7 \n\t" - "bic %[pixels], %[pixels], #7 \n\t" - "tmcr wcgr1, r12 \n\t" - "add r4, %[pixels], %[line_size] \n\t" - "add r5, %[block], %[line_size] \n\t" - "mov %[line_size], %[line_size], lsl #1 \n\t" - "1: \n\t" - "wldrd wr0, [%[pixels]] \n\t" - "wldrd wr1, [%[pixels], #8] \n\t" - "subs %[h], %[h], #2 \n\t" - "wldrd wr2, [%[pixels], #16] \n\t" - "add %[pixels], %[pixels], %[line_size] \n\t" - "wldrd wr3, [r4] \n\t" - "pld [%[pixels]] \n\t" - "pld [%[pixels], #32] \n\t" - "walignr1 wr8, wr0, wr1 \n\t" - "wldrd wr4, [r4, #8] \n\t" - "walignr1 wr9, wr1, wr2 \n\t" - "wldrd wr5, [r4, #16] \n\t" - "add r4, r4, %[line_size] \n\t" - "pld [r4] \n\t" - "pld [r4, #32] \n\t" - "walignr1 wr10, wr3, wr4 \n\t" - "wstrd wr8, [%[block]] \n\t" - "walignr1 wr11, wr4, wr5 \n\t" - "wstrd wr9, [%[block], #8] \n\t" - "add %[block], %[block], %[line_size] \n\t" - "wstrd wr10, [r5] \n\t" - "wstrd wr11, [r5, #8] \n\t" - "add r5, r5, %[line_size] \n\t" - "bne 1b \n\t" - : [block]"+r"(block), [pixels]"+r"(pixels), [line_size]"+r"(stride), [h]"+r"(h) - : - : "memory", "r4", "r5", "r12"); -} - -void DEF(avg, pixels16)(uint8_t *block, const uint8_t *pixels, const int line_size, int h) -{ - int stride = line_size; - __asm__ volatile ( - "pld [%[pixels]] \n\t" - "pld [%[pixels], #32] \n\t" - "pld [%[block]] \n\t" - "pld [%[block], #32] \n\t" - "and r12, %[pixels], #7 \n\t" - "bic %[pixels], %[pixels], #7 \n\t" - "tmcr wcgr1, r12 \n\t" - "add r4, %[pixels], %[line_size]\n\t" - "add r5, %[block], %[line_size] \n\t" - "mov %[line_size], %[line_size], lsl #1 \n\t" - "1: \n\t" - "wldrd wr0, [%[pixels]] \n\t" - "wldrd wr1, [%[pixels], #8] \n\t" - "subs %[h], %[h], #2 \n\t" - "wldrd wr2, [%[pixels], #16] \n\t" - "add %[pixels], %[pixels], %[line_size] \n\t" - "wldrd wr3, [r4] \n\t" - "pld [%[pixels]] \n\t" - "pld [%[pixels], #32] \n\t" - "walignr1 wr8, wr0, wr1 \n\t" - "wldrd wr4, [r4, #8] \n\t" - "walignr1 wr9, wr1, wr2 \n\t" - "wldrd wr5, [r4, #16] \n\t" - "add r4, r4, %[line_size] \n\t" - "wldrd wr0, [%[block]] \n\t" - "pld [r4] \n\t" - "wldrd wr1, [%[block], #8] \n\t" - "pld [r4, #32] \n\t" - "wldrd wr2, [r5] \n\t" - "walignr1 wr10, wr3, wr4 \n\t" - "wldrd wr3, [r5, #8] \n\t" - WAVG2B" wr8, wr8, wr0 \n\t" - WAVG2B" wr9, wr9, wr1 \n\t" - WAVG2B" wr10, wr10, wr2 \n\t" - "wstrd wr8, [%[block]] \n\t" - "walignr1 wr11, wr4, wr5 \n\t" - WAVG2B" wr11, wr11, wr3 \n\t" - "wstrd wr9, [%[block], #8] \n\t" - "add %[block], %[block], %[line_size] \n\t" - "wstrd wr10, [r5] \n\t" - "pld [%[block]] \n\t" - "pld [%[block], #32] \n\t" - "wstrd wr11, [r5, #8] \n\t" - "add r5, r5, %[line_size] \n\t" - "pld [r5] \n\t" - "pld [r5, #32] \n\t" - "bne 1b \n\t" - : [block]"+r"(block), [pixels]"+r"(pixels), [line_size]"+r"(stride), [h]"+r"(h) - : - : "memory", "r4", "r5", "r12"); -} - -void DEF(put, pixels8_x2)(uint8_t *block, const uint8_t *pixels, const int line_size, int h) -{ - int stride = line_size; - // [wr0 wr1 wr2 wr3] for previous line - // [wr4 wr5 wr6 wr7] for current line - SET_RND(wr15); // =2 for rnd and =1 for no_rnd version - __asm__ volatile( - "pld [%[pixels]] \n\t" - "pld [%[pixels], #32] \n\t" - "and r12, %[pixels], #7 \n\t" - "bic %[pixels], %[pixels], #7 \n\t" - "tmcr wcgr1, r12 \n\t" - "add r12, r12, #1 \n\t" - "add r4, %[pixels], %[line_size]\n\t" - "tmcr wcgr2, r12 \n\t" - "add r5, %[block], %[line_size] \n\t" - "mov %[line_size], %[line_size], lsl #1 \n\t" - - "1: \n\t" - "wldrd wr10, [%[pixels]] \n\t" - "cmp r12, #8 \n\t" - "wldrd wr11, [%[pixels], #8] \n\t" - "add %[pixels], %[pixels], %[line_size] \n\t" - "wldrd wr13, [r4] \n\t" - "pld [%[pixels]] \n\t" - "wldrd wr14, [r4, #8] \n\t" - "pld [%[pixels], #32] \n\t" - "add r4, r4, %[line_size] \n\t" - "walignr1 wr0, wr10, wr11 \n\t" - "pld [r4] \n\t" - "pld [r4, #32] \n\t" - "walignr1 wr2, wr13, wr14 \n\t" - "wmoveq wr4, wr11 \n\t" - "wmoveq wr6, wr14 \n\t" - "walignr2ne wr4, wr10, wr11 \n\t" - "walignr2ne wr6, wr13, wr14 \n\t" - WAVG2B" wr0, wr0, wr4 \n\t" - WAVG2B" wr2, wr2, wr6 \n\t" - "wstrd wr0, [%[block]] \n\t" - "subs %[h], %[h], #2 \n\t" - "wstrd wr2, [r5] \n\t" - "add %[block], %[block], %[line_size] \n\t" - "add r5, r5, %[line_size] \n\t" - "bne 1b \n\t" - : [h]"+r"(h), [pixels]"+r"(pixels), [block]"+r"(block), [line_size]"+r"(stride) - : - : "r4", "r5", "r12", "memory"); -} - -void DEF(put, pixels16_x2)(uint8_t *block, const uint8_t *pixels, const int line_size, int h) -{ - int stride = line_size; - // [wr0 wr1 wr2 wr3] for previous line - // [wr4 wr5 wr6 wr7] for current line - SET_RND(wr15); // =2 for rnd and =1 for no_rnd version - __asm__ volatile( - "pld [%[pixels]] \n\t" - "pld [%[pixels], #32] \n\t" - "and r12, %[pixels], #7 \n\t" - "bic %[pixels], %[pixels], #7 \n\t" - "tmcr wcgr1, r12 \n\t" - "add r12, r12, #1 \n\t" - "add r4, %[pixels], %[line_size]\n\t" - "tmcr wcgr2, r12 \n\t" - "add r5, %[block], %[line_size] \n\t" - "mov %[line_size], %[line_size], lsl #1 \n\t" - - "1: \n\t" - "wldrd wr10, [%[pixels]] \n\t" - "cmp r12, #8 \n\t" - "wldrd wr11, [%[pixels], #8] \n\t" - "wldrd wr12, [%[pixels], #16] \n\t" - "add %[pixels], %[pixels], %[line_size] \n\t" - "wldrd wr13, [r4] \n\t" - "pld [%[pixels]] \n\t" - "wldrd wr14, [r4, #8] \n\t" - "pld [%[pixels], #32] \n\t" - "wldrd wr15, [r4, #16] \n\t" - "add r4, r4, %[line_size] \n\t" - "walignr1 wr0, wr10, wr11 \n\t" - "pld [r4] \n\t" - "pld [r4, #32] \n\t" - "walignr1 wr1, wr11, wr12 \n\t" - "walignr1 wr2, wr13, wr14 \n\t" - "walignr1 wr3, wr14, wr15 \n\t" - "wmoveq wr4, wr11 \n\t" - "wmoveq wr5, wr12 \n\t" - "wmoveq wr6, wr14 \n\t" - "wmoveq wr7, wr15 \n\t" - "walignr2ne wr4, wr10, wr11 \n\t" - "walignr2ne wr5, wr11, wr12 \n\t" - "walignr2ne wr6, wr13, wr14 \n\t" - "walignr2ne wr7, wr14, wr15 \n\t" - WAVG2B" wr0, wr0, wr4 \n\t" - WAVG2B" wr1, wr1, wr5 \n\t" - "wstrd wr0, [%[block]] \n\t" - WAVG2B" wr2, wr2, wr6 \n\t" - "wstrd wr1, [%[block], #8] \n\t" - WAVG2B" wr3, wr3, wr7 \n\t" - "add %[block], %[block], %[line_size] \n\t" - "wstrd wr2, [r5] \n\t" - "subs %[h], %[h], #2 \n\t" - "wstrd wr3, [r5, #8] \n\t" - "add r5, r5, %[line_size] \n\t" - "bne 1b \n\t" - : [h]"+r"(h), [pixels]"+r"(pixels), [block]"+r"(block), [line_size]"+r"(stride) - : - : "r4", "r5", "r12", "memory"); -} - -void DEF(avg, pixels8_x2)(uint8_t *block, const uint8_t *pixels, const int line_size, int h) -{ - int stride = line_size; - // [wr0 wr1 wr2 wr3] for previous line - // [wr4 wr5 wr6 wr7] for current line - SET_RND(wr15); // =2 for rnd and =1 for no_rnd version - __asm__ volatile( - "pld [%[pixels]] \n\t" - "pld [%[pixels], #32] \n\t" - "pld [%[block]] \n\t" - "pld [%[block], #32] \n\t" - "and r12, %[pixels], #7 \n\t" - "bic %[pixels], %[pixels], #7 \n\t" - "tmcr wcgr1, r12 \n\t" - "add r12, r12, #1 \n\t" - "add r4, %[pixels], %[line_size]\n\t" - "tmcr wcgr2, r12 \n\t" - "add r5, %[block], %[line_size] \n\t" - "mov %[line_size], %[line_size], lsl #1 \n\t" - "pld [r5] \n\t" - "pld [r5, #32] \n\t" - - "1: \n\t" - "wldrd wr10, [%[pixels]] \n\t" - "cmp r12, #8 \n\t" - "wldrd wr11, [%[pixels], #8] \n\t" - "add %[pixels], %[pixels], %[line_size] \n\t" - "wldrd wr13, [r4] \n\t" - "pld [%[pixels]] \n\t" - "wldrd wr14, [r4, #8] \n\t" - "pld [%[pixels], #32] \n\t" - "add r4, r4, %[line_size] \n\t" - "walignr1 wr0, wr10, wr11 \n\t" - "pld [r4] \n\t" - "pld [r4, #32] \n\t" - "walignr1 wr2, wr13, wr14 \n\t" - "wmoveq wr4, wr11 \n\t" - "wmoveq wr6, wr14 \n\t" - "walignr2ne wr4, wr10, wr11 \n\t" - "wldrd wr10, [%[block]] \n\t" - "walignr2ne wr6, wr13, wr14 \n\t" - "wldrd wr12, [r5] \n\t" - WAVG2B" wr0, wr0, wr4 \n\t" - WAVG2B" wr2, wr2, wr6 \n\t" - WAVG2B" wr0, wr0, wr10 \n\t" - WAVG2B" wr2, wr2, wr12 \n\t" - "wstrd wr0, [%[block]] \n\t" - "subs %[h], %[h], #2 \n\t" - "wstrd wr2, [r5] \n\t" - "add %[block], %[block], %[line_size] \n\t" - "add r5, r5, %[line_size] \n\t" - "pld [%[block]] \n\t" - "pld [%[block], #32] \n\t" - "pld [r5] \n\t" - "pld [r5, #32] \n\t" - "bne 1b \n\t" - : [h]"+r"(h), [pixels]"+r"(pixels), [block]"+r"(block), [line_size]"+r"(stride) - : - : "r4", "r5", "r12", "memory"); -} - -void DEF(avg, pixels16_x2)(uint8_t *block, const uint8_t *pixels, const int line_size, int h) -{ - int stride = line_size; - // [wr0 wr1 wr2 wr3] for previous line - // [wr4 wr5 wr6 wr7] for current line - SET_RND(wr15); // =2 for rnd and =1 for no_rnd version - __asm__ volatile( - "pld [%[pixels]] \n\t" - "pld [%[pixels], #32] \n\t" - "pld [%[block]] \n\t" - "pld [%[block], #32] \n\t" - "and r12, %[pixels], #7 \n\t" - "bic %[pixels], %[pixels], #7 \n\t" - "tmcr wcgr1, r12 \n\t" - "add r12, r12, #1 \n\t" - "add r4, %[pixels], %[line_size]\n\t" - "tmcr wcgr2, r12 \n\t" - "add r5, %[block], %[line_size] \n\t" - "mov %[line_size], %[line_size], lsl #1 \n\t" - "pld [r5] \n\t" - "pld [r5, #32] \n\t" - - "1: \n\t" - "wldrd wr10, [%[pixels]] \n\t" - "cmp r12, #8 \n\t" - "wldrd wr11, [%[pixels], #8] \n\t" - "wldrd wr12, [%[pixels], #16] \n\t" - "add %[pixels], %[pixels], %[line_size] \n\t" - "wldrd wr13, [r4] \n\t" - "pld [%[pixels]] \n\t" - "wldrd wr14, [r4, #8] \n\t" - "pld [%[pixels], #32] \n\t" - "wldrd wr15, [r4, #16] \n\t" - "add r4, r4, %[line_size] \n\t" - "walignr1 wr0, wr10, wr11 \n\t" - "pld [r4] \n\t" - "pld [r4, #32] \n\t" - "walignr1 wr1, wr11, wr12 \n\t" - "walignr1 wr2, wr13, wr14 \n\t" - "walignr1 wr3, wr14, wr15 \n\t" - "wmoveq wr4, wr11 \n\t" - "wmoveq wr5, wr12 \n\t" - "wmoveq wr6, wr14 \n\t" - "wmoveq wr7, wr15 \n\t" - "walignr2ne wr4, wr10, wr11 \n\t" - "walignr2ne wr5, wr11, wr12 \n\t" - "walignr2ne wr6, wr13, wr14 \n\t" - "walignr2ne wr7, wr14, wr15 \n\t" - "wldrd wr10, [%[block]] \n\t" - WAVG2B" wr0, wr0, wr4 \n\t" - "wldrd wr11, [%[block], #8] \n\t" - WAVG2B" wr1, wr1, wr5 \n\t" - "wldrd wr12, [r5] \n\t" - WAVG2B" wr2, wr2, wr6 \n\t" - "wldrd wr13, [r5, #8] \n\t" - WAVG2B" wr3, wr3, wr7 \n\t" - WAVG2B" wr0, wr0, wr10 \n\t" - WAVG2B" wr1, wr1, wr11 \n\t" - WAVG2B" wr2, wr2, wr12 \n\t" - WAVG2B" wr3, wr3, wr13 \n\t" - "wstrd wr0, [%[block]] \n\t" - "subs %[h], %[h], #2 \n\t" - "wstrd wr1, [%[block], #8] \n\t" - "add %[block], %[block], %[line_size] \n\t" - "wstrd wr2, [r5] \n\t" - "pld [%[block]] \n\t" - "wstrd wr3, [r5, #8] \n\t" - "add r5, r5, %[line_size] \n\t" - "pld [%[block], #32] \n\t" - "pld [r5] \n\t" - "pld [r5, #32] \n\t" - "bne 1b \n\t" - : [h]"+r"(h), [pixels]"+r"(pixels), [block]"+r"(block), [line_size]"+r"(stride) - : - :"r4", "r5", "r12", "memory"); -} - -void DEF(avg, pixels8_y2)(uint8_t *block, const uint8_t *pixels, const int line_size, int h) -{ - int stride = line_size; - // [wr0 wr1 wr2 wr3] for previous line - // [wr4 wr5 wr6 wr7] for current line - __asm__ volatile( - "pld [%[pixels]] \n\t" - "pld [%[pixels], #32] \n\t" - "and r12, %[pixels], #7 \n\t" - "tmcr wcgr1, r12 \n\t" - "bic %[pixels], %[pixels], #7 \n\t" - - "wldrd wr10, [%[pixels]] \n\t" - "wldrd wr11, [%[pixels], #8] \n\t" - "pld [%[block]] \n\t" - "add %[pixels], %[pixels], %[line_size] \n\t" - "walignr1 wr0, wr10, wr11 \n\t" - "pld [%[pixels]] \n\t" - "pld [%[pixels], #32] \n\t" - - "1: \n\t" - "wldrd wr10, [%[pixels]] \n\t" - "wldrd wr11, [%[pixels], #8] \n\t" - "add %[pixels], %[pixels], %[line_size] \n\t" - "pld [%[pixels]] \n\t" - "pld [%[pixels], #32] \n\t" - "walignr1 wr4, wr10, wr11 \n\t" - "wldrd wr10, [%[block]] \n\t" - WAVG2B" wr8, wr0, wr4 \n\t" - WAVG2B" wr8, wr8, wr10 \n\t" - "wstrd wr8, [%[block]] \n\t" - "add %[block], %[block], %[line_size] \n\t" - - "wldrd wr10, [%[pixels]] \n\t" - "wldrd wr11, [%[pixels], #8] \n\t" - "pld [%[block]] \n\t" - "add %[pixels], %[pixels], %[line_size] \n\t" - "pld [%[pixels]] \n\t" - "pld [%[pixels], #32] \n\t" - "walignr1 wr0, wr10, wr11 \n\t" - "wldrd wr10, [%[block]] \n\t" - WAVG2B" wr8, wr0, wr4 \n\t" - WAVG2B" wr8, wr8, wr10 \n\t" - "wstrd wr8, [%[block]] \n\t" - "add %[block], %[block], %[line_size] \n\t" - - "subs %[h], %[h], #2 \n\t" - "pld [%[block]] \n\t" - "bne 1b \n\t" - : [h]"+r"(h), [pixels]"+r"(pixels), [block]"+r"(block), [line_size]"+r"(stride) - : - : "cc", "memory", "r12"); -} - -void DEF(put, pixels16_y2)(uint8_t *block, const uint8_t *pixels, const int line_size, int h) -{ - int stride = line_size; - // [wr0 wr1 wr2 wr3] for previous line - // [wr4 wr5 wr6 wr7] for current line - __asm__ volatile( - "pld [%[pixels]] \n\t" - "pld [%[pixels], #32] \n\t" - "and r12, %[pixels], #7 \n\t" - "tmcr wcgr1, r12 \n\t" - "bic %[pixels], %[pixels], #7 \n\t" - - "wldrd wr10, [%[pixels]] \n\t" - "wldrd wr11, [%[pixels], #8] \n\t" - "wldrd wr12, [%[pixels], #16] \n\t" - "add %[pixels], %[pixels], %[line_size] \n\t" - "pld [%[pixels]] \n\t" - "pld [%[pixels], #32] \n\t" - "walignr1 wr0, wr10, wr11 \n\t" - "walignr1 wr1, wr11, wr12 \n\t" - - "1: \n\t" - "wldrd wr10, [%[pixels]] \n\t" - "wldrd wr11, [%[pixels], #8] \n\t" - "wldrd wr12, [%[pixels], #16] \n\t" - "add %[pixels], %[pixels], %[line_size] \n\t" - "pld [%[pixels]] \n\t" - "pld [%[pixels], #32] \n\t" - "walignr1 wr4, wr10, wr11 \n\t" - "walignr1 wr5, wr11, wr12 \n\t" - WAVG2B" wr8, wr0, wr4 \n\t" - WAVG2B" wr9, wr1, wr5 \n\t" - "wstrd wr8, [%[block]] \n\t" - "wstrd wr9, [%[block], #8] \n\t" - "add %[block], %[block], %[line_size] \n\t" - - "wldrd wr10, [%[pixels]] \n\t" - "wldrd wr11, [%[pixels], #8] \n\t" - "wldrd wr12, [%[pixels], #16] \n\t" - "add %[pixels], %[pixels], %[line_size] \n\t" - "pld [%[pixels]] \n\t" - "pld [%[pixels], #32] \n\t" - "walignr1 wr0, wr10, wr11 \n\t" - "walignr1 wr1, wr11, wr12 \n\t" - WAVG2B" wr8, wr0, wr4 \n\t" - WAVG2B" wr9, wr1, wr5 \n\t" - "wstrd wr8, [%[block]] \n\t" - "wstrd wr9, [%[block], #8] \n\t" - "add %[block], %[block], %[line_size] \n\t" - - "subs %[h], %[h], #2 \n\t" - "bne 1b \n\t" - : [h]"+r"(h), [pixels]"+r"(pixels), [block]"+r"(block), [line_size]"+r"(stride) - : - : "r4", "r5", "r12", "memory"); -} - -void DEF(avg, pixels16_y2)(uint8_t *block, const uint8_t *pixels, const int line_size, int h) -{ - int stride = line_size; - // [wr0 wr1 wr2 wr3] for previous line - // [wr4 wr5 wr6 wr7] for current line - __asm__ volatile( - "pld [%[pixels]] \n\t" - "pld [%[pixels], #32] \n\t" - "and r12, %[pixels], #7 \n\t" - "tmcr wcgr1, r12 \n\t" - "bic %[pixels], %[pixels], #7 \n\t" - - "wldrd wr10, [%[pixels]] \n\t" - "wldrd wr11, [%[pixels], #8] \n\t" - "pld [%[block]] \n\t" - "wldrd wr12, [%[pixels], #16] \n\t" - "add %[pixels], %[pixels], %[line_size] \n\t" - "pld [%[pixels]] \n\t" - "pld [%[pixels], #32] \n\t" - "walignr1 wr0, wr10, wr11 \n\t" - "walignr1 wr1, wr11, wr12 \n\t" - - "1: \n\t" - "wldrd wr10, [%[pixels]] \n\t" - "wldrd wr11, [%[pixels], #8] \n\t" - "wldrd wr12, [%[pixels], #16] \n\t" - "add %[pixels], %[pixels], %[line_size] \n\t" - "pld [%[pixels]] \n\t" - "pld [%[pixels], #32] \n\t" - "walignr1 wr4, wr10, wr11 \n\t" - "walignr1 wr5, wr11, wr12 \n\t" - "wldrd wr10, [%[block]] \n\t" - "wldrd wr11, [%[block], #8] \n\t" - WAVG2B" wr8, wr0, wr4 \n\t" - WAVG2B" wr9, wr1, wr5 \n\t" - WAVG2B" wr8, wr8, wr10 \n\t" - WAVG2B" wr9, wr9, wr11 \n\t" - "wstrd wr8, [%[block]] \n\t" - "wstrd wr9, [%[block], #8] \n\t" - "add %[block], %[block], %[line_size] \n\t" - - "wldrd wr10, [%[pixels]] \n\t" - "wldrd wr11, [%[pixels], #8] \n\t" - "pld [%[block]] \n\t" - "wldrd wr12, [%[pixels], #16] \n\t" - "add %[pixels], %[pixels], %[line_size] \n\t" - "pld [%[pixels]] \n\t" - "pld [%[pixels], #32] \n\t" - "walignr1 wr0, wr10, wr11 \n\t" - "walignr1 wr1, wr11, wr12 \n\t" - "wldrd wr10, [%[block]] \n\t" - "wldrd wr11, [%[block], #8] \n\t" - WAVG2B" wr8, wr0, wr4 \n\t" - WAVG2B" wr9, wr1, wr5 \n\t" - WAVG2B" wr8, wr8, wr10 \n\t" - WAVG2B" wr9, wr9, wr11 \n\t" - "wstrd wr8, [%[block]] \n\t" - "wstrd wr9, [%[block], #8] \n\t" - "add %[block], %[block], %[line_size] \n\t" - - "subs %[h], %[h], #2 \n\t" - "pld [%[block]] \n\t" - "bne 1b \n\t" - : [h]"+r"(h), [pixels]"+r"(pixels), [block]"+r"(block), [line_size]"+r"(stride) - : - : "r4", "r5", "r12", "memory"); -} - -void DEF(put, pixels8_xy2)(uint8_t *block, const uint8_t *pixels, const int line_size, int h) -{ - // [wr0 wr1 wr2 wr3] for previous line - // [wr4 wr5 wr6 wr7] for current line - SET_RND(wr15); // =2 for rnd and =1 for no_rnd version - __asm__ volatile( - "pld [%[pixels]] \n\t" - "mov r12, #2 \n\t" - "pld [%[pixels], #32] \n\t" - "tmcr wcgr0, r12 \n\t" /* for shift value */ - "and r12, %[pixels], #7 \n\t" - "bic %[pixels], %[pixels], #7 \n\t" - "tmcr wcgr1, r12 \n\t" - - // [wr0 wr1 wr2 wr3] <= * - // [wr4 wr5 wr6 wr7] - "wldrd wr12, [%[pixels]] \n\t" - "add r12, r12, #1 \n\t" - "wldrd wr13, [%[pixels], #8] \n\t" - "tmcr wcgr2, r12 \n\t" - "add %[pixels], %[pixels], %[line_size] \n\t" - "cmp r12, #8 \n\t" - "pld [%[pixels]] \n\t" - "pld [%[pixels], #32] \n\t" - "walignr1 wr2, wr12, wr13 \n\t" - "wmoveq wr10, wr13 \n\t" - "walignr2ne wr10, wr12, wr13 \n\t" - "wunpckelub wr0, wr2 \n\t" - "wunpckehub wr1, wr2 \n\t" - "wunpckelub wr8, wr10 \n\t" - "wunpckehub wr9, wr10 \n\t" - "waddhus wr0, wr0, wr8 \n\t" - "waddhus wr1, wr1, wr9 \n\t" - - "1: \n\t" - // [wr0 wr1 wr2 wr3] - // [wr4 wr5 wr6 wr7] <= * - "wldrd wr12, [%[pixels]] \n\t" - "cmp r12, #8 \n\t" - "wldrd wr13, [%[pixels], #8] \n\t" - "add %[pixels], %[pixels], %[line_size] \n\t" - "walignr1 wr6, wr12, wr13 \n\t" - "pld [%[pixels]] \n\t" - "pld [%[pixels], #32] \n\t" - "wmoveq wr10, wr13 \n\t" - "walignr2ne wr10, wr12, wr13 \n\t" - "wunpckelub wr4, wr6 \n\t" - "wunpckehub wr5, wr6 \n\t" - "wunpckelub wr8, wr10 \n\t" - "wunpckehub wr9, wr10 \n\t" - "waddhus wr4, wr4, wr8 \n\t" - "waddhus wr5, wr5, wr9 \n\t" - "waddhus wr8, wr0, wr4 \n\t" - "waddhus wr9, wr1, wr5 \n\t" - "waddhus wr8, wr8, wr15 \n\t" - "waddhus wr9, wr9, wr15 \n\t" - "wsrlhg wr8, wr8, wcgr0 \n\t" - "wsrlhg wr9, wr9, wcgr0 \n\t" - "wpackhus wr8, wr8, wr9 \n\t" - "wstrd wr8, [%[block]] \n\t" - "add %[block], %[block], %[line_size] \n\t" - - // [wr0 wr1 wr2 wr3] <= * - // [wr4 wr5 wr6 wr7] - "wldrd wr12, [%[pixels]] \n\t" - "wldrd wr13, [%[pixels], #8] \n\t" - "add %[pixels], %[pixels], %[line_size] \n\t" - "walignr1 wr2, wr12, wr13 \n\t" - "pld [%[pixels]] \n\t" - "pld [%[pixels], #32] \n\t" - "wmoveq wr10, wr13 \n\t" - "walignr2ne wr10, wr12, wr13 \n\t" - "wunpckelub wr0, wr2 \n\t" - "wunpckehub wr1, wr2 \n\t" - "wunpckelub wr8, wr10 \n\t" - "wunpckehub wr9, wr10 \n\t" - "waddhus wr0, wr0, wr8 \n\t" - "waddhus wr1, wr1, wr9 \n\t" - "waddhus wr8, wr0, wr4 \n\t" - "waddhus wr9, wr1, wr5 \n\t" - "waddhus wr8, wr8, wr15 \n\t" - "waddhus wr9, wr9, wr15 \n\t" - "wsrlhg wr8, wr8, wcgr0 \n\t" - "wsrlhg wr9, wr9, wcgr0 \n\t" - "wpackhus wr8, wr8, wr9 \n\t" - "subs %[h], %[h], #2 \n\t" - "wstrd wr8, [%[block]] \n\t" - "add %[block], %[block], %[line_size] \n\t" - "bne 1b \n\t" - : [h]"+r"(h), [pixels]"+r"(pixels), [block]"+r"(block) - : [line_size]"r"(line_size) - : "r12", "memory"); -} - -void DEF(put, pixels16_xy2)(uint8_t *block, const uint8_t *pixels, const int line_size, int h) -{ - // [wr0 wr1 wr2 wr3] for previous line - // [wr4 wr5 wr6 wr7] for current line - SET_RND(wr15); // =2 for rnd and =1 for no_rnd version - __asm__ volatile( - "pld [%[pixels]] \n\t" - "mov r12, #2 \n\t" - "pld [%[pixels], #32] \n\t" - "tmcr wcgr0, r12 \n\t" /* for shift value */ - /* alignment */ - "and r12, %[pixels], #7 \n\t" - "bic %[pixels], %[pixels], #7 \n\t" - "tmcr wcgr1, r12 \n\t" - "add r12, r12, #1 \n\t" - "tmcr wcgr2, r12 \n\t" - - // [wr0 wr1 wr2 wr3] <= * - // [wr4 wr5 wr6 wr7] - "wldrd wr12, [%[pixels]] \n\t" - "cmp r12, #8 \n\t" - "wldrd wr13, [%[pixels], #8] \n\t" - "wldrd wr14, [%[pixels], #16] \n\t" - "add %[pixels], %[pixels], %[line_size] \n\t" - "pld [%[pixels]] \n\t" - "walignr1 wr2, wr12, wr13 \n\t" - "pld [%[pixels], #32] \n\t" - "walignr1 wr3, wr13, wr14 \n\t" - "wmoveq wr10, wr13 \n\t" - "wmoveq wr11, wr14 \n\t" - "walignr2ne wr10, wr12, wr13 \n\t" - "walignr2ne wr11, wr13, wr14 \n\t" - "wunpckelub wr0, wr2 \n\t" - "wunpckehub wr1, wr2 \n\t" - "wunpckelub wr2, wr3 \n\t" - "wunpckehub wr3, wr3 \n\t" - "wunpckelub wr8, wr10 \n\t" - "wunpckehub wr9, wr10 \n\t" - "wunpckelub wr10, wr11 \n\t" - "wunpckehub wr11, wr11 \n\t" - "waddhus wr0, wr0, wr8 \n\t" - "waddhus wr1, wr1, wr9 \n\t" - "waddhus wr2, wr2, wr10 \n\t" - "waddhus wr3, wr3, wr11 \n\t" - - "1: \n\t" - // [wr0 wr1 wr2 wr3] - // [wr4 wr5 wr6 wr7] <= * - "wldrd wr12, [%[pixels]] \n\t" - "cmp r12, #8 \n\t" - "wldrd wr13, [%[pixels], #8] \n\t" - "wldrd wr14, [%[pixels], #16] \n\t" - "add %[pixels], %[pixels], %[line_size] \n\t" - "walignr1 wr6, wr12, wr13 \n\t" - "pld [%[pixels]] \n\t" - "pld [%[pixels], #32] \n\t" - "walignr1 wr7, wr13, wr14 \n\t" - "wmoveq wr10, wr13 \n\t" - "wmoveq wr11, wr14 \n\t" - "walignr2ne wr10, wr12, wr13 \n\t" - "walignr2ne wr11, wr13, wr14 \n\t" - "wunpckelub wr4, wr6 \n\t" - "wunpckehub wr5, wr6 \n\t" - "wunpckelub wr6, wr7 \n\t" - "wunpckehub wr7, wr7 \n\t" - "wunpckelub wr8, wr10 \n\t" - "wunpckehub wr9, wr10 \n\t" - "wunpckelub wr10, wr11 \n\t" - "wunpckehub wr11, wr11 \n\t" - "waddhus wr4, wr4, wr8 \n\t" - "waddhus wr5, wr5, wr9 \n\t" - "waddhus wr6, wr6, wr10 \n\t" - "waddhus wr7, wr7, wr11 \n\t" - "waddhus wr8, wr0, wr4 \n\t" - "waddhus wr9, wr1, wr5 \n\t" - "waddhus wr10, wr2, wr6 \n\t" - "waddhus wr11, wr3, wr7 \n\t" - "waddhus wr8, wr8, wr15 \n\t" - "waddhus wr9, wr9, wr15 \n\t" - "waddhus wr10, wr10, wr15 \n\t" - "waddhus wr11, wr11, wr15 \n\t" - "wsrlhg wr8, wr8, wcgr0 \n\t" - "wsrlhg wr9, wr9, wcgr0 \n\t" - "wsrlhg wr10, wr10, wcgr0 \n\t" - "wsrlhg wr11, wr11, wcgr0 \n\t" - "wpackhus wr8, wr8, wr9 \n\t" - "wpackhus wr9, wr10, wr11 \n\t" - "wstrd wr8, [%[block]] \n\t" - "wstrd wr9, [%[block], #8] \n\t" - "add %[block], %[block], %[line_size] \n\t" - - // [wr0 wr1 wr2 wr3] <= * - // [wr4 wr5 wr6 wr7] - "wldrd wr12, [%[pixels]] \n\t" - "wldrd wr13, [%[pixels], #8] \n\t" - "wldrd wr14, [%[pixels], #16] \n\t" - "add %[pixels], %[pixels], %[line_size] \n\t" - "walignr1 wr2, wr12, wr13 \n\t" - "pld [%[pixels]] \n\t" - "pld [%[pixels], #32] \n\t" - "walignr1 wr3, wr13, wr14 \n\t" - "wmoveq wr10, wr13 \n\t" - "wmoveq wr11, wr14 \n\t" - "walignr2ne wr10, wr12, wr13 \n\t" - "walignr2ne wr11, wr13, wr14 \n\t" - "wunpckelub wr0, wr2 \n\t" - "wunpckehub wr1, wr2 \n\t" - "wunpckelub wr2, wr3 \n\t" - "wunpckehub wr3, wr3 \n\t" - "wunpckelub wr8, wr10 \n\t" - "wunpckehub wr9, wr10 \n\t" - "wunpckelub wr10, wr11 \n\t" - "wunpckehub wr11, wr11 \n\t" - "waddhus wr0, wr0, wr8 \n\t" - "waddhus wr1, wr1, wr9 \n\t" - "waddhus wr2, wr2, wr10 \n\t" - "waddhus wr3, wr3, wr11 \n\t" - "waddhus wr8, wr0, wr4 \n\t" - "waddhus wr9, wr1, wr5 \n\t" - "waddhus wr10, wr2, wr6 \n\t" - "waddhus wr11, wr3, wr7 \n\t" - "waddhus wr8, wr8, wr15 \n\t" - "waddhus wr9, wr9, wr15 \n\t" - "waddhus wr10, wr10, wr15 \n\t" - "waddhus wr11, wr11, wr15 \n\t" - "wsrlhg wr8, wr8, wcgr0 \n\t" - "wsrlhg wr9, wr9, wcgr0 \n\t" - "wsrlhg wr10, wr10, wcgr0 \n\t" - "wsrlhg wr11, wr11, wcgr0 \n\t" - "wpackhus wr8, wr8, wr9 \n\t" - "wpackhus wr9, wr10, wr11 \n\t" - "wstrd wr8, [%[block]] \n\t" - "wstrd wr9, [%[block], #8] \n\t" - "add %[block], %[block], %[line_size] \n\t" - - "subs %[h], %[h], #2 \n\t" - "bne 1b \n\t" - : [h]"+r"(h), [pixels]"+r"(pixels), [block]"+r"(block) - : [line_size]"r"(line_size) - : "r12", "memory"); -} - -void DEF(avg, pixels8_xy2)(uint8_t *block, const uint8_t *pixels, const int line_size, int h) -{ - // [wr0 wr1 wr2 wr3] for previous line - // [wr4 wr5 wr6 wr7] for current line - SET_RND(wr15); // =2 for rnd and =1 for no_rnd version - __asm__ volatile( - "pld [%[block]] \n\t" - "pld [%[block], #32] \n\t" - "pld [%[pixels]] \n\t" - "mov r12, #2 \n\t" - "pld [%[pixels], #32] \n\t" - "tmcr wcgr0, r12 \n\t" /* for shift value */ - "and r12, %[pixels], #7 \n\t" - "bic %[pixels], %[pixels], #7 \n\t" - "tmcr wcgr1, r12 \n\t" - - // [wr0 wr1 wr2 wr3] <= * - // [wr4 wr5 wr6 wr7] - "wldrd wr12, [%[pixels]] \n\t" - "add r12, r12, #1 \n\t" - "wldrd wr13, [%[pixels], #8] \n\t" - "tmcr wcgr2, r12 \n\t" - "add %[pixels], %[pixels], %[line_size] \n\t" - "cmp r12, #8 \n\t" - "pld [%[pixels]] \n\t" - "pld [%[pixels], #32] \n\t" - "walignr1 wr2, wr12, wr13 \n\t" - "wmoveq wr10, wr13 \n\t" - "walignr2ne wr10, wr12, wr13 \n\t" - "wunpckelub wr0, wr2 \n\t" - "wunpckehub wr1, wr2 \n\t" - "wunpckelub wr8, wr10 \n\t" - "wunpckehub wr9, wr10 \n\t" - "waddhus wr0, wr0, wr8 \n\t" - "waddhus wr1, wr1, wr9 \n\t" - - "1: \n\t" - // [wr0 wr1 wr2 wr3] - // [wr4 wr5 wr6 wr7] <= * - "wldrd wr12, [%[pixels]] \n\t" - "cmp r12, #8 \n\t" - "wldrd wr13, [%[pixels], #8] \n\t" - "add %[pixels], %[pixels], %[line_size] \n\t" - "walignr1 wr6, wr12, wr13 \n\t" - "pld [%[pixels]] \n\t" - "pld [%[pixels], #32] \n\t" - "wmoveq wr10, wr13 \n\t" - "walignr2ne wr10, wr12, wr13 \n\t" - "wunpckelub wr4, wr6 \n\t" - "wunpckehub wr5, wr6 \n\t" - "wunpckelub wr8, wr10 \n\t" - "wunpckehub wr9, wr10 \n\t" - "waddhus wr4, wr4, wr8 \n\t" - "waddhus wr5, wr5, wr9 \n\t" - "waddhus wr8, wr0, wr4 \n\t" - "waddhus wr9, wr1, wr5 \n\t" - "waddhus wr8, wr8, wr15 \n\t" - "waddhus wr9, wr9, wr15 \n\t" - "wldrd wr12, [%[block]] \n\t" - "wsrlhg wr8, wr8, wcgr0 \n\t" - "wsrlhg wr9, wr9, wcgr0 \n\t" - "wpackhus wr8, wr8, wr9 \n\t" - WAVG2B" wr8, wr8, wr12 \n\t" - "wstrd wr8, [%[block]] \n\t" - "add %[block], %[block], %[line_size] \n\t" - "wldrd wr12, [%[pixels]] \n\t" - "pld [%[block]] \n\t" - "pld [%[block], #32] \n\t" - - // [wr0 wr1 wr2 wr3] <= * - // [wr4 wr5 wr6 wr7] - "wldrd wr13, [%[pixels], #8] \n\t" - "add %[pixels], %[pixels], %[line_size] \n\t" - "walignr1 wr2, wr12, wr13 \n\t" - "pld [%[pixels]] \n\t" - "pld [%[pixels], #32] \n\t" - "wmoveq wr10, wr13 \n\t" - "walignr2ne wr10, wr12, wr13 \n\t" - "wunpckelub wr0, wr2 \n\t" - "wunpckehub wr1, wr2 \n\t" - "wunpckelub wr8, wr10 \n\t" - "wunpckehub wr9, wr10 \n\t" - "waddhus wr0, wr0, wr8 \n\t" - "waddhus wr1, wr1, wr9 \n\t" - "waddhus wr8, wr0, wr4 \n\t" - "waddhus wr9, wr1, wr5 \n\t" - "waddhus wr8, wr8, wr15 \n\t" - "waddhus wr9, wr9, wr15 \n\t" - "wldrd wr12, [%[block]] \n\t" - "wsrlhg wr8, wr8, wcgr0 \n\t" - "wsrlhg wr9, wr9, wcgr0 \n\t" - "wpackhus wr8, wr8, wr9 \n\t" - "subs %[h], %[h], #2 \n\t" - WAVG2B" wr8, wr8, wr12 \n\t" - "wstrd wr8, [%[block]] \n\t" - "add %[block], %[block], %[line_size] \n\t" - "pld [%[block]] \n\t" - "pld [%[block], #32] \n\t" - "bne 1b \n\t" - : [h]"+r"(h), [pixels]"+r"(pixels), [block]"+r"(block) - : [line_size]"r"(line_size) - : "r12", "memory"); -} - -void DEF(avg, pixels16_xy2)(uint8_t *block, const uint8_t *pixels, const int line_size, int h) -{ - // [wr0 wr1 wr2 wr3] for previous line - // [wr4 wr5 wr6 wr7] for current line - SET_RND(wr15); // =2 for rnd and =1 for no_rnd version - __asm__ volatile( - "pld [%[block]] \n\t" - "pld [%[block], #32] \n\t" - "pld [%[pixels]] \n\t" - "mov r12, #2 \n\t" - "pld [%[pixels], #32] \n\t" - "tmcr wcgr0, r12 \n\t" /* for shift value */ - /* alignment */ - "and r12, %[pixels], #7 \n\t" - "bic %[pixels], %[pixels], #7 \n\t" - "tmcr wcgr1, r12 \n\t" - "add r12, r12, #1 \n\t" - "tmcr wcgr2, r12 \n\t" - - // [wr0 wr1 wr2 wr3] <= * - // [wr4 wr5 wr6 wr7] - "wldrd wr12, [%[pixels]] \n\t" - "cmp r12, #8 \n\t" - "wldrd wr13, [%[pixels], #8] \n\t" - "wldrd wr14, [%[pixels], #16] \n\t" - "add %[pixels], %[pixels], %[line_size] \n\t" - "pld [%[pixels]] \n\t" - "walignr1 wr2, wr12, wr13 \n\t" - "pld [%[pixels], #32] \n\t" - "walignr1 wr3, wr13, wr14 \n\t" - "wmoveq wr10, wr13 \n\t" - "wmoveq wr11, wr14 \n\t" - "walignr2ne wr10, wr12, wr13 \n\t" - "walignr2ne wr11, wr13, wr14 \n\t" - "wunpckelub wr0, wr2 \n\t" - "wunpckehub wr1, wr2 \n\t" - "wunpckelub wr2, wr3 \n\t" - "wunpckehub wr3, wr3 \n\t" - "wunpckelub wr8, wr10 \n\t" - "wunpckehub wr9, wr10 \n\t" - "wunpckelub wr10, wr11 \n\t" - "wunpckehub wr11, wr11 \n\t" - "waddhus wr0, wr0, wr8 \n\t" - "waddhus wr1, wr1, wr9 \n\t" - "waddhus wr2, wr2, wr10 \n\t" - "waddhus wr3, wr3, wr11 \n\t" - - "1: \n\t" - // [wr0 wr1 wr2 wr3] - // [wr4 wr5 wr6 wr7] <= * - "wldrd wr12, [%[pixels]] \n\t" - "cmp r12, #8 \n\t" - "wldrd wr13, [%[pixels], #8] \n\t" - "wldrd wr14, [%[pixels], #16] \n\t" - "add %[pixels], %[pixels], %[line_size] \n\t" - "walignr1 wr6, wr12, wr13 \n\t" - "pld [%[pixels]] \n\t" - "pld [%[pixels], #32] \n\t" - "walignr1 wr7, wr13, wr14 \n\t" - "wmoveq wr10, wr13 \n\t" - "wmoveq wr11, wr14 \n\t" - "walignr2ne wr10, wr12, wr13 \n\t" - "walignr2ne wr11, wr13, wr14 \n\t" - "wunpckelub wr4, wr6 \n\t" - "wunpckehub wr5, wr6 \n\t" - "wunpckelub wr6, wr7 \n\t" - "wunpckehub wr7, wr7 \n\t" - "wunpckelub wr8, wr10 \n\t" - "wunpckehub wr9, wr10 \n\t" - "wunpckelub wr10, wr11 \n\t" - "wunpckehub wr11, wr11 \n\t" - "waddhus wr4, wr4, wr8 \n\t" - "waddhus wr5, wr5, wr9 \n\t" - "waddhus wr6, wr6, wr10 \n\t" - "waddhus wr7, wr7, wr11 \n\t" - "waddhus wr8, wr0, wr4 \n\t" - "waddhus wr9, wr1, wr5 \n\t" - "waddhus wr10, wr2, wr6 \n\t" - "waddhus wr11, wr3, wr7 \n\t" - "waddhus wr8, wr8, wr15 \n\t" - "waddhus wr9, wr9, wr15 \n\t" - "waddhus wr10, wr10, wr15 \n\t" - "waddhus wr11, wr11, wr15 \n\t" - "wsrlhg wr8, wr8, wcgr0 \n\t" - "wsrlhg wr9, wr9, wcgr0 \n\t" - "wldrd wr12, [%[block]] \n\t" - "wldrd wr13, [%[block], #8] \n\t" - "wsrlhg wr10, wr10, wcgr0 \n\t" - "wsrlhg wr11, wr11, wcgr0 \n\t" - "wpackhus wr8, wr8, wr9 \n\t" - "wpackhus wr9, wr10, wr11 \n\t" - WAVG2B" wr8, wr8, wr12 \n\t" - WAVG2B" wr9, wr9, wr13 \n\t" - "wstrd wr8, [%[block]] \n\t" - "wstrd wr9, [%[block], #8] \n\t" - "add %[block], %[block], %[line_size] \n\t" - - // [wr0 wr1 wr2 wr3] <= * - // [wr4 wr5 wr6 wr7] - "wldrd wr12, [%[pixels]] \n\t" - "pld [%[block]] \n\t" - "wldrd wr13, [%[pixels], #8] \n\t" - "pld [%[block], #32] \n\t" - "wldrd wr14, [%[pixels], #16] \n\t" - "add %[pixels], %[pixels], %[line_size] \n\t" - "walignr1 wr2, wr12, wr13 \n\t" - "pld [%[pixels]] \n\t" - "pld [%[pixels], #32] \n\t" - "walignr1 wr3, wr13, wr14 \n\t" - "wmoveq wr10, wr13 \n\t" - "wmoveq wr11, wr14 \n\t" - "walignr2ne wr10, wr12, wr13 \n\t" - "walignr2ne wr11, wr13, wr14 \n\t" - "wunpckelub wr0, wr2 \n\t" - "wunpckehub wr1, wr2 \n\t" - "wunpckelub wr2, wr3 \n\t" - "wunpckehub wr3, wr3 \n\t" - "wunpckelub wr8, wr10 \n\t" - "wunpckehub wr9, wr10 \n\t" - "wunpckelub wr10, wr11 \n\t" - "wunpckehub wr11, wr11 \n\t" - "waddhus wr0, wr0, wr8 \n\t" - "waddhus wr1, wr1, wr9 \n\t" - "waddhus wr2, wr2, wr10 \n\t" - "waddhus wr3, wr3, wr11 \n\t" - "waddhus wr8, wr0, wr4 \n\t" - "waddhus wr9, wr1, wr5 \n\t" - "waddhus wr10, wr2, wr6 \n\t" - "waddhus wr11, wr3, wr7 \n\t" - "waddhus wr8, wr8, wr15 \n\t" - "waddhus wr9, wr9, wr15 \n\t" - "waddhus wr10, wr10, wr15 \n\t" - "waddhus wr11, wr11, wr15 \n\t" - "wsrlhg wr8, wr8, wcgr0 \n\t" - "wsrlhg wr9, wr9, wcgr0 \n\t" - "wldrd wr12, [%[block]] \n\t" - "wldrd wr13, [%[block], #8] \n\t" - "wsrlhg wr10, wr10, wcgr0 \n\t" - "wsrlhg wr11, wr11, wcgr0 \n\t" - "wpackhus wr8, wr8, wr9 \n\t" - "wpackhus wr9, wr10, wr11 \n\t" - WAVG2B" wr8, wr8, wr12 \n\t" - WAVG2B" wr9, wr9, wr13 \n\t" - "wstrd wr8, [%[block]] \n\t" - "wstrd wr9, [%[block], #8] \n\t" - "add %[block], %[block], %[line_size] \n\t" - "subs %[h], %[h], #2 \n\t" - "pld [%[block]] \n\t" - "pld [%[block], #32] \n\t" - "bne 1b \n\t" - : [h]"+r"(h), [pixels]"+r"(pixels), [block]"+r"(block) - : [line_size]"r"(line_size) - : "r12", "memory"); -} diff --git a/libavcodec/arm/mpegvideo_arm.c b/libavcodec/arm/mpegvideo_arm.c index b2b254e2d3..73d03c1a5e 100644 --- a/libavcodec/arm/mpegvideo_arm.c +++ b/libavcodec/arm/mpegvideo_arm.c @@ -40,16 +40,9 @@ void ff_dct_unquantize_h263_intra_neon(MpegEncContext *s, DCTELEM *block, void ff_MPV_common_init_arm(MpegEncContext *s) { - /* IWMMXT support is a superset of armv5te, so - * allow optimized functions for armv5te unless - * a better iwmmxt function exists - */ #if HAVE_ARMV5TE ff_MPV_common_init_armv5te(s); #endif -#if HAVE_IWMMXT - ff_MPV_common_init_iwmmxt(s); -#endif if (HAVE_NEON) { s->dct_unquantize_h263_intra = ff_dct_unquantize_h263_intra_neon; diff --git a/libavcodec/arm/mpegvideo_arm.h b/libavcodec/arm/mpegvideo_arm.h index e9de979300..4ff93b76b9 100644 --- a/libavcodec/arm/mpegvideo_arm.h +++ b/libavcodec/arm/mpegvideo_arm.h @@ -21,7 +21,6 @@ #include "libavcodec/mpegvideo.h" -void ff_MPV_common_init_iwmmxt(MpegEncContext *s); void ff_MPV_common_init_armv5te(MpegEncContext *s); #endif /* AVCODEC_ARM_MPEGVIDEO_H */ diff --git a/libavcodec/arm/mpegvideo_iwmmxt.c b/libavcodec/arm/mpegvideo_iwmmxt.c deleted file mode 100644 index c85e6dc8b1..0000000000 --- a/libavcodec/arm/mpegvideo_iwmmxt.c +++ /dev/null @@ -1,101 +0,0 @@ -/* - * copyright (c) 2004 AGAWA Koji - * - * This file is part of FFmpeg. - * - * FFmpeg is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * FFmpeg is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with FFmpeg; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#include "libavutil/cpu.h" -#include "libavcodec/avcodec.h" -#include "libavcodec/dsputil.h" -#include "libavcodec/mpegvideo.h" -#include "mpegvideo_arm.h" - -static void dct_unquantize_h263_intra_iwmmxt(MpegEncContext *s, - DCTELEM *block, int n, int qscale) -{ - int level, qmul, qadd; - int nCoeffs; - DCTELEM *block_orig = block; - - assert(s->block_last_index[n]>=0); - - qmul = qscale << 1; - - if (!s->h263_aic) { - if (n < 4) - level = block[0] * s->y_dc_scale; - else - level = block[0] * s->c_dc_scale; - qadd = (qscale - 1) | 1; - }else{ - qadd = 0; - level = block[0]; - } - if(s->ac_pred) - nCoeffs=63; - else - nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ]; - - __asm__ volatile ( -/* "movd %1, %%mm6 \n\t" //qmul */ -/* "packssdw %%mm6, %%mm6 \n\t" */ -/* "packssdw %%mm6, %%mm6 \n\t" */ - "tbcsth wr6, %[qmul] \n\t" -/* "movd %2, %%mm5 \n\t" //qadd */ -/* "packssdw %%mm5, %%mm5 \n\t" */ -/* "packssdw %%mm5, %%mm5 \n\t" */ - "tbcsth wr5, %[qadd] \n\t" - "wzero wr7 \n\t" /* "pxor %%mm7, %%mm7 \n\t" */ - "wzero wr4 \n\t" /* "pxor %%mm4, %%mm4 \n\t" */ - "wsubh wr7, wr5, wr7 \n\t" /* "psubw %%mm5, %%mm7 \n\t" */ - "1: \n\t" - "wldrd wr2, [%[block]] \n\t" /* "movq (%0, %3), %%mm0 \n\t" */ - "wldrd wr3, [%[block], #8] \n\t" /* "movq 8(%0, %3), %%mm1 \n\t" */ - "wmulsl wr0, wr6, wr2 \n\t" /* "pmullw %%mm6, %%mm0 \n\t" */ - "wmulsl wr1, wr6, wr3 \n\t" /* "pmullw %%mm6, %%mm1 \n\t" */ -/* "movq (%0, %3), %%mm2 \n\t" */ -/* "movq 8(%0, %3), %%mm3 \n\t" */ - "wcmpgtsh wr2, wr4, wr2 \n\t" /* "pcmpgtw %%mm4, %%mm2 \n\t" // block[i] < 0 ? -1 : 0 */ - "wcmpgtsh wr3, wr4, wr2 \n\t" /* "pcmpgtw %%mm4, %%mm3 \n\t" // block[i] < 0 ? -1 : 0 */ - "wxor wr0, wr2, wr0 \n\t" /* "pxor %%mm2, %%mm0 \n\t" */ - "wxor wr1, wr3, wr1 \n\t" /* "pxor %%mm3, %%mm1 \n\t" */ - "waddh wr0, wr7, wr0 \n\t" /* "paddw %%mm7, %%mm0 \n\t" */ - "waddh wr1, wr7, wr1 \n\t" /* "paddw %%mm7, %%mm1 \n\t" */ - "wxor wr2, wr0, wr2 \n\t" /* "pxor %%mm0, %%mm2 \n\t" */ - "wxor wr3, wr1, wr3 \n\t" /* "pxor %%mm1, %%mm3 \n\t" */ - "wcmpeqh wr0, wr7, wr0 \n\t" /* "pcmpeqw %%mm7, %%mm0 \n\t" // block[i] == 0 ? -1 : 0 */ - "wcmpeqh wr1, wr7, wr1 \n\t" /* "pcmpeqw %%mm7, %%mm1 \n\t" // block[i] == 0 ? -1 : 0 */ - "wandn wr0, wr2, wr0 \n\t" /* "pandn %%mm2, %%mm0 \n\t" */ - "wandn wr1, wr3, wr1 \n\t" /* "pandn %%mm3, %%mm1 \n\t" */ - "wstrd wr0, [%[block]] \n\t" /* "movq %%mm0, (%0, %3) \n\t" */ - "wstrd wr1, [%[block], #8] \n\t" /* "movq %%mm1, 8(%0, %3) \n\t" */ - "add %[block], %[block], #16 \n\t" /* "addl $16, %3 \n\t" */ - "subs %[i], %[i], #1 \n\t" - "bne 1b \n\t" /* "jng 1b \n\t" */ - :[block]"+r"(block) - :[i]"r"((nCoeffs + 8) / 8), [qmul]"r"(qmul), [qadd]"r"(qadd) - :"memory"); - - block_orig[0] = level; -} - -void ff_MPV_common_init_iwmmxt(MpegEncContext *s) -{ - if (!(mm_flags & AV_CPU_FLAG_IWMMXT)) return; - - s->dct_unquantize_h263_intra = dct_unquantize_h263_intra_iwmmxt; -} diff --git a/libavcodec/c93.c b/libavcodec/c93.c index ad2dc0a7bf..733e80cfbb 100644 --- a/libavcodec/c93.c +++ b/libavcodec/c93.c @@ -125,8 +125,9 @@ static int decode_frame(AVCodecContext *avctx, void *data, AVFrame * const newpic = &c93->pictures[c93->currentpic]; AVFrame * const oldpic = &c93->pictures[c93->currentpic^1]; AVFrame *picture = data; + GetByteContext gb; uint8_t *out; - int stride, i, x, y, bt = 0; + int stride, i, x, y, b, bt = 0; c93->currentpic ^= 1; @@ -140,7 +141,9 @@ static int decode_frame(AVCodecContext *avctx, void *data, stride = newpic->linesize[0]; - if (buf[0] & C93_FIRST_FRAME) { + bytestream2_init(&gb, buf, buf_size); + b = bytestream2_get_byte(&gb); + if (b & C93_FIRST_FRAME) { newpic->pict_type = AV_PICTURE_TYPE_I; newpic->key_frame = 1; } else { @@ -148,17 +151,6 @@ static int decode_frame(AVCodecContext *avctx, void *data, newpic->key_frame = 0; } - if (*buf++ & C93_HAS_PALETTE) { - uint32_t *palette = (uint32_t *) newpic->data[1]; - const uint8_t *palbuf = buf + buf_size - 768 - 1; - for (i = 0; i < 256; i++) { - palette[i] = 0xFF << 24 | bytestream_get_be24(&palbuf); - } - } else { - if (oldpic->data[1]) - memcpy(newpic->data[1], oldpic->data[1], 256 * 4); - } - for (y = 0; y < HEIGHT; y += 8) { out = newpic->data[0] + y * stride; for (x = 0; x < WIDTH; x += 8) { @@ -168,12 +160,12 @@ static int decode_frame(AVCodecContext *avctx, void *data, C93BlockType block_type; if (!bt) - bt = *buf++; + bt = bytestream2_get_byte(&gb); block_type= bt & 0x0F; switch (block_type) { case C93_8X8_FROM_PREV: - offset = bytestream_get_le16(&buf); + offset = bytestream2_get_le16(&gb); if (copy_block(avctx, out, copy_from, offset, 8, stride)) return -1; break; @@ -183,7 +175,7 @@ static int decode_frame(AVCodecContext *avctx, void *data, case C93_4X4_FROM_PREV: for (j = 0; j < 8; j += 4) { for (i = 0; i < 8; i += 4) { - offset = bytestream_get_le16(&buf); + offset = bytestream2_get_le16(&gb); if (copy_block(avctx, &out[j*stride+i], copy_from, offset, 4, stride)) return -1; @@ -192,10 +184,10 @@ static int decode_frame(AVCodecContext *avctx, void *data, break; case C93_8X8_2COLOR: - bytestream_get_buffer(&buf, cols, 2); + bytestream2_get_buffer(&gb, cols, 2); for (i = 0; i < 8; i++) { draw_n_color(out + i*stride, stride, 8, 1, 1, cols, - NULL, *buf++); + NULL, bytestream2_get_byte(&gb)); } break; @@ -206,17 +198,17 @@ static int decode_frame(AVCodecContext *avctx, void *data, for (j = 0; j < 8; j += 4) { for (i = 0; i < 8; i += 4) { if (block_type == C93_4X4_2COLOR) { - bytestream_get_buffer(&buf, cols, 2); + bytestream2_get_buffer(&gb, cols, 2); draw_n_color(out + i + j*stride, stride, 4, 4, - 1, cols, NULL, bytestream_get_le16(&buf)); + 1, cols, NULL, bytestream2_get_le16(&gb)); } else if (block_type == C93_4X4_4COLOR) { - bytestream_get_buffer(&buf, cols, 4); + bytestream2_get_buffer(&gb, cols, 4); draw_n_color(out + i + j*stride, stride, 4, 4, - 2, cols, NULL, bytestream_get_le32(&buf)); + 2, cols, NULL, bytestream2_get_le32(&gb)); } else { - bytestream_get_buffer(&buf, grps, 4); + bytestream2_get_buffer(&gb, grps, 4); draw_n_color(out + i + j*stride, stride, 4, 4, - 1, cols, grps, bytestream_get_le16(&buf)); + 1, cols, grps, bytestream2_get_le16(&gb)); } } } @@ -227,7 +219,7 @@ static int decode_frame(AVCodecContext *avctx, void *data, case C93_8X8_INTRA: for (j = 0; j < 8; j++) - bytestream_get_buffer(&buf, out + j*stride, 8); + bytestream2_get_buffer(&gb, out + j*stride, 8); break; default: @@ -240,6 +232,16 @@ static int decode_frame(AVCodecContext *avctx, void *data, } } + if (b & C93_HAS_PALETTE) { + uint32_t *palette = (uint32_t *) newpic->data[1]; + for (i = 0; i < 256; i++) { + palette[i] = 0xFFU << 24 | bytestream2_get_be24(&gb); + } + } else { + if (oldpic->data[1]) + memcpy(newpic->data[1], oldpic->data[1], 256 * 4); + } + *picture = *newpic; *data_size = sizeof(AVFrame); diff --git a/libavcodec/frwu.c b/libavcodec/frwu.c index b47cb717f2..1363b53d92 100644 --- a/libavcodec/frwu.c +++ b/libavcodec/frwu.c @@ -22,7 +22,6 @@ #include "avcodec.h" #include "bytestream.h" -#include "libavutil/intreadwrite.h" static av_cold int decode_init(AVCodecContext *avctx) { @@ -54,7 +53,7 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, av_log(avctx, AV_LOG_ERROR, "Packet is too small.\n"); return AVERROR_INVALIDDATA; } - if (bytestream_get_le32(&buf) != AV_RL32("FRW1")) { + if (bytestream_get_le32(&buf) != MKTAG('F', 'R', 'W', '1')) { av_log(avctx, AV_LOG_ERROR, "incorrect marker\n"); return AVERROR_INVALIDDATA; } diff --git a/libavcodec/mimic.c b/libavcodec/mimic.c index 51ae5be8bc..4f085b4e5d 100644 --- a/libavcodec/mimic.c +++ b/libavcodec/mimic.c @@ -306,24 +306,26 @@ static int mimic_decode_frame(AVCodecContext *avctx, void *data, const uint8_t *buf = avpkt->data; int buf_size = avpkt->size; MimicContext *ctx = avctx->priv_data; + GetByteContext gb; int is_pframe; int width, height; int quality, num_coeffs; int swap_buf_size = buf_size - MIMIC_HEADER_SIZE; - if(buf_size < MIMIC_HEADER_SIZE) { + if (buf_size <= MIMIC_HEADER_SIZE) { av_log(avctx, AV_LOG_ERROR, "insufficient data\n"); return -1; } - buf += 2; /* some constant (always 256) */ - quality = bytestream_get_le16(&buf); - width = bytestream_get_le16(&buf); - height = bytestream_get_le16(&buf); - buf += 4; /* some constant */ - is_pframe = bytestream_get_le32(&buf); - num_coeffs = bytestream_get_byte(&buf); - buf += 3; /* some constant */ + bytestream2_init(&gb, buf, MIMIC_HEADER_SIZE); + bytestream2_skip(&gb, 2); /* some constant (always 256) */ + quality = bytestream2_get_le16u(&gb); + width = bytestream2_get_le16u(&gb); + height = bytestream2_get_le16u(&gb); + bytestream2_skip(&gb, 4); /* some constant */ + is_pframe = bytestream2_get_le32u(&gb); + num_coeffs = bytestream2_get_byteu(&gb); + bytestream2_skip(&gb, 3); /* some constant */ if(!ctx->avctx) { int i; @@ -372,7 +374,7 @@ static int mimic_decode_frame(AVCodecContext *avctx, void *data, return AVERROR(ENOMEM); ctx->dsp.bswap_buf(ctx->swap_buf, - (const uint32_t*) buf, + (const uint32_t*) (buf + MIMIC_HEADER_SIZE), swap_buf_size>>2); init_get_bits(&ctx->gb, ctx->swap_buf, swap_buf_size << 3); diff --git a/libavcodec/pcm-mpeg.c b/libavcodec/pcm-mpeg.c index aea3ff79c6..9c49a0d9ec 100644 --- a/libavcodec/pcm-mpeg.c +++ b/libavcodec/pcm-mpeg.c @@ -141,6 +141,7 @@ static int pcm_bluray_decode_frame(AVCodecContext *avctx, void *data, const uint8_t *src = avpkt->data; int buf_size = avpkt->size; PCMBRDecode *s = avctx->priv_data; + GetByteContext gb; int num_source_channels, channel, retval; int sample_size, samples; int16_t *dst16; @@ -156,6 +157,8 @@ static int pcm_bluray_decode_frame(AVCodecContext *avctx, void *data, src += 4; buf_size -= 4; + bytestream2_init(&gb, src, buf_size); + /* There's always an even number of channels in the source */ num_source_channels = FFALIGN(avctx->channels, 2); sample_size = (num_source_channels * (avctx->sample_fmt == AV_SAMPLE_FMT_S16 ? 16 : 24)) >> 3; @@ -179,15 +182,15 @@ static int pcm_bluray_decode_frame(AVCodecContext *avctx, void *data, samples *= num_source_channels; if (AV_SAMPLE_FMT_S16 == avctx->sample_fmt) { #if HAVE_BIGENDIAN - memcpy(dst16, src, buf_size); + bytestream2_get_buffer(&gb, dst16, buf_size); #else do { - *dst16++ = bytestream_get_be16(&src); + *dst16++ = bytestream2_get_be16u(&gb); } while (--samples); #endif } else { do { - *dst32++ = bytestream_get_be24(&src) << 8; + *dst32++ = bytestream2_get_be24u(&gb) << 8; } while (--samples); } break; @@ -199,24 +202,23 @@ static int pcm_bluray_decode_frame(AVCodecContext *avctx, void *data, if (AV_SAMPLE_FMT_S16 == avctx->sample_fmt) { do { #if HAVE_BIGENDIAN - memcpy(dst16, src, avctx->channels * 2); + bytestream2_get_buffer(&gb, dst16, avctx->channels * 2); dst16 += avctx->channels; - src += sample_size; #else channel = avctx->channels; do { - *dst16++ = bytestream_get_be16(&src); + *dst16++ = bytestream2_get_be16u(&gb); } while (--channel); - src += 2; #endif + bytestream2_skip(&gb, 2); } while (--samples); } else { do { channel = avctx->channels; do { - *dst32++ = bytestream_get_be24(&src) << 8; + *dst32++ = bytestream2_get_be24u(&gb) << 8; } while (--channel); - src += 3; + bytestream2_skip(&gb, 3); } while (--samples); } break; @@ -224,22 +226,22 @@ static int pcm_bluray_decode_frame(AVCodecContext *avctx, void *data, case AV_CH_LAYOUT_5POINT1: if (AV_SAMPLE_FMT_S16 == avctx->sample_fmt) { do { - dst16[0] = bytestream_get_be16(&src); - dst16[1] = bytestream_get_be16(&src); - dst16[2] = bytestream_get_be16(&src); - dst16[4] = bytestream_get_be16(&src); - dst16[5] = bytestream_get_be16(&src); - dst16[3] = bytestream_get_be16(&src); + dst16[0] = bytestream2_get_be16u(&gb); + dst16[1] = bytestream2_get_be16u(&gb); + dst16[2] = bytestream2_get_be16u(&gb); + dst16[4] = bytestream2_get_be16u(&gb); + dst16[5] = bytestream2_get_be16u(&gb); + dst16[3] = bytestream2_get_be16u(&gb); dst16 += 6; } while (--samples); } else { do { - dst32[0] = bytestream_get_be24(&src) << 8; - dst32[1] = bytestream_get_be24(&src) << 8; - dst32[2] = bytestream_get_be24(&src) << 8; - dst32[4] = bytestream_get_be24(&src) << 8; - dst32[5] = bytestream_get_be24(&src) << 8; - dst32[3] = bytestream_get_be24(&src) << 8; + dst32[0] = bytestream2_get_be24u(&gb) << 8; + dst32[1] = bytestream2_get_be24u(&gb) << 8; + dst32[2] = bytestream2_get_be24u(&gb) << 8; + dst32[4] = bytestream2_get_be24u(&gb) << 8; + dst32[5] = bytestream2_get_be24u(&gb) << 8; + dst32[3] = bytestream2_get_be24u(&gb) << 8; dst32 += 6; } while (--samples); } @@ -248,27 +250,27 @@ static int pcm_bluray_decode_frame(AVCodecContext *avctx, void *data, case AV_CH_LAYOUT_7POINT0: if (AV_SAMPLE_FMT_S16 == avctx->sample_fmt) { do { - dst16[0] = bytestream_get_be16(&src); - dst16[1] = bytestream_get_be16(&src); - dst16[2] = bytestream_get_be16(&src); - dst16[5] = bytestream_get_be16(&src); - dst16[3] = bytestream_get_be16(&src); - dst16[4] = bytestream_get_be16(&src); - dst16[6] = bytestream_get_be16(&src); + dst16[0] = bytestream2_get_be16u(&gb); + dst16[1] = bytestream2_get_be16u(&gb); + dst16[2] = bytestream2_get_be16u(&gb); + dst16[5] = bytestream2_get_be16u(&gb); + dst16[3] = bytestream2_get_be16u(&gb); + dst16[4] = bytestream2_get_be16u(&gb); + dst16[6] = bytestream2_get_be16u(&gb); dst16 += 7; - src += 2; + bytestream2_skip(&gb, 2); } while (--samples); } else { do { - dst32[0] = bytestream_get_be24(&src) << 8; - dst32[1] = bytestream_get_be24(&src) << 8; - dst32[2] = bytestream_get_be24(&src) << 8; - dst32[5] = bytestream_get_be24(&src) << 8; - dst32[3] = bytestream_get_be24(&src) << 8; - dst32[4] = bytestream_get_be24(&src) << 8; - dst32[6] = bytestream_get_be24(&src) << 8; + dst32[0] = bytestream2_get_be24u(&gb) << 8; + dst32[1] = bytestream2_get_be24u(&gb) << 8; + dst32[2] = bytestream2_get_be24u(&gb) << 8; + dst32[5] = bytestream2_get_be24u(&gb) << 8; + dst32[3] = bytestream2_get_be24u(&gb) << 8; + dst32[4] = bytestream2_get_be24u(&gb) << 8; + dst32[6] = bytestream2_get_be24u(&gb) << 8; dst32 += 7; - src += 3; + bytestream2_skip(&gb, 3); } while (--samples); } break; @@ -276,26 +278,26 @@ static int pcm_bluray_decode_frame(AVCodecContext *avctx, void *data, case AV_CH_LAYOUT_7POINT1: if (AV_SAMPLE_FMT_S16 == avctx->sample_fmt) { do { - dst16[0] = bytestream_get_be16(&src); - dst16[1] = bytestream_get_be16(&src); - dst16[2] = bytestream_get_be16(&src); - dst16[6] = bytestream_get_be16(&src); - dst16[4] = bytestream_get_be16(&src); - dst16[5] = bytestream_get_be16(&src); - dst16[7] = bytestream_get_be16(&src); - dst16[3] = bytestream_get_be16(&src); + dst16[0] = bytestream2_get_be16u(&gb); + dst16[1] = bytestream2_get_be16u(&gb); + dst16[2] = bytestream2_get_be16u(&gb); + dst16[6] = bytestream2_get_be16u(&gb); + dst16[4] = bytestream2_get_be16u(&gb); + dst16[5] = bytestream2_get_be16u(&gb); + dst16[7] = bytestream2_get_be16u(&gb); + dst16[3] = bytestream2_get_be16u(&gb); dst16 += 8; } while (--samples); } else { do { - dst32[0] = bytestream_get_be24(&src) << 8; - dst32[1] = bytestream_get_be24(&src) << 8; - dst32[2] = bytestream_get_be24(&src) << 8; - dst32[6] = bytestream_get_be24(&src) << 8; - dst32[4] = bytestream_get_be24(&src) << 8; - dst32[5] = bytestream_get_be24(&src) << 8; - dst32[7] = bytestream_get_be24(&src) << 8; - dst32[3] = bytestream_get_be24(&src) << 8; + dst32[0] = bytestream2_get_be24u(&gb) << 8; + dst32[1] = bytestream2_get_be24u(&gb) << 8; + dst32[2] = bytestream2_get_be24u(&gb) << 8; + dst32[6] = bytestream2_get_be24u(&gb) << 8; + dst32[4] = bytestream2_get_be24u(&gb) << 8; + dst32[5] = bytestream2_get_be24u(&gb) << 8; + dst32[7] = bytestream2_get_be24u(&gb) << 8; + dst32[3] = bytestream2_get_be24u(&gb) << 8; dst32 += 8; } while (--samples); } @@ -306,7 +308,7 @@ static int pcm_bluray_decode_frame(AVCodecContext *avctx, void *data, *got_frame_ptr = 1; *(AVFrame *)data = s->frame; - retval = src - avpkt->data; + retval = bytestream2_tell(&gb); if (avctx->debug & FF_DEBUG_BITSTREAM) av_dlog(avctx, "pcm_bluray_decode_frame: decoded %d -> %d bytes\n", retval, buf_size); diff --git a/libavcodec/s3tc.c b/libavcodec/s3tc.c index 8e979a84ac..4e791c86c0 100644 --- a/libavcodec/s3tc.c +++ b/libavcodec/s3tc.c @@ -21,19 +21,19 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ -#include "libavutil/intreadwrite.h" +#include "libavcodec/bytestream.h" #include "avcodec.h" #include "s3tc.h" -static inline void dxt1_decode_pixels(const uint8_t *s, uint32_t *d, +static inline void dxt1_decode_pixels(GetByteContext *gb, uint32_t *d, unsigned int qstride, unsigned int flag, uint64_t alpha) { unsigned int x, y, c0, c1, a = (!flag * 255u) << 24; unsigned int rb0, rb1, rb2, rb3, g0, g1, g2, g3; uint32_t colors[4], pixels; - c0 = AV_RL16(s); - c1 = AV_RL16(s+2); + c0 = bytestream2_get_le16(gb); + c1 = bytestream2_get_le16(gb); rb0 = (c0<<3 | c0<<8) & 0xf800f8; rb1 = (c1<<3 | c1<<8) & 0xf800f8; @@ -61,7 +61,7 @@ static inline void dxt1_decode_pixels(const uint8_t *s, uint32_t *d, colors[2] = rb2 + g2 + a; - pixels = AV_RL32(s+4); + pixels = bytestream2_get_le32(gb); for (y=0; y<4; y++) { for (x=0; x<4; x++) { a = (alpha & 0x0f) << 28; @@ -74,24 +74,24 @@ static inline void dxt1_decode_pixels(const uint8_t *s, uint32_t *d, } } -void ff_decode_dxt1(const uint8_t *s, uint8_t *dst, +void ff_decode_dxt1(GetByteContext *gb, uint8_t *dst, const unsigned int w, const unsigned int h, const unsigned int stride) { unsigned int bx, by, qstride = stride/4; uint32_t *d = (uint32_t *) dst; for (by=0; by < h/4; by++, d += stride-w) - for (bx=0; bx < w/4; bx++, s+=8, d+=4) - dxt1_decode_pixels(s, d, qstride, 0, 0LL); + for (bx = 0; bx < w / 4; bx++, d += 4) + dxt1_decode_pixels(gb, d, qstride, 0, 0LL); } -void ff_decode_dxt3(const uint8_t *s, uint8_t *dst, +void ff_decode_dxt3(GetByteContext *gb, uint8_t *dst, const unsigned int w, const unsigned int h, const unsigned int stride) { unsigned int bx, by, qstride = stride/4; uint32_t *d = (uint32_t *) dst; for (by=0; by < h/4; by++, d += stride-w) - for (bx=0; bx < w/4; bx++, s+=16, d+=4) - dxt1_decode_pixels(s+8, d, qstride, 1, AV_RL64(s)); + for (bx = 0; bx < w / 4; bx++, d += 4) + dxt1_decode_pixels(gb, d, qstride, 1, bytestream2_get_le64(gb)); } diff --git a/libavcodec/s3tc.h b/libavcodec/s3tc.h index 5116dc80f6..4378bd9483 100644 --- a/libavcodec/s3tc.h +++ b/libavcodec/s3tc.h @@ -29,24 +29,24 @@ /** * Decode DXT1 encoded data to RGB32 - * @param src source buffer, has to be aligned on a 4-byte boundary + * @param gb GetByteContext * @param dst destination buffer * @param w width of output image * @param h height of output image * @param stride line size of output image */ -void ff_decode_dxt1(const uint8_t *src, uint8_t *dst, +void ff_decode_dxt1(GetByteContext *gb, uint8_t *dst, const unsigned int w, const unsigned int h, const unsigned int stride); /** * Decode DXT3 encoded data to RGB32 - * @param src source buffer, has to be aligned on a 4-byte boundary + * @param gb GetByteContext * @param dst destination buffer * @param w width of output image * @param h height of output image * @param stride line size of output image */ -void ff_decode_dxt3(const uint8_t *src, uint8_t *dst, +void ff_decode_dxt3(GetByteContext *gb, uint8_t *dst, const unsigned int w, const unsigned int h, const unsigned int stride); diff --git a/libavcodec/txd.c b/libavcodec/txd.c index ca07b6ce17..cf88a9b3bb 100644 --- a/libavcodec/txd.c +++ b/libavcodec/txd.c @@ -25,6 +25,7 @@ #include "libavutil/imgutils.h" #include "bytestream.h" #include "avcodec.h" +#include "bytestream.h" #include "s3tc.h" typedef struct TXDContext { @@ -42,28 +43,25 @@ static av_cold int txd_init(AVCodecContext *avctx) { static int txd_decode_frame(AVCodecContext *avctx, void *data, int *data_size, AVPacket *avpkt) { - const uint8_t *buf = avpkt->data; - const uint8_t *buf_end = avpkt->data + avpkt->size; TXDContext * const s = avctx->priv_data; + GetByteContext gb; AVFrame *picture = data; AVFrame * const p = &s->picture; unsigned int version, w, h, d3d_format, depth, stride, mipmap_count, flags; unsigned int y, v; uint8_t *ptr; - const uint8_t *cur = buf; - const uint32_t *palette = (const uint32_t *)(cur + 88); uint32_t *pal; - if (buf_end - cur < 92) - return AVERROR_INVALIDDATA; - version = AV_RL32(cur); - d3d_format = AV_RL32(cur+76); - w = AV_RL16(cur+80); - h = AV_RL16(cur+82); - depth = AV_RL8 (cur+84); - mipmap_count = AV_RL8 (cur+85); - flags = AV_RL8 (cur+87); - cur += 92; + bytestream2_init(&gb, avpkt->data, avpkt->size); + version = bytestream2_get_le32(&gb); + bytestream2_skip(&gb, 72); + d3d_format = bytestream2_get_le32(&gb); + w = bytestream2_get_le16(&gb); + h = bytestream2_get_le16(&gb); + depth = bytestream2_get_byte(&gb); + mipmap_count = bytestream2_get_byte(&gb); + bytestream2_skip(&gb, 1); + flags = bytestream2_get_byte(&gb); if (version < 8 || version > 9) { av_log(avctx, AV_LOG_ERROR, "texture data version %i is unsupported\n", @@ -73,12 +71,9 @@ static int txd_decode_frame(AVCodecContext *avctx, void *data, int *data_size, if (depth == 8) { avctx->pix_fmt = PIX_FMT_PAL8; - if (buf_end - cur < 1024) - return AVERROR_INVALIDDATA; - cur += 1024; - } else if (depth == 16 || depth == 32) + } else if (depth == 16 || depth == 32) { avctx->pix_fmt = PIX_FMT_RGB32; - else { + } else { av_log(avctx, AV_LOG_ERROR, "depth of %i is unsupported\n", depth); return -1; } @@ -102,31 +97,32 @@ static int txd_decode_frame(AVCodecContext *avctx, void *data, int *data_size, if (depth == 8) { pal = (uint32_t *) p->data[1]; - for (y=0; y<256; y++) { - v = AV_RB32(palette+y); - pal[y] = (v>>8) + (v<<24); + for (y = 0; y < 256; y++) { + v = bytestream2_get_be32(&gb); + pal[y] = (v >> 8) + (v << 24); } - if (buf_end - cur < w * h) + if (bytestream2_get_bytes_left(&gb) < w * h) return AVERROR_INVALIDDATA; + bytestream2_skip(&gb, 4); for (y=0; y<h; y++) { - memcpy(ptr, cur, w); + bytestream2_get_buffer(&gb, ptr, w); ptr += stride; - cur += w; } } else if (depth == 16) { + bytestream2_skip(&gb, 4); switch (d3d_format) { case 0: if (!(flags & 1)) goto unsupported; case FF_S3TC_DXT1: - if (buf_end - cur < (w/4) * (h/4) * 8) + if (bytestream2_get_bytes_left(&gb) < (w/4) * (h/4) * 8) return AVERROR_INVALIDDATA; - ff_decode_dxt1(cur, ptr, w, h, stride); + ff_decode_dxt1(&gb, ptr, w, h, stride); break; case FF_S3TC_DXT3: - if (buf_end - cur < (w/4) * (h/4) * 16) + if (bytestream2_get_bytes_left(&gb) < (w/4) * (h/4) * 16) return AVERROR_INVALIDDATA; - ff_decode_dxt3(cur, ptr, w, h, stride); + ff_decode_dxt3(&gb, ptr, w, h, stride); break; default: goto unsupported; @@ -135,12 +131,11 @@ static int txd_decode_frame(AVCodecContext *avctx, void *data, int *data_size, switch (d3d_format) { case 0x15: case 0x16: - if (buf_end - cur < h * w * 4) + if (bytestream2_get_bytes_left(&gb) < h * w * 4) return AVERROR_INVALIDDATA; for (y=0; y<h; y++) { - memcpy(ptr, cur, w*4); + bytestream2_get_buffer(&gb, ptr, w * 4); ptr += stride; - cur += w*4; } break; default: @@ -148,17 +143,10 @@ static int txd_decode_frame(AVCodecContext *avctx, void *data, int *data_size, } } - for (; mipmap_count > 1 && buf_end - cur >= 4; mipmap_count--) { - uint32_t length = bytestream_get_le32(&cur); - if (buf_end - cur < length) - break; - cur += length; - } - *picture = s->picture; *data_size = sizeof(AVPicture); - return cur - buf; + return avpkt->size; unsupported: av_log(avctx, AV_LOG_ERROR, "unsupported d3d format (%08x)\n", d3d_format); diff --git a/libavformat/iff.c b/libavformat/iff.c index 7473c182b0..1890800cf2 100644 --- a/libavformat/iff.c +++ b/libavformat/iff.c @@ -385,7 +385,7 @@ static int iff_read_packet(AVFormatContext *s, AVInputFormat ff_iff_demuxer = { .name = "IFF", - .long_name = NULL_IF_CONFIG_SMALL("IFF format"), + .long_name = NULL_IF_CONFIG_SMALL("Interchange File Format"), .priv_data_size = sizeof(IffDemuxContext), .read_probe = iff_probe, .read_header = iff_read_header, diff --git a/libavformat/rtsp.c b/libavformat/rtsp.c index f2f69642ec..ac2196a925 100644 --- a/libavformat/rtsp.c +++ b/libavformat/rtsp.c @@ -1005,7 +1005,7 @@ start: av_freep(content_ptr); /* If method is set, this is called from ff_rtsp_send_cmd, * where a reply to exactly this request is awaited. For - * callers from within packet reciving, we just want to + * callers from within packet receiving, we just want to * return to the caller and go back to receiving packets. */ if (method) goto start; diff --git a/libavutil/Makefile b/libavutil/Makefile index 23049f600f..77d23d4e63 100644 --- a/libavutil/Makefile +++ b/libavutil/Makefile @@ -77,7 +77,6 @@ OBJS = adler32.o \ tree.o \ utils.o \ -OBJS-$(ARCH_ARM) += arm/cpu.o OBJS-$(ARCH_PPC) += ppc/cpu.o OBJS-$(ARCH_X86) += x86/cpu.o diff --git a/libavutil/arm/cpu.c b/libavutil/arm/cpu.c deleted file mode 100644 index 742c3e498d..0000000000 --- a/libavutil/arm/cpu.c +++ /dev/null @@ -1,25 +0,0 @@ -/* - * This file is part of FFmpeg. - * - * FFmpeg is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * FFmpeg is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with FFmpeg; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#include "libavutil/cpu.h" -#include "config.h" - -int ff_get_cpu_flags_arm(void) -{ - return HAVE_IWMMXT * AV_CPU_FLAG_IWMMXT; -} diff --git a/libavutil/cpu.c b/libavutil/cpu.c index fed6093316..762e8d5241 100644 --- a/libavutil/cpu.c +++ b/libavutil/cpu.c @@ -31,7 +31,6 @@ int av_get_cpu_flags(void) if (checked) return flags; - if (ARCH_ARM) flags = ff_get_cpu_flags_arm(); if (ARCH_PPC) flags = ff_get_cpu_flags_ppc(); if (ARCH_X86) flags = ff_get_cpu_flags_x86(); @@ -55,9 +54,7 @@ static const struct { int flag; const char *name; } cpu_flag_tab[] = { -#if ARCH_ARM - { AV_CPU_FLAG_IWMMXT, "iwmmxt" }, -#elif ARCH_PPC +#if ARCH_PPC { AV_CPU_FLAG_ALTIVEC, "altivec" }, #elif ARCH_X86 { AV_CPU_FLAG_MMX, "mmx" }, diff --git a/libavutil/cpu.h b/libavutil/cpu.h index e448da6cdc..638374db4d 100644 --- a/libavutil/cpu.h +++ b/libavutil/cpu.h @@ -43,7 +43,6 @@ #define AV_CPU_FLAG_CMOV 0x1000000 ///< supports cmov instruction #define AV_CPU_FLAG_XOP 0x0400 ///< Bulldozer XOP functions #define AV_CPU_FLAG_FMA4 0x0800 ///< Bulldozer FMA4 functions -#define AV_CPU_FLAG_IWMMXT 0x0100 ///< XScale IWMMXT #define AV_CPU_FLAG_ALTIVEC 0x0001 ///< standard /** @@ -67,7 +66,6 @@ void av_force_cpu_flags(int flags); attribute_deprecated void av_set_cpu_flags_mask(int mask); /* The following CPU-specific functions shall not be called directly. */ -int ff_get_cpu_flags_arm(void); int ff_get_cpu_flags_ppc(void); int ff_get_cpu_flags_x86(void); |