about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Michael Niedermayer <michaelni@gmx.at> 2012-03-13 01:56:33 +0100
committer Michael Niedermayer <michaelni@gmx.at> 2012-03-13 01:56:33 +0100
commit b25a265a5c921d2d223a8aeff2f918894d515934 (patch)
tree 480f9648f685220520a344ac293f66e307abfc5c
parent 2d38081b4f65f23077cb1b27f2d08c82c45afa05 (diff)
parent bd3e07c82ae558c2cc3616115161827630826ec1 (diff)
download ffmpeg-b25a265a5c921d2d223a8aeff2f918894d515934.tar.gz
Merge remote-tracking branch 'qatar/master'
* qatar/master:
  pcm-mpeg: convert to bytestream2 API
  Revert "h264: clear trailing bits in partially parsed NAL units"
  remove iwmmxt optimizations
  mimic: do not continue if swap_buf_size is 0
  mimic: convert to bytestream2 API
  frwu: use MKTAG to check marker instead of AV_RL32
  txd: port to bytestream2 API
  c93: convert to bytestream2 API
  iff: make .long_name more descriptive
  FATE: add test for cdxl demuxer
  rtsp: Fix a typo

Conflicts:
  libavcodec/arm/dsputil_iwmmxt.c
  libavcodec/arm/dsputil_iwmmxt_rnd_template.c
  libavcodec/arm/mpegvideo_iwmmxt.c
  libavcodec/c93.c
  libavcodec/txd.c
  libavutil/arm/cpu.c
  tests/fate/demux.mak

Merged-by: Michael Niedermayer <michaelni@gmx.at>
-rwxr-xr-x configure 5
-rw-r--r-- libavcodec/arm/Makefile 3
-rw-r--r-- libavcodec/arm/dsputil_arm.h 1
-rw-r--r-- libavcodec/arm/dsputil_init_arm.c 1
-rw-r--r-- libavcodec/arm/dsputil_iwmmxt.c 210
-rw-r--r-- libavcodec/arm/dsputil_iwmmxt_rnd_template.c 1114
-rw-r--r-- libavcodec/arm/mpegvideo_arm.c 7
-rw-r--r-- libavcodec/arm/mpegvideo_arm.h 1
-rw-r--r-- libavcodec/arm/mpegvideo_iwmmxt.c 101
-rw-r--r-- libavcodec/c93.c 52
-rw-r--r-- libavcodec/frwu.c 3
-rw-r--r-- libavcodec/mimic.c 22
-rw-r--r-- libavcodec/pcm-mpeg.c 110
-rw-r--r-- libavcodec/s3tc.c 22
-rw-r--r-- libavcodec/s3tc.h 8
-rw-r--r-- libavcodec/txd.c 68
-rw-r--r-- libavformat/iff.c 2
-rw-r--r-- libavformat/rtsp.c 2
-rw-r--r-- libavutil/Makefile 1
-rw-r--r-- libavutil/arm/cpu.c 25
-rw-r--r-- libavutil/cpu.c 5
-rw-r--r-- libavutil/cpu.h 2
22 files changed, 142 insertions, 1623 deletions
diff --git a/configure b/configure
index e8ae9dca46..f46f8d351c 100755
--- a/configure
+++ b/configure
@@ -251,7 +251,6 @@ Advanced options (experts only):
--disable-armv6 disable armv6 optimizations
--disable-armv6t2 disable armv6t2 optimizations
--disable-armvfp disable ARM VFP optimizations
- --disable-iwmmxt disable iwmmxt optimizations
--disable-mmi disable MMI optimizations
--disable-neon disable NEON optimizations
--disable-vis disable VIS optimizations
@@ -1132,7 +1131,6 @@ ARCH_EXT_LIST='
armv6t2
armvfp
avx
- iwmmxt
mmi
mmx
mmx2
@@ -1344,7 +1342,6 @@ armv5te_deps="arm"
armv6_deps="arm"
armv6t2_deps="arm"
armvfp_deps="arm"
-iwmmxt_deps="arm"
neon_deps="arm"
vfpv3_deps="armvfp"
@@ -2884,7 +2881,6 @@ EOF
enabled armv6 && check_asm armv6 '"sadd16 r0, r0, r0"'
enabled armv6t2 && check_asm armv6t2 '"movt r0, #0"'
enabled armvfp && check_asm armvfp '"fadds s0, s0, s0"'
- enabled iwmmxt && check_asm iwmmxt '"wunpckelub wr6, wr4"'
enabled neon && check_asm neon '"vadd.i16 q0, q0, q0"'
enabled vfpv3 && check_asm vfpv3 '"vmov.f32 s0, #1.0"'
@@ -3489,7 +3485,6 @@ if enabled arm; then
echo "ARMv6 enabled ${armv6-no}"
echo "ARMv6T2 enabled ${armv6t2-no}"
echo "ARM VFP enabled ${armvfp-no}"
- echo "IWMMXT enabled ${iwmmxt-no}"
echo "NEON enabled ${neon-no}"
fi
if enabled mips; then
diff --git a/libavcodec/arm/Makefile b/libavcodec/arm/Makefile
index 0e45bc0a52..39852c4f4a 100644
--- a/libavcodec/arm/Makefile
+++ b/libavcodec/arm/Makefile
@@ -44,9 +44,6 @@ OBJS-$(HAVE_ARMVFP) += arm/dsputil_vfp.o \
arm/dsputil_init_vfp.o \
$(VFP-OBJS-yes)
-OBJS-$(HAVE_IWMMXT) += arm/dsputil_iwmmxt.o \
- arm/mpegvideo_iwmmxt.o \
-
NEON-OBJS-$(CONFIG_FFT) += arm/fft_neon.o \
arm/fft_fixed_neon.o \
diff --git a/libavcodec/arm/dsputil_arm.h b/libavcodec/arm/dsputil_arm.h
index b333c70226..b7b5bdc0e6 100644
--- a/libavcodec/arm/dsputil_arm.h
+++ b/libavcodec/arm/dsputil_arm.h
@@ -28,6 +28,5 @@ void ff_dsputil_init_armv5te(DSPContext* c, AVCodecContext *avctx);
void ff_dsputil_init_armv6(DSPContext* c, AVCodecContext *avctx);
void ff_dsputil_init_vfp(DSPContext* c, AVCodecContext *avctx);
void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx);
-void ff_dsputil_init_iwmmxt(DSPContext* c, AVCodecContext *avctx);
#endif /* AVCODEC_ARM_DSPUTIL_H */
diff --git a/libavcodec/arm/dsputil_init_arm.c b/libavcodec/arm/dsputil_init_arm.c
index 58577dde9a..82af718b66 100644
--- a/libavcodec/arm/dsputil_init_arm.c
+++ b/libavcodec/arm/dsputil_init_arm.c
@@ -119,7 +119,6 @@ void ff_dsputil_init_arm(DSPContext* c, AVCodecContext *avctx)
if (HAVE_ARMV5TE) ff_dsputil_init_armv5te(c, avctx);
if (HAVE_ARMV6) ff_dsputil_init_armv6(c, avctx);
- if (HAVE_IWMMXT) ff_dsputil_init_iwmmxt(c, avctx);
if (HAVE_ARMVFP) ff_dsputil_init_vfp(c, avctx);
if (HAVE_NEON) ff_dsputil_init_neon(c, avctx);
}
diff --git a/libavcodec/arm/dsputil_iwmmxt.c b/libavcodec/arm/dsputil_iwmmxt.c
deleted file mode 100644
index 2837af119f..0000000000
--- a/libavcodec/arm/dsputil_iwmmxt.c
+++ /dev/null
@@ -1,210 +0,0 @@
-/*
- * iWMMXt optimized DSP utils
- * Copyright (c) 2004 AGAWA Koji
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#include "libavutil/cpu.h"
-#include "libavcodec/dsputil.h"
-
-#define DEF(x, y) x ## _no_rnd_ ## y ##_iwmmxt
-#define SET_RND(regd) __asm__ volatile ("mov r12, #1 \n\t tbcsth " #regd ", r12":::"r12");
-#define WAVG2B "wavg2b"
-#include "dsputil_iwmmxt_rnd_template.c"
-#undef DEF
-#undef SET_RND
-#undef WAVG2B
-
-#define DEF(x, y) x ## _ ## y ##_iwmmxt
-#define SET_RND(regd) __asm__ volatile ("mov r12, #2 \n\t tbcsth " #regd ", r12":::"r12");
-#define WAVG2B "wavg2br"
-#include "dsputil_iwmmxt_rnd_template.c"
-#undef DEF
-#undef SET_RND
-#undef WAVG2BR
-
-// need scheduling
-#define OP(AVG) \
- __asm__ volatile ( \
- /* alignment */ \
- "and r12, %[pixels], #7 \n\t" \
- "bic %[pixels], %[pixels], #7 \n\t" \
- "tmcr wcgr1, r12 \n\t" \
- \
- "wldrd wr0, [%[pixels]] \n\t" \
- "wldrd wr1, [%[pixels], #8] \n\t" \
- "add %[pixels], %[pixels], %[line_size] \n\t" \
- "walignr1 wr4, wr0, wr1 \n\t" \
- \
- "1: \n\t" \
- \
- "wldrd wr2, [%[pixels]] \n\t" \
- "wldrd wr3, [%[pixels], #8] \n\t" \
- "add %[pixels], %[pixels], %[line_size] \n\t" \
- "pld [%[pixels]] \n\t" \
- "walignr1 wr5, wr2, wr3 \n\t" \
- AVG " wr6, wr4, wr5 \n\t" \
- "wstrd wr6, [%[block]] \n\t" \
- "add %[block], %[block], %[line_size] \n\t" \
- \
- "wldrd wr0, [%[pixels]] \n\t" \
- "wldrd wr1, [%[pixels], #8] \n\t" \
- "add %[pixels], %[pixels], %[line_size] \n\t" \
- "walignr1 wr4, wr0, wr1 \n\t" \
- "pld [%[pixels]] \n\t" \
- AVG " wr6, wr4, wr5 \n\t" \
- "wstrd wr6, [%[block]] \n\t" \
- "add %[block], %[block], %[line_size] \n\t" \
- \
- "subs %[h], %[h], #2 \n\t" \
- "bne 1b \n\t" \
- : [block]"+r"(block), [pixels]"+r"(pixels), [h]"+r"(h) \
- : [line_size]"r"(line_size) \
- : "memory", "r12");
-void put_pixels8_y2_iwmmxt(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
-{
- OP("wavg2br");
-}
-void put_no_rnd_pixels8_y2_iwmmxt(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
-{
- OP("wavg2b");
-}
-#undef OP
-
-void add_pixels_clamped_iwmmxt(const DCTELEM *block, uint8_t *pixels, int line_size)
-{
- uint8_t *pixels2 = pixels + line_size;
-
- __asm__ volatile (
- "mov r12, #4 \n\t"
- "1: \n\t"
- "pld [%[pixels], %[line_size2]] \n\t"
- "pld [%[pixels2], %[line_size2]] \n\t"
- "wldrd wr4, [%[pixels]] \n\t"
- "wldrd wr5, [%[pixels2]] \n\t"
- "pld [%[block], #32] \n\t"
- "wunpckelub wr6, wr4 \n\t"
- "wldrd wr0, [%[block]] \n\t"
- "wunpckehub wr7, wr4 \n\t"
- "wldrd wr1, [%[block], #8] \n\t"
- "wunpckelub wr8, wr5 \n\t"
- "wldrd wr2, [%[block], #16] \n\t"
- "wunpckehub wr9, wr5 \n\t"
- "wldrd wr3, [%[block], #24] \n\t"
- "add %[block], %[block], #32 \n\t"
- "waddhss wr10, wr0, wr6 \n\t"
- "waddhss wr11, wr1, wr7 \n\t"
- "waddhss wr12, wr2, wr8 \n\t"
- "waddhss wr13, wr3, wr9 \n\t"
- "wpackhus wr14, wr10, wr11 \n\t"
- "wpackhus wr15, wr12, wr13 \n\t"
- "wstrd wr14, [%[pixels]] \n\t"
- "add %[pixels], %[pixels], %[line_size2] \n\t"
- "subs r12, r12, #1 \n\t"
- "wstrd wr15, [%[pixels2]] \n\t"
- "add %[pixels2], %[pixels2], %[line_size2] \n\t"
- "bne 1b \n\t"
- : [block]"+r"(block), [pixels]"+r"(pixels), [pixels2]"+r"(pixels2)
- : [line_size2]"r"(line_size << 1)
- : "cc", "memory", "r12");
-}
-
-static void clear_blocks_iwmmxt(DCTELEM *blocks)
-{
- __asm__ volatile(
- "wzero wr0 \n\t"
- "mov r1, #(128 * 6 / 32) \n\t"
- "1: \n\t"
- "wstrd wr0, [%0] \n\t"
- "wstrd wr0, [%0, #8] \n\t"
- "wstrd wr0, [%0, #16] \n\t"
- "wstrd wr0, [%0, #24] \n\t"
- "subs r1, r1, #1 \n\t"
- "add %0, %0, #32 \n\t"
- "bne 1b \n\t"
- : "+r"(blocks)
- :
- : "r1"
- );
-}
-
-static void nop(uint8_t *block, const uint8_t *pixels, int line_size, int h)
-{
- return;
-}
-
-/* A run time test is not simple. If this file is compiled in
- * then we should install the functions
- */
-
-void ff_dsputil_init_iwmmxt(DSPContext* c, AVCodecContext *avctx)
-{
- int mm_flags = AV_CPU_FLAG_IWMMXT; /* multimedia extension flags */
- const int high_bit_depth = avctx->bits_per_raw_sample > 8;
-
- if (avctx->dsp_mask) {
- if (avctx->dsp_mask & AV_CPU_FLAG_FORCE)
- mm_flags |= (avctx->dsp_mask & 0xffff);
- else
- mm_flags &= ~(avctx->dsp_mask & 0xffff);
- }
-
- if (!(mm_flags & AV_CPU_FLAG_IWMMXT)) return;
-
- c->add_pixels_clamped = add_pixels_clamped_iwmmxt;
-
- if (!high_bit_depth) {
- c->clear_blocks = clear_blocks_iwmmxt;
-
- c->put_pixels_tab[0][0] = put_pixels16_iwmmxt;
- c->put_pixels_tab[0][1] = put_pixels16_x2_iwmmxt;
- c->put_pixels_tab[0][2] = put_pixels16_y2_iwmmxt;
- c->put_pixels_tab[0][3] = put_pixels16_xy2_iwmmxt;
- c->put_no_rnd_pixels_tab[0][0] = put_pixels16_iwmmxt;
- c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_iwmmxt;
- c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_iwmmxt;
- c->put_no_rnd_pixels_tab[0][3] = put_no_rnd_pixels16_xy2_iwmmxt;
-
- c->put_pixels_tab[1][0] = put_pixels8_iwmmxt;
- c->put_pixels_tab[1][1] = put_pixels8_x2_iwmmxt;
- c->put_pixels_tab[1][2] = put_pixels8_y2_iwmmxt;
- c->put_pixels_tab[1][3] = put_pixels8_xy2_iwmmxt;
- c->put_no_rnd_pixels_tab[1][0] = put_pixels8_iwmmxt;
- c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_iwmmxt;
- c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_iwmmxt;
- c->put_no_rnd_pixels_tab[1][3] = put_no_rnd_pixels8_xy2_iwmmxt;
-
- c->avg_pixels_tab[0][0] = avg_pixels16_iwmmxt;
- c->avg_pixels_tab[0][1] = avg_pixels16_x2_iwmmxt;
- c->avg_pixels_tab[0][2] = avg_pixels16_y2_iwmmxt;
- c->avg_pixels_tab[0][3] = avg_pixels16_xy2_iwmmxt;
- c->avg_no_rnd_pixels_tab[0][0] = avg_pixels16_iwmmxt;
- c->avg_no_rnd_pixels_tab[0][1] = avg_no_rnd_pixels16_x2_iwmmxt;
- c->avg_no_rnd_pixels_tab[0][2] = avg_no_rnd_pixels16_y2_iwmmxt;
- c->avg_no_rnd_pixels_tab[0][3] = avg_no_rnd_pixels16_xy2_iwmmxt;
-
- c->avg_pixels_tab[1][0] = avg_pixels8_iwmmxt;
- c->avg_pixels_tab[1][1] = avg_pixels8_x2_iwmmxt;
- c->avg_pixels_tab[1][2] = avg_pixels8_y2_iwmmxt;
- c->avg_pixels_tab[1][3] = avg_pixels8_xy2_iwmmxt;
- c->avg_no_rnd_pixels_tab[1][0] = avg_no_rnd_pixels8_iwmmxt;
- c->avg_no_rnd_pixels_tab[1][1] = avg_no_rnd_pixels8_x2_iwmmxt;
- c->avg_no_rnd_pixels_tab[1][2] = avg_no_rnd_pixels8_y2_iwmmxt;
- c->avg_no_rnd_pixels_tab[1][3] = avg_no_rnd_pixels8_xy2_iwmmxt;
- }
-}
diff --git a/libavcodec/arm/dsputil_iwmmxt_rnd_template.c b/libavcodec/arm/dsputil_iwmmxt_rnd_template.c
deleted file mode 100644
index 35a5a9b8b4..0000000000
--- a/libavcodec/arm/dsputil_iwmmxt_rnd_template.c
+++ /dev/null
@@ -1,1114 +0,0 @@
-/*
- * iWMMXt optimized DSP utils
- * copyright (c) 2004 AGAWA Koji
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-void DEF(put, pixels8)(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
-{
- int stride = line_size;
- __asm__ volatile (
- "and r12, %[pixels], #7 \n\t"
- "bic %[pixels], %[pixels], #7 \n\t"
- "tmcr wcgr1, r12 \n\t"
- "add r4, %[pixels], %[line_size] \n\t"
- "add r5, %[block], %[line_size] \n\t"
- "mov %[line_size], %[line_size], lsl #1 \n\t"
- "1: \n\t"
- "wldrd wr0, [%[pixels]] \n\t"
- "subs %[h], %[h], #2 \n\t"
- "wldrd wr1, [%[pixels], #8] \n\t"
- "add %[pixels], %[pixels], %[line_size] \n\t"
- "wldrd wr3, [r4] \n\t"
- "pld [%[pixels]] \n\t"
- "pld [%[pixels], #32] \n\t"
- "wldrd wr4, [r4, #8] \n\t"
- "add r4, r4, %[line_size] \n\t"
- "walignr1 wr8, wr0, wr1 \n\t"
- "pld [r4] \n\t"
- "pld [r4, #32] \n\t"
- "walignr1 wr10, wr3, wr4 \n\t"
- "wstrd wr8, [%[block]] \n\t"
- "add %[block], %[block], %[line_size] \n\t"
- "wstrd wr10, [r5] \n\t"
- "add r5, r5, %[line_size] \n\t"
- "bne 1b \n\t"
- : [block]"+r"(block), [pixels]"+r"(pixels), [line_size]"+r"(stride), [h]"+r"(h)
- :
- : "memory", "r4", "r5", "r12");
-}
-
-void DEF(avg, pixels8)(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
-{
- int stride = line_size;
- __asm__ volatile (
- "and r12, %[pixels], #7 \n\t"
- "bic %[pixels], %[pixels], #7 \n\t"
- "tmcr wcgr1, r12 \n\t"
- "add r4, %[pixels], %[line_size] \n\t"
- "add r5, %[block], %[line_size] \n\t"
- "mov %[line_size], %[line_size], lsl #1 \n\t"
- "1: \n\t"
- "wldrd wr0, [%[pixels]] \n\t"
- "subs %[h], %[h], #2 \n\t"
- "wldrd wr1, [%[pixels], #8] \n\t"
- "add %[pixels], %[pixels], %[line_size] \n\t"
- "wldrd wr3, [r4] \n\t"
- "pld [%[pixels]] \n\t"
- "pld [%[pixels], #32] \n\t"
- "wldrd wr4, [r4, #8] \n\t"
- "add r4, r4, %[line_size] \n\t"
- "walignr1 wr8, wr0, wr1 \n\t"
- "wldrd wr0, [%[block]] \n\t"
- "wldrd wr2, [r5] \n\t"
- "pld [r4] \n\t"
- "pld [r4, #32] \n\t"
- "walignr1 wr10, wr3, wr4 \n\t"
- WAVG2B" wr8, wr8, wr0 \n\t"
- WAVG2B" wr10, wr10, wr2 \n\t"
- "wstrd wr8, [%[block]] \n\t"
- "add %[block], %[block], %[line_size] \n\t"
- "wstrd wr10, [r5] \n\t"
- "pld [%[block]] \n\t"
- "pld [%[block], #32] \n\t"
- "add r5, r5, %[line_size] \n\t"
- "pld [r5] \n\t"
- "pld [r5, #32] \n\t"
- "bne 1b \n\t"
- : [block]"+r"(block), [pixels]"+r"(pixels), [line_size]"+r"(stride), [h]"+r"(h)
- :
- : "memory", "r4", "r5", "r12");
-}
-
-void DEF(put, pixels16)(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
-{
- int stride = line_size;
- __asm__ volatile (
- "and r12, %[pixels], #7 \n\t"
- "bic %[pixels], %[pixels], #7 \n\t"
- "tmcr wcgr1, r12 \n\t"
- "add r4, %[pixels], %[line_size] \n\t"
- "add r5, %[block], %[line_size] \n\t"
- "mov %[line_size], %[line_size], lsl #1 \n\t"
- "1: \n\t"
- "wldrd wr0, [%[pixels]] \n\t"
- "wldrd wr1, [%[pixels], #8] \n\t"
- "subs %[h], %[h], #2 \n\t"
- "wldrd wr2, [%[pixels], #16] \n\t"
- "add %[pixels], %[pixels], %[line_size] \n\t"
- "wldrd wr3, [r4] \n\t"
- "pld [%[pixels]] \n\t"
- "pld [%[pixels], #32] \n\t"
- "walignr1 wr8, wr0, wr1 \n\t"
- "wldrd wr4, [r4, #8] \n\t"
- "walignr1 wr9, wr1, wr2 \n\t"
- "wldrd wr5, [r4, #16] \n\t"
- "add r4, r4, %[line_size] \n\t"
- "pld [r4] \n\t"
- "pld [r4, #32] \n\t"
- "walignr1 wr10, wr3, wr4 \n\t"
- "wstrd wr8, [%[block]] \n\t"
- "walignr1 wr11, wr4, wr5 \n\t"
- "wstrd wr9, [%[block], #8] \n\t"
- "add %[block], %[block], %[line_size] \n\t"
- "wstrd wr10, [r5] \n\t"
- "wstrd wr11, [r5, #8] \n\t"
- "add r5, r5, %[line_size] \n\t"
- "bne 1b \n\t"
- : [block]"+r"(block), [pixels]"+r"(pixels), [line_size]"+r"(stride), [h]"+r"(h)
- :
- : "memory", "r4", "r5", "r12");
-}
-
-void DEF(avg, pixels16)(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
-{
- int stride = line_size;
- __asm__ volatile (
- "pld [%[pixels]] \n\t"
- "pld [%[pixels], #32] \n\t"
- "pld [%[block]] \n\t"
- "pld [%[block], #32] \n\t"
- "and r12, %[pixels], #7 \n\t"
- "bic %[pixels], %[pixels], #7 \n\t"
- "tmcr wcgr1, r12 \n\t"
- "add r4, %[pixels], %[line_size]\n\t"
- "add r5, %[block], %[line_size] \n\t"
- "mov %[line_size], %[line_size], lsl #1 \n\t"
- "1: \n\t"
- "wldrd wr0, [%[pixels]] \n\t"
- "wldrd wr1, [%[pixels], #8] \n\t"
- "subs %[h], %[h], #2 \n\t"
- "wldrd wr2, [%[pixels], #16] \n\t"
- "add %[pixels], %[pixels], %[line_size] \n\t"
- "wldrd wr3, [r4] \n\t"
- "pld [%[pixels]] \n\t"
- "pld [%[pixels], #32] \n\t"
- "walignr1 wr8, wr0, wr1 \n\t"
- "wldrd wr4, [r4, #8] \n\t"
- "walignr1 wr9, wr1, wr2 \n\t"
- "wldrd wr5, [r4, #16] \n\t"
- "add r4, r4, %[line_size] \n\t"
- "wldrd wr0, [%[block]] \n\t"
- "pld [r4] \n\t"
- "wldrd wr1, [%[block], #8] \n\t"
- "pld [r4, #32] \n\t"
- "wldrd wr2, [r5] \n\t"
- "walignr1 wr10, wr3, wr4 \n\t"
- "wldrd wr3, [r5, #8] \n\t"
- WAVG2B" wr8, wr8, wr0 \n\t"
- WAVG2B" wr9, wr9, wr1 \n\t"
- WAVG2B" wr10, wr10, wr2 \n\t"
- "wstrd wr8, [%[block]] \n\t"
- "walignr1 wr11, wr4, wr5 \n\t"
- WAVG2B" wr11, wr11, wr3 \n\t"
- "wstrd wr9, [%[block], #8] \n\t"
- "add %[block], %[block], %[line_size] \n\t"
- "wstrd wr10, [r5] \n\t"
- "pld [%[block]] \n\t"
- "pld [%[block], #32] \n\t"
- "wstrd wr11, [r5, #8] \n\t"
- "add r5, r5, %[line_size] \n\t"
- "pld [r5] \n\t"
- "pld [r5, #32] \n\t"
- "bne 1b \n\t"
- : [block]"+r"(block), [pixels]"+r"(pixels), [line_size]"+r"(stride), [h]"+r"(h)
- :
- : "memory", "r4", "r5", "r12");
-}
-
-void DEF(put, pixels8_x2)(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
-{
- int stride = line_size;
- // [wr0 wr1 wr2 wr3] for previous line
- // [wr4 wr5 wr6 wr7] for current line
- SET_RND(wr15); // =2 for rnd and =1 for no_rnd version
- __asm__ volatile(
- "pld [%[pixels]] \n\t"
- "pld [%[pixels], #32] \n\t"
- "and r12, %[pixels], #7 \n\t"
- "bic %[pixels], %[pixels], #7 \n\t"
- "tmcr wcgr1, r12 \n\t"
- "add r12, r12, #1 \n\t"
- "add r4, %[pixels], %[line_size]\n\t"
- "tmcr wcgr2, r12 \n\t"
- "add r5, %[block], %[line_size] \n\t"
- "mov %[line_size], %[line_size], lsl #1 \n\t"
-
- "1: \n\t"
- "wldrd wr10, [%[pixels]] \n\t"
- "cmp r12, #8 \n\t"
- "wldrd wr11, [%[pixels], #8] \n\t"
- "add %[pixels], %[pixels], %[line_size] \n\t"
- "wldrd wr13, [r4] \n\t"
- "pld [%[pixels]] \n\t"
- "wldrd wr14, [r4, #8] \n\t"
- "pld [%[pixels], #32] \n\t"
- "add r4, r4, %[line_size] \n\t"
- "walignr1 wr0, wr10, wr11 \n\t"
- "pld [r4] \n\t"
- "pld [r4, #32] \n\t"
- "walignr1 wr2, wr13, wr14 \n\t"
- "wmoveq wr4, wr11 \n\t"
- "wmoveq wr6, wr14 \n\t"
- "walignr2ne wr4, wr10, wr11 \n\t"
- "walignr2ne wr6, wr13, wr14 \n\t"
- WAVG2B" wr0, wr0, wr4 \n\t"
- WAVG2B" wr2, wr2, wr6 \n\t"
- "wstrd wr0, [%[block]] \n\t"
- "subs %[h], %[h], #2 \n\t"
- "wstrd wr2, [r5] \n\t"
- "add %[block], %[block], %[line_size] \n\t"
- "add r5, r5, %[line_size] \n\t"
- "bne 1b \n\t"
- : [h]"+r"(h), [pixels]"+r"(pixels), [block]"+r"(block), [line_size]"+r"(stride)
- :
- : "r4", "r5", "r12", "memory");
-}
-
-void DEF(put, pixels16_x2)(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
-{
- int stride = line_size;
- // [wr0 wr1 wr2 wr3] for previous line
- // [wr4 wr5 wr6 wr7] for current line
- SET_RND(wr15); // =2 for rnd and =1 for no_rnd version
- __asm__ volatile(
- "pld [%[pixels]] \n\t"
- "pld [%[pixels], #32] \n\t"
- "and r12, %[pixels], #7 \n\t"
- "bic %[pixels], %[pixels], #7 \n\t"
- "tmcr wcgr1, r12 \n\t"
- "add r12, r12, #1 \n\t"
- "add r4, %[pixels], %[line_size]\n\t"
- "tmcr wcgr2, r12 \n\t"
- "add r5, %[block], %[line_size] \n\t"
- "mov %[line_size], %[line_size], lsl #1 \n\t"
-
- "1: \n\t"
- "wldrd wr10, [%[pixels]] \n\t"
- "cmp r12, #8 \n\t"
- "wldrd wr11, [%[pixels], #8] \n\t"
- "wldrd wr12, [%[pixels], #16] \n\t"
- "add %[pixels], %[pixels], %[line_size] \n\t"
- "wldrd wr13, [r4] \n\t"
- "pld [%[pixels]] \n\t"
- "wldrd wr14, [r4, #8] \n\t"
- "pld [%[pixels], #32] \n\t"
- "wldrd wr15, [r4, #16] \n\t"
- "add r4, r4, %[line_size] \n\t"
- "walignr1 wr0, wr10, wr11 \n\t"
- "pld [r4] \n\t"
- "pld [r4, #32] \n\t"
- "walignr1 wr1, wr11, wr12 \n\t"
- "walignr1 wr2, wr13, wr14 \n\t"
- "walignr1 wr3, wr14, wr15 \n\t"
- "wmoveq wr4, wr11 \n\t"
- "wmoveq wr5, wr12 \n\t"
- "wmoveq wr6, wr14 \n\t"
- "wmoveq wr7, wr15 \n\t"
- "walignr2ne wr4, wr10, wr11 \n\t"
- "walignr2ne wr5, wr11, wr12 \n\t"
- "walignr2ne wr6, wr13, wr14 \n\t"
- "walignr2ne wr7, wr14, wr15 \n\t"
- WAVG2B" wr0, wr0, wr4 \n\t"
- WAVG2B" wr1, wr1, wr5 \n\t"
- "wstrd wr0, [%[block]] \n\t"
- WAVG2B" wr2, wr2, wr6 \n\t"
- "wstrd wr1, [%[block], #8] \n\t"
- WAVG2B" wr3, wr3, wr7 \n\t"
- "add %[block], %[block], %[line_size] \n\t"
- "wstrd wr2, [r5] \n\t"
- "subs %[h], %[h], #2 \n\t"
- "wstrd wr3, [r5, #8] \n\t"
- "add r5, r5, %[line_size] \n\t"
- "bne 1b \n\t"
- : [h]"+r"(h), [pixels]"+r"(pixels), [block]"+r"(block), [line_size]"+r"(stride)
- :
- : "r4", "r5", "r12", "memory");
-}
-
-void DEF(avg, pixels8_x2)(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
-{
- int stride = line_size;
- // [wr0 wr1 wr2 wr3] for previous line
- // [wr4 wr5 wr6 wr7] for current line
- SET_RND(wr15); // =2 for rnd and =1 for no_rnd version
- __asm__ volatile(
- "pld [%[pixels]] \n\t"
- "pld [%[pixels], #32] \n\t"
- "pld [%[block]] \n\t"
- "pld [%[block], #32] \n\t"
- "and r12, %[pixels], #7 \n\t"
- "bic %[pixels], %[pixels], #7 \n\t"
- "tmcr wcgr1, r12 \n\t"
- "add r12, r12, #1 \n\t"
- "add r4, %[pixels], %[line_size]\n\t"
- "tmcr wcgr2, r12 \n\t"
- "add r5, %[block], %[line_size] \n\t"
- "mov %[line_size], %[line_size], lsl #1 \n\t"
- "pld [r5] \n\t"
- "pld [r5, #32] \n\t"
-
- "1: \n\t"
- "wldrd wr10, [%[pixels]] \n\t"
- "cmp r12, #8 \n\t"
- "wldrd wr11, [%[pixels], #8] \n\t"
- "add %[pixels], %[pixels], %[line_size] \n\t"
- "wldrd wr13, [r4] \n\t"
- "pld [%[pixels]] \n\t"
- "wldrd wr14, [r4, #8] \n\t"
- "pld [%[pixels], #32] \n\t"
- "add r4, r4, %[line_size] \n\t"
- "walignr1 wr0, wr10, wr11 \n\t"
- "pld [r4] \n\t"
- "pld [r4, #32] \n\t"
- "walignr1 wr2, wr13, wr14 \n\t"
- "wmoveq wr4, wr11 \n\t"
- "wmoveq wr6, wr14 \n\t"
- "walignr2ne wr4, wr10, wr11 \n\t"
- "wldrd wr10, [%[block]] \n\t"
- "walignr2ne wr6, wr13, wr14 \n\t"
- "wldrd wr12, [r5] \n\t"
- WAVG2B" wr0, wr0, wr4 \n\t"
- WAVG2B" wr2, wr2, wr6 \n\t"
- WAVG2B" wr0, wr0, wr10 \n\t"
- WAVG2B" wr2, wr2, wr12 \n\t"
- "wstrd wr0, [%[block]] \n\t"
- "subs %[h], %[h], #2 \n\t"
- "wstrd wr2, [r5] \n\t"
- "add %[block], %[block], %[line_size] \n\t"
- "add r5, r5, %[line_size] \n\t"
- "pld [%[block]] \n\t"
- "pld [%[block], #32] \n\t"
- "pld [r5] \n\t"
- "pld [r5, #32] \n\t"
- "bne 1b \n\t"
- : [h]"+r"(h), [pixels]"+r"(pixels), [block]"+r"(block), [line_size]"+r"(stride)
- :
- : "r4", "r5", "r12", "memory");
-}
-
-void DEF(avg, pixels16_x2)(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
-{
- int stride = line_size;
- // [wr0 wr1 wr2 wr3] for previous line
- // [wr4 wr5 wr6 wr7] for current line
- SET_RND(wr15); // =2 for rnd and =1 for no_rnd version
- __asm__ volatile(
- "pld [%[pixels]] \n\t"
- "pld [%[pixels], #32] \n\t"
- "pld [%[block]] \n\t"
- "pld [%[block], #32] \n\t"
- "and r12, %[pixels], #7 \n\t"
- "bic %[pixels], %[pixels], #7 \n\t"
- "tmcr wcgr1, r12 \n\t"
- "add r12, r12, #1 \n\t"
- "add r4, %[pixels], %[line_size]\n\t"
- "tmcr wcgr2, r12 \n\t"
- "add r5, %[block], %[line_size] \n\t"
- "mov %[line_size], %[line_size], lsl #1 \n\t"
- "pld [r5] \n\t"
- "pld [r5, #32] \n\t"
-
- "1: \n\t"
- "wldrd wr10, [%[pixels]] \n\t"
- "cmp r12, #8 \n\t"
- "wldrd wr11, [%[pixels], #8] \n\t"
- "wldrd wr12, [%[pixels], #16] \n\t"
- "add %[pixels], %[pixels], %[line_size] \n\t"
- "wldrd wr13, [r4] \n\t"
- "pld [%[pixels]] \n\t"
- "wldrd wr14, [r4, #8] \n\t"
- "pld [%[pixels], #32] \n\t"
- "wldrd wr15, [r4, #16] \n\t"
- "add r4, r4, %[line_size] \n\t"
- "walignr1 wr0, wr10, wr11 \n\t"
- "pld [r4] \n\t"
- "pld [r4, #32] \n\t"
- "walignr1 wr1, wr11, wr12 \n\t"
- "walignr1 wr2, wr13, wr14 \n\t"
- "walignr1 wr3, wr14, wr15 \n\t"
- "wmoveq wr4, wr11 \n\t"
- "wmoveq wr5, wr12 \n\t"
- "wmoveq wr6, wr14 \n\t"
- "wmoveq wr7, wr15 \n\t"
- "walignr2ne wr4, wr10, wr11 \n\t"
- "walignr2ne wr5, wr11, wr12 \n\t"
- "walignr2ne wr6, wr13, wr14 \n\t"
- "walignr2ne wr7, wr14, wr15 \n\t"
- "wldrd wr10, [%[block]] \n\t"
- WAVG2B" wr0, wr0, wr4 \n\t"
- "wldrd wr11, [%[block], #8] \n\t"
- WAVG2B" wr1, wr1, wr5 \n\t"
- "wldrd wr12, [r5] \n\t"
- WAVG2B" wr2, wr2, wr6 \n\t"
- "wldrd wr13, [r5, #8] \n\t"
- WAVG2B" wr3, wr3, wr7 \n\t"
- WAVG2B" wr0, wr0, wr10 \n\t"
- WAVG2B" wr1, wr1, wr11 \n\t"
- WAVG2B" wr2, wr2, wr12 \n\t"
- WAVG2B" wr3, wr3, wr13 \n\t"
- "wstrd wr0, [%[block]] \n\t"
- "subs %[h], %[h], #2 \n\t"
- "wstrd wr1, [%[block], #8] \n\t"
- "add %[block], %[block], %[line_size] \n\t"
- "wstrd wr2, [r5] \n\t"
- "pld [%[block]] \n\t"
- "wstrd wr3, [r5, #8] \n\t"
- "add r5, r5, %[line_size] \n\t"
- "pld [%[block], #32] \n\t"
- "pld [r5] \n\t"
- "pld [r5, #32] \n\t"
- "bne 1b \n\t"
- : [h]"+r"(h), [pixels]"+r"(pixels), [block]"+r"(block), [line_size]"+r"(stride)
- :
- :"r4", "r5", "r12", "memory");
-}
-
-void DEF(avg, pixels8_y2)(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
-{
- int stride = line_size;
- // [wr0 wr1 wr2 wr3] for previous line
- // [wr4 wr5 wr6 wr7] for current line
- __asm__ volatile(
- "pld [%[pixels]] \n\t"
- "pld [%[pixels], #32] \n\t"
- "and r12, %[pixels], #7 \n\t"
- "tmcr wcgr1, r12 \n\t"
- "bic %[pixels], %[pixels], #7 \n\t"
-
- "wldrd wr10, [%[pixels]] \n\t"
- "wldrd wr11, [%[pixels], #8] \n\t"
- "pld [%[block]] \n\t"
- "add %[pixels], %[pixels], %[line_size] \n\t"
- "walignr1 wr0, wr10, wr11 \n\t"
- "pld [%[pixels]] \n\t"
- "pld [%[pixels], #32] \n\t"
-
- "1: \n\t"
- "wldrd wr10, [%[pixels]] \n\t"
- "wldrd wr11, [%[pixels], #8] \n\t"
- "add %[pixels], %[pixels], %[line_size] \n\t"
- "pld [%[pixels]] \n\t"
- "pld [%[pixels], #32] \n\t"
- "walignr1 wr4, wr10, wr11 \n\t"
- "wldrd wr10, [%[block]] \n\t"
- WAVG2B" wr8, wr0, wr4 \n\t"
- WAVG2B" wr8, wr8, wr10 \n\t"
- "wstrd wr8, [%[block]] \n\t"
- "add %[block], %[block], %[line_size] \n\t"
-
- "wldrd wr10, [%[pixels]] \n\t"
- "wldrd wr11, [%[pixels], #8] \n\t"
- "pld [%[block]] \n\t"
- "add %[pixels], %[pixels], %[line_size] \n\t"
- "pld [%[pixels]] \n\t"
- "pld [%[pixels], #32] \n\t"
- "walignr1 wr0, wr10, wr11 \n\t"
- "wldrd wr10, [%[block]] \n\t"
- WAVG2B" wr8, wr0, wr4 \n\t"
- WAVG2B" wr8, wr8, wr10 \n\t"
- "wstrd wr8, [%[block]] \n\t"
- "add %[block], %[block], %[line_size] \n\t"
-
- "subs %[h], %[h], #2 \n\t"
- "pld [%[block]] \n\t"
- "bne 1b \n\t"
- : [h]"+r"(h), [pixels]"+r"(pixels), [block]"+r"(block), [line_size]"+r"(stride)
- :
- : "cc", "memory", "r12");
-}
-
-void DEF(put, pixels16_y2)(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
-{
- int stride = line_size;
- // [wr0 wr1 wr2 wr3] for previous line
- // [wr4 wr5 wr6 wr7] for current line
- __asm__ volatile(
- "pld [%[pixels]] \n\t"
- "pld [%[pixels], #32] \n\t"
- "and r12, %[pixels], #7 \n\t"
- "tmcr wcgr1, r12 \n\t"
- "bic %[pixels], %[pixels], #7 \n\t"
-
- "wldrd wr10, [%[pixels]] \n\t"
- "wldrd wr11, [%[pixels], #8] \n\t"
- "wldrd wr12, [%[pixels], #16] \n\t"
- "add %[pixels], %[pixels], %[line_size] \n\t"
- "pld [%[pixels]] \n\t"
- "pld [%[pixels], #32] \n\t"
- "walignr1 wr0, wr10, wr11 \n\t"
- "walignr1 wr1, wr11, wr12 \n\t"
-
- "1: \n\t"
- "wldrd wr10, [%[pixels]] \n\t"
- "wldrd wr11, [%[pixels], #8] \n\t"
- "wldrd wr12, [%[pixels], #16] \n\t"
- "add %[pixels], %[pixels], %[line_size] \n\t"
- "pld [%[pixels]] \n\t"
- "pld [%[pixels], #32] \n\t"
- "walignr1 wr4, wr10, wr11 \n\t"
- "walignr1 wr5, wr11, wr12 \n\t"
- WAVG2B" wr8, wr0, wr4 \n\t"
- WAVG2B" wr9, wr1, wr5 \n\t"
- "wstrd wr8, [%[block]] \n\t"
- "wstrd wr9, [%[block], #8] \n\t"
- "add %[block], %[block], %[line_size] \n\t"
-
- "wldrd wr10, [%[pixels]] \n\t"
- "wldrd wr11, [%[pixels], #8] \n\t"
- "wldrd wr12, [%[pixels], #16] \n\t"
- "add %[pixels], %[pixels], %[line_size] \n\t"
- "pld [%[pixels]] \n\t"
- "pld [%[pixels], #32] \n\t"
- "walignr1 wr0, wr10, wr11 \n\t"
- "walignr1 wr1, wr11, wr12 \n\t"
- WAVG2B" wr8, wr0, wr4 \n\t"
- WAVG2B" wr9, wr1, wr5 \n\t"
- "wstrd wr8, [%[block]] \n\t"
- "wstrd wr9, [%[block], #8] \n\t"
- "add %[block], %[block], %[line_size] \n\t"
-
- "subs %[h], %[h], #2 \n\t"
- "bne 1b \n\t"
- : [h]"+r"(h), [pixels]"+r"(pixels), [block]"+r"(block), [line_size]"+r"(stride)
- :
- : "r4", "r5", "r12", "memory");
-}
-
-void DEF(avg, pixels16_y2)(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
-{
- int stride = line_size;
- // [wr0 wr1 wr2 wr3] for previous line
- // [wr4 wr5 wr6 wr7] for current line
- __asm__ volatile(
- "pld [%[pixels]] \n\t"
- "pld [%[pixels], #32] \n\t"
- "and r12, %[pixels], #7 \n\t"
- "tmcr wcgr1, r12 \n\t"
- "bic %[pixels], %[pixels], #7 \n\t"
-
- "wldrd wr10, [%[pixels]] \n\t"
- "wldrd wr11, [%[pixels], #8] \n\t"
- "pld [%[block]] \n\t"
- "wldrd wr12, [%[pixels], #16] \n\t"
- "add %[pixels], %[pixels], %[line_size] \n\t"
- "pld [%[pixels]] \n\t"
- "pld [%[pixels], #32] \n\t"
- "walignr1 wr0, wr10, wr11 \n\t"
- "walignr1 wr1, wr11, wr12 \n\t"
-
- "1: \n\t"
- "wldrd wr10, [%[pixels]] \n\t"
- "wldrd wr11, [%[pixels], #8] \n\t"
- "wldrd wr12, [%[pixels], #16] \n\t"
- "add %[pixels], %[pixels], %[line_size] \n\t"
- "pld [%[pixels]] \n\t"
- "pld [%[pixels], #32] \n\t"
- "walignr1 wr4, wr10, wr11 \n\t"
- "walignr1 wr5, wr11, wr12 \n\t"
- "wldrd wr10, [%[block]] \n\t"
- "wldrd wr11, [%[block], #8] \n\t"
- WAVG2B" wr8, wr0, wr4 \n\t"
- WAVG2B" wr9, wr1, wr5 \n\t"
- WAVG2B" wr8, wr8, wr10 \n\t"
- WAVG2B" wr9, wr9, wr11 \n\t"
- "wstrd wr8, [%[block]] \n\t"
- "wstrd wr9, [%[block], #8] \n\t"
- "add %[block], %[block], %[line_size] \n\t"
-
- "wldrd wr10, [%[pixels]] \n\t"
- "wldrd wr11, [%[pixels], #8] \n\t"
- "pld [%[block]] \n\t"
- "wldrd wr12, [%[pixels], #16] \n\t"
- "add %[pixels], %[pixels], %[line_size] \n\t"
- "pld [%[pixels]] \n\t"
- "pld [%[pixels], #32] \n\t"
- "walignr1 wr0, wr10, wr11 \n\t"
- "walignr1 wr1, wr11, wr12 \n\t"
- "wldrd wr10, [%[block]] \n\t"
- "wldrd wr11, [%[block], #8] \n\t"
- WAVG2B" wr8, wr0, wr4 \n\t"
- WAVG2B" wr9, wr1, wr5 \n\t"
- WAVG2B" wr8, wr8, wr10 \n\t"
- WAVG2B" wr9, wr9, wr11 \n\t"
- "wstrd wr8, [%[block]] \n\t"
- "wstrd wr9, [%[block], #8] \n\t"
- "add %[block], %[block], %[line_size] \n\t"
-
- "subs %[h], %[h], #2 \n\t"
- "pld [%[block]] \n\t"
- "bne 1b \n\t"
- : [h]"+r"(h), [pixels]"+r"(pixels), [block]"+r"(block), [line_size]"+r"(stride)
- :
- : "r4", "r5", "r12", "memory");
-}
-
-void DEF(put, pixels8_xy2)(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
-{
- // [wr0 wr1 wr2 wr3] for previous line
- // [wr4 wr5 wr6 wr7] for current line
- SET_RND(wr15); // =2 for rnd and =1 for no_rnd version
- __asm__ volatile(
- "pld [%[pixels]] \n\t"
- "mov r12, #2 \n\t"
- "pld [%[pixels], #32] \n\t"
- "tmcr wcgr0, r12 \n\t" /* for shift value */
- "and r12, %[pixels], #7 \n\t"
- "bic %[pixels], %[pixels], #7 \n\t"
- "tmcr wcgr1, r12 \n\t"
-
- // [wr0 wr1 wr2 wr3] <= *
- // [wr4 wr5 wr6 wr7]
- "wldrd wr12, [%[pixels]] \n\t"
- "add r12, r12, #1 \n\t"
- "wldrd wr13, [%[pixels], #8] \n\t"
- "tmcr wcgr2, r12 \n\t"
- "add %[pixels], %[pixels], %[line_size] \n\t"
- "cmp r12, #8 \n\t"
- "pld [%[pixels]] \n\t"
- "pld [%[pixels], #32] \n\t"
- "walignr1 wr2, wr12, wr13 \n\t"
- "wmoveq wr10, wr13 \n\t"
- "walignr2ne wr10, wr12, wr13 \n\t"
- "wunpckelub wr0, wr2 \n\t"
- "wunpckehub wr1, wr2 \n\t"
- "wunpckelub wr8, wr10 \n\t"
- "wunpckehub wr9, wr10 \n\t"
- "waddhus wr0, wr0, wr8 \n\t"
- "waddhus wr1, wr1, wr9 \n\t"
-
- "1: \n\t"
- // [wr0 wr1 wr2 wr3]
- // [wr4 wr5 wr6 wr7] <= *
- "wldrd wr12, [%[pixels]] \n\t"
- "cmp r12, #8 \n\t"
- "wldrd wr13, [%[pixels], #8] \n\t"
- "add %[pixels], %[pixels], %[line_size] \n\t"
- "walignr1 wr6, wr12, wr13 \n\t"
- "pld [%[pixels]] \n\t"
- "pld [%[pixels], #32] \n\t"
- "wmoveq wr10, wr13 \n\t"
- "walignr2ne wr10, wr12, wr13 \n\t"
- "wunpckelub wr4, wr6 \n\t"
- "wunpckehub wr5, wr6 \n\t"
- "wunpckelub wr8, wr10 \n\t"
- "wunpckehub wr9, wr10 \n\t"
- "waddhus wr4, wr4, wr8 \n\t"
- "waddhus wr5, wr5, wr9 \n\t"
- "waddhus wr8, wr0, wr4 \n\t"
- "waddhus wr9, wr1, wr5 \n\t"
- "waddhus wr8, wr8, wr15 \n\t"
- "waddhus wr9, wr9, wr15 \n\t"
- "wsrlhg wr8, wr8, wcgr0 \n\t"
- "wsrlhg wr9, wr9, wcgr0 \n\t"
- "wpackhus wr8, wr8, wr9 \n\t"
- "wstrd wr8, [%[block]] \n\t"
- "add %[block], %[block], %[line_size] \n\t"
-
- // [wr0 wr1 wr2 wr3] <= *
- // [wr4 wr5 wr6 wr7]
- "wldrd wr12, [%[pixels]] \n\t"
- "wldrd wr13, [%[pixels], #8] \n\t"
- "add %[pixels], %[pixels], %[line_size] \n\t"
- "walignr1 wr2, wr12, wr13 \n\t"
- "pld [%[pixels]] \n\t"
- "pld [%[pixels], #32] \n\t"
- "wmoveq wr10, wr13 \n\t"
- "walignr2ne wr10, wr12, wr13 \n\t"
- "wunpckelub wr0, wr2 \n\t"
- "wunpckehub wr1, wr2 \n\t"
- "wunpckelub wr8, wr10 \n\t"
- "wunpckehub wr9, wr10 \n\t"
- "waddhus wr0, wr0, wr8 \n\t"
- "waddhus wr1, wr1, wr9 \n\t"
- "waddhus wr8, wr0, wr4 \n\t"
- "waddhus wr9, wr1, wr5 \n\t"
- "waddhus wr8, wr8, wr15 \n\t"
- "waddhus wr9, wr9, wr15 \n\t"
- "wsrlhg wr8, wr8, wcgr0 \n\t"
- "wsrlhg wr9, wr9, wcgr0 \n\t"
- "wpackhus wr8, wr8, wr9 \n\t"
- "subs %[h], %[h], #2 \n\t"
- "wstrd wr8, [%[block]] \n\t"
- "add %[block], %[block], %[line_size] \n\t"
- "bne 1b \n\t"
- : [h]"+r"(h), [pixels]"+r"(pixels), [block]"+r"(block)
- : [line_size]"r"(line_size)
- : "r12", "memory");
-}
-
-void DEF(put, pixels16_xy2)(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
-{
- // [wr0 wr1 wr2 wr3] for previous line
- // [wr4 wr5 wr6 wr7] for current line
- SET_RND(wr15); // =2 for rnd and =1 for no_rnd version
- __asm__ volatile(
- "pld [%[pixels]] \n\t"
- "mov r12, #2 \n\t"
- "pld [%[pixels], #32] \n\t"
- "tmcr wcgr0, r12 \n\t" /* for shift value */
- /* alignment */
- "and r12, %[pixels], #7 \n\t"
- "bic %[pixels], %[pixels], #7 \n\t"
- "tmcr wcgr1, r12 \n\t"
- "add r12, r12, #1 \n\t"
- "tmcr wcgr2, r12 \n\t"
-
- // [wr0 wr1 wr2 wr3] <= *
- // [wr4 wr5 wr6 wr7]
- "wldrd wr12, [%[pixels]] \n\t"
- "cmp r12, #8 \n\t"
- "wldrd wr13, [%[pixels], #8] \n\t"
- "wldrd wr14, [%[pixels], #16] \n\t"
- "add %[pixels], %[pixels], %[line_size] \n\t"
- "pld [%[pixels]] \n\t"
- "walignr1 wr2, wr12, wr13 \n\t"
- "pld [%[pixels], #32] \n\t"
- "walignr1 wr3, wr13, wr14 \n\t"
- "wmoveq wr10, wr13 \n\t"
- "wmoveq wr11, wr14 \n\t"
- "walignr2ne wr10, wr12, wr13 \n\t"
- "walignr2ne wr11, wr13, wr14 \n\t"
- "wunpckelub wr0, wr2 \n\t"
- "wunpckehub wr1, wr2 \n\t"
- "wunpckelub wr2, wr3 \n\t"
- "wunpckehub wr3, wr3 \n\t"
- "wunpckelub wr8, wr10 \n\t"
- "wunpckehub wr9, wr10 \n\t"
- "wunpckelub wr10, wr11 \n\t"
- "wunpckehub wr11, wr11 \n\t"
- "waddhus wr0, wr0, wr8 \n\t"
- "waddhus wr1, wr1, wr9 \n\t"
- "waddhus wr2, wr2, wr10 \n\t"
- "waddhus wr3, wr3, wr11 \n\t"
-
- "1: \n\t"
- // [wr0 wr1 wr2 wr3]
- // [wr4 wr5 wr6 wr7] <= *
- "wldrd wr12, [%[pixels]] \n\t"
- "cmp r12, #8 \n\t"
- "wldrd wr13, [%[pixels], #8] \n\t"
- "wldrd wr14, [%[pixels], #16] \n\t"
- "add %[pixels], %[pixels], %[line_size] \n\t"
- "walignr1 wr6, wr12, wr13 \n\t"
- "pld [%[pixels]] \n\t"
- "pld [%[pixels], #32] \n\t"
- "walignr1 wr7, wr13, wr14 \n\t"
- "wmoveq wr10, wr13 \n\t"
- "wmoveq wr11, wr14 \n\t"
- "walignr2ne wr10, wr12, wr13 \n\t"
- "walignr2ne wr11, wr13, wr14 \n\t"
- "wunpckelub wr4, wr6 \n\t"
- "wunpckehub wr5, wr6 \n\t"
- "wunpckelub wr6, wr7 \n\t"
- "wunpckehub wr7, wr7 \n\t"
- "wunpckelub wr8, wr10 \n\t"
- "wunpckehub wr9, wr10 \n\t"
- "wunpckelub wr10, wr11 \n\t"
- "wunpckehub wr11, wr11 \n\t"
- "waddhus wr4, wr4, wr8 \n\t"
- "waddhus wr5, wr5, wr9 \n\t"
- "waddhus wr6, wr6, wr10 \n\t"
- "waddhus wr7, wr7, wr11 \n\t"
- "waddhus wr8, wr0, wr4 \n\t"
- "waddhus wr9, wr1, wr5 \n\t"
- "waddhus wr10, wr2, wr6 \n\t"
- "waddhus wr11, wr3, wr7 \n\t"
- "waddhus wr8, wr8, wr15 \n\t"
- "waddhus wr9, wr9, wr15 \n\t"
- "waddhus wr10, wr10, wr15 \n\t"
- "waddhus wr11, wr11, wr15 \n\t"
- "wsrlhg wr8, wr8, wcgr0 \n\t"
- "wsrlhg wr9, wr9, wcgr0 \n\t"
- "wsrlhg wr10, wr10, wcgr0 \n\t"
- "wsrlhg wr11, wr11, wcgr0 \n\t"
- "wpackhus wr8, wr8, wr9 \n\t"
- "wpackhus wr9, wr10, wr11 \n\t"
- "wstrd wr8, [%[block]] \n\t"
- "wstrd wr9, [%[block], #8] \n\t"
- "add %[block], %[block], %[line_size] \n\t"
-
- // [wr0 wr1 wr2 wr3] <= *
- // [wr4 wr5 wr6 wr7]
- "wldrd wr12, [%[pixels]] \n\t"
- "wldrd wr13, [%[pixels], #8] \n\t"
- "wldrd wr14, [%[pixels], #16] \n\t"
- "add %[pixels], %[pixels], %[line_size] \n\t"
- "walignr1 wr2, wr12, wr13 \n\t"
- "pld [%[pixels]] \n\t"
- "pld [%[pixels], #32] \n\t"
- "walignr1 wr3, wr13, wr14 \n\t"
- "wmoveq wr10, wr13 \n\t"
- "wmoveq wr11, wr14 \n\t"
- "walignr2ne wr10, wr12, wr13 \n\t"
- "walignr2ne wr11, wr13, wr14 \n\t"
- "wunpckelub wr0, wr2 \n\t"
- "wunpckehub wr1, wr2 \n\t"
- "wunpckelub wr2, wr3 \n\t"
- "wunpckehub wr3, wr3 \n\t"
- "wunpckelub wr8, wr10 \n\t"
- "wunpckehub wr9, wr10 \n\t"
- "wunpckelub wr10, wr11 \n\t"
- "wunpckehub wr11, wr11 \n\t"
- "waddhus wr0, wr0, wr8 \n\t"
- "waddhus wr1, wr1, wr9 \n\t"
- "waddhus wr2, wr2, wr10 \n\t"
- "waddhus wr3, wr3, wr11 \n\t"
- "waddhus wr8, wr0, wr4 \n\t"
- "waddhus wr9, wr1, wr5 \n\t"
- "waddhus wr10, wr2, wr6 \n\t"
- "waddhus wr11, wr3, wr7 \n\t"
- "waddhus wr8, wr8, wr15 \n\t"
- "waddhus wr9, wr9, wr15 \n\t"
- "waddhus wr10, wr10, wr15 \n\t"
- "waddhus wr11, wr11, wr15 \n\t"
- "wsrlhg wr8, wr8, wcgr0 \n\t"
- "wsrlhg wr9, wr9, wcgr0 \n\t"
- "wsrlhg wr10, wr10, wcgr0 \n\t"
- "wsrlhg wr11, wr11, wcgr0 \n\t"
- "wpackhus wr8, wr8, wr9 \n\t"
- "wpackhus wr9, wr10, wr11 \n\t"
- "wstrd wr8, [%[block]] \n\t"
- "wstrd wr9, [%[block], #8] \n\t"
- "add %[block], %[block], %[line_size] \n\t"
-
- "subs %[h], %[h], #2 \n\t"
- "bne 1b \n\t"
- : [h]"+r"(h), [pixels]"+r"(pixels), [block]"+r"(block)
- : [line_size]"r"(line_size)
- : "r12", "memory");
-}
-
-void DEF(avg, pixels8_xy2)(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
-{
- // [wr0 wr1 wr2 wr3] for previous line
- // [wr4 wr5 wr6 wr7] for current line
- SET_RND(wr15); // =2 for rnd and =1 for no_rnd version
- __asm__ volatile(
- "pld [%[block]] \n\t"
- "pld [%[block], #32] \n\t"
- "pld [%[pixels]] \n\t"
- "mov r12, #2 \n\t"
- "pld [%[pixels], #32] \n\t"
- "tmcr wcgr0, r12 \n\t" /* for shift value */
- "and r12, %[pixels], #7 \n\t"
- "bic %[pixels], %[pixels], #7 \n\t"
- "tmcr wcgr1, r12 \n\t"
-
- // [wr0 wr1 wr2 wr3] <= *
- // [wr4 wr5 wr6 wr7]
- "wldrd wr12, [%[pixels]] \n\t"
- "add r12, r12, #1 \n\t"
- "wldrd wr13, [%[pixels], #8] \n\t"
- "tmcr wcgr2, r12 \n\t"
- "add %[pixels], %[pixels], %[line_size] \n\t"
- "cmp r12, #8 \n\t"
- "pld [%[pixels]] \n\t"
- "pld [%[pixels], #32] \n\t"
- "walignr1 wr2, wr12, wr13 \n\t"
- "wmoveq wr10, wr13 \n\t"
- "walignr2ne wr10, wr12, wr13 \n\t"
- "wunpckelub wr0, wr2 \n\t"
- "wunpckehub wr1, wr2 \n\t"
- "wunpckelub wr8, wr10 \n\t"
- "wunpckehub wr9, wr10 \n\t"
- "waddhus wr0, wr0, wr8 \n\t"
- "waddhus wr1, wr1, wr9 \n\t"
-
- "1: \n\t"
- // [wr0 wr1 wr2 wr3]
- // [wr4 wr5 wr6 wr7] <= *
- "wldrd wr12, [%[pixels]] \n\t"
- "cmp r12, #8 \n\t"
- "wldrd wr13, [%[pixels], #8] \n\t"
- "add %[pixels], %[pixels], %[line_size] \n\t"
- "walignr1 wr6, wr12, wr13 \n\t"
- "pld [%[pixels]] \n\t"
- "pld [%[pixels], #32] \n\t"
- "wmoveq wr10, wr13 \n\t"
- "walignr2ne wr10, wr12, wr13 \n\t"
- "wunpckelub wr4, wr6 \n\t"
- "wunpckehub wr5, wr6 \n\t"
- "wunpckelub wr8, wr10 \n\t"
- "wunpckehub wr9, wr10 \n\t"
- "waddhus wr4, wr4, wr8 \n\t"
- "waddhus wr5, wr5, wr9 \n\t"
- "waddhus wr8, wr0, wr4 \n\t"
- "waddhus wr9, wr1, wr5 \n\t"
- "waddhus wr8, wr8, wr15 \n\t"
- "waddhus wr9, wr9, wr15 \n\t"
- "wldrd wr12, [%[block]] \n\t"
- "wsrlhg wr8, wr8, wcgr0 \n\t"
- "wsrlhg wr9, wr9, wcgr0 \n\t"
- "wpackhus wr8, wr8, wr9 \n\t"
- WAVG2B" wr8, wr8, wr12 \n\t"
- "wstrd wr8, [%[block]] \n\t"
- "add %[block], %[block], %[line_size] \n\t"
- "wldrd wr12, [%[pixels]] \n\t"
- "pld [%[block]] \n\t"
- "pld [%[block], #32] \n\t"
-
- // [wr0 wr1 wr2 wr3] <= *
- // [wr4 wr5 wr6 wr7]
- "wldrd wr13, [%[pixels], #8] \n\t"
- "add %[pixels], %[pixels], %[line_size] \n\t"
- "walignr1 wr2, wr12, wr13 \n\t"
- "pld [%[pixels]] \n\t"
- "pld [%[pixels], #32] \n\t"
- "wmoveq wr10, wr13 \n\t"
- "walignr2ne wr10, wr12, wr13 \n\t"
- "wunpckelub wr0, wr2 \n\t"
- "wunpckehub wr1, wr2 \n\t"
- "wunpckelub wr8, wr10 \n\t"
- "wunpckehub wr9, wr10 \n\t"
- "waddhus wr0, wr0, wr8 \n\t"
- "waddhus wr1, wr1, wr9 \n\t"
- "waddhus wr8, wr0, wr4 \n\t"
- "waddhus wr9, wr1, wr5 \n\t"
- "waddhus wr8, wr8, wr15 \n\t"
- "waddhus wr9, wr9, wr15 \n\t"
- "wldrd wr12, [%[block]] \n\t"
- "wsrlhg wr8, wr8, wcgr0 \n\t"
- "wsrlhg wr9, wr9, wcgr0 \n\t"
- "wpackhus wr8, wr8, wr9 \n\t"
- "subs %[h], %[h], #2 \n\t"
- WAVG2B" wr8, wr8, wr12 \n\t"
- "wstrd wr8, [%[block]] \n\t"
- "add %[block], %[block], %[line_size] \n\t"
- "pld [%[block]] \n\t"
- "pld [%[block], #32] \n\t"
- "bne 1b \n\t"
- : [h]"+r"(h), [pixels]"+r"(pixels), [block]"+r"(block)
- : [line_size]"r"(line_size)
- : "r12", "memory");
-}
-
-void DEF(avg, pixels16_xy2)(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
-{
- // [wr0 wr1 wr2 wr3] for previous line
- // [wr4 wr5 wr6 wr7] for current line
- SET_RND(wr15); // =2 for rnd and =1 for no_rnd version
- __asm__ volatile(
- "pld [%[block]] \n\t"
- "pld [%[block], #32] \n\t"
- "pld [%[pixels]] \n\t"
- "mov r12, #2 \n\t"
- "pld [%[pixels], #32] \n\t"
- "tmcr wcgr0, r12 \n\t" /* for shift value */
- /* alignment */
- "and r12, %[pixels], #7 \n\t"
- "bic %[pixels], %[pixels], #7 \n\t"
- "tmcr wcgr1, r12 \n\t"
- "add r12, r12, #1 \n\t"
- "tmcr wcgr2, r12 \n\t"
-
- // [wr0 wr1 wr2 wr3] <= *
- // [wr4 wr5 wr6 wr7]
- "wldrd wr12, [%[pixels]] \n\t"
- "cmp r12, #8 \n\t"
- "wldrd wr13, [%[pixels], #8] \n\t"
- "wldrd wr14, [%[pixels], #16] \n\t"
- "add %[pixels], %[pixels], %[line_size] \n\t"
- "pld [%[pixels]] \n\t"
- "walignr1 wr2, wr12, wr13 \n\t"
- "pld [%[pixels], #32] \n\t"
- "walignr1 wr3, wr13, wr14 \n\t"
- "wmoveq wr10, wr13 \n\t"
- "wmoveq wr11, wr14 \n\t"
- "walignr2ne wr10, wr12, wr13 \n\t"
- "walignr2ne wr11, wr13, wr14 \n\t"
- "wunpckelub wr0, wr2 \n\t"
- "wunpckehub wr1, wr2 \n\t"
- "wunpckelub wr2, wr3 \n\t"
- "wunpckehub wr3, wr3 \n\t"
- "wunpckelub wr8, wr10 \n\t"
- "wunpckehub wr9, wr10 \n\t"
- "wunpckelub wr10, wr11 \n\t"
- "wunpckehub wr11, wr11 \n\t"
- "waddhus wr0, wr0, wr8 \n\t"
- "waddhus wr1, wr1, wr9 \n\t"
- "waddhus wr2, wr2, wr10 \n\t"
- "waddhus wr3, wr3, wr11 \n\t"
-
- "1: \n\t"
- // [wr0 wr1 wr2 wr3]
- // [wr4 wr5 wr6 wr7] <= *
- "wldrd wr12, [%[pixels]] \n\t"
- "cmp r12, #8 \n\t"
- "wldrd wr13, [%[pixels], #8] \n\t"
- "wldrd wr14, [%[pixels], #16] \n\t"
- "add %[pixels], %[pixels], %[line_size] \n\t"
- "walignr1 wr6, wr12, wr13 \n\t"
- "pld [%[pixels]] \n\t"
- "pld [%[pixels], #32] \n\t"
- "walignr1 wr7, wr13, wr14 \n\t"
- "wmoveq wr10, wr13 \n\t"
- "wmoveq wr11, wr14 \n\t"
- "walignr2ne wr10, wr12, wr13 \n\t"
- "walignr2ne wr11, wr13, wr14 \n\t"
- "wunpckelub wr4, wr6 \n\t"
- "wunpckehub wr5, wr6 \n\t"
- "wunpckelub wr6, wr7 \n\t"
- "wunpckehub wr7, wr7 \n\t"
- "wunpckelub wr8, wr10 \n\t"
- "wunpckehub wr9, wr10 \n\t"
- "wunpckelub wr10, wr11 \n\t"
- "wunpckehub wr11, wr11 \n\t"
- "waddhus wr4, wr4, wr8 \n\t"
- "waddhus wr5, wr5, wr9 \n\t"
- "waddhus wr6, wr6, wr10 \n\t"
- "waddhus wr7, wr7, wr11 \n\t"
- "waddhus wr8, wr0, wr4 \n\t"
- "waddhus wr9, wr1, wr5 \n\t"
- "waddhus wr10, wr2, wr6 \n\t"
- "waddhus wr11, wr3, wr7 \n\t"
- "waddhus wr8, wr8, wr15 \n\t"
- "waddhus wr9, wr9, wr15 \n\t"
- "waddhus wr10, wr10, wr15 \n\t"
- "waddhus wr11, wr11, wr15 \n\t"
- "wsrlhg wr8, wr8, wcgr0 \n\t"
- "wsrlhg wr9, wr9, wcgr0 \n\t"
- "wldrd wr12, [%[block]] \n\t"
- "wldrd wr13, [%[block], #8] \n\t"
- "wsrlhg wr10, wr10, wcgr0 \n\t"
- "wsrlhg wr11, wr11, wcgr0 \n\t"
- "wpackhus wr8, wr8, wr9 \n\t"
- "wpackhus wr9, wr10, wr11 \n\t"
- WAVG2B" wr8, wr8, wr12 \n\t"
- WAVG2B" wr9, wr9, wr13 \n\t"
- "wstrd wr8, [%[block]] \n\t"
- "wstrd wr9, [%[block], #8] \n\t"
- "add %[block], %[block], %[line_size] \n\t"
-
- // [wr0 wr1 wr2 wr3] <= *
- // [wr4 wr5 wr6 wr7]
- "wldrd wr12, [%[pixels]] \n\t"
- "pld [%[block]] \n\t"
- "wldrd wr13, [%[pixels], #8] \n\t"
- "pld [%[block], #32] \n\t"
- "wldrd wr14, [%[pixels], #16] \n\t"
- "add %[pixels], %[pixels], %[line_size] \n\t"
- "walignr1 wr2, wr12, wr13 \n\t"
- "pld [%[pixels]] \n\t"
- "pld [%[pixels], #32] \n\t"
- "walignr1 wr3, wr13, wr14 \n\t"
- "wmoveq wr10, wr13 \n\t"
- "wmoveq wr11, wr14 \n\t"
- "walignr2ne wr10, wr12, wr13 \n\t"
- "walignr2ne wr11, wr13, wr14 \n\t"
- "wunpckelub wr0, wr2 \n\t"
- "wunpckehub wr1, wr2 \n\t"
- "wunpckelub wr2, wr3 \n\t"
- "wunpckehub wr3, wr3 \n\t"
- "wunpckelub wr8, wr10 \n\t"
- "wunpckehub wr9, wr10 \n\t"
- "wunpckelub wr10, wr11 \n\t"
- "wunpckehub wr11, wr11 \n\t"
- "waddhus wr0, wr0, wr8 \n\t"
- "waddhus wr1, wr1, wr9 \n\t"
- "waddhus wr2, wr2, wr10 \n\t"
- "waddhus wr3, wr3, wr11 \n\t"
- "waddhus wr8, wr0, wr4 \n\t"
- "waddhus wr9, wr1, wr5 \n\t"
- "waddhus wr10, wr2, wr6 \n\t"
- "waddhus wr11, wr3, wr7 \n\t"
- "waddhus wr8, wr8, wr15 \n\t"
- "waddhus wr9, wr9, wr15 \n\t"
- "waddhus wr10, wr10, wr15 \n\t"
- "waddhus wr11, wr11, wr15 \n\t"
- "wsrlhg wr8, wr8, wcgr0 \n\t"
- "wsrlhg wr9, wr9, wcgr0 \n\t"
- "wldrd wr12, [%[block]] \n\t"
- "wldrd wr13, [%[block], #8] \n\t"
- "wsrlhg wr10, wr10, wcgr0 \n\t"
- "wsrlhg wr11, wr11, wcgr0 \n\t"
- "wpackhus wr8, wr8, wr9 \n\t"
- "wpackhus wr9, wr10, wr11 \n\t"
- WAVG2B" wr8, wr8, wr12 \n\t"
- WAVG2B" wr9, wr9, wr13 \n\t"
- "wstrd wr8, [%[block]] \n\t"
- "wstrd wr9, [%[block], #8] \n\t"
- "add %[block], %[block], %[line_size] \n\t"
- "subs %[h], %[h], #2 \n\t"
- "pld [%[block]] \n\t"
- "pld [%[block], #32] \n\t"
- "bne 1b \n\t"
- : [h]"+r"(h), [pixels]"+r"(pixels), [block]"+r"(block)
- : [line_size]"r"(line_size)
- : "r12", "memory");
-}
diff --git a/libavcodec/arm/mpegvideo_arm.c b/libavcodec/arm/mpegvideo_arm.c
index b2b254e2d3..73d03c1a5e 100644
--- a/libavcodec/arm/mpegvideo_arm.c
+++ b/libavcodec/arm/mpegvideo_arm.c
@@ -40,16 +40,9 @@ void ff_dct_unquantize_h263_intra_neon(MpegEncContext *s, DCTELEM *block,
void ff_MPV_common_init_arm(MpegEncContext *s)
{
- /* IWMMXT support is a superset of armv5te, so
- * allow optimized functions for armv5te unless
- * a better iwmmxt function exists
- */
#if HAVE_ARMV5TE
ff_MPV_common_init_armv5te(s);
#endif
-#if HAVE_IWMMXT
- ff_MPV_common_init_iwmmxt(s);
-#endif
if (HAVE_NEON) {
s->dct_unquantize_h263_intra = ff_dct_unquantize_h263_intra_neon;
diff --git a/libavcodec/arm/mpegvideo_arm.h b/libavcodec/arm/mpegvideo_arm.h
index e9de979300..4ff93b76b9 100644
--- a/libavcodec/arm/mpegvideo_arm.h
+++ b/libavcodec/arm/mpegvideo_arm.h
@@ -21,7 +21,6 @@
#include "libavcodec/mpegvideo.h"
-void ff_MPV_common_init_iwmmxt(MpegEncContext *s);
void ff_MPV_common_init_armv5te(MpegEncContext *s);
#endif /* AVCODEC_ARM_MPEGVIDEO_H */
diff --git a/libavcodec/arm/mpegvideo_iwmmxt.c b/libavcodec/arm/mpegvideo_iwmmxt.c
deleted file mode 100644
index c85e6dc8b1..0000000000
--- a/libavcodec/arm/mpegvideo_iwmmxt.c
+++ /dev/null
@@ -1,101 +0,0 @@
-/*
- * copyright (c) 2004 AGAWA Koji
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#include "libavutil/cpu.h"
-#include "libavcodec/avcodec.h"
-#include "libavcodec/dsputil.h"
-#include "libavcodec/mpegvideo.h"
-#include "mpegvideo_arm.h"
-
-static void dct_unquantize_h263_intra_iwmmxt(MpegEncContext *s,
- DCTELEM *block, int n, int qscale)
-{
- int level, qmul, qadd;
- int nCoeffs;
- DCTELEM *block_orig = block;
-
- assert(s->block_last_index[n]>=0);
-
- qmul = qscale << 1;
-
- if (!s->h263_aic) {
- if (n < 4)
- level = block[0] * s->y_dc_scale;
- else
- level = block[0] * s->c_dc_scale;
- qadd = (qscale - 1) | 1;
- }else{
- qadd = 0;
- level = block[0];
- }
- if(s->ac_pred)
- nCoeffs=63;
- else
- nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
-
- __asm__ volatile (
-/* "movd %1, %%mm6 \n\t" //qmul */
-/* "packssdw %%mm6, %%mm6 \n\t" */
-/* "packssdw %%mm6, %%mm6 \n\t" */
- "tbcsth wr6, %[qmul] \n\t"
-/* "movd %2, %%mm5 \n\t" //qadd */
-/* "packssdw %%mm5, %%mm5 \n\t" */
-/* "packssdw %%mm5, %%mm5 \n\t" */
- "tbcsth wr5, %[qadd] \n\t"
- "wzero wr7 \n\t" /* "pxor %%mm7, %%mm7 \n\t" */
- "wzero wr4 \n\t" /* "pxor %%mm4, %%mm4 \n\t" */
- "wsubh wr7, wr5, wr7 \n\t" /* "psubw %%mm5, %%mm7 \n\t" */
- "1: \n\t"
- "wldrd wr2, [%[block]] \n\t" /* "movq (%0, %3), %%mm0 \n\t" */
- "wldrd wr3, [%[block], #8] \n\t" /* "movq 8(%0, %3), %%mm1 \n\t" */
- "wmulsl wr0, wr6, wr2 \n\t" /* "pmullw %%mm6, %%mm0 \n\t" */
- "wmulsl wr1, wr6, wr3 \n\t" /* "pmullw %%mm6, %%mm1 \n\t" */
-/* "movq (%0, %3), %%mm2 \n\t" */
-/* "movq 8(%0, %3), %%mm3 \n\t" */
- "wcmpgtsh wr2, wr4, wr2 \n\t" /* "pcmpgtw %%mm4, %%mm2 \n\t" // block[i] < 0 ? -1 : 0 */
- "wcmpgtsh wr3, wr4, wr2 \n\t" /* "pcmpgtw %%mm4, %%mm3 \n\t" // block[i] < 0 ? -1 : 0 */
- "wxor wr0, wr2, wr0 \n\t" /* "pxor %%mm2, %%mm0 \n\t" */
- "wxor wr1, wr3, wr1 \n\t" /* "pxor %%mm3, %%mm1 \n\t" */
- "waddh wr0, wr7, wr0 \n\t" /* "paddw %%mm7, %%mm0 \n\t" */
- "waddh wr1, wr7, wr1 \n\t" /* "paddw %%mm7, %%mm1 \n\t" */
- "wxor wr2, wr0, wr2 \n\t" /* "pxor %%mm0, %%mm2 \n\t" */
- "wxor wr3, wr1, wr3 \n\t" /* "pxor %%mm1, %%mm3 \n\t" */
- "wcmpeqh wr0, wr7, wr0 \n\t" /* "pcmpeqw %%mm7, %%mm0 \n\t" // block[i] == 0 ? -1 : 0 */
- "wcmpeqh wr1, wr7, wr1 \n\t" /* "pcmpeqw %%mm7, %%mm1 \n\t" // block[i] == 0 ? -1 : 0 */
- "wandn wr0, wr2, wr0 \n\t" /* "pandn %%mm2, %%mm0 \n\t" */
- "wandn wr1, wr3, wr1 \n\t" /* "pandn %%mm3, %%mm1 \n\t" */
- "wstrd wr0, [%[block]] \n\t" /* "movq %%mm0, (%0, %3) \n\t" */
- "wstrd wr1, [%[block], #8] \n\t" /* "movq %%mm1, 8(%0, %3) \n\t" */
- "add %[block], %[block], #16 \n\t" /* "addl $16, %3 \n\t" */
- "subs %[i], %[i], #1 \n\t"
- "bne 1b \n\t" /* "jng 1b \n\t" */
- :[block]"+r"(block)
- :[i]"r"((nCoeffs + 8) / 8), [qmul]"r"(qmul), [qadd]"r"(qadd)
- :"memory");
-
- block_orig[0] = level;
-}
-
-void ff_MPV_common_init_iwmmxt(MpegEncContext *s)
-{
- if (!(mm_flags & AV_CPU_FLAG_IWMMXT)) return;
-
- s->dct_unquantize_h263_intra = dct_unquantize_h263_intra_iwmmxt;
-}
diff --git a/libavcodec/c93.c b/libavcodec/c93.c
index ad2dc0a7bf..733e80cfbb 100644
--- a/libavcodec/c93.c
+++ b/libavcodec/c93.c
@@ -125,8 +125,9 @@ static int decode_frame(AVCodecContext *avctx, void *data,
AVFrame * const newpic = &c93->pictures[c93->currentpic];
AVFrame * const oldpic = &c93->pictures[c93->currentpic^1];
AVFrame *picture = data;
+ GetByteContext gb;
uint8_t *out;
- int stride, i, x, y, bt = 0;
+ int stride, i, x, y, b, bt = 0;
c93->currentpic ^= 1;
@@ -140,7 +141,9 @@ static int decode_frame(AVCodecContext *avctx, void *data,
stride = newpic->linesize[0];
- if (buf[0] & C93_FIRST_FRAME) {
+ bytestream2_init(&gb, buf, buf_size);
+ b = bytestream2_get_byte(&gb);
+ if (b & C93_FIRST_FRAME) {
newpic->pict_type = AV_PICTURE_TYPE_I;
newpic->key_frame = 1;
} else {
@@ -148,17 +151,6 @@ static int decode_frame(AVCodecContext *avctx, void *data,
newpic->key_frame = 0;
}
- if (*buf++ & C93_HAS_PALETTE) {
- uint32_t *palette = (uint32_t *) newpic->data[1];
- const uint8_t *palbuf = buf + buf_size - 768 - 1;
- for (i = 0; i < 256; i++) {
- palette[i] = 0xFF << 24 | bytestream_get_be24(&palbuf);
- }
- } else {
- if (oldpic->data[1])
- memcpy(newpic->data[1], oldpic->data[1], 256 * 4);
- }
-
for (y = 0; y < HEIGHT; y += 8) {
out = newpic->data[0] + y * stride;
for (x = 0; x < WIDTH; x += 8) {
@@ -168,12 +160,12 @@ static int decode_frame(AVCodecContext *avctx, void *data,
C93BlockType block_type;
if (!bt)
- bt = *buf++;
+ bt = bytestream2_get_byte(&gb);
block_type= bt & 0x0F;
switch (block_type) {
case C93_8X8_FROM_PREV:
- offset = bytestream_get_le16(&buf);
+ offset = bytestream2_get_le16(&gb);
if (copy_block(avctx, out, copy_from, offset, 8, stride))
return -1;
break;
@@ -183,7 +175,7 @@ static int decode_frame(AVCodecContext *avctx, void *data,
case C93_4X4_FROM_PREV:
for (j = 0; j < 8; j += 4) {
for (i = 0; i < 8; i += 4) {
- offset = bytestream_get_le16(&buf);
+ offset = bytestream2_get_le16(&gb);
if (copy_block(avctx, &out[j*stride+i],
copy_from, offset, 4, stride))
return -1;
@@ -192,10 +184,10 @@ static int decode_frame(AVCodecContext *avctx, void *data,
break;
case C93_8X8_2COLOR:
- bytestream_get_buffer(&buf, cols, 2);
+ bytestream2_get_buffer(&gb, cols, 2);
for (i = 0; i < 8; i++) {
draw_n_color(out + i*stride, stride, 8, 1, 1, cols,
- NULL, *buf++);
+ NULL, bytestream2_get_byte(&gb));
}
break;
@@ -206,17 +198,17 @@ static int decode_frame(AVCodecContext *avctx, void *data,
for (j = 0; j < 8; j += 4) {
for (i = 0; i < 8; i += 4) {
if (block_type == C93_4X4_2COLOR) {
- bytestream_get_buffer(&buf, cols, 2);
+ bytestream2_get_buffer(&gb, cols, 2);
draw_n_color(out + i + j*stride, stride, 4, 4,
- 1, cols, NULL, bytestream_get_le16(&buf));
+ 1, cols, NULL, bytestream2_get_le16(&gb));
} else if (block_type == C93_4X4_4COLOR) {
- bytestream_get_buffer(&buf, cols, 4);
+ bytestream2_get_buffer(&gb, cols, 4);
draw_n_color(out + i + j*stride, stride, 4, 4,
- 2, cols, NULL, bytestream_get_le32(&buf));
+ 2, cols, NULL, bytestream2_get_le32(&gb));
} else {
- bytestream_get_buffer(&buf, grps, 4);
+ bytestream2_get_buffer(&gb, grps, 4);
draw_n_color(out + i + j*stride, stride, 4, 4,
- 1, cols, grps, bytestream_get_le16(&buf));
+ 1, cols, grps, bytestream2_get_le16(&gb));
}
}
}
@@ -227,7 +219,7 @@ static int decode_frame(AVCodecContext *avctx, void *data,
case C93_8X8_INTRA:
for (j = 0; j < 8; j++)
- bytestream_get_buffer(&buf, out + j*stride, 8);
+ bytestream2_get_buffer(&gb, out + j*stride, 8);
break;
default:
@@ -240,6 +232,16 @@ static int decode_frame(AVCodecContext *avctx, void *data,
}
}
+ if (b & C93_HAS_PALETTE) {
+ uint32_t *palette = (uint32_t *) newpic->data[1];
+ for (i = 0; i < 256; i++) {
+ palette[i] = 0xFFU << 24 | bytestream2_get_be24(&gb);
+ }
+ } else {
+ if (oldpic->data[1])
+ memcpy(newpic->data[1], oldpic->data[1], 256 * 4);
+ }
+
*picture = *newpic;
*data_size = sizeof(AVFrame);
diff --git a/libavcodec/frwu.c b/libavcodec/frwu.c
index b47cb717f2..1363b53d92 100644
--- a/libavcodec/frwu.c
+++ b/libavcodec/frwu.c
@@ -22,7 +22,6 @@
#include "avcodec.h"
#include "bytestream.h"
-#include "libavutil/intreadwrite.h"
static av_cold int decode_init(AVCodecContext *avctx)
{
@@ -54,7 +53,7 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *data_size,
av_log(avctx, AV_LOG_ERROR, "Packet is too small.\n");
return AVERROR_INVALIDDATA;
}
- if (bytestream_get_le32(&buf) != AV_RL32("FRW1")) {
+ if (bytestream_get_le32(&buf) != MKTAG('F', 'R', 'W', '1')) {
av_log(avctx, AV_LOG_ERROR, "incorrect marker\n");
return AVERROR_INVALIDDATA;
}
diff --git a/libavcodec/mimic.c b/libavcodec/mimic.c
index 51ae5be8bc..4f085b4e5d 100644
--- a/libavcodec/mimic.c
+++ b/libavcodec/mimic.c
@@ -306,24 +306,26 @@ static int mimic_decode_frame(AVCodecContext *avctx, void *data,
const uint8_t *buf = avpkt->data;
int buf_size = avpkt->size;
MimicContext *ctx = avctx->priv_data;
+ GetByteContext gb;
int is_pframe;
int width, height;
int quality, num_coeffs;
int swap_buf_size = buf_size - MIMIC_HEADER_SIZE;
- if(buf_size < MIMIC_HEADER_SIZE) {
+ if (buf_size <= MIMIC_HEADER_SIZE) {
av_log(avctx, AV_LOG_ERROR, "insufficient data\n");
return -1;
}
- buf += 2; /* some constant (always 256) */
- quality = bytestream_get_le16(&buf);
- width = bytestream_get_le16(&buf);
- height = bytestream_get_le16(&buf);
- buf += 4; /* some constant */
- is_pframe = bytestream_get_le32(&buf);
- num_coeffs = bytestream_get_byte(&buf);
- buf += 3; /* some constant */
+ bytestream2_init(&gb, buf, MIMIC_HEADER_SIZE);
+ bytestream2_skip(&gb, 2); /* some constant (always 256) */
+ quality = bytestream2_get_le16u(&gb);
+ width = bytestream2_get_le16u(&gb);
+ height = bytestream2_get_le16u(&gb);
+ bytestream2_skip(&gb, 4); /* some constant */
+ is_pframe = bytestream2_get_le32u(&gb);
+ num_coeffs = bytestream2_get_byteu(&gb);
+ bytestream2_skip(&gb, 3); /* some constant */
if(!ctx->avctx) {
int i;
@@ -372,7 +374,7 @@ static int mimic_decode_frame(AVCodecContext *avctx, void *data,
return AVERROR(ENOMEM);
ctx->dsp.bswap_buf(ctx->swap_buf,
- (const uint32_t*) buf,
+ (const uint32_t*) (buf + MIMIC_HEADER_SIZE),
swap_buf_size>>2);
init_get_bits(&ctx->gb, ctx->swap_buf, swap_buf_size << 3);
diff --git a/libavcodec/pcm-mpeg.c b/libavcodec/pcm-mpeg.c
index aea3ff79c6..9c49a0d9ec 100644
--- a/libavcodec/pcm-mpeg.c
+++ b/libavcodec/pcm-mpeg.c
@@ -141,6 +141,7 @@ static int pcm_bluray_decode_frame(AVCodecContext *avctx, void *data,
const uint8_t *src = avpkt->data;
int buf_size = avpkt->size;
PCMBRDecode *s = avctx->priv_data;
+ GetByteContext gb;
int num_source_channels, channel, retval;
int sample_size, samples;
int16_t *dst16;
@@ -156,6 +157,8 @@ static int pcm_bluray_decode_frame(AVCodecContext *avctx, void *data,
src += 4;
buf_size -= 4;
+ bytestream2_init(&gb, src, buf_size);
+
/* There's always an even number of channels in the source */
num_source_channels = FFALIGN(avctx->channels, 2);
sample_size = (num_source_channels * (avctx->sample_fmt == AV_SAMPLE_FMT_S16 ? 16 : 24)) >> 3;
@@ -179,15 +182,15 @@ static int pcm_bluray_decode_frame(AVCodecContext *avctx, void *data,
samples *= num_source_channels;
if (AV_SAMPLE_FMT_S16 == avctx->sample_fmt) {
#if HAVE_BIGENDIAN
- memcpy(dst16, src, buf_size);
+ bytestream2_get_buffer(&gb, dst16, buf_size);
#else
do {
- *dst16++ = bytestream_get_be16(&src);
+ *dst16++ = bytestream2_get_be16u(&gb);
} while (--samples);
#endif
} else {
do {
- *dst32++ = bytestream_get_be24(&src) << 8;
+ *dst32++ = bytestream2_get_be24u(&gb) << 8;
} while (--samples);
}
break;
@@ -199,24 +202,23 @@ static int pcm_bluray_decode_frame(AVCodecContext *avctx, void *data,
if (AV_SAMPLE_FMT_S16 == avctx->sample_fmt) {
do {
#if HAVE_BIGENDIAN
- memcpy(dst16, src, avctx->channels * 2);
+ bytestream2_get_buffer(&gb, dst16, avctx->channels * 2);
dst16 += avctx->channels;
- src += sample_size;
#else
channel = avctx->channels;
do {
- *dst16++ = bytestream_get_be16(&src);
+ *dst16++ = bytestream2_get_be16u(&gb);
} while (--channel);
- src += 2;
#endif
+ bytestream2_skip(&gb, 2);
} while (--samples);
} else {
do {
channel = avctx->channels;
do {
- *dst32++ = bytestream_get_be24(&src) << 8;
+ *dst32++ = bytestream2_get_be24u(&gb) << 8;
} while (--channel);
- src += 3;
+ bytestream2_skip(&gb, 3);
} while (--samples);
}
break;
@@ -224,22 +226,22 @@ static int pcm_bluray_decode_frame(AVCodecContext *avctx, void *data,
case AV_CH_LAYOUT_5POINT1:
if (AV_SAMPLE_FMT_S16 == avctx->sample_fmt) {
do {
- dst16[0] = bytestream_get_be16(&src);
- dst16[1] = bytestream_get_be16(&src);
- dst16[2] = bytestream_get_be16(&src);
- dst16[4] = bytestream_get_be16(&src);
- dst16[5] = bytestream_get_be16(&src);
- dst16[3] = bytestream_get_be16(&src);
+ dst16[0] = bytestream2_get_be16u(&gb);
+ dst16[1] = bytestream2_get_be16u(&gb);
+ dst16[2] = bytestream2_get_be16u(&gb);
+ dst16[4] = bytestream2_get_be16u(&gb);
+ dst16[5] = bytestream2_get_be16u(&gb);
+ dst16[3] = bytestream2_get_be16u(&gb);
dst16 += 6;
} while (--samples);
} else {
do {
- dst32[0] = bytestream_get_be24(&src) << 8;
- dst32[1] = bytestream_get_be24(&src) << 8;
- dst32[2] = bytestream_get_be24(&src) << 8;
- dst32[4] = bytestream_get_be24(&src) << 8;
- dst32[5] = bytestream_get_be24(&src) << 8;
- dst32[3] = bytestream_get_be24(&src) << 8;
+ dst32[0] = bytestream2_get_be24u(&gb) << 8;
+ dst32[1] = bytestream2_get_be24u(&gb) << 8;
+ dst32[2] = bytestream2_get_be24u(&gb) << 8;
+ dst32[4] = bytestream2_get_be24u(&gb) << 8;
+ dst32[5] = bytestream2_get_be24u(&gb) << 8;
+ dst32[3] = bytestream2_get_be24u(&gb) << 8;
dst32 += 6;
} while (--samples);
}
@@ -248,27 +250,27 @@ static int pcm_bluray_decode_frame(AVCodecContext *avctx, void *data,
case AV_CH_LAYOUT_7POINT0:
if (AV_SAMPLE_FMT_S16 == avctx->sample_fmt) {
do {
- dst16[0] = bytestream_get_be16(&src);
- dst16[1] = bytestream_get_be16(&src);
- dst16[2] = bytestream_get_be16(&src);
- dst16[5] = bytestream_get_be16(&src);
- dst16[3] = bytestream_get_be16(&src);
- dst16[4] = bytestream_get_be16(&src);
- dst16[6] = bytestream_get_be16(&src);
+ dst16[0] = bytestream2_get_be16u(&gb);
+ dst16[1] = bytestream2_get_be16u(&gb);
+ dst16[2] = bytestream2_get_be16u(&gb);
+ dst16[5] = bytestream2_get_be16u(&gb);
+ dst16[3] = bytestream2_get_be16u(&gb);
+ dst16[4] = bytestream2_get_be16u(&gb);
+ dst16[6] = bytestream2_get_be16u(&gb);
dst16 += 7;
- src += 2;
+ bytestream2_skip(&gb, 2);
} while (--samples);
} else {
do {
- dst32[0] = bytestream_get_be24(&src) << 8;
- dst32[1] = bytestream_get_be24(&src) << 8;
- dst32[2] = bytestream_get_be24(&src) << 8;
- dst32[5] = bytestream_get_be24(&src) << 8;
- dst32[3] = bytestream_get_be24(&src) << 8;
- dst32[4] = bytestream_get_be24(&src) << 8;
- dst32[6] = bytestream_get_be24(&src) << 8;
+ dst32[0] = bytestream2_get_be24u(&gb) << 8;
+ dst32[1] = bytestream2_get_be24u(&gb) << 8;
+ dst32[2] = bytestream2_get_be24u(&gb) << 8;
+ dst32[5] = bytestream2_get_be24u(&gb) << 8;
+ dst32[3] = bytestream2_get_be24u(&gb) << 8;
+ dst32[4] = bytestream2_get_be24u(&gb) << 8;
+ dst32[6] = bytestream2_get_be24u(&gb) << 8;
dst32 += 7;
- src += 3;
+ bytestream2_skip(&gb, 3);
} while (--samples);
}
break;
@@ -276,26 +278,26 @@ static int pcm_bluray_decode_frame(AVCodecContext *avctx, void *data,
case AV_CH_LAYOUT_7POINT1:
if (AV_SAMPLE_FMT_S16 == avctx->sample_fmt) {
do {
- dst16[0] = bytestream_get_be16(&src);
- dst16[1] = bytestream_get_be16(&src);
- dst16[2] = bytestream_get_be16(&src);
- dst16[6] = bytestream_get_be16(&src);
- dst16[4] = bytestream_get_be16(&src);
- dst16[5] = bytestream_get_be16(&src);
- dst16[7] = bytestream_get_be16(&src);
- dst16[3] = bytestream_get_be16(&src);
+ dst16[0] = bytestream2_get_be16u(&gb);
+ dst16[1] = bytestream2_get_be16u(&gb);
+ dst16[2] = bytestream2_get_be16u(&gb);
+ dst16[6] = bytestream2_get_be16u(&gb);
+ dst16[4] = bytestream2_get_be16u(&gb);
+ dst16[5] = bytestream2_get_be16u(&gb);
+ dst16[7] = bytestream2_get_be16u(&gb);
+ dst16[3] = bytestream2_get_be16u(&gb);
dst16 += 8;
} while (--samples);
} else {
do {
- dst32[0] = bytestream_get_be24(&src) << 8;
- dst32[1] = bytestream_get_be24(&src) << 8;
- dst32[2] = bytestream_get_be24(&src) << 8;
- dst32[6] = bytestream_get_be24(&src) << 8;
- dst32[4] = bytestream_get_be24(&src) << 8;
- dst32[5] = bytestream_get_be24(&src) << 8;
- dst32[7] = bytestream_get_be24(&src) << 8;
- dst32[3] = bytestream_get_be24(&src) << 8;
+ dst32[0] = bytestream2_get_be24u(&gb) << 8;
+ dst32[1] = bytestream2_get_be24u(&gb) << 8;
+ dst32[2] = bytestream2_get_be24u(&gb) << 8;
+ dst32[6] = bytestream2_get_be24u(&gb) << 8;
+ dst32[4] = bytestream2_get_be24u(&gb) << 8;
+ dst32[5] = bytestream2_get_be24u(&gb) << 8;
+ dst32[7] = bytestream2_get_be24u(&gb) << 8;
+ dst32[3] = bytestream2_get_be24u(&gb) << 8;
dst32 += 8;
} while (--samples);
}
@@ -306,7 +308,7 @@ static int pcm_bluray_decode_frame(AVCodecContext *avctx, void *data,
*got_frame_ptr = 1;
*(AVFrame *)data = s->frame;
- retval = src - avpkt->data;
+ retval = bytestream2_tell(&gb);
if (avctx->debug & FF_DEBUG_BITSTREAM)
av_dlog(avctx, "pcm_bluray_decode_frame: decoded %d -> %d bytes\n",
retval, buf_size);
diff --git a/libavcodec/s3tc.c b/libavcodec/s3tc.c
index 8e979a84ac..4e791c86c0 100644
--- a/libavcodec/s3tc.c
+++ b/libavcodec/s3tc.c
@@ -21,19 +21,19 @@
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
-#include "libavutil/intreadwrite.h"
+#include "libavcodec/bytestream.h"
#include "avcodec.h"
#include "s3tc.h"
-static inline void dxt1_decode_pixels(const uint8_t *s, uint32_t *d,
+static inline void dxt1_decode_pixels(GetByteContext *gb, uint32_t *d,
unsigned int qstride, unsigned int flag,
uint64_t alpha) {
unsigned int x, y, c0, c1, a = (!flag * 255u) << 24;
unsigned int rb0, rb1, rb2, rb3, g0, g1, g2, g3;
uint32_t colors[4], pixels;
- c0 = AV_RL16(s);
- c1 = AV_RL16(s+2);
+ c0 = bytestream2_get_le16(gb);
+ c1 = bytestream2_get_le16(gb);
rb0 = (c0<<3 | c0<<8) & 0xf800f8;
rb1 = (c1<<3 | c1<<8) & 0xf800f8;
@@ -61,7 +61,7 @@ static inline void dxt1_decode_pixels(const uint8_t *s, uint32_t *d,
colors[2] = rb2 + g2 + a;
- pixels = AV_RL32(s+4);
+ pixels = bytestream2_get_le32(gb);
for (y=0; y<4; y++) {
for (x=0; x<4; x++) {
a = (alpha & 0x0f) << 28;
@@ -74,24 +74,24 @@ static inline void dxt1_decode_pixels(const uint8_t *s, uint32_t *d,
}
}
-void ff_decode_dxt1(const uint8_t *s, uint8_t *dst,
+void ff_decode_dxt1(GetByteContext *gb, uint8_t *dst,
const unsigned int w, const unsigned int h,
const unsigned int stride) {
unsigned int bx, by, qstride = stride/4;
uint32_t *d = (uint32_t *) dst;
for (by=0; by < h/4; by++, d += stride-w)
- for (bx=0; bx < w/4; bx++, s+=8, d+=4)
- dxt1_decode_pixels(s, d, qstride, 0, 0LL);
+ for (bx = 0; bx < w / 4; bx++, d += 4)
+ dxt1_decode_pixels(gb, d, qstride, 0, 0LL);
}
-void ff_decode_dxt3(const uint8_t *s, uint8_t *dst,
+void ff_decode_dxt3(GetByteContext *gb, uint8_t *dst,
const unsigned int w, const unsigned int h,
const unsigned int stride) {
unsigned int bx, by, qstride = stride/4;
uint32_t *d = (uint32_t *) dst;
for (by=0; by < h/4; by++, d += stride-w)
- for (bx=0; bx < w/4; bx++, s+=16, d+=4)
- dxt1_decode_pixels(s+8, d, qstride, 1, AV_RL64(s));
+ for (bx = 0; bx < w / 4; bx++, d += 4)
+ dxt1_decode_pixels(gb, d, qstride, 1, bytestream2_get_le64(gb));
}
diff --git a/libavcodec/s3tc.h b/libavcodec/s3tc.h
index 5116dc80f6..4378bd9483 100644
--- a/libavcodec/s3tc.h
+++ b/libavcodec/s3tc.h
@@ -29,24 +29,24 @@
/**
* Decode DXT1 encoded data to RGB32
- * @param src source buffer, has to be aligned on a 4-byte boundary
+ * @param gb GetByteContext
* @param dst destination buffer
* @param w width of output image
* @param h height of output image
* @param stride line size of output image
*/
-void ff_decode_dxt1(const uint8_t *src, uint8_t *dst,
+void ff_decode_dxt1(GetByteContext *gb, uint8_t *dst,
const unsigned int w, const unsigned int h,
const unsigned int stride);
/**
* Decode DXT3 encoded data to RGB32
- * @param src source buffer, has to be aligned on a 4-byte boundary
+ * @param gb GetByteContext
* @param dst destination buffer
* @param w width of output image
* @param h height of output image
* @param stride line size of output image
*/
-void ff_decode_dxt3(const uint8_t *src, uint8_t *dst,
+void ff_decode_dxt3(GetByteContext *gb, uint8_t *dst,
const unsigned int w, const unsigned int h,
const unsigned int stride);
diff --git a/libavcodec/txd.c b/libavcodec/txd.c
index ca07b6ce17..cf88a9b3bb 100644
--- a/libavcodec/txd.c
+++ b/libavcodec/txd.c
@@ -25,6 +25,7 @@
#include "libavutil/imgutils.h"
#include "bytestream.h"
#include "avcodec.h"
+#include "bytestream.h"
#include "s3tc.h"
typedef struct TXDContext {
@@ -42,28 +43,25 @@ static av_cold int txd_init(AVCodecContext *avctx) {
static int txd_decode_frame(AVCodecContext *avctx, void *data, int *data_size,
AVPacket *avpkt) {
- const uint8_t *buf = avpkt->data;
- const uint8_t *buf_end = avpkt->data + avpkt->size;
TXDContext * const s = avctx->priv_data;
+ GetByteContext gb;
AVFrame *picture = data;
AVFrame * const p = &s->picture;
unsigned int version, w, h, d3d_format, depth, stride, mipmap_count, flags;
unsigned int y, v;
uint8_t *ptr;
- const uint8_t *cur = buf;
- const uint32_t *palette = (const uint32_t *)(cur + 88);
uint32_t *pal;
- if (buf_end - cur < 92)
- return AVERROR_INVALIDDATA;
- version = AV_RL32(cur);
- d3d_format = AV_RL32(cur+76);
- w = AV_RL16(cur+80);
- h = AV_RL16(cur+82);
- depth = AV_RL8 (cur+84);
- mipmap_count = AV_RL8 (cur+85);
- flags = AV_RL8 (cur+87);
- cur += 92;
+ bytestream2_init(&gb, avpkt->data, avpkt->size);
+ version = bytestream2_get_le32(&gb);
+ bytestream2_skip(&gb, 72);
+ d3d_format = bytestream2_get_le32(&gb);
+ w = bytestream2_get_le16(&gb);
+ h = bytestream2_get_le16(&gb);
+ depth = bytestream2_get_byte(&gb);
+ mipmap_count = bytestream2_get_byte(&gb);
+ bytestream2_skip(&gb, 1);
+ flags = bytestream2_get_byte(&gb);
if (version < 8 || version > 9) {
av_log(avctx, AV_LOG_ERROR, "texture data version %i is unsupported\n",
@@ -73,12 +71,9 @@ static int txd_decode_frame(AVCodecContext *avctx, void *data, int *data_size,
if (depth == 8) {
avctx->pix_fmt = PIX_FMT_PAL8;
- if (buf_end - cur < 1024)
- return AVERROR_INVALIDDATA;
- cur += 1024;
- } else if (depth == 16 || depth == 32)
+ } else if (depth == 16 || depth == 32) {
avctx->pix_fmt = PIX_FMT_RGB32;
- else {
+ } else {
av_log(avctx, AV_LOG_ERROR, "depth of %i is unsupported\n", depth);
return -1;
}
@@ -102,31 +97,32 @@ static int txd_decode_frame(AVCodecContext *avctx, void *data, int *data_size,
if (depth == 8) {
pal = (uint32_t *) p->data[1];
- for (y=0; y<256; y++) {
- v = AV_RB32(palette+y);
- pal[y] = (v>>8) + (v<<24);
+ for (y = 0; y < 256; y++) {
+ v = bytestream2_get_be32(&gb);
+ pal[y] = (v >> 8) + (v << 24);
}
- if (buf_end - cur < w * h)
+ if (bytestream2_get_bytes_left(&gb) < w * h)
return AVERROR_INVALIDDATA;
+ bytestream2_skip(&gb, 4);
for (y=0; y<h; y++) {
- memcpy(ptr, cur, w);
+ bytestream2_get_buffer(&gb, ptr, w);
ptr += stride;
- cur += w;
}
} else if (depth == 16) {
+ bytestream2_skip(&gb, 4);
switch (d3d_format) {
case 0:
if (!(flags & 1))
goto unsupported;
case FF_S3TC_DXT1:
- if (buf_end - cur < (w/4) * (h/4) * 8)
+ if (bytestream2_get_bytes_left(&gb) < (w/4) * (h/4) * 8)
return AVERROR_INVALIDDATA;
- ff_decode_dxt1(cur, ptr, w, h, stride);
+ ff_decode_dxt1(&gb, ptr, w, h, stride);
break;
case FF_S3TC_DXT3:
- if (buf_end - cur < (w/4) * (h/4) * 16)
+ if (bytestream2_get_bytes_left(&gb) < (w/4) * (h/4) * 16)
return AVERROR_INVALIDDATA;
- ff_decode_dxt3(cur, ptr, w, h, stride);
+ ff_decode_dxt3(&gb, ptr, w, h, stride);
break;
default:
goto unsupported;
@@ -135,12 +131,11 @@ static int txd_decode_frame(AVCodecContext *avctx, void *data, int *data_size,
switch (d3d_format) {
case 0x15:
case 0x16:
- if (buf_end - cur < h * w * 4)
+ if (bytestream2_get_bytes_left(&gb) < h * w * 4)
return AVERROR_INVALIDDATA;
for (y=0; y<h; y++) {
- memcpy(ptr, cur, w*4);
+ bytestream2_get_buffer(&gb, ptr, w * 4);
ptr += stride;
- cur += w*4;
}
break;
default:
@@ -148,17 +143,10 @@ static int txd_decode_frame(AVCodecContext *avctx, void *data, int *data_size,
}
}
- for (; mipmap_count > 1 && buf_end - cur >= 4; mipmap_count--) {
- uint32_t length = bytestream_get_le32(&cur);
- if (buf_end - cur < length)
- break;
- cur += length;
- }
-
*picture = s->picture;
*data_size = sizeof(AVPicture);
- return cur - buf;
+ return avpkt->size;
unsupported:
av_log(avctx, AV_LOG_ERROR, "unsupported d3d format (%08x)\n", d3d_format);
diff --git a/libavformat/iff.c b/libavformat/iff.c
index 7473c182b0..1890800cf2 100644
--- a/libavformat/iff.c
+++ b/libavformat/iff.c
@@ -385,7 +385,7 @@ static int iff_read_packet(AVFormatContext *s,
AVInputFormat ff_iff_demuxer = {
.name = "IFF",
- .long_name = NULL_IF_CONFIG_SMALL("IFF format"),
+ .long_name = NULL_IF_CONFIG_SMALL("Interchange File Format"),
.priv_data_size = sizeof(IffDemuxContext),
.read_probe = iff_probe,
.read_header = iff_read_header,
diff --git a/libavformat/rtsp.c b/libavformat/rtsp.c
index f2f69642ec..ac2196a925 100644
--- a/libavformat/rtsp.c
+++ b/libavformat/rtsp.c
@@ -1005,7 +1005,7 @@ start:
av_freep(content_ptr);
/* If method is set, this is called from ff_rtsp_send_cmd,
* where a reply to exactly this request is awaited. For
- * callers from within packet reciving, we just want to
+ * callers from within packet receiving, we just want to
* return to the caller and go back to receiving packets. */
if (method)
goto start;
diff --git a/libavutil/Makefile b/libavutil/Makefile
index 23049f600f..77d23d4e63 100644
--- a/libavutil/Makefile
+++ b/libavutil/Makefile
@@ -77,7 +77,6 @@ OBJS = adler32.o \
tree.o \
utils.o \
-OBJS-$(ARCH_ARM) += arm/cpu.o
OBJS-$(ARCH_PPC) += ppc/cpu.o
OBJS-$(ARCH_X86) += x86/cpu.o
diff --git a/libavutil/arm/cpu.c b/libavutil/arm/cpu.c
deleted file mode 100644
index 742c3e498d..0000000000
--- a/libavutil/arm/cpu.c
+++ /dev/null
@@ -1,25 +0,0 @@
-/*
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#include "libavutil/cpu.h"
-#include "config.h"
-
-int ff_get_cpu_flags_arm(void)
-{
- return HAVE_IWMMXT * AV_CPU_FLAG_IWMMXT;
-}
diff --git a/libavutil/cpu.c b/libavutil/cpu.c
index fed6093316..762e8d5241 100644
--- a/libavutil/cpu.c
+++ b/libavutil/cpu.c
@@ -31,7 +31,6 @@ int av_get_cpu_flags(void)
if (checked)
return flags;
- if (ARCH_ARM) flags = ff_get_cpu_flags_arm();
if (ARCH_PPC) flags = ff_get_cpu_flags_ppc();
if (ARCH_X86) flags = ff_get_cpu_flags_x86();
@@ -55,9 +54,7 @@ static const struct {
int flag;
const char *name;
} cpu_flag_tab[] = {
-#if ARCH_ARM
- { AV_CPU_FLAG_IWMMXT, "iwmmxt" },
-#elif ARCH_PPC
+#if ARCH_PPC
{ AV_CPU_FLAG_ALTIVEC, "altivec" },
#elif ARCH_X86
{ AV_CPU_FLAG_MMX, "mmx" },
diff --git a/libavutil/cpu.h b/libavutil/cpu.h
index e448da6cdc..638374db4d 100644
--- a/libavutil/cpu.h
+++ b/libavutil/cpu.h
@@ -43,7 +43,6 @@
#define AV_CPU_FLAG_CMOV 0x1000000 ///< supports cmov instruction
#define AV_CPU_FLAG_XOP 0x0400 ///< Bulldozer XOP functions
#define AV_CPU_FLAG_FMA4 0x0800 ///< Bulldozer FMA4 functions
-#define AV_CPU_FLAG_IWMMXT 0x0100 ///< XScale IWMMXT
#define AV_CPU_FLAG_ALTIVEC 0x0001 ///< standard
/**
@@ -67,7 +66,6 @@ void av_force_cpu_flags(int flags);
attribute_deprecated void av_set_cpu_flags_mask(int mask);
/* The following CPU-specific functions shall not be called directly. */
-int ff_get_cpu_flags_arm(void);
int ff_get_cpu_flags_ppc(void);
int ff_get_cpu_flags_x86(void);