aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDiego Biurrun <diego@biurrun.de>2013-08-23 20:01:36 +0200
committerDiego Biurrun <diego@biurrun.de>2013-08-29 11:24:14 +0200
commitf0389eb777b1ab4291329d4f709098cdfa7384dc (patch)
tree369f5cc03bd02610fc95123b84e446036c00feb6
parent7ffda66fd5c81af4725bff7c2c4f207ba2aa0613 (diff)
downloadffmpeg-f0389eb777b1ab4291329d4f709098cdfa7384dc.tar.gz
arm: fmtconvert: Split armv6 fmtconvert code off from vfp code
-rw-r--r--libavcodec/arm/Makefile4
-rw-r--r--libavcodec/arm/fmtconvert_init_arm.c9
-rw-r--r--libavcodec/arm/fmtconvert_vfp.S57
-rw-r--r--libavcodec/arm/fmtconvert_vfp_armv6.S78
4 files changed, 85 insertions, 63 deletions
diff --git a/libavcodec/arm/Makefile b/libavcodec/arm/Makefile
index 0e5e53a16a..201f897a61 100644
--- a/libavcodec/arm/Makefile
+++ b/libavcodec/arm/Makefile
@@ -52,11 +52,13 @@ ARMV6-OBJS-$(CONFIG_VP8_DECODER) += arm/vp8_armv6.o \
arm/vp8dsp_init_armv6.o \
arm/vp8dsp_armv6.o
+VFP-OBJS += arm/fmtconvert_vfp.o
+
VFP-OBJS-$(CONFIG_DCA_DECODER) += arm/dcadsp_vfp.o \
arm/synth_filter_vfp.o
VFP-OBJS-$(CONFIG_FFT) += arm/fft_vfp.o
VFP-OBJS-$(CONFIG_MDCT) += arm/mdct_vfp.o
-VFP-OBJS-$(HAVE_ARMV6) += arm/fmtconvert_vfp.o
+VFP-OBJS-$(HAVE_ARMV6) += arm/fmtconvert_vfp_armv6.o
NEON-OBJS += arm/fmtconvert_neon.o
diff --git a/libavcodec/arm/fmtconvert_init_arm.c b/libavcodec/arm/fmtconvert_init_arm.c
index eb66eb8233..7c5bd91d50 100644
--- a/libavcodec/arm/fmtconvert_init_arm.c
+++ b/libavcodec/arm/fmtconvert_init_arm.c
@@ -43,16 +43,15 @@ av_cold void ff_fmt_convert_init_arm(FmtConvertContext *c, AVCodecContext *avctx
{
int cpu_flags = av_get_cpu_flags();
- if (have_vfp(cpu_flags) && have_armv6(cpu_flags)) {
+ if (have_vfp(cpu_flags)) {
if (!have_vfpv3(cpu_flags)) {
- // These functions don't use anything armv6 specific in themselves,
- // but ff_float_to_int16_vfp which is in the same assembly source
- // file does, thus the whole file requires armv6 to be built.
c->int32_to_float_fmul_scalar = ff_int32_to_float_fmul_scalar_vfp;
c->int32_to_float_fmul_array8 = ff_int32_to_float_fmul_array8_vfp;
}
- c->float_to_int16 = ff_float_to_int16_vfp;
+ if (have_armv6(cpu_flags)) {
+ c->float_to_int16 = ff_float_to_int16_vfp;
+ }
}
if (have_neon(cpu_flags)) {
diff --git a/libavcodec/arm/fmtconvert_vfp.S b/libavcodec/arm/fmtconvert_vfp.S
index d841802158..4e43f425a5 100644
--- a/libavcodec/arm/fmtconvert_vfp.S
+++ b/libavcodec/arm/fmtconvert_vfp.S
@@ -1,5 +1,4 @@
/*
- * Copyright (c) 2008 Siarhei Siamashka <ssvb@users.sourceforge.net>
* Copyright (c) 2013 RISC OS Open Ltd <bavison@riscosopen.org>
*
* This file is part of Libav.
@@ -23,62 +22,6 @@
#include "libavutil/arm/asm.S"
/**
- * ARM VFP optimized float to int16 conversion.
- * Assume that len is a positive number and is multiple of 8, destination
- * buffer is at least 4 bytes aligned (8 bytes alignment is better for
- * performance), little-endian byte sex.
- */
-@ void ff_float_to_int16_vfp(int16_t *dst, const float *src, int len)
-function ff_float_to_int16_vfp, export=1
- push {r4-r8,lr}
- vpush {d8-d11}
- vldmia r1!, {s16-s23}
- vcvt.s32.f32 s0, s16
- vcvt.s32.f32 s1, s17
- vcvt.s32.f32 s2, s18
- vcvt.s32.f32 s3, s19
- vcvt.s32.f32 s4, s20
- vcvt.s32.f32 s5, s21
- vcvt.s32.f32 s6, s22
- vcvt.s32.f32 s7, s23
-1:
- subs r2, r2, #8
- vmov r3, r4, s0, s1
- vmov r5, r6, s2, s3
- vmov r7, r8, s4, s5
- vmov ip, lr, s6, s7
- it gt
- vldmiagt r1!, {s16-s23}
- ssat r4, #16, r4
- ssat r3, #16, r3
- ssat r6, #16, r6
- ssat r5, #16, r5
- pkhbt r3, r3, r4, lsl #16
- pkhbt r4, r5, r6, lsl #16
- itttt gt
- vcvtgt.s32.f32 s0, s16
- vcvtgt.s32.f32 s1, s17
- vcvtgt.s32.f32 s2, s18
- vcvtgt.s32.f32 s3, s19
- itttt gt
- vcvtgt.s32.f32 s4, s20
- vcvtgt.s32.f32 s5, s21
- vcvtgt.s32.f32 s6, s22
- vcvtgt.s32.f32 s7, s23
- ssat r8, #16, r8
- ssat r7, #16, r7
- ssat lr, #16, lr
- ssat ip, #16, ip
- pkhbt r5, r7, r8, lsl #16
- pkhbt r6, ip, lr, lsl #16
- stmia r0!, {r3-r6}
- bgt 1b
-
- vpop {d8-d11}
- pop {r4-r8,pc}
-endfunc
-
-/**
* ARM VFP optimised int32 to float conversion.
* Assume len is a multiple of 8, destination buffer is at least 4 bytes aligned
* (16 bytes alignment is best for BCM2835), little-endian.
diff --git a/libavcodec/arm/fmtconvert_vfp_armv6.S b/libavcodec/arm/fmtconvert_vfp_armv6.S
new file mode 100644
index 0000000000..fb12de1368
--- /dev/null
+++ b/libavcodec/arm/fmtconvert_vfp_armv6.S
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2008 Siarhei Siamashka <ssvb@users.sourceforge.net>
+ *
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config.h"
+#include "libavutil/arm/asm.S"
+
+/**
+ * ARM VFP optimized float to int16 conversion.
+ * Assume that len is a positive number and is multiple of 8, destination
+ * buffer is at least 4 bytes aligned (8 bytes alignment is better for
+ * performance), little-endian byte sex.
+ */
+@ void ff_float_to_int16_vfp(int16_t *dst, const float *src, int len)
+function ff_float_to_int16_vfp, export=1
+ push {r4-r8,lr}
+ vpush {d8-d11}
+ vldmia r1!, {s16-s23}
+ vcvt.s32.f32 s0, s16
+ vcvt.s32.f32 s1, s17
+ vcvt.s32.f32 s2, s18
+ vcvt.s32.f32 s3, s19
+ vcvt.s32.f32 s4, s20
+ vcvt.s32.f32 s5, s21
+ vcvt.s32.f32 s6, s22
+ vcvt.s32.f32 s7, s23
+1:
+ subs r2, r2, #8
+ vmov r3, r4, s0, s1
+ vmov r5, r6, s2, s3
+ vmov r7, r8, s4, s5
+ vmov ip, lr, s6, s7
+ it gt
+ vldmiagt r1!, {s16-s23}
+ ssat r4, #16, r4
+ ssat r3, #16, r3
+ ssat r6, #16, r6
+ ssat r5, #16, r5
+ pkhbt r3, r3, r4, lsl #16
+ pkhbt r4, r5, r6, lsl #16
+ itttt gt
+ vcvtgt.s32.f32 s0, s16
+ vcvtgt.s32.f32 s1, s17
+ vcvtgt.s32.f32 s2, s18
+ vcvtgt.s32.f32 s3, s19
+ itttt gt
+ vcvtgt.s32.f32 s4, s20
+ vcvtgt.s32.f32 s5, s21
+ vcvtgt.s32.f32 s6, s22
+ vcvtgt.s32.f32 s7, s23
+ ssat r8, #16, r8
+ ssat r7, #16, r7
+ ssat lr, #16, lr
+ ssat ip, #16, ip
+ pkhbt r5, r7, r8, lsl #16
+ pkhbt r6, ip, lr, lsl #16
+ stmia r0!, {r3-r6}
+ bgt 1b
+
+ vpop {d8-d11}
+ pop {r4-r8,pc}
+endfunc