arm: fmtconvert: Split armv6 fmtconvert code off from vfp code

author: Diego Biurrun <diego@biurrun.de> 2013-08-23 20:01:36 +0200
committer: Diego Biurrun <diego@biurrun.de> 2013-08-29 11:24:14 +0200
commit: f0389eb777b1ab4291329d4f709098cdfa7384dc (patch)
tree: 369f5cc03bd02610fc95123b84e446036c00feb6
parent: 7ffda66fd5c81af4725bff7c2c4f207ba2aa0613 (diff)
download: ffmpeg-f0389eb777b1ab4291329d4f709098cdfa7384dc.tar.gz
4 files changed, 85 insertions, 63 deletions
diff --git a/libavcodec/arm/Makefile b/libavcodec/arm/Makefile
index 0e5e53a16a..201f897a61 100644
--- a/libavcodec/arm/Makefile
+++ b/libavcodec/arm/Makefile
@@ -52,11 +52,13 @@ ARMV6-OBJS-$(CONFIG_VP8_DECODER)       += arm/vp8_armv6.o               \
                                           arm/vp8dsp_init_armv6.o       \
                                           arm/vp8dsp_armv6.o
 
+VFP-OBJS                               += arm/fmtconvert_vfp.o
+
 VFP-OBJS-$(CONFIG_DCA_DECODER)         += arm/dcadsp_vfp.o              \
                                           arm/synth_filter_vfp.o
 VFP-OBJS-$(CONFIG_FFT)                 += arm/fft_vfp.o
 VFP-OBJS-$(CONFIG_MDCT)                += arm/mdct_vfp.o
-VFP-OBJS-$(HAVE_ARMV6)                 += arm/fmtconvert_vfp.o
+VFP-OBJS-$(HAVE_ARMV6)                 += arm/fmtconvert_vfp_armv6.o
 
 NEON-OBJS                              += arm/fmtconvert_neon.o
 
diff --git a/libavcodec/arm/fmtconvert_init_arm.c b/libavcodec/arm/fmtconvert_init_arm.c
index eb66eb8233..7c5bd91d50 100644
--- a/libavcodec/arm/fmtconvert_init_arm.c
+++ b/libavcodec/arm/fmtconvert_init_arm.c
@@ -43,16 +43,15 @@ av_cold void ff_fmt_convert_init_arm(FmtConvertContext *c, AVCodecContext *avctx
 {
     int cpu_flags = av_get_cpu_flags();
 
-    if (have_vfp(cpu_flags) && have_armv6(cpu_flags)) {
+    if (have_vfp(cpu_flags)) {
         if (!have_vfpv3(cpu_flags)) {
-            // These functions don't use anything armv6 specific in themselves,
-            // but ff_float_to_int16_vfp which is in the same assembly source
-            // file does, thus the whole file requires armv6 to be built.
             c->int32_to_float_fmul_scalar = ff_int32_to_float_fmul_scalar_vfp;
             c->int32_to_float_fmul_array8 = ff_int32_to_float_fmul_array8_vfp;
         }
 
-        c->float_to_int16 = ff_float_to_int16_vfp;
+        if (have_armv6(cpu_flags)) {
+            c->float_to_int16 = ff_float_to_int16_vfp;
+        }
     }
 
     if (have_neon(cpu_flags)) {
diff --git a/libavcodec/arm/fmtconvert_vfp.S b/libavcodec/arm/fmtconvert_vfp.S
index d841802158..4e43f425a5 100644
--- a/libavcodec/arm/fmtconvert_vfp.S
+++ b/libavcodec/arm/fmtconvert_vfp.S
@@ -1,5 +1,4 @@
 /*
- * Copyright (c) 2008 Siarhei Siamashka <ssvb@users.sourceforge.net>
  * Copyright (c) 2013 RISC OS Open Ltd <bavison@riscosopen.org>
  *
  * This file is part of Libav.
@@ -23,62 +22,6 @@
 #include "libavutil/arm/asm.S"
 
 /**
- * ARM VFP optimized float to int16 conversion.
- * Assume that len is a positive number and is multiple of 8, destination
- * buffer is at least 4 bytes aligned (8 bytes alignment is better for
- * performance), little-endian byte sex.
- */
-@ void ff_float_to_int16_vfp(int16_t *dst, const float *src, int len)
-function ff_float_to_int16_vfp, export=1
-        push            {r4-r8,lr}
-        vpush           {d8-d11}
-        vldmia          r1!, {s16-s23}
-        vcvt.s32.f32    s0,  s16
-        vcvt.s32.f32    s1,  s17
-        vcvt.s32.f32    s2,  s18
-        vcvt.s32.f32    s3,  s19
-        vcvt.s32.f32    s4,  s20
-        vcvt.s32.f32    s5,  s21
-        vcvt.s32.f32    s6,  s22
-        vcvt.s32.f32    s7,  s23
-1:
-        subs            r2,  r2,  #8
-        vmov            r3,  r4,  s0, s1
-        vmov            r5,  r6,  s2, s3
-        vmov            r7,  r8,  s4, s5
-        vmov            ip,  lr,  s6, s7
-        it              gt
-        vldmiagt        r1!, {s16-s23}
-        ssat            r4,  #16, r4
-        ssat            r3,  #16, r3
-        ssat            r6,  #16, r6
-        ssat            r5,  #16, r5
-        pkhbt           r3,  r3,  r4, lsl #16
-        pkhbt           r4,  r5,  r6, lsl #16
-        itttt           gt
-        vcvtgt.s32.f32  s0,  s16
-        vcvtgt.s32.f32  s1,  s17
-        vcvtgt.s32.f32  s2,  s18
-        vcvtgt.s32.f32  s3,  s19
-        itttt           gt
-        vcvtgt.s32.f32  s4,  s20
-        vcvtgt.s32.f32  s5,  s21
-        vcvtgt.s32.f32  s6,  s22
-        vcvtgt.s32.f32  s7,  s23
-        ssat            r8,  #16, r8
-        ssat            r7,  #16, r7
-        ssat            lr,  #16, lr
-        ssat            ip,  #16, ip
-        pkhbt           r5,  r7,  r8, lsl #16
-        pkhbt           r6,  ip,  lr, lsl #16
-        stmia           r0!, {r3-r6}
-        bgt             1b
-
-        vpop            {d8-d11}
-        pop             {r4-r8,pc}
-endfunc
-
-/**
  * ARM VFP optimised int32 to float conversion.
  * Assume len is a multiple of 8, destination buffer is at least 4 bytes aligned
  * (16 bytes alignment is best for BCM2835), little-endian.
diff --git a/libavcodec/arm/fmtconvert_vfp_armv6.S b/libavcodec/arm/fmtconvert_vfp_armv6.S
new file mode 100644
index 0000000000..fb12de1368
--- /dev/null
+++ b/libavcodec/arm/fmtconvert_vfp_armv6.S
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2008 Siarhei Siamashka <ssvb@users.sourceforge.net>
+ *
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config.h"
+#include "libavutil/arm/asm.S"
+
+/**
+ * ARM VFP optimized float to int16 conversion.
+ * Assume that len is a positive number and is multiple of 8, destination
+ * buffer is at least 4 bytes aligned (8 bytes alignment is better for
+ * performance), little-endian byte sex.
+ */
+@ void ff_float_to_int16_vfp(int16_t *dst, const float *src, int len)
+function ff_float_to_int16_vfp, export=1
+        push            {r4-r8,lr}
+        vpush           {d8-d11}
+        vldmia          r1!, {s16-s23}
+        vcvt.s32.f32    s0,  s16
+        vcvt.s32.f32    s1,  s17
+        vcvt.s32.f32    s2,  s18
+        vcvt.s32.f32    s3,  s19
+        vcvt.s32.f32    s4,  s20
+        vcvt.s32.f32    s5,  s21
+        vcvt.s32.f32    s6,  s22
+        vcvt.s32.f32    s7,  s23
+1:
+        subs            r2,  r2,  #8
+        vmov            r3,  r4,  s0, s1
+        vmov            r5,  r6,  s2, s3
+        vmov            r7,  r8,  s4, s5
+        vmov            ip,  lr,  s6, s7
+        it              gt
+        vldmiagt        r1!, {s16-s23}
+        ssat            r4,  #16, r4
+        ssat            r3,  #16, r3
+        ssat            r6,  #16, r6
+        ssat            r5,  #16, r5
+        pkhbt           r3,  r3,  r4, lsl #16
+        pkhbt           r4,  r5,  r6, lsl #16
+        itttt           gt
+        vcvtgt.s32.f32  s0,  s16
+        vcvtgt.s32.f32  s1,  s17
+        vcvtgt.s32.f32  s2,  s18
+        vcvtgt.s32.f32  s3,  s19
+        itttt           gt
+        vcvtgt.s32.f32  s4,  s20
+        vcvtgt.s32.f32  s5,  s21
+        vcvtgt.s32.f32  s6,  s22
+        vcvtgt.s32.f32  s7,  s23
+        ssat            r8,  #16, r8
+        ssat            r7,  #16, r7
+        ssat            lr,  #16, lr
+        ssat            ip,  #16, ip
+        pkhbt           r5,  r7,  r8, lsl #16
+        pkhbt           r6,  ip,  lr, lsl #16
+        stmia           r0!, {r3-r6}
+        bgt             1b
+
+        vpop            {d8-d11}
+        pop             {r4-r8,pc}
+endfunc
author	Diego Biurrun <diego@biurrun.de>	2013-08-23 20:01:36 +0200
committer	Diego Biurrun <diego@biurrun.de>	2013-08-29 11:24:14 +0200
commit	f0389eb777b1ab4291329d4f709098cdfa7384dc (patch)
tree	369f5cc03bd02610fc95123b84e446036c00feb6
parent	7ffda66fd5c81af4725bff7c2c4f207ba2aa0613 (diff)
download	ffmpeg-f0389eb777b1ab4291329d4f709098cdfa7384dc.tar.gz