diff options
author | Michael Niedermayer <michaelni@gmx.at> | 2013-01-23 14:04:50 +0100 |
---|---|---|
committer | Michael Niedermayer <michaelni@gmx.at> | 2013-01-23 14:04:50 +0100 |
commit | 6e6e1708984e45881b9a5d4e26c3e7de852c54d5 (patch) | |
tree | 5e04d38f8e152faf98921843ca5e4530cbdc46a4 /libavcodec/arm | |
parent | b1b870fbd7185bffbe27c5918001b40a8ff8b920 (diff) | |
parent | 42d324694883cdf1fff1612ac70fa403692a1ad4 (diff) | |
download | ffmpeg-6e6e1708984e45881b9a5d4e26c3e7de852c54d5.tar.gz |
Merge commit '42d324694883cdf1fff1612ac70fa403692a1ad4'
* commit '42d324694883cdf1fff1612ac70fa403692a1ad4':
floatdsp: move vector_fmul_reverse from dsputil to avfloatdsp.
Conflicts:
libavcodec/arm/dsputil_init_vfp.c
libavcodec/arm/dsputil_vfp.S
libavcodec/dsputil.c
libavcodec/ppc/float_altivec.c
libavcodec/x86/dsputil.asm
libavutil/x86/float_dsp.asm
Merged-by: Michael Niedermayer <michaelni@gmx.at>
Diffstat (limited to 'libavcodec/arm')
-rw-r--r-- | libavcodec/arm/Makefile | 3 | ||||
-rw-r--r-- | libavcodec/arm/dsputil_init_arm.c | 1 | ||||
-rw-r--r-- | libavcodec/arm/dsputil_init_neon.c | 3 | ||||
-rw-r--r-- | libavcodec/arm/dsputil_init_vfp.c | 30 | ||||
-rw-r--r-- | libavcodec/arm/dsputil_neon.S | 24 | ||||
-rw-r--r-- | libavcodec/arm/dsputil_vfp.S | 106 |
6 files changed, 0 insertions, 167 deletions
diff --git a/libavcodec/arm/Makefile b/libavcodec/arm/Makefile index 71048f9c4c..5ebda62cf6 100644 --- a/libavcodec/arm/Makefile +++ b/libavcodec/arm/Makefile @@ -56,9 +56,6 @@ ARMV6-OBJS += arm/dsputil_init_armv6.o \ VFP-OBJS-$(HAVE_ARMV6) += arm/fmtconvert_vfp.o -VFP-OBJS += arm/dsputil_vfp.o \ - arm/dsputil_init_vfp.o \ - NEON-OBJS-$(CONFIG_FFT) += arm/fft_neon.o \ arm/fft_fixed_neon.o \ diff --git a/libavcodec/arm/dsputil_init_arm.c b/libavcodec/arm/dsputil_init_arm.c index 03df6b29d0..45192d1b5c 100644 --- a/libavcodec/arm/dsputil_init_arm.c +++ b/libavcodec/arm/dsputil_init_arm.c @@ -121,6 +121,5 @@ void ff_dsputil_init_arm(DSPContext* c, AVCodecContext *avctx) if (have_armv5te(cpu_flags)) ff_dsputil_init_armv5te(c, avctx); if (have_armv6(cpu_flags)) ff_dsputil_init_armv6(c, avctx); - if (have_vfp(cpu_flags)) ff_dsputil_init_vfp(c, avctx); if (have_neon(cpu_flags)) ff_dsputil_init_neon(c, avctx); } diff --git a/libavcodec/arm/dsputil_init_neon.c b/libavcodec/arm/dsputil_init_neon.c index ebdafff23d..35b3a53ce8 100644 --- a/libavcodec/arm/dsputil_init_neon.c +++ b/libavcodec/arm/dsputil_init_neon.c @@ -144,8 +144,6 @@ void ff_avg_h264_chroma_mc2_neon(uint8_t *, uint8_t *, int, int, int, int); void ff_butterflies_float_neon(float *v1, float *v2, int len); float ff_scalarproduct_float_neon(const float *v1, const float *v2, int len); -void ff_vector_fmul_reverse_neon(float *dst, const float *src0, - const float *src1, int len); void ff_vector_clipf_neon(float *dst, const float *src, float min, float max, int len); @@ -298,7 +296,6 @@ void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx) c->butterflies_float = ff_butterflies_float_neon; c->scalarproduct_float = ff_scalarproduct_float_neon; - c->vector_fmul_reverse = ff_vector_fmul_reverse_neon; c->vector_clipf = ff_vector_clipf_neon; c->vector_clip_int32 = ff_vector_clip_int32_neon; diff --git a/libavcodec/arm/dsputil_init_vfp.c b/libavcodec/arm/dsputil_init_vfp.c deleted file mode 100644 index 5713c71f80..0000000000 --- a/libavcodec/arm/dsputil_init_vfp.c +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Copyright (c) 2008 Siarhei Siamashka <ssvb@users.sourceforge.net> - * - * This file is part of FFmpeg. - * - * FFmpeg is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * FFmpeg is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with FFmpeg; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#include "libavcodec/dsputil.h" -#include "dsputil_arm.h" - -void ff_vector_fmul_reverse_vfp(float *dst, const float *src0, - const float *src1, int len); - -void ff_dsputil_init_vfp(DSPContext* c, AVCodecContext *avctx) -{ - c->vector_fmul_reverse = ff_vector_fmul_reverse_vfp; -} diff --git a/libavcodec/arm/dsputil_neon.S b/libavcodec/arm/dsputil_neon.S index 794ceb8b55..4cdbb1771d 100644 --- a/libavcodec/arm/dsputil_neon.S +++ b/libavcodec/arm/dsputil_neon.S @@ -556,30 +556,6 @@ NOVFP vmov.32 r0, d0[0] bx lr endfunc -function ff_vector_fmul_reverse_neon, export=1 - add r2, r2, r3, lsl #2 - sub r2, r2, #32 - mov r12, #-32 - vld1.32 {q0-q1}, [r1,:128]! - vld1.32 {q2-q3}, [r2,:128], r12 -1: pld [r1, #32] - vrev64.32 q3, q3 - vmul.f32 d16, d0, d7 - vmul.f32 d17, d1, d6 - pld [r2, #-32] - vrev64.32 q2, q2 - vmul.f32 d18, d2, d5 - vmul.f32 d19, d3, d4 - subs r3, r3, #8 - beq 2f - vld1.32 {q0-q1}, [r1,:128]! - vld1.32 {q2-q3}, [r2,:128], r12 - vst1.32 {q8-q9}, [r0,:128]! - b 1b -2: vst1.32 {q8-q9}, [r0,:128]! - bx lr -endfunc - function ff_vector_clipf_neon, export=1 VFP vdup.32 q1, d0[1] VFP vdup.32 q0, d0[0] diff --git a/libavcodec/arm/dsputil_vfp.S b/libavcodec/arm/dsputil_vfp.S deleted file mode 100644 index 63ba766859..0000000000 --- a/libavcodec/arm/dsputil_vfp.S +++ /dev/null @@ -1,106 +0,0 @@ -/* - * Copyright (c) 2008 Siarhei Siamashka <ssvb@users.sourceforge.net> - * - * This file is part of FFmpeg. - * - * FFmpeg is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * FFmpeg is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with FFmpeg; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#include "config.h" -#include "libavutil/arm/asm.S" - -/* - * VFP is a floating point coprocessor used in some ARM cores. VFP11 has 1 cycle - * throughput for almost all the instructions (except for double precision - * arithmetics), but rather high latency. Latency is 4 cycles for loads and 8 cycles - * for arithmetic operations. Scheduling code to avoid pipeline stalls is very - * important for performance. One more interesting feature is that VFP has - * independent load/store and arithmetics pipelines, so it is possible to make - * them work simultaneously and get more than 1 operation per cycle. Load/store - * pipeline can process 2 single precision floating point values per cycle and - * supports bulk loads and stores for large sets of registers. Arithmetic operations - * can be done on vectors, which allows to keep the arithmetics pipeline busy, - * while the processor may issue and execute other instructions. Detailed - * optimization manuals can be found at http://www.arm.com - */ - -/** - * ARM VFP optimized implementation of 'vector_fmul_reverse_c' function. - * Assume that len is a positive number and is multiple of 8 - */ -@ void ff_vector_fmul_reverse_vfp(float *dst, const float *src0, -@ const float *src1, int len) -function ff_vector_fmul_reverse_vfp, export=1 - vpush {d8-d15} - add r2, r2, r3, lsl #2 - vldmdb r2!, {s0-s3} - vldmia r1!, {s8-s11} - vldmdb r2!, {s4-s7} - vldmia r1!, {s12-s15} - vmul.f32 s8, s3, s8 - vmul.f32 s9, s2, s9 - vmul.f32 s10, s1, s10 - vmul.f32 s11, s0, s11 -1: - subs r3, r3, #16 - it ge - vldmdbge r2!, {s16-s19} - vmul.f32 s12, s7, s12 - it ge - vldmiage r1!, {s24-s27} - vmul.f32 s13, s6, s13 - it ge - vldmdbge r2!, {s20-s23} - vmul.f32 s14, s5, s14 - it ge - vldmiage r1!, {s28-s31} - vmul.f32 s15, s4, s15 - it ge - vmulge.f32 s24, s19, s24 - it gt - vldmdbgt r2!, {s0-s3} - it ge - vmulge.f32 s25, s18, s25 - vstmia r0!, {s8-s13} - it ge - vmulge.f32 s26, s17, s26 - it gt - vldmiagt r1!, {s8-s11} - itt ge - vmulge.f32 s27, s16, s27 - vmulge.f32 s28, s23, s28 - it gt - vldmdbgt r2!, {s4-s7} - it ge - vmulge.f32 s29, s22, s29 - vstmia r0!, {s14-s15} - ittt ge - vmulge.f32 s30, s21, s30 - vmulge.f32 s31, s20, s31 - vmulge.f32 s8, s3, s8 - it gt - vldmiagt r1!, {s12-s15} - itttt ge - vmulge.f32 s9, s2, s9 - vmulge.f32 s10, s1, s10 - vstmiage r0!, {s24-s27} - vmulge.f32 s11, s0, s11 - it ge - vstmiage r0!, {s28-s31} - bgt 1b - - vpop {d8-d15} - bx lr -endfunc |