diff options
author | Ronald S. Bultje <rsbultje@gmail.com> | 2013-01-19 22:21:10 -0800 |
---|---|---|
committer | Ronald S. Bultje <rsbultje@gmail.com> | 2013-01-19 22:21:10 -0800 |
commit | fef906c77c09940a2fdad155b2adc05080e17eda (patch) | |
tree | 04fe0b67be6917b07bfb94a6af45b669f3a66107 /libavcodec/arm | |
parent | aeaf268e52fc11c1f64914a319e0edddf1346d6a (diff) | |
download | ffmpeg-fef906c77c09940a2fdad155b2adc05080e17eda.tar.gz |
Move vorbis_inverse_coupling from dsputil (DSPContext) to VorbisDSPContext.
Conveniently (together with Justin's earlier patches), this makes
our Vorbis decoder entirely independent of dsputil.
Diffstat (limited to 'libavcodec/arm')
-rw-r--r-- | libavcodec/arm/Makefile | 3 | ||||
-rw-r--r-- | libavcodec/arm/dsputil_init_neon.c | 5 | ||||
-rw-r--r-- | libavcodec/arm/dsputil_neon.S | 64 | ||||
-rw-r--r-- | libavcodec/arm/vorbisdsp_init_arm.c | 36 | ||||
-rw-r--r-- | libavcodec/arm/vorbisdsp_neon.S | 83 |
5 files changed, 122 insertions, 69 deletions
diff --git a/libavcodec/arm/Makefile b/libavcodec/arm/Makefile index ac486f47dc..71048f9c4c 100644 --- a/libavcodec/arm/Makefile +++ b/libavcodec/arm/Makefile @@ -15,6 +15,7 @@ OBJS-$(CONFIG_MPEGAUDIODSP) += arm/mpegaudiodsp_init_arm.o ARMV6-OBJS-$(CONFIG_MPEGAUDIODSP) += arm/mpegaudiodsp_fixed_armv6.o OBJS-$(CONFIG_MPEGVIDEO) += arm/mpegvideo_arm.o +OBJS-$(CONFIG_VORBIS_DECODER) += arm/vorbisdsp_init_arm.o OBJS-$(CONFIG_VP3DSP) += arm/vp3dsp_init_arm.o OBJS-$(CONFIG_VP5_DECODER) += arm/vp56dsp_init_arm.o OBJS-$(CONFIG_VP6_DECODER) += arm/vp56dsp_init_arm.o @@ -86,6 +87,8 @@ NEON-OBJS-$(CONFIG_RV40_DECODER) += arm/rv34dsp_neon.o \ arm/rv40dsp_neon.o \ arm/h264cmc_neon.o \ +NEON-OBJS-$(CONFIG_VORBIS_DECODER) += arm/vorbisdsp_neon.o + NEON-OBJS-$(CONFIG_VP3DSP) += arm/vp3dsp_neon.o NEON-OBJS-$(CONFIG_VP5_DECODER) += arm/vp56dsp_neon.o \ diff --git a/libavcodec/arm/dsputil_init_neon.c b/libavcodec/arm/dsputil_init_neon.c index 34bb6191f1..ee0e9afa88 100644 --- a/libavcodec/arm/dsputil_init_neon.c +++ b/libavcodec/arm/dsputil_init_neon.c @@ -154,8 +154,6 @@ void ff_vector_clipf_neon(float *dst, const float *src, float min, float max, void ff_vector_clip_int32_neon(int32_t *dst, const int32_t *src, int32_t min, int32_t max, unsigned int len); -void ff_vorbis_inverse_coupling_neon(float *mag, float *ang, int blocksize); - int32_t ff_scalarproduct_int16_neon(const int16_t *v1, const int16_t *v2, int len); int32_t ff_scalarproduct_and_madd_int16_neon(int16_t *v1, const int16_t *v2, const int16_t *v3, int len, int mul); @@ -307,9 +305,6 @@ void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx) c->vector_clipf = ff_vector_clipf_neon; c->vector_clip_int32 = ff_vector_clip_int32_neon; - if (CONFIG_VORBIS_DECODER) - c->vorbis_inverse_coupling = ff_vorbis_inverse_coupling_neon; - c->scalarproduct_int16 = ff_scalarproduct_int16_neon; c->scalarproduct_and_madd_int16 = ff_scalarproduct_and_madd_int16_neon; diff --git a/libavcodec/arm/dsputil_neon.S 
b/libavcodec/arm/dsputil_neon.S index a0d201cd85..ebc70ac109 100644 --- a/libavcodec/arm/dsputil_neon.S +++ b/libavcodec/arm/dsputil_neon.S @@ -19,7 +19,6 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ -#include "config.h" #include "libavutil/arm/asm.S" function ff_clear_block_neon, export=1 @@ -532,69 +531,6 @@ function ff_add_pixels_clamped_neon, export=1 bx lr endfunc -#if CONFIG_VORBIS_DECODER -function ff_vorbis_inverse_coupling_neon, export=1 - vmov.i32 q10, #1<<31 - subs r2, r2, #4 - mov r3, r0 - mov r12, r1 - beq 3f - - vld1.32 {d24-d25},[r1,:128]! - vld1.32 {d22-d23},[r0,:128]! - vcle.s32 q8, q12, #0 - vand q9, q11, q10 - veor q12, q12, q9 - vand q2, q12, q8 - vbic q3, q12, q8 - vadd.f32 q12, q11, q2 - vsub.f32 q11, q11, q3 -1: vld1.32 {d2-d3}, [r1,:128]! - vld1.32 {d0-d1}, [r0,:128]! - vcle.s32 q8, q1, #0 - vand q9, q0, q10 - veor q1, q1, q9 - vst1.32 {d24-d25},[r3, :128]! - vst1.32 {d22-d23},[r12,:128]! - vand q2, q1, q8 - vbic q3, q1, q8 - vadd.f32 q1, q0, q2 - vsub.f32 q0, q0, q3 - subs r2, r2, #8 - ble 2f - vld1.32 {d24-d25},[r1,:128]! - vld1.32 {d22-d23},[r0,:128]! - vcle.s32 q8, q12, #0 - vand q9, q11, q10 - veor q12, q12, q9 - vst1.32 {d2-d3}, [r3, :128]! - vst1.32 {d0-d1}, [r12,:128]! - vand q2, q12, q8 - vbic q3, q12, q8 - vadd.f32 q12, q11, q2 - vsub.f32 q11, q11, q3 - b 1b - -2: vst1.32 {d2-d3}, [r3, :128]! - vst1.32 {d0-d1}, [r12,:128]! - it lt - bxlt lr - -3: vld1.32 {d2-d3}, [r1,:128] - vld1.32 {d0-d1}, [r0,:128] - vcle.s32 q8, q1, #0 - vand q9, q0, q10 - veor q1, q1, q9 - vand q2, q1, q8 - vbic q3, q1, q8 - vadd.f32 q1, q0, q2 - vsub.f32 q0, q0, q3 - vst1.32 {d2-d3}, [r0,:128]! - vst1.32 {d0-d1}, [r1,:128]! 
- bx lr -endfunc -#endif - function ff_butterflies_float_neon, export=1 1: vld1.32 {q0},[r0,:128] vld1.32 {q1},[r1,:128] diff --git a/libavcodec/arm/vorbisdsp_init_arm.c b/libavcodec/arm/vorbisdsp_init_arm.c new file mode 100644 index 0000000000..ec8fbd5bc9 --- /dev/null +++ b/libavcodec/arm/vorbisdsp_init_arm.c @@ -0,0 +1,36 @@ +/* + * ARM NEON optimised DSP functions + * Copyright (c) 2008 Mans Rullgard <mans@mansr.com> + * + * This file is part of Libav. + * + * Libav is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * Libav is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with Libav; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "libavutil/attributes.h" +#include "libavutil/cpu.h" +#include "libavutil/arm/cpu.h" +#include "libavcodec/vorbisdsp.h" + +void ff_vorbis_inverse_coupling_neon(float *mag, float *ang, int blocksize); + +void ff_vorbisdsp_init_arm(VorbisDSPContext *c) +{ + int cpu_flags = av_get_cpu_flags(); + + if (have_neon(cpu_flags)) { + c->vorbis_inverse_coupling = ff_vorbis_inverse_coupling_neon; + } +} diff --git a/libavcodec/arm/vorbisdsp_neon.S b/libavcodec/arm/vorbisdsp_neon.S new file mode 100644 index 0000000000..7df876c2bc --- /dev/null +++ b/libavcodec/arm/vorbisdsp_neon.S @@ -0,0 +1,83 @@ +/* + * ARM NEON optimised DSP functions + * Copyright (c) 2008 Mans Rullgard <mans@mansr.com> + * + * This file is part of Libav. 
+ * + * Libav is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * Libav is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with Libav; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "libavutil/arm/asm.S" + +function ff_vorbis_inverse_coupling_neon, export=1 + vmov.i32 q10, #1<<31 + subs r2, r2, #4 + mov r3, r0 + mov r12, r1 + beq 3f + + vld1.32 {d24-d25},[r1,:128]! + vld1.32 {d22-d23},[r0,:128]! + vcle.s32 q8, q12, #0 + vand q9, q11, q10 + veor q12, q12, q9 + vand q2, q12, q8 + vbic q3, q12, q8 + vadd.f32 q12, q11, q2 + vsub.f32 q11, q11, q3 +1: vld1.32 {d2-d3}, [r1,:128]! + vld1.32 {d0-d1}, [r0,:128]! + vcle.s32 q8, q1, #0 + vand q9, q0, q10 + veor q1, q1, q9 + vst1.32 {d24-d25},[r3, :128]! + vst1.32 {d22-d23},[r12,:128]! + vand q2, q1, q8 + vbic q3, q1, q8 + vadd.f32 q1, q0, q2 + vsub.f32 q0, q0, q3 + subs r2, r2, #8 + ble 2f + vld1.32 {d24-d25},[r1,:128]! + vld1.32 {d22-d23},[r0,:128]! + vcle.s32 q8, q12, #0 + vand q9, q11, q10 + veor q12, q12, q9 + vst1.32 {d2-d3}, [r3, :128]! + vst1.32 {d0-d1}, [r12,:128]! + vand q2, q12, q8 + vbic q3, q12, q8 + vadd.f32 q12, q11, q2 + vsub.f32 q11, q11, q3 + b 1b + +2: vst1.32 {d2-d3}, [r3, :128]! + vst1.32 {d0-d1}, [r12,:128]! 
+ it lt + bxlt lr + +3: vld1.32 {d2-d3}, [r1,:128] + vld1.32 {d0-d1}, [r0,:128] + vcle.s32 q8, q1, #0 + vand q9, q0, q10 + veor q1, q1, q9 + vand q2, q1, q8 + vbic q3, q1, q8 + vadd.f32 q1, q0, q2 + vsub.f32 q0, q0, q3 + vst1.32 {d2-d3}, [r0,:128]! + vst1.32 {d0-d1}, [r1,:128]! + bx lr +endfunc |