diff options
author | Janne Grunau <janne-libav@jannau.net> | 2014-02-22 18:27:10 +0100 |
---|---|---|
committer | Janne Grunau <janne-libav@jannau.net> | 2014-02-28 13:12:19 +0100 |
commit | 6e4009d4cdf5927bdaedf58fcfc5e813b14c366b (patch) | |
tree | d321a5f886878371ad256cad296632e7d31bca37 /libavcodec/arm | |
parent | 4cb6964244fd6c099383d8b7e99731e72cc844b9 (diff) | |
download | ffmpeg-6e4009d4cdf5927bdaedf58fcfc5e813b14c366b.tar.gz |
arm: dcadsp: implement decode_hf as external NEON asm
Diffstat (limited to 'libavcodec/arm')
-rw-r--r-- | libavcodec/arm/dcadsp_init_arm.c | 7 | ||||
-rw-r--r-- | libavcodec/arm/dcadsp_neon.S | 29 |
2 files changed, 36 insertions, 0 deletions
diff --git a/libavcodec/arm/dcadsp_init_arm.c b/libavcodec/arm/dcadsp_init_arm.c
index d91c787d79..540048415f 100644
--- a/libavcodec/arm/dcadsp_init_arm.c
+++ b/libavcodec/arm/dcadsp_init_arm.c
@@ -49,6 +49,12 @@ void ff_synth_filter_float_neon(FFTContext *imdct,
                                 float out[32], const float in[32],
                                 float scale);
 
+void ff_decode_hf_neon(float dst[DCA_SUBBANDS][8],
+                       const int32_t vq_num[DCA_SUBBANDS],
+                       const int8_t hf_vq[1024][32], intptr_t vq_offset,
+                       int32_t scale[DCA_SUBBANDS][2],
+                       intptr_t start, intptr_t end);
+
 av_cold void ff_dcadsp_init_arm(DCADSPContext *s)
 {
     int cpu_flags = av_get_cpu_flags();
@@ -61,6 +67,7 @@ av_cold void ff_dcadsp_init_arm(DCADSPContext *s)
     if (have_neon(cpu_flags)) {
         s->lfe_fir[0] = ff_dca_lfe_fir0_neon;
         s->lfe_fir[1] = ff_dca_lfe_fir1_neon;
+        s->decode_hf = ff_decode_hf_neon;
     }
 }
 
diff --git a/libavcodec/arm/dcadsp_neon.S b/libavcodec/arm/dcadsp_neon.S
index 735c4c28e5..70580cdeec 100644
--- a/libavcodec/arm/dcadsp_neon.S
+++ b/libavcodec/arm/dcadsp_neon.S
@@ -20,6 +20,35 @@
 
 #include "libavutil/arm/asm.S"
 
+function ff_decode_hf_neon, export=1
+        push            {r4-r5,lr}
+        add             r2, r2, r3
+        ldr             r3, [sp, #12]
+        ldrd            r4, r5, [sp, #16]
+        add             r3, r3, r4, lsl #3
+        add             r1, r1, r4, lsl #2
+        add             r0, r0, r4, lsl #5
+
+1:      ldr_post        lr, r1, #4
+        add             r4, r4, #1
+        add             lr, r2, lr, lsl #5
+        cmp             r4, r5
+        vld1.32         {d7}, [r3]!
+        vld1.8          {d0}, [lr,:64]
+        vcvt.f32.s32    d7, d7, #4
+        vmovl.s8        q1, d0
+        vmovl.s16       q0, d2
+        vmovl.s16       q1, d3
+        vcvt.f32.s32    q0, q0
+        vcvt.f32.s32    q1, q1
+        vmul.f32        q0, q0, d7[0]
+        vmul.f32        q1, q1, d7[0]
+        vst1.32         {q0-q1}, [r0,:128]!
+        bne             1b
+
+        pop             {r4-r5,pc}
+endfunc
+
 function ff_dca_lfe_fir0_neon, export=1
         push            {r4-r6,lr}
         mov             r3, #32 @ decifactor