diff options
author | Michael Niedermayer <michaelni@gmx.at> | 2014-02-08 02:16:47 +0100 |
---|---|---|
committer | Michael Niedermayer <michaelni@gmx.at> | 2014-02-08 02:27:27 +0100 |
commit | 5794e9fce22a4d3dffda536a220ca9483a2d87da (patch) | |
tree | 9cd8048ad384e3568814fadeb1c81e7632a950ec | |
parent | 45854df9a5220bdde400a447f63f61618b89dde2 (diff) | |
parent | 5fdbfcb5b793f5849c496214668094a8ec99fa07 (diff) | |
download | ffmpeg-5794e9fce22a4d3dffda536a220ca9483a2d87da.tar.gz |
Merge remote-tracking branch 'qatar/master'
* qatar/master:
dcadsp: split lfe_dir cases
Conflicts:
libavcodec/arm/dcadsp_init_arm.c
See: 45854df9a5220bdde400a447f63f61618b89dde2
Merged-by: Michael Niedermayer <michaelni@gmx.at>
-rw-r--r-- | libavcodec/arm/dcadsp_init_arm.c | 46 | ||||
-rw-r--r-- | libavcodec/arm/dcadsp_neon.S | 18 | ||||
-rw-r--r-- | libavcodec/arm/dcadsp_vfp.S | 32 |
3 files changed, 38 insertions, 58 deletions
diff --git a/libavcodec/arm/dcadsp_init_arm.c b/libavcodec/arm/dcadsp_init_arm.c index 0a8c2feafd..99e2df400c 100644 --- a/libavcodec/arm/dcadsp_init_arm.c +++ b/libavcodec/arm/dcadsp_init_arm.c @@ -24,16 +24,22 @@ #include "libavutil/attributes.h" #include "libavcodec/dcadsp.h" -void ff_dca_lfe_fir_vfp(float *out, const float *in, const float *coefs, - int decifactor, float scale); +void ff_dca_lfe_fir0_neon(float *out, const float *in, const float *coefs, + float scale); +void ff_dca_lfe_fir1_neon(float *out, const float *in, const float *coefs, + float scale); + +void ff_dca_lfe_fir32_vfp(float *out, const float *in, const float *coefs, + float scale); +void ff_dca_lfe_fir64_vfp(float *out, const float *in, const float *coefs, + float scale); + void ff_dca_qmf_32_subbands_vfp(float samples_in[32][8], int sb_act, SynthFilterContext *synth, FFTContext *imdct, float synth_buf_ptr[512], int *synth_buf_offset, float synth_buf2[32], const float window[512], float *samples_out, float raXin[32], float scale); -void ff_dca_lfe_fir_neon(float *out, const float *in, const float *coefs, - int decifactor, float scale); void ff_synth_filter_float_vfp(FFTContext *imdct, float *synth_buf_ptr, int *synth_buf_offset, @@ -47,42 +53,18 @@ void ff_synth_filter_float_neon(FFTContext *imdct, float out[32], const float in[32], float scale); -static void lfe_fir0_vfp(float *out, const float *in, const float *coefs, - float scale) -{ - ff_dca_lfe_fir_vfp(out, in, coefs, 32, scale); -} - -static void lfe_fir1_vfp(float *out, const float *in, const float *coefs, - float scale) -{ - ff_dca_lfe_fir_vfp(out, in, coefs, 64, scale); -} - -static void lfe_fir0_neon(float *out, const float *in, const float *coefs, - float scale) -{ - ff_dca_lfe_fir_neon(out, in, coefs, 32, scale); -} - -static void lfe_fir1_neon(float *out, const float *in, const float *coefs, - float scale) -{ - ff_dca_lfe_fir_neon(out, in, coefs, 64, scale); -} - av_cold void ff_dcadsp_init_arm(DCADSPContext *s) { int cpu_flags = av_get_cpu_flags(); if (have_vfp(cpu_flags) && !have_vfpv3(cpu_flags)) { - s->lfe_fir[0] = lfe_fir0_vfp; - s->lfe_fir[1] = lfe_fir1_vfp; + s->lfe_fir[0] = ff_dca_lfe_fir32_vfp; + s->lfe_fir[1] = ff_dca_lfe_fir64_vfp; s->qmf_32_subbands = ff_dca_qmf_32_subbands_vfp; } if (have_neon(cpu_flags)) { - s->lfe_fir[0] = lfe_fir0_neon; - s->lfe_fir[1] = lfe_fir1_neon; + s->lfe_fir[0] = ff_dca_lfe_fir0_neon; + s->lfe_fir[1] = ff_dca_lfe_fir1_neon; } } diff --git a/libavcodec/arm/dcadsp_neon.S b/libavcodec/arm/dcadsp_neon.S index 6a6c77aca8..4cef1a83d9 100644 --- a/libavcodec/arm/dcadsp_neon.S +++ b/libavcodec/arm/dcadsp_neon.S @@ -20,17 +20,23 @@ #include "libavutil/arm/asm.S" -function ff_dca_lfe_fir_neon, export=1 +function ff_dca_lfe_fir0_neon, export=1 push {r4-r6,lr} +NOVFP vmov s0, r3 @ scale + mov r3, #32 @ decifactor + mov r6, #256/32 + b dca_lfe_fir +endfunc +function ff_dca_lfe_fir1_neon, export=1 + push {r4-r6,lr} +NOVFP vmov s0, r3 @ scale + mov r3, #64 @ decifactor + mov r6, #256/64 +dca_lfe_fir: add r4, r0, r3, lsl #2 @ out2 add r5, r2, #256*4-16 @ cf1 sub r1, r1, #12 - cmp r3, #32 - ite eq - moveq r6, #256/32 - movne r6, #256/64 -NOVFP vldr s0, [sp, #16] @ scale mov lr, #-16 1: vmov.f32 q2, #0.0 @ v0 diff --git a/libavcodec/arm/dcadsp_vfp.S b/libavcodec/arm/dcadsp_vfp.S index b23ce4ac8a..ecfbb31266 100644 --- a/libavcodec/arm/dcadsp_vfp.S +++ b/libavcodec/arm/dcadsp_vfp.S @@ -24,7 +24,6 @@ POUT .req a1 PIN .req a2 PCOEF .req a3 -DECIFACTOR .req a4 OLDFPSCR .req a4 COUNTER .req ip @@ -129,6 +128,15 @@ POST3 .req s27 .endm .macro dca_lfe_fir decifactor +function ff_dca_lfe_fir\decifactor\()_vfp, export=1 +NOVFP vmov s0, r3 + fmrx OLDFPSCR, FPSCR + ldr ip, =0x03030000 @ RunFast mode, short vectors of length 4, stride 1 + fmxr FPSCR, ip + vldr IN0, [PIN, #-0*4] + vldr IN1, [PIN, #-1*4] + vldr IN2, [PIN, #-2*4] + vldr IN3, [PIN, #-3*4] .if \decifactor == 32 .set JMAX, 8 vpush {s16-s31} @@ -165,32 +173,16 @@ POST3 .req s27 .endif fmxr FPSCR, OLDFPSCR bx lr +endfunc .endm - -/* void ff_dca_lfe_fir_vfp(float *out, const float *in, const float *coefs, - * int decifactor, float scale) - */ -function ff_dca_lfe_fir_vfp, export=1 - teq DECIFACTOR, #32 - fmrx OLDFPSCR, FPSCR - ldr ip, =0x03030000 @ RunFast mode, short vectors of length 4, stride 1 - fmxr FPSCR, ip -NOVFP vldr s0, [sp] - vldr IN0, [PIN, #-0*4] - vldr IN1, [PIN, #-1*4] - vldr IN2, [PIN, #-2*4] - vldr IN3, [PIN, #-3*4] - beq 32f -64: dca_lfe_fir 64 + dca_lfe_fir 64 .ltorg -32: dca_lfe_fir 32 -endfunc + dca_lfe_fir 32 .unreq POUT .unreq PIN .unreq PCOEF - .unreq DECIFACTOR .unreq OLDFPSCR .unreq COUNTER |