diff options
author | Hendrik Leppkes <h.leppkes@gmail.com> | 2016-01-02 11:14:28 +0100 |
---|---|---|
committer | Hendrik Leppkes <h.leppkes@gmail.com> | 2016-01-02 11:14:28 +0100 |
commit | 10e075c138467b1fbe63cd9eec0dfd2c18cf903a (patch) | |
tree | 5eb36d69caf24a12e4e7b9018b50d5b7731e4a66 | |
parent | de3a33784cb79c35fadb6fc22a0b406450bdef7c (diff) | |
parent | 705f5e5e155f6f280a360af220fc5b30cfcee702 (diff) | |
download | ffmpeg-10e075c138467b1fbe63cd9eec0dfd2c18cf903a.tar.gz |
Merge commit '705f5e5e155f6f280a360af220fc5b30cfcee702'
* commit '705f5e5e155f6f280a360af220fc5b30cfcee702':
arm64: port synth_filter_float_neon from arm
Merged-by: Hendrik Leppkes <h.leppkes@gmail.com>
-rw-r--r-- | libavcodec/aarch64/Makefile | 3 | ||||
-rw-r--r-- | libavcodec/aarch64/asm-offsets.h | 3 | ||||
-rw-r--r-- | libavcodec/aarch64/dcadsp_init.c | 16 | ||||
-rw-r--r-- | libavcodec/aarch64/synth_filter_neon.S | 119 | ||||
-rw-r--r-- | libavcodec/synth_filter.c | 8 | ||||
-rw-r--r-- | libavcodec/synth_filter.h | 1 |
6 files changed, 147 insertions, 3 deletions
diff --git a/libavcodec/aarch64/Makefile b/libavcodec/aarch64/Makefile index 0b614a3ac2..2175578f8e 100644 --- a/libavcodec/aarch64/Makefile +++ b/libavcodec/aarch64/Makefile @@ -16,7 +16,8 @@ OBJS-$(CONFIG_VORBIS_DECODER) += aarch64/vorbisdsp_init.o ARMV8-OBJS-$(CONFIG_VIDEODSP) += aarch64/videodsp.o -NEON-OBJS-$(CONFIG_DCA_DECODER) += aarch64/dcadsp_neon.o +NEON-OBJS-$(CONFIG_DCA_DECODER) += aarch64/dcadsp_neon.o \ + aarch64/synth_filter_neon.o NEON-OBJS-$(CONFIG_FFT) += aarch64/fft_neon.o NEON-OBJS-$(CONFIG_H264CHROMA) += aarch64/h264cmc_neon.o NEON-OBJS-$(CONFIG_H264DSP) += aarch64/h264dsp_neon.o \ diff --git a/libavcodec/aarch64/asm-offsets.h b/libavcodec/aarch64/asm-offsets.h index 8defd7c9ec..e05c5ad2e4 100644 --- a/libavcodec/aarch64/asm-offsets.h +++ b/libavcodec/aarch64/asm-offsets.h @@ -27,4 +27,7 @@ #define CELT_TMP 0x10 #define CELT_TWIDDLE (CELT_TMP + 0x8) // loaded as pair +/* FFTContext */ +#define IMDCT_HALF 0x48 + #endif /* AVCODEC_AARCH64_ASM_OFFSETS_H */ diff --git a/libavcodec/aarch64/dcadsp_init.c b/libavcodec/aarch64/dcadsp_init.c index 45c9d96bc8..769ab6ee1b 100644 --- a/libavcodec/aarch64/dcadsp_init.c +++ b/libavcodec/aarch64/dcadsp_init.c @@ -22,7 +22,15 @@ #include "libavutil/aarch64/cpu.h" #include "libavutil/attributes.h" +#include "libavutil/internal.h" #include "libavcodec/dcadsp.h" +#include "libavcodec/fft.h" + +#include "asm-offsets.h" + +#if HAVE_NEON || HAVE_VFP +AV_CHECK_OFFSET(FFTContext, imdct_half, IMDCT_HALF); +#endif void ff_dca_lfe_fir0_neon(float *out, const float *in, const float *coefs); void ff_dca_lfe_fir1_neon(float *out, const float *in, const float *coefs); @@ -49,3 +57,11 @@ av_cold void ff_dcadsp_init_aarch64(DCADSPContext *s) s->decode_hf = ff_decode_hf_neon; } } + +av_cold void ff_synth_filter_init_aarch64(SynthFilterContext *s) +{ + int cpu_flags = av_get_cpu_flags(); + + if (have_neon(cpu_flags)) + s->synth_filter_float = ff_synth_filter_float_neon; +} diff --git a/libavcodec/aarch64/synth_filter_neon.S b/libavcodec/aarch64/synth_filter_neon.S new file mode 100644 index 0000000000..65551cbff7 --- /dev/null +++ b/libavcodec/aarch64/synth_filter_neon.S @@ -0,0 +1,119 @@ +/* + * Copyright (c) 2010 Mans Rullgard <mans@mansr.com> + * Copyright (c) 2015 Janne Grunau <janne-libav@jannau.net> + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "asm-offsets.h" + +#include "libavutil/aarch64/asm.S" + +.macro inner_loop + ld1 {v29.4s}, [x9], x15 + ld1 {v28.4s}, [x8], x15 + ld1 {v30.4s}, [x10], x15 + ld1 {v31.4s}, [x11], x15 + rev64 v28.4s, v28.4s + ld1 {v24.4s}, [x4], x15 + ld1 {v25.4s}, [x5], x15 + rev64 v31.4s, v31.4s + ld1 {v26.4s}, [x6], x15 + fmla v5.4s, v25.4s, v29.4s + ld1 {v27.4s}, [x7], x15 + ext v28.16b, v28.16b, v28.16b, #8 + ext v31.16b, v31.16b, v31.16b, #8 + fmla v6.4s, v26.4s, v30.4s + fmls v4.4s, v24.4s, v28.4s + fmla v7.4s, v27.4s, v31.4s +.endm + +function ff_synth_filter_float_neon, export=1 + ldr w7, [x2] // *synth_buf_offset + ldr x9, [x0, #IMDCT_HALF] // imdct_half function pointer + sxtw x7, w7 + stp x3, x4, [sp, #-64]! + add x1, x1, x7, lsl #2 // synth_buf + sub w8, w7, #32 + stp x5, x1, [sp, #16] + bic x7, x7, #63 + and w8, w8, #511 + stp x7, x30, [sp, #32] + str w8, [x2] + str s0, [sp, #48] + + mov x2, x6 // in + + blr x9 + + ldp x2, x4, [sp] // synct_buf_2, window + ldp x13, x9, [sp, #16] // out, synth_buf + ldp x0, x30, [sp, #32] // *synth_buf_offset + ldr s0, [sp, #48] + + add x3, x2, #16*4 // synct_buf_2 + 16 + add x14, x13, #16*4 // out + 16 + add x8, x9, #12*4 + mov x15, #64*4 + mov x1, #4 +1: + add x10, x9, #16*4 // synth_buf + add x11, x8, #16*4 + add x5, x4, #16*4 // window + add x6, x4, #32*4 + add x7, x4, #48*4 + + ld1 {v4.4s}, [x2] // a + ld1 {v5.4s}, [x3] // b + movi v6.4s, #0 // c + movi v7.4s, #0 // d + + mov x12, #512 +2: + sub x12, x12, #64 + cmp x12, x0 + inner_loop + b.gt 2b + + sub x8, x8, #512*4 + sub x9, x9, #512*4 + cbz x12, 4f + sub x10, x10, #512*4 + sub x11, x11, #512*4 +3: + subs x12, x12, #64 + inner_loop + b.gt 3b +4: + subs x1, x1, #1 + fmul v4.4s, v4.4s, v0.s[0] + fmul v5.4s, v5.4s, v0.s[0] + st1 {v6.4s}, [x2], #16 + st1 {v7.4s}, [x3], #16 + st1 {v4.4s}, [x13], #16 + st1 {v5.4s}, [x14], #16 + b.le 10f + + sub x4, x4, #508*4 // window + add x9, x9, #4*4 // synth_buf + sub x8, x8, #4*4 // synth_buf + b 1b + +10: + add sp, sp, #64 + ret +endfunc diff --git a/libavcodec/synth_filter.c b/libavcodec/synth_filter.c index d49ffe642d..8dfca00ed9 100644 --- a/libavcodec/synth_filter.c +++ b/libavcodec/synth_filter.c @@ -60,6 +60,10 @@ av_cold void ff_synth_filter_init(SynthFilterContext *c) { c->synth_filter_float = synth_filter_float; - if (ARCH_ARM) ff_synth_filter_init_arm(c); - if (ARCH_X86) ff_synth_filter_init_x86(c); + if (ARCH_AARCH64) + ff_synth_filter_init_aarch64(c); + if (ARCH_ARM) + ff_synth_filter_init_arm(c); + if (ARCH_X86) + ff_synth_filter_init_x86(c); } diff --git a/libavcodec/synth_filter.h b/libavcodec/synth_filter.h index b63fd779b5..e073f427ad 100644 --- a/libavcodec/synth_filter.h +++ b/libavcodec/synth_filter.h @@ -32,6 +32,7 @@ typedef struct SynthFilterContext { } SynthFilterContext; void ff_synth_filter_init(SynthFilterContext *c); +void ff_synth_filter_init_aarch64(SynthFilterContext *c); void ff_synth_filter_init_arm(SynthFilterContext *c); void ff_synth_filter_init_x86(SynthFilterContext *c); |