diff options
author | Justin Ruggles <justin.ruggles@gmail.com> | 2015-10-28 15:38:22 +0100 |
---|---|---|
committer | Diego Biurrun <diego@biurrun.de> | 2016-10-01 00:46:25 +0200 |
commit | b57e38f52cc3f31a27105c28887d57cd6812c3eb (patch) | |
tree | e37260a47617bb608972ae3169b90feb43c270e6 /libavcodec/x86/ac3dsp_downmix.asm | |
parent | a9ba59591ed509fb7e6decfde8da4cbfd4ddf4b8 (diff) | |
download | ffmpeg-b57e38f52cc3f31a27105c28887d57cd6812c3eb.tar.gz |
ac3dsp: x86: Replace inline asm for in-decoder downmixing with standalone asm
Adds a wrapper function for downmixing which detects channel count changes
and updates the selected downmix function accordingly.
Simplification and porting to current x86inc infrastructure by Diego Biurrun.
Signed-off-by: Diego Biurrun <diego@biurrun.de>
Diffstat (limited to 'libavcodec/x86/ac3dsp_downmix.asm')
-rw-r--r-- | libavcodec/x86/ac3dsp_downmix.asm | 187 |
1 files changed, 187 insertions, 0 deletions
;*****************************************************************************
;* x86-optimized AC-3 downmixing
;* Copyright (c) 2012 Justin Ruggles
;*
;* This file is part of Libav.
;*
;* Libav is free software; you can redistribute it and/or
;* modify it under the terms of the GNU Lesser General Public
;* License as published by the Free Software Foundation; either
;* version 2.1 of the License, or (at your option) any later version.
;*
;* Libav is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;* Lesser General Public License for more details.
;*
;* You should have received a copy of the GNU Lesser General Public
;* License along with Libav; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;******************************************************************************

;******************************************************************************
;* This is based on the channel mixing asm in libavresample, but it is
;* simplified for only float coefficients and only 3 to 6 channels.
;******************************************************************************

%include "libavutil/x86/x86util.asm"

SECTION .text

;-----------------------------------------------------------------------------
; functions to downmix from 3 to 6 channels to mono or stereo
; void ff_ac3_downmix_*(float **samples, float **matrix, int len);
;
; samples : array of per-channel sample pointers; the result is written back
;           over channel 0 (and channel 1 for stereo output)
; matrix  : array of per-output-channel coefficient pointers, one 4-byte
;           coefficient per input channel
; len     : number of 4-byte samples per channel
;
; NOTE(review): every sample access uses mova and the loop advances by mmsize
; bytes, so the buffers presumably have to be mmsize-aligned and len a
; multiple of mmsize/4 -- confirm against the callers.
;-----------------------------------------------------------------------------

%macro AC3_DOWNMIX 2 ; %1 = in channels, %2 = out channels
; readable aliases for the macro arguments; stereo is nonzero for 2 outputs
%assign in_channels  %1
%assign out_channels %2
%assign stereo       out_channels - 1

; m0..m(needed_mmregs-1) are reserved as working registers for the loop body;
; the stereo variant needs one more of them than the mono variant
%assign needed_mmregs 3
%if stereo
    %assign needed_mmregs 4
%endif

; split the splatted matrix coefficients between the remaining vector
; registers and aligned stack slots
%assign matrix_elements    in_channels * out_channels
%assign matrix_elements_mm num_mmregs - needed_mmregs
%if matrix_elements_mm > matrix_elements
    %assign matrix_elements_mm matrix_elements
%endif
%assign total_mmregs          needed_mmregs + matrix_elements_mm
; matrix_elements_mm was clamped above, so this is never negative
%assign matrix_elements_stack matrix_elements - matrix_elements_mm

cglobal ac3_downmix_%1_to_%2, 3,in_channels+1,total_mmregs,0-matrix_elements_stack*mmsize, src0, src1, len, src2, src3, src4, src5

; fetch the coefficient row pointers
; matrix0q shares r1 with src1q and matrix1q shares r3 with src2q; both are
; only needed until the coefficients have been splatted below.
; matrix[1] has to be read first because matrix0q is overwritten afterwards.
%define matrix0q r1q
%define matrix1q r3q
%if stereo
    mov         matrix1q, [matrix0q+gprsize]
%endif
    mov         matrix0q, [matrix0q]

; name the storage of every coefficient:
;   mx_<row>_<ch>       = operand (vector register or stack slot)
;   mx_stack_<row>_<ch> = 1 if that operand lives on the stack, else 0
; row 0 is numbered first, so its coefficients get registers before row 1
%assign %%ch  0
%assign %%reg needed_mmregs
%rep in_channels
    %if %%ch >= matrix_elements_mm
        CAT_XDEFINE mx_stack_0_, %%ch, 1
        CAT_XDEFINE mx_0_, %%ch, [rsp+(%%ch-matrix_elements_mm)*mmsize]
    %else
        CAT_XDEFINE mx_stack_0_, %%ch, 0
        CAT_XDEFINE mx_0_, %%ch, m %+ %%reg
        %assign %%reg %%reg+1
    %endif
    %assign %%ch %%ch+1
%endrep
%if stereo
%assign %%ch 0
%rep in_channels
    %if in_channels + %%ch >= matrix_elements_mm
        CAT_XDEFINE mx_stack_1_, %%ch, 1
        CAT_XDEFINE mx_1_, %%ch, [rsp+(in_channels+%%ch-matrix_elements_mm)*mmsize]
    %else
        CAT_XDEFINE mx_stack_1_, %%ch, 0
        CAT_XDEFINE mx_1_, %%ch, m %+ %%reg
        %assign %%reg %%reg+1
    %endif
    %assign %%ch %%ch+1
%endrep
%endif

; broadcast each scalar coefficient across a full vector; coefficients that
; live on the stack are splatted in m0 first and then stored
%assign %%ch 0
%rep in_channels
    %if mx_stack_0_ %+ %%ch
        VBROADCASTSS m0, [matrix0q+4*%%ch]
        mova         mx_0_ %+ %%ch, m0
    %else
        VBROADCASTSS mx_0_ %+ %%ch, [matrix0q+4*%%ch]
    %endif
    %if stereo
        %if mx_stack_1_ %+ %%ch
            VBROADCASTSS m0, [matrix1q+4*%%ch]
            mova         mx_1_ %+ %%ch, m0
        %else
            VBROADCASTSS mx_1_ %+ %%ch, [matrix1q+4*%%ch]
        %endif
    %endif
    %assign %%ch %%ch+1
%endrep

    ; lenq = length in bytes (4 bytes per sample)
    lea         lenq, [4*r2d]

    ; load the channel pointers, each advanced to the end of its buffer;
    ; channel 0 goes last because src0q still holds the pointer array
%assign %%ch 1
%rep (in_channels - 1)
    mov         src %+ %%ch %+ q, [src0q+%%ch*gprsize]
    add         src %+ %%ch %+ q, lenq
    %assign %%ch %%ch+1
%endrep
    mov         src0q, [src0q]
    add         src0q, lenq

    ; index from -bytes up towards zero so the add below doubles as the
    ; loop test
    neg         lenq
.loop:
    ; channel 0 initialises the accumulators: m0 = output 0, m1 = output 1
    %if stereo || mx_stack_0_0
        mova        m0, [src0q+lenq]
        %if stereo
            mulps       m1, m0, mx_1_0
        %endif
        mulps       m0, m0, mx_0_0
    %else
        ; mono with the coefficient in a register: multiply straight from
        ; memory and skip the separate load
        mulps       m0, mx_0_0, [src0q+lenq]
    %endif

    ; accumulate the remaining input channels
%assign %%ch 1
%rep (in_channels - 1)
    %define src_ptr src %+ %%ch %+ q
    %if stereo || mx_stack_0_ %+ %%ch
        mova        m2, [src_ptr+lenq]
        %if stereo
            FMULADD_PS  m1, m2, mx_1_ %+ %%ch, m1, m3
        %endif
        ; m2 is passed as the temporary as well: the input vector is not
        ; needed any more after this step
        FMULADD_PS  m0, m2, mx_0_ %+ %%ch, m0, m2
    %else
        ; mono with the coefficient in a register: no separate load needed
        FMULADD_PS  m0, mx_0_ %+ %%ch, [src_ptr+lenq], m0, m1
    %endif
    %assign %%ch %%ch+1
%endrep

    ; write the downmixed samples back over the first input channel(s)
    mova        [src0q+lenq], m0
    %if stereo
        mova        [src1q+lenq], m1
    %endif

    add         lenq, mmsize
    jl          .loop                   ; continue while the index is negative
    RET
%endmacro

; emit the mono and the stereo downmix for one input channel count using the
; currently selected instruction set
%macro AC3_DOWNMIX_MONO_STEREO 1 ; %1 = in channels
    AC3_DOWNMIX %1, 1
    AC3_DOWNMIX %1, 2
%endmacro

; instantiate all variants: 3 to 6 input channels, for SSE, AVX and (when
; HAVE_FMA3_EXTERNAL is set) FMA3
%macro AC3_DOWNMIX_FUNCS 0
%assign %%in_ch 3
%rep 4
    INIT_XMM sse
    AC3_DOWNMIX_MONO_STEREO %%in_ch
    INIT_YMM avx
    AC3_DOWNMIX_MONO_STEREO %%in_ch
    %if HAVE_FMA3_EXTERNAL
    INIT_YMM fma3
    AC3_DOWNMIX_MONO_STEREO %%in_ch
    %endif
    %assign %%in_ch %%in_ch+1
%endrep
%endmacro

AC3_DOWNMIX_FUNCS