author     Andreas Rheinhardt <andreas.rheinhardt@outlook.com>   2023-09-28 19:57:36 +0200
committer  Andreas Rheinhardt <andreas.rheinhardt@outlook.com>   2023-10-01 02:25:09 +0200
commit     6f7bf64dbca408b700582fb9678f300b14267585 (patch)
tree       8180359c7fd1e808592848b10a0eb2ba609fe4ea /libavcodec/ppc
parent     d9464f3e34e444c4e798ec882dab95bafe5179d5 (diff)
avcodec: Remove DCT, FFT, MDCT and RDFT
They were replaced by TX from libavutil; the tremendous work to get to this
point (both creating TX as well as porting the users of the components removed
in this commit) was completely performed by Lynne alone.

Removing the subsystems from configure may break some command lines, because
the --disable-fft etc. options are no longer recognized.

Co-authored-by: Lynne <dev@lynne.ee>
Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
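
For orientation, the sketch below shows how a forward float FFT is requested
through the libavutil TX API that this commit migrates to; the transform
length, buffers and error handling are illustrative only and are not part of
this commit.

    #include <stdlib.h>
    #include <libavutil/tx.h>

    static int fft_tx_example(void)
    {
        const int len = 1024;                        /* number of complex samples */
        AVTXContext *ctx = NULL;
        av_tx_fn fft = NULL;
        float scale = 1.0f;
        AVComplexFloat *in  = calloc(len, sizeof(*in));
        AVComplexFloat *out = calloc(len, sizeof(*out));
        int ret = -1;

        if (in && out)
            /* inv = 0 requests the forward transform; unlike the removed
             * FFTContext path, no separate revtab permutation step is needed. */
            ret = av_tx_init(&ctx, &fft, AV_TX_FLOAT_FFT, 0, len, &scale, 0);

        if (ret >= 0) {
            in[0].re = 1.0f;                         /* impulse in, flat spectrum out */
            fft(ctx, out, in, sizeof(AVComplexFloat)); /* stride is given in bytes */
        }

        av_tx_uninit(&ctx);
        free(in);
        free(out);
        return ret;
    }
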
Diffstat (limited to 'libavcodec/ppc')
-rw-r--r--  libavcodec/ppc/Makefile         3
-rw-r--r--  libavcodec/ppc/fft_altivec.S  458
-rw-r--r--  libavcodec/ppc/fft_init.c     168
-rw-r--r--  libavcodec/ppc/fft_vsx.c      226
-rw-r--r--  libavcodec/ppc/fft_vsx.h      829
5 files changed, 0 insertions, 1684 deletions
diff --git a/libavcodec/ppc/Makefile b/libavcodec/ppc/Makefile
index bc13d8a0ce..10b9ca60da 100644
--- a/libavcodec/ppc/Makefile
+++ b/libavcodec/ppc/Makefile
@@ -1,9 +1,6 @@
# subsystems
OBJS-$(CONFIG_AUDIODSP) += ppc/audiodsp.o
OBJS-$(CONFIG_BLOCKDSP) += ppc/blockdsp.o
-OBJS-$(CONFIG_FFT) += ppc/fft_init.o \
- ppc/fft_altivec.o \
- ppc/fft_vsx.o
OBJS-$(CONFIG_FDCTDSP) += ppc/fdctdsp.o
OBJS-$(CONFIG_FMTCONVERT) += ppc/fmtconvert_altivec.o
OBJS-$(CONFIG_H264CHROMA) += ppc/h264chroma_init.o
diff --git a/libavcodec/ppc/fft_altivec.S b/libavcodec/ppc/fft_altivec.S
deleted file mode 100644
index 8cd68d6a90..0000000000
--- a/libavcodec/ppc/fft_altivec.S
+++ /dev/null
@@ -1,458 +0,0 @@
-/*
- * FFT transform with Altivec optimizations
- * Copyright (c) 2009 Loren Merritt
- *
- * This algorithm (though not any of the implementation details) is
- * based on libdjbfft by D. J. Bernstein.
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-/*
- * These functions are not individually interchangeable with the C versions.
- * While C takes arrays of FFTComplex, Altivec leaves intermediate results
- * in blocks as convenient to the vector size.
- * i.e. {4x real, 4x imaginary, 4x real, ...}
- *
- * I ignore standard calling convention.
- * Instead, the following registers are treated as global constants:
- * v14: zero
- * v15..v18: cosines
- * v19..v29: permutations
- * r9: 16
- * r12: ff_cos_tabs
- * and the rest are free for local use.
- */
-
-#include "config.h"
-
-#if HAVE_GNU_AS && HAVE_ALTIVEC && HAVE_BIGENDIAN
-
-#include "asm.S"
-
-.text
-
-.macro addi2 ra, imm // add 32-bit immediate
-.if \imm & 0xffff
- addi \ra, \ra, \imm@l
-.endif
-.if (\imm+0x8000)>>16
- addis \ra, \ra, \imm@ha
-.endif
-.endm
-
-.macro FFT4 a0, a1, a2, a3 // in:0-1 out:2-3
- vperm \a2,\a0,\a1,v20 // vcprm(0,1,s2,s1) // {r0,i0,r3,i2}
- vperm \a3,\a0,\a1,v21 // vcprm(2,3,s0,s3) // {r1,i1,r2,i3}
- vaddfp \a0,\a2,\a3 // {t1,t2,t6,t5}
- vsubfp \a1,\a2,\a3 // {t3,t4,t8,t7}
- vmrghw \a2,\a0,\a1 // vcprm(0,s0,1,s1) // {t1,t3,t2,t4}
- vperm \a3,\a0,\a1,v22 // vcprm(2,s3,3,s2) // {t6,t7,t5,t8}
- vaddfp \a0,\a2,\a3 // {r0,r1,i0,i1}
- vsubfp \a1,\a2,\a3 // {r2,r3,i2,i3}
- vperm \a2,\a0,\a1,v23 // vcprm(0,1,s0,s1) // {r0,r1,r2,r3}
- vperm \a3,\a0,\a1,v24 // vcprm(2,3,s2,s3) // {i0,i1,i2,i3}
-.endm
-
-.macro FFT4x2 a0, a1, b0, b1, a2, a3, b2, b3
- vperm \a2,\a0,\a1,v20 // vcprm(0,1,s2,s1) // {r0,i0,r3,i2}
- vperm \a3,\a0,\a1,v21 // vcprm(2,3,s0,s3) // {r1,i1,r2,i3}
- vperm \b2,\b0,\b1,v20
- vperm \b3,\b0,\b1,v21
- vaddfp \a0,\a2,\a3 // {t1,t2,t6,t5}
- vsubfp \a1,\a2,\a3 // {t3,t4,t8,t7}
- vaddfp \b0,\b2,\b3
- vsubfp \b1,\b2,\b3
- vmrghw \a2,\a0,\a1 // vcprm(0,s0,1,s1) // {t1,t3,t2,t4}
- vperm \a3,\a0,\a1,v22 // vcprm(2,s3,3,s2) // {t6,t7,t5,t8}
- vmrghw \b2,\b0,\b1
- vperm \b3,\b0,\b1,v22
- vaddfp \a0,\a2,\a3 // {r0,r1,i0,i1}
- vsubfp \a1,\a2,\a3 // {r2,r3,i2,i3}
- vaddfp \b0,\b2,\b3
- vsubfp \b1,\b2,\b3
- vperm \a2,\a0,\a1,v23 // vcprm(0,1,s0,s1) // {r0,r1,r2,r3}
- vperm \a3,\a0,\a1,v24 // vcprm(2,3,s2,s3) // {i0,i1,i2,i3}
- vperm \b2,\b0,\b1,v23
- vperm \b3,\b0,\b1,v24
-.endm
-
-.macro FFT8 a0, a1, b0, b1, a2, a3, b2, b3, b4 // in,out:a0-b1
- vmrghw \b2,\b0,\b1 // vcprm(0,s0,1,s1) // {r4,r6,i4,i6}
- vmrglw \b3,\b0,\b1 // vcprm(2,s2,3,s3) // {r5,r7,i5,i7}
- vperm \a2,\a0,\a1,v20 // FFT4 ...
- vperm \a3,\a0,\a1,v21
- vaddfp \b0,\b2,\b3 // {t1,t3,t2,t4}
- vsubfp \b1,\b2,\b3 // {r5,r7,i5,i7}
- vperm \b4,\b1,\b1,v25 // vcprm(2,3,0,1) // {i5,i7,r5,r7}
- vaddfp \a0,\a2,\a3
- vsubfp \a1,\a2,\a3
- vmaddfp \b1,\b1,v17,v14 // * {-1,1,1,-1}/sqrt(2)
- vmaddfp \b1,\b4,v18,\b1 // * { 1,1,1,1 }/sqrt(2) // {t8,ta,t7,t9}
- vmrghw \a2,\a0,\a1
- vperm \a3,\a0,\a1,v22
- vperm \b2,\b0,\b1,v26 // vcprm(1,2,s3,s0) // {t3,t2,t9,t8}
- vperm \b3,\b0,\b1,v27 // vcprm(0,3,s2,s1) // {t1,t4,t7,ta}
- vaddfp \a0,\a2,\a3
- vsubfp \a1,\a2,\a3
- vaddfp \b0,\b2,\b3 // {t1,t2,t9,ta}
- vsubfp \b1,\b2,\b3 // {t6,t5,tc,tb}
- vperm \a2,\a0,\a1,v23
- vperm \a3,\a0,\a1,v24
- vperm \b2,\b0,\b1,v28 // vcprm(0,2,s1,s3) // {t1,t9,t5,tb}
- vperm \b3,\b0,\b1,v29 // vcprm(1,3,s0,s2) // {t2,ta,t6,tc}
- vsubfp \b0,\a2,\b2 // {r4,r5,r6,r7}
- vsubfp \b1,\a3,\b3 // {i4,i5,i6,i7}
- vaddfp \a0,\a2,\b2 // {r0,r1,r2,r3}
- vaddfp \a1,\a3,\b3 // {i0,i1,i2,i3}
-.endm
-
-.macro BF d0,d1,s0,s1
- vsubfp \d1,\s0,\s1
- vaddfp \d0,\s0,\s1
-.endm
-
-.macro zip d0,d1,s0,s1
- vmrghw \d0,\s0,\s1
- vmrglw \d1,\s0,\s1
-.endm
-
-.macro def_fft4 interleave
-fft4\interleave\()_altivec:
- lvx v0, 0,r3
- lvx v1,r9,r3
- FFT4 v0,v1,v2,v3
-.ifnb \interleave
- zip v0,v1,v2,v3
- stvx v0, 0,r3
- stvx v1,r9,r3
-.else
- stvx v2, 0,r3
- stvx v3,r9,r3
-.endif
- blr
-.endm
-
-.macro def_fft8 interleave
-fft8\interleave\()_altivec:
- addi r4,r3,32
- lvx v0, 0,r3
- lvx v1,r9,r3
- lvx v2, 0,r4
- lvx v3,r9,r4
- FFT8 v0,v1,v2,v3,v4,v5,v6,v7,v8
-.ifnb \interleave
- zip v4,v5,v0,v1
- zip v6,v7,v2,v3
- stvx v4, 0,r3
- stvx v5,r9,r3
- stvx v6, 0,r4
- stvx v7,r9,r4
-.else
- stvx v0, 0,r3
- stvx v1,r9,r3
- stvx v2, 0,r4
- stvx v3,r9,r4
-.endif
- blr
-.endm
-
-.macro def_fft16 interleave
-fft16\interleave\()_altivec:
- addi r5,r3,64
- addi r6,r3,96
- addi r4,r3,32
- lvx v0, 0,r5
- lvx v1,r9,r5
- lvx v2, 0,r6
- lvx v3,r9,r6
- FFT4x2 v0,v1,v2,v3,v4,v5,v6,v7
- lvx v0, 0,r3
- lvx v1,r9,r3
- lvx v2, 0,r4
- lvx v3,r9,r4
- FFT8 v0,v1,v2,v3,v8,v9,v10,v11,v12
- vmaddfp v8,v4,v15,v14 // r2*wre
- vmaddfp v9,v5,v15,v14 // i2*wre
- vmaddfp v10,v6,v15,v14 // r3*wre
- vmaddfp v11,v7,v15,v14 // i3*wre
- vmaddfp v8,v5,v16,v8 // i2*wim
- vnmsubfp v9,v4,v16,v9 // r2*wim
- vnmsubfp v10,v7,v16,v10 // i3*wim
- vmaddfp v11,v6,v16,v11 // r3*wim
- BF v10,v12,v10,v8
- BF v11,v13,v9,v11
- BF v0,v4,v0,v10
- BF v3,v7,v3,v12
- BF v1,v5,v1,v11
- BF v2,v6,v2,v13
-.ifnb \interleave
- zip v8, v9,v0,v1
- zip v10,v11,v2,v3
- zip v12,v13,v4,v5
- zip v14,v15,v6,v7
- stvx v8, 0,r3
- stvx v9,r9,r3
- stvx v10, 0,r4
- stvx v11,r9,r4
- stvx v12, 0,r5
- stvx v13,r9,r5
- stvx v14, 0,r6
- stvx v15,r9,r6
-.else
- stvx v0, 0,r3
- stvx v4, 0,r5
- stvx v3,r9,r4
- stvx v7,r9,r6
- stvx v1,r9,r3
- stvx v5,r9,r5
- stvx v2, 0,r4
- stvx v6, 0,r6
-.endif
- blr
-.endm
-
-// void pass(float *z, float *wre, int n)
-.macro PASS interleave, suffix
-fft_pass\suffix\()_altivec:
- mtctr r5
- slwi r0,r5,4
- slwi r7,r5,6 // o2
- slwi r5,r5,5 // o1
- add r10,r5,r7 // o3
- add r0,r4,r0 // wim
- addi r6,r5,16 // o1+16
- addi r8,r7,16 // o2+16
- addi r11,r10,16 // o3+16
-1:
- lvx v8, 0,r4 // wre
- lvx v10, 0,r0 // wim
- sub r0,r0,r9
- lvx v9, 0,r0
- vperm v9,v9,v10,v19 // vcprm(s0,3,2,1) => wim[0 .. -3]
- lvx v4,r3,r7 // r2 = z[o2]
- lvx v5,r3,r8 // i2 = z[o2+16]
- lvx v6,r3,r10 // r3 = z[o3]
- lvx v7,r3,r11 // i3 = z[o3+16]
- vmaddfp v10,v4,v8,v14 // r2*wre
- vmaddfp v11,v5,v8,v14 // i2*wre
- vmaddfp v12,v6,v8,v14 // r3*wre
- vmaddfp v13,v7,v8,v14 // i3*wre
- lvx v0, 0,r3 // r0 = z[0]
- lvx v3,r3,r6 // i1 = z[o1+16]
- vmaddfp v10,v5,v9,v10 // i2*wim
- vnmsubfp v11,v4,v9,v11 // r2*wim
- vnmsubfp v12,v7,v9,v12 // i3*wim
- vmaddfp v13,v6,v9,v13 // r3*wim
- lvx v1,r3,r9 // i0 = z[16]
- lvx v2,r3,r5 // r1 = z[o1]
- BF v12,v8,v12,v10
- BF v13,v9,v11,v13
- BF v0,v4,v0,v12
- BF v3,v7,v3,v8
-.if !\interleave
- stvx v0, 0,r3
- stvx v4,r3,r7
- stvx v3,r3,r6
- stvx v7,r3,r11
-.endif
- BF v1,v5,v1,v13
- BF v2,v6,v2,v9
-.if !\interleave
- stvx v1,r3,r9
- stvx v2,r3,r5
- stvx v5,r3,r8
- stvx v6,r3,r10
-.else
- vmrghw v8,v0,v1
- vmrglw v9,v0,v1
- stvx v8, 0,r3
- stvx v9,r3,r9
- vmrghw v8,v2,v3
- vmrglw v9,v2,v3
- stvx v8,r3,r5
- stvx v9,r3,r6
- vmrghw v8,v4,v5
- vmrglw v9,v4,v5
- stvx v8,r3,r7
- stvx v9,r3,r8
- vmrghw v8,v6,v7
- vmrglw v9,v6,v7
- stvx v8,r3,r10
- stvx v9,r3,r11
-.endif
- addi r3,r3,32
- addi r4,r4,16
- bdnz 1b
- sub r3,r3,r5
- blr
-.endm
-
-#define M_SQRT1_2 0.70710678118654752440 /* 1/sqrt(2) */
-
-#define WORD_0 0x00,0x01,0x02,0x03
-#define WORD_1 0x04,0x05,0x06,0x07
-#define WORD_2 0x08,0x09,0x0a,0x0b
-#define WORD_3 0x0c,0x0d,0x0e,0x0f
-#define WORD_s0 0x10,0x11,0x12,0x13
-#define WORD_s1 0x14,0x15,0x16,0x17
-#define WORD_s2 0x18,0x19,0x1a,0x1b
-#define WORD_s3 0x1c,0x1d,0x1e,0x1f
-
-#define vcprm(a, b, c, d) .byte WORD_##a, WORD_##b, WORD_##c, WORD_##d
-
- .rodata
- .align 4
-fft_data:
- .float 0, 0, 0, 0
- .float 1, 0.92387953, M_SQRT1_2, 0.38268343
- .float 0, 0.38268343, M_SQRT1_2, 0.92387953
- .float -M_SQRT1_2, M_SQRT1_2, M_SQRT1_2,-M_SQRT1_2
- .float M_SQRT1_2, M_SQRT1_2, M_SQRT1_2, M_SQRT1_2
- vcprm(s0,3,2,1)
- vcprm(0,1,s2,s1)
- vcprm(2,3,s0,s3)
- vcprm(2,s3,3,s2)
- vcprm(0,1,s0,s1)
- vcprm(2,3,s2,s3)
- vcprm(2,3,0,1)
- vcprm(1,2,s3,s0)
- vcprm(0,3,s2,s1)
- vcprm(0,2,s1,s3)
- vcprm(1,3,s0,s2)
-
-.macro lvm b, r, regs:vararg
- lvx \r, 0, \b
- addi \b, \b, 16
- .ifnb \regs
- lvm \b, \regs
- .endif
-.endm
-
-.macro stvm b, r, regs:vararg
- stvx \r, 0, \b
- addi \b, \b, 16
- .ifnb \regs
- stvm \b, \regs
- .endif
-.endm
-
-.macro fft_calc interleave
-extfunc ff_fft_calc\interleave\()_altivec
- mflr r0
- stp r0, 2*PS(R(1))
- stpu r1, -(160+16*PS)(R(1))
- get_got r11
- addi r6, r1, 16*PS
- stvm r6, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29
- mfvrsave r0
- stw r0, 15*PS(R(1))
-#if __APPLE__
- li r6, 0xfffffffc
-#else
- li r6, -4
-#endif
- mtvrsave r6
-
- movrel r6, fft_data, r11
- lvm r6, v14, v15, v16, v17, v18, v19, v20, v21
- lvm r6, v22, v23, v24, v25, v26, v27, v28, v29
-
- li r9, 16
- movrel r12, X(ff_cos_tabs), r11
-
- movrel r6, fft_dispatch_tab\interleave\()_altivec, r11
- lwz r3, 0(R(3))
- subi r3, r3, 2
- slwi r3, r3, 2+ARCH_PPC64
- lpx r3, r3, r6
- mtctr r3
- mr r3, r4
- bctrl
-
- addi r6, r1, 16*PS
- lvm r6, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29
- lwz r6, 15*PS(R(1))
- mtvrsave r6
- lp r1, 0(R(1))
- lp r0, 2*PS(R(1))
- mtlr r0
- blr
-.endm
-
-.macro DECL_FFT suffix, bits, n, n2, n4
-fft\n\suffix\()_altivec:
- mflr r0
- stp r0,PS*(\bits-3)(R(1))
- bl fft\n2\()_altivec
- addi2 r3,\n*4
- bl fft\n4\()_altivec
- addi2 r3,\n*2
- bl fft\n4\()_altivec
- addi2 r3,\n*-6
- lp r0,PS*(\bits-3)(R(1))
- lp r4,\bits*PS(R(12))
- mtlr r0
- li r5,\n/16
- b fft_pass\suffix\()_altivec
-.endm
-
-.macro DECL_FFTS interleave, suffix
- .text
- def_fft4 \suffix
- def_fft8 \suffix
- def_fft16 \suffix
- PASS \interleave, \suffix
- DECL_FFT \suffix, 5, 32, 16, 8
- DECL_FFT \suffix, 6, 64, 32, 16
- DECL_FFT \suffix, 7, 128, 64, 32
- DECL_FFT \suffix, 8, 256, 128, 64
- DECL_FFT \suffix, 9, 512, 256, 128
- DECL_FFT \suffix,10, 1024, 512, 256
- DECL_FFT \suffix,11, 2048, 1024, 512
- DECL_FFT \suffix,12, 4096, 2048, 1024
- DECL_FFT \suffix,13, 8192, 4096, 2048
- DECL_FFT \suffix,14,16384, 8192, 4096
- DECL_FFT \suffix,15,32768,16384, 8192
- DECL_FFT \suffix,16,65536,32768,16384
-
- fft_calc \suffix
-
- .rodata
- .align 3
-fft_dispatch_tab\suffix\()_altivec:
- PTR fft4\suffix\()_altivec
- PTR fft8\suffix\()_altivec
- PTR fft16\suffix\()_altivec
- PTR fft32\suffix\()_altivec
- PTR fft64\suffix\()_altivec
- PTR fft128\suffix\()_altivec
- PTR fft256\suffix\()_altivec
- PTR fft512\suffix\()_altivec
- PTR fft1024\suffix\()_altivec
- PTR fft2048\suffix\()_altivec
- PTR fft4096\suffix\()_altivec
- PTR fft8192\suffix\()_altivec
- PTR fft16384\suffix\()_altivec
- PTR fft32768\suffix\()_altivec
- PTR fft65536\suffix\()_altivec
-.endm
-
-DECL_FFTS 0
-DECL_FFTS 1, _interleave
-
-#endif /* HAVE_GNU_AS && HAVE_ALTIVEC && HAVE_BIGENDIAN */
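
As an aside on the layout comment at the top of the removed fft_altivec.S
(intermediate results are kept as {4x real, 4x imaginary, ...} blocks rather
than interleaved FFTComplex pairs), the small C sketch below illustrates that
mapping; the struct and function names are made up purely for illustration.

    typedef struct { float re, im; } cplx;  /* stand-in for the old FFTComplex */

    /* Rearrange n interleaved complex values (n a multiple of 4) into the
     * per-vector block layout {r0..r3, i0..i3, r4..r7, i4..i7, ...}. */
    static void deinterleave_blocks4(const cplx *z, float *blocks, int n)
    {
        for (int i = 0; i < n; i += 4)
            for (int j = 0; j < 4; j++) {
                blocks[2 * i + j]     = z[i + j].re;  /* 4x real      */
                blocks[2 * i + 4 + j] = z[i + j].im;  /* 4x imaginary */
            }
    }
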
diff --git a/libavcodec/ppc/fft_init.c b/libavcodec/ppc/fft_init.c
deleted file mode 100644
index 65ce64f6a1..0000000000
--- a/libavcodec/ppc/fft_init.c
+++ /dev/null
@@ -1,168 +0,0 @@
-/*
- * FFT/IFFT transforms
- * AltiVec-enabled
- * Copyright (c) 2009 Loren Merritt
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#include "config.h"
-#include "libavutil/attributes.h"
-#include "libavutil/cpu.h"
-#include "libavutil/ppc/cpu.h"
-#include "libavutil/ppc/util_altivec.h"
-#include "libavcodec/fft.h"
-
-/**
- * Do a complex FFT with the parameters defined in ff_fft_init().
- * The input data must be permuted before with s->revtab table.
- * No 1.0 / sqrt(n) normalization is done.
- * AltiVec-enabled:
- * This code assumes that the 'z' pointer is 16 bytes-aligned.
- * It also assumes all FFTComplex are 8 bytes-aligned pairs of floats.
- */
-
-#if HAVE_VSX
-#include "fft_vsx.h"
-#else
-void ff_fft_calc_altivec(FFTContext *s, FFTComplex *z);
-void ff_fft_calc_interleave_altivec(FFTContext *s, FFTComplex *z);
-#endif
-
-#if HAVE_GNU_AS && HAVE_ALTIVEC && (HAVE_BIGENDIAN || HAVE_VSX)
-static void imdct_half_altivec(FFTContext *s, FFTSample *output, const FFTSample *input)
-{
- int j, k;
- int n = 1 << s->mdct_bits;
- int n4 = n >> 2;
- int n8 = n >> 3;
- int n32 = n >> 5;
- const uint16_t *revtabj = s->revtab;
- const uint16_t *revtabk = s->revtab+n4;
- const vec_f *tcos = (const vec_f*)(s->tcos+n8);
- const vec_f *tsin = (const vec_f*)(s->tsin+n8);
- const vec_f *pin = (const vec_f*)(input+n4);
- vec_f *pout = (vec_f*)(output+n4);
-
- /* pre rotation */
- k = n32-1;
- do {
- vec_f cos,sin,cos0,sin0,cos1,sin1,re,im,r0,i0,r1,i1,a,b,c,d;
-#define CMULA(p,o0,o1,o2,o3)\
- a = pin[ k*2+p]; /* { z[k].re, z[k].im, z[k+1].re, z[k+1].im } */\
- b = pin[-k*2-p-1]; /* { z[-k-2].re, z[-k-2].im, z[-k-1].re, z[-k-1].im } */\
- re = vec_perm(a, b, vcprm(0,2,s0,s2)); /* { z[k].re, z[k+1].re, z[-k-2].re, z[-k-1].re } */\
- im = vec_perm(a, b, vcprm(s3,s1,3,1)); /* { z[-k-1].im, z[-k-2].im, z[k+1].im, z[k].im } */\
- cos = vec_perm(cos0, cos1, vcprm(o0,o1,s##o2,s##o3)); /* { cos[k], cos[k+1], cos[-k-2], cos[-k-1] } */\
- sin = vec_perm(sin0, sin1, vcprm(o0,o1,s##o2,s##o3));\
- r##p = im*cos - re*sin;\
- i##p = re*cos + im*sin;
-#define STORE2(v,dst)\
- j = dst;\
- vec_ste(v, 0, output+j*2);\
- vec_ste(v, 4, output+j*2);
-#define STORE8(p)\
- a = vec_perm(r##p, i##p, vcprm(0,s0,0,s0));\
- b = vec_perm(r##p, i##p, vcprm(1,s1,1,s1));\
- c = vec_perm(r##p, i##p, vcprm(2,s2,2,s2));\
- d = vec_perm(r##p, i##p, vcprm(3,s3,3,s3));\
- STORE2(a, revtabk[ p*2-4]);\
- STORE2(b, revtabk[ p*2-3]);\
- STORE2(c, revtabj[-p*2+2]);\
- STORE2(d, revtabj[-p*2+3]);
-
- cos0 = tcos[k];
- sin0 = tsin[k];
- cos1 = tcos[-k-1];
- sin1 = tsin[-k-1];
- CMULA(0, 0,1,2,3);
- CMULA(1, 2,3,0,1);
- STORE8(0);
- STORE8(1);
- revtabj += 4;
- revtabk -= 4;
- k--;
- } while(k >= 0);
-
-#if HAVE_VSX
- ff_fft_calc_vsx(s, (FFTComplex*)output);
-#else
- ff_fft_calc_altivec(s, (FFTComplex*)output);
-#endif
-
- /* post rotation + reordering */
- j = -n32;
- k = n32-1;
- do {
- vec_f cos,sin,re,im,a,b,c,d;
-#define CMULB(d0,d1,o)\
- re = pout[o*2];\
- im = pout[o*2+1];\
- cos = tcos[o];\
- sin = tsin[o];\
- d0 = im*sin - re*cos;\
- d1 = re*sin + im*cos;
-
- CMULB(a,b,j);
- CMULB(c,d,k);
- pout[2*j] = vec_perm(a, d, vcprm(0,s3,1,s2));
- pout[2*j+1] = vec_perm(a, d, vcprm(2,s1,3,s0));
- pout[2*k] = vec_perm(c, b, vcprm(0,s3,1,s2));
- pout[2*k+1] = vec_perm(c, b, vcprm(2,s1,3,s0));
- j++;
- k--;
- } while(k >= 0);
-}
-
-static void imdct_calc_altivec(FFTContext *s, FFTSample *output, const FFTSample *input)
-{
- int k;
- int n = 1 << s->mdct_bits;
- int n4 = n >> 2;
- int n16 = n >> 4;
- vec_u32 sign = {1U<<31,1U<<31,1U<<31,1U<<31};
- vec_u32 *p0 = (vec_u32*)(output+n4);
- vec_u32 *p1 = (vec_u32*)(output+n4*3);
-
- imdct_half_altivec(s, output + n4, input);
-
- for (k = 0; k < n16; k++) {
- vec_u32 a = p0[k] ^ sign;
- vec_u32 b = p1[-k-1];
- p0[-k-1] = vec_perm(a, a, vcprm(3,2,1,0));
- p1[k] = vec_perm(b, b, vcprm(3,2,1,0));
- }
-}
-#endif /* HAVE_GNU_AS && HAVE_ALTIVEC && (HAVE_BIGENDIAN || HAVE_VSX) */
-
-av_cold void ff_fft_init_ppc(FFTContext *s)
-{
-#if HAVE_GNU_AS && HAVE_ALTIVEC && (HAVE_BIGENDIAN || HAVE_VSX)
- if (!PPC_ALTIVEC(av_get_cpu_flags()))
- return;
-
-#if HAVE_VSX
- s->fft_calc = ff_fft_calc_interleave_vsx;
-#else
- s->fft_calc = ff_fft_calc_interleave_altivec;
-#endif
- if (s->mdct_bits >= 5) {
- s->imdct_calc = imdct_calc_altivec;
- s->imdct_half = imdct_half_altivec;
- }
-#endif /* HAVE_GNU_AS && HAVE_ALTIVEC && HAVE_BIGENDIAN */
-}
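
The imdct_half()/imdct_calc() hooks that fft_init.c used to provide are
likewise covered by the TX API nowadays. The following is a rough sketch under
the tx.h convention that the MDCT length is the frame size and that the
inverse transform is half-length unless AV_TX_FULL_IMDCT is set; names and
buffer handling here are illustrative, not part of this commit.

    #include <libavutil/tx.h>

    /* frame_size corresponds to "len" in tx.h terms, i.e. half the window size. */
    static int imdct_half_tx_example(float *samples, float *coeffs, int frame_size)
    {
        AVTXContext *ctx = NULL;
        av_tx_fn imdct = NULL;
        float scale = 1.0f;
        int ret = av_tx_init(&ctx, &imdct, AV_TX_FLOAT_MDCT, 1 /* inverse */,
                             frame_size, &scale, 0);

        if (ret >= 0)
            /* Without the AV_TX_FULL_IMDCT flag the inverse MDCT is half-length,
             * the role the removed imdct_half_altivec() used to fill; the stride
             * argument is the spacing of the input samples in bytes. */
            imdct(ctx, samples, coeffs, sizeof(float));

        av_tx_uninit(&ctx);
        return ret;
    }
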
diff --git a/libavcodec/ppc/fft_vsx.c b/libavcodec/ppc/fft_vsx.c
deleted file mode 100644
index c365fa1380..0000000000
--- a/libavcodec/ppc/fft_vsx.c
+++ /dev/null
@@ -1,226 +0,0 @@
-/*
- * FFT transform, optimized with VSX built-in functions
- * Copyright (c) 2014 Rong Yan
- *
- * This algorithm (though not any of the implementation details) is
- * based on libdjbfft by D. J. Bernstein.
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-
-#include "config.h"
-#include "libavutil/cpu.h"
-#include "libavutil/ppc/util_altivec.h"
-#include "libavcodec/fft.h"
-#include "libavcodec/fft-internal.h"
-#include "fft_vsx.h"
-
-#if HAVE_VSX
-
-static void fft32_vsx_interleave(FFTComplex *z)
-{
- fft16_vsx_interleave(z);
- fft8_vsx_interleave(z+16);
- fft8_vsx_interleave(z+24);
- pass_vsx_interleave(z,ff_cos_32,4);
-}
-
-static void fft64_vsx_interleave(FFTComplex *z)
-{
- fft32_vsx_interleave(z);
- fft16_vsx_interleave(z+32);
- fft16_vsx_interleave(z+48);
- pass_vsx_interleave(z,ff_cos_64, 8);
-}
-static void fft128_vsx_interleave(FFTComplex *z)
-{
- fft64_vsx_interleave(z);
- fft32_vsx_interleave(z+64);
- fft32_vsx_interleave(z+96);
- pass_vsx_interleave(z,ff_cos_128,16);
-}
-static void fft256_vsx_interleave(FFTComplex *z)
-{
- fft128_vsx_interleave(z);
- fft64_vsx_interleave(z+128);
- fft64_vsx_interleave(z+192);
- pass_vsx_interleave(z,ff_cos_256,32);
-}
-static void fft512_vsx_interleave(FFTComplex *z)
-{
- fft256_vsx_interleave(z);
- fft128_vsx_interleave(z+256);
- fft128_vsx_interleave(z+384);
- pass_vsx_interleave(z,ff_cos_512,64);
-}
-static void fft1024_vsx_interleave(FFTComplex *z)
-{
- fft512_vsx_interleave(z);
- fft256_vsx_interleave(z+512);
- fft256_vsx_interleave(z+768);
- pass_vsx_interleave(z,ff_cos_1024,128);
-
-}
-static void fft2048_vsx_interleave(FFTComplex *z)
-{
- fft1024_vsx_interleave(z);
- fft512_vsx_interleave(z+1024);
- fft512_vsx_interleave(z+1536);
- pass_vsx_interleave(z,ff_cos_2048,256);
-}
-static void fft4096_vsx_interleave(FFTComplex *z)
-{
- fft2048_vsx_interleave(z);
- fft1024_vsx_interleave(z+2048);
- fft1024_vsx_interleave(z+3072);
- pass_vsx_interleave(z,ff_cos_4096, 512);
-}
-static void fft8192_vsx_interleave(FFTComplex *z)
-{
- fft4096_vsx_interleave(z);
- fft2048_vsx_interleave(z+4096);
- fft2048_vsx_interleave(z+6144);
- pass_vsx_interleave(z,ff_cos_8192,1024);
-}
-static void fft16384_vsx_interleave(FFTComplex *z)
-{
- fft8192_vsx_interleave(z);
- fft4096_vsx_interleave(z+8192);
- fft4096_vsx_interleave(z+12288);
- pass_vsx_interleave(z,ff_cos_16384,2048);
-}
-static void fft32768_vsx_interleave(FFTComplex *z)
-{
- fft16384_vsx_interleave(z);
- fft8192_vsx_interleave(z+16384);
- fft8192_vsx_interleave(z+24576);
- pass_vsx_interleave(z,ff_cos_32768,4096);
-}
-static void fft65536_vsx_interleave(FFTComplex *z)
-{
- fft32768_vsx_interleave(z);
- fft16384_vsx_interleave(z+32768);
- fft16384_vsx_interleave(z+49152);
- pass_vsx_interleave(z,ff_cos_65536,8192);
-}
-
-static void fft32_vsx(FFTComplex *z)
-{
- fft16_vsx(z);
- fft8_vsx(z+16);
- fft8_vsx(z+24);
- pass_vsx(z,ff_cos_32,4);
-}
-
-static void fft64_vsx(FFTComplex *z)
-{
- fft32_vsx(z);
- fft16_vsx(z+32);
- fft16_vsx(z+48);
- pass_vsx(z,ff_cos_64, 8);
-}
-static void fft128_vsx(FFTComplex *z)
-{
- fft64_vsx(z);
- fft32_vsx(z+64);
- fft32_vsx(z+96);
- pass_vsx(z,ff_cos_128,16);
-}
-static void fft256_vsx(FFTComplex *z)
-{
- fft128_vsx(z);
- fft64_vsx(z+128);
- fft64_vsx(z+192);
- pass_vsx(z,ff_cos_256,32);
-}
-static void fft512_vsx(FFTComplex *z)
-{
- fft256_vsx(z);
- fft128_vsx(z+256);
- fft128_vsx(z+384);
- pass_vsx(z,ff_cos_512,64);
-}
-static void fft1024_vsx(FFTComplex *z)
-{
- fft512_vsx(z);
- fft256_vsx(z+512);
- fft256_vsx(z+768);
- pass_vsx(z,ff_cos_1024,128);
-
-}
-static void fft2048_vsx(FFTComplex *z)
-{
- fft1024_vsx(z);
- fft512_vsx(z+1024);
- fft512_vsx(z+1536);
- pass_vsx(z,ff_cos_2048,256);
-}
-static void fft4096_vsx(FFTComplex *z)
-{
- fft2048_vsx(z);
- fft1024_vsx(z+2048);
- fft1024_vsx(z+3072);
- pass_vsx(z,ff_cos_4096, 512);
-}
-static void fft8192_vsx(FFTComplex *z)
-{
- fft4096_vsx(z);
- fft2048_vsx(z+4096);
- fft2048_vsx(z+6144);
- pass_vsx(z,ff_cos_8192,1024);
-}
-static void fft16384_vsx(FFTComplex *z)
-{
- fft8192_vsx(z);
- fft4096_vsx(z+8192);
- fft4096_vsx(z+12288);
- pass_vsx(z,ff_cos_16384,2048);
-}
-static void fft32768_vsx(FFTComplex *z)
-{
- fft16384_vsx(z);
- fft8192_vsx(z+16384);
- fft8192_vsx(z+24576);
- pass_vsx(z,ff_cos_32768,4096);
-}
-static void fft65536_vsx(FFTComplex *z)
-{
- fft32768_vsx(z);
- fft16384_vsx(z+32768);
- fft16384_vsx(z+49152);
- pass_vsx(z,ff_cos_65536,8192);
-}
-
-static void (* const fft_dispatch_vsx[])(FFTComplex*) = {
- fft4_vsx, fft8_vsx, fft16_vsx, fft32_vsx, fft64_vsx, fft128_vsx, fft256_vsx, fft512_vsx, fft1024_vsx,
- fft2048_vsx, fft4096_vsx, fft8192_vsx, fft16384_vsx, fft32768_vsx, fft65536_vsx,
-};
-static void (* const fft_dispatch_vsx_interleave[])(FFTComplex*) = {
- fft4_vsx_interleave, fft8_vsx_interleave, fft16_vsx_interleave, fft32_vsx_interleave, fft64_vsx_interleave,
- fft128_vsx_interleave, fft256_vsx_interleave, fft512_vsx_interleave, fft1024_vsx_interleave,
- fft2048_vsx_interleave, fft4096_vsx_interleave, fft8192_vsx_interleave, fft16384_vsx_interleave, fft32768_vsx_interleave, fft65536_vsx_interleave,
-};
-void ff_fft_calc_interleave_vsx(FFTContext *s, FFTComplex *z)
-{
- fft_dispatch_vsx_interleave[s->nbits-2](z);
-}
-void ff_fft_calc_vsx(FFTContext *s, FFTComplex *z)
-{
- fft_dispatch_vsx[s->nbits-2](z);
-}
-#endif /* HAVE_VSX */
diff --git a/libavcodec/ppc/fft_vsx.h b/libavcodec/ppc/fft_vsx.h
deleted file mode 100644
index 1e44031aa5..0000000000
--- a/libavcodec/ppc/fft_vsx.h
+++ /dev/null
@@ -1,829 +0,0 @@
-#ifndef AVCODEC_PPC_FFT_VSX_H
-#define AVCODEC_PPC_FFT_VSX_H
-/*
- * FFT transform, optimized with VSX built-in functions
- * Copyright (c) 2014 Rong Yan Copyright (c) 2009 Loren Merritt
- *
- * This algorithm (though not any of the implementation details) is
- * based on libdjbfft by D. J. Bernstein, and fft_altivec_s.S.
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-
-#include "config.h"
-#include "libavutil/cpu.h"
-#include "libavutil/ppc/util_altivec.h"
-#include "libavcodec/fft.h"
-#include "libavcodec/fft-internal.h"
-
-#if HAVE_VSX
-
-void ff_fft_calc_interleave_vsx(FFTContext *s, FFTComplex *z);
-void ff_fft_calc_vsx(FFTContext *s, FFTComplex *z);
-
-
-#define byte_2complex (2*sizeof(FFTComplex))
-#define byte_4complex (4*sizeof(FFTComplex))
-#define byte_6complex (6*sizeof(FFTComplex))
-#define byte_8complex (8*sizeof(FFTComplex))
-#define byte_10complex (10*sizeof(FFTComplex))
-#define byte_12complex (12*sizeof(FFTComplex))
-#define byte_14complex (14*sizeof(FFTComplex))
-
-inline static void pass_vsx_interleave(FFTComplex *z, const FFTSample *wre, unsigned int n)
-{
- int o1 = n<<1;
- int o2 = n<<2;
- int o3 = o1+o2;
- int i1, i2, i3;
- FFTSample* out = (FFTSample*)z;
- const FFTSample *wim = wre+o1;
- vec_f vz0, vzo1, vzo2, vzo3;
- vec_f x0, x1, x2, x3;
- vec_f x4, x5, x6, x7;
- vec_f x8, x9, x10, x11;
- vec_f x12, x13, x14, x15;
- vec_f x16, x17, x18, x19;
- vec_f x20, x21, x22, x23;
- vec_f vz0plus1, vzo1plus1, vzo2plus1, vzo3plus1;
- vec_f y0, y1, y2, y3;
- vec_f y4, y5, y8, y9;
- vec_f y10, y13, y14, y15;
- vec_f y16, y17, y18, y19;
- vec_f y20, y21, y22, y23;
- vec_f wr1, wi1, wr0, wi0;
- vec_f wr2, wi2, wr3, wi3;
- vec_f xmulwi0, xmulwi1, ymulwi2, ymulwi3;
-
- n = n-2;
- i1 = o1*sizeof(FFTComplex);
- i2 = o2*sizeof(FFTComplex);
- i3 = o3*sizeof(FFTComplex);
- vzo2 = vec_ld(i2, &(out[0])); // zo2.r zo2.i z(o2+1).r z(o2+1).i
- vzo2plus1 = vec_ld(i2+16, &(out[0]));
- vzo3 = vec_ld(i3, &(out[0])); // zo3.r zo3.i z(o3+1).r z(o3+1).i
- vzo3plus1 = vec_ld(i3+16, &(out[0]));
- vz0 = vec_ld(0, &(out[0])); // z0.r z0.i z1.r z1.i
- vz0plus1 = vec_ld(16, &(out[0]));
- vzo1 = vec_ld(i1, &(out[0])); // zo1.r zo1.i z(o1+1).r z(o1+1).i
- vzo1plus1 = vec_ld(i1+16, &(out[0]));
-
- x0 = vec_add(vzo2, vzo3);
- x1 = vec_sub(vzo2, vzo3);
- y0 = vec_add(vzo2plus1, vzo3plus1);
- y1 = vec_sub(vzo2plus1, vzo3plus1);
-
- wr1 = vec_splats(wre[1]);
- wi1 = vec_splats(wim[-1]);
- wi2 = vec_splats(wim[-2]);
- wi3 = vec_splats(wim[-3]);
- wr2 = vec_splats(wre[2]);
- wr3 = vec_splats(wre[3]);
-
- x2 = vec_perm(x0, x1, vcprm(2,s2,3,s3));
- x3 = vec_perm(x0, x1, vcprm(s3,3,s2,2));
-
- y4 = vec_perm(y0, y1, vcprm(s1,1,s0,0));
- y5 = vec_perm(y0, y1, vcprm(s3,3,s2,2));
- y2 = vec_perm(y0, y1, vcprm(0,s0,1,s1));
- y3 = vec_perm(y0, y1, vcprm(2,s2,3,s3));
-
- ymulwi2 = vec_mul(y4, wi2);
- ymulwi3 = vec_mul(y5, wi3);
- x4 = vec_mul(x2, wr1);
- x5 = vec_mul(x3, wi1);
- y8 = vec_madd(y2, wr2, ymulwi2);
- y9 = vec_msub(y2, wr2, ymulwi2);
- x6 = vec_add(x4, x5);
- x7 = vec_sub(x4, x5);
- y13 = vec_madd(y3, wr3, ymulwi3);
- y14 = vec_msub(y3, wr3, ymulwi3);
-
- x8 = vec_perm(x6, x7, vcprm(0,1,s2,s3));
- y10 = vec_perm(y8, y9, vcprm(0,1,s2,s3));
- y15 = vec_perm(y13, y14, vcprm(0,1,s2,s3));
-
- x9 = vec_perm(x0, x8, vcprm(0,1,s0,s2));
- x10 = vec_perm(x1, x8, vcprm(1,0,s3,s1));
-
- y16 = vec_perm(y10, y15, vcprm(0,2,s0,s2));
- y17 = vec_perm(y10, y15, vcprm(3,1,s3,s1));
-
- x11 = vec_add(vz0, x9);
- x12 = vec_sub(vz0, x9);
- x13 = vec_add(vzo1, x10);
- x14 = vec_sub(vzo1, x10);
-
- y18 = vec_add(vz0plus1, y16);
- y19 = vec_sub(vz0plus1, y16);
- y20 = vec_add(vzo1plus1, y17);
- y21 = vec_sub(vzo1plus1, y17);
-
- x15 = vec_perm(x13, x14, vcprm(0,s1,2,s3));
- x16 = vec_perm(x13, x14, vcprm(s0,1,s2,3));
- y22 = vec_perm(y20, y21, vcprm(0,s1,2,s3));
- y23 = vec_perm(y20, y21, vcprm(s0,1,s2,3));
-
-
- vec_st(x11, 0, &(out[0]));
- vec_st(y18, 16, &(out[0]));
- vec_st(x15, i1, &(out[0]));
- vec_st(y22, i1+16, &(out[0]));
- vec_st(x12, i2, &(out[0]));
- vec_st(y19, i2+16, &(out[0]));
- vec_st(x16, i3, &(out[0]));
- vec_st(y23, i3+16, &(out[0]));
-
- do {
- out += 8;
- wre += 4;
- wim -= 4;
- wr0 = vec_splats(wre[0]);
- wr1 = vec_splats(wre[1]);
- wi0 = vec_splats(wim[0]);
- wi1 = vec_splats(wim[-1]);
-
- wr2 = vec_splats(wre[2]);
- wr3 = vec_splats(wre[3]);
- wi2 = vec_splats(wim[-2]);
- wi3 = vec_splats(wim[-3]);
-
- vzo2 = vec_ld(i2, &(out[0])); // zo2.r zo2.i z(o2+1).r z(o2+1).i
- vzo2plus1 = vec_ld(i2+16, &(out[0]));
- vzo3 = vec_ld(i3, &(out[0])); // zo3.r zo3.i z(o3+1).r z(o3+1).i
- vzo3plus1 = vec_ld(i3+16, &(out[0]));
- vz0 = vec_ld(0, &(out[0])); // z0.r z0.i z1.r z1.i
- vz0plus1 = vec_ld(16, &(out[0]));
- vzo1 = vec_ld(i1, &(out[0])); // zo1.r zo1.i z(o1+1).r z(o1+1).i
- vzo1plus1 = vec_ld(i1+16, &(out[0]));
-
- x0 = vec_add(vzo2, vzo3);
- x1 = vec_sub(vzo2, vzo3);
-
- y0 = vec_add(vzo2plus1, vzo3plus1);
- y1 = vec_sub(vzo2plus1, vzo3plus1);
-
- x4 = vec_perm(x0, x1, vcprm(s1,1,s0,0));
- x5 = vec_perm(x0, x1, vcprm(s3,3,s2,2));
- x2 = vec_perm(x0, x1, vcprm(0,s0,1,s1));
- x3 = vec_perm(x0, x1, vcprm(2,s2,3,s3));
-
- y2 = vec_perm(y0, y1, vcprm(0,s0,1,s1));
- y3 = vec_perm(y0, y1, vcprm(2,s2,3,s3));
- xmulwi0 = vec_mul(x4, wi0);
- xmulwi1 = vec_mul(x5, wi1);
-
- y4 = vec_perm(y0, y1, vcprm(s1,1,s0,0));
- y5 = vec_perm(y0, y1, vcprm(s3,3,s2,2));
-
- x8 = vec_madd(x2, wr0, xmulwi0);
- x9 = vec_msub(x2, wr0, xmulwi0);
- ymulwi2 = vec_mul(y4, wi2);
- ymulwi3 = vec_mul(y5, wi3);
-
- x13 = vec_madd(x3, wr1, xmulwi1);
- x14 = vec_msub(x3, wr1, xmulwi1);
-
- y8 = vec_madd(y2, wr2, ymulwi2);
- y9 = vec_msub(y2, wr2, ymulwi2);
- y13 = vec_madd(y3, wr3, ymulwi3);
- y14 = vec_msub(y3, wr3, ymulwi3);
-
- x10 = vec_perm(x8, x9, vcprm(0,1,s2,s3));
- x15 = vec_perm(x13, x14, vcprm(0,1,s2,s3));
-
- y10 = vec_perm(y8, y9, vcprm(0,1,s2,s3));
- y15 = vec_perm(y13, y14, vcprm(0,1,s2,s3));
-
- x16 = vec_perm(x10, x15, vcprm(0,2,s0,s2));
- x17 = vec_perm(x10, x15, vcprm(3,1,s3,s1));
-
- y16 = vec_perm(y10, y15, vcprm(0,2,s0,s2));
- y17 = vec_perm(y10, y15, vcprm(3,1,s3,s1));
-
- x18 = vec_add(vz0, x16);
- x19 = vec_sub(vz0, x16);
- x20 = vec_add(vzo1, x17);
- x21 = vec_sub(vzo1, x17);
-
- y18 = vec_add(vz0plus1, y16);
- y19 = vec_sub(vz0plus1, y16);
- y20 = vec_add(vzo1plus1, y17);
- y21 = vec_sub(vzo1plus1, y17);
-
- x22 = vec_perm(x20, x21, vcprm(0,s1,2,s3));
- x23 = vec_perm(x20, x21, vcprm(s0,1,s2,3));
-
- y22 = vec_perm(y20, y21, vcprm(0,s1,2,s3));
- y23 = vec_perm(y20, y21, vcprm(s0,1,s2,3));
-
- vec_st(x18, 0, &(out[0]));
- vec_st(y18, 16, &(out[0]));
- vec_st(x22, i1, &(out[0]));
- vec_st(y22, i1+16, &(out[0]));
- vec_st(x19, i2, &(out[0]));
- vec_st(y19, i2+16, &(out[0]));
- vec_st(x23, i3, &(out[0]));
- vec_st(y23, i3+16, &(out[0]));
- } while (n-=2);
-}
-
-inline static void fft2_vsx_interleave(FFTComplex *z)
-{
- FFTSample r1, i1;
-
- r1 = z[0].re - z[1].re;
- z[0].re += z[1].re;
- z[1].re = r1;
-
- i1 = z[0].im - z[1].im;
- z[0].im += z[1].im;
- z[1].im = i1;
- }
-
-inline static void fft4_vsx_interleave(FFTComplex *z)
-{
- vec_f a, b, c, d;
- float* out= (float*)z;
- a = vec_ld(0, &(out[0]));
- b = vec_ld(byte_2complex, &(out[0]));
-
- c = vec_perm(a, b, vcprm(0,1,s2,s1));
- d = vec_perm(a, b, vcprm(2,3,s0,s3));
- a = vec_add(c, d);
- b = vec_sub(c, d);
-
- c = vec_perm(a, b, vcprm(0,1,s0,s1));
- d = vec_perm(a, b, vcprm(2,3,s3,s2));
-
- a = vec_add(c, d);
- b = vec_sub(c, d);
- vec_st(a, 0, &(out[0]));
- vec_st(b, byte_2complex, &(out[0]));
-}
-
-inline static void fft8_vsx_interleave(FFTComplex *z)
-{
- vec_f vz0, vz1, vz2, vz3;
- vec_f x0, x1, x2, x3;
- vec_f x4, x5, x6, x7;
- vec_f x8, x9, x10, x11;
- vec_f x12, x13, x14, x15;
- vec_f x16, x17, x18, x19;
- vec_f x20, x21, x22, x23;
- vec_f x24, x25, x26, x27;
- vec_f x28, x29, x30, x31;
- vec_f x32, x33, x34;
-
- float* out= (float*)z;
- vec_f vc1 = {sqrthalf, sqrthalf, sqrthalf, sqrthalf};
-
- vz0 = vec_ld(0, &(out[0]));
- vz1 = vec_ld(byte_2complex, &(out[0]));
- vz2 = vec_ld(byte_4complex, &(out[0]));
- vz3 = vec_ld(byte_6complex, &(out[0]));
-
- x0 = vec_perm(vz0, vz1, vcprm(0,1,s2,s1));
- x1 = vec_perm(vz0, vz1, vcprm(2,3,s0,s3));
- x2 = vec_perm(vz2, vz3, vcprm(2,1,s0,s1));
- x3 = vec_perm(vz2, vz3, vcprm(0,3,s2,s3));
-
- x4 = vec_add(x0, x1);
- x5 = vec_sub(x0, x1);
- x6 = vec_add(x2, x3);
- x7 = vec_sub(x2, x3);
-
- x8 = vec_perm(x4, x5, vcprm(0,1,s0,s1));
- x9 = vec_perm(x4, x5, vcprm(2,3,s3,s2));
- x10 = vec_perm(x6, x7, vcprm(2,1,s2,s1));
- x11 = vec_perm(x6, x7, vcprm(0,3,s0,s3));
-
- x12 = vec_add(x8, x9);
- x13 = vec_sub(x8, x9);
- x14 = vec_add(x10, x11);
- x15 = vec_sub(x10, x11);
- x16 = vec_perm(x12, x13, vcprm(0,s0,1,s1));
- x17 = vec_perm(x14, x15, vcprm(0,s0,1,s1));
- x18 = vec_perm(x16, x17, vcprm(s0,s3,s2,s1));
- x19 = vec_add(x16, x18); // z0.r z2.r z0.i z2.i
- x20 = vec_sub(x16, x18); // z4.r z6.r z4.i z6.i
-
- x21 = vec_perm(x12, x13, vcprm(2,s2,3,s3));
- x22 = vec_perm(x14, x15, vcprm(2,3,s2,s3));
- x23 = vec_perm(x14, x15, vcprm(3,2,s3,s2));
- x24 = vec_add(x22, x23);
- x25 = vec_sub(x22, x23);
- x26 = vec_mul( vec_perm(x24, x25, vcprm(2,s2,0,s0)), vc1);
-
- x27 = vec_add(x21, x26); // z1.r z7.r z1.i z3.i
- x28 = vec_sub(x21, x26); //z5.r z3.r z5.i z7.i
-
- x29 = vec_perm(x19, x27, vcprm(0,2,s0,s2)); // z0.r z0.i z1.r z1.i
- x30 = vec_perm(x19, x27, vcprm(1,3,s1,s3)); // z2.r z2.i z7.r z3.i
- x31 = vec_perm(x20, x28, vcprm(0,2,s0,s2)); // z4.r z4.i z5.r z5.i
- x32 = vec_perm(x20, x28, vcprm(1,3,s1,s3)); // z6.r z6.i z3.r z7.i
- x33 = vec_perm(x30, x32, vcprm(0,1,s2,3)); // z2.r z2.i z3.r z3.i
- x34 = vec_perm(x30, x32, vcprm(s0,s1,2,s3)); // z6.r z6.i z7.r z7.i
-
- vec_st(x29, 0, &(out[0]));
- vec_st(x33, byte_2complex, &(out[0]));
- vec_st(x31, byte_4complex, &(out[0]));
- vec_st(x34, byte_6complex, &(out[0]));
-}
-
-inline static void fft16_vsx_interleave(FFTComplex *z)
-{
- float* out= (float*)z;
- vec_f vc0 = {sqrthalf, sqrthalf, sqrthalf, sqrthalf};
- vec_f vc1 = {ff_cos_16[1], ff_cos_16[1], ff_cos_16[1], ff_cos_16[1]};
- vec_f vc2 = {ff_cos_16[3], ff_cos_16[3], ff_cos_16[3], ff_cos_16[3]};
- vec_f vz0, vz1, vz2, vz3;
- vec_f vz4, vz5, vz6, vz7;
- vec_f x0, x1, x2, x3;
- vec_f x4, x5, x6, x7;
- vec_f x8, x9, x10, x11;
- vec_f x12, x13, x14, x15;
- vec_f x16, x17, x18, x19;
- vec_f x20, x21, x22, x23;
- vec_f x24, x25, x26, x27;
- vec_f x28, x29, x30, x31;
- vec_f x32, x33, x34, x35;
- vec_f x36, x37, x38, x39;
- vec_f x40, x41, x42, x43;
- vec_f x44, x45, x46, x47;
- vec_f x48, x49, x50, x51;
- vec_f x52, x53, x54, x55;
- vec_f x56, x57, x58, x59;
- vec_f x60, x61, x62, x63;
- vec_f x64, x65, x66, x67;
- vec_f x68, x69, x70, x71;
- vec_f x72, x73, x74, x75;
- vec_f x76, x77, x78, x79;
- vec_f x80, x81, x82, x83;
- vec_f x84, x85, x86;
-
- vz0 = vec_ld(0, &(out[0]));
- vz1 = vec_ld(byte_2complex, &(out[0]));
- vz2 = vec_ld(byte_4complex, &(out[0]));
- vz3 = vec_ld(byte_6complex, &(out[0]));
- vz4 = vec_ld(byte_8complex, &(out[0]));
- vz5 = vec_ld(byte_10complex, &(out[0]));
- vz6 = vec_ld(byte_12complex, &(out[0]));
- vz7 = vec_ld(byte_14complex, &(out[0]));
-
- x0 = vec_perm(vz0, vz1, vcprm(0,1,s2,s1));
- x1 = vec_perm(vz0, vz1, vcprm(2,3,s0,s3));
- x2 = vec_perm(vz2, vz3, vcprm(0,1,s0,s1));
- x3 = vec_perm(vz2, vz3, vcprm(2,3,s2,s3));
-
- x4 = vec_perm(vz4, vz5, vcprm(0,1,s2,s1));
- x5 = vec_perm(vz4, vz5, vcprm(2,3,s0,s3));
- x6 = vec_perm(vz6, vz7, vcprm(0,1,s2,s1));
- x7 = vec_perm(vz6, vz7, vcprm(2,3,s0,s3));
-
- x8 = vec_add(x0, x1);
- x9 = vec_sub(x0, x1);
- x10 = vec_add(x2, x3);
- x11 = vec_sub(x2, x3);
-
- x12 = vec_add(x4, x5);
- x13 = vec_sub(x4, x5);
- x14 = vec_add(x6, x7);
- x15 = vec_sub(x6, x7);
-
- x16 = vec_perm(x8, x9, vcprm(0,1,s0,s1));
- x17 = vec_perm(x8, x9, vcprm(2,3,s3,s2));
- x18 = vec_perm(x10, x11, vcprm(2,1,s1,s2));
- x19 = vec_perm(x10, x11, vcprm(0,3,s0,s3));
- x20 = vec_perm(x12, x14, vcprm(0,1,s0, s1));
- x21 = vec_perm(x12, x14, vcprm(2,3,s2,s3));
- x22 = vec_perm(x13, x15, vcprm(0,1,s0,s1));
- x23 = vec_perm(x13, x15, vcprm(3,2,s3,s2));
-
- x24 = vec_add(x16, x17);
- x25 = vec_sub(x16, x17);
- x26 = vec_add(x18, x19);
- x27 = vec_sub(x18, x19);
- x28 = vec_add(x20, x21);
- x29 = vec_sub(x20, x21);
- x30 = vec_add(x22, x23);
- x31 = vec_sub(x22, x23);
-
- x32 = vec_add(x24, x26);
- x33 = vec_sub(x24, x26);
- x34 = vec_perm(x32, x33, vcprm(0,1,s0,s1));
-
- x35 = vec_perm(x28, x29, vcprm(2,1,s1,s2));
- x36 = vec_perm(x28, x29, vcprm(0,3,s0,s3));
- x37 = vec_add(x35, x36);
- x38 = vec_sub(x35, x36);
- x39 = vec_perm(x37, x38, vcprm(0,1,s1,s0));
-
- x40 = vec_perm(x27, x38, vcprm(3,2,s2,s3));
- x41 = vec_perm(x26, x37, vcprm(2,3,s3,s2));
- x42 = vec_add(x40, x41);
- x43 = vec_sub(x40, x41);
- x44 = vec_mul(x42, vc0);
- x45 = vec_mul(x43, vc0);
-
- x46 = vec_add(x34, x39); // z0.r z0.i z4.r z4.i
- x47 = vec_sub(x34, x39); // z8.r z8.i z12.r z12.i
-
- x48 = vec_perm(x30, x31, vcprm(2,1,s1,s2));
- x49 = vec_perm(x30, x31, vcprm(0,3,s3,s0));
- x50 = vec_add(x48, x49);
- x51 = vec_sub(x48, x49);
- x52 = vec_mul(x50, vc1);
- x53 = vec_mul(x50, vc2);
- x54 = vec_mul(x51, vc1);
- x55 = vec_mul(x51, vc2);
-
- x56 = vec_perm(x24, x25, vcprm(2,3,s2,s3));
- x57 = vec_perm(x44, x45, vcprm(0,1,s1,s0));
- x58 = vec_add(x56, x57);
- x59 = vec_sub(x56, x57);
-
- x60 = vec_perm(x54, x55, vcprm(1,0,3,2));
- x61 = vec_perm(x54, x55, vcprm(s1,s0,s3,s2));
- x62 = vec_add(x52, x61);
- x63 = vec_sub(x52, x61);
- x64 = vec_add(x60, x53);
- x65 = vec_sub(x60, x53);
- x66 = vec_perm(x62, x64, vcprm(0,1,s3,s2));
- x67 = vec_perm(x63, x65, vcprm(s0,s1,3,2));
-
- x68 = vec_add(x58, x66); // z1.r z1.i z3.r z3.i
- x69 = vec_sub(x58, x66); // z9.r z9.i z11.r z11.i
- x70 = vec_add(x59, x67); // z5.r z5.i z15.r z15.i
- x71 = vec_sub(x59, x67); // z13.r z13.i z7.r z7.i
-
- x72 = vec_perm(x25, x27, vcprm(s1,s0,s2,s3));
- x73 = vec_add(x25, x72);
- x74 = vec_sub(x25, x72);
- x75 = vec_perm(x73, x74, vcprm(0,1,s0,s1));
- x76 = vec_perm(x44, x45, vcprm(3,2,s2,s3));
- x77 = vec_add(x75, x76); // z2.r z2.i z6.r z6.i
- x78 = vec_sub(x75, x76); // z10.r z10.i z14.r z14.i
-
- x79 = vec_perm(x46, x68, vcprm(0,1,s0,s1)); // z0.r z0.i z1.r z1.i
- x80 = vec_perm(x77, x68, vcprm(0,1,s2,s3)); // z2.r z2.i z3.r z3.i
- x81 = vec_perm(x46, x70, vcprm(2,3,s0,s1)); // z4.r z4.i z5.r z5.i
- x82 = vec_perm(x71, x77, vcprm(s2,s3,2,3)); // z6.r z6.i z7.r z7.i
- vec_st(x79, 0, &(out[0]));
- vec_st(x80, byte_2complex, &(out[0]));
- vec_st(x81, byte_4complex, &(out[0]));
- vec_st(x82, byte_6complex, &(out[0]));
- x83 = vec_perm(x47, x69, vcprm(0,1,s0,s1)); // z8.r z8.i z9.r z9.i
- x84 = vec_perm(x78, x69, vcprm(0,1,s2,s3)); // z10.r z10.i z11.r z11.i
- x85 = vec_perm(x47, x71, vcprm(2,3,s0,s1)); // z12.r z12.i z13.r z13.i
- x86 = vec_perm(x70, x78, vcprm(s2,s3,2,3)); // z14.r z14.i z15.r z15.i
- vec_st(x83, byte_8complex, &(out[0]));
- vec_st(x84, byte_10complex, &(out[0]));
- vec_st(x85, byte_12complex, &(out[0]));
- vec_st(x86, byte_14complex, &(out[0]));
-}
-
-inline static void fft4_vsx(FFTComplex *z)
-{
- vec_f a, b, c, d;
- float* out= (float*)z;
- a = vec_ld(0, &(out[0]));
- b = vec_ld(byte_2complex, &(out[0]));
-
- c = vec_perm(a, b, vcprm(0,1,s2,s1));
- d = vec_perm(a, b, vcprm(2,3,s0,s3));
- a = vec_add(c, d);
- b = vec_sub(c, d);
-
- c = vec_perm(a,b, vcprm(0,s0,1,s1));
- d = vec_perm(a, b, vcprm(2,s3,3,s2));
-
- a = vec_add(c, d);
- b = vec_sub(c, d);
-
- c = vec_perm(a, b, vcprm(0,1,s0,s1));
- d = vec_perm(a, b, vcprm(2,3,s2,s3));
-
- vec_st(c, 0, &(out[0]));
- vec_st(d, byte_2complex, &(out[0]));
- return;
-}
-
-inline static void fft8_vsx(FFTComplex *z)
-{
- vec_f vz0, vz1, vz2, vz3;
- vec_f vz4, vz5, vz6, vz7, vz8;
-
- float* out= (float*)z;
- vec_f vc0 = {0.0, 0.0, 0.0, 0.0};
- vec_f vc1 = {-sqrthalf, sqrthalf, sqrthalf, -sqrthalf};
- vec_f vc2 = {sqrthalf, sqrthalf, sqrthalf, sqrthalf};
-
- vz0 = vec_ld(0, &(out[0]));
- vz1 = vec_ld(byte_2complex, &(out[0]));
- vz2 = vec_ld(byte_4complex, &(out[0]));
- vz3 = vec_ld(byte_6complex, &(out[0]));
-
- vz6 = vec_perm(vz2, vz3, vcprm(0,s0,1,s1));
- vz7 = vec_perm(vz2, vz3, vcprm(2,s2,3,s3));
- vz4 = vec_perm(vz0, vz1, vcprm(0,1,s2,s1));
- vz5 = vec_perm(vz0, vz1, vcprm(2,3,s0,s3));
-
- vz2 = vec_add(vz6, vz7);
- vz3 = vec_sub(vz6, vz7);
- vz8 = vec_perm(vz3, vz3, vcprm(2,3,0,1));
-
- vz0 = vec_add(vz4, vz5);
- vz1 = vec_sub(vz4, vz5);
-
- vz3 = vec_madd(vz3, vc1, vc0);
- vz3 = vec_madd(vz8, vc2, vz3);
-
- vz4 = vec_perm(vz0, vz1, vcprm(0,s0,1,s1));
- vz5 = vec_perm(vz0, vz1, vcprm(2,s3,3,s2));
- vz6 = vec_perm(vz2, vz3, vcprm(1,2,s3,s0));
- vz7 = vec_perm(vz2, vz3, vcprm(0,3,s2,s1));
-
- vz0 = vec_add(vz4, vz5);
- vz1 = vec_sub(vz4, vz5);
- vz2 = vec_add(vz6, vz7);
- vz3 = vec_sub(vz6, vz7);
-
- vz4 = vec_perm(vz0, vz1, vcprm(0,1,s0,s1));
- vz5 = vec_perm(vz0, vz1, vcprm(2,3,s2,s3));
- vz6 = vec_perm(vz2, vz3, vcprm(0,2,s1,s3));
- vz7 = vec_perm(vz2, vz3, vcprm(1,3,s0,s2));
-
-
- vz2 = vec_sub(vz4, vz6);
- vz3 = vec_sub(vz5, vz7);
-
- vz0 = vec_add(vz4, vz6);
- vz1 = vec_add(vz5, vz7);
-
- vec_st(vz0, 0, &(out[0]));
- vec_st(vz1, byte_2complex, &(out[0]));
- vec_st(vz2, byte_4complex, &(out[0]));
- vec_st(vz3, byte_6complex, &(out[0]));
- return;
-}
-
-inline static void fft16_vsx(FFTComplex *z)
-{
- float* out= (float*)z;
- vec_f vc0 = {0.0, 0.0, 0.0, 0.0};
- vec_f vc1 = {-sqrthalf, sqrthalf, sqrthalf, -sqrthalf};
- vec_f vc2 = {sqrthalf, sqrthalf, sqrthalf, sqrthalf};
- vec_f vc3 = {1.0, 0.92387953, sqrthalf, 0.38268343};
- vec_f vc4 = {0.0, 0.38268343, sqrthalf, 0.92387953};
- vec_f vc5 = {-0.0, -0.38268343, -sqrthalf, -0.92387953};
-
- vec_f vz0, vz1, vz2, vz3;
- vec_f vz4, vz5, vz6, vz7;
- vec_f vz8, vz9, vz10, vz11;
- vec_f vz12, vz13;
-
- vz0 = vec_ld(byte_8complex, &(out[0]));
- vz1 = vec_ld(byte_10complex, &(out[0]));
- vz2 = vec_ld(byte_12complex, &(out[0]));
- vz3 = vec_ld(byte_14complex, &(out[0]));
-
- vz4 = vec_perm(vz0, vz1, vcprm(0,1,s2,s1));
- vz5 = vec_perm(vz0, vz1, vcprm(2,3,s0,s3));
- vz6 = vec_perm(vz2, vz3, vcprm(0,1,s2,s1));
- vz7 = vec_perm(vz2, vz3, vcprm(2,3,s0,s3));
-
- vz0 = vec_add(vz4, vz5);
- vz1= vec_sub(vz4, vz5);
- vz2 = vec_add(vz6, vz7);
- vz3 = vec_sub(vz6, vz7);
-
- vz4 = vec_perm(vz0, vz1, vcprm(0,s0,1,s1));
- vz5 = vec_perm(vz0, vz1, vcprm(2,s3,3,s2));
- vz6 = vec_perm(vz2, vz3, vcprm(0,s0,1,s1));
- vz7 = vec_perm(vz2, vz3, vcprm(2,s3,3,s2));
-
- vz0 = vec_add(vz4, vz5);
- vz1 = vec_sub(vz4, vz5);
- vz2 = vec_add(vz6, vz7);
- vz3 = vec_sub(vz6, vz7);
-
- vz4 = vec_perm(vz0, vz1, vcprm(0,1,s0,s1));
- vz5 = vec_perm(vz0, vz1, vcprm(2,3,s2,s3));
-
- vz6 = vec_perm(vz2, vz3, vcprm(0,1,s0,s1));
- vz7 = vec_perm(vz2, vz3, vcprm(2,3,s2,s3));
-
- vz0 = vec_ld(0, &(out[0]));
- vz1 = vec_ld(byte_2complex, &(out[0]));
- vz2 = vec_ld(byte_4complex, &(out[0]));
- vz3 = vec_ld(byte_6complex, &(out[0]));
- vz10 = vec_perm(vz2, vz3, vcprm(0,s0,1,s1));
- vz11 = vec_perm(vz2, vz3, vcprm(2,s2,3,s3));
- vz8 = vec_perm(vz0, vz1, vcprm(0,1,s2,s1));
- vz9 = vec_perm(vz0, vz1, vcprm(2,3,s0,s3));
-
- vz2 = vec_add(vz10, vz11);
- vz3 = vec_sub(vz10, vz11);
- vz12 = vec_perm(vz3, vz3, vcprm(2,3,0,1));
- vz0 = vec_add(vz8, vz9);
- vz1 = vec_sub(vz8, vz9);
-
- vz3 = vec_madd(vz3, vc1, vc0);
- vz3 = vec_madd(vz12, vc2, vz3);
- vz8 = vec_perm(vz0, vz1, vcprm(0,s0,1,s1));
- vz9 = vec_perm(vz0, vz1, vcprm(2,s3,3,s2));
- vz10 = vec_perm(vz2, vz3, vcprm(1,2,s3,s0));
- vz11 = vec_perm(vz2, vz3, vcprm(0,3,s2,s1));
-
- vz0 = vec_add(vz8, vz9);
- vz1 = vec_sub(vz8, vz9);
- vz2 = vec_add(vz10, vz11);
- vz3 = vec_sub(vz10, vz11);
-
- vz8 = vec_perm(vz0, vz1, vcprm(0,1,s0,s1));
- vz9 = vec_perm(vz0, vz1, vcprm(2,3,s2,s3));
- vz10 = vec_perm(vz2, vz3, vcprm(0,2,s1,s3));
- vz11 = vec_perm(vz2, vz3, vcprm(1,3,s0,s2));
-
- vz2 = vec_sub(vz8, vz10);
- vz3 = vec_sub(vz9, vz11);
- vz0 = vec_add(vz8, vz10);
- vz1 = vec_add(vz9, vz11);
-
- vz8 = vec_madd(vz4, vc3, vc0);
- vz9 = vec_madd(vz5, vc3, vc0);
- vz10 = vec_madd(vz6, vc3, vc0);
- vz11 = vec_madd(vz7, vc3, vc0);
-
- vz8 = vec_madd(vz5, vc4, vz8);
- vz9 = vec_madd(vz4, vc5, vz9);
- vz10 = vec_madd(vz7, vc5, vz10);
- vz11 = vec_madd(vz6, vc4, vz11);
-
- vz12 = vec_sub(vz10, vz8);
- vz10 = vec_add(vz10, vz8);
-
- vz13 = vec_sub(vz9, vz11);
- vz11 = vec_add(vz9, vz11);
-
- vz4 = vec_sub(vz0, vz10);
- vz0 = vec_add(vz0, vz10);
-
- vz7= vec_sub(vz3, vz12);
- vz3= vec_add(vz3, vz12);
-
- vz5 = vec_sub(vz1, vz11);
- vz1 = vec_add(vz1, vz11);
-
- vz6 = vec_sub(vz2, vz13);
- vz2 = vec_add(vz2, vz13);
-
- vec_st(vz0, 0, &(out[0]));
- vec_st(vz1, byte_2complex, &(out[0]));
- vec_st(vz2, byte_4complex, &(out[0]));
- vec_st(vz3, byte_6complex, &(out[0]));
- vec_st(vz4, byte_8complex, &(out[0]));
- vec_st(vz5, byte_10complex, &(out[0]));
- vec_st(vz6, byte_12complex, &(out[0]));
- vec_st(vz7, byte_14complex, &(out[0]));
- return;
-
-}
-inline static void pass_vsx(FFTComplex * z, const FFTSample * wre, unsigned int n)
-{
- int o1 = n<<1;
- int o2 = n<<2;
- int o3 = o1+o2;
- int i1, i2, i3;
- FFTSample* out = (FFTSample*)z;
- const FFTSample *wim = wre+o1;
- vec_f v0, v1, v2, v3;
- vec_f v4, v5, v6, v7;
- vec_f v8, v9, v10, v11;
- vec_f v12, v13;
-
- n = n-2;
- i1 = o1*sizeof(FFTComplex);
- i2 = o2*sizeof(FFTComplex);
- i3 = o3*sizeof(FFTComplex);
-
- v8 = vec_ld(0, &(wre[0]));
- v10 = vec_ld(0, &(wim[0]));
- v9 = vec_ld(0, &(wim[-4]));
- v9 = vec_perm(v9, v10, vcprm(s0,3,2,1));
-
- v4 = vec_ld(i2, &(out[0]));
- v5 = vec_ld(i2+16, &(out[0]));
- v6 = vec_ld(i3, &(out[0]));
- v7 = vec_ld(i3+16, &(out[0]));
- v10 = vec_mul(v4, v8); // r2*wre
- v11 = vec_mul(v5, v8); // i2*wre
- v12 = vec_mul(v6, v8); // r3*wre
- v13 = vec_mul(v7, v8); // i3*wre
-
- v0 = vec_ld(0, &(out[0])); // r0
- v3 = vec_ld(i1+16, &(out[0])); // i1
- v10 = vec_madd(v5, v9, v10); // r2*wim
- v11 = vec_nmsub(v4, v9, v11); // i2*wim
- v12 = vec_nmsub(v7, v9, v12); // r3*wim
- v13 = vec_madd(v6, v9, v13); // i3*wim
-
- v1 = vec_ld(16, &(out[0])); // i0
- v2 = vec_ld(i1, &(out[0])); // r1
- v8 = vec_sub(v12, v10);
- v12 = vec_add(v12, v10);
- v9 = vec_sub(v11, v13);
- v13 = vec_add(v11, v13);
- v4 = vec_sub(v0, v12);
- v0 = vec_add(v0, v12);
- v7 = vec_sub(v3, v8);
- v3 = vec_add(v3, v8);
-
- vec_st(v0, 0, &(out[0])); // r0
- vec_st(v3, i1+16, &(out[0])); // i1
- vec_st(v4, i2, &(out[0])); // r2
- vec_st(v7, i3+16, &(out[0]));// i3
-
- v5 = vec_sub(v1, v13);
- v1 = vec_add(v1, v13);
- v6 = vec_sub(v2, v9);
- v2 = vec_add(v2, v9);
-
- vec_st(v1, 16, &(out[0])); // i0
- vec_st(v2, i1, &(out[0])); // r1
- vec_st(v5, i2+16, &(out[0])); // i2
- vec_st(v6, i3, &(out[0])); // r3
-
- do {
- out += 8;
- wre += 4;
- wim -= 4;
-
- v8 = vec_ld(0, &(wre[0]));
- v10 = vec_ld(0, &(wim[0]));
- v9 = vec_ld(0, &(wim[-4]));
- v9 = vec_perm(v9, v10, vcprm(s0,3,2,1));
-
- v4 = vec_ld(i2, &(out[0])); // r2
- v5 = vec_ld(i2+16, &(out[0])); // i2
- v6 = vec_ld(i3, &(out[0])); // r3
- v7 = vec_ld(i3+16, &(out[0]));// i3
- v10 = vec_mul(v4, v8); // r2*wre
- v11 = vec_mul(v5, v8); // i2*wre
- v12 = vec_mul(v6, v8); // r3*wre
- v13 = vec_mul(v7, v8); // i3*wre
-
- v0 = vec_ld(0, &(out[0])); // r0
- v3 = vec_ld(i1+16, &(out[0])); // i1
- v10 = vec_madd(v5, v9, v10); // r2*wim
- v11 = vec_nmsub(v4, v9, v11); // i2*wim
- v12 = vec_nmsub(v7, v9, v12); // r3*wim
- v13 = vec_madd(v6, v9, v13); // i3*wim
-
- v1 = vec_ld(16, &(out[0])); // i0
- v2 = vec_ld(i1, &(out[0])); // r1
- v8 = vec_sub(v12, v10);
- v12 = vec_add(v12, v10);
- v9 = vec_sub(v11, v13);
- v13 = vec_add(v11, v13);
- v4 = vec_sub(v0, v12);
- v0 = vec_add(v0, v12);
- v7 = vec_sub(v3, v8);
- v3 = vec_add(v3, v8);
-
- vec_st(v0, 0, &(out[0])); // r0
- vec_st(v3, i1+16, &(out[0])); // i1
- vec_st(v4, i2, &(out[0])); // r2
- vec_st(v7, i3+16, &(out[0])); // i3
-
- v5 = vec_sub(v1, v13);
- v1 = vec_add(v1, v13);
- v6 = vec_sub(v2, v9);
- v2 = vec_add(v2, v9);
-
- vec_st(v1, 16, &(out[0])); // i0
- vec_st(v2, i1, &(out[0])); // r1
- vec_st(v5, i2+16, &(out[0])); // i2
- vec_st(v6, i3, &(out[0])); // r3
- } while (n-=2);
-}
-
-#endif
-
-#endif /* AVCODEC_PPC_FFT_VSX_H */