fast mdct added (vorbis implementation)

author: Daniil Cherednik <dan.cherednik@gmail.com> 2015-11-04 12:59:44 +0300
committer: Daniil Cherednik <dan.cherednik@gmail.com> 2015-11-04 12:59:44 +0300
commit: ae793aaa94741b3ca25eff25dd3e7d0fc4f4417e (patch)
tree: 4a77f39c1286322f81d0c743680b6aaaebe80b26
parent: 0b3154af9d688e1a5e3c88a4382ff1f6d165e9ac (diff)
download: atracdenc-ae793aaa94741b3ca25eff25dd3e7d0fc4f4417e.tar.gz
10 files changed, 702 insertions, 27 deletions
diff --git a/src/Makefile b/src/Makefile
index 46afbde..5a58987 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -1,3 +1,4 @@
 
 all:
-	g++ -std=c++11 -g main.cpp wav.cpp aea.cpp atracdenc.cpp bitstream/bitstream.cpp atrac/atrac1.cpp atrac/atrac1_dequantiser.cpp atrac/atrac1_scale.cpp atrac/atrac1_bitalloc.cpp -o atracdenc
+	cd ./mdct && make && cd ../
+	g++ -std=c++11 -O2 -g main.cpp wav.cpp aea.cpp atracdenc.cpp bitstream/bitstream.cpp atrac/atrac1.cpp atrac/atrac1_dequantiser.cpp atrac/atrac1_scale.cpp atrac/atrac1_bitalloc.cpp mdct/mdct_impl.o -o atracdenc
diff --git a/src/atracdenc.cpp b/src/atracdenc.cpp
index 2855193..c2ecad7 100644
--- a/src/atracdenc.cpp
+++ b/src/atracdenc.cpp
@@ -11,9 +11,12 @@ namespace NAtracDEnc {
 using namespace std;
 using namespace NBitStream;
 using namespace NAtrac1;
+using namespace NMDCT;
 TAtrac1Processor::TAtrac1Processor(TAeaPtr&& aea, bool mono)
     : MixChannel(mono)
     , Aea(std::move(aea))
+    , Mdct512(2)
+    , Mdct256(1)
 {
 }
 
@@ -36,18 +39,6 @@ static void vector_fmul_window(double *dst, const double *src0,
     }
 }
 
-vector<double> mdct(double* x, int N) {
-    vector<double> res;
-    for (int k = 0; k < N; k++) {
-        double sum = 0;
-        for (int n = 0; n < 2 * N; n++) {
-            sum += x[n]* cos((M_PI/N) * ((double)n + 0.5 + N/2) * ((double)k + 0.5));
-        }
-        res.push_back(sum);
-    }
-    return res;
-}
-	
 vector<double> midct(double* x, int N) {
     vector<double> res;
     for (int n = 0; n < 2 * N; n++) {
@@ -61,28 +52,21 @@ vector<double> midct(double* x, int N) {
     return res;
 }
 
-
 void TAtrac1Processor::Mdct(double Specs[512], double* low, double* mid, double* hi) {
     uint32_t pos = 0;
     for (uint32_t band = 0; band < QMF_BANDS; band++) {
         double* srcBuf = (band == 0) ? low : (band == 1) ? mid : hi;
         uint32_t bufSz = (band == 2) ? 256 : 128; 
-        uint32_t xxx = (band == 2) ? 112 : 48; 
-        double tmp[512];
-        //if (band == 2) {
-        //    for (uint32_t i = 0; i < bufSz; i++) {
-        //        srcBuf[i] = srcBuf[i] * 2;
-        //    }
-        //}
-        for (uint32_t i = 0; i < 512; i++)
-            tmp[i] = 0;
-        memcpy(&tmp[xxx], &srcBuf[bufSz], 32 * sizeof(double));
+        uint32_t winStart = (band == 2) ? 112 : 48; 
+        vector<double> tmp(512);
+
+        memcpy(&tmp[winStart], &srcBuf[bufSz], 32 * sizeof(double));
         for (int i = 0; i < 32; i++) {
             srcBuf[bufSz + i] = TAtrac1Data::SineWindow[i] * srcBuf[bufSz - 32 + i];
             srcBuf[bufSz - 32 + i] = TAtrac1Data::SineWindow[31 - i] * srcBuf[bufSz - 32 + i];
         }
-        memcpy(&tmp[xxx+32], &srcBuf[0], bufSz * sizeof(double));
-        const vector<double>&  sp = mdct(&tmp[0], bufSz);
+        memcpy(&tmp[winStart+32], &srcBuf[0], bufSz * sizeof(double));
+        const vector<double>&  sp = (band == 2) ? Mdct512(&tmp[0]) : Mdct256(&tmp[0]);
         if (band) {
             for (uint32_t j = 0; j < sp.size()/2; j++) {
                 Specs[pos+j] = sp[bufSz - 1 -j];
@@ -200,7 +184,7 @@ TPCMEngine<double>::TProcessLambda TAtrac1Processor::GetEncodeLambda() {
             vector<double> specs;
             specs.resize(512);
             for (int i = 0; i < NumSamples; ++i) {
-                src[i] = data[i][channel] / 256.0;
+                src[i] = data[i][channel];
             }
             SplitFilterBank[channel].Split(&src[0], &PcmBufLow[channel][0], &PcmBufMid[channel][0], &PcmBufHi[channel][0]);
 
diff --git a/src/atracdenc.h b/src/atracdenc.h
index 2fb114e..a0ab786 100644
--- a/src/atracdenc.h
+++ b/src/atracdenc.h
@@ -4,6 +4,7 @@
 #include "atrac/atrac1.h"
 #include "atrac/atrac1_qmf.h"
 #include "atrac/atrac1_scale.h"
+#include "mdct/mdct.h"
 
 namespace NAtracDEnc {
 
@@ -22,6 +23,8 @@ class TAtrac1Processor : public TAtrac1Data {
 
     Atrac1SynthesisFilterBank<double> SynthesisFilterBank[2];
     Atrac1SplitFilterBank<double> SplitFilterBank[2];
+    NMDCT::TMDCT<512> Mdct512;
+    NMDCT::TMDCT<256> Mdct256;
     void IMdct(double specs[512], const TBlockSize& mode, double* low, double* mid, double* hi);
     void Mdct(double specs[512], double* low, double* mid, double* hi);
     NAtrac1::TScaler Scaler;
diff --git a/src/config.h b/src/config.h
new file mode 100644
index 0000000..942841a
--- /dev/null
+++ b/src/config.h
@@ -0,0 +1,4 @@
+#pragma once
+
+#define CONFIG_DOUBLE
+
diff --git a/src/mdct/Makefile b/src/mdct/Makefile
new file mode 100644
index 0000000..cffd39d
--- /dev/null
+++ b/src/mdct/Makefile
@@ -0,0 +1,12 @@
+all: lib
+
+lib:
+	gcc -O2 -c vorbis_impl/mdct.c -o mdct_impl.o
+
+test: lib
+	g++ -std=c++11 mdct_ut.cpp -I ../../3rd/gtest-1.7.0/include/  ../../3rd/gtest-1.7.0/src/gtest-all.o ../../3rd/gtest-1.7.0/src/gtest_main.o ./mdct_impl.o -o mdct_ut
+	./mdct_ut
+
+clean:
+	rm ./mdct_ut
+	rm ./mdct_impl.o
diff --git a/src/mdct/common.h b/src/mdct/common.h
new file mode 100644
index 0000000..d217ed5
--- /dev/null
+++ b/src/mdct/common.h
@@ -0,0 +1,29 @@
+#pragma once
+#include "../config.h"
+
+#include <stdlib.h>
+#include <string.h>
+#include <math.h>
+
+#ifdef CONFIG_DOUBLE
+typedef double FLOAT;
+#define FCONST(X) (X)
+#define AFT_COS cos
+#define AFT_SIN sin
+#define AFT_TAN tan
+#define AFT_LOG10 log10
+#define AFT_EXP exp
+#define AFT_FABS fabs
+#define AFT_SQRT sqrt
+#define AFT_EXP exp
+#else
+typedef float FLOAT;
+#define FCONST(X) (X##f)
+#define AFT_COS cosf
+#define AFT_SIN sinf
+#define AFT_TAN tanf
+#define AFT_LOG10 log10f
+#define AFT_FABS fabsf
+#define AFT_SQRT sqrtf
+#define AFT_EXP expf
+#endif
diff --git a/src/mdct/mdct.h b/src/mdct/mdct.h
new file mode 100644
index 0000000..0637651
--- /dev/null
+++ b/src/mdct/mdct.h
@@ -0,0 +1,34 @@
+#pragma once
+
+#include "vorbis_impl/mdct.h"
+#include <vector>
+
+namespace NMDCT {
+
+class TMDCTBase {
+protected:
+    MDCTContext Ctx;
+    TMDCTBase(int n, double scale) {
+        mdct_ctx_init(&Ctx, n, scale);
+    };
+    virtual ~TMDCTBase() {
+        mdct_ctx_close(&Ctx);
+    };
+};
+
+
+template<int N>
+class TMDCT : public TMDCTBase {
+    std::vector<double> Buf;
+public:
+    TMDCT(float scale = 1.0)
+        : Buf(N/2)
+        , TMDCTBase(N, scale)
+    {}
+    const std::vector<double>& operator()(double* in) {
+        mdct(&Ctx, &Buf[0], in);
+        return Buf;
+    }
+
+};
+} //namespace NMDCT
diff --git a/src/mdct/mdct_ut.cpp b/src/mdct/mdct_ut.cpp
new file mode 100644
index 0000000..a63544e
--- /dev/null
+++ b/src/mdct/mdct_ut.cpp
@@ -0,0 +1,64 @@
+#include "mdct.h"
+#include <gtest/gtest.h>
+
+#include <vector>
+
+using std::vector;
+using namespace NMDCT;
+
+static vector<double> mdct(double* x, int N) {
+    vector<double> res;
+    for (int k = 0; k < N; k++) {
+        double sum = 0;
+        for (int n = 0; n < 2 * N; n++)
+            sum += x[n]* cos((M_PI/N) * ((double)n + 0.5 + N/2) * ((double)k + 0.5));
+
+        res.push_back(sum);
+    }
+    return res;
+}  
+
+TEST(TBitStream, MDCT64) {
+    const int N = 64;
+    TMDCT<N> transform(N);
+    vector<double> src(N);
+    for (int i = 0; i < N; i++) {
+        src[i] = i;
+    }
+    const vector<double> res1 = mdct(&src[0], N/2);
+    const vector<double> res2 = transform(&src[0]);
+    EXPECT_EQ(res1.size(), res2.size());
+    for (int i = 0; i < res1.size(); i++) {
+        EXPECT_NEAR(res1[i], res2[i], 0.0000000001);
+    }
+}
+
+TEST(TBitStream, MDCT128) {
+    const int N = 128;
+    TMDCT<N> transform(N);
+    vector<double> src(N);
+    for (int i = 0; i < N; i++) {
+        src[i] = i;
+    }
+    const vector<double> res1 = mdct(&src[0], N/2);
+    const vector<double> res2 = transform(&src[0]);
+    EXPECT_EQ(res1.size(), res2.size());
+    for (int i = 0; i < res1.size(); i++) {
+        EXPECT_NEAR(res1[i], res2[i], 0.0000000001);
+    }
+}
+
+TEST(TBitStream, MDCT256) {
+    const int N = 256;
+    TMDCT<N> transform(N);
+    vector<double> src(N);
+    for (int i = 0; i < N; i++) {
+        src[i] = i;
+    }
+    const vector<double> res1 = mdct(&src[0], N/2);
+    const vector<double> res2 = transform(&src[0]);
+    EXPECT_EQ(res1.size(), res2.size());
+    for (int i = 0; i < res1.size(); i++) {
+        EXPECT_NEAR(res1[i], res2[i], 0.00000001);
+    }
+}
diff --git a/src/mdct/vorbis_impl/mdct.c b/src/mdct/vorbis_impl/mdct.c
new file mode 100644
index 0000000..b297089
--- /dev/null
+++ b/src/mdct/vorbis_impl/mdct.c
@@ -0,0 +1,450 @@
+/**
+ *
+ * This file is derived from libvorbis
+ * Copyright (c) 2002, Xiph.org Foundation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * - Neither the name of the Xiph.org Foundation nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION
+ * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * @file mdct.c
+ * Modified Discrete Cosine Transform
+ */
+
+#include "mdct.h"
+
+/**
+ * Allocates and initializes lookup tables in the MDCT context.
+ * @param mdct  The MDCT context
+ * @param n     Number of time-domain samples used in the MDCT transform
+ * @param scale Multiplier
+ */
+void
+mdct_ctx_init(MDCTContext *mdct, int n, FLOAT scale)
+{
+    int *bitrev = malloc((n/4) * sizeof(int));
+    FLOAT *trig = malloc((n+n/4) * sizeof(FLOAT));
+    int i;
+    int n2 = (n >> 1);
+    int log2n = mdct->log2n = round(log((float)n)/log(2.f));
+    mdct->n = n;
+    mdct->trig = trig;
+    mdct->bitrev = bitrev;
+
+    // trig lookups
+    for (i = 0; i < n/4; i++) {
+        trig[i*2]      =  AFT_COS((M_PI/n)*(4*i));
+        trig[i*2+1]    = -AFT_SIN((M_PI/n)*(4*i));
+        trig[n2+i*2]   =  AFT_COS((M_PI/(2*n))*(2*i+1));
+        trig[n2+i*2+1] =  AFT_SIN((M_PI/(2*n))*(2*i+1));
+    }
+    for (i = 0; i < n/8; i++) {
+        trig[n+i*2]    =  AFT_COS((M_PI/n)*(4*i+2))*FCONST(0.5);
+        trig[n+i*2+1]  = -AFT_SIN((M_PI/n)*(4*i+2))*FCONST(0.5);
+    }
+
+    // bitreverse lookup
+    {
+        int j, acc;
+        int mask = (1 << (log2n-1)) - 1;
+        int msb = (1 << (log2n-2));
+        for (i = 0; i < n/8; i++) {
+            acc = 0;
+            for (j = 0; msb>>j; j++) {
+                if ((msb>>j) & i)
+                    acc |= (1 << j);
+            }
+            bitrev[i*2]= ((~acc) & mask) - 1;
+            bitrev[i*2+1] = acc;
+        }
+    }
+    mdct->scale = scale / (FLOAT)n;
+}
+
+/** Deallocates memory use by the lookup tables in the MDCT context. */
+void
+mdct_ctx_close(MDCTContext *mdct)
+{
+    if (mdct) {
+        if (mdct->trig)
+            free(mdct->trig);
+        if (mdct->bitrev)
+            free(mdct->bitrev);
+        memset(mdct, 0, sizeof(MDCTContext));
+    }
+}
+
+/** 8 point butterfly (in place, 4 register) */
+static inline void
+mdct_butterfly_8(FLOAT *x) {
+    FLOAT r0 = x[6] + x[2];
+    FLOAT r1 = x[6] - x[2];
+    FLOAT r2 = x[4] + x[0];
+    FLOAT r3 = x[4] - x[0];
+
+    x[6] = r0 + r2;
+    x[4] = r0 - r2;
+
+    r0   = x[5] - x[1];
+    r2   = x[7] - x[3];
+    x[0] = r1   + r0;
+    x[2] = r1   - r0;
+
+    r0   = x[5] + x[1];
+    r1   = x[7] + x[3];
+    x[3] = r2   + r3;
+    x[1] = r2   - r3;
+    x[7] = r1   + r0;
+    x[5] = r1   - r0;
+}
+
+/** 16 point butterfly (in place, 4 register) */
+static inline void
+mdct_butterfly_16(FLOAT *x) {
+    FLOAT r0 = x[1] - x[9];
+    FLOAT r1 = x[0] - x[8];
+
+    x[8]  += x[0];
+    x[9]  += x[1];
+    x[0]   = ((r0 + r1) * AFT_PI2_8);
+    x[1]   = ((r0 - r1) * AFT_PI2_8);
+
+    r0     = x[3]  - x[11];
+    r1     = x[10] - x[2];
+    x[10] += x[2];
+    x[11] += x[3];
+    x[2]   = r0;
+    x[3]   = r1;
+
+    r0     = x[12] - x[4];
+    r1     = x[13] - x[5];
+    x[12] += x[4];
+    x[13] += x[5];
+    x[4]   = ((r0 - r1) * AFT_PI2_8);
+    x[5]   = ((r0 + r1) * AFT_PI2_8);
+
+    r0     = x[14] - x[6];
+    r1     = x[15] - x[7];
+    x[14] += x[6];
+    x[15] += x[7];
+    x[6]   = r0;
+    x[7]   = r1;
+
+    mdct_butterfly_8(x);
+    mdct_butterfly_8(x+8);
+}
+
+/** 32 point butterfly (in place, 4 register) */
+static inline void
+mdct_butterfly_32(FLOAT *x) {
+    FLOAT r0 = x[30] - x[14];
+    FLOAT r1 = x[31] - x[15];
+
+    x[30] += x[14];
+    x[31] += x[15];
+    x[14]  = r0;
+    x[15]  = r1;
+
+    r0     = x[28] - x[12];
+    r1     = x[29] - x[13];
+    x[28] +=         x[12];
+    x[29] +=         x[13];
+    x[12]  = (r0 * AFT_PI1_8 - r1 * AFT_PI3_8);
+    x[13]  = (r0 * AFT_PI3_8 + r1 * AFT_PI1_8);
+
+    r0     = x[26] - x[10];
+    r1     = x[27] - x[11];
+    x[26] +=         x[10];
+    x[27] +=         x[11];
+    x[10]  = ((r0 - r1) * AFT_PI2_8);
+    x[11]  = ((r0 + r1) * AFT_PI2_8);
+
+    r0     = x[24] - x[8];
+    r1     = x[25] - x[9];
+    x[24] += x[8];
+    x[25] += x[9];
+    x[8]   = (r0 * AFT_PI3_8 - r1 * AFT_PI1_8);
+    x[9]   = (r1 * AFT_PI3_8 + r0 * AFT_PI1_8);
+
+    r0     = x[22] - x[6];
+    r1     = x[7]  - x[23];
+    x[22] += x[6];
+    x[23] += x[7];
+    x[6]   = r1;
+    x[7]   = r0;
+
+    r0     = x[4] - x[20];
+    r1     = x[5] - x[21];
+    x[20] += x[4];
+    x[21] += x[5];
+    x[4]   = (r1 * AFT_PI1_8 + r0 * AFT_PI3_8);
+    x[5]   = (r1 * AFT_PI3_8 - r0 * AFT_PI1_8);
+
+    r0     = x[2]  - x[18];
+    r1     = x[3]  - x[19];
+    x[18] += x[2];
+    x[19] += x[3];
+    x[2]   = ((r1 + r0) * AFT_PI2_8);
+    x[3]   = ((r1 - r0) * AFT_PI2_8);
+
+    r0     = x[0] - x[16];
+    r1     = x[1] - x[17];
+    x[16] += x[0];
+    x[17] += x[1];
+    x[0]   = (r1 * AFT_PI3_8 + r0 * AFT_PI1_8);
+    x[1]   = (r1 * AFT_PI1_8 - r0 * AFT_PI3_8);
+
+    mdct_butterfly_16(x);
+    mdct_butterfly_16(x+16);
+}
+
+/** N-point first stage butterfly (in place, 2 register) */
+static inline void
+mdct_butterfly_first(FLOAT *trig, FLOAT *x, int points)
+{
+    FLOAT *x1 = x + points - 8;
+    FLOAT *x2 = x + (points>>1) - 8;
+    FLOAT r0;
+    FLOAT r1;
+
+    do {
+        r0     = x1[6] - x2[6];
+        r1     = x1[7] - x2[7];
+        x1[6] += x2[6];
+        x1[7] += x2[7];
+        x2[6]  = (r1 * trig[1] + r0 * trig[0]);
+        x2[7]  = (r1 * trig[0] - r0 * trig[1]);
+
+        r0     = x1[4] - x2[4];
+        r1     = x1[5] - x2[5];
+        x1[4] += x2[4];
+        x1[5] += x2[5];
+        x2[4]  = (r1 * trig[5] + r0 * trig[4]);
+        x2[5]  = (r1 * trig[4] - r0 * trig[5]);
+
+        r0     = x1[2] - x2[2];
+        r1     = x1[3] - x2[3];
+        x1[2] += x2[2];
+        x1[3] += x2[3];
+        x2[2]  = (r1 * trig[9] + r0 * trig[8]);
+        x2[3]  = (r1 * trig[8] - r0 * trig[9]);
+
+        r0     = x1[0] - x2[0];
+        r1     = x1[1] - x2[1];
+        x1[0] += x2[0];
+        x1[1] += x2[1];
+        x2[0]  = (r1 * trig[13] + r0 * trig[12]);
+        x2[1]  = (r1 * trig[12] - r0 * trig[13]);
+
+        x1 -= 8;
+        x2 -= 8;
+        trig += 16;
+    } while (x2 >= x);
+}
+
+/** N/stage point generic N stage butterfly (in place, 2 register) */
+static inline void
+mdct_butterfly_generic(FLOAT *trig, FLOAT *x, int points, int trigint)
+{
+    FLOAT *x1 = x + points - 8;
+    FLOAT *x2 = x + (points>>1) - 8;
+    FLOAT r0;
+    FLOAT r1;
+
+    do {
+        r0     = x1[6] - x2[6];
+        r1     = x1[7] - x2[7];
+        x1[6] += x2[6];
+        x1[7] += x2[7];
+        x2[6]  = (r1 * trig[1] + r0 * trig[0]);
+        x2[7]  = (r1 * trig[0] - r0 * trig[1]);
+
+        trig += trigint;
+
+        r0     = x1[4] - x2[4];
+        r1     = x1[5] - x2[5];
+        x1[4] += x2[4];
+        x1[5] += x2[5];
+        x2[4]  = (r1 * trig[1] + r0 * trig[0]);
+        x2[5]  = (r1 * trig[0] - r0 * trig[1]);
+
+        trig += trigint;
+
+        r0     = x1[2] - x2[2];
+        r1     = x1[3] - x2[3];
+        x1[2] += x2[2];
+        x1[3] += x2[3];
+        x2[2]  = (r1 * trig[1] + r0 * trig[0]);
+        x2[3]  = (r1 * trig[0] - r0 * trig[1]);
+
+        trig += trigint;
+
+        r0     = x1[0] - x2[0];
+        r1     = x1[1] - x2[1];
+        x1[0] += x2[0];
+        x1[1] += x2[1];
+        x2[0]  = (r1 * trig[1] + r0 * trig[0]);
+        x2[1]  = (r1 * trig[0] - r0 * trig[1]);
+
+        trig += trigint;
+        x1 -= 8;
+        x2 -= 8;
+    } while (x2 >= x);
+}
+
+static inline void
+mdct_butterflies(MDCTContext *mdct, FLOAT *x, int points)
+{
+    FLOAT *trig = mdct->trig;
+    int stages = mdct->log2n-5;
+    int i, j;
+
+    if (--stages > 0)
+        mdct_butterfly_first(trig, x, points);
+
+    for (i = 1; --stages > 0; i++)
+        for (j = 0; j < (1<<i); j++)
+            mdct_butterfly_generic(trig, x+(points>>i)*j, points>>i, 4<<i);
+
+    for (j = 0; j < points; j += 32)
+        mdct_butterfly_32(x+j);
+}
+
+static inline void
+mdct_bitreverse(MDCTContext *mdct, FLOAT *x)
+{
+    int n = mdct->n;
+    int *bit = mdct->bitrev;
+    FLOAT *w0 = x;
+    FLOAT *w1 = x = w0+(n>>1);
+    FLOAT *trig = mdct->trig+n;
+
+    do {
+        FLOAT *x0 = x+bit[0];
+        FLOAT *x1 = x+bit[1];
+
+        FLOAT  r0 = x0[1]- x1[1];
+        FLOAT  r1 = x0[0]+ x1[0];
+        FLOAT  r2 = (r1 * trig[0] + r0 * trig[1]);
+        FLOAT  r3 = (r1 * trig[1] - r0 * trig[0]);
+
+        w1 -= 4;
+
+        r0 = (x0[1] + x1[1]) * FCONST(0.5);
+        r1 = (x0[0] - x1[0]) * FCONST(0.5);
+
+        w0[0] = r0 + r2;
+        w1[2] = r0 - r2;
+        w0[1] = r1 + r3;
+        w1[3] = r3 - r1;
+
+        x0 = x+bit[2];
+        x1 = x+bit[3];
+
+        r0 = x0[1] - x1[1];
+        r1 = x0[0] + x1[0];
+        r2 = (r1 * trig[2] + r0 * trig[3]);
+        r3 = (r1 * trig[3] - r0 * trig[2]);
+
+        r0 = (x0[1] + x1[1]) * FCONST(0.5);
+        r1 = (x0[0] - x1[0]) * FCONST(0.5);
+
+        w0[2] = r0 + r2;
+        w1[0] = r0 - r2;
+        w0[3] = r1 + r3;
+        w1[1] = r3 - r1;
+
+        trig += 4;
+        bit += 4;
+        w0 += 4;
+    } while (w0 < w1);
+}
+
+void
+mdct(MDCTContext *mdct, FLOAT *out, FLOAT *in)
+{
+    int n = mdct->n;
+    int n2 = n>>1;
+    int n4 = n>>2;
+    int n8 = n>>3;
+    FLOAT *w = alloca(n*sizeof(*w));
+    FLOAT *w2 = w+n2;
+
+    FLOAT *x0 = in+n2+n4;
+    FLOAT *x1 = x0+1;
+    FLOAT *trig = mdct->trig + n2;
+    FLOAT r0;
+    FLOAT r1;
+    int i;
+
+    for (i = 0; i < n8; i += 2) {
+        x0 -= 4;
+        trig -= 2;
+        r0 = x0[2] + x1[0];
+        r1 = x0[0] + x1[2];
+        w2[i]   = (r1*trig[1] + r0*trig[0]);
+        w2[i+1] = (r1*trig[0] - r0*trig[1]);
+        x1 += 4;
+    }
+
+    x1 = in+1;
+    for (; i < n2-n8; i += 2) {
+        trig -= 2;
+        x0 -= 4;
+        r0 = x0[2] - x1[0];
+        r1 = x0[0] - x1[2];
+        w2[i]   = (r1*trig[1] + r0*trig[0]);
+        w2[i+1] = (r1*trig[0] - r0*trig[1]);
+        x1 += 4;
+    }
+
+    x0 = in+n;
+    for (; i < n2; i += 2) {
+        trig -= 2;
+        x0 -= 4;
+        r0 = -x0[2] - x1[0];
+        r1 = -x0[0] - x1[2];
+        w2[i]   = (r1*trig[1] + r0*trig[0]);
+        w2[i+1] = (r1*trig[0] - r0*trig[1]);
+        x1 += 4;
+    }
+
+    mdct_butterflies(mdct, w2, n2);
+    mdct_bitreverse(mdct, w);
+
+    trig = mdct->trig+n2;
+    x0 = out+n2;
+    for (i = 0; i < n4; i++) {
+        x0--;
+        out[i] = ((w[0]*trig[0]+w[1]*trig[1])*mdct->scale);
+        x0[0]  = ((w[0]*trig[1]-w[1]*trig[0])*mdct->scale);
+        w += 2;
+        trig += 2;
+    }
+}
+
diff --git a/src/mdct/vorbis_impl/mdct.h b/src/mdct/vorbis_impl/mdct.h
new file mode 100644
index 0000000..c80a351
--- /dev/null
+++ b/src/mdct/vorbis_impl/mdct.h
@@ -0,0 +1,94 @@
+/**
+ *
+ * This file is derived from libvorbis
+ * Copyright (c) 2002, Xiph.org Foundation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * - Neither the name of the Xiph.org Foundation nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION
+ * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * @file mdct.h
+ * MDCT header
+ */
+
+#ifndef MDCT_H
+#define MDCT_H
+
+#include "../common.h"
+
+#if defined(HAVE_MMX) || defined(HAVE_SSE)
+#include "x86/mdct.h"
+#endif
+#ifdef HAVE_ALTIVEC
+#include "ppc/mdct.h"
+#endif
+
+#define ONE FCONST(1.0)
+#define TWO FCONST(2.0)
+#define AFT_PI3_8 FCONST(0.38268343236508977175)
+#define AFT_PI2_8 FCONST(0.70710678118654752441)
+#define AFT_PI1_8 FCONST(0.92387953251128675613)
+
+struct A52Context;
+struct A52ThreadContext;
+
+typedef struct MDCTContext {
+    void (*mdct)(struct A52ThreadContext *ctx, FLOAT *out, FLOAT *in);
+    void (*mdct_bitreverse)(struct MDCTContext *mdct, FLOAT *x);
+    void (*mdct_butterfly_generic)(struct MDCTContext *mdct, FLOAT *x, int points, int trigint);
+    void (*mdct_butterfly_first)(FLOAT *trig, FLOAT *x, int points);
+    void (*mdct_butterfly_32)(FLOAT *x);
+    FLOAT *trig;
+#ifndef CONFIG_DOUBLE
+#ifdef HAVE_SSE
+    FLOAT *trig_bitreverse;
+    FLOAT *trig_forward;
+    FLOAT *trig_butterfly_first;
+    FLOAT *trig_butterfly_generic8;
+    FLOAT *trig_butterfly_generic16;
+    FLOAT *trig_butterfly_generic32;
+    FLOAT *trig_butterfly_generic64;
+#endif
+#endif /* CONFIG_DOUBLE */
+    int *bitrev;
+    FLOAT scale;
+    int n;
+    int log2n;
+} MDCTContext;
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+void mdct_ctx_init(MDCTContext *mdct, int n, FLOAT scale);
+void mdct_ctx_close(MDCTContext *mdct);
+void mdct(MDCTContext *mdct, FLOAT *out, FLOAT *in);
+#ifdef __cplusplus
+}
+
+#endif
+#endif /* MDCT_H */
author	Daniil Cherednik <dan.cherednik@gmail.com>	2015-11-04 12:59:44 +0300
committer	Daniil Cherednik <dan.cherednik@gmail.com>	2015-11-04 12:59:44 +0300
commit	ae793aaa94741b3ca25eff25dd3e7d0fc4f4417e (patch)
tree	4a77f39c1286322f81d0c743680b6aaaebe80b26
parent	0b3154af9d688e1a5e3c88a4382ff1f6d165e9ac (diff)
download	atracdenc-ae793aaa94741b3ca25eff25dd3e7d0fc4f4417e.tar.gz