aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDaniil Cherednik <dan.cherednik@gmail.com>2015-11-04 12:59:44 +0300
committerDaniil Cherednik <dan.cherednik@gmail.com>2015-11-04 12:59:44 +0300
commitae793aaa94741b3ca25eff25dd3e7d0fc4f4417e (patch)
tree4a77f39c1286322f81d0c743680b6aaaebe80b26
parent0b3154af9d688e1a5e3c88a4382ff1f6d165e9ac (diff)
downloadatracdenc-ae793aaa94741b3ca25eff25dd3e7d0fc4f4417e.tar.gz
fast mdct added (vorbis implementation)
-rw-r--r--src/Makefile3
-rw-r--r--src/atracdenc.cpp36
-rw-r--r--src/atracdenc.h3
-rw-r--r--src/config.h4
-rw-r--r--src/mdct/Makefile12
-rw-r--r--src/mdct/common.h29
-rw-r--r--src/mdct/mdct.h34
-rw-r--r--src/mdct/mdct_ut.cpp64
-rw-r--r--src/mdct/vorbis_impl/mdct.c450
-rw-r--r--src/mdct/vorbis_impl/mdct.h94
10 files changed, 702 insertions, 27 deletions
diff --git a/src/Makefile b/src/Makefile
index 46afbde..5a58987 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -1,3 +1,4 @@
all:
- g++ -std=c++11 -g main.cpp wav.cpp aea.cpp atracdenc.cpp bitstream/bitstream.cpp atrac/atrac1.cpp atrac/atrac1_dequantiser.cpp atrac/atrac1_scale.cpp atrac/atrac1_bitalloc.cpp -o atracdenc
+ cd ./mdct && make && cd ../
+ g++ -std=c++11 -O2 -g main.cpp wav.cpp aea.cpp atracdenc.cpp bitstream/bitstream.cpp atrac/atrac1.cpp atrac/atrac1_dequantiser.cpp atrac/atrac1_scale.cpp atrac/atrac1_bitalloc.cpp mdct/mdct_impl.o -o atracdenc
diff --git a/src/atracdenc.cpp b/src/atracdenc.cpp
index 2855193..c2ecad7 100644
--- a/src/atracdenc.cpp
+++ b/src/atracdenc.cpp
@@ -11,9 +11,12 @@ namespace NAtracDEnc {
using namespace std;
using namespace NBitStream;
using namespace NAtrac1;
+using namespace NMDCT;
TAtrac1Processor::TAtrac1Processor(TAeaPtr&& aea, bool mono)
: MixChannel(mono)
, Aea(std::move(aea))
+ , Mdct512(2)
+ , Mdct256(1)
{
}
@@ -36,18 +39,6 @@ static void vector_fmul_window(double *dst, const double *src0,
}
}
-vector<double> mdct(double* x, int N) {
- vector<double> res;
- for (int k = 0; k < N; k++) {
- double sum = 0;
- for (int n = 0; n < 2 * N; n++) {
- sum += x[n]* cos((M_PI/N) * ((double)n + 0.5 + N/2) * ((double)k + 0.5));
- }
- res.push_back(sum);
- }
- return res;
-}
-
vector<double> midct(double* x, int N) {
vector<double> res;
for (int n = 0; n < 2 * N; n++) {
@@ -61,28 +52,21 @@ vector<double> midct(double* x, int N) {
return res;
}
-
void TAtrac1Processor::Mdct(double Specs[512], double* low, double* mid, double* hi) {
uint32_t pos = 0;
for (uint32_t band = 0; band < QMF_BANDS; band++) {
double* srcBuf = (band == 0) ? low : (band == 1) ? mid : hi;
uint32_t bufSz = (band == 2) ? 256 : 128;
- uint32_t xxx = (band == 2) ? 112 : 48;
- double tmp[512];
- //if (band == 2) {
- // for (uint32_t i = 0; i < bufSz; i++) {
- // srcBuf[i] = srcBuf[i] * 2;
- // }
- //}
- for (uint32_t i = 0; i < 512; i++)
- tmp[i] = 0;
- memcpy(&tmp[xxx], &srcBuf[bufSz], 32 * sizeof(double));
+ uint32_t winStart = (band == 2) ? 112 : 48;
+ vector<double> tmp(512);
+
+ memcpy(&tmp[winStart], &srcBuf[bufSz], 32 * sizeof(double));
for (int i = 0; i < 32; i++) {
srcBuf[bufSz + i] = TAtrac1Data::SineWindow[i] * srcBuf[bufSz - 32 + i];
srcBuf[bufSz - 32 + i] = TAtrac1Data::SineWindow[31 - i] * srcBuf[bufSz - 32 + i];
}
- memcpy(&tmp[xxx+32], &srcBuf[0], bufSz * sizeof(double));
- const vector<double>& sp = mdct(&tmp[0], bufSz);
+ memcpy(&tmp[winStart+32], &srcBuf[0], bufSz * sizeof(double));
+ const vector<double>& sp = (band == 2) ? Mdct512(&tmp[0]) : Mdct256(&tmp[0]);
if (band) {
for (uint32_t j = 0; j < sp.size()/2; j++) {
Specs[pos+j] = sp[bufSz - 1 -j];
@@ -200,7 +184,7 @@ TPCMEngine<double>::TProcessLambda TAtrac1Processor::GetEncodeLambda() {
vector<double> specs;
specs.resize(512);
for (int i = 0; i < NumSamples; ++i) {
- src[i] = data[i][channel] / 256.0;
+ src[i] = data[i][channel];
}
SplitFilterBank[channel].Split(&src[0], &PcmBufLow[channel][0], &PcmBufMid[channel][0], &PcmBufHi[channel][0]);
diff --git a/src/atracdenc.h b/src/atracdenc.h
index 2fb114e..a0ab786 100644
--- a/src/atracdenc.h
+++ b/src/atracdenc.h
@@ -4,6 +4,7 @@
#include "atrac/atrac1.h"
#include "atrac/atrac1_qmf.h"
#include "atrac/atrac1_scale.h"
+#include "mdct/mdct.h"
namespace NAtracDEnc {
@@ -22,6 +23,8 @@ class TAtrac1Processor : public TAtrac1Data {
Atrac1SynthesisFilterBank<double> SynthesisFilterBank[2];
Atrac1SplitFilterBank<double> SplitFilterBank[2];
+ NMDCT::TMDCT<512> Mdct512;
+ NMDCT::TMDCT<256> Mdct256;
void IMdct(double specs[512], const TBlockSize& mode, double* low, double* mid, double* hi);
void Mdct(double specs[512], double* low, double* mid, double* hi);
NAtrac1::TScaler Scaler;
diff --git a/src/config.h b/src/config.h
new file mode 100644
index 0000000..942841a
--- /dev/null
+++ b/src/config.h
@@ -0,0 +1,4 @@
+#pragma once
+
+#define CONFIG_DOUBLE
+
diff --git a/src/mdct/Makefile b/src/mdct/Makefile
new file mode 100644
index 0000000..cffd39d
--- /dev/null
+++ b/src/mdct/Makefile
@@ -0,0 +1,12 @@
+all: lib
+
+lib:
+ gcc -O2 -c vorbis_impl/mdct.c -o mdct_impl.o
+
+test: lib
+ g++ -std=c++11 mdct_ut.cpp -I ../../3rd/gtest-1.7.0/include/ ../../3rd/gtest-1.7.0/src/gtest-all.o ../../3rd/gtest-1.7.0/src/gtest_main.o ./mdct_impl.o -o mdct_ut
+ ./mdct_ut
+
+clean:
+ rm ./mdct_ut
+ rm ./mdct_impl.o
diff --git a/src/mdct/common.h b/src/mdct/common.h
new file mode 100644
index 0000000..d217ed5
--- /dev/null
+++ b/src/mdct/common.h
@@ -0,0 +1,29 @@
+#pragma once
+#include "../config.h"
+
+#include <stdlib.h>
+#include <string.h>
+#include <math.h>
+
+#ifdef CONFIG_DOUBLE
+typedef double FLOAT;
+#define FCONST(X) (X)
+#define AFT_COS cos
+#define AFT_SIN sin
+#define AFT_TAN tan
+#define AFT_LOG10 log10
+#define AFT_EXP exp
+#define AFT_FABS fabs
+#define AFT_SQRT sqrt
+#define AFT_EXP exp
+#else
+typedef float FLOAT;
+#define FCONST(X) (X##f)
+#define AFT_COS cosf
+#define AFT_SIN sinf
+#define AFT_TAN tanf
+#define AFT_LOG10 log10f
+#define AFT_FABS fabsf
+#define AFT_SQRT sqrtf
+#define AFT_EXP expf
+#endif
diff --git a/src/mdct/mdct.h b/src/mdct/mdct.h
new file mode 100644
index 0000000..0637651
--- /dev/null
+++ b/src/mdct/mdct.h
@@ -0,0 +1,34 @@
+#pragma once
+
+#include "vorbis_impl/mdct.h"
+#include <vector>
+
+namespace NMDCT {
+
+class TMDCTBase {
+protected:
+ MDCTContext Ctx;
+ TMDCTBase(int n, double scale) {
+ mdct_ctx_init(&Ctx, n, scale);
+ };
+ virtual ~TMDCTBase() {
+ mdct_ctx_close(&Ctx);
+ };
+};
+
+
+template<int N>
+class TMDCT : public TMDCTBase {
+ std::vector<double> Buf;
+public:
+ TMDCT(float scale = 1.0)
+ : Buf(N/2)
+ , TMDCTBase(N, scale)
+ {}
+ const std::vector<double>& operator()(double* in) {
+ mdct(&Ctx, &Buf[0], in);
+ return Buf;
+ }
+
+};
+} //namespace NMDCT
diff --git a/src/mdct/mdct_ut.cpp b/src/mdct/mdct_ut.cpp
new file mode 100644
index 0000000..a63544e
--- /dev/null
+++ b/src/mdct/mdct_ut.cpp
@@ -0,0 +1,64 @@
+#include "mdct.h"
+#include <gtest/gtest.h>
+
+#include <vector>
+
+using std::vector;
+using namespace NMDCT;
+
+static vector<double> mdct(double* x, int N) {
+ vector<double> res;
+ for (int k = 0; k < N; k++) {
+ double sum = 0;
+ for (int n = 0; n < 2 * N; n++)
+ sum += x[n]* cos((M_PI/N) * ((double)n + 0.5 + N/2) * ((double)k + 0.5));
+
+ res.push_back(sum);
+ }
+ return res;
+}
+
+TEST(TBitStream, MDCT64) {
+ const int N = 64;
+ TMDCT<N> transform(N);
+ vector<double> src(N);
+ for (int i = 0; i < N; i++) {
+ src[i] = i;
+ }
+ const vector<double> res1 = mdct(&src[0], N/2);
+ const vector<double> res2 = transform(&src[0]);
+ EXPECT_EQ(res1.size(), res2.size());
+ for (int i = 0; i < res1.size(); i++) {
+ EXPECT_NEAR(res1[i], res2[i], 0.0000000001);
+ }
+}
+
+TEST(TBitStream, MDCT128) {
+ const int N = 128;
+ TMDCT<N> transform(N);
+ vector<double> src(N);
+ for (int i = 0; i < N; i++) {
+ src[i] = i;
+ }
+ const vector<double> res1 = mdct(&src[0], N/2);
+ const vector<double> res2 = transform(&src[0]);
+ EXPECT_EQ(res1.size(), res2.size());
+ for (int i = 0; i < res1.size(); i++) {
+ EXPECT_NEAR(res1[i], res2[i], 0.0000000001);
+ }
+}
+
+TEST(TBitStream, MDCT256) {
+ const int N = 256;
+ TMDCT<N> transform(N);
+ vector<double> src(N);
+ for (int i = 0; i < N; i++) {
+ src[i] = i;
+ }
+ const vector<double> res1 = mdct(&src[0], N/2);
+ const vector<double> res2 = transform(&src[0]);
+ EXPECT_EQ(res1.size(), res2.size());
+ for (int i = 0; i < res1.size(); i++) {
+ EXPECT_NEAR(res1[i], res2[i], 0.00000001);
+ }
+}
diff --git a/src/mdct/vorbis_impl/mdct.c b/src/mdct/vorbis_impl/mdct.c
new file mode 100644
index 0000000..b297089
--- /dev/null
+++ b/src/mdct/vorbis_impl/mdct.c
@@ -0,0 +1,450 @@
+/**
+ *
+ * This file is derived from libvorbis
+ * Copyright (c) 2002, Xiph.org Foundation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * - Neither the name of the Xiph.org Foundation nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION
+ * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * @file mdct.c
+ * Modified Discrete Cosine Transform
+ */
+
+#include "mdct.h"
+
+/**
+ * Allocates and initializes lookup tables in the MDCT context.
+ * @param mdct The MDCT context
+ * @param n Number of time-domain samples used in the MDCT transform
+ * @param scale Multiplier
+ */
+void
+mdct_ctx_init(MDCTContext *mdct, int n, FLOAT scale)
+{
+ int *bitrev = malloc((n/4) * sizeof(int));
+ FLOAT *trig = malloc((n+n/4) * sizeof(FLOAT));
+ int i;
+ int n2 = (n >> 1);
+ int log2n = mdct->log2n = round(log((float)n)/log(2.f));
+ mdct->n = n;
+ mdct->trig = trig;
+ mdct->bitrev = bitrev;
+
+ // trig lookups
+ for (i = 0; i < n/4; i++) {
+ trig[i*2] = AFT_COS((M_PI/n)*(4*i));
+ trig[i*2+1] = -AFT_SIN((M_PI/n)*(4*i));
+ trig[n2+i*2] = AFT_COS((M_PI/(2*n))*(2*i+1));
+ trig[n2+i*2+1] = AFT_SIN((M_PI/(2*n))*(2*i+1));
+ }
+ for (i = 0; i < n/8; i++) {
+ trig[n+i*2] = AFT_COS((M_PI/n)*(4*i+2))*FCONST(0.5);
+ trig[n+i*2+1] = -AFT_SIN((M_PI/n)*(4*i+2))*FCONST(0.5);
+ }
+
+ // bitreverse lookup
+ {
+ int j, acc;
+ int mask = (1 << (log2n-1)) - 1;
+ int msb = (1 << (log2n-2));
+ for (i = 0; i < n/8; i++) {
+ acc = 0;
+ for (j = 0; msb>>j; j++) {
+ if ((msb>>j) & i)
+ acc |= (1 << j);
+ }
+ bitrev[i*2]= ((~acc) & mask) - 1;
+ bitrev[i*2+1] = acc;
+ }
+ }
+ mdct->scale = scale / (FLOAT)n;
+}
+
+/** Deallocates memory use by the lookup tables in the MDCT context. */
+void
+mdct_ctx_close(MDCTContext *mdct)
+{
+ if (mdct) {
+ if (mdct->trig)
+ free(mdct->trig);
+ if (mdct->bitrev)
+ free(mdct->bitrev);
+ memset(mdct, 0, sizeof(MDCTContext));
+ }
+}
+
+/** 8 point butterfly (in place, 4 register) */
+static inline void
+mdct_butterfly_8(FLOAT *x) {
+ FLOAT r0 = x[6] + x[2];
+ FLOAT r1 = x[6] - x[2];
+ FLOAT r2 = x[4] + x[0];
+ FLOAT r3 = x[4] - x[0];
+
+ x[6] = r0 + r2;
+ x[4] = r0 - r2;
+
+ r0 = x[5] - x[1];
+ r2 = x[7] - x[3];
+ x[0] = r1 + r0;
+ x[2] = r1 - r0;
+
+ r0 = x[5] + x[1];
+ r1 = x[7] + x[3];
+ x[3] = r2 + r3;
+ x[1] = r2 - r3;
+ x[7] = r1 + r0;
+ x[5] = r1 - r0;
+}
+
+/** 16 point butterfly (in place, 4 register) */
+static inline void
+mdct_butterfly_16(FLOAT *x) {
+ FLOAT r0 = x[1] - x[9];
+ FLOAT r1 = x[0] - x[8];
+
+ x[8] += x[0];
+ x[9] += x[1];
+ x[0] = ((r0 + r1) * AFT_PI2_8);
+ x[1] = ((r0 - r1) * AFT_PI2_8);
+
+ r0 = x[3] - x[11];
+ r1 = x[10] - x[2];
+ x[10] += x[2];
+ x[11] += x[3];
+ x[2] = r0;
+ x[3] = r1;
+
+ r0 = x[12] - x[4];
+ r1 = x[13] - x[5];
+ x[12] += x[4];
+ x[13] += x[5];
+ x[4] = ((r0 - r1) * AFT_PI2_8);
+ x[5] = ((r0 + r1) * AFT_PI2_8);
+
+ r0 = x[14] - x[6];
+ r1 = x[15] - x[7];
+ x[14] += x[6];
+ x[15] += x[7];
+ x[6] = r0;
+ x[7] = r1;
+
+ mdct_butterfly_8(x);
+ mdct_butterfly_8(x+8);
+}
+
+/** 32 point butterfly (in place, 4 register) */
+static inline void
+mdct_butterfly_32(FLOAT *x) {
+ FLOAT r0 = x[30] - x[14];
+ FLOAT r1 = x[31] - x[15];
+
+ x[30] += x[14];
+ x[31] += x[15];
+ x[14] = r0;
+ x[15] = r1;
+
+ r0 = x[28] - x[12];
+ r1 = x[29] - x[13];
+ x[28] += x[12];
+ x[29] += x[13];
+ x[12] = (r0 * AFT_PI1_8 - r1 * AFT_PI3_8);
+ x[13] = (r0 * AFT_PI3_8 + r1 * AFT_PI1_8);
+
+ r0 = x[26] - x[10];
+ r1 = x[27] - x[11];
+ x[26] += x[10];
+ x[27] += x[11];
+ x[10] = ((r0 - r1) * AFT_PI2_8);
+ x[11] = ((r0 + r1) * AFT_PI2_8);
+
+ r0 = x[24] - x[8];
+ r1 = x[25] - x[9];
+ x[24] += x[8];
+ x[25] += x[9];
+ x[8] = (r0 * AFT_PI3_8 - r1 * AFT_PI1_8);
+ x[9] = (r1 * AFT_PI3_8 + r0 * AFT_PI1_8);
+
+ r0 = x[22] - x[6];
+ r1 = x[7] - x[23];
+ x[22] += x[6];
+ x[23] += x[7];
+ x[6] = r1;
+ x[7] = r0;
+
+ r0 = x[4] - x[20];
+ r1 = x[5] - x[21];
+ x[20] += x[4];
+ x[21] += x[5];
+ x[4] = (r1 * AFT_PI1_8 + r0 * AFT_PI3_8);
+ x[5] = (r1 * AFT_PI3_8 - r0 * AFT_PI1_8);
+
+ r0 = x[2] - x[18];
+ r1 = x[3] - x[19];
+ x[18] += x[2];
+ x[19] += x[3];
+ x[2] = ((r1 + r0) * AFT_PI2_8);
+ x[3] = ((r1 - r0) * AFT_PI2_8);
+
+ r0 = x[0] - x[16];
+ r1 = x[1] - x[17];
+ x[16] += x[0];
+ x[17] += x[1];
+ x[0] = (r1 * AFT_PI3_8 + r0 * AFT_PI1_8);
+ x[1] = (r1 * AFT_PI1_8 - r0 * AFT_PI3_8);
+
+ mdct_butterfly_16(x);
+ mdct_butterfly_16(x+16);
+}
+
+/** N-point first stage butterfly (in place, 2 register) */
+static inline void
+mdct_butterfly_first(FLOAT *trig, FLOAT *x, int points)
+{
+ FLOAT *x1 = x + points - 8;
+ FLOAT *x2 = x + (points>>1) - 8;
+ FLOAT r0;
+ FLOAT r1;
+
+ do {
+ r0 = x1[6] - x2[6];
+ r1 = x1[7] - x2[7];
+ x1[6] += x2[6];
+ x1[7] += x2[7];
+ x2[6] = (r1 * trig[1] + r0 * trig[0]);
+ x2[7] = (r1 * trig[0] - r0 * trig[1]);
+
+ r0 = x1[4] - x2[4];
+ r1 = x1[5] - x2[5];
+ x1[4] += x2[4];
+ x1[5] += x2[5];
+ x2[4] = (r1 * trig[5] + r0 * trig[4]);
+ x2[5] = (r1 * trig[4] - r0 * trig[5]);
+
+ r0 = x1[2] - x2[2];
+ r1 = x1[3] - x2[3];
+ x1[2] += x2[2];
+ x1[3] += x2[3];
+ x2[2] = (r1 * trig[9] + r0 * trig[8]);
+ x2[3] = (r1 * trig[8] - r0 * trig[9]);
+
+ r0 = x1[0] - x2[0];
+ r1 = x1[1] - x2[1];
+ x1[0] += x2[0];
+ x1[1] += x2[1];
+ x2[0] = (r1 * trig[13] + r0 * trig[12]);
+ x2[1] = (r1 * trig[12] - r0 * trig[13]);
+
+ x1 -= 8;
+ x2 -= 8;
+ trig += 16;
+ } while (x2 >= x);
+}
+
+/** N/stage point generic N stage butterfly (in place, 2 register) */
+static inline void
+mdct_butterfly_generic(FLOAT *trig, FLOAT *x, int points, int trigint)
+{
+ FLOAT *x1 = x + points - 8;
+ FLOAT *x2 = x + (points>>1) - 8;
+ FLOAT r0;
+ FLOAT r1;
+
+ do {
+ r0 = x1[6] - x2[6];
+ r1 = x1[7] - x2[7];
+ x1[6] += x2[6];
+ x1[7] += x2[7];
+ x2[6] = (r1 * trig[1] + r0 * trig[0]);
+ x2[7] = (r1 * trig[0] - r0 * trig[1]);
+
+ trig += trigint;
+
+ r0 = x1[4] - x2[4];
+ r1 = x1[5] - x2[5];
+ x1[4] += x2[4];
+ x1[5] += x2[5];
+ x2[4] = (r1 * trig[1] + r0 * trig[0]);
+ x2[5] = (r1 * trig[0] - r0 * trig[1]);
+
+ trig += trigint;
+
+ r0 = x1[2] - x2[2];
+ r1 = x1[3] - x2[3];
+ x1[2] += x2[2];
+ x1[3] += x2[3];
+ x2[2] = (r1 * trig[1] + r0 * trig[0]);
+ x2[3] = (r1 * trig[0] - r0 * trig[1]);
+
+ trig += trigint;
+
+ r0 = x1[0] - x2[0];
+ r1 = x1[1] - x2[1];
+ x1[0] += x2[0];
+ x1[1] += x2[1];
+ x2[0] = (r1 * trig[1] + r0 * trig[0]);
+ x2[1] = (r1 * trig[0] - r0 * trig[1]);
+
+ trig += trigint;
+ x1 -= 8;
+ x2 -= 8;
+ } while (x2 >= x);
+}
+
+static inline void
+mdct_butterflies(MDCTContext *mdct, FLOAT *x, int points)
+{
+ FLOAT *trig = mdct->trig;
+ int stages = mdct->log2n-5;
+ int i, j;
+
+ if (--stages > 0)
+ mdct_butterfly_first(trig, x, points);
+
+ for (i = 1; --stages > 0; i++)
+ for (j = 0; j < (1<<i); j++)
+ mdct_butterfly_generic(trig, x+(points>>i)*j, points>>i, 4<<i);
+
+ for (j = 0; j < points; j += 32)
+ mdct_butterfly_32(x+j);
+}
+
+static inline void
+mdct_bitreverse(MDCTContext *mdct, FLOAT *x)
+{
+ int n = mdct->n;
+ int *bit = mdct->bitrev;
+ FLOAT *w0 = x;
+ FLOAT *w1 = x = w0+(n>>1);
+ FLOAT *trig = mdct->trig+n;
+
+ do {
+ FLOAT *x0 = x+bit[0];
+ FLOAT *x1 = x+bit[1];
+
+ FLOAT r0 = x0[1]- x1[1];
+ FLOAT r1 = x0[0]+ x1[0];
+ FLOAT r2 = (r1 * trig[0] + r0 * trig[1]);
+ FLOAT r3 = (r1 * trig[1] - r0 * trig[0]);
+
+ w1 -= 4;
+
+ r0 = (x0[1] + x1[1]) * FCONST(0.5);
+ r1 = (x0[0] - x1[0]) * FCONST(0.5);
+
+ w0[0] = r0 + r2;
+ w1[2] = r0 - r2;
+ w0[1] = r1 + r3;
+ w1[3] = r3 - r1;
+
+ x0 = x+bit[2];
+ x1 = x+bit[3];
+
+ r0 = x0[1] - x1[1];
+ r1 = x0[0] + x1[0];
+ r2 = (r1 * trig[2] + r0 * trig[3]);
+ r3 = (r1 * trig[3] - r0 * trig[2]);
+
+ r0 = (x0[1] + x1[1]) * FCONST(0.5);
+ r1 = (x0[0] - x1[0]) * FCONST(0.5);
+
+ w0[2] = r0 + r2;
+ w1[0] = r0 - r2;
+ w0[3] = r1 + r3;
+ w1[1] = r3 - r1;
+
+ trig += 4;
+ bit += 4;
+ w0 += 4;
+ } while (w0 < w1);
+}
+
+void
+mdct(MDCTContext *mdct, FLOAT *out, FLOAT *in)
+{
+ int n = mdct->n;
+ int n2 = n>>1;
+ int n4 = n>>2;
+ int n8 = n>>3;
+ FLOAT *w = alloca(n*sizeof(*w));
+ FLOAT *w2 = w+n2;
+
+ FLOAT *x0 = in+n2+n4;
+ FLOAT *x1 = x0+1;
+ FLOAT *trig = mdct->trig + n2;
+ FLOAT r0;
+ FLOAT r1;
+ int i;
+
+ for (i = 0; i < n8; i += 2) {
+ x0 -= 4;
+ trig -= 2;
+ r0 = x0[2] + x1[0];
+ r1 = x0[0] + x1[2];
+ w2[i] = (r1*trig[1] + r0*trig[0]);
+ w2[i+1] = (r1*trig[0] - r0*trig[1]);
+ x1 += 4;
+ }
+
+ x1 = in+1;
+ for (; i < n2-n8; i += 2) {
+ trig -= 2;
+ x0 -= 4;
+ r0 = x0[2] - x1[0];
+ r1 = x0[0] - x1[2];
+ w2[i] = (r1*trig[1] + r0*trig[0]);
+ w2[i+1] = (r1*trig[0] - r0*trig[1]);
+ x1 += 4;
+ }
+
+ x0 = in+n;
+ for (; i < n2; i += 2) {
+ trig -= 2;
+ x0 -= 4;
+ r0 = -x0[2] - x1[0];
+ r1 = -x0[0] - x1[2];
+ w2[i] = (r1*trig[1] + r0*trig[0]);
+ w2[i+1] = (r1*trig[0] - r0*trig[1]);
+ x1 += 4;
+ }
+
+ mdct_butterflies(mdct, w2, n2);
+ mdct_bitreverse(mdct, w);
+
+ trig = mdct->trig+n2;
+ x0 = out+n2;
+ for (i = 0; i < n4; i++) {
+ x0--;
+ out[i] = ((w[0]*trig[0]+w[1]*trig[1])*mdct->scale);
+ x0[0] = ((w[0]*trig[1]-w[1]*trig[0])*mdct->scale);
+ w += 2;
+ trig += 2;
+ }
+}
+
diff --git a/src/mdct/vorbis_impl/mdct.h b/src/mdct/vorbis_impl/mdct.h
new file mode 100644
index 0000000..c80a351
--- /dev/null
+++ b/src/mdct/vorbis_impl/mdct.h
@@ -0,0 +1,94 @@
+/**
+ *
+ * This file is derived from libvorbis
+ * Copyright (c) 2002, Xiph.org Foundation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * - Neither the name of the Xiph.org Foundation nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION
+ * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * @file mdct.h
+ * MDCT header
+ */
+
+#ifndef MDCT_H
+#define MDCT_H
+
+#include "../common.h"
+
+#if defined(HAVE_MMX) || defined(HAVE_SSE)
+#include "x86/mdct.h"
+#endif
+#ifdef HAVE_ALTIVEC
+#include "ppc/mdct.h"
+#endif
+
+#define ONE FCONST(1.0)
+#define TWO FCONST(2.0)
+#define AFT_PI3_8 FCONST(0.38268343236508977175)
+#define AFT_PI2_8 FCONST(0.70710678118654752441)
+#define AFT_PI1_8 FCONST(0.92387953251128675613)
+
+struct A52Context;
+struct A52ThreadContext;
+
+typedef struct MDCTContext {
+ void (*mdct)(struct A52ThreadContext *ctx, FLOAT *out, FLOAT *in);
+ void (*mdct_bitreverse)(struct MDCTContext *mdct, FLOAT *x);
+ void (*mdct_butterfly_generic)(struct MDCTContext *mdct, FLOAT *x, int points, int trigint);
+ void (*mdct_butterfly_first)(FLOAT *trig, FLOAT *x, int points);
+ void (*mdct_butterfly_32)(FLOAT *x);
+ FLOAT *trig;
+#ifndef CONFIG_DOUBLE
+#ifdef HAVE_SSE
+ FLOAT *trig_bitreverse;
+ FLOAT *trig_forward;
+ FLOAT *trig_butterfly_first;
+ FLOAT *trig_butterfly_generic8;
+ FLOAT *trig_butterfly_generic16;
+ FLOAT *trig_butterfly_generic32;
+ FLOAT *trig_butterfly_generic64;
+#endif
+#endif /* CONFIG_DOUBLE */
+ int *bitrev;
+ FLOAT scale;
+ int n;
+ int log2n;
+} MDCTContext;
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+void mdct_ctx_init(MDCTContext *mdct, int n, FLOAT scale);
+void mdct_ctx_close(MDCTContext *mdct);
+void mdct(MDCTContext *mdct, FLOAT *out, FLOAT *in);
+#ifdef __cplusplus
+}
+
+#endif
+#endif /* MDCT_H */