diff options
author | Daniil Cherednik <dan.cherednik@gmail.com> | 2015-11-04 12:59:44 +0300 |
---|---|---|
committer | Daniil Cherednik <dan.cherednik@gmail.com> | 2015-11-04 12:59:44 +0300 |
commit | ae793aaa94741b3ca25eff25dd3e7d0fc4f4417e (patch) | |
tree | 4a77f39c1286322f81d0c743680b6aaaebe80b26 | |
parent | 0b3154af9d688e1a5e3c88a4382ff1f6d165e9ac (diff) | |
download | atracdenc-ae793aaa94741b3ca25eff25dd3e7d0fc4f4417e.tar.gz |
fast mdct added (vorbis implementation)
-rw-r--r-- | src/Makefile | 3 | ||||
-rw-r--r-- | src/atracdenc.cpp | 36 | ||||
-rw-r--r-- | src/atracdenc.h | 3 | ||||
-rw-r--r-- | src/config.h | 4 | ||||
-rw-r--r-- | src/mdct/Makefile | 12 | ||||
-rw-r--r-- | src/mdct/common.h | 29 | ||||
-rw-r--r-- | src/mdct/mdct.h | 34 | ||||
-rw-r--r-- | src/mdct/mdct_ut.cpp | 64 | ||||
-rw-r--r-- | src/mdct/vorbis_impl/mdct.c | 450 | ||||
-rw-r--r-- | src/mdct/vorbis_impl/mdct.h | 94 |
10 files changed, 702 insertions, 27 deletions
diff --git a/src/Makefile b/src/Makefile index 46afbde..5a58987 100644 --- a/src/Makefile +++ b/src/Makefile @@ -1,3 +1,4 @@ all: - g++ -std=c++11 -g main.cpp wav.cpp aea.cpp atracdenc.cpp bitstream/bitstream.cpp atrac/atrac1.cpp atrac/atrac1_dequantiser.cpp atrac/atrac1_scale.cpp atrac/atrac1_bitalloc.cpp -o atracdenc + cd ./mdct && make && cd ../ + g++ -std=c++11 -O2 -g main.cpp wav.cpp aea.cpp atracdenc.cpp bitstream/bitstream.cpp atrac/atrac1.cpp atrac/atrac1_dequantiser.cpp atrac/atrac1_scale.cpp atrac/atrac1_bitalloc.cpp mdct/mdct_impl.o -o atracdenc diff --git a/src/atracdenc.cpp b/src/atracdenc.cpp index 2855193..c2ecad7 100644 --- a/src/atracdenc.cpp +++ b/src/atracdenc.cpp @@ -11,9 +11,12 @@ namespace NAtracDEnc { using namespace std; using namespace NBitStream; using namespace NAtrac1; +using namespace NMDCT; TAtrac1Processor::TAtrac1Processor(TAeaPtr&& aea, bool mono) : MixChannel(mono) , Aea(std::move(aea)) + , Mdct512(2) + , Mdct256(1) { } @@ -36,18 +39,6 @@ static void vector_fmul_window(double *dst, const double *src0, } } -vector<double> mdct(double* x, int N) { - vector<double> res; - for (int k = 0; k < N; k++) { - double sum = 0; - for (int n = 0; n < 2 * N; n++) { - sum += x[n]* cos((M_PI/N) * ((double)n + 0.5 + N/2) * ((double)k + 0.5)); - } - res.push_back(sum); - } - return res; -} - vector<double> midct(double* x, int N) { vector<double> res; for (int n = 0; n < 2 * N; n++) { @@ -61,28 +52,21 @@ vector<double> midct(double* x, int N) { return res; } - void TAtrac1Processor::Mdct(double Specs[512], double* low, double* mid, double* hi) { uint32_t pos = 0; for (uint32_t band = 0; band < QMF_BANDS; band++) { double* srcBuf = (band == 0) ? low : (band == 1) ? mid : hi; uint32_t bufSz = (band == 2) ? 256 : 128; - uint32_t xxx = (band == 2) ? 112 : 48; - double tmp[512]; - //if (band == 2) { - // for (uint32_t i = 0; i < bufSz; i++) { - // srcBuf[i] = srcBuf[i] * 2; - // } - //} - for (uint32_t i = 0; i < 512; i++) - tmp[i] = 0; - memcpy(&tmp[xxx], &srcBuf[bufSz], 32 * sizeof(double)); + uint32_t winStart = (band == 2) ? 112 : 48; + vector<double> tmp(512); + + memcpy(&tmp[winStart], &srcBuf[bufSz], 32 * sizeof(double)); for (int i = 0; i < 32; i++) { srcBuf[bufSz + i] = TAtrac1Data::SineWindow[i] * srcBuf[bufSz - 32 + i]; srcBuf[bufSz - 32 + i] = TAtrac1Data::SineWindow[31 - i] * srcBuf[bufSz - 32 + i]; } - memcpy(&tmp[xxx+32], &srcBuf[0], bufSz * sizeof(double)); - const vector<double>& sp = mdct(&tmp[0], bufSz); + memcpy(&tmp[winStart+32], &srcBuf[0], bufSz * sizeof(double)); + const vector<double>& sp = (band == 2) ? Mdct512(&tmp[0]) : Mdct256(&tmp[0]); if (band) { for (uint32_t j = 0; j < sp.size()/2; j++) { Specs[pos+j] = sp[bufSz - 1 -j]; @@ -200,7 +184,7 @@ TPCMEngine<double>::TProcessLambda TAtrac1Processor::GetEncodeLambda() { vector<double> specs; specs.resize(512); for (int i = 0; i < NumSamples; ++i) { - src[i] = data[i][channel] / 256.0; + src[i] = data[i][channel]; } SplitFilterBank[channel].Split(&src[0], &PcmBufLow[channel][0], &PcmBufMid[channel][0], &PcmBufHi[channel][0]); diff --git a/src/atracdenc.h b/src/atracdenc.h index 2fb114e..a0ab786 100644 --- a/src/atracdenc.h +++ b/src/atracdenc.h @@ -4,6 +4,7 @@ #include "atrac/atrac1.h" #include "atrac/atrac1_qmf.h" #include "atrac/atrac1_scale.h" +#include "mdct/mdct.h" namespace NAtracDEnc { @@ -22,6 +23,8 @@ class TAtrac1Processor : public TAtrac1Data { Atrac1SynthesisFilterBank<double> SynthesisFilterBank[2]; Atrac1SplitFilterBank<double> SplitFilterBank[2]; + NMDCT::TMDCT<512> Mdct512; + NMDCT::TMDCT<256> Mdct256; void IMdct(double specs[512], const TBlockSize& mode, double* low, double* mid, double* hi); void Mdct(double specs[512], double* low, double* mid, double* hi); NAtrac1::TScaler Scaler; diff --git a/src/config.h b/src/config.h new file mode 100644 index 0000000..942841a --- /dev/null +++ b/src/config.h @@ -0,0 +1,4 @@ +#pragma once + +#define CONFIG_DOUBLE + diff --git a/src/mdct/Makefile b/src/mdct/Makefile new file mode 100644 index 0000000..cffd39d --- /dev/null +++ b/src/mdct/Makefile @@ -0,0 +1,12 @@ +all: lib + +lib: + gcc -O2 -c vorbis_impl/mdct.c -o mdct_impl.o + +test: lib + g++ -std=c++11 mdct_ut.cpp -I ../../3rd/gtest-1.7.0/include/ ../../3rd/gtest-1.7.0/src/gtest-all.o ../../3rd/gtest-1.7.0/src/gtest_main.o ./mdct_impl.o -o mdct_ut + ./mdct_ut + +clean: + rm ./mdct_ut + rm ./mdct_impl.o diff --git a/src/mdct/common.h b/src/mdct/common.h new file mode 100644 index 0000000..d217ed5 --- /dev/null +++ b/src/mdct/common.h @@ -0,0 +1,29 @@ +#pragma once +#include "../config.h" + +#include <stdlib.h> +#include <string.h> +#include <math.h> + +#ifdef CONFIG_DOUBLE +typedef double FLOAT; +#define FCONST(X) (X) +#define AFT_COS cos +#define AFT_SIN sin +#define AFT_TAN tan +#define AFT_LOG10 log10 +#define AFT_EXP exp +#define AFT_FABS fabs +#define AFT_SQRT sqrt +#define AFT_EXP exp +#else +typedef float FLOAT; +#define FCONST(X) (X##f) +#define AFT_COS cosf +#define AFT_SIN sinf +#define AFT_TAN tanf +#define AFT_LOG10 log10f +#define AFT_FABS fabsf +#define AFT_SQRT sqrtf +#define AFT_EXP expf +#endif diff --git a/src/mdct/mdct.h b/src/mdct/mdct.h new file mode 100644 index 0000000..0637651 --- /dev/null +++ b/src/mdct/mdct.h @@ -0,0 +1,34 @@ +#pragma once + +#include "vorbis_impl/mdct.h" +#include <vector> + +namespace NMDCT { + +class TMDCTBase { +protected: + MDCTContext Ctx; + TMDCTBase(int n, double scale) { + mdct_ctx_init(&Ctx, n, scale); + }; + virtual ~TMDCTBase() { + mdct_ctx_close(&Ctx); + }; +}; + + +template<int N> +class TMDCT : public TMDCTBase { + std::vector<double> Buf; +public: + TMDCT(float scale = 1.0) + : Buf(N/2) + , TMDCTBase(N, scale) + {} + const std::vector<double>& operator()(double* in) { + mdct(&Ctx, &Buf[0], in); + return Buf; + } + +}; +} //namespace NMDCT diff --git a/src/mdct/mdct_ut.cpp b/src/mdct/mdct_ut.cpp new file mode 100644 index 0000000..a63544e --- /dev/null +++ b/src/mdct/mdct_ut.cpp @@ -0,0 +1,64 @@ +#include "mdct.h" +#include <gtest/gtest.h> + +#include <vector> + +using std::vector; +using namespace NMDCT; + +static vector<double> mdct(double* x, int N) { + vector<double> res; + for (int k = 0; k < N; k++) { + double sum = 0; + for (int n = 0; n < 2 * N; n++) + sum += x[n]* cos((M_PI/N) * ((double)n + 0.5 + N/2) * ((double)k + 0.5)); + + res.push_back(sum); + } + return res; +} + +TEST(TBitStream, MDCT64) { + const int N = 64; + TMDCT<N> transform(N); + vector<double> src(N); + for (int i = 0; i < N; i++) { + src[i] = i; + } + const vector<double> res1 = mdct(&src[0], N/2); + const vector<double> res2 = transform(&src[0]); + EXPECT_EQ(res1.size(), res2.size()); + for (int i = 0; i < res1.size(); i++) { + EXPECT_NEAR(res1[i], res2[i], 0.0000000001); + } +} + +TEST(TBitStream, MDCT128) { + const int N = 128; + TMDCT<N> transform(N); + vector<double> src(N); + for (int i = 0; i < N; i++) { + src[i] = i; + } + const vector<double> res1 = mdct(&src[0], N/2); + const vector<double> res2 = transform(&src[0]); + EXPECT_EQ(res1.size(), res2.size()); + for (int i = 0; i < res1.size(); i++) { + EXPECT_NEAR(res1[i], res2[i], 0.0000000001); + } +} + +TEST(TBitStream, MDCT256) { + const int N = 256; + TMDCT<N> transform(N); + vector<double> src(N); + for (int i = 0; i < N; i++) { + src[i] = i; + } + const vector<double> res1 = mdct(&src[0], N/2); + const vector<double> res2 = transform(&src[0]); + EXPECT_EQ(res1.size(), res2.size()); + for (int i = 0; i < res1.size(); i++) { + EXPECT_NEAR(res1[i], res2[i], 0.00000001); + } +} diff --git a/src/mdct/vorbis_impl/mdct.c b/src/mdct/vorbis_impl/mdct.c new file mode 100644 index 0000000..b297089 --- /dev/null +++ b/src/mdct/vorbis_impl/mdct.c @@ -0,0 +1,450 @@ +/** + * + * This file is derived from libvorbis + * Copyright (c) 2002, Xiph.org Foundation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * - Neither the name of the Xiph.org Foundation nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * @file mdct.c + * Modified Discrete Cosine Transform + */ + +#include "mdct.h" + +/** + * Allocates and initializes lookup tables in the MDCT context. + * @param mdct The MDCT context + * @param n Number of time-domain samples used in the MDCT transform + * @param scale Multiplier + */ +void +mdct_ctx_init(MDCTContext *mdct, int n, FLOAT scale) +{ + int *bitrev = malloc((n/4) * sizeof(int)); + FLOAT *trig = malloc((n+n/4) * sizeof(FLOAT)); + int i; + int n2 = (n >> 1); + int log2n = mdct->log2n = round(log((float)n)/log(2.f)); + mdct->n = n; + mdct->trig = trig; + mdct->bitrev = bitrev; + + // trig lookups + for (i = 0; i < n/4; i++) { + trig[i*2] = AFT_COS((M_PI/n)*(4*i)); + trig[i*2+1] = -AFT_SIN((M_PI/n)*(4*i)); + trig[n2+i*2] = AFT_COS((M_PI/(2*n))*(2*i+1)); + trig[n2+i*2+1] = AFT_SIN((M_PI/(2*n))*(2*i+1)); + } + for (i = 0; i < n/8; i++) { + trig[n+i*2] = AFT_COS((M_PI/n)*(4*i+2))*FCONST(0.5); + trig[n+i*2+1] = -AFT_SIN((M_PI/n)*(4*i+2))*FCONST(0.5); + } + + // bitreverse lookup + { + int j, acc; + int mask = (1 << (log2n-1)) - 1; + int msb = (1 << (log2n-2)); + for (i = 0; i < n/8; i++) { + acc = 0; + for (j = 0; msb>>j; j++) { + if ((msb>>j) & i) + acc |= (1 << j); + } + bitrev[i*2]= ((~acc) & mask) - 1; + bitrev[i*2+1] = acc; + } + } + mdct->scale = scale / (FLOAT)n; +} + +/** Deallocates memory use by the lookup tables in the MDCT context. */ +void +mdct_ctx_close(MDCTContext *mdct) +{ + if (mdct) { + if (mdct->trig) + free(mdct->trig); + if (mdct->bitrev) + free(mdct->bitrev); + memset(mdct, 0, sizeof(MDCTContext)); + } +} + +/** 8 point butterfly (in place, 4 register) */ +static inline void +mdct_butterfly_8(FLOAT *x) { + FLOAT r0 = x[6] + x[2]; + FLOAT r1 = x[6] - x[2]; + FLOAT r2 = x[4] + x[0]; + FLOAT r3 = x[4] - x[0]; + + x[6] = r0 + r2; + x[4] = r0 - r2; + + r0 = x[5] - x[1]; + r2 = x[7] - x[3]; + x[0] = r1 + r0; + x[2] = r1 - r0; + + r0 = x[5] + x[1]; + r1 = x[7] + x[3]; + x[3] = r2 + r3; + x[1] = r2 - r3; + x[7] = r1 + r0; + x[5] = r1 - r0; +} + +/** 16 point butterfly (in place, 4 register) */ +static inline void +mdct_butterfly_16(FLOAT *x) { + FLOAT r0 = x[1] - x[9]; + FLOAT r1 = x[0] - x[8]; + + x[8] += x[0]; + x[9] += x[1]; + x[0] = ((r0 + r1) * AFT_PI2_8); + x[1] = ((r0 - r1) * AFT_PI2_8); + + r0 = x[3] - x[11]; + r1 = x[10] - x[2]; + x[10] += x[2]; + x[11] += x[3]; + x[2] = r0; + x[3] = r1; + + r0 = x[12] - x[4]; + r1 = x[13] - x[5]; + x[12] += x[4]; + x[13] += x[5]; + x[4] = ((r0 - r1) * AFT_PI2_8); + x[5] = ((r0 + r1) * AFT_PI2_8); + + r0 = x[14] - x[6]; + r1 = x[15] - x[7]; + x[14] += x[6]; + x[15] += x[7]; + x[6] = r0; + x[7] = r1; + + mdct_butterfly_8(x); + mdct_butterfly_8(x+8); +} + +/** 32 point butterfly (in place, 4 register) */ +static inline void +mdct_butterfly_32(FLOAT *x) { + FLOAT r0 = x[30] - x[14]; + FLOAT r1 = x[31] - x[15]; + + x[30] += x[14]; + x[31] += x[15]; + x[14] = r0; + x[15] = r1; + + r0 = x[28] - x[12]; + r1 = x[29] - x[13]; + x[28] += x[12]; + x[29] += x[13]; + x[12] = (r0 * AFT_PI1_8 - r1 * AFT_PI3_8); + x[13] = (r0 * AFT_PI3_8 + r1 * AFT_PI1_8); + + r0 = x[26] - x[10]; + r1 = x[27] - x[11]; + x[26] += x[10]; + x[27] += x[11]; + x[10] = ((r0 - r1) * AFT_PI2_8); + x[11] = ((r0 + r1) * AFT_PI2_8); + + r0 = x[24] - x[8]; + r1 = x[25] - x[9]; + x[24] += x[8]; + x[25] += x[9]; + x[8] = (r0 * AFT_PI3_8 - r1 * AFT_PI1_8); + x[9] = (r1 * AFT_PI3_8 + r0 * AFT_PI1_8); + + r0 = x[22] - x[6]; + r1 = x[7] - x[23]; + x[22] += x[6]; + x[23] += x[7]; + x[6] = r1; + x[7] = r0; + + r0 = x[4] - x[20]; + r1 = x[5] - x[21]; + x[20] += x[4]; + x[21] += x[5]; + x[4] = (r1 * AFT_PI1_8 + r0 * AFT_PI3_8); + x[5] = (r1 * AFT_PI3_8 - r0 * AFT_PI1_8); + + r0 = x[2] - x[18]; + r1 = x[3] - x[19]; + x[18] += x[2]; + x[19] += x[3]; + x[2] = ((r1 + r0) * AFT_PI2_8); + x[3] = ((r1 - r0) * AFT_PI2_8); + + r0 = x[0] - x[16]; + r1 = x[1] - x[17]; + x[16] += x[0]; + x[17] += x[1]; + x[0] = (r1 * AFT_PI3_8 + r0 * AFT_PI1_8); + x[1] = (r1 * AFT_PI1_8 - r0 * AFT_PI3_8); + + mdct_butterfly_16(x); + mdct_butterfly_16(x+16); +} + +/** N-point first stage butterfly (in place, 2 register) */ +static inline void +mdct_butterfly_first(FLOAT *trig, FLOAT *x, int points) +{ + FLOAT *x1 = x + points - 8; + FLOAT *x2 = x + (points>>1) - 8; + FLOAT r0; + FLOAT r1; + + do { + r0 = x1[6] - x2[6]; + r1 = x1[7] - x2[7]; + x1[6] += x2[6]; + x1[7] += x2[7]; + x2[6] = (r1 * trig[1] + r0 * trig[0]); + x2[7] = (r1 * trig[0] - r0 * trig[1]); + + r0 = x1[4] - x2[4]; + r1 = x1[5] - x2[5]; + x1[4] += x2[4]; + x1[5] += x2[5]; + x2[4] = (r1 * trig[5] + r0 * trig[4]); + x2[5] = (r1 * trig[4] - r0 * trig[5]); + + r0 = x1[2] - x2[2]; + r1 = x1[3] - x2[3]; + x1[2] += x2[2]; + x1[3] += x2[3]; + x2[2] = (r1 * trig[9] + r0 * trig[8]); + x2[3] = (r1 * trig[8] - r0 * trig[9]); + + r0 = x1[0] - x2[0]; + r1 = x1[1] - x2[1]; + x1[0] += x2[0]; + x1[1] += x2[1]; + x2[0] = (r1 * trig[13] + r0 * trig[12]); + x2[1] = (r1 * trig[12] - r0 * trig[13]); + + x1 -= 8; + x2 -= 8; + trig += 16; + } while (x2 >= x); +} + +/** N/stage point generic N stage butterfly (in place, 2 register) */ +static inline void +mdct_butterfly_generic(FLOAT *trig, FLOAT *x, int points, int trigint) +{ + FLOAT *x1 = x + points - 8; + FLOAT *x2 = x + (points>>1) - 8; + FLOAT r0; + FLOAT r1; + + do { + r0 = x1[6] - x2[6]; + r1 = x1[7] - x2[7]; + x1[6] += x2[6]; + x1[7] += x2[7]; + x2[6] = (r1 * trig[1] + r0 * trig[0]); + x2[7] = (r1 * trig[0] - r0 * trig[1]); + + trig += trigint; + + r0 = x1[4] - x2[4]; + r1 = x1[5] - x2[5]; + x1[4] += x2[4]; + x1[5] += x2[5]; + x2[4] = (r1 * trig[1] + r0 * trig[0]); + x2[5] = (r1 * trig[0] - r0 * trig[1]); + + trig += trigint; + + r0 = x1[2] - x2[2]; + r1 = x1[3] - x2[3]; + x1[2] += x2[2]; + x1[3] += x2[3]; + x2[2] = (r1 * trig[1] + r0 * trig[0]); + x2[3] = (r1 * trig[0] - r0 * trig[1]); + + trig += trigint; + + r0 = x1[0] - x2[0]; + r1 = x1[1] - x2[1]; + x1[0] += x2[0]; + x1[1] += x2[1]; + x2[0] = (r1 * trig[1] + r0 * trig[0]); + x2[1] = (r1 * trig[0] - r0 * trig[1]); + + trig += trigint; + x1 -= 8; + x2 -= 8; + } while (x2 >= x); +} + +static inline void +mdct_butterflies(MDCTContext *mdct, FLOAT *x, int points) +{ + FLOAT *trig = mdct->trig; + int stages = mdct->log2n-5; + int i, j; + + if (--stages > 0) + mdct_butterfly_first(trig, x, points); + + for (i = 1; --stages > 0; i++) + for (j = 0; j < (1<<i); j++) + mdct_butterfly_generic(trig, x+(points>>i)*j, points>>i, 4<<i); + + for (j = 0; j < points; j += 32) + mdct_butterfly_32(x+j); +} + +static inline void +mdct_bitreverse(MDCTContext *mdct, FLOAT *x) +{ + int n = mdct->n; + int *bit = mdct->bitrev; + FLOAT *w0 = x; + FLOAT *w1 = x = w0+(n>>1); + FLOAT *trig = mdct->trig+n; + + do { + FLOAT *x0 = x+bit[0]; + FLOAT *x1 = x+bit[1]; + + FLOAT r0 = x0[1]- x1[1]; + FLOAT r1 = x0[0]+ x1[0]; + FLOAT r2 = (r1 * trig[0] + r0 * trig[1]); + FLOAT r3 = (r1 * trig[1] - r0 * trig[0]); + + w1 -= 4; + + r0 = (x0[1] + x1[1]) * FCONST(0.5); + r1 = (x0[0] - x1[0]) * FCONST(0.5); + + w0[0] = r0 + r2; + w1[2] = r0 - r2; + w0[1] = r1 + r3; + w1[3] = r3 - r1; + + x0 = x+bit[2]; + x1 = x+bit[3]; + + r0 = x0[1] - x1[1]; + r1 = x0[0] + x1[0]; + r2 = (r1 * trig[2] + r0 * trig[3]); + r3 = (r1 * trig[3] - r0 * trig[2]); + + r0 = (x0[1] + x1[1]) * FCONST(0.5); + r1 = (x0[0] - x1[0]) * FCONST(0.5); + + w0[2] = r0 + r2; + w1[0] = r0 - r2; + w0[3] = r1 + r3; + w1[1] = r3 - r1; + + trig += 4; + bit += 4; + w0 += 4; + } while (w0 < w1); +} + +void +mdct(MDCTContext *mdct, FLOAT *out, FLOAT *in) +{ + int n = mdct->n; + int n2 = n>>1; + int n4 = n>>2; + int n8 = n>>3; + FLOAT *w = alloca(n*sizeof(*w)); + FLOAT *w2 = w+n2; + + FLOAT *x0 = in+n2+n4; + FLOAT *x1 = x0+1; + FLOAT *trig = mdct->trig + n2; + FLOAT r0; + FLOAT r1; + int i; + + for (i = 0; i < n8; i += 2) { + x0 -= 4; + trig -= 2; + r0 = x0[2] + x1[0]; + r1 = x0[0] + x1[2]; + w2[i] = (r1*trig[1] + r0*trig[0]); + w2[i+1] = (r1*trig[0] - r0*trig[1]); + x1 += 4; + } + + x1 = in+1; + for (; i < n2-n8; i += 2) { + trig -= 2; + x0 -= 4; + r0 = x0[2] - x1[0]; + r1 = x0[0] - x1[2]; + w2[i] = (r1*trig[1] + r0*trig[0]); + w2[i+1] = (r1*trig[0] - r0*trig[1]); + x1 += 4; + } + + x0 = in+n; + for (; i < n2; i += 2) { + trig -= 2; + x0 -= 4; + r0 = -x0[2] - x1[0]; + r1 = -x0[0] - x1[2]; + w2[i] = (r1*trig[1] + r0*trig[0]); + w2[i+1] = (r1*trig[0] - r0*trig[1]); + x1 += 4; + } + + mdct_butterflies(mdct, w2, n2); + mdct_bitreverse(mdct, w); + + trig = mdct->trig+n2; + x0 = out+n2; + for (i = 0; i < n4; i++) { + x0--; + out[i] = ((w[0]*trig[0]+w[1]*trig[1])*mdct->scale); + x0[0] = ((w[0]*trig[1]-w[1]*trig[0])*mdct->scale); + w += 2; + trig += 2; + } +} + diff --git a/src/mdct/vorbis_impl/mdct.h b/src/mdct/vorbis_impl/mdct.h new file mode 100644 index 0000000..c80a351 --- /dev/null +++ b/src/mdct/vorbis_impl/mdct.h @@ -0,0 +1,94 @@ +/** + * + * This file is derived from libvorbis + * Copyright (c) 2002, Xiph.org Foundation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * - Neither the name of the Xiph.org Foundation nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * @file mdct.h + * MDCT header + */ + +#ifndef MDCT_H +#define MDCT_H + +#include "../common.h" + +#if defined(HAVE_MMX) || defined(HAVE_SSE) +#include "x86/mdct.h" +#endif +#ifdef HAVE_ALTIVEC +#include "ppc/mdct.h" +#endif + +#define ONE FCONST(1.0) +#define TWO FCONST(2.0) +#define AFT_PI3_8 FCONST(0.38268343236508977175) +#define AFT_PI2_8 FCONST(0.70710678118654752441) +#define AFT_PI1_8 FCONST(0.92387953251128675613) + +struct A52Context; +struct A52ThreadContext; + +typedef struct MDCTContext { + void (*mdct)(struct A52ThreadContext *ctx, FLOAT *out, FLOAT *in); + void (*mdct_bitreverse)(struct MDCTContext *mdct, FLOAT *x); + void (*mdct_butterfly_generic)(struct MDCTContext *mdct, FLOAT *x, int points, int trigint); + void (*mdct_butterfly_first)(FLOAT *trig, FLOAT *x, int points); + void (*mdct_butterfly_32)(FLOAT *x); + FLOAT *trig; +#ifndef CONFIG_DOUBLE +#ifdef HAVE_SSE + FLOAT *trig_bitreverse; + FLOAT *trig_forward; + FLOAT *trig_butterfly_first; + FLOAT *trig_butterfly_generic8; + FLOAT *trig_butterfly_generic16; + FLOAT *trig_butterfly_generic32; + FLOAT *trig_butterfly_generic64; +#endif +#endif /* CONFIG_DOUBLE */ + int *bitrev; + FLOAT scale; + int n; + int log2n; +} MDCTContext; + +#ifdef __cplusplus +extern "C" { +#endif +void mdct_ctx_init(MDCTContext *mdct, int n, FLOAT scale); +void mdct_ctx_close(MDCTContext *mdct); +void mdct(MDCTContext *mdct, FLOAT *out, FLOAT *in); +#ifdef __cplusplus +} + +#endif +#endif /* MDCT_H */ |