diff options
author | Daniil Cherednik <dan.cherednik@gmail.com> | 2016-06-19 02:58:23 +0300 |
---|---|---|
committer | Daniil Cherednik <dan.cherednik@gmail.com> | 2016-06-19 03:31:55 +0300 |
commit | 1151d5831f19a9f24dd0c545a4968606712a62d2 (patch) | |
tree | c978c1b9a3fc86fef531dd412fe6b7668b7c0567 /src/atrac3denc.cpp | |
parent | 8d65a0bd0774e03b3d10354e15f2f3361a2ce26a (diff) | |
download | atracdenc-1151d5831f19a9f24dd0c545a4968606712a62d2.tar.gz |
Some improvements to the ATRAC3 implementation: [atrac3]
- added a simple (ATRAC1-like) psychoacoustic model
- added the ability to encode tonal components
- added a simple tonal component extractor
- refactoring
Diffstat (limited to 'src/atrac3denc.cpp')
-rw-r--r-- | src/atrac3denc.cpp | 144 |
1 files changed, 111 insertions, 33 deletions
diff --git a/src/atrac3denc.cpp b/src/atrac3denc.cpp index 7557d5e..432fb18 100644 --- a/src/atrac3denc.cpp +++ b/src/atrac3denc.cpp @@ -1,22 +1,23 @@ #include "atrac3denc.h" -#include "atrac/atrac3_bitstream.h" +#include "transient_detector.h" #include "util.h" #include <assert.h> - +#include <algorithm> #include <iostream> - +#include <cmath> namespace NAtracDEnc { using namespace NMDCT; +using namespace NAtrac3; using std::vector; -void TAtrac3MDCT::Mdct(double specs[1024], double* bands[4], TGainModulatorArray gainModulators) { +void TAtrac3MDCT::Mdct(TFloat specs[1024], TFloat* bands[4], TGainModulatorArray gainModulators) { for (int band = 0; band < 4; ++band) { - double* srcBuff = bands[band]; - double* const curSpec = &specs[band*256]; + TFloat* srcBuff = bands[band]; + TFloat* const curSpec = &specs[band*256]; TGainModulator modFn = gainModulators[band]; - vector<double> tmp(512); - memcpy(&tmp[0], &srcBuff[256], 256 * sizeof(double)); + vector<TFloat> tmp(512); + memcpy(&tmp[0], &srcBuff[256], 256 * sizeof(TFloat)); if (modFn) { modFn(tmp.data(), srcBuff); } @@ -24,30 +25,30 @@ void TAtrac3MDCT::Mdct(double specs[1024], double* bands[4], TGainModulatorArray srcBuff[256+i] = TAtrac3Data::EncodeWindow[i] * srcBuff[i]; srcBuff[i] = TAtrac3Data::EncodeWindow[255-i] * srcBuff[i]; } - memcpy(&tmp[256], &srcBuff[0], 256 * sizeof(double)); - const vector<double>& sp = Mdct512(&tmp[0]); + memcpy(&tmp[256], &srcBuff[0], 256 * sizeof(TFloat)); + const vector<TFloat>& sp = Mdct512(&tmp[0]); assert(sp.size() == 256); - memcpy(curSpec, sp.data(), 256 * sizeof(double)); + memcpy(curSpec, sp.data(), 256 * sizeof(TFloat)); if (band & 1) { SwapArray(curSpec, 256); } } } -void TAtrac3MDCT::Midct(double specs[1024], double* bands[4], TGainDemodulatorArray gainDemodulators) { +void TAtrac3MDCT::Midct(TFloat specs[1024], TFloat* bands[4], TGainDemodulatorArray gainDemodulators) { for (int band = 0; band < 4; ++band) { - double* dstBuff = bands[band]; - double* curSpec = 
&specs[band*256]; - double* prevBuff = dstBuff + 256; + TFloat* dstBuff = bands[band]; + TFloat* curSpec = &specs[band*256]; + TFloat* prevBuff = dstBuff + 256; TAtrac3GainProcessor::TGainDemodulator demodFn = gainDemodulators[band]; if (band & 1) { SwapArray(curSpec, 256); } - vector<double> inv = Midct512(curSpec); + vector<TFloat> inv = Midct512(curSpec); assert(inv.size()/2 == 256); for (int j = 0; j < 256; ++j) { - inv[j] *= 2 * DecodeWindow[j]; - inv[511 - j] *= 2 * DecodeWindow[j]; + inv[j] *= /*2 */ DecodeWindow[j]; + inv[511 - j] *= /*2*/ DecodeWindow[j]; } if (demodFn) { demodFn(dstBuff, inv.data(), prevBuff); @@ -56,13 +57,14 @@ void TAtrac3MDCT::Midct(double specs[1024], double* bands[4], TGainDemodulatorAr dstBuff[j] = inv[j] + prevBuff[j]; } } - memcpy(prevBuff, &inv[256], sizeof(double)*256); + memcpy(prevBuff, &inv[256], sizeof(TFloat)*256); } } -TAtrac3Processor::TAtrac3Processor(TAeaPtr&& oma, const TContainerParams& params) +TAtrac3Processor::TAtrac3Processor(TCompressedIOPtr&& oma, TAtrac3EncoderSettings&& encoderSettings) : Oma(std::move(oma)) - , Params(params) + , Params(std::move(encoderSettings)) + , TransientDetectors(2 * 4, TTransientDetector(8, 256)) //2 - channels, 4 - bands {} TAtrac3Processor::~TAtrac3Processor() @@ -97,35 +99,111 @@ TAtrac3MDCT::TGainModulatorArray TAtrac3MDCT::MakeGainModulatorArray(const TAtra } } -TPCMEngine<double>::TProcessLambda TAtrac3Processor::GetEncodeLambda() { +//TODO: +TAtrac3Data::TTonalComponents TAtrac3Processor::ExtractTonalComponents(TFloat* specs, TTonalDetector fn) { + TAtrac3Data::TTonalComponents res; + const float thresholds[TAtrac3Data::NumQMF] = { 0.9, 2.4, 2.8, 3.2 }; + for (uint8_t bandNum = 0; bandNum < this->NumQMF; ++bandNum) { + //disable for frequence above 16KHz until we works without proper psy + if (bandNum > 2) + continue; + for (uint8_t blockNum = BlocksPerBand[bandNum]; blockNum < BlocksPerBand[bandNum + 1]; ++blockNum) { + const uint16_t specNumStart = SpecsStartLong[blockNum]; 
+ const uint16_t specNumEnd = specNumStart + SpecsPerBlock[blockNum]; + float level = fn(specs + specNumStart, SpecsPerBlock[blockNum]); + if (!isnan(level)) { + for (uint16_t n = specNumStart; n < specNumEnd; ++n) { + //TODO: + TFloat absValue = std::abs(specs[n]); + if (absValue > 65535.0) { + TFloat shift = (specs[n] > 0) ? 65535.0 : -65535.0; + + std::cerr << "shift overflowed value " << specs[n] << " " << specs[n] - shift << " " << shift << std::endl; + res.push_back({n, specs[n] - shift}); + specs[n] = shift; + } else if (log10(std::abs(specs[n])) - log10(level) > thresholds[bandNum]) { + res.push_back({n, specs[n]/* - level*/}); + specs[n] = 0;//level; + } + + } + + } + } + } + return res; +} +std::vector<TTonalComponent> TAtrac3Processor::MapTonalComponents(const TTonalComponents& tonalComponents) { + vector<TTonalComponent> componentMap; + for (uint16_t i = 0; i < tonalComponents.size();) { + const uint16_t startPos = i; + uint16_t curPos; + do { + curPos = tonalComponents[i].Pos; + ++i; + } while ( i < tonalComponents.size() && tonalComponents[i].Pos == curPos + 1 && i - startPos < 7); + const uint16_t len = i - startPos; + TFloat tmp[8]; + for (uint8_t j = 0; j < len; ++j) + tmp[j] = tonalComponents[startPos + j].Val; + const TScaledBlock& scaledBlock = Scaler.Scale(tmp, len); + componentMap.push_back({&tonalComponents[startPos], 7, scaledBlock}); + } + return componentMap; +} + +TAtrac3Data::SubbandInfo TAtrac3Processor::CreateSubbandInfo(TFloat* in[4], uint32_t channel, TTransientDetector* transientDetector) { + assert(false); //not implemented + return {}; +} + +TPCMEngine<TFloat>::TProcessLambda TAtrac3Processor::GetEncodeLambda() { TOma* omaptr = dynamic_cast<TOma*>(Oma.get()); if (!omaptr) { std::cerr << "Wrong container" << std::endl; abort(); } - TAtrac3BitStreamWriter* bitStreamWriter = new TAtrac3BitStreamWriter(omaptr, Params); - return [this, bitStreamWriter](double* data, const TPCMEngine<double>::ProcessMeta& meta) { + 
TAtrac3BitStreamWriter* bitStreamWriter = new TAtrac3BitStreamWriter(omaptr, *Params.ConteinerParams); + return [this, bitStreamWriter](TFloat* data, const TPCMEngine<TFloat>::ProcessMeta& meta) { for (uint32_t channel=0; channel < 2; channel++) { - vector<double> specs(1024); - double src[NumSamples]; + vector<TFloat> specs(1024); + TFloat src[NumSamples]; for (int i = 0; i < NumSamples; ++i) { - src[i] = data[meta.Channels == 1 ? i : (i * 2 + channel)]; //no mono mode in atrac3. //TODO we can double frame after encoding + src[i] = data[meta.Channels == 1 ? i : (i * 2 + channel)] / 4.0; //no mono mode in atrac3. //TODO we can TFloat frame after encoding } - double* p[4] = {&PcmBuffer[channel][0][0], &PcmBuffer[channel][1][0], &PcmBuffer[channel][2][0], &PcmBuffer[channel][3][0]}; + TFloat* p[4] = {&PcmBuffer[channel][0][0], &PcmBuffer[channel][1][0], &PcmBuffer[channel][2][0], &PcmBuffer[channel][3][0]}; SplitFilterBank[channel].Split(&src[0], p); - - TAtrac3Data::SubbandInfo siCur; + + TAtrac3Data::SubbandInfo siCur = Params.NoGainControll ? + TAtrac3Data::SubbandInfo() : CreateSubbandInfo(p, channel, &TransientDetectors[channel*4]); //4 detectors per band Mdct(specs.data(), p, MakeGainModulatorArray(siCur)); - const TBlockSize blockSize(false, false, false); - bitStreamWriter->WriteSoundUnit(siCur, Scaler.Scale(specs, blockSize)); + TTonalComponents tonals = Params.NoTonalComponents ? 
+ TAtrac3Data::TTonalComponents() : ExtractTonalComponents(specs.data(), [](const TFloat* spec, uint16_t len) { + std::vector<TFloat> magnitude(len); + for (uint16_t i = 0; i < len; ++i) { + magnitude[i] = std::abs(spec[i]); + } + float median = CalcMedian(magnitude.data(), len); + for (uint16_t i = 0; i < len; ++i) { + if (median > 0.001) { + return median; + } + } + return NAN; + }); + + const std::vector<TTonalComponent>& components = MapTonalComponents(tonals); + + //TBlockSize for ATRAC3 - 4 subband, all are long (no short window) + bitStreamWriter->WriteSoundUnit(siCur, components, Scaler.ScaleFrame(specs, TBlockSize())); } }; } -TPCMEngine<double>::TProcessLambda TAtrac3Processor::GetDecodeLambda() { +TPCMEngine<TFloat>::TProcessLambda TAtrac3Processor::GetDecodeLambda() { abort(); return {}; } |