aboutsummaryrefslogtreecommitdiffstats
path: root/src/atrac3denc.cpp
diff options
context:
space:
mode:
authorDaniil Cherednik <dan.cherednik@gmail.com>2016-03-13 09:49:33 +0300
committerDaniil Cherednik <dan.cherednik@gmail.com>2016-09-02 21:21:28 +0300
commitcfaa2cd39b7256a868a4f5cd83aac207df6bd1b3 (patch)
tree75efff26584e046566d17cd308d45b6b0fd5abfc /src/atrac3denc.cpp
parentb4df8a7c2dd12eea27c8cc52bd52a1bb8c00943f (diff)
downloadatracdenc-cfaa2cd39b7256a868a4f5cd83aac207df6bd1b3.tar.gz
Dirty implementation of atrac3 encoder:
- no JS mode - constant quantiser for tonal components - gain controll implemented but produces some artifacts with real signals. - etc...
Diffstat (limited to 'src/atrac3denc.cpp')
-rw-r--r--src/atrac3denc.cpp357
1 files changed, 357 insertions, 0 deletions
diff --git a/src/atrac3denc.cpp b/src/atrac3denc.cpp
new file mode 100644
index 0000000..fa7724c
--- /dev/null
+++ b/src/atrac3denc.cpp
@@ -0,0 +1,357 @@
+#include "atrac3denc.h"
+#include "transient_detector.h"
+#include "util.h"
+#include <assert.h>
+#include <algorithm>
+#include <iostream>
+#include <cmath>
+namespace NAtracDEnc {
+
+using namespace NMDCT;
+using namespace NAtrac3;
+using std::vector;
+
+void TAtrac3MDCT::Mdct(TFloat specs[1024], TFloat* bands[4], TFloat maxLevels[4], TGainModulatorArray gainModulators)
+{
+ for (int band = 0; band < 4; ++band) {
+ TFloat* srcBuff = bands[band];
+ TFloat* const curSpec = &specs[band*256];
+ TGainModulator modFn = gainModulators[band];
+ vector<TFloat> tmp(512);
+ memcpy(&tmp[0], &srcBuff[256], 256 * sizeof(TFloat));
+ if (modFn) {
+ modFn(tmp.data(), srcBuff);
+ }
+ TFloat max = 0.0;
+ for (int i = 0; i < 256; i++) {
+ max = std::max(max, std::abs(srcBuff[i]));
+ srcBuff[256+i] = TAtrac3Data::EncodeWindow[i] * srcBuff[i];
+ srcBuff[i] = TAtrac3Data::EncodeWindow[255-i] * srcBuff[i];
+ }
+ memcpy(&tmp[256], &srcBuff[0], 256 * sizeof(TFloat));
+ const vector<TFloat>& sp = Mdct512(&tmp[0]);
+ assert(sp.size() == 256);
+ memcpy(curSpec, sp.data(), 256 * sizeof(TFloat));
+ if (band & 1) {
+ SwapArray(curSpec, 256);
+ }
+ maxLevels[band] = max;
+ }
+}
+
+void TAtrac3MDCT::Mdct(TFloat specs[1024], TFloat* bands[4], TGainModulatorArray gainModulators)
+{
+ static TFloat dummy[4];
+ Mdct(specs, bands, dummy, gainModulators);
+}
+
+void TAtrac3MDCT::Midct(TFloat specs[1024], TFloat* bands[4], TGainDemodulatorArray gainDemodulators)
+{
+ for (int band = 0; band < 4; ++band) {
+ TFloat* dstBuff = bands[band];
+ TFloat* curSpec = &specs[band*256];
+ TFloat* prevBuff = dstBuff + 256;
+ TAtrac3GainProcessor::TGainDemodulator demodFn = gainDemodulators[band];
+ if (band & 1) {
+ SwapArray(curSpec, 256);
+ }
+ vector<TFloat> inv = Midct512(curSpec);
+ assert(inv.size()/2 == 256);
+ for (int j = 0; j < 256; ++j) {
+ inv[j] *= /*2 */ DecodeWindow[j];
+ inv[511 - j] *= /*2*/ DecodeWindow[j];
+ }
+ if (demodFn) {
+ demodFn(dstBuff, inv.data(), prevBuff);
+ } else {
+ for (uint32_t j = 0; j < 256; ++j) {
+ dstBuff[j] = inv[j] + prevBuff[j];
+ }
+ }
+ memcpy(prevBuff, &inv[256], sizeof(TFloat)*256);
+ }
+}
+
+TAtrac3Processor::TAtrac3Processor(TCompressedIOPtr&& oma, TAtrac3EncoderSettings&& encoderSettings)
+ : Oma(std::move(oma))
+ , Params(std::move(encoderSettings))
+ , TransientDetectors(2 * 4, TTransientDetector(8, 256)) //2 - channels, 4 - bands
+{}
+
+TAtrac3Processor::~TAtrac3Processor()
+{}
+
+TAtrac3MDCT::TGainModulatorArray TAtrac3MDCT::MakeGainModulatorArray(const TAtrac3Data::SubbandInfo& si)
+{
+ switch (si.GetQmfNum()) {
+ case 1:
+ {
+ return {{ GainProcessor.Modulate(si.GetGainPoints(0)), TAtrac3MDCT::TGainModulator(),
+ TAtrac3MDCT::TGainModulator(), TAtrac3MDCT::TGainModulator() }};
+ }
+ case 2:
+ {
+ return {{ GainProcessor.Modulate(si.GetGainPoints(0)), GainProcessor.Modulate(si.GetGainPoints(1)),
+ TAtrac3MDCT::TGainModulator(), TAtrac3MDCT::TGainModulator() }};
+ }
+ case 3:
+ {
+ return {{ GainProcessor.Modulate(si.GetGainPoints(0)), GainProcessor.Modulate(si.GetGainPoints(1)),
+ GainProcessor.Modulate(si.GetGainPoints(2)), TAtrac3MDCT::TGainModulator() }};
+ }
+ case 4:
+ {
+ return {{ GainProcessor.Modulate(si.GetGainPoints(0)), GainProcessor.Modulate(si.GetGainPoints(1)),
+ GainProcessor.Modulate(si.GetGainPoints(2)), GainProcessor.Modulate(si.GetGainPoints(3)) }};
+ }
+ default:
+ assert(false);
+ return {};
+
+ }
+}
+
+//TODO:
+TAtrac3Data::TTonalComponents TAtrac3Processor::ExtractTonalComponents(TFloat* specs, TTonalDetector fn)
+{
+ TAtrac3Data::TTonalComponents res;
+ const float thresholds[TAtrac3Data::NumQMF] = { 0.9, 2.4, 2.8, 3.2 };
+ for (uint8_t bandNum = 0; bandNum < this->NumQMF; ++bandNum) {
+ //disable for frequence above 16KHz until we works without proper psy
+ if (bandNum > 2)
+ continue;
+ for (uint8_t blockNum = BlocksPerBand[bandNum]; blockNum < BlocksPerBand[bandNum + 1]; ++blockNum) {
+ const uint16_t specNumStart = SpecsStartLong[blockNum];
+ const uint16_t specNumEnd = specNumStart + SpecsPerBlock[blockNum];
+ float level = fn(specs + specNumStart, SpecsPerBlock[blockNum]);
+ if (!isnan(level)) {
+ for (uint16_t n = specNumStart; n < specNumEnd; ++n) {
+ //TODO:
+ TFloat absValue = std::abs(specs[n]);
+ if (absValue > 65535.0) {
+ TFloat shift = (specs[n] > 0) ? 65535.0 : -65535.0;
+ std::cerr << "overflow: " << specs[n] << " at: " << n << std::endl;
+ //res.push_back({n, specs[n] - shift});
+ specs[n] = shift;
+ } else if (log10(std::abs(specs[n])) - log10(level) > thresholds[bandNum]) {
+ res.push_back({n, specs[n]/* - level*/});
+ specs[n] = 0;//level;
+ }
+
+ }
+
+ }
+ }
+ }
+ return res;
+}
+
+std::vector<TTonalComponent> TAtrac3Processor::MapTonalComponents(const TTonalComponents& tonalComponents)
+{
+ vector<TTonalComponent> componentMap;
+ for (uint16_t i = 0; i < tonalComponents.size();) {
+ const uint16_t startPos = i;
+ uint16_t curPos;
+ do {
+ curPos = tonalComponents[i].Pos;
+ ++i;
+ } while ( i < tonalComponents.size() && tonalComponents[i].Pos == curPos + 1 && i - startPos < 7);
+ const uint16_t len = i - startPos;
+ TFloat tmp[8];
+ for (uint8_t j = 0; j < len; ++j)
+ tmp[j] = tonalComponents[startPos + j].Val;
+ const TScaledBlock& scaledBlock = Scaler.Scale(tmp, len);
+ componentMap.push_back({&tonalComponents[startPos], 7, scaledBlock});
+ }
+ return componentMap;
+}
+
+
+TFloat TAtrac3Processor::LimitRel(TFloat x)
+{
+ return std::min(std::max(x, GainLevel[15]), GainLevel[0]);
+}
+
+TAtrac3Processor::TTransientParam TAtrac3Processor::CalcTransientParam(const std::vector<TFloat>& gain, const TFloat lastMax)
+{
+ int32_t attackLocation = 0;
+ TFloat attackRelation = 1;
+
+ const TFloat attackThreshold = 4;
+ //pre-echo searching
+ TFloat tmp;
+ TFloat q = lastMax; //std::max(lastMax, gain[0]);
+ tmp = gain[0] / q;
+ if (tmp > attackThreshold) {
+ attackRelation = tmp;
+ } else {
+ for (uint32_t i = 0; i < gain.size() -1; ++i) {
+ q = std::max(q, gain[i]);
+ tmp = gain[i+1] / q;
+ if (tmp > attackThreshold) {
+ attackRelation = tmp;
+ attackLocation = i;
+ break;
+ }
+ }
+ }
+
+ int32_t releaseLocation = 0;
+ TFloat releaseRelation = 1;
+
+ const TFloat releaseTreshold = 4;
+ //post-echo searching
+ q = 0;
+ for (uint32_t i = gain.size() - 1; i > 0; --i) {
+ q = std::max(q, gain[i]);
+ tmp = gain[i-1] / q;
+ if (tmp > releaseTreshold) {
+ releaseRelation = tmp;
+ releaseLocation = i;
+ break;
+ }
+ }
+ if (releaseLocation == 0) {
+ q = std::max(q, gain[0]);
+ tmp = lastMax / q;
+ if (tmp > releaseTreshold) {
+ releaseRelation = tmp;
+ }
+ }
+
+ return {attackLocation, attackRelation, releaseLocation, releaseRelation};
+}
+
+TAtrac3Data::SubbandInfo TAtrac3Processor::CreateSubbandInfo(TFloat* in[4],
+ uint32_t channel,
+ TTransientDetector* transientDetector)
+{
+ TAtrac3Data::SubbandInfo siCur;
+ for (int band = 0; band < 4; ++band) {
+
+ TFloat invBuf[256];
+ if (band & 1) {
+ memcpy(invBuf, in[band], 256*sizeof(TFloat));
+ InvertSpectrInPlase<256>(invBuf);
+ }
+ const TFloat* srcBuff = (band & 1) ? invBuf : in[band];
+
+ const TFloat* const lastMax = &PrevPeak[channel][band];
+
+ std::vector<TAtrac3Data::SubbandInfo::TGainPoint> curve;
+ std::vector<TFloat> gain = AnalyzeGain(srcBuff, 256, 32, false);
+
+ auto transientParam = CalcTransientParam(gain, *lastMax);
+ bool hasTransient = (transientParam.AttackRelation != 1.0 || transientParam.ReleaseRelation != 1.0);
+
+ //combine attack and release
+ TFloat relA = 1;
+ TFloat relB = 1;
+ TFloat relC = 1;
+ uint32_t loc1 = 0;
+ uint32_t loc2 = 0;
+ if (transientParam.AttackLocation < transientParam.ReleaseLocation) {
+ //Peak like transient
+ relA = transientParam.AttackRelation;
+ loc1 = transientParam.AttackLocation;
+ relB = 1;
+ loc2 = transientParam.ReleaseLocation;
+ relC = transientParam.ReleaseRelation;
+ } else if (transientParam.AttackLocation > transientParam.ReleaseLocation) {
+ //Hole like transient
+ relA = transientParam.AttackRelation;
+ loc1 = transientParam.ReleaseLocation;
+ relB = transientParam.AttackRelation * transientParam.ReleaseRelation;
+ loc2 = transientParam.AttackLocation;
+ relC = transientParam.ReleaseRelation;
+ } else {
+ //???
+ //relA = relB = relC = transientParam.AttackRelation * transientParam.ReleaseRelation;
+ //loc1 = loc2 = transientParam.ReleaseLocation;
+ hasTransient = false;
+ }
+ //std::cout << "loc: " << loc1 << " " << loc2 << " rel: " << relA << " " << relB << " " << relC << std::endl;
+
+ if (relC != 1) {
+ relA /= relC;
+ relB /= relC;
+ relC = 1.0;
+ }
+ auto relToIdx = [this](TFloat rel) {
+ rel = LimitRel(1/rel);
+ return (uint32_t)(15 - Log2FloatToIdx(rel, 2048));
+ };
+ curve.push_back({relToIdx(relA), loc1});
+ if (loc1 != loc2) {
+ curve.push_back({relToIdx(relB), loc2});
+ }
+ if (loc2 != 31) {
+ curve.push_back({relToIdx(relC), 31});
+ }
+
+ if (hasTransient) {
+ siCur.AddSubbandCurve(band, std::move(curve));
+ }
+
+ }
+ return siCur;
+}
+
+
+TPCMEngine<TFloat>::TProcessLambda TAtrac3Processor::GetEncodeLambda()
+{
+ TOma* omaptr = dynamic_cast<TOma*>(Oma.get());
+ if (!omaptr) {
+ std::cerr << "Wrong container" << std::endl;
+ abort();
+ }
+
+ TAtrac3BitStreamWriter* bitStreamWriter = new TAtrac3BitStreamWriter(omaptr, *Params.ConteinerParams);
+ return [this, bitStreamWriter](TFloat* data, const TPCMEngine<TFloat>::ProcessMeta& meta) {
+ for (uint32_t channel=0; channel < 2; channel++) {
+ vector<TFloat> specs(1024);
+ TFloat src[NumSamples];
+
+ for (size_t i = 0; i < NumSamples; ++i) {
+ src[i] = data[meta.Channels == 1 ? i : (i * 2 + channel)] / 4.0; //no mono mode in atrac3. //TODO we can TFloat frame after encoding
+ }
+
+ TFloat* p[4] = {&PcmBuffer[channel][0][0], &PcmBuffer[channel][1][0], &PcmBuffer[channel][2][0], &PcmBuffer[channel][3][0]};
+ SplitFilterBank[channel].Split(&src[0], p);
+
+ TAtrac3Data::SubbandInfo siCur = Params.NoGainControll ?
+ TAtrac3Data::SubbandInfo() : CreateSubbandInfo(p, channel, &TransientDetectors[channel*4]); //4 detectors per band
+
+ TFloat* maxOverlapLevels = PrevPeak[channel];
+
+ Mdct(specs.data(), p, maxOverlapLevels, MakeGainModulatorArray(siCur));
+ TTonalComponents tonals = Params.NoTonalComponents ?
+ TAtrac3Data::TTonalComponents() : ExtractTonalComponents(specs.data(), [](const TFloat* spec, uint16_t len) {
+ std::vector<TFloat> magnitude(len);
+ for (uint16_t i = 0; i < len; ++i) {
+ magnitude[i] = std::abs(spec[i]);
+ }
+ float median = CalcMedian(magnitude.data(), len);
+ for (uint16_t i = 0; i < len; ++i) {
+ if (median > 0.001) {
+ return median;
+ }
+ }
+ return NAN;
+ });
+
+ const std::vector<TTonalComponent>& components = MapTonalComponents(tonals);
+
+ //TBlockSize for ATRAC3 - 4 subband, all are long (no short window)
+ bitStreamWriter->WriteSoundUnit(siCur, components, Scaler.ScaleFrame(specs, TBlockSize()));
+ }
+ };
+}
+
+TPCMEngine<TFloat>::TProcessLambda TAtrac3Processor::GetDecodeLambda()
+{
+ abort();
+ return {};
+}
+
+}//namespace NAtracDEnc