diff options
author | Daniil Cherednik <dan.cherednik@gmail.com> | 2016-07-17 17:50:38 +0300 |
---|---|---|
committer | Daniil Cherednik <dan.cherednik@gmail.com> | 2016-07-17 17:50:38 +0300 |
commit | 842bd06107983bf1775e9c1bbafc8cbe43ffb164 (patch) | |
tree | 4808bed3ae63572f092686f2e93ff44a2676abaa | |
parent | 1151d5831f19a9f24dd0c545a4968606712a62d2 (diff) | |
download | atracdenc-atrac3_gaincontrol.tar.gz |
Experimental implementation of gain control. (branch: atrac3_gaincontrol)
-rw-r--r-- | src/atrac/atrac3.h | 4 | ||||
-rw-r--r-- | src/atrac3denc.cpp | 231 | ||||
-rw-r--r-- | src/atrac3denc.h | 20 | ||||
-rw-r--r-- | src/gain_processor.h | 16 | ||||
-rw-r--r-- | src/main.cpp | 2 | ||||
-rw-r--r-- | src/transient_detector.cpp | 8 | ||||
-rw-r--r-- | src/transient_detector.h | 4 | ||||
-rw-r--r-- | src/util.h | 3 |
8 files changed, 255 insertions, 33 deletions
diff --git a/src/atrac/atrac3.h b/src/atrac/atrac3.h index e65e71a..a1e35f3 100644 --- a/src/atrac/atrac3.h +++ b/src/atrac/atrac3.h @@ -197,8 +197,8 @@ public: public: static const uint32_t MaxGainPointsNum = 8; struct TGainPoint { - const uint32_t Level; - const uint32_t Location; + uint32_t Level; + uint32_t Location; }; private: std::vector<std::vector<TGainPoint>> Info; diff --git a/src/atrac3denc.cpp b/src/atrac3denc.cpp index 432fb18..b0ecc48 100644 --- a/src/atrac3denc.cpp +++ b/src/atrac3denc.cpp @@ -11,18 +11,35 @@ using namespace NMDCT; using namespace NAtrac3; using std::vector; -void TAtrac3MDCT::Mdct(TFloat specs[1024], TFloat* bands[4], TGainModulatorArray gainModulators) { + +static void hpFilter(const TFloat* in, TFloat* out, uint32_t n) +{ + TFloat t0 = 0; + TFloat t1 = 0; + for (uint32_t i = 0; i < n; ++i) { + TFloat x = in[i] / 4.0f; + TFloat y = t0 + x; + t0 = t1 + y - 2.0f * x; + t1 = x - .5f * y; + out[i] = y; + } +} + +void TAtrac3MDCT::Mdct(TFloat specs[1024], TFloat* bands[4], TFloat maxLevels[4], TGainModulatorArray gainModulators) +{ for (int band = 0; band < 4; ++band) { TFloat* srcBuff = bands[band]; TFloat* const curSpec = &specs[band*256]; TGainModulator modFn = gainModulators[band]; vector<TFloat> tmp(512); + TFloat maxOverlapGain = 0.0; memcpy(&tmp[0], &srcBuff[256], 256 * sizeof(TFloat)); if (modFn) { modFn(tmp.data(), srcBuff); } for (int i = 0; i < 256; i++) { srcBuff[256+i] = TAtrac3Data::EncodeWindow[i] * srcBuff[i]; + maxOverlapGain = std::max(maxOverlapGain, std::abs(srcBuff[256+i])); srcBuff[i] = TAtrac3Data::EncodeWindow[255-i] * srcBuff[i]; } memcpy(&tmp[256], &srcBuff[0], 256 * sizeof(TFloat)); @@ -32,10 +49,12 @@ void TAtrac3MDCT::Mdct(TFloat specs[1024], TFloat* bands[4], TGainModulatorArray if (band & 1) { SwapArray(curSpec, 256); } + maxLevels[band] = maxOverlapGain; } } -void TAtrac3MDCT::Midct(TFloat specs[1024], TFloat* bands[4], TGainDemodulatorArray gainDemodulators) { +void TAtrac3MDCT::Midct(TFloat 
specs[1024], TFloat* bands[4], TGainDemodulatorArray gainDemodulators) +{ for (int band = 0; band < 4; ++band) { TFloat* dstBuff = bands[band]; TFloat* curSpec = &specs[band*256]; @@ -70,7 +89,8 @@ TAtrac3Processor::TAtrac3Processor(TCompressedIOPtr&& oma, TAtrac3EncoderSetting TAtrac3Processor::~TAtrac3Processor() {} -TAtrac3MDCT::TGainModulatorArray TAtrac3MDCT::MakeGainModulatorArray(const TAtrac3Data::SubbandInfo& si) { +TAtrac3MDCT::TGainModulatorArray TAtrac3MDCT::MakeGainModulatorArray(const TAtrac3Data::SubbandInfo& si) +{ switch (si.GetQmfNum()) { case 1: { @@ -100,7 +120,8 @@ TAtrac3MDCT::TGainModulatorArray TAtrac3MDCT::MakeGainModulatorArray(const TAtra } //TODO: -TAtrac3Data::TTonalComponents TAtrac3Processor::ExtractTonalComponents(TFloat* specs, TTonalDetector fn) { +TAtrac3Data::TTonalComponents TAtrac3Processor::ExtractTonalComponents(TFloat* specs, TTonalDetector fn) +{ TAtrac3Data::TTonalComponents res; const float thresholds[TAtrac3Data::NumQMF] = { 0.9, 2.4, 2.8, 3.2 }; for (uint8_t bandNum = 0; bandNum < this->NumQMF; ++bandNum) { @@ -117,9 +138,8 @@ TAtrac3Data::TTonalComponents TAtrac3Processor::ExtractTonalComponents(TFloat* s TFloat absValue = std::abs(specs[n]); if (absValue > 65535.0) { TFloat shift = (specs[n] > 0) ? 
65535.0 : -65535.0; - - std::cerr << "shift overflowed value " << specs[n] << " " << specs[n] - shift << " " << shift << std::endl; - res.push_back({n, specs[n] - shift}); + std::cerr << "overflow: " << specs[n] << " at: " << n << std::endl; + //res.push_back({n, specs[n] - shift}); specs[n] = shift; } else if (log10(std::abs(specs[n])) - log10(level) > thresholds[bandNum]) { res.push_back({n, specs[n]/* - level*/}); @@ -133,7 +153,8 @@ TAtrac3Data::TTonalComponents TAtrac3Processor::ExtractTonalComponents(TFloat* s } return res; } -std::vector<TTonalComponent> TAtrac3Processor::MapTonalComponents(const TTonalComponents& tonalComponents) { +std::vector<TTonalComponent> TAtrac3Processor::MapTonalComponents(const TTonalComponents& tonalComponents) +{ vector<TTonalComponent> componentMap; for (uint16_t i = 0; i < tonalComponents.size();) { const uint16_t startPos = i; @@ -152,12 +173,190 @@ std::vector<TTonalComponent> TAtrac3Processor::MapTonalComponents(const TTonalCo return componentMap; } -TAtrac3Data::SubbandInfo TAtrac3Processor::CreateSubbandInfo(TFloat* in[4], uint32_t channel, TTransientDetector* transientDetector) { - assert(false); //not implemented - return {}; + +TFloat TAtrac3Processor::LimitRel(TFloat x) +{ + return std::min(std::max(x, GainLevel[15]), GainLevel[0]); +} + +uint32_t TAtrac3Processor::CheckLevelOverflow(const TFloat probe, uint32_t levelIdx) +{ + //std::cout << "CheckLevelOverflow: " << probe << " start idx: " << levelIdx << std::endl; + while (probe / GainLevel[levelIdx] > 65535) { + if (levelIdx == 0) { + std::cerr << "level too hi" << std::endl; + break; + } + levelIdx--; + } + return levelIdx; +} + +vector<TAtrac3Data::SubbandInfo::TGainPoint> TAtrac3Processor::FilterCurve(const vector<SubbandInfo::TGainPoint>& curve, + const int threshold) +{ + if (curve.empty()) + return curve; + +#ifndef NDEBUG + int prev = -1; + for (auto v : curve) { + assert((int)v.Location > prev); +// std::cout << "in: " << v.Level << " " << v.Location << " 
threshold: " << threshold << std::endl; + prev = v.Location; + } +#endif + + std::vector<TAtrac3Data::SubbandInfo::TGainPoint> res; + res.push_back(curve[curve.size() - 1]); + for (int32_t i = curve.size() - 1; i >=0;) { + uint32_t minSeenVal = curve[i].Level; + uint32_t maxSeenVal = curve[i].Level; + + int32_t j = i; + for (;;) { + minSeenVal = std::min(curve[j].Level, minSeenVal); + maxSeenVal = std::max(curve[j].Level, maxSeenVal); + + uint32_t curVal = curve[j].Level; +/* + std::cout << "i: " << i + << " j: " << j + << " minSeenVal: " << minSeenVal + << " maxSeenVal: " << maxSeenVal + << " curVal: " << curVal + << std::endl; +*/ + if ((j == 0 && (curve[0].Level != curve[1].Level)) || + (curVal - minSeenVal > threshold) || + (maxSeenVal - curVal > threshold) ) + { + res.push_back(curve[j]); + break; + } + if (j == 0) + break; + j--; + } + i = j; + if (i == 0) + break; + } + std::reverse(res.begin(), res.end()); + +// for (auto v : res) +// std::cout << "out: " << v.Level << " " << v.Location << std::endl; + + if (res.size() < TAtrac3Data::SubbandInfo::MaxGainPointsNum) { + return res; + } + return FilterCurve(res, threshold + 1); +} + +//TODO: implement real transient detector +bool checkTransient(TFloat cur, TFloat prev) +{ + TFloat x = (cur > prev) ? cur / prev : prev / cur; + if (x > 6) + return true; + + return false; } -TPCMEngine<TFloat>::TProcessLambda TAtrac3Processor::GetEncodeLambda() { +std::vector<TFloat> TAtrac3Processor::CalcBaseLevel(const TFloat prev, const std::vector<TFloat>& gain) { + + TFloat maxRel = 1.0; + bool done = false; + //TODO: recheck it. 
It looks like we realy need to compare only prev and last point + for (int i = gain.size() - 1; i < gain.size(); ++i) { + if (prev > gain[i] && prev / gain[i] > maxRel) { + maxRel = prev / gain[i]; + done = true; + } + } + + TFloat val0 = gain[gain.size() - 1]; + if (done) { + const TFloat rel = LimitRel(maxRel); + uint32_t relIdx = 15 - Log2FloatToIdx(rel, 2048); + val0 = prev / GainLevel[relIdx]; + } + + TFloat val1 = gain[gain.size() - 1]; + std::vector<TFloat> baseLine(gain.size()); + + baseLine[0] = val0; + baseLine[baseLine.size() - 1] = val1; + TFloat a = (baseLine[baseLine.size() - 1] - baseLine[0]) / baseLine.size(); + + for (int i = 1; i < baseLine.size() - 1; i++) { + baseLine[i] = i * a + baseLine[0]; + } + return baseLine; +} + +TAtrac3Data::SubbandInfo TAtrac3Processor::CreateSubbandInfo(TFloat* in[4], + uint32_t channel, + TTransientDetector* transientDetector) +{ + TAtrac3Data::SubbandInfo siCur; + for (int band = 0; band < 4; ++band) { + + const TFloat* srcBuff = in[band]; + TFloat* const lastLevel = &LastLevels[channel][band]; + TFloat* const lastHPLevel = &LastHPLevels[channel][band]; + TFloat* const lastMax = &PrevPeak[channel][band]; + + std::vector<TAtrac3Data::SubbandInfo::TGainPoint> curve; + //RMS gain + std::vector<TFloat> gain = AnalyzeGain(srcBuff, 256, 32, true); + //std::cout << "gain prev: " << *lastLevel << std::endl; + //for ( auto vvv : gain ) { + // std::cout << " gain: " << vvv << std::endl; + //} + int32_t gainPos = gain.size() - 2; + bool hasTransient = false; + + std::vector<TFloat> base = CalcBaseLevel(*lastLevel, gain); + + TFloat hpSig[256]; + hpFilter(srcBuff, &hpSig[0], 256); + //Peak gain + std::vector<TFloat> hpGain = AnalyzeGain(&hpSig[0], 256, 32, false); + + for (; gainPos >= 0; --gainPos) { + const TFloat val = (gainPos == 0) ? *lastLevel : gain[gainPos]; + + const TFloat hpval = (gainPos == 0) ? 
*lastHPLevel : hpGain[gainPos]; + if (!hasTransient && checkTransient(hpval, hpGain[gainPos + 1])) { + //std::cout << "hasTransient true at: " << gainPos << " base: " << base[gainPos] << std::endl; + hasTransient = true; + } + + const TFloat rel = LimitRel(val / base[gainPos]); + uint32_t scaleIdx = 15 - Log2FloatToIdx(rel, 2048); + + curve.push_back({scaleIdx, (uint32_t)gainPos /*+ !!gainPos*/}); + } + + + *lastLevel = gain[gain.size() -1]; + *lastHPLevel = hpGain[gain.size() -1]; + if (hasTransient) { + std::reverse(curve.begin(), curve.end()); + auto t = CheckLevelOverflow(*lastMax, curve[0].Level); + //std::cout << "overflow: " << curve[0].Level << " new: " << t << " max: " << *lastMax << std::endl; + curve[0].Level = t; + siCur.AddSubbandCurve(band, std::move(FilterCurve(curve, 0))); + } + + } + return siCur; +} + + +TPCMEngine<TFloat>::TProcessLambda TAtrac3Processor::GetEncodeLambda() +{ TOma* omaptr = dynamic_cast<TOma*>(Oma.get()); if (!omaptr) { std::cerr << "Wrong container" << std::endl; @@ -169,6 +368,7 @@ TPCMEngine<TFloat>::TProcessLambda TAtrac3Processor::GetEncodeLambda() { for (uint32_t channel=0; channel < 2; channel++) { vector<TFloat> specs(1024); TFloat src[NumSamples]; + for (int i = 0; i < NumSamples; ++i) { src[i] = data[meta.Channels == 1 ? i : (i * 2 + channel)] / 4.0; //no mono mode in atrac3. //TODO we can TFloat frame after encoding } @@ -179,7 +379,9 @@ TPCMEngine<TFloat>::TProcessLambda TAtrac3Processor::GetEncodeLambda() { TAtrac3Data::SubbandInfo siCur = Params.NoGainControll ? TAtrac3Data::SubbandInfo() : CreateSubbandInfo(p, channel, &TransientDetectors[channel*4]); //4 detectors per band - Mdct(specs.data(), p, MakeGainModulatorArray(siCur)); + TFloat* maxOverlapLevels = PrevPeak[channel]; + + Mdct(specs.data(), p, maxOverlapLevels, MakeGainModulatorArray(siCur)); TTonalComponents tonals = Params.NoTonalComponents ? 
TAtrac3Data::TTonalComponents() : ExtractTonalComponents(specs.data(), [](const TFloat* spec, uint16_t len) { std::vector<TFloat> magnitude(len); @@ -203,7 +405,8 @@ TPCMEngine<TFloat>::TProcessLambda TAtrac3Processor::GetEncodeLambda() { }; } -TPCMEngine<TFloat>::TProcessLambda TAtrac3Processor::GetDecodeLambda() { +TPCMEngine<TFloat>::TProcessLambda TAtrac3Processor::GetDecodeLambda() +{ abort(); return {}; } diff --git a/src/atrac3denc.h b/src/atrac3denc.h index 5984728..4b0980c 100644 --- a/src/atrac3denc.h +++ b/src/atrac3denc.h @@ -30,8 +30,13 @@ public: using TGainDemodulator = TAtrac3GainProcessor::TGainDemodulator; typedef std::array<TGainDemodulator, 4> TGainDemodulatorArray; typedef std::array<TGainModulator, 4> TGainModulatorArray; - void Mdct(TFloat specs[1024], TFloat* bands[4], TGainModulatorArray gainModulators = TGainModulatorArray()); - void Midct(TFloat specs[1024], TFloat* bands[4], TGainDemodulatorArray gainDemodulators = TGainDemodulatorArray()); + void Mdct(TFloat specs[1024], + TFloat* bands[4], + TFloat maxLevels[4], + TGainModulatorArray gainModulators = TGainModulatorArray()); + void Midct(TFloat specs[1024], + TFloat* bands[4], + TGainDemodulatorArray gainDemodulators = TGainDemodulatorArray()); protected: TAtrac3MDCT::TGainModulatorArray MakeGainModulatorArray(const TAtrac3Data::SubbandInfo& si); }; @@ -43,6 +48,11 @@ class TAtrac3Processor : public IProcessor<TFloat>, public TAtrac3MDCT, public v TCompressedIOPtr Oma; const NAtrac3::TAtrac3EncoderSettings Params; TFloat PcmBuffer[2][4][256 + 256]; //2 channel, 4 band, 256 sample + 256 for overlap buffer + + TFloat LastLevels[2][4]; //2 channel, 4 band - level of last subblock, used to create curve + TFloat LastHPLevels[2][4]; //2 channel, 4 band - level of last HP filtered subblock, used for transient detection + TFloat PrevPeak[2][4]; //2 channel, 4 band - peak level (after windowing), used to check overflow during scalling + Atrac3SplitFilterBank<TFloat> SplitFilterBank[2]; 
TScaler<TAtrac3Data> Scaler; std::vector<TTransientDetector> TransientDetectors; @@ -50,11 +60,11 @@ class TAtrac3Processor : public IProcessor<TFloat>, public TAtrac3MDCT, public v #ifdef ATRAC_UT_PUBLIC public: #endif - uint32_t CheckLevelOverflow(const std::vector<TFloat>& gain, const TAtrac3Data::SubbandInfo::TGainPoint& point); - std::vector<SubbandInfo::TGainPoint> FilterCurve(const std::vector<TFloat>& gain, - const std::vector<SubbandInfo::TGainPoint>& curve, + uint32_t CheckLevelOverflow(TFloat max, uint32_t levelIdx); + std::vector<SubbandInfo::TGainPoint> FilterCurve(const std::vector<SubbandInfo::TGainPoint>& curve, const int threshold); TFloat LimitRel(TFloat x); + std::vector<TFloat> CalcBaseLevel(TFloat prev, const std::vector<TFloat>& gain); TAtrac3Data::SubbandInfo CreateSubbandInfo(TFloat* in[4], uint32_t channel, TTransientDetector* transientDetector); TonalComponentMask AnalyzeTonalComponent(TFloat* specs); TTonalComponents ExtractTonalComponents(TFloat* specs, TTonalDetector fn); diff --git a/src/gain_processor.h b/src/gain_processor.h index 04f2041..0fa9728 100644 --- a/src/gain_processor.h +++ b/src/gain_processor.h @@ -24,17 +24,21 @@ public: * so next transformation (mdct #3) gets modulated first part */ typedef std::function<void(TFloat* bufCur, TFloat* bufNext)> TGainModulator; - static TFloat GetGainInc(uint32_t levelIdxCur) { + static TFloat GetGainInc(uint32_t levelIdxCur) + { const int incPos = T::ExponentOffset - levelIdxCur + T::GainInterpolationPosShift; return T::GainInterpolation[incPos]; } - static TFloat GetGainInc(uint32_t levelIdxCur, uint32_t levelIdxNext) { + static TFloat GetGainInc(uint32_t levelIdxCur, uint32_t levelIdxNext) + { const int incPos = levelIdxNext - levelIdxCur + T::GainInterpolationPosShift; return T::GainInterpolation[incPos]; } - TGainDemodulator Demodulate(const std::vector<typename T::SubbandInfo::TGainPoint>& giNow, const std::vector<typename T::SubbandInfo::TGainPoint>& giNext) { + TGainDemodulator 
Demodulate(const std::vector<typename T::SubbandInfo::TGainPoint>& giNow, + const std::vector<typename T::SubbandInfo::TGainPoint>& giNext) + { return [=](TFloat* out, TFloat* cur, TFloat* prev) { uint32_t pos = 0; const TFloat scale = giNext.size() ? T::GainLevel[giNext[0].Level] : 1; @@ -43,7 +47,8 @@ public: const uint32_t levelPos = giNow[i].Level; assert(levelPos < sizeof(T::GainLevel)/sizeof(T::GainLevel[0])); TFloat level = T::GainLevel[levelPos]; - const int incPos = ((i + 1) < giNow.size() ? giNow[i + 1].Level : T::ExponentOffset) - giNow[i].Level + T::GainInterpolationPosShift; + const int incPos = ((i + 1) < giNow.size() ? giNow[i + 1].Level : T::ExponentOffset) + - giNow[i].Level + T::GainInterpolationPosShift; TFloat gainInc = T::GainInterpolation[incPos]; for (; pos < lastPos; pos++) { //std::cout << "pos: " << pos << " scale: " << scale << " level: " << level << std::endl; @@ -72,7 +77,8 @@ public: const uint32_t levelPos = giCur[i].Level; assert(levelPos < sizeof(T::GainLevel)/sizeof(T::GainLevel[0])); TFloat level = T::GainLevel[levelPos]; - const int incPos = ((i + 1) < giCur.size() ? giCur[i + 1].Level : T::ExponentOffset) - giCur[i].Level + T::GainInterpolationPosShift; + const int incPos = ((i + 1) < giCur.size() ? 
giCur[i + 1].Level : T::ExponentOffset) + - giCur[i].Level + T::GainInterpolationPosShift; TFloat gainInc = T::GainInterpolation[incPos]; for (; pos < lastPos; pos++) { //std::cout << "mod pos: " << pos << " scale: " << scale << " bufCur: " << bufCur[pos] << " level: " << level << " bufNext: " << bufNext[pos] << std::endl; diff --git a/src/main.cpp b/src/main.cpp index b35f6bb..b902af4 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -188,7 +188,7 @@ int main(int argc, char* const* argv) bool fastBfuNumSearch = false; bool mono = false; bool noStdOut = false; - bool noGainControl = true; + bool noGainControl = false; bool noTonalComponents = false; NAtrac1::TAtrac1EncodeSettings::EWindowMode windowMode = NAtrac1::TAtrac1EncodeSettings::EWindowMode::EWM_AUTO; uint32_t winMask = 0; //0 - all is long diff --git a/src/transient_detector.cpp b/src/transient_detector.cpp index b0e4aab..d7695f1 100644 --- a/src/transient_detector.cpp +++ b/src/transient_detector.cpp @@ -9,7 +9,7 @@ using std::vector; static TFloat calculateRMS(const TFloat* in, uint32_t n) { TFloat s = 0; for (uint32_t i = 0; i < n; i++) { - s += in[i] * in[i]; + s += (in[i] * in[i]); } s /= n; return sqrt(s); @@ -68,14 +68,14 @@ bool TTransientDetector::Detect(const TFloat* buf) { return trans; } -std::vector<TFloat> AnalyzeGain(const TFloat* in, const uint32_t len, const uint32_t maxPoints) { +std::vector<TFloat> AnalyzeGain(const TFloat* in, const uint32_t len, const uint32_t maxPoints, bool useRms) { vector<TFloat> res; const uint32_t step = len / maxPoints; for (uint32_t pos = 0; pos < len; pos += step) { - TFloat rms = calculatePeak(in + pos, step); + TFloat rms = useRms ? 
calculateRMS(in + pos, step) : calculatePeak(in + pos, step); res.emplace_back(rms); } return res; } -} +} //namespace NAtracDEnc diff --git a/src/transient_detector.h b/src/transient_detector.h index 004eff6..46b774f 100644 --- a/src/transient_detector.h +++ b/src/transient_detector.h @@ -6,6 +6,7 @@ #include "config.h" namespace NAtracDEnc { + class TTransientDetector { const uint32_t ShortSz; const uint32_t BlockSz; @@ -28,5 +29,6 @@ public: uint32_t GetLastTransientPos() const { return LastTransientPos; } }; -std::vector<TFloat> AnalyzeGain(const TFloat* in, const uint32_t len, const uint32_t maxPoints); +std::vector<TFloat> AnalyzeGain(const TFloat* in, uint32_t len, uint32_t maxPoints, bool useRms); + } @@ -2,6 +2,7 @@ #include <cstdint> #include <vector> #include <algorithm> +#include <cmath> #include "config.h" @@ -30,7 +31,7 @@ inline uint16_t GetFirstSetBit(uint32_t x) { template<class T> inline uint16_t Log2FloatToIdx(T x, uint16_t shift) { T t = x * shift; - return GetFirstSetBit(t); + return GetFirstSetBit(std::trunc(t)); } template<class T> |