diff options
author | Daniil Cherednik <dan.cherednik@gmail.com> | 2024-10-11 00:07:48 +0200 |
---|---|---|
committer | Daniil Cherednik <dan.cherednik@gmail.com> | 2024-10-11 00:07:48 +0200 |
commit | 57ba306db046601b96b5a1943e63574a37c5e96c (patch) | |
tree | 85103e059c0153923bcb844ea69f7117e6d9b8a5 /src | |
parent | fe377370c1ae4691a65270123345f225e3159ed8 (diff) | |
download | atracdenc-57ba306db046601b96b5a1943e63574a37c5e96c.tar.gz |
Attempt to use adaptive ATH for ATRAC1
Diffstat (limited to 'src')
-rw-r--r-- | src/atrac/atrac1_bitalloc.cpp | 11 | ||||
-rw-r--r-- | src/atrac/atrac1_bitalloc.h | 7 | ||||
-rw-r--r-- | src/atrac/atrac_psy_common.cpp | 36 | ||||
-rw-r--r-- | src/atrac/atrac_psy_common.h | 2 | ||||
-rw-r--r-- | src/atrac/atrac_scale.h | 2 | ||||
-rw-r--r-- | src/atrac1denc.cpp | 54 | ||||
-rw-r--r-- | src/atrac1denc.h | 6 |
7 files changed, 100 insertions, 18 deletions
diff --git a/src/atrac/atrac1_bitalloc.cpp b/src/atrac/atrac1_bitalloc.cpp index a2e63fa..21584a9 100644 --- a/src/atrac/atrac1_bitalloc.cpp +++ b/src/atrac/atrac1_bitalloc.cpp @@ -127,12 +127,15 @@ vector<uint32_t> TAtrac1SimpleBitAlloc::CalcBitsAllocation(const std::vector<TSc const uint32_t bfuNum, const TFloat spread, const TFloat shift, - const TBlockSize& blockSize) { + const TBlockSize& blockSize, + const float loudness) { vector<uint32_t> bitsPerEachBlock(bfuNum); for (size_t i = 0; i < bitsPerEachBlock.size(); ++i) { bool shortBlock = blockSize.LogCount[BfuToBand(i)]; const uint32_t fix = shortBlock ? FixedBitAllocTableShort[i] : FixedBitAllocTableLong[i]; - if (!shortBlock && scaledBlocks[i].MaxEnergy < ATHLong[i]) { + float ath = ATHLong[i] * loudness; + //std::cerr << "block: " << i << " Loudness: " << loudness << " " << 10 * log10(scaledBlocks[i].MaxEnergy / ath) << std::endl; + if (!shortBlock && scaledBlocks[i].MaxEnergy < ath) { bitsPerEachBlock[i] = 0; } else { int tmp = spread * ( (TFloat)scaledBlocks[i].ScaleFactorIndex/3.2) + (1.0 - spread) * fix - shift; @@ -183,7 +186,7 @@ uint32_t TAtrac1SimpleBitAlloc::CheckBfuUsage(bool* changed, return curBfuId; } -uint32_t TAtrac1SimpleBitAlloc::Write(const std::vector<TScaledBlock>& scaledBlocks, const TBlockSize& blockSize) { +uint32_t TAtrac1SimpleBitAlloc::Write(const std::vector<TScaledBlock>& scaledBlocks, const TBlockSize& blockSize, float loudness) { uint32_t bfuIdx = BfuIdxConst ? BfuIdxConst - 1 : 7; bool autoBfu = !BfuIdxConst; TFloat spread = AnalizeScaleFactorSpread(scaledBlocks); @@ -204,7 +207,7 @@ uint32_t TAtrac1SimpleBitAlloc::Write(const std::vector<TScaledBlock>& scaledBlo bool bfuNumChanged = false; for (;;) { const vector<uint32_t>& tmpAlloc = CalcBitsAllocation(scaledBlocks, BfuAmountTab[bfuIdx], - spread, shift, blockSize); + spread, shift, blockSize, loudness); uint32_t bitsUsed = 0; for (size_t i = 0; i < tmpAlloc.size(); i++) { bitsUsed += SpecsPerBlock[i] * tmpAlloc[i]; diff --git a/src/atrac/atrac1_bitalloc.h b/src/atrac/atrac1_bitalloc.h index b1f6b54..4c541b8 100644 --- a/src/atrac/atrac1_bitalloc.h +++ b/src/atrac/atrac1_bitalloc.h @@ -33,7 +33,7 @@ class IAtrac1BitAlloc { public: IAtrac1BitAlloc() {}; virtual ~IAtrac1BitAlloc() {}; - virtual uint32_t Write(const std::vector<TScaledBlock>& scaledBlocks, const TBlockSize& blockSize) = 0; + virtual uint32_t Write(const std::vector<TScaledBlock>& scaledBlocks, const TBlockSize& blockSize, float loudness) = 0; }; class TBitsBooster : public virtual TAtrac1Data { @@ -56,7 +56,8 @@ public: class TAtrac1SimpleBitAlloc : public TAtrac1BitStreamWriter, public TBitsBooster, public virtual IAtrac1BitAlloc { std::vector<uint32_t> CalcBitsAllocation(const std::vector<TScaledBlock>& scaledBlocks, const uint32_t bfuNum, - const TFloat spread, const TFloat shift, const TBlockSize& blockSize); + const TFloat spread, const TFloat shift, const TBlockSize& blockSize, + const float loudness); const uint32_t BfuIdxConst; const bool FastBfuNumSearch; static std::vector<TFloat> ATHLong; @@ -66,7 +67,7 @@ class TAtrac1SimpleBitAlloc : public TAtrac1BitStreamWriter, public TBitsBooster public: TAtrac1SimpleBitAlloc(ICompressedOutput* container, uint32_t bfuIdxConst, bool fastBfuNumSearch); ~TAtrac1SimpleBitAlloc() {}; - uint32_t Write(const std::vector<TScaledBlock>& scaledBlocks, const TBlockSize& blockSize) override; + uint32_t Write(const std::vector<TScaledBlock>& scaledBlocks, const TBlockSize& blockSize, float loudness) override; }; } //namespace NAtrac1 diff --git a/src/atrac/atrac_psy_common.cpp b/src/atrac/atrac_psy_common.cpp index 089bf47..18f855c 100644 --- a/src/atrac/atrac_psy_common.cpp +++ b/src/atrac/atrac_psy_common.cpp @@ -135,4 +135,40 @@ vector<float> CalcATH(int len, int sampleRate) return res; } +float TrackLoudness(float prevLoud, const TFloat* e0, const TFloat* e1, const float* weight, size_t sz) +{ + float s = 0; + if (e1 != nullptr) { + for (size_t i = 0; i < sz; i++) { + s += (e0[i] + e1[i]) * weight[i]; + } + + s *= 0.5; + + } else { + for (size_t i = 0; i < sz; i++) { + s += e0[i] * weight[i]; + } + } + + return 0.98 * prevLoud + 0.02 * s; +} + +vector<float> CreateLoudnessCurve(size_t sz) +{ + std::vector<float> res; + res.resize(sz); + + for (size_t i = 0; i < sz; i++) { + float f = (float)(i + 3) * 0.5 * 44100 / (float)sz; + float t = std::log10(f) - 3.5; + t = -10 * t * t + 3 - f / 3000; + t = std::pow(10, (0.1 * t)); + //std::cerr << i << " => " << f << " " << t <<std::endl; + res[i] = t; + } + + return res; +} + } // namespace NAtracDEnc diff --git a/src/atrac/atrac_psy_common.h b/src/atrac/atrac_psy_common.h index 1a826bb..ffd1f03 100644 --- a/src/atrac/atrac_psy_common.h +++ b/src/atrac/atrac_psy_common.h @@ -23,5 +23,7 @@ namespace NAtracDEnc { TFloat AnalizeScaleFactorSpread(const std::vector<TScaledBlock>& scaledBlocks); std::vector<float> CalcATH(int len, int sampleRate); +float TrackLoudness(float prevLoud, const TFloat* e0, const TFloat* e1, const float* weight, size_t sz); +std::vector<float> CreateLoudnessCurve(size_t sz); } //namespace NAtracDEnc diff --git a/src/atrac/atrac_scale.h b/src/atrac/atrac_scale.h index b590662..ec83059 100644 --- a/src/atrac/atrac_scale.h +++ b/src/atrac/atrac_scale.h @@ -77,7 +77,7 @@ public: TBlockSize() : LogCount({{0, 0, 0, 0}}) {} - const std::array<int, 4> LogCount; + std::array<int, 4> LogCount; }; } //namespace NAtracDEnc diff --git a/src/atrac1denc.cpp b/src/atrac1denc.cpp index d622007..08f7a1d 100644 --- a/src/atrac1denc.cpp +++ b/src/atrac1denc.cpp @@ -24,6 +24,7 @@ #include "atrac/atrac1_dequantiser.h" #include "atrac/atrac1_qmf.h" #include "atrac/atrac1_bitalloc.h" +#include "atrac/atrac_psy_common.h" #include "util.h" namespace NAtracDEnc { @@ -35,6 +36,7 @@ using std::vector; TAtrac1Encoder::TAtrac1Encoder(TCompressedOutputPtr&& aea, TAtrac1EncodeSettings&& settings) : Aea(std::move(aea)) , Settings(std::move(settings)) + , LoudnessCurve(CreateLoudnessCurve(NumSamples)) { } @@ -164,22 +166,38 @@ TPCMEngine<TFloat>::TProcessLambda TAtrac1Decoder::GetLambda() { TPCMEngine<TFloat>::TProcessLambda TAtrac1Encoder::GetLambda() { const uint32_t srcChannels = Aea->GetChannelNum(); - vector<IAtrac1BitAlloc*> bitAlloc; - for (size_t i = 0; i < srcChannels; i++) { - bitAlloc.push_back(new TAtrac1SimpleBitAlloc(Aea.get(), Settings.GetBfuIdxConst(), Settings.GetFastBfuNumSearch())); + vector<IAtrac1BitAlloc*> bitAlloc(srcChannels); + + for (auto& x : bitAlloc) { + x = new TAtrac1SimpleBitAlloc(Aea.get(), Settings.GetBfuIdxConst(), Settings.GetFastBfuNumSearch()); } - return [this, srcChannels, bitAlloc](TFloat* data, const TPCMEngine<TFloat>::ProcessMeta& /*meta*/) { + struct TChannelData { + TChannelData() + : Specs(NumSamples) + , Energy(NumSamples) + {} + + vector<TFloat> Specs; + vector<TFloat> Energy; + }; + + using TData = vector<TChannelData>; + auto buf = std::make_shared<TData>(srcChannels); + + return [this, srcChannels, bitAlloc, buf](TFloat* data, const TPCMEngine<TFloat>::ProcessMeta& /*meta*/) { + TBlockSize blockSz[2]; + + uint32_t windowMasks[2] = {0}; for (uint32_t channel = 0; channel < srcChannels; channel++) { TFloat src[NumSamples]; - vector<TFloat> specs(512); for (size_t i = 0; i < NumSamples; ++i) { src[i] = data[i * srcChannels + channel]; } AnalysisFilterBank[channel].Analysis(&src[0], &PcmBufLow[channel][0], &PcmBufMid[channel][0], &PcmBufHi[channel][0]); - uint32_t windowMask = 0; + uint32_t& windowMask = windowMasks[channel]; if (Settings.GetWindowMode() == TAtrac1EncodeSettings::EWindowMode::EWM_AUTO) { windowMask |= (uint32_t)TransientDetectors.GetDetector(channel, 0).Detect(&PcmBufLow[channel][0]); @@ -194,10 +212,28 @@ TPCMEngine<TFloat>::TProcessLambda TAtrac1Encoder::GetLambda() { //no transient detection, use given mask windowMask = Settings.GetWindowMask(); } - const TBlockSize blockSize(windowMask & 0x1, windowMask & 0x2, windowMask & 0x4); //low, mid, hi - Mdct(&specs[0], &PcmBufLow[channel][0], &PcmBufMid[channel][0], &PcmBufHi[channel][0], blockSize); - bitAlloc[channel]->Write(Scaler.ScaleFrame(specs, blockSize), blockSize); + blockSz[channel] = TBlockSize(windowMask & 0x1, windowMask & 0x2, windowMask & 0x4); //low, mid, hi + + auto& specs = (*buf)[channel].Specs; + + Mdct(&specs[0], &PcmBufLow[channel][0], &PcmBufMid[channel][0], &PcmBufHi[channel][0], blockSz[channel]); + + auto& erg = (*buf)[channel].Energy; + + for (size_t i = 0; i < specs.size(); i++) { + erg[i] = specs[i] * specs[i]; + } + } + + if (srcChannels == 2 && windowMasks[0] == 0 && windowMasks[1] == 0) { + Loudness = TrackLoudness(Loudness, (*buf)[0].Energy.data(), (*buf)[1].Energy.data(), LoudnessCurve.data(), NumSamples); + } else if (windowMasks[0] == 0) { + Loudness = TrackLoudness(Loudness, (*buf)[0].Energy.data(), nullptr, LoudnessCurve.data(), NumSamples); + } + + for (uint32_t channel = 0; channel < srcChannels; channel++) { + bitAlloc[channel]->Write(Scaler.ScaleFrame((*buf)[channel].Specs, blockSz[channel]), blockSz[channel], Loudness / LoudFactor); } }; } diff --git a/src/atrac1denc.h b/src/atrac1denc.h index 5752f22..5e6c4fe 100644 --- a/src/atrac1denc.h +++ b/src/atrac1denc.h @@ -66,6 +66,8 @@ class TAtrac1Encoder : public IProcessor<TFloat>, public TAtrac1MDCT, public vir Atrac1AnalysisFilterBank<TFloat> AnalysisFilterBank[2]; + const std::vector<float> LoudnessCurve; + class TTransientDetectors { std::vector<TTransientDetector> transientDetectorLow; std::vector<TTransientDetector> transientDetectorMid; @@ -94,8 +96,10 @@ class TAtrac1Encoder : public IProcessor<TFloat>, public TAtrac1MDCT, public vir } }; TAtrac1Encoder::TTransientDetectors TransientDetectors; - + TScaler<TAtrac1Data> Scaler; + static constexpr float LoudFactor = 0.006; + float Loudness = LoudFactor; public: TAtrac1Encoder(TCompressedOutputPtr&& aea, NAtrac1::TAtrac1EncodeSettings&& settings); |