diff options
author | Daniil Cherednik <dan.cherednik@gmail.com> | 2024-11-30 13:32:34 +0100 |
---|---|---|
committer | Daniil Cherednik <dan.cherednik@gmail.com> | 2024-12-01 22:28:26 +0100 |
commit | 2ca3d7fb3cfdc0b311bb20426d53782910dee64e (patch) | |
tree | 3db4ea83192bf999e0afb5039e9d1441069a259b /src | |
parent | 4be2e387a494b1c02d2ca0747dd64b6ba8980d21 (diff) | |
download | atracdenc-2ca3d7fb3cfdc0b311bb20426d53782910dee64e.tar.gz |
Tiny quality improvements for ATRAC3 compatible mode:
* Use adaptive ATH, but its efficiency is limited due to QMF aliasing.
* Use loudness instead of energy to split M/S frame size.
Diffstat (limited to 'src')
-rw-r--r-- | src/atrac/atrac3_bitstream.cpp | 56 | ||||
-rw-r--r-- | src/atrac/atrac3_bitstream.h | 17 | ||||
-rw-r--r-- | src/atrac/atrac_psy_common.cpp | 19 | ||||
-rw-r--r-- | src/atrac/atrac_psy_common.h | 12 | ||||
-rw-r--r-- | src/atrac1denc.cpp | 15 | ||||
-rw-r--r-- | src/atrac3denc.cpp | 37 | ||||
-rw-r--r-- | src/atrac3denc.h | 4 |
7 files changed, 106 insertions, 54 deletions
diff --git a/src/atrac/atrac3_bitstream.cpp b/src/atrac/atrac3_bitstream.cpp index 3b8337d..9362ca0 100644 --- a/src/atrac/atrac3_bitstream.cpp +++ b/src/atrac/atrac3_bitstream.cpp @@ -40,6 +40,31 @@ static const uint32_t FixedBitAllocTable[TAtrac3Data::MaxBfus] = { 1, 0 }; +std::vector<TFloat> TAtrac3BitStreamWriter::ATH; +TAtrac3BitStreamWriter::TAtrac3BitStreamWriter(ICompressedOutput* container, const TContainerParams& params, uint32_t bfuIdxConst) + : Container(container) + , Params(params) + , BfuIdxConst(bfuIdxConst) +{ + NEnv::SetRoundFloat(); + if (ATH.size()) { + return; + } + ATH.reserve(MaxBfus); + auto ATHSpec = CalcATH(1024, 44100); + for (size_t bandNum = 0; bandNum < this->NumQMF; ++bandNum) { + for (size_t blockNum = this->BlocksPerBand[bandNum]; blockNum < this->BlocksPerBand[bandNum + 1]; ++blockNum) { + const size_t specNumStart = this->SpecsStartLong[blockNum]; + float x = 999; + for (size_t line = specNumStart; line < specNumStart + this->SpecsPerBlock[blockNum]; line++) { + x = fmin(x, ATHSpec[line]); + } + x = pow(10, 0.1 * x); + ATH.push_back(x / 100); //reduce efficiency of ATH, but prevents aliasing problem, TODO: fix it? 
+ } + } +} + uint32_t TAtrac3BitStreamWriter::CLCEnc(const uint32_t selector, const int mantissas[MaxSpecsPerBlock], const uint32_t blockSize, NBitStream::TBitStream* bitStream) { @@ -165,7 +190,7 @@ static inline bool CheckBfus(uint16_t* numBfu, const vector<uint32_t>& precision static const std::pair<uint8_t, vector<uint32_t>> DUMMY_ALLOC{1, vector<uint32_t>{0}}; std::pair<uint8_t, vector<uint32_t>> TAtrac3BitStreamWriter::CreateAllocation(const TSingleChannelElement& sce, - const uint16_t targetBits, int mt[MaxSpecs]) + const uint16_t targetBits, int mt[MaxSpecs], float laudness) { const vector<TScaledBlock>& scaledBlocks = sce.ScaledBlocks; if (scaledBlocks.empty()) { @@ -194,7 +219,7 @@ std::pair<uint8_t, vector<uint32_t>> TAtrac3BitStreamWriter::CreateAllocation(co TFloat minShift = -8; for (;;) { TFloat shift = (maxShift + minShift) / 2; - const vector<uint32_t>& tmpAlloc = CalcBitsAllocation(scaledBlocks, numBfu, spread, shift); + const vector<uint32_t>& tmpAlloc = CalcBitsAllocation(scaledBlocks, numBfu, spread, shift, laudness); auto consumption = CalcSpecsBitsConsumption(sce, tmpAlloc, mt); auto bitsUsedByTonal = EncodeTonalComponents(sce, tmpAlloc, nullptr); @@ -453,18 +478,25 @@ uint16_t TAtrac3BitStreamWriter::EncodeTonalComponents(const TSingleChannelEleme vector<uint32_t> TAtrac3BitStreamWriter::CalcBitsAllocation(const std::vector<TScaledBlock>& scaledBlocks, const uint32_t bfuNum, const TFloat spread, - const TFloat shift) + const TFloat shift, + const TFloat loudness) { vector<uint32_t> bitsPerEachBlock(bfuNum); for (size_t i = 0; i < bitsPerEachBlock.size(); ++i) { - const uint32_t fix = FixedBitAllocTable[i]; - int tmp = spread * ( (TFloat)scaledBlocks[i].ScaleFactorIndex/3.2) + (1.0 - spread) * fix - shift; - if (tmp > 7) { - bitsPerEachBlock[i] = 7; - } else if (tmp < 0) { + float ath = ATH[i] * loudness; + //std::cerr << "block: " << i << " Loudness: " << loudness << " " << 10 * log10(scaledBlocks[i].MaxEnergy / ath) << std::endl; + if 
(scaledBlocks[i].MaxEnergy < ath) { bitsPerEachBlock[i] = 0; } else { - bitsPerEachBlock[i] = tmp; + const uint32_t fix = FixedBitAllocTable[i]; + int tmp = spread * ( (TFloat)scaledBlocks[i].ScaleFactorIndex/3.2) + (1.0 - spread) * fix - shift; + if (tmp > 7) { + bitsPerEachBlock[i] = 7; + } else if (tmp < 0) { + bitsPerEachBlock[i] = 0; + } else { + bitsPerEachBlock[i] = tmp; + } } } return bitsPerEachBlock; @@ -503,13 +535,13 @@ static int32_t CalcMSBytesShift(uint32_t frameSz, if (elements[1].ScaledBlocks.empty()) { return maxAllowedShift; } else { - TFloat ratio = CalcMSRatio(elements[0].Energy, elements[1].Energy); + TFloat ratio = CalcMSRatio(elements[0].Loudness, elements[1].Loudness); //std::cerr << ratio << std::endl; return std::max(std::min(ToInt(frameSz * ratio), maxAllowedShift), -maxAllowedShift); } } -void TAtrac3BitStreamWriter::WriteSoundUnit(const vector<TSingleChannelElement>& singleChannelElements) +void TAtrac3BitStreamWriter::WriteSoundUnit(const vector<TSingleChannelElement>& singleChannelElements, float laudness) { ASSERT(singleChannelElements.size() == 1 || singleChannelElements.size() == 2); @@ -566,7 +598,7 @@ void TAtrac3BitStreamWriter::WriteSoundUnit(const vector<TSingleChannelElement>& for (uint32_t channel = 0; channel < singleChannelElements.size(); channel++) { const TSingleChannelElement& sce = singleChannelElements[channel]; - allocations[channel] = CreateAllocation(sce, bitsToAlloc[channel], mt[channel]); + allocations[channel] = CreateAllocation(sce, bitsToAlloc[channel], mt[channel], laudness); } for (uint32_t channel = 0; channel < singleChannelElements.size(); channel++) { diff --git a/src/atrac/atrac3_bitstream.h b/src/atrac/atrac3_bitstream.h index 152437b..e67f726 100644 --- a/src/atrac/atrac3_bitstream.h +++ b/src/atrac/atrac3_bitstream.h @@ -45,9 +45,10 @@ public: TAtrac3Data::SubbandInfo SubbandInfo; std::vector<TTonalBlock> TonalBlocks; std::vector<TScaledBlock> ScaledBlocks; - TFloat Energy; + TFloat Loudness; }; 
private: + static std::vector<TFloat> ATH; struct TTonalComponentsSubGroup { std::vector<uint8_t> SubGroupMap; @@ -65,10 +66,10 @@ private: const uint32_t blockSize, NBitStream::TBitStream* bitStream); std::vector<uint32_t> CalcBitsAllocation(const std::vector<TScaledBlock>& scaledBlocks, - uint32_t bfuNum, TFloat spread, TFloat shift); + uint32_t bfuNum, TFloat spread, TFloat shift, TFloat loudness); std::pair<uint8_t, std::vector<uint32_t>> CreateAllocation(const TSingleChannelElement& sce, - uint16_t targetBits, int mt[MaxSpecs]); + uint16_t targetBits, int mt[MaxSpecs], float laudness); std::pair<uint8_t, uint32_t> CalcSpecsBitsConsumption(const TSingleChannelElement& sce, const std::vector<uint32_t>& precisionPerEachBlocks, @@ -85,15 +86,9 @@ private: const std::vector<uint32_t>& allocTable, NBitStream::TBitStream* bitStream); public: - TAtrac3BitStreamWriter(ICompressedOutput* container, const TContainerParams& params, uint32_t bfuIdxConst) //no mono mode for atrac3 - : Container(container) - , Params(params) - , BfuIdxConst(bfuIdxConst) - { - NEnv::SetRoundFloat(); - } + TAtrac3BitStreamWriter(ICompressedOutput* container, const TContainerParams& params, uint32_t bfuIdxConst); - void WriteSoundUnit(const std::vector<TSingleChannelElement>& singleChannelElements); + void WriteSoundUnit(const std::vector<TSingleChannelElement>& singleChannelElements, float laudness); }; } // namespace NAtrac3 diff --git a/src/atrac/atrac_psy_common.cpp b/src/atrac/atrac_psy_common.cpp index 18f855c..71c1190 100644 --- a/src/atrac/atrac_psy_common.cpp +++ b/src/atrac/atrac_psy_common.cpp @@ -135,25 +135,6 @@ vector<float> CalcATH(int len, int sampleRate) return res; } -float TrackLoudness(float prevLoud, const TFloat* e0, const TFloat* e1, const float* weight, size_t sz) -{ - float s = 0; - if (e1 != nullptr) { - for (size_t i = 0; i < sz; i++) { - s += (e0[i] + e1[i]) * weight[i]; - } - - s *= 0.5; - - } else { - for (size_t i = 0; i < sz; i++) { - s += e0[i] * weight[i]; - } 
- } - - return 0.98 * prevLoud + 0.02 * s; -} - vector<float> CreateLoudnessCurve(size_t sz) { std::vector<float> res; diff --git a/src/atrac/atrac_psy_common.h b/src/atrac/atrac_psy_common.h index ffd1f03..854dfd1 100644 --- a/src/atrac/atrac_psy_common.h +++ b/src/atrac/atrac_psy_common.h @@ -23,7 +23,17 @@ namespace NAtracDEnc { TFloat AnalizeScaleFactorSpread(const std::vector<TScaledBlock>& scaledBlocks); std::vector<float> CalcATH(int len, int sampleRate); -float TrackLoudness(float prevLoud, const TFloat* e0, const TFloat* e1, const float* weight, size_t sz); + +inline float TrackLoudness(float prevLoud, float l0, float l1) +{ + return 0.98 * prevLoud + 0.01 * (l0 + l1); +} + +inline float TrackLoudness(float prevLoud, float l) +{ + return 0.98 * prevLoud + 0.02 * l; +} + std::vector<float> CreateLoudnessCurve(size_t sz); } //namespace NAtracDEnc diff --git a/src/atrac1denc.cpp b/src/atrac1denc.cpp index 08f7a1d..4682fe2 100644 --- a/src/atrac1denc.cpp +++ b/src/atrac1denc.cpp @@ -175,11 +175,11 @@ TPCMEngine<TFloat>::TProcessLambda TAtrac1Encoder::GetLambda() { struct TChannelData { TChannelData() : Specs(NumSamples) - , Energy(NumSamples) + , Loudness(0.0) {} vector<TFloat> Specs; - vector<TFloat> Energy; + float Loudness; }; using TData = vector<TChannelData>; @@ -219,17 +219,18 @@ TPCMEngine<TFloat>::TProcessLambda TAtrac1Encoder::GetLambda() { Mdct(&specs[0], &PcmBufLow[channel][0], &PcmBufMid[channel][0], &PcmBufHi[channel][0], blockSz[channel]); - auto& erg = (*buf)[channel].Energy; - + float l = 0.0; for (size_t i = 0; i < specs.size(); i++) { - erg[i] = specs[i] * specs[i]; + float e = specs[i] * specs[i]; + l += e * LoudnessCurve[i]; } + (*buf)[channel].Loudness = l; } if (srcChannels == 2 && windowMasks[0] == 0 && windowMasks[1] == 0) { - Loudness = TrackLoudness(Loudness, (*buf)[0].Energy.data(), (*buf)[1].Energy.data(), LoudnessCurve.data(), NumSamples); + Loudness = TrackLoudness(Loudness, (*buf)[0].Loudness, (*buf)[1].Loudness); } else if 
(windowMasks[0] == 0) { - Loudness = TrackLoudness(Loudness, (*buf)[0].Energy.data(), nullptr, LoudnessCurve.data(), NumSamples); + Loudness = TrackLoudness(Loudness, (*buf)[0].Loudness); } for (uint32_t channel = 0; channel < srcChannels; channel++) { diff --git a/src/atrac3denc.cpp b/src/atrac3denc.cpp index a07c11e..65eef67 100644 --- a/src/atrac3denc.cpp +++ b/src/atrac3denc.cpp @@ -18,6 +18,7 @@ #include "atrac3denc.h" #include "transient_detector.h" +#include "atrac/atrac_psy_common.h" #include <assert.h> #include <algorithm> #include <iostream> @@ -91,6 +92,7 @@ void TAtrac3MDCT::Midct(TFloat specs[1024], TFloat* bands[4], TGainDemodulatorAr TAtrac3Encoder::TAtrac3Encoder(TCompressedOutputPtr&& oma, TAtrac3EncoderSettings&& encoderSettings) : Oma(std::move(oma)) , Params(std::move(encoderSettings)) + , LoudnessCurve(CreateLoudnessCurve(NumSamples)) , SingleChannelElements(Params.SourceChannels) , TransientParamsHistory(Params.SourceChannels, std::vector<TTransientParam>(4)) {} @@ -289,7 +291,19 @@ void TAtrac3Encoder::Matrixing() TPCMEngine<TFloat>::TProcessLambda TAtrac3Encoder::GetLambda() { std::shared_ptr<TAtrac3BitStreamWriter> bitStreamWriter(new TAtrac3BitStreamWriter(Oma.get(), *Params.ConteinerParams, Params.BfuIdxConst)); - return [this, bitStreamWriter](TFloat* data, const TPCMEngine<TFloat>::ProcessMeta& meta) { + + struct TChannelData { + TChannelData() + : Specs(NumSamples) + {} + + vector<TFloat> Specs; + }; + + using TData = vector<TChannelData>; + auto buf = std::make_shared<TData>(2); + + return [this, bitStreamWriter, buf](TFloat* data, const TPCMEngine<TFloat>::ProcessMeta& meta) { using TSce = TAtrac3BitStreamWriter::TSingleChannelElement; for (uint32_t channel = 0; channel < meta.Channels; channel++) { @@ -310,7 +324,7 @@ TPCMEngine<TFloat>::TProcessLambda TAtrac3Encoder::GetLambda() } for (uint32_t channel = 0; channel < meta.Channels; channel++) { - vector<TFloat> specs(1024); + auto& specs = (*buf)[channel].Specs; TSce* sce = 
&SingleChannelElements[channel]; sce->SubbandInfo.Reset(); @@ -326,11 +340,26 @@ TPCMEngine<TFloat>::TProcessLambda TAtrac3Encoder::GetLambda() Mdct(specs.data(), p, maxOverlapLevels, MakeGainModulatorArray(sce->SubbandInfo)); } - sce->Energy = CalcEnergy(specs); + float l = 0; + for (size_t i = 0; i < specs.size(); i++) { + float e = specs[i] * specs[i]; + l += e * LoudnessCurve[i]; + } + + sce->Loudness = l; //TBlockSize for ATRAC3 - 4 subband, all are long (no short window) sce->ScaledBlocks = Scaler.ScaleFrame(specs, TBlockSize()); + } + if (meta.Channels == 2 && !Params.ConteinerParams->Js) { + const TSce& sce0 = SingleChannelElements[0]; + const TSce& sce1 = SingleChannelElements[1]; + Loudness = TrackLoudness(Loudness, sce0.Loudness, sce1.Loudness); + } else { + // 1 channel or Js. In case of Js we do not use side channel to adjust loudness + const TSce& sce0 = SingleChannelElements[0]; + Loudness = TrackLoudness(Loudness, sce0.Loudness); } if (Params.ConteinerParams->Js && meta.Channels == 1) { @@ -341,7 +370,7 @@ TPCMEngine<TFloat>::TProcessLambda TAtrac3Encoder::GetLambda() SingleChannelElements[1].SubbandInfo.Info.resize(1); } - bitStreamWriter->WriteSoundUnit(SingleChannelElements); + bitStreamWriter->WriteSoundUnit(SingleChannelElements, Loudness); }; } diff --git a/src/atrac3denc.h b/src/atrac3denc.h index 1aea07e..50f2df0 100644 --- a/src/atrac3denc.h +++ b/src/atrac3denc.h @@ -82,11 +82,13 @@ protected: class TAtrac3Encoder : public IProcessor<TFloat>, public TAtrac3MDCT { TCompressedOutputPtr Oma; const NAtrac3::TAtrac3EncoderSettings Params; + const std::vector<float> LoudnessCurve; TDelayBuffer<TFloat, 8, 256> PcmBuffer; //8 = 2 channels * 4 bands TFloat PrevPeak[2][4]; //2 channel, 4 band - peak level (after windowing), used to check overflow during scalling Atrac3AnalysisFilterBank<TFloat> AnalysisFilterBank[2]; + TScaler<TAtrac3Data> Scaler; std::vector<NAtrac3::TAtrac3BitStreamWriter::TSingleChannelElement> SingleChannelElements; public: @@ 
-100,6 +102,8 @@ public: }; private: std::vector<std::vector<TTransientParam>> TransientParamsHistory; + static constexpr float LoudFactor = 0.006; + float Loudness = LoudFactor; #ifdef ATRAC_UT_PUBLIC public: #endif |