diff options
author | Daniil Cherednik <dan.cherednik@gmail.com> | 2024-11-30 13:32:34 +0100 |
---|---|---|
committer | Daniil Cherednik <dan.cherednik@gmail.com> | 2024-12-01 22:28:26 +0100 |
commit | 2ca3d7fb3cfdc0b311bb20426d53782910dee64e (patch) | |
tree | 3db4ea83192bf999e0afb5039e9d1441069a259b /src/atrac | |
parent | 4be2e387a494b1c02d2ca0747dd64b6ba8980d21 (diff) | |
download | atracdenc-2ca3d7fb3cfdc0b311bb20426d53782910dee64e.tar.gz |
Tiny quality improvements for ATRAC3 compatible mode:
* Use adaptive ATH, but its efficiency is limited due to QMF aliasing.
* Use loudness instead of energy to split M/S frame size.
Diffstat (limited to 'src/atrac')
-rw-r--r-- | src/atrac/atrac3_bitstream.cpp | 56 | ||||
-rw-r--r-- | src/atrac/atrac3_bitstream.h | 17 | ||||
-rw-r--r-- | src/atrac/atrac_psy_common.cpp | 19 | ||||
-rw-r--r-- | src/atrac/atrac_psy_common.h | 12 |
4 files changed, 61 insertions, 43 deletions
diff --git a/src/atrac/atrac3_bitstream.cpp b/src/atrac/atrac3_bitstream.cpp index 3b8337d..9362ca0 100644 --- a/src/atrac/atrac3_bitstream.cpp +++ b/src/atrac/atrac3_bitstream.cpp @@ -40,6 +40,31 @@ static const uint32_t FixedBitAllocTable[TAtrac3Data::MaxBfus] = { 1, 0 }; +std::vector<TFloat> TAtrac3BitStreamWriter::ATH; +TAtrac3BitStreamWriter::TAtrac3BitStreamWriter(ICompressedOutput* container, const TContainerParams& params, uint32_t bfuIdxConst) + : Container(container) + , Params(params) + , BfuIdxConst(bfuIdxConst) +{ + NEnv::SetRoundFloat(); + if (ATH.size()) { + return; + } + ATH.reserve(MaxBfus); + auto ATHSpec = CalcATH(1024, 44100); + for (size_t bandNum = 0; bandNum < this->NumQMF; ++bandNum) { + for (size_t blockNum = this->BlocksPerBand[bandNum]; blockNum < this->BlocksPerBand[bandNum + 1]; ++blockNum) { + const size_t specNumStart = this->SpecsStartLong[blockNum]; + float x = 999; + for (size_t line = specNumStart; line < specNumStart + this->SpecsPerBlock[blockNum]; line++) { + x = fmin(x, ATHSpec[line]); + } + x = pow(10, 0.1 * x); + ATH.push_back(x / 100); //reduce efficiency of ATH, but prevents aliasing problem, TODO: fix it? 
+ } + } +} + uint32_t TAtrac3BitStreamWriter::CLCEnc(const uint32_t selector, const int mantissas[MaxSpecsPerBlock], const uint32_t blockSize, NBitStream::TBitStream* bitStream) { @@ -165,7 +190,7 @@ static inline bool CheckBfus(uint16_t* numBfu, const vector<uint32_t>& precision static const std::pair<uint8_t, vector<uint32_t>> DUMMY_ALLOC{1, vector<uint32_t>{0}}; std::pair<uint8_t, vector<uint32_t>> TAtrac3BitStreamWriter::CreateAllocation(const TSingleChannelElement& sce, - const uint16_t targetBits, int mt[MaxSpecs]) + const uint16_t targetBits, int mt[MaxSpecs], float laudness) { const vector<TScaledBlock>& scaledBlocks = sce.ScaledBlocks; if (scaledBlocks.empty()) { @@ -194,7 +219,7 @@ std::pair<uint8_t, vector<uint32_t>> TAtrac3BitStreamWriter::CreateAllocation(co TFloat minShift = -8; for (;;) { TFloat shift = (maxShift + minShift) / 2; - const vector<uint32_t>& tmpAlloc = CalcBitsAllocation(scaledBlocks, numBfu, spread, shift); + const vector<uint32_t>& tmpAlloc = CalcBitsAllocation(scaledBlocks, numBfu, spread, shift, laudness); auto consumption = CalcSpecsBitsConsumption(sce, tmpAlloc, mt); auto bitsUsedByTonal = EncodeTonalComponents(sce, tmpAlloc, nullptr); @@ -453,18 +478,25 @@ uint16_t TAtrac3BitStreamWriter::EncodeTonalComponents(const TSingleChannelEleme vector<uint32_t> TAtrac3BitStreamWriter::CalcBitsAllocation(const std::vector<TScaledBlock>& scaledBlocks, const uint32_t bfuNum, const TFloat spread, - const TFloat shift) + const TFloat shift, + const TFloat loudness) { vector<uint32_t> bitsPerEachBlock(bfuNum); for (size_t i = 0; i < bitsPerEachBlock.size(); ++i) { - const uint32_t fix = FixedBitAllocTable[i]; - int tmp = spread * ( (TFloat)scaledBlocks[i].ScaleFactorIndex/3.2) + (1.0 - spread) * fix - shift; - if (tmp > 7) { - bitsPerEachBlock[i] = 7; - } else if (tmp < 0) { + float ath = ATH[i] * loudness; + //std::cerr << "block: " << i << " Loudness: " << loudness << " " << 10 * log10(scaledBlocks[i].MaxEnergy / ath) << std::endl; + if 
(scaledBlocks[i].MaxEnergy < ath) { bitsPerEachBlock[i] = 0; } else { - bitsPerEachBlock[i] = tmp; + const uint32_t fix = FixedBitAllocTable[i]; + int tmp = spread * ( (TFloat)scaledBlocks[i].ScaleFactorIndex/3.2) + (1.0 - spread) * fix - shift; + if (tmp > 7) { + bitsPerEachBlock[i] = 7; + } else if (tmp < 0) { + bitsPerEachBlock[i] = 0; + } else { + bitsPerEachBlock[i] = tmp; + } } } return bitsPerEachBlock; @@ -503,13 +535,13 @@ static int32_t CalcMSBytesShift(uint32_t frameSz, if (elements[1].ScaledBlocks.empty()) { return maxAllowedShift; } else { - TFloat ratio = CalcMSRatio(elements[0].Energy, elements[1].Energy); + TFloat ratio = CalcMSRatio(elements[0].Loudness, elements[1].Loudness); //std::cerr << ratio << std::endl; return std::max(std::min(ToInt(frameSz * ratio), maxAllowedShift), -maxAllowedShift); } } -void TAtrac3BitStreamWriter::WriteSoundUnit(const vector<TSingleChannelElement>& singleChannelElements) +void TAtrac3BitStreamWriter::WriteSoundUnit(const vector<TSingleChannelElement>& singleChannelElements, float laudness) { ASSERT(singleChannelElements.size() == 1 || singleChannelElements.size() == 2); @@ -566,7 +598,7 @@ void TAtrac3BitStreamWriter::WriteSoundUnit(const vector<TSingleChannelElement>& for (uint32_t channel = 0; channel < singleChannelElements.size(); channel++) { const TSingleChannelElement& sce = singleChannelElements[channel]; - allocations[channel] = CreateAllocation(sce, bitsToAlloc[channel], mt[channel]); + allocations[channel] = CreateAllocation(sce, bitsToAlloc[channel], mt[channel], laudness); } for (uint32_t channel = 0; channel < singleChannelElements.size(); channel++) { diff --git a/src/atrac/atrac3_bitstream.h b/src/atrac/atrac3_bitstream.h index 152437b..e67f726 100644 --- a/src/atrac/atrac3_bitstream.h +++ b/src/atrac/atrac3_bitstream.h @@ -45,9 +45,10 @@ public: TAtrac3Data::SubbandInfo SubbandInfo; std::vector<TTonalBlock> TonalBlocks; std::vector<TScaledBlock> ScaledBlocks; - TFloat Energy; + TFloat Loudness; }; 
private: + static std::vector<TFloat> ATH; struct TTonalComponentsSubGroup { std::vector<uint8_t> SubGroupMap; @@ -65,10 +66,10 @@ private: const uint32_t blockSize, NBitStream::TBitStream* bitStream); std::vector<uint32_t> CalcBitsAllocation(const std::vector<TScaledBlock>& scaledBlocks, - uint32_t bfuNum, TFloat spread, TFloat shift); + uint32_t bfuNum, TFloat spread, TFloat shift, TFloat loudness); std::pair<uint8_t, std::vector<uint32_t>> CreateAllocation(const TSingleChannelElement& sce, - uint16_t targetBits, int mt[MaxSpecs]); + uint16_t targetBits, int mt[MaxSpecs], float laudness); std::pair<uint8_t, uint32_t> CalcSpecsBitsConsumption(const TSingleChannelElement& sce, const std::vector<uint32_t>& precisionPerEachBlocks, @@ -85,15 +86,9 @@ private: const std::vector<uint32_t>& allocTable, NBitStream::TBitStream* bitStream); public: - TAtrac3BitStreamWriter(ICompressedOutput* container, const TContainerParams& params, uint32_t bfuIdxConst) //no mono mode for atrac3 - : Container(container) - , Params(params) - , BfuIdxConst(bfuIdxConst) - { - NEnv::SetRoundFloat(); - } + TAtrac3BitStreamWriter(ICompressedOutput* container, const TContainerParams& params, uint32_t bfuIdxConst); - void WriteSoundUnit(const std::vector<TSingleChannelElement>& singleChannelElements); + void WriteSoundUnit(const std::vector<TSingleChannelElement>& singleChannelElements, float laudness); }; } // namespace NAtrac3 diff --git a/src/atrac/atrac_psy_common.cpp b/src/atrac/atrac_psy_common.cpp index 18f855c..71c1190 100644 --- a/src/atrac/atrac_psy_common.cpp +++ b/src/atrac/atrac_psy_common.cpp @@ -135,25 +135,6 @@ vector<float> CalcATH(int len, int sampleRate) return res; } -float TrackLoudness(float prevLoud, const TFloat* e0, const TFloat* e1, const float* weight, size_t sz) -{ - float s = 0; - if (e1 != nullptr) { - for (size_t i = 0; i < sz; i++) { - s += (e0[i] + e1[i]) * weight[i]; - } - - s *= 0.5; - - } else { - for (size_t i = 0; i < sz; i++) { - s += e0[i] * weight[i]; - } 
- } - - return 0.98 * prevLoud + 0.02 * s; -} - vector<float> CreateLoudnessCurve(size_t sz) { std::vector<float> res; diff --git a/src/atrac/atrac_psy_common.h b/src/atrac/atrac_psy_common.h index ffd1f03..854dfd1 100644 --- a/src/atrac/atrac_psy_common.h +++ b/src/atrac/atrac_psy_common.h @@ -23,7 +23,17 @@ namespace NAtracDEnc { TFloat AnalizeScaleFactorSpread(const std::vector<TScaledBlock>& scaledBlocks); std::vector<float> CalcATH(int len, int sampleRate); -float TrackLoudness(float prevLoud, const TFloat* e0, const TFloat* e1, const float* weight, size_t sz); + +inline float TrackLoudness(float prevLoud, float l0, float l1) +{ + return 0.98 * prevLoud + 0.01 * (l0 + l1); +} + +inline float TrackLoudness(float prevLoud, float l) +{ + return 0.98 * prevLoud + 0.02 * l; +} + std::vector<float> CreateLoudnessCurve(size_t sz); } //namespace NAtracDEnc |