aboutsummaryrefslogtreecommitdiffstats
path: root/src/atrac
diff options
context:
space:
mode:
authorDaniil Cherednik <dan.cherednik@gmail.com>2024-11-30 13:32:34 +0100
committerDaniil Cherednik <dan.cherednik@gmail.com>2024-12-01 22:28:26 +0100
commit2ca3d7fb3cfdc0b311bb20426d53782910dee64e (patch)
tree3db4ea83192bf999e0afb5039e9d1441069a259b /src/atrac
parent4be2e387a494b1c02d2ca0747dd64b6ba8980d21 (diff)
downloadatracdenc-2ca3d7fb3cfdc0b311bb20426d53782910dee64e.tar.gz
Tiny quality improvements for ATRAC3 compatible mode:
* Use adaptive ATH, but its efficiency is limited due to QMF aliasing. * Use loudness instead of energy to split M/S frame size.
Diffstat (limited to 'src/atrac')
-rw-r--r--src/atrac/atrac3_bitstream.cpp56
-rw-r--r--src/atrac/atrac3_bitstream.h17
-rw-r--r--src/atrac/atrac_psy_common.cpp19
-rw-r--r--src/atrac/atrac_psy_common.h12
4 files changed, 61 insertions, 43 deletions
diff --git a/src/atrac/atrac3_bitstream.cpp b/src/atrac/atrac3_bitstream.cpp
index 3b8337d..9362ca0 100644
--- a/src/atrac/atrac3_bitstream.cpp
+++ b/src/atrac/atrac3_bitstream.cpp
@@ -40,6 +40,31 @@ static const uint32_t FixedBitAllocTable[TAtrac3Data::MaxBfus] = {
1, 0
};
+std::vector<TFloat> TAtrac3BitStreamWriter::ATH; // out-of-class definition of the static ATH table; one entry per block, filled by the constructor below
+TAtrac3BitStreamWriter::TAtrac3BitStreamWriter(ICompressedOutput* container, const TContainerParams& params, uint32_t bfuIdxConst) // stores its arguments and, on first construction only, builds the shared ATH table
+ : Container(container)
+ , Params(params)
+ , BfuIdxConst(bfuIdxConst)
+{
+ NEnv::SetRoundFloat();
+ if (ATH.size()) { // table already built by an earlier writer instance; NOTE(review): no locking is visible here - confirm writers are never constructed concurrently
+ return;
+ }
+ ATH.reserve(MaxBfus); // capacity for MaxBfus entries up front
+ auto ATHSpec = CalcATH(1024, 44100); // len = 1024, sampleRate = 44100 (see CalcATH(int len, int sampleRate)); values presumably in dB - TODO confirm
+ for (size_t bandNum = 0; bandNum < this->NumQMF; ++bandNum) { // walk every QMF band ...
+ for (size_t blockNum = this->BlocksPerBand[bandNum]; blockNum < this->BlocksPerBand[bandNum + 1]; ++blockNum) { // ... and every block that belongs to it
+ const size_t specNumStart = this->SpecsStartLong[blockNum]; // first spectral line of this block
+ float x = 999; // starting value for the running minimum; assumed to exceed any ATHSpec value - TODO confirm
+ for (size_t line = specNumStart; line < specNumStart + this->SpecsPerBlock[blockNum]; line++) {
+ x = fmin(x, ATHSpec[line]); // keep the lowest threshold among the block's spectral lines
+ }
+ x = pow(10, 0.1 * x); // 10^(x/10); presumably converts dB to a linear power value - TODO confirm
+ ATH.push_back(x / 100); // stored threshold is divided by 100: reduces the efficiency of the ATH but prevents the aliasing problem, TODO: fix it?
+ }
+ }
+}
+
uint32_t TAtrac3BitStreamWriter::CLCEnc(const uint32_t selector, const int mantissas[MaxSpecsPerBlock],
const uint32_t blockSize, NBitStream::TBitStream* bitStream)
{
@@ -165,7 +190,7 @@ static inline bool CheckBfus(uint16_t* numBfu, const vector<uint32_t>& precision
static const std::pair<uint8_t, vector<uint32_t>> DUMMY_ALLOC{1, vector<uint32_t>{0}};
std::pair<uint8_t, vector<uint32_t>> TAtrac3BitStreamWriter::CreateAllocation(const TSingleChannelElement& sce,
- const uint16_t targetBits, int mt[MaxSpecs])
+ const uint16_t targetBits, int mt[MaxSpecs], float laudness)
{
const vector<TScaledBlock>& scaledBlocks = sce.ScaledBlocks;
if (scaledBlocks.empty()) {
@@ -194,7 +219,7 @@ std::pair<uint8_t, vector<uint32_t>> TAtrac3BitStreamWriter::CreateAllocation(co
TFloat minShift = -8;
for (;;) {
TFloat shift = (maxShift + minShift) / 2;
- const vector<uint32_t>& tmpAlloc = CalcBitsAllocation(scaledBlocks, numBfu, spread, shift);
+ const vector<uint32_t>& tmpAlloc = CalcBitsAllocation(scaledBlocks, numBfu, spread, shift, laudness);
auto consumption = CalcSpecsBitsConsumption(sce, tmpAlloc, mt);
auto bitsUsedByTonal = EncodeTonalComponents(sce, tmpAlloc, nullptr);
@@ -453,18 +478,25 @@ uint16_t TAtrac3BitStreamWriter::EncodeTonalComponents(const TSingleChannelEleme
vector<uint32_t> TAtrac3BitStreamWriter::CalcBitsAllocation(const std::vector<TScaledBlock>& scaledBlocks,
const uint32_t bfuNum,
const TFloat spread,
- const TFloat shift)
+ const TFloat shift,
+ const TFloat loudness)
{
vector<uint32_t> bitsPerEachBlock(bfuNum);
for (size_t i = 0; i < bitsPerEachBlock.size(); ++i) {
- const uint32_t fix = FixedBitAllocTable[i];
- int tmp = spread * ( (TFloat)scaledBlocks[i].ScaleFactorIndex/3.2) + (1.0 - spread) * fix - shift;
- if (tmp > 7) {
- bitsPerEachBlock[i] = 7;
- } else if (tmp < 0) {
+ float ath = ATH[i] * loudness;
+ //std::cerr << "block: " << i << " Loudness: " << loudness << " " << 10 * log10(scaledBlocks[i].MaxEnergy / ath) << std::endl;
+ if (scaledBlocks[i].MaxEnergy < ath) {
bitsPerEachBlock[i] = 0;
} else {
- bitsPerEachBlock[i] = tmp;
+ const uint32_t fix = FixedBitAllocTable[i];
+ int tmp = spread * ( (TFloat)scaledBlocks[i].ScaleFactorIndex/3.2) + (1.0 - spread) * fix - shift;
+ if (tmp > 7) {
+ bitsPerEachBlock[i] = 7;
+ } else if (tmp < 0) {
+ bitsPerEachBlock[i] = 0;
+ } else {
+ bitsPerEachBlock[i] = tmp;
+ }
}
}
return bitsPerEachBlock;
@@ -503,13 +535,13 @@ static int32_t CalcMSBytesShift(uint32_t frameSz,
if (elements[1].ScaledBlocks.empty()) {
return maxAllowedShift;
} else {
- TFloat ratio = CalcMSRatio(elements[0].Energy, elements[1].Energy);
+ TFloat ratio = CalcMSRatio(elements[0].Loudness, elements[1].Loudness);
//std::cerr << ratio << std::endl;
return std::max(std::min(ToInt(frameSz * ratio), maxAllowedShift), -maxAllowedShift);
}
}
-void TAtrac3BitStreamWriter::WriteSoundUnit(const vector<TSingleChannelElement>& singleChannelElements)
+void TAtrac3BitStreamWriter::WriteSoundUnit(const vector<TSingleChannelElement>& singleChannelElements, float laudness)
{
ASSERT(singleChannelElements.size() == 1 || singleChannelElements.size() == 2);
@@ -566,7 +598,7 @@ void TAtrac3BitStreamWriter::WriteSoundUnit(const vector<TSingleChannelElement>&
for (uint32_t channel = 0; channel < singleChannelElements.size(); channel++) {
const TSingleChannelElement& sce = singleChannelElements[channel];
- allocations[channel] = CreateAllocation(sce, bitsToAlloc[channel], mt[channel]);
+ allocations[channel] = CreateAllocation(sce, bitsToAlloc[channel], mt[channel], laudness);
}
for (uint32_t channel = 0; channel < singleChannelElements.size(); channel++) {
diff --git a/src/atrac/atrac3_bitstream.h b/src/atrac/atrac3_bitstream.h
index 152437b..e67f726 100644
--- a/src/atrac/atrac3_bitstream.h
+++ b/src/atrac/atrac3_bitstream.h
@@ -45,9 +45,10 @@ public:
TAtrac3Data::SubbandInfo SubbandInfo;
std::vector<TTonalBlock> TonalBlocks;
std::vector<TScaledBlock> ScaledBlocks;
- TFloat Energy;
+ TFloat Loudness;
};
private:
+ static std::vector<TFloat> ATH;
struct TTonalComponentsSubGroup {
std::vector<uint8_t> SubGroupMap;
@@ -65,10 +66,10 @@ private:
const uint32_t blockSize, NBitStream::TBitStream* bitStream);
std::vector<uint32_t> CalcBitsAllocation(const std::vector<TScaledBlock>& scaledBlocks,
- uint32_t bfuNum, TFloat spread, TFloat shift);
+ uint32_t bfuNum, TFloat spread, TFloat shift, TFloat loudness);
std::pair<uint8_t, std::vector<uint32_t>> CreateAllocation(const TSingleChannelElement& sce,
- uint16_t targetBits, int mt[MaxSpecs]);
+ uint16_t targetBits, int mt[MaxSpecs], float laudness);
std::pair<uint8_t, uint32_t> CalcSpecsBitsConsumption(const TSingleChannelElement& sce,
const std::vector<uint32_t>& precisionPerEachBlocks,
@@ -85,15 +86,9 @@ private:
const std::vector<uint32_t>& allocTable,
NBitStream::TBitStream* bitStream);
public:
- TAtrac3BitStreamWriter(ICompressedOutput* container, const TContainerParams& params, uint32_t bfuIdxConst) //no mono mode for atrac3
- : Container(container)
- , Params(params)
- , BfuIdxConst(bfuIdxConst)
- {
- NEnv::SetRoundFloat();
- }
+ TAtrac3BitStreamWriter(ICompressedOutput* container, const TContainerParams& params, uint32_t bfuIdxConst);
- void WriteSoundUnit(const std::vector<TSingleChannelElement>& singleChannelElements);
+ void WriteSoundUnit(const std::vector<TSingleChannelElement>& singleChannelElements, float laudness);
};
} // namespace NAtrac3
diff --git a/src/atrac/atrac_psy_common.cpp b/src/atrac/atrac_psy_common.cpp
index 18f855c..71c1190 100644
--- a/src/atrac/atrac_psy_common.cpp
+++ b/src/atrac/atrac_psy_common.cpp
@@ -135,25 +135,6 @@ vector<float> CalcATH(int len, int sampleRate)
return res;
}
-float TrackLoudness(float prevLoud, const TFloat* e0, const TFloat* e1, const float* weight, size_t sz)
-{
- float s = 0;
- if (e1 != nullptr) {
- for (size_t i = 0; i < sz; i++) {
- s += (e0[i] + e1[i]) * weight[i];
- }
-
- s *= 0.5;
-
- } else {
- for (size_t i = 0; i < sz; i++) {
- s += e0[i] * weight[i];
- }
- }
-
- return 0.98 * prevLoud + 0.02 * s;
-}
-
vector<float> CreateLoudnessCurve(size_t sz)
{
std::vector<float> res;
diff --git a/src/atrac/atrac_psy_common.h b/src/atrac/atrac_psy_common.h
index ffd1f03..854dfd1 100644
--- a/src/atrac/atrac_psy_common.h
+++ b/src/atrac/atrac_psy_common.h
@@ -23,7 +23,17 @@ namespace NAtracDEnc {
TFloat AnalizeScaleFactorSpread(const std::vector<TScaledBlock>& scaledBlocks);
std::vector<float> CalcATH(int len, int sampleRate);
-float TrackLoudness(float prevLoud, const TFloat* e0, const TFloat* e1, const float* weight, size_t sz);
+
+inline float TrackLoudness(float prevLoud, float l0, float l1) // two-value overload: folds the loudness values l0 and l1 into the running loudness prevLoud
+{
+ return 0.98 * prevLoud + 0.01 * (l0 + l1); // 0.98 of the previous value plus 0.02 of the mean of l0 and l1 (0.01 each)
+}
+
+inline float TrackLoudness(float prevLoud, float l) // single-value overload: folds the loudness value l into the running loudness prevLoud
+{
+ return 0.98 * prevLoud + 0.02 * l; // 0.98 of the previous value plus 0.02 of the new one
+}
+
std::vector<float> CreateLoudnessCurve(size_t sz);
} //namespace NAtracDEnc