aboutsummaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorDaniil Cherednik <dan.cherednik@gmail.com>2024-11-30 13:32:34 +0100
committerDaniil Cherednik <dan.cherednik@gmail.com>2024-12-01 22:28:26 +0100
commit2ca3d7fb3cfdc0b311bb20426d53782910dee64e (patch)
tree3db4ea83192bf999e0afb5039e9d1441069a259b /src
parent4be2e387a494b1c02d2ca0747dd64b6ba8980d21 (diff)
downloadatracdenc-2ca3d7fb3cfdc0b311bb20426d53782910dee64e.tar.gz
Tiny quality improvements for ATRAC3 compatible mode:
* Use adaptive ATH, but its efficiency is limited due to QMF aliasing. * Use loudness instead of energy to split M/S frame size.
Diffstat (limited to 'src')
-rw-r--r--src/atrac/atrac3_bitstream.cpp56
-rw-r--r--src/atrac/atrac3_bitstream.h17
-rw-r--r--src/atrac/atrac_psy_common.cpp19
-rw-r--r--src/atrac/atrac_psy_common.h12
-rw-r--r--src/atrac1denc.cpp15
-rw-r--r--src/atrac3denc.cpp37
-rw-r--r--src/atrac3denc.h4
7 files changed, 106 insertions, 54 deletions
diff --git a/src/atrac/atrac3_bitstream.cpp b/src/atrac/atrac3_bitstream.cpp
index 3b8337d..9362ca0 100644
--- a/src/atrac/atrac3_bitstream.cpp
+++ b/src/atrac/atrac3_bitstream.cpp
@@ -40,6 +40,31 @@ static const uint32_t FixedBitAllocTable[TAtrac3Data::MaxBfus] = {
1, 0
};
+std::vector<TFloat> TAtrac3BitStreamWriter::ATH;
+TAtrac3BitStreamWriter::TAtrac3BitStreamWriter(ICompressedOutput* container, const TContainerParams& params, uint32_t bfuIdxConst)
+ : Container(container)
+ , Params(params)
+ , BfuIdxConst(bfuIdxConst)
+{
+ NEnv::SetRoundFloat();
+ if (ATH.size()) {
+ return;
+ }
+ ATH.reserve(MaxBfus);
+ auto ATHSpec = CalcATH(1024, 44100);
+ for (size_t bandNum = 0; bandNum < this->NumQMF; ++bandNum) {
+ for (size_t blockNum = this->BlocksPerBand[bandNum]; blockNum < this->BlocksPerBand[bandNum + 1]; ++blockNum) {
+ const size_t specNumStart = this->SpecsStartLong[blockNum];
+ float x = 999;
+ for (size_t line = specNumStart; line < specNumStart + this->SpecsPerBlock[blockNum]; line++) {
+ x = fmin(x, ATHSpec[line]);
+ }
+ x = pow(10, 0.1 * x);
+ ATH.push_back(x / 100); //reduce efficiency of ATH, but prevents aliasing problem, TODO: fix it?
+ }
+ }
+}
+
uint32_t TAtrac3BitStreamWriter::CLCEnc(const uint32_t selector, const int mantissas[MaxSpecsPerBlock],
const uint32_t blockSize, NBitStream::TBitStream* bitStream)
{
@@ -165,7 +190,7 @@ static inline bool CheckBfus(uint16_t* numBfu, const vector<uint32_t>& precision
static const std::pair<uint8_t, vector<uint32_t>> DUMMY_ALLOC{1, vector<uint32_t>{0}};
std::pair<uint8_t, vector<uint32_t>> TAtrac3BitStreamWriter::CreateAllocation(const TSingleChannelElement& sce,
- const uint16_t targetBits, int mt[MaxSpecs])
+ const uint16_t targetBits, int mt[MaxSpecs], float laudness)
{
const vector<TScaledBlock>& scaledBlocks = sce.ScaledBlocks;
if (scaledBlocks.empty()) {
@@ -194,7 +219,7 @@ std::pair<uint8_t, vector<uint32_t>> TAtrac3BitStreamWriter::CreateAllocation(co
TFloat minShift = -8;
for (;;) {
TFloat shift = (maxShift + minShift) / 2;
- const vector<uint32_t>& tmpAlloc = CalcBitsAllocation(scaledBlocks, numBfu, spread, shift);
+ const vector<uint32_t>& tmpAlloc = CalcBitsAllocation(scaledBlocks, numBfu, spread, shift, laudness);
auto consumption = CalcSpecsBitsConsumption(sce, tmpAlloc, mt);
auto bitsUsedByTonal = EncodeTonalComponents(sce, tmpAlloc, nullptr);
@@ -453,18 +478,25 @@ uint16_t TAtrac3BitStreamWriter::EncodeTonalComponents(const TSingleChannelEleme
vector<uint32_t> TAtrac3BitStreamWriter::CalcBitsAllocation(const std::vector<TScaledBlock>& scaledBlocks,
const uint32_t bfuNum,
const TFloat spread,
- const TFloat shift)
+ const TFloat shift,
+ const TFloat loudness)
{
vector<uint32_t> bitsPerEachBlock(bfuNum);
for (size_t i = 0; i < bitsPerEachBlock.size(); ++i) {
- const uint32_t fix = FixedBitAllocTable[i];
- int tmp = spread * ( (TFloat)scaledBlocks[i].ScaleFactorIndex/3.2) + (1.0 - spread) * fix - shift;
- if (tmp > 7) {
- bitsPerEachBlock[i] = 7;
- } else if (tmp < 0) {
+ float ath = ATH[i] * loudness;
+ //std::cerr << "block: " << i << " Loudness: " << loudness << " " << 10 * log10(scaledBlocks[i].MaxEnergy / ath) << std::endl;
+ if (scaledBlocks[i].MaxEnergy < ath) {
bitsPerEachBlock[i] = 0;
} else {
- bitsPerEachBlock[i] = tmp;
+ const uint32_t fix = FixedBitAllocTable[i];
+ int tmp = spread * ( (TFloat)scaledBlocks[i].ScaleFactorIndex/3.2) + (1.0 - spread) * fix - shift;
+ if (tmp > 7) {
+ bitsPerEachBlock[i] = 7;
+ } else if (tmp < 0) {
+ bitsPerEachBlock[i] = 0;
+ } else {
+ bitsPerEachBlock[i] = tmp;
+ }
}
}
return bitsPerEachBlock;
@@ -503,13 +535,13 @@ static int32_t CalcMSBytesShift(uint32_t frameSz,
if (elements[1].ScaledBlocks.empty()) {
return maxAllowedShift;
} else {
- TFloat ratio = CalcMSRatio(elements[0].Energy, elements[1].Energy);
+ TFloat ratio = CalcMSRatio(elements[0].Loudness, elements[1].Loudness);
//std::cerr << ratio << std::endl;
return std::max(std::min(ToInt(frameSz * ratio), maxAllowedShift), -maxAllowedShift);
}
}
-void TAtrac3BitStreamWriter::WriteSoundUnit(const vector<TSingleChannelElement>& singleChannelElements)
+void TAtrac3BitStreamWriter::WriteSoundUnit(const vector<TSingleChannelElement>& singleChannelElements, float laudness)
{
ASSERT(singleChannelElements.size() == 1 || singleChannelElements.size() == 2);
@@ -566,7 +598,7 @@ void TAtrac3BitStreamWriter::WriteSoundUnit(const vector<TSingleChannelElement>&
for (uint32_t channel = 0; channel < singleChannelElements.size(); channel++) {
const TSingleChannelElement& sce = singleChannelElements[channel];
- allocations[channel] = CreateAllocation(sce, bitsToAlloc[channel], mt[channel]);
+ allocations[channel] = CreateAllocation(sce, bitsToAlloc[channel], mt[channel], laudness);
}
for (uint32_t channel = 0; channel < singleChannelElements.size(); channel++) {
diff --git a/src/atrac/atrac3_bitstream.h b/src/atrac/atrac3_bitstream.h
index 152437b..e67f726 100644
--- a/src/atrac/atrac3_bitstream.h
+++ b/src/atrac/atrac3_bitstream.h
@@ -45,9 +45,10 @@ public:
TAtrac3Data::SubbandInfo SubbandInfo;
std::vector<TTonalBlock> TonalBlocks;
std::vector<TScaledBlock> ScaledBlocks;
- TFloat Energy;
+ TFloat Loudness;
};
private:
+ static std::vector<TFloat> ATH;
struct TTonalComponentsSubGroup {
std::vector<uint8_t> SubGroupMap;
@@ -65,10 +66,10 @@ private:
const uint32_t blockSize, NBitStream::TBitStream* bitStream);
std::vector<uint32_t> CalcBitsAllocation(const std::vector<TScaledBlock>& scaledBlocks,
- uint32_t bfuNum, TFloat spread, TFloat shift);
+ uint32_t bfuNum, TFloat spread, TFloat shift, TFloat loudness);
std::pair<uint8_t, std::vector<uint32_t>> CreateAllocation(const TSingleChannelElement& sce,
- uint16_t targetBits, int mt[MaxSpecs]);
+ uint16_t targetBits, int mt[MaxSpecs], float laudness);
std::pair<uint8_t, uint32_t> CalcSpecsBitsConsumption(const TSingleChannelElement& sce,
const std::vector<uint32_t>& precisionPerEachBlocks,
@@ -85,15 +86,9 @@ private:
const std::vector<uint32_t>& allocTable,
NBitStream::TBitStream* bitStream);
public:
- TAtrac3BitStreamWriter(ICompressedOutput* container, const TContainerParams& params, uint32_t bfuIdxConst) //no mono mode for atrac3
- : Container(container)
- , Params(params)
- , BfuIdxConst(bfuIdxConst)
- {
- NEnv::SetRoundFloat();
- }
+ TAtrac3BitStreamWriter(ICompressedOutput* container, const TContainerParams& params, uint32_t bfuIdxConst);
- void WriteSoundUnit(const std::vector<TSingleChannelElement>& singleChannelElements);
+ void WriteSoundUnit(const std::vector<TSingleChannelElement>& singleChannelElements, float laudness);
};
} // namespace NAtrac3
diff --git a/src/atrac/atrac_psy_common.cpp b/src/atrac/atrac_psy_common.cpp
index 18f855c..71c1190 100644
--- a/src/atrac/atrac_psy_common.cpp
+++ b/src/atrac/atrac_psy_common.cpp
@@ -135,25 +135,6 @@ vector<float> CalcATH(int len, int sampleRate)
return res;
}
-float TrackLoudness(float prevLoud, const TFloat* e0, const TFloat* e1, const float* weight, size_t sz)
-{
- float s = 0;
- if (e1 != nullptr) {
- for (size_t i = 0; i < sz; i++) {
- s += (e0[i] + e1[i]) * weight[i];
- }
-
- s *= 0.5;
-
- } else {
- for (size_t i = 0; i < sz; i++) {
- s += e0[i] * weight[i];
- }
- }
-
- return 0.98 * prevLoud + 0.02 * s;
-}
-
vector<float> CreateLoudnessCurve(size_t sz)
{
std::vector<float> res;
diff --git a/src/atrac/atrac_psy_common.h b/src/atrac/atrac_psy_common.h
index ffd1f03..854dfd1 100644
--- a/src/atrac/atrac_psy_common.h
+++ b/src/atrac/atrac_psy_common.h
@@ -23,7 +23,17 @@ namespace NAtracDEnc {
TFloat AnalizeScaleFactorSpread(const std::vector<TScaledBlock>& scaledBlocks);
std::vector<float> CalcATH(int len, int sampleRate);
-float TrackLoudness(float prevLoud, const TFloat* e0, const TFloat* e1, const float* weight, size_t sz);
+
+inline float TrackLoudness(float prevLoud, float l0, float l1)
+{
+ return 0.98 * prevLoud + 0.01 * (l0 + l1);
+}
+
+inline float TrackLoudness(float prevLoud, float l)
+{
+ return 0.98 * prevLoud + 0.02 * l;
+}
+
std::vector<float> CreateLoudnessCurve(size_t sz);
} //namespace NAtracDEnc
diff --git a/src/atrac1denc.cpp b/src/atrac1denc.cpp
index 08f7a1d..4682fe2 100644
--- a/src/atrac1denc.cpp
+++ b/src/atrac1denc.cpp
@@ -175,11 +175,11 @@ TPCMEngine<TFloat>::TProcessLambda TAtrac1Encoder::GetLambda() {
struct TChannelData {
TChannelData()
: Specs(NumSamples)
- , Energy(NumSamples)
+ , Loudness(0.0)
{}
vector<TFloat> Specs;
- vector<TFloat> Energy;
+ float Loudness;
};
using TData = vector<TChannelData>;
@@ -219,17 +219,18 @@ TPCMEngine<TFloat>::TProcessLambda TAtrac1Encoder::GetLambda() {
Mdct(&specs[0], &PcmBufLow[channel][0], &PcmBufMid[channel][0], &PcmBufHi[channel][0], blockSz[channel]);
- auto& erg = (*buf)[channel].Energy;
-
+ float l = 0.0;
for (size_t i = 0; i < specs.size(); i++) {
- erg[i] = specs[i] * specs[i];
+ float e = specs[i] * specs[i];
+ l += e * LoudnessCurve[i];
}
+ (*buf)[channel].Loudness = l;
}
if (srcChannels == 2 && windowMasks[0] == 0 && windowMasks[1] == 0) {
- Loudness = TrackLoudness(Loudness, (*buf)[0].Energy.data(), (*buf)[1].Energy.data(), LoudnessCurve.data(), NumSamples);
+ Loudness = TrackLoudness(Loudness, (*buf)[0].Loudness, (*buf)[1].Loudness);
} else if (windowMasks[0] == 0) {
- Loudness = TrackLoudness(Loudness, (*buf)[0].Energy.data(), nullptr, LoudnessCurve.data(), NumSamples);
+ Loudness = TrackLoudness(Loudness, (*buf)[0].Loudness);
}
for (uint32_t channel = 0; channel < srcChannels; channel++) {
diff --git a/src/atrac3denc.cpp b/src/atrac3denc.cpp
index a07c11e..65eef67 100644
--- a/src/atrac3denc.cpp
+++ b/src/atrac3denc.cpp
@@ -18,6 +18,7 @@
#include "atrac3denc.h"
#include "transient_detector.h"
+#include "atrac/atrac_psy_common.h"
#include <assert.h>
#include <algorithm>
#include <iostream>
@@ -91,6 +92,7 @@ void TAtrac3MDCT::Midct(TFloat specs[1024], TFloat* bands[4], TGainDemodulatorAr
TAtrac3Encoder::TAtrac3Encoder(TCompressedOutputPtr&& oma, TAtrac3EncoderSettings&& encoderSettings)
: Oma(std::move(oma))
, Params(std::move(encoderSettings))
+ , LoudnessCurve(CreateLoudnessCurve(NumSamples))
, SingleChannelElements(Params.SourceChannels)
, TransientParamsHistory(Params.SourceChannels, std::vector<TTransientParam>(4))
{}
@@ -289,7 +291,19 @@ void TAtrac3Encoder::Matrixing()
TPCMEngine<TFloat>::TProcessLambda TAtrac3Encoder::GetLambda()
{
std::shared_ptr<TAtrac3BitStreamWriter> bitStreamWriter(new TAtrac3BitStreamWriter(Oma.get(), *Params.ConteinerParams, Params.BfuIdxConst));
- return [this, bitStreamWriter](TFloat* data, const TPCMEngine<TFloat>::ProcessMeta& meta) {
+
+ struct TChannelData {
+ TChannelData()
+ : Specs(NumSamples)
+ {}
+
+ vector<TFloat> Specs;
+ };
+
+ using TData = vector<TChannelData>;
+ auto buf = std::make_shared<TData>(2);
+
+ return [this, bitStreamWriter, buf](TFloat* data, const TPCMEngine<TFloat>::ProcessMeta& meta) {
using TSce = TAtrac3BitStreamWriter::TSingleChannelElement;
for (uint32_t channel = 0; channel < meta.Channels; channel++) {
@@ -310,7 +324,7 @@ TPCMEngine<TFloat>::TProcessLambda TAtrac3Encoder::GetLambda()
}
for (uint32_t channel = 0; channel < meta.Channels; channel++) {
- vector<TFloat> specs(1024);
+ auto& specs = (*buf)[channel].Specs;
TSce* sce = &SingleChannelElements[channel];
sce->SubbandInfo.Reset();
@@ -326,11 +340,26 @@ TPCMEngine<TFloat>::TProcessLambda TAtrac3Encoder::GetLambda()
Mdct(specs.data(), p, maxOverlapLevels, MakeGainModulatorArray(sce->SubbandInfo));
}
- sce->Energy = CalcEnergy(specs);
+ float l = 0;
+ for (size_t i = 0; i < specs.size(); i++) {
+ float e = specs[i] * specs[i];
+ l += e * LoudnessCurve[i];
+ }
+
+ sce->Loudness = l;
//TBlockSize for ATRAC3 - 4 subband, all are long (no short window)
sce->ScaledBlocks = Scaler.ScaleFrame(specs, TBlockSize());
+ }
+ if (meta.Channels == 2 && !Params.ConteinerParams->Js) {
+ const TSce& sce0 = SingleChannelElements[0];
+ const TSce& sce1 = SingleChannelElements[1];
+ Loudness = TrackLoudness(Loudness, sce0.Loudness, sce1.Loudness);
+ } else {
+ // 1 channel or Js. In case of Js we do not use side channel to adjust loudness
+ const TSce& sce0 = SingleChannelElements[0];
+ Loudness = TrackLoudness(Loudness, sce0.Loudness);
}
if (Params.ConteinerParams->Js && meta.Channels == 1) {
@@ -341,7 +370,7 @@ TPCMEngine<TFloat>::TProcessLambda TAtrac3Encoder::GetLambda()
SingleChannelElements[1].SubbandInfo.Info.resize(1);
}
- bitStreamWriter->WriteSoundUnit(SingleChannelElements);
+ bitStreamWriter->WriteSoundUnit(SingleChannelElements, Loudness);
};
}
diff --git a/src/atrac3denc.h b/src/atrac3denc.h
index 1aea07e..50f2df0 100644
--- a/src/atrac3denc.h
+++ b/src/atrac3denc.h
@@ -82,11 +82,13 @@ protected:
class TAtrac3Encoder : public IProcessor<TFloat>, public TAtrac3MDCT {
TCompressedOutputPtr Oma;
const NAtrac3::TAtrac3EncoderSettings Params;
+ const std::vector<float> LoudnessCurve;
TDelayBuffer<TFloat, 8, 256> PcmBuffer; //8 = 2 channels * 4 bands
TFloat PrevPeak[2][4]; //2 channel, 4 band - peak level (after windowing), used to check overflow during scalling
Atrac3AnalysisFilterBank<TFloat> AnalysisFilterBank[2];
+
TScaler<TAtrac3Data> Scaler;
std::vector<NAtrac3::TAtrac3BitStreamWriter::TSingleChannelElement> SingleChannelElements;
public:
@@ -100,6 +102,8 @@ public:
};
private:
std::vector<std::vector<TTransientParam>> TransientParamsHistory;
+ static constexpr float LoudFactor = 0.006;
+ float Loudness = LoudFactor;
#ifdef ATRAC_UT_PUBLIC
public:
#endif