aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Daniil Cherednik <dan.cherednik@gmail.com> 2024-10-11 00:07:48 +0200
committer Daniil Cherednik <dan.cherednik@gmail.com> 2024-10-11 00:07:48 +0200
commit 57ba306db046601b96b5a1943e63574a37c5e96c (patch)
tree 85103e059c0153923bcb844ea69f7117e6d9b8a5
parent fe377370c1ae4691a65270123345f225e3159ed8 (diff)
download atracdenc-new_psy.tar.gz
Attempt to use adaptive ATH for ATRAC1 [new_psy]
-rw-r--r-- src/atrac/atrac1_bitalloc.cpp 11
-rw-r--r-- src/atrac/atrac1_bitalloc.h 7
-rw-r--r-- src/atrac/atrac_psy_common.cpp 36
-rw-r--r-- src/atrac/atrac_psy_common.h 2
-rw-r--r-- src/atrac/atrac_scale.h 2
-rw-r--r-- src/atrac1denc.cpp 54
-rw-r--r-- src/atrac1denc.h 6
7 files changed, 100 insertions, 18 deletions
diff --git a/src/atrac/atrac1_bitalloc.cpp b/src/atrac/atrac1_bitalloc.cpp
index a2e63fa..21584a9 100644
--- a/src/atrac/atrac1_bitalloc.cpp
+++ b/src/atrac/atrac1_bitalloc.cpp
@@ -127,12 +127,15 @@ vector<uint32_t> TAtrac1SimpleBitAlloc::CalcBitsAllocation(const std::vector<TSc
const uint32_t bfuNum,
const TFloat spread,
const TFloat shift,
- const TBlockSize& blockSize) {
+ const TBlockSize& blockSize,
+ const float loudness) {
vector<uint32_t> bitsPerEachBlock(bfuNum);
for (size_t i = 0; i < bitsPerEachBlock.size(); ++i) {
bool shortBlock = blockSize.LogCount[BfuToBand(i)];
const uint32_t fix = shortBlock ? FixedBitAllocTableShort[i] : FixedBitAllocTableLong[i];
- if (!shortBlock && scaledBlocks[i].MaxEnergy < ATHLong[i]) {
+ float ath = ATHLong[i] * loudness;
+ //std::cerr << "block: " << i << " Loudness: " << loudness << " " << 10 * log10(scaledBlocks[i].MaxEnergy / ath) << std::endl;
+ if (!shortBlock && scaledBlocks[i].MaxEnergy < ath) {
bitsPerEachBlock[i] = 0;
} else {
int tmp = spread * ( (TFloat)scaledBlocks[i].ScaleFactorIndex/3.2) + (1.0 - spread) * fix - shift;
@@ -183,7 +186,7 @@ uint32_t TAtrac1SimpleBitAlloc::CheckBfuUsage(bool* changed,
return curBfuId;
}
-uint32_t TAtrac1SimpleBitAlloc::Write(const std::vector<TScaledBlock>& scaledBlocks, const TBlockSize& blockSize) {
+uint32_t TAtrac1SimpleBitAlloc::Write(const std::vector<TScaledBlock>& scaledBlocks, const TBlockSize& blockSize, float loudness) {
uint32_t bfuIdx = BfuIdxConst ? BfuIdxConst - 1 : 7;
bool autoBfu = !BfuIdxConst;
TFloat spread = AnalizeScaleFactorSpread(scaledBlocks);
@@ -204,7 +207,7 @@ uint32_t TAtrac1SimpleBitAlloc::Write(const std::vector<TScaledBlock>& scaledBlo
bool bfuNumChanged = false;
for (;;) {
const vector<uint32_t>& tmpAlloc = CalcBitsAllocation(scaledBlocks, BfuAmountTab[bfuIdx],
- spread, shift, blockSize);
+ spread, shift, blockSize, loudness);
uint32_t bitsUsed = 0;
for (size_t i = 0; i < tmpAlloc.size(); i++) {
bitsUsed += SpecsPerBlock[i] * tmpAlloc[i];
diff --git a/src/atrac/atrac1_bitalloc.h b/src/atrac/atrac1_bitalloc.h
index b1f6b54..4c541b8 100644
--- a/src/atrac/atrac1_bitalloc.h
+++ b/src/atrac/atrac1_bitalloc.h
@@ -33,7 +33,7 @@ class IAtrac1BitAlloc {
public:
IAtrac1BitAlloc() {};
virtual ~IAtrac1BitAlloc() {};
- virtual uint32_t Write(const std::vector<TScaledBlock>& scaledBlocks, const TBlockSize& blockSize) = 0;
+ virtual uint32_t Write(const std::vector<TScaledBlock>& scaledBlocks, const TBlockSize& blockSize, float loudness) = 0;
};
class TBitsBooster : public virtual TAtrac1Data {
@@ -56,7 +56,8 @@ public:
class TAtrac1SimpleBitAlloc : public TAtrac1BitStreamWriter, public TBitsBooster, public virtual IAtrac1BitAlloc {
std::vector<uint32_t> CalcBitsAllocation(const std::vector<TScaledBlock>& scaledBlocks, const uint32_t bfuNum,
- const TFloat spread, const TFloat shift, const TBlockSize& blockSize);
+ const TFloat spread, const TFloat shift, const TBlockSize& blockSize,
+ const float loudness);
const uint32_t BfuIdxConst;
const bool FastBfuNumSearch;
static std::vector<TFloat> ATHLong;
@@ -66,7 +67,7 @@ class TAtrac1SimpleBitAlloc : public TAtrac1BitStreamWriter, public TBitsBooster
public:
TAtrac1SimpleBitAlloc(ICompressedOutput* container, uint32_t bfuIdxConst, bool fastBfuNumSearch);
~TAtrac1SimpleBitAlloc() {};
- uint32_t Write(const std::vector<TScaledBlock>& scaledBlocks, const TBlockSize& blockSize) override;
+ uint32_t Write(const std::vector<TScaledBlock>& scaledBlocks, const TBlockSize& blockSize, float loudness) override;
};
} //namespace NAtrac1
diff --git a/src/atrac/atrac_psy_common.cpp b/src/atrac/atrac_psy_common.cpp
index 089bf47..18f855c 100644
--- a/src/atrac/atrac_psy_common.cpp
+++ b/src/atrac/atrac_psy_common.cpp
@@ -135,4 +135,40 @@ vector<float> CalcATH(int len, int sampleRate)
return res;
}
+/**
+ * Updates a running loudness estimate with the weighted energy of one frame.
+ *
+ * The frame value is the sum over `sz` bins of energy multiplied by `weight`.
+ * When a second energy array is supplied, the two arrays are added per bin and
+ * the accumulated total is halved. The frame value is then blended into the
+ * previous estimate as 0.98 * prevLoud + 0.02 * frameValue.
+ *
+ * @param prevLoud estimate produced for the preceding frame
+ * @param e0       per-bin energies; `sz` entries are read
+ * @param e1       optional second per-bin energy array; pass nullptr to use e0 only
+ * @param weight   per-bin weights; `sz` entries are read
+ * @param sz       number of bins to accumulate
+ * @return the updated loudness estimate
+ */
+float TrackLoudness(float prevLoud, const TFloat* e0, const TFloat* e1, const float* weight, size_t sz)
+{
+    const bool hasSecond = (e1 != nullptr);
+    float acc = 0;
+
+    for (size_t bin = 0; bin < sz; ++bin) {
+        if (hasSecond) {
+            acc += (e0[bin] + e1[bin]) * weight[bin];
+        } else {
+            acc += e0[bin] * weight[bin];
+        }
+    }
+
+    if (hasSecond) {
+        // Two inputs were summed per bin; halve to get their mean.
+        acc *= 0.5;
+    }
+
+    return 0.98 * prevLoud + 0.02 * acc;
+}
+
+/**
+ * Builds a table of `sz` per-bin weights.
+ *
+ * For index i the value f = (i + 3) * 0.5 * 44100 / sz is computed, and the
+ * stored weight is 10^(0.1 * (-10 * (log10(f) - 3.5)^2 + 3 - f / 3000)).
+ *
+ * NOTE(review): 44100 is presumably the sample rate in Hz, making f a bin
+ * frequency; the reason for the 3-bin index offset is not visible here - confirm.
+ *
+ * @param sz number of entries to generate
+ * @return vector holding `sz` weights (empty when sz is 0)
+ */
+vector<float> CreateLoudnessCurve(size_t sz)
+{
+    std::vector<float> curve(sz);
+
+    for (size_t idx = 0; idx < curve.size(); ++idx) {
+        const float freq = static_cast<float>(idx + 3) * 0.5 * 44100 / static_cast<float>(sz);
+        const float offs = std::log10(freq) - 3.5;
+        const float level = -10 * offs * offs + 3 - freq / 3000;
+        // Convert the level from a 10*log10 scale back to a linear factor.
+        curve[idx] = std::pow(10, (0.1 * level));
+    }
+
+    return curve;
+}
+
} // namespace NAtracDEnc
diff --git a/src/atrac/atrac_psy_common.h b/src/atrac/atrac_psy_common.h
index 1a826bb..ffd1f03 100644
--- a/src/atrac/atrac_psy_common.h
+++ b/src/atrac/atrac_psy_common.h
@@ -23,5 +23,7 @@ namespace NAtracDEnc {
TFloat AnalizeScaleFactorSpread(const std::vector<TScaledBlock>& scaledBlocks);
std::vector<float> CalcATH(int len, int sampleRate);
+float TrackLoudness(float prevLoud, const TFloat* e0, const TFloat* e1, const float* weight, size_t sz);
+std::vector<float> CreateLoudnessCurve(size_t sz);
} //namespace NAtracDEnc
diff --git a/src/atrac/atrac_scale.h b/src/atrac/atrac_scale.h
index b590662..ec83059 100644
--- a/src/atrac/atrac_scale.h
+++ b/src/atrac/atrac_scale.h
@@ -77,7 +77,7 @@ public:
TBlockSize()
: LogCount({{0, 0, 0, 0}})
{}
- const std::array<int, 4> LogCount;
+ std::array<int, 4> LogCount;
};
} //namespace NAtracDEnc
diff --git a/src/atrac1denc.cpp b/src/atrac1denc.cpp
index d622007..08f7a1d 100644
--- a/src/atrac1denc.cpp
+++ b/src/atrac1denc.cpp
@@ -24,6 +24,7 @@
#include "atrac/atrac1_dequantiser.h"
#include "atrac/atrac1_qmf.h"
#include "atrac/atrac1_bitalloc.h"
+#include "atrac/atrac_psy_common.h"
#include "util.h"
namespace NAtracDEnc {
@@ -35,6 +36,7 @@ using std::vector;
TAtrac1Encoder::TAtrac1Encoder(TCompressedOutputPtr&& aea, TAtrac1EncodeSettings&& settings)
: Aea(std::move(aea))
, Settings(std::move(settings))
+ , LoudnessCurve(CreateLoudnessCurve(NumSamples)) // weighting table with NumSamples entries, built once at construction
{
}
@@ -164,22 +166,38 @@ TPCMEngine<TFloat>::TProcessLambda TAtrac1Decoder::GetLambda() {
TPCMEngine<TFloat>::TProcessLambda TAtrac1Encoder::GetLambda() {
const uint32_t srcChannels = Aea->GetChannelNum();
- vector<IAtrac1BitAlloc*> bitAlloc;
- for (size_t i = 0; i < srcChannels; i++) {
- bitAlloc.push_back(new TAtrac1SimpleBitAlloc(Aea.get(), Settings.GetBfuIdxConst(), Settings.GetFastBfuNumSearch()));
+ vector<IAtrac1BitAlloc*> bitAlloc(srcChannels);
+
+ for (auto& x : bitAlloc) {
+ x = new TAtrac1SimpleBitAlloc(Aea.get(), Settings.GetBfuIdxConst(), Settings.GetFastBfuNumSearch());
}
- return [this, srcChannels, bitAlloc](TFloat* data, const TPCMEngine<TFloat>::ProcessMeta& /*meta*/) {
+ // Working buffers for one channel; both vectors are created with NumSamples elements.
+ struct TChannelData {
+     vector<TFloat> Specs = vector<TFloat>(NumSamples);
+     vector<TFloat> Energy = vector<TFloat>(NumSamples);
+ };
+
+ using TData = vector<TChannelData>;
+ auto buf = std::make_shared<TData>(srcChannels);
+
+ return [this, srcChannels, bitAlloc, buf](TFloat* data, const TPCMEngine<TFloat>::ProcessMeta& /*meta*/) {
+ TBlockSize blockSz[2];
+
+ uint32_t windowMasks[2] = {0};
for (uint32_t channel = 0; channel < srcChannels; channel++) {
TFloat src[NumSamples];
- vector<TFloat> specs(512);
for (size_t i = 0; i < NumSamples; ++i) {
src[i] = data[i * srcChannels + channel];
}
AnalysisFilterBank[channel].Analysis(&src[0], &PcmBufLow[channel][0], &PcmBufMid[channel][0], &PcmBufHi[channel][0]);
- uint32_t windowMask = 0;
+ uint32_t& windowMask = windowMasks[channel];
if (Settings.GetWindowMode() == TAtrac1EncodeSettings::EWindowMode::EWM_AUTO) {
windowMask |= (uint32_t)TransientDetectors.GetDetector(channel, 0).Detect(&PcmBufLow[channel][0]);
@@ -194,10 +212,28 @@ TPCMEngine<TFloat>::TProcessLambda TAtrac1Encoder::GetLambda() {
//no transient detection, use given mask
windowMask = Settings.GetWindowMask();
}
- const TBlockSize blockSize(windowMask & 0x1, windowMask & 0x2, windowMask & 0x4); //low, mid, hi
- Mdct(&specs[0], &PcmBufLow[channel][0], &PcmBufMid[channel][0], &PcmBufHi[channel][0], blockSize);
- bitAlloc[channel]->Write(Scaler.ScaleFrame(specs, blockSize), blockSize);
+ blockSz[channel] = TBlockSize(windowMask & 0x1, windowMask & 0x2, windowMask & 0x4); //low, mid, hi
+
+ auto& specs = (*buf)[channel].Specs;
+
+ Mdct(&specs[0], &PcmBufLow[channel][0], &PcmBufMid[channel][0], &PcmBufHi[channel][0], blockSz[channel]);
+
+ auto& erg = (*buf)[channel].Energy;
+
+ for (size_t i = 0; i < specs.size(); i++) {
+ erg[i] = specs[i] * specs[i];
+ }
+ }
+
+ if (srcChannels == 2 && windowMasks[0] == 0 && windowMasks[1] == 0) {
+ Loudness = TrackLoudness(Loudness, (*buf)[0].Energy.data(), (*buf)[1].Energy.data(), LoudnessCurve.data(), NumSamples);
+ } else if (windowMasks[0] == 0) {
+ Loudness = TrackLoudness(Loudness, (*buf)[0].Energy.data(), nullptr, LoudnessCurve.data(), NumSamples);
+ }
+
+ for (uint32_t channel = 0; channel < srcChannels; channel++) {
+ bitAlloc[channel]->Write(Scaler.ScaleFrame((*buf)[channel].Specs, blockSz[channel]), blockSz[channel], Loudness / LoudFactor);
}
};
}
diff --git a/src/atrac1denc.h b/src/atrac1denc.h
index 5752f22..5e6c4fe 100644
--- a/src/atrac1denc.h
+++ b/src/atrac1denc.h
@@ -66,6 +66,8 @@ class TAtrac1Encoder : public IProcessor<TFloat>, public TAtrac1MDCT, public vir
Atrac1AnalysisFilterBank<TFloat> AnalysisFilterBank[2];
+ const std::vector<float> LoudnessCurve;
+
class TTransientDetectors {
std::vector<TTransientDetector> transientDetectorLow;
std::vector<TTransientDetector> transientDetectorMid;
@@ -94,8 +96,10 @@ class TAtrac1Encoder : public IProcessor<TFloat>, public TAtrac1MDCT, public vir
}
};
TAtrac1Encoder::TTransientDetectors TransientDetectors;
-
+
TScaler<TAtrac1Data> Scaler;
+ static constexpr float LoudFactor = 0.006;
+ float Loudness = LoudFactor;
public:
TAtrac1Encoder(TCompressedOutputPtr&& aea, NAtrac1::TAtrac1EncodeSettings&& settings);