summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDaniil Cherednik <[email protected]>2026-04-18 16:51:23 +0200
committerDaniil Cherednik <[email protected]>2026-04-20 00:27:45 +0200
commite784b79cfef682059cf5cc8cf9bed635e714c439 (patch)
treeba213071aec875714cd7deb1d3f45ecaa676e37b
parenta958b27a43f0a436406dc51b942ca2f3a417e7a7 (diff)
atrac3: reimplement tonal encoding. Use flatness-based tonal extraction [new_psy]
- Add shared CalcSpectralFlatnessPerBfu helper in atrac_psy_common with BFU-table mapping.
- Implement ATRAC3 tonal extraction: compute MDCT energy, estimate per-BFU flatness, extract the strongest tonal run (up to 5 bins) in low-flatness BFUs, and zero the extracted bins in the residual.
- Map extracted tonal bins into TTonalBlocks and integrate them into bitstream coding.
- Update ATRAC3 bit allocation: reduce residual bits for BFUs with tonal blocks and increase the tonal quantizer selection.
- Restore the --notonal CLI option in main.cpp for A/B comparison.
-rw-r--r--src/atrac/at3/atrac3_bitstream.cpp59
-rw-r--r--src/atrac/atrac_psy_common.cpp45
-rw-r--r--src/atrac/atrac_psy_common.h18
-rw-r--r--src/atrac/atrac_psy_common_ut.cpp376
-rw-r--r--src/atrac3denc.cpp96
-rw-r--r--src/atrac3denc.h3
-rw-r--r--src/main.cpp1
-rw-r--r--test/CMakeLists.txt1
8 files changed, 581 insertions, 18 deletions
diff --git a/src/atrac/at3/atrac3_bitstream.cpp b/src/atrac/at3/atrac3_bitstream.cpp
index 9d4a948..96a40af 100644
--- a/src/atrac/at3/atrac3_bitstream.cpp
+++ b/src/atrac/at3/atrac3_bitstream.cpp
@@ -33,6 +33,12 @@ namespace NAtrac3 {
using std::vector;
+// Upper (right) edge frequency of each BFU, listed in kHz for a 44.1 kHz sample rate.
+// Computed as TAtrac3Data::BlockSizeTab[bfu + 1] * 44100 / (2 * 1024) (in Hz, shown below in kHz):
+// bfu 0.. 7: 0.172, 0.345, 0.517, 0.689, 0.861, 1.034, 1.206, 1.378
+// bfu 8..15: 1.723, 2.067, 2.412, 2.756, 3.101, 3.445, 3.790, 4.134
+// bfu 16..23: 4.823, 5.513, 6.202, 6.891, 7.580, 8.269, 8.958, 9.647
+// bfu 24..31: 10.336, 11.025, 12.403, 13.781, 15.159, 16.538, 19.294, 22.050
static const uint32_t FixedBitAllocTable[TAtrac3Data::MaxBfus] = {
4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
2, 2, 2, 2, 2, 1, 1, 1,
@@ -206,13 +212,14 @@ bool ConsiderEnergyErr(const vector<float>& err, vector<uint32_t>& bits)
return adjusted;
}
-vector<uint32_t> CalcBitsAllocation(const std::vector<TScaledBlock>& scaledBlocks,
+vector<uint32_t> CalcBitsAllocation(const TAtrac3BitStreamWriter::TSingleChannelElement& sce,
const uint32_t bfuNum,
const float spread,
const float shift,
- const float loudness,
- const int gainBoostPerBand[TAtrac3Data::NumQMF])
+ const float loudness)
{
+ const std::vector<TScaledBlock>& scaledBlocks = sce.ScaledBlocks;
+ const auto gainBoostPerBand = sce.GainBoostPerBand;
vector<uint32_t> bitsPerEachBlock(bfuNum);
for (size_t i = 0; i < bitsPerEachBlock.size(); ++i) {
const float ath = ATH[i] * loudness;
@@ -254,30 +261,47 @@ vector<uint32_t> CalcBitsAllocation(const std::vector<TScaledBlock>& scaledBlock
}
}
}
+
+ for (const TTonalBlock& tc : sce.TonalBlocks) {
+ ASSERT(tc.ScaledBlock.Values.size() < 8);
+ ASSERT(tc.ScaledBlock.Values.size() > 0);
+ if(tc.ValPtr->Bfu < bitsPerEachBlock.size()) {
+ if (bitsPerEachBlock[tc.ValPtr->Bfu] > 2) {
+ bitsPerEachBlock[tc.ValPtr->Bfu] -= 1;
+ }
+ }
+ }
+
return bitsPerEachBlock;
}
-uint8_t GroupTonalComponents(const std::vector<TTonalBlock>& tonalComponents,
+uint32_t GroupTonalComponents(const std::vector<TTonalBlock>& tonalComponents,
const vector<uint32_t>& allocTable,
TTonalComponentsSubGroup groups[64])
{
for (const TTonalBlock& tc : tonalComponents) {
ASSERT(tc.ScaledBlock.Values.size() < 8);
ASSERT(tc.ScaledBlock.Values.size() > 0);
- ASSERT(tc.ValPtr->Bfu < allocTable.size());
- const auto quant = std::max((uint32_t)2, std::min(allocTable[tc.ValPtr->Bfu] + 1, (uint32_t)7));
+ ASSERT(tc.ValPtr);
+ const uint32_t bfu = tc.ValPtr->Bfu;
+ if (bfu >= allocTable.size()) {
+ // NumBfu may be reduced by allocator tail trimming.
+ // Skip tonal blocks that map to BFUs outside current allocation table.
+ continue;
+ }
+ const auto quant = std::max((uint32_t)2, std::min(allocTable[bfu] + 4, (uint32_t)7));
groups[quant * 8 + tc.ScaledBlock.Values.size()].SubGroupPtr.push_back(&tc);
}
- uint8_t tcsgn = 0;
- for (uint8_t i = 0; i < 64; ++i) {
- size_t startPos;
- size_t curPos = 0;
+ uint32_t tcsgn = 0;
+ for (uint32_t i = 0; i < 64; ++i) {
+ uint32_t startPos;
+ uint32_t curPos = 0;
while (curPos < groups[i].SubGroupPtr.size()) {
startPos = curPos;
++tcsgn;
groups[i].SubGroupMap.push_back(static_cast<uint8_t>(curPos));
- uint8_t groupLimiter = 0;
+ uint32_t groupLimiter = 0;
do {
++curPos;
if (curPos == groups[i].SubGroupPtr.size()) {
@@ -289,7 +313,7 @@ uint8_t GroupTonalComponents(const std::vector<TTonalBlock>& tonalComponents,
groupLimiter = 0;
startPos = curPos;
}
- } while (groupLimiter < 7);
+ } while (groupLimiter < 7u);
}
}
return tcsgn;
@@ -302,12 +326,12 @@ uint16_t EncodeTonalComponents(const TAtrac3BitStreamWriter::TSingleChannelEleme
const uint16_t bitsUsedOld = bitStream ? (uint16_t)bitStream->GetSizeInBits() : 0;
const std::vector<TTonalBlock>& tonalComponents = sce.TonalBlocks;
const TAtrac3Data::SubbandInfo& subbandInfo = sce.SubbandInfo;
- const uint8_t numQmfBand = subbandInfo.GetQmfNum();
+ const uint32_t numQmfBand = subbandInfo.GetQmfNum();
uint16_t bitsUsed = 0;
//group tonal components with same quantizer and len
TTonalComponentsSubGroup groups[64];
- const uint8_t tcsgn = GroupTonalComponents(tonalComponents, allocTable, groups);
+ const uint32_t tcsgn = GroupTonalComponents(tonalComponents, allocTable, groups);
ASSERT(tcsgn < 32);
@@ -369,7 +393,7 @@ uint16_t EncodeTonalComponents(const TAtrac3BitStreamWriter::TSingleChannelEleme
bitsUsed += numQmfBand;
if (bitStream) {
- for (uint8_t j = 0; j < numQmfBand; ++j) {
+ for (uint32_t j = 0; j < numQmfBand; ++j) {
bitStream->Write((bool)bandFlags.i[j], 1);
}
}
@@ -543,12 +567,11 @@ public:
}
const float shift = ba.Continue();
- vector<uint32_t> tmpAlloc = CalcBitsAllocation(ctx->Sce->ScaledBlocks,
+ vector<uint32_t> tmpAlloc = CalcBitsAllocation(*ctx->Sce,
ctx->NumBfu,
ctx->Spread,
shift,
- ctx->Loudness,
- ctx->Sce->GainBoostPerBand);
+ ctx->Loudness);
ctx->EnergyErr.assign(ctx->NumBfu, 0.0f);
std::pair<uint8_t, uint32_t> consumption;
diff --git a/src/atrac/atrac_psy_common.cpp b/src/atrac/atrac_psy_common.cpp
index 01f494f..ac8c743 100644
--- a/src/atrac/atrac_psy_common.cpp
+++ b/src/atrac/atrac_psy_common.cpp
@@ -18,6 +18,8 @@
#include "atrac_psy_common.h"
+#include <algorithm>
+#include <cassert>
#include <cmath>
#include <iostream>
@@ -153,4 +155,47 @@ vector<float> CreateLoudnessCurve(size_t sz)
return res;
}
+// Computes a spectral flatness value in [0, 1] for each of the numBfu BFUs.
+//
+// mdctEnergy    - per-bin energies, indexed by spectral bin.
+// specsStart    - specsStart[bfu] is the first bin of the BFU.
+// specsPerBlock - specsPerBlock[bfu] is the number of bins in the BFU.
+// numBfu        - number of entries read from both tables.
+// energyFloor   - lower bound applied to energies before taking the logarithm
+//                 (itself clamped to at least 1e-20).
+//
+// Flatness is the ratio of the geometric mean to the arithmetic mean of the
+// bin energies. A BFU that is empty, extends past mdctEnergy, or whose mean
+// energy does not exceed the floor is reported as 1.0.
+vector<float> CalcSpectralFlatnessPerBfu(const vector<float>& mdctEnergy,
+ const uint32_t* specsStart,
+ const uint32_t* specsPerBlock,
+ size_t numBfu,
+ float energyFloor)
+{
+ assert(specsStart != nullptr);
+ assert(specsPerBlock != nullptr);
+ // Keep the floor strictly positive so std::log below never sees zero.
+ const float floor = std::max(energyFloor, 1e-20f);
+
+ // Every BFU starts out as 1.0; only BFUs that pass the checks below are overwritten.
+ vector<float> flatness(numBfu, 1.0f);
+ for (size_t bfu = 0; bfu < numBfu; ++bfu) {
+ const size_t start = specsStart[bfu];
+ const size_t len = specsPerBlock[bfu];
+ const size_t end = start + len;
+ // Out-of-range tables abort when assert is active; the check below
+ // handles the same condition when assert is compiled out.
+ assert(end <= mdctEnergy.size());
+ if (len == 0 || end > mdctEnergy.size()) {
+ flatness[bfu] = 1.0f;
+ continue;
+ }
+
+ double arithMean = 0.0;
+ double meanLog = 0.0;
+ for (size_t i = start; i < end; ++i) {
+ // Negative inputs count as zero energy.
+ const double e = std::max(0.0f, mdctEnergy[i]);
+ arithMean += e;
+ // Only the logarithm is floored; the arithmetic sum uses the clamped value as is.
+ meanLog += std::log(std::max<double>(e, floor));
+ }
+ arithMean /= static_cast<double>(len);
+ meanLog /= static_cast<double>(len);
+
+ // Mean energy at or below the floor: leave the BFU at 1.0.
+ if (arithMean <= floor) {
+ flatness[bfu] = 1.0f;
+ continue;
+ }
+
+ // exp(mean(log(e))) is the geometric mean of the floored energies.
+ const double geomMean = std::exp(meanLog);
+ const double ratio = geomMean / arithMean;
+ // Flooring can push the ratio slightly above 1, so clamp to [0, 1].
+ flatness[bfu] = static_cast<float>(std::min(1.0, std::max(0.0, ratio)));
+ }
+ return flatness;
+}
+
} // namespace NAtracDEnc
diff --git a/src/atrac/atrac_psy_common.h b/src/atrac/atrac_psy_common.h
index ebeeef3..970585c 100644
--- a/src/atrac/atrac_psy_common.h
+++ b/src/atrac/atrac_psy_common.h
@@ -18,12 +18,30 @@
#pragma once
#include "atrac_scale.h"
+#include <cstdint>
#include <stddef.h>
+#include <vector>
namespace NAtracDEnc {
float AnalizeScaleFactorSpread(const std::vector<TScaledBlock>& scaledBlocks);
std::vector<float> CalcATH(int len, int sampleRate);
+// Returns one spectral flatness value per BFU for the given per-bin energies.
+// specsStart[bfu] and specsPerBlock[bfu] give the first bin and the bin count
+// of each of the numBfu BFUs; energyFloor bounds energies from below before
+// the logarithm is taken.
+std::vector<float> CalcSpectralFlatnessPerBfu(const std::vector<float>& mdctEnergy,
+ const uint32_t* specsStart,
+ const uint32_t* specsPerBlock,
+ size_t numBfu,
+ float energyFloor = 1e-12f);
+
+// Convenience overload that takes the BFU layout from a codec table type.
+// TData must provide SpecsStartLong, SpecsPerBlock and MaxBfus; all MaxBfus
+// BFUs are evaluated.
+template <class TData>
+inline std::vector<float> CalcSpectralFlatnessPerBfu(const std::vector<float>& mdctEnergy,
+ float energyFloor = 1e-12f)
+{
+ return CalcSpectralFlatnessPerBfu(mdctEnergy,
+ TData::SpecsStartLong,
+ TData::SpecsPerBlock,
+ TData::MaxBfus,
+ energyFloor);
+}
inline float TrackLoudness(float prevLoud, float l0, float l1)
{
diff --git a/src/atrac/atrac_psy_common_ut.cpp b/src/atrac/atrac_psy_common_ut.cpp
new file mode 100644
index 0000000..91eeaae
--- /dev/null
+++ b/src/atrac/atrac_psy_common_ut.cpp
@@ -0,0 +1,376 @@
+/*
+ * This file is part of AtracDEnc.
+ *
+ * AtracDEnc is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * AtracDEnc is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with AtracDEnc; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "atrac_psy_common.h"
+#include "at1/atrac1.h"
+#include "at3/atrac3.h"
+#include "at3/atrac3_qmf.h"
+#include "at3p/at3p_tables.h"
+#include "mdct/mdct.h"
+
+#include <gtest/gtest.h>
+#include <algorithm>
+#include <array>
+#include <cmath>
+#include <iomanip>
+#include <iostream>
+#include <numeric>
+#include <random>
+#include <type_traits>
+
+using namespace NAtracDEnc;
+
+namespace {
+
+// Number of spectral bins covered by the codec table: the end of the last BFU
+// (its start plus its size).
+template <class TData>
+size_t NumSpecs()
+{
+ return static_cast<size_t>(TData::SpecsStartLong[TData::MaxBfus - 1]) +
+ static_cast<size_t>(TData::SpecsPerBlock[TData::MaxBfus - 1]);
+}
+
+// Checks the BFU table mapping: raising the energy of the first bin of one BFU
+// over an otherwise uniform spectrum must lower the flatness of that BFU only.
+template <class TData>
+void VerifyImpulseMapsToSingleBfu()
+{
+ const size_t numBfu = TData::MaxBfus;
+ const size_t numSpecs = NumSpecs<TData>();
+ // Uniform energy of 1.0 gives flatness 1.0 in every BFU.
+ std::vector<float> baseEnergy(numSpecs, 1.0f);
+
+ for (size_t bfu = 0; bfu < numBfu; ++bfu) {
+ std::vector<float> mdctEnergy = baseEnergy;
+ // Place the impulse on the first bin of the BFU under test.
+ const size_t impulsePos = TData::SpecsStartLong[bfu];
+ mdctEnergy[impulsePos] = 32.0f;
+
+ const std::vector<float> flatness = CalcSpectralFlatnessPerBfu<TData>(mdctEnergy);
+ ASSERT_EQ(flatness.size(), numBfu);
+ EXPECT_LT(flatness[bfu], 0.95f) << "bfu=" << bfu;
+
+ // Every other BFU must still report (almost exactly) 1.0.
+ for (size_t i = 0; i < numBfu; ++i) {
+ if (i == bfu) {
+ continue;
+ }
+ EXPECT_NEAR(flatness[i], 1.0f, 1e-6f) << "bfu=" << bfu << " changed=" << i;
+ }
+ }
+}
+
+// Returns an n-point sine window: w[i] = sin(pi * (i + 0.5) / n).
+std::vector<float> CalcSineWindow(size_t n)
+{
+ constexpr float kPi = 3.14159265358979323846f;
+ std::vector<float> w(n);
+ for (size_t i = 0; i < n; ++i) {
+ w[i] = std::sin((kPi * (static_cast<float>(i) + 0.5f)) /
+ static_cast<float>(n));
+ }
+ return w;
+}
+
+// Builds a per-bin energy spectrum by running generated PCM through the
+// analysis filter bank and a windowed 512-point MDCT per subband.
+//
+// sampleFn(i) supplies PCM sample i. Two frames are processed; the spectrum
+// buffer is overwritten on each frame, so the returned energies come from the
+// second frame, with the first frame providing the overlap state. Each
+// returned value is the squared coefficient plus 1e-12.
+template <class TSampleFn>
+std::vector<float> BuildAtrac3EnergyViaQmfMdct(TSampleFn&& sampleFn)
+{
+ // NOTE(review): the instance is otherwise unused; presumably constructing it
+ // initialises the static tables (e.g. EncodeWindow) read below - confirm.
+ NAtrac3::TAtrac3Data initTables;
+ (void)initTables;
+
+ constexpr size_t kFrameSz = NAtrac3::TAtrac3Data::NumSamples; // 1024
+ constexpr size_t kNumFrames = 2;
+ constexpr size_t kSubbands = 4;
+ constexpr size_t kSubbandSamples = 256;
+ constexpr size_t kMdctInput = 512;
+
+ std::array<float, kFrameSz * kNumFrames> pcm{};
+ for (size_t i = 0; i < pcm.size(); ++i) {
+ pcm[i] = sampleFn(i);
+ }
+
+ Atrac3AnalysisFilterBank analysis;
+ NMDCT::TMDCT<kMdctInput> mdct512(1.0f);
+ // Per-band state: first half holds the windowed tail of the previous frame,
+ // second half receives the current subband samples.
+ std::array<std::array<float, kMdctInput>, kSubbands> bandState{};
+ std::array<std::array<float, kSubbandSamples>, kSubbands> subbands{};
+ std::array<float*, kSubbands> subPtrs = {
+ subbands[0].data(), subbands[1].data(), subbands[2].data(), subbands[3].data()
+ };
+ std::array<float, NAtrac3::TAtrac3Data::NumSpecs> specs = {};
+
+ for (size_t frame = 0; frame < kNumFrames; ++frame) {
+ analysis.Analysis(&pcm[frame * kFrameSz], subPtrs.data());
+
+ for (size_t band = 0; band < kSubbands; ++band) {
+ auto& state = bandState[band];
+ for (size_t i = 0; i < kSubbandSamples; ++i) {
+ state[kSubbandSamples + i] = subbands[band][i];
+ }
+
+ // MDCT input: saved first half of the state followed by the current
+ // samples multiplied by the reversed window.
+ std::array<float, kMdctInput> tmp = {};
+ std::copy_n(state.data(), kSubbandSamples, tmp.data());
+ for (size_t i = 0; i < kSubbandSamples; ++i) {
+ const float cur = state[kSubbandSamples + i];
+ // Store the forward-windowed samples for the next frame.
+ state[i] = NAtrac3::TAtrac3Data::EncodeWindow[i] * cur;
+ tmp[kSubbandSamples + i] = NAtrac3::TAtrac3Data::EncodeWindow[kSubbandSamples - 1 - i] * cur;
+ }
+
+ const std::vector<float>& specBand = mdct512(tmp.data());
+ float* dst = specs.data() + band * kSubbandSamples;
+ std::copy_n(specBand.data(), kSubbandSamples, dst);
+ // Odd bands are stored in reversed coefficient order.
+ if (band & 1) {
+ std::reverse(dst, dst + kSubbandSamples);
+ }
+ }
+ }
+
+ // Energy per bin, offset by 1e-12 so no bin is exactly zero.
+ std::vector<float> e(NAtrac3::TAtrac3Data::NumSpecs, 1e-12f);
+ for (size_t i = 0; i < e.size(); ++i) {
+ e[i] += specs[i] * specs[i];
+ }
+ return e;
+}
+
+// Builds the per-bin energy spectrum of a sine tone (toneHz at a 44100 Hz
+// sample rate, fixed phase offset of 0.37 rad) for the codec selected by TData.
+//
+// ATRAC1 and ATRAC3plus tables use one sine-windowed MDCT of 1024 and 4096
+// input samples respectively; ATRAC3 goes through BuildAtrac3EnergyViaQmfMdct.
+// Each returned value is the squared coefficient plus 1e-12.
+template <class TData>
+std::vector<float> BuildToneEnergy(float toneHz = 1000.0f)
+{
+ auto genTone = [toneHz](size_t i) {
+ constexpr float kPi = 3.14159265358979323846f;
+ constexpr float kSampleRate = 44100.0f;
+ const float phase = 2.0f * kPi * toneHz * static_cast<float>(i) / kSampleRate + 0.37f;
+ return std::sin(phase);
+ };
+
+ if constexpr (std::is_same_v<TData, NAtrac1::TAtrac1Data>) {
+ constexpr size_t n = 1024;
+ std::vector<float> in(n);
+ const std::vector<float> w = CalcSineWindow(n);
+ for (size_t i = 0; i < n; ++i) {
+ in[i] = genTone(i) * w[i];
+ }
+ // NOTE(review): the constructor argument is presumably the MDCT scale - confirm.
+ NMDCT::TMDCT<n> mdct(n);
+ const auto& spec = mdct(in.data());
+ std::vector<float> e(spec.size(), 1e-12f);
+ for (size_t i = 0; i < spec.size(); ++i) {
+ e[i] += spec[i] * spec[i];
+ }
+ return e;
+ } else if constexpr (std::is_same_v<TData, NAtrac3::TAtrac3Data>) {
+ return BuildAtrac3EnergyViaQmfMdct([&](size_t i) {
+ return genTone(i);
+ });
+ } else {
+ // Only the ATRAC3plus scale table is accepted in this branch.
+ static_assert(std::is_same_v<TData, NAt3p::TScaleTable>, "Unsupported codec table for tone energy");
+ constexpr size_t n = 4096;
+ std::vector<float> in(n);
+ const std::vector<float> w = CalcSineWindow(n);
+ for (size_t i = 0; i < n; ++i) {
+ in[i] = genTone(i) * w[i];
+ }
+ NMDCT::TMDCT<n> mdct(n);
+ const auto& spec = mdct(in.data());
+ std::vector<float> e(spec.size(), 1e-12f);
+ for (size_t i = 0; i < spec.size(); ++i) {
+ e[i] += spec[i] * spec[i];
+ }
+ return e;
+ }
+}
+
+// Builds the per-bin energy spectrum of Gaussian noise (mean 0, stddev 1) for
+// the codec selected by TData, using the same transform paths as BuildToneEnergy.
+//
+// The generator is seeded with a fixed constant plus NumSpecs<TData>(), so the
+// sequence is repeatable per codec. Each returned value is the squared
+// coefficient plus 1e-12.
+template <class TData>
+std::vector<float> BuildWhiteNoiseEnergy()
+{
+ std::mt19937 gen(0xC0FFEEu + static_cast<uint32_t>(NumSpecs<TData>()));
+ std::normal_distribution<float> dist(0.0f, 1.0f);
+ // Both arguments are ignored; each call just draws the next sample.
+ auto genNoise = [&gen, &dist](size_t, size_t) {
+ return dist(gen);
+ };
+
+ if constexpr (std::is_same_v<TData, NAtrac1::TAtrac1Data>) {
+ constexpr size_t n = 1024;
+ std::vector<float> in(n);
+ const std::vector<float> w = CalcSineWindow(n);
+ for (size_t i = 0; i < n; ++i) {
+ in[i] = genNoise(i, n) * w[i];
+ }
+ NMDCT::TMDCT<n> mdct(n);
+ const auto& spec = mdct(in.data());
+ std::vector<float> e(spec.size(), 1e-12f);
+ for (size_t i = 0; i < spec.size(); ++i) {
+ e[i] += spec[i] * spec[i];
+ }
+ return e;
+ } else if constexpr (std::is_same_v<TData, NAtrac3::TAtrac3Data>) {
+ return BuildAtrac3EnergyViaQmfMdct([&](size_t i) {
+ return genNoise(i, NAtrac3::TAtrac3Data::NumSpecs * 2);
+ });
+ } else {
+ // Only the ATRAC3plus scale table is accepted in this branch.
+ static_assert(std::is_same_v<TData, NAt3p::TScaleTable>, "Unsupported codec table for noise energy");
+ constexpr size_t n = 4096;
+ std::vector<float> in(n);
+ const std::vector<float> w = CalcSineWindow(n);
+ for (size_t i = 0; i < n; ++i) {
+ in[i] = genNoise(i, n) * w[i];
+ }
+ NMDCT::TMDCT<n> mdct(n);
+ const auto& spec = mdct(in.data());
+ std::vector<float> e(spec.size(), 1e-12f);
+ for (size_t i = 0; i < spec.size(); ++i) {
+ e[i] += spec[i] * spec[i];
+ }
+ return e;
+ }
+}
+
+// Sums the per-bin energies inside each BFU of the TData table and returns
+// one total per BFU. mdctEnergy must cover every bin the table references;
+// no bounds check is performed here.
+template <class TData>
+std::vector<float> CalcBfuEnergy(const std::vector<float>& mdctEnergy)
+{
+ const size_t numBfu = TData::MaxBfus;
+ std::vector<float> bfuEnergy(numBfu, 0.0f);
+ for (size_t bfu = 0; bfu < numBfu; ++bfu) {
+ const size_t start = TData::SpecsStartLong[bfu];
+ const size_t len = TData::SpecsPerBlock[bfu];
+ float sum = 0.0f;
+ for (size_t i = start; i < start + len; ++i) {
+ sum += mdctEnergy[i];
+ }
+ bfuEnergy[bfu] = sum;
+ }
+ return bfuEnergy;
+}
+
+// Returns sum(values[i] * weights[i]) / sum(weights).
+// Records a test failure and returns 0 when the sizes differ or the weight
+// sum is not positive.
+float WeightedMean(const std::vector<float>& values, const std::vector<float>& weights)
+{
+ // EXPECT_* only records the failure, so the explicit checks below are what
+ // actually stop the computation.
+ EXPECT_EQ(values.size(), weights.size());
+ if (values.size() != weights.size()) {
+ return 0.0f;
+ }
+ const float wsum = std::accumulate(weights.begin(), weights.end(), 0.0f);
+ EXPECT_GT(wsum, 0.0f);
+ if (wsum <= 0.0f) {
+ return 0.0f;
+ }
+ float sum = 0.0f;
+ for (size_t i = 0; i < values.size(); ++i) {
+ sum += values[i] * weights[i];
+ }
+ return sum / wsum;
+}
+
+// Checks that the energy-weighted mean flatness of noise exceeds that of a
+// default (1000 Hz) tone by more than 0.08 for the codec table TData.
+// codecName is used only in the diagnostic lines written to std::cerr.
+template <class TData>
+void VerifyToneVsNoiseFlatness(const char* codecName)
+{
+ const std::vector<float> toneEnergy = BuildToneEnergy<TData>();
+ const std::vector<float> noiseEnergy = BuildWhiteNoiseEnergy<TData>();
+ // Both spectra must cover exactly the bins described by the BFU table.
+ ASSERT_EQ(toneEnergy.size(), NumSpecs<TData>());
+ ASSERT_EQ(noiseEnergy.size(), NumSpecs<TData>());
+
+ const std::vector<float> toneFlatness = CalcSpectralFlatnessPerBfu<TData>(toneEnergy);
+ const std::vector<float> noiseFlatness = CalcSpectralFlatnessPerBfu<TData>(noiseEnergy);
+
+ const std::vector<float> toneBfuEnergy = CalcBfuEnergy<TData>(toneEnergy);
+ const std::vector<float> noiseBfuEnergy = CalcBfuEnergy<TData>(noiseEnergy);
+
+ // Weight each BFU's flatness by the energy it carries.
+ const float toneWeightedFlatness = WeightedMean(toneFlatness, toneBfuEnergy);
+ const float noiseWeightedFlatness = WeightedMean(noiseFlatness, noiseBfuEnergy);
+
+ std::cerr << "[FlatnessUT] codec=" << codecName
+ << " signal=tone weighted=" << std::fixed << std::setprecision(6)
+ << toneWeightedFlatness << "\n";
+ std::cerr << "[FlatnessUT] codec=" << codecName
+ << " signal=noise weighted=" << std::fixed << std::setprecision(6)
+ << noiseWeightedFlatness << "\n";
+
+ EXPECT_GT(noiseWeightedFlatness, toneWeightedFlatness + 0.08f);
+}
+
+// ATRAC3-only variant of the tone-vs-noise check for a tone at toneHz.
+// Writes the per-BFU flatness of both signals to std::cerr, then expects the
+// energy-weighted mean flatness of noise to exceed that of the tone by more
+// than 0.05.
+void VerifyAtrac3ToneFrequencyFlatness(float toneHz)
+{
+ const std::vector<float> toneEnergy = BuildToneEnergy<NAtrac3::TAtrac3Data>(toneHz);
+ const std::vector<float> noiseEnergy = BuildWhiteNoiseEnergy<NAtrac3::TAtrac3Data>();
+
+ const std::vector<float> toneFlatness = CalcSpectralFlatnessPerBfu<NAtrac3::TAtrac3Data>(toneEnergy);
+ const std::vector<float> noiseFlatness = CalcSpectralFlatnessPerBfu<NAtrac3::TAtrac3Data>(noiseEnergy);
+ const std::vector<float> toneBfuEnergy = CalcBfuEnergy<NAtrac3::TAtrac3Data>(toneEnergy);
+ const std::vector<float> noiseBfuEnergy = CalcBfuEnergy<NAtrac3::TAtrac3Data>(noiseEnergy);
+
+ // Diagnostic dump only; nothing is asserted per BFU.
+ for (size_t bfu = 0; bfu < toneFlatness.size(); ++bfu) {
+ std::cerr << "[FlatnessUT] codec=atrac3 signal=tone freq_hz=" << toneHz
+ << " bfu=" << bfu
+ << " flatness=" << std::fixed << std::setprecision(6)
+ << toneFlatness[bfu] << "\n";
+ }
+ for (size_t bfu = 0; bfu < noiseFlatness.size(); ++bfu) {
+ std::cerr << "[FlatnessUT] codec=atrac3 signal=noise"
+ << " bfu=" << bfu
+ << " flatness=" << std::fixed << std::setprecision(6)
+ << noiseFlatness[bfu] << "\n";
+ }
+
+ const float toneWeightedFlatness = WeightedMean(toneFlatness, toneBfuEnergy);
+ const float noiseWeightedFlatness = WeightedMean(noiseFlatness, noiseBfuEnergy);
+
+ std::cerr << "[FlatnessUT] codec=atrac3 signal=tone freq_hz=" << toneHz
+ << " weighted=" << std::fixed << std::setprecision(6)
+ << toneWeightedFlatness << "\n";
+ std::cerr << "[FlatnessUT] codec=atrac3 signal=noise weighted="
+ << std::fixed << std::setprecision(6)
+ << noiseWeightedFlatness << "\n";
+
+ EXPECT_GT(noiseWeightedFlatness, toneWeightedFlatness + 0.05f);
+}
+
+} // namespace
+
+// A single 8-bin BFU with identical energies must have flatness 1.0.
+TEST(AtracPsyCommon, SpectralFlatnessUniformBlock)
+{
+ const uint32_t start[1] = {0};
+ const uint32_t size[1] = {8};
+ const std::vector<float> mdctEnergy(8, 4.0f);
+ const std::vector<float> flatness = CalcSpectralFlatnessPerBfu(mdctEnergy, start, size, 1);
+ ASSERT_EQ(flatness.size(), 1u);
+ EXPECT_NEAR(flatness[0], 1.0f, 1e-6f);
+}
+
+// Impulse-to-BFU mapping check against the ATRAC1 table.
+TEST(AtracPsyCommon, SpectralFlatnessBfuMappingAtrac1)
+{
+ VerifyImpulseMapsToSingleBfu<NAtrac1::TAtrac1Data>();
+}
+
+// Impulse-to-BFU mapping check against the ATRAC3 table.
+TEST(AtracPsyCommon, SpectralFlatnessBfuMappingAtrac3)
+{
+ VerifyImpulseMapsToSingleBfu<NAtrac3::TAtrac3Data>();
+}
+
+// Impulse-to-BFU mapping check against the ATRAC3plus scale table.
+TEST(AtracPsyCommon, SpectralFlatnessBfuMappingAtrac3Plus)
+{
+ VerifyImpulseMapsToSingleBfu<NAt3p::TScaleTable>();
+}
+
+// Noise must be measurably flatter than a tone with the ATRAC1 table.
+TEST(AtracPsyCommon, SpectralFlatnessToneVsNoiseAtrac1)
+{
+ VerifyToneVsNoiseFlatness<NAtrac1::TAtrac1Data>("atrac1");
+}
+
+// Noise must be measurably flatter than a tone with the ATRAC3 table.
+TEST(AtracPsyCommon, SpectralFlatnessToneVsNoiseAtrac3)
+{
+ VerifyToneVsNoiseFlatness<NAtrac3::TAtrac3Data>("atrac3");
+}
+
+// Noise must be measurably flatter than a tone with the ATRAC3plus scale table.
+TEST(AtracPsyCommon, SpectralFlatnessToneVsNoiseAtrac3Plus)
+{
+ VerifyToneVsNoiseFlatness<NAt3p::TScaleTable>("atrac3plus");
+}
+
+// Tone-vs-noise check for ATRAC3 with a 10 kHz tone.
+TEST(AtracPsyCommon, SpectralFlatness10kToneAtrac3)
+{
+ VerifyAtrac3ToneFrequencyFlatness(10000.0f);
+}
diff --git a/src/atrac3denc.cpp b/src/atrac3denc.cpp
index d7024e6..6f8d62c 100644
--- a/src/atrac3denc.cpp
+++ b/src/atrac3denc.cpp
@@ -548,6 +548,90 @@ void TAtrac3Encoder::CreateSubbandInfo(const float* upInput[4],
}
}
+// Extracts tonal components from the spectrum, one run per low-flatness BFU.
+//
+// specs          - spectral coefficients; extracted bins are set to zero in
+//                  place, so the caller is left with the residual.
+// flatnessPerBfu - flatness per BFU; only BFUs with a value below the
+//                  threshold (0.01) are considered.
+//
+// Returns the extracted bins (position, original value, BFU index) in
+// ascending BFU order, at most kMaxTonalLen consecutive bins per BFU.
+TAtrac3Data::TTonalComponents TAtrac3Encoder::ExtractTonalComponents(float* specs,
+ const std::vector<float>& flatnessPerBfu)
+{
+ TAtrac3Data::TTonalComponents res;
+ static constexpr float kFlatnessThreshold = 0.01f;
+ static constexpr uint32_t kMaxTonalLen = 5;
+ // BFUs below 8 are too short to give a noticeable gain.
+ // BFUs 29 and above are skipped because they are hard to tune.
+ for (uint32_t blockNum = 8; blockNum < 29u; ++blockNum) {
+ // Stop early when fewer flatness values were supplied than BFUs scanned.
+ if (blockNum >= flatnessPerBfu.size()) {
+ break;
+ }
+
+ const float flatness = flatnessPerBfu[blockNum];
+ if (flatness >= kFlatnessThreshold) {
+ continue;
+ }
+
+ const uint32_t specNumStart = TAtrac3Data::SpecsStartLong[blockNum];
+ const uint32_t blockLen = TAtrac3Data::SpecsPerBlock[blockNum];
+ const uint32_t specNumEnd = specNumStart + blockLen;
+ // Empty BFU: nothing to extract.
+ if (specNumStart >= specNumEnd) {
+ continue;
+ }
+
+ // Find the run of consecutive bins (length 1..maxLen, fully inside the
+ // BFU) with the largest sum of absolute values. Because the comparison
+ // is strict, a run is only extended while it increases the sum.
+ const uint32_t maxLen = std::min(kMaxTonalLen, blockLen);
+ float bestScore = -1.0f;
+ uint32_t bestStart = specNumStart;
+ uint32_t bestLen = 1;
+ for (uint32_t start = specNumStart; start < specNumEnd; ++start) {
+ const uint32_t maxLenForStart = std::min(maxLen, specNumEnd - start);
+ float score = 0.0f;
+ for (uint32_t len = 1; len <= maxLenForStart; ++len) {
+ score += std::abs(specs[start + len - 1]);
+ if (score > bestScore) {
+ bestScore = score;
+ bestStart = start;
+ bestLen = len;
+ }
+ }
+ }
+
+ // All bins in the BFU are zero: skip it.
+ if (bestScore <= 0.0f) {
+ continue;
+ }
+
+ /*
+ std::cerr << "atrac3 tonal bfu=" << (uint32_t)blockNum
+ << " flatness=" << flatness
+ << " start=" << bestStart
+ << " len=" << bestLen
+ << " score=" << bestScore
+ << std::endl; */
+
+ // Move the chosen bins into the result and zero them in the residual.
+ for (uint32_t n = 0; n < bestLen; ++n) {
+ const uint32_t pos = bestStart + n;
+ res.push_back({(uint16_t)pos, specs[pos], (uint8_t)blockNum});
+ specs[pos] = 0.0f;
+ }
+ }
+
+ return res;
+}
+
+
+// Groups extracted tonal bins into tonal blocks and appends them to componentMap.
+//
+// Consecutive entries whose Pos values increase by exactly one are merged into
+// a run of at most 7 bins; each run is scaled with Scaler.Scale and stored
+// together with a pointer to its first entry.
+//
+// The stored pointer refers into tonalComponents, so that vector must outlive
+// (and not be reallocated during) any use of componentMap.
+// NOTE(review): runs are merged by position only, so bins from two adjacent
+// BFUs can end up in one block attributed to the first entry - confirm this
+// is intended.
+void TAtrac3Encoder::MapTonalComponents(const TAtrac3Data::TTonalComponents& tonalComponents, vector<TTonalBlock>* componentMap)
+{
+ for (size_t i = 0; i < tonalComponents.size();) {
+ const uint32_t startPos = i;
+ uint32_t curPos;
+ // Advance i to one past the end of the current run.
+ do {
+ curPos = tonalComponents[i].Pos;
+ ++i;
+ } while ( i < tonalComponents.size() && tonalComponents[i].Pos == curPos + 1 && i - startPos < 7);
+ const uint32_t len = i - startPos;
+ // len is at most 7, so the 8-element buffer is large enough.
+ float tmp[8];
+ for (uint32_t j = 0; j < len; ++j)
+ tmp[j] = tonalComponents[startPos + j].Val;
+ const TScaledBlock& scaledBlock = Scaler.Scale(tmp, len);
+ componentMap->push_back({&tonalComponents[startPos], scaledBlock});
+ }
+}
+
+
void TAtrac3Encoder::Matrixing()
{
for (uint32_t subband = 0; subband < 4; subband++) {
@@ -624,9 +708,12 @@ TPCMEngine::TProcessLambda TAtrac3Encoder::GetLambda()
<< "channels:\n";
}
+ TAtrac3Data::TTonalComponents tonals[2];
+
for (uint32_t channel = 0; channel < meta.Channels; channel++) {
auto& specs = (*buf)[channel].Specs;
TSce* sce = &SingleChannelElements[channel];
+ sce->TonalBlocks.clear();
sce->SubbandInfo.Reset();
if (!Params.NoGainControll) {
@@ -653,14 +740,23 @@ TPCMEngine::TProcessLambda TAtrac3Encoder::GetLambda()
Mdct(specs.data(), p, maxOverlapLevels, MakeGainModulatorArray(sce->SubbandInfo));
}
+ vector<float> mdctEnergy(specs.size(), 0.0f);
float l = 0;
for (size_t i = 0; i < specs.size(); i++) {
float e = specs[i] * specs[i];
+ mdctEnergy[i] = e;
l += e * LoudnessCurve[i];
}
sce->Loudness = l;
+ if (!Params.NoTonalComponents) {
+ const vector<float> flatnessPerBfu = CalcSpectralFlatnessPerBfu<TAtrac3Data>(mdctEnergy);
+ tonals[channel] = ExtractTonalComponents(specs.data(), flatnessPerBfu);
+ sce->TonalBlocks.clear();
+ MapTonalComponents(tonals[channel], &sce->TonalBlocks);
+ }
+
//TBlockSize for ATRAC3 - 4 subband, all are long (no short window)
sce->ScaledBlocks = Scaler.ScaleFrame(specs, TAtrac3Data::TBlockSizeMod());
}
diff --git a/src/atrac3denc.h b/src/atrac3denc.h
index 38ba0c2..68224ff 100644
--- a/src/atrac3denc.h
+++ b/src/atrac3denc.h
@@ -113,7 +113,10 @@ public:
void CreateSubbandInfo(const float* upInput[4], uint32_t channel,
TAtrac3Data::SubbandInfo* subbandInfo,
int gainBoostPerBand[TAtrac3Data::NumQMF]);
+ TAtrac3Data::TTonalComponents ExtractTonalComponents(float* specs,
+ const std::vector<float>& flatnessPerBfu);
void Matrixing();
+ void MapTonalComponents(const TAtrac3Data::TTonalComponents& tonalComponents, std::vector<NAtracDEnc::NAtrac3::TTonalBlock>* componentMap);
public:
TAtrac3Encoder(TCompressedOutputPtr&& oma, NAtrac3::TAtrac3EncoderSettings&& encoderSettings);
diff --git a/src/main.cpp b/src/main.cpp
index 4cd7128..b1356b0 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -325,6 +325,7 @@ int main_(int argc, char* const* argv)
{ "bfuidxfast", no_argument, NULL, O_BFUIDXFAST},
{ "notransient", optional_argument, NULL, O_NOTRANSIENT},
{ "nostdout", no_argument, NULL, O_NOSTDOUT},
+ { "notonal", no_argument, NULL, O_NOTONAL},
{ "nogaincontrol", no_argument, NULL, O_NOGAINCONTROL},
{ "advanced", required_argument, NULL, O_ADVANCED_OPT},
{ "yaml-log", required_argument, NULL, O_YAML_LOG},
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index 8f92f68..481d228 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -19,6 +19,7 @@ set(atracdenc_ut
${CMAKE_SOURCE_DIR}/src/transient_detector_ut.cpp
${CMAKE_SOURCE_DIR}/src/transient_spectral_upsampler_ut.cpp
${CMAKE_SOURCE_DIR}/src/atrac/atrac_scale_ut.cpp
+ ${CMAKE_SOURCE_DIR}/src/atrac/atrac_psy_common_ut.cpp
)
add_executable(atracdenc_ut ${atracdenc_ut})