diff options
| author | Daniil Cherednik <[email protected]> | 2026-04-18 16:51:23 +0200 |
|---|---|---|
| committer | Daniil Cherednik <[email protected]> | 2026-04-20 00:27:45 +0200 |
| commit | e784b79cfef682059cf5cc8cf9bed635e714c439 (patch) | |
| tree | ba213071aec875714cd7deb1d3f45ecaa676e37b | |
| parent | a958b27a43f0a436406dc51b942ca2f3a417e7a7 (diff) | |
atrac3: reimplement tonal encoding. Use flatness-based tonal extraction [new_psy]
- Add shared CalcSpectralFlatnessPerBfu helper in atrac_psy_common
with BFU-table mapping.
- Implement ATRAC3 tonal extraction: compute MDCT energy, estimate
per-BFU flatness, extract the strongest tonal run (up to 5 bins) in each
low-flatness BFU, and zero the extracted bins in the residual.
- Map extracted tonal bins into TTonalBlocks and integrate them into
bitstream coding.
- Update ATRAC3 bit allocation: reduce residual bits for BFUs that carry
tonal blocks, and raise the quantizer selected for tonal components.
- Restore --notonal CLI option in main.cpp for A/B comparison.
| -rw-r--r-- | src/atrac/at3/atrac3_bitstream.cpp | 59 | ||||
| -rw-r--r-- | src/atrac/atrac_psy_common.cpp | 45 | ||||
| -rw-r--r-- | src/atrac/atrac_psy_common.h | 18 | ||||
| -rw-r--r-- | src/atrac/atrac_psy_common_ut.cpp | 376 | ||||
| -rw-r--r-- | src/atrac3denc.cpp | 96 | ||||
| -rw-r--r-- | src/atrac3denc.h | 3 | ||||
| -rw-r--r-- | src/main.cpp | 1 | ||||
| -rw-r--r-- | test/CMakeLists.txt | 1 |
8 files changed, 581 insertions, 18 deletions
diff --git a/src/atrac/at3/atrac3_bitstream.cpp b/src/atrac/at3/atrac3_bitstream.cpp index 9d4a948..96a40af 100644 --- a/src/atrac/at3/atrac3_bitstream.cpp +++ b/src/atrac/at3/atrac3_bitstream.cpp @@ -33,6 +33,12 @@ namespace NAtrac3 { using std::vector; +// BFU right border frequencies at 44.1 kHz (kHz), computed from +// TAtrac3Data::BlockSizeTab[bfu + 1] * 44100 / (2 * 1024): +// bfu 0.. 7: 0.172, 0.345, 0.517, 0.689, 0.861, 1.034, 1.206, 1.378 +// bfu 8..15: 1.723, 2.067, 2.412, 2.756, 3.101, 3.445, 3.790, 4.134 +// bfu 16..23: 4.823, 5.513, 6.202, 6.891, 7.580, 8.269, 8.958, 9.647 +// bfu 24..31: 10.336, 11.025, 12.403, 13.781, 15.159, 16.538, 19.294, 22.050 static const uint32_t FixedBitAllocTable[TAtrac3Data::MaxBfus] = { 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 1, 1, 1, @@ -206,13 +212,14 @@ bool ConsiderEnergyErr(const vector<float>& err, vector<uint32_t>& bits) return adjusted; } -vector<uint32_t> CalcBitsAllocation(const std::vector<TScaledBlock>& scaledBlocks, +vector<uint32_t> CalcBitsAllocation(const TAtrac3BitStreamWriter::TSingleChannelElement& sce, const uint32_t bfuNum, const float spread, const float shift, - const float loudness, - const int gainBoostPerBand[TAtrac3Data::NumQMF]) + const float loudness) { + const std::vector<TScaledBlock>& scaledBlocks = sce.ScaledBlocks; + const auto gainBoostPerBand = sce.GainBoostPerBand; vector<uint32_t> bitsPerEachBlock(bfuNum); for (size_t i = 0; i < bitsPerEachBlock.size(); ++i) { const float ath = ATH[i] * loudness; @@ -254,30 +261,47 @@ vector<uint32_t> CalcBitsAllocation(const std::vector<TScaledBlock>& scaledBlock } } } + + for (const TTonalBlock& tc : sce.TonalBlocks) { + ASSERT(tc.ScaledBlock.Values.size() < 8); + ASSERT(tc.ScaledBlock.Values.size() > 0); + if(tc.ValPtr->Bfu < bitsPerEachBlock.size()) { + if (bitsPerEachBlock[tc.ValPtr->Bfu] > 2) { + bitsPerEachBlock[tc.ValPtr->Bfu] -= 1; + } + } + } + return bitsPerEachBlock; } -uint8_t GroupTonalComponents(const 
std::vector<TTonalBlock>& tonalComponents, +uint32_t GroupTonalComponents(const std::vector<TTonalBlock>& tonalComponents, const vector<uint32_t>& allocTable, TTonalComponentsSubGroup groups[64]) { for (const TTonalBlock& tc : tonalComponents) { ASSERT(tc.ScaledBlock.Values.size() < 8); ASSERT(tc.ScaledBlock.Values.size() > 0); - ASSERT(tc.ValPtr->Bfu < allocTable.size()); - const auto quant = std::max((uint32_t)2, std::min(allocTable[tc.ValPtr->Bfu] + 1, (uint32_t)7)); + ASSERT(tc.ValPtr); + const uint32_t bfu = tc.ValPtr->Bfu; + if (bfu >= allocTable.size()) { + // NumBfu may be reduced by allocator tail trimming. + // Skip tonal blocks that map to BFUs outside current allocation table. + continue; + } + const auto quant = std::max((uint32_t)2, std::min(allocTable[bfu] + 4, (uint32_t)7)); groups[quant * 8 + tc.ScaledBlock.Values.size()].SubGroupPtr.push_back(&tc); } - uint8_t tcsgn = 0; - for (uint8_t i = 0; i < 64; ++i) { - size_t startPos; - size_t curPos = 0; + uint32_t tcsgn = 0; + for (uint32_t i = 0; i < 64; ++i) { + uint32_t startPos; + uint32_t curPos = 0; while (curPos < groups[i].SubGroupPtr.size()) { startPos = curPos; ++tcsgn; groups[i].SubGroupMap.push_back(static_cast<uint8_t>(curPos)); - uint8_t groupLimiter = 0; + uint32_t groupLimiter = 0; do { ++curPos; if (curPos == groups[i].SubGroupPtr.size()) { @@ -289,7 +313,7 @@ uint8_t GroupTonalComponents(const std::vector<TTonalBlock>& tonalComponents, groupLimiter = 0; startPos = curPos; } - } while (groupLimiter < 7); + } while (groupLimiter < 7u); } } return tcsgn; @@ -302,12 +326,12 @@ uint16_t EncodeTonalComponents(const TAtrac3BitStreamWriter::TSingleChannelEleme const uint16_t bitsUsedOld = bitStream ? 
(uint16_t)bitStream->GetSizeInBits() : 0; const std::vector<TTonalBlock>& tonalComponents = sce.TonalBlocks; const TAtrac3Data::SubbandInfo& subbandInfo = sce.SubbandInfo; - const uint8_t numQmfBand = subbandInfo.GetQmfNum(); + const uint32_t numQmfBand = subbandInfo.GetQmfNum(); uint16_t bitsUsed = 0; //group tonal components with same quantizer and len TTonalComponentsSubGroup groups[64]; - const uint8_t tcsgn = GroupTonalComponents(tonalComponents, allocTable, groups); + const uint32_t tcsgn = GroupTonalComponents(tonalComponents, allocTable, groups); ASSERT(tcsgn < 32); @@ -369,7 +393,7 @@ uint16_t EncodeTonalComponents(const TAtrac3BitStreamWriter::TSingleChannelEleme bitsUsed += numQmfBand; if (bitStream) { - for (uint8_t j = 0; j < numQmfBand; ++j) { + for (uint32_t j = 0; j < numQmfBand; ++j) { bitStream->Write((bool)bandFlags.i[j], 1); } } @@ -543,12 +567,11 @@ public: } const float shift = ba.Continue(); - vector<uint32_t> tmpAlloc = CalcBitsAllocation(ctx->Sce->ScaledBlocks, + vector<uint32_t> tmpAlloc = CalcBitsAllocation(*ctx->Sce, ctx->NumBfu, ctx->Spread, shift, - ctx->Loudness, - ctx->Sce->GainBoostPerBand); + ctx->Loudness); ctx->EnergyErr.assign(ctx->NumBfu, 0.0f); std::pair<uint8_t, uint32_t> consumption; diff --git a/src/atrac/atrac_psy_common.cpp b/src/atrac/atrac_psy_common.cpp index 01f494f..ac8c743 100644 --- a/src/atrac/atrac_psy_common.cpp +++ b/src/atrac/atrac_psy_common.cpp @@ -18,6 +18,8 @@ #include "atrac_psy_common.h" +#include <algorithm> +#include <cassert> #include <cmath> #include <iostream> @@ -153,4 +155,47 @@ vector<float> CreateLoudnessCurve(size_t sz) return res; } +vector<float> CalcSpectralFlatnessPerBfu(const vector<float>& mdctEnergy, + const uint32_t* specsStart, + const uint32_t* specsPerBlock, + size_t numBfu, + float energyFloor) +{ + assert(specsStart != nullptr); + assert(specsPerBlock != nullptr); + const float floor = std::max(energyFloor, 1e-20f); + + vector<float> flatness(numBfu, 1.0f); + for (size_t bfu = 0; 
bfu < numBfu; ++bfu) { + const size_t start = specsStart[bfu]; + const size_t len = specsPerBlock[bfu]; + const size_t end = start + len; + assert(end <= mdctEnergy.size()); + if (len == 0 || end > mdctEnergy.size()) { + flatness[bfu] = 1.0f; + continue; + } + + double arithMean = 0.0; + double meanLog = 0.0; + for (size_t i = start; i < end; ++i) { + const double e = std::max(0.0f, mdctEnergy[i]); + arithMean += e; + meanLog += std::log(std::max<double>(e, floor)); + } + arithMean /= static_cast<double>(len); + meanLog /= static_cast<double>(len); + + if (arithMean <= floor) { + flatness[bfu] = 1.0f; + continue; + } + + const double geomMean = std::exp(meanLog); + const double ratio = geomMean / arithMean; + flatness[bfu] = static_cast<float>(std::min(1.0, std::max(0.0, ratio))); + } + return flatness; +} + } // namespace NAtracDEnc diff --git a/src/atrac/atrac_psy_common.h b/src/atrac/atrac_psy_common.h index ebeeef3..970585c 100644 --- a/src/atrac/atrac_psy_common.h +++ b/src/atrac/atrac_psy_common.h @@ -18,12 +18,30 @@ #pragma once #include "atrac_scale.h" +#include <cstdint> #include <stddef.h> +#include <vector> namespace NAtracDEnc { float AnalizeScaleFactorSpread(const std::vector<TScaledBlock>& scaledBlocks); std::vector<float> CalcATH(int len, int sampleRate); +std::vector<float> CalcSpectralFlatnessPerBfu(const std::vector<float>& mdctEnergy, + const uint32_t* specsStart, + const uint32_t* specsPerBlock, + size_t numBfu, + float energyFloor = 1e-12f); + +template <class TData> +inline std::vector<float> CalcSpectralFlatnessPerBfu(const std::vector<float>& mdctEnergy, + float energyFloor = 1e-12f) +{ + return CalcSpectralFlatnessPerBfu(mdctEnergy, + TData::SpecsStartLong, + TData::SpecsPerBlock, + TData::MaxBfus, + energyFloor); +} inline float TrackLoudness(float prevLoud, float l0, float l1) { diff --git a/src/atrac/atrac_psy_common_ut.cpp b/src/atrac/atrac_psy_common_ut.cpp new file mode 100644 index 0000000..91eeaae --- /dev/null +++ 
b/src/atrac/atrac_psy_common_ut.cpp @@ -0,0 +1,376 @@ +/* + * This file is part of AtracDEnc. + * + * AtracDEnc is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * AtracDEnc is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with AtracDEnc; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "atrac_psy_common.h" +#include "at1/atrac1.h" +#include "at3/atrac3.h" +#include "at3/atrac3_qmf.h" +#include "at3p/at3p_tables.h" +#include "mdct/mdct.h" + +#include <gtest/gtest.h> +#include <algorithm> +#include <array> +#include <cmath> +#include <iomanip> +#include <iostream> +#include <numeric> +#include <random> +#include <type_traits> + +using namespace NAtracDEnc; + +namespace { + +template <class TData> +size_t NumSpecs() +{ + return static_cast<size_t>(TData::SpecsStartLong[TData::MaxBfus - 1]) + + static_cast<size_t>(TData::SpecsPerBlock[TData::MaxBfus - 1]); +} + +template <class TData> +void VerifyImpulseMapsToSingleBfu() +{ + const size_t numBfu = TData::MaxBfus; + const size_t numSpecs = NumSpecs<TData>(); + std::vector<float> baseEnergy(numSpecs, 1.0f); + + for (size_t bfu = 0; bfu < numBfu; ++bfu) { + std::vector<float> mdctEnergy = baseEnergy; + const size_t impulsePos = TData::SpecsStartLong[bfu]; + mdctEnergy[impulsePos] = 32.0f; + + const std::vector<float> flatness = CalcSpectralFlatnessPerBfu<TData>(mdctEnergy); + ASSERT_EQ(flatness.size(), numBfu); + EXPECT_LT(flatness[bfu], 0.95f) << "bfu=" << bfu; 
+ + for (size_t i = 0; i < numBfu; ++i) { + if (i == bfu) { + continue; + } + EXPECT_NEAR(flatness[i], 1.0f, 1e-6f) << "bfu=" << bfu << " changed=" << i; + } + } +} + +std::vector<float> CalcSineWindow(size_t n) +{ + constexpr float kPi = 3.14159265358979323846f; + std::vector<float> w(n); + for (size_t i = 0; i < n; ++i) { + w[i] = std::sin((kPi * (static_cast<float>(i) + 0.5f)) / + static_cast<float>(n)); + } + return w; +} + +template <class TSampleFn> +std::vector<float> BuildAtrac3EnergyViaQmfMdct(TSampleFn&& sampleFn) +{ + NAtrac3::TAtrac3Data initTables; + (void)initTables; + + constexpr size_t kFrameSz = NAtrac3::TAtrac3Data::NumSamples; // 1024 + constexpr size_t kNumFrames = 2; + constexpr size_t kSubbands = 4; + constexpr size_t kSubbandSamples = 256; + constexpr size_t kMdctInput = 512; + + std::array<float, kFrameSz * kNumFrames> pcm{}; + for (size_t i = 0; i < pcm.size(); ++i) { + pcm[i] = sampleFn(i); + } + + Atrac3AnalysisFilterBank analysis; + NMDCT::TMDCT<kMdctInput> mdct512(1.0f); + std::array<std::array<float, kMdctInput>, kSubbands> bandState{}; + std::array<std::array<float, kSubbandSamples>, kSubbands> subbands{}; + std::array<float*, kSubbands> subPtrs = { + subbands[0].data(), subbands[1].data(), subbands[2].data(), subbands[3].data() + }; + std::array<float, NAtrac3::TAtrac3Data::NumSpecs> specs = {}; + + for (size_t frame = 0; frame < kNumFrames; ++frame) { + analysis.Analysis(&pcm[frame * kFrameSz], subPtrs.data()); + + for (size_t band = 0; band < kSubbands; ++band) { + auto& state = bandState[band]; + for (size_t i = 0; i < kSubbandSamples; ++i) { + state[kSubbandSamples + i] = subbands[band][i]; + } + + std::array<float, kMdctInput> tmp = {}; + std::copy_n(state.data(), kSubbandSamples, tmp.data()); + for (size_t i = 0; i < kSubbandSamples; ++i) { + const float cur = state[kSubbandSamples + i]; + state[i] = NAtrac3::TAtrac3Data::EncodeWindow[i] * cur; + tmp[kSubbandSamples + i] = NAtrac3::TAtrac3Data::EncodeWindow[kSubbandSamples - 1 
- i] * cur; + } + + const std::vector<float>& specBand = mdct512(tmp.data()); + float* dst = specs.data() + band * kSubbandSamples; + std::copy_n(specBand.data(), kSubbandSamples, dst); + if (band & 1) { + std::reverse(dst, dst + kSubbandSamples); + } + } + } + + std::vector<float> e(NAtrac3::TAtrac3Data::NumSpecs, 1e-12f); + for (size_t i = 0; i < e.size(); ++i) { + e[i] += specs[i] * specs[i]; + } + return e; +} + +template <class TData> +std::vector<float> BuildToneEnergy(float toneHz = 1000.0f) +{ + auto genTone = [toneHz](size_t i) { + constexpr float kPi = 3.14159265358979323846f; + constexpr float kSampleRate = 44100.0f; + const float phase = 2.0f * kPi * toneHz * static_cast<float>(i) / kSampleRate + 0.37f; + return std::sin(phase); + }; + + if constexpr (std::is_same_v<TData, NAtrac1::TAtrac1Data>) { + constexpr size_t n = 1024; + std::vector<float> in(n); + const std::vector<float> w = CalcSineWindow(n); + for (size_t i = 0; i < n; ++i) { + in[i] = genTone(i) * w[i]; + } + NMDCT::TMDCT<n> mdct(n); + const auto& spec = mdct(in.data()); + std::vector<float> e(spec.size(), 1e-12f); + for (size_t i = 0; i < spec.size(); ++i) { + e[i] += spec[i] * spec[i]; + } + return e; + } else if constexpr (std::is_same_v<TData, NAtrac3::TAtrac3Data>) { + return BuildAtrac3EnergyViaQmfMdct([&](size_t i) { + return genTone(i); + }); + } else { + static_assert(std::is_same_v<TData, NAt3p::TScaleTable>, "Unsupported codec table for tone energy"); + constexpr size_t n = 4096; + std::vector<float> in(n); + const std::vector<float> w = CalcSineWindow(n); + for (size_t i = 0; i < n; ++i) { + in[i] = genTone(i) * w[i]; + } + NMDCT::TMDCT<n> mdct(n); + const auto& spec = mdct(in.data()); + std::vector<float> e(spec.size(), 1e-12f); + for (size_t i = 0; i < spec.size(); ++i) { + e[i] += spec[i] * spec[i]; + } + return e; + } +} + +template <class TData> +std::vector<float> BuildWhiteNoiseEnergy() +{ + std::mt19937 gen(0xC0FFEEu + static_cast<uint32_t>(NumSpecs<TData>())); + 
std::normal_distribution<float> dist(0.0f, 1.0f); + auto genNoise = [&gen, &dist](size_t, size_t) { + return dist(gen); + }; + + if constexpr (std::is_same_v<TData, NAtrac1::TAtrac1Data>) { + constexpr size_t n = 1024; + std::vector<float> in(n); + const std::vector<float> w = CalcSineWindow(n); + for (size_t i = 0; i < n; ++i) { + in[i] = genNoise(i, n) * w[i]; + } + NMDCT::TMDCT<n> mdct(n); + const auto& spec = mdct(in.data()); + std::vector<float> e(spec.size(), 1e-12f); + for (size_t i = 0; i < spec.size(); ++i) { + e[i] += spec[i] * spec[i]; + } + return e; + } else if constexpr (std::is_same_v<TData, NAtrac3::TAtrac3Data>) { + return BuildAtrac3EnergyViaQmfMdct([&](size_t i) { + return genNoise(i, NAtrac3::TAtrac3Data::NumSpecs * 2); + }); + } else { + static_assert(std::is_same_v<TData, NAt3p::TScaleTable>, "Unsupported codec table for noise energy"); + constexpr size_t n = 4096; + std::vector<float> in(n); + const std::vector<float> w = CalcSineWindow(n); + for (size_t i = 0; i < n; ++i) { + in[i] = genNoise(i, n) * w[i]; + } + NMDCT::TMDCT<n> mdct(n); + const auto& spec = mdct(in.data()); + std::vector<float> e(spec.size(), 1e-12f); + for (size_t i = 0; i < spec.size(); ++i) { + e[i] += spec[i] * spec[i]; + } + return e; + } +} + +template <class TData> +std::vector<float> CalcBfuEnergy(const std::vector<float>& mdctEnergy) +{ + const size_t numBfu = TData::MaxBfus; + std::vector<float> bfuEnergy(numBfu, 0.0f); + for (size_t bfu = 0; bfu < numBfu; ++bfu) { + const size_t start = TData::SpecsStartLong[bfu]; + const size_t len = TData::SpecsPerBlock[bfu]; + float sum = 0.0f; + for (size_t i = start; i < start + len; ++i) { + sum += mdctEnergy[i]; + } + bfuEnergy[bfu] = sum; + } + return bfuEnergy; +} + +float WeightedMean(const std::vector<float>& values, const std::vector<float>& weights) +{ + EXPECT_EQ(values.size(), weights.size()); + if (values.size() != weights.size()) { + return 0.0f; + } + const float wsum = std::accumulate(weights.begin(), 
weights.end(), 0.0f); + EXPECT_GT(wsum, 0.0f); + if (wsum <= 0.0f) { + return 0.0f; + } + float sum = 0.0f; + for (size_t i = 0; i < values.size(); ++i) { + sum += values[i] * weights[i]; + } + return sum / wsum; +} + +template <class TData> +void VerifyToneVsNoiseFlatness(const char* codecName) +{ + const std::vector<float> toneEnergy = BuildToneEnergy<TData>(); + const std::vector<float> noiseEnergy = BuildWhiteNoiseEnergy<TData>(); + ASSERT_EQ(toneEnergy.size(), NumSpecs<TData>()); + ASSERT_EQ(noiseEnergy.size(), NumSpecs<TData>()); + + const std::vector<float> toneFlatness = CalcSpectralFlatnessPerBfu<TData>(toneEnergy); + const std::vector<float> noiseFlatness = CalcSpectralFlatnessPerBfu<TData>(noiseEnergy); + + const std::vector<float> toneBfuEnergy = CalcBfuEnergy<TData>(toneEnergy); + const std::vector<float> noiseBfuEnergy = CalcBfuEnergy<TData>(noiseEnergy); + + const float toneWeightedFlatness = WeightedMean(toneFlatness, toneBfuEnergy); + const float noiseWeightedFlatness = WeightedMean(noiseFlatness, noiseBfuEnergy); + + std::cerr << "[FlatnessUT] codec=" << codecName + << " signal=tone weighted=" << std::fixed << std::setprecision(6) + << toneWeightedFlatness << "\n"; + std::cerr << "[FlatnessUT] codec=" << codecName + << " signal=noise weighted=" << std::fixed << std::setprecision(6) + << noiseWeightedFlatness << "\n"; + + EXPECT_GT(noiseWeightedFlatness, toneWeightedFlatness + 0.08f); +} + +void VerifyAtrac3ToneFrequencyFlatness(float toneHz) +{ + const std::vector<float> toneEnergy = BuildToneEnergy<NAtrac3::TAtrac3Data>(toneHz); + const std::vector<float> noiseEnergy = BuildWhiteNoiseEnergy<NAtrac3::TAtrac3Data>(); + + const std::vector<float> toneFlatness = CalcSpectralFlatnessPerBfu<NAtrac3::TAtrac3Data>(toneEnergy); + const std::vector<float> noiseFlatness = CalcSpectralFlatnessPerBfu<NAtrac3::TAtrac3Data>(noiseEnergy); + const std::vector<float> toneBfuEnergy = CalcBfuEnergy<NAtrac3::TAtrac3Data>(toneEnergy); + const std::vector<float> 
noiseBfuEnergy = CalcBfuEnergy<NAtrac3::TAtrac3Data>(noiseEnergy); + + for (size_t bfu = 0; bfu < toneFlatness.size(); ++bfu) { + std::cerr << "[FlatnessUT] codec=atrac3 signal=tone freq_hz=" << toneHz + << " bfu=" << bfu + << " flatness=" << std::fixed << std::setprecision(6) + << toneFlatness[bfu] << "\n"; + } + for (size_t bfu = 0; bfu < noiseFlatness.size(); ++bfu) { + std::cerr << "[FlatnessUT] codec=atrac3 signal=noise" + << " bfu=" << bfu + << " flatness=" << std::fixed << std::setprecision(6) + << noiseFlatness[bfu] << "\n"; + } + + const float toneWeightedFlatness = WeightedMean(toneFlatness, toneBfuEnergy); + const float noiseWeightedFlatness = WeightedMean(noiseFlatness, noiseBfuEnergy); + + std::cerr << "[FlatnessUT] codec=atrac3 signal=tone freq_hz=" << toneHz + << " weighted=" << std::fixed << std::setprecision(6) + << toneWeightedFlatness << "\n"; + std::cerr << "[FlatnessUT] codec=atrac3 signal=noise weighted=" + << std::fixed << std::setprecision(6) + << noiseWeightedFlatness << "\n"; + + EXPECT_GT(noiseWeightedFlatness, toneWeightedFlatness + 0.05f); +} + +} // namespace + +TEST(AtracPsyCommon, SpectralFlatnessUniformBlock) +{ + const uint32_t start[1] = {0}; + const uint32_t size[1] = {8}; + const std::vector<float> mdctEnergy(8, 4.0f); + const std::vector<float> flatness = CalcSpectralFlatnessPerBfu(mdctEnergy, start, size, 1); + ASSERT_EQ(flatness.size(), 1u); + EXPECT_NEAR(flatness[0], 1.0f, 1e-6f); +} + +TEST(AtracPsyCommon, SpectralFlatnessBfuMappingAtrac1) +{ + VerifyImpulseMapsToSingleBfu<NAtrac1::TAtrac1Data>(); +} + +TEST(AtracPsyCommon, SpectralFlatnessBfuMappingAtrac3) +{ + VerifyImpulseMapsToSingleBfu<NAtrac3::TAtrac3Data>(); +} + +TEST(AtracPsyCommon, SpectralFlatnessBfuMappingAtrac3Plus) +{ + VerifyImpulseMapsToSingleBfu<NAt3p::TScaleTable>(); +} + +TEST(AtracPsyCommon, SpectralFlatnessToneVsNoiseAtrac1) +{ + VerifyToneVsNoiseFlatness<NAtrac1::TAtrac1Data>("atrac1"); +} + +TEST(AtracPsyCommon, SpectralFlatnessToneVsNoiseAtrac3) +{ + 
VerifyToneVsNoiseFlatness<NAtrac3::TAtrac3Data>("atrac3"); +} + +TEST(AtracPsyCommon, SpectralFlatnessToneVsNoiseAtrac3Plus) +{ + VerifyToneVsNoiseFlatness<NAt3p::TScaleTable>("atrac3plus"); +} + +TEST(AtracPsyCommon, SpectralFlatness10kToneAtrac3) +{ + VerifyAtrac3ToneFrequencyFlatness(10000.0f); +} diff --git a/src/atrac3denc.cpp b/src/atrac3denc.cpp index d7024e6..6f8d62c 100644 --- a/src/atrac3denc.cpp +++ b/src/atrac3denc.cpp @@ -548,6 +548,90 @@ void TAtrac3Encoder::CreateSubbandInfo(const float* upInput[4], } } +TAtrac3Data::TTonalComponents TAtrac3Encoder::ExtractTonalComponents(float* specs, + const std::vector<float>& flatnessPerBfu) +{ + TAtrac3Data::TTonalComponents res; + static constexpr float kFlatnessThreshold = 0.01f; + static constexpr uint32_t kMaxTonalLen = 5; + // BFU below 8 is too short to get notisiable profit + // BFU above 29 is hard to tune + for (uint32_t blockNum = 8; blockNum < 29u; ++blockNum) { + if (blockNum >= flatnessPerBfu.size()) { + break; + } + + const float flatness = flatnessPerBfu[blockNum]; + if (flatness >= kFlatnessThreshold) { + continue; + } + + const uint32_t specNumStart = TAtrac3Data::SpecsStartLong[blockNum]; + const uint32_t blockLen = TAtrac3Data::SpecsPerBlock[blockNum]; + const uint32_t specNumEnd = specNumStart + blockLen; + if (specNumStart >= specNumEnd) { + continue; + } + + const uint32_t maxLen = std::min(kMaxTonalLen, blockLen); + float bestScore = -1.0f; + uint32_t bestStart = specNumStart; + uint32_t bestLen = 1; + for (uint32_t start = specNumStart; start < specNumEnd; ++start) { + const uint32_t maxLenForStart = std::min(maxLen, specNumEnd - start); + float score = 0.0f; + for (uint32_t len = 1; len <= maxLenForStart; ++len) { + score += std::abs(specs[start + len - 1]); + if (score > bestScore) { + bestScore = score; + bestStart = start; + bestLen = len; + } + } + } + + if (bestScore <= 0.0f) { + continue; + } + + /* + std::cerr << "atrac3 tonal bfu=" << (uint32_t)blockNum + << " flatness=" << 
flatness + << " start=" << bestStart + << " len=" << bestLen + << " score=" << bestScore + << std::endl; */ + + for (uint32_t n = 0; n < bestLen; ++n) { + const uint32_t pos = bestStart + n; + res.push_back({(uint16_t)pos, specs[pos], (uint8_t)blockNum}); + specs[pos] = 0.0f; + } + } + + return res; +} + + +void TAtrac3Encoder::MapTonalComponents(const TAtrac3Data::TTonalComponents& tonalComponents, vector<TTonalBlock>* componentMap) +{ + for (size_t i = 0; i < tonalComponents.size();) { + const uint32_t startPos = i; + uint32_t curPos; + do { + curPos = tonalComponents[i].Pos; + ++i; + } while ( i < tonalComponents.size() && tonalComponents[i].Pos == curPos + 1 && i - startPos < 7); + const uint32_t len = i - startPos; + float tmp[8]; + for (uint32_t j = 0; j < len; ++j) + tmp[j] = tonalComponents[startPos + j].Val; + const TScaledBlock& scaledBlock = Scaler.Scale(tmp, len); + componentMap->push_back({&tonalComponents[startPos], scaledBlock}); + } +} + + void TAtrac3Encoder::Matrixing() { for (uint32_t subband = 0; subband < 4; subband++) { @@ -624,9 +708,12 @@ TPCMEngine::TProcessLambda TAtrac3Encoder::GetLambda() << "channels:\n"; } + TAtrac3Data::TTonalComponents tonals[2]; + for (uint32_t channel = 0; channel < meta.Channels; channel++) { auto& specs = (*buf)[channel].Specs; TSce* sce = &SingleChannelElements[channel]; + sce->TonalBlocks.clear(); sce->SubbandInfo.Reset(); if (!Params.NoGainControll) { @@ -653,14 +740,23 @@ TPCMEngine::TProcessLambda TAtrac3Encoder::GetLambda() Mdct(specs.data(), p, maxOverlapLevels, MakeGainModulatorArray(sce->SubbandInfo)); } + vector<float> mdctEnergy(specs.size(), 0.0f); float l = 0; for (size_t i = 0; i < specs.size(); i++) { float e = specs[i] * specs[i]; + mdctEnergy[i] = e; l += e * LoudnessCurve[i]; } sce->Loudness = l; + if (!Params.NoTonalComponents) { + const vector<float> flatnessPerBfu = CalcSpectralFlatnessPerBfu<TAtrac3Data>(mdctEnergy); + tonals[channel] = ExtractTonalComponents(specs.data(), flatnessPerBfu); + 
sce->TonalBlocks.clear(); + MapTonalComponents(tonals[channel], &sce->TonalBlocks); + } + //TBlockSize for ATRAC3 - 4 subband, all are long (no short window) sce->ScaledBlocks = Scaler.ScaleFrame(specs, TAtrac3Data::TBlockSizeMod()); } diff --git a/src/atrac3denc.h b/src/atrac3denc.h index 38ba0c2..68224ff 100644 --- a/src/atrac3denc.h +++ b/src/atrac3denc.h @@ -113,7 +113,10 @@ public: void CreateSubbandInfo(const float* upInput[4], uint32_t channel, TAtrac3Data::SubbandInfo* subbandInfo, int gainBoostPerBand[TAtrac3Data::NumQMF]); + TAtrac3Data::TTonalComponents ExtractTonalComponents(float* specs, + const std::vector<float>& flatnessPerBfu); void Matrixing(); + void MapTonalComponents(const TAtrac3Data::TTonalComponents& tonalComponents, std::vector<NAtracDEnc::NAtrac3::TTonalBlock>* componentMap); public: TAtrac3Encoder(TCompressedOutputPtr&& oma, NAtrac3::TAtrac3EncoderSettings&& encoderSettings); diff --git a/src/main.cpp b/src/main.cpp index 4cd7128..b1356b0 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -325,6 +325,7 @@ int main_(int argc, char* const* argv) { "bfuidxfast", no_argument, NULL, O_BFUIDXFAST}, { "notransient", optional_argument, NULL, O_NOTRANSIENT}, { "nostdout", no_argument, NULL, O_NOSTDOUT}, + { "notonal", no_argument, NULL, O_NOTONAL}, { "nogaincontrol", no_argument, NULL, O_NOGAINCONTROL}, { "advanced", required_argument, NULL, O_ADVANCED_OPT}, { "yaml-log", required_argument, NULL, O_YAML_LOG}, diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 8f92f68..481d228 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -19,6 +19,7 @@ set(atracdenc_ut ${CMAKE_SOURCE_DIR}/src/transient_detector_ut.cpp ${CMAKE_SOURCE_DIR}/src/transient_spectral_upsampler_ut.cpp ${CMAKE_SOURCE_DIR}/src/atrac/atrac_scale_ut.cpp + ${CMAKE_SOURCE_DIR}/src/atrac/atrac_psy_common_ut.cpp ) add_executable(atracdenc_ut ${atracdenc_ut}) |
