summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorDaniil Cherednik <[email protected]>2026-03-16 21:23:34 +0200
committerDaniil Cherednik <[email protected]>2026-04-08 23:33:40 +0200
commit614812c0284832f686f0077bbf5fb1563e653d37 (patch)
treed5ae0c8e9deb8c22572da1e2694e7a54fdf8b683 /src
parent901974b3aa5045226201c710da13a8365a7bf1c8 (diff)
atrac3: add HFR/silence gates to reduce spurious gain curves
Three complementary guards based on YAML-log analysis of pre-echo frames:

1. Raise kHfrRef 0.30 → 0.50: the HFR-proportional minScore scale now kicks in earlier, suppressing candidates with score < 5.7× on frames where the spectral upsampler is operating below half its reference HFR.

2. Add kMinReliableHfr = 0.12: hard skip for bands where HFR is so low that even the scaled minScore cannot filter all spurious multi-point curves. Logged as 'skip: low_hfr_unreliable' in the YAML stream.

3. Add kMinGainLevel = 3e-4: skip gain processing entirely when the loudest 32-subframe RMS is below this threshold. Near-silence bands produce extreme relative ratios from tiny absolute spikes, generating gain curves that worsen reconstruction noise rather than reducing pre-echo.

Riddler (10 s): pre-echo worse 6 → 5/479, mean SNR gain 22.9 → 23.5 dB.
Spine (30 s): pre-echo worse 107 → 26/1804, mean SNR gain 20.3 → 20.6 dB.
Noise flashes remain 0 on both tracks. All 18 unit tests pass.

Co-Authored-By: Claude Sonnet 4.6 <[email protected]>
Diffstat (limited to 'src')
-rw-r--r--src/atrac3denc.cpp58
1 files changed, 56 insertions, 2 deletions
diff --git a/src/atrac3denc.cpp b/src/atrac3denc.cpp
index 9c766c5..cc64a79 100644
--- a/src/atrac3denc.cpp
+++ b/src/atrac3denc.cpp
@@ -184,6 +184,18 @@ void TAtrac3Encoder::CreateSubbandInfo(const float* upInput[4],
static constexpr int kLevelBoostCap = 1; // cap level boost to reduce bit starvation
static constexpr int kScaleBoostCap = 2; // allow extra scale boost in low-risk cases
static constexpr float kMinScore = 1.9f;
+ // HFR reference: when highFreqRatio < kHfrRef, scale minScore up proportionally.
+ // The spectral upsampler's output is unreliable at low HFR, so the 32-subframe
+ // gain[] array used for transient detection is noisy. Scaling minScore suppresses
+ // spurious multi-point curves while still allowing genuinely strong transients through.
+ static constexpr float kHfrRef = 0.50f;
+ // Below kMinReliableHfr the upsampler output is so poor that even a scaled minScore
+ // may not suppress all spurious curves. Skip gain processing entirely for that band.
+ // This is a stricter gate than kHighFreqThreshold (0.05) but softer than kHfrRef.
+ static constexpr float kMinReliableHfr = 0.12f;
+ // Skip gain curves if the signal is near-silence: tiny absolute spikes produce
+ // extreme relative ratios that trigger curves where none are perceptually useful.
+ static constexpr float kMinGainLevel = 3e-4f;
// YAML: channel header (one channel per CreateSubbandInfo call)
if (YamlLog) {
@@ -209,9 +221,32 @@ void TAtrac3Encoder::CreateSubbandInfo(const float* upInput[4],
continue;
}
+ if (result.highFreqRatio < kMinReliableHfr) {
+ if (YamlLog) {
+ *YamlLog << std::fixed << std::setprecision(4)
+ << " skip: low_hfr_unreliable # high_freq_ratio "
+ << result.highFreqRatio << " < kMinReliableHfr\n";
+ }
+ CurveCtx[channel][band].LastLevel = 0.0f;
+ continue;
+ }
+
// Analysis region [1024..3072) = current frame upsampled (8x)
const auto gain = AnalyzeGain(result.signal.data() + 1024, 2048, 32, true);
+ // Near-silence gate: if the loudest subframe is below kMinGainLevel, gain curves
+ // would encode tiny absolute spikes as extreme relative transients. Skip.
+ const float maxGainLevel = *std::max_element(gain.begin(), gain.end());
+ if (maxGainLevel < kMinGainLevel) {
+ if (YamlLog) {
+ *YamlLog << std::fixed << std::setprecision(6)
+ << " skip: near_silence # max_gain "
+ << maxGainLevel << " < kMinGainLevel\n";
+ }
+ CurveCtx[channel][band].LastLevel = 0.0f;
+ continue;
+ }
+
// nextLevel from first 64-sample subframe of upsampled lookahead [3072..3072+64)
const float nextLevel = AnalyzeGain(result.signal.data() + 3072, 64, 1, true)[0];
@@ -238,15 +273,34 @@ void TAtrac3Encoder::CreateSubbandInfo(const float* upInput[4],
// Dynamic min-score: linearly scale up from 1× at overlapRatio=1 to
// 1.5× at overlapRatio=2 (capped). Below 1 (attack frame) unchanged.
const float overlapFactor = std::min(1.5f, std::max(1.0f, overlapRatio));
- const float dynamicMinScore = kMinScore * overlapFactor;
+ // HFR factor: when highFreqRatio < kHfrRef, scale minScore up so that
+ // only genuinely strong transients produce gain curves. The factor is
+ // kHfrRef / hfr clamped to [1×, 3×]: with kHfrRef=0.5 it is 2× at hfr=0.25,
+ // reaches the 3× cap at hfr<=~0.167, and is 1× (no change) at hfr>=0.5.
+ // Capped at 3× so a real large transient (ratio >> kMinScore*3) still passes.
+ const float hfrFactor = (result.highFreqRatio > 0.0f)
+ ? std::min(3.0f, std::max(1.0f, kHfrRef / result.highFreqRatio))
+ : 3.0f;
+ const float dynamicMinScore = kMinScore * overlapFactor * hfrFactor;
+
+ // savedLastLevel is CalcCurve's context from the previous frame.
+ // Logging it here (before CalcCurve updates it) helps diagnose false
+ // transients at the frame boundary caused by level mismatches.
+ const float savedLastLevel = CurveCtx[channel][band].LastLevel;
if (YamlLog) {
*YamlLog << std::fixed << std::setprecision(4)
<< " high_freq_ratio: " << result.highFreqRatio << "\n"
+ << " hfr_factor: " << hfrFactor
+ << " # min_score scale from HFR (1x at hfr>=0.50)\n"
<< " overlap_ratio: " << overlapRatio
<< " # prev_E/cur_E; >1 means prev frame louder\n"
- << " dynamic_min_score: " << dynamicMinScore << "\n"
+ << " dynamic_min_score: " << dynamicMinScore
+ << " # kMinScore * overlap_factor * hfr_factor\n"
+ << " saved_last_level: " << savedLastLevel
+ << " # ctx.LastLevel from previous frame\n"
<< " next_level: " << nextLevel << "\n"
+ << " target: " << nextLevel
+ << " # normalization target used by CalcCurve\n"
<< " gain: ";
YamlWriteFloatSeq(*YamlLog, gain, 4);
*YamlLog << " # 32 subframe RMS values\n";