diff options
Diffstat (limited to 'src/atrac')
| -rw-r--r-- | src/atrac/at3/atrac3_bitstream.cpp | 105 | ||||
| -rw-r--r-- | src/atrac/atrac_enc_cache.cpp | 51 | ||||
| -rw-r--r-- | src/atrac/atrac_enc_cache.h | 95 |
3 files changed, 222 insertions, 29 deletions
diff --git a/src/atrac/at3/atrac3_bitstream.cpp b/src/atrac/at3/atrac3_bitstream.cpp
index 56ef5a5..9b1a61d 100644
--- a/src/atrac/at3/atrac3_bitstream.cpp
+++ b/src/atrac/at3/atrac3_bitstream.cpp
@@ -19,6 +19,7 @@
 #include "atrac3_bitstream.h"
 #include "qmf/qmf.h"
 #include <atrac/atrac_psy_common.h>
+#include <atrac/atrac_enc_cache.h>
 #include <bitstream/bitstream.h>
 #include <util.h>
 #include <env.h>
@@ -147,40 +148,82 @@ uint32_t VLCEnc(const uint32_t selector, const int mantissas[TAtrac3Data::MaxSpe
     return bitsUsed;
 }
 
+// Cached quantization result for one BFU at one word length: the block-local
+// mantissas, the energy error returned by QuantMantisas() and the CLC/VLC
+// spectrum costs. Built once by Provide() and reused across the bit-allocation search.
+class TAt3SpecUnit : public TUnit {
+public:
+    // TEncCache::TProvideUnit factory: quantizes `values` into a new heap-allocated unit; the cache adopts the returned pointer.
+    static TUnit* Provide(size_t /*ch*/, size_t bfu, size_t wordlen, const float* values, void*) {
+        auto* u = new TAt3SpecUnit();
+        const uint32_t first = TAtrac3Data::BlockSizeTab[bfu];
+        const uint32_t last = TAtrac3Data::BlockSizeTab[bfu + 1];
+        const uint32_t blockSize = last - first;
+        const float mul = TAtrac3Data::MaxQuant[std::min((uint32_t)wordlen, (uint32_t)7)]; // table index is clamped to 7
+
+        u->Wordlen = wordlen;
+        u->Multiplier = mul;
+        u->Mantisas.resize(blockSize);
+        // Quantize into the block-local buffer (range 0..blockSize). The flag
+        // `bfu > LOSY_NAQ_START` depends only on bfu, so it is constant for a given cache key.
+        u->EnergyErr = QuantMantisas(values, 0, blockSize, mul, bfu > LOSY_NAQ_START, u->Mantisas.data());
+        u->ClcBits = CLCEnc(wordlen, u->Mantisas.data(), blockSize, nullptr);
+        u->VlcBits = VLCEnc(wordlen, u->Mantisas.data(), blockSize, nullptr);
+        return u;
+    }
+
+    float EnergyErr = 0.0f; // value returned by QuantMantisas() for this block
+    uint32_t ClcBits = 0; // CLC spectrum cost (no per-block header bits)
+    uint32_t VlcBits = 0; // VLC spectrum cost (no per-block header bits)
+};
+
+// Cache key: (bfu << 3) | wordlen. `ch` is ignored because atrac3 has only MS stereo and BFUs carry
+// no channel identity; the cache is reset per channel. NOTE(review): the bounds are only ASSERTed -- confirm wordlen <= 7 when ASSERT is compiled out.
+static size_t MakeAt3SpecKey(size_t /*ch*/, size_t bfu, size_t wordlen) {
+    ASSERT(bfu < 32);
+    ASSERT(wordlen < 8);
+    return (bfu << 3) | wordlen;
+}
+// Exclusive upper bound on MakeAt3SpecKey(): 32 BFUs * 8 word lengths = 256 slots.
+static constexpr size_t kAt3SpecCacheKeys = 1u << 8;
+
 std::pair<uint8_t, uint32_t> CalcSpecsBitsConsumption(const TAtrac3BitStreamWriter::TSingleChannelElement& sce,
                                                       const vector<uint32_t>& precisionPerEachBlocks,
                                                       int* mantisas,
-                                                      vector<float>& energyErr)
+                                                      vector<float>& energyErr,
+                                                      TEncCache& cache)
 {
     const vector<TScaledBlock>& scaledBlocks = sce.ScaledBlocks;
     const uint32_t numBlocks = precisionPerEachBlocks.size();
     uint32_t bitsUsed = numBlocks * 3;
 
-    auto lambda = [numBlocks, mantisas, &precisionPerEachBlocks, &scaledBlocks, &energyErr](bool clcMode, bool calcMant) {
-        uint32_t bits = 0;
-        for (uint32_t i = 0; i < numBlocks; ++i) {
-            if (precisionPerEachBlocks[i] == 0) {
-                continue;
-            }
-            bits += 6; // sfi
-            const uint32_t first = TAtrac3Data::BlockSizeTab[i];
-            const uint32_t last = TAtrac3Data::BlockSizeTab[i + 1];
-            const uint32_t blockSize = last - first;
-            const float mul = TAtrac3Data::MaxQuant[std::min(precisionPerEachBlocks[i], (uint32_t)7)];
-            if (calcMant) {
-                const float* values = scaledBlocks[i].Values.data();
-                energyErr[i] = QuantMantisas(values, first, last, mul, i > LOSY_NAQ_START, mantisas);
-            }
-            bits += clcMode ? CLCEnc(precisionPerEachBlocks[i], mantisas + first, blockSize, nullptr)
-                : VLCEnc(precisionPerEachBlocks[i], mantisas + first, blockSize, nullptr);
+    // The 6 sfi bits of every coded block go straight into bitsUsed because
+    // they do not depend on the coding mode. CLC and VLC spectrum costs are
+    // summed separately from the cached units; the cheaper mode wins (CLC on a tie).
+    uint32_t clcSpecBits = 0;
+    uint32_t vlcSpecBits = 0;
+    for (uint32_t i = 0; i < numBlocks; ++i) {
+        if (precisionPerEachBlocks[i] == 0) {
+            continue;
         }
-        return bits;
-    };
+        bitsUsed += 6; // sfi
+        const uint32_t first = TAtrac3Data::BlockSizeTab[i];
+        const uint32_t last = TAtrac3Data::BlockSizeTab[i + 1];
+        const uint32_t blockSize = last - first;
+
+        auto* unit = static_cast<TAt3SpecUnit*>(
+            cache.GetOrCompute(0, i, precisionPerEachBlocks[i], scaledBlocks[i].Values.data()));
 
-    const uint32_t clcBits = lambda(true, true);
-    const uint32_t vlcBits = lambda(false, false);
-    const bool mode = clcBits <= vlcBits;
-    return std::make_pair(mode, bitsUsed + (mode ? clcBits : vlcBits));
+        // Cached mantissas are indexed from 0 within the block; copy them to
+        // offset `first` of the frame-wide array that Dump() later hands to EncodeSpecs().
+        std::copy_n(unit->GetMantisas().data(), blockSize, mantisas + first);
+        energyErr[i] = unit->EnergyErr;
+        clcSpecBits += unit->ClcBits;
+        vlcSpecBits += unit->VlcBits;
+    }
+
+    const bool mode = clcSpecBits <= vlcSpecBits;
+    return std::make_pair(mode, bitsUsed + (mode ? clcSpecBits : vlcSpecBits));
 }
 
 static inline bool CheckBfus(uint16_t* numBfu, const vector<uint32_t>& precisionPerEachBlocks)
@@ -593,7 +636,8 @@ public:
         ctx->EnergyErr.assign(ctx->NumBfu, 0.0f);
         std::pair<uint8_t, uint32_t> consumption;
         do {
-            consumption = CalcSpecsBitsConsumption(*ctx->Sce, tmpAlloc, ctx->Mantissas.data(), ctx->EnergyErr);
+            consumption = CalcSpecsBitsConsumption(*ctx->Sce, tmpAlloc, ctx->Mantissas.data(),
+                                                   ctx->EnergyErr, SpecCache);
         } while (ConsiderEnergyErr(ctx->EnergyErr, tmpAlloc));
 
         uint32_t totalBits = consumption.second + EncodeTonalComponents(*ctx->Sce, tmpAlloc, nullptr);
@@ -615,11 +659,13 @@ public:
     }
 
     void Dump(NBitStream::TBitStream& bs) override {
-        if (!Ctx) {
-            return;
+        if (Ctx) {
+            EncodeSpecs(*Ctx->Sce, &bs, Ctx->PrecisionPerBlock, Ctx->CodingMode, Ctx->Mantissas.data());
+            Ctx = nullptr;
         }
-        EncodeSpecs(*Ctx->Sce, &bs, Ctx->PrecisionPerBlock, Ctx->CodingMode, Ctx->Mantissas.data());
-        Ctx = nullptr;
+        // Cached units were quantized from the spectrum of the channel/frame
+        // just finished, so clear them (even when Ctx was null) before the next channel reuses this part.
+        SpecCache.Reset();
     }
 
     void Reset() noexcept override {
@@ -632,6 +678,7 @@ public:
 
 private:
     TEncodeCtx* Ctx = nullptr;
+    TEncCache SpecCache{kAt3SpecCacheKeys, &TAt3SpecUnit::Provide, &MakeAt3SpecKey}; // cleared in Dump()
 };
 
 std::vector<IBitStreamPartEncoder::TPtr> CreateEncParts()
diff --git a/src/atrac/atrac_enc_cache.cpp b/src/atrac/atrac_enc_cache.cpp
new file mode 100644
index 0000000..2dc781c
--- /dev/null
+++ b/src/atrac/atrac_enc_cache.cpp
@@ -0,0 +1,51 @@
+/*
+ * This file is part of AtracDEnc.
+ *
+ * AtracDEnc is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * AtracDEnc is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with AtracDEnc; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "atrac_enc_cache.h"
+
+namespace NAtracDEnc {
+
+TEncCache::TEncCache(size_t numKeys, TProvideUnit provideUnit, TMakeKey makeKey, void* opaque)
+    : UnitBuffers(numKeys) // one empty (null) slot per possible key
+    , ProvideUnit(provideUnit)
+    , MakeKey(makeKey)
+    , Opaque(opaque) // passed through unchanged to every ProvideUnit call
+{
+}
+
+TUnit* TEncCache::GetOrCompute(size_t ch, size_t bfu, size_t wordlen, const float* values)
+{
+    const size_t key = MakeKey(ch, bfu, wordlen); // used below as an unchecked index: must be < numKeys
+
+    std::unique_ptr<TUnit>& slot = UnitBuffers[key];
+    if (!slot) { // cache miss: build the unit once; the slot takes ownership of the raw pointer
+        slot.reset(ProvideUnit(ch, bfu, wordlen, values, Opaque));
+    }
+
+    return slot.get(); // non-owning pointer; on a cache hit `values` was not consulted
+}
+
+void TEncCache::Reset()
+{
+    // Destroy every cached unit but keep the vector at full size, so all keys stay addressable.
+    for (std::unique_ptr<TUnit>& slot : UnitBuffers) {
+        slot.reset();
+    }
+}
+
+} // namespace NAtracDEnc
diff --git a/src/atrac/atrac_enc_cache.h b/src/atrac/atrac_enc_cache.h
new file mode 100644
index 0000000..0a9cdb1
--- /dev/null
+++ b/src/atrac/atrac_enc_cache.h
@@ -0,0 +1,95 @@
+#pragma once
+
+/*
+ * This file is part of AtracDEnc.
+ *
+ * AtracDEnc is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * AtracDEnc is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with AtracDEnc; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <cstddef>
+#include <cstdint>
+#include <memory>
+#include <vector>
+
+namespace NAtracDEnc {
+
+// Codec-agnostic base class for one cached encoding unit (BFU / quant unit).
+//
+// A codec derives from TUnit and, inside its ProvideUnit factory (see
+// TEncCache), quantizes the scaled spectrum into Mantisas and fills in the
+// bookkeeping fields below. The base class only stores and exposes them: it
+// has no virtual functions besides the destructor, which is what allows units
+// to be destroyed through a TUnit pointer. A unit is produced once per cache
+// lifetime for a given key and then reused across the bit-allocation search.
+class TUnit {
+public:
+    virtual ~TUnit() = default;
+
+    const std::vector<int>& GetMantisas() const { return Mantisas; }
+    uint32_t GetWordlen() const { return Wordlen; }
+    float GetMultiplier() const { return Multiplier; }
+    uint16_t GetConsumedBits() const { return ConsumedBits; }
+
+protected:
+    // Info needed to write the unit into the stream after encoding; filled in by the derived class.
+    std::vector<int> Mantisas;
+    uint32_t Wordlen = 0;
+    float Multiplier = 1.0f;
+    uint16_t ConsumedBits = 0; // Number of bits consumed by the quantized spectrum (stays 0 unless the subclass sets it)
+};
+
+// Caches per-unit encoding results during the bit-allocation search.
+//
+// Within a single frame the scaled spectrum of a given (ch, bfu) is fixed,
+// so for each wordlen its quantized mantissas and bit consumption are deterministic.
+// The binary search requests the same combinations repeatedly; this cache
+// computes each one once.
+//
+// The key space is small and dense, so units are stored in a vector that is
+// directly indexed by a user-supplied key function (nullptr slot == not yet
+// computed) rather than in a std::map.
+class TEncCache {
+public:
+    // Build the right TUnit subclass for this key and quantize `values` into
+    // it. Invoked only on a cache miss; the returned raw pointer is adopted (and later deleted) by
+    // the cache. `opaque` carries user context (e.g. scale tables / per-frame data).
+    using TProvideUnit = TUnit* (*)(size_t ch, size_t bfu, size_t wordlen,
+                                    const float* values, void* opaque);
+
+    // Pack (ch, bfu, wordlen) into a dense vector index. Codec specific.
+    using TMakeKey = size_t (*)(size_t ch, size_t bfu, size_t wordlen);
+
+    // `numKeys` is the exclusive upper bound on MakeKey() values; the backing vector is
+    // sized once to it and indexed without a range check, so makeKey must never return a value >= numKeys.
+    TEncCache(size_t numKeys, TProvideUnit provideUnit, TMakeKey makeKey, void* opaque = nullptr);
+
+    TEncCache(const TEncCache&) = delete; // non-copyable: the cache uniquely owns its units
+    TEncCache& operator=(const TEncCache&) = delete;
+
+    // Return the cached unit for (ch, bfu, wordlen); on the first request it is created via
+    // ProvideUnit. On a hit `values` is ignored, so call Reset() whenever the spectrum changes. The cache keeps ownership.
+    TUnit* GetOrCompute(size_t ch, size_t bfu, size_t wordlen, const float* values);
+
+    // Drop all cached units (previously returned pointers become invalid). Call at frame boundaries.
+    void Reset();
+
+private:
+    std::vector<std::unique_ptr<TUnit>> UnitBuffers; // direct-indexed by MakeKey()
+    TProvideUnit ProvideUnit;
+    TMakeKey MakeKey;
+    void* Opaque; // user context forwarded to ProvideUnit; not owned
+};
+
+} // namespace NAtracDEnc
|
