diff options
author | Daniil Cherednik <dan.cherednik@gmail.com> | 2018-07-02 00:03:58 +0300 |
---|---|---|
committer | Daniil Cherednik <dan.cherednik@gmail.com> | 2018-07-02 00:03:58 +0300 |
commit | 005877ed1be0012444e6086d3d5d04d61c00a92f (patch) | |
tree | 7cd468e78a437a454bb94e2f3660cd35e952519f | |
parent | 8ec37180923dd976bc60b379d2940988b94da2fc (diff) | |
download | atracdenc-005877ed1be0012444e6086d3d5d04d61c00a92f.tar.gz |
atrac3: Tonal component coding improvements:
- Move tonal coding in to bit allocation loop.
- Tonal extraction pessimization (should be rewritten with proper psy).
- Some tuning.
Results:
- Fixed high frequency cut while tonal component encoding enabled.
- Reduce spectr holes.
-rw-r--r-- | src/atrac/atrac3.h | 1 | ||||
-rw-r--r-- | src/atrac/atrac3_bitstream.cpp | 105 | ||||
-rw-r--r-- | src/atrac/atrac3_bitstream.h | 30 | ||||
-rw-r--r-- | src/atrac3denc.cpp | 17 |
4 files changed, 99 insertions, 54 deletions
diff --git a/src/atrac/atrac3.h b/src/atrac/atrac3.h index bc90f7e..b313d88 100644 --- a/src/atrac/atrac3.h +++ b/src/atrac/atrac3.h @@ -245,6 +245,7 @@ public: struct TTonalVal { const uint16_t Pos; const double Val; + const uint8_t Bfu; }; typedef std::vector<TTonalVal> TTonalComponents; }; diff --git a/src/atrac/atrac3_bitstream.cpp b/src/atrac/atrac3_bitstream.cpp index 9d0b47d..d50d7ed 100644 --- a/src/atrac/atrac3_bitstream.cpp +++ b/src/atrac/atrac3_bitstream.cpp @@ -34,8 +34,8 @@ using std::vector; using std::memset; static const uint32_t FixedBitAllocTable[TAtrac3Data::MaxBfus] = { - 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 3, 3, 3, 3, 3, + 5, 5, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 1, 1, 0 }; @@ -95,9 +95,11 @@ uint32_t TAtrac3BitStreamWriter::VLCEnc(const uint32_t selector, const int manti return bitsUsed; } -std::pair<uint8_t, uint32_t> TAtrac3BitStreamWriter::CalcSpecsBitsConsumption(const vector<TScaledBlock>& scaledBlocks, +std::pair<uint8_t, uint32_t> TAtrac3BitStreamWriter::CalcSpecsBitsConsumption(const TSingleChannelElement& sce, const vector<uint32_t>& precisionPerEachBlocks, int* mantisas) { + + const vector<TScaledBlock>& scaledBlocks = sce.ScaledBlocks; const uint32_t numBlocks = precisionPerEachBlocks.size(); uint32_t bitsUsed = numBlocks * 3; @@ -141,9 +143,10 @@ static inline bool CheckBfus(uint8_t* numBfu, const vector<uint32_t>& precisionP return false; } -std::pair<uint8_t, vector<uint32_t>> TAtrac3BitStreamWriter::CreateAllocation(const vector<TScaledBlock>& scaledBlocks, +std::pair<uint8_t, vector<uint32_t>> TAtrac3BitStreamWriter::CreateAllocation(const TSingleChannelElement& sce, uint16_t bitsUsed, int mt[MaxSpecs]) { + const vector<TScaledBlock>& scaledBlocks = sce.ScaledBlocks; TFloat spread = AnalizeScaleFactorSpread(scaledBlocks); uint8_t numBfu = BfuIdxConst ? BfuIdxConst : 32; @@ -159,7 +162,11 @@ std::pair<uint8_t, vector<uint32_t>> TAtrac3BitStreamWriter::CreateAllocation(co for (;;) { TFloat shift = (maxShift + minShift) / 2; const vector<uint32_t>& tmpAlloc = CalcBitsAllocation(scaledBlocks, numBfu, spread, shift); - const auto consumption = CalcSpecsBitsConsumption(scaledBlocks, tmpAlloc, mt); + auto consumption = CalcSpecsBitsConsumption(sce, tmpAlloc, mt); + + auto bitsUsedByTonal = EncodeTonalComponents(sce, tmpAlloc, nullptr); + //std::cerr << consumption.second << " |tonal: " << bitsUsedByTonal << std::endl; + consumption.second += bitsUsedByTonal; if (consumption.second < bitsAvaliablePerBfus) { if (maxShift - minShift < 0.1) { @@ -179,15 +186,19 @@ std::pair<uint8_t, vector<uint32_t>> TAtrac3BitStreamWriter::CreateAllocation(co } } } + //std::cerr << "==" << std::endl; return { mode, precisionPerEachBlocks }; } -void TAtrac3BitStreamWriter::EncodeSpecs(const vector<TScaledBlock>& scaledBlocks, NBitStream::TBitStream* bitStream, - const uint16_t bitsUsed) +void TAtrac3BitStreamWriter::EncodeSpecs(const TSingleChannelElement& sce, NBitStream::TBitStream* bitStream, + uint16_t bitsUsed) { int mt[MaxSpecs]; - auto allocation = CreateAllocation(scaledBlocks, bitsUsed, mt); + const vector<TScaledBlock>& scaledBlocks = sce.ScaledBlocks; + + auto allocation = CreateAllocation(sce, bitsUsed, mt); const vector<uint32_t>& precisionPerEachBlocks = allocation.second; + EncodeTonalComponents(sce, precisionPerEachBlocks, bitStream); const uint32_t numBlocks = precisionPerEachBlocks.size(); //number of blocks to save const uint32_t codingMode = allocation.first;//0 - VLC, 1 - CLC @@ -220,15 +231,19 @@ void TAtrac3BitStreamWriter::EncodeSpecs(const vector<TScaledBlock>& scaledBlock } uint8_t TAtrac3BitStreamWriter::GroupTonalComponents(const std::vector<TTonalBlock>& tonalComponents, + const vector<uint32_t>& allocTable, TTonalComponentsSubGroup groups[64]) { for (const TTonalBlock& tc : tonalComponents) { assert(tc.ScaledBlock.Values.size() < 8); assert(tc.ScaledBlock.Values.size() > 0); - assert(tc.QuantIdx >1); - assert(tc.QuantIdx <8); - groups[tc.QuantIdx * 8 + tc.ScaledBlock.Values.size()].SubGroupPtr.push_back(&tc); + assert(tc.ValPtr->Bfu < allocTable.size()); + auto quant = std::max((uint32_t)2, std::min(allocTable[tc.ValPtr->Bfu] + 1, (uint32_t)7)); + //std::cerr << " | " << tc.ValPtr->Pos << " | " << (int)tc.ValPtr->Bfu << " | " << quant << std::endl; + groups[quant * 8 + tc.ScaledBlock.Values.size()].SubGroupPtr.push_back(&tc); } + + //std::cerr << "=====" << std::endl; uint8_t tcsgn = 0; //for each group for (uint8_t i = 0; i < 64; ++i) { @@ -257,20 +272,30 @@ uint8_t TAtrac3BitStreamWriter::GroupTonalComponents(const std::vector<TTonalBlo return tcsgn; } -uint16_t TAtrac3BitStreamWriter::EncodeTonalComponents(const std::vector<TTonalBlock>& tonalComponents, - NBitStream::TBitStream* bitStream, uint8_t numQmfBand) +uint16_t TAtrac3BitStreamWriter::EncodeTonalComponents(const TSingleChannelElement& sce, + const vector<uint32_t>& allocTable, + NBitStream::TBitStream* bitStream) { - const uint16_t bitsUsed = bitStream->GetSizeInBits(); + const uint16_t bitsUsedOld = bitStream ? bitStream->GetSizeInBits() : 0; + const std::vector<TTonalBlock>& tonalComponents = sce.TonalBlocks; + const TAtrac3Data::SubbandInfo& subbandInfo = sce.SubbandInfo; + const uint8_t numQmfBand = subbandInfo.GetQmfNum(); + uint16_t bitsUsed = 0; + //group tonal components with same quantizer and len TTonalComponentsSubGroup groups[64]; - const uint8_t tcsgn = GroupTonalComponents(tonalComponents, groups); + const uint8_t tcsgn = GroupTonalComponents(tonalComponents, allocTable, groups); assert(tcsgn < 32); - bitStream->Write(tcsgn, 5); + + bitsUsed += 5; + if (bitStream) + bitStream->Write(tcsgn, 5); + if (tcsgn == 0) { for (int i = 0; i < 64; ++i) assert(groups[i].SubGroupPtr.size() == 0); - return 5; //wrote 0 but 5 bits for tcsgn + return bitsUsed; } //Coding mode: // 0 - All are VLC @@ -279,7 +304,9 @@ uint16_t TAtrac3BitStreamWriter::EncodeTonalComponents(const std::vector<TTonalB // 3 - Own mode for each component //TODO: implement switch for best coding mode. Now VLC for all - bitStream->Write(0, 2); + bitsUsed += 2; + if (bitStream) + bitStream->Write(0, 2); uint8_t tcgnCheck = 0; //for each group of equal quantiser and len @@ -316,17 +343,24 @@ uint16_t TAtrac3BitStreamWriter::EncodeTonalComponents(const std::vector<TTonalB tcgnCheck++; - for (uint8_t j = 0; j < numQmfBand; ++j) { - bitStream->Write((bool)bandFlags.i[j], 1); + bitsUsed += numQmfBand; + if (bitStream) { + for (uint8_t j = 0; j < numQmfBand; ++j) { + bitStream->Write((bool)bandFlags.i[j], 1); + } } //write number of coded values for components in current group assert(codedValues > 0); - bitStream->Write(codedValues - 1, 3); + bitsUsed += 3; + if (bitStream) + bitStream->Write(codedValues - 1, 3); //write quant index assert((i >> 3) > 1); assert((i >> 3) < 8); assert(i); - bitStream->Write(i >> 3, 3); + bitsUsed += 3; + if (bitStream) + bitStream->Write(i >> 3, 3); uint8_t lastPos = subGroupStartPos; uint8_t checkPos = 0; for (uint16_t j = 0; j < 16; ++j) { @@ -336,17 +370,24 @@ uint16_t TAtrac3BitStreamWriter::EncodeTonalComponents(const std::vector<TTonalB const uint8_t codedComponents = bandFlags.c[j]; assert(codedComponents < 8); - bitStream->Write(codedComponents, 3); + bitsUsed += 3; + if (bitStream) + bitStream->Write(codedComponents, 3); uint8_t k = lastPos; for (; k < lastPos + codedComponents; ++k) { assert(curGroup.SubGroupPtr[k]->ValPtr->Pos >= j * 64); uint16_t relPos = curGroup.SubGroupPtr[k]->ValPtr->Pos - j * 64; assert(curGroup.SubGroupPtr[k]->ScaledBlock.ScaleFactorIndex < 64); - bitStream->Write(curGroup.SubGroupPtr[k]->ScaledBlock.ScaleFactorIndex, 6); + + bitsUsed += 6; + if (bitStream) + bitStream->Write(curGroup.SubGroupPtr[k]->ScaledBlock.ScaleFactorIndex, 6); assert(relPos < 64); - bitStream->Write(relPos, 6); + bitsUsed += 6; + if (bitStream) + bitStream->Write(relPos, 6); assert(curGroup.SubGroupPtr[k]->ScaledBlock.Values.size() < 8); int mantisas[256]; @@ -358,7 +399,7 @@ uint16_t TAtrac3BitStreamWriter::EncodeTonalComponents(const std::vector<TTonalB //VLCEnc assert(i); - VLCEnc(i>>3, mantisas, curGroup.SubGroupPtr[k]->ScaledBlock.Values.size(), bitStream); + bitsUsed += VLCEnc(i>>3, mantisas, curGroup.SubGroupPtr[k]->ScaledBlock.Values.size(), bitStream); } @@ -370,7 +411,9 @@ uint16_t TAtrac3BitStreamWriter::EncodeTonalComponents(const std::vector<TTonalB } } assert(tcgnCheck == tcsgn); - return bitStream->GetSizeInBits() - bitsUsed; + if (bitStream) + assert(bitStream->GetSizeInBits() - bitsUsedOld == bitsUsed); + return bitsUsed; } vector<uint32_t> TAtrac3BitStreamWriter::CalcBitsAllocation(const std::vector<TScaledBlock>& scaledBlocks, @@ -403,7 +446,6 @@ void TAtrac3BitStreamWriter::WriteSoundUnit(const vector<TSingleChannelElement>& for (uint32_t channel = 0; channel < singleChannelElements.size(); channel++) { const TSingleChannelElement& sce = singleChannelElements[channel]; const TAtrac3Data::SubbandInfo& subbandInfo = sce.SubbandInfo; - const std::vector<TTonalBlock>& tonalComponents = sce.TonalBlocks; NBitStream::TBitStream* bitStream = &bitStreams[channel]; @@ -430,17 +472,14 @@ void TAtrac3BitStreamWriter::WriteSoundUnit(const vector<TSingleChannelElement>& } } const uint16_t bitsUsedByGainInfoAndHeader = bitStream->GetSizeInBits(); - const uint16_t bitsUsedByTonal = EncodeTonalComponents(tonalComponents, bitStream, numQmfBand); - usedBits[channel] = bitsUsedByGainInfoAndHeader + bitsUsedByTonal; + usedBits[channel] = bitsUsedByGainInfoAndHeader; assert(bitStream->GetSizeInBits() == usedBits[channel]); } for (uint32_t channel = 0; channel < singleChannelElements.size(); channel++) { const TSingleChannelElement& sce = singleChannelElements[channel]; - const vector<TScaledBlock>& scaledBlocks = sce.ScaledBlocks; NBitStream::TBitStream* bitStream = &bitStreams[channel]; - //spec - EncodeSpecs(scaledBlocks, bitStream, usedBits[channel]); + EncodeSpecs(sce, bitStream, usedBits[channel]); if (!Container) abort(); diff --git a/src/atrac/atrac3_bitstream.h b/src/atrac/atrac3_bitstream.h index 186e267..4def07c 100644 --- a/src/atrac/atrac3_bitstream.h +++ b/src/atrac/atrac3_bitstream.h @@ -30,17 +30,23 @@ namespace NAtracDEnc { namespace NAtrac3 { struct TTonalBlock { - TTonalBlock(const TAtrac3Data::TTonalVal* valPtr, uint8_t quantIdx, const TScaledBlock& scaledBlock) + TTonalBlock(const TAtrac3Data::TTonalVal* valPtr, const TScaledBlock& scaledBlock) : ValPtr(valPtr) - , QuantIdx(quantIdx) , ScaledBlock(scaledBlock) {} const TAtrac3Data::TTonalVal* ValPtr = nullptr; - uint8_t QuantIdx = 0; TScaledBlock ScaledBlock; }; class TAtrac3BitStreamWriter : public virtual TAtrac3Data { +public: + struct TSingleChannelElement { + TAtrac3Data::SubbandInfo SubbandInfo; + std::vector<TTonalBlock> TonalBlocks; + std::vector<TScaledBlock> ScaledBlocks; + }; +private: + struct TTonalComponentsSubGroup { std::vector<uint8_t> SubGroupMap; std::vector<const TTonalBlock*> SubGroupPtr; @@ -59,21 +65,23 @@ class TAtrac3BitStreamWriter : public virtual TAtrac3Data { std::vector<uint32_t> CalcBitsAllocation(const std::vector<TScaledBlock>& scaledBlocks, uint32_t bfuNum, TFloat spread, TFloat shift); - std::pair<uint8_t, std::vector<uint32_t>> CreateAllocation(const std::vector<TScaledBlock>& scaledBlocks, + std::pair<uint8_t, std::vector<uint32_t>> CreateAllocation(const TSingleChannelElement& sce, uint16_t bitsUsed, int mt[MaxSpecs]); - std::pair<uint8_t, uint32_t> CalcSpecsBitsConsumption(const std::vector<TScaledBlock>& scaledBlocks, + std::pair<uint8_t, uint32_t> CalcSpecsBitsConsumption(const TSingleChannelElement& sce, const std::vector<uint32_t>& precisionPerEachBlocks, int* mantisas); - void EncodeSpecs(const std::vector<TScaledBlock>& scaledBlocks, NBitStream::TBitStream* bitStream, + void EncodeSpecs(const TSingleChannelElement& sce, NBitStream::TBitStream* bitStream, uint16_t bitsUsed); uint8_t GroupTonalComponents(const std::vector<TTonalBlock>& tonalComponents, + const std::vector<uint32_t>& allocTable, TTonalComponentsSubGroup groups[64]); - uint16_t EncodeTonalComponents(const std::vector<TTonalBlock>& tonalComponents, - NBitStream::TBitStream* bitStream, uint8_t numQmfBand); + uint16_t EncodeTonalComponents(const TSingleChannelElement& sce, + const std::vector<uint32_t>& allocTable, + NBitStream::TBitStream* bitStream); public: TAtrac3BitStreamWriter(TOma* container, const TContainerParams& params, uint32_t bfuIdxConst) //no mono mode for atrac3 : Container(container) @@ -83,12 +91,6 @@ public: } - struct TSingleChannelElement { - TAtrac3Data::SubbandInfo SubbandInfo; - std::vector<TTonalBlock> TonalBlocks; - std::vector<TScaledBlock> ScaledBlocks; - }; - void WriteSoundUnit(const std::vector<TSingleChannelElement>& singleChannelElements); }; diff --git a/src/atrac3denc.cpp b/src/atrac3denc.cpp index 202e10e..0dc9e4f 100644 --- a/src/atrac3denc.cpp +++ b/src/atrac3denc.cpp @@ -133,10 +133,10 @@ TAtrac3MDCT::TGainModulatorArray TAtrac3MDCT::MakeGainModulatorArray(const TAtra TAtrac3Data::TTonalComponents TAtrac3Processor::ExtractTonalComponents(TFloat* specs, TTonalDetector fn) { TAtrac3Data::TTonalComponents res; - const float thresholds[TAtrac3Data::NumQMF] = { 0.9, 2.4, 2.8, 3.2 }; + const float thresholds[TAtrac3Data::NumQMF] = { 16, 2.4, 2.8, 3.2 }; for (uint8_t bandNum = 0; bandNum < this->NumQMF; ++bandNum) { //disable for frequence above 16KHz until we works without proper psy - if (bandNum > 2) + if (bandNum) continue; for (uint8_t blockNum = BlocksPerBand[bandNum]; blockNum < BlocksPerBand[bandNum + 1]; ++blockNum) { const uint16_t specNumStart = SpecsStartLong[blockNum]; @@ -146,13 +146,14 @@ TAtrac3Data::TTonalComponents TAtrac3Processor::ExtractTonalComponents(TFloat* s for (uint16_t n = specNumStart; n < specNumEnd; ++n) { //TODO: TFloat absValue = std::abs(specs[n]); + //std::cerr << n << " " << absValue << " " << level << std::endl; if (absValue > 65535.0) { TFloat shift = (specs[n] > 0) ? 65535.0 : -65535.0; std::cerr << "overflow: " << specs[n] << " at: " << n << std::endl; //res.push_back({n, specs[n] - shift}); specs[n] = shift; - } else if (log10(std::abs(specs[n])) - log10(level) > thresholds[bandNum]) { - res.push_back({n, specs[n]/* - level*/}); + } else if (std::abs(specs[n]) / level > thresholds[bandNum]) { + res.push_back({n, specs[n]/* - level*/, blockNum}); specs[n] = 0;//level; } @@ -178,7 +179,7 @@ void TAtrac3Processor::MapTonalComponents(const TTonalComponents& tonalComponent for (uint8_t j = 0; j < len; ++j) tmp[j] = tonalComponents[startPos + j].Val; const TScaledBlock& scaledBlock = Scaler.Scale(tmp, len); - componentMap->push_back({&tonalComponents[startPos], 7, scaledBlock}); + componentMap->push_back({&tonalComponents[startPos], scaledBlock}); } } @@ -377,12 +378,14 @@ TPCMEngine<TFloat>::TProcessLambda TAtrac3Processor::GetEncodeLambda() tonals[channel] = Params.NoTonalComponents ? TAtrac3Data::TTonalComponents() : ExtractTonalComponents(specs.data(), [](const TFloat* spec, uint16_t len) { std::vector<TFloat> magnitude(len); + //TFloat s = 0.0; for (uint16_t i = 0; i < len; ++i) { magnitude[i] = std::abs(spec[i]); + // s += magnitude[i]; } - float median = CalcMedian(magnitude.data(), len); + float median = CalcMedian(magnitude.data(), len); for (uint16_t i = 0; i < len; ++i) { - if (median > 0.001) { + if (median > 1.0) { return median; } } |