diff options
author | Daniil Cherednik <dan.cherednik@gmail.com> | 2016-03-13 09:49:33 +0300 |
---|---|---|
committer | Daniil Cherednik <dan.cherednik@gmail.com> | 2016-09-02 21:21:28 +0300 |
commit | cfaa2cd39b7256a868a4f5cd83aac207df6bd1b3 (patch) | |
tree | 75efff26584e046566d17cd308d45b6b0fd5abfc | |
parent | b4df8a7c2dd12eea27c8cc52bd52a1bb8c00943f (diff) | |
download | atracdenc-cfaa2cd39b7256a868a4f5cd83aac207df6bd1b3.tar.gz |
Dirty implementation of atrac3 encoder:
- no JS mode
- constant quantiser for tonal components
- gain control implemented but produces some artifacts with real signals.
- etc...
49 files changed, 3070 insertions, 406 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt index 2bce62f..6b36f06 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -12,6 +12,18 @@ endmacro(use_cxx11) use_cxx11() +macro(use_c11) + if (CMAKE_VERSION VERSION_LESS "3.1") + if (CMAKE_C_COMPILER_ID STREQUAL "GNU") + set (CMAKE_C_FLAGS "--std=gnu11 ${CMAKE_C_FLAGS}") + endif () + else () + set (CMAKE_C_STANDARD 11) + endif () +endmacro(use_c11) + +use_c11() + add_subdirectory(3rd/gtest-1.7.0) add_subdirectory(src) add_subdirectory(test) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index fd7fe24..c9c797a 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -1,5 +1,8 @@ CMAKE_MINIMUM_REQUIRED(VERSION 2.8) +#add_definitions( "-Wall -O2 -g -Rpass-analysis=loop-vectorize" ) +#set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g -fsanitize=address -fno-omit-frame-pointer") + add_definitions( "-Wall -O2 -g" ) project(atracdenc) @@ -8,11 +11,30 @@ set(CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake/modules") INCLUDE(FindLibSndFile) include_directories(${LIBSNDFILE_INCLUDE_DIR}) +include_directories("oma/liboma/include") -set(SOURCE_LIB mdct/vorbis_impl/mdct.c) -set(SOURCE_EXE main.cpp wav.cpp aea.cpp transient_detector.cpp atracdenc.cpp bitstream/bitstream.cpp atrac/atrac1.cpp atrac/atrac1_dequantiser.cpp atrac/atrac_scale.cpp atrac/atrac1_bitalloc.cpp) +set(SOURCE_MDCT_LIB mdct/vorbis_impl/mdct.c) +set(SOURCE_OMA_LIB oma/liboma/src/liboma.c) +set(SOURCE_EXE + main.cpp + wav.cpp + aea.cpp + transient_detector.cpp + atrac1denc.cpp + bitstream/bitstream.cpp + atrac/atrac1.cpp + atrac/atrac1_dequantiser.cpp + atrac/atrac_scale.cpp + atrac/atrac_psy_common.cpp + atrac/atrac1_bitalloc.cpp + oma.cpp + atrac3denc.cpp + atrac/atrac3.cpp + atrac/atrac3_bitstream.cpp + ) -add_library(mdct_impl STATIC ${SOURCE_LIB}) +add_library(mdct_impl STATIC ${SOURCE_MDCT_LIB}) +add_library(oma STATIC ${SOURCE_OMA_LIB}) add_executable(atracdenc ${SOURCE_EXE}) -target_link_libraries(atracdenc mdct_impl ${SNDFILE_LIBRARIES}) 
+target_link_libraries(atracdenc mdct_impl oma ${SNDFILE_LIBRARIES}) @@ -48,5 +48,4 @@ public: long long GetLengthInSamples() const override; }; -typedef std::unique_ptr<IAtrac1IO> TAeaPtr; diff --git a/src/atrac/atrac1.cpp b/src/atrac/atrac1.cpp index 26d8218..b71e5ae 100644 --- a/src/atrac/atrac1.cpp +++ b/src/atrac/atrac1.cpp @@ -1,10 +1,15 @@ #include "atrac1.h" -constexpr uint32_t TAtrac1Data::BlocksPerBand[QMF_BANDS + 1]; -constexpr uint32_t TAtrac1Data::SpecsPerBlock[MAX_BFUS]; -constexpr uint32_t TAtrac1Data::SpecsStartLong[MAX_BFUS]; -constexpr uint32_t TAtrac1Data::SpecsStartShort[MAX_BFUS]; +namespace NAtracDEnc { +namespace NAtrac1 { + +constexpr uint32_t TAtrac1Data::BlocksPerBand[NumQMF + 1]; +constexpr uint32_t TAtrac1Data::SpecsPerBlock[MaxBfus]; +constexpr uint32_t TAtrac1Data::SpecsStartLong[MaxBfus]; +constexpr uint32_t TAtrac1Data::SpecsStartShort[MaxBfus]; constexpr uint32_t TAtrac1Data::BfuAmountTab[8]; double TAtrac1Data::ScaleTable[64] = {0}; double TAtrac1Data::SineWindow[32] = {0}; +} //namespace NAtrac1 +} //namespace NAtracDEnc diff --git a/src/atrac/atrac1.h b/src/atrac/atrac1.h index cb0df1f..9736ea7 100644 --- a/src/atrac/atrac1.h +++ b/src/atrac/atrac1.h @@ -4,23 +4,52 @@ #include <map> #include <math.h> #include "../bitstream/bitstream.h" -const int QMF_BANDS = 3; -const int MAX_BFUS = 52; +namespace NAtracDEnc { +namespace NAtrac1 { + +class TAtrac1EncodeSettings { +public: + enum class EWindowMode { + EWM_NOTRANSIENT, + EWM_AUTO + }; +private: + const uint32_t BfuIdxConst = 0; + const bool FastBfuNumSearch = false; + EWindowMode WindowMode = EWindowMode::EWM_AUTO; + const uint32_t WindowMask = 0; +public: + TAtrac1EncodeSettings() + {} + TAtrac1EncodeSettings(uint32_t bfuIdxConst, bool fastBfuNumSearch, EWindowMode windowMode, uint32_t windowMask) + : BfuIdxConst(bfuIdxConst) + , FastBfuNumSearch(fastBfuNumSearch) + , WindowMode(windowMode) + , WindowMask(windowMask) + {} + uint32_t GetBfuIdxConst() const { return BfuIdxConst; } + 
bool GetFastBfuNumSearch() const { return FastBfuNumSearch; } + EWindowMode GetWindowMode() const {return WindowMode; } + uint32_t GetWindowMask() const {return WindowMask; } +}; class TAtrac1Data { +public: + static constexpr uint8_t MaxBfus = 52; + static constexpr uint8_t NumQMF = 3; protected: - static constexpr uint32_t SpecsPerBlock[MAX_BFUS] = { + static constexpr uint32_t SpecsPerBlock[MaxBfus] = { 8, 8, 8, 8, 4, 4, 4, 4, 8, 8, 8, 8, 6, 6, 6, 6, 6, 6, 6, 6, // low band 6, 6, 6, 6, 7, 7, 7, 7, 9, 9, 9, 9, 10, 10, 10, 10, // middle band 12, 12, 12, 12, 12, 12, 12, 12, 20, 20, 20, 20, 20, 20, 20, 20 // high band }; - static constexpr uint32_t BlocksPerBand[QMF_BANDS + 1] = {0, 20, 36, 52}; - static constexpr uint32_t SpecsStartLong[MAX_BFUS] = { + static constexpr uint32_t BlocksPerBand[NumQMF + 1] = {0, 20, 36, 52}; + static constexpr uint32_t SpecsStartLong[MaxBfus] = { 0, 8, 16, 24, 32, 36, 40, 44, 48, 56, 64, 72, 80, 86, 92, 98, 104, 110, 116, 122, 128, 134, 140, 146, 152, 159, 166, 173, 180, 189, 198, 207, 216, 226, 236, 246, 256, 268, 280, 292, 304, 316, 328, 340, 352, 372, 392, 412, 432, 452, 472, 492, }; - static constexpr uint32_t SpecsStartShort[MAX_BFUS] = { + static constexpr uint32_t SpecsStartShort[MaxBfus] = { 0, 32, 64, 96, 8, 40, 72, 104, 12, 44, 76, 108, 20, 52, 84, 116, 26, 58, 90, 122, 128, 160, 192, 224, 134, 166, 198, 230, 141, 173, 205, 237, 150, 182, 214, 246, 256, 288, 320, 352, 384, 416, 448, 480, 268, 300, 332, 364, 396, 428, 460, 492 @@ -30,8 +59,6 @@ protected: static const uint32_t BitsPerBfuAmountTabIdx = 3; static const uint32_t BitsPerIDWL = 4; static const uint32_t BitsPerIDSF = 6; - static const uint32_t NumSamples = 512; - static const uint8_t NumQMF = QMF_BANDS; static double ScaleTable[64]; static double SineWindow[32]; @@ -43,6 +70,7 @@ protected: return 2; } public: + static const uint32_t NumSamples = 512; TAtrac1Data() { if (ScaleTable[0] == 0) { for (uint32_t i = 0; i < 64; i++) { @@ -57,30 +85,5 @@ public: } }; 
-class TBlockSize { - static std::array<int, QMF_BANDS> Parse(NBitStream::TBitStream* stream) { - std::array<int,QMF_BANDS> tmp; - tmp[0] = 2 - stream->Read(2); - tmp[1] = 2 - stream->Read(2); - tmp[2] = 3 - stream->Read(2); - stream->Read(2); //skip unused 2 bits - return tmp; - } - static std::array<int,QMF_BANDS> Create(bool lowShort, bool midShort, bool hiShort) { - std::array<int,QMF_BANDS> tmp; - tmp[0] = lowShort ? 2 : 0; - tmp[1] = midShort ? 2 : 0; - tmp[2] = hiShort ? 3 : 0; - return tmp; - } -public: - TBlockSize(NBitStream::TBitStream* stream) - : LogCount(Parse(stream)) - {} - TBlockSize(bool lowShort, bool midShort, bool hiShort) - : LogCount(Create(lowShort, midShort, hiShort)) - {} - const std::array<int,QMF_BANDS> LogCount; -}; - - +} //namespace NAtrac1 +} //namespace NAtracDEnc diff --git a/src/atrac/atrac1_bitalloc.cpp b/src/atrac/atrac1_bitalloc.cpp index 783ffd1..0db6272 100644 --- a/src/atrac/atrac1_bitalloc.cpp +++ b/src/atrac/atrac1_bitalloc.cpp @@ -1,9 +1,12 @@ #include "atrac1_bitalloc.h" +#include "atrac_psy_common.h" #include "atrac_scale.h" #include "atrac1.h" #include <math.h> #include <cassert> #include "../bitstream/bitstream.h" + +namespace NAtracDEnc { namespace NAtrac1 { using std::vector; @@ -11,47 +14,26 @@ using std::cerr; using std::endl; using std::pair; -static const uint32_t FixedBitAllocTableLong[MAX_BFUS] = { +static const uint32_t FixedBitAllocTableLong[TAtrac1BitStreamWriter::MaxBfus] = { 7, 7, 7, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 4, 4, 4, 3, 3, 3, 3, 3, 3, 2, 2, 2, 1, 1, 0, 0, 0 }; -static const uint32_t FixedBitAllocTableShort[MAX_BFUS] = { +static const uint32_t FixedBitAllocTableShort[TAtrac1BitStreamWriter::MaxBfus] = { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0 }; -static const uint32_t BitBoostMask[MAX_BFUS] = { +static const uint32_t 
BitBoostMask[TAtrac1BitStreamWriter::MaxBfus] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; -//returns 1 for tone-like, 0 - noise-like -static double AnalizeSpread(const std::vector<TScaledBlock>& scaledBlocks) { - double s = 0.0; - for (size_t i = 0; i < scaledBlocks.size(); ++i) { - s += scaledBlocks[i].ScaleFactorIndex; - } - s /= scaledBlocks.size(); - double sigma = 0.0; - double xxx = 0.0; - for (size_t i = 0; i < scaledBlocks.size(); ++i) { - xxx = (scaledBlocks[i].ScaleFactorIndex - s); - xxx *= xxx; - sigma += xxx; - } - sigma /= scaledBlocks.size(); - sigma = sqrt(sigma); - if (sigma > 14.0) - sigma = 14.0; - return sigma/14.0; -} - TBitsBooster::TBitsBooster() { - for (uint32_t i = 0; i < MAX_BFUS; ++i) { + for (uint32_t i = 0; i < MaxBfus; ++i) { if (BitBoostMask[i] == 0) continue; const uint32_t nBits = SpecsPerBlock[i]; @@ -68,7 +50,6 @@ uint32_t TBitsBooster::ApplyBoost(std::vector<uint32_t>* bitsPerEachBlock, uint3 //the key too low if (maxIt == BitsBoostMap.begin()) return surplus; - //std::cout << "key: " << key << " min key: " << MinKey << " it pos: " << maxIt->first << endl; while (surplus >= MinKey) { bool done = true; @@ -76,7 +57,6 @@ uint32_t TBitsBooster::ApplyBoost(std::vector<uint32_t>* bitsPerEachBlock, uint3 const uint32_t curBits = it->first; const uint32_t curPos = it->second; - //std::cout << "key: " << key << " curBits: " << curBits << endl; assert(key >= curBits); if (curPos >= bitsPerEachBlock->size()) break; @@ -90,23 +70,25 @@ uint32_t TBitsBooster::ApplyBoost(std::vector<uint32_t>* bitsPerEachBlock, uint3 (*bitsPerEachBlock)[curPos] += nBitsPerSpec; surplus -= curBits * nBitsPerSpec; - //std::cout << "added: " << curPos << " " << nBitsPerSpec << " got: " << (*bitsPerEachBlock)[curPos] << endl; done = false; } if (done) break; } - //std::cout << "boost: " << surplus << " was " << target - cur << endl; return 
surplus; } -vector<uint32_t> TAtrac1SimpleBitAlloc::CalcBitsAllocation(const std::vector<TScaledBlock>& scaledBlocks, const uint32_t bfuNum, const double spread, const double shift, const TBlockSize& blockSize) { +vector<uint32_t> TAtrac1SimpleBitAlloc::CalcBitsAllocation(const std::vector<TScaledBlock>& scaledBlocks, + const uint32_t bfuNum, + const TFloat spread, + const TFloat shift, + const TBlockSize& blockSize) { vector<uint32_t> bitsPerEachBlock(bfuNum); for (size_t i = 0; i < bitsPerEachBlock.size(); ++i) { const uint32_t fix = blockSize.LogCount[BfuToBand(i)] ? FixedBitAllocTableShort[i] : FixedBitAllocTableLong[i]; - int tmp = spread * ( (double)scaledBlocks[i].ScaleFactorIndex/3.2) + (1.0 - spread) * fix - shift; + int tmp = spread * ( (TFloat)scaledBlocks[i].ScaleFactorIndex/3.2) + (1.0 - spread) * fix - shift; if (tmp > 16) { bitsPerEachBlock[i] = 16; } else if (tmp < 2) { @@ -143,7 +125,8 @@ uint32_t TAtrac1SimpleBitAlloc::GetMaxUsedBfuId(const vector<uint32_t>& bitsPerE return idx; } -uint32_t TAtrac1SimpleBitAlloc::CheckBfuUsage(bool* changed, uint32_t curBfuId, const vector<uint32_t>& bitsPerEachBlock) { +uint32_t TAtrac1SimpleBitAlloc::CheckBfuUsage(bool* changed, + uint32_t curBfuId, const vector<uint32_t>& bitsPerEachBlock) { uint32_t usedBfuId = GetMaxUsedBfuId(bitsPerEachBlock); if (usedBfuId < curBfuId) { *changed = true; @@ -151,32 +134,34 @@ uint32_t TAtrac1SimpleBitAlloc::CheckBfuUsage(bool* changed, uint32_t curBfuId, } return curBfuId; } + uint32_t TAtrac1SimpleBitAlloc::Write(const std::vector<TScaledBlock>& scaledBlocks, const TBlockSize& blockSize) { uint32_t bfuIdx = BfuIdxConst ? 
BfuIdxConst - 1 : 7; bool autoBfu = !BfuIdxConst; - double spread = AnalizeSpread(scaledBlocks); + TFloat spread = AnalizeScaleFactorSpread(scaledBlocks); vector<uint32_t> bitsPerEachBlock(BfuAmountTab[bfuIdx]); uint32_t targetBitsPerBfus; uint32_t curBitsPerBfus; for (;;) { bitsPerEachBlock.resize(BfuAmountTab[bfuIdx]); - const uint32_t bitsAvaliablePerBfus = SoundUnitSize * 8 - BitsPerBfuAmountTabIdx - 32 - 2 - 3 - bitsPerEachBlock.size() * (BitsPerIDWL + BitsPerIDSF); - double maxShift = 15; - double minShift = -3; - double shift = 3.0; + const uint32_t bitsAvaliablePerBfus = SoundUnitSize * 8 - BitsPerBfuAmountTabIdx - 32 - 2 - 3 - + bitsPerEachBlock.size() * (BitsPerIDWL + BitsPerIDSF); + TFloat maxShift = 15; + TFloat minShift = -3; + TFloat shift = 3.0; const uint32_t maxBits = bitsAvaliablePerBfus; const uint32_t minBits = bitsAvaliablePerBfus - 110; bool bfuNumChanged = false; for (;;) { - const vector<uint32_t>& tmpAlloc = CalcBitsAllocation(scaledBlocks, BfuAmountTab[bfuIdx], spread, shift, blockSize); + const vector<uint32_t>& tmpAlloc = CalcBitsAllocation(scaledBlocks, BfuAmountTab[bfuIdx], + spread, shift, blockSize); uint32_t bitsUsed = 0; for (size_t i = 0; i < tmpAlloc.size(); i++) { bitsUsed += SpecsPerBlock[i] * tmpAlloc[i]; } - //std::cout << spread << " bitsUsed: " << bitsUsed << " min " << minBits << " max " << maxBits << " " << maxShift << " " << minShift << " " << endl; if (bitsUsed < minBits) { if (maxShift - minShift < 0.1) { if (autoBfu) { @@ -214,7 +199,10 @@ uint32_t TAtrac1SimpleBitAlloc::Write(const std::vector<TScaledBlock>& scaledBlo return BfuAmountTab[bfuIdx]; } -void TAtrac1BitStreamWriter::WriteBitStream(const vector<uint32_t>& bitsPerEachBlock, const std::vector<TScaledBlock>& scaledBlocks, uint32_t bfuAmountIdx, const TBlockSize& blockSize) { +void TAtrac1BitStreamWriter::WriteBitStream(const vector<uint32_t>& bitsPerEachBlock, + const std::vector<TScaledBlock>& scaledBlocks, + uint32_t bfuAmountIdx, + const TBlockSize& 
blockSize) { NBitStream::TBitStream bitStream; size_t bitUsed = 0; if (bfuAmountIdx >= (1 << BitsPerBfuAmountTabIdx)) { @@ -252,8 +240,8 @@ void TAtrac1BitStreamWriter::WriteBitStream(const vector<uint32_t>& bitsPerEachB if (wordLength == 0 || wordLength == 1) continue; - const double multiple = ((1 << (wordLength - 1)) - 1); - for (const double val : scaledBlocks[i].Values) { + const TFloat multiple = ((1 << (wordLength - 1)) - 1); + for (const TFloat val : scaledBlocks[i].Values) { const int tmp = round(val * multiple); const uint32_t testwl = bitsPerEachBlock[i] ? (bitsPerEachBlock[i] - 1) : 0; const uint32_t a = !!testwl + testwl; @@ -280,4 +268,5 @@ void TAtrac1BitStreamWriter::WriteBitStream(const vector<uint32_t>& bitsPerEachB Container->WriteFrame(bitStream.GetBytes()); } -} +} //namespace NAtrac1 +} //namespace NAtracDEnc diff --git a/src/atrac/atrac1_bitalloc.h b/src/atrac/atrac1_bitalloc.h index ce7b6fb..c5c4ad2 100644 --- a/src/atrac/atrac1_bitalloc.h +++ b/src/atrac/atrac1_bitalloc.h @@ -7,7 +7,9 @@ #include <map> #include <cstdint> +namespace NAtracDEnc { namespace NAtrac1 { + using NAtracDEnc::TScaledBlock; class IAtrac1BitAlloc { @@ -32,11 +34,13 @@ public: explicit TAtrac1BitStreamWriter(TAea* container) : Container(container) {}; - void WriteBitStream(const std::vector<uint32_t>& bitsPerEachBlock, const std::vector<TScaledBlock>& scaledBlocks, uint32_t bfuAmountIdx, const TBlockSize& blockSize); + void WriteBitStream(const std::vector<uint32_t>& bitsPerEachBlock, const std::vector<TScaledBlock>& scaledBlocks, + uint32_t bfuAmountIdx, const TBlockSize& blockSize); }; class TAtrac1SimpleBitAlloc : public TAtrac1BitStreamWriter, public TBitsBooster, public virtual IAtrac1BitAlloc { - std::vector<uint32_t> CalcBitsAllocation(const std::vector<TScaledBlock>& scaledBlocks, const uint32_t bfuNum, const double spread, const double shift, const TBlockSize& blockSize); + std::vector<uint32_t> CalcBitsAllocation(const std::vector<TScaledBlock>& scaledBlocks, 
const uint32_t bfuNum, + const TFloat spread, const TFloat shift, const TBlockSize& blockSize); const uint32_t BfuIdxConst; const bool FastBfuNumSearch; uint32_t GetMaxUsedBfuId(const std::vector<uint32_t>& bitsPerEachBlock); @@ -51,4 +55,5 @@ public: uint32_t Write(const std::vector<TScaledBlock>& scaledBlocks, const TBlockSize& blockSize) override; }; -} +} //namespace NAtrac1 +} //namespace NAtracDEnc diff --git a/src/atrac/atrac1_dequantiser.cpp b/src/atrac/atrac1_dequantiser.cpp index 8229822..83abc76 100644 --- a/src/atrac/atrac1_dequantiser.cpp +++ b/src/atrac/atrac1_dequantiser.cpp @@ -1,15 +1,16 @@ #include "atrac1_dequantiser.h" #include <string.h> - +namespace NAtracDEnc { namespace NAtrac1 { + using namespace NBitStream; TAtrac1Dequantiser::TAtrac1Dequantiser() { } -void TAtrac1Dequantiser::Dequant(TBitStream* stream, const TBlockSize& bs, double specs[512]) { - uint32_t wordLens[MAX_BFUS]; - uint32_t idScaleFactors[MAX_BFUS]; +void TAtrac1Dequantiser::Dequant(TBitStream* stream, const TBlockSize& bs, TFloat specs[512]) { + uint32_t wordLens[MaxBfus]; + uint32_t idScaleFactors[MaxBfus]; const uint32_t numBFUs = BfuAmountTab[stream->Read(3)]; stream->Read(2); stream->Read(3); @@ -21,28 +22,29 @@ void TAtrac1Dequantiser::Dequant(TBitStream* stream, const TBlockSize& bs, doubl for (uint32_t i = 0; i < numBFUs; i++) { idScaleFactors[i] = stream->Read(6); } - for (uint32_t i = numBFUs; i < MAX_BFUS; i++) { + for (uint32_t i = numBFUs; i < MaxBfus; i++) { wordLens[i] = idScaleFactors[i] = 0; } - for (uint32_t bandNum = 0; bandNum < QMF_BANDS; bandNum++) { + for (uint32_t bandNum = 0; bandNum < NumQMF; bandNum++) { for (uint32_t bfuNum = BlocksPerBand[bandNum]; bfuNum < BlocksPerBand[bandNum + 1]; bfuNum++) { const uint32_t numSpecs = SpecsPerBlock[bfuNum]; const uint32_t wordLen = !!wordLens[bfuNum] + wordLens[bfuNum]; - const double scaleFactor = ScaleTable[idScaleFactors[bfuNum]]; + const TFloat scaleFactor = ScaleTable[idScaleFactors[bfuNum]]; const 
uint32_t startPos = bs.LogCount[bandNum] ? SpecsStartShort[bfuNum] : SpecsStartLong[bfuNum]; if (wordLen) { - double maxQuant = 1.0 / (double)((1 << (wordLen - 1)) - 1); + TFloat maxQuant = 1.0 / (TFloat)((1 << (wordLen - 1)) - 1); //cout << "BFU ("<< bfuNum << ") :" << "wordLen " << wordLen << " maxQuant " << maxQuant << " scaleFactor " << scaleFactor << " id " << idScaleFactors[bfuNum] << " num Specs " << numSpecs << " short: "<< (int)bs.LogCount[bandNum] << endl; for (uint32_t i = 0; i < numSpecs; i++ ) { specs[startPos + i] = scaleFactor * maxQuant * MakeSign(stream->Read(wordLen), wordLen); } } else { - memset(&specs[startPos], 0, numSpecs * sizeof(double)); + memset(&specs[startPos], 0, numSpecs * sizeof(TFloat)); } } } } -} +} //namespace NAtrac1 +} //namespace NAtracDEnc diff --git a/src/atrac/atrac1_dequantiser.h b/src/atrac/atrac1_dequantiser.h index 112fc8b..8b2a8b4 100644 --- a/src/atrac/atrac1_dequantiser.h +++ b/src/atrac/atrac1_dequantiser.h @@ -1,12 +1,16 @@ #pragma once #include "atrac1.h" +#include "atrac_scale.h" +namespace NAtracDEnc { namespace NAtrac1 { class TAtrac1Dequantiser : public TAtrac1Data { public: TAtrac1Dequantiser(); - void Dequant(NBitStream::TBitStream* stream, const TBlockSize& bs, double specs[512]); + void Dequant(NBitStream::TBitStream* stream, const TBlockSize& bs, TFloat specs[512]); }; -} + +} //namespace NAtrac1 +} //namespace NAtracDEnc diff --git a/src/atrac/atrac1_qmf.h b/src/atrac/atrac1_qmf.h index 8550932..37d0bba 100644 --- a/src/atrac/atrac1_qmf.h +++ b/src/atrac/atrac1_qmf.h @@ -2,24 +2,26 @@ #include "../qmf/qmf.h" +namespace NAtracDEnc { + template<class TIn> class Atrac1SplitFilterBank { const static int nInSamples = 512; const static int delayComp = 39; TQmf<TIn, nInSamples> Qmf1; TQmf<TIn, nInSamples / 2> Qmf2; - std::vector<double> MidLowTmp; - std::vector<double> DelayBuf; + std::vector<TFloat> MidLowTmp; + std::vector<TFloat> DelayBuf; public: Atrac1SplitFilterBank() { MidLowTmp.resize(512); 
DelayBuf.resize(delayComp + 512); } - void Split(TIn* pcm, double* low, double* mid, double* hi) { - memcpy(&DelayBuf[0], &DelayBuf[256], sizeof(double) * delayComp); + void Split(TIn* pcm, TFloat* low, TFloat* mid, TFloat* hi) { + memcpy(&DelayBuf[0], &DelayBuf[256], sizeof(TFloat) * delayComp); Qmf1.Split(pcm, &MidLowTmp[0], &DelayBuf[delayComp]); Qmf2.Split(&MidLowTmp[0], low, mid); - memcpy(hi, &DelayBuf[0], sizeof(double) * 256); + memcpy(hi, &DelayBuf[0], sizeof(TFloat) * 256); } }; @@ -29,19 +31,19 @@ class Atrac1SynthesisFilterBank { const static int delayComp = 39; TQmf<TOut, nInSamples> Qmf1; TQmf<TOut, nInSamples / 2> Qmf2; - std::vector<double> MidLowTmp; - std::vector<double> DelayBuf; + std::vector<TFloat> MidLowTmp; + std::vector<TFloat> DelayBuf; public: Atrac1SynthesisFilterBank() { MidLowTmp.resize(512); DelayBuf.resize(delayComp + 512); } - void Synthesis(TOut* pcm, double* low, double* mid, double* hi) { - memcpy(&DelayBuf[0], &DelayBuf[256], sizeof(double) * delayComp); - memcpy(&DelayBuf[delayComp], hi, sizeof(double) * 256); + void Synthesis(TOut* pcm, TFloat* low, TFloat* mid, TFloat* hi) { + memcpy(&DelayBuf[0], &DelayBuf[256], sizeof(TFloat) * delayComp); + memcpy(&DelayBuf[delayComp], hi, sizeof(TFloat) * 256); Qmf2.Merge(&MidLowTmp[0], &low[0], &mid[0]); Qmf1.Merge(&pcm[0], &MidLowTmp[0], &DelayBuf[0]); } }; - +} //namespace NAtracDEnc diff --git a/src/atrac/atrac3.cpp b/src/atrac/atrac3.cpp new file mode 100644 index 0000000..e587d2c --- /dev/null +++ b/src/atrac/atrac3.cpp @@ -0,0 +1,33 @@ +#include "atrac3.h" +#include <algorithm> + +namespace NAtracDEnc { +namespace NAtrac3 { + +constexpr uint32_t TAtrac3Data::BlockSizeTab[33]; +constexpr uint32_t TAtrac3Data::ClcLengthTab[8]; +constexpr double TAtrac3Data::MaxQuant[8]; +constexpr uint32_t TAtrac3Data::BlocksPerBand[4 + 1]; +constexpr uint32_t TAtrac3Data::SpecsPerBlock[33]; +constexpr TAtrac3Data::THuffEntry TAtrac3Data::HuffTable1[HuffTable1Sz]; +constexpr TAtrac3Data::THuffEntry 
TAtrac3Data::HuffTable2[HuffTable2Sz]; +constexpr TAtrac3Data::THuffEntry TAtrac3Data::HuffTable3[HuffTable3Sz]; +constexpr TAtrac3Data::THuffEntry TAtrac3Data::HuffTable5[HuffTable5Sz]; +constexpr TAtrac3Data::THuffEntry TAtrac3Data::HuffTable6[HuffTable6Sz]; +constexpr TAtrac3Data::THuffEntry TAtrac3Data::HuffTable7[HuffTable7Sz]; +constexpr TAtrac3Data::THuffTablePair TAtrac3Data::HuffTables[7]; + +constexpr TContainerParams TAtrac3Data::ContainerParams[8]; +double TAtrac3Data::EncodeWindow[256] = {0}; +double TAtrac3Data::DecodeWindow[256] = {0}; +double TAtrac3Data::ScaleTable[64] = {0}; +double TAtrac3Data::GainLevel[16]; +double TAtrac3Data::GainInterpolation[31]; + +const TContainerParams* TAtrac3Data::GetContainerParamsForBitrate(uint32_t bitrate) { + std::cout << bitrate << std::endl; + return std::lower_bound(ContainerParams, ContainerParams+8, bitrate); +} + +} // namespace NAtrac3 +} // namespace NAtracDEnc diff --git a/src/atrac/atrac3.h b/src/atrac/atrac3.h new file mode 100644 index 0000000..21f1e34 --- /dev/null +++ b/src/atrac/atrac3.h @@ -0,0 +1,242 @@ +#pragma once +#include <math.h> +#include <cstdint> +#include <vector> +#include <cassert> +#include <iostream> + +namespace NAtracDEnc { +namespace NAtrac3 { + +struct TContainerParams { + const uint32_t Bitrate; + const uint16_t FrameSz; + const bool Js; +}; + +inline bool operator< (const TContainerParams& x, const TContainerParams& y) +{ + return x.Bitrate < y.Bitrate; +} +inline bool operator> (const TContainerParams& x, const TContainerParams& y) +{ + return x.Bitrate > y.Bitrate; +} +inline bool operator< (const TContainerParams& x, const unsigned int y) +{ + return x.Bitrate < y; +} +inline bool operator> (const TContainerParams& x, const unsigned int y) +{ + return x.Bitrate > y; +} + +class TAtrac3Data { +public: + static constexpr uint8_t MaxBfus = 32; + static constexpr uint32_t NumSamples = 1024; +//protected: + static const uint32_t MDCTSz = 512; + static double ScaleTable[64]; + 
static double EncodeWindow[256]; + static double DecodeWindow[256]; + static double GainLevel[16]; + static double GainInterpolation[31]; + static constexpr int32_t ExponentOffset = 4; + static constexpr int32_t LocScale = 3; + static constexpr int32_t LocSz = 1 << LocScale; + static constexpr int32_t GainInterpolationPosShift = 15; + + static constexpr uint32_t NumSpecs = NumSamples; + static const uint32_t frameSz = 152; + static constexpr double MaxQuant[8] = { + 0.0, 1.5, 2.5, 3.5, + 4.5, 7.5, 15.5, 31.5 + }; + static constexpr uint32_t BlockSizeTab[33] = { + 0, 8, 16, 24, 32, 40, 48, 56, + 64, 80, 96, 112, 128, 144, 160, 176, + 192, 224, 256, 288, 320, 352, 384, 416, + 448, 480, 512, 576, 640, 704, 768, 896, + 1024 + }; + static constexpr uint32_t const * const SpecsStartShort = &BlockSizeTab[0]; + + static constexpr uint32_t const * const SpecsStartLong = &BlockSizeTab[0]; + static constexpr uint32_t ClcLengthTab[8] = { 0, 4, 3, 3, 4, 4, 5, 6 }; + static constexpr int NumQMF = 4; + static constexpr uint32_t MaxSpecs = NumSamples; //1024 + static constexpr uint32_t MaxSpecsPerBlock = 128; + + static constexpr uint32_t BlocksPerBand[NumQMF + 1] = {0, 18, 26, 30, 32}; + static constexpr uint32_t SpecsPerBlock[33] = { + 8, 8, 8, 8, 8, 8, 8, 8, + 16, 16, 16, 16, 16, 16, 16, 16, + 32, 32, 32, 32, 32, 32, 32, 32, + 32, 32, 64, 64, 64, 64, 128, 128, + 128 + }; + struct THuffEntry { + const uint8_t Code; + const uint8_t Bits; + }; + static constexpr uint8_t HuffTable1Sz = 9; + static constexpr THuffEntry HuffTable1[HuffTable1Sz] = { + { 0x0, 1 }, + { 0x4, 3 }, { 0x5, 3 }, + { 0xC, 4 }, { 0xD, 4 }, + { 0x1C, 5 }, { 0x1D, 5 }, { 0x1E, 5 }, { 0x1F, 5 } + }; + static constexpr uint8_t HuffTable2Sz = 5; + static constexpr THuffEntry HuffTable2[HuffTable2Sz] = { + { 0x0, 1 }, + { 0x4, 3 }, { 0x5, 3 }, { 0x6, 3 }, { 0x7, 3 } + }; + static constexpr uint8_t HuffTable3Sz = 7; + static constexpr THuffEntry HuffTable3[HuffTable3Sz] = { + { 0x0, 1 }, + { 0x4, 3}, { 0x5, 3 }, + { 
0xC, 4 }, { 0xD, 4 }, { 0xE, 4 }, { 0xF, 4 } + }; + static constexpr uint8_t HuffTable5Sz = 15; + static constexpr THuffEntry HuffTable5[HuffTable5Sz] = { + { 0x0, 2 }, + { 0x2, 3 }, { 0x3, 3 }, + { 0x8, 4 }, { 0x9, 4 }, { 0xA, 4 }, { 0xB, 4 }, //{ 0xC, 4 }, { 0xD, 4 }, + { 0x1C, 5 }, { 0x1D, 5 }, + { 0x3C, 6 }, { 0x3D, 6 }, { 0x3E, 6 }, { 0x3F, 6}, + { 0xC, 4 }, { 0xD, 4 } //TODO: is it right table??? + }; + static constexpr uint8_t HuffTable6Sz = 31; + static constexpr THuffEntry HuffTable6[HuffTable6Sz] = { + { 0x0, 3 }, + { 0x2, 4 }, { 0x3, 4 }, { 0x4, 4 }, { 0x5, 4 }, { 0x6, 4 }, { 0x7, 4 }, //{ 0x8, 4 }, { 0x9, 4 }, + { 0x14, 5 }, { 0x15, 5 }, { 0x16, 5 }, { 0x17, 5 }, { 0x18, 5 }, { 0x19, 5 }, + { 0x34, 6 }, { 0x35, 6 }, { 0x36, 6 }, { 0x37, 6 }, { 0x38, 6 }, { 0x39, 6 }, { 0x3A, 6 }, { 0x3B, 6 }, + { 0x78, 7 }, { 0x79, 7 }, { 0x7A, 7 }, { 0x7B, 7 }, { 0x7C, 7 }, { 0x7D, 7 }, { 0x7E, 7 }, { 0x7F, 7 }, + { 0x8, 4 }, { 0x9, 4 } //TODO: is it right table??? + }; + static constexpr uint8_t HuffTable7Sz = 63; + static constexpr THuffEntry HuffTable7[HuffTable7Sz] = { + { 0x0, 3 }, + //{ 0x2, 4 }, { 0x3, 4 }, + { 0x8, 5 }, { 0x9, 5 }, { 0xA, 5}, { 0xB, 5 }, { 0xC, 5 }, { 0xD, 5 }, { 0xE, 5}, { 0xF, 5 }, { 0x10, 5 }, + { 0x11, 5 }, + { 0x24, 6 }, { 0x25, 6 }, { 0x26, 6 }, { 0x27, 6 }, { 0x28, 6 }, { 0x29, 6 }, { 0x2A, 6 }, { 0x2B, 6 }, + { 0x2C, 6 }, { 0x2D, 6 }, { 0x2E, 6 }, { 0x2F, 6 }, { 0x30, 6 }, { 0x31, 6 }, { 0x32, 6 }, { 0x33, 6 }, + { 0x68, 7 }, { 0x69, 7 }, { 0x6A, 7 }, { 0x6B, 7 }, { 0x6C, 7 }, { 0x6D, 7 }, { 0x6E, 7 }, + { 0x6F, 7 }, { 0x70, 7 }, { 0x71, 7 }, { 0x72, 7 }, { 0x73, 7 }, { 0x74, 7 }, { 0x75, 7 }, + { 0xEC, 8 }, { 0xED, 8 }, { 0xEE, 8 }, { 0xEF, 8 }, { 0xF0, 8 }, { 0xF1, 8 }, { 0xF2, 8 }, { 0xF3, 8 }, + { 0xF4, 8 }, { 0xF5, 8 }, + { 0xF6, 8 }, { 0xF7, 8 }, { 0xF8, 8 }, { 0xF9, 8 }, { 0xFA, 8 }, { 0xFB, 8 }, { 0xFC, 8 }, { 0xFD, 8 }, + { 0xFE, 8 }, { 0xFF, 8 }, + { 0x2, 4 }, { 0x3, 4 } //TODO: is it right table??? 
+ }; + + struct THuffTablePair { + const THuffEntry* Table; + const uint32_t Sz; + }; + + static constexpr THuffTablePair HuffTables[7] { + { HuffTable1, HuffTable1Sz }, + { HuffTable2, HuffTable2Sz }, + { HuffTable3, HuffTable3Sz }, + { HuffTable1, HuffTable1Sz }, + { HuffTable5, HuffTable5Sz }, + { HuffTable6, HuffTable6Sz }, + { HuffTable7, HuffTable7Sz } + }; +public: + TAtrac3Data() { + if (ScaleTable[0] == 0) { + for (uint32_t i = 0; i < 64; i++) { + ScaleTable[i] = pow(2.0, (double)(i - 15.0) / 3.0); + } + } + for (int i = 0; i < 256; i++) { + EncodeWindow[i] = (sin(((i + 0.5) / 256.0 - 0.5) * M_PI) + 1.0)/* * 0.5*/; + } + for (int i = 0; i < 256; i++) { + const double a = EncodeWindow[i]; + const double b = EncodeWindow[255-i]; + DecodeWindow[i] = 2.0 * a / (a*a + b*b); + } + for (int i = 0; i < 16; i++) { + GainLevel[i] = pow(2.0, ExponentOffset - i); + } + for (int i = 0; i < 31; i++) { + GainInterpolation[i] = pow(2.0, -1.0 / LocSz * (i - 15)); + } + } + static uint32_t MantissaToCLcIdx(int32_t mantissa) { + assert(mantissa > -3 && mantissa < 2); + const uint32_t mantissa_clc_rtab[4] = { 2, 3, 0, 1}; + return mantissa_clc_rtab[mantissa + 2]; + } + static uint32_t MantissasToVlcIndex(int32_t a, int32_t b) { + assert(a > -2 && a < 2); + assert(b > -2 && b < 2); + const uint32_t mantissas_vlc_rtab[9] = { 8, 4, 7, 2, 0, 1, 6, 3, 5 }; + const uint8_t idx = 3 * (a + 1) + (b + 1); + return mantissas_vlc_rtab[idx]; + } + static constexpr TContainerParams ContainerParams[8] = { + { 66150, 192, true }, + { 93713, 272, true }, + { 104738, 304, false }, + { 132300, 384, false }, + { 146081, 424, false }, + { 176400, 512, false }, + { 264600, 768, false }, + { 352800, 1024, false } + }; + static const TContainerParams* GetContainerParamsForBitrate(uint32_t bitrate); + + class SubbandInfo { + public: + static const uint32_t MaxGainPointsNum = 8; + struct TGainPoint { + uint32_t Level; + uint32_t Location; + }; + private: + std::vector<std::vector<TGainPoint>> Info; + 
public: + SubbandInfo() + { + Info.resize(4); + } + void AddSubbandCurve(uint16_t n, std::vector<TGainPoint>&& curve) { + Info[n] = std::move(curve); + } + uint32_t GetQmfNum() const { + return Info.size(); + } + const std::vector<TGainPoint>& GetGainPoints(uint32_t i) const { + return Info[i]; + } + }; + + struct TTonalVal { + const uint16_t Pos; + const double Val; + }; + typedef std::vector<TTonalVal> TTonalComponents; +}; + +struct TAtrac3EncoderSettings { + explicit TAtrac3EncoderSettings(uint32_t bitrate, bool noGainControll, bool noTonalComponents) + : ConteinerParams(TAtrac3Data::GetContainerParamsForBitrate(bitrate)) + , NoGainControll(noGainControll) + , NoTonalComponents(noTonalComponents) + { + std::cout << bitrate << " " << ConteinerParams->Bitrate << std::endl; + } + const TContainerParams* ConteinerParams; + const bool NoGainControll; + const bool NoTonalComponents; +}; + +} // namespace NAtrac3 +} // namespace NAtracDEnc diff --git a/src/atrac/atrac3_bitstream.cpp b/src/atrac/atrac3_bitstream.cpp new file mode 100644 index 0000000..e3256b7 --- /dev/null +++ b/src/atrac/atrac3_bitstream.cpp @@ -0,0 +1,424 @@ +#include "atrac3_bitstream.h" +#include "atrac_psy_common.h" +#include "../bitstream/bitstream.h" +#include <cassert> +#include <algorithm> +#include <iostream> +#include <vector> +#include <cstdlib> + +#include <cstring> + +namespace NAtracDEnc { +namespace NAtrac3 { + +using std::vector; +using std::memset; + +static const uint32_t FixedBitAllocTable[TAtrac3Data::MaxBfus] = { + 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 3, 3, 3, 3, 3, + 3, 2, 2, 1, + 1, 0 +}; + +uint32_t TAtrac3BitStreamWriter::CLCEnc(const uint32_t selector, const int mantissas[MaxSpecsPerBlock], + const uint32_t blockSize, NBitStream::TBitStream* bitStream) +{ + const uint32_t numBits = ClcLengthTab[selector]; + const uint32_t bitsUsed = (selector > 1) ? 
numBits * blockSize : numBits * blockSize / 2; + if (!bitStream) + return bitsUsed; + if (selector > 1) { + for (uint32_t i = 0; i < blockSize; ++i) { + bitStream->Write(NBitStream::MakeSign(mantissas[i], numBits), numBits); + } + } else { + for (uint32_t i = 0; i < blockSize / 2; ++i) { + uint32_t code = MantissaToCLcIdx(mantissas[i * 2]) << 2; + code |= MantissaToCLcIdx(mantissas[i * 2 + 1]); + assert(numBits == 4); + bitStream->Write(code, numBits); + } + } + return bitsUsed; +} + +uint32_t TAtrac3BitStreamWriter::VLCEnc(const uint32_t selector, const int mantissas[MaxSpecsPerBlock], + const uint32_t blockSize, NBitStream::TBitStream* bitStream) +{ + assert(selector > 0); + const THuffEntry* huffTable = HuffTables[selector - 1].Table; + const uint8_t tableSz = HuffTables[selector - 1].Sz; + uint32_t bitsUsed = 0; + if (selector > 1) { + for (uint32_t i = 0; i < blockSize; ++i) { + int m = mantissas[i]; + uint32_t huffS = (m < 0) ? (((uint32_t)(-m)) << 1) | 1 : ((uint32_t)m) << 1; + if (huffS) + huffS -= 1; + assert(huffS < 256); + assert(huffS < tableSz); + bitsUsed += huffTable[huffS].Bits; + if (bitStream) + bitStream->Write(huffTable[huffS].Code, huffTable[huffS].Bits); + } + } else { + assert(tableSz == 9); + for (uint32_t i = 0; i < blockSize / 2; ++i) { + const int ma = mantissas[i * 2]; + const int mb = mantissas[i * 2 + 1]; + const uint32_t huffS = MantissasToVlcIndex(ma, mb); + bitsUsed += huffTable[huffS].Bits; + if (bitStream) + bitStream->Write(huffTable[huffS].Code, huffTable[huffS].Bits); + } + } + return bitsUsed; +} + +std::pair<uint8_t, uint32_t> TAtrac3BitStreamWriter::CalcSpecsBitsConsumption(const vector<TScaledBlock>& scaledBlocks, + const vector<uint32_t>& precisionPerEachBlocks, int* mantisas) +{ + uint32_t bitsUsed = 5 + 1; //numBlocks + codingMode + const uint32_t numBlocks = precisionPerEachBlocks.size(); + bitsUsed += numBlocks * 3; //used VLC or CLC table (precision) + + auto lambda = [=](bool clcMode, bool calcMant) { + uint32_t bits 
= 0; + for (uint32_t i = 0; i < numBlocks; ++i) { + if (precisionPerEachBlocks[i] == 0) + continue; + bits += 6; //sfi + const uint32_t first = BlockSizeTab[i]; + const uint32_t last = BlockSizeTab[i+1]; + const uint32_t blockSize = last - first; + const TFloat mul = MaxQuant[std::min(precisionPerEachBlocks[i], (uint32_t)7)]; + if (calcMant) { + for (uint32_t j = 0, f = first; f < last; f++, j++) { + mantisas[f] = round(scaledBlocks[i].Values[j] * mul); + } + } + bits += clcMode ? CLCEnc(precisionPerEachBlocks[i], mantisas + first, blockSize, nullptr) : + VLCEnc(precisionPerEachBlocks[i], mantisas + first, blockSize, nullptr); + } + return bits; + }; + const uint32_t clcBits = lambda(true, true); + const uint32_t vlcBits = lambda(false, false); + bool mode = clcBits <= vlcBits; + return std::make_pair(mode, bitsUsed + (mode ? clcBits : vlcBits)); +} + + +std::pair<uint8_t, vector<uint32_t>> TAtrac3BitStreamWriter::CreateAllocation(const vector<TScaledBlock>& scaledBlocks, + uint16_t bitsUsed, int mt[MaxSpecs]) +{ + TFloat spread = AnalizeScaleFactorSpread(scaledBlocks); + + uint8_t numBfu = 32; + vector<uint32_t> precisionPerEachBlocks(numBfu); + uint8_t mode; + for (;;) { + precisionPerEachBlocks.resize(numBfu); + uint32_t usedBfus = 0; + for (auto v : precisionPerEachBlocks) { + if (v) + usedBfus++; + } + const uint32_t bitsAvaliablePerBfus = 8 * Params.FrameSz/2 - bitsUsed - + 5 - 1 - (numBfu * 3) - (usedBfus * 6); + TFloat maxShift = 15; + TFloat minShift = -3; + TFloat shift = 3.0; + const uint32_t maxBits = bitsAvaliablePerBfus; + const uint32_t minBits = bitsAvaliablePerBfus - 90; + for (;;) { + const vector<uint32_t>& tmpAlloc = CalcBitsAllocation(scaledBlocks, numBfu, spread, shift); + const auto consumption = CalcSpecsBitsConsumption(scaledBlocks, tmpAlloc, mt); + + if (consumption.second < minBits) { + if (maxShift - minShift < 0.1) { + precisionPerEachBlocks = tmpAlloc; + mode = consumption.first; + break; + } + maxShift = shift; + shift -= (shift - 
minShift) / 2; + } else if (consumption.second > maxBits) { + minShift = shift; + shift += (maxShift - shift) / 2; + } else { + precisionPerEachBlocks = tmpAlloc; + mode = consumption.first; + break; + } + } + break; + + } + return { mode, precisionPerEachBlocks }; +} + +void TAtrac3BitStreamWriter::EncodeSpecs(const vector<TScaledBlock>& scaledBlocks, NBitStream::TBitStream* bitStream, + const uint16_t bitsUsed) +{ + int mt[MaxSpecs]; + auto allocation = CreateAllocation(scaledBlocks, bitsUsed, mt); + const vector<uint32_t>& precisionPerEachBlocks = allocation.second; + const uint32_t numBlocks = precisionPerEachBlocks.size(); //number of blocks to save + const uint32_t codingMode = allocation.first;//0 - VLC, 1 - CLC + + assert(numBlocks <= 32); + bitStream->Write(numBlocks-1, 5); + bitStream->Write(codingMode, 1); + for (uint32_t i = 0; i < numBlocks; ++i) { + uint32_t val = precisionPerEachBlocks[i]; //coding table used (VLC) or number of bits used (CLC) + bitStream->Write(val, 3); + } + for (uint32_t i = 0; i < numBlocks; ++i) { + if (precisionPerEachBlocks[i] == 0) + continue; + bitStream->Write(scaledBlocks[i].ScaleFactorIndex, 6); + } + for (uint32_t i = 0; i < numBlocks; ++i) { + if (precisionPerEachBlocks[i] == 0) + continue; + + const uint32_t first = BlockSizeTab[i]; + const uint32_t last = BlockSizeTab[i+1]; + const uint32_t blockSize = last - first; + + if (codingMode == 1) { + CLCEnc(precisionPerEachBlocks[i], mt + first, blockSize, bitStream); + } else { + VLCEnc(precisionPerEachBlocks[i], mt + first, blockSize, bitStream); + } + } +} + +uint8_t TAtrac3BitStreamWriter::GroupTonalComponents(const std::vector<TTonalComponent>& tonalComponents, + TTonalComponentsSubGroup groups[64]) +{ + for (const TTonalComponent& tc : tonalComponents) { + assert(tc.ScaledBlock.Values.size() < 8); + assert(tc.ScaledBlock.Values.size() > 0); + assert(tc.QuantIdx >1); + assert(tc.QuantIdx <8); + groups[tc.QuantIdx * 8 + 
tc.ScaledBlock.Values.size()].SubGroupPtr.push_back(&tc); + } + uint8_t tcsgn = 0; + //for each group + for (uint8_t i = 0; i < 64; ++i) { + uint8_t start_pos; + uint8_t cur_pos = 0; + //scan tonal components + while (cur_pos < groups[i].SubGroupPtr.size()) { + start_pos = cur_pos; + ++tcsgn; + groups[i].SubGroupMap.push_back(cur_pos); + uint8_t groupLimiter = 0; + //allow not grather than 8 components in one subgroup limited by 64 specs + do { + ++cur_pos; + if (cur_pos == groups[i].SubGroupPtr.size()) + break; + if (groups[i].SubGroupPtr[cur_pos]->ValPtr->Pos - (groups[i].SubGroupPtr[start_pos]->ValPtr->Pos & ~63) < 64) { + ++groupLimiter; + } else { + groupLimiter = 0; + start_pos = cur_pos; + } + } while (groupLimiter < 7); + } + } + return tcsgn; +} + +uint16_t TAtrac3BitStreamWriter::EncodeTonalComponents(const std::vector<TTonalComponent>& tonalComponents, + NBitStream::TBitStream* bitStream, uint8_t numQmfBand) +{ + const uint16_t bitsUsed = bitStream->GetSizeInBits(); + //group tonal components with same quantizer and len + TTonalComponentsSubGroup groups[64]; + const uint8_t tcsgn = GroupTonalComponents(tonalComponents, groups); + + assert(tcsgn < 32); + bitStream->Write(tcsgn, 5); + if (tcsgn == 0) { + for (int i = 0; i < 64; ++i) + assert(groups[i].SubGroupPtr.size() == 0); + return 5; //wrote 0 but 5 bits for tcsgn + } + //Coding mode: + // 0 - All are VLC + // 1 - All are CLC + // 2 - Error + // 3 - Own mode for each component + + //TODO: implement switch for best coding mode. 
Now VLC for all + bitStream->Write(0, 2); + + uint8_t tcgnCheck = 0; + //for each group of equal quantiser and len + for (uint8_t i = 0; i < 64; ++i) { + const TTonalComponentsSubGroup& curGroup = groups[i]; + if (curGroup.SubGroupPtr.size() == 0) { + assert(curGroup.SubGroupMap.size() == 0); + continue; + } + assert(curGroup.SubGroupMap.size()); + for (uint8_t subgroup = 0; subgroup < curGroup.SubGroupMap.size(); ++subgroup) { + const uint8_t subGroupStartPos = curGroup.SubGroupMap[subgroup]; + const uint8_t subGroupEndPos = (subgroup < curGroup.SubGroupMap.size() - 1) ? + curGroup.SubGroupMap[subgroup+1] : curGroup.SubGroupPtr.size(); + assert(subGroupEndPos > subGroupStartPos); + //number of coded values are same in group + const uint8_t codedValues = curGroup.SubGroupPtr[0]->ScaledBlock.Values.size(); + + //Number of tonal component for each 64spec block. Used to set qmf band flags and simplify band encoding loop + uint8_t bandFlags[16]; + memset(bandFlags, 0, 16 * sizeof(uint8_t)); + assert(numQmfBand <= 4); + for (uint8_t j = subGroupStartPos; j < subGroupEndPos; ++j) { + //assert num of coded values are same in group + assert(codedValues == curGroup.SubGroupPtr[j]->ScaledBlock.Values.size()); + uint8_t specBlock = (curGroup.SubGroupPtr[j]->ValPtr->Pos) >> 6; + assert((specBlock >> 2) < numQmfBand); + bandFlags[specBlock]++; + } + + assert(numQmfBand == 4); + + tcgnCheck++; + + for (uint8_t j = 0; j < numQmfBand; ++j) { + bitStream->Write((bool)(*(uint32_t*)&bandFlags[j<<2]), 1); + } + //write number of coded values for components in current group + assert(codedValues > 0); + bitStream->Write(codedValues - 1, 3); + //write quant index + assert((i >> 3) > 1); + assert((i >> 3) < 8); + assert(i); + bitStream->Write(i >> 3, 3); + uint8_t lastPos = subGroupStartPos; + uint8_t checkPos = 0; + for (uint16_t j = 0; j < 16; ++j) { + if (!(*(uint32_t*)&bandFlags[j & 0xC])) { //discard two bits + continue; + } + + const uint8_t codedComponents = bandFlags[j]; + 
assert(codedComponents < 8); + bitStream->Write(codedComponents, 3); + uint8_t k = lastPos; + for (; k < lastPos + codedComponents; ++k) { + assert(curGroup.SubGroupPtr[k]->ValPtr->Pos >= j * 64); + uint16_t relPos = curGroup.SubGroupPtr[k]->ValPtr->Pos - j * 64; + assert(curGroup.SubGroupPtr[k]->ScaledBlock.ScaleFactorIndex < 64); + bitStream->Write(curGroup.SubGroupPtr[k]->ScaledBlock.ScaleFactorIndex, 6); + + assert(relPos < 64); + + bitStream->Write(relPos, 6); + + assert(curGroup.SubGroupPtr[k]->ScaledBlock.Values.size() < 8); + int mantisas[256]; + const TFloat mul = MaxQuant[std::min((uint32_t)(i>>3), (uint32_t)7)]; + assert(codedValues == curGroup.SubGroupPtr[k]->ScaledBlock.Values.size()); + for (uint32_t z = 0; z < curGroup.SubGroupPtr[k]->ScaledBlock.Values.size(); ++z) { + mantisas[z] = round(curGroup.SubGroupPtr[k]->ScaledBlock.Values[z] * mul); + } + //VLCEnc + + assert(i); + VLCEnc(i>>3, mantisas, curGroup.SubGroupPtr[k]->ScaledBlock.Values.size(), bitStream); + + + } + lastPos = k; + checkPos = lastPos; + } + + assert(subGroupEndPos == checkPos); + } + } + assert(tcgnCheck == tcsgn); + return bitStream->GetSizeInBits() - bitsUsed; +} + +vector<uint32_t> TAtrac3BitStreamWriter::CalcBitsAllocation(const std::vector<TScaledBlock>& scaledBlocks, + const uint32_t bfuNum, + const TFloat spread, + const TFloat shift) +{ + vector<uint32_t> bitsPerEachBlock(bfuNum); + for (size_t i = 0; i < bitsPerEachBlock.size(); ++i) { + const uint32_t fix = FixedBitAllocTable[i]; + int tmp = spread * ( (TFloat)scaledBlocks[i].ScaleFactorIndex/3.2) + (1.0 - spread) * fix - shift; + if (tmp > 7) { + bitsPerEachBlock[i] = 7; + } else if (tmp < 0) { + bitsPerEachBlock[i] = 0; + } else { + bitsPerEachBlock[i] = tmp; + } + } + return bitsPerEachBlock; +} + + +void TAtrac3BitStreamWriter::WriteSoundUnit(const TAtrac3Data::SubbandInfo& subbandInfo, + const std::vector<TTonalComponent>& tonalComponents, + const vector<TScaledBlock>& scaledBlocks) +{ + NBitStream::TBitStream 
bitStream; + if (Params.Js) { + //TODO + } else { + bitStream.Write(0x28, 6); //0x28 - id + } + const uint8_t numQmfBand = subbandInfo.GetQmfNum(); + bitStream.Write(numQmfBand - 1, 2); + + //write gain info + for (uint32_t band = 0; band < numQmfBand; ++band) { + const vector<TAtrac3Data::SubbandInfo::TGainPoint>& GainPoints = subbandInfo.GetGainPoints(band); + assert(GainPoints.size() < TAtrac3Data::SubbandInfo::MaxGainPointsNum); + bitStream.Write(GainPoints.size(), 3); + int s = 0; + for (const TAtrac3Data::SubbandInfo::TGainPoint& point : GainPoints) { + bitStream.Write(point.Level, 4); + bitStream.Write(point.Location, 5); + s++; + assert(s < 8); + } + } + const uint16_t bitsUsedByGainInfo = bitStream.GetSizeInBits() - 8; + const uint16_t bitsUsedByTonal = EncodeTonalComponents(tonalComponents, &bitStream, numQmfBand); + //spec + EncodeSpecs(scaledBlocks, &bitStream, bitsUsedByTonal + bitsUsedByGainInfo); + + if (!Container) + abort(); + if (OutBuffer.empty()) { + std::vector<char> channel = bitStream.GetBytes(); + assert(channel.size() <= Params.FrameSz/2); + channel.resize(Params.FrameSz/2); + OutBuffer.insert(OutBuffer.end(), channel.begin(), channel.end()); + } else { + std::vector<char> channel = bitStream.GetBytes(); + + assert(channel.size() <= Params.FrameSz/2); + channel.resize(Params.FrameSz/2); + OutBuffer.insert(OutBuffer.end(), channel.begin(), channel.end()); + Container->WriteFrame(OutBuffer); + OutBuffer.clear(); + } + +} + +} // namespace NAtrac3 +} // namespace NAtracDEnc diff --git a/src/atrac/atrac3_bitstream.h b/src/atrac/atrac3_bitstream.h new file mode 100644 index 0000000..225d98c --- /dev/null +++ b/src/atrac/atrac3_bitstream.h @@ -0,0 +1,71 @@ +#pragma once +#include "atrac3.h" +#include "atrac1.h" +#include "../aea.h" +#include "../oma.h" +#include "../atrac/atrac1.h" +#include "atrac_scale.h" +#include <vector> +#include <utility> + +namespace NAtracDEnc { +namespace NAtrac3 { + +struct TTonalComponent { + TTonalComponent(const 
TAtrac3Data::TTonalVal* valPtr, uint8_t quantIdx, const TScaledBlock& scaledBlock) + : ValPtr(valPtr) + , QuantIdx(quantIdx) + , ScaledBlock(scaledBlock) + {} + const TAtrac3Data::TTonalVal* ValPtr = nullptr; + uint8_t QuantIdx = 0; + TScaledBlock ScaledBlock; +}; + +class TAtrac3BitStreamWriter : public virtual TAtrac3Data { + struct TTonalComponentsSubGroup { + std::vector<uint8_t> SubGroupMap; + std::vector<const TTonalComponent*> SubGroupPtr; + }; + TOma* Container; + const TContainerParams Params; + std::vector<char> OutBuffer; + + uint32_t CLCEnc(const uint32_t selector, const int mantissas[MaxSpecsPerBlock], + const uint32_t blockSize, NBitStream::TBitStream* bitStream); + + uint32_t VLCEnc(const uint32_t selector, const int mantissas[MaxSpecsPerBlock], + const uint32_t blockSize, NBitStream::TBitStream* bitStream); + + std::vector<uint32_t> CalcBitsAllocation(const std::vector<TScaledBlock>& scaledBlocks, + uint32_t bfuNum, TFloat spread, TFloat shift); + + std::pair<uint8_t, std::vector<uint32_t>> CreateAllocation(const std::vector<TScaledBlock>& scaledBlocks, + uint16_t bitsUsed, int mt[MaxSpecs]); + + std::pair<uint8_t, uint32_t> CalcSpecsBitsConsumption(const std::vector<TScaledBlock>& scaledBlocks, + const std::vector<uint32_t>& precisionPerEachBlocks, + int* mantisas); + + void EncodeSpecs(const std::vector<TScaledBlock>& scaledBlocks, NBitStream::TBitStream* bitStream, + uint16_t bitsUsed); + + uint8_t GroupTonalComponents(const std::vector<TTonalComponent>& tonalComponents, + TTonalComponentsSubGroup groups[64]); + + uint16_t EncodeTonalComponents(const std::vector<TTonalComponent>& tonalComponents, + NBitStream::TBitStream* bitStream, uint8_t numQmfBand); +public: + TAtrac3BitStreamWriter(TOma* container, const TContainerParams& params) //no mono mode for atrac3 + : Container(container) + , Params(params) + { + + } + void WriteSoundUnit(const TAtrac3Data::SubbandInfo& subbandInfo, + const std::vector<TTonalComponent>& tonalComponents, + const 
std::vector<TScaledBlock>& scaledBlocks); +}; + +} // namespace NAtrac3 +} // namespace NAtracDEnc diff --git a/src/atrac/atrac3_qmf.h b/src/atrac/atrac3_qmf.h new file mode 100644 index 0000000..f0ef805 --- /dev/null +++ b/src/atrac/atrac3_qmf.h @@ -0,0 +1,27 @@ +#pragma once +#include <vector> +#include "../qmf/qmf.h" + +namespace NAtracDEnc { + +template<class TIn> +class Atrac3SplitFilterBank { + const static int nInSamples = 1024; + TQmf<TIn, nInSamples> Qmf1; + TQmf<TIn, nInSamples / 2> Qmf2; + TQmf<TIn, nInSamples / 2> Qmf3; + std::vector<TFloat> Buf1; + std::vector<TFloat> Buf2; +public: + Atrac3SplitFilterBank() { + Buf1.resize(nInSamples); + Buf2.resize(nInSamples); + } + void Split(TIn* pcm, TFloat* subs[4]) { + Qmf1.Split(pcm, Buf1.data(), Buf2.data()); + Qmf2.Split(Buf1.data(), subs[0], subs[1]); + Qmf3.Split(Buf2.data(), subs[3], subs[2]); + } +}; + +} //namespace NAtracDEnc diff --git a/src/atrac/atrac_psy_common.cpp b/src/atrac/atrac_psy_common.cpp new file mode 100644 index 0000000..f6f3bf7 --- /dev/null +++ b/src/atrac/atrac_psy_common.cpp @@ -0,0 +1,26 @@ +#include "atrac_psy_common.h" + +namespace NAtracDEnc { + +//returns 1 for tone-like, 0 - noise-like +TFloat AnalizeScaleFactorSpread(const std::vector<TScaledBlock>& scaledBlocks) { + TFloat s = 0.0; + for (size_t i = 0; i < scaledBlocks.size(); ++i) { + s += scaledBlocks[i].ScaleFactorIndex; + } + s /= scaledBlocks.size(); + TFloat sigma = 0.0; + TFloat t = 0.0; + for (size_t i = 0; i < scaledBlocks.size(); ++i) { + t = (scaledBlocks[i].ScaleFactorIndex - s); + t *= t; + sigma += t; + } + sigma /= scaledBlocks.size(); + sigma = sqrt(sigma); + if (sigma > 14.0) + sigma = 14.0; + return sigma/14.0; +} + +} //namespace NAtracDEnc diff --git a/src/atrac/atrac_psy_common.h b/src/atrac/atrac_psy_common.h new file mode 100644 index 0000000..4c580a4 --- /dev/null +++ b/src/atrac/atrac_psy_common.h @@ -0,0 +1,8 @@ +#pragma once +#include "atrac_scale.h" + +namespace NAtracDEnc { + +double 
AnalizeScaleFactorSpread(const std::vector<TScaledBlock>& scaledBlocks); + +} //namespace NAtracDEnc diff --git a/src/atrac/atrac_scale.cpp b/src/atrac/atrac_scale.cpp index 1e24cb5..a03bc13 100644 --- a/src/atrac/atrac_scale.cpp +++ b/src/atrac/atrac_scale.cpp @@ -1,48 +1,68 @@ #include "atrac_scale.h" #include "atrac1.h" +#include "atrac3.h" #include <cmath> #include <iostream> #include <algorithm> + namespace NAtracDEnc { + using std::vector; using std::map; -using namespace std; +using std::cerr; +using std::endl; + +using std::abs; + static const uint32_t MAX_SCALE = 65536; template<class TBaseData> -vector<TScaledBlock> TScaler<TBaseData>::Scale(const vector<double>& specs, const TBlockSize& blockSize) { +TScaledBlock TScaler<TBaseData>::Scale(const TFloat* in, uint16_t len) { + TFloat maxAbsSpec = 0; + for (uint16_t i = 0; i < len; ++i) { + const TFloat absSpec = abs(in[i]); + if (absSpec > maxAbsSpec) { + if (absSpec > MAX_SCALE) { + cerr << "Scale error: absSpec > MAX_SCALE, val: " << absSpec << endl; + maxAbsSpec = MAX_SCALE; + } else { + maxAbsSpec = absSpec; + } + } + } + const map<TFloat, uint8_t>::const_iterator scaleIter = ScaleIndex.lower_bound(maxAbsSpec); + const TFloat scaleFactor = scaleIter->first; + const uint8_t scaleFactorIndex = scaleIter->second; + TScaledBlock res(scaleFactorIndex); + for (uint16_t i = 0; i < len; ++i) { + const TFloat scaledValue = in[i] / scaleFactor; + if (scaledValue > 1.0) { + cerr << "got "<< scaledValue << " it is wrong scalling" << endl; + } + res.Values.push_back(scaledValue); + } + return res; +} + +template<class TBaseData> +vector<TScaledBlock> TScaler<TBaseData>::ScaleFrame(const vector<TFloat>& specs, const TBlockSize& blockSize) { vector<TScaledBlock> scaledBlocks; for (uint8_t bandNum = 0; bandNum < this->NumQMF; ++bandNum) { const bool shortWinMode = !!blockSize.LogCount[bandNum]; for (uint8_t blockNum = this->BlocksPerBand[bandNum]; blockNum < this->BlocksPerBand[bandNum + 1]; ++blockNum) { - const 
uint16_t specNumStart = shortWinMode ? this->SpecsStartShort[blockNum] : this->SpecsStartLong[blockNum]; - const uint16_t specNumEnd = specNumStart + this->SpecsPerBlock[blockNum]; - double maxAbsSpec = 0; - for (uint16_t curSpec = specNumStart; curSpec < specNumEnd; ++curSpec) { - const double absSpec = abs(specs[curSpec]); - if (absSpec > maxAbsSpec) { - if (absSpec > MAX_SCALE) { - cerr << "got " << absSpec << " value - overflow" << endl; - maxAbsSpec = MAX_SCALE; - } else { - maxAbsSpec = absSpec; - } - } - } - const map<double, uint8_t>::const_iterator scaleIter = ScaleIndex.lower_bound(maxAbsSpec); - const double scaleFactor = scaleIter->first; - const uint8_t scaleFactorIndex = scaleIter->second; - scaledBlocks.push_back(TScaledBlock(scaleFactorIndex)); - for (uint16_t specNum = specNumStart; specNum < specNumEnd; ++specNum) { - const double scaledValue = specs[specNum] / scaleFactor; - if (scaledValue > 1.0) - cerr << "got "<< scaledValue << " value - wrong scalling" << endl; - scaledBlocks.back().Values.push_back(scaledValue); - } + const uint16_t specNumStart = shortWinMode ? 
TBaseData::SpecsStartShort[blockNum] : + TBaseData::SpecsStartLong[blockNum]; + scaledBlocks.emplace_back(Scale(&specs[specNumStart], this->SpecsPerBlock[blockNum])); } } return scaledBlocks; } -template class TScaler<TAtrac1Data>; -} + +template +class TScaler<NAtrac1::TAtrac1Data>; + +template +class TScaler<NAtrac3::TAtrac3Data>; + +} //namespace NAtracDEnc diff --git a/src/atrac/atrac_scale.h b/src/atrac/atrac_scale.h index dd437a2..499fac2 100644 --- a/src/atrac/atrac_scale.h +++ b/src/atrac/atrac_scale.h @@ -4,24 +4,60 @@ #include <cstdint> #include "atrac1.h" +#include "../config.h" + namespace NAtracDEnc { struct TScaledBlock { TScaledBlock(uint8_t sfi) : ScaleFactorIndex(sfi) {} - const uint8_t ScaleFactorIndex = 0; - std::vector<double> Values; + /* const */ uint8_t ScaleFactorIndex = 0; + std::vector<TFloat> Values; }; +class TBlockSize; + template <class TBaseData> class TScaler : public TBaseData { - std::map<double, uint8_t>ScaleIndex; + std::map<TFloat, uint8_t>ScaleIndex; public: TScaler() { for (int i = 0; i < 64; i++) { ScaleIndex[TBaseData::ScaleTable[i]] = i; } } - std::vector<TScaledBlock> Scale(const std::vector<double>& specs, const TBlockSize& blockSize); + TScaledBlock Scale(const TFloat* in, uint16_t len); + std::vector<TScaledBlock> ScaleFrame(const std::vector<TFloat>& specs, const TBlockSize& blockSize); +}; + +class TBlockSize { + static std::array<int, 4> Parse(NBitStream::TBitStream* stream) { + //ATRAC1 - 3 subbands, ATRAC3 - 4 subbands. + //TODO: rewrite + std::array<int, 4> tmp; + tmp[0] = 2 - stream->Read(2); + tmp[1] = 2 - stream->Read(2); + tmp[2] = 3 - stream->Read(2); + stream->Read(2); //skip unused 2 bits + return tmp; + } + static std::array<int, 4> Create(bool lowShort, bool midShort, bool hiShort) { + std::array<int, 4> tmp; + tmp[0] = lowShort ? 2 : 0; + tmp[1] = midShort ? 2 : 0; + tmp[2] = hiShort ? 
3 : 0; + return tmp; + } +public: + TBlockSize(NBitStream::TBitStream* stream) + : LogCount(Parse(stream)) + {} + TBlockSize(bool lowShort, bool midShort, bool hiShort) + : LogCount(Create(lowShort, midShort, hiShort)) + {} + TBlockSize() + : LogCount({{0, 0, 0, 0}}) + {} + const std::array<int, 4> LogCount; }; -} +} //namespace NAtracDEnc diff --git a/src/atracdenc.cpp b/src/atrac1denc.cpp index 2de1fda..ff7923c 100644 --- a/src/atracdenc.cpp +++ b/src/atrac1denc.cpp @@ -1,35 +1,27 @@ #include <vector> -#include "atracdenc.h" +#include "atrac1denc.h" #include "bitstream/bitstream.h" #include "atrac/atrac1.h" #include "atrac/atrac1_dequantiser.h" #include "atrac/atrac1_qmf.h" #include "atrac/atrac1_bitalloc.h" +#include "util.h" namespace NAtracDEnc { -using namespace std; using namespace NBitStream; using namespace NAtrac1; using namespace NMDCT; +using std::vector; -template<int N> -static vector<double> invertSpectr(double* in) { - vector<double> buf(N); - memcpy(&buf[0], in, N * sizeof(double)); - for (int i = 0; i < N; i+=2) - buf[i] *= -1; - return buf; -} - -TAtrac1Processor::TAtrac1Processor(TAeaPtr&& aea, TAtrac1EncodeSettings&& settings) +TAtrac1Processor::TAtrac1Processor(TCompressedIOPtr&& aea, TAtrac1EncodeSettings&& settings) : Aea(std::move(aea)) , Settings(std::move(settings)) { } -static void vector_fmul_window(double *dst, const double *src0, - const double *src1, const double *win, int len) +static void vector_fmul_window(TFloat *dst, const TFloat *src0, + const TFloat *src1, const TFloat *win, int len) { int i, j; @@ -38,21 +30,21 @@ static void vector_fmul_window(double *dst, const double *src0, src0 += len; for (i = -len, j = len - 1; i < 0; i++, j--) { - double s0 = src0[i]; - double s1 = src1[j]; - double wi = win[i]; - double wj = win[j]; + TFloat s0 = src0[i]; + TFloat s1 = src1[j]; + TFloat wi = win[i]; + TFloat wj = win[j]; dst[i] = s0 * wj - s1 * wi; dst[j] = s0 * wi + s1 * wj; } } -vector<double> midct(double* x, int N) { - 
vector<double> res; +vector<TFloat> midct(TFloat* x, int N) { + vector<TFloat> res; for (int n = 0; n < 2 * N; n++) { - double sum = 0; + TFloat sum = 0; for (int k = 0; k < N; k++) { - sum += (x[k] * cos((M_PI/N) * ((double)n + 0.5 + N/2) * ((double)k + 0.5))); + sum += (x[k] * cos((M_PI/N) * ((TFloat)n + 0.5 + N/2) * ((TFloat)k + 0.5))); } res.push_back(sum); @@ -60,36 +52,32 @@ vector<double> midct(double* x, int N) { return res; } -void TAtrac1MDCT::Mdct(double Specs[512], double* low, double* mid, double* hi, const TBlockSize& blockSize) { +void TAtrac1MDCT::Mdct(TFloat Specs[512], TFloat* low, TFloat* mid, TFloat* hi, const TBlockSize& blockSize) { uint32_t pos = 0; - for (uint32_t band = 0; band < QMF_BANDS; band++) { + for (uint32_t band = 0; band < NumQMF; band++) { const uint32_t numMdctBlocks = 1 << blockSize.LogCount[band]; - double* srcBuf = (band == 0) ? low : (band == 1) ? mid : hi; + TFloat* srcBuf = (band == 0) ? low : (band == 1) ? mid : hi; uint32_t bufSz = (band == 2) ? 256 : 128; const uint32_t blockSz = (numMdctBlocks == 1) ? bufSz : 32; uint32_t winStart = (numMdctBlocks == 1) ? ((band == 2) ? 112 : 48) : 0; //compensate level for 3rd band in case of short window - const double multiple = (numMdctBlocks != 1 && band == 2) ? 2.0 : 1.0; - vector<double> tmp(512); + const TFloat multiple = (numMdctBlocks != 1 && band == 2) ? 2.0 : 1.0; + vector<TFloat> tmp(512); uint32_t blockPos = 0; for (size_t k = 0; k < numMdctBlocks; ++k) { - memcpy(&tmp[winStart], &srcBuf[bufSz], 32 * sizeof(double)); + memcpy(&tmp[winStart], &srcBuf[bufSz], 32 * sizeof(TFloat)); for (size_t i = 0; i < 32; i++) { srcBuf[bufSz + i] = TAtrac1Data::SineWindow[i] * srcBuf[blockPos + blockSz - 32 + i]; srcBuf[blockPos + blockSz - 32 + i] = TAtrac1Data::SineWindow[31 - i] * srcBuf[blockPos + blockSz - 32 + i]; } - memcpy(&tmp[winStart+32], &srcBuf[blockPos], blockSz * sizeof(double)); - const vector<double>& sp = (numMdctBlocks == 1) ? ((band == 2) ? 
Mdct512(&tmp[0]) : Mdct256(&tmp[0])) : Mdct64(&tmp[0]); + memcpy(&tmp[winStart+32], &srcBuf[blockPos], blockSz * sizeof(TFloat)); + const vector<TFloat>& sp = (numMdctBlocks == 1) ? ((band == 2) ? Mdct512(&tmp[0]) : Mdct256(&tmp[0])) : Mdct64(&tmp[0]); for (size_t i = 0; i < sp.size(); i++) { Specs[blockPos + pos + i] = sp[i] * multiple; } if (band) { - for (uint32_t j = 0; j < sp.size() / 2; j++) { - double tmp = Specs[blockPos + pos +j]; - Specs[blockPos + pos + j] = Specs[blockPos + pos + sp.size() - 1 -j]; - Specs[blockPos + pos + sp.size() - 1 -j] = tmp; - } + SwapArray(&Specs[blockPos + pos], sp.size()); } blockPos += 32; @@ -97,29 +85,23 @@ void TAtrac1MDCT::Mdct(double Specs[512], double* low, double* mid, double* hi, pos += bufSz; } } -void TAtrac1MDCT::IMdct(double Specs[512], const TBlockSize& mode, double* low, double* mid, double* hi) { +void TAtrac1MDCT::IMdct(TFloat Specs[512], const TBlockSize& mode, TFloat* low, TFloat* mid, TFloat* hi) { uint32_t pos = 0; - for (size_t band = 0; band < QMF_BANDS; band++) { + for (size_t band = 0; band < NumQMF; band++) { const uint32_t numMdctBlocks = 1 << mode.LogCount[band]; const uint32_t bufSz = (band == 2) ? 256 : 128; const uint32_t blockSz = (numMdctBlocks == 1) ? bufSz : 32; uint32_t start = 0; - double* dstBuf = (band == 0) ? low : (band == 1) ? mid : hi; + TFloat* dstBuf = (band == 0) ? low : (band == 1) ? mid : hi; - vector<double> invBuf(512); - double* prevBuf = &dstBuf[bufSz * 2 - 16]; + vector<TFloat> invBuf(512); + TFloat* prevBuf = &dstBuf[bufSz * 2 - 16]; for (uint32_t block = 0; block < numMdctBlocks; block++) { - if (band) { - for (uint32_t j = 0; j < blockSz/2; j++) { - double tmp = Specs[pos+j]; - Specs[pos+j] = Specs[pos + blockSz - 1 -j]; - Specs[pos + blockSz - 1 -j] = tmp; - } + SwapArray(&Specs[pos], blockSz); } - - vector<double> inv = (numMdctBlocks != 1) ? midct(&Specs[pos], blockSz) : (bufSz == 128) ? 
Midct256(&Specs[pos]) : Midct512(&Specs[pos]); + vector<TFloat> inv = (numMdctBlocks != 1) ? midct(&Specs[pos], blockSz) : (bufSz == 128) ? Midct256(&Specs[pos]) : Midct512(&Specs[pos]); for (size_t i = 0; i < (inv.size()/2); i++) { invBuf[start+i] = inv[i + inv.size()/4]; } @@ -131,7 +113,7 @@ void TAtrac1MDCT::IMdct(double Specs[512], const TBlockSize& mode, double* low, pos += blockSz; } if (numMdctBlocks == 1) - memcpy(dstBuf + 32, &invBuf[16], ((band == 2) ? 240 : 112) * sizeof(double)); + memcpy(dstBuf + 32, &invBuf[16], ((band == 2) ? 240 : 112) * sizeof(TFloat)); for (size_t j = 0; j < 16; j++) { dstBuf[bufSz*2 - 16 + j] = invBuf[bufSz - 16 + j]; @@ -139,9 +121,9 @@ void TAtrac1MDCT::IMdct(double Specs[512], const TBlockSize& mode, double* low, } } -TPCMEngine<double>::TProcessLambda TAtrac1Processor::GetDecodeLambda() { - return [this](double* data) { - double sum[512]; +TPCMEngine<TFloat>::TProcessLambda TAtrac1Processor::GetDecodeLambda() { + return [this](TFloat* data, const TPCMEngine<TFloat>::ProcessMeta& meta) { + TFloat sum[512]; const uint32_t srcChannels = Aea->GetChannelNum(); for (uint32_t channel = 0; channel < srcChannels; channel++) { std::unique_ptr<TAea::TFrame> frame(Aea->ReadFrame()); @@ -150,7 +132,7 @@ TPCMEngine<double>::TProcessLambda TAtrac1Processor::GetDecodeLambda() { TBlockSize mode(&bitstream); TAtrac1Dequantiser dequantiser; - vector<double> specs; + vector<TFloat> specs; specs.resize(512);; dequantiser.Dequant(&bitstream, mode, &specs[0]); @@ -170,7 +152,7 @@ TPCMEngine<double>::TProcessLambda TAtrac1Processor::GetDecodeLambda() { } -TPCMEngine<double>::TProcessLambda TAtrac1Processor::GetEncodeLambda() { +TPCMEngine<TFloat>::TProcessLambda TAtrac1Processor::GetEncodeLambda() { const uint32_t srcChannels = Aea->GetChannelNum(); vector<IAtrac1BitAlloc*> bitAlloc; for (size_t i = 0; i < srcChannels; i++) { @@ -180,10 +162,10 @@ TPCMEngine<double>::TProcessLambda TAtrac1Processor::GetEncodeLambda() { bitAlloc.push_back(new 
TAtrac1SimpleBitAlloc(atrac1container, Settings.GetBfuIdxConst(), Settings.GetFastBfuNumSearch())); } - return [this, srcChannels, bitAlloc](double* data) { + return [this, srcChannels, bitAlloc](TFloat* data, const TPCMEngine<TFloat>::ProcessMeta& meta) { for (uint32_t channel = 0; channel < srcChannels; channel++) { - double src[NumSamples]; - vector<double> specs(512); + TFloat src[NumSamples]; + vector<TFloat> specs(512); for (size_t i = 0; i < NumSamples; ++i) { src[i] = data[i * srcChannels + channel]; } @@ -194,10 +176,10 @@ TPCMEngine<double>::TProcessLambda TAtrac1Processor::GetEncodeLambda() { if (Settings.GetWindowMode() == TAtrac1EncodeSettings::EWindowMode::EWM_AUTO) { windowMask |= (uint32_t)TransientDetectors.GetDetector(channel, 0).Detect(&PcmBufLow[channel][0]); - const vector<double>& invMid = invertSpectr<128>(&PcmBufMid[channel][0]); + const vector<TFloat>& invMid = InvertSpectr<128>(&PcmBufMid[channel][0]); windowMask |= (uint32_t)TransientDetectors.GetDetector(channel, 1).Detect(&invMid[0]) << 1; - const vector<double>& invHi = invertSpectr<256>(&PcmBufHi[channel][0]); + const vector<TFloat>& invHi = InvertSpectr<256>(&PcmBufHi[channel][0]); windowMask |= (uint32_t)TransientDetectors.GetDetector(channel, 2).Detect(&invHi[0]) << 2; //std::cout << "trans: " << windowMask << std::endl; @@ -207,15 +189,10 @@ TPCMEngine<double>::TProcessLambda TAtrac1Processor::GetEncodeLambda() { } const TBlockSize blockSize(windowMask & 0x1, windowMask & 0x2, windowMask & 0x4); //low, mid, hi - //for (int i = 0; i < 256; ++i) { - // std::cout << PcmBufHi[channel][i] << std::endl; - //} - //std::cout<< "============" << std::endl; Mdct(&specs[0], &PcmBufLow[channel][0], &PcmBufMid[channel][0], &PcmBufHi[channel][0], blockSize); - bitAlloc[channel]->Write(Scaler.Scale(specs, blockSize), blockSize); + bitAlloc[channel]->Write(Scaler.ScaleFrame(specs, blockSize), blockSize); } }; } - -} +} //namespace NAtracDEnc diff --git a/src/atracdenc.h b/src/atrac1denc.h index 
30dbd20..693a468 100644 --- a/src/atracdenc.h +++ b/src/atrac1denc.h @@ -1,6 +1,7 @@ #pragma once #include "pcmengin.h" #include "aea.h" +#include "oma.h" #include "atrac_encode_settings.h" #include "transient_detector.h" #include "atrac/atrac1.h" @@ -15,37 +16,38 @@ namespace NAtracDEnc { enum EMode { E_ENCODE = 1, - E_DECODE = 2 + E_DECODE = 2, + E_ATRAC3 = 4 }; -class TAtrac1MDCT : public virtual TAtrac1Data { +class TAtrac1MDCT : public virtual NAtrac1::TAtrac1Data { NMDCT::TMDCT<512> Mdct512; NMDCT::TMDCT<256> Mdct256; NMDCT::TMDCT<64> Mdct64; NMDCT::TMIDCT<512> Midct512; NMDCT::TMIDCT<256> Midct256; public: - void IMdct(double specs[512], const TBlockSize& mode, double* low, double* mid, double* hi); - void Mdct(double specs[512], double* low, double* mid, double* hi, const TBlockSize& blockSize); + void IMdct(TFloat specs[512], const TBlockSize& mode, TFloat* low, TFloat* mid, TFloat* hi); + void Mdct(TFloat specs[512], TFloat* low, TFloat* mid, TFloat* hi, const TBlockSize& blockSize); TAtrac1MDCT() : Mdct512(2) , Mdct256(1) {} }; -class TAtrac1Processor : public IProcessor<double>, public TAtrac1MDCT, public virtual TAtrac1Data { - TAeaPtr Aea; - const TAtrac1EncodeSettings Settings; +class TAtrac1Processor : public IProcessor<TFloat>, public TAtrac1MDCT, public virtual NAtrac1::TAtrac1Data { + TCompressedIOPtr Aea; + const NAtrac1::TAtrac1EncodeSettings Settings; - double PcmBufLow[2][256 + 16]; - double PcmBufMid[2][256 + 16]; - double PcmBufHi[2][512 + 16]; + TFloat PcmBufLow[2][256 + 16]; + TFloat PcmBufMid[2][256 + 16]; + TFloat PcmBufHi[2][512 + 16]; int32_t PcmValueMax = 32767; int32_t PcmValueMin = -32767; - Atrac1SynthesisFilterBank<double> SynthesisFilterBank[2]; - Atrac1SplitFilterBank<double> SplitFilterBank[2]; + Atrac1SynthesisFilterBank<TFloat> SynthesisFilterBank[2]; + Atrac1SplitFilterBank<TFloat> SplitFilterBank[2]; class TTransientDetectors { std::vector<TTransientDetector> transientDetectorLow; @@ -79,9 +81,9 @@ class TAtrac1Processor : 
public IProcessor<double>, public TAtrac1MDCT, public v TScaler<TAtrac1Data> Scaler; public: - TAtrac1Processor(TAeaPtr&& aea, TAtrac1EncodeSettings&& settings); - TPCMEngine<double>::TProcessLambda GetDecodeLambda() override; + TAtrac1Processor(TCompressedIOPtr&& aea, NAtrac1::TAtrac1EncodeSettings&& settings); + TPCMEngine<TFloat>::TProcessLambda GetDecodeLambda() override; - TPCMEngine<double>::TProcessLambda GetEncodeLambda() override; + TPCMEngine<TFloat>::TProcessLambda GetEncodeLambda() override; }; } diff --git a/src/atrac3denc.cpp b/src/atrac3denc.cpp new file mode 100644 index 0000000..fa7724c --- /dev/null +++ b/src/atrac3denc.cpp @@ -0,0 +1,357 @@ +#include "atrac3denc.h" +#include "transient_detector.h" +#include "util.h" +#include <assert.h> +#include <algorithm> +#include <iostream> +#include <cmath> +namespace NAtracDEnc { + +using namespace NMDCT; +using namespace NAtrac3; +using std::vector; + +void TAtrac3MDCT::Mdct(TFloat specs[1024], TFloat* bands[4], TFloat maxLevels[4], TGainModulatorArray gainModulators) +{ + for (int band = 0; band < 4; ++band) { + TFloat* srcBuff = bands[band]; + TFloat* const curSpec = &specs[band*256]; + TGainModulator modFn = gainModulators[band]; + vector<TFloat> tmp(512); + memcpy(&tmp[0], &srcBuff[256], 256 * sizeof(TFloat)); + if (modFn) { + modFn(tmp.data(), srcBuff); + } + TFloat max = 0.0; + for (int i = 0; i < 256; i++) { + max = std::max(max, std::abs(srcBuff[i])); + srcBuff[256+i] = TAtrac3Data::EncodeWindow[i] * srcBuff[i]; + srcBuff[i] = TAtrac3Data::EncodeWindow[255-i] * srcBuff[i]; + } + memcpy(&tmp[256], &srcBuff[0], 256 * sizeof(TFloat)); + const vector<TFloat>& sp = Mdct512(&tmp[0]); + assert(sp.size() == 256); + memcpy(curSpec, sp.data(), 256 * sizeof(TFloat)); + if (band & 1) { + SwapArray(curSpec, 256); + } + maxLevels[band] = max; + } +} + +void TAtrac3MDCT::Mdct(TFloat specs[1024], TFloat* bands[4], TGainModulatorArray gainModulators) +{ + static TFloat dummy[4]; + Mdct(specs, bands, dummy, 
gainModulators); +} + +void TAtrac3MDCT::Midct(TFloat specs[1024], TFloat* bands[4], TGainDemodulatorArray gainDemodulators) +{ + for (int band = 0; band < 4; ++band) { + TFloat* dstBuff = bands[band]; + TFloat* curSpec = &specs[band*256]; + TFloat* prevBuff = dstBuff + 256; + TAtrac3GainProcessor::TGainDemodulator demodFn = gainDemodulators[band]; + if (band & 1) { + SwapArray(curSpec, 256); + } + vector<TFloat> inv = Midct512(curSpec); + assert(inv.size()/2 == 256); + for (int j = 0; j < 256; ++j) { + inv[j] *= /*2 */ DecodeWindow[j]; + inv[511 - j] *= /*2*/ DecodeWindow[j]; + } + if (demodFn) { + demodFn(dstBuff, inv.data(), prevBuff); + } else { + for (uint32_t j = 0; j < 256; ++j) { + dstBuff[j] = inv[j] + prevBuff[j]; + } + } + memcpy(prevBuff, &inv[256], sizeof(TFloat)*256); + } +} + +TAtrac3Processor::TAtrac3Processor(TCompressedIOPtr&& oma, TAtrac3EncoderSettings&& encoderSettings) + : Oma(std::move(oma)) + , Params(std::move(encoderSettings)) + , TransientDetectors(2 * 4, TTransientDetector(8, 256)) //2 - channels, 4 - bands +{} + +TAtrac3Processor::~TAtrac3Processor() +{} + +TAtrac3MDCT::TGainModulatorArray TAtrac3MDCT::MakeGainModulatorArray(const TAtrac3Data::SubbandInfo& si) +{ + switch (si.GetQmfNum()) { + case 1: + { + return {{ GainProcessor.Modulate(si.GetGainPoints(0)), TAtrac3MDCT::TGainModulator(), + TAtrac3MDCT::TGainModulator(), TAtrac3MDCT::TGainModulator() }}; + } + case 2: + { + return {{ GainProcessor.Modulate(si.GetGainPoints(0)), GainProcessor.Modulate(si.GetGainPoints(1)), + TAtrac3MDCT::TGainModulator(), TAtrac3MDCT::TGainModulator() }}; + } + case 3: + { + return {{ GainProcessor.Modulate(si.GetGainPoints(0)), GainProcessor.Modulate(si.GetGainPoints(1)), + GainProcessor.Modulate(si.GetGainPoints(2)), TAtrac3MDCT::TGainModulator() }}; + } + case 4: + { + return {{ GainProcessor.Modulate(si.GetGainPoints(0)), GainProcessor.Modulate(si.GetGainPoints(1)), + GainProcessor.Modulate(si.GetGainPoints(2)), 
GainProcessor.Modulate(si.GetGainPoints(3)) }}; + } + default: + assert(false); + return {}; + + } +} + +//TODO: +TAtrac3Data::TTonalComponents TAtrac3Processor::ExtractTonalComponents(TFloat* specs, TTonalDetector fn) +{ + TAtrac3Data::TTonalComponents res; + const float thresholds[TAtrac3Data::NumQMF] = { 0.9, 2.4, 2.8, 3.2 }; + for (uint8_t bandNum = 0; bandNum < this->NumQMF; ++bandNum) { + //disable for frequence above 16KHz until we works without proper psy + if (bandNum > 2) + continue; + for (uint8_t blockNum = BlocksPerBand[bandNum]; blockNum < BlocksPerBand[bandNum + 1]; ++blockNum) { + const uint16_t specNumStart = SpecsStartLong[blockNum]; + const uint16_t specNumEnd = specNumStart + SpecsPerBlock[blockNum]; + float level = fn(specs + specNumStart, SpecsPerBlock[blockNum]); + if (!isnan(level)) { + for (uint16_t n = specNumStart; n < specNumEnd; ++n) { + //TODO: + TFloat absValue = std::abs(specs[n]); + if (absValue > 65535.0) { + TFloat shift = (specs[n] > 0) ? 65535.0 : -65535.0; + std::cerr << "overflow: " << specs[n] << " at: " << n << std::endl; + //res.push_back({n, specs[n] - shift}); + specs[n] = shift; + } else if (log10(std::abs(specs[n])) - log10(level) > thresholds[bandNum]) { + res.push_back({n, specs[n]/* - level*/}); + specs[n] = 0;//level; + } + + } + + } + } + } + return res; +} + +std::vector<TTonalComponent> TAtrac3Processor::MapTonalComponents(const TTonalComponents& tonalComponents) +{ + vector<TTonalComponent> componentMap; + for (uint16_t i = 0; i < tonalComponents.size();) { + const uint16_t startPos = i; + uint16_t curPos; + do { + curPos = tonalComponents[i].Pos; + ++i; + } while ( i < tonalComponents.size() && tonalComponents[i].Pos == curPos + 1 && i - startPos < 7); + const uint16_t len = i - startPos; + TFloat tmp[8]; + for (uint8_t j = 0; j < len; ++j) + tmp[j] = tonalComponents[startPos + j].Val; + const TScaledBlock& scaledBlock = Scaler.Scale(tmp, len); + componentMap.push_back({&tonalComponents[startPos], 7, 
scaledBlock}); + } + return componentMap; +} + + +TFloat TAtrac3Processor::LimitRel(TFloat x) +{ + return std::min(std::max(x, GainLevel[15]), GainLevel[0]); +} + +TAtrac3Processor::TTransientParam TAtrac3Processor::CalcTransientParam(const std::vector<TFloat>& gain, const TFloat lastMax) +{ + int32_t attackLocation = 0; + TFloat attackRelation = 1; + + const TFloat attackThreshold = 4; + //pre-echo searching + TFloat tmp; + TFloat q = lastMax; //std::max(lastMax, gain[0]); + tmp = gain[0] / q; + if (tmp > attackThreshold) { + attackRelation = tmp; + } else { + for (uint32_t i = 0; i < gain.size() -1; ++i) { + q = std::max(q, gain[i]); + tmp = gain[i+1] / q; + if (tmp > attackThreshold) { + attackRelation = tmp; + attackLocation = i; + break; + } + } + } + + int32_t releaseLocation = 0; + TFloat releaseRelation = 1; + + const TFloat releaseTreshold = 4; + //post-echo searching + q = 0; + for (uint32_t i = gain.size() - 1; i > 0; --i) { + q = std::max(q, gain[i]); + tmp = gain[i-1] / q; + if (tmp > releaseTreshold) { + releaseRelation = tmp; + releaseLocation = i; + break; + } + } + if (releaseLocation == 0) { + q = std::max(q, gain[0]); + tmp = lastMax / q; + if (tmp > releaseTreshold) { + releaseRelation = tmp; + } + } + + return {attackLocation, attackRelation, releaseLocation, releaseRelation}; +} + +TAtrac3Data::SubbandInfo TAtrac3Processor::CreateSubbandInfo(TFloat* in[4], + uint32_t channel, + TTransientDetector* transientDetector) +{ + TAtrac3Data::SubbandInfo siCur; + for (int band = 0; band < 4; ++band) { + + TFloat invBuf[256]; + if (band & 1) { + memcpy(invBuf, in[band], 256*sizeof(TFloat)); + InvertSpectrInPlase<256>(invBuf); + } + const TFloat* srcBuff = (band & 1) ? 
invBuf : in[band]; + + const TFloat* const lastMax = &PrevPeak[channel][band]; + + std::vector<TAtrac3Data::SubbandInfo::TGainPoint> curve; + std::vector<TFloat> gain = AnalyzeGain(srcBuff, 256, 32, false); + + auto transientParam = CalcTransientParam(gain, *lastMax); + bool hasTransient = (transientParam.AttackRelation != 1.0 || transientParam.ReleaseRelation != 1.0); + + //combine attack and release + TFloat relA = 1; + TFloat relB = 1; + TFloat relC = 1; + uint32_t loc1 = 0; + uint32_t loc2 = 0; + if (transientParam.AttackLocation < transientParam.ReleaseLocation) { + //Peak like transient + relA = transientParam.AttackRelation; + loc1 = transientParam.AttackLocation; + relB = 1; + loc2 = transientParam.ReleaseLocation; + relC = transientParam.ReleaseRelation; + } else if (transientParam.AttackLocation > transientParam.ReleaseLocation) { + //Hole like transient + relA = transientParam.AttackRelation; + loc1 = transientParam.ReleaseLocation; + relB = transientParam.AttackRelation * transientParam.ReleaseRelation; + loc2 = transientParam.AttackLocation; + relC = transientParam.ReleaseRelation; + } else { + //??? 
+ //relA = relB = relC = transientParam.AttackRelation * transientParam.ReleaseRelation; + //loc1 = loc2 = transientParam.ReleaseLocation; + hasTransient = false; + } + //std::cout << "loc: " << loc1 << " " << loc2 << " rel: " << relA << " " << relB << " " << relC << std::endl; + + if (relC != 1) { + relA /= relC; + relB /= relC; + relC = 1.0; + } + auto relToIdx = [this](TFloat rel) { + rel = LimitRel(1/rel); + return (uint32_t)(15 - Log2FloatToIdx(rel, 2048)); + }; + curve.push_back({relToIdx(relA), loc1}); + if (loc1 != loc2) { + curve.push_back({relToIdx(relB), loc2}); + } + if (loc2 != 31) { + curve.push_back({relToIdx(relC), 31}); + } + + if (hasTransient) { + siCur.AddSubbandCurve(band, std::move(curve)); + } + + } + return siCur; +} + + +TPCMEngine<TFloat>::TProcessLambda TAtrac3Processor::GetEncodeLambda() +{ + TOma* omaptr = dynamic_cast<TOma*>(Oma.get()); + if (!omaptr) { + std::cerr << "Wrong container" << std::endl; + abort(); + } + + TAtrac3BitStreamWriter* bitStreamWriter = new TAtrac3BitStreamWriter(omaptr, *Params.ConteinerParams); + return [this, bitStreamWriter](TFloat* data, const TPCMEngine<TFloat>::ProcessMeta& meta) { + for (uint32_t channel=0; channel < 2; channel++) { + vector<TFloat> specs(1024); + TFloat src[NumSamples]; + + for (size_t i = 0; i < NumSamples; ++i) { + src[i] = data[meta.Channels == 1 ? i : (i * 2 + channel)] / 4.0; //no mono mode in atrac3. //TODO we can TFloat frame after encoding + } + + TFloat* p[4] = {&PcmBuffer[channel][0][0], &PcmBuffer[channel][1][0], &PcmBuffer[channel][2][0], &PcmBuffer[channel][3][0]}; + SplitFilterBank[channel].Split(&src[0], p); + + TAtrac3Data::SubbandInfo siCur = Params.NoGainControll ? + TAtrac3Data::SubbandInfo() : CreateSubbandInfo(p, channel, &TransientDetectors[channel*4]); //4 detectors per band + + TFloat* maxOverlapLevels = PrevPeak[channel]; + + Mdct(specs.data(), p, maxOverlapLevels, MakeGainModulatorArray(siCur)); + TTonalComponents tonals = Params.NoTonalComponents ? 
+ TAtrac3Data::TTonalComponents() : ExtractTonalComponents(specs.data(), [](const TFloat* spec, uint16_t len) { + std::vector<TFloat> magnitude(len); + for (uint16_t i = 0; i < len; ++i) { + magnitude[i] = std::abs(spec[i]); + } + float median = CalcMedian(magnitude.data(), len); + for (uint16_t i = 0; i < len; ++i) { + if (median > 0.001) { + return median; + } + } + return NAN; + }); + + const std::vector<TTonalComponent>& components = MapTonalComponents(tonals); + + //TBlockSize for ATRAC3 - 4 subband, all are long (no short window) + bitStreamWriter->WriteSoundUnit(siCur, components, Scaler.ScaleFrame(specs, TBlockSize())); + } + }; +} + +TPCMEngine<TFloat>::TProcessLambda TAtrac3Processor::GetDecodeLambda() +{ + abort(); + return {}; +} + +}//namespace NAtracDEnc diff --git a/src/atrac3denc.h b/src/atrac3denc.h new file mode 100644 index 0000000..25f5c90 --- /dev/null +++ b/src/atrac3denc.h @@ -0,0 +1,86 @@ +#pragma once +#include "config.h" +#include "pcmengin.h" +#include "oma.h" +#include "aea.h" +#include "atrac/atrac3.h" +#include "atrac/atrac3_qmf.h" +#include "transient_detector.h" + +#include "atrac/atrac3_bitstream.h" +#include "atrac/atrac_scale.h" +#include "mdct/mdct.h" +#include "gain_processor.h" + +#include <functional> +#include <array> +namespace NAtracDEnc { + +class TAtrac3MDCT : public NAtrac3::TAtrac3Data { + NMDCT::TMDCT<512> Mdct512; + NMDCT::TMIDCT<512> Midct512; +public: + typedef TGainProcessor<TAtrac3Data> TAtrac3GainProcessor; + TAtrac3GainProcessor GainProcessor; + TAtrac3MDCT() + : Mdct512(2) + {} +public: + using TGainModulator = TAtrac3GainProcessor::TGainModulator; + using TGainDemodulator = TAtrac3GainProcessor::TGainDemodulator; + typedef std::array<TGainDemodulator, 4> TGainDemodulatorArray; + typedef std::array<TGainModulator, 4> TGainModulatorArray; + void Mdct(TFloat specs[1024], + TFloat* bands[4], + TFloat maxLevels[4], + TGainModulatorArray gainModulators); + void Mdct(TFloat specs[1024], + TFloat* bands[4], + 
TGainModulatorArray gainModulators = TGainModulatorArray()); + void Midct(TFloat specs[1024], + TFloat* bands[4], + TGainDemodulatorArray gainDemodulators = TGainDemodulatorArray()); +protected: + TAtrac3MDCT::TGainModulatorArray MakeGainModulatorArray(const TAtrac3Data::SubbandInfo& si); +}; + +//returns threshhold +typedef std::function<float(const TFloat* p, uint16_t len)> TTonalDetector; + +class TAtrac3Processor : public IProcessor<TFloat>, public TAtrac3MDCT { + TCompressedIOPtr Oma; + const NAtrac3::TAtrac3EncoderSettings Params; + TFloat PcmBuffer[2][4][256 + 256]; //2 channel, 4 band, 256 sample + 256 for overlap buffer + + TFloat PrevPeak[2][4]; //2 channel, 4 band - peak level (after windowing), used to check overflow during scalling + + Atrac3SplitFilterBank<TFloat> SplitFilterBank[2]; + TScaler<TAtrac3Data> Scaler; + std::vector<TTransientDetector> TransientDetectors; + typedef std::array<uint8_t, NumSpecs> TonalComponentMask; +public: + struct TTransientParam { + const int32_t AttackLocation; + const TFloat AttackRelation; + const int32_t ReleaseLocation; + const TFloat ReleaseRelation; + }; +private: + +#ifdef ATRAC_UT_PUBLIC +public: +#endif + TFloat LimitRel(TFloat x); + TTransientParam CalcTransientParam(const std::vector<TFloat>& gain, TFloat lastMax); + TAtrac3Data::SubbandInfo CreateSubbandInfo(TFloat* in[4], uint32_t channel, TTransientDetector* transientDetector); + TonalComponentMask AnalyzeTonalComponent(TFloat* specs); + TTonalComponents ExtractTonalComponents(TFloat* specs, TTonalDetector fn); + + std::vector<NAtrac3::TTonalComponent> MapTonalComponents(const TTonalComponents& tonalComponents); +public: + TAtrac3Processor(TCompressedIOPtr&& oma, NAtrac3::TAtrac3EncoderSettings&& encoderSettings); + ~TAtrac3Processor(); + TPCMEngine<TFloat>::TProcessLambda GetDecodeLambda() override; + TPCMEngine<TFloat>::TProcessLambda GetEncodeLambda() override; +}; +} diff --git a/src/atrac3denc_ut.cpp b/src/atrac3denc_ut.cpp new file mode 100644 index 
0000000..e0602a2 --- /dev/null +++ b/src/atrac3denc_ut.cpp @@ -0,0 +1,335 @@ +#define ATRAC_UT_PUBLIC + +#include "atrac3denc.h" +#include <gtest/gtest.h> + +#include <vector> +#include <cmath> +using std::vector; +using namespace NAtracDEnc; +using namespace NAtrac3; + +static void GenerateSignal(TFloat* buf, size_t n, TFloat f, TFloat a) { + for (size_t i = 0; i < n; ++i) { + buf[i] = a * sin((M_PI/2) * i * f); + } +} + +static void GenerateSignalWithTransient(TFloat* buf, size_t n, TFloat f, TFloat a, + size_t transientPos, size_t transientLen, TFloat transientLev) { + assert(transientPos + transientLen < n); + GenerateSignal(buf, n, f, a); + GenerateSignal(buf+transientPos, transientLen, f, transientLev); +// for (size_t i = transientPos; i < transientPos + transientLen; ++i) { +// buf[i] += (i & 1) ? transientLev : - transientLev; +// } +} + +class TWindowTest : public TAtrac3Data { +public: + void RunTest() { + for (size_t i = 0; i < 256; i++) { + const TFloat ha1 = EncodeWindow[i] / 2.0; //compensation + const TFloat hs1 = DecodeWindow[i]; + const TFloat hs2 = DecodeWindow[255-i]; + const TFloat res = hs1 / (hs1 * hs1 + hs2 * hs2); + EXPECT_NEAR(ha1, res, 0.000000001); + } + } +}; + +template<class T> +class TAtrac3MDCTWorkBuff { + T* Buffer; +public: + static const size_t BandBuffSz = 256; + static const size_t BandBuffAndOverlapSz = BandBuffSz * 2; + static const size_t BuffSz = BandBuffAndOverlapSz * (4 + 4); + T* const Band0; + T* const Band1; + T* const Band2; + T* const Band3; + T* const Band0Res; + T* const Band1Res; + T* const Band2Res; + T* const Band3Res; + TAtrac3MDCTWorkBuff() + : Buffer(new T[BuffSz]) + , Band0(Buffer) + , Band1(Band0 + BandBuffAndOverlapSz) + , Band2(Band1 + BandBuffAndOverlapSz) + , Band3(Band2 + BandBuffAndOverlapSz) + , Band0Res(Band3 + BandBuffAndOverlapSz) + , Band1Res(Band0Res + BandBuffAndOverlapSz) + , Band2Res(Band1Res + BandBuffAndOverlapSz) + , Band3Res(Band2Res + BandBuffAndOverlapSz) + { + memset(Buffer, 0, 
sizeof(T)*BuffSz); + } + ~TAtrac3MDCTWorkBuff() + { + delete[] Buffer; + } +}; + + +TEST(TAtrac3MDCT, TAtrac3MDCTZeroOneBlock) { + TAtrac3MDCT mdct; + TAtrac3MDCTWorkBuff<TFloat> buff; + size_t workSz = TAtrac3MDCTWorkBuff<TFloat>::BandBuffSz; + + vector<TFloat> specs(1024); + + TFloat* p[4] = { buff.Band0, buff.Band1, buff.Band2, buff.Band3 }; + + mdct.Mdct(specs.data(), p); + for(auto s: specs) + EXPECT_NEAR(s, 0.0, 0.0000000001); + + TFloat* t[4] = { buff.Band0Res, buff.Band1Res, buff.Band2Res, buff.Band3Res }; + mdct.Midct(specs.data(), p); + + for(size_t i = 0; i < workSz; ++i) + EXPECT_NEAR(buff.Band0Res[i], 0.0, 0.0000000001); + + for(size_t i = 0; i < workSz; ++i) + EXPECT_NEAR(buff.Band1Res[i], 0.0, 0.0000000001); + + for(size_t i = 0; i < workSz; ++i) + EXPECT_NEAR(buff.Band2Res[i], 0.0, 0.0000000001); + + for(size_t i = 0; i < workSz; ++i) + EXPECT_NEAR(buff.Band3Res[i], 0.0, 0.0000000001); + + +} + +TEST(TAtrac3MDCT, TAtrac3MDCTSignal) { + TAtrac3MDCT mdct; + TAtrac3MDCTWorkBuff<TFloat> buff; + size_t workSz = TAtrac3MDCTWorkBuff<TFloat>::BandBuffSz; + + const size_t len = 1024; + vector<TFloat> signal(len); + vector<TFloat> signalRes(len); + GenerateSignal(signal.data(), signal.size(), 0.25, 32768); + + for (size_t pos = 0; pos < len; pos += workSz) { + vector<TFloat> specs(1024); + memcpy(buff.Band0, signal.data() + pos, workSz * sizeof(TFloat)); + + TFloat* p[4] = { buff.Band0, buff.Band1, buff.Band2, buff.Band3 }; + mdct.Mdct(specs.data(), p); + + TFloat* t[4] = { buff.Band0Res, buff.Band1Res, buff.Band2Res, buff.Band3Res }; + mdct.Midct(specs.data(), t); + + memcpy(signalRes.data() + pos, buff.Band0Res, workSz * sizeof(TFloat)); + } + + for (int i = workSz; i < len; ++i) + EXPECT_NEAR(signal[i - workSz], signalRes[i], 0.00000001); +} + +TEST(TAtrac3MDCT, TAtrac3MDCTSignalWithGainCompensation) { + TAtrac3MDCT mdct; + TAtrac3MDCTWorkBuff<TFloat> buff; + size_t workSz = TAtrac3MDCTWorkBuff<TFloat>::BandBuffSz; + + const size_t len = 4096; + 
vector<TFloat> signal(len, 8000); + vector<TFloat> signalRes(len); + GenerateSignal(signal.data() + 1024, signal.size()-1024, 0.25, 32768); + + for (size_t pos = 0; pos < len; pos += workSz) { + vector<TFloat> specs(1024); + memcpy(buff.Band0, signal.data() + pos, workSz * sizeof(TFloat)); + + TFloat* p[4] = { buff.Band0, buff.Band1, buff.Band2, buff.Band3 }; + + if (pos == 256) { //apply gain modulation + TAtrac3Data::SubbandInfo siCur; + siCur.AddSubbandCurve(0, {{3, 2}}); + + mdct.Mdct(specs.data(), p, { mdct.GainProcessor.Modulate(siCur.GetGainPoints(0)), + TAtrac3MDCT::TGainModulator(), TAtrac3MDCT::TGainModulator(), TAtrac3MDCT::TGainModulator()}); + } else if (pos == 1024) { + TAtrac3Data::SubbandInfo siCur; + std::vector<TAtrac3Data::SubbandInfo::TGainPoint> curve = {{3, 2}, {2, 5}}; + siCur.AddSubbandCurve(0, std::move(curve)); + + mdct.Mdct(specs.data(), p, { mdct.GainProcessor.Modulate(siCur.GetGainPoints(0)), + TAtrac3MDCT::TGainModulator(), TAtrac3MDCT::TGainModulator(), TAtrac3MDCT::TGainModulator()}); + } else if (pos == 1024 + 256) { + TAtrac3Data::SubbandInfo siCur; + siCur.AddSubbandCurve(0, {{1, 0}}); + + mdct.Mdct(specs.data(), p, { mdct.GainProcessor.Modulate(siCur.GetGainPoints(0)), + TAtrac3MDCT::TGainModulator(), TAtrac3MDCT::TGainModulator(), TAtrac3MDCT::TGainModulator()}); + } else if (pos == 2048) { + TAtrac3Data::SubbandInfo siCur; + std::vector<TAtrac3Data::SubbandInfo::TGainPoint> curve = {{4, 2}, {1, 5}}; + siCur.AddSubbandCurve(0, std::move(curve)); + + mdct.Mdct(specs.data(), p, { mdct.GainProcessor.Modulate(siCur.GetGainPoints(0)), + TAtrac3MDCT::TGainModulator(), TAtrac3MDCT::TGainModulator(), TAtrac3MDCT::TGainModulator()}); + } else { + mdct.Mdct(specs.data(), p); + } + + TFloat* t[4] = { buff.Band0Res, buff.Band1Res, buff.Band2Res, buff.Band3Res }; + + if (pos == 256) { //restore gain modulation + TAtrac3Data::SubbandInfo siCur; + TAtrac3Data::SubbandInfo siNext; + siNext.AddSubbandCurve(0, {{3, 2}}); + + 
mdct.Midct(specs.data(), t, {mdct.GainProcessor.Demodulate(siCur.GetGainPoints(0), siNext.GetGainPoints(0)), + TAtrac3MDCT::TAtrac3GainProcessor::TGainDemodulator(), + TAtrac3MDCT::TAtrac3GainProcessor::TGainDemodulator(), + TAtrac3MDCT::TAtrac3GainProcessor::TGainDemodulator()}); + } else if (pos == 512) { + TAtrac3Data::SubbandInfo siNext; + TAtrac3Data::SubbandInfo siCur; + siCur.AddSubbandCurve(0, {{3, 2}}); + + mdct.Midct(specs.data(), t, {mdct.GainProcessor.Demodulate(siCur.GetGainPoints(0), siNext.GetGainPoints(0)), + TAtrac3MDCT::TAtrac3GainProcessor::TGainDemodulator(), + TAtrac3MDCT::TAtrac3GainProcessor::TGainDemodulator(), + TAtrac3MDCT::TAtrac3GainProcessor::TGainDemodulator()}); + } else if (pos == 1024) { + TAtrac3Data::SubbandInfo siCur; + TAtrac3Data::SubbandInfo siNext; + std::vector<TAtrac3Data::SubbandInfo::TGainPoint> curve = {{3, 2}, {2, 5}}; + siNext.AddSubbandCurve(0, std::move(curve)); + + mdct.Midct(specs.data(), t, {mdct.GainProcessor.Demodulate(siCur.GetGainPoints(0), siNext.GetGainPoints(0)), + TAtrac3MDCT::TAtrac3GainProcessor::TGainDemodulator(), + TAtrac3MDCT::TAtrac3GainProcessor::TGainDemodulator(), + TAtrac3MDCT::TAtrac3GainProcessor::TGainDemodulator()}); + } else if (pos == 1024 + 256) { + TAtrac3Data::SubbandInfo siNext; + TAtrac3Data::SubbandInfo siCur; + siNext.AddSubbandCurve(0, {{1, 0}}); + std::vector<TAtrac3Data::SubbandInfo::TGainPoint> curve = {{3, 2}, {2, 5}}; + siCur.AddSubbandCurve(0, std::move(curve)); + + mdct.Midct(specs.data(), t, {mdct.GainProcessor.Demodulate(siCur.GetGainPoints(0), siNext.GetGainPoints(0)), + TAtrac3MDCT::TAtrac3GainProcessor::TGainDemodulator(), + TAtrac3MDCT::TAtrac3GainProcessor::TGainDemodulator(), + TAtrac3MDCT::TAtrac3GainProcessor::TGainDemodulator()}); + } else if (pos == 1024 + 256 + 256) { + TAtrac3Data::SubbandInfo siNext; + TAtrac3Data::SubbandInfo siCur; + siCur.AddSubbandCurve(0, {{1, 0}}); + + mdct.Midct(specs.data(), t, {mdct.GainProcessor.Demodulate(siCur.GetGainPoints(0), 
siNext.GetGainPoints(0)), + TAtrac3MDCT::TAtrac3GainProcessor::TGainDemodulator(), + TAtrac3MDCT::TAtrac3GainProcessor::TGainDemodulator(), + TAtrac3MDCT::TAtrac3GainProcessor::TGainDemodulator()}); + } else if (pos == 2048) { + TAtrac3Data::SubbandInfo siCur; + TAtrac3Data::SubbandInfo siNext; + std::vector<TAtrac3Data::SubbandInfo::TGainPoint> curve = {{4, 2}, {1, 5}}; + siNext.AddSubbandCurve(0, std::move(curve)); + + mdct.Midct(specs.data(), t, {mdct.GainProcessor.Demodulate(siCur.GetGainPoints(0), siNext.GetGainPoints(0)), + TAtrac3MDCT::TAtrac3GainProcessor::TGainDemodulator(), + TAtrac3MDCT::TAtrac3GainProcessor::TGainDemodulator(), + TAtrac3MDCT::TAtrac3GainProcessor::TGainDemodulator()}); + } else if (pos == 2048 + 256) { + TAtrac3Data::SubbandInfo siNext; + TAtrac3Data::SubbandInfo siCur; + std::vector<TAtrac3Data::SubbandInfo::TGainPoint> curve = {{4, 2}, {1, 5}}; + siCur.AddSubbandCurve(0, std::move(curve)); + + mdct.Midct(specs.data(), t, {mdct.GainProcessor.Demodulate(siCur.GetGainPoints(0), siNext.GetGainPoints(0)), + TAtrac3MDCT::TAtrac3GainProcessor::TGainDemodulator(), + TAtrac3MDCT::TAtrac3GainProcessor::TGainDemodulator(), + TAtrac3MDCT::TAtrac3GainProcessor::TGainDemodulator()}); + } else { + mdct.Midct(specs.data(), t); + } + memcpy(signalRes.data() + pos, buff.Band0Res, workSz * sizeof(TFloat)); + } + for (int i = workSz; i < len; ++i) { + //std::cout << "res: " << i << " " << signalRes[i] << std::endl; + EXPECT_NEAR(signal[i - workSz], signalRes[i], 0.00000001); + } +} + +TEST(TAtrac3MDCT, TAtrac3MDCTSignalWithGainCompensationAndManualTransient) { + TAtrac3MDCT mdct; + TAtrac3MDCTWorkBuff<TFloat> buff; + size_t workSz = TAtrac3MDCTWorkBuff<TFloat>::BandBuffSz; + + const size_t len = 1024; + vector<TFloat> signal(len); + vector<TFloat> signalRes(len); + GenerateSignalWithTransient(signal.data(), signal.size(), 0.03125, 512.0, + 640, 64, 32768.0); + const std::vector<TAtrac3Data::SubbandInfo::TGainPoint> curve1 = {{6, 13}, {4, 14}}; + + for 
(size_t pos = 0; pos < len; pos += workSz) { + vector<TFloat> specs(1024); + memcpy(buff.Band0, signal.data() + pos, workSz * sizeof(TFloat)); + + TFloat* p[4] = { buff.Band0, buff.Band1, buff.Band2, buff.Band3 }; + //for (int i = 0; i < 256; i++) { + // std::cout << i + pos << " " << buff.Band0[i] << std::endl; + //} + + if (pos == 512) { //apply gain modulation + TAtrac3Data::SubbandInfo siCur; + siCur.AddSubbandCurve(0, std::vector<TAtrac3Data::SubbandInfo::TGainPoint>(curve1)); + + for (int i = 0; i < 256; i++) { + std::cout << i << " " << buff.Band0[i] << std::endl; + } + + mdct.Mdct(specs.data(), p, { mdct.GainProcessor.Modulate(siCur.GetGainPoints(0)), + TAtrac3MDCT::TGainModulator(), TAtrac3MDCT::TGainModulator(), TAtrac3MDCT::TGainModulator()}); + } else { + mdct.Mdct(specs.data(), p); + } + + for (int i = 0; i < specs.size(); ++i) { + if (i > 240 && i < 256) + specs[i] /= 1.9; + } + TFloat* t[4] = { buff.Band0Res, buff.Band1Res, buff.Band2Res, buff.Band3Res }; + if (pos == 512) { //restore gain modulation + TAtrac3Data::SubbandInfo siCur; + TAtrac3Data::SubbandInfo siNext; + siNext.AddSubbandCurve(0, std::vector<TAtrac3Data::SubbandInfo::TGainPoint>(curve1)); + mdct.Midct(specs.data(), t, {mdct.GainProcessor.Demodulate(siCur.GetGainPoints(0), siNext.GetGainPoints(0)), + TAtrac3MDCT::TAtrac3GainProcessor::TGainDemodulator(), + TAtrac3MDCT::TAtrac3GainProcessor::TGainDemodulator(), + TAtrac3MDCT::TAtrac3GainProcessor::TGainDemodulator()}); + } else if (pos == 768) { + TAtrac3Data::SubbandInfo siNext; + TAtrac3Data::SubbandInfo siCur; + siCur.AddSubbandCurve(0, std::vector<TAtrac3Data::SubbandInfo::TGainPoint>(curve1)); + + mdct.Midct(specs.data(), t, {mdct.GainProcessor.Demodulate(siCur.GetGainPoints(0), siNext.GetGainPoints(0)), + TAtrac3MDCT::TAtrac3GainProcessor::TGainDemodulator(), + TAtrac3MDCT::TAtrac3GainProcessor::TGainDemodulator(), + TAtrac3MDCT::TAtrac3GainProcessor::TGainDemodulator()}); + } else { + mdct.Midct(specs.data(), t); + } + + 
memcpy(signalRes.data() + pos, buff.Band0Res, workSz * sizeof(TFloat)); + } + for (int i = workSz; i < len; ++i) { + //std::cout << "res: " << i << " " << signalRes[i] << std::endl; + EXPECT_NEAR(signal[i - workSz], signalRes[i], 10); + } +} + +TEST(TAtrac3MDCT, TAtrac3MDCTWindow) { + TWindowTest test; + test.RunTest(); +} + + diff --git a/src/atrac_encode_settings.h b/src/atrac_encode_settings.h index e3ae3b7..fd96fa4 100644 --- a/src/atrac_encode_settings.h +++ b/src/atrac_encode_settings.h @@ -2,29 +2,4 @@ namespace NAtracDEnc { -class TAtrac1EncodeSettings { -public: - enum class EWindowMode { - EWM_NOTRANSIENT, - EWM_AUTO - }; -private: - const uint32_t BfuIdxConst = 0; - const bool FastBfuNumSearch = false; - EWindowMode WindowMode = EWindowMode::EWM_AUTO; - const uint32_t WindowMask = 0; -public: - TAtrac1EncodeSettings(); - TAtrac1EncodeSettings(uint32_t bfuIdxConst, bool fastBfuNumSearch, EWindowMode windowMode, uint32_t windowMask) - : BfuIdxConst(bfuIdxConst) - , FastBfuNumSearch(fastBfuNumSearch) - , WindowMode(windowMode) - , WindowMask(windowMask) - {} - uint32_t GetBfuIdxConst() const { return BfuIdxConst; } - bool GetFastBfuNumSearch() const { return FastBfuNumSearch; } - EWindowMode GetWindowMode() const {return WindowMode; } - uint32_t GetWindowMask() const {return WindowMask; } -}; - } diff --git a/src/atracdenc_ut.cpp b/src/atracdenc_ut.cpp index 5370e9e..5d5f5eb 100644 --- a/src/atracdenc_ut.cpp +++ b/src/atracdenc_ut.cpp @@ -1,16 +1,16 @@ -#include "atracdenc.h" +#include "atrac1denc.h" #include <gtest/gtest.h> #include <vector> using std::vector; using namespace NAtracDEnc; -void CheckResult128(const vector<double>& a, const vector<double>& b) { +void CheckResult128(const vector<TFloat>& a, const vector<TFloat>& b) { for (int i = 0; i < 96; ++i ) { EXPECT_NEAR(a[i], 4 * b[i+32], 0.0000001); } } -void CheckResult256(const vector<double>& a, const vector<double>& b) { +void CheckResult256(const vector<TFloat>& a, const vector<TFloat>& b) { for 
(int i = 0; i < 192; ++i ) { EXPECT_NEAR(a[i], 2 * b[i+32], 0.0000001); } @@ -19,14 +19,14 @@ void CheckResult256(const vector<double>& a, const vector<double>& b) { TEST(TAtrac1MDCT, TAtrac1MDCTLongEncDec) { TAtrac1MDCT mdct; - vector<double> low(128 * 2); - vector<double> mid(128 * 2); - vector<double> hi(256 * 2); - vector<double> specs(512 * 2); - - vector<double> lowRes(128 * 2); - vector<double> midRes(128 * 2); - vector<double> hiRes(256 * 2); + vector<TFloat> low(128 * 2); + vector<TFloat> mid(128 * 2); + vector<TFloat> hi(256 * 2); + vector<TFloat> specs(512 * 2); + + vector<TFloat> lowRes(128 * 2); + vector<TFloat> midRes(128 * 2); + vector<TFloat> hiRes(256 * 2); for (int i = 0; i < 128; i++) { low[i] = mid[i] = i; @@ -48,25 +48,25 @@ TEST(TAtrac1MDCT, TAtrac1MDCTLongEncDec) { TEST(TAtrac1MDCT, TAtrac1MDCTShortEncDec) { TAtrac1MDCT mdct; - vector<double> low(128 * 2); - vector<double> mid(128 * 2); - vector<double> hi(256 * 2); - vector<double> specs(512 * 2); - - vector<double> lowRes(128 * 2); - vector<double> midRes(128 * 2); - vector<double> hiRes(256 * 2); + vector<TFloat> low(128 * 2); + vector<TFloat> mid(128 * 2); + vector<TFloat> hi(256 * 2); + vector<TFloat> specs(512 * 2); + + vector<TFloat> lowRes(128 * 2); + vector<TFloat> midRes(128 * 2); + vector<TFloat> hiRes(256 * 2); for (int i = 0; i < 128; i++) { low[i] = mid[i] = i; } - const vector<double> lowCopy = low; //in case of short wondow AtracMDCT changed input buffer during calculation - const vector<double> midCopy = mid; + const vector<TFloat> lowCopy = low; //in case of short wondow AtracMDCT changed input buffer during calculation + const vector<TFloat> midCopy = mid; for (int i = 0; i < 256; i++) { hi[i] = i; } - const vector<double> hiCopy = hi; + const vector<TFloat> hiCopy = hi; const TBlockSize blockSize(true, true, true); //short diff --git a/src/bitstream/bitstream.cpp b/src/bitstream/bitstream.cpp index d916f52..e8f1857 100644 --- a/src/bitstream/bitstream.cpp +++ 
b/src/bitstream/bitstream.cpp @@ -11,7 +11,6 @@ TBitStream::TBitStream(const char* buf, int size) {} TBitStream::TBitStream() {} - void TBitStream::Write(unsigned long long val, int n) { if (n > 23 || n < 0) abort(); @@ -29,10 +28,30 @@ void TBitStream::Write(unsigned long long val, int n) { for (int i = 0; i < n/8 + (overlap ? 2 : 1); ++i) { Buf[bytesPos+i] |= t.bytes[7-i]; + + // std::cout << "bufPos: "<< bytesPos+i << " buf: " << (int)Buf[bytesPos+i] << std::endl; } BitsUsed += n; } +/* +void TBitStream::Write(unsigned long long val, int n) { + if (n > 23 || n < 0) + abort(); + const int bitsLeft = Buf.size() * 8 - BitsUsed; + const int bitsReq = n - bitsLeft; + const int bytesPos = BitsUsed / 8; + const int overlap = BitsUsed % 8; + + if (overlap || bitsReq >= 0) { + Buf.resize(Buf.size() + (bitsReq / 8 + (overlap ? 2 : 1 )), 0); + } + TMix t; + t.ull = (val << (64 - n)) >> overlap; + *(unsigned long long*)&Buf[bytesPos-8] |= t.ull; + BitsUsed += n; +} +*/ unsigned long long TBitStream::Read(int n) { if (n >23 || n < 0) abort(); diff --git a/src/bitstream/bitstream_ut.cpp b/src/bitstream/bitstream_ut.cpp index 109570b..7e246ca 100644 --- a/src/bitstream/bitstream_ut.cpp +++ b/src/bitstream/bitstream_ut.cpp @@ -11,8 +11,10 @@ TEST(TBitStream, DefaultConstructor) { TEST(TBitStream, SimpleWriteRead) { TBitStream bs; bs.Write(5, 3); - EXPECT_EQ(3, bs.GetSizeInBits()); + bs.Write(true, 1); + EXPECT_EQ(4, bs.GetSizeInBits()); EXPECT_EQ(5, bs.Read(3)); + EXPECT_EQ(true, bs.Read(1)); } TEST(TBisStream, OverlapWriteRead) { @@ -30,6 +32,7 @@ TEST(TBisStream, OverlapWriteRead) { EXPECT_EQ(212, bs.Read(22)); EXPECT_EQ(323, bs.Read(22)); } + TEST(TBisStream, OverlapWriteRead2) { TBitStream bs; bs.Write(2, 2); @@ -41,6 +44,64 @@ TEST(TBisStream, OverlapWriteRead2) { EXPECT_EQ(10003, bs.Read(16)); } +TEST(TBisStream, OverlapWriteRead3) { + TBitStream bs; + bs.Write(40, 6); + bs.Write(3, 2); + bs.Write(0, 3); + bs.Write(0, 3); + bs.Write(0, 3); + bs.Write(0, 3); + + 
bs.Write(3, 5); + bs.Write(1, 2); + bs.Write(1, 1); + bs.Write(1, 1); + bs.Write(1, 1); + bs.Write(1, 1); + + bs.Write(0, 3); + bs.Write(4, 3); + bs.Write(35, 6); + bs.Write(25, 6); + bs.Write(3, 3); + bs.Write(32, 6); + bs.Write(29, 6); + bs.Write(3, 3); + bs.Write(36, 6); + bs.Write(49, 6); + + + + + EXPECT_EQ(40, bs.Read(6)); + EXPECT_EQ(3, bs.Read(2)); + EXPECT_EQ(0, bs.Read(3)); + EXPECT_EQ(0, bs.Read(3)); + EXPECT_EQ(0, bs.Read(3)); + EXPECT_EQ(0, bs.Read(3)); + EXPECT_EQ(3, bs.Read(5)); + + EXPECT_EQ(1, bs.Read(2)); + EXPECT_EQ(1, bs.Read(1)); + EXPECT_EQ(1, bs.Read(1)); + EXPECT_EQ(1, bs.Read(1)); + EXPECT_EQ(1, bs.Read(1)); + + EXPECT_EQ(0, bs.Read(3)); + EXPECT_EQ(4, bs.Read(3)); + EXPECT_EQ(35, bs.Read(6)); + EXPECT_EQ(25, bs.Read(6)); + EXPECT_EQ(3, bs.Read(3)); + EXPECT_EQ(32, bs.Read(6)); + EXPECT_EQ(29, bs.Read(6)); + EXPECT_EQ(3, bs.Read(3)); + EXPECT_EQ(36, bs.Read(6)); + EXPECT_EQ(49, bs.Read(6)); + +} + + TEST(TBisStream, SignWriteRead) { TBitStream bs; bs.Write(MakeSign(-2, 3), 3); diff --git a/src/compressed_io.h b/src/compressed_io.h index d8cfb11..cc45a7f 100644 --- a/src/compressed_io.h +++ b/src/compressed_io.h @@ -2,6 +2,7 @@ #include <vector> #include <array> #include <string> +#include <memory> class ICompressedIO { public: @@ -29,3 +30,5 @@ public: virtual long long GetLengthInSamples() const = 0; virtual ~ICompressedIO() {} }; + +typedef std::unique_ptr<ICompressedIO> TCompressedIOPtr; diff --git a/src/config.h b/src/config.h index 942841a..698b865 100644 --- a/src/config.h +++ b/src/config.h @@ -2,3 +2,8 @@ #define CONFIG_DOUBLE +#ifdef CONFIG_DOUBLE +typedef double TFloat; +#else +typedef float TFloat; +#endif diff --git a/src/gain_processor.h b/src/gain_processor.h new file mode 100644 index 0000000..02c7206 --- /dev/null +++ b/src/gain_processor.h @@ -0,0 +1,104 @@ +#include <functional> + +#include "config.h" + +template<class T> +class TGainProcessor : public T { + +public: + typedef std::function<void(TFloat* out, TFloat* cur, 
TFloat* prev)> TGainDemodulator; + /* + * example GainModulation: + * PCMinput: + * N b N N + * --------|--------|--------|-------- + * | | - mdct #1 + * | | - mdct #2 + * a + * | | - mdct #3 + * ^^^^^ - modulated by previous step + * lets consider a case we want to modulate mdct #2. + * bufCur - is a buffer of first half of mdct transformation (a) + * bufNext - is a buffer of second half of mdct transformation and overlaping + * (i.e the input buffer started at b point) + * so next transformation (mdct #3) gets modulated first part + */ + typedef std::function<void(TFloat* bufCur, TFloat* bufNext)> TGainModulator; + static TFloat GetGainInc(uint32_t levelIdxCur) + { + const int incPos = T::ExponentOffset - levelIdxCur + T::GainInterpolationPosShift; + return T::GainInterpolation[incPos]; + } + static TFloat GetGainInc(uint32_t levelIdxCur, uint32_t levelIdxNext) + { + const int incPos = levelIdxNext - levelIdxCur + T::GainInterpolationPosShift; + return T::GainInterpolation[incPos]; + } + + + TGainDemodulator Demodulate(const std::vector<typename T::SubbandInfo::TGainPoint>& giNow, + const std::vector<typename T::SubbandInfo::TGainPoint>& giNext) + { + return [=](TFloat* out, TFloat* cur, TFloat* prev) { + uint32_t pos = 0; + const TFloat scale = giNext.size() ? T::GainLevel[giNext[0].Level] : 1; + for (uint32_t i = 0; i < giNow.size(); ++i) { + uint32_t lastPos = giNow[i].Location << T::LocScale; + const uint32_t levelPos = giNow[i].Level; + assert(levelPos < sizeof(T::GainLevel)/sizeof(T::GainLevel[0])); + TFloat level = T::GainLevel[levelPos]; + const int incPos = ((i + 1) < giNow.size() ? 
giNow[i + 1].Level : T::ExponentOffset) + - giNow[i].Level + T::GainInterpolationPosShift; + TFloat gainInc = T::GainInterpolation[incPos]; + for (; pos < lastPos; pos++) { + //std::cout << "pos: " << pos << " scale: " << scale << " level: " << level << std::endl; + out[pos] = (cur[pos] * scale + prev[pos]) * level; + } + for (; pos < lastPos + T::LocSz; pos++) { + //std::cout << "pos: " << pos << " scale: " << scale << " level: " << level << " gainInc: " << gainInc << std::endl; + out[pos] = (cur[pos] * scale + prev[pos]) * level; + level *= gainInc; + } + } + for (; pos < T::MDCTSz/2; pos++) { + //std::cout << "pos: " << pos << " scale: " << scale << std::endl; + out[pos] = cur[pos] * scale + prev[pos]; + } + }; + } + TGainModulator Modulate(const std::vector<typename T::SubbandInfo::TGainPoint>& giCur) { + if (giCur.empty()) + return {}; + return [=](TFloat* bufCur, TFloat* bufNext) { + uint32_t pos = 0; + const TFloat scale = T::GainLevel[giCur[0].Level]; + for (uint32_t i = 0; i < giCur.size(); ++i) { + uint32_t lastPos = giCur[i].Location << T::LocScale; + const uint32_t levelPos = giCur[i].Level; + assert(levelPos < sizeof(T::GainLevel)/sizeof(T::GainLevel[0])); + TFloat level = T::GainLevel[levelPos]; + const int incPos = ((i + 1) < giCur.size() ? 
giCur[i + 1].Level : T::ExponentOffset) + - giCur[i].Level + T::GainInterpolationPosShift; + TFloat gainInc = T::GainInterpolation[incPos]; + for (; pos < lastPos; pos++) { + //std::cout << "mod pos: " << pos << " scale: " << scale << " bufCur: " << bufCur[pos] << " level: " << level << " bufNext: " << bufNext[pos] << std::endl; + bufCur[pos] /= scale; + bufNext[pos] /= level; + } + for (; pos < lastPos + T::LocSz; pos++) { + + //std::cout << "mod pos: " << pos << " scale: " << scale << " bufCur: " << bufCur[pos] << " level: " << level << " (gainInc) " << gainInc << " bufNext: " << bufNext[pos] << std::endl; + bufCur[pos] /= scale; + bufNext[pos] /= level; + //std::cout << "mod pos: " << pos << " scale: " << scale << " level: " << level << " gainInc: " << gainInc << std::endl; + level *= gainInc; + } + } + for (; pos < T::MDCTSz/2; pos++) { + + //std::cout << "mod pos: " << pos << " scale: " << scale << " bufCur: " << bufCur[pos] << std::endl; + bufCur[pos] /= scale; + } + }; + } +}; diff --git a/src/main.cpp b/src/main.cpp index f74b253..9550ce4 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -6,20 +6,23 @@ #include "pcmengin.h" #include "wav.h" #include "aea.h" -#include "atracdenc.h" +#include "config.h" +#include "atrac1denc.h" +#include "atrac3denc.h" using std::cout; using std::cerr; using std::endl; using std::string; -using std::unique_ptr; -using std::move; using std::stoi; using namespace NAtracDEnc; +typedef std::unique_ptr<TPCMEngine<TFloat>> TPcmEnginePtr; +typedef std::unique_ptr<IProcessor<TFloat>> TAtracProcessorPtr; -static void printUsage(const char* myName) { +static void printUsage(const char* myName) +{ cout << "\tusage: " << myName << " <-e|-d> <-i input> <-o output>\n" << endl; cout << "-e encode mode (PCM -> ATRAC), -i wav file, -o aea file" << endl; cout << "-d decode mode (ATRAC -> PCM), -i aea file, -o wav file" << endl; @@ -27,7 +30,8 @@ static void printUsage(const char* myName) { } -static void printProgress(int percent) { +static void 
printProgress(int percent) +{ static uint32_t counter; counter++; const char symbols[4] = {'-', '\\', '|', '/'}; @@ -35,26 +39,143 @@ static void printProgress(int percent) { fflush(stdout); } -static string GetHelp() { +static string GetHelp() +{ return "\n--encode -i \t - encode mode" "\n--decode -d \t - decode mode" "\n -i input file" "\n -o output file" - "\nAdvanced options:\n --bfuidxconst\t Set constant amount of used BFU. WARNING: It is not a lowpass filter! Do not use it to cut off hi frequency." - "\n --bfuidxfast\t enable fast search of BFU amount" - "\n --notransient[=mask] disable transient detection and use optional mask to set bands with short MDCT window"; + "\n --bitrate (only if supported by codec)" + "\nAdvanced options:\n --bfuidxconst\t Set constant amount of used BFU (ATRAC1). " + "WARNING: It is not a lowpass filter! Do not use it to cut off hi frequency." + "\n --bfuidxfast\t enable fast search of BFU amount (ATRAC1)" + "\n --notransient[=mask] disable transient detection and use optional mask to set bands with short MDCT window " + "(ATRAC1)" + /*"\n --nogaincontrol disable gain control (ATRAC3)"*/ + "\n --notonal disable tonal components (ATRAC3)"; } -int main(int argc, char* const* argv) { +static int checkedStoi(const char* data, int min, int max, int def) +{ + int tmp = 0; + try { + tmp = stoi(data); + if (tmp < min || tmp > max) + throw std::invalid_argument(data); + return tmp; + } catch (std::invalid_argument&) { + cerr << "Wrong arg: " << data << " " << def << " will be used" << endl; + return def; + } +} + +enum EOptions +{ + O_ENCODE = 'e', + O_DECODE = 'd', + O_HELP = 'h', + O_BITRATE = 'b', + O_BFUIDXCONST = 1, + O_BFUIDXFAST = 2, + O_NOTRANSIENT = 3, + O_MONO = 'm', + O_NOSTDOUT = '4', + O_NOTONAL = 5, + O_NOGAINCONTROL = 6, +}; + +static void PrepareAtrac1Encoder(const string& inFile, + const string& outFile, + const bool noStdOut, + NAtrac1::TAtrac1EncodeSettings&& encoderSettings, + uint64_t* totalSamples, + TWavPtr* wavIO, 
+ TPcmEnginePtr* pcmEngine, + TAtracProcessorPtr* atracProcessor) +{ + using NAtrac1::TAtrac1Data; + + wavIO->reset(new TWav(inFile)); + const int numChannels = (*wavIO)->GetChannelNum(); + *totalSamples = (*wavIO)->GetTotalSamples(); + //TODO: recheck it + const uint32_t numFrames = numChannels * (*totalSamples) / TAtrac1Data::NumSamples; + TCompressedIOPtr aeaIO = TCompressedIOPtr(new TAea(outFile, "test", numChannels, numFrames)); + pcmEngine->reset(new TPCMEngine<TFloat>(4096, + numChannels, + TPCMEngine<TFloat>::TReaderPtr((*wavIO)->GetPCMReader<TFloat>()))); + if (!noStdOut) + cout << "Input file: " << inFile + << "\n Channels: " << numChannels + << "\n SampleRate: " << (*wavIO)->GetSampleRate() + << "\n TotalSamples: " << totalSamples + << endl; + atracProcessor->reset(new TAtrac1Processor(std::move(aeaIO), std::move(encoderSettings))); +} + +static void PrepareAtrac1Decoder(const string& inFile, + const string& outFile, + const bool noStdOut, + uint64_t* totalSamples, + TWavPtr* wavIO, + TPcmEnginePtr* pcmEngine, + TAtracProcessorPtr* atracProcessor) +{ + TCompressedIOPtr aeaIO = TCompressedIOPtr(new TAea(inFile)); + *totalSamples = aeaIO->GetLengthInSamples(); + uint32_t length = aeaIO->GetLengthInSamples(); + if (!noStdOut) + cout << "Name: " << aeaIO->GetName() + << "\n Channels: " << aeaIO->GetChannelNum() + << "\n Length: " << length + << endl; + wavIO->reset(new TWav(outFile, aeaIO->GetChannelNum(), 44100)); + pcmEngine->reset(new TPCMEngine<TFloat>(4096, + aeaIO->GetChannelNum(), + TPCMEngine<TFloat>::TWriterPtr((*wavIO)->GetPCMWriter<TFloat>()))); + atracProcessor->reset(new TAtrac1Processor(std::move(aeaIO), NAtrac1::TAtrac1EncodeSettings())); +} + +static void PrepareAtrac3Encoder(const string& inFile, + const string& outFile, + const bool noStdOut, + NAtrac3::TAtrac3EncoderSettings&& encoderSettings, + uint64_t* totalSamples, + TWavPtr* wavIO, + TPcmEnginePtr* pcmEngine, + TAtracProcessorPtr* atracProcessor) +{ + std::cout << "WARNING: ATRAC3 is 
uncompleted, result will be not good )))" << std::endl; + if (!noStdOut) + std::cout << "bitrate " << encoderSettings.ConteinerParams->Bitrate << std::endl; + wavIO->reset(new TWav(inFile)); + const int numChannels = (*wavIO)->GetChannelNum(); + *totalSamples = (*wavIO)->GetTotalSamples(); + TCompressedIOPtr omaIO = TCompressedIOPtr(new TOma(outFile, + "test", + numChannels, + numChannels * (*totalSamples) / 512, OMAC_ID_ATRAC3, + encoderSettings.ConteinerParams->FrameSz)); + pcmEngine->reset(new TPCMEngine<TFloat>(4096, + numChannels, + TPCMEngine<TFloat>::TReaderPtr((*wavIO)->GetPCMReader<TFloat>()))); + atracProcessor->reset(new TAtrac3Processor(std::move(omaIO), std::move(encoderSettings))); +} + +int main(int argc, char* const* argv) +{ const char* myName = argv[0]; static struct option longopts[] = { - { "encode", no_argument, NULL, 'e' }, - { "decode", no_argument, NULL, 'd' }, - { "help", no_argument, NULL, 'h' }, - { "bfuidxconst", required_argument, NULL, 1}, - { "bfuidxfast", no_argument, NULL, 2}, - { "notransient", optional_argument, NULL, 3}, - { "nostdout", no_argument, NULL, 4}, + { "encode", optional_argument, NULL, O_ENCODE }, + { "decode", no_argument, NULL, O_DECODE }, + { "help", no_argument, NULL, O_HELP }, + { "bitrate", required_argument, NULL, O_BITRATE}, + { "bfuidxconst", required_argument, NULL, O_BFUIDXCONST}, + { "bfuidxfast", no_argument, NULL, O_BFUIDXFAST}, + { "notransient", optional_argument, NULL, O_NOTRANSIENT}, + { "nostdout", no_argument, NULL, O_NOSTDOUT}, + { "notonal", no_argument, NULL, O_NOTONAL}, + { "nogaincontrol", no_argument, NULL, O_NOGAINCONTROL}, { NULL, 0, NULL, 0} }; @@ -64,15 +185,23 @@ int main(int argc, char* const* argv) { uint32_t mode = 0; uint32_t bfuIdxConst = 0; //0 - auto, no const bool fastBfuNumSearch = false; - bool nostdout = false; - TAtrac1EncodeSettings::EWindowMode windowMode = TAtrac1EncodeSettings::EWindowMode::EWM_AUTO; - uint32_t winMask = 0; //all is long + bool noStdOut = false; + bool 
noGainControl = false; + bool noTonalComponents = false; + NAtrac1::TAtrac1EncodeSettings::EWindowMode windowMode = NAtrac1::TAtrac1EncodeSettings::EWindowMode::EWM_AUTO; + uint32_t winMask = 0; //0 - all is long + uint32_t bitrate = 0; //0 - use default for codec while ((ch = getopt_long(argc, argv, "edhi:o:m", longopts, NULL)) != -1) { switch (ch) { - case 'e': + case O_ENCODE: mode |= E_ENCODE; + if (optarg) { + if (strcmp(optarg, "atrac3") == 0) { + mode |= E_ATRAC3; + } + } break; - case 'd': + case O_DECODE: mode |= E_DECODE; break; case 'i': @@ -81,25 +210,24 @@ int main(int argc, char* const* argv) { case 'o': outFile = optarg; if (outFile == "-") - nostdout = true; + noStdOut = true; break; case 'h': cout << GetHelp() << endl; return 0; break; - case 1: - try { - bfuIdxConst = stoi(optarg); - } catch (std::invalid_argument&) { - cerr << "Wrong arg: " << optarg << " should be (0, 8]" << endl; - return -1; - } + case O_BITRATE: + bitrate = checkedStoi(optarg, 32, 384, 0); + std::cout << "BITRATE" << bitrate << std::endl; + break; + case O_BFUIDXCONST: + bfuIdxConst = checkedStoi(optarg, 1, 8, 0); break; - case 2: + case O_BFUIDXFAST: fastBfuNumSearch = true; break; - case 3: - windowMode = TAtrac1EncodeSettings::EWindowMode::EWM_NOTRANSIENT; + case O_NOTRANSIENT: + windowMode = NAtrac1::TAtrac1EncodeSettings::EWindowMode::EWM_NOTRANSIENT; if (optarg) { winMask = stoi(optarg); } @@ -108,8 +236,14 @@ int main(int argc, char* const* argv) { ((winMask & 2) ? "short": "long") << ", hi - " << ((winMask & 4) ? "short": "long") << endl; break; - case 4: - nostdout = true; + case O_NOSTDOUT: + noStdOut = true; + break; + case O_NOTONAL: + noTonalComponents = true; + break; + case O_NOGAINCONTROL: + noGainControl = true; break; default: printUsage(myName); @@ -128,36 +262,50 @@ int main(int argc, char* const* argv) { return 1; } if (bfuIdxConst > 8) { - cerr << "Wrong bfuidxconst value ("<< bfuIdxConst << "). 
This is advanced options, use --help to get more information" << endl; + cerr << "Wrong bfuidxconst value ("<< bfuIdxConst << "). " + << "This is advanced options, use --help to get more information" + << endl; return 1; } - TPCMEngine<double>* pcmEngine = nullptr; - IProcessor<double>* atracProcessor; + + TPcmEnginePtr pcmEngine; + TAtracProcessorPtr atracProcessor; uint64_t totalSamples = 0; TWavPtr wavIO; - if (mode == E_ENCODE) { - wavIO = TWavPtr(new TWav(inFile)); - const int numChannels = wavIO->GetChannelNum(); - totalSamples = wavIO->GetTotalSamples(); - //TODO: recheck it - TAeaPtr aeaIO = TAeaPtr(new TAea(outFile, "test", numChannels, numChannels * totalSamples / 512)); - pcmEngine = new TPCMEngine<double>(4096, numChannels, TPCMEngine<double>::TReaderPtr(wavIO->GetPCMReader<double>())); - if (!nostdout) - cout << "Input file: " << inFile << "\n Channels: " << numChannels << "\n SampleRate: " << wavIO->GetSampleRate() << "\n TotalSamples: " << totalSamples << endl; - atracProcessor = new TAtrac1Processor(move(aeaIO), TAtrac1EncodeSettings(bfuIdxConst, fastBfuNumSearch, windowMode, winMask)); - } else if (mode == E_DECODE) { - TAeaPtr aeaIO = TAeaPtr(new TAea(inFile)); - totalSamples = aeaIO->GetLengthInSamples(); - uint32_t length = aeaIO->GetLengthInSamples(); - if (!nostdout) - cout << "Name: " << aeaIO->GetName() << "\n Channels: " << aeaIO->GetChannelNum() << "\n Length: " << length << endl; - wavIO = TWavPtr(new TWav(outFile, aeaIO->GetChannelNum(), 44100)); - pcmEngine = new TPCMEngine<double>(4096, aeaIO->GetChannelNum(), TPCMEngine<double>::TWriterPtr(wavIO->GetPCMWriter<double>())); - atracProcessor = new TAtrac1Processor(move(aeaIO), TAtrac1EncodeSettings(bfuIdxConst, fastBfuNumSearch, windowMode, winMask)); - } else { - cerr << "Processing mode was not specified" << endl; - return 1; + uint32_t pcmFrameSz = 0; //size of one pcm frame to process + switch (mode) { + case E_ENCODE: + { + using NAtrac1::TAtrac1Data; + 
NAtrac1::TAtrac1EncodeSettings encoderSettings(bfuIdxConst, fastBfuNumSearch, windowMode, winMask); + PrepareAtrac1Encoder(inFile, outFile, noStdOut, std::move(encoderSettings), + &totalSamples, &wavIO, &pcmEngine, &atracProcessor); + pcmFrameSz = TAtrac1Data::NumSamples; + } + break; + case E_DECODE: + { + using NAtrac1::TAtrac1Data; + PrepareAtrac1Decoder(inFile, outFile, noStdOut, + &totalSamples, &wavIO, &pcmEngine, &atracProcessor); + pcmFrameSz = TAtrac1Data::NumSamples; + } + break; + case (E_ENCODE | E_ATRAC3): + { + using NAtrac3::TAtrac3Data; + NAtrac3::TAtrac3EncoderSettings encoderSettings(bitrate * 1024, noGainControl, noTonalComponents); + PrepareAtrac3Encoder(inFile, outFile, noStdOut, std::move(encoderSettings), + &totalSamples, &wavIO, &pcmEngine, &atracProcessor); + pcmFrameSz = TAtrac3Data::NumSamples;; + } + break; + default: + { + cerr << "Processing mode was not specified" << endl; + return 1; + } } auto atracLambda = (mode == E_DECODE) ? atracProcessor->GetDecodeLambda() : @@ -165,12 +313,12 @@ int main(int argc, char* const* argv) { uint64_t processed = 0; try { - while (totalSamples > (processed = pcmEngine->ApplyProcess(512, atracLambda))) + while (totalSamples > (processed = pcmEngine->ApplyProcess(pcmFrameSz, atracLambda))) { - if (!nostdout) + if (!noStdOut) printProgress(processed*100/totalSamples); } - if (!nostdout) + if (!noStdOut) cout << "\nDone" << endl; } catch (TAeaIOError err) { diff --git a/src/mdct/mdct.h b/src/mdct/mdct.h index ced049c..33863fb 100644 --- a/src/mdct/mdct.h +++ b/src/mdct/mdct.h @@ -8,7 +8,7 @@ namespace NMDCT { class TMDCTBase { protected: MDCTContext Ctx; - TMDCTBase(int n, double scale) { + TMDCTBase(int n, TFloat scale) { mdct_ctx_init(&Ctx, n, scale); }; virtual ~TMDCTBase() { @@ -19,13 +19,13 @@ protected: template<int N> class TMDCT : public TMDCTBase { - std::vector<double> Buf; + std::vector<TFloat> Buf; public: TMDCT(float scale = 1.0) : TMDCTBase(N, scale) , Buf(N/2) {} - const 
std::vector<double>& operator()(double* in) { + const std::vector<TFloat>& operator()(TFloat* in) { mdct(&Ctx, &Buf[0], in); return Buf; } @@ -33,13 +33,13 @@ public: template<int N> class TMIDCT : public TMDCTBase { - std::vector<double> Buf; + std::vector<TFloat> Buf; public: TMIDCT(float scale = 1.0) : TMDCTBase(N, scale) , Buf(N) {} - const std::vector<double>& operator()(double* in) { + const std::vector<TFloat>& operator()(TFloat* in) { midct(&Ctx, &Buf[0], in); return Buf; } diff --git a/src/mdct/mdct_ut.cpp b/src/mdct/mdct_ut.cpp index e81bea1..3552afd 100644 --- a/src/mdct/mdct_ut.cpp +++ b/src/mdct/mdct_ut.cpp @@ -7,24 +7,24 @@ using std::vector; using namespace NMDCT; -static vector<double> mdct(double* x, int N) { - vector<double> res; +static vector<TFloat> mdct(TFloat* x, int N) { + vector<TFloat> res; for (int k = 0; k < N; k++) { - double sum = 0; + TFloat sum = 0; for (int n = 0; n < 2 * N; n++) - sum += x[n]* cos((M_PI/N) * ((double)n + 0.5 + N/2) * ((double)k + 0.5)); + sum += x[n]* cos((M_PI/N) * ((TFloat)n + 0.5 + N/2) * ((TFloat)k + 0.5)); res.push_back(sum); } return res; } -static vector<double> midct(double* x, int N) { - vector<double> res; +static vector<TFloat> midct(TFloat* x, int N) { + vector<TFloat> res; for (int n = 0; n < 2 * N; n++) { - double sum = 0; + TFloat sum = 0; for (int k = 0; k < N; k++) - sum += (x[k] * cos((M_PI/N) * ((double)n + 0.5 + N/2) * ((double)k + 0.5))); + sum += (x[k] * cos((M_PI/N) * ((TFloat)n + 0.5 + N/2) * ((TFloat)k + 0.5))); res.push_back(sum); } @@ -35,12 +35,12 @@ static vector<double> midct(double* x, int N) { TEST(TBitStream, MDCT64) { const int N = 64; TMDCT<N> transform(N); - vector<double> src(N); + vector<TFloat> src(N); for (int i = 0; i < N; i++) { src[i] = i; } - const vector<double> res1 = mdct(&src[0], N/2); - const vector<double> res2 = transform(&src[0]); + const vector<TFloat> res1 = mdct(&src[0], N/2); + const vector<TFloat> res2 = transform(&src[0]); EXPECT_EQ(res1.size(), 
res2.size()); for (int i = 0; i < res1.size(); i++) { EXPECT_NEAR(res1[i], res2[i], 0.0000000001); @@ -50,12 +50,12 @@ TEST(TBitStream, MDCT64) { TEST(TBitStream, MDCT128) { const int N = 128; TMDCT<N> transform(N); - vector<double> src(N); + vector<TFloat> src(N); for (int i = 0; i < N; i++) { src[i] = i; } - const vector<double> res1 = mdct(&src[0], N/2); - const vector<double> res2 = transform(&src[0]); + const vector<TFloat> res1 = mdct(&src[0], N/2); + const vector<TFloat> res2 = transform(&src[0]); EXPECT_EQ(res1.size(), res2.size()); for (int i = 0; i < res1.size(); i++) { EXPECT_NEAR(res1[i], res2[i], 0.0000000001); @@ -65,12 +65,12 @@ TEST(TBitStream, MDCT128) { TEST(TBitStream, MDCT256) { const int N = 256; TMDCT<N> transform(N); - vector<double> src(N); + vector<TFloat> src(N); for (int i = 0; i < N; i++) { src[i] = i; } - const vector<double> res1 = mdct(&src[0], N/2); - const vector<double> res2 = transform(&src[0]); + const vector<TFloat> res1 = mdct(&src[0], N/2); + const vector<TFloat> res2 = transform(&src[0]); EXPECT_EQ(res1.size(), res2.size()); for (int i = 0; i < res1.size(); i++) { EXPECT_NEAR(res1[i], res2[i], 0.00000001); @@ -80,12 +80,12 @@ TEST(TBitStream, MDCT256) { TEST(TBitStream, MDCT256_RAND) { const int N = 256; TMDCT<N> transform(N); - vector<double> src(N); + vector<TFloat> src(N); for (int i = 0; i < N; i++) { src[i] = rand(); } - const vector<double> res1 = mdct(&src[0], N/2); - const vector<double> res2 = transform(&src[0]); + const vector<TFloat> res1 = mdct(&src[0], N/2); + const vector<TFloat> res2 = transform(&src[0]); EXPECT_EQ(res1.size(), res2.size()); for (int i = 0; i < res1.size(); i++) { EXPECT_NEAR(res1[i], res2[i], 0.01); @@ -96,12 +96,12 @@ TEST(TBitStream, MDCT256_RAND) { TEST(TBitStream, MIDCT64) { const int N = 64; TMIDCT<N> transform(1); - vector<double> src(N); + vector<TFloat> src(N); for (int i = 0; i < N/2; i++) { src[i] = i; } - const vector<double> res1 = midct(&src[0], N/2); - const vector<double> res2 = 
transform(&src[0]); + const vector<TFloat> res1 = midct(&src[0], N/2); + const vector<TFloat> res2 = transform(&src[0]); EXPECT_EQ(res1.size(), res2.size()); for (int i = 0; i < N; i++) { EXPECT_NEAR(res1[i], res2[i], 0.0000000001); @@ -111,12 +111,12 @@ TEST(TBitStream, MIDCT64) { TEST(TBitStream, MIDCT128) { const int N = 128; TMIDCT<N> transform(1); - vector<double> src(N); + vector<TFloat> src(N); for (int i = 0; i < N/2; i++) { src[i] = i; } - const vector<double> res1 = midct(&src[0], N/2); - const vector<double> res2 = transform(&src[0]); + const vector<TFloat> res1 = midct(&src[0], N/2); + const vector<TFloat> res2 = transform(&src[0]); EXPECT_EQ(res1.size(), res2.size()); for (int i = 0; i < N; i++) { EXPECT_NEAR(res1[i], res2[i], 0.0000000001); @@ -126,12 +126,12 @@ TEST(TBitStream, MIDCT128) { TEST(TBitStream, MIDCT256) { const int N = 256; TMIDCT<N> transform(1); - vector<double> src(N); + vector<TFloat> src(N); for (int i = 0; i < N/2; i++) { src[i] = i; } - const vector<double> res1 = midct(&src[0], N/2); - const vector<double> res2 = transform(&src[0]); + const vector<TFloat> res1 = midct(&src[0], N/2); + const vector<TFloat> res2 = transform(&src[0]); EXPECT_EQ(res1.size(), res2.size()); for (int i = 0; i < N; i++) { EXPECT_NEAR(res1[i], res2[i], 0.000000001); @@ -141,12 +141,12 @@ TEST(TBitStream, MIDCT256) { TEST(TBitStream, MIDCT256_RAND) { const int N = 256; TMIDCT<N> transform(1); - vector<double> src(N); + vector<TFloat> src(N); for (int i = 0; i < N/2; i++) { src[i] = rand(); } - const vector<double> res1 = midct(&src[0], N/2); - const vector<double> res2 = transform(&src[0]); + const vector<TFloat> res1 = midct(&src[0], N/2); + const vector<TFloat> res2 = transform(&src[0]); EXPECT_EQ(res1.size(), res2.size()); for (int i = 0; i < N; i++) { EXPECT_NEAR(res1[i], res2[i], 0.01); diff --git a/src/oma.cpp b/src/oma.cpp new file mode 100644 index 0000000..5fd18b7 --- /dev/null +++ b/src/oma.cpp @@ -0,0 +1,42 @@ +#include "oma.h" +#include 
<stdlib.h> + +TOma::TOma(const std::string& filename, const std::string& title, int numChannel, uint32_t numFrames, int cid, uint32_t framesize) { + oma_info_t info; + info.codec = cid; + info.samplerate = 44100; + info.channel_format = OMA_STEREO; + info.framesize = framesize; + File = oma_open(filename.c_str(), OMAM_W, &info); + if (!File) + abort(); +} + +TOma::~TOma() { + oma_close(File); +} + +std::unique_ptr<ICompressedIO::TFrame> TOma::ReadFrame() { + abort(); + return nullptr; +} + +void TOma::WriteFrame(std::vector<char> data) { + if (oma_write(File, &data[0], 1) == -1) { + fprintf(stderr, "write error\n"); + abort(); + } +} + +std::string TOma::GetName() const { + abort(); + return {}; +} + +int TOma::GetChannelNum() const { + return 2; //for ATRAC3 +} +long long TOma::GetLengthInSamples() const { + abort(); + return 0; +} diff --git a/src/oma.h b/src/oma.h new file mode 100644 index 0000000..838322d --- /dev/null +++ b/src/oma.h @@ -0,0 +1,17 @@ +#pragma once + +#include "compressed_io.h" +#include "oma/liboma/include/oma.h" + + +class TOma : public ICompressedIO { + OMAFILE* File; +public: + TOma(const std::string& filename, const std::string& title, int numChannel, uint32_t numFrames, int cid, uint32_t framesize); + ~TOma(); + std::unique_ptr<TFrame> ReadFrame() override; + void WriteFrame(std::vector<char> data) override; + std::string GetName() const override; + int GetChannelNum() const override; + long long GetLengthInSamples() const override; +}; diff --git a/src/oma/liboma/include/oma.h b/src/oma/liboma/include/oma.h new file mode 100644 index 0000000..29d1b72 --- /dev/null +++ b/src/oma/liboma/include/oma.h @@ -0,0 +1,60 @@ + +#ifndef OMA_H +#define OMA_H + +typedef struct omafile_ctx OMAFILE; + +struct oma_info { + int codec; + int framesize; + int samplerate; + int channel_format; +}; + +enum { + OMAM_R = 0x1, + OMAM_W = 0x2, +}; + +enum { + OMAC_ID_ATRAC3 = 0, + OMAC_ID_ATRAC3PLUS = 1, + OMAC_ID_MP3 = 2, + OMAC_ID_LPCM = 3, + OMAC_ID_WMA = 5 
+}; + +enum { + OMA_MONO = 0, + OMA_STEREO = 1, + OMA_STEREO_JS = 2, + OMA_3 = 3, + OMA_4 = 4, + OMA_6 = 5, + OMA_7 = 6, + OMA_8 = 7 + +}; + +typedef struct oma_info oma_info_t; +typedef long long block_count_t; + +#ifdef __cplusplus +extern "C" { +#endif +int oma_get_last_err(); + +OMAFILE* oma_open(const char *path, int mode, oma_info_t *info); +int oma_close(OMAFILE* oma_file); + +block_count_t oma_read(OMAFILE *oma_file, void *ptr, block_count_t blocks); +block_count_t oma_write(OMAFILE *oma_file, const void *ptr, block_count_t blocks); + +oma_info_t* oma_get_info(OMAFILE *oma_file); +int oma_get_bitrate(oma_info_t *info); +const char *oma_get_codecname(oma_info_t *info); +#ifdef __cplusplus +} +#endif + +#endif /* OMA_H */ diff --git a/src/oma/liboma/src/liboma.c b/src/oma/liboma/src/liboma.c new file mode 100644 index 0000000..50297d0 --- /dev/null +++ b/src/oma/liboma/src/liboma.c @@ -0,0 +1,269 @@ +#include "../include/oma.h" +#include "oma_internal.h" + +#include <stdio.h> +#include <stdlib.h> +#include <stdint.h> +#include <string.h> +#include <assert.h> + +//to use htonl +//TODO: rewrite +#include <arpa/inet.h> + +static const int OMA_HEADER_SIZE = 96; +static const int liboma_samplerates[8] = { 32000, 44100, 48000, 88200, 96000, 0 }; +static const char* codec_name[6] = { "ATRAC3", "ATRAC3PLUS", "MPEG1LAYER3", "LPCM", "", "OMAC_ID_WMA" }; +static char ea3_str[] = {'E', 'A', '3'}; +static int channel_id_to_format_tab[7] = { OMA_MONO, OMA_STEREO, OMA_3, OMA_4, OMA_6, OMA_7, OMA_8 }; +enum { + OMAERR_OK = 0, + OMAERR_IO = -1, + OMAERR_PERM = -2, + OMAERR_FMT = -3, + OMAERR_ENCRYPT = -4, + OMAERR_VAL = -5, + OMAERR_EOF = -6 +}; + +static __thread int err; +int oma_get_last_err() { + return err; +} + +static void save_err(int e) { + err = e; +} + +static int oma_check_header(const char* buf) { + if (memcmp(buf, &ea3_str[0], 3) || buf[4] != 0 || buf[5] != OMA_HEADER_SIZE) { + return OMAERR_FMT; + } + return OMAERR_OK; +} + +static int 
oma_check_encryption(const char* buf) { + if (buf[6] == -1 && buf[7] == -1) + return OMAERR_OK; + return OMAERR_ENCRYPT; +} + +static int oma_get_samplerate_idx(int samplerate) { + if (samplerate <= 0) { + fprintf(stderr, "wrong samplerate\n"); + return -1; + } + for (int i = 0; ; i++) { + if (liboma_samplerates[i] == samplerate) + return i; + if (liboma_samplerates[i] == 0) + return -1; + } + return -1; +} + +static int oma_read_atrac3_header(uint32_t params, oma_info_t* info) { + const int js = (params >> 17) & 0x1; + const int samplerate = liboma_samplerates[(params >> 13) & 0x7]; + if (samplerate == 0) { + fprintf(stderr, "liboma: wrong samplerate params, can't read header\n"); + return -1; + } + info->codec = OMAC_ID_ATRAC3; + info->framesize = (params & 0x3FF) * 8; + info->samplerate = samplerate; + info->channel_format = js ? OMA_STEREO_JS : OMA_STEREO; + return 0; +} + +static int oma_write_atrac3_header(uint32_t *params, oma_info_t *info) { + const int channel_format = info->channel_format; + if (channel_format != OMA_STEREO_JS && channel_format != OMA_STEREO) { + fprintf(stderr, "wrong channel format\n"); + return -1; + } + const uint32_t js = channel_format == OMA_STEREO_JS; + const int samplerate_idx = oma_get_samplerate_idx(info->samplerate); + if (samplerate_idx == -1) + return -1; + const uint32_t framesz = info->framesize / 8; + fprintf(stderr, "framesize: %d\n", framesz); + if (framesz > 0x3FF) + return -1; + *params = htonl((OMAC_ID_ATRAC3 << 24) | (js << 17) | ((uint32_t)samplerate_idx << 13) | framesz); + return 0; +} + +static int oma_read_atrac3p_header(uint32_t params, oma_info_t* info) { + const int channel_id = (params >> 10) & 7; + if (channel_id == 0) { + return -1; + } + const int samplerate = liboma_samplerates[(params >> 13) & 0x7]; + if (samplerate == 0) { + fprintf(stderr, "liboma: wrong samplerate params, can't read header\n"); + return -1; + } + info->codec = OMAC_ID_ATRAC3PLUS; + info->framesize = ((params & 0x3FF) * 8) + 8; + 
info->samplerate = samplerate; + uint32_t ch_id = (params >> 10) & 7; + info->channel_format = channel_id_to_format_tab[ch_id - 1]; + return 0; +} + +static int oma_write_header(OMAFILE* ctx, oma_info_t *omainfo) { + if (ctx == NULL || omainfo == NULL) + return -1; + char *headerbuf = (char*)calloc(OMA_HEADER_SIZE, 1); + memcpy(headerbuf, &ea3_str[0], 3); + headerbuf[3] = 1; //??? + headerbuf[5] = OMA_HEADER_SIZE; + headerbuf[6] = 0xFF; + headerbuf[7] = 0xFF; + uint32_t *params = (uint32_t*)(headerbuf+32); + switch (omainfo->codec) { + case OMAC_ID_ATRAC3: + oma_write_atrac3_header(params, omainfo); + break; + case OMAC_ID_ATRAC3PLUS: + assert(0); + break; + default: + assert(0); + break; + } + int rv = fwrite(headerbuf, sizeof(char), OMA_HEADER_SIZE, ctx->file); + if (rv != OMA_HEADER_SIZE) { + fprintf(stderr, "can't write header\n"); + rv = -1; + } + free(headerbuf); + return rv; +} + +static int oma_parse_header(OMAFILE* file) { + char buf[OMA_HEADER_SIZE]; + int read = fread(&buf[0], sizeof(char), OMA_HEADER_SIZE, file->file); + int err = 0; + uint32_t params = 0; + if (OMA_HEADER_SIZE != read) + return feof(file->file) ? 
OMAERR_FMT : OMAERR_IO; + + err = oma_check_header(&buf[0]); + if (OMAERR_OK != err) + return err; + + err = oma_check_encryption(&buf[0]); + if (OMAERR_OK != err) + return err; + + //detect codecs + params = ((uint8_t)buf[33]) << 16 | ((uint8_t)buf[34]) << 8 | ((uint8_t)buf[35]); + switch (buf[32]) { + case OMAC_ID_ATRAC3: + oma_read_atrac3_header(params, &file->info); + break; + case OMAC_ID_ATRAC3PLUS: + oma_read_atrac3p_header(params, &file->info); + break; + + default: + fprintf(stderr, "got unsupported format: %d\n", buf[32]); + return OMAERR_FMT; + } + + return OMAERR_OK; +} + +OMAFILE* oma_open(const char *path, int mode, oma_info_t *info) { + const static char* modes[3] = {"", "rb", "wb"}; + FILE* file = fopen(path, modes[mode]); + int err = 0; + if (NULL == file) { + return NULL; + } + + struct omafile_ctx *ctx = (struct omafile_ctx*)malloc(sizeof(struct omafile_ctx)); + if (NULL == ctx) { + goto close_ret; + } + + ctx->file = file; + if (mode == OMAM_R) { + err = oma_parse_header(ctx); + if (OMAERR_OK != err) { + goto free_close_ret; + } + } else { + if (!info) { + err = OMAERR_VAL; + goto free_close_ret; + } + memcpy(&ctx->info, info, sizeof(oma_info_t)); + err = oma_write_header(ctx, info); + } + + return ctx; + +free_close_ret: + free(ctx); + +close_ret: + save_err(err); + fclose(file); + return NULL; +} + +int oma_close(OMAFILE *ctx) { + FILE* file = ctx->file; + free(ctx); + fclose(file); + return 0; +} + +block_count_t oma_read(OMAFILE *oma_file, void *ptr, block_count_t blocks) { + size_t read = fread(ptr, oma_file->info.framesize, blocks, oma_file->file); + if (read == blocks) + return read; + if (feof(oma_file->file)) { + save_err(OMAERR_EOF); + return 0; + } + return -1; +} + +block_count_t oma_write(OMAFILE *oma_file, const void *ptr, block_count_t blocks) { + size_t writen = fwrite(ptr, oma_file->info.framesize, blocks, oma_file->file); + if (writen == blocks) + return writen; + return -1; +} + +oma_info_t* oma_get_info(OMAFILE *oma_file) { + 
if (oma_file == NULL) + return NULL; + return &oma_file->info; +} +int oma_get_bitrate(oma_info_t *info) { + switch (info->codec) { + case OMAC_ID_ATRAC3: + return info->samplerate * info->framesize * 8 / 1024; + break; + case OMAC_ID_ATRAC3PLUS: + return info->samplerate * info->framesize * 8 / 2048; + break; + default: + return -1; + } + return -1; +} + +const char *oma_get_codecname(oma_info_t *info) { + if (info == NULL) + return ""; + int id = info->codec; + if (id < 0 || id > 5) + return ""; + return codec_name[id]; +} diff --git a/src/oma/liboma/src/oma_internal.h b/src/oma/liboma/src/oma_internal.h new file mode 100644 index 0000000..881e89b --- /dev/null +++ b/src/oma/liboma/src/oma_internal.h @@ -0,0 +1,18 @@ +#ifndef OMA_INTERNAL_H +#define OMA_INTERNAL_H + +#include <stdio.h> +#include "oma.h" + +struct omafile_ctx { + FILE* file; + oma_info_t info; +}; + + + +//static inline uint16_t read_big16(void *x) { +// return (((const uint8_t*)x)[0] << 8) | ((const uint8_t)x); +//} + +#endif /* OMA_INTERNAL_H */ diff --git a/src/oma/liboma/src/tools/omacp.c b/src/oma/liboma/src/tools/omacp.c new file mode 100644 index 0000000..3d9190e --- /dev/null +++ b/src/oma/liboma/src/tools/omacp.c @@ -0,0 +1,38 @@ +#include <stdio.h> +#include <stdlib.h> + +#include "oma.h" + +int main(int argc, char* const* argv) { + if (3 != argc) + fprintf(stdout, "usage: \n\t omainfo [in] [out]\n"); + + OMAFILE* infile = oma_open(argv[1], OMAM_R, NULL); + if (NULL == infile) + fprintf(stderr, "Can't open %s to read, err: %d\n", argv[1], oma_get_last_err()); + + oma_info_t *info = oma_get_info(infile); + const char *codecname = oma_get_codecname(info); + const int bitrate = oma_get_bitrate(info); + + fprintf(stdout, "codec: %s, bitrate: %d, channel format: %d\n", codecname, bitrate, info->chanel_format); + + OMAFILE* outfile = oma_open(argv[2], OMAM_W, info); + if (NULL == outfile) + fprintf(stderr, "Can't open %s to write, err: %d\n", argv[2], oma_get_last_err()); + + char* buf = 
(char*)malloc(info->framesize); + for (;;) { + block_count_t rcount = oma_read(infile, buf, 1); + if (rcount == 0) + break; + if (rcount == -1) { + fprintf(stderr, "read error\n"); + break; + } + if (oma_write(outfile, buf, 1) == -1) { + fprintf(stderr, "write error\n"); + break; + } + } +} diff --git a/src/oma/liboma/src/tools/omainfo.c b/src/oma/liboma/src/tools/omainfo.c new file mode 100644 index 0000000..a487010 --- /dev/null +++ b/src/oma/liboma/src/tools/omainfo.c @@ -0,0 +1,25 @@ +#include <stdio.h> + +#include "oma.h" + +int main(int argc, char* const* argv) { + fprintf(stderr, "%d\n", argc); + if (2 > argc) { + fprintf(stdout, "usage: \n\t omainfo [filename]\n"); + return 1; + } + + for (int i = 1; i < argc; i++) { + OMAFILE* file = oma_open(argv[i], OMAM_R, NULL); + if (NULL == file) + fprintf(stderr, "Can't open %s\n", argv[i]); + + oma_info_t *info = oma_get_info(file); + const char *codecname = oma_get_codecname(info); + const int bitrate = oma_get_bitrate(info); + + fprintf(stdout, "%s codec: %s, bitrate: %d, channelformat: %d framesz: %d\n", argv[i], codecname, bitrate, info->channel_format, info->framesize); + oma_close(file); + } + return 0; +} diff --git a/src/pcmengin.h b/src/pcmengin.h index a0e0127..8023fed 100644 --- a/src/pcmengin.h +++ b/src/pcmengin.h @@ -30,9 +30,9 @@ class TEndOfRead : public std::exception { template <class T> class TPCMBuffer { std::vector<T> Buf_; - int32_t NumChannels; + uint16_t NumChannels; public: - TPCMBuffer(const int32_t bufSize, const int32_t numChannels) + TPCMBuffer(const int32_t bufSize, const uint32_t numChannels) : NumChannels(numChannels) { Buf_.resize(bufSize*numChannels); @@ -52,7 +52,7 @@ public: abort(); return &Buf_[rpos]; } - size_t Channels() const { + uint16_t Channels() const { return NumChannels; } void Zero(size_t pos, size_t len) { @@ -82,6 +82,9 @@ class TPCMEngine { public: typedef std::unique_ptr<IPCMWriter<T>> TWriterPtr; typedef std::unique_ptr<IPCMReader<T>> TReaderPtr; + struct 
ProcessMeta { + const uint16_t Channels; + }; private: TPCMBuffer<T> Buffer; TWriterPtr Writer; @@ -104,7 +107,7 @@ public: , Writer(std::move(writer)) , Reader(std::move(reader)) { } - typedef std::function<void(T* data)> TProcessLambda; + typedef std::function<void(T* data, const ProcessMeta& meta)> TProcessLambda; uint64_t ApplyProcess(size_t step, TProcessLambda lambda) { if (step > Buffer.Size()) { @@ -115,8 +118,9 @@ public: Reader->Read(Buffer, sizeToRead); } size_t lastPos = 0; + ProcessMeta meta = {Buffer.Channels()}; for (size_t i = 0; i + step <= Buffer.Size(); i+=step) { - lambda(Buffer[i]); + lambda(Buffer[i], meta); lastPos = i + step; } assert(lastPos == Buffer.Size()); diff --git a/src/qmf/qmf.h b/src/qmf/qmf.h index 19c7d0a..e6ae58f 100644 --- a/src/qmf/qmf.h +++ b/src/qmf/qmf.h @@ -1,13 +1,15 @@ #pragma once #include <string.h> +#include "../config.h" + template<class TPCM, int nIn> class TQmf { static const float TapHalf[24]; - double QmfWindow[48]; + TFloat QmfWindow[48]; TPCM PcmBuffer[nIn + 46]; - double PcmBufferMerge[nIn + 46]; - double DelayBuff[46]; + TFloat PcmBufferMerge[nIn + 46]; + TFloat DelayBuff[46]; public: TQmf() { const int sz = sizeof(QmfWindow)/sizeof(QmfWindow[0]); @@ -21,8 +23,8 @@ public: } } - void Split(TPCM* in, double* lower, double* upper) { - double temp; + void Split(TPCM* in, TFloat* lower, TFloat* upper) { + TFloat temp; for (size_t i = 0; i < 46; i++) PcmBuffer[i] = PcmBuffer[nIn + i]; @@ -41,9 +43,9 @@ public: } } - void Merge(TPCM* out, double* lower, double* upper) { - memcpy(&PcmBufferMerge[0], &DelayBuff[0], 46*sizeof(double)); - double* newPart = &PcmBufferMerge[46]; + void Merge(TPCM* out, TFloat* lower, TFloat* upper) { + memcpy(&PcmBufferMerge[0], &DelayBuff[0], 46*sizeof(TFloat)); + TFloat* newPart = &PcmBufferMerge[46]; for (int i = 0; i < nIn; i+=4) { newPart[i+0] = lower[i/2] + upper[i/2]; newPart[i+1] = lower[i/2] - upper[i/2]; @@ -51,10 +53,10 @@ public: newPart[i+3] = lower[i/2 + 1] - upper[i/2 + 
1]; } - double* winP = &PcmBufferMerge[0]; + TFloat* winP = &PcmBufferMerge[0]; for (size_t j = nIn/2; j != 0; j--) { - double s1 = 0; - double s2 = 0; + TFloat s1 = 0; + TFloat s2 = 0; for (size_t i = 0; i < 48; i+=2) { s1 += winP[i] * QmfWindow[i]; s2 += winP[i+1] * QmfWindow[i+1]; @@ -64,7 +66,7 @@ public: winP += 2; out += 2; } - memcpy(&DelayBuff[0], &PcmBufferMerge[nIn], 46*sizeof(double)); + memcpy(&DelayBuff[0], &PcmBufferMerge[nIn], 46*sizeof(TFloat)); } }; diff --git a/src/transient_detector.cpp b/src/transient_detector.cpp index 769277e..82a31a8 100644 --- a/src/transient_detector.cpp +++ b/src/transient_detector.cpp @@ -2,19 +2,33 @@ #include <stdlib.h> #include <string.h> +#include <cmath> +#include <cassert> +#include <iostream> namespace NAtracDEnc { -static double calculateRMS(const double* in, uint32_t n) { - double s = 0; +using std::vector; +static TFloat calculateRMS(const TFloat* in, uint32_t n) { + TFloat s = 0; for (uint32_t i = 0; i < n; i++) { - s += in[i] * in[i]; + s += (in[i] * in[i]); } s /= n; return sqrt(s); } -void TTransientDetector::HPFilter(const double* in, double* out) { - static const double fircoef[] = { +static TFloat calculatePeak(const TFloat* in, uint32_t n) { + TFloat s = 0; + for (uint32_t i = 0; i < n; i++) { + TFloat absVal = std::abs(in[i]); + if (absVal > s) + s = absVal; + } + return s; +} + +void TTransientDetector::HPFilter(const TFloat* in, TFloat* out) { + static const TFloat fircoef[] = { -8.65163e-18 * 2.0, -0.00851586 * 2.0, -6.74764e-18 * 2.0, 0.0209036 * 2.0, -3.36639e-17 * 2.0, -0.0438162 * 2.0, -1.54175e-17 * 2.0, 0.0931738 * 2.0, -5.52212e-17 * 2.0, -0.313819 * 2.0 @@ -34,10 +48,10 @@ void TTransientDetector::HPFilter(const double* in, double* out) { } -bool TTransientDetector::Detect(const double* buf) { +bool TTransientDetector::Detect(const TFloat* buf) { const uint32_t nBlocksToAnalize = NShortBlocks + 1; - double* rmsPerShortBlock = reinterpret_cast<double*>(alloca(sizeof(double) * 
nBlocksToAnalize)); - std::vector<double> filtered(BlockSz); + TFloat* rmsPerShortBlock = reinterpret_cast<TFloat*>(alloca(sizeof(TFloat) * nBlocksToAnalize)); + std::vector<TFloat> filtered(BlockSz); HPFilter(buf, filtered.data()); bool trans = false; rmsPerShortBlock[0] = LastEnergy; @@ -45,13 +59,25 @@ bool TTransientDetector::Detect(const double* buf) { rmsPerShortBlock[i] = 19.0 * log10(calculateRMS(&filtered[(i - 1) * ShortSz], ShortSz)); if (rmsPerShortBlock[i] - rmsPerShortBlock[i - 1] > 16) { trans = true; + LastTransientPos = i; } if (rmsPerShortBlock[i - 1] - rmsPerShortBlock[i] > 20) { trans = true; + LastTransientPos = i; } } LastEnergy = rmsPerShortBlock[NShortBlocks]; return trans; } +std::vector<TFloat> AnalyzeGain(const TFloat* in, const uint32_t len, const uint32_t maxPoints, bool useRms) { + vector<TFloat> res; + const uint32_t step = len / maxPoints; + for (uint32_t pos = 0; pos < len; pos += step) { + TFloat rms = useRms ? calculateRMS(in + pos, step) : calculatePeak(in + pos, step); + res.emplace_back(rms); + } + return res; } + +} //namespace NAtracDEnc diff --git a/src/transient_detector.h b/src/transient_detector.h index b3db6ba..46b774f 100644 --- a/src/transient_detector.h +++ b/src/transient_detector.h @@ -3,16 +3,20 @@ #include <cstdint> #include <vector> +#include "config.h" + namespace NAtracDEnc { + class TTransientDetector { const uint32_t ShortSz; const uint32_t BlockSz; const uint32_t NShortBlocks; static const uint32_t PrevBufSz = 20; static const uint32_t FIRLen = 21; - void HPFilter(const double* in, double* out); - std::vector<double> HPFBuffer; - double LastEnergy = 0.0; + void HPFilter(const TFloat* in, TFloat* out); + std::vector<TFloat> HPFBuffer; + TFloat LastEnergy = 0.0; + uint32_t LastTransientPos = 0; public: TTransientDetector(uint32_t shortSz, uint32_t blockSz) : ShortSz(shortSz) @@ -21,6 +25,10 @@ public: { HPFBuffer.resize(BlockSz + FIRLen); } - bool Detect(const double* buf); + bool Detect(const TFloat* buf); + 
uint32_t GetLastTransientPos() const { return LastTransientPos; } }; + +std::vector<TFloat> AnalyzeGain(const TFloat* in, uint32_t len, uint32_t maxPoints, bool useRms); + } diff --git a/src/transient_detector_ut.cpp b/src/transient_detector_ut.cpp new file mode 100644 index 0000000..5c018c3 --- /dev/null +++ b/src/transient_detector_ut.cpp @@ -0,0 +1,36 @@ +#include "transient_detector.h" +#include <gtest/gtest.h> + +#include <vector> +#include <cmath> + +using std::vector; +using namespace NAtracDEnc; +TEST(AnalyzeGain, AnalyzeGainSimple) { + + TFloat in[256]; + for (int i = 0; i < 256; ++i) { + if (i <= 24) { + in[i] = 1.0; + } else if ( i > 24 && i <= 32) { + in[i] = 8.0; + } else if ( i > 32 && i <= 66) { + in[i] = 128.0; + } else { + in[i] = 0.5; + } + } + vector<TFloat> res = AnalyzeGain(in, 256, 32, false); + EXPECT_EQ(res.size(), 32); + +// for (TFloat v : res) +// std::cout << v << std::endl; + for (int i = 0; i < 3; ++i) + EXPECT_EQ(res[i], 1.0); + for (int i = 3; i < 4; ++i) + EXPECT_EQ(res[i], 8.0); + for (int i = 4; i < 9; ++i) + EXPECT_EQ(res[i], 128.0); + for (int i = 9; i < 32; ++i) + EXPECT_EQ(res[i], 0.5); +} diff --git a/src/util.h b/src/util.h new file mode 100644 index 0000000..f75c48e --- /dev/null +++ b/src/util.h @@ -0,0 +1,58 @@ +#pragma once +#include <cstdint> +#include <vector> +#include <algorithm> +#include <cmath> + +#include "config.h" +#include <cstring> + +template<class T> +inline void SwapArray(T* p, const size_t len) { + for (size_t i = 0, j = len - 1; i < len / 2; ++i, --j) { + T tmp = p[i]; + p[i] = p[j]; + p[j] = tmp; + } +} + +template<size_t N> +inline void InvertSpectrInPlase(TFloat* in) { + for (size_t i = 0; i < N; i+=2) + in[i] *= -1; +} + +template<size_t N> +inline std::vector<TFloat> InvertSpectr(const TFloat* in) { + std::vector<TFloat> buf(N); + std::memcpy(&buf[0], in, N * sizeof(TFloat)); + InvertSpectrInPlase<N>(&buf[0]); + return buf; +} + +inline uint16_t GetFirstSetBit(uint32_t x) { + static const uint16_t 
multiplyDeBruijnBitPosition[32] = { + 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, + 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 + }; + x |= x >> 1; + x |= x >> 2; + x |= x >> 4; + x |= x >> 8; + x |= x >> 16; + return multiplyDeBruijnBitPosition[(uint32_t)(x * 0x07C4ACDDU) >> 27]; +} + +template<class T> +inline uint16_t Log2FloatToIdx(T x, uint16_t shift) { + T t = x * shift; + return GetFirstSetBit(std::trunc(t)); +} + +template<class T> +inline T CalcMedian(T* in, uint32_t len) { + std::vector<T> tmp(in, in+len); + std::sort(tmp.begin(), tmp.end()); + uint32_t pos = (len - 1) / 2; + return tmp[pos]; +} diff --git a/src/util_ut.cpp b/src/util_ut.cpp new file mode 100644 index 0000000..ccd9fce --- /dev/null +++ b/src/util_ut.cpp @@ -0,0 +1,26 @@ +#include "util.h" +#include <gtest/gtest.h> + +#include <vector> + + +TEST(Util, SwapArrayTest) { + + TFloat arr[8] = { 0, 1, 2, 3, 4, 5, 6, 7 }; + SwapArray(arr, 8); + for (size_t i = 0; i < 8; ++i) { + EXPECT_NEAR((TFloat)i, arr[7-i], 0.000000000001); + } +} + +TEST(Util, GetFirstSetBitTest) { + EXPECT_EQ(1, GetFirstSetBit(2)); + EXPECT_EQ(1, GetFirstSetBit(3)); + EXPECT_EQ(2, GetFirstSetBit(4)); + EXPECT_EQ(2, GetFirstSetBit(5)); + EXPECT_EQ(2, GetFirstSetBit(6)); + EXPECT_EQ(2, GetFirstSetBit(7)); + EXPECT_EQ(3, GetFirstSetBit(8)); + EXPECT_EQ(3, GetFirstSetBit(9)); + EXPECT_EQ(3, GetFirstSetBit(10)); +} diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index c8ca48b..aaebb4a 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -1,3 +1,5 @@ +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g -fsanitize=address -fno-omit-frame-pointer") + include_directories(${gtest_SOURCE_DIR}/include) set(mdct_test_sources @@ -15,15 +17,46 @@ add_executable(bitstream_test ${bitstream_test_sources}) target_link_libraries(bitstream_test gtest_main) set(atrac1mdct_test_sources - ../src/atracdenc.cpp + ../src/atrac1denc.cpp ../src/transient_detector.cpp ../src/bitstream/bitstream.cpp + 
../src/atrac/atrac_psy_common.cpp ../src/atrac/atrac_scale.cpp ../src/atrac/atrac1_dequantiser.cpp ../src/atrac/atrac1_bitalloc.cpp ../src/atrac/atrac1.cpp + ../src/atrac/atrac3.cpp #atrac_scale has explicit instantiation ../src/atracdenc_ut.cpp ../src/aea.cpp ) add_executable(atrac1mdct_test ${atrac1mdct_test_sources}) target_link_libraries(atrac1mdct_test mdct_impl gtest_main) + +set(atrac3mdct_test_sources + ../src/atrac3denc.cpp + ../src/transient_detector.cpp + ../src/bitstream/bitstream.cpp + ../src/atrac/atrac_psy_common.cpp + ../src/atrac/atrac_scale.cpp + ../src/atrac/atrac3_bitstream.cpp + ../src/atrac/atrac1.cpp #atrac_scale has explicit instantiation + ../src/atrac/atrac3.cpp + ../src/atrac3denc_ut.cpp + ../src/oma.cpp +) +add_executable(atrac3mdct_test ${atrac3mdct_test_sources}) +target_link_libraries(atrac3mdct_test mdct_impl oma gtest_main) + +set(util_test_sources + ../src/util_ut.cpp +) +add_executable(util_test ${util_test_sources}) +target_link_libraries(util_test gtest_main) + +set(transient_detector_test_sources + ../src/transient_detector_ut.cpp + ../src/transient_detector.cpp +) +add_executable(transient_detector_test ${transient_detector_test_sources}) +target_link_libraries(transient_detector_test gtest_main) + |