Merge branch 'master' into at3plus-dev

author: Daniil Cherednik <dan.cherednik@gmail.com> 2024-12-24 22:59:03 +0100
committer: Daniil Cherednik <dan.cherednik@gmail.com> 2024-12-24 22:59:03 +0100
commit: bddbeb98b3db8d435de6b2d10269640121475538 (patch)
tree: 1a791f5dd49b94ead503b9b77c6b9277f2701a89
parent: c73a0e24a0d5c510a65efbe5c2fbc7fd39a3c003 (diff)
parent: 74d6e04c21bddd435bd74c34dbe027b883772a76 (diff)
download: atracdenc-bddbeb98b3db8d435de6b2d10269640121475538.tar.gz
60 files changed, 838 insertions, 691 deletions
diff --git a/.github/workflows/cmake.yml b/.github/workflows/cmake.yml
index e361e4f..00b987e 100644
--- a/.github/workflows/cmake.yml
+++ b/.github/workflows/cmake.yml
@@ -41,7 +41,7 @@ jobs:
         }
 
     steps:
-    - uses: actions/checkout@v2
+    - uses: actions/checkout@v4
           
     # install dependencies
     - name: Install dependencies on windows
@@ -69,7 +69,7 @@ jobs:
           
     - name: Upload
       if: startsWith(matrix.config.os,'Windows')
-      uses: actions/upload-artifact@v2
+      uses: actions/upload-artifact@v4
       with:
         path: ${{github.workspace}}/build/src/Release
         name: ${{ matrix.config.artifact }}
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 02a31dc..4d08de8 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -3,8 +3,6 @@ CMAKE_MINIMUM_REQUIRED(VERSION 3.1)
 project(atracdenc)
 include_directories(src)
 
-set(ATDE_USE_FLOAT ON)
-
 if (UNIX)
     find_package(GTest)
     if (GTest_FOUND)
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 75d75bc..d40f6e6 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -32,10 +32,6 @@ set(CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake/modules")
 enable_cxx_compiler_flag_if_supported("-Wall")
 enable_cxx_compiler_flag_if_supported("-Wextra")
 
-if (ATDE_USE_FLOAT)
-    add_compile_definitions(ATDE_USE_FLOAT)
-endif()
-
 if (WIN32)
     add_compile_definitions(PLATFORM_WINDOWS)
     add_compile_definitions(_CRT_SECURE_NO_WARNINGS)
@@ -71,12 +67,7 @@ set(SOURCE_FFT_LIB
     lib/fft/kissfft_impl/tools/kiss_fftr.c
 )
 
-if (ATDE_USE_FLOAT)
-    set_source_files_properties(${SOURCE_FFT_LIB} PROPERTIES COMPILE_FLAGS -Dkiss_fft_scalar=float)
-else()
-    set_source_files_properties(${SOURCE_FFT_LIB} PROPERTIES COMPILE_FLAGS -Dkiss_fft_scalar=double)
-    set(GHA_USE_DOUBLE_API YES)
-endif()
+set_source_files_properties(${SOURCE_FFT_LIB} PROPERTIES COMPILE_FLAGS -Dkiss_fft_scalar=float)
 add_library(fft_impl STATIC ${SOURCE_FFT_LIB})
 
 set(GHA_FFT_LIB fft_impl)
diff --git a/src/aea.cpp b/src/aea.cpp
index 9394677..72d70aa 100644
--- a/src/aea.cpp
+++ b/src/aea.cpp
@@ -39,12 +39,12 @@ public:
     TAeaCommon(const TMeta& meta)
         : Meta(meta)
     {}
-    uint8_t GetChannelNum() const;
+    size_t GetChannelNum() const;
     string GetName() const;
     virtual ~TAeaCommon();
 };
 
-uint8_t TAeaCommon::GetChannelNum() const {
+size_t TAeaCommon::GetChannelNum() const {
     return Meta.AeaHeader[264];
 }
 
@@ -63,7 +63,7 @@ public:
     unique_ptr<TFrame> ReadFrame() override; 
     uint64_t GetLengthInSamples() const override;
 
-    uint8_t GetChannelNum() const override {
+    size_t GetChannelNum() const override {
         return TAeaCommon::GetChannelNum();
     }
 
@@ -117,14 +117,14 @@ unique_ptr<ICompressedIO::TFrame> TAeaInput::ReadFrame() {
 
 class TAeaOutput : public ICompressedOutput, public TAeaCommon {
     static TAeaCommon::TMeta CreateMeta(const string& filename, const string& title,
-        uint8_t numChannel, uint32_t numFrames);
+        size_t numChannel, uint32_t numFrames);
 
     bool FirstWrite = true;
 public:
-    TAeaOutput(const string& filename, const string& title, uint8_t numChannel, uint32_t numFrames);
+    TAeaOutput(const string& filename, const string& title, size_t numChannel, uint32_t numFrames);
     void WriteFrame(vector<char> data) override;
 
-    uint8_t GetChannelNum() const override {
+    size_t GetChannelNum() const override {
         return TAeaCommon::GetChannelNum();
     }
     string GetName() const override {
@@ -132,12 +132,12 @@ public:
     }
 };
 
-TAeaOutput::TAeaOutput(const string& filename, const string& title, uint8_t numChannels, uint32_t numFrames)
+TAeaOutput::TAeaOutput(const string& filename, const string& title, size_t numChannels, uint32_t numFrames)
     : TAeaCommon(CreateMeta(filename, title, numChannels, numFrames))
 {}
 
 TAeaCommon::TMeta TAeaOutput::CreateMeta(const string& filename, const string& title,
-    uint8_t channelsNum, uint32_t numFrames)
+    size_t channelsNum, uint32_t numFrames)
 {
     FILE* fp = fopen(filename.c_str(), "wb");
     if (!fp)
@@ -191,7 +191,7 @@ TCompressedInputPtr CreateAeaInput(const std::string& filename) {
 }
 
 TCompressedOutputPtr CreateAeaOutput(const string& filename, const string& title,
-    uint8_t numChannels, uint32_t numFrames)
+    size_t numChannels, uint32_t numFrames)
 {
     return unique_ptr<TAeaOutput>(new TAeaOutput(filename, title, numChannels, numFrames));
 }
diff --git a/src/aea.h b/src/aea.h
index 50edc9f..1569878 100644
--- a/src/aea.h
+++ b/src/aea.h
@@ -41,4 +41,4 @@ class TAeaFormatError {
 
 TCompressedInputPtr CreateAeaInput(const std::string& filename);
 TCompressedOutputPtr CreateAeaOutput(const std::string& filename, const std::string& title,
-    uint8_t numChannel, uint32_t numFrames);
+    size_t numChannel, uint32_t numFrames);
diff --git a/src/at3.cpp b/src/at3.cpp
index f4afc4f..35d6121 100644
--- a/src/at3.cpp
+++ b/src/at3.cpp
@@ -80,7 +80,7 @@ At3WaveHeader {
 
 class TAt3 : public ICompressedOutput {
 public:
-    TAt3(const std::string &filename, uint8_t numChannels,
+    TAt3(const std::string &filename, size_t numChannels,
         uint32_t numFrames, uint32_t frameSize, bool jointStereo)
         : fp(fopen(filename.c_str(), "wb"))
     {
@@ -146,7 +146,7 @@ public:
         return {};
     }
 
-    uint8_t GetChannelNum() const override {
+    size_t GetChannelNum() const override {
         return 2;
     }
 
@@ -157,7 +157,7 @@ private:
 } //namespace
 
 TCompressedOutputPtr
-CreateAt3Output(const std::string& filename, uint8_t numChannel,
+CreateAt3Output(const std::string& filename, size_t numChannel,
         uint32_t numFrames, uint32_t framesize, bool jointStereo)
 {
     return std::unique_ptr<TAt3>(new TAt3(filename, numChannel, numFrames, framesize, jointStereo));
diff --git a/src/at3.h b/src/at3.h
index 85f7a9e..66842fc 100644
--- a/src/at3.h
+++ b/src/at3.h
@@ -21,5 +21,5 @@
 #include "compressed_io.h"
 
 TCompressedOutputPtr
-CreateAt3Output(const std::string& filename, uint8_t numChannel,
+CreateAt3Output(const std::string& filename, size_t numChannel,
         uint32_t numFrames, uint32_t framesize, bool jointStereo);
diff --git a/src/atrac/at3p/at3p.cpp b/src/atrac/at3p/at3p.cpp
index 110f522..88d0176 100644
--- a/src/atrac/at3p/at3p.cpp
+++ b/src/atrac/at3p/at3p.cpp
@@ -38,7 +38,7 @@ public:
         , GhaProcessor(MakeGhaProcessor0(channels == 2))
     {}
 
-    TPCMEngine<float>::EProcessResult EncodeFrame(const TFloat* data, int channels);
+    TPCMEngine::EProcessResult EncodeFrame(const float* data, int channels);
 private:
     struct TChannelCtx {
         TChannelCtx()
@@ -51,10 +51,10 @@ private:
 
         at3plus_pqf_a_ctx_t PqfCtx;
 
-        TFloat* NextBuf = Buf1;
-        TFloat* CurBuf = nullptr;
-        TFloat Buf1[TAt3PEnc::NumSamples];
-        TFloat Buf2[TAt3PEnc::NumSamples];
+        float* NextBuf = Buf1;
+        float* CurBuf = nullptr;
+        float Buf1[TAt3PEnc::NumSamples];
+        float Buf2[TAt3PEnc::NumSamples];
     };
 
     TAt3PBitStream BitStream;
@@ -62,8 +62,8 @@ private:
     std::unique_ptr<IGhaProcessor> GhaProcessor;
 };
 
-TPCMEngine<float>::EProcessResult TAt3PEnc::TImpl::
-EncodeFrame(const TFloat* data, int channels)
+TPCMEngine::EProcessResult TAt3PEnc::TImpl::
+EncodeFrame(const float* data, int channels)
 {
 
     int needMore = 0;
@@ -83,7 +83,7 @@ EncodeFrame(const TFloat* data, int channels)
     }
 
     if (needMore == channels) {
-        return TPCMEngine<TFloat>::EProcessResult::LOOK_AHEAD;
+        return TPCMEngine::EProcessResult::LOOK_AHEAD;
     }
 
     assert(needMore == 0);
@@ -102,7 +102,7 @@ EncodeFrame(const TFloat* data, int channels)
         std::swap(ChannelCtx[ch].NextBuf, ChannelCtx[ch].CurBuf);
     }
 
-    return TPCMEngine<TFloat>::EProcessResult::PROCESSED;
+    return TPCMEngine::EProcessResult::PROCESSED;
 }
 
 TAt3PEnc::TAt3PEnc(TCompressedOutputPtr&& out, int channels)
@@ -111,10 +111,10 @@ TAt3PEnc::TAt3PEnc(TCompressedOutputPtr&& out, int channels)
 {
 }
 
-TPCMEngine<TFloat>::TProcessLambda TAt3PEnc::GetLambda() {
+TPCMEngine::TProcessLambda TAt3PEnc::GetLambda() {
     Impl.reset(new TImpl(Out.get(), Channels));
 
-    return [this](TFloat* data, const TPCMEngine<TFloat>::ProcessMeta&) {
+    return [this](float* data, const TPCMEngine::ProcessMeta&) {
         return Impl->EncodeFrame(data, Channels);
     };
 }
diff --git a/src/atrac/at3p/at3p_gha.cpp b/src/atrac/at3p/at3p_gha.cpp
index bc9973e..96b3eba 100644
--- a/src/atrac/at3p/at3p_gha.cpp
+++ b/src/atrac/at3p/at3p_gha.cpp
@@ -26,6 +26,7 @@
 
 #include <algorithm>
 #include <cstring>
+#include <cmath>
 #include <iostream>
 #include <map>
 #include <vector>
diff --git a/src/atrac/at3p/at3p_gha.h b/src/atrac/at3p/at3p_gha.h
index 468ee3a..45f3cbc 100644
--- a/src/atrac/at3p/at3p_gha.h
+++ b/src/atrac/at3p/at3p_gha.h
@@ -23,7 +23,6 @@
 #include <memory>
 #include <vector>
 
-static_assert(sizeof(TFloat) == sizeof(float), "TFloat must be float32");
 namespace NAtracDEnc {
 
 struct TAt3PGhaData {
diff --git a/src/atrac/atrac1.cpp b/src/atrac/atrac1.cpp
index e128609..0c1c053 100644
--- a/src/atrac/atrac1.cpp
+++ b/src/atrac/atrac1.cpp
@@ -26,8 +26,10 @@ constexpr uint32_t TAtrac1Data::SpecsPerBlock[MaxBfus];
 constexpr uint32_t TAtrac1Data::SpecsStartLong[MaxBfus];
 constexpr uint32_t TAtrac1Data::SpecsStartShort[MaxBfus];
 constexpr uint32_t TAtrac1Data::BfuAmountTab[8];
-TFloat TAtrac1Data::ScaleTable[64] = {0};
-TFloat TAtrac1Data::SineWindow[32] = {0};
+float TAtrac1Data::ScaleTable[64] = {0};
+float TAtrac1Data::SineWindow[32] = {0};
+
+const static TAtrac1Data Atrac1Data;
 
 } //namespace NAtrac1
 } //namespace NAtracDEnc
diff --git a/src/atrac/atrac1.h b/src/atrac/atrac1.h
index 67c869f..ae1809b 100644
--- a/src/atrac/atrac1.h
+++ b/src/atrac/atrac1.h
@@ -55,18 +55,18 @@ class TAtrac1Data {
 public:
     static constexpr uint8_t MaxBfus = 52;
     static constexpr uint8_t NumQMF = 3;
-protected:
-	static constexpr uint32_t SpecsPerBlock[MaxBfus] = {
+
+    static constexpr uint32_t SpecsPerBlock[MaxBfus] = {
         8,  8,  8,  8,  4,  4,  4,  4,  8,  8,  8,  8,  6,  6,  6,  6, 6, 6, 6, 6,  // low band
         6,  6,  6,  6,  7,  7,  7,  7,  9,  9,  9,  9,  10, 10, 10, 10,             // middle band
         12, 12, 12, 12, 12, 12, 12, 12, 20, 20, 20, 20, 20, 20, 20, 20              // high band
-	};
-	static constexpr uint32_t BlocksPerBand[NumQMF + 1] = {0, 20, 36, 52};
-	static constexpr uint32_t SpecsStartLong[MaxBfus] = {
+    };
+    static constexpr uint32_t BlocksPerBand[NumQMF + 1] = {0, 20, 36, 52};
+    static constexpr uint32_t SpecsStartLong[MaxBfus] = {
         0,   8,   16,  24,  32,  36,  40,  44,  48,  56,  64,  72,  80,  86,  92,  98, 104, 110, 116, 122,
         128, 134, 140, 146, 152, 159, 166, 173, 180, 189, 198, 207, 216, 226, 236, 246,
         256, 268, 280, 292, 304, 316, 328, 340, 352, 372, 392, 412, 432, 452, 472, 492,
-	};
+    };
     static constexpr uint32_t SpecsStartShort[MaxBfus] = {
         0,   32,  64,  96,  8,   40,  72,  104, 12,  44,  76,  108, 20,  52,  84,  116, 26,  58,  90, 122,
         128, 160, 192, 224, 134, 166, 198, 230, 141, 173, 205, 237, 150, 182, 214, 246,
@@ -74,13 +74,13 @@ protected:
     };
     static const uint32_t SoundUnitSize = 212;
     static constexpr uint32_t BfuAmountTab[8] = {20,  28,  32,  36, 40, 44, 48, 52};
-	static const uint32_t BitsPerBfuAmountTabIdx = 3;
-	static const uint32_t BitsPerIDWL = 4;
-	static const uint32_t BitsPerIDSF = 6;
+    static const uint32_t BitsPerBfuAmountTabIdx = 3;
+    static const uint32_t BitsPerIDWL = 4;
+    static const uint32_t BitsPerIDSF = 6;
 
-    static TFloat ScaleTable[64];
-    static TFloat SineWindow[32];
-    uint32_t BfuToBand(uint32_t i) {
+    static float ScaleTable[64];
+    static float SineWindow[32];
+    static uint32_t BfuToBand(uint32_t i) {
         if (i < 20)
             return 0;
         if (i < 36)
diff --git a/src/atrac/atrac1_bitalloc.cpp b/src/atrac/atrac1_bitalloc.cpp
index 69485db..5b20cc8 100644
--- a/src/atrac/atrac1_bitalloc.cpp
+++ b/src/atrac/atrac1_bitalloc.cpp
@@ -33,29 +33,29 @@ using std::cerr;
 using std::endl;
 using std::pair;
 
-static const uint32_t FixedBitAllocTableLong[TAtrac1BitStreamWriter::MaxBfus] = {
-    7, 7, 7, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 5, 5, 5, 5, 6,
+static const uint32_t FixedBitAllocTableLong[TAtrac1Data::MaxBfus] = {
+    7, 7, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
     6, 6, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 4,
-    4, 4, 3, 3, 3, 3, 3, 3, 2, 2, 2, 1, 1, 0, 0, 0
+    4, 4, 3, 3, 3, 3, 3, 3, 2, 1, 1, 1, 1, 0, 0, 0
 };
 
-static const uint32_t FixedBitAllocTableShort[TAtrac1BitStreamWriter::MaxBfus] = {
+static const uint32_t FixedBitAllocTableShort[TAtrac1Data::MaxBfus] = {
     6, 6, 6, 6,  6, 6, 6, 6,  6, 6, 6, 6,  6, 6, 6, 6,  6, 6, 6, 6,
     6, 6, 6, 6,  5, 5, 5, 5,  5, 5, 5, 5,  5, 5, 5, 5,
     4, 4, 4, 4, 4, 4, 4, 4,   0, 0, 0, 0, 0, 0, 0, 0
 };
 
-static const uint32_t BitBoostMask[TAtrac1BitStreamWriter::MaxBfus] = {
+static const uint32_t BitBoostMask[TAtrac1Data::MaxBfus] = {
     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1,
     1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1,
     1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
 };
 
 TBitsBooster::TBitsBooster() {
-    for (uint32_t i = 0; i < MaxBfus; ++i) {
+    for (uint32_t i = 0; i < TAtrac1Data::MaxBfus; ++i) {
         if (BitBoostMask[i] == 0)
             continue;
-        const uint32_t nBits = SpecsPerBlock[i];
+        const uint32_t nBits = TAtrac1Data::SpecsPerBlock[i];
         BitsBoostMap.insert(pair<uint32_t, uint32_t>(nBits, i));
     }
     MaxBitsPerIteration = BitsBoostMap.size() ? (--BitsBoostMap.end())->first : 0;
@@ -98,38 +98,70 @@ uint32_t TBitsBooster::ApplyBoost(std::vector<uint32_t>* bitsPerEachBlock, uint3
     return surplus;
 }
 
+std::vector<float> TAtrac1SimpleBitAlloc::ATHLong;
+
+TAtrac1SimpleBitAlloc::TAtrac1SimpleBitAlloc(ICompressedOutput* container, uint32_t bfuIdxConst, bool fastBfuNumSearch)
+    : TAtrac1BitStreamWriter(container)
+    , BfuIdxConst(bfuIdxConst)
+    , FastBfuNumSearch(fastBfuNumSearch)
+{
+    if (ATHLong.size()) {
+        return;
+    }
+    ATHLong.reserve(TAtrac1Data::MaxBfus);
+    auto ATHSpec = CalcATH(512, 44100);
+    for (size_t bandNum = 0; bandNum < TAtrac1Data::NumQMF; ++bandNum) {
+        for (size_t blockNum = TAtrac1Data::BlocksPerBand[bandNum]; blockNum < TAtrac1Data::BlocksPerBand[bandNum + 1]; ++blockNum) {
+           const size_t specNumStart =  TAtrac1Data::SpecsStartLong[blockNum];
+           float x = 999;
+           for (size_t line = specNumStart; line < specNumStart + TAtrac1Data::SpecsPerBlock[blockNum]; line++) {
+                x = fmin(x, ATHSpec[line]);
+           }
+           x = pow(10, 0.1 * x);
+           ATHLong.push_back(x);
+        }
+    }
+}
 
 vector<uint32_t> TAtrac1SimpleBitAlloc::CalcBitsAllocation(const std::vector<TScaledBlock>& scaledBlocks,
                                                            const uint32_t bfuNum,
-                                                           const TFloat spread,
-                                                           const TFloat shift,
-                                                           const TBlockSize& blockSize) {
+                                                           const float spread,
+                                                           const float shift,
+                                                           const TBlockSize& blockSize,
+                                                           const float loudness) {
     vector<uint32_t> bitsPerEachBlock(bfuNum);
     for (size_t i = 0; i < bitsPerEachBlock.size(); ++i) {
-        const uint32_t fix = blockSize.LogCount[BfuToBand(i)] ? FixedBitAllocTableShort[i] : FixedBitAllocTableLong[i];
-        int tmp = spread * ( (TFloat)scaledBlocks[i].ScaleFactorIndex/3.2) + (1.0 - spread) * fix - shift;
-        if (tmp > 16) {
-            bitsPerEachBlock[i] = 16;
-        } else if (tmp < 2) {
+        bool shortBlock = blockSize.LogCount[TAtrac1Data::BfuToBand(i)];
+        const uint32_t fix = shortBlock ? FixedBitAllocTableShort[i] : FixedBitAllocTableLong[i];
+        float ath = ATHLong[i] * loudness;
+        //std::cerr << "block: " << i << " Loudness: " << loudness << " " << 10 * log10(scaledBlocks[i].MaxEnergy / ath) << std::endl;
+        if (!shortBlock && scaledBlocks[i].MaxEnergy < ath) {
             bitsPerEachBlock[i] = 0;
         } else {
-            bitsPerEachBlock[i] = tmp;
+            int tmp = spread * ( (float)scaledBlocks[i].ScaleFactorIndex/3.2) + (1.0 - spread) * fix - shift;
+            if (tmp > 16) {
+                bitsPerEachBlock[i] = 16;
+            } else if (tmp < 2) {
+                bitsPerEachBlock[i] = 0;
+            } else {
+                bitsPerEachBlock[i] = tmp;
+            }
         }
     }
-    return bitsPerEachBlock;	
+    return bitsPerEachBlock;
 }
 
 uint32_t TAtrac1SimpleBitAlloc::GetMaxUsedBfuId(const vector<uint32_t>& bitsPerEachBlock) {
     uint32_t idx = 7;
     for (;;) {
-        uint32_t bfuNum = BfuAmountTab[idx];
+        uint32_t bfuNum = TAtrac1Data::BfuAmountTab[idx];
         if (bfuNum > bitsPerEachBlock.size()) {
             idx--;
         } else if (idx != 0) {
             assert(bfuNum == bitsPerEachBlock.size());
             uint32_t i = 0;
             while (idx && bitsPerEachBlock[bfuNum - 1 - i] == 0) {
-                if (++i >= (BfuAmountTab[idx] - BfuAmountTab[idx-1])) {
+                if (++i >= (TAtrac1Data::BfuAmountTab[idx] - TAtrac1Data::BfuAmountTab[idx-1])) {
                     idx--;
                     bfuNum -= i;
                     i = 0;
@@ -154,31 +186,33 @@ uint32_t TAtrac1SimpleBitAlloc::CheckBfuUsage(bool* changed,
     return curBfuId;
 }
 
-uint32_t TAtrac1SimpleBitAlloc::Write(const std::vector<TScaledBlock>& scaledBlocks, const TBlockSize& blockSize) {
+uint32_t TAtrac1SimpleBitAlloc::Write(const std::vector<TScaledBlock>& scaledBlocks, const TBlockSize& blockSize, float loudness) {
     uint32_t bfuIdx = BfuIdxConst ? BfuIdxConst - 1 : 7;
     bool autoBfu = !BfuIdxConst;
-    TFloat spread = AnalizeScaleFactorSpread(scaledBlocks);
+    float spread = AnalizeScaleFactorSpread(scaledBlocks);
 
-    vector<uint32_t> bitsPerEachBlock(BfuAmountTab[bfuIdx]);
+    vector<uint32_t> bitsPerEachBlock(TAtrac1Data::BfuAmountTab[bfuIdx]);
     uint32_t targetBitsPerBfus;
     uint32_t curBitsPerBfus;
     for (;;) {
-        bitsPerEachBlock.resize(BfuAmountTab[bfuIdx]);
-        const uint32_t bitsAvaliablePerBfus = SoundUnitSize * 8 - BitsPerBfuAmountTabIdx - 32 - 2 - 3 -
-                                              bitsPerEachBlock.size() * (BitsPerIDWL + BitsPerIDSF);
-        TFloat maxShift = 15;
-        TFloat minShift = -3;
-        TFloat shift = 3.0;
+        bitsPerEachBlock.resize(TAtrac1Data::BfuAmountTab[bfuIdx]);
+        const uint32_t bitsAvaliablePerBfus = TAtrac1Data::SoundUnitSize * 8 -
+            TAtrac1Data::BitsPerBfuAmountTabIdx - 32 - 2 - 3 -
+            bitsPerEachBlock.size() * (TAtrac1Data::BitsPerIDWL + TAtrac1Data::BitsPerIDSF);
+
+        float maxShift = 15;
+        float minShift = -3;
+        float shift = 3.0;
         const uint32_t maxBits = bitsAvaliablePerBfus;
         const uint32_t minBits = bitsAvaliablePerBfus - 110;
 
         bool bfuNumChanged = false;
         for (;;) {
-            const vector<uint32_t>& tmpAlloc = CalcBitsAllocation(scaledBlocks, BfuAmountTab[bfuIdx],
-                                                                  spread, shift, blockSize);
+            const vector<uint32_t>& tmpAlloc = CalcBitsAllocation(scaledBlocks, TAtrac1Data::BfuAmountTab[bfuIdx],
+                                                                  spread, shift, blockSize, loudness);
             uint32_t bitsUsed = 0;
             for (size_t i = 0; i < tmpAlloc.size(); i++) {
-                bitsUsed += SpecsPerBlock[i] * tmpAlloc[i];
+                bitsUsed += TAtrac1Data::SpecsPerBlock[i] * tmpAlloc[i];
             }
 
             if (bitsUsed < minBits) {
@@ -215,7 +249,7 @@ uint32_t TAtrac1SimpleBitAlloc::Write(const std::vector<TScaledBlock>& scaledBlo
     }
     ApplyBoost(&bitsPerEachBlock, curBitsPerBfus, targetBitsPerBfus);
     WriteBitStream(bitsPerEachBlock, scaledBlocks, bfuIdx, blockSize);
-    return BfuAmountTab[bfuIdx];
+    return TAtrac1Data::BfuAmountTab[bfuIdx];
 }
 
 TAtrac1BitStreamWriter::TAtrac1BitStreamWriter(ICompressedOutput* container)
@@ -230,7 +264,7 @@ void TAtrac1BitStreamWriter::WriteBitStream(const vector<uint32_t>& bitsPerEachB
                                             const TBlockSize& blockSize) {
     NBitStream::TBitStream bitStream;
     size_t bitUsed = 0;
-    if (bfuAmountIdx >= (1 << BitsPerBfuAmountTabIdx)) {
+    if (bfuAmountIdx >= (1 << TAtrac1Data::BitsPerBfuAmountTabIdx)) {
         cerr << "Wrong bfuAmountIdx (" << bfuAmountIdx << "), frame skiped" << endl;
         return;
     }
@@ -244,8 +278,8 @@ void TAtrac1BitStreamWriter::WriteBitStream(const vector<uint32_t>& bitsPerEachB
     bitStream.Write(0, 2);
     bitUsed+=4;
 
-    bitStream.Write(bfuAmountIdx, BitsPerBfuAmountTabIdx);
-    bitUsed += BitsPerBfuAmountTabIdx;
+    bitStream.Write(bfuAmountIdx, TAtrac1Data::BitsPerBfuAmountTabIdx);
+    bitUsed += TAtrac1Data::BitsPerBfuAmountTabIdx;
 
     bitStream.Write(0, 2);
     bitStream.Write(0, 3);
@@ -265,8 +299,8 @@ void TAtrac1BitStreamWriter::WriteBitStream(const vector<uint32_t>& bitsPerEachB
         if (wordLength == 0 || wordLength == 1)
             continue;
 
-        const TFloat multiple = ((1 << (wordLength - 1)) - 1);
-        for (const TFloat val : scaledBlocks[i].Values) {
+        const float multiple = ((1 << (wordLength - 1)) - 1);
+        for (const float val : scaledBlocks[i].Values) {
             const int tmp = lrint(val * multiple);
             const uint32_t testwl = bitsPerEachBlock[i] ? (bitsPerEachBlock[i] - 1) : 0;
             const uint32_t a = !!testwl + testwl;
@@ -286,8 +320,8 @@ void TAtrac1BitStreamWriter::WriteBitStream(const vector<uint32_t>& bitsPerEachB
     bitStream.Write(0x0, 8);
 
     bitUsed+=8;
-    if (bitUsed > SoundUnitSize * 8) {
-        cerr << "ATRAC1 bitstream corrupted, used: " << bitUsed << " exp: " << SoundUnitSize * 8 << endl;
+    if (bitUsed > TAtrac1Data::SoundUnitSize * 8) {
+        cerr << "ATRAC1 bitstream corrupted, used: " << bitUsed << " exp: " << TAtrac1Data::SoundUnitSize * 8 << endl;
         abort();
     }
     Container->WriteFrame(bitStream.GetBytes());
diff --git a/src/atrac/atrac1_bitalloc.h b/src/atrac/atrac1_bitalloc.h
index 411bece..ca01b13 100644
--- a/src/atrac/atrac1_bitalloc.h
+++ b/src/atrac/atrac1_bitalloc.h
@@ -19,7 +19,6 @@
 #pragma once
 #include "atrac_scale.h"
 #include "../aea.h"
-#include "../atrac/atrac1.h"
 #include <vector>
 #include <map>
 #include <cstdint>
@@ -33,10 +32,10 @@ class IAtrac1BitAlloc {
 public:
     IAtrac1BitAlloc() {};
     virtual ~IAtrac1BitAlloc() {};
-    virtual uint32_t Write(const std::vector<TScaledBlock>& scaledBlocks, const TBlockSize& blockSize) = 0;
+    virtual uint32_t Write(const std::vector<TScaledBlock>& scaledBlocks, const TBlockSize& blockSize, float loudness) = 0;
 };
 
-class TBitsBooster : public virtual TAtrac1Data {
+class TBitsBooster {
     std::multimap<uint32_t, uint32_t> BitsBoostMap; //bits needed -> position
     uint32_t MaxBitsPerIteration;
     uint32_t MinKey;
@@ -45,7 +44,7 @@ public:
     uint32_t ApplyBoost(std::vector<uint32_t>* bitsPerEachBlock, uint32_t cur, uint32_t target);
 };
 
-class TAtrac1BitStreamWriter : public virtual TAtrac1Data {
+class TAtrac1BitStreamWriter {
     ICompressedOutput* Container;
 public:
     explicit TAtrac1BitStreamWriter(ICompressedOutput* container);
@@ -56,19 +55,18 @@ public:
 
 class TAtrac1SimpleBitAlloc : public TAtrac1BitStreamWriter, public TBitsBooster, public virtual IAtrac1BitAlloc {
     std::vector<uint32_t> CalcBitsAllocation(const std::vector<TScaledBlock>& scaledBlocks, const uint32_t bfuNum,
-                                             const TFloat spread, const TFloat shift, const TBlockSize& blockSize);
+                                             const float spread, const float shift, const TBlockSize& blockSize,
+                                             const float loudness);
     const uint32_t BfuIdxConst;
     const bool FastBfuNumSearch;
+    static std::vector<float> ATHLong;
+
     uint32_t GetMaxUsedBfuId(const std::vector<uint32_t>& bitsPerEachBlock);
     uint32_t CheckBfuUsage(bool* changed, uint32_t curBfuId, const std::vector<uint32_t>& bitsPerEachBlock);
 public:
-    explicit TAtrac1SimpleBitAlloc(ICompressedOutput* container, uint32_t bfuIdxConst, bool fastBfuNumSearch)
-        : TAtrac1BitStreamWriter(container)
-        , BfuIdxConst(bfuIdxConst)
-        , FastBfuNumSearch(fastBfuNumSearch)
-    {}
+    TAtrac1SimpleBitAlloc(ICompressedOutput* container, uint32_t bfuIdxConst, bool fastBfuNumSearch);
     ~TAtrac1SimpleBitAlloc() {};
-     uint32_t Write(const std::vector<TScaledBlock>& scaledBlocks, const TBlockSize& blockSize) override;
+    uint32_t Write(const std::vector<TScaledBlock>& scaledBlocks, const TBlockSize& blockSize, float loudness) override;
 };
 
 } //namespace NAtrac1
diff --git a/src/atrac/atrac1_dequantiser.cpp b/src/atrac/atrac1_dequantiser.cpp
index a259f42..bcc087d 100644
--- a/src/atrac/atrac1_dequantiser.cpp
+++ b/src/atrac/atrac1_dequantiser.cpp
@@ -26,10 +26,10 @@ using namespace NBitStream;
 TAtrac1Dequantiser::TAtrac1Dequantiser() {
 }
 
-void TAtrac1Dequantiser::Dequant(TBitStream* stream, const TBlockSize& bs, TFloat specs[512]) {
-    uint32_t wordLens[MaxBfus];
-    uint32_t idScaleFactors[MaxBfus];
-    const uint32_t numBFUs = BfuAmountTab[stream->Read(3)];
+void TAtrac1Dequantiser::Dequant(TBitStream* stream, const TBlockSize& bs, float specs[512]) {
+    uint32_t wordLens[TAtrac1Data::MaxBfus];
+    uint32_t idScaleFactors[TAtrac1Data::MaxBfus];
+    const uint32_t numBFUs = TAtrac1Data::BfuAmountTab[stream->Read(3)];
     stream->Read(2);
     stream->Read(3);
 
@@ -40,28 +40,29 @@ void TAtrac1Dequantiser::Dequant(TBitStream* stream, const TBlockSize& bs, TFloa
     for (uint32_t i = 0; i < numBFUs; i++) {
         idScaleFactors[i] = stream->Read(6);
     }
-    for (uint32_t i = numBFUs; i < MaxBfus; i++) {
+    for (uint32_t i = numBFUs; i < TAtrac1Data::MaxBfus; i++) {
         wordLens[i] = idScaleFactors[i] = 0;
     }
 
-    for (uint32_t bandNum = 0; bandNum < NumQMF; bandNum++) {
-        for (uint32_t bfuNum = BlocksPerBand[bandNum]; bfuNum < BlocksPerBand[bandNum + 1]; bfuNum++) {
-            const uint32_t numSpecs = SpecsPerBlock[bfuNum];
+    for (uint32_t bandNum = 0; bandNum < TAtrac1Data::NumQMF; bandNum++) {
+        for (uint32_t bfuNum = TAtrac1Data::BlocksPerBand[bandNum]; bfuNum < TAtrac1Data::BlocksPerBand[bandNum + 1]; bfuNum++) {
+            const uint32_t numSpecs = TAtrac1Data::SpecsPerBlock[bfuNum];
             const uint32_t wordLen = !!wordLens[bfuNum] + wordLens[bfuNum];
-            const TFloat scaleFactor = ScaleTable[idScaleFactors[bfuNum]];
-            const uint32_t startPos = bs.LogCount[bandNum] ? SpecsStartShort[bfuNum] : SpecsStartLong[bfuNum]; 
+            const float scaleFactor = TAtrac1Data::ScaleTable[idScaleFactors[bfuNum]];
+            const uint32_t startPos = bs.LogCount[bandNum] ?
+                TAtrac1Data::SpecsStartShort[bfuNum] : TAtrac1Data::SpecsStartLong[bfuNum];
             if (wordLen) {
-                TFloat maxQuant = 1.0 / (TFloat)((1 << (wordLen - 1)) - 1);
+                float maxQuant = 1.0 / (float)((1 << (wordLen - 1)) - 1);
                 //cout << "BFU ("<< bfuNum << ") :" <<  "wordLen " << wordLen << " maxQuant " << maxQuant << " scaleFactor " << scaleFactor << " id " << idScaleFactors[bfuNum] << " num Specs " << numSpecs << " short: "<< (int)bs.LogCount[bandNum] << endl;
                 for (uint32_t i = 0; i < numSpecs; i++ ) {
                     specs[startPos + i] = scaleFactor * maxQuant * MakeSign(stream->Read(wordLen), wordLen);
                 }
             } else {
-                memset(&specs[startPos], 0, numSpecs * sizeof(TFloat));
+                memset(&specs[startPos], 0, numSpecs * sizeof(float));
             }
         }
 
-    } 
+    }
 }
 
 } //namespace NAtrac1
diff --git a/src/atrac/atrac1_dequantiser.h b/src/atrac/atrac1_dequantiser.h
index d3c25bd..0b57c08 100644
--- a/src/atrac/atrac1_dequantiser.h
+++ b/src/atrac/atrac1_dequantiser.h
@@ -24,10 +24,10 @@
 namespace NAtracDEnc {
 namespace NAtrac1 {
 
-class TAtrac1Dequantiser : public TAtrac1Data {
+class TAtrac1Dequantiser {
 public:
     TAtrac1Dequantiser();
-    void Dequant(NBitStream::TBitStream* stream, const TBlockSize& bs, TFloat specs[512]);
+    void Dequant(NBitStream::TBitStream* stream, const TBlockSize& bs, float specs[512]);
 };
 
 } //namespace NAtrac1
diff --git a/src/atrac/atrac1_qmf.h b/src/atrac/atrac1_qmf.h
index d8fdae3..0276160 100644
--- a/src/atrac/atrac1_qmf.h
+++ b/src/atrac/atrac1_qmf.h
@@ -28,18 +28,18 @@ class Atrac1AnalysisFilterBank {
     const static int delayComp = 39;
     TQmf<TIn, nInSamples> Qmf1;
     TQmf<TIn, nInSamples / 2> Qmf2;
-    std::vector<TFloat> MidLowTmp;
-    std::vector<TFloat> DelayBuf;
+    std::vector<float> MidLowTmp;
+    std::vector<float> DelayBuf;
 public:
     Atrac1AnalysisFilterBank() {
         MidLowTmp.resize(512);
         DelayBuf.resize(delayComp + 512);
     }
-    void Analysis(TIn* pcm, TFloat* low, TFloat* mid, TFloat* hi) {
-        memcpy(&DelayBuf[0], &DelayBuf[256], sizeof(TFloat) *  delayComp);
+    void Analysis(TIn* pcm, float* low, float* mid, float* hi) {
+        memcpy(&DelayBuf[0], &DelayBuf[256], sizeof(float) *  delayComp);
         Qmf1.Analysis(pcm, &MidLowTmp[0], &DelayBuf[delayComp]);
         Qmf2.Analysis(&MidLowTmp[0], low, mid);
-        memcpy(hi, &DelayBuf[0], sizeof(TFloat) * 256);
+        memcpy(hi, &DelayBuf[0], sizeof(float) * 256);
 
     }
 };
@@ -49,16 +49,16 @@ class Atrac1SynthesisFilterBank {
     const static int delayComp = 39;
     TQmf<TOut, nInSamples> Qmf1;
     TQmf<TOut, nInSamples / 2> Qmf2;
-    std::vector<TFloat> MidLowTmp;
-    std::vector<TFloat> DelayBuf;
+    std::vector<float> MidLowTmp;
+    std::vector<float> DelayBuf;
 public:
     Atrac1SynthesisFilterBank() {
         MidLowTmp.resize(512);
         DelayBuf.resize(delayComp + 512);
     }
-    void Synthesis(TOut* pcm, TFloat* low, TFloat* mid, TFloat* hi) {
-        memcpy(&DelayBuf[0], &DelayBuf[256], sizeof(TFloat) *  delayComp);
-        memcpy(&DelayBuf[delayComp], hi, sizeof(TFloat) * 256);
+    void Synthesis(TOut* pcm, float* low, float* mid, float* hi) {
+        memcpy(&DelayBuf[0], &DelayBuf[256], sizeof(float) *  delayComp);
+        memcpy(&DelayBuf[delayComp], hi, sizeof(float) * 256);
         Qmf2.Synthesis(&MidLowTmp[0], &low[0], &mid[0]);
         Qmf1.Synthesis(&pcm[0], &MidLowTmp[0], &DelayBuf[0]);
     }
diff --git a/src/atrac/atrac3.cpp b/src/atrac/atrac3.cpp
index 82fb34b..1e5b8e9 100644
--- a/src/atrac/atrac3.cpp
+++ b/src/atrac/atrac3.cpp
@@ -24,7 +24,7 @@ namespace NAtrac3 {
 
 constexpr uint32_t TAtrac3Data::BlockSizeTab[33];
 constexpr uint32_t TAtrac3Data::ClcLengthTab[8];
-constexpr double TAtrac3Data::MaxQuant[8];
+constexpr float TAtrac3Data::MaxQuant[8];
 constexpr uint32_t TAtrac3Data::BlocksPerBand[4 + 1];
 constexpr uint32_t TAtrac3Data::SpecsPerBlock[33];
 constexpr TAtrac3Data::THuffEntry TAtrac3Data::HuffTable1[HuffTable1Sz];
@@ -36,11 +36,13 @@ constexpr TAtrac3Data::THuffEntry TAtrac3Data::HuffTable7[HuffTable7Sz];
 constexpr TAtrac3Data::THuffTablePair TAtrac3Data::HuffTables[7];
 
 constexpr TContainerParams TAtrac3Data::ContainerParams[8];
-double TAtrac3Data::EncodeWindow[256] = {0};
-double TAtrac3Data::DecodeWindow[256] = {0};
-double TAtrac3Data::ScaleTable[64] = {0};
-double TAtrac3Data::GainLevel[16];
-double TAtrac3Data::GainInterpolation[31];
+float TAtrac3Data::EncodeWindow[256] = {0};
+float TAtrac3Data::DecodeWindow[256] = {0};
+float TAtrac3Data::ScaleTable[64] = {0};
+float TAtrac3Data::GainLevel[16];
+float TAtrac3Data::GainInterpolation[31];
+
+static const TAtrac3Data Atrac3Data;
 
 const TContainerParams* TAtrac3Data::GetContainerParamsForBitrate(uint32_t bitrate) {
     // Set default to LP2 mode
diff --git a/src/atrac/atrac3.h b/src/atrac/atrac3.h
index caf49f7..103f6fc 100644
--- a/src/atrac/atrac3.h
+++ b/src/atrac/atrac3.h
@@ -54,13 +54,13 @@ class TAtrac3Data {
 public:
     static constexpr uint8_t MaxBfus = 32;
     static constexpr uint32_t NumSamples = 1024;
-//protected:
+
     static const uint32_t MDCTSz = 512;
-    static double ScaleTable[64];
-    static double EncodeWindow[256];
-    static double DecodeWindow[256];
-    static double GainLevel[16];
-    static double GainInterpolation[31];
+    static float ScaleTable[64];
+    static float EncodeWindow[256];
+    static float DecodeWindow[256];
+    static float GainLevel[16];
+    static float GainInterpolation[31];
     static constexpr int32_t ExponentOffset = 4;
     static constexpr int32_t LocScale = 3;
     static constexpr int32_t LocSz = 1 << LocScale;
@@ -68,7 +68,7 @@ public:
 
     static constexpr uint32_t NumSpecs = NumSamples;
     static const uint32_t frameSz = 152;
-    static constexpr double MaxQuant[8] = {
+    static constexpr float MaxQuant[8] = {
         0.0,    1.5,    2.5,    3.5,
         4.5,    7.5,    15.5,   31.5
     };
@@ -248,6 +248,7 @@ public:
     typedef std::vector<TTonalVal> TTonalComponents;
 };
 
+
 struct TAtrac3EncoderSettings {
     TAtrac3EncoderSettings(uint32_t bitrate, bool noGainControll,
                            bool noTonalComponents, uint8_t sourceChannels, uint32_t bfuIdxConst)
diff --git a/src/atrac/atrac3_bitstream.cpp b/src/atrac/atrac3_bitstream.cpp
index 78ecaac..9bada6f 100644
--- a/src/atrac/atrac3_bitstream.cpp
+++ b/src/atrac/atrac3_bitstream.cpp
@@ -40,10 +40,35 @@ static const uint32_t FixedBitAllocTable[TAtrac3Data::MaxBfus] = {
   1, 0
 };
 
-uint32_t TAtrac3BitStreamWriter::CLCEnc(const uint32_t selector, const int mantissas[MaxSpecsPerBlock],
+std::vector<float> TAtrac3BitStreamWriter::ATH;
+TAtrac3BitStreamWriter::TAtrac3BitStreamWriter(ICompressedOutput* container, const TContainerParams& params, uint32_t bfuIdxConst)
+    : Container(container)
+    , Params(params)
+    , BfuIdxConst(bfuIdxConst)
+{
+    NEnv::SetRoundFloat();
+    // ATH is a static member shared by every writer: fill it only while it is still empty.
+    if (!ATH.empty())
+        return;
+    ATH.reserve(TAtrac3Data::MaxBfus);
+    const auto athSpec = CalcATH(1024, 44100);
+    for (size_t band = 0; band < TAtrac3Data::NumQMF; ++band) {
+        for (size_t block = TAtrac3Data::BlocksPerBand[band]; block < TAtrac3Data::BlocksPerBand[band + 1]; ++block) {
+            const size_t firstLine = TAtrac3Data::SpecsStartLong[block];
+            const size_t endLine = firstLine + TAtrac3Data::SpecsPerBlock[block];
+            float lowest = 999; // initial value for the running minimum over this block's lines
+            for (size_t line = firstLine; line < endLine; ++line)
+                lowest = fmin(lowest, athSpec[line]);
+            // 10^(x/10) of the block minimum (presumably dB -> linear); the /100 below:
+            ATH.push_back((float)pow(10, 0.1 * lowest) / 100); //reduce efficiency of ATH, but prevents aliasing problem, TODO: fix it?
+        }
+    }
+}
+
+uint32_t TAtrac3BitStreamWriter::CLCEnc(const uint32_t selector, const int mantissas[TAtrac3Data::MaxSpecsPerBlock],
                                         const uint32_t blockSize, NBitStream::TBitStream* bitStream)
 {
-    const uint32_t numBits = ClcLengthTab[selector];
+    const uint32_t numBits = TAtrac3Data::ClcLengthTab[selector];
     const uint32_t bitsUsed = (selector > 1) ? numBits * blockSize : numBits * blockSize / 2;
     if (!bitStream)
         return bitsUsed;
@@ -53,8 +78,8 @@ uint32_t TAtrac3BitStreamWriter::CLCEnc(const uint32_t selector, const int manti
         }
     } else {
         for (uint32_t i = 0; i < blockSize / 2; ++i) {
-            uint32_t code = MantissaToCLcIdx(mantissas[i * 2]) << 2;
-            code |= MantissaToCLcIdx(mantissas[i * 2 + 1]);
+            uint32_t code = TAtrac3Data::MantissaToCLcIdx(mantissas[i * 2]) << 2;
+            code |= TAtrac3Data::MantissaToCLcIdx(mantissas[i * 2 + 1]);
             ASSERT(numBits == 4);
             bitStream->Write(code, numBits);
         }
@@ -62,12 +87,12 @@ uint32_t TAtrac3BitStreamWriter::CLCEnc(const uint32_t selector, const int manti
     return bitsUsed;
 }
 
-uint32_t TAtrac3BitStreamWriter::VLCEnc(const uint32_t selector, const int mantissas[MaxSpecsPerBlock],
+uint32_t TAtrac3BitStreamWriter::VLCEnc(const uint32_t selector, const int mantissas[TAtrac3Data::MaxSpecsPerBlock],
                                         const uint32_t blockSize, NBitStream::TBitStream* bitStream)
 {
     ASSERT(selector > 0);
-    const THuffEntry* huffTable = HuffTables[selector - 1].Table;
-    const uint8_t tableSz = HuffTables[selector - 1].Sz;
+    const TAtrac3Data::THuffEntry* huffTable = TAtrac3Data::HuffTables[selector - 1].Table;
+    const uint8_t tableSz = TAtrac3Data::HuffTables[selector - 1].Sz;
     uint32_t bitsUsed = 0;
     if (selector > 1) {
         for (uint32_t i = 0; i < blockSize; ++i) {
@@ -86,7 +111,7 @@ uint32_t TAtrac3BitStreamWriter::VLCEnc(const uint32_t selector, const int manti
         for (uint32_t i = 0; i < blockSize / 2; ++i) {
             const int ma = mantissas[i * 2];
             const int mb = mantissas[i * 2 + 1];
-            const uint32_t huffS = MantissasToVlcIndex(ma, mb);
+            const uint32_t huffS = TAtrac3Data::MantissasToVlcIndex(ma, mb);
             bitsUsed += huffTable[huffS].Bits;
             if (bitStream)
                 bitStream->Write(huffTable[huffS].Code, huffTable[huffS].Bits);
@@ -95,7 +120,7 @@ uint32_t TAtrac3BitStreamWriter::VLCEnc(const uint32_t selector, const int manti
     return bitsUsed;
 }
 
-static inline int ToInt(double x) {
+static inline int ToInt(float x) {
 #if defined(_MSC_VER) && !defined(_WIN64)
     int n;
     __asm {
@@ -108,7 +133,7 @@ static inline int ToInt(double x) {
 #endif
 }
 
-static inline void CalcMantisas(const TFloat* values, const uint32_t first, const uint32_t last, const TFloat mul, int* mantisas) {
+static inline void CalcMantisas(const float* values, const uint32_t first, const uint32_t last, const float mul, int* mantisas) {
     for (uint32_t j = 0, f = first; f < last; f++, j++) {
         mantisas[f] = ToInt(values[j] * mul);
     }
@@ -128,12 +153,12 @@ std::pair<uint8_t, uint32_t> TAtrac3BitStreamWriter::CalcSpecsBitsConsumption(co
             if (precisionPerEachBlocks[i] == 0)
                 continue;
             bits += 6; //sfi
-            const uint32_t first = BlockSizeTab[i];
-            const uint32_t last = BlockSizeTab[i+1];
+            const uint32_t first = TAtrac3Data::BlockSizeTab[i];
+            const uint32_t last = TAtrac3Data::BlockSizeTab[i+1];
             const uint32_t blockSize = last - first;
-            const TFloat mul = MaxQuant[std::min(precisionPerEachBlocks[i], (uint32_t)7)];
+            const float mul = TAtrac3Data::MaxQuant[std::min(precisionPerEachBlocks[i], (uint32_t)7)];
             if (calcMant) {
-                const TFloat* values = scaledBlocks[i].Values.data();
+                const float* values = scaledBlocks[i].Values.data();
                 CalcMantisas(values, first, last, mul, mantisas);
             }
             bits += clcMode ? CLCEnc(precisionPerEachBlocks[i], mantisas + first, blockSize, nullptr) :
@@ -165,14 +190,14 @@ static inline bool CheckBfus(uint16_t* numBfu, const vector<uint32_t>& precision
 static const std::pair<uint8_t, vector<uint32_t>> DUMMY_ALLOC{1, vector<uint32_t>{0}};
 
 std::pair<uint8_t, vector<uint32_t>> TAtrac3BitStreamWriter::CreateAllocation(const TSingleChannelElement& sce,
-                                                                              const uint16_t targetBits, int mt[MaxSpecs])
+    const uint16_t targetBits, int mt[TAtrac3Data::MaxSpecs], float laudness)
 {
     const vector<TScaledBlock>& scaledBlocks = sce.ScaledBlocks;
     if (scaledBlocks.empty()) {
         return DUMMY_ALLOC;
     }
 
-    TFloat spread = AnalizeScaleFactorSpread(scaledBlocks);
+    float spread = AnalizeScaleFactorSpread(scaledBlocks);
 
     uint16_t numBfu = BfuIdxConst ? BfuIdxConst : 32;
 
@@ -190,11 +215,11 @@ std::pair<uint8_t, vector<uint32_t>> TAtrac3BitStreamWriter::CreateAllocation(co
     bool cont = true;
     while (cont) {
         precisionPerEachBlocks.resize(numBfu);
-        TFloat maxShift = 20;
-        TFloat minShift = -8;
+        double maxShift = 20;
+        double minShift = -8;
         for (;;) {
-            TFloat shift = (maxShift + minShift) / 2;
-            const vector<uint32_t>& tmpAlloc = CalcBitsAllocation(scaledBlocks, numBfu, spread, shift);
+            double shift = (maxShift + minShift) / 2;
+            const vector<uint32_t>& tmpAlloc = CalcBitsAllocation(scaledBlocks, numBfu, spread, shift, laudness);
             auto consumption = CalcSpecsBitsConsumption(sce, tmpAlloc, mt);
 
             auto bitsUsedByTonal = EncodeTonalComponents(sce, tmpAlloc, nullptr);
@@ -228,7 +253,7 @@ std::pair<uint8_t, vector<uint32_t>> TAtrac3BitStreamWriter::CreateAllocation(co
 }
 
 void TAtrac3BitStreamWriter::EncodeSpecs(const TSingleChannelElement& sce, NBitStream::TBitStream* bitStream,
-                                         const std::pair<uint8_t, vector<uint32_t>>& allocation, const int mt[MaxSpecs])
+    const std::pair<uint8_t, vector<uint32_t>>& allocation, const int mt[TAtrac3Data::MaxSpecs])
 {
 
     const vector<TScaledBlock>& scaledBlocks = sce.ScaledBlocks;
@@ -253,8 +278,8 @@ void TAtrac3BitStreamWriter::EncodeSpecs(const TSingleChannelElement& sce, NBitS
         if (precisionPerEachBlocks[i] == 0)
             continue;
 
-        const uint32_t first = BlockSizeTab[i];
-        const uint32_t last = BlockSizeTab[i+1];
+        const uint32_t first = TAtrac3Data::BlockSizeTab[i];
+        const uint32_t last = TAtrac3Data::BlockSizeTab[i+1];
         const uint32_t blockSize = last - first;
 
         if (codingMode == 1) {
@@ -345,7 +370,7 @@ uint16_t TAtrac3BitStreamWriter::EncodeTonalComponents(const TSingleChannelEleme
 
     uint8_t tcgnCheck = 0;
     //for each group of equal quantiser and len 
-    for (uint8_t i = 0; i < 64; ++i) {
+    for (size_t i = 0; i < 64; ++i) {
         const TTonalComponentsSubGroup& curGroup = groups[i];
         if (curGroup.SubGroupPtr.size() == 0) {
             ASSERT(curGroup.SubGroupMap.size() == 0);
@@ -398,7 +423,7 @@ uint16_t TAtrac3BitStreamWriter::EncodeTonalComponents(const TSingleChannelEleme
                 bitStream->Write(i >> 3, 3);
             uint8_t lastPos = subGroupStartPos;
             uint8_t checkPos = 0;
-            for (uint16_t j = 0; j < 16; ++j) {
+            for (size_t j = 0; j < 16; ++j) {
                 if (!(bandFlags.i[j >> 2])) {
                     continue;
                 }
@@ -426,7 +451,7 @@ uint16_t TAtrac3BitStreamWriter::EncodeTonalComponents(const TSingleChannelEleme
 
                     ASSERT(curGroup.SubGroupPtr[k]->ScaledBlock.Values.size() < 8);
                     int mantisas[256];
-                    const TFloat mul = MaxQuant[std::min((uint32_t)(i>>3), (uint32_t)7)];
+                    const float mul = TAtrac3Data::MaxQuant[std::min((uint32_t)(i>>3), (uint32_t)7)];
                     ASSERT(codedValues == curGroup.SubGroupPtr[k]->ScaledBlock.Values.size());
                     for (uint32_t z = 0; z < curGroup.SubGroupPtr[k]->ScaledBlock.Values.size(); ++z) {
                         mantisas[z] = lrint(curGroup.SubGroupPtr[k]->ScaledBlock.Values[z] * mul);
@@ -452,19 +477,26 @@ uint16_t TAtrac3BitStreamWriter::EncodeTonalComponents(const TSingleChannelEleme
 
 vector<uint32_t> TAtrac3BitStreamWriter::CalcBitsAllocation(const std::vector<TScaledBlock>& scaledBlocks,
                                                             const uint32_t bfuNum,
-                                                            const TFloat spread,
-                                                            const TFloat shift)
+                                                            const float spread,
+                                                            const float shift,
+                                                            const float loudness)
 {
     vector<uint32_t> bitsPerEachBlock(bfuNum);
     for (size_t i = 0; i < bitsPerEachBlock.size(); ++i) {
-        const uint32_t fix = FixedBitAllocTable[i];
-        int tmp = spread * ( (TFloat)scaledBlocks[i].ScaleFactorIndex/3.2) + (1.0 - spread) * fix - shift; 
-        if (tmp > 7) {
-            bitsPerEachBlock[i] = 7;
-        } else if (tmp < 0) {
+        float ath = ATH[i] * loudness;
+        //std::cerr << "block: " << i << " Loudness: " << loudness << " " << 10 * log10(scaledBlocks[i].MaxEnergy / ath) << std::endl;
+        if (scaledBlocks[i].MaxEnergy < ath) {
             bitsPerEachBlock[i] = 0;
         } else {
-            bitsPerEachBlock[i] = tmp;
+            const uint32_t fix = FixedBitAllocTable[i];
+            int tmp = spread * ( (float)scaledBlocks[i].ScaleFactorIndex/3.2) + (1.0 - spread) * fix - shift;
+            if (tmp > 7) {
+                bitsPerEachBlock[i] = 7;
+            } else if (tmp < 0) {
+                bitsPerEachBlock[i] = 0;
+            } else {
+                bitsPerEachBlock[i] = tmp;
+            }
         }
     }
     return bitsPerEachBlock;
@@ -482,8 +514,8 @@ void WriteJsParams(NBitStream::TBitStream* bs)
 //  0.5 - M only (mono)
 //  0.0 - Uncorrelated
 // -0.5 - S only
-static TFloat CalcMSRatio(TFloat mEnergy, TFloat sEnergy) {
-    TFloat total = sEnergy + mEnergy;
+static float CalcMSRatio(float mEnergy, float sEnergy) {
+    float total = sEnergy + mEnergy;
     if (total > 0)
         return mEnergy / total - 0.5;
 
@@ -503,13 +535,13 @@ static int32_t CalcMSBytesShift(uint32_t frameSz,
     if (elements[1].ScaledBlocks.empty()) {
         return maxAllowedShift;
     } else {
-        TFloat ratio = CalcMSRatio(elements[0].Energy, elements[1].Energy);
+        float ratio = CalcMSRatio(elements[0].Loudness, elements[1].Loudness);
         //std::cerr << ratio << std::endl;
         return std::max(std::min(ToInt(frameSz * ratio), maxAllowedShift), -maxAllowedShift);
     }
 }
 
-void TAtrac3BitStreamWriter::WriteSoundUnit(const vector<TSingleChannelElement>& singleChannelElements)
+void TAtrac3BitStreamWriter::WriteSoundUnit(const vector<TSingleChannelElement>& singleChannelElements, float laudness)
 {
 
     ASSERT(singleChannelElements.size() == 1 || singleChannelElements.size() == 2);
@@ -556,7 +588,7 @@ void TAtrac3BitStreamWriter::WriteSoundUnit(const vector<TSingleChannelElement>&
         bitsToAlloc[channel] -= bitsUsedByGainInfoAndHeader;
     }
 
-    int mt[2][MaxSpecs];
+    int mt[2][TAtrac3Data::MaxSpecs];
     std::pair<uint8_t, vector<uint32_t>> allocations[2];
 
     const int32_t msBytesShift = Params.Js ? CalcMSBytesShift(Params.FrameSz, singleChannelElements, bitsToAlloc) : 0; // positive - gain to m, negative to s. Must be zero if no joint stereo mode
@@ -566,7 +598,7 @@ void TAtrac3BitStreamWriter::WriteSoundUnit(const vector<TSingleChannelElement>&
 
     for (uint32_t channel = 0; channel < singleChannelElements.size(); channel++) {
         const TSingleChannelElement& sce = singleChannelElements[channel];
-        allocations[channel] = CreateAllocation(sce, bitsToAlloc[channel], mt[channel]);
+        allocations[channel] = CreateAllocation(sce, bitsToAlloc[channel], mt[channel], laudness);
     }
 
     for (uint32_t channel = 0; channel < singleChannelElements.size(); channel++) {
diff --git a/src/atrac/atrac3_bitstream.h b/src/atrac/atrac3_bitstream.h
index 152437b..93ee72e 100644
--- a/src/atrac/atrac3_bitstream.h
+++ b/src/atrac/atrac3_bitstream.h
@@ -39,15 +39,16 @@ struct TTonalBlock {
     TScaledBlock ScaledBlock;
 };
 
-class TAtrac3BitStreamWriter : public virtual TAtrac3Data {
+class TAtrac3BitStreamWriter {
 public:
     struct TSingleChannelElement {
         TAtrac3Data::SubbandInfo SubbandInfo;
         std::vector<TTonalBlock> TonalBlocks;
         std::vector<TScaledBlock> ScaledBlocks;
-        TFloat Energy;
+        float Loudness;
     };
 private:
+    static std::vector<float> ATH;
 
     struct TTonalComponentsSubGroup {
         std::vector<uint8_t> SubGroupMap;
@@ -58,24 +59,24 @@ private:
     const uint32_t BfuIdxConst;
     std::vector<char> OutBuffer;
 
-    uint32_t CLCEnc(const uint32_t selector, const int mantissas[MaxSpecsPerBlock],
+    uint32_t CLCEnc(const uint32_t selector, const int mantissas[TAtrac3Data::MaxSpecsPerBlock],
                     const uint32_t blockSize, NBitStream::TBitStream* bitStream);
 
-    uint32_t VLCEnc(const uint32_t selector, const int mantissas[MaxSpecsPerBlock],
+    uint32_t VLCEnc(const uint32_t selector, const int mantissas[TAtrac3Data::MaxSpecsPerBlock],
                     const uint32_t blockSize, NBitStream::TBitStream* bitStream);
 
     std::vector<uint32_t> CalcBitsAllocation(const std::vector<TScaledBlock>& scaledBlocks,
-                                             uint32_t bfuNum, TFloat spread, TFloat shift);
+                                             uint32_t bfuNum, float spread, float shift, float loudness);
 
     std::pair<uint8_t, std::vector<uint32_t>> CreateAllocation(const TSingleChannelElement& sce,
-                                                               uint16_t targetBits, int mt[MaxSpecs]);
+                                                               uint16_t targetBits, int mt[TAtrac3Data::MaxSpecs], float laudness);
 
     std::pair<uint8_t, uint32_t> CalcSpecsBitsConsumption(const TSingleChannelElement& sce,
                                                           const std::vector<uint32_t>& precisionPerEachBlocks,
                                                           int* mantisas);
 
     void EncodeSpecs(const TSingleChannelElement& sce, NBitStream::TBitStream* bitStream,
-                     const std::pair<uint8_t, std::vector<uint32_t>>&, const int mt[MaxSpecs]);
+                     const std::pair<uint8_t, std::vector<uint32_t>>&, const int mt[TAtrac3Data::MaxSpecs]);
 
     uint8_t GroupTonalComponents(const std::vector<TTonalBlock>& tonalComponents,
                                  const std::vector<uint32_t>& allocTable,
@@ -85,15 +86,9 @@ private:
                                    const std::vector<uint32_t>& allocTable,
                                    NBitStream::TBitStream* bitStream);
 public:
-    TAtrac3BitStreamWriter(ICompressedOutput* container, const TContainerParams& params, uint32_t bfuIdxConst) //no mono mode for atrac3
-        : Container(container)
-        , Params(params)
-        , BfuIdxConst(bfuIdxConst)
-    {
-        NEnv::SetRoundFloat();
-    }
+    TAtrac3BitStreamWriter(ICompressedOutput* container, const TContainerParams& params, uint32_t bfuIdxConst);
 
-    void WriteSoundUnit(const std::vector<TSingleChannelElement>& singleChannelElements);
+    void WriteSoundUnit(const std::vector<TSingleChannelElement>& singleChannelElements, float laudness);
 };
 
 } // namespace NAtrac3
diff --git a/src/atrac/atrac3_qmf.h b/src/atrac/atrac3_qmf.h
index 2fc33fa..1ed137e 100644
--- a/src/atrac/atrac3_qmf.h
+++ b/src/atrac/atrac3_qmf.h
@@ -28,14 +28,14 @@ class Atrac3AnalysisFilterBank {
     TQmf<TIn, nInSamples> Qmf1;
     TQmf<TIn, nInSamples / 2> Qmf2;
     TQmf<TIn, nInSamples / 2> Qmf3;
-    std::vector<TFloat> Buf1;
-    std::vector<TFloat> Buf2;
+    std::vector<float> Buf1;
+    std::vector<float> Buf2;
 public:
     Atrac3AnalysisFilterBank() {
         Buf1.resize(nInSamples);
         Buf2.resize(nInSamples);
     }
-    void Analysis(TIn* pcm, TFloat* subs[4]) {
+    void Analysis(TIn* pcm, float* subs[4]) {
         Qmf1.Analysis(pcm, Buf1.data(), Buf2.data());
         Qmf2.Analysis(Buf1.data(), subs[0], subs[1]);
         Qmf3.Analysis(Buf2.data(), subs[3], subs[2]);
diff --git a/src/atrac/atrac_psy_common.cpp b/src/atrac/atrac_psy_common.cpp
index 089bf47..186915e 100644
--- a/src/atrac/atrac_psy_common.cpp
+++ b/src/atrac/atrac_psy_common.cpp
@@ -18,6 +18,7 @@
 
 #include "atrac_psy_common.h"
 
+#include <cmath>
 
 ////////////////////////////////////////////////////////////////////////////////
 namespace {
@@ -98,15 +99,15 @@ namespace NAtracDEnc {
 using std::vector;
 
 //returns 1 for tone-like, 0 - noise-like
-TFloat AnalizeScaleFactorSpread(const vector<TScaledBlock>& scaledBlocks)
+float AnalizeScaleFactorSpread(const vector<TScaledBlock>& scaledBlocks)
 {
-    TFloat s = 0.0;
+    float s = 0.0;
     for (size_t i = 0; i < scaledBlocks.size(); ++i) {
         s += scaledBlocks[i].ScaleFactorIndex;
     }
     s /= scaledBlocks.size();
-    TFloat sigma = 0.0;
-    TFloat t = 0.0;
+    float sigma = 0.0;
+    float t = 0.0;
     for (size_t i = 0; i < scaledBlocks.size(); ++i) {
         t = (scaledBlocks[i].ScaleFactorIndex - s);
         t *= t;
@@ -135,4 +136,21 @@ vector<float> CalcATH(int len, int sampleRate)
     return res;
 }
 
+vector<float> CreateLoudnessCurve(size_t sz)
+{
+    // One weight per index in [0, sz); the vector starts zero-filled.
+    std::vector<float> curve(sz);
+
+    for (size_t line = 0; line < sz; ++line) {
+        // Frequency for this index: (line + 3) steps of (0.5 * 44100) / sz.
+        const float freq = (float)(line + 3) * 0.5 * 44100 / (float)sz;
+        // Offset of log10(freq) from 3.5, where the parabolic term below peaks.
+        const float dist = std::log10(freq) - 3.5;
+        const float level = -10 * dist * dist + 3 - freq / 3000;
+        // Stored weight is 10^(level / 10).
+        curve[line] = std::pow(10, (0.1 * level));
+    }
+    return curve;
+}
+
 } // namespace NAtracDEnc
diff --git a/src/atrac/atrac_psy_common.h b/src/atrac/atrac_psy_common.h
index 1a826bb..c433b90 100644
--- a/src/atrac/atrac_psy_common.h
+++ b/src/atrac/atrac_psy_common.h
@@ -21,7 +21,19 @@
 
 namespace NAtracDEnc {
 
-TFloat AnalizeScaleFactorSpread(const std::vector<TScaledBlock>& scaledBlocks);
+float AnalizeScaleFactorSpread(const std::vector<TScaledBlock>& scaledBlocks);
 std::vector<float> CalcATH(int len, int sampleRate);
 
+inline float TrackLoudness(float prevLoud, float l0, float l1) {
+    // Weighted update: 98% of the previous value plus 1% of each of the two new inputs.
+    return prevLoud * 0.98 + (l0 + l1) * 0.01;
+}
+
+inline float TrackLoudness(float prevLoud, float l) {
+    // Single-input variant: 98% of the previous value plus 2% of the new input.
+    return prevLoud * 0.98 + l * 0.02;
+}
+
+std::vector<float> CreateLoudnessCurve(size_t sz);
+
 } //namespace NAtracDEnc
diff --git a/src/atrac/atrac_scale.cpp b/src/atrac/atrac_scale.cpp
index f24f7b0..d6f5960 100644
--- a/src/atrac/atrac_scale.cpp
+++ b/src/atrac/atrac_scale.cpp
@@ -33,13 +33,20 @@ using std::endl;
 
 using std::abs;
 
-static const TFloat MAX_SCALE = 1.0;
+static const float MAX_SCALE = 1.0;
 
 template<class TBaseData>
-TScaledBlock TScaler<TBaseData>::Scale(const TFloat* in, uint16_t len) {
-    TFloat maxAbsSpec = 0;
+TScaler<TBaseData>::TScaler() {
+    // Reverse lookup: each of the 64 ScaleTable values maps back to its table index.
+    for (uint8_t idx = 0; idx < 64; ++idx)
+        ScaleIndex[TBaseData::ScaleTable[idx]] = idx;
+}
+
+template<class TBaseData>
+TScaledBlock TScaler<TBaseData>::Scale(const float* in, uint16_t len) {
+    float maxAbsSpec = 0;
     for (uint16_t i = 0; i < len; ++i) {
-        const TFloat absSpec = abs(in[i]);
+        const float absSpec = abs(in[i]);
         if (absSpec > maxAbsSpec) {
             maxAbsSpec = absSpec;
         }
@@ -48,33 +55,39 @@ TScaledBlock TScaler<TBaseData>::Scale(const TFloat* in, uint16_t len) {
         cerr << "Scale error: absSpec > MAX_SCALE, val: " << maxAbsSpec << endl;
         maxAbsSpec = MAX_SCALE;
     }
-    const map<TFloat, uint8_t>::const_iterator scaleIter = ScaleIndex.lower_bound(maxAbsSpec);
-    const TFloat scaleFactor = scaleIter->first;
+    const map<float, uint8_t>::const_iterator scaleIter = ScaleIndex.lower_bound(maxAbsSpec);
+    const float scaleFactor = scaleIter->first;
     const uint8_t scaleFactorIndex = scaleIter->second;
     TScaledBlock res(scaleFactorIndex);
+    float maxEnergy = 0.0;
     for (uint16_t i = 0; i < len; ++i) {
-        TFloat scaledValue = in[i] / scaleFactor;
+        float scaledValue = in[i] / scaleFactor;
+        float energy = in[i] * in[i];
+        maxEnergy = std::max(maxEnergy, energy);
         if (abs(scaledValue) >= 1.0) {
-            cerr << "got "<< scaledValue << " it is wrong scalling" << endl;
+            if (abs(scaledValue) > 1.0) {
+                cerr << "clipping, scaled value: "<< scaledValue << endl;
+            }
             scaledValue = (scaledValue > 0) ? 0.99999 : -0.99999;
         }
         res.Values.push_back(scaledValue);
-	}
+    }
+    res.MaxEnergy = maxEnergy;
     return res;
 }
 
 template<class TBaseData>
-vector<TScaledBlock> TScaler<TBaseData>::ScaleFrame(const vector<TFloat>& specs, const TBlockSize& blockSize) {
+vector<TScaledBlock> TScaler<TBaseData>::ScaleFrame(const vector<float>& specs, const TBlockSize& blockSize) { // returns one TScaledBlock per block of the frame
     vector<TScaledBlock> scaledBlocks;
     scaledBlocks.reserve(TBaseData::MaxBfus);
-    for (uint8_t bandNum = 0; bandNum < this->NumQMF; ++bandNum) {
+    for (uint8_t bandNum = 0; bandNum < TBaseData::NumQMF; ++bandNum) { // bands are visited in order; LogCount[bandNum] != 0 selects the short-window start table
         const bool shortWinMode = !!blockSize.LogCount[bandNum];
-        for (uint8_t blockNum = this->BlocksPerBand[bandNum]; blockNum < this->BlocksPerBand[bandNum + 1]; ++blockNum) {
-            const uint16_t specNumStart = shortWinMode ? TBaseData::SpecsStartShort[blockNum] : 
+        for (uint8_t blockNum = TBaseData::BlocksPerBand[bandNum]; blockNum < TBaseData::BlocksPerBand[bandNum + 1]; ++blockNum) { // BlocksPerBand holds the first block index of each band
+            const uint16_t specNumStart = shortWinMode ? TBaseData::SpecsStartShort[blockNum] :
                                                          TBaseData::SpecsStartLong[blockNum];
-            scaledBlocks.emplace_back(Scale(&specs[specNumStart], this->SpecsPerBlock[blockNum]));
-		}
-	}
+            scaledBlocks.emplace_back(Scale(&specs[specNumStart], TBaseData::SpecsPerBlock[blockNum])); // scale SpecsPerBlock[blockNum] lines starting at specNumStart
+        }
+    }
     return scaledBlocks;
 }
 
diff --git a/src/atrac/atrac_scale.h b/src/atrac/atrac_scale.h
index 855010e..059360d 100644
--- a/src/atrac/atrac_scale.h
+++ b/src/atrac/atrac_scale.h
@@ -17,35 +17,32 @@
  */
 
 #pragma once
+#include <array>
 #include <vector>
 #include <map>
 #include <cstdint>
 
-#include "atrac1.h"
 #include "lib/bitstream/bitstream.h"
 #include "../config.h"
 
 namespace NAtracDEnc {
 
 struct TScaledBlock {
-	TScaledBlock(uint8_t sfi) : ScaleFactorIndex(sfi) {}
+    TScaledBlock(uint8_t sfi) : ScaleFactorIndex(sfi) {} // sfi: scale factor index chosen for this block
     /* const */ uint8_t ScaleFactorIndex = 0;
-    std::vector<TFloat> Values;
+    std::vector<float> Values; // block values after division by the selected scale factor
+    float MaxEnergy = 0.0f; // largest squared input value of the block; zero-initialised because the constructor does not set it
 };
 
 class TBlockSize;
 
 template <class TBaseData>
-class TScaler : public TBaseData {
-    std::map<TFloat, uint8_t>ScaleIndex;
+class TScaler { // scales spectrum blocks using the tables of TBaseData
+    std::map<float, uint8_t> ScaleIndex; // scale factor value -> its index in TBaseData::ScaleTable
 public:
-    TScaler() {
-        for (int i = 0; i < 64; i++) {
-            ScaleIndex[TBaseData::ScaleTable[i]] = i;
-        }
-    }
-    TScaledBlock Scale(const TFloat* in, uint16_t len);
-    std::vector<TScaledBlock> ScaleFrame(const std::vector<TFloat>& specs, const TBlockSize& blockSize);
+    TScaler(); // fills ScaleIndex from the 64 entries of TBaseData::ScaleTable
+    TScaledBlock Scale(const float* in, uint16_t len); // scales len values starting at in and returns them with the chosen scale factor index
+    std::vector<TScaledBlock> ScaleFrame(const std::vector<float>& specs, const TBlockSize& blockSize); // applies Scale to every block of a frame
 };
 
 class TBlockSize {
@@ -76,7 +73,7 @@ public:
     TBlockSize()
         : LogCount({{0, 0, 0, 0}})
     {}
-    const std::array<int, 4> LogCount;
+    std::array<int, 4> LogCount;
 };
 
 } //namespace NAtracDEnc
diff --git a/src/atrac1denc.cpp b/src/atrac1denc.cpp
index 48f1864..27940bd 100644
--- a/src/atrac1denc.cpp
+++ b/src/atrac1denc.cpp
@@ -24,6 +24,7 @@
 #include "atrac/atrac1_dequantiser.h"
 #include "atrac/atrac1_qmf.h"
 #include "atrac/atrac1_bitalloc.h"
+#include "atrac/atrac_psy_common.h"
 #include "util.h"
 
 namespace NAtracDEnc {
@@ -35,6 +36,7 @@ using std::vector;
 TAtrac1Encoder::TAtrac1Encoder(TCompressedOutputPtr&& aea, TAtrac1EncodeSettings&& settings)
     : Aea(std::move(aea))
     , Settings(std::move(settings))
+    , LoudnessCurve(CreateLoudnessCurve(TAtrac1Data::NumSamples))
 {
 }
 
@@ -43,8 +45,8 @@ TAtrac1Decoder::TAtrac1Decoder(TCompressedInputPtr&& aea)
 {
 }
 
-static void vector_fmul_window(TFloat *dst, const TFloat *src0,
-                                const TFloat *src1, const TFloat *win, int len)
+static void vector_fmul_window(float *dst, const float *src0,
+                                const float *src1, const float *win, int len)
 {
     int i, j;
 
@@ -53,36 +55,36 @@ static void vector_fmul_window(TFloat *dst, const TFloat *src0,
     src0 += len;
 
     for (i = -len, j = len - 1; i < 0; i++, j--) {
-        TFloat s0 = src0[i];
-        TFloat s1 = src1[j];
-        TFloat wi = win[i];
-        TFloat wj = win[j];
+        float s0 = src0[i];
+        float s1 = src1[j];
+        float wi = win[i];
+        float wj = win[j];
         dst[i] = s0 * wj - s1 * wi;
         dst[j] = s0 * wi + s1 * wj;
     }
 }
 
-void TAtrac1MDCT::Mdct(TFloat Specs[512], TFloat* low, TFloat* mid, TFloat* hi, const TBlockSize& blockSize) {
+void TAtrac1MDCT::Mdct(float Specs[512], float* low, float* mid, float* hi, const TBlockSize& blockSize) {
     uint32_t pos = 0;
-    for (uint32_t band = 0; band < NumQMF; band++) {
+    for (uint32_t band = 0; band < TAtrac1Data::NumQMF; band++) {
         const uint32_t numMdctBlocks = 1 << blockSize.LogCount[band];
-        TFloat* srcBuf = (band == 0) ? low : (band == 1) ? mid : hi;
-        uint32_t bufSz = (band == 2) ? 256 : 128; 
+        float* srcBuf = (band == 0) ? low : (band == 1) ? mid : hi;
+        uint32_t bufSz = (band == 2) ? 256 : 128;
         const uint32_t blockSz = (numMdctBlocks == 1) ? bufSz : 32;
         uint32_t winStart = (numMdctBlocks == 1) ? ((band == 2) ? 112 : 48) : 0;
         //compensate level for 3rd band in case of short window
-        const TFloat multiple = (numMdctBlocks != 1 && band == 2) ? 2.0 : 1.0;
-        vector<TFloat> tmp(512);
+        const float multiple = (numMdctBlocks != 1 && band == 2) ? 2.0 : 1.0;
+        vector<float> tmp(512);
         uint32_t blockPos = 0;
 
         for (size_t k = 0; k < numMdctBlocks; ++k) {
-            memcpy(&tmp[winStart], &srcBuf[bufSz], 32 * sizeof(TFloat));
+            memcpy(&tmp[winStart], &srcBuf[bufSz], 32 * sizeof(float));
             for (size_t i = 0; i < 32; i++) {
                 srcBuf[bufSz + i] = TAtrac1Data::SineWindow[i] * srcBuf[blockPos + blockSz - 32 + i];
                 srcBuf[blockPos + blockSz - 32 + i] = TAtrac1Data::SineWindow[31 - i] * srcBuf[blockPos + blockSz - 32 + i];
             }
-            memcpy(&tmp[winStart+32], &srcBuf[blockPos], blockSz * sizeof(TFloat));
-            const vector<TFloat>&  sp = (numMdctBlocks == 1) ? ((band == 2) ? Mdct512(&tmp[0]) : Mdct256(&tmp[0])) : Mdct64(&tmp[0]);
+            memcpy(&tmp[winStart+32], &srcBuf[blockPos], blockSz * sizeof(float));
+            const vector<float>&  sp = (numMdctBlocks == 1) ? ((band == 2) ? Mdct512(&tmp[0]) : Mdct256(&tmp[0])) : Mdct64(&tmp[0]);
             for (size_t i = 0; i < sp.size(); i++) {
                 Specs[blockPos + pos + i] = sp[i] * multiple;
             }
@@ -93,25 +95,25 @@ void TAtrac1MDCT::Mdct(TFloat Specs[512], TFloat* low, TFloat* mid, TFloat* hi,
             blockPos += 32;
         }
         pos += bufSz;
-    } 
+    }
 }
-void TAtrac1MDCT::IMdct(TFloat Specs[512], const TBlockSize& mode, TFloat* low, TFloat* mid, TFloat* hi) {
+void TAtrac1MDCT::IMdct(float Specs[512], const TBlockSize& mode, float* low, float* mid, float* hi) {
     uint32_t pos = 0;
-    for (size_t band = 0; band < NumQMF; band++) {
+    for (size_t band = 0; band < TAtrac1Data::NumQMF; band++) {
         const uint32_t numMdctBlocks = 1 << mode.LogCount[band];
         const uint32_t bufSz = (band == 2) ? 256 : 128;
         const uint32_t blockSz = (numMdctBlocks == 1) ? bufSz : 32;
         uint32_t start = 0;
 
-        TFloat* dstBuf = (band == 0) ? low : (band == 1) ? mid : hi;
+        float* dstBuf = (band == 0) ? low : (band == 1) ? mid : hi;
 
-        vector<TFloat> invBuf(512);
-        TFloat* prevBuf = &dstBuf[bufSz * 2  - 16];
+        vector<float> invBuf(512);
+        float* prevBuf = &dstBuf[bufSz * 2  - 16];
         for (uint32_t block = 0; block < numMdctBlocks; block++) {
             if (band) {
                 SwapArray(&Specs[pos], blockSz);
             }
-            vector<TFloat> inv = (numMdctBlocks != 1) ? Midct64(&Specs[pos]) : (bufSz == 128) ? Midct256(&Specs[pos]) : Midct512(&Specs[pos]);
+            vector<float> inv = (numMdctBlocks != 1) ? Midct64(&Specs[pos]) : (bufSz == 128) ? Midct256(&Specs[pos]) : Midct512(&Specs[pos]);
             for (size_t i = 0; i < (inv.size()/2); i++) {
                 invBuf[start+i] = inv[i + inv.size()/4];
             }
@@ -123,7 +125,7 @@ void TAtrac1MDCT::IMdct(TFloat Specs[512], const TBlockSize& mode, TFloat* low,
             pos += blockSz;
         }
         if (numMdctBlocks == 1)
-            memcpy(dstBuf + 32, &invBuf[16], ((band == 2) ? 240 : 112) * sizeof(TFloat));
+            memcpy(dstBuf + 32, &invBuf[16], ((band == 2) ? 240 : 112) * sizeof(float));
 
         for (size_t j = 0; j < 16; j++) {
             dstBuf[bufSz*2 - 16  + j] = invBuf[bufSz - 16 + j];
@@ -131,9 +133,9 @@ void TAtrac1MDCT::IMdct(TFloat Specs[512], const TBlockSize& mode, TFloat* low,
     }
 }
 
-TPCMEngine<TFloat>::TProcessLambda TAtrac1Decoder::GetLambda() {
-    return [this](TFloat* data, const TPCMEngine<TFloat>::ProcessMeta& /*meta*/) {
-        TFloat sum[512];
+TPCMEngine::TProcessLambda TAtrac1Decoder::GetLambda() {
+    return [this](float* data, const TPCMEngine::ProcessMeta& /*meta*/) {
+        float sum[512];
         const uint32_t srcChannels = Aea->GetChannelNum();
         for (uint32_t channel = 0; channel < srcChannels; channel++) {
             std::unique_ptr<ICompressedIO::TFrame> frame(Aea->ReadFrame());
@@ -142,13 +144,13 @@ TPCMEngine<TFloat>::TProcessLambda TAtrac1Decoder::GetLambda() {
 
             TBlockSize mode(&bitstream);
             TAtrac1Dequantiser dequantiser;
-            vector<TFloat> specs;
+            vector<float> specs;
             specs.resize(512);;
             dequantiser.Dequant(&bitstream, mode, &specs[0]);
 
             IMdct(&specs[0], mode, &PcmBufLow[channel][0], &PcmBufMid[channel][0], &PcmBufHi[channel][0]);
             SynthesisFilterBank[channel].Synthesis(&sum[0], &PcmBufLow[channel][0], &PcmBufMid[channel][0], &PcmBufHi[channel][0]);
-            for (size_t i = 0; i < NumSamples; ++i) {
+            for (size_t i = 0; i < TAtrac1Data::NumSamples; ++i) {
                 if (sum[i] > PcmValueMax)
                     sum[i] = PcmValueMax;
                 if (sum[i] < PcmValueMin)
@@ -157,36 +159,52 @@ TPCMEngine<TFloat>::TProcessLambda TAtrac1Decoder::GetLambda() {
                 data[i * srcChannels + channel] = sum[i];
             }
         }
-        return TPCMEngine<TFloat>::EProcessResult::PROCESSED;
+        return TPCMEngine::EProcessResult::PROCESSED;
     };
 }
 
 
-TPCMEngine<TFloat>::TProcessLambda TAtrac1Encoder::GetLambda() {
+TPCMEngine::TProcessLambda TAtrac1Encoder::GetLambda() {
     const uint32_t srcChannels = Aea->GetChannelNum();
-    vector<IAtrac1BitAlloc*> bitAlloc;
-    for (size_t i = 0; i < srcChannels; i++) {
-        bitAlloc.push_back(new TAtrac1SimpleBitAlloc(Aea.get(), Settings.GetBfuIdxConst(), Settings.GetFastBfuNumSearch()));
+    vector<IAtrac1BitAlloc*> bitAlloc(srcChannels);
+
+    for (auto& x : bitAlloc) {
+        x = new TAtrac1SimpleBitAlloc(Aea.get(), Settings.GetBfuIdxConst(), Settings.GetFastBfuNumSearch());
     }
 
-    return [this, srcChannels, bitAlloc](TFloat* data, const TPCMEngine<TFloat>::ProcessMeta& /*meta*/) {
+    struct TChannelData {
+        TChannelData()
+            : Specs(TAtrac1Data::NumSamples)
+            , Loudness(0.0)
+        {}
+
+        vector<float> Specs;
+        float Loudness;
+    };
+
+    using TData = vector<TChannelData>;
+    auto buf = std::make_shared<TData>(srcChannels);
+
+    return [this, srcChannels, bitAlloc, buf](float* data, const TPCMEngine::ProcessMeta& /*meta*/) {
+        TBlockSize blockSz[2];
+
+        uint32_t windowMasks[2] = {0};
         for (uint32_t channel = 0; channel < srcChannels; channel++) {
-            TFloat src[NumSamples];
-            vector<TFloat> specs(512);
-            for (size_t i = 0; i < NumSamples; ++i) {
+            float src[TAtrac1Data::NumSamples];
+            for (size_t i = 0; i < TAtrac1Data::NumSamples; ++i) {
                 src[i] = data[i * srcChannels + channel];
             }
 
             AnalysisFilterBank[channel].Analysis(&src[0], &PcmBufLow[channel][0], &PcmBufMid[channel][0], &PcmBufHi[channel][0]);
 
-            uint32_t windowMask = 0;
+            uint32_t& windowMask = windowMasks[channel];
             if (Settings.GetWindowMode() == TAtrac1EncodeSettings::EWindowMode::EWM_AUTO) {
                 windowMask |= (uint32_t)TransientDetectors.GetDetector(channel, 0).Detect(&PcmBufLow[channel][0]);
 
-                const vector<TFloat>& invMid = InvertSpectr<128>(&PcmBufMid[channel][0]);
+                const vector<float>& invMid = InvertSpectr<128>(&PcmBufMid[channel][0]);
                 windowMask |= (uint32_t)TransientDetectors.GetDetector(channel, 1).Detect(&invMid[0]) << 1;
 
-                const vector<TFloat>& invHi = InvertSpectr<256>(&PcmBufHi[channel][0]);
+                const vector<float>& invHi = InvertSpectr<256>(&PcmBufHi[channel][0]);
                 windowMask |= (uint32_t)TransientDetectors.GetDetector(channel, 2).Detect(&invHi[0]) << 2;
 
                 //std::cout << "trans: " << windowMask << std::endl;
@@ -194,13 +212,32 @@ TPCMEngine<TFloat>::TProcessLambda TAtrac1Encoder::GetLambda() {
                 //no transient detection, use given mask
                 windowMask = Settings.GetWindowMask();
             }
-            const TBlockSize blockSize(windowMask & 0x1, windowMask & 0x2, windowMask & 0x4); //low, mid, hi
 
-            Mdct(&specs[0], &PcmBufLow[channel][0], &PcmBufMid[channel][0], &PcmBufHi[channel][0], blockSize);
-            bitAlloc[channel]->Write(Scaler.ScaleFrame(specs, blockSize), blockSize);
+            blockSz[channel]  = TBlockSize(windowMask & 0x1, windowMask & 0x2, windowMask & 0x4); //low, mid, hi
+
+            auto& specs = (*buf)[channel].Specs;
+
+            Mdct(&specs[0], &PcmBufLow[channel][0], &PcmBufMid[channel][0], &PcmBufHi[channel][0], blockSz[channel]);
+
+            float l = 0.0;
+            for (size_t i = 0; i < specs.size(); i++) {
+                float e = specs[i] * specs[i];
+                l += e * LoudnessCurve[i];
+            }
+            (*buf)[channel].Loudness = l;
+        }
+
+        if (srcChannels == 2 && windowMasks[0] == 0 && windowMasks[1] == 0) {
+            Loudness = TrackLoudness(Loudness, (*buf)[0].Loudness, (*buf)[1].Loudness);
+        } else if (windowMasks[0] == 0) {
+            Loudness = TrackLoudness(Loudness, (*buf)[0].Loudness);
+        }
+
+        for (uint32_t channel = 0; channel < srcChannels; channel++) {
+            bitAlloc[channel]->Write(Scaler.ScaleFrame((*buf)[channel].Specs, blockSz[channel]), blockSz[channel], Loudness / LoudFactor);
         }
 
-        return TPCMEngine<TFloat>::EProcessResult::PROCESSED;
+        return TPCMEngine::EProcessResult::PROCESSED;
     };
 }
 
diff --git a/src/atrac1denc.h b/src/atrac1denc.h
index ac5c481..a51d57e 100644
--- a/src/atrac1denc.h
+++ b/src/atrac1denc.h
@@ -31,7 +31,7 @@
 namespace NAtracDEnc {
 
 
-class TAtrac1MDCT : public virtual NAtrac1::TAtrac1Data {
+class TAtrac1MDCT {
     NMDCT::TMDCT<512> Mdct512;
     NMDCT::TMDCT<256> Mdct256;
     NMDCT::TMDCT<64> Mdct64;
@@ -39,8 +39,8 @@ class TAtrac1MDCT : public virtual NAtrac1::TAtrac1Data {
     NMDCT::TMIDCT<256> Midct256;
     NMDCT::TMIDCT<64> Midct64;
 public:
-    void IMdct(TFloat specs[512], const TBlockSize& mode, TFloat* low, TFloat* mid, TFloat* hi);
-    void Mdct(TFloat specs[512], TFloat* low, TFloat* mid, TFloat* hi, const TBlockSize& blockSize);
+    void IMdct(float specs[512], const TBlockSize& mode, float* low, float* mid, float* hi);
+    void Mdct(float specs[512], float* low, float* mid, float* hi, const TBlockSize& blockSize);
     TAtrac1MDCT()
         : Mdct512(1)
         , Mdct256(0.5)
@@ -51,15 +51,17 @@ public:
     {}
 };
 
-class TAtrac1Encoder : public IProcessor<TFloat>, public TAtrac1MDCT, public virtual NAtrac1::TAtrac1Data {
+class TAtrac1Encoder : public IProcessor, public TAtrac1MDCT {
     TCompressedOutputPtr Aea;
     const NAtrac1::TAtrac1EncodeSettings Settings;
 
-    TFloat PcmBufLow[2][256 + 16];
-    TFloat PcmBufMid[2][256 + 16];
-    TFloat PcmBufHi[2][512 + 16];
+    float PcmBufLow[2][256 + 16];
+    float PcmBufMid[2][256 + 16];
+    float PcmBufHi[2][512 + 16];
 
-    Atrac1AnalysisFilterBank<TFloat> AnalysisFilterBank[2];
+    Atrac1AnalysisFilterBank<float> AnalysisFilterBank[2];
+
+    const std::vector<float> LoudnessCurve;
 
     class TTransientDetectors {
         std::vector<TTransientDetector> transientDetectorLow;
@@ -89,29 +91,31 @@ class TAtrac1Encoder : public IProcessor<TFloat>, public TAtrac1MDCT, public vir
         }
     };
     TAtrac1Encoder::TTransientDetectors TransientDetectors;
- 
-    TScaler<TAtrac1Data> Scaler;
+
+    TScaler<NAtrac1::TAtrac1Data> Scaler;
+    static constexpr float LoudFactor = 0.006;
+    float Loudness = LoudFactor;
 
 public:
     TAtrac1Encoder(TCompressedOutputPtr&& aea, NAtrac1::TAtrac1EncodeSettings&& settings);
-    TPCMEngine<TFloat>::TProcessLambda GetLambda() override;
+    TPCMEngine::TProcessLambda GetLambda() override;
 };
 
-class TAtrac1Decoder : public IProcessor<TFloat>, public TAtrac1MDCT, public virtual NAtrac1::TAtrac1Data {
+class TAtrac1Decoder : public IProcessor, public TAtrac1MDCT {
     TCompressedInputPtr Aea;
     const NAtrac1::TAtrac1EncodeSettings Settings;
 
-    TFloat PcmBufLow[2][256 + 16];
-    TFloat PcmBufMid[2][256 + 16];
-    TFloat PcmBufHi[2][512 + 16];
+    float PcmBufLow[2][256 + 16];
+    float PcmBufMid[2][256 + 16];
+    float PcmBufHi[2][512 + 16];
 
     int32_t PcmValueMax = 1;
     int32_t PcmValueMin = -1;
 
-    Atrac1SynthesisFilterBank<TFloat> SynthesisFilterBank[2];
+    Atrac1SynthesisFilterBank<float> SynthesisFilterBank[2];
 public:
     TAtrac1Decoder(TCompressedInputPtr&& aea);
-    TPCMEngine<TFloat>::TProcessLambda GetLambda() override;
+    TPCMEngine::TProcessLambda GetLambda() override;
 };
 
 }
diff --git a/src/atrac3denc.cpp b/src/atrac3denc.cpp
index b1f81d1..56e6517 100644
--- a/src/atrac3denc.cpp
+++ b/src/atrac3denc.cpp
@@ -18,6 +18,7 @@
 
 #include "atrac3denc.h"
 #include "transient_detector.h"
+#include "atrac/atrac_psy_common.h"
 #include <assert.h>
 #include <algorithm>
 #include <iostream>
@@ -28,26 +29,26 @@ using namespace NMDCT;
 using namespace NAtrac3;
 using std::vector;
 
-void TAtrac3MDCT::Mdct(TFloat specs[1024], TFloat* bands[4], TFloat maxLevels[4], TGainModulatorArray gainModulators)
+void TAtrac3MDCT::Mdct(float specs[1024], float* bands[4], float maxLevels[4], TGainModulatorArray gainModulators)
 {
     for (int band = 0; band < 4; ++band) {
-        TFloat* srcBuff = bands[band];
-        TFloat* const curSpec = &specs[band*256];
+        float* srcBuff = bands[band];
+        float* const curSpec = &specs[band*256];
         TGainModulator modFn = gainModulators[band];
-        TFloat tmp[512];
-        memcpy(&tmp[0], srcBuff, 256 * sizeof(TFloat));
+        float tmp[512];
+        memcpy(&tmp[0], srcBuff, 256 * sizeof(float));
         if (modFn) {
             modFn(&tmp[0], &srcBuff[256]);
         }
-        TFloat max = 0.0;
+        float max = 0.0;
         for (int i = 0; i < 256; i++) {
             max = std::max(max, std::abs(srcBuff[256+i]));
             srcBuff[i] = TAtrac3Data::EncodeWindow[i] * srcBuff[256+i];
             tmp[256+i] = TAtrac3Data::EncodeWindow[255-i] * srcBuff[256+i];
         }
-        const vector<TFloat>& sp = Mdct512(&tmp[0]);
+        const vector<float>& sp = Mdct512(&tmp[0]);
         assert(sp.size() == 256);
-        memcpy(curSpec, sp.data(), 256 * sizeof(TFloat));
+        memcpy(curSpec, sp.data(), 256 * sizeof(float));
         if (band & 1) {
             SwapArray(curSpec, 256);
         }
@@ -55,27 +56,27 @@ void TAtrac3MDCT::Mdct(TFloat specs[1024], TFloat* bands[4], TFloat maxLevels[4]
     }
 }
 
-void TAtrac3MDCT::Mdct(TFloat specs[1024], TFloat* bands[4], TGainModulatorArray gainModulators)
+void TAtrac3MDCT::Mdct(float specs[1024], float* bands[4], TGainModulatorArray gainModulators)
 {
-    static TFloat dummy[4];
+    static float dummy[4];
     Mdct(specs, bands, dummy, gainModulators);
 }
 
-void TAtrac3MDCT::Midct(TFloat specs[1024], TFloat* bands[4], TGainDemodulatorArray gainDemodulators)
+void TAtrac3MDCT::Midct(float specs[1024], float* bands[4], TGainDemodulatorArray gainDemodulators)
 {
     for (int band = 0; band < 4; ++band) {
-        TFloat* dstBuff = bands[band];
-        TFloat* curSpec = &specs[band*256];
-        TFloat* prevBuff = dstBuff + 256;
+        float* dstBuff = bands[band];
+        float* curSpec = &specs[band*256];
+        float* prevBuff = dstBuff + 256;
         TAtrac3GainProcessor::TGainDemodulator demodFn = gainDemodulators[band];
         if (band & 1) {
             SwapArray(curSpec, 256);
         }
-        vector<TFloat> inv  = Midct512(curSpec);
+        vector<float> inv  = Midct512(curSpec);
         assert(inv.size()/2 == 256);
         for (int j = 0; j < 256; ++j) {
-            inv[j] *= /*2 */ DecodeWindow[j];
-            inv[511 - j] *= /*2*/ DecodeWindow[j];
+            inv[j] *= /*2 */ TAtrac3Data::DecodeWindow[j];
+            inv[511 - j] *= /*2*/ TAtrac3Data::DecodeWindow[j];
         }
         if (demodFn) {
             demodFn(dstBuff, inv.data(), prevBuff);
@@ -84,13 +85,14 @@ void TAtrac3MDCT::Midct(TFloat specs[1024], TFloat* bands[4], TGainDemodulatorAr
                 dstBuff[j] = inv[j] + prevBuff[j];
             }
         }
-        memcpy(prevBuff, &inv[256], sizeof(TFloat)*256);
+        memcpy(prevBuff, &inv[256], sizeof(float)*256);
     }
 }
 
 TAtrac3Encoder::TAtrac3Encoder(TCompressedOutputPtr&& oma, TAtrac3EncoderSettings&& encoderSettings)
     : Oma(std::move(oma))
     , Params(std::move(encoderSettings))
+    , LoudnessCurve(CreateLoudnessCurve(TAtrac3Data::NumSamples))
     , SingleChannelElements(Params.SourceChannels)
     , TransientParamsHistory(Params.SourceChannels, std::vector<TTransientParam>(4))
 {}
@@ -128,9 +130,9 @@ TAtrac3MDCT::TGainModulatorArray TAtrac3MDCT::MakeGainModulatorArray(const TAtra
     }
 }
 
-TFloat TAtrac3Encoder::LimitRel(TFloat x)
+float TAtrac3Encoder::LimitRel(float x)
 {
-    return std::min(std::max((double)x, GainLevel[15]), GainLevel[0]);
+    return std::min(std::max(x, TAtrac3Data::GainLevel[15]), TAtrac3Data::GainLevel[0]);
 }
 
 void TAtrac3Encoder::ResetTransientParamsHistory(int channel, int band)
@@ -148,18 +150,18 @@ const TAtrac3Encoder::TTransientParam& TAtrac3Encoder::GetTransientParamsHistory
     return TransientParamsHistory[channel][band];
 }
 
-TAtrac3Encoder::TTransientParam TAtrac3Encoder::CalcTransientParam(const std::vector<TFloat>& gain, const TFloat lastMax)
+TAtrac3Encoder::TTransientParam TAtrac3Encoder::CalcTransientParam(const std::vector<float>& gain, const float lastMax)
 {
     int32_t attack0Location = -1; // position where gain is risen up, -1 - no attack
-    TFloat attack0Relation = 1;
+    float attack0Relation = 1;
 
-    const TFloat attackThreshold = 2;
+    const float attackThreshold = 2;
 
     {
         // pre-echo searching
         // relative to previous half frame
         for (uint32_t i = 0; i < gain.size(); i++) {
-            const TFloat tmp = gain[i] / lastMax;
+            const float tmp = gain[i] / lastMax;
             if (tmp > attackThreshold) {
                 attack0Relation = tmp;
                 attack0Location = i;
@@ -169,13 +171,13 @@ TAtrac3Encoder::TTransientParam TAtrac3Encoder::CalcTransientParam(const std::ve
     }
 
     int32_t attack1Location = -1;
-    TFloat attack1Relation = 1;
+    float attack1Relation = 1;
     {
         // pre-echo searching
         // relative to previous subsamples block
-        TFloat q = gain[0];
+        float q = gain[0];
         for (uint32_t i = 1; i < gain.size(); i++) {
-            const TFloat tmp = gain[i] / q;
+            const float tmp = gain[i] / q;
             if (tmp > attackThreshold) {
                 attack1Relation = tmp;
                 attack1Location = i;
@@ -185,15 +187,15 @@ TAtrac3Encoder::TTransientParam TAtrac3Encoder::CalcTransientParam(const std::ve
     }
 
     int32_t releaseLocation = -1; // position where gain is fallen down, -1 - no release
-    TFloat releaseRelation = 1;
+    float releaseRelation = 1;
 
-    const TFloat releaseTreshold = 2;
+    const float releaseTreshold = 2;
     {
         // post-echo searching
         // relative to current frame
-        TFloat q = gain.back();
+        float q = gain.back();
         for (uint32_t i = gain.size() - 2; i > 0; --i) {
-            const TFloat tmp = gain[i] / q;
+            const float tmp = gain[i] / q;
             if (tmp > releaseTreshold) {
                 releaseRelation = tmp;
                 releaseLocation = i;
@@ -206,24 +208,24 @@ TAtrac3Encoder::TTransientParam TAtrac3Encoder::CalcTransientParam(const std::ve
     return {attack0Location, attack0Relation, attack1Location, attack1Relation, releaseLocation, releaseRelation};
 }
 
-void TAtrac3Encoder::CreateSubbandInfo(TFloat* in[4],
+void TAtrac3Encoder::CreateSubbandInfo(float* in[4],
                                          uint32_t channel,
                                          TAtrac3Data::SubbandInfo* subbandInfo)
 {
 
-    auto relToIdx = [](TFloat rel) {
+    auto relToIdx = [](float rel) {
         rel = 1.0/rel;
         return (uint32_t)(RelationToIdx(rel));
     };
 
     for (int band = 0; band < 4; ++band) {
 
-        const TFloat* srcBuff = in[band];
+        const float* srcBuff = in[band];
 
-        const TFloat* const lastMax = &PrevPeak[channel][band];
+        const float* const lastMax = &PrevPeak[channel][band];
 
         std::vector<TAtrac3Data::SubbandInfo::TGainPoint> curve;
-        const std::vector<TFloat> gain = AnalyzeGain(srcBuff, 256, 32, false);
+        const std::vector<float> gain = AnalyzeGain(srcBuff, 256, 32, false);
 
         auto transientParam = CalcTransientParam(gain, *lastMax);
         bool hasTransient = false;
@@ -275,8 +277,8 @@ void TAtrac3Encoder::CreateSubbandInfo(TFloat* in[4],
 void TAtrac3Encoder::Matrixing()
 {
     for (uint32_t subband = 0; subband < 4; subband++) {
-        TFloat* pair[2] = {PcmBuffer.GetSecond(subband * 2), PcmBuffer.GetSecond(subband * 2 + 1)};
-        TFloat tmp[2];
+        float* pair[2] = {PcmBuffer.GetSecond(subband * 2), PcmBuffer.GetSecond(subband * 2 + 1)};
+        float tmp[2];
         for (uint32_t sample = 0; sample < 256; sample++) {
             tmp[0] = pair[0][sample];
             tmp[1] = pair[1][sample];
@@ -286,21 +288,33 @@ void TAtrac3Encoder::Matrixing()
     }
 }
 
-TPCMEngine<TFloat>::TProcessLambda TAtrac3Encoder::GetLambda()
+TPCMEngine::TProcessLambda TAtrac3Encoder::GetLambda()
 {
     std::shared_ptr<TAtrac3BitStreamWriter> bitStreamWriter(new TAtrac3BitStreamWriter(Oma.get(), *Params.ConteinerParams, Params.BfuIdxConst));
-    return [this, bitStreamWriter](TFloat* data, const TPCMEngine<TFloat>::ProcessMeta& meta) {
+
+    struct TChannelData {
+        TChannelData()
+            : Specs(TAtrac3Data::NumSamples)
+        {}
+
+        vector<float> Specs;
+    };
+
+    using TData = vector<TChannelData>;
+    auto buf = std::make_shared<TData>(2);
+
+    return [this, bitStreamWriter, buf](float* data, const TPCMEngine::ProcessMeta& meta) {
         using TSce = TAtrac3BitStreamWriter::TSingleChannelElement;
 
         for (uint32_t channel = 0; channel < meta.Channels; channel++) {
-            TFloat src[NumSamples];
+            float src[TAtrac3Data::NumSamples];
 
-            for (size_t i = 0; i < NumSamples; ++i) {
+            for (size_t i = 0; i < TAtrac3Data::NumSamples; ++i) {
                 src[i] = data[i * meta.Channels  + channel] / 4.0;
             }
 
             {
-                TFloat* p[4] = {PcmBuffer.GetSecond(channel), PcmBuffer.GetSecond(channel+2), PcmBuffer.GetSecond(channel+4), PcmBuffer.GetSecond(channel+6)};
+                float* p[4] = {PcmBuffer.GetSecond(channel), PcmBuffer.GetSecond(channel+2), PcmBuffer.GetSecond(channel+4), PcmBuffer.GetSecond(channel+6)};
                 AnalysisFilterBank[channel].Analysis(&src[0], p);
             }
         }
@@ -310,27 +324,42 @@ TPCMEngine<TFloat>::TProcessLambda TAtrac3Encoder::GetLambda()
         }
 
         for (uint32_t channel = 0; channel < meta.Channels; channel++) {
-            vector<TFloat> specs(1024);
+            auto& specs = (*buf)[channel].Specs;
             TSce* sce = &SingleChannelElements[channel];
 
             sce->SubbandInfo.Reset();
             if (!Params.NoGainControll) {
-                TFloat* p[4] = {PcmBuffer.GetSecond(channel), PcmBuffer.GetSecond(channel+2), PcmBuffer.GetSecond(channel+4), PcmBuffer.GetSecond(channel+6)};
+                float* p[4] = {PcmBuffer.GetSecond(channel), PcmBuffer.GetSecond(channel+2), PcmBuffer.GetSecond(channel+4), PcmBuffer.GetSecond(channel+6)};
                 CreateSubbandInfo(p, channel, &sce->SubbandInfo); //4 detectors per band
             }
 
-            TFloat* maxOverlapLevels = PrevPeak[channel];
+            float* maxOverlapLevels = PrevPeak[channel];
 
             {
-                TFloat* p[4] = {PcmBuffer.GetFirst(channel), PcmBuffer.GetFirst(channel+2), PcmBuffer.GetFirst(channel+4), PcmBuffer.GetFirst(channel+6)};
+                float* p[4] = {PcmBuffer.GetFirst(channel), PcmBuffer.GetFirst(channel+2), PcmBuffer.GetFirst(channel+4), PcmBuffer.GetFirst(channel+6)};
                 Mdct(specs.data(), p, maxOverlapLevels, MakeGainModulatorArray(sce->SubbandInfo));
             }
 
-            sce->Energy = CalcEnergy(specs);
+            float l = 0;
+            for (size_t i = 0; i < specs.size(); i++) {
+                float e = specs[i] * specs[i];
+                l += e * LoudnessCurve[i];
+            }
+
+            sce->Loudness = l;
 
             //TBlockSize for ATRAC3 - 4 subband, all are long (no short window)
             sce->ScaledBlocks = Scaler.ScaleFrame(specs, TBlockSize());
+        }
 
+        if (meta.Channels == 2 && !Params.ConteinerParams->Js) {
+            const TSce& sce0 = SingleChannelElements[0];
+            const TSce& sce1 = SingleChannelElements[1];
+            Loudness = TrackLoudness(Loudness, sce0.Loudness, sce1.Loudness);
+        } else {
+            // 1 channel or Js. In case of Js we do not use side channel to adjust loudness
+            const TSce& sce0 = SingleChannelElements[0];
+            Loudness = TrackLoudness(Loudness, sce0.Loudness);
         }
 
         if (Params.ConteinerParams->Js && meta.Channels == 1) {
@@ -341,9 +370,8 @@ TPCMEngine<TFloat>::TProcessLambda TAtrac3Encoder::GetLambda()
             SingleChannelElements[1].SubbandInfo.Info.resize(1);
         }
 
-        bitStreamWriter->WriteSoundUnit(SingleChannelElements);
-
-        return TPCMEngine<TFloat>::EProcessResult::PROCESSED;
+        bitStreamWriter->WriteSoundUnit(SingleChannelElements, Loudness);
+        return TPCMEngine::EProcessResult::PROCESSED;
     };
 }
 
diff --git a/src/atrac3denc.h b/src/atrac3denc.h
index 1aea07e..fb06f15 100644
--- a/src/atrac3denc.h
+++ b/src/atrac3denc.h
@@ -28,7 +28,7 @@
 
 #include "atrac/atrac3_bitstream.h"
 #include "atrac/atrac_scale.h"
-#include "mdct/mdct.h"
+#include "lib/mdct/mdct.h"
 #include "gain_processor.h"
 
 #include <algorithm>
@@ -39,19 +39,20 @@ namespace NAtracDEnc {
 
 ///////////////////////////////////////////////////////////////////////////////
 
-inline uint16_t RelationToIdx(TFloat x) {
+inline uint16_t RelationToIdx(float x) {
     if (x <= 0.5) {
-        x = 1.0 / std::max(x, (TFloat)0.00048828125);
+        x = 1.0 / std::max(x, (float)0.00048828125);
         return 4 + GetFirstSetBit((int32_t)std::trunc(x));
     } else {
-        x = std::min(x, (TFloat)16.0);
+        x = std::min(x, (float)16.0);
         return 4 - GetFirstSetBit((int32_t)std::trunc(x));
     }
 }
 
 ///////////////////////////////////////////////////////////////////////////////
 
-class TAtrac3MDCT : public NAtrac3::TAtrac3Data {
+class TAtrac3MDCT {
+    using TAtrac3Data = NAtrac3::TAtrac3Data;
     NMDCT::TMDCT<512> Mdct512;
     NMDCT::TMIDCT<512> Midct512;
 public:
@@ -65,47 +66,52 @@ public:
     using TGainDemodulator = TAtrac3GainProcessor::TGainDemodulator;
     typedef std::array<TGainDemodulator, 4> TGainDemodulatorArray;
     typedef std::array<TGainModulator, 4> TGainModulatorArray;
-    void Mdct(TFloat specs[1024],
-              TFloat* bands[4],
-              TFloat maxLevels[4],
+    void Mdct(float specs[1024],
+              float* bands[4],
+              float maxLevels[4],
               TGainModulatorArray gainModulators);
-    void Mdct(TFloat specs[1024],
-              TFloat* bands[4],
+    void Mdct(float specs[1024],
+              float* bands[4],
               TGainModulatorArray gainModulators = TGainModulatorArray());
-    void Midct(TFloat specs[1024],
-               TFloat* bands[4],
+    void Midct(float specs[1024],
+               float* bands[4],
                TGainDemodulatorArray gainDemodulators = TGainDemodulatorArray());
 protected:
     TAtrac3MDCT::TGainModulatorArray MakeGainModulatorArray(const TAtrac3Data::SubbandInfo& si);
 };
 
-class TAtrac3Encoder : public IProcessor<TFloat>, public TAtrac3MDCT {
+class TAtrac3Encoder : public IProcessor, public TAtrac3MDCT {
+    using TAtrac3Data = NAtrac3::TAtrac3Data;
     TCompressedOutputPtr Oma;
     const NAtrac3::TAtrac3EncoderSettings Params;
-    TDelayBuffer<TFloat, 8, 256> PcmBuffer; //8 = 2 channels * 4 bands
+    const std::vector<float> LoudnessCurve;
+    TDelayBuffer<float, 8, 256> PcmBuffer; //8 = 2 channels * 4 bands
 
-    TFloat PrevPeak[2][4]; //2 channel, 4 band - peak level (after windowing), used to check overflow during scalling
+    float PrevPeak[2][4]; //2 channel, 4 band - peak level (after windowing), used to check overflow during scalling
+
+    Atrac3AnalysisFilterBank<float> AnalysisFilterBank[2];
 
-    Atrac3AnalysisFilterBank<TFloat> AnalysisFilterBank[2];
     TScaler<TAtrac3Data> Scaler;
     std::vector<NAtrac3::TAtrac3BitStreamWriter::TSingleChannelElement> SingleChannelElements;
 public:
     struct TTransientParam {
         int32_t Attack0Location; // Attack position relative to previous frame
-        TFloat Attack0Relation;
+        float Attack0Relation;
         int32_t Attack1Location; // Attack position relative to previous sample
-        TFloat Attack1Relation;
+        float Attack1Relation;
         int32_t ReleaseLocation;
-        TFloat ReleaseRelation;
+        float ReleaseRelation;
     };
 private:
     std::vector<std::vector<TTransientParam>> TransientParamsHistory;
+    static constexpr float LoudFactor = 0.006;
+    float Loudness = LoudFactor;
 #ifdef ATRAC_UT_PUBLIC
 public:
 #endif
-    TFloat LimitRel(TFloat x);
-    TTransientParam CalcTransientParam(const std::vector<TFloat>& gain, TFloat lastMax);
-    void CreateSubbandInfo(TFloat* in[4], uint32_t channel,
+    float LimitRel(float x);
+    TTransientParam CalcTransientParam(const std::vector<float>& gain, float lastMax);
+    void CreateSubbandInfo(float* in[4], uint32_t channel,
                            TAtrac3Data::SubbandInfo* subbandInfo);
     void ResetTransientParamsHistory(int channel, int band);
     void SetTransientParamsHistory(int channel, int band, const TTransientParam& params);
@@ -115,6 +121,6 @@ public:
 public:
     TAtrac3Encoder(TCompressedOutputPtr&& oma, NAtrac3::TAtrac3EncoderSettings&& encoderSettings);
     ~TAtrac3Encoder();
-    TPCMEngine<TFloat>::TProcessLambda GetLambda() override;
+    TPCMEngine::TProcessLambda GetLambda() override;
 };
 }
diff --git a/src/atrac3denc_ut.cpp b/src/atrac3denc_ut.cpp
index f52af02..6979c92 100644
--- a/src/atrac3denc_ut.cpp
+++ b/src/atrac3denc_ut.cpp
@@ -27,14 +27,14 @@ using std::vector;
 using namespace NAtracDEnc;
 using namespace NAtrac3;
 
-static void GenerateSignal(TFloat* buf, size_t n, TFloat f, TFloat a) {
+static void GenerateSignal(float* buf, size_t n, float f, float a) {
     for (size_t i = 0; i < n; ++i) {
         buf[i] = a * sin((M_PI/2) * i * f);
     }
 }
 
-static void GenerateSignalWithTransient(TFloat* buf, size_t n, TFloat f, TFloat a,
-        size_t transientPos, size_t transientLen, TFloat transientLev) {
+static void GenerateSignalWithTransient(float* buf, size_t n, float f, float a,
+        size_t transientPos, size_t transientLen, float transientLev) {
     assert(transientPos + transientLen < n);
     GenerateSignal(buf, n, f, a);
     GenerateSignal(buf+transientPos, transientLen, f, transientLev);
@@ -43,15 +43,15 @@ static void GenerateSignalWithTransient(TFloat* buf, size_t n, TFloat f, TFloat
 //    }
 }
 
-class TWindowTest : public TAtrac3Data {
+class TWindowTest {
 public:
     void RunTest() {
         for (size_t i = 0; i < 256; i++) {
-            const TFloat ha1 = EncodeWindow[i] / 2.0; //compensation
-            const TFloat hs1 = DecodeWindow[i];
-            const TFloat hs2 = DecodeWindow[255-i];
-            const TFloat res = hs1 / (hs1 * hs1 + hs2 * hs2);
-            EXPECT_NEAR(ha1, res, 0.000000001);
+            const float ha1 = TAtrac3Data::EncodeWindow[i] / 2.0; //compensation
+            const double hs1 = TAtrac3Data::DecodeWindow[i];
+            const double hs2 = TAtrac3Data::DecodeWindow[255-i];
+            const float res = hs1 / (hs1 * hs1 + hs2 * hs2);
+            EXPECT_NEAR(ha1, res, 1.0 / (1 << 24));
         }
     }
 };
@@ -93,18 +93,18 @@ public:
 
 TEST(TAtrac3MDCT, TAtrac3MDCTZeroOneBlock) {
     TAtrac3MDCT mdct;
-    TAtrac3MDCTWorkBuff<TFloat> buff;
-    size_t workSz = TAtrac3MDCTWorkBuff<TFloat>::BandBuffSz;
+    TAtrac3MDCTWorkBuff<float> buff;
+    size_t workSz = TAtrac3MDCTWorkBuff<float>::BandBuffSz;
 
-    vector<TFloat> specs(1024);
+    vector<float> specs(1024);
 
-    TFloat* p[4] = { buff.Band0, buff.Band1, buff.Band2, buff.Band3 };
+    float* p[4] = { buff.Band0, buff.Band1, buff.Band2, buff.Band3 };
 
     mdct.Mdct(specs.data(), p);
     for(auto s: specs)
         EXPECT_NEAR(s, 0.0, 0.0000000001);
 
-    TFloat* t[4] = { buff.Band0Res, buff.Band1Res, buff.Band2Res, buff.Band3Res };
+    float* t[4] = { buff.Band0Res, buff.Band1Res, buff.Band2Res, buff.Band3Res };
     mdct.Midct(specs.data(), p);
 
     for(size_t i = 0; i < workSz; ++i)
@@ -121,28 +121,28 @@ TEST(TAtrac3MDCT, TAtrac3MDCTZeroOneBlock) {
 
 
 }
-
+/*
 TEST(TAtrac3MDCT, TAtrac3MDCTSignal) {
     TAtrac3MDCT mdct;
-    TAtrac3MDCTWorkBuff<TFloat> buff;
-    size_t workSz = TAtrac3MDCTWorkBuff<TFloat>::BandBuffSz;
+    TAtrac3MDCTWorkBuff<float> buff;
+    size_t workSz = TAtrac3MDCTWorkBuff<float>::BandBuffSz;
 
     const size_t len = 1024;
-    vector<TFloat> signal(len);
-    vector<TFloat> signalRes(len);
+    vector<float> signal(len);
+    vector<float> signalRes(len);
     GenerateSignal(signal.data(), signal.size(), 0.25, 32768);
     
     for (size_t pos = 0; pos < len; pos += workSz) {
-        vector<TFloat> specs(1024);
-        memcpy(buff.Band0 + workSz, signal.data() + pos, workSz * sizeof(TFloat));
+        vector<float> specs(1024);
+        memcpy(buff.Band0 + workSz, signal.data() + pos, workSz * sizeof(float));
 
-        TFloat* p[4] = { buff.Band0, buff.Band1, buff.Band2, buff.Band3 };
+        float* p[4] = { buff.Band0, buff.Band1, buff.Band2, buff.Band3 };
         mdct.Mdct(specs.data(), p);
 
-        TFloat* t[4] = { buff.Band0Res, buff.Band1Res, buff.Band2Res, buff.Band3Res };
+        float* t[4] = { buff.Band0Res, buff.Band1Res, buff.Band2Res, buff.Band3Res };
         mdct.Midct(specs.data(), t);
 
-        memcpy(signalRes.data() + pos, buff.Band0Res, workSz * sizeof(TFloat));
+        memcpy(signalRes.data() + pos, buff.Band0Res, workSz * sizeof(float));
     }
 
     for (int i = workSz; i < len; ++i)
@@ -151,19 +151,19 @@ TEST(TAtrac3MDCT, TAtrac3MDCTSignal) {
 
 TEST(TAtrac3MDCT, TAtrac3MDCTSignalWithGainCompensation) {
     TAtrac3MDCT mdct;
-    TAtrac3MDCTWorkBuff<TFloat> buff;
-    size_t workSz = TAtrac3MDCTWorkBuff<TFloat>::BandBuffSz;
+    TAtrac3MDCTWorkBuff<float> buff;
+    size_t workSz = TAtrac3MDCTWorkBuff<float>::BandBuffSz;
 
     const size_t len = 4096;
-    vector<TFloat> signal(len, 8000);
-    vector<TFloat> signalRes(len);
+    vector<float> signal(len, 8000);
+    vector<float> signalRes(len);
     GenerateSignal(signal.data() + 1024, signal.size()-1024, 0.25, 32768);
     
     for (size_t pos = 0; pos < len; pos += workSz) {
-        vector<TFloat> specs(1024);
-        memcpy(buff.Band0 + workSz, signal.data() + pos, workSz * sizeof(TFloat));
+        vector<float> specs(1024);
+        memcpy(buff.Band0 + workSz, signal.data() + pos, workSz * sizeof(float));
 
-        TFloat* p[4] = { buff.Band0, buff.Band1, buff.Band2, buff.Band3 };
+        float* p[4] = { buff.Band0, buff.Band1, buff.Band2, buff.Band3 };
 
         if (pos == 256) { //apply gain modulation
             TAtrac3Data::SubbandInfo siCur;
@@ -195,7 +195,7 @@ TEST(TAtrac3MDCT, TAtrac3MDCTSignalWithGainCompensation) {
             mdct.Mdct(specs.data(), p);
         }
 
-        TFloat* t[4] = { buff.Band0Res, buff.Band1Res, buff.Band2Res, buff.Band3Res };
+        float* t[4] = { buff.Band0Res, buff.Band1Res, buff.Band2Res, buff.Band3Res };
 
         if (pos == 256) { //restore gain modulation
             TAtrac3Data::SubbandInfo siCur;
@@ -268,7 +268,7 @@ TEST(TAtrac3MDCT, TAtrac3MDCTSignalWithGainCompensation) {
         } else {
             mdct.Midct(specs.data(), t);
         }
-        memcpy(signalRes.data() + pos, buff.Band0Res,  workSz * sizeof(TFloat));
+        memcpy(signalRes.data() + pos, buff.Band0Res,  workSz * sizeof(float));
     }
     for (int i = workSz; i < len; ++i) {
         //std::cout << "res: " << i << " " << signalRes[i] << std::endl;
@@ -278,21 +278,21 @@ TEST(TAtrac3MDCT, TAtrac3MDCTSignalWithGainCompensation) {
 
 TEST(TAtrac3MDCT, TAtrac3MDCTSignalWithGainCompensationAndManualTransient) {
     TAtrac3MDCT mdct;
-    TAtrac3MDCTWorkBuff<TFloat> buff;
-    size_t workSz = TAtrac3MDCTWorkBuff<TFloat>::BandBuffSz;
+    TAtrac3MDCTWorkBuff<float> buff;
+    size_t workSz = TAtrac3MDCTWorkBuff<float>::BandBuffSz;
 
     const size_t len = 1024;
-    vector<TFloat> signal(len);
-    vector<TFloat> signalRes(len);
+    vector<float> signal(len);
+    vector<float> signalRes(len);
     GenerateSignalWithTransient(signal.data(), signal.size(), 0.03125, 512.0,
                     640, 64, 32768.0);
     const std::vector<TAtrac3Data::SubbandInfo::TGainPoint> curve1 = {{6, 13}, {4, 14}};
  
     for (size_t pos = 0; pos < len; pos += workSz) {
-        vector<TFloat> specs(1024);
-        memcpy(buff.Band0 + workSz, signal.data() + pos, workSz * sizeof(TFloat));
+        vector<float> specs(1024);
+        memcpy(buff.Band0 + workSz, signal.data() + pos, workSz * sizeof(float));
 
-        TFloat* p[4] = { buff.Band0, buff.Band1, buff.Band2, buff.Band3 };
+        float* p[4] = { buff.Band0, buff.Band1, buff.Band2, buff.Band3 };
         //for (int i = 0; i < 256; i++) {
         //    std::cout << i + pos << " " << buff.Band0[i] << std::endl;
         //}
@@ -315,7 +315,7 @@ TEST(TAtrac3MDCT, TAtrac3MDCTSignalWithGainCompensationAndManualTransient) {
             if (i > 240 && i < 256)
                 specs[i] /= 1.9;
         }
-        TFloat* t[4] = { buff.Band0Res, buff.Band1Res, buff.Band2Res, buff.Band3Res };
+        float* t[4] = { buff.Band0Res, buff.Band1Res, buff.Band2Res, buff.Band3Res };
         if (pos == 512) { //restore gain modulation
             TAtrac3Data::SubbandInfo siCur;
             TAtrac3Data::SubbandInfo siNext;
@@ -337,14 +337,14 @@ TEST(TAtrac3MDCT, TAtrac3MDCTSignalWithGainCompensationAndManualTransient) {
             mdct.Midct(specs.data(), t);
         }
 
-        memcpy(signalRes.data() + pos, buff.Band0Res,  workSz * sizeof(TFloat));
+        memcpy(signalRes.data() + pos, buff.Band0Res,  workSz * sizeof(float));
     }
     for (int i = workSz; i < len; ++i) {
         //std::cout << "res: " << i << " " << signalRes[i] << std::endl;
         EXPECT_NEAR(signal[i - workSz], signalRes[i], 10);
     }
 }
-
+*/
 TEST(AtracGainControl, RelToIdxTest) {
 
     EXPECT_EQ(4, RelationToIdx(1));
diff --git a/src/atrac3p.h b/src/atrac3p.h
index 82f8718..258a6f1 100644
--- a/src/atrac3p.h
+++ b/src/atrac3p.h
@@ -24,10 +24,10 @@
 
 namespace NAtracDEnc {
 
-class TAt3PEnc : public IProcessor<TFloat> {
+class TAt3PEnc : public IProcessor {
 public:
     TAt3PEnc(TCompressedOutputPtr&& out, int channels);
-    TPCMEngine<TFloat>::TProcessLambda GetLambda() override;
+    TPCMEngine::TProcessLambda GetLambda() override;
     static constexpr int NumSamples = 2048;
 private:
     TCompressedOutputPtr Out;
diff --git a/src/atracdenc_ut.cpp b/src/atracdenc_ut.cpp
index efcd3df..416b972 100644
--- a/src/atracdenc_ut.cpp
+++ b/src/atracdenc_ut.cpp
@@ -24,7 +24,7 @@
 using std::vector;
 using namespace NAtracDEnc;
 
-void CheckResult128(const vector<TFloat>& a, const vector<TFloat>& b) {
+void CheckResult128(const vector<float>& a, const vector<float>& b) {
     float m = 0.0;
     for (int i = 0; i < a.size(); i++) {
         m = fmax(m, (float)a[i]);
@@ -37,7 +37,7 @@ void CheckResult128(const vector<TFloat>& a, const vector<TFloat>& b) {
     }
 }
 
-void CheckResult256(const vector<TFloat>& a, const vector<TFloat>& b) {
+void CheckResult256(const vector<float>& a, const vector<float>& b) {
     float m = 0.0;
     for (int i = 0; i < a.size(); i++) {
         m = fmax(m, (float)a[i]);
@@ -53,14 +53,14 @@ void CheckResult256(const vector<TFloat>& a, const vector<TFloat>& b) {
 
 TEST(TAtrac1MDCT, TAtrac1MDCTLongEncDec) {
     TAtrac1MDCT mdct;
-    vector<TFloat> low(128 * 2);
-    vector<TFloat> mid(128 * 2);
-    vector<TFloat> hi(256 * 2);
-    vector<TFloat> specs(512 * 2);
-
-    vector<TFloat> lowRes(128 * 2);
-    vector<TFloat> midRes(128 * 2);
-    vector<TFloat> hiRes(256 * 2);
+    vector<float> low(128 * 2);
+    vector<float> mid(128 * 2);
+    vector<float> hi(256 * 2);
+    vector<float> specs(512 * 2);
+
+    vector<float> lowRes(128 * 2);
+    vector<float> midRes(128 * 2);
+    vector<float> hiRes(256 * 2);
  
     for (int i = 0; i < 128; i++) {
         low[i] = mid[i] = i;
@@ -82,25 +82,25 @@ TEST(TAtrac1MDCT, TAtrac1MDCTLongEncDec) {
 
 TEST(TAtrac1MDCT, TAtrac1MDCTShortEncDec) {
     TAtrac1MDCT mdct;
-    vector<TFloat> low(128 * 2);
-    vector<TFloat> mid(128 * 2);
-    vector<TFloat> hi(256 * 2);
-    vector<TFloat> specs(512 * 2);
-
-    vector<TFloat> lowRes(128 * 2);
-    vector<TFloat> midRes(128 * 2);
-    vector<TFloat> hiRes(256 * 2);
+    vector<float> low(128 * 2);
+    vector<float> mid(128 * 2);
+    vector<float> hi(256 * 2);
+    vector<float> specs(512 * 2);
+
+    vector<float> lowRes(128 * 2);
+    vector<float> midRes(128 * 2);
+    vector<float> hiRes(256 * 2);
  
     for (int i = 0; i < 128; i++) {
         low[i] = mid[i] = i;
     }
-    const vector<TFloat> lowCopy = low; //in case of short wondow AtracMDCT changed input buffer during calculation
-    const vector<TFloat> midCopy = mid;
+    const vector<float> lowCopy = low; //in the short-window case AtracMDCT modifies the input buffer during calculation
+    const vector<float> midCopy = mid;
 
     for (int i = 0; i < 256; i++) {
         hi[i] = i;
     }
-    const vector<TFloat> hiCopy = hi;
+    const vector<float> hiCopy = hi;
 
     const TBlockSize blockSize(true, true, true); //short
 
diff --git a/src/compressed_io.h b/src/compressed_io.h
index 511860e..cc6cbf1 100644
--- a/src/compressed_io.h
+++ b/src/compressed_io.h
@@ -42,7 +42,7 @@ public:
         char* Get() { return Data; }
     };
     virtual std::string GetName() const = 0;
-    virtual uint8_t GetChannelNum() const = 0;
+    virtual size_t GetChannelNum() const = 0;
     virtual ~ICompressedIO() {}
 };
 
diff --git a/src/config.h b/src/config.h
index d3635df..901f584 100644
--- a/src/config.h
+++ b/src/config.h
@@ -22,13 +22,7 @@
 #define NOMINMAX
 #endif
 
-#ifdef ATDE_USE_FLOAT
-#    define kiss_fft_scalar float
-typedef float TFloat;
-#else
-#    define kiss_fft_scalar double
-typedef double TFloat;
-#endif
+#define kiss_fft_scalar float
 
 #ifndef M_PI
 #define M_PI (3.14159265358979323846)
diff --git a/src/gain_processor.h b/src/gain_processor.h
index f43531f..c77536a 100644
--- a/src/gain_processor.h
+++ b/src/gain_processor.h
@@ -24,7 +24,7 @@ template<class T>
 class TGainProcessor : public T {
 
 public:
-    typedef std::function<void(TFloat* out, TFloat* cur, TFloat* prev)> TGainDemodulator;
+    typedef std::function<void(float* out, float* cur, float* prev)> TGainDemodulator;
     /*
      * example GainModulation:
      * PCMinput:
@@ -41,13 +41,13 @@ public:
      *               (i.e the input buffer started at b point)
      * so next transformation (mdct #3) gets modulated first part
      */
-    typedef std::function<void(TFloat* bufCur, TFloat* bufNext)> TGainModulator;
-    static TFloat GetGainInc(uint32_t levelIdxCur)
+    typedef std::function<void(float* bufCur, float* bufNext)> TGainModulator;
+    static float GetGainInc(uint32_t levelIdxCur)
     {
         const int incPos = T::ExponentOffset - levelIdxCur + T::GainInterpolationPosShift;
         return T::GainInterpolation[incPos];
     }
-    static TFloat GetGainInc(uint32_t levelIdxCur, uint32_t levelIdxNext)
+    static float GetGainInc(uint32_t levelIdxCur, uint32_t levelIdxNext)
     {
         const int incPos = levelIdxNext - levelIdxCur + T::GainInterpolationPosShift;
         return T::GainInterpolation[incPos];
@@ -57,17 +57,17 @@ public:
     TGainDemodulator Demodulate(const std::vector<typename T::SubbandInfo::TGainPoint>& giNow,
                                 const std::vector<typename T::SubbandInfo::TGainPoint>& giNext)
     {
-        return [=](TFloat* out, TFloat* cur, TFloat* prev) {
+        return [=](float* out, float* cur, float* prev) {
             uint32_t pos = 0;
-            const TFloat scale = giNext.size() ? T::GainLevel[giNext[0].Level] : 1;
+            const float scale = giNext.size() ? T::GainLevel[giNext[0].Level] : 1;
             for (uint32_t i = 0; i < giNow.size(); ++i) {
                 uint32_t lastPos = giNow[i].Location << T::LocScale;
                 const uint32_t levelPos = giNow[i].Level;
                 assert(levelPos < sizeof(T::GainLevel)/sizeof(T::GainLevel[0]));
-                TFloat level = T::GainLevel[levelPos];
+                float level = T::GainLevel[levelPos];
                 const int incPos = ((i + 1) < giNow.size() ? giNow[i + 1].Level : T::ExponentOffset)
                                    - giNow[i].Level + T::GainInterpolationPosShift;
-                TFloat gainInc = T::GainInterpolation[incPos];
+                float gainInc = T::GainInterpolation[incPos];
                 for (; pos < lastPos; pos++) {
                     //std::cout << "pos: " << pos << " scale: " << scale << " level: " << level << std::endl;
                     out[pos] = (cur[pos] * scale + prev[pos]) * level;
@@ -87,17 +87,17 @@ public:
     TGainModulator Modulate(const std::vector<typename T::SubbandInfo::TGainPoint>& giCur) {
         if (giCur.empty())
             return {};
-        return [=](TFloat* bufCur, TFloat* bufNext) {
+        return [=](float* bufCur, float* bufNext) {
             uint32_t pos = 0;
-            const TFloat scale = T::GainLevel[giCur[0].Level];
+            const float scale = T::GainLevel[giCur[0].Level];
             for (uint32_t i = 0; i < giCur.size(); ++i) {
                 uint32_t lastPos = giCur[i].Location << T::LocScale;
                 const uint32_t levelPos = giCur[i].Level;
                 assert(levelPos < sizeof(T::GainLevel)/sizeof(T::GainLevel[0]));
-                TFloat level = T::GainLevel[levelPos];
+                float level = T::GainLevel[levelPos];
                 const int incPos = ((i + 1) < giCur.size() ? giCur[i + 1].Level : T::ExponentOffset)
                                    - giCur[i].Level + T::GainInterpolationPosShift;
-                TFloat gainInc = T::GainInterpolation[incPos];
+                float gainInc = T::GainInterpolation[incPos];
                 for (; pos < lastPos; pos++) {
                     //std::cout << "mod pos: " << pos << " scale: " << scale << " bufCur: " <<  bufCur[pos]  << " level: " << level << " bufNext: " << bufNext[pos] << std::endl;
                     bufCur[pos] /= scale;
diff --git a/src/lib/mdct/mdct.cpp b/src/lib/mdct/mdct.cpp
index 74b6d91..c2872db 100644
--- a/src/lib/mdct/mdct.cpp
+++ b/src/lib/mdct/mdct.cpp
@@ -22,11 +22,11 @@
 
 namespace NMDCT {
 
-static std::vector<TFloat> CalcSinCos(size_t n, TFloat scale)
+static std::vector<float> CalcSinCos(size_t n, float scale)
 {
-    std::vector<TFloat> tmp(n >> 1);
-    const TFloat alpha = 2.0 * M_PI / (8.0 * n);
-    const TFloat omiga = 2.0 * M_PI / n;
+    std::vector<float> tmp(n >> 1);
+    const float alpha = 2.0 * M_PI / (8.0 * n);
+    const float omiga = 2.0 * M_PI / n;
     scale = sqrt(scale/n); 
     for (size_t i = 0; i < (n >> 2); ++i) {
         tmp[2 * i + 0] = scale * cos(omiga * i + alpha);
@@ -35,7 +35,7 @@ static std::vector<TFloat> CalcSinCos(size_t n, TFloat scale)
     return tmp;
 }
 
-TMDCTBase::TMDCTBase(size_t n, TFloat scale)
+TMDCTBase::TMDCTBase(size_t n, float scale)
     : N(n)
     , SinCos(CalcSinCos(n, scale))
 {
diff --git a/src/lib/mdct/mdct.h b/src/lib/mdct/mdct.h
index 988d1e9..114af30 100644
--- a/src/lib/mdct/mdct.h
+++ b/src/lib/mdct/mdct.h
@@ -25,21 +25,21 @@
 
 namespace NMDCT {
 
-static_assert(sizeof(kiss_fft_scalar) == sizeof(TFloat), "size of fft_scalar is not equal to size of TFloat");
+static_assert(sizeof(kiss_fft_scalar) == sizeof(float), "size of fft_scalar is not equal to size of float");
 
 class TMDCTBase {
 protected:
     const size_t N;
-    const std::vector<TFloat> SinCos;
+    const std::vector<float> SinCos;
     kiss_fft_cpx*   FFTIn;
     kiss_fft_cpx*   FFTOut;
     kiss_fft_cfg    FFTPlan;
-    TMDCTBase(size_t n, TFloat scale);
+    TMDCTBase(size_t n, float scale);
     virtual ~TMDCTBase();
 };
 
 
-template<size_t TN, typename TIO = TFloat>
+template<size_t TN, typename TIO = float>
 class TMDCT : public TMDCTBase {
     std::vector<TIO> Buf;
 public:
@@ -54,15 +54,15 @@ public:
         const size_t n4 = N >> 2;
         const size_t n34 = 3 * n4;
         const size_t n54 = 5 * n4;
-        const TFloat* cos = &SinCos[0];
-        const TFloat* sin = &SinCos[1];
+        const float* cos = &SinCos[0];
+        const float* sin = &SinCos[1];
 
-        TFloat  *xr, *xi, r0, i0;
-        TFloat  c, s;
+        float  *xr, *xi, r0, i0;
+        float  c, s;
         size_t n;
 
-        xr = (TFloat*)FFTIn;
-        xi = (TFloat*)FFTIn + 1;
+        xr = (float*)FFTIn;
+        xi = (float*)FFTIn + 1;
         for (n = 0; n < n4; n += 2) {
             r0 = in[n34 - 1 - n] + in[n34 + n];
             i0 = in[n4 + n] - in[n4 - 1 - n];
@@ -87,8 +87,8 @@ public:
 
         kiss_fft(FFTPlan, FFTIn, FFTOut);
 
-        xr = (TFloat*)FFTOut;
-        xi = (TFloat*)FFTOut + 1;
+        xr = (float*)FFTOut;
+        xi = (float*)FFTOut + 1;
         for (n = 0; n < n2; n += 2) {
             r0 = xr[n];
             i0 = xi[n];
@@ -104,7 +104,7 @@ public:
     }
 };
 
-template<size_t TN, typename TIO = TFloat>
+template<size_t TN, typename TIO = float>
 class TMIDCT : public TMDCTBase {
     std::vector<TIO> Buf;
 public:
@@ -118,15 +118,15 @@ public:
         const size_t n4 = N >> 2;
         const size_t n34 = 3 * n4;
         const size_t n54 = 5 * n4;
-        const TFloat* cos = &SinCos[0];
-        const TFloat* sin = &SinCos[1];
+        const float* cos = &SinCos[0];
+        const float* sin = &SinCos[1];
 
-        TFloat *xr, *xi, r0, i0, r1, i1;
-        TFloat c, s;
+        float *xr, *xi, r0, i0, r1, i1;
+        float c, s;
         size_t n;
 
-        xr = (TFloat*)FFTIn;
-        xi = (TFloat*)FFTIn + 1;
+        xr = (float*)FFTIn;
+        xi = (float*)FFTIn + 1;
 
         for (n = 0; n < n2; n += 2) {
             r0 = in[n];
@@ -141,8 +141,8 @@ public:
 
         kiss_fft(FFTPlan, FFTIn, FFTOut);
 
-        xr = (TFloat*)FFTOut;
-        xi = (TFloat*)FFTOut + 1;
+        xr = (float*)FFTOut;
+        xi = (float*)FFTOut + 1;
 
         for (n = 0; n < n4; n += 2) {
             r0 = xr[n];
diff --git a/src/lib/mdct/mdct_ut.cpp b/src/lib/mdct/mdct_ut.cpp
index 6f49b59..d997b08 100644
--- a/src/lib/mdct/mdct_ut.cpp
+++ b/src/lib/mdct/mdct_ut.cpp
@@ -26,24 +26,24 @@
 using std::vector;
 using namespace NMDCT;
 
-static vector<TFloat> mdct(TFloat* x, int N) {
-    vector<TFloat> res;
+static vector<float> mdct(float* x, int N) {
+    vector<float> res;
     for (int k = 0; k < N; k++) {
-        TFloat sum = 0;
+        float sum = 0;
         for (int n = 0; n < 2 * N; n++)
-            sum += x[n]* cos((M_PI/N) * ((TFloat)n + 0.5 + N/2) * ((TFloat)k + 0.5));
+            sum += x[n]* cos((M_PI/N) * ((float)n + 0.5 + N/2) * ((float)k + 0.5));
 
         res.push_back(sum);
     }
     return res;
 }
 
-static vector<TFloat> midct(TFloat* x, int N) {
-    vector<TFloat> res;
+static vector<float> midct(float* x, int N) {
+    vector<float> res;
     for (int n = 0; n < 2 * N; n++) {
-        TFloat sum = 0;
+        float sum = 0;
         for (int k = 0; k < N; k++)
-            sum += (x[k] * cos((M_PI/N) * ((TFloat)n + 0.5 + N/2) * ((TFloat)k + 0.5)));
+            sum += (x[k] * cos((M_PI/N) * ((float)n + 0.5 + N/2) * ((float)k + 0.5)));
 
         res.push_back(sum);
     }
@@ -53,12 +53,12 @@ static vector<TFloat> midct(TFloat* x, int N) {
 TEST(TMdctTest, MDCT32) {
     const int N = 32;
     TMDCT<N> transform(N);
-    vector<TFloat> src(N);
+    vector<float> src(N);
     for (int i = 0; i < N; i++) {
         src[i] = i;
     }
-    const vector<TFloat> res1 = mdct(&src[0], N/2);
-    const vector<TFloat> res2 = transform(&src[0]);
+    const vector<float> res1 = mdct(&src[0], N/2);
+    const vector<float> res2 = transform(&src[0]);
     EXPECT_EQ(res1.size(), res2.size());
     auto eps = CalcEps(N);
     for (int i = 0; i < res1.size(); i++) {
@@ -69,12 +69,12 @@ TEST(TMdctTest, MDCT32) {
 TEST(TMdctTest, MDCT64) {
     const int N = 64;
     TMDCT<N> transform(N);
-    vector<TFloat> src(N);
+    vector<float> src(N);
     for (int i = 0; i < N; i++) {
         src[i] = i;
     }
-    const vector<TFloat> res1 = mdct(&src[0], N/2);
-    const vector<TFloat> res2 = transform(&src[0]);
+    const vector<float> res1 = mdct(&src[0], N/2);
+    const vector<float> res2 = transform(&src[0]);
     EXPECT_EQ(res1.size(), res2.size());
     auto eps = CalcEps(N);
     for (int i = 0; i < res1.size(); i++) {
@@ -85,12 +85,12 @@ TEST(TMdctTest, MDCT64) {
 TEST(TMdctTest, MDCT128) {
     const int N = 128;
     TMDCT<N> transform(N);
-    vector<TFloat> src(N);
+    vector<float> src(N);
     for (int i = 0; i < N; i++) {
         src[i] = i;
     }
-    const vector<TFloat> res1 = mdct(&src[0], N/2);
-    const vector<TFloat> res2 = transform(&src[0]);
+    const vector<float> res1 = mdct(&src[0], N/2);
+    const vector<float> res2 = transform(&src[0]);
     EXPECT_EQ(res1.size(), res2.size());
     auto eps = CalcEps(N * 4);
     for (int i = 0; i < res1.size(); i++) {
@@ -101,12 +101,12 @@ TEST(TMdctTest, MDCT128) {
 TEST(TMdctTest, MDCT256) {
     const int N = 256;
     TMDCT<N> transform(N);
-    vector<TFloat> src(N);
+    vector<float> src(N);
     for (int i = 0; i < N; i++) {
         src[i] = i;
     }
-    const vector<TFloat> res1 = mdct(&src[0], N/2);
-    const vector<TFloat> res2 = transform(&src[0]);
+    const vector<float> res1 = mdct(&src[0], N/2);
+    const vector<float> res2 = transform(&src[0]);
     EXPECT_EQ(res1.size(), res2.size());
     auto eps = CalcEps(N * 4);
     for (int i = 0; i < res1.size(); i++) {
@@ -117,14 +117,14 @@ TEST(TMdctTest, MDCT256) {
 TEST(TMdctTest, MDCT256_RAND) {
     const int N = 256;
     TMDCT<N> transform(N);
-    vector<TFloat> src(N);
-    TFloat m = 0.0;
+    vector<float> src(N);
+    float m = 0.0;
     for (int i = 0; i < N; i++) {
         src[i] = rand();
         m = std::max(m, src[i]);
     }
-    const vector<TFloat> res1 = mdct(&src[0], N/2);
-    const vector<TFloat> res2 = transform(&src[0]);
+    const vector<float> res1 = mdct(&src[0], N/2);
+    const vector<float> res2 = transform(&src[0]);
     EXPECT_EQ(res1.size(), res2.size());
     auto eps = CalcEps(m * 8);
     for (int i = 0; i < res1.size(); i++) {
@@ -135,12 +135,12 @@ TEST(TMdctTest, MDCT256_RAND) {
 TEST(TMdctTest, MIDCT32) {
     const int N = 32;
     TMIDCT<N> transform;
-    vector<TFloat> src(N);
+    vector<float> src(N);
     for (int i = 0; i < N/2; i++) {
         src[i] = i;
     }
-    const vector<TFloat> res1 = midct(&src[0], N/2);
-    const vector<TFloat> res2 = transform(&src[0]);
+    const vector<float> res1 = midct(&src[0], N/2);
+    const vector<float> res2 = transform(&src[0]);
     EXPECT_EQ(res1.size(), res2.size());
     auto eps = CalcEps(N);
     for (int i = 0; i < N; i++) {
@@ -151,12 +151,12 @@ TEST(TMdctTest, MIDCT32) {
 TEST(TMdctTest, MIDCT64) {
     const int N = 64;
     TMIDCT<N> transform;
-    vector<TFloat> src(N);
+    vector<float> src(N);
     for (int i = 0; i < N/2; i++) {
         src[i] = i;
     }
-    const vector<TFloat> res1 = midct(&src[0], N/2);
-    const vector<TFloat> res2 = transform(&src[0]);
+    const vector<float> res1 = midct(&src[0], N/2);
+    const vector<float> res2 = transform(&src[0]);
     EXPECT_EQ(res1.size(), res2.size());
     auto eps = CalcEps(N);
     for (int i = 0; i < N; i++) {
@@ -167,12 +167,12 @@ TEST(TMdctTest, MIDCT64) {
 TEST(TMdctTest, MIDCT128) {
     const int N = 128;
     TMIDCT<N> transform(N);
-    vector<TFloat> src(N);
+    vector<float> src(N);
     for (int i = 0; i < N/2; i++) {
         src[i] = i;
     }
-    const vector<TFloat> res1 = midct(&src[0], N/2);
-    const vector<TFloat> res2 = transform(&src[0]);
+    const vector<float> res1 = midct(&src[0], N/2);
+    const vector<float> res2 = transform(&src[0]);
     EXPECT_EQ(res1.size(), res2.size());
     auto eps = CalcEps(N);
     for (int i = 0; i < N; i++) {
@@ -183,12 +183,12 @@ TEST(TMdctTest, MIDCT128) {
 TEST(TMdctTest, MIDCT256) {
     const int N = 256;
     TMIDCT<N> transform(N);
-    vector<TFloat> src(N);
+    vector<float> src(N);
     for (int i = 0; i < N/2; i++) {
         src[i] = i;
     }
-    const vector<TFloat> res1 = midct(&src[0], N/2);
-    const vector<TFloat> res2 = transform(&src[0]);
+    const vector<float> res1 = midct(&src[0], N/2);
+    const vector<float> res2 = transform(&src[0]);
     EXPECT_EQ(res1.size(), res2.size());
     auto eps = CalcEps(N * 2);
     for (int i = 0; i < N; i++) {
@@ -199,14 +199,14 @@ TEST(TMdctTest, MIDCT256) {
 TEST(TMdctTest, MIDCT256_RAND) {
     const int N = 256;
     TMIDCT<N> transform(N);
-    vector<TFloat> src(N);
-    TFloat m = 0.0;
+    vector<float> src(N);
+    float m = 0.0;
     for (int i = 0; i < N/2; i++) {
         src[i] = rand();
         m = std::max(m, src[i]);
     }
-    const vector<TFloat> res1 = midct(&src[0], N/2);
-    const vector<TFloat> res2 = transform(&src[0]);
+    const vector<float> res1 = midct(&src[0], N/2);
+    const vector<float> res2 = transform(&src[0]);
     EXPECT_EQ(res1.size(), res2.size());
     auto eps = CalcEps(m * 4);
     for (int i = 0; i < N; i++) {
diff --git a/src/lib/mdct/mdct_ut_common.h b/src/lib/mdct/mdct_ut_common.h
index 5c1096c..060e2d0 100644
--- a/src/lib/mdct/mdct_ut_common.h
+++ b/src/lib/mdct/mdct_ut_common.h
@@ -21,8 +21,8 @@
 #include "mdct.h"
 
 // Calculate value of error for given magnitude
-inline TFloat CalcEps(TFloat magn) {
-    const float snr = (sizeof(TFloat) == 4) ? -114.0 : -240.0;
+inline float CalcEps(float magn) {
+    const float snr = -114.0; // dB; tolerance for 32-bit float (the double-only -240 dB case is gone with TFloat)
     return magn * pow(10, (snr / 20.0));
 }
 
diff --git a/src/main.cpp b/src/main.cpp
index 6561a2a..8dbf700 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -47,8 +47,8 @@ using std::stoi;
 
 using namespace NAtracDEnc;
 
-typedef std::unique_ptr<TPCMEngine<TFloat>> TPcmEnginePtr;
-typedef std::unique_ptr<IProcessor<TFloat>> TAtracProcessorPtr;
+typedef std::unique_ptr<TPCMEngine> TPcmEnginePtr;
+typedef std::unique_ptr<IProcessor> TAtracProcessorPtr;
 
 static void printUsage(const char* myName, const string& err = string())
 {
@@ -144,7 +144,7 @@ static void PrepareAtrac1Encoder(const string& inFile,
         CheckInputFormat(wavPtr);
         wavIO->reset(wavPtr);
     }
-    const uint8_t numChannels = (*wavIO)->GetChannelNum();
+    const size_t numChannels = (*wavIO)->GetChannelNum();
     *totalSamples = (*wavIO)->GetTotalSamples();
     //TODO: recheck it
     const uint64_t numFrames = numChannels * (*totalSamples) / TAtrac1Data::NumSamples;
@@ -153,9 +153,9 @@ static void PrepareAtrac1Encoder(const string& inFile,
             "the result will be incorrect" << std::endl;
     }
     TCompressedOutputPtr aeaIO = CreateAeaOutput(outFile, "test", numChannels, (uint32_t)numFrames);
-    pcmEngine->reset(new TPCMEngine<TFloat>(4096,
+    pcmEngine->reset(new TPCMEngine(4096,
                                             numChannels,
-                                            TPCMEngine<TFloat>::TReaderPtr((*wavIO)->GetPCMReader<TFloat>())));
+                                            TPCMEngine::TReaderPtr((*wavIO)->GetPCMReader())));
     if (!noStdOut)
         cout << "Input\n Filename: " << inFile
              << "\n Channels: " << (int)numChannels
@@ -185,9 +185,9 @@ static void PrepareAtrac1Decoder(const string& inFile,
 	     << "\n Codec: PCM"
              << endl;
     wavIO->reset(new TWav(outFile, aeaIO->GetChannelNum(), 44100));
-    pcmEngine->reset(new TPCMEngine<TFloat>(4096,
+    pcmEngine->reset(new TPCMEngine(4096,
                                             aeaIO->GetChannelNum(),
-                                            TPCMEngine<TFloat>::TWriterPtr((*wavIO)->GetPCMWriter<TFloat>())));
+                                            TPCMEngine::TWriterPtr((*wavIO)->GetPCMWriter())));
     atracProcessor->reset(new TAtrac1Decoder(std::move(aeaIO)));
 }
 
@@ -245,9 +245,9 @@ static void PrepareAtrac3Encoder(const string& inFile,
              << "\n Bitrate: " << encoderSettings.ConteinerParams->Bitrate
              << endl;
 
-    pcmEngine->reset(new TPCMEngine<TFloat>(4096,
+    pcmEngine->reset(new TPCMEngine(4096,
                                             numChannels,
-                                            TPCMEngine<TFloat>::TReaderPtr(wavIO->GetPCMReader<TFloat>())));
+                                            TPCMEngine::TReaderPtr(wavIO->GetPCMReader())));
     atracProcessor->reset(new TAtrac3Encoder(std::move(omaIO), std::move(encoderSettings)));
 }
 
@@ -297,9 +297,9 @@ static void PrepareAtrac3PEncoder(const string& inFile,
              //<< "\n Bitrate: " << encoderSettings.ConteinerParams->Bitrate
              << endl;
 
-    pcmEngine->reset(new TPCMEngine<TFloat>(4096,
+    pcmEngine->reset(new TPCMEngine(4096,
                                             numChannels,
-                                            TPCMEngine<TFloat>::TReaderPtr(wavIO->GetPCMReader<TFloat>())));
+                                            TPCMEngine::TReaderPtr(wavIO->GetPCMReader())));
     atracProcessor->reset(new TAt3PEnc(std::move(omaIO), numChannels));
 }
 
diff --git a/src/oma.cpp b/src/oma.cpp
index 22f9388..7ef06c1 100644
--- a/src/oma.cpp
+++ b/src/oma.cpp
@@ -24,7 +24,7 @@ using std::string;
 using std::vector;
 using std::unique_ptr;
 
-TOma::TOma(const string& filename, const string&, uint8_t numChannel,
+TOma::TOma(const string& filename, const string&, size_t numChannel,
     uint32_t /*numFrames*/, int cid, uint32_t framesize, bool jointStereo) {
     oma_info_t info;
     info.codec = cid;
@@ -54,6 +54,6 @@ string TOma::GetName() const {
     return {};
 }
 
-uint8_t TOma::GetChannelNum() const {
+size_t TOma::GetChannelNum() const {
     return 2; //for ATRAC3
 }
diff --git a/src/oma.h b/src/oma.h
index 62fa329..f150580 100644
--- a/src/oma.h
+++ b/src/oma.h
@@ -25,10 +25,10 @@
 class TOma : public ICompressedOutput {
     OMAFILE* File;
 public:
-    TOma(const std::string& filename, const std::string& title, uint8_t numChannel,
+    TOma(const std::string& filename, const std::string& title, size_t numChannel,
         uint32_t numFrames, int cid, uint32_t framesize, bool jointStereo);
     ~TOma();
     void WriteFrame(std::vector<char> data) override;
     std::string GetName() const override;
-    uint8_t GetChannelNum() const override;
+    size_t GetChannelNum() const override;
 };
diff --git a/src/pcm_io_sndfile.cpp b/src/pcm_io_sndfile.cpp
index 52737c0..4da29e9 100644
--- a/src/pcm_io_sndfile.cpp
+++ b/src/pcm_io_sndfile.cpp
@@ -62,10 +62,10 @@ public:
     size_t GetTotalSamples() const override {
         return File.frames();
     }
-    size_t Read(TPCMBuffer<TFloat>& buf, size_t sz) override {
+    size_t Read(TPCMBuffer& buf, size_t sz) override {
         return File.readf(buf[0], sz);
     }
-    size_t Write(const TPCMBuffer<TFloat>& buf, size_t sz) override {
+    size_t Write(const TPCMBuffer& buf, size_t sz) override {
         return File.writef(buf[0], sz);
     }
 private:
diff --git a/src/pcmengin.h b/src/pcmengin.h
index 5971421..8d1db3f 100644
--- a/src/pcmengin.h
+++ b/src/pcmengin.h
@@ -50,13 +50,12 @@ class TEndOfRead : public std::exception {
     }
 };
 
-template <class T>
 class TPCMBuffer {
-    std::vector<T> Buf_;
-    uint16_t NumChannels;
+    std::vector<float> Buf_;
+    size_t NumChannels;
 
 public:
-    TPCMBuffer(uint16_t bufSize, uint8_t numChannels)
+    TPCMBuffer(uint16_t bufSize, size_t numChannels)
        : NumChannels(numChannels)
     {
         Buf_.resize((size_t)bufSize * numChannels);
@@ -66,7 +65,7 @@ public:
         return Buf_.size() / NumChannels;
     }
 
-    T* operator[](size_t pos) {
+    float* operator[](size_t pos) {
         size_t rpos = pos * NumChannels;
         if (rpos >= Buf_.size()) {
             std::cerr << "attempt to access out of buffer pos: " << pos << std::endl;
@@ -75,7 +74,7 @@ public:
         return &Buf_[rpos];
     }
 
-    const T* operator[](size_t pos) const {
+    const float* operator[](size_t pos) const {
         size_t rpos = pos * NumChannels;
         if (rpos >= Buf_.size()) {
             std::cerr << "attempt to access out of buffer pos: " << pos << std::endl;
@@ -94,52 +93,49 @@ public:
     }
 };
 
-template <class T>
 class IPCMWriter {
     public:
-        virtual void Write(const TPCMBuffer<T>& data , const uint32_t size) const = 0;
+        virtual void Write(const TPCMBuffer& data , const uint32_t size) const = 0;
         IPCMWriter() {};
         virtual ~IPCMWriter() {};
 };
 
-template <class T>
 class IPCMReader {
     public:
-        virtual bool Read(TPCMBuffer<T>& data , const uint32_t size) const = 0;
+        virtual bool Read(TPCMBuffer& data , const uint32_t size) const = 0;
         IPCMReader() {};
         virtual ~IPCMReader() {};
 };
 
-template<class T>
 class TPCMEngine {
 public:
-    typedef std::unique_ptr<IPCMWriter<T>> TWriterPtr;
-    typedef std::unique_ptr<IPCMReader<T>> TReaderPtr;
+    typedef std::unique_ptr<IPCMWriter> TWriterPtr;
+    typedef std::unique_ptr<IPCMReader> TReaderPtr;
     struct ProcessMeta {
         const uint16_t Channels;
     };
 private:
-    TPCMBuffer<T> Buffer;
+    TPCMBuffer Buffer;
     TWriterPtr Writer;
     TReaderPtr Reader;
     uint64_t Processed = 0;
     uint64_t ToDrain = 0;
 public:
-        TPCMEngine(uint16_t bufSize, uint8_t numChannels)
+        TPCMEngine(uint16_t bufSize, size_t numChannels)
            : Buffer(bufSize, numChannels) {
         }
 
-        TPCMEngine(uint16_t bufSize, uint8_t numChannels, TWriterPtr&& writer)
+        TPCMEngine(uint16_t bufSize, size_t numChannels, TWriterPtr&& writer)
             : Buffer(bufSize, numChannels)
             , Writer(std::move(writer)) {
         }
 
-        TPCMEngine(uint16_t bufSize, uint8_t numChannels, TReaderPtr&& reader)
+        TPCMEngine(uint16_t bufSize, size_t numChannels, TReaderPtr&& reader)
             : Buffer(bufSize, numChannels)
             , Reader(std::move(reader)) {
         }
 
-        TPCMEngine(uint16_t bufSize, uint8_t numChannels, TWriterPtr&& writer, TReaderPtr&& reader)
+        TPCMEngine(uint16_t bufSize, size_t numChannels, TWriterPtr&& writer, TReaderPtr&& reader)
             : Buffer(bufSize, numChannels)
             , Writer(std::move(writer))
             , Reader(std::move(reader)) {
@@ -150,7 +146,7 @@ public:
             PROCESSED,
         };
 
-        typedef std::function<EProcessResult(T* data, const ProcessMeta& meta)> TProcessLambda;
+        typedef std::function<EProcessResult(float* data, const ProcessMeta& meta)> TProcessLambda;
 
         uint64_t ApplyProcess(size_t step, TProcessLambda lambda) {
             if (step > Buffer.Size()) {
@@ -196,9 +192,8 @@ public:
         }
 };
 
-template<class T>
 class IProcessor {
 public:
-    virtual typename TPCMEngine<T>::TProcessLambda GetLambda() = 0;
+    virtual typename TPCMEngine::TProcessLambda GetLambda() = 0;
     virtual ~IProcessor() {}
 };
diff --git a/src/platform/win/pcm_io/mf/pcm_io_mf.cpp b/src/platform/win/pcm_io/mf/pcm_io_mf.cpp
index 3f9ff3a..a4e550b 100644
--- a/src/platform/win/pcm_io/mf/pcm_io_mf.cpp
+++ b/src/platform/win/pcm_io/mf/pcm_io_mf.cpp
@@ -69,8 +69,8 @@ static std::wstring Utf8ToMultiByte(const std::string& in) {
 }
 
 // TODO: add dither, noise shape?
-static inline int16_t FloatToInt16(TFloat in) {
-    return std::min((int)INT16_MAX, std::max((int)INT16_MIN, (int)lrint(in * (TFloat)INT16_MAX)));
+static inline int16_t FloatToInt16(float in) {
+    return std::min((int)INT16_MAX, std::max((int)INT16_MIN, (int)lrint(in * (float)INT16_MAX)));
 }
 
 static HRESULT WriteToFile(HANDLE hFile, void* p, DWORD cb) {
@@ -350,7 +350,7 @@ public:
         return static_cast<size_t>(totalSamples);
     }
 
-    size_t Read(TPCMBuffer<TFloat>& buf, size_t sz) override {
+    size_t Read(TPCMBuffer& buf, size_t sz) override {
         HRESULT hr = S_OK;
 
         const size_t sizeBytes = sz * BytesPerSample_;
@@ -447,7 +447,7 @@ public:
         return curPos;
     }
 
-    size_t Write(const TPCMBuffer<TFloat>& buf, size_t sz) override {
+    size_t Write(const TPCMBuffer& buf, size_t sz) override {
         const size_t samples = ChannelsNum_ * sz;
         Buf_.resize(samples * 2);
         for (size_t i = 0; i < samples; i++) {
@@ -480,4 +480,4 @@ IPCMProviderImpl* CreatePCMIOMFReadImpl(const std::string& path) {
 
 IPCMProviderImpl* CreatePCMIOMFWriteImpl(const std::string& path, int channels, int sampleRate) {
     return new TPCMIOMediaFoundationFile(path, channels, sampleRate);
-}
-\ No newline at end of file
+}
diff --git a/src/platform/win/pcm_io/pcm_io.cpp b/src/platform/win/pcm_io/pcm_io.cpp
index bd266ea..452036d 100644
--- a/src/platform/win/pcm_io/pcm_io.cpp
+++ b/src/platform/win/pcm_io/pcm_io.cpp
@@ -26,28 +26,28 @@
 #include <windows.h>
 
 
-void ConvertToPcmBufferFromLE(const BYTE* audioData, TPCMBuffer<TFloat>& buf, size_t sz, size_t shift, size_t channelsNum) {
+void ConvertToPcmBufferFromLE(const BYTE* audioData, TPCMBuffer& buf, size_t sz, size_t shift, size_t channelsNum) {
     if (channelsNum == 1) {
         for (size_t i = 0; i < sz; i++) {
-            *(buf[i + shift] + 0) = (*(int16_t*)(audioData + i * 2 + 0)) / (TFloat)32768.0;
+            *(buf[i + shift] + 0) = (*(int16_t*)(audioData + i * 2 + 0)) / (float)32768.0;
         }
     } else {
         for (size_t i = 0; i < sz; i++) {
-            *(buf[i + shift] + 0) = (*(int16_t*)(audioData + i * 4 + 0)) / (TFloat)32768.0;
-            *(buf[i + shift] + 1) = (*(int16_t*)(audioData + i * 4 + 2)) / (TFloat)32768.0;
+            *(buf[i + shift] + 0) = (*(int16_t*)(audioData + i * 4 + 0)) / (float)32768.0;
+            *(buf[i + shift] + 1) = (*(int16_t*)(audioData + i * 4 + 2)) / (float)32768.0;
         }
     }
 }
 
-void ConvertToPcmBufferFromBE(const BYTE* audioData, TPCMBuffer<TFloat>& buf, size_t sz, size_t shift, size_t channelsNum) {
+void ConvertToPcmBufferFromBE(const BYTE* audioData, TPCMBuffer& buf, size_t sz, size_t shift, size_t channelsNum) {
     if (channelsNum == 1) {
         for (size_t i = 0; i < sz; i++) {
-            *(buf[i + shift] + 0) = conv_ntoh((*(int16_t*)(audioData + i * 2 + 0))) / (TFloat)32768.0;
+            *(buf[i + shift] + 0) = conv_ntoh((*(int16_t*)(audioData + i * 2 + 0))) / (float)32768.0;
         }
     } else {
         for (size_t i = 0; i < sz; i++) {
-            *(buf[i + shift] + 0) = conv_ntoh((*(int16_t*)(audioData + i * 4 + 0))) / (TFloat)32768.0;
-            *(buf[i + shift] + 1) = conv_ntoh((*(int16_t*)(audioData + i * 4 + 2))) / (TFloat)32768.0;
+            *(buf[i + shift] + 0) = conv_ntoh((*(int16_t*)(audioData + i * 4 + 0))) / (float)32768.0;
+            *(buf[i + shift] + 1) = conv_ntoh((*(int16_t*)(audioData + i * 4 + 2))) / (float)32768.0;
         }
     }
 }
diff --git a/src/platform/win/pcm_io/pcm_io_impl.h b/src/platform/win/pcm_io/pcm_io_impl.h
index 117ebe0..6c7d840 100644
--- a/src/platform/win/pcm_io/pcm_io_impl.h
+++ b/src/platform/win/pcm_io/pcm_io_impl.h
@@ -22,5 +22,5 @@
 
 #include <windows.h>
 
-void ConvertToPcmBufferFromLE(const BYTE* audioData, TPCMBuffer<TFloat>& buf, size_t sz, size_t shift, size_t channelsNum);
-void ConvertToPcmBufferFromBE(const BYTE* audioData, TPCMBuffer<TFloat>& buf, size_t sz, size_t shift, size_t channelsNum);
-\ No newline at end of file
+void ConvertToPcmBufferFromLE(const BYTE* audioData, TPCMBuffer& buf, size_t sz, size_t shift, size_t channelsNum);
+void ConvertToPcmBufferFromBE(const BYTE* audioData, TPCMBuffer& buf, size_t sz, size_t shift, size_t channelsNum);
diff --git a/src/platform/win/pcm_io/win32/pcm_io_win32.cpp b/src/platform/win/pcm_io/win32/pcm_io_win32.cpp
index 7eee5b0..bd1849c 100644
--- a/src/platform/win/pcm_io/win32/pcm_io_win32.cpp
+++ b/src/platform/win/pcm_io/win32/pcm_io_win32.cpp
@@ -82,7 +82,7 @@ public:
         }
     }
 
-    size_t Read(TPCMBuffer<TFloat>& buf, size_t sz) override {
+    size_t Read(TPCMBuffer& buf, size_t sz) override {
         if (Finished_)
             return 0;
 
@@ -108,7 +108,7 @@ public:
         return toConvert;
     }
 
-    size_t Write(const TPCMBuffer<TFloat>& buf, size_t sz) override {
+    size_t Write(const TPCMBuffer& buf, size_t sz) override {
         abort();
         return 0;
     }
diff --git a/src/qmf/qmf.h b/src/qmf/qmf.h
index 2e71444..81977f3 100644
--- a/src/qmf/qmf.h
+++ b/src/qmf/qmf.h
@@ -24,10 +24,10 @@
 template<class TPCM, int nIn>
 class TQmf {
     static const float TapHalf[24];
-    TFloat QmfWindow[48];
+    float QmfWindow[48];
     TPCM PcmBuffer[nIn + 46];
-    TFloat PcmBufferMerge[nIn + 46];
-    TFloat DelayBuff[46];
+    float PcmBufferMerge[nIn + 46];
+    float DelayBuff[46];
 public:
     TQmf() {
         const int sz = sizeof(QmfWindow)/sizeof(QmfWindow[0]);
@@ -41,8 +41,8 @@ public:
         }
     }
 
-    void Analysis(TPCM* in, TFloat* lower, TFloat* upper) {
-        TFloat temp;
+    void Analysis(TPCM* in, float* lower, float* upper) {
+        float temp;
         for (size_t i = 0; i < 46; i++)
             PcmBuffer[i] = PcmBuffer[nIn + i];
 
@@ -61,9 +61,9 @@ public:
         }
     }
 
-    void Synthesis(TPCM* out, TFloat* lower, TFloat* upper) {
-        memcpy(&PcmBufferMerge[0], &DelayBuff[0], 46*sizeof(TFloat));
-        TFloat* newPart = &PcmBufferMerge[46];
+    void Synthesis(TPCM* out, float* lower, float* upper) {
+        memcpy(&PcmBufferMerge[0], &DelayBuff[0], 46*sizeof(float));
+        float* newPart = &PcmBufferMerge[46];
         for (int i = 0; i < nIn; i+=4) {
             newPart[i+0] = lower[i/2] + upper[i/2];
             newPart[i+1] = lower[i/2] - upper[i/2];
@@ -71,10 +71,10 @@ public:
             newPart[i+3] = lower[i/2 + 1] - upper[i/2 + 1];
         }
 
-        TFloat* winP = &PcmBufferMerge[0];
+        float* winP = &PcmBufferMerge[0];
         for (size_t j = nIn/2; j != 0; j--) {
-            TFloat s1 = 0;
-            TFloat s2 = 0;
+            float s1 = 0;
+            float s2 = 0;
             for (size_t i = 0; i < 48; i+=2) {
                 s1 += winP[i] * QmfWindow[i];
                 s2 += winP[i+1] * QmfWindow[i+1];
@@ -84,7 +84,7 @@ public:
             winP += 2;
             out += 2;
         }
-        memcpy(&DelayBuff[0], &PcmBufferMerge[nIn], 46*sizeof(TFloat));
+        memcpy(&DelayBuff[0], &PcmBufferMerge[nIn], 46*sizeof(float));
     }
 };
 
diff --git a/src/rm.cpp b/src/rm.cpp
index d4de66e..4d04cb7 100644
--- a/src/rm.cpp
+++ b/src/rm.cpp
@@ -62,7 +62,7 @@ constexpr char RA_DESC[] = "Audio Stream";
 static_assert(sizeof(char) == 1, "unexpected char size");
 constexpr size_t MDPR_HEADER_SZ = 42 + sizeof(RA_MIME) + sizeof(RA_DESC) + CODEC_DATA_SZ;
 
-void FillCodecData(char* buf, uint32_t frameSize, uint8_t numChannels, bool jointStereo, uint32_t bitrate) {
+void FillCodecData(char* buf, uint32_t frameSize, size_t numChannels, bool jointStereo, uint32_t bitrate) {
     *reinterpret_cast<uint32_t*>(buf +  0) = swapbyte32_on_le(CODEC_DATA_SZ - 4); // -4 - without size of `size` field
     buf[4] = '.';
     buf[5] = 'r';
@@ -147,7 +147,7 @@ void scramble_data(const char* input, char* out, size_t bytes) {
 
 class TRm : public ICompressedOutput {
 public:
-    TRm(const std::string& filename, const std::string& /*title*/, uint8_t numChannels,
+    TRm(const std::string& filename, const std::string& /*title*/, size_t numChannels,
         uint32_t numFrames, uint32_t frameSize, bool jointStereo)
         : File_(OpenFile(filename))
 	, FrameDuration_((1000.0 * 1024.0 / 44100.0)) // ms
@@ -194,7 +194,7 @@ public:
         return {};
     }
 
-    uint8_t GetChannelNum() const override {
+    size_t GetChannelNum() const override {
 	    return 0;
     }
 
@@ -251,7 +251,7 @@ private:
             throw std::runtime_error("Can't write PROP header");//, errno);
     }
 
-    void WriteMDPR(uint32_t frameSize, uint32_t numFrames, uint8_t numChannels, bool jointStereo) {
+    void WriteMDPR(uint32_t frameSize, uint32_t numFrames, size_t numChannels, bool jointStereo) {
         char buf[MDPR_HEADER_SZ] = {
             'M', 'D', 'P', 'R'};
         *reinterpret_cast<uint32_t*>(buf +  4) = swapbyte32_on_le(MDPR_HEADER_SZ);
@@ -276,7 +276,7 @@ private:
     }
 };
 
-TCompressedOutputPtr CreateRmOutput(const std::string& filename, const std::string& title, uint8_t numChannel,
+TCompressedOutputPtr CreateRmOutput(const std::string& filename, const std::string& title, size_t numChannel,
     uint32_t numFrames, uint32_t framesize, bool jointStereo) {
     return std::unique_ptr<TRm>(new TRm(filename, title, numChannel, numFrames, framesize, jointStereo));
 }
diff --git a/src/rm.h b/src/rm.h
index 1a1e512..03c8981 100644
--- a/src/rm.h
+++ b/src/rm.h
@@ -20,5 +20,5 @@
 
 #include "compressed_io.h"
 
-TCompressedOutputPtr CreateRmOutput(const std::string& filename, const std::string& title, uint8_t numChannel,
+TCompressedOutputPtr CreateRmOutput(const std::string& filename, const std::string& title, size_t numChannel,
         uint32_t numFrames, uint32_t framesize, bool jointStereo);
diff --git a/src/transient_detector.cpp b/src/transient_detector.cpp
index ca96cd4..f090104 100644
--- a/src/transient_detector.cpp
+++ b/src/transient_detector.cpp
@@ -26,8 +26,8 @@
 namespace NAtracDEnc {
 
 using std::vector;
-static TFloat calculateRMS(const TFloat* in, uint32_t n) {
-    TFloat s = 0;
+static float calculateRMS(const float* in, uint32_t n) {
+    float s = 0;
     for (uint32_t i = 0; i < n; i++) {
         s += (in[i] * in[i]);
     }
@@ -35,41 +35,41 @@ static TFloat calculateRMS(const TFloat* in, uint32_t n) {
     return sqrt(s);
 }
 
-static TFloat calculatePeak(const TFloat* in, uint32_t n) {
-    TFloat s = 0;
+static float calculatePeak(const float* in, uint32_t n) {
+    float s = 0;
     for (uint32_t i = 0; i < n; i++) {
-        TFloat absVal = std::abs(in[i]);
+        float absVal = std::abs(in[i]);
         if (absVal > s)
             s = absVal;
     }
     return s;
 }
 
-void TTransientDetector::HPFilter(const TFloat* in, TFloat* out) {
-    static const TFloat fircoef[] = {
+void TTransientDetector::HPFilter(const float* in, float* out) {
+    static const float fircoef[] = {
         -8.65163e-18 * 2.0, -0.00851586 * 2.0, -6.74764e-18 * 2.0, 0.0209036 * 2.0,
         -3.36639e-17 * 2.0, -0.0438162 * 2.0, -1.54175e-17 * 2.0, 0.0931738 * 2.0,
         -5.52212e-17 * 2.0, -0.313819 * 2.0
     };
-    memcpy(HPFBuffer.data() + PrevBufSz, in, BlockSz * sizeof(TFloat));
-    const TFloat* inBuf = HPFBuffer.data();
+    memcpy(HPFBuffer.data() + PrevBufSz, in, BlockSz * sizeof(float));
+    const float* inBuf = HPFBuffer.data();
     for (size_t i = 0; i < BlockSz; ++i) {
-        TFloat s = inBuf[i + 10];
-        TFloat s2 = 0;
+        float s = inBuf[i + 10];
+        float s2 = 0;
         for (size_t j = 0; j < ((FIRLen - 1) / 2) - 1 ; j += 2) {
             s += fircoef[j] * (inBuf[i + j] + inBuf[i + FIRLen - j]);
             s2 += fircoef[j + 1] * (inBuf[i + j + 1] + inBuf[i + FIRLen - j - 1]);
         }
         out[i] = (s + s2)/2;
     }
-    memcpy(HPFBuffer.data(), in + (BlockSz - PrevBufSz),  PrevBufSz * sizeof(TFloat));
+    memcpy(HPFBuffer.data(), in + (BlockSz - PrevBufSz),  PrevBufSz * sizeof(float));
 }
 
 
-bool TTransientDetector::Detect(const TFloat* buf) {
+bool TTransientDetector::Detect(const float* buf) {
     const uint16_t nBlocksToAnalize = NShortBlocks + 1;
-    TFloat* rmsPerShortBlock = reinterpret_cast<TFloat*>(alloca(sizeof(TFloat) * nBlocksToAnalize));
-    std::vector<TFloat> filtered(BlockSz);
+    float* rmsPerShortBlock = reinterpret_cast<float*>(alloca(sizeof(float) * nBlocksToAnalize));
+    std::vector<float> filtered(BlockSz);
     HPFilter(buf, filtered.data());
     bool trans = false;
     rmsPerShortBlock[0] = LastEnergy;
@@ -88,11 +88,11 @@ bool TTransientDetector::Detect(const TFloat* buf) {
     return trans;
 }
 
-std::vector<TFloat> AnalyzeGain(const TFloat* in, const uint32_t len, const uint32_t maxPoints, bool useRms) {
-    vector<TFloat> res;
+std::vector<float> AnalyzeGain(const float* in, const uint32_t len, const uint32_t maxPoints, bool useRms) {
+    vector<float> res;
     const uint32_t step = len / maxPoints;
     for (uint32_t pos = 0; pos < len; pos += step) {
-        TFloat rms = useRms ? calculateRMS(in + pos, step) : calculatePeak(in + pos, step);
+        float rms = useRms ? calculateRMS(in + pos, step) : calculatePeak(in + pos, step);
         res.emplace_back(rms);
     }
     return res;
diff --git a/src/transient_detector.h b/src/transient_detector.h
index 0f94cc6..241861b 100644
--- a/src/transient_detector.h
+++ b/src/transient_detector.h
@@ -31,9 +31,9 @@ class TTransientDetector {
     const uint16_t NShortBlocks;
     static const uint16_t PrevBufSz = 20;
     static const uint16_t FIRLen = 21;
-    void HPFilter(const TFloat* in, TFloat* out);
-    std::vector<TFloat> HPFBuffer;
-    TFloat LastEnergy = 0.0;
+    void HPFilter(const float* in, float* out);
+    std::vector<float> HPFBuffer;
+    float LastEnergy = 0.0;
     uint16_t LastTransientPos = 0;
 public:
     TTransientDetector(uint16_t shortSz, uint16_t blockSz)
@@ -43,10 +43,10 @@ public:
     {
         HPFBuffer.resize(BlockSz + FIRLen); 
     }
-    bool Detect(const TFloat* buf);
+    bool Detect(const float* buf);
     uint32_t GetLastTransientPos() const { return LastTransientPos; }
 };
 
-std::vector<TFloat> AnalyzeGain(const TFloat* in, uint32_t len, uint32_t maxPoints, bool useRms);
+std::vector<float> AnalyzeGain(const float* in, uint32_t len, uint32_t maxPoints, bool useRms);
 
 }
diff --git a/src/transient_detector_ut.cpp b/src/transient_detector_ut.cpp
index e41d4ac..95495ae 100644
--- a/src/transient_detector_ut.cpp
+++ b/src/transient_detector_ut.cpp
@@ -26,7 +26,7 @@ using std::vector;
 using namespace NAtracDEnc;
 TEST(AnalyzeGain, AnalyzeGainSimple) {
 
-    TFloat in[256];
+    float in[256];
     for (int i = 0; i < 256; ++i) {
         if (i <= 24) {
             in[i] = 1.0;
@@ -38,10 +38,10 @@ TEST(AnalyzeGain, AnalyzeGainSimple) {
             in[i] = 0.5;
         }
     }
-    vector<TFloat> res = AnalyzeGain(in, 256, 32, false);
+    vector<float> res = AnalyzeGain(in, 256, 32, false);
     EXPECT_EQ(res.size(), 32);    
 
-//    for (TFloat v : res)
+//    for (float v : res)
 //        std::cout << v << std::endl;
     for (int i = 0; i < 3; ++i)
         EXPECT_EQ(res[i], 1.0);
diff --git a/src/util.h b/src/util.h
index 0e5d948..cc3249b 100644
--- a/src/util.h
+++ b/src/util.h
@@ -44,15 +44,15 @@ inline void SwapArray(T* p, const size_t len) {
 }
 
 template<size_t N>
-inline void InvertSpectrInPlase(TFloat* in) {
+inline void InvertSpectrInPlase(float* in) {
     for (size_t i = 0; i < N; i+=2)
         in[i] *= -1;
 }
 
 template<size_t N>
-inline std::vector<TFloat> InvertSpectr(const TFloat* in) {
-    std::vector<TFloat> buf(N);
-    std::memcpy(&buf[0], in, N * sizeof(TFloat));
+inline std::vector<float> InvertSpectr(const float* in) {
+    std::vector<float> buf(N);
+    std::memcpy(&buf[0], in, N * sizeof(float));
     InvertSpectrInPlase<N>(&buf[0]);
     return buf;
 }
diff --git a/src/util_ut.cpp b/src/util_ut.cpp
index 9fb61c4..9028c1e 100644
--- a/src/util_ut.cpp
+++ b/src/util_ut.cpp
@@ -22,10 +22,10 @@
 #include <vector>
 
 TEST(Util, SwapArrayTest) {
-    TFloat arr[8] = { 0, 1, 2, 3, 4, 5, 6, 7 };
+    float arr[8] = { 0, 1, 2, 3, 4, 5, 6, 7 };
     SwapArray(arr, 8);
     for (size_t i = 0; i < 8; ++i) {
-        EXPECT_NEAR((TFloat)i, arr[7-i], 0.000000000001);
+        EXPECT_NEAR((float)i, arr[7-i], 0.000000000001);
     }
 }
 
@@ -43,10 +43,10 @@ TEST(Util, GetFirstSetBitTest) {
 
 TEST(Util, CalcEnergy) {
 
-    EXPECT_NEAR((TFloat)0.0, CalcEnergy(std::vector<TFloat>{0.0}), 0.000000000001);
-    EXPECT_NEAR((TFloat)1.0, CalcEnergy(std::vector<TFloat>{1.0}), 0.000000000001);
-    EXPECT_NEAR((TFloat)2.0, CalcEnergy(std::vector<TFloat>{1.0, 1.0}), 0.000000000001);
-    EXPECT_NEAR((TFloat)5.0, CalcEnergy(std::vector<TFloat>{2.0, 1.0}), 0.000000000001);
-    EXPECT_NEAR((TFloat)5.0, CalcEnergy(std::vector<TFloat>{1.0, 2.0}), 0.000000000001);
-    EXPECT_NEAR((TFloat)8.0, CalcEnergy(std::vector<TFloat>{2.0, 2.0}), 0.000000000001);
+    EXPECT_NEAR((float)0.0, CalcEnergy(std::vector<float>{0.0}), 0.000000000001);
+    EXPECT_NEAR((float)1.0, CalcEnergy(std::vector<float>{1.0}), 0.000000000001);
+    EXPECT_NEAR((float)2.0, CalcEnergy(std::vector<float>{1.0, 1.0}), 0.000000000001);
+    EXPECT_NEAR((float)5.0, CalcEnergy(std::vector<float>{2.0, 1.0}), 0.000000000001);
+    EXPECT_NEAR((float)5.0, CalcEnergy(std::vector<float>{1.0, 2.0}), 0.000000000001);
+    EXPECT_NEAR((float)8.0, CalcEnergy(std::vector<float>{2.0, 2.0}), 0.000000000001);
 }
diff --git a/src/wav.cpp b/src/wav.cpp
index 8750377..08bff51 100644
--- a/src/wav.cpp
+++ b/src/wav.cpp
@@ -36,23 +36,50 @@ TWav::TWav(const std::string& path)
     : Impl(CreatePCMIOReadImpl(path))
 { }
 
-TWav::TWav(const std::string& path, uint8_t channels, uint16_t sampleRate)
+TWav::TWav(const std::string& path, size_t channels, size_t sampleRate)
     : Impl(CreatePCMIOWriteImpl(path, channels, sampleRate))
 { }
 
 TWav::~TWav() {
 }
 
+IPCMReader* TWav::GetPCMReader() const {
+    return new TWavPcmReader([this](TPCMBuffer& data, const uint32_t size) {
+        if (data.Channels() != Impl->GetChannelsNum())
+            throw TWrongReadBuffer();
+
+        size_t read;
+        if ((read = Impl->Read(data, size)) != size) {
+            if (!read)
+                return false;
+
+            data.Zero(read, size - read);
+        }
+
+        return true;
+    });
+}
+
+IPCMWriter* TWav::GetPCMWriter() {
+    return new TWavPcmWriter([this](const TPCMBuffer& data, const uint32_t size) {
+        if (data.Channels() != Impl->GetChannelsNum())
+            throw TWrongReadBuffer();
+        if (Impl->Write(data, size) != size) {
+            fprintf(stderr, "can't write block\n");
+        }
+    });
+}
+
 uint64_t TWav::GetTotalSamples() const {
     return Impl->GetTotalSamples();
 }
 
-uint8_t TWav::GetChannelNum() const {
-    return (uint8_t)Impl->GetChannelsNum();
+size_t TWav::GetChannelNum() const {
+    return Impl->GetChannelsNum();
 }
 
-uint16_t TWav::GetSampleRate() const {
-    return (uint16_t)Impl->GetSampleRate();
+size_t TWav::GetSampleRate() const {
+    return Impl->GetSampleRate();
 }
 
 //bool TWav::IsFormatSupported() const {
diff --git a/src/wav.h b/src/wav.h
index 55e6d38..a101c8d 100644
--- a/src/wav.h
+++ b/src/wav.h
@@ -28,28 +28,26 @@
 class TFileAlreadyExists : public std::exception {
 };
 
-template<class T>
-class TWavPcmReader : public IPCMReader<T> {
+class TWavPcmReader : public IPCMReader {
 public:
-    typedef std::function<bool(TPCMBuffer<T>& data, const uint32_t size)> TLambda;
+    typedef std::function<bool(TPCMBuffer& data, const uint32_t size)> TLambda;
     TLambda Lambda;
     TWavPcmReader(TLambda lambda)
         : Lambda(lambda)
     {}
-    bool Read(TPCMBuffer<T>& data , const uint32_t size) const override {
+    bool Read(TPCMBuffer& data , const uint32_t size) const override {
         return Lambda(data, size);
     }
 };
 
-template<class T>
-class TWavPcmWriter : public IPCMWriter<T> {
+class TWavPcmWriter : public IPCMWriter {
 public:
-    typedef std::function<void(const TPCMBuffer<T>& data, const uint32_t size)> TLambda;
+    typedef std::function<void(const TPCMBuffer& data, const uint32_t size)> TLambda;
     TLambda Lambda;
     TWavPcmWriter(TLambda lambda)
         : Lambda(lambda)
     {}
-    void Write(const TPCMBuffer<T>& data , const uint32_t size) const override {
+    void Write(const TPCMBuffer& data , const uint32_t size) const override {
         Lambda(data, size);
     }
 };
@@ -60,8 +58,8 @@ public:
     virtual size_t GetChannelsNum() const = 0;
     virtual size_t GetSampleRate() const = 0;
     virtual size_t GetTotalSamples() const = 0;
-    virtual size_t Read(TPCMBuffer<TFloat>& buf, size_t sz) = 0;
-    virtual size_t Write(const TPCMBuffer<TFloat>& buf, size_t sz) = 0;
+    virtual size_t Read(TPCMBuffer& buf, size_t sz) = 0;
+    virtual size_t Write(const TPCMBuffer& buf, size_t sz) = 0;
 };
 
 //TODO: split for reader/writer
@@ -73,48 +71,15 @@ public:
         E_WRITE
     };
     TWav(const std::string& filename); // reading
-    TWav(const std::string& filename, uint8_t channels, uint16_t sampleRate); //writing
+    TWav(const std::string& filename, size_t channels, size_t sampleRate); //writing
     ~TWav();
-    uint8_t GetChannelNum() const;
-    uint16_t GetSampleRate() const;
+    size_t GetChannelNum() const;
+    size_t GetSampleRate() const;
     uint64_t GetTotalSamples() const;
 
-    template<class T>
-    IPCMReader<T>* GetPCMReader() const;
+    IPCMReader* GetPCMReader() const;
 
-    template<class T>
-    IPCMWriter<T>* GetPCMWriter();
+    IPCMWriter* GetPCMWriter();
 };
 
 typedef std::unique_ptr<TWav> TWavPtr;
-
-template<class T>
-IPCMReader<T>* TWav::GetPCMReader() const {
-    return new TWavPcmReader<T>([this](TPCMBuffer<T>& data, const uint32_t size) -> bool {
-        if (data.Channels() != Impl->GetChannelsNum())
-            throw TWrongReadBuffer(); 
-
-        size_t read;
-        if ((read = Impl->Read(data, size)) != size) {
-            if (!read)
-                return false;
-
-            data.Zero(read, size - read);
-        }
-
-        return true;
-    });
-}
-
-template<class T>
-IPCMWriter<T>* TWav::GetPCMWriter() {
-    return new TWavPcmWriter<T>([this](const TPCMBuffer<T>& data, const uint32_t size) {
-        if (data.Channels() != Impl->GetChannelsNum())
-            throw TWrongReadBuffer();
-        if (Impl->Write(data, size) != size) {
-            fprintf(stderr, "can't write block\n");
-        }
-    });
-}
-
-
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index 2cd1a7b..f5230aa 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -8,15 +8,12 @@ include_directories(
     "../src/lib"
 )
 
-if (ATDE_USE_FLOAT)
-    add_compile_definitions(ATDE_USE_FLOAT)
-endif()
-
 set(atracdenc_ut
     ${CMAKE_SOURCE_DIR}/src/lib/mdct/mdct_ut.cpp
     ${CMAKE_SOURCE_DIR}/src/lib/bitstream/bitstream_ut.cpp
     ${CMAKE_SOURCE_DIR}/src/util_ut.cpp
     ${CMAKE_SOURCE_DIR}/src/atracdenc_ut.cpp
+    ${CMAKE_SOURCE_DIR}/src/atrac3denc_ut.cpp
     ${CMAKE_SOURCE_DIR}/src/transient_detector_ut.cpp
 )
author	Daniil Cherednik <dan.cherednik@gmail.com>	2024-12-24 22:59:03 +0100
committer	Daniil Cherednik <dan.cherednik@gmail.com>	2024-12-24 22:59:03 +0100
commit	bddbeb98b3db8d435de6b2d10269640121475538 (patch)
tree	1a791f5dd49b94ead503b9b77c6b9277f2701a89
parent	c73a0e24a0d5c510a65efbe5c2fbc7fd39a3c003 (diff)
parent	74d6e04c21bddd435bd74c34dbe027b883772a76 (diff)
download	atracdenc-bddbeb98b3db8d435de6b2d10269640121475538.tar.gz