Dirty implementation of atrac3 encoder:

- no JS mode - constant quantiser for tonal components - gain control implemented but produces some artifacts with real signals. - etc...
author: Daniil Cherednik <dan.cherednik@gmail.com> 2016-03-13 09:49:33 +0300
committer: Daniil Cherednik <dan.cherednik@gmail.com> 2016-09-02 21:21:28 +0300
commit: cfaa2cd39b7256a868a4f5cd83aac207df6bd1b3 (patch)
tree: 75efff26584e046566d17cd308d45b6b0fd5abfc
parent: b4df8a7c2dd12eea27c8cc52bd52a1bb8c00943f (diff)
download: atracdenc-cfaa2cd39b7256a868a4f5cd83aac207df6bd1b3.tar.gz
49 files changed, 3070 insertions, 406 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 2bce62f..6b36f06 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -12,6 +12,18 @@ endmacro(use_cxx11)
 
 use_cxx11()
 
+# Select C11: CMAKE_C_STANDARD on CMake >= 3.1, a GNU-only flag on older CMake.
+macro(use_c11)
+  if (NOT CMAKE_VERSION VERSION_LESS "3.1")
+    set (CMAKE_C_STANDARD 11)
+  elseif (CMAKE_C_COMPILER_ID STREQUAL "GNU")
+    # Prepended, so any -std already present in CMAKE_C_FLAGS comes later on the command line.
+    set (CMAKE_C_FLAGS "--std=gnu11 ${CMAKE_C_FLAGS}")
+  endif ()
+endmacro(use_c11)
+
+use_c11()
+
 add_subdirectory(3rd/gtest-1.7.0)
 add_subdirectory(src)
 add_subdirectory(test)
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index fd7fe24..c9c797a 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -1,5 +1,8 @@
 CMAKE_MINIMUM_REQUIRED(VERSION 2.8)
 
+#add_definitions( "-Wall -O2 -g -Rpass-analysis=loop-vectorize" )
+#set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g -fsanitize=address -fno-omit-frame-pointer")
+
 add_definitions( "-Wall -O2 -g" )
 
 project(atracdenc)
@@ -8,11 +11,30 @@ set(CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake/modules")
 INCLUDE(FindLibSndFile)
 
 include_directories(${LIBSNDFILE_INCLUDE_DIR})
+include_directories("oma/liboma/include")
 
-set(SOURCE_LIB mdct/vorbis_impl/mdct.c)
-set(SOURCE_EXE main.cpp wav.cpp aea.cpp transient_detector.cpp atracdenc.cpp bitstream/bitstream.cpp atrac/atrac1.cpp atrac/atrac1_dequantiser.cpp atrac/atrac_scale.cpp atrac/atrac1_bitalloc.cpp)
+set(SOURCE_MDCT_LIB mdct/vorbis_impl/mdct.c)
+set(SOURCE_OMA_LIB oma/liboma/src/liboma.c)
+set(SOURCE_EXE
+    main.cpp
+    wav.cpp
+    aea.cpp
+    transient_detector.cpp
+    atrac1denc.cpp
+    bitstream/bitstream.cpp
+    atrac/atrac1.cpp
+    atrac/atrac1_dequantiser.cpp
+    atrac/atrac_scale.cpp
+    atrac/atrac_psy_common.cpp
+    atrac/atrac1_bitalloc.cpp
+    oma.cpp
+    atrac3denc.cpp
+    atrac/atrac3.cpp
+    atrac/atrac3_bitstream.cpp
+    )
 
-add_library(mdct_impl STATIC ${SOURCE_LIB})
+add_library(mdct_impl STATIC ${SOURCE_MDCT_LIB})
+add_library(oma STATIC ${SOURCE_OMA_LIB})
 add_executable(atracdenc ${SOURCE_EXE})
-target_link_libraries(atracdenc mdct_impl ${SNDFILE_LIBRARIES})
+target_link_libraries(atracdenc mdct_impl oma ${SNDFILE_LIBRARIES})
 
diff --git a/src/aea.h b/src/aea.h
index 74068de..14bd9c4 100644
--- a/src/aea.h
+++ b/src/aea.h
@@ -48,5 +48,4 @@ public:
         long long GetLengthInSamples() const override;
 };
 
-typedef std::unique_ptr<IAtrac1IO> TAeaPtr;
 
diff --git a/src/atrac/atrac1.cpp b/src/atrac/atrac1.cpp
index 26d8218..b71e5ae 100644
--- a/src/atrac/atrac1.cpp
+++ b/src/atrac/atrac1.cpp
@@ -1,10 +1,15 @@
 #include "atrac1.h"
 
-constexpr uint32_t TAtrac1Data::BlocksPerBand[QMF_BANDS + 1];
-constexpr uint32_t TAtrac1Data::SpecsPerBlock[MAX_BFUS];
-constexpr uint32_t TAtrac1Data::SpecsStartLong[MAX_BFUS];
-constexpr uint32_t TAtrac1Data::SpecsStartShort[MAX_BFUS];
+namespace NAtracDEnc {
+namespace NAtrac1 {
+
+constexpr uint32_t TAtrac1Data::BlocksPerBand[NumQMF + 1];
+constexpr uint32_t TAtrac1Data::SpecsPerBlock[MaxBfus];
+constexpr uint32_t TAtrac1Data::SpecsStartLong[MaxBfus];
+constexpr uint32_t TAtrac1Data::SpecsStartShort[MaxBfus];
 constexpr uint32_t TAtrac1Data::BfuAmountTab[8];
 double TAtrac1Data::ScaleTable[64] = {0};
 double TAtrac1Data::SineWindow[32] = {0};
 
+} //namespace NAtrac1
+} //namespace NAtracDEnc
diff --git a/src/atrac/atrac1.h b/src/atrac/atrac1.h
index cb0df1f..9736ea7 100644
--- a/src/atrac/atrac1.h
+++ b/src/atrac/atrac1.h
@@ -4,23 +4,52 @@
 #include <map>
 #include <math.h>
 #include "../bitstream/bitstream.h"
-const int QMF_BANDS = 3;
-const int MAX_BFUS = 52;
+namespace NAtracDEnc {
+namespace NAtrac1 {
+
+class TAtrac1EncodeSettings {
+public:
+    enum class EWindowMode {
+        EWM_NOTRANSIENT,
+        EWM_AUTO
+    };
+private:
+    const uint32_t BfuIdxConst = 0;
+    const bool FastBfuNumSearch = false;
+    EWindowMode WindowMode = EWindowMode::EWM_AUTO;
+    const uint32_t WindowMask = 0;
+public:
+    TAtrac1EncodeSettings()
+    {}
+    TAtrac1EncodeSettings(uint32_t bfuIdxConst, bool fastBfuNumSearch, EWindowMode windowMode, uint32_t windowMask)
+        : BfuIdxConst(bfuIdxConst)
+        , FastBfuNumSearch(fastBfuNumSearch)
+        , WindowMode(windowMode)
+        , WindowMask(windowMask)
+    {}
+    uint32_t GetBfuIdxConst() const { return BfuIdxConst; }
+    bool GetFastBfuNumSearch() const { return FastBfuNumSearch; }
+    EWindowMode GetWindowMode() const {return WindowMode; }
+    uint32_t GetWindowMask() const {return WindowMask; }
+};
 
 class TAtrac1Data {
+public:
+    static constexpr uint8_t MaxBfus = 52;
+    static constexpr uint8_t NumQMF = 3;
 protected:
-	static constexpr uint32_t SpecsPerBlock[MAX_BFUS] = {
+	static constexpr uint32_t SpecsPerBlock[MaxBfus] = {
         8,  8,  8,  8,  4,  4,  4,  4,  8,  8,  8,  8,  6,  6,  6,  6, 6, 6, 6, 6,  // low band
         6,  6,  6,  6,  7,  7,  7,  7,  9,  9,  9,  9,  10, 10, 10, 10,             // middle band
         12, 12, 12, 12, 12, 12, 12, 12, 20, 20, 20, 20, 20, 20, 20, 20              // high band
 	};
-	static constexpr uint32_t BlocksPerBand[QMF_BANDS + 1] = {0, 20, 36, 52};
-	static constexpr uint32_t SpecsStartLong[MAX_BFUS] = {
+	static constexpr uint32_t BlocksPerBand[NumQMF + 1] = {0, 20, 36, 52};
+	static constexpr uint32_t SpecsStartLong[MaxBfus] = {
         0,   8,   16,  24,  32,  36,  40,  44,  48,  56,  64,  72,  80,  86,  92,  98, 104, 110, 116, 122,
         128, 134, 140, 146, 152, 159, 166, 173, 180, 189, 198, 207, 216, 226, 236, 246,
         256, 268, 280, 292, 304, 316, 328, 340, 352, 372, 392, 412, 432, 452, 472, 492,
 	};
-    static constexpr uint32_t SpecsStartShort[MAX_BFUS] = {
+    static constexpr uint32_t SpecsStartShort[MaxBfus] = {
         0,   32,  64,  96,  8,   40,  72,  104, 12,  44,  76,  108, 20,  52,  84,  116, 26,  58,  90, 122,
         128, 160, 192, 224, 134, 166, 198, 230, 141, 173, 205, 237, 150, 182, 214, 246,
         256, 288, 320, 352, 384, 416, 448, 480, 268, 300, 332, 364, 396, 428, 460, 492
@@ -30,8 +59,6 @@ protected:
 	static const uint32_t BitsPerBfuAmountTabIdx = 3;
 	static const uint32_t BitsPerIDWL = 4;
 	static const uint32_t BitsPerIDSF = 6;
-    static const uint32_t NumSamples = 512;
-    static const uint8_t NumQMF = QMF_BANDS;
 
     static double ScaleTable[64];
     static double SineWindow[32];
@@ -43,6 +70,7 @@ protected:
         return 2;
     }
 public:
+    static const uint32_t NumSamples = 512;
     TAtrac1Data() {
         if (ScaleTable[0] == 0) {
             for (uint32_t i = 0; i < 64; i++) {
@@ -57,30 +85,5 @@ public:
     }
 };
 
-class TBlockSize {
-    static std::array<int, QMF_BANDS> Parse(NBitStream::TBitStream* stream) {
-        std::array<int,QMF_BANDS> tmp;
-        tmp[0] = 2 - stream->Read(2);
-        tmp[1] = 2 - stream->Read(2);
-        tmp[2] = 3 - stream->Read(2);
-        stream->Read(2); //skip unused 2 bits
-        return tmp;
-    }
-    static std::array<int,QMF_BANDS> Create(bool lowShort, bool midShort, bool hiShort) {
-        std::array<int,QMF_BANDS> tmp;
-        tmp[0] = lowShort ? 2 : 0;
-        tmp[1] = midShort ? 2 : 0;
-        tmp[2] = hiShort ? 3 : 0;
-        return tmp;
-    }
-public:
-    TBlockSize(NBitStream::TBitStream* stream)
-        : LogCount(Parse(stream))
-    {}
-    TBlockSize(bool lowShort, bool midShort, bool hiShort)
-        : LogCount(Create(lowShort, midShort, hiShort))
-    {}
-    const std::array<int,QMF_BANDS> LogCount;
-};
-
-
+} //namespace NAtrac1
+} //namespace NAtracDEnc
diff --git a/src/atrac/atrac1_bitalloc.cpp b/src/atrac/atrac1_bitalloc.cpp
index 783ffd1..0db6272 100644
--- a/src/atrac/atrac1_bitalloc.cpp
+++ b/src/atrac/atrac1_bitalloc.cpp
@@ -1,9 +1,12 @@
 #include "atrac1_bitalloc.h"
+#include "atrac_psy_common.h"
 #include "atrac_scale.h"
 #include "atrac1.h"
 #include <math.h>
 #include <cassert>
 #include "../bitstream/bitstream.h"
+
+namespace NAtracDEnc {
 namespace NAtrac1 {
 
 using std::vector;
@@ -11,47 +14,26 @@ using std::cerr;
 using std::endl;
 using std::pair;
 
-static const uint32_t FixedBitAllocTableLong[MAX_BFUS] = {
+static const uint32_t FixedBitAllocTableLong[TAtrac1BitStreamWriter::MaxBfus] = {
     7, 7, 7, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 5, 5, 5, 5, 6,
     6, 6, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 4,
     4, 4, 3, 3, 3, 3, 3, 3, 2, 2, 2, 1, 1, 0, 0, 0
 };
 
-static const uint32_t FixedBitAllocTableShort[MAX_BFUS] = {
+static const uint32_t FixedBitAllocTableShort[TAtrac1BitStreamWriter::MaxBfus] = {
     6, 6, 6, 6,  6, 6, 6, 6,  6, 6, 6, 6,  6, 6, 6, 6,  6, 6, 6, 6,
     6, 6, 6, 6,  5, 5, 5, 5,  5, 5, 5, 5,  5, 5, 5, 5,
     4, 4, 4, 4, 4, 4, 4, 4,   0, 0, 0, 0, 0, 0, 0, 0
 };
 
-static const uint32_t BitBoostMask[MAX_BFUS] = {
+static const uint32_t BitBoostMask[TAtrac1BitStreamWriter::MaxBfus] = {
     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1,
     1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1,
     1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
 };
 
-//returns 1 for tone-like, 0 - noise-like
-static double AnalizeSpread(const std::vector<TScaledBlock>& scaledBlocks) {
-    double s = 0.0;
-    for (size_t i = 0; i < scaledBlocks.size(); ++i) {
-        s += scaledBlocks[i].ScaleFactorIndex;
-    }
-    s /= scaledBlocks.size();
-    double sigma = 0.0;
-    double xxx = 0.0;
-    for (size_t i = 0; i < scaledBlocks.size(); ++i) {
-        xxx = (scaledBlocks[i].ScaleFactorIndex - s);
-        xxx *= xxx;
-        sigma += xxx;
-    }
-    sigma /= scaledBlocks.size();
-    sigma = sqrt(sigma);
-    if (sigma > 14.0)
-        sigma = 14.0;
-    return sigma/14.0;
-}
-
 TBitsBooster::TBitsBooster() {
-    for (uint32_t i = 0; i < MAX_BFUS; ++i) {
+    for (uint32_t i = 0; i < MaxBfus; ++i) {
         if (BitBoostMask[i] == 0)
             continue;
         const uint32_t nBits = SpecsPerBlock[i];
@@ -68,7 +50,6 @@ uint32_t TBitsBooster::ApplyBoost(std::vector<uint32_t>* bitsPerEachBlock, uint3
     //the key too low
     if (maxIt == BitsBoostMap.begin())
         return surplus;
-    //std::cout << "key: " << key << " min key: " << MinKey << " it pos: " << maxIt->first << endl;
 
     while (surplus >= MinKey) {
         bool done = true;
@@ -76,7 +57,6 @@ uint32_t TBitsBooster::ApplyBoost(std::vector<uint32_t>* bitsPerEachBlock, uint3
             const uint32_t curBits = it->first;
             const uint32_t curPos = it->second;
 
-            //std::cout << "key: " << key << " curBits: " << curBits << endl;
             assert(key >= curBits);
             if (curPos >= bitsPerEachBlock->size())
                 break;
@@ -90,23 +70,25 @@ uint32_t TBitsBooster::ApplyBoost(std::vector<uint32_t>* bitsPerEachBlock, uint3
             (*bitsPerEachBlock)[curPos] += nBitsPerSpec;
             surplus -= curBits * nBitsPerSpec;
 
-            //std::cout << "added: " << curPos << " " << nBitsPerSpec << " got: " << (*bitsPerEachBlock)[curPos] << endl; 
             done = false;
         }
         if (done)
             break;
     }
 
-    //std::cout << "boost: " << surplus << " was " << target - cur << endl;
     return surplus;
 }
 
 
-vector<uint32_t> TAtrac1SimpleBitAlloc::CalcBitsAllocation(const std::vector<TScaledBlock>& scaledBlocks, const uint32_t bfuNum, const double spread, const double shift, const TBlockSize& blockSize) {
+vector<uint32_t> TAtrac1SimpleBitAlloc::CalcBitsAllocation(const std::vector<TScaledBlock>& scaledBlocks,
+                                                           const uint32_t bfuNum,
+                                                           const TFloat spread,
+                                                           const TFloat shift,
+                                                           const TBlockSize& blockSize) {
     vector<uint32_t> bitsPerEachBlock(bfuNum);
     for (size_t i = 0; i < bitsPerEachBlock.size(); ++i) {
         const uint32_t fix = blockSize.LogCount[BfuToBand(i)] ? FixedBitAllocTableShort[i] : FixedBitAllocTableLong[i];
-        int tmp = spread * ( (double)scaledBlocks[i].ScaleFactorIndex/3.2) + (1.0 - spread) * fix - shift;
+        int tmp = spread * ( (TFloat)scaledBlocks[i].ScaleFactorIndex/3.2) + (1.0 - spread) * fix - shift;
         if (tmp > 16) {
             bitsPerEachBlock[i] = 16;
         } else if (tmp < 2) {
@@ -143,7 +125,8 @@ uint32_t TAtrac1SimpleBitAlloc::GetMaxUsedBfuId(const vector<uint32_t>& bitsPerE
     return idx;
 }
 
-uint32_t TAtrac1SimpleBitAlloc::CheckBfuUsage(bool* changed, uint32_t curBfuId, const vector<uint32_t>& bitsPerEachBlock) {
+uint32_t TAtrac1SimpleBitAlloc::CheckBfuUsage(bool* changed,
+                                              uint32_t curBfuId, const vector<uint32_t>& bitsPerEachBlock) {
     uint32_t usedBfuId = GetMaxUsedBfuId(bitsPerEachBlock);
     if (usedBfuId < curBfuId) {
         *changed = true;
@@ -151,32 +134,34 @@ uint32_t TAtrac1SimpleBitAlloc::CheckBfuUsage(bool* changed, uint32_t curBfuId,
     }
     return curBfuId;
 }
+
 uint32_t TAtrac1SimpleBitAlloc::Write(const std::vector<TScaledBlock>& scaledBlocks, const TBlockSize& blockSize) {
     uint32_t bfuIdx = BfuIdxConst ? BfuIdxConst - 1 : 7;
     bool autoBfu = !BfuIdxConst;
-    double spread = AnalizeSpread(scaledBlocks);
+    TFloat spread = AnalizeScaleFactorSpread(scaledBlocks);
 
     vector<uint32_t> bitsPerEachBlock(BfuAmountTab[bfuIdx]);
     uint32_t targetBitsPerBfus;
     uint32_t curBitsPerBfus;
     for (;;) {
         bitsPerEachBlock.resize(BfuAmountTab[bfuIdx]);
-        const uint32_t bitsAvaliablePerBfus =  SoundUnitSize * 8 - BitsPerBfuAmountTabIdx - 32 - 2 - 3 - bitsPerEachBlock.size() * (BitsPerIDWL + BitsPerIDSF);
-        double maxShift = 15;
-        double minShift = -3;
-        double shift = 3.0;
+        const uint32_t bitsAvaliablePerBfus = SoundUnitSize * 8 - BitsPerBfuAmountTabIdx - 32 - 2 - 3 -
+                                              bitsPerEachBlock.size() * (BitsPerIDWL + BitsPerIDSF);
+        TFloat maxShift = 15;
+        TFloat minShift = -3;
+        TFloat shift = 3.0;
         const uint32_t maxBits = bitsAvaliablePerBfus;
         const uint32_t minBits = bitsAvaliablePerBfus - 110;
 
         bool bfuNumChanged = false;
         for (;;) {
-            const vector<uint32_t>& tmpAlloc = CalcBitsAllocation(scaledBlocks, BfuAmountTab[bfuIdx], spread, shift, blockSize);
+            const vector<uint32_t>& tmpAlloc = CalcBitsAllocation(scaledBlocks, BfuAmountTab[bfuIdx],
+                                                                  spread, shift, blockSize);
             uint32_t bitsUsed = 0;
             for (size_t i = 0; i < tmpAlloc.size(); i++) {
                 bitsUsed += SpecsPerBlock[i] * tmpAlloc[i];
             }
 
-            //std::cout << spread << " bitsUsed: " << bitsUsed << " min " << minBits << " max " << maxBits << " " << maxShift << "  " << minShift << " " << endl;
             if (bitsUsed < minBits) {
                 if (maxShift - minShift < 0.1) {
                     if (autoBfu) {
@@ -214,7 +199,10 @@ uint32_t TAtrac1SimpleBitAlloc::Write(const std::vector<TScaledBlock>& scaledBlo
     return BfuAmountTab[bfuIdx];
 }
 
-void TAtrac1BitStreamWriter::WriteBitStream(const vector<uint32_t>& bitsPerEachBlock, const std::vector<TScaledBlock>& scaledBlocks, uint32_t bfuAmountIdx, const TBlockSize& blockSize) {
+void TAtrac1BitStreamWriter::WriteBitStream(const vector<uint32_t>& bitsPerEachBlock,
+                                            const std::vector<TScaledBlock>& scaledBlocks,
+                                            uint32_t bfuAmountIdx,
+                                            const TBlockSize& blockSize) {
     NBitStream::TBitStream bitStream;
     size_t bitUsed = 0;
     if (bfuAmountIdx >= (1 << BitsPerBfuAmountTabIdx)) {
@@ -252,8 +240,8 @@ void TAtrac1BitStreamWriter::WriteBitStream(const vector<uint32_t>& bitsPerEachB
         if (wordLength == 0 || wordLength == 1)
             continue;
 
-        const double multiple = ((1 << (wordLength - 1)) - 1);
-        for (const double val : scaledBlocks[i].Values) {
+        const TFloat multiple = ((1 << (wordLength - 1)) - 1);
+        for (const TFloat val : scaledBlocks[i].Values) {
             const int tmp = round(val * multiple);
             const uint32_t testwl = bitsPerEachBlock[i] ? (bitsPerEachBlock[i] - 1) : 0;
             const uint32_t a = !!testwl + testwl;
@@ -280,4 +268,5 @@ void TAtrac1BitStreamWriter::WriteBitStream(const vector<uint32_t>& bitsPerEachB
     Container->WriteFrame(bitStream.GetBytes());
 }
 
-}
+} //namespace NAtrac1
+} //namespace NAtracDEnc
diff --git a/src/atrac/atrac1_bitalloc.h b/src/atrac/atrac1_bitalloc.h
index ce7b6fb..c5c4ad2 100644
--- a/src/atrac/atrac1_bitalloc.h
+++ b/src/atrac/atrac1_bitalloc.h
@@ -7,7 +7,9 @@
 #include <map>
 #include <cstdint>
 
+namespace NAtracDEnc {
 namespace NAtrac1 {
+
 using NAtracDEnc::TScaledBlock;
 
 class IAtrac1BitAlloc {
@@ -32,11 +34,13 @@ public:
     explicit TAtrac1BitStreamWriter(TAea* container)
         : Container(container)
     {};
-    void WriteBitStream(const std::vector<uint32_t>& bitsPerEachBlock, const std::vector<TScaledBlock>& scaledBlocks, uint32_t bfuAmountIdx, const TBlockSize& blockSize);
+    void WriteBitStream(const std::vector<uint32_t>& bitsPerEachBlock, const std::vector<TScaledBlock>& scaledBlocks,
+                        uint32_t bfuAmountIdx, const TBlockSize& blockSize);
 };
 
 class TAtrac1SimpleBitAlloc : public TAtrac1BitStreamWriter, public TBitsBooster, public virtual IAtrac1BitAlloc {
-    std::vector<uint32_t> CalcBitsAllocation(const std::vector<TScaledBlock>& scaledBlocks, const uint32_t bfuNum, const double spread, const double shift, const TBlockSize& blockSize);
+    std::vector<uint32_t> CalcBitsAllocation(const std::vector<TScaledBlock>& scaledBlocks, const uint32_t bfuNum,
+                                             const TFloat spread, const TFloat shift, const TBlockSize& blockSize);
     const uint32_t BfuIdxConst;
     const bool FastBfuNumSearch;
     uint32_t GetMaxUsedBfuId(const std::vector<uint32_t>& bitsPerEachBlock);
@@ -51,4 +55,5 @@ public:
      uint32_t Write(const std::vector<TScaledBlock>& scaledBlocks, const TBlockSize& blockSize) override;
 };
 
-}
+} //namespace NAtrac1
+} //namespace NAtracDEnc
diff --git a/src/atrac/atrac1_dequantiser.cpp b/src/atrac/atrac1_dequantiser.cpp
index 8229822..83abc76 100644
--- a/src/atrac/atrac1_dequantiser.cpp
+++ b/src/atrac/atrac1_dequantiser.cpp
@@ -1,15 +1,16 @@
 #include "atrac1_dequantiser.h"
 #include <string.h>
-
+namespace NAtracDEnc {
 namespace NAtrac1 {
+
 using namespace NBitStream;
 
 TAtrac1Dequantiser::TAtrac1Dequantiser() {
 }
 
-void TAtrac1Dequantiser::Dequant(TBitStream* stream, const TBlockSize& bs, double specs[512]) {
-    uint32_t wordLens[MAX_BFUS];
-    uint32_t idScaleFactors[MAX_BFUS];
+void TAtrac1Dequantiser::Dequant(TBitStream* stream, const TBlockSize& bs, TFloat specs[512]) {
+    uint32_t wordLens[MaxBfus];
+    uint32_t idScaleFactors[MaxBfus];
     const uint32_t numBFUs = BfuAmountTab[stream->Read(3)];
     stream->Read(2);
     stream->Read(3);
@@ -21,28 +22,29 @@ void TAtrac1Dequantiser::Dequant(TBitStream* stream, const TBlockSize& bs, doubl
     for (uint32_t i = 0; i < numBFUs; i++) {
         idScaleFactors[i] = stream->Read(6);
     }
-    for (uint32_t i = numBFUs; i < MAX_BFUS; i++) {
+    for (uint32_t i = numBFUs; i < MaxBfus; i++) {
         wordLens[i] = idScaleFactors[i] = 0;
     }
 
-    for (uint32_t bandNum = 0; bandNum < QMF_BANDS; bandNum++) {
+    for (uint32_t bandNum = 0; bandNum < NumQMF; bandNum++) {
         for (uint32_t bfuNum = BlocksPerBand[bandNum]; bfuNum < BlocksPerBand[bandNum + 1]; bfuNum++) {
             const uint32_t numSpecs = SpecsPerBlock[bfuNum];
             const uint32_t wordLen = !!wordLens[bfuNum] + wordLens[bfuNum];
-            const double scaleFactor = ScaleTable[idScaleFactors[bfuNum]];
+            const TFloat scaleFactor = ScaleTable[idScaleFactors[bfuNum]];
             const uint32_t startPos = bs.LogCount[bandNum] ? SpecsStartShort[bfuNum] : SpecsStartLong[bfuNum]; 
             if (wordLen) {
-                double maxQuant = 1.0 / (double)((1 << (wordLen - 1)) - 1);
+                TFloat maxQuant = 1.0 / (TFloat)((1 << (wordLen - 1)) - 1);
                 //cout << "BFU ("<< bfuNum << ") :" <<  "wordLen " << wordLen << " maxQuant " << maxQuant << " scaleFactor " << scaleFactor << " id " << idScaleFactors[bfuNum] << " num Specs " << numSpecs << " short: "<< (int)bs.LogCount[bandNum] << endl;
                 for (uint32_t i = 0; i < numSpecs; i++ ) {
                     specs[startPos + i] = scaleFactor * maxQuant * MakeSign(stream->Read(wordLen), wordLen);
                 }
             } else {
-                memset(&specs[startPos], 0, numSpecs * sizeof(double));
+                memset(&specs[startPos], 0, numSpecs * sizeof(TFloat));
             }
         }
 
     } 
 }
 
-}
+} //namespace NAtrac1
+} //namespace NAtracDEnc
diff --git a/src/atrac/atrac1_dequantiser.h b/src/atrac/atrac1_dequantiser.h
index 112fc8b..8b2a8b4 100644
--- a/src/atrac/atrac1_dequantiser.h
+++ b/src/atrac/atrac1_dequantiser.h
@@ -1,12 +1,16 @@
 #pragma once
 #include "atrac1.h"
+#include "atrac_scale.h"
 
 
+namespace NAtracDEnc {
 namespace NAtrac1 {
 
 class TAtrac1Dequantiser : public TAtrac1Data {
 public:
     TAtrac1Dequantiser();
-    void Dequant(NBitStream::TBitStream* stream, const TBlockSize& bs, double specs[512]);
+    void Dequant(NBitStream::TBitStream* stream, const TBlockSize& bs, TFloat specs[512]);
 };
-}
+
+} //namespace NAtrac1
+} //namespace NAtracDEnc
diff --git a/src/atrac/atrac1_qmf.h b/src/atrac/atrac1_qmf.h
index 8550932..37d0bba 100644
--- a/src/atrac/atrac1_qmf.h
+++ b/src/atrac/atrac1_qmf.h
@@ -2,24 +2,26 @@
 
 #include "../qmf/qmf.h"
 
+namespace NAtracDEnc {
+
 template<class TIn>
 class Atrac1SplitFilterBank {
     const static int nInSamples = 512;
     const static int delayComp = 39;
     TQmf<TIn, nInSamples> Qmf1;
     TQmf<TIn, nInSamples / 2> Qmf2;
-    std::vector<double> MidLowTmp;
-    std::vector<double> DelayBuf;
+    std::vector<TFloat> MidLowTmp;
+    std::vector<TFloat> DelayBuf;
 public:
     Atrac1SplitFilterBank() {
         MidLowTmp.resize(512);
         DelayBuf.resize(delayComp + 512);
     }
-    void Split(TIn* pcm, double* low, double* mid, double* hi) {
-        memcpy(&DelayBuf[0], &DelayBuf[256], sizeof(double) *  delayComp);
+    void Split(TIn* pcm, TFloat* low, TFloat* mid, TFloat* hi) {
+        memcpy(&DelayBuf[0], &DelayBuf[256], sizeof(TFloat) *  delayComp);
         Qmf1.Split(pcm, &MidLowTmp[0], &DelayBuf[delayComp]);
         Qmf2.Split(&MidLowTmp[0], low, mid);
-        memcpy(hi, &DelayBuf[0], sizeof(double) * 256);
+        memcpy(hi, &DelayBuf[0], sizeof(TFloat) * 256);
 
     }
 };
@@ -29,19 +31,19 @@ class Atrac1SynthesisFilterBank {
     const static int delayComp = 39;
     TQmf<TOut, nInSamples> Qmf1;
     TQmf<TOut, nInSamples / 2> Qmf2;
-    std::vector<double> MidLowTmp;
-    std::vector<double> DelayBuf;
+    std::vector<TFloat> MidLowTmp;
+    std::vector<TFloat> DelayBuf;
 public:
     Atrac1SynthesisFilterBank() {
         MidLowTmp.resize(512);
         DelayBuf.resize(delayComp + 512);
     }
-    void Synthesis(TOut* pcm, double* low, double* mid, double* hi) {
-        memcpy(&DelayBuf[0], &DelayBuf[256], sizeof(double) *  delayComp);
-        memcpy(&DelayBuf[delayComp], hi, sizeof(double) * 256);
+    void Synthesis(TOut* pcm, TFloat* low, TFloat* mid, TFloat* hi) {
+        memcpy(&DelayBuf[0], &DelayBuf[256], sizeof(TFloat) *  delayComp);
+        memcpy(&DelayBuf[delayComp], hi, sizeof(TFloat) * 256);
         Qmf2.Merge(&MidLowTmp[0], &low[0], &mid[0]);
         Qmf1.Merge(&pcm[0], &MidLowTmp[0], &DelayBuf[0]);
     }
 };
 
-
+} //namespace NAtracDEnc
diff --git a/src/atrac/atrac3.cpp b/src/atrac/atrac3.cpp
new file mode 100644
index 0000000..e587d2c
--- /dev/null
+++ b/src/atrac/atrac3.cpp
@@ -0,0 +1,33 @@
+#include "atrac3.h"
+#include <algorithm>
+
+namespace NAtracDEnc {
+namespace NAtrac3 {
+
+constexpr uint32_t TAtrac3Data::BlockSizeTab[33];
+constexpr uint32_t TAtrac3Data::ClcLengthTab[8];
+constexpr double TAtrac3Data::MaxQuant[8];
+constexpr uint32_t TAtrac3Data::BlocksPerBand[4 + 1];
+constexpr uint32_t TAtrac3Data::SpecsPerBlock[33];
+constexpr TAtrac3Data::THuffEntry TAtrac3Data::HuffTable1[HuffTable1Sz];
+constexpr TAtrac3Data::THuffEntry TAtrac3Data::HuffTable2[HuffTable2Sz];
+constexpr TAtrac3Data::THuffEntry TAtrac3Data::HuffTable3[HuffTable3Sz];
+constexpr TAtrac3Data::THuffEntry TAtrac3Data::HuffTable5[HuffTable5Sz];
+constexpr TAtrac3Data::THuffEntry TAtrac3Data::HuffTable6[HuffTable6Sz];
+constexpr TAtrac3Data::THuffEntry TAtrac3Data::HuffTable7[HuffTable7Sz];
+constexpr TAtrac3Data::THuffTablePair TAtrac3Data::HuffTables[7];
+
+constexpr TContainerParams TAtrac3Data::ContainerParams[8];
+double TAtrac3Data::EncodeWindow[256] = {0};
+double TAtrac3Data::DecodeWindow[256] = {0};
+double TAtrac3Data::ScaleTable[64] = {0};
+double TAtrac3Data::GainLevel[16];
+double TAtrac3Data::GainInterpolation[31];
+
+const TContainerParams* TAtrac3Data::GetContainerParamsForBitrate(uint32_t bitrate) {
+    // First entry with Bitrate >= bitrate; clamp to the last entry instead of returning one-past-the-end.
+    return std::min(std::lower_bound(ContainerParams, ContainerParams + 8, bitrate), ContainerParams + 7);
+}
+
+} // namespace NAtrac3
+} // namespace NAtracDEnc
diff --git a/src/atrac/atrac3.h b/src/atrac/atrac3.h
new file mode 100644
index 0000000..21f1e34
--- /dev/null
+++ b/src/atrac/atrac3.h
@@ -0,0 +1,242 @@
+#pragma once
+#include <math.h>
+#include <cstdint>
+#include <vector>
+#include <cassert>
+#include <iostream>
+
+namespace NAtracDEnc {
+namespace NAtrac3 {
+
+struct TContainerParams {
+    const uint32_t Bitrate;
+    const uint16_t FrameSz;
+    const bool Js;
+};
+
+inline bool operator< (const TContainerParams& x, const TContainerParams& y)
+{
+    return x.Bitrate < y.Bitrate;
+}
+inline bool operator> (const TContainerParams& x, const TContainerParams& y)
+{
+    return x.Bitrate > y.Bitrate;
+}
+inline bool operator< (const TContainerParams& x, const unsigned int y)
+{
+    return x.Bitrate < y;
+}
+inline bool operator> (const TContainerParams& x, const unsigned int y)
+{
+    return x.Bitrate > y;
+}
+
+class TAtrac3Data {
+public:
+    static constexpr uint8_t MaxBfus = 32;
+    static constexpr uint32_t NumSamples = 1024;
+//protected:
+    static const uint32_t MDCTSz = 512;
+    static double ScaleTable[64];
+    static double EncodeWindow[256];
+    static double DecodeWindow[256];
+    static double GainLevel[16];
+    static double GainInterpolation[31];
+    static constexpr int32_t ExponentOffset = 4;
+    static constexpr int32_t LocScale = 3;
+    static constexpr int32_t LocSz = 1 << LocScale;
+    static constexpr int32_t GainInterpolationPosShift = 15;
+
+    static constexpr uint32_t NumSpecs = NumSamples;
+    static const uint32_t frameSz = 152;
+    static constexpr double MaxQuant[8] = {
+        0.0,    1.5,    2.5,    3.5,
+        4.5,    7.5,    15.5,   31.5
+    };
+    static constexpr uint32_t BlockSizeTab[33] = {
+        0,    8,    16,    24,    32,    40,    48,    56,
+        64,   80,   96,    112,   128,   144,   160,   176,
+        192,  224,  256,   288,   320,   352,   384,   416,
+        448,  480,  512,   576,   640,   704,   768,   896,
+        1024
+    };
+    static constexpr uint32_t const * const SpecsStartShort = &BlockSizeTab[0];
+
+    static constexpr uint32_t const * const SpecsStartLong = &BlockSizeTab[0];
+    static constexpr uint32_t ClcLengthTab[8] = { 0, 4, 3, 3, 4, 4, 5, 6 };
+    static constexpr int NumQMF = 4;
+    static constexpr uint32_t MaxSpecs = NumSamples; //1024
+    static constexpr uint32_t MaxSpecsPerBlock = 128;
+
+    static constexpr uint32_t BlocksPerBand[NumQMF + 1] = {0, 18, 26, 30, 32};
+    static constexpr uint32_t SpecsPerBlock[33] = {
+        8,    8,    8,     8,     8,    8,    8,  8,  
+        16,   16,   16,    16,    16,   16,   16, 16, 
+        32,   32,   32,    32,    32,   32,   32, 32,
+        32,   32,   64,    64,    64,   64,   128, 128,
+        128
+    };
+    struct THuffEntry {
+        const uint8_t Code;
+        const uint8_t Bits;
+    };
+    static constexpr uint8_t HuffTable1Sz = 9;
+    static constexpr THuffEntry HuffTable1[HuffTable1Sz] = {
+        { 0x0, 1 },
+        { 0x4, 3 }, { 0x5, 3 },
+        { 0xC, 4 }, { 0xD, 4 }, 
+        { 0x1C, 5 }, { 0x1D, 5 }, { 0x1E, 5 }, { 0x1F, 5 }
+    };
+    static constexpr uint8_t HuffTable2Sz = 5;
+    static constexpr THuffEntry HuffTable2[HuffTable2Sz] = {
+        { 0x0, 1 },
+        { 0x4, 3 }, { 0x5, 3 }, { 0x6, 3 }, { 0x7, 3 }
+    };
+    static constexpr uint8_t HuffTable3Sz = 7;
+    static constexpr THuffEntry HuffTable3[HuffTable3Sz] = {
+        { 0x0, 1 },
+        { 0x4, 3}, { 0x5, 3 },
+        { 0xC, 4 }, { 0xD, 4 }, { 0xE, 4 }, { 0xF, 4 }
+    };
+    static constexpr uint8_t HuffTable5Sz = 15;
+    static constexpr THuffEntry HuffTable5[HuffTable5Sz] = {
+        { 0x0, 2 },
+        { 0x2, 3 }, { 0x3, 3 },
+        { 0x8, 4 }, { 0x9, 4 }, { 0xA, 4 }, { 0xB, 4 }, //{ 0xC, 4 }, { 0xD, 4 },
+        { 0x1C, 5 }, { 0x1D, 5 },
+        { 0x3C, 6 }, { 0x3D, 6 }, { 0x3E, 6 }, { 0x3F, 6},
+        { 0xC, 4 }, { 0xD, 4 } //TODO: is it right table???
+    };
+    static constexpr uint8_t HuffTable6Sz = 31;
+    static constexpr THuffEntry HuffTable6[HuffTable6Sz] = {
+        { 0x0, 3 },
+        { 0x2, 4 }, { 0x3, 4 }, { 0x4, 4 }, { 0x5, 4 }, { 0x6, 4 }, { 0x7, 4 }, //{ 0x8, 4 }, { 0x9, 4 },
+        { 0x14, 5 }, { 0x15, 5 }, { 0x16, 5 }, { 0x17, 5 }, { 0x18, 5 }, { 0x19, 5 },
+        { 0x34, 6 }, { 0x35, 6 }, { 0x36, 6 }, { 0x37, 6 }, { 0x38, 6 }, { 0x39, 6 }, { 0x3A, 6 }, { 0x3B, 6 },
+        { 0x78, 7 }, { 0x79, 7 }, { 0x7A, 7 }, { 0x7B, 7 }, { 0x7C, 7 }, { 0x7D, 7 }, { 0x7E, 7 }, { 0x7F, 7 },
+        { 0x8, 4 }, { 0x9, 4 } //TODO: is it right table???
+    };
+    static constexpr uint8_t HuffTable7Sz = 63;
+    static constexpr THuffEntry HuffTable7[HuffTable7Sz] = {
+        { 0x0, 3 },
+        //{ 0x2, 4 }, { 0x3, 4 },
+        { 0x8, 5 }, { 0x9, 5 }, { 0xA, 5}, { 0xB, 5 }, { 0xC, 5 }, { 0xD, 5 }, { 0xE, 5}, { 0xF, 5 }, { 0x10, 5 },
+                                                                                                            { 0x11, 5 },
+        { 0x24, 6 }, { 0x25, 6 }, { 0x26, 6 }, { 0x27, 6 }, { 0x28, 6 }, { 0x29, 6 }, { 0x2A, 6 }, { 0x2B, 6 },
+        { 0x2C, 6 }, { 0x2D, 6 }, { 0x2E, 6 }, { 0x2F, 6 }, { 0x30, 6 }, { 0x31, 6 }, { 0x32, 6 }, { 0x33, 6 },
+        { 0x68, 7 }, { 0x69, 7 }, { 0x6A, 7 }, { 0x6B, 7 }, { 0x6C, 7 }, { 0x6D, 7 }, { 0x6E, 7 },
+        { 0x6F, 7 }, { 0x70, 7 }, { 0x71, 7 }, { 0x72, 7 }, { 0x73, 7 }, { 0x74, 7 }, { 0x75, 7 },
+        { 0xEC, 8 }, { 0xED, 8 }, { 0xEE, 8 }, { 0xEF, 8 }, { 0xF0, 8 }, { 0xF1, 8 }, { 0xF2, 8 }, { 0xF3, 8 },
+                                                                                               { 0xF4, 8 }, { 0xF5, 8 },
+        { 0xF6, 8 }, { 0xF7, 8 }, { 0xF8, 8 }, { 0xF9, 8 }, { 0xFA, 8 }, { 0xFB, 8 }, { 0xFC, 8 }, { 0xFD, 8 },
+                                                                                               { 0xFE, 8 }, { 0xFF, 8 },
+        { 0x2, 4 }, { 0x3, 4 } //TODO: is it right table???
+    };
+
+    struct THuffTablePair {
+        const THuffEntry* Table;
+        const uint32_t Sz;
+    };
+
+    static constexpr THuffTablePair HuffTables[7] {
+        { HuffTable1, HuffTable1Sz },
+        { HuffTable2, HuffTable2Sz },
+        { HuffTable3, HuffTable3Sz },
+        { HuffTable1, HuffTable1Sz },
+        { HuffTable5, HuffTable5Sz },
+        { HuffTable6, HuffTable6Sz },
+        { HuffTable7, HuffTable7Sz }
+    };
+public:
+    TAtrac3Data() {
+        if (ScaleTable[0] == 0) {
+            for (uint32_t i = 0; i < 64; i++) {
+                ScaleTable[i] = pow(2.0, (double)(i - 15.0) / 3.0);
+            }
+        }
+        for (int i = 0; i < 256; i++) {
+            EncodeWindow[i] = (sin(((i + 0.5) / 256.0 - 0.5) * M_PI) + 1.0)/* * 0.5*/;
+        }
+        for (int i = 0; i < 256; i++) {
+            const double a = EncodeWindow[i];
+            const double b = EncodeWindow[255-i];
+            DecodeWindow[i] = 2.0 * a / (a*a + b*b);
+        }
+        for (int i = 0; i < 16; i++) {
+            GainLevel[i] = pow(2.0, ExponentOffset - i);
+        }
+        for (int i = 0; i < 31; i++) {
+            GainInterpolation[i] = pow(2.0, -1.0 / LocSz * (i - 15));
+        }
+    }
+    static uint32_t MantissaToCLcIdx(int32_t mantissa) {
+        assert(mantissa > -3 && mantissa < 2);
+        const uint32_t mantissa_clc_rtab[4] = { 2, 3, 0, 1};
+        return mantissa_clc_rtab[mantissa + 2];
+    }
+    static uint32_t MantissasToVlcIndex(int32_t a, int32_t b) {
+        assert(a > -2 && a < 2); 
+        assert(b > -2 && b < 2); 
+        const uint32_t mantissas_vlc_rtab[9] = { 8, 4, 7, 2, 0, 1, 6, 3, 5 };
+        const uint8_t idx = 3 * (a + 1) + (b + 1);
+        return mantissas_vlc_rtab[idx];
+    }
+    static constexpr TContainerParams ContainerParams[8] = {
+        { 66150, 192, true },
+        { 93713, 272, true },
+        { 104738, 304, false },
+        { 132300, 384, false },
+        { 146081, 424, false },
+        { 176400, 512, false },
+        { 264600, 768, false },
+        { 352800, 1024, false }
+    };
+    static const TContainerParams* GetContainerParamsForBitrate(uint32_t bitrate);
+
+    class SubbandInfo {
+    public:
+        static const uint32_t MaxGainPointsNum = 8;
+        struct TGainPoint {
+            uint32_t Level;
+            uint32_t Location;
+        };
+    private:
+        std::vector<std::vector<TGainPoint>> Info;
+    public:
+        SubbandInfo()
+        {
+            Info.resize(4);
+        }
+        void AddSubbandCurve(uint16_t n, std::vector<TGainPoint>&& curve) {
+            Info[n] = std::move(curve);
+        }
+        uint32_t GetQmfNum() const {
+            return Info.size();
+        }
+        const std::vector<TGainPoint>& GetGainPoints(uint32_t i) const {
+            return Info[i];
+        }
+    };
+
+    struct TTonalVal {
+        const uint16_t Pos;
+        const double Val;
+    };
+    typedef std::vector<TTonalVal> TTonalComponents;
+};
+
+struct TAtrac3EncoderSettings {
+    explicit TAtrac3EncoderSettings(uint32_t bitrate, bool noGainControll, bool noTonalComponents)
+        : ConteinerParams(TAtrac3Data::GetContainerParamsForBitrate(bitrate))
+        , NoGainControll(noGainControll)
+        , NoTonalComponents(noTonalComponents)
+    {
+        std::cout << bitrate << " " << ConteinerParams->Bitrate << std::endl;
+    }
+    const TContainerParams* ConteinerParams;
+    const bool NoGainControll;
+    const bool NoTonalComponents;
+};
+
+} // namespace NAtrac3
+} // namespace NAtracDEnc
diff --git a/src/atrac/atrac3_bitstream.cpp b/src/atrac/atrac3_bitstream.cpp
new file mode 100644
index 0000000..e3256b7
--- /dev/null
+++ b/src/atrac/atrac3_bitstream.cpp
@@ -0,0 +1,424 @@
+#include "atrac3_bitstream.h"
+#include "atrac_psy_common.h"
+#include "../bitstream/bitstream.h"
+#include <cassert>
+#include <algorithm>
+#include <iostream>
+#include <vector>
+#include <cstdlib>
+
+#include <cstring>
+
+namespace NAtracDEnc {
+namespace NAtrac3 {
+
+using std::vector;
+using std::memset;
+
+static const uint32_t FixedBitAllocTable[TAtrac3Data::MaxBfus] = { //fixed word length per block; CalcBitsAllocation blends it with the scale factor based estimate
+  6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 
+  4, 4, 4, 3, 3, 3, 3, 3,
+  3, 2, 2, 1,
+  1, 0
+};
+
+uint32_t TAtrac3BitStreamWriter::CLCEnc(const uint32_t selector, const int mantissas[MaxSpecsPerBlock],
+                                        const uint32_t blockSize, NBitStream::TBitStream* bitStream)
+{
+    //Constant length coding of one block; returns the bit cost and writes only when bitStream is given.
+    const uint32_t codeLen = ClcLengthTab[selector];
+    const bool paired = !(selector > 1); //selectors 0 and 1 pack two mantissas into one code word
+    const uint32_t bitsUsed = paired ? codeLen * blockSize / 2 : codeLen * blockSize;
+    if (bitStream == nullptr) return bitsUsed;
+    if (paired) {
+        for (uint32_t pair = 0; pair < blockSize / 2; ++pair) {
+            const uint32_t first = MantissaToCLcIdx(mantissas[pair * 2]);
+            const uint32_t second = MantissaToCLcIdx(mantissas[pair * 2 + 1]);
+            assert(codeLen == 4);
+            bitStream->Write((first << 2) | second, codeLen);
+        }
+    } else {
+        for (uint32_t pos = 0; pos < blockSize; ++pos)
+            bitStream->Write(NBitStream::MakeSign(mantissas[pos], codeLen), codeLen);
+    }
+    return bitsUsed;
+}
+//Variable length coding of one block via HuffTables: returns the number of bits and writes the codes only when bitStream is not null.
+uint32_t TAtrac3BitStreamWriter::VLCEnc(const uint32_t selector, const int mantissas[MaxSpecsPerBlock],
+                                        const uint32_t blockSize, NBitStream::TBitStream* bitStream)
+{
+    assert(selector > 0); //tables are indexed by selector - 1
+    const THuffEntry* huffTable = HuffTables[selector - 1].Table;
+    const uint8_t tableSz = HuffTables[selector - 1].Sz;
+    uint32_t bitsUsed = 0;
+    if (selector > 1) { //one code per mantissa
+        for (uint32_t i = 0; i < blockSize; ++i) {
+            int m = mantissas[i];
+            uint32_t huffS = (m < 0) ? (((uint32_t)(-m)) << 1) | 1 : ((uint32_t)m) << 1; //2 * |m|, plus 1 for negative values
+            if (huffS)
+                huffS -= 1; //so 0, +1, -1, +2, -2, ... map to indexes 0, 1, 2, 3, 4, ...
+            assert(huffS < 256);
+            assert(huffS < tableSz);
+            bitsUsed += huffTable[huffS].Bits;
+            if (bitStream)
+                bitStream->Write(huffTable[huffS].Code, huffTable[huffS].Bits);
+        }
+    } else { //selector 1: one code per pair of mantissas
+        assert(tableSz == 9); 
+        for (uint32_t i = 0; i < blockSize / 2; ++i) {
+            const int ma = mantissas[i * 2];
+            const int mb = mantissas[i * 2 + 1];
+            const uint32_t huffS = MantissasToVlcIndex(ma, mb);
+            bitsUsed += huffTable[huffS].Bits;
+            if (bitStream)
+                bitStream->Write(huffTable[huffS].Code, huffTable[huffS].Bits);
+        }
+    }
+    return bitsUsed;
+}
+
+//Quantises the spectrum into mantisas and returns { coding mode (1 - CLC, 0 - VLC), bits needed for the spectral part }.
+std::pair<uint8_t, uint32_t> TAtrac3BitStreamWriter::CalcSpecsBitsConsumption(const vector<TScaledBlock>& scaledBlocks,
+                                                        const vector<uint32_t>& precisionPerEachBlocks, int* mantisas)
+{
+    uint32_t bitsUsed = 5 + 1; //numBlocks + codingMode
+    const uint32_t numBlocks = precisionPerEachBlocks.size();
+    bitsUsed += numBlocks * 3; //used VLC or CLC table (precision)
+
+    auto countBits = [&](bool clcMode, bool calcMant) { //by reference: [=] would deep-copy both input vectors on every call
+        uint32_t bits = 0;
+        for (uint32_t i = 0; i < numBlocks; ++i) {
+            if (precisionPerEachBlocks[i] == 0)
+                continue;
+            bits += 6; //sfi
+            const uint32_t first = BlockSizeTab[i];
+            const uint32_t last = BlockSizeTab[i+1];
+            const TFloat mul = MaxQuant[std::min(precisionPerEachBlocks[i], (uint32_t)7)];
+            if (calcMant) {
+                for (uint32_t j = 0, f = first; f < last; f++, j++) {
+                    mantisas[f] = round(scaledBlocks[i].Values[j] * mul);
+                }
+            }
+            bits += clcMode ? CLCEnc(precisionPerEachBlocks[i], mantisas + first, last - first, nullptr) :
+                VLCEnc(precisionPerEachBlocks[i], mantisas + first, last - first, nullptr);
+        }
+        return bits;
+    };
+    const uint32_t clcBits = countBits(true, true); //first pass also computes the mantissas
+    const uint32_t vlcBits = countBits(false, false); //second pass reuses them
+    const bool mode = clcBits <= vlcBits; //a tie selects CLC
+    return std::make_pair(mode, bitsUsed + (mode ? clcBits : vlcBits));
+}
+
+//Bisects the "shift" argument of CalcBitsAllocation until the spectrum cost fits the budget; returns { coding mode, word length per block } and leaves the mantissas in mt.
+std::pair<uint8_t, vector<uint32_t>> TAtrac3BitStreamWriter::CreateAllocation(const vector<TScaledBlock>& scaledBlocks,
+                                                                              uint16_t bitsUsed, int mt[MaxSpecs])
+{
+    TFloat spread = AnalizeScaleFactorSpread(scaledBlocks);
+
+    uint8_t numBfu = 32;
+    vector<uint32_t> precisionPerEachBlocks(numBfu);
+    uint8_t mode; //assigned on every path that leaves the inner loop
+    for (;;) { //executes once: left through the unconditional break at its end
+        precisionPerEachBlocks.resize(numBfu);
+        uint32_t usedBfus = 0;
+        for (auto v : precisionPerEachBlocks) { //the vector still holds only zeros here, so usedBfus stays 0
+            if (v)
+                usedBfus++;
+        }
+        const uint32_t bitsAvaliablePerBfus = 8 * Params.FrameSz/2 - bitsUsed - //half a frame minus the bits the caller already spent
+            5 - 1 - (numBfu * 3) - (usedBfus * 6);
+        TFloat maxShift = 15;
+        TFloat minShift = -3;
+        TFloat shift = 3.0;
+        const uint32_t maxBits = bitsAvaliablePerBfus;
+        const uint32_t minBits = bitsAvaliablePerBfus - 90; //NOTE(review): unsigned, wraps around when fewer than 90 bits are available
+        for (;;) {
+            const vector<uint32_t>& tmpAlloc = CalcBitsAllocation(scaledBlocks, numBfu, spread, shift);
+            const auto consumption = CalcSpecsBitsConsumption(scaledBlocks, tmpAlloc, mt);
+
+            if (consumption.second < minBits) { //budget not filled: accept if the interval is exhausted, otherwise lower the shift
+                if (maxShift - minShift < 0.1) {
+                    precisionPerEachBlocks = tmpAlloc;
+                    mode = consumption.first;
+                    break;
+                }
+                maxShift = shift;
+                shift -= (shift - minShift) / 2;
+            } else if (consumption.second > maxBits) { //over budget: raise the shift; NOTE(review): this branch has no exit of its own - confirm the search cannot stay here forever
+                minShift = shift;
+                shift += (maxShift - shift) / 2;
+            } else {
+                precisionPerEachBlocks = tmpAlloc;
+                mode = consumption.first;
+                break;
+            }
+        }
+        break;
+
+    }
+    return { mode, precisionPerEachBlocks };
+}
+
+void TAtrac3BitStreamWriter::EncodeSpecs(const vector<TScaledBlock>& scaledBlocks, NBitStream::TBitStream* bitStream,
+                                         const uint16_t bitsUsed)
+{
+    int mt[MaxSpecs]; //quantised mantissas, filled while CreateAllocation evaluates allocations
+    auto allocation = CreateAllocation(scaledBlocks, bitsUsed, mt);
+    const vector<uint32_t>& precisionPerEachBlocks = allocation.second;
+    const uint32_t numBlocks = precisionPerEachBlocks.size(); //number of blocks to save
+    const uint32_t codingMode = allocation.first;//0 - VLC, 1 - CLC
+    //written layout: block count - 1 (5 bits), coding mode (1 bit), selector per block (3 bits), scale factor index per used block (6 bits), mantissas
+    assert(numBlocks <= 32);
+    bitStream->Write(numBlocks-1, 5);
+    bitStream->Write(codingMode, 1);
+    for (uint32_t i = 0; i < numBlocks; ++i) {
+        uint32_t val = precisionPerEachBlocks[i]; //coding table used (VLC) or number of bits used (CLC)
+        bitStream->Write(val, 3);
+    }
+    for (uint32_t i = 0; i < numBlocks; ++i) {
+        if (precisionPerEachBlocks[i] == 0) //blocks with selector 0 carry neither scale factor nor mantissas
+            continue;
+        bitStream->Write(scaledBlocks[i].ScaleFactorIndex, 6);
+    }
+    for (uint32_t i = 0; i < numBlocks; ++i) {
+        if (precisionPerEachBlocks[i] == 0)
+            continue;
+
+        const uint32_t first = BlockSizeTab[i];
+        const uint32_t last = BlockSizeTab[i+1];
+        const uint32_t blockSize = last - first;
+
+        if (codingMode == 1) {
+            CLCEnc(precisionPerEachBlocks[i], mt + first, blockSize, bitStream);
+        } else {
+            VLCEnc(precisionPerEachBlocks[i], mt + first, blockSize, bitStream);
+        }
+    }
+}
+//Sorts the components into 64 groups keyed by QuantIdx * 8 + number of values, splits every group into subgroups and returns the total number of subgroups.
+uint8_t TAtrac3BitStreamWriter::GroupTonalComponents(const std::vector<TTonalComponent>& tonalComponents,
+                                                     TTonalComponentsSubGroup groups[64])
+{
+    for (const TTonalComponent& tc : tonalComponents) {
+        assert(tc.ScaledBlock.Values.size() < 8);
+        assert(tc.ScaledBlock.Values.size() > 0);
+        assert(tc.QuantIdx >1);
+        assert(tc.QuantIdx <8);
+        groups[tc.QuantIdx * 8 + tc.ScaledBlock.Values.size()].SubGroupPtr.push_back(&tc); //stores a pointer into the caller's vector
+    }
+    uint8_t tcsgn = 0;
+    //for each group
+    for (uint8_t i = 0; i < 64; ++i) {
+        uint8_t start_pos;
+        uint8_t cur_pos = 0;
+        //scan tonal components; NOTE(review): the position test below presumably relies on ascending Pos order - confirm with the caller
+        while (cur_pos < groups[i].SubGroupPtr.size()) {
+            start_pos = cur_pos;
+            ++tcsgn;
+            groups[i].SubGroupMap.push_back(cur_pos); //index of the first component of the new subgroup
+            uint8_t groupLimiter = 0;
+            //a subgroup is closed before it would hold more than 7 components inside one 64-spec block
+            do {
+                ++cur_pos;
+                if (cur_pos == groups[i].SubGroupPtr.size())
+                    break;
+                if (groups[i].SubGroupPtr[cur_pos]->ValPtr->Pos - (groups[i].SubGroupPtr[start_pos]->ValPtr->Pos & ~63) < 64) { //same 64-spec block as start_pos
+                    ++groupLimiter;
+                } else { //next 64-spec block reached: restart the count from this component
+                    groupLimiter = 0;
+                    start_pos = cur_pos;
+                }
+            } while (groupLimiter < 7);
+        }
+    }
+    return tcsgn;
+}
+
+//Writes the tonal components section of a sound unit; returns the number of bits written.
+uint16_t TAtrac3BitStreamWriter::EncodeTonalComponents(const std::vector<TTonalComponent>& tonalComponents,
+                                                       NBitStream::TBitStream* bitStream, uint8_t numQmfBand)
+{
+    const uint16_t bitsUsed = bitStream->GetSizeInBits(); //stream size on entry, subtracted for the return value
+    //group tonal components with same quantizer and len
+    TTonalComponentsSubGroup groups[64];
+    const uint8_t tcsgn = GroupTonalComponents(tonalComponents, groups);
+
+    assert(tcsgn < 32);
+    bitStream->Write(tcsgn, 5);
+    if (tcsgn == 0) {
+        for (int i = 0; i < 64; ++i)
+            assert(groups[i].SubGroupPtr.size() == 0);
+        return 5; //wrote 0 but 5 bits for tcsgn
+    }
+    //Coding mode:
+    // 0 - All are VLC
+    // 1 - All are CLC
+    // 2 - Error
+    // 3 - Own mode for each component
+
+    //TODO: implement switch for best coding mode. Now VLC for all
+    bitStream->Write(0, 2);
+
+    uint8_t tcgnCheck = 0;
+    //for each group of equal quantiser and len
+    for (uint8_t i = 0; i < 64; ++i) {
+        const TTonalComponentsSubGroup& curGroup = groups[i];
+        if (curGroup.SubGroupPtr.size() == 0) {
+            assert(curGroup.SubGroupMap.size() == 0);
+            continue;
+        }
+        assert(curGroup.SubGroupMap.size());
+        for (uint8_t subgroup = 0; subgroup < curGroup.SubGroupMap.size(); ++subgroup) {
+            const uint8_t subGroupStartPos = curGroup.SubGroupMap[subgroup];
+            const uint8_t subGroupEndPos = (subgroup < curGroup.SubGroupMap.size() - 1) ?
+                curGroup.SubGroupMap[subgroup+1] : curGroup.SubGroupPtr.size();
+            assert(subGroupEndPos > subGroupStartPos);
+            //number of coded values are same in group
+            const uint8_t codedValues = curGroup.SubGroupPtr[0]->ScaledBlock.Values.size();
+
+            //Number of tonal component for each 64spec block. Used to set qmf band flags and simplify band encoding loop
+            uint8_t bandFlags[16];
+            memset(bandFlags, 0, 16 * sizeof(uint8_t));
+            assert(numQmfBand <= 4);
+            for (uint8_t j = subGroupStartPos; j < subGroupEndPos; ++j) {
+                //assert num of coded values are same in group
+                assert(codedValues == curGroup.SubGroupPtr[j]->ScaledBlock.Values.size());
+                uint8_t specBlock = (curGroup.SubGroupPtr[j]->ValPtr->Pos) >> 6;
+                assert((specBlock >> 2) < numQmfBand);
+                bandFlags[specBlock]++;
+            }
+
+            assert(numQmfBand == 4);
+
+            tcgnCheck++;
+            //true when any of the four 64-spec blocks of a QMF band holds a component (byte tests, no uint32_t read of the byte array)
+            const auto qmfBandUsed = [&bandFlags](uint32_t band) -> bool {
+                return std::any_of(bandFlags + band * 4, bandFlags + band * 4 + 4, [](uint8_t cnt) { return cnt != 0; });
+            };
+            for (uint8_t j = 0; j < numQmfBand; ++j) {
+                bitStream->Write(qmfBandUsed(j), 1);
+            }
+            //write number of coded values for components in current group
+            assert(codedValues > 0);
+            bitStream->Write(codedValues - 1, 3);
+            //write quant index
+            assert((i >> 3) > 1);
+            assert((i >> 3) < 8);
+            assert(i);
+            bitStream->Write(i >> 3, 3);
+            uint8_t lastPos = subGroupStartPos;
+            uint8_t checkPos = 0;
+            for (uint16_t j = 0; j < 16; ++j) {
+                if (!qmfBandUsed(j >> 2)) { //skip all four blocks of a QMF band without components
+                    continue;
+                }
+
+                const uint8_t codedComponents = bandFlags[j];
+                assert(codedComponents < 8);
+                bitStream->Write(codedComponents, 3);
+                uint8_t k = lastPos;
+                for (; k < lastPos + codedComponents; ++k) {
+                    assert(curGroup.SubGroupPtr[k]->ValPtr->Pos >= j * 64);
+                    uint16_t relPos = curGroup.SubGroupPtr[k]->ValPtr->Pos - j * 64;
+                    assert(curGroup.SubGroupPtr[k]->ScaledBlock.ScaleFactorIndex < 64);
+                    bitStream->Write(curGroup.SubGroupPtr[k]->ScaledBlock.ScaleFactorIndex, 6);
+
+                    assert(relPos < 64);
+                    bitStream->Write(relPos, 6);
+
+                    assert(curGroup.SubGroupPtr[k]->ScaledBlock.Values.size() < 8);
+                    int mantisas[256];
+                    const TFloat mul = MaxQuant[std::min((uint32_t)(i>>3), (uint32_t)7)];
+                    assert(codedValues == curGroup.SubGroupPtr[k]->ScaledBlock.Values.size());
+                    for (uint32_t z = 0; z < curGroup.SubGroupPtr[k]->ScaledBlock.Values.size(); ++z) {
+                        mantisas[z] = round(curGroup.SubGroupPtr[k]->ScaledBlock.Values[z] * mul);
+                    }
+                    //VLCEnc
+                    assert(i);
+                    VLCEnc(i>>3, mantisas, curGroup.SubGroupPtr[k]->ScaledBlock.Values.size(), bitStream);
+                }
+                lastPos = k;
+                checkPos = lastPos;
+            }
+
+            assert(subGroupEndPos == checkPos);
+        }
+    }
+    assert(tcgnCheck == tcsgn);
+    return bitStream->GetSizeInBits() - bitsUsed;
+}
+
+vector<uint32_t> TAtrac3BitStreamWriter::CalcBitsAllocation(const std::vector<TScaledBlock>& scaledBlocks,
+                                                            const uint32_t bfuNum,
+                                                            const TFloat spread,
+                                                            const TFloat shift)
+{
+    //Word length for each of the first bfuNum blocks: the scale factor based estimate (weight spread)
+    //blended with the fixed table (weight 1 - spread), lowered by shift and limited to 0..7.
+    vector<uint32_t> allocation;
+    allocation.reserve(bfuNum);
+    for (uint32_t bfu = 0; bfu < bfuNum; ++bfu) {
+        //the conversion to int truncates towards zero
+        const int raw = spread * ( (TFloat)scaledBlocks[bfu].ScaleFactorIndex/3.2)
+            + (1.0 - spread) * FixedBitAllocTable[bfu] - shift;
+        const int limited = std::min(std::max(raw, 0), 7);
+        allocation.push_back(limited);
+    }
+
+    return allocation;
+}
+
+
+void TAtrac3BitStreamWriter::WriteSoundUnit(const TAtrac3Data::SubbandInfo& subbandInfo,
+                                            const std::vector<TTonalComponent>& tonalComponents,
+                                            const vector<TScaledBlock>& scaledBlocks)
+{
+    //Serialises one sound unit: id, QMF band count, gain info, tonal components, spectrum.
+    //The unit is padded to half a frame; every second call hands the collected frame to the container.
+    NBitStream::TBitStream bitStream;
+    if (!Params.Js) {
+        bitStream.Write(0x28, 6); //0x28 - id
+    } //TODO: nothing is written for JS mode yet
+    const uint8_t numQmfBand = subbandInfo.GetQmfNum();
+    bitStream.Write(numQmfBand - 1, 2);
+
+    //write gain info
+    for (uint32_t band = 0; band < numQmfBand; ++band) {
+        const vector<TAtrac3Data::SubbandInfo::TGainPoint>& curve = subbandInfo.GetGainPoints(band);
+        assert(curve.size() < TAtrac3Data::SubbandInfo::MaxGainPointsNum);
+        bitStream.Write(curve.size(), 3);
+        for (size_t pointNum = 0; pointNum < curve.size(); ++pointNum) {
+            bitStream.Write(curve[pointNum].Level, 4);
+            bitStream.Write(curve[pointNum].Location, 5);
+            assert(pointNum + 1 < 8);
+        }
+    }
+
+    //everything written so far minus 8 bits (6 bit id + 2 bit band count)
+    const uint16_t bitsUsedByGainInfo = bitStream.GetSizeInBits() - 8;
+    const uint16_t bitsUsedByTonal = EncodeTonalComponents(tonalComponents, &bitStream, numQmfBand);
+    //spec
+    EncodeSpecs(scaledBlocks, &bitStream, bitsUsedByTonal + bitsUsedByGainInfo);
+
+    if (!Container)
+        abort();
+
+    //an empty OutBuffer means this unit opens a new frame,
+    //otherwise it completes the frame started by the previous call
+    const bool frameComplete = !OutBuffer.empty();
+
+    //pad the unit to half of the frame size and append it
+    std::vector<char> channel = bitStream.GetBytes();
+    assert(channel.size() <= Params.FrameSz/2);
+    channel.resize(Params.FrameSz/2);
+    OutBuffer.insert(OutBuffer.end(), channel.begin(), channel.end());
+
+    if (frameComplete) {
+        Container->WriteFrame(OutBuffer);
+        OutBuffer.clear();
+    }
+}
+
+} // namespace NAtrac3
+} // namespace NAtracDEnc
diff --git a/src/atrac/atrac3_bitstream.h b/src/atrac/atrac3_bitstream.h
new file mode 100644
index 0000000..225d98c
--- /dev/null
+++ b/src/atrac/atrac3_bitstream.h
@@ -0,0 +1,71 @@
+#pragma once
+#include "atrac3.h"
+#include "atrac1.h"
+#include "../aea.h"
+#include "../oma.h"
+#include "../atrac/atrac1.h"
+#include "atrac_scale.h"
+#include <vector>
+#include <utility>
+
+namespace NAtracDEnc {
+namespace NAtrac3 {
+//One tonal component handed to TAtrac3BitStreamWriter::WriteSoundUnit.
+struct TTonalComponent {
+    TTonalComponent(const TAtrac3Data::TTonalVal* valPtr, uint8_t quantIdx, const TScaledBlock& scaledBlock)
+        : ValPtr(valPtr)
+        , QuantIdx(quantIdx)
+        , ScaledBlock(scaledBlock)
+    {}
+    const TAtrac3Data::TTonalVal* ValPtr = nullptr; //not owned; the writer reads ValPtr->Pos as the spectral position
+    uint8_t QuantIdx = 0; //the writer asserts 1 < QuantIdx < 8
+    TScaledBlock ScaledBlock; //the writer asserts 1..7 values
+};
+//Packs sound units into a bit stream and passes every pair of them to the container as one frame.
+class TAtrac3BitStreamWriter : public virtual TAtrac3Data {
+    struct TTonalComponentsSubGroup {
+        std::vector<uint8_t> SubGroupMap; //index into SubGroupPtr of the first component of every subgroup
+        std::vector<const TTonalComponent*> SubGroupPtr; //components of the group, not owned
+    };
+    TOma* Container; //not owned; WriteSoundUnit aborts when it is null
+    const TContainerParams Params;
+    std::vector<char> OutBuffer; //keeps the first sound unit until the second one completes the frame
+    //Constant length coding of one block; returns the bit count, writes only when bitStream is not null.
+    uint32_t CLCEnc(const uint32_t selector, const int mantissas[MaxSpecsPerBlock],
+                    const uint32_t blockSize, NBitStream::TBitStream* bitStream);
+    //Variable length coding of one block; returns the bit count, writes only when bitStream is not null.
+    uint32_t VLCEnc(const uint32_t selector, const int mantissas[MaxSpecsPerBlock],
+                    const uint32_t blockSize, NBitStream::TBitStream* bitStream);
+    //Word length (0..7) for each of the first bfuNum blocks.
+    std::vector<uint32_t> CalcBitsAllocation(const std::vector<TScaledBlock>& scaledBlocks,
+                                             uint32_t bfuNum, TFloat spread, TFloat shift);
+    //Searches an allocation that fits the bit budget; returns { coding mode, word length per block }.
+    std::pair<uint8_t, std::vector<uint32_t>> CreateAllocation(const std::vector<TScaledBlock>& scaledBlocks,
+                                                               uint16_t bitsUsed, int mt[MaxSpecs]);
+    //Fills mantisas and returns { coding mode, bits needed for the spectral part }.
+    std::pair<uint8_t, uint32_t> CalcSpecsBitsConsumption(const std::vector<TScaledBlock>& scaledBlocks,
+                                                          const std::vector<uint32_t>& precisionPerEachBlocks,
+                                                          int* mantisas);
+    //Writes the spectral part; bitsUsed is the number of bits the caller already spent.
+    void EncodeSpecs(const std::vector<TScaledBlock>& scaledBlocks, NBitStream::TBitStream* bitStream,
+                     uint16_t bitsUsed);
+    //Distributes the components over groups[64] and returns the number of subgroups.
+    uint8_t GroupTonalComponents(const std::vector<TTonalComponent>& tonalComponents,
+                                 TTonalComponentsSubGroup groups[64]);
+    //Writes the tonal components section and returns the number of bits written.
+    uint16_t EncodeTonalComponents(const std::vector<TTonalComponent>& tonalComponents,
+                                   NBitStream::TBitStream* bitStream, uint8_t numQmfBand);
+public:
+    TAtrac3BitStreamWriter(TOma* container, const TContainerParams& params) //no mono mode for atrac3
+        : Container(container)
+        , Params(params)
+    {
+        //params is copied; container is stored as a plain pointer
+    }
+    void WriteSoundUnit(const TAtrac3Data::SubbandInfo& subbandInfo,
+                        const std::vector<TTonalComponent>& tonalComponents,
+                        const std::vector<TScaledBlock>& scaledBlocks);
+};
+
+} // namespace NAtrac3
+} // namespace NAtracDEnc
diff --git a/src/atrac/atrac3_qmf.h b/src/atrac/atrac3_qmf.h
new file mode 100644
index 0000000..f0ef805
--- /dev/null
+++ b/src/atrac/atrac3_qmf.h
@@ -0,0 +1,27 @@
+#pragma once
+#include <vector>
+#include "../qmf/qmf.h"
+
+namespace NAtracDEnc {
+
+//Two stage QMF tree: one split of the input, then one split of each half, giving four subbands.
+template<class TIn>
+class Atrac3SplitFilterBank {
+    const static int nInSamples = 1024;
+    TQmf<TIn, nInSamples> Stage1;
+    TQmf<TIn, nInSamples / 2> Stage2A;
+    TQmf<TIn, nInSamples / 2> Stage2B;
+    std::vector<TFloat> FirstHalf; //first output of Stage1
+    std::vector<TFloat> SecondHalf; //second output of Stage1
+public:
+    Atrac3SplitFilterBank()
+        : FirstHalf(nInSamples), SecondHalf(nInSamples)
+    {}
+    void Split(TIn* pcm, TFloat* subs[4]) {
+        Stage1.Split(pcm, FirstHalf.data(), SecondHalf.data());
+        Stage2A.Split(FirstHalf.data(), subs[0], subs[1]);
+        Stage2B.Split(SecondHalf.data(), subs[3], subs[2]); //note the order: subs[3] first, then subs[2]
+    }
+};
+
+} //namespace NAtracDEnc
diff --git a/src/atrac/atrac_psy_common.cpp b/src/atrac/atrac_psy_common.cpp
new file mode 100644
index 0000000..f6f3bf7
--- /dev/null
+++ b/src/atrac/atrac_psy_common.cpp
@@ -0,0 +1,26 @@
+#include "atrac_psy_common.h"
+
+namespace NAtracDEnc {
+
+//Standard deviation of the scale factor indexes, clipped to 14 and divided by 14:
+//1 for tone-like, 0 - noise-like. An empty input yields 0 instead of dividing by zero.
+TFloat AnalizeScaleFactorSpread(const std::vector<TScaledBlock>& scaledBlocks) {
+    if (scaledBlocks.empty())
+        return 0.0;
+    TFloat s = 0.0;
+    for (const TScaledBlock& block : scaledBlocks)
+        s += block.ScaleFactorIndex;
+    s /= scaledBlocks.size();
+    TFloat sigma = 0.0;
+    for (const TScaledBlock& block : scaledBlocks) {
+        const TFloat t = block.ScaleFactorIndex - s;
+        sigma += t * t;
+    }
+    sigma /= scaledBlocks.size();
+    sigma = sqrt(sigma);
+    if (sigma > 14.0)
+        sigma = 14.0;
+    return sigma/14.0;
+}
+
+} //namespace NAtracDEnc
diff --git a/src/atrac/atrac_psy_common.h b/src/atrac/atrac_psy_common.h
new file mode 100644
index 0000000..4c580a4
--- /dev/null
+++ b/src/atrac/atrac_psy_common.h
@@ -0,0 +1,8 @@
+#pragma once
+#include "atrac_scale.h"
+
+namespace NAtracDEnc {
+
+TFloat AnalizeScaleFactorSpread(const std::vector<TScaledBlock>& scaledBlocks); //the definition in atrac_psy_common.cpp returns TFloat, so the declaration must too
+
+} //namespace NAtracDEnc
diff --git a/src/atrac/atrac_scale.cpp b/src/atrac/atrac_scale.cpp
index 1e24cb5..a03bc13 100644
--- a/src/atrac/atrac_scale.cpp
+++ b/src/atrac/atrac_scale.cpp
@@ -1,48 +1,68 @@
 #include "atrac_scale.h"
 #include "atrac1.h"
+#include "atrac3.h"
 #include <cmath>
 #include <iostream>
 #include <algorithm>
+
 namespace NAtracDEnc {
+
 using std::vector;
 using std::map;
 
-using namespace std;
+using std::cerr;
+using std::endl;
+
+using std::abs;
+
 static const uint32_t MAX_SCALE = 65536;
 
 template<class TBaseData>
-vector<TScaledBlock> TScaler<TBaseData>::Scale(const vector<double>& specs, const TBlockSize& blockSize) {
+TScaledBlock TScaler<TBaseData>::Scale(const TFloat* in, uint16_t len) {
+    TFloat maxAbsSpec = 0; //peak magnitude of the block, clipped to MAX_SCALE
+    for (uint16_t i = 0; i < len; ++i) {
+        const TFloat absSpec = abs(in[i]);
+        if (absSpec > MAX_SCALE) {
+            cerr << "Scale error: absSpec > MAX_SCALE, val: " << absSpec << endl;
+            maxAbsSpec = MAX_SCALE;
+        } else if (absSpec > maxAbsSpec) {
+            maxAbsSpec = absSpec;
+        }
+    }
+    //smallest table entry not below the peak; use the largest entry rather than dereferencing end()
+    map<TFloat, uint8_t>::const_iterator scaleIter = ScaleIndex.lower_bound(maxAbsSpec);
+    if (scaleIter == ScaleIndex.end())
+        --scaleIter;
+    const TFloat scaleFactor = scaleIter->first;
+    TScaledBlock res(scaleIter->second);
+    res.Values.reserve(len);
+    for (uint16_t i = 0; i < len; ++i) {
+        const TFloat scaledValue = in[i] / scaleFactor;
+        if (abs(scaledValue) > 1.0) //out of range in either direction
+            cerr << "got "<< scaledValue << " it is wrong scalling" << endl;
+        res.Values.push_back(scaledValue);
+    }
+    return res;
+}
+
+template<class TBaseData>
+vector<TScaledBlock> TScaler<TBaseData>::ScaleFrame(const vector<TFloat>& specs, const TBlockSize& blockSize) {
     vector<TScaledBlock> scaledBlocks;
     for (uint8_t bandNum = 0; bandNum < this->NumQMF; ++bandNum) {
         const bool shortWinMode = !!blockSize.LogCount[bandNum];
         for (uint8_t blockNum = this->BlocksPerBand[bandNum]; blockNum < this->BlocksPerBand[bandNum + 1]; ++blockNum) {
-            const uint16_t specNumStart = shortWinMode ? this->SpecsStartShort[blockNum] : this->SpecsStartLong[blockNum];
-            const uint16_t specNumEnd = specNumStart + this->SpecsPerBlock[blockNum];
-            double maxAbsSpec = 0;
-            for (uint16_t curSpec = specNumStart; curSpec < specNumEnd; ++curSpec) {
-                const double absSpec = abs(specs[curSpec]);
-                if (absSpec > maxAbsSpec) {
-                    if (absSpec > MAX_SCALE) {
-                        cerr << "got " << absSpec << " value - overflow" << endl;
-                        maxAbsSpec = MAX_SCALE;
-                    } else {
-                        maxAbsSpec = absSpec;
-                    }
-                }
-            }
-            const map<double, uint8_t>::const_iterator scaleIter = ScaleIndex.lower_bound(maxAbsSpec);
-            const double scaleFactor = scaleIter->first;
-            const uint8_t scaleFactorIndex = scaleIter->second;
-            scaledBlocks.push_back(TScaledBlock(scaleFactorIndex));
-            for (uint16_t specNum = specNumStart; specNum < specNumEnd; ++specNum) {
-                const double scaledValue = specs[specNum] / scaleFactor;
-                if (scaledValue > 1.0)
-                    cerr << "got "<< scaledValue << " value - wrong scalling" << endl;
-                scaledBlocks.back().Values.push_back(scaledValue);
-			}
+            const uint16_t specNumStart = shortWinMode ? TBaseData::SpecsStartShort[blockNum] : 
+                                                         TBaseData::SpecsStartLong[blockNum];
+            scaledBlocks.emplace_back(Scale(&specs[specNumStart], this->SpecsPerBlock[blockNum]));
 		}
 	}
     return scaledBlocks;
 }
-template class TScaler<TAtrac1Data>;
-}
+
+template
+class TScaler<NAtrac1::TAtrac1Data>;
+
+template
+class TScaler<NAtrac3::TAtrac3Data>;
+
+} //namespace NAtracDEnc
diff --git a/src/atrac/atrac_scale.h b/src/atrac/atrac_scale.h
index dd437a2..499fac2 100644
--- a/src/atrac/atrac_scale.h
+++ b/src/atrac/atrac_scale.h
@@ -4,24 +4,60 @@
 #include <cstdint>
 
 #include "atrac1.h"
+#include "../config.h"
+
 namespace NAtracDEnc {
 
 struct TScaledBlock {
 	TScaledBlock(uint8_t sfi) : ScaleFactorIndex(sfi) {}
-    const uint8_t ScaleFactorIndex = 0;
-    std::vector<double> Values;
+    /* const */ uint8_t ScaleFactorIndex = 0;
+    std::vector<TFloat> Values;
 };
 
+class TBlockSize;
+
 template <class TBaseData>
 class TScaler : public TBaseData {
-    std::map<double, uint8_t>ScaleIndex;
+    std::map<TFloat, uint8_t>ScaleIndex;
 public:
     TScaler() {
         for (int i = 0; i < 64; i++) {
             ScaleIndex[TBaseData::ScaleTable[i]] = i;
         }
     }
-    std::vector<TScaledBlock> Scale(const std::vector<double>& specs, const TBlockSize& blockSize);
+    TScaledBlock Scale(const TFloat* in, uint16_t len);
+    std::vector<TScaledBlock> ScaleFrame(const std::vector<TFloat>& specs, const TBlockSize& blockSize);
+};
+
+class TBlockSize { //per band block size selector; the MDCT code uses 1 << LogCount[band] as the number of blocks
+    static std::array<int, 4> Parse(NBitStream::TBitStream* stream) {
+        //ATRAC1 - 3 subbands, ATRAC3 - 4 subbands.
+        //TODO: rewrite
+        std::array<int, 4> tmp = {{0, 0, 0, 0}}; //the 4th entry is never read from the stream, keep it defined
+        tmp[0] = 2 - stream->Read(2);
+        tmp[1] = 2 - stream->Read(2);
+        tmp[2] = 3 - stream->Read(2);
+        stream->Read(2); //skip unused 2 bits
+        return tmp;
+    }
+    static std::array<int, 4> Create(bool lowShort, bool midShort, bool hiShort) {
+        std::array<int, 4> tmp = {{0, 0, 0, 0}}; //the 4th entry has no flag, keep it defined
+        tmp[0] = lowShort ? 2 : 0;
+        tmp[1] = midShort ? 2 : 0;
+        tmp[2] = hiShort ? 3 : 0;
+        return tmp;
+    }
+public:
+    TBlockSize(NBitStream::TBitStream* stream) //reads four 2 bit fields from the stream
+        : LogCount(Parse(stream))
+    {}
+    TBlockSize(bool lowShort, bool midShort, bool hiShort) //true selects short blocks for the band
+        : LogCount(Create(lowShort, midShort, hiShort))
+    {}
+    TBlockSize() //all zero
+        : LogCount({{0, 0, 0, 0}})
+    {}
+    const std::array<int, 4> LogCount;
+};
 
-}
+} //namespace NAtracDEnc
diff --git a/src/atracdenc.cpp b/src/atrac1denc.cpp
index 2de1fda..ff7923c 100644
--- a/src/atracdenc.cpp
+++ b/src/atrac1denc.cpp
@@ -1,35 +1,27 @@
 #include <vector>
 
-#include "atracdenc.h"
+#include "atrac1denc.h"
 #include "bitstream/bitstream.h"
 #include "atrac/atrac1.h"
 #include "atrac/atrac1_dequantiser.h"
 #include "atrac/atrac1_qmf.h"
 #include "atrac/atrac1_bitalloc.h"
+#include "util.h"
 
 namespace NAtracDEnc {
-using namespace std;
 using namespace NBitStream;
 using namespace NAtrac1;
 using namespace NMDCT;
+using std::vector;
 
-template<int N>
-static vector<double> invertSpectr(double* in) {
-    vector<double> buf(N);
-    memcpy(&buf[0], in, N * sizeof(double));
-    for (int i = 0; i < N; i+=2)
-        buf[i] *= -1;
-    return buf;
-}
-
-TAtrac1Processor::TAtrac1Processor(TAeaPtr&& aea, TAtrac1EncodeSettings&& settings)
+TAtrac1Processor::TAtrac1Processor(TCompressedIOPtr&& aea, TAtrac1EncodeSettings&& settings)
     : Aea(std::move(aea))
     , Settings(std::move(settings))
 {
 }
 
-static void vector_fmul_window(double *dst, const double *src0,
-                                const double *src1, const double *win, int len)
+static void vector_fmul_window(TFloat *dst, const TFloat *src0,
+                                const TFloat *src1, const TFloat *win, int len)
 {
     int i, j;
 
@@ -38,21 +30,21 @@ static void vector_fmul_window(double *dst, const double *src0,
     src0 += len;
 
     for (i = -len, j = len - 1; i < 0; i++, j--) {
-        double s0 = src0[i];
-        double s1 = src1[j];
-        double wi = win[i];
-        double wj = win[j];
+        TFloat s0 = src0[i];
+        TFloat s1 = src1[j];
+        TFloat wi = win[i];
+        TFloat wj = win[j];
         dst[i] = s0 * wj - s1 * wi;
         dst[j] = s0 * wi + s1 * wj;
     }
 }
 
-vector<double> midct(double* x, int N) {
-    vector<double> res;
+vector<TFloat> midct(TFloat* x, int N) {
+    vector<TFloat> res;
     for (int n = 0; n < 2 * N; n++) {
-        double sum = 0;
+        TFloat sum = 0;
         for (int k = 0; k < N; k++) {
-            sum += (x[k] * cos((M_PI/N) * ((double)n + 0.5 + N/2) * ((double)k + 0.5)));
+            sum += (x[k] * cos((M_PI/N) * ((TFloat)n + 0.5 + N/2) * ((TFloat)k + 0.5)));
         }
 
         res.push_back(sum);
@@ -60,36 +52,32 @@ vector<double> midct(double* x, int N) {
     return res;
 }
 
-void TAtrac1MDCT::Mdct(double Specs[512], double* low, double* mid, double* hi, const TBlockSize& blockSize) {
+void TAtrac1MDCT::Mdct(TFloat Specs[512], TFloat* low, TFloat* mid, TFloat* hi, const TBlockSize& blockSize) {
     uint32_t pos = 0;
-    for (uint32_t band = 0; band < QMF_BANDS; band++) {
+    for (uint32_t band = 0; band < NumQMF; band++) {
         const uint32_t numMdctBlocks = 1 << blockSize.LogCount[band];
-        double* srcBuf = (band == 0) ? low : (band == 1) ? mid : hi;
+        TFloat* srcBuf = (band == 0) ? low : (band == 1) ? mid : hi;
         uint32_t bufSz = (band == 2) ? 256 : 128; 
         const uint32_t blockSz = (numMdctBlocks == 1) ? bufSz : 32;
         uint32_t winStart = (numMdctBlocks == 1) ? ((band == 2) ? 112 : 48) : 0;
         //compensate level for 3rd band in case of short window
-        const double multiple = (numMdctBlocks != 1 && band == 2) ? 2.0 : 1.0;
-        vector<double> tmp(512);
+        const TFloat multiple = (numMdctBlocks != 1 && band == 2) ? 2.0 : 1.0;
+        vector<TFloat> tmp(512);
         uint32_t blockPos = 0;
 
         for (size_t k = 0; k < numMdctBlocks; ++k) {
-            memcpy(&tmp[winStart], &srcBuf[bufSz], 32 * sizeof(double));
+            memcpy(&tmp[winStart], &srcBuf[bufSz], 32 * sizeof(TFloat));
             for (size_t i = 0; i < 32; i++) {
                 srcBuf[bufSz + i] = TAtrac1Data::SineWindow[i] * srcBuf[blockPos + blockSz - 32 + i];
                 srcBuf[blockPos + blockSz - 32 + i] = TAtrac1Data::SineWindow[31 - i] * srcBuf[blockPos + blockSz - 32 + i];
             }
-            memcpy(&tmp[winStart+32], &srcBuf[blockPos], blockSz * sizeof(double));
-            const vector<double>&  sp = (numMdctBlocks == 1) ? ((band == 2) ? Mdct512(&tmp[0]) : Mdct256(&tmp[0])) : Mdct64(&tmp[0]);
+            memcpy(&tmp[winStart+32], &srcBuf[blockPos], blockSz * sizeof(TFloat));
+            const vector<TFloat>&  sp = (numMdctBlocks == 1) ? ((band == 2) ? Mdct512(&tmp[0]) : Mdct256(&tmp[0])) : Mdct64(&tmp[0]);
             for (size_t i = 0; i < sp.size(); i++) {
                 Specs[blockPos + pos + i] = sp[i] * multiple;
             }
             if (band) {
-                for (uint32_t j = 0; j < sp.size() / 2; j++) {
-                    double tmp = Specs[blockPos + pos +j];
-                    Specs[blockPos + pos + j] = Specs[blockPos + pos + sp.size() - 1 -j];
-                    Specs[blockPos + pos + sp.size() - 1 -j] = tmp;
-                }
+                SwapArray(&Specs[blockPos + pos], sp.size());
             }
 
             blockPos += 32;
@@ -97,29 +85,23 @@ void TAtrac1MDCT::Mdct(double Specs[512], double* low, double* mid, double* hi,
         pos += bufSz;
     } 
 }
-void TAtrac1MDCT::IMdct(double Specs[512], const TBlockSize& mode, double* low, double* mid, double* hi) {
+void TAtrac1MDCT::IMdct(TFloat Specs[512], const TBlockSize& mode, TFloat* low, TFloat* mid, TFloat* hi) {
     uint32_t pos = 0;
-    for (size_t band = 0; band < QMF_BANDS; band++) {
+    for (size_t band = 0; band < NumQMF; band++) {
         const uint32_t numMdctBlocks = 1 << mode.LogCount[band];
         const uint32_t bufSz = (band == 2) ? 256 : 128;
         const uint32_t blockSz = (numMdctBlocks == 1) ? bufSz : 32;
         uint32_t start = 0;
 
-        double* dstBuf = (band == 0) ? low : (band == 1) ? mid : hi;
+        TFloat* dstBuf = (band == 0) ? low : (band == 1) ? mid : hi;
 
-        vector<double> invBuf(512);
-        double* prevBuf = &dstBuf[bufSz * 2  - 16];
+        vector<TFloat> invBuf(512);
+        TFloat* prevBuf = &dstBuf[bufSz * 2  - 16];
         for (uint32_t block = 0; block < numMdctBlocks; block++) {
-
             if (band) {
-                for (uint32_t j = 0; j < blockSz/2; j++) {
-                    double tmp = Specs[pos+j];
-                    Specs[pos+j] = Specs[pos + blockSz - 1 -j];
-                    Specs[pos + blockSz - 1 -j] = tmp;
-                }
+                SwapArray(&Specs[pos], blockSz);
             }
-
-            vector<double> inv = (numMdctBlocks != 1) ? midct(&Specs[pos], blockSz) : (bufSz == 128) ? Midct256(&Specs[pos]) : Midct512(&Specs[pos]);
+            vector<TFloat> inv = (numMdctBlocks != 1) ? midct(&Specs[pos], blockSz) : (bufSz == 128) ? Midct256(&Specs[pos]) : Midct512(&Specs[pos]);
             for (size_t i = 0; i < (inv.size()/2); i++) {
                 invBuf[start+i] = inv[i + inv.size()/4];
             }
@@ -131,7 +113,7 @@ void TAtrac1MDCT::IMdct(double Specs[512], const TBlockSize& mode, double* low,
             pos += blockSz;
         }
         if (numMdctBlocks == 1)
-            memcpy(dstBuf + 32, &invBuf[16], ((band == 2) ? 240 : 112) * sizeof(double));
+            memcpy(dstBuf + 32, &invBuf[16], ((band == 2) ? 240 : 112) * sizeof(TFloat));
 
         for (size_t j = 0; j < 16; j++) {
             dstBuf[bufSz*2 - 16  + j] = invBuf[bufSz - 16 + j];
@@ -139,9 +121,9 @@ void TAtrac1MDCT::IMdct(double Specs[512], const TBlockSize& mode, double* low,
     }
 }
 
-TPCMEngine<double>::TProcessLambda TAtrac1Processor::GetDecodeLambda() {
-    return [this](double* data) {
-        double sum[512];
+TPCMEngine<TFloat>::TProcessLambda TAtrac1Processor::GetDecodeLambda() {
+    return [this](TFloat* data, const TPCMEngine<TFloat>::ProcessMeta& meta) {
+        TFloat sum[512];
         const uint32_t srcChannels = Aea->GetChannelNum();
         for (uint32_t channel = 0; channel < srcChannels; channel++) {
             std::unique_ptr<TAea::TFrame> frame(Aea->ReadFrame());
@@ -150,7 +132,7 @@ TPCMEngine<double>::TProcessLambda TAtrac1Processor::GetDecodeLambda() {
 
             TBlockSize mode(&bitstream);
             TAtrac1Dequantiser dequantiser;
-            vector<double> specs;
+            vector<TFloat> specs;
             specs.resize(512);;
             dequantiser.Dequant(&bitstream, mode, &specs[0]);
 
@@ -170,7 +152,7 @@ TPCMEngine<double>::TProcessLambda TAtrac1Processor::GetDecodeLambda() {
 }
 
 
-TPCMEngine<double>::TProcessLambda TAtrac1Processor::GetEncodeLambda() {
+TPCMEngine<TFloat>::TProcessLambda TAtrac1Processor::GetEncodeLambda() {
     const uint32_t srcChannels = Aea->GetChannelNum();
     vector<IAtrac1BitAlloc*> bitAlloc;
     for (size_t i = 0; i < srcChannels; i++) {
@@ -180,10 +162,10 @@ TPCMEngine<double>::TProcessLambda TAtrac1Processor::GetEncodeLambda() {
         bitAlloc.push_back(new TAtrac1SimpleBitAlloc(atrac1container, Settings.GetBfuIdxConst(), Settings.GetFastBfuNumSearch()));
     }
 
-    return [this, srcChannels, bitAlloc](double* data) {
+    return [this, srcChannels, bitAlloc](TFloat* data, const TPCMEngine<TFloat>::ProcessMeta& meta) {
         for (uint32_t channel = 0; channel < srcChannels; channel++) {
-            double src[NumSamples];
-            vector<double> specs(512);
+            TFloat src[NumSamples];
+            vector<TFloat> specs(512);
             for (size_t i = 0; i < NumSamples; ++i) {
                 src[i] = data[i * srcChannels + channel];
             }
@@ -194,10 +176,10 @@ TPCMEngine<double>::TProcessLambda TAtrac1Processor::GetEncodeLambda() {
             if (Settings.GetWindowMode() == TAtrac1EncodeSettings::EWindowMode::EWM_AUTO) {
                 windowMask |= (uint32_t)TransientDetectors.GetDetector(channel, 0).Detect(&PcmBufLow[channel][0]);
 
-                const vector<double>& invMid = invertSpectr<128>(&PcmBufMid[channel][0]);
+                const vector<TFloat>& invMid = InvertSpectr<128>(&PcmBufMid[channel][0]);
                 windowMask |= (uint32_t)TransientDetectors.GetDetector(channel, 1).Detect(&invMid[0]) << 1;
 
-                const vector<double>& invHi = invertSpectr<256>(&PcmBufHi[channel][0]);
+                const vector<TFloat>& invHi = InvertSpectr<256>(&PcmBufHi[channel][0]);
                 windowMask |= (uint32_t)TransientDetectors.GetDetector(channel, 2).Detect(&invHi[0]) << 2;
 
                 //std::cout << "trans: " << windowMask << std::endl;
@@ -207,15 +189,10 @@ TPCMEngine<double>::TProcessLambda TAtrac1Processor::GetEncodeLambda() {
             }
             const TBlockSize blockSize(windowMask & 0x1, windowMask & 0x2, windowMask & 0x4); //low, mid, hi
 
-            //for (int i = 0; i < 256; ++i) {
-            //    std::cout << PcmBufHi[channel][i] << std::endl;
-            //}
-            //std::cout<< "============" << std::endl;
             Mdct(&specs[0], &PcmBufLow[channel][0], &PcmBufMid[channel][0], &PcmBufHi[channel][0], blockSize);
-            bitAlloc[channel]->Write(Scaler.Scale(specs, blockSize), blockSize);
+            bitAlloc[channel]->Write(Scaler.ScaleFrame(specs, blockSize), blockSize);
         }
     };
 }
 
-
-}
+} //namespace NAtracDEnc
diff --git a/src/atracdenc.h b/src/atrac1denc.h
index 30dbd20..693a468 100644
--- a/src/atracdenc.h
+++ b/src/atrac1denc.h
@@ -1,6 +1,7 @@
 #pragma once
 #include "pcmengin.h"
 #include "aea.h"
+#include "oma.h"
 #include "atrac_encode_settings.h"
 #include "transient_detector.h"
 #include "atrac/atrac1.h"
@@ -15,37 +16,38 @@ namespace NAtracDEnc {
 
 enum EMode {
     E_ENCODE = 1,
-    E_DECODE = 2
+    E_DECODE = 2,
+    E_ATRAC3 = 4
 };
 
-class TAtrac1MDCT : public virtual TAtrac1Data {
+class TAtrac1MDCT : public virtual NAtrac1::TAtrac1Data {
     NMDCT::TMDCT<512> Mdct512;
     NMDCT::TMDCT<256> Mdct256;
     NMDCT::TMDCT<64> Mdct64;
     NMDCT::TMIDCT<512> Midct512;
     NMDCT::TMIDCT<256> Midct256;
 public:
-    void IMdct(double specs[512], const TBlockSize& mode, double* low, double* mid, double* hi);
-    void Mdct(double specs[512], double* low, double* mid, double* hi, const TBlockSize& blockSize);
+    void IMdct(TFloat specs[512], const TBlockSize& mode, TFloat* low, TFloat* mid, TFloat* hi);
+    void Mdct(TFloat specs[512], TFloat* low, TFloat* mid, TFloat* hi, const TBlockSize& blockSize);
     TAtrac1MDCT()
         : Mdct512(2)
         , Mdct256(1)
     {}
 };
 
-class TAtrac1Processor : public IProcessor<double>, public TAtrac1MDCT, public virtual TAtrac1Data {
-    TAeaPtr Aea;
-    const TAtrac1EncodeSettings Settings;
+class TAtrac1Processor : public IProcessor<TFloat>, public TAtrac1MDCT, public virtual NAtrac1::TAtrac1Data {
+    TCompressedIOPtr Aea;
+    const NAtrac1::TAtrac1EncodeSettings Settings;
 
-    double PcmBufLow[2][256 + 16];
-    double PcmBufMid[2][256 + 16];
-    double PcmBufHi[2][512 + 16];
+    TFloat PcmBufLow[2][256 + 16];
+    TFloat PcmBufMid[2][256 + 16];
+    TFloat PcmBufHi[2][512 + 16];
 
     int32_t PcmValueMax = 32767;
     int32_t PcmValueMin = -32767;
 
-    Atrac1SynthesisFilterBank<double> SynthesisFilterBank[2];
-    Atrac1SplitFilterBank<double> SplitFilterBank[2];
+    Atrac1SynthesisFilterBank<TFloat> SynthesisFilterBank[2];
+    Atrac1SplitFilterBank<TFloat> SplitFilterBank[2];
 
     class TTransientDetectors {
         std::vector<TTransientDetector> transientDetectorLow;
@@ -79,9 +81,9 @@ class TAtrac1Processor : public IProcessor<double>, public TAtrac1MDCT, public v
     TScaler<TAtrac1Data> Scaler;
 
 public:
-    TAtrac1Processor(TAeaPtr&& aea, TAtrac1EncodeSettings&& settings);
-    TPCMEngine<double>::TProcessLambda GetDecodeLambda() override;
+    TAtrac1Processor(TCompressedIOPtr&& aea, NAtrac1::TAtrac1EncodeSettings&& settings);
+    TPCMEngine<TFloat>::TProcessLambda GetDecodeLambda() override;
 
-    TPCMEngine<double>::TProcessLambda GetEncodeLambda() override;
+    TPCMEngine<TFloat>::TProcessLambda GetEncodeLambda() override;
 };
 }
diff --git a/src/atrac3denc.cpp b/src/atrac3denc.cpp
new file mode 100644
index 0000000..fa7724c
--- /dev/null
+++ b/src/atrac3denc.cpp
@@ -0,0 +1,357 @@
+#include "atrac3denc.h"
+#include "transient_detector.h"
+#include "util.h"
+#include <assert.h>
+#include <algorithm>
+#include <cmath>
+#include <iostream>
+#include <memory>
+namespace NAtracDEnc {
+
+using namespace NMDCT;
+using namespace NAtrac3;
+using std::vector;
+
+// Forward transform of one frame: converts 4 subband buffers into 4 x 256 spectral lines.
+//
+// specs          - output, 256 coefficients per band stored back to back
+// bands          - per band buffer of 512 values: [0..255] new samples, [256..511] windowed
+//                  overlap kept from the previous call; both halves are rewritten here
+// maxLevels      - output, per band peak absolute value of the new samples
+// gainModulators - optional per band gain modulators; empty entries are skipped
+void TAtrac3MDCT::Mdct(TFloat specs[1024], TFloat* bands[4], TFloat maxLevels[4], TGainModulatorArray gainModulators)
+{
+    for (int band = 0; band < 4; ++band) {
+        TFloat* srcBuff = bands[band];
+        TFloat* const curSpec = &specs[band*256];
+        TGainModulator modFn = gainModulators[band];
+        vector<TFloat> tmp(512);
+        // first half of the transform input is the overlap saved by the previous call
+        memcpy(&tmp[0], &srcBuff[256], 256 * sizeof(TFloat));
+        if (modFn) {
+            modFn(tmp.data(), srcBuff);
+        }
+        TFloat max = 0.0;
+        for (int i = 0; i < 256; i++) {
+            // the peak is measured before the window is applied (but after the modulator has run)
+            max = std::max(max, std::abs(srcBuff[i]));
+            // keep a windowed copy as overlap for the next call, then window the current half in place
+            srcBuff[256+i] = TAtrac3Data::EncodeWindow[i] * srcBuff[i];
+            srcBuff[i] = TAtrac3Data::EncodeWindow[255-i] * srcBuff[i];
+        }
+        memcpy(&tmp[256], &srcBuff[0], 256 * sizeof(TFloat));
+        const vector<TFloat>& sp = Mdct512(&tmp[0]);
+        assert(sp.size() == 256);
+        memcpy(curSpec, sp.data(), 256 * sizeof(TFloat));
+        // odd bands get their coefficient order swapped by SwapArray
+        if (band & 1) {
+            SwapArray(curSpec, 256);
+        }
+        maxLevels[band] = max;
+    }
+}
+
+// Convenience overload for callers that do not need the per band peak levels:
+// forwards to the four-argument Mdct() and discards the reported levels.
+void TAtrac3MDCT::Mdct(TFloat specs[1024], TFloat* bands[4], TGainModulatorArray gainModulators)
+{
+    // Per-call scratch storage. A function-level static would be shared by every
+    // instance and thread and written on each call.
+    TFloat dummy[4];
+    Mdct(specs, bands, dummy, gainModulators);
+}
+
+// Inverse transform of one frame: converts 4 x 256 spectral lines back into 4 subband buffers.
+//
+// specs            - input, 256 coefficients per band; odd bands are reordered in place
+// bands            - per band buffer of 512 values: [0..255] receives the output samples,
+//                    [256..511] carries the overlap between consecutive calls
+// gainDemodulators - optional per band gain demodulators; an empty entry means plain overlap-add
+void TAtrac3MDCT::Midct(TFloat specs[1024], TFloat* bands[4], TGainDemodulatorArray gainDemodulators)
+{
+    for (int band = 0; band < 4; ++band) {
+        TFloat* dstBuff = bands[band];
+        TFloat* curSpec = &specs[band*256];
+        TFloat* prevBuff = dstBuff + 256;
+        TAtrac3GainProcessor::TGainDemodulator demodFn = gainDemodulators[band];
+        // same reordering of odd bands as done on the forward path
+        if (band & 1) {
+            SwapArray(curSpec, 256);
+        }
+        vector<TFloat> inv  = Midct512(curSpec);
+        assert(inv.size()/2 == 256);
+        // apply the decode window symmetrically to both halves of the 512 point result
+        for (int j = 0; j < 256; ++j) {
+            inv[j] *= /*2 */ DecodeWindow[j];
+            inv[511 - j] *= /*2*/ DecodeWindow[j];
+        }
+        if (demodFn) {
+            demodFn(dstBuff, inv.data(), prevBuff);
+        } else {
+            for (uint32_t j = 0; j < 256; ++j) {
+                dstBuff[j] = inv[j] + prevBuff[j];
+            }
+        }
+        // second half becomes the overlap for the next call
+        memcpy(prevBuff, &inv[256], sizeof(TFloat)*256);
+    }
+}
+
+// Takes ownership of the output container and of the encoder settings.
+//
+// PcmBuffer and PrevPeak are value-initialised (zero filled): the overlap half of
+// PcmBuffer and the previous peak levels are read while encoding the very first
+// frame, before anything has been stored in them.
+TAtrac3Processor::TAtrac3Processor(TCompressedIOPtr&& oma, TAtrac3EncoderSettings&& encoderSettings)
+    : Oma(std::move(oma))
+    , Params(std::move(encoderSettings))
+    , PcmBuffer()
+    , PrevPeak()
+    , TransientDetectors(2 * 4, TTransientDetector(8, 256)) //2 - channels, 4 - bands
+{}
+
+// Empty destructor body: no manual cleanup is performed here.
+TAtrac3Processor::~TAtrac3Processor()
+{}
+
+// Builds the per band gain modulators handed to Mdct().
+//
+// The first si.GetQmfNum() entries are created from the gain points of the
+// corresponding band; the remaining entries stay empty. A band count outside
+// 1..4 trips the assert and yields an array of empty modulators.
+TAtrac3MDCT::TGainModulatorArray TAtrac3MDCT::MakeGainModulatorArray(const TAtrac3Data::SubbandInfo& si)
+{
+    const auto qmfNum = si.GetQmfNum();
+    if (qmfNum < 1 || qmfNum > 4) {
+        assert(false);
+        return {};
+    }
+
+    TGainModulatorArray modulators;
+    const uint32_t usedBands = static_cast<uint32_t>(qmfNum);
+    for (uint32_t band = 0; band < usedBands; ++band) {
+        modulators[band] = GainProcessor.Modulate(si.GetGainPoints(band));
+    }
+    return modulators;
+}
+
+// Picks out spectral lines that stand far above the reference level of their block
+// and removes them from the spectrum.
+//
+// specs - spectrum of the frame, modified in place: an extracted line is set to 0,
+//         a line whose magnitude exceeds 65535 is clipped to +/-65535 (and reported on stderr)
+// fn    - callback returning the reference level of a block; a NaN result skips the block
+// Returns the extracted {position, value} pairs in the order they were found.
+//TODO:
+TAtrac3Data::TTonalComponents TAtrac3Processor::ExtractTonalComponents(TFloat* specs, TTonalDetector fn)
+{
+    TAtrac3Data::TTonalComponents res;
+    // per band minimal distance (in decades, log10) between a line and the reference level
+    const float thresholds[TAtrac3Data::NumQMF] = { 0.9, 2.4, 2.8, 3.2 };
+    for (uint8_t bandNum = 0; bandNum < this->NumQMF; ++bandNum) {
+        //disabled for frequencies above 16KHz until a proper psychoacoustic model is in place
+        if (bandNum > 2)
+            continue;
+        for (uint8_t blockNum = BlocksPerBand[bandNum]; blockNum < BlocksPerBand[bandNum + 1]; ++blockNum) {
+            const uint16_t specNumStart = SpecsStartLong[blockNum];
+            const uint16_t specNumEnd = specNumStart + SpecsPerBlock[blockNum];
+            const float level = fn(specs + specNumStart, SpecsPerBlock[blockNum]);
+            // std:: qualified: <cmath> is only guaranteed to declare isnan inside namespace std
+            if (std::isnan(level))
+                continue;
+            // does not depend on the line, so compute it once per block
+            const auto logLevel = log10(level);
+            for (uint16_t n = specNumStart; n < specNumEnd; ++n) {
+                //TODO:
+                const TFloat absValue = std::abs(specs[n]);
+                if (absValue > 65535.0) {
+                    const TFloat shift = (specs[n] > 0) ? 65535.0 : -65535.0;
+                    std::cerr << "overflow: " << specs[n] << " at: " << n << std::endl;
+                    specs[n] = shift;
+                } else if (log10(absValue) - logLevel > thresholds[bandNum]) {
+                    res.push_back({n, specs[n]});
+                    specs[n] = 0;
+                }
+            }
+        }
+    }
+    return res;
+}
+
+// Groups extracted tonal lines into runs of consecutive positions and scales each run.
+//
+// A run ends when the next position is not adjacent or when it already holds 7 values.
+// Each resulting entry points at the first line of its run inside tonalComponents,
+// so the argument must outlive the returned vector.
+std::vector<TTonalComponent> TAtrac3Processor::MapTonalComponents(const TTonalComponents& tonalComponents)
+{
+    vector<TTonalComponent> componentMap;
+    for (uint16_t i = 0; i < tonalComponents.size();) {
+        const uint16_t startPos = i;
+        uint16_t curPos;
+        // advance i to one past the end of the current run (at most 7 elements)
+        do {
+            curPos = tonalComponents[i].Pos;
+            ++i;
+        } while ( i < tonalComponents.size() && tonalComponents[i].Pos == curPos + 1 && i - startPos < 7);
+        const uint16_t len = i - startPos;
+        TFloat tmp[8];
+        for (uint8_t j = 0; j < len; ++j)
+            tmp[j] = tonalComponents[startPos + j].Val;
+        const TScaledBlock& scaledBlock = Scaler.Scale(tmp, len);
+        // NOTE(review): the literal 7 is presumably the constant quantiser index for tonal
+        // components mentioned in the commit message, not the run length - confirm against TTonalComponent
+        componentMap.push_back({&tonalComponents[startPos], 7, scaledBlock});
+    }
+    return componentMap;
+}
+
+
+// Limits a gain relation: the result is not below GainLevel[15] and not above GainLevel[0].
+TFloat TAtrac3Processor::LimitRel(TFloat x)
+{
+    const TFloat lowest = GainLevel[15];
+    const TFloat highest = GainLevel[0];
+    const TFloat raised = (x < lowest) ? lowest : x;
+    return (highest < raised) ? highest : raised;
+}
+
+// Searches a frame's gain envelope for a sudden rise (attack) and a sudden fall (release).
+//
+// gain    - envelope values of the current frame, in time order
+// lastMax - peak level of the previous frame, used as the level "before" gain[0]
+//
+// Attack:  scanning forward, the first i with gain[i+1] / max(lastMax, gain[0..i]) > 4
+//          gives AttackLocation = i; a jump of gain[0] over lastMax is reported at location 0.
+// Release: scanning backward, the first i with gain[i-1] / max(gain[i..end]) > 4
+//          gives ReleaseLocation = i; if none is found, lastMax is compared against the
+//          maximum of the whole frame and reported at location 0.
+// A relation of 1 means "nothing found". An empty gain vector yields "nothing found".
+TAtrac3Processor::TTransientParam TAtrac3Processor::CalcTransientParam(const std::vector<TFloat>& gain, const TFloat lastMax)
+{
+    // Guard: gain[0] and the unsigned "size() - 1" loop bounds below require at least one element
+    if (gain.empty()) {
+        return {0, TFloat(1), 0, TFloat(1)};
+    }
+
+    int32_t attackLocation = 0;
+    TFloat attackRelation = 1;
+
+    const TFloat attackThreshold = 4;
+    //pre-echo searching
+    TFloat tmp;
+    TFloat q = lastMax; //std::max(lastMax, gain[0]);
+    tmp = gain[0] / q;
+    if (tmp > attackThreshold) {
+        attackRelation = tmp;
+    } else {
+        for (uint32_t i = 0; i < gain.size() -1; ++i) {
+            q =  std::max(q, gain[i]);
+            tmp = gain[i+1] / q;
+            if (tmp > attackThreshold) {
+                attackRelation = tmp;
+                attackLocation = i;
+                break;
+            }
+        }
+    }
+
+    int32_t releaseLocation = 0;
+    TFloat releaseRelation = 1;
+
+    const TFloat releaseThreshold = 4;
+    //post-echo searching
+    q = 0;
+    for (uint32_t i = gain.size() - 1; i > 0; --i) {
+        q = std::max(q, gain[i]);
+        tmp = gain[i-1] / q;
+        if (tmp > releaseThreshold) {
+            releaseRelation = tmp;
+            releaseLocation = i;
+            break;
+        }
+    }
+    if (releaseLocation == 0) {
+        q = std::max(q, gain[0]);
+        tmp = lastMax / q;
+        if (tmp > releaseThreshold) {
+            releaseRelation = tmp;
+        }
+    }
+
+    return {attackLocation, attackRelation, releaseLocation, releaseRelation};
+}
+
+// Analyses the 4 subband signals of one channel and builds the gain control curves of the frame.
+//
+// in                - per band pointer to 256 samples of the current frame
+// channel           - channel index, selects the stored previous peak levels (PrevPeak)
+// transientDetector - not used by this implementation
+//
+// Returns a SubbandInfo that holds a curve only for the bands where a transient was detected.
+TAtrac3Data::SubbandInfo TAtrac3Processor::CreateSubbandInfo(TFloat* in[4],
+                                                             uint32_t channel,
+                                                             TTransientDetector* transientDetector)
+{
+    TAtrac3Data::SubbandInfo siCur;
+    for (int band = 0; band < 4; ++band) {
+
+        // odd bands are analysed on a copy processed by InvertSpectrInPlase, the input stays untouched
+        TFloat invBuf[256];
+        if (band & 1) {
+            memcpy(invBuf, in[band], 256*sizeof(TFloat));
+            InvertSpectrInPlase<256>(invBuf);
+        }
+        const TFloat* srcBuff = (band & 1) ? invBuf : in[band];
+
+        const TFloat* const lastMax = &PrevPeak[channel][band];
+
+        std::vector<TAtrac3Data::SubbandInfo::TGainPoint> curve;
+        std::vector<TFloat> gain = AnalyzeGain(srcBuff, 256, 32, false);
+
+        auto transientParam = CalcTransientParam(gain, *lastMax);
+        bool hasTransient = (transientParam.AttackRelation != 1.0 || transientParam.ReleaseRelation != 1.0);
+
+        //combine attack and release
+        // relA / relB / relC: relations before loc1, between loc1 and loc2, and after loc2
+        TFloat relA = 1;
+        TFloat relB = 1;
+        TFloat relC = 1;
+        uint32_t loc1 = 0;
+        uint32_t loc2 = 0;
+        if (transientParam.AttackLocation < transientParam.ReleaseLocation) {
+            //Peak like transient
+            relA = transientParam.AttackRelation;
+            loc1 = transientParam.AttackLocation;
+            relB = 1;
+            loc2 = transientParam.ReleaseLocation;
+            relC = transientParam.ReleaseRelation;
+        } else if (transientParam.AttackLocation > transientParam.ReleaseLocation) {
+            //Hole like transient
+            // NOTE(review): relA takes the attack relation although the release comes first here - confirm intent
+            relA = transientParam.AttackRelation;
+            loc1 = transientParam.ReleaseLocation;
+            relB = transientParam.AttackRelation * transientParam.ReleaseRelation;
+            loc2 = transientParam.AttackLocation;
+            relC = transientParam.ReleaseRelation;
+        } else {
+            //???
+            //relA = relB = relC = transientParam.AttackRelation * transientParam.ReleaseRelation;
+            //loc1 = loc2 = transientParam.ReleaseLocation;
+            // attack and release at the same location: treated as "no transient"
+            hasTransient = false;
+        }
+        //std::cout << "loc: " << loc1 << " " << loc2 << " rel: " << relA << " " << relB << " " << relC <<  std::endl;
+
+        // normalise so that the last segment has relation 1
+        if (relC != 1) {
+            relA /= relC;
+            relB /= relC;
+            relC = 1.0;
+        }
+        // maps a relation to a gain level index: inverts it, limits it with LimitRel(),
+        // then converts it with Log2FloatToIdx()
+        auto relToIdx = [this](TFloat rel) {
+            rel = LimitRel(1/rel);
+            return (uint32_t)(15 - Log2FloatToIdx(rel, 2048));
+        };
+        curve.push_back({relToIdx(relA), loc1});
+        if (loc1 != loc2) {
+            curve.push_back({relToIdx(relB), loc2});
+        }
+        if (loc2 != 31) {
+            curve.push_back({relToIdx(relC), 31});
+        }
+
+        // the curve is always built but only stored when a transient was found
+        // NOTE(review): AddSubbandCurve is skipped for transient-free bands, so later bands
+        // may be added without earlier ones - confirm SubbandInfo copes with that
+        if (hasTransient) {
+            siCur.AddSubbandCurve(band, std::move(curve));
+        }
+
+    }
+    return siCur;
+}
+
+
+// Builds the callback that encodes one frame of interleaved PCM into ATRAC3 sound units.
+//
+// Aborts the process if the output container is not a TOma.
+// The returned callback captures `this`, so it must not outlive the processor.
+// For every call it runs, per channel: QMF split -> optional gain analysis -> MDCT ->
+// optional tonal component extraction -> scaling -> bitstream write.
+TPCMEngine<TFloat>::TProcessLambda TAtrac3Processor::GetEncodeLambda()
+{
+    TOma* omaptr = dynamic_cast<TOma*>(Oma.get());
+    if (!omaptr) {
+        std::cerr << "Wrong container" << std::endl;
+        abort();
+    }
+
+    // Shared ownership: every copy of the callback refers to the same writer and the
+    // writer is released together with the last copy (a raw `new` here was never freed).
+    std::shared_ptr<TAtrac3BitStreamWriter> bitStreamWriter(new TAtrac3BitStreamWriter(omaptr, *Params.ConteinerParams));
+    return [this, bitStreamWriter](TFloat* data, const TPCMEngine<TFloat>::ProcessMeta& meta) {
+        for (uint32_t channel=0; channel < 2; channel++) {
+            vector<TFloat> specs(1024);
+            TFloat src[NumSamples];
+
+            //no mono mode in atrac3: a mono source feeds both channels
+            //TODO we can TFloat frame after encoding
+            for (size_t i = 0; i < NumSamples; ++i) {
+                src[i] = data[meta.Channels == 1 ? i : (i * 2 + channel)] / 4.0;
+            }
+
+            TFloat* p[4] = {&PcmBuffer[channel][0][0], &PcmBuffer[channel][1][0], &PcmBuffer[channel][2][0], &PcmBuffer[channel][3][0]};
+            SplitFilterBank[channel].Split(&src[0], p);
+
+            //4 detectors per channel (one per band)
+            TAtrac3Data::SubbandInfo siCur = Params.NoGainControll ?
+                TAtrac3Data::SubbandInfo() : CreateSubbandInfo(p, channel, &TransientDetectors[channel*4]);
+
+            // Mdct() overwrites these with the peak levels of the current frame
+            TFloat* maxOverlapLevels = PrevPeak[channel];
+
+            Mdct(specs.data(), p, maxOverlapLevels, MakeGainModulatorArray(siCur));
+            // Reference level of a block is the median magnitude; NaN (skip the block)
+            // when the block is empty or the median does not exceed 0.001
+            TTonalComponents tonals = Params.NoTonalComponents ?
+                    TAtrac3Data::TTonalComponents() : ExtractTonalComponents(specs.data(), [](const TFloat* spec, uint16_t len) -> float {
+                std::vector<TFloat> magnitude(len);
+                for (uint16_t i = 0; i < len; ++i) {
+                    magnitude[i] = std::abs(spec[i]);
+                }
+                const float median = CalcMedian(magnitude.data(), len);
+                if (len > 0 && median > 0.001) {
+                    return median;
+                }
+                return NAN;
+            });
+
+            const std::vector<TTonalComponent>& components = MapTonalComponents(tonals);
+
+            //TBlockSize for ATRAC3 - 4 subband, all are long (no short window)
+            bitStreamWriter->WriteSoundUnit(siCur, components, Scaler.ScaleFrame(specs, TBlockSize()));
+        }
+    };
+}
+
+// Decoding is not implemented here: calling this terminates the process via abort().
+TPCMEngine<TFloat>::TProcessLambda TAtrac3Processor::GetDecodeLambda()
+{
+    abort();
+    // not reached; present only so that the function has a return statement
+    return {};
+}
+
+}//namespace NAtracDEnc
diff --git a/src/atrac3denc.h b/src/atrac3denc.h
new file mode 100644
index 0000000..25f5c90
--- /dev/null
+++ b/src/atrac3denc.h
@@ -0,0 +1,86 @@
+#pragma once
+#include "config.h"
+#include "pcmengin.h"
+#include "oma.h"
+#include "aea.h"
+#include "atrac/atrac3.h"
+#include "atrac/atrac3_qmf.h"
+#include "transient_detector.h"
+
+#include "atrac/atrac3_bitstream.h"
+#include "atrac/atrac_scale.h"
+#include "mdct/mdct.h"
+#include "gain_processor.h"
+
+#include <functional>
+#include <array>
+namespace NAtracDEnc {
+
+// MDCT / inverse MDCT stage of the ATRAC3 codec: four bands of 256 spectral lines each,
+// with optional per band gain (de)modulation.
+class TAtrac3MDCT : public NAtrac3::TAtrac3Data {
+    NMDCT::TMDCT<512> Mdct512;
+    NMDCT::TMIDCT<512> Midct512;
+public:
+    typedef TGainProcessor<TAtrac3Data> TAtrac3GainProcessor;
+    TAtrac3GainProcessor GainProcessor;
+    // NOTE(review): the argument 2 is presumably the transform's scale factor - confirm against NMDCT::TMDCT
+    TAtrac3MDCT()
+        : Mdct512(2)
+    {}
+public:
+    using TGainModulator = TAtrac3GainProcessor::TGainModulator;
+    using TGainDemodulator = TAtrac3GainProcessor::TGainDemodulator;
+    // one (possibly empty) callable per band
+    typedef std::array<TGainDemodulator, 4> TGainDemodulatorArray;
+    typedef std::array<TGainModulator, 4> TGainModulatorArray;
+    // forward transform, additionally reports the per band peak level in maxLevels
+    void Mdct(TFloat specs[1024],
+              TFloat* bands[4],
+              TFloat maxLevels[4],
+              TGainModulatorArray gainModulators);
+    // forward transform without peak level reporting; no gain modulation by default
+    void Mdct(TFloat specs[1024],
+              TFloat* bands[4],
+              TGainModulatorArray gainModulators = TGainModulatorArray());
+    // inverse transform; plain overlap-add by default
+    void Midct(TFloat specs[1024],
+               TFloat* bands[4],
+               TGainDemodulatorArray gainDemodulators = TGainDemodulatorArray());
+protected:
+    // creates the modulators for the bands described by si, the rest stay empty
+    TAtrac3MDCT::TGainModulatorArray MakeGainModulatorArray(const TAtrac3Data::SubbandInfo& si);
+};
+
+//returns the reference level of the block; a NaN result makes the caller skip the block
+typedef std::function<float(const TFloat* p, uint16_t len)> TTonalDetector;
+
+// ATRAC3 encoder: turns PCM frames into sound units written to the given container.
+// Decoding is not implemented (GetDecodeLambda aborts).
+class TAtrac3Processor : public IProcessor<TFloat>, public TAtrac3MDCT {
+    TCompressedIOPtr Oma;
+    const NAtrac3::TAtrac3EncoderSettings Params;
+    TFloat PcmBuffer[2][4][256 + 256]; //2 channel, 4 band, 256 sample + 256 for overlap buffer
+
+    TFloat PrevPeak[2][4]; //2 channel, 4 band - peak level reported by Mdct() for the previous frame, used as the starting level of the gain analysis
+
+    Atrac3SplitFilterBank<TFloat> SplitFilterBank[2];
+    TScaler<TAtrac3Data> Scaler;
+    std::vector<TTransientDetector> TransientDetectors;
+    typedef std::array<uint8_t, NumSpecs> TonalComponentMask;
+public:
+    // result of the transient search in one band, see CalcTransientParam();
+    // a relation of 1 means that nothing was found
+    struct TTransientParam {
+        const int32_t AttackLocation;
+        const TFloat AttackRelation;
+        const int32_t ReleaseLocation;
+        const TFloat ReleaseRelation;
+    };
+private:
+
+// the helpers below become public when ATRAC_UT_PUBLIC is defined, so that unit tests can call them
+#ifdef ATRAC_UT_PUBLIC
+public:
+#endif
+    TFloat LimitRel(TFloat x);
+    TTransientParam CalcTransientParam(const std::vector<TFloat>& gain, TFloat lastMax);
+    TAtrac3Data::SubbandInfo CreateSubbandInfo(TFloat* in[4], uint32_t channel, TTransientDetector* transientDetector);
+    TonalComponentMask AnalyzeTonalComponent(TFloat* specs);
+    TTonalComponents ExtractTonalComponents(TFloat* specs, TTonalDetector fn);
+
+    std::vector<NAtrac3::TTonalComponent> MapTonalComponents(const TTonalComponents& tonalComponents);
+public:
+    TAtrac3Processor(TCompressedIOPtr&& oma, NAtrac3::TAtrac3EncoderSettings&& encoderSettings);
+    ~TAtrac3Processor();
+    TPCMEngine<TFloat>::TProcessLambda GetDecodeLambda() override;
+    TPCMEngine<TFloat>::TProcessLambda GetEncodeLambda() override;
+};
+}
diff --git a/src/atrac3denc_ut.cpp b/src/atrac3denc_ut.cpp
new file mode 100644
index 0000000..e0602a2
--- /dev/null
+++ b/src/atrac3denc_ut.cpp
@@ -0,0 +1,335 @@
+#define ATRAC_UT_PUBLIC
+
+#include "atrac3denc.h"
+#include <gtest/gtest.h>
+
+#include <vector>
+#include <cmath>
+using std::vector;
+using namespace NAtracDEnc;
+using namespace NAtrac3;
+
+static void GenerateSignal(TFloat* buf, size_t n, TFloat f, TFloat a) {
+    for (size_t i = 0; i < n; ++i) {
+        buf[i] = a * sin((M_PI/2) * i * f);
+    }
+}
+
+static void GenerateSignalWithTransient(TFloat* buf, size_t n, TFloat f, TFloat a,
+        size_t transientPos, size_t transientLen, TFloat transientLev) {
+    assert(transientPos + transientLen < n);
+    GenerateSignal(buf, n, f, a);
+    GenerateSignal(buf+transientPos, transientLen, f, transientLev);
+//    for (size_t i = transientPos; i < transientPos + transientLen; ++i) {
+//        buf[i] += (i & 1) ? transientLev : - transientLev;
+//    }
+}
+
+class TWindowTest : public TAtrac3Data {
+public:
+    void RunTest() {
+        for (size_t i = 0; i < 256; i++) {
+            const TFloat ha1 = EncodeWindow[i] / 2.0; //compensation
+            const TFloat hs1 = DecodeWindow[i];
+            const TFloat hs2 = DecodeWindow[255-i];
+            const TFloat res = hs1 / (hs1 * hs1 + hs2 * hs2);
+            EXPECT_NEAR(ha1, res, 0.000000001);
+        }
+    }
+};
+
+template<class T> //test helper: one zero-filled allocation sliced into 4 band buffers and 4 result buffers
+class TAtrac3MDCTWorkBuff {
+    T* Buffer; //owning pointer, released in the destructor
+public:
+    static const size_t BandBuffSz = 256;
+    static const size_t BandBuffAndOverlapSz = BandBuffSz * 2;
+    static const size_t BuffSz = BandBuffAndOverlapSz * (4 + 4); //4 input slices (Band0..3) + 4 result slices (Band0Res..3Res)
+    T* const Band0;
+    T* const Band1;
+    T* const Band2;
+    T* const Band3;
+    T* const Band0Res;
+    T* const Band1Res;
+    T* const Band2Res;
+    T* const Band3Res;
+    TAtrac3MDCTWorkBuff()
+        : Buffer(new T[BuffSz])
+        , Band0(Buffer) //each following slice starts BandBuffAndOverlapSz elements after the previous one
+        , Band1(Band0 + BandBuffAndOverlapSz)
+        , Band2(Band1 + BandBuffAndOverlapSz)
+        , Band3(Band2 + BandBuffAndOverlapSz)
+        , Band0Res(Band3 + BandBuffAndOverlapSz)
+        , Band1Res(Band0Res + BandBuffAndOverlapSz)
+        , Band2Res(Band1Res + BandBuffAndOverlapSz)
+        , Band3Res(Band2Res + BandBuffAndOverlapSz)
+    {
+        memset(Buffer, 0, sizeof(T)*BuffSz); //start every slice zeroed
+    }
+    ~TAtrac3MDCTWorkBuff()
+    {
+        delete[] Buffer;
+    }
+};
+
+
+TEST(TAtrac3MDCT, TAtrac3MDCTZeroOneBlock) { //an all-zero block must stay all-zero through Mdct and Midct
+    TAtrac3MDCT mdct;
+    TAtrac3MDCTWorkBuff<TFloat> buff;
+    size_t workSz = TAtrac3MDCTWorkBuff<TFloat>::BandBuffSz;
+
+    vector<TFloat> specs(1024);
+
+    TFloat* p[4] = { buff.Band0, buff.Band1, buff.Band2, buff.Band3 };
+
+    mdct.Mdct(specs.data(), p);
+    for(auto s: specs)
+        EXPECT_NEAR(s, 0.0, 0.0000000001);
+
+    TFloat* t[4] = { buff.Band0Res, buff.Band1Res, buff.Band2Res, buff.Band3Res };
+    mdct.Midct(specs.data(), t); //decode into the *Res buffers: those are the ones asserted on below
+
+    for(size_t i = 0; i < workSz; ++i)
+        EXPECT_NEAR(buff.Band0Res[i], 0.0, 0.0000000001);
+
+    for(size_t i = 0; i < workSz; ++i)
+        EXPECT_NEAR(buff.Band1Res[i], 0.0, 0.0000000001);
+
+    for(size_t i = 0; i < workSz; ++i)
+        EXPECT_NEAR(buff.Band2Res[i], 0.0, 0.0000000001);
+
+    for(size_t i = 0; i < workSz; ++i)
+        EXPECT_NEAR(buff.Band3Res[i], 0.0, 0.0000000001);
+
+
+}
+
+TEST(TAtrac3MDCT, TAtrac3MDCTSignal) {
+    TAtrac3MDCT mdct;
+    TAtrac3MDCTWorkBuff<TFloat> buff;
+    size_t workSz = TAtrac3MDCTWorkBuff<TFloat>::BandBuffSz;
+
+    const size_t len = 1024;
+    vector<TFloat> signal(len);
+    vector<TFloat> signalRes(len);
+    GenerateSignal(signal.data(), signal.size(), 0.25, 32768);
+    
+    for (size_t pos = 0; pos < len; pos += workSz) {
+        vector<TFloat> specs(1024);
+        memcpy(buff.Band0, signal.data() + pos, workSz * sizeof(TFloat));
+
+        TFloat* p[4] = { buff.Band0, buff.Band1, buff.Band2, buff.Band3 };
+        mdct.Mdct(specs.data(), p);
+
+        TFloat* t[4] = { buff.Band0Res, buff.Band1Res, buff.Band2Res, buff.Band3Res };
+        mdct.Midct(specs.data(), t);
+
+        memcpy(signalRes.data() + pos, buff.Band0Res, workSz * sizeof(TFloat));
+    }
+
+    for (int i = workSz; i < len; ++i)
+        EXPECT_NEAR(signal[i - workSz], signalRes[i], 0.00000001);
+}
+
+TEST(TAtrac3MDCT, TAtrac3MDCTSignalWithGainCompensation) {
+    TAtrac3MDCT mdct;
+    TAtrac3MDCTWorkBuff<TFloat> buff;
+    size_t workSz = TAtrac3MDCTWorkBuff<TFloat>::BandBuffSz;
+
+    const size_t len = 4096;
+    vector<TFloat> signal(len, 8000);
+    vector<TFloat> signalRes(len);
+    GenerateSignal(signal.data() + 1024, signal.size()-1024, 0.25, 32768);
+    
+    for (size_t pos = 0; pos < len; pos += workSz) {
+        vector<TFloat> specs(1024);
+        memcpy(buff.Band0, signal.data() + pos, workSz * sizeof(TFloat));
+
+        TFloat* p[4] = { buff.Band0, buff.Band1, buff.Band2, buff.Band3 };
+
+        if (pos == 256) { //apply gain modulation
+            TAtrac3Data::SubbandInfo siCur;
+            siCur.AddSubbandCurve(0, {{3, 2}});
+
+            mdct.Mdct(specs.data(), p, { mdct.GainProcessor.Modulate(siCur.GetGainPoints(0)),
+                    TAtrac3MDCT::TGainModulator(), TAtrac3MDCT::TGainModulator(), TAtrac3MDCT::TGainModulator()});
+        } else if (pos == 1024) {
+            TAtrac3Data::SubbandInfo siCur;
+            std::vector<TAtrac3Data::SubbandInfo::TGainPoint> curve = {{3, 2}, {2, 5}};
+            siCur.AddSubbandCurve(0, std::move(curve));
+
+            mdct.Mdct(specs.data(), p, { mdct.GainProcessor.Modulate(siCur.GetGainPoints(0)),
+                    TAtrac3MDCT::TGainModulator(), TAtrac3MDCT::TGainModulator(), TAtrac3MDCT::TGainModulator()});
+        } else if (pos == 1024 + 256) {
+            TAtrac3Data::SubbandInfo siCur;
+            siCur.AddSubbandCurve(0, {{1, 0}});
+
+            mdct.Mdct(specs.data(), p, { mdct.GainProcessor.Modulate(siCur.GetGainPoints(0)),
+                    TAtrac3MDCT::TGainModulator(), TAtrac3MDCT::TGainModulator(), TAtrac3MDCT::TGainModulator()});
+        } else if (pos == 2048) {
+            TAtrac3Data::SubbandInfo siCur;
+            std::vector<TAtrac3Data::SubbandInfo::TGainPoint> curve = {{4, 2}, {1, 5}};
+            siCur.AddSubbandCurve(0, std::move(curve));
+
+            mdct.Mdct(specs.data(), p, { mdct.GainProcessor.Modulate(siCur.GetGainPoints(0)),
+                    TAtrac3MDCT::TGainModulator(), TAtrac3MDCT::TGainModulator(), TAtrac3MDCT::TGainModulator()});
+        } else {
+            mdct.Mdct(specs.data(), p);
+        }
+
+        TFloat* t[4] = { buff.Band0Res, buff.Band1Res, buff.Band2Res, buff.Band3Res };
+
+        if (pos == 256) { //restore gain modulation
+            TAtrac3Data::SubbandInfo siCur;
+            TAtrac3Data::SubbandInfo siNext;
+            siNext.AddSubbandCurve(0, {{3, 2}});
+ 
+            mdct.Midct(specs.data(), t, {mdct.GainProcessor.Demodulate(siCur.GetGainPoints(0), siNext.GetGainPoints(0)),
+                    TAtrac3MDCT::TAtrac3GainProcessor::TGainDemodulator(),
+                    TAtrac3MDCT::TAtrac3GainProcessor::TGainDemodulator(),
+                    TAtrac3MDCT::TAtrac3GainProcessor::TGainDemodulator()});
+        } else if (pos == 512) {
+            TAtrac3Data::SubbandInfo siNext;
+            TAtrac3Data::SubbandInfo siCur;
+            siCur.AddSubbandCurve(0, {{3, 2}});
+ 
+            mdct.Midct(specs.data(), t, {mdct.GainProcessor.Demodulate(siCur.GetGainPoints(0), siNext.GetGainPoints(0)),
+                    TAtrac3MDCT::TAtrac3GainProcessor::TGainDemodulator(),
+                    TAtrac3MDCT::TAtrac3GainProcessor::TGainDemodulator(),
+                    TAtrac3MDCT::TAtrac3GainProcessor::TGainDemodulator()});
+        } else if (pos == 1024) {
+            TAtrac3Data::SubbandInfo siCur;
+            TAtrac3Data::SubbandInfo siNext;
+            std::vector<TAtrac3Data::SubbandInfo::TGainPoint> curve = {{3, 2}, {2, 5}};
+            siNext.AddSubbandCurve(0, std::move(curve));
+
+            mdct.Midct(specs.data(), t, {mdct.GainProcessor.Demodulate(siCur.GetGainPoints(0), siNext.GetGainPoints(0)),
+                    TAtrac3MDCT::TAtrac3GainProcessor::TGainDemodulator(),
+                    TAtrac3MDCT::TAtrac3GainProcessor::TGainDemodulator(),
+                    TAtrac3MDCT::TAtrac3GainProcessor::TGainDemodulator()});
+        } else if (pos == 1024 + 256) {
+            TAtrac3Data::SubbandInfo siNext;
+            TAtrac3Data::SubbandInfo siCur;
+            siNext.AddSubbandCurve(0, {{1, 0}});
+            std::vector<TAtrac3Data::SubbandInfo::TGainPoint> curve = {{3, 2}, {2, 5}};
+            siCur.AddSubbandCurve(0, std::move(curve));
+
+            mdct.Midct(specs.data(), t, {mdct.GainProcessor.Demodulate(siCur.GetGainPoints(0), siNext.GetGainPoints(0)),
+                    TAtrac3MDCT::TAtrac3GainProcessor::TGainDemodulator(),
+                    TAtrac3MDCT::TAtrac3GainProcessor::TGainDemodulator(),
+                    TAtrac3MDCT::TAtrac3GainProcessor::TGainDemodulator()});
+        } else if (pos == 1024 + 256 + 256) {
+            TAtrac3Data::SubbandInfo siNext;
+            TAtrac3Data::SubbandInfo siCur;
+            siCur.AddSubbandCurve(0, {{1, 0}});
+
+            mdct.Midct(specs.data(), t, {mdct.GainProcessor.Demodulate(siCur.GetGainPoints(0), siNext.GetGainPoints(0)),
+                    TAtrac3MDCT::TAtrac3GainProcessor::TGainDemodulator(),
+                    TAtrac3MDCT::TAtrac3GainProcessor::TGainDemodulator(),
+                    TAtrac3MDCT::TAtrac3GainProcessor::TGainDemodulator()});
+        } else if (pos == 2048) {
+            TAtrac3Data::SubbandInfo siCur;
+            TAtrac3Data::SubbandInfo siNext;
+            std::vector<TAtrac3Data::SubbandInfo::TGainPoint> curve = {{4, 2}, {1, 5}};
+            siNext.AddSubbandCurve(0, std::move(curve));
+
+            mdct.Midct(specs.data(), t, {mdct.GainProcessor.Demodulate(siCur.GetGainPoints(0), siNext.GetGainPoints(0)),
+                    TAtrac3MDCT::TAtrac3GainProcessor::TGainDemodulator(),
+                    TAtrac3MDCT::TAtrac3GainProcessor::TGainDemodulator(),
+                    TAtrac3MDCT::TAtrac3GainProcessor::TGainDemodulator()});
+        } else if (pos == 2048 + 256) {
+            TAtrac3Data::SubbandInfo siNext;
+            TAtrac3Data::SubbandInfo siCur;
+            std::vector<TAtrac3Data::SubbandInfo::TGainPoint> curve = {{4, 2}, {1, 5}};
+            siCur.AddSubbandCurve(0, std::move(curve));
+
+            mdct.Midct(specs.data(), t, {mdct.GainProcessor.Demodulate(siCur.GetGainPoints(0), siNext.GetGainPoints(0)),
+                    TAtrac3MDCT::TAtrac3GainProcessor::TGainDemodulator(),
+                    TAtrac3MDCT::TAtrac3GainProcessor::TGainDemodulator(),
+                    TAtrac3MDCT::TAtrac3GainProcessor::TGainDemodulator()});
+        } else {
+            mdct.Midct(specs.data(), t);
+        }
+        memcpy(signalRes.data() + pos, buff.Band0Res,  workSz * sizeof(TFloat));
+    }
+    for (int i = workSz; i < len; ++i) {
+        //std::cout << "res: " << i << " " << signalRes[i] << std::endl;
+        EXPECT_NEAR(signal[i - workSz], signalRes[i], 0.00000001);
+    }
+}
+
+TEST(TAtrac3MDCT, TAtrac3MDCTSignalWithGainCompensationAndManualTransient) {
+    TAtrac3MDCT mdct;
+    TAtrac3MDCTWorkBuff<TFloat> buff;
+    size_t workSz = TAtrac3MDCTWorkBuff<TFloat>::BandBuffSz;
+
+    const size_t len = 1024;
+    vector<TFloat> signal(len);
+    vector<TFloat> signalRes(len);
+    GenerateSignalWithTransient(signal.data(), signal.size(), 0.03125, 512.0,
+                    640, 64, 32768.0);
+    const std::vector<TAtrac3Data::SubbandInfo::TGainPoint> curve1 = {{6, 13}, {4, 14}};
+ 
+    for (size_t pos = 0; pos < len; pos += workSz) {
+        vector<TFloat> specs(1024);
+        memcpy(buff.Band0, signal.data() + pos, workSz * sizeof(TFloat));
+
+        TFloat* p[4] = { buff.Band0, buff.Band1, buff.Band2, buff.Band3 };
+        //for (int i = 0; i < 256; i++) {
+        //    std::cout << i + pos << " " << buff.Band0[i] << std::endl;
+        //}
+
+        if (pos == 512) { //apply gain modulation
+            TAtrac3Data::SubbandInfo siCur;
+            siCur.AddSubbandCurve(0, std::vector<TAtrac3Data::SubbandInfo::TGainPoint>(curve1));
+
+            for (int i = 0; i < 256; i++) {
+                std::cout << i << " " << buff.Band0[i] << std::endl;
+            }
+
+            mdct.Mdct(specs.data(), p, { mdct.GainProcessor.Modulate(siCur.GetGainPoints(0)),
+                    TAtrac3MDCT::TGainModulator(), TAtrac3MDCT::TGainModulator(), TAtrac3MDCT::TGainModulator()});
+        } else { 
+            mdct.Mdct(specs.data(), p);
+        }
+
+        for (int i = 0; i < specs.size(); ++i) {
+            if (i > 240 && i < 256)
+                specs[i] /= 1.9;
+        }
+        TFloat* t[4] = { buff.Band0Res, buff.Band1Res, buff.Band2Res, buff.Band3Res };
+        if (pos == 512) { //restore gain modulation
+            TAtrac3Data::SubbandInfo siCur;
+            TAtrac3Data::SubbandInfo siNext;
+            siNext.AddSubbandCurve(0, std::vector<TAtrac3Data::SubbandInfo::TGainPoint>(curve1));
+            mdct.Midct(specs.data(), t, {mdct.GainProcessor.Demodulate(siCur.GetGainPoints(0), siNext.GetGainPoints(0)),
+                    TAtrac3MDCT::TAtrac3GainProcessor::TGainDemodulator(),
+                    TAtrac3MDCT::TAtrac3GainProcessor::TGainDemodulator(),
+                    TAtrac3MDCT::TAtrac3GainProcessor::TGainDemodulator()});
+        } else if (pos == 768) {
+            TAtrac3Data::SubbandInfo siNext;
+            TAtrac3Data::SubbandInfo siCur;
+            siCur.AddSubbandCurve(0, std::vector<TAtrac3Data::SubbandInfo::TGainPoint>(curve1));
+ 
+            mdct.Midct(specs.data(), t, {mdct.GainProcessor.Demodulate(siCur.GetGainPoints(0), siNext.GetGainPoints(0)),
+                    TAtrac3MDCT::TAtrac3GainProcessor::TGainDemodulator(),
+                    TAtrac3MDCT::TAtrac3GainProcessor::TGainDemodulator(),
+                    TAtrac3MDCT::TAtrac3GainProcessor::TGainDemodulator()});
+        } else { 
+            mdct.Midct(specs.data(), t);
+        }
+
+        memcpy(signalRes.data() + pos, buff.Band0Res,  workSz * sizeof(TFloat));
+    }
+    for (int i = workSz; i < len; ++i) {
+        //std::cout << "res: " << i << " " << signalRes[i] << std::endl;
+        EXPECT_NEAR(signal[i - workSz], signalRes[i], 10);
+    }
+}
+
+TEST(TAtrac3MDCT, TAtrac3MDCTWindow) {
+    TWindowTest test;
+    test.RunTest();
+}
+
+
diff --git a/src/atrac_encode_settings.h b/src/atrac_encode_settings.h
index e3ae3b7..fd96fa4 100644
--- a/src/atrac_encode_settings.h
+++ b/src/atrac_encode_settings.h
@@ -2,29 +2,4 @@
 
 namespace NAtracDEnc {
 
-class TAtrac1EncodeSettings {
-public:
-    enum class EWindowMode {
-        EWM_NOTRANSIENT,
-        EWM_AUTO
-    };
-private:
-    const uint32_t BfuIdxConst = 0;
-    const bool FastBfuNumSearch = false;
-    EWindowMode WindowMode = EWindowMode::EWM_AUTO;
-    const uint32_t WindowMask = 0;
-public:
-    TAtrac1EncodeSettings();
-    TAtrac1EncodeSettings(uint32_t bfuIdxConst, bool fastBfuNumSearch, EWindowMode windowMode, uint32_t windowMask)
-        : BfuIdxConst(bfuIdxConst)
-        , FastBfuNumSearch(fastBfuNumSearch)
-        , WindowMode(windowMode)
-        , WindowMask(windowMask)
-    {}
-    uint32_t GetBfuIdxConst() const { return BfuIdxConst; }
-    bool GetFastBfuNumSearch() const { return FastBfuNumSearch; }
-    EWindowMode GetWindowMode() const {return WindowMode; }
-    uint32_t GetWindowMask() const {return WindowMask; }
-};
-
 }
diff --git a/src/atracdenc_ut.cpp b/src/atracdenc_ut.cpp
index 5370e9e..5d5f5eb 100644
--- a/src/atracdenc_ut.cpp
+++ b/src/atracdenc_ut.cpp
@@ -1,16 +1,16 @@
-#include "atracdenc.h"
+#include "atrac1denc.h"
 #include <gtest/gtest.h>
 
 #include <vector>
 using std::vector;
 using namespace NAtracDEnc;
 
-void CheckResult128(const vector<double>& a, const vector<double>& b) {
+void CheckResult128(const vector<TFloat>& a, const vector<TFloat>& b) {
     for (int i = 0; i < 96; ++i ) {
         EXPECT_NEAR(a[i], 4 * b[i+32], 0.0000001);
     }
 }
-void CheckResult256(const vector<double>& a, const vector<double>& b) {
+void CheckResult256(const vector<TFloat>& a, const vector<TFloat>& b) {
     for (int i = 0; i < 192; ++i ) {
         EXPECT_NEAR(a[i], 2 * b[i+32], 0.0000001);
     }
@@ -19,14 +19,14 @@ void CheckResult256(const vector<double>& a, const vector<double>& b) {
 
 TEST(TAtrac1MDCT, TAtrac1MDCTLongEncDec) {
     TAtrac1MDCT mdct;
-    vector<double> low(128 * 2);
-    vector<double> mid(128 * 2);
-    vector<double> hi(256 * 2);
-    vector<double> specs(512 * 2);
-
-    vector<double> lowRes(128 * 2);
-    vector<double> midRes(128 * 2);
-    vector<double> hiRes(256 * 2);
+    vector<TFloat> low(128 * 2);
+    vector<TFloat> mid(128 * 2);
+    vector<TFloat> hi(256 * 2);
+    vector<TFloat> specs(512 * 2);
+
+    vector<TFloat> lowRes(128 * 2);
+    vector<TFloat> midRes(128 * 2);
+    vector<TFloat> hiRes(256 * 2);
  
     for (int i = 0; i < 128; i++) {
         low[i] = mid[i] = i;
@@ -48,25 +48,25 @@ TEST(TAtrac1MDCT, TAtrac1MDCTLongEncDec) {
 
 TEST(TAtrac1MDCT, TAtrac1MDCTShortEncDec) {
     TAtrac1MDCT mdct;
-    vector<double> low(128 * 2);
-    vector<double> mid(128 * 2);
-    vector<double> hi(256 * 2);
-    vector<double> specs(512 * 2);
-
-    vector<double> lowRes(128 * 2);
-    vector<double> midRes(128 * 2);
-    vector<double> hiRes(256 * 2);
+    vector<TFloat> low(128 * 2);
+    vector<TFloat> mid(128 * 2);
+    vector<TFloat> hi(256 * 2);
+    vector<TFloat> specs(512 * 2);
+
+    vector<TFloat> lowRes(128 * 2);
+    vector<TFloat> midRes(128 * 2);
+    vector<TFloat> hiRes(256 * 2);
  
     for (int i = 0; i < 128; i++) {
         low[i] = mid[i] = i;
     }
-    const vector<double> lowCopy = low; //in case of short wondow AtracMDCT changed input buffer during calculation
-    const vector<double> midCopy = mid;
+    const vector<TFloat> lowCopy = low; //in case of short window AtracMDCT changes the input buffer during calculation
+    const vector<TFloat> midCopy = mid;
 
     for (int i = 0; i < 256; i++) {
         hi[i] = i;
     }
-    const vector<double> hiCopy = hi;
+    const vector<TFloat> hiCopy = hi;
 
     const TBlockSize blockSize(true, true, true); //short
 
diff --git a/src/bitstream/bitstream.cpp b/src/bitstream/bitstream.cpp
index d916f52..e8f1857 100644
--- a/src/bitstream/bitstream.cpp
+++ b/src/bitstream/bitstream.cpp
@@ -11,7 +11,6 @@ TBitStream::TBitStream(const char* buf, int size)
 {}
 TBitStream::TBitStream()
 {}
-
 void TBitStream::Write(unsigned long long val, int n) {
 	if (n > 23 || n < 0)
 		abort();
@@ -29,10 +28,30 @@ void TBitStream::Write(unsigned long long val, int n) {
 
 	for (int i = 0; i < n/8 + (overlap ? 2 : 1); ++i) {
 		Buf[bytesPos+i] |= t.bytes[7-i];
+
+  //      std::cout << "bufPos: "<< bytesPos+i << " buf: " << (int)Buf[bytesPos+i] << std::endl;
 	}
 
     BitsUsed += n;
 }
+/*
+void TBitStream::Write(unsigned long long val, int n) {
+    if (n > 23 || n < 0)
+        abort();
+    const int bitsLeft = Buf.size() * 8 - BitsUsed;
+    const int bitsReq = n - bitsLeft;
+    const int bytesPos = BitsUsed / 8;
+    const int overlap = BitsUsed % 8;
+
+    if (overlap || bitsReq >= 0) {
+        Buf.resize(Buf.size() + (bitsReq / 8 + (overlap ? 2 : 1 )), 0);
+    }
+    TMix t;
+    t.ull   = (val << (64 - n)) >> overlap;
+    *(unsigned long long*)&Buf[bytesPos-8] |= t.ull;
+    BitsUsed += n;
+}
+*/
 unsigned long long TBitStream::Read(int n) {
 	if (n >23 || n < 0)
 		abort();
diff --git a/src/bitstream/bitstream_ut.cpp b/src/bitstream/bitstream_ut.cpp
index 109570b..7e246ca 100644
--- a/src/bitstream/bitstream_ut.cpp
+++ b/src/bitstream/bitstream_ut.cpp
@@ -11,8 +11,10 @@ TEST(TBitStream, DefaultConstructor) {
 TEST(TBitStream, SimpleWriteRead) {
     TBitStream bs;
     bs.Write(5, 3);
-    EXPECT_EQ(3, bs.GetSizeInBits());
+    bs.Write(true, 1);
+    EXPECT_EQ(4, bs.GetSizeInBits());
     EXPECT_EQ(5, bs.Read(3));
+    EXPECT_EQ(true, bs.Read(1));
 }
 
 TEST(TBisStream, OverlapWriteRead) {
@@ -30,6 +32,7 @@ TEST(TBisStream, OverlapWriteRead) {
     EXPECT_EQ(212, bs.Read(22));
     EXPECT_EQ(323, bs.Read(22));
 }
+
 TEST(TBisStream, OverlapWriteRead2) {
     TBitStream bs;
     bs.Write(2, 2);
@@ -41,6 +44,64 @@ TEST(TBisStream, OverlapWriteRead2) {
     EXPECT_EQ(10003, bs.Read(16));
 }
 
+TEST(TBisStream, OverlapWriteRead3) {
+    TBitStream bs;
+    bs.Write(40, 6);
+    bs.Write(3, 2);
+    bs.Write(0, 3);
+    bs.Write(0, 3);
+    bs.Write(0, 3);
+    bs.Write(0, 3);
+
+    bs.Write(3, 5);
+    bs.Write(1, 2);
+    bs.Write(1, 1);
+    bs.Write(1, 1);
+    bs.Write(1, 1);
+    bs.Write(1, 1);
+
+    bs.Write(0, 3);
+    bs.Write(4, 3);
+    bs.Write(35, 6);
+    bs.Write(25, 6);
+    bs.Write(3, 3);
+    bs.Write(32, 6);
+    bs.Write(29, 6);
+    bs.Write(3, 3);
+    bs.Write(36, 6);
+    bs.Write(49, 6);
+
+
+
+
+    EXPECT_EQ(40, bs.Read(6));
+    EXPECT_EQ(3, bs.Read(2));
+    EXPECT_EQ(0, bs.Read(3));
+    EXPECT_EQ(0, bs.Read(3));
+    EXPECT_EQ(0, bs.Read(3));
+    EXPECT_EQ(0, bs.Read(3));
+    EXPECT_EQ(3, bs.Read(5));
+
+    EXPECT_EQ(1, bs.Read(2));
+    EXPECT_EQ(1, bs.Read(1));
+    EXPECT_EQ(1, bs.Read(1));
+    EXPECT_EQ(1, bs.Read(1));
+    EXPECT_EQ(1, bs.Read(1));
+
+    EXPECT_EQ(0, bs.Read(3));
+    EXPECT_EQ(4, bs.Read(3));
+    EXPECT_EQ(35, bs.Read(6));
+    EXPECT_EQ(25, bs.Read(6));
+    EXPECT_EQ(3, bs.Read(3));
+    EXPECT_EQ(32, bs.Read(6));
+    EXPECT_EQ(29, bs.Read(6));
+    EXPECT_EQ(3, bs.Read(3));
+    EXPECT_EQ(36, bs.Read(6));
+    EXPECT_EQ(49, bs.Read(6));
+
+}
+
+
 TEST(TBisStream, SignWriteRead) {
     TBitStream bs;
     bs.Write(MakeSign(-2, 3), 3);
diff --git a/src/compressed_io.h b/src/compressed_io.h
index d8cfb11..cc45a7f 100644
--- a/src/compressed_io.h
+++ b/src/compressed_io.h
@@ -2,6 +2,7 @@
 #include <vector>
 #include <array>
 #include <string>
+#include <memory>
 
 class ICompressedIO {
 public:
@@ -29,3 +30,5 @@ public:
     virtual long long GetLengthInSamples() const = 0;
     virtual ~ICompressedIO() {}
 };
+
+typedef std::unique_ptr<ICompressedIO> TCompressedIOPtr;
diff --git a/src/config.h b/src/config.h
index 942841a..698b865 100644
--- a/src/config.h
+++ b/src/config.h
@@ -2,3 +2,8 @@
 
 #define CONFIG_DOUBLE
 
+#ifdef CONFIG_DOUBLE
+typedef double TFloat;
+#else
+typedef float TFloat;
+#endif
diff --git a/src/gain_processor.h b/src/gain_processor.h
new file mode 100644
index 0000000..02c7206
--- /dev/null
+++ b/src/gain_processor.h
@@ -0,0 +1,104 @@
+#include <functional>
+
+#include "config.h"
+
+template<class T>
+class TGainProcessor : public T {
+
+public:
+    typedef std::function<void(TFloat* out, TFloat* cur, TFloat* prev)> TGainDemodulator;
+    /*
+     * example GainModulation:
+     * PCMinput:
+     *     N   b    N        N
+     * --------|--------|--------|--------
+     * |       | - mdct #1
+     *     |        | - mdct #2
+     *     a
+     *         |        | - mdct #3
+     *         ^^^^^ - modulated by previous step
+     * Let's consider the case where we want to modulate mdct #2.
+     *     bufCur - the buffer holding the first half of the mdct transformation (a)
+     *     bufNext - the buffer holding the second half of the mdct transformation, i.e. the overlap
+     *               (the input buffer starting at point b)
+     * so the next transformation (mdct #3) gets an already modulated first part
+     */
+    typedef std::function<void(TFloat* bufCur, TFloat* bufNext)> TGainModulator;
+    static TFloat GetGainInc(uint32_t levelIdxCur)
+    {
+        const int incPos = T::ExponentOffset - levelIdxCur + T::GainInterpolationPosShift;
+        return T::GainInterpolation[incPos];
+    }
+    static TFloat GetGainInc(uint32_t levelIdxCur, uint32_t levelIdxNext)
+    {
+        const int incPos = levelIdxNext - levelIdxCur + T::GainInterpolationPosShift;
+        return T::GainInterpolation[incPos];
+    }
+
+
+    TGainDemodulator Demodulate(const std::vector<typename T::SubbandInfo::TGainPoint>& giNow,
+                                const std::vector<typename T::SubbandInfo::TGainPoint>& giNext)
+    {
+        return [=](TFloat* out, TFloat* cur, TFloat* prev) {
+            uint32_t pos = 0;
+            const TFloat scale = giNext.size() ? T::GainLevel[giNext[0].Level] : 1;
+            for (uint32_t i = 0; i < giNow.size(); ++i) {
+                uint32_t lastPos = giNow[i].Location << T::LocScale;
+                const uint32_t levelPos = giNow[i].Level;
+                assert(levelPos < sizeof(T::GainLevel)/sizeof(T::GainLevel[0]));
+                TFloat level = T::GainLevel[levelPos];
+                const int incPos = ((i + 1) < giNow.size() ? giNow[i + 1].Level : T::ExponentOffset)
+                                   - giNow[i].Level + T::GainInterpolationPosShift;
+                TFloat gainInc = T::GainInterpolation[incPos];
+                for (; pos < lastPos; pos++) {
+                    //std::cout << "pos: " << pos << " scale: " << scale << " level: " << level << std::endl;
+                    out[pos] = (cur[pos] * scale + prev[pos]) * level;
+                }
+                for (; pos < lastPos + T::LocSz; pos++) {
+                    //std::cout << "pos: " << pos << " scale: " << scale << " level: " << level << " gainInc: " << gainInc << std::endl;
+                    out[pos] = (cur[pos] * scale + prev[pos]) * level;
+                    level *= gainInc;
+                }
+            }
+            for (; pos < T::MDCTSz/2; pos++) {
+                //std::cout << "pos: " << pos << " scale: " << scale << std::endl;
+                out[pos] = cur[pos] * scale + prev[pos];
+            }
+        };
+    }
+    TGainModulator Modulate(const std::vector<typename T::SubbandInfo::TGainPoint>& giCur) {
+        if (giCur.empty())
+            return {};
+        return [=](TFloat* bufCur, TFloat* bufNext) {
+            uint32_t pos = 0;
+            const TFloat scale = T::GainLevel[giCur[0].Level];
+            for (uint32_t i = 0; i < giCur.size(); ++i) {
+                uint32_t lastPos = giCur[i].Location << T::LocScale;
+                const uint32_t levelPos = giCur[i].Level;
+                assert(levelPos < sizeof(T::GainLevel)/sizeof(T::GainLevel[0]));
+                TFloat level = T::GainLevel[levelPos];
+                const int incPos = ((i + 1) < giCur.size() ? giCur[i + 1].Level : T::ExponentOffset)
+                                   - giCur[i].Level + T::GainInterpolationPosShift;
+                TFloat gainInc = T::GainInterpolation[incPos];
+                for (; pos < lastPos; pos++) {
+                    //std::cout << "mod pos: " << pos << " scale: " << scale << " bufCur: " <<  bufCur[pos]  << " level: " << level << " bufNext: " << bufNext[pos] << std::endl;
+                    bufCur[pos] /= scale;
+                    bufNext[pos] /= level;
+                }
+                for (; pos < lastPos + T::LocSz; pos++) {
+
+                    //std::cout << "mod pos: " << pos << " scale: " << scale << " bufCur: " <<  bufCur[pos]  << " level: " << level << " (gainInc) " << gainInc << " bufNext: " << bufNext[pos] << std::endl;
+                    bufCur[pos] /= scale;
+                    bufNext[pos] /= level;
+                    //std::cout << "mod pos: " << pos << " scale: " << scale << " level: " << level << " gainInc: " << gainInc << std::endl;
+                    level *= gainInc;
+                }
+            }
+            for (; pos < T::MDCTSz/2; pos++) {
+
+                //std::cout << "mod pos: " << pos << " scale: " << scale << " bufCur: " << bufCur[pos] << std::endl;
+                bufCur[pos] /= scale;
+            }
+        };
+    }
+};
diff --git a/src/main.cpp b/src/main.cpp
index f74b253..9550ce4 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -6,20 +6,23 @@
 #include "pcmengin.h"
 #include "wav.h"
 #include "aea.h"
-#include "atracdenc.h"
+#include "config.h"
+#include "atrac1denc.h"
+#include "atrac3denc.h"
 
 using std::cout;
 using std::cerr;
 using std::endl;
 using std::string;
-using std::unique_ptr;
-using std::move;
 using std::stoi;
 
 using namespace NAtracDEnc;
 
+typedef std::unique_ptr<TPCMEngine<TFloat>> TPcmEnginePtr;
+typedef std::unique_ptr<IProcessor<TFloat>> TAtracProcessorPtr;
 
-static void printUsage(const char* myName) {
+static void printUsage(const char* myName)
+{
     cout << "\tusage: " << myName << " <-e|-d> <-i input> <-o output>\n" << endl;
     cout << "-e encode mode (PCM -> ATRAC), -i wav file, -o aea file" << endl;
     cout << "-d decode mode (ATRAC -> PCM), -i aea file, -o wav file" << endl;
@@ -27,7 +30,8 @@ static void printUsage(const char* myName) {
 
 }
 
-static void printProgress(int percent) {
+static void printProgress(int percent)
+{
     static uint32_t counter;
     counter++;
     const char symbols[4] = {'-', '\\', '|', '/'};
@@ -35,26 +39,143 @@ static void printProgress(int percent) {
     fflush(stdout);
 }
 
-static string GetHelp() {
+static string GetHelp()
+{
     return "\n--encode -i \t - encode mode"
         "\n--decode -d \t - decode mode"
         "\n -i input file"
         "\n -o output file"
-        "\nAdvanced options:\n --bfuidxconst\t Set constant amount of used BFU. WARNING: It is not a lowpass filter! Do not use it to cut off hi frequency."
-        "\n --bfuidxfast\t enable fast search of BFU amount"
-        "\n --notransient[=mask] disable transient detection and use optional mask to set bands with short MDCT window";
+        "\n --bitrate (only if supported by codec)"
+        "\nAdvanced options:\n --bfuidxconst\t Set constant amount of used BFU (ATRAC1). "
+             "WARNING: It is not a lowpass filter! Do not use it to cut off hi frequency."
+        "\n --bfuidxfast\t enable fast search of BFU amount (ATRAC1)"
+        "\n --notransient[=mask] disable transient detection and use optional mask to set bands with short MDCT window "
+                                                                                                              "(ATRAC1)"
+        /*"\n --nogaincontrol disable gain control (ATRAC3)"*/
+        "\n --notonal disable tonal components (ATRAC3)";
 }
 
-int main(int argc, char* const* argv) {
+// Parse `data` as an int in [min, max]; on any failure warn on stderr and return `def`.
+static int checkedStoi(const char* data, int min, int max, int def)
+{
+    try {
+        const int tmp = std::stoi(data);
+        if (tmp >= min && tmp <= max)
+            return tmp;
+    } catch (const std::logic_error&) {
+        // stoi throws invalid_argument or out_of_range; both derive from logic_error
+    }
+    std::cerr << "Wrong arg: " << data << " " << def << " will be used" << std::endl;
+    return def;
+}
+
+enum EOptions // values used as the `val` field of the getopt_long option table and as switch labels
+{
+    O_ENCODE = 'e',
+    O_DECODE = 'd',
+    O_HELP = 'h',
+    O_BITRATE = 'b',
+    O_BFUIDXCONST = 1, // small integers are used for options without a letter code
+    O_BFUIDXFAST = 2,
+    O_NOTRANSIENT = 3,
+    O_MONO = 'm',
+    O_NOSTDOUT = '4', // NOTE(review): this is the character '4', not the integer 4 that would fit the 1..6 run — confirm intent
+    O_NOTONAL = 5,
+    O_NOGAINCONTROL = 6,
+};
+
+// Opens the input WAV and the output AEA container, then builds the PCM reader
+// engine and the ATRAC1 processor; results are returned via the out-pointers.
+static void PrepareAtrac1Encoder(const string& inFile,
+                                 const string& outFile,
+                                 const bool noStdOut,
+                                 NAtrac1::TAtrac1EncodeSettings&& encoderSettings,
+                                 uint64_t* totalSamples,
+                                 TWavPtr* wavIO,
+                                 TPcmEnginePtr* pcmEngine,
+                                 TAtracProcessorPtr* atracProcessor)
+{
+    wavIO->reset(new TWav(inFile));
+    const int numChannels = (*wavIO)->GetChannelNum();
+    *totalSamples = (*wavIO)->GetTotalSamples();
+    //TODO: recheck it
+    const uint32_t numFrames = numChannels * (*totalSamples) / NAtrac1::TAtrac1Data::NumSamples;
+    TCompressedIOPtr aeaIO = TCompressedIOPtr(new TAea(outFile, "test", numChannels, numFrames));
+    pcmEngine->reset(new TPCMEngine<TFloat>(4096,
+                                            numChannels,
+                                            TPCMEngine<TFloat>::TReaderPtr((*wavIO)->GetPCMReader<TFloat>())));
+    if (!noStdOut)
+        cout << "Input file: " << inFile
+             << "\n Channels: " << numChannels
+             << "\n SampleRate: " << (*wavIO)->GetSampleRate()
+             << "\n TotalSamples: " << *totalSamples // dereference: print the count, not the pointer
+             << endl;
+    atracProcessor->reset(new TAtrac1Processor(std::move(aeaIO), std::move(encoderSettings)));
+}
+
+// Decoder setup: opens the AEA input and the 44100 Hz WAV output, then builds the
+// PCM writer engine and the ATRAC1 processor returned through the out-pointers.
+static void PrepareAtrac1Decoder(const string& inFile, const string& outFile, const bool noStdOut,
+                                 uint64_t* totalSamples, TWavPtr* wavIO,
+                                 TPcmEnginePtr* pcmEngine, TAtracProcessorPtr* atracProcessor)
+{
+    TCompressedIOPtr aeaIO = TCompressedIOPtr(new TAea(inFile));
+    *totalSamples = aeaIO->GetLengthInSamples();
+    const uint32_t length = aeaIO->GetLengthInSamples();
+    if (!noStdOut) {
+        cout << "Name: " << aeaIO->GetName()
+             << "\n Channels: " << aeaIO->GetChannelNum()
+             << "\n Length: " << length
+             << endl;
+    }
+    wavIO->reset(new TWav(outFile, aeaIO->GetChannelNum(), 44100));
+    typedef TPCMEngine<TFloat> TEngine;
+    pcmEngine->reset(new TEngine(4096,
+                                 aeaIO->GetChannelNum(),
+                                 TEngine::TWriterPtr((*wavIO)->GetPCMWriter<TFloat>())));
+    atracProcessor->reset(new TAtrac1Processor(std::move(aeaIO), NAtrac1::TAtrac1EncodeSettings()));
+}
+
+// Encoder setup for ATRAC3: opens the input WAV and the output OMA container,
+// then builds the PCM reader engine and the ATRAC3 processor. Everything is
+// handed back to the caller through the out-pointers.
+static void PrepareAtrac3Encoder(const string& inFile, const string& outFile, const bool noStdOut,
+                                 NAtrac3::TAtrac3EncoderSettings&& encoderSettings,
+                                 uint64_t* totalSamples, TWavPtr* wavIO,
+                                 TPcmEnginePtr* pcmEngine, TAtracProcessorPtr* atracProcessor)
+{
+    // The warning goes to stderr so that stdout stays clean when noStdOut is requested.
+    std::cerr << "WARNING: ATRAC3 is uncompleted, result will be not good )))" << std::endl;
+    if (!noStdOut)
+        std::cout << "bitrate " << encoderSettings.ConteinerParams->Bitrate << std::endl;
+    wavIO->reset(new TWav(inFile));
+    const int numChannels = (*wavIO)->GetChannelNum();
+    *totalSamples = (*wavIO)->GetTotalSamples();
+    // NOTE(review): 512 looks copied from the ATRAC1 path — confirm the ATRAC3 frame length.
+    const uint32_t numFrames = numChannels * (*totalSamples) / 512;
+    TCompressedIOPtr omaIO = TCompressedIOPtr(new TOma(outFile, "test", numChannels, numFrames,
+                                                       OMAC_ID_ATRAC3,
+                                                       encoderSettings.ConteinerParams->FrameSz));
+    pcmEngine->reset(new TPCMEngine<TFloat>(4096,
+                                            numChannels,
+                                            TPCMEngine<TFloat>::TReaderPtr((*wavIO)->GetPCMReader<TFloat>())));
+    atracProcessor->reset(new TAtrac3Processor(std::move(omaIO), std::move(encoderSettings)));
+}
+
+int main(int argc, char* const* argv)
+{
     const char* myName = argv[0];
     static struct option longopts[] = {
-        { "encode", no_argument, NULL, 'e' },
-        { "decode", no_argument, NULL, 'd' },
-        { "help", no_argument, NULL, 'h' },
-        { "bfuidxconst", required_argument, NULL, 1},
-        { "bfuidxfast", no_argument, NULL, 2},
-        { "notransient", optional_argument, NULL, 3},
-        { "nostdout", no_argument, NULL, 4},
+        { "encode", optional_argument, NULL, O_ENCODE },
+        { "decode", no_argument, NULL, O_DECODE },
+        { "help", no_argument, NULL, O_HELP },
+        { "bitrate", required_argument, NULL, O_BITRATE},
+        { "bfuidxconst", required_argument, NULL, O_BFUIDXCONST},
+        { "bfuidxfast", no_argument, NULL, O_BFUIDXFAST},
+        { "notransient", optional_argument, NULL, O_NOTRANSIENT},
+        { "nostdout", no_argument, NULL, O_NOSTDOUT},
+        { "notonal", no_argument, NULL, O_NOTONAL},
+        { "nogaincontrol", no_argument, NULL, O_NOGAINCONTROL},
         { NULL, 0, NULL, 0}
     };
 
@@ -64,15 +185,23 @@ int main(int argc, char* const* argv) {
     uint32_t mode = 0;
     uint32_t bfuIdxConst = 0; //0 - auto, no const
     bool fastBfuNumSearch = false;
-    bool nostdout = false;
-    TAtrac1EncodeSettings::EWindowMode windowMode = TAtrac1EncodeSettings::EWindowMode::EWM_AUTO;
-    uint32_t winMask = 0; //all is long
+    bool noStdOut = false;
+    bool noGainControl = false;
+    bool noTonalComponents = false;
+    NAtrac1::TAtrac1EncodeSettings::EWindowMode windowMode = NAtrac1::TAtrac1EncodeSettings::EWindowMode::EWM_AUTO;
+    uint32_t winMask = 0; //0 - all is long
+    uint32_t bitrate = 0; //0 - use default for codec
     while ((ch = getopt_long(argc, argv, "edhi:o:m", longopts, NULL)) != -1) {
         switch (ch) {
-            case 'e':
+            case O_ENCODE:
                 mode |= E_ENCODE;
+                if (optarg) {
+                    if (strcmp(optarg, "atrac3") == 0) {
+                        mode |= E_ATRAC3;
+                    }
+                }
                 break;
-            case 'd':
+            case O_DECODE:
                 mode |= E_DECODE;
                 break;
             case 'i':
@@ -81,25 +210,24 @@ int main(int argc, char* const* argv) {
             case 'o':
                 outFile = optarg;
                 if (outFile == "-")
-                    nostdout = true;
+                    noStdOut = true;
                 break;
             case 'h':
                 cout << GetHelp() << endl;
                 return 0;
                 break;
-            case 1:
-                try {
-                    bfuIdxConst = stoi(optarg);
-                } catch (std::invalid_argument&) {
-                    cerr << "Wrong arg: " << optarg << " should be (0, 8]" << endl;
-                    return -1;
-                }
+            case O_BITRATE:
+                bitrate = checkedStoi(optarg, 32, 384, 0);
+                std::cout << "BITRATE" << bitrate << std::endl;
+                break;
+            case O_BFUIDXCONST:
+                bfuIdxConst = checkedStoi(optarg, 1, 8, 0);
                 break;
-            case 2:
+            case O_BFUIDXFAST:
                 fastBfuNumSearch = true;
                 break;
-            case 3:
-                windowMode = TAtrac1EncodeSettings::EWindowMode::EWM_NOTRANSIENT;
+            case O_NOTRANSIENT:
+                windowMode = NAtrac1::TAtrac1EncodeSettings::EWindowMode::EWM_NOTRANSIENT;
                 if (optarg) {
                     winMask = stoi(optarg);
                 }
@@ -108,8 +236,14 @@ int main(int argc, char* const* argv) {
                     ((winMask & 2) ? "short": "long") << ", hi - " <<
                     ((winMask & 4) ? "short": "long") << endl;
                 break;
-            case 4:
-                nostdout = true;
+            case O_NOSTDOUT:
+                noStdOut = true;
+                break;
+            case O_NOTONAL:
+                noTonalComponents = true;
+                break;
+            case O_NOGAINCONTROL:
+                noGainControl = true;
                 break;
 			default:
                 printUsage(myName);
@@ -128,36 +262,50 @@ int main(int argc, char* const* argv) {
         return 1;
     }
     if (bfuIdxConst > 8) {
-        cerr << "Wrong bfuidxconst value ("<< bfuIdxConst << "). This is advanced options, use --help to get more information" << endl;
+        cerr << "Wrong bfuidxconst value ("<< bfuIdxConst << "). "
+             << "This is advanced options, use --help to get more information"
+             << endl;
         return 1;
     }
 
-    TPCMEngine<double>* pcmEngine = nullptr;
-    IProcessor<double>* atracProcessor;
+    
+    TPcmEnginePtr pcmEngine;
+    TAtracProcessorPtr atracProcessor;
     uint64_t totalSamples = 0;
     TWavPtr wavIO;
-    if (mode == E_ENCODE) {
-        wavIO = TWavPtr(new TWav(inFile));
-        const int numChannels = wavIO->GetChannelNum();
-        totalSamples = wavIO->GetTotalSamples();
-        //TODO: recheck it
-        TAeaPtr aeaIO = TAeaPtr(new TAea(outFile, "test", numChannels, numChannels * totalSamples / 512));
-        pcmEngine = new TPCMEngine<double>(4096, numChannels, TPCMEngine<double>::TReaderPtr(wavIO->GetPCMReader<double>()));
-        if (!nostdout)
-            cout << "Input file: " << inFile << "\n Channels: " << numChannels << "\n SampleRate: " << wavIO->GetSampleRate() << "\n TotalSamples: " << totalSamples << endl;
-        atracProcessor = new TAtrac1Processor(move(aeaIO), TAtrac1EncodeSettings(bfuIdxConst, fastBfuNumSearch, windowMode, winMask));
-    } else if (mode == E_DECODE) {
-        TAeaPtr aeaIO = TAeaPtr(new TAea(inFile));
-        totalSamples = aeaIO->GetLengthInSamples();
-        uint32_t length = aeaIO->GetLengthInSamples();
-        if (!nostdout)
-            cout << "Name: " << aeaIO->GetName() << "\n Channels: " << aeaIO->GetChannelNum() << "\n Length: " << length << endl;
-        wavIO = TWavPtr(new TWav(outFile, aeaIO->GetChannelNum(), 44100));
-        pcmEngine = new TPCMEngine<double>(4096, aeaIO->GetChannelNum(), TPCMEngine<double>::TWriterPtr(wavIO->GetPCMWriter<double>()));
-        atracProcessor = new TAtrac1Processor(move(aeaIO), TAtrac1EncodeSettings(bfuIdxConst, fastBfuNumSearch, windowMode, winMask));
-    } else {
-        cerr << "Processing mode was not specified" << endl;
-        return 1;
+    uint32_t pcmFrameSz = 0; //size of one pcm frame to process
+    switch (mode) {
+        case E_ENCODE:
+        {
+            using NAtrac1::TAtrac1Data;
+            NAtrac1::TAtrac1EncodeSettings encoderSettings(bfuIdxConst, fastBfuNumSearch, windowMode, winMask);
+            PrepareAtrac1Encoder(inFile, outFile, noStdOut, std::move(encoderSettings),
+                &totalSamples, &wavIO, &pcmEngine, &atracProcessor);
+            pcmFrameSz = TAtrac1Data::NumSamples;
+        }
+        break;
+        case E_DECODE:
+        {
+            using NAtrac1::TAtrac1Data;
+            PrepareAtrac1Decoder(inFile, outFile, noStdOut,
+                &totalSamples, &wavIO, &pcmEngine, &atracProcessor);
+            pcmFrameSz = TAtrac1Data::NumSamples;
+        }
+        break;
+        case (E_ENCODE | E_ATRAC3):
+        {
+            using NAtrac3::TAtrac3Data;
+            NAtrac3::TAtrac3EncoderSettings encoderSettings(bitrate * 1024, noGainControl, noTonalComponents); 
+            PrepareAtrac3Encoder(inFile, outFile, noStdOut, std::move(encoderSettings),
+                &totalSamples, &wavIO, &pcmEngine, &atracProcessor);
+            pcmFrameSz = TAtrac3Data::NumSamples;;
+        }
+        break;
+        default:
+        {
+            cerr << "Processing mode was not specified" << endl;
+            return 1;
+        }
     }
 
     auto atracLambda = (mode == E_DECODE) ? atracProcessor->GetDecodeLambda() :
@@ -165,12 +313,12 @@ int main(int argc, char* const* argv) {
 
     uint64_t processed = 0;
     try {
-        while (totalSamples > (processed = pcmEngine->ApplyProcess(512, atracLambda)))
+        while (totalSamples > (processed = pcmEngine->ApplyProcess(pcmFrameSz, atracLambda)))
         {
-            if (!nostdout)
+            if (!noStdOut)
                 printProgress(processed*100/totalSamples);
         }
-        if (!nostdout)
+        if (!noStdOut)
             cout << "\nDone" << endl;
     }
     catch (TAeaIOError err) {
diff --git a/src/mdct/mdct.h b/src/mdct/mdct.h
index ced049c..33863fb 100644
--- a/src/mdct/mdct.h
+++ b/src/mdct/mdct.h
@@ -8,7 +8,7 @@ namespace NMDCT {
 class TMDCTBase {
 protected:
     MDCTContext Ctx;
-    TMDCTBase(int n, double scale) {
+    TMDCTBase(int n, TFloat scale) {
         mdct_ctx_init(&Ctx, n, scale);
     };
     virtual ~TMDCTBase() {
@@ -19,13 +19,13 @@ protected:
 
 template<int N>
 class TMDCT : public TMDCTBase {
-    std::vector<double> Buf;
+    std::vector<TFloat> Buf;
 public:
     TMDCT(float scale = 1.0)
         : TMDCTBase(N, scale)
         , Buf(N/2)
     {}
-    const std::vector<double>& operator()(double* in) {
+    const std::vector<TFloat>& operator()(TFloat* in) {
         mdct(&Ctx, &Buf[0], in);
         return Buf;
     }
@@ -33,13 +33,13 @@ public:
 
 template<int N>
 class TMIDCT : public TMDCTBase {
-    std::vector<double> Buf;
+    std::vector<TFloat> Buf;
 public:
     TMIDCT(float scale = 1.0)
         : TMDCTBase(N, scale)
         , Buf(N)
     {}
-    const std::vector<double>& operator()(double* in) {
+    const std::vector<TFloat>& operator()(TFloat* in) {
         midct(&Ctx, &Buf[0], in);
         return Buf;
     }
diff --git a/src/mdct/mdct_ut.cpp b/src/mdct/mdct_ut.cpp
index e81bea1..3552afd 100644
--- a/src/mdct/mdct_ut.cpp
+++ b/src/mdct/mdct_ut.cpp
@@ -7,24 +7,24 @@
 using std::vector;
 using namespace NMDCT;
 
-static vector<double> mdct(double* x, int N) {
-    vector<double> res;
+static vector<TFloat> mdct(TFloat* x, int N) {
+    vector<TFloat> res;
     for (int k = 0; k < N; k++) {
-        double sum = 0;
+        TFloat sum = 0;
         for (int n = 0; n < 2 * N; n++)
-            sum += x[n]* cos((M_PI/N) * ((double)n + 0.5 + N/2) * ((double)k + 0.5));
+            sum += x[n]* cos((M_PI/N) * ((TFloat)n + 0.5 + N/2) * ((TFloat)k + 0.5));
 
         res.push_back(sum);
     }
     return res;
 }  
 
-static vector<double> midct(double* x, int N) {
-    vector<double> res;
+static vector<TFloat> midct(TFloat* x, int N) {
+    vector<TFloat> res;
     for (int n = 0; n < 2 * N; n++) {
-        double sum = 0;
+        TFloat sum = 0;
         for (int k = 0; k < N; k++)
-            sum += (x[k] * cos((M_PI/N) * ((double)n + 0.5 + N/2) * ((double)k + 0.5)));
+            sum += (x[k] * cos((M_PI/N) * ((TFloat)n + 0.5 + N/2) * ((TFloat)k + 0.5)));
 
         res.push_back(sum);
     }
@@ -35,12 +35,12 @@ static vector<double> midct(double* x, int N) {
 TEST(TBitStream, MDCT64) {
     const int N = 64;
     TMDCT<N> transform(N);
-    vector<double> src(N);
+    vector<TFloat> src(N);
     for (int i = 0; i < N; i++) {
         src[i] = i;
     }
-    const vector<double> res1 = mdct(&src[0], N/2);
-    const vector<double> res2 = transform(&src[0]);
+    const vector<TFloat> res1 = mdct(&src[0], N/2);
+    const vector<TFloat> res2 = transform(&src[0]);
     EXPECT_EQ(res1.size(), res2.size());
     for (int i = 0; i < res1.size(); i++) {
         EXPECT_NEAR(res1[i], res2[i], 0.0000000001);
@@ -50,12 +50,12 @@ TEST(TBitStream, MDCT64) {
 TEST(TBitStream, MDCT128) {
     const int N = 128;
     TMDCT<N> transform(N);
-    vector<double> src(N);
+    vector<TFloat> src(N);
     for (int i = 0; i < N; i++) {
         src[i] = i;
     }
-    const vector<double> res1 = mdct(&src[0], N/2);
-    const vector<double> res2 = transform(&src[0]);
+    const vector<TFloat> res1 = mdct(&src[0], N/2);
+    const vector<TFloat> res2 = transform(&src[0]);
     EXPECT_EQ(res1.size(), res2.size());
     for (int i = 0; i < res1.size(); i++) {
         EXPECT_NEAR(res1[i], res2[i], 0.0000000001);
@@ -65,12 +65,12 @@ TEST(TBitStream, MDCT128) {
 TEST(TBitStream, MDCT256) {
     const int N = 256;
     TMDCT<N> transform(N);
-    vector<double> src(N);
+    vector<TFloat> src(N);
     for (int i = 0; i < N; i++) {
         src[i] = i;
     }
-    const vector<double> res1 = mdct(&src[0], N/2);
-    const vector<double> res2 = transform(&src[0]);
+    const vector<TFloat> res1 = mdct(&src[0], N/2);
+    const vector<TFloat> res2 = transform(&src[0]);
     EXPECT_EQ(res1.size(), res2.size());
     for (int i = 0; i < res1.size(); i++) {
         EXPECT_NEAR(res1[i], res2[i], 0.00000001);
@@ -80,12 +80,12 @@ TEST(TBitStream, MDCT256) {
 TEST(TBitStream, MDCT256_RAND) {
     const int N = 256;
     TMDCT<N> transform(N);
-    vector<double> src(N);
+    vector<TFloat> src(N);
     for (int i = 0; i < N; i++) {
         src[i] = rand();
     }
-    const vector<double> res1 = mdct(&src[0], N/2);
-    const vector<double> res2 = transform(&src[0]);
+    const vector<TFloat> res1 = mdct(&src[0], N/2);
+    const vector<TFloat> res2 = transform(&src[0]);
     EXPECT_EQ(res1.size(), res2.size());
     for (int i = 0; i < res1.size(); i++) {
         EXPECT_NEAR(res1[i], res2[i], 0.01);
@@ -96,12 +96,12 @@ TEST(TBitStream, MDCT256_RAND) {
 TEST(TBitStream, MIDCT64) {
     const int N = 64;
     TMIDCT<N> transform(1);
-    vector<double> src(N);
+    vector<TFloat> src(N);
     for (int i = 0; i < N/2; i++) {
         src[i] = i;
     }
-    const vector<double> res1 = midct(&src[0], N/2);
-    const vector<double> res2 = transform(&src[0]);
+    const vector<TFloat> res1 = midct(&src[0], N/2);
+    const vector<TFloat> res2 = transform(&src[0]);
     EXPECT_EQ(res1.size(), res2.size());
     for (int i = 0; i < N; i++) {
         EXPECT_NEAR(res1[i], res2[i], 0.0000000001);
@@ -111,12 +111,12 @@ TEST(TBitStream, MIDCT64) {
 TEST(TBitStream, MIDCT128) {
     const int N = 128;
     TMIDCT<N> transform(1);
-    vector<double> src(N);
+    vector<TFloat> src(N);
     for (int i = 0; i < N/2; i++) {
         src[i] = i;
     }
-    const vector<double> res1 = midct(&src[0], N/2);
-    const vector<double> res2 = transform(&src[0]);
+    const vector<TFloat> res1 = midct(&src[0], N/2);
+    const vector<TFloat> res2 = transform(&src[0]);
     EXPECT_EQ(res1.size(), res2.size());
     for (int i = 0; i < N; i++) {
         EXPECT_NEAR(res1[i], res2[i], 0.0000000001);
@@ -126,12 +126,12 @@ TEST(TBitStream, MIDCT128) {
 TEST(TBitStream, MIDCT256) {
     const int N = 256;
     TMIDCT<N> transform(1);
-    vector<double> src(N);
+    vector<TFloat> src(N);
     for (int i = 0; i < N/2; i++) {
         src[i] = i;
     }
-    const vector<double> res1 = midct(&src[0], N/2);
-    const vector<double> res2 = transform(&src[0]);
+    const vector<TFloat> res1 = midct(&src[0], N/2);
+    const vector<TFloat> res2 = transform(&src[0]);
     EXPECT_EQ(res1.size(), res2.size());
     for (int i = 0; i < N; i++) {
         EXPECT_NEAR(res1[i], res2[i], 0.000000001);
@@ -141,12 +141,12 @@ TEST(TBitStream, MIDCT256) {
 TEST(TBitStream, MIDCT256_RAND) {
     const int N = 256;
     TMIDCT<N> transform(1);
-    vector<double> src(N);
+    vector<TFloat> src(N);
     for (int i = 0; i < N/2; i++) {
         src[i] = rand();
     }
-    const vector<double> res1 = midct(&src[0], N/2);
-    const vector<double> res2 = transform(&src[0]);
+    const vector<TFloat> res1 = midct(&src[0], N/2);
+    const vector<TFloat> res2 = transform(&src[0]);
     EXPECT_EQ(res1.size(), res2.size());
     for (int i = 0; i < N; i++) {
         EXPECT_NEAR(res1[i], res2[i], 0.01);
diff --git a/src/oma.cpp b/src/oma.cpp
new file mode 100644
index 0000000..5fd18b7
--- /dev/null
+++ b/src/oma.cpp
@@ -0,0 +1,42 @@
+#include "oma.h"
+#include <stdlib.h>
+
+// Opens `filename` for writing as a 44100 Hz, OMA_STEREO stream; aborts if liboma cannot open it.
+TOma::TOma(const std::string& filename, const std::string& title, int numChannel, uint32_t numFrames, int cid, uint32_t framesize) {
+    oma_info_t info;
+    info.codec = cid;
+    info.framesize = framesize;
+    info.samplerate = 44100;
+    info.channel_format = OMA_STEREO; // title, numChannel and numFrames are not used here
+    File = oma_open(filename.c_str(), OMAM_W, &info);
+    if (File == nullptr) abort();
+}
+
+TOma::~TOma() { // closes the liboma handle opened by the constructor
+    oma_close(File);
+}
+
+std::unique_ptr<ICompressedIO::TFrame> TOma::ReadFrame() { // reading is not implemented: always aborts
+    abort();
+    return nullptr; // not reached, abort() does not return
+}
+
+// Writes one frame through liboma; on failure reports "write error" on stderr and aborts.
+void TOma::WriteFrame(std::vector<char> data) {
+    if (oma_write(File, &data[0], 1) != -1) return;
+    fprintf(stderr, "write error\n");
+    abort();
+}
+
+std::string TOma::GetName() const { // not implemented for OMA: always aborts
+    abort();
+    return {}; // not reached, abort() does not return
+}
+
+int TOma::GetChannelNum() const { // constant answer, independent of the numChannel passed to the constructor
+    return 2; //for ATRAC3
+}
+long long TOma::GetLengthInSamples() const { // not implemented for OMA: always aborts
+    abort();
+    return 0; // not reached, abort() does not return
+}
diff --git a/src/oma.h b/src/oma.h
new file mode 100644
index 0000000..838322d
--- /dev/null
+++ b/src/oma.h
@@ -0,0 +1,17 @@
+#pragma once
+
+#include "compressed_io.h"
+#include "oma/liboma/include/oma.h"
+
+
+class TOma : public ICompressedIO { // ICompressedIO implementation on top of a liboma file handle
+    OMAFILE* File; // liboma handle; NOTE(review): raw owning pointer and the class is copyable — confirm it is never copied
+public:
+    TOma(const std::string& filename, const std::string& title, int numChannel, uint32_t numFrames, int cid, uint32_t framesize); // cid is an OMAC_ID_* codec id, framesize is passed to liboma as the frame size
+    ~TOma();
+    std::unique_ptr<TFrame> ReadFrame() override;
+    void WriteFrame(std::vector<char> data) override;
+    std::string GetName() const override;
+    int GetChannelNum() const override;
+    long long GetLengthInSamples() const override;
+};
diff --git a/src/oma/liboma/include/oma.h b/src/oma/liboma/include/oma.h
new file mode 100644
index 0000000..29d1b72
--- /dev/null
+++ b/src/oma/liboma/include/oma.h
@@ -0,0 +1,60 @@
+
+#ifndef OMA_H
+#define OMA_H
+
+typedef struct omafile_ctx OMAFILE;
+
+struct oma_info { /* stream parameters stored in / parsed from the OMA header */
+    int codec;          /* one of the OMAC_ID_* values */
+    int framesize;      /* size of one frame in bytes; used as the element size for fread/fwrite */
+    int samplerate;     /* must be one of the rates liboma knows when a header is written */
+    int channel_format; /* one of the OMA_* channel layout values */
+};
+
+enum { /* open modes accepted by oma_open() */
+    OMAM_R = 0x1, /* read an existing file */
+    OMAM_W = 0x2, /* create/overwrite a file */
+};
+
+enum { /* codec ids stored in oma_info.codec */
+    OMAC_ID_ATRAC3 = 0,
+    OMAC_ID_ATRAC3PLUS = 1,
+    OMAC_ID_MP3 = 2,
+    OMAC_ID_LPCM = 3,
+    OMAC_ID_WMA = 5 /* note the gap: value 4 is not assigned here */
+};
+
+enum { /* channel layouts stored in oma_info.channel_format */
+    OMA_MONO = 0,
+    OMA_STEREO = 1,
+    OMA_STEREO_JS = 2, /* joint stereo */
+    OMA_3 = 3,
+    OMA_4 = 4,
+    OMA_6 = 5, /* enumerator value differs from the number in the name from here on */
+    OMA_7 = 6,
+    OMA_8 = 7
+
+};
+
+typedef struct oma_info oma_info_t;
+typedef long long block_count_t;
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+int oma_get_last_err(); /* last error code recorded for the calling thread */
+
+OMAFILE* oma_open(const char *path, int mode, oma_info_t *info); /* mode is OMAM_R or OMAM_W; info is required for writing; NULL on failure */
+int oma_close(OMAFILE* oma_file); /* closes the file and frees the handle; returns 0 */
+
+block_count_t oma_read(OMAFILE *oma_file, void *ptr, block_count_t blocks); /* blocks read, 0 at end of file, -1 on error */
+block_count_t oma_write(OMAFILE *oma_file, const void *ptr, block_count_t blocks); /* blocks written, or -1 on a short write */
+
+oma_info_t* oma_get_info(OMAFILE *oma_file); /* parameters held by the handle; NULL for a NULL handle */
+int oma_get_bitrate(oma_info_t *info); /* -1 for codecs other than ATRAC3 / ATRAC3PLUS */
+const char *oma_get_codecname(oma_info_t *info); /* "" for NULL info or an unknown codec id */
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* OMA_H */
diff --git a/src/oma/liboma/src/liboma.c b/src/oma/liboma/src/liboma.c
new file mode 100644
index 0000000..50297d0
--- /dev/null
+++ b/src/oma/liboma/src/liboma.c
@@ -0,0 +1,269 @@
+#include "../include/oma.h"
+#include "oma_internal.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <string.h>
+#include <assert.h>
+
+//to use htonl
+//TODO: rewrite
+#include <arpa/inet.h>
+
+static const int OMA_HEADER_SIZE = 96; // size in bytes of the header this file reads and writes
+static const int liboma_samplerates[8] = { 32000, 44100, 48000, 88200, 96000, 0 }; // indexed by a 3-bit header field; a 0 entry means "invalid" and ends lookups
+static const char* codec_name[6] = { "ATRAC3", "ATRAC3PLUS", "MPEG1LAYER3", "LPCM", "", "OMAC_ID_WMA" }; // indexed by the OMAC_ID_* codec id
+static char ea3_str[] = {'E', 'A', '3'}; // header magic; not NUL-terminated, only used with a 3-byte memcmp/memcpy
+static int channel_id_to_format_tab[7] = { OMA_MONO, OMA_STEREO, OMA_3, OMA_4, OMA_6, OMA_7, OMA_8 }; // ATRAC3PLUS channel id minus one -> OMA_* layout
+enum { // error codes kept in the thread-local `err` and returned by the header helpers
+    OMAERR_OK = 0,
+    OMAERR_IO = -1,
+    OMAERR_PERM = -2,
+    OMAERR_FMT = -3,
+    OMAERR_ENCRYPT = -4,
+    OMAERR_VAL = -5,
+    OMAERR_EOF = -6
+};
+
+static __thread int err; // last error code; __thread gives every thread its own copy
+int oma_get_last_err() { // returns the code most recently stored by save_err() on this thread
+    return err;
+}
+
+static void save_err(int e) { // records e as the calling thread's last error
+    err = e;
+}
+
+/* Validate the "EA3" magic and the header-size bytes of a raw header; OMAERR_OK or OMAERR_FMT. */
+static int oma_check_header(const char* buf) {
+    const int magic_ok = memcmp(buf, ea3_str, sizeof(ea3_str)) == 0;
+    const int size_ok = buf[4] == 0 && buf[5] == OMA_HEADER_SIZE;
+    return (magic_ok && size_ok) ? OMAERR_OK : OMAERR_FMT;
+}
+
+/* OMAERR_OK when header bytes 6 and 7 are both 0xFF, otherwise OMAERR_ENCRYPT. */
+static int oma_check_encryption(const char* buf) {
+    /* compare as unsigned bytes: where plain char is unsigned, `buf[i] == -1` can never be true */
+    const int plain = (unsigned char)buf[6] == 0xFF && (unsigned char)buf[7] == 0xFF;
+    return plain ? OMAERR_OK : OMAERR_ENCRYPT;
+}
+
+/* Map a sample rate to its index in liboma_samplerates; -1 if the rate is not in the table. */
+static int oma_get_samplerate_idx(int samplerate) {
+    if (samplerate <= 0) {
+        fprintf(stderr, "wrong samplerate\n");
+        return -1;
+    }
+    /* the table ends with a 0 sentinel, which a positive rate can never match */
+    for (int idx = 0; liboma_samplerates[idx] != 0; idx++) {
+        if (liboma_samplerates[idx] == samplerate)
+            return idx;
+    }
+    return -1;
+}
+
+/* Decode the ATRAC3 parameter word into *info; returns 0, or -1 on an invalid sample-rate index. */
+static int oma_read_atrac3_header(uint32_t params, oma_info_t* info) {
+    const int samplerate = liboma_samplerates[(params >> 13) & 0x7];
+    if (samplerate == 0) {
+        fprintf(stderr, "liboma: wrong samplerate params, can't read header\n");
+        return -1;
+    }
+    info->codec = OMAC_ID_ATRAC3;
+    info->samplerate = samplerate;
+    info->framesize = (params & 0x3FF) * 8; /* low 10 bits, scaled by 8 */
+    info->channel_format = ((params >> 17) & 0x1) ? OMA_STEREO_JS : OMA_STEREO; /* bit 17: joint stereo */
+    return 0;
+}
+
+/* Pack codec id, joint-stereo flag, sample-rate index and frame size into *params (via htonl). */
+static int oma_write_atrac3_header(uint32_t *params, oma_info_t *info) {
+    const uint32_t js = (info->channel_format == OMA_STEREO_JS);
+    if (!js && info->channel_format != OMA_STEREO) {
+        fprintf(stderr, "wrong channel format\n");
+        return -1;
+    }
+    const int samplerate_idx = oma_get_samplerate_idx(info->samplerate);
+    if (samplerate_idx == -1)
+        return -1;
+    const uint32_t framesz = info->framesize / 8; /* stored divided by 8; must fit in 10 bits */
+    fprintf(stderr, "framesize: %d\n", framesz);
+    if (framesz > 0x3FF)
+        return -1;
+    *params = htonl((OMAC_ID_ATRAC3 << 24) | (js << 17) | ((uint32_t)samplerate_idx << 13) | framesz);
+    return 0;
+}
+
+/* Decode the ATRAC3PLUS parameter word into *info; returns 0, or -1 on a zero channel id or bad rate. */
+static int oma_read_atrac3p_header(uint32_t params, oma_info_t* info) {
+    const int channel_id = (params >> 10) & 7;
+    const int samplerate = liboma_samplerates[(params >> 13) & 0x7];
+    if (channel_id == 0)
+        return -1;
+    if (samplerate == 0) {
+        fprintf(stderr, "liboma: wrong samplerate params, can't read header\n");
+        return -1;
+    }
+    info->codec = OMAC_ID_ATRAC3PLUS;
+    info->samplerate = samplerate;
+    info->framesize = ((params & 0x3FF) * 8) + 8;
+    /* channel ids 1..7 select entries 0..6 of the format table */
+    info->channel_format = channel_id_to_format_tab[channel_id - 1];
+    return 0;
+}
+
+/* Build the OMA_HEADER_SIZE-byte header for *omainfo and write it to ctx->file.
+ * Returns OMA_HEADER_SIZE on success, -1 on bad arguments, unsupported codec or failure. */
+static int oma_write_header(OMAFILE* ctx, oma_info_t *omainfo) {
+    if (ctx == NULL || omainfo == NULL)
+        return -1;
+    uint32_t params = 0;
+    if (omainfo->codec != OMAC_ID_ATRAC3) { /* only ATRAC3 can be written; still aborts in debug builds */
+        assert(0);
+        return -1;
+    }
+    if (oma_write_atrac3_header(&params, omainfo) != 0) /* never emit a header with invalid params */
+        return -1;
+    char *headerbuf = (char*)calloc(OMA_HEADER_SIZE, 1);
+    if (headerbuf == NULL)
+        return -1;
+    memcpy(headerbuf, &ea3_str[0], 3);
+    headerbuf[3] = 1; //???
+    headerbuf[5] = OMA_HEADER_SIZE;
+    headerbuf[6] = 0xFF;
+    headerbuf[7] = 0xFF;
+    memcpy(headerbuf + 32, &params, sizeof(params)); /* params was already byte-swapped by htonl */
+    int rv = fwrite(headerbuf, sizeof(char), OMA_HEADER_SIZE, ctx->file);
+    if (rv != OMA_HEADER_SIZE) {
+        fprintf(stderr, "can't write header\n");
+        rv = -1;
+    }
+    free(headerbuf);
+    return rv;
+}
+
+/* Read and validate the header at the current file position and fill file->info.
+ * Returns OMAERR_OK or a negative OMAERR_* code. */
+static int oma_parse_header(OMAFILE* file) {
+    char buf[OMA_HEADER_SIZE];
+    int rc = 0;
+    uint32_t params = 0;
+    const size_t got = fread(&buf[0], sizeof(char), OMA_HEADER_SIZE, file->file);
+    if ((size_t)OMA_HEADER_SIZE != got)
+        return feof(file->file) ? OMAERR_FMT : OMAERR_IO;
+    rc = oma_check_header(&buf[0]);
+    if (OMAERR_OK != rc)
+        return rc;
+
+    rc = oma_check_encryption(&buf[0]);
+    if (OMAERR_OK != rc)
+        return rc;
+
+    //detect codecs
+    params = ((uint8_t)buf[33]) << 16 | ((uint8_t)buf[34]) << 8 | ((uint8_t)buf[35]);
+    switch (buf[32]) {
+        case OMAC_ID_ATRAC3:
+            rc = oma_read_atrac3_header(params, &file->info);
+            break;
+        case OMAC_ID_ATRAC3PLUS:
+            rc = oma_read_atrac3p_header(params, &file->info);
+            break;
+        default:
+            fprintf(stderr, "got unsupported format: %d\n", buf[32]);
+            return OMAERR_FMT;
+    }
+    /* the codec readers return -1 on malformed parameters; report that as a format error */
+    return rc == 0 ? OMAERR_OK : OMAERR_FMT;
+}
+
+/* Open `path` for reading (OMAM_R) or writing (OMAM_W, which requires `info`).
+ * Returns NULL on failure; the reason is then available from oma_get_last_err(). */
+OMAFILE* oma_open(const char *path, int mode, oma_info_t *info) {
+    int rc = OMAERR_VAL;
+    FILE* file = NULL;
+    struct omafile_ctx *ctx = NULL;
+    /* validate before touching the filesystem: an unknown mode must never reach fopen */
+    if (mode != OMAM_R && mode != OMAM_W)
+        goto fail;
+    if (mode == OMAM_W && info == NULL)
+        goto fail;
+
+    rc = OMAERR_IO;
+    file = fopen(path, mode == OMAM_R ? "rb" : "wb");
+    if (NULL == file)
+        goto fail;
+    ctx = (struct omafile_ctx*)malloc(sizeof(struct omafile_ctx));
+    if (NULL == ctx)
+        goto fail;
+
+    ctx->file = file;
+    if (mode == OMAM_R) {
+        rc = oma_parse_header(ctx);
+    } else {
+        memcpy(&ctx->info, info, sizeof(oma_info_t));
+        /* oma_write_header returns the header size on success and -1 otherwise */
+        rc = (oma_write_header(ctx, info) == OMA_HEADER_SIZE) ? OMAERR_OK : OMAERR_IO;
+    }
+    if (OMAERR_OK == rc)
+        return ctx;
+
+fail:
+    save_err(rc);
+    free(ctx);
+    if (NULL != file)
+        fclose(file);
+    return NULL;
+}
+
+/* Close the underlying file and free the context; always returns 0. */
+int oma_close(OMAFILE *ctx) {
+    if (ctx != NULL) fclose(ctx->file); /* tolerate a NULL handle, like free() does */
+    free(ctx);
+    return 0;
+}
+
+/* Read `blocks` frames of info.framesize bytes into ptr; returns the count, 0 at EOF, -1 on error. */
+block_count_t oma_read(OMAFILE *oma_file, void *ptr, block_count_t blocks) {
+    const size_t got = fread(ptr, oma_file->info.framesize, blocks, oma_file->file);
+    if (got == blocks)
+        return got;
+    if (!feof(oma_file->file))
+        return -1;
+    save_err(OMAERR_EOF); /* a short read at end of file is reported as 0 blocks */
+    return 0;
+}
+
+/* Write `blocks` frames of info.framesize bytes from ptr; returns the count, or -1 on a short write. */
+block_count_t oma_write(OMAFILE *oma_file, const void *ptr, block_count_t blocks) {
+    const size_t frame_bytes = oma_file->info.framesize;
+    const size_t done = fwrite(ptr, frame_bytes, blocks, oma_file->file);
+    return (done == blocks) ? (block_count_t)done : -1;
+}
+
+/* Return the stream parameters held by the handle,
+ * or NULL when the handle itself is NULL. */
+oma_info_t* oma_get_info(OMAFILE *oma_file) {
+    return (oma_file != NULL) ? &oma_file->info : NULL;
+}
+/* Bit rate derived from sample rate and frame size; -1 for NULL info or an unsupported codec. */
+int oma_get_bitrate(oma_info_t *info) {
+    if (info == NULL) /* tolerate NULL the same way oma_get_codecname() does */
+        return -1;
+    switch (info->codec) {
+        case OMAC_ID_ATRAC3:
+            return info->samplerate * info->framesize * 8 / 1024;
+        case OMAC_ID_ATRAC3PLUS:
+            return info->samplerate * info->framesize * 8 / 2048;
+        default:
+            return -1;
+    }
+}
+
+/* Codec name for info->codec taken from codec_name[];
+ * "" when info is NULL or the id falls outside the table. */
+const char *oma_get_codecname(oma_info_t *info) {
+    const int last_id = (int)(sizeof(codec_name) / sizeof(codec_name[0])) - 1;
+    if (info == NULL || info->codec < 0 || info->codec > last_id)
+        return "";
+    return codec_name[info->codec];
+}
diff --git a/src/oma/liboma/src/oma_internal.h b/src/oma/liboma/src/oma_internal.h
new file mode 100644
index 0000000..881e89b
--- /dev/null
+++ b/src/oma/liboma/src/oma_internal.h
@@ -0,0 +1,18 @@
+#ifndef OMA_INTERNAL_H
+#define OMA_INTERNAL_H
+
+#include <stdio.h>
+#include "oma.h"
+
+struct omafile_ctx { /* state behind an open OMA handle (presumably what OMAFILE names -- confirm in oma.h) */
+    FILE* file; /* stdio stream the frames are read from / written to */
+    oma_info_t info; /* stream description (codec, framesize, samplerate, ...) for this file */
+};
+
+
+
+//static inline uint16_t read_big16(void *x) {
+//    return (((const uint8_t*)x)[0] << 8) | ((const uint8_t*)x)[1];
+//}
+
+#endif /* OMA_INTERNAL_H */
diff --git a/src/oma/liboma/src/tools/omacp.c b/src/oma/liboma/src/tools/omacp.c
new file mode 100644
index 0000000..3d9190e
--- /dev/null
+++ b/src/oma/liboma/src/tools/omacp.c
@@ -0,0 +1,63 @@
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "oma.h"
+
+/*
+ * omacp: copy an OMA file frame by frame.
+ *
+ * Opens argv[1] for reading, prints its codec, bitrate and channel format,
+ * creates argv[2] with the same stream info and copies one frame
+ * (info->framesize bytes) per iteration until oma_read() reports end of file.
+ * Returns 0 on success and 1 on a usage, open, allocation, read or write error.
+ */
+int main(int argc, char* const* argv) {
+    if (3 != argc) {
+        fprintf(stdout, "usage: \n\t omacp [in] [out]\n");
+        return 1; /* argv[1] / argv[2] must not be used without both arguments */
+    }
+
+    OMAFILE* infile = oma_open(argv[1], OMAM_R, NULL);
+    if (NULL == infile) {
+        fprintf(stderr, "Can't open %s to read, err: %d\n", argv[1], oma_get_last_err());
+        return 1;
+    }
+
+    oma_info_t *info = oma_get_info(infile);
+    const char *codecname = oma_get_codecname(info);
+    const int bitrate = oma_get_bitrate(info);
+
+    fprintf(stdout, "codec: %s, bitrate: %d, channel format: %d\n", codecname, bitrate, info->channel_format);
+
+    OMAFILE* outfile = oma_open(argv[2], OMAM_W, info);
+    if (NULL == outfile) {
+        fprintf(stderr, "Can't open %s to write, err: %d\n", argv[2], oma_get_last_err());
+        oma_close(infile);
+        return 1;
+    }
+
+    int ret = 0;
+    char* buf = (char*)malloc(info->framesize);
+    if (NULL == buf) {
+        fprintf(stderr, "Can't allocate frame buffer\n");
+        ret = 1;
+    }
+    while (0 == ret) {
+        block_count_t rcount = oma_read(infile, buf, 1);
+        if (rcount == 0)
+            break; /* end of input */
+        if (rcount == -1) {
+            fprintf(stderr, "read error\n");
+            ret = 1;
+        } else if (oma_write(outfile, buf, 1) == -1) {
+            fprintf(stderr, "write error\n");
+            ret = 1;
+        }
+    }
+
+    /* info points into infile, so the handles are closed only after the loop. */
+    free(buf);
+    oma_close(outfile);
+    oma_close(infile);
+    return ret;
+}
diff --git a/src/oma/liboma/src/tools/omainfo.c b/src/oma/liboma/src/tools/omainfo.c
new file mode 100644
index 0000000..a487010
--- /dev/null
+++ b/src/oma/liboma/src/tools/omainfo.c
@@ -0,0 +1,25 @@
+#include <stdio.h>
+
+#include "oma.h"
+
+int main(int argc, char* const* argv) {
+    /* For every file named on the command line, print its codec, bitrate, channel format and frame size. */
+    if (2 > argc) {
+        fprintf(stdout, "usage: \n\t omainfo [filename]\n");
+        return 1;
+    }
+
+    for (int i = 1; i < argc; i++) {
+        OMAFILE* file = oma_open(argv[i], OMAM_R, NULL);
+        if (NULL == file) {
+            fprintf(stderr, "Can't open %s\n", argv[i]);
+            continue; /* no handle: info would be NULL and is dereferenced below */
+        }
+        oma_info_t *info = oma_get_info(file);
+        const char *codecname = oma_get_codecname(info);
+        const int bitrate = oma_get_bitrate(info);
+        fprintf(stdout, "%s codec: %s, bitrate: %d, channelformat: %d framesz: %d\n", argv[i], codecname, bitrate, info->channel_format, info->framesize);
+        oma_close(file);
+    }
+    return 0;
+}
diff --git a/src/pcmengin.h b/src/pcmengin.h
index a0e0127..8023fed 100644
--- a/src/pcmengin.h
+++ b/src/pcmengin.h
@@ -30,9 +30,9 @@ class TEndOfRead : public std::exception {
 template <class T>
 class TPCMBuffer {
     std::vector<T> Buf_;
-    int32_t NumChannels;
+    uint16_t NumChannels;
 public:
-    TPCMBuffer(const int32_t bufSize, const int32_t numChannels)
+    TPCMBuffer(const int32_t bufSize, const uint32_t numChannels)
        : NumChannels(numChannels)
     {
         Buf_.resize(bufSize*numChannels);
@@ -52,7 +52,7 @@ public:
             abort();
         return &Buf_[rpos];
     }
-    size_t Channels() const {
+    uint16_t Channels() const {
         return NumChannels;
     }
     void Zero(size_t pos, size_t len) {
@@ -82,6 +82,9 @@ class TPCMEngine {
 public:
     typedef std::unique_ptr<IPCMWriter<T>> TWriterPtr;
     typedef std::unique_ptr<IPCMReader<T>> TReaderPtr;
+    struct ProcessMeta { // extra information handed to the process lambda together with the data pointer
+        const uint16_t Channels; // filled from Buffer.Channels() in ApplyProcess
+    };
 private:
     TPCMBuffer<T> Buffer;
     TWriterPtr Writer;
@@ -104,7 +107,7 @@ public:
             , Writer(std::move(writer))
             , Reader(std::move(reader)) {
         }
-        typedef std::function<void(T* data)> TProcessLambda; 
+        typedef std::function<void(T* data, const ProcessMeta& meta)> TProcessLambda; 
 
         uint64_t ApplyProcess(size_t step, TProcessLambda lambda) {
             if (step > Buffer.Size()) {
@@ -115,8 +118,9 @@ public:
                 Reader->Read(Buffer, sizeToRead);
             }
             size_t lastPos = 0;
+            ProcessMeta meta = {Buffer.Channels()};
             for (size_t i = 0; i + step <= Buffer.Size(); i+=step) {
-                lambda(Buffer[i]);
+                lambda(Buffer[i], meta);
                 lastPos = i + step;
             }
             assert(lastPos == Buffer.Size());
diff --git a/src/qmf/qmf.h b/src/qmf/qmf.h
index 19c7d0a..e6ae58f 100644
--- a/src/qmf/qmf.h
+++ b/src/qmf/qmf.h
@@ -1,13 +1,15 @@
 #pragma once
 #include <string.h>
 
+#include "../config.h"
+
 template<class TPCM, int nIn>
 class TQmf {
     static const float TapHalf[24];
-    double QmfWindow[48];
+    TFloat QmfWindow[48];
     TPCM PcmBuffer[nIn + 46];
-    double PcmBufferMerge[nIn + 46];
-    double DelayBuff[46];
+    TFloat PcmBufferMerge[nIn + 46];
+    TFloat DelayBuff[46];
 public:
     TQmf() {
         const int sz = sizeof(QmfWindow)/sizeof(QmfWindow[0]);
@@ -21,8 +23,8 @@ public:
         }
     }
 
-    void Split(TPCM* in, double* lower, double* upper) {
-        double temp;
+    void Split(TPCM* in, TFloat* lower, TFloat* upper) {
+        TFloat temp;
         for (size_t i = 0; i < 46; i++)
             PcmBuffer[i] = PcmBuffer[nIn + i];
 
@@ -41,9 +43,9 @@ public:
         }
     }
 
-    void Merge(TPCM* out, double* lower, double* upper) {
-        memcpy(&PcmBufferMerge[0], &DelayBuff[0], 46*sizeof(double));
-        double* newPart = &PcmBufferMerge[46];
+    void Merge(TPCM* out, TFloat* lower, TFloat* upper) {
+        memcpy(&PcmBufferMerge[0], &DelayBuff[0], 46*sizeof(TFloat));
+        TFloat* newPart = &PcmBufferMerge[46];
         for (int i = 0; i < nIn; i+=4) {
             newPart[i+0] = lower[i/2] + upper[i/2];
             newPart[i+1] = lower[i/2] - upper[i/2];
@@ -51,10 +53,10 @@ public:
             newPart[i+3] = lower[i/2 + 1] - upper[i/2 + 1];
         }
 
-        double* winP = &PcmBufferMerge[0];
+        TFloat* winP = &PcmBufferMerge[0];
         for (size_t j = nIn/2; j != 0; j--) {
-            double s1 = 0;
-            double s2 = 0;
+            TFloat s1 = 0;
+            TFloat s2 = 0;
             for (size_t i = 0; i < 48; i+=2) {
                 s1 += winP[i] * QmfWindow[i];
                 s2 += winP[i+1] * QmfWindow[i+1];
@@ -64,7 +66,7 @@ public:
             winP += 2;
             out += 2;
         }
-        memcpy(&DelayBuff[0], &PcmBufferMerge[nIn], 46*sizeof(double));
+        memcpy(&DelayBuff[0], &PcmBufferMerge[nIn], 46*sizeof(TFloat));
     }
 };
 
diff --git a/src/transient_detector.cpp b/src/transient_detector.cpp
index 769277e..82a31a8 100644
--- a/src/transient_detector.cpp
+++ b/src/transient_detector.cpp
@@ -2,19 +2,33 @@
 #include <stdlib.h>
 #include <string.h>
 
+#include <cmath>
+#include <cassert>
+#include <iostream>
 namespace NAtracDEnc {
 
-static double calculateRMS(const double* in, uint32_t n) {
-    double s = 0;
+using std::vector;
+static TFloat calculateRMS(const TFloat* in, uint32_t n) { // root mean square of in[0..n)
+    TFloat s = 0; // sum of squares, then the mean; note that n == 0 makes the division below 0 / 0
     for (uint32_t i = 0; i < n; i++) {
-        s += in[i] * in[i];
+        s += (in[i] * in[i]);
     }
     s /= n;
     return sqrt(s);
 }
 
-void TTransientDetector::HPFilter(const double* in, double* out) {
-    static const double fircoef[] = {
+static TFloat calculatePeak(const TFloat* in, uint32_t n) {
+    // Largest absolute value found in in[0..n); 0 when n == 0.
+    TFloat peak = 0;
+    for (const TFloat* p = in; p != in + n; ++p) {
+        const TFloat mag = std::abs(*p);
+        peak = (mag > peak) ? mag : peak;
+    }
+    return peak;
+}
+
+void TTransientDetector::HPFilter(const TFloat* in, TFloat* out) {
+    static const TFloat fircoef[] = {
         -8.65163e-18 * 2.0, -0.00851586 * 2.0, -6.74764e-18 * 2.0, 0.0209036 * 2.0,
         -3.36639e-17 * 2.0, -0.0438162 * 2.0, -1.54175e-17 * 2.0, 0.0931738 * 2.0,
         -5.52212e-17 * 2.0, -0.313819 * 2.0
@@ -34,10 +48,10 @@ void TTransientDetector::HPFilter(const double* in, double* out) {
 }
 
 
-bool TTransientDetector::Detect(const double* buf) {
+bool TTransientDetector::Detect(const TFloat* buf) {
     const uint32_t nBlocksToAnalize = NShortBlocks + 1;
-    double* rmsPerShortBlock = reinterpret_cast<double*>(alloca(sizeof(double) * nBlocksToAnalize));
-    std::vector<double> filtered(BlockSz);
+    TFloat* rmsPerShortBlock = reinterpret_cast<TFloat*>(alloca(sizeof(TFloat) * nBlocksToAnalize));
+    std::vector<TFloat> filtered(BlockSz);
     HPFilter(buf, filtered.data());
     bool trans = false;
     rmsPerShortBlock[0] = LastEnergy;
@@ -45,13 +59,25 @@ bool TTransientDetector::Detect(const double* buf) {
         rmsPerShortBlock[i] = 19.0 * log10(calculateRMS(&filtered[(i - 1) * ShortSz], ShortSz));
         if (rmsPerShortBlock[i] - rmsPerShortBlock[i - 1] > 16) {
             trans = true;
+            LastTransientPos = i;
         }
         if (rmsPerShortBlock[i - 1] - rmsPerShortBlock[i] > 20) {
             trans = true;
+            LastTransientPos = i;
         }
     }
     LastEnergy = rmsPerShortBlock[NShortBlocks];
     return trans;
 }
 
+std::vector<TFloat> AnalyzeGain(const TFloat* in, const uint32_t len, const uint32_t maxPoints, bool useRms) {
+    // One RMS (useRms) or peak value per consecutive window of `in`; empty when len or maxPoints is 0.
+    vector<TFloat> res;
+    const uint32_t step = (maxPoints && len > maxPoints) ? len / maxPoints : 1; // never divide by 0, never a 0 stride
+    for (uint32_t pos = 0; maxPoints && pos < len; pos += step) {
+        const uint32_t n = (len - pos < step) ? len - pos : step; // last window must not read past in + len
+        res.emplace_back(useRms ? calculateRMS(in + pos, n) : calculatePeak(in + pos, n));
+    }
+    return res;
 }
+} //namespace NAtracDEnc
diff --git a/src/transient_detector.h b/src/transient_detector.h
index b3db6ba..46b774f 100644
--- a/src/transient_detector.h
+++ b/src/transient_detector.h
@@ -3,16 +3,20 @@
 #include <cstdint>
 #include <vector>
 
+#include "config.h"
+
 namespace NAtracDEnc {
+
 class TTransientDetector {
     const uint32_t ShortSz;
     const uint32_t BlockSz;
     const uint32_t NShortBlocks;
     static const uint32_t PrevBufSz = 20;
     static const uint32_t FIRLen = 21;
-    void HPFilter(const double* in, double* out);
-    std::vector<double> HPFBuffer;
-    double LastEnergy = 0.0;
+    void HPFilter(const TFloat* in, TFloat* out);
+    std::vector<TFloat> HPFBuffer;
+    TFloat LastEnergy = 0.0;
+    uint32_t LastTransientPos = 0;
 public:
     TTransientDetector(uint32_t shortSz, uint32_t blockSz)
         : ShortSz(shortSz)
@@ -21,6 +25,10 @@ public:
     {
         HPFBuffer.resize(BlockSz + FIRLen); 
     }
-    bool Detect(const double* buf);
+    bool Detect(const TFloat* buf);
+    uint32_t GetLastTransientPos() const { return LastTransientPos; }
 };
+
+std::vector<TFloat> AnalyzeGain(const TFloat* in, uint32_t len, uint32_t maxPoints, bool useRms);
+
 }
diff --git a/src/transient_detector_ut.cpp b/src/transient_detector_ut.cpp
new file mode 100644
index 0000000..5c018c3
--- /dev/null
+++ b/src/transient_detector_ut.cpp
@@ -0,0 +1,36 @@
+#include "transient_detector.h"
+#include <gtest/gtest.h>
+
+#include <vector>
+#include <cmath>
+
+using std::vector;
+using namespace NAtracDEnc;
+TEST(AnalyzeGain, AnalyzeGainSimple) {
+    // Piecewise-constant input with levels 1.0, 8.0, 128.0 and 0.5 that switch
+    // after samples 24, 32 and 66, analysed in peak mode with maxPoints = 32.
+    TFloat in[256];
+    for (int i = 0; i < 256; ++i) {
+        if (i <= 24) {
+            in[i] = 1.0;
+        } else if (i <= 32) {
+            in[i] = 8.0;
+        } else if (i <= 66) {
+            in[i] = 128.0;
+        } else {
+            in[i] = 0.5;
+        }
+    }
+    const vector<TFloat> res = AnalyzeGain(in, 256, 32, false);
+    // Fatal on mismatch so the loops below never index past the end of res;
+    // (expected, actual) order, unsigned literal to match size().
+    ASSERT_EQ(32u, res.size());
+    for (int i = 0; i < 3; ++i) // windows that only contain 1.0
+        EXPECT_EQ(1.0, res[i]);
+    for (int i = 3; i < 4; ++i) // window 24..31: one 1.0 sample, then 8.0
+        EXPECT_EQ(8.0, res[i]);
+    for (int i = 4; i < 9; ++i) // windows 32..71 each contain a 128.0 sample
+        EXPECT_EQ(128.0, res[i]);
+    for (int i = 9; i < 32; ++i) // remaining windows are all 0.5
+        EXPECT_EQ(0.5, res[i]);
+}
diff --git a/src/util.h b/src/util.h
new file mode 100644
index 0000000..f75c48e
--- /dev/null
+++ b/src/util.h
@@ -0,0 +1,58 @@
+#pragma once
+#include <cstdint>
+#include <vector>
+#include <algorithm>
+#include <cmath>
+
+#include "config.h"
+#include <cstring>
+
+template<class T>
+inline void SwapArray(T* p, const size_t len) {
+    // Reverse p[0..len) in place: exchange the outermost pair, then move
+    // both ends inward until they meet. A middle element stays where it is.
+    for (T *lo = p, *hi = p + len; hi - lo > 1; ++lo) {
+        std::swap(*lo, *--hi);
+    }
+}
+
+template<size_t N>
+inline void InvertSpectrInPlase(TFloat* in) {
+    // Multiply every even-indexed element of in[0..N) by -1; odd-indexed ones are untouched.
+    for (size_t even = 0; even < N; even += 2) in[even] *= -1;
+}
+
+template<size_t N>
+inline std::vector<TFloat> InvertSpectr(const TFloat* in) {
+    // Copy in[0..N) into a new vector and apply InvertSpectrInPlase to the copy.
+    std::vector<TFloat> out(in, in + N);
+    InvertSpectrInPlase<N>(&out[0]);
+    return out;
+}
+
+inline uint16_t GetFirstSetBit(uint32_t x) { // index of the highest set bit of x (e.g. 4..7 -> 2); 0 for x == 0 and x == 1
+    static const uint16_t multiplyDeBruijnBitPosition[32] = { // indexed by the top 5 bits of the product computed below
+        0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30,
+        8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31
+    };
+    x |= x >> 1; // these five steps copy the highest set bit into every lower bit position
+    x |= x >> 2;
+    x |= x >> 4;
+    x |= x >> 8;
+    x |= x >> 16;
+    return multiplyDeBruijnBitPosition[(uint32_t)(x * 0x07C4ACDDU) >> 27]; // 32-bit multiply, top 5 bits select the table entry
+}
+
+template<class T>
+inline uint16_t Log2FloatToIdx(T x, uint16_t shift) { // GetFirstSetBit of x * shift with the fraction dropped
+    T t = x * shift;
+    return GetFirstSetBit(std::trunc(t)); // the truncated value is converted to uint32_t by the call; NOTE(review): negative x is not handled
+}
+
+template<class T>
+inline T CalcMedian(T* in, uint32_t len) { // median of in[0..len), lower middle for even len; T() for empty input
+    if (len == 0) return T(); // (len - 1) / 2 would wrap around and index an empty vector
+    std::vector<T> tmp(in, in+len); // work on a copy so the caller's data keeps its order
+    std::nth_element(tmp.begin(), tmp.begin() + (len - 1) / 2, tmp.end()); // selection is enough, no full sort
+    return tmp[(len - 1) / 2];
+}
diff --git a/src/util_ut.cpp b/src/util_ut.cpp
new file mode 100644
index 0000000..ccd9fce
--- /dev/null
+++ b/src/util_ut.cpp
@@ -0,0 +1,26 @@
+#include "util.h"
+#include <gtest/gtest.h>
+
+#include <vector>
+
+
+TEST(Util, SwapArrayTest) {
+    // After reversing the sequence 0..7, the value v must sit at index 7 - v.
+    // The tolerance only absorbs floating-point representation noise.
+    TFloat arr[8] = { 0, 1, 2, 3, 4, 5, 6, 7 };
+    SwapArray(arr, 8);
+    for (size_t idx = 0; idx < 8; ++idx)
+        EXPECT_NEAR(static_cast<TFloat>(idx), arr[7 - idx], 1e-12);
+}
+
+TEST(Util, GetFirstSetBitTest) {
+    // Inputs 2..10 paired with the expected result; one row per
+    // expected value (1, 2, 3).
+    static const struct { uint32_t value; int expected; } kCases[] = {
+        {2, 1}, {3, 1},
+        {4, 2}, {5, 2}, {6, 2}, {7, 2},
+        {8, 3}, {9, 3}, {10, 3},
+    };
+    for (const auto& c : kCases)
+        EXPECT_EQ(c.expected, GetFirstSetBit(c.value));
+}
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index c8ca48b..aaebb4a 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -1,3 +1,5 @@
+# Compile the unit tests with debug info and AddressSanitizer. NOTE(review): unconditional and GCC/Clang-specific; consider an option().
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g -fsanitize=address -fno-omit-frame-pointer")
 include_directories(${gtest_SOURCE_DIR}/include)
 
 set(mdct_test_sources
@@ -15,15 +17,46 @@ add_executable(bitstream_test ${bitstream_test_sources})
 target_link_libraries(bitstream_test gtest_main)
 
 set(atrac1mdct_test_sources
-    ../src/atracdenc.cpp
+    ../src/atrac1denc.cpp
     ../src/transient_detector.cpp
     ../src/bitstream/bitstream.cpp
+    ../src/atrac/atrac_psy_common.cpp
     ../src/atrac/atrac_scale.cpp
     ../src/atrac/atrac1_dequantiser.cpp
     ../src/atrac/atrac1_bitalloc.cpp
     ../src/atrac/atrac1.cpp
+    ../src/atrac/atrac3.cpp #atrac_scale has explicit instantiation
     ../src/atracdenc_ut.cpp
     ../src/aea.cpp
 )
 add_executable(atrac1mdct_test ${atrac1mdct_test_sources})
 target_link_libraries(atrac1mdct_test mdct_impl gtest_main)
+
+set(atrac3mdct_test_sources # sources compiled directly into the atrac3mdct_test executable
+    ../src/atrac3denc.cpp
+    ../src/transient_detector.cpp
+    ../src/bitstream/bitstream.cpp
+    ../src/atrac/atrac_psy_common.cpp
+    ../src/atrac/atrac_scale.cpp
+    ../src/atrac/atrac3_bitstream.cpp
+    ../src/atrac/atrac1.cpp #atrac_scale has explicit instantiation
+    ../src/atrac/atrac3.cpp
+    ../src/atrac3denc_ut.cpp
+    ../src/oma.cpp
+)
+add_executable(atrac3mdct_test ${atrac3mdct_test_sources})
+target_link_libraries(atrac3mdct_test mdct_impl oma gtest_main) # gtest_main supplies main()
+
+set(util_test_sources # util.h only has inline/template code, so the test file is the only source
+    ../src/util_ut.cpp
+)
+add_executable(util_test ${util_test_sources})
+target_link_libraries(util_test gtest_main)
+
+set(transient_detector_test_sources # test file plus the implementation it exercises
+    ../src/transient_detector_ut.cpp
+    ../src/transient_detector.cpp
+)
+add_executable(transient_detector_test ${transient_detector_test_sources})
+target_link_libraries(transient_detector_test gtest_main)
+
author	Daniil Cherednik <dan.cherednik@gmail.com>	2016-03-13 09:49:33 +0300
committer	Daniil Cherednik <dan.cherednik@gmail.com>	2016-09-02 21:21:28 +0300
commit	cfaa2cd39b7256a868a4f5cd83aac207df6bd1b3 (patch)
tree	75efff26584e046566d17cd308d45b6b0fd5abfc
parent	b4df8a7c2dd12eea27c8cc52bd52a1bb8c00943f (diff)
download	atracdenc-cfaa2cd39b7256a868a4f5cd83aac207df6bd1b3.tar.gz