[atrac3] Simple adaptive MS bitrate shift

author: Daniil Cherednik <dan.cherednik@gmail.com> 2020-07-08 00:08:50 +0300
committer: Daniil Cherednik <dan.cherednik@gmail.com> 2020-07-08 00:08:50 +0300
commit: 40d971c988fd9ccfb82cc6aaacc57ca1ad477d52 (patch)
tree: 66c4a8d87450e0318be6621d9315b90ec5229978
parent: 240ca54145159a7aefc74ca8b23ed273e0ce2cbf (diff)
download: atracdenc-40d971c988fd9ccfb82cc6aaacc57ca1ad477d52.tar.gz
5 files changed, 61 insertions, 10 deletions
diff --git a/src/atrac/atrac3_bitstream.cpp b/src/atrac/atrac3_bitstream.cpp
index 04209c2..df0130c 100644
--- a/src/atrac/atrac3_bitstream.cpp
+++ b/src/atrac/atrac3_bitstream.cpp
@@ -150,9 +150,10 @@ std::pair<uint8_t, uint32_t> TAtrac3BitStreamWriter::CalcSpecsBitsConsumption(co
 
 //true - should reencode
 //false - not need to
-static inline bool CheckBfus(uint8_t* numBfu, const vector<uint32_t>& precisionPerEachBlocks)
+static inline bool CheckBfus(uint16_t* numBfu, const vector<uint32_t>& precisionPerEachBlocks)
 {
-    uint8_t curLastBfu = *numBfu - 1;
+    assert(*numBfu);
+    uint16_t curLastBfu = *numBfu - 1;
     //assert(curLastBfu < precisionPerEachBlocks.size());
     assert(*numBfu == precisionPerEachBlocks.size());
     if (precisionPerEachBlocks[curLastBfu] == 0) {
@@ -174,7 +175,17 @@ std::pair<uint8_t, vector<uint32_t>> TAtrac3BitStreamWriter::CreateAllocation(co
 
     TFloat spread = AnalizeScaleFactorSpread(scaledBlocks);
 
-    uint8_t numBfu = BfuIdxConst ? BfuIdxConst : 32;
+    uint16_t numBfu = BfuIdxConst ? BfuIdxConst : 32;
+
+    // Limit the number of BFUs when the bit budget (targetBits) is too small to hold all of them:
+    // 3 bits are needed to write each BFU without data,
+    // 5 bits are needed for the tonal header,
+    // so 32 BFUs need at least 32 * 3 + 5 = 101 bits
+    if (targetBits < 101) {
+        uint16_t lim = (targetBits - 5) / 3;
+        numBfu = std::min(numBfu, lim);
+    }
+
     vector<uint32_t> precisionPerEachBlocks(numBfu);
     uint8_t mode;
     bool cont = true;
@@ -188,14 +199,18 @@ std::pair<uint8_t, vector<uint32_t>> TAtrac3BitStreamWriter::CreateAllocation(co
             auto consumption = CalcSpecsBitsConsumption(sce, tmpAlloc, mt);
 
             auto bitsUsedByTonal = EncodeTonalComponents(sce, tmpAlloc, nullptr);
-            //std::cerr << consumption.second << " |tonal: " << bitsUsedByTonal << std::endl;
+            //std::cerr << consumption.second << " |tonal: " << bitsUsedByTonal << " target: " << targetBits << " shift " << shift << " max | min " << maxShift << " " << minShift << std::endl;
             consumption.second += bitsUsedByTonal;
 
             if (consumption.second < targetBits) {
                 if (maxShift - minShift < 0.1) {
                     precisionPerEachBlocks = tmpAlloc;
                     mode = consumption.first;
-                    cont = !BfuIdxConst && CheckBfus(&numBfu, precisionPerEachBlocks);
+                    if (numBfu > 1) {
+                        cont = !BfuIdxConst && CheckBfus(&numBfu, precisionPerEachBlocks);
+                    } else {
+                        cont = false;
+                    }
                     break;
                 }
                 maxShift = shift - 0.01;
@@ -467,19 +482,33 @@ void WriteJsParams(NBitStream::TBitStream* bs)
     }
 }
 
-static int32_t CalcMsBytesShift(uint32_t frameSz,
+// Energy balance between the M and S channels: mEnergy / (mEnergy + sEnergy) - 0.5
+//  0.5 - M only (mono);  0.0 - M and S energies are equal (uncorrelated)
+// -0.5 - S only
+static TFloat CalcMSRatio(TFloat mEnergy, TFloat sEnergy) {
+    TFloat total = sEnergy + mEnergy;
+    if (total > 0)
+        return mEnergy / total - 0.5;
+
+    // total is not positive: no signal - nothing to shift, and the division above is skipped
+    return 0;
+}
+
+static int32_t CalcMSBytesShift(uint32_t frameSz, // returns a signed shift: the caller adds it to the M half frame and subtracts it from the S one
                                 const vector<TAtrac3BitStreamWriter::TSingleChannelElement>& elements,
                                 const int32_t b[2])
 {
     const int32_t totalUsedBits = 0 - b[0] - b[1];
     assert(totalUsedBits > 0);
 
-    const uint32_t maxAllowedShift = frameSz / 2 - Div8Ceil(totalUsedBits);
+    const int32_t maxAllowedShift = (frameSz / 2 - Div8Ceil(totalUsedBits)); // half frame minus what is already used (presumably Div8Ceil converts bits to whole bytes - TODO confirm)
 
     if (elements[1].ScaledBlocks.empty()) {
         return maxAllowedShift;
     } else {
-        return std::min(frameSz / 3, maxAllowedShift);
+        TFloat ratio = CalcMSRatio(elements[0].Energy, elements[1].Energy); // in [-0.5, 0.5] for non-negative energies: positive when elements[0] has more energy
+        //std::cerr << ratio << std::endl;
+        return std::max(std::min(ToInt(frameSz * ratio), maxAllowedShift), -maxAllowedShift); // clamp the energy driven shift to [-maxAllowedShift, maxAllowedShift]
     }
 }
 
@@ -533,7 +562,7 @@ void TAtrac3BitStreamWriter::WriteSoundUnit(const vector<TSingleChannelElement>&
     int mt[2][MaxSpecs];
     std::pair<uint8_t, vector<uint32_t>> allocations[2];
 
-    const int32_t msBytesShift = Params.Js ? CalcMsBytesShift(Params.FrameSz, singleChannelElements, bitsToAlloc) : 0; // positive - gain to m, negative to s. Must be zero if no joint stereo mode
+    const int32_t msBytesShift = Params.Js ? CalcMSBytesShift(Params.FrameSz, singleChannelElements, bitsToAlloc) : 0; // positive - gain to m, negative to s. Must be zero if no joint stereo mode
 
     bitsToAlloc[0] += 8 * (halfFrameSz + msBytesShift);
     bitsToAlloc[1] += 8 * (halfFrameSz - msBytesShift);
diff --git a/src/atrac/atrac3_bitstream.h b/src/atrac/atrac3_bitstream.h
index cc57a60..02f05bc 100644
--- a/src/atrac/atrac3_bitstream.h
+++ b/src/atrac/atrac3_bitstream.h
@@ -45,6 +45,7 @@ public:
         TAtrac3Data::SubbandInfo SubbandInfo;
         std::vector<TTonalBlock> TonalBlocks;
         std::vector<TScaledBlock> ScaledBlocks;
+        TFloat Energy;
     };
 private:
 
diff --git a/src/atrac3denc.cpp b/src/atrac3denc.cpp
index 071a0fb..0538b67 100644
--- a/src/atrac3denc.cpp
+++ b/src/atrac3denc.cpp
@@ -334,6 +334,8 @@ TPCMEngine<TFloat>::TProcessLambda TAtrac3Processor::GetEncodeLambda()
                 Mdct(specs.data(), p, maxOverlapLevels, MakeGainModulatorArray(sce->SubbandInfo));
             }
 
+            sce->Energy = CalcEnergy(specs);
+
             //TBlockSize for ATRAC3 - 4 subband, all are long (no short window)
             sce->ScaledBlocks = Scaler.ScaleFrame(specs, TBlockSize());
 
diff --git a/src/util.h b/src/util.h
index 01162e4..7405c68 100644
--- a/src/util.h
+++ b/src/util.h
@@ -21,6 +21,7 @@
 #include <vector>
 #include <algorithm>
 #include <cmath>
+#include <numeric>
 
 #include "config.h"
 #include <cstring>
@@ -72,3 +73,12 @@ inline T CalcMedian(T* in, uint32_t len) {
     uint32_t pos = (len - 1) / 2;
     return tmp[pos];
 }
+
+template<class T>
+inline T CalcEnergy(const std::vector<T>& in) { // returns the sum of squares of the elements of `in` (zero for an empty vector)
+    return std::accumulate(in.begin(), in.end(), 0.0, // NOTE: the init value is a double literal, so the running sum is kept as double and converted to T for each lambda call
+        [](const T& a, const T& b) {
+            return a + b * b; // a - running sum, b - current element
+        });
+}
+
diff --git a/src/util_ut.cpp b/src/util_ut.cpp
index d4626a8..80217a9 100644
--- a/src/util_ut.cpp
+++ b/src/util_ut.cpp
@@ -23,7 +23,6 @@
 
 
 TEST(Util, SwapArrayTest) {
-
     TFloat arr[8] = { 0, 1, 2, 3, 4, 5, 6, 7 };
     SwapArray(arr, 8);
     for (size_t i = 0; i < 8; ++i) {
@@ -42,3 +41,13 @@ TEST(Util, GetFirstSetBitTest) {
     EXPECT_EQ(3, GetFirstSetBit(9));
     EXPECT_EQ(3, GetFirstSetBit(10));
 }
+
+TEST(Util, CalcEnergy) {
+    // CalcEnergy is expected to return the sum of squared elements, independent of their order
+    EXPECT_NEAR((TFloat)0.0, CalcEnergy(std::vector<TFloat>{0.0}), 0.000000000001);
+    EXPECT_NEAR((TFloat)1.0, CalcEnergy(std::vector<TFloat>{1.0}), 0.000000000001);
+    EXPECT_NEAR((TFloat)2.0, CalcEnergy(std::vector<TFloat>{1.0, 1.0}), 0.000000000001);
+    EXPECT_NEAR((TFloat)5.0, CalcEnergy(std::vector<TFloat>{2.0, 1.0}), 0.000000000001);
+    EXPECT_NEAR((TFloat)5.0, CalcEnergy(std::vector<TFloat>{1.0, 2.0}), 0.000000000001);
+    EXPECT_NEAR((TFloat)8.0, CalcEnergy(std::vector<TFloat>{2.0, 2.0}), 0.000000000001);
+}
author	Daniil Cherednik <dan.cherednik@gmail.com>	2020-07-08 00:08:50 +0300
committer	Daniil Cherednik <dan.cherednik@gmail.com>	2020-07-08 00:08:50 +0300
commit	40d971c988fd9ccfb82cc6aaacc57ca1ad477d52 (patch)
tree	66c4a8d87450e0318be6621d9315b90ec5229978
parent	240ca54145159a7aefc74ca8b23ed273e0ce2cbf (diff)
download	atracdenc-40d971c988fd9ccfb82cc6aaacc57ca1ad477d52.tar.gz