aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDaniil Cherednik <dan.cherednik@gmail.com>2015-12-23 02:41:38 +0300
committerDaniil Cherednik <dan.cherednik@gmail.com>2015-12-23 02:41:38 +0300
commita2a87f35b03242dcbea91dd74f31abde705e183a (patch)
tree65ade0e6bafab3ef99c87869f53c1ed0719586cf
parent8b704f5ce2d0666b5b6dc3fb6881b1e6b2dff1bd (diff)
downloadatracdenc-a2a87f35b03242dcbea91dd74f31abde705e183a.tar.gz
initial implementation of transient detection added
-rw-r--r--README.md1
-rw-r--r--src/Makefile2
-rw-r--r--src/atrac_encode_settings.h5
-rw-r--r--src/atracdenc.cpp41
-rw-r--r--src/atracdenc.h34
-rw-r--r--src/main.cpp12
-rw-r--r--src/transient_detector.cpp51
-rw-r--r--src/transient_detector.h24
8 files changed, 149 insertions, 21 deletions
diff --git a/README.md b/README.md
index 3115c2c..60ef03c 100644
--- a/README.md
+++ b/README.md
@@ -10,7 +10,6 @@ Usage:
You can use --help option to get help
Limitations:
- - Only long window
- Bit allocation based on the tonality of the signal (see http://www.minidisc.org/aes_atrac.html)
- Only 44100 16bit wav input file
diff --git a/src/Makefile b/src/Makefile
index 39783a3..063964a 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -1,7 +1,7 @@
all:
cd ./mdct && make && cd ../
- g++ -std=c++11 -O2 -g main.cpp wav.cpp aea.cpp atracdenc.cpp bitstream/bitstream.cpp atrac/atrac1.cpp atrac/atrac1_dequantiser.cpp atrac/atrac1_scale.cpp atrac/atrac1_bitalloc.cpp mdct/mdct_impl.o -o atracdenc
+ g++ -std=c++11 -O2 -g main.cpp wav.cpp aea.cpp transient_detector.cpp atracdenc.cpp bitstream/bitstream.cpp atrac/atrac1.cpp atrac/atrac1_dequantiser.cpp atrac/atrac1_scale.cpp atrac/atrac1_bitalloc.cpp mdct/mdct_impl.o -o atracdenc
test:
g++ -std=c++11 atracdenc_ut.cpp atracdenc.cpp aea.cpp bitstream/bitstream.cpp atrac/atrac1.cpp atrac/atrac1_dequantiser.cpp atrac/atrac1_scale.cpp atrac/atrac1_bitalloc.cpp -I ../3rd/gtest-1.7.0/include/ ../3rd/gtest-1.7.0/src/gtest-all.o ../3rd/gtest-1.7.0/src/gtest_main.o mdct/mdct_impl.o -o atracdenc_ut
diff --git a/src/atrac_encode_settings.h b/src/atrac_encode_settings.h
index f878a5f..e3ae3b7 100644
--- a/src/atrac_encode_settings.h
+++ b/src/atrac_encode_settings.h
@@ -5,14 +5,13 @@ namespace NAtracDEnc {
class TAtrac1EncodeSettings {
public:
enum class EWindowMode {
- EWM_LONG_ONLY,
- EWM_SHORT_ONLY,
+ EWM_NOTRANSIENT,
EWM_AUTO
};
private:
const uint32_t BfuIdxConst = 0;
const bool FastBfuNumSearch = false;
- EWindowMode WindowMode = EWindowMode::EWM_LONG_ONLY;
+ EWindowMode WindowMode = EWindowMode::EWM_AUTO;
const uint32_t WindowMask = 0;
public:
TAtrac1EncodeSettings();
diff --git a/src/atracdenc.cpp b/src/atracdenc.cpp
index 64fd99a..2ee5918 100644
--- a/src/atracdenc.cpp
+++ b/src/atracdenc.cpp
@@ -12,6 +12,16 @@ using namespace std;
using namespace NBitStream;
using namespace NAtrac1;
using namespace NMDCT;
+
+// Returns a copy of the first N samples of `in` in which every even-indexed
+// sample (0, 2, 4, ...) is multiplied by -1; odd-indexed samples are copied
+// unchanged. `in` must point to at least N readable doubles and is not modified.
+// NOTE(review): presumably used to flip the spectrum of a band before
+// transient detection - confirm against the filter bank design.
+template<int N>
+static vector<double> invertSpectr(double* in) {
+    vector<double> flipped(in, in + N);
+    int pos = 0;
+    while (pos < N) {
+        flipped[pos] *= -1;
+        pos += 2;
+    }
+    return flipped;
+}
+
TAtrac1Processor::TAtrac1Processor(TAeaPtr&& aea, bool mono)
: MixChannel(mono)
, Aea(std::move(aea))
@@ -158,30 +168,41 @@ TPCMEngine<double>::TProcessLambda TAtrac1Processor::GetDecodeLambda() {
TPCMEngine<double>::TProcessLambda TAtrac1Processor::GetEncodeLambda(const TAtrac1EncodeSettings& settings) {
const uint32_t srcChannels = Aea->GetChannelNum();
- //TODO: should not be here
- //vector<char> dummy;
- //dummy.resize(212 * srcChannels);
- //Aea->WriteFrame(dummy);
- //cout << "Encode, channels: " << srcChannels << endl;
vector<IAtrac1BitAlloc*> bitAlloc;
for (int i = 0; i < srcChannels; i++)
bitAlloc.push_back(new TAtrac1SimpleBitAlloc(Aea.get(), settings.GetBfuIdxConst(), settings.GetFastBfuNumSearch()));
- //bitAlloc.push_back(new TAtrac1PsyBitAlloc(Aea.get()));
return [this, srcChannels, bitAlloc, settings](vector<double>* data) {
for (uint32_t channel = 0; channel < srcChannels; channel++) {
double src[NumSamples];
- double sum[512];
- vector<double> specs;
- specs.resize(512);
+ vector<double> specs(512);
for (int i = 0; i < NumSamples; ++i) {
src[i] = data[i][channel];
}
+
SplitFilterBank[channel].Split(&src[0], &PcmBufLow[channel][0], &PcmBufMid[channel][0], &PcmBufHi[channel][0]);
- const uint32_t windowMask = (settings.GetWindowMode() == TAtrac1EncodeSettings::EWindowMode::EWM_SHORT_ONLY) ? settings.GetWindowMask() : 0;
+ uint32_t windowMask = 0;
+ if (settings.GetWindowMode() == TAtrac1EncodeSettings::EWindowMode::EWM_AUTO) {
+ windowMask |= (uint32_t)TransientDetectors.GetDetector(channel, 0).Detect(&PcmBufLow[channel][0]);
+
+ const vector<double>& invMid = invertSpectr<128>(&PcmBufMid[channel][0]);
+ windowMask |= (uint32_t)TransientDetectors.GetDetector(channel, 1).Detect(&invMid[0]) << 1;
+
+ const vector<double>& invHi = invertSpectr<256>(&PcmBufHi[channel][0]);
+ windowMask |= (uint32_t)TransientDetectors.GetDetector(channel, 2).Detect(&invHi[0]) << 2;
+
+ //std::cout << "trans: " << windowMask << std::endl;
+ } else {
+ //no transient detection, use given mask
+ windowMask = settings.GetWindowMask();
+ }
const TBlockSize blockSize(windowMask & 0x1, windowMask & 0x2, windowMask & 0x4); //low, mid, hi
+ //for (int i = 0; i < 256; ++i) {
+ // std::cout << PcmBufHi[channel][i] << std::endl;
+ //}
+ //std::cout<< "============" << std::endl;
Mdct(&specs[0], &PcmBufLow[channel][0], &PcmBufMid[channel][0], &PcmBufHi[channel][0], blockSize);
bitAlloc[channel]->Write(Scaler.Scale(specs, blockSize), blockSize);
}
diff --git a/src/atracdenc.h b/src/atracdenc.h
index 5abdac9..9695bc1 100644
--- a/src/atracdenc.h
+++ b/src/atracdenc.h
@@ -2,11 +2,15 @@
#include "pcmengin.h"
#include "aea.h"
#include "atrac_encode_settings.h"
+#include "transient_detector.h"
#include "atrac/atrac1.h"
#include "atrac/atrac1_qmf.h"
#include "atrac/atrac1_scale.h"
#include "mdct/mdct.h"
+#include <assert.h>
+#include <vector>
+
namespace NAtracDEnc {
enum EMode {
@@ -34,11 +38,41 @@ class TAtrac1Processor : public TAtrac1MDCT, public virtual TAtrac1Data {
TAeaPtr Aea;
double PcmBufLow[2][256 + 16];
+ double PcmBufLowT[2][256 + 16];
double PcmBufMid[2][256 + 16];
double PcmBufHi[2][512 + 16];
Atrac1SynthesisFilterBank<double> SynthesisFilterBank[2];
Atrac1SplitFilterBank<double> SplitFilterBank[2];
+
+    // Owns the transient detectors used by the encoder: two detectors for each
+    // of the three bands (low, mid, hi). Low and mid detectors are built as
+    // TTransientDetector(16, 128), hi detectors as TTransientDetector(16, 256).
+    class TTransientDetectors {
+        std::vector<TTransientDetector> transientDetectorLow;
+        std::vector<TTransientDetector> transientDetectorMid;
+        std::vector<TTransientDetector> transientDetectorHi;
+    public:
+        TTransientDetectors()
+            : transientDetectorLow(2, TTransientDetector(16, 128))
+            , transientDetectorMid(2, TTransientDetector(16, 128))
+            , transientDetectorHi(2, TTransientDetector(16, 256))
+        {}
+        // Returns the detector for `channel` in `band` (0 - low, 1 - mid, 2 - hi).
+        // Any other band value trips the assert; when asserts are compiled out
+        // the low band detector is returned instead.
+        // `channel` indexes a two-element vector and is not range-checked here.
+        TTransientDetector& GetDetector(uint32_t channel, uint32_t band) {
+            if (band == 0) {
+                return transientDetectorLow[channel];
+            }
+            if (band == 1) {
+                return transientDetectorMid[channel];
+            }
+            if (band == 2) {
+                return transientDetectorHi[channel];
+            }
+            assert(false);
+            return transientDetectorLow[channel];
+        }
+    };
+ TAtrac1Processor::TTransientDetectors TransientDetectors;
NAtrac1::TScaler Scaler;
diff --git a/src/main.cpp b/src/main.cpp
index 4ef6118..013c5c5 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -42,7 +42,7 @@ static string GetHelp() {
"\n -o output file"
"\nAdvanced options:\n --bfuidxconst\t Set constant amount of used BFU. WARNING: It is not a lowpass filter! Do not use it to cut off hi frequency."
"\n --bfuidxfast\t enable fast search of BFU amount"
- "\n --shortonly[=mask] use short window (32 sample) for all blocks. Mask (specifyed without space) is used to set this option only for given band";
+ "\n --notransient[=mask] disable transient detection and use optional mask to set bands with short MDCT window";
}
int main(int argc, char* const* argv) {
@@ -53,7 +53,7 @@ int main(int argc, char* const* argv) {
{ "help", no_argument, NULL, 'h' },
{ "bfuidxconst", required_argument, NULL, 1},
{ "bfuidxfast", no_argument, NULL, 2},
- { "shortonly", optional_argument, NULL, 3},
+ { "notransient", optional_argument, NULL, 3},
{ "mono", no_argument, NULL, 'm'},
{ NULL, 0, NULL, 0}
};
@@ -65,8 +65,8 @@ int main(int argc, char* const* argv) {
uint32_t bfuIdxConst = 0; //0 - auto, no const
bool fastBfuNumSearch = false;
bool mono = false;
- TAtrac1EncodeSettings::EWindowMode windowMode = TAtrac1EncodeSettings::EWindowMode::EWM_LONG_ONLY;
- uint32_t winMask = 7;
+ TAtrac1EncodeSettings::EWindowMode windowMode = TAtrac1EncodeSettings::EWindowMode::EWM_AUTO;
+ uint32_t winMask = 0; //all is long
while ((ch = getopt_long(argc, argv, "edhi:o:m", longopts, NULL)) != -1) {
switch (ch) {
case 'e':
@@ -104,11 +104,11 @@ int main(int argc, char* const* argv) {
fastBfuNumSearch = true;
break;
case 3:
- windowMode = TAtrac1EncodeSettings::EWindowMode::EWM_SHORT_ONLY;
+ windowMode = TAtrac1EncodeSettings::EWindowMode::EWM_NOTRANSIENT;
if (optarg) {
winMask = stoi(optarg);
}
- cout << "Explicit short window mode specified, bands: low - " <<
+ cout << "Transient detection disabled, bands: low - " <<
((winMask & 1) ? "short": "long") << ", mid - " <<
((winMask & 2) ? "short": "long") << ", hi - " <<
((winMask & 4) ? "short": "long") << endl;
diff --git a/src/transient_detector.cpp b/src/transient_detector.cpp
new file mode 100644
index 0000000..49e523d
--- /dev/null
+++ b/src/transient_detector.cpp
@@ -0,0 +1,51 @@
+#include "transient_detector.h"
+#include <stdlib.h>
+
+namespace NAtracDEnc {
+
+// Root mean square of the first n samples of `in`:
+// sqrt((in[0]^2 + ... + in[n-1]^2) / n).
+// n is expected to be non-zero; with n == 0 the mean is computed as 0/0.
+static double calculateRMS(const double* in, uint32_t n) {
+    double sumSquares = 0;
+    for (const double* p = in; p != in + n; ++p) {
+        sumSquares += (*p) * (*p);
+    }
+    return sqrt(sumSquares / n);
+}
+
+// Filters one block of BlockSz samples from `in` into `out` with a 21-tap
+// symmetric FIR filter.
+//
+// HPFBuffer holds prevBufSz samples of history (the tail of the previous
+// block) followed by the current block, so out[i] is computed from
+// HPFBuffer[i .. i + firLen - 1]. After filtering, the last prevBufSz input
+// samples are stored at the start of HPFBuffer for the next call.
+//
+// in  - BlockSz input samples
+// out - receives BlockSz filtered samples
+//
+// NOTE(review): assumes BlockSz >= prevBufSz (the detectors in this project
+// are created with block sizes 128 and 256) - confirm for new callers.
+void TTransientDetector::HPFilter(const double* in, double* out) {
+    const uint32_t firLen = 21;
+    // First half of the impulse response (taps 0..9), pre-multiplied by 2.
+    // The centre tap (index 10) enters with weight 1 and the sum is halved
+    // at the end.
+    static const double fircoef[] = {
+        -8.65163e-18 * 2.0, -0.00851586 * 2.0, -6.74764e-18 * 2.0, 0.0209036 * 2.0,
+        -3.36639e-17 * 2.0, -0.0438162 * 2.0, -1.54175e-17 * 2.0, 0.0931738 * 2.0,
+        -5.52212e-17 * 2.0, -0.313819 * 2.0
+    };
+    // Local copy of the in-class constant (history length, firLen - 1).
+    const uint32_t x = prevBufSz;
+    memcpy(HPFBuffer.data() + x, in, BlockSz * sizeof(double));
+    const double* inBuf = HPFBuffer.data();
+    for (uint32_t i = 0; i < BlockSz; ++i) {
+        double s = inBuf[i + 10];
+        double s2 = 0;
+        for (uint32_t j = 0; j < ((firLen - 1) / 2) - 1; j += 2) {
+            // Tap k is mirrored by tap (firLen - 1 - k) around the centre tap.
+            // Using (firLen - k) here would pair the taps asymmetrically and,
+            // for the last sample of the block, read HPFBuffer[BlockSz + 20],
+            // one element past the BlockSz + prevBufSz allocated elements.
+            s += fircoef[j] * (inBuf[i + j] + inBuf[i + firLen - 1 - j]);
+            s2 += fircoef[j + 1] * (inBuf[i + j + 1] + inBuf[i + firLen - 1 - (j + 1)]);
+        }
+        out[i] = (s + s2) / 2;
+    }
+    // Keep the tail of this block as history for the next call.
+    memcpy(HPFBuffer.data(), in + (BlockSz - x), x * sizeof(double));
+}
+
+
+// Reports whether the block `buf` (BlockSz samples) contains a transient.
+//
+// The block is passed through HPFilter, then split into NShortBlocks short
+// blocks of ShortSz samples. For each short block a level of
+// 19.0 * log10(RMS) is computed; the function returns true as soon as a short
+// block's level exceeds the level of the preceding short block by more than
+// 10, and false if no such rise is found. The first short block of a call is
+// never compared against anything.
+//
+// Calling this updates the filter history kept by HPFilter, so calls are
+// expected to be made once per consecutive block.
+//
+// NOTE(review): an all-zero short block yields log10(0); confirm that the
+// resulting comparison against its neighbours behaves as intended.
+bool TTransientDetector::Detect(const double* buf) {
+    std::vector<double> filtered(BlockSz);
+    HPFilter(buf, filtered.data());
+    // Only the previous short block's level is ever compared, so a single
+    // scalar replaces the former alloca()-allocated per-block array.
+    double prevLevel = 0;
+    for (uint32_t i = 0; i < NShortBlocks; ++i) {
+        const double level = 19.0 * log10(calculateRMS(&filtered[i * ShortSz], ShortSz));
+        if (i && level - prevLevel > 10) {
+            return true;
+        }
+        prevLevel = level;
+    }
+    return false;
+}
+
+}
diff --git a/src/transient_detector.h b/src/transient_detector.h
new file mode 100644
index 0000000..143507c
--- /dev/null
+++ b/src/transient_detector.h
@@ -0,0 +1,24 @@
+#pragma once
+#include <math.h>
+#include <cstdint>
+#include <vector>
+
+namespace NAtracDEnc {
+// Transient detector for one band of one channel. It works on blocks of
+// blockSz samples, each divided into blockSz / shortSz short blocks of
+// shortSz samples.
+class TTransientDetector {
+    const uint32_t ShortSz;      // samples per short block
+    const uint32_t BlockSz;      // samples per block handed to Detect()
+    const uint32_t NShortBlocks; // BlockSz / ShortSz (integer division)
+    // Number of samples kept from the previous block as filter history.
+    static const uint32_t prevBufSz = 20;
+    void HPFilter(const double* in, double* out);
+    // Work buffer for HPFilter: prevBufSz history samples followed by one block.
+    std::vector<double> HPFBuffer;
+public:
+    // shortSz must be non-zero (it is used as a divisor). The filter buffer
+    // starts out zero-filled.
+    TTransientDetector(uint32_t shortSz, uint32_t blockSz)
+        : ShortSz(shortSz)
+        , BlockSz(blockSz)
+        , NShortBlocks(blockSz / shortSz)
+        , HPFBuffer(blockSz + prevBufSz)
+    {}
+    // Analyses one block of BlockSz samples; see the definition for the
+    // detection rule.
+    bool Detect(const double* buf);
+};
+}