diff options
author | Daniil Cherednik <dan.cherednik@gmail.com> | 2015-12-23 02:41:38 +0300 |
---|---|---|
committer | Daniil Cherednik <dan.cherednik@gmail.com> | 2015-12-23 02:41:38 +0300 |
commit | a2a87f35b03242dcbea91dd74f31abde705e183a (patch) | |
tree | 65ade0e6bafab3ef99c87869f53c1ed0719586cf | |
parent | 8b704f5ce2d0666b5b6dc3fb6881b1e6b2dff1bd (diff) | |
download | atracdenc-a2a87f35b03242dcbea91dd74f31abde705e183a.tar.gz |
initial implementation of transient detection added
-rw-r--r-- | README.md | 1 | ||||
-rw-r--r-- | src/Makefile | 2 | ||||
-rw-r--r-- | src/atrac_encode_settings.h | 5 | ||||
-rw-r--r-- | src/atracdenc.cpp | 41 | ||||
-rw-r--r-- | src/atracdenc.h | 34 | ||||
-rw-r--r-- | src/main.cpp | 12 | ||||
-rw-r--r-- | src/transient_detector.cpp | 51 | ||||
-rw-r--r-- | src/transient_detector.h | 24 |
8 files changed, 149 insertions, 21 deletions
@@ -10,7 +10,6 @@ Usage: You can use --help option to get help Limitations: - - Only long window - Bit allocation based on the tonality of the signal (see http://www.minidisc.org/aes_atrac.html) - Only 44100 16bit wav input file diff --git a/src/Makefile b/src/Makefile index 39783a3..063964a 100644 --- a/src/Makefile +++ b/src/Makefile @@ -1,7 +1,7 @@ all: cd ./mdct && make && cd ../ - g++ -std=c++11 -O2 -g main.cpp wav.cpp aea.cpp atracdenc.cpp bitstream/bitstream.cpp atrac/atrac1.cpp atrac/atrac1_dequantiser.cpp atrac/atrac1_scale.cpp atrac/atrac1_bitalloc.cpp mdct/mdct_impl.o -o atracdenc + g++ -std=c++11 -O2 -g main.cpp wav.cpp aea.cpp transient_detector.cpp atracdenc.cpp bitstream/bitstream.cpp atrac/atrac1.cpp atrac/atrac1_dequantiser.cpp atrac/atrac1_scale.cpp atrac/atrac1_bitalloc.cpp mdct/mdct_impl.o -o atracdenc test: g++ -std=c++11 atracdenc_ut.cpp atracdenc.cpp aea.cpp bitstream/bitstream.cpp atrac/atrac1.cpp atrac/atrac1_dequantiser.cpp atrac/atrac1_scale.cpp atrac/atrac1_bitalloc.cpp -I ../3rd/gtest-1.7.0/include/ ../3rd/gtest-1.7.0/src/gtest-all.o ../3rd/gtest-1.7.0/src/gtest_main.o mdct/mdct_impl.o -o atracdenc_ut diff --git a/src/atrac_encode_settings.h b/src/atrac_encode_settings.h index f878a5f..e3ae3b7 100644 --- a/src/atrac_encode_settings.h +++ b/src/atrac_encode_settings.h @@ -5,14 +5,13 @@ namespace NAtracDEnc { class TAtrac1EncodeSettings { public: enum class EWindowMode { - EWM_LONG_ONLY, - EWM_SHORT_ONLY, + EWM_NOTRANSIENT, EWM_AUTO }; private: const uint32_t BfuIdxConst = 0; const bool FastBfuNumSearch = false; - EWindowMode WindowMode = EWindowMode::EWM_LONG_ONLY; + EWindowMode WindowMode = EWindowMode::EWM_AUTO; const uint32_t WindowMask = 0; public: TAtrac1EncodeSettings(); diff --git a/src/atracdenc.cpp b/src/atracdenc.cpp index 64fd99a..2ee5918 100644 --- a/src/atracdenc.cpp +++ b/src/atracdenc.cpp @@ -12,6 +12,16 @@ using namespace std; using namespace NBitStream; using namespace NAtrac1; using namespace NMDCT; + +template<int N> +static vector<double> invertSpectr(double* in) { + vector<double> buf(N); + memcpy(&buf[0], in, N * sizeof(double)); + for (int i = 0; i < N; i+=2) + buf[i] *= -1; + return buf; +} + TAtrac1Processor::TAtrac1Processor(TAeaPtr&& aea, bool mono) : MixChannel(mono) , Aea(std::move(aea)) @@ -158,30 +168,41 @@ TPCMEngine<double>::TProcessLambda TAtrac1Processor::GetDecodeLambda() { TPCMEngine<double>::TProcessLambda TAtrac1Processor::GetEncodeLambda(const TAtrac1EncodeSettings& settings) { const uint32_t srcChannels = Aea->GetChannelNum(); - //TODO: should not be here - //vector<char> dummy; - //dummy.resize(212 * srcChannels); - //Aea->WriteFrame(dummy); - //cout << "Encode, channels: " << srcChannels << endl; vector<IAtrac1BitAlloc*> bitAlloc; for (int i = 0; i < srcChannels; i++) bitAlloc.push_back(new TAtrac1SimpleBitAlloc(Aea.get(), settings.GetBfuIdxConst(), settings.GetFastBfuNumSearch())); - //bitAlloc.push_back(new TAtrac1PsyBitAlloc(Aea.get())); return [this, srcChannels, bitAlloc, settings](vector<double>* data) { for (uint32_t channel = 0; channel < srcChannels; channel++) { double src[NumSamples]; - double sum[512]; - vector<double> specs; - specs.resize(512); + vector<double> specs(512); for (int i = 0; i < NumSamples; ++i) { src[i] = data[i][channel]; } + SplitFilterBank[channel].Split(&src[0], &PcmBufLow[channel][0], &PcmBufMid[channel][0], &PcmBufHi[channel][0]); - const uint32_t windowMask = (settings.GetWindowMode() == TAtrac1EncodeSettings::EWindowMode::EWM_SHORT_ONLY) ? settings.GetWindowMask() : 0; + uint32_t windowMask = 0; + if (settings.GetWindowMode() == TAtrac1EncodeSettings::EWindowMode::EWM_AUTO) { + windowMask |= (uint32_t)TransientDetectors.GetDetector(channel, 0).Detect(&PcmBufLow[channel][0]); + + const vector<double>& invMid = invertSpectr<128>(&PcmBufMid[channel][0]); + windowMask |= (uint32_t)TransientDetectors.GetDetector(channel, 1).Detect(&invMid[0]) << 1; + + const vector<double>& invHi = invertSpectr<256>(&PcmBufHi[channel][0]); + windowMask |= (uint32_t)TransientDetectors.GetDetector(channel, 2).Detect(&invHi[0]) << 2; + + //std::cout << "trans: " << windowMask << std::endl; + } else { + //no transient detection, use given mask + windowMask = settings.GetWindowMask(); + } const TBlockSize blockSize(windowMask & 0x1, windowMask & 0x2, windowMask & 0x4); //low, mid, hi + //for (int i = 0; i < 256; ++i) { + // std::cout << PcmBufHi[channel][i] << std::endl; + //} + //std::cout<< "============" << std::endl; Mdct(&specs[0], &PcmBufLow[channel][0], &PcmBufMid[channel][0], &PcmBufHi[channel][0], blockSize); bitAlloc[channel]->Write(Scaler.Scale(specs, blockSize), blockSize); } diff --git a/src/atracdenc.h b/src/atracdenc.h index 5abdac9..9695bc1 100644 --- a/src/atracdenc.h +++ b/src/atracdenc.h @@ -2,11 +2,15 @@ #include "pcmengin.h" #include "aea.h" #include "atrac_encode_settings.h" +#include "transient_detector.h" #include "atrac/atrac1.h" #include "atrac/atrac1_qmf.h" #include "atrac/atrac1_scale.h" #include "mdct/mdct.h" +#include <assert.h> +#include <vector> + namespace NAtracDEnc { enum EMode { @@ -34,11 +38,41 @@ class TAtrac1Processor : public TAtrac1MDCT, public virtual TAtrac1Data { TAeaPtr Aea; double PcmBufLow[2][256 + 16]; + double PcmBufLowT[2][256 + 16]; double PcmBufMid[2][256 + 16]; double PcmBufHi[2][512 + 16]; Atrac1SynthesisFilterBank<double> SynthesisFilterBank[2]; Atrac1SplitFilterBank<double> SplitFilterBank[2]; + + class TTransientDetectors { + std::vector<TTransientDetector> transientDetectorLow; + std::vector<TTransientDetector> transientDetectorMid; + std::vector<TTransientDetector> transientDetectorHi; + public: + TTransientDetectors() + : transientDetectorLow(2, TTransientDetector(16, 128)) + , transientDetectorMid(2, TTransientDetector(16, 128)) + , transientDetectorHi(2, TTransientDetector(16, 256)) + {} + TTransientDetector& GetDetector(uint32_t channel, uint32_t band) { + switch (band) { + case 0: + return transientDetectorLow[channel]; + break; + case 1: + return transientDetectorMid[channel]; + break; + case 2: + return transientDetectorHi[channel]; + break; + default: + assert(false); + return transientDetectorLow[channel]; + } + } + }; + TAtrac1Processor::TTransientDetectors TransientDetectors; NAtrac1::TScaler Scaler; diff --git a/src/main.cpp b/src/main.cpp index 4ef6118..013c5c5 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -42,7 +42,7 @@ static string GetHelp() { "\n -o output file" "\nAdvanced options:\n --bfuidxconst\t Set constant amount of used BFU. WARNING: It is not a lowpass filter! Do not use it to cut off hi frequency." "\n --bfuidxfast\t enable fast search of BFU amount" - "\n --shortonly[=mask] use short window (32 sample) for all blocks. Mask (specifyed without space) is used to set this option only for given band"; + "\n --notransient[=mask] disable transient detection and use optional mask to set bands with short MDCT window"; } int main(int argc, char* const* argv) { @@ -53,7 +53,7 @@ int main(int argc, char* const* argv) { { "help", no_argument, NULL, 'h' }, { "bfuidxconst", required_argument, NULL, 1}, { "bfuidxfast", no_argument, NULL, 2}, - { "shortonly", optional_argument, NULL, 3}, + { "notransient", optional_argument, NULL, 3}, { "mono", no_argument, NULL, 'm'}, { NULL, 0, NULL, 0} }; @@ -65,8 +65,8 @@ int main(int argc, char* const* argv) { uint32_t bfuIdxConst = 0; //0 - auto, no const bool fastBfuNumSearch = false; bool mono = false; - TAtrac1EncodeSettings::EWindowMode windowMode = TAtrac1EncodeSettings::EWindowMode::EWM_LONG_ONLY; - uint32_t winMask = 7; + TAtrac1EncodeSettings::EWindowMode windowMode = TAtrac1EncodeSettings::EWindowMode::EWM_AUTO; + uint32_t winMask = 0; //all is long while ((ch = getopt_long(argc, argv, "edhi:o:m", longopts, NULL)) != -1) { switch (ch) { case 'e': @@ -104,11 +104,11 @@ int main(int argc, char* const* argv) { fastBfuNumSearch = true; break; case 3: - windowMode = TAtrac1EncodeSettings::EWindowMode::EWM_SHORT_ONLY; + windowMode = TAtrac1EncodeSettings::EWindowMode::EWM_NOTRANSIENT; if (optarg) { winMask = stoi(optarg); } - cout << "Explicit short window mode specified, bands: low - " << + cout << "Transient detection disabled, bands: low - " << ((winMask & 1) ? "short": "long") << ", mid - " << ((winMask & 2) ? "short": "long") << ", hi - " << ((winMask & 4) ? "short": "long") << endl; diff --git a/src/transient_detector.cpp b/src/transient_detector.cpp new file mode 100644 index 0000000..49e523d --- /dev/null +++ b/src/transient_detector.cpp @@ -0,0 +1,51 @@ +#include "transient_detector.h" +#include <stdlib.h> + +namespace NAtracDEnc { + +static double calculateRMS(const double* in, uint32_t n) { + double s = 0; + for (uint32_t i = 0; i < n; i++) { + s += in[i] * in[i]; + } + s /= n; + return sqrt(s); +} + +void TTransientDetector::HPFilter(const double* in, double* out) { + const uint32_t firLen = 21; + static const double fircoef[] = { + -8.65163e-18 * 2.0, -0.00851586 * 2.0, -6.74764e-18 * 2.0, 0.0209036 * 2.0, + -3.36639e-17 * 2.0, -0.0438162 * 2.0, -1.54175e-17 * 2.0, 0.0931738 * 2.0, + -5.52212e-17 * 2.0, -0.313819 * 2.0 + }; + const uint32_t x = prevBufSz; + memcpy(HPFBuffer.data() + x, in, BlockSz * sizeof(double)); + const double* inBuf = HPFBuffer.data(); + for (int i = 0; i < BlockSz; ++i) { + double s = inBuf[i + 10]; + double s2 = 0; + for (int j = 0; j < ((firLen - 1) / 2) - 1 ; j += 2) { + s += fircoef[j] * (inBuf[i + j] + inBuf[i + firLen - j]); + s2 += fircoef[j + 1] * (inBuf[i + j + 1] + inBuf[i + firLen - j - 1]); + } + out[i] = (s + s2)/2; + } + memcpy(HPFBuffer.data(), in + (BlockSz - x), x * sizeof(double)); +} + + +bool TTransientDetector::Detect(const double* buf) { + double* rmsPerShortBlock = reinterpret_cast<double*>(alloca(sizeof(double) * NShortBlocks)); + std::vector<double> filtered(BlockSz); + HPFilter(buf, filtered.data()); + for (uint32_t i = 0; i < NShortBlocks; ++i) { + rmsPerShortBlock[i] = 19.0 * log10(calculateRMS(&filtered[i * ShortSz], ShortSz)); + if (i && rmsPerShortBlock[i] - rmsPerShortBlock[i - 1] > 10) { + return true; + } + } + return false; +} + +} diff --git a/src/transient_detector.h b/src/transient_detector.h new file mode 100644 index 0000000..143507c --- /dev/null +++ b/src/transient_detector.h @@ -0,0 +1,24 @@ +#pragma once +#include <math.h> +#include <cstdint> +#include <vector> + +namespace NAtracDEnc { +class TTransientDetector { + const uint32_t ShortSz; + const uint32_t BlockSz; + const uint32_t NShortBlocks; + static const uint32_t prevBufSz = 20; + void HPFilter(const double* in, double* out); + std::vector<double> HPFBuffer; +public: + TTransientDetector(uint32_t shortSz, uint32_t blockSz) + : ShortSz(shortSz) + , BlockSz(blockSz) + , NShortBlocks(blockSz/shortSz) + { + HPFBuffer.resize(BlockSz + prevBufSz); + } + bool Detect(const double* buf); +}; +} |