aboutsummaryrefslogtreecommitdiffstats
path: root/src/atrac3denc.cpp
diff options
context:
space:
mode:
author: Daniil Cherednik <dan.cherednik@gmail.com> 2016-06-19 02:58:23 +0300
committer: Daniil Cherednik <dan.cherednik@gmail.com> 2016-06-19 03:31:55 +0300
commit: 1151d5831f19a9f24dd0c545a4968606712a62d2 (patch)
tree: c978c1b9a3fc86fef531dd412fe6b7668b7c0567 /src/atrac3denc.cpp
parent: 8d65a0bd0774e03b3d10354e15f2f3361a2ce26a (diff)
download: atracdenc-1151d5831f19a9f24dd0c545a4968606712a62d2.tar.gz
some improvements of ATRAC3 implementation: [atrac3]
- simple (ATRAC1-like) psychoacoustic added
- possibility to encode tonal components
- simple tonal component extractor
- refactoring
Diffstat (limited to 'src/atrac3denc.cpp')
-rw-r--r-- src/atrac3denc.cpp 144
1 files changed, 111 insertions, 33 deletions
diff --git a/src/atrac3denc.cpp b/src/atrac3denc.cpp
index 7557d5e..432fb18 100644
--- a/src/atrac3denc.cpp
+++ b/src/atrac3denc.cpp
@@ -1,22 +1,23 @@
#include "atrac3denc.h"
-#include "atrac/atrac3_bitstream.h"
+#include "transient_detector.h"
#include "util.h"
#include <assert.h>
-
+#include <algorithm>
#include <iostream>
-
+#include <cmath>
namespace NAtracDEnc {
using namespace NMDCT;
+using namespace NAtrac3;
using std::vector;
-void TAtrac3MDCT::Mdct(double specs[1024], double* bands[4], TGainModulatorArray gainModulators) {
+void TAtrac3MDCT::Mdct(TFloat specs[1024], TFloat* bands[4], TGainModulatorArray gainModulators) {
for (int band = 0; band < 4; ++band) {
- double* srcBuff = bands[band];
- double* const curSpec = &specs[band*256];
+ TFloat* srcBuff = bands[band];
+ TFloat* const curSpec = &specs[band*256];
TGainModulator modFn = gainModulators[band];
- vector<double> tmp(512);
- memcpy(&tmp[0], &srcBuff[256], 256 * sizeof(double));
+ vector<TFloat> tmp(512);
+ memcpy(&tmp[0], &srcBuff[256], 256 * sizeof(TFloat));
if (modFn) {
modFn(tmp.data(), srcBuff);
}
@@ -24,30 +25,30 @@ void TAtrac3MDCT::Mdct(double specs[1024], double* bands[4], TGainModulatorArray
srcBuff[256+i] = TAtrac3Data::EncodeWindow[i] * srcBuff[i];
srcBuff[i] = TAtrac3Data::EncodeWindow[255-i] * srcBuff[i];
}
- memcpy(&tmp[256], &srcBuff[0], 256 * sizeof(double));
- const vector<double>& sp = Mdct512(&tmp[0]);
+ memcpy(&tmp[256], &srcBuff[0], 256 * sizeof(TFloat));
+ const vector<TFloat>& sp = Mdct512(&tmp[0]);
assert(sp.size() == 256);
- memcpy(curSpec, sp.data(), 256 * sizeof(double));
+ memcpy(curSpec, sp.data(), 256 * sizeof(TFloat));
if (band & 1) {
SwapArray(curSpec, 256);
}
}
}
-void TAtrac3MDCT::Midct(double specs[1024], double* bands[4], TGainDemodulatorArray gainDemodulators) {
+void TAtrac3MDCT::Midct(TFloat specs[1024], TFloat* bands[4], TGainDemodulatorArray gainDemodulators) {
for (int band = 0; band < 4; ++band) {
- double* dstBuff = bands[band];
- double* curSpec = &specs[band*256];
- double* prevBuff = dstBuff + 256;
+ TFloat* dstBuff = bands[band];
+ TFloat* curSpec = &specs[band*256];
+ TFloat* prevBuff = dstBuff + 256;
TAtrac3GainProcessor::TGainDemodulator demodFn = gainDemodulators[band];
if (band & 1) {
SwapArray(curSpec, 256);
}
- vector<double> inv = Midct512(curSpec);
+ vector<TFloat> inv = Midct512(curSpec);
assert(inv.size()/2 == 256);
for (int j = 0; j < 256; ++j) {
- inv[j] *= 2 * DecodeWindow[j];
- inv[511 - j] *= 2 * DecodeWindow[j];
+ inv[j] *= /*2 */ DecodeWindow[j];
+ inv[511 - j] *= /*2*/ DecodeWindow[j];
}
if (demodFn) {
demodFn(dstBuff, inv.data(), prevBuff);
@@ -56,13 +57,14 @@ void TAtrac3MDCT::Midct(double specs[1024], double* bands[4], TGainDemodulatorAr
dstBuff[j] = inv[j] + prevBuff[j];
}
}
- memcpy(prevBuff, &inv[256], sizeof(double)*256);
+ memcpy(prevBuff, &inv[256], sizeof(TFloat)*256);
}
}
-TAtrac3Processor::TAtrac3Processor(TAeaPtr&& oma, const TContainerParams& params)
+TAtrac3Processor::TAtrac3Processor(TCompressedIOPtr&& oma, TAtrac3EncoderSettings&& encoderSettings)
: Oma(std::move(oma))
- , Params(params)
+ , Params(std::move(encoderSettings))
+ , TransientDetectors(2 * 4, TTransientDetector(8, 256)) //2 - channels, 4 - bands
{}
TAtrac3Processor::~TAtrac3Processor()
@@ -97,35 +99,111 @@ TAtrac3MDCT::TGainModulatorArray TAtrac3MDCT::MakeGainModulatorArray(const TAtra
}
}
-TPCMEngine<double>::TProcessLambda TAtrac3Processor::GetEncodeLambda() {
+//TODO: (original note, no details given) Scans bands 0..2 block by block; spectral lines that stand out from the level returned by fn are appended to the result and removed from (or clamped in) specs.
+TAtrac3Data::TTonalComponents TAtrac3Processor::ExtractTonalComponents(TFloat* specs, TTonalDetector fn) {
+ TAtrac3Data::TTonalComponents res;
+ const float thresholds[TAtrac3Data::NumQMF] = { 0.9, 2.4, 2.8, 3.2 }; // per-band limit for log10(|spec|) - log10(level), indexed by bandNum
+ for (uint8_t bandNum = 0; bandNum < this->NumQMF; ++bandNum) {
+ //disabled for frequencies above 16KHz while we work without proper psychoacoustics
+ if (bandNum > 2)
+ continue;
+ for (uint8_t blockNum = BlocksPerBand[bandNum]; blockNum < BlocksPerBand[bandNum + 1]; ++blockNum) { // blocks belonging to this band
+ const uint16_t specNumStart = SpecsStartLong[blockNum];
+ const uint16_t specNumEnd = specNumStart + SpecsPerBlock[blockNum];
+ float level = fn(specs + specNumStart, SpecsPerBlock[blockNum]); // reference level for this block; NaN means "skip this block"
+ if (!isnan(level)) {
+ for (uint16_t n = specNumStart; n < specNumEnd; ++n) {
+ //TODO:
+ TFloat absValue = std::abs(specs[n]);
+ if (absValue > 65535.0) { // magnitude beyond 65535: keep +/-65535 in specs and emit the excess as a component
+ TFloat shift = (specs[n] > 0) ? 65535.0 : -65535.0;
+
+ std::cerr << "shift overflowed value " << specs[n] << " " << specs[n] - shift << " " << shift << std::endl;
+ res.push_back({n, specs[n] - shift});
+ specs[n] = shift;
+ } else if (log10(std::abs(specs[n])) - log10(level) > thresholds[bandNum]) { // line exceeds the block level by more than the band threshold (log10 units)
+ res.push_back({n, specs[n]/* - level*/}); // the whole value becomes the tonal component
+ specs[n] = 0;//level;
+ }
+
+ }
+
+ }
+ }
+ }
+ return res;
+}
+std::vector<TTonalComponent> TAtrac3Processor::MapTonalComponents(const TTonalComponents& tonalComponents) { // Groups adjacent-position components into runs, scales each run, and returns one entry per run
+ vector<TTonalComponent> componentMap;
+ for (uint16_t i = 0; i < tonalComponents.size();) { // i is advanced inside the do-while below
+ const uint16_t startPos = i;
+ uint16_t curPos;
+ do {
+ curPos = tonalComponents[i].Pos;
+ ++i;
+ } while ( i < tonalComponents.size() && tonalComponents[i].Pos == curPos + 1 && i - startPos < 7); // extend the run while positions are consecutive, up to 7 entries
+ const uint16_t len = i - startPos;
+ TFloat tmp[8]; // len never exceeds 7 because of the loop bound above, so 8 slots are enough
+ for (uint8_t j = 0; j < len; ++j)
+ tmp[j] = tonalComponents[startPos + j].Val;
+ const TScaledBlock& scaledBlock = Scaler.Scale(tmp, len);
+ componentMap.push_back({&tonalComponents[startPos], 7, scaledBlock}); // NOTE(review): the literal 7 is passed regardless of len -- confirm what this TTonalComponent field means; the stored pointer refers into the caller's tonalComponents
+ }
+ return componentMap;
+}
+
+TAtrac3Data::SubbandInfo TAtrac3Processor::CreateSubbandInfo(TFloat* in[4], uint32_t channel, TTransientDetector* transientDetector) { // Stub: none of the parameters are used yet
+ assert(false); //not implemented
+ return {}; // reached only when assert is compiled out (NDEBUG); yields a value-initialized SubbandInfo
+}
+
+TPCMEngine<TFloat>::TProcessLambda TAtrac3Processor::GetEncodeLambda() {
TOma* omaptr = dynamic_cast<TOma*>(Oma.get());
if (!omaptr) {
std::cerr << "Wrong container" << std::endl;
abort();
}
- TAtrac3BitStreamWriter* bitStreamWriter = new TAtrac3BitStreamWriter(omaptr, Params);
- return [this, bitStreamWriter](double* data, const TPCMEngine<double>::ProcessMeta& meta) {
+ TAtrac3BitStreamWriter* bitStreamWriter = new TAtrac3BitStreamWriter(omaptr, *Params.ConteinerParams);
+ return [this, bitStreamWriter](TFloat* data, const TPCMEngine<TFloat>::ProcessMeta& meta) {
for (uint32_t channel=0; channel < 2; channel++) {
- vector<double> specs(1024);
- double src[NumSamples];
+ vector<TFloat> specs(1024);
+ TFloat src[NumSamples];
for (int i = 0; i < NumSamples; ++i) {
- src[i] = data[meta.Channels == 1 ? i : (i * 2 + channel)]; //no mono mode in atrac3. //TODO we can double frame after encoding
+ src[i] = data[meta.Channels == 1 ? i : (i * 2 + channel)] / 4.0; //no mono mode in atrac3. //TODO we can TFloat frame after encoding
}
- double* p[4] = {&PcmBuffer[channel][0][0], &PcmBuffer[channel][1][0], &PcmBuffer[channel][2][0], &PcmBuffer[channel][3][0]};
+ TFloat* p[4] = {&PcmBuffer[channel][0][0], &PcmBuffer[channel][1][0], &PcmBuffer[channel][2][0], &PcmBuffer[channel][3][0]};
SplitFilterBank[channel].Split(&src[0], p);
-
- TAtrac3Data::SubbandInfo siCur;
+
+ TAtrac3Data::SubbandInfo siCur = Params.NoGainControll ?
+ TAtrac3Data::SubbandInfo() : CreateSubbandInfo(p, channel, &TransientDetectors[channel*4]); //4 detectors per band
Mdct(specs.data(), p, MakeGainModulatorArray(siCur));
- const TBlockSize blockSize(false, false, false);
- bitStreamWriter->WriteSoundUnit(siCur, Scaler.Scale(specs, blockSize));
+ TTonalComponents tonals = Params.NoTonalComponents ?
+ TAtrac3Data::TTonalComponents() : ExtractTonalComponents(specs.data(), [](const TFloat* spec, uint16_t len) {
+ std::vector<TFloat> magnitude(len);
+ for (uint16_t i = 0; i < len; ++i) {
+ magnitude[i] = std::abs(spec[i]);
+ }
+ float median = CalcMedian(magnitude.data(), len);
+ for (uint16_t i = 0; i < len; ++i) {
+ if (median > 0.001) {
+ return median;
+ }
+ }
+ return NAN;
+ });
+
+ const std::vector<TTonalComponent>& components = MapTonalComponents(tonals);
+
+ //TBlockSize for ATRAC3 - 4 subband, all are long (no short window)
+ bitStreamWriter->WriteSoundUnit(siCur, components, Scaler.ScaleFrame(specs, TBlockSize()));
}
};
}
-TPCMEngine<double>::TProcessLambda TAtrac3Processor::GetDecodeLambda() {
+TPCMEngine<TFloat>::TProcessLambda TAtrac3Processor::GetDecodeLambda() {
abort();
return {};
}