aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDaniil Cherednik <dan.cherednik@gmail.com>2018-12-05 00:14:29 +0300
committerDaniil Cherednik <dan.cherednik@gmail.com>2018-12-05 00:14:29 +0300
commitb2342e902d62067fda5a08c7cccff62ee3619fc5 (patch)
tree226144be966246d69244a6440e485b1bf46a84e5
parentd8014b6a75086decf99526d1538a172b527536e5 (diff)
downloadatracdenc-b2342e902d62067fda5a08c7cccff62ee3619fc5.tar.gz
[atrac3] Remove tonal extraction code
The current implementation doesn't add noticeable quality improvements, but often adds artifacts due to the tonal bit allocation penalty. A proper implementation must use tonal components only if the penalty of tonal allocation is less than the gain in generic bit allocation.
-rw-r--r--src/atrac3denc.cpp81
-rw-r--r--src/atrac3denc.h7
-rw-r--r--src/main.cpp4
3 files changed, 2 insertions, 90 deletions
diff --git a/src/atrac3denc.cpp b/src/atrac3denc.cpp
index 85b0c31..519cd9b 100644
--- a/src/atrac3denc.cpp
+++ b/src/atrac3denc.cpp
@@ -129,61 +129,6 @@ TAtrac3MDCT::TGainModulatorArray TAtrac3MDCT::MakeGainModulatorArray(const TAtra
}
}
-//TODO:
-TAtrac3Data::TTonalComponents TAtrac3Processor::ExtractTonalComponents(TFloat* specs, TTonalDetector fn)
-{
- TAtrac3Data::TTonalComponents res;
- const float thresholds[TAtrac3Data::NumQMF] = { 16, 2.4, 2.8, 3.2 };
- for (uint8_t bandNum = 0; bandNum < this->NumQMF; ++bandNum) {
- //disable for frequence above 16KHz until we works without proper psy
- if (bandNum)
- continue;
- for (uint8_t blockNum = BlocksPerBand[bandNum]; blockNum < BlocksPerBand[bandNum + 1]; ++blockNum) {
- const uint16_t specNumStart = SpecsStartLong[blockNum];
- const uint16_t specNumEnd = specNumStart + SpecsPerBlock[blockNum];
- float level = fn(specs + specNumStart, SpecsPerBlock[blockNum]);
- if (!std::isnan(level)) {
- for (uint16_t n = specNumStart; n < specNumEnd; ++n) {
- //TODO:
- TFloat absValue = std::abs(specs[n]);
- //std::cerr << n << " " << absValue << " " << level << std::endl;
- if (absValue > 0.999999) {
- TFloat shift = (specs[n] > 0) ? 0.999999 : -0.999999;
- std::cerr << "overflow: " << specs[n] << " at: " << n << std::endl;
- //res.push_back({n, specs[n] - shift});
- specs[n] = shift;
- } else if (std::abs(specs[n]) / level > thresholds[bandNum]) {
- res.push_back({n, specs[n]/* - level*/, blockNum});
- specs[n] = 0;//level;
- }
-
- }
-
- }
- }
- }
- return res;
-}
-
-void TAtrac3Processor::MapTonalComponents(const TTonalComponents& tonalComponents, vector<TTonalBlock>* componentMap)
-{
- for (uint16_t i = 0; i < tonalComponents.size();) {
- const uint16_t startPos = i;
- uint16_t curPos;
- do {
- curPos = tonalComponents[i].Pos;
- ++i;
- } while ( i < tonalComponents.size() && tonalComponents[i].Pos == curPos + 1 && i - startPos < 7);
- const uint16_t len = i - startPos;
- TFloat tmp[8];
- for (uint8_t j = 0; j < len; ++j)
- tmp[j] = tonalComponents[startPos + j].Val;
- const TScaledBlock& scaledBlock = Scaler.Scale(tmp, len);
- componentMap->push_back({&tonalComponents[startPos], scaledBlock});
- }
-}
-
-
TFloat TAtrac3Processor::LimitRel(TFloat x)
{
return std::min(std::max(x, GainLevel[15]), GainLevel[0]);
@@ -342,10 +287,6 @@ TPCMEngine<TFloat>::TProcessLambda TAtrac3Processor::GetEncodeLambda()
return [this, bitStreamWriter](TFloat* data, const TPCMEngine<TFloat>::ProcessMeta& meta) {
using TSce = TAtrac3BitStreamWriter::TSingleChannelElement;
- // TTonalBlock has pointer to the TTonalVal so TTonalComponents must be avaliable
- // TODO: this code should be rewritten
- TTonalComponents tonals[2];
-
assert(SingleChannelElements.size() == meta.Channels);
for (uint32_t channel = 0; channel < SingleChannelElements.size(); channel++) {
vector<TFloat> specs(1024);
@@ -375,28 +316,8 @@ TPCMEngine<TFloat>::TProcessLambda TAtrac3Processor::GetEncodeLambda()
Mdct(specs.data(), p, maxOverlapLevels, MakeGainModulatorArray(sce->SubbandInfo));
}
- tonals[channel] = Params.NoTonalComponents ?
- TAtrac3Data::TTonalComponents() : ExtractTonalComponents(specs.data(), [](const TFloat* spec, uint16_t len) {
- std::vector<TFloat> magnitude(len);
- //TFloat s = 0.0;
- for (uint16_t i = 0; i < len; ++i) {
- magnitude[i] = std::abs(spec[i]);
- // s += magnitude[i];
- }
- float median = CalcMedian(magnitude.data(), len);
- for (uint16_t i = 0; i < len; ++i) {
- if (median > 0.000015) {
- return median;
- }
- }
- return NAN;
- });
-
- sce->TonalBlocks.clear();
- MapTonalComponents(tonals[channel], &sce->TonalBlocks);
-
//TBlockSize for ATRAC3 - 4 subband, all are long (no short window)
- sce->ScaledBlocks = std::move(Scaler.ScaleFrame(specs, TBlockSize()));
+ sce->ScaledBlocks = Scaler.ScaleFrame(specs, TBlockSize());
}
diff --git a/src/atrac3denc.h b/src/atrac3denc.h
index fb19c84..a07f717 100644
--- a/src/atrac3denc.h
+++ b/src/atrac3denc.h
@@ -79,9 +79,6 @@ protected:
TAtrac3MDCT::TGainModulatorArray MakeGainModulatorArray(const TAtrac3Data::SubbandInfo& si);
};
-//returns threshhold
-typedef std::function<float(const TFloat* p, uint16_t len)> TTonalDetector;
-
class TAtrac3Processor : public IProcessor<TFloat>, public TAtrac3MDCT {
TCompressedIOPtr Oma;
const NAtrac3::TAtrac3EncoderSettings Params;
@@ -93,7 +90,6 @@ class TAtrac3Processor : public IProcessor<TFloat>, public TAtrac3MDCT {
TScaler<TAtrac3Data> Scaler;
std::vector<TTransientDetector> TransientDetectors;
std::vector<NAtrac3::TAtrac3BitStreamWriter::TSingleChannelElement> SingleChannelElements;
- typedef std::array<uint8_t, NumSpecs> TonalComponentMask;
public:
struct TTransientParam {
int32_t Attack0Location; // Attack position relative to previous frame
@@ -116,10 +112,7 @@ public:
void ResetTransientParamsHistory(int channel, int band);
void SetTransientParamsHistory(int channel, int band, const TTransientParam& params);
const TTransientParam& GetTransientParamsHistory(int channel, int band) const;
- TonalComponentMask AnalyzeTonalComponent(TFloat* specs);
- TTonalComponents ExtractTonalComponents(TFloat* specs, TTonalDetector fn);
- void MapTonalComponents(const TTonalComponents& tonalComponents, std::vector<NAtrac3::TTonalBlock>* componentMap);
public:
TAtrac3Processor(TCompressedIOPtr&& oma, NAtrac3::TAtrac3EncoderSettings&& encoderSettings);
~TAtrac3Processor();
diff --git a/src/main.cpp b/src/main.cpp
index 51a2cac..a1f2867 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -69,9 +69,8 @@ static string GetHelp()
"WARNING: It is not a lowpass filter! Do not use it to cut off hi frequency."
"\n --bfuidxfast\t enable fast search of BFU amount (ATRAC1)"
"\n --notransient[=mask] disable transient detection and use optional mask to set bands with short MDCT window "
- "(ATRAC1)"
+ "(ATRAC1)";
/*"\n --nogaincontrol disable gain control (ATRAC3)"*/
- "\n --notonal disable tonal components (ATRAC3)";
}
static int checkedStoi(const char* data, int min, int max, int def)
@@ -212,7 +211,6 @@ int main(int argc, char* const* argv)
{ "bfuidxfast", no_argument, NULL, O_BFUIDXFAST},
{ "notransient", optional_argument, NULL, O_NOTRANSIENT},
{ "nostdout", no_argument, NULL, O_NOSTDOUT},
- { "notonal", no_argument, NULL, O_NOTONAL},
{ "nogaincontrol", no_argument, NULL, O_NOGAINCONTROL},
{ NULL, 0, NULL, 0}
};