summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Daniil Cherednik <[email protected]> 2026-05-12 21:01:59 +0200
committer Daniil Cherednik <[email protected]> 2026-05-12 21:57:17 +0200
commit 8a7c92ac447f50d10f19addfaeea8969f9da096f (patch)
tree dcdfcf21b6cc65f406df3e9d73eeecfdb93634f6
parent a57dab0fa16bb48972405ca861f6bfd7059b80b8 (diff)
[AT3P] Add ATRAC3plus RIFF output (at3p_riff)
-rw-r--r-- src/at3.cpp 323
-rw-r--r-- src/at3.h 4
-rw-r--r-- src/main.cpp 3
3 files changed, 266 insertions, 64 deletions
diff --git a/src/at3.cpp b/src/at3.cpp
index eaab1e5..bd5bf4d 100644
--- a/src/at3.cpp
+++ b/src/at3.cpp
@@ -24,6 +24,14 @@
#include <iostream>
#include <cmath>
#include <assert.h>
+#include <memory>
+#include <stdexcept>
+
+#ifdef PLATFORM_WINDOWS
+#include <io.h>
+#else
+#include <unistd.h>
+#endif
/*
* ATRAC3-in-WAV file format.
@@ -38,11 +46,11 @@ namespace {
// Based on http://soundfile.sapp.org/doc/WaveFormat/ + ffmpeg/libnetmd docs
#ifdef _MSC_VER
#pragma pack(push, 1)
-struct
+#define ATRACDENC_PACKED
#else
-struct __attribute__((packed))
+#define ATRACDENC_PACKED __attribute__((packed))
#endif
-At3WaveHeader {
+struct ATRACDENC_PACKED At3WaveHeader {
// "RIFF" "WAVE" header
char riff_chunk_id[4];
uint32_t chunk_size;
@@ -61,48 +69,137 @@ At3WaveHeader {
uint16_t bits_per_sample;
// WAVEFORMATEX cbSize
- uint16_t extradata_size; // 14
+ uint16_t extradata_size;
+
+ struct ATRACDENC_PACKED TAt3Data {
+ // atrac3 extradata
+ uint16_t unknown0; // always 1
+ uint32_t bytes_per_frame; // PCM bytes represented per frame = 1024 samples * 2ch * 2B = 0x1000
+ uint16_t coding_mode; // 1 = joint stereo, 0 = stereo
+ uint16_t coding_mode2; // same as <coding_mode>
+ uint16_t unknown1; // always 1
+ uint16_t unknown2; // always 0
+
+ // "fact" subchunk — required by Sony's psp_at3tool decoder and by ffmpeg
+ // for encoder-delay compensation. Without it, PSP tool rejects files
+ // > ~40 s with "input file is illegal file or over 2G Byte".
+ char fact_id[4];
+ uint32_t fact_size; // 8
+ uint32_t total_samples; // total PCM samples per channel
+ uint32_t samples_per_frame; // 1024 for ATRAC3
+
+ // "data" subchunk
+ char subchunk2_id[4];
+ uint32_t subchunk2_size;
+ };
- // atrac3 extradata
- uint16_t unknown0; // always 1
- uint32_t bytes_per_frame; // PCM bytes represented per frame = 1024 samples * 2ch * 2B = 0x1000
- uint16_t coding_mode; // 1 = joint stereo, 0 = stereo
- uint16_t coding_mode2; // same as <coding_mode>
- uint16_t unknown1; // always 1
- uint16_t unknown2; // always 0
+ struct ATRACDENC_PACKED TAt3pData {
+ // WAVEFORMATEXTENSIBLE
+ uint16_t valid_bits_per_sample;
+ uint32_t channel_mask;
+ uint8_t subformat_guid[16];
- // "fact" subchunk — required by Sony's psp_at3tool decoder and by ffmpeg
- // for encoder-delay compensation. Without it, PSP tool rejects files
- // > ~40 s with "input file is illegal file or over 2G Byte".
- char fact_id[4];
- uint32_t fact_size; // 8
- uint32_t total_samples; // total PCM samples per channel
- uint32_t samples_per_frame; // 1024 for ATRAC3
+ // "fact" subchunk
+ char fact_id[4];
+ uint32_t fact_size;
+ uint32_t total_samples;
- // "data" subchunk
- char subchunk2_id[4];
- uint32_t subchunk2_size;
+ // "data" subchunk
+ char subchunk2_id[4];
+ uint32_t subchunk2_size;
+ };
+
+ union {
+ TAt3Data at3;
+ TAt3pData at3p;
+ } codec;
};
#ifdef _MSC_VER
#pragma pack(pop)
#endif
+#undef ATRACDENC_PACKED
+
+// Sample rate (Hz) written to the "fmt " chunk and used to derive byte_rate.
+static constexpr uint32_t WaveSampleRate = 44100;
+// PCM samples per channel represented by one ATRAC3 frame.
+static constexpr uint32_t At3SamplesPerFrame = 1024;
+// PCM samples per channel represented by one ATRAC3plus frame.
+static constexpr uint32_t At3pSamplesPerFrame = 2048;
+// Size of the codec-independent part of the "fmt " payload: everything from
+// audio_format up to (not including) the codec-specific union.
+static constexpr size_t WaveFormatBaseSize = offsetof(At3WaveHeader, codec) -
+ offsetof(At3WaveHeader, audio_format);
+// Codec-specific bytes that still belong to the "fmt " chunk, i.e. the part of
+// each per-codec struct that precedes its "fact" chunk id.
+static constexpr size_t At3ExtraSize = offsetof(At3WaveHeader::TAt3Data, fact_id);
+static constexpr size_t At3pExtraSize = offsetof(At3WaveHeader::TAt3pData, fact_id);
+// Number of header bytes actually written to the file for each codec. This is
+// deliberately not sizeof(At3WaveHeader): the union is as large as its biggest
+// member, while each codec only emits its own variant.
+static constexpr size_t At3HeaderSize = offsetof(At3WaveHeader, codec) +
+ sizeof(At3WaveHeader::TAt3Data);
+static constexpr size_t At3pHeaderSize = offsetof(At3WaveHeader, codec) +
+ sizeof(At3WaveHeader::TAt3pData);
+
+// Pin the on-disk layout: a change in packing or field order fails the build
+// instead of silently producing a malformed header.
+static_assert(At3ExtraSize == 14, "unexpected ATRAC3 WAV extradata size");
+static_assert(At3pExtraSize == 22, "unexpected ATRAC3plus WAV extension size");
+static_assert(At3HeaderSize == 76, "unexpected ATRAC3 WAV header size");
+static_assert(At3pHeaderSize == 80, "unexpected ATRAC3plus WAV header size");
+
+// Deleter used by TFilePtr. Flushes the stdio buffer, asks the OS to commit the
+// file to the storage device, and finally closes the stream. Every step is
+// best effort: failures are ignored and the stream is always closed.
+struct TFileCloser {
+    void operator()(FILE* Fp) const {
+        if (!Fp) {
+            return;
+        }
+        // Fetch the descriptor before flushing, while the stream is untouched.
+#ifdef PLATFORM_WINDOWS
+        const int descriptor = _fileno(Fp);
+#else
+        const int descriptor = fileno(Fp);
+#endif
+        const bool flushed = (fflush(Fp) == 0);
+        // Only sync when the flush succeeded and a real descriptor is available.
+        if (flushed && descriptor != -1) {
+#ifdef PLATFORM_WINDOWS
+            _commit(descriptor);
+#else
+            fsync(descriptor);
+#endif
+        }
+        fclose(Fp);
+    }
+};
+
+// Owning FILE handle: the stream is synced and closed when the pointer is destroyed.
+using TFilePtr = std::unique_ptr<FILE, TFileCloser>;
+
+// Rewrites the size fields of an already written RIFF/WAVE header so that they
+// describe the frames that actually reached the file.
+//
+//   Fp                 - output stream whose header starts at offset 0
+//   headerSize         - number of header bytes preceding the audio payload
+//   framesWritten      - number of frames appended after the header
+//   frameSize          - size of one compressed frame in bytes
+//   samplesPerFrame    - PCM samples per channel represented by one frame
+//   totalSamplesOffset - file offset of the "fact" total_samples field
+//   dataSizeOffset     - file offset of the "data" chunk size field
+//
+// Best effort and non-throwing (it is called from destructors). Nothing is
+// written when the resulting file does not fit the 32-bit RIFF size fields, and
+// patching stops at the first I/O error. The stream position is left inside the
+// header afterwards.
+static void BackfillWaveHeader(FILE* Fp, size_t headerSize, uint64_t framesWritten,
+                               uint32_t frameSize, uint32_t samplesPerFrame,
+                               size_t totalSamplesOffset, size_t dataSizeOffset)
+{
+    if (!Fp) {
+        return;
+    }
+
+    const uint64_t dataBytes = framesWritten * uint64_t(frameSize);
+    const uint64_t actualFileSize = headerSize + dataBytes;
+    if (actualFileSize >= UINT32_MAX) {
+        // The 32-bit fields cannot describe this file; keep the header as written.
+        return;
+    }
+
+    // The sample count can exceed 32 bits even though the byte count does not
+    // (frames smaller than samplesPerFrame bytes). Compute it in 64 bits and
+    // saturate instead of letting a 32-bit multiplication wrap around.
+    const uint64_t sampleCount = framesWritten * uint64_t(samplesPerFrame);
+    const uint32_t totalSamples = sampleCount > UINT32_MAX ? UINT32_MAX : uint32_t(sampleCount);
+
+    const struct {
+        size_t Offset;
+        uint32_t Value;
+    } patches[] = {
+        { offsetof(At3WaveHeader, chunk_size), uint32_t(actualFileSize - 8) },
+        { totalSamplesOffset, totalSamples },
+        { dataSizeOffset, uint32_t(dataBytes) },
+    };
+
+    for (const auto& patch : patches) {
+        // Header fields are little-endian on disk.
+        const uint32_t valueLE = swapbyte32_on_be(patch.Value);
+        // A write issued after a failed seek would land at the current position
+        // (inside or after the audio data) and corrupt the stream, so give up on
+        // the first error instead of continuing blindly.
+        if (fseek(Fp, static_cast<long>(patch.Offset), SEEK_SET) != 0 ||
+            fwrite(&valueLE, sizeof(valueLE), 1, Fp) != 1) {
+            return;
+        }
+    }
+}
class TAt3 : public ICompressedOutput {
public:
TAt3(const std::string &filename, size_t numChannels,
uint32_t numFrames, uint32_t frameSize, bool jointStereo)
- : fp(NAtracDEnc::FOpenUtf8(filename, "wb"))
+ : Fp(NAtracDEnc::FOpenUtf8(filename, "wb"))
, FrameSize(frameSize)
, FramesWritten(0)
{
- if (!fp) {
+ if (!Fp) {
throw std::runtime_error("unable to open output file '" + filename + "'");
}
- struct At3WaveHeader header;
+ At3WaveHeader header;
memset(&header, 0, sizeof(header));
- uint64_t file_size = sizeof(struct At3WaveHeader) + uint64_t(numFrames) * uint64_t(frameSize);
+ uint64_t file_size = At3HeaderSize + uint64_t(numFrames) * uint64_t(frameSize);
if (file_size >= UINT32_MAX) {
throw std::runtime_error("File size is too big for this file format");
@@ -116,39 +213,37 @@ public:
memcpy(header.subchunk1_id, "fmt ", 4);
// fmt chunk ends where the next chunk ("fact") begins.
- header.subchunk1_size = swapbyte32_on_be(offsetof(struct At3WaveHeader, fact_id) -
- offsetof(struct At3WaveHeader, audio_format));
+ header.subchunk1_size = swapbyte32_on_be(WaveFormatBaseSize + At3ExtraSize);
// libnetmd: #define NETMD_RIFF_FORMAT_TAG_ATRAC3 0x270
// mmreg.h (mingw-w64): WAVE_FORMAT_SONY_SCX 0x270
// riff.c (ffmpeg): AV_CODEC_ID_ATRAC3 0x0270
header.audio_format = swapbyte16_on_be(0x270);
header.num_channels = swapbyte16_on_be(numChannels);
- header.sample_rate = swapbyte32_on_be(44100);
- header.byte_rate = swapbyte32_on_be(frameSize * header.sample_rate / 1024);
+ header.sample_rate = swapbyte32_on_be(WaveSampleRate);
+ header.byte_rate = swapbyte32_on_be(frameSize * WaveSampleRate / At3SamplesPerFrame);
header.block_align = swapbyte16_on_be(frameSize);
header.bits_per_sample = swapbyte16_on_be(0);
- header.extradata_size = swapbyte16_on_be(offsetof(struct At3WaveHeader, fact_id) -
- offsetof(struct At3WaveHeader, unknown0));
+ header.extradata_size = swapbyte16_on_be(At3ExtraSize);
- header.unknown0 = swapbyte16_on_be(1);
+ header.codec.at3.unknown0 = swapbyte16_on_be(1);
// 1024 samples × 2 channels × 2 bytes = 4096 (0x1000). Sony's encoder
// writes this value; PSP tool and ffmpeg rely on it for frame sizing.
- header.bytes_per_frame = swapbyte32_on_be(0x1000);
- header.coding_mode = swapbyte16_on_be(jointStereo ? 0x0001 : 0x0000);
- header.coding_mode2 = header.coding_mode; // already byte-swapped (if needed)
- header.unknown1 = swapbyte16_on_be(1);
- header.unknown2 = swapbyte16_on_be(0);
+ header.codec.at3.bytes_per_frame = swapbyte32_on_be(0x1000);
+ header.codec.at3.coding_mode = swapbyte16_on_be(jointStereo ? 0x0001 : 0x0000);
+ header.codec.at3.coding_mode2 = header.codec.at3.coding_mode; // already byte-swapped (if needed)
+ header.codec.at3.unknown1 = swapbyte16_on_be(1);
+ header.codec.at3.unknown2 = swapbyte16_on_be(0);
- memcpy(header.fact_id, "fact", 4);
- header.fact_size = swapbyte32_on_be(8);
- header.total_samples = swapbyte32_on_be(uint32_t(numFrames) * 1024);
- header.samples_per_frame = swapbyte32_on_be(1024);
+ memcpy(header.codec.at3.fact_id, "fact", 4);
+ header.codec.at3.fact_size = swapbyte32_on_be(8);
+ header.codec.at3.total_samples = swapbyte32_on_be(uint32_t(numFrames) * At3SamplesPerFrame);
+ header.codec.at3.samples_per_frame = swapbyte32_on_be(At3SamplesPerFrame);
- memcpy(header.subchunk2_id, "data", 4);
- header.subchunk2_size = swapbyte32_on_be(numFrames * frameSize); // TODO
+ memcpy(header.codec.at3.subchunk2_id, "data", 4);
+ header.codec.at3.subchunk2_size = swapbyte32_on_be(numFrames * frameSize);
- if (fwrite(&header, 1, sizeof(header), fp) != sizeof(header)) {
+ if (fwrite(&header, 1, At3HeaderSize, Fp.get()) != At3HeaderSize) {
throw std::runtime_error("Cannot write WAV header to file");
}
}
@@ -159,28 +254,16 @@ public:
// RIFF chunk_size, fact total_samples, and data subchunk_size
// reflect the actual frame count on disk.
if (FramesWritten > 0) {
- const uint64_t actualFileSize = sizeof(struct At3WaveHeader) +
- uint64_t(FramesWritten) * uint64_t(FrameSize);
- if (actualFileSize < UINT32_MAX) {
- const uint32_t chunkSize = uint32_t(actualFileSize - 8);
- const uint32_t totalSamples = uint32_t(FramesWritten) * 1024u;
- const uint32_t dataSize = uint32_t(FramesWritten) * FrameSize;
- const uint32_t chunkSizeLE = swapbyte32_on_be(chunkSize);
- const uint32_t totalSamplesLE = swapbyte32_on_be(totalSamples);
- const uint32_t dataSizeLE = swapbyte32_on_be(dataSize);
- fseek(fp, offsetof(struct At3WaveHeader, chunk_size), SEEK_SET);
- fwrite(&chunkSizeLE, sizeof(uint32_t), 1, fp);
- fseek(fp, offsetof(struct At3WaveHeader, total_samples), SEEK_SET);
- fwrite(&totalSamplesLE, sizeof(uint32_t), 1, fp);
- fseek(fp, offsetof(struct At3WaveHeader, subchunk2_size), SEEK_SET);
- fwrite(&dataSizeLE, sizeof(uint32_t), 1, fp);
- }
+ BackfillWaveHeader(Fp.get(), At3HeaderSize, FramesWritten, FrameSize, At3SamplesPerFrame,
+ offsetof(At3WaveHeader, codec) +
+ offsetof(At3WaveHeader::TAt3Data, total_samples),
+ offsetof(At3WaveHeader, codec) +
+ offsetof(At3WaveHeader::TAt3Data, subchunk2_size));
}
- fclose(fp);
}
virtual void WriteFrame(std::vector<char> data) override {
- if (fwrite(data.data(), 1, data.size(), fp) != data.size()) {
+ if (fwrite(data.data(), 1, data.size(), Fp.get()) != data.size()) {
throw std::runtime_error("Cannot write AT3 data to file");
}
++FramesWritten;
@@ -195,9 +278,116 @@ public:
}
private:
- FILE *fp;
+ TFilePtr Fp;
+ uint32_t FrameSize;
+ uint64_t FramesWritten;
+};
+
+// SubFormat GUID copied verbatim into the WAVEFORMATEXTENSIBLE part of the
+// ATRAC3plus header, listed in on-disk byte order. With the usual GUID
+// serialization (first three fields little-endian) the bytes read as
+// {E923AABF-CB58-4471-A119-FFFA01E4CE62}.
+// NOTE(review): presumably the ATRAC3plus GUID that ffmpeg's riff.c also lists — confirm.
+static const uint8_t Atrac3plusSubformatGuid[16] = {
+ 0xBF, 0xAA, 0x23, 0xE9, 0x58, 0xCB, 0x71, 0x44,
+ 0xA1, 0x19, 0xFF, 0xFA, 0x01, 0xE4, 0xCE, 0x62
+};
+
+// Returns the speaker-position bit mask stored in the channel_mask header field
+// for the given channel count: front center for mono, front left + front right
+// for stereo, and 0 (no positions assigned) for any other count.
+static uint32_t GetWaveChannelMask(size_t numChannels) {
+    const uint32_t frontLeft = 0x00000001;
+    const uint32_t frontRight = 0x00000002;
+    const uint32_t frontCenter = 0x00000004;
+
+    if (numChannels == 1) {
+        return frontCenter;
+    }
+    if (numChannels == 2) {
+        return frontLeft | frontRight;
+    }
+    return 0;
+}
+
+// Writes ATRAC3plus frames into a RIFF/WAVE container described by a
+// WAVE_FORMAT_EXTENSIBLE "fmt " chunk followed by "fact" and "data" chunks.
+// The header is emitted by the constructor from the expected frame count and
+// patched by the destructor with the number of frames actually written.
+class TAt3p : public ICompressedOutput {
+public:
+ // Opens `filename` for binary writing and writes the At3pHeaderSize-byte header.
+ //   numChannels - channel count stored in the header (must fit 16 bits)
+ //   numFrames   - expected number of frames, used for the initial size fields
+ //   frameSize   - size of every frame in bytes (must fit the 16-bit block_align)
+ // Throws std::runtime_error when the file cannot be opened, an argument does
+ // not fit its header field, the file would not fit the 32-bit RIFF sizes, or
+ // the header cannot be written.
+ TAt3p(const std::string &filename, size_t numChannels,
+ uint32_t numFrames, uint32_t frameSize)
+ : Fp(NAtracDEnc::FOpenUtf8(filename, "wb"))
+ , FrameSize(frameSize)
+ , FramesWritten(0)
+ , NumChannels(numChannels)
+ {
+ if (!Fp) {
+ throw std::runtime_error("unable to open output file '" + filename + "'");
+ }
+ // Both values are narrowed to uint16_t header fields below.
+ if (frameSize > UINT16_MAX) {
+ throw std::runtime_error("ATRAC3plus frame size is too large for WAV block_align");
+ }
+ if (numChannels > UINT16_MAX) {
+ throw std::runtime_error("Too many channels for WAV output");
+ }
+
+ At3WaveHeader header;
+ memset(&header, 0, sizeof(header));
+
+ // Computed in 64 bits so the range check itself cannot overflow.
+ const uint64_t file_size = At3pHeaderSize + uint64_t(numFrames) * uint64_t(frameSize);
+ if (file_size >= UINT32_MAX) {
+ throw std::runtime_error("File size is too big for this file format");
+ }
+
+ memcpy(header.riff_chunk_id, "RIFF", 4);
+ // RIFF chunk size excludes the 8 bytes of the chunk id and the size field itself.
+ header.chunk_size = swapbyte32_on_be(uint32_t(file_size - 8));
+ memcpy(header.riff_format, "WAVE", 4);
+
+ memcpy(header.subchunk1_id, "fmt ", 4);
+ // "fmt " payload = common wave format fields + the ATRAC3plus extension.
+ header.subchunk1_size = swapbyte32_on_be(WaveFormatBaseSize + At3pExtraSize);
+ header.audio_format = swapbyte16_on_be(0xFFFE); // WAVE_FORMAT_EXTENSIBLE
+ header.num_channels = swapbyte16_on_be(uint16_t(numChannels));
+ header.sample_rate = swapbyte32_on_be(WaveSampleRate);
+ // Bytes per second: one frameSize-byte frame per At3pSamplesPerFrame samples.
+ header.byte_rate = swapbyte32_on_be(frameSize * WaveSampleRate / At3pSamplesPerFrame);
+ header.block_align = swapbyte16_on_be(uint16_t(frameSize));
+ header.bits_per_sample = swapbyte16_on_be(16);
+ header.extradata_size = swapbyte16_on_be(At3pExtraSize);
+ header.codec.at3p.valid_bits_per_sample = swapbyte16_on_be(16);
+ header.codec.at3p.channel_mask = swapbyte32_on_be(GetWaveChannelMask(numChannels));
+ memcpy(header.codec.at3p.subformat_guid, Atrac3plusSubformatGuid, sizeof(header.codec.at3p.subformat_guid));
+
+ memcpy(header.codec.at3p.fact_id, "fact", 4);
+ // This "fact" chunk carries a single 32-bit field: total_samples.
+ header.codec.at3p.fact_size = swapbyte32_on_be(sizeof(uint32_t));
+ header.codec.at3p.total_samples = swapbyte32_on_be(uint32_t(numFrames) * At3pSamplesPerFrame);
+
+ memcpy(header.codec.at3p.subchunk2_id, "data", 4);
+ header.codec.at3p.subchunk2_size = swapbyte32_on_be(numFrames * frameSize);
+
+ // Only the ATRAC3plus variant of the header is emitted, not sizeof(header).
+ if (fwrite(&header, 1, At3pHeaderSize, Fp.get()) != At3pHeaderSize) {
+ throw std::runtime_error("Cannot write WAV header to file");
+ }
+ }
+
+ // Patches the header size fields with the real frame count (when at least one
+ // frame was written); the file itself is closed by the TFilePtr deleter.
+ virtual ~TAt3p() override {
+ if (FramesWritten > 0) {
+ BackfillWaveHeader(Fp.get(), At3pHeaderSize, FramesWritten, FrameSize, At3pSamplesPerFrame,
+ offsetof(At3WaveHeader, codec) +
+ offsetof(At3WaveHeader::TAt3pData, total_samples),
+ offsetof(At3WaveHeader, codec) +
+ offsetof(At3WaveHeader::TAt3pData, subchunk2_size));
+ }
+ }
+
+ // Appends one frame to the file. Throws std::runtime_error if the frame is not
+ // exactly FrameSize bytes long or if the write is short.
+ virtual void WriteFrame(std::vector<char> data) override {
+ if (data.size() != FrameSize) {
+ throw std::runtime_error("Unexpected ATRAC3plus frame size");
+ }
+ if (fwrite(data.data(), 1, data.size(), Fp.get()) != data.size()) {
+ throw std::runtime_error("Cannot write AT3 data to file");
+ }
+ ++FramesWritten;
+ }
+
+ // This container stores no track name, so an empty string is returned.
+ std::string GetName() const override {
+ return {};
+ }
+
+ // Channel count passed to the constructor.
+ size_t GetChannelNum() const override {
+ return NumChannels;
+ }
+
+private:
+ TFilePtr Fp; // output stream, closed by the deleter
uint32_t FrameSize; // required size of every frame, bytes
uint64_t FramesWritten; // frames successfully written so far
+ size_t NumChannels;
};
} //namespace
@@ -208,3 +398,10 @@ CreateAt3Output(const std::string& filename, size_t numChannel,
{
return std::unique_ptr<TAt3>(new TAt3(filename, numChannel, numFrames, framesize, jointStereo));
}
+
+// Factory for the ATRAC3plus RIFF/WAVE writer (TAt3p). The arguments are passed
+// to the TAt3p constructor unchanged; any exception it throws propagates to the caller.
+TCompressedOutputPtr
+CreateAt3POutput(const std::string& filename, size_t numChannel,
+ uint32_t numFrames, uint32_t framesize)
+{
+ return std::unique_ptr<TAt3p>(new TAt3p(filename, numChannel, numFrames, framesize));
+}
diff --git a/src/at3.h b/src/at3.h
index 66842fc..32d7b85 100644
--- a/src/at3.h
+++ b/src/at3.h
@@ -23,3 +23,7 @@
TCompressedOutputPtr
CreateAt3Output(const std::string& filename, size_t numChannel,
uint32_t numFrames, uint32_t framesize, bool jointStereo);
+
+// Creates an output that stores ATRAC3plus frames in a RIFF/WAVE file.
+//   filename   - path of the file to create
+//   numChannel - number of audio channels recorded in the header
+//   numFrames  - expected number of frames, used for the initial header sizes
+//   framesize  - size of each compressed frame in bytes
+TCompressedOutputPtr
+CreateAt3POutput(const std::string& filename, size_t numChannel,
+ uint32_t numFrames, uint32_t framesize);
diff --git a/src/main.cpp b/src/main.cpp
index 46a04b4..33fcd05 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -277,7 +277,8 @@ static void PrepareAtrac3PEncoder(const string& inFile,
string contName;
if (ext == "wav" || ext == "at3") {
- throw std::runtime_error("Not implemented");
+ contName = "AT3 (RIFF)";
+ omaIO = CreateAt3POutput(outFile, numChannels, numFrames, 2048);
} else if (ext == "rm") {
throw std::runtime_error("RealMedia container is not supported for ATRAC3PLUS");
} else {