lavc: use a separate field for exporting audio encoder padding

Currently, the amount of padding inserted at the beginning by some audio encoders, is exported through AVCodecContext.delay. However - the term 'delay' is heavily overloaded and can have multiple different meanings even in the case of audio encoding. - this field has entirely different meanings, depending on whether the codec context is used for encoding or decoding (and has yet another different meaning for video), preventing generic handling of the codec context. Therefore, add a new field -- AVCodecContext.initial_padding. It could conceivably be used for decoding as well at a later point.
author: Anton Khirnov <anton@khirnov.net> 2014-08-23 12:40:50 +0000
committer: Anton Khirnov <anton@khirnov.net> 2014-10-13 19:09:01 +0000
commit: 2df0c32ea12ddfa72ba88309812bfb13b674130f (patch)
tree: 50677fbc787646c10931f8ba95e32883173f7bfb /libavcodec
parent: c80a816142699dea9cf9fa66689a7838a487ed7e (diff)
download: ffmpeg-2df0c32ea12ddfa72ba88309812bfb13b674130f.tar.gz
21 files changed, 59 insertions, 40 deletions
diff --git a/libavcodec/aacenc.c b/libavcodec/aacenc.c
index 55aa2f1a2f..fa0ac0031a 100644
--- a/libavcodec/aacenc.c
+++ b/libavcodec/aacenc.c
@@ -777,7 +777,7 @@ static av_cold int aac_encode_init(AVCodecContext *avctx)
     for (i = 0; i < 428; i++)
         ff_aac_pow34sf_tab[i] = sqrt(ff_aac_pow2sf_tab[i] * sqrt(ff_aac_pow2sf_tab[i]));
 
-    avctx->delay = 1024;
+    avctx->initial_padding = 1024;
     ff_af_queue_init(avctx, &s->afq);
 
     return 0;
diff --git a/libavcodec/ac3enc.c b/libavcodec/ac3enc.c
index 5c02e7f9de..13666efc21 100644
--- a/libavcodec/ac3enc.c
+++ b/libavcodec/ac3enc.c
@@ -2436,7 +2436,7 @@ av_cold int ff_ac3_encode_init(AVCodecContext *avctx)
         return ret;
 
     avctx->frame_size = AC3_BLOCK_SIZE * s->num_blocks;
-    avctx->delay      = AC3_BLOCK_SIZE;
+    avctx->initial_padding = AC3_BLOCK_SIZE;
 
     s->bitstream_mode = avctx->audio_service_type;
     if (s->bitstream_mode == AV_AUDIO_SERVICE_TYPE_KARAOKE)
diff --git a/libavcodec/audio_frame_queue.c b/libavcodec/audio_frame_queue.c
index 0a8b25c6e3..82c16a11a3 100644
--- a/libavcodec/audio_frame_queue.c
+++ b/libavcodec/audio_frame_queue.c
@@ -29,8 +29,8 @@ av_cold void ff_af_queue_init(AVCodecContext *avctx, AudioFrameQueue *afq)
 {
     afq->avctx             = avctx;
     afq->next_pts          = AV_NOPTS_VALUE;
-    afq->remaining_delay   = avctx->delay;
-    afq->remaining_samples = avctx->delay;
+    afq->remaining_delay   = avctx->initial_padding;
+    afq->remaining_samples = avctx->initial_padding;
     afq->frame_queue       = NULL;
 }
 
diff --git a/libavcodec/avcodec.h b/libavcodec/avcodec.h
index f0fa7a959f..a24ce407c9 100644
--- a/libavcodec/avcodec.h
+++ b/libavcodec/avcodec.h
@@ -1191,16 +1191,7 @@ typedef struct AVCodecContext {
      *   encoded input.
      *
      * Audio:
-     *   For encoding, this is the number of "priming" samples added by the
-     *   encoder to the beginning of the stream. The decoded output will be
-     *   delayed by this many samples relative to the input to the encoder (or
-     *   more, if the decoder adds its own padding).
-     *   The timestamps on the output packets are adjusted by the encoder so
-     *   that they always refer to the first sample of the data actually
-     *   contained in the packet, including any added padding.
-     *   E.g. if the timebase is 1/samplerate and the timestamp of the first
-     *   input sample is 0, the timestamp of the first output packet will be
-     *   -delay.
+     *   For encoding, this field is unused (see initial_padding).
      *
      *   For decoding, this is the number of samples the decoder needs to
      *   output before the decoder's output is valid. When seeking, you should
@@ -2780,6 +2771,23 @@ typedef struct AVCodecContext {
      * use AVOptions to set this field.
      */
     int side_data_only_packets;
+
+    /**
+     * Audio only. The number of "priming" samples (padding) inserted by the
+     * encoder at the beginning of the audio. I.e. this number of leading
+     * decoded samples must be discarded by the caller to get the original audio
+     * without leading padding.
+     *
+     * - decoding: unused
+     * - encoding: Set by libavcodec. The timestamps on the output packets are
+     *             adjusted by the encoder so that they always refer to the
+     *             first sample of the data actually contained in the packet,
+     *             including any added padding.  E.g. if the timebase is
+     *             1/samplerate and the timestamp of the first input sample is
+     *             0, the timestamp of the first output packet will be
+     *             -initial_padding.
+     */
+    int initial_padding;
 } AVCodecContext;
 
 /**
diff --git a/libavcodec/g722enc.c b/libavcodec/g722enc.c
index e7b67dad4f..be437946c6 100644
--- a/libavcodec/g722enc.c
+++ b/libavcodec/g722enc.c
@@ -106,7 +106,7 @@ static av_cold int g722_encode_init(AVCodecContext * avctx)
            a common packet size for VoIP applications */
         avctx->frame_size = 320;
     }
-    avctx->delay = 22;
+    avctx->initial_padding = 22;
 
     if (avctx->trellis) {
         /* validate trellis */
@@ -375,7 +375,7 @@ static int g722_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
     }
 
     if (frame->pts != AV_NOPTS_VALUE)
-        avpkt->pts = frame->pts - ff_samples_to_time_base(avctx, avctx->delay);
+        avpkt->pts = frame->pts - ff_samples_to_time_base(avctx, avctx->initial_padding);
     *got_packet_ptr = 1;
     return 0;
 }
diff --git a/libavcodec/libfaac.c b/libavcodec/libfaac.c
index 9b5b11af8a..ad51a03181 100644
--- a/libavcodec/libfaac.c
+++ b/libavcodec/libfaac.c
@@ -157,7 +157,7 @@ static av_cold int Faac_encode_init(AVCodecContext *avctx)
         goto error;
     }
 
-    avctx->delay = FAAC_DELAY_SAMPLES;
+    avctx->initial_padding = FAAC_DELAY_SAMPLES;
     ff_af_queue_init(avctx, &s->afq);
 
     return 0;
diff --git a/libavcodec/libfdk-aacenc.c b/libavcodec/libfdk-aacenc.c
index 34717d4e86..d45fad2676 100644
--- a/libavcodec/libfdk-aacenc.c
+++ b/libavcodec/libfdk-aacenc.c
@@ -286,7 +286,7 @@ static av_cold int aac_encode_init(AVCodecContext *avctx)
     }
 
     avctx->frame_size = info.frameLength;
-    avctx->delay      = info.encoderDelay;
+    avctx->initial_padding = info.encoderDelay;
     ff_af_queue_init(avctx, &s->afq);
 
     if (avctx->flags & CODEC_FLAG_GLOBAL_HEADER) {
diff --git a/libavcodec/libmp3lame.c b/libavcodec/libmp3lame.c
index b7a323a8a0..23f1581e13 100644
--- a/libavcodec/libmp3lame.c
+++ b/libavcodec/libmp3lame.c
@@ -137,7 +137,7 @@ static av_cold int mp3lame_encode_init(AVCodecContext *avctx)
     }
 
     /* get encoder delay */
-    avctx->delay = lame_get_encoder_delay(s->gfp) + 528 + 1;
+    avctx->initial_padding = lame_get_encoder_delay(s->gfp) + 528 + 1;
     ff_af_queue_init(avctx, &s->afq);
 
     avctx->frame_size  = lame_get_framesize(s->gfp);
diff --git a/libavcodec/libopencore-amr.c b/libavcodec/libopencore-amr.c
index 6b459590d7..0704e94b47 100644
--- a/libavcodec/libopencore-amr.c
+++ b/libavcodec/libopencore-amr.c
@@ -200,7 +200,7 @@ static av_cold int amr_nb_encode_init(AVCodecContext *avctx)
     }
 
     avctx->frame_size  = 160;
-    avctx->delay       =  50;
+    avctx->initial_padding = 50;
     ff_af_queue_init(avctx, &s->afq);
 
     s->enc_state = Encoder_Interface_init(s->enc_dtx);
@@ -250,7 +250,7 @@ static int amr_nb_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
                 return AVERROR(ENOMEM);
             memcpy(flush_buf, samples, frame->nb_samples * sizeof(*flush_buf));
             samples = flush_buf;
-            if (frame->nb_samples < avctx->frame_size - avctx->delay)
+            if (frame->nb_samples < avctx->frame_size - avctx->initial_padding)
                 s->enc_last_frame = -1;
         }
         if ((ret = ff_af_queue_add(&s->afq, frame)) < 0) {
diff --git a/libavcodec/libopusenc.c b/libavcodec/libopusenc.c
index 9af8bcda7e..ee9655beef 100644
--- a/libavcodec/libopusenc.c
+++ b/libavcodec/libopusenc.c
@@ -87,7 +87,7 @@ static void libopus_write_header(AVCodecContext *avctx, int stream_count,
     bytestream_put_buffer(&p, "OpusHead", 8);
     bytestream_put_byte(&p, 1); /* Version */
     bytestream_put_byte(&p, channels);
-    bytestream_put_le16(&p, avctx->delay); /* Lookahead samples at 48kHz */
+    bytestream_put_le16(&p, avctx->initial_padding); /* Lookahead samples at 48kHz */
     bytestream_put_le32(&p, avctx->sample_rate); /* Original sample rate */
     bytestream_put_le16(&p, 0); /* Gain of 0dB is recommended. */
 
@@ -277,7 +277,7 @@ static int av_cold libopus_encode_init(AVCodecContext *avctx)
         goto fail;
     }
 
-    ret = opus_multistream_encoder_ctl(enc, OPUS_GET_LOOKAHEAD(&avctx->delay));
+    ret = opus_multistream_encoder_ctl(enc, OPUS_GET_LOOKAHEAD(&avctx->initial_padding));
     if (ret != OPUS_OK)
         av_log(avctx, AV_LOG_WARNING,
                "Unable to get number of lookahead samples: %s\n",
diff --git a/libavcodec/libspeexenc.c b/libavcodec/libspeexenc.c
index 651d7ace0b..98f89b214f 100644
--- a/libavcodec/libspeexenc.c
+++ b/libavcodec/libspeexenc.c
@@ -235,7 +235,7 @@ static av_cold int encode_init(AVCodecContext *avctx)
     s->header.frames_per_packet = s->frames_per_packet;
 
     /* set encoding delay */
-    speex_encoder_ctl(s->enc_state, SPEEX_GET_LOOKAHEAD, &avctx->delay);
+    speex_encoder_ctl(s->enc_state, SPEEX_GET_LOOKAHEAD, &avctx->initial_padding);
     ff_af_queue_init(avctx, &s->afq);
 
     /* create header packet bytes from header struct */
diff --git a/libavcodec/libtwolame.c b/libavcodec/libtwolame.c
index def5feeb9d..400985ad32 100644
--- a/libavcodec/libtwolame.c
+++ b/libavcodec/libtwolame.c
@@ -60,7 +60,7 @@ static av_cold int twolame_encode_init(AVCodecContext *avctx)
     int ret;
 
     avctx->frame_size = TWOLAME_SAMPLES_PER_FRAME;
-    avctx->delay      = 512 - 32 + 1;
+    avctx->initial_padding = 512 - 32 + 1;
 
     s->glopts = twolame_init();
     if (!s->glopts)
@@ -151,7 +151,7 @@ static int twolame_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
     avpkt->duration = ff_samples_to_time_base(avctx, frame->nb_samples);
     if (frame) {
         if (frame->pts != AV_NOPTS_VALUE)
-            avpkt->pts = frame->pts - ff_samples_to_time_base(avctx, avctx->delay);
+            avpkt->pts = frame->pts - ff_samples_to_time_base(avctx, avctx->initial_padding);
     } else {
         avpkt->pts = s->next_pts;
     }
diff --git a/libavcodec/libvo-aacenc.c b/libavcodec/libvo-aacenc.c
index 9450792311..6dd71174d3 100644
--- a/libavcodec/libvo-aacenc.c
+++ b/libavcodec/libvo-aacenc.c
@@ -61,7 +61,7 @@ static av_cold int aac_encode_init(AVCodecContext *avctx)
     int index, ret;
 
     avctx->frame_size = FRAME_SIZE;
-    avctx->delay      = ENC_DELAY;
+    avctx->initial_padding = ENC_DELAY;
     s->last_frame     = 2;
     ff_af_queue_init(avctx, &s->afq);
 
diff --git a/libavcodec/libvo-amrwbenc.c b/libavcodec/libvo-amrwbenc.c
index b255ba5504..da3941b312 100644
--- a/libavcodec/libvo-amrwbenc.c
+++ b/libavcodec/libvo-amrwbenc.c
@@ -93,7 +93,7 @@ static av_cold int amr_wb_encode_init(AVCodecContext *avctx)
     s->last_bitrate    = avctx->bit_rate;
 
     avctx->frame_size  = 320;
-    avctx->delay       =  80;
+    avctx->initial_padding =  80;
 
     s->state     = E_IF_init();
 
@@ -131,7 +131,7 @@ static int amr_wb_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
     }
 
     if (frame->pts != AV_NOPTS_VALUE)
-        avpkt->pts = frame->pts - ff_samples_to_time_base(avctx, avctx->delay);
+        avpkt->pts = frame->pts - ff_samples_to_time_base(avctx, avctx->initial_padding);
 
     avpkt->size = size;
     *got_packet_ptr = 1;
diff --git a/libavcodec/libvorbis.c b/libavcodec/libvorbis.c
index 4b4caaac17..07973e6379 100644
--- a/libavcodec/libvorbis.c
+++ b/libavcodec/libvorbis.c
@@ -322,8 +322,8 @@ static int libvorbis_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
     if (duration > 0) {
         /* we do not know encoder delay until we get the first packet from
          * libvorbis, so we have to update the AudioFrameQueue counts */
-        if (!avctx->delay) {
-            avctx->delay              = duration;
+        if (!avctx->initial_padding) {
+            avctx->initial_padding    = duration;
             s->afq.remaining_delay   += duration;
             s->afq.remaining_samples += duration;
         }
diff --git a/libavcodec/mpegaudioenc.c b/libavcodec/mpegaudioenc.c
index 51a6f5be5a..4e074a582d 100644
--- a/libavcodec/mpegaudioenc.c
+++ b/libavcodec/mpegaudioenc.c
@@ -84,7 +84,7 @@ static av_cold int MPA_encode_init(AVCodecContext *avctx)
     bitrate = bitrate / 1000;
     s->nb_channels = channels;
     avctx->frame_size = MPA_FRAME_SIZE;
-    avctx->delay      = 512 - 32 + 1;
+    avctx->initial_padding = 512 - 32 + 1;
 
     /* encoding freq */
     s->lsf = 0;
@@ -735,7 +735,7 @@ static int MPA_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
     encode_frame(s, bit_alloc, padding);
 
     if (frame->pts != AV_NOPTS_VALUE)
-        avpkt->pts = frame->pts - ff_samples_to_time_base(avctx, avctx->delay);
+        avpkt->pts = frame->pts - ff_samples_to_time_base(avctx, avctx->initial_padding);
 
     avpkt->size = put_bits_count(&s->pb) / 8;
     *got_packet_ptr = 1;
diff --git a/libavcodec/nellymoserenc.c b/libavcodec/nellymoserenc.c
index 5732163476..9a84591456 100644
--- a/libavcodec/nellymoserenc.c
+++ b/libavcodec/nellymoserenc.c
@@ -165,7 +165,7 @@ static av_cold int encode_init(AVCodecContext *avctx)
     }
 
     avctx->frame_size = NELLY_SAMPLES;
-    avctx->delay      = NELLY_BUF_LEN;
+    avctx->initial_padding = NELLY_BUF_LEN;
     ff_af_queue_init(avctx, &s->afq);
     s->avctx = avctx;
     if ((ret = ff_mdct_init(&s->mdct_ctx, 8, 0, 32768.0)) < 0)
diff --git a/libavcodec/ra144enc.c b/libavcodec/ra144enc.c
index 7627adc0a7..fd04766561 100644
--- a/libavcodec/ra144enc.c
+++ b/libavcodec/ra144enc.c
@@ -56,7 +56,7 @@ static av_cold int ra144_encode_init(AVCodecContext * avctx)
         return -1;
     }
     avctx->frame_size = NBLOCKS * BLOCKSIZE;
-    avctx->delay      = avctx->frame_size;
+    avctx->initial_padding = avctx->frame_size;
     avctx->bit_rate = 8000;
     ractx = avctx->priv_data;
     ractx->lpc_coef[0] = ractx->lpc_tables[0];
diff --git a/libavcodec/utils.c b/libavcodec/utils.c
index 89f249f093..b28a659f0a 100644
--- a/libavcodec/utils.c
+++ b/libavcodec/utils.c
@@ -1240,6 +1240,11 @@ int attribute_align_arg avcodec_open2(AVCodecContext *avctx, const AVCodec *code
         }
     }
 
+#if FF_API_AUDIOENC_DELAY
+    if (av_codec_is_encoder(avctx->codec))
+        avctx->delay = avctx->initial_padding;
+#endif
+
     if (av_codec_is_decoder(avctx->codec)) {
         /* validate channel layout from the decoder */
         if (avctx->channel_layout) {
@@ -1447,6 +1452,10 @@ int attribute_align_arg avcodec_encode_audio2(AVCodecContext *avctx,
 end:
     av_frame_free(&padded_frame);
 
+#if FF_API_AUDIOENC_DELAY
+    avctx->delay = avctx->initial_padding;
+#endif
+
     return ret;
 }
 
diff --git a/libavcodec/version.h b/libavcodec/version.h
index 90b1f10f65..c44686d6ec 100644
--- a/libavcodec/version.h
+++ b/libavcodec/version.h
@@ -29,8 +29,8 @@
 #include "libavutil/version.h"
 
 #define LIBAVCODEC_VERSION_MAJOR 56
-#define LIBAVCODEC_VERSION_MINOR  2
-#define LIBAVCODEC_VERSION_MICRO  2
+#define LIBAVCODEC_VERSION_MINOR  3
+#define LIBAVCODEC_VERSION_MICRO  0
 
 #define LIBAVCODEC_VERSION_INT  AV_VERSION_INT(LIBAVCODEC_VERSION_MAJOR, \
                                                LIBAVCODEC_VERSION_MINOR, \
@@ -153,5 +153,8 @@
 #ifndef FF_API_AFD
 #define FF_API_AFD               (LIBAVCODEC_VERSION_MAJOR < 57)
 #endif
+#ifndef FF_API_AUDIOENC_DELAY
+#define FF_API_AUDIOENC_DELAY    (LIBAVCODEC_VERSION_MAJOR < 58)
+#endif
 
 #endif /* AVCODEC_VERSION_H */
diff --git a/libavcodec/wmaenc.c b/libavcodec/wmaenc.c
index 95fc199ef8..e801663d66 100644
--- a/libavcodec/wmaenc.c
+++ b/libavcodec/wmaenc.c
@@ -92,8 +92,7 @@ static av_cold int encode_init(AVCodecContext *avctx)
     avctx->block_align = block_align;
     avctx->bit_rate    = avctx->block_align * 8LL * avctx->sample_rate /
                          s->frame_len;
-    avctx->frame_size  =
-    avctx->delay       = s->frame_len;
+    avctx->frame_size = avctx->initial_padding = s->frame_len;
 
     return 0;
 }
@@ -420,7 +419,7 @@ static int encode_superframe(AVCodecContext *avctx, AVPacket *avpkt,
     flush_put_bits(&s->pb);
 
     if (frame->pts != AV_NOPTS_VALUE)
-        avpkt->pts = frame->pts - ff_samples_to_time_base(avctx, avctx->delay);
+        avpkt->pts = frame->pts - ff_samples_to_time_base(avctx, avctx->initial_padding);
 
     avpkt->size     = avctx->block_align;
     *got_packet_ptr = 1;
author	Anton Khirnov <anton@khirnov.net>	2014-08-23 12:40:50 +0000
committer	Anton Khirnov <anton@khirnov.net>	2014-10-13 19:09:01 +0000
commit	2df0c32ea12ddfa72ba88309812bfb13b674130f (patch)
tree	50677fbc787646c10931f8ba95e32883173f7bfb /libavcodec
parent	c80a816142699dea9cf9fa66689a7838a487ed7e (diff)
download	ffmpeg-2df0c32ea12ddfa72ba88309812bfb13b674130f.tar.gz