diff options
Diffstat (limited to 'libavformat/oggenc.c')
-rw-r--r-- | libavformat/oggenc.c | 226 |
1 files changed, 181 insertions, 45 deletions
diff --git a/libavformat/oggenc.c b/libavformat/oggenc.c index 2fef74ad7a..0713a13a70 100644 --- a/libavformat/oggenc.c +++ b/libavformat/oggenc.c @@ -2,20 +2,20 @@ * Ogg muxer * Copyright (c) 2007 Baptiste Coudurier <baptiste dot coudurier at free dot fr> * - * This file is part of Libav. + * This file is part of FFmpeg. * - * Libav is free software; you can redistribute it and/or + * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * - * Libav is distributed in the hope that it will be useful, + * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software + * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ @@ -54,6 +54,8 @@ typedef struct OGGStreamContext { int kfgshift; int64_t last_kf_pts; int vrev; + /* for VP8 granule */ + int isvp8; int eos; unsigned page_count; ///< number of page buffered OGGPage page; ///< current page @@ -80,6 +82,8 @@ typedef struct OGGContext { static const AVOption options[] = { { "serial_offset", "serial number offset", OFFSET(serial_offset), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, PARAM }, + { "oggpagesize", "Set preferred Ogg page size.", + OFFSET(pref_size), AV_OPT_TYPE_INT, {.i64 = 0}, 0, MAX_PAGE_SIZE, PARAM}, { "pagesize", "preferred page size in bytes (deprecated)", OFFSET(pref_size), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, MAX_PAGE_SIZE, PARAM }, { "page_duration", "preferred page duration, in microseconds", @@ -87,9 +91,9 @@ static const AVOption options[] = { { NULL }, }; -#define OGG_CLASS(flavor)\ +#define OGG_CLASS(flavor, name)\ static const AVClass flavor ## _muxer_class = {\ - .class_name = #flavor " muxer",\ + .class_name = #name " muxer",\ .item_name = av_default_item_name,\ .option = options,\ .version = LIBAVUTIL_VERSION_INT,\ @@ -142,11 +146,19 @@ static int ogg_write_page(AVFormatContext *s, OGGPage *page, int extra_flags) return 0; } +static int ogg_key_granule(OGGStreamContext *oggstream, int64_t granule) +{ + return (oggstream->kfgshift && !(granule & ((1<<oggstream->kfgshift)-1))) || + (oggstream->isvp8 && !((granule >> 3) & 0x07ffffff)); +} + static int64_t ogg_granule_to_timestamp(OGGStreamContext *oggstream, int64_t granule) { if (oggstream->kfgshift) return (granule>>oggstream->kfgshift) + (granule & ((1<<oggstream->kfgshift)-1)); + else if (oggstream->isvp8) + return granule >> 32; else return granule; } @@ -186,7 +198,7 @@ static int ogg_buffer_page(AVFormatContext *s, OGGStreamContext *oggstream) return AVERROR(ENOMEM); l->page = oggstream->page; - oggstream->page.start_granule = oggstream->page.granule; + oggstream->page.start_granule = ogg_granule_to_timestamp(oggstream, oggstream->page.granule); oggstream->page_count++; ogg_reset_cur_page(oggstream); @@ -212,9 +224,14 @@ static int ogg_buffer_data(AVFormatContext *s, AVStream *st, int i, segments, len, flush = 0; // Handles VFR by flushing page because this frame needs to have a timestamp - if (st->codecpar->codec_id == AV_CODEC_ID_THEORA && !header && - ogg_granule_to_timestamp(oggstream, granule) > - ogg_granule_to_timestamp(oggstream, oggstream->last_granule) + 1) { + // For theora and VP8, keyframes also need to have a timestamp to correctly mark + // them as such, otherwise seeking will not work correctly at the very + // least with old libogg versions. + // Do not try to flush header packets though, that will create broken files. + if ((st->codecpar->codec_id == AV_CODEC_ID_THEORA || st->codecpar->codec_id == AV_CODEC_ID_VP8) && !header && + (ogg_granule_to_timestamp(oggstream, granule) > + ogg_granule_to_timestamp(oggstream, oggstream->last_granule) + 1 || + ogg_key_granule(oggstream, granule))) { if (oggstream->page.granule != -1) ogg_buffer_page(s, oggstream); flush = 1; @@ -248,18 +265,21 @@ static int ogg_buffer_data(AVFormatContext *s, AVStream *st, if (i == total_segments) page->granule = granule; - if (!header) { + { AVStream *st = s->streams[page->stream_index]; int64_t start = av_rescale_q(page->start_granule, st->time_base, AV_TIME_BASE_Q); - int64_t next = av_rescale_q(page->granule, st->time_base, - AV_TIME_BASE_Q); + int64_t next = av_rescale_q(ogg_granule_to_timestamp(oggstream, page->granule), + st->time_base, AV_TIME_BASE_Q); - if (page->segments_count == 255 || - (ogg->pref_size > 0 && page->size >= ogg->pref_size) || - (ogg->pref_duration > 0 && next - start >= ogg->pref_duration)) { + if (page->segments_count == 255) { ogg_buffer_page(s, oggstream); + } else if (!header) { + if ((ogg->pref_size > 0 && page->size >= ogg->pref_size) || + (ogg->pref_duration > 0 && next - start >= ogg->pref_duration)) { + ogg_buffer_page(s, oggstream); + } } } } @@ -270,16 +290,18 @@ static int ogg_buffer_data(AVFormatContext *s, AVStream *st, return 0; } -static uint8_t *ogg_write_vorbiscomment(int offset, int bitexact, +static uint8_t *ogg_write_vorbiscomment(int64_t offset, int bitexact, int *header_len, AVDictionary **m, int framing_bit) { - const char *vendor = bitexact ? "Libav" : LIBAVFORMAT_IDENT; - int size; + const char *vendor = bitexact ? "ffmpeg" : LIBAVFORMAT_IDENT; + int64_t size; uint8_t *p, *p0; ff_metadata_conv(m, ff_vorbiscomment_metadata_conv, NULL); size = offset + ff_vorbiscomment_length(*m, vendor) + framing_bit; + if (size > INT_MAX) + return NULL; p = av_mallocz(size); if (!p) return NULL; @@ -339,7 +361,7 @@ static int ogg_build_speex_headers(AVCodecParameters *par, uint8_t *p; if (par->extradata_size < SPEEX_HEADER_SIZE) - return -1; + return AVERROR_INVALIDDATA; // first packet: Speex header p = av_mallocz(SPEEX_HEADER_SIZE); @@ -368,7 +390,7 @@ static int ogg_build_opus_headers(AVCodecParameters *par, uint8_t *p; if (par->extradata_size < OPUS_HEADER_SIZE) - return -1; + return AVERROR_INVALIDDATA; /* first packet: Opus header */ p = av_mallocz(par->extradata_size); @@ -388,6 +410,57 @@ static int ogg_build_opus_headers(AVCodecParameters *par, return 0; } +#define VP8_HEADER_SIZE 26 + +static int ogg_build_vp8_headers(AVFormatContext *s, AVStream *st, + OGGStreamContext *oggstream, int bitexact) +{ + AVCodecParameters *par = st->codecpar; + uint8_t *p; + + /* first packet: VP8 header */ + p = av_mallocz(VP8_HEADER_SIZE); + if (!p) + return AVERROR(ENOMEM); + oggstream->header[0] = p; + oggstream->header_len[0] = VP8_HEADER_SIZE; + bytestream_put_byte(&p, 0x4f); // HDRID + bytestream_put_buffer(&p, "VP80", 4); // Identifier + bytestream_put_byte(&p, 1); // HDRTYP + bytestream_put_byte(&p, 1); // VMAJ + bytestream_put_byte(&p, 0); // VMIN + bytestream_put_be16(&p, par->width); + bytestream_put_be16(&p, par->height); + bytestream_put_be24(&p, par->sample_aspect_ratio.num); + bytestream_put_be24(&p, par->sample_aspect_ratio.den); + if (st->r_frame_rate.num > 0 && st->r_frame_rate.den > 0) { + // OggVP8 requires pts to increase by 1 per visible frame, so use the least common + // multiple framerate if available. + av_log(s, AV_LOG_DEBUG, "Changing time base from %d/%d to %d/%d\n", + st->time_base.num, st->time_base.den, + st->r_frame_rate.den, st->r_frame_rate.num); + avpriv_set_pts_info(st, 64, st->r_frame_rate.den, st->r_frame_rate.num); + } + bytestream_put_be32(&p, st->time_base.den); + bytestream_put_be32(&p, st->time_base.num); + + /* optional second packet: VorbisComment */ + if (av_dict_get(st->metadata, "", NULL, AV_DICT_IGNORE_SUFFIX)) { + p = ogg_write_vorbiscomment(7, bitexact, &oggstream->header_len[1], &st->metadata, 0); + if (!p) + return AVERROR(ENOMEM); + oggstream->header[1] = p; + bytestream_put_byte(&p, 0x4f); // HDRID + bytestream_put_buffer(&p, "VP80", 4); // Identifier + bytestream_put_byte(&p, 2); // HDRTYP + bytestream_put_byte(&p, 0x20); + } + + oggstream->isvp8 = 1; + + return 0; +} + static void ogg_write_pages(AVFormatContext *s, int flush) { OGGContext *ogg = s->priv_data; @@ -413,7 +486,7 @@ static void ogg_write_pages(AVFormatContext *s, int flush) static int ogg_write_header(AVFormatContext *s) { OGGContext *ogg = s->priv_data; - OGGStreamContext *oggstream; + OGGStreamContext *oggstream = NULL; int i, j; if (ogg->pref_size) @@ -423,29 +496,33 @@ static int ogg_write_header(AVFormatContext *s) AVStream *st = s->streams[i]; unsigned serial_num = i + ogg->serial_offset; - if (st->codecpar->codec_type == AVMEDIA_TYPE_AUDIO) + if (st->codecpar->codec_type == AVMEDIA_TYPE_AUDIO) { if (st->codecpar->codec_id == AV_CODEC_ID_OPUS) /* Opus requires a fixed 48kHz clock */ avpriv_set_pts_info(st, 64, 1, 48000); else avpriv_set_pts_info(st, 64, 1, st->codecpar->sample_rate); + } if (st->codecpar->codec_id != AV_CODEC_ID_VORBIS && st->codecpar->codec_id != AV_CODEC_ID_THEORA && st->codecpar->codec_id != AV_CODEC_ID_SPEEX && st->codecpar->codec_id != AV_CODEC_ID_FLAC && - st->codecpar->codec_id != AV_CODEC_ID_OPUS) { + st->codecpar->codec_id != AV_CODEC_ID_OPUS && + st->codecpar->codec_id != AV_CODEC_ID_VP8) { av_log(s, AV_LOG_ERROR, "Unsupported codec id in stream %d\n", i); - return -1; + return AVERROR(EINVAL); } - if (!st->codecpar->extradata || !st->codecpar->extradata_size) { + if ((!st->codecpar->extradata || !st->codecpar->extradata_size) && + st->codecpar->codec_id != AV_CODEC_ID_VP8) { av_log(s, AV_LOG_ERROR, "No extradata present\n"); - return -1; + return AVERROR_INVALIDDATA; } oggstream = av_mallocz(sizeof(*oggstream)); if (!oggstream) return AVERROR(ENOMEM); + oggstream->page.stream_index = i; if (!(s->flags & AVFMT_FLAG_BITEXACT)) @@ -459,11 +536,13 @@ static int ogg_write_header(AVFormatContext *s) } while (j < i); oggstream->serial_num = serial_num; + av_dict_copy(&st->metadata, s->metadata, AV_DICT_DONT_OVERWRITE); + st->priv_data = oggstream; if (st->codecpar->codec_id == AV_CODEC_ID_FLAC) { int err = ogg_build_flac_headers(st->codecpar, oggstream, s->flags & AVFMT_FLAG_BITEXACT, - &s->metadata); + &st->metadata); if (err) { av_log(s, AV_LOG_ERROR, "Error writing FLAC headers\n"); av_freep(&st->priv_data); @@ -472,7 +551,7 @@ static int ogg_write_header(AVFormatContext *s) } else if (st->codecpar->codec_id == AV_CODEC_ID_SPEEX) { int err = ogg_build_speex_headers(st->codecpar, oggstream, s->flags & AVFMT_FLAG_BITEXACT, - &s->metadata); + &st->metadata); if (err) { av_log(s, AV_LOG_ERROR, "Error writing Speex headers\n"); av_freep(&st->priv_data); @@ -481,12 +560,20 @@ static int ogg_write_header(AVFormatContext *s) } else if (st->codecpar->codec_id == AV_CODEC_ID_OPUS) { int err = ogg_build_opus_headers(st->codecpar, oggstream, s->flags & AVFMT_FLAG_BITEXACT, - &s->metadata); + &st->metadata); if (err) { av_log(s, AV_LOG_ERROR, "Error writing Opus headers\n"); av_freep(&st->priv_data); return err; } + } else if (st->codecpar->codec_id == AV_CODEC_ID_VP8) { + int err = ogg_build_vp8_headers(s, st, oggstream, + s->flags & AVFMT_FLAG_BITEXACT); + if (err) { + av_log(s, AV_LOG_ERROR, "Error writing VP8 headers\n"); + av_freep(&st->priv_data); + return err; + } } else { uint8_t *p; const char *cstr = st->codecpar->codec_id == AV_CODEC_ID_VORBIS ? "vorbis" : "theora"; @@ -495,14 +582,14 @@ static int ogg_write_header(AVFormatContext *s) if (avpriv_split_xiph_headers(st->codecpar->extradata, st->codecpar->extradata_size, st->codecpar->codec_id == AV_CODEC_ID_VORBIS ? 30 : 42, - oggstream->header, oggstream->header_len) < 0) { + (const uint8_t**)oggstream->header, oggstream->header_len) < 0) { av_log(s, AV_LOG_ERROR, "Extradata corrupted\n"); av_freep(&st->priv_data); - return -1; + return AVERROR_INVALIDDATA; } p = ogg_write_vorbiscomment(7, s->flags & AVFMT_FLAG_BITEXACT, - &oggstream->header_len[1], &s->metadata, + &oggstream->header_len[1], &st->metadata, framing_bit); oggstream->header[1] = p; if (!p) @@ -512,6 +599,14 @@ static int ogg_write_header(AVFormatContext *s) bytestream_put_buffer(&p, cstr, 6); if (st->codecpar->codec_id == AV_CODEC_ID_THEORA) { + int den = AV_RB32(oggstream->header[0] + 22), num = AV_RB32(oggstream->header[0] + 26); + /* Make sure to use time base stored in the Theora stream header to write + correct timestamps */ + if (st->time_base.num != num || st->time_base.den != den) { + av_log(s, AV_LOG_DEBUG, "Changing time base from %d/%d to %d/%d\n", + st->time_base.num, st->time_base.den, num, den); + avpriv_set_pts_info(st, 64, num, den); + } /** KFGSHIFT is the width of the less significant section of the granule position The less significant section is the frame count since the last keyframe */ oggstream->kfgshift = ((oggstream->header[0][40]&3)<<3)|(oggstream->header[0][41]>>5); @@ -571,7 +666,18 @@ static int ogg_write_packet_internal(AVFormatContext *s, AVPacket *pkt) av_rescale_q(st->codecpar->initial_padding, (AVRational){ 1, st->codecpar->sample_rate }, st->time_base); - else + else if (st->codecpar->codec_id == AV_CODEC_ID_VP8) { + int64_t pts, invcnt, dist; + int visible; + + visible = (pkt->data[0] >> 4) & 1; + pts = pkt->pts + pkt->duration; + invcnt = (oggstream->last_granule >> 30) & 3; + invcnt = visible ? 3 : (invcnt == 3 ? 0 : invcnt + 1); + dist = (pkt->flags & AV_PKT_FLAG_KEY) ? 0 : ((oggstream->last_granule >> 3) & 0x07ffffff) + 1; + + granule = (pts << 32) | (invcnt << 30) | (dist << 3); + } else granule = pkt->pts + pkt->duration; if (oggstream->page.start_granule == AV_NOPTS_VALUE) @@ -602,7 +708,7 @@ static int ogg_write_packet(AVFormatContext *s, AVPacket *pkt) } ogg_write_pages(s, 2); - return 0; + return 1; } static int ogg_write_trailer(AVFormatContext *s) @@ -624,8 +730,9 @@ static int ogg_write_trailer(AVFormatContext *s) OGGStreamContext *oggstream = st->priv_data; if (st->codecpar->codec_id == AV_CODEC_ID_FLAC || st->codecpar->codec_id == AV_CODEC_ID_SPEEX || - st->codecpar->codec_id == AV_CODEC_ID_OPUS) { - av_free(oggstream->header[0]); + st->codecpar->codec_id == AV_CODEC_ID_OPUS || + st->codecpar->codec_id == AV_CODEC_ID_VP8) { + av_freep(&oggstream->header[0]); } av_freep(&oggstream->header[1]); av_freep(&st->priv_data); @@ -634,12 +741,22 @@ static int ogg_write_trailer(AVFormatContext *s) } #if CONFIG_OGG_MUXER -OGG_CLASS(ogg) +OGG_CLASS(ogg, Ogg) AVOutputFormat ff_ogg_muxer = { .name = "ogg", .long_name = NULL_IF_CONFIG_SMALL("Ogg"), .mime_type = "application/ogg", - .extensions = "ogg,ogv", + .extensions = "ogg" +#if !CONFIG_OGV_MUXER + ",ogv" +#endif +#if !CONFIG_SPX_MUXER + ",spx" +#endif +#if !CONFIG_OPUS_MUXER + ",opus" +#endif + , .priv_data_size = sizeof(OGGContext), .audio_codec = CONFIG_LIBVORBIS_ENCODER ? AV_CODEC_ID_VORBIS : AV_CODEC_ID_FLAC, @@ -647,21 +764,20 @@ AVOutputFormat ff_ogg_muxer = { .write_header = ogg_write_header, .write_packet = ogg_write_packet, .write_trailer = ogg_write_trailer, - .flags = AVFMT_TS_NEGATIVE | AVFMT_ALLOW_FLUSH, + .flags = AVFMT_TS_NEGATIVE | AVFMT_TS_NONSTRICT | AVFMT_ALLOW_FLUSH, .priv_class = &ogg_muxer_class, }; #endif #if CONFIG_OGA_MUXER -OGG_CLASS(oga) +OGG_CLASS(oga, Ogg audio) AVOutputFormat ff_oga_muxer = { .name = "oga", .long_name = NULL_IF_CONFIG_SMALL("Ogg Audio"), .mime_type = "audio/ogg", .extensions = "oga", .priv_data_size = sizeof(OGGContext), - .audio_codec = CONFIG_LIBVORBIS_ENCODER ? - AV_CODEC_ID_VORBIS : AV_CODEC_ID_FLAC, + .audio_codec = AV_CODEC_ID_FLAC, .write_header = ogg_write_header, .write_packet = ogg_write_packet, .write_trailer = ogg_write_trailer, @@ -670,8 +786,28 @@ AVOutputFormat ff_oga_muxer = { }; #endif +#if CONFIG_OGV_MUXER +OGG_CLASS(ogv, Ogg video) +AVOutputFormat ff_ogv_muxer = { + .name = "ogv", + .long_name = NULL_IF_CONFIG_SMALL("Ogg Video"), + .mime_type = "video/ogg", + .extensions = "ogv", + .priv_data_size = sizeof(OGGContext), + .audio_codec = CONFIG_LIBVORBIS_ENCODER ? + AV_CODEC_ID_VORBIS : AV_CODEC_ID_FLAC, + .video_codec = CONFIG_LIBTHEORA_ENCODER ? + AV_CODEC_ID_THEORA : AV_CODEC_ID_VP8, + .write_header = ogg_write_header, + .write_packet = ogg_write_packet, + .write_trailer = ogg_write_trailer, + .flags = AVFMT_TS_NEGATIVE | AVFMT_TS_NONSTRICT | AVFMT_ALLOW_FLUSH, + .priv_class = &ogv_muxer_class, +}; +#endif + #if CONFIG_SPX_MUXER -OGG_CLASS(spx) +OGG_CLASS(spx, Ogg Speex) AVOutputFormat ff_spx_muxer = { .name = "spx", .long_name = NULL_IF_CONFIG_SMALL("Ogg Speex"), @@ -688,7 +824,7 @@ AVOutputFormat ff_spx_muxer = { #endif #if CONFIG_OPUS_MUXER -OGG_CLASS(opus) +OGG_CLASS(opus, Ogg Opus) AVOutputFormat ff_opus_muxer = { .name = "opus", .long_name = NULL_IF_CONFIG_SMALL("Ogg Opus"), |