diff options
author | Jacob Lifshay <programmerjake@gmail.com> | 2025-07-22 23:28:41 -0700 |
---|---|---|
committer | Jacob Lifshay <programmerjake@gmail.com> | 2025-08-08 03:04:42 -0700 |
commit | ceb9688fee7188f506e4ad9f8f5f9cd148d7afb5 (patch) | |
tree | f932890c002a82c5497febca2ae77e09c9d015d6 | |
parent | a18d5a766d6af64c5edeb8230291cd171f60d2e9 (diff) | |
download | ffmpeg-ceb9688fee7188f506e4ad9f8f5f9cd148d7afb5.tar.gz |
lavf/mccdec: clean up, add support for mcc 2.0 features, and add SMPTE_436M_ANC output stream
Signed-off-by: Jacob Lifshay <programmerjake@gmail.com>
-rwxr-xr-x | configure | 1 | ||||
-rw-r--r-- | doc/demuxers.texi | 26 | ||||
-rw-r--r-- | libavformat/mccdec.c | 385 |
3 files changed, 325 insertions, 87 deletions
@@ -3726,6 +3726,7 @@ matroska_audio_muxer_select="matroska_muxer" matroska_demuxer_select="riffdec" matroska_demuxer_suggest="bzlib zlib" matroska_muxer_select="iso_writer mpeg4audio riffenc aac_adtstoasc_bsf pgs_frame_merge_bsf vp9_superframe_bsf" +mcc_demuxer_select="smpte_436m" mlp_demuxer_select="mlp_parser" mmf_muxer_select="riffenc" mov_demuxer_select="iso_media riffdec" diff --git a/doc/demuxers.texi b/doc/demuxers.texi index 016ce6468d..c6b72c3b69 100644 --- a/doc/demuxers.texi +++ b/doc/demuxers.texi @@ -855,6 +855,32 @@ Set the sample rate for libopenmpt to output. Range is from 1000 to INT_MAX. The value default is 48000. @end table +@anchor{mccdec} +@section mcc + +Demuxer for MacCaption MCC files. It supports MCC versions 1.0 and 2.0. +MCC files store VANC data, which can include closed captions (EIA-608 and CEA-708), ancillary time code, pan-scan data, etc. +By default, for backward compatibility, the MCC demuxer extracts just the EIA-608 and CEA-708 closed captions and returns an @code{EIA_608} stream, ignoring all other VANC data. +You can change it to return all VANC data in a @code{SMPTE_436M_ANC} data stream by setting @option{-eia608_extract 0}. + +@subsection Examples + +@itemize +@item +Convert an MCC file to Scenarist (SCC) format: +@example +ffmpeg -i CC.mcc -c:s copy CC.scc +@end example +Note that the SCC format only supports EIA-608, so this will discard all other data such as CEA-708 extensions. + +@item +Merge an MCC file into an MXF file: +@example +ffmpeg -i video_and_audio.mxf -eia608_extract 0 -i CC.mcc -c copy -map 0 -map 1 out.mxf +@end example +This retains all VANC data and inserts it into the output MXF file as a @code{SMPTE_436M_ANC} data stream. +@end itemize + @section mov/mp4/3gp Demuxer for Quicktime File Format & ISO/IEC Base Media File Format (ISO/IEC 14496-12 or MPEG-4 Part 12, ISO/IEC 15444-12 or JPEG 2000 Part 12). 
diff --git a/libavformat/mccdec.c b/libavformat/mccdec.c index 8c0ea09b6b..8a9eff4c2a 100644 --- a/libavformat/mccdec.c +++ b/libavformat/mccdec.c @@ -1,6 +1,7 @@ /* * MCC subtitle demuxer * Copyright (c) 2020 Paul B Mahol + * Copyright (c) 2025 Jacob Lifshay * * This file is part of FFmpeg. * @@ -22,18 +23,32 @@ #include "avformat.h" #include "demux.h" #include "internal.h" -#include "subtitles.h" +#include "libavcodec/bytestream.h" +#include "libavcodec/codec_id.h" +#include "libavcodec/smpte_436m.h" #include "libavutil/avstring.h" -#include "libavutil/bprint.h" -#include "libavutil/intreadwrite.h" +#include "libavutil/avutil.h" +#include "libavutil/error.h" +#include "libavutil/internal.h" +#include "libavutil/log.h" +#include "libavutil/macros.h" +#include "libavutil/opt.h" +#include "libavutil/rational.h" +#include "libavutil/timecode.h" +#include "subtitles.h" +#include <inttypes.h> +#include <stdbool.h> +#include <string.h> typedef struct MCCContext { + const AVClass *class; + int eia608_extract; FFDemuxSubtitlesQueue q; } MCCContext; static int mcc_probe(const AVProbeData *p) { - char buf[28]; + char buf[28]; FFTextReader tr; ff_text_init_buf(&tr, p->buf, p->buf_size); @@ -61,21 +76,27 @@ static int convert(uint8_t x) } typedef struct alias { - uint8_t key; - int len; + uint8_t key; + int len; const char *value; } alias; +#define CCPAD "\xFA\x0\x0" +#define CCPAD3 CCPAD CCPAD CCPAD + +static const char cc_pad[27] = CCPAD3 CCPAD3 CCPAD3; + static const alias aliases[20] = { - { .key = 16, .len = 3, .value = "\xFA\x0\x0", }, - { .key = 17, .len = 6, .value = "\xFA\x0\x0\xFA\x0\x0", }, - { .key = 18, .len = 9, .value = "\xFA\x0\x0\xFA\x0\x0\xFA\x0\x0", }, - { .key = 19, .len = 12, .value = "\xFA\x0\x0\xFA\x0\x0\xFA\x0\x0\xFA\x0\x0", }, - { .key = 20, .len = 15, .value = "\xFA\x0\x0\xFA\x0\x0\xFA\x0\x0\xFA\x0\x0\xFA\x0\x0", }, - { .key = 21, .len = 18, .value = "\xFA\x0\x0\xFA\x0\x0\xFA\x0\x0\xFA\x0\x0\xFA\x0\x0\xFA\x0\x0", }, - { .key = 22, .len = 21, .value = 
"\xFA\x0\x0\xFA\x0\x0\xFA\x0\x0\xFA\x0\x0\xFA\x0\x0\xFA\x0\x0\xFA\x0\x0", }, - { .key = 23, .len = 24, .value = "\xFA\x0\x0\xFA\x0\x0\xFA\x0\x0\xFA\x0\x0\xFA\x0\x0\xFA\x0\x0\xFA\x0\x0\xFA\x0\x0", }, - { .key = 24, .len = 27, .value = "\xFA\x0\x0\xFA\x0\x0\xFA\x0\x0\xFA\x0\x0\xFA\x0\x0\xFA\x0\x0\xFA\x0\x0\xFA\x0\x0\xFA\x0\x0", }, + // clang-format off + { .key = 16, .len = 3, .value = cc_pad, }, + { .key = 17, .len = 6, .value = cc_pad, }, + { .key = 18, .len = 9, .value = cc_pad, }, + { .key = 19, .len = 12, .value = cc_pad, }, + { .key = 20, .len = 15, .value = cc_pad, }, + { .key = 21, .len = 18, .value = cc_pad, }, + { .key = 22, .len = 21, .value = cc_pad, }, + { .key = 23, .len = 24, .value = cc_pad, }, + { .key = 24, .len = 27, .value = cc_pad, }, { .key = 25, .len = 3, .value = "\xFB\x80\x80", }, { .key = 26, .len = 3, .value = "\xFC\x80\x80", }, { .key = 27, .len = 3, .value = "\xFD\x80\x80", }, @@ -87,113 +108,266 @@ static const alias aliases[20] = { { .key = 33, .len = 0, .value = NULL, }, { .key = 34, .len = 0, .value = NULL, }, { .key = 35, .len = 1, .value = "\x0", }, + // clang-format on +}; + +typedef struct TimeTracker { + int64_t last_ts; + int64_t twenty_four_hr; + AVTimecode timecode; +} TimeTracker; + +static int time_tracker_init(TimeTracker *tt, AVStream *st, AVRational rate, void *log_ctx) +{ + *tt = (TimeTracker){ .last_ts = 0 }; + int ret = av_timecode_init(&tt->timecode, rate, rate.den == 1001 ? 
AV_TIMECODE_FLAG_DROPFRAME : 0, 0, log_ctx); + if (ret < 0) + return ret; + // wrap pts values at 24hr ourselves since they can be bigger than fits in an int + AVTimecode twenty_four_hr; + ret = av_timecode_init_from_components(&twenty_four_hr, rate, tt->timecode.flags, 24, 0, 0, 0, log_ctx); + if (ret < 0) + return ret; + tt->twenty_four_hr = twenty_four_hr.start; + // timecode uses reciprocal of timebase + avpriv_set_pts_info(st, 64, rate.den, rate.num); + return 0; +} + +typedef struct MCCTimecode { + unsigned hh, mm, ss, ff, field, line_number; +} MCCTimecode; + +static int time_tracker_set_time(TimeTracker *tt, const MCCTimecode *tc, void *log_ctx) +{ + AVTimecode last = tt->timecode; + int ret = av_timecode_init_from_components(&tt->timecode, last.rate, last.flags, tc->hh, tc->mm, tc->ss, tc->ff, log_ctx); + if (ret < 0) { + tt->timecode = last; + return ret; + } + tt->last_ts -= last.start; + tt->last_ts += tt->timecode.start; + if (tt->timecode.start < last.start) + tt->last_ts += tt->twenty_four_hr; + return 0; +} + +struct ValidTimeCodeRate { + AVRational rate; + const char *str; }; +static struct ValidTimeCodeRate valid_time_code_rates[] = { + { .rate = { .num = 24, .den = 1 }, .str = "24" }, + { .rate = { .num = 25, .den = 1 }, .str = "25" }, + { .rate = { .num = 30000, .den = 1001 }, .str = "30DF" }, + { .rate = { .num = 30, .den = 1 }, .str = "30" }, + { .rate = { .num = 50, .den = 1 }, .str = "50" }, + { .rate = { .num = 60000, .den = 1001 }, .str = "60DF" }, + { .rate = { .num = 60, .den = 1 }, .str = "60" }, +}; + +static int parse_time_code_rate(AVFormatContext *s, AVStream *st, TimeTracker *tt, const char *time_code_rate) +{ + for (size_t i = 0; i < FF_ARRAY_ELEMS(valid_time_code_rates); i++) { + const char *after; + if (av_stristart(time_code_rate, valid_time_code_rates[i].str, &after) != 0) { + bool bad_after = false; + for (; *after; after++) { + if (!av_isspace(*after)) { + bad_after = true; + break; + } + } + if (bad_after) + continue; + 
return time_tracker_init(tt, st, valid_time_code_rates[i].rate, s); + } + } + av_log(s, AV_LOG_FATAL, "invalid mcc time code rate: %s", time_code_rate); + return AVERROR_INVALIDDATA; +} + +static int mcc_parse_time_code_part(char **line_left, unsigned *value, unsigned max, const char *after_set) +{ + *value = 0; + if (!av_isdigit(**line_left)) + return AVERROR_INVALIDDATA; + while (av_isdigit(**line_left)) { + unsigned digit = **line_left - '0'; + *value = *value * 10 + digit; + ++*line_left; + if (*value > max) + return AVERROR_INVALIDDATA; + } + unsigned char after = **line_left; + if (!after || !strchr(after_set, after)) + return AVERROR_INVALIDDATA; + ++*line_left; + return after; +} + +static int mcc_parse_time_code(char **line_left, MCCTimecode *tc) +{ + *tc = (MCCTimecode){ .field = 0, .line_number = 9 }; + int ret = mcc_parse_time_code_part(line_left, &tc->hh, 23, ":"); + if (ret < 0) + return ret; + ret = mcc_parse_time_code_part(line_left, &tc->mm, 59, ":"); + if (ret < 0) + return ret; + ret = mcc_parse_time_code_part(line_left, &tc->ss, 59, ":;"); + if (ret < 0) + return ret; + ret = mcc_parse_time_code_part(line_left, &tc->ff, 59, ".\t"); + if (ret < 0) + return ret; + if (ret == '.') { + ret = mcc_parse_time_code_part(line_left, &tc->field, 1, ",\t"); + if (ret < 0) + return ret; + if (ret == ',') { + ret = mcc_parse_time_code_part(line_left, &tc->line_number, 0xFFFF, "\t"); + if (ret < 0) + return ret; + } + } + if (ret != '\t') + return AVERROR_INVALIDDATA; + return 0; +} + static int mcc_read_header(AVFormatContext *s) { - MCCContext *mcc = s->priv_data; - AVStream *st = avformat_new_stream(s, NULL); - AVRational rate = {0}; - int64_t ts, pos; - uint8_t out[4096]; - char line[4096]; + MCCContext *mcc = s->priv_data; + AVStream *st = avformat_new_stream(s, NULL); + int64_t pos; + AVSmpte436mCodedAnc coded_anc = { + .payload_sample_coding = AV_SMPTE_436M_PAYLOAD_SAMPLE_CODING_8BIT_LUMA, + }; + char line[4096]; FFTextReader tr; - int ret = 0; + int 
ret = 0; ff_text_init_avio(s, &tr, s->pb); if (!st) return AVERROR(ENOMEM); - st->codecpar->codec_type = AVMEDIA_TYPE_SUBTITLE; - st->codecpar->codec_id = AV_CODEC_ID_EIA_608; - avpriv_set_pts_info(st, 64, 1, 30); + if (mcc->eia608_extract) { + st->codecpar->codec_type = AVMEDIA_TYPE_SUBTITLE; + st->codecpar->codec_id = AV_CODEC_ID_EIA_608; + } else { + st->codecpar->codec_type = AVMEDIA_TYPE_DATA; + st->codecpar->codec_id = AV_CODEC_ID_SMPTE_436M_ANC; + av_dict_set(&st->metadata, "data_type", "vbi_vanc_smpte_436M", 0); + } - while (!ff_text_eof(&tr)) { - int hh, mm, ss, fs, i = 0, j = 0; - int start = 12, count = 0; - AVPacket *sub; - char *lline; + TimeTracker tt; + ret = time_tracker_init(&tt, st, (AVRational){ .num = 30, .den = 1 }, s); + if (ret < 0) + return ret; + while (!ff_text_eof(&tr)) { + pos = ff_text_pos(&tr); ff_subtitles_read_line(&tr, line, sizeof(line)); if (!strncmp(line, "File Format=MacCaption_MCC V", 28)) continue; if (!strncmp(line, "//", 2)) continue; if (!strncmp(line, "Time Code Rate=", 15)) { - char *rate_str = line + 15; - char *df = NULL; - int num = -1, den = -1; - - if (rate_str[0]) { - num = strtol(rate_str, &df, 10); - den = 1; - if (df && !av_strncasecmp(df, "DF", 2)) { - av_reduce(&num, &den, num * 1000LL, 1001, INT_MAX); - } - } - - if (num > 0 && den > 0) { - rate = av_make_q(num, den); - avpriv_set_pts_info(st, 64, rate.den, rate.num); - } + ret = parse_time_code_rate(s, st, &tt, line + 15); + if (ret < 0) + return ret; + continue; + } + if (strchr(line, '=')) + continue; // skip attributes + char *line_left = line; + while (av_isspace(*line_left)) + line_left++; + if (!*line_left) // skip empty lines + continue; + MCCTimecode tc; + ret = mcc_parse_time_code(&line_left, &tc); + if (ret < 0) { + av_log(s, AV_LOG_ERROR, "can't parse mcc time code"); continue; } - if (av_sscanf(line, "%d:%d:%d:%d", &hh, &mm, &ss, &fs) != 4 || rate.den <= 0) + int64_t last_pts = tt.last_ts; + ret = time_tracker_set_time(&tt, &tc, s); + if (ret < 0) 
continue; + bool merge = last_pts == tt.last_ts; - ts = av_sat_add64(av_rescale(hh * 3600LL + mm * 60LL + ss, rate.num, rate.den), fs); + coded_anc.line_number = tc.line_number; + coded_anc.wrapping_type = tc.field ? AV_SMPTE_436M_WRAPPING_TYPE_VANC_FIELD_2 : AV_SMPTE_436M_WRAPPING_TYPE_VANC_FRAME; - lline = (char *)&line; - lline += 12; - pos = ff_text_pos(&tr); + PutByteContext pb; + bytestream2_init_writer(&pb, coded_anc.payload, AV_SMPTE_436M_CODED_ANC_PAYLOAD_CAPACITY); - while (lline[i]) { - uint8_t v = convert(lline[i]); + while (*line_left) { + uint8_t v = convert(*line_left++); if (v >= 16 && v <= 35) { int idx = v - 16; - if (aliases[idx].len) { - if (j >= sizeof(out) - 1 - aliases[idx].len) { - j = 0; - break; - } - memcpy(out + j, aliases[idx].value, aliases[idx].len); - j += aliases[idx].len; - } + bytestream2_put_buffer(&pb, aliases[idx].value, aliases[idx].len); } else { uint8_t vv; - if (i + 13 >= sizeof(line) - 1) - break; - vv = convert(lline[i + 1]); - if (j >= sizeof(out) - 1) { - j = 0; + if (!*line_left) break; - } - out[j++] = vv | (v << 4); - i++; + vv = convert(*line_left++); + bytestream2_put_byte(&pb, vv | (v << 4)); } - - i++; } - out[j] = 0; - - if (out[7] & 0x80) - start += 4; - count = (out[11] & 0x1f) * 3; - if (j < start + count + 1) + if (pb.eof) continue; - - if (!count) + // remove trailing ANC checksum byte (not to be confused with the CDP checksum byte), + // it's not included in 8-bit sample encodings. 
see section 6.2 (page 14) of: + // https://pub.smpte.org/latest/st436/s436m-2006.pdf + bytestream2_seek_p(&pb, -1, SEEK_CUR); + coded_anc.payload_sample_count = bytestream2_tell_p(&pb); + if (coded_anc.payload_sample_count == 0) + continue; // ignore if too small + // add padding to align to 4 bytes + while (!pb.eof && bytestream2_tell_p(&pb) % 4) + bytestream2_put_byte(&pb, 0); + if (pb.eof) continue; - sub = ff_subtitles_queue_insert(&mcc->q, out + start, count, 0); - if (!sub) - return AVERROR(ENOMEM); + coded_anc.payload_array_length = bytestream2_tell_p(&pb); + + AVPacket *sub; + if (mcc->eia608_extract) { + AVSmpte291mAnc8bit anc; + if (av_smpte_291m_anc_8bit_decode( + &anc, coded_anc.payload_sample_coding, coded_anc.payload_sample_count, coded_anc.payload, s) + < 0) + continue; + // reuse line + int cc_count = av_smpte_291m_anc_8bit_extract_cta_708(&anc, line, s); + if (cc_count < 0) // continue if error or if it's not a closed captions packet + continue; + int len = cc_count * 3; + + sub = ff_subtitles_queue_insert(&mcc->q, line, len, merge); + if (!sub) + return AVERROR(ENOMEM); + } else { + sub = ff_subtitles_queue_insert(&mcc->q, NULL, 0, merge); + if (!sub) + return AVERROR(ENOMEM); - sub->pos = pos; - sub->pts = ts; + ret = av_smpte_436m_anc_append(sub, 1, &coded_anc); + if (ret < 0) + return ret; + } + + sub->pos = pos; + sub->pts = tt.last_ts; sub->duration = 1; + continue; } ff_subtitles_queue_finalize(s, &mcc->q); @@ -201,15 +375,52 @@ static int mcc_read_header(AVFormatContext *s) return ret; } +static int mcc_read_packet(AVFormatContext *s, AVPacket *pkt) +{ + MCCContext *mcc = s->priv_data; + return ff_subtitles_queue_read_packet(&mcc->q, pkt); +} + +static int mcc_read_seek(AVFormatContext *s, int stream_index, int64_t min_ts, int64_t ts, int64_t max_ts, int flags) +{ + MCCContext *mcc = s->priv_data; + return ff_subtitles_queue_seek(&mcc->q, s, stream_index, min_ts, ts, max_ts, flags); +} + +static int mcc_read_close(AVFormatContext *s) +{ + 
MCCContext *mcc = s->priv_data; + ff_subtitles_queue_clean(&mcc->q); + return 0; +} + +#define OFFSET(x) offsetof(MCCContext, x) +#define SD AV_OPT_FLAG_SUBTITLE_PARAM | AV_OPT_FLAG_DECODING_PARAM +// clang-format off +static const AVOption mcc_options[] = { + { "eia608_extract", "extract EIA-608/708 captions from VANC packets", OFFSET(eia608_extract), AV_OPT_TYPE_BOOL, {.i64 = 1}, 0, 1, SD }, + { NULL }, +}; +// clang-format on + +static const AVClass mcc_class = { + .class_name = "mcc demuxer", + .item_name = av_default_item_name, + .option = mcc_options, + .version = LIBAVUTIL_VERSION_INT, + .category = AV_CLASS_CATEGORY_DEMUXER, +}; + const FFInputFormat ff_mcc_demuxer = { .p.name = "mcc", .p.long_name = NULL_IF_CONFIG_SMALL("MacCaption"), .p.extensions = "mcc", + .p.priv_class = &mcc_class, .priv_data_size = sizeof(MCCContext), .flags_internal = FF_INFMT_FLAG_INIT_CLEANUP, .read_probe = mcc_probe, .read_header = mcc_read_header, - .read_packet = ff_subtitles_read_packet, - .read_seek2 = ff_subtitles_read_seek, - .read_close = ff_subtitles_read_close, + .read_packet = mcc_read_packet, + .read_seek2 = mcc_read_seek, + .read_close = mcc_read_close, }; |