aboutsummaryrefslogtreecommitdiffstats
path: root/libavformat/tedcaptionsdec.c
diff options
context:
space:
mode:
author Nicolas George <nicolas.george@normalesup.org> 2012-06-05 16:34:01 +0200
committer Nicolas George <nicolas.george@normalesup.org> 2012-12-09 18:50:38 +0100
commit 160013736e40744989c8fc7dd1000980d00cf02d (patch)
tree ab5223474c295cb240ee2ea4b47a121f21ed7a36 /libavformat/tedcaptionsdec.c
parent 18eb31963b9e5b343b934df98ed32bc7d13006f4 (diff)
download ffmpeg-160013736e40744989c8fc7dd1000980d00cf02d.tar.gz
lavf: TED Talks JSON captions demuxer.
Diffstat (limited to 'libavformat/tedcaptionsdec.c')
-rw-r--r-- libavformat/tedcaptionsdec.c 365
1 files changed, 365 insertions, 0 deletions
diff --git a/libavformat/tedcaptionsdec.c b/libavformat/tedcaptionsdec.c
new file mode 100644
index 0000000000..85bed0a9ac
--- /dev/null
+++ b/libavformat/tedcaptionsdec.c
@@ -0,0 +1,365 @@
+/*
+ * TED Talks captions format decoder
+ * Copyright (c) 2012 Nicolas George
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/bprint.h"
+#include "libavutil/log.h"
+#include "libavutil/opt.h"
+#include "avformat.h"
+#include "internal.h"
+#include "subtitles.h"
+
+/**
+ * Private context of the TED captions demuxer.
+ */
+typedef struct {
+ AVClass *class; /* option class pointer; presumably has to stay the first member for AVOptions -- TODO confirm */
+ int64_t start_time; /* "start_time" option: offset in ms added to every caption pts by read_header */
+ FFDemuxSubtitlesQueue subs; /* queue filled by parse_file() with one packet per caption */
+} TEDCaptionsDemuxer;
+
+/*
+ * User-settable options of the demuxer. The only one is "start_time", a
+ * 64-bit offset in milliseconds (default 15000) stored in
+ * TEDCaptionsDemuxer.start_time. The table ends with a NULL entry.
+ */
+static const AVOption tedcaptions_options[] = {
+ { "start_time", "set the start time (offset) of the subtitles, in ms",
+ offsetof(TEDCaptionsDemuxer, start_time), FF_OPT_TYPE_INT64,
+ { .i64 = 15000 }, INT64_MIN, INT64_MAX,
+ AV_OPT_FLAG_SUBTITLE_PARAM | AV_OPT_FLAG_DECODING_PARAM },
+ { NULL },
+};
+
+/*
+ * AVClass describing the demuxer's private context; it ties the option
+ * table above to the demuxer through .priv_class in ff_tedcaptions_demuxer.
+ */
+static const AVClass tedcaptions_demuxer_class = {
+ .class_name = "tedcaptions_demuxer",
+ .item_name = av_default_item_name,
+ .option = tedcaptions_options,
+ .version = LIBAVUTIL_VERSION_INT,
+};
+
+/*
+ * True when amin <= a <= amax. The single unsigned comparison also rejects
+ * negative values, such as the AVERROR codes that cur_byte can hold.
+ */
+#define BETWEEN(a, amin, amax) ((unsigned)((a) - (amin)) <= (amax) - (amin))
+
+/*
+ * Hexadecimal digits are 0-9 and a-f / A-F only; "| 32" folds ASCII upper
+ * case onto lower case before the range test.
+ */
+#define HEX_DIGIT_TEST(c) (BETWEEN(c, '0', '9') || BETWEEN((c) | 32, 'a', 'f'))
+/* Numeric value of a character already accepted by HEX_DIGIT_TEST(). */
+#define HEX_DIGIT_VAL(c) ((c) <= '9' ? (c) - '0' : ((c) | 32) - 'a' + 10)
+/* Propagate a negative (I/O or EOF) code unchanged, else report bad syntax. */
+#define ERR_CODE(c) ((c) < 0 ? (c) : AVERROR_INVALIDDATA)
+
+/**
+ * Append the UTF-8 encoding of a code point to a print buffer.
+ *
+ * @param bp  destination buffer
+ * @param c   code point to encode; values up to 0x7F are written as a
+ *            single byte
+ */
+static void av_bprint_utf8(AVBPrint *bp, unsigned c)
+{
+    int bytes, i;
+
+    if (c <= 0x7F) {
+        av_bprint_chars(bp, c, 1);
+        return;
+    }
+    /*
+     * Number of continuation bytes. A sequence with n continuation bytes
+     * carries 5 * n + 6 payload bits (11, 16, 21, ...), so the count must
+     * step up at 12 and 17 significant bits; av_log2() yields the index of
+     * the highest set bit, i.e. one less than the bit count.
+     */
+    bytes = (av_log2(c) - 1) / 5;
+    /* Lead byte: length marker in the high bits, top payload bits below. */
+    av_bprint_chars(bp, (c >> (bytes * 6)) | ((0xFF80 >> bytes) & 0xFF), 1);
+    /* Continuation bytes, most significant 6-bit group first. */
+    for (i = bytes - 1; i >= 0; i--)
+        av_bprint_chars(bp, ((c >> (i * 6)) & 0x3F) | 0x80, 1);
+}
+
+/**
+ * Fetch the next input byte into *cur_byte.
+ *
+ * On success *cur_byte receives the byte value. When avio_read() returns 0
+ * it receives AVERROR_EOF, and when avio_read() returns a negative value
+ * it receives that value.
+ */
+static void next_byte(AVIOContext *pb, int *cur_byte)
+{
+    uint8_t octet;
+    int nread = avio_read(pb, &octet, 1);
+
+    if (nread > 0)
+        *cur_byte = octet;
+    else if (nread == 0)
+        *cur_byte = AVERROR_EOF;
+    else
+        *cur_byte = nread;
+}
+
+/**
+ * Advance past spaces, tabs, line feeds and carriage returns, leaving the
+ * first other value (byte or error code) in *cur_byte.
+ */
+static void skip_spaces(AVIOContext *pb, int *cur_byte)
+{
+    for (;;) {
+        switch (*cur_byte) {
+        case ' ':
+        case '\t':
+        case '\n':
+        case '\r':
+            next_byte(pb, cur_byte);
+            break;
+        default:
+            return;
+        }
+    }
+}
+
+/**
+ * Skip whitespace, then require the current byte to be c and consume it.
+ *
+ * @return 0 when c was found and consumed; otherwise the negative value
+ *         held in *cur_byte, or AVERROR_INVALIDDATA if a different byte
+ *         was found
+ */
+static int expect_byte(AVIOContext *pb, int *cur_byte, uint8_t c)
+{
+    skip_spaces(pb, cur_byte);
+    if (*cur_byte == c) {
+        next_byte(pb, cur_byte);
+        return 0;
+    }
+    return ERR_CODE(*cur_byte);
+}
+
+/**
+ * Parse a double-quoted JSON string into bp.
+ *
+ * bp is initialized here. On failure it is finalized before returning, so
+ * the caller must only finalize it after a successful call.
+ *
+ * Backslash escapes are decoded: \uXXXX becomes the UTF-8 encoding of the
+ * code unit, \b \f \n \r \t become the matching control character, and any
+ * other escaped byte (such as \" \\ \/) is copied literally.
+ * NOTE(review): each \uXXXX is encoded on its own; surrogate pairs are not
+ * combined into a single code point.
+ *
+ * @param full  non-zero to request an unlimited buffer and fail with
+ *              AVERROR(ENOMEM) if it is incomplete; zero to use
+ *              size_max 1, where truncation is not reported
+ * @return 0 on success, a negative error code otherwise
+ */
+static int parse_string(AVIOContext *pb, int *cur_byte, AVBPrint *bp, int full)
+{
+    int ret;
+
+    av_bprint_init(bp, 0, full ? -1 : 1);
+    ret = expect_byte(pb, cur_byte, '"');
+    if (ret < 0)
+        goto fail;
+    while (*cur_byte > 0 && *cur_byte != '"') {
+        if (*cur_byte == '\\') {
+            next_byte(pb, cur_byte);
+            if (*cur_byte < 0) {
+                ret = AVERROR_INVALIDDATA;
+                goto fail;
+            }
+            if ((*cur_byte | 32) == 'u') {
+                unsigned chr = 0, i;
+                for (i = 0; i < 4; i++) {
+                    next_byte(pb, cur_byte);
+                    if (!HEX_DIGIT_TEST(*cur_byte)) {
+                        ret = ERR_CODE(*cur_byte);
+                        goto fail;
+                    }
+                    chr = chr * 16 + HEX_DIGIT_VAL(*cur_byte);
+                }
+                av_bprint_utf8(bp, chr);
+            } else {
+                /* Single-character escapes defined by the JSON grammar. */
+                switch (*cur_byte) {
+                case 'b':
+                    av_bprint_chars(bp, '\b', 1);
+                    break;
+                case 'f':
+                    av_bprint_chars(bp, '\f', 1);
+                    break;
+                case 'n':
+                    av_bprint_chars(bp, '\n', 1);
+                    break;
+                case 'r':
+                    av_bprint_chars(bp, '\r', 1);
+                    break;
+                case 't':
+                    av_bprint_chars(bp, '\t', 1);
+                    break;
+                default:
+                    /* \" \\ \/ and anything else stand for themselves. */
+                    av_bprint_chars(bp, *cur_byte, 1);
+                    break;
+                }
+            }
+        } else {
+            av_bprint_chars(bp, *cur_byte, 1);
+        }
+        next_byte(pb, cur_byte);
+    }
+    ret = expect_byte(pb, cur_byte, '"');
+    if (ret < 0)
+        goto fail;
+    if (full && !av_bprint_is_complete(bp)) {
+        ret = AVERROR(ENOMEM);
+        goto fail;
+    }
+    return 0;
+
+fail:
+    av_bprint_finalize(bp, NULL);
+    return ret;
+}
+
+/**
+ * Parse an object key: a string (non-"full" mode of parse_string())
+ * followed by a ':' separator.
+ *
+ * @return 0 on success, a negative error code otherwise
+ */
+static int parse_label(AVIOContext *pb, int *cur_byte, AVBPrint *bp)
+{
+    int status = parse_string(pb, cur_byte, bp, 0);
+
+    if (status >= 0)
+        status = expect_byte(pb, cur_byte, ':');
+    return status < 0 ? status : 0;
+}
+
+/**
+ * Parse the literal "false" or "true" after optional whitespace.
+ *
+ * The literal is chosen by its first character and must not be directly
+ * followed by an ASCII letter.
+ *
+ * @param result  receives 0 for "false" and 1 for "true"
+ * @return 0 on success, AVERROR_INVALIDDATA otherwise
+ */
+static int parse_boolean(AVIOContext *pb, int *cur_byte, int *result)
+{
+    const char *literals[] = { "false", "true" };
+    const char *word;
+    int value;
+
+    skip_spaces(pb, cur_byte);
+
+    /* Pick the candidate whose first character matches the input. */
+    for (value = 0; value < 2; value++)
+        if (*cur_byte == *literals[value])
+            break;
+    if (value == 2)
+        return AVERROR_INVALIDDATA;
+
+    /* Consume the literal, one input byte per expected character. */
+    word = literals[value];
+    while (*word) {
+        if (*cur_byte != *word)
+            return AVERROR_INVALIDDATA;
+        word++;
+        next_byte(pb, cur_byte);
+    }
+
+    /* Reject identifiers that merely start with the literal. */
+    if (BETWEEN(*cur_byte | 32, 'a', 'z'))
+        return AVERROR_INVALIDDATA;
+    *result = value;
+    return 0;
+}
+
+/**
+ * Parse a non-negative decimal integer after optional whitespace.
+ *
+ * @param result  receives the parsed value on success
+ * @return 0 on success; AVERROR_INVALIDDATA if no digit is present or if
+ *         the value does not fit in an int64_t
+ */
+static int parse_int(AVIOContext *pb, int *cur_byte, int64_t *result)
+{
+    int64_t val = 0;
+
+    skip_spaces(pb, cur_byte);
+    if (!BETWEEN(*cur_byte, '0', '9'))
+        return AVERROR_INVALIDDATA;
+    while (BETWEEN(*cur_byte, '0', '9')) {
+        int digit = *cur_byte - '0';
+
+        /* Refuse the digit before val * 10 + digit can overflow. */
+        if (val > (INT64_MAX - digit) / 10)
+            return AVERROR_INVALIDDATA;
+        val = val * 10 + digit;
+        next_byte(pb, cur_byte);
+    }
+    *result = val;
+    return 0;
+}
+
+/**
+ * Parse a whole captions document and queue one packet per caption.
+ *
+ * The accepted layout is
+ *   { "captions": [ { key: value, ... }, ... ] }
+ * followed by end of input. Each caption object may contain the keys
+ * "startOfParagraph" (boolean, parsed but not used), "content" (string),
+ * "startTime" and "duration" (integers); any other key is an error, and
+ * "content", "startTime" and "duration" are mandatory. If "content" is
+ * repeated inside one object, the last occurrence is used.
+ *
+ * @param pb    input to read from
+ * @param subs  queue receiving the packets; packets inserted before an
+ *              error are left in the queue for the caller to clean
+ * @return 0 on success, a negative error code otherwise
+ */
+static int parse_file(AVIOContext *pb, FFDemuxSubtitlesQueue *subs)
+{
+    int ret, cur_byte, start_of_par;
+    int have_content = 0; /* non-zero while content holds a parsed string */
+    AVBPrint label, content;
+    int64_t pos, start, duration;
+    AVPacket *pkt;
+
+    next_byte(pb, &cur_byte);
+    ret = expect_byte(pb, &cur_byte, '{');
+    if (ret < 0)
+        return AVERROR_INVALIDDATA;
+    ret = parse_label(pb, &cur_byte, &label);
+    if (ret < 0 || strcmp(label.str, "captions"))
+        return AVERROR_INVALIDDATA;
+    ret = expect_byte(pb, &cur_byte, '[');
+    if (ret < 0)
+        return AVERROR_INVALIDDATA;
+    while (1) {
+        start = duration = AV_NOPTS_VALUE;
+        ret = expect_byte(pb, &cur_byte, '{');
+        if (ret < 0)
+            goto fail;
+        /* cur_byte is one byte ahead of the '{' that was just consumed. */
+        pos = avio_tell(pb) - 1;
+        while (1) {
+            ret = parse_label(pb, &cur_byte, &label);
+            if (ret < 0)
+                goto fail;
+            if (!strcmp(label.str, "startOfParagraph")) {
+                ret = parse_boolean(pb, &cur_byte, &start_of_par);
+                if (ret < 0)
+                    goto fail;
+            } else if (!strcmp(label.str, "content")) {
+                /* A repeated key replaces the earlier text; release it. */
+                if (have_content) {
+                    av_bprint_finalize(&content, NULL);
+                    have_content = 0;
+                }
+                /* parse_string() finalizes the buffer itself on failure. */
+                ret = parse_string(pb, &cur_byte, &content, 1);
+                if (ret < 0)
+                    goto fail;
+                have_content = 1;
+            } else if (!strcmp(label.str, "startTime")) {
+                ret = parse_int(pb, &cur_byte, &start);
+                if (ret < 0)
+                    goto fail;
+            } else if (!strcmp(label.str, "duration")) {
+                ret = parse_int(pb, &cur_byte, &duration);
+                if (ret < 0)
+                    goto fail;
+            } else {
+                ret = AVERROR_INVALIDDATA;
+                goto fail;
+            }
+            skip_spaces(pb, &cur_byte);
+            if (cur_byte != ',')
+                break;
+            next_byte(pb, &cur_byte);
+        }
+        ret = expect_byte(pb, &cur_byte, '}');
+        if (ret < 0)
+            goto fail;
+
+        if (!have_content || start == AV_NOPTS_VALUE ||
+            duration == AV_NOPTS_VALUE) {
+            ret = AVERROR_INVALIDDATA;
+            goto fail;
+        }
+        pkt = ff_subtitles_queue_insert(subs, content.str, content.len, 0);
+        if (!pkt) {
+            ret = AVERROR(ENOMEM);
+            goto fail;
+        }
+        pkt->pos      = pos;
+        pkt->pts      = start;
+        pkt->duration = duration;
+        av_bprint_finalize(&content, NULL);
+        have_content = 0;
+
+        skip_spaces(pb, &cur_byte);
+        if (cur_byte != ',')
+            break;
+        next_byte(pb, &cur_byte);
+    }
+    ret = expect_byte(pb, &cur_byte, ']');
+    if (ret < 0)
+        return ret;
+    ret = expect_byte(pb, &cur_byte, '}');
+    if (ret < 0)
+        return ret;
+    skip_spaces(pb, &cur_byte);
+    if (cur_byte != AVERROR_EOF)
+        return ERR_CODE(cur_byte);
+    return 0;
+
+fail:
+    /* Release the caption text that was parsed but never queued. */
+    if (have_content)
+        av_bprint_finalize(&content, NULL);
+    return ret;
+}
+
+/**
+ * Read the whole input, queue every caption and create the subtitle stream.
+ *
+ * All packets are produced here; tedcaptions_read_packet() only drains the
+ * queue. The "start_time" option is added to every packet pts.
+ *
+ * @return 0 on success, a negative error code otherwise; the packet queue
+ *         is released on every failure path
+ */
+static av_cold int tedcaptions_read_header(AVFormatContext *avf)
+{
+    TEDCaptionsDemuxer *tc = avf->priv_data;
+    AVStream *st;
+    int ret, i;
+    AVPacket *last;
+
+    ret = parse_file(avf->pb, &tc->subs);
+    if (ret < 0) {
+        if (ret == AVERROR_INVALIDDATA)
+            av_log(avf, AV_LOG_ERROR, "Syntax error near offset %"PRId64".\n",
+                   avio_tell(avf->pb));
+        ff_subtitles_queue_clean(&tc->subs);
+        return ret;
+    }
+    ff_subtitles_queue_finalize(&tc->subs);
+    for (i = 0; i < tc->subs.nb_subs; i++)
+        tc->subs.subs[i].pts += tc->start_time;
+
+    st = avformat_new_stream(avf, NULL);
+    if (!st) {
+        /* Release the queue here too, as the parse failure path does. */
+        ff_subtitles_queue_clean(&tc->subs);
+        return AVERROR(ENOMEM);
+    }
+    /*
+     * parse_file() inserts a caption before it can leave its loop, so the
+     * queue is expected to be non-empty here.
+     */
+    last = &tc->subs.subs[tc->subs.nb_subs - 1];
+    st->codec->codec_type = AVMEDIA_TYPE_SUBTITLE;
+    st->codec->codec_id   = CODEC_ID_TEXT;
+    /* Timestamps are in milliseconds. */
+    avpriv_set_pts_info(st, 64, 1, 1000);
+    st->probe_packets = 0;
+    st->start_time    = 0;
+    st->duration      = last->pts + last->duration;
+    st->cur_dts       = 0;
+
+    return 0;
+}
+
+/**
+ * Hand out the next queued caption by delegating to
+ * ff_subtitles_queue_read_packet() and returning its result.
+ */
+static int tedcaptions_read_packet(AVFormatContext *avf, AVPacket *packet)
+{
+    TEDCaptionsDemuxer *ctx = avf->priv_data;
+    FFDemuxSubtitlesQueue *queue = &ctx->subs;
+
+    return ff_subtitles_queue_read_packet(queue, packet);
+}
+
+/**
+ * Release the caption queue held in the private context. Always returns 0.
+ */
+static int tedcaptions_read_close(AVFormatContext *avf)
+{
+    TEDCaptionsDemuxer *ctx = avf->priv_data;
+    FFDemuxSubtitlesQueue *queue = &ctx->subs;
+
+    ff_subtitles_queue_clean(queue);
+
+    return 0;
+}
+
+/**
+ * Score how likely the probe buffer is a TED captions document.
+ *
+ * The first non-blank character must be '{'. Each known key counts when
+ * its first occurrence, quotes included, is followed by optional blanks
+ * and a ':'.
+ *
+ * @return AVPROBE_SCORE_MAX when all keys are found, half of that when at
+ *         least one is found, 0 otherwise
+ */
+static av_cold int tedcaptions_read_probe(AVProbeData *p)
+{
+    static const char *const tags[] = {
+        "\"captions\"", "\"duration\"", "\"content\"",
+        "\"startOfParagraph\"", "\"startTime\"",
+    };
+    unsigned idx, matched = 0;
+
+    if (p->buf[strspn(p->buf, " \t\r\n")] != '{')
+        return 0;
+
+    for (idx = 0; idx < FF_ARRAY_ELEMS(tags); idx++) {
+        const char *after = strstr(p->buf, tags[idx]);
+
+        if (after) {
+            after += strlen(tags[idx]);
+            after += strspn(after, " \t\r\n");
+            if (*after == ':')
+                matched++;
+        }
+    }
+
+    if (matched == FF_ARRAY_ELEMS(tags))
+        return AVPROBE_SCORE_MAX;
+    if (matched)
+        return AVPROBE_SCORE_MAX / 2;
+    return 0;
+}
+
+/**
+ * Seek inside the caption queue by forwarding every argument to
+ * ff_subtitles_queue_seek() and returning its result.
+ */
+static int tedcaptions_read_seek(AVFormatContext *avf, int stream_index,
+                                 int64_t min_ts, int64_t ts, int64_t max_ts,
+                                 int flags)
+{
+    TEDCaptionsDemuxer *ctx = avf->priv_data;
+    FFDemuxSubtitlesQueue *queue = &ctx->subs;
+
+    return ff_subtitles_queue_seek(queue, avf, stream_index,
+                                   min_ts, ts, max_ts, flags);
+}
+
+/*
+ * Demuxer definition: binds the "tedcaptions" format name to the callbacks
+ * defined in this file and to the private context and option class.
+ */
+AVInputFormat ff_tedcaptions_demuxer = {
+ .name = "tedcaptions",
+ .long_name = NULL_IF_CONFIG_SMALL("TED Talks captions"),
+ .priv_data_size = sizeof(TEDCaptionsDemuxer),
+ .priv_class = &tedcaptions_demuxer_class,
+ .read_header = tedcaptions_read_header,
+ .read_packet = tedcaptions_read_packet,
+ .read_close = tedcaptions_read_close,
+ .read_probe = tedcaptions_read_probe,
+ .read_seek2 = tedcaptions_read_seek, /* seek callback taking a min_ts/ts/max_ts window */
+};