diff options
author | wm4 <nfxjfg@googlemail.com> | 2014-09-02 20:52:07 +0200 |
---|---|---|
committer | Clément Bœsch <u@pkh.me> | 2014-09-05 23:13:07 +0200 |
commit | d658ef18e3d1ebe63a64f404ac4646890ecf02c9 (patch) | |
tree | 914ba72abb4caa1ea91dd805493d3e7fa6b9339c /libavformat | |
parent | 3e8426170ce005c111dfcae7982e18b647b7383f (diff) | |
download | ffmpeg-d658ef18e3d1ebe63a64f404ac4646890ecf02c9.tar.gz |
avformat/srtdec: UTF-16 support
Diffstat (limited to 'libavformat')
-rw-r--r-- | libavformat/srtdec.c | 25 | ||||
-rw-r--r-- | libavformat/subtitles.c | 53 | ||||
-rw-r--r-- | libavformat/subtitles.h | 34 |
3 files changed, 96 insertions, 16 deletions
diff --git a/libavformat/srtdec.c b/libavformat/srtdec.c index 53182cda92..b63d3444d3 100644 --- a/libavformat/srtdec.c +++ b/libavformat/srtdec.c @@ -31,20 +31,21 @@ typedef struct { static int srt_probe(AVProbeData *p) { - const unsigned char *ptr = p->buf; int i, v, num = 0; + FFTextReader tr; - if (AV_RB24(ptr) == 0xEFBBBF) - ptr += 3; /* skip UTF-8 BOM */ + ff_text_init_buf(&tr, p->buf, p->buf_size); - while (*ptr == '\r' || *ptr == '\n') - ptr++; + while (ff_text_peek_r8(&tr) == '\r' || ff_text_peek_r8(&tr) == '\n') + ff_text_r8(&tr); for (i=0; i<2; i++) { + char buf[128]; + if (ff_subtitles_read_line(&tr, buf, sizeof(buf)) < 0) + break; if ((num == i || num + 1 == i) - && sscanf(ptr, "%*d:%*2d:%*2d%*1[,.]%*3d --> %*d:%*2d:%*2d%*1[,.]%3d", &v) == 1) + && sscanf(buf, "%*d:%*2d:%*2d%*1[,.]%*3d --> %*d:%*2d:%*2d%*1[,.]%3d", &v) == 1) return AVPROBE_SCORE_MAX; - num = atoi(ptr); - ptr += ff_subtitles_next_line(ptr); + num = atoi(buf); } return 0; } @@ -79,6 +80,8 @@ static int srt_read_header(AVFormatContext *s) AVBPrint buf; AVStream *st = avformat_new_stream(s, NULL); int res = 0; + FFTextReader tr; + ff_text_init_avio(&tr, s->pb); if (!st) return AVERROR(ENOMEM); @@ -88,11 +91,11 @@ static int srt_read_header(AVFormatContext *s) av_bprint_init(&buf, 0, AV_BPRINT_SIZE_UNLIMITED); - while (!avio_feof(s->pb)) { - ff_subtitles_read_chunk(s->pb, &buf); + while (!ff_text_eof(&tr)) { + ff_subtitles_read_text_chunk(&tr, &buf); if (buf.len) { - int64_t pos = avio_tell(s->pb); + int64_t pos = ff_text_pos(&tr); int64_t pts; int duration; const char *ptr = buf.str; diff --git a/libavformat/subtitles.c b/libavformat/subtitles.c index cebd453965..bb526cd5e6 100644 --- a/libavformat/subtitles.c +++ b/libavformat/subtitles.c @@ -86,6 +86,25 @@ void ff_text_read(FFTextReader *r, char *buf, size_t size) *buf++ = ff_text_r8(r); } +int ff_text_eof(FFTextReader *r) +{ + return r->buf_pos >= r->buf_len && avio_feof(r->pb); +} + +int ff_text_peek_r8(FFTextReader *r) +{ + int c; + if (r->buf_pos < r->buf_len) + return r->buf[r->buf_pos]; + c = ff_text_r8(r); + if (!avio_feof(r->pb)) { + r->buf_pos = 0; + r->buf_len = 1; + r->buf[0] = c; + } + return c; +} + AVPacket *ff_subtitles_queue_insert(FFDemuxSubtitlesQueue *q, const uint8_t *event, int len, int merge) { @@ -303,7 +322,7 @@ static inline int is_eol(char c) return c == '\r' || c == '\n'; } -void ff_subtitles_read_chunk(AVIOContext *pb, AVBPrint *buf) +void ff_subtitles_read_text_chunk(FFTextReader *tr, AVBPrint *buf) { char eol_buf[5], last_was_cr = 0; int n = 0, i = 0, nb_eol = 0; @@ -311,7 +330,7 @@ void ff_subtitles_read_chunk(AVIOContext *pb, AVBPrint *buf) av_bprint_clear(buf); for (;;) { - char c = avio_r8(pb); + char c = ff_text_r8(tr); if (!c) break; @@ -344,3 +363,33 @@ void ff_subtitles_read_chunk(AVIOContext *pb, AVBPrint *buf) n++; } } + +void ff_subtitles_read_chunk(AVIOContext *pb, AVBPrint *buf) +{ + FFTextReader tr; + tr.buf_pos = tr.buf_len = 0; + tr.type = 0; + tr.pb = pb; + ff_subtitles_read_text_chunk(&tr, buf); +} + +ptrdiff_t ff_subtitles_read_line(FFTextReader *tr, char *buf, size_t size) +{ + size_t cur = 0; + if (!size) + return 0; + while (cur + 1 < size) { + unsigned char c = ff_text_r8(tr); + if (!c) + return ff_text_eof(tr) ? cur : AVERROR_INVALIDDATA; + if (c == '\r' || c == '\n') + break; + buf[cur++] = c; + buf[cur] = '\0'; + } + if (ff_text_peek_r8(tr) == '\r') + ff_text_r8(tr); + if (ff_text_peek_r8(tr) == '\n') + ff_text_r8(tr); + return cur; +} diff --git a/libavformat/subtitles.h b/libavformat/subtitles.h index 38502d6f86..cf8c408486 100644 --- a/libavformat/subtitles.h +++ b/libavformat/subtitles.h @@ -22,6 +22,7 @@ #define AVFORMAT_SUBTITLES_H #include <stdint.h> +#include <stddef.h> #include "avformat.h" #include "libavutil/bprint.h" @@ -81,6 +82,16 @@ int64_t ff_text_pos(FFTextReader *r); int ff_text_r8(FFTextReader *r); /** + * Return non-zero if EOF was reached. + */ +int ff_text_eof(FFTextReader *r); + +/** + * Like ff_text_r8(), but don't remove the byte from the buffer. + */ +int ff_text_peek_r8(FFTextReader *r); + +/** * Read the given number of bytes (in UTF-8). On error or EOF, \0 bytes are * written. */ @@ -144,19 +155,24 @@ int ff_smil_extract_next_chunk(AVIOContext *pb, AVBPrint *buf, char *c); const char *ff_smil_get_attr_ptr(const char *s, const char *attr); /** - * @brief Read a subtitles chunk. + * @brief Same as ff_subtitles_read_text_chunk(), but read from an AVIOContext. + */ +void ff_subtitles_read_chunk(AVIOContext *pb, AVBPrint *buf); + +/** + * @brief Read a subtitles chunk from FFTextReader. * * A chunk is defined by a multiline "event", ending with a second line break. * The trailing line breaks are trimmed. CRLF are supported. * Example: "foo\r\nbar\r\n\r\nnext" will print "foo\r\nbar" into buf, and pb * will focus on the 'n' of the "next" string. * - * @param pb I/O context + * @param tr I/O context * @param buf an initialized buf where the chunk is written * * @note buf is cleared before writing into it. */ -void ff_subtitles_read_chunk(AVIOContext *pb, AVBPrint *buf); +void ff_subtitles_read_text_chunk(FFTextReader *tr, AVBPrint *buf); /** * Get the number of characters to increment to jump to the next line, or to @@ -177,4 +193,16 @@ static av_always_inline int ff_subtitles_next_line(const char *ptr) return n; } +/** + * Read a line of text. Discards line ending characters. + * The function handles the following line breaks schemes: + * LF, CRLF (MS), or standalone CR (old MacOS). + * + * Returns the number of bytes written to buf. Always writes a terminating 0, + * similar as with snprintf. + * + * @note returns a negative error code if a \0 byte is found + */ +ptrdiff_t ff_subtitles_read_line(FFTextReader *tr, char *buf, size_t size); + #endif /* AVFORMAT_SUBTITLES_H */ |