aboutsummaryrefslogtreecommitdiffstats
path: root/libavformat/subtitles.h
diff options
context:
space:
mode:
authorwm4 <nfxjfg@googlemail.com>2014-09-02 20:48:45 +0200
committerClément Bœsch <u@pkh.me>2014-09-05 23:13:07 +0200
commit3e8426170ce005c111dfcae7982e18b647b7383f (patch)
tree2c99e5e08a455bba9b3b411134118d0d2c693c49 /libavformat/subtitles.h
parentdcb29d37d4ffedc84e44df99f8d22ecf27e0f2cd (diff)
downloadffmpeg-3e8426170ce005c111dfcae7982e18b647b7383f.tar.gz
avformat/assdec: UTF-16 support
Use the UTF-16 BOM to detect UTF-16 encoding. Convert the file contents to UTF-8 on the fly using FFTextReader, which acts as converting wrapper around AVIOContext. It also can work on a static buffer, needed for format probing. The FFTextReader wrapper now also takes care of skipping the UTF-8 BOM. Fix Ticket #3496.
Diffstat (limited to 'libavformat/subtitles.h')
-rw-r--r--libavformat/subtitles.h56
1 files changed, 56 insertions, 0 deletions
diff --git a/libavformat/subtitles.h b/libavformat/subtitles.h
index b5a96ec08c..38502d6f86 100644
--- a/libavformat/subtitles.h
+++ b/libavformat/subtitles.h
@@ -30,6 +30,62 @@ enum sub_sort {
SUB_SORT_POS_TS, ///< sort by position, then timestamps
};
+enum ff_utf_type {
+ FF_UTF_8, // or other 8 bit encodings
+ FF_UTF16LE,
+ FF_UTF16BE,
+};
+
+typedef struct {
+ int type;
+ AVIOContext *pb;
+ unsigned char buf[8];
+ int buf_pos, buf_len;
+ AVIOContext buf_pb;
+} FFTextReader;
+
+/**
+ * Initialize the FFTextReader from the given AVIOContext. This function will
+ * read some bytes from pb, and test for UTF-8 or UTF-16 BOMs. Further accesses
+ * to FFTextReader will read more data from pb.
+ *
+ * The purpose of FFTextReader is to transparently convert read data to UTF-8
+ * if the stream had a UTF-16 BOM.
+ *
+ * @param r object which will be initialized
+ * @param pb stream to read from (referenced as long as FFTextReader is in use)
+ */
+void ff_text_init_avio(FFTextReader *r, AVIOContext *pb);
+
+/**
+ * Similar to ff_text_init_avio(), but sets it up to read from a bounded buffer.
+ *
+ * @param r object which will be initialized
+ * @param buf buffer to read from (referenced as long as FFTextReader is in use)
+ * @param size size of buf
+ */
+void ff_text_init_buf(FFTextReader *r, void *buf, size_t size);
+
+/**
+ * Return the byte position of the next byte returned by ff_text_r8(). For
+ * UTF-16 source streams, this will return the original position, but it will
+ * be incorrect if a codepoint was only partially read with ff_text_r8().
+ */
+int64_t ff_text_pos(FFTextReader *r);
+
+/**
+ * Return the next byte. The return value is always 0 - 255. Returns 0 on EOF.
+ * If the source stream is UTF-16, this reads from the stream converted to
+ * UTF-8. On invalid UTF-16, 0 is returned.
+ */
+int ff_text_r8(FFTextReader *r);
+
+/**
+ * Read the given number of bytes (in UTF-8). On error or EOF, \0 bytes are
+ * written.
+ */
+void ff_text_read(FFTextReader *r, char *buf, size_t size);
+
typedef struct {
AVPacket *subs; ///< array of subtitles packets
int nb_subs; ///< number of subtitles packets