lavu/avstring: add av_utf8_decode() function

author: Stefano Sabatini <stefasab@gmail.com> 2013-10-03 01:21:40 +0200
committer: Stefano Sabatini <stefasab@gmail.com> 2013-11-22 16:51:05 +0100
commit: 68590650f05f2bf97766362f2817372987c8a52e (patch)
tree: b5b0292ed90bb1db4172f646e5e8e59c4326f31c /libavutil/avstring.h
parent: e782eea183ba3c03f5179ac83f85e25ae9c1290f (diff)
download: ffmpeg-68590650f05f2bf97766362f2817372987c8a52e.tar.gz
1 files changed, 40 insertions, 0 deletions
diff --git a/libavutil/avstring.h b/libavutil/avstring.h
index 438ef799eb..882a2b57dc 100644
--- a/libavutil/avstring.h
+++ b/libavutil/avstring.h
@@ -22,6 +22,7 @@
 #define AVUTIL_AVSTRING_H
 
 #include <stddef.h>
+#include <stdint.h>
 #include "attributes.h"
 
 /**
@@ -295,6 +296,45 @@ enum AVEscapeMode {
 int av_escape(char **dst, const char *src, const char *special_chars,
               enum AVEscapeMode mode, int flags);
 
+#define AV_UTF8_FLAG_ACCEPT_INVALID_BIG_CODES          1 ///< accept codepoints over 0x10FFFF
+#define AV_UTF8_FLAG_ACCEPT_NON_CHARACTERS             2 ///< accept non-characters - 0xFFFE and 0xFFFF
+#define AV_UTF8_FLAG_ACCEPT_SURROGATES                 4 ///< accept UTF-16 surrogate codes
+#define AV_UTF8_FLAG_EXCLUDE_XML_INVALID_CONTROL_CODES 8 ///< exclude control codes not accepted by XML
+
+/** All AV_UTF8_FLAG_ACCEPT_* flags combined; parenthesized so it is safe next to operators such as & and ~. */
+#define AV_UTF8_FLAG_ACCEPT_ALL (AV_UTF8_FLAG_ACCEPT_INVALID_BIG_CODES|AV_UTF8_FLAG_ACCEPT_NON_CHARACTERS|AV_UTF8_FLAG_ACCEPT_SURROGATES)
+
+/**
+ * Read and decode a single UTF-8 code point (character) from the
+ * buffer in *bufp, and update *bufp to point to the next byte to
+ * decode.
+ *
+ * In case of an invalid byte sequence, the pointer will be updated to
+ * the next byte after the invalid sequence and the function will
+ * return an error code.
+ *
+ * Depending on the specified flags, the function will also fail in
+ * case the decoded code point does not belong to a valid range.
+ *
+ * @note For speed-relevant code a carefully implemented use of
+ * GET_UTF8() may be preferred.
+ *
+ * @param codep   pointer used to return the parsed code in case of success.
+ *                The value in *codep is set even in case the range check fails.
+ * @param bufp    pointer to the address of the first byte of the sequence
+ *                to decode, updated by the function to point to the
+ *                byte immediately after the decoded sequence
+ * @param buf_end pointer to the end of the buffer, i.e. one byte past
+ *                the last byte in the buffer. This is used to
+ *                avoid buffer overreads (in case of an unfinished
+ *                UTF-8 sequence towards the end of the buffer).
+ * @param flags   a collection of AV_UTF8_FLAG_* flags
+ * @return >= 0 in case a sequence was successfully read, a negative
+ * value in case of invalid sequence
+ */
+int av_utf8_decode(int32_t *codep, const uint8_t **bufp, const uint8_t *buf_end,
+                   unsigned int flags);
+
 /**
  * @}
  */
author	Stefano Sabatini <stefasab@gmail.com>	2013-10-03 01:21:40 +0200
committer	Stefano Sabatini <stefasab@gmail.com>	2013-11-22 16:51:05 +0100
commit	68590650f05f2bf97766362f2817372987c8a52e (patch)
tree	b5b0292ed90bb1db4172f646e5e8e59c4326f31c /libavutil/avstring.h
parent	e782eea183ba3c03f5179ac83f85e25ae9c1290f (diff)
download	ffmpeg-68590650f05f2bf97766362f2817372987c8a52e.tar.gz