aboutsummaryrefslogtreecommitdiffstats
path: root/libavutil/avstring.h
diff options
context:
space:
mode:
authorStefano Sabatini <stefasab@gmail.com>2013-10-03 01:21:40 +0200
committerStefano Sabatini <stefasab@gmail.com>2013-11-22 16:51:05 +0100
commit68590650f05f2bf97766362f2817372987c8a52e (patch)
treeb5b0292ed90bb1db4172f646e5e8e59c4326f31c /libavutil/avstring.h
parente782eea183ba3c03f5179ac83f85e25ae9c1290f (diff)
downloadffmpeg-68590650f05f2bf97766362f2817372987c8a52e.tar.gz
lavu/avstring: add av_utf8_decode() function
Diffstat (limited to 'libavutil/avstring.h')
-rw-r--r--libavutil/avstring.h40
1 files changed, 40 insertions, 0 deletions
diff --git a/libavutil/avstring.h b/libavutil/avstring.h
index 438ef799eb..882a2b57dc 100644
--- a/libavutil/avstring.h
+++ b/libavutil/avstring.h
@@ -22,6 +22,7 @@
#define AVUTIL_AVSTRING_H
#include <stddef.h>
+#include <stdint.h>
#include "attributes.h"
/**
@@ -295,6 +296,45 @@ enum AVEscapeMode {
int av_escape(char **dst, const char *src, const char *special_chars,
enum AVEscapeMode mode, int flags);
+#define AV_UTF8_FLAG_ACCEPT_INVALID_BIG_CODES 1 ///< accept codepoints over 0x10FFFF
+#define AV_UTF8_FLAG_ACCEPT_NON_CHARACTERS 2 ///< accept non-characters - 0xFFFE and 0xFFFF
+#define AV_UTF8_FLAG_ACCEPT_SURROGATES 4 ///< accept UTF-16 surrogate codes
+#define AV_UTF8_FLAG_EXCLUDE_XML_INVALID_CONTROL_CODES 8 ///< exclude control codes not accepted by XML
+/** All AV_UTF8_FLAG_ACCEPT_* flags combined; parenthesized so the macro acts as a single operand. */
+#define AV_UTF8_FLAG_ACCEPT_ALL \
+ (AV_UTF8_FLAG_ACCEPT_INVALID_BIG_CODES|AV_UTF8_FLAG_ACCEPT_NON_CHARACTERS|AV_UTF8_FLAG_ACCEPT_SURROGATES)
+
+/**
+ * Read and decode a single UTF-8 code point (character) from the
+ * buffer in *bufp, and update *bufp to point to the next byte to
+ * decode.
+ *
+ * In case of an invalid byte sequence, the pointer will be updated to
+ * the next byte after the invalid sequence and the function will
+ * return an error code.
+ *
+ * Depending on the specified flags, the function will also fail in
+ * case the decoded code point does not belong to a valid range.
+ *
+ * @note For speed-relevant code a carefully implemented use of
+ * GET_UTF8() may be preferred.
+ *
+ * @param codep pointer used to return the parsed code in case of success.
+ * The value in *codep is set even in case the range check fails.
+ * @param bufp pointer to the address of the first byte of the sequence
+ * to decode, updated by the function to point to the
+ * byte immediately after the decoded sequence
+ * @param buf_end pointer to the end of the buffer, i.e. to the byte
+ * just past the last one in the buffer. This is used to
+ * avoid buffer overreads (in case of an unfinished
+ * UTF-8 sequence towards the end of the buffer).
+ * @param flags a collection of AV_UTF8_FLAG_* flags
+ * @return >= 0 in case a sequence was successfully read, a negative
+ * value in case of an invalid sequence
+ */
+int av_utf8_decode(int32_t *codep, const uint8_t **bufp, const uint8_t *buf_end,
+ unsigned int flags);
+
/**
* @}
*/