diff options
author | Stefano Sabatini <stefasab@gmail.com> | 2013-10-03 01:21:40 +0200 |
---|---|---|
committer | Stefano Sabatini <stefasab@gmail.com> | 2013-11-22 16:51:05 +0100 |
commit | 68590650f05f2bf97766362f2817372987c8a52e (patch) | |
tree | b5b0292ed90bb1db4172f646e5e8e59c4326f31c /libavutil/avstring.h | |
parent | e782eea183ba3c03f5179ac83f85e25ae9c1290f (diff) | |
download | ffmpeg-68590650f05f2bf97766362f2817372987c8a52e.tar.gz |
lavu/avstring: add av_utf8_decode() function
Diffstat (limited to 'libavutil/avstring.h')
-rw-r--r-- | libavutil/avstring.h | 40 |
1 files changed, 40 insertions, 0 deletions
```diff
diff --git a/libavutil/avstring.h b/libavutil/avstring.h
index 438ef799eb..882a2b57dc 100644
--- a/libavutil/avstring.h
+++ b/libavutil/avstring.h
@@ -22,6 +22,7 @@
 #define AVUTIL_AVSTRING_H
 
 #include <stddef.h>
+#include <stdint.h>
 #include "attributes.h"
 
 /**
@@ -295,6 +296,45 @@ enum AVEscapeMode {
 int av_escape(char **dst, const char *src, const char *special_chars,
               enum AVEscapeMode mode, int flags);
 
+#define AV_UTF8_FLAG_ACCEPT_INVALID_BIG_CODES          1 ///< accept codepoints over 0x10FFFF
+#define AV_UTF8_FLAG_ACCEPT_NON_CHARACTERS             2 ///< accept non-characters - 0xFFFE and 0xFFFF
+#define AV_UTF8_FLAG_ACCEPT_SURROGATES                 4 ///< accept UTF-16 surrogates codes
+#define AV_UTF8_FLAG_EXCLUDE_XML_INVALID_CONTROL_CODES 8 ///< exclude control codes not accepted by XML
+
+#define AV_UTF8_FLAG_ACCEPT_ALL \
+    AV_UTF8_FLAG_ACCEPT_INVALID_BIG_CODES|AV_UTF8_FLAG_ACCEPT_NON_CHARACTERS|AV_UTF8_FLAG_ACCEPT_SURROGATES
+
+/**
+ * Read and decode a single UTF-8 code point (character) from the
+ * buffer in *buf, and update *buf to point to the next byte to
+ * decode.
+ *
+ * In case of an invalid byte sequence, the pointer will be updated to
+ * the next byte after the invalid sequence and the function will
+ * return an error code.
+ *
+ * Depending on the specified flags, the function will also fail in
+ * case the decoded code point does not belong to a valid range.
+ *
+ * @note For speed-relevant code a carefully implemented use of
+ * GET_UTF8() may be preferred.
+ *
+ * @param codep   pointer used to return the parsed code in case of success.
+ *                The value in *codep is set even in case the range check fails.
+ * @param bufp    pointer to the address the first byte of the sequence
+ *                to decode, updated by the function to point to the
+ *                byte next after the decoded sequence
+ * @param buf_end pointer to the end of the buffer, points to the next
+ *                byte past the last in the buffer. This is used to
+ *                avoid buffer overreads (in case of an unfinished
+ *                UTF-8 sequence towards the end of the buffer).
+ * @param flags   a collection of AV_UTF8_FLAG_* flags
+ * @return >= 0 in case a sequence was successfully read, a negative
+ *         value in case of invalid sequence
+ */
+int av_utf8_decode(int32_t *codep, const uint8_t **bufp, const uint8_t *buf_end,
+                   unsigned int flags);
+
 /**
  * @}
  */
```