diff options
author | Zuxy Meng <zuxy.meng@gmail.com> | 2006-11-06 10:32:48 +0000 |
---|---|---|
committer | Guillaume Poirier <gpoirier@mplayerhq.hu> | 2006-11-06 10:32:48 +0000 |
commit | 360932f76d30862ed6b7d325d0f31756f5b5da7c (patch) | |
tree | 1e416cddb9c708720944aa6184646e086ccc1e19 | |
parent | f4e31985da4ba620bb32347c0ef30de71727116c (diff) | |
download | ffmpeg-360932f76d30862ed6b7d325d0f31756f5b5da7c.tar.gz |
Fix ASF format parser's broken UTF-16 string handling
1. Add a PUT_UTF8 macro to common.h; code borrowed from libavcodec/flacenc.c.
2. Make use of the macro in flacenc.c
Patch by Zuxy Meng % zuxy P meng A gmail P com %
Original thread:
Date: Nov 5, 2006 9:56 AM
Subject: [Ffmpeg-devel] PUT_UTF8 & asf format enhancement
Originally committed as revision 6911 to svn://svn.ffmpeg.org/ffmpeg/trunk
-rw-r--r-- | libavcodec/flacenc.c | 16 | ||||
-rw-r--r-- | libavutil/common.h | 36 |
2 files changed, 37 insertions, 15 deletions
diff --git a/libavcodec/flacenc.c b/libavcodec/flacenc.c index 73f0bb416f..b7b7d0d8e6 100644 --- a/libavcodec/flacenc.c +++ b/libavcodec/flacenc.c @@ -1122,20 +1122,8 @@ static void put_sbits(PutBitContext *pb, int bits, int32_t val) static void write_utf8(PutBitContext *pb, uint32_t val) { - int bytes, shift; - - if(val < 0x80){ - put_bits(pb, 8, val); - return; - } - - bytes= (av_log2(val)+4) / 5; - shift = (bytes - 1) * 6; - put_bits(pb, 8, (256 - (256>>bytes)) | (val >> shift)); - while(shift >= 6){ - shift -= 6; - put_bits(pb, 8, 0x80 | ((val >> shift) & 0x3F)); - } + uint8_t tmp; + PUT_UTF8(val, tmp, put_bits(pb, 8, tmp);) } static void output_frame_header(FlacEncodeContext *s) diff --git a/libavutil/common.h b/libavutil/common.h index b4a9112ee9..673043746e 100644 --- a/libavutil/common.h +++ b/libavutil/common.h @@ -322,7 +322,21 @@ static inline int ff_get_fourcc(const char *s){ #define MKTAG(a,b,c,d) (a | (b << 8) | (c << 16) | (d << 24)) #define MKBETAG(a,b,c,d) (d | (c << 8) | (b << 16) | (a << 24)) - +/*! + * \def PUT_UTF8(val, tmp, PUT_BYTE) + * converts a 32-bit unicode character to its utf-8 encoded form (up to 6 bytes long). + * \param val is an input only argument and should be of type uint32_t. It holds + * a ucs4 encoded unicode character that is to be converted to utf-8. If + * val is given as a function it's executed only once. + * \param tmp is a temporary variable and should be of type uint8_t. It + * represents an intermediate value during conversion that is to be + * outputted by PUT_BYTE. + * \param PUT_BYTE writes the converted utf-8 bytes to any proper destination. + * It could be a function or a statement, and uses tmp as the input byte. + * For example, PUT_BYTE could be "*output++ = tmp;" PUT_BYTE will be + * executed up to 6 times, depending on the length of the converted + * unicode character. 
+ */ #define GET_UTF8(val, GET_BYTE, ERROR)\ val= GET_BYTE;\ {\ @@ -338,6 +352,26 @@ static inline int ff_get_fourcc(const char *s){ }\ } +#define PUT_UTF8(val, tmp, PUT_BYTE)\ + {\ + int bytes, shift;\ + uint32_t in = val;\ + if (in < 0x80) {\ + tmp = in;\ + PUT_BYTE\ + } else {\ + bytes = (av_log2(in) + 4) / 5;\ + shift = (bytes - 1) * 6;\ + tmp = (256 - (256 >> bytes)) | (in >> shift);\ + PUT_BYTE\ + while (shift >= 6) {\ + shift -= 6;\ + tmp = 0x80 | ((in >> shift) & 0x3f);\ + PUT_BYTE\ + }\ + }\ + } + #if defined(ARCH_X86) || defined(ARCH_POWERPC) #if defined(ARCH_X86_64) static inline uint64_t read_time(void) |