diff options
author | Yayoi <yayoi.ukai@gmail.com> | 2015-08-30 07:24:47 -0700 |
---|---|---|
committer | Clément Bœsch <u@pkh.me> | 2015-09-06 13:07:44 +0200 |
commit | 50332f8a6bc303b6147fe34a481a2c0b80ac12be (patch) | |
tree | 35381d4fe000ce43ae23f3406a254be37cacb436 | |
parent | d9464e9c14f412f6ad80eb7ff9e15c9786384d15 (diff) | |
download | ffmpeg-50332f8a6bc303b6147fe34a481a2c0b80ac12be.tar.gz |
avcodec/srtdec: factor out HTML parsing code
This code will be used in the SAMI decoder in a later commit.
Signed-off-by: Clément Bœsch <u@pkh.me>
-rw-r--r-- | libavcodec/Makefile | 2 | ||||
-rw-r--r-- | libavcodec/htmlsubtitles.c | 176 | ||||
-rw-r--r-- | libavcodec/htmlsubtitles.h | 29 | ||||
-rw-r--r-- | libavcodec/srtdec.c | 149 |
4 files changed, 208 insertions, 148 deletions
diff --git a/libavcodec/Makefile b/libavcodec/Makefile index 893fc96d84..4f6a52cceb 100644 --- a/libavcodec/Makefile +++ b/libavcodec/Makefile @@ -473,7 +473,7 @@ OBJS-$(CONFIG_SONIC_DECODER) += sonic.o OBJS-$(CONFIG_SONIC_ENCODER) += sonic.o OBJS-$(CONFIG_SONIC_LS_ENCODER) += sonic.o OBJS-$(CONFIG_SP5X_DECODER) += sp5xdec.o -OBJS-$(CONFIG_SRT_DECODER) += srtdec.o ass.o +OBJS-$(CONFIG_SRT_DECODER) += srtdec.o ass.o htmlsubtitles.o OBJS-$(CONFIG_SRT_ENCODER) += srtenc.o ass_split.o OBJS-$(CONFIG_STL_DECODER) += textdec.o ass.o OBJS-$(CONFIG_SUBRIP_DECODER) += srtdec.o ass.o diff --git a/libavcodec/htmlsubtitles.c b/libavcodec/htmlsubtitles.c new file mode 100644 index 0000000000..97be82daa0 --- /dev/null +++ b/libavcodec/htmlsubtitles.c @@ -0,0 +1,176 @@ +/* + * Copyright (c) 2010 Aurelien Jacobs <aurel@gnuage.org> + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "libavutil/avstring.h" +#include "libavutil/common.h" +#include "libavutil/parseutils.h" +#include "avcodec.h" +#include "htmlsubtitles.h" + +static int html_color_parse(AVCodecContext *avctx, const char *str) +{ + uint8_t rgba[4]; + if (av_parse_color(rgba, str, strcspn(str, "\" >"), avctx) < 0) + return -1; + return rgba[0] | rgba[1] << 8 | rgba[2] << 16; +} + +enum { + PARAM_UNKNOWN = -1, + PARAM_SIZE, + PARAM_COLOR, + PARAM_FACE, + PARAM_NUMBER +}; + +typedef struct SrtStack { + char tag[128]; + char param[PARAM_NUMBER][128]; +} SrtStack; + +static void rstrip_spaces_buf(AVBPrint *buf) +{ + while (buf->len > 0 && buf->str[buf->len - 1] == ' ') + buf->str[--buf->len] = 0; +} + +void ff_htmlmarkup_to_ass(AVCodecContext *avctx, AVBPrint *dst, const char *in) +{ + char *param, buffer[128], tmp[128]; + int len, tag_close, sptr = 1, line_start = 1, an = 0, end = 0; + SrtStack stack[16]; + + stack[0].tag[0] = 0; + strcpy(stack[0].param[PARAM_SIZE], "{\\fs}"); + strcpy(stack[0].param[PARAM_COLOR], "{\\c}"); + strcpy(stack[0].param[PARAM_FACE], "{\\fn}"); + + for (; !end && *in; in++) { + switch (*in) { + case '\r': + break; + case '\n': + if (line_start) { + end = 1; + break; + } + rstrip_spaces_buf(dst); + av_bprintf(dst, "\\N"); + line_start = 1; + break; + case ' ': + if (!line_start) + av_bprint_chars(dst, *in, 1); + break; + case '{': /* skip all {\xxx} substrings except for {\an%d} + and all microdvd like styles such as {Y:xxx} */ + len = 0; + an += sscanf(in, "{\\an%*1u}%n", &len) >= 0 && len > 0; + if ((an != 1 && (len = 0, sscanf(in, "{\\%*[^}]}%n", &len) >= 0 && len > 0)) || + (len = 0, sscanf(in, "{%*1[CcFfoPSsYy]:%*[^}]}%n", &len) >= 0 && len > 0)) { + in += len - 1; + } else + av_bprint_chars(dst, *in, 1); + break; + case 
'<': + tag_close = in[1] == '/'; + len = 0; + if (sscanf(in+tag_close+1, "%127[^>]>%n", buffer, &len) >= 1 && len > 0) { + if ((param = strchr(buffer, ' '))) + *param++ = 0; + if ((!tag_close && sptr < FF_ARRAY_ELEMS(stack)) || + ( tag_close && sptr > 0 && !strcmp(stack[sptr-1].tag, buffer))) { + int i, j, unknown = 0; + in += len + tag_close; + if (!tag_close) + memset(stack+sptr, 0, sizeof(*stack)); + if (!strcmp(buffer, "font")) { + if (tag_close) { + for (i=PARAM_NUMBER-1; i>=0; i--) + if (stack[sptr-1].param[i][0]) + for (j=sptr-2; j>=0; j--) + if (stack[j].param[i][0]) { + av_bprintf(dst, "%s", stack[j].param[i]); + break; + } + } else { + while (param) { + if (!strncmp(param, "size=", 5)) { + unsigned font_size; + param += 5 + (param[5] == '"'); + if (sscanf(param, "%u", &font_size) == 1) { + snprintf(stack[sptr].param[PARAM_SIZE], + sizeof(stack[0].param[PARAM_SIZE]), + "{\\fs%u}", font_size); + } + } else if (!strncmp(param, "color=", 6)) { + param += 6 + (param[6] == '"'); + snprintf(stack[sptr].param[PARAM_COLOR], + sizeof(stack[0].param[PARAM_COLOR]), + "{\\c&H%X&}", + html_color_parse(avctx, param)); + } else if (!strncmp(param, "face=", 5)) { + param += 5 + (param[5] == '"'); + len = strcspn(param, + param[-1] == '"' ? 
"\"" :" "); + av_strlcpy(tmp, param, + FFMIN(sizeof(tmp), len+1)); + param += len; + snprintf(stack[sptr].param[PARAM_FACE], + sizeof(stack[0].param[PARAM_FACE]), + "{\\fn%s}", tmp); + } + if ((param = strchr(param, ' '))) + param++; + } + for (i=0; i<PARAM_NUMBER; i++) + if (stack[sptr].param[i][0]) + av_bprintf(dst, "%s", stack[sptr].param[i]); + } + } else if (!buffer[1] && strspn(buffer, "bisu") == 1) { + av_bprintf(dst, "{\\%c%d}", buffer[0], !tag_close); + } else { + unknown = 1; + snprintf(tmp, sizeof(tmp), "</%s>", buffer); + } + if (tag_close) { + sptr--; + } else if (unknown && !strstr(in, tmp)) { + in -= len + tag_close; + av_bprint_chars(dst, *in, 1); + } else + av_strlcpy(stack[sptr++].tag, buffer, + sizeof(stack[0].tag)); + break; + } + } + default: + av_bprint_chars(dst, *in, 1); + break; + } + if (*in != ' ' && *in != '\r' && *in != '\n') + line_start = 0; + } + + while (dst->len >= 2 && !strncmp(&dst->str[dst->len - 2], "\\N", 2)) + dst->len -= 2; + dst->str[dst->len] = 0; + rstrip_spaces_buf(dst); +} diff --git a/libavcodec/htmlsubtitles.h b/libavcodec/htmlsubtitles.h new file mode 100644 index 0000000000..b06a5af6c8 --- /dev/null +++ b/libavcodec/htmlsubtitles.h @@ -0,0 +1,29 @@ +/* + * Copyright (c) 2010 Aurelien Jacobs <aurel@gnuage.org> + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVCODEC_HTMLSUBTITLES_H +#define AVCODEC_HTMLSUBTITLES_H + +#include "libavutil/bprint.h" + +void ff_htmlmarkup_to_ass(AVCodecContext *avctx, AVBPrint *dst, const char *in); + + +#endif /* AVCODEC_HTMLSUBTITLES_H */ diff --git a/libavcodec/srtdec.c b/libavcodec/srtdec.c index ed3af95063..dd6bc9d104 100644 --- a/libavcodec/srtdec.c +++ b/libavcodec/srtdec.c @@ -25,46 +25,11 @@ #include "libavutil/parseutils.h" #include "avcodec.h" #include "ass.h" - -static int html_color_parse(AVCodecContext *avctx, const char *str) -{ - uint8_t rgba[4]; - if (av_parse_color(rgba, str, strcspn(str, "\" >"), avctx) < 0) - return -1; - return rgba[0] | rgba[1] << 8 | rgba[2] << 16; -} - -enum { - PARAM_UNKNOWN = -1, - PARAM_SIZE, - PARAM_COLOR, - PARAM_FACE, - PARAM_NUMBER -}; - -typedef struct SrtStack { - char tag[128]; - char param[PARAM_NUMBER][128]; -} SrtStack; - -static void rstrip_spaces_buf(AVBPrint *buf) -{ - while (buf->len > 0 && buf->str[buf->len - 1] == ' ') - buf->str[--buf->len] = 0; -} +#include "htmlsubtitles.h" static void srt_to_ass(AVCodecContext *avctx, AVBPrint *dst, const char *in, int x1, int y1, int x2, int y2) { - char *param, buffer[128], tmp[128]; - int len, tag_close, sptr = 1, line_start = 1, an = 0, end = 0; - SrtStack stack[16]; - - stack[0].tag[0] = 0; - strcpy(stack[0].param[PARAM_SIZE], "{\\fs}"); - strcpy(stack[0].param[PARAM_COLOR], "{\\c}"); - strcpy(stack[0].param[PARAM_FACE], "{\\fn}"); - if (x1 >= 0 && y1 >= 0) { /* XXX: here we rescale coordinate assuming they are in DVD resolution * (720x480) since we don't have anything better */ @@ -84,117 +49,7 @@ static void srt_to_ass(AVCodecContext *avctx, AVBPrint *dst, } } - for (; !end && *in; in++) { - switch (*in) { - case '\r': - break; - case '\n': - if 
(line_start) { - end = 1; - break; - } - rstrip_spaces_buf(dst); - av_bprintf(dst, "\\N"); - line_start = 1; - break; - case ' ': - if (!line_start) - av_bprint_chars(dst, *in, 1); - break; - case '{': /* skip all {\xxx} substrings except for {\an%d} - and all microdvd like styles such as {Y:xxx} */ - len = 0; - an += sscanf(in, "{\\an%*1u}%n", &len) >= 0 && len > 0; - if ((an != 1 && (len = 0, sscanf(in, "{\\%*[^}]}%n", &len) >= 0 && len > 0)) || - (len = 0, sscanf(in, "{%*1[CcFfoPSsYy]:%*[^}]}%n", &len) >= 0 && len > 0)) { - in += len - 1; - } else - av_bprint_chars(dst, *in, 1); - break; - case '<': - tag_close = in[1] == '/'; - len = 0; - if (sscanf(in+tag_close+1, "%127[^>]>%n", buffer, &len) >= 1 && len > 0) { - if ((param = strchr(buffer, ' '))) - *param++ = 0; - if ((!tag_close && sptr < FF_ARRAY_ELEMS(stack)) || - ( tag_close && sptr > 0 && !strcmp(stack[sptr-1].tag, buffer))) { - int i, j, unknown = 0; - in += len + tag_close; - if (!tag_close) - memset(stack+sptr, 0, sizeof(*stack)); - if (!strcmp(buffer, "font")) { - if (tag_close) { - for (i=PARAM_NUMBER-1; i>=0; i--) - if (stack[sptr-1].param[i][0]) - for (j=sptr-2; j>=0; j--) - if (stack[j].param[i][0]) { - av_bprintf(dst, "%s", stack[j].param[i]); - break; - } - } else { - while (param) { - if (!strncmp(param, "size=", 5)) { - unsigned font_size; - param += 5 + (param[5] == '"'); - if (sscanf(param, "%u", &font_size) == 1) { - snprintf(stack[sptr].param[PARAM_SIZE], - sizeof(stack[0].param[PARAM_SIZE]), - "{\\fs%u}", font_size); - } - } else if (!strncmp(param, "color=", 6)) { - param += 6 + (param[6] == '"'); - snprintf(stack[sptr].param[PARAM_COLOR], - sizeof(stack[0].param[PARAM_COLOR]), - "{\\c&H%X&}", - html_color_parse(avctx, param)); - } else if (!strncmp(param, "face=", 5)) { - param += 5 + (param[5] == '"'); - len = strcspn(param, - param[-1] == '"' ? 
"\"" :" "); - av_strlcpy(tmp, param, - FFMIN(sizeof(tmp), len+1)); - param += len; - snprintf(stack[sptr].param[PARAM_FACE], - sizeof(stack[0].param[PARAM_FACE]), - "{\\fn%s}", tmp); - } - if ((param = strchr(param, ' '))) - param++; - } - for (i=0; i<PARAM_NUMBER; i++) - if (stack[sptr].param[i][0]) - av_bprintf(dst, "%s", stack[sptr].param[i]); - } - } else if (!buffer[1] && strspn(buffer, "bisu") == 1) { - av_bprintf(dst, "{\\%c%d}", buffer[0], !tag_close); - } else { - unknown = 1; - snprintf(tmp, sizeof(tmp), "</%s>", buffer); - } - if (tag_close) { - sptr--; - } else if (unknown && !strstr(in, tmp)) { - in -= len + tag_close; - av_bprint_chars(dst, *in, 1); - } else - av_strlcpy(stack[sptr++].tag, buffer, - sizeof(stack[0].tag)); - break; - } - } - default: - av_bprint_chars(dst, *in, 1); - break; - } - if (*in != ' ' && *in != '\r' && *in != '\n') - line_start = 0; - } - - while (dst->len >= 2 && !strncmp(&dst->str[dst->len - 2], "\\N", 2)) - dst->len -= 2; - dst->str[dst->len] = 0; - rstrip_spaces_buf(dst); + ff_htmlmarkup_to_ass(avctx, dst, in); } static int srt_decode_frame(AVCodecContext *avctx, |