aboutsummaryrefslogtreecommitdiffstats
path: root/contrib/libs/libmysql_r/strings/xml.cc
diff options
context:
space:
mode:
authorhcpp <hcpp@ydb.tech>2023-11-08 12:09:41 +0300
committerhcpp <hcpp@ydb.tech>2023-11-08 12:56:14 +0300
commita361f5b98b98b44ea510d274f6769164640dd5e1 (patch)
treec47c80962c6e2e7b06798238752fd3da0191a3f6 /contrib/libs/libmysql_r/strings/xml.cc
parent9478806fde1f4d40bd5a45e7cbe77237dab613e9 (diff)
downloadydb-a361f5b98b98b44ea510d274f6769164640dd5e1.tar.gz
metrics have been added
Diffstat (limited to 'contrib/libs/libmysql_r/strings/xml.cc')
-rw-r--r--contrib/libs/libmysql_r/strings/xml.cc517
1 files changed, 517 insertions, 0 deletions
diff --git a/contrib/libs/libmysql_r/strings/xml.cc b/contrib/libs/libmysql_r/strings/xml.cc
new file mode 100644
index 0000000000..7a9ce71c89
--- /dev/null
+++ b/contrib/libs/libmysql_r/strings/xml.cc
@@ -0,0 +1,517 @@
+/* Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License, version 2.0,
+ as published by the Free Software Foundation.
+
+ This program is also distributed with certain software (including
+ but not limited to OpenSSL) that is licensed under separate terms,
+ as designated in a particular file or component or in included license
+ documentation. The authors of MySQL hereby grant you an additional
+ permission to link the program and your derivative works with the
+ separately licensed software that they have included with MySQL.
+
+ Without limiting anything contained in the foregoing, this file,
+ which is part of C Driver for MySQL (Connector/C), is also subject to the
+ Universal FOSS Exception, version 1.0, a copy of which can be found at
+ http://oss.oracle.com/licenses/universal-foss-exception.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License, version 2.0, for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
+
+#include <stdio.h>
+#include <string.h>
+#include <sys/types.h>
+
+#include "m_string.h"
+#include "my_inttypes.h"
+#include "my_xml.h"
+
+/*
+ Token codes returned by my_xml_scan().
+ Punctuation tokens ('=', '<', '>', '/', '?', '!') use the character itself
+ as their code, because the scanner returns the scanned byte for them; the
+ remaining token kinds use distinct letters.
+*/
+#define MY_XML_UNKNOWN 'U'
+#define MY_XML_EOF 'E'
+#define MY_XML_STRING 'S'
+#define MY_XML_IDENT 'I'
+#define MY_XML_EQ '='
+#define MY_XML_LT '<'
+#define MY_XML_GT '>'
+#define MY_XML_SLASH '/'
+#define MY_XML_COMMENT 'C'
+#define MY_XML_TEXT 'T'
+#define MY_XML_QUESTION '?'
+#define MY_XML_EXCLAM '!'
+#define MY_XML_CDATA 'D'
+
+/* Half-open byte range [beg, end) holding the text of one scanned token. */
+struct MY_XML_ATTR {
+ const char *beg; /* first byte of the token text */
+ const char *end; /* one past the last byte of the token text */
+};
+
+/*
+ XML ctype: bit flags stored in my_xml_ctype[], one entry per byte value.
+ They are tested through the my_xml_is_*() macros.
+*/
+#define MY_XML_ID0 0x01 /* Identifier initial character */
+#define MY_XML_ID1 0x02 /* Identifier medial character */
+#define MY_XML_SPC 0x08 /* Spacing character */
+
+/*
+ http://www.w3.org/TR/REC-xml/
+ [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
+ CombiningChar | Extender
+ [5] Name ::= (Letter | '_' | ':') (NameChar)*
+
+ my_xml_ctype[] maps every byte value to a combination of the MY_XML_ID0,
+ MY_XML_ID1 and MY_XML_SPC flags:
+ 8 - spacing character (TAB, LF, CR, SPACE)
+ 2 - identifier medial character only (digits, '-', '.')
+ 3 - identifier initial and medial character (letters, ':', '_',
+ and every byte >= 0x80)
+ 0 - none of the above
+
+ NOTE(review): the table is only read in this file (through the
+ my_xml_is_*() macros), so it could presumably be declared const.
+*/
+
+static char my_xml_ctype[256] = {
+ /*00*/ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 8, 8, 0, 0, 8, 0, 0,
+ /*10*/ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ /*20*/ 8, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 2, 2, 0, /* !"#$%&'()*+,-./ */
+ /*30*/ 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 3, 0, 0, 0, 0, 0, /* 0123456789:;<=>? */
+ /*40*/ 0, 3, 3, 3, 3, 3, 3, 3,
+ 3, 3, 3, 3, 3, 3, 3, 3, /* @ABCDEFGHIJKLMNO */
+ /*50*/ 3, 3, 3, 3, 3, 3, 3, 3,
+ 3, 3, 3, 0, 0, 0, 0, 3, /* PQRSTUVWXYZ[\]^_ */
+ /*60*/ 0, 3, 3, 3, 3, 3, 3, 3,
+ 3, 3, 3, 3, 3, 3, 3, 3, /* `abcdefghijklmno */
+ /*70*/ 3, 3, 3, 3, 3, 3, 3, 3,
+ 3, 3, 3, 0, 0, 0, 0, 0, /* pqrstuvwxyz{|}~ */
+ /*80*/ 3, 3, 3, 3, 3, 3, 3, 3,
+ 3, 3, 3, 3, 3, 3, 3, 3,
+ /*90*/ 3, 3, 3, 3, 3, 3, 3, 3,
+ 3, 3, 3, 3, 3, 3, 3, 3,
+ /*A0*/ 3, 3, 3, 3, 3, 3, 3, 3,
+ 3, 3, 3, 3, 3, 3, 3, 3,
+ /*B0*/ 3, 3, 3, 3, 3, 3, 3, 3,
+ 3, 3, 3, 3, 3, 3, 3, 3,
+ /*C0*/ 3, 3, 3, 3, 3, 3, 3, 3,
+ 3, 3, 3, 3, 3, 3, 3, 3,
+ /*D0*/ 3, 3, 3, 3, 3, 3, 3, 3,
+ 3, 3, 3, 3, 3, 3, 3, 3,
+ /*E0*/ 3, 3, 3, 3, 3, 3, 3, 3,
+ 3, 3, 3, 3, 3, 3, 3, 3,
+ /*F0*/ 3, 3, 3, 3, 3, 3, 3, 3,
+ 3, 3, 3, 3, 3, 3, 3, 3};
+
+/*
+ Byte classification helpers: each looks the byte up in my_xml_ctype[] and
+ yields non-zero when the corresponding flag is set. The cast to uchar keeps
+ the table index non-negative for bytes >= 0x80 where char is signed.
+*/
+#define my_xml_is_space(c) (my_xml_ctype[(uchar)(c)] & MY_XML_SPC)
+#define my_xml_is_id0(c) (my_xml_ctype[(uchar)(c)] & MY_XML_ID0)
+#define my_xml_is_id1(c) (my_xml_ctype[(uchar)(c)] & MY_XML_ID1)
+
+/**
+  Translate a token code into a printable name for error messages.
+
+  @param lex  token code, normally one of the MY_XML_* token constants
+  @return     pointer to a static string naming the token; "unknown token"
+              for any code without a dedicated name (MY_XML_UNKNOWN included)
+*/
+static const char *lex2str(int lex) {
+  static const struct {
+    int code;
+    const char *name;
+  } token_names[] = {
+      {MY_XML_EOF, "END-OF-INPUT"}, {MY_XML_STRING, "STRING"},
+      {MY_XML_IDENT, "IDENT"},      {MY_XML_CDATA, "CDATA"},
+      {MY_XML_EQ, "'='"},           {MY_XML_LT, "'<'"},
+      {MY_XML_GT, "'>'"},           {MY_XML_SLASH, "'/'"},
+      {MY_XML_COMMENT, "COMMENT"},  {MY_XML_TEXT, "TEXT"},
+      {MY_XML_QUESTION, "'?'"},     {MY_XML_EXCLAM, "'!'"},
+  };
+  const size_t count = sizeof(token_names) / sizeof(token_names[0]);
+
+  for (size_t i = 0; i < count; i++) {
+    if (token_names[i].code == lex) return token_names[i].name;
+  }
+  return "unknown token";
+}
+
+/**
+  Trim leading and trailing spacing characters from a token range, in place.
+
+  Only the range boundaries move; the underlying text is not modified. A range
+  made entirely of spacing characters collapses to an empty one.
+
+  @param a  token range to shrink
+*/
+static void my_xml_norm_text(MY_XML_ATTR *a) {
+  const char *first = a->beg;
+  const char *last = a->end;
+
+  while (first < last && my_xml_is_space(first[0])) first++;
+  while (first < last && my_xml_is_space(last[-1])) last--;
+
+  a->beg = first;
+  a->end = last;
+}
+
+/**
+  Check whether the unread input starts with a given byte sequence.
+
+  The result follows memcmp() conventions: false means "matches".
+
+  @param p     parser state; only p->cur and p->end are read
+  @param s     bytes to look for
+  @param slen  number of bytes in s
+  @return false when the next slen input bytes equal s, true when they differ
+          or fewer than slen bytes remain
+*/
+static inline bool my_xml_parser_prefix_cmp(MY_XML_PARSER *p, const char *s,
+                                            size_t slen) {
+  if (p->cur > p->end) return true;
+  /*
+    Compare lengths rather than pointers: p->cur + slen could point beyond
+    the one-past-the-end position of the input, which is undefined behaviour.
+  */
+  const size_t remaining = (size_t)(p->end - p->cur);
+  return (remaining < slen) || memcmp(p->cur, s, slen) != 0;
+}
+
+/**
+  Scan the next token from the parser input.
+
+  Leading spacing characters are skipped first. p->cur is advanced past the
+  token and [a->beg, a->end) is set to its text:
+    - MY_XML_EOF:     empty range at p->end.
+    - MY_XML_COMMENT: the whole comment, up to and including "-->" or up to
+                      the end of input when the comment is not terminated.
+    - MY_XML_CDATA:   the whole section including the "<![CDATA[" and "]]>"
+                      delimiters. When no "]]>" is found, a->end is left equal
+                      to a->beg, so callers must check the range length.
+    - punctuation:    one of ? = / < > ! ; the returned code is the character.
+    - MY_XML_STRING:  the text between the quotes (quotes excluded), trimmed
+                      unless MY_XML_FLAG_SKIP_TEXT_NORMALIZATION is set. A
+                      missing closing quote ends the string at end of input.
+    - MY_XML_IDENT:   an identifier.
+    - MY_XML_UNKNOWN: any other byte; p->cur is not advanced and the range is
+                      empty.
+
+  @param p  parser state
+  @param a  receives the token range
+  @return   token code
+*/
+static int my_xml_scan(MY_XML_PARSER *p, MY_XML_ATTR *a) {
+  while ((p->cur < p->end) && my_xml_is_space(p->cur[0])) p->cur++;
+
+  if (p->cur >= p->end) {
+    a->beg = p->end;
+    a->end = p->end;
+    return MY_XML_EOF;
+  }
+
+  a->beg = p->cur;
+  a->end = p->cur;
+
+  if (!my_xml_parser_prefix_cmp(p, STRING_WITH_LEN("<!--"))) {
+    for (; p->cur < p->end; p->cur++) {
+      if (!my_xml_parser_prefix_cmp(p, STRING_WITH_LEN("-->"))) {
+        p->cur += 3;
+        break;
+      }
+    }
+    a->end = p->cur;
+    return MY_XML_COMMENT;
+  }
+
+  if (!my_xml_parser_prefix_cmp(p, STRING_WITH_LEN("<![CDATA["))) {
+    p->cur += 9;
+    /* Stop two bytes early so that the three-byte "]]>" probe stays in bounds */
+    for (; p->cur < p->end - 2; p->cur++) {
+      if (p->cur[0] == ']' && p->cur[1] == ']' && p->cur[2] == '>') {
+        p->cur += 3;
+        a->end = p->cur;
+        break;
+      }
+    }
+    return MY_XML_CDATA;
+  }
+
+  /*
+    strchr() also matches the terminating NUL of the searched string, so a
+    NUL input byte has to be excluded explicitly; otherwise it would be
+    returned as a punctuation token with code 0.
+  */
+  if (p->cur[0] != '\0' && strchr("?=/<>!", p->cur[0])) {
+    p->cur++;
+    a->end = p->cur;
+    return a->beg[0];
+  }
+
+  if ((p->cur[0] == '"') || (p->cur[0] == '\'')) {
+    /*
+      "string" or 'string' found.
+      Scan until the closing quote/doublequote, or until the END-OF-INPUT.
+    */
+    const char quote = p->cur[0];
+    p->cur++;
+    while ((p->cur < p->end) && (p->cur[0] != quote)) p->cur++;
+    a->end = p->cur;
+    if (p->cur < p->end) /* Closing quote or doublequote has been found */
+      p->cur++;
+    a->beg++; /* Exclude the opening quote */
+    if (!(p->flags & MY_XML_FLAG_SKIP_TEXT_NORMALIZATION)) my_xml_norm_text(a);
+    return MY_XML_STRING;
+  }
+
+  if (my_xml_is_id0(p->cur[0])) {
+    p->cur++;
+    while (p->cur < p->end && my_xml_is_id1(p->cur[0])) p->cur++;
+    a->end = p->cur;
+    my_xml_norm_text(a);
+    return MY_XML_IDENT;
+  }
+
+  return MY_XML_UNKNOWN;
+}
+
+/**
+  Report a value to the user's value handler, if one is installed.
+
+  @param st   parser state
+  @param str  start of the value text
+  @param len  length of the value text in bytes
+  @return the handler's return code, or MY_XML_OK when no handler is set
+*/
+static int my_xml_value(MY_XML_PARSER *st, const char *str, size_t len) {
+  if (!st->value) return MY_XML_OK;
+  return (st->value)(st, str, len);
+}
+
+/**
+  Ensure the attr buffer is wide enough to hold the new value
+
+  Expand and/or allocate dynamic buffer as needed to hold the concatenated
+  path and the terminating zero. The first growth moves the path from the
+  embedded static buffer to a heap buffer; later growths reallocate it.
+
+  On allocation failure the parser state (buffer, size, start, end) is left
+  untouched, so the previously built path stays valid and an already
+  allocated buffer can still be released by my_xml_parser_free().
+
+  @param st the parser instance
+  @param len the length of the attribute to be added
+  @return state
+  @retval 1 failed
+  @retval 0 success
+*/
+static int my_xml_attr_ensure_space(MY_XML_PARSER *st, size_t len) {
+  const size_t ofs = st->attr.end - st->attr.start;
+  len++;  // Add terminating zero.
+  if (ofs + len <= st->attr.buffer_size) return MY_XML_OK;
+
+  /* Grow geometrically, saturating at SIZE_T_MAX instead of overflowing. */
+  const size_t new_size = (SIZE_T_MAX - len) / 2 > st->attr.buffer_size
+                              ? st->attr.buffer_size * 2 + len
+                              : SIZE_T_MAX;
+  char *new_buffer;
+
+  if (!st->attr.buffer) {
+    new_buffer = (char *)my_str_malloc(new_size);
+    if (new_buffer)
+      memcpy(new_buffer, st->attr.static_buffer, ofs + 1 /*term. zero */);
+  } else {
+    /*
+      NOTE(review): assumes my_str_realloc() has realloc() semantics, i.e.
+      the old block stays valid when NULL is returned - confirm.
+    */
+    new_buffer = (char *)my_str_realloc(st->attr.buffer, new_size);
+  }
+
+  if (!new_buffer) return MY_XML_ERROR;
+
+  st->attr.buffer = new_buffer;
+  st->attr.buffer_size = new_size;
+  st->attr.start = new_buffer;
+  st->attr.end = new_buffer + ofs;
+  return MY_XML_OK;
+}
+
+/**
+  Rewind the attr buffer to its initial (empty path) state.
+
+  The buffer itself is kept so that it can be reused by the next parse. The
+  path is also re-terminated at its start: my_xml_parse() treats a non-zero
+  first byte as "a tag is still open", so a path left over from a previous
+  failed parse must not remain visible.
+
+  @param p  parser state
+*/
+static void my_xml_attr_rewind(MY_XML_PARSER *p) {
+  /* keep the buffer already allocated */
+  p->attr.end = p->attr.start;
+  if (p->attr.start) p->attr.start[0] = '\0';
+}
+
+/**
+  Append a tag or attribute name to the current path and notify the enter
+  handler.
+
+  Path components are separated by '/'; the path is kept zero-terminated.
+  With MY_XML_FLAG_RELATIVE_NAMES the handler receives only the new name,
+  otherwise it receives the whole path.
+
+  @param st   parser state
+  @param str  name being entered
+  @param len  length of the name in bytes
+  @return MY_XML_ERROR when the path buffer cannot be grown, otherwise the
+          handler's return code, or MY_XML_OK when no handler is set
+*/
+static int my_xml_enter(MY_XML_PARSER *st, const char *str, size_t len) {
+  if (my_xml_attr_ensure_space(st, len + 1 /* the separator char */))
+    return MY_XML_ERROR;
+
+  char *tail = st->attr.end;
+  if (tail > st->attr.start) {
+    /* Not the first component: separate it from the previous one */
+    *tail++ = '/';
+  }
+  memcpy(tail, str, len);
+  tail += len;
+  *tail = '\0';
+  st->attr.end = tail;
+
+  if (!st->enter) return MY_XML_OK;
+
+  if (st->flags & MY_XML_FLAG_RELATIVE_NAMES) return st->enter(st, str, len);
+
+  return st->enter(st, st->attr.start, st->attr.end - st->attr.start);
+}
+
+/**
+  Copy at most min(l1, l2) bytes of src into s and zero-terminate the result.
+
+  @param s    destination; must have room for min(l1, l2) + 1 bytes
+  @param src  source bytes (need not be zero-terminated)
+  @param l1   first length limit
+  @param l2   second length limit
+*/
+static void mstr(char *s, const char *src, size_t l1, size_t l2) {
+  size_t n = l2;
+  if (l1 < l2) n = l1;
+  memcpy(s, src, n);
+  s[n] = '\0';
+}
+
+/**
+  Close the innermost path component and notify the leave handler.
+
+  When str is given, its length is checked against the length of the
+  innermost component and a mismatch is reported in p->errstr. With
+  MY_XML_FLAG_RELATIVE_NAMES the handler receives str/slen as passed in
+  (possibly NULL/0), otherwise it receives the whole path. The component is
+  removed from the path after the handler has run.
+
+  @param p     parser state
+  @param str   name found in the closing tag, or NULL when there is none
+  @param slen  length of str in bytes
+  @return MY_XML_ERROR on a length mismatch, otherwise the handler's return
+          code, or MY_XML_OK when no handler is set
+*/
+static int my_xml_leave(MY_XML_PARSER *p, const char *str, size_t slen) {
+  char *e;
+  const char *tag;
+  size_t glen;
+  char s[32];
+  char g[32];
+  int rc;
+
+  /* Find previous '/' or beginning */
+  for (e = p->attr.end; (e > p->attr.start) && (e[0] != '/'); e--)
+    ;
+  /*
+    The innermost component starts right after the '/', or at e itself when
+    the path consists of a single component and holds no separator.
+  */
+  tag = (e[0] == '/') ? e + 1 : e;
+  glen = (size_t)(p->attr.end - tag);
+
+  /*
+    NOTE(review): only the lengths are compared, so a closing tag with a
+    different name of the same length is accepted - confirm this is intended.
+  */
+  if (str && (slen != glen)) {
+    /* Both names are cut to 31 bytes, which bounds the message length */
+    mstr(s, str, sizeof(s) - 1, slen);
+    if (glen) {
+      mstr(g, tag, sizeof(g) - 1, glen);
+      sprintf(p->errstr, "'</%s>' unexpected ('</%s>' wanted)", s, g);
+    } else
+      sprintf(p->errstr, "'</%s>' unexpected (END-OF-INPUT wanted)", s);
+    return MY_XML_ERROR;
+  }
+
+  if (p->flags & MY_XML_FLAG_RELATIVE_NAMES)
+    rc = p->leave_xml ? p->leave_xml(p, str, slen) : MY_XML_OK;
+  else
+    rc = (p->leave_xml
+              ? p->leave_xml(p, p->attr.start, p->attr.end - p->attr.start)
+              : MY_XML_OK);
+
+  /* Drop the component (and its separator, if any) from the path */
+  *e = '\0';
+  p->attr.end = e;
+
+  return rc;
+}
+
+/**
+  Parse an XML document and report its structure through the handlers.
+
+  Tags and attributes are reported with the enter/leave handlers, attribute
+  values, text and CDATA content with the value handler. Comments are
+  skipped. On failure a message is stored in p->errstr and p->cur is left
+  near the offending input.
+
+  NOTE(review): the value handler's return code is checked for attribute
+  values but ignored for text and CDATA content.
+
+  @param p    parser state prepared with my_xml_parser_create()
+  @param str  document text (need not be zero-terminated)
+  @param len  length of the document in bytes
+  @return MY_XML_OK on success, MY_XML_ERROR on a syntax error, a handler
+          failure, an allocation failure or when tags remain open at the end
+*/
+int my_xml_parse(MY_XML_PARSER *p, const char *str, size_t len) {
+  my_xml_attr_rewind(p);
+
+  p->beg = str;
+  p->cur = str;
+  p->end = str + len;
+
+  while (p->cur < p->end) {
+    MY_XML_ATTR a;
+    if (p->cur[0] == '<') {
+      int lex;
+      int question = 0;
+      int exclam = 0;
+
+      lex = my_xml_scan(p, &a);
+
+      if (MY_XML_COMMENT == lex) continue;
+
+      if (lex == MY_XML_CDATA) {
+        /*
+          The range covers "<![CDATA[" (9 bytes) and "]]>" (3 bytes). For an
+          unterminated section the scanner leaves it empty; stripping the
+          delimiters would then produce a negative length.
+        */
+        if (a.end - a.beg < 9 + 3) {
+          sprintf(p->errstr, "unexpected END-OF-INPUT");
+          return MY_XML_ERROR;
+        }
+        a.beg += 9;
+        a.end -= 3;
+        my_xml_value(p, a.beg, (size_t)(a.end - a.beg));
+        continue;
+      }
+
+      /* The first scan consumed '<'; look at what follows it */
+      lex = my_xml_scan(p, &a);
+
+      if (MY_XML_SLASH == lex) {
+        /* Closing tag: </ident> */
+        if (MY_XML_IDENT != (lex = my_xml_scan(p, &a))) {
+          sprintf(p->errstr, "%s unexpected (ident wanted)", lex2str(lex));
+          return MY_XML_ERROR;
+        }
+        if (MY_XML_OK != my_xml_leave(p, a.beg, (size_t)(a.end - a.beg)))
+          return MY_XML_ERROR;
+        lex = my_xml_scan(p, &a);
+        goto gt;
+      }
+
+      if (MY_XML_EXCLAM == lex) {
+        lex = my_xml_scan(p, &a);
+        exclam = 1;
+      } else if (MY_XML_QUESTION == lex) {
+        lex = my_xml_scan(p, &a);
+        question = 1;
+      }
+
+      if (MY_XML_IDENT == lex) {
+        p->current_node_type = MY_XML_NODE_TAG;
+        if (MY_XML_OK != my_xml_enter(p, a.beg, (size_t)(a.end - a.beg)))
+          return MY_XML_ERROR;
+      } else {
+        sprintf(p->errstr, "%s unexpected (ident or '/' wanted)", lex2str(lex));
+        return MY_XML_ERROR;
+      }
+
+      /* Attributes: name=value, bare name, or bare strings inside <!...> */
+      while ((MY_XML_IDENT == (lex = my_xml_scan(p, &a))) ||
+             ((MY_XML_STRING == lex && exclam))) {
+        MY_XML_ATTR b;
+        if (MY_XML_EQ == (lex = my_xml_scan(p, &b))) {
+          lex = my_xml_scan(p, &b);
+          if ((lex == MY_XML_IDENT) || (lex == MY_XML_STRING)) {
+            p->current_node_type = MY_XML_NODE_ATTR;
+            if ((MY_XML_OK !=
+                 my_xml_enter(p, a.beg, (size_t)(a.end - a.beg))) ||
+                (MY_XML_OK !=
+                 my_xml_value(p, b.beg, (size_t)(b.end - b.beg))) ||
+                (MY_XML_OK != my_xml_leave(p, a.beg, (size_t)(a.end - a.beg))))
+              return MY_XML_ERROR;
+          } else {
+            sprintf(p->errstr, "%s unexpected (ident or string wanted)",
+                    lex2str(lex));
+            return MY_XML_ERROR;
+          }
+        } else if (MY_XML_IDENT == lex) {
+          p->current_node_type = MY_XML_NODE_ATTR;
+          if ((MY_XML_OK != my_xml_enter(p, a.beg, (size_t)(a.end - a.beg))) ||
+              (MY_XML_OK != my_xml_leave(p, a.beg, (size_t)(a.end - a.beg))))
+            return MY_XML_ERROR;
+        } else if ((MY_XML_STRING == lex) && exclam) {
+          /*
+            We are in <!DOCTYPE>, e.g.
+            <!DOCTYPE name SYSTEM "SystemLiteral">
+            <!DOCTYPE name PUBLIC "PubidLiteral" "SystemLiteral">
+            Just skip "SystemLiteral" and "PubidLiteral"
+          */
+        } else
+          break;
+      }
+
+      if (lex == MY_XML_SLASH) {
+        /* Self-closing tag: <ident ... /> */
+        if (MY_XML_OK != my_xml_leave(p, NULL, 0)) return MY_XML_ERROR;
+        lex = my_xml_scan(p, &a);
+      }
+
+    gt:
+      if (question) {
+        if (lex != MY_XML_QUESTION) {
+          sprintf(p->errstr, "%s unexpected ('?' wanted)", lex2str(lex));
+          return MY_XML_ERROR;
+        }
+        if (MY_XML_OK != my_xml_leave(p, NULL, 0)) return MY_XML_ERROR;
+        lex = my_xml_scan(p, &a);
+      }
+
+      if (exclam) {
+        if (MY_XML_OK != my_xml_leave(p, NULL, 0)) return MY_XML_ERROR;
+      }
+
+      if (lex != MY_XML_GT) {
+        sprintf(p->errstr, "%s unexpected ('>' wanted)", lex2str(lex));
+        return MY_XML_ERROR;
+      }
+    } else {
+      /* Text node: everything up to the next '<' or the end of input */
+      a.beg = p->cur;
+      for (; (p->cur < p->end) && (p->cur[0] != '<'); p->cur++)
+        ;
+      a.end = p->cur;
+
+      if (!(p->flags & MY_XML_FLAG_SKIP_TEXT_NORMALIZATION))
+        my_xml_norm_text(&a);
+      if (a.beg != a.end) {
+        my_xml_value(p, a.beg, (size_t)(a.end - a.beg));
+      }
+    }
+  }
+
+  /* A non-empty path means that some tag has not been closed */
+  if (p->attr.start[0]) {
+    sprintf(p->errstr, "unexpected END-OF-INPUT");
+    return MY_XML_ERROR;
+  }
+  return MY_XML_OK;
+}
+
+/**
+  Initialise a parser object.
+
+  All fields are zeroed (no handlers, no flags, no heap buffer) and the path
+  is set up to live in the embedded static buffer.
+
+  @param p  parser object to initialise
+*/
+void my_xml_parser_create(MY_XML_PARSER *p) {
+  memset(p, 0, sizeof(*p));
+
+  /* The embedded buffer is used until a path outgrows it. */
+  p->attr.buffer_size = sizeof(p->attr.static_buffer);
+  p->attr.start = p->attr.static_buffer;
+  p->attr.end = p->attr.static_buffer;
+}
+
+/**
+  Release the heap buffer owned by a parser, if one was allocated.
+
+  @param p  parser object
+*/
+void my_xml_parser_free(MY_XML_PARSER *p) {
+  if (!p->attr.buffer) return;
+
+  my_str_free(p->attr.buffer);
+  p->attr.buffer = NULL;
+}
+
+/**
+  Install the callback that receives attribute values, text and CDATA content.
+
+  @param p       parser object
+  @param action  callback taking the parser, the value text and its length;
+                 NULL disables value reporting
+*/
+void my_xml_set_value_handler(MY_XML_PARSER *p,
+ int (*action)(MY_XML_PARSER *p, const char *s,
+ size_t l)) {
+ p->value = action;
+}
+
+/**
+  Install the callback invoked when a tag or an attribute is entered.
+
+  @param p       parser object
+  @param action  callback taking the parser, a name and its length (the new
+                 name alone with MY_XML_FLAG_RELATIVE_NAMES, the whole path
+                 otherwise); NULL disables enter reporting
+*/
+void my_xml_set_enter_handler(MY_XML_PARSER *p,
+ int (*action)(MY_XML_PARSER *p, const char *s,
+ size_t l)) {
+ p->enter = action;
+}
+
+/**
+  Install the callback invoked when a tag or an attribute is left.
+
+  @param p       parser object
+  @param action  callback taking the parser, a name and its length; NULL
+                 disables leave reporting
+*/
+void my_xml_set_leave_handler(MY_XML_PARSER *p,
+ int (*action)(MY_XML_PARSER *p, const char *s,
+ size_t l)) {
+ p->leave_xml = action;
+}
+
+/**
+  Attach an opaque user pointer to the parser. It is only stored here; this
+  file never dereferences it.
+*/
+void my_xml_set_user_data(MY_XML_PARSER *p, void *user_data) {
+ p->user_data = user_data;
+}
+
+/** Return the parser's error message buffer, filled in by a failed parse. */
+const char *my_xml_error_string(MY_XML_PARSER *p) { return p->errstr; }
+
+/**
+  Return the position of the current parse point within its line.
+
+  @param p  parser object
+  @return distance in bytes from the last '\n' before p->cur (counted from
+          the newline itself) to p->cur, or from the start of the input when
+          no newline precedes p->cur
+*/
+size_t my_xml_error_pos(MY_XML_PARSER *p) {
+  const char *beg = p->beg;
+  const char *s;
+  /*
+    Walk backwards over the bytes before p->cur. The byte is inspected
+    through s[-1] so that no pointer before p->beg is ever formed.
+  */
+  for (s = p->cur; s > p->beg; s--) {
+    if (s[-1] == '\n') {
+      beg = s - 1;
+      break;
+    }
+  }
+  return (size_t)(p->cur - beg);
+}
+
+/**
+  Return the zero-based line number of the current parse point.
+
+  @param p  parser object
+  @return number of '\n' bytes between the start of the input and p->cur
+*/
+uint my_xml_error_lineno(MY_XML_PARSER *p) {
+  uint newlines = 0;
+  const char *pos = p->beg;
+
+  while (pos < p->cur) {
+    if (*pos++ == '\n') newlines++;
+  }
+  return newlines;
+}