aboutsummaryrefslogtreecommitdiffstats
path: root/contrib/libs/libxml/xmlregexp.c
diff options
context:
space:
mode:
authorsetser <setser@yandex-team.ru>2022-05-09 00:13:37 +0300
committersetser <setser@yandex-team.ru>2022-05-09 00:13:37 +0300
commite87e3fc8d0e04eb7ba3eee221bb91613b527ad85 (patch)
tree5279c128bdbdf902b9a08d9fae8e55b91910a553 /contrib/libs/libxml/xmlregexp.c
parentf4f3e4024a1f32bd0bc3fa20239025a1b179e42d (diff)
downloadydb-e87e3fc8d0e04eb7ba3eee221bb91613b527ad85.tar.gz
Update libxml to 2.9.13
ref:f572491d236694e847142c36f0f5546c649e05d7
Diffstat (limited to 'contrib/libs/libxml/xmlregexp.c')
-rw-r--r--contrib/libs/libxml/xmlregexp.c146
1 files changed, 112 insertions, 34 deletions
diff --git a/contrib/libs/libxml/xmlregexp.c b/contrib/libs/libxml/xmlregexp.c
index c119ff1fb7..8d01c2ba32 100644
--- a/contrib/libs/libxml/xmlregexp.c
+++ b/contrib/libs/libxml/xmlregexp.c
@@ -26,6 +26,9 @@
#ifdef HAVE_LIMITS_H
#include <limits.h>
#endif
+#ifdef HAVE_STDINT_H
+#include <stdint.h>
+#endif
#include <libxml/tree.h>
#include <libxml/parserInternals.h>
@@ -36,6 +39,9 @@
#ifndef INT_MAX
#define INT_MAX 123456789 /* easy to flag and big enough for our needs */
#endif
+#ifndef SIZE_MAX
+#define SIZE_MAX ((size_t) -1)
+#endif
/* #define DEBUG_REGEXP_GRAPH */
/* #define DEBUG_REGEXP_EXEC */
@@ -267,6 +273,8 @@ struct _xmlAutomata {
int determinist;
int negs;
int flags;
+
+ int depth;
};
struct _xmlRegexp {
@@ -418,6 +426,32 @@ xmlRegexpErrCompile(xmlRegParserCtxtPtr ctxt, const char *extra)
************************************************************************/
static int xmlFAComputesDeterminism(xmlRegParserCtxtPtr ctxt);
+
+/**
+ * xmlRegCalloc2:
+ * @dim1: size of first dimension
+ * @dim2: size of second dimension
+ * @elemSize: size of element
+ *
+ * Allocate a two-dimensional array and set all elements to zero.
+ *
+ * Returns the new array or NULL in case of error.
+ */
+static void*
+xmlRegCalloc2(size_t dim1, size_t dim2, size_t elemSize) {
+ size_t totalSize;
+ void *ret;
+
+ /* Check for overflow */
+ if (dim1 > SIZE_MAX / dim2 / elemSize)
+ return (NULL);
+ totalSize = dim1 * dim2 * elemSize;
+ ret = xmlMalloc(totalSize);
+ if (ret != NULL)
+ memset(ret, 0, totalSize);
+ return (ret);
+}
+
/**
* xmlRegEpxFromParse:
* @ctxt: the parser context used to build it
@@ -540,8 +574,8 @@ xmlRegEpxFromParse(xmlRegParserCtxtPtr ctxt) {
#ifdef DEBUG_COMPACTION
printf("Final: %d atoms\n", nbatoms);
#endif
- transitions = (int *) xmlMalloc((nbstates + 1) *
- (nbatoms + 1) * sizeof(int));
+ transitions = (int *) xmlRegCalloc2(nbstates + 1, nbatoms + 1,
+ sizeof(int));
if (transitions == NULL) {
xmlFree(stateRemap);
xmlFree(stringRemap);
@@ -551,7 +585,6 @@ xmlRegEpxFromParse(xmlRegParserCtxtPtr ctxt) {
xmlFree(ret);
return(NULL);
}
- memset(transitions, 0, (nbstates + 1) * (nbatoms + 1) * sizeof(int));
/*
* Allocate the transition table. The first entry for each
@@ -577,12 +610,9 @@ xmlRegEpxFromParse(xmlRegParserCtxtPtr ctxt) {
continue;
atomno = stringRemap[trans->atom->no];
if ((trans->atom->data != NULL) && (transdata == NULL)) {
- transdata = (void **) xmlMalloc(nbstates * nbatoms *
- sizeof(void *));
- if (transdata != NULL)
- memset(transdata, 0,
- nbstates * nbatoms * sizeof(void *));
- else {
+ transdata = (void **) xmlRegCalloc2(nbstates, nbatoms,
+ sizeof(void *));
+ if (transdata == NULL) {
xmlRegexpErrMemory(ctxt, "compiling regexp");
break;
}
@@ -1862,6 +1892,12 @@ xmlFAReduceEpsilonTransitions(xmlRegParserCtxtPtr ctxt, int fromnr,
* then X and Y are semantically equivalent and X can be eliminated
* If X is the start state then make Y the start state, else replace the
* target of all transitions to X by transitions to Y.
+ *
+ * If X is a final state, skip it.
+ * Otherwise it would be necessary to manipulate counters for this case when
+ * eliminating state 2:
+ * State 1 has a transition with an atom to state 2.
+ * State 2 is final and has an epsilon transition to state 1.
*/
static void
xmlFAEliminateSimpleEpsilonTransitions(xmlRegParserCtxtPtr ctxt) {
@@ -1874,7 +1910,8 @@ xmlFAEliminateSimpleEpsilonTransitions(xmlRegParserCtxtPtr ctxt) {
continue;
if (state->nbTrans != 1)
continue;
- if (state->type == XML_REGEXP_UNREACH_STATE)
+ if (state->type == XML_REGEXP_UNREACH_STATE ||
+ state->type == XML_REGEXP_FINAL_STATE)
continue;
/* is the only transition out a basic transition */
if ((state->trans[0].atom == NULL) &&
@@ -2628,7 +2665,6 @@ xmlFARecurseDeterminism(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr state,
state->markd = XML_REGEXP_MARK_VISITED;
res = xmlFARecurseDeterminism(ctxt, ctxt->states[t1->to],
to, atom);
- state->markd = 0;
if (res == 0) {
ret = 0;
/* t1->nd = 1; */
@@ -2647,6 +2683,30 @@ xmlFARecurseDeterminism(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr state,
}
/**
+ * xmlFAFinishRecurseDeterminism:
+ * @ctxt: a regexp parser context
+ *
+ * Reset flags after checking determinism.
+ */
+static void
+xmlFAFinishRecurseDeterminism(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr state) {
+ int transnr, nbTrans;
+
+ if (state == NULL)
+ return;
+ if (state->markd != XML_REGEXP_MARK_VISITED)
+ return;
+ state->markd = 0;
+
+ nbTrans = state->nbTrans;
+ for (transnr = 0; transnr < nbTrans; transnr++) {
+ xmlRegTransPtr t1 = &state->trans[transnr];
+ if ((t1->atom == NULL) && (t1->to >= 0))
+ xmlFAFinishRecurseDeterminism(ctxt, ctxt->states[t1->to]);
+ }
+}
+
+/**
* xmlFAComputesDeterminism:
* @ctxt: a regexp parser context
*
@@ -2759,6 +2819,7 @@ xmlFAComputesDeterminism(xmlRegParserCtxtPtr ctxt) {
*/
ret = xmlFARecurseDeterminism(ctxt, ctxt->states[t1->to],
t2->to, t2->atom);
+ xmlFAFinishRecurseDeterminism(ctxt, ctxt->states[t1->to]);
/* don't shortcut the computation so all non deterministic
transition get marked down
if (ret == 0)
@@ -4221,7 +4282,7 @@ xmlRegExecPushString2(xmlRegExecCtxtPtr exec, const xmlChar *value,
* @values: pointer to the array of acceptable values
* @terminal: return value if this was a terminal state
*
- * Extract informations from the regexp execution, internal routine to
+ * Extract information from the regexp execution, internal routine to
* implement xmlRegExecNextValues() and xmlRegExecErrInfo()
*
* Returns: 0 in case of success or -1 in case of error.
@@ -4380,7 +4441,7 @@ xmlRegExecGetValues(xmlRegExecCtxtPtr exec, int err,
* @values: pointer to the array of acceptable values
* @terminal: return value if this was a terminal state
*
- * Extract informations from the regexp execution,
+ * Extract information from the regexp execution,
* the parameter @values must point to an array of @nbval string pointers
* on return nbval will contain the number of possible strings in that
* state and the @values array will be updated with them. The string values
@@ -4404,7 +4465,7 @@ xmlRegExecNextValues(xmlRegExecCtxtPtr exec, int *nbval, int *nbneg,
* @values: pointer to the array of acceptable values
* @terminal: return value if this was a terminal state
*
- * Extract error informations from the regexp execution, the parameter
+ * Extract error information from the regexp execution, the parameter
* @string will be updated with the value pushed and not accepted,
* the parameter @values must point to an array of @nbval string pointers
* on return nbval will contain the number of possible strings in that
@@ -5101,7 +5162,7 @@ xmlFAParsePosCharGroup(xmlRegParserCtxtPtr ctxt) {
} else {
xmlFAParseCharRange(ctxt);
}
- } while ((CUR != ']') && (CUR != '^') && (CUR != '-') &&
+ } while ((CUR != ']') && (CUR != '-') &&
(CUR != 0) && (ctxt->error == 0));
}
@@ -5116,34 +5177,31 @@ xmlFAParsePosCharGroup(xmlRegParserCtxtPtr ctxt) {
*/
static void
xmlFAParseCharGroup(xmlRegParserCtxtPtr ctxt) {
- int n = ctxt->neg;
- while ((CUR != ']') && (ctxt->error == 0)) {
- if (CUR == '^') {
- int neg = ctxt->neg;
+ int neg = ctxt->neg;
- NEXT;
- ctxt->neg = !ctxt->neg;
- xmlFAParsePosCharGroup(ctxt);
- ctxt->neg = neg;
- } else if ((CUR == '-') && (NXT(1) == '[')) {
- int neg = ctxt->neg;
- ctxt->neg = 2;
+ if (CUR == '^') {
+ NEXT;
+ ctxt->neg = !ctxt->neg;
+ xmlFAParsePosCharGroup(ctxt);
+ ctxt->neg = neg;
+ }
+ while ((CUR != ']') && (ctxt->error == 0)) {
+ if ((CUR == '-') && (NXT(1) == '[')) {
NEXT; /* eat the '-' */
NEXT; /* eat the '[' */
+ ctxt->neg = 2;
xmlFAParseCharGroup(ctxt);
+ ctxt->neg = neg;
if (CUR == ']') {
NEXT;
} else {
ERROR("charClassExpr: ']' expected");
- break;
}
- ctxt->neg = neg;
break;
- } else if (CUR != ']') {
+ } else {
xmlFAParsePosCharGroup(ctxt);
}
}
- ctxt->neg = n;
}
/**
@@ -5183,13 +5241,24 @@ static int
xmlFAParseQuantExact(xmlRegParserCtxtPtr ctxt) {
int ret = 0;
int ok = 0;
+ int overflow = 0;
while ((CUR >= '0') && (CUR <= '9')) {
- ret = ret * 10 + (CUR - '0');
+ if (ret > INT_MAX / 10) {
+ overflow = 1;
+ } else {
+ int digit = CUR - '0';
+
+ ret *= 10;
+ if (ret > INT_MAX - digit)
+ overflow = 1;
+ else
+ ret += digit;
+ }
ok = 1;
NEXT;
}
- if (ok != 1) {
+ if ((ok != 1) || (overflow == 1)) {
return(-1);
}
return(ret);
@@ -5229,6 +5298,9 @@ xmlFAParseQuantifier(xmlRegParserCtxtPtr ctxt) {
cur = xmlFAParseQuantExact(ctxt);
if (cur >= 0)
min = cur;
+ else {
+ ERROR("Improper quantifier");
+ }
if (CUR == ',') {
NEXT;
if (CUR == '}')
@@ -5288,6 +5360,10 @@ xmlFAParseAtom(xmlRegParserCtxtPtr ctxt) {
xmlRegStatePtr start, oldend, start0;
NEXT;
+ if (ctxt->depth >= 50) {
+ ERROR("xmlFAParseAtom: maximum nesting depth exceeded");
+ return(-1);
+ }
/*
* this extra Epsilon transition is needed if we count with 0 allowed
* unfortunately this can't be known at that point
@@ -5299,7 +5375,9 @@ xmlFAParseAtom(xmlRegParserCtxtPtr ctxt) {
oldend = ctxt->end;
ctxt->end = NULL;
ctxt->atom = NULL;
+ ctxt->depth++;
xmlFAParseRegExp(ctxt, 0);
+ ctxt->depth--;
if (CUR == ')') {
NEXT;
} else {
@@ -6055,7 +6133,7 @@ xmlAutomataNewOnceTrans2(xmlAutomataPtr am, xmlAutomataStatePtr from,
return(NULL);
if (min < 1)
return(NULL);
- if ((max < min) || (max < 1))
+ if (max < min)
return(NULL);
atom = xmlRegNewAtom(am, XML_REGEXP_STRING);
if (atom == NULL)
@@ -6134,7 +6212,7 @@ xmlAutomataNewOnceTrans(xmlAutomataPtr am, xmlAutomataStatePtr from,
return(NULL);
if (min < 1)
return(NULL);
- if ((max < min) || (max < 1))
+ if (max < min)
return(NULL);
atom = xmlRegNewAtom(am, XML_REGEXP_STRING);
if (atom == NULL)