about | summary | refs | log | tree | commit | diff | stats
path: root/contrib/libs/pcre/pcrecpp.cc
diff options
context:
space:
mode:
author: orivej <orivej@yandex-team.ru> 2022-02-10 16:44:49 +0300
committer: Daniil Cherednik <dcherednik@yandex-team.ru> 2022-02-10 16:44:49 +0300
commit: 718c552901d703c502ccbefdfc3c9028d608b947 (patch)
tree: 46534a98bbefcd7b1f3faa5b52c138ab27db75b7 /contrib/libs/pcre/pcrecpp.cc
parent: e9656aae26e0358d5378e5b63dcac5c8dbe0e4d0 (diff)
download: ydb-718c552901d703c502ccbefdfc3c9028d608b947.tar.gz
Restoring authorship annotation for <orivej@yandex-team.ru>. Commit 1 of 2.
Diffstat (limited to 'contrib/libs/pcre/pcrecpp.cc')
-rw-r--r-- contrib/libs/pcre/pcrecpp.cc 132
1 files changed, 66 insertions, 66 deletions
diff --git a/contrib/libs/pcre/pcrecpp.cc b/contrib/libs/pcre/pcrecpp.cc
index 57daa1f59f..7a3ee6cb98 100644
--- a/contrib/libs/pcre/pcrecpp.cc
+++ b/contrib/libs/pcre/pcrecpp.cc
@@ -30,7 +30,7 @@
// Author: Sanjay Ghemawat
#ifdef HAVE_CONFIG_H
-#include "pcre_config.h"
+#include "pcre_config.h"
#endif
#include <stdlib.h>
@@ -66,8 +66,8 @@ Arg RE::no_arg((void*)NULL);
// inclusive test if we ever needed it. (Note that not only the
// __attribute__ syntax, but also __USER_LABEL_PREFIX__, are
// gnu-specific.)
-#if defined(__GNUC__) && __GNUC__ >= 3 && defined(__ELF__) \
- && !defined(__INTEL_COMPILER) && !defined(__LCC__)
+#if defined(__GNUC__) && __GNUC__ >= 3 && defined(__ELF__) \
+ && !defined(__INTEL_COMPILER) && !defined(__LCC__)
# define ULP_AS_STRING(x) ULP_AS_STRING_INTERNAL(x)
# define ULP_AS_STRING_INTERNAL(x) #x
# define USER_LABEL_PREFIX_STR ULP_AS_STRING(__USER_LABEL_PREFIX__)
@@ -81,25 +81,25 @@ static const string empty_string;
// If the user doesn't ask for any options, we just use this one
static RE_Options default_options;
-// Specials for the start of patterns. See comments where start_options is used
-// below. (PH June 2018)
-static const char *start_options[] = {
- "(*UTF8)",
- "(*UTF)",
- "(*UCP)",
- "(*NO_START_OPT)",
- "(*NO_AUTO_POSSESS)",
- "(*LIMIT_RECURSION=",
- "(*LIMIT_MATCH=",
- "(*CRLF)",
- "(*LF)",
- "(*CR)",
- "(*BSR_UNICODE)",
- "(*BSR_ANYCRLF)",
- "(*ANYCRLF)",
- "(*ANY)",
- "" };
-
+// Specials for the start of patterns. See comments where start_options is used
+// below. (PH June 2018)
+static const char *start_options[] = {
+ "(*UTF8)",
+ "(*UTF)",
+ "(*UCP)",
+ "(*NO_START_OPT)",
+ "(*NO_AUTO_POSSESS)",
+ "(*LIMIT_RECURSION=",
+ "(*LIMIT_MATCH=",
+ "(*CRLF)",
+ "(*LF)",
+ "(*CR)",
+ "(*BSR_UNICODE)",
+ "(*BSR_ANYCRLF)",
+ "(*ANYCRLF)",
+ "(*ANY)",
+ "" };
+
void RE::Init(const string& pat, const RE_Options* options) {
pattern_ = pat;
if (options == NULL) {
@@ -155,49 +155,49 @@ pcre* RE::Compile(Anchor anchor) {
} else {
// Tack a '\z' at the end of RE. Parenthesize it first so that
// the '\z' applies to all top-level alternatives in the regexp.
-
- /* When this code was written (for PCRE 6.0) it was enough just to
- parenthesize the entire pattern. Unfortunately, when the feature of
- starting patterns with (*UTF8) or (*CR) etc. was added to PCRE patterns,
- this code was never updated. This bug was not noticed till 2018, long after
- PCRE became obsolescent and its maintainer no longer around. Since PCRE is
- frozen, I have added a hack to check for all the existing "start of
- pattern" specials - knowing that no new ones will ever be added. I am not a
- C++ programmer, so the code style is no doubt crude. It is also
- inefficient, but is only run when the pattern starts with "(*".
- PH June 2018. */
-
- string wrapped = "";
-
- if (pattern_.c_str()[0] == '(' && pattern_.c_str()[1] == '*') {
- int kk, klen, kmat;
- for (;;) { // Loop for any number of leading items
-
- for (kk = 0; start_options[kk][0] != 0; kk++) {
- klen = strlen(start_options[kk]);
- kmat = strncmp(pattern_.c_str(), start_options[kk], klen);
- if (kmat >= 0) break;
- }
- if (kmat != 0) break; // Not found
-
- // If the item ended in "=" we must copy digits up to ")".
-
- if (start_options[kk][klen-1] == '=') {
- while (isdigit(pattern_.c_str()[klen])) klen++;
- if (pattern_.c_str()[klen] != ')') break; // Syntax error
- klen++;
- }
-
- // Move the item from the pattern to the start of the wrapped string.
-
- wrapped += pattern_.substr(0, klen);
- pattern_.erase(0, klen);
- }
- }
-
- // Wrap the rest of the pattern.
-
- wrapped += "(?:"; // A non-counting grouping operator
+
+ /* When this code was written (for PCRE 6.0) it was enough just to
+ parenthesize the entire pattern. Unfortunately, when the feature of
+ starting patterns with (*UTF8) or (*CR) etc. was added to PCRE patterns,
+ this code was never updated. This bug was not noticed till 2018, long after
+ PCRE became obsolescent and its maintainer no longer around. Since PCRE is
+ frozen, I have added a hack to check for all the existing "start of
+ pattern" specials - knowing that no new ones will ever be added. I am not a
+ C++ programmer, so the code style is no doubt crude. It is also
+ inefficient, but is only run when the pattern starts with "(*".
+ PH June 2018. */
+
+ string wrapped = "";
+
+ if (pattern_.c_str()[0] == '(' && pattern_.c_str()[1] == '*') {
+ int kk, klen, kmat;
+ for (;;) { // Loop for any number of leading items
+
+ for (kk = 0; start_options[kk][0] != 0; kk++) {
+ klen = strlen(start_options[kk]);
+ kmat = strncmp(pattern_.c_str(), start_options[kk], klen);
+ if (kmat >= 0) break;
+ }
+ if (kmat != 0) break; // Not found
+
+ // If the item ended in "=" we must copy digits up to ")".
+
+ if (start_options[kk][klen-1] == '=') {
+ while (isdigit(pattern_.c_str()[klen])) klen++;
+ if (pattern_.c_str()[klen] != ')') break; // Syntax error
+ klen++;
+ }
+
+ // Move the item from the pattern to the start of the wrapped string.
+
+ wrapped += pattern_.substr(0, klen);
+ pattern_.erase(0, klen);
+ }
+ }
+
+ // Wrap the rest of the pattern.
+
+ wrapped += "(?:"; // A non-counting grouping operator
wrapped += pattern_;
wrapped += ")\\z";
re = pcre_compile(wrapped.c_str(), pcre_options,
@@ -477,7 +477,7 @@ int RE::GlobalReplace(const StringPiece& rewrite,
matchend++;
}
// We also need to advance more than one char if we're in utf8 mode.
-#ifdef SUPPORT_UTF
+#ifdef SUPPORT_UTF
if (options_.utf8()) {
while (matchend < static_cast<int>(str->length()) &&
((*str)[matchend] & 0xc0) == 0x80)