aboutsummaryrefslogtreecommitdiffstats
path: root/contrib/libs/re2/util
diff options
context:
space:
mode:
authorthegeorg <thegeorg@yandex-team.ru>2022-02-10 16:45:12 +0300
committerDaniil Cherednik <dcherednik@yandex-team.ru>2022-02-10 16:45:12 +0300
commit49116032d905455a7b1c994e4a696afc885c1e71 (patch)
treebe835aa92c6248212e705f25388ebafcf84bc7a1 /contrib/libs/re2/util
parent4e839db24a3bbc9f1c610c43d6faaaa99824dcca (diff)
downloadydb-49116032d905455a7b1c994e4a696afc885c1e71.tar.gz
Restoring authorship annotation for <thegeorg@yandex-team.ru>. Commit 2 of 2.
Diffstat (limited to 'contrib/libs/re2/util')
-rw-r--r--contrib/libs/re2/util/flags.h52
-rw-r--r--contrib/libs/re2/util/logging.h4
-rw-r--r--contrib/libs/re2/util/mutex.h38
-rw-r--r--contrib/libs/re2/util/pcre.cc2050
-rw-r--r--contrib/libs/re2/util/pcre.h1362
-rw-r--r--contrib/libs/re2/util/strutil.cc24
-rw-r--r--contrib/libs/re2/util/strutil.h8
-rw-r--r--contrib/libs/re2/util/test.cc68
-rw-r--r--contrib/libs/re2/util/test.h100
-rw-r--r--contrib/libs/re2/util/util.h50
10 files changed, 1878 insertions, 1878 deletions
diff --git a/contrib/libs/re2/util/flags.h b/contrib/libs/re2/util/flags.h
index a3d5fc1234..3386b729d4 100644
--- a/contrib/libs/re2/util/flags.h
+++ b/contrib/libs/re2/util/flags.h
@@ -1,26 +1,26 @@
-// Copyright 2009 The RE2 Authors. All Rights Reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-#ifndef UTIL_FLAGS_H_
-#define UTIL_FLAGS_H_
-
-// Simplified version of Google's command line flags.
-// Does not support parsing the command line.
-// If you want to do that, see
-// https://gflags.github.io/gflags/
-
-#define DEFINE_FLAG(type, name, deflt, desc) \
- namespace re2 { type FLAGS_##name = deflt; }
-
-#define DECLARE_FLAG(type, name) \
- namespace re2 { extern type FLAGS_##name; }
-
-namespace re2 {
-template <typename T>
-T GetFlag(const T& flag) {
- return flag;
-}
-} // namespace re2
-
-#endif // UTIL_FLAGS_H_
+// Copyright 2009 The RE2 Authors. All Rights Reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef UTIL_FLAGS_H_
+#define UTIL_FLAGS_H_
+
+// Simplified version of Google's command line flags.
+// Does not support parsing the command line.
+// If you want to do that, see
+// https://gflags.github.io/gflags/
+
+#define DEFINE_FLAG(type, name, deflt, desc) \
+ namespace re2 { type FLAGS_##name = deflt; }
+
+#define DECLARE_FLAG(type, name) \
+ namespace re2 { extern type FLAGS_##name; }
+
+namespace re2 {
+template <typename T>
+T GetFlag(const T& flag) {
+ return flag;
+}
+} // namespace re2
+
+#endif // UTIL_FLAGS_H_
diff --git a/contrib/libs/re2/util/logging.h b/contrib/libs/re2/util/logging.h
index be5b4d4dbb..5b2217f29c 100644
--- a/contrib/libs/re2/util/logging.h
+++ b/contrib/libs/re2/util/logging.h
@@ -62,7 +62,7 @@ class LogMessage {
}
void Flush() {
stream() << "\n";
- std::string s = str_.str();
+ std::string s = str_.str();
size_t n = s.size();
if (fwrite(s.data(), 1, n, stderr) < n) {} // shut up gcc
flushed_ = true;
@@ -93,7 +93,7 @@ class LogMessageFatal : public LogMessage {
public:
LogMessageFatal(const char* file, int line)
: LogMessage(file, line) {}
- ATTRIBUTE_NORETURN ~LogMessageFatal() {
+ ATTRIBUTE_NORETURN ~LogMessageFatal() {
Flush();
abort();
}
diff --git a/contrib/libs/re2/util/mutex.h b/contrib/libs/re2/util/mutex.h
index 0ad97ff1eb..158046bb5c 100644
--- a/contrib/libs/re2/util/mutex.h
+++ b/contrib/libs/re2/util/mutex.h
@@ -10,13 +10,13 @@
* You should assume the locks are *not* re-entrant.
*/
-#ifdef _WIN32
-// Requires Windows Vista or Windows Server 2008 at minimum.
-#include <windows.h>
-#if defined(WINVER) && WINVER >= 0x0600
-#define MUTEX_IS_WIN32_SRWLOCK
-#endif
-#else
+#ifdef _WIN32
+// Requires Windows Vista or Windows Server 2008 at minimum.
+#include <windows.h>
+#if defined(WINVER) && WINVER >= 0x0600
+#define MUTEX_IS_WIN32_SRWLOCK
+#endif
+#else
#ifndef _POSIX_C_SOURCE
#define _POSIX_C_SOURCE 200809L
#endif
@@ -26,9 +26,9 @@
#endif
#endif
-#if defined(MUTEX_IS_WIN32_SRWLOCK)
-typedef SRWLOCK MutexType;
-#elif defined(MUTEX_IS_PTHREAD_RWLOCK)
+#if defined(MUTEX_IS_WIN32_SRWLOCK)
+typedef SRWLOCK MutexType;
+#elif defined(MUTEX_IS_PTHREAD_RWLOCK)
#include <pthread.h>
#include <stdlib.h>
typedef pthread_rwlock_t MutexType;
@@ -64,17 +64,17 @@ class Mutex {
Mutex& operator=(const Mutex&) = delete;
};
-#if defined(MUTEX_IS_WIN32_SRWLOCK)
+#if defined(MUTEX_IS_WIN32_SRWLOCK)
Mutex::Mutex() : mutex_(SRWLOCK_INIT) { }
-Mutex::~Mutex() { }
-void Mutex::Lock() { AcquireSRWLockExclusive(&mutex_); }
-void Mutex::Unlock() { ReleaseSRWLockExclusive(&mutex_); }
-void Mutex::ReaderLock() { AcquireSRWLockShared(&mutex_); }
-void Mutex::ReaderUnlock() { ReleaseSRWLockShared(&mutex_); }
-
-#elif defined(MUTEX_IS_PTHREAD_RWLOCK)
-
+Mutex::~Mutex() { }
+void Mutex::Lock() { AcquireSRWLockExclusive(&mutex_); }
+void Mutex::Unlock() { ReleaseSRWLockExclusive(&mutex_); }
+void Mutex::ReaderLock() { AcquireSRWLockShared(&mutex_); }
+void Mutex::ReaderUnlock() { ReleaseSRWLockShared(&mutex_); }
+
+#elif defined(MUTEX_IS_PTHREAD_RWLOCK)
+
#define SAFE_PTHREAD(fncall) \
do { \
if ((fncall) != 0) abort(); \
diff --git a/contrib/libs/re2/util/pcre.cc b/contrib/libs/re2/util/pcre.cc
index 93ffe9421b..b68985144f 100644
--- a/contrib/libs/re2/util/pcre.cc
+++ b/contrib/libs/re2/util/pcre.cc
@@ -1,1025 +1,1025 @@
-// Copyright 2003-2009 Google Inc. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// This is a variant of PCRE's pcrecpp.cc, originally written at Google.
-// The main changes are the addition of the HitLimit method and
-// compilation as PCRE in namespace re2.
-
-#include <assert.h>
-#include <ctype.h>
-#include <errno.h>
-#include <stdlib.h>
-#include <string.h>
-#include <limits>
-#include <string>
-#include <utility>
-
-#include "util/util.h"
-#include "util/flags.h"
-#include "util/logging.h"
-#include "util/pcre.h"
-#include "util/strutil.h"
-
-// Silence warnings about the wacky formatting in the operator() functions.
-#if !defined(__clang__) && defined(__GNUC__) && __GNUC__ >= 6
-#pragma GCC diagnostic ignored "-Wmisleading-indentation"
-#endif
-
-#define PCREPORT(level) LOG(level)
-
-// Default PCRE limits.
-// Defaults chosen to allow a plausible amount of CPU and
-// not exceed main thread stacks. Note that other threads
-// often have smaller stacks, and therefore tightening
-// regexp_stack_limit may frequently be necessary.
-DEFINE_FLAG(int, regexp_stack_limit, 256 << 10,
- "default PCRE stack limit (bytes)");
-DEFINE_FLAG(int, regexp_match_limit, 1000000,
- "default PCRE match limit (function calls)");
-
-#ifndef USEPCRE
-
-// Fake just enough of the PCRE API to allow this file to build. :)
-
-struct pcre_extra {
- int flags;
- int match_limit;
- int match_limit_recursion;
-};
-
-#define PCRE_EXTRA_MATCH_LIMIT 0
-#define PCRE_EXTRA_MATCH_LIMIT_RECURSION 0
-#define PCRE_ANCHORED 0
-#define PCRE_NOTEMPTY 0
-#define PCRE_ERROR_NOMATCH 1
-#define PCRE_ERROR_MATCHLIMIT 2
-#define PCRE_ERROR_RECURSIONLIMIT 3
-#define PCRE_INFO_CAPTURECOUNT 0
-
-void pcre_free(void*) {
-}
-
-pcre* pcre_compile(const char*, int, const char**, int*, const unsigned char*) {
- return NULL;
-}
-
-int pcre_exec(const pcre*, const pcre_extra*, const char*, int, int, int, int*, int) {
- return 0;
-}
-
-int pcre_fullinfo(const pcre*, const pcre_extra*, int, void*) {
- return 0;
-}
-
-#endif
-
-namespace re2 {
-
-// Maximum number of args we can set
-static const int kMaxArgs = 16;
-static const int kVecSize = (1 + kMaxArgs) * 3; // results + PCRE workspace
-
-// Approximate size of a recursive invocation of PCRE's
-// internal "match()" frame. This varies depending on the
-// compiler and architecture, of course, so the constant is
-// just a conservative estimate. To find the exact number,
-// run regexp_unittest with --regexp_stack_limit=0 under
-// a debugger and look at the frames when it crashes.
-// The exact frame size was 656 in production on 2008/02/03.
-static const int kPCREFrameSize = 700;
-
-// Special name for missing C++ arguments.
-PCRE::Arg PCRE::no_more_args((void*)NULL);
-
-const PCRE::PartialMatchFunctor PCRE::PartialMatch = { };
-const PCRE::FullMatchFunctor PCRE::FullMatch = { } ;
-const PCRE::ConsumeFunctor PCRE::Consume = { };
-const PCRE::FindAndConsumeFunctor PCRE::FindAndConsume = { };
-
-// If a regular expression has no error, its error_ field points here
-static const std::string empty_string;
-
-void PCRE::Init(const char* pattern, Option options, int match_limit,
- int stack_limit, bool report_errors) {
- pattern_ = pattern;
- options_ = options;
- match_limit_ = match_limit;
- stack_limit_ = stack_limit;
- hit_limit_ = false;
- error_ = &empty_string;
- report_errors_ = report_errors;
- re_full_ = NULL;
- re_partial_ = NULL;
-
- if (options & ~(EnabledCompileOptions | EnabledExecOptions)) {
- error_ = new std::string("illegal regexp option");
- PCREPORT(ERROR)
- << "Error compiling '" << pattern << "': illegal regexp option";
- } else {
- re_partial_ = Compile(UNANCHORED);
- if (re_partial_ != NULL) {
- re_full_ = Compile(ANCHOR_BOTH);
- }
- }
-}
-
-PCRE::PCRE(const char* pattern) {
- Init(pattern, None, 0, 0, true);
-}
-PCRE::PCRE(const char* pattern, Option option) {
- Init(pattern, option, 0, 0, true);
-}
-PCRE::PCRE(const std::string& pattern) {
- Init(pattern.c_str(), None, 0, 0, true);
-}
-PCRE::PCRE(const std::string& pattern, Option option) {
- Init(pattern.c_str(), option, 0, 0, true);
-}
-PCRE::PCRE(const std::string& pattern, const PCRE_Options& re_option) {
- Init(pattern.c_str(), re_option.option(), re_option.match_limit(),
- re_option.stack_limit(), re_option.report_errors());
-}
-
-PCRE::PCRE(const char *pattern, const PCRE_Options& re_option) {
- Init(pattern, re_option.option(), re_option.match_limit(),
- re_option.stack_limit(), re_option.report_errors());
-}
-
-PCRE::~PCRE() {
- if (re_full_ != NULL) pcre_free(re_full_);
- if (re_partial_ != NULL) pcre_free(re_partial_);
- if (error_ != &empty_string) delete error_;
-}
-
-pcre* PCRE::Compile(Anchor anchor) {
- // Special treatment for anchoring. This is needed because at
- // runtime pcre only provides an option for anchoring at the
- // beginning of a string.
- //
- // There are three types of anchoring we want:
- // UNANCHORED Compile the original pattern, and use
- // a pcre unanchored match.
- // ANCHOR_START Compile the original pattern, and use
- // a pcre anchored match.
- // ANCHOR_BOTH Tack a "\z" to the end of the original pattern
- // and use a pcre anchored match.
-
- const char* error = "";
- int eoffset;
- pcre* re;
- if (anchor != ANCHOR_BOTH) {
- re = pcre_compile(pattern_.c_str(),
- (options_ & EnabledCompileOptions),
- &error, &eoffset, NULL);
- } else {
- // Tack a '\z' at the end of PCRE. Parenthesize it first so that
- // the '\z' applies to all top-level alternatives in the regexp.
- std::string wrapped = "(?:"; // A non-counting grouping operator
- wrapped += pattern_;
- wrapped += ")\\z";
- re = pcre_compile(wrapped.c_str(),
- (options_ & EnabledCompileOptions),
- &error, &eoffset, NULL);
- }
- if (re == NULL) {
- if (error_ == &empty_string) error_ = new std::string(error);
- PCREPORT(ERROR) << "Error compiling '" << pattern_ << "': " << error;
- }
- return re;
-}
-
-/***** Convenience interfaces *****/
-
-bool PCRE::FullMatchFunctor::operator ()(const StringPiece& text,
- const PCRE& re,
- const Arg& a0,
- const Arg& a1,
- const Arg& a2,
- const Arg& a3,
- const Arg& a4,
- const Arg& a5,
- const Arg& a6,
- const Arg& a7,
- const Arg& a8,
- const Arg& a9,
- const Arg& a10,
- const Arg& a11,
- const Arg& a12,
- const Arg& a13,
- const Arg& a14,
- const Arg& a15) const {
- const Arg* args[kMaxArgs];
- int n = 0;
- if (&a0 == &no_more_args) goto done; args[n++] = &a0;
- if (&a1 == &no_more_args) goto done; args[n++] = &a1;
- if (&a2 == &no_more_args) goto done; args[n++] = &a2;
- if (&a3 == &no_more_args) goto done; args[n++] = &a3;
- if (&a4 == &no_more_args) goto done; args[n++] = &a4;
- if (&a5 == &no_more_args) goto done; args[n++] = &a5;
- if (&a6 == &no_more_args) goto done; args[n++] = &a6;
- if (&a7 == &no_more_args) goto done; args[n++] = &a7;
- if (&a8 == &no_more_args) goto done; args[n++] = &a8;
- if (&a9 == &no_more_args) goto done; args[n++] = &a9;
- if (&a10 == &no_more_args) goto done; args[n++] = &a10;
- if (&a11 == &no_more_args) goto done; args[n++] = &a11;
- if (&a12 == &no_more_args) goto done; args[n++] = &a12;
- if (&a13 == &no_more_args) goto done; args[n++] = &a13;
- if (&a14 == &no_more_args) goto done; args[n++] = &a14;
- if (&a15 == &no_more_args) goto done; args[n++] = &a15;
-done:
-
- size_t consumed;
- int vec[kVecSize] = {};
- return re.DoMatchImpl(text, ANCHOR_BOTH, &consumed, args, n, vec, kVecSize);
-}
-
-bool PCRE::PartialMatchFunctor::operator ()(const StringPiece& text,
- const PCRE& re,
- const Arg& a0,
- const Arg& a1,
- const Arg& a2,
- const Arg& a3,
- const Arg& a4,
- const Arg& a5,
- const Arg& a6,
- const Arg& a7,
- const Arg& a8,
- const Arg& a9,
- const Arg& a10,
- const Arg& a11,
- const Arg& a12,
- const Arg& a13,
- const Arg& a14,
- const Arg& a15) const {
- const Arg* args[kMaxArgs];
- int n = 0;
- if (&a0 == &no_more_args) goto done; args[n++] = &a0;
- if (&a1 == &no_more_args) goto done; args[n++] = &a1;
- if (&a2 == &no_more_args) goto done; args[n++] = &a2;
- if (&a3 == &no_more_args) goto done; args[n++] = &a3;
- if (&a4 == &no_more_args) goto done; args[n++] = &a4;
- if (&a5 == &no_more_args) goto done; args[n++] = &a5;
- if (&a6 == &no_more_args) goto done; args[n++] = &a6;
- if (&a7 == &no_more_args) goto done; args[n++] = &a7;
- if (&a8 == &no_more_args) goto done; args[n++] = &a8;
- if (&a9 == &no_more_args) goto done; args[n++] = &a9;
- if (&a10 == &no_more_args) goto done; args[n++] = &a10;
- if (&a11 == &no_more_args) goto done; args[n++] = &a11;
- if (&a12 == &no_more_args) goto done; args[n++] = &a12;
- if (&a13 == &no_more_args) goto done; args[n++] = &a13;
- if (&a14 == &no_more_args) goto done; args[n++] = &a14;
- if (&a15 == &no_more_args) goto done; args[n++] = &a15;
-done:
-
- size_t consumed;
- int vec[kVecSize] = {};
- return re.DoMatchImpl(text, UNANCHORED, &consumed, args, n, vec, kVecSize);
-}
-
-bool PCRE::ConsumeFunctor::operator ()(StringPiece* input,
- const PCRE& pattern,
- const Arg& a0,
- const Arg& a1,
- const Arg& a2,
- const Arg& a3,
- const Arg& a4,
- const Arg& a5,
- const Arg& a6,
- const Arg& a7,
- const Arg& a8,
- const Arg& a9,
- const Arg& a10,
- const Arg& a11,
- const Arg& a12,
- const Arg& a13,
- const Arg& a14,
- const Arg& a15) const {
- const Arg* args[kMaxArgs];
- int n = 0;
- if (&a0 == &no_more_args) goto done; args[n++] = &a0;
- if (&a1 == &no_more_args) goto done; args[n++] = &a1;
- if (&a2 == &no_more_args) goto done; args[n++] = &a2;
- if (&a3 == &no_more_args) goto done; args[n++] = &a3;
- if (&a4 == &no_more_args) goto done; args[n++] = &a4;
- if (&a5 == &no_more_args) goto done; args[n++] = &a5;
- if (&a6 == &no_more_args) goto done; args[n++] = &a6;
- if (&a7 == &no_more_args) goto done; args[n++] = &a7;
- if (&a8 == &no_more_args) goto done; args[n++] = &a8;
- if (&a9 == &no_more_args) goto done; args[n++] = &a9;
- if (&a10 == &no_more_args) goto done; args[n++] = &a10;
- if (&a11 == &no_more_args) goto done; args[n++] = &a11;
- if (&a12 == &no_more_args) goto done; args[n++] = &a12;
- if (&a13 == &no_more_args) goto done; args[n++] = &a13;
- if (&a14 == &no_more_args) goto done; args[n++] = &a14;
- if (&a15 == &no_more_args) goto done; args[n++] = &a15;
-done:
-
- size_t consumed;
- int vec[kVecSize] = {};
- if (pattern.DoMatchImpl(*input, ANCHOR_START, &consumed,
- args, n, vec, kVecSize)) {
- input->remove_prefix(consumed);
- return true;
- } else {
- return false;
- }
-}
-
-bool PCRE::FindAndConsumeFunctor::operator ()(StringPiece* input,
- const PCRE& pattern,
- const Arg& a0,
- const Arg& a1,
- const Arg& a2,
- const Arg& a3,
- const Arg& a4,
- const Arg& a5,
- const Arg& a6,
- const Arg& a7,
- const Arg& a8,
- const Arg& a9,
- const Arg& a10,
- const Arg& a11,
- const Arg& a12,
- const Arg& a13,
- const Arg& a14,
- const Arg& a15) const {
- const Arg* args[kMaxArgs];
- int n = 0;
- if (&a0 == &no_more_args) goto done; args[n++] = &a0;
- if (&a1 == &no_more_args) goto done; args[n++] = &a1;
- if (&a2 == &no_more_args) goto done; args[n++] = &a2;
- if (&a3 == &no_more_args) goto done; args[n++] = &a3;
- if (&a4 == &no_more_args) goto done; args[n++] = &a4;
- if (&a5 == &no_more_args) goto done; args[n++] = &a5;
- if (&a6 == &no_more_args) goto done; args[n++] = &a6;
- if (&a7 == &no_more_args) goto done; args[n++] = &a7;
- if (&a8 == &no_more_args) goto done; args[n++] = &a8;
- if (&a9 == &no_more_args) goto done; args[n++] = &a9;
- if (&a10 == &no_more_args) goto done; args[n++] = &a10;
- if (&a11 == &no_more_args) goto done; args[n++] = &a11;
- if (&a12 == &no_more_args) goto done; args[n++] = &a12;
- if (&a13 == &no_more_args) goto done; args[n++] = &a13;
- if (&a14 == &no_more_args) goto done; args[n++] = &a14;
- if (&a15 == &no_more_args) goto done; args[n++] = &a15;
-done:
-
- size_t consumed;
- int vec[kVecSize] = {};
- if (pattern.DoMatchImpl(*input, UNANCHORED, &consumed,
- args, n, vec, kVecSize)) {
- input->remove_prefix(consumed);
- return true;
- } else {
- return false;
- }
-}
-
-bool PCRE::Replace(std::string *str,
- const PCRE& pattern,
- const StringPiece& rewrite) {
- int vec[kVecSize] = {};
- int matches = pattern.TryMatch(*str, 0, UNANCHORED, true, vec, kVecSize);
- if (matches == 0)
- return false;
-
- std::string s;
- if (!pattern.Rewrite(&s, rewrite, *str, vec, matches))
- return false;
-
- assert(vec[0] >= 0);
- assert(vec[1] >= 0);
- str->replace(vec[0], vec[1] - vec[0], s);
- return true;
-}
-
-int PCRE::GlobalReplace(std::string *str,
- const PCRE& pattern,
- const StringPiece& rewrite) {
- int count = 0;
- int vec[kVecSize] = {};
- std::string out;
- size_t start = 0;
- bool last_match_was_empty_string = false;
-
- while (start <= str->size()) {
- // If the previous match was for the empty string, we shouldn't
- // just match again: we'll match in the same way and get an
- // infinite loop. Instead, we do the match in a special way:
- // anchored -- to force another try at the same position --
- // and with a flag saying that this time, ignore empty matches.
- // If this special match returns, that means there's a non-empty
- // match at this position as well, and we can continue. If not,
- // we do what perl does, and just advance by one.
- // Notice that perl prints '@@@' for this;
- // perl -le '$_ = "aa"; s/b*|aa/@/g; print'
- int matches;
- if (last_match_was_empty_string) {
- matches = pattern.TryMatch(*str, start, ANCHOR_START, false,
- vec, kVecSize);
- if (matches <= 0) {
- if (start < str->size())
- out.push_back((*str)[start]);
- start++;
- last_match_was_empty_string = false;
- continue;
- }
- } else {
- matches = pattern.TryMatch(*str, start, UNANCHORED, true,
- vec, kVecSize);
- if (matches <= 0)
- break;
- }
- size_t matchstart = vec[0], matchend = vec[1];
- assert(matchstart >= start);
- assert(matchend >= matchstart);
-
- out.append(*str, start, matchstart - start);
- pattern.Rewrite(&out, rewrite, *str, vec, matches);
- start = matchend;
- count++;
- last_match_was_empty_string = (matchstart == matchend);
- }
-
- if (count == 0)
- return 0;
-
- if (start < str->size())
- out.append(*str, start, str->size() - start);
- using std::swap;
- swap(out, *str);
- return count;
-}
-
-bool PCRE::Extract(const StringPiece &text,
- const PCRE& pattern,
- const StringPiece &rewrite,
- std::string *out) {
- int vec[kVecSize] = {};
- int matches = pattern.TryMatch(text, 0, UNANCHORED, true, vec, kVecSize);
- if (matches == 0)
- return false;
- out->clear();
- return pattern.Rewrite(out, rewrite, text, vec, matches);
-}
-
-std::string PCRE::QuoteMeta(const StringPiece& unquoted) {
- std::string result;
- result.reserve(unquoted.size() << 1);
-
- // Escape any ascii character not in [A-Za-z_0-9].
- //
- // Note that it's legal to escape a character even if it has no
- // special meaning in a regular expression -- so this function does
- // that. (This also makes it identical to the perl function of the
- // same name except for the null-character special case;
- // see `perldoc -f quotemeta`.)
- for (size_t ii = 0; ii < unquoted.size(); ++ii) {
- // Note that using 'isalnum' here raises the benchmark time from
- // 32ns to 58ns:
- if ((unquoted[ii] < 'a' || unquoted[ii] > 'z') &&
- (unquoted[ii] < 'A' || unquoted[ii] > 'Z') &&
- (unquoted[ii] < '0' || unquoted[ii] > '9') &&
- unquoted[ii] != '_' &&
- // If this is the part of a UTF8 or Latin1 character, we need
- // to copy this byte without escaping. Experimentally this is
- // what works correctly with the regexp library.
- !(unquoted[ii] & 128)) {
- if (unquoted[ii] == '\0') { // Special handling for null chars.
- // Can't use "\\0" since the next character might be a digit.
- result += "\\x00";
- continue;
- }
- result += '\\';
- }
- result += unquoted[ii];
- }
-
- return result;
-}
-
-/***** Actual matching and rewriting code *****/
-
-bool PCRE::HitLimit() {
- return hit_limit_ != 0;
-}
-
-void PCRE::ClearHitLimit() {
- hit_limit_ = 0;
-}
-
-int PCRE::TryMatch(const StringPiece& text,
- size_t startpos,
- Anchor anchor,
- bool empty_ok,
- int *vec,
- int vecsize) const {
- pcre* re = (anchor == ANCHOR_BOTH) ? re_full_ : re_partial_;
- if (re == NULL) {
- PCREPORT(ERROR) << "Matching against invalid re: " << *error_;
- return 0;
- }
-
- int match_limit = match_limit_;
- if (match_limit <= 0) {
- match_limit = GetFlag(FLAGS_regexp_match_limit);
- }
-
- int stack_limit = stack_limit_;
- if (stack_limit <= 0) {
- stack_limit = GetFlag(FLAGS_regexp_stack_limit);
- }
-
- pcre_extra extra = { 0 };
- if (match_limit > 0) {
- extra.flags |= PCRE_EXTRA_MATCH_LIMIT;
- extra.match_limit = match_limit;
- }
- if (stack_limit > 0) {
- extra.flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
- extra.match_limit_recursion = stack_limit / kPCREFrameSize;
- }
-
- int options = 0;
- if (anchor != UNANCHORED)
- options |= PCRE_ANCHORED;
- if (!empty_ok)
- options |= PCRE_NOTEMPTY;
-
- int rc = pcre_exec(re, // The regular expression object
- &extra,
- (text.data() == NULL) ? "" : text.data(),
- static_cast<int>(text.size()),
- static_cast<int>(startpos),
- options,
- vec,
- vecsize);
-
- // Handle errors
- if (rc == 0) {
- // pcre_exec() returns 0 as a special case when the number of
- // capturing subpatterns exceeds the size of the vector.
- // When this happens, there is a match and the output vector
- // is filled, but we miss out on the positions of the extra subpatterns.
- rc = vecsize / 2;
- } else if (rc < 0) {
- switch (rc) {
- case PCRE_ERROR_NOMATCH:
- return 0;
- case PCRE_ERROR_MATCHLIMIT:
- // Writing to hit_limit is not safe if multiple threads
- // are using the PCRE, but the flag is only intended
- // for use by unit tests anyway, so we let it go.
- hit_limit_ = true;
- PCREPORT(WARNING) << "Exceeded match limit of " << match_limit
- << " when matching '" << pattern_ << "'"
- << " against text that is " << text.size() << " bytes.";
- return 0;
- case PCRE_ERROR_RECURSIONLIMIT:
- // See comment about hit_limit above.
- hit_limit_ = true;
- PCREPORT(WARNING) << "Exceeded stack limit of " << stack_limit
- << " when matching '" << pattern_ << "'"
- << " against text that is " << text.size() << " bytes.";
- return 0;
- default:
- // There are other return codes from pcre.h :
- // PCRE_ERROR_NULL (-2)
- // PCRE_ERROR_BADOPTION (-3)
- // PCRE_ERROR_BADMAGIC (-4)
- // PCRE_ERROR_UNKNOWN_NODE (-5)
- // PCRE_ERROR_NOMEMORY (-6)
- // PCRE_ERROR_NOSUBSTRING (-7)
- // ...
- PCREPORT(ERROR) << "Unexpected return code: " << rc
- << " when matching '" << pattern_ << "'"
- << ", re=" << re
- << ", text=" << text
- << ", vec=" << vec
- << ", vecsize=" << vecsize;
- return 0;
- }
- }
-
- return rc;
-}
-
-bool PCRE::DoMatchImpl(const StringPiece& text,
- Anchor anchor,
- size_t* consumed,
- const Arg* const* args,
- int n,
- int* vec,
- int vecsize) const {
- assert((1 + n) * 3 <= vecsize); // results + PCRE workspace
- if (NumberOfCapturingGroups() < n) {
- // RE has fewer capturing groups than number of Arg pointers passed in.
- return false;
- }
-
- int matches = TryMatch(text, 0, anchor, true, vec, vecsize);
- assert(matches >= 0); // TryMatch never returns negatives
- if (matches == 0)
- return false;
-
- *consumed = vec[1];
-
- if (n == 0 || args == NULL) {
- // We are not interested in results
- return true;
- }
-
- // If we got here, we must have matched the whole pattern.
- // We do not need (can not do) any more checks on the value of 'matches' here
- // -- see the comment for TryMatch.
- for (int i = 0; i < n; i++) {
- const int start = vec[2*(i+1)];
- const int limit = vec[2*(i+1)+1];
-
- // Avoid invoking undefined behavior when text.data() happens
- // to be null and start happens to be -1, the latter being the
- // case for an unmatched subexpression. Even if text.data() is
- // not null, pointing one byte before was a longstanding bug.
- const char* addr = NULL;
- if (start != -1) {
- addr = text.data() + start;
- }
-
- if (!args[i]->Parse(addr, limit-start)) {
- // TODO: Should we indicate what the error was?
- return false;
- }
- }
-
- return true;
-}
-
-bool PCRE::DoMatch(const StringPiece& text,
- Anchor anchor,
- size_t* consumed,
- const Arg* const args[],
- int n) const {
- assert(n >= 0);
- const int vecsize = (1 + n) * 3; // results + PCRE workspace
- // (as for kVecSize)
- int* vec = new int[vecsize];
- bool b = DoMatchImpl(text, anchor, consumed, args, n, vec, vecsize);
- delete[] vec;
- return b;
-}
-
-bool PCRE::Rewrite(std::string *out, const StringPiece &rewrite,
- const StringPiece &text, int *vec, int veclen) const {
- int number_of_capturing_groups = NumberOfCapturingGroups();
- for (const char *s = rewrite.data(), *end = s + rewrite.size();
- s < end; s++) {
- int c = *s;
- if (c == '\\') {
- c = *++s;
- if (isdigit(c)) {
- int n = (c - '0');
- if (n >= veclen) {
- if (n <= number_of_capturing_groups) {
- // unmatched optional capturing group. treat
- // its value as empty string; i.e., nothing to append.
- } else {
- PCREPORT(ERROR) << "requested group " << n
- << " in regexp " << rewrite.data();
- return false;
- }
- }
- int start = vec[2 * n];
- if (start >= 0)
- out->append(text.data() + start, vec[2 * n + 1] - start);
- } else if (c == '\\') {
- out->push_back('\\');
- } else {
- PCREPORT(ERROR) << "invalid rewrite pattern: " << rewrite.data();
- return false;
- }
- } else {
- out->push_back(c);
- }
- }
- return true;
-}
-
-bool PCRE::CheckRewriteString(const StringPiece& rewrite,
- std::string* error) const {
- int max_token = -1;
- for (const char *s = rewrite.data(), *end = s + rewrite.size();
- s < end; s++) {
- int c = *s;
- if (c != '\\') {
- continue;
- }
- if (++s == end) {
- *error = "Rewrite schema error: '\\' not allowed at end.";
- return false;
- }
- c = *s;
- if (c == '\\') {
- continue;
- }
- if (!isdigit(c)) {
- *error = "Rewrite schema error: "
- "'\\' must be followed by a digit or '\\'.";
- return false;
- }
- int n = (c - '0');
- if (max_token < n) {
- max_token = n;
- }
- }
-
- if (max_token > NumberOfCapturingGroups()) {
- *error = StringPrintf(
- "Rewrite schema requests %d matches, but the regexp only has %d "
- "parenthesized subexpressions.",
- max_token, NumberOfCapturingGroups());
- return false;
- }
- return true;
-}
-
-
-// Return the number of capturing subpatterns, or -1 if the
-// regexp wasn't valid on construction.
-int PCRE::NumberOfCapturingGroups() const {
- if (re_partial_ == NULL) return -1;
-
- int result;
- int rc = pcre_fullinfo(re_partial_, // The regular expression object
- NULL, // We did not study the pattern
- PCRE_INFO_CAPTURECOUNT,
- &result);
- if (rc != 0) {
- PCREPORT(ERROR) << "Unexpected return code: " << rc;
- return -1;
- }
- return result;
-}
-
-
-/***** Parsers for various types *****/
-
-bool PCRE::Arg::parse_null(const char* str, size_t n, void* dest) {
- // We fail if somebody asked us to store into a non-NULL void* pointer
- return (dest == NULL);
-}
-
-bool PCRE::Arg::parse_string(const char* str, size_t n, void* dest) {
- if (dest == NULL) return true;
- reinterpret_cast<std::string*>(dest)->assign(str, n);
- return true;
-}
-
-bool PCRE::Arg::parse_stringpiece(const char* str, size_t n, void* dest) {
- if (dest == NULL) return true;
- *(reinterpret_cast<StringPiece*>(dest)) = StringPiece(str, n);
- return true;
-}
-
-bool PCRE::Arg::parse_char(const char* str, size_t n, void* dest) {
- if (n != 1) return false;
- if (dest == NULL) return true;
- *(reinterpret_cast<char*>(dest)) = str[0];
- return true;
-}
-
-bool PCRE::Arg::parse_schar(const char* str, size_t n, void* dest) {
- if (n != 1) return false;
- if (dest == NULL) return true;
- *(reinterpret_cast<signed char*>(dest)) = str[0];
- return true;
-}
-
-bool PCRE::Arg::parse_uchar(const char* str, size_t n, void* dest) {
- if (n != 1) return false;
- if (dest == NULL) return true;
- *(reinterpret_cast<unsigned char*>(dest)) = str[0];
- return true;
-}
-
-// Largest number spec that we are willing to parse
-static const int kMaxNumberLength = 32;
-
-// PCREQUIPCRES "buf" must have length at least kMaxNumberLength+1
-// PCREQUIPCRES "n > 0"
-// Copies "str" into "buf" and null-terminates if necessary.
-// Returns one of:
-// a. "str" if no termination is needed
-// b. "buf" if the string was copied and null-terminated
-// c. "" if the input was invalid and has no hope of being parsed
-static const char* TerminateNumber(char* buf, const char* str, size_t n) {
- if ((n > 0) && isspace(*str)) {
- // We are less forgiving than the strtoxxx() routines and do not
- // allow leading spaces.
- return "";
- }
-
- // See if the character right after the input text may potentially
- // look like a digit.
- if (isdigit(str[n]) ||
- ((str[n] >= 'a') && (str[n] <= 'f')) ||
- ((str[n] >= 'A') && (str[n] <= 'F'))) {
- if (n > kMaxNumberLength) return ""; // Input too big to be a valid number
- memcpy(buf, str, n);
- buf[n] = '\0';
- return buf;
- } else {
- // We can parse right out of the supplied string, so return it.
- return str;
- }
-}
-
-bool PCRE::Arg::parse_long_radix(const char* str,
- size_t n,
- void* dest,
- int radix) {
- if (n == 0) return false;
- char buf[kMaxNumberLength+1];
- str = TerminateNumber(buf, str, n);
- char* end;
- errno = 0;
- long r = strtol(str, &end, radix);
- if (end != str + n) return false; // Leftover junk
- if (errno) return false;
- if (dest == NULL) return true;
- *(reinterpret_cast<long*>(dest)) = r;
- return true;
-}
-
-bool PCRE::Arg::parse_ulong_radix(const char* str,
- size_t n,
- void* dest,
- int radix) {
- if (n == 0) return false;
- char buf[kMaxNumberLength+1];
- str = TerminateNumber(buf, str, n);
- if (str[0] == '-') {
- // strtoul() will silently accept negative numbers and parse
- // them. This module is more strict and treats them as errors.
- return false;
- }
-
- char* end;
- errno = 0;
- unsigned long r = strtoul(str, &end, radix);
- if (end != str + n) return false; // Leftover junk
- if (errno) return false;
- if (dest == NULL) return true;
- *(reinterpret_cast<unsigned long*>(dest)) = r;
- return true;
-}
-
-bool PCRE::Arg::parse_short_radix(const char* str,
- size_t n,
- void* dest,
- int radix) {
- long r;
- if (!parse_long_radix(str, n, &r, radix)) return false; // Could not parse
- if ((short)r != r) return false; // Out of range
- if (dest == NULL) return true;
- *(reinterpret_cast<short*>(dest)) = (short)r;
- return true;
-}
-
-bool PCRE::Arg::parse_ushort_radix(const char* str,
- size_t n,
- void* dest,
- int radix) {
- unsigned long r;
- if (!parse_ulong_radix(str, n, &r, radix)) return false; // Could not parse
- if ((unsigned short)r != r) return false; // Out of range
- if (dest == NULL) return true;
- *(reinterpret_cast<unsigned short*>(dest)) = (unsigned short)r;
- return true;
-}
-
-bool PCRE::Arg::parse_int_radix(const char* str,
- size_t n,
- void* dest,
- int radix) {
- long r;
- if (!parse_long_radix(str, n, &r, radix)) return false; // Could not parse
- if ((int)r != r) return false; // Out of range
- if (dest == NULL) return true;
- *(reinterpret_cast<int*>(dest)) = (int)r;
- return true;
-}
-
-bool PCRE::Arg::parse_uint_radix(const char* str,
- size_t n,
- void* dest,
- int radix) {
- unsigned long r;
- if (!parse_ulong_radix(str, n, &r, radix)) return false; // Could not parse
- if ((unsigned int)r != r) return false; // Out of range
- if (dest == NULL) return true;
- *(reinterpret_cast<unsigned int*>(dest)) = (unsigned int)r;
- return true;
-}
-
-bool PCRE::Arg::parse_longlong_radix(const char* str,
- size_t n,
- void* dest,
- int radix) {
- if (n == 0) return false;
- char buf[kMaxNumberLength+1];
- str = TerminateNumber(buf, str, n);
- char* end;
- errno = 0;
- long long r = strtoll(str, &end, radix);
- if (end != str + n) return false; // Leftover junk
- if (errno) return false;
- if (dest == NULL) return true;
- *(reinterpret_cast<long long*>(dest)) = r;
- return true;
-}
-
-bool PCRE::Arg::parse_ulonglong_radix(const char* str,
- size_t n,
- void* dest,
- int radix) {
- if (n == 0) return false;
- char buf[kMaxNumberLength+1];
- str = TerminateNumber(buf, str, n);
- if (str[0] == '-') {
- // strtoull() will silently accept negative numbers and parse
- // them. This module is more strict and treats them as errors.
- return false;
- }
- char* end;
- errno = 0;
- unsigned long long r = strtoull(str, &end, radix);
- if (end != str + n) return false; // Leftover junk
- if (errno) return false;
- if (dest == NULL) return true;
- *(reinterpret_cast<unsigned long long*>(dest)) = r;
- return true;
-}
-
-static bool parse_double_float(const char* str, size_t n, bool isfloat,
- void* dest) {
- if (n == 0) return false;
- static const int kMaxLength = 200;
- char buf[kMaxLength];
- if (n >= kMaxLength) return false;
- memcpy(buf, str, n);
- buf[n] = '\0';
- char* end;
- errno = 0;
- double r;
- if (isfloat) {
- r = strtof(buf, &end);
- } else {
- r = strtod(buf, &end);
- }
- if (end != buf + n) return false; // Leftover junk
- if (errno) return false;
- if (dest == NULL) return true;
- if (isfloat) {
- *(reinterpret_cast<float*>(dest)) = (float)r;
- } else {
- *(reinterpret_cast<double*>(dest)) = r;
- }
- return true;
-}
-
-bool PCRE::Arg::parse_double(const char* str, size_t n, void* dest) {
- return parse_double_float(str, n, false, dest);
-}
-
-bool PCRE::Arg::parse_float(const char* str, size_t n, void* dest) {
- return parse_double_float(str, n, true, dest);
-}
-
-#define DEFINE_INTEGER_PARSER(name) \
- bool PCRE::Arg::parse_##name(const char* str, size_t n, void* dest) { \
- return parse_##name##_radix(str, n, dest, 10); \
- } \
- bool PCRE::Arg::parse_##name##_hex(const char* str, size_t n, void* dest) { \
- return parse_##name##_radix(str, n, dest, 16); \
- } \
- bool PCRE::Arg::parse_##name##_octal(const char* str, size_t n, \
- void* dest) { \
- return parse_##name##_radix(str, n, dest, 8); \
- } \
- bool PCRE::Arg::parse_##name##_cradix(const char* str, size_t n, \
- void* dest) { \
- return parse_##name##_radix(str, n, dest, 0); \
- }
-
-DEFINE_INTEGER_PARSER(short);
-DEFINE_INTEGER_PARSER(ushort);
-DEFINE_INTEGER_PARSER(int);
-DEFINE_INTEGER_PARSER(uint);
-DEFINE_INTEGER_PARSER(long);
-DEFINE_INTEGER_PARSER(ulong);
-DEFINE_INTEGER_PARSER(longlong);
-DEFINE_INTEGER_PARSER(ulonglong);
-
-#undef DEFINE_INTEGER_PARSER
-
-} // namespace re2
+// Copyright 2003-2009 Google Inc. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// This is a variant of PCRE's pcrecpp.cc, originally written at Google.
+// The main changes are the addition of the HitLimit method and
+// compilation as PCRE in namespace re2.
+
+#include <assert.h>
+#include <ctype.h>
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+#include <limits>
+#include <string>
+#include <utility>
+
+#include "util/util.h"
+#include "util/flags.h"
+#include "util/logging.h"
+#include "util/pcre.h"
+#include "util/strutil.h"
+
+// Silence warnings about the wacky formatting in the operator() functions.
+#if !defined(__clang__) && defined(__GNUC__) && __GNUC__ >= 6
+#pragma GCC diagnostic ignored "-Wmisleading-indentation"
+#endif
+
+#define PCREPORT(level) LOG(level)
+
+// Default PCRE limits.
+// Defaults chosen to allow a plausible amount of CPU and
+// not exceed main thread stacks. Note that other threads
+// often have smaller stacks, and therefore tightening
+// regexp_stack_limit may frequently be necessary.
+DEFINE_FLAG(int, regexp_stack_limit, 256 << 10,
+ "default PCRE stack limit (bytes)");
+DEFINE_FLAG(int, regexp_match_limit, 1000000,
+ "default PCRE match limit (function calls)");
+
+#ifndef USEPCRE
+
+// Fake just enough of the PCRE API to allow this file to build. :)
+
+struct pcre_extra {
+ int flags;
+ int match_limit;
+ int match_limit_recursion;
+};
+
+#define PCRE_EXTRA_MATCH_LIMIT 0
+#define PCRE_EXTRA_MATCH_LIMIT_RECURSION 0
+#define PCRE_ANCHORED 0
+#define PCRE_NOTEMPTY 0
+#define PCRE_ERROR_NOMATCH 1
+#define PCRE_ERROR_MATCHLIMIT 2
+#define PCRE_ERROR_RECURSIONLIMIT 3
+#define PCRE_INFO_CAPTURECOUNT 0
+
+void pcre_free(void*) {
+}
+
+pcre* pcre_compile(const char*, int, const char**, int*, const unsigned char*) {
+ return NULL;
+}
+
+int pcre_exec(const pcre*, const pcre_extra*, const char*, int, int, int, int*, int) {
+ return 0;
+}
+
+int pcre_fullinfo(const pcre*, const pcre_extra*, int, void*) {
+ return 0;
+}
+
+#endif
+
+namespace re2 {
+
+// Maximum number of args we can set
+static const int kMaxArgs = 16;
+static const int kVecSize = (1 + kMaxArgs) * 3; // results + PCRE workspace
+
+// Approximate size of a recursive invocation of PCRE's
+// internal "match()" frame. This varies depending on the
+// compiler and architecture, of course, so the constant is
+// just a conservative estimate. To find the exact number,
+// run regexp_unittest with --regexp_stack_limit=0 under
+// a debugger and look at the frames when it crashes.
+// The exact frame size was 656 in production on 2008/02/03.
+static const int kPCREFrameSize = 700;
+
+// Special name for missing C++ arguments.
+PCRE::Arg PCRE::no_more_args((void*)NULL);
+
+const PCRE::PartialMatchFunctor PCRE::PartialMatch = { };
+const PCRE::FullMatchFunctor PCRE::FullMatch = { } ;
+const PCRE::ConsumeFunctor PCRE::Consume = { };
+const PCRE::FindAndConsumeFunctor PCRE::FindAndConsume = { };
+
+// If a regular expression has no error, its error_ field points here
+static const std::string empty_string;
+
+void PCRE::Init(const char* pattern, Option options, int match_limit,
+ int stack_limit, bool report_errors) {
+ pattern_ = pattern;
+ options_ = options;
+ match_limit_ = match_limit;
+ stack_limit_ = stack_limit;
+ hit_limit_ = false;
+ error_ = &empty_string;
+ report_errors_ = report_errors;
+ re_full_ = NULL;
+ re_partial_ = NULL;
+
+ if (options & ~(EnabledCompileOptions | EnabledExecOptions)) {
+ error_ = new std::string("illegal regexp option");
+ PCREPORT(ERROR)
+ << "Error compiling '" << pattern << "': illegal regexp option";
+ } else {
+ re_partial_ = Compile(UNANCHORED);
+ if (re_partial_ != NULL) {
+ re_full_ = Compile(ANCHOR_BOTH);
+ }
+ }
+}
+
+PCRE::PCRE(const char* pattern) {
+ Init(pattern, None, 0, 0, true);
+}
+PCRE::PCRE(const char* pattern, Option option) {
+ Init(pattern, option, 0, 0, true);
+}
+PCRE::PCRE(const std::string& pattern) {
+ Init(pattern.c_str(), None, 0, 0, true);
+}
+PCRE::PCRE(const std::string& pattern, Option option) {
+ Init(pattern.c_str(), option, 0, 0, true);
+}
+PCRE::PCRE(const std::string& pattern, const PCRE_Options& re_option) {
+ Init(pattern.c_str(), re_option.option(), re_option.match_limit(),
+ re_option.stack_limit(), re_option.report_errors());
+}
+
+PCRE::PCRE(const char *pattern, const PCRE_Options& re_option) {
+ Init(pattern, re_option.option(), re_option.match_limit(),
+ re_option.stack_limit(), re_option.report_errors());
+}
+
+PCRE::~PCRE() {
+ if (re_full_ != NULL) pcre_free(re_full_);
+ if (re_partial_ != NULL) pcre_free(re_partial_);
+ if (error_ != &empty_string) delete error_;
+}
+
+pcre* PCRE::Compile(Anchor anchor) {
+ // Special treatment for anchoring. This is needed because at
+ // runtime pcre only provides an option for anchoring at the
+ // beginning of a string.
+ //
+ // There are three types of anchoring we want:
+ // UNANCHORED Compile the original pattern, and use
+ // a pcre unanchored match.
+ // ANCHOR_START Compile the original pattern, and use
+ // a pcre anchored match.
+ // ANCHOR_BOTH Tack a "\z" to the end of the original pattern
+ // and use a pcre anchored match.
+
+ const char* error = "";
+ int eoffset;
+ pcre* re;
+ if (anchor != ANCHOR_BOTH) {
+ re = pcre_compile(pattern_.c_str(),
+ (options_ & EnabledCompileOptions),
+ &error, &eoffset, NULL);
+ } else {
+ // Tack a '\z' at the end of PCRE. Parenthesize it first so that
+ // the '\z' applies to all top-level alternatives in the regexp.
+ std::string wrapped = "(?:"; // A non-counting grouping operator
+ wrapped += pattern_;
+ wrapped += ")\\z";
+ re = pcre_compile(wrapped.c_str(),
+ (options_ & EnabledCompileOptions),
+ &error, &eoffset, NULL);
+ }
+ if (re == NULL) {
+ if (error_ == &empty_string) error_ = new std::string(error);
+ PCREPORT(ERROR) << "Error compiling '" << pattern_ << "': " << error;
+ }
+ return re;
+}
+
+/***** Convenience interfaces *****/
+
+bool PCRE::FullMatchFunctor::operator ()(const StringPiece& text,
+ const PCRE& re,
+ const Arg& a0,
+ const Arg& a1,
+ const Arg& a2,
+ const Arg& a3,
+ const Arg& a4,
+ const Arg& a5,
+ const Arg& a6,
+ const Arg& a7,
+ const Arg& a8,
+ const Arg& a9,
+ const Arg& a10,
+ const Arg& a11,
+ const Arg& a12,
+ const Arg& a13,
+ const Arg& a14,
+ const Arg& a15) const {
+ const Arg* args[kMaxArgs];
+ int n = 0;
+ if (&a0 == &no_more_args) goto done; args[n++] = &a0;
+ if (&a1 == &no_more_args) goto done; args[n++] = &a1;
+ if (&a2 == &no_more_args) goto done; args[n++] = &a2;
+ if (&a3 == &no_more_args) goto done; args[n++] = &a3;
+ if (&a4 == &no_more_args) goto done; args[n++] = &a4;
+ if (&a5 == &no_more_args) goto done; args[n++] = &a5;
+ if (&a6 == &no_more_args) goto done; args[n++] = &a6;
+ if (&a7 == &no_more_args) goto done; args[n++] = &a7;
+ if (&a8 == &no_more_args) goto done; args[n++] = &a8;
+ if (&a9 == &no_more_args) goto done; args[n++] = &a9;
+ if (&a10 == &no_more_args) goto done; args[n++] = &a10;
+ if (&a11 == &no_more_args) goto done; args[n++] = &a11;
+ if (&a12 == &no_more_args) goto done; args[n++] = &a12;
+ if (&a13 == &no_more_args) goto done; args[n++] = &a13;
+ if (&a14 == &no_more_args) goto done; args[n++] = &a14;
+ if (&a15 == &no_more_args) goto done; args[n++] = &a15;
+done:
+
+ size_t consumed;
+ int vec[kVecSize] = {};
+ return re.DoMatchImpl(text, ANCHOR_BOTH, &consumed, args, n, vec, kVecSize);
+}
+
+bool PCRE::PartialMatchFunctor::operator ()(const StringPiece& text,
+ const PCRE& re,
+ const Arg& a0,
+ const Arg& a1,
+ const Arg& a2,
+ const Arg& a3,
+ const Arg& a4,
+ const Arg& a5,
+ const Arg& a6,
+ const Arg& a7,
+ const Arg& a8,
+ const Arg& a9,
+ const Arg& a10,
+ const Arg& a11,
+ const Arg& a12,
+ const Arg& a13,
+ const Arg& a14,
+ const Arg& a15) const {
+ const Arg* args[kMaxArgs];
+ int n = 0;
+ if (&a0 == &no_more_args) goto done; args[n++] = &a0;
+ if (&a1 == &no_more_args) goto done; args[n++] = &a1;
+ if (&a2 == &no_more_args) goto done; args[n++] = &a2;
+ if (&a3 == &no_more_args) goto done; args[n++] = &a3;
+ if (&a4 == &no_more_args) goto done; args[n++] = &a4;
+ if (&a5 == &no_more_args) goto done; args[n++] = &a5;
+ if (&a6 == &no_more_args) goto done; args[n++] = &a6;
+ if (&a7 == &no_more_args) goto done; args[n++] = &a7;
+ if (&a8 == &no_more_args) goto done; args[n++] = &a8;
+ if (&a9 == &no_more_args) goto done; args[n++] = &a9;
+ if (&a10 == &no_more_args) goto done; args[n++] = &a10;
+ if (&a11 == &no_more_args) goto done; args[n++] = &a11;
+ if (&a12 == &no_more_args) goto done; args[n++] = &a12;
+ if (&a13 == &no_more_args) goto done; args[n++] = &a13;
+ if (&a14 == &no_more_args) goto done; args[n++] = &a14;
+ if (&a15 == &no_more_args) goto done; args[n++] = &a15;
+done:
+
+ size_t consumed;
+ int vec[kVecSize] = {};
+ return re.DoMatchImpl(text, UNANCHORED, &consumed, args, n, vec, kVecSize);
+}
+
+bool PCRE::ConsumeFunctor::operator ()(StringPiece* input,
+ const PCRE& pattern,
+ const Arg& a0,
+ const Arg& a1,
+ const Arg& a2,
+ const Arg& a3,
+ const Arg& a4,
+ const Arg& a5,
+ const Arg& a6,
+ const Arg& a7,
+ const Arg& a8,
+ const Arg& a9,
+ const Arg& a10,
+ const Arg& a11,
+ const Arg& a12,
+ const Arg& a13,
+ const Arg& a14,
+ const Arg& a15) const {
+ const Arg* args[kMaxArgs];
+ int n = 0;
+ if (&a0 == &no_more_args) goto done; args[n++] = &a0;
+ if (&a1 == &no_more_args) goto done; args[n++] = &a1;
+ if (&a2 == &no_more_args) goto done; args[n++] = &a2;
+ if (&a3 == &no_more_args) goto done; args[n++] = &a3;
+ if (&a4 == &no_more_args) goto done; args[n++] = &a4;
+ if (&a5 == &no_more_args) goto done; args[n++] = &a5;
+ if (&a6 == &no_more_args) goto done; args[n++] = &a6;
+ if (&a7 == &no_more_args) goto done; args[n++] = &a7;
+ if (&a8 == &no_more_args) goto done; args[n++] = &a8;
+ if (&a9 == &no_more_args) goto done; args[n++] = &a9;
+ if (&a10 == &no_more_args) goto done; args[n++] = &a10;
+ if (&a11 == &no_more_args) goto done; args[n++] = &a11;
+ if (&a12 == &no_more_args) goto done; args[n++] = &a12;
+ if (&a13 == &no_more_args) goto done; args[n++] = &a13;
+ if (&a14 == &no_more_args) goto done; args[n++] = &a14;
+ if (&a15 == &no_more_args) goto done; args[n++] = &a15;
+done:
+
+ size_t consumed;
+ int vec[kVecSize] = {};
+ if (pattern.DoMatchImpl(*input, ANCHOR_START, &consumed,
+ args, n, vec, kVecSize)) {
+ input->remove_prefix(consumed);
+ return true;
+ } else {
+ return false;
+ }
+}
+
+bool PCRE::FindAndConsumeFunctor::operator ()(StringPiece* input,
+ const PCRE& pattern,
+ const Arg& a0,
+ const Arg& a1,
+ const Arg& a2,
+ const Arg& a3,
+ const Arg& a4,
+ const Arg& a5,
+ const Arg& a6,
+ const Arg& a7,
+ const Arg& a8,
+ const Arg& a9,
+ const Arg& a10,
+ const Arg& a11,
+ const Arg& a12,
+ const Arg& a13,
+ const Arg& a14,
+ const Arg& a15) const {
+ const Arg* args[kMaxArgs];
+ int n = 0;
+ if (&a0 == &no_more_args) goto done; args[n++] = &a0;
+ if (&a1 == &no_more_args) goto done; args[n++] = &a1;
+ if (&a2 == &no_more_args) goto done; args[n++] = &a2;
+ if (&a3 == &no_more_args) goto done; args[n++] = &a3;
+ if (&a4 == &no_more_args) goto done; args[n++] = &a4;
+ if (&a5 == &no_more_args) goto done; args[n++] = &a5;
+ if (&a6 == &no_more_args) goto done; args[n++] = &a6;
+ if (&a7 == &no_more_args) goto done; args[n++] = &a7;
+ if (&a8 == &no_more_args) goto done; args[n++] = &a8;
+ if (&a9 == &no_more_args) goto done; args[n++] = &a9;
+ if (&a10 == &no_more_args) goto done; args[n++] = &a10;
+ if (&a11 == &no_more_args) goto done; args[n++] = &a11;
+ if (&a12 == &no_more_args) goto done; args[n++] = &a12;
+ if (&a13 == &no_more_args) goto done; args[n++] = &a13;
+ if (&a14 == &no_more_args) goto done; args[n++] = &a14;
+ if (&a15 == &no_more_args) goto done; args[n++] = &a15;
+done:
+
+ size_t consumed;
+ int vec[kVecSize] = {};
+ if (pattern.DoMatchImpl(*input, UNANCHORED, &consumed,
+ args, n, vec, kVecSize)) {
+ input->remove_prefix(consumed);
+ return true;
+ } else {
+ return false;
+ }
+}
+
+bool PCRE::Replace(std::string *str,
+ const PCRE& pattern,
+ const StringPiece& rewrite) {
+ int vec[kVecSize] = {};
+ int matches = pattern.TryMatch(*str, 0, UNANCHORED, true, vec, kVecSize);
+ if (matches == 0)
+ return false;
+
+ std::string s;
+ if (!pattern.Rewrite(&s, rewrite, *str, vec, matches))
+ return false;
+
+ assert(vec[0] >= 0);
+ assert(vec[1] >= 0);
+ str->replace(vec[0], vec[1] - vec[0], s);
+ return true;
+}
+
+int PCRE::GlobalReplace(std::string *str,
+ const PCRE& pattern,
+ const StringPiece& rewrite) {
+ int count = 0;
+ int vec[kVecSize] = {};
+ std::string out;
+ size_t start = 0;
+ bool last_match_was_empty_string = false;
+
+ while (start <= str->size()) {
+ // If the previous match was for the empty string, we shouldn't
+ // just match again: we'll match in the same way and get an
+ // infinite loop. Instead, we do the match in a special way:
+ // anchored -- to force another try at the same position --
+ // and with a flag saying that this time, ignore empty matches.
+ // If this special match returns, that means there's a non-empty
+ // match at this position as well, and we can continue. If not,
+ // we do what perl does, and just advance by one.
+ // Notice that perl prints '@@@' for this;
+ // perl -le '$_ = "aa"; s/b*|aa/@/g; print'
+ int matches;
+ if (last_match_was_empty_string) {
+ matches = pattern.TryMatch(*str, start, ANCHOR_START, false,
+ vec, kVecSize);
+ if (matches <= 0) {
+ if (start < str->size())
+ out.push_back((*str)[start]);
+ start++;
+ last_match_was_empty_string = false;
+ continue;
+ }
+ } else {
+ matches = pattern.TryMatch(*str, start, UNANCHORED, true,
+ vec, kVecSize);
+ if (matches <= 0)
+ break;
+ }
+ size_t matchstart = vec[0], matchend = vec[1];
+ assert(matchstart >= start);
+ assert(matchend >= matchstart);
+
+ out.append(*str, start, matchstart - start);
+ pattern.Rewrite(&out, rewrite, *str, vec, matches);
+ start = matchend;
+ count++;
+ last_match_was_empty_string = (matchstart == matchend);
+ }
+
+ if (count == 0)
+ return 0;
+
+ if (start < str->size())
+ out.append(*str, start, str->size() - start);
+ using std::swap;
+ swap(out, *str);
+ return count;
+}
+
+bool PCRE::Extract(const StringPiece &text,
+ const PCRE& pattern,
+ const StringPiece &rewrite,
+ std::string *out) {
+ int vec[kVecSize] = {};
+ int matches = pattern.TryMatch(text, 0, UNANCHORED, true, vec, kVecSize);
+ if (matches == 0)
+ return false;
+ out->clear();
+ return pattern.Rewrite(out, rewrite, text, vec, matches);
+}
+
+std::string PCRE::QuoteMeta(const StringPiece& unquoted) {
+ std::string result;
+ result.reserve(unquoted.size() << 1);
+
+ // Escape any ascii character not in [A-Za-z_0-9].
+ //
+ // Note that it's legal to escape a character even if it has no
+ // special meaning in a regular expression -- so this function does
+ // that. (This also makes it identical to the perl function of the
+ // same name except for the null-character special case;
+ // see `perldoc -f quotemeta`.)
+ for (size_t ii = 0; ii < unquoted.size(); ++ii) {
+ // Note that using 'isalnum' here raises the benchmark time from
+ // 32ns to 58ns:
+ if ((unquoted[ii] < 'a' || unquoted[ii] > 'z') &&
+ (unquoted[ii] < 'A' || unquoted[ii] > 'Z') &&
+ (unquoted[ii] < '0' || unquoted[ii] > '9') &&
+ unquoted[ii] != '_' &&
+ // If this is the part of a UTF8 or Latin1 character, we need
+ // to copy this byte without escaping. Experimentally this is
+ // what works correctly with the regexp library.
+ !(unquoted[ii] & 128)) {
+ if (unquoted[ii] == '\0') { // Special handling for null chars.
+ // Can't use "\\0" since the next character might be a digit.
+ result += "\\x00";
+ continue;
+ }
+ result += '\\';
+ }
+ result += unquoted[ii];
+ }
+
+ return result;
+}
+
+/***** Actual matching and rewriting code *****/
+
+bool PCRE::HitLimit() {
+ return hit_limit_ != 0;
+}
+
+void PCRE::ClearHitLimit() {
+ hit_limit_ = 0;
+}
+
+int PCRE::TryMatch(const StringPiece& text,
+ size_t startpos,
+ Anchor anchor,
+ bool empty_ok,
+ int *vec,
+ int vecsize) const {
+ pcre* re = (anchor == ANCHOR_BOTH) ? re_full_ : re_partial_;
+ if (re == NULL) {
+ PCREPORT(ERROR) << "Matching against invalid re: " << *error_;
+ return 0;
+ }
+
+ int match_limit = match_limit_;
+ if (match_limit <= 0) {
+ match_limit = GetFlag(FLAGS_regexp_match_limit);
+ }
+
+ int stack_limit = stack_limit_;
+ if (stack_limit <= 0) {
+ stack_limit = GetFlag(FLAGS_regexp_stack_limit);
+ }
+
+ pcre_extra extra = { 0 };
+ if (match_limit > 0) {
+ extra.flags |= PCRE_EXTRA_MATCH_LIMIT;
+ extra.match_limit = match_limit;
+ }
+ if (stack_limit > 0) {
+ extra.flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
+ extra.match_limit_recursion = stack_limit / kPCREFrameSize;
+ }
+
+ int options = 0;
+ if (anchor != UNANCHORED)
+ options |= PCRE_ANCHORED;
+ if (!empty_ok)
+ options |= PCRE_NOTEMPTY;
+
+ int rc = pcre_exec(re, // The regular expression object
+ &extra,
+ (text.data() == NULL) ? "" : text.data(),
+ static_cast<int>(text.size()),
+ static_cast<int>(startpos),
+ options,
+ vec,
+ vecsize);
+
+ // Handle errors
+ if (rc == 0) {
+ // pcre_exec() returns 0 as a special case when the number of
+ // capturing subpatterns exceeds the size of the vector.
+ // When this happens, there is a match and the output vector
+ // is filled, but we miss out on the positions of the extra subpatterns.
+ rc = vecsize / 2;
+ } else if (rc < 0) {
+ switch (rc) {
+ case PCRE_ERROR_NOMATCH:
+ return 0;
+ case PCRE_ERROR_MATCHLIMIT:
+ // Writing to hit_limit is not safe if multiple threads
+ // are using the PCRE, but the flag is only intended
+ // for use by unit tests anyway, so we let it go.
+ hit_limit_ = true;
+ PCREPORT(WARNING) << "Exceeded match limit of " << match_limit
+ << " when matching '" << pattern_ << "'"
+ << " against text that is " << text.size() << " bytes.";
+ return 0;
+ case PCRE_ERROR_RECURSIONLIMIT:
+ // See comment about hit_limit above.
+ hit_limit_ = true;
+ PCREPORT(WARNING) << "Exceeded stack limit of " << stack_limit
+ << " when matching '" << pattern_ << "'"
+ << " against text that is " << text.size() << " bytes.";
+ return 0;
+ default:
+ // There are other return codes from pcre.h :
+ // PCRE_ERROR_NULL (-2)
+ // PCRE_ERROR_BADOPTION (-3)
+ // PCRE_ERROR_BADMAGIC (-4)
+ // PCRE_ERROR_UNKNOWN_NODE (-5)
+ // PCRE_ERROR_NOMEMORY (-6)
+ // PCRE_ERROR_NOSUBSTRING (-7)
+ // ...
+ PCREPORT(ERROR) << "Unexpected return code: " << rc
+ << " when matching '" << pattern_ << "'"
+ << ", re=" << re
+ << ", text=" << text
+ << ", vec=" << vec
+ << ", vecsize=" << vecsize;
+ return 0;
+ }
+ }
+
+ return rc;
+}
+
+bool PCRE::DoMatchImpl(const StringPiece& text,
+ Anchor anchor,
+ size_t* consumed,
+ const Arg* const* args,
+ int n,
+ int* vec,
+ int vecsize) const {
+ assert((1 + n) * 3 <= vecsize); // results + PCRE workspace
+ if (NumberOfCapturingGroups() < n) {
+ // RE has fewer capturing groups than number of Arg pointers passed in.
+ return false;
+ }
+
+ int matches = TryMatch(text, 0, anchor, true, vec, vecsize);
+ assert(matches >= 0); // TryMatch never returns negatives
+ if (matches == 0)
+ return false;
+
+ *consumed = vec[1];
+
+ if (n == 0 || args == NULL) {
+ // We are not interested in results
+ return true;
+ }
+
+ // If we got here, we must have matched the whole pattern.
+ // We do not need (can not do) any more checks on the value of 'matches' here
+ // -- see the comment for TryMatch.
+ for (int i = 0; i < n; i++) {
+ const int start = vec[2*(i+1)];
+ const int limit = vec[2*(i+1)+1];
+
+ // Avoid invoking undefined behavior when text.data() happens
+ // to be null and start happens to be -1, the latter being the
+ // case for an unmatched subexpression. Even if text.data() is
+ // not null, pointing one byte before was a longstanding bug.
+ const char* addr = NULL;
+ if (start != -1) {
+ addr = text.data() + start;
+ }
+
+ if (!args[i]->Parse(addr, limit-start)) {
+ // TODO: Should we indicate what the error was?
+ return false;
+ }
+ }
+
+ return true;
+}
+
+bool PCRE::DoMatch(const StringPiece& text,
+ Anchor anchor,
+ size_t* consumed,
+ const Arg* const args[],
+ int n) const {
+ assert(n >= 0);
+ const int vecsize = (1 + n) * 3; // results + PCRE workspace
+ // (as for kVecSize)
+ int* vec = new int[vecsize];
+ bool b = DoMatchImpl(text, anchor, consumed, args, n, vec, vecsize);
+ delete[] vec;
+ return b;
+}
+
+bool PCRE::Rewrite(std::string *out, const StringPiece &rewrite,
+ const StringPiece &text, int *vec, int veclen) const {
+ int number_of_capturing_groups = NumberOfCapturingGroups();
+ for (const char *s = rewrite.data(), *end = s + rewrite.size();
+ s < end; s++) {
+ int c = *s;
+ if (c == '\\') {
+ c = *++s;
+ if (isdigit(c)) {
+ int n = (c - '0');
+ if (n >= veclen) {
+ if (n <= number_of_capturing_groups) {
+ // unmatched optional capturing group. treat
+ // its value as empty string; i.e., nothing to append.
+ } else {
+ PCREPORT(ERROR) << "requested group " << n
+ << " in regexp " << rewrite.data();
+ return false;
+ }
+ }
+ int start = vec[2 * n];
+ if (start >= 0)
+ out->append(text.data() + start, vec[2 * n + 1] - start);
+ } else if (c == '\\') {
+ out->push_back('\\');
+ } else {
+ PCREPORT(ERROR) << "invalid rewrite pattern: " << rewrite.data();
+ return false;
+ }
+ } else {
+ out->push_back(c);
+ }
+ }
+ return true;
+}
+
+bool PCRE::CheckRewriteString(const StringPiece& rewrite,
+ std::string* error) const {
+ int max_token = -1;
+ for (const char *s = rewrite.data(), *end = s + rewrite.size();
+ s < end; s++) {
+ int c = *s;
+ if (c != '\\') {
+ continue;
+ }
+ if (++s == end) {
+ *error = "Rewrite schema error: '\\' not allowed at end.";
+ return false;
+ }
+ c = *s;
+ if (c == '\\') {
+ continue;
+ }
+ if (!isdigit(c)) {
+ *error = "Rewrite schema error: "
+ "'\\' must be followed by a digit or '\\'.";
+ return false;
+ }
+ int n = (c - '0');
+ if (max_token < n) {
+ max_token = n;
+ }
+ }
+
+ if (max_token > NumberOfCapturingGroups()) {
+ *error = StringPrintf(
+ "Rewrite schema requests %d matches, but the regexp only has %d "
+ "parenthesized subexpressions.",
+ max_token, NumberOfCapturingGroups());
+ return false;
+ }
+ return true;
+}
+
+
+// Return the number of capturing subpatterns, or -1 if the
+// regexp wasn't valid on construction.
+int PCRE::NumberOfCapturingGroups() const {
+ if (re_partial_ == NULL) return -1;
+
+ int result;
+ int rc = pcre_fullinfo(re_partial_, // The regular expression object
+ NULL, // We did not study the pattern
+ PCRE_INFO_CAPTURECOUNT,
+ &result);
+ if (rc != 0) {
+ PCREPORT(ERROR) << "Unexpected return code: " << rc;
+ return -1;
+ }
+ return result;
+}
+
+
+/***** Parsers for various types *****/
+
+bool PCRE::Arg::parse_null(const char* str, size_t n, void* dest) {
+ // We fail if somebody asked us to store into a non-NULL void* pointer
+ return (dest == NULL);
+}
+
+bool PCRE::Arg::parse_string(const char* str, size_t n, void* dest) {
+ if (dest == NULL) return true;
+ reinterpret_cast<std::string*>(dest)->assign(str, n);
+ return true;
+}
+
+bool PCRE::Arg::parse_stringpiece(const char* str, size_t n, void* dest) {
+ if (dest == NULL) return true;
+ *(reinterpret_cast<StringPiece*>(dest)) = StringPiece(str, n);
+ return true;
+}
+
+bool PCRE::Arg::parse_char(const char* str, size_t n, void* dest) {
+ if (n != 1) return false;
+ if (dest == NULL) return true;
+ *(reinterpret_cast<char*>(dest)) = str[0];
+ return true;
+}
+
+bool PCRE::Arg::parse_schar(const char* str, size_t n, void* dest) {
+ if (n != 1) return false;
+ if (dest == NULL) return true;
+ *(reinterpret_cast<signed char*>(dest)) = str[0];
+ return true;
+}
+
+bool PCRE::Arg::parse_uchar(const char* str, size_t n, void* dest) {
+ if (n != 1) return false;
+ if (dest == NULL) return true;
+ *(reinterpret_cast<unsigned char*>(dest)) = str[0];
+ return true;
+}
+
+// Largest number spec that we are willing to parse
+static const int kMaxNumberLength = 32;
+
+// PCREQUIPCRES "buf" must have length at least kMaxNumberLength+1
+// PCREQUIPCRES "n > 0"
+// Copies "str" into "buf" and null-terminates if necessary.
+// Returns one of:
+// a. "str" if no termination is needed
+// b. "buf" if the string was copied and null-terminated
+// c. "" if the input was invalid and has no hope of being parsed
+static const char* TerminateNumber(char* buf, const char* str, size_t n) {
+ if ((n > 0) && isspace(*str)) {
+ // We are less forgiving than the strtoxxx() routines and do not
+ // allow leading spaces.
+ return "";
+ }
+
+ // See if the character right after the input text may potentially
+ // look like a digit.
+ if (isdigit(str[n]) ||
+ ((str[n] >= 'a') && (str[n] <= 'f')) ||
+ ((str[n] >= 'A') && (str[n] <= 'F'))) {
+ if (n > kMaxNumberLength) return ""; // Input too big to be a valid number
+ memcpy(buf, str, n);
+ buf[n] = '\0';
+ return buf;
+ } else {
+ // We can parse right out of the supplied string, so return it.
+ return str;
+ }
+}
+
+bool PCRE::Arg::parse_long_radix(const char* str,
+ size_t n,
+ void* dest,
+ int radix) {
+ if (n == 0) return false;
+ char buf[kMaxNumberLength+1];
+ str = TerminateNumber(buf, str, n);
+ char* end;
+ errno = 0;
+ long r = strtol(str, &end, radix);
+ if (end != str + n) return false; // Leftover junk
+ if (errno) return false;
+ if (dest == NULL) return true;
+ *(reinterpret_cast<long*>(dest)) = r;
+ return true;
+}
+
+bool PCRE::Arg::parse_ulong_radix(const char* str,
+ size_t n,
+ void* dest,
+ int radix) {
+ if (n == 0) return false;
+ char buf[kMaxNumberLength+1];
+ str = TerminateNumber(buf, str, n);
+ if (str[0] == '-') {
+ // strtoul() will silently accept negative numbers and parse
+ // them. This module is more strict and treats them as errors.
+ return false;
+ }
+
+ char* end;
+ errno = 0;
+ unsigned long r = strtoul(str, &end, radix);
+ if (end != str + n) return false; // Leftover junk
+ if (errno) return false;
+ if (dest == NULL) return true;
+ *(reinterpret_cast<unsigned long*>(dest)) = r;
+ return true;
+}
+
+bool PCRE::Arg::parse_short_radix(const char* str,
+ size_t n,
+ void* dest,
+ int radix) {
+ long r;
+ if (!parse_long_radix(str, n, &r, radix)) return false; // Could not parse
+ if ((short)r != r) return false; // Out of range
+ if (dest == NULL) return true;
+ *(reinterpret_cast<short*>(dest)) = (short)r;
+ return true;
+}
+
+bool PCRE::Arg::parse_ushort_radix(const char* str,
+ size_t n,
+ void* dest,
+ int radix) {
+ unsigned long r;
+ if (!parse_ulong_radix(str, n, &r, radix)) return false; // Could not parse
+ if ((unsigned short)r != r) return false; // Out of range
+ if (dest == NULL) return true;
+ *(reinterpret_cast<unsigned short*>(dest)) = (unsigned short)r;
+ return true;
+}
+
+bool PCRE::Arg::parse_int_radix(const char* str,
+ size_t n,
+ void* dest,
+ int radix) {
+ long r;
+ if (!parse_long_radix(str, n, &r, radix)) return false; // Could not parse
+ if ((int)r != r) return false; // Out of range
+ if (dest == NULL) return true;
+ *(reinterpret_cast<int*>(dest)) = (int)r;
+ return true;
+}
+
+bool PCRE::Arg::parse_uint_radix(const char* str,
+ size_t n,
+ void* dest,
+ int radix) {
+ unsigned long r;
+ if (!parse_ulong_radix(str, n, &r, radix)) return false; // Could not parse
+ if ((unsigned int)r != r) return false; // Out of range
+ if (dest == NULL) return true;
+ *(reinterpret_cast<unsigned int*>(dest)) = (unsigned int)r;
+ return true;
+}
+
+bool PCRE::Arg::parse_longlong_radix(const char* str,
+ size_t n,
+ void* dest,
+ int radix) {
+ if (n == 0) return false;
+ char buf[kMaxNumberLength+1];
+ str = TerminateNumber(buf, str, n);
+ char* end;
+ errno = 0;
+ long long r = strtoll(str, &end, radix);
+ if (end != str + n) return false; // Leftover junk
+ if (errno) return false;
+ if (dest == NULL) return true;
+ *(reinterpret_cast<long long*>(dest)) = r;
+ return true;
+}
+
+bool PCRE::Arg::parse_ulonglong_radix(const char* str,
+ size_t n,
+ void* dest,
+ int radix) {
+ if (n == 0) return false;
+ char buf[kMaxNumberLength+1];
+ str = TerminateNumber(buf, str, n);
+ if (str[0] == '-') {
+ // strtoull() will silently accept negative numbers and parse
+ // them. This module is more strict and treats them as errors.
+ return false;
+ }
+ char* end;
+ errno = 0;
+ unsigned long long r = strtoull(str, &end, radix);
+ if (end != str + n) return false; // Leftover junk
+ if (errno) return false;
+ if (dest == NULL) return true;
+ *(reinterpret_cast<unsigned long long*>(dest)) = r;
+ return true;
+}
+
+static bool parse_double_float(const char* str, size_t n, bool isfloat,
+ void* dest) {
+ if (n == 0) return false;
+ static const int kMaxLength = 200;
+ char buf[kMaxLength];
+ if (n >= kMaxLength) return false;
+ memcpy(buf, str, n);
+ buf[n] = '\0';
+ char* end;
+ errno = 0;
+ double r;
+ if (isfloat) {
+ r = strtof(buf, &end);
+ } else {
+ r = strtod(buf, &end);
+ }
+ if (end != buf + n) return false; // Leftover junk
+ if (errno) return false;
+ if (dest == NULL) return true;
+ if (isfloat) {
+ *(reinterpret_cast<float*>(dest)) = (float)r;
+ } else {
+ *(reinterpret_cast<double*>(dest)) = r;
+ }
+ return true;
+}
+
+bool PCRE::Arg::parse_double(const char* str, size_t n, void* dest) {
+ return parse_double_float(str, n, false, dest);
+}
+
+bool PCRE::Arg::parse_float(const char* str, size_t n, void* dest) {
+ return parse_double_float(str, n, true, dest);
+}
+
+#define DEFINE_INTEGER_PARSER(name) \
+ bool PCRE::Arg::parse_##name(const char* str, size_t n, void* dest) { \
+ return parse_##name##_radix(str, n, dest, 10); \
+ } \
+ bool PCRE::Arg::parse_##name##_hex(const char* str, size_t n, void* dest) { \
+ return parse_##name##_radix(str, n, dest, 16); \
+ } \
+ bool PCRE::Arg::parse_##name##_octal(const char* str, size_t n, \
+ void* dest) { \
+ return parse_##name##_radix(str, n, dest, 8); \
+ } \
+ bool PCRE::Arg::parse_##name##_cradix(const char* str, size_t n, \
+ void* dest) { \
+ return parse_##name##_radix(str, n, dest, 0); \
+ }
+
+DEFINE_INTEGER_PARSER(short);
+DEFINE_INTEGER_PARSER(ushort);
+DEFINE_INTEGER_PARSER(int);
+DEFINE_INTEGER_PARSER(uint);
+DEFINE_INTEGER_PARSER(long);
+DEFINE_INTEGER_PARSER(ulong);
+DEFINE_INTEGER_PARSER(longlong);
+DEFINE_INTEGER_PARSER(ulonglong);
+
+#undef DEFINE_INTEGER_PARSER
+
+} // namespace re2
diff --git a/contrib/libs/re2/util/pcre.h b/contrib/libs/re2/util/pcre.h
index 500c56d283..896b0bdf89 100644
--- a/contrib/libs/re2/util/pcre.h
+++ b/contrib/libs/re2/util/pcre.h
@@ -1,681 +1,681 @@
-// Copyright 2003-2010 Google Inc. All Rights Reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-#ifndef UTIL_PCRE_H_
-#define UTIL_PCRE_H_
-
-// This is a variant of PCRE's pcrecpp.h, originally written at Google.
-// The main changes are the addition of the HitLimit method and
-// compilation as PCRE in namespace re2.
-
-// C++ interface to the pcre regular-expression library. PCRE supports
-// Perl-style regular expressions (with extensions like \d, \w, \s,
-// ...).
-//
-// -----------------------------------------------------------------------
-// REGEXP SYNTAX:
-//
-// This module uses the pcre library and hence supports its syntax
-// for regular expressions:
-//
-// http://www.google.com/search?q=pcre
-//
-// The syntax is pretty similar to Perl's. For those not familiar
-// with Perl's regular expressions, here are some examples of the most
-// commonly used extensions:
-//
-// "hello (\\w+) world" -- \w matches a "word" character
-// "version (\\d+)" -- \d matches a digit
-// "hello\\s+world" -- \s matches any whitespace character
-// "\\b(\\w+)\\b" -- \b matches empty string at a word boundary
-// "(?i)hello" -- (?i) turns on case-insensitive matching
-// "/\\*(.*?)\\*/" -- .*? matches . minimum no. of times possible
-//
-// -----------------------------------------------------------------------
-// MATCHING INTERFACE:
-//
-// The "FullMatch" operation checks that supplied text matches a
-// supplied pattern exactly.
-//
-// Example: successful match
-// CHECK(PCRE::FullMatch("hello", "h.*o"));
-//
-// Example: unsuccessful match (requires full match):
-// CHECK(!PCRE::FullMatch("hello", "e"));
-//
-// -----------------------------------------------------------------------
-// UTF-8 AND THE MATCHING INTERFACE:
-//
-// By default, pattern and text are plain text, one byte per character.
-// The UTF8 flag, passed to the constructor, causes both pattern
-// and string to be treated as UTF-8 text, still a byte stream but
-// potentially multiple bytes per character. In practice, the text
-// is likelier to be UTF-8 than the pattern, but the match returned
-// may depend on the UTF8 flag, so always use it when matching
-// UTF8 text. E.g., "." will match one byte normally but with UTF8
-// set may match up to three bytes of a multi-byte character.
-//
-// Example:
-// PCRE re(utf8_pattern, PCRE::UTF8);
-// CHECK(PCRE::FullMatch(utf8_string, re));
-//
-// -----------------------------------------------------------------------
-// MATCHING WITH SUBSTRING EXTRACTION:
-//
-// You can supply extra pointer arguments to extract matched substrings.
-//
-// Example: extracts "ruby" into "s" and 1234 into "i"
-// int i;
-// std::string s;
-// CHECK(PCRE::FullMatch("ruby:1234", "(\\w+):(\\d+)", &s, &i));
-//
-// Example: fails because string cannot be stored in integer
-// CHECK(!PCRE::FullMatch("ruby", "(.*)", &i));
-//
-// Example: fails because there aren't enough sub-patterns:
-// CHECK(!PCRE::FullMatch("ruby:1234", "\\w+:\\d+", &s));
-//
-// Example: does not try to extract any extra sub-patterns
-// CHECK(PCRE::FullMatch("ruby:1234", "(\\w+):(\\d+)", &s));
-//
-// Example: does not try to extract into NULL
-// CHECK(PCRE::FullMatch("ruby:1234", "(\\w+):(\\d+)", NULL, &i));
-//
-// Example: integer overflow causes failure
-// CHECK(!PCRE::FullMatch("ruby:1234567891234", "\\w+:(\\d+)", &i));
-//
-// -----------------------------------------------------------------------
-// PARTIAL MATCHES
-//
-// You can use the "PartialMatch" operation when you want the pattern
-// to match any substring of the text.
-//
-// Example: simple search for a string:
-// CHECK(PCRE::PartialMatch("hello", "ell"));
-//
-// Example: find first number in a string
-// int number;
-// CHECK(PCRE::PartialMatch("x*100 + 20", "(\\d+)", &number));
-// CHECK_EQ(number, 100);
-//
-// -----------------------------------------------------------------------
-// PPCRE-COMPILED PCREGULAR EXPPCRESSIONS
-//
-// PCRE makes it easy to use any string as a regular expression, without
-// requiring a separate compilation step.
-//
-// If speed is of the essence, you can create a pre-compiled "PCRE"
-// object from the pattern and use it multiple times. If you do so,
-// you can typically parse text faster than with sscanf.
-//
-// Example: precompile pattern for faster matching:
-// PCRE pattern("h.*o");
-// while (ReadLine(&str)) {
-// if (PCRE::FullMatch(str, pattern)) ...;
-// }
-//
-// -----------------------------------------------------------------------
-// SCANNING TEXT INCPCREMENTALLY
-//
-// The "Consume" operation may be useful if you want to repeatedly
-// match regular expressions at the front of a string and skip over
-// them as they match. This requires use of the "StringPiece" type,
-// which represents a sub-range of a real string.
-//
-// Example: read lines of the form "var = value" from a string.
-// std::string contents = ...; // Fill string somehow
-// StringPiece input(contents); // Wrap a StringPiece around it
-//
-// std::string var;
-// int value;
-// while (PCRE::Consume(&input, "(\\w+) = (\\d+)\n", &var, &value)) {
-// ...;
-// }
-//
-// Each successful call to "Consume" will set "var/value", and also
-// advance "input" so it points past the matched text. Note that if the
-// regular expression matches an empty string, input will advance
-// by 0 bytes. If the regular expression being used might match
-// an empty string, the loop body must check for this case and either
-// advance the string or break out of the loop.
-//
-// The "FindAndConsume" operation is similar to "Consume" but does not
-// anchor your match at the beginning of the string. For example, you
-// could extract all words from a string by repeatedly calling
-// PCRE::FindAndConsume(&input, "(\\w+)", &word)
-//
-// -----------------------------------------------------------------------
-// PARSING HEX/OCTAL/C-RADIX NUMBERS
-//
-// By default, if you pass a pointer to a numeric value, the
-// corresponding text is interpreted as a base-10 number. You can
-// instead wrap the pointer with a call to one of the operators Hex(),
-// Octal(), or CRadix() to interpret the text in another base. The
-// CRadix operator interprets C-style "0" (base-8) and "0x" (base-16)
-// prefixes, but defaults to base-10.
-//
-// Example:
-// int a, b, c, d;
-// CHECK(PCRE::FullMatch("100 40 0100 0x40", "(.*) (.*) (.*) (.*)",
-// Octal(&a), Hex(&b), CRadix(&c), CRadix(&d));
-// will leave 64 in a, b, c, and d.
-
-#include "util/util.h"
-#include "re2/stringpiece.h"
-
-#ifdef USEPCRE
-#include <pcre.h>
-namespace re2 {
-const bool UsingPCRE = true;
-} // namespace re2
-#else
-struct pcre; // opaque
-namespace re2 {
-const bool UsingPCRE = false;
-} // namespace re2
-#endif
-
-namespace re2 {
-
-class PCRE_Options;
-
-// Interface for regular expression matching. Also corresponds to a
-// pre-compiled regular expression. An "PCRE" object is safe for
-// concurrent use by multiple threads.
-class PCRE {
- public:
- // We convert user-passed pointers into special Arg objects
- class Arg;
-
- // Marks end of arg list.
- // ONLY USE IN OPTIONAL ARG DEFAULTS.
- // DO NOT PASS EXPLICITLY.
- static Arg no_more_args;
-
- // Options are same value as those in pcre. We provide them here
- // to avoid users needing to include pcre.h and also to isolate
- // users from pcre should we change the underlying library.
- // Only those needed by Google programs are exposed here to
- // avoid collision with options employed internally by regexp.cc
- // Note that some options have equivalents that can be specified in
- // the regexp itself. For example, prefixing your regexp with
- // "(?s)" has the same effect as the PCRE_DOTALL option.
- enum Option {
- None = 0x0000,
- UTF8 = 0x0800, // == PCRE_UTF8
- EnabledCompileOptions = UTF8,
- EnabledExecOptions = 0x0000, // TODO: use to replace anchor flag
- };
-
- // We provide implicit conversions from strings so that users can
- // pass in a string or a "const char*" wherever an "PCRE" is expected.
- PCRE(const char* pattern);
- PCRE(const char* pattern, Option option);
- PCRE(const std::string& pattern);
- PCRE(const std::string& pattern, Option option);
- PCRE(const char *pattern, const PCRE_Options& re_option);
- PCRE(const std::string& pattern, const PCRE_Options& re_option);
-
- ~PCRE();
-
- // The string specification for this PCRE. E.g.
- // PCRE re("ab*c?d+");
- // re.pattern(); // "ab*c?d+"
- const std::string& pattern() const { return pattern_; }
-
- // If PCRE could not be created properly, returns an error string.
- // Else returns the empty string.
- const std::string& error() const { return *error_; }
-
- // Whether the PCRE has hit a match limit during execution.
- // Not thread safe. Intended only for testing.
- // If hitting match limits is a problem,
- // you should be using PCRE2 (re2/re2.h)
- // instead of checking this flag.
- bool HitLimit();
- void ClearHitLimit();
-
- /***** The useful part: the matching interface *****/
-
- // Matches "text" against "pattern". If pointer arguments are
- // supplied, copies matched sub-patterns into them.
- //
- // You can pass in a "const char*" or a "std::string" for "text".
- // You can pass in a "const char*" or a "std::string" or a "PCRE" for "pattern".
- //
- // The provided pointer arguments can be pointers to any scalar numeric
- // type, or one of:
- // std::string (matched piece is copied to string)
- // StringPiece (StringPiece is mutated to point to matched piece)
- // T (where "bool T::ParseFrom(const char*, size_t)" exists)
- // (void*)NULL (the corresponding matched sub-pattern is not copied)
- //
- // Returns true iff all of the following conditions are satisfied:
- // a. "text" matches "pattern" exactly
- // b. The number of matched sub-patterns is >= number of supplied pointers
- // c. The "i"th argument has a suitable type for holding the
- // string captured as the "i"th sub-pattern. If you pass in
- // NULL for the "i"th argument, or pass fewer arguments than
- // number of sub-patterns, "i"th captured sub-pattern is
- // ignored.
- //
- // CAVEAT: An optional sub-pattern that does not exist in the
- // matched string is assigned the empty string. Therefore, the
- // following will return false (because the empty string is not a
- // valid number):
- // int number;
- // PCRE::FullMatch("abc", "[a-z]+(\\d+)?", &number);
- struct FullMatchFunctor {
- bool operator ()(const StringPiece& text, const PCRE& re, // 3..16 args
- const Arg& ptr1 = no_more_args,
- const Arg& ptr2 = no_more_args,
- const Arg& ptr3 = no_more_args,
- const Arg& ptr4 = no_more_args,
- const Arg& ptr5 = no_more_args,
- const Arg& ptr6 = no_more_args,
- const Arg& ptr7 = no_more_args,
- const Arg& ptr8 = no_more_args,
- const Arg& ptr9 = no_more_args,
- const Arg& ptr10 = no_more_args,
- const Arg& ptr11 = no_more_args,
- const Arg& ptr12 = no_more_args,
- const Arg& ptr13 = no_more_args,
- const Arg& ptr14 = no_more_args,
- const Arg& ptr15 = no_more_args,
- const Arg& ptr16 = no_more_args) const;
- };
-
- static const FullMatchFunctor FullMatch;
-
- // Exactly like FullMatch(), except that "pattern" is allowed to match
- // a substring of "text".
- struct PartialMatchFunctor {
- bool operator ()(const StringPiece& text, const PCRE& re, // 3..16 args
- const Arg& ptr1 = no_more_args,
- const Arg& ptr2 = no_more_args,
- const Arg& ptr3 = no_more_args,
- const Arg& ptr4 = no_more_args,
- const Arg& ptr5 = no_more_args,
- const Arg& ptr6 = no_more_args,
- const Arg& ptr7 = no_more_args,
- const Arg& ptr8 = no_more_args,
- const Arg& ptr9 = no_more_args,
- const Arg& ptr10 = no_more_args,
- const Arg& ptr11 = no_more_args,
- const Arg& ptr12 = no_more_args,
- const Arg& ptr13 = no_more_args,
- const Arg& ptr14 = no_more_args,
- const Arg& ptr15 = no_more_args,
- const Arg& ptr16 = no_more_args) const;
- };
-
- static const PartialMatchFunctor PartialMatch;
-
- // Like FullMatch() and PartialMatch(), except that pattern has to
- // match a prefix of "text", and "input" is advanced past the matched
- // text. Note: "input" is modified iff this routine returns true.
- struct ConsumeFunctor {
- bool operator ()(StringPiece* input, const PCRE& pattern, // 3..16 args
- const Arg& ptr1 = no_more_args,
- const Arg& ptr2 = no_more_args,
- const Arg& ptr3 = no_more_args,
- const Arg& ptr4 = no_more_args,
- const Arg& ptr5 = no_more_args,
- const Arg& ptr6 = no_more_args,
- const Arg& ptr7 = no_more_args,
- const Arg& ptr8 = no_more_args,
- const Arg& ptr9 = no_more_args,
- const Arg& ptr10 = no_more_args,
- const Arg& ptr11 = no_more_args,
- const Arg& ptr12 = no_more_args,
- const Arg& ptr13 = no_more_args,
- const Arg& ptr14 = no_more_args,
- const Arg& ptr15 = no_more_args,
- const Arg& ptr16 = no_more_args) const;
- };
-
- static const ConsumeFunctor Consume;
-
- // Like Consume(..), but does not anchor the match at the beginning of the
- // string. That is, "pattern" need not start its match at the beginning of
- // "input". For example, "FindAndConsume(s, "(\\w+)", &word)" finds the next
- // word in "s" and stores it in "word".
- struct FindAndConsumeFunctor {
- bool operator ()(StringPiece* input, const PCRE& pattern,
- const Arg& ptr1 = no_more_args,
- const Arg& ptr2 = no_more_args,
- const Arg& ptr3 = no_more_args,
- const Arg& ptr4 = no_more_args,
- const Arg& ptr5 = no_more_args,
- const Arg& ptr6 = no_more_args,
- const Arg& ptr7 = no_more_args,
- const Arg& ptr8 = no_more_args,
- const Arg& ptr9 = no_more_args,
- const Arg& ptr10 = no_more_args,
- const Arg& ptr11 = no_more_args,
- const Arg& ptr12 = no_more_args,
- const Arg& ptr13 = no_more_args,
- const Arg& ptr14 = no_more_args,
- const Arg& ptr15 = no_more_args,
- const Arg& ptr16 = no_more_args) const;
- };
-
- static const FindAndConsumeFunctor FindAndConsume;
-
- // Replace the first match of "pattern" in "str" with "rewrite".
- // Within "rewrite", backslash-escaped digits (\1 to \9) can be
- // used to insert text matching corresponding parenthesized group
- // from the pattern. \0 in "rewrite" refers to the entire matching
- // text. E.g.,
- //
- // std::string s = "yabba dabba doo";
- // CHECK(PCRE::Replace(&s, "b+", "d"));
- //
- // will leave "s" containing "yada dabba doo"
- //
- // Returns true if the pattern matches and a replacement occurs,
- // false otherwise.
- static bool Replace(std::string *str,
- const PCRE& pattern,
- const StringPiece& rewrite);
-
- // Like Replace(), except replaces all occurrences of the pattern in
- // the string with the rewrite. Replacements are not subject to
- // re-matching. E.g.,
- //
- // std::string s = "yabba dabba doo";
- // CHECK(PCRE::GlobalReplace(&s, "b+", "d"));
- //
- // will leave "s" containing "yada dada doo"
- //
- // Returns the number of replacements made.
- static int GlobalReplace(std::string *str,
- const PCRE& pattern,
- const StringPiece& rewrite);
-
- // Like Replace, except that if the pattern matches, "rewrite"
- // is copied into "out" with substitutions. The non-matching
- // portions of "text" are ignored.
- //
- // Returns true iff a match occurred and the extraction happened
- // successfully; if no match occurs, the string is left unaffected.
- static bool Extract(const StringPiece &text,
- const PCRE& pattern,
- const StringPiece &rewrite,
- std::string *out);
-
- // Check that the given @p rewrite string is suitable for use with
- // this PCRE. It checks that:
- // * The PCRE has enough parenthesized subexpressions to satisfy all
- // of the \N tokens in @p rewrite, and
- // * The @p rewrite string doesn't have any syntax errors
- // ('\' followed by anything besides [0-9] and '\').
- // Making this test will guarantee that "replace" and "extract"
- // operations won't LOG(ERROR) or fail because of a bad rewrite
- // string.
- // @param rewrite The proposed rewrite string.
- // @param error An error message is recorded here, iff we return false.
- // Otherwise, it is unchanged.
- // @return true, iff @p rewrite is suitable for use with the PCRE.
- bool CheckRewriteString(const StringPiece& rewrite,
- std::string* error) const;
-
- // Returns a copy of 'unquoted' with all potentially meaningful
- // regexp characters backslash-escaped. The returned string, used
- // as a regular expression, will exactly match the original string.
- // For example,
- // 1.5-2.0?
- // becomes:
- // 1\.5\-2\.0\?
- static std::string QuoteMeta(const StringPiece& unquoted);
-
- /***** Generic matching interface (not so nice to use) *****/
-
- // Type of match (TODO: Should be restructured as an Option)
- enum Anchor {
- UNANCHORED, // No anchoring
- ANCHOR_START, // Anchor at start only
- ANCHOR_BOTH, // Anchor at start and end
- };
-
- // General matching routine. Stores the length of the match in
- // "*consumed" if successful.
- bool DoMatch(const StringPiece& text,
- Anchor anchor,
- size_t* consumed,
- const Arg* const* args, int n) const;
-
- // Return the number of capturing subpatterns, or -1 if the
- // regexp wasn't valid on construction.
- int NumberOfCapturingGroups() const;
-
- private:
- void Init(const char* pattern, Option option, int match_limit,
- int stack_limit, bool report_errors);
-
- // Match against "text", filling in "vec" (up to "vecsize" * 2/3) with
- // pairs of integers for the beginning and end positions of matched
- // text. The first pair corresponds to the entire matched text;
- // subsequent pairs correspond, in order, to parentheses-captured
- // matches. Returns the number of pairs (one more than the number of
- // the last subpattern with a match) if matching was successful
- // and zero if the match failed.
- // I.e. for PCRE("(foo)|(bar)|(baz)") it will return 2, 3, and 4 when matching
- // against "foo", "bar", and "baz" respectively.
- // When matching PCRE("(foo)|hello") against "hello", it will return 1.
- // But the values for all subpattern are filled in into "vec".
- int TryMatch(const StringPiece& text,
- size_t startpos,
- Anchor anchor,
- bool empty_ok,
- int *vec,
- int vecsize) const;
-
- // Append the "rewrite" string, with backslash subsitutions from "text"
- // and "vec", to string "out".
- bool Rewrite(std::string *out,
- const StringPiece &rewrite,
- const StringPiece &text,
- int *vec,
- int veclen) const;
-
- // internal implementation for DoMatch
- bool DoMatchImpl(const StringPiece& text,
- Anchor anchor,
- size_t* consumed,
- const Arg* const args[],
- int n,
- int* vec,
- int vecsize) const;
-
- // Compile the regexp for the specified anchoring mode
- pcre* Compile(Anchor anchor);
-
- std::string pattern_;
- Option options_;
- pcre* re_full_; // For full matches
- pcre* re_partial_; // For partial matches
- const std::string* error_; // Error indicator (or empty string)
- bool report_errors_; // Silences error logging if false
- int match_limit_; // Limit on execution resources
- int stack_limit_; // Limit on stack resources (bytes)
- mutable int32_t hit_limit_; // Hit limit during execution (bool)
-
- PCRE(const PCRE&) = delete;
- PCRE& operator=(const PCRE&) = delete;
-};
-
-// PCRE_Options allow you to set the PCRE::Options, plus any pcre
-// "extra" options. The only extras are match_limit, which limits
-// the CPU time of a match, and stack_limit, which limits the
-// stack usage. Setting a limit to <= 0 lets PCRE pick a sensible default
-// that should not cause too many problems in production code.
-// If PCRE hits a limit during a match, it may return a false negative,
-// but (hopefully) it won't crash.
-//
-// NOTE: If you are handling regular expressions specified by
-// (external or internal) users, rather than hard-coded ones,
-// you should be using PCRE2, which uses an alternate implementation
-// that avoids these issues. See http://go/re2quick.
-class PCRE_Options {
- public:
- // constructor
- PCRE_Options() : option_(PCRE::None), match_limit_(0), stack_limit_(0), report_errors_(true) {}
- // accessors
- PCRE::Option option() const { return option_; }
- void set_option(PCRE::Option option) {
- option_ = option;
- }
- int match_limit() const { return match_limit_; }
- void set_match_limit(int match_limit) {
- match_limit_ = match_limit;
- }
- int stack_limit() const { return stack_limit_; }
- void set_stack_limit(int stack_limit) {
- stack_limit_ = stack_limit;
- }
-
- // If the regular expression is malformed, an error message will be printed
- // iff report_errors() is true. Default: true.
- bool report_errors() const { return report_errors_; }
- void set_report_errors(bool report_errors) {
- report_errors_ = report_errors;
- }
- private:
- PCRE::Option option_;
- int match_limit_;
- int stack_limit_;
- bool report_errors_;
-};
-
-
-/***** Implementation details *****/
-
-// Hex/Octal/Binary?
-
-// Special class for parsing into objects that define a ParseFrom() method
-template <typename T>
-class _PCRE_MatchObject {
- public:
- static inline bool Parse(const char* str, size_t n, void* dest) {
- if (dest == NULL) return true;
- T* object = reinterpret_cast<T*>(dest);
- return object->ParseFrom(str, n);
- }
-};
-
-class PCRE::Arg {
- public:
- // Empty constructor so we can declare arrays of PCRE::Arg
- Arg();
-
- // Constructor specially designed for NULL arguments
- Arg(void*);
-
- typedef bool (*Parser)(const char* str, size_t n, void* dest);
-
-// Type-specific parsers
-#define MAKE_PARSER(type, name) \
- Arg(type* p) : arg_(p), parser_(name) {} \
- Arg(type* p, Parser parser) : arg_(p), parser_(parser) {}
-
- MAKE_PARSER(char, parse_char);
- MAKE_PARSER(signed char, parse_schar);
- MAKE_PARSER(unsigned char, parse_uchar);
- MAKE_PARSER(float, parse_float);
- MAKE_PARSER(double, parse_double);
- MAKE_PARSER(std::string, parse_string);
- MAKE_PARSER(StringPiece, parse_stringpiece);
-
- MAKE_PARSER(short, parse_short);
- MAKE_PARSER(unsigned short, parse_ushort);
- MAKE_PARSER(int, parse_int);
- MAKE_PARSER(unsigned int, parse_uint);
- MAKE_PARSER(long, parse_long);
- MAKE_PARSER(unsigned long, parse_ulong);
- MAKE_PARSER(long long, parse_longlong);
- MAKE_PARSER(unsigned long long, parse_ulonglong);
-
-#undef MAKE_PARSER
-
- // Generic constructor
- template <typename T> Arg(T*, Parser parser);
- // Generic constructor template
- template <typename T> Arg(T* p)
- : arg_(p), parser_(_PCRE_MatchObject<T>::Parse) {
- }
-
- // Parse the data
- bool Parse(const char* str, size_t n) const;
-
- private:
- void* arg_;
- Parser parser_;
-
- static bool parse_null (const char* str, size_t n, void* dest);
- static bool parse_char (const char* str, size_t n, void* dest);
- static bool parse_schar (const char* str, size_t n, void* dest);
- static bool parse_uchar (const char* str, size_t n, void* dest);
- static bool parse_float (const char* str, size_t n, void* dest);
- static bool parse_double (const char* str, size_t n, void* dest);
- static bool parse_string (const char* str, size_t n, void* dest);
- static bool parse_stringpiece (const char* str, size_t n, void* dest);
-
-#define DECLARE_INTEGER_PARSER(name) \
- private: \
- static bool parse_##name(const char* str, size_t n, void* dest); \
- static bool parse_##name##_radix(const char* str, size_t n, void* dest, \
- int radix); \
- \
- public: \
- static bool parse_##name##_hex(const char* str, size_t n, void* dest); \
- static bool parse_##name##_octal(const char* str, size_t n, void* dest); \
- static bool parse_##name##_cradix(const char* str, size_t n, void* dest)
-
- DECLARE_INTEGER_PARSER(short);
- DECLARE_INTEGER_PARSER(ushort);
- DECLARE_INTEGER_PARSER(int);
- DECLARE_INTEGER_PARSER(uint);
- DECLARE_INTEGER_PARSER(long);
- DECLARE_INTEGER_PARSER(ulong);
- DECLARE_INTEGER_PARSER(longlong);
- DECLARE_INTEGER_PARSER(ulonglong);
-
-#undef DECLARE_INTEGER_PARSER
-
-};
-
-inline PCRE::Arg::Arg() : arg_(NULL), parser_(parse_null) { }
-inline PCRE::Arg::Arg(void* p) : arg_(p), parser_(parse_null) { }
-
-inline bool PCRE::Arg::Parse(const char* str, size_t n) const {
- return (*parser_)(str, n, arg_);
-}
-
-// This part of the parser, appropriate only for ints, deals with bases
-#define MAKE_INTEGER_PARSER(type, name) \
- inline PCRE::Arg Hex(type* ptr) { \
- return PCRE::Arg(ptr, PCRE::Arg::parse_##name##_hex); \
- } \
- inline PCRE::Arg Octal(type* ptr) { \
- return PCRE::Arg(ptr, PCRE::Arg::parse_##name##_octal); \
- } \
- inline PCRE::Arg CRadix(type* ptr) { \
- return PCRE::Arg(ptr, PCRE::Arg::parse_##name##_cradix); \
- }
-
-MAKE_INTEGER_PARSER(short, short);
-MAKE_INTEGER_PARSER(unsigned short, ushort);
-MAKE_INTEGER_PARSER(int, int);
-MAKE_INTEGER_PARSER(unsigned int, uint);
-MAKE_INTEGER_PARSER(long, long);
-MAKE_INTEGER_PARSER(unsigned long, ulong);
-MAKE_INTEGER_PARSER(long long, longlong);
-MAKE_INTEGER_PARSER(unsigned long long, ulonglong);
-
-#undef MAKE_INTEGER_PARSER
-
-} // namespace re2
-
-#endif // UTIL_PCRE_H_
+// Copyright 2003-2010 Google Inc. All Rights Reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef UTIL_PCRE_H_
+#define UTIL_PCRE_H_
+
+// This is a variant of PCRE's pcrecpp.h, originally written at Google.
+// The main changes are the addition of the HitLimit method and
+// compilation as PCRE in namespace re2.
+
+// C++ interface to the pcre regular-expression library. PCRE supports
+// Perl-style regular expressions (with extensions like \d, \w, \s,
+// ...).
+//
+// -----------------------------------------------------------------------
+// REGEXP SYNTAX:
+//
+// This module uses the pcre library and hence supports its syntax
+// for regular expressions:
+//
+// http://www.google.com/search?q=pcre
+//
+// The syntax is pretty similar to Perl's. For those not familiar
+// with Perl's regular expressions, here are some examples of the most
+// commonly used extensions:
+//
+// "hello (\\w+) world" -- \w matches a "word" character
+// "version (\\d+)" -- \d matches a digit
+// "hello\\s+world" -- \s matches any whitespace character
+// "\\b(\\w+)\\b" -- \b matches empty string at a word boundary
+// "(?i)hello" -- (?i) turns on case-insensitive matching
+// "/\\*(.*?)\\*/" -- .*? matches . minimum no. of times possible
+//
+// -----------------------------------------------------------------------
+// MATCHING INTERFACE:
+//
+// The "FullMatch" operation checks that supplied text matches a
+// supplied pattern exactly.
+//
+// Example: successful match
+// CHECK(PCRE::FullMatch("hello", "h.*o"));
+//
+// Example: unsuccessful match (requires full match):
+// CHECK(!PCRE::FullMatch("hello", "e"));
+//
+// -----------------------------------------------------------------------
+// UTF-8 AND THE MATCHING INTERFACE:
+//
+// By default, pattern and text are plain text, one byte per character.
+// The UTF8 flag, passed to the constructor, causes both pattern
+// and string to be treated as UTF-8 text, still a byte stream but
+// potentially multiple bytes per character. In practice, the text
+// is likelier to be UTF-8 than the pattern, but the match returned
+// may depend on the UTF8 flag, so always use it when matching
+// UTF8 text. E.g., "." will match one byte normally but with UTF8
+// set may match up to four bytes of a multi-byte character.
+//
+// Example:
+// PCRE re(utf8_pattern, PCRE::UTF8);
+// CHECK(PCRE::FullMatch(utf8_string, re));
+//
+// -----------------------------------------------------------------------
+// MATCHING WITH SUBSTRING EXTRACTION:
+//
+// You can supply extra pointer arguments to extract matched substrings.
+//
+// Example: extracts "ruby" into "s" and 1234 into "i"
+// int i;
+// std::string s;
+// CHECK(PCRE::FullMatch("ruby:1234", "(\\w+):(\\d+)", &s, &i));
+//
+// Example: fails because string cannot be stored in integer
+// CHECK(!PCRE::FullMatch("ruby", "(.*)", &i));
+//
+// Example: fails because there aren't enough sub-patterns:
+// CHECK(!PCRE::FullMatch("ruby:1234", "\\w+:\\d+", &s));
+//
+// Example: does not try to extract any extra sub-patterns
+// CHECK(PCRE::FullMatch("ruby:1234", "(\\w+):(\\d+)", &s));
+//
+// Example: does not try to extract into NULL
+// CHECK(PCRE::FullMatch("ruby:1234", "(\\w+):(\\d+)", NULL, &i));
+//
+// Example: integer overflow causes failure
+// CHECK(!PCRE::FullMatch("ruby:1234567891234", "\\w+:(\\d+)", &i));
+//
+// -----------------------------------------------------------------------
+// PARTIAL MATCHES
+//
+// You can use the "PartialMatch" operation when you want the pattern
+// to match any substring of the text.
+//
+// Example: simple search for a string:
+// CHECK(PCRE::PartialMatch("hello", "ell"));
+//
+// Example: find first number in a string
+// int number;
+// CHECK(PCRE::PartialMatch("x*100 + 20", "(\\d+)", &number));
+// CHECK_EQ(number, 100);
+//
+// -----------------------------------------------------------------------
+// PRE-COMPILED REGULAR EXPRESSIONS
+//
+// PCRE makes it easy to use any string as a regular expression, without
+// requiring a separate compilation step.
+//
+// If speed is of the essence, you can create a pre-compiled "PCRE"
+// object from the pattern and use it multiple times. If you do so,
+// you can typically parse text faster than with sscanf.
+//
+// Example: precompile pattern for faster matching:
+// PCRE pattern("h.*o");
+// while (ReadLine(&str)) {
+// if (PCRE::FullMatch(str, pattern)) ...;
+// }
+//
+// -----------------------------------------------------------------------
+// SCANNING TEXT INCREMENTALLY
+//
+// The "Consume" operation may be useful if you want to repeatedly
+// match regular expressions at the front of a string and skip over
+// them as they match. This requires use of the "StringPiece" type,
+// which represents a sub-range of a real string.
+//
+// Example: read lines of the form "var = value" from a string.
+// std::string contents = ...; // Fill string somehow
+// StringPiece input(contents); // Wrap a StringPiece around it
+//
+// std::string var;
+// int value;
+// while (PCRE::Consume(&input, "(\\w+) = (\\d+)\n", &var, &value)) {
+// ...;
+// }
+//
+// Each successful call to "Consume" will set "var/value", and also
+// advance "input" so it points past the matched text. Note that if the
+// regular expression matches an empty string, input will advance
+// by 0 bytes. If the regular expression being used might match
+// an empty string, the loop body must check for this case and either
+// advance the string or break out of the loop.
+//
+// The "FindAndConsume" operation is similar to "Consume" but does not
+// anchor your match at the beginning of the string. For example, you
+// could extract all words from a string by repeatedly calling
+// PCRE::FindAndConsume(&input, "(\\w+)", &word)
+//
+// -----------------------------------------------------------------------
+// PARSING HEX/OCTAL/C-RADIX NUMBERS
+//
+// By default, if you pass a pointer to a numeric value, the
+// corresponding text is interpreted as a base-10 number. You can
+// instead wrap the pointer with a call to one of the operators Hex(),
+// Octal(), or CRadix() to interpret the text in another base. The
+// CRadix operator interprets C-style "0" (base-8) and "0x" (base-16)
+// prefixes, but defaults to base-10.
+//
+// Example:
+// int a, b, c, d;
+// CHECK(PCRE::FullMatch("100 40 0100 0x40", "(.*) (.*) (.*) (.*)",
+// Octal(&a), Hex(&b), CRadix(&c), CRadix(&d)));
+// will leave 64 in a, b, c, and d.
+
+#include "util/util.h"
+#include "re2/stringpiece.h"
+
+// Build-time switch on the USEPCRE macro. When it is defined, the real
+// <pcre.h> is included and re2::UsingPCRE is true. Otherwise `pcre` is only
+// forward-declared as an opaque struct and re2::UsingPCRE is false.
+#ifdef USEPCRE
+#include <pcre.h>
+namespace re2 {
+const bool UsingPCRE = true;
+} // namespace re2
+#else
+struct pcre; // opaque
+namespace re2 {
+const bool UsingPCRE = false;
+} // namespace re2
+#endif
+
+namespace re2 {
+
+class PCRE_Options;
+
+// Interface for regular expression matching. Also corresponds to a
+// pre-compiled regular expression. A "PCRE" object is safe for
+// concurrent use by multiple threads.
+class PCRE {
+ public:
+ // We convert user-passed pointers into special Arg objects
+ class Arg;
+
+ // Marks end of arg list.
+ // ONLY USE IN OPTIONAL ARG DEFAULTS.
+ // DO NOT PASS EXPLICITLY.
+ static Arg no_more_args;
+
+ // Options have the same values as those in pcre. We provide them here
+ // to avoid users needing to include pcre.h and also to isolate
+ // users from pcre should we change the underlying library.
+ // Only those needed by Google programs are exposed here to
+ // avoid collision with options employed internally by regexp.cc
+ // Note that some options have equivalents that can be specified in
+ // the regexp itself. For example, prefixing your regexp with
+ // "(?s)" has the same effect as the PCRE_DOTALL option.
+ enum Option {
+ None = 0x0000,
+ UTF8 = 0x0800, // == PCRE_UTF8
+ EnabledCompileOptions = UTF8,
+ EnabledExecOptions = 0x0000, // TODO: use to replace anchor flag
+ };
+
+ // We provide implicit conversions from strings so that users can
+ // pass in a string or a "const char*" wherever a "PCRE" is expected.
+ PCRE(const char* pattern);
+ PCRE(const char* pattern, Option option);
+ PCRE(const std::string& pattern);
+ PCRE(const std::string& pattern, Option option);
+ PCRE(const char *pattern, const PCRE_Options& re_option);
+ PCRE(const std::string& pattern, const PCRE_Options& re_option);
+
+ ~PCRE();
+
+ // The string specification for this PCRE. E.g.
+ // PCRE re("ab*c?d+");
+ // re.pattern(); // "ab*c?d+"
+ const std::string& pattern() const { return pattern_; }
+
+ // If PCRE could not be created properly, returns an error string.
+ // Else returns the empty string.
+ const std::string& error() const { return *error_; }
+
+ // Whether the PCRE has hit a match limit during execution.
+ // Not thread safe. Intended only for testing.
+ // If hitting match limits is a problem,
+ // you should be using RE2 (re2/re2.h)
+ // instead of checking this flag.
+ bool HitLimit();
+ void ClearHitLimit();
+
+ /***** The useful part: the matching interface *****/
+
+ // Matches "text" against "pattern". If pointer arguments are
+ // supplied, copies matched sub-patterns into them.
+ //
+ // You can pass in a "const char*" or a "std::string" for "text".
+ // You can pass in a "const char*" or a "std::string" or a "PCRE" for "pattern".
+ //
+ // The provided pointer arguments can be pointers to any scalar numeric
+ // type, or one of:
+ // std::string (matched piece is copied to string)
+ // StringPiece (StringPiece is mutated to point to matched piece)
+ // T (where "bool T::ParseFrom(const char*, size_t)" exists)
+ // (void*)NULL (the corresponding matched sub-pattern is not copied)
+ //
+ // Returns true iff all of the following conditions are satisfied:
+ // a. "text" matches "pattern" exactly
+ // b. The number of matched sub-patterns is >= number of supplied pointers
+ // c. The "i"th argument has a suitable type for holding the
+ // string captured as the "i"th sub-pattern. If you pass in
+ // NULL for the "i"th argument, or pass fewer arguments than
+ // number of sub-patterns, "i"th captured sub-pattern is
+ // ignored.
+ //
+ // CAVEAT: An optional sub-pattern that does not exist in the
+ // matched string is assigned the empty string. Therefore, the
+ // following will return false (because the empty string is not a
+ // valid number):
+ // int number;
+ // PCRE::FullMatch("abc", "[a-z]+(\\d+)?", &number);
+ struct FullMatchFunctor {
+ bool operator ()(const StringPiece& text, const PCRE& re, // 3..16 args
+ const Arg& ptr1 = no_more_args,
+ const Arg& ptr2 = no_more_args,
+ const Arg& ptr3 = no_more_args,
+ const Arg& ptr4 = no_more_args,
+ const Arg& ptr5 = no_more_args,
+ const Arg& ptr6 = no_more_args,
+ const Arg& ptr7 = no_more_args,
+ const Arg& ptr8 = no_more_args,
+ const Arg& ptr9 = no_more_args,
+ const Arg& ptr10 = no_more_args,
+ const Arg& ptr11 = no_more_args,
+ const Arg& ptr12 = no_more_args,
+ const Arg& ptr13 = no_more_args,
+ const Arg& ptr14 = no_more_args,
+ const Arg& ptr15 = no_more_args,
+ const Arg& ptr16 = no_more_args) const;
+ };
+
+ static const FullMatchFunctor FullMatch;
+
+ // Exactly like FullMatch(), except that "pattern" is allowed to match
+ // a substring of "text".
+ struct PartialMatchFunctor {
+ bool operator ()(const StringPiece& text, const PCRE& re, // 3..16 args
+ const Arg& ptr1 = no_more_args,
+ const Arg& ptr2 = no_more_args,
+ const Arg& ptr3 = no_more_args,
+ const Arg& ptr4 = no_more_args,
+ const Arg& ptr5 = no_more_args,
+ const Arg& ptr6 = no_more_args,
+ const Arg& ptr7 = no_more_args,
+ const Arg& ptr8 = no_more_args,
+ const Arg& ptr9 = no_more_args,
+ const Arg& ptr10 = no_more_args,
+ const Arg& ptr11 = no_more_args,
+ const Arg& ptr12 = no_more_args,
+ const Arg& ptr13 = no_more_args,
+ const Arg& ptr14 = no_more_args,
+ const Arg& ptr15 = no_more_args,
+ const Arg& ptr16 = no_more_args) const;
+ };
+
+ static const PartialMatchFunctor PartialMatch;
+
+ // Like FullMatch() and PartialMatch(), except that pattern has to
+ // match a prefix of "text", and "input" is advanced past the matched
+ // text. Note: "input" is modified iff this routine returns true.
+ struct ConsumeFunctor {
+ bool operator ()(StringPiece* input, const PCRE& pattern, // 3..16 args
+ const Arg& ptr1 = no_more_args,
+ const Arg& ptr2 = no_more_args,
+ const Arg& ptr3 = no_more_args,
+ const Arg& ptr4 = no_more_args,
+ const Arg& ptr5 = no_more_args,
+ const Arg& ptr6 = no_more_args,
+ const Arg& ptr7 = no_more_args,
+ const Arg& ptr8 = no_more_args,
+ const Arg& ptr9 = no_more_args,
+ const Arg& ptr10 = no_more_args,
+ const Arg& ptr11 = no_more_args,
+ const Arg& ptr12 = no_more_args,
+ const Arg& ptr13 = no_more_args,
+ const Arg& ptr14 = no_more_args,
+ const Arg& ptr15 = no_more_args,
+ const Arg& ptr16 = no_more_args) const;
+ };
+
+ static const ConsumeFunctor Consume;
+
+ // Like Consume(..), but does not anchor the match at the beginning of the
+ // string. That is, "pattern" need not start its match at the beginning of
+ // "input". For example, "FindAndConsume(s, "(\\w+)", &word)" finds the next
+ // word in "s" and stores it in "word".
+ struct FindAndConsumeFunctor {
+ bool operator ()(StringPiece* input, const PCRE& pattern,
+ const Arg& ptr1 = no_more_args,
+ const Arg& ptr2 = no_more_args,
+ const Arg& ptr3 = no_more_args,
+ const Arg& ptr4 = no_more_args,
+ const Arg& ptr5 = no_more_args,
+ const Arg& ptr6 = no_more_args,
+ const Arg& ptr7 = no_more_args,
+ const Arg& ptr8 = no_more_args,
+ const Arg& ptr9 = no_more_args,
+ const Arg& ptr10 = no_more_args,
+ const Arg& ptr11 = no_more_args,
+ const Arg& ptr12 = no_more_args,
+ const Arg& ptr13 = no_more_args,
+ const Arg& ptr14 = no_more_args,
+ const Arg& ptr15 = no_more_args,
+ const Arg& ptr16 = no_more_args) const;
+ };
+
+ static const FindAndConsumeFunctor FindAndConsume;
+
+ // Replace the first match of "pattern" in "str" with "rewrite".
+ // Within "rewrite", backslash-escaped digits (\1 to \9) can be
+ // used to insert text matching corresponding parenthesized group
+ // from the pattern. \0 in "rewrite" refers to the entire matching
+ // text. E.g.,
+ //
+ // std::string s = "yabba dabba doo";
+ // CHECK(PCRE::Replace(&s, "b+", "d"));
+ //
+ // will leave "s" containing "yada dabba doo"
+ //
+ // Returns true if the pattern matches and a replacement occurs,
+ // false otherwise.
+ static bool Replace(std::string *str,
+ const PCRE& pattern,
+ const StringPiece& rewrite);
+
+ // Like Replace(), except replaces all occurrences of the pattern in
+ // the string with the rewrite. Replacements are not subject to
+ // re-matching. E.g.,
+ //
+ // std::string s = "yabba dabba doo";
+ // CHECK(PCRE::GlobalReplace(&s, "b+", "d"));
+ //
+ // will leave "s" containing "yada dada doo"
+ //
+ // Returns the number of replacements made.
+ static int GlobalReplace(std::string *str,
+ const PCRE& pattern,
+ const StringPiece& rewrite);
+
+ // Like Replace, except that if the pattern matches, "rewrite"
+ // is copied into "out" with substitutions. The non-matching
+ // portions of "text" are ignored.
+ //
+ // Returns true iff a match occurred and the extraction happened
+ // successfully; if no match occurs, the string is left unaffected.
+ static bool Extract(const StringPiece &text,
+ const PCRE& pattern,
+ const StringPiece &rewrite,
+ std::string *out);
+
+ // Check that the given @p rewrite string is suitable for use with
+ // this PCRE. It checks that:
+ // * The PCRE has enough parenthesized subexpressions to satisfy all
+ // of the \N tokens in @p rewrite, and
+ // * The @p rewrite string doesn't have any syntax errors
+ // ('\' followed by anything besides [0-9] and '\').
+ // Making this test will guarantee that "replace" and "extract"
+ // operations won't LOG(ERROR) or fail because of a bad rewrite
+ // string.
+ // @param rewrite The proposed rewrite string.
+ // @param error An error message is recorded here, iff we return false.
+ // Otherwise, it is unchanged.
+ // @return true, iff @p rewrite is suitable for use with the PCRE.
+ bool CheckRewriteString(const StringPiece& rewrite,
+ std::string* error) const;
+
+ // Returns a copy of 'unquoted' with all potentially meaningful
+ // regexp characters backslash-escaped. The returned string, used
+ // as a regular expression, will exactly match the original string.
+ // For example,
+ // 1.5-2.0?
+ // becomes:
+ // 1\.5\-2\.0\?
+ static std::string QuoteMeta(const StringPiece& unquoted);
+
+ /***** Generic matching interface (not so nice to use) *****/
+
+ // Type of match (TODO: Should be restructured as an Option)
+ enum Anchor {
+ UNANCHORED, // No anchoring
+ ANCHOR_START, // Anchor at start only
+ ANCHOR_BOTH, // Anchor at start and end
+ };
+
+ // General matching routine. Stores the length of the match in
+ // "*consumed" if successful.
+ bool DoMatch(const StringPiece& text,
+ Anchor anchor,
+ size_t* consumed,
+ const Arg* const* args, int n) const;
+
+ // Return the number of capturing subpatterns, or -1 if the
+ // regexp wasn't valid on construction.
+ int NumberOfCapturingGroups() const;
+
+ private:
+ void Init(const char* pattern, Option option, int match_limit,
+ int stack_limit, bool report_errors);
+
+ // Match against "text", filling in "vec" (up to "vecsize" * 2/3) with
+ // pairs of integers for the beginning and end positions of matched
+ // text. The first pair corresponds to the entire matched text;
+ // subsequent pairs correspond, in order, to parentheses-captured
+ // matches. Returns the number of pairs (one more than the number of
+ // the last subpattern with a match) if matching was successful
+ // and zero if the match failed.
+ // I.e. for PCRE("(foo)|(bar)|(baz)") it will return 2, 3, and 4 when matching
+ // against "foo", "bar", and "baz" respectively.
+ // When matching PCRE("(foo)|hello") against "hello", it will return 1.
+ // But the values for all subpatterns are filled into "vec".
+ int TryMatch(const StringPiece& text,
+ size_t startpos,
+ Anchor anchor,
+ bool empty_ok,
+ int *vec,
+ int vecsize) const;
+
+ // Append the "rewrite" string, with backslash substitutions from "text"
+ // and "vec", to string "out".
+ bool Rewrite(std::string *out,
+ const StringPiece &rewrite,
+ const StringPiece &text,
+ int *vec,
+ int veclen) const;
+
+ // internal implementation for DoMatch
+ bool DoMatchImpl(const StringPiece& text,
+ Anchor anchor,
+ size_t* consumed,
+ const Arg* const args[],
+ int n,
+ int* vec,
+ int vecsize) const;
+
+ // Compile the regexp for the specified anchoring mode
+ pcre* Compile(Anchor anchor);
+
+ std::string pattern_;
+ Option options_;
+ pcre* re_full_; // For full matches
+ pcre* re_partial_; // For partial matches
+ const std::string* error_; // Error indicator (or empty string)
+ bool report_errors_; // Silences error logging if false
+ int match_limit_; // Limit on execution resources
+ int stack_limit_; // Limit on stack resources (bytes)
+ mutable int32_t hit_limit_; // Hit limit during execution (bool)
+
+ PCRE(const PCRE&) = delete;
+ PCRE& operator=(const PCRE&) = delete;
+};
+
+// PCRE_Options allow you to set the PCRE::Options, plus any pcre
+// "extra" options. The only extras are match_limit, which limits
+// the CPU time of a match, and stack_limit, which limits the
+// stack usage. Setting a limit to <= 0 lets PCRE pick a sensible default
+// that should not cause too many problems in production code.
+// If PCRE hits a limit during a match, it may return a false negative,
+// but (hopefully) it won't crash.
+//
+// NOTE: If you are handling regular expressions specified by
+// (external or internal) users, rather than hard-coded ones,
+// you should be using RE2, which uses an alternate implementation
+// that avoids these issues. See http://go/re2quick.
+class PCRE_Options {
+ public:
+ // constructor
+ PCRE_Options() : option_(PCRE::None), match_limit_(0), stack_limit_(0), report_errors_(true) {}
+ // accessors
+ PCRE::Option option() const { return option_; }
+ void set_option(PCRE::Option option) {
+ option_ = option;
+ }
+ int match_limit() const { return match_limit_; }
+ void set_match_limit(int match_limit) {
+ match_limit_ = match_limit;
+ }
+ int stack_limit() const { return stack_limit_; }
+ void set_stack_limit(int stack_limit) {
+ stack_limit_ = stack_limit;
+ }
+
+ // If the regular expression is malformed, an error message will be printed
+ // iff report_errors() is true. Default: true.
+ bool report_errors() const { return report_errors_; }
+ void set_report_errors(bool report_errors) {
+ report_errors_ = report_errors;
+ }
+ private:
+ PCRE::Option option_;
+ int match_limit_;
+ int stack_limit_;
+ bool report_errors_;
+};
+
+
+/***** Implementation details *****/
+
+// Hex/Octal/Binary?
+
+// Special class for parsing into objects that define a ParseFrom() method
+template <typename T>
+class _PCRE_MatchObject {
+ public:
+ static inline bool Parse(const char* str, size_t n, void* dest) {
+ if (dest == NULL) return true;
+ T* object = reinterpret_cast<T*>(dest);
+ return object->ParseFrom(str, n);
+ }
+};
+
+class PCRE::Arg {
+ public:
+ // Empty constructor so we can declare arrays of PCRE::Arg
+ Arg();
+
+ // Constructor specially designed for NULL arguments
+ Arg(void*);
+
+ typedef bool (*Parser)(const char* str, size_t n, void* dest);
+
+// Type-specific parsers
+#define MAKE_PARSER(type, name) \
+ Arg(type* p) : arg_(p), parser_(name) {} \
+ Arg(type* p, Parser parser) : arg_(p), parser_(parser) {}
+
+ MAKE_PARSER(char, parse_char);
+ MAKE_PARSER(signed char, parse_schar);
+ MAKE_PARSER(unsigned char, parse_uchar);
+ MAKE_PARSER(float, parse_float);
+ MAKE_PARSER(double, parse_double);
+ MAKE_PARSER(std::string, parse_string);
+ MAKE_PARSER(StringPiece, parse_stringpiece);
+
+ MAKE_PARSER(short, parse_short);
+ MAKE_PARSER(unsigned short, parse_ushort);
+ MAKE_PARSER(int, parse_int);
+ MAKE_PARSER(unsigned int, parse_uint);
+ MAKE_PARSER(long, parse_long);
+ MAKE_PARSER(unsigned long, parse_ulong);
+ MAKE_PARSER(long long, parse_longlong);
+ MAKE_PARSER(unsigned long long, parse_ulonglong);
+
+#undef MAKE_PARSER
+
+ // Generic constructor
+ template <typename T> Arg(T*, Parser parser);
+ // Generic constructor template
+ template <typename T> Arg(T* p)
+ : arg_(p), parser_(_PCRE_MatchObject<T>::Parse) {
+ }
+
+ // Parse the data
+ bool Parse(const char* str, size_t n) const;
+
+ private:
+ void* arg_;
+ Parser parser_;
+
+ static bool parse_null (const char* str, size_t n, void* dest);
+ static bool parse_char (const char* str, size_t n, void* dest);
+ static bool parse_schar (const char* str, size_t n, void* dest);
+ static bool parse_uchar (const char* str, size_t n, void* dest);
+ static bool parse_float (const char* str, size_t n, void* dest);
+ static bool parse_double (const char* str, size_t n, void* dest);
+ static bool parse_string (const char* str, size_t n, void* dest);
+ static bool parse_stringpiece (const char* str, size_t n, void* dest);
+
+#define DECLARE_INTEGER_PARSER(name) \
+ private: \
+ static bool parse_##name(const char* str, size_t n, void* dest); \
+ static bool parse_##name##_radix(const char* str, size_t n, void* dest, \
+ int radix); \
+ \
+ public: \
+ static bool parse_##name##_hex(const char* str, size_t n, void* dest); \
+ static bool parse_##name##_octal(const char* str, size_t n, void* dest); \
+ static bool parse_##name##_cradix(const char* str, size_t n, void* dest)
+
+ DECLARE_INTEGER_PARSER(short);
+ DECLARE_INTEGER_PARSER(ushort);
+ DECLARE_INTEGER_PARSER(int);
+ DECLARE_INTEGER_PARSER(uint);
+ DECLARE_INTEGER_PARSER(long);
+ DECLARE_INTEGER_PARSER(ulong);
+ DECLARE_INTEGER_PARSER(longlong);
+ DECLARE_INTEGER_PARSER(ulonglong);
+
+#undef DECLARE_INTEGER_PARSER
+
+};
+
+inline PCRE::Arg::Arg() : arg_(NULL), parser_(parse_null) { }
+inline PCRE::Arg::Arg(void* p) : arg_(p), parser_(parse_null) { }
+
+inline bool PCRE::Arg::Parse(const char* str, size_t n) const {
+ return (*parser_)(str, n, arg_);
+}
+
+// This part of the parser, appropriate only for ints, deals with bases
+#define MAKE_INTEGER_PARSER(type, name) \
+ inline PCRE::Arg Hex(type* ptr) { \
+ return PCRE::Arg(ptr, PCRE::Arg::parse_##name##_hex); \
+ } \
+ inline PCRE::Arg Octal(type* ptr) { \
+ return PCRE::Arg(ptr, PCRE::Arg::parse_##name##_octal); \
+ } \
+ inline PCRE::Arg CRadix(type* ptr) { \
+ return PCRE::Arg(ptr, PCRE::Arg::parse_##name##_cradix); \
+ }
+
+MAKE_INTEGER_PARSER(short, short);
+MAKE_INTEGER_PARSER(unsigned short, ushort);
+MAKE_INTEGER_PARSER(int, int);
+MAKE_INTEGER_PARSER(unsigned int, uint);
+MAKE_INTEGER_PARSER(long, long);
+MAKE_INTEGER_PARSER(unsigned long, ulong);
+MAKE_INTEGER_PARSER(long long, longlong);
+MAKE_INTEGER_PARSER(unsigned long long, ulonglong);
+
+#undef MAKE_INTEGER_PARSER
+
+} // namespace re2
+
+#endif // UTIL_PCRE_H_
diff --git a/contrib/libs/re2/util/strutil.cc b/contrib/libs/re2/util/strutil.cc
index f9af3a442c..fb7e6b1b0c 100644
--- a/contrib/libs/re2/util/strutil.cc
+++ b/contrib/libs/re2/util/strutil.cc
@@ -65,34 +65,34 @@ static size_t CEscapeString(const char* src, size_t src_len,
// Copies 'src' to result, escaping dangerous characters using
// C-style escape sequences. 'src' and 'dest' should not overlap.
// ----------------------------------------------------------------------
-std::string CEscape(const StringPiece& src) {
+std::string CEscape(const StringPiece& src) {
const size_t dest_len = src.size() * 4 + 1; // Maximum possible expansion
char* dest = new char[dest_len];
const size_t used = CEscapeString(src.data(), src.size(),
dest, dest_len);
- std::string s = std::string(dest, used);
+ std::string s = std::string(dest, used);
delete[] dest;
return s;
}
-void PrefixSuccessor(std::string* prefix) {
+void PrefixSuccessor(std::string* prefix) {
// We can increment the last character in the string and be done
// unless that character is 255, in which case we have to erase the
// last character and increment the previous character, unless that
// is 255, etc. If the string is empty or consists entirely of
// 255's, we just return the empty string.
- while (!prefix->empty()) {
- char& c = prefix->back();
- if (c == '\xff') { // char literal avoids signed/unsigned.
- prefix->pop_back();
+ while (!prefix->empty()) {
+ char& c = prefix->back();
+ if (c == '\xff') { // char literal avoids signed/unsigned.
+ prefix->pop_back();
} else {
- ++c;
- break;
+ ++c;
+ break;
}
}
}
-static void StringAppendV(std::string* dst, const char* format, va_list ap) {
+static void StringAppendV(std::string* dst, const char* format, va_list ap) {
// First try with a small fixed size buffer
char space[1024];
@@ -137,10 +137,10 @@ static void StringAppendV(std::string* dst, const char* format, va_list ap) {
}
}
-std::string StringPrintf(const char* format, ...) {
+std::string StringPrintf(const char* format, ...) {
va_list ap;
va_start(ap, format);
- std::string result;
+ std::string result;
StringAppendV(&result, format, ap);
va_end(ap);
return result;
diff --git a/contrib/libs/re2/util/strutil.h b/contrib/libs/re2/util/strutil.h
index 16631b0833..a69908a0dd 100644
--- a/contrib/libs/re2/util/strutil.h
+++ b/contrib/libs/re2/util/strutil.h
@@ -12,10 +12,10 @@
namespace re2 {
-std::string CEscape(const StringPiece& src);
-void PrefixSuccessor(std::string* prefix);
-std::string StringPrintf(const char* format, ...);
+std::string CEscape(const StringPiece& src);
+void PrefixSuccessor(std::string* prefix);
+std::string StringPrintf(const char* format, ...);
} // namespace re2
-
+
#endif // UTIL_STRUTIL_H_
diff --git a/contrib/libs/re2/util/test.cc b/contrib/libs/re2/util/test.cc
index 855295f5bf..028616b359 100644
--- a/contrib/libs/re2/util/test.cc
+++ b/contrib/libs/re2/util/test.cc
@@ -1,34 +1,34 @@
-// Copyright 2009 The RE2 Authors. All Rights Reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-#include <stdio.h>
-#include <string>
-
-#include "util/test.h"
-
-namespace testing {
-std::string TempDir() { return "/tmp/"; }
-} // namespace testing
-
-struct Test {
- void (*fn)(void);
- const char *name;
-};
-
-static Test tests[10000];
-static int ntests;
-
-void RegisterTest(void (*fn)(void), const char *name) {
- tests[ntests].fn = fn;
- tests[ntests++].name = name;
-}
-
-int main(int argc, char** argv) {
- for (int i = 0; i < ntests; i++) {
- printf("%s\n", tests[i].name);
- tests[i].fn();
- }
- printf("PASS\n");
- return 0;
-}
+// Copyright 2009 The RE2 Authors. All Rights Reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <stdio.h>
+#include <string>
+
+#include "util/test.h"
+
+namespace testing {
+std::string TempDir() { return "/tmp/"; }
+} // namespace testing
+
+struct Test {
+ void (*fn)(void);
+ const char *name;
+};
+
+static Test tests[10000];
+static int ntests;
+
+void RegisterTest(void (*fn)(void), const char *name) {
+ tests[ntests].fn = fn;
+ tests[ntests++].name = name;
+}
+
+int main(int argc, char** argv) {
+ for (int i = 0; i < ntests; i++) {
+ printf("%s\n", tests[i].name);
+ tests[i].fn();
+ }
+ printf("PASS\n");
+ return 0;
+}
diff --git a/contrib/libs/re2/util/test.h b/contrib/libs/re2/util/test.h
index 40978b8fae..54e6f8fbbb 100644
--- a/contrib/libs/re2/util/test.h
+++ b/contrib/libs/re2/util/test.h
@@ -1,50 +1,50 @@
-// Copyright 2009 The RE2 Authors. All Rights Reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-#ifndef UTIL_TEST_H_
-#define UTIL_TEST_H_
-
-#include "util/util.h"
-#include "util/logging.h"
-
-namespace testing {
-std::string TempDir();
-} // namespace testing
-
-#define TEST(x, y) \
- void x##y(void); \
- TestRegisterer r##x##y(x##y, # x "." # y); \
- void x##y(void)
-
-void RegisterTest(void (*)(void), const char*);
-
-class TestRegisterer {
- public:
- TestRegisterer(void (*fn)(void), const char *s) {
- RegisterTest(fn, s);
- }
-};
-
-// fatal assertions
-#define ASSERT_TRUE CHECK
-#define ASSERT_FALSE(x) CHECK(!(x))
-#define ASSERT_EQ CHECK_EQ
-#define ASSERT_NE CHECK_NE
-#define ASSERT_LT CHECK_LT
-#define ASSERT_LE CHECK_LE
-#define ASSERT_GT CHECK_GT
-#define ASSERT_GE CHECK_GE
-
-// nonfatal assertions
-// TODO(rsc): Do a better job?
-#define EXPECT_TRUE CHECK
-#define EXPECT_FALSE(x) CHECK(!(x))
-#define EXPECT_EQ CHECK_EQ
-#define EXPECT_NE CHECK_NE
-#define EXPECT_LT CHECK_LT
-#define EXPECT_LE CHECK_LE
-#define EXPECT_GT CHECK_GT
-#define EXPECT_GE CHECK_GE
-
-#endif // UTIL_TEST_H_
+// Copyright 2009 The RE2 Authors. All Rights Reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef UTIL_TEST_H_
+#define UTIL_TEST_H_
+
+#include "util/util.h"
+#include "util/logging.h"
+
+namespace testing {
+std::string TempDir();
+} // namespace testing
+
+#define TEST(x, y) \
+ void x##y(void); \
+ TestRegisterer r##x##y(x##y, # x "." # y); \
+ void x##y(void)
+
+void RegisterTest(void (*)(void), const char*);
+
+class TestRegisterer {
+ public:
+ TestRegisterer(void (*fn)(void), const char *s) {
+ RegisterTest(fn, s);
+ }
+};
+
+// fatal assertions
+#define ASSERT_TRUE CHECK
+#define ASSERT_FALSE(x) CHECK(!(x))
+#define ASSERT_EQ CHECK_EQ
+#define ASSERT_NE CHECK_NE
+#define ASSERT_LT CHECK_LT
+#define ASSERT_LE CHECK_LE
+#define ASSERT_GT CHECK_GT
+#define ASSERT_GE CHECK_GE
+
+// nonfatal assertions
+// TODO(rsc): Do a better job?
+#define EXPECT_TRUE CHECK
+#define EXPECT_FALSE(x) CHECK(!(x))
+#define EXPECT_EQ CHECK_EQ
+#define EXPECT_NE CHECK_NE
+#define EXPECT_LT CHECK_LT
+#define EXPECT_LE CHECK_LE
+#define EXPECT_GT CHECK_GT
+#define EXPECT_GE CHECK_GE
+
+#endif // UTIL_TEST_H_
diff --git a/contrib/libs/re2/util/util.h b/contrib/libs/re2/util/util.h
index 0d28a8ca74..56e46c1a33 100644
--- a/contrib/libs/re2/util/util.h
+++ b/contrib/libs/re2/util/util.h
@@ -5,35 +5,35 @@
#ifndef UTIL_UTIL_H_
#define UTIL_UTIL_H_
-#define arraysize(array) (sizeof(array)/sizeof((array)[0]))
+#define arraysize(array) (sizeof(array)/sizeof((array)[0]))
-#ifndef ATTRIBUTE_NORETURN
-#if defined(__GNUC__)
-#define ATTRIBUTE_NORETURN __attribute__((noreturn))
-#elif defined(_MSC_VER)
-#define ATTRIBUTE_NORETURN __declspec(noreturn)
-#else
-#define ATTRIBUTE_NORETURN
-#endif
-#endif
+#ifndef ATTRIBUTE_NORETURN
+#if defined(__GNUC__)
+#define ATTRIBUTE_NORETURN __attribute__((noreturn))
+#elif defined(_MSC_VER)
+#define ATTRIBUTE_NORETURN __declspec(noreturn)
+#else
+#define ATTRIBUTE_NORETURN
+#endif
+#endif
+
+#ifndef ATTRIBUTE_UNUSED
+#if defined(__GNUC__)
+#define ATTRIBUTE_UNUSED __attribute__((unused))
+#else
+#define ATTRIBUTE_UNUSED
+#endif
+#endif
-#ifndef ATTRIBUTE_UNUSED
-#if defined(__GNUC__)
-#define ATTRIBUTE_UNUSED __attribute__((unused))
-#else
-#define ATTRIBUTE_UNUSED
-#endif
-#endif
-
#ifndef FALLTHROUGH_INTENDED
-#if defined(__clang__)
-#define FALLTHROUGH_INTENDED [[clang::fallthrough]]
-#elif defined(__GNUC__) && __GNUC__ >= 7
-#define FALLTHROUGH_INTENDED [[gnu::fallthrough]]
-#else
-#define FALLTHROUGH_INTENDED do {} while (0)
+#if defined(__clang__)
+#define FALLTHROUGH_INTENDED [[clang::fallthrough]]
+#elif defined(__GNUC__) && __GNUC__ >= 7
+#define FALLTHROUGH_INTENDED [[gnu::fallthrough]]
+#else
+#define FALLTHROUGH_INTENDED do {} while (0)
+#endif
#endif
-#endif
#ifndef NO_THREAD_SAFETY_ANALYSIS
#define NO_THREAD_SAFETY_ANALYSIS