aboutsummaryrefslogtreecommitdiffstats
path: root/contrib/libs/re2
diff options
context:
space:
mode:
authorthegeorg <thegeorg@yandex-team.ru>2022-02-10 16:45:12 +0300
committerDaniil Cherednik <dcherednik@yandex-team.ru>2022-02-10 16:45:12 +0300
commit49116032d905455a7b1c994e4a696afc885c1e71 (patch)
treebe835aa92c6248212e705f25388ebafcf84bc7a1 /contrib/libs/re2
parent4e839db24a3bbc9f1c610c43d6faaaa99824dcca (diff)
downloadydb-49116032d905455a7b1c994e4a696afc885c1e71.tar.gz
Restoring authorship annotation for <thegeorg@yandex-team.ru>. Commit 2 of 2.
Diffstat (limited to 'contrib/libs/re2')
-rw-r--r--contrib/libs/re2/AUTHORS26
-rw-r--r--contrib/libs/re2/CONTRIBUTING.md2
-rw-r--r--contrib/libs/re2/CONTRIBUTORS82
-rw-r--r--contrib/libs/re2/LICENSE54
-rw-r--r--contrib/libs/re2/README82
-rw-r--r--contrib/libs/re2/SECURITY.md8
-rw-r--r--contrib/libs/re2/include/re2/re2.h2
-rw-r--r--contrib/libs/re2/include/re2/stringpiece.h2
-rw-r--r--contrib/libs/re2/include/util/logging.h2
-rw-r--r--contrib/libs/re2/include/util/utf.h2
-rw-r--r--contrib/libs/re2/include/util/util.h2
-rw-r--r--contrib/libs/re2/libre2.symbols38
-rw-r--r--contrib/libs/re2/re2/bitmap256.h16
-rw-r--r--contrib/libs/re2/re2/bitstate.cc290
-rw-r--r--contrib/libs/re2/re2/compile.cc430
-rw-r--r--contrib/libs/re2/re2/dfa.cc668
-rw-r--r--contrib/libs/re2/re2/filtered_re2.cc40
-rw-r--r--contrib/libs/re2/re2/filtered_re2.h54
-rw-r--r--contrib/libs/re2/re2/mimics_pcre.cc42
-rw-r--r--contrib/libs/re2/re2/nfa.cc322
-rw-r--r--contrib/libs/re2/re2/onepass.cc42
-rw-r--r--contrib/libs/re2/re2/parse.cc594
-rw-r--r--contrib/libs/re2/re2/perl_groups.cc68
-rw-r--r--contrib/libs/re2/re2/pod_array.h110
-rw-r--r--contrib/libs/re2/re2/prefilter.cc90
-rw-r--r--contrib/libs/re2/re2/prefilter.h10
-rw-r--r--contrib/libs/re2/re2/prefilter_tree.cc100
-rw-r--r--contrib/libs/re2/re2/prefilter_tree.h26
-rw-r--r--contrib/libs/re2/re2/prog.cc820
-rw-r--r--contrib/libs/re2/re2/prog.h230
-rw-r--r--contrib/libs/re2/re2/re2.cc806
-rw-r--r--contrib/libs/re2/re2/re2.h722
-rw-r--r--contrib/libs/re2/re2/regexp.cc212
-rw-r--r--contrib/libs/re2/re2/regexp.h72
-rw-r--r--contrib/libs/re2/re2/set.cc194
-rw-r--r--contrib/libs/re2/re2/set.h88
-rw-r--r--contrib/libs/re2/re2/simplify.cc46
-rw-r--r--contrib/libs/re2/re2/sparse_array.h248
-rw-r--r--contrib/libs/re2/re2/sparse_set.h134
-rw-r--r--contrib/libs/re2/re2/stringpiece.h44
-rw-r--r--contrib/libs/re2/re2/testing/backtrack.cc544
-rw-r--r--contrib/libs/re2/re2/testing/charclass_test.cc452
-rw-r--r--contrib/libs/re2/re2/testing/compile_test.cc854
-rw-r--r--contrib/libs/re2/re2/testing/dump.cc326
-rw-r--r--contrib/libs/re2/re2/testing/exhaustive_tester.cc378
-rw-r--r--contrib/libs/re2/re2/testing/exhaustive_tester.h210
-rw-r--r--contrib/libs/re2/re2/testing/filtered_re2_test.cc680
-rw-r--r--contrib/libs/re2/re2/testing/mimics_pcre_test.cc154
-rw-r--r--contrib/libs/re2/re2/testing/null_walker.cc98
-rw-r--r--contrib/libs/re2/re2/testing/parse_test.cc1016
-rw-r--r--contrib/libs/re2/re2/testing/possible_match_test.cc494
-rw-r--r--contrib/libs/re2/re2/testing/re2_arg_test.cc320
-rw-r--r--contrib/libs/re2/re2/testing/re2_test.cc3318
-rw-r--r--contrib/libs/re2/re2/testing/regexp_generator.cc552
-rw-r--r--contrib/libs/re2/re2/testing/regexp_generator.h154
-rw-r--r--contrib/libs/re2/re2/testing/regexp_test.cc172
-rw-r--r--contrib/libs/re2/re2/testing/required_prefix_test.cc398
-rw-r--r--contrib/libs/re2/re2/testing/search_test.cc668
-rw-r--r--contrib/libs/re2/re2/testing/set_test.cc460
-rw-r--r--contrib/libs/re2/re2/testing/simplify_test.cc546
-rw-r--r--contrib/libs/re2/re2/testing/string_generator.cc282
-rw-r--r--contrib/libs/re2/re2/testing/string_generator.h152
-rw-r--r--contrib/libs/re2/re2/testing/string_generator_test.cc220
-rw-r--r--contrib/libs/re2/re2/testing/tester.cc1352
-rw-r--r--contrib/libs/re2/re2/testing/tester.h246
-rw-r--r--contrib/libs/re2/re2/testing/ya.make90
-rw-r--r--contrib/libs/re2/re2/tostring.cc26
-rw-r--r--contrib/libs/re2/re2/unicode_casefold.cc130
-rw-r--r--contrib/libs/re2/re2/unicode_groups.cc6092
-rw-r--r--contrib/libs/re2/re2/walker-inl.h24
-rw-r--r--contrib/libs/re2/util/flags.h52
-rw-r--r--contrib/libs/re2/util/logging.h4
-rw-r--r--contrib/libs/re2/util/mutex.h38
-rw-r--r--contrib/libs/re2/util/pcre.cc2050
-rw-r--r--contrib/libs/re2/util/pcre.h1362
-rw-r--r--contrib/libs/re2/util/strutil.cc24
-rw-r--r--contrib/libs/re2/util/strutil.h8
-rw-r--r--contrib/libs/re2/util/test.cc68
-rw-r--r--contrib/libs/re2/util/test.h100
-rw-r--r--contrib/libs/re2/util/util.h50
-rw-r--r--contrib/libs/re2/ya.make36
81 files changed, 15510 insertions, 15510 deletions
diff --git a/contrib/libs/re2/AUTHORS b/contrib/libs/re2/AUTHORS
index 38866ae96a..0754006fec 100644
--- a/contrib/libs/re2/AUTHORS
+++ b/contrib/libs/re2/AUTHORS
@@ -1,13 +1,13 @@
-# This is the official list of RE2 authors for copyright purposes.
-# This file is distinct from the CONTRIBUTORS files.
-# See the latter for an explanation.
-
-# Names should be added to this file as
-# Name or Organization <email address>
-# The email address is not required for organizations.
-
-# Please keep the list sorted.
-
-Google Inc.
-Samsung Electronics
-Stefano Rivera <stefano.rivera@gmail.com>
+# This is the official list of RE2 authors for copyright purposes.
+# This file is distinct from the CONTRIBUTORS files.
+# See the latter for an explanation.
+
+# Names should be added to this file as
+# Name or Organization <email address>
+# The email address is not required for organizations.
+
+# Please keep the list sorted.
+
+Google Inc.
+Samsung Electronics
+Stefano Rivera <stefano.rivera@gmail.com>
diff --git a/contrib/libs/re2/CONTRIBUTING.md b/contrib/libs/re2/CONTRIBUTING.md
index f8d54cec2c..882b0e2f34 100644
--- a/contrib/libs/re2/CONTRIBUTING.md
+++ b/contrib/libs/re2/CONTRIBUTING.md
@@ -1,2 +1,2 @@
-RE2 uses Gerrit instead of GitHub pull requests.
+RE2 uses Gerrit instead of GitHub pull requests.
See the [Contribute](https://github.com/google/re2/wiki/Contribute) wiki page.
diff --git a/contrib/libs/re2/CONTRIBUTORS b/contrib/libs/re2/CONTRIBUTORS
index fdceed8aa2..1a1c84827d 100644
--- a/contrib/libs/re2/CONTRIBUTORS
+++ b/contrib/libs/re2/CONTRIBUTORS
@@ -1,41 +1,41 @@
-# This is the official list of people who can contribute
-# (and typically have contributed) code to the RE2 repository.
-# The AUTHORS file lists the copyright holders; this file
-# lists people. For example, Google employees are listed here
-# but not in AUTHORS, because Google holds the copyright.
-#
-# The submission process automatically checks to make sure
-# that people submitting code are listed in this file (by email address).
-#
-# Names should be added to this file only after verifying that
-# the individual or the individual's organization has agreed to
-# the appropriate Contributor License Agreement, found here:
-#
-# http://code.google.com/legal/individual-cla-v1.0.html
-# http://code.google.com/legal/corporate-cla-v1.0.html
-#
-# The agreement for individuals can be filled out on the web.
-#
-# When adding J Random Contributor's name to this file,
-# either J's name or J's organization's name should be
-# added to the AUTHORS file, depending on whether the
-# individual or corporate CLA was used.
-
-# Names should be added to this file like so:
-# Name <email address>
-
-# Please keep the list sorted.
-
-Dominic Battré <battre@chromium.org>
-Doug Kwan <dougkwan@google.com>
-Dmitriy Vyukov <dvyukov@google.com>
-John Millikin <jmillikin@gmail.com>
-Mike Nazarewicz <mpn@google.com>
-Nico Weber <thakis@chromium.org>
-Pawel Hajdan <phajdan.jr@gmail.com>
-Rob Pike <r@google.com>
-Russ Cox <rsc@swtch.com>
-Sanjay Ghemawat <sanjay@google.com>
-Stefano Rivera <stefano.rivera@gmail.com>
-Srinivasan Venkatachary <vsri@google.com>
-Viatcheslav Ostapenko <sl.ostapenko@samsung.com>
+# This is the official list of people who can contribute
+# (and typically have contributed) code to the RE2 repository.
+# The AUTHORS file lists the copyright holders; this file
+# lists people. For example, Google employees are listed here
+# but not in AUTHORS, because Google holds the copyright.
+#
+# The submission process automatically checks to make sure
+# that people submitting code are listed in this file (by email address).
+#
+# Names should be added to this file only after verifying that
+# the individual or the individual's organization has agreed to
+# the appropriate Contributor License Agreement, found here:
+#
+# http://code.google.com/legal/individual-cla-v1.0.html
+# http://code.google.com/legal/corporate-cla-v1.0.html
+#
+# The agreement for individuals can be filled out on the web.
+#
+# When adding J Random Contributor's name to this file,
+# either J's name or J's organization's name should be
+# added to the AUTHORS file, depending on whether the
+# individual or corporate CLA was used.
+
+# Names should be added to this file like so:
+# Name <email address>
+
+# Please keep the list sorted.
+
+Dominic Battré <battre@chromium.org>
+Doug Kwan <dougkwan@google.com>
+Dmitriy Vyukov <dvyukov@google.com>
+John Millikin <jmillikin@gmail.com>
+Mike Nazarewicz <mpn@google.com>
+Nico Weber <thakis@chromium.org>
+Pawel Hajdan <phajdan.jr@gmail.com>
+Rob Pike <r@google.com>
+Russ Cox <rsc@swtch.com>
+Sanjay Ghemawat <sanjay@google.com>
+Stefano Rivera <stefano.rivera@gmail.com>
+Srinivasan Venkatachary <vsri@google.com>
+Viatcheslav Ostapenko <sl.ostapenko@samsung.com>
diff --git a/contrib/libs/re2/LICENSE b/contrib/libs/re2/LICENSE
index 36747eca8b..09e5ec1c74 100644
--- a/contrib/libs/re2/LICENSE
+++ b/contrib/libs/re2/LICENSE
@@ -1,27 +1,27 @@
-// Copyright (c) 2009 The RE2 Authors. All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-// * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-// * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-// * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+// Copyright (c) 2009 The RE2 Authors. All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/contrib/libs/re2/README b/contrib/libs/re2/README
index 2a9cc501da..caee6afb6b 100644
--- a/contrib/libs/re2/README
+++ b/contrib/libs/re2/README
@@ -1,42 +1,42 @@
-This is the source code repository for RE2, a regular expression library.
-
-For documentation about how to install and use RE2,
-visit https://github.com/google/re2/.
-
-The short version is:
-
-make
-make test
-make install
-make testinstall
-
-There is a fair amount of documentation (including code snippets) in
-the re2.h header file.
-
-More information can be found on the wiki:
-https://github.com/google/re2/wiki
-
-Issue tracker:
-https://github.com/google/re2/issues
-
-Mailing list:
-https://groups.google.com/group/re2-dev
-
-Unless otherwise noted, the RE2 source files are distributed
-under the BSD-style license found in the LICENSE file.
-
-RE2's native language is C++.
-
-The Python wrapper is at https://github.com/google/re2/tree/abseil/python
-and on PyPI (https://pypi.org/project/google-re2/).
-
-A C wrapper is at https://github.com/marcomaggi/cre2/.
+This is the source code repository for RE2, a regular expression library.
+
+For documentation about how to install and use RE2,
+visit https://github.com/google/re2/.
+
+The short version is:
+
+make
+make test
+make install
+make testinstall
+
+There is a fair amount of documentation (including code snippets) in
+the re2.h header file.
+
+More information can be found on the wiki:
+https://github.com/google/re2/wiki
+
+Issue tracker:
+https://github.com/google/re2/issues
+
+Mailing list:
+https://groups.google.com/group/re2-dev
+
+Unless otherwise noted, the RE2 source files are distributed
+under the BSD-style license found in the LICENSE file.
+
+RE2's native language is C++.
+
+The Python wrapper is at https://github.com/google/re2/tree/abseil/python
+and on PyPI (https://pypi.org/project/google-re2/).
+
+A C wrapper is at https://github.com/marcomaggi/cre2/.
A D wrapper is at https://github.com/ShigekiKarita/re2d/ and on DUB (code.dlang.org).
-An Erlang wrapper is at https://github.com/dukesoferl/re2/ and on Hex (hex.pm).
-An Inferno wrapper is at https://github.com/powerman/inferno-re2/.
-A Node.js wrapper is at https://github.com/uhop/node-re2/ and on NPM (npmjs.com).
-An OCaml wrapper is at https://github.com/janestreet/re2/ and on OPAM (opam.ocaml.org).
-A Perl wrapper is at https://github.com/dgl/re-engine-RE2/ and on CPAN (cpan.org).
-An R wrapper is at https://github.com/girishji/re2/ and on CRAN (cran.r-project.org).
-A Ruby wrapper is at https://github.com/mudge/re2/ and on RubyGems (rubygems.org).
-A WebAssembly wrapper is at https://github.com/google/re2-wasm/ and on NPM (npmjs.com).
+An Erlang wrapper is at https://github.com/dukesoferl/re2/ and on Hex (hex.pm).
+An Inferno wrapper is at https://github.com/powerman/inferno-re2/.
+A Node.js wrapper is at https://github.com/uhop/node-re2/ and on NPM (npmjs.com).
+An OCaml wrapper is at https://github.com/janestreet/re2/ and on OPAM (opam.ocaml.org).
+A Perl wrapper is at https://github.com/dgl/re-engine-RE2/ and on CPAN (cpan.org).
+An R wrapper is at https://github.com/girishji/re2/ and on CRAN (cran.r-project.org).
+A Ruby wrapper is at https://github.com/mudge/re2/ and on RubyGems (rubygems.org).
+A WebAssembly wrapper is at https://github.com/google/re2-wasm/ and on NPM (npmjs.com).
diff --git a/contrib/libs/re2/SECURITY.md b/contrib/libs/re2/SECURITY.md
index 8b7e853ad7..39ba0e93f2 100644
--- a/contrib/libs/re2/SECURITY.md
+++ b/contrib/libs/re2/SECURITY.md
@@ -1,4 +1,4 @@
-To report a security issue, please use https://g.co/vulnz. We use
-https://g.co/vulnz for our intake, and do coordination and disclosure here on
-GitHub (including using GitHub Security Advisory). The Google Security Team will
-respond within 5 working days of your report on https://g.co/vulnz.
+To report a security issue, please use https://g.co/vulnz. We use
+https://g.co/vulnz for our intake, and do coordination and disclosure here on
+GitHub (including using GitHub Security Advisory). The Google Security Team will
+respond within 5 working days of your report on https://g.co/vulnz.
diff --git a/contrib/libs/re2/include/re2/re2.h b/contrib/libs/re2/include/re2/re2.h
index 3305b909e5..31cfa08363 100644
--- a/contrib/libs/re2/include/re2/re2.h
+++ b/contrib/libs/re2/include/re2/re2.h
@@ -1 +1 @@
-#include "../../re2/re2.h" /* inclink generated by yamaker */
+#include "../../re2/re2.h" /* inclink generated by yamaker */
diff --git a/contrib/libs/re2/include/re2/stringpiece.h b/contrib/libs/re2/include/re2/stringpiece.h
index 7e4d849d40..fce36b84eb 100644
--- a/contrib/libs/re2/include/re2/stringpiece.h
+++ b/contrib/libs/re2/include/re2/stringpiece.h
@@ -1 +1 @@
-#include "../../re2/stringpiece.h" /* inclink generated by yamaker */
+#include "../../re2/stringpiece.h" /* inclink generated by yamaker */
diff --git a/contrib/libs/re2/include/util/logging.h b/contrib/libs/re2/include/util/logging.h
index a03a5f7595..6b83bd42dd 100644
--- a/contrib/libs/re2/include/util/logging.h
+++ b/contrib/libs/re2/include/util/logging.h
@@ -1 +1 @@
-#include "../../util/logging.h" /* inclink generated by yamaker */
+#include "../../util/logging.h" /* inclink generated by yamaker */
diff --git a/contrib/libs/re2/include/util/utf.h b/contrib/libs/re2/include/util/utf.h
index 7542658dc3..fa6fec714a 100644
--- a/contrib/libs/re2/include/util/utf.h
+++ b/contrib/libs/re2/include/util/utf.h
@@ -1 +1 @@
-#include "../../util/utf.h" /* inclink generated by yamaker */
+#include "../../util/utf.h" /* inclink generated by yamaker */
diff --git a/contrib/libs/re2/include/util/util.h b/contrib/libs/re2/include/util/util.h
index da1cab83fd..86b8c06006 100644
--- a/contrib/libs/re2/include/util/util.h
+++ b/contrib/libs/re2/include/util/util.h
@@ -1 +1 @@
-#include "../../util/util.h" /* inclink generated by yamaker */
+#include "../../util/util.h" /* inclink generated by yamaker */
diff --git a/contrib/libs/re2/libre2.symbols b/contrib/libs/re2/libre2.symbols
index 7b667473b8..93b71b4862 100644
--- a/contrib/libs/re2/libre2.symbols
+++ b/contrib/libs/re2/libre2.symbols
@@ -1,19 +1,19 @@
-{
- global:
- # re2::RE2*
- _ZN3re23RE2*;
- _ZNK3re23RE2*;
- # re2::StringPiece*
- _ZN3re211StringPiece*;
- _ZNK3re211StringPiece*;
- # re2::operator<<*
- _ZN3re2ls*;
- # re2::FilteredRE2*
- _ZN3re211FilteredRE2*;
- _ZNK3re211FilteredRE2*;
- # re2::re2_internal*
- _ZN3re212re2_internal*;
- _ZNK3re212re2_internal*;
- local:
- *;
-};
+{
+ global:
+ # re2::RE2*
+ _ZN3re23RE2*;
+ _ZNK3re23RE2*;
+ # re2::StringPiece*
+ _ZN3re211StringPiece*;
+ _ZNK3re211StringPiece*;
+ # re2::operator<<*
+ _ZN3re2ls*;
+ # re2::FilteredRE2*
+ _ZN3re211FilteredRE2*;
+ _ZNK3re211FilteredRE2*;
+ # re2::re2_internal*
+ _ZN3re212re2_internal*;
+ _ZNK3re212re2_internal*;
+ local:
+ *;
+};
diff --git a/contrib/libs/re2/re2/bitmap256.h b/contrib/libs/re2/re2/bitmap256.h
index 9328aea005..4899379e4d 100644
--- a/contrib/libs/re2/re2/bitmap256.h
+++ b/contrib/libs/re2/re2/bitmap256.h
@@ -19,11 +19,11 @@ namespace re2 {
class Bitmap256 {
public:
Bitmap256() {
- Clear();
- }
-
- // Clears all of the bits.
- void Clear() {
+ Clear();
+ }
+
+ // Clears all of the bits.
+ void Clear() {
memset(words_, 0, sizeof words_);
}
@@ -32,7 +32,7 @@ class Bitmap256 {
DCHECK_GE(c, 0);
DCHECK_LE(c, 255);
- return (words_[c / 64] & (uint64_t{1} << (c % 64))) != 0;
+ return (words_[c / 64] & (uint64_t{1} << (c % 64))) != 0;
}
// Sets the bit with index c.
@@ -40,7 +40,7 @@ class Bitmap256 {
DCHECK_GE(c, 0);
DCHECK_LE(c, 255);
- words_[c / 64] |= (uint64_t{1} << (c % 64));
+ words_[c / 64] |= (uint64_t{1} << (c % 64));
}
// Finds the next non-zero bit with index >= c.
@@ -88,7 +88,7 @@ int Bitmap256::FindNextSetBit(int c) const {
// Check the word that contains the bit. Mask out any lower bits.
int i = c / 64;
- uint64_t word = words_[i] & (~uint64_t{0} << (c % 64));
+ uint64_t word = words_[i] & (~uint64_t{0} << (c % 64));
if (word != 0)
return (i * 64) + FindLSBSet(word);
diff --git a/contrib/libs/re2/re2/bitstate.cc b/contrib/libs/re2/re2/bitstate.cc
index 6dfee10cc5..877e548234 100644
--- a/contrib/libs/re2/re2/bitstate.cc
+++ b/contrib/libs/re2/re2/bitstate.cc
@@ -5,10 +5,10 @@
// Tested by search_test.cc, exhaustive_test.cc, tester.cc
// Prog::SearchBitState is a regular expression search with submatch
-// tracking for small regular expressions and texts. Similarly to
-// testing/backtrack.cc, it allocates a bitmap with (count of
-// lists) * (length of text) bits to make sure it never explores the
-// same (instruction list, character position) multiple times. This
+// tracking for small regular expressions and texts. Similarly to
+// testing/backtrack.cc, it allocates a bitmap with (count of
+// lists) * (length of text) bits to make sure it never explores the
+// same (instruction list, character position) multiple times. This
// limits the search to run in time linear in the length of the text.
//
// Unlike testing/backtrack.cc, SearchBitState is not recursive
@@ -20,11 +20,11 @@
#include <stddef.h>
#include <stdint.h>
#include <string.h>
-#include <limits>
-#include <utility>
+#include <limits>
+#include <utility>
#include "util/logging.h"
-#include "re2/pod_array.h"
+#include "re2/pod_array.h"
#include "re2/prog.h"
#include "re2/regexp.h"
@@ -32,7 +32,7 @@ namespace re2 {
struct Job {
int id;
- int rle; // run length encoding
+ int rle; // run length encoding
const char* p;
};
@@ -48,8 +48,8 @@ class BitState {
private:
inline bool ShouldVisit(int id, const char* p);
- void Push(int id, const char* p);
- void GrowStack();
+ void Push(int id, const char* p);
+ void GrowStack();
bool TrySearch(int id, const char* p);
// Search parameters
@@ -59,18 +59,18 @@ class BitState {
bool anchored_; // whether search is anchored at text.begin()
bool longest_; // whether search wants leftmost-longest match
bool endmatch_; // whether match must end at text.end()
- StringPiece* submatch_; // submatches to fill in
+ StringPiece* submatch_; // submatches to fill in
int nsubmatch_; // # of submatches to fill in
// Search state
- static constexpr int kVisitedBits = 64;
- PODArray<uint64_t> visited_; // bitmap: (list ID, char*) pairs visited
- PODArray<const char*> cap_; // capture registers
- PODArray<Job> job_; // stack of text positions to explore
- int njob_; // stack size
-
- BitState(const BitState&) = delete;
- BitState& operator=(const BitState&) = delete;
+ static constexpr int kVisitedBits = 64;
+ PODArray<uint64_t> visited_; // bitmap: (list ID, char*) pairs visited
+ PODArray<const char*> cap_; // capture registers
+ PODArray<Job> job_; // stack of text positions to explore
+ int njob_; // stack size
+
+ BitState(const BitState&) = delete;
+ BitState& operator=(const BitState&) = delete;
};
BitState::BitState(Prog* prog)
@@ -80,115 +80,115 @@ BitState::BitState(Prog* prog)
endmatch_(false),
submatch_(NULL),
nsubmatch_(0),
- njob_(0) {
+ njob_(0) {
}
-// Given id, which *must* be a list head, we can look up its list ID.
-// Then the question is: Should the search visit the (list ID, p) pair?
+// Given id, which *must* be a list head, we can look up its list ID.
+// Then the question is: Should the search visit the (list ID, p) pair?
// If so, remember that it was visited so that the next time,
// we don't repeat the visit.
bool BitState::ShouldVisit(int id, const char* p) {
- int n = prog_->list_heads()[id] * static_cast<int>(text_.size()+1) +
- static_cast<int>(p-text_.data());
- if (visited_[n/kVisitedBits] & (uint64_t{1} << (n & (kVisitedBits-1))))
+ int n = prog_->list_heads()[id] * static_cast<int>(text_.size()+1) +
+ static_cast<int>(p-text_.data());
+ if (visited_[n/kVisitedBits] & (uint64_t{1} << (n & (kVisitedBits-1))))
return false;
- visited_[n/kVisitedBits] |= uint64_t{1} << (n & (kVisitedBits-1));
+ visited_[n/kVisitedBits] |= uint64_t{1} << (n & (kVisitedBits-1));
return true;
}
// Grow the stack.
-void BitState::GrowStack() {
- PODArray<Job> tmp(2*job_.size());
- memmove(tmp.data(), job_.data(), njob_*sizeof job_[0]);
- job_ = std::move(tmp);
+void BitState::GrowStack() {
+ PODArray<Job> tmp(2*job_.size());
+ memmove(tmp.data(), job_.data(), njob_*sizeof job_[0]);
+ job_ = std::move(tmp);
}
-// Push (id, p) onto the stack, growing it if necessary.
-void BitState::Push(int id, const char* p) {
- if (njob_ >= job_.size()) {
- GrowStack();
- if (njob_ >= job_.size()) {
- LOG(DFATAL) << "GrowStack() failed: "
- << "njob_ = " << njob_ << ", "
- << "job_.size() = " << job_.size();
+// Push (id, p) onto the stack, growing it if necessary.
+void BitState::Push(int id, const char* p) {
+ if (njob_ >= job_.size()) {
+ GrowStack();
+ if (njob_ >= job_.size()) {
+ LOG(DFATAL) << "GrowStack() failed: "
+ << "njob_ = " << njob_ << ", "
+ << "job_.size() = " << job_.size();
return;
- }
+ }
}
- // If id < 0, it's undoing a Capture,
- // so we mustn't interfere with that.
- if (id >= 0 && njob_ > 0) {
- Job* top = &job_[njob_-1];
- if (id == top->id &&
- p == top->p + top->rle + 1 &&
- top->rle < std::numeric_limits<int>::max()) {
- ++top->rle;
- return;
- }
- }
-
- Job* top = &job_[njob_++];
- top->id = id;
- top->rle = 0;
- top->p = p;
+ // If id < 0, it's undoing a Capture,
+ // so we mustn't interfere with that.
+ if (id >= 0 && njob_ > 0) {
+ Job* top = &job_[njob_-1];
+ if (id == top->id &&
+ p == top->p + top->rle + 1 &&
+ top->rle < std::numeric_limits<int>::max()) {
+ ++top->rle;
+ return;
+ }
+ }
+
+ Job* top = &job_[njob_++];
+ top->id = id;
+ top->rle = 0;
+ top->p = p;
}
// Try a search from instruction id0 in state p0.
// Return whether it succeeded.
bool BitState::TrySearch(int id0, const char* p0) {
bool matched = false;
- const char* end = text_.data() + text_.size();
+ const char* end = text_.data() + text_.size();
njob_ = 0;
- // Push() no longer checks ShouldVisit(),
- // so we must perform the check ourselves.
- if (ShouldVisit(id0, p0))
- Push(id0, p0);
+ // Push() no longer checks ShouldVisit(),
+ // so we must perform the check ourselves.
+ if (ShouldVisit(id0, p0))
+ Push(id0, p0);
while (njob_ > 0) {
// Pop job off stack.
--njob_;
int id = job_[njob_].id;
- int& rle = job_[njob_].rle;
+ int& rle = job_[njob_].rle;
const char* p = job_[njob_].p;
- if (id < 0) {
- // Undo the Capture.
- cap_[prog_->inst(-id)->cap()] = p;
- continue;
- }
-
- if (rle > 0) {
- p += rle;
- // Revivify job on stack.
- --rle;
- ++njob_;
+ if (id < 0) {
+ // Undo the Capture.
+ cap_[prog_->inst(-id)->cap()] = p;
+ continue;
+ }
+
+ if (rle > 0) {
+ p += rle;
+ // Revivify job on stack.
+ --rle;
+ ++njob_;
}
- Loop:
- // Visit id, p.
+ Loop:
+ // Visit id, p.
Prog::Inst* ip = prog_->inst(id);
switch (ip->opcode()) {
default:
- LOG(DFATAL) << "Unexpected opcode: " << ip->opcode();
+ LOG(DFATAL) << "Unexpected opcode: " << ip->opcode();
return false;
case kInstFail:
- break;
+ break;
case kInstAltMatch:
- if (ip->greedy(prog_)) {
- // out1 is the Match instruction.
- id = ip->out1();
- p = end;
- goto Loop;
+ if (ip->greedy(prog_)) {
+ // out1 is the Match instruction.
+ id = ip->out1();
+ p = end;
+ goto Loop;
}
- if (longest_) {
- // ip must be non-greedy...
- // out is the Match instruction.
- id = ip->out();
- p = end;
- goto Loop;
- }
- goto Next;
+ if (longest_) {
+ // ip must be non-greedy...
+ // out is the Match instruction.
+ id = ip->out();
+ p = end;
+ goto Loop;
+ }
+ goto Next;
case kInstByteRange: {
int c = -1;
@@ -197,50 +197,50 @@ bool BitState::TrySearch(int id0, const char* p0) {
if (!ip->Matches(c))
goto Next;
- if (ip->hint() != 0)
- Push(id+ip->hint(), p); // try the next when we're done
+ if (ip->hint() != 0)
+ Push(id+ip->hint(), p); // try the next when we're done
id = ip->out();
p++;
goto CheckAndLoop;
}
case kInstCapture:
- if (!ip->last())
- Push(id+1, p); // try the next when we're done
+ if (!ip->last())
+ Push(id+1, p); // try the next when we're done
- if (0 <= ip->cap() && ip->cap() < cap_.size()) {
- // Capture p to register, but save old value first.
- Push(-id, cap_[ip->cap()]); // undo when we're done
- cap_[ip->cap()] = p;
- }
+ if (0 <= ip->cap() && ip->cap() < cap_.size()) {
+ // Capture p to register, but save old value first.
+ Push(-id, cap_[ip->cap()]); // undo when we're done
+ cap_[ip->cap()] = p;
+ }
- id = ip->out();
- goto CheckAndLoop;
+ id = ip->out();
+ goto CheckAndLoop;
case kInstEmptyWidth:
if (ip->empty() & ~Prog::EmptyFlags(context_, p))
goto Next;
if (!ip->last())
- Push(id+1, p); // try the next when we're done
+ Push(id+1, p); // try the next when we're done
id = ip->out();
goto CheckAndLoop;
case kInstNop:
if (!ip->last())
- Push(id+1, p); // try the next when we're done
+ Push(id+1, p); // try the next when we're done
id = ip->out();
- CheckAndLoop:
- // Sanity check: id is the head of its list, which must
- // be the case if id-1 is the last of *its* list. :)
- DCHECK(id == 0 || prog_->inst(id-1)->last());
- if (ShouldVisit(id, p))
- goto Loop;
- break;
-
+ CheckAndLoop:
+ // Sanity check: id is the head of its list, which must
+ // be the case if id-1 is the last of *its* list. :)
+ DCHECK(id == 0 || prog_->inst(id-1)->last());
+ if (ShouldVisit(id, p))
+ goto Loop;
+ break;
+
case kInstMatch: {
- if (endmatch_ && p != end)
+ if (endmatch_ && p != end)
goto Next;
// We found a match. If the caller doesn't care
@@ -254,7 +254,7 @@ bool BitState::TrySearch(int id0, const char* p0) {
matched = true;
cap_[1] = p;
if (submatch_[0].data() == NULL ||
- (longest_ && p > submatch_[0].data() + submatch_[0].size())) {
+ (longest_ && p > submatch_[0].data() + submatch_[0].size())) {
for (int i = 0; i < nsubmatch_; i++)
submatch_[i] =
StringPiece(cap_[2 * i],
@@ -266,18 +266,18 @@ bool BitState::TrySearch(int id0, const char* p0) {
return true;
// If we used the entire text, no longer match is possible.
- if (p == end)
+ if (p == end)
return true;
// Otherwise, continue on in hope of a longer match.
- // Note the absence of the ShouldVisit() check here
- // due to execution remaining in the same list.
- Next:
- if (!ip->last()) {
- id++;
- goto Loop;
- }
- break;
+ // Note the absence of the ShouldVisit() check here
+ // due to execution remaining in the same list.
+ Next:
+ if (!ip->last()) {
+ id++;
+ goto Loop;
+ }
+ break;
}
}
}
@@ -291,7 +291,7 @@ bool BitState::Search(const StringPiece& text, const StringPiece& context,
// Search parameters.
text_ = text;
context_ = context;
- if (context_.data() == NULL)
+ if (context_.data() == NULL)
context_ = text;
if (prog_->anchor_start() && BeginPtr(context_) != BeginPtr(text))
return false;
@@ -306,24 +306,24 @@ bool BitState::Search(const StringPiece& text, const StringPiece& context,
submatch_[i] = StringPiece();
// Allocate scratch space.
- int nvisited = prog_->list_count() * static_cast<int>(text.size()+1);
- nvisited = (nvisited + kVisitedBits-1) / kVisitedBits;
- visited_ = PODArray<uint64_t>(nvisited);
- memset(visited_.data(), 0, nvisited*sizeof visited_[0]);
+ int nvisited = prog_->list_count() * static_cast<int>(text.size()+1);
+ nvisited = (nvisited + kVisitedBits-1) / kVisitedBits;
+ visited_ = PODArray<uint64_t>(nvisited);
+ memset(visited_.data(), 0, nvisited*sizeof visited_[0]);
- int ncap = 2*nsubmatch;
- if (ncap < 2)
- ncap = 2;
- cap_ = PODArray<const char*>(ncap);
- memset(cap_.data(), 0, ncap*sizeof cap_[0]);
+ int ncap = 2*nsubmatch;
+ if (ncap < 2)
+ ncap = 2;
+ cap_ = PODArray<const char*>(ncap);
+ memset(cap_.data(), 0, ncap*sizeof cap_[0]);
- // When sizeof(Job) == 16, we start with a nice round 1KiB. :)
- job_ = PODArray<Job>(64);
+ // When sizeof(Job) == 16, we start with a nice round 1KiB. :)
+ job_ = PODArray<Job>(64);
// Anchored search must start at text.begin().
if (anchored_) {
- cap_[0] = text.data();
- return TrySearch(prog_->start(), text.data());
+ cap_[0] = text.data();
+ return TrySearch(prog_->start(), text.data());
}
// Unanchored search, starting from each possible text position.
@@ -332,22 +332,22 @@ bool BitState::Search(const StringPiece& text, const StringPiece& context,
// This looks like it's quadratic in the size of the text,
// but we are not clearing visited_ between calls to TrySearch,
// so no work is duplicated and it ends up still being linear.
- const char* etext = text.data() + text.size();
- for (const char* p = text.data(); p <= etext; p++) {
- // Try to use prefix accel (e.g. memchr) to skip ahead.
- if (p < etext && prog_->can_prefix_accel()) {
- p = reinterpret_cast<const char*>(prog_->PrefixAccel(p, etext - p));
+ const char* etext = text.data() + text.size();
+ for (const char* p = text.data(); p <= etext; p++) {
+ // Try to use prefix accel (e.g. memchr) to skip ahead.
+ if (p < etext && prog_->can_prefix_accel()) {
+ p = reinterpret_cast<const char*>(prog_->PrefixAccel(p, etext - p));
if (p == NULL)
- p = etext;
+ p = etext;
}
cap_[0] = p;
if (TrySearch(prog_->start(), p)) // Match must be leftmost; done.
return true;
- // Avoid invoking undefined behavior (arithmetic on a null pointer)
- // by simply not continuing the loop.
- if (p == NULL)
- break;
+ // Avoid invoking undefined behavior (arithmetic on a null pointer)
+ // by simply not continuing the loop.
+ if (p == NULL)
+ break;
}
return false;
}
diff --git a/contrib/libs/re2/re2/compile.cc b/contrib/libs/re2/re2/compile.cc
index 2d836818a0..61d801a630 100644
--- a/contrib/libs/re2/re2/compile.cc
+++ b/contrib/libs/re2/re2/compile.cc
@@ -15,7 +15,7 @@
#include "util/logging.h"
#include "util/utf.h"
-#include "re2/pod_array.h"
+#include "re2/pod_array.h"
#include "re2/prog.h"
#include "re2/re2.h"
#include "re2/regexp.h"
@@ -30,60 +30,60 @@ namespace re2 {
// See http://swtch.com/~rsc/regexp/regexp1.html for inspiration.
//
// Because the out and out1 fields in Inst are no longer pointers,
-// we can't use pointers directly here either. Instead, head refers
-// to inst_[head>>1].out (head&1 == 0) or inst_[head>>1].out1 (head&1 == 1).
-// head == 0 represents the NULL list. This is okay because instruction #0
+// we can't use pointers directly here either. Instead, head refers
+// to inst_[head>>1].out (head&1 == 0) or inst_[head>>1].out1 (head&1 == 1).
+// head == 0 represents the NULL list. This is okay because instruction #0
// is always the fail instruction, which never appears on a list.
struct PatchList {
// Returns patch list containing just p.
- static PatchList Mk(uint32_t p) {
- return {p, p};
- }
+ static PatchList Mk(uint32_t p) {
+ return {p, p};
+ }
- // Patches all the entries on l to have value p.
+ // Patches all the entries on l to have value p.
// Caller must not ever use patch list again.
- static void Patch(Prog::Inst* inst0, PatchList l, uint32_t p) {
- while (l.head != 0) {
- Prog::Inst* ip = &inst0[l.head>>1];
- if (l.head&1) {
- l.head = ip->out1();
- ip->out1_ = p;
- } else {
- l.head = ip->out();
- ip->set_out(p);
- }
+ static void Patch(Prog::Inst* inst0, PatchList l, uint32_t p) {
+ while (l.head != 0) {
+ Prog::Inst* ip = &inst0[l.head>>1];
+ if (l.head&1) {
+ l.head = ip->out1();
+ ip->out1_ = p;
+ } else {
+ l.head = ip->out();
+ ip->set_out(p);
+ }
}
}
- // Appends two patch lists and returns result.
- static PatchList Append(Prog::Inst* inst0, PatchList l1, PatchList l2) {
- if (l1.head == 0)
- return l2;
- if (l2.head == 0)
- return l1;
- Prog::Inst* ip = &inst0[l1.tail>>1];
- if (l1.tail&1)
- ip->out1_ = l2.head;
- else
- ip->set_out(l2.head);
- return {l1.head, l2.tail};
+ // Appends two patch lists and returns result.
+ static PatchList Append(Prog::Inst* inst0, PatchList l1, PatchList l2) {
+ if (l1.head == 0)
+ return l2;
+ if (l2.head == 0)
+ return l1;
+ Prog::Inst* ip = &inst0[l1.tail>>1];
+ if (l1.tail&1)
+ ip->out1_ = l2.head;
+ else
+ ip->set_out(l2.head);
+ return {l1.head, l2.tail};
}
- uint32_t head;
- uint32_t tail; // for constant-time append
-};
+ uint32_t head;
+ uint32_t tail; // for constant-time append
+};
-static const PatchList kNullPatchList = {0, 0};
+static const PatchList kNullPatchList = {0, 0};
// Compiled program fragment.
struct Frag {
uint32_t begin;
PatchList end;
- bool nullable;
+ bool nullable;
- Frag() : begin(0), end(kNullPatchList), nullable(false) {}
- Frag(uint32_t begin, PatchList end, bool nullable)
- : begin(begin), end(end), nullable(nullable) {}
+ Frag() : begin(0), end(kNullPatchList), nullable(false) {}
+ Frag(uint32_t begin, PatchList end, bool nullable)
+ : begin(begin), end(end), nullable(nullable) {}
};
// Input encodings.
@@ -105,7 +105,7 @@ class Compiler : public Regexp::Walker<Frag> {
// Compiles alternation of all the re to a new Prog.
// Each re has a match with an id equal to its index in the vector.
- static Prog* CompileSet(Regexp* re, RE2::Anchor anchor, int64_t max_mem);
+ static Prog* CompileSet(Regexp* re, RE2::Anchor anchor, int64_t max_mem);
// Interface for Regexp::Walker, which helps traverse the Regexp.
// The walk is purely post-recursive: given the machines for the
@@ -180,8 +180,8 @@ class Compiler : public Regexp::Walker<Frag> {
int AddSuffixRecursive(int root, int id);
// Finds the trie node for the given suffix. Returns a Frag in order to
- // distinguish between pointing at the root node directly (end.head == 0)
- // and pointing at an Alt's out1 or out (end.head&1 == 1 or 0, respectively).
+ // distinguish between pointing at the root node directly (end.head == 0)
+ // and pointing at an Alt's out1 or out (end.head&1 == 1 or 0, respectively).
Frag FindByteRange(int root, int id);
// Compares two ByteRanges and returns true iff they are equal.
@@ -193,8 +193,8 @@ class Compiler : public Regexp::Walker<Frag> {
// Single rune.
Frag Literal(Rune r, bool foldcase);
- void Setup(Regexp::ParseFlags flags, int64_t max_mem, RE2::Anchor anchor);
- Prog* Finish(Regexp* re);
+ void Setup(Regexp::ParseFlags flags, int64_t max_mem, RE2::Anchor anchor);
+ Prog* Finish(Regexp* re);
// Returns .* where dot = any byte
Frag DotStar();
@@ -205,9 +205,9 @@ class Compiler : public Regexp::Walker<Frag> {
Encoding encoding_; // Input encoding
bool reversed_; // Should program run backward over text?
- PODArray<Prog::Inst> inst_;
- int ninst_; // Number of instructions used.
- int max_ninst_; // Maximum number of instructions.
+ PODArray<Prog::Inst> inst_;
+ int ninst_; // Number of instructions used.
+ int max_ninst_; // Maximum number of instructions.
int64_t max_mem_; // Total memory budget.
@@ -225,12 +225,12 @@ Compiler::Compiler() {
failed_ = false;
encoding_ = kEncodingUTF8;
reversed_ = false;
- ninst_ = 0;
- max_ninst_ = 1; // make AllocInst for fail instruction okay
+ ninst_ = 0;
+ max_ninst_ = 1; // make AllocInst for fail instruction okay
max_mem_ = 0;
int fail = AllocInst(1);
inst_[fail].InitFail();
- max_ninst_ = 0; // Caller must change
+ max_ninst_ = 0; // Caller must change
}
Compiler::~Compiler() {
@@ -238,25 +238,25 @@ Compiler::~Compiler() {
}
int Compiler::AllocInst(int n) {
- if (failed_ || ninst_ + n > max_ninst_) {
+ if (failed_ || ninst_ + n > max_ninst_) {
failed_ = true;
return -1;
}
- if (ninst_ + n > inst_.size()) {
- int cap = inst_.size();
- if (cap == 0)
- cap = 8;
- while (ninst_ + n > cap)
- cap *= 2;
- PODArray<Prog::Inst> inst(cap);
- if (inst_.data() != NULL)
- memmove(inst.data(), inst_.data(), ninst_*sizeof inst_[0]);
- memset(inst.data() + ninst_, 0, (cap - ninst_)*sizeof inst_[0]);
- inst_ = std::move(inst);
+ if (ninst_ + n > inst_.size()) {
+ int cap = inst_.size();
+ if (cap == 0)
+ cap = 8;
+ while (ninst_ + n > cap)
+ cap *= 2;
+ PODArray<Prog::Inst> inst(cap);
+ if (inst_.data() != NULL)
+ memmove(inst.data(), inst_.data(), ninst_*sizeof inst_[0]);
+ memset(inst.data() + ninst_, 0, (cap - ninst_)*sizeof inst_[0]);
+ inst_ = std::move(inst);
}
- int id = ninst_;
- ninst_ += n;
+ int id = ninst_;
+ ninst_ += n;
return id;
}
@@ -266,7 +266,7 @@ int Compiler::AllocInst(int n) {
// Returns an unmatchable fragment.
Frag Compiler::NoMatch() {
- return Frag();
+ return Frag();
}
// Is a an unmatchable fragment?
@@ -282,21 +282,21 @@ Frag Compiler::Cat(Frag a, Frag b) {
// Elide no-op.
Prog::Inst* begin = &inst_[a.begin];
if (begin->opcode() == kInstNop &&
- a.end.head == (a.begin << 1) &&
+ a.end.head == (a.begin << 1) &&
begin->out() == 0) {
- // in case refs to a somewhere
- PatchList::Patch(inst_.data(), a.end, b.begin);
+ // in case refs to a somewhere
+ PatchList::Patch(inst_.data(), a.end, b.begin);
return b;
}
// To run backward over string, reverse all concatenations.
if (reversed_) {
- PatchList::Patch(inst_.data(), b.end, a.begin);
- return Frag(b.begin, a.end, b.nullable && a.nullable);
+ PatchList::Patch(inst_.data(), b.end, a.begin);
+ return Frag(b.begin, a.end, b.nullable && a.nullable);
}
- PatchList::Patch(inst_.data(), a.end, b.begin);
- return Frag(a.begin, b.end, a.nullable && b.nullable);
+ PatchList::Patch(inst_.data(), a.end, b.begin);
+ return Frag(a.begin, b.end, a.nullable && b.nullable);
}
// Given fragments for a and b, returns fragment for a|b.
@@ -312,8 +312,8 @@ Frag Compiler::Alt(Frag a, Frag b) {
return NoMatch();
inst_[id].InitAlt(a.begin, b.begin);
- return Frag(id, PatchList::Append(inst_.data(), a.end, b.end),
- a.nullable || b.nullable);
+ return Frag(id, PatchList::Append(inst_.data(), a.end, b.end),
+ a.nullable || b.nullable);
}
// When capturing submatches in like-Perl mode, a kOpAlt Inst
@@ -323,44 +323,44 @@ Frag Compiler::Alt(Frag a, Frag b) {
// then the operator is greedy. If out1_ is the repetition
// (and out_ moves forward), then the operator is non-greedy.
-// Given a fragment for a, returns a fragment for a+ or a+? (if nongreedy)
-Frag Compiler::Plus(Frag a, bool nongreedy) {
+// Given a fragment for a, returns a fragment for a+ or a+? (if nongreedy)
+Frag Compiler::Plus(Frag a, bool nongreedy) {
int id = AllocInst(1);
if (id < 0)
return NoMatch();
- PatchList pl;
+ PatchList pl;
if (nongreedy) {
- inst_[id].InitAlt(0, a.begin);
- pl = PatchList::Mk(id << 1);
+ inst_[id].InitAlt(0, a.begin);
+ pl = PatchList::Mk(id << 1);
} else {
- inst_[id].InitAlt(a.begin, 0);
- pl = PatchList::Mk((id << 1) | 1);
+ inst_[id].InitAlt(a.begin, 0);
+ pl = PatchList::Mk((id << 1) | 1);
}
- PatchList::Patch(inst_.data(), a.end, id);
- return Frag(a.begin, pl, a.nullable);
+ PatchList::Patch(inst_.data(), a.end, id);
+ return Frag(a.begin, pl, a.nullable);
}
-// Given a fragment for a, returns a fragment for a* or a*? (if nongreedy)
-Frag Compiler::Star(Frag a, bool nongreedy) {
- // When the subexpression is nullable, one Alt isn't enough to guarantee
- // correct priority ordering within the transitive closure. The simplest
- // solution is to handle it as (a+)? instead, which adds the second Alt.
- if (a.nullable)
- return Quest(Plus(a, nongreedy), nongreedy);
-
- int id = AllocInst(1);
- if (id < 0)
- return NoMatch();
- PatchList pl;
- if (nongreedy) {
- inst_[id].InitAlt(0, a.begin);
- pl = PatchList::Mk(id << 1);
- } else {
- inst_[id].InitAlt(a.begin, 0);
- pl = PatchList::Mk((id << 1) | 1);
- }
- PatchList::Patch(inst_.data(), a.end, id);
- return Frag(id, pl, true);
+// Given a fragment for a, returns a fragment for a* or a*? (if nongreedy)
+Frag Compiler::Star(Frag a, bool nongreedy) {
+ // When the subexpression is nullable, one Alt isn't enough to guarantee
+ // correct priority ordering within the transitive closure. The simplest
+ // solution is to handle it as (a+)? instead, which adds the second Alt.
+ if (a.nullable)
+ return Quest(Plus(a, nongreedy), nongreedy);
+
+ int id = AllocInst(1);
+ if (id < 0)
+ return NoMatch();
+ PatchList pl;
+ if (nongreedy) {
+ inst_[id].InitAlt(0, a.begin);
+ pl = PatchList::Mk(id << 1);
+ } else {
+ inst_[id].InitAlt(a.begin, 0);
+ pl = PatchList::Mk((id << 1) | 1);
+ }
+ PatchList::Patch(inst_.data(), a.end, id);
+ return Frag(id, pl, true);
}
// Given a fragment for a, returns a fragment for a? or a?? (if nongreedy)
@@ -378,7 +378,7 @@ Frag Compiler::Quest(Frag a, bool nongreedy) {
inst_[id].InitAlt(a.begin, 0);
pl = PatchList::Mk((id << 1) | 1);
}
- return Frag(id, PatchList::Append(inst_.data(), pl, a.end), true);
+ return Frag(id, PatchList::Append(inst_.data(), pl, a.end), true);
}
// Returns a fragment for the byte range lo-hi.
@@ -387,7 +387,7 @@ Frag Compiler::ByteRange(int lo, int hi, bool foldcase) {
if (id < 0)
return NoMatch();
inst_[id].InitByteRange(lo, hi, foldcase, 0);
- return Frag(id, PatchList::Mk(id << 1), false);
+ return Frag(id, PatchList::Mk(id << 1), false);
}
// Returns a no-op fragment. Sometimes unavoidable.
@@ -396,7 +396,7 @@ Frag Compiler::Nop() {
if (id < 0)
return NoMatch();
inst_[id].InitNop(0);
- return Frag(id, PatchList::Mk(id << 1), true);
+ return Frag(id, PatchList::Mk(id << 1), true);
}
// Returns a fragment that signals a match.
@@ -405,7 +405,7 @@ Frag Compiler::Match(int32_t match_id) {
if (id < 0)
return NoMatch();
inst_[id].InitMatch(match_id);
- return Frag(id, kNullPatchList, false);
+ return Frag(id, kNullPatchList, false);
}
// Returns a fragment matching a particular empty-width op (like ^ or $)
@@ -414,7 +414,7 @@ Frag Compiler::EmptyWidth(EmptyOp empty) {
if (id < 0)
return NoMatch();
inst_[id].InitEmptyWidth(empty, 0);
- return Frag(id, PatchList::Mk(id << 1), true);
+ return Frag(id, PatchList::Mk(id << 1), true);
}
// Given a fragment a, returns a fragment with capturing parens around a.
@@ -426,9 +426,9 @@ Frag Compiler::Capture(Frag a, int n) {
return NoMatch();
inst_[id].InitCapture(2*n, a.begin);
inst_[id+1].InitCapture(2*n+1, 0);
- PatchList::Patch(inst_.data(), a.end, id+1);
+ PatchList::Patch(inst_.data(), a.end, id+1);
- return Frag(id, PatchList::Mk((id+1) << 1), a.nullable);
+ return Frag(id, PatchList::Mk((id+1) << 1), a.nullable);
}
// A Rune is a name for a Unicode code point.
@@ -453,16 +453,16 @@ static int MaxRune(int len) {
void Compiler::BeginRange() {
rune_cache_.clear();
rune_range_.begin = 0;
- rune_range_.end = kNullPatchList;
+ rune_range_.end = kNullPatchList;
}
int Compiler::UncachedRuneByteSuffix(uint8_t lo, uint8_t hi, bool foldcase,
int next) {
Frag f = ByteRange(lo, hi, foldcase);
if (next != 0) {
- PatchList::Patch(inst_.data(), f.end, next);
+ PatchList::Patch(inst_.data(), f.end, next);
} else {
- rune_range_.end = PatchList::Append(inst_.data(), rune_range_.end, f.end);
+ rune_range_.end = PatchList::Append(inst_.data(), rune_range_.end, f.end);
}
return f.begin;
}
@@ -534,9 +534,9 @@ int Compiler::AddSuffixRecursive(int root, int id) {
}
int br;
- if (f.end.head == 0)
+ if (f.end.head == 0)
br = root;
- else if (f.end.head&1)
+ else if (f.end.head&1)
br = inst_[f.begin].out1();
else
br = inst_[f.begin].out();
@@ -552,9 +552,9 @@ int Compiler::AddSuffixRecursive(int root, int id) {
// Ensure that the parent points to the clone, not to the original.
// Note that this could leave the head unreachable except via the cache.
br = byterange;
- if (f.end.head == 0)
+ if (f.end.head == 0)
root = br;
- else if (f.end.head&1)
+ else if (f.end.head&1)
inst_[f.begin].out1_ = br;
else
inst_[f.begin].set_out(br);
@@ -564,10 +564,10 @@ int Compiler::AddSuffixRecursive(int root, int id) {
if (!IsCachedRuneByteSuffix(id)) {
// The head should be the instruction most recently allocated, so free it
// instead of leaving it unreachable.
- DCHECK_EQ(id, ninst_-1);
+ DCHECK_EQ(id, ninst_-1);
inst_[id].out_opcode_ = 0;
inst_[id].out1_ = 0;
- ninst_--;
+ ninst_--;
}
out = AddSuffixRecursive(inst_[br].out(), out);
@@ -587,7 +587,7 @@ bool Compiler::ByteRangeEqual(int id1, int id2) {
Frag Compiler::FindByteRange(int root, int id) {
if (inst_[root].opcode() == kInstByteRange) {
if (ByteRangeEqual(root, id))
- return Frag(root, kNullPatchList, false);
+ return Frag(root, kNullPatchList, false);
else
return NoMatch();
}
@@ -595,7 +595,7 @@ Frag Compiler::FindByteRange(int root, int id) {
while (inst_[root].opcode() == kInstAlt) {
int out1 = inst_[root].out1();
if (ByteRangeEqual(out1, id))
- return Frag(root, PatchList::Mk((root << 1) | 1), false);
+ return Frag(root, PatchList::Mk((root << 1) | 1), false);
// CharClass is a sorted list of ranges, so if out1 of the root Alt wasn't
// what we're looking for, then we can stop immediately. Unfortunately, we
@@ -607,7 +607,7 @@ Frag Compiler::FindByteRange(int root, int id) {
if (inst_[out].opcode() == kInstAlt)
root = out;
else if (ByteRangeEqual(out, id))
- return Frag(root, PatchList::Mk(root << 1), false);
+ return Frag(root, PatchList::Mk(root << 1), false);
else
return NoMatch();
}
@@ -648,43 +648,43 @@ void Compiler::AddRuneRangeLatin1(Rune lo, Rune hi, bool foldcase) {
static_cast<uint8_t>(hi), foldcase, 0));
}
-void Compiler::Add_80_10ffff() {
- // The 80-10FFFF (Runeself-Runemax) rune range occurs frequently enough
- // (for example, for /./ and /[^a-z]/) that it is worth simplifying: by
- // permitting overlong encodings in E0 and F0 sequences and code points
- // over 10FFFF in F4 sequences, the size of the bytecode and the number
- // of equivalence classes are reduced significantly.
- int id;
- if (reversed_) {
- // Prefix factoring matters, but we don't have to handle it here
- // because the rune range trie logic takes care of that already.
- id = UncachedRuneByteSuffix(0xC2, 0xDF, false, 0);
- id = UncachedRuneByteSuffix(0x80, 0xBF, false, id);
- AddSuffix(id);
-
- id = UncachedRuneByteSuffix(0xE0, 0xEF, false, 0);
- id = UncachedRuneByteSuffix(0x80, 0xBF, false, id);
- id = UncachedRuneByteSuffix(0x80, 0xBF, false, id);
- AddSuffix(id);
-
- id = UncachedRuneByteSuffix(0xF0, 0xF4, false, 0);
- id = UncachedRuneByteSuffix(0x80, 0xBF, false, id);
- id = UncachedRuneByteSuffix(0x80, 0xBF, false, id);
- id = UncachedRuneByteSuffix(0x80, 0xBF, false, id);
- AddSuffix(id);
- } else {
- // Suffix factoring matters - and we do have to handle it here.
- int cont1 = UncachedRuneByteSuffix(0x80, 0xBF, false, 0);
- id = UncachedRuneByteSuffix(0xC2, 0xDF, false, cont1);
- AddSuffix(id);
-
- int cont2 = UncachedRuneByteSuffix(0x80, 0xBF, false, cont1);
- id = UncachedRuneByteSuffix(0xE0, 0xEF, false, cont2);
- AddSuffix(id);
-
- int cont3 = UncachedRuneByteSuffix(0x80, 0xBF, false, cont2);
- id = UncachedRuneByteSuffix(0xF0, 0xF4, false, cont3);
- AddSuffix(id);
+void Compiler::Add_80_10ffff() {
+ // The 80-10FFFF (Runeself-Runemax) rune range occurs frequently enough
+ // (for example, for /./ and /[^a-z]/) that it is worth simplifying: by
+ // permitting overlong encodings in E0 and F0 sequences and code points
+ // over 10FFFF in F4 sequences, the size of the bytecode and the number
+ // of equivalence classes are reduced significantly.
+ int id;
+ if (reversed_) {
+ // Prefix factoring matters, but we don't have to handle it here
+ // because the rune range trie logic takes care of that already.
+ id = UncachedRuneByteSuffix(0xC2, 0xDF, false, 0);
+ id = UncachedRuneByteSuffix(0x80, 0xBF, false, id);
+ AddSuffix(id);
+
+ id = UncachedRuneByteSuffix(0xE0, 0xEF, false, 0);
+ id = UncachedRuneByteSuffix(0x80, 0xBF, false, id);
+ id = UncachedRuneByteSuffix(0x80, 0xBF, false, id);
+ AddSuffix(id);
+
+ id = UncachedRuneByteSuffix(0xF0, 0xF4, false, 0);
+ id = UncachedRuneByteSuffix(0x80, 0xBF, false, id);
+ id = UncachedRuneByteSuffix(0x80, 0xBF, false, id);
+ id = UncachedRuneByteSuffix(0x80, 0xBF, false, id);
+ AddSuffix(id);
+ } else {
+ // Suffix factoring matters - and we do have to handle it here.
+ int cont1 = UncachedRuneByteSuffix(0x80, 0xBF, false, 0);
+ id = UncachedRuneByteSuffix(0xC2, 0xDF, false, cont1);
+ AddSuffix(id);
+
+ int cont2 = UncachedRuneByteSuffix(0x80, 0xBF, false, cont1);
+ id = UncachedRuneByteSuffix(0xE0, 0xEF, false, cont2);
+ AddSuffix(id);
+
+ int cont3 = UncachedRuneByteSuffix(0x80, 0xBF, false, cont2);
+ id = UncachedRuneByteSuffix(0xF0, 0xF4, false, cont3);
+ AddSuffix(id);
}
}
@@ -692,8 +692,8 @@ void Compiler::AddRuneRangeUTF8(Rune lo, Rune hi, bool foldcase) {
if (lo > hi)
return;
- // Pick off 80-10FFFF as a common special case.
- if (lo == 0x80 && hi == 0x10ffff) {
+ // Pick off 80-10FFFF as a common special case.
+ if (lo == 0x80 && hi == 0x10ffff) {
Add_80_10ffff();
return;
}
@@ -854,11 +854,11 @@ Frag Compiler::PostVisit(Regexp* re, Frag, Frag, Frag* child_frags,
case kRegexpHaveMatch: {
Frag f = Match(re->match_id());
- if (anchor_ == RE2::ANCHOR_BOTH) {
- // Append \z or else the subexpression will effectively be unanchored.
- // Complemented by the UNANCHORED case in CompileSet().
- f = Cat(EmptyWidth(kEmptyEndText), f);
- }
+ if (anchor_ == RE2::ANCHOR_BOTH) {
+ // Append \z or else the subexpression will effectively be unanchored.
+ // Complemented by the UNANCHORED case in CompileSet().
+ f = Cat(EmptyWidth(kEmptyEndText), f);
+ }
return f;
}
@@ -998,11 +998,11 @@ static bool IsAnchorStart(Regexp** pre, int depth) {
if (re->nsub() > 0) {
sub = re->sub()[0]->Incref();
if (IsAnchorStart(&sub, depth+1)) {
- PODArray<Regexp*> subcopy(re->nsub());
+ PODArray<Regexp*> subcopy(re->nsub());
subcopy[0] = sub; // already have reference
for (int i = 1; i < re->nsub(); i++)
subcopy[i] = re->sub()[i]->Incref();
- *pre = Regexp::Concat(subcopy.data(), re->nsub(), re->parse_flags());
+ *pre = Regexp::Concat(subcopy.data(), re->nsub(), re->parse_flags());
re->Decref();
return true;
}
@@ -1045,11 +1045,11 @@ static bool IsAnchorEnd(Regexp** pre, int depth) {
if (re->nsub() > 0) {
sub = re->sub()[re->nsub() - 1]->Incref();
if (IsAnchorEnd(&sub, depth+1)) {
- PODArray<Regexp*> subcopy(re->nsub());
+ PODArray<Regexp*> subcopy(re->nsub());
subcopy[re->nsub() - 1] = sub; // already have reference
for (int i = 0; i < re->nsub() - 1; i++)
subcopy[i] = re->sub()[i]->Incref();
- *pre = Regexp::Concat(subcopy.data(), re->nsub(), re->parse_flags());
+ *pre = Regexp::Concat(subcopy.data(), re->nsub(), re->parse_flags());
re->Decref();
return true;
}
@@ -1079,15 +1079,15 @@ void Compiler::Setup(Regexp::ParseFlags flags, int64_t max_mem,
encoding_ = kEncodingLatin1;
max_mem_ = max_mem;
if (max_mem <= 0) {
- max_ninst_ = 100000; // more than enough
+ max_ninst_ = 100000; // more than enough
} else if (static_cast<size_t>(max_mem) <= sizeof(Prog)) {
// No room for anything.
- max_ninst_ = 0;
+ max_ninst_ = 0;
} else {
int64_t m = (max_mem - sizeof(Prog)) / sizeof(Prog::Inst);
// Limit instruction count so that inst->id() fits nicely in an int.
// SparseArray also assumes that the indices (inst->id()) are ints.
- // The call to WalkExponential uses 2*max_ninst_ below,
+ // The call to WalkExponential uses 2*max_ninst_ below,
// and other places in the code use 2 or 3 * prog->size().
// Limiting to 2^24 should avoid overflow in those places.
// (The point of allowing more than 32 bits of memory is to
@@ -1098,7 +1098,7 @@ void Compiler::Setup(Regexp::ParseFlags flags, int64_t max_mem,
// Inst imposes its own limit (currently bigger than 2^24 but be safe).
if (m > Prog::Inst::kMaxInst)
m = Prog::Inst::kMaxInst;
- max_ninst_ = static_cast<int>(m);
+ max_ninst_ = static_cast<int>(m);
}
anchor_ = anchor;
}
@@ -1110,7 +1110,7 @@ void Compiler::Setup(Regexp::ParseFlags flags, int64_t max_mem,
// The reversed flag is also recorded in the returned program.
Prog* Compiler::Compile(Regexp* re, bool reversed, int64_t max_mem) {
Compiler c;
- c.Setup(re->parse_flags(), max_mem, RE2::UNANCHORED /* unused */);
+ c.Setup(re->parse_flags(), max_mem, RE2::UNANCHORED /* unused */);
c.reversed_ = reversed;
// Simplify to remove things like counted repetitions
@@ -1125,7 +1125,7 @@ Prog* Compiler::Compile(Regexp* re, bool reversed, int64_t max_mem) {
bool is_anchor_end = IsAnchorEnd(&sre, 0);
// Generate fragment for entire regexp.
- Frag all = c.WalkExponential(sre, Frag(), 2*c.max_ninst_);
+ Frag all = c.WalkExponential(sre, Frag(), 2*c.max_ninst_);
sre->Decref();
if (c.failed_)
return NULL;
@@ -1134,10 +1134,10 @@ Prog* Compiler::Compile(Regexp* re, bool reversed, int64_t max_mem) {
// Turn off c.reversed_ (if it is set) to force the remaining concatenations
// to behave normally.
c.reversed_ = false;
- all = c.Cat(all, c.Match(0));
+ all = c.Cat(all, c.Match(0));
- c.prog_->set_reversed(reversed);
- if (c.prog_->reversed()) {
+ c.prog_->set_reversed(reversed);
+ if (c.prog_->reversed()) {
c.prog_->set_anchor_start(is_anchor_end);
c.prog_->set_anchor_end(is_anchor_start);
} else {
@@ -1145,49 +1145,49 @@ Prog* Compiler::Compile(Regexp* re, bool reversed, int64_t max_mem) {
c.prog_->set_anchor_end(is_anchor_end);
}
- c.prog_->set_start(all.begin);
- if (!c.prog_->anchor_start()) {
- // Also create unanchored version, which starts with a .*? loop.
- all = c.Cat(c.DotStar(), all);
+ c.prog_->set_start(all.begin);
+ if (!c.prog_->anchor_start()) {
+ // Also create unanchored version, which starts with a .*? loop.
+ all = c.Cat(c.DotStar(), all);
}
- c.prog_->set_start_unanchored(all.begin);
+ c.prog_->set_start_unanchored(all.begin);
// Hand ownership of prog_ to caller.
- return c.Finish(re);
+ return c.Finish(re);
}
-Prog* Compiler::Finish(Regexp* re) {
+Prog* Compiler::Finish(Regexp* re) {
if (failed_)
return NULL;
if (prog_->start() == 0 && prog_->start_unanchored() == 0) {
// No possible matches; keep Fail instruction only.
- ninst_ = 1;
+ ninst_ = 1;
}
// Hand off the array to Prog.
- prog_->inst_ = std::move(inst_);
- prog_->size_ = ninst_;
+ prog_->inst_ = std::move(inst_);
+ prog_->size_ = ninst_;
prog_->Optimize();
prog_->Flatten();
prog_->ComputeByteMap();
- if (!prog_->reversed()) {
- std::string prefix;
- bool prefix_foldcase;
- if (re->RequiredPrefixForAccel(&prefix, &prefix_foldcase))
- prog_->ConfigurePrefixAccel(prefix, prefix_foldcase);
- }
-
+ if (!prog_->reversed()) {
+ std::string prefix;
+ bool prefix_foldcase;
+ if (re->RequiredPrefixForAccel(&prefix, &prefix_foldcase))
+ prog_->ConfigurePrefixAccel(prefix, prefix_foldcase);
+ }
+
// Record remaining memory for DFA.
if (max_mem_ <= 0) {
prog_->set_dfa_mem(1<<20);
} else {
- int64_t m = max_mem_ - sizeof(Prog);
- m -= prog_->size_*sizeof(Prog::Inst); // account for inst_
- if (prog_->CanBitState())
- m -= prog_->size_*sizeof(uint16_t); // account for list_heads_
+ int64_t m = max_mem_ - sizeof(Prog);
+ m -= prog_->size_*sizeof(Prog::Inst); // account for inst_
+ if (prog_->CanBitState())
+ m -= prog_->size_*sizeof(uint16_t); // account for list_heads_
if (m < 0)
m = 0;
prog_->set_dfa_mem(m);
@@ -1212,31 +1212,31 @@ Frag Compiler::DotStar() {
}
// Compiles RE set to Prog.
-Prog* Compiler::CompileSet(Regexp* re, RE2::Anchor anchor, int64_t max_mem) {
+Prog* Compiler::CompileSet(Regexp* re, RE2::Anchor anchor, int64_t max_mem) {
Compiler c;
- c.Setup(re->parse_flags(), max_mem, anchor);
+ c.Setup(re->parse_flags(), max_mem, anchor);
- Regexp* sre = re->Simplify();
- if (sre == NULL)
- return NULL;
+ Regexp* sre = re->Simplify();
+ if (sre == NULL)
+ return NULL;
- Frag all = c.WalkExponential(sre, Frag(), 2*c.max_ninst_);
- sre->Decref();
+ Frag all = c.WalkExponential(sre, Frag(), 2*c.max_ninst_);
+ sre->Decref();
if (c.failed_)
return NULL;
- c.prog_->set_anchor_start(true);
- c.prog_->set_anchor_end(true);
-
+ c.prog_->set_anchor_start(true);
+ c.prog_->set_anchor_end(true);
+
if (anchor == RE2::UNANCHORED) {
- // Prepend .* or else the expression will effectively be anchored.
- // Complemented by the ANCHOR_BOTH case in PostVisit().
+ // Prepend .* or else the expression will effectively be anchored.
+ // Complemented by the ANCHOR_BOTH case in PostVisit().
all = c.Cat(c.DotStar(), all);
}
c.prog_->set_start(all.begin);
c.prog_->set_start_unanchored(all.begin);
- Prog* prog = c.Finish(re);
+ Prog* prog = c.Finish(re);
if (prog == NULL)
return NULL;
@@ -1254,8 +1254,8 @@ Prog* Compiler::CompileSet(Regexp* re, RE2::Anchor anchor, int64_t max_mem) {
return prog;
}
-Prog* Prog::CompileSet(Regexp* re, RE2::Anchor anchor, int64_t max_mem) {
- return Compiler::CompileSet(re, anchor, max_mem);
+Prog* Prog::CompileSet(Regexp* re, RE2::Anchor anchor, int64_t max_mem) {
+ return Compiler::CompileSet(re, anchor, max_mem);
}
} // namespace re2
diff --git a/contrib/libs/re2/re2/dfa.cc b/contrib/libs/re2/re2/dfa.cc
index 0c5ba373db..d47c7d50a7 100644
--- a/contrib/libs/re2/re2/dfa.cc
+++ b/contrib/libs/re2/re2/dfa.cc
@@ -27,11 +27,11 @@
#include <string.h>
#include <algorithm>
#include <atomic>
-#include <deque>
+#include <deque>
#include <mutex>
#include <new>
#include <string>
-#include <unordered_map>
+#include <unordered_map>
#include <unordered_set>
#include <utility>
#include <vector>
@@ -40,10 +40,10 @@
#include "util/mix.h"
#include "util/mutex.h"
#include "util/strutil.h"
-#include "re2/pod_array.h"
+#include "re2/pod_array.h"
#include "re2/prog.h"
-#include "re2/re2.h"
-#include "re2/sparse_set.h"
+#include "re2/re2.h"
+#include "re2/sparse_set.h"
#include "re2/stringpiece.h"
// Silence "zero-sized array in struct/union" warning for DFA::State::next_.
@@ -56,10 +56,10 @@ namespace re2 {
// Controls whether the DFA should bail out early if the NFA would be faster.
static bool dfa_should_bail_when_slow = true;
-void Prog::TESTING_ONLY_set_dfa_should_bail_when_slow(bool b) {
- dfa_should_bail_when_slow = b;
-}
-
+void Prog::TESTING_ONLY_set_dfa_should_bail_when_slow(bool b) {
+ dfa_should_bail_when_slow = b;
+}
+
// Changing this to true compiles in prints that trace execution of the DFA.
// Generates a lot of output -- only useful for debugging.
static const bool ExtraDebug = false;
@@ -90,17 +90,17 @@ class DFA {
// memory), it sets *failed and returns false.
bool Search(const StringPiece& text, const StringPiece& context,
bool anchored, bool want_earliest_match, bool run_forward,
- bool* failed, const char** ep, SparseSet* matches);
+ bool* failed, const char** ep, SparseSet* matches);
- // Builds out all states for the entire DFA.
- // If cb is not empty, it receives one callback per state built.
- // Returns the number of states built.
- // FOR TESTING OR EXPERIMENTAL PURPOSES ONLY.
- int BuildAllStates(const Prog::DFAStateCallback& cb);
+ // Builds out all states for the entire DFA.
+ // If cb is not empty, it receives one callback per state built.
+ // Returns the number of states built.
+ // FOR TESTING OR EXPERIMENTAL PURPOSES ONLY.
+ int BuildAllStates(const Prog::DFAStateCallback& cb);
// Computes min and max for matching strings. Won't return strings
// bigger than maxlen.
- bool PossibleMatchRange(std::string* min, std::string* max, int maxlen);
+ bool PossibleMatchRange(std::string* min, std::string* max, int maxlen);
// These data structures are logically private, but C++ makes it too
// difficult to mark them as such.
@@ -120,7 +120,7 @@ class DFA {
// into this state, along with kFlagMatch if this
// is a matching state.
-// Work around the bug affecting flexible array members in GCC 6.x (for x >= 1).
+// Work around the bug affecting flexible array members in GCC 6.x (for x >= 1).
// (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=70932)
#if !defined(__clang__) && defined(__GNUC__) && __GNUC__ == 6 && __GNUC_MINOR__ >= 1
std::atomic<State*> next_[0]; // Outgoing arrows from State,
@@ -134,9 +134,9 @@ class DFA {
enum {
kByteEndText = 256, // imaginary byte at end of text
- kFlagEmptyMask = 0xFF, // State.flag_: bits holding kEmptyXXX flags
- kFlagMatch = 0x0100, // State.flag_: this is a matching state
- kFlagLastWord = 0x0200, // State.flag_: last byte was a word char
+ kFlagEmptyMask = 0xFF, // State.flag_: bits holding kEmptyXXX flags
+ kFlagMatch = 0x0100, // State.flag_: this is a matching state
+ kFlagLastWord = 0x0200, // State.flag_: last byte was a word char
kFlagNeedShift = 16, // needed kEmpty bits are or'ed in shifted left
};
@@ -171,9 +171,9 @@ class DFA {
typedef std::unordered_set<State*, StateHash, StateEqual> StateSet;
private:
- // Make it easier to swap in a scalable reader-writer mutex.
- using CacheMutex = Mutex;
-
+ // Make it easier to swap in a scalable reader-writer mutex.
+ using CacheMutex = Mutex;
+
enum {
// Indices into start_ for unanchored searches.
// Add kStartAnchored for anchored searches.
@@ -196,7 +196,7 @@ class DFA {
// Looks up and returns the State corresponding to a Workq.
// L >= mutex_
- State* WorkqToCachedState(Workq* q, Workq* mq, uint32_t flag);
+ State* WorkqToCachedState(Workq* q, Workq* mq, uint32_t flag);
// Looks up and returns a State matching the inst, ninst, and flag.
// L >= mutex_
@@ -219,7 +219,7 @@ class DFA {
// sets *ismatch to true.
// L >= mutex_
void RunWorkqOnByte(Workq* q, Workq* nq,
- int c, uint32_t flag, bool* ismatch);
+ int c, uint32_t flag, bool* ismatch);
// Runs a Workq on a set of empty-string flags, producing a new Workq in nq.
// L >= mutex_
@@ -231,38 +231,38 @@ class DFA {
void AddToQueue(Workq* q, int id, uint32_t flag);
// For debugging, returns a text representation of State.
- static std::string DumpState(State* state);
+ static std::string DumpState(State* state);
// For debugging, returns a text representation of a Workq.
- static std::string DumpWorkq(Workq* q);
+ static std::string DumpWorkq(Workq* q);
// Search parameters
struct SearchParams {
SearchParams(const StringPiece& text, const StringPiece& context,
RWLocker* cache_lock)
- : text(text),
- context(context),
+ : text(text),
+ context(context),
anchored(false),
- can_prefix_accel(false),
+ can_prefix_accel(false),
want_earliest_match(false),
run_forward(false),
start(NULL),
cache_lock(cache_lock),
failed(false),
ep(NULL),
- matches(NULL) {}
+ matches(NULL) {}
StringPiece text;
StringPiece context;
bool anchored;
- bool can_prefix_accel;
+ bool can_prefix_accel;
bool want_earliest_match;
bool run_forward;
State* start;
- RWLocker* cache_lock;
+ RWLocker* cache_lock;
bool failed; // "out" parameter: whether search gave up
const char* ep; // "out" parameter: end pointer for match
- SparseSet* matches;
+ SparseSet* matches;
private:
SearchParams(const SearchParams&) = delete;
@@ -270,13 +270,13 @@ class DFA {
};
// Before each search, the parameters to Search are analyzed by
- // AnalyzeSearch to determine the state in which to start.
+ // AnalyzeSearch to determine the state in which to start.
struct StartInfo {
- StartInfo() : start(NULL) {}
- std::atomic<State*> start;
+ StartInfo() : start(NULL) {}
+ std::atomic<State*> start;
};
- // Fills in params->start and params->can_prefix_accel using
+ // Fills in params->start and params->can_prefix_accel using
// the other search parameters. Returns true on success,
// false on failure.
// cache_mutex_.r <= L < mutex_
@@ -287,10 +287,10 @@ class DFA {
// The generic search loop, inlined to create specialized versions.
// cache_mutex_.r <= L < mutex_
// Might unlock and relock cache_mutex_ via params->cache_lock.
- template <bool can_prefix_accel,
- bool want_earliest_match,
- bool run_forward>
- inline bool InlinedSearchLoop(SearchParams* params);
+ template <bool can_prefix_accel,
+ bool want_earliest_match,
+ bool run_forward>
+ inline bool InlinedSearchLoop(SearchParams* params);
// The specialized versions of InlinedSearchLoop. The three letters
// at the ends of the name denote the true/false values used as the
@@ -330,7 +330,7 @@ class DFA {
// Scratch areas, protected by mutex_.
Workq* q0_; // Two pre-allocated work queues.
Workq* q1_;
- PODArray<int> stack_; // Pre-allocated stack for AddToQueue
+ PODArray<int> stack_; // Pre-allocated stack for AddToQueue
// State* cache. Many threads use and add to the cache simultaneously,
// holding cache_mutex_ for reading and mutex_ (above) when adding.
@@ -338,14 +338,14 @@ class DFA {
// while holding cache_mutex_ for writing, to avoid interrupting other
// readers. Any State* pointers are only valid while cache_mutex_
// is held.
- CacheMutex cache_mutex_;
+ CacheMutex cache_mutex_;
int64_t mem_budget_; // Total memory budget for all States.
int64_t state_budget_; // Amount of memory remaining for new States.
StateSet state_cache_; // All States computed so far.
StartInfo start_[kMaxStart];
-
- DFA(const DFA&) = delete;
- DFA& operator=(const DFA&) = delete;
+
+ DFA(const DFA&) = delete;
+ DFA& operator=(const DFA&) = delete;
};
// Shorthand for casting to uint8_t*.
@@ -359,10 +359,10 @@ static inline const uint8_t* BytePtr(const void* v) {
// in the work queue when in leftmost-longest matching mode.
#define Mark (-1)
-// Separates the match IDs from the instructions in inst_.
-// Used only for "many match" DFA states.
-#define MatchSep (-2)
-
+// Separates the match IDs from the instructions in inst_.
+// Used only for "many match" DFA states.
+#define MatchSep (-2)
+
// Internally, the DFA uses a sparse array of
// program instruction pointers as a work queue.
// In leftmost longest mode, marks separate sections
@@ -428,21 +428,21 @@ DFA::DFA(Prog* prog, Prog::MatchKind kind, int64_t max_mem)
q1_(NULL),
mem_budget_(max_mem) {
if (ExtraDebug)
- fprintf(stderr, "\nkind %d\n%s\n", kind_, prog_->DumpUnanchored().c_str());
+ fprintf(stderr, "\nkind %d\n%s\n", kind_, prog_->DumpUnanchored().c_str());
int nmark = 0;
if (kind_ == Prog::kLongestMatch)
nmark = prog_->size();
// See DFA::AddToQueue() for why this is so.
- int nstack = prog_->inst_count(kInstCapture) +
- prog_->inst_count(kInstEmptyWidth) +
- prog_->inst_count(kInstNop) +
- nmark + 1; // + 1 for start inst
+ int nstack = prog_->inst_count(kInstCapture) +
+ prog_->inst_count(kInstEmptyWidth) +
+ prog_->inst_count(kInstNop) +
+ nmark + 1; // + 1 for start inst
- // Account for space needed for DFA, q0, q1, stack.
+ // Account for space needed for DFA, q0, q1, stack.
mem_budget_ -= sizeof(DFA);
mem_budget_ -= (prog_->size() + nmark) *
(sizeof(int)+sizeof(int)) * 2; // q0, q1
- mem_budget_ -= nstack * sizeof(int); // stack
+ mem_budget_ -= nstack * sizeof(int); // stack
if (mem_budget_ < 0) {
init_failed_ = true;
return;
@@ -466,7 +466,7 @@ DFA::DFA(Prog* prog, Prog::MatchKind kind, int64_t max_mem)
q0_ = new Workq(prog_->size(), nmark);
q1_ = new Workq(prog_->size(), nmark);
- stack_ = PODArray<int>(nstack);
+ stack_ = PODArray<int>(nstack);
}
DFA::~DFA() {
@@ -490,15 +490,15 @@ DFA::~DFA() {
// Debugging printouts
// For debugging, returns a string representation of the work queue.
-std::string DFA::DumpWorkq(Workq* q) {
- std::string s;
+std::string DFA::DumpWorkq(Workq* q) {
+ std::string s;
const char* sep = "";
- for (Workq::iterator it = q->begin(); it != q->end(); ++it) {
+ for (Workq::iterator it = q->begin(); it != q->end(); ++it) {
if (q->is_mark(*it)) {
- s += "|";
+ s += "|";
sep = "";
} else {
- s += StringPrintf("%s%d", sep, *it);
+ s += StringPrintf("%s%d", sep, *it);
sep = ",";
}
}
@@ -506,29 +506,29 @@ std::string DFA::DumpWorkq(Workq* q) {
}
// For debugging, returns a string representation of the state.
-std::string DFA::DumpState(State* state) {
+std::string DFA::DumpState(State* state) {
if (state == NULL)
return "_";
if (state == DeadState)
return "X";
if (state == FullMatchState)
return "*";
- std::string s;
+ std::string s;
const char* sep = "";
- s += StringPrintf("(%p)", state);
+ s += StringPrintf("(%p)", state);
for (int i = 0; i < state->ninst_; i++) {
if (state->inst_[i] == Mark) {
- s += "|";
+ s += "|";
+ sep = "";
+ } else if (state->inst_[i] == MatchSep) {
+ s += "||";
sep = "";
- } else if (state->inst_[i] == MatchSep) {
- s += "||";
- sep = "";
} else {
- s += StringPrintf("%s%d", sep, state->inst_[i]);
+ s += StringPrintf("%s%d", sep, state->inst_[i]);
sep = ",";
}
}
- s += StringPrintf(" flag=%#x", state->flag_);
+ s += StringPrintf(" flag=%#x", state->flag_);
return s;
}
@@ -590,16 +590,16 @@ std::string DFA::DumpState(State* state) {
// Looks in the State cache for a State matching q, flag.
// If one is found, returns it. If one is not found, allocates one,
// inserts it in the cache, and returns it.
-// If mq is not null, MatchSep and the match IDs in mq will be appended
-// to the State.
-DFA::State* DFA::WorkqToCachedState(Workq* q, Workq* mq, uint32_t flag) {
+// If mq is not null, MatchSep and the match IDs in mq will be appended
+// to the State.
+DFA::State* DFA::WorkqToCachedState(Workq* q, Workq* mq, uint32_t flag) {
//mutex_.AssertHeld();
// Construct array of instruction ids for the new state.
// Only ByteRange, EmptyWidth, and Match instructions are useful to keep:
// those are the only operators with any effect in
// RunWorkqOnEmptyString or RunWorkqOnByte.
- PODArray<int> inst(q->size());
+ PODArray<int> inst(q->size());
int n = 0;
uint32_t needflags = 0; // flags needed by kInstEmptyWidth instructions
bool sawmatch = false; // whether queue contains guaranteed kInstMatch
@@ -684,7 +684,7 @@ DFA::State* DFA::WorkqToCachedState(Workq* q, Workq* mq, uint32_t flag) {
// unordered state sets separated by Marks. Sort each set
// to canonicalize, to reduce the number of distinct sets stored.
if (kind_ == Prog::kLongestMatch) {
- int* ip = inst.data();
+ int* ip = inst.data();
int* ep = ip + n;
while (ip < ep) {
int* markp = ip;
@@ -697,30 +697,30 @@ DFA::State* DFA::WorkqToCachedState(Workq* q, Workq* mq, uint32_t flag) {
}
}
- // If we're in many match mode, canonicalize for similar reasons:
- // we have an unordered set of states (i.e. we don't have Marks)
- // and sorting will reduce the number of distinct sets stored.
- if (kind_ == Prog::kManyMatch) {
- int* ip = inst.data();
- int* ep = ip + n;
- std::sort(ip, ep);
- }
-
- // Append MatchSep and the match IDs in mq if necessary.
- if (mq != NULL) {
- inst[n++] = MatchSep;
- for (Workq::iterator i = mq->begin(); i != mq->end(); ++i) {
- int id = *i;
- Prog::Inst* ip = prog_->inst(id);
- if (ip->opcode() == kInstMatch)
- inst[n++] = ip->match_id();
- }
- }
-
+ // If we're in many match mode, canonicalize for similar reasons:
+ // we have an unordered set of states (i.e. we don't have Marks)
+ // and sorting will reduce the number of distinct sets stored.
+ if (kind_ == Prog::kManyMatch) {
+ int* ip = inst.data();
+ int* ep = ip + n;
+ std::sort(ip, ep);
+ }
+
+ // Append MatchSep and the match IDs in mq if necessary.
+ if (mq != NULL) {
+ inst[n++] = MatchSep;
+ for (Workq::iterator i = mq->begin(); i != mq->end(); ++i) {
+ int id = *i;
+ Prog::Inst* ip = prog_->inst(id);
+ if (ip->opcode() == kInstMatch)
+ inst[n++] = ip->match_id();
+ }
+ }
+
// Save the needed empty-width flags in the top bits for use later.
flag |= needflags << kFlagNeedShift;
- State* state = CachedState(inst.data(), n, flag);
+ State* state = CachedState(inst.data(), n, flag);
return state;
}
@@ -759,7 +759,7 @@ DFA::State* DFA::CachedState(int* inst, int ninst, uint32_t flag) {
mem_budget_ -= mem + kStateCacheOverhead;
// Allocate new state along with room for next_ and inst_.
- char* space = std::allocator<char>().allocate(mem);
+ char* space = std::allocator<char>().allocate(mem);
State* s = new (space) State;
(void) new (s->next_) std::atomic<State*>[nnext];
// Work around a unfortunate bug in older versions of libstdc++.
@@ -786,12 +786,12 @@ void DFA::ClearCache() {
StateSet::iterator tmp = begin;
++begin;
// Deallocate the blob of memory that we allocated in DFA::CachedState().
- // We recompute mem in order to benefit from sized delete where possible.
- int ninst = (*tmp)->ninst_;
- int nnext = prog_->bytemap_range() + 1; // + 1 for kByteEndText slot
- int mem = sizeof(State) + nnext*sizeof(std::atomic<State*>) +
- ninst*sizeof(int);
- std::allocator<char>().deallocate(reinterpret_cast<char*>(*tmp), mem);
+ // We recompute mem in order to benefit from sized delete where possible.
+ int ninst = (*tmp)->ninst_;
+ int nnext = prog_->bytemap_range() + 1; // + 1 for kByteEndText slot
+ int mem = sizeof(State) + nnext*sizeof(std::atomic<State*>) +
+ ninst*sizeof(int);
+ std::allocator<char>().deallocate(reinterpret_cast<char*>(*tmp), mem);
}
state_cache_.clear();
}
@@ -800,22 +800,22 @@ void DFA::ClearCache() {
void DFA::StateToWorkq(State* s, Workq* q) {
q->clear();
for (int i = 0; i < s->ninst_; i++) {
- if (s->inst_[i] == Mark) {
+ if (s->inst_[i] == Mark) {
q->mark();
- } else if (s->inst_[i] == MatchSep) {
- // Nothing after this is an instruction!
- break;
- } else {
+ } else if (s->inst_[i] == MatchSep) {
+ // Nothing after this is an instruction!
+ break;
+ } else {
// Explore from the head of the list.
AddToQueue(q, s->inst_[i], s->flag_ & kFlagEmptyMask);
- }
+ }
}
}
// Adds ip to the work queue, following empty arrows according to flag.
void DFA::AddToQueue(Workq* q, int id, uint32_t flag) {
- // Use stack_ to hold our stack of instructions yet to process.
+ // Use stack_ to hold our stack of instructions yet to process.
// It was preallocated as follows:
// one entry per Capture;
// one entry per EmptyWidth; and
@@ -824,12 +824,12 @@ void DFA::AddToQueue(Workq* q, int id, uint32_t flag) {
// perform. (Each instruction can be processed at most once.)
// When using marks, we also added nmark == prog_->size().
// (Otherwise, nmark == 0.)
- int* stk = stack_.data();
+ int* stk = stack_.data();
int nstk = 0;
stk[nstk++] = id;
while (nstk > 0) {
- DCHECK_LE(nstk, stack_.size());
+ DCHECK_LE(nstk, stack_.size());
id = stk[--nstk];
Loop:
@@ -928,7 +928,7 @@ void DFA::RunWorkqOnEmptyString(Workq* oldq, Workq* newq, uint32_t flag) {
// means to match c$. Sets the bool *ismatch to true if the end of the
// regular expression program has been reached (the regexp has matched).
void DFA::RunWorkqOnByte(Workq* oldq, Workq* newq,
- int c, uint32_t flag, bool* ismatch) {
+ int c, uint32_t flag, bool* ismatch) {
//mutex_.AssertHeld();
newq->clear();
@@ -954,29 +954,29 @@ void DFA::RunWorkqOnByte(Workq* oldq, Workq* newq,
break;
case kInstByteRange: // can follow if c is in range
- if (!ip->Matches(c))
- break;
- AddToQueue(newq, ip->out(), flag);
- if (ip->hint() != 0) {
- // We have a hint, but we must cancel out the
- // increment that will occur after the break.
- i += ip->hint() - 1;
- } else {
- // We have no hint, so we must find the end
- // of the current list and then skip to it.
- Prog::Inst* ip0 = ip;
- while (!ip->last())
- ++ip;
- i += ip - ip0;
- }
+ if (!ip->Matches(c))
+ break;
+ AddToQueue(newq, ip->out(), flag);
+ if (ip->hint() != 0) {
+ // We have a hint, but we must cancel out the
+ // increment that will occur after the break.
+ i += ip->hint() - 1;
+ } else {
+ // We have no hint, so we must find the end
+ // of the current list and then skip to it.
+ Prog::Inst* ip0 = ip;
+ while (!ip->last())
+ ++ip;
+ i += ip - ip0;
+ }
break;
case kInstMatch:
- if (prog_->anchor_end() && c != kByteEndText &&
- kind_ != Prog::kManyMatch)
+ if (prog_->anchor_end() && c != kByteEndText &&
+ kind_ != Prog::kManyMatch)
break;
*ismatch = true;
- if (kind_ == Prog::kFirstMatch) {
+ if (kind_ == Prog::kFirstMatch) {
// Can stop processing work queue since we found a match.
return;
}
@@ -985,8 +985,8 @@ void DFA::RunWorkqOnByte(Workq* oldq, Workq* newq,
}
if (ExtraDebug)
- fprintf(stderr, "%s on %d[%#x] -> %s [%d]\n",
- DumpWorkq(oldq).c_str(), c, flag, DumpWorkq(newq).c_str(), *ismatch);
+ fprintf(stderr, "%s on %d[%#x] -> %s [%d]\n",
+ DumpWorkq(oldq).c_str(), c, flag, DumpWorkq(newq).c_str(), *ismatch);
}
// Processes input byte c in state, returning new state.
@@ -1068,9 +1068,9 @@ DFA::State* DFA::RunStateOnByte(State* state, int c) {
swap(q0_, q1_);
}
bool ismatch = false;
- RunWorkqOnByte(q0_, q1_, c, afterflag, &ismatch);
- using std::swap;
- swap(q0_, q1_);
+ RunWorkqOnByte(q0_, q1_, c, afterflag, &ismatch);
+ using std::swap;
+ swap(q0_, q1_);
// Save afterflag along with ismatch and isword in new state.
uint32_t flag = afterflag;
@@ -1079,10 +1079,10 @@ DFA::State* DFA::RunStateOnByte(State* state, int c) {
if (isword)
flag |= kFlagLastWord;
- if (ismatch && kind_ == Prog::kManyMatch)
- ns = WorkqToCachedState(q0_, q1_, flag);
- else
- ns = WorkqToCachedState(q0_, NULL, flag);
+ if (ismatch && kind_ == Prog::kManyMatch)
+ ns = WorkqToCachedState(q0_, q1_, flag);
+ else
+ ns = WorkqToCachedState(q0_, NULL, flag);
// Flush ns before linking to it.
// Write barrier before updating state->next_ so that the
@@ -1113,7 +1113,7 @@ DFA::State* DFA::RunStateOnByte(State* state, int c) {
class DFA::RWLocker {
public:
- explicit RWLocker(CacheMutex* mu);
+ explicit RWLocker(CacheMutex* mu);
~RWLocker();
// If the lock is only held for reading right now,
@@ -1123,19 +1123,19 @@ class DFA::RWLocker {
void LockForWriting();
private:
- CacheMutex* mu_;
+ CacheMutex* mu_;
bool writing_;
RWLocker(const RWLocker&) = delete;
RWLocker& operator=(const RWLocker&) = delete;
};
-DFA::RWLocker::RWLocker(CacheMutex* mu) : mu_(mu), writing_(false) {
+DFA::RWLocker::RWLocker(CacheMutex* mu) : mu_(mu), writing_(false) {
mu_->ReaderLock();
}
-// This function is marked as NO_THREAD_SAFETY_ANALYSIS because
-// the annotations don't support lock upgrade.
+// This function is marked as NO_THREAD_SAFETY_ANALYSIS because
+// the annotations don't support lock upgrade.
void DFA::RWLocker::LockForWriting() NO_THREAD_SAFETY_ANALYSIS {
if (!writing_) {
mu_->ReaderUnlock();
@@ -1167,14 +1167,14 @@ void DFA::ResetCache(RWLocker* cache_lock) {
// Re-acquire the cache_mutex_ for writing (exclusive use).
cache_lock->LockForWriting();
- hooks::GetDFAStateCacheResetHook()({
- state_budget_,
- state_cache_.size(),
- });
-
+ hooks::GetDFAStateCacheResetHook()({
+ state_budget_,
+ state_cache_.size(),
+ });
+
// Clear the cache, reset the memory budget.
- for (int i = 0; i < kMaxStart; i++)
- start_[i].start.store(NULL, std::memory_order_relaxed);
+ for (int i = 0; i < kMaxStart; i++)
+ start_[i].start.store(NULL, std::memory_order_relaxed);
ClearCache();
mem_budget_ = state_budget_;
}
@@ -1289,7 +1289,7 @@ DFA::State* DFA::StateSaver::Restore() {
// situation, the DFA can do better than executing the simple loop.
// Instead, it can call memchr to search very quickly for the byte c.
// Whether the start state has this property is determined during a
-// pre-compilation pass and the "can_prefix_accel" argument is set.
+// pre-compilation pass and the "can_prefix_accel" argument is set.
//
// Fourth, the desired behavior is to search for the leftmost-best match
// (approximately, the same one that Perl would find), which is not
@@ -1321,16 +1321,16 @@ DFA::State* DFA::StateSaver::Restore() {
// The bools are equal to the same-named variables in params, but
// making them function arguments lets the inliner specialize
// this function to each combination (see two paragraphs above).
-template <bool can_prefix_accel,
- bool want_earliest_match,
- bool run_forward>
-inline bool DFA::InlinedSearchLoop(SearchParams* params) {
+template <bool can_prefix_accel,
+ bool want_earliest_match,
+ bool run_forward>
+inline bool DFA::InlinedSearchLoop(SearchParams* params) {
State* start = params->start;
- const uint8_t* bp = BytePtr(params->text.data()); // start of text
- const uint8_t* p = bp; // text scanning point
- const uint8_t* ep = BytePtr(params->text.data() +
- params->text.size()); // end of text
- const uint8_t* resetp = NULL; // p at last cache reset
+ const uint8_t* bp = BytePtr(params->text.data()); // start of text
+ const uint8_t* p = bp; // text scanning point
+ const uint8_t* ep = BytePtr(params->text.data() +
+ params->text.size()); // end of text
+ const uint8_t* resetp = NULL; // p at last cache reset
if (!run_forward) {
using std::swap;
swap(p, ep);
@@ -1339,24 +1339,24 @@ inline bool DFA::InlinedSearchLoop(SearchParams* params) {
const uint8_t* bytemap = prog_->bytemap();
const uint8_t* lastmatch = NULL; // most recent matching position in text
bool matched = false;
-
+
State* s = start;
- if (ExtraDebug)
- fprintf(stderr, "@stx: %s\n", DumpState(s).c_str());
+ if (ExtraDebug)
+ fprintf(stderr, "@stx: %s\n", DumpState(s).c_str());
if (s->IsMatch()) {
matched = true;
lastmatch = p;
- if (ExtraDebug)
- fprintf(stderr, "match @stx! [%s]\n", DumpState(s).c_str());
- if (params->matches != NULL && kind_ == Prog::kManyMatch) {
- for (int i = s->ninst_ - 1; i >= 0; i--) {
- int id = s->inst_[i];
- if (id == MatchSep)
- break;
- params->matches->insert(id);
- }
- }
+ if (ExtraDebug)
+ fprintf(stderr, "match @stx! [%s]\n", DumpState(s).c_str());
+ if (params->matches != NULL && kind_ == Prog::kManyMatch) {
+ for (int i = s->ninst_ - 1; i >= 0; i--) {
+ int id = s->inst_[i];
+ if (id == MatchSep)
+ break;
+ params->matches->insert(id);
+ }
+ }
if (want_earliest_match) {
params->ep = reinterpret_cast<const char*>(lastmatch);
return true;
@@ -1365,16 +1365,16 @@ inline bool DFA::InlinedSearchLoop(SearchParams* params) {
while (p != ep) {
if (ExtraDebug)
- fprintf(stderr, "@%td: %s\n", p - bp, DumpState(s).c_str());
-
- if (can_prefix_accel && s == start) {
- // In start state, only way out is to find the prefix,
- // so we use prefix accel (e.g. memchr) to skip ahead.
- // If not found, we can skip to the end of the string.
- p = BytePtr(prog_->PrefixAccel(p, ep - p));
- if (p == NULL) {
- p = ep;
- break;
+ fprintf(stderr, "@%td: %s\n", p - bp, DumpState(s).c_str());
+
+ if (can_prefix_accel && s == start) {
+ // In start state, only way out is to find the prefix,
+ // so we use prefix accel (e.g. memchr) to skip ahead.
+ // If not found, we can skip to the end of the string.
+ p = BytePtr(prog_->PrefixAccel(p, ep - p));
+ if (p == NULL) {
+ p = ep;
+ break;
}
}
@@ -1413,11 +1413,11 @@ inline bool DFA::InlinedSearchLoop(SearchParams* params) {
// byte runs at about 0.2 MB/s, while the NFA (nfa.cc) can do the
// same at about 2 MB/s. Unless we're processing an average
// of 10 bytes per state computation, fail so that RE2 can
- // fall back to the NFA. However, RE2::Set cannot fall back,
- // so we just have to keep on keeping on in that case.
+ // fall back to the NFA. However, RE2::Set cannot fall back,
+ // so we just have to keep on keeping on in that case.
if (dfa_should_bail_when_slow && resetp != NULL &&
- static_cast<size_t>(p - resetp) < 10*state_cache_.size() &&
- kind_ != Prog::kManyMatch) {
+ static_cast<size_t>(p - resetp) < 10*state_cache_.size() &&
+ kind_ != Prog::kManyMatch) {
params->failed = true;
return false;
}
@@ -1454,7 +1454,7 @@ inline bool DFA::InlinedSearchLoop(SearchParams* params) {
params->ep = reinterpret_cast<const char*>(ep);
return true;
}
-
+
s = ns;
if (s->IsMatch()) {
matched = true;
@@ -1465,15 +1465,15 @@ inline bool DFA::InlinedSearchLoop(SearchParams* params) {
else
lastmatch = p + 1;
if (ExtraDebug)
- fprintf(stderr, "match @%td! [%s]\n", lastmatch - bp, DumpState(s).c_str());
- if (params->matches != NULL && kind_ == Prog::kManyMatch) {
- for (int i = s->ninst_ - 1; i >= 0; i--) {
- int id = s->inst_[i];
- if (id == MatchSep)
- break;
- params->matches->insert(id);
- }
- }
+ fprintf(stderr, "match @%td! [%s]\n", lastmatch - bp, DumpState(s).c_str());
+ if (params->matches != NULL && kind_ == Prog::kManyMatch) {
+ for (int i = s->ninst_ - 1; i >= 0; i--) {
+ int id = s->inst_[i];
+ if (id == MatchSep)
+ break;
+ params->matches->insert(id);
+ }
+ }
if (want_earliest_match) {
params->ep = reinterpret_cast<const char*>(lastmatch);
return true;
@@ -1483,9 +1483,9 @@ inline bool DFA::InlinedSearchLoop(SearchParams* params) {
// Process one more byte to see if it triggers a match.
// (Remember, matches are delayed one byte.)
- if (ExtraDebug)
- fprintf(stderr, "@etx: %s\n", DumpState(s).c_str());
-
+ if (ExtraDebug)
+ fprintf(stderr, "@etx: %s\n", DumpState(s).c_str());
+
int lastbyte;
if (run_forward) {
if (EndPtr(params->text) == EndPtr(params->context))
@@ -1517,60 +1517,60 @@ inline bool DFA::InlinedSearchLoop(SearchParams* params) {
}
}
}
- if (ns <= SpecialStateMax) {
- if (ns == DeadState) {
- params->ep = reinterpret_cast<const char*>(lastmatch);
- return matched;
- }
- // FullMatchState
+ if (ns <= SpecialStateMax) {
+ if (ns == DeadState) {
+ params->ep = reinterpret_cast<const char*>(lastmatch);
+ return matched;
+ }
+ // FullMatchState
params->ep = reinterpret_cast<const char*>(ep);
return true;
}
-
- s = ns;
- if (s->IsMatch()) {
+
+ s = ns;
+ if (s->IsMatch()) {
matched = true;
lastmatch = p;
- if (ExtraDebug)
- fprintf(stderr, "match @etx! [%s]\n", DumpState(s).c_str());
- if (params->matches != NULL && kind_ == Prog::kManyMatch) {
- for (int i = s->ninst_ - 1; i >= 0; i--) {
- int id = s->inst_[i];
- if (id == MatchSep)
- break;
- params->matches->insert(id);
+ if (ExtraDebug)
+ fprintf(stderr, "match @etx! [%s]\n", DumpState(s).c_str());
+ if (params->matches != NULL && kind_ == Prog::kManyMatch) {
+ for (int i = s->ninst_ - 1; i >= 0; i--) {
+ int id = s->inst_[i];
+ if (id == MatchSep)
+ break;
+ params->matches->insert(id);
}
}
}
-
+
params->ep = reinterpret_cast<const char*>(lastmatch);
return matched;
}
// Inline specializations of the general loop.
bool DFA::SearchFFF(SearchParams* params) {
- return InlinedSearchLoop<false, false, false>(params);
+ return InlinedSearchLoop<false, false, false>(params);
}
bool DFA::SearchFFT(SearchParams* params) {
- return InlinedSearchLoop<false, false, true>(params);
+ return InlinedSearchLoop<false, false, true>(params);
}
bool DFA::SearchFTF(SearchParams* params) {
- return InlinedSearchLoop<false, true, false>(params);
+ return InlinedSearchLoop<false, true, false>(params);
}
bool DFA::SearchFTT(SearchParams* params) {
- return InlinedSearchLoop<false, true, true>(params);
+ return InlinedSearchLoop<false, true, true>(params);
}
bool DFA::SearchTFF(SearchParams* params) {
- return InlinedSearchLoop<true, false, false>(params);
+ return InlinedSearchLoop<true, false, false>(params);
}
bool DFA::SearchTFT(SearchParams* params) {
- return InlinedSearchLoop<true, false, true>(params);
+ return InlinedSearchLoop<true, false, true>(params);
}
bool DFA::SearchTTF(SearchParams* params) {
- return InlinedSearchLoop<true, true, false>(params);
+ return InlinedSearchLoop<true, true, false>(params);
}
bool DFA::SearchTTT(SearchParams* params) {
- return InlinedSearchLoop<true, true, true>(params);
+ return InlinedSearchLoop<true, true, true>(params);
}
// For performance, calls the appropriate specialized version
@@ -1589,7 +1589,7 @@ bool DFA::FastSearchLoop(SearchParams* params) {
&DFA::SearchTTT,
};
- int index = 4 * params->can_prefix_accel +
+ int index = 4 * params->can_prefix_accel +
2 * params->want_earliest_match +
1 * params->run_forward;
return (this->*Searches[index])(params);
@@ -1665,7 +1665,7 @@ bool DFA::AnalyzeSearch(SearchParams* params) {
flags = 0;
}
}
- if (params->anchored)
+ if (params->anchored)
start |= kStartAnchored;
StartInfo* info = &start_[start];
@@ -1681,22 +1681,22 @@ bool DFA::AnalyzeSearch(SearchParams* params) {
}
}
- params->start = info->start.load(std::memory_order_acquire);
-
- // Even if we could prefix accel, we cannot do so when anchored and,
- // less obviously, we cannot do so when we are going to need flags.
- // This trick works only when there is a single byte that leads to a
- // different state!
- if (prog_->can_prefix_accel() &&
- !params->anchored &&
- params->start > SpecialStateMax &&
- params->start->flag_ >> kFlagNeedShift == 0)
- params->can_prefix_accel = true;
-
+ params->start = info->start.load(std::memory_order_acquire);
+
+ // Even if we could prefix accel, we cannot do so when anchored and,
+ // less obviously, we cannot do so when we are going to need flags.
+ // This trick works only when there is a single byte that leads to a
+ // different state!
+ if (prog_->can_prefix_accel() &&
+ !params->anchored &&
+ params->start > SpecialStateMax &&
+ params->start->flag_ >> kFlagNeedShift == 0)
+ params->can_prefix_accel = true;
+
if (ExtraDebug)
- fprintf(stderr, "anchored=%d fwd=%d flags=%#x state=%s can_prefix_accel=%d\n",
+ fprintf(stderr, "anchored=%d fwd=%d flags=%#x state=%s can_prefix_accel=%d\n",
params->anchored, params->run_forward, flags,
- DumpState(params->start).c_str(), params->can_prefix_accel);
+ DumpState(params->start).c_str(), params->can_prefix_accel);
return true;
}
@@ -1705,25 +1705,25 @@ bool DFA::AnalyzeSearch(SearchParams* params) {
bool DFA::AnalyzeSearchHelper(SearchParams* params, StartInfo* info,
uint32_t flags) {
// Quick check.
- State* start = info->start.load(std::memory_order_acquire);
- if (start != NULL)
+ State* start = info->start.load(std::memory_order_acquire);
+ if (start != NULL)
return true;
MutexLock l(&mutex_);
- start = info->start.load(std::memory_order_relaxed);
- if (start != NULL)
+ start = info->start.load(std::memory_order_relaxed);
+ if (start != NULL)
return true;
q0_->clear();
AddToQueue(q0_,
params->anchored ? prog_->start() : prog_->start_unanchored(),
flags);
- start = WorkqToCachedState(q0_, NULL, flags);
- if (start == NULL)
+ start = WorkqToCachedState(q0_, NULL, flags);
+ if (start == NULL)
return false;
// Synchronize with "quick check" above.
- info->start.store(start, std::memory_order_release);
+ info->start.store(start, std::memory_order_release);
return true;
}
@@ -1735,7 +1735,7 @@ bool DFA::Search(const StringPiece& text,
bool run_forward,
bool* failed,
const char** epp,
- SparseSet* matches) {
+ SparseSet* matches) {
*epp = NULL;
if (!ok()) {
*failed = true;
@@ -1746,7 +1746,7 @@ bool DFA::Search(const StringPiece& text,
if (ExtraDebug) {
fprintf(stderr, "\nprogram:\n%s\n", prog_->DumpUnanchored().c_str());
fprintf(stderr, "text %s anchored=%d earliest=%d fwd=%d kind %d\n",
- std::string(text).c_str(), anchored, want_earliest_match, run_forward, kind_);
+ std::string(text).c_str(), anchored, want_earliest_match, run_forward, kind_);
}
RWLocker l(&cache_mutex_);
@@ -1764,9 +1764,9 @@ bool DFA::Search(const StringPiece& text,
return false;
if (params.start == FullMatchState) {
if (run_forward == want_earliest_match)
- *epp = text.data();
+ *epp = text.data();
else
- *epp = text.data() + text.size();
+ *epp = text.data() + text.size();
return true;
}
if (ExtraDebug)
@@ -1825,17 +1825,17 @@ void Prog::DeleteDFA(DFA* dfa) {
//
bool Prog::SearchDFA(const StringPiece& text, const StringPiece& const_context,
Anchor anchor, MatchKind kind, StringPiece* match0,
- bool* failed, SparseSet* matches) {
+ bool* failed, SparseSet* matches) {
*failed = false;
StringPiece context = const_context;
- if (context.data() == NULL)
+ if (context.data() == NULL)
context = text;
- bool caret = anchor_start();
+ bool caret = anchor_start();
bool dollar = anchor_end();
if (reversed_) {
- using std::swap;
- swap(caret, dollar);
+ using std::swap;
+ swap(caret, dollar);
}
if (caret && BeginPtr(context) != BeginPtr(text))
return false;
@@ -1847,7 +1847,7 @@ bool Prog::SearchDFA(const StringPiece& text, const StringPiece& const_context,
bool anchored = anchor == kAnchored || anchor_start() || kind == kFullMatch;
bool endmatch = false;
if (kind == kManyMatch) {
- // This is split out in order to avoid clobbering kind.
+ // This is split out in order to avoid clobbering kind.
} else if (kind == kFullMatch || anchor_end()) {
endmatch = true;
kind = kLongestMatch;
@@ -1855,32 +1855,32 @@ bool Prog::SearchDFA(const StringPiece& text, const StringPiece& const_context,
// If the caller doesn't care where the match is (just whether one exists),
// then we can stop at the very first match we find, the so-called
- // "earliest match".
- bool want_earliest_match = false;
- if (kind == kManyMatch) {
- // This is split out in order to avoid clobbering kind.
- if (matches == NULL) {
- want_earliest_match = true;
- }
- } else if (match0 == NULL && !endmatch) {
- want_earliest_match = true;
+ // "earliest match".
+ bool want_earliest_match = false;
+ if (kind == kManyMatch) {
+ // This is split out in order to avoid clobbering kind.
+ if (matches == NULL) {
+ want_earliest_match = true;
+ }
+ } else if (match0 == NULL && !endmatch) {
+ want_earliest_match = true;
kind = kLongestMatch;
}
DFA* dfa = GetDFA(kind);
const char* ep;
bool matched = dfa->Search(text, context, anchored,
- want_earliest_match, !reversed_,
+ want_earliest_match, !reversed_,
failed, &ep, matches);
- if (*failed) {
- hooks::GetDFASearchFailureHook()({
- // Nothing yet...
- });
+ if (*failed) {
+ hooks::GetDFASearchFailureHook()({
+ // Nothing yet...
+ });
return false;
- }
+ }
if (!matched)
return false;
- if (endmatch && ep != (reversed_ ? text.data() : text.data() + text.size()))
+ if (endmatch && ep != (reversed_ ? text.data() : text.data() + text.size()))
return false;
// If caller cares, record the boundary of the match.
@@ -1888,17 +1888,17 @@ bool Prog::SearchDFA(const StringPiece& text, const StringPiece& const_context,
// as the beginning.
if (match0) {
if (reversed_)
- *match0 =
- StringPiece(ep, static_cast<size_t>(text.data() + text.size() - ep));
+ *match0 =
+ StringPiece(ep, static_cast<size_t>(text.data() + text.size() - ep));
else
*match0 =
- StringPiece(text.data(), static_cast<size_t>(ep - text.data()));
+ StringPiece(text.data(), static_cast<size_t>(ep - text.data()));
}
return true;
}
// Build out all states in DFA. Returns number of states.
-int DFA::BuildAllStates(const Prog::DFAStateCallback& cb) {
+int DFA::BuildAllStates(const Prog::DFAStateCallback& cb) {
if (!ok())
return 0;
@@ -1907,72 +1907,72 @@ int DFA::BuildAllStates(const Prog::DFAStateCallback& cb) {
RWLocker l(&cache_mutex_);
SearchParams params(StringPiece(), StringPiece(), &l);
params.anchored = false;
- if (!AnalyzeSearch(&params) ||
- params.start == NULL ||
- params.start == DeadState)
+ if (!AnalyzeSearch(&params) ||
+ params.start == NULL ||
+ params.start == DeadState)
return 0;
// Add start state to work queue.
- // Note that any State* that we handle here must point into the cache,
- // so we can simply depend on pointer-as-a-number hashing and equality.
- std::unordered_map<State*, int> m;
- std::deque<State*> q;
- m.emplace(params.start, static_cast<int>(m.size()));
+ // Note that any State* that we handle here must point into the cache,
+ // so we can simply depend on pointer-as-a-number hashing and equality.
+ std::unordered_map<State*, int> m;
+ std::deque<State*> q;
+ m.emplace(params.start, static_cast<int>(m.size()));
q.push_back(params.start);
- // Compute the input bytes needed to cover all of the next pointers.
- int nnext = prog_->bytemap_range() + 1; // + 1 for kByteEndText slot
- std::vector<int> input(nnext);
- for (int c = 0; c < 256; c++) {
- int b = prog_->bytemap()[c];
- while (c < 256-1 && prog_->bytemap()[c+1] == b)
- c++;
- input[b] = c;
- }
- input[prog_->bytemap_range()] = kByteEndText;
-
- // Scratch space for the output.
- std::vector<int> output(nnext);
-
+ // Compute the input bytes needed to cover all of the next pointers.
+ int nnext = prog_->bytemap_range() + 1; // + 1 for kByteEndText slot
+ std::vector<int> input(nnext);
+ for (int c = 0; c < 256; c++) {
+ int b = prog_->bytemap()[c];
+ while (c < 256-1 && prog_->bytemap()[c+1] == b)
+ c++;
+ input[b] = c;
+ }
+ input[prog_->bytemap_range()] = kByteEndText;
+
+ // Scratch space for the output.
+ std::vector<int> output(nnext);
+
// Flood to expand every state.
- bool oom = false;
- while (!q.empty()) {
- State* s = q.front();
- q.pop_front();
- for (int c : input) {
+ bool oom = false;
+ while (!q.empty()) {
+ State* s = q.front();
+ q.pop_front();
+ for (int c : input) {
State* ns = RunStateOnByteUnlocked(s, c);
- if (ns == NULL) {
- oom = true;
- break;
- }
- if (ns == DeadState) {
- output[ByteMap(c)] = -1;
- continue;
- }
- if (m.find(ns) == m.end()) {
- m.emplace(ns, static_cast<int>(m.size()));
+ if (ns == NULL) {
+ oom = true;
+ break;
+ }
+ if (ns == DeadState) {
+ output[ByteMap(c)] = -1;
+ continue;
+ }
+ if (m.find(ns) == m.end()) {
+ m.emplace(ns, static_cast<int>(m.size()));
q.push_back(ns);
}
- output[ByteMap(c)] = m[ns];
+ output[ByteMap(c)] = m[ns];
}
- if (cb)
- cb(oom ? NULL : output.data(),
- s == FullMatchState || s->IsMatch());
- if (oom)
- break;
+ if (cb)
+ cb(oom ? NULL : output.data(),
+ s == FullMatchState || s->IsMatch());
+ if (oom)
+ break;
}
- return static_cast<int>(m.size());
+ return static_cast<int>(m.size());
}
// Build out all states in DFA for kind. Returns number of states.
-int Prog::BuildEntireDFA(MatchKind kind, const DFAStateCallback& cb) {
- return GetDFA(kind)->BuildAllStates(cb);
+int Prog::BuildEntireDFA(MatchKind kind, const DFAStateCallback& cb) {
+ return GetDFA(kind)->BuildAllStates(cb);
}
// Computes min and max for matching string.
// Won't return strings bigger than maxlen.
-bool DFA::PossibleMatchRange(std::string* min, std::string* max, int maxlen) {
+bool DFA::PossibleMatchRange(std::string* min, std::string* max, int maxlen) {
if (!ok())
return false;
@@ -1989,7 +1989,7 @@ bool DFA::PossibleMatchRange(std::string* min, std::string* max, int maxlen) {
// Also note that previously_visited_states[UnseenStatePtr] will, in the STL
// tradition, implicitly insert a '0' value at first use. We take advantage
// of that property below.
- std::unordered_map<State*, int> previously_visited_states;
+ std::unordered_map<State*, int> previously_visited_states;
// Pick out start state for anchored search at beginning of text.
RWLocker l(&cache_mutex_);
@@ -2094,7 +2094,7 @@ bool DFA::PossibleMatchRange(std::string* min, std::string* max, int maxlen) {
}
// Stopped while still adding to *max - round aaaaaaaaaa... to aaaa...b
- PrefixSuccessor(max);
+ PrefixSuccessor(max);
// If there are no bytes left, we have no way to say "there is no maximum
// string". We could make the interface more complicated and be able to
@@ -2109,7 +2109,7 @@ bool DFA::PossibleMatchRange(std::string* min, std::string* max, int maxlen) {
}
// PossibleMatchRange for a Prog.
-bool Prog::PossibleMatchRange(std::string* min, std::string* max, int maxlen) {
+bool Prog::PossibleMatchRange(std::string* min, std::string* max, int maxlen) {
// Have to use dfa_longest_ to get all strings for full matches.
// For example, (a|aa) never matches aa in first-match mode.
return GetDFA(kLongestMatch)->PossibleMatchRange(min, max, maxlen);
diff --git a/contrib/libs/re2/re2/filtered_re2.cc b/contrib/libs/re2/re2/filtered_re2.cc
index 9f64cbf645..5df97456e2 100644
--- a/contrib/libs/re2/re2/filtered_re2.cc
+++ b/contrib/libs/re2/re2/filtered_re2.cc
@@ -6,7 +6,7 @@
#include <stddef.h>
#include <string>
-#include <utility>
+#include <utility>
#include "util/util.h"
#include "util/logging.h"
@@ -30,22 +30,22 @@ FilteredRE2::~FilteredRE2() {
delete re2_vec_[i];
}
-FilteredRE2::FilteredRE2(FilteredRE2&& other)
- : re2_vec_(std::move(other.re2_vec_)),
- compiled_(other.compiled_),
- prefilter_tree_(std::move(other.prefilter_tree_)) {
- other.re2_vec_.clear();
- other.re2_vec_.shrink_to_fit();
- other.compiled_ = false;
- other.prefilter_tree_.reset(new PrefilterTree());
-}
-
-FilteredRE2& FilteredRE2::operator=(FilteredRE2&& other) {
- this->~FilteredRE2();
- (void) new (this) FilteredRE2(std::move(other));
- return *this;
-}
-
+FilteredRE2::FilteredRE2(FilteredRE2&& other)
+ : re2_vec_(std::move(other.re2_vec_)),
+ compiled_(other.compiled_),
+ prefilter_tree_(std::move(other.prefilter_tree_)) {
+ other.re2_vec_.clear();
+ other.re2_vec_.shrink_to_fit();
+ other.compiled_ = false;
+ other.prefilter_tree_.reset(new PrefilterTree());
+}
+
+FilteredRE2& FilteredRE2::operator=(FilteredRE2&& other) {
+ this->~FilteredRE2();
+ (void) new (this) FilteredRE2(std::move(other));
+ return *this;
+}
+
RE2::ErrorCode FilteredRE2::Add(const StringPiece& pattern,
const RE2::Options& options, int* id) {
RE2* re = new RE2(pattern, options);
@@ -54,7 +54,7 @@ RE2::ErrorCode FilteredRE2::Add(const StringPiece& pattern,
if (!re->ok()) {
if (options.log_errors()) {
LOG(ERROR) << "Couldn't compile regular expression, skipping: "
- << pattern << " due to error " << re->error();
+ << pattern << " due to error " << re->error();
}
delete re;
} else {
@@ -65,7 +65,7 @@ RE2::ErrorCode FilteredRE2::Add(const StringPiece& pattern,
return code;
}
-void FilteredRE2::Compile(std::vector<std::string>* atoms) {
+void FilteredRE2::Compile(std::vector<std::string>* atoms) {
if (compiled_) {
LOG(ERROR) << "Compile called already.";
return;
@@ -134,4 +134,4 @@ void FilteredRE2::PrintPrefilter(int regexpid) {
prefilter_tree_->PrintPrefilter(regexpid);
}
-} // namespace re2
+} // namespace re2
diff --git a/contrib/libs/re2/re2/filtered_re2.h b/contrib/libs/re2/re2/filtered_re2.h
index 4f2e0e2900..dd618c70e8 100644
--- a/contrib/libs/re2/re2/filtered_re2.h
+++ b/contrib/libs/re2/re2/filtered_re2.h
@@ -10,18 +10,18 @@
// number of regexps that need to be actually searched.
//
// By design, it does not include a string matching engine. This is to
-// allow the user of the class to use their favorite string matching
+// allow the user of the class to use their favorite string matching
// engine. The overall flow is: Add all the regexps using Add, then
-// Compile the FilteredRE2. Compile returns strings that need to be
-// matched. Note that the returned strings are lowercased and distinct.
-// For applying regexps to a search text, the caller does the string
-// matching using the returned strings. When doing the string match,
-// note that the caller has to do that in a case-insensitive way or
-// on a lowercased version of the search text. Then call FirstMatch
-// or AllMatches with a vector of indices of strings that were found
-// in the text to get the actual regexp matches.
-
-#include <memory>
+// Compile the FilteredRE2. Compile returns strings that need to be
+// matched. Note that the returned strings are lowercased and distinct.
+// For applying regexps to a search text, the caller does the string
+// matching using the returned strings. When doing the string match,
+// note that the caller has to do that in a case-insensitive way or
+// on a lowercased version of the search text. Then call FirstMatch
+// or AllMatches with a vector of indices of strings that were found
+// in the text to get the actual regexp matches.
+
+#include <memory>
#include <string>
#include <vector>
@@ -37,27 +37,27 @@ class FilteredRE2 {
explicit FilteredRE2(int min_atom_len);
~FilteredRE2();
- // Not copyable.
- FilteredRE2(const FilteredRE2&) = delete;
- FilteredRE2& operator=(const FilteredRE2&) = delete;
- // Movable.
- FilteredRE2(FilteredRE2&& other);
- FilteredRE2& operator=(FilteredRE2&& other);
-
+ // Not copyable.
+ FilteredRE2(const FilteredRE2&) = delete;
+ FilteredRE2& operator=(const FilteredRE2&) = delete;
+ // Movable.
+ FilteredRE2(FilteredRE2&& other);
+ FilteredRE2& operator=(FilteredRE2&& other);
+
// Uses RE2 constructor to create a RE2 object (re). Returns
// re->error_code(). If error_code is other than NoError, then re is
// deleted and not added to re2_vec_.
RE2::ErrorCode Add(const StringPiece& pattern,
const RE2::Options& options,
- int* id);
+ int* id);
// Prepares the regexps added by Add for filtering. Returns a set
// of strings that the caller should check for in candidate texts.
- // The returned strings are lowercased and distinct. When doing
- // string matching, it should be performed in a case-insensitive
- // way or the search text should be lowercased first. Call after
+ // The returned strings are lowercased and distinct. When doing
+ // string matching, it should be performed in a case-insensitive
+ // way or the search text should be lowercased first. Call after
// all Add calls are done.
- void Compile(std::vector<std::string>* strings_to_match);
+ void Compile(std::vector<std::string>* strings_to_match);
// Returns the index of the first matching regexp.
// Returns -1 on no match. Can be called prior to Compile.
@@ -88,9 +88,9 @@ class FilteredRE2 {
// The number of regexps added.
int NumRegexps() const { return static_cast<int>(re2_vec_.size()); }
- // Get the individual RE2 objects.
- const RE2& GetRE2(int regexpid) const { return *re2_vec_[regexpid]; }
-
+ // Get the individual RE2 objects.
+ const RE2& GetRE2(int regexpid) const { return *re2_vec_[regexpid]; }
+
private:
// Print prefilter.
void PrintPrefilter(int regexpid);
@@ -106,7 +106,7 @@ class FilteredRE2 {
bool compiled_;
// An AND-OR tree of string atoms used for filtering regexps.
- std::unique_ptr<PrefilterTree> prefilter_tree_;
+ std::unique_ptr<PrefilterTree> prefilter_tree_;
};
} // namespace re2
diff --git a/contrib/libs/re2/re2/mimics_pcre.cc b/contrib/libs/re2/re2/mimics_pcre.cc
index a75b943a10..b1d6a51228 100644
--- a/contrib/libs/re2/re2/mimics_pcre.cc
+++ b/contrib/libs/re2/re2/mimics_pcre.cc
@@ -39,20 +39,20 @@ class PCREWalker : public Regexp::Walker<bool> {
public:
PCREWalker() {}
- virtual bool PostVisit(Regexp* re, bool parent_arg, bool pre_arg,
- bool* child_args, int nchild_args);
-
- virtual bool ShortVisit(Regexp* re, bool a) {
- // Should never be called: we use Walk(), not WalkExponential().
-#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
- LOG(DFATAL) << "PCREWalker::ShortVisit called";
-#endif
+ virtual bool PostVisit(Regexp* re, bool parent_arg, bool pre_arg,
+ bool* child_args, int nchild_args);
+
+ virtual bool ShortVisit(Regexp* re, bool a) {
+ // Should never be called: we use Walk(), not WalkExponential().
+#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
+ LOG(DFATAL) << "PCREWalker::ShortVisit called";
+#endif
return a;
}
-
- private:
- PCREWalker(const PCREWalker&) = delete;
- PCREWalker& operator=(const PCREWalker&) = delete;
+
+ private:
+ PCREWalker(const PCREWalker&) = delete;
+ PCREWalker& operator=(const PCREWalker&) = delete;
};
// Called after visiting each of re's children and accumulating
@@ -121,16 +121,16 @@ bool Regexp::MimicsPCRE() {
class EmptyStringWalker : public Regexp::Walker<bool> {
public:
- EmptyStringWalker() {}
-
- virtual bool PostVisit(Regexp* re, bool parent_arg, bool pre_arg,
- bool* child_args, int nchild_args);
-
- virtual bool ShortVisit(Regexp* re, bool a) {
- // Should never be called: we use Walk(), not WalkExponential().
-#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
+ EmptyStringWalker() {}
+
+ virtual bool PostVisit(Regexp* re, bool parent_arg, bool pre_arg,
+ bool* child_args, int nchild_args);
+
+ virtual bool ShortVisit(Regexp* re, bool a) {
+ // Should never be called: we use Walk(), not WalkExponential().
+#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
LOG(DFATAL) << "EmptyStringWalker::ShortVisit called";
-#endif
+#endif
return a;
}
diff --git a/contrib/libs/re2/re2/nfa.cc b/contrib/libs/re2/re2/nfa.cc
index f25bbcfac1..c7339f8ffd 100644
--- a/contrib/libs/re2/re2/nfa.cc
+++ b/contrib/libs/re2/re2/nfa.cc
@@ -27,18 +27,18 @@
#include <stdio.h>
#include <string.h>
#include <algorithm>
-#include <deque>
+#include <deque>
#include <string>
#include <utility>
#include <vector>
-#include "util/logging.h"
-#include "util/strutil.h"
-#include "re2/pod_array.h"
+#include "util/logging.h"
+#include "util/strutil.h"
+#include "re2/pod_array.h"
#include "re2/prog.h"
#include "re2/regexp.h"
-#include "re2/sparse_array.h"
-#include "re2/sparse_set.h"
+#include "re2/sparse_array.h"
+#include "re2/sparse_set.h"
namespace re2 {
@@ -90,41 +90,41 @@ class NFA {
// Follows all empty arrows from id0 and enqueues all the states reached.
// Enqueues only the ByteRange instructions that match byte c.
- // context is used (with p) for evaluating empty-width specials.
+ // context is used (with p) for evaluating empty-width specials.
// p is the current input position, and t0 is the current thread.
- void AddToThreadq(Threadq* q, int id0, int c, const StringPiece& context,
+ void AddToThreadq(Threadq* q, int id0, int c, const StringPiece& context,
const char* p, Thread* t0);
// Run runq on byte c, appending new states to nextq.
// Updates matched_ and match_ as new, better matches are found.
- // context is used (with p) for evaluating empty-width specials.
- // p is the position of byte c in the input string for AddToThreadq;
- // p-1 will be used when processing Match instructions.
+ // context is used (with p) for evaluating empty-width specials.
+ // p is the position of byte c in the input string for AddToThreadq;
+ // p-1 will be used when processing Match instructions.
// Frees all the threads on runq.
// If there is a shortcut to the end, returns that shortcut.
- int Step(Threadq* runq, Threadq* nextq, int c, const StringPiece& context,
- const char* p);
+ int Step(Threadq* runq, Threadq* nextq, int c, const StringPiece& context,
+ const char* p);
// Returns text version of capture information, for debugging.
- std::string FormatCapture(const char** capture);
-
- void CopyCapture(const char** dst, const char** src) {
- memmove(dst, src, ncapture_*sizeof src[0]);
- }
-
- Prog* prog_; // underlying program
- int start_; // start instruction in program
- int ncapture_; // number of submatches to track
- bool longest_; // whether searching for longest match
- bool endmatch_; // whether match must end at text.end()
- const char* btext_; // beginning of text (for FormatSubmatch)
- const char* etext_; // end of text (for endmatch_)
- Threadq q0_, q1_; // pre-allocated for Search.
- PODArray<AddState> stack_; // pre-allocated for AddToThreadq
- std::deque<Thread> arena_; // thread arena
- Thread* freelist_; // thread freelist
- const char** match_; // best match so far
- bool matched_; // any match so far?
+ std::string FormatCapture(const char** capture);
+
+ void CopyCapture(const char** dst, const char** src) {
+ memmove(dst, src, ncapture_*sizeof src[0]);
+ }
+
+ Prog* prog_; // underlying program
+ int start_; // start instruction in program
+ int ncapture_; // number of submatches to track
+ bool longest_; // whether searching for longest match
+ bool endmatch_; // whether match must end at text.end()
+ const char* btext_; // beginning of text (for FormatSubmatch)
+ const char* etext_; // end of text (for endmatch_)
+ Threadq q0_, q1_; // pre-allocated for Search.
+ PODArray<AddState> stack_; // pre-allocated for AddToThreadq
+ std::deque<Thread> arena_; // thread arena
+ Thread* freelist_; // thread freelist
+ const char** match_; // best match so far
+ bool matched_; // any match so far?
NFA(const NFA&) = delete;
NFA& operator=(const NFA&) = delete;
@@ -141,34 +141,34 @@ NFA::NFA(Prog* prog) {
q0_.resize(prog_->size());
q1_.resize(prog_->size());
// See NFA::AddToThreadq() for why this is so.
- int nstack = 2*prog_->inst_count(kInstCapture) +
- prog_->inst_count(kInstEmptyWidth) +
- prog_->inst_count(kInstNop) + 1; // + 1 for start inst
- stack_ = PODArray<AddState>(nstack);
- freelist_ = NULL;
+ int nstack = 2*prog_->inst_count(kInstCapture) +
+ prog_->inst_count(kInstEmptyWidth) +
+ prog_->inst_count(kInstNop) + 1; // + 1 for start inst
+ stack_ = PODArray<AddState>(nstack);
+ freelist_ = NULL;
match_ = NULL;
matched_ = false;
}
NFA::~NFA() {
delete[] match_;
- for (const Thread& t : arena_)
- delete[] t.capture;
+ for (const Thread& t : arena_)
+ delete[] t.capture;
}
NFA::Thread* NFA::AllocThread() {
- Thread* t = freelist_;
- if (t != NULL) {
- freelist_ = t->next;
+ Thread* t = freelist_;
+ if (t != NULL) {
+ freelist_ = t->next;
t->ref = 1;
- // We don't need to touch t->capture because
- // the caller will immediately overwrite it.
+ // We don't need to touch t->capture because
+ // the caller will immediately overwrite it.
return t;
}
- arena_.emplace_back();
- t = &arena_.back();
+ arena_.emplace_back();
+ t = &arena_.back();
t->ref = 1;
- t->capture = new const char*[ncapture_];
+ t->capture = new const char*[ncapture_];
return t;
}
@@ -179,37 +179,37 @@ NFA::Thread* NFA::Incref(Thread* t) {
}
void NFA::Decref(Thread* t) {
- DCHECK(t != NULL);
+ DCHECK(t != NULL);
t->ref--;
if (t->ref > 0)
return;
DCHECK_EQ(t->ref, 0);
- t->next = freelist_;
- freelist_ = t;
+ t->next = freelist_;
+ freelist_ = t;
}
// Follows all empty arrows from id0 and enqueues all the states reached.
// Enqueues only the ByteRange instructions that match byte c.
-// context is used (with p) for evaluating empty-width specials.
+// context is used (with p) for evaluating empty-width specials.
// p is the current input position, and t0 is the current thread.
-void NFA::AddToThreadq(Threadq* q, int id0, int c, const StringPiece& context,
+void NFA::AddToThreadq(Threadq* q, int id0, int c, const StringPiece& context,
const char* p, Thread* t0) {
if (id0 == 0)
return;
- // Use stack_ to hold our stack of instructions yet to process.
+ // Use stack_ to hold our stack of instructions yet to process.
// It was preallocated as follows:
// two entries per Capture;
// one entry per EmptyWidth; and
// one entry per Nop.
// This reflects the maximum number of stack pushes that each can
// perform. (Each instruction can be processed at most once.)
- AddState* stk = stack_.data();
+ AddState* stk = stack_.data();
int nstk = 0;
- stk[nstk++] = {id0, NULL};
+ stk[nstk++] = {id0, NULL};
while (nstk > 0) {
- DCHECK_LE(nstk, stack_.size());
+ DCHECK_LE(nstk, stack_.size());
AddState a = stk[--nstk];
Loop:
@@ -233,7 +233,7 @@ void NFA::AddToThreadq(Threadq* q, int id0, int c, const StringPiece& context,
// or we might not. Even if not, it is necessary to have it,
// so that we don't revisit id0 during the recursion.
q->set_new(id, NULL);
- Thread** tp = &q->get_existing(id);
+ Thread** tp = &q->get_existing(id);
int j;
Thread* t;
Prog::Inst* ip = prog_->inst(id);
@@ -251,25 +251,25 @@ void NFA::AddToThreadq(Threadq* q, int id0, int c, const StringPiece& context,
*tp = t;
DCHECK(!ip->last());
- a = {id+1, NULL};
+ a = {id+1, NULL};
goto Loop;
case kInstNop:
if (!ip->last())
- stk[nstk++] = {id+1, NULL};
+ stk[nstk++] = {id+1, NULL};
// Continue on.
- a = {ip->out(), NULL};
+ a = {ip->out(), NULL};
goto Loop;
case kInstCapture:
if (!ip->last())
- stk[nstk++] = {id+1, NULL};
+ stk[nstk++] = {id+1, NULL};
if ((j=ip->cap()) < ncapture_) {
// Push a dummy whose only job is to restore t0
// once we finish exploring this possibility.
- stk[nstk++] = {0, t0};
+ stk[nstk++] = {0, t0};
// Record capture.
t = AllocThread();
@@ -277,7 +277,7 @@ void NFA::AddToThreadq(Threadq* q, int id0, int c, const StringPiece& context,
t->capture[j] = p;
t0 = t;
}
- a = {ip->out(), NULL};
+ a = {ip->out(), NULL};
goto Loop;
case kInstByteRange:
@@ -290,32 +290,32 @@ void NFA::AddToThreadq(Threadq* q, int id0, int c, const StringPiece& context,
if (ExtraDebug)
fprintf(stderr, " + %d%s\n", id, FormatCapture(t0->capture).c_str());
- if (ip->hint() == 0)
- break;
- a = {id+ip->hint(), NULL};
- goto Loop;
-
- case kInstMatch:
- // Save state; will pick up at next byte.
- t = Incref(t0);
- *tp = t;
- if (ExtraDebug)
- fprintf(stderr, " ! %d%s\n", id, FormatCapture(t0->capture).c_str());
-
+ if (ip->hint() == 0)
+ break;
+ a = {id+ip->hint(), NULL};
+ goto Loop;
+
+ case kInstMatch:
+ // Save state; will pick up at next byte.
+ t = Incref(t0);
+ *tp = t;
+ if (ExtraDebug)
+ fprintf(stderr, " ! %d%s\n", id, FormatCapture(t0->capture).c_str());
+
Next:
if (ip->last())
break;
- a = {id+1, NULL};
+ a = {id+1, NULL};
goto Loop;
case kInstEmptyWidth:
if (!ip->last())
- stk[nstk++] = {id+1, NULL};
+ stk[nstk++] = {id+1, NULL};
// Continue on if we have all the right flag bits.
- if (ip->empty() & ~Prog::EmptyFlags(context, p))
+ if (ip->empty() & ~Prog::EmptyFlags(context, p))
break;
- a = {ip->out(), NULL};
+ a = {ip->out(), NULL};
goto Loop;
}
}
@@ -323,17 +323,17 @@ void NFA::AddToThreadq(Threadq* q, int id0, int c, const StringPiece& context,
// Run runq on byte c, appending new states to nextq.
// Updates matched_ and match_ as new, better matches are found.
-// context is used (with p) for evaluating empty-width specials.
-// p is the position of byte c in the input string for AddToThreadq;
-// p-1 will be used when processing Match instructions.
+// context is used (with p) for evaluating empty-width specials.
+// p is the position of byte c in the input string for AddToThreadq;
+// p-1 will be used when processing Match instructions.
// Frees all the threads on runq.
// If there is a shortcut to the end, returns that shortcut.
-int NFA::Step(Threadq* runq, Threadq* nextq, int c, const StringPiece& context,
- const char* p) {
+int NFA::Step(Threadq* runq, Threadq* nextq, int c, const StringPiece& context,
+ const char* p) {
nextq->clear();
for (Threadq::iterator i = runq->begin(); i != runq->end(); ++i) {
- Thread* t = i->value();
+ Thread* t = i->value();
if (t == NULL)
continue;
@@ -355,7 +355,7 @@ int NFA::Step(Threadq* runq, Threadq* nextq, int c, const StringPiece& context,
break;
case kInstByteRange:
- AddToThreadq(nextq, ip->out(), c, context, p, t);
+ AddToThreadq(nextq, ip->out(), c, context, p, t);
break;
case kInstAltMatch:
@@ -367,10 +367,10 @@ int NFA::Step(Threadq* runq, Threadq* nextq, int c, const StringPiece& context,
matched_ = true;
Decref(t);
- for (++i; i != runq->end(); ++i) {
- if (i->value() != NULL)
- Decref(i->value());
- }
+ for (++i; i != runq->end(); ++i) {
+ if (i->value() != NULL)
+ Decref(i->value());
+ }
runq->clear();
if (ip->greedy(prog_))
return ip->out1();
@@ -378,50 +378,50 @@ int NFA::Step(Threadq* runq, Threadq* nextq, int c, const StringPiece& context,
}
break;
- case kInstMatch: {
- // Avoid invoking undefined behavior (arithmetic on a null pointer)
- // by storing p instead of p-1. (What would the latter even mean?!)
- // This complements the special case in NFA::Search().
- if (p == NULL) {
- CopyCapture(match_, t->capture);
- match_[1] = p;
- matched_ = true;
+ case kInstMatch: {
+ // Avoid invoking undefined behavior (arithmetic on a null pointer)
+ // by storing p instead of p-1. (What would the latter even mean?!)
+ // This complements the special case in NFA::Search().
+ if (p == NULL) {
+ CopyCapture(match_, t->capture);
+ match_[1] = p;
+ matched_ = true;
+ break;
+ }
+
+ if (endmatch_ && p-1 != etext_)
break;
- }
- if (endmatch_ && p-1 != etext_)
- break;
-
if (longest_) {
// Leftmost-longest mode: save this match only if
// it is either farther to the left or at the same
// point but longer than an existing match.
if (!matched_ || t->capture[0] < match_[0] ||
- (t->capture[0] == match_[0] && p-1 > match_[1])) {
+ (t->capture[0] == match_[0] && p-1 > match_[1])) {
CopyCapture(match_, t->capture);
- match_[1] = p-1;
+ match_[1] = p-1;
matched_ = true;
}
} else {
// Leftmost-biased mode: this match is by definition
// better than what we've already found (see next line).
CopyCapture(match_, t->capture);
- match_[1] = p-1;
+ match_[1] = p-1;
matched_ = true;
// Cut off the threads that can only find matches
// worse than the one we just found: don't run the
// rest of the current Threadq.
Decref(t);
- for (++i; i != runq->end(); ++i) {
- if (i->value() != NULL)
- Decref(i->value());
- }
+ for (++i; i != runq->end(); ++i) {
+ if (i->value() != NULL)
+ Decref(i->value());
+ }
runq->clear();
return 0;
}
break;
- }
+ }
}
Decref(t);
}
@@ -429,18 +429,18 @@ int NFA::Step(Threadq* runq, Threadq* nextq, int c, const StringPiece& context,
return 0;
}
-std::string NFA::FormatCapture(const char** capture) {
- std::string s;
+std::string NFA::FormatCapture(const char** capture) {
+ std::string s;
for (int i = 0; i < ncapture_; i+=2) {
if (capture[i] == NULL)
- s += "(?,?)";
+ s += "(?,?)";
else if (capture[i+1] == NULL)
- s += StringPrintf("(%td,?)",
- capture[i] - btext_);
+ s += StringPrintf("(%td,?)",
+ capture[i] - btext_);
else
- s += StringPrintf("(%td,%td)",
- capture[i] - btext_,
- capture[i+1] - btext_);
+ s += StringPrintf("(%td,%td)",
+ capture[i] - btext_,
+ capture[i+1] - btext_);
}
return s;
}
@@ -452,7 +452,7 @@ bool NFA::Search(const StringPiece& text, const StringPiece& const_context,
return false;
StringPiece context = const_context;
- if (context.data() == NULL)
+ if (context.data() == NULL)
context = text;
// Sanity check: make sure that text lies within context.
@@ -488,17 +488,17 @@ bool NFA::Search(const StringPiece& text, const StringPiece& const_context,
}
match_ = new const char*[ncapture_];
- memset(match_, 0, ncapture_*sizeof match_[0]);
+ memset(match_, 0, ncapture_*sizeof match_[0]);
matched_ = false;
// For debugging prints.
- btext_ = context.data();
- // For convenience.
- etext_ = text.data() + text.size();
+ btext_ = context.data();
+ // For convenience.
+ etext_ = text.data() + text.size();
if (ExtraDebug)
fprintf(stderr, "NFA::Search %s (context: %s) anchored=%d longest=%d\n",
- std::string(text).c_str(), std::string(context).c_str(), anchored, longest);
+ std::string(text).c_str(), std::string(context).c_str(), anchored, longest);
// Set up search.
Threadq* runq = &q0_;
@@ -507,19 +507,19 @@ bool NFA::Search(const StringPiece& text, const StringPiece& const_context,
nextq->clear();
// Loop over the text, stepping the machine.
- for (const char* p = text.data();; p++) {
+ for (const char* p = text.data();; p++) {
if (ExtraDebug) {
int c = 0;
- if (p == btext_)
+ if (p == btext_)
c = '^';
- else if (p > etext_)
+ else if (p > etext_)
c = '$';
- else if (p < etext_)
+ else if (p < etext_)
c = p[0] & 0xFF;
- fprintf(stderr, "%c:", c);
+ fprintf(stderr, "%c:", c);
for (Threadq::iterator i = runq->begin(); i != runq->end(); ++i) {
- Thread* t = i->value();
+ Thread* t = i->value();
if (t == NULL)
continue;
fprintf(stderr, " %d%s", i->index(), FormatCapture(t->capture).c_str());
@@ -528,14 +528,14 @@ bool NFA::Search(const StringPiece& text, const StringPiece& const_context,
}
// This is a no-op the first time around the loop because runq is empty.
- int id = Step(runq, nextq, p < etext_ ? p[0] & 0xFF : -1, context, p);
+ int id = Step(runq, nextq, p < etext_ ? p[0] & 0xFF : -1, context, p);
DCHECK_EQ(runq->size(), 0);
using std::swap;
swap(nextq, runq);
nextq->clear();
if (id != 0) {
// We're done: full match ahead.
- p = etext_;
+ p = etext_;
for (;;) {
Prog::Inst* ip = prog_->inst(id);
switch (ip->opcode()) {
@@ -563,29 +563,29 @@ bool NFA::Search(const StringPiece& text, const StringPiece& const_context,
break;
}
- if (p > etext_)
+ if (p > etext_)
break;
// Start a new thread if there have not been any matches.
// (No point in starting a new thread if there have been
// matches, since it would be to the right of the match
// we already found.)
- if (!matched_ && (!anchored || p == text.data())) {
- // Try to use prefix accel (e.g. memchr) to skip ahead.
- // The search must be unanchored and there must be zero
- // possible matches already.
+ if (!matched_ && (!anchored || p == text.data())) {
+ // Try to use prefix accel (e.g. memchr) to skip ahead.
+ // The search must be unanchored and there must be zero
+ // possible matches already.
if (!anchored && runq->size() == 0 &&
- p < etext_ && prog_->can_prefix_accel()) {
- p = reinterpret_cast<const char*>(prog_->PrefixAccel(p, etext_ - p));
- if (p == NULL)
- p = etext_;
+ p < etext_ && prog_->can_prefix_accel()) {
+ p = reinterpret_cast<const char*>(prog_->PrefixAccel(p, etext_ - p));
+ if (p == NULL)
+ p = etext_;
}
Thread* t = AllocThread();
CopyCapture(t->capture, match_);
t->capture[0] = p;
- AddToThreadq(runq, start_, p < etext_ ? p[0] & 0xFF : -1, context, p,
- t);
+ AddToThreadq(runq, start_, p < etext_ ? p[0] & 0xFF : -1, context, p,
+ t);
Decref(t);
}
@@ -596,23 +596,23 @@ bool NFA::Search(const StringPiece& text, const StringPiece& const_context,
break;
}
- // Avoid invoking undefined behavior (arithmetic on a null pointer)
- // by simply not continuing the loop.
- // This complements the special case in NFA::Step().
- if (p == NULL) {
- (void) Step(runq, nextq, -1, context, p);
- DCHECK_EQ(runq->size(), 0);
- using std::swap;
- swap(nextq, runq);
- nextq->clear();
- break;
- }
+ // Avoid invoking undefined behavior (arithmetic on a null pointer)
+ // by simply not continuing the loop.
+ // This complements the special case in NFA::Step().
+ if (p == NULL) {
+ (void) Step(runq, nextq, -1, context, p);
+ DCHECK_EQ(runq->size(), 0);
+ using std::swap;
+ swap(nextq, runq);
+ nextq->clear();
+ break;
+ }
}
- for (Threadq::iterator i = runq->begin(); i != runq->end(); ++i) {
- if (i->value() != NULL)
- Decref(i->value());
- }
+ for (Threadq::iterator i = runq->begin(); i != runq->end(); ++i) {
+ if (i->value() != NULL)
+ Decref(i->value());
+ }
if (matched_) {
for (int i = 0; i < nsubmatch; i++)
@@ -621,8 +621,8 @@ bool NFA::Search(const StringPiece& text, const StringPiece& const_context,
static_cast<size_t>(match_[2 * i + 1] - match_[2 * i]));
if (ExtraDebug)
fprintf(stderr, "match (%td,%td)\n",
- match_[0] - btext_,
- match_[1] - btext_);
+ match_[0] - btext_,
+ match_[1] - btext_);
return true;
}
return false;
@@ -663,7 +663,7 @@ void Prog::Fanout(SparseArray<int>* fanout) {
fanout->clear();
fanout->set_new(start(), 0);
for (SparseArray<int>::iterator i = fanout->begin(); i != fanout->end(); ++i) {
- int* count = &i->value();
+ int* count = &i->value();
reachable.clear();
reachable.insert(i->index());
for (SparseSet::iterator j = reachable.begin(); j != reachable.end(); ++j) {
diff --git a/contrib/libs/re2/re2/onepass.cc b/contrib/libs/re2/re2/onepass.cc
index 55bd6849e7..263974654d 100644
--- a/contrib/libs/re2/re2/onepass.cc
+++ b/contrib/libs/re2/re2/onepass.cc
@@ -61,9 +61,9 @@
#include "util/logging.h"
#include "util/strutil.h"
#include "util/utf.h"
-#include "re2/pod_array.h"
+#include "re2/pod_array.h"
#include "re2/prog.h"
-#include "re2/sparse_set.h"
+#include "re2/sparse_set.h"
#include "re2/stringpiece.h"
// Silence "zero-sized array in struct/union" warning for OneState::action.
@@ -235,7 +235,7 @@ bool Prog::SearchOnePass(const StringPiece& text,
matchcap[i] = NULL;
StringPiece context = const_context;
- if (context.data() == NULL)
+ if (context.data() == NULL)
context = text;
if (anchor_start() && BeginPtr(context) != BeginPtr(text))
return false;
@@ -244,13 +244,13 @@ bool Prog::SearchOnePass(const StringPiece& text,
if (anchor_end())
kind = kFullMatch;
- uint8_t* nodes = onepass_nodes_.data();
+ uint8_t* nodes = onepass_nodes_.data();
int statesize = sizeof(OneState) + bytemap_range()*sizeof(uint32_t);
// start() is always mapped to the zeroth OneState.
OneState* state = IndexToNode(nodes, statesize, 0);
uint8_t* bytemap = bytemap_;
- const char* bp = text.data();
- const char* ep = text.data() + text.size();
+ const char* bp = text.data();
+ const char* ep = text.data() + text.size();
const char* p;
bool matched = false;
matchcap[0] = bp;
@@ -383,7 +383,7 @@ struct InstCond {
// Constructs and saves corresponding one-pass NFA on success.
bool Prog::IsOnePass() {
if (did_onepass_)
- return onepass_nodes_.data() != NULL;
+ return onepass_nodes_.data() != NULL;
did_onepass_ = true;
if (start() == 0) // no match
@@ -404,11 +404,11 @@ bool Prog::IsOnePass() {
int stacksize = inst_count(kInstCapture) +
inst_count(kInstEmptyWidth) +
inst_count(kInstNop) + 1; // + 1 for start inst
- PODArray<InstCond> stack(stacksize);
+ PODArray<InstCond> stack(stacksize);
int size = this->size();
- PODArray<int> nodebyid(size); // indexed by ip
- memset(nodebyid.data(), 0xFF, size*sizeof nodebyid[0]);
+ PODArray<int> nodebyid(size); // indexed by ip
+ memset(nodebyid.data(), 0xFF, size*sizeof nodebyid[0]);
// Originally, nodes was a uint8_t[maxnodes*statesize], but that was
// unnecessarily optimistic: why allocate a large amount of memory
@@ -550,7 +550,7 @@ bool Prog::IsOnePass() {
if (!AddQ(&workq, ip->out())) {
if (ExtraDebug)
LOG(ERROR) << StringPrintf(
- "Not OnePass: multiple paths %d -> %d", *it, ip->out());
+ "Not OnePass: multiple paths %d -> %d", *it, ip->out());
goto fail;
}
id = ip->out();
@@ -561,7 +561,7 @@ bool Prog::IsOnePass() {
// (3) is violated
if (ExtraDebug)
LOG(ERROR) << StringPrintf(
- "Not OnePass: multiple matches from %d", *it);
+ "Not OnePass: multiple matches from %d", *it);
goto fail;
}
matched = true;
@@ -590,30 +590,30 @@ bool Prog::IsOnePass() {
if (nodebyid[i] != -1)
idmap[nodebyid[i]] = i;
- std::string dump;
+ std::string dump;
for (Instq::iterator it = tovisit.begin(); it != tovisit.end(); ++it) {
int id = *it;
int nodeindex = nodebyid[id];
if (nodeindex == -1)
continue;
OneState* node = IndexToNode(nodes.data(), statesize, nodeindex);
- dump += StringPrintf("node %d id=%d: matchcond=%#x\n",
- nodeindex, id, node->matchcond);
+ dump += StringPrintf("node %d id=%d: matchcond=%#x\n",
+ nodeindex, id, node->matchcond);
for (int i = 0; i < bytemap_range_; i++) {
if ((node->action[i] & kImpossible) == kImpossible)
continue;
- dump += StringPrintf(" %d cond %#x -> %d id=%d\n",
- i, node->action[i] & 0xFFFF,
- node->action[i] >> kIndexShift,
- idmap[node->action[i] >> kIndexShift]);
+ dump += StringPrintf(" %d cond %#x -> %d id=%d\n",
+ i, node->action[i] & 0xFFFF,
+ node->action[i] >> kIndexShift,
+ idmap[node->action[i] >> kIndexShift]);
}
}
LOG(ERROR) << "nodes:\n" << dump;
}
dfa_mem_ -= nalloc*statesize;
- onepass_nodes_ = PODArray<uint8_t>(nalloc*statesize);
- memmove(onepass_nodes_.data(), nodes.data(), nalloc*statesize);
+ onepass_nodes_ = PODArray<uint8_t>(nalloc*statesize);
+ memmove(onepass_nodes_.data(), nodes.data(), nalloc*statesize);
return true;
fail:
diff --git a/contrib/libs/re2/re2/parse.cc b/contrib/libs/re2/re2/parse.cc
index be002ce281..85f16f060b 100644
--- a/contrib/libs/re2/re2/parse.cc
+++ b/contrib/libs/re2/re2/parse.cc
@@ -23,13 +23,13 @@
#include <algorithm>
#include <map>
#include <string>
-#include <vector>
+#include <vector>
#include "util/util.h"
#include "util/logging.h"
#include "util/strutil.h"
#include "util/utf.h"
-#include "re2/pod_array.h"
+#include "re2/pod_array.h"
#include "re2/regexp.h"
#include "re2/stringpiece.h"
#include "re2/unicode_casefold.h"
@@ -44,13 +44,13 @@
namespace re2 {
-// Controls the maximum repeat count permitted by the parser.
-static int maximum_repeat_count = 1000;
+// Controls the maximum repeat count permitted by the parser.
+static int maximum_repeat_count = 1000;
+
+void Regexp::FUZZING_ONLY_set_maximum_repeat_count(int i) {
+ maximum_repeat_count = i;
+}
-void Regexp::FUZZING_ONLY_set_maximum_repeat_count(int i) {
- maximum_repeat_count = i;
-}
-
// Regular expression parse state.
// The list of parsed regexps so far is maintained as a vector of
// Regexp pointers called the stack. Left parenthesis and vertical
@@ -93,7 +93,7 @@ class Regexp::ParseState {
bool PushSimpleOp(RegexpOp op);
// Pushes a ^ onto the stack.
- bool PushCaret();
+ bool PushCaret();
// Pushes a \b (word == true) or \B (word == false) onto the stack.
bool PushWordBoundary(bool word);
@@ -423,7 +423,7 @@ bool Regexp::ParseState::PushLiteral(Rune r) {
}
// Pushes a ^ onto the stack.
-bool Regexp::ParseState::PushCaret() {
+bool Regexp::ParseState::PushCaret() {
if (flags_ & OneLine) {
return PushSimpleOp(kRegexpBeginText);
}
@@ -556,10 +556,10 @@ int RepetitionWalker::PostVisit(Regexp* re, int parent_arg, int pre_arg,
}
int RepetitionWalker::ShortVisit(Regexp* re, int parent_arg) {
- // Should never be called: we use Walk(), not WalkExponential().
-#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
+ // Should never be called: we use Walk(), not WalkExponential().
+#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
LOG(DFATAL) << "RepetitionWalker::ShortVisit called";
-#endif
+#endif
return 0;
}
@@ -568,9 +568,9 @@ int RepetitionWalker::ShortVisit(Regexp* re, int parent_arg) {
bool Regexp::ParseState::PushRepetition(int min, int max,
const StringPiece& s,
bool nongreedy) {
- if ((max != -1 && max < min) ||
- min > maximum_repeat_count ||
- max > maximum_repeat_count) {
+ if ((max != -1 && max < min) ||
+ min > maximum_repeat_count ||
+ max > maximum_repeat_count) {
status_->set_code(kRegexpRepeatSize);
status_->set_error_arg(s);
return false;
@@ -593,7 +593,7 @@ bool Regexp::ParseState::PushRepetition(int min, int max,
stacktop_ = re;
if (min >= 2 || max >= 2) {
RepetitionWalker w;
- if (w.Walk(stacktop_, maximum_repeat_count) == 0) {
+ if (w.Walk(stacktop_, maximum_repeat_count) == 0) {
status_->set_code(kRegexpRepeatSize);
status_->set_error_arg(s);
return false;
@@ -613,7 +613,7 @@ bool Regexp::ParseState::DoLeftParen(const StringPiece& name) {
Regexp* re = new Regexp(kLeftParen, flags_);
re->cap_ = ++ncap_;
if (name.data() != NULL)
- re->name_ = new std::string(name);
+ re->name_ = new std::string(name);
return PushRegexp(re);
}
@@ -687,7 +687,7 @@ bool Regexp::ParseState::DoRightParen() {
if ((r1 = stacktop_) == NULL ||
(r2 = r1->down_) == NULL ||
r2->op() != kLeftParen) {
- status_->set_code(kRegexpUnexpectedParen);
+ status_->set_code(kRegexpUnexpectedParen);
status_->set_error_arg(whole_regexp_);
return false;
}
@@ -804,7 +804,7 @@ void Regexp::RemoveLeadingString(Regexp* re, int n) {
// limit on the size of a concatenation, so we should never
// see more than two here.
Regexp* stk[4];
- size_t d = 0;
+ size_t d = 0;
while (re->op() == kRegexpConcat) {
if (d < arraysize(stk))
stk[d++] = re;
@@ -835,8 +835,8 @@ void Regexp::RemoveLeadingString(Regexp* re, int n) {
}
// If re is now empty, concatenations might simplify too.
- while (d > 0) {
- re = stk[--d];
+ while (d > 0) {
+ re = stk[--d];
Regexp** sub = re->sub();
if (sub[0]->op() == kRegexpEmptyMatch) {
sub[0]->Decref();
@@ -870,180 +870,180 @@ void Regexp::RemoveLeadingString(Regexp* re, int n) {
}
}
-// In the context of factoring alternations, a Splice is: a factored prefix or
-// merged character class computed by one iteration of one round of factoring;
-// the span of subexpressions of the alternation to be "spliced" (i.e. removed
-// and replaced); and, for a factored prefix, the number of suffixes after any
-// factoring that might have subsequently been performed on them. For a merged
-// character class, there are no suffixes, of course, so the field is ignored.
-struct Splice {
- Splice(Regexp* prefix, Regexp** sub, int nsub)
- : prefix(prefix),
- sub(sub),
- nsub(nsub),
- nsuffix(-1) {}
-
- Regexp* prefix;
- Regexp** sub;
- int nsub;
- int nsuffix;
-};
-
-// Named so because it is used to implement an explicit stack, a Frame is: the
-// span of subexpressions of the alternation to be factored; the current round
-// of factoring; any Splices computed; and, for a factored prefix, an iterator
-// to the next Splice to be factored (i.e. in another Frame) because suffixes.
-struct Frame {
- Frame(Regexp** sub, int nsub)
- : sub(sub),
- nsub(nsub),
- round(0) {}
-
- Regexp** sub;
- int nsub;
- int round;
- std::vector<Splice> splices;
- int spliceidx;
-};
-
-// Bundled into a class for friend access to Regexp without needing to declare
-// (or define) Splice in regexp.h.
-class FactorAlternationImpl {
- public:
- static void Round1(Regexp** sub, int nsub,
- Regexp::ParseFlags flags,
- std::vector<Splice>* splices);
- static void Round2(Regexp** sub, int nsub,
- Regexp::ParseFlags flags,
- std::vector<Splice>* splices);
- static void Round3(Regexp** sub, int nsub,
- Regexp::ParseFlags flags,
- std::vector<Splice>* splices);
-};
-
+// In the context of factoring alternations, a Splice is: a factored prefix or
+// merged character class computed by one iteration of one round of factoring;
+// the span of subexpressions of the alternation to be "spliced" (i.e. removed
+// and replaced); and, for a factored prefix, the number of suffixes after any
+// factoring that might have subsequently been performed on them. For a merged
+// character class, there are no suffixes, of course, so the field is ignored.
+struct Splice {
+ Splice(Regexp* prefix, Regexp** sub, int nsub)
+ : prefix(prefix),
+ sub(sub),
+ nsub(nsub),
+ nsuffix(-1) {}
+
+ Regexp* prefix;
+ Regexp** sub;
+ int nsub;
+ int nsuffix;
+};
+
+// Named so because it is used to implement an explicit stack, a Frame is: the
+// span of subexpressions of the alternation to be factored; the current round
+// of factoring; any Splices computed; and, for a factored prefix, an iterator
+// to the next Splice to be factored (i.e. in another Frame) because suffixes.
+struct Frame {
+ Frame(Regexp** sub, int nsub)
+ : sub(sub),
+ nsub(nsub),
+ round(0) {}
+
+ Regexp** sub;
+ int nsub;
+ int round;
+ std::vector<Splice> splices;
+ int spliceidx;
+};
+
+// Bundled into a class for friend access to Regexp without needing to declare
+// (or define) Splice in regexp.h.
+class FactorAlternationImpl {
+ public:
+ static void Round1(Regexp** sub, int nsub,
+ Regexp::ParseFlags flags,
+ std::vector<Splice>* splices);
+ static void Round2(Regexp** sub, int nsub,
+ Regexp::ParseFlags flags,
+ std::vector<Splice>* splices);
+ static void Round3(Regexp** sub, int nsub,
+ Regexp::ParseFlags flags,
+ std::vector<Splice>* splices);
+};
+
// Factors common prefixes from alternation.
// For example,
// ABC|ABD|AEF|BCX|BCY
// simplifies to
// A(B(C|D)|EF)|BC(X|Y)
-// and thence to
+// and thence to
// A(B[CD]|EF)|BC[XY]
//
// Rewrites sub to contain simplified list to alternate and returns
// the new length of sub. Adjusts reference counts accordingly
// (incoming sub[i] decremented, outgoing sub[i] incremented).
-int Regexp::FactorAlternation(Regexp** sub, int nsub, ParseFlags flags) {
- std::vector<Frame> stk;
- stk.emplace_back(sub, nsub);
-
- for (;;) {
- auto& sub = stk.back().sub;
- auto& nsub = stk.back().nsub;
- auto& round = stk.back().round;
- auto& splices = stk.back().splices;
- auto& spliceidx = stk.back().spliceidx;
-
- if (splices.empty()) {
- // Advance to the next round of factoring. Note that this covers
- // the initialised state: when splices is empty and round is 0.
- round++;
- } else if (spliceidx < static_cast<int>(splices.size())) {
- // We have at least one more Splice to factor. Recurse logically.
- stk.emplace_back(splices[spliceidx].sub, splices[spliceidx].nsub);
- continue;
- } else {
- // We have no more Splices to factor. Apply them.
- auto iter = splices.begin();
- int out = 0;
- for (int i = 0; i < nsub; ) {
- // Copy until we reach where the next Splice begins.
- while (sub + i < iter->sub)
- sub[out++] = sub[i++];
- switch (round) {
- case 1:
- case 2: {
- // Assemble the Splice prefix and the suffixes.
- Regexp* re[2];
- re[0] = iter->prefix;
- re[1] = Regexp::AlternateNoFactor(iter->sub, iter->nsuffix, flags);
- sub[out++] = Regexp::Concat(re, 2, flags);
- i += iter->nsub;
- break;
- }
- case 3:
- // Just use the Splice prefix.
- sub[out++] = iter->prefix;
- i += iter->nsub;
- break;
- default:
- LOG(DFATAL) << "unknown round: " << round;
- break;
- }
- // If we are done, copy until the end of sub.
- if (++iter == splices.end()) {
- while (i < nsub)
- sub[out++] = sub[i++];
- }
- }
- splices.clear();
- nsub = out;
- // Advance to the next round of factoring.
- round++;
- }
-
- switch (round) {
- case 1:
- FactorAlternationImpl::Round1(sub, nsub, flags, &splices);
- break;
- case 2:
- FactorAlternationImpl::Round2(sub, nsub, flags, &splices);
- break;
- case 3:
- FactorAlternationImpl::Round3(sub, nsub, flags, &splices);
- break;
- case 4:
- if (stk.size() == 1) {
- // We are at the top of the stack. Just return.
- return nsub;
- } else {
- // Pop the stack and set the number of suffixes.
- // (Note that references will be invalidated!)
- int nsuffix = nsub;
- stk.pop_back();
- stk.back().splices[stk.back().spliceidx].nsuffix = nsuffix;
- ++stk.back().spliceidx;
- continue;
- }
- default:
- LOG(DFATAL) << "unknown round: " << round;
- break;
- }
-
- // Set spliceidx depending on whether we have Splices to factor.
- if (splices.empty() || round == 3) {
- spliceidx = static_cast<int>(splices.size());
- } else {
- spliceidx = 0;
- }
- }
-}
-
-void FactorAlternationImpl::Round1(Regexp** sub, int nsub,
- Regexp::ParseFlags flags,
- std::vector<Splice>* splices) {
+int Regexp::FactorAlternation(Regexp** sub, int nsub, ParseFlags flags) {
+ std::vector<Frame> stk;
+ stk.emplace_back(sub, nsub);
+
+ for (;;) {
+ auto& sub = stk.back().sub;
+ auto& nsub = stk.back().nsub;
+ auto& round = stk.back().round;
+ auto& splices = stk.back().splices;
+ auto& spliceidx = stk.back().spliceidx;
+
+ if (splices.empty()) {
+ // Advance to the next round of factoring. Note that this covers
+ // the initialised state: when splices is empty and round is 0.
+ round++;
+ } else if (spliceidx < static_cast<int>(splices.size())) {
+ // We have at least one more Splice to factor. Recurse logically.
+ stk.emplace_back(splices[spliceidx].sub, splices[spliceidx].nsub);
+ continue;
+ } else {
+ // We have no more Splices to factor. Apply them.
+ auto iter = splices.begin();
+ int out = 0;
+ for (int i = 0; i < nsub; ) {
+ // Copy until we reach where the next Splice begins.
+ while (sub + i < iter->sub)
+ sub[out++] = sub[i++];
+ switch (round) {
+ case 1:
+ case 2: {
+ // Assemble the Splice prefix and the suffixes.
+ Regexp* re[2];
+ re[0] = iter->prefix;
+ re[1] = Regexp::AlternateNoFactor(iter->sub, iter->nsuffix, flags);
+ sub[out++] = Regexp::Concat(re, 2, flags);
+ i += iter->nsub;
+ break;
+ }
+ case 3:
+ // Just use the Splice prefix.
+ sub[out++] = iter->prefix;
+ i += iter->nsub;
+ break;
+ default:
+ LOG(DFATAL) << "unknown round: " << round;
+ break;
+ }
+ // If we are done, copy until the end of sub.
+ if (++iter == splices.end()) {
+ while (i < nsub)
+ sub[out++] = sub[i++];
+ }
+ }
+ splices.clear();
+ nsub = out;
+ // Advance to the next round of factoring.
+ round++;
+ }
+
+ switch (round) {
+ case 1:
+ FactorAlternationImpl::Round1(sub, nsub, flags, &splices);
+ break;
+ case 2:
+ FactorAlternationImpl::Round2(sub, nsub, flags, &splices);
+ break;
+ case 3:
+ FactorAlternationImpl::Round3(sub, nsub, flags, &splices);
+ break;
+ case 4:
+ if (stk.size() == 1) {
+ // We are at the top of the stack. Just return.
+ return nsub;
+ } else {
+ // Pop the stack and set the number of suffixes.
+ // (Note that references will be invalidated!)
+ int nsuffix = nsub;
+ stk.pop_back();
+ stk.back().splices[stk.back().spliceidx].nsuffix = nsuffix;
+ ++stk.back().spliceidx;
+ continue;
+ }
+ default:
+ LOG(DFATAL) << "unknown round: " << round;
+ break;
+ }
+
+ // Set spliceidx depending on whether we have Splices to factor.
+ if (splices.empty() || round == 3) {
+ spliceidx = static_cast<int>(splices.size());
+ } else {
+ spliceidx = 0;
+ }
+ }
+}
+
+void FactorAlternationImpl::Round1(Regexp** sub, int nsub,
+ Regexp::ParseFlags flags,
+ std::vector<Splice>* splices) {
// Round 1: Factor out common literal prefixes.
- int start = 0;
- Rune* rune = NULL;
+ int start = 0;
+ Rune* rune = NULL;
int nrune = 0;
Regexp::ParseFlags runeflags = Regexp::NoParseFlags;
- for (int i = 0; i <= nsub; i++) {
- // Invariant: sub[start:i] consists of regexps that all
- // begin with rune[0:nrune].
+ for (int i = 0; i <= nsub; i++) {
+ // Invariant: sub[start:i] consists of regexps that all
+ // begin with rune[0:nrune].
Rune* rune_i = NULL;
int nrune_i = 0;
Regexp::ParseFlags runeflags_i = Regexp::NoParseFlags;
- if (i < nsub) {
- rune_i = Regexp::LeadingString(sub[i], &nrune_i, &runeflags_i);
+ if (i < nsub) {
+ rune_i = Regexp::LeadingString(sub[i], &nrune_i, &runeflags_i);
if (runeflags_i == runeflags) {
int same = 0;
while (same < nrune && same < nrune_i && rune[same] == rune_i[same])
@@ -1057,32 +1057,32 @@ void FactorAlternationImpl::Round1(Regexp** sub, int nsub,
}
// Found end of a run with common leading literal string:
- // sub[start:i] all begin with rune[0:nrune],
- // but sub[i] does not even begin with rune[0].
+ // sub[start:i] all begin with rune[0:nrune],
+ // but sub[i] does not even begin with rune[0].
if (i == start) {
// Nothing to do - first iteration.
} else if (i == start+1) {
// Just one: don't bother factoring.
} else {
- Regexp* prefix = Regexp::LiteralString(rune, nrune, runeflags);
+ Regexp* prefix = Regexp::LiteralString(rune, nrune, runeflags);
for (int j = start; j < i; j++)
- Regexp::RemoveLeadingString(sub[j], nrune);
- splices->emplace_back(prefix, sub + start, i - start);
+ Regexp::RemoveLeadingString(sub[j], nrune);
+ splices->emplace_back(prefix, sub + start, i - start);
}
- // Prepare for next iteration (if there is one).
- if (i < nsub) {
+ // Prepare for next iteration (if there is one).
+ if (i < nsub) {
start = i;
rune = rune_i;
nrune = nrune_i;
runeflags = runeflags_i;
}
}
-}
+}
-void FactorAlternationImpl::Round2(Regexp** sub, int nsub,
- Regexp::ParseFlags flags,
- std::vector<Splice>* splices) {
+void FactorAlternationImpl::Round2(Regexp** sub, int nsub,
+ Regexp::ParseFlags flags,
+ std::vector<Splice>* splices) {
// Round 2: Factor out common simple prefixes,
// just the first piece of each concatenation.
// This will be good enough a lot of the time.
@@ -1091,15 +1091,15 @@ void FactorAlternationImpl::Round2(Regexp** sub, int nsub,
// are not safe to factor because that collapses their
// distinct paths through the automaton, which affects
// correctness in some cases.
- int start = 0;
+ int start = 0;
Regexp* first = NULL;
- for (int i = 0; i <= nsub; i++) {
- // Invariant: sub[start:i] consists of regexps that all
- // begin with first.
+ for (int i = 0; i <= nsub; i++) {
+ // Invariant: sub[start:i] consists of regexps that all
+ // begin with first.
Regexp* first_i = NULL;
- if (i < nsub) {
- first_i = Regexp::LeadingRegexp(sub[i]);
- if (first != NULL &&
+ if (i < nsub) {
+ first_i = Regexp::LeadingRegexp(sub[i]);
+ if (first != NULL &&
// first must be an empty-width op
// OR a char class, any char or any byte
// OR a fixed repeat of a literal, char class, any char or any byte.
@@ -1117,60 +1117,60 @@ void FactorAlternationImpl::Round2(Regexp** sub, int nsub,
(first->sub()[0]->op() == kRegexpLiteral ||
first->sub()[0]->op() == kRegexpCharClass ||
first->sub()[0]->op() == kRegexpAnyChar ||
- first->sub()[0]->op() == kRegexpAnyByte))) &&
- Regexp::Equal(first, first_i))
+ first->sub()[0]->op() == kRegexpAnyByte))) &&
+ Regexp::Equal(first, first_i))
continue;
}
// Found end of a run with common leading regexp:
- // sub[start:i] all begin with first,
- // but sub[i] does not.
+ // sub[start:i] all begin with first,
+ // but sub[i] does not.
if (i == start) {
// Nothing to do - first iteration.
} else if (i == start+1) {
// Just one: don't bother factoring.
} else {
- Regexp* prefix = first->Incref();
+ Regexp* prefix = first->Incref();
for (int j = start; j < i; j++)
- sub[j] = Regexp::RemoveLeadingRegexp(sub[j]);
- splices->emplace_back(prefix, sub + start, i - start);
+ sub[j] = Regexp::RemoveLeadingRegexp(sub[j]);
+ splices->emplace_back(prefix, sub + start, i - start);
}
- // Prepare for next iteration (if there is one).
- if (i < nsub) {
+ // Prepare for next iteration (if there is one).
+ if (i < nsub) {
start = i;
first = first_i;
}
}
-}
-
-void FactorAlternationImpl::Round3(Regexp** sub, int nsub,
- Regexp::ParseFlags flags,
- std::vector<Splice>* splices) {
- // Round 3: Merge runs of literals and/or character classes.
- int start = 0;
- Regexp* first = NULL;
- for (int i = 0; i <= nsub; i++) {
- // Invariant: sub[start:i] consists of regexps that all
- // are either literals (i.e. runes) or character classes.
- Regexp* first_i = NULL;
- if (i < nsub) {
- first_i = sub[i];
- if (first != NULL &&
- (first->op() == kRegexpLiteral ||
- first->op() == kRegexpCharClass) &&
- (first_i->op() == kRegexpLiteral ||
- first_i->op() == kRegexpCharClass))
- continue;
- }
-
- // Found end of a run of Literal/CharClass:
- // sub[start:i] all are either one or the other,
- // but sub[i] is not.
+}
+
+void FactorAlternationImpl::Round3(Regexp** sub, int nsub,
+ Regexp::ParseFlags flags,
+ std::vector<Splice>* splices) {
+ // Round 3: Merge runs of literals and/or character classes.
+ int start = 0;
+ Regexp* first = NULL;
+ for (int i = 0; i <= nsub; i++) {
+ // Invariant: sub[start:i] consists of regexps that all
+ // are either literals (i.e. runes) or character classes.
+ Regexp* first_i = NULL;
+ if (i < nsub) {
+ first_i = sub[i];
+ if (first != NULL &&
+ (first->op() == kRegexpLiteral ||
+ first->op() == kRegexpCharClass) &&
+ (first_i->op() == kRegexpLiteral ||
+ first_i->op() == kRegexpCharClass))
+ continue;
+ }
+
+ // Found end of a run of Literal/CharClass:
+ // sub[start:i] all are either one or the other,
+ // but sub[i] is not.
if (i == start) {
- // Nothing to do - first iteration.
+ // Nothing to do - first iteration.
} else if (i == start+1) {
- // Just one: don't bother factoring.
+ // Just one: don't bother factoring.
} else {
CharClassBuilder ccb;
for (int j = start; j < i; j++) {
@@ -1187,14 +1187,14 @@ void FactorAlternationImpl::Round3(Regexp** sub, int nsub,
}
re->Decref();
}
- Regexp* re = Regexp::NewCharClass(ccb.GetCharClass(), flags);
- splices->emplace_back(re, sub + start, i - start);
+ Regexp* re = Regexp::NewCharClass(ccb.GetCharClass(), flags);
+ splices->emplace_back(re, sub + start, i - start);
}
- // Prepare for next iteration (if there is one).
- if (i < nsub) {
- start = i;
- first = first_i;
+ // Prepare for next iteration (if there is one).
+ if (i < nsub) {
+ start = i;
+ first = first_i;
}
}
}
@@ -1221,7 +1221,7 @@ void Regexp::ParseState::DoCollapse(RegexpOp op) {
return;
// Construct op (alternation or concatenation), flattening op of op.
- PODArray<Regexp*> subs(n);
+ PODArray<Regexp*> subs(n);
next = NULL;
int i = n;
for (sub = stacktop_; sub != NULL && !IsMarker(sub->op()); sub = next) {
@@ -1236,7 +1236,7 @@ void Regexp::ParseState::DoCollapse(RegexpOp op) {
}
}
- Regexp* re = ConcatOrAlternate(op, subs.data(), n, flags_, true);
+ Regexp* re = ConcatOrAlternate(op, subs.data(), n, flags_, true);
re->simple_ = re->ComputeSimple();
re->down_ = next;
stacktop_ = re;
@@ -1323,17 +1323,17 @@ bool Regexp::ParseState::MaybeConcatString(int r, ParseFlags flags) {
// Lexing routines.
-// Parses a decimal integer, storing it in *np.
+// Parses a decimal integer, storing it in *np.
// Sets *s to span the remainder of the string.
static bool ParseInteger(StringPiece* s, int* np) {
- if (s->empty() || !isdigit((*s)[0] & 0xFF))
+ if (s->empty() || !isdigit((*s)[0] & 0xFF))
return false;
// Disallow leading zeros.
if (s->size() >= 2 && (*s)[0] == '0' && isdigit((*s)[1] & 0xFF))
return false;
int n = 0;
int c;
- while (!s->empty() && isdigit(c = (*s)[0] & 0xFF)) {
+ while (!s->empty() && isdigit(c = (*s)[0] & 0xFF)) {
// Avoid overflow.
if (n >= 100000000)
return false;
@@ -1355,16 +1355,16 @@ static bool ParseInteger(StringPiece* s, int* np) {
// s must NOT be edited unless MaybeParseRepetition returns true.
static bool MaybeParseRepetition(StringPiece* sp, int* lo, int* hi) {
StringPiece s = *sp;
- if (s.empty() || s[0] != '{')
+ if (s.empty() || s[0] != '{')
return false;
s.remove_prefix(1); // '{'
if (!ParseInteger(&s, lo))
return false;
- if (s.empty())
+ if (s.empty())
return false;
if (s[0] == ',') {
s.remove_prefix(1); // ','
- if (s.empty())
+ if (s.empty())
return false;
if (s[0] == '}') {
// {2,} means at least 2
@@ -1378,7 +1378,7 @@ static bool MaybeParseRepetition(StringPiece* sp, int* lo, int* hi) {
// {2} means exactly two
*hi = *lo;
}
- if (s.empty() || s[0] != '}')
+ if (s.empty() || s[0] != '}')
return false;
s.remove_prefix(1); // '}'
*sp = s;
@@ -1393,7 +1393,7 @@ static bool MaybeParseRepetition(StringPiece* sp, int* lo, int* hi) {
static int StringPieceToRune(Rune *r, StringPiece *sp, RegexpStatus* status) {
// fullrune() takes int, not size_t. However, it just looks
// at the leading byte and treats any length >= 4 the same.
- if (fullrune(sp->data(), static_cast<int>(std::min(size_t{4}, sp->size())))) {
+ if (fullrune(sp->data(), static_cast<int>(std::min(size_t{4}, sp->size())))) {
int n = chartorune(r, sp->data());
// Some copies of chartorune have a bug that accepts
// encodings of values in (10FFFF, 1FFFFF] as valid.
@@ -1421,7 +1421,7 @@ static int StringPieceToRune(Rune *r, StringPiece *sp, RegexpStatus* status) {
static bool IsValidUTF8(const StringPiece& s, RegexpStatus* status) {
StringPiece t = s;
Rune r;
- while (!t.empty()) {
+ while (!t.empty()) {
if (StringPieceToRune(&r, &t, status) < 0)
return false;
}
@@ -1452,14 +1452,14 @@ static int UnHex(int c) {
// Sets *rp to the named character.
static bool ParseEscape(StringPiece* s, Rune* rp,
RegexpStatus* status, int rune_max) {
- const char* begin = s->data();
- if (s->empty() || (*s)[0] != '\\') {
+ const char* begin = s->data();
+ if (s->empty() || (*s)[0] != '\\') {
// Should not happen - caller always checks.
status->set_code(kRegexpInternalError);
status->set_error_arg(StringPiece());
return false;
}
- if (s->size() == 1) {
+ if (s->size() == 1) {
status->set_code(kRegexpTrailingBackslash);
status->set_error_arg(StringPiece());
return false;
@@ -1490,16 +1490,16 @@ static bool ParseEscape(StringPiece* s, Rune* rp,
case '6':
case '7':
// Single non-zero octal digit is a backreference; not supported.
- if (s->empty() || (*s)[0] < '0' || (*s)[0] > '7')
+ if (s->empty() || (*s)[0] < '0' || (*s)[0] > '7')
goto BadEscape;
FALLTHROUGH_INTENDED;
case '0':
// consume up to three octal digits; already have one.
code = c - '0';
- if (!s->empty() && '0' <= (c = (*s)[0]) && c <= '7') {
+ if (!s->empty() && '0' <= (c = (*s)[0]) && c <= '7') {
code = code * 8 + c - '0';
s->remove_prefix(1); // digit
- if (!s->empty()) {
+ if (!s->empty()) {
c = (*s)[0];
if ('0' <= c && c <= '7') {
code = code * 8 + c - '0';
@@ -1514,7 +1514,7 @@ static bool ParseEscape(StringPiece* s, Rune* rp,
// Hexadecimal escapes
case 'x':
- if (s->empty())
+ if (s->empty())
goto BadEscape;
if (StringPieceToRune(&c, s, status) < 0)
return false;
@@ -1534,7 +1534,7 @@ static bool ParseEscape(StringPiece* s, Rune* rp,
code = code * 16 + UnHex(c);
if (code > rune_max)
goto BadEscape;
- if (s->empty())
+ if (s->empty())
goto BadEscape;
if (StringPieceToRune(&c, s, status) < 0)
return false;
@@ -1545,7 +1545,7 @@ static bool ParseEscape(StringPiece* s, Rune* rp,
return true;
}
// Easy case: two hex digits.
- if (s->empty())
+ if (s->empty())
goto BadEscape;
if (StringPieceToRune(&c1, s, status) < 0)
return false;
@@ -1595,7 +1595,7 @@ BadEscape:
// Unrecognized escape sequence.
status->set_code(kRegexpBadEscape);
status->set_error_arg(
- StringPiece(begin, static_cast<size_t>(s->data() - begin)));
+ StringPiece(begin, static_cast<size_t>(s->data() - begin)));
return false;
}
@@ -1715,7 +1715,7 @@ const UGroup* MaybeParsePerlCCEscape(StringPiece* s, Regexp::ParseFlags parse_fl
return NULL;
// Could use StringPieceToRune, but there aren't
// any non-ASCII Perl group names.
- StringPiece name(s->data(), 2);
+ StringPiece name(s->data(), 2);
const UGroup *g = LookupPerlGroup(name);
if (g == NULL)
return NULL;
@@ -1755,8 +1755,8 @@ ParseStatus ParseUnicodeGroup(StringPiece* s, Regexp::ParseFlags parse_flags,
return kParseError;
if (c != '{') {
// Name is the bit of string we just skipped over for c.
- const char* p = seq.data() + 2;
- name = StringPiece(p, static_cast<size_t>(s->data() - p));
+ const char* p = seq.data() + 2;
+ name = StringPiece(p, static_cast<size_t>(s->data() - p));
} else {
// Name is in braces. Look for closing }
size_t end = s->find('}', 0);
@@ -1767,16 +1767,16 @@ ParseStatus ParseUnicodeGroup(StringPiece* s, Regexp::ParseFlags parse_flags,
status->set_error_arg(seq);
return kParseError;
}
- name = StringPiece(s->data(), end); // without '}'
+ name = StringPiece(s->data(), end); // without '}'
s->remove_prefix(end + 1); // with '}'
if (!IsValidUTF8(name, status))
return kParseError;
}
// Chop seq where s now begins.
- seq = StringPiece(seq.data(), static_cast<size_t>(s->data() - seq.data()));
+ seq = StringPiece(seq.data(), static_cast<size_t>(s->data() - seq.data()));
- if (!name.empty() && name[0] == '^') {
+ if (!name.empty() && name[0] == '^') {
sign = -sign;
name.remove_prefix(1); // '^'
}
@@ -1795,7 +1795,7 @@ ParseStatus ParseUnicodeGroup(StringPiece* s, Regexp::ParseFlags parse_flags,
// Look up the group in the ICU Unicode data. Because ICU provides full
// Unicode properties support, this could be more than a lookup by name.
::icu::UnicodeString ustr = ::icu::UnicodeString::fromUTF8(
- std::string("\\p{") + std::string(name) + std::string("}"));
+ std::string("\\p{") + std::string(name) + std::string("}"));
UErrorCode uerr = U_ZERO_ERROR;
::icu::UnicodeSet uset(ustr, uerr);
if (U_FAILURE(uerr)) {
@@ -1806,12 +1806,12 @@ ParseStatus ParseUnicodeGroup(StringPiece* s, Regexp::ParseFlags parse_flags,
// Convert the UnicodeSet to a URange32 and UGroup that we can add.
int nr = uset.getRangeCount();
- PODArray<URange32> r(nr);
+ PODArray<URange32> r(nr);
for (int i = 0; i < nr; i++) {
r[i].lo = uset.getRangeStart(i);
r[i].hi = uset.getRangeEnd(i);
}
- UGroup g = {"", +1, 0, 0, r.data(), nr};
+ UGroup g = {"", +1, 0, 0, r.data(), nr};
AddUGroup(cc, &g, sign, parse_flags);
#endif
@@ -1862,7 +1862,7 @@ static ParseStatus ParseCCName(StringPiece* s, Regexp::ParseFlags parse_flags,
bool Regexp::ParseState::ParseCCCharacter(StringPiece* s, Rune *rp,
const StringPiece& whole_class,
RegexpStatus* status) {
- if (s->empty()) {
+ if (s->empty()) {
status->set_code(kRegexpMissingBracket);
status->set_error_arg(whole_class);
return false;
@@ -1870,7 +1870,7 @@ bool Regexp::ParseState::ParseCCCharacter(StringPiece* s, Rune *rp,
// Allow regular escape sequences even though
// many need not be escaped in this context.
- if ((*s)[0] == '\\')
+ if ((*s)[0] == '\\')
return ParseEscape(s, rp, status, rune_max_);
// Otherwise take the next rune.
@@ -1912,7 +1912,7 @@ bool Regexp::ParseState::ParseCharClass(StringPiece* s,
Regexp** out_re,
RegexpStatus* status) {
StringPiece whole_class = *s;
- if (s->empty() || (*s)[0] != '[') {
+ if (s->empty() || (*s)[0] != '[') {
// Caller checked this.
status->set_code(kRegexpInternalError);
status->set_error_arg(StringPiece());
@@ -1922,7 +1922,7 @@ bool Regexp::ParseState::ParseCharClass(StringPiece* s,
Regexp* re = new Regexp(kRegexpCharClass, flags_ & ~FoldCase);
re->ccb_ = new CharClassBuilder;
s->remove_prefix(1); // '['
- if (!s->empty() && (*s)[0] == '^') {
+ if (!s->empty() && (*s)[0] == '^') {
s->remove_prefix(1); // '^'
negated = true;
if (!(flags_ & ClassNL) || (flags_ & NeverNL)) {
@@ -1932,7 +1932,7 @@ bool Regexp::ParseState::ParseCharClass(StringPiece* s,
}
}
bool first = true; // ] is okay as first char in class
- while (!s->empty() && ((*s)[0] != ']' || first)) {
+ while (!s->empty() && ((*s)[0] != ']' || first)) {
// - is only okay unescaped as first or last in class.
// Except that Perl allows - anywhere.
if ((*s)[0] == '-' && !first && !(flags_&PerlX) &&
@@ -2000,7 +2000,7 @@ bool Regexp::ParseState::ParseCharClass(StringPiece* s,
// in the flags.
re->ccb_->AddRangeFlags(rr.lo, rr.hi, flags_ | Regexp::ClassNL);
}
- if (s->empty()) {
+ if (s->empty()) {
status->set_code(kRegexpMissingBracket);
status->set_error_arg(whole_class);
re->Decref();
@@ -2017,7 +2017,7 @@ bool Regexp::ParseState::ParseCharClass(StringPiece* s,
// Returns whether name is a valid capture name.
static bool IsValidCaptureName(const StringPiece& name) {
- if (name.empty())
+ if (name.empty())
return false;
// Historically, we effectively used [0-9A-Za-z_]+ to validate; that
@@ -2093,8 +2093,8 @@ bool Regexp::ParseState::ParsePerlFlags(StringPiece* s) {
}
// t is "P<name>...", t[end] == '>'
- StringPiece capture(t.data()-2, end+3); // "(?P<name>"
- StringPiece name(t.data()+2, end-2); // "name"
+ StringPiece capture(t.data()-2, end+3); // "(?P<name>"
+ StringPiece name(t.data()+2, end-2); // "name"
if (!IsValidUTF8(name, status_))
return false;
if (!IsValidCaptureName(name)) {
@@ -2108,8 +2108,8 @@ bool Regexp::ParseState::ParsePerlFlags(StringPiece* s) {
return false;
}
- s->remove_prefix(
- static_cast<size_t>(capture.data() + capture.size() - s->data()));
+ s->remove_prefix(
+ static_cast<size_t>(capture.data() + capture.size() - s->data()));
return true;
}
@@ -2118,7 +2118,7 @@ bool Regexp::ParseState::ParsePerlFlags(StringPiece* s) {
int nflags = flags_;
Rune c;
for (bool done = false; !done; ) {
- if (t.empty())
+ if (t.empty())
goto BadPerlOp;
if (StringPieceToRune(&c, &t, status_) < 0)
return false;
@@ -2193,7 +2193,7 @@ bool Regexp::ParseState::ParsePerlFlags(StringPiece* s) {
BadPerlOp:
status_->set_code(kRegexpBadPerlOp);
status_->set_error_arg(
- StringPiece(s->data(), static_cast<size_t>(t.data() - s->data())));
+ StringPiece(s->data(), static_cast<size_t>(t.data() - s->data())));
return false;
}
@@ -2201,7 +2201,7 @@ BadPerlOp:
// into UTF8 encoding in string.
// Can't use EncodingUtils::EncodeLatin1AsUTF8 because it is
// deprecated and because it rejects code points 0x80-0x9F.
-void ConvertLatin1ToUTF8(const StringPiece& latin1, std::string* utf) {
+void ConvertLatin1ToUTF8(const StringPiece& latin1, std::string* utf) {
char buf[UTFmax];
utf->clear();
@@ -2228,7 +2228,7 @@ Regexp* Regexp::Parse(const StringPiece& s, ParseFlags global_flags,
// Convert regexp to UTF-8 (easier on the rest of the parser).
if (global_flags & Latin1) {
- std::string* tmp = new std::string;
+ std::string* tmp = new std::string;
ConvertLatin1ToUTF8(t, tmp);
status->set_tmp(tmp);
t = *tmp;
@@ -2236,7 +2236,7 @@ Regexp* Regexp::Parse(const StringPiece& s, ParseFlags global_flags,
if (global_flags & Literal) {
// Special parse loop for literal string.
- while (!t.empty()) {
+ while (!t.empty()) {
Rune r;
if (StringPieceToRune(&r, &t, status) < 0)
return NULL;
@@ -2247,7 +2247,7 @@ Regexp* Regexp::Parse(const StringPiece& s, ParseFlags global_flags,
}
StringPiece lastunary = StringPiece();
- while (!t.empty()) {
+ while (!t.empty()) {
StringPiece isunary = StringPiece();
switch (t[0]) {
default: {
@@ -2290,7 +2290,7 @@ Regexp* Regexp::Parse(const StringPiece& s, ParseFlags global_flags,
break;
case '^': // Beginning of line.
- if (!ps.PushCaret())
+ if (!ps.PushCaret())
return NULL;
t.remove_prefix(1); // '^'
break;
@@ -2331,18 +2331,18 @@ Regexp* Regexp::Parse(const StringPiece& s, ParseFlags global_flags,
bool nongreedy = false;
t.remove_prefix(1); // '*' or '+' or '?'
if (ps.flags() & PerlX) {
- if (!t.empty() && t[0] == '?') {
+ if (!t.empty() && t[0] == '?') {
nongreedy = true;
t.remove_prefix(1); // '?'
}
- if (!lastunary.empty()) {
+ if (!lastunary.empty()) {
// In Perl it is not allowed to stack repetition operators:
// a** is a syntax error, not a double-star.
// (and a++ means something else entirely, which we don't support!)
status->set_code(kRegexpRepeatOp);
status->set_error_arg(StringPiece(
- lastunary.data(),
- static_cast<size_t>(t.data() - lastunary.data())));
+ lastunary.data(),
+ static_cast<size_t>(t.data() - lastunary.data())));
return NULL;
}
}
@@ -2366,16 +2366,16 @@ Regexp* Regexp::Parse(const StringPiece& s, ParseFlags global_flags,
}
bool nongreedy = false;
if (ps.flags() & PerlX) {
- if (!t.empty() && t[0] == '?') {
+ if (!t.empty() && t[0] == '?') {
nongreedy = true;
t.remove_prefix(1); // '?'
}
- if (!lastunary.empty()) {
+ if (!lastunary.empty()) {
// Not allowed to stack repetition operators.
status->set_code(kRegexpRepeatOp);
status->set_error_arg(StringPiece(
- lastunary.data(),
- static_cast<size_t>(t.data() - lastunary.data())));
+ lastunary.data(),
+ static_cast<size_t>(t.data() - lastunary.data())));
return NULL;
}
}
@@ -2424,7 +2424,7 @@ Regexp* Regexp::Parse(const StringPiece& s, ParseFlags global_flags,
if (t[1] == 'Q') { // \Q ... \E: the ... is always literals
t.remove_prefix(2); // '\\', 'Q'
- while (!t.empty()) {
+ while (!t.empty()) {
if (t.size() >= 2 && t[0] == '\\' && t[1] == 'E') {
t.remove_prefix(2); // '\\', 'E'
break;
diff --git a/contrib/libs/re2/re2/perl_groups.cc b/contrib/libs/re2/re2/perl_groups.cc
index 17c74a9287..4687444581 100644
--- a/contrib/libs/re2/re2/perl_groups.cc
+++ b/contrib/libs/re2/re2/perl_groups.cc
@@ -20,12 +20,12 @@ static const URange16 code3[] = { /* \w */
{ 0x61, 0x7a },
};
const UGroup perl_groups[] = {
- { "\\d", +1, code1, 1, 0, 0 },
- { "\\D", -1, code1, 1, 0, 0 },
- { "\\s", +1, code2, 3, 0, 0 },
- { "\\S", -1, code2, 3, 0, 0 },
- { "\\w", +1, code3, 4, 0, 0 },
- { "\\W", -1, code3, 4, 0, 0 },
+ { "\\d", +1, code1, 1, 0, 0 },
+ { "\\D", -1, code1, 1, 0, 0 },
+ { "\\s", +1, code2, 3, 0, 0 },
+ { "\\S", -1, code2, 3, 0, 0 },
+ { "\\w", +1, code3, 4, 0, 0 },
+ { "\\W", -1, code3, 4, 0, 0 },
};
const int num_perl_groups = 6;
static const URange16 code4[] = { /* [:alnum:] */
@@ -85,34 +85,34 @@ static const URange16 code17[] = { /* [:xdigit:] */
{ 0x61, 0x66 },
};
const UGroup posix_groups[] = {
- { "[:alnum:]", +1, code4, 3, 0, 0 },
- { "[:^alnum:]", -1, code4, 3, 0, 0 },
- { "[:alpha:]", +1, code5, 2, 0, 0 },
- { "[:^alpha:]", -1, code5, 2, 0, 0 },
- { "[:ascii:]", +1, code6, 1, 0, 0 },
- { "[:^ascii:]", -1, code6, 1, 0, 0 },
- { "[:blank:]", +1, code7, 2, 0, 0 },
- { "[:^blank:]", -1, code7, 2, 0, 0 },
- { "[:cntrl:]", +1, code8, 2, 0, 0 },
- { "[:^cntrl:]", -1, code8, 2, 0, 0 },
- { "[:digit:]", +1, code9, 1, 0, 0 },
- { "[:^digit:]", -1, code9, 1, 0, 0 },
- { "[:graph:]", +1, code10, 1, 0, 0 },
- { "[:^graph:]", -1, code10, 1, 0, 0 },
- { "[:lower:]", +1, code11, 1, 0, 0 },
- { "[:^lower:]", -1, code11, 1, 0, 0 },
- { "[:print:]", +1, code12, 1, 0, 0 },
- { "[:^print:]", -1, code12, 1, 0, 0 },
- { "[:punct:]", +1, code13, 4, 0, 0 },
- { "[:^punct:]", -1, code13, 4, 0, 0 },
- { "[:space:]", +1, code14, 2, 0, 0 },
- { "[:^space:]", -1, code14, 2, 0, 0 },
- { "[:upper:]", +1, code15, 1, 0, 0 },
- { "[:^upper:]", -1, code15, 1, 0, 0 },
- { "[:word:]", +1, code16, 4, 0, 0 },
- { "[:^word:]", -1, code16, 4, 0, 0 },
- { "[:xdigit:]", +1, code17, 3, 0, 0 },
- { "[:^xdigit:]", -1, code17, 3, 0, 0 },
+ { "[:alnum:]", +1, code4, 3, 0, 0 },
+ { "[:^alnum:]", -1, code4, 3, 0, 0 },
+ { "[:alpha:]", +1, code5, 2, 0, 0 },
+ { "[:^alpha:]", -1, code5, 2, 0, 0 },
+ { "[:ascii:]", +1, code6, 1, 0, 0 },
+ { "[:^ascii:]", -1, code6, 1, 0, 0 },
+ { "[:blank:]", +1, code7, 2, 0, 0 },
+ { "[:^blank:]", -1, code7, 2, 0, 0 },
+ { "[:cntrl:]", +1, code8, 2, 0, 0 },
+ { "[:^cntrl:]", -1, code8, 2, 0, 0 },
+ { "[:digit:]", +1, code9, 1, 0, 0 },
+ { "[:^digit:]", -1, code9, 1, 0, 0 },
+ { "[:graph:]", +1, code10, 1, 0, 0 },
+ { "[:^graph:]", -1, code10, 1, 0, 0 },
+ { "[:lower:]", +1, code11, 1, 0, 0 },
+ { "[:^lower:]", -1, code11, 1, 0, 0 },
+ { "[:print:]", +1, code12, 1, 0, 0 },
+ { "[:^print:]", -1, code12, 1, 0, 0 },
+ { "[:punct:]", +1, code13, 4, 0, 0 },
+ { "[:^punct:]", -1, code13, 4, 0, 0 },
+ { "[:space:]", +1, code14, 2, 0, 0 },
+ { "[:^space:]", -1, code14, 2, 0, 0 },
+ { "[:upper:]", +1, code15, 1, 0, 0 },
+ { "[:^upper:]", -1, code15, 1, 0, 0 },
+ { "[:word:]", +1, code16, 4, 0, 0 },
+ { "[:^word:]", -1, code16, 4, 0, 0 },
+ { "[:xdigit:]", +1, code17, 3, 0, 0 },
+ { "[:^xdigit:]", -1, code17, 3, 0, 0 },
};
const int num_posix_groups = 28;
diff --git a/contrib/libs/re2/re2/pod_array.h b/contrib/libs/re2/re2/pod_array.h
index fdec6ffa03..f234e976f4 100644
--- a/contrib/libs/re2/re2/pod_array.h
+++ b/contrib/libs/re2/re2/pod_array.h
@@ -1,55 +1,55 @@
-// Copyright 2018 The RE2 Authors. All Rights Reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-#ifndef RE2_POD_ARRAY_H_
-#define RE2_POD_ARRAY_H_
-
-#include <memory>
-#include <type_traits>
-
-namespace re2 {
-
-template <typename T>
-class PODArray {
- public:
- static_assert(std::is_trivial<T>::value && std::is_standard_layout<T>::value,
- "T must be POD");
-
- PODArray()
- : ptr_() {}
- explicit PODArray(int len)
- : ptr_(std::allocator<T>().allocate(len), Deleter(len)) {}
-
- T* data() const {
- return ptr_.get();
- }
-
- int size() const {
- return ptr_.get_deleter().len_;
- }
-
- T& operator[](int pos) const {
- return ptr_[pos];
- }
-
- private:
- struct Deleter {
- Deleter()
- : len_(0) {}
- explicit Deleter(int len)
- : len_(len) {}
-
- void operator()(T* ptr) const {
- std::allocator<T>().deallocate(ptr, len_);
- }
-
- int len_;
- };
-
- std::unique_ptr<T[], Deleter> ptr_;
-};
-
-} // namespace re2
-
-#endif // RE2_POD_ARRAY_H_
+// Copyright 2018 The RE2 Authors. All Rights Reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef RE2_POD_ARRAY_H_
+#define RE2_POD_ARRAY_H_
+
+#include <memory>
+#include <type_traits>
+
+namespace re2 {
+
+template <typename T>
+class PODArray {
+ public:
+ static_assert(std::is_trivial<T>::value && std::is_standard_layout<T>::value,
+ "T must be POD");
+
+ PODArray()
+ : ptr_() {}
+ explicit PODArray(int len)
+ : ptr_(std::allocator<T>().allocate(len), Deleter(len)) {}
+
+ T* data() const {
+ return ptr_.get();
+ }
+
+ int size() const {
+ return ptr_.get_deleter().len_;
+ }
+
+ T& operator[](int pos) const {
+ return ptr_[pos];
+ }
+
+ private:
+ struct Deleter {
+ Deleter()
+ : len_(0) {}
+ explicit Deleter(int len)
+ : len_(len) {}
+
+ void operator()(T* ptr) const {
+ std::allocator<T>().deallocate(ptr, len_);
+ }
+
+ int len_;
+ };
+
+ std::unique_ptr<T[], Deleter> ptr_;
+};
+
+} // namespace re2
+
+#endif // RE2_POD_ARRAY_H_
diff --git a/contrib/libs/re2/re2/prefilter.cc b/contrib/libs/re2/re2/prefilter.cc
index 30b2570612..a47b3120fb 100644
--- a/contrib/libs/re2/re2/prefilter.cc
+++ b/contrib/libs/re2/re2/prefilter.cc
@@ -21,8 +21,8 @@ namespace re2 {
static const bool ExtraDebug = false;
-typedef std::set<std::string>::iterator SSIter;
-typedef std::set<std::string>::const_iterator ConstSSIter;
+typedef std::set<std::string>::iterator SSIter;
+typedef std::set<std::string>::const_iterator ConstSSIter;
// Initializes a Prefilter, allocating subs_ as necessary.
Prefilter::Prefilter(Op op) {
@@ -140,35 +140,35 @@ Prefilter* Prefilter::Or(Prefilter* a, Prefilter* b) {
return AndOr(OR, a, b);
}
-static void SimplifyStringSet(std::set<std::string>* ss) {
+static void SimplifyStringSet(std::set<std::string>* ss) {
// Now make sure that the strings aren't redundant. For example, if
// we know "ab" is a required string, then it doesn't help at all to
// know that "abc" is also a required string, so delete "abc". This
// is because, when we are performing a string search to filter
- // regexps, matching "ab" will already allow this regexp to be a
- // candidate for match, so further matching "abc" is redundant.
- // Note that we must ignore "" because find() would find it at the
- // start of everything and thus we would end up erasing everything.
+ // regexps, matching "ab" will already allow this regexp to be a
+ // candidate for match, so further matching "abc" is redundant.
+ // Note that we must ignore "" because find() would find it at the
+ // start of everything and thus we would end up erasing everything.
for (SSIter i = ss->begin(); i != ss->end(); ++i) {
- if (i->empty())
- continue;
+ if (i->empty())
+ continue;
SSIter j = i;
++j;
while (j != ss->end()) {
- if (j->find(*i) != std::string::npos) {
- j = ss->erase(j);
- continue;
- }
+ if (j->find(*i) != std::string::npos) {
+ j = ss->erase(j);
+ continue;
+ }
++j;
}
}
}
-Prefilter* Prefilter::OrStrings(std::set<std::string>* ss) {
- Prefilter* or_prefilter = new Prefilter(NONE);
+Prefilter* Prefilter::OrStrings(std::set<std::string>* ss) {
+ Prefilter* or_prefilter = new Prefilter(NONE);
SimplifyStringSet(ss);
- for (SSIter i = ss->begin(); i != ss->end(); ++i)
- or_prefilter = Or(or_prefilter, FromString(*i));
+ for (SSIter i = ss->begin(); i != ss->end(); ++i)
+ or_prefilter = Or(or_prefilter, FromString(*i));
return or_prefilter;
}
@@ -191,7 +191,7 @@ static Rune ToLowerRuneLatin1(Rune r) {
return r;
}
-Prefilter* Prefilter::FromString(const std::string& str) {
+Prefilter* Prefilter::FromString(const std::string& str) {
Prefilter* m = new Prefilter(Prefilter::ATOM);
m->atom_ = str;
return m;
@@ -214,26 +214,26 @@ class Prefilter::Info {
static Info* Quest(Info* a);
static Info* EmptyString();
static Info* NoMatch();
- static Info* AnyCharOrAnyByte();
+ static Info* AnyCharOrAnyByte();
static Info* CClass(CharClass* cc, bool latin1);
static Info* Literal(Rune r);
static Info* LiteralLatin1(Rune r);
static Info* AnyMatch();
// Format Info as a string.
- std::string ToString();
+ std::string ToString();
// Caller takes ownership of the Prefilter.
Prefilter* TakeMatch();
- std::set<std::string>& exact() { return exact_; }
+ std::set<std::string>& exact() { return exact_; }
bool is_exact() const { return is_exact_; }
class Walker;
private:
- std::set<std::string> exact_;
+ std::set<std::string> exact_;
// When is_exact_ is true, the strings that match
// are placed in exact_. When it is no longer an exact
@@ -268,11 +268,11 @@ Prefilter* Prefilter::Info::TakeMatch() {
}
// Format a Info in string form.
-std::string Prefilter::Info::ToString() {
+std::string Prefilter::Info::ToString() {
if (is_exact_) {
int n = 0;
- std::string s;
- for (SSIter i = exact_.begin(); i != exact_.end(); ++i) {
+ std::string s;
+ for (SSIter i = exact_.begin(); i != exact_.end(); ++i) {
if (n++ > 0)
s += ",";
s += *i;
@@ -287,17 +287,17 @@ std::string Prefilter::Info::ToString() {
}
// Add the strings from src to dst.
-static void CopyIn(const std::set<std::string>& src,
- std::set<std::string>* dst) {
+static void CopyIn(const std::set<std::string>& src,
+ std::set<std::string>* dst) {
for (ConstSSIter i = src.begin(); i != src.end(); ++i)
dst->insert(*i);
}
// Add the cross-product of a and b to dst.
// (For each string i in a and j in b, add i+j.)
-static void CrossProduct(const std::set<std::string>& a,
- const std::set<std::string>& b,
- std::set<std::string>* dst) {
+static void CrossProduct(const std::set<std::string>& a,
+ const std::set<std::string>& b,
+ std::set<std::string>* dst) {
for (ConstSSIter i = a.begin(); i != a.end(); ++i)
for (ConstSSIter j = b.begin(); j != b.end(); ++j)
dst->insert(*i + *j);
@@ -388,15 +388,15 @@ Prefilter::Info* Prefilter::Info::Plus(Info *a) {
return ab;
}
-static std::string RuneToString(Rune r) {
+static std::string RuneToString(Rune r) {
char buf[UTFmax];
int n = runetochar(buf, &r);
- return std::string(buf, n);
+ return std::string(buf, n);
}
-static std::string RuneToStringLatin1(Rune r) {
+static std::string RuneToStringLatin1(Rune r) {
char c = r & 0xff;
- return std::string(&c, 1);
+ return std::string(&c, 1);
}
// Constructs Info for literal rune.
@@ -415,8 +415,8 @@ Prefilter::Info* Prefilter::Info::LiteralLatin1(Rune r) {
return info;
}
-// Constructs Info for dot (any character) or \C (any byte).
-Prefilter::Info* Prefilter::Info::AnyCharOrAnyByte() {
+// Constructs Info for dot (any character) or \C (any byte).
+Prefilter::Info* Prefilter::Info::AnyCharOrAnyByte() {
Prefilter::Info* info = new Prefilter::Info();
info->match_ = new Prefilter(ALL);
return info;
@@ -459,7 +459,7 @@ Prefilter::Info* Prefilter::Info::CClass(CharClass *cc,
// If the class is too large, it's okay to overestimate.
if (cc->size() > 10)
- return AnyCharOrAnyByte();
+ return AnyCharOrAnyByte();
Prefilter::Info *a = new Prefilter::Info();
for (CCIter i = cc->begin(); i != cc->end(); ++i)
@@ -620,9 +620,9 @@ Prefilter::Info* Prefilter::Info::Walker::PostVisit(
break;
case kRegexpAnyChar:
- case kRegexpAnyByte:
+ case kRegexpAnyByte:
// Claim nothing, except that it's not empty.
- info = AnyCharOrAnyByte();
+ info = AnyCharOrAnyByte();
break;
case kRegexpCharClass:
@@ -648,10 +648,10 @@ Prefilter* Prefilter::FromRegexp(Regexp* re) {
return NULL;
Regexp* simple = re->Simplify();
- if (simple == NULL)
- return NULL;
+ if (simple == NULL)
+ return NULL;
- Prefilter::Info* info = BuildInfo(simple);
+ Prefilter::Info* info = BuildInfo(simple);
simple->Decref();
if (info == NULL)
return NULL;
@@ -661,7 +661,7 @@ Prefilter* Prefilter::FromRegexp(Regexp* re) {
return m;
}
-std::string Prefilter::DebugString() const {
+std::string Prefilter::DebugString() const {
switch (op_) {
default:
LOG(DFATAL) << "Bad op in Prefilter::DebugString: " << op_;
@@ -673,7 +673,7 @@ std::string Prefilter::DebugString() const {
case ALL:
return "";
case AND: {
- std::string s = "";
+ std::string s = "";
for (size_t i = 0; i < subs_->size(); i++) {
if (i > 0)
s += " ";
@@ -683,7 +683,7 @@ std::string Prefilter::DebugString() const {
return s;
}
case OR: {
- std::string s = "(";
+ std::string s = "(";
for (size_t i = 0; i < subs_->size(); i++) {
if (i > 0)
s += "|";
diff --git a/contrib/libs/re2/re2/prefilter.h b/contrib/libs/re2/re2/prefilter.h
index b11369ddac..4fedeb4a7c 100644
--- a/contrib/libs/re2/re2/prefilter.h
+++ b/contrib/libs/re2/re2/prefilter.h
@@ -37,7 +37,7 @@ class Prefilter {
~Prefilter();
Op op() { return op_; }
- const std::string& atom() const { return atom_; }
+ const std::string& atom() const { return atom_; }
void set_unique_id(int id) { unique_id_ = id; }
int unique_id() const { return unique_id_; }
@@ -57,7 +57,7 @@ class Prefilter {
static Prefilter* FromRE2(const RE2* re2);
// Returns a readable debug string of the prefilter.
- std::string DebugString() const;
+ std::string DebugString() const;
private:
class Info;
@@ -75,9 +75,9 @@ class Prefilter {
static Prefilter* FromRegexp(Regexp* a);
- static Prefilter* FromString(const std::string& str);
+ static Prefilter* FromString(const std::string& str);
- static Prefilter* OrStrings(std::set<std::string>* ss);
+ static Prefilter* OrStrings(std::set<std::string>* ss);
static Info* BuildInfo(Regexp* re);
@@ -90,7 +90,7 @@ class Prefilter {
std::vector<Prefilter*>* subs_;
// Actual string to match in leaf node.
- std::string atom_;
+ std::string atom_;
// If different prefilters have the same string atom, or if they are
// structurally the same (e.g., OR of same atom strings) they are
diff --git a/contrib/libs/re2/re2/prefilter_tree.cc b/contrib/libs/re2/re2/prefilter_tree.cc
index 6f24aa6aa3..fdf4e083c9 100644
--- a/contrib/libs/re2/re2/prefilter_tree.cc
+++ b/contrib/libs/re2/re2/prefilter_tree.cc
@@ -15,7 +15,7 @@
#include "util/util.h"
#include "util/logging.h"
-#include "util/strutil.h"
+#include "util/strutil.h"
#include "re2/prefilter.h"
#include "re2/re2.h"
@@ -54,22 +54,22 @@ void PrefilterTree::Add(Prefilter* prefilter) {
prefilter_vec_.push_back(prefilter);
}
-void PrefilterTree::Compile(std::vector<std::string>* atom_vec) {
+void PrefilterTree::Compile(std::vector<std::string>* atom_vec) {
if (compiled_) {
LOG(DFATAL) << "Compile called already.";
return;
}
- // Some legacy users of PrefilterTree call Compile() before
- // adding any regexps and expect Compile() to have no effect.
+ // Some legacy users of PrefilterTree call Compile() before
+ // adding any regexps and expect Compile() to have no effect.
if (prefilter_vec_.empty())
return;
compiled_ = true;
- // TODO(junyer): Use std::unordered_set<Prefilter*> instead?
- NodeMap nodes;
- AssignUniqueIds(&nodes, atom_vec);
+ // TODO(junyer): Use std::unordered_set<Prefilter*> instead?
+ NodeMap nodes;
+ AssignUniqueIds(&nodes, atom_vec);
// Identify nodes that are too common among prefilters and are
// triggering too many parents. Then get rid of them if possible.
@@ -102,27 +102,27 @@ void PrefilterTree::Compile(std::vector<std::string>* atom_vec) {
}
if (ExtraDebug)
- PrintDebugInfo(&nodes);
+ PrintDebugInfo(&nodes);
}
-Prefilter* PrefilterTree::CanonicalNode(NodeMap* nodes, Prefilter* node) {
- std::string node_string = NodeString(node);
- NodeMap::iterator iter = nodes->find(node_string);
- if (iter == nodes->end())
+Prefilter* PrefilterTree::CanonicalNode(NodeMap* nodes, Prefilter* node) {
+ std::string node_string = NodeString(node);
+ NodeMap::iterator iter = nodes->find(node_string);
+ if (iter == nodes->end())
return NULL;
return (*iter).second;
}
-std::string PrefilterTree::NodeString(Prefilter* node) const {
+std::string PrefilterTree::NodeString(Prefilter* node) const {
// Adding the operation disambiguates AND/OR/atom nodes.
- std::string s = StringPrintf("%d", node->op()) + ":";
+ std::string s = StringPrintf("%d", node->op()) + ":";
if (node->op() == Prefilter::ATOM) {
s += node->atom();
} else {
for (size_t i = 0; i < node->subs()->size(); i++) {
if (i > 0)
s += ',';
- s += StringPrintf("%d", (*node->subs())[i]->unique_id());
+ s += StringPrintf("%d", (*node->subs())[i]->unique_id());
}
}
return s;
@@ -138,7 +138,7 @@ bool PrefilterTree::KeepNode(Prefilter* node) const {
return false;
case Prefilter::ALL:
- case Prefilter::NONE:
+ case Prefilter::NONE:
return false;
case Prefilter::ATOM:
@@ -165,8 +165,8 @@ bool PrefilterTree::KeepNode(Prefilter* node) const {
}
}
-void PrefilterTree::AssignUniqueIds(NodeMap* nodes,
- std::vector<std::string>* atom_vec) {
+void PrefilterTree::AssignUniqueIds(NodeMap* nodes,
+ std::vector<std::string>* atom_vec) {
atom_vec->clear();
// Build vector of all filter nodes, sorted topologically
@@ -203,11 +203,11 @@ void PrefilterTree::AssignUniqueIds(NodeMap* nodes,
if (node == NULL)
continue;
node->set_unique_id(-1);
- Prefilter* canonical = CanonicalNode(nodes, node);
+ Prefilter* canonical = CanonicalNode(nodes, node);
if (canonical == NULL) {
// Any further nodes that have the same node string
// will find this node as the canonical node.
- nodes->emplace(NodeString(node), node);
+ nodes->emplace(NodeString(node), node);
if (node->op() == Prefilter::ATOM) {
atom_vec->push_back(node->atom());
atom_index_to_id_.push_back(unique_id);
@@ -217,7 +217,7 @@ void PrefilterTree::AssignUniqueIds(NodeMap* nodes,
node->set_unique_id(canonical->unique_id());
}
}
- entries_.resize(nodes->size());
+ entries_.resize(nodes->size());
// Create parent StdIntMap for the entries.
for (int i = static_cast<int>(v.size()) - 1; i >= 0; i--) {
@@ -225,7 +225,7 @@ void PrefilterTree::AssignUniqueIds(NodeMap* nodes,
if (prefilter == NULL)
continue;
- if (CanonicalNode(nodes, prefilter) != prefilter)
+ if (CanonicalNode(nodes, prefilter) != prefilter)
continue;
Entry* entry = &entries_[prefilter->unique_id()];
@@ -238,7 +238,7 @@ void PrefilterTree::AssignUniqueIds(NodeMap* nodes,
if (prefilter == NULL)
continue;
- if (CanonicalNode(nodes, prefilter) != prefilter)
+ if (CanonicalNode(nodes, prefilter) != prefilter)
continue;
Entry* entry = &entries_[prefilter->unique_id()];
@@ -258,7 +258,7 @@ void PrefilterTree::AssignUniqueIds(NodeMap* nodes,
std::set<int> uniq_child;
for (size_t j = 0; j < prefilter->subs()->size(); j++) {
Prefilter* child = (*prefilter->subs())[j];
- Prefilter* canonical = CanonicalNode(nodes, child);
+ Prefilter* canonical = CanonicalNode(nodes, child);
if (canonical == NULL) {
LOG(DFATAL) << "Null canonical node";
return;
@@ -285,7 +285,7 @@ void PrefilterTree::AssignUniqueIds(NodeMap* nodes,
for (size_t i = 0; i < prefilter_vec_.size(); i++) {
if (prefilter_vec_[i] == NULL)
continue;
- int id = CanonicalNode(nodes, prefilter_vec_[i])->unique_id();
+ int id = CanonicalNode(nodes, prefilter_vec_[i])->unique_id();
DCHECK_LE(0, id);
Entry* entry = &entries_[id];
entry->regexps.push_back(static_cast<int>(i));
@@ -298,27 +298,27 @@ void PrefilterTree::RegexpsGivenStrings(
std::vector<int>* regexps) const {
regexps->clear();
if (!compiled_) {
- // Some legacy users of PrefilterTree call Compile() before
- // adding any regexps and expect Compile() to have no effect.
- // This kludge is a counterpart to that kludge.
- if (prefilter_vec_.empty())
- return;
-
+ // Some legacy users of PrefilterTree call Compile() before
+ // adding any regexps and expect Compile() to have no effect.
+ // This kludge is a counterpart to that kludge.
+ if (prefilter_vec_.empty())
+ return;
+
LOG(ERROR) << "RegexpsGivenStrings called before Compile.";
- for (size_t i = 0; i < prefilter_vec_.size(); i++)
+ for (size_t i = 0; i < prefilter_vec_.size(); i++)
regexps->push_back(static_cast<int>(i));
} else {
- IntMap regexps_map(static_cast<int>(prefilter_vec_.size()));
- std::vector<int> matched_atom_ids;
- for (size_t j = 0; j < matched_atoms.size(); j++)
- matched_atom_ids.push_back(atom_index_to_id_[matched_atoms[j]]);
- PropagateMatch(matched_atom_ids, &regexps_map);
- for (IntMap::iterator it = regexps_map.begin();
- it != regexps_map.end();
- ++it)
- regexps->push_back(it->index());
-
- regexps->insert(regexps->end(), unfiltered_.begin(), unfiltered_.end());
+ IntMap regexps_map(static_cast<int>(prefilter_vec_.size()));
+ std::vector<int> matched_atom_ids;
+ for (size_t j = 0; j < matched_atoms.size(); j++)
+ matched_atom_ids.push_back(atom_index_to_id_[matched_atoms[j]]);
+ PropagateMatch(matched_atom_ids, &regexps_map);
+ for (IntMap::iterator it = regexps_map.begin();
+ it != regexps_map.end();
+ ++it)
+ regexps->push_back(it->index());
+
+ regexps->insert(regexps->end(), unfiltered_.begin(), unfiltered_.end());
}
std::sort(regexps->begin(), regexps->end());
}
@@ -364,11 +364,11 @@ void PrefilterTree::PrintPrefilter(int regexpid) {
LOG(ERROR) << DebugNodeString(prefilter_vec_[regexpid]);
}
-void PrefilterTree::PrintDebugInfo(NodeMap* nodes) {
+void PrefilterTree::PrintDebugInfo(NodeMap* nodes) {
LOG(ERROR) << "#Unique Atoms: " << atom_index_to_id_.size();
LOG(ERROR) << "#Unique Nodes: " << entries_.size();
- for (size_t i = 0; i < entries_.size(); i++) {
+ for (size_t i = 0; i < entries_.size(); i++) {
StdIntMap* parents = entries_[i].parents;
const std::vector<int>& regexps = entries_[i].regexps;
LOG(ERROR) << "EntryId: " << i
@@ -377,14 +377,14 @@ void PrefilterTree::PrintDebugInfo(NodeMap* nodes) {
LOG(ERROR) << it->first;
}
LOG(ERROR) << "Map:";
- for (NodeMap::const_iterator iter = nodes->begin();
- iter != nodes->end(); ++iter)
+ for (NodeMap::const_iterator iter = nodes->begin();
+ iter != nodes->end(); ++iter)
LOG(ERROR) << "NodeId: " << (*iter).second->unique_id()
<< " Str: " << (*iter).first;
}
-std::string PrefilterTree::DebugNodeString(Prefilter* node) const {
- std::string node_string = "";
+std::string PrefilterTree::DebugNodeString(Prefilter* node) const {
+ std::string node_string = "";
if (node->op() == Prefilter::ATOM) {
DCHECK(!node->atom().empty());
node_string += node->atom();
@@ -395,7 +395,7 @@ std::string PrefilterTree::DebugNodeString(Prefilter* node) const {
for (size_t i = 0; i < node->subs()->size(); i++) {
if (i > 0)
node_string += ',';
- node_string += StringPrintf("%d", (*node->subs())[i]->unique_id());
+ node_string += StringPrintf("%d", (*node->subs())[i]->unique_id());
node_string += ":";
node_string += DebugNodeString((*node->subs())[i]);
}
diff --git a/contrib/libs/re2/re2/prefilter_tree.h b/contrib/libs/re2/re2/prefilter_tree.h
index d61fdce948..5d73074d97 100644
--- a/contrib/libs/re2/re2/prefilter_tree.h
+++ b/contrib/libs/re2/re2/prefilter_tree.h
@@ -7,7 +7,7 @@
// The PrefilterTree class is used to form an AND-OR tree of strings
// that would trigger each regexp. The 'prefilter' of each regexp is
-// added to PrefilterTree, and then PrefilterTree is used to find all
+// added to PrefilterTree, and then PrefilterTree is used to find all
// the unique strings across the prefilters. During search, by using
// matches from a string matching engine, PrefilterTree deduces the
// set of regexps that are to be triggered. The 'string matching
@@ -21,8 +21,8 @@
#include <vector>
#include "util/util.h"
-#include "re2/prefilter.h"
-#include "re2/sparse_array.h"
+#include "re2/prefilter.h"
+#include "re2/sparse_array.h"
namespace re2 {
@@ -43,7 +43,7 @@ class PrefilterTree {
// The caller should use the returned set of strings to do string matching.
// Each time a string matches, the corresponding index then has to be
// and passed to RegexpsGivenStrings below.
- void Compile(std::vector<std::string>* atom_vec);
+ void Compile(std::vector<std::string>* atom_vec);
// Given the indices of the atoms that matched, returns the indexes
// of regexps that should be searched. The matched_atoms should
@@ -57,10 +57,10 @@ class PrefilterTree {
// nodes of the prefilter of the regexp.
void PrintPrefilter(int regexpid);
- private:
- typedef SparseArray<int> IntMap;
- typedef std::map<int, int> StdIntMap;
- typedef std::map<std::string, Prefilter*> NodeMap;
+ private:
+ typedef SparseArray<int> IntMap;
+ typedef std::map<int, int> StdIntMap;
+ typedef std::map<std::string, Prefilter*> NodeMap;
// Each unique node has a corresponding Entry that helps in
// passing the matching trigger information along the tree.
@@ -90,7 +90,7 @@ class PrefilterTree {
// This function assigns unique ids to various parts of the
// prefilter, by looking at if these nodes are already in the
// PrefilterTree.
- void AssignUniqueIds(NodeMap* nodes, std::vector<std::string>* atom_vec);
+ void AssignUniqueIds(NodeMap* nodes, std::vector<std::string>* atom_vec);
// Given the matching atoms, find the regexps to be triggered.
void PropagateMatch(const std::vector<int>& atom_ids,
@@ -98,17 +98,17 @@ class PrefilterTree {
// Returns the prefilter node that has the same NodeString as this
// node. For the canonical node, returns node.
- Prefilter* CanonicalNode(NodeMap* nodes, Prefilter* node);
+ Prefilter* CanonicalNode(NodeMap* nodes, Prefilter* node);
// A string that uniquely identifies the node. Assumes that the
// children of node has already been assigned unique ids.
- std::string NodeString(Prefilter* node) const;
+ std::string NodeString(Prefilter* node) const;
// Recursively constructs a readable prefilter string.
- std::string DebugNodeString(Prefilter* node) const;
+ std::string DebugNodeString(Prefilter* node) const;
// Used for debugging.
- void PrintDebugInfo(NodeMap* nodes);
+ void PrintDebugInfo(NodeMap* nodes);
// These are all the nodes formed by Compile. Essentially, there is
// one node for each unique atom and each unique AND/OR node.
diff --git a/contrib/libs/re2/re2/prog.cc b/contrib/libs/re2/re2/prog.cc
index 754bc88df0..a700d35de3 100644
--- a/contrib/libs/re2/re2/prog.cc
+++ b/contrib/libs/re2/re2/prog.cc
@@ -7,12 +7,12 @@
#include "re2/prog.h"
-#if defined(__AVX2__)
-#include <immintrin.h>
-#ifdef _MSC_VER
-#include <intrin.h>
-#endif
-#endif
+#if defined(__AVX2__)
+#include <immintrin.h>
+#ifdef _MSC_VER
+#include <intrin.h>
+#endif
+#endif
#include <stdint.h>
#include <string.h>
#include <algorithm>
@@ -40,7 +40,7 @@ void Prog::Inst::InitByteRange(int lo, int hi, int foldcase, uint32_t out) {
set_out_opcode(out, kInstByteRange);
lo_ = lo & 0xFF;
hi_ = hi & 0xFF;
- hint_foldcase_ = foldcase&1;
+ hint_foldcase_ = foldcase&1;
}
void Prog::Inst::InitCapture(int cap, uint32_t out) {
@@ -71,7 +71,7 @@ void Prog::Inst::InitFail() {
set_opcode(kInstFail);
}
-std::string Prog::Inst::Dump() {
+std::string Prog::Inst::Dump() {
switch (opcode()) {
default:
return StringPrintf("opcode %d", static_cast<int>(opcode()));
@@ -83,9 +83,9 @@ std::string Prog::Inst::Dump() {
return StringPrintf("altmatch -> %d | %d", out(), out1_);
case kInstByteRange:
- return StringPrintf("byte%s [%02x-%02x] %d -> %d",
- foldcase() ? "/i" : "",
- lo_, hi_, hint(), out());
+ return StringPrintf("byte%s [%02x-%02x] %d -> %d",
+ foldcase() ? "/i" : "",
+ lo_, hi_, hint(), out());
case kInstCapture:
return StringPrintf("capture %d -> %d", cap_, out());
@@ -115,8 +115,8 @@ Prog::Prog()
start_unanchored_(0),
size_(0),
bytemap_range_(0),
- prefix_foldcase_(false),
- prefix_size_(0),
+ prefix_foldcase_(false),
+ prefix_size_(0),
list_count_(0),
bit_state_text_max_size_(0),
dfa_mem_(0),
@@ -127,8 +127,8 @@ Prog::Prog()
Prog::~Prog() {
DeleteDFA(dfa_longest_);
DeleteDFA(dfa_first_);
- if (prefix_foldcase_)
- delete[] prefix_dfa_;
+ if (prefix_foldcase_)
+ delete[] prefix_dfa_;
}
typedef SparseSet Workq;
@@ -138,12 +138,12 @@ static inline void AddToQueue(Workq* q, int id) {
q->insert(id);
}
-static std::string ProgToString(Prog* prog, Workq* q) {
- std::string s;
+static std::string ProgToString(Prog* prog, Workq* q) {
+ std::string s;
for (Workq::iterator i = q->begin(); i != q->end(); ++i) {
int id = *i;
Prog::Inst* ip = prog->inst(id);
- s += StringPrintf("%d. %s\n", id, ip->Dump().c_str());
+ s += StringPrintf("%d. %s\n", id, ip->Dump().c_str());
AddToQueue(q, ip->out());
if (ip->opcode() == kInstAlt || ip->opcode() == kInstAltMatch)
AddToQueue(q, ip->out1());
@@ -151,19 +151,19 @@ static std::string ProgToString(Prog* prog, Workq* q) {
return s;
}
-static std::string FlattenedProgToString(Prog* prog, int start) {
- std::string s;
+static std::string FlattenedProgToString(Prog* prog, int start) {
+ std::string s;
for (int id = start; id < prog->size(); id++) {
Prog::Inst* ip = prog->inst(id);
if (ip->last())
- s += StringPrintf("%d. %s\n", id, ip->Dump().c_str());
+ s += StringPrintf("%d. %s\n", id, ip->Dump().c_str());
else
- s += StringPrintf("%d+ %s\n", id, ip->Dump().c_str());
+ s += StringPrintf("%d+ %s\n", id, ip->Dump().c_str());
}
return s;
}
-std::string Prog::Dump() {
+std::string Prog::Dump() {
if (did_flatten_)
return FlattenedProgToString(this, start_);
@@ -172,7 +172,7 @@ std::string Prog::Dump() {
return ProgToString(this, &q);
}
-std::string Prog::DumpUnanchored() {
+std::string Prog::DumpUnanchored() {
if (did_flatten_)
return FlattenedProgToString(this, start_unanchored_);
@@ -181,43 +181,43 @@ std::string Prog::DumpUnanchored() {
return ProgToString(this, &q);
}
-std::string Prog::DumpByteMap() {
- std::string map;
+std::string Prog::DumpByteMap() {
+ std::string map;
for (int c = 0; c < 256; c++) {
int b = bytemap_[c];
int lo = c;
while (c < 256-1 && bytemap_[c+1] == b)
c++;
int hi = c;
- map += StringPrintf("[%02x-%02x] -> %d\n", lo, hi, b);
+ map += StringPrintf("[%02x-%02x] -> %d\n", lo, hi, b);
}
return map;
}
-// Is ip a guaranteed match at end of text, perhaps after some capturing?
-static bool IsMatch(Prog* prog, Prog::Inst* ip) {
- for (;;) {
- switch (ip->opcode()) {
- default:
- LOG(DFATAL) << "Unexpected opcode in IsMatch: " << ip->opcode();
- return false;
-
- case kInstAlt:
- case kInstAltMatch:
- case kInstByteRange:
- case kInstFail:
- case kInstEmptyWidth:
- return false;
-
- case kInstCapture:
- case kInstNop:
- ip = prog->inst(ip->out());
- break;
-
- case kInstMatch:
- return true;
- }
- }
+// Is ip a guaranteed match at end of text, perhaps after some capturing?
+static bool IsMatch(Prog* prog, Prog::Inst* ip) {
+ for (;;) {
+ switch (ip->opcode()) {
+ default:
+ LOG(DFATAL) << "Unexpected opcode in IsMatch: " << ip->opcode();
+ return false;
+
+ case kInstAlt:
+ case kInstAltMatch:
+ case kInstByteRange:
+ case kInstFail:
+ case kInstEmptyWidth:
+ return false;
+
+ case kInstCapture:
+ case kInstNop:
+ ip = prog->inst(ip->out());
+ break;
+
+ case kInstMatch:
+ return true;
+ }
+ }
}
// Peep-hole optimizer.
@@ -288,24 +288,24 @@ uint32_t Prog::EmptyFlags(const StringPiece& text, const char* p) {
int flags = 0;
// ^ and \A
- if (p == text.data())
+ if (p == text.data())
flags |= kEmptyBeginText | kEmptyBeginLine;
else if (p[-1] == '\n')
flags |= kEmptyBeginLine;
// $ and \z
- if (p == text.data() + text.size())
+ if (p == text.data() + text.size())
flags |= kEmptyEndText | kEmptyEndLine;
- else if (p < text.data() + text.size() && p[0] == '\n')
+ else if (p < text.data() + text.size() && p[0] == '\n')
flags |= kEmptyEndLine;
// \b and \B
- if (p == text.data() && p == text.data() + text.size()) {
+ if (p == text.data() && p == text.data() + text.size()) {
// no word boundary here
- } else if (p == text.data()) {
+ } else if (p == text.data()) {
if (IsWordChar(p[0]))
flags |= kEmptyWordBoundary;
- } else if (p == text.data() + text.size()) {
+ } else if (p == text.data() + text.size()) {
if (IsWordChar(p[-1]))
flags |= kEmptyWordBoundary;
} else {
@@ -353,7 +353,7 @@ class ByteMapBuilder {
int Recolor(int oldcolor);
Bitmap256 splits_;
- int colors_[256];
+ int colors_[256];
int nextcolor_;
std::vector<std::pair<int, int>> colormap_;
std::vector<std::pair<int, int>> ranges_;
@@ -467,11 +467,11 @@ void Prog::ComputeByteMap() {
foldlo = 'a';
if (foldhi > 'z')
foldhi = 'z';
- if (foldlo <= foldhi) {
- foldlo += 'A' - 'a';
- foldhi += 'A' - 'a';
- builder.Mark(foldlo, foldhi);
- }
+ if (foldlo <= foldhi) {
+ foldlo += 'A' - 'a';
+ foldhi += 'A' - 'a';
+ builder.Mark(foldlo, foldhi);
+ }
}
// If this Inst is not the last Inst in its list AND the next Inst is
// also a ByteRange AND the Insts have the same out, defer the merge.
@@ -538,7 +538,7 @@ void Prog::ComputeByteMap() {
// dominator of the instructions reachable from some "successor root" (i.e. it
// has an unreachable predecessor) and is considered a "dominator root". Since
// only Alt instructions can be "dominator roots" (other instructions would be
-// "leaves"), only Alt instructions are required to be marked as predecessors.
+// "leaves"), only Alt instructions are required to be marked as predecessors.
//
// Dividing the Prog into "trees" comprises two passes: marking the "successor
// roots" and the predecessors; and marking the "dominator roots". Sorting the
@@ -593,9 +593,9 @@ void Prog::Flatten() {
flatmap[i->value()] = static_cast<int>(flat.size());
EmitList(i->index(), &rootmap, &flat, &reachable, &stk);
flat.back().set_last();
- // We have the bounds of the "list", so this is the
- // most convenient point at which to compute hints.
- ComputeHints(&flat, flatmap[i->value()], static_cast<int>(flat.size()));
+ // We have the bounds of the "list", so this is the
+ // most convenient point at which to compute hints.
+ ComputeHints(&flat, flatmap[i->value()], static_cast<int>(flat.size()));
}
list_count_ = static_cast<int>(flatmap.size());
@@ -632,18 +632,18 @@ void Prog::Flatten() {
// Finally, replace the old instructions with the new instructions.
size_ = static_cast<int>(flat.size());
- inst_ = PODArray<Inst>(size_);
- memmove(inst_.data(), flat.data(), size_*sizeof inst_[0]);
-
- // Populate the list heads for BitState.
- // 512 instructions limits the memory footprint to 1KiB.
- if (size_ <= 512) {
- list_heads_ = PODArray<uint16_t>(size_);
- // 0xFF makes it more obvious if we try to look up a non-head.
- memset(list_heads_.data(), 0xFF, size_*sizeof list_heads_[0]);
- for (int i = 0; i < list_count_; ++i)
- list_heads_[flatmap[i]] = i;
- }
+ inst_ = PODArray<Inst>(size_);
+ memmove(inst_.data(), flat.data(), size_*sizeof inst_[0]);
+
+ // Populate the list heads for BitState.
+ // 512 instructions limits the memory footprint to 1KiB.
+ if (size_ <= 512) {
+ list_heads_ = PODArray<uint16_t>(size_);
+ // 0xFF makes it more obvious if we try to look up a non-head.
+ memset(list_heads_.data(), 0xFF, size_*sizeof list_heads_[0]);
+ for (int i = 0; i < list_count_; ++i)
+ list_heads_[flatmap[i]] = i;
+ }
// BitState allocates a bitmap of size list_count_ * (text.size()+1)
// for tracking pairs of possibilities that it has already explored.
@@ -841,335 +841,335 @@ void Prog::EmitList(int root, SparseArray<int>* rootmap,
}
}
-// For each ByteRange instruction in [begin, end), computes a hint to execution
-// engines: the delta to the next instruction (in flat) worth exploring iff the
-// current instruction matched.
-//
-// Implements a coloring algorithm related to ByteMapBuilder, but in this case,
-// colors are instructions and recoloring ranges precisely identifies conflicts
-// between instructions. Iterating backwards over [begin, end) is guaranteed to
-// identify the nearest conflict (if any) with only linear complexity.
-void Prog::ComputeHints(std::vector<Inst>* flat, int begin, int end) {
- Bitmap256 splits;
- int colors[256];
-
- bool dirty = false;
- for (int id = end; id >= begin; --id) {
- if (id == end ||
- (*flat)[id].opcode() != kInstByteRange) {
- if (dirty) {
- dirty = false;
- splits.Clear();
- }
- splits.Set(255);
- colors[255] = id;
- // At this point, the [0-255] range is colored with id.
- // Thus, hints cannot point beyond id; and if id == end,
- // hints that would have pointed to id will be 0 instead.
- continue;
- }
- dirty = true;
-
- // We recolor the [lo-hi] range with id. Note that first ratchets backwards
- // from end to the nearest conflict (if any) during recoloring.
- int first = end;
- auto Recolor = [&](int lo, int hi) {
- // Like ByteMapBuilder, we split at lo-1 and at hi.
- --lo;
-
- if (0 <= lo && !splits.Test(lo)) {
- splits.Set(lo);
- int next = splits.FindNextSetBit(lo+1);
- colors[lo] = colors[next];
- }
- if (!splits.Test(hi)) {
- splits.Set(hi);
- int next = splits.FindNextSetBit(hi+1);
- colors[hi] = colors[next];
- }
-
- int c = lo+1;
- while (c < 256) {
- int next = splits.FindNextSetBit(c);
- // Ratchet backwards...
- first = std::min(first, colors[next]);
- // Recolor with id - because it's the new nearest conflict!
- colors[next] = id;
- if (next == hi)
- break;
- c = next+1;
- }
- };
-
- Inst* ip = &(*flat)[id];
- int lo = ip->lo();
- int hi = ip->hi();
- Recolor(lo, hi);
- if (ip->foldcase() && lo <= 'z' && hi >= 'a') {
- int foldlo = lo;
- int foldhi = hi;
- if (foldlo < 'a')
- foldlo = 'a';
- if (foldhi > 'z')
- foldhi = 'z';
- if (foldlo <= foldhi) {
- foldlo += 'A' - 'a';
- foldhi += 'A' - 'a';
- Recolor(foldlo, foldhi);
- }
- }
-
- if (first != end) {
- uint16_t hint = static_cast<uint16_t>(std::min(first - id, 32767));
- ip->hint_foldcase_ |= hint<<1;
- }
- }
-}
-
-// The final state will always be this, which frees up a register for the hot
-// loop and thus avoids the spilling that can occur when building with Clang.
-static const size_t kShiftDFAFinal = 9;
-
-// This function takes the prefix as std::string (i.e. not const std::string&
-// as normal) because it's going to clobber it, so a temporary is convenient.
-static uint64_t* BuildShiftDFA(std::string prefix) {
- // This constant is for convenience now and also for correctness later when
- // we clobber the prefix, but still need to know how long it was initially.
- const size_t size = prefix.size();
-
- // Construct the NFA.
- // The table is indexed by input byte; each element is a bitfield of states
- // reachable by the input byte. Given a bitfield of the current states, the
- // bitfield of states reachable from those is - for this specific purpose -
- // always ((ncurr << 1) | 1). Intersecting the reachability bitfields gives
- // the bitfield of the next states reached by stepping over the input byte.
- // Credits for this technique: the Hyperscan paper by Geoff Langdale et al.
- uint16_t nfa[256]{};
- for (size_t i = 0; i < size; ++i) {
- uint8_t b = prefix[i];
- nfa[b] |= 1 << (i+1);
- }
- // This is the `\C*?` for unanchored search.
- for (int b = 0; b < 256; ++b)
- nfa[b] |= 1;
-
- // This maps from DFA state to NFA states; the reverse mapping is used when
- // recording transitions and gets implemented with plain old linear search.
- // The "Shift DFA" technique limits this to ten states when using uint64_t;
- // to allow for the initial state, we use at most nine bytes of the prefix.
- // That same limit is also why uint16_t is sufficient for the NFA bitfield.
- uint16_t states[kShiftDFAFinal+1]{};
- states[0] = 1;
- for (size_t dcurr = 0; dcurr < size; ++dcurr) {
- uint8_t b = prefix[dcurr];
- uint16_t ncurr = states[dcurr];
- uint16_t nnext = nfa[b] & ((ncurr << 1) | 1);
- size_t dnext = dcurr+1;
- if (dnext == size)
- dnext = kShiftDFAFinal;
- states[dnext] = nnext;
- }
-
- // Sort and unique the bytes of the prefix to avoid repeating work while we
- // record transitions. This clobbers the prefix, but it's no longer needed.
- std::sort(prefix.begin(), prefix.end());
- prefix.erase(std::unique(prefix.begin(), prefix.end()), prefix.end());
-
- // Construct the DFA.
- // The table is indexed by input byte; each element is effectively a packed
- // array of uint6_t; each array value will be multiplied by six in order to
- // avoid having to do so later in the hot loop as well as masking/shifting.
- // Credits for this technique: "Shift-based DFAs" on GitHub by Per Vognsen.
- uint64_t* dfa = new uint64_t[256]{};
- // Record a transition from each state for each of the bytes of the prefix.
- // Note that all other input bytes go back to the initial state by default.
- for (size_t dcurr = 0; dcurr < size; ++dcurr) {
- for (uint8_t b : prefix) {
- uint16_t ncurr = states[dcurr];
- uint16_t nnext = nfa[b] & ((ncurr << 1) | 1);
- size_t dnext = 0;
- while (states[dnext] != nnext)
- ++dnext;
- dfa[b] |= static_cast<uint64_t>(dnext * 6) << (dcurr * 6);
- // Convert ASCII letters to uppercase and record the extra transitions.
- // Note that ASCII letters are guaranteed to be lowercase at this point
- // because that's how the parser normalises them. #FunFact: 'k' and 's'
- // match U+212A and U+017F, respectively, so they won't occur here when
- // using UTF-8 encoding because the parser will emit character classes.
- if ('a' <= b && b <= 'z') {
- b -= 'a' - 'A';
- dfa[b] |= static_cast<uint64_t>(dnext * 6) << (dcurr * 6);
- }
- }
- }
- // This lets the final state "saturate", which will matter for performance:
- // in the hot loop, we check for a match only at the end of each iteration,
- // so we must keep signalling the match until we get around to checking it.
- for (int b = 0; b < 256; ++b)
- dfa[b] |= static_cast<uint64_t>(kShiftDFAFinal * 6) << (kShiftDFAFinal * 6);
-
- return dfa;
-}
-
-void Prog::ConfigurePrefixAccel(const std::string& prefix,
- bool prefix_foldcase) {
- prefix_foldcase_ = prefix_foldcase;
- prefix_size_ = prefix.size();
- if (prefix_foldcase_) {
- // Use PrefixAccel_ShiftDFA().
- // ... and no more than nine bytes of the prefix. (See above for details.)
- prefix_size_ = std::min(prefix_size_, kShiftDFAFinal);
- prefix_dfa_ = BuildShiftDFA(prefix.substr(0, prefix_size_));
- } else if (prefix_size_ != 1) {
- // Use PrefixAccel_FrontAndBack().
- prefix_front_ = prefix.front();
- prefix_back_ = prefix.back();
- } else {
- // Use memchr(3).
- prefix_front_ = prefix.front();
- }
-}
-
-const void* Prog::PrefixAccel_ShiftDFA(const void* data, size_t size) {
- if (size < prefix_size_)
- return NULL;
-
- uint64_t curr = 0;
-
- // At the time of writing, rough benchmarks on a Broadwell machine showed
- // that this unroll factor (i.e. eight) achieves a speedup factor of two.
- if (size >= 8) {
- const uint8_t* p = reinterpret_cast<const uint8_t*>(data);
- const uint8_t* endp = p + (size&~7);
- do {
- uint8_t b0 = p[0];
- uint8_t b1 = p[1];
- uint8_t b2 = p[2];
- uint8_t b3 = p[3];
- uint8_t b4 = p[4];
- uint8_t b5 = p[5];
- uint8_t b6 = p[6];
- uint8_t b7 = p[7];
-
- uint64_t next0 = prefix_dfa_[b0];
- uint64_t next1 = prefix_dfa_[b1];
- uint64_t next2 = prefix_dfa_[b2];
- uint64_t next3 = prefix_dfa_[b3];
- uint64_t next4 = prefix_dfa_[b4];
- uint64_t next5 = prefix_dfa_[b5];
- uint64_t next6 = prefix_dfa_[b6];
- uint64_t next7 = prefix_dfa_[b7];
-
- uint64_t curr0 = next0 >> (curr & 63);
- uint64_t curr1 = next1 >> (curr0 & 63);
- uint64_t curr2 = next2 >> (curr1 & 63);
- uint64_t curr3 = next3 >> (curr2 & 63);
- uint64_t curr4 = next4 >> (curr3 & 63);
- uint64_t curr5 = next5 >> (curr4 & 63);
- uint64_t curr6 = next6 >> (curr5 & 63);
- uint64_t curr7 = next7 >> (curr6 & 63);
-
- if ((curr7 & 63) == kShiftDFAFinal * 6) {
- // At the time of writing, using the same masking subexpressions from
- // the preceding lines caused Clang to clutter the hot loop computing
- // them - even though they aren't actually needed for shifting! Hence
- // these rewritten conditions, which achieve a speedup factor of two.
- if (((curr7-curr0) & 63) == 0) return p+1-prefix_size_;
- if (((curr7-curr1) & 63) == 0) return p+2-prefix_size_;
- if (((curr7-curr2) & 63) == 0) return p+3-prefix_size_;
- if (((curr7-curr3) & 63) == 0) return p+4-prefix_size_;
- if (((curr7-curr4) & 63) == 0) return p+5-prefix_size_;
- if (((curr7-curr5) & 63) == 0) return p+6-prefix_size_;
- if (((curr7-curr6) & 63) == 0) return p+7-prefix_size_;
- if (((curr7-curr7) & 63) == 0) return p+8-prefix_size_;
- }
-
- curr = curr7;
- p += 8;
- } while (p != endp);
- data = p;
- size = size&7;
- }
-
- const uint8_t* p = reinterpret_cast<const uint8_t*>(data);
- const uint8_t* endp = p + size;
- while (p != endp) {
- uint8_t b = *p++;
- uint64_t next = prefix_dfa_[b];
- curr = next >> (curr & 63);
- if ((curr & 63) == kShiftDFAFinal * 6)
- return p-prefix_size_;
- }
- return NULL;
-}
-
-#if defined(__AVX2__)
-// Finds the least significant non-zero bit in n.
-static int FindLSBSet(uint32_t n) {
- DCHECK_NE(n, 0);
-#if defined(__GNUC__)
- return __builtin_ctz(n);
-#elif defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86))
- unsigned long c;
- _BitScanForward(&c, n);
- return static_cast<int>(c);
-#else
- int c = 31;
- for (int shift = 1 << 4; shift != 0; shift >>= 1) {
- uint32_t word = n << shift;
- if (word != 0) {
- n = word;
- c -= shift;
- }
- }
- return c;
-#endif
-}
-#endif
-
-const void* Prog::PrefixAccel_FrontAndBack(const void* data, size_t size) {
- DCHECK_GE(prefix_size_, 2);
- if (size < prefix_size_)
- return NULL;
- // Don't bother searching the last prefix_size_-1 bytes for prefix_front_.
- // This also means that probing for prefix_back_ doesn't go out of bounds.
- size -= prefix_size_-1;
-
-#if defined(__AVX2__)
- // Use AVX2 to look for prefix_front_ and prefix_back_ 32 bytes at a time.
- if (size >= sizeof(__m256i)) {
- const __m256i* fp = reinterpret_cast<const __m256i*>(
- reinterpret_cast<const char*>(data));
- const __m256i* bp = reinterpret_cast<const __m256i*>(
- reinterpret_cast<const char*>(data) + prefix_size_-1);
- const __m256i* endfp = fp + size/sizeof(__m256i);
- const __m256i f_set1 = _mm256_set1_epi8(prefix_front_);
- const __m256i b_set1 = _mm256_set1_epi8(prefix_back_);
- do {
- const __m256i f_loadu = _mm256_loadu_si256(fp++);
- const __m256i b_loadu = _mm256_loadu_si256(bp++);
- const __m256i f_cmpeq = _mm256_cmpeq_epi8(f_set1, f_loadu);
- const __m256i b_cmpeq = _mm256_cmpeq_epi8(b_set1, b_loadu);
- const int fb_testz = _mm256_testz_si256(f_cmpeq, b_cmpeq);
- if (fb_testz == 0) { // ZF: 1 means zero, 0 means non-zero.
- const __m256i fb_and = _mm256_and_si256(f_cmpeq, b_cmpeq);
- const int fb_movemask = _mm256_movemask_epi8(fb_and);
- const int fb_ctz = FindLSBSet(fb_movemask);
- return reinterpret_cast<const char*>(fp-1) + fb_ctz;
- }
- } while (fp != endfp);
- data = fp;
- size = size%sizeof(__m256i);
- }
-#endif
-
- const char* p0 = reinterpret_cast<const char*>(data);
- for (const char* p = p0;; p++) {
- DCHECK_GE(size, static_cast<size_t>(p-p0));
- p = reinterpret_cast<const char*>(memchr(p, prefix_front_, size - (p-p0)));
- if (p == NULL || p[prefix_size_-1] == prefix_back_)
- return p;
- }
-}
-
+// For each ByteRange instruction in [begin, end), computes a hint to execution
+// engines: the delta to the next instruction (in flat) worth exploring iff the
+// current instruction matched.
+//
+// Implements a coloring algorithm related to ByteMapBuilder, but in this case,
+// colors are instructions and recoloring ranges precisely identifies conflicts
+// between instructions. Iterating backwards over [begin, end) is guaranteed to
+// identify the nearest conflict (if any) with only linear complexity.
+void Prog::ComputeHints(std::vector<Inst>* flat, int begin, int end) {
+ Bitmap256 splits;
+ int colors[256];
+
+ bool dirty = false;
+ for (int id = end; id >= begin; --id) {
+ if (id == end ||
+ (*flat)[id].opcode() != kInstByteRange) {
+ if (dirty) {
+ dirty = false;
+ splits.Clear();
+ }
+ splits.Set(255);
+ colors[255] = id;
+ // At this point, the [0-255] range is colored with id.
+ // Thus, hints cannot point beyond id; and if id == end,
+ // hints that would have pointed to id will be 0 instead.
+ continue;
+ }
+ dirty = true;
+
+ // We recolor the [lo-hi] range with id. Note that first ratchets backwards
+ // from end to the nearest conflict (if any) during recoloring.
+ int first = end;
+ auto Recolor = [&](int lo, int hi) {
+ // Like ByteMapBuilder, we split at lo-1 and at hi.
+ --lo;
+
+ if (0 <= lo && !splits.Test(lo)) {
+ splits.Set(lo);
+ int next = splits.FindNextSetBit(lo+1);
+ colors[lo] = colors[next];
+ }
+ if (!splits.Test(hi)) {
+ splits.Set(hi);
+ int next = splits.FindNextSetBit(hi+1);
+ colors[hi] = colors[next];
+ }
+
+ int c = lo+1;
+ while (c < 256) {
+ int next = splits.FindNextSetBit(c);
+ // Ratchet backwards...
+ first = std::min(first, colors[next]);
+ // Recolor with id - because it's the new nearest conflict!
+ colors[next] = id;
+ if (next == hi)
+ break;
+ c = next+1;
+ }
+ };
+
+ Inst* ip = &(*flat)[id];
+ int lo = ip->lo();
+ int hi = ip->hi();
+ Recolor(lo, hi);
+ if (ip->foldcase() && lo <= 'z' && hi >= 'a') {
+ int foldlo = lo;
+ int foldhi = hi;
+ if (foldlo < 'a')
+ foldlo = 'a';
+ if (foldhi > 'z')
+ foldhi = 'z';
+ if (foldlo <= foldhi) {
+ foldlo += 'A' - 'a';
+ foldhi += 'A' - 'a';
+ Recolor(foldlo, foldhi);
+ }
+ }
+
+ if (first != end) {
+ uint16_t hint = static_cast<uint16_t>(std::min(first - id, 32767));
+ ip->hint_foldcase_ |= hint<<1;
+ }
+ }
+}
+
+// The final state will always be this, which frees up a register for the hot
+// loop and thus avoids the spilling that can occur when building with Clang.
+static const size_t kShiftDFAFinal = 9;
+
+// This function takes the prefix as std::string (i.e. not const std::string&
+// as normal) because it's going to clobber it, so a temporary is convenient.
+static uint64_t* BuildShiftDFA(std::string prefix) {
+ // This constant is for convenience now and also for correctness later when
+ // we clobber the prefix, but still need to know how long it was initially.
+ const size_t size = prefix.size();
+
+ // Construct the NFA.
+ // The table is indexed by input byte; each element is a bitfield of states
+ // reachable by the input byte. Given a bitfield of the current states, the
+ // bitfield of states reachable from those is - for this specific purpose -
+ // always ((ncurr << 1) | 1). Intersecting the reachability bitfields gives
+ // the bitfield of the next states reached by stepping over the input byte.
+ // Credits for this technique: the Hyperscan paper by Geoff Langdale et al.
+ uint16_t nfa[256]{};
+ for (size_t i = 0; i < size; ++i) {
+ uint8_t b = prefix[i];
+ nfa[b] |= 1 << (i+1);
+ }
+ // This is the `\C*?` for unanchored search.
+ for (int b = 0; b < 256; ++b)
+ nfa[b] |= 1;
+
+ // This maps from DFA state to NFA states; the reverse mapping is used when
+ // recording transitions and gets implemented with plain old linear search.
+ // The "Shift DFA" technique limits this to ten states when using uint64_t;
+ // to allow for the initial state, we use at most nine bytes of the prefix.
+ // That same limit is also why uint16_t is sufficient for the NFA bitfield.
+ uint16_t states[kShiftDFAFinal+1]{};
+ states[0] = 1;
+ for (size_t dcurr = 0; dcurr < size; ++dcurr) {
+ uint8_t b = prefix[dcurr];
+ uint16_t ncurr = states[dcurr];
+ uint16_t nnext = nfa[b] & ((ncurr << 1) | 1);
+ size_t dnext = dcurr+1;
+ if (dnext == size)
+ dnext = kShiftDFAFinal;
+ states[dnext] = nnext;
+ }
+
+ // Sort and unique the bytes of the prefix to avoid repeating work while we
+ // record transitions. This clobbers the prefix, but it's no longer needed.
+ std::sort(prefix.begin(), prefix.end());
+ prefix.erase(std::unique(prefix.begin(), prefix.end()), prefix.end());
+
+ // Construct the DFA.
+ // The table is indexed by input byte; each element is effectively a packed
+ // array of uint6_t; each array value will be multiplied by six in order to
+ // avoid having to do so later in the hot loop as well as masking/shifting.
+ // Credits for this technique: "Shift-based DFAs" on GitHub by Per Vognsen.
+ uint64_t* dfa = new uint64_t[256]{};
+ // Record a transition from each state for each of the bytes of the prefix.
+ // Note that all other input bytes go back to the initial state by default.
+ for (size_t dcurr = 0; dcurr < size; ++dcurr) {
+ for (uint8_t b : prefix) {
+ uint16_t ncurr = states[dcurr];
+ uint16_t nnext = nfa[b] & ((ncurr << 1) | 1);
+ size_t dnext = 0;
+ while (states[dnext] != nnext)
+ ++dnext;
+ dfa[b] |= static_cast<uint64_t>(dnext * 6) << (dcurr * 6);
+ // Convert ASCII letters to uppercase and record the extra transitions.
+ // Note that ASCII letters are guaranteed to be lowercase at this point
+ // because that's how the parser normalises them. #FunFact: 'k' and 's'
+ // match U+212A and U+017F, respectively, so they won't occur here when
+ // using UTF-8 encoding because the parser will emit character classes.
+ if ('a' <= b && b <= 'z') {
+ b -= 'a' - 'A';
+ dfa[b] |= static_cast<uint64_t>(dnext * 6) << (dcurr * 6);
+ }
+ }
+ }
+ // This lets the final state "saturate", which will matter for performance:
+ // in the hot loop, we check for a match only at the end of each iteration,
+ // so we must keep signalling the match until we get around to checking it.
+ for (int b = 0; b < 256; ++b)
+ dfa[b] |= static_cast<uint64_t>(kShiftDFAFinal * 6) << (kShiftDFAFinal * 6);
+
+ return dfa;
+}
+
+void Prog::ConfigurePrefixAccel(const std::string& prefix,
+ bool prefix_foldcase) {
+ prefix_foldcase_ = prefix_foldcase;
+ prefix_size_ = prefix.size();
+ if (prefix_foldcase_) {
+ // Use PrefixAccel_ShiftDFA().
+ // ... and no more than nine bytes of the prefix. (See above for details.)
+ prefix_size_ = std::min(prefix_size_, kShiftDFAFinal);
+ prefix_dfa_ = BuildShiftDFA(prefix.substr(0, prefix_size_));
+ } else if (prefix_size_ != 1) {
+ // Use PrefixAccel_FrontAndBack().
+ prefix_front_ = prefix.front();
+ prefix_back_ = prefix.back();
+ } else {
+ // Use memchr(3).
+ prefix_front_ = prefix.front();
+ }
+}
+
+const void* Prog::PrefixAccel_ShiftDFA(const void* data, size_t size) {
+ if (size < prefix_size_)
+ return NULL;
+
+ uint64_t curr = 0;
+
+ // At the time of writing, rough benchmarks on a Broadwell machine showed
+ // that this unroll factor (i.e. eight) achieves a speedup factor of two.
+ if (size >= 8) {
+ const uint8_t* p = reinterpret_cast<const uint8_t*>(data);
+ const uint8_t* endp = p + (size&~7);
+ do {
+ uint8_t b0 = p[0];
+ uint8_t b1 = p[1];
+ uint8_t b2 = p[2];
+ uint8_t b3 = p[3];
+ uint8_t b4 = p[4];
+ uint8_t b5 = p[5];
+ uint8_t b6 = p[6];
+ uint8_t b7 = p[7];
+
+ uint64_t next0 = prefix_dfa_[b0];
+ uint64_t next1 = prefix_dfa_[b1];
+ uint64_t next2 = prefix_dfa_[b2];
+ uint64_t next3 = prefix_dfa_[b3];
+ uint64_t next4 = prefix_dfa_[b4];
+ uint64_t next5 = prefix_dfa_[b5];
+ uint64_t next6 = prefix_dfa_[b6];
+ uint64_t next7 = prefix_dfa_[b7];
+
+ uint64_t curr0 = next0 >> (curr & 63);
+ uint64_t curr1 = next1 >> (curr0 & 63);
+ uint64_t curr2 = next2 >> (curr1 & 63);
+ uint64_t curr3 = next3 >> (curr2 & 63);
+ uint64_t curr4 = next4 >> (curr3 & 63);
+ uint64_t curr5 = next5 >> (curr4 & 63);
+ uint64_t curr6 = next6 >> (curr5 & 63);
+ uint64_t curr7 = next7 >> (curr6 & 63);
+
+ if ((curr7 & 63) == kShiftDFAFinal * 6) {
+ // At the time of writing, using the same masking subexpressions from
+ // the preceding lines caused Clang to clutter the hot loop computing
+ // them - even though they aren't actually needed for shifting! Hence
+ // these rewritten conditions, which achieve a speedup factor of two.
+ if (((curr7-curr0) & 63) == 0) return p+1-prefix_size_;
+ if (((curr7-curr1) & 63) == 0) return p+2-prefix_size_;
+ if (((curr7-curr2) & 63) == 0) return p+3-prefix_size_;
+ if (((curr7-curr3) & 63) == 0) return p+4-prefix_size_;
+ if (((curr7-curr4) & 63) == 0) return p+5-prefix_size_;
+ if (((curr7-curr5) & 63) == 0) return p+6-prefix_size_;
+ if (((curr7-curr6) & 63) == 0) return p+7-prefix_size_;
+ if (((curr7-curr7) & 63) == 0) return p+8-prefix_size_;
+ }
+
+ curr = curr7;
+ p += 8;
+ } while (p != endp);
+ data = p;
+ size = size&7;
+ }
+
+ const uint8_t* p = reinterpret_cast<const uint8_t*>(data);
+ const uint8_t* endp = p + size;
+ while (p != endp) {
+ uint8_t b = *p++;
+ uint64_t next = prefix_dfa_[b];
+ curr = next >> (curr & 63);
+ if ((curr & 63) == kShiftDFAFinal * 6)
+ return p-prefix_size_;
+ }
+ return NULL;
+}
+
+#if defined(__AVX2__)
+// Finds the least significant non-zero bit in n.
+static int FindLSBSet(uint32_t n) {
+ DCHECK_NE(n, 0);
+#if defined(__GNUC__)
+ return __builtin_ctz(n);
+#elif defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86))
+ unsigned long c;
+ _BitScanForward(&c, n);
+ return static_cast<int>(c);
+#else
+ int c = 31;
+ for (int shift = 1 << 4; shift != 0; shift >>= 1) {
+ uint32_t word = n << shift;
+ if (word != 0) {
+ n = word;
+ c -= shift;
+ }
+ }
+ return c;
+#endif
+}
+#endif
+
+const void* Prog::PrefixAccel_FrontAndBack(const void* data, size_t size) {
+ DCHECK_GE(prefix_size_, 2);
+ if (size < prefix_size_)
+ return NULL;
+ // Don't bother searching the last prefix_size_-1 bytes for prefix_front_.
+ // This also means that probing for prefix_back_ doesn't go out of bounds.
+ size -= prefix_size_-1;
+
+#if defined(__AVX2__)
+ // Use AVX2 to look for prefix_front_ and prefix_back_ 32 bytes at a time.
+ if (size >= sizeof(__m256i)) {
+ const __m256i* fp = reinterpret_cast<const __m256i*>(
+ reinterpret_cast<const char*>(data));
+ const __m256i* bp = reinterpret_cast<const __m256i*>(
+ reinterpret_cast<const char*>(data) + prefix_size_-1);
+ const __m256i* endfp = fp + size/sizeof(__m256i);
+ const __m256i f_set1 = _mm256_set1_epi8(prefix_front_);
+ const __m256i b_set1 = _mm256_set1_epi8(prefix_back_);
+ do {
+ const __m256i f_loadu = _mm256_loadu_si256(fp++);
+ const __m256i b_loadu = _mm256_loadu_si256(bp++);
+ const __m256i f_cmpeq = _mm256_cmpeq_epi8(f_set1, f_loadu);
+ const __m256i b_cmpeq = _mm256_cmpeq_epi8(b_set1, b_loadu);
+ const int fb_testz = _mm256_testz_si256(f_cmpeq, b_cmpeq);
+ if (fb_testz == 0) { // ZF: 1 means zero, 0 means non-zero.
+ const __m256i fb_and = _mm256_and_si256(f_cmpeq, b_cmpeq);
+ const int fb_movemask = _mm256_movemask_epi8(fb_and);
+ const int fb_ctz = FindLSBSet(fb_movemask);
+ return reinterpret_cast<const char*>(fp-1) + fb_ctz;
+ }
+ } while (fp != endfp);
+ data = fp;
+ size = size%sizeof(__m256i);
+ }
+#endif
+
+ const char* p0 = reinterpret_cast<const char*>(data);
+ for (const char* p = p0;; p++) {
+ DCHECK_GE(size, static_cast<size_t>(p-p0));
+ p = reinterpret_cast<const char*>(memchr(p, prefix_front_, size - (p-p0)));
+ if (p == NULL || p[prefix_size_-1] == prefix_back_)
+ return p;
+ }
+}
+
} // namespace re2
diff --git a/contrib/libs/re2/re2/prog.h b/contrib/libs/re2/re2/prog.h
index f563aaf384..4af012ab6f 100644
--- a/contrib/libs/re2/re2/prog.h
+++ b/contrib/libs/re2/re2/prog.h
@@ -10,18 +10,18 @@
// expression symbolically.
#include <stdint.h>
-#include <functional>
+#include <functional>
#include <mutex>
#include <string>
#include <vector>
-#include <type_traits>
+#include <type_traits>
#include "util/util.h"
#include "util/logging.h"
-#include "re2/pod_array.h"
+#include "re2/pod_array.h"
#include "re2/re2.h"
-#include "re2/sparse_array.h"
-#include "re2/sparse_set.h"
+#include "re2/sparse_array.h"
+#include "re2/sparse_set.h"
namespace re2 {
@@ -61,8 +61,8 @@ class Prog {
// Single instruction in regexp program.
class Inst {
public:
- // See the assertion below for why this is so.
- Inst() = default;
+ // See the assertion below for why this is so.
+ Inst() = default;
// Copyable.
Inst(const Inst&) = default;
@@ -78,7 +78,7 @@ class Prog {
void InitFail();
// Getters
- int id(Prog* p) { return static_cast<int>(this - p->inst_.data()); }
+ int id(Prog* p) { return static_cast<int>(this - p->inst_.data()); }
InstOp opcode() { return static_cast<InstOp>(out_opcode_&7); }
int last() { return (out_opcode_>>3)&1; }
int out() { return out_opcode_>>4; }
@@ -86,8 +86,8 @@ class Prog {
int cap() { DCHECK_EQ(opcode(), kInstCapture); return cap_; }
int lo() { DCHECK_EQ(opcode(), kInstByteRange); return lo_; }
int hi() { DCHECK_EQ(opcode(), kInstByteRange); return hi_; }
- int foldcase() { DCHECK_EQ(opcode(), kInstByteRange); return hint_foldcase_&1; }
- int hint() { DCHECK_EQ(opcode(), kInstByteRange); return hint_foldcase_>>1; }
+ int foldcase() { DCHECK_EQ(opcode(), kInstByteRange); return hint_foldcase_&1; }
+ int hint() { DCHECK_EQ(opcode(), kInstByteRange); return hint_foldcase_>>1; }
int match_id() { DCHECK_EQ(opcode(), kInstMatch); return match_id_; }
EmptyOp empty() { DCHECK_EQ(opcode(), kInstEmptyWidth); return empty_; }
@@ -101,13 +101,13 @@ class Prog {
// Does this inst (an kInstByteRange) match c?
inline bool Matches(int c) {
DCHECK_EQ(opcode(), kInstByteRange);
- if (foldcase() && 'A' <= c && c <= 'Z')
+ if (foldcase() && 'A' <= c && c <= 'Z')
c += 'a' - 'A';
return lo_ <= c && c <= hi_;
}
// Returns string representation for debugging.
- std::string Dump();
+ std::string Dump();
// Maximum instruction id.
// (Must fit in out_opcode_. PatchList/last steal another bit.)
@@ -130,31 +130,31 @@ class Prog {
out_opcode_ = (out<<4) | (last()<<3) | opcode;
}
- uint32_t out_opcode_; // 28 bits: out, 1 bit: last, 3 (low) bits: opcode
- union { // additional instruction arguments:
- uint32_t out1_; // opcode == kInstAlt
- // alternate next instruction
-
- int32_t cap_; // opcode == kInstCapture
- // Index of capture register (holds text
- // position recorded by capturing parentheses).
- // For \n (the submatch for the nth parentheses),
- // the left parenthesis captures into register 2*n
- // and the right one captures into register 2*n+1.
-
- int32_t match_id_; // opcode == kInstMatch
- // Match ID to identify this match (for re2::Set).
-
- struct { // opcode == kInstByteRange
- uint8_t lo_; // byte range is lo_-hi_ inclusive
- uint8_t hi_; //
- uint16_t hint_foldcase_; // 15 bits: hint, 1 (low) bit: foldcase
- // hint to execution engines: the delta to the
- // next instruction (in the current list) worth
- // exploring iff this instruction matched; 0
- // means there are no remaining possibilities,
- // which is most likely for character classes.
- // foldcase: A-Z -> a-z before checking range.
+ uint32_t out_opcode_; // 28 bits: out, 1 bit: last, 3 (low) bits: opcode
+ union { // additional instruction arguments:
+ uint32_t out1_; // opcode == kInstAlt
+ // alternate next instruction
+
+ int32_t cap_; // opcode == kInstCapture
+ // Index of capture register (holds text
+ // position recorded by capturing parentheses).
+ // For \n (the submatch for the nth parentheses),
+ // the left parenthesis captures into register 2*n
+ // and the right one captures into register 2*n+1.
+
+ int32_t match_id_; // opcode == kInstMatch
+ // Match ID to identify this match (for re2::Set).
+
+ struct { // opcode == kInstByteRange
+ uint8_t lo_; // byte range is lo_-hi_ inclusive
+ uint8_t hi_; //
+ uint16_t hint_foldcase_; // 15 bits: hint, 1 (low) bit: foldcase
+ // hint to execution engines: the delta to the
+ // next instruction (in the current list) worth
+ // exploring iff this instruction matched; 0
+ // means there are no remaining possibilities,
+ // which is most likely for character classes.
+ // foldcase: A-Z -> a-z before checking range.
};
EmptyOp empty_; // opcode == kInstEmptyWidth
@@ -166,11 +166,11 @@ class Prog {
friend class Prog;
};
- // Inst must be trivial so that we can freely clear it with memset(3).
- // Arrays of Inst are initialised by copying the initial elements with
- // memmove(3) and then clearing any remaining elements with memset(3).
- static_assert(std::is_trivial<Inst>::value, "Inst must be trivial");
-
+ // Inst must be trivial so that we can freely clear it with memset(3).
+ // Arrays of Inst are initialised by copying the initial elements with
+ // memmove(3) and then clearing any remaining elements with memset(3).
+ static_assert(std::is_trivial<Inst>::value, "Inst must be trivial");
+
// Whether to anchor the search.
enum Anchor {
kUnanchored, // match anywhere
@@ -198,7 +198,7 @@ class Prog {
Inst *inst(int id) { return &inst_[id]; }
int start() { return start_; }
- void set_start(int start) { start_ = start; }
+ void set_start(int start) { start_ = start; }
int start_unanchored() { return start_unanchored_; }
void set_start_unanchored(int start) { start_unanchored_ = start; }
int size() { return size_; }
@@ -206,9 +206,9 @@ class Prog {
void set_reversed(bool reversed) { reversed_ = reversed; }
int list_count() { return list_count_; }
int inst_count(InstOp op) { return inst_count_[op]; }
- uint16_t* list_heads() { return list_heads_.data(); }
+ uint16_t* list_heads() { return list_heads_.data(); }
size_t bit_state_text_max_size() { return bit_state_text_max_size_; }
- int64_t dfa_mem() { return dfa_mem_; }
+ int64_t dfa_mem() { return dfa_mem_; }
void set_dfa_mem(int64_t dfa_mem) { dfa_mem_ = dfa_mem; }
bool anchor_start() { return anchor_start_; }
void set_anchor_start(bool b) { anchor_start_ = b; }
@@ -216,36 +216,36 @@ class Prog {
void set_anchor_end(bool b) { anchor_end_ = b; }
int bytemap_range() { return bytemap_range_; }
const uint8_t* bytemap() { return bytemap_; }
- bool can_prefix_accel() { return prefix_size_ != 0; }
-
- // Accelerates to the first likely occurrence of the prefix.
- // Returns a pointer to the first byte or NULL if not found.
- const void* PrefixAccel(const void* data, size_t size) {
- DCHECK(can_prefix_accel());
- if (prefix_foldcase_) {
- return PrefixAccel_ShiftDFA(data, size);
- } else if (prefix_size_ != 1) {
- return PrefixAccel_FrontAndBack(data, size);
- } else {
- return memchr(data, prefix_front_, size);
- }
- }
-
- // Configures prefix accel using the analysis performed during compilation.
- void ConfigurePrefixAccel(const std::string& prefix, bool prefix_foldcase);
-
- // An implementation of prefix accel that uses prefix_dfa_ to perform
- // case-insensitive search.
- const void* PrefixAccel_ShiftDFA(const void* data, size_t size);
-
- // An implementation of prefix accel that looks for prefix_front_ and
- // prefix_back_ to return fewer false positives than memchr(3) alone.
- const void* PrefixAccel_FrontAndBack(const void* data, size_t size);
-
+ bool can_prefix_accel() { return prefix_size_ != 0; }
+
+ // Accelerates to the first likely occurrence of the prefix.
+ // Returns a pointer to the first byte or NULL if not found.
+ const void* PrefixAccel(const void* data, size_t size) {
+ DCHECK(can_prefix_accel());
+ if (prefix_foldcase_) {
+ return PrefixAccel_ShiftDFA(data, size);
+ } else if (prefix_size_ != 1) {
+ return PrefixAccel_FrontAndBack(data, size);
+ } else {
+ return memchr(data, prefix_front_, size);
+ }
+ }
+
+ // Configures prefix accel using the analysis performed during compilation.
+ void ConfigurePrefixAccel(const std::string& prefix, bool prefix_foldcase);
+
+ // An implementation of prefix accel that uses prefix_dfa_ to perform
+ // case-insensitive search.
+ const void* PrefixAccel_ShiftDFA(const void* data, size_t size);
+
+ // An implementation of prefix accel that looks for prefix_front_ and
+ // prefix_back_ to return fewer false positives than memchr(3) alone.
+ const void* PrefixAccel_FrontAndBack(const void* data, size_t size);
+
// Returns string representation of program for debugging.
- std::string Dump();
- std::string DumpUnanchored();
- std::string DumpByteMap();
+ std::string Dump();
+ std::string DumpUnanchored();
+ std::string DumpByteMap();
// Returns the set of kEmpty flags that are in effect at
// position p within context.
@@ -292,24 +292,24 @@ class Prog {
// SearchDFA fills matches with the match IDs of the final matching state.
bool SearchDFA(const StringPiece& text, const StringPiece& context,
Anchor anchor, MatchKind kind, StringPiece* match0,
- bool* failed, SparseSet* matches);
-
- // The callback issued after building each DFA state with BuildEntireDFA().
- // If next is null, then the memory budget has been exhausted and building
- // will halt. Otherwise, the state has been built and next points to an array
- // of bytemap_range()+1 slots holding the next states as per the bytemap and
- // kByteEndText. The number of the state is implied by the callback sequence:
- // the first callback is for state 0, the second callback is for state 1, ...
- // match indicates whether the state is a matching state.
- using DFAStateCallback = std::function<void(const int* next, bool match)>;
-
- // Build the entire DFA for the given match kind.
+ bool* failed, SparseSet* matches);
+
+ // The callback issued after building each DFA state with BuildEntireDFA().
+ // If next is null, then the memory budget has been exhausted and building
+ // will halt. Otherwise, the state has been built and next points to an array
+ // of bytemap_range()+1 slots holding the next states as per the bytemap and
+ // kByteEndText. The number of the state is implied by the callback sequence:
+ // the first callback is for state 0, the second callback is for state 1, ...
+ // match indicates whether the state is a matching state.
+ using DFAStateCallback = std::function<void(const int* next, bool match)>;
+
+ // Build the entire DFA for the given match kind.
// Usually the DFA is built out incrementally, as needed, which
- // avoids lots of unnecessary work.
- // If cb is not empty, it receives one callback per state built.
- // Returns the number of states built.
- // FOR TESTING OR EXPERIMENTAL PURPOSES ONLY.
- int BuildEntireDFA(MatchKind kind, const DFAStateCallback& cb);
+ // avoids lots of unnecessary work.
+ // If cb is not empty, it receives one callback per state built.
+ // Returns the number of states built.
+ // FOR TESTING OR EXPERIMENTAL PURPOSES ONLY.
+ int BuildEntireDFA(MatchKind kind, const DFAStateCallback& cb);
// Compute bytemap.
void ComputeByteMap();
@@ -326,8 +326,8 @@ class Prog {
StringPiece* match, int nmatch);
// Bit-state backtracking. Fast on small cases but uses memory
- // proportional to the product of the list count and the text size.
- bool CanBitState() { return list_heads_.data() != NULL; }
+ // proportional to the product of the list count and the text size.
+ bool CanBitState() { return list_heads_.data() != NULL; }
bool SearchBitState(const StringPiece& text, const StringPiece& context,
Anchor anchor, MatchKind kind,
StringPiece* match, int nmatch);
@@ -359,15 +359,15 @@ class Prog {
// do not compile down to infinite repetitions.
//
// Returns true on success, false on error.
- bool PossibleMatchRange(std::string* min, std::string* max, int maxlen);
+ bool PossibleMatchRange(std::string* min, std::string* max, int maxlen);
// EXPERIMENTAL! SUBJECT TO CHANGE!
// Outputs the program fanout into the given sparse array.
void Fanout(SparseArray<int>* fanout);
// Compiles a collection of regexps to Prog. Each regexp will have
- // its own Match instruction recording the index in the output vector.
- static Prog* CompileSet(Regexp* re, RE2::Anchor anchor, int64_t max_mem);
+ // its own Match instruction recording the index in the output vector.
+ static Prog* CompileSet(Regexp* re, RE2::Anchor anchor, int64_t max_mem);
// Flattens the Prog from "tree" form to "list" form. This is an in-place
// operation in the sense that the old instructions are lost.
@@ -396,13 +396,13 @@ class Prog {
std::vector<Inst>* flat,
SparseSet* reachable, std::vector<int>* stk);
- // Computes hints for ByteRange instructions in [begin, end).
- void ComputeHints(std::vector<Inst>* flat, int begin, int end);
-
- // Controls whether the DFA should bail out early if the NFA would be faster.
- // FOR TESTING ONLY.
- static void TESTING_ONLY_set_dfa_should_bail_when_slow(bool b);
-
+ // Computes hints for ByteRange instructions in [begin, end).
+ void ComputeHints(std::vector<Inst>* flat, int begin, int end);
+
+ // Controls whether the DFA should bail out early if the NFA would be faster.
+ // FOR TESTING ONLY.
+ static void TESTING_ONLY_set_dfa_should_bail_when_slow(bool b);
+
private:
friend class Compiler;
@@ -419,16 +419,16 @@ class Prog {
int start_unanchored_; // unanchored entry point for program
int size_; // number of instructions
int bytemap_range_; // bytemap_[x] < bytemap_range_
-
- bool prefix_foldcase_; // whether prefix is case-insensitive
- size_t prefix_size_; // size of prefix (0 if no prefix)
- union {
- uint64_t* prefix_dfa_; // "Shift DFA" for prefix
- struct {
- int prefix_front_; // first byte of prefix
- int prefix_back_; // last byte of prefix
- };
- };
+
+ bool prefix_foldcase_; // whether prefix is case-insensitive
+ size_t prefix_size_; // size of prefix (0 if no prefix)
+ union {
+ uint64_t* prefix_dfa_; // "Shift DFA" for prefix
+ struct {
+ int prefix_front_; // first byte of prefix
+ int prefix_back_; // last byte of prefix
+ };
+ };
int list_count_; // count of lists (see above)
int inst_count_[kNumInst]; // count of instructions by opcode
@@ -436,8 +436,8 @@ class Prog {
// not populated if size_ is overly large
size_t bit_state_text_max_size_; // upper bound (inclusive) on text.size()
- PODArray<Inst> inst_; // pointer to instruction array
- PODArray<uint8_t> onepass_nodes_; // data for OnePass nodes
+ PODArray<Inst> inst_; // pointer to instruction array
+ PODArray<uint8_t> onepass_nodes_; // data for OnePass nodes
int64_t dfa_mem_; // Maximum memory for DFAs.
DFA* dfa_first_; // DFA cached for kFirstMatch/kManyMatch
diff --git a/contrib/libs/re2/re2/re2.cc b/contrib/libs/re2/re2/re2.cc
index 96680da33c..47fb385e4e 100644
--- a/contrib/libs/re2/re2/re2.cc
+++ b/contrib/libs/re2/re2/re2.cc
@@ -12,14 +12,14 @@
#include <assert.h>
#include <ctype.h>
#include <errno.h>
-#ifdef _MSC_VER
-#include <intrin.h>
-#endif
+#ifdef _MSC_VER
+#include <intrin.h>
+#endif
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <algorithm>
-#include <atomic>
+#include <atomic>
#include <iterator>
#include <mutex>
#include <string>
@@ -32,7 +32,7 @@
#include "util/utf.h"
#include "re2/prog.h"
#include "re2/regexp.h"
-#include "re2/sparse_array.h"
+#include "re2/sparse_array.h"
namespace re2 {
@@ -60,9 +60,9 @@ RE2::Options::Options(RE2::CannedOptions opt)
// static empty objects for use as const references.
// To avoid global constructors, allocated in RE2::Init().
-static const std::string* empty_string;
-static const std::map<std::string, int>* empty_named_groups;
-static const std::map<int, std::string>* empty_group_names;
+static const std::string* empty_string;
+static const std::map<std::string, int>* empty_named_groups;
+static const std::map<int, std::string>* empty_group_names;
// Converts from Regexp error code to RE2 error code.
// Maybe some day they will diverge. In any event, this
@@ -83,8 +83,8 @@ static RE2::ErrorCode RegexpErrorToRE2(re2::RegexpStatusCode code) {
return RE2::ErrorMissingBracket;
case re2::kRegexpMissingParen:
return RE2::ErrorMissingParen;
- case re2::kRegexpUnexpectedParen:
- return RE2::ErrorUnexpectedParen;
+ case re2::kRegexpUnexpectedParen:
+ return RE2::ErrorUnexpectedParen;
case re2::kRegexpTrailingBackslash:
return RE2::ErrorTrailingBackslash;
case re2::kRegexpRepeatArgument:
@@ -103,10 +103,10 @@ static RE2::ErrorCode RegexpErrorToRE2(re2::RegexpStatusCode code) {
return RE2::ErrorInternal;
}
-static std::string trunc(const StringPiece& pattern) {
+static std::string trunc(const StringPiece& pattern) {
if (pattern.size() < 100)
- return std::string(pattern);
- return std::string(pattern.substr(0, 100)) + "...";
+ return std::string(pattern);
+ return std::string(pattern.substr(0, 100)) + "...";
}
@@ -114,7 +114,7 @@ RE2::RE2(const char* pattern) {
Init(pattern, DefaultOptions);
}
-RE2::RE2(const std::string& pattern) {
+RE2::RE2(const std::string& pattern) {
Init(pattern, DefaultOptions);
}
@@ -173,24 +173,24 @@ int RE2::Options::ParseFlags() const {
void RE2::Init(const StringPiece& pattern, const Options& options) {
static std::once_flag empty_once;
std::call_once(empty_once, []() {
- empty_string = new std::string;
- empty_named_groups = new std::map<std::string, int>;
- empty_group_names = new std::map<int, std::string>;
+ empty_string = new std::string;
+ empty_named_groups = new std::map<std::string, int>;
+ empty_group_names = new std::map<int, std::string>;
});
- pattern_.assign(pattern.data(), pattern.size());
+ pattern_.assign(pattern.data(), pattern.size());
options_.Copy(options);
entire_regexp_ = NULL;
- error_ = empty_string;
- error_code_ = NoError;
- error_arg_.clear();
- prefix_.clear();
- prefix_foldcase_ = false;
+ error_ = empty_string;
+ error_code_ = NoError;
+ error_arg_.clear();
+ prefix_.clear();
+ prefix_foldcase_ = false;
suffix_regexp_ = NULL;
prog_ = NULL;
- num_captures_ = -1;
- is_one_pass_ = false;
-
+ num_captures_ = -1;
+ is_one_pass_ = false;
+
rprog_ = NULL;
named_groups_ = NULL;
group_names_ = NULL;
@@ -205,9 +205,9 @@ void RE2::Init(const StringPiece& pattern, const Options& options) {
LOG(ERROR) << "Error parsing '" << trunc(pattern_) << "': "
<< status.Text();
}
- error_ = new std::string(status.Text());
+ error_ = new std::string(status.Text());
error_code_ = RegexpErrorToRE2(status.code());
- error_arg_ = std::string(status.error_arg());
+ error_arg_ = std::string(status.error_arg());
return;
}
@@ -224,16 +224,16 @@ void RE2::Init(const StringPiece& pattern, const Options& options) {
if (prog_ == NULL) {
if (options_.log_errors())
LOG(ERROR) << "Error compiling '" << trunc(pattern_) << "'";
- error_ = new std::string("pattern too large - compile failed");
+ error_ = new std::string("pattern too large - compile failed");
error_code_ = RE2::ErrorPatternTooLarge;
return;
}
- // We used to compute this lazily, but it's used during the
- // typical control flow for a match call, so we now compute
- // it eagerly, which avoids the overhead of std::once_flag.
- num_captures_ = suffix_regexp_->NumCaptures();
-
+ // We used to compute this lazily, but it's used during the
+ // typical control flow for a match call, so we now compute
+ // it eagerly, which avoids the overhead of std::once_flag.
+ num_captures_ = suffix_regexp_->NumCaptures();
+
// Could delay this until the first match call that
// cares about submatch information, but the one-pass
// machine's memory gets cut from the DFA memory budget,
@@ -250,11 +250,11 @@ re2::Prog* RE2::ReverseProg() const {
if (re->rprog_ == NULL) {
if (re->options_.log_errors())
LOG(ERROR) << "Error reverse compiling '" << trunc(re->pattern_) << "'";
- // We no longer touch error_ and error_code_ because failing to compile
- // the reverse Prog is not a showstopper: falling back to NFA execution
- // is fine. More importantly, an RE2 object is supposed to be logically
- // immutable: whatever ok() would have returned after Init() completed,
- // it should continue to return that no matter what ReverseProg() does.
+ // We no longer touch error_ and error_code_ because failing to compile
+ // the reverse Prog is not a showstopper: falling back to NFA execution
+ // is fine. More importantly, an RE2 object is supposed to be logically
+ // immutable: whatever ok() would have returned after Init() completed,
+ // it should continue to return that no matter what ReverseProg() does.
}
}, this);
return rprog_;
@@ -281,73 +281,73 @@ int RE2::ProgramSize() const {
return prog_->size();
}
-int RE2::ReverseProgramSize() const {
+int RE2::ReverseProgramSize() const {
if (prog_ == NULL)
return -1;
- Prog* prog = ReverseProg();
- if (prog == NULL)
- return -1;
- return prog->size();
-}
-
-// Finds the most significant non-zero bit in n.
-static int FindMSBSet(uint32_t n) {
- DCHECK_NE(n, 0);
-#if defined(__GNUC__)
- return 31 ^ __builtin_clz(n);
-#elif defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86))
- unsigned long c;
- _BitScanReverse(&c, n);
- return static_cast<int>(c);
-#else
- int c = 0;
- for (int shift = 1 << 4; shift != 0; shift >>= 1) {
- uint32_t word = n >> shift;
- if (word != 0) {
- n = word;
- c += shift;
+ Prog* prog = ReverseProg();
+ if (prog == NULL)
+ return -1;
+ return prog->size();
+}
+
+// Finds the most significant non-zero bit in n.
+static int FindMSBSet(uint32_t n) {
+ DCHECK_NE(n, 0);
+#if defined(__GNUC__)
+ return 31 ^ __builtin_clz(n);
+#elif defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86))
+ unsigned long c;
+ _BitScanReverse(&c, n);
+ return static_cast<int>(c);
+#else
+ int c = 0;
+ for (int shift = 1 << 4; shift != 0; shift >>= 1) {
+ uint32_t word = n >> shift;
+ if (word != 0) {
+ n = word;
+ c += shift;
}
}
- return c;
-#endif
+ return c;
+#endif
+}
+
+static int Fanout(Prog* prog, std::vector<int>* histogram) {
+ SparseArray<int> fanout(prog->size());
+ prog->Fanout(&fanout);
+ int data[32] = {};
+ int size = 0;
+ for (SparseArray<int>::iterator i = fanout.begin(); i != fanout.end(); ++i) {
+ if (i->value() == 0)
+ continue;
+ uint32_t value = i->value();
+ int bucket = FindMSBSet(value);
+ bucket += value & (value-1) ? 1 : 0;
+ ++data[bucket];
+ size = std::max(size, bucket+1);
+ }
+ if (histogram != NULL)
+ histogram->assign(data, data+size);
+ return size-1;
}
-static int Fanout(Prog* prog, std::vector<int>* histogram) {
- SparseArray<int> fanout(prog->size());
- prog->Fanout(&fanout);
- int data[32] = {};
- int size = 0;
- for (SparseArray<int>::iterator i = fanout.begin(); i != fanout.end(); ++i) {
- if (i->value() == 0)
- continue;
- uint32_t value = i->value();
- int bucket = FindMSBSet(value);
- bucket += value & (value-1) ? 1 : 0;
- ++data[bucket];
- size = std::max(size, bucket+1);
- }
- if (histogram != NULL)
- histogram->assign(data, data+size);
- return size-1;
+int RE2::ProgramFanout(std::vector<int>* histogram) const {
+ if (prog_ == NULL)
+ return -1;
+ return Fanout(prog_, histogram);
+}
+
+int RE2::ReverseProgramFanout(std::vector<int>* histogram) const {
+ if (prog_ == NULL)
+ return -1;
+ Prog* prog = ReverseProg();
+ if (prog == NULL)
+ return -1;
+ return Fanout(prog, histogram);
}
-int RE2::ProgramFanout(std::vector<int>* histogram) const {
- if (prog_ == NULL)
- return -1;
- return Fanout(prog_, histogram);
-}
-
-int RE2::ReverseProgramFanout(std::vector<int>* histogram) const {
- if (prog_ == NULL)
- return -1;
- Prog* prog = ReverseProg();
- if (prog == NULL)
- return -1;
- return Fanout(prog, histogram);
-}
-
// Returns named_groups_, computing it if needed.
-const std::map<std::string, int>& RE2::NamedCapturingGroups() const {
+const std::map<std::string, int>& RE2::NamedCapturingGroups() const {
std::call_once(named_groups_once_, [](const RE2* re) {
if (re->suffix_regexp_ != NULL)
re->named_groups_ = re->suffix_regexp_->NamedCaptures();
@@ -358,7 +358,7 @@ const std::map<std::string, int>& RE2::NamedCapturingGroups() const {
}
// Returns group_names_, computing it if needed.
-const std::map<int, std::string>& RE2::CapturingGroupNames() const {
+const std::map<int, std::string>& RE2::CapturingGroupNames() const {
std::call_once(group_names_once_, [](const RE2* re) {
if (re->suffix_regexp_ != NULL)
re->group_names_ = re->suffix_regexp_->CaptureNames();
@@ -402,42 +402,42 @@ bool RE2::FindAndConsumeN(StringPiece* input, const RE2& re,
}
}
-bool RE2::Replace(std::string* str,
- const RE2& re,
- const StringPiece& rewrite) {
+bool RE2::Replace(std::string* str,
+ const RE2& re,
+ const StringPiece& rewrite) {
StringPiece vec[kVecSize];
int nvec = 1 + MaxSubmatch(rewrite);
- if (nvec > 1 + re.NumberOfCapturingGroups())
+ if (nvec > 1 + re.NumberOfCapturingGroups())
+ return false;
+ if (nvec > static_cast<int>(arraysize(vec)))
return false;
- if (nvec > static_cast<int>(arraysize(vec)))
- return false;
if (!re.Match(*str, 0, str->size(), UNANCHORED, vec, nvec))
return false;
- std::string s;
+ std::string s;
if (!re.Rewrite(&s, rewrite, vec, nvec))
return false;
- assert(vec[0].data() >= str->data());
- assert(vec[0].data() + vec[0].size() <= str->data() + str->size());
+ assert(vec[0].data() >= str->data());
+ assert(vec[0].data() + vec[0].size() <= str->data() + str->size());
str->replace(vec[0].data() - str->data(), vec[0].size(), s);
return true;
}
-int RE2::GlobalReplace(std::string* str,
- const RE2& re,
- const StringPiece& rewrite) {
+int RE2::GlobalReplace(std::string* str,
+ const RE2& re,
+ const StringPiece& rewrite) {
StringPiece vec[kVecSize];
int nvec = 1 + MaxSubmatch(rewrite);
- if (nvec > 1 + re.NumberOfCapturingGroups())
+ if (nvec > 1 + re.NumberOfCapturingGroups())
+ return false;
+ if (nvec > static_cast<int>(arraysize(vec)))
return false;
- if (nvec > static_cast<int>(arraysize(vec)))
- return false;
const char* p = str->data();
const char* ep = p + str->size();
const char* lastend = NULL;
- std::string out;
+ std::string out;
int count = 0;
#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
// Iterate just once when fuzzing. Otherwise, we easily get bogged down
@@ -449,15 +449,15 @@ int RE2::GlobalReplace(std::string* str,
if (!re.Match(*str, static_cast<size_t>(p - str->data()),
str->size(), UNANCHORED, vec, nvec))
break;
- if (p < vec[0].data())
- out.append(p, vec[0].data() - p);
- if (vec[0].data() == lastend && vec[0].empty()) {
+ if (p < vec[0].data())
+ out.append(p, vec[0].data() - p);
+ if (vec[0].data() == lastend && vec[0].empty()) {
// Disallow empty match at end of last match: skip ahead.
//
- // fullrune() takes int, not ptrdiff_t. However, it just looks
+ // fullrune() takes int, not ptrdiff_t. However, it just looks
// at the leading byte and treats any length >= 4 the same.
if (re.options().encoding() == RE2::Options::EncodingUTF8 &&
- fullrune(p, static_cast<int>(std::min(ptrdiff_t{4}, ep - p)))) {
+ fullrune(p, static_cast<int>(std::min(ptrdiff_t{4}, ep - p)))) {
// re is in UTF-8 mode and there is enough left of str
// to allow us to advance by up to UTFmax bytes.
Rune r;
@@ -482,7 +482,7 @@ int RE2::GlobalReplace(std::string* str,
continue;
}
re.Rewrite(&out, rewrite, vec, nvec);
- p = vec[0].data() + vec[0].size();
+ p = vec[0].data() + vec[0].size();
lastend = p;
count++;
}
@@ -497,16 +497,16 @@ int RE2::GlobalReplace(std::string* str,
return count;
}
-bool RE2::Extract(const StringPiece& text,
- const RE2& re,
- const StringPiece& rewrite,
- std::string* out) {
+bool RE2::Extract(const StringPiece& text,
+ const RE2& re,
+ const StringPiece& rewrite,
+ std::string* out) {
StringPiece vec[kVecSize];
int nvec = 1 + MaxSubmatch(rewrite);
- if (nvec > 1 + re.NumberOfCapturingGroups())
+ if (nvec > 1 + re.NumberOfCapturingGroups())
+ return false;
+ if (nvec > static_cast<int>(arraysize(vec)))
return false;
- if (nvec > static_cast<int>(arraysize(vec)))
- return false;
if (!re.Match(text, 0, text.size(), UNANCHORED, vec, nvec))
return false;
@@ -514,8 +514,8 @@ bool RE2::Extract(const StringPiece& text,
return re.Rewrite(out, rewrite, vec, nvec);
}
-std::string RE2::QuoteMeta(const StringPiece& unquoted) {
- std::string result;
+std::string RE2::QuoteMeta(const StringPiece& unquoted) {
+ std::string result;
result.reserve(unquoted.size() << 1);
// Escape any ascii character not in [A-Za-z_0-9].
@@ -552,8 +552,8 @@ std::string RE2::QuoteMeta(const StringPiece& unquoted) {
return result;
}
-bool RE2::PossibleMatchRange(std::string* min, std::string* max,
- int maxlen) const {
+bool RE2::PossibleMatchRange(std::string* min, std::string* max,
+ int maxlen) const {
if (prog_ == NULL)
return false;
@@ -562,28 +562,28 @@ bool RE2::PossibleMatchRange(std::string* min, std::string* max,
n = maxlen;
// Determine initial min max from prefix_ literal.
- *min = prefix_.substr(0, n);
- *max = prefix_.substr(0, n);
+ *min = prefix_.substr(0, n);
+ *max = prefix_.substr(0, n);
if (prefix_foldcase_) {
- // prefix is ASCII lowercase; change *min to uppercase.
+ // prefix is ASCII lowercase; change *min to uppercase.
for (int i = 0; i < n; i++) {
- char& c = (*min)[i];
- if ('a' <= c && c <= 'z')
- c += 'A' - 'a';
+ char& c = (*min)[i];
+ if ('a' <= c && c <= 'z')
+ c += 'A' - 'a';
}
}
// Add to prefix min max using PossibleMatchRange on regexp.
- std::string dmin, dmax;
+ std::string dmin, dmax;
maxlen -= n;
if (maxlen > 0 && prog_->PossibleMatchRange(&dmin, &dmax, maxlen)) {
- min->append(dmin);
- max->append(dmax);
- } else if (!max->empty()) {
+ min->append(dmin);
+ max->append(dmax);
+ } else if (!max->empty()) {
// prog_->PossibleMatchRange has failed us,
// but we still have useful information from prefix_.
- // Round up *max to allow any possible suffix.
- PrefixSuccessor(max);
+ // Round up *max to allow any possible suffix.
+ PrefixSuccessor(max);
} else {
// Nothing useful.
*min = "";
@@ -597,7 +597,7 @@ bool RE2::PossibleMatchRange(std::string* min, std::string* max,
// Avoid possible locale nonsense in standard strcasecmp.
// The string a is known to be all lowercase.
static int ascii_strcasecmp(const char* a, const char* b, size_t len) {
- const char* ae = a + len;
+ const char* ae = a + len;
for (; a < ae; a++, b++) {
uint8_t x = *a;
@@ -619,7 +619,7 @@ bool RE2::Match(const StringPiece& text,
Anchor re_anchor,
StringPiece* submatch,
int nsubmatch) const {
- if (!ok()) {
+ if (!ok()) {
if (options_.log_errors())
LOG(ERROR) << "Invalid RE2: " << *error_;
return false;
@@ -654,8 +654,8 @@ bool RE2::Match(const StringPiece& text,
// If the regexp is anchored explicitly, must not be in middle of text.
if (prog_->anchor_start() && startpos != 0)
return false;
- if (prog_->anchor_end() && endpos != text.size())
- return false;
+ if (prog_->anchor_end() && endpos != text.size())
+ return false;
// If the regexp is anchored explicitly, update re_anchor
// so that we can potentially fall into a faster case below.
@@ -691,85 +691,85 @@ bool RE2::Match(const StringPiece& text,
kind = Prog::kLongestMatch;
bool can_one_pass = is_one_pass_ && ncap <= Prog::kMaxOnePassCapture;
- bool can_bit_state = prog_->CanBitState();
+ bool can_bit_state = prog_->CanBitState();
size_t bit_state_text_max_size = prog_->bit_state_text_max_size();
-#ifdef RE2_HAVE_THREAD_LOCAL
- hooks::context = this;
-#endif
+#ifdef RE2_HAVE_THREAD_LOCAL
+ hooks::context = this;
+#endif
bool dfa_failed = false;
- bool skipped_test = false;
+ bool skipped_test = false;
switch (re_anchor) {
default:
- LOG(DFATAL) << "Unexpected re_anchor value: " << re_anchor;
- return false;
-
+ LOG(DFATAL) << "Unexpected re_anchor value: " << re_anchor;
+ return false;
+
case UNANCHORED: {
- if (prog_->anchor_end()) {
- // This is a very special case: we don't need the forward DFA because
- // we already know where the match must end! Instead, the reverse DFA
- // can say whether there is a match and (optionally) where it starts.
- Prog* prog = ReverseProg();
- if (prog == NULL) {
- // Fall back to NFA below.
- skipped_test = true;
- break;
- }
- if (!prog->SearchDFA(subtext, text, Prog::kAnchored,
- Prog::kLongestMatch, matchp, &dfa_failed, NULL)) {
- if (dfa_failed) {
- if (options_.log_errors())
- LOG(ERROR) << "DFA out of memory: "
- << "pattern length " << pattern_.size() << ", "
- << "program size " << prog->size() << ", "
- << "list count " << prog->list_count() << ", "
- << "bytemap range " << prog->bytemap_range();
- // Fall back to NFA below.
- skipped_test = true;
- break;
- }
- return false;
- }
- if (matchp == NULL) // Matched. Don't care where.
- return true;
- break;
- }
-
+ if (prog_->anchor_end()) {
+ // This is a very special case: we don't need the forward DFA because
+ // we already know where the match must end! Instead, the reverse DFA
+ // can say whether there is a match and (optionally) where it starts.
+ Prog* prog = ReverseProg();
+ if (prog == NULL) {
+ // Fall back to NFA below.
+ skipped_test = true;
+ break;
+ }
+ if (!prog->SearchDFA(subtext, text, Prog::kAnchored,
+ Prog::kLongestMatch, matchp, &dfa_failed, NULL)) {
+ if (dfa_failed) {
+ if (options_.log_errors())
+ LOG(ERROR) << "DFA out of memory: "
+ << "pattern length " << pattern_.size() << ", "
+ << "program size " << prog->size() << ", "
+ << "list count " << prog->list_count() << ", "
+ << "bytemap range " << prog->bytemap_range();
+ // Fall back to NFA below.
+ skipped_test = true;
+ break;
+ }
+ return false;
+ }
+ if (matchp == NULL) // Matched. Don't care where.
+ return true;
+ break;
+ }
+
if (!prog_->SearchDFA(subtext, text, anchor, kind,
matchp, &dfa_failed, NULL)) {
if (dfa_failed) {
if (options_.log_errors())
- LOG(ERROR) << "DFA out of memory: "
- << "pattern length " << pattern_.size() << ", "
- << "program size " << prog_->size() << ", "
- << "list count " << prog_->list_count() << ", "
- << "bytemap range " << prog_->bytemap_range();
+ LOG(ERROR) << "DFA out of memory: "
+ << "pattern length " << pattern_.size() << ", "
+ << "program size " << prog_->size() << ", "
+ << "list count " << prog_->list_count() << ", "
+ << "bytemap range " << prog_->bytemap_range();
// Fall back to NFA below.
skipped_test = true;
break;
}
return false;
}
- if (matchp == NULL) // Matched. Don't care where.
+ if (matchp == NULL) // Matched. Don't care where.
return true;
- // SearchDFA set match.end() but didn't know where the
- // match started. Run the regexp backward from match.end()
+ // SearchDFA set match.end() but didn't know where the
+ // match started. Run the regexp backward from match.end()
// to find the longest possible match -- that's where it started.
Prog* prog = ReverseProg();
- if (prog == NULL) {
- // Fall back to NFA below.
- skipped_test = true;
- break;
- }
+ if (prog == NULL) {
+ // Fall back to NFA below.
+ skipped_test = true;
+ break;
+ }
if (!prog->SearchDFA(match, text, Prog::kAnchored,
Prog::kLongestMatch, &match, &dfa_failed, NULL)) {
if (dfa_failed) {
if (options_.log_errors())
- LOG(ERROR) << "DFA out of memory: "
- << "pattern length " << pattern_.size() << ", "
- << "program size " << prog->size() << ", "
- << "list count " << prog->list_count() << ", "
- << "bytemap range " << prog->bytemap_range();
+ LOG(ERROR) << "DFA out of memory: "
+ << "pattern length " << pattern_.size() << ", "
+ << "program size " << prog->size() << ", "
+ << "list count " << prog->list_count() << ", "
+ << "bytemap range " << prog->bytemap_range();
// Fall back to NFA below.
skipped_test = true;
break;
@@ -808,11 +808,11 @@ bool RE2::Match(const StringPiece& text,
&match, &dfa_failed, NULL)) {
if (dfa_failed) {
if (options_.log_errors())
- LOG(ERROR) << "DFA out of memory: "
- << "pattern length " << pattern_.size() << ", "
- << "program size " << prog_->size() << ", "
- << "list count " << prog_->list_count() << ", "
- << "bytemap range " << prog_->bytemap_range();
+ LOG(ERROR) << "DFA out of memory: "
+ << "pattern length " << pattern_.size() << ", "
+ << "program size " << prog_->size() << ", "
+ << "list count " << prog_->list_count() << ", "
+ << "bytemap range " << prog_->bytemap_range();
// Fall back to NFA below.
skipped_test = true;
break;
@@ -876,7 +876,7 @@ bool RE2::Match(const StringPiece& text,
// Internal matcher - like Match() but takes Args not StringPieces.
bool RE2::DoMatch(const StringPiece& text,
- Anchor re_anchor,
+ Anchor re_anchor,
size_t* consumed,
const Arg* const* args,
int n) const {
@@ -886,11 +886,11 @@ bool RE2::DoMatch(const StringPiece& text,
return false;
}
- if (NumberOfCapturingGroups() < n) {
- // RE has fewer capturing groups than number of Arg pointers passed in.
- return false;
- }
-
+ if (NumberOfCapturingGroups() < n) {
+ // RE has fewer capturing groups than number of Arg pointers passed in.
+ return false;
+ }
+
// Count number of capture groups needed.
int nvec;
if (n == 0 && consumed == NULL)
@@ -902,14 +902,14 @@ bool RE2::DoMatch(const StringPiece& text,
StringPiece stkvec[kVecSize];
StringPiece* heapvec = NULL;
- if (nvec <= static_cast<int>(arraysize(stkvec))) {
+ if (nvec <= static_cast<int>(arraysize(stkvec))) {
vec = stkvec;
} else {
vec = new StringPiece[nvec];
heapvec = vec;
}
- if (!Match(text, 0, text.size(), re_anchor, vec, nvec)) {
+ if (!Match(text, 0, text.size(), re_anchor, vec, nvec)) {
delete[] heapvec;
return false;
}
@@ -939,8 +939,8 @@ bool RE2::DoMatch(const StringPiece& text,
// Checks that the rewrite string is well-formed with respect to this
// regular expression.
-bool RE2::CheckRewriteString(const StringPiece& rewrite,
- std::string* error) const {
+bool RE2::CheckRewriteString(const StringPiece& rewrite,
+ std::string* error) const {
int max_token = -1;
for (const char *s = rewrite.data(), *end = s + rewrite.size();
s < end; s++) {
@@ -968,125 +968,125 @@ bool RE2::CheckRewriteString(const StringPiece& rewrite,
}
if (max_token > NumberOfCapturingGroups()) {
- *error = StringPrintf(
- "Rewrite schema requests %d matches, but the regexp only has %d "
- "parenthesized subexpressions.",
- max_token, NumberOfCapturingGroups());
+ *error = StringPrintf(
+ "Rewrite schema requests %d matches, but the regexp only has %d "
+ "parenthesized subexpressions.",
+ max_token, NumberOfCapturingGroups());
return false;
}
return true;
}
-// Returns the maximum submatch needed for the rewrite to be done by Replace().
-// E.g. if rewrite == "foo \\2,\\1", returns 2.
-int RE2::MaxSubmatch(const StringPiece& rewrite) {
- int max = 0;
- for (const char *s = rewrite.data(), *end = s + rewrite.size();
- s < end; s++) {
- if (*s == '\\') {
- s++;
- int c = (s < end) ? *s : -1;
- if (isdigit(c)) {
- int n = (c - '0');
- if (n > max)
- max = n;
- }
- }
- }
- return max;
-}
-
-// Append the "rewrite" string, with backslash subsitutions from "vec",
-// to string "out".
-bool RE2::Rewrite(std::string* out,
- const StringPiece& rewrite,
- const StringPiece* vec,
- int veclen) const {
- for (const char *s = rewrite.data(), *end = s + rewrite.size();
- s < end; s++) {
- if (*s != '\\') {
- out->push_back(*s);
- continue;
- }
- s++;
- int c = (s < end) ? *s : -1;
- if (isdigit(c)) {
- int n = (c - '0');
- if (n >= veclen) {
- if (options_.log_errors()) {
- LOG(ERROR) << "invalid substitution \\" << n
- << " from " << veclen << " groups";
- }
- return false;
- }
- StringPiece snip = vec[n];
- if (!snip.empty())
- out->append(snip.data(), snip.size());
- } else if (c == '\\') {
- out->push_back('\\');
- } else {
- if (options_.log_errors())
- LOG(ERROR) << "invalid rewrite pattern: " << rewrite.data();
- return false;
- }
- }
- return true;
-}
-
+// Returns the maximum submatch needed for the rewrite to be done by Replace().
+// E.g. if rewrite == "foo \\2,\\1", returns 2.
+int RE2::MaxSubmatch(const StringPiece& rewrite) {
+ int max = 0;
+ for (const char *s = rewrite.data(), *end = s + rewrite.size();
+ s < end; s++) {
+ if (*s == '\\') {
+ s++;
+ int c = (s < end) ? *s : -1;
+ if (isdigit(c)) {
+ int n = (c - '0');
+ if (n > max)
+ max = n;
+ }
+ }
+ }
+ return max;
+}
+
+// Append the "rewrite" string, with backslash subsitutions from "vec",
+// to string "out".
+bool RE2::Rewrite(std::string* out,
+ const StringPiece& rewrite,
+ const StringPiece* vec,
+ int veclen) const {
+ for (const char *s = rewrite.data(), *end = s + rewrite.size();
+ s < end; s++) {
+ if (*s != '\\') {
+ out->push_back(*s);
+ continue;
+ }
+ s++;
+ int c = (s < end) ? *s : -1;
+ if (isdigit(c)) {
+ int n = (c - '0');
+ if (n >= veclen) {
+ if (options_.log_errors()) {
+ LOG(ERROR) << "invalid substitution \\" << n
+ << " from " << veclen << " groups";
+ }
+ return false;
+ }
+ StringPiece snip = vec[n];
+ if (!snip.empty())
+ out->append(snip.data(), snip.size());
+ } else if (c == '\\') {
+ out->push_back('\\');
+ } else {
+ if (options_.log_errors())
+ LOG(ERROR) << "invalid rewrite pattern: " << rewrite.data();
+ return false;
+ }
+ }
+ return true;
+}
+
/***** Parsers for various types *****/
-namespace re2_internal {
-
-template <>
-bool Parse(const char* str, size_t n, void* dest) {
+namespace re2_internal {
+
+template <>
+bool Parse(const char* str, size_t n, void* dest) {
// We fail if somebody asked us to store into a non-NULL void* pointer
return (dest == NULL);
}
-template <>
-bool Parse(const char* str, size_t n, std::string* dest) {
+template <>
+bool Parse(const char* str, size_t n, std::string* dest) {
if (dest == NULL) return true;
- dest->assign(str, n);
+ dest->assign(str, n);
return true;
}
#if defined(ARCADIA_ROOT)
-template <>
-bool Parse(const char* str, size_t n, TString* dest) {
+template <>
+bool Parse(const char* str, size_t n, TString* dest) {
if (dest == NULL) return true;
- dest->assign(str, n);
+ dest->assign(str, n);
return true;
}
#endif
-template <>
-bool Parse(const char* str, size_t n, StringPiece* dest) {
+template <>
+bool Parse(const char* str, size_t n, StringPiece* dest) {
if (dest == NULL) return true;
- *dest = StringPiece(str, n);
+ *dest = StringPiece(str, n);
return true;
}
-template <>
-bool Parse(const char* str, size_t n, char* dest) {
+template <>
+bool Parse(const char* str, size_t n, char* dest) {
if (n != 1) return false;
if (dest == NULL) return true;
- *dest = str[0];
+ *dest = str[0];
return true;
}
-template <>
-bool Parse(const char* str, size_t n, signed char* dest) {
+template <>
+bool Parse(const char* str, size_t n, signed char* dest) {
if (n != 1) return false;
if (dest == NULL) return true;
- *dest = str[0];
+ *dest = str[0];
return true;
}
-template <>
-bool Parse(const char* str, size_t n, unsigned char* dest) {
+template <>
+bool Parse(const char* str, size_t n, unsigned char* dest) {
if (n != 1) return false;
if (dest == NULL) return true;
- *dest = str[0];
+ *dest = str[0];
return true;
}
@@ -1150,41 +1150,41 @@ static const char* TerminateNumber(char* buf, size_t nbuf, const char* str,
return buf;
}
-template <>
-bool Parse(const char* str, size_t n, float* dest) {
+template <>
+bool Parse(const char* str, size_t n, float* dest) {
+ if (n == 0) return false;
+ static const int kMaxLength = 200;
+ char buf[kMaxLength+1];
+ str = TerminateNumber(buf, sizeof buf, str, &n, true);
+ char* end;
+ errno = 0;
+ float r = strtof(str, &end);
+ if (end != str + n) return false; // Leftover junk
+ if (errno) return false;
+ if (dest == NULL) return true;
+ *dest = r;
+ return true;
+}
+
+template <>
+bool Parse(const char* str, size_t n, double* dest) {
+ if (n == 0) return false;
+ static const int kMaxLength = 200;
+ char buf[kMaxLength+1];
+ str = TerminateNumber(buf, sizeof buf, str, &n, true);
+ char* end;
+ errno = 0;
+ double r = strtod(str, &end);
+ if (end != str + n) return false; // Leftover junk
+ if (errno) return false;
+ if (dest == NULL) return true;
+ *dest = r;
+ return true;
+}
+
+template <>
+bool Parse(const char* str, size_t n, long* dest, int radix) {
if (n == 0) return false;
- static const int kMaxLength = 200;
- char buf[kMaxLength+1];
- str = TerminateNumber(buf, sizeof buf, str, &n, true);
- char* end;
- errno = 0;
- float r = strtof(str, &end);
- if (end != str + n) return false; // Leftover junk
- if (errno) return false;
- if (dest == NULL) return true;
- *dest = r;
- return true;
-}
-
-template <>
-bool Parse(const char* str, size_t n, double* dest) {
- if (n == 0) return false;
- static const int kMaxLength = 200;
- char buf[kMaxLength+1];
- str = TerminateNumber(buf, sizeof buf, str, &n, true);
- char* end;
- errno = 0;
- double r = strtod(str, &end);
- if (end != str + n) return false; // Leftover junk
- if (errno) return false;
- if (dest == NULL) return true;
- *dest = r;
- return true;
-}
-
-template <>
-bool Parse(const char* str, size_t n, long* dest, int radix) {
- if (n == 0) return false;
char buf[kMaxNumberLength+1];
str = TerminateNumber(buf, sizeof buf, str, &n, false);
char* end;
@@ -1193,12 +1193,12 @@ bool Parse(const char* str, size_t n, long* dest, int radix) {
if (end != str + n) return false; // Leftover junk
if (errno) return false;
if (dest == NULL) return true;
- *dest = r;
+ *dest = r;
return true;
}
-template <>
-bool Parse(const char* str, size_t n, unsigned long* dest, int radix) {
+template <>
+bool Parse(const char* str, size_t n, unsigned long* dest, int radix) {
if (n == 0) return false;
char buf[kMaxNumberLength+1];
str = TerminateNumber(buf, sizeof buf, str, &n, false);
@@ -1214,52 +1214,52 @@ bool Parse(const char* str, size_t n, unsigned long* dest, int radix) {
if (end != str + n) return false; // Leftover junk
if (errno) return false;
if (dest == NULL) return true;
- *dest = r;
+ *dest = r;
return true;
}
-template <>
-bool Parse(const char* str, size_t n, short* dest, int radix) {
+template <>
+bool Parse(const char* str, size_t n, short* dest, int radix) {
long r;
- if (!Parse(str, n, &r, radix)) return false; // Could not parse
- if ((short)r != r) return false; // Out of range
+ if (!Parse(str, n, &r, radix)) return false; // Could not parse
+ if ((short)r != r) return false; // Out of range
if (dest == NULL) return true;
- *dest = (short)r;
+ *dest = (short)r;
return true;
}
-template <>
-bool Parse(const char* str, size_t n, unsigned short* dest, int radix) {
+template <>
+bool Parse(const char* str, size_t n, unsigned short* dest, int radix) {
unsigned long r;
- if (!Parse(str, n, &r, radix)) return false; // Could not parse
- if ((unsigned short)r != r) return false; // Out of range
+ if (!Parse(str, n, &r, radix)) return false; // Could not parse
+ if ((unsigned short)r != r) return false; // Out of range
if (dest == NULL) return true;
- *dest = (unsigned short)r;
+ *dest = (unsigned short)r;
return true;
}
-template <>
-bool Parse(const char* str, size_t n, int* dest, int radix) {
+template <>
+bool Parse(const char* str, size_t n, int* dest, int radix) {
long r;
- if (!Parse(str, n, &r, radix)) return false; // Could not parse
- if ((int)r != r) return false; // Out of range
+ if (!Parse(str, n, &r, radix)) return false; // Could not parse
+ if ((int)r != r) return false; // Out of range
if (dest == NULL) return true;
- *dest = (int)r;
+ *dest = (int)r;
return true;
}
-template <>
-bool Parse(const char* str, size_t n, unsigned int* dest, int radix) {
+template <>
+bool Parse(const char* str, size_t n, unsigned int* dest, int radix) {
unsigned long r;
- if (!Parse(str, n, &r, radix)) return false; // Could not parse
- if ((unsigned int)r != r) return false; // Out of range
+ if (!Parse(str, n, &r, radix)) return false; // Could not parse
+ if ((unsigned int)r != r) return false; // Out of range
if (dest == NULL) return true;
- *dest = (unsigned int)r;
+ *dest = (unsigned int)r;
return true;
}
-template <>
-bool Parse(const char* str, size_t n, long long* dest, int radix) {
+template <>
+bool Parse(const char* str, size_t n, long long* dest, int radix) {
if (n == 0) return false;
char buf[kMaxNumberLength+1];
str = TerminateNumber(buf, sizeof buf, str, &n, false);
@@ -1269,12 +1269,12 @@ bool Parse(const char* str, size_t n, long long* dest, int radix) {
if (end != str + n) return false; // Leftover junk
if (errno) return false;
if (dest == NULL) return true;
- *dest = r;
+ *dest = r;
return true;
}
-template <>
-bool Parse(const char* str, size_t n, unsigned long long* dest, int radix) {
+template <>
+bool Parse(const char* str, size_t n, unsigned long long* dest, int radix) {
if (n == 0) return false;
char buf[kMaxNumberLength+1];
str = TerminateNumber(buf, sizeof buf, str, &n, false);
@@ -1289,47 +1289,47 @@ bool Parse(const char* str, size_t n, unsigned long long* dest, int radix) {
if (end != str + n) return false; // Leftover junk
if (errno) return false;
if (dest == NULL) return true;
- *dest = r;
+ *dest = r;
return true;
}
-} // namespace re2_internal
-
-namespace hooks {
-
-#ifdef RE2_HAVE_THREAD_LOCAL
-thread_local const RE2* context = NULL;
-#endif
-
-template <typename T>
-union Hook {
- void Store(T* cb) { cb_.store(cb, std::memory_order_release); }
- T* Load() const { return cb_.load(std::memory_order_acquire); }
-
-#if !defined(__clang__) && defined(_MSC_VER)
- // Citing https://github.com/protocolbuffers/protobuf/pull/4777 as precedent,
- // this is a gross hack to make std::atomic<T*> constant-initialized on MSVC.
- static_assert(std::atomic<T*>::is_always_lock_free,
- "std::atomic<T*> must be always lock-free");
- T* cb_for_constinit_;
-#endif
-
- std::atomic<T*> cb_;
-};
-
-template <typename T>
-static void DoNothing(const T&) {}
-
-#define DEFINE_HOOK(type, name) \
- static Hook<type##Callback> name##_hook = {{&DoNothing<type>}}; \
- void Set##type##Hook(type##Callback* cb) { name##_hook.Store(cb); } \
- type##Callback* Get##type##Hook() { return name##_hook.Load(); }
-
-DEFINE_HOOK(DFAStateCacheReset, dfa_state_cache_reset)
-DEFINE_HOOK(DFASearchFailure, dfa_search_failure)
-
-#undef DEFINE_HOOK
-
-} // namespace hooks
-
+} // namespace re2_internal
+
+namespace hooks {
+
+#ifdef RE2_HAVE_THREAD_LOCAL
+thread_local const RE2* context = NULL;
+#endif
+
+template <typename T>
+union Hook {
+ void Store(T* cb) { cb_.store(cb, std::memory_order_release); }
+ T* Load() const { return cb_.load(std::memory_order_acquire); }
+
+#if !defined(__clang__) && defined(_MSC_VER)
+ // Citing https://github.com/protocolbuffers/protobuf/pull/4777 as precedent,
+ // this is a gross hack to make std::atomic<T*> constant-initialized on MSVC.
+ static_assert(std::atomic<T*>::is_always_lock_free,
+ "std::atomic<T*> must be always lock-free");
+ T* cb_for_constinit_;
+#endif
+
+ std::atomic<T*> cb_;
+};
+
+template <typename T>
+static void DoNothing(const T&) {}
+
+#define DEFINE_HOOK(type, name) \
+ static Hook<type##Callback> name##_hook = {{&DoNothing<type>}}; \
+ void Set##type##Hook(type##Callback* cb) { name##_hook.Store(cb); } \
+ type##Callback* Get##type##Hook() { return name##_hook.Load(); }
+
+DEFINE_HOOK(DFAStateCacheReset, dfa_state_cache_reset)
+DEFINE_HOOK(DFASearchFailure, dfa_search_failure)
+
+#undef DEFINE_HOOK
+
+} // namespace hooks
+
} // namespace re2
diff --git a/contrib/libs/re2/re2/re2.h b/contrib/libs/re2/re2/re2.h
index a51cd556c1..f8f8043daf 100644
--- a/contrib/libs/re2/re2/re2.h
+++ b/contrib/libs/re2/re2/re2.h
@@ -30,19 +30,19 @@
// "(?i)hello" -- (?i) turns on case-insensitive matching
// "/\\*(.*?)\\*/" -- .*? matches . minimum no. of times possible
//
-// The double backslashes are needed when writing C++ string literals.
-// However, they should NOT be used when writing C++11 raw string literals:
-//
-// R"(hello (\w+) world)" -- \w matches a "word" character
-// R"(version (\d+))" -- \d matches a digit
-// R"(hello\s+world)" -- \s matches any whitespace character
-// R"(\b(\w+)\b)" -- \b matches non-empty string at word boundary
-// R"((?i)hello)" -- (?i) turns on case-insensitive matching
-// R"(/\*(.*?)\*/)" -- .*? matches . minimum no. of times possible
-//
-// When using UTF-8 encoding, case-insensitive matching will perform
-// simple case folding, not full case folding.
-//
+// The double backslashes are needed when writing C++ string literals.
+// However, they should NOT be used when writing C++11 raw string literals:
+//
+// R"(hello (\w+) world)" -- \w matches a "word" character
+// R"(version (\d+))" -- \d matches a digit
+// R"(hello\s+world)" -- \s matches any whitespace character
+// R"(\b(\w+)\b)" -- \b matches non-empty string at word boundary
+// R"((?i)hello)" -- (?i) turns on case-insensitive matching
+// R"(/\*(.*?)\*/)" -- .*? matches . minimum no. of times possible
+//
+// When using UTF-8 encoding, case-insensitive matching will perform
+// simple case folding, not full case folding.
+//
// -----------------------------------------------------------------------
// MATCHING INTERFACE:
//
@@ -66,29 +66,29 @@
// CHECK(RE2::FullMatch(latin1_string, RE2(latin1_pattern, RE2::Latin1)));
//
// -----------------------------------------------------------------------
-// MATCHING WITH SUBSTRING EXTRACTION:
+// MATCHING WITH SUBSTRING EXTRACTION:
//
-// You can supply extra pointer arguments to extract matched substrings.
-// On match failure, none of the pointees will have been modified.
-// On match success, the substrings will be converted (as necessary) and
-// their values will be assigned to their pointees until all conversions
-// have succeeded or one conversion has failed.
-// On conversion failure, the pointees will be in an indeterminate state
-// because the caller has no way of knowing which conversion failed.
-// However, conversion cannot fail for types like string and StringPiece
-// that do not inspect the substring contents. Hence, in the common case
-// where all of the pointees are of such types, failure is always due to
-// match failure and thus none of the pointees will have been modified.
+// You can supply extra pointer arguments to extract matched substrings.
+// On match failure, none of the pointees will have been modified.
+// On match success, the substrings will be converted (as necessary) and
+// their values will be assigned to their pointees until all conversions
+// have succeeded or one conversion has failed.
+// On conversion failure, the pointees will be in an indeterminate state
+// because the caller has no way of knowing which conversion failed.
+// However, conversion cannot fail for types like string and StringPiece
+// that do not inspect the substring contents. Hence, in the common case
+// where all of the pointees are of such types, failure is always due to
+// match failure and thus none of the pointees will have been modified.
//
// Example: extracts "ruby" into "s" and 1234 into "i"
// int i;
-// std::string s;
+// std::string s;
// CHECK(RE2::FullMatch("ruby:1234", "(\\w+):(\\d+)", &s, &i));
//
// Example: fails because string cannot be stored in integer
// CHECK(!RE2::FullMatch("ruby", "(.*)", &i));
//
-// Example: fails because there aren't enough sub-patterns
+// Example: fails because there aren't enough sub-patterns
// CHECK(!RE2::FullMatch("ruby:1234", "\\w+:\\d+", &s));
//
// Example: does not try to extract any extra sub-patterns
@@ -144,10 +144,10 @@
// which represents a sub-range of a real string.
//
// Example: read lines of the form "var = value" from a string.
-// std::string contents = ...; // Fill string somehow
+// std::string contents = ...; // Fill string somehow
// StringPiece input(contents); // Wrap a StringPiece around it
//
-// std::string var;
+// std::string var;
// int value;
// while (RE2::Consume(&input, "(\\w+) = (\\d+)\n", &var, &value)) {
// ...;
@@ -208,16 +208,16 @@
#include <map>
#include <mutex>
#include <string>
-#include <type_traits>
-#include <vector>
+#include <type_traits>
+#include <vector>
#if defined(ARCADIA_ROOT)
#include <util/generic/string.h>
#endif
-#if defined(__APPLE__)
-#include <TargetConditionals.h>
-#endif
-
+#if defined(__APPLE__)
+#include <TargetConditionals.h>
+#endif
+
#include "re2/stringpiece.h"
namespace re2 {
@@ -251,7 +251,7 @@ class RE2 {
ErrorBadCharRange, // bad character class range
ErrorMissingBracket, // missing closing ]
ErrorMissingParen, // missing closing )
- ErrorUnexpectedParen, // unexpected closing )
+ ErrorUnexpectedParen, // unexpected closing )
ErrorTrailingBackslash, // trailing \ at end of regexp
ErrorRepeatArgument, // repeat argument missing, e.g. "*"
ErrorRepeatSize, // bad repetition argument
@@ -274,12 +274,12 @@ class RE2 {
Quiet // do not log about regexp parse errors
};
- // Need to have the const char* and const std::string& forms for implicit
+ // Need to have the const char* and const std::string& forms for implicit
// conversions when passing string literals to FullMatch and PartialMatch.
// Otherwise the StringPiece form would be sufficient.
#ifndef SWIG
RE2(const char* pattern);
- RE2(const std::string& pattern);
+ RE2(const std::string& pattern);
#endif
RE2(const StringPiece& pattern);
RE2(const StringPiece& pattern, const Options& options);
@@ -295,11 +295,11 @@ class RE2 {
// The string specification for this RE2. E.g.
// RE2 re("ab*c?d+");
// re.pattern(); // "ab*c?d+"
- const std::string& pattern() const { return pattern_; }
+ const std::string& pattern() const { return pattern_; }
// If RE2 could not be created properly, returns an error string.
// Else returns the empty string.
- const std::string& error() const { return *error_; }
+ const std::string& error() const { return *error_; }
// If RE2 could not be created properly, returns an error code.
// Else returns RE2::NoError (== 0).
@@ -307,81 +307,81 @@ class RE2 {
// If RE2 could not be created properly, returns the offending
// portion of the regexp.
- const std::string& error_arg() const { return error_arg_; }
+ const std::string& error_arg() const { return error_arg_; }
// Returns the program size, a very approximate measure of a regexp's "cost".
// Larger numbers are more expensive than smaller numbers.
int ProgramSize() const;
- int ReverseProgramSize() const;
+ int ReverseProgramSize() const;
- // If histogram is not null, outputs the program fanout
- // as a histogram bucketed by powers of 2.
+ // If histogram is not null, outputs the program fanout
+ // as a histogram bucketed by powers of 2.
// Returns the number of the largest non-empty bucket.
- int ProgramFanout(std::vector<int>* histogram) const;
- int ReverseProgramFanout(std::vector<int>* histogram) const;
+ int ProgramFanout(std::vector<int>* histogram) const;
+ int ReverseProgramFanout(std::vector<int>* histogram) const;
// Returns the underlying Regexp; not for general use.
// Returns entire_regexp_ so that callers don't need
// to know about prefix_ and prefix_foldcase_.
re2::Regexp* Regexp() const { return entire_regexp_; }
- /***** The array-based matching interface ******/
-
- // The functions here have names ending in 'N' and are used to implement
- // the functions whose names are the prefix before the 'N'. It is sometimes
- // useful to invoke them directly, but the syntax is awkward, so the 'N'-less
- // versions should be preferred.
- static bool FullMatchN(const StringPiece& text, const RE2& re,
- const Arg* const args[], int n);
- static bool PartialMatchN(const StringPiece& text, const RE2& re,
- const Arg* const args[], int n);
- static bool ConsumeN(StringPiece* input, const RE2& re,
- const Arg* const args[], int n);
- static bool FindAndConsumeN(StringPiece* input, const RE2& re,
- const Arg* const args[], int n);
-
-#ifndef SWIG
- private:
- template <typename F, typename SP>
- static inline bool Apply(F f, SP sp, const RE2& re) {
- return f(sp, re, NULL, 0);
- }
-
- template <typename F, typename SP, typename... A>
- static inline bool Apply(F f, SP sp, const RE2& re, const A&... a) {
- const Arg* const args[] = {&a...};
- const int n = sizeof...(a);
- return f(sp, re, args, n);
- }
-
- public:
- // In order to allow FullMatch() et al. to be called with a varying number
- // of arguments of varying types, we use two layers of variadic templates.
- // The first layer constructs the temporary Arg objects. The second layer
- // (above) constructs the array of pointers to the temporary Arg objects.
-
+ /***** The array-based matching interface ******/
+
+ // The functions here have names ending in 'N' and are used to implement
+ // the functions whose names are the prefix before the 'N'. It is sometimes
+ // useful to invoke them directly, but the syntax is awkward, so the 'N'-less
+ // versions should be preferred.
+ static bool FullMatchN(const StringPiece& text, const RE2& re,
+ const Arg* const args[], int n);
+ static bool PartialMatchN(const StringPiece& text, const RE2& re,
+ const Arg* const args[], int n);
+ static bool ConsumeN(StringPiece* input, const RE2& re,
+ const Arg* const args[], int n);
+ static bool FindAndConsumeN(StringPiece* input, const RE2& re,
+ const Arg* const args[], int n);
+
+#ifndef SWIG
+ private:
+ template <typename F, typename SP>
+ static inline bool Apply(F f, SP sp, const RE2& re) {
+ return f(sp, re, NULL, 0);
+ }
+
+ template <typename F, typename SP, typename... A>
+ static inline bool Apply(F f, SP sp, const RE2& re, const A&... a) {
+ const Arg* const args[] = {&a...};
+ const int n = sizeof...(a);
+ return f(sp, re, args, n);
+ }
+
+ public:
+ // In order to allow FullMatch() et al. to be called with a varying number
+ // of arguments of varying types, we use two layers of variadic templates.
+ // The first layer constructs the temporary Arg objects. The second layer
+ // (above) constructs the array of pointers to the temporary Arg objects.
+
/***** The useful part: the matching interface *****/
// Matches "text" against "re". If pointer arguments are
// supplied, copies matched sub-patterns into them.
//
- // You can pass in a "const char*" or a "std::string" for "text".
- // You can pass in a "const char*" or a "std::string" or a "RE2" for "re".
+ // You can pass in a "const char*" or a "std::string" for "text".
+ // You can pass in a "const char*" or a "std::string" or a "RE2" for "re".
//
// The provided pointer arguments can be pointers to any scalar numeric
// type, or one of:
- // std::string (matched piece is copied to string)
+ // std::string (matched piece is copied to string)
// StringPiece (StringPiece is mutated to point to matched piece)
// T (where "bool T::ParseFrom(const char*, size_t)" exists)
// (void*)NULL (the corresponding matched sub-pattern is not copied)
//
// Returns true iff all of the following conditions are satisfied:
- // a. "text" matches "re" fully - from the beginning to the end of "text".
- // b. The number of matched sub-patterns is >= number of supplied pointers.
+ // a. "text" matches "re" fully - from the beginning to the end of "text".
+ // b. The number of matched sub-patterns is >= number of supplied pointers.
// c. The "i"th argument has a suitable type for holding the
// string captured as the "i"th sub-pattern. If you pass in
// NULL for the "i"th argument, or pass fewer arguments than
- // number of sub-patterns, the "i"th captured sub-pattern is
+ // number of sub-patterns, the "i"th captured sub-pattern is
// ignored.
//
// CAVEAT: An optional sub-pattern that does not exist in the
@@ -395,80 +395,80 @@ class RE2 {
return Apply(FullMatchN, text, re, Arg(std::forward<A>(a))...);
}
- // Like FullMatch(), except that "re" is allowed to match a substring
- // of "text".
- //
- // Returns true iff all of the following conditions are satisfied:
- // a. "text" matches "re" partially - for some substring of "text".
- // b. The number of matched sub-patterns is >= number of supplied pointers.
- // c. The "i"th argument has a suitable type for holding the
- // string captured as the "i"th sub-pattern. If you pass in
- // NULL for the "i"th argument, or pass fewer arguments than
- // number of sub-patterns, the "i"th captured sub-pattern is
- // ignored.
+ // Like FullMatch(), except that "re" is allowed to match a substring
+ // of "text".
+ //
+ // Returns true iff all of the following conditions are satisfied:
+ // a. "text" matches "re" partially - for some substring of "text".
+ // b. The number of matched sub-patterns is >= number of supplied pointers.
+ // c. The "i"th argument has a suitable type for holding the
+ // string captured as the "i"th sub-pattern. If you pass in
+ // NULL for the "i"th argument, or pass fewer arguments than
+ // number of sub-patterns, the "i"th captured sub-pattern is
+ // ignored.
template <typename... A>
static bool PartialMatch(const StringPiece& text, const RE2& re, A&&... a) {
return Apply(PartialMatchN, text, re, Arg(std::forward<A>(a))...);
}
- // Like FullMatch() and PartialMatch(), except that "re" has to match
- // a prefix of the text, and "input" is advanced past the matched
- // text. Note: "input" is modified iff this routine returns true
- // and "re" matched a non-empty substring of "input".
- //
- // Returns true iff all of the following conditions are satisfied:
- // a. "input" matches "re" partially - for some prefix of "input".
- // b. The number of matched sub-patterns is >= number of supplied pointers.
- // c. The "i"th argument has a suitable type for holding the
- // string captured as the "i"th sub-pattern. If you pass in
- // NULL for the "i"th argument, or pass fewer arguments than
- // number of sub-patterns, the "i"th captured sub-pattern is
- // ignored.
+ // Like FullMatch() and PartialMatch(), except that "re" has to match
+ // a prefix of the text, and "input" is advanced past the matched
+ // text. Note: "input" is modified iff this routine returns true
+ // and "re" matched a non-empty substring of "input".
+ //
+ // Returns true iff all of the following conditions are satisfied:
+ // a. "input" matches "re" partially - for some prefix of "input".
+ // b. The number of matched sub-patterns is >= number of supplied pointers.
+ // c. The "i"th argument has a suitable type for holding the
+ // string captured as the "i"th sub-pattern. If you pass in
+ // NULL for the "i"th argument, or pass fewer arguments than
+ // number of sub-patterns, the "i"th captured sub-pattern is
+ // ignored.
template <typename... A>
static bool Consume(StringPiece* input, const RE2& re, A&&... a) {
return Apply(ConsumeN, input, re, Arg(std::forward<A>(a))...);
}
- // Like Consume(), but does not anchor the match at the beginning of
- // the text. That is, "re" need not start its match at the beginning
- // of "input". For example, "FindAndConsume(s, "(\\w+)", &word)" finds
- // the next word in "s" and stores it in "word".
- //
- // Returns true iff all of the following conditions are satisfied:
- // a. "input" matches "re" partially - for some substring of "input".
- // b. The number of matched sub-patterns is >= number of supplied pointers.
- // c. The "i"th argument has a suitable type for holding the
- // string captured as the "i"th sub-pattern. If you pass in
- // NULL for the "i"th argument, or pass fewer arguments than
- // number of sub-patterns, the "i"th captured sub-pattern is
- // ignored.
+ // Like Consume(), but does not anchor the match at the beginning of
+ // the text. That is, "re" need not start its match at the beginning
+ // of "input". For example, "FindAndConsume(s, "(\\w+)", &word)" finds
+ // the next word in "s" and stores it in "word".
+ //
+ // Returns true iff all of the following conditions are satisfied:
+ // a. "input" matches "re" partially - for some substring of "input".
+ // b. The number of matched sub-patterns is >= number of supplied pointers.
+ // c. The "i"th argument has a suitable type for holding the
+ // string captured as the "i"th sub-pattern. If you pass in
+ // NULL for the "i"th argument, or pass fewer arguments than
+ // number of sub-patterns, the "i"th captured sub-pattern is
+ // ignored.
template <typename... A>
static bool FindAndConsume(StringPiece* input, const RE2& re, A&&... a) {
return Apply(FindAndConsumeN, input, re, Arg(std::forward<A>(a))...);
}
#endif
- // Replace the first match of "re" in "str" with "rewrite".
+ // Replace the first match of "re" in "str" with "rewrite".
// Within "rewrite", backslash-escaped digits (\1 to \9) can be
// used to insert text matching corresponding parenthesized group
// from the pattern. \0 in "rewrite" refers to the entire matching
// text. E.g.,
//
- // std::string s = "yabba dabba doo";
+ // std::string s = "yabba dabba doo";
// CHECK(RE2::Replace(&s, "b+", "d"));
//
// will leave "s" containing "yada dabba doo"
//
// Returns true if the pattern matches and a replacement occurs,
// false otherwise.
- static bool Replace(std::string* str,
- const RE2& re,
+ static bool Replace(std::string* str,
+ const RE2& re,
const StringPiece& rewrite);
#if defined(ARCADIA_ROOT)
static bool Replace(TString *str,
const RE2& pattern,
const StringPiece& rewrite) {
- std::string tmp(*str);
+ std::string tmp(*str);
bool res = Replace(&tmp, pattern, rewrite);
*str = tmp;
return res;
@@ -478,7 +478,7 @@ class RE2 {
// Like Replace(), except replaces successive non-overlapping occurrences
// of the pattern in the string with the rewrite. E.g.
//
- // std::string s = "yabba dabba doo";
+ // std::string s = "yabba dabba doo";
// CHECK(RE2::GlobalReplace(&s, "b+", "d"));
//
// will leave "s" containing "yada dada doo"
@@ -488,15 +488,15 @@ class RE2 {
// replacing "ana" within "banana" makes only one replacement, not two.
//
// Returns the number of replacements made.
- static int GlobalReplace(std::string* str,
- const RE2& re,
+ static int GlobalReplace(std::string* str,
+ const RE2& re,
const StringPiece& rewrite);
#if defined(ARCADIA_ROOT)
- static int GlobalReplace(TString* str,
- const RE2& pattern,
- const StringPiece& rewrite) {
- std::string tmp(*str);
+ static int GlobalReplace(TString* str,
+ const RE2& pattern,
+ const StringPiece& rewrite) {
+ std::string tmp(*str);
int res = GlobalReplace(&tmp, pattern, rewrite);
*str = tmp;
return res;
@@ -511,15 +511,15 @@ class RE2 {
// successfully; if no match occurs, the string is left unaffected.
//
// REQUIRES: "text" must not alias any part of "*out".
- static bool Extract(const StringPiece& text,
- const RE2& re,
- const StringPiece& rewrite,
- std::string* out);
+ static bool Extract(const StringPiece& text,
+ const RE2& re,
+ const StringPiece& rewrite,
+ std::string* out);
#if defined(ARCADIA_ROOT)
- static bool Extract(const StringPiece& text,
+ static bool Extract(const StringPiece& text,
const RE2& pattern,
- const StringPiece& rewrite,
+ const StringPiece& rewrite,
TString *out) {
std::string tmp;
bool res = Extract(text, pattern, rewrite, &tmp);
@@ -530,11 +530,11 @@ class RE2 {
// Escapes all potentially meaningful regexp characters in
// 'unquoted'. The returned string, used as a regular expression,
- // will match exactly the original string. For example,
+ // will match exactly the original string. For example,
// 1.5-2.0?
// may become:
// 1\.5\-2\.0\?
- static std::string QuoteMeta(const StringPiece& unquoted);
+ static std::string QuoteMeta(const StringPiece& unquoted);
// Computes range for any strings matching regexp. The min and max can in
// some cases be arbitrarily precise, so the caller gets to specify the
@@ -550,8 +550,8 @@ class RE2 {
// do not compile down to infinite repetitions.
//
// Returns true on success, false on error.
- bool PossibleMatchRange(std::string* min, std::string* max,
- int maxlen) const;
+ bool PossibleMatchRange(std::string* min, std::string* max,
+ int maxlen) const;
// Generic matching interface
@@ -565,46 +565,46 @@ class RE2 {
// Return the number of capturing subpatterns, or -1 if the
// regexp wasn't valid on construction. The overall match ($0)
// does not count: if the regexp is "(a)(b)", returns 2.
- int NumberOfCapturingGroups() const { return num_captures_; }
+ int NumberOfCapturingGroups() const { return num_captures_; }
// Return a map from names to capturing indices.
// The map records the index of the leftmost group
// with the given name.
// Only valid until the re is deleted.
- const std::map<std::string, int>& NamedCapturingGroups() const;
+ const std::map<std::string, int>& NamedCapturingGroups() const;
// Return a map from capturing indices to names.
// The map has no entries for unnamed groups.
// Only valid until the re is deleted.
- const std::map<int, std::string>& CapturingGroupNames() const;
+ const std::map<int, std::string>& CapturingGroupNames() const;
// General matching routine.
// Match against text starting at offset startpos
// and stopping the search at offset endpos.
// Returns true if match found, false if not.
- // On a successful match, fills in submatch[] (up to nsubmatch entries)
+ // On a successful match, fills in submatch[] (up to nsubmatch entries)
// with information about submatches.
- // I.e. matching RE2("(foo)|(bar)baz") on "barbazbla" will return true, with
- // submatch[0] = "barbaz", submatch[1].data() = NULL, submatch[2] = "bar",
- // submatch[3].data() = NULL, ..., up to submatch[nsubmatch-1].data() = NULL.
- // Caveat: submatch[] may be clobbered even on match failure.
+ // I.e. matching RE2("(foo)|(bar)baz") on "barbazbla" will return true, with
+ // submatch[0] = "barbaz", submatch[1].data() = NULL, submatch[2] = "bar",
+ // submatch[3].data() = NULL, ..., up to submatch[nsubmatch-1].data() = NULL.
+ // Caveat: submatch[] may be clobbered even on match failure.
//
// Don't ask for more match information than you will use:
- // runs much faster with nsubmatch == 1 than nsubmatch > 1, and
- // runs even faster if nsubmatch == 0.
- // Doesn't make sense to use nsubmatch > 1 + NumberOfCapturingGroups(),
+ // runs much faster with nsubmatch == 1 than nsubmatch > 1, and
+ // runs even faster if nsubmatch == 0.
+ // Doesn't make sense to use nsubmatch > 1 + NumberOfCapturingGroups(),
// but will be handled correctly.
//
// Passing text == StringPiece(NULL, 0) will be handled like any other
// empty string, but note that on return, it will not be possible to tell
// whether submatch i matched the empty string or did not match:
- // either way, submatch[i].data() == NULL.
+ // either way, submatch[i].data() == NULL.
bool Match(const StringPiece& text,
size_t startpos,
size_t endpos,
- Anchor re_anchor,
- StringPiece* submatch,
- int nsubmatch) const;
+ Anchor re_anchor,
+ StringPiece* submatch,
+ int nsubmatch) const;
// Check that the given rewrite string is suitable for use with this
// regular expression. It checks that:
@@ -614,11 +614,11 @@ class RE2 {
// '\' followed by anything other than a digit or '\'.
// A true return value guarantees that Replace() and Extract() won't
// fail because of a bad rewrite string.
- bool CheckRewriteString(const StringPiece& rewrite,
- std::string* error) const;
+ bool CheckRewriteString(const StringPiece& rewrite,
+ std::string* error) const;
- bool CheckRewriteString(const StringPiece& rewrite, std::nullptr_t error) const {
- return CheckRewriteString(rewrite, static_cast<std::string*>(error));
+ bool CheckRewriteString(const StringPiece& rewrite, std::nullptr_t error) const {
+ return CheckRewriteString(rewrite, static_cast<std::string*>(error));
}
#if defined(ARCADIA_ROOT)
@@ -643,8 +643,8 @@ class RE2 {
// Returns true on success. This method can fail because of a malformed
// rewrite string. CheckRewriteString guarantees that the rewrite will
// be sucessful.
- bool Rewrite(std::string* out,
- const StringPiece& rewrite,
+ bool Rewrite(std::string* out,
+ const StringPiece& rewrite,
const StringPiece* vec,
int veclen) const;
@@ -666,9 +666,9 @@ class RE2 {
// with (?i) unless in posix_syntax mode)
//
// The following options are only consulted when posix_syntax == true.
- // When posix_syntax == false, these features are always enabled and
- // cannot be turned off; to perform multi-line matching in that case,
- // begin the regexp with (?m).
+ // When posix_syntax == false, these features are always enabled and
+ // cannot be turned off; to perform multi-line matching in that case,
+ // begin the regexp with (?m).
// perl_classes (false) allow Perl's \d \s \w \D \S \W
// word_boundary (false) allow Perl's \b \B (word boundary and not)
// one_line (false) ^ and $ only match beginning and end of text
@@ -684,7 +684,7 @@ class RE2 {
// can have two DFAs (one first match, one longest match).
// That makes 4 DFAs:
//
- // forward, first-match - used for UNANCHORED or ANCHOR_START searches
+ // forward, first-match - used for UNANCHORED or ANCHOR_START searches
// if opt.longest_match() == false
// forward, longest-match - used for all ANCHOR_BOTH searches,
// and the other two kinds if
@@ -789,46 +789,46 @@ class RE2 {
};
// Returns the options set in the constructor.
- const Options& options() const { return options_; }
+ const Options& options() const { return options_; }
// Argument converters; see below.
- template <typename T>
- static Arg CRadix(T* ptr);
- template <typename T>
- static Arg Hex(T* ptr);
- template <typename T>
- static Arg Octal(T* ptr);
+ template <typename T>
+ static Arg CRadix(T* ptr);
+ template <typename T>
+ static Arg Hex(T* ptr);
+ template <typename T>
+ static Arg Octal(T* ptr);
private:
void Init(const StringPiece& pattern, const Options& options);
bool DoMatch(const StringPiece& text,
- Anchor re_anchor,
+ Anchor re_anchor,
size_t* consumed,
const Arg* const args[],
int n) const;
re2::Prog* ReverseProg() const;
- std::string pattern_; // string regular expression
- Options options_; // option flags
- re2::Regexp* entire_regexp_; // parsed regular expression
- const std::string* error_; // error indicator (or points to empty string)
- ErrorCode error_code_; // error code
- std::string error_arg_; // fragment of regexp showing error
- std::string prefix_; // required prefix (before suffix_regexp_)
- bool prefix_foldcase_; // prefix_ is ASCII case-insensitive
- re2::Regexp* suffix_regexp_; // parsed regular expression, prefix_ removed
- re2::Prog* prog_; // compiled program for regexp
- int num_captures_; // number of capturing groups
- bool is_one_pass_; // can use prog_->SearchOnePass?
-
- // Reverse Prog for DFA execution only
- mutable re2::Prog* rprog_;
+ std::string pattern_; // string regular expression
+ Options options_; // option flags
+ re2::Regexp* entire_regexp_; // parsed regular expression
+ const std::string* error_; // error indicator (or points to empty string)
+ ErrorCode error_code_; // error code
+ std::string error_arg_; // fragment of regexp showing error
+ std::string prefix_; // required prefix (before suffix_regexp_)
+ bool prefix_foldcase_; // prefix_ is ASCII case-insensitive
+ re2::Regexp* suffix_regexp_; // parsed regular expression, prefix_ removed
+ re2::Prog* prog_; // compiled program for regexp
+ int num_captures_; // number of capturing groups
+ bool is_one_pass_; // can use prog_->SearchOnePass?
+
+ // Reverse Prog for DFA execution only
+ mutable re2::Prog* rprog_;
// Map from capture names to indices
- mutable const std::map<std::string, int>* named_groups_;
+ mutable const std::map<std::string, int>* named_groups_;
// Map from capture indices to names
- mutable const std::map<int, std::string>* group_names_;
+ mutable const std::map<int, std::string>* group_names_;
mutable std::once_flag rprog_once_;
mutable std::once_flag named_groups_once_;
@@ -840,137 +840,137 @@ class RE2 {
/***** Implementation details *****/
-namespace re2_internal {
+namespace re2_internal {
-// Types for which the 3-ary Parse() function template has specializations.
-template <typename T> struct Parse3ary : public std::false_type {};
-template <> struct Parse3ary<void> : public std::true_type {};
-template <> struct Parse3ary<std::string> : public std::true_type {};
-template <> struct Parse3ary<StringPiece> : public std::true_type {};
+// Types for which the 3-ary Parse() function template has specializations.
+template <typename T> struct Parse3ary : public std::false_type {};
+template <> struct Parse3ary<void> : public std::true_type {};
+template <> struct Parse3ary<std::string> : public std::true_type {};
+template <> struct Parse3ary<StringPiece> : public std::true_type {};
#if defined(ARCADIA_ROOT)
-template <> struct Parse3ary<TString> : public std::true_type {};
+template <> struct Parse3ary<TString> : public std::true_type {};
#endif
-template <> struct Parse3ary<char> : public std::true_type {};
-template <> struct Parse3ary<signed char> : public std::true_type {};
-template <> struct Parse3ary<unsigned char> : public std::true_type {};
-template <> struct Parse3ary<float> : public std::true_type {};
-template <> struct Parse3ary<double> : public std::true_type {};
-
-template <typename T>
-bool Parse(const char* str, size_t n, T* dest);
-
-// Types for which the 4-ary Parse() function template has specializations.
-template <typename T> struct Parse4ary : public std::false_type {};
-template <> struct Parse4ary<long> : public std::true_type {};
-template <> struct Parse4ary<unsigned long> : public std::true_type {};
-template <> struct Parse4ary<short> : public std::true_type {};
-template <> struct Parse4ary<unsigned short> : public std::true_type {};
-template <> struct Parse4ary<int> : public std::true_type {};
-template <> struct Parse4ary<unsigned int> : public std::true_type {};
-template <> struct Parse4ary<long long> : public std::true_type {};
-template <> struct Parse4ary<unsigned long long> : public std::true_type {};
-
-template <typename T>
-bool Parse(const char* str, size_t n, T* dest, int radix);
-
-} // namespace re2_internal
-
+template <> struct Parse3ary<char> : public std::true_type {};
+template <> struct Parse3ary<signed char> : public std::true_type {};
+template <> struct Parse3ary<unsigned char> : public std::true_type {};
+template <> struct Parse3ary<float> : public std::true_type {};
+template <> struct Parse3ary<double> : public std::true_type {};
+
+template <typename T>
+bool Parse(const char* str, size_t n, T* dest);
+
+// Types for which the 4-ary Parse() function template has specializations.
+template <typename T> struct Parse4ary : public std::false_type {};
+template <> struct Parse4ary<long> : public std::true_type {};
+template <> struct Parse4ary<unsigned long> : public std::true_type {};
+template <> struct Parse4ary<short> : public std::true_type {};
+template <> struct Parse4ary<unsigned short> : public std::true_type {};
+template <> struct Parse4ary<int> : public std::true_type {};
+template <> struct Parse4ary<unsigned int> : public std::true_type {};
+template <> struct Parse4ary<long long> : public std::true_type {};
+template <> struct Parse4ary<unsigned long long> : public std::true_type {};
+
+template <typename T>
+bool Parse(const char* str, size_t n, T* dest, int radix);
+
+} // namespace re2_internal
+
class RE2::Arg {
- private:
- template <typename T>
- using CanParse3ary = typename std::enable_if<
- re2_internal::Parse3ary<T>::value,
- int>::type;
-
- template <typename T>
- using CanParse4ary = typename std::enable_if<
- re2_internal::Parse4ary<T>::value,
- int>::type;
-
-#if !defined(_MSC_VER)
- template <typename T>
- using CanParseFrom = typename std::enable_if<
- std::is_member_function_pointer<
- decltype(static_cast<bool (T::*)(const char*, size_t)>(
- &T::ParseFrom))>::value,
- int>::type;
-#endif
-
- public:
- Arg() : Arg(nullptr) {}
- Arg(std::nullptr_t ptr) : arg_(ptr), parser_(DoNothing) {}
-
- template <typename T, CanParse3ary<T> = 0>
- Arg(T* ptr) : arg_(ptr), parser_(DoParse3ary<T>) {}
-
- template <typename T, CanParse4ary<T> = 0>
- Arg(T* ptr) : arg_(ptr), parser_(DoParse4ary<T>) {}
-
-#if !defined(_MSC_VER)
- template <typename T, CanParseFrom<T> = 0>
- Arg(T* ptr) : arg_(ptr), parser_(DoParseFrom<T>) {}
-#endif
-
- typedef bool (*Parser)(const char* str, size_t n, void* dest);
-
- template <typename T>
- Arg(T* ptr, Parser parser) : arg_(ptr), parser_(parser) {}
-
- bool Parse(const char* str, size_t n) const {
- return (*parser_)(str, n, arg_);
- }
-
private:
- static bool DoNothing(const char* /*str*/, size_t /*n*/, void* /*dest*/) {
- return true;
- }
-
- template <typename T>
- static bool DoParse3ary(const char* str, size_t n, void* dest) {
- return re2_internal::Parse(str, n, reinterpret_cast<T*>(dest));
- }
-
- template <typename T>
- static bool DoParse4ary(const char* str, size_t n, void* dest) {
- return re2_internal::Parse(str, n, reinterpret_cast<T*>(dest), 10);
- }
-
-#if !defined(_MSC_VER)
- template <typename T>
- static bool DoParseFrom(const char* str, size_t n, void* dest) {
- if (dest == NULL) return true;
- return reinterpret_cast<T*>(dest)->ParseFrom(str, n);
- }
-#endif
-
- void* arg_;
- Parser parser_;
+ template <typename T>
+ using CanParse3ary = typename std::enable_if<
+ re2_internal::Parse3ary<T>::value,
+ int>::type;
+
+ template <typename T>
+ using CanParse4ary = typename std::enable_if<
+ re2_internal::Parse4ary<T>::value,
+ int>::type;
+
+#if !defined(_MSC_VER)
+ template <typename T>
+ using CanParseFrom = typename std::enable_if<
+ std::is_member_function_pointer<
+ decltype(static_cast<bool (T::*)(const char*, size_t)>(
+ &T::ParseFrom))>::value,
+ int>::type;
+#endif
+
+ public:
+ Arg() : Arg(nullptr) {}
+ Arg(std::nullptr_t ptr) : arg_(ptr), parser_(DoNothing) {}
+
+ template <typename T, CanParse3ary<T> = 0>
+ Arg(T* ptr) : arg_(ptr), parser_(DoParse3ary<T>) {}
+
+ template <typename T, CanParse4ary<T> = 0>
+ Arg(T* ptr) : arg_(ptr), parser_(DoParse4ary<T>) {}
+
+#if !defined(_MSC_VER)
+ template <typename T, CanParseFrom<T> = 0>
+ Arg(T* ptr) : arg_(ptr), parser_(DoParseFrom<T>) {}
+#endif
+
+ typedef bool (*Parser)(const char* str, size_t n, void* dest);
+
+ template <typename T>
+ Arg(T* ptr, Parser parser) : arg_(ptr), parser_(parser) {}
+
+ bool Parse(const char* str, size_t n) const {
+ return (*parser_)(str, n, arg_);
+ }
+
+ private:
+ static bool DoNothing(const char* /*str*/, size_t /*n*/, void* /*dest*/) {
+ return true;
+ }
+
+ template <typename T>
+ static bool DoParse3ary(const char* str, size_t n, void* dest) {
+ return re2_internal::Parse(str, n, reinterpret_cast<T*>(dest));
+ }
+
+ template <typename T>
+ static bool DoParse4ary(const char* str, size_t n, void* dest) {
+ return re2_internal::Parse(str, n, reinterpret_cast<T*>(dest), 10);
+ }
+
+#if !defined(_MSC_VER)
+ template <typename T>
+ static bool DoParseFrom(const char* str, size_t n, void* dest) {
+ if (dest == NULL) return true;
+ return reinterpret_cast<T*>(dest)->ParseFrom(str, n);
+ }
+#endif
+
+ void* arg_;
+ Parser parser_;
};
-template <typename T>
-inline RE2::Arg RE2::CRadix(T* ptr) {
- return RE2::Arg(ptr, [](const char* str, size_t n, void* dest) -> bool {
- return re2_internal::Parse(str, n, reinterpret_cast<T*>(dest), 0);
- });
-}
-
-template <typename T>
-inline RE2::Arg RE2::Hex(T* ptr) {
- return RE2::Arg(ptr, [](const char* str, size_t n, void* dest) -> bool {
- return re2_internal::Parse(str, n, reinterpret_cast<T*>(dest), 16);
- });
+template <typename T>
+inline RE2::Arg RE2::CRadix(T* ptr) {
+ return RE2::Arg(ptr, [](const char* str, size_t n, void* dest) -> bool {
+ return re2_internal::Parse(str, n, reinterpret_cast<T*>(dest), 0);
+ });
}
-template <typename T>
-inline RE2::Arg RE2::Octal(T* ptr) {
- return RE2::Arg(ptr, [](const char* str, size_t n, void* dest) -> bool {
- return re2_internal::Parse(str, n, reinterpret_cast<T*>(dest), 8);
- });
-}
+template <typename T>
+inline RE2::Arg RE2::Hex(T* ptr) {
+ return RE2::Arg(ptr, [](const char* str, size_t n, void* dest) -> bool {
+ return re2_internal::Parse(str, n, reinterpret_cast<T*>(dest), 16);
+ });
+}
+
+template <typename T>
+inline RE2::Arg RE2::Octal(T* ptr) {
+ return RE2::Arg(ptr, [](const char* str, size_t n, void* dest) -> bool {
+ return re2_internal::Parse(str, n, reinterpret_cast<T*>(dest), 8);
+ });
+}
#ifndef SWIG
// Silence warnings about missing initializers for members of LazyRE2.
-#if !defined(__clang__) && defined(__GNUC__) && __GNUC__ >= 6
+#if !defined(__clang__) && defined(__GNUC__) && __GNUC__ >= 6
#pragma GCC diagnostic ignored "-Wmissing-field-initializers"
#endif
@@ -1019,56 +1019,56 @@ class LazyRE2 {
void operator=(const LazyRE2&); // disallowed
};
-#endif
-
-namespace hooks {
-
-// Most platforms support thread_local. Older versions of iOS don't support
-// thread_local, but for the sake of brevity, we lump together all versions
-// of Apple platforms that aren't macOS. If an iOS application really needs
-// the context pointee someday, we can get more specific then...
+#endif
+
+namespace hooks {
+
+// Most platforms support thread_local. Older versions of iOS don't support
+// thread_local, but for the sake of brevity, we lump together all versions
+// of Apple platforms that aren't macOS. If an iOS application really needs
+// the context pointee someday, we can get more specific then...
//
// As per https://github.com/google/re2/issues/325, thread_local support in
// MinGW seems to be buggy. (FWIW, Abseil folks also avoid it.)
-#define RE2_HAVE_THREAD_LOCAL
+#define RE2_HAVE_THREAD_LOCAL
#if (defined(__APPLE__) && !(defined(TARGET_OS_OSX) && TARGET_OS_OSX)) || defined(__MINGW32__)
-#undef RE2_HAVE_THREAD_LOCAL
-#endif
-
-// A hook must not make any assumptions regarding the lifetime of the context
-// pointee beyond the current invocation of the hook. Pointers and references
-// obtained via the context pointee should be considered invalidated when the
-// hook returns. Hence, any data about the context pointee (e.g. its pattern)
-// would have to be copied in order for it to be kept for an indefinite time.
-//
-// A hook must not use RE2 for matching. Control flow reentering RE2::Match()
-// could result in infinite mutual recursion. To discourage that possibility,
-// RE2 will not maintain the context pointer correctly when used in that way.
-#ifdef RE2_HAVE_THREAD_LOCAL
-extern thread_local const RE2* context;
-#endif
-
-struct DFAStateCacheReset {
- int64_t state_budget;
- size_t state_cache_size;
-};
-
-struct DFASearchFailure {
- // Nothing yet...
-};
-
-#define DECLARE_HOOK(type) \
- using type##Callback = void(const type&); \
- void Set##type##Hook(type##Callback* cb); \
- type##Callback* Get##type##Hook();
-
-DECLARE_HOOK(DFAStateCacheReset)
-DECLARE_HOOK(DFASearchFailure)
-
-#undef DECLARE_HOOK
-
-} // namespace hooks
-
+#undef RE2_HAVE_THREAD_LOCAL
+#endif
+
+// A hook must not make any assumptions regarding the lifetime of the context
+// pointee beyond the current invocation of the hook. Pointers and references
+// obtained via the context pointee should be considered invalidated when the
+// hook returns. Hence, any data about the context pointee (e.g. its pattern)
+// would have to be copied in order for it to be kept for an indefinite time.
+//
+// A hook must not use RE2 for matching. Control flow reentering RE2::Match()
+// could result in infinite mutual recursion. To discourage that possibility,
+// RE2 will not maintain the context pointer correctly when used in that way.
+#ifdef RE2_HAVE_THREAD_LOCAL
+extern thread_local const RE2* context;
+#endif
+
+struct DFAStateCacheReset {
+ int64_t state_budget;
+ size_t state_cache_size;
+};
+
+struct DFASearchFailure {
+ // Nothing yet...
+};
+
+#define DECLARE_HOOK(type) \
+ using type##Callback = void(const type&); \
+ void Set##type##Hook(type##Callback* cb); \
+ type##Callback* Get##type##Hook();
+
+DECLARE_HOOK(DFAStateCacheReset)
+DECLARE_HOOK(DFASearchFailure)
+
+#undef DECLARE_HOOK
+
+} // namespace hooks
+
} // namespace re2
using re2::RE2;
diff --git a/contrib/libs/re2/re2/regexp.cc b/contrib/libs/re2/re2/regexp.cc
index 44359cba9c..ca1318b43d 100644
--- a/contrib/libs/re2/re2/regexp.cc
+++ b/contrib/libs/re2/re2/regexp.cc
@@ -20,7 +20,7 @@
#include "util/logging.h"
#include "util/mutex.h"
#include "util/utf.h"
-#include "re2/pod_array.h"
+#include "re2/pod_array.h"
#include "re2/stringpiece.h"
#include "re2/walker-inl.h"
@@ -244,12 +244,12 @@ Regexp* Regexp::ConcatOrAlternate(RegexpOp op, Regexp** sub, int nsub,
return new Regexp(kRegexpEmptyMatch, flags);
}
- PODArray<Regexp*> subcopy;
+ PODArray<Regexp*> subcopy;
if (op == kRegexpAlternate && can_factor) {
// Going to edit sub; make a copy so we don't step on caller.
- subcopy = PODArray<Regexp*>(nsub);
- memmove(subcopy.data(), sub, nsub * sizeof sub[0]);
- sub = subcopy.data();
+ subcopy = PODArray<Regexp*>(nsub);
+ memmove(subcopy.data(), sub, nsub * sizeof sub[0]);
+ sub = subcopy.data();
nsub = FactorAlternation(sub, nsub, flags);
if (nsub == 1) {
Regexp* re = sub[0];
@@ -333,14 +333,14 @@ Regexp* Regexp::NewCharClass(CharClass* cc, ParseFlags flags) {
}
void Regexp::Swap(Regexp* that) {
- // Regexp is not trivially copyable, so we cannot freely copy it with
- // memmove(3), but swapping objects like so is safe for our purposes.
+ // Regexp is not trivially copyable, so we cannot freely copy it with
+ // memmove(3), but swapping objects like so is safe for our purposes.
char tmp[sizeof *this];
- void* vthis = reinterpret_cast<void*>(this);
- void* vthat = reinterpret_cast<void*>(that);
- memmove(tmp, vthis, sizeof *this);
- memmove(vthis, vthat, sizeof *this);
- memmove(vthat, tmp, sizeof *this);
+ void* vthis = reinterpret_cast<void*>(this);
+ void* vthat = reinterpret_cast<void*>(that);
+ memmove(tmp, vthis, sizeof *this);
+ memmove(vthis, vthat, sizeof *this);
+ memmove(vthat, tmp, sizeof *this);
}
// Tests equality of all top-level structure but not subregexps.
@@ -498,7 +498,7 @@ static const char *kErrorStrings[] = {
"invalid character class range",
"missing ]",
"missing )",
- "unexpected )",
+ "unexpected )",
"trailing \\",
"no argument for repetition operator",
"invalid repetition size",
@@ -508,16 +508,16 @@ static const char *kErrorStrings[] = {
"invalid named capture group",
};
-std::string RegexpStatus::CodeText(enum RegexpStatusCode code) {
+std::string RegexpStatus::CodeText(enum RegexpStatusCode code) {
if (code < 0 || code >= arraysize(kErrorStrings))
code = kRegexpInternalError;
return kErrorStrings[code];
}
-std::string RegexpStatus::Text() const {
+std::string RegexpStatus::Text() const {
if (error_arg_.empty())
return CodeText(code_);
- std::string s;
+ std::string s;
s.append(CodeText(code_));
s.append(": ");
s.append(error_arg_.data(), error_arg_.size());
@@ -542,12 +542,12 @@ class NumCapturesWalker : public Regexp::Walker<Ignored> {
ncapture_++;
return ignored;
}
-
+
virtual Ignored ShortVisit(Regexp* re, Ignored ignored) {
- // Should never be called: we use Walk(), not WalkExponential().
-#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
+ // Should never be called: we use Walk(), not WalkExponential().
+#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
LOG(DFATAL) << "NumCapturesWalker::ShortVisit called";
-#endif
+#endif
return ignored;
}
@@ -570,17 +570,17 @@ class NamedCapturesWalker : public Regexp::Walker<Ignored> {
NamedCapturesWalker() : map_(NULL) {}
~NamedCapturesWalker() { delete map_; }
- std::map<std::string, int>* TakeMap() {
- std::map<std::string, int>* m = map_;
+ std::map<std::string, int>* TakeMap() {
+ std::map<std::string, int>* m = map_;
map_ = NULL;
return m;
}
- virtual Ignored PreVisit(Regexp* re, Ignored ignored, bool* stop) {
+ virtual Ignored PreVisit(Regexp* re, Ignored ignored, bool* stop) {
if (re->op() == kRegexpCapture && re->name() != NULL) {
// Allocate map once we find a name.
if (map_ == NULL)
- map_ = new std::map<std::string, int>;
+ map_ = new std::map<std::string, int>;
// Record first occurrence of each name.
// (The rule is that if you have the same name
@@ -591,21 +591,21 @@ class NamedCapturesWalker : public Regexp::Walker<Ignored> {
}
virtual Ignored ShortVisit(Regexp* re, Ignored ignored) {
- // Should never be called: we use Walk(), not WalkExponential().
-#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
+ // Should never be called: we use Walk(), not WalkExponential().
+#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
LOG(DFATAL) << "NamedCapturesWalker::ShortVisit called";
-#endif
+#endif
return ignored;
}
private:
- std::map<std::string, int>* map_;
+ std::map<std::string, int>* map_;
NamedCapturesWalker(const NamedCapturesWalker&) = delete;
NamedCapturesWalker& operator=(const NamedCapturesWalker&) = delete;
};
-std::map<std::string, int>* Regexp::NamedCaptures() {
+std::map<std::string, int>* Regexp::NamedCaptures() {
NamedCapturesWalker w;
w.Walk(this, 0);
return w.TakeMap();
@@ -617,17 +617,17 @@ class CaptureNamesWalker : public Regexp::Walker<Ignored> {
CaptureNamesWalker() : map_(NULL) {}
~CaptureNamesWalker() { delete map_; }
- std::map<int, std::string>* TakeMap() {
- std::map<int, std::string>* m = map_;
+ std::map<int, std::string>* TakeMap() {
+ std::map<int, std::string>* m = map_;
map_ = NULL;
return m;
}
- virtual Ignored PreVisit(Regexp* re, Ignored ignored, bool* stop) {
+ virtual Ignored PreVisit(Regexp* re, Ignored ignored, bool* stop) {
if (re->op() == kRegexpCapture && re->name() != NULL) {
// Allocate map once we find a name.
if (map_ == NULL)
- map_ = new std::map<int, std::string>;
+ map_ = new std::map<int, std::string>;
(*map_)[re->cap()] = *re->name();
}
@@ -635,52 +635,52 @@ class CaptureNamesWalker : public Regexp::Walker<Ignored> {
}
virtual Ignored ShortVisit(Regexp* re, Ignored ignored) {
- // Should never be called: we use Walk(), not WalkExponential().
-#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
+ // Should never be called: we use Walk(), not WalkExponential().
+#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
LOG(DFATAL) << "CaptureNamesWalker::ShortVisit called";
-#endif
+#endif
return ignored;
}
private:
- std::map<int, std::string>* map_;
+ std::map<int, std::string>* map_;
CaptureNamesWalker(const CaptureNamesWalker&) = delete;
CaptureNamesWalker& operator=(const CaptureNamesWalker&) = delete;
};
-std::map<int, std::string>* Regexp::CaptureNames() {
+std::map<int, std::string>* Regexp::CaptureNames() {
CaptureNamesWalker w;
w.Walk(this, 0);
return w.TakeMap();
}
-void ConvertRunesToBytes(bool latin1, Rune* runes, int nrunes,
- std::string* bytes) {
- if (latin1) {
- bytes->resize(nrunes);
- for (int i = 0; i < nrunes; i++)
- (*bytes)[i] = static_cast<char>(runes[i]);
- } else {
- bytes->resize(nrunes * UTFmax); // worst case
- char* p = &(*bytes)[0];
- for (int i = 0; i < nrunes; i++)
- p += runetochar(p, &runes[i]);
- bytes->resize(p - &(*bytes)[0]);
- bytes->shrink_to_fit();
- }
-}
-
+void ConvertRunesToBytes(bool latin1, Rune* runes, int nrunes,
+ std::string* bytes) {
+ if (latin1) {
+ bytes->resize(nrunes);
+ for (int i = 0; i < nrunes; i++)
+ (*bytes)[i] = static_cast<char>(runes[i]);
+ } else {
+ bytes->resize(nrunes * UTFmax); // worst case
+ char* p = &(*bytes)[0];
+ for (int i = 0; i < nrunes; i++)
+ p += runetochar(p, &runes[i]);
+ bytes->resize(p - &(*bytes)[0]);
+ bytes->shrink_to_fit();
+ }
+}
+
// Determines whether regexp matches must be anchored
// with a fixed string prefix. If so, returns the prefix and
// the regexp that remains after the prefix. The prefix might
// be ASCII case-insensitive.
-bool Regexp::RequiredPrefix(std::string* prefix, bool* foldcase,
- Regexp** suffix) {
- prefix->clear();
- *foldcase = false;
- *suffix = NULL;
-
+bool Regexp::RequiredPrefix(std::string* prefix, bool* foldcase,
+ Regexp** suffix) {
+ prefix->clear();
+ *foldcase = false;
+ *suffix = NULL;
+
// No need for a walker: the regexp must be of the form
// 1. some number of ^ anchors
// 2. a literal char or string
@@ -688,59 +688,59 @@ bool Regexp::RequiredPrefix(std::string* prefix, bool* foldcase,
if (op_ != kRegexpConcat)
return false;
int i = 0;
- while (i < nsub_ && sub()[i]->op_ == kRegexpBeginText)
+ while (i < nsub_ && sub()[i]->op_ == kRegexpBeginText)
i++;
if (i == 0 || i >= nsub_)
return false;
- Regexp* re = sub()[i];
- if (re->op_ != kRegexpLiteral &&
- re->op_ != kRegexpLiteralString)
- return false;
+ Regexp* re = sub()[i];
+ if (re->op_ != kRegexpLiteral &&
+ re->op_ != kRegexpLiteralString)
+ return false;
i++;
if (i < nsub_) {
for (int j = i; j < nsub_; j++)
- sub()[j]->Incref();
- *suffix = Concat(sub() + i, nsub_ - i, parse_flags());
+ sub()[j]->Incref();
+ *suffix = Concat(sub() + i, nsub_ - i, parse_flags());
} else {
- *suffix = new Regexp(kRegexpEmptyMatch, parse_flags());
- }
-
- bool latin1 = (re->parse_flags() & Latin1) != 0;
- Rune* runes = re->op_ == kRegexpLiteral ? &re->rune_ : re->runes_;
- int nrunes = re->op_ == kRegexpLiteral ? 1 : re->nrunes_;
- ConvertRunesToBytes(latin1, runes, nrunes, prefix);
- *foldcase = (re->parse_flags() & FoldCase) != 0;
+ *suffix = new Regexp(kRegexpEmptyMatch, parse_flags());
+ }
+
+ bool latin1 = (re->parse_flags() & Latin1) != 0;
+ Rune* runes = re->op_ == kRegexpLiteral ? &re->rune_ : re->runes_;
+ int nrunes = re->op_ == kRegexpLiteral ? 1 : re->nrunes_;
+ ConvertRunesToBytes(latin1, runes, nrunes, prefix);
+ *foldcase = (re->parse_flags() & FoldCase) != 0;
+ return true;
+}
+
+// Determines whether regexp matches must be unanchored
+// with a fixed string prefix. If so, returns the prefix.
+// The prefix might be ASCII case-insensitive.
+bool Regexp::RequiredPrefixForAccel(std::string* prefix, bool* foldcase) {
+ prefix->clear();
+ *foldcase = false;
+
+ // No need for a walker: the regexp must either begin with or be
+ // a literal char or string. We "see through" capturing groups,
+ // but make no effort to glue multiple prefix fragments together.
+ Regexp* re = op_ == kRegexpConcat && nsub_ > 0 ? sub()[0] : this;
+ while (re->op_ == kRegexpCapture) {
+ re = re->sub()[0];
+ if (re->op_ == kRegexpConcat && re->nsub_ > 0)
+ re = re->sub()[0];
+ }
+ if (re->op_ != kRegexpLiteral &&
+ re->op_ != kRegexpLiteralString)
+ return false;
+
+ bool latin1 = (re->parse_flags() & Latin1) != 0;
+ Rune* runes = re->op_ == kRegexpLiteral ? &re->rune_ : re->runes_;
+ int nrunes = re->op_ == kRegexpLiteral ? 1 : re->nrunes_;
+ ConvertRunesToBytes(latin1, runes, nrunes, prefix);
+ *foldcase = (re->parse_flags() & FoldCase) != 0;
return true;
}
-// Determines whether regexp matches must be unanchored
-// with a fixed string prefix. If so, returns the prefix.
-// The prefix might be ASCII case-insensitive.
-bool Regexp::RequiredPrefixForAccel(std::string* prefix, bool* foldcase) {
- prefix->clear();
- *foldcase = false;
-
- // No need for a walker: the regexp must either begin with or be
- // a literal char or string. We "see through" capturing groups,
- // but make no effort to glue multiple prefix fragments together.
- Regexp* re = op_ == kRegexpConcat && nsub_ > 0 ? sub()[0] : this;
- while (re->op_ == kRegexpCapture) {
- re = re->sub()[0];
- if (re->op_ == kRegexpConcat && re->nsub_ > 0)
- re = re->sub()[0];
- }
- if (re->op_ != kRegexpLiteral &&
- re->op_ != kRegexpLiteralString)
- return false;
-
- bool latin1 = (re->parse_flags() & Latin1) != 0;
- Rune* runes = re->op_ == kRegexpLiteral ? &re->rune_ : re->runes_;
- int nrunes = re->op_ == kRegexpLiteral ? 1 : re->nrunes_;
- ConvertRunesToBytes(latin1, runes, nrunes, prefix);
- *foldcase = (re->parse_flags() & FoldCase) != 0;
- return true;
-}
-
// Character class builder is a balanced binary tree (STL set)
// containing non-overlapping, non-abutting RuneRanges.
// The less-than operator used in the tree treats two
@@ -918,7 +918,7 @@ void CharClassBuilder::Negate() {
// The ranges are allocated in the same block as the header,
// necessitating a special allocator and Delete method.
-CharClass* CharClass::New(size_t maxranges) {
+CharClass* CharClass::New(size_t maxranges) {
CharClass* cc;
uint8_t* data = new uint8_t[sizeof *cc + maxranges*sizeof cc->ranges_[0]];
cc = reinterpret_cast<CharClass*>(data);
@@ -935,7 +935,7 @@ void CharClass::Delete() {
}
CharClass* CharClass::Negate() {
- CharClass* cc = CharClass::New(static_cast<size_t>(nranges_+1));
+ CharClass* cc = CharClass::New(static_cast<size_t>(nranges_+1));
cc->folds_ascii_ = folds_ascii_;
cc->nrunes_ = Runemax + 1 - nrunes_;
int n = 0;
@@ -972,7 +972,7 @@ bool CharClass::Contains(Rune r) const {
}
CharClass* CharClassBuilder::GetCharClass() {
- CharClass* cc = CharClass::New(ranges_.size());
+ CharClass* cc = CharClass::New(ranges_.size());
int n = 0;
for (iterator it = begin(); it != end(); ++it)
cc->ranges_[n++] = *it;
diff --git a/contrib/libs/re2/re2/regexp.h b/contrib/libs/re2/re2/regexp.h
index 1fb5ed4e44..b6446f9fe5 100644
--- a/contrib/libs/re2/re2/regexp.h
+++ b/contrib/libs/re2/re2/regexp.h
@@ -86,15 +86,15 @@
// form accessible to clients, so that client code can analyze the
// parsed regular expressions.
-#include <stddef.h>
+#include <stddef.h>
#include <stdint.h>
#include <map>
#include <set>
#include <string>
-#include "util/util.h"
-#include "util/logging.h"
-#include "util/utf.h"
+#include "util/util.h"
+#include "util/logging.h"
+#include "util/utf.h"
#include "re2/stringpiece.h"
namespace re2 {
@@ -178,7 +178,7 @@ enum RegexpStatusCode {
kRegexpBadCharRange, // bad character class range
kRegexpMissingBracket, // missing closing ]
kRegexpMissingParen, // missing closing )
- kRegexpUnexpectedParen, // unexpected closing )
+ kRegexpUnexpectedParen, // unexpected closing )
kRegexpTrailingBackslash, // at end of regexp
kRegexpRepeatArgument, // repeat argument missing, e.g. "*"
kRegexpRepeatSize, // bad repetition argument
@@ -196,7 +196,7 @@ class RegexpStatus {
void set_code(RegexpStatusCode code) { code_ = code; }
void set_error_arg(const StringPiece& error_arg) { error_arg_ = error_arg; }
- void set_tmp(std::string* tmp) { delete tmp_; tmp_ = tmp; }
+ void set_tmp(std::string* tmp) { delete tmp_; tmp_ = tmp; }
RegexpStatusCode code() const { return code_; }
const StringPiece& error_arg() const { return error_arg_; }
bool ok() const { return code() == kRegexpSuccess; }
@@ -206,16 +206,16 @@ class RegexpStatus {
// Returns text equivalent of code, e.g.:
// "Bad character class"
- static std::string CodeText(RegexpStatusCode code);
+ static std::string CodeText(RegexpStatusCode code);
// Returns text describing error, e.g.:
// "Bad character class: [z-a]"
- std::string Text() const;
+ std::string Text() const;
private:
RegexpStatusCode code_; // Kind of error
- StringPiece error_arg_; // Piece of regexp containing syntax error.
- std::string* tmp_; // Temporary storage, possibly where error_arg_ is.
+ StringPiece error_arg_; // Piece of regexp containing syntax error.
+ std::string* tmp_; // Temporary storage, possibly where error_arg_ is.
RegexpStatus(const RegexpStatus&) = delete;
RegexpStatus& operator=(const RegexpStatus&) = delete;
@@ -260,7 +260,7 @@ class CharClass {
private:
CharClass(); // not implemented
~CharClass(); // not implemented
- static CharClass* New(size_t maxranges);
+ static CharClass* New(size_t maxranges);
friend class CharClassBuilder;
@@ -338,7 +338,7 @@ class Regexp {
Rune rune() { DCHECK_EQ(op_, kRegexpLiteral); return rune_; }
CharClass* cc() { DCHECK_EQ(op_, kRegexpCharClass); return cc_; }
int cap() { DCHECK_EQ(op_, kRegexpCapture); return cap_; }
- const std::string* name() { DCHECK_EQ(op_, kRegexpCapture); return name_; }
+ const std::string* name() { DCHECK_EQ(op_, kRegexpCapture); return name_; }
Rune* runes() { DCHECK_EQ(op_, kRegexpLiteralString); return runes_; }
int nrunes() { DCHECK_EQ(op_, kRegexpLiteralString); return nrunes_; }
int match_id() { DCHECK_EQ(op_, kRegexpHaveMatch); return match_id_; }
@@ -370,7 +370,7 @@ class Regexp {
// string representation of the simplified form. Returns true on success.
// Returns false and sets *status (if status != NULL) on parse error.
static bool SimplifyRegexp(const StringPiece& src, ParseFlags flags,
- std::string* dst, RegexpStatus* status);
+ std::string* dst, RegexpStatus* status);
// Returns the number of capturing groups in the regexp.
int NumCaptures();
@@ -379,16 +379,16 @@ class Regexp {
// Returns a map from names to capturing group indices,
// or NULL if the regexp contains no named capture groups.
// The caller is responsible for deleting the map.
- std::map<std::string, int>* NamedCaptures();
+ std::map<std::string, int>* NamedCaptures();
// Returns a map from capturing group indices to capturing group
// names or NULL if the regexp contains no named capture groups. The
// caller is responsible for deleting the map.
- std::map<int, std::string>* CaptureNames();
+ std::map<int, std::string>* CaptureNames();
// Returns a string representation of the current regexp,
// using as few parentheses as possible.
- std::string ToString();
+ std::string ToString();
// Convenience functions. They consume the passed reference,
// so in many cases you should use, e.g., Plus(re->Incref(), flags).
@@ -410,7 +410,7 @@ class Regexp {
// Debugging function. Returns string format for regexp
// that makes structure clear. Does NOT use regexp syntax.
- std::string Dump();
+ std::string Dump();
// Helper traversal class, defined fully in walker-inl.h.
template<typename T> class Walker;
@@ -437,22 +437,22 @@ class Regexp {
// begin with a non-empty fixed string (perhaps after ASCII
// case-folding). If so, returns the prefix and the sub-regexp that
// follows it.
- // Callers should expect *prefix, *foldcase and *suffix to be "zeroed"
- // regardless of the return value.
- bool RequiredPrefix(std::string* prefix, bool* foldcase,
- Regexp** suffix);
-
- // Whether every match of this regexp must be unanchored and
- // begin with a non-empty fixed string (perhaps after ASCII
- // case-folding). If so, returns the prefix.
- // Callers should expect *prefix and *foldcase to be "zeroed"
- // regardless of the return value.
- bool RequiredPrefixForAccel(std::string* prefix, bool* foldcase);
-
- // Controls the maximum repeat count permitted by the parser.
- // FOR FUZZING ONLY.
- static void FUZZING_ONLY_set_maximum_repeat_count(int i);
-
+ // Callers should expect *prefix, *foldcase and *suffix to be "zeroed"
+ // regardless of the return value.
+ bool RequiredPrefix(std::string* prefix, bool* foldcase,
+ Regexp** suffix);
+
+ // Whether every match of this regexp must be unanchored and
+ // begin with a non-empty fixed string (perhaps after ASCII
+ // case-folding). If so, returns the prefix.
+ // Callers should expect *prefix and *foldcase to be "zeroed"
+ // regardless of the return value.
+ bool RequiredPrefixForAccel(std::string* prefix, bool* foldcase);
+
+ // Controls the maximum repeat count permitted by the parser.
+ // FOR FUZZING ONLY.
+ static void FUZZING_ONLY_set_maximum_repeat_count(int i);
+
private:
// Constructor allocates vectors as appropriate for operator.
explicit Regexp(RegexpOp op, ParseFlags parse_flags);
@@ -507,7 +507,7 @@ class Regexp {
// Simplifies an alternation of literal strings by factoring out
// common prefixes.
static int FactorAlternation(Regexp** sub, int nsub, ParseFlags flags);
- friend class FactorAlternationImpl;
+ friend class FactorAlternationImpl;
// Is a == b? Only efficient on regexps that have not been through
// Simplify yet - the expansion of a kRegexpRepeat will make this
@@ -519,7 +519,7 @@ class Regexp {
DCHECK(n >= 0 && static_cast<uint16_t>(n) == n);
if (n > 1)
submany_ = new Regexp*[n];
- nsub_ = static_cast<uint16_t>(n);
+ nsub_ = static_cast<uint16_t>(n);
}
// Add Rune to LiteralString
@@ -577,7 +577,7 @@ class Regexp {
};
struct { // Capture
int cap_;
- std::string* name_;
+ std::string* name_;
};
struct { // LiteralString
int nrunes_;
diff --git a/contrib/libs/re2/re2/set.cc b/contrib/libs/re2/re2/set.cc
index d847ad1f31..18705663a5 100644
--- a/contrib/libs/re2/re2/set.cc
+++ b/contrib/libs/re2/re2/set.cc
@@ -5,56 +5,56 @@
#include "re2/set.h"
#include <stddef.h>
-#include <algorithm>
-#include <memory>
-#include <utility>
+#include <algorithm>
+#include <memory>
+#include <utility>
#include "util/util.h"
#include "util/logging.h"
-#include "re2/pod_array.h"
+#include "re2/pod_array.h"
#include "re2/prog.h"
#include "re2/re2.h"
#include "re2/regexp.h"
-#include "re2/stringpiece.h"
+#include "re2/stringpiece.h"
namespace re2 {
-RE2::Set::Set(const RE2::Options& options, RE2::Anchor anchor)
- : options_(options),
- anchor_(anchor),
- compiled_(false),
- size_(0) {
- options_.set_never_capture(true); // might unblock some optimisations
+RE2::Set::Set(const RE2::Options& options, RE2::Anchor anchor)
+ : options_(options),
+ anchor_(anchor),
+ compiled_(false),
+ size_(0) {
+ options_.set_never_capture(true); // might unblock some optimisations
}
RE2::Set::~Set() {
- for (size_t i = 0; i < elem_.size(); i++)
- elem_[i].second->Decref();
+ for (size_t i = 0; i < elem_.size(); i++)
+ elem_[i].second->Decref();
}
-RE2::Set::Set(Set&& other)
- : options_(other.options_),
- anchor_(other.anchor_),
- elem_(std::move(other.elem_)),
- compiled_(other.compiled_),
- size_(other.size_),
- prog_(std::move(other.prog_)) {
- other.elem_.clear();
- other.elem_.shrink_to_fit();
- other.compiled_ = false;
- other.size_ = 0;
- other.prog_.reset();
-}
-
-RE2::Set& RE2::Set::operator=(Set&& other) {
- this->~Set();
- (void) new (this) Set(std::move(other));
- return *this;
-}
-
-int RE2::Set::Add(const StringPiece& pattern, std::string* error) {
+RE2::Set::Set(Set&& other)
+ : options_(other.options_),
+ anchor_(other.anchor_),
+ elem_(std::move(other.elem_)),
+ compiled_(other.compiled_),
+ size_(other.size_),
+ prog_(std::move(other.prog_)) {
+ other.elem_.clear();
+ other.elem_.shrink_to_fit();
+ other.compiled_ = false;
+ other.size_ = 0;
+ other.prog_.reset();
+}
+
+RE2::Set& RE2::Set::operator=(Set&& other) {
+ this->~Set();
+ (void) new (this) Set(std::move(other));
+ return *this;
+}
+
+int RE2::Set::Add(const StringPiece& pattern, std::string* error) {
if (compiled_) {
- LOG(DFATAL) << "RE2::Set::Add() called after compiling";
+ LOG(DFATAL) << "RE2::Set::Add() called after compiling";
return -1;
}
@@ -71,105 +71,105 @@ int RE2::Set::Add(const StringPiece& pattern, std::string* error) {
}
// Concatenate with match index and push on vector.
- int n = static_cast<int>(elem_.size());
+ int n = static_cast<int>(elem_.size());
re2::Regexp* m = re2::Regexp::HaveMatch(n, pf);
if (re->op() == kRegexpConcat) {
int nsub = re->nsub();
- PODArray<re2::Regexp*> sub(nsub + 1);
+ PODArray<re2::Regexp*> sub(nsub + 1);
for (int i = 0; i < nsub; i++)
sub[i] = re->sub()[i]->Incref();
sub[nsub] = m;
re->Decref();
- re = re2::Regexp::Concat(sub.data(), nsub + 1, pf);
+ re = re2::Regexp::Concat(sub.data(), nsub + 1, pf);
} else {
re2::Regexp* sub[2];
sub[0] = re;
sub[1] = m;
re = re2::Regexp::Concat(sub, 2, pf);
}
- elem_.emplace_back(std::string(pattern), re);
+ elem_.emplace_back(std::string(pattern), re);
return n;
}
bool RE2::Set::Compile() {
if (compiled_) {
- LOG(DFATAL) << "RE2::Set::Compile() called more than once";
+ LOG(DFATAL) << "RE2::Set::Compile() called more than once";
return false;
}
compiled_ = true;
- size_ = static_cast<int>(elem_.size());
-
- // Sort the elements by their patterns. This is good enough for now
- // until we have a Regexp comparison function. (Maybe someday...)
- std::sort(elem_.begin(), elem_.end(),
- [](const Elem& a, const Elem& b) -> bool {
- return a.first < b.first;
- });
-
- PODArray<re2::Regexp*> sub(size_);
- for (int i = 0; i < size_; i++)
- sub[i] = elem_[i].second;
- elem_.clear();
- elem_.shrink_to_fit();
-
+ size_ = static_cast<int>(elem_.size());
+
+ // Sort the elements by their patterns. This is good enough for now
+ // until we have a Regexp comparison function. (Maybe someday...)
+ std::sort(elem_.begin(), elem_.end(),
+ [](const Elem& a, const Elem& b) -> bool {
+ return a.first < b.first;
+ });
+
+ PODArray<re2::Regexp*> sub(size_);
+ for (int i = 0; i < size_; i++)
+ sub[i] = elem_[i].second;
+ elem_.clear();
+ elem_.shrink_to_fit();
+
Regexp::ParseFlags pf = static_cast<Regexp::ParseFlags>(
options_.ParseFlags());
- re2::Regexp* re = re2::Regexp::Alternate(sub.data(), size_, pf);
-
- prog_.reset(Prog::CompileSet(re, anchor_, options_.max_mem()));
+ re2::Regexp* re = re2::Regexp::Alternate(sub.data(), size_, pf);
+
+ prog_.reset(Prog::CompileSet(re, anchor_, options_.max_mem()));
re->Decref();
- return prog_ != nullptr;
-}
+ return prog_ != nullptr;
+}
-bool RE2::Set::Match(const StringPiece& text, std::vector<int>* v) const {
- return Match(text, v, NULL);
+bool RE2::Set::Match(const StringPiece& text, std::vector<int>* v) const {
+ return Match(text, v, NULL);
}
-bool RE2::Set::Match(const StringPiece& text, std::vector<int>* v,
- ErrorInfo* error_info) const {
+bool RE2::Set::Match(const StringPiece& text, std::vector<int>* v,
+ ErrorInfo* error_info) const {
if (!compiled_) {
- LOG(DFATAL) << "RE2::Set::Match() called before compiling";
- if (error_info != NULL)
- error_info->kind = kNotCompiled;
+ LOG(DFATAL) << "RE2::Set::Match() called before compiling";
+ if (error_info != NULL)
+ error_info->kind = kNotCompiled;
return false;
}
-#ifdef RE2_HAVE_THREAD_LOCAL
- hooks::context = NULL;
-#endif
- bool dfa_failed = false;
- std::unique_ptr<SparseSet> matches;
- if (v != NULL) {
- matches.reset(new SparseSet(size_));
+#ifdef RE2_HAVE_THREAD_LOCAL
+ hooks::context = NULL;
+#endif
+ bool dfa_failed = false;
+ std::unique_ptr<SparseSet> matches;
+ if (v != NULL) {
+ matches.reset(new SparseSet(size_));
v->clear();
- }
- bool ret = prog_->SearchDFA(text, text, Prog::kAnchored, Prog::kManyMatch,
- NULL, &dfa_failed, matches.get());
+ }
+ bool ret = prog_->SearchDFA(text, text, Prog::kAnchored, Prog::kManyMatch,
+ NULL, &dfa_failed, matches.get());
if (dfa_failed) {
if (options_.log_errors())
- LOG(ERROR) << "DFA out of memory: "
- << "program size " << prog_->size() << ", "
- << "list count " << prog_->list_count() << ", "
- << "bytemap range " << prog_->bytemap_range();
- if (error_info != NULL)
- error_info->kind = kOutOfMemory;
+ LOG(ERROR) << "DFA out of memory: "
+ << "program size " << prog_->size() << ", "
+ << "list count " << prog_->list_count() << ", "
+ << "bytemap range " << prog_->bytemap_range();
+ if (error_info != NULL)
+ error_info->kind = kOutOfMemory;
return false;
}
- if (ret == false) {
- if (error_info != NULL)
- error_info->kind = kNoError;
+ if (ret == false) {
+ if (error_info != NULL)
+ error_info->kind = kNoError;
return false;
}
- if (v != NULL) {
- if (matches->empty()) {
- LOG(DFATAL) << "RE2::Set::Match() matched, but no matches returned?!";
- if (error_info != NULL)
- error_info->kind = kInconsistent;
- return false;
- }
- v->assign(matches->begin(), matches->end());
- }
- if (error_info != NULL)
- error_info->kind = kNoError;
+ if (v != NULL) {
+ if (matches->empty()) {
+ LOG(DFATAL) << "RE2::Set::Match() matched, but no matches returned?!";
+ if (error_info != NULL)
+ error_info->kind = kInconsistent;
+ return false;
+ }
+ v->assign(matches->begin(), matches->end());
+ }
+ if (error_info != NULL)
+ error_info->kind = kNoError;
return true;
}
diff --git a/contrib/libs/re2/re2/set.h b/contrib/libs/re2/re2/set.h
index 540791cb34..8d64f30ccd 100644
--- a/contrib/libs/re2/re2/set.h
+++ b/contrib/libs/re2/re2/set.h
@@ -5,9 +5,9 @@
#ifndef RE2_SET_H_
#define RE2_SET_H_
-#include <memory>
+#include <memory>
#include <string>
-#include <utility>
+#include <utility>
#include <vector>
#include "re2/re2.h"
@@ -23,61 +23,61 @@ namespace re2 {
// be searched for simultaneously.
class RE2::Set {
public:
- enum ErrorKind {
- kNoError = 0,
- kNotCompiled, // The set is not compiled.
- kOutOfMemory, // The DFA ran out of memory.
- kInconsistent, // The result is inconsistent. This should never happen.
- };
-
- struct ErrorInfo {
- ErrorKind kind;
- };
-
+ enum ErrorKind {
+ kNoError = 0,
+ kNotCompiled, // The set is not compiled.
+ kOutOfMemory, // The DFA ran out of memory.
+ kInconsistent, // The result is inconsistent. This should never happen.
+ };
+
+ struct ErrorInfo {
+ ErrorKind kind;
+ };
+
Set(const RE2::Options& options, RE2::Anchor anchor);
~Set();
- // Not copyable.
- Set(const Set&) = delete;
- Set& operator=(const Set&) = delete;
- // Movable.
- Set(Set&& other);
- Set& operator=(Set&& other);
-
- // Adds pattern to the set using the options passed to the constructor.
- // Returns the index that will identify the regexp in the output of Match(),
- // or -1 if the regexp cannot be parsed.
+ // Not copyable.
+ Set(const Set&) = delete;
+ Set& operator=(const Set&) = delete;
+ // Movable.
+ Set(Set&& other);
+ Set& operator=(Set&& other);
+
+ // Adds pattern to the set using the options passed to the constructor.
+ // Returns the index that will identify the regexp in the output of Match(),
+ // or -1 if the regexp cannot be parsed.
// Indices are assigned in sequential order starting from 0.
- // Errors do not increment the index; if error is not NULL, *error will hold
- // the error message from the parser.
- int Add(const StringPiece& pattern, std::string* error);
-
- // Compiles the set in preparation for matching.
- // Returns false if the compiler runs out of memory.
- // Add() must not be called again after Compile().
- // Compile() must be called before Match().
+ // Errors do not increment the index; if error is not NULL, *error will hold
+ // the error message from the parser.
+ int Add(const StringPiece& pattern, std::string* error);
+
+ // Compiles the set in preparation for matching.
+ // Returns false if the compiler runs out of memory.
+ // Add() must not be called again after Compile().
+ // Compile() must be called before Match().
bool Compile();
- // Returns true if text matches at least one of the regexps in the set.
- // Fills v (if not NULL) with the indices of the matching regexps.
+ // Returns true if text matches at least one of the regexps in the set.
+ // Fills v (if not NULL) with the indices of the matching regexps.
// Callers must not expect v to be sorted.
bool Match(const StringPiece& text, std::vector<int>* v) const;
- // As above, but populates error_info (if not NULL) when none of the regexps
- // in the set matched. This can inform callers when DFA execution fails, for
- // example, because they might wish to handle that case differently.
- bool Match(const StringPiece& text, std::vector<int>* v,
- ErrorInfo* error_info) const;
-
+ // As above, but populates error_info (if not NULL) when none of the regexps
+ // in the set matched. This can inform callers when DFA execution fails, for
+ // example, because they might wish to handle that case differently.
+ bool Match(const StringPiece& text, std::vector<int>* v,
+ ErrorInfo* error_info) const;
+
private:
- typedef std::pair<std::string, re2::Regexp*> Elem;
-
+ typedef std::pair<std::string, re2::Regexp*> Elem;
+
RE2::Options options_;
RE2::Anchor anchor_;
- std::vector<Elem> elem_;
+ std::vector<Elem> elem_;
bool compiled_;
- int size_;
- std::unique_ptr<re2::Prog> prog_;
+ int size_;
+ std::unique_ptr<re2::Prog> prog_;
};
} // namespace re2
diff --git a/contrib/libs/re2/re2/simplify.cc b/contrib/libs/re2/re2/simplify.cc
index 94b06d1c1a..663d5fcd45 100644
--- a/contrib/libs/re2/re2/simplify.cc
+++ b/contrib/libs/re2/re2/simplify.cc
@@ -11,7 +11,7 @@
#include "util/util.h"
#include "util/logging.h"
#include "util/utf.h"
-#include "re2/pod_array.h"
+#include "re2/pod_array.h"
#include "re2/regexp.h"
#include "re2/walker-inl.h"
@@ -21,7 +21,7 @@ namespace re2 {
// string representation of the simplified form. Returns true on success.
// Returns false and sets *error (if error != NULL) on error.
bool Regexp::SimplifyRegexp(const StringPiece& src, ParseFlags flags,
- std::string* dst, RegexpStatus* status) {
+ std::string* dst, RegexpStatus* status) {
Regexp* re = Parse(src, flags, status);
if (re == NULL)
return false;
@@ -178,20 +178,20 @@ Regexp* Regexp::Simplify() {
CoalesceWalker cw;
Regexp* cre = cw.Walk(this, NULL);
if (cre == NULL)
- return NULL;
- if (cw.stopped_early()) {
- cre->Decref();
- return NULL;
- }
+ return NULL;
+ if (cw.stopped_early()) {
+ cre->Decref();
+ return NULL;
+ }
SimplifyWalker sw;
Regexp* sre = sw.Walk(cre, NULL);
cre->Decref();
- if (sre == NULL)
- return NULL;
- if (sw.stopped_early()) {
- sre->Decref();
- return NULL;
- }
+ if (sre == NULL)
+ return NULL;
+ if (sw.stopped_early()) {
+ sre->Decref();
+ return NULL;
+ }
return sre;
}
@@ -220,10 +220,10 @@ Regexp* CoalesceWalker::Copy(Regexp* re) {
}
Regexp* CoalesceWalker::ShortVisit(Regexp* re, Regexp* parent_arg) {
- // Should never be called: we use Walk(), not WalkExponential().
-#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
+ // Should never be called: we use Walk(), not WalkExponential().
+#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
LOG(DFATAL) << "CoalesceWalker::ShortVisit called";
-#endif
+#endif
return re->Incref();
}
@@ -446,10 +446,10 @@ Regexp* SimplifyWalker::Copy(Regexp* re) {
}
Regexp* SimplifyWalker::ShortVisit(Regexp* re, Regexp* parent_arg) {
- // Should never be called: we use Walk(), not WalkExponential().
-#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
+ // Should never be called: we use Walk(), not WalkExponential().
+#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
LOG(DFATAL) << "SimplifyWalker::ShortVisit called";
-#endif
+#endif
return re->Incref();
}
@@ -599,11 +599,11 @@ Regexp* SimplifyWalker::SimplifyRepeat(Regexp* re, int min, int max,
return Regexp::Plus(re->Incref(), f);
// General case: x{4,} is xxxx+
- PODArray<Regexp*> nre_subs(min);
+ PODArray<Regexp*> nre_subs(min);
for (int i = 0; i < min-1; i++)
nre_subs[i] = re->Incref();
nre_subs[min-1] = Regexp::Plus(re->Incref(), f);
- return Regexp::Concat(nre_subs.data(), min, f);
+ return Regexp::Concat(nre_subs.data(), min, f);
}
// Special case: (x){0} matches only empty string.
@@ -621,10 +621,10 @@ Regexp* SimplifyWalker::SimplifyRepeat(Regexp* re, int min, int max,
// Build leading prefix: xx. Capturing only on the last one.
Regexp* nre = NULL;
if (min > 0) {
- PODArray<Regexp*> nre_subs(min);
+ PODArray<Regexp*> nre_subs(min);
for (int i = 0; i < min; i++)
nre_subs[i] = re->Incref();
- nre = Regexp::Concat(nre_subs.data(), min, f);
+ nre = Regexp::Concat(nre_subs.data(), min, f);
}
// Build and attach suffix: (x(x(x)?)?)?
diff --git a/contrib/libs/re2/re2/sparse_array.h b/contrib/libs/re2/re2/sparse_array.h
index 3577d6dc76..09ffe086b7 100644
--- a/contrib/libs/re2/re2/sparse_array.h
+++ b/contrib/libs/re2/re2/sparse_array.h
@@ -2,8 +2,8 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-#ifndef RE2_SPARSE_ARRAY_H_
-#define RE2_SPARSE_ARRAY_H_
+#ifndef RE2_SPARSE_ARRAY_H_
+#define RE2_SPARSE_ARRAY_H_
// DESCRIPTION
//
@@ -55,55 +55,55 @@
// IMPLEMENTATION
//
-// SparseArray is an array dense_ and an array sparse_ of identical size.
-// At any point, the number of elements in the sparse array is size_.
+// SparseArray is an array dense_ and an array sparse_ of identical size.
+// At any point, the number of elements in the sparse array is size_.
//
-// The array dense_ contains the size_ elements in the sparse array (with
+// The array dense_ contains the size_ elements in the sparse array (with
// their indices),
// in the order that the elements were first inserted. This array is dense:
// the size_ pairs are dense_[0] through dense_[size_-1].
//
-// The array sparse_ maps from indices in [0,m) to indices in [0,size_).
-// For indices present in the array, dense_[sparse_[i]].index_ == i.
-// For indices not present in the array, sparse_ can contain any value at all,
-// perhaps outside the range [0, size_) but perhaps not.
+// The array sparse_ maps from indices in [0,m) to indices in [0,size_).
+// For indices present in the array, dense_[sparse_[i]].index_ == i.
+// For indices not present in the array, sparse_ can contain any value at all,
+// perhaps outside the range [0, size_) but perhaps not.
//
-// The lax requirement on sparse_ values makes clearing the array very easy:
-// set size_ to 0. Lookups are slightly more complicated.
-// An index i has a value in the array if and only if:
-// sparse_[i] is in [0, size_) AND
-// dense_[sparse_[i]].index_ == i.
+// The lax requirement on sparse_ values makes clearing the array very easy:
+// set size_ to 0. Lookups are slightly more complicated.
+// An index i has a value in the array if and only if:
+// sparse_[i] is in [0, size_) AND
+// dense_[sparse_[i]].index_ == i.
// If both these properties hold, only then it is safe to refer to
-// dense_[sparse_[i]].value_
+// dense_[sparse_[i]].value_
// as the value associated with index i.
//
-// To insert a new entry, set sparse_[i] to size_,
+// To insert a new entry, set sparse_[i] to size_,
// initialize dense_[size_], and then increment size_.
//
// To make the sparse array as efficient as possible for non-primitive types,
// elements may or may not be destroyed when they are deleted from the sparse
-// array through a call to resize(). They immediately become inaccessible, but
-// they are only guaranteed to be destroyed when the SparseArray destructor is
-// called.
+// array through a call to resize(). They immediately become inaccessible, but
+// they are only guaranteed to be destroyed when the SparseArray destructor is
+// called.
//
// A moved-from SparseArray will be empty.
-// Doing this simplifies the logic below.
-#ifndef __has_feature
-#define __has_feature(x) 0
-#endif
-
+// Doing this simplifies the logic below.
+#ifndef __has_feature
+#define __has_feature(x) 0
+#endif
+
#include <assert.h>
#include <stdint.h>
-#if __has_feature(memory_sanitizer)
-#include <sanitizer/msan_interface.h>
-#endif
+#if __has_feature(memory_sanitizer)
+#include <sanitizer/msan_interface.h>
+#endif
#include <algorithm>
#include <memory>
#include <utility>
-#include "re2/pod_array.h"
-
+#include "re2/pod_array.h"
+
namespace re2 {
template<typename Value>
@@ -116,14 +116,14 @@ class SparseArray {
// IndexValue pairs: exposed in SparseArray::iterator.
class IndexValue;
- typedef IndexValue* iterator;
- typedef const IndexValue* const_iterator;
+ typedef IndexValue* iterator;
+ typedef const IndexValue* const_iterator;
SparseArray(const SparseArray& src);
- SparseArray(SparseArray&& src);
+ SparseArray(SparseArray&& src);
SparseArray& operator=(const SparseArray& src);
- SparseArray& operator=(SparseArray&& src);
+ SparseArray& operator=(SparseArray&& src);
// Return the number of entries in the array.
int size() const {
@@ -137,30 +137,30 @@ class SparseArray {
// Iterate over the array.
iterator begin() {
- return dense_.data();
+ return dense_.data();
}
iterator end() {
- return dense_.data() + size_;
+ return dense_.data() + size_;
}
const_iterator begin() const {
- return dense_.data();
+ return dense_.data();
}
const_iterator end() const {
- return dense_.data() + size_;
+ return dense_.data() + size_;
}
// Change the maximum size of the array.
// Invalidates all iterators.
- void resize(int new_max_size);
+ void resize(int new_max_size);
// Return the maximum size of the array.
// Indices can be in the range [0, max_size).
int max_size() const {
- if (dense_.data() != NULL)
- return dense_.size();
- else
- return 0;
+ if (dense_.data() != NULL)
+ return dense_.size();
+ else
+ return 0;
}
// Clear the array.
@@ -183,55 +183,55 @@ class SparseArray {
return SetInternal(true, i, v);
}
- // Set the value at new index i to v.
- // Fast but unsafe: only use if has_index(i) is false.
- iterator set_new(int i, const Value& v) {
- return SetInternal(false, i, v);
+ // Set the value at new index i to v.
+ // Fast but unsafe: only use if has_index(i) is false.
+ iterator set_new(int i, const Value& v) {
+ return SetInternal(false, i, v);
}
- // Set the value at index i to v.
+ // Set the value at index i to v.
// Fast but unsafe: only use if has_index(i) is true.
iterator set_existing(int i, const Value& v) {
return SetExistingInternal(i, v);
}
- // Get the value at index i.
- // Fast but unsafe: only use if has_index(i) is true.
- Value& get_existing(int i) {
- assert(has_index(i));
- return dense_[sparse_[i]].value_;
+ // Get the value at index i.
+ // Fast but unsafe: only use if has_index(i) is true.
+ Value& get_existing(int i) {
+ assert(has_index(i));
+ return dense_[sparse_[i]].value_;
}
- const Value& get_existing(int i) const {
- assert(has_index(i));
- return dense_[sparse_[i]].value_;
+ const Value& get_existing(int i) const {
+ assert(has_index(i));
+ return dense_[sparse_[i]].value_;
}
private:
- iterator SetInternal(bool allow_existing, int i, const Value& v) {
+ iterator SetInternal(bool allow_existing, int i, const Value& v) {
DebugCheckInvariants();
- if (static_cast<uint32_t>(i) >= static_cast<uint32_t>(max_size())) {
+ if (static_cast<uint32_t>(i) >= static_cast<uint32_t>(max_size())) {
assert(false && "illegal index");
// Semantically, end() would be better here, but we already know
// the user did something stupid, so begin() insulates them from
// dereferencing an invalid pointer.
return begin();
}
- if (!allow_existing) {
+ if (!allow_existing) {
assert(!has_index(i));
create_index(i);
} else {
if (!has_index(i))
create_index(i);
}
- return SetExistingInternal(i, v);
+ return SetExistingInternal(i, v);
}
- iterator SetExistingInternal(int i, const Value& v) {
+ iterator SetExistingInternal(int i, const Value& v) {
DebugCheckInvariants();
assert(has_index(i));
- dense_[sparse_[i]].value_ = v;
+ dense_[sparse_[i]].value_ = v;
DebugCheckInvariants();
- return dense_.data() + sparse_[i];
+ return dense_.data() + sparse_[i];
}
// Add the index i to the array.
@@ -246,20 +246,20 @@ class SparseArray {
// and at the beginning and end of all public non-const member functions.
void DebugCheckInvariants() const;
- // Initializes memory for elements [min, max).
- void MaybeInitializeMemory(int min, int max) {
-#if __has_feature(memory_sanitizer)
- __msan_unpoison(sparse_.data() + min, (max - min) * sizeof sparse_[0]);
-#elif defined(RE2_ON_VALGRIND)
- for (int i = min; i < max; i++) {
- sparse_[i] = 0xababababU;
- }
-#endif
- }
-
+ // Initializes memory for elements [min, max).
+ void MaybeInitializeMemory(int min, int max) {
+#if __has_feature(memory_sanitizer)
+ __msan_unpoison(sparse_.data() + min, (max - min) * sizeof sparse_[0]);
+#elif defined(RE2_ON_VALGRIND)
+ for (int i = min; i < max; i++) {
+ sparse_[i] = 0xababababU;
+ }
+#endif
+ }
+
int size_ = 0;
- PODArray<int> sparse_;
- PODArray<IndexValue> dense_;
+ PODArray<int> sparse_;
+ PODArray<IndexValue> dense_;
};
template<typename Value>
@@ -268,38 +268,38 @@ SparseArray<Value>::SparseArray() = default;
template<typename Value>
SparseArray<Value>::SparseArray(const SparseArray& src)
: size_(src.size_),
- sparse_(src.max_size()),
- dense_(src.max_size()) {
- std::copy_n(src.sparse_.data(), src.max_size(), sparse_.data());
- std::copy_n(src.dense_.data(), src.max_size(), dense_.data());
+ sparse_(src.max_size()),
+ dense_(src.max_size()) {
+ std::copy_n(src.sparse_.data(), src.max_size(), sparse_.data());
+ std::copy_n(src.dense_.data(), src.max_size(), dense_.data());
}
template<typename Value>
-SparseArray<Value>::SparseArray(SparseArray&& src)
+SparseArray<Value>::SparseArray(SparseArray&& src)
: size_(src.size_),
- sparse_(std::move(src.sparse_)),
+ sparse_(std::move(src.sparse_)),
dense_(std::move(src.dense_)) {
src.size_ = 0;
}
template<typename Value>
SparseArray<Value>& SparseArray<Value>::operator=(const SparseArray& src) {
- // Construct these first for exception safety.
- PODArray<int> a(src.max_size());
- PODArray<IndexValue> b(src.max_size());
-
+ // Construct these first for exception safety.
+ PODArray<int> a(src.max_size());
+ PODArray<IndexValue> b(src.max_size());
+
size_ = src.size_;
- sparse_ = std::move(a);
- dense_ = std::move(b);
- std::copy_n(src.sparse_.data(), src.max_size(), sparse_.data());
- std::copy_n(src.dense_.data(), src.max_size(), dense_.data());
+ sparse_ = std::move(a);
+ dense_ = std::move(b);
+ std::copy_n(src.sparse_.data(), src.max_size(), sparse_.data());
+ std::copy_n(src.dense_.data(), src.max_size(), dense_.data());
return *this;
}
template<typename Value>
-SparseArray<Value>& SparseArray<Value>::operator=(SparseArray&& src) {
+SparseArray<Value>& SparseArray<Value>::operator=(SparseArray&& src) {
size_ = src.size_;
- sparse_ = std::move(src.sparse_);
+ sparse_ = std::move(src.sparse_);
dense_ = std::move(src.dense_);
src.size_ = 0;
return *this;
@@ -310,37 +310,37 @@ template<typename Value>
class SparseArray<Value>::IndexValue {
public:
int index() const { return index_; }
- Value& value() { return value_; }
- const Value& value() const { return value_; }
+ Value& value() { return value_; }
+ const Value& value() const { return value_; }
private:
- friend class SparseArray;
+ friend class SparseArray;
int index_;
- Value value_;
+ Value value_;
};
// Change the maximum size of the array.
// Invalidates all iterators.
template<typename Value>
-void SparseArray<Value>::resize(int new_max_size) {
+void SparseArray<Value>::resize(int new_max_size) {
DebugCheckInvariants();
- if (new_max_size > max_size()) {
- const int old_max_size = max_size();
-
- // Construct these first for exception safety.
- PODArray<int> a(new_max_size);
- PODArray<IndexValue> b(new_max_size);
-
- std::copy_n(sparse_.data(), old_max_size, a.data());
- std::copy_n(dense_.data(), old_max_size, b.data());
-
- sparse_ = std::move(a);
- dense_ = std::move(b);
-
- MaybeInitializeMemory(old_max_size, new_max_size);
+ if (new_max_size > max_size()) {
+ const int old_max_size = max_size();
+
+ // Construct these first for exception safety.
+ PODArray<int> a(new_max_size);
+ PODArray<IndexValue> b(new_max_size);
+
+ std::copy_n(sparse_.data(), old_max_size, a.data());
+ std::copy_n(dense_.data(), old_max_size, b.data());
+
+ sparse_ = std::move(a);
+ dense_ = std::move(b);
+
+ MaybeInitializeMemory(old_max_size, new_max_size);
}
- if (size_ > new_max_size)
- size_ = new_max_size;
+ if (size_ > new_max_size)
+ size_ = new_max_size;
DebugCheckInvariants();
}
@@ -348,27 +348,27 @@ void SparseArray<Value>::resize(int new_max_size) {
template<typename Value>
bool SparseArray<Value>::has_index(int i) const {
assert(i >= 0);
- assert(i < max_size());
- if (static_cast<uint32_t>(i) >= static_cast<uint32_t>(max_size())) {
+ assert(i < max_size());
+ if (static_cast<uint32_t>(i) >= static_cast<uint32_t>(max_size())) {
return false;
}
- // Unsigned comparison avoids checking sparse_[i] < 0.
- return (uint32_t)sparse_[i] < (uint32_t)size_ &&
- dense_[sparse_[i]].index_ == i;
+ // Unsigned comparison avoids checking sparse_[i] < 0.
+ return (uint32_t)sparse_[i] < (uint32_t)size_ &&
+ dense_[sparse_[i]].index_ == i;
}
template<typename Value>
void SparseArray<Value>::create_index(int i) {
assert(!has_index(i));
- assert(size_ < max_size());
- sparse_[i] = size_;
+ assert(size_ < max_size());
+ sparse_[i] = size_;
dense_[size_].index_ = i;
size_++;
}
-template<typename Value> SparseArray<Value>::SparseArray(int max_size) :
- sparse_(max_size), dense_(max_size) {
- MaybeInitializeMemory(size_, max_size);
+template<typename Value> SparseArray<Value>::SparseArray(int max_size) :
+ sparse_(max_size), dense_(max_size) {
+ MaybeInitializeMemory(size_, max_size);
DebugCheckInvariants();
}
@@ -378,7 +378,7 @@ template<typename Value> SparseArray<Value>::~SparseArray() {
template<typename Value> void SparseArray<Value>::DebugCheckInvariants() const {
assert(0 <= size_);
- assert(size_ <= max_size());
+ assert(size_ <= max_size());
}
// Comparison function for sorting.
@@ -389,4 +389,4 @@ template<typename Value> bool SparseArray<Value>::less(const IndexValue& a,
} // namespace re2
-#endif // RE2_SPARSE_ARRAY_H_
+#endif // RE2_SPARSE_ARRAY_H_
diff --git a/contrib/libs/re2/re2/sparse_set.h b/contrib/libs/re2/re2/sparse_set.h
index 88b22de332..06ed88d81b 100644
--- a/contrib/libs/re2/re2/sparse_set.h
+++ b/contrib/libs/re2/re2/sparse_set.h
@@ -2,8 +2,8 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-#ifndef RE2_SPARSE_SET_H_
-#define RE2_SPARSE_SET_H_
+#ifndef RE2_SPARSE_SET_H_
+#define RE2_SPARSE_SET_H_
// DESCRIPTION
//
@@ -47,22 +47,22 @@
//
// See sparse_array.h for implementation details.
-// Doing this simplifies the logic below.
-#ifndef __has_feature
-#define __has_feature(x) 0
-#endif
-
+// Doing this simplifies the logic below.
+#ifndef __has_feature
+#define __has_feature(x) 0
+#endif
+
#include <assert.h>
#include <stdint.h>
-#if __has_feature(memory_sanitizer)
-#include <sanitizer/msan_interface.h>
-#endif
+#if __has_feature(memory_sanitizer)
+#include <sanitizer/msan_interface.h>
+#endif
#include <algorithm>
#include <memory>
#include <utility>
-#include "re2/pod_array.h"
-
+#include "re2/pod_array.h"
+
namespace re2 {
template<typename Value>
@@ -72,8 +72,8 @@ class SparseSetT {
explicit SparseSetT(int max_size);
~SparseSetT();
- typedef int* iterator;
- typedef const int* const_iterator;
+ typedef int* iterator;
+ typedef const int* const_iterator;
// Return the number of entries in the set.
int size() const {
@@ -87,30 +87,30 @@ class SparseSetT {
// Iterate over the set.
iterator begin() {
- return dense_.data();
+ return dense_.data();
}
iterator end() {
- return dense_.data() + size_;
+ return dense_.data() + size_;
}
const_iterator begin() const {
- return dense_.data();
+ return dense_.data();
}
const_iterator end() const {
- return dense_.data() + size_;
+ return dense_.data() + size_;
}
// Change the maximum size of the set.
// Invalidates all iterators.
- void resize(int new_max_size);
+ void resize(int new_max_size);
// Return the maximum size of the set.
// Indices can be in the range [0, max_size).
int max_size() const {
- if (dense_.data() != NULL)
- return dense_.size();
- else
- return 0;
+ if (dense_.data() != NULL)
+ return dense_.size();
+ else
+ return 0;
}
// Clear the set.
@@ -142,7 +142,7 @@ class SparseSetT {
private:
iterator InsertInternal(bool allow_existing, int i) {
DebugCheckInvariants();
- if (static_cast<uint32_t>(i) >= static_cast<uint32_t>(max_size())) {
+ if (static_cast<uint32_t>(i) >= static_cast<uint32_t>(max_size())) {
assert(false && "illegal index");
// Semantically, end() would be better here, but we already know
// the user did something stupid, so begin() insulates them from
@@ -157,7 +157,7 @@ class SparseSetT {
create_index(i);
}
DebugCheckInvariants();
- return dense_.data() + sparse_[i];
+ return dense_.data() + sparse_[i];
}
// Add the index i to the set.
@@ -171,20 +171,20 @@ class SparseSetT {
// and at the beginning and end of all public non-const member functions.
void DebugCheckInvariants() const;
- // Initializes memory for elements [min, max).
- void MaybeInitializeMemory(int min, int max) {
-#if __has_feature(memory_sanitizer)
- __msan_unpoison(sparse_.data() + min, (max - min) * sizeof sparse_[0]);
-#elif defined(RE2_ON_VALGRIND)
- for (int i = min; i < max; i++) {
- sparse_[i] = 0xababababU;
- }
-#endif
- }
-
+ // Initializes memory for elements [min, max).
+ void MaybeInitializeMemory(int min, int max) {
+#if __has_feature(memory_sanitizer)
+ __msan_unpoison(sparse_.data() + min, (max - min) * sizeof sparse_[0]);
+#elif defined(RE2_ON_VALGRIND)
+ for (int i = min; i < max; i++) {
+ sparse_[i] = 0xababababU;
+ }
+#endif
+ }
+
int size_ = 0;
- PODArray<int> sparse_;
- PODArray<int> dense_;
+ PODArray<int> sparse_;
+ PODArray<int> dense_;
};
template<typename Value>
@@ -193,25 +193,25 @@ SparseSetT<Value>::SparseSetT() = default;
// Change the maximum size of the set.
// Invalidates all iterators.
template<typename Value>
-void SparseSetT<Value>::resize(int new_max_size) {
+void SparseSetT<Value>::resize(int new_max_size) {
DebugCheckInvariants();
- if (new_max_size > max_size()) {
- const int old_max_size = max_size();
-
- // Construct these first for exception safety.
- PODArray<int> a(new_max_size);
- PODArray<int> b(new_max_size);
-
- std::copy_n(sparse_.data(), old_max_size, a.data());
- std::copy_n(dense_.data(), old_max_size, b.data());
-
- sparse_ = std::move(a);
- dense_ = std::move(b);
-
- MaybeInitializeMemory(old_max_size, new_max_size);
+ if (new_max_size > max_size()) {
+ const int old_max_size = max_size();
+
+ // Construct these first for exception safety.
+ PODArray<int> a(new_max_size);
+ PODArray<int> b(new_max_size);
+
+ std::copy_n(sparse_.data(), old_max_size, a.data());
+ std::copy_n(dense_.data(), old_max_size, b.data());
+
+ sparse_ = std::move(a);
+ dense_ = std::move(b);
+
+ MaybeInitializeMemory(old_max_size, new_max_size);
}
- if (size_ > new_max_size)
- size_ = new_max_size;
+ if (size_ > new_max_size)
+ size_ = new_max_size;
DebugCheckInvariants();
}
@@ -219,27 +219,27 @@ void SparseSetT<Value>::resize(int new_max_size) {
template<typename Value>
bool SparseSetT<Value>::contains(int i) const {
assert(i >= 0);
- assert(i < max_size());
- if (static_cast<uint32_t>(i) >= static_cast<uint32_t>(max_size())) {
+ assert(i < max_size());
+ if (static_cast<uint32_t>(i) >= static_cast<uint32_t>(max_size())) {
return false;
}
- // Unsigned comparison avoids checking sparse_[i] < 0.
- return (uint32_t)sparse_[i] < (uint32_t)size_ &&
- dense_[sparse_[i]] == i;
+ // Unsigned comparison avoids checking sparse_[i] < 0.
+ return (uint32_t)sparse_[i] < (uint32_t)size_ &&
+ dense_[sparse_[i]] == i;
}
template<typename Value>
void SparseSetT<Value>::create_index(int i) {
assert(!contains(i));
- assert(size_ < max_size());
- sparse_[i] = size_;
+ assert(size_ < max_size());
+ sparse_[i] = size_;
dense_[size_] = i;
size_++;
}
-template<typename Value> SparseSetT<Value>::SparseSetT(int max_size) :
- sparse_(max_size), dense_(max_size) {
- MaybeInitializeMemory(size_, max_size);
+template<typename Value> SparseSetT<Value>::SparseSetT(int max_size) :
+ sparse_(max_size), dense_(max_size) {
+ MaybeInitializeMemory(size_, max_size);
DebugCheckInvariants();
}
@@ -249,7 +249,7 @@ template<typename Value> SparseSetT<Value>::~SparseSetT() {
template<typename Value> void SparseSetT<Value>::DebugCheckInvariants() const {
assert(0 <= size_);
- assert(size_ <= max_size());
+ assert(size_ <= max_size());
}
// Comparison function for sorting.
@@ -261,4 +261,4 @@ typedef SparseSetT<void> SparseSet;
} // namespace re2
-#endif // RE2_SPARSE_SET_H_
+#endif // RE2_SPARSE_SET_H_
diff --git a/contrib/libs/re2/re2/stringpiece.h b/contrib/libs/re2/re2/stringpiece.h
index 1f53a7f5e4..ef73683401 100644
--- a/contrib/libs/re2/re2/stringpiece.h
+++ b/contrib/libs/re2/re2/stringpiece.h
@@ -19,20 +19,20 @@
//
// Arghh! I wish C++ literals were "string".
-// Doing this simplifies the logic below.
-#ifndef __has_include
-#define __has_include(x) 0
-#endif
-
+// Doing this simplifies the logic below.
+#ifndef __has_include
+#define __has_include(x) 0
+#endif
+
#include <stddef.h>
#include <string.h>
#include <algorithm>
#include <iosfwd>
#include <iterator>
#include <string>
-#if __has_include(<string_view>) && __cplusplus >= 201703L
-#include <string_view>
-#endif
+#if __has_include(<string_view>) && __cplusplus >= 201703L
+#include <string_view>
+#endif
#if defined(ARCADIA_ROOT)
#include <util/generic/string.h>
#endif
@@ -41,7 +41,7 @@ namespace re2 {
class StringPiece {
public:
- typedef std::char_traits<char> traits_type;
+ typedef std::char_traits<char> traits_type;
typedef char value_type;
typedef char* pointer;
typedef const char* const_pointer;
@@ -60,10 +60,10 @@ class StringPiece {
// expected.
StringPiece()
: data_(NULL), size_(0) {}
-#if __has_include(<string_view>) && __cplusplus >= 201703L
- StringPiece(const std::string_view& str)
- : data_(str.data()), size_(str.size()) {}
-#endif
+#if __has_include(<string_view>) && __cplusplus >= 201703L
+ StringPiece(const std::string_view& str)
+ : data_(str.data()), size_(str.size()) {}
+#endif
StringPiece(const std::string& str)
: data_(str.data()), size_(str.size()) {}
StringPiece(const char* str)
@@ -71,8 +71,8 @@ class StringPiece {
StringPiece(const char* str, size_type len)
: data_(str), size_(len) {}
#if defined(ARCADIA_ROOT)
- StringPiece(const TString& str)
- : StringPiece(str.data(), str.size()) {}
+ StringPiece(const TString& str)
+ : StringPiece(str.data(), str.size()) {}
#endif
const_iterator begin() const { return data_; }
@@ -110,13 +110,13 @@ class StringPiece {
size_ = len;
}
- // Converts to `std::basic_string`.
- template <typename A>
- explicit operator std::basic_string<char, traits_type, A>() const {
- if (!data_) return {};
- return std::basic_string<char, traits_type, A>(data_, size_);
- }
-
+ // Converts to `std::basic_string`.
+ template <typename A>
+ explicit operator std::basic_string<char, traits_type, A>() const {
+ if (!data_) return {};
+ return std::basic_string<char, traits_type, A>(data_, size_);
+ }
+
std::string as_string() const {
return std::string(data_, size_);
}
diff --git a/contrib/libs/re2/re2/testing/backtrack.cc b/contrib/libs/re2/re2/testing/backtrack.cc
index cc6253ddf7..920a4534dc 100644
--- a/contrib/libs/re2/re2/testing/backtrack.cc
+++ b/contrib/libs/re2/re2/testing/backtrack.cc
@@ -1,275 +1,275 @@
-// Copyright 2008 The RE2 Authors. All Rights Reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// Tested by search_test.cc, exhaustive_test.cc, tester.cc
-//
-// Prog::UnsafeSearchBacktrack is a backtracking regular expression search,
-// except that it remembers where it has been, trading a lot of
-// memory for a lot of time. It exists only for testing purposes.
-//
-// Let me repeat that.
-//
-// THIS CODE SHOULD NEVER BE USED IN PRODUCTION:
-// - It uses a ton of memory.
-// - It uses a ton of stack.
-// - It uses CHECK and LOG(FATAL).
-// - It implements unanchored search by repeated anchored search.
-//
-// On the other hand, it is very simple and a good reference
-// implementation for the more complicated regexp packages.
-//
-// In BUILD, this file is linked into the ":testing" library,
-// not the main library, in order to make it harder to pick up
-// accidentally.
-
-#include <stddef.h>
-#include <stdint.h>
-#include <string.h>
-
-#include "util/util.h"
-#include "util/logging.h"
-#include "re2/pod_array.h"
-#include "re2/prog.h"
-#include "re2/regexp.h"
-
-namespace re2 {
-
-// Backtracker holds the state for a backtracking search.
-//
-// Excluding the search parameters, the main search state
-// is just the "capture registers", which record, for the
-// current execution, the string position at which each
-// parenthesis was passed. cap_[0] and cap_[1] are the
-// left and right parenthesis in $0, cap_[2] and cap_[3] in $1, etc.
-//
-// To avoid infinite loops during backtracking on expressions
-// like (a*)*, the visited_[] bitmap marks the (state, string-position)
-// pairs that have already been explored and are thus not worth
-// re-exploring if we get there via another path. Modern backtracking
-// libraries engineer their program representation differently, to make
-// such infinite loops possible to avoid without keeping a giant visited_
-// bitmap, but visited_ works fine for a reference implementation
-// and it has the nice benefit of making the search run in linear time.
-class Backtracker {
- public:
- explicit Backtracker(Prog* prog);
-
- bool Search(const StringPiece& text, const StringPiece& context,
- bool anchored, bool longest,
- StringPiece* submatch, int nsubmatch);
-
- private:
- // Explores from instruction id at string position p looking for a match.
- // Returns true if found (so that caller can stop trying other possibilities).
- bool Visit(int id, const char* p);
-
- // Tries instruction id at string position p.
- // Returns true if a match is found.
- bool Try(int id, const char* p);
-
- // Search parameters
- Prog* prog_; // program being run
- StringPiece text_; // text being searched
- StringPiece context_; // greater context of text being searched
- bool anchored_; // whether search is anchored at text.begin()
- bool longest_; // whether search wants leftmost-longest match
- bool endmatch_; // whether search must end at text.end()
- StringPiece *submatch_; // submatches to fill in
- int nsubmatch_; // # of submatches to fill in
-
- // Search state
- const char* cap_[64]; // capture registers
- PODArray<uint32_t> visited_; // bitmap: (Inst*, char*) pairs visited
-
- Backtracker(const Backtracker&) = delete;
- Backtracker& operator=(const Backtracker&) = delete;
-};
-
-Backtracker::Backtracker(Prog* prog)
- : prog_(prog),
- anchored_(false),
- longest_(false),
- endmatch_(false),
- submatch_(NULL),
- nsubmatch_(0) {
-}
-
-// Runs a backtracking search.
-bool Backtracker::Search(const StringPiece& text, const StringPiece& context,
- bool anchored, bool longest,
- StringPiece* submatch, int nsubmatch) {
- text_ = text;
- context_ = context;
- if (context_.data() == NULL)
- context_ = text;
+// Copyright 2008 The RE2 Authors. All Rights Reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Tested by search_test.cc, exhaustive_test.cc, tester.cc
+//
+// Prog::UnsafeSearchBacktrack is a backtracking regular expression search,
+// except that it remembers where it has been, trading a lot of
+// memory for a lot of time. It exists only for testing purposes.
+//
+// Let me repeat that.
+//
+// THIS CODE SHOULD NEVER BE USED IN PRODUCTION:
+// - It uses a ton of memory.
+// - It uses a ton of stack.
+// - It uses CHECK and LOG(FATAL).
+// - It implements unanchored search by repeated anchored search.
+//
+// On the other hand, it is very simple and a good reference
+// implementation for the more complicated regexp packages.
+//
+// In BUILD, this file is linked into the ":testing" library,
+// not the main library, in order to make it harder to pick up
+// accidentally.
+
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>
+
+#include "util/util.h"
+#include "util/logging.h"
+#include "re2/pod_array.h"
+#include "re2/prog.h"
+#include "re2/regexp.h"
+
+namespace re2 {
+
+// Backtracker holds the state for a backtracking search.
+//
+// Excluding the search parameters, the main search state
+// is just the "capture registers", which record, for the
+// current execution, the string position at which each
+// parenthesis was passed. cap_[0] and cap_[1] are the
+// left and right parenthesis in $0, cap_[2] and cap_[3] in $1, etc.
+//
+// To avoid infinite loops during backtracking on expressions
+// like (a*)*, the visited_[] bitmap marks the (state, string-position)
+// pairs that have already been explored and are thus not worth
+// re-exploring if we get there via another path. Modern backtracking
+// libraries engineer their program representation differently, to make
+// such infinite loops possible to avoid without keeping a giant visited_
+// bitmap, but visited_ works fine for a reference implementation
+// and it has the nice benefit of making the search run in linear time.
+class Backtracker {
+ public:
+ explicit Backtracker(Prog* prog);
+
+ bool Search(const StringPiece& text, const StringPiece& context,
+ bool anchored, bool longest,
+ StringPiece* submatch, int nsubmatch);
+
+ private:
+ // Explores from instruction id at string position p looking for a match.
+ // Returns true if found (so that caller can stop trying other possibilities).
+ bool Visit(int id, const char* p);
+
+ // Tries instruction id at string position p.
+ // Returns true if a match is found.
+ bool Try(int id, const char* p);
+
+ // Search parameters
+ Prog* prog_; // program being run
+ StringPiece text_; // text being searched
+ StringPiece context_; // greater context of text being searched
+ bool anchored_; // whether search is anchored at text.begin()
+ bool longest_; // whether search wants leftmost-longest match
+ bool endmatch_; // whether search must end at text.end()
+ StringPiece *submatch_; // submatches to fill in
+ int nsubmatch_; // # of submatches to fill in
+
+ // Search state
+ const char* cap_[64]; // capture registers
+ PODArray<uint32_t> visited_; // bitmap: (Inst*, char*) pairs visited
+
+ Backtracker(const Backtracker&) = delete;
+ Backtracker& operator=(const Backtracker&) = delete;
+};
+
+Backtracker::Backtracker(Prog* prog)
+ : prog_(prog),
+ anchored_(false),
+ longest_(false),
+ endmatch_(false),
+ submatch_(NULL),
+ nsubmatch_(0) {
+}
+
+// Runs a backtracking search.
+bool Backtracker::Search(const StringPiece& text, const StringPiece& context,
+ bool anchored, bool longest,
+ StringPiece* submatch, int nsubmatch) {
+ text_ = text;
+ context_ = context;
+ if (context_.data() == NULL)
+ context_ = text;
if (prog_->anchor_start() && BeginPtr(text) > BeginPtr(context_))
- return false;
+ return false;
if (prog_->anchor_end() && EndPtr(text) < EndPtr(context_))
- return false;
- anchored_ = anchored | prog_->anchor_start();
- longest_ = longest | prog_->anchor_end();
- endmatch_ = prog_->anchor_end();
- submatch_ = submatch;
- nsubmatch_ = nsubmatch;
- CHECK_LT(2*nsubmatch_, static_cast<int>(arraysize(cap_)));
- memset(cap_, 0, sizeof cap_);
-
- // We use submatch_[0] for our own bookkeeping,
- // so it had better exist.
- StringPiece sp0;
- if (nsubmatch < 1) {
- submatch_ = &sp0;
- nsubmatch_ = 1;
- }
- submatch_[0] = StringPiece();
-
- // Allocate new visited_ bitmap -- size is proportional
- // to text, so have to reallocate on each call to Search.
- int nvisited = prog_->size() * static_cast<int>(text.size()+1);
- nvisited = (nvisited + 31) / 32;
- visited_ = PODArray<uint32_t>(nvisited);
- memset(visited_.data(), 0, nvisited*sizeof visited_[0]);
-
- // Anchored search must start at text.begin().
- if (anchored_) {
- cap_[0] = text.data();
- return Visit(prog_->start(), text.data());
- }
-
- // Unanchored search, starting from each possible text position.
- // Notice that we have to try the empty string at the end of
- // the text, so the loop condition is p <= text.end(), not p < text.end().
- for (const char* p = text.data(); p <= text.data() + text.size(); p++) {
- cap_[0] = p;
- if (Visit(prog_->start(), p)) // Match must be leftmost; done.
- return true;
- // Avoid invoking undefined behavior (arithmetic on a null pointer)
- // by simply not continuing the loop.
- if (p == NULL)
- break;
- }
- return false;
-}
-
-// Explores from instruction id at string position p looking for a match.
-// Return true if found (so that caller can stop trying other possibilities).
-bool Backtracker::Visit(int id, const char* p) {
- // Check bitmap. If we've already explored from here,
- // either it didn't match or it did but we're hoping for a better match.
- // Either way, don't go down that road again.
- CHECK(p <= text_.data() + text_.size());
- int n = id * static_cast<int>(text_.size()+1) +
- static_cast<int>(p-text_.data());
- CHECK_LT(n/32, visited_.size());
- if (visited_[n/32] & (1 << (n&31)))
- return false;
- visited_[n/32] |= 1 << (n&31);
-
- Prog::Inst* ip = prog_->inst(id);
- if (Try(id, p)) {
- if (longest_ && !ip->last())
- Visit(id+1, p);
- return true;
- }
- if (!ip->last())
- return Visit(id+1, p);
- return false;
-}
-
-// Tries instruction id at string position p.
-// Returns true if a match is found.
-bool Backtracker::Try(int id, const char* p) {
- // Pick out byte at current position. If at end of string,
- // have to explore in hope of finishing a match. Use impossible byte -1.
- int c = -1;
- if (p < text_.data() + text_.size())
- c = *p & 0xFF;
-
- Prog::Inst* ip = prog_->inst(id);
- switch (ip->opcode()) {
- default:
- LOG(FATAL) << "Unexpected opcode: " << (int)ip->opcode();
- return false; // not reached
-
- case kInstAltMatch:
- // Ignored.
- return false;
-
- case kInstByteRange:
- if (ip->Matches(c))
- return Visit(ip->out(), p+1);
- return false;
-
- case kInstCapture:
- if (0 <= ip->cap() &&
- ip->cap() < static_cast<int>(arraysize(cap_))) {
- // Capture p to register, but save old value.
- const char* q = cap_[ip->cap()];
- cap_[ip->cap()] = p;
- bool ret = Visit(ip->out(), p);
- // Restore old value as we backtrack.
- cap_[ip->cap()] = q;
- return ret;
- }
- return Visit(ip->out(), p);
-
- case kInstEmptyWidth:
- if (ip->empty() & ~Prog::EmptyFlags(context_, p))
- return false;
- return Visit(ip->out(), p);
-
- case kInstNop:
- return Visit(ip->out(), p);
-
- case kInstMatch:
- // We found a match. If it's the best so far, record the
- // parameters in the caller's submatch_ array.
- if (endmatch_ && p != context_.data() + context_.size())
- return false;
- cap_[1] = p;
- if (submatch_[0].data() == NULL ||
- (longest_ && p > submatch_[0].data() + submatch_[0].size())) {
- // First match so far - or better match.
- for (int i = 0; i < nsubmatch_; i++)
- submatch_[i] = StringPiece(
- cap_[2 * i], static_cast<size_t>(cap_[2 * i + 1] - cap_[2 * i]));
- }
- return true;
-
- case kInstFail:
- return false;
- }
-}
-
-// Runs a backtracking search.
-bool Prog::UnsafeSearchBacktrack(const StringPiece& text,
- const StringPiece& context,
- Anchor anchor,
- MatchKind kind,
- StringPiece* match,
- int nmatch) {
- // If full match, we ask for an anchored longest match
- // and then check that match[0] == text.
- // So make sure match[0] exists.
- StringPiece sp0;
- if (kind == kFullMatch) {
- anchor = kAnchored;
- if (nmatch < 1) {
- match = &sp0;
- nmatch = 1;
- }
- }
-
- // Run the search.
- Backtracker b(this);
- bool anchored = anchor == kAnchored;
- bool longest = kind != kFirstMatch;
- if (!b.Search(text, context, anchored, longest, match, nmatch))
- return false;
+ return false;
+ anchored_ = anchored | prog_->anchor_start();
+ longest_ = longest | prog_->anchor_end();
+ endmatch_ = prog_->anchor_end();
+ submatch_ = submatch;
+ nsubmatch_ = nsubmatch;
+ CHECK_LT(2*nsubmatch_, static_cast<int>(arraysize(cap_)));
+ memset(cap_, 0, sizeof cap_);
+
+ // We use submatch_[0] for our own bookkeeping,
+ // so it had better exist.
+ StringPiece sp0;
+ if (nsubmatch < 1) {
+ submatch_ = &sp0;
+ nsubmatch_ = 1;
+ }
+ submatch_[0] = StringPiece();
+
+ // Allocate new visited_ bitmap -- size is proportional
+ // to text, so have to reallocate on each call to Search.
+ int nvisited = prog_->size() * static_cast<int>(text.size()+1);
+ nvisited = (nvisited + 31) / 32;
+ visited_ = PODArray<uint32_t>(nvisited);
+ memset(visited_.data(), 0, nvisited*sizeof visited_[0]);
+
+ // Anchored search must start at text.begin().
+ if (anchored_) {
+ cap_[0] = text.data();
+ return Visit(prog_->start(), text.data());
+ }
+
+ // Unanchored search, starting from each possible text position.
+ // Notice that we have to try the empty string at the end of
+ // the text, so the loop condition is p <= text.end(), not p < text.end().
+ for (const char* p = text.data(); p <= text.data() + text.size(); p++) {
+ cap_[0] = p;
+ if (Visit(prog_->start(), p)) // Match must be leftmost; done.
+ return true;
+ // Avoid invoking undefined behavior (arithmetic on a null pointer)
+ // by simply not continuing the loop.
+ if (p == NULL)
+ break;
+ }
+ return false;
+}
+
+// Explores from instruction id at string position p looking for a match.
+// Return true if found (so that caller can stop trying other possibilities).
+bool Backtracker::Visit(int id, const char* p) {
+ // Check bitmap. If we've already explored from here,
+ // either it didn't match or it did but we're hoping for a better match.
+ // Either way, don't go down that road again.
+ CHECK(p <= text_.data() + text_.size());
+ int n = id * static_cast<int>(text_.size()+1) +
+ static_cast<int>(p-text_.data());
+ CHECK_LT(n/32, visited_.size());
+ if (visited_[n/32] & (1 << (n&31)))
+ return false;
+ visited_[n/32] |= 1 << (n&31);
+
+ Prog::Inst* ip = prog_->inst(id);
+ if (Try(id, p)) {
+ if (longest_ && !ip->last())
+ Visit(id+1, p);
+ return true;
+ }
+ if (!ip->last())
+ return Visit(id+1, p);
+ return false;
+}
+
+// Tries instruction id at string position p.
+// Returns true if a match is found.
+bool Backtracker::Try(int id, const char* p) {
+ // Pick out byte at current position. If at end of string,
+ // have to explore in hope of finishing a match. Use impossible byte -1.
+ int c = -1;
+ if (p < text_.data() + text_.size())
+ c = *p & 0xFF;
+
+ Prog::Inst* ip = prog_->inst(id);
+ switch (ip->opcode()) {
+ default:
+ LOG(FATAL) << "Unexpected opcode: " << (int)ip->opcode();
+ return false; // not reached
+
+ case kInstAltMatch:
+ // Ignored.
+ return false;
+
+ case kInstByteRange:
+ if (ip->Matches(c))
+ return Visit(ip->out(), p+1);
+ return false;
+
+ case kInstCapture:
+ if (0 <= ip->cap() &&
+ ip->cap() < static_cast<int>(arraysize(cap_))) {
+ // Capture p to register, but save old value.
+ const char* q = cap_[ip->cap()];
+ cap_[ip->cap()] = p;
+ bool ret = Visit(ip->out(), p);
+ // Restore old value as we backtrack.
+ cap_[ip->cap()] = q;
+ return ret;
+ }
+ return Visit(ip->out(), p);
+
+ case kInstEmptyWidth:
+ if (ip->empty() & ~Prog::EmptyFlags(context_, p))
+ return false;
+ return Visit(ip->out(), p);
+
+ case kInstNop:
+ return Visit(ip->out(), p);
+
+ case kInstMatch:
+ // We found a match. If it's the best so far, record the
+ // parameters in the caller's submatch_ array.
+ if (endmatch_ && p != context_.data() + context_.size())
+ return false;
+ cap_[1] = p;
+ if (submatch_[0].data() == NULL ||
+ (longest_ && p > submatch_[0].data() + submatch_[0].size())) {
+ // First match so far - or better match.
+ for (int i = 0; i < nsubmatch_; i++)
+ submatch_[i] = StringPiece(
+ cap_[2 * i], static_cast<size_t>(cap_[2 * i + 1] - cap_[2 * i]));
+ }
+ return true;
+
+ case kInstFail:
+ return false;
+ }
+}
+
+// Runs a backtracking search.
+bool Prog::UnsafeSearchBacktrack(const StringPiece& text,
+ const StringPiece& context,
+ Anchor anchor,
+ MatchKind kind,
+ StringPiece* match,
+ int nmatch) {
+ // If full match, we ask for an anchored longest match
+ // and then check that match[0] == text.
+ // So make sure match[0] exists.
+ StringPiece sp0;
+ if (kind == kFullMatch) {
+ anchor = kAnchored;
+ if (nmatch < 1) {
+ match = &sp0;
+ nmatch = 1;
+ }
+ }
+
+ // Run the search.
+ Backtracker b(this);
+ bool anchored = anchor == kAnchored;
+ bool longest = kind != kFirstMatch;
+ if (!b.Search(text, context, anchored, longest, match, nmatch))
+ return false;
if (kind == kFullMatch && EndPtr(match[0]) != EndPtr(text))
- return false;
- return true;
-}
-
-} // namespace re2
+ return false;
+ return true;
+}
+
+} // namespace re2
diff --git a/contrib/libs/re2/re2/testing/charclass_test.cc b/contrib/libs/re2/re2/testing/charclass_test.cc
index 0af75ba8d1..9a8b7ac6a0 100644
--- a/contrib/libs/re2/re2/testing/charclass_test.cc
+++ b/contrib/libs/re2/re2/testing/charclass_test.cc
@@ -1,226 +1,226 @@
-// Copyright 2006 The RE2 Authors. All Rights Reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// Test character class manipulations.
-
-#include <stdio.h>
-
-#include "library/cpp/testing/gtest/gtest.h"
-#include "util/utf.h"
-#include "re2/regexp.h"
-
-namespace re2 {
-
-struct CCTest {
- struct {
- Rune lo;
- Rune hi;
- } add[10];
- int remove;
- struct {
- Rune lo;
- Rune hi;
- } final[10];
-};
-
-static CCTest tests[] = {
- { { { 10, 20 }, {-1} }, -1,
- { { 10, 20 }, {-1} } },
-
- { { { 10, 20 }, { 20, 30 }, {-1} }, -1,
- { { 10, 30 }, {-1} } },
-
- { { { 10, 20 }, { 30, 40 }, { 20, 30 }, {-1} }, -1,
- { { 10, 40 }, {-1} } },
-
- { { { 0, 50 }, { 20, 30 }, {-1} }, -1,
- { { 0, 50 }, {-1} } },
-
- { { { 10, 11 }, { 13, 14 }, { 16, 17 }, { 19, 20 }, { 22, 23 }, {-1} }, -1,
- { { 10, 11 }, { 13, 14 }, { 16, 17 }, { 19, 20 }, { 22, 23 }, {-1} } },
-
- { { { 13, 14 }, { 10, 11 }, { 22, 23 }, { 19, 20 }, { 16, 17 }, {-1} }, -1,
- { { 10, 11 }, { 13, 14 }, { 16, 17 }, { 19, 20 }, { 22, 23 }, {-1} } },
-
- { { { 13, 14 }, { 10, 11 }, { 22, 23 }, { 19, 20 }, { 16, 17 }, {-1} }, -1,
- { { 10, 11 }, { 13, 14 }, { 16, 17 }, { 19, 20 }, { 22, 23 }, {-1} } },
-
- { { { 13, 14 }, { 10, 11 }, { 22, 23 }, { 19, 20 }, { 16, 17 }, { 5, 25 }, {-1} }, -1,
- { { 5, 25 }, {-1} } },
-
- { { { 13, 14 }, { 10, 11 }, { 22, 23 }, { 19, 20 }, { 16, 17 }, { 12, 21 }, {-1} }, -1,
- { { 10, 23 }, {-1} } },
-
- // These check boundary cases during negation.
- { { { 0, Runemax }, {-1} }, -1,
- { { 0, Runemax }, {-1} } },
-
- { { { 0, 50 }, {-1} }, -1,
- { { 0, 50 }, {-1} } },
-
- { { { 50, Runemax }, {-1} }, -1,
- { { 50, Runemax }, {-1} } },
-
- // Check RemoveAbove.
- { { { 50, Runemax }, {-1} }, 255,
- { { 50, 255 }, {-1} } },
-
- { { { 50, Runemax }, {-1} }, 65535,
- { { 50, 65535 }, {-1} } },
-
- { { { 50, Runemax }, {-1} }, Runemax,
- { { 50, Runemax }, {-1} } },
-
- { { { 50, 60 }, { 250, 260 }, { 350, 360 }, {-1} }, 255,
- { { 50, 60 }, { 250, 255 }, {-1} } },
-
- { { { 50, 60 }, {-1} }, 255,
- { { 50, 60 }, {-1} } },
-
- { { { 350, 360 }, {-1} }, 255,
- { {-1} } },
-
- { { {-1} }, 255,
- { {-1} } },
-};
-
-template <typename CharClass>
-static void Broke(const char *desc, const CCTest* t, CharClass* cc) {
- if (t == NULL) {
- printf("\t%s:", desc);
- } else {
- printf("\n");
- printf("CharClass added: [%s]", desc);
- for (int k = 0; t->add[k].lo >= 0; k++)
- printf(" %d-%d", t->add[k].lo, t->add[k].hi);
- printf("\n");
- if (t->remove >= 0)
- printf("Removed > %d\n", t->remove);
- printf("\twant:");
- for (int k = 0; t->final[k].lo >= 0; k++)
- printf(" %d-%d", t->final[k].lo, t->final[k].hi);
- printf("\n");
- printf("\thave:");
- }
-
- for (typename CharClass::iterator it = cc->begin(); it != cc->end(); ++it)
- printf(" %d-%d", it->lo, it->hi);
- printf("\n");
-}
-
-bool ShouldContain(CCTest *t, int x) {
- for (int j = 0; t->final[j].lo >= 0; j++)
- if (t->final[j].lo <= x && x <= t->final[j].hi)
- return true;
- return false;
-}
-
-// Helpers to make templated CorrectCC work with both CharClass and CharClassBuilder.
-
-CharClass* Negate(CharClass *cc) {
- return cc->Negate();
-}
-
-void Delete(CharClass* cc) {
- cc->Delete();
-}
-
-CharClassBuilder* Negate(CharClassBuilder* cc) {
- CharClassBuilder* ncc = cc->Copy();
- ncc->Negate();
- return ncc;
-}
-
-void Delete(CharClassBuilder* cc) {
- delete cc;
-}
-
-template <typename CharClass>
-bool CorrectCC(CharClass *cc, CCTest *t, const char *desc) {
- typename CharClass::iterator it = cc->begin();
- int size = 0;
- for (int j = 0; t->final[j].lo >= 0; j++, ++it) {
- if (it == cc->end() ||
- it->lo != t->final[j].lo ||
- it->hi != t->final[j].hi) {
- Broke(desc, t, cc);
- return false;
- }
- size += it->hi - it->lo + 1;
- }
- if (it != cc->end()) {
- Broke(desc, t, cc);
- return false;
- }
- if (cc->size() != size) {
- Broke(desc, t, cc);
- printf("wrong size: want %d have %d\n", size, cc->size());
- return false;
- }
-
- for (int j = 0; j < 101; j++) {
- if (j == 100)
- j = Runemax;
- if (ShouldContain(t, j) != cc->Contains(j)) {
- Broke(desc, t, cc);
- printf("want contains(%d)=%d, got %d\n",
- j, ShouldContain(t, j), cc->Contains(j));
- return false;
- }
- }
-
- CharClass* ncc = Negate(cc);
- for (int j = 0; j < 101; j++) {
- if (j == 100)
- j = Runemax;
- if (ShouldContain(t, j) == ncc->Contains(j)) {
- Broke(desc, t, cc);
- Broke("ncc", NULL, ncc);
- printf("want ncc contains(%d)!=%d, got %d\n",
- j, ShouldContain(t, j), ncc->Contains(j));
- Delete(ncc);
- return false;
- }
- if (ncc->size() != Runemax+1 - cc->size()) {
- Broke(desc, t, cc);
- Broke("ncc", NULL, ncc);
- printf("ncc size should be %d is %d\n",
- Runemax+1 - cc->size(), ncc->size());
- Delete(ncc);
- return false;
- }
- }
- Delete(ncc);
- return true;
-}
-
-TEST(TestCharClassBuilder, Adds) {
- int nfail = 0;
- for (size_t i = 0; i < arraysize(tests); i++) {
- CharClassBuilder ccb;
- CCTest* t = &tests[i];
- for (int j = 0; t->add[j].lo >= 0; j++)
- ccb.AddRange(t->add[j].lo, t->add[j].hi);
- if (t->remove >= 0)
- ccb.RemoveAbove(t->remove);
- if (!CorrectCC(&ccb, t, "before copy (CharClassBuilder)"))
- nfail++;
- CharClass* cc = ccb.GetCharClass();
- if (!CorrectCC(cc, t, "before copy (CharClass)"))
- nfail++;
- cc->Delete();
-
- CharClassBuilder *ccb1 = ccb.Copy();
- if (!CorrectCC(ccb1, t, "after copy (CharClassBuilder)"))
- nfail++;
- cc = ccb.GetCharClass();
- if (!CorrectCC(cc, t, "after copy (CharClass)"))
- nfail++;
- cc->Delete();
- delete ccb1;
- }
- EXPECT_EQ(nfail, 0);
-}
-
-} // namespace re2
+// Copyright 2006 The RE2 Authors. All Rights Reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Test character class manipulations.
+
+#include <stdio.h>
+
+#include "library/cpp/testing/gtest/gtest.h"
+#include "util/utf.h"
+#include "re2/regexp.h"
+
+namespace re2 {
+
+struct CCTest {
+ struct {
+ Rune lo;
+ Rune hi;
+ } add[10];
+ int remove;
+ struct {
+ Rune lo;
+ Rune hi;
+ } final[10];
+};
+
+static CCTest tests[] = {
+ { { { 10, 20 }, {-1} }, -1,
+ { { 10, 20 }, {-1} } },
+
+ { { { 10, 20 }, { 20, 30 }, {-1} }, -1,
+ { { 10, 30 }, {-1} } },
+
+ { { { 10, 20 }, { 30, 40 }, { 20, 30 }, {-1} }, -1,
+ { { 10, 40 }, {-1} } },
+
+ { { { 0, 50 }, { 20, 30 }, {-1} }, -1,
+ { { 0, 50 }, {-1} } },
+
+ { { { 10, 11 }, { 13, 14 }, { 16, 17 }, { 19, 20 }, { 22, 23 }, {-1} }, -1,
+ { { 10, 11 }, { 13, 14 }, { 16, 17 }, { 19, 20 }, { 22, 23 }, {-1} } },
+
+ { { { 13, 14 }, { 10, 11 }, { 22, 23 }, { 19, 20 }, { 16, 17 }, {-1} }, -1,
+ { { 10, 11 }, { 13, 14 }, { 16, 17 }, { 19, 20 }, { 22, 23 }, {-1} } },
+
+ { { { 13, 14 }, { 10, 11 }, { 22, 23 }, { 19, 20 }, { 16, 17 }, {-1} }, -1,
+ { { 10, 11 }, { 13, 14 }, { 16, 17 }, { 19, 20 }, { 22, 23 }, {-1} } },
+
+ { { { 13, 14 }, { 10, 11 }, { 22, 23 }, { 19, 20 }, { 16, 17 }, { 5, 25 }, {-1} }, -1,
+ { { 5, 25 }, {-1} } },
+
+ { { { 13, 14 }, { 10, 11 }, { 22, 23 }, { 19, 20 }, { 16, 17 }, { 12, 21 }, {-1} }, -1,
+ { { 10, 23 }, {-1} } },
+
+ // These check boundary cases during negation.
+ { { { 0, Runemax }, {-1} }, -1,
+ { { 0, Runemax }, {-1} } },
+
+ { { { 0, 50 }, {-1} }, -1,
+ { { 0, 50 }, {-1} } },
+
+ { { { 50, Runemax }, {-1} }, -1,
+ { { 50, Runemax }, {-1} } },
+
+ // Check RemoveAbove.
+ { { { 50, Runemax }, {-1} }, 255,
+ { { 50, 255 }, {-1} } },
+
+ { { { 50, Runemax }, {-1} }, 65535,
+ { { 50, 65535 }, {-1} } },
+
+ { { { 50, Runemax }, {-1} }, Runemax,
+ { { 50, Runemax }, {-1} } },
+
+ { { { 50, 60 }, { 250, 260 }, { 350, 360 }, {-1} }, 255,
+ { { 50, 60 }, { 250, 255 }, {-1} } },
+
+ { { { 50, 60 }, {-1} }, 255,
+ { { 50, 60 }, {-1} } },
+
+ { { { 350, 360 }, {-1} }, 255,
+ { {-1} } },
+
+ { { {-1} }, 255,
+ { {-1} } },
+};
+
+template <typename CharClass>
+static void Broke(const char *desc, const CCTest* t, CharClass* cc) {
+ if (t == NULL) {
+ printf("\t%s:", desc);
+ } else {
+ printf("\n");
+ printf("CharClass added: [%s]", desc);
+ for (int k = 0; t->add[k].lo >= 0; k++)
+ printf(" %d-%d", t->add[k].lo, t->add[k].hi);
+ printf("\n");
+ if (t->remove >= 0)
+ printf("Removed > %d\n", t->remove);
+ printf("\twant:");
+ for (int k = 0; t->final[k].lo >= 0; k++)
+ printf(" %d-%d", t->final[k].lo, t->final[k].hi);
+ printf("\n");
+ printf("\thave:");
+ }
+
+ for (typename CharClass::iterator it = cc->begin(); it != cc->end(); ++it)
+ printf(" %d-%d", it->lo, it->hi);
+ printf("\n");
+}
+
+bool ShouldContain(CCTest *t, int x) {
+ for (int j = 0; t->final[j].lo >= 0; j++)
+ if (t->final[j].lo <= x && x <= t->final[j].hi)
+ return true;
+ return false;
+}
+
+// Helpers to make templated CorrectCC work with both CharClass and CharClassBuilder.
+
+CharClass* Negate(CharClass *cc) {
+ return cc->Negate();
+}
+
+void Delete(CharClass* cc) {
+ cc->Delete();
+}
+
+CharClassBuilder* Negate(CharClassBuilder* cc) {
+ CharClassBuilder* ncc = cc->Copy();
+ ncc->Negate();
+ return ncc;
+}
+
+void Delete(CharClassBuilder* cc) {
+ delete cc;
+}
+
+template <typename CharClass>
+bool CorrectCC(CharClass *cc, CCTest *t, const char *desc) {
+ typename CharClass::iterator it = cc->begin();
+ int size = 0;
+ for (int j = 0; t->final[j].lo >= 0; j++, ++it) {
+ if (it == cc->end() ||
+ it->lo != t->final[j].lo ||
+ it->hi != t->final[j].hi) {
+ Broke(desc, t, cc);
+ return false;
+ }
+ size += it->hi - it->lo + 1;
+ }
+ if (it != cc->end()) {
+ Broke(desc, t, cc);
+ return false;
+ }
+ if (cc->size() != size) {
+ Broke(desc, t, cc);
+ printf("wrong size: want %d have %d\n", size, cc->size());
+ return false;
+ }
+
+ for (int j = 0; j < 101; j++) {
+ if (j == 100)
+ j = Runemax;
+ if (ShouldContain(t, j) != cc->Contains(j)) {
+ Broke(desc, t, cc);
+ printf("want contains(%d)=%d, got %d\n",
+ j, ShouldContain(t, j), cc->Contains(j));
+ return false;
+ }
+ }
+
+ CharClass* ncc = Negate(cc);
+ for (int j = 0; j < 101; j++) {
+ if (j == 100)
+ j = Runemax;
+ if (ShouldContain(t, j) == ncc->Contains(j)) {
+ Broke(desc, t, cc);
+ Broke("ncc", NULL, ncc);
+ printf("want ncc contains(%d)!=%d, got %d\n",
+ j, ShouldContain(t, j), ncc->Contains(j));
+ Delete(ncc);
+ return false;
+ }
+ if (ncc->size() != Runemax+1 - cc->size()) {
+ Broke(desc, t, cc);
+ Broke("ncc", NULL, ncc);
+ printf("ncc size should be %d is %d\n",
+ Runemax+1 - cc->size(), ncc->size());
+ Delete(ncc);
+ return false;
+ }
+ }
+ Delete(ncc);
+ return true;
+}
+
+TEST(TestCharClassBuilder, Adds) {
+ int nfail = 0;
+ for (size_t i = 0; i < arraysize(tests); i++) {
+ CharClassBuilder ccb;
+ CCTest* t = &tests[i];
+ for (int j = 0; t->add[j].lo >= 0; j++)
+ ccb.AddRange(t->add[j].lo, t->add[j].hi);
+ if (t->remove >= 0)
+ ccb.RemoveAbove(t->remove);
+ if (!CorrectCC(&ccb, t, "before copy (CharClassBuilder)"))
+ nfail++;
+ CharClass* cc = ccb.GetCharClass();
+ if (!CorrectCC(cc, t, "before copy (CharClass)"))
+ nfail++;
+ cc->Delete();
+
+ CharClassBuilder *ccb1 = ccb.Copy();
+ if (!CorrectCC(ccb1, t, "after copy (CharClassBuilder)"))
+ nfail++;
+ cc = ccb.GetCharClass();
+ if (!CorrectCC(cc, t, "after copy (CharClass)"))
+ nfail++;
+ cc->Delete();
+ delete ccb1;
+ }
+ EXPECT_EQ(nfail, 0);
+}
+
+} // namespace re2
diff --git a/contrib/libs/re2/re2/testing/compile_test.cc b/contrib/libs/re2/re2/testing/compile_test.cc
index 107ad31b64..cf1c4cbf97 100644
--- a/contrib/libs/re2/re2/testing/compile_test.cc
+++ b/contrib/libs/re2/re2/testing/compile_test.cc
@@ -1,427 +1,427 @@
-// Copyright 2007 The RE2 Authors. All Rights Reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// Test prog.cc, compile.cc
-
-#include <string>
-
-#include "library/cpp/testing/gtest/gtest.h"
-#include "util/logging.h"
-#include "re2/regexp.h"
-#include "re2/prog.h"
-
-namespace re2 {
-
-// Simple input/output tests checking that
-// the regexp compiles to the expected code.
-// These are just to sanity check the basic implementation.
-// The real confidence tests happen by testing the NFA/DFA
-// that run the compiled code.
-
-struct Test {
- const char* regexp;
- const char* code;
-};
-
-static Test tests[] = {
- { "a",
- "3. byte [61-61] 0 -> 4\n"
- "4. match! 0\n" },
- { "ab",
- "3. byte [61-61] 0 -> 4\n"
- "4. byte [62-62] 0 -> 5\n"
- "5. match! 0\n" },
- { "a|c",
- "3+ byte [61-61] 0 -> 5\n"
- "4. byte [63-63] 0 -> 5\n"
- "5. match! 0\n" },
- { "a|b",
- "3. byte [61-62] 0 -> 4\n"
- "4. match! 0\n" },
- { "[ab]",
- "3. byte [61-62] 0 -> 4\n"
- "4. match! 0\n" },
- { "a+",
- "3. byte [61-61] 0 -> 4\n"
- "4+ nop -> 3\n"
- "5. match! 0\n" },
- { "a+?",
- "3. byte [61-61] 0 -> 4\n"
- "4+ match! 0\n"
- "5. nop -> 3\n" },
- { "a*",
- "3+ byte [61-61] 1 -> 3\n"
- "4. match! 0\n" },
- { "a*?",
- "3+ match! 0\n"
- "4. byte [61-61] 0 -> 3\n" },
- { "a?",
- "3+ byte [61-61] 1 -> 5\n"
- "4. nop -> 5\n"
- "5. match! 0\n" },
- { "a??",
- "3+ nop -> 5\n"
- "4. byte [61-61] 0 -> 5\n"
- "5. match! 0\n" },
- { "a{4}",
- "3. byte [61-61] 0 -> 4\n"
- "4. byte [61-61] 0 -> 5\n"
- "5. byte [61-61] 0 -> 6\n"
- "6. byte [61-61] 0 -> 7\n"
- "7. match! 0\n" },
- { "(a)",
- "3. capture 2 -> 4\n"
- "4. byte [61-61] 0 -> 5\n"
- "5. capture 3 -> 6\n"
- "6. match! 0\n" },
- { "(?:a)",
- "3. byte [61-61] 0 -> 4\n"
- "4. match! 0\n" },
- { "",
- "3. match! 0\n" },
- { ".",
- "3+ byte [00-09] 0 -> 5\n"
- "4. byte [0b-ff] 0 -> 5\n"
- "5. match! 0\n" },
- { "[^ab]",
- "3+ byte [00-09] 0 -> 6\n"
- "4+ byte [0b-60] 0 -> 6\n"
- "5. byte [63-ff] 0 -> 6\n"
- "6. match! 0\n" },
- { "[Aa]",
- "3. byte/i [61-61] 0 -> 4\n"
- "4. match! 0\n" },
- { "\\C+",
- "3. byte [00-ff] 0 -> 4\n"
- "4+ altmatch -> 5 | 6\n"
- "5+ nop -> 3\n"
- "6. match! 0\n" },
- { "\\C*",
- "3+ altmatch -> 4 | 5\n"
- "4+ byte [00-ff] 1 -> 3\n"
- "5. match! 0\n" },
- { "\\C?",
- "3+ byte [00-ff] 1 -> 5\n"
- "4. nop -> 5\n"
- "5. match! 0\n" },
- // Issue 20992936
- { "[[-`]",
- "3. byte [5b-60] 0 -> 4\n"
- "4. match! 0\n" },
- // Issue 310
- { "(?:|a)*",
- "3+ nop -> 7\n"
- "4. nop -> 9\n"
- "5+ nop -> 7\n"
- "6. nop -> 9\n"
- "7+ nop -> 5\n"
- "8. byte [61-61] 0 -> 5\n"
- "9. match! 0\n" },
- { "(?:|a)+",
- "3+ nop -> 5\n"
- "4. byte [61-61] 0 -> 5\n"
- "5+ nop -> 3\n"
- "6. match! 0\n" },
-};
-
-TEST(TestRegexpCompileToProg, Simple) {
- int failed = 0;
- for (size_t i = 0; i < arraysize(tests); i++) {
- const re2::Test& t = tests[i];
- Regexp* re = Regexp::Parse(t.regexp, Regexp::PerlX|Regexp::Latin1, NULL);
- if (re == NULL) {
- LOG(ERROR) << "Cannot parse: " << t.regexp;
- failed++;
- continue;
- }
- Prog* prog = re->CompileToProg(0);
- if (prog == NULL) {
- LOG(ERROR) << "Cannot compile: " << t.regexp;
- re->Decref();
- failed++;
- continue;
- }
- ASSERT_TRUE(re->CompileToProg(1) == NULL);
- std::string s = prog->Dump();
- if (s != t.code) {
- LOG(ERROR) << "Incorrect compiled code for: " << t.regexp;
- LOG(ERROR) << "Want:\n" << t.code;
- LOG(ERROR) << "Got:\n" << s;
- failed++;
- }
- delete prog;
- re->Decref();
- }
- EXPECT_EQ(failed, 0);
-}
-
-static void DumpByteMap(StringPiece pattern, Regexp::ParseFlags flags,
- std::string* bytemap) {
- Regexp* re = Regexp::Parse(pattern, flags, NULL);
- EXPECT_TRUE(re != NULL);
-
- {
- Prog* prog = re->CompileToProg(0);
- EXPECT_TRUE(prog != NULL);
- *bytemap = prog->DumpByteMap();
- delete prog;
- }
-
- {
- Prog* prog = re->CompileToReverseProg(0);
- EXPECT_TRUE(prog != NULL);
- EXPECT_EQ(*bytemap, prog->DumpByteMap());
- delete prog;
- }
-
- re->Decref();
-}
-
-TEST(TestCompile, Latin1Ranges) {
- // The distinct byte ranges involved in the Latin-1 dot ([^\n]).
-
- std::string bytemap;
-
- DumpByteMap(".", Regexp::PerlX|Regexp::Latin1, &bytemap);
- EXPECT_EQ("[00-09] -> 0\n"
- "[0a-0a] -> 1\n"
- "[0b-ff] -> 0\n",
- bytemap);
-}
-
-TEST(TestCompile, OtherByteMapTests) {
- std::string bytemap;
-
- // Test that "absent" ranges are mapped to the same byte class.
- DumpByteMap("[0-9A-Fa-f]+", Regexp::PerlX|Regexp::Latin1, &bytemap);
- EXPECT_EQ("[00-2f] -> 0\n"
- "[30-39] -> 1\n"
- "[3a-40] -> 0\n"
- "[41-46] -> 1\n"
- "[47-60] -> 0\n"
- "[61-66] -> 1\n"
- "[67-ff] -> 0\n",
- bytemap);
-
- // Test the byte classes for \b.
- DumpByteMap("\\b", Regexp::LikePerl|Regexp::Latin1, &bytemap);
- EXPECT_EQ("[00-2f] -> 0\n"
- "[30-39] -> 1\n"
- "[3a-40] -> 0\n"
- "[41-5a] -> 1\n"
- "[5b-5e] -> 0\n"
- "[5f-5f] -> 1\n"
- "[60-60] -> 0\n"
- "[61-7a] -> 1\n"
- "[7b-ff] -> 0\n",
- bytemap);
-
- // Bug in the ASCII case-folding optimization created too many byte classes.
- DumpByteMap("[^_]", Regexp::LikePerl|Regexp::Latin1, &bytemap);
- EXPECT_EQ("[00-5e] -> 0\n"
- "[5f-5f] -> 1\n"
- "[60-ff] -> 0\n",
- bytemap);
-}
-
-TEST(TestCompile, UTF8Ranges) {
- // The distinct byte ranges involved in the UTF-8 dot ([^\n]).
- // Once, erroneously split between 0x3f and 0x40 because it is
- // a 6-bit boundary.
-
- std::string bytemap;
-
- DumpByteMap(".", Regexp::PerlX, &bytemap);
- EXPECT_EQ("[00-09] -> 0\n"
- "[0a-0a] -> 1\n"
- "[0b-7f] -> 0\n"
- "[80-bf] -> 2\n"
- "[c0-c1] -> 1\n"
- "[c2-df] -> 3\n"
- "[e0-ef] -> 4\n"
- "[f0-f4] -> 5\n"
- "[f5-ff] -> 1\n",
- bytemap);
-}
-
-TEST(TestCompile, InsufficientMemory) {
- Regexp* re = Regexp::Parse(
- "^(?P<name1>[^\\s]+)\\s+(?P<name2>[^\\s]+)\\s+(?P<name3>.+)$",
- Regexp::LikePerl, NULL);
- EXPECT_TRUE(re != NULL);
- Prog* prog = re->CompileToProg(850);
- // If the memory budget has been exhausted, compilation should fail
- // and return NULL instead of trying to do anything with NoMatch().
- EXPECT_TRUE(prog == NULL);
- re->Decref();
-}
-
-static void Dump(StringPiece pattern, Regexp::ParseFlags flags,
- std::string* forward, std::string* reverse) {
- Regexp* re = Regexp::Parse(pattern, flags, NULL);
- EXPECT_TRUE(re != NULL);
-
- if (forward != NULL) {
- Prog* prog = re->CompileToProg(0);
- EXPECT_TRUE(prog != NULL);
- *forward = prog->Dump();
- delete prog;
- }
-
- if (reverse != NULL) {
- Prog* prog = re->CompileToReverseProg(0);
- EXPECT_TRUE(prog != NULL);
- *reverse = prog->Dump();
- delete prog;
- }
-
- re->Decref();
-}
-
-TEST(TestCompile, Bug26705922) {
- // Bug in the compiler caused inefficient bytecode to be generated for Unicode
- // groups: common suffixes were cached, but common prefixes were not factored.
-
- std::string forward, reverse;
-
- Dump("[\\x{10000}\\x{10010}]", Regexp::LikePerl, &forward, &reverse);
- EXPECT_EQ("3. byte [f0-f0] 0 -> 4\n"
- "4. byte [90-90] 0 -> 5\n"
- "5. byte [80-80] 0 -> 6\n"
- "6+ byte [80-80] 0 -> 8\n"
- "7. byte [90-90] 0 -> 8\n"
- "8. match! 0\n",
- forward);
- EXPECT_EQ("3+ byte [80-80] 0 -> 5\n"
- "4. byte [90-90] 0 -> 5\n"
- "5. byte [80-80] 0 -> 6\n"
- "6. byte [90-90] 0 -> 7\n"
- "7. byte [f0-f0] 0 -> 8\n"
- "8. match! 0\n",
- reverse);
-
- Dump("[\\x{8000}-\\x{10FFF}]", Regexp::LikePerl, &forward, &reverse);
- EXPECT_EQ("3+ byte [e8-ef] 0 -> 5\n"
- "4. byte [f0-f0] 0 -> 8\n"
- "5. byte [80-bf] 0 -> 6\n"
- "6. byte [80-bf] 0 -> 7\n"
- "7. match! 0\n"
- "8. byte [90-90] 0 -> 5\n",
- forward);
- EXPECT_EQ("3. byte [80-bf] 0 -> 4\n"
- "4. byte [80-bf] 0 -> 5\n"
- "5+ byte [e8-ef] 0 -> 7\n"
- "6. byte [90-90] 0 -> 8\n"
- "7. match! 0\n"
- "8. byte [f0-f0] 0 -> 7\n",
- reverse);
-
- Dump("[\\x{80}-\\x{10FFFF}]", Regexp::LikePerl, &forward, &reverse);
- EXPECT_EQ("3+ byte [c2-df] 0 -> 6\n"
- "4+ byte [e0-ef] 0 -> 8\n"
- "5. byte [f0-f4] 0 -> 9\n"
- "6. byte [80-bf] 0 -> 7\n"
- "7. match! 0\n"
- "8. byte [80-bf] 0 -> 6\n"
- "9. byte [80-bf] 0 -> 8\n",
- forward);
- EXPECT_EQ("3. byte [80-bf] 0 -> 4\n"
- "4+ byte [c2-df] 0 -> 6\n"
- "5. byte [80-bf] 0 -> 7\n"
- "6. match! 0\n"
- "7+ byte [e0-ef] 0 -> 6\n"
- "8. byte [80-bf] 0 -> 9\n"
- "9. byte [f0-f4] 0 -> 6\n",
- reverse);
-}
-
-TEST(TestCompile, Bug35237384) {
- // Bug in the compiler caused inefficient bytecode to be generated for
- // nested nullable subexpressions.
-
- std::string forward;
-
- Dump("a**{3,}", Regexp::Latin1|Regexp::NeverCapture, &forward, NULL);
- EXPECT_EQ("3+ byte [61-61] 1 -> 3\n"
- "4. nop -> 5\n"
- "5+ byte [61-61] 1 -> 5\n"
- "6. nop -> 7\n"
- "7+ byte [61-61] 1 -> 7\n"
- "8. match! 0\n",
- forward);
-
- Dump("(a*|b*)*{3,}", Regexp::Latin1|Regexp::NeverCapture, &forward, NULL);
- EXPECT_EQ("3+ nop -> 28\n"
- "4. nop -> 30\n"
- "5+ byte [61-61] 1 -> 5\n"
- "6. nop -> 32\n"
- "7+ byte [61-61] 1 -> 7\n"
- "8. nop -> 26\n"
- "9+ byte [61-61] 1 -> 9\n"
- "10. nop -> 20\n"
- "11+ byte [62-62] 1 -> 11\n"
- "12. nop -> 20\n"
- "13+ byte [62-62] 1 -> 13\n"
- "14. nop -> 26\n"
- "15+ byte [62-62] 1 -> 15\n"
- "16. nop -> 32\n"
- "17+ nop -> 9\n"
- "18. nop -> 11\n"
- "19. match! 0\n"
- "20+ nop -> 17\n"
- "21. nop -> 19\n"
- "22+ nop -> 7\n"
- "23. nop -> 13\n"
- "24+ nop -> 17\n"
- "25. nop -> 19\n"
- "26+ nop -> 22\n"
- "27. nop -> 24\n"
- "28+ nop -> 5\n"
- "29. nop -> 15\n"
- "30+ nop -> 22\n"
- "31. nop -> 24\n"
- "32+ nop -> 28\n"
- "33. nop -> 30\n",
- forward);
-
- Dump("((|S.+)+|(|S.+)+|){2}", Regexp::Latin1|Regexp::NeverCapture, &forward, NULL);
- EXPECT_EQ("3+ nop -> 36\n"
- "4+ nop -> 31\n"
- "5. nop -> 33\n"
- "6+ byte [00-09] 0 -> 8\n"
- "7. byte [0b-ff] 0 -> 8\n"
- "8+ nop -> 6\n"
- "9+ nop -> 29\n"
- "10. nop -> 28\n"
- "11+ byte [00-09] 0 -> 13\n"
- "12. byte [0b-ff] 0 -> 13\n"
- "13+ nop -> 11\n"
- "14+ nop -> 26\n"
- "15. nop -> 28\n"
- "16+ byte [00-09] 0 -> 18\n"
- "17. byte [0b-ff] 0 -> 18\n"
- "18+ nop -> 16\n"
- "19+ nop -> 36\n"
- "20. nop -> 33\n"
- "21+ byte [00-09] 0 -> 23\n"
- "22. byte [0b-ff] 0 -> 23\n"
- "23+ nop -> 21\n"
- "24+ nop -> 31\n"
- "25. nop -> 33\n"
- "26+ nop -> 28\n"
- "27. byte [53-53] 0 -> 11\n"
- "28. match! 0\n"
- "29+ nop -> 28\n"
- "30. byte [53-53] 0 -> 6\n"
- "31+ nop -> 33\n"
- "32. byte [53-53] 0 -> 21\n"
- "33+ nop -> 29\n"
- "34+ nop -> 26\n"
- "35. nop -> 28\n"
- "36+ nop -> 33\n"
- "37. byte [53-53] 0 -> 16\n",
- forward);
-}
-
-} // namespace re2
+// Copyright 2007 The RE2 Authors. All Rights Reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Test prog.cc, compile.cc
+
+#include <string>
+
+#include "library/cpp/testing/gtest/gtest.h"
+#include "util/logging.h"
+#include "re2/regexp.h"
+#include "re2/prog.h"
+
+namespace re2 {
+
+// Simple input/output tests checking that
+// the regexp compiles to the expected code.
+// These are just to sanity check the basic implementation.
+// The real confidence tests happen by testing the NFA/DFA
+// that run the compiled code.
+
+struct Test {
+ const char* regexp;
+ const char* code;
+};
+
+static Test tests[] = {
+ { "a",
+ "3. byte [61-61] 0 -> 4\n"
+ "4. match! 0\n" },
+ { "ab",
+ "3. byte [61-61] 0 -> 4\n"
+ "4. byte [62-62] 0 -> 5\n"
+ "5. match! 0\n" },
+ { "a|c",
+ "3+ byte [61-61] 0 -> 5\n"
+ "4. byte [63-63] 0 -> 5\n"
+ "5. match! 0\n" },
+ { "a|b",
+ "3. byte [61-62] 0 -> 4\n"
+ "4. match! 0\n" },
+ { "[ab]",
+ "3. byte [61-62] 0 -> 4\n"
+ "4. match! 0\n" },
+ { "a+",
+ "3. byte [61-61] 0 -> 4\n"
+ "4+ nop -> 3\n"
+ "5. match! 0\n" },
+ { "a+?",
+ "3. byte [61-61] 0 -> 4\n"
+ "4+ match! 0\n"
+ "5. nop -> 3\n" },
+ { "a*",
+ "3+ byte [61-61] 1 -> 3\n"
+ "4. match! 0\n" },
+ { "a*?",
+ "3+ match! 0\n"
+ "4. byte [61-61] 0 -> 3\n" },
+ { "a?",
+ "3+ byte [61-61] 1 -> 5\n"
+ "4. nop -> 5\n"
+ "5. match! 0\n" },
+ { "a??",
+ "3+ nop -> 5\n"
+ "4. byte [61-61] 0 -> 5\n"
+ "5. match! 0\n" },
+ { "a{4}",
+ "3. byte [61-61] 0 -> 4\n"
+ "4. byte [61-61] 0 -> 5\n"
+ "5. byte [61-61] 0 -> 6\n"
+ "6. byte [61-61] 0 -> 7\n"
+ "7. match! 0\n" },
+ { "(a)",
+ "3. capture 2 -> 4\n"
+ "4. byte [61-61] 0 -> 5\n"
+ "5. capture 3 -> 6\n"
+ "6. match! 0\n" },
+ { "(?:a)",
+ "3. byte [61-61] 0 -> 4\n"
+ "4. match! 0\n" },
+ { "",
+ "3. match! 0\n" },
+ { ".",
+ "3+ byte [00-09] 0 -> 5\n"
+ "4. byte [0b-ff] 0 -> 5\n"
+ "5. match! 0\n" },
+ { "[^ab]",
+ "3+ byte [00-09] 0 -> 6\n"
+ "4+ byte [0b-60] 0 -> 6\n"
+ "5. byte [63-ff] 0 -> 6\n"
+ "6. match! 0\n" },
+ { "[Aa]",
+ "3. byte/i [61-61] 0 -> 4\n"
+ "4. match! 0\n" },
+ { "\\C+",
+ "3. byte [00-ff] 0 -> 4\n"
+ "4+ altmatch -> 5 | 6\n"
+ "5+ nop -> 3\n"
+ "6. match! 0\n" },
+ { "\\C*",
+ "3+ altmatch -> 4 | 5\n"
+ "4+ byte [00-ff] 1 -> 3\n"
+ "5. match! 0\n" },
+ { "\\C?",
+ "3+ byte [00-ff] 1 -> 5\n"
+ "4. nop -> 5\n"
+ "5. match! 0\n" },
+ // Issue 20992936
+ { "[[-`]",
+ "3. byte [5b-60] 0 -> 4\n"
+ "4. match! 0\n" },
+ // Issue 310
+ { "(?:|a)*",
+ "3+ nop -> 7\n"
+ "4. nop -> 9\n"
+ "5+ nop -> 7\n"
+ "6. nop -> 9\n"
+ "7+ nop -> 5\n"
+ "8. byte [61-61] 0 -> 5\n"
+ "9. match! 0\n" },
+ { "(?:|a)+",
+ "3+ nop -> 5\n"
+ "4. byte [61-61] 0 -> 5\n"
+ "5+ nop -> 3\n"
+ "6. match! 0\n" },
+};
+
+TEST(TestRegexpCompileToProg, Simple) {
+ int failed = 0;
+ for (size_t i = 0; i < arraysize(tests); i++) {
+ const re2::Test& t = tests[i];
+ Regexp* re = Regexp::Parse(t.regexp, Regexp::PerlX|Regexp::Latin1, NULL);
+ if (re == NULL) {
+ LOG(ERROR) << "Cannot parse: " << t.regexp;
+ failed++;
+ continue;
+ }
+ Prog* prog = re->CompileToProg(0);
+ if (prog == NULL) {
+ LOG(ERROR) << "Cannot compile: " << t.regexp;
+ re->Decref();
+ failed++;
+ continue;
+ }
+ ASSERT_TRUE(re->CompileToProg(1) == NULL);
+ std::string s = prog->Dump();
+ if (s != t.code) {
+ LOG(ERROR) << "Incorrect compiled code for: " << t.regexp;
+ LOG(ERROR) << "Want:\n" << t.code;
+ LOG(ERROR) << "Got:\n" << s;
+ failed++;
+ }
+ delete prog;
+ re->Decref();
+ }
+ EXPECT_EQ(failed, 0);
+}
+
+static void DumpByteMap(StringPiece pattern, Regexp::ParseFlags flags,
+ std::string* bytemap) {
+ Regexp* re = Regexp::Parse(pattern, flags, NULL);
+ EXPECT_TRUE(re != NULL);
+
+ {
+ Prog* prog = re->CompileToProg(0);
+ EXPECT_TRUE(prog != NULL);
+ *bytemap = prog->DumpByteMap();
+ delete prog;
+ }
+
+ {
+ Prog* prog = re->CompileToReverseProg(0);
+ EXPECT_TRUE(prog != NULL);
+ EXPECT_EQ(*bytemap, prog->DumpByteMap());
+ delete prog;
+ }
+
+ re->Decref();
+}
+
+TEST(TestCompile, Latin1Ranges) {
+ // The distinct byte ranges involved in the Latin-1 dot ([^\n]).
+
+ std::string bytemap;
+
+ DumpByteMap(".", Regexp::PerlX|Regexp::Latin1, &bytemap);
+ EXPECT_EQ("[00-09] -> 0\n"
+ "[0a-0a] -> 1\n"
+ "[0b-ff] -> 0\n",
+ bytemap);
+}
+
+TEST(TestCompile, OtherByteMapTests) {
+ std::string bytemap;
+
+ // Test that "absent" ranges are mapped to the same byte class.
+ DumpByteMap("[0-9A-Fa-f]+", Regexp::PerlX|Regexp::Latin1, &bytemap);
+ EXPECT_EQ("[00-2f] -> 0\n"
+ "[30-39] -> 1\n"
+ "[3a-40] -> 0\n"
+ "[41-46] -> 1\n"
+ "[47-60] -> 0\n"
+ "[61-66] -> 1\n"
+ "[67-ff] -> 0\n",
+ bytemap);
+
+ // Test the byte classes for \b.
+ DumpByteMap("\\b", Regexp::LikePerl|Regexp::Latin1, &bytemap);
+ EXPECT_EQ("[00-2f] -> 0\n"
+ "[30-39] -> 1\n"
+ "[3a-40] -> 0\n"
+ "[41-5a] -> 1\n"
+ "[5b-5e] -> 0\n"
+ "[5f-5f] -> 1\n"
+ "[60-60] -> 0\n"
+ "[61-7a] -> 1\n"
+ "[7b-ff] -> 0\n",
+ bytemap);
+
+ // Bug in the ASCII case-folding optimization created too many byte classes.
+ DumpByteMap("[^_]", Regexp::LikePerl|Regexp::Latin1, &bytemap);
+ EXPECT_EQ("[00-5e] -> 0\n"
+ "[5f-5f] -> 1\n"
+ "[60-ff] -> 0\n",
+ bytemap);
+}
+
+TEST(TestCompile, UTF8Ranges) {
+ // The distinct byte ranges involved in the UTF-8 dot ([^\n]).
+ // Once, erroneously split between 0x3f and 0x40 because it is
+ // a 6-bit boundary.
+
+ std::string bytemap;
+
+ DumpByteMap(".", Regexp::PerlX, &bytemap);
+ EXPECT_EQ("[00-09] -> 0\n"
+ "[0a-0a] -> 1\n"
+ "[0b-7f] -> 0\n"
+ "[80-bf] -> 2\n"
+ "[c0-c1] -> 1\n"
+ "[c2-df] -> 3\n"
+ "[e0-ef] -> 4\n"
+ "[f0-f4] -> 5\n"
+ "[f5-ff] -> 1\n",
+ bytemap);
+}
+
+TEST(TestCompile, InsufficientMemory) {
+ Regexp* re = Regexp::Parse(
+ "^(?P<name1>[^\\s]+)\\s+(?P<name2>[^\\s]+)\\s+(?P<name3>.+)$",
+ Regexp::LikePerl, NULL);
+ EXPECT_TRUE(re != NULL);
+ Prog* prog = re->CompileToProg(850);
+ // If the memory budget has been exhausted, compilation should fail
+ // and return NULL instead of trying to do anything with NoMatch().
+ EXPECT_TRUE(prog == NULL);
+ re->Decref();
+}
+
+static void Dump(StringPiece pattern, Regexp::ParseFlags flags,
+ std::string* forward, std::string* reverse) {
+ Regexp* re = Regexp::Parse(pattern, flags, NULL);
+ EXPECT_TRUE(re != NULL);
+
+ if (forward != NULL) {
+ Prog* prog = re->CompileToProg(0);
+ EXPECT_TRUE(prog != NULL);
+ *forward = prog->Dump();
+ delete prog;
+ }
+
+ if (reverse != NULL) {
+ Prog* prog = re->CompileToReverseProg(0);
+ EXPECT_TRUE(prog != NULL);
+ *reverse = prog->Dump();
+ delete prog;
+ }
+
+ re->Decref();
+}
+
+TEST(TestCompile, Bug26705922) {
+ // Bug in the compiler caused inefficient bytecode to be generated for Unicode
+ // groups: common suffixes were cached, but common prefixes were not factored.
+
+ std::string forward, reverse;
+
+ Dump("[\\x{10000}\\x{10010}]", Regexp::LikePerl, &forward, &reverse);
+ EXPECT_EQ("3. byte [f0-f0] 0 -> 4\n"
+ "4. byte [90-90] 0 -> 5\n"
+ "5. byte [80-80] 0 -> 6\n"
+ "6+ byte [80-80] 0 -> 8\n"
+ "7. byte [90-90] 0 -> 8\n"
+ "8. match! 0\n",
+ forward);
+ EXPECT_EQ("3+ byte [80-80] 0 -> 5\n"
+ "4. byte [90-90] 0 -> 5\n"
+ "5. byte [80-80] 0 -> 6\n"
+ "6. byte [90-90] 0 -> 7\n"
+ "7. byte [f0-f0] 0 -> 8\n"
+ "8. match! 0\n",
+ reverse);
+
+ Dump("[\\x{8000}-\\x{10FFF}]", Regexp::LikePerl, &forward, &reverse);
+ EXPECT_EQ("3+ byte [e8-ef] 0 -> 5\n"
+ "4. byte [f0-f0] 0 -> 8\n"
+ "5. byte [80-bf] 0 -> 6\n"
+ "6. byte [80-bf] 0 -> 7\n"
+ "7. match! 0\n"
+ "8. byte [90-90] 0 -> 5\n",
+ forward);
+ EXPECT_EQ("3. byte [80-bf] 0 -> 4\n"
+ "4. byte [80-bf] 0 -> 5\n"
+ "5+ byte [e8-ef] 0 -> 7\n"
+ "6. byte [90-90] 0 -> 8\n"
+ "7. match! 0\n"
+ "8. byte [f0-f0] 0 -> 7\n",
+ reverse);
+
+ Dump("[\\x{80}-\\x{10FFFF}]", Regexp::LikePerl, &forward, &reverse);
+ EXPECT_EQ("3+ byte [c2-df] 0 -> 6\n"
+ "4+ byte [e0-ef] 0 -> 8\n"
+ "5. byte [f0-f4] 0 -> 9\n"
+ "6. byte [80-bf] 0 -> 7\n"
+ "7. match! 0\n"
+ "8. byte [80-bf] 0 -> 6\n"
+ "9. byte [80-bf] 0 -> 8\n",
+ forward);
+ EXPECT_EQ("3. byte [80-bf] 0 -> 4\n"
+ "4+ byte [c2-df] 0 -> 6\n"
+ "5. byte [80-bf] 0 -> 7\n"
+ "6. match! 0\n"
+ "7+ byte [e0-ef] 0 -> 6\n"
+ "8. byte [80-bf] 0 -> 9\n"
+ "9. byte [f0-f4] 0 -> 6\n",
+ reverse);
+}
+
+TEST(TestCompile, Bug35237384) {
+ // Bug in the compiler caused inefficient bytecode to be generated for
+ // nested nullable subexpressions.
+
+ std::string forward;
+
+ Dump("a**{3,}", Regexp::Latin1|Regexp::NeverCapture, &forward, NULL);
+ EXPECT_EQ("3+ byte [61-61] 1 -> 3\n"
+ "4. nop -> 5\n"
+ "5+ byte [61-61] 1 -> 5\n"
+ "6. nop -> 7\n"
+ "7+ byte [61-61] 1 -> 7\n"
+ "8. match! 0\n",
+ forward);
+
+ Dump("(a*|b*)*{3,}", Regexp::Latin1|Regexp::NeverCapture, &forward, NULL);
+ EXPECT_EQ("3+ nop -> 28\n"
+ "4. nop -> 30\n"
+ "5+ byte [61-61] 1 -> 5\n"
+ "6. nop -> 32\n"
+ "7+ byte [61-61] 1 -> 7\n"
+ "8. nop -> 26\n"
+ "9+ byte [61-61] 1 -> 9\n"
+ "10. nop -> 20\n"
+ "11+ byte [62-62] 1 -> 11\n"
+ "12. nop -> 20\n"
+ "13+ byte [62-62] 1 -> 13\n"
+ "14. nop -> 26\n"
+ "15+ byte [62-62] 1 -> 15\n"
+ "16. nop -> 32\n"
+ "17+ nop -> 9\n"
+ "18. nop -> 11\n"
+ "19. match! 0\n"
+ "20+ nop -> 17\n"
+ "21. nop -> 19\n"
+ "22+ nop -> 7\n"
+ "23. nop -> 13\n"
+ "24+ nop -> 17\n"
+ "25. nop -> 19\n"
+ "26+ nop -> 22\n"
+ "27. nop -> 24\n"
+ "28+ nop -> 5\n"
+ "29. nop -> 15\n"
+ "30+ nop -> 22\n"
+ "31. nop -> 24\n"
+ "32+ nop -> 28\n"
+ "33. nop -> 30\n",
+ forward);
+
+ Dump("((|S.+)+|(|S.+)+|){2}", Regexp::Latin1|Regexp::NeverCapture, &forward, NULL);
+ EXPECT_EQ("3+ nop -> 36\n"
+ "4+ nop -> 31\n"
+ "5. nop -> 33\n"
+ "6+ byte [00-09] 0 -> 8\n"
+ "7. byte [0b-ff] 0 -> 8\n"
+ "8+ nop -> 6\n"
+ "9+ nop -> 29\n"
+ "10. nop -> 28\n"
+ "11+ byte [00-09] 0 -> 13\n"
+ "12. byte [0b-ff] 0 -> 13\n"
+ "13+ nop -> 11\n"
+ "14+ nop -> 26\n"
+ "15. nop -> 28\n"
+ "16+ byte [00-09] 0 -> 18\n"
+ "17. byte [0b-ff] 0 -> 18\n"
+ "18+ nop -> 16\n"
+ "19+ nop -> 36\n"
+ "20. nop -> 33\n"
+ "21+ byte [00-09] 0 -> 23\n"
+ "22. byte [0b-ff] 0 -> 23\n"
+ "23+ nop -> 21\n"
+ "24+ nop -> 31\n"
+ "25. nop -> 33\n"
+ "26+ nop -> 28\n"
+ "27. byte [53-53] 0 -> 11\n"
+ "28. match! 0\n"
+ "29+ nop -> 28\n"
+ "30. byte [53-53] 0 -> 6\n"
+ "31+ nop -> 33\n"
+ "32. byte [53-53] 0 -> 21\n"
+ "33+ nop -> 29\n"
+ "34+ nop -> 26\n"
+ "35. nop -> 28\n"
+ "36+ nop -> 33\n"
+ "37. byte [53-53] 0 -> 16\n",
+ forward);
+}
+
+} // namespace re2
diff --git a/contrib/libs/re2/re2/testing/dump.cc b/contrib/libs/re2/re2/testing/dump.cc
index 96acb1ecc1..fb3fb7da1d 100644
--- a/contrib/libs/re2/re2/testing/dump.cc
+++ b/contrib/libs/re2/re2/testing/dump.cc
@@ -1,163 +1,163 @@
-// Copyright 2006 The RE2 Authors. All Rights Reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// Dump the regexp into a string showing structure.
-// Tested by parse_unittest.cc
-
-// This function traverses the regexp recursively,
-// meaning that on inputs like Regexp::Simplify of
-// a{100}{100}{100}{100}{100}{100}{100}{100}{100}{100},
-// it takes time and space exponential in the size of the
-// original regular expression. It can also use stack space
-// linear in the size of the regular expression for inputs
-// like ((((((((((((((((a*)*)*)*)*)*)*)*)*)*)*)*)*)*)*)*)*.
-// IT IS NOT SAFE TO CALL FROM PRODUCTION CODE.
-// As a result, Dump is provided only in the testing
-// library (see BUILD).
-
-#include <string>
-
-#include "library/cpp/testing/gtest/gtest.h"
-#include "util/logging.h"
-#include "util/strutil.h"
-#include "util/utf.h"
-#include "re2/stringpiece.h"
-#include "re2/regexp.h"
-
-namespace re2 {
-
-static const char* kOpcodeNames[] = {
- "bad",
- "no",
- "emp",
- "lit",
- "str",
- "cat",
- "alt",
- "star",
- "plus",
- "que",
- "rep",
- "cap",
- "dot",
- "byte",
- "bol",
- "eol",
- "wb", // kRegexpWordBoundary
- "nwb", // kRegexpNoWordBoundary
- "bot",
- "eot",
- "cc",
- "match",
-};
-
-// Create string representation of regexp with explicit structure.
-// Nothing pretty, just for testing.
-static void DumpRegexpAppending(Regexp* re, std::string* s) {
- if (re->op() < 0 || re->op() >= arraysize(kOpcodeNames)) {
- *s += StringPrintf("op%d", re->op());
- } else {
- switch (re->op()) {
- default:
- break;
- case kRegexpStar:
- case kRegexpPlus:
- case kRegexpQuest:
- case kRegexpRepeat:
- if (re->parse_flags() & Regexp::NonGreedy)
- s->append("n");
- break;
- }
- s->append(kOpcodeNames[re->op()]);
- if (re->op() == kRegexpLiteral && (re->parse_flags() & Regexp::FoldCase)) {
- Rune r = re->rune();
- if ('a' <= r && r <= 'z')
- s->append("fold");
- }
- if (re->op() == kRegexpLiteralString && (re->parse_flags() & Regexp::FoldCase)) {
- for (int i = 0; i < re->nrunes(); i++) {
- Rune r = re->runes()[i];
- if ('a' <= r && r <= 'z') {
- s->append("fold");
- break;
- }
- }
- }
- }
- s->append("{");
- switch (re->op()) {
- default:
- break;
- case kRegexpEndText:
- if (!(re->parse_flags() & Regexp::WasDollar)) {
- s->append("\\z");
- }
- break;
- case kRegexpLiteral: {
- Rune r = re->rune();
- char buf[UTFmax+1];
- buf[runetochar(buf, &r)] = 0;
- s->append(buf);
- break;
- }
- case kRegexpLiteralString:
- for (int i = 0; i < re->nrunes(); i++) {
- Rune r = re->runes()[i];
- char buf[UTFmax+1];
- buf[runetochar(buf, &r)] = 0;
- s->append(buf);
- }
- break;
- case kRegexpConcat:
- case kRegexpAlternate:
- for (int i = 0; i < re->nsub(); i++)
- DumpRegexpAppending(re->sub()[i], s);
- break;
- case kRegexpStar:
- case kRegexpPlus:
- case kRegexpQuest:
- DumpRegexpAppending(re->sub()[0], s);
- break;
- case kRegexpCapture:
- if (re->cap() == 0)
- LOG(DFATAL) << "kRegexpCapture cap() == 0";
- if (re->name()) {
- s->append(*re->name());
- s->append(":");
- }
- DumpRegexpAppending(re->sub()[0], s);
- break;
- case kRegexpRepeat:
- s->append(StringPrintf("%d,%d ", re->min(), re->max()));
- DumpRegexpAppending(re->sub()[0], s);
- break;
- case kRegexpCharClass: {
- std::string sep;
- for (CharClass::iterator it = re->cc()->begin();
- it != re->cc()->end(); ++it) {
- RuneRange rr = *it;
- s->append(sep);
- if (rr.lo == rr.hi)
- s->append(StringPrintf("%#x", rr.lo));
- else
- s->append(StringPrintf("%#x-%#x", rr.lo, rr.hi));
- sep = " ";
- }
- break;
- }
- }
- s->append("}");
-}
-
-std::string Regexp::Dump() {
- // Make sure that we are being called from a unit test.
- // Should cause a link error if used outside of testing.
- CHECK(!::testing::TempDir().empty());
-
- std::string s;
- DumpRegexpAppending(this, &s);
- return s;
-}
-
-} // namespace re2
+// Copyright 2006 The RE2 Authors. All Rights Reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Dump the regexp into a string showing structure.
+// Tested by parse_unittest.cc
+
+// This function traverses the regexp recursively,
+// meaning that on inputs like Regexp::Simplify of
+// a{100}{100}{100}{100}{100}{100}{100}{100}{100}{100},
+// it takes time and space exponential in the size of the
+// original regular expression. It can also use stack space
+// linear in the size of the regular expression for inputs
+// like ((((((((((((((((a*)*)*)*)*)*)*)*)*)*)*)*)*)*)*)*)*.
+// IT IS NOT SAFE TO CALL FROM PRODUCTION CODE.
+// As a result, Dump is provided only in the testing
+// library (see BUILD).
+
+#include <string>
+
+#include "library/cpp/testing/gtest/gtest.h"
+#include "util/logging.h"
+#include "util/strutil.h"
+#include "util/utf.h"
+#include "re2/stringpiece.h"
+#include "re2/regexp.h"
+
+namespace re2 {
+
+static const char* kOpcodeNames[] = {
+ "bad",
+ "no",
+ "emp",
+ "lit",
+ "str",
+ "cat",
+ "alt",
+ "star",
+ "plus",
+ "que",
+ "rep",
+ "cap",
+ "dot",
+ "byte",
+ "bol",
+ "eol",
+ "wb", // kRegexpWordBoundary
+ "nwb", // kRegexpNoWordBoundary
+ "bot",
+ "eot",
+ "cc",
+ "match",
+};
+
+// Create string representation of regexp with explicit structure.
+// Nothing pretty, just for testing.
+static void DumpRegexpAppending(Regexp* re, std::string* s) {
+ if (re->op() < 0 || re->op() >= arraysize(kOpcodeNames)) {
+ *s += StringPrintf("op%d", re->op());
+ } else {
+ switch (re->op()) {
+ default:
+ break;
+ case kRegexpStar:
+ case kRegexpPlus:
+ case kRegexpQuest:
+ case kRegexpRepeat:
+ if (re->parse_flags() & Regexp::NonGreedy)
+ s->append("n");
+ break;
+ }
+ s->append(kOpcodeNames[re->op()]);
+ if (re->op() == kRegexpLiteral && (re->parse_flags() & Regexp::FoldCase)) {
+ Rune r = re->rune();
+ if ('a' <= r && r <= 'z')
+ s->append("fold");
+ }
+ if (re->op() == kRegexpLiteralString && (re->parse_flags() & Regexp::FoldCase)) {
+ for (int i = 0; i < re->nrunes(); i++) {
+ Rune r = re->runes()[i];
+ if ('a' <= r && r <= 'z') {
+ s->append("fold");
+ break;
+ }
+ }
+ }
+ }
+ s->append("{");
+ switch (re->op()) {
+ default:
+ break;
+ case kRegexpEndText:
+ if (!(re->parse_flags() & Regexp::WasDollar)) {
+ s->append("\\z");
+ }
+ break;
+ case kRegexpLiteral: {
+ Rune r = re->rune();
+ char buf[UTFmax+1];
+ buf[runetochar(buf, &r)] = 0;
+ s->append(buf);
+ break;
+ }
+ case kRegexpLiteralString:
+ for (int i = 0; i < re->nrunes(); i++) {
+ Rune r = re->runes()[i];
+ char buf[UTFmax+1];
+ buf[runetochar(buf, &r)] = 0;
+ s->append(buf);
+ }
+ break;
+ case kRegexpConcat:
+ case kRegexpAlternate:
+ for (int i = 0; i < re->nsub(); i++)
+ DumpRegexpAppending(re->sub()[i], s);
+ break;
+ case kRegexpStar:
+ case kRegexpPlus:
+ case kRegexpQuest:
+ DumpRegexpAppending(re->sub()[0], s);
+ break;
+ case kRegexpCapture:
+ if (re->cap() == 0)
+ LOG(DFATAL) << "kRegexpCapture cap() == 0";
+ if (re->name()) {
+ s->append(*re->name());
+ s->append(":");
+ }
+ DumpRegexpAppending(re->sub()[0], s);
+ break;
+ case kRegexpRepeat:
+ s->append(StringPrintf("%d,%d ", re->min(), re->max()));
+ DumpRegexpAppending(re->sub()[0], s);
+ break;
+ case kRegexpCharClass: {
+ std::string sep;
+ for (CharClass::iterator it = re->cc()->begin();
+ it != re->cc()->end(); ++it) {
+ RuneRange rr = *it;
+ s->append(sep);
+ if (rr.lo == rr.hi)
+ s->append(StringPrintf("%#x", rr.lo));
+ else
+ s->append(StringPrintf("%#x-%#x", rr.lo, rr.hi));
+ sep = " ";
+ }
+ break;
+ }
+ }
+ s->append("}");
+}
+
+std::string Regexp::Dump() {
+ // Make sure that we are being called from a unit test.
+ // Should cause a link error if used outside of testing.
+ CHECK(!::testing::TempDir().empty());
+
+ std::string s;
+ DumpRegexpAppending(this, &s);
+ return s;
+}
+
+} // namespace re2
diff --git a/contrib/libs/re2/re2/testing/exhaustive_tester.cc b/contrib/libs/re2/re2/testing/exhaustive_tester.cc
index 847bf7c170..b75b068299 100644
--- a/contrib/libs/re2/re2/testing/exhaustive_tester.cc
+++ b/contrib/libs/re2/re2/testing/exhaustive_tester.cc
@@ -1,191 +1,191 @@
-// Copyright 2008 The RE2 Authors. All Rights Reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// Exhaustive testing of regular expression matching.
-
-// Each test picks an alphabet (e.g., "abc"), a maximum string length,
-// a maximum regular expression length, and a maximum number of letters
-// that can appear in the regular expression. Given these parameters,
-// it tries every possible regular expression and string, verifying that
-// the NFA, DFA, and a trivial backtracking implementation agree about
-// the location of the match.
-
-#include <stdio.h>
-
-#include "library/cpp/testing/gtest/gtest.h"
-#include "util/flags.h"
-#include "util/logging.h"
-#include "util/strutil.h"
-#include "re2/testing/exhaustive_tester.h"
-#include "re2/testing/tester.h"
-
-// For target `log' in the Makefile.
-#ifndef LOGGING
-#define LOGGING 0
-#endif
-
-DEFINE_FLAG(bool, show_regexps, false, "show regexps during testing");
-
-DEFINE_FLAG(int, max_bad_regexp_inputs, 1,
- "Stop testing a regular expression after finding this many "
- "strings that break it.");
-
-namespace re2 {
-
-static char* escape(const StringPiece& sp) {
- static char buf[512];
- char* p = buf;
- *p++ = '\"';
- for (size_t i = 0; i < sp.size(); i++) {
- if(p+5 >= buf+sizeof buf)
- LOG(FATAL) << "ExhaustiveTester escape: too long";
- if(sp[i] == '\\' || sp[i] == '\"') {
- *p++ = '\\';
- *p++ = sp[i];
- } else if(sp[i] == '\n') {
- *p++ = '\\';
- *p++ = 'n';
- } else {
- *p++ = sp[i];
- }
- }
- *p++ = '\"';
- *p = '\0';
- return buf;
-}
-
-static void PrintResult(const RE2& re, const StringPiece& input, RE2::Anchor anchor, StringPiece *m, int n) {
- if (!re.Match(input, 0, input.size(), anchor, m, n)) {
- printf("-");
- return;
- }
- for (int i = 0; i < n; i++) {
- if (i > 0)
- printf(" ");
- if (m[i].data() == NULL)
- printf("-");
- else
- printf("%td-%td",
+// Copyright 2008 The RE2 Authors. All Rights Reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Exhaustive testing of regular expression matching.
+
+// Each test picks an alphabet (e.g., "abc"), a maximum string length,
+// a maximum regular expression length, and a maximum number of letters
+// that can appear in the regular expression. Given these parameters,
+// it tries every possible regular expression and string, verifying that
+// the NFA, DFA, and a trivial backtracking implementation agree about
+// the location of the match.
+
+#include <stdio.h>
+
+#include "library/cpp/testing/gtest/gtest.h"
+#include "util/flags.h"
+#include "util/logging.h"
+#include "util/strutil.h"
+#include "re2/testing/exhaustive_tester.h"
+#include "re2/testing/tester.h"
+
+// For target `log' in the Makefile.
+#ifndef LOGGING
+#define LOGGING 0
+#endif
+
+DEFINE_FLAG(bool, show_regexps, false, "show regexps during testing");
+
+DEFINE_FLAG(int, max_bad_regexp_inputs, 1,
+ "Stop testing a regular expression after finding this many "
+ "strings that break it.");
+
+namespace re2 {
+
+static char* escape(const StringPiece& sp) {
+ static char buf[512];
+ char* p = buf;
+ *p++ = '\"';
+ for (size_t i = 0; i < sp.size(); i++) {
+ if(p+5 >= buf+sizeof buf)
+ LOG(FATAL) << "ExhaustiveTester escape: too long";
+ if(sp[i] == '\\' || sp[i] == '\"') {
+ *p++ = '\\';
+ *p++ = sp[i];
+ } else if(sp[i] == '\n') {
+ *p++ = '\\';
+ *p++ = 'n';
+ } else {
+ *p++ = sp[i];
+ }
+ }
+ *p++ = '\"';
+ *p = '\0';
+ return buf;
+}
+
+static void PrintResult(const RE2& re, const StringPiece& input, RE2::Anchor anchor, StringPiece *m, int n) {
+ if (!re.Match(input, 0, input.size(), anchor, m, n)) {
+ printf("-");
+ return;
+ }
+ for (int i = 0; i < n; i++) {
+ if (i > 0)
+ printf(" ");
+ if (m[i].data() == NULL)
+ printf("-");
+ else
+ printf("%td-%td",
BeginPtr(m[i]) - BeginPtr(input),
EndPtr(m[i]) - BeginPtr(input));
- }
-}
-
-// Processes a single generated regexp.
-// Compiles it using Regexp interface and PCRE, and then
-// checks that NFA, DFA, and PCRE all return the same results.
-void ExhaustiveTester::HandleRegexp(const std::string& const_regexp) {
- regexps_++;
- std::string regexp = const_regexp;
- if (!topwrapper_.empty()) {
- regexp = StringPrintf(topwrapper_.c_str(), regexp.c_str());
- }
-
- if (GetFlag(FLAGS_show_regexps)) {
- printf("\r%s", regexp.c_str());
- fflush(stdout);
- }
-
- if (LOGGING) {
- // Write out test cases and answers for use in testing
- // other implementations, such as Go's regexp package.
- if (randomstrings_)
- LOG(ERROR) << "Cannot log with random strings.";
- if (regexps_ == 1) { // first
- printf("strings\n");
- strgen_.Reset();
- while (strgen_.HasNext())
- printf("%s\n", escape(strgen_.Next()));
- printf("regexps\n");
- }
- printf("%s\n", escape(regexp));
-
- RE2 re(regexp);
- RE2::Options longest;
- longest.set_longest_match(true);
- RE2 relongest(regexp, longest);
- int ngroup = re.NumberOfCapturingGroups()+1;
- StringPiece* group = new StringPiece[ngroup];
-
- strgen_.Reset();
- while (strgen_.HasNext()) {
- StringPiece input = strgen_.Next();
- PrintResult(re, input, RE2::ANCHOR_BOTH, group, ngroup);
- printf(";");
- PrintResult(re, input, RE2::UNANCHORED, group, ngroup);
- printf(";");
- PrintResult(relongest, input, RE2::ANCHOR_BOTH, group, ngroup);
- printf(";");
- PrintResult(relongest, input, RE2::UNANCHORED, group, ngroup);
- printf("\n");
- }
- delete[] group;
- return;
- }
-
- Tester tester(regexp);
- if (tester.error())
- return;
-
- strgen_.Reset();
- strgen_.GenerateNULL();
- if (randomstrings_)
- strgen_.Random(stringseed_, stringcount_);
- int bad_inputs = 0;
- while (strgen_.HasNext()) {
- tests_++;
- if (!tester.TestInput(strgen_.Next())) {
- failures_++;
- if (++bad_inputs >= GetFlag(FLAGS_max_bad_regexp_inputs))
- break;
- }
- }
-}
-
-// Runs an exhaustive test on the given parameters.
-void ExhaustiveTest(int maxatoms, int maxops,
- const std::vector<std::string>& alphabet,
- const std::vector<std::string>& ops,
- int maxstrlen,
- const std::vector<std::string>& stralphabet,
- const std::string& wrapper,
- const std::string& topwrapper) {
- if (RE2_DEBUG_MODE) {
- if (maxatoms > 1)
- maxatoms--;
- if (maxops > 1)
- maxops--;
- if (maxstrlen > 1)
- maxstrlen--;
- }
- ExhaustiveTester t(maxatoms, maxops, alphabet, ops,
- maxstrlen, stralphabet, wrapper,
- topwrapper);
- t.Generate();
- if (!LOGGING) {
- printf("%d regexps, %d tests, %d failures [%d/%d str]\n",
- t.regexps(), t.tests(), t.failures(), maxstrlen, (int)stralphabet.size());
- }
- EXPECT_EQ(0, t.failures());
-}
-
-// Runs an exhaustive test using the given parameters and
-// the basic egrep operators.
-void EgrepTest(int maxatoms, int maxops, const std::string& alphabet,
- int maxstrlen, const std::string& stralphabet,
- const std::string& wrapper) {
- const char* tops[] = { "", "^(?:%s)", "(?:%s)$", "^(?:%s)$" };
-
- for (size_t i = 0; i < arraysize(tops); i++) {
- ExhaustiveTest(maxatoms, maxops,
- Split("", alphabet),
- RegexpGenerator::EgrepOps(),
- maxstrlen,
- Split("", stralphabet),
- wrapper,
- tops[i]);
- }
-}
-
-} // namespace re2
+ }
+}
+
+// Processes a single generated regexp.
+// Compiles it using Regexp interface and PCRE, and then
+// checks that NFA, DFA, and PCRE all return the same results.
+void ExhaustiveTester::HandleRegexp(const std::string& const_regexp) {
+ regexps_++;
+ std::string regexp = const_regexp;
+ if (!topwrapper_.empty()) {
+ regexp = StringPrintf(topwrapper_.c_str(), regexp.c_str());
+ }
+
+ if (GetFlag(FLAGS_show_regexps)) {
+ printf("\r%s", regexp.c_str());
+ fflush(stdout);
+ }
+
+ if (LOGGING) {
+ // Write out test cases and answers for use in testing
+ // other implementations, such as Go's regexp package.
+ if (randomstrings_)
+ LOG(ERROR) << "Cannot log with random strings.";
+ if (regexps_ == 1) { // first
+ printf("strings\n");
+ strgen_.Reset();
+ while (strgen_.HasNext())
+ printf("%s\n", escape(strgen_.Next()));
+ printf("regexps\n");
+ }
+ printf("%s\n", escape(regexp));
+
+ RE2 re(regexp);
+ RE2::Options longest;
+ longest.set_longest_match(true);
+ RE2 relongest(regexp, longest);
+ int ngroup = re.NumberOfCapturingGroups()+1;
+ StringPiece* group = new StringPiece[ngroup];
+
+ strgen_.Reset();
+ while (strgen_.HasNext()) {
+ StringPiece input = strgen_.Next();
+ PrintResult(re, input, RE2::ANCHOR_BOTH, group, ngroup);
+ printf(";");
+ PrintResult(re, input, RE2::UNANCHORED, group, ngroup);
+ printf(";");
+ PrintResult(relongest, input, RE2::ANCHOR_BOTH, group, ngroup);
+ printf(";");
+ PrintResult(relongest, input, RE2::UNANCHORED, group, ngroup);
+ printf("\n");
+ }
+ delete[] group;
+ return;
+ }
+
+ Tester tester(regexp);
+ if (tester.error())
+ return;
+
+ strgen_.Reset();
+ strgen_.GenerateNULL();
+ if (randomstrings_)
+ strgen_.Random(stringseed_, stringcount_);
+ int bad_inputs = 0;
+ while (strgen_.HasNext()) {
+ tests_++;
+ if (!tester.TestInput(strgen_.Next())) {
+ failures_++;
+ if (++bad_inputs >= GetFlag(FLAGS_max_bad_regexp_inputs))
+ break;
+ }
+ }
+}
+
+// Runs an exhaustive test on the given parameters.
+void ExhaustiveTest(int maxatoms, int maxops,
+ const std::vector<std::string>& alphabet,
+ const std::vector<std::string>& ops,
+ int maxstrlen,
+ const std::vector<std::string>& stralphabet,
+ const std::string& wrapper,
+ const std::string& topwrapper) {
+ if (RE2_DEBUG_MODE) {
+ if (maxatoms > 1)
+ maxatoms--;
+ if (maxops > 1)
+ maxops--;
+ if (maxstrlen > 1)
+ maxstrlen--;
+ }
+ ExhaustiveTester t(maxatoms, maxops, alphabet, ops,
+ maxstrlen, stralphabet, wrapper,
+ topwrapper);
+ t.Generate();
+ if (!LOGGING) {
+ printf("%d regexps, %d tests, %d failures [%d/%d str]\n",
+ t.regexps(), t.tests(), t.failures(), maxstrlen, (int)stralphabet.size());
+ }
+ EXPECT_EQ(0, t.failures());
+}
+
+// Runs an exhaustive test using the given parameters and
+// the basic egrep operators.
+void EgrepTest(int maxatoms, int maxops, const std::string& alphabet,
+ int maxstrlen, const std::string& stralphabet,
+ const std::string& wrapper) {
+ const char* tops[] = { "", "^(?:%s)", "(?:%s)$", "^(?:%s)$" };
+
+ for (size_t i = 0; i < arraysize(tops); i++) {
+ ExhaustiveTest(maxatoms, maxops,
+ Split("", alphabet),
+ RegexpGenerator::EgrepOps(),
+ maxstrlen,
+ Split("", stralphabet),
+ wrapper,
+ tops[i]);
+ }
+}
+
+} // namespace re2
diff --git a/contrib/libs/re2/re2/testing/exhaustive_tester.h b/contrib/libs/re2/re2/testing/exhaustive_tester.h
index fb26b04dcf..3a14282f01 100644
--- a/contrib/libs/re2/re2/testing/exhaustive_tester.h
+++ b/contrib/libs/re2/re2/testing/exhaustive_tester.h
@@ -1,105 +1,105 @@
-// Copyright 2009 The RE2 Authors. All Rights Reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-#ifndef RE2_TESTING_EXHAUSTIVE_TESTER_H_
-#define RE2_TESTING_EXHAUSTIVE_TESTER_H_
-
-#include <stdint.h>
-#include <string>
-#include <vector>
-
-#include "util/util.h"
-#include "re2/testing/regexp_generator.h"
-#include "re2/testing/string_generator.h"
-
-namespace re2 {
-
-// Doing this simplifies the logic below.
-#ifndef __has_feature
-#define __has_feature(x) 0
-#endif
-
-#if !defined(NDEBUG)
-// We are in a debug build.
-const bool RE2_DEBUG_MODE = true;
-#elif __has_feature(address_sanitizer) || __has_feature(memory_sanitizer) || __has_feature(thread_sanitizer)
-// Not a debug build, but still under sanitizers.
-const bool RE2_DEBUG_MODE = true;
-#else
-const bool RE2_DEBUG_MODE = false;
-#endif
-
-// Exhaustive regular expression test: generate all regexps within parameters,
-// then generate all strings of a given length over a given alphabet,
-// then check that NFA, DFA, and PCRE agree about whether each regexp matches
-// each possible string, and if so, where the match is.
-//
-// Can also be used in a "random" mode that generates a given number
-// of random regexp and strings, allowing testing of larger expressions
-// and inputs.
-class ExhaustiveTester : public RegexpGenerator {
- public:
- ExhaustiveTester(int maxatoms,
- int maxops,
- const std::vector<std::string>& alphabet,
- const std::vector<std::string>& ops,
- int maxstrlen,
- const std::vector<std::string>& stralphabet,
- const std::string& wrapper,
- const std::string& topwrapper)
- : RegexpGenerator(maxatoms, maxops, alphabet, ops),
- strgen_(maxstrlen, stralphabet),
- wrapper_(wrapper),
- topwrapper_(topwrapper),
- regexps_(0), tests_(0), failures_(0),
- randomstrings_(0), stringseed_(0), stringcount_(0) { }
-
- int regexps() { return regexps_; }
- int tests() { return tests_; }
- int failures() { return failures_; }
-
- // Needed for RegexpGenerator interface.
- void HandleRegexp(const std::string& regexp);
-
- // Causes testing to generate random input strings.
- void RandomStrings(int32_t seed, int32_t count) {
- randomstrings_ = true;
- stringseed_ = seed;
- stringcount_ = count;
- }
-
- private:
- StringGenerator strgen_;
- std::string wrapper_; // Regexp wrapper - either empty or has one %s.
- std::string topwrapper_; // Regexp top-level wrapper.
- int regexps_; // Number of HandleRegexp calls
- int tests_; // Number of regexp tests.
- int failures_; // Number of tests failed.
-
- bool randomstrings_; // Whether to use random strings
- int32_t stringseed_; // If so, the seed.
- int stringcount_; // If so, how many to generate.
-
- ExhaustiveTester(const ExhaustiveTester&) = delete;
- ExhaustiveTester& operator=(const ExhaustiveTester&) = delete;
-};
-
-// Runs an exhaustive test on the given parameters.
-void ExhaustiveTest(int maxatoms, int maxops,
- const std::vector<std::string>& alphabet,
- const std::vector<std::string>& ops,
- int maxstrlen,
- const std::vector<std::string>& stralphabet,
- const std::string& wrapper,
- const std::string& topwrapper);
-
-// Runs an exhaustive test using the given parameters and
-// the basic egrep operators.
-void EgrepTest(int maxatoms, int maxops, const std::string& alphabet,
- int maxstrlen, const std::string& stralphabet,
- const std::string& wrapper);
-
-} // namespace re2
-
-#endif // RE2_TESTING_EXHAUSTIVE_TESTER_H_
+// Copyright 2009 The RE2 Authors. All Rights Reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef RE2_TESTING_EXHAUSTIVE_TESTER_H_
+#define RE2_TESTING_EXHAUSTIVE_TESTER_H_
+
+#include <stdint.h>
+#include <string>
+#include <vector>
+
+#include "util/util.h"
+#include "re2/testing/regexp_generator.h"
+#include "re2/testing/string_generator.h"
+
+namespace re2 {
+
+// Doing this simplifies the logic below.
+#ifndef __has_feature
+#define __has_feature(x) 0
+#endif
+
+#if !defined(NDEBUG)
+// We are in a debug build.
+const bool RE2_DEBUG_MODE = true;
+#elif __has_feature(address_sanitizer) || __has_feature(memory_sanitizer) || __has_feature(thread_sanitizer)
+// Not a debug build, but still under sanitizers.
+const bool RE2_DEBUG_MODE = true;
+#else
+const bool RE2_DEBUG_MODE = false;
+#endif
+
+// Exhaustive regular expression test: generate all regexps within parameters,
+// then generate all strings of a given length over a given alphabet,
+// then check that NFA, DFA, and PCRE agree about whether each regexp matches
+// each possible string, and if so, where the match is.
+//
+// Can also be used in a "random" mode that generates a given number
+// of random regexp and strings, allowing testing of larger expressions
+// and inputs.
+class ExhaustiveTester : public RegexpGenerator {
+ public:
+ ExhaustiveTester(int maxatoms,
+ int maxops,
+ const std::vector<std::string>& alphabet,
+ const std::vector<std::string>& ops,
+ int maxstrlen,
+ const std::vector<std::string>& stralphabet,
+ const std::string& wrapper,
+ const std::string& topwrapper)
+ : RegexpGenerator(maxatoms, maxops, alphabet, ops),
+ strgen_(maxstrlen, stralphabet),
+ wrapper_(wrapper),
+ topwrapper_(topwrapper),
+ regexps_(0), tests_(0), failures_(0),
+ randomstrings_(0), stringseed_(0), stringcount_(0) { }
+
+ int regexps() { return regexps_; }
+ int tests() { return tests_; }
+ int failures() { return failures_; }
+
+ // Needed for RegexpGenerator interface.
+ void HandleRegexp(const std::string& regexp);
+
+ // Causes testing to generate random input strings.
+ void RandomStrings(int32_t seed, int32_t count) {
+ randomstrings_ = true;
+ stringseed_ = seed;
+ stringcount_ = count;
+ }
+
+ private:
+ StringGenerator strgen_;
+ std::string wrapper_; // Regexp wrapper - either empty or has one %s.
+ std::string topwrapper_; // Regexp top-level wrapper.
+ int regexps_; // Number of HandleRegexp calls
+ int tests_; // Number of regexp tests.
+ int failures_; // Number of tests failed.
+
+ bool randomstrings_; // Whether to use random strings
+ int32_t stringseed_; // If so, the seed.
+ int stringcount_; // If so, how many to generate.
+
+ ExhaustiveTester(const ExhaustiveTester&) = delete;
+ ExhaustiveTester& operator=(const ExhaustiveTester&) = delete;
+};
+
+// Runs an exhaustive test on the given parameters.
+void ExhaustiveTest(int maxatoms, int maxops,
+ const std::vector<std::string>& alphabet,
+ const std::vector<std::string>& ops,
+ int maxstrlen,
+ const std::vector<std::string>& stralphabet,
+ const std::string& wrapper,
+ const std::string& topwrapper);
+
+// Runs an exhaustive test using the given parameters and
+// the basic egrep operators.
+void EgrepTest(int maxatoms, int maxops, const std::string& alphabet,
+ int maxstrlen, const std::string& stralphabet,
+ const std::string& wrapper);
+
+} // namespace re2
+
+#endif // RE2_TESTING_EXHAUSTIVE_TESTER_H_
diff --git a/contrib/libs/re2/re2/testing/filtered_re2_test.cc b/contrib/libs/re2/re2/testing/filtered_re2_test.cc
index 684d4356c4..073a70a745 100644
--- a/contrib/libs/re2/re2/testing/filtered_re2_test.cc
+++ b/contrib/libs/re2/re2/testing/filtered_re2_test.cc
@@ -1,340 +1,340 @@
-// Copyright 2009 The RE2 Authors. All Rights Reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-#include <stddef.h>
-#include <algorithm>
-#include <memory>
-#include <string>
-#include <vector>
-#include <utility>
-
-#include "library/cpp/testing/gtest/gtest.h"
-#include "util/logging.h"
-#include "re2/filtered_re2.h"
-#include "re2/re2.h"
-
-namespace re2 {
-
-struct FilterTestVars {
- FilterTestVars() {}
- explicit FilterTestVars(int min_atom_len) : f(min_atom_len) {}
-
- std::vector<std::string> atoms;
- std::vector<int> atom_indices;
- std::vector<int> matches;
- RE2::Options opts;
- FilteredRE2 f;
-};
-
-TEST(FilteredRE2Test, EmptyTest) {
- FilterTestVars v;
-
- v.f.Compile(&v.atoms);
- EXPECT_EQ(0, v.atoms.size());
-
- // Compile has no effect at all when called before Add: it will not
- // record that it has been called and it will not clear the vector.
- // The second point does not matter here, but the first point means
- // that an error will be logged during the call to AllMatches.
- v.f.AllMatches("foo", v.atom_indices, &v.matches);
- EXPECT_EQ(0, v.matches.size());
-}
-
-TEST(FilteredRE2Test, SmallOrTest) {
- FilterTestVars v(4); // override the minimum atom length
- int id;
- v.f.Add("(foo|bar)", v.opts, &id);
-
- v.f.Compile(&v.atoms);
- EXPECT_EQ(0, v.atoms.size());
-
- v.f.AllMatches("lemurs bar", v.atom_indices, &v.matches);
- EXPECT_EQ(1, v.matches.size());
- EXPECT_EQ(id, v.matches[0]);
-}
-
-TEST(FilteredRE2Test, SmallLatinTest) {
- FilterTestVars v;
- int id;
-
- v.opts.set_encoding(RE2::Options::EncodingLatin1);
- v.f.Add("\xde\xadQ\xbe\xef", v.opts, &id);
- v.f.Compile(&v.atoms);
- EXPECT_EQ(1, v.atoms.size());
- EXPECT_EQ(v.atoms[0], "\xde\xadq\xbe\xef");
-
- v.atom_indices.push_back(0);
- v.f.AllMatches("foo\xde\xadQ\xbe\xeflemur", v.atom_indices, &v.matches);
- EXPECT_EQ(1, v.matches.size());
- EXPECT_EQ(id, v.matches[0]);
-}
-
-struct AtomTest {
- const char* testname;
- // If any test needs more than this many regexps or atoms, increase
- // the size of the corresponding array.
- const char* regexps[20];
- const char* atoms[20];
-};
-
-AtomTest atom_tests[] = {
- {
- // This test checks to make sure empty patterns are allowed.
- "CheckEmptyPattern",
- {""},
- {}
- }, {
- // This test checks that all atoms of length greater than min length
- // are found, and no atoms that are of smaller length are found.
- "AllAtomsGtMinLengthFound", {
- "(abc123|def456|ghi789).*mnop[x-z]+",
- "abc..yyy..zz",
- "mnmnpp[a-z]+PPP"
- }, {
- "abc123",
- "def456",
- "ghi789",
- "mnop",
- "abc",
- "yyy",
- "mnmnpp",
- "ppp"
- }
- }, {
- // Test to make sure that any atoms that have another atom as a
- // substring in an OR are removed; that is, only the shortest
- // substring is kept.
- "SubstrAtomRemovesSuperStrInOr", {
- "(abc123|abc|ghi789|abc1234).*[x-z]+",
- "abcd..yyy..yyyzzz",
- "mnmnpp[a-z]+PPP"
- }, {
- "abc",
- "ghi789",
- "abcd",
- "yyy",
- "yyyzzz",
- "mnmnpp",
- "ppp"
- }
- }, {
- // Test character class expansion.
- "CharClassExpansion", {
- "m[a-c][d-f]n.*[x-z]+",
- "[x-y]bcde[ab]"
- }, {
- "madn", "maen", "mafn",
- "mbdn", "mben", "mbfn",
- "mcdn", "mcen", "mcfn",
- "xbcdea", "xbcdeb",
- "ybcdea", "ybcdeb"
- }
- }, {
- // Test upper/lower of non-ASCII.
- "UnicodeLower", {
- "(?i)ΔδΠϖπΣςσ",
- "ΛΜΝΟΠ",
- "ψρστυ",
- }, {
- "δδπππσσσ",
- "λμνοπ",
- "ψρστυ",
- },
- },
-};
-
-void AddRegexpsAndCompile(const char* regexps[],
- size_t n,
- struct FilterTestVars* v) {
- for (size_t i = 0; i < n; i++) {
- int id;
- v->f.Add(regexps[i], v->opts, &id);
- }
- v->f.Compile(&v->atoms);
-}
-
-bool CheckExpectedAtoms(const char* atoms[],
- size_t n,
- const char* testname,
- struct FilterTestVars* v) {
- std::vector<std::string> expected;
- for (size_t i = 0; i < n; i++)
- expected.push_back(atoms[i]);
-
- bool pass = expected.size() == v->atoms.size();
-
- std::sort(v->atoms.begin(), v->atoms.end());
- std::sort(expected.begin(), expected.end());
- for (size_t i = 0; pass && i < n; i++)
- pass = pass && expected[i] == v->atoms[i];
-
- if (!pass) {
- LOG(ERROR) << "Failed " << testname;
- LOG(ERROR) << "Expected #atoms = " << expected.size();
- for (size_t i = 0; i < expected.size(); i++)
- LOG(ERROR) << expected[i];
- LOG(ERROR) << "Found #atoms = " << v->atoms.size();
- for (size_t i = 0; i < v->atoms.size(); i++)
- LOG(ERROR) << v->atoms[i];
- }
-
- return pass;
-}
-
-TEST(FilteredRE2Test, AtomTests) {
- int nfail = 0;
- for (size_t i = 0; i < arraysize(atom_tests); i++) {
- FilterTestVars v;
- AtomTest* t = &atom_tests[i];
- size_t nregexp, natom;
- for (nregexp = 0; nregexp < arraysize(t->regexps); nregexp++)
- if (t->regexps[nregexp] == NULL)
- break;
- for (natom = 0; natom < arraysize(t->atoms); natom++)
- if (t->atoms[natom] == NULL)
- break;
- AddRegexpsAndCompile(t->regexps, nregexp, &v);
- if (!CheckExpectedAtoms(t->atoms, natom, t->testname, &v))
- nfail++;
- }
- EXPECT_EQ(0, nfail);
-}
-
-void FindAtomIndices(const std::vector<std::string>& atoms,
- const std::vector<std::string>& matched_atoms,
- std::vector<int>* atom_indices) {
- atom_indices->clear();
- for (size_t i = 0; i < matched_atoms.size(); i++) {
- for (size_t j = 0; j < atoms.size(); j++) {
- if (matched_atoms[i] == atoms[j]) {
- atom_indices->push_back(static_cast<int>(j));
- break;
- }
- }
- }
-}
-
-TEST(FilteredRE2Test, MatchEmptyPattern) {
- FilterTestVars v;
- AtomTest* t = &atom_tests[0];
- // We are using the regexps used in one of the atom tests
- // for this test. Adding the EXPECT here to make sure
- // the index we use for the test is for the correct test.
- EXPECT_EQ("CheckEmptyPattern", std::string(t->testname));
- size_t nregexp;
- for (nregexp = 0; nregexp < arraysize(t->regexps); nregexp++)
- if (t->regexps[nregexp] == NULL)
- break;
- AddRegexpsAndCompile(t->regexps, nregexp, &v);
- std::string text = "0123";
- std::vector<int> atom_ids;
- std::vector<int> matching_regexps;
- EXPECT_EQ(0, v.f.FirstMatch(text, atom_ids));
-}
-
-TEST(FilteredRE2Test, MatchTests) {
- FilterTestVars v;
- AtomTest* t = &atom_tests[2];
- // We are using the regexps used in one of the atom tests
- // for this test.
- EXPECT_EQ("SubstrAtomRemovesSuperStrInOr", std::string(t->testname));
- size_t nregexp;
- for (nregexp = 0; nregexp < arraysize(t->regexps); nregexp++)
- if (t->regexps[nregexp] == NULL)
- break;
- AddRegexpsAndCompile(t->regexps, nregexp, &v);
-
- std::string text = "abc121212xyz";
- // atoms = abc
- std::vector<int> atom_ids;
- std::vector<std::string> atoms;
- atoms.push_back("abc");
- FindAtomIndices(v.atoms, atoms, &atom_ids);
- std::vector<int> matching_regexps;
- v.f.AllMatches(text, atom_ids, &matching_regexps);
- EXPECT_EQ(1, matching_regexps.size());
-
- text = "abc12312yyyzzz";
- atoms.clear();
- atoms.push_back("abc");
- atoms.push_back("yyy");
- atoms.push_back("yyyzzz");
- FindAtomIndices(v.atoms, atoms, &atom_ids);
- v.f.AllMatches(text, atom_ids, &matching_regexps);
- EXPECT_EQ(1, matching_regexps.size());
-
- text = "abcd12yyy32yyyzzz";
- atoms.clear();
- atoms.push_back("abc");
- atoms.push_back("abcd");
- atoms.push_back("yyy");
- atoms.push_back("yyyzzz");
- FindAtomIndices(v.atoms, atoms, &atom_ids);
- LOG(INFO) << "S: " << atom_ids.size();
- for (size_t i = 0; i < atom_ids.size(); i++)
- LOG(INFO) << "i: " << i << " : " << atom_ids[i];
- v.f.AllMatches(text, atom_ids, &matching_regexps);
- EXPECT_EQ(2, matching_regexps.size());
-}
-
-TEST(FilteredRE2Test, EmptyStringInStringSetBug) {
- // Bug due to find() finding "" at the start of everything in a string
- // set and thus SimplifyStringSet() would end up erasing everything.
- // In order to test this, we have to keep PrefilterTree from discarding
- // the OR entirely, so we have to make the minimum atom length zero.
-
- FilterTestVars v(0); // override the minimum atom length
- const char* regexps[] = {"-R.+(|ADD=;AA){12}}"};
- const char* atoms[] = {"", "-r", "add=;aa", "}"};
- AddRegexpsAndCompile(regexps, arraysize(regexps), &v);
- EXPECT_TRUE(CheckExpectedAtoms(atoms, arraysize(atoms),
- "EmptyStringInStringSetBug", &v));
-}
-
-TEST(FilteredRE2Test, MoveSemantics) {
- FilterTestVars v1;
- int id;
- v1.f.Add("foo\\d+", v1.opts, &id);
- EXPECT_EQ(0, id);
- v1.f.Compile(&v1.atoms);
- EXPECT_EQ(1, v1.atoms.size());
- EXPECT_EQ("foo", v1.atoms[0]);
- v1.f.AllMatches("abc foo1 xyz", {0}, &v1.matches);
- EXPECT_EQ(1, v1.matches.size());
- EXPECT_EQ(0, v1.matches[0]);
- v1.f.AllMatches("abc bar2 xyz", {0}, &v1.matches);
- EXPECT_EQ(0, v1.matches.size());
-
- // The moved-to object should do what the moved-from object did.
- FilterTestVars v2;
- v2.f = std::move(v1.f);
- v2.f.AllMatches("abc foo1 xyz", {0}, &v2.matches);
- EXPECT_EQ(1, v2.matches.size());
- EXPECT_EQ(0, v2.matches[0]);
- v2.f.AllMatches("abc bar2 xyz", {0}, &v2.matches);
- EXPECT_EQ(0, v2.matches.size());
-
- // The moved-from object should have been reset and be reusable.
- v1.f.Add("bar\\d+", v1.opts, &id);
- EXPECT_EQ(0, id);
- v1.f.Compile(&v1.atoms);
- EXPECT_EQ(1, v1.atoms.size());
- EXPECT_EQ("bar", v1.atoms[0]);
- v1.f.AllMatches("abc foo1 xyz", {0}, &v1.matches);
- EXPECT_EQ(0, v1.matches.size());
- v1.f.AllMatches("abc bar2 xyz", {0}, &v1.matches);
- EXPECT_EQ(1, v1.matches.size());
- EXPECT_EQ(0, v1.matches[0]);
-
- // Verify that "overwriting" works and also doesn't leak memory.
- // (The latter will need a leak detector such as LeakSanitizer.)
- v1.f = std::move(v2.f);
- v1.f.AllMatches("abc foo1 xyz", {0}, &v1.matches);
- EXPECT_EQ(1, v1.matches.size());
- EXPECT_EQ(0, v1.matches[0]);
- v1.f.AllMatches("abc bar2 xyz", {0}, &v1.matches);
- EXPECT_EQ(0, v1.matches.size());
-}
-
-} // namespace re2
+// Copyright 2009 The RE2 Authors. All Rights Reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <stddef.h>
+#include <algorithm>
+#include <memory>
+#include <string>
+#include <vector>
+#include <utility>
+
+#include "library/cpp/testing/gtest/gtest.h"
+#include "util/logging.h"
+#include "re2/filtered_re2.h"
+#include "re2/re2.h"
+
+namespace re2 {
+
+struct FilterTestVars {
+ FilterTestVars() {}
+ explicit FilterTestVars(int min_atom_len) : f(min_atom_len) {}
+
+ std::vector<std::string> atoms;
+ std::vector<int> atom_indices;
+ std::vector<int> matches;
+ RE2::Options opts;
+ FilteredRE2 f;
+};
+
+TEST(FilteredRE2Test, EmptyTest) {
+ FilterTestVars v;
+
+ v.f.Compile(&v.atoms);
+ EXPECT_EQ(0, v.atoms.size());
+
+ // Compile has no effect at all when called before Add: it will not
+ // record that it has been called and it will not clear the vector.
+ // The second point does not matter here, but the first point means
+ // that an error will be logged during the call to AllMatches.
+ v.f.AllMatches("foo", v.atom_indices, &v.matches);
+ EXPECT_EQ(0, v.matches.size());
+}
+
+TEST(FilteredRE2Test, SmallOrTest) {
+ FilterTestVars v(4); // override the minimum atom length
+ int id;
+ v.f.Add("(foo|bar)", v.opts, &id);
+
+ v.f.Compile(&v.atoms);
+ EXPECT_EQ(0, v.atoms.size());
+
+ v.f.AllMatches("lemurs bar", v.atom_indices, &v.matches);
+ EXPECT_EQ(1, v.matches.size());
+ EXPECT_EQ(id, v.matches[0]);
+}
+
+TEST(FilteredRE2Test, SmallLatinTest) {
+ FilterTestVars v;
+ int id;
+
+ v.opts.set_encoding(RE2::Options::EncodingLatin1);
+ v.f.Add("\xde\xadQ\xbe\xef", v.opts, &id);
+ v.f.Compile(&v.atoms);
+ EXPECT_EQ(1, v.atoms.size());
+ EXPECT_EQ(v.atoms[0], "\xde\xadq\xbe\xef");
+
+ v.atom_indices.push_back(0);
+ v.f.AllMatches("foo\xde\xadQ\xbe\xeflemur", v.atom_indices, &v.matches);
+ EXPECT_EQ(1, v.matches.size());
+ EXPECT_EQ(id, v.matches[0]);
+}
+
+struct AtomTest {
+ const char* testname;
+ // If any test needs more than this many regexps or atoms, increase
+ // the size of the corresponding array.
+ const char* regexps[20];
+ const char* atoms[20];
+};
+
+AtomTest atom_tests[] = {
+ {
+ // This test checks to make sure empty patterns are allowed.
+ "CheckEmptyPattern",
+ {""},
+ {}
+ }, {
+ // This test checks that all atoms of length greater than min length
+ // are found, and no atoms that are of smaller length are found.
+ "AllAtomsGtMinLengthFound", {
+ "(abc123|def456|ghi789).*mnop[x-z]+",
+ "abc..yyy..zz",
+ "mnmnpp[a-z]+PPP"
+ }, {
+ "abc123",
+ "def456",
+ "ghi789",
+ "mnop",
+ "abc",
+ "yyy",
+ "mnmnpp",
+ "ppp"
+ }
+ }, {
+ // Test to make sure that any atoms that have another atom as a
+ // substring in an OR are removed; that is, only the shortest
+ // substring is kept.
+ "SubstrAtomRemovesSuperStrInOr", {
+ "(abc123|abc|ghi789|abc1234).*[x-z]+",
+ "abcd..yyy..yyyzzz",
+ "mnmnpp[a-z]+PPP"
+ }, {
+ "abc",
+ "ghi789",
+ "abcd",
+ "yyy",
+ "yyyzzz",
+ "mnmnpp",
+ "ppp"
+ }
+ }, {
+ // Test character class expansion.
+ "CharClassExpansion", {
+ "m[a-c][d-f]n.*[x-z]+",
+ "[x-y]bcde[ab]"
+ }, {
+ "madn", "maen", "mafn",
+ "mbdn", "mben", "mbfn",
+ "mcdn", "mcen", "mcfn",
+ "xbcdea", "xbcdeb",
+ "ybcdea", "ybcdeb"
+ }
+ }, {
+ // Test upper/lower of non-ASCII.
+ "UnicodeLower", {
+ "(?i)ΔδΠϖπΣςσ",
+ "ΛΜΝΟΠ",
+ "ψρστυ",
+ }, {
+ "δδπππσσσ",
+ "λμνοπ",
+ "ψρστυ",
+ },
+ },
+};
+
+void AddRegexpsAndCompile(const char* regexps[],
+ size_t n,
+ struct FilterTestVars* v) {
+ for (size_t i = 0; i < n; i++) {
+ int id;
+ v->f.Add(regexps[i], v->opts, &id);
+ }
+ v->f.Compile(&v->atoms);
+}
+
+bool CheckExpectedAtoms(const char* atoms[],
+ size_t n,
+ const char* testname,
+ struct FilterTestVars* v) {
+ std::vector<std::string> expected;
+ for (size_t i = 0; i < n; i++)
+ expected.push_back(atoms[i]);
+
+ bool pass = expected.size() == v->atoms.size();
+
+ std::sort(v->atoms.begin(), v->atoms.end());
+ std::sort(expected.begin(), expected.end());
+ for (size_t i = 0; pass && i < n; i++)
+ pass = pass && expected[i] == v->atoms[i];
+
+ if (!pass) {
+ LOG(ERROR) << "Failed " << testname;
+ LOG(ERROR) << "Expected #atoms = " << expected.size();
+ for (size_t i = 0; i < expected.size(); i++)
+ LOG(ERROR) << expected[i];
+ LOG(ERROR) << "Found #atoms = " << v->atoms.size();
+ for (size_t i = 0; i < v->atoms.size(); i++)
+ LOG(ERROR) << v->atoms[i];
+ }
+
+ return pass;
+}
+
+TEST(FilteredRE2Test, AtomTests) {
+ int nfail = 0;
+ for (size_t i = 0; i < arraysize(atom_tests); i++) {
+ FilterTestVars v;
+ AtomTest* t = &atom_tests[i];
+ size_t nregexp, natom;
+ for (nregexp = 0; nregexp < arraysize(t->regexps); nregexp++)
+ if (t->regexps[nregexp] == NULL)
+ break;
+ for (natom = 0; natom < arraysize(t->atoms); natom++)
+ if (t->atoms[natom] == NULL)
+ break;
+ AddRegexpsAndCompile(t->regexps, nregexp, &v);
+ if (!CheckExpectedAtoms(t->atoms, natom, t->testname, &v))
+ nfail++;
+ }
+ EXPECT_EQ(0, nfail);
+}
+
+void FindAtomIndices(const std::vector<std::string>& atoms,
+ const std::vector<std::string>& matched_atoms,
+ std::vector<int>* atom_indices) {
+ atom_indices->clear();
+ for (size_t i = 0; i < matched_atoms.size(); i++) {
+ for (size_t j = 0; j < atoms.size(); j++) {
+ if (matched_atoms[i] == atoms[j]) {
+ atom_indices->push_back(static_cast<int>(j));
+ break;
+ }
+ }
+ }
+}
+
+TEST(FilteredRE2Test, MatchEmptyPattern) {
+ FilterTestVars v;
+ AtomTest* t = &atom_tests[0];
+ // We are using the regexps used in one of the atom tests
+ // for this test. Adding the EXPECT here to make sure
+ // the index we use for the test is for the correct test.
+ EXPECT_EQ("CheckEmptyPattern", std::string(t->testname));
+ size_t nregexp;
+ for (nregexp = 0; nregexp < arraysize(t->regexps); nregexp++)
+ if (t->regexps[nregexp] == NULL)
+ break;
+ AddRegexpsAndCompile(t->regexps, nregexp, &v);
+ std::string text = "0123";
+ std::vector<int> atom_ids;
+ std::vector<int> matching_regexps;
+ EXPECT_EQ(0, v.f.FirstMatch(text, atom_ids));
+}
+
+TEST(FilteredRE2Test, MatchTests) {
+ FilterTestVars v;
+ AtomTest* t = &atom_tests[2];
+ // We are using the regexps used in one of the atom tests
+ // for this test.
+ EXPECT_EQ("SubstrAtomRemovesSuperStrInOr", std::string(t->testname));
+ size_t nregexp;
+ for (nregexp = 0; nregexp < arraysize(t->regexps); nregexp++)
+ if (t->regexps[nregexp] == NULL)
+ break;
+ AddRegexpsAndCompile(t->regexps, nregexp, &v);
+
+ std::string text = "abc121212xyz";
+ // atoms = abc
+ std::vector<int> atom_ids;
+ std::vector<std::string> atoms;
+ atoms.push_back("abc");
+ FindAtomIndices(v.atoms, atoms, &atom_ids);
+ std::vector<int> matching_regexps;
+ v.f.AllMatches(text, atom_ids, &matching_regexps);
+ EXPECT_EQ(1, matching_regexps.size());
+
+ text = "abc12312yyyzzz";
+ atoms.clear();
+ atoms.push_back("abc");
+ atoms.push_back("yyy");
+ atoms.push_back("yyyzzz");
+ FindAtomIndices(v.atoms, atoms, &atom_ids);
+ v.f.AllMatches(text, atom_ids, &matching_regexps);
+ EXPECT_EQ(1, matching_regexps.size());
+
+ text = "abcd12yyy32yyyzzz";
+ atoms.clear();
+ atoms.push_back("abc");
+ atoms.push_back("abcd");
+ atoms.push_back("yyy");
+ atoms.push_back("yyyzzz");
+ FindAtomIndices(v.atoms, atoms, &atom_ids);
+ LOG(INFO) << "S: " << atom_ids.size();
+ for (size_t i = 0; i < atom_ids.size(); i++)
+ LOG(INFO) << "i: " << i << " : " << atom_ids[i];
+ v.f.AllMatches(text, atom_ids, &matching_regexps);
+ EXPECT_EQ(2, matching_regexps.size());
+}
+
+TEST(FilteredRE2Test, EmptyStringInStringSetBug) {
+ // Bug due to find() finding "" at the start of everything in a string
+ // set and thus SimplifyStringSet() would end up erasing everything.
+ // In order to test this, we have to keep PrefilterTree from discarding
+ // the OR entirely, so we have to make the minimum atom length zero.
+
+ FilterTestVars v(0); // override the minimum atom length
+ const char* regexps[] = {"-R.+(|ADD=;AA){12}}"};
+ const char* atoms[] = {"", "-r", "add=;aa", "}"};
+ AddRegexpsAndCompile(regexps, arraysize(regexps), &v);
+ EXPECT_TRUE(CheckExpectedAtoms(atoms, arraysize(atoms),
+ "EmptyStringInStringSetBug", &v));
+}
+
+TEST(FilteredRE2Test, MoveSemantics) {
+ FilterTestVars v1;
+ int id;
+ v1.f.Add("foo\\d+", v1.opts, &id);
+ EXPECT_EQ(0, id);
+ v1.f.Compile(&v1.atoms);
+ EXPECT_EQ(1, v1.atoms.size());
+ EXPECT_EQ("foo", v1.atoms[0]);
+ v1.f.AllMatches("abc foo1 xyz", {0}, &v1.matches);
+ EXPECT_EQ(1, v1.matches.size());
+ EXPECT_EQ(0, v1.matches[0]);
+ v1.f.AllMatches("abc bar2 xyz", {0}, &v1.matches);
+ EXPECT_EQ(0, v1.matches.size());
+
+ // The moved-to object should do what the moved-from object did.
+ FilterTestVars v2;
+ v2.f = std::move(v1.f);
+ v2.f.AllMatches("abc foo1 xyz", {0}, &v2.matches);
+ EXPECT_EQ(1, v2.matches.size());
+ EXPECT_EQ(0, v2.matches[0]);
+ v2.f.AllMatches("abc bar2 xyz", {0}, &v2.matches);
+ EXPECT_EQ(0, v2.matches.size());
+
+ // The moved-from object should have been reset and be reusable.
+ v1.f.Add("bar\\d+", v1.opts, &id);
+ EXPECT_EQ(0, id);
+ v1.f.Compile(&v1.atoms);
+ EXPECT_EQ(1, v1.atoms.size());
+ EXPECT_EQ("bar", v1.atoms[0]);
+ v1.f.AllMatches("abc foo1 xyz", {0}, &v1.matches);
+ EXPECT_EQ(0, v1.matches.size());
+ v1.f.AllMatches("abc bar2 xyz", {0}, &v1.matches);
+ EXPECT_EQ(1, v1.matches.size());
+ EXPECT_EQ(0, v1.matches[0]);
+
+ // Verify that "overwriting" works and also doesn't leak memory.
+ // (The latter will need a leak detector such as LeakSanitizer.)
+ v1.f = std::move(v2.f);
+ v1.f.AllMatches("abc foo1 xyz", {0}, &v1.matches);
+ EXPECT_EQ(1, v1.matches.size());
+ EXPECT_EQ(0, v1.matches[0]);
+ v1.f.AllMatches("abc bar2 xyz", {0}, &v1.matches);
+ EXPECT_EQ(0, v1.matches.size());
+}
+
+} // namespace re2
diff --git a/contrib/libs/re2/re2/testing/mimics_pcre_test.cc b/contrib/libs/re2/re2/testing/mimics_pcre_test.cc
index ac2612e71a..cb21aef726 100644
--- a/contrib/libs/re2/re2/testing/mimics_pcre_test.cc
+++ b/contrib/libs/re2/re2/testing/mimics_pcre_test.cc
@@ -1,77 +1,77 @@
-// Copyright 2008 The RE2 Authors. All Rights Reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-#include "library/cpp/testing/gtest/gtest.h"
-#include "util/logging.h"
-#include "re2/prog.h"
-#include "re2/regexp.h"
-
-namespace re2 {
-
-struct PCRETest {
- const char* regexp;
- bool should_match;
-};
-
-static PCRETest tests[] = {
- // Most things should behave exactly.
- { "abc", true },
- { "(a|b)c", true },
- { "(a*|b)c", true },
- { "(a|b*)c", true },
- { "a(b|c)d", true },
- { "a(()|())c", true },
- { "ab*c", true },
- { "ab+c", true },
- { "a(b*|c*)d", true },
- { "\\W", true },
- { "\\W{1,2}", true },
- { "\\d", true },
-
- // Check that repeated empty strings do not.
- { "(a*)*", false },
- { "x(a*)*y", false },
- { "(a*)+", false },
- { "(a+)*", true },
- { "(a+)+", true },
- { "(a+)+", true },
-
- // \v is the only character class that shouldn't.
- { "\\b", true },
- { "\\v", false },
- { "\\d", true },
-
- // The handling of ^ in multi-line mode is different, as is
- // the handling of $ in single-line mode. (Both involve
- // boundary cases if the string ends with \n.)
- { "\\A", true },
- { "\\z", true },
- { "(?m)^", false },
- { "(?m)$", true },
- { "(?-m)^", true },
- { "(?-m)$", false }, // In PCRE, == \Z
- { "(?m)\\A", true },
- { "(?m)\\z", true },
- { "(?-m)\\A", true },
- { "(?-m)\\z", true },
-};
-
-TEST(MimicsPCRE, SimpleTests) {
- for (size_t i = 0; i < arraysize(tests); i++) {
- const PCRETest& t = tests[i];
- for (size_t j = 0; j < 2; j++) {
- Regexp::ParseFlags flags = Regexp::LikePerl;
- if (j == 0)
- flags = flags | Regexp::Latin1;
- Regexp* re = Regexp::Parse(t.regexp, flags, NULL);
- ASSERT_TRUE(re != NULL) << " " << t.regexp;
- ASSERT_EQ(t.should_match, re->MimicsPCRE())
- << " " << t.regexp << " "
- << (j == 0 ? "latin1" : "utf");
- re->Decref();
- }
- }
-}
-
-} // namespace re2
+// Copyright 2008 The RE2 Authors. All Rights Reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "library/cpp/testing/gtest/gtest.h"
+#include "util/logging.h"
+#include "re2/prog.h"
+#include "re2/regexp.h"
+
+namespace re2 {
+
+struct PCRETest {
+ const char* regexp;
+ bool should_match;
+};
+
+static PCRETest tests[] = {
+ // Most things should behave exactly.
+ { "abc", true },
+ { "(a|b)c", true },
+ { "(a*|b)c", true },
+ { "(a|b*)c", true },
+ { "a(b|c)d", true },
+ { "a(()|())c", true },
+ { "ab*c", true },
+ { "ab+c", true },
+ { "a(b*|c*)d", true },
+ { "\\W", true },
+ { "\\W{1,2}", true },
+ { "\\d", true },
+
+ // Check that repeated empty strings do not.
+ { "(a*)*", false },
+ { "x(a*)*y", false },
+ { "(a*)+", false },
+ { "(a+)*", true },
+ { "(a+)+", true },
+ { "(a+)+", true },
+
+ // \v is the only character class that shouldn't.
+ { "\\b", true },
+ { "\\v", false },
+ { "\\d", true },
+
+ // The handling of ^ in multi-line mode is different, as is
+ // the handling of $ in single-line mode. (Both involve
+ // boundary cases if the string ends with \n.)
+ { "\\A", true },
+ { "\\z", true },
+ { "(?m)^", false },
+ { "(?m)$", true },
+ { "(?-m)^", true },
+ { "(?-m)$", false }, // In PCRE, == \Z
+ { "(?m)\\A", true },
+ { "(?m)\\z", true },
+ { "(?-m)\\A", true },
+ { "(?-m)\\z", true },
+};
+
+TEST(MimicsPCRE, SimpleTests) {
+ for (size_t i = 0; i < arraysize(tests); i++) {
+ const PCRETest& t = tests[i];
+ for (size_t j = 0; j < 2; j++) {
+ Regexp::ParseFlags flags = Regexp::LikePerl;
+ if (j == 0)
+ flags = flags | Regexp::Latin1;
+ Regexp* re = Regexp::Parse(t.regexp, flags, NULL);
+ ASSERT_TRUE(re != NULL) << " " << t.regexp;
+ ASSERT_EQ(t.should_match, re->MimicsPCRE())
+ << " " << t.regexp << " "
+ << (j == 0 ? "latin1" : "utf");
+ re->Decref();
+ }
+ }
+}
+
+} // namespace re2
diff --git a/contrib/libs/re2/re2/testing/null_walker.cc b/contrib/libs/re2/re2/testing/null_walker.cc
index 32a2aa0d8a..adc46068e6 100644
--- a/contrib/libs/re2/re2/testing/null_walker.cc
+++ b/contrib/libs/re2/re2/testing/null_walker.cc
@@ -1,49 +1,49 @@
-// Copyright 2009 The RE2 Authors. All Rights Reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-#include "library/cpp/testing/gtest/gtest.h"
-#include "util/logging.h"
-#include "re2/regexp.h"
-#include "re2/walker-inl.h"
-
-namespace re2 {
-
-// Null walker. For benchmarking the walker itself.
-
-class NullWalker : public Regexp::Walker<bool> {
- public:
- NullWalker() {}
-
- virtual bool PostVisit(Regexp* re, bool parent_arg, bool pre_arg,
- bool* child_args, int nchild_args);
-
- virtual bool ShortVisit(Regexp* re, bool a) {
- // Should never be called: we use Walk(), not WalkExponential().
-#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
- LOG(DFATAL) << "NullWalker::ShortVisit called";
-#endif
- return a;
- }
-
- private:
- NullWalker(const NullWalker&) = delete;
- NullWalker& operator=(const NullWalker&) = delete;
-};
-
-// Called after visiting re's children. child_args contains the return
-// value from each of the children's PostVisits (i.e., whether each child
-// can match an empty string). Returns whether this clause can match an
-// empty string.
-bool NullWalker::PostVisit(Regexp* re, bool parent_arg, bool pre_arg,
- bool* child_args, int nchild_args) {
- return false;
-}
-
-// Returns whether re can match an empty string.
-void Regexp::NullWalk() {
- NullWalker w;
- w.Walk(this, false);
-}
-
-} // namespace re2
+// Copyright 2009 The RE2 Authors. All Rights Reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "library/cpp/testing/gtest/gtest.h"
+#include "util/logging.h"
+#include "re2/regexp.h"
+#include "re2/walker-inl.h"
+
+namespace re2 {
+
+// Null walker. For benchmarking the walker itself.
+
+class NullWalker : public Regexp::Walker<bool> {
+ public:
+ NullWalker() {}
+
+ virtual bool PostVisit(Regexp* re, bool parent_arg, bool pre_arg,
+ bool* child_args, int nchild_args);
+
+ virtual bool ShortVisit(Regexp* re, bool a) {
+ // Should never be called: we use Walk(), not WalkExponential().
+#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
+ LOG(DFATAL) << "NullWalker::ShortVisit called";
+#endif
+ return a;
+ }
+
+ private:
+ NullWalker(const NullWalker&) = delete;
+ NullWalker& operator=(const NullWalker&) = delete;
+};
+
+// Called after visiting re's children. child_args contains the return
+// value from each of the children's PostVisits (i.e., whether each child
+// can match an empty string). Returns whether this clause can match an
+// empty string.
+bool NullWalker::PostVisit(Regexp* re, bool parent_arg, bool pre_arg,
+ bool* child_args, int nchild_args) {
+ return false;
+}
+
+// Returns whether re can match an empty string.
+void Regexp::NullWalk() {
+ NullWalker w;
+ w.Walk(this, false);
+}
+
+} // namespace re2
diff --git a/contrib/libs/re2/re2/testing/parse_test.cc b/contrib/libs/re2/re2/testing/parse_test.cc
index 6856e4e162..b1cbfdc5c8 100644
--- a/contrib/libs/re2/re2/testing/parse_test.cc
+++ b/contrib/libs/re2/re2/testing/parse_test.cc
@@ -1,509 +1,509 @@
-// Copyright 2006 The RE2 Authors. All Rights Reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// Test parse.cc, dump.cc, and tostring.cc.
-
-#include <string>
-
-#include "library/cpp/testing/gtest/gtest.h"
-#include "util/logging.h"
-#include "re2/regexp.h"
-
-namespace re2 {
-
-// In the past, we used 1<<30 here and zeroed the bit later, but that
-// has undefined behaviour, so now we use an internal-only flag because
-// otherwise we would have to introduce a new flag value just for this.
-static const Regexp::ParseFlags TestZeroFlags = Regexp::WasDollar;
-
-struct Test {
- const char* regexp;
- const char* parse;
- Regexp::ParseFlags flags;
-};
-
-static Regexp::ParseFlags kTestFlags = Regexp::MatchNL |
- Regexp::PerlX |
- Regexp::PerlClasses |
- Regexp::UnicodeGroups;
-
-static Test tests[] = {
- // Base cases
- { "a", "lit{a}" },
- { "a.", "cat{lit{a}dot{}}" },
- { "a.b", "cat{lit{a}dot{}lit{b}}" },
- { "ab", "str{ab}" },
- { "a.b.c", "cat{lit{a}dot{}lit{b}dot{}lit{c}}" },
- { "abc", "str{abc}" },
- { "a|^", "alt{lit{a}bol{}}" },
- { "a|b", "cc{0x61-0x62}" },
- { "(a)", "cap{lit{a}}" },
- { "(a)|b", "alt{cap{lit{a}}lit{b}}" },
- { "a*", "star{lit{a}}" },
- { "a+", "plus{lit{a}}" },
- { "a?", "que{lit{a}}" },
- { "a{2}", "rep{2,2 lit{a}}" },
- { "a{2,3}", "rep{2,3 lit{a}}" },
- { "a{2,}", "rep{2,-1 lit{a}}" },
- { "a*?", "nstar{lit{a}}" },
- { "a+?", "nplus{lit{a}}" },
- { "a??", "nque{lit{a}}" },
- { "a{2}?", "nrep{2,2 lit{a}}" },
- { "a{2,3}?", "nrep{2,3 lit{a}}" },
- { "a{2,}?", "nrep{2,-1 lit{a}}" },
- { "", "emp{}" },
- { "|", "alt{emp{}emp{}}" },
- { "|x|", "alt{emp{}lit{x}emp{}}" },
- { ".", "dot{}" },
- { "^", "bol{}" },
- { "$", "eol{}" },
- { "\\|", "lit{|}" },
- { "\\(", "lit{(}" },
- { "\\)", "lit{)}" },
- { "\\*", "lit{*}" },
- { "\\+", "lit{+}" },
- { "\\?", "lit{?}" },
- { "{", "lit{{}" },
- { "}", "lit{}}" },
- { "\\.", "lit{.}" },
- { "\\^", "lit{^}" },
- { "\\$", "lit{$}" },
- { "\\\\", "lit{\\}" },
- { "[ace]", "cc{0x61 0x63 0x65}" },
- { "[abc]", "cc{0x61-0x63}" },
- { "[a-z]", "cc{0x61-0x7a}" },
- { "[a]", "lit{a}" },
- { "\\-", "lit{-}" },
- { "-", "lit{-}" },
- { "\\_", "lit{_}" },
-
- // Posix and Perl extensions
- { "[[:lower:]]", "cc{0x61-0x7a}" },
- { "[a-z]", "cc{0x61-0x7a}" },
- { "[^[:lower:]]", "cc{0-0x60 0x7b-0x10ffff}" },
- { "[[:^lower:]]", "cc{0-0x60 0x7b-0x10ffff}" },
- { "(?i)[[:lower:]]", "cc{0x41-0x5a 0x61-0x7a 0x17f 0x212a}" },
- { "(?i)[a-z]", "cc{0x41-0x5a 0x61-0x7a 0x17f 0x212a}" },
- { "(?i)[^[:lower:]]", "cc{0-0x40 0x5b-0x60 0x7b-0x17e 0x180-0x2129 0x212b-0x10ffff}" },
- { "(?i)[[:^lower:]]", "cc{0-0x40 0x5b-0x60 0x7b-0x17e 0x180-0x2129 0x212b-0x10ffff}" },
- { "\\d", "cc{0x30-0x39}" },
- { "\\D", "cc{0-0x2f 0x3a-0x10ffff}" },
- { "\\s", "cc{0x9-0xa 0xc-0xd 0x20}" },
- { "\\S", "cc{0-0x8 0xb 0xe-0x1f 0x21-0x10ffff}" },
- { "\\w", "cc{0x30-0x39 0x41-0x5a 0x5f 0x61-0x7a}" },
- { "\\W", "cc{0-0x2f 0x3a-0x40 0x5b-0x5e 0x60 0x7b-0x10ffff}" },
- { "(?i)\\w", "cc{0x30-0x39 0x41-0x5a 0x5f 0x61-0x7a 0x17f 0x212a}" },
- { "(?i)\\W", "cc{0-0x2f 0x3a-0x40 0x5b-0x5e 0x60 0x7b-0x17e 0x180-0x2129 0x212b-0x10ffff}" },
- { "[^\\\\]", "cc{0-0x5b 0x5d-0x10ffff}" },
- { "\\C", "byte{}" },
-
- // Unicode, negatives, and a double negative.
- { "\\p{Braille}", "cc{0x2800-0x28ff}" },
- { "\\P{Braille}", "cc{0-0x27ff 0x2900-0x10ffff}" },
- { "\\p{^Braille}", "cc{0-0x27ff 0x2900-0x10ffff}" },
- { "\\P{^Braille}", "cc{0x2800-0x28ff}" },
-
- // More interesting regular expressions.
- { "a{,2}", "str{a{,2}}" },
- { "\\.\\^\\$\\\\", "str{.^$\\}" },
- { "[a-zABC]", "cc{0x41-0x43 0x61-0x7a}" },
- { "[^a]", "cc{0-0x60 0x62-0x10ffff}" },
- { "[\xce\xb1-\xce\xb5\xe2\x98\xba]", "cc{0x3b1-0x3b5 0x263a}" }, // utf-8
- { "a*{", "cat{star{lit{a}}lit{{}}" },
-
- // Test precedences
- { "(?:ab)*", "star{str{ab}}" },
- { "(ab)*", "star{cap{str{ab}}}" },
- { "ab|cd", "alt{str{ab}str{cd}}" },
- { "a(b|c)d", "cat{lit{a}cap{cc{0x62-0x63}}lit{d}}" },
-
- // Test squashing of **, ++, ?? et cetera.
- { "(?:(?:a)*)*", "star{lit{a}}" },
- { "(?:(?:a)+)+", "plus{lit{a}}" },
- { "(?:(?:a)?)?", "que{lit{a}}" },
- { "(?:(?:a)*)+", "star{lit{a}}" },
- { "(?:(?:a)*)?", "star{lit{a}}" },
- { "(?:(?:a)+)*", "star{lit{a}}" },
- { "(?:(?:a)+)?", "star{lit{a}}" },
- { "(?:(?:a)?)*", "star{lit{a}}" },
- { "(?:(?:a)?)+", "star{lit{a}}" },
-
- // Test flattening.
- { "(?:a)", "lit{a}" },
- { "(?:ab)(?:cd)", "str{abcd}" },
- { "(?:a|b)|(?:c|d)", "cc{0x61-0x64}" },
- { "a|c", "cc{0x61 0x63}" },
- { "a|[cd]", "cc{0x61 0x63-0x64}" },
- { "a|.", "dot{}" },
- { "[ab]|c", "cc{0x61-0x63}" },
- { "[ab]|[cd]", "cc{0x61-0x64}" },
- { "[ab]|.", "dot{}" },
- { ".|c", "dot{}" },
- { ".|[cd]", "dot{}" },
- { ".|.", "dot{}" },
-
- // Test Perl quoted literals
- { "\\Q+|*?{[\\E", "str{+|*?{[}" },
- { "\\Q+\\E+", "plus{lit{+}}" },
- { "\\Q\\\\E", "lit{\\}" },
- { "\\Q\\\\\\E", "str{\\\\}" },
- { "\\Qa\\E*", "star{lit{a}}" },
- { "\\Qab\\E*", "cat{lit{a}star{lit{b}}}" },
- { "\\Qabc\\E*", "cat{str{ab}star{lit{c}}}" },
-
- // Test Perl \A and \z
- { "(?m)^", "bol{}" },
- { "(?m)$", "eol{}" },
- { "(?-m)^", "bot{}" },
- { "(?-m)$", "eot{}" },
- { "(?m)\\A", "bot{}" },
- { "(?m)\\z", "eot{\\z}" },
- { "(?-m)\\A", "bot{}" },
- { "(?-m)\\z", "eot{\\z}" },
-
- // Test named captures
- { "(?P<name>a)", "cap{name:lit{a}}" },
+// Copyright 2006 The RE2 Authors. All Rights Reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Test parse.cc, dump.cc, and tostring.cc.
+
+#include <string>
+
+#include "library/cpp/testing/gtest/gtest.h"
+#include "util/logging.h"
+#include "re2/regexp.h"
+
+namespace re2 {
+
+// In the past, we used 1<<30 here and zeroed the bit later, but that
+// has undefined behaviour, so now we use an internal-only flag because
+// otherwise we would have to introduce a new flag value just for this.
+static const Regexp::ParseFlags TestZeroFlags = Regexp::WasDollar;
+
+struct Test {
+ const char* regexp;
+ const char* parse;
+ Regexp::ParseFlags flags;
+};
+
+static Regexp::ParseFlags kTestFlags = Regexp::MatchNL |
+ Regexp::PerlX |
+ Regexp::PerlClasses |
+ Regexp::UnicodeGroups;
+
+static Test tests[] = {
+ // Base cases
+ { "a", "lit{a}" },
+ { "a.", "cat{lit{a}dot{}}" },
+ { "a.b", "cat{lit{a}dot{}lit{b}}" },
+ { "ab", "str{ab}" },
+ { "a.b.c", "cat{lit{a}dot{}lit{b}dot{}lit{c}}" },
+ { "abc", "str{abc}" },
+ { "a|^", "alt{lit{a}bol{}}" },
+ { "a|b", "cc{0x61-0x62}" },
+ { "(a)", "cap{lit{a}}" },
+ { "(a)|b", "alt{cap{lit{a}}lit{b}}" },
+ { "a*", "star{lit{a}}" },
+ { "a+", "plus{lit{a}}" },
+ { "a?", "que{lit{a}}" },
+ { "a{2}", "rep{2,2 lit{a}}" },
+ { "a{2,3}", "rep{2,3 lit{a}}" },
+ { "a{2,}", "rep{2,-1 lit{a}}" },
+ { "a*?", "nstar{lit{a}}" },
+ { "a+?", "nplus{lit{a}}" },
+ { "a??", "nque{lit{a}}" },
+ { "a{2}?", "nrep{2,2 lit{a}}" },
+ { "a{2,3}?", "nrep{2,3 lit{a}}" },
+ { "a{2,}?", "nrep{2,-1 lit{a}}" },
+ { "", "emp{}" },
+ { "|", "alt{emp{}emp{}}" },
+ { "|x|", "alt{emp{}lit{x}emp{}}" },
+ { ".", "dot{}" },
+ { "^", "bol{}" },
+ { "$", "eol{}" },
+ { "\\|", "lit{|}" },
+ { "\\(", "lit{(}" },
+ { "\\)", "lit{)}" },
+ { "\\*", "lit{*}" },
+ { "\\+", "lit{+}" },
+ { "\\?", "lit{?}" },
+ { "{", "lit{{}" },
+ { "}", "lit{}}" },
+ { "\\.", "lit{.}" },
+ { "\\^", "lit{^}" },
+ { "\\$", "lit{$}" },
+ { "\\\\", "lit{\\}" },
+ { "[ace]", "cc{0x61 0x63 0x65}" },
+ { "[abc]", "cc{0x61-0x63}" },
+ { "[a-z]", "cc{0x61-0x7a}" },
+ { "[a]", "lit{a}" },
+ { "\\-", "lit{-}" },
+ { "-", "lit{-}" },
+ { "\\_", "lit{_}" },
+
+ // Posix and Perl extensions
+ { "[[:lower:]]", "cc{0x61-0x7a}" },
+ { "[a-z]", "cc{0x61-0x7a}" },
+ { "[^[:lower:]]", "cc{0-0x60 0x7b-0x10ffff}" },
+ { "[[:^lower:]]", "cc{0-0x60 0x7b-0x10ffff}" },
+ { "(?i)[[:lower:]]", "cc{0x41-0x5a 0x61-0x7a 0x17f 0x212a}" },
+ { "(?i)[a-z]", "cc{0x41-0x5a 0x61-0x7a 0x17f 0x212a}" },
+ { "(?i)[^[:lower:]]", "cc{0-0x40 0x5b-0x60 0x7b-0x17e 0x180-0x2129 0x212b-0x10ffff}" },
+ { "(?i)[[:^lower:]]", "cc{0-0x40 0x5b-0x60 0x7b-0x17e 0x180-0x2129 0x212b-0x10ffff}" },
+ { "\\d", "cc{0x30-0x39}" },
+ { "\\D", "cc{0-0x2f 0x3a-0x10ffff}" },
+ { "\\s", "cc{0x9-0xa 0xc-0xd 0x20}" },
+ { "\\S", "cc{0-0x8 0xb 0xe-0x1f 0x21-0x10ffff}" },
+ { "\\w", "cc{0x30-0x39 0x41-0x5a 0x5f 0x61-0x7a}" },
+ { "\\W", "cc{0-0x2f 0x3a-0x40 0x5b-0x5e 0x60 0x7b-0x10ffff}" },
+ { "(?i)\\w", "cc{0x30-0x39 0x41-0x5a 0x5f 0x61-0x7a 0x17f 0x212a}" },
+ { "(?i)\\W", "cc{0-0x2f 0x3a-0x40 0x5b-0x5e 0x60 0x7b-0x17e 0x180-0x2129 0x212b-0x10ffff}" },
+ { "[^\\\\]", "cc{0-0x5b 0x5d-0x10ffff}" },
+ { "\\C", "byte{}" },
+
+ // Unicode, negatives, and a double negative.
+ { "\\p{Braille}", "cc{0x2800-0x28ff}" },
+ { "\\P{Braille}", "cc{0-0x27ff 0x2900-0x10ffff}" },
+ { "\\p{^Braille}", "cc{0-0x27ff 0x2900-0x10ffff}" },
+ { "\\P{^Braille}", "cc{0x2800-0x28ff}" },
+
+ // More interesting regular expressions.
+ { "a{,2}", "str{a{,2}}" },
+ { "\\.\\^\\$\\\\", "str{.^$\\}" },
+ { "[a-zABC]", "cc{0x41-0x43 0x61-0x7a}" },
+ { "[^a]", "cc{0-0x60 0x62-0x10ffff}" },
+ { "[\xce\xb1-\xce\xb5\xe2\x98\xba]", "cc{0x3b1-0x3b5 0x263a}" }, // utf-8
+ { "a*{", "cat{star{lit{a}}lit{{}}" },
+
+ // Test precedences
+ { "(?:ab)*", "star{str{ab}}" },
+ { "(ab)*", "star{cap{str{ab}}}" },
+ { "ab|cd", "alt{str{ab}str{cd}}" },
+ { "a(b|c)d", "cat{lit{a}cap{cc{0x62-0x63}}lit{d}}" },
+
+ // Test squashing of **, ++, ?? et cetera.
+ { "(?:(?:a)*)*", "star{lit{a}}" },
+ { "(?:(?:a)+)+", "plus{lit{a}}" },
+ { "(?:(?:a)?)?", "que{lit{a}}" },
+ { "(?:(?:a)*)+", "star{lit{a}}" },
+ { "(?:(?:a)*)?", "star{lit{a}}" },
+ { "(?:(?:a)+)*", "star{lit{a}}" },
+ { "(?:(?:a)+)?", "star{lit{a}}" },
+ { "(?:(?:a)?)*", "star{lit{a}}" },
+ { "(?:(?:a)?)+", "star{lit{a}}" },
+
+ // Test flattening.
+ { "(?:a)", "lit{a}" },
+ { "(?:ab)(?:cd)", "str{abcd}" },
+ { "(?:a|b)|(?:c|d)", "cc{0x61-0x64}" },
+ { "a|c", "cc{0x61 0x63}" },
+ { "a|[cd]", "cc{0x61 0x63-0x64}" },
+ { "a|.", "dot{}" },
+ { "[ab]|c", "cc{0x61-0x63}" },
+ { "[ab]|[cd]", "cc{0x61-0x64}" },
+ { "[ab]|.", "dot{}" },
+ { ".|c", "dot{}" },
+ { ".|[cd]", "dot{}" },
+ { ".|.", "dot{}" },
+
+ // Test Perl quoted literals
+ { "\\Q+|*?{[\\E", "str{+|*?{[}" },
+ { "\\Q+\\E+", "plus{lit{+}}" },
+ { "\\Q\\\\E", "lit{\\}" },
+ { "\\Q\\\\\\E", "str{\\\\}" },
+ { "\\Qa\\E*", "star{lit{a}}" },
+ { "\\Qab\\E*", "cat{lit{a}star{lit{b}}}" },
+ { "\\Qabc\\E*", "cat{str{ab}star{lit{c}}}" },
+
+ // Test Perl \A and \z
+ { "(?m)^", "bol{}" },
+ { "(?m)$", "eol{}" },
+ { "(?-m)^", "bot{}" },
+ { "(?-m)$", "eot{}" },
+ { "(?m)\\A", "bot{}" },
+ { "(?m)\\z", "eot{\\z}" },
+ { "(?-m)\\A", "bot{}" },
+ { "(?-m)\\z", "eot{\\z}" },
+
+ // Test named captures
+ { "(?P<name>a)", "cap{name:lit{a}}" },
{ "(?P<中文>a)", "cap{中文:lit{a}}" },
-
- // Case-folded literals
- { "[Aa]", "litfold{a}" },
-
- // Strings
- { "abcde", "str{abcde}" },
- { "[Aa][Bb]cd", "cat{strfold{ab}str{cd}}" },
-
- // Reported bug involving \n leaking in despite use of NeverNL.
- { "[^ ]", "cc{0-0x9 0xb-0x1f 0x21-0x10ffff}", TestZeroFlags },
- { "[^ ]", "cc{0-0x9 0xb-0x1f 0x21-0x10ffff}", Regexp::FoldCase },
- { "[^ ]", "cc{0-0x9 0xb-0x1f 0x21-0x10ffff}", Regexp::NeverNL },
- { "[^ ]", "cc{0-0x9 0xb-0x1f 0x21-0x10ffff}", Regexp::NeverNL | Regexp::FoldCase },
- { "[^ \f]", "cc{0-0x9 0xb 0xd-0x1f 0x21-0x10ffff}", TestZeroFlags },
- { "[^ \f]", "cc{0-0x9 0xb 0xd-0x1f 0x21-0x10ffff}", Regexp::FoldCase },
- { "[^ \f]", "cc{0-0x9 0xb 0xd-0x1f 0x21-0x10ffff}", Regexp::NeverNL },
- { "[^ \f]", "cc{0-0x9 0xb 0xd-0x1f 0x21-0x10ffff}", Regexp::NeverNL | Regexp::FoldCase },
- { "[^ \r]", "cc{0-0x9 0xb-0xc 0xe-0x1f 0x21-0x10ffff}", TestZeroFlags },
- { "[^ \r]", "cc{0-0x9 0xb-0xc 0xe-0x1f 0x21-0x10ffff}", Regexp::FoldCase },
- { "[^ \r]", "cc{0-0x9 0xb-0xc 0xe-0x1f 0x21-0x10ffff}", Regexp::NeverNL },
- { "[^ \r]", "cc{0-0x9 0xb-0xc 0xe-0x1f 0x21-0x10ffff}", Regexp::NeverNL | Regexp::FoldCase },
- { "[^ \v]", "cc{0-0x9 0xc-0x1f 0x21-0x10ffff}", TestZeroFlags },
- { "[^ \v]", "cc{0-0x9 0xc-0x1f 0x21-0x10ffff}", Regexp::FoldCase },
- { "[^ \v]", "cc{0-0x9 0xc-0x1f 0x21-0x10ffff}", Regexp::NeverNL },
- { "[^ \v]", "cc{0-0x9 0xc-0x1f 0x21-0x10ffff}", Regexp::NeverNL | Regexp::FoldCase },
- { "[^ \t]", "cc{0-0x8 0xb-0x1f 0x21-0x10ffff}", TestZeroFlags },
- { "[^ \t]", "cc{0-0x8 0xb-0x1f 0x21-0x10ffff}", Regexp::FoldCase },
- { "[^ \t]", "cc{0-0x8 0xb-0x1f 0x21-0x10ffff}", Regexp::NeverNL },
- { "[^ \t]", "cc{0-0x8 0xb-0x1f 0x21-0x10ffff}", Regexp::NeverNL | Regexp::FoldCase },
- { "[^ \r\f\v]", "cc{0-0x9 0xe-0x1f 0x21-0x10ffff}", Regexp::NeverNL },
- { "[^ \r\f\v]", "cc{0-0x9 0xe-0x1f 0x21-0x10ffff}", Regexp::NeverNL | Regexp::FoldCase },
- { "[^ \r\f\t\v]", "cc{0-0x8 0xe-0x1f 0x21-0x10ffff}", Regexp::NeverNL },
- { "[^ \r\f\t\v]", "cc{0-0x8 0xe-0x1f 0x21-0x10ffff}", Regexp::NeverNL | Regexp::FoldCase },
- { "[^ \r\n\f\t\v]", "cc{0-0x8 0xe-0x1f 0x21-0x10ffff}", Regexp::NeverNL },
- { "[^ \r\n\f\t\v]", "cc{0-0x8 0xe-0x1f 0x21-0x10ffff}", Regexp::NeverNL | Regexp::FoldCase },
- { "[^ \r\n\f\t]", "cc{0-0x8 0xb 0xe-0x1f 0x21-0x10ffff}", Regexp::NeverNL },
- { "[^ \r\n\f\t]", "cc{0-0x8 0xb 0xe-0x1f 0x21-0x10ffff}", Regexp::NeverNL | Regexp::FoldCase },
- { "[^\t-\n\f-\r ]", "cc{0-0x8 0xb 0xe-0x1f 0x21-0x10ffff}",
- Regexp::PerlClasses },
- { "[^\t-\n\f-\r ]", "cc{0-0x8 0xb 0xe-0x1f 0x21-0x10ffff}",
- Regexp::PerlClasses | Regexp::FoldCase },
- { "[^\t-\n\f-\r ]", "cc{0-0x8 0xb 0xe-0x1f 0x21-0x10ffff}",
- Regexp::PerlClasses | Regexp::NeverNL },
- { "[^\t-\n\f-\r ]", "cc{0-0x8 0xb 0xe-0x1f 0x21-0x10ffff}",
- Regexp::PerlClasses | Regexp::NeverNL | Regexp::FoldCase },
- { "\\S", "cc{0-0x8 0xb 0xe-0x1f 0x21-0x10ffff}",
- Regexp::PerlClasses },
- { "\\S", "cc{0-0x8 0xb 0xe-0x1f 0x21-0x10ffff}",
- Regexp::PerlClasses | Regexp::FoldCase },
- { "\\S", "cc{0-0x8 0xb 0xe-0x1f 0x21-0x10ffff}",
- Regexp::PerlClasses | Regexp::NeverNL },
- { "\\S", "cc{0-0x8 0xb 0xe-0x1f 0x21-0x10ffff}",
- Regexp::PerlClasses | Regexp::NeverNL | Regexp::FoldCase },
-
- // Bug in Regexp::ToString() that emitted [^], which
- // would (obviously) fail to parse when fed back in.
- { "[\\s\\S]", "cc{0-0x10ffff}" },
-};
-
-bool RegexpEqualTestingOnly(Regexp* a, Regexp* b) {
- return Regexp::Equal(a, b);
-}
-
-void TestParse(const Test* tests, int ntests, Regexp::ParseFlags flags,
- const std::string& title) {
- Regexp** re = new Regexp*[ntests];
- for (int i = 0; i < ntests; i++) {
- RegexpStatus status;
- Regexp::ParseFlags f = flags;
- if (tests[i].flags != 0) {
- f = tests[i].flags & ~TestZeroFlags;
- }
- re[i] = Regexp::Parse(tests[i].regexp, f, &status);
- ASSERT_TRUE(re[i] != NULL)
- << " " << tests[i].regexp << " " << status.Text();
- std::string s = re[i]->Dump();
- EXPECT_EQ(std::string(tests[i].parse), s)
- << "Regexp: " << tests[i].regexp
- << "\nparse: " << std::string(tests[i].parse)
- << " s: " << s << " flag=" << f;
- }
-
- for (int i = 0; i < ntests; i++) {
- for (int j = 0; j < ntests; j++) {
- EXPECT_EQ(std::string(tests[i].parse) == std::string(tests[j].parse),
- RegexpEqualTestingOnly(re[i], re[j]))
- << "Regexp: " << tests[i].regexp << " " << tests[j].regexp;
- }
- }
-
- for (int i = 0; i < ntests; i++)
- re[i]->Decref();
- delete[] re;
-}
-
-// Test that regexps parse to expected structures.
-TEST(TestParse, SimpleRegexps) {
- TestParse(tests, arraysize(tests), kTestFlags, "simple");
-}
-
-Test foldcase_tests[] = {
- { "AbCdE", "strfold{abcde}" },
- { "[Aa]", "litfold{a}" },
- { "a", "litfold{a}" },
-
- // 0x17F is an old English long s (looks like an f) and folds to s.
- // 0x212A is the Kelvin symbol and folds to k.
- { "A[F-g]", "cat{litfold{a}cc{0x41-0x7a 0x17f 0x212a}}" }, // [Aa][A-z...]
- { "[[:upper:]]", "cc{0x41-0x5a 0x61-0x7a 0x17f 0x212a}" },
- { "[[:lower:]]", "cc{0x41-0x5a 0x61-0x7a 0x17f 0x212a}" },
-};
-
-// Test that parsing with FoldCase works.
-TEST(TestParse, FoldCase) {
- TestParse(foldcase_tests, arraysize(foldcase_tests), Regexp::FoldCase, "foldcase");
-}
-
-Test literal_tests[] = {
- { "(|)^$.[*+?]{5,10},\\", "str{(|)^$.[*+?]{5,10},\\}" },
-};
-
-// Test that parsing with Literal works.
-TEST(TestParse, Literal) {
- TestParse(literal_tests, arraysize(literal_tests), Regexp::Literal, "literal");
-}
-
-Test matchnl_tests[] = {
- { ".", "dot{}" },
- { "\n", "lit{\n}" },
- { "[^a]", "cc{0-0x60 0x62-0x10ffff}" },
- { "[a\\n]", "cc{0xa 0x61}" },
-};
-
-// Test that parsing with MatchNL works.
-// (Also tested above during simple cases.)
-TEST(TestParse, MatchNL) {
- TestParse(matchnl_tests, arraysize(matchnl_tests), Regexp::MatchNL, "with MatchNL");
-}
-
-Test nomatchnl_tests[] = {
- { ".", "cc{0-0x9 0xb-0x10ffff}" },
- { "\n", "lit{\n}" },
- { "[^a]", "cc{0-0x9 0xb-0x60 0x62-0x10ffff}" },
- { "[a\\n]", "cc{0xa 0x61}" },
-};
-
-// Test that parsing without MatchNL works.
-TEST(TestParse, NoMatchNL) {
- TestParse(nomatchnl_tests, arraysize(nomatchnl_tests), Regexp::NoParseFlags, "without MatchNL");
-}
-
-Test prefix_tests[] = {
- { "abc|abd", "cat{str{ab}cc{0x63-0x64}}" },
- { "a(?:b)c|abd", "cat{str{ab}cc{0x63-0x64}}" },
- { "abc|abd|aef|bcx|bcy",
- "alt{cat{lit{a}alt{cat{lit{b}cc{0x63-0x64}}str{ef}}}"
- "cat{str{bc}cc{0x78-0x79}}}" },
- { "abc|x|abd", "alt{str{abc}lit{x}str{abd}}" },
- { "(?i)abc|ABD", "cat{strfold{ab}cc{0x43-0x44 0x63-0x64}}" },
- { "[ab]c|[ab]d", "cat{cc{0x61-0x62}cc{0x63-0x64}}" },
- { ".c|.d", "cat{cc{0-0x9 0xb-0x10ffff}cc{0x63-0x64}}" },
- { "\\Cc|\\Cd", "cat{byte{}cc{0x63-0x64}}" },
- { "x{2}|x{2}[0-9]",
- "cat{rep{2,2 lit{x}}alt{emp{}cc{0x30-0x39}}}" },
- { "x{2}y|x{2}[0-9]y",
- "cat{rep{2,2 lit{x}}alt{lit{y}cat{cc{0x30-0x39}lit{y}}}}" },
- { "n|r|rs",
- "alt{lit{n}cat{lit{r}alt{emp{}lit{s}}}}" },
- { "n|rs|r",
- "alt{lit{n}cat{lit{r}alt{lit{s}emp{}}}}" },
- { "r|rs|n",
- "alt{cat{lit{r}alt{emp{}lit{s}}}lit{n}}" },
- { "rs|r|n",
- "alt{cat{lit{r}alt{lit{s}emp{}}}lit{n}}" },
- { "a\\C*?c|a\\C*?b",
- "cat{lit{a}alt{cat{nstar{byte{}}lit{c}}cat{nstar{byte{}}lit{b}}}}" },
- { "^/a/bc|^/a/de",
- "cat{bol{}cat{str{/a/}alt{str{bc}str{de}}}}" },
- // In the past, factoring was limited to kFactorAlternationMaxDepth (8).
- { "a|aa|aaa|aaaa|aaaaa|aaaaaa|aaaaaaa|aaaaaaaa|aaaaaaaaa|aaaaaaaaaa",
- "cat{lit{a}alt{emp{}" "cat{lit{a}alt{emp{}" "cat{lit{a}alt{emp{}"
- "cat{lit{a}alt{emp{}" "cat{lit{a}alt{emp{}" "cat{lit{a}alt{emp{}"
- "cat{lit{a}alt{emp{}" "cat{lit{a}alt{emp{}" "cat{lit{a}alt{emp{}"
- "lit{a}}}}}}}}}}}}}}}}}}}" },
- { "a|aardvark|aardvarks|abaci|aback|abacus|abacuses|abaft|abalone|abalones",
- "cat{lit{a}alt{emp{}cat{str{ardvark}alt{emp{}lit{s}}}"
- "cat{str{ba}alt{cat{lit{c}alt{cc{0x69 0x6b}cat{str{us}alt{emp{}str{es}}}}}"
- "str{ft}cat{str{lone}alt{emp{}lit{s}}}}}}}" },
-};
-
-// Test that prefix factoring works.
-TEST(TestParse, Prefix) {
- TestParse(prefix_tests, arraysize(prefix_tests), Regexp::PerlX, "prefix");
-}
-
-Test nested_tests[] = {
- { "((((((((((x{2}){2}){2}){2}){2}){2}){2}){2}){2}))",
- "cap{cap{rep{2,2 cap{rep{2,2 cap{rep{2,2 cap{rep{2,2 cap{rep{2,2 cap{rep{2,2 cap{rep{2,2 cap{rep{2,2 cap{rep{2,2 lit{x}}}}}}}}}}}}}}}}}}}}" },
- { "((((((((((x{1}){2}){2}){2}){2}){2}){2}){2}){2}){2})",
- "cap{rep{2,2 cap{rep{2,2 cap{rep{2,2 cap{rep{2,2 cap{rep{2,2 cap{rep{2,2 cap{rep{2,2 cap{rep{2,2 cap{rep{2,2 cap{rep{1,1 lit{x}}}}}}}}}}}}}}}}}}}}}" },
- { "((((((((((x{0}){2}){2}){2}){2}){2}){2}){2}){2}){2})",
- "cap{rep{2,2 cap{rep{2,2 cap{rep{2,2 cap{rep{2,2 cap{rep{2,2 cap{rep{2,2 cap{rep{2,2 cap{rep{2,2 cap{rep{2,2 cap{rep{0,0 lit{x}}}}}}}}}}}}}}}}}}}}}" },
- { "((((((x{2}){2}){2}){5}){5}){5})",
- "cap{rep{5,5 cap{rep{5,5 cap{rep{5,5 cap{rep{2,2 cap{rep{2,2 cap{rep{2,2 lit{x}}}}}}}}}}}}}" },
-};
-
-// Test that nested repetition works.
-TEST(TestParse, Nested) {
- TestParse(nested_tests, arraysize(nested_tests), Regexp::PerlX, "nested");
-}
-
-// Invalid regular expressions
-const char* badtests[] = {
- "(",
- ")",
- "(a",
- "(a|b|",
- "(a|b",
- "[a-z",
- "([a-z)",
- "x{1001}",
- "\xff", // Invalid UTF-8
- "[\xff]",
- "[\\\xff]",
- "\\\xff",
- "(?P<name>a",
- "(?P<name>",
- "(?P<name",
- "(?P<x y>a)",
- "(?P<>a)",
- "[a-Z]",
- "(?i)[a-Z]",
- "a{100000}",
- "a{100000,}",
- "((((((((((x{2}){2}){2}){2}){2}){2}){2}){2}){2}){2})",
- "(((x{7}){11}){13})",
- "\\Q\\E*",
-};
-
-// Valid in Perl, bad in POSIX
-const char* only_perl[] = {
- "[a-b-c]",
- "\\Qabc\\E",
- "\\Q*+?{[\\E",
- "\\Q\\\\E",
- "\\Q\\\\\\E",
- "\\Q\\\\\\\\E",
- "\\Q\\\\\\\\\\E",
- "(?:a)",
- "(?P<name>a)",
-};
-
-// Valid in POSIX, bad in Perl.
-const char* only_posix[] = {
- "a++",
- "a**",
- "a?*",
- "a+*",
- "a{1}*",
-};
-
-// Test that parser rejects bad regexps.
-TEST(TestParse, InvalidRegexps) {
- for (size_t i = 0; i < arraysize(badtests); i++) {
- ASSERT_TRUE(Regexp::Parse(badtests[i], Regexp::PerlX, NULL) == NULL)
- << " " << badtests[i];
- ASSERT_TRUE(Regexp::Parse(badtests[i], Regexp::NoParseFlags, NULL) == NULL)
- << " " << badtests[i];
- }
- for (size_t i = 0; i < arraysize(only_posix); i++) {
- ASSERT_TRUE(Regexp::Parse(only_posix[i], Regexp::PerlX, NULL) == NULL)
- << " " << only_posix[i];
- Regexp* re = Regexp::Parse(only_posix[i], Regexp::NoParseFlags, NULL);
- ASSERT_TRUE(re != NULL) << " " << only_posix[i];
- re->Decref();
- }
- for (size_t i = 0; i < arraysize(only_perl); i++) {
- ASSERT_TRUE(Regexp::Parse(only_perl[i], Regexp::NoParseFlags, NULL) == NULL)
- << " " << only_perl[i];
- Regexp* re = Regexp::Parse(only_perl[i], Regexp::PerlX, NULL);
- ASSERT_TRUE(re != NULL) << " " << only_perl[i];
- re->Decref();
- }
-}
-
-// Test that ToString produces original regexp or equivalent one.
-TEST(TestToString, EquivalentParse) {
- for (size_t i = 0; i < arraysize(tests); i++) {
- RegexpStatus status;
- Regexp::ParseFlags f = kTestFlags;
- if (tests[i].flags != 0) {
- f = tests[i].flags & ~TestZeroFlags;
- }
- Regexp* re = Regexp::Parse(tests[i].regexp, f, &status);
- ASSERT_TRUE(re != NULL) << " " << tests[i].regexp << " " << status.Text();
- std::string s = re->Dump();
- EXPECT_EQ(std::string(tests[i].parse), s)
- << "Regexp: " << tests[i].regexp
- << "\nparse: " << std::string(tests[i].parse)
- << " s: " << s << " flag=" << f;
- std::string t = re->ToString();
- if (t != tests[i].regexp) {
- // If ToString didn't return the original regexp,
- // it must have found one with fewer parens.
- // Unfortunately we can't check the length here, because
- // ToString produces "\\{" for a literal brace,
- // but "{" is a shorter equivalent.
- // ASSERT_LT(t.size(), strlen(tests[i].regexp))
- // << " t=" << t << " regexp=" << tests[i].regexp;
-
- // Test that if we parse the new regexp we get the same structure.
- Regexp* nre = Regexp::Parse(t, Regexp::MatchNL | Regexp::PerlX, &status);
- ASSERT_TRUE(nre != NULL) << " reparse " << t << " " << status.Text();
- std::string ss = nre->Dump();
- std::string tt = nre->ToString();
- if (s != ss || t != tt)
- LOG(INFO) << "ToString(" << tests[i].regexp << ") = " << t;
- EXPECT_EQ(s, ss);
- EXPECT_EQ(t, tt);
- nre->Decref();
- }
- re->Decref();
- }
-}
-
-// Test that capture error args are correct.
-TEST(NamedCaptures, ErrorArgs) {
- RegexpStatus status;
- Regexp* re;
-
- re = Regexp::Parse("test(?P<name", Regexp::LikePerl, &status);
- EXPECT_TRUE(re == NULL);
- EXPECT_EQ(status.code(), kRegexpBadNamedCapture);
- EXPECT_EQ(status.error_arg(), "(?P<name");
-
- re = Regexp::Parse("test(?P<space bar>z)", Regexp::LikePerl, &status);
- EXPECT_TRUE(re == NULL);
- EXPECT_EQ(status.code(), kRegexpBadNamedCapture);
- EXPECT_EQ(status.error_arg(), "(?P<space bar>");
-}
-
-} // namespace re2
+
+ // Case-folded literals
+ { "[Aa]", "litfold{a}" },
+
+ // Strings
+ { "abcde", "str{abcde}" },
+ { "[Aa][Bb]cd", "cat{strfold{ab}str{cd}}" },
+
+ // Reported bug involving \n leaking in despite use of NeverNL.
+ { "[^ ]", "cc{0-0x9 0xb-0x1f 0x21-0x10ffff}", TestZeroFlags },
+ { "[^ ]", "cc{0-0x9 0xb-0x1f 0x21-0x10ffff}", Regexp::FoldCase },
+ { "[^ ]", "cc{0-0x9 0xb-0x1f 0x21-0x10ffff}", Regexp::NeverNL },
+ { "[^ ]", "cc{0-0x9 0xb-0x1f 0x21-0x10ffff}", Regexp::NeverNL | Regexp::FoldCase },
+ { "[^ \f]", "cc{0-0x9 0xb 0xd-0x1f 0x21-0x10ffff}", TestZeroFlags },
+ { "[^ \f]", "cc{0-0x9 0xb 0xd-0x1f 0x21-0x10ffff}", Regexp::FoldCase },
+ { "[^ \f]", "cc{0-0x9 0xb 0xd-0x1f 0x21-0x10ffff}", Regexp::NeverNL },
+ { "[^ \f]", "cc{0-0x9 0xb 0xd-0x1f 0x21-0x10ffff}", Regexp::NeverNL | Regexp::FoldCase },
+ { "[^ \r]", "cc{0-0x9 0xb-0xc 0xe-0x1f 0x21-0x10ffff}", TestZeroFlags },
+ { "[^ \r]", "cc{0-0x9 0xb-0xc 0xe-0x1f 0x21-0x10ffff}", Regexp::FoldCase },
+ { "[^ \r]", "cc{0-0x9 0xb-0xc 0xe-0x1f 0x21-0x10ffff}", Regexp::NeverNL },
+ { "[^ \r]", "cc{0-0x9 0xb-0xc 0xe-0x1f 0x21-0x10ffff}", Regexp::NeverNL | Regexp::FoldCase },
+ { "[^ \v]", "cc{0-0x9 0xc-0x1f 0x21-0x10ffff}", TestZeroFlags },
+ { "[^ \v]", "cc{0-0x9 0xc-0x1f 0x21-0x10ffff}", Regexp::FoldCase },
+ { "[^ \v]", "cc{0-0x9 0xc-0x1f 0x21-0x10ffff}", Regexp::NeverNL },
+ { "[^ \v]", "cc{0-0x9 0xc-0x1f 0x21-0x10ffff}", Regexp::NeverNL | Regexp::FoldCase },
+ { "[^ \t]", "cc{0-0x8 0xb-0x1f 0x21-0x10ffff}", TestZeroFlags },
+ { "[^ \t]", "cc{0-0x8 0xb-0x1f 0x21-0x10ffff}", Regexp::FoldCase },
+ { "[^ \t]", "cc{0-0x8 0xb-0x1f 0x21-0x10ffff}", Regexp::NeverNL },
+ { "[^ \t]", "cc{0-0x8 0xb-0x1f 0x21-0x10ffff}", Regexp::NeverNL | Regexp::FoldCase },
+ { "[^ \r\f\v]", "cc{0-0x9 0xe-0x1f 0x21-0x10ffff}", Regexp::NeverNL },
+ { "[^ \r\f\v]", "cc{0-0x9 0xe-0x1f 0x21-0x10ffff}", Regexp::NeverNL | Regexp::FoldCase },
+ { "[^ \r\f\t\v]", "cc{0-0x8 0xe-0x1f 0x21-0x10ffff}", Regexp::NeverNL },
+ { "[^ \r\f\t\v]", "cc{0-0x8 0xe-0x1f 0x21-0x10ffff}", Regexp::NeverNL | Regexp::FoldCase },
+ { "[^ \r\n\f\t\v]", "cc{0-0x8 0xe-0x1f 0x21-0x10ffff}", Regexp::NeverNL },
+ { "[^ \r\n\f\t\v]", "cc{0-0x8 0xe-0x1f 0x21-0x10ffff}", Regexp::NeverNL | Regexp::FoldCase },
+ { "[^ \r\n\f\t]", "cc{0-0x8 0xb 0xe-0x1f 0x21-0x10ffff}", Regexp::NeverNL },
+ { "[^ \r\n\f\t]", "cc{0-0x8 0xb 0xe-0x1f 0x21-0x10ffff}", Regexp::NeverNL | Regexp::FoldCase },
+ { "[^\t-\n\f-\r ]", "cc{0-0x8 0xb 0xe-0x1f 0x21-0x10ffff}",
+ Regexp::PerlClasses },
+ { "[^\t-\n\f-\r ]", "cc{0-0x8 0xb 0xe-0x1f 0x21-0x10ffff}",
+ Regexp::PerlClasses | Regexp::FoldCase },
+ { "[^\t-\n\f-\r ]", "cc{0-0x8 0xb 0xe-0x1f 0x21-0x10ffff}",
+ Regexp::PerlClasses | Regexp::NeverNL },
+ { "[^\t-\n\f-\r ]", "cc{0-0x8 0xb 0xe-0x1f 0x21-0x10ffff}",
+ Regexp::PerlClasses | Regexp::NeverNL | Regexp::FoldCase },
+ { "\\S", "cc{0-0x8 0xb 0xe-0x1f 0x21-0x10ffff}",
+ Regexp::PerlClasses },
+ { "\\S", "cc{0-0x8 0xb 0xe-0x1f 0x21-0x10ffff}",
+ Regexp::PerlClasses | Regexp::FoldCase },
+ { "\\S", "cc{0-0x8 0xb 0xe-0x1f 0x21-0x10ffff}",
+ Regexp::PerlClasses | Regexp::NeverNL },
+ { "\\S", "cc{0-0x8 0xb 0xe-0x1f 0x21-0x10ffff}",
+ Regexp::PerlClasses | Regexp::NeverNL | Regexp::FoldCase },
+
+ // Bug in Regexp::ToString() that emitted [^], which
+ // would (obviously) fail to parse when fed back in.
+ { "[\\s\\S]", "cc{0-0x10ffff}" },
+};
+
+bool RegexpEqualTestingOnly(Regexp* a, Regexp* b) {
+ return Regexp::Equal(a, b);
+}
+
+void TestParse(const Test* tests, int ntests, Regexp::ParseFlags flags,
+ const std::string& title) {
+ Regexp** re = new Regexp*[ntests];
+ for (int i = 0; i < ntests; i++) {
+ RegexpStatus status;
+ Regexp::ParseFlags f = flags;
+ if (tests[i].flags != 0) {
+ f = tests[i].flags & ~TestZeroFlags;
+ }
+ re[i] = Regexp::Parse(tests[i].regexp, f, &status);
+ ASSERT_TRUE(re[i] != NULL)
+ << " " << tests[i].regexp << " " << status.Text();
+ std::string s = re[i]->Dump();
+ EXPECT_EQ(std::string(tests[i].parse), s)
+ << "Regexp: " << tests[i].regexp
+ << "\nparse: " << std::string(tests[i].parse)
+ << " s: " << s << " flag=" << f;
+ }
+
+ for (int i = 0; i < ntests; i++) {
+ for (int j = 0; j < ntests; j++) {
+ EXPECT_EQ(std::string(tests[i].parse) == std::string(tests[j].parse),
+ RegexpEqualTestingOnly(re[i], re[j]))
+ << "Regexp: " << tests[i].regexp << " " << tests[j].regexp;
+ }
+ }
+
+ for (int i = 0; i < ntests; i++)
+ re[i]->Decref();
+ delete[] re;
+}
+
+// Test that regexps parse to expected structures.
+TEST(TestParse, SimpleRegexps) {
+ TestParse(tests, arraysize(tests), kTestFlags, "simple");
+}
+
+Test foldcase_tests[] = {
+ { "AbCdE", "strfold{abcde}" },
+ { "[Aa]", "litfold{a}" },
+ { "a", "litfold{a}" },
+
+ // 0x17F is an old English long s (looks like an f) and folds to s.
+ // 0x212A is the Kelvin symbol and folds to k.
+ { "A[F-g]", "cat{litfold{a}cc{0x41-0x7a 0x17f 0x212a}}" }, // [Aa][A-z...]
+ { "[[:upper:]]", "cc{0x41-0x5a 0x61-0x7a 0x17f 0x212a}" },
+ { "[[:lower:]]", "cc{0x41-0x5a 0x61-0x7a 0x17f 0x212a}" },
+};
+
+// Test that parsing with FoldCase works.
+TEST(TestParse, FoldCase) {
+ TestParse(foldcase_tests, arraysize(foldcase_tests), Regexp::FoldCase, "foldcase");
+}
+
+Test literal_tests[] = {
+ { "(|)^$.[*+?]{5,10},\\", "str{(|)^$.[*+?]{5,10},\\}" },
+};
+
+// Test that parsing with Literal works.
+TEST(TestParse, Literal) {
+ TestParse(literal_tests, arraysize(literal_tests), Regexp::Literal, "literal");
+}
+
+Test matchnl_tests[] = {
+ { ".", "dot{}" },
+ { "\n", "lit{\n}" },
+ { "[^a]", "cc{0-0x60 0x62-0x10ffff}" },
+ { "[a\\n]", "cc{0xa 0x61}" },
+};
+
+// Test that parsing with MatchNL works.
+// (Also tested above during simple cases.)
+TEST(TestParse, MatchNL) {
+ TestParse(matchnl_tests, arraysize(matchnl_tests), Regexp::MatchNL, "with MatchNL");
+}
+
+Test nomatchnl_tests[] = {
+ { ".", "cc{0-0x9 0xb-0x10ffff}" },
+ { "\n", "lit{\n}" },
+ { "[^a]", "cc{0-0x9 0xb-0x60 0x62-0x10ffff}" },
+ { "[a\\n]", "cc{0xa 0x61}" },
+};
+
+// Test that parsing without MatchNL works.
+TEST(TestParse, NoMatchNL) {
+ TestParse(nomatchnl_tests, arraysize(nomatchnl_tests), Regexp::NoParseFlags, "without MatchNL");
+}
+
+Test prefix_tests[] = {
+ { "abc|abd", "cat{str{ab}cc{0x63-0x64}}" },
+ { "a(?:b)c|abd", "cat{str{ab}cc{0x63-0x64}}" },
+ { "abc|abd|aef|bcx|bcy",
+ "alt{cat{lit{a}alt{cat{lit{b}cc{0x63-0x64}}str{ef}}}"
+ "cat{str{bc}cc{0x78-0x79}}}" },
+ { "abc|x|abd", "alt{str{abc}lit{x}str{abd}}" },
+ { "(?i)abc|ABD", "cat{strfold{ab}cc{0x43-0x44 0x63-0x64}}" },
+ { "[ab]c|[ab]d", "cat{cc{0x61-0x62}cc{0x63-0x64}}" },
+ { ".c|.d", "cat{cc{0-0x9 0xb-0x10ffff}cc{0x63-0x64}}" },
+ { "\\Cc|\\Cd", "cat{byte{}cc{0x63-0x64}}" },
+ { "x{2}|x{2}[0-9]",
+ "cat{rep{2,2 lit{x}}alt{emp{}cc{0x30-0x39}}}" },
+ { "x{2}y|x{2}[0-9]y",
+ "cat{rep{2,2 lit{x}}alt{lit{y}cat{cc{0x30-0x39}lit{y}}}}" },
+ { "n|r|rs",
+ "alt{lit{n}cat{lit{r}alt{emp{}lit{s}}}}" },
+ { "n|rs|r",
+ "alt{lit{n}cat{lit{r}alt{lit{s}emp{}}}}" },
+ { "r|rs|n",
+ "alt{cat{lit{r}alt{emp{}lit{s}}}lit{n}}" },
+ { "rs|r|n",
+ "alt{cat{lit{r}alt{lit{s}emp{}}}lit{n}}" },
+ { "a\\C*?c|a\\C*?b",
+ "cat{lit{a}alt{cat{nstar{byte{}}lit{c}}cat{nstar{byte{}}lit{b}}}}" },
+ { "^/a/bc|^/a/de",
+ "cat{bol{}cat{str{/a/}alt{str{bc}str{de}}}}" },
+ // In the past, factoring was limited to kFactorAlternationMaxDepth (8).
+ { "a|aa|aaa|aaaa|aaaaa|aaaaaa|aaaaaaa|aaaaaaaa|aaaaaaaaa|aaaaaaaaaa",
+ "cat{lit{a}alt{emp{}" "cat{lit{a}alt{emp{}" "cat{lit{a}alt{emp{}"
+ "cat{lit{a}alt{emp{}" "cat{lit{a}alt{emp{}" "cat{lit{a}alt{emp{}"
+ "cat{lit{a}alt{emp{}" "cat{lit{a}alt{emp{}" "cat{lit{a}alt{emp{}"
+ "lit{a}}}}}}}}}}}}}}}}}}}" },
+ { "a|aardvark|aardvarks|abaci|aback|abacus|abacuses|abaft|abalone|abalones",
+ "cat{lit{a}alt{emp{}cat{str{ardvark}alt{emp{}lit{s}}}"
+ "cat{str{ba}alt{cat{lit{c}alt{cc{0x69 0x6b}cat{str{us}alt{emp{}str{es}}}}}"
+ "str{ft}cat{str{lone}alt{emp{}lit{s}}}}}}}" },
+};
+
+// Test that prefix factoring works.
+TEST(TestParse, Prefix) {
+ TestParse(prefix_tests, arraysize(prefix_tests), Regexp::PerlX, "prefix");
+}
+
+Test nested_tests[] = {  // Fed to TestParse by TEST(TestParse, Nested); presumably {regexp, expected dump} pairs -- confirm against struct Test.
+ { "((((((((((x{2}){2}){2}){2}){2}){2}){2}){2}){2}))",  // repeat counts multiply out to 2^9 = 512
+ "cap{cap{rep{2,2 cap{rep{2,2 cap{rep{2,2 cap{rep{2,2 cap{rep{2,2 cap{rep{2,2 cap{rep{2,2 cap{rep{2,2 cap{rep{2,2 lit{x}}}}}}}}}}}}}}}}}}}}" },
+ { "((((((((((x{1}){2}){2}){2}){2}){2}){2}){2}){2}){2})",  // repeat counts multiply out to 1 * 2^9 = 512
+ "cap{rep{2,2 cap{rep{2,2 cap{rep{2,2 cap{rep{2,2 cap{rep{2,2 cap{rep{2,2 cap{rep{2,2 cap{rep{2,2 cap{rep{2,2 cap{rep{1,1 lit{x}}}}}}}}}}}}}}}}}}}}}" },
+ { "((((((((((x{0}){2}){2}){2}){2}){2}){2}){2}){2}){2})",  // innermost repeat count is 0
+ "cap{rep{2,2 cap{rep{2,2 cap{rep{2,2 cap{rep{2,2 cap{rep{2,2 cap{rep{2,2 cap{rep{2,2 cap{rep{2,2 cap{rep{2,2 cap{rep{0,0 lit{x}}}}}}}}}}}}}}}}}}}}}" },
+ { "((((((x{2}){2}){2}){5}){5}){5})",  // repeat counts multiply out to 2^3 * 5^3 = 1000
+ "cap{rep{5,5 cap{rep{5,5 cap{rep{5,5 cap{rep{2,2 cap{rep{2,2 cap{rep{2,2 lit{x}}}}}}}}}}}}}" },
+};
+
+// Runs TestParse over every nested_tests entry with Regexp::PerlX; these cases cover nested repetition.
+TEST(TestParse, Nested) {
+ TestParse(nested_tests, arraysize(nested_tests), Regexp::PerlX, "nested");
+}
+
+// Invalid regular expressions: TEST(TestParse, InvalidRegexps) expects a NULL parse under both PerlX and NoParseFlags.
+const char* badtests[] = {
+ "(",
+ ")",
+ "(a",
+ "(a|b|",
+ "(a|b",
+ "[a-z",
+ "([a-z)",
+ "x{1001}",  // NOTE(review): presumably just over the maximum repeat count -- confirm
+ "\xff", // Invalid UTF-8
+ "[\xff]",
+ "[\\\xff]",
+ "\\\xff",
+ "(?P<name>a",
+ "(?P<name>",
+ "(?P<name",
+ "(?P<x y>a)",  // space inside the group name
+ "(?P<>a)",  // empty group name
+ "[a-Z]",  // range end 'Z' (0x5A) sorts before range start 'a' (0x61)
+ "(?i)[a-Z]",
+ "a{100000}",
+ "a{100000,}",
+ "((((((((((x{2}){2}){2}){2}){2}){2}){2}){2}){2}){2})",  // nested repeat counts multiply out to 2^10 = 1024
+ "(((x{7}){11}){13})",  // nested repeat counts multiply out to 7 * 11 * 13 = 1001
+ "\\Q\\E*",  // NOTE(review): presumably '*' has nothing to repeat after an empty quoted literal -- confirm
+};
+
+// Valid in Perl, bad in POSIX: must parse with Regexp::PerlX and fail with Regexp::NoParseFlags.
+const char* only_perl[] = {
+ "[a-b-c]",
+ "\\Qabc\\E",
+ "\\Q*+?{[\\E",
+ "\\Q\\\\E",
+ "\\Q\\\\\\E",
+ "\\Q\\\\\\\\E",
+ "\\Q\\\\\\\\\\E",
+ "(?:a)",
+ "(?P<name>a)",
+};
+
+// Valid in POSIX, bad in Perl: must parse with Regexp::NoParseFlags and fail with Regexp::PerlX.
+const char* only_posix[] = {
+ "a++",
+ "a**",
+ "a?*",
+ "a+*",
+ "a{1}*",
+};
+
+// Checks badtests, only_posix and only_perl against both parse modes.
+TEST(TestParse, InvalidRegexps) {
+  for (const char* bad : badtests) {  // rejected by both modes
+    ASSERT_TRUE(Regexp::Parse(bad, Regexp::PerlX, nullptr) == nullptr)
+        << " " << bad;
+    ASSERT_TRUE(Regexp::Parse(bad, Regexp::NoParseFlags, nullptr) == nullptr)
+        << " " << bad;
+  }
+  for (const char* posix : only_posix) {  // rejected by PerlX only
+    ASSERT_TRUE(Regexp::Parse(posix, Regexp::PerlX, nullptr) == nullptr)
+        << " " << posix;
+    Regexp* parsed = Regexp::Parse(posix, Regexp::NoParseFlags, nullptr);
+    ASSERT_TRUE(parsed != nullptr) << " " << posix;
+    parsed->Decref();
+  }
+  for (const char* perl : only_perl) {  // rejected by NoParseFlags only
+    ASSERT_TRUE(Regexp::Parse(perl, Regexp::NoParseFlags, nullptr) == nullptr)
+        << " " << perl;
+    Regexp* parsed = Regexp::Parse(perl, Regexp::PerlX, nullptr);
+    ASSERT_TRUE(parsed != nullptr) << " " << perl;
+    parsed->Decref();
+  }
+}
+
+// ToString() must reproduce the original regexp or one that parses identically.
+TEST(TestToString, EquivalentParse) {
+  for (size_t i = 0; i < arraysize(tests); i++) {
+    RegexpStatus status;
+    // A zero flags field selects kTestFlags; any other value is used
+    // with the TestZeroFlags bit cleared.
+    const Regexp::ParseFlags f =
+        (tests[i].flags != 0) ? (tests[i].flags & ~TestZeroFlags) : kTestFlags;
+    Regexp* re = Regexp::Parse(tests[i].regexp, f, &status);
+    ASSERT_TRUE(re != nullptr) << " " << tests[i].regexp << " " << status.Text();
+    // The dump of the parse must equal the expected parse string.
+    const std::string s = re->Dump();
+    EXPECT_EQ(std::string(tests[i].parse), s)
+        << "Regexp: " << tests[i].regexp
+        << "\nparse: " << std::string(tests[i].parse)
+        << " s: " << s << " flag=" << f;
+
+    const std::string t = re->ToString();
+    if (t == tests[i].regexp) {
+      re->Decref();  // Exact round trip; nothing further to check.
+      continue;
+    }
+
+    // ToString() printed something else, which should only happen when it
+    // found a form with fewer parens.  Lengths cannot be compared, because
+    // ToString() emits "\\{" for a literal brace where "{" is a shorter
+    // equivalent; instead re-parse the output and compare the structure.
+    Regexp* nre = Regexp::Parse(t, Regexp::MatchNL | Regexp::PerlX, &status);
+    ASSERT_TRUE(nre != nullptr) << " reparse " << t << " " << status.Text();
+    const std::string ss = nre->Dump();
+    const std::string tt = nre->ToString();
+    if (s != ss || t != tt)
+      LOG(INFO) << "ToString(" << tests[i].regexp << ") = " << t;
+    EXPECT_EQ(s, ss);
+    EXPECT_EQ(t, tt);
+    nre->Decref();
+    re->Decref();
+  }
+}
+
+// Test that a bad named capture reports kRegexpBadNamedCapture and the expected error_arg.
+TEST(NamedCaptures, ErrorArgs) {
+ RegexpStatus status;
+ Regexp* re;
+ // Case 1: the group name is never closed; error_arg is expected to start at "(?P<".
+ re = Regexp::Parse("test(?P<name", Regexp::LikePerl, &status);
+ EXPECT_TRUE(re == NULL);
+ EXPECT_EQ(status.code(), kRegexpBadNamedCapture);
+ EXPECT_EQ(status.error_arg(), "(?P<name");
+ // Case 2: the group name contains a space; error_arg is expected to end at the closing '>'.
+ re = Regexp::Parse("test(?P<space bar>z)", Regexp::LikePerl, &status);
+ EXPECT_TRUE(re == NULL);
+ EXPECT_EQ(status.code(), kRegexpBadNamedCapture);
+ EXPECT_EQ(status.error_arg(), "(?P<space bar>");
+}
+
+} // namespace re2
diff --git a/contrib/libs/re2/re2/testing/possible_match_test.cc b/contrib/libs/re2/re2/testing/possible_match_test.cc
index 6b06053dde..f337217b92 100644
--- a/contrib/libs/re2/re2/testing/possible_match_test.cc
+++ b/contrib/libs/re2/re2/testing/possible_match_test.cc
@@ -1,247 +1,247 @@
-// Copyright 2006-2008 The RE2 Authors. All Rights Reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-#include <string.h>
-#include <string>
-#include <vector>
-
-#include "library/cpp/testing/gtest/gtest.h"
-#include "util/logging.h"
-#include "util/strutil.h"
-#include "re2/prog.h"
-#include "re2/re2.h"
-#include "re2/regexp.h"
-#include "re2/testing/exhaustive_tester.h"
-#include "re2/testing/regexp_generator.h"
-#include "re2/testing/string_generator.h"
-
-namespace re2 {
-
-// Test that C++ strings are compared as uint8s, not int8s.
-// PossibleMatchRange doesn't depend on this, but callers probably will.
-TEST(CplusplusStrings, EightBit) {
- std::string s = "\x70";
- std::string t = "\xA0";
- EXPECT_LT(s, t);
-}
-
-struct PrefixTest {
- const char* regexp;
- int maxlen;
- const char* min;
- const char* max;
-};
-
-static PrefixTest tests[] = {
- { "", 10, "", "", },
- { "Abcdef", 10, "Abcdef", "Abcdef" },
- { "abc(def|ghi)", 10, "abcdef", "abcghi" },
- { "a+hello", 10, "aa", "ahello" },
- { "a*hello", 10, "a", "hello" },
- { "def|abc", 10, "abc", "def" },
- { "a(b)(c)[d]", 10, "abcd", "abcd" },
- { "ab(cab|cat)", 10, "abcab", "abcat" },
- { "ab(cab|ca)x", 10, "abcabx", "abcax" },
- { "(ab|x)(c|de)", 10, "abc", "xde" },
- { "(ab|x)?(c|z)?", 10, "", "z" },
- { "[^\\s\\S]", 10, "", "" },
- { "(abc)+", 5, "abc", "abcac" },
- { "(abc)+", 2, "ab", "ac" },
- { "(abc)+", 1, "a", "b" },
- { "[a\xC3\xA1]", 4, "a", "\xC3\xA1" },
- { "a*", 10, "", "ab" },
-
- { "(?i)Abcdef", 10, "ABCDEF", "abcdef" },
- { "(?i)abc(def|ghi)", 10, "ABCDEF", "abcghi" },
- { "(?i)a+hello", 10, "AA", "ahello" },
- { "(?i)a*hello", 10, "A", "hello" },
- { "(?i)def|abc", 10, "ABC", "def" },
- { "(?i)a(b)(c)[d]", 10, "ABCD", "abcd" },
- { "(?i)ab(cab|cat)", 10, "ABCAB", "abcat" },
- { "(?i)ab(cab|ca)x", 10, "ABCABX", "abcax" },
- { "(?i)(ab|x)(c|de)", 10, "ABC", "xde" },
- { "(?i)(ab|x)?(c|z)?", 10, "", "z" },
- { "(?i)[^\\s\\S]", 10, "", "" },
- { "(?i)(abc)+", 5, "ABC", "abcac" },
- { "(?i)(abc)+", 2, "AB", "ac" },
- { "(?i)(abc)+", 1, "A", "b" },
- { "(?i)[a\xC3\xA1]", 4, "A", "\xC3\xA1" },
- { "(?i)a*", 10, "", "ab" },
- { "(?i)A*", 10, "", "ab" },
-
- { "\\AAbcdef", 10, "Abcdef", "Abcdef" },
- { "\\Aabc(def|ghi)", 10, "abcdef", "abcghi" },
- { "\\Aa+hello", 10, "aa", "ahello" },
- { "\\Aa*hello", 10, "a", "hello" },
- { "\\Adef|abc", 10, "abc", "def" },
- { "\\Aa(b)(c)[d]", 10, "abcd", "abcd" },
- { "\\Aab(cab|cat)", 10, "abcab", "abcat" },
- { "\\Aab(cab|ca)x", 10, "abcabx", "abcax" },
- { "\\A(ab|x)(c|de)", 10, "abc", "xde" },
- { "\\A(ab|x)?(c|z)?", 10, "", "z" },
- { "\\A[^\\s\\S]", 10, "", "" },
- { "\\A(abc)+", 5, "abc", "abcac" },
- { "\\A(abc)+", 2, "ab", "ac" },
- { "\\A(abc)+", 1, "a", "b" },
- { "\\A[a\xC3\xA1]", 4, "a", "\xC3\xA1" },
- { "\\Aa*", 10, "", "ab" },
-
- { "(?i)\\AAbcdef", 10, "ABCDEF", "abcdef" },
- { "(?i)\\Aabc(def|ghi)", 10, "ABCDEF", "abcghi" },
- { "(?i)\\Aa+hello", 10, "AA", "ahello" },
- { "(?i)\\Aa*hello", 10, "A", "hello" },
- { "(?i)\\Adef|abc", 10, "ABC", "def" },
- { "(?i)\\Aa(b)(c)[d]", 10, "ABCD", "abcd" },
- { "(?i)\\Aab(cab|cat)", 10, "ABCAB", "abcat" },
- { "(?i)\\Aab(cab|ca)x", 10, "ABCABX", "abcax" },
- { "(?i)\\A(ab|x)(c|de)", 10, "ABC", "xde" },
- { "(?i)\\A(ab|x)?(c|z)?", 10, "", "z" },
- { "(?i)\\A[^\\s\\S]", 10, "", "" },
- { "(?i)\\A(abc)+", 5, "ABC", "abcac" },
- { "(?i)\\A(abc)+", 2, "AB", "ac" },
- { "(?i)\\A(abc)+", 1, "A", "b" },
- { "(?i)\\A[a\xC3\xA1]", 4, "A", "\xC3\xA1" },
- { "(?i)\\Aa*", 10, "", "ab" },
- { "(?i)\\AA*", 10, "", "ab" },
-};
-
-TEST(PossibleMatchRange, HandWritten) {
- for (size_t i = 0; i < arraysize(tests); i++) {
- for (size_t j = 0; j < 2; j++) {
- const PrefixTest& t = tests[i];
- std::string min, max;
- if (j == 0) {
- LOG(INFO) << "Checking regexp=" << CEscape(t.regexp);
- Regexp* re = Regexp::Parse(t.regexp, Regexp::LikePerl, NULL);
- ASSERT_TRUE(re != NULL);
- Prog* prog = re->CompileToProg(0);
- ASSERT_TRUE(prog != NULL);
- ASSERT_TRUE(prog->PossibleMatchRange(&min, &max, t.maxlen))
- << " " << t.regexp;
- delete prog;
- re->Decref();
- } else {
- ASSERT_TRUE(RE2(t.regexp).PossibleMatchRange(&min, &max, t.maxlen));
- }
- EXPECT_EQ(t.min, min) << t.regexp;
- EXPECT_EQ(t.max, max) << t.regexp;
- }
- }
-}
-
-// Test cases where PossibleMatchRange should return false.
-TEST(PossibleMatchRange, Failures) {
- std::string min, max;
-
- // Fails because no room to write max.
- EXPECT_FALSE(RE2("abc").PossibleMatchRange(&min, &max, 0));
-
- // Fails because there is no max -- any non-empty string matches
- // or begins a match. Have to use Latin-1 input, because there
- // are no valid UTF-8 strings beginning with byte 0xFF.
- EXPECT_FALSE(RE2("[\\s\\S]+", RE2::Latin1).
- PossibleMatchRange(&min, &max, 10))
- << "min=" << CEscape(min) << ", max=" << CEscape(max);
- EXPECT_FALSE(RE2("[\\0-\xFF]+", RE2::Latin1).
- PossibleMatchRange(&min, &max, 10))
- << "min=" << CEscape(min) << ", max=" << CEscape(max);
- EXPECT_FALSE(RE2(".+hello", RE2::Latin1).
- PossibleMatchRange(&min, &max, 10))
- << "min=" << CEscape(min) << ", max=" << CEscape(max);
- EXPECT_FALSE(RE2(".*hello", RE2::Latin1).
- PossibleMatchRange(&min, &max, 10))
- << "min=" << CEscape(min) << ", max=" << CEscape(max);
- EXPECT_FALSE(RE2(".*", RE2::Latin1).
- PossibleMatchRange(&min, &max, 10))
- << "min=" << CEscape(min) << ", max=" << CEscape(max);
- EXPECT_FALSE(RE2("\\C*").
- PossibleMatchRange(&min, &max, 10))
- << "min=" << CEscape(min) << ", max=" << CEscape(max);
-
- // Fails because it's a malformed regexp.
- EXPECT_FALSE(RE2("*hello").PossibleMatchRange(&min, &max, 10))
- << "min=" << CEscape(min) << ", max=" << CEscape(max);
-}
-
-// Exhaustive test: generate all regexps within parameters,
-// then generate all strings of a given length over a given alphabet,
-// then check that the prefix information agrees with whether
-// the regexp matches each of the strings.
-class PossibleMatchTester : public RegexpGenerator {
- public:
- PossibleMatchTester(int maxatoms,
- int maxops,
- const std::vector<std::string>& alphabet,
- const std::vector<std::string>& ops,
- int maxstrlen,
- const std::vector<std::string>& stralphabet)
- : RegexpGenerator(maxatoms, maxops, alphabet, ops),
- strgen_(maxstrlen, stralphabet),
- regexps_(0), tests_(0) { }
-
- int regexps() { return regexps_; }
- int tests() { return tests_; }
-
- // Needed for RegexpGenerator interface.
- void HandleRegexp(const std::string& regexp);
-
- private:
- StringGenerator strgen_;
-
- int regexps_; // Number of HandleRegexp calls
- int tests_; // Number of regexp tests.
-
- PossibleMatchTester(const PossibleMatchTester&) = delete;
- PossibleMatchTester& operator=(const PossibleMatchTester&) = delete;
-};
-
-// Processes a single generated regexp.
-// Checks that all accepted strings agree with the prefix range.
-void PossibleMatchTester::HandleRegexp(const std::string& regexp) {
- regexps_++;
-
- VLOG(3) << CEscape(regexp);
-
- RE2 re(regexp, RE2::Latin1);
- ASSERT_EQ(re.error(), "");
-
- std::string min, max;
- if(!re.PossibleMatchRange(&min, &max, 10)) {
- // There's no good max for "\\C*". Can't use strcmp
- // because sometimes it gets embedded in more
- // complicated expressions.
- if(strstr(regexp.c_str(), "\\C*"))
- return;
- LOG(QFATAL) << "PossibleMatchRange failed on: " << CEscape(regexp);
- }
-
- strgen_.Reset();
- while (strgen_.HasNext()) {
- const StringPiece& s = strgen_.Next();
- tests_++;
- if (!RE2::FullMatch(s, re))
- continue;
- ASSERT_GE(s, min) << " regexp: " << regexp << " max: " << max;
- ASSERT_LE(s, max) << " regexp: " << regexp << " min: " << min;
- }
-}
-
-TEST(PossibleMatchRange, Exhaustive) {
- int natom = 3;
- int noperator = 3;
- int stringlen = 5;
- if (RE2_DEBUG_MODE) {
- natom = 2;
- noperator = 3;
- stringlen = 3;
- }
- PossibleMatchTester t(natom, noperator, Split(" ", "a b [0-9]"),
- RegexpGenerator::EgrepOps(),
- stringlen, Explode("ab4"));
- t.Generate();
- LOG(INFO) << t.regexps() << " regexps, "
- << t.tests() << " tests";
-}
-
-} // namespace re2
+// Copyright 2006-2008 The RE2 Authors. All Rights Reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <string.h>
+#include <string>
+#include <vector>
+
+#include "library/cpp/testing/gtest/gtest.h"
+#include "util/logging.h"
+#include "util/strutil.h"
+#include "re2/prog.h"
+#include "re2/re2.h"
+#include "re2/regexp.h"
+#include "re2/testing/exhaustive_tester.h"
+#include "re2/testing/regexp_generator.h"
+#include "re2/testing/string_generator.h"
+
+namespace re2 {
+
+// Test that std::string ordering treats bytes as unsigned: "\x70" must sort below "\xA0".
+// PossibleMatchRange doesn't depend on this, but callers probably will.
+TEST(CplusplusStrings, EightBit) {
+ std::string s = "\x70";
+ std::string t = "\xA0";
+ EXPECT_LT(s, t);
+}
+
+struct PrefixTest {  // One hand-written PossibleMatchRange expectation.
+ const char* regexp;  // pattern handed to Regexp::Parse and to RE2
+ int maxlen;  // maxlen argument passed to PossibleMatchRange
+ const char* min;  // expected value of the computed min string
+ const char* max;  // expected value of the computed max string
+};
+
+static PrefixTest tests[] = {  // Columns: regexp, maxlen, expected min, expected max.
+ { "", 10, "", "", },
+ { "Abcdef", 10, "Abcdef", "Abcdef" },
+ { "abc(def|ghi)", 10, "abcdef", "abcghi" },
+ { "a+hello", 10, "aa", "ahello" },
+ { "a*hello", 10, "a", "hello" },
+ { "def|abc", 10, "abc", "def" },
+ { "a(b)(c)[d]", 10, "abcd", "abcd" },
+ { "ab(cab|cat)", 10, "abcab", "abcat" },
+ { "ab(cab|ca)x", 10, "abcabx", "abcax" },
+ { "(ab|x)(c|de)", 10, "abc", "xde" },
+ { "(ab|x)?(c|z)?", 10, "", "z" },
+ { "[^\\s\\S]", 10, "", "" },
+ { "(abc)+", 5, "abc", "abcac" },
+ { "(abc)+", 2, "ab", "ac" },
+ { "(abc)+", 1, "a", "b" },
+ { "[a\xC3\xA1]", 4, "a", "\xC3\xA1" },
+ { "a*", 10, "", "ab" },
+ // The same patterns with the (?i) prefix: expected min is upper case, expected max lower case.
+ { "(?i)Abcdef", 10, "ABCDEF", "abcdef" },
+ { "(?i)abc(def|ghi)", 10, "ABCDEF", "abcghi" },
+ { "(?i)a+hello", 10, "AA", "ahello" },
+ { "(?i)a*hello", 10, "A", "hello" },
+ { "(?i)def|abc", 10, "ABC", "def" },
+ { "(?i)a(b)(c)[d]", 10, "ABCD", "abcd" },
+ { "(?i)ab(cab|cat)", 10, "ABCAB", "abcat" },
+ { "(?i)ab(cab|ca)x", 10, "ABCABX", "abcax" },
+ { "(?i)(ab|x)(c|de)", 10, "ABC", "xde" },
+ { "(?i)(ab|x)?(c|z)?", 10, "", "z" },
+ { "(?i)[^\\s\\S]", 10, "", "" },
+ { "(?i)(abc)+", 5, "ABC", "abcac" },
+ { "(?i)(abc)+", 2, "AB", "ac" },
+ { "(?i)(abc)+", 1, "A", "b" },
+ { "(?i)[a\xC3\xA1]", 4, "A", "\xC3\xA1" },
+ { "(?i)a*", 10, "", "ab" },
+ { "(?i)A*", 10, "", "ab" },
+ // The first group again with a leading \A: expected ranges are the same as without it.
+ { "\\AAbcdef", 10, "Abcdef", "Abcdef" },
+ { "\\Aabc(def|ghi)", 10, "abcdef", "abcghi" },
+ { "\\Aa+hello", 10, "aa", "ahello" },
+ { "\\Aa*hello", 10, "a", "hello" },
+ { "\\Adef|abc", 10, "abc", "def" },
+ { "\\Aa(b)(c)[d]", 10, "abcd", "abcd" },
+ { "\\Aab(cab|cat)", 10, "abcab", "abcat" },
+ { "\\Aab(cab|ca)x", 10, "abcabx", "abcax" },
+ { "\\A(ab|x)(c|de)", 10, "abc", "xde" },
+ { "\\A(ab|x)?(c|z)?", 10, "", "z" },
+ { "\\A[^\\s\\S]", 10, "", "" },
+ { "\\A(abc)+", 5, "abc", "abcac" },
+ { "\\A(abc)+", 2, "ab", "ac" },
+ { "\\A(abc)+", 1, "a", "b" },
+ { "\\A[a\xC3\xA1]", 4, "a", "\xC3\xA1" },
+ { "\\Aa*", 10, "", "ab" },
+ // The (?i) group again with a leading \A: expected ranges are the same as without it.
+ { "(?i)\\AAbcdef", 10, "ABCDEF", "abcdef" },
+ { "(?i)\\Aabc(def|ghi)", 10, "ABCDEF", "abcghi" },
+ { "(?i)\\Aa+hello", 10, "AA", "ahello" },
+ { "(?i)\\Aa*hello", 10, "A", "hello" },
+ { "(?i)\\Adef|abc", 10, "ABC", "def" },
+ { "(?i)\\Aa(b)(c)[d]", 10, "ABCD", "abcd" },
+ { "(?i)\\Aab(cab|cat)", 10, "ABCAB", "abcat" },
+ { "(?i)\\Aab(cab|ca)x", 10, "ABCABX", "abcax" },
+ { "(?i)\\A(ab|x)(c|de)", 10, "ABC", "xde" },
+ { "(?i)\\A(ab|x)?(c|z)?", 10, "", "z" },
+ { "(?i)\\A[^\\s\\S]", 10, "", "" },
+ { "(?i)\\A(abc)+", 5, "ABC", "abcac" },
+ { "(?i)\\A(abc)+", 2, "AB", "ac" },
+ { "(?i)\\A(abc)+", 1, "A", "b" },
+ { "(?i)\\A[a\xC3\xA1]", 4, "A", "\xC3\xA1" },
+ { "(?i)\\Aa*", 10, "", "ab" },
+ { "(?i)\\AA*", 10, "", "ab" },
+};
+
+TEST(PossibleMatchRange, HandWritten) {
+  // Every row is checked twice: through Prog directly, then through RE2.
+  for (const PrefixTest& t : tests) {
+    for (int pass = 0; pass < 2; pass++) {
+      std::string min, max;
+      if (pass == 1) {
+        ASSERT_TRUE(RE2(t.regexp).PossibleMatchRange(&min, &max, t.maxlen));
+      } else {
+        LOG(INFO) << "Checking regexp=" << CEscape(t.regexp);
+        Regexp* parsed = Regexp::Parse(t.regexp, Regexp::LikePerl, nullptr);
+        ASSERT_TRUE(parsed != nullptr);
+        Prog* compiled = parsed->CompileToProg(0);
+        ASSERT_TRUE(compiled != nullptr);
+        ASSERT_TRUE(compiled->PossibleMatchRange(&min, &max, t.maxlen))
+            << " " << t.regexp;
+        delete compiled;
+        parsed->Decref();
+      }
+      EXPECT_EQ(t.min, min) << t.regexp;
+      EXPECT_EQ(t.max, max) << t.regexp;
+    }
+  }
+}
+
+// Test cases where PossibleMatchRange should return false; min and max are reused across all calls.
+TEST(PossibleMatchRange, Failures) {
+ std::string min, max;
+
+ // Fails because maxlen is 0, which leaves no room to write max.
+ EXPECT_FALSE(RE2("abc").PossibleMatchRange(&min, &max, 0));
+
+ // Fails because there is no max -- any non-empty string matches
+ // or begins a match. Have to use Latin-1 input, because there
+ // are no valid UTF-8 strings beginning with byte 0xFF.
+ EXPECT_FALSE(RE2("[\\s\\S]+", RE2::Latin1).
+ PossibleMatchRange(&min, &max, 10))
+ << "min=" << CEscape(min) << ", max=" << CEscape(max);
+ EXPECT_FALSE(RE2("[\\0-\xFF]+", RE2::Latin1).
+ PossibleMatchRange(&min, &max, 10))
+ << "min=" << CEscape(min) << ", max=" << CEscape(max);
+ EXPECT_FALSE(RE2(".+hello", RE2::Latin1).
+ PossibleMatchRange(&min, &max, 10))
+ << "min=" << CEscape(min) << ", max=" << CEscape(max);
+ EXPECT_FALSE(RE2(".*hello", RE2::Latin1).
+ PossibleMatchRange(&min, &max, 10))
+ << "min=" << CEscape(min) << ", max=" << CEscape(max);
+ EXPECT_FALSE(RE2(".*", RE2::Latin1).
+ PossibleMatchRange(&min, &max, 10))
+ << "min=" << CEscape(min) << ", max=" << CEscape(max);
+ EXPECT_FALSE(RE2("\\C*").
+ PossibleMatchRange(&min, &max, 10))
+ << "min=" << CEscape(min) << ", max=" << CEscape(max);
+
+ // Fails because it's a malformed regexp: the pattern starts with '*'.
+ EXPECT_FALSE(RE2("*hello").PossibleMatchRange(&min, &max, 10))
+ << "min=" << CEscape(min) << ", max=" << CEscape(max);
+}
+
+// Exhaustive checker built on RegexpGenerator: all regexps within the
+// given parameters are generated, then all strings of a given length
+// over a given alphabet, and the prefix information for each regexp
+// is checked against whether the regexp matches each of those strings.
+class PossibleMatchTester : public RegexpGenerator {
+ public:
+  PossibleMatchTester(int maxatoms,
+                      int maxops,
+                      const std::vector<std::string>& alphabet,
+                      const std::vector<std::string>& ops,
+                      int maxstrlen,
+                      const std::vector<std::string>& stralphabet)
+      : RegexpGenerator(maxatoms, maxops, alphabet, ops),
+        strgen_(maxstrlen, stralphabet) {}
+
+  // Needed for RegexpGenerator interface.
+  void HandleRegexp(const std::string& regexp);
+
+  // Counters maintained by HandleRegexp().
+  int regexps() { return regexps_; }
+  int tests() { return tests_; }
+
+ private:
+  StringGenerator strgen_;  // Built from maxstrlen and stralphabet.
+
+  int regexps_ = 0;  // Number of HandleRegexp calls
+  int tests_ = 0;    // Number of regexp tests.
+
+  PossibleMatchTester(const PossibleMatchTester&) = delete;
+  PossibleMatchTester& operator=(const PossibleMatchTester&) = delete;
+};
+
+// Handles one generated regexp: every string from strgen_ that fully
+// matches it must fall within [min, max] from PossibleMatchRange.
+void PossibleMatchTester::HandleRegexp(const std::string& regexp) {
+  ++regexps_;
+  VLOG(3) << CEscape(regexp);
+
+  RE2 re(regexp, RE2::Latin1);
+  ASSERT_EQ(re.error(), "");
+
+  std::string min, max;
+  const bool have_range = re.PossibleMatchRange(&min, &max, 10);
+  if (!have_range) {
+    // "\\C*" has no good max.  Look for it as a substring rather than
+    // comparing whole strings, because it is sometimes embedded in a
+    // more complicated expression.
+    const bool has_any_byte_star = strstr(regexp.c_str(), "\\C*") != nullptr;
+    if (has_any_byte_star)
+      return;
+    LOG(QFATAL) << "PossibleMatchRange failed on: " << CEscape(regexp);
+  }
+
+  for (strgen_.Reset(); strgen_.HasNext();) {
+    const StringPiece& s = strgen_.Next();
+    ++tests_;
+    if (RE2::FullMatch(s, re)) {
+      ASSERT_GE(s, min) << " regexp: " << regexp << " max: " << max;
+      ASSERT_LE(s, max) << " regexp: " << regexp << " min: " << min;
+    }
+  }
+}
+
+TEST(PossibleMatchRange, Exhaustive) {
+  // Use the smaller parameter set when RE2_DEBUG_MODE is set.
+  const bool debug = RE2_DEBUG_MODE;
+  const int natom = debug ? 2 : 3;
+  const int noperator = 3;
+  const int stringlen = debug ? 3 : 5;
+
+  // The regexp alphabet comes from Split(" ", "a b [0-9]"), the operators
+  // from EgrepOps(), and the string alphabet from Explode("ab4").
+  PossibleMatchTester t(natom, noperator, Split(" ", "a b [0-9]"),
+                        RegexpGenerator::EgrepOps(),
+                        stringlen, Explode("ab4"));
+  t.Generate();
+  LOG(INFO) << t.regexps() << " regexps, "
+            << t.tests() << " tests";
+}
+
+} // namespace re2
diff --git a/contrib/libs/re2/re2/testing/re2_arg_test.cc b/contrib/libs/re2/re2/testing/re2_arg_test.cc
index e576491540..8df90ab8f2 100644
--- a/contrib/libs/re2/re2/testing/re2_arg_test.cc
+++ b/contrib/libs/re2/re2/testing/re2_arg_test.cc
@@ -1,160 +1,160 @@
-// Copyright 2005 The RE2 Authors. All Rights Reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// This tests to make sure numbers are parsed from strings
-// correctly.
-// Todo: Expand the test to validate strings parsed to the other types
-// supported by RE2::Arg class
-
-#include <stdint.h>
-#include <string.h>
-
-#include "library/cpp/testing/gtest/gtest.h"
-#include "util/logging.h"
-#include "re2/re2.h"
-
-namespace re2 {
-
-struct SuccessTable {
- const char * value_string;
- int64_t value;
- bool success[6];
-};
-
-// Test boundary cases for different integral sizes.
-// Specifically I want to make sure that values outside the boundries
-// of an integral type will fail and that negative numbers will fail
-// for unsigned types. The following table contains the boundaries for
-// the various integral types and has entries for whether or not each
-// type can contain the given value.
-const SuccessTable kSuccessTable[] = {
-// string integer value i16 u16 i32 u32 i64 u64
-// 0 to 2^7-1
-{ "0", 0, { true, true, true, true, true, true }},
-{ "127", 127, { true, true, true, true, true, true }},
-
-// -1 to -2^7
-{ "-1", -1, { true, false, true, false, true, false }},
-{ "-128", -128, { true, false, true, false, true, false }},
-
-// 2^7 to 2^8-1
-{ "128", 128, { true, true, true, true, true, true }},
-{ "255", 255, { true, true, true, true, true, true }},
-
-// 2^8 to 2^15-1
-{ "256", 256, { true, true, true, true, true, true }},
-{ "32767", 32767, { true, true, true, true, true, true }},
-
-// -2^7-1 to -2^15
-{ "-129", -129, { true, false, true, false, true, false }},
-{ "-32768", -32768, { true, false, true, false, true, false }},
-
-// 2^15 to 2^16-1
-{ "32768", 32768, { false, true, true, true, true, true }},
-{ "65535", 65535, { false, true, true, true, true, true }},
-
-// 2^16 to 2^31-1
-{ "65536", 65536, { false, false, true, true, true, true }},
-{ "2147483647", 2147483647, { false, false, true, true, true, true }},
-
-// -2^15-1 to -2^31
-{ "-32769", -32769, { false, false, true, false, true, false }},
-{ "-2147483648", static_cast<int64_t>(0xFFFFFFFF80000000LL),
- { false, false, true, false, true, false }},
-
-// 2^31 to 2^32-1
-{ "2147483648", 2147483648U, { false, false, false, true, true, true }},
-{ "4294967295", 4294967295U, { false, false, false, true, true, true }},
-
-// 2^32 to 2^63-1
-{ "4294967296", 4294967296LL, { false, false, false, false, true, true }},
-{ "9223372036854775807",
- 9223372036854775807LL, { false, false, false, false, true, true }},
-
-// -2^31-1 to -2^63
-{ "-2147483649", -2147483649LL, { false, false, false, false, true, false }},
-{ "-9223372036854775808", static_cast<int64_t>(0x8000000000000000LL),
- { false, false, false, false, true, false }},
-
-// 2^63 to 2^64-1
-{ "9223372036854775808", static_cast<int64_t>(9223372036854775808ULL),
- { false, false, false, false, false, true }},
-{ "18446744073709551615", static_cast<int64_t>(18446744073709551615ULL),
- { false, false, false, false, false, true }},
-
-// >= 2^64
-{ "18446744073709551616", 0, { false, false, false, false, false, false }},
-};
-
-const int kNumStrings = arraysize(kSuccessTable);
-
-// It's ugly to use a macro, but we apparently can't use the EXPECT_EQ
-// macro outside of a TEST block and this seems to be the only way to
-// avoid code duplication. I can also pull off a couple nice tricks
-// using concatenation for the type I'm checking against.
-#define PARSE_FOR_TYPE(type, column) { \
- type r; \
- for (int i = 0; i < kNumStrings; ++i) { \
- RE2::Arg arg(&r); \
- const char* const p = kSuccessTable[i].value_string; \
- bool retval = arg.Parse(p, strlen(p)); \
- bool success = kSuccessTable[i].success[column]; \
- EXPECT_EQ(retval, success) \
- << "Parsing '" << p << "' for type " #type " should return " \
- << success; \
- if (success) { \
- EXPECT_EQ(r, (type)kSuccessTable[i].value); \
- } \
- } \
-}
-
-TEST(RE2ArgTest, Int16Test) {
- PARSE_FOR_TYPE(int16_t, 0);
-}
-
-TEST(RE2ArgTest, Uint16Test) {
- PARSE_FOR_TYPE(uint16_t, 1);
-}
-
-TEST(RE2ArgTest, Int32Test) {
- PARSE_FOR_TYPE(int32_t, 2);
-}
-
-TEST(RE2ArgTest, Uint32Test) {
- PARSE_FOR_TYPE(uint32_t, 3);
-}
-
-TEST(RE2ArgTest, Int64Test) {
- PARSE_FOR_TYPE(int64_t, 4);
-}
-
-TEST(RE2ArgTest, Uint64Test) {
- PARSE_FOR_TYPE(uint64_t, 5);
-}
-
-TEST(RE2ArgTest, ParseFromTest) {
-#if !defined(_MSC_VER)
- struct {
- bool ParseFrom(const char* str, size_t n) {
- LOG(INFO) << "str = " << str << ", n = " << n;
- return true;
- }
- } obj1;
- RE2::Arg arg1(&obj1);
- EXPECT_TRUE(arg1.Parse("one", 3));
-
- struct {
- bool ParseFrom(const char* str, size_t n) {
- LOG(INFO) << "str = " << str << ", n = " << n;
- return false;
- }
- // Ensure that RE2::Arg works even with overloaded ParseFrom().
- void ParseFrom(const char* str) {}
- } obj2;
- RE2::Arg arg2(&obj2);
- EXPECT_FALSE(arg2.Parse("two", 3));
-#endif
-}
-
-} // namespace re2
+// Copyright 2005 The RE2 Authors. All Rights Reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Tests that RE2::Arg parses numbers from strings correctly,
+// including the boundary values of each integer width.
+// TODO: Expand the test to validate strings parsed to the other types
+// supported by the RE2::Arg class.
+
+#include <stdint.h>
+#include <string.h>
+
+#include "library/cpp/testing/gtest/gtest.h"
+#include "util/logging.h"
+#include "re2/re2.h"
+
+namespace re2 {
+
+struct SuccessTable {  // One row of kSuccessTable.
+ const char * value_string;  // text handed to RE2::Arg::Parse
+ int64_t value;  // expected numeric value; PARSE_FOR_TYPE casts it to the tested type
+ bool success[6];  // expected Parse result per column: i16, u16, i32, u32, i64, u64
+};
+
+// Test boundary cases for different integral sizes.
+// Specifically I want to make sure that values outside the boundaries
+// of an integral type will fail and that negative numbers will fail
+// for unsigned types. The following table contains the boundaries for
+// the various integral types and has entries for whether or not each
+// type can contain the given value.
+const SuccessTable kSuccessTable[] = {
+// string integer value i16 u16 i32 u32 i64 u64
+// 0 to 2^7-1
+{ "0", 0, { true, true, true, true, true, true }},
+{ "127", 127, { true, true, true, true, true, true }},
+
+// -1 to -2^7
+{ "-1", -1, { true, false, true, false, true, false }},
+{ "-128", -128, { true, false, true, false, true, false }},
+
+// 2^7 to 2^8-1
+{ "128", 128, { true, true, true, true, true, true }},
+{ "255", 255, { true, true, true, true, true, true }},
+
+// 2^8 to 2^15-1
+{ "256", 256, { true, true, true, true, true, true }},
+{ "32767", 32767, { true, true, true, true, true, true }},
+
+// -2^7-1 to -2^15
+{ "-129", -129, { true, false, true, false, true, false }},
+{ "-32768", -32768, { true, false, true, false, true, false }},
+
+// 2^15 to 2^16-1
+{ "32768", 32768, { false, true, true, true, true, true }},
+{ "65535", 65535, { false, true, true, true, true, true }},
+
+// 2^16 to 2^31-1
+{ "65536", 65536, { false, false, true, true, true, true }},
+{ "2147483647", 2147483647, { false, false, true, true, true, true }},
+
+// -2^15-1 to -2^31
+{ "-32769", -32769, { false, false, true, false, true, false }},
+{ "-2147483648", static_cast<int64_t>(0xFFFFFFFF80000000LL),
+ { false, false, true, false, true, false }},
+
+// 2^31 to 2^32-1
+{ "2147483648", 2147483648U, { false, false, false, true, true, true }},
+{ "4294967295", 4294967295U, { false, false, false, true, true, true }},
+
+// 2^32 to 2^63-1
+{ "4294967296", 4294967296LL, { false, false, false, false, true, true }},
+{ "9223372036854775807",
+ 9223372036854775807LL, { false, false, false, false, true, true }},
+
+// -2^31-1 to -2^63
+{ "-2147483649", -2147483649LL, { false, false, false, false, true, false }},
+{ "-9223372036854775808", static_cast<int64_t>(0x8000000000000000LL),
+ { false, false, false, false, true, false }},
+
+// 2^63 to 2^64-1 (stored through static_cast<int64_t>; PARSE_FOR_TYPE casts back to the tested type)
+{ "9223372036854775808", static_cast<int64_t>(9223372036854775808ULL),
+ { false, false, false, false, false, true }},
+{ "18446744073709551615", static_cast<int64_t>(18446744073709551615ULL),
+ { false, false, false, false, false, true }},
+
+// >= 2^64 (no column succeeds, so the value field is never compared)
+{ "18446744073709551616", 0, { false, false, false, false, false, false }},
+};
+
+const int kNumStrings = arraysize(kSuccessTable);
+
+// PARSE_FOR_TYPE(type, column) parses every kSuccessTable string into a
+// `type` through RE2::Arg and expects Parse to return success[column];
+// on success the parsed value must equal the table value cast to `type`.
+// It is a macro so that EXPECT_EQ expands inside each TEST and #type can name the type.
+#define PARSE_FOR_TYPE(type, column) { \
+ type r; \
+ for (int i = 0; i < kNumStrings; ++i) { \
+ RE2::Arg arg(&r); \
+ const char* const p = kSuccessTable[i].value_string; \
+ bool retval = arg.Parse(p, strlen(p)); \
+ bool success = kSuccessTable[i].success[column]; \
+ EXPECT_EQ(retval, success) \
+ << "Parsing '" << p << "' for type " #type " should return " \
+ << success; \
+ if (success) { \
+ EXPECT_EQ(r, (type)kSuccessTable[i].value); \
+ } \
+ } \
+}
+
+TEST(RE2ArgTest, Int16Test) {
+ PARSE_FOR_TYPE(int16_t, 0);  // success[0] is the i16 column
+}
+
+TEST(RE2ArgTest, Uint16Test) {
+ PARSE_FOR_TYPE(uint16_t, 1);  // success[1] is the u16 column
+}
+
+TEST(RE2ArgTest, Int32Test) {
+ PARSE_FOR_TYPE(int32_t, 2);  // success[2] is the i32 column
+}
+
+TEST(RE2ArgTest, Uint32Test) {
+ PARSE_FOR_TYPE(uint32_t, 3);  // success[3] is the u32 column
+}
+
+TEST(RE2ArgTest, Int64Test) {
+ PARSE_FOR_TYPE(int64_t, 4);  // success[4] is the i64 column
+}
+
+TEST(RE2ArgTest, Uint64Test) {
+ PARSE_FOR_TYPE(uint64_t, 5);  // success[5] is the u64 column
+}
+
+TEST(RE2ArgTest, ParseFromTest) {
+#if !defined(_MSC_VER)  // the whole test body is compiled out when _MSC_VER is defined
+ struct {
+ bool ParseFrom(const char* str, size_t n) {
+ LOG(INFO) << "str = " << str << ", n = " << n;
+ return true;  // arg1.Parse() below is expected to report true
+ }
+ } obj1;
+ RE2::Arg arg1(&obj1);  // wraps an object whose type provides ParseFrom(const char*, size_t)
+ EXPECT_TRUE(arg1.Parse("one", 3));
+
+ struct {
+ bool ParseFrom(const char* str, size_t n) {
+ LOG(INFO) << "str = " << str << ", n = " << n;
+ return false;  // arg2.Parse() below is expected to report false
+ }
+ // Ensure that RE2::Arg works even with overloaded ParseFrom().
+ void ParseFrom(const char* str) {}
+ } obj2;
+ RE2::Arg arg2(&obj2);
+ EXPECT_FALSE(arg2.Parse("two", 3));
+#endif
+}
+
+} // namespace re2
diff --git a/contrib/libs/re2/re2/testing/re2_test.cc b/contrib/libs/re2/re2/testing/re2_test.cc
index ae3b3c3863..9ffe1467d8 100644
--- a/contrib/libs/re2/re2/testing/re2_test.cc
+++ b/contrib/libs/re2/re2/testing/re2_test.cc
@@ -1,1659 +1,1659 @@
-// -*- coding: utf-8 -*-
-// Copyright 2002-2009 The RE2 Authors. All Rights Reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// TODO: Test extractions for PartialMatch/Consume
-
-#include <errno.h>
-#include <stddef.h>
-#include <stdint.h>
-#include <string.h>
-#include <map>
-#include <string>
-#include <utility>
-#include <vector>
-#if !defined(_MSC_VER) && !defined(__CYGWIN__) && !defined(__MINGW32__)
-#include <sys/mman.h>
-#include <unistd.h> /* for sysconf */
-#endif
-
-#include "library/cpp/testing/gtest/gtest.h"
-#include "util/logging.h"
-#include "util/strutil.h"
-#include "re2/re2.h"
-#include "re2/regexp.h"
-
-namespace re2 {
-
-TEST(RE2, HexTests) {
-#define ASSERT_HEX(type, value) \
- do { \
- type v; \
- ASSERT_TRUE( \
- RE2::FullMatch(#value, "([0-9a-fA-F]+)[uUlL]*", RE2::Hex(&v))); \
- ASSERT_EQ(v, 0x##value); \
- ASSERT_TRUE(RE2::FullMatch("0x" #value, "([0-9a-fA-FxX]+)[uUlL]*", \
- RE2::CRadix(&v))); \
- ASSERT_EQ(v, 0x##value); \
- } while (0)
-
- ASSERT_HEX(short, 2bad);
- ASSERT_HEX(unsigned short, 2badU);
- ASSERT_HEX(int, dead);
- ASSERT_HEX(unsigned int, deadU);
- ASSERT_HEX(long, 7eadbeefL);
- ASSERT_HEX(unsigned long, deadbeefUL);
- ASSERT_HEX(long long, 12345678deadbeefLL);
- ASSERT_HEX(unsigned long long, cafebabedeadbeefULL);
-
-#undef ASSERT_HEX
-}
-
-TEST(RE2, OctalTests) {
-#define ASSERT_OCTAL(type, value) \
- do { \
- type v; \
- ASSERT_TRUE(RE2::FullMatch(#value, "([0-7]+)[uUlL]*", RE2::Octal(&v))); \
- ASSERT_EQ(v, 0##value); \
- ASSERT_TRUE(RE2::FullMatch("0" #value, "([0-9a-fA-FxX]+)[uUlL]*", \
- RE2::CRadix(&v))); \
- ASSERT_EQ(v, 0##value); \
- } while (0)
-
- ASSERT_OCTAL(short, 77777);
- ASSERT_OCTAL(unsigned short, 177777U);
- ASSERT_OCTAL(int, 17777777777);
- ASSERT_OCTAL(unsigned int, 37777777777U);
- ASSERT_OCTAL(long, 17777777777L);
- ASSERT_OCTAL(unsigned long, 37777777777UL);
- ASSERT_OCTAL(long long, 777777777777777777777LL);
- ASSERT_OCTAL(unsigned long long, 1777777777777777777777ULL);
-
-#undef ASSERT_OCTAL
-}
-
-TEST(RE2, DecimalTests) {
-#define ASSERT_DECIMAL(type, value) \
- do { \
- type v; \
- ASSERT_TRUE(RE2::FullMatch(#value, "(-?[0-9]+)[uUlL]*", &v)); \
- ASSERT_EQ(v, value); \
- ASSERT_TRUE( \
- RE2::FullMatch(#value, "(-?[0-9a-fA-FxX]+)[uUlL]*", RE2::CRadix(&v))); \
- ASSERT_EQ(v, value); \
- } while (0)
-
- ASSERT_DECIMAL(short, -1);
- ASSERT_DECIMAL(unsigned short, 9999);
- ASSERT_DECIMAL(int, -1000);
- ASSERT_DECIMAL(unsigned int, 12345U);
- ASSERT_DECIMAL(long, -10000000L);
- ASSERT_DECIMAL(unsigned long, 3083324652U);
- ASSERT_DECIMAL(long long, -100000000000000LL);
- ASSERT_DECIMAL(unsigned long long, 1234567890987654321ULL);
-
-#undef ASSERT_DECIMAL
-}
-
-TEST(RE2, Replace) {
- struct ReplaceTest {
- const char *regexp;
- const char *rewrite;
- const char *original;
- const char *single;
- const char *global;
- int greplace_count;
- };
- static const ReplaceTest tests[] = {
- { "(qu|[b-df-hj-np-tv-z]*)([a-z]+)",
- "\\2\\1ay",
- "the quick brown fox jumps over the lazy dogs.",
- "ethay quick brown fox jumps over the lazy dogs.",
- "ethay ickquay ownbray oxfay umpsjay overay ethay azylay ogsday.",
- 9 },
- { "\\w+",
- "\\0-NOSPAM",
- "abcd.efghi@google.com",
- "abcd-NOSPAM.efghi@google.com",
- "abcd-NOSPAM.efghi-NOSPAM@google-NOSPAM.com-NOSPAM",
- 4 },
- { "^",
- "(START)",
- "foo",
- "(START)foo",
- "(START)foo",
- 1 },
- { "^",
- "(START)",
- "",
- "(START)",
- "(START)",
- 1 },
- { "$",
- "(END)",
- "",
- "(END)",
- "(END)",
- 1 },
- { "b",
- "bb",
- "ababababab",
- "abbabababab",
- "abbabbabbabbabb",
- 5 },
- { "b",
- "bb",
- "bbbbbb",
- "bbbbbbb",
- "bbbbbbbbbbbb",
- 6 },
- { "b+",
- "bb",
- "bbbbbb",
- "bb",
- "bb",
- 1 },
- { "b*",
- "bb",
- "bbbbbb",
- "bb",
- "bb",
- 1 },
- { "b*",
- "bb",
- "aaaaa",
- "bbaaaaa",
- "bbabbabbabbabbabb",
- 6 },
- // Check newline handling
- { "a.*a",
- "(\\0)",
- "aba\naba",
- "(aba)\naba",
- "(aba)\n(aba)",
- 2 },
- { "", NULL, NULL, NULL, NULL, 0 }
- };
-
- for (const ReplaceTest* t = tests; t->original != NULL; t++) {
- std::string one(t->original);
- ASSERT_TRUE(RE2::Replace(&one, t->regexp, t->rewrite));
- ASSERT_EQ(one, t->single);
- std::string all(t->original);
- ASSERT_EQ(RE2::GlobalReplace(&all, t->regexp, t->rewrite), t->greplace_count)
- << "Got: " << all;
- ASSERT_EQ(all, t->global);
- }
-}
-
-static void TestCheckRewriteString(const char* regexp, const char* rewrite,
- bool expect_ok) {
- std::string error;
- RE2 exp(regexp);
- bool actual_ok = exp.CheckRewriteString(rewrite, &error);
- EXPECT_EQ(expect_ok, actual_ok) << " for " << rewrite << " error: " << error;
-}
-
-TEST(CheckRewriteString, all) {
- TestCheckRewriteString("abc", "foo", true);
- TestCheckRewriteString("abc", "foo\\", false);
- TestCheckRewriteString("abc", "foo\\0bar", true);
-
- TestCheckRewriteString("a(b)c", "foo", true);
- TestCheckRewriteString("a(b)c", "foo\\0bar", true);
- TestCheckRewriteString("a(b)c", "foo\\1bar", true);
- TestCheckRewriteString("a(b)c", "foo\\2bar", false);
- TestCheckRewriteString("a(b)c", "f\\\\2o\\1o", true);
-
- TestCheckRewriteString("a(b)(c)", "foo\\12", true);
- TestCheckRewriteString("a(b)(c)", "f\\2o\\1o", true);
- TestCheckRewriteString("a(b)(c)", "f\\oo\\1", false);
-}
-
-TEST(RE2, Extract) {
- std::string s;
-
- ASSERT_TRUE(RE2::Extract("boris@kremvax.ru", "(.*)@([^.]*)", "\\2!\\1", &s));
- ASSERT_EQ(s, "kremvax!boris");
-
- ASSERT_TRUE(RE2::Extract("foo", ".*", "'\\0'", &s));
- ASSERT_EQ(s, "'foo'");
- // check that false match doesn't overwrite
- ASSERT_FALSE(RE2::Extract("baz", "bar", "'\\0'", &s));
- ASSERT_EQ(s, "'foo'");
-}
-
-TEST(RE2, MaxSubmatchTooLarge) {
- std::string s;
- ASSERT_FALSE(RE2::Extract("foo", "f(o+)", "\\1\\2", &s));
- s = "foo";
- ASSERT_FALSE(RE2::Replace(&s, "f(o+)", "\\1\\2"));
- s = "foo";
- ASSERT_FALSE(RE2::GlobalReplace(&s, "f(o+)", "\\1\\2"));
-}
-
-TEST(RE2, Consume) {
- RE2 r("\\s*(\\w+)"); // matches a word, possibly proceeded by whitespace
- std::string word;
-
- std::string s(" aaa b!@#$@#$cccc");
- StringPiece input(s);
-
- ASSERT_TRUE(RE2::Consume(&input, r, &word));
- ASSERT_EQ(word, "aaa") << " input: " << input;
- ASSERT_TRUE(RE2::Consume(&input, r, &word));
- ASSERT_EQ(word, "b") << " input: " << input;
- ASSERT_FALSE(RE2::Consume(&input, r, &word)) << " input: " << input;
-}
-
-TEST(RE2, ConsumeN) {
- const std::string s(" one two three 4");
- StringPiece input(s);
-
- RE2::Arg argv[2];
- const RE2::Arg* const args[2] = { &argv[0], &argv[1] };
-
- // 0 arg
- EXPECT_TRUE(RE2::ConsumeN(&input, "\\s*(\\w+)", args, 0)); // Skips "one".
-
- // 1 arg
- std::string word;
- argv[0] = &word;
- EXPECT_TRUE(RE2::ConsumeN(&input, "\\s*(\\w+)", args, 1));
- EXPECT_EQ("two", word);
-
- // Multi-args
- int n;
- argv[1] = &n;
- EXPECT_TRUE(RE2::ConsumeN(&input, "\\s*(\\w+)\\s*(\\d+)", args, 2));
- EXPECT_EQ("three", word);
- EXPECT_EQ(4, n);
-}
-
-TEST(RE2, FindAndConsume) {
- RE2 r("(\\w+)"); // matches a word
- std::string word;
-
- std::string s(" aaa b!@#$@#$cccc");
- StringPiece input(s);
-
- ASSERT_TRUE(RE2::FindAndConsume(&input, r, &word));
- ASSERT_EQ(word, "aaa");
- ASSERT_TRUE(RE2::FindAndConsume(&input, r, &word));
- ASSERT_EQ(word, "b");
- ASSERT_TRUE(RE2::FindAndConsume(&input, r, &word));
- ASSERT_EQ(word, "cccc");
- ASSERT_FALSE(RE2::FindAndConsume(&input, r, &word));
-
- // Check that FindAndConsume works without any submatches.
- // Earlier version used uninitialized data for
- // length to consume.
- input = "aaa";
- ASSERT_TRUE(RE2::FindAndConsume(&input, "aaa"));
- ASSERT_EQ(input, "");
-}
-
-TEST(RE2, FindAndConsumeN) {
- const std::string s(" one two three 4");
- StringPiece input(s);
-
- RE2::Arg argv[2];
- const RE2::Arg* const args[2] = { &argv[0], &argv[1] };
-
- // 0 arg
- EXPECT_TRUE(RE2::FindAndConsumeN(&input, "(\\w+)", args, 0)); // Skips "one".
-
- // 1 arg
- std::string word;
- argv[0] = &word;
- EXPECT_TRUE(RE2::FindAndConsumeN(&input, "(\\w+)", args, 1));
- EXPECT_EQ("two", word);
-
- // Multi-args
- int n;
- argv[1] = &n;
- EXPECT_TRUE(RE2::FindAndConsumeN(&input, "(\\w+)\\s*(\\d+)", args, 2));
- EXPECT_EQ("three", word);
- EXPECT_EQ(4, n);
-}
-
-TEST(RE2, MatchNumberPeculiarity) {
- RE2 r("(foo)|(bar)|(baz)");
- std::string word1;
- std::string word2;
- std::string word3;
-
- ASSERT_TRUE(RE2::PartialMatch("foo", r, &word1, &word2, &word3));
- ASSERT_EQ(word1, "foo");
- ASSERT_EQ(word2, "");
- ASSERT_EQ(word3, "");
- ASSERT_TRUE(RE2::PartialMatch("bar", r, &word1, &word2, &word3));
- ASSERT_EQ(word1, "");
- ASSERT_EQ(word2, "bar");
- ASSERT_EQ(word3, "");
- ASSERT_TRUE(RE2::PartialMatch("baz", r, &word1, &word2, &word3));
- ASSERT_EQ(word1, "");
- ASSERT_EQ(word2, "");
- ASSERT_EQ(word3, "baz");
- ASSERT_FALSE(RE2::PartialMatch("f", r, &word1, &word2, &word3));
-
- std::string a;
- ASSERT_TRUE(RE2::FullMatch("hello", "(foo)|hello", &a));
- ASSERT_EQ(a, "");
-}
-
-TEST(RE2, Match) {
- RE2 re("((\\w+):([0-9]+))"); // extracts host and port
- StringPiece group[4];
-
- // No match.
- StringPiece s = "zyzzyva";
- ASSERT_FALSE(
- re.Match(s, 0, s.size(), RE2::UNANCHORED, group, arraysize(group)));
-
- // Matches and extracts.
- s = "a chrisr:9000 here";
- ASSERT_TRUE(
- re.Match(s, 0, s.size(), RE2::UNANCHORED, group, arraysize(group)));
- ASSERT_EQ(group[0], "chrisr:9000");
- ASSERT_EQ(group[1], "chrisr:9000");
- ASSERT_EQ(group[2], "chrisr");
- ASSERT_EQ(group[3], "9000");
-
- std::string all, host;
- int port;
- ASSERT_TRUE(RE2::PartialMatch("a chrisr:9000 here", re, &all, &host, &port));
- ASSERT_EQ(all, "chrisr:9000");
- ASSERT_EQ(host, "chrisr");
- ASSERT_EQ(port, 9000);
-}
-
-static void TestRecursion(int size, const char* pattern) {
- // Fill up a string repeating the pattern given
- std::string domain;
- domain.resize(size);
- size_t patlen = strlen(pattern);
- for (int i = 0; i < size; i++) {
- domain[i] = pattern[i % patlen];
- }
- // Just make sure it doesn't crash due to too much recursion.
- RE2 re("([a-zA-Z0-9]|-)+(\\.([a-zA-Z0-9]|-)+)*(\\.)?", RE2::Quiet);
- RE2::FullMatch(domain, re);
-}
-
-// A meta-quoted string, interpreted as a pattern, should always match
-// the original unquoted string.
-static void TestQuoteMeta(const std::string& unquoted,
- const RE2::Options& options = RE2::DefaultOptions) {
- std::string quoted = RE2::QuoteMeta(unquoted);
- RE2 re(quoted, options);
- EXPECT_TRUE(RE2::FullMatch(unquoted, re))
- << "Unquoted='" << unquoted << "', quoted='" << quoted << "'.";
-}
-
-// A meta-quoted string, interpreted as a pattern, should always match
-// the original unquoted string.
-static void NegativeTestQuoteMeta(
- const std::string& unquoted, const std::string& should_not_match,
- const RE2::Options& options = RE2::DefaultOptions) {
- std::string quoted = RE2::QuoteMeta(unquoted);
- RE2 re(quoted, options);
- EXPECT_FALSE(RE2::FullMatch(should_not_match, re))
- << "Unquoted='" << unquoted << "', quoted='" << quoted << "'.";
-}
-
-// Tests that quoted meta characters match their original strings,
-// and that a few things that shouldn't match indeed do not.
-TEST(QuoteMeta, Simple) {
- TestQuoteMeta("foo");
- TestQuoteMeta("foo.bar");
- TestQuoteMeta("foo\\.bar");
- TestQuoteMeta("[1-9]");
- TestQuoteMeta("1.5-2.0?");
- TestQuoteMeta("\\d");
- TestQuoteMeta("Who doesn't like ice cream?");
- TestQuoteMeta("((a|b)c?d*e+[f-h]i)");
- TestQuoteMeta("((?!)xxx).*yyy");
- TestQuoteMeta("([");
-}
-TEST(QuoteMeta, SimpleNegative) {
- NegativeTestQuoteMeta("foo", "bar");
- NegativeTestQuoteMeta("...", "bar");
- NegativeTestQuoteMeta("\\.", ".");
- NegativeTestQuoteMeta("\\.", "..");
- NegativeTestQuoteMeta("(a)", "a");
- NegativeTestQuoteMeta("(a|b)", "a");
- NegativeTestQuoteMeta("(a|b)", "(a)");
- NegativeTestQuoteMeta("(a|b)", "a|b");
- NegativeTestQuoteMeta("[0-9]", "0");
- NegativeTestQuoteMeta("[0-9]", "0-9");
- NegativeTestQuoteMeta("[0-9]", "[9]");
- NegativeTestQuoteMeta("((?!)xxx)", "xxx");
-}
-
-TEST(QuoteMeta, Latin1) {
- TestQuoteMeta("3\xb2 = 9", RE2::Latin1);
-}
-
-TEST(QuoteMeta, UTF8) {
- TestQuoteMeta("Plácido Domingo");
- TestQuoteMeta("xyz"); // No fancy utf8.
- TestQuoteMeta("\xc2\xb0"); // 2-byte utf8 -- a degree symbol.
- TestQuoteMeta("27\xc2\xb0 degrees"); // As a middle character.
- TestQuoteMeta("\xe2\x80\xb3"); // 3-byte utf8 -- a double prime.
- TestQuoteMeta("\xf0\x9d\x85\x9f"); // 4-byte utf8 -- a music note.
- TestQuoteMeta("27\xc2\xb0"); // Interpreted as Latin-1, this should
- // still work.
- NegativeTestQuoteMeta("27\xc2\xb0",
- "27\\\xc2\\\xb0"); // 2-byte utf8 -- a degree symbol.
-}
-
-TEST(QuoteMeta, HasNull) {
- std::string has_null;
-
- // string with one null character
- has_null += '\0';
- TestQuoteMeta(has_null);
- NegativeTestQuoteMeta(has_null, "");
-
- // Don't want null-followed-by-'1' to be interpreted as '\01'.
- has_null += '1';
- TestQuoteMeta(has_null);
- NegativeTestQuoteMeta(has_null, "\1");
-}
-
-TEST(ProgramSize, BigProgram) {
- RE2 re_simple("simple regexp");
- RE2 re_medium("medium.*regexp");
- RE2 re_complex("complex.{1,128}regexp");
-
- ASSERT_GT(re_simple.ProgramSize(), 0);
- ASSERT_GT(re_medium.ProgramSize(), re_simple.ProgramSize());
- ASSERT_GT(re_complex.ProgramSize(), re_medium.ProgramSize());
-
- ASSERT_GT(re_simple.ReverseProgramSize(), 0);
- ASSERT_GT(re_medium.ReverseProgramSize(), re_simple.ReverseProgramSize());
- ASSERT_GT(re_complex.ReverseProgramSize(), re_medium.ReverseProgramSize());
-}
-
-TEST(ProgramFanout, BigProgram) {
- RE2 re1("(?:(?:(?:(?:(?:.)?){1})*)+)");
- RE2 re10("(?:(?:(?:(?:(?:.)?){10})*)+)");
- RE2 re100("(?:(?:(?:(?:(?:.)?){100})*)+)");
- RE2 re1000("(?:(?:(?:(?:(?:.)?){1000})*)+)");
-
- std::vector<int> histogram;
-
- // 3 is the largest non-empty bucket and has 2 element.
- ASSERT_EQ(3, re1.ProgramFanout(&histogram));
- ASSERT_EQ(2, histogram[3]);
-
- // 6 is the largest non-empty bucket and has 11 elements.
- ASSERT_EQ(6, re10.ProgramFanout(&histogram));
- ASSERT_EQ(11, histogram[6]);
-
- // 9 is the largest non-empty bucket and has 101 elements.
- ASSERT_EQ(9, re100.ProgramFanout(&histogram));
- ASSERT_EQ(101, histogram[9]);
-
- // 13 is the largest non-empty bucket and has 1001 elements.
- ASSERT_EQ(13, re1000.ProgramFanout(&histogram));
- ASSERT_EQ(1001, histogram[13]);
-
- // 2 is the largest non-empty bucket and has 2 element.
- ASSERT_EQ(2, re1.ReverseProgramFanout(&histogram));
- ASSERT_EQ(2, histogram[2]);
-
- // 5 is the largest non-empty bucket and has 11 elements.
- ASSERT_EQ(5, re10.ReverseProgramFanout(&histogram));
- ASSERT_EQ(11, histogram[5]);
-
- // 9 is the largest non-empty bucket and has 101 elements.
- ASSERT_EQ(9, re100.ReverseProgramFanout(&histogram));
- ASSERT_EQ(101, histogram[9]);
-
- // 12 is the largest non-empty bucket and has 1001 elements.
- ASSERT_EQ(12, re1000.ReverseProgramFanout(&histogram));
- ASSERT_EQ(1001, histogram[12]);
-}
-
-// Issue 956519: handling empty character sets was
-// causing NULL dereference. This tests a few empty character sets.
-// (The way to get an empty character set is to negate a full one.)
-TEST(EmptyCharset, Fuzz) {
- static const char *empties[] = {
- "[^\\S\\s]",
- "[^\\S[:space:]]",
- "[^\\D\\d]",
- "[^\\D[:digit:]]"
- };
- for (size_t i = 0; i < arraysize(empties); i++)
- ASSERT_FALSE(RE2(empties[i]).Match("abc", 0, 3, RE2::UNANCHORED, NULL, 0));
-}
-
-// Bitstate assumes that kInstFail instructions in
-// alternations or capture groups have been "compiled away".
-TEST(EmptyCharset, BitstateAssumptions) {
- // Captures trigger use of Bitstate.
- static const char *nop_empties[] = {
- "((((()))))" "[^\\S\\s]?",
- "((((()))))" "([^\\S\\s])?",
- "((((()))))" "([^\\S\\s]|[^\\S\\s])?",
- "((((()))))" "(([^\\S\\s]|[^\\S\\s])|)"
- };
- StringPiece group[6];
- for (size_t i = 0; i < arraysize(nop_empties); i++)
- ASSERT_TRUE(RE2(nop_empties[i]).Match("", 0, 0, RE2::UNANCHORED, group, 6));
-}
-
-// Test that named groups work correctly.
-TEST(Capture, NamedGroups) {
- {
- RE2 re("(hello world)");
- ASSERT_EQ(re.NumberOfCapturingGroups(), 1);
- const std::map<std::string, int>& m = re.NamedCapturingGroups();
- ASSERT_EQ(m.size(), 0);
- }
-
- {
- RE2 re("(?P<A>expr(?P<B>expr)(?P<C>expr))((expr)(?P<D>expr))");
- ASSERT_EQ(re.NumberOfCapturingGroups(), 6);
- const std::map<std::string, int>& m = re.NamedCapturingGroups();
- ASSERT_EQ(m.size(), 4);
- ASSERT_EQ(m.find("A")->second, 1);
- ASSERT_EQ(m.find("B")->second, 2);
- ASSERT_EQ(m.find("C")->second, 3);
- ASSERT_EQ(m.find("D")->second, 6); // $4 and $5 are anonymous
- }
-}
-
-TEST(RE2, CapturedGroupTest) {
- RE2 re("directions from (?P<S>.*) to (?P<D>.*)");
- int num_groups = re.NumberOfCapturingGroups();
- EXPECT_EQ(2, num_groups);
- std::string args[4];
- RE2::Arg arg0(&args[0]);
- RE2::Arg arg1(&args[1]);
- RE2::Arg arg2(&args[2]);
- RE2::Arg arg3(&args[3]);
-
- const RE2::Arg* const matches[4] = {&arg0, &arg1, &arg2, &arg3};
- EXPECT_TRUE(RE2::FullMatchN("directions from mountain view to san jose",
- re, matches, num_groups));
- const std::map<std::string, int>& named_groups = re.NamedCapturingGroups();
- EXPECT_TRUE(named_groups.find("S") != named_groups.end());
- EXPECT_TRUE(named_groups.find("D") != named_groups.end());
-
- // The named group index is 1-based.
- int source_group_index = named_groups.find("S")->second;
- int destination_group_index = named_groups.find("D")->second;
- EXPECT_EQ(1, source_group_index);
- EXPECT_EQ(2, destination_group_index);
-
- // The args is zero-based.
- EXPECT_EQ("mountain view", args[source_group_index - 1]);
- EXPECT_EQ("san jose", args[destination_group_index - 1]);
-}
-
-TEST(RE2, FullMatchWithNoArgs) {
- ASSERT_TRUE(RE2::FullMatch("h", "h"));
- ASSERT_TRUE(RE2::FullMatch("hello", "hello"));
- ASSERT_TRUE(RE2::FullMatch("hello", "h.*o"));
- ASSERT_FALSE(RE2::FullMatch("othello", "h.*o")); // Must be anchored at front
- ASSERT_FALSE(RE2::FullMatch("hello!", "h.*o")); // Must be anchored at end
-}
-
-TEST(RE2, PartialMatch) {
- ASSERT_TRUE(RE2::PartialMatch("x", "x"));
- ASSERT_TRUE(RE2::PartialMatch("hello", "h.*o"));
- ASSERT_TRUE(RE2::PartialMatch("othello", "h.*o"));
- ASSERT_TRUE(RE2::PartialMatch("hello!", "h.*o"));
- ASSERT_TRUE(RE2::PartialMatch("x", "((((((((((((((((((((x))))))))))))))))))))"));
-}
-
-TEST(RE2, PartialMatchN) {
- RE2::Arg argv[2];
- const RE2::Arg* const args[2] = { &argv[0], &argv[1] };
-
- // 0 arg
- EXPECT_TRUE(RE2::PartialMatchN("hello", "e.*o", args, 0));
- EXPECT_FALSE(RE2::PartialMatchN("othello", "a.*o", args, 0));
-
- // 1 arg
- int i;
- argv[0] = &i;
- EXPECT_TRUE(RE2::PartialMatchN("1001 nights", "(\\d+)", args, 1));
- EXPECT_EQ(1001, i);
- EXPECT_FALSE(RE2::PartialMatchN("three", "(\\d+)", args, 1));
-
- // Multi-arg
- std::string s;
- argv[1] = &s;
- EXPECT_TRUE(RE2::PartialMatchN("answer: 42:life", "(\\d+):(\\w+)", args, 2));
- EXPECT_EQ(42, i);
- EXPECT_EQ("life", s);
- EXPECT_FALSE(RE2::PartialMatchN("hi1", "(\\w+)(1)", args, 2));
-}
-
-TEST(RE2, FullMatchZeroArg) {
- // Zero-arg
- ASSERT_TRUE(RE2::FullMatch("1001", "\\d+"));
-}
-
-TEST(RE2, FullMatchOneArg) {
- int i;
-
- // Single-arg
- ASSERT_TRUE(RE2::FullMatch("1001", "(\\d+)", &i));
- ASSERT_EQ(i, 1001);
- ASSERT_TRUE(RE2::FullMatch("-123", "(-?\\d+)", &i));
- ASSERT_EQ(i, -123);
- ASSERT_FALSE(RE2::FullMatch("10", "()\\d+", &i));
- ASSERT_FALSE(
- RE2::FullMatch("1234567890123456789012345678901234567890", "(\\d+)", &i));
-}
-
-TEST(RE2, FullMatchIntegerArg) {
- int i;
-
- // Digits surrounding integer-arg
- ASSERT_TRUE(RE2::FullMatch("1234", "1(\\d*)4", &i));
- ASSERT_EQ(i, 23);
- ASSERT_TRUE(RE2::FullMatch("1234", "(\\d)\\d+", &i));
- ASSERT_EQ(i, 1);
- ASSERT_TRUE(RE2::FullMatch("-1234", "(-\\d)\\d+", &i));
- ASSERT_EQ(i, -1);
- ASSERT_TRUE(RE2::PartialMatch("1234", "(\\d)", &i));
- ASSERT_EQ(i, 1);
- ASSERT_TRUE(RE2::PartialMatch("-1234", "(-\\d)", &i));
- ASSERT_EQ(i, -1);
-}
-
-TEST(RE2, FullMatchStringArg) {
- std::string s;
- // String-arg
- ASSERT_TRUE(RE2::FullMatch("hello", "h(.*)o", &s));
- ASSERT_EQ(s, std::string("ell"));
-}
-
-TEST(RE2, FullMatchStringPieceArg) {
- int i;
- // StringPiece-arg
- StringPiece sp;
- ASSERT_TRUE(RE2::FullMatch("ruby:1234", "(\\w+):(\\d+)", &sp, &i));
- ASSERT_EQ(sp.size(), 4);
- ASSERT_TRUE(memcmp(sp.data(), "ruby", 4) == 0);
- ASSERT_EQ(i, 1234);
-}
-
-TEST(RE2, FullMatchMultiArg) {
- int i;
- std::string s;
- // Multi-arg
- ASSERT_TRUE(RE2::FullMatch("ruby:1234", "(\\w+):(\\d+)", &s, &i));
- ASSERT_EQ(s, std::string("ruby"));
- ASSERT_EQ(i, 1234);
-}
-
-TEST(RE2, FullMatchN) {
- RE2::Arg argv[2];
- const RE2::Arg* const args[2] = { &argv[0], &argv[1] };
-
- // 0 arg
- EXPECT_TRUE(RE2::FullMatchN("hello", "h.*o", args, 0));
- EXPECT_FALSE(RE2::FullMatchN("othello", "h.*o", args, 0));
-
- // 1 arg
- int i;
- argv[0] = &i;
- EXPECT_TRUE(RE2::FullMatchN("1001", "(\\d+)", args, 1));
- EXPECT_EQ(1001, i);
- EXPECT_FALSE(RE2::FullMatchN("three", "(\\d+)", args, 1));
-
- // Multi-arg
- std::string s;
- argv[1] = &s;
- EXPECT_TRUE(RE2::FullMatchN("42:life", "(\\d+):(\\w+)", args, 2));
- EXPECT_EQ(42, i);
- EXPECT_EQ("life", s);
- EXPECT_FALSE(RE2::FullMatchN("hi1", "(\\w+)(1)", args, 2));
-}
-
-TEST(RE2, FullMatchIgnoredArg) {
- int i;
- std::string s;
-
- // Old-school NULL should be ignored.
- ASSERT_TRUE(
- RE2::FullMatch("ruby:1234", "(\\w+)(:)(\\d+)", &s, (void*)NULL, &i));
- ASSERT_EQ(s, std::string("ruby"));
- ASSERT_EQ(i, 1234);
-
- // C++11 nullptr should also be ignored.
- ASSERT_TRUE(RE2::FullMatch("rubz:1235", "(\\w+)(:)(\\d+)", &s, nullptr, &i));
- ASSERT_EQ(s, std::string("rubz"));
- ASSERT_EQ(i, 1235);
-}
-
-TEST(RE2, FullMatchTypedNullArg) {
- std::string s;
-
- // Ignore non-void* NULL arg
- ASSERT_TRUE(RE2::FullMatch("hello", "he(.*)lo", (char*)NULL));
- ASSERT_TRUE(RE2::FullMatch("hello", "h(.*)o", (std::string*)NULL));
- ASSERT_TRUE(RE2::FullMatch("hello", "h(.*)o", (StringPiece*)NULL));
- ASSERT_TRUE(RE2::FullMatch("1234", "(.*)", (int*)NULL));
- ASSERT_TRUE(RE2::FullMatch("1234567890123456", "(.*)", (long long*)NULL));
- ASSERT_TRUE(RE2::FullMatch("123.4567890123456", "(.*)", (double*)NULL));
- ASSERT_TRUE(RE2::FullMatch("123.4567890123456", "(.*)", (float*)NULL));
-
- // Fail on non-void* NULL arg if the match doesn't parse for the given type.
- ASSERT_FALSE(RE2::FullMatch("hello", "h(.*)lo", &s, (char*)NULL));
- ASSERT_FALSE(RE2::FullMatch("hello", "(.*)", (int*)NULL));
- ASSERT_FALSE(RE2::FullMatch("1234567890123456", "(.*)", (int*)NULL));
- ASSERT_FALSE(RE2::FullMatch("hello", "(.*)", (double*)NULL));
- ASSERT_FALSE(RE2::FullMatch("hello", "(.*)", (float*)NULL));
-}
-
-// Check that numeric parsing code does not read past the end of
-// the number being parsed.
-// This implementation requires mmap(2) et al. and thus cannot
-// be used unless they are available.
-TEST(RE2, NULTerminated) {
-#if defined(_POSIX_MAPPED_FILES) && _POSIX_MAPPED_FILES > 0
- char *v;
- int x;
- long pagesize = sysconf(_SC_PAGE_SIZE);
-
-#ifndef MAP_ANONYMOUS
-#define MAP_ANONYMOUS MAP_ANON
-#endif
- v = static_cast<char*>(mmap(NULL, 2*pagesize, PROT_READ|PROT_WRITE,
- MAP_ANONYMOUS|MAP_PRIVATE, -1, 0));
- ASSERT_TRUE(v != reinterpret_cast<char*>(-1));
- LOG(INFO) << "Memory at " << (void*)v;
- ASSERT_EQ(munmap(v + pagesize, pagesize), 0) << " error " << errno;
- v[pagesize - 1] = '1';
-
- x = 0;
- ASSERT_TRUE(RE2::FullMatch(StringPiece(v + pagesize - 1, 1), "(.*)", &x));
- ASSERT_EQ(x, 1);
-#endif
-}
-
-TEST(RE2, FullMatchTypeTests) {
- // Type tests
- std::string zeros(1000, '0');
- {
- char c;
- ASSERT_TRUE(RE2::FullMatch("Hello", "(H)ello", &c));
- ASSERT_EQ(c, 'H');
- }
- {
- unsigned char c;
- ASSERT_TRUE(RE2::FullMatch("Hello", "(H)ello", &c));
- ASSERT_EQ(c, static_cast<unsigned char>('H'));
- }
- {
- int16_t v;
- ASSERT_TRUE(RE2::FullMatch("100", "(-?\\d+)", &v)); ASSERT_EQ(v, 100);
- ASSERT_TRUE(RE2::FullMatch("-100", "(-?\\d+)", &v)); ASSERT_EQ(v, -100);
- ASSERT_TRUE(RE2::FullMatch("32767", "(-?\\d+)", &v)); ASSERT_EQ(v, 32767);
- ASSERT_TRUE(RE2::FullMatch("-32768", "(-?\\d+)", &v)); ASSERT_EQ(v, -32768);
- ASSERT_FALSE(RE2::FullMatch("-32769", "(-?\\d+)", &v));
- ASSERT_FALSE(RE2::FullMatch("32768", "(-?\\d+)", &v));
- }
- {
- uint16_t v;
- ASSERT_TRUE(RE2::FullMatch("100", "(\\d+)", &v)); ASSERT_EQ(v, 100);
- ASSERT_TRUE(RE2::FullMatch("32767", "(\\d+)", &v)); ASSERT_EQ(v, 32767);
- ASSERT_TRUE(RE2::FullMatch("65535", "(\\d+)", &v)); ASSERT_EQ(v, 65535);
- ASSERT_FALSE(RE2::FullMatch("65536", "(\\d+)", &v));
- }
- {
- int32_t v;
- static const int32_t max = INT32_C(0x7fffffff);
- static const int32_t min = -max - 1;
- ASSERT_TRUE(RE2::FullMatch("100", "(-?\\d+)", &v)); ASSERT_EQ(v, 100);
- ASSERT_TRUE(RE2::FullMatch("-100", "(-?\\d+)", &v)); ASSERT_EQ(v, -100);
- ASSERT_TRUE(RE2::FullMatch("2147483647", "(-?\\d+)", &v)); ASSERT_EQ(v, max);
- ASSERT_TRUE(RE2::FullMatch("-2147483648", "(-?\\d+)", &v)); ASSERT_EQ(v, min);
- ASSERT_FALSE(RE2::FullMatch("-2147483649", "(-?\\d+)", &v));
- ASSERT_FALSE(RE2::FullMatch("2147483648", "(-?\\d+)", &v));
-
- ASSERT_TRUE(RE2::FullMatch(zeros + "2147483647", "(-?\\d+)", &v));
- ASSERT_EQ(v, max);
- ASSERT_TRUE(RE2::FullMatch("-" + zeros + "2147483648", "(-?\\d+)", &v));
- ASSERT_EQ(v, min);
-
- ASSERT_FALSE(RE2::FullMatch("-" + zeros + "2147483649", "(-?\\d+)", &v));
- ASSERT_TRUE(RE2::FullMatch("0x7fffffff", "(.*)", RE2::CRadix(&v)));
- ASSERT_EQ(v, max);
- ASSERT_FALSE(RE2::FullMatch("000x7fffffff", "(.*)", RE2::CRadix(&v)));
- }
- {
- uint32_t v;
- static const uint32_t max = UINT32_C(0xffffffff);
- ASSERT_TRUE(RE2::FullMatch("100", "(\\d+)", &v)); ASSERT_EQ(v, 100);
- ASSERT_TRUE(RE2::FullMatch("4294967295", "(\\d+)", &v)); ASSERT_EQ(v, max);
- ASSERT_FALSE(RE2::FullMatch("4294967296", "(\\d+)", &v));
- ASSERT_FALSE(RE2::FullMatch("-1", "(\\d+)", &v));
-
- ASSERT_TRUE(RE2::FullMatch(zeros + "4294967295", "(\\d+)", &v)); ASSERT_EQ(v, max);
- }
- {
- int64_t v;
- static const int64_t max = INT64_C(0x7fffffffffffffff);
- static const int64_t min = -max - 1;
- std::string str;
-
- ASSERT_TRUE(RE2::FullMatch("100", "(-?\\d+)", &v)); ASSERT_EQ(v, 100);
- ASSERT_TRUE(RE2::FullMatch("-100", "(-?\\d+)", &v)); ASSERT_EQ(v, -100);
-
- str = std::to_string(max);
- ASSERT_TRUE(RE2::FullMatch(str, "(-?\\d+)", &v)); ASSERT_EQ(v, max);
-
- str = std::to_string(min);
- ASSERT_TRUE(RE2::FullMatch(str, "(-?\\d+)", &v)); ASSERT_EQ(v, min);
-
- str = std::to_string(max);
- ASSERT_NE(str.back(), '9');
- str.back()++;
- ASSERT_FALSE(RE2::FullMatch(str, "(-?\\d+)", &v));
-
- str = std::to_string(min);
- ASSERT_NE(str.back(), '9');
- str.back()++;
- ASSERT_FALSE(RE2::FullMatch(str, "(-?\\d+)", &v));
- }
- {
- uint64_t v;
- int64_t v2;
- static const uint64_t max = UINT64_C(0xffffffffffffffff);
- std::string str;
-
- ASSERT_TRUE(RE2::FullMatch("100", "(-?\\d+)", &v)); ASSERT_EQ(v, 100);
- ASSERT_TRUE(RE2::FullMatch("-100", "(-?\\d+)", &v2)); ASSERT_EQ(v2, -100);
-
- str = std::to_string(max);
- ASSERT_TRUE(RE2::FullMatch(str, "(-?\\d+)", &v)); ASSERT_EQ(v, max);
-
- ASSERT_NE(str.back(), '9');
- str.back()++;
- ASSERT_FALSE(RE2::FullMatch(str, "(-?\\d+)", &v));
- }
-}
-
-TEST(RE2, FloatingPointFullMatchTypes) {
- std::string zeros(1000, '0');
- {
- float v;
- ASSERT_TRUE(RE2::FullMatch("100", "(.*)", &v)); ASSERT_EQ(v, 100);
- ASSERT_TRUE(RE2::FullMatch("-100.", "(.*)", &v)); ASSERT_EQ(v, -100);
- ASSERT_TRUE(RE2::FullMatch("1e23", "(.*)", &v)); ASSERT_EQ(v, float(1e23));
- ASSERT_TRUE(RE2::FullMatch(" 100", "(.*)", &v)); ASSERT_EQ(v, 100);
-
- ASSERT_TRUE(RE2::FullMatch(zeros + "1e23", "(.*)", &v));
- ASSERT_EQ(v, float(1e23));
-
- // 6700000000081920.1 is an edge case.
- // 6700000000081920 is exactly halfway between
- // two float32s, so the .1 should make it round up.
- // However, the .1 is outside the precision possible with
- // a float64: the nearest float64 is 6700000000081920.
- // So if the code uses strtod and then converts to float32,
- // round-to-even will make it round down instead of up.
- // To pass the test, the parser must call strtof directly.
- // This test case is carefully chosen to use only a 17-digit
- // number, since C does not guarantee to get the correctly
- // rounded answer for strtod and strtof unless the input is
- // short.
- //
- // This is known to fail on Cygwin and MinGW due to a broken
- // implementation of strtof(3). And apparently MSVC too. Sigh.
-#if !defined(_MSC_VER) && !defined(__CYGWIN__) && !defined(__MINGW32__)
- ASSERT_TRUE(RE2::FullMatch("0.1", "(.*)", &v));
- ASSERT_EQ(v, 0.1f) << StringPrintf("%.8g != %.8g", v, 0.1f);
- ASSERT_TRUE(RE2::FullMatch("6700000000081920.1", "(.*)", &v));
- ASSERT_EQ(v, 6700000000081920.1f)
- << StringPrintf("%.8g != %.8g", v, 6700000000081920.1f);
-#endif
- }
- {
- double v;
- ASSERT_TRUE(RE2::FullMatch("100", "(.*)", &v)); ASSERT_EQ(v, 100);
- ASSERT_TRUE(RE2::FullMatch("-100.", "(.*)", &v)); ASSERT_EQ(v, -100);
- ASSERT_TRUE(RE2::FullMatch("1e23", "(.*)", &v)); ASSERT_EQ(v, 1e23);
- ASSERT_TRUE(RE2::FullMatch(zeros + "1e23", "(.*)", &v));
- ASSERT_EQ(v, double(1e23));
-
- ASSERT_TRUE(RE2::FullMatch("0.1", "(.*)", &v));
- ASSERT_EQ(v, 0.1) << StringPrintf("%.17g != %.17g", v, 0.1);
- ASSERT_TRUE(RE2::FullMatch("1.00000005960464485", "(.*)", &v));
- ASSERT_EQ(v, 1.0000000596046448)
- << StringPrintf("%.17g != %.17g", v, 1.0000000596046448);
- }
-}
-
-TEST(RE2, FullMatchAnchored) {
- int i;
- // Check that matching is fully anchored
- ASSERT_FALSE(RE2::FullMatch("x1001", "(\\d+)", &i));
- ASSERT_FALSE(RE2::FullMatch("1001x", "(\\d+)", &i));
- ASSERT_TRUE(RE2::FullMatch("x1001", "x(\\d+)", &i)); ASSERT_EQ(i, 1001);
- ASSERT_TRUE(RE2::FullMatch("1001x", "(\\d+)x", &i)); ASSERT_EQ(i, 1001);
-}
-
-TEST(RE2, FullMatchBraces) {
- // Braces
- ASSERT_TRUE(RE2::FullMatch("0abcd", "[0-9a-f+.-]{5,}"));
- ASSERT_TRUE(RE2::FullMatch("0abcde", "[0-9a-f+.-]{5,}"));
- ASSERT_FALSE(RE2::FullMatch("0abc", "[0-9a-f+.-]{5,}"));
-}
-
-TEST(RE2, Complicated) {
- // Complicated RE2
- ASSERT_TRUE(RE2::FullMatch("foo", "foo|bar|[A-Z]"));
- ASSERT_TRUE(RE2::FullMatch("bar", "foo|bar|[A-Z]"));
- ASSERT_TRUE(RE2::FullMatch("X", "foo|bar|[A-Z]"));
- ASSERT_FALSE(RE2::FullMatch("XY", "foo|bar|[A-Z]"));
-}
-
-TEST(RE2, FullMatchEnd) {
- // Check full-match handling (needs '$' tacked on internally)
- ASSERT_TRUE(RE2::FullMatch("fo", "fo|foo"));
- ASSERT_TRUE(RE2::FullMatch("foo", "fo|foo"));
- ASSERT_TRUE(RE2::FullMatch("fo", "fo|foo$"));
- ASSERT_TRUE(RE2::FullMatch("foo", "fo|foo$"));
- ASSERT_TRUE(RE2::FullMatch("foo", "foo$"));
- ASSERT_FALSE(RE2::FullMatch("foo$bar", "foo\\$"));
- ASSERT_FALSE(RE2::FullMatch("fox", "fo|bar"));
-
- // Uncomment the following if we change the handling of '$' to
- // prevent it from matching a trailing newline
- if (false) {
- // Check that we don't get bitten by pcre's special handling of a
- // '\n' at the end of the string matching '$'
- ASSERT_FALSE(RE2::PartialMatch("foo\n", "foo$"));
- }
-}
-
-TEST(RE2, FullMatchArgCount) {
- // Calls FullMatch with zero, one, two, ... seven and finally sixteen
- // capture arguments and checks that every digit lands in its own slot.
- int a[16];
- ASSERT_TRUE(RE2::FullMatch("", ""));
-
- // Clear the whole array before every call. This has to be sizeof(a):
- // sizeof(0) is the size of a single int, which would reset a[0] only and
- // leave a[1..15] holding whatever the previous call stored there.
- memset(a, 0, sizeof(a));
- ASSERT_TRUE(RE2::FullMatch("1", "(\\d){1}", &a[0]));
- ASSERT_EQ(a[0], 1);
-
- memset(a, 0, sizeof(a));
- ASSERT_TRUE(RE2::FullMatch("12", "(\\d)(\\d)", &a[0], &a[1]));
- ASSERT_EQ(a[0], 1);
- ASSERT_EQ(a[1], 2);
-
- memset(a, 0, sizeof(a));
- ASSERT_TRUE(RE2::FullMatch("123", "(\\d)(\\d)(\\d)", &a[0], &a[1], &a[2]));
- ASSERT_EQ(a[0], 1);
- ASSERT_EQ(a[1], 2);
- ASSERT_EQ(a[2], 3);
-
- memset(a, 0, sizeof(a));
- ASSERT_TRUE(RE2::FullMatch("1234", "(\\d)(\\d)(\\d)(\\d)", &a[0], &a[1],
- &a[2], &a[3]));
- ASSERT_EQ(a[0], 1);
- ASSERT_EQ(a[1], 2);
- ASSERT_EQ(a[2], 3);
- ASSERT_EQ(a[3], 4);
-
- memset(a, 0, sizeof(a));
- ASSERT_TRUE(RE2::FullMatch("12345", "(\\d)(\\d)(\\d)(\\d)(\\d)", &a[0], &a[1],
- &a[2], &a[3], &a[4]));
- ASSERT_EQ(a[0], 1);
- ASSERT_EQ(a[1], 2);
- ASSERT_EQ(a[2], 3);
- ASSERT_EQ(a[3], 4);
- ASSERT_EQ(a[4], 5);
-
- memset(a, 0, sizeof(a));
- ASSERT_TRUE(RE2::FullMatch("123456", "(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)", &a[0],
- &a[1], &a[2], &a[3], &a[4], &a[5]));
- ASSERT_EQ(a[0], 1);
- ASSERT_EQ(a[1], 2);
- ASSERT_EQ(a[2], 3);
- ASSERT_EQ(a[3], 4);
- ASSERT_EQ(a[4], 5);
- ASSERT_EQ(a[5], 6);
-
- memset(a, 0, sizeof(a));
- ASSERT_TRUE(RE2::FullMatch("1234567", "(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)",
- &a[0], &a[1], &a[2], &a[3], &a[4], &a[5], &a[6]));
- ASSERT_EQ(a[0], 1);
- ASSERT_EQ(a[1], 2);
- ASSERT_EQ(a[2], 3);
- ASSERT_EQ(a[3], 4);
- ASSERT_EQ(a[4], 5);
- ASSERT_EQ(a[5], 6);
- ASSERT_EQ(a[6], 7);
-
- // Sixteen arguments: one capture for every element of a[].
- memset(a, 0, sizeof(a));
- ASSERT_TRUE(RE2::FullMatch("1234567890123456",
- "(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)"
- "(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)",
- &a[0], &a[1], &a[2], &a[3], &a[4], &a[5], &a[6],
- &a[7], &a[8], &a[9], &a[10], &a[11], &a[12],
- &a[13], &a[14], &a[15]));
- ASSERT_EQ(a[0], 1);
- ASSERT_EQ(a[1], 2);
- ASSERT_EQ(a[2], 3);
- ASSERT_EQ(a[3], 4);
- ASSERT_EQ(a[4], 5);
- ASSERT_EQ(a[5], 6);
- ASSERT_EQ(a[6], 7);
- ASSERT_EQ(a[7], 8);
- ASSERT_EQ(a[8], 9);
- ASSERT_EQ(a[9], 0);
- ASSERT_EQ(a[10], 1);
- ASSERT_EQ(a[11], 2);
- ASSERT_EQ(a[12], 3);
- ASSERT_EQ(a[13], 4);
- ASSERT_EQ(a[14], 5);
- ASSERT_EQ(a[15], 6);
-}
-
-TEST(RE2, Accessors) {
- // Check the pattern() accessor
- {
- const std::string kPattern = "http://([^/]+)/.*";
- const RE2 re(kPattern);
- ASSERT_EQ(kPattern, re.pattern());
- }
-
- // Check RE2 error field.
- {
- RE2 re("foo");
- ASSERT_TRUE(re.error().empty()); // Must have no error
- ASSERT_TRUE(re.ok());
- ASSERT_EQ(re.error_code(), RE2::NoError);
- }
-}
-
-TEST(RE2, UTF8) {
- // Check UTF-8 handling
- // Three Japanese characters (nihongo)
- const char utf8_string[] = {
- (char)0xe6, (char)0x97, (char)0xa5, // 65e5
- (char)0xe6, (char)0x9c, (char)0xac, // 672c
- (char)0xe8, (char)0xaa, (char)0x9e, // 8a9e
- 0
- };
- const char utf8_pattern[] = {
- '.',
- (char)0xe6, (char)0x9c, (char)0xac, // 672c
- '.',
- 0
- };
-
- // Both should match in either mode, bytes or UTF-8
- RE2 re_test1(".........", RE2::Latin1);
- ASSERT_TRUE(RE2::FullMatch(utf8_string, re_test1));
- RE2 re_test2("...");
- ASSERT_TRUE(RE2::FullMatch(utf8_string, re_test2));
-
- // Check that '.' matches one byte or UTF-8 character
- // according to the mode.
- std::string s;
- RE2 re_test3("(.)", RE2::Latin1);
- ASSERT_TRUE(RE2::PartialMatch(utf8_string, re_test3, &s));
- ASSERT_EQ(s, std::string("\xe6"));
- RE2 re_test4("(.)");
- ASSERT_TRUE(RE2::PartialMatch(utf8_string, re_test4, &s));
- ASSERT_EQ(s, std::string("\xe6\x97\xa5"));
-
- // Check that string matches itself in either mode
- RE2 re_test5(utf8_string, RE2::Latin1);
- ASSERT_TRUE(RE2::FullMatch(utf8_string, re_test5));
- RE2 re_test6(utf8_string);
- ASSERT_TRUE(RE2::FullMatch(utf8_string, re_test6));
-
- // Check that pattern matches string only in UTF8 mode
- RE2 re_test7(utf8_pattern, RE2::Latin1);
- ASSERT_FALSE(RE2::FullMatch(utf8_string, re_test7));
- RE2 re_test8(utf8_pattern);
- ASSERT_TRUE(RE2::FullMatch(utf8_string, re_test8));
-}
-
-TEST(RE2, UngreedyUTF8) {
- // Check that ungreedy, UTF8 regular expressions don't match when they
- // oughtn't -- see bug 82246.
- {
- // This code always worked.
- const char* pattern = "\\w+X";
- const std::string target = "a aX";
- RE2 match_sentence(pattern, RE2::Latin1);
- RE2 match_sentence_re(pattern);
-
- ASSERT_FALSE(RE2::FullMatch(target, match_sentence));
- ASSERT_FALSE(RE2::FullMatch(target, match_sentence_re));
- }
- {
- const char* pattern = "(?U)\\w+X";
- const std::string target = "a aX";
- RE2 match_sentence(pattern, RE2::Latin1);
- ASSERT_EQ(match_sentence.error(), "");
- RE2 match_sentence_re(pattern);
-
- ASSERT_FALSE(RE2::FullMatch(target, match_sentence));
- ASSERT_FALSE(RE2::FullMatch(target, match_sentence_re));
- }
-}
-
-TEST(RE2, Rejects) {
- {
- RE2 re("a\\1", RE2::Quiet);
- ASSERT_FALSE(re.ok()); }
- {
- RE2 re("a[x", RE2::Quiet);
- ASSERT_FALSE(re.ok());
- }
- {
- RE2 re("a[z-a]", RE2::Quiet);
- ASSERT_FALSE(re.ok());
- }
- {
- RE2 re("a[[:foobar:]]", RE2::Quiet);
- ASSERT_FALSE(re.ok());
- }
- {
- RE2 re("a(b", RE2::Quiet);
- ASSERT_FALSE(re.ok());
- }
- {
- RE2 re("a\\", RE2::Quiet);
- ASSERT_FALSE(re.ok());
- }
-}
-
-TEST(RE2, NoCrash) {
- // Test that using a bad regexp doesn't crash.
- {
- RE2 re("a\\", RE2::Quiet);
- ASSERT_FALSE(re.ok());
- ASSERT_FALSE(RE2::PartialMatch("a\\b", re));
- }
-
- // Test that using an enormous regexp doesn't crash
- {
- RE2 re("(((.{100}){100}){100}){100}", RE2::Quiet);
- ASSERT_FALSE(re.ok());
- ASSERT_FALSE(RE2::PartialMatch("aaa", re));
- }
-
- // Test that a crazy regexp still compiles and runs.
- {
- RE2 re(".{512}x", RE2::Quiet);
- ASSERT_TRUE(re.ok());
- std::string s;
- s.append(515, 'c');
- s.append("x");
- ASSERT_TRUE(RE2::PartialMatch(s, re));
- }
-}
-
-TEST(RE2, Recursion) {
- // Test that recursion is stopped.
- // This test is PCRE-legacy -- there's no recursion in RE2.
- int bytes = 15 * 1024; // enough to crash PCRE
- TestRecursion(bytes, ".");
- TestRecursion(bytes, "a");
- TestRecursion(bytes, "a.");
- TestRecursion(bytes, "ab.");
- TestRecursion(bytes, "abc.");
-}
-
-TEST(RE2, BigCountedRepetition) {
- // Test that counted repetition works, given tons of memory.
- RE2::Options opt;
- opt.set_max_mem(256<<20);
-
- RE2 re(".{512}x", opt);
- ASSERT_TRUE(re.ok());
- std::string s;
- s.append(515, 'c');
- s.append("x");
- ASSERT_TRUE(RE2::PartialMatch(s, re));
-}
-
-TEST(RE2, DeepRecursion) {
- // Test for deep stack recursion. This would fail with a
- // segmentation violation due to stack overflow before pcre was
- // patched.
- // Again, a PCRE legacy test. RE2 doesn't recurse.
- std::string comment("x*");
- std::string a(131072, 'a');
- comment += a;
- comment += "*x";
- RE2 re("((?:\\s|xx.*\n|x[*](?:\n|.)*?[*]x)*)");
- ASSERT_TRUE(RE2::FullMatch(comment, re));
-}
-
-// Suggested by Josh Hyman. Failed when SearchOnePass was
-// not implementing case-folding.
-TEST(CaseInsensitive, MatchAndConsume) {
- std::string text = "A fish named *Wanda*";
- StringPiece sp(text);
- StringPiece result;
- EXPECT_TRUE(RE2::PartialMatch(text, "(?i)([wand]{5})", &result));
- EXPECT_TRUE(RE2::FindAndConsume(&sp, "(?i)([wand]{5})", &result));
-}
-
-// RE2 should permit implicit conversions from string, StringPiece, const char*,
-// and C string literals.
-TEST(RE2, ImplicitConversions) {
- std::string re_string(".");
- StringPiece re_stringpiece(".");
- const char* re_cstring = ".";
- EXPECT_TRUE(RE2::PartialMatch("e", re_string));
- EXPECT_TRUE(RE2::PartialMatch("e", re_stringpiece));
- EXPECT_TRUE(RE2::PartialMatch("e", re_cstring));
- EXPECT_TRUE(RE2::PartialMatch("e", "."));
-}
-
-// Bugs introduced by 8622304
-TEST(RE2, CL8622304) {
- // reported by ingow
- std::string dir;
- EXPECT_TRUE(RE2::FullMatch("D", "([^\\\\])")); // ok
- EXPECT_TRUE(RE2::FullMatch("D", "([^\\\\])", &dir)); // fails
-
- // reported by jacobsa
- std::string key, val;
- EXPECT_TRUE(RE2::PartialMatch("bar:1,0x2F,030,4,5;baz:true;fooby:false,true",
- "(\\w+)(?::((?:[^;\\\\]|\\\\.)*))?;?",
- &key,
- &val));
- EXPECT_EQ(key, "bar");
- EXPECT_EQ(val, "1,0x2F,030,4,5");
-}
-
-// Check that RE2 returns correct regexp pieces on error.
-// In particular, make sure it returns whole runes
-// and that it always reports invalid UTF-8.
-// Also check that Perl error flag piece is big enough.
-static struct ErrorTest {
- const char *regexp;
- RE2::ErrorCode error_code;
- const char *error_arg;
-} error_tests[] = {
- { "ab\\αcd", RE2::ErrorBadEscape, "\\α" },
- { "ef\\x☺01", RE2::ErrorBadEscape, "\\x☺0" },
- { "gh\\x1☺01", RE2::ErrorBadEscape, "\\x1☺" },
- { "ij\\x1", RE2::ErrorBadEscape, "\\x1" },
- { "kl\\x", RE2::ErrorBadEscape, "\\x" },
- { "uv\\x{0000☺}", RE2::ErrorBadEscape, "\\x{0000☺" },
- { "wx\\p{ABC", RE2::ErrorBadCharRange, "\\p{ABC" },
- // used to return (?s but the error is X
- { "yz(?smiUX:abc)", RE2::ErrorBadPerlOp, "(?smiUX" },
- { "aa(?sm☺i", RE2::ErrorBadPerlOp, "(?sm☺" },
- { "bb[abc", RE2::ErrorMissingBracket, "[abc" },
- { "abc(def", RE2::ErrorMissingParen, "abc(def" },
- { "abc)def", RE2::ErrorUnexpectedParen, "abc)def" },
-
- // no argument string returned for invalid UTF-8
- { "mn\\x1\377", RE2::ErrorBadUTF8, "" },
- { "op\377qr", RE2::ErrorBadUTF8, "" },
- { "st\\x{00000\377", RE2::ErrorBadUTF8, "" },
- { "zz\\p{\377}", RE2::ErrorBadUTF8, "" },
- { "zz\\x{00\377}", RE2::ErrorBadUTF8, "" },
- { "zz(?P<name\377>abc)", RE2::ErrorBadUTF8, "" },
-};
-TEST(RE2, ErrorCodeAndArg) {
- for (size_t i = 0; i < arraysize(error_tests); i++) {
- RE2 re(error_tests[i].regexp, RE2::Quiet);
- EXPECT_FALSE(re.ok());
- EXPECT_EQ(re.error_code(), error_tests[i].error_code) << re.error();
- EXPECT_EQ(re.error_arg(), error_tests[i].error_arg) << re.error();
- }
-}
-
-// Check that "never match \n" mode never matches \n.
-static struct NeverTest {
- const char* regexp;
- const char* text;
- const char* match;
-} never_tests[] = {
- { "(.*)", "abc\ndef\nghi\n", "abc" },
- { "(?s)(abc.*def)", "abc\ndef\n", NULL },
- { "(abc(.|\n)*def)", "abc\ndef\n", NULL },
- { "(abc[^x]*def)", "abc\ndef\n", NULL },
- { "(abc[^x]*def)", "abczzzdef\ndef\n", "abczzzdef" },
-};
-TEST(RE2, NeverNewline) {
- // Compile every pattern in never_tests with never_nl set and check the
- // recorded outcome: no match at all when t.match is NULL, otherwise the
- // first capture must equal t.match.
- RE2::Options opt;
- opt.set_never_nl(true);
- for (size_t i = 0; i < arraysize(never_tests); i++) {
- const NeverTest& t = never_tests[i];
- RE2 re(t.regexp, opt);
- if (t.match == NULL) {
- // PartialMatch is a static member: call it through the class, as the
- // rest of this file does, not through the instance it is handed.
- EXPECT_FALSE(RE2::PartialMatch(t.text, re));
- } else {
- StringPiece m;
- EXPECT_TRUE(RE2::PartialMatch(t.text, re, &m));
- EXPECT_EQ(m, t.match);
- }
- }
-}
-
-// Check that dot_nl option works.
-TEST(RE2, DotNL) {
- RE2::Options opt;
- opt.set_dot_nl(true);
- EXPECT_TRUE(RE2::PartialMatch("\n", RE2(".", opt)));
- EXPECT_FALSE(RE2::PartialMatch("\n", RE2("(?-s).", opt)));
- opt.set_never_nl(true);
- EXPECT_FALSE(RE2::PartialMatch("\n", RE2(".", opt)));
-}
-
-// Check that there are no capturing groups in "never capture" mode.
-TEST(RE2, NeverCapture) {
- RE2::Options opt;
- opt.set_never_capture(true);
- RE2 re("(r)(e)", opt);
- EXPECT_EQ(0, re.NumberOfCapturingGroups());
-}
-
-// Bitstate bug was looking at submatch[0] even if nsubmatch == 0.
-// Triggered by a failed DFA search falling back to Bitstate when
-// using Match with a NULL submatch set. Bitstate tried to read
-// the submatch[0] entry even if nsubmatch was 0.
-TEST(RE2, BitstateCaptureBug) {
- RE2::Options opt;
- opt.set_max_mem(20000);
- RE2 re("(_________$)", opt);
- StringPiece s = "xxxxxxxxxxxxxxxxxxxxxxxxxx_________x";
- EXPECT_FALSE(re.Match(s, 0, s.size(), RE2::UNANCHORED, NULL, 0));
-}
-
-// C++ version of bug 609710.
-TEST(RE2, UnicodeClasses) {
- const std::string str = "ABCDEFGHI譚永鋒";
- std::string a, b, c;
-
- EXPECT_TRUE(RE2::FullMatch("A", "\\p{L}"));
- EXPECT_TRUE(RE2::FullMatch("A", "\\p{Lu}"));
- EXPECT_FALSE(RE2::FullMatch("A", "\\p{Ll}"));
- EXPECT_FALSE(RE2::FullMatch("A", "\\P{L}"));
- EXPECT_FALSE(RE2::FullMatch("A", "\\P{Lu}"));
- EXPECT_TRUE(RE2::FullMatch("A", "\\P{Ll}"));
-
- EXPECT_TRUE(RE2::FullMatch("譚", "\\p{L}"));
- EXPECT_FALSE(RE2::FullMatch("譚", "\\p{Lu}"));
- EXPECT_FALSE(RE2::FullMatch("譚", "\\p{Ll}"));
- EXPECT_FALSE(RE2::FullMatch("譚", "\\P{L}"));
- EXPECT_TRUE(RE2::FullMatch("譚", "\\P{Lu}"));
- EXPECT_TRUE(RE2::FullMatch("譚", "\\P{Ll}"));
-
- EXPECT_TRUE(RE2::FullMatch("永", "\\p{L}"));
- EXPECT_FALSE(RE2::FullMatch("永", "\\p{Lu}"));
- EXPECT_FALSE(RE2::FullMatch("永", "\\p{Ll}"));
- EXPECT_FALSE(RE2::FullMatch("永", "\\P{L}"));
- EXPECT_TRUE(RE2::FullMatch("永", "\\P{Lu}"));
- EXPECT_TRUE(RE2::FullMatch("永", "\\P{Ll}"));
-
- EXPECT_TRUE(RE2::FullMatch("鋒", "\\p{L}"));
- EXPECT_FALSE(RE2::FullMatch("鋒", "\\p{Lu}"));
- EXPECT_FALSE(RE2::FullMatch("鋒", "\\p{Ll}"));
- EXPECT_FALSE(RE2::FullMatch("鋒", "\\P{L}"));
- EXPECT_TRUE(RE2::FullMatch("鋒", "\\P{Lu}"));
- EXPECT_TRUE(RE2::FullMatch("鋒", "\\P{Ll}"));
-
- EXPECT_TRUE(RE2::PartialMatch(str, "(.).*?(.).*?(.)", &a, &b, &c));
- EXPECT_EQ("A", a);
- EXPECT_EQ("B", b);
- EXPECT_EQ("C", c);
-
- EXPECT_TRUE(RE2::PartialMatch(str, "(.).*?([\\p{L}]).*?(.)", &a, &b, &c));
- EXPECT_EQ("A", a);
- EXPECT_EQ("B", b);
- EXPECT_EQ("C", c);
-
- EXPECT_FALSE(RE2::PartialMatch(str, "\\P{L}"));
-
- EXPECT_TRUE(RE2::PartialMatch(str, "(.).*?([\\p{Lu}]).*?(.)", &a, &b, &c));
- EXPECT_EQ("A", a);
- EXPECT_EQ("B", b);
- EXPECT_EQ("C", c);
-
- EXPECT_FALSE(RE2::PartialMatch(str, "[^\\p{Lu}\\p{Lo}]"));
-
- EXPECT_TRUE(RE2::PartialMatch(str, ".*(.).*?([\\p{Lu}\\p{Lo}]).*?(.)", &a, &b, &c));
- EXPECT_EQ("譚", a);
- EXPECT_EQ("永", b);
- EXPECT_EQ("鋒", c);
-}
-
-TEST(RE2, LazyRE2) {
- // Test with and without options.
- static LazyRE2 a = {"a"};
- static LazyRE2 b = {"b", RE2::Latin1};
-
- EXPECT_EQ("a", a->pattern());
- EXPECT_EQ(RE2::Options::EncodingUTF8, a->options().encoding());
-
- EXPECT_EQ("b", b->pattern());
- EXPECT_EQ(RE2::Options::EncodingLatin1, b->options().encoding());
-}
-
-// Bug reported by saito. 2009/02/17
-TEST(RE2, NullVsEmptyString) {
- RE2 re(".*");
- EXPECT_TRUE(re.ok());
-
- StringPiece null;
- EXPECT_TRUE(RE2::FullMatch(null, re));
-
- StringPiece empty("");
- EXPECT_TRUE(RE2::FullMatch(empty, re));
-}
-
-// Similar to the previous test, check that the null string and the empty
-// string both match, but also that the null string can only provide null
-// submatches whereas the empty string can also provide empty submatches.
-TEST(RE2, NullVsEmptyStringSubmatches) {
- RE2 re("()|(foo)");
- EXPECT_TRUE(re.ok());
-
- // matches[0] is overall match, [1] is (), [2] is (foo), [3] is nonexistent.
- StringPiece matches[4];
-
- for (size_t i = 0; i < arraysize(matches); i++)
- matches[i] = "bar";
-
- StringPiece null;
- EXPECT_TRUE(re.Match(null, 0, null.size(), RE2::UNANCHORED,
- matches, arraysize(matches)));
- for (size_t i = 0; i < arraysize(matches); i++) {
- EXPECT_TRUE(matches[i].data() == NULL); // always null
- EXPECT_TRUE(matches[i].empty());
- }
-
- for (size_t i = 0; i < arraysize(matches); i++)
- matches[i] = "bar";
-
- StringPiece empty("");
- EXPECT_TRUE(re.Match(empty, 0, empty.size(), RE2::UNANCHORED,
- matches, arraysize(matches)));
- EXPECT_TRUE(matches[0].data() != NULL); // empty, not null
- EXPECT_TRUE(matches[0].empty());
- EXPECT_TRUE(matches[1].data() != NULL); // empty, not null
- EXPECT_TRUE(matches[1].empty());
- EXPECT_TRUE(matches[2].data() == NULL);
- EXPECT_TRUE(matches[2].empty());
- EXPECT_TRUE(matches[3].data() == NULL);
- EXPECT_TRUE(matches[3].empty());
-}
-
-// Issue 1816809
-TEST(RE2, Bug1816809) {
- RE2 re("(((((llx((-3)|(4)))(;(llx((-3)|(4))))*))))");
- StringPiece piece("llx-3;llx4");
- std::string x;
- EXPECT_TRUE(RE2::Consume(&piece, re, &x));
-}
-
-// Issue 3061120
-TEST(RE2, Bug3061120) {
- RE2 re("(?i)\\W");
- EXPECT_FALSE(RE2::PartialMatch("x", re)); // always worked
- EXPECT_FALSE(RE2::PartialMatch("k", re)); // broke because of kelvin
- EXPECT_FALSE(RE2::PartialMatch("s", re)); // broke because of latin long s
-}
-
-TEST(RE2, CapturingGroupNames) {
- // Opening parentheses annotated with group IDs:
- // 12 3 45 6 7
- RE2 re("((abc)(?P<G2>)|((e+)(?P<G2>.*)(?P<G1>u+)))");
- EXPECT_TRUE(re.ok());
- const std::map<int, std::string>& have = re.CapturingGroupNames();
- std::map<int, std::string> want;
- want[3] = "G2";
- want[6] = "G2";
- want[7] = "G1";
- EXPECT_EQ(want, have);
-}
-
-TEST(RE2, RegexpToStringLossOfAnchor) {
- EXPECT_EQ(RE2("^[a-c]at", RE2::POSIX).Regexp()->ToString(), "^[a-c]at");
- EXPECT_EQ(RE2("^[a-c]at").Regexp()->ToString(), "(?-m:^)[a-c]at");
- EXPECT_EQ(RE2("ca[t-z]$", RE2::POSIX).Regexp()->ToString(), "ca[t-z]$");
- EXPECT_EQ(RE2("ca[t-z]$").Regexp()->ToString(), "ca[t-z](?-m:$)");
-}
-
-// Issue 10131674
-TEST(RE2, Bug10131674) {
- // Some of these escapes describe values that do not fit in a byte.
- RE2 re("\\140\\440\\174\\271\\150\\656\\106\\201\\004\\332", RE2::Latin1);
- EXPECT_FALSE(re.ok());
- EXPECT_FALSE(RE2::FullMatch("hello world", re));
-}
-
-TEST(RE2, Bug18391750) {
- // Stray write past end of match_ in nfa.cc, caught by fuzzing + address sanitizer.
- const char t[] = {
- (char)0x28, (char)0x28, (char)0xfc, (char)0xfc, (char)0x08, (char)0x08,
- (char)0x26, (char)0x26, (char)0x28, (char)0xc2, (char)0x9b, (char)0xc5,
- (char)0xc5, (char)0xd4, (char)0x8f, (char)0x8f, (char)0x69, (char)0x69,
- (char)0xe7, (char)0x29, (char)0x7b, (char)0x37, (char)0x31, (char)0x31,
- (char)0x7d, (char)0xae, (char)0x7c, (char)0x7c, (char)0xf3, (char)0x29,
- (char)0xae, (char)0xae, (char)0x2e, (char)0x2a, (char)0x29, (char)0x00,
- };
- RE2::Options opt;
- opt.set_encoding(RE2::Options::EncodingLatin1);
- opt.set_longest_match(true);
- opt.set_dot_nl(true);
- opt.set_case_sensitive(false);
- RE2 re(t, opt);
- ASSERT_TRUE(re.ok());
- RE2::PartialMatch(t, re);
-}
-
-TEST(RE2, Bug18458852) {
- // Bug in parser accepting invalid (too large) rune,
- // causing compiler to fail in DCHECK in UTF-8
- // character class code.
- const char b[] = {
- (char)0x28, (char)0x05, (char)0x05, (char)0x41, (char)0x41, (char)0x28,
- (char)0x24, (char)0x5b, (char)0x5e, (char)0xf5, (char)0x87, (char)0x87,
- (char)0x90, (char)0x29, (char)0x5d, (char)0x29, (char)0x29, (char)0x00,
- };
- RE2 re(b);
- ASSERT_FALSE(re.ok());
-}
-
-TEST(RE2, Bug18523943) {
- // Bug in BitState: case kFailInst failed the match entirely.
-
- RE2::Options opt;
- const char a[] = {
- (char)0x29, (char)0x29, (char)0x24, (char)0x00,
- };
- const char b[] = {
- (char)0x28, (char)0x0a, (char)0x2a, (char)0x2a, (char)0x29, (char)0x00,
- };
- opt.set_log_errors(false);
- opt.set_encoding(RE2::Options::EncodingLatin1);
- opt.set_posix_syntax(true);
- opt.set_longest_match(true);
- opt.set_literal(false);
- opt.set_never_nl(true);
-
- RE2 re((const char*)b, opt);
- ASSERT_TRUE(re.ok());
- std::string s1;
- ASSERT_TRUE(RE2::PartialMatch((const char*)a, re, &s1));
-}
-
-TEST(RE2, Bug21371806) {
- // Bug in parser accepting Unicode groups in Latin-1 mode,
- // causing compiler to fail in DCHECK in prog.cc.
-
- RE2::Options opt;
- opt.set_encoding(RE2::Options::EncodingLatin1);
-
- RE2 re("g\\p{Zl}]", opt);
- ASSERT_TRUE(re.ok());
-}
-
-TEST(RE2, Bug26356109) {
- // Bug in parser caused by factoring of common prefixes in alternations.
-
- // In the past, this was factored to "a\\C*?[bc]". Thus, the automaton would
- // consume "ab" and then stop (when unanchored) whereas it should consume all
- // of "abc" as per first-match semantics.
- RE2 re("a\\C*?c|a\\C*?b");
- ASSERT_TRUE(re.ok());
-
- std::string s = "abc";
- StringPiece m;
-
- ASSERT_TRUE(re.Match(s, 0, s.size(), RE2::UNANCHORED, &m, 1));
- ASSERT_EQ(m, s) << " (UNANCHORED) got m='" << m << "', want '" << s << "'";
-
- ASSERT_TRUE(re.Match(s, 0, s.size(), RE2::ANCHOR_BOTH, &m, 1));
- ASSERT_EQ(m, s) << " (ANCHOR_BOTH) got m='" << m << "', want '" << s << "'";
-}
-
-TEST(RE2, Issue104) {
- // RE2::GlobalReplace always advanced by one byte when the empty string was
- // matched, which would clobber any rune that is longer than one byte.
-
- std::string s = "bc";
- ASSERT_EQ(3, RE2::GlobalReplace(&s, "a*", "d"));
- ASSERT_EQ("dbdcd", s);
-
- s = "ąć";
- ASSERT_EQ(3, RE2::GlobalReplace(&s, "Ć*", "Ĉ"));
- ASSERT_EQ("ĈąĈćĈ", s);
-
- s = "人类";
- ASSERT_EQ(3, RE2::GlobalReplace(&s, "大*", "小"));
- ASSERT_EQ("小人小类小", s);
-}
-
-TEST(RE2, Issue310) {
- // (?:|a)* matched more text than (?:|a)+ did.
-
- std::string s = "aaa";
- StringPiece m;
-
- RE2 star("(?:|a)*");
- ASSERT_TRUE(star.Match(s, 0, s.size(), RE2::UNANCHORED, &m, 1));
- ASSERT_EQ(m, "") << " got m='" << m << "', want ''";
-
- RE2 plus("(?:|a)+");
- ASSERT_TRUE(plus.Match(s, 0, s.size(), RE2::UNANCHORED, &m, 1));
- ASSERT_EQ(m, "") << " got m='" << m << "', want ''";
-}
-
-} // namespace re2
+// -*- coding: utf-8 -*-
+// Copyright 2002-2009 The RE2 Authors. All Rights Reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// TODO: Test extractions for PartialMatch/Consume
+
+#include <errno.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>
+#include <map>
+#include <string>
+#include <utility>
+#include <vector>
+#if !defined(_MSC_VER) && !defined(__CYGWIN__) && !defined(__MINGW32__)
+#include <sys/mman.h>
+#include <unistd.h> /* for sysconf */
+#endif
+
+#include "library/cpp/testing/gtest/gtest.h"
+#include "util/logging.h"
+#include "util/strutil.h"
+#include "re2/re2.h"
+#include "re2/regexp.h"
+
+namespace re2 {
+
+TEST(RE2, HexTests) {
+#define ASSERT_HEX(type, value) \
+ do { \
+ type v; \
+ ASSERT_TRUE( \
+ RE2::FullMatch(#value, "([0-9a-fA-F]+)[uUlL]*", RE2::Hex(&v))); \
+ ASSERT_EQ(v, 0x##value); \
+ ASSERT_TRUE(RE2::FullMatch("0x" #value, "([0-9a-fA-FxX]+)[uUlL]*", \
+ RE2::CRadix(&v))); \
+ ASSERT_EQ(v, 0x##value); \
+ } while (0)
+
+ ASSERT_HEX(short, 2bad);
+ ASSERT_HEX(unsigned short, 2badU);
+ ASSERT_HEX(int, dead);
+ ASSERT_HEX(unsigned int, deadU);
+ ASSERT_HEX(long, 7eadbeefL);
+ ASSERT_HEX(unsigned long, deadbeefUL);
+ ASSERT_HEX(long long, 12345678deadbeefLL);
+ ASSERT_HEX(unsigned long long, cafebabedeadbeefULL);
+
+#undef ASSERT_HEX
+}
+
+TEST(RE2, OctalTests) {
+#define ASSERT_OCTAL(type, value) \
+ do { \
+ type v; \
+ ASSERT_TRUE(RE2::FullMatch(#value, "([0-7]+)[uUlL]*", RE2::Octal(&v))); \
+ ASSERT_EQ(v, 0##value); \
+ ASSERT_TRUE(RE2::FullMatch("0" #value, "([0-9a-fA-FxX]+)[uUlL]*", \
+ RE2::CRadix(&v))); \
+ ASSERT_EQ(v, 0##value); \
+ } while (0)
+
+ ASSERT_OCTAL(short, 77777);
+ ASSERT_OCTAL(unsigned short, 177777U);
+ ASSERT_OCTAL(int, 17777777777);
+ ASSERT_OCTAL(unsigned int, 37777777777U);
+ ASSERT_OCTAL(long, 17777777777L);
+ ASSERT_OCTAL(unsigned long, 37777777777UL);
+ ASSERT_OCTAL(long long, 777777777777777777777LL);
+ ASSERT_OCTAL(unsigned long long, 1777777777777777777777ULL);
+
+#undef ASSERT_OCTAL
+}
+
+TEST(RE2, DecimalTests) {
+#define ASSERT_DECIMAL(type, value) \
+ do { \
+ type v; \
+ ASSERT_TRUE(RE2::FullMatch(#value, "(-?[0-9]+)[uUlL]*", &v)); \
+ ASSERT_EQ(v, value); \
+ ASSERT_TRUE( \
+ RE2::FullMatch(#value, "(-?[0-9a-fA-FxX]+)[uUlL]*", RE2::CRadix(&v))); \
+ ASSERT_EQ(v, value); \
+ } while (0)
+
+ ASSERT_DECIMAL(short, -1);
+ ASSERT_DECIMAL(unsigned short, 9999);
+ ASSERT_DECIMAL(int, -1000);
+ ASSERT_DECIMAL(unsigned int, 12345U);
+ ASSERT_DECIMAL(long, -10000000L);
+ ASSERT_DECIMAL(unsigned long, 3083324652U);
+ ASSERT_DECIMAL(long long, -100000000000000LL);
+ ASSERT_DECIMAL(unsigned long long, 1234567890987654321ULL);
+
+#undef ASSERT_DECIMAL
+}
+
+TEST(RE2, Replace) {
+ struct ReplaceTest {
+ const char *regexp;
+ const char *rewrite;
+ const char *original;
+ const char *single;
+ const char *global;
+ int greplace_count;
+ };
+ static const ReplaceTest tests[] = {
+ { "(qu|[b-df-hj-np-tv-z]*)([a-z]+)",
+ "\\2\\1ay",
+ "the quick brown fox jumps over the lazy dogs.",
+ "ethay quick brown fox jumps over the lazy dogs.",
+ "ethay ickquay ownbray oxfay umpsjay overay ethay azylay ogsday.",
+ 9 },
+ { "\\w+",
+ "\\0-NOSPAM",
+ "abcd.efghi@google.com",
+ "abcd-NOSPAM.efghi@google.com",
+ "abcd-NOSPAM.efghi-NOSPAM@google-NOSPAM.com-NOSPAM",
+ 4 },
+ { "^",
+ "(START)",
+ "foo",
+ "(START)foo",
+ "(START)foo",
+ 1 },
+ { "^",
+ "(START)",
+ "",
+ "(START)",
+ "(START)",
+ 1 },
+ { "$",
+ "(END)",
+ "",
+ "(END)",
+ "(END)",
+ 1 },
+ { "b",
+ "bb",
+ "ababababab",
+ "abbabababab",
+ "abbabbabbabbabb",
+ 5 },
+ { "b",
+ "bb",
+ "bbbbbb",
+ "bbbbbbb",
+ "bbbbbbbbbbbb",
+ 6 },
+ { "b+",
+ "bb",
+ "bbbbbb",
+ "bb",
+ "bb",
+ 1 },
+ { "b*",
+ "bb",
+ "bbbbbb",
+ "bb",
+ "bb",
+ 1 },
+ { "b*",
+ "bb",
+ "aaaaa",
+ "bbaaaaa",
+ "bbabbabbabbabbabb",
+ 6 },
+ // Check newline handling
+ { "a.*a",
+ "(\\0)",
+ "aba\naba",
+ "(aba)\naba",
+ "(aba)\n(aba)",
+ 2 },
+ { "", NULL, NULL, NULL, NULL, 0 }
+ };
+
+ for (const ReplaceTest* t = tests; t->original != NULL; t++) {
+ std::string one(t->original);
+ ASSERT_TRUE(RE2::Replace(&one, t->regexp, t->rewrite));
+ ASSERT_EQ(one, t->single);
+ std::string all(t->original);
+ ASSERT_EQ(RE2::GlobalReplace(&all, t->regexp, t->rewrite), t->greplace_count)
+ << "Got: " << all;
+ ASSERT_EQ(all, t->global);
+ }
+}
+
+static void TestCheckRewriteString(const char* regexp, const char* rewrite,
+ bool expect_ok) {
+ std::string error;
+ RE2 exp(regexp);
+ bool actual_ok = exp.CheckRewriteString(rewrite, &error);
+ EXPECT_EQ(expect_ok, actual_ok) << " for " << rewrite << " error: " << error;
+}
+
+TEST(CheckRewriteString, all) {
+ TestCheckRewriteString("abc", "foo", true);
+ TestCheckRewriteString("abc", "foo\\", false);
+ TestCheckRewriteString("abc", "foo\\0bar", true);
+
+ TestCheckRewriteString("a(b)c", "foo", true);
+ TestCheckRewriteString("a(b)c", "foo\\0bar", true);
+ TestCheckRewriteString("a(b)c", "foo\\1bar", true);
+ TestCheckRewriteString("a(b)c", "foo\\2bar", false);
+ TestCheckRewriteString("a(b)c", "f\\\\2o\\1o", true);
+
+ TestCheckRewriteString("a(b)(c)", "foo\\12", true);
+ TestCheckRewriteString("a(b)(c)", "f\\2o\\1o", true);
+ TestCheckRewriteString("a(b)(c)", "f\\oo\\1", false);
+}
+
+TEST(RE2, Extract) {
+ std::string s;
+
+ ASSERT_TRUE(RE2::Extract("boris@kremvax.ru", "(.*)@([^.]*)", "\\2!\\1", &s));
+ ASSERT_EQ(s, "kremvax!boris");
+
+ ASSERT_TRUE(RE2::Extract("foo", ".*", "'\\0'", &s));
+ ASSERT_EQ(s, "'foo'");
+ // check that false match doesn't overwrite
+ ASSERT_FALSE(RE2::Extract("baz", "bar", "'\\0'", &s));
+ ASSERT_EQ(s, "'foo'");
+}
+
+TEST(RE2, MaxSubmatchTooLarge) {
+ std::string s;
+ ASSERT_FALSE(RE2::Extract("foo", "f(o+)", "\\1\\2", &s));
+ s = "foo";
+ ASSERT_FALSE(RE2::Replace(&s, "f(o+)", "\\1\\2"));
+ s = "foo";
+ ASSERT_FALSE(RE2::GlobalReplace(&s, "f(o+)", "\\1\\2"));
+}
+
+TEST(RE2, Consume) {
+ RE2 r("\\s*(\\w+)"); // matches a word, possibly preceded by whitespace
+ std::string word;
+
+ std::string s(" aaa b!@#$@#$cccc");
+ StringPiece input(s);
+
+ ASSERT_TRUE(RE2::Consume(&input, r, &word));
+ ASSERT_EQ(word, "aaa") << " input: " << input;
+ ASSERT_TRUE(RE2::Consume(&input, r, &word));
+ ASSERT_EQ(word, "b") << " input: " << input;
+ ASSERT_FALSE(RE2::Consume(&input, r, &word)) << " input: " << input;
+}
+
+TEST(RE2, ConsumeN) {
+ const std::string s(" one two three 4");
+ StringPiece input(s);
+
+ RE2::Arg argv[2];
+ const RE2::Arg* const args[2] = { &argv[0], &argv[1] };
+
+ // 0 arg
+ EXPECT_TRUE(RE2::ConsumeN(&input, "\\s*(\\w+)", args, 0)); // Skips "one".
+
+ // 1 arg
+ std::string word;
+ argv[0] = &word;
+ EXPECT_TRUE(RE2::ConsumeN(&input, "\\s*(\\w+)", args, 1));
+ EXPECT_EQ("two", word);
+
+ // Multi-args
+ int n;
+ argv[1] = &n;
+ EXPECT_TRUE(RE2::ConsumeN(&input, "\\s*(\\w+)\\s*(\\d+)", args, 2));
+ EXPECT_EQ("three", word);
+ EXPECT_EQ(4, n);
+}
+
+TEST(RE2, FindAndConsume) {
+ RE2 r("(\\w+)"); // matches a word
+ std::string word;
+
+ std::string s(" aaa b!@#$@#$cccc");
+ StringPiece input(s);
+
+ ASSERT_TRUE(RE2::FindAndConsume(&input, r, &word));
+ ASSERT_EQ(word, "aaa");
+ ASSERT_TRUE(RE2::FindAndConsume(&input, r, &word));
+ ASSERT_EQ(word, "b");
+ ASSERT_TRUE(RE2::FindAndConsume(&input, r, &word));
+ ASSERT_EQ(word, "cccc");
+ ASSERT_FALSE(RE2::FindAndConsume(&input, r, &word));
+
+ // Check that FindAndConsume works without any submatches.
+ // Earlier version used uninitialized data for
+ // length to consume.
+ input = "aaa";
+ ASSERT_TRUE(RE2::FindAndConsume(&input, "aaa"));
+ ASSERT_EQ(input, "");
+}
+
+TEST(RE2, FindAndConsumeN) {
+ const std::string s(" one two three 4");
+ StringPiece input(s);
+
+ RE2::Arg argv[2];
+ const RE2::Arg* const args[2] = { &argv[0], &argv[1] };
+
+ // 0 arg
+ EXPECT_TRUE(RE2::FindAndConsumeN(&input, "(\\w+)", args, 0)); // Skips "one".
+
+ // 1 arg
+ std::string word;
+ argv[0] = &word;
+ EXPECT_TRUE(RE2::FindAndConsumeN(&input, "(\\w+)", args, 1));
+ EXPECT_EQ("two", word);
+
+ // Multi-args
+ int n;
+ argv[1] = &n;
+ EXPECT_TRUE(RE2::FindAndConsumeN(&input, "(\\w+)\\s*(\\d+)", args, 2));
+ EXPECT_EQ("three", word);
+ EXPECT_EQ(4, n);
+}
+
+TEST(RE2, MatchNumberPeculiarity) {
+ RE2 r("(foo)|(bar)|(baz)");
+ std::string word1;
+ std::string word2;
+ std::string word3;
+
+ ASSERT_TRUE(RE2::PartialMatch("foo", r, &word1, &word2, &word3));
+ ASSERT_EQ(word1, "foo");
+ ASSERT_EQ(word2, "");
+ ASSERT_EQ(word3, "");
+ ASSERT_TRUE(RE2::PartialMatch("bar", r, &word1, &word2, &word3));
+ ASSERT_EQ(word1, "");
+ ASSERT_EQ(word2, "bar");
+ ASSERT_EQ(word3, "");
+ ASSERT_TRUE(RE2::PartialMatch("baz", r, &word1, &word2, &word3));
+ ASSERT_EQ(word1, "");
+ ASSERT_EQ(word2, "");
+ ASSERT_EQ(word3, "baz");
+ ASSERT_FALSE(RE2::PartialMatch("f", r, &word1, &word2, &word3));
+
+ std::string a;
+ ASSERT_TRUE(RE2::FullMatch("hello", "(foo)|hello", &a));
+ ASSERT_EQ(a, "");
+}
+
+TEST(RE2, Match) {
+ RE2 re("((\\w+):([0-9]+))"); // extracts host and port
+ StringPiece group[4];
+
+ // No match.
+ StringPiece s = "zyzzyva";
+ ASSERT_FALSE(
+ re.Match(s, 0, s.size(), RE2::UNANCHORED, group, arraysize(group)));
+
+ // Matches and extracts.
+ s = "a chrisr:9000 here";
+ ASSERT_TRUE(
+ re.Match(s, 0, s.size(), RE2::UNANCHORED, group, arraysize(group)));
+ ASSERT_EQ(group[0], "chrisr:9000");
+ ASSERT_EQ(group[1], "chrisr:9000");
+ ASSERT_EQ(group[2], "chrisr");
+ ASSERT_EQ(group[3], "9000");
+
+ std::string all, host;
+ int port;
+ ASSERT_TRUE(RE2::PartialMatch("a chrisr:9000 here", re, &all, &host, &port));
+ ASSERT_EQ(all, "chrisr:9000");
+ ASSERT_EQ(host, "chrisr");
+ ASSERT_EQ(port, 9000);
+}
+
+// Builds a subject of `size` bytes by cycling through the bytes of `pattern`
+// and runs a fixed, quietly-compiled regexp over it. The match result is
+// discarded: the only thing checked is that the call comes back.
+// `pattern` is used as a modulus base, so callers must pass a non-empty string.
+static void TestRecursion(int size, const char* pattern) {
+ const size_t period = strlen(pattern);
+ std::string subject(size, '\0');
+ size_t pos = 0;
+ while (pos < subject.size()) {
+ subject[pos] = pattern[pos % period];
+ ++pos;
+ }
+ // Success here simply means "did not crash from too much recursion".
+ RE2 re("([a-zA-Z0-9]|-)+(\\.([a-zA-Z0-9]|-)+)*(\\.)?", RE2::Quiet);
+ RE2::FullMatch(subject, re);
+}
+
+// Positive QuoteMeta check: compiling RE2::QuoteMeta(unquoted) as a pattern
+// (under `options`) must yield a regexp that fully matches `unquoted` itself.
+static void TestQuoteMeta(const std::string& unquoted,
+ const RE2::Options& options = RE2::DefaultOptions) {
+ // The quoted text is kept by the RE2 object and read back via pattern()
+ // for the failure message.
+ RE2 re(RE2::QuoteMeta(unquoted), options);
+ EXPECT_TRUE(RE2::FullMatch(unquoted, re))
+ << "Unquoted='" << unquoted << "', quoted='" << re.pattern() << "'.";
+}
+
+// Negative QuoteMeta check: compiling RE2::QuoteMeta(unquoted) as a pattern
+// (under `options`) must yield a regexp that does NOT fully match
+// `should_not_match`.
+static void NegativeTestQuoteMeta(
+ const std::string& unquoted, const std::string& should_not_match,
+ const RE2::Options& options = RE2::DefaultOptions) {
+ // The quoted text is kept by the RE2 object and read back via pattern()
+ // for the failure message.
+ RE2 re(RE2::QuoteMeta(unquoted), options);
+ EXPECT_FALSE(RE2::FullMatch(should_not_match, re))
+ << "Unquoted='" << unquoted << "', quoted='" << re.pattern() << "'.";
+}
+
+// Tests that quoted meta characters match their original strings,
+// and that a few things that shouldn't match indeed do not.
+TEST(QuoteMeta, Simple) {
+ TestQuoteMeta("foo");
+ TestQuoteMeta("foo.bar");
+ TestQuoteMeta("foo\\.bar");
+ TestQuoteMeta("[1-9]");
+ TestQuoteMeta("1.5-2.0?");
+ TestQuoteMeta("\\d");
+ TestQuoteMeta("Who doesn't like ice cream?");
+ TestQuoteMeta("((a|b)c?d*e+[f-h]i)");
+ TestQuoteMeta("((?!)xxx).*yyy");
+ TestQuoteMeta("([");
+}
+TEST(QuoteMeta, SimpleNegative) {
+ NegativeTestQuoteMeta("foo", "bar");
+ NegativeTestQuoteMeta("...", "bar");
+ NegativeTestQuoteMeta("\\.", ".");
+ NegativeTestQuoteMeta("\\.", "..");
+ NegativeTestQuoteMeta("(a)", "a");
+ NegativeTestQuoteMeta("(a|b)", "a");
+ NegativeTestQuoteMeta("(a|b)", "(a)");
+ NegativeTestQuoteMeta("(a|b)", "a|b");
+ NegativeTestQuoteMeta("[0-9]", "0");
+ NegativeTestQuoteMeta("[0-9]", "0-9");
+ NegativeTestQuoteMeta("[0-9]", "[9]");
+ NegativeTestQuoteMeta("((?!)xxx)", "xxx");
+}
+
+TEST(QuoteMeta, Latin1) {
+ TestQuoteMeta("3\xb2 = 9", RE2::Latin1);
+}
+
+TEST(QuoteMeta, UTF8) {
+ TestQuoteMeta("Plácido Domingo");
+ TestQuoteMeta("xyz"); // No fancy utf8.
+ TestQuoteMeta("\xc2\xb0"); // 2-byte utf8 -- a degree symbol.
+ TestQuoteMeta("27\xc2\xb0 degrees"); // As a middle character.
+ TestQuoteMeta("\xe2\x80\xb3"); // 3-byte utf8 -- a double prime.
+ TestQuoteMeta("\xf0\x9d\x85\x9f"); // 4-byte utf8 -- a music note.
+ TestQuoteMeta("27\xc2\xb0"); // Interpreted as Latin-1, this should
+ // still work.
+ NegativeTestQuoteMeta("27\xc2\xb0",
+ "27\\\xc2\\\xb0"); // 2-byte utf8 -- a degree symbol.
+}
+
+// Strings containing an embedded NUL byte must survive QuoteMeta.
+TEST(QuoteMeta, HasNull) {
+  // A string consisting of exactly one NUL character.
+  const std::string nul_only(1, '\0');
+  TestQuoteMeta(nul_only);
+  NegativeTestQuoteMeta(nul_only, "");
+
+  // NUL followed by '1' must not be read as the escape '\01'.
+  const std::string nul_then_one = nul_only + '1';
+  TestQuoteMeta(nul_then_one);
+  NegativeTestQuoteMeta(nul_then_one, "\1");
+}
+
+// Program sizes (forward and reverse) must be positive and must grow
+// strictly from the simple to the medium to the complex pattern.
+TEST(ProgramSize, BigProgram) {
+  RE2 re_simple("simple regexp");
+  RE2 re_medium("medium.*regexp");
+  RE2 re_complex("complex.{1,128}regexp");
+
+  // Forward programs.
+  const auto fwd_simple = re_simple.ProgramSize();
+  ASSERT_GT(fwd_simple, 0);
+  const auto fwd_medium = re_medium.ProgramSize();
+  ASSERT_GT(fwd_medium, fwd_simple);
+  const auto fwd_complex = re_complex.ProgramSize();
+  ASSERT_GT(fwd_complex, fwd_medium);
+
+  // Reverse programs, queried only after the forward checks have passed.
+  const auto rev_simple = re_simple.ReverseProgramSize();
+  ASSERT_GT(rev_simple, 0);
+  const auto rev_medium = re_medium.ReverseProgramSize();
+  ASSERT_GT(rev_medium, rev_simple);
+  const auto rev_complex = re_complex.ReverseProgramSize();
+  ASSERT_GT(rev_complex, rev_medium);
+}
+
+// Checks ProgramFanout()/ReverseProgramFanout() on four patterns that differ
+// only in their repetition count (1, 10, 100, 1000). Each call fills
+// `histogram`; the return value is compared with the index of the largest
+// non-empty bucket and that bucket's count is then checked.
+TEST(ProgramFanout, BigProgram) {
+ RE2 re1("(?:(?:(?:(?:(?:.)?){1})*)+)");
+ RE2 re10("(?:(?:(?:(?:(?:.)?){10})*)+)");
+ RE2 re100("(?:(?:(?:(?:(?:.)?){100})*)+)");
+ RE2 re1000("(?:(?:(?:(?:(?:.)?){1000})*)+)");
+
+ // Reused as the output argument of every call below.
+ std::vector<int> histogram;
+
+ // 3 is the largest non-empty bucket and has 2 elements.
+ ASSERT_EQ(3, re1.ProgramFanout(&histogram));
+ ASSERT_EQ(2, histogram[3]);
+
+ // 6 is the largest non-empty bucket and has 11 elements.
+ ASSERT_EQ(6, re10.ProgramFanout(&histogram));
+ ASSERT_EQ(11, histogram[6]);
+
+ // 9 is the largest non-empty bucket and has 101 elements.
+ ASSERT_EQ(9, re100.ProgramFanout(&histogram));
+ ASSERT_EQ(101, histogram[9]);
+
+ // 13 is the largest non-empty bucket and has 1001 elements.
+ ASSERT_EQ(13, re1000.ProgramFanout(&histogram));
+ ASSERT_EQ(1001, histogram[13]);
+
+ // 2 is the largest non-empty bucket and has 2 elements.
+ ASSERT_EQ(2, re1.ReverseProgramFanout(&histogram));
+ ASSERT_EQ(2, histogram[2]);
+
+ // 5 is the largest non-empty bucket and has 11 elements.
+ ASSERT_EQ(5, re10.ReverseProgramFanout(&histogram));
+ ASSERT_EQ(11, histogram[5]);
+
+ // 9 is the largest non-empty bucket and has 101 elements.
+ ASSERT_EQ(9, re100.ReverseProgramFanout(&histogram));
+ ASSERT_EQ(101, histogram[9]);
+
+ // 12 is the largest non-empty bucket and has 1001 elements.
+ ASSERT_EQ(12, re1000.ReverseProgramFanout(&histogram));
+ ASSERT_EQ(1001, histogram[12]);
+}
+
+// Issue 956519: empty character sets used to cause a NULL dereference.
+// An empty set is obtained by negating a full one; none of the patterns
+// below may match anywhere in "abc".
+TEST(EmptyCharset, Fuzz) {
+  static const char* const kEmptySets[] = {
+    "[^\\S\\s]",
+    "[^\\S[:space:]]",
+    "[^\\D\\d]",
+    "[^\\D[:digit:]]",
+  };
+  for (const char* pattern : kEmptySets) {
+    RE2 re(pattern);
+    ASSERT_FALSE(re.Match("abc", 0, 3, RE2::UNANCHORED, NULL, 0));
+  }
+}
+
+// Bitstate assumes that kInstFail instructions in alternations or capture
+// groups have been "compiled away". Every pattern below makes its empty
+// character set optional, so each must still match the empty string.
+TEST(EmptyCharset, BitstateAssumptions) {
+  // The leading capture groups are there to trigger use of Bitstate.
+  static const char* const kPatterns[] = {
+    "((((()))))" "[^\\S\\s]?",
+    "((((()))))" "([^\\S\\s])?",
+    "((((()))))" "([^\\S\\s]|[^\\S\\s])?",
+    "((((()))))" "(([^\\S\\s]|[^\\S\\s])|)",
+  };
+  StringPiece submatches[6];
+  for (const char* pattern : kPatterns) {
+    RE2 re(pattern);
+    ASSERT_TRUE(re.Match("", 0, 0, RE2::UNANCHORED, submatches, 6));
+  }
+}
+
+// NamedCapturingGroups() must list exactly the named groups, each mapped to
+// its position among all capturing groups.
+TEST(Capture, NamedGroups) {
+  // One unnamed group: counted, but absent from the name map.
+  {
+    RE2 unnamed_only("(hello world)");
+    ASSERT_EQ(unnamed_only.NumberOfCapturingGroups(), 1);
+    const std::map<std::string, int>& names =
+        unnamed_only.NamedCapturingGroups();
+    ASSERT_EQ(names.size(), 0);
+  }
+
+  // Four named groups interleaved with two anonymous ones.
+  {
+    RE2 mixed("(?P<A>expr(?P<B>expr)(?P<C>expr))((expr)(?P<D>expr))");
+    ASSERT_EQ(mixed.NumberOfCapturingGroups(), 6);
+    const std::map<std::string, int>& names = mixed.NamedCapturingGroups();
+    ASSERT_EQ(names.size(), 4);
+    ASSERT_EQ(names.find("A")->second, 1);
+    ASSERT_EQ(names.find("B")->second, 2);
+    ASSERT_EQ(names.find("C")->second, 3);
+    // Groups 4 and 5 are the anonymous ones, so D is number 6.
+    ASSERT_EQ(names.find("D")->second, 6);
+  }
+}
+
+// Matches a pattern with two named groups through FullMatchN and checks that
+// NamedCapturingGroups() maps each name to its 1-based group index, which is
+// then used (minus one) to index the zero-based argument array.
+TEST(RE2, CapturedGroupTest) {
+  RE2 re("directions from (?P<S>.*) to (?P<D>.*)");
+  int num_groups = re.NumberOfCapturingGroups();
+  // Fatal on mismatch: num_groups is passed below as the element count of a
+  // four-entry array, so an unexpected value must stop the test here.
+  ASSERT_EQ(2, num_groups);
+  std::string args[4];
+  RE2::Arg arg0(&args[0]);
+  RE2::Arg arg1(&args[1]);
+  RE2::Arg arg2(&args[2]);
+  RE2::Arg arg3(&args[3]);
+
+  const RE2::Arg* const matches[4] = {&arg0, &arg1, &arg2, &arg3};
+  EXPECT_TRUE(RE2::FullMatchN("directions from mountain view to san jose",
+                              re, matches, num_groups));
+  const std::map<std::string, int>& named_groups = re.NamedCapturingGroups();
+
+  // Fatal on failure: both iterators are dereferenced below, which would be
+  // undefined behavior if either name were missing from the map.
+  std::map<std::string, int>::const_iterator source = named_groups.find("S");
+  std::map<std::string, int>::const_iterator destination =
+      named_groups.find("D");
+  ASSERT_TRUE(source != named_groups.end());
+  ASSERT_TRUE(destination != named_groups.end());
+
+  // The named group index is 1-based. Fatal on mismatch because the values
+  // are used as array subscripts right after.
+  int source_group_index = source->second;
+  int destination_group_index = destination->second;
+  ASSERT_EQ(1, source_group_index);
+  ASSERT_EQ(2, destination_group_index);
+
+  // The args is zero-based.
+  EXPECT_EQ("mountain view", args[source_group_index - 1]);
+  EXPECT_EQ("san jose", args[destination_group_index - 1]);
+}
+
+// FullMatch without capture arguments: the pattern must cover the whole text.
+TEST(RE2, FullMatchWithNoArgs) {
+  ASSERT_TRUE(RE2::FullMatch("h", "h"));
+  ASSERT_TRUE(RE2::FullMatch("hello", "hello"));
+
+  const char* const h_to_o = "h.*o";
+  ASSERT_TRUE(RE2::FullMatch("hello", h_to_o));
+  // Extra text in front of the match: rejected (anchored at the front).
+  ASSERT_FALSE(RE2::FullMatch("othello", h_to_o));
+  // Extra text behind the match: rejected (anchored at the end).
+  ASSERT_FALSE(RE2::FullMatch("hello!", h_to_o));
+}
+
+// PartialMatch succeeds when the pattern matches anywhere inside the text.
+TEST(RE2, PartialMatch) {
+  ASSERT_TRUE(RE2::PartialMatch("x", "x"));
+
+  const char* const h_to_o = "h.*o";
+  ASSERT_TRUE(RE2::PartialMatch("hello", h_to_o));
+  ASSERT_TRUE(RE2::PartialMatch("othello", h_to_o));
+  ASSERT_TRUE(RE2::PartialMatch("hello!", h_to_o));
+
+  // Twenty nested capture groups around a single literal.
+  const char* const deeply_nested =
+      "((((((((((((((((((((x))))))))))))))))))))";
+  ASSERT_TRUE(RE2::PartialMatch("x", deeply_nested));
+}
+
+// Exercises PartialMatchN with an explicit argument array and 0, 1 and 2
+// requested arguments. The argv slots are (re)assigned between sections, so
+// the statement order matters.
+TEST(RE2, PartialMatchN) {
+ RE2::Arg argv[2];
+ // args points at the argv slots; assigning argv[k] later changes what the
+ // corresponding args[k] refers to.
+ const RE2::Arg* const args[2] = { &argv[0], &argv[1] };
+
+ // 0 arg
+ EXPECT_TRUE(RE2::PartialMatchN("hello", "e.*o", args, 0));
+ EXPECT_FALSE(RE2::PartialMatchN("othello", "a.*o", args, 0));
+
+ // 1 arg
+ int i;
+ argv[0] = &i;
+ EXPECT_TRUE(RE2::PartialMatchN("1001 nights", "(\\d+)", args, 1));
+ EXPECT_EQ(1001, i);
+ EXPECT_FALSE(RE2::PartialMatchN("three", "(\\d+)", args, 1));
+
+ // Multi-arg
+ std::string s;
+ argv[1] = &s;
+ EXPECT_TRUE(RE2::PartialMatchN("answer: 42:life", "(\\d+):(\\w+)", args, 2));
+ EXPECT_EQ(42, i);
+ EXPECT_EQ("life", s);
+ // First destination is still the int, so a non-numeric first capture is
+ // expected to make the call fail.
+ EXPECT_FALSE(RE2::PartialMatchN("hi1", "(\\w+)(1)", args, 2));
+}
+
+TEST(RE2, FullMatchZeroArg) {
+  // No capture destinations are supplied; only the match result is checked.
+  const bool matched = RE2::FullMatch("1001", "\\d+");
+  ASSERT_TRUE(matched);
+}
+
+// FullMatch with a single int destination.
+TEST(RE2, FullMatchOneArg) {
+  int parsed;
+
+  // A captured run of digits is converted into the int destination.
+  ASSERT_TRUE(RE2::FullMatch("1001", "(\\d+)", &parsed));
+  ASSERT_EQ(parsed, 1001);
+
+  // A minus sign inside the group yields a negative value.
+  ASSERT_TRUE(RE2::FullMatch("-123", "(-?\\d+)", &parsed));
+  ASSERT_EQ(parsed, -123);
+
+  // The regexp covers "10", but the captured text is empty; the call is
+  // expected to fail.
+  ASSERT_FALSE(RE2::FullMatch("10", "()\\d+", &parsed));
+
+  // A forty-digit capture is expected to be rejected for an int destination.
+  const char* const forty_digits = "1234567890123456789012345678901234567890";
+  ASSERT_FALSE(RE2::FullMatch(forty_digits, "(\\d+)", &parsed));
+}
+
+// The int destination receives only the captured digits, even when the
+// capture is surrounded by further digits in the text.
+TEST(RE2, FullMatchIntegerArg) {
+  int value;
+
+  // Capture in the middle of the number.
+  ASSERT_TRUE(RE2::FullMatch("1234", "1(\\d*)4", &value));
+  ASSERT_EQ(value, 23);
+
+  // Capture of the leading digit, with and without a sign.
+  ASSERT_TRUE(RE2::FullMatch("1234", "(\\d)\\d+", &value));
+  ASSERT_EQ(value, 1);
+  ASSERT_TRUE(RE2::FullMatch("-1234", "(-\\d)\\d+", &value));
+  ASSERT_EQ(value, -1);
+
+  // Same captures through PartialMatch, leaving trailing digits unmatched.
+  ASSERT_TRUE(RE2::PartialMatch("1234", "(\\d)", &value));
+  ASSERT_EQ(value, 1);
+  ASSERT_TRUE(RE2::PartialMatch("-1234", "(-\\d)", &value));
+  ASSERT_EQ(value, -1);
+}
+
+TEST(RE2, FullMatchStringArg) {
+  // The captured text is copied into a std::string destination.
+  std::string inner;
+  ASSERT_TRUE(RE2::FullMatch("hello", "h(.*)o", &inner));
+  ASSERT_EQ(inner, std::string("ell"));
+}
+
+TEST(RE2, FullMatchStringPieceArg) {
+  // First capture goes to a StringPiece, second to an int.
+  StringPiece word;
+  int number;
+  ASSERT_TRUE(RE2::FullMatch("ruby:1234", "(\\w+):(\\d+)", &word, &number));
+
+  // The StringPiece is checked by length and bytes.
+  ASSERT_EQ(word.size(), 4);
+  ASSERT_TRUE(memcmp(word.data(), "ruby", 4) == 0);
+  ASSERT_EQ(number, 1234);
+}
+
+TEST(RE2, FullMatchMultiArg) {
+  // Two captures of different types filled by one call.
+  std::string word;
+  int number;
+  ASSERT_TRUE(RE2::FullMatch("ruby:1234", "(\\w+):(\\d+)", &word, &number));
+  ASSERT_EQ(word, std::string("ruby"));
+  ASSERT_EQ(number, 1234);
+}
+
+// Exercises FullMatchN with an explicit argument array and 0, 1 and 2
+// requested arguments. The argv slots are (re)assigned between sections, so
+// the statement order matters.
+TEST(RE2, FullMatchN) {
+ RE2::Arg argv[2];
+ // args points at the argv slots; assigning argv[k] later changes what the
+ // corresponding args[k] refers to.
+ const RE2::Arg* const args[2] = { &argv[0], &argv[1] };
+
+ // 0 arg
+ EXPECT_TRUE(RE2::FullMatchN("hello", "h.*o", args, 0));
+ EXPECT_FALSE(RE2::FullMatchN("othello", "h.*o", args, 0));
+
+ // 1 arg
+ int i;
+ argv[0] = &i;
+ EXPECT_TRUE(RE2::FullMatchN("1001", "(\\d+)", args, 1));
+ EXPECT_EQ(1001, i);
+ EXPECT_FALSE(RE2::FullMatchN("three", "(\\d+)", args, 1));
+
+ // Multi-arg
+ std::string s;
+ argv[1] = &s;
+ EXPECT_TRUE(RE2::FullMatchN("42:life", "(\\d+):(\\w+)", args, 2));
+ EXPECT_EQ(42, i);
+ EXPECT_EQ("life", s);
+ // First destination is still the int, so a non-numeric first capture is
+ // expected to make the call fail.
+ EXPECT_FALSE(RE2::FullMatchN("hi1", "(\\w+)(1)", args, 2));
+}
+
+// A null destination in the middle of the argument list is skipped while
+// the surrounding destinations are still filled.
+TEST(RE2, FullMatchIgnoredArg) {
+  std::string word;
+  int number;
+
+  // Pre-C++11 spelling: a NULL cast to void*.
+  ASSERT_TRUE(RE2::FullMatch("ruby:1234", "(\\w+)(:)(\\d+)", &word,
+                             (void*)NULL, &number));
+  ASSERT_EQ(word, std::string("ruby"));
+  ASSERT_EQ(number, 1234);
+
+  // C++11 spelling: nullptr.
+  ASSERT_TRUE(RE2::FullMatch("rubz:1235", "(\\w+)(:)(\\d+)", &word, nullptr,
+                             &number));
+  ASSERT_EQ(word, std::string("rubz"));
+  ASSERT_EQ(number, 1235);
+}
+
+// Passes NULL pointers of specific (non-void) types as capture destinations.
+// The first group of assertions expects success when the captured text is
+// acceptable for that type; the second expects failure when it is not.
+TEST(RE2, FullMatchTypedNullArg) {
+ std::string s;
+
+ // Ignore non-void* NULL arg
+ ASSERT_TRUE(RE2::FullMatch("hello", "he(.*)lo", (char*)NULL));
+ ASSERT_TRUE(RE2::FullMatch("hello", "h(.*)o", (std::string*)NULL));
+ ASSERT_TRUE(RE2::FullMatch("hello", "h(.*)o", (StringPiece*)NULL));
+ ASSERT_TRUE(RE2::FullMatch("1234", "(.*)", (int*)NULL));
+ ASSERT_TRUE(RE2::FullMatch("1234567890123456", "(.*)", (long long*)NULL));
+ ASSERT_TRUE(RE2::FullMatch("123.4567890123456", "(.*)", (double*)NULL));
+ ASSERT_TRUE(RE2::FullMatch("123.4567890123456", "(.*)", (float*)NULL));
+
+ // Fail on non-void* NULL arg if the match doesn't parse for the given type.
+ ASSERT_FALSE(RE2::FullMatch("hello", "h(.*)lo", &s, (char*)NULL));
+ ASSERT_FALSE(RE2::FullMatch("hello", "(.*)", (int*)NULL));
+ ASSERT_FALSE(RE2::FullMatch("1234567890123456", "(.*)", (int*)NULL));
+ ASSERT_FALSE(RE2::FullMatch("hello", "(.*)", (double*)NULL));
+ ASSERT_FALSE(RE2::FullMatch("hello", "(.*)", (float*)NULL));
+}
+
+// Check that numeric parsing code does not read past the end of
+// the number being parsed.
+// This implementation requires mmap(2) et al. and thus cannot
+// be used unless they are available.
+// Maps two pages, unmaps the second, and places a single digit in the last
+// byte of the first page. Parsing that one-byte StringPiece as an int would
+// fault if the numeric parser read even one byte past the end of its input.
+TEST(RE2, NULTerminated) {
+#if defined(_POSIX_MAPPED_FILES) && _POSIX_MAPPED_FILES > 0
+  char *v;
+  int x;
+  long pagesize = sysconf(_SC_PAGE_SIZE);
+
+#ifndef MAP_ANONYMOUS
+#define MAP_ANONYMOUS MAP_ANON
+#endif
+  void* mapping = mmap(NULL, 2*pagesize, PROT_READ|PROT_WRITE,
+                       MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
+  // mmap reports failure with MAP_FAILED rather than NULL.
+  ASSERT_TRUE(mapping != MAP_FAILED);
+  v = static_cast<char*>(mapping);
+  LOG(INFO) << "Memory at " << (void*)v;
+  // Drop the second page so that any read past the first one faults.
+  ASSERT_EQ(munmap(v + pagesize, pagesize), 0) << " error " << errno;
+  v[pagesize - 1] = '1';
+
+  x = 0;
+  const bool matched =
+      RE2::FullMatch(StringPiece(v + pagesize - 1, 1), "(.*)", &x);
+  // Release the remaining page before the fatal assertions so the mapping
+  // is not leaked when one of them fails; x already holds a copy.
+  EXPECT_EQ(munmap(v, pagesize), 0) << " error " << errno;
+  ASSERT_TRUE(matched);
+  ASSERT_EQ(x, 1);
+#endif
+}
+
+// Parses captures into each supported character and integer destination type
+// and checks the values at and just beyond the limits of every type.
+TEST(RE2, FullMatchTypeTests) {
+ // Type tests
+ // 1000 '0' characters, prepended below to test long leading-zero runs.
+ std::string zeros(1000, '0');
+ {
+ // char destination: receives the single captured character.
+ char c;
+ ASSERT_TRUE(RE2::FullMatch("Hello", "(H)ello", &c));
+ ASSERT_EQ(c, 'H');
+ }
+ {
+ // unsigned char destination.
+ unsigned char c;
+ ASSERT_TRUE(RE2::FullMatch("Hello", "(H)ello", &c));
+ ASSERT_EQ(c, static_cast<unsigned char>('H'));
+ }
+ {
+ // int16_t: accepts [-32768, 32767], rejects one past either end.
+ int16_t v;
+ ASSERT_TRUE(RE2::FullMatch("100", "(-?\\d+)", &v)); ASSERT_EQ(v, 100);
+ ASSERT_TRUE(RE2::FullMatch("-100", "(-?\\d+)", &v)); ASSERT_EQ(v, -100);
+ ASSERT_TRUE(RE2::FullMatch("32767", "(-?\\d+)", &v)); ASSERT_EQ(v, 32767);
+ ASSERT_TRUE(RE2::FullMatch("-32768", "(-?\\d+)", &v)); ASSERT_EQ(v, -32768);
+ ASSERT_FALSE(RE2::FullMatch("-32769", "(-?\\d+)", &v));
+ ASSERT_FALSE(RE2::FullMatch("32768", "(-?\\d+)", &v));
+ }
+ {
+ // uint16_t: accepts up to 65535, rejects 65536.
+ uint16_t v;
+ ASSERT_TRUE(RE2::FullMatch("100", "(\\d+)", &v)); ASSERT_EQ(v, 100);
+ ASSERT_TRUE(RE2::FullMatch("32767", "(\\d+)", &v)); ASSERT_EQ(v, 32767);
+ ASSERT_TRUE(RE2::FullMatch("65535", "(\\d+)", &v)); ASSERT_EQ(v, 65535);
+ ASSERT_FALSE(RE2::FullMatch("65536", "(\\d+)", &v));
+ }
+ {
+ // int32_t: limits, leading zeros, and hexadecimal input via CRadix.
+ int32_t v;
+ static const int32_t max = INT32_C(0x7fffffff);
+ static const int32_t min = -max - 1;
+ ASSERT_TRUE(RE2::FullMatch("100", "(-?\\d+)", &v)); ASSERT_EQ(v, 100);
+ ASSERT_TRUE(RE2::FullMatch("-100", "(-?\\d+)", &v)); ASSERT_EQ(v, -100);
+ ASSERT_TRUE(RE2::FullMatch("2147483647", "(-?\\d+)", &v)); ASSERT_EQ(v, max);
+ ASSERT_TRUE(RE2::FullMatch("-2147483648", "(-?\\d+)", &v)); ASSERT_EQ(v, min);
+ ASSERT_FALSE(RE2::FullMatch("-2147483649", "(-?\\d+)", &v));
+ ASSERT_FALSE(RE2::FullMatch("2147483648", "(-?\\d+)", &v));
+
+ // Leading zeros must not affect the parsed value or the range check.
+ ASSERT_TRUE(RE2::FullMatch(zeros + "2147483647", "(-?\\d+)", &v));
+ ASSERT_EQ(v, max);
+ ASSERT_TRUE(RE2::FullMatch("-" + zeros + "2147483648", "(-?\\d+)", &v));
+ ASSERT_EQ(v, min);
+
+ ASSERT_FALSE(RE2::FullMatch("-" + zeros + "2147483649", "(-?\\d+)", &v));
+ // With CRadix, "0x7fffffff" is accepted; the same digits behind extra
+ // leading zeros ("000x...") are rejected.
+ ASSERT_TRUE(RE2::FullMatch("0x7fffffff", "(.*)", RE2::CRadix(&v)));
+ ASSERT_EQ(v, max);
+ ASSERT_FALSE(RE2::FullMatch("000x7fffffff", "(.*)", RE2::CRadix(&v)));
+ }
+ {
+ // uint32_t: accepts up to 4294967295; rejects overflow and a minus sign.
+ uint32_t v;
+ static const uint32_t max = UINT32_C(0xffffffff);
+ ASSERT_TRUE(RE2::FullMatch("100", "(\\d+)", &v)); ASSERT_EQ(v, 100);
+ ASSERT_TRUE(RE2::FullMatch("4294967295", "(\\d+)", &v)); ASSERT_EQ(v, max);
+ ASSERT_FALSE(RE2::FullMatch("4294967296", "(\\d+)", &v));
+ ASSERT_FALSE(RE2::FullMatch("-1", "(\\d+)", &v));
+
+ ASSERT_TRUE(RE2::FullMatch(zeros + "4294967295", "(\\d+)", &v)); ASSERT_EQ(v, max);
+ }
+ {
+ // int64_t: the limit strings are produced with std::to_string; bumping
+ // the last digit gives a value one past the limit.
+ int64_t v;
+ static const int64_t max = INT64_C(0x7fffffffffffffff);
+ static const int64_t min = -max - 1;
+ std::string str;
+
+ ASSERT_TRUE(RE2::FullMatch("100", "(-?\\d+)", &v)); ASSERT_EQ(v, 100);
+ ASSERT_TRUE(RE2::FullMatch("-100", "(-?\\d+)", &v)); ASSERT_EQ(v, -100);
+
+ str = std::to_string(max);
+ ASSERT_TRUE(RE2::FullMatch(str, "(-?\\d+)", &v)); ASSERT_EQ(v, max);
+
+ str = std::to_string(min);
+ ASSERT_TRUE(RE2::FullMatch(str, "(-?\\d+)", &v)); ASSERT_EQ(v, min);
+
+ str = std::to_string(max);
+ // Guard: incrementing a trailing '9' would not yield a digit.
+ ASSERT_NE(str.back(), '9');
+ str.back()++;
+ ASSERT_FALSE(RE2::FullMatch(str, "(-?\\d+)", &v));
+
+ str = std::to_string(min);
+ ASSERT_NE(str.back(), '9');
+ str.back()++;
+ ASSERT_FALSE(RE2::FullMatch(str, "(-?\\d+)", &v));
+ }
+ {
+ // uint64_t: same approach; the negative input is parsed into the signed
+ // v2 rather than the unsigned v.
+ uint64_t v;
+ int64_t v2;
+ static const uint64_t max = UINT64_C(0xffffffffffffffff);
+ std::string str;
+
+ ASSERT_TRUE(RE2::FullMatch("100", "(-?\\d+)", &v)); ASSERT_EQ(v, 100);
+ ASSERT_TRUE(RE2::FullMatch("-100", "(-?\\d+)", &v2)); ASSERT_EQ(v2, -100);
+
+ str = std::to_string(max);
+ ASSERT_TRUE(RE2::FullMatch(str, "(-?\\d+)", &v)); ASSERT_EQ(v, max);
+
+ ASSERT_NE(str.back(), '9');
+ str.back()++;
+ ASSERT_FALSE(RE2::FullMatch(str, "(-?\\d+)", &v));
+ }
+}
+
+// Parses captures into float and double destinations, including inputs with
+// a long run of leading zeros and values chosen to expose rounding errors.
+TEST(RE2, FloatingPointFullMatchTypes) {
+ // 1000 '0' characters, prepended below to test long leading-zero runs.
+ std::string zeros(1000, '0');
+ {
+ float v;
+ ASSERT_TRUE(RE2::FullMatch("100", "(.*)", &v)); ASSERT_EQ(v, 100);
+ ASSERT_TRUE(RE2::FullMatch("-100.", "(.*)", &v)); ASSERT_EQ(v, -100);
+ ASSERT_TRUE(RE2::FullMatch("1e23", "(.*)", &v)); ASSERT_EQ(v, float(1e23));
+ // Input with a leading space is expected to parse as well.
+ ASSERT_TRUE(RE2::FullMatch(" 100", "(.*)", &v)); ASSERT_EQ(v, 100);
+
+ ASSERT_TRUE(RE2::FullMatch(zeros + "1e23", "(.*)", &v));
+ ASSERT_EQ(v, float(1e23));
+
+ // 6700000000081920.1 is an edge case.
+ // 6700000000081920 is exactly halfway between
+ // two float32s, so the .1 should make it round up.
+ // However, the .1 is outside the precision possible with
+ // a float64: the nearest float64 is 6700000000081920.
+ // So if the code uses strtod and then converts to float32,
+ // round-to-even will make it round down instead of up.
+ // To pass the test, the parser must call strtof directly.
+ // This test case is carefully chosen to use only a 17-digit
+ // number, since C does not guarantee to get the correctly
+ // rounded answer for strtod and strtof unless the input is
+ // short.
+ //
+ // This is known to fail on Cygwin and MinGW due to a broken
+ // implementation of strtof(3). And apparently MSVC too. Sigh.
+#if !defined(_MSC_VER) && !defined(__CYGWIN__) && !defined(__MINGW32__)
+ ASSERT_TRUE(RE2::FullMatch("0.1", "(.*)", &v));
+ ASSERT_EQ(v, 0.1f) << StringPrintf("%.8g != %.8g", v, 0.1f);
+ ASSERT_TRUE(RE2::FullMatch("6700000000081920.1", "(.*)", &v));
+ ASSERT_EQ(v, 6700000000081920.1f)
+ << StringPrintf("%.8g != %.8g", v, 6700000000081920.1f);
+#endif
+ }
+ {
+ // Same checks for a double destination; results are compared exactly
+ // against the corresponding double literals.
+ double v;
+ ASSERT_TRUE(RE2::FullMatch("100", "(.*)", &v)); ASSERT_EQ(v, 100);
+ ASSERT_TRUE(RE2::FullMatch("-100.", "(.*)", &v)); ASSERT_EQ(v, -100);
+ ASSERT_TRUE(RE2::FullMatch("1e23", "(.*)", &v)); ASSERT_EQ(v, 1e23);
+ ASSERT_TRUE(RE2::FullMatch(zeros + "1e23", "(.*)", &v));
+ ASSERT_EQ(v, double(1e23));
+
+ ASSERT_TRUE(RE2::FullMatch("0.1", "(.*)", &v));
+ ASSERT_EQ(v, 0.1) << StringPrintf("%.17g != %.17g", v, 0.1);
+ ASSERT_TRUE(RE2::FullMatch("1.00000005960464485", "(.*)", &v));
+ ASSERT_EQ(v, 1.0000000596046448)
+ << StringPrintf("%.17g != %.17g", v, 1.0000000596046448);
+ }
+}
+
+// FullMatch is anchored at both ends even when a capture is requested.
+TEST(RE2, FullMatchAnchored) {
+  int number;
+
+  // Stray text before or after the digits is not skipped.
+  ASSERT_FALSE(RE2::FullMatch("x1001", "(\\d+)", &number));
+  ASSERT_FALSE(RE2::FullMatch("1001x", "(\\d+)", &number));
+
+  // Once the pattern accounts for the extra character, the match succeeds.
+  ASSERT_TRUE(RE2::FullMatch("x1001", "x(\\d+)", &number));
+  ASSERT_EQ(number, 1001);
+  ASSERT_TRUE(RE2::FullMatch("1001x", "(\\d+)x", &number));
+  ASSERT_EQ(number, 1001);
+}
+
+// Counted repetition with an open upper bound: at least five characters.
+TEST(RE2, FullMatchBraces) {
+  const char* const five_or_more = "[0-9a-f+.-]{5,}";
+  ASSERT_TRUE(RE2::FullMatch("0abcd", five_or_more));    // exactly five
+  ASSERT_TRUE(RE2::FullMatch("0abcde", five_or_more));   // six
+  ASSERT_FALSE(RE2::FullMatch("0abc", five_or_more));    // only four
+}
+
+// Alternation of two literals and a character class, fully anchored.
+TEST(RE2, Complicated) {
+  const char* const alternation = "foo|bar|[A-Z]";
+  ASSERT_TRUE(RE2::FullMatch("foo", alternation));
+  ASSERT_TRUE(RE2::FullMatch("bar", alternation));
+  ASSERT_TRUE(RE2::FullMatch("X", alternation));
+  // Two upper-case letters exceed the single-character class.
+  ASSERT_FALSE(RE2::FullMatch("XY", alternation));
+}
+
+// Checks end anchoring of FullMatch for alternations and for patterns that
+// contain an explicit or escaped '$'.
+TEST(RE2, FullMatchEnd) {
+ // Check full-match handling (needs '$' tacked on internally)
+ ASSERT_TRUE(RE2::FullMatch("fo", "fo|foo"));
+ ASSERT_TRUE(RE2::FullMatch("foo", "fo|foo"));
+ ASSERT_TRUE(RE2::FullMatch("fo", "fo|foo$"));
+ ASSERT_TRUE(RE2::FullMatch("foo", "fo|foo$"));
+ ASSERT_TRUE(RE2::FullMatch("foo", "foo$"));
+ // "\\$" is a literal dollar sign; the text continues after it, so the
+ // full match fails.
+ ASSERT_FALSE(RE2::FullMatch("foo$bar", "foo\\$"));
+ ASSERT_FALSE(RE2::FullMatch("fox", "fo|bar"));
+
+ // Uncomment the following if we change the handling of '$' to
+ // prevent it from matching a trailing newline
+ // (The block is compiled but never executed because of `if (false)`.)
+ if (false) {
+ // Check that we don't get bitten by pcre's special handling of a
+ // '\n' at the end of the string matching '$'
+ ASSERT_FALSE(RE2::PartialMatch("foo\n", "foo$"));
+ }
+}
+
+// Calls FullMatch with 0 through 7 and then 16 int destinations and checks
+// that each destination receives its own single captured digit.
+TEST(RE2, FullMatchArgCount) {
+  // Number of args
+  int a[16];
+  ASSERT_TRUE(RE2::FullMatch("", ""));
+
+  // The whole array is cleared before every section (sizeof(a), not the
+  // size of a single int) so that a value left behind by an earlier
+  // section cannot satisfy a later assertion.
+  memset(a, 0, sizeof(a));
+  ASSERT_TRUE(RE2::FullMatch("1", "(\\d){1}", &a[0]));
+  ASSERT_EQ(a[0], 1);
+
+  memset(a, 0, sizeof(a));
+  ASSERT_TRUE(RE2::FullMatch("12", "(\\d)(\\d)", &a[0], &a[1]));
+  ASSERT_EQ(a[0], 1);
+  ASSERT_EQ(a[1], 2);
+
+  memset(a, 0, sizeof(a));
+  ASSERT_TRUE(RE2::FullMatch("123", "(\\d)(\\d)(\\d)", &a[0], &a[1], &a[2]));
+  ASSERT_EQ(a[0], 1);
+  ASSERT_EQ(a[1], 2);
+  ASSERT_EQ(a[2], 3);
+
+  memset(a, 0, sizeof(a));
+  ASSERT_TRUE(RE2::FullMatch("1234", "(\\d)(\\d)(\\d)(\\d)", &a[0], &a[1],
+                             &a[2], &a[3]));
+  ASSERT_EQ(a[0], 1);
+  ASSERT_EQ(a[1], 2);
+  ASSERT_EQ(a[2], 3);
+  ASSERT_EQ(a[3], 4);
+
+  memset(a, 0, sizeof(a));
+  ASSERT_TRUE(RE2::FullMatch("12345", "(\\d)(\\d)(\\d)(\\d)(\\d)", &a[0], &a[1],
+                             &a[2], &a[3], &a[4]));
+  ASSERT_EQ(a[0], 1);
+  ASSERT_EQ(a[1], 2);
+  ASSERT_EQ(a[2], 3);
+  ASSERT_EQ(a[3], 4);
+  ASSERT_EQ(a[4], 5);
+
+  memset(a, 0, sizeof(a));
+  ASSERT_TRUE(RE2::FullMatch("123456", "(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)", &a[0],
+                             &a[1], &a[2], &a[3], &a[4], &a[5]));
+  ASSERT_EQ(a[0], 1);
+  ASSERT_EQ(a[1], 2);
+  ASSERT_EQ(a[2], 3);
+  ASSERT_EQ(a[3], 4);
+  ASSERT_EQ(a[4], 5);
+  ASSERT_EQ(a[5], 6);
+
+  memset(a, 0, sizeof(a));
+  ASSERT_TRUE(RE2::FullMatch("1234567", "(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)",
+                             &a[0], &a[1], &a[2], &a[3], &a[4], &a[5], &a[6]));
+  ASSERT_EQ(a[0], 1);
+  ASSERT_EQ(a[1], 2);
+  ASSERT_EQ(a[2], 3);
+  ASSERT_EQ(a[3], 4);
+  ASSERT_EQ(a[4], 5);
+  ASSERT_EQ(a[5], 6);
+  ASSERT_EQ(a[6], 7);
+
+  // Sixteen destinations; the text repeats its digits, so a[9] is 0 and
+  // a[10]..a[15] run 1 through 6 again.
+  memset(a, 0, sizeof(a));
+  ASSERT_TRUE(RE2::FullMatch("1234567890123456",
+                             "(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)"
+                             "(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)",
+                             &a[0], &a[1], &a[2], &a[3], &a[4], &a[5], &a[6],
+                             &a[7], &a[8], &a[9], &a[10], &a[11], &a[12],
+                             &a[13], &a[14], &a[15]));
+  ASSERT_EQ(a[0], 1);
+  ASSERT_EQ(a[1], 2);
+  ASSERT_EQ(a[2], 3);
+  ASSERT_EQ(a[3], 4);
+  ASSERT_EQ(a[4], 5);
+  ASSERT_EQ(a[5], 6);
+  ASSERT_EQ(a[6], 7);
+  ASSERT_EQ(a[7], 8);
+  ASSERT_EQ(a[8], 9);
+  ASSERT_EQ(a[9], 0);
+  ASSERT_EQ(a[10], 1);
+  ASSERT_EQ(a[11], 2);
+  ASSERT_EQ(a[12], 3);
+  ASSERT_EQ(a[13], 4);
+  ASSERT_EQ(a[14], 5);
+  ASSERT_EQ(a[15], 6);
+}
+
+// Basic accessor checks on a successfully constructed RE2.
+TEST(RE2, Accessors) {
+  // pattern() hands back the string the object was constructed from.
+  {
+    const std::string source = "http://([^/]+)/.*";
+    const RE2 compiled(source);
+    ASSERT_EQ(source, compiled.pattern());
+  }
+
+  // A valid pattern reports "no error" through every error accessor.
+  {
+    RE2 valid("foo");
+    ASSERT_TRUE(valid.error().empty());
+    ASSERT_TRUE(valid.ok());
+    ASSERT_EQ(valid.error_code(), RE2::NoError);
+  }
+}
+
+// Compares matching of the same UTF-8 encoded text in Latin-1 (byte) mode
+// and in the default mode, where '.' consumes a whole UTF-8 character.
+TEST(RE2, UTF8) {
+ // Check UTF-8 handling
+ // Three Japanese characters (nihongo)
+ // Each row is one character: three UTF-8 bytes, code point in the comment.
+ const char utf8_string[] = {
+ (char)0xe6, (char)0x97, (char)0xa5, // 65e5
+ (char)0xe6, (char)0x9c, (char)0xac, // 672c
+ (char)0xe8, (char)0xaa, (char)0x9e, // 8a9e
+ 0
+ };
+ // '.' + the middle character + '.'
+ const char utf8_pattern[] = {
+ '.',
+ (char)0xe6, (char)0x9c, (char)0xac, // 672c
+ '.',
+ 0
+ };
+
+ // Both should match in either mode, bytes or UTF-8
+ // (nine dots for nine bytes, three dots for three characters).
+ RE2 re_test1(".........", RE2::Latin1);
+ ASSERT_TRUE(RE2::FullMatch(utf8_string, re_test1));
+ RE2 re_test2("...");
+ ASSERT_TRUE(RE2::FullMatch(utf8_string, re_test2));
+
+ // Check that '.' matches one byte or UTF-8 character
+ // according to the mode.
+ std::string s;
+ RE2 re_test3("(.)", RE2::Latin1);
+ ASSERT_TRUE(RE2::PartialMatch(utf8_string, re_test3, &s));
+ ASSERT_EQ(s, std::string("\xe6"));
+ RE2 re_test4("(.)");
+ ASSERT_TRUE(RE2::PartialMatch(utf8_string, re_test4, &s));
+ ASSERT_EQ(s, std::string("\xe6\x97\xa5"));
+
+ // Check that string matches itself in either mode
+ RE2 re_test5(utf8_string, RE2::Latin1);
+ ASSERT_TRUE(RE2::FullMatch(utf8_string, re_test5));
+ RE2 re_test6(utf8_string);
+ ASSERT_TRUE(RE2::FullMatch(utf8_string, re_test6));
+
+ // Check that pattern matches string only in UTF8 mode
+ // (in Latin-1 mode each '.' covers one byte, so five bytes cannot cover
+ // the nine-byte text).
+ RE2 re_test7(utf8_pattern, RE2::Latin1);
+ ASSERT_FALSE(RE2::FullMatch(utf8_string, re_test7));
+ RE2 re_test8(utf8_pattern);
+ ASSERT_TRUE(RE2::FullMatch(utf8_string, re_test8));
+}
+
+// Regression test for bug 82246: an ungreedy pattern must not fully match
+// text it ought not to, in either Latin-1 or the default encoding.
+TEST(RE2, UngreedyUTF8) {
+  // Greedy form -- this case always worked.
+  {
+    const char* greedy = "\\w+X";
+    const std::string text = "a aX";
+    RE2 latin1_re(greedy, RE2::Latin1);
+    RE2 default_re(greedy);
+
+    ASSERT_FALSE(RE2::FullMatch(text, latin1_re));
+    ASSERT_FALSE(RE2::FullMatch(text, default_re));
+  }
+
+  // Same pattern with the (?U) ungreedy flag.
+  {
+    const char* ungreedy = "(?U)\\w+X";
+    const std::string text = "a aX";
+    RE2 latin1_re(ungreedy, RE2::Latin1);
+    // The flag itself must be accepted by the parser.
+    ASSERT_EQ(latin1_re.error(), "");
+    RE2 default_re(ungreedy);
+
+    ASSERT_FALSE(RE2::FullMatch(text, latin1_re));
+    ASSERT_FALSE(RE2::FullMatch(text, default_re));
+  }
+}
+
+// Each malformed pattern must leave the RE2 object in a not-ok state.
+// RE2::Quiet is passed to every constructor.
+TEST(RE2, Rejects) {
+  static const char* const kBadPatterns[] = {
+    "a\\1",
+    "a[x",
+    "a[z-a]",
+    "a[[:foobar:]]",
+    "a(b",
+    "a\\",
+  };
+  for (const char* pattern : kBadPatterns) {
+    RE2 re(pattern, RE2::Quiet);
+    ASSERT_FALSE(re.ok());
+  }
+}
+
+// Matching against invalid, oversized or merely large patterns must not
+// crash.
+TEST(RE2, NoCrash) {
+  // A syntactically broken regexp: construction fails and matching with the
+  // failed object simply returns false.
+  {
+    RE2 broken("a\\", RE2::Quiet);
+    ASSERT_FALSE(broken.ok());
+    ASSERT_FALSE(RE2::PartialMatch("a\\b", broken));
+  }
+
+  // An enormous regexp: rejected at construction, then safe to use.
+  {
+    RE2 enormous("(((.{100}){100}){100}){100}", RE2::Quiet);
+    ASSERT_FALSE(enormous.ok());
+    ASSERT_FALSE(RE2::PartialMatch("aaa", enormous));
+  }
+
+  // A large but acceptable regexp still compiles and runs.
+  {
+    RE2 large(".{512}x", RE2::Quiet);
+    ASSERT_TRUE(large.ok());
+    std::string text(515, 'c');
+    text += "x";
+    ASSERT_TRUE(RE2::PartialMatch(text, large));
+  }
+}
+
+// PCRE-legacy check that recursion is stopped; RE2 itself does not recurse.
+// TestRecursion is run with the same size for each of the patterns.
+TEST(RE2, Recursion) {
+  const int bytes = 15 * 1024;  // enough to crash PCRE
+  static const char* const kPatterns[] = { ".", "a", "a.", "ab.", "abc." };
+  for (const char* pattern : kPatterns)
+    TestRecursion(bytes, pattern);
+}
+
+// Counted repetition must work when the memory budget is raised.
+TEST(RE2, BigCountedRepetition) {
+  RE2::Options generous;
+  generous.set_max_mem(256<<20);
+
+  RE2 re(".{512}x", generous);
+  ASSERT_TRUE(re.ok());
+
+  // 515 filler characters followed by the terminating 'x'.
+  std::string text(515, 'c');
+  text += "x";
+  ASSERT_TRUE(RE2::PartialMatch(text, re));
+}
+
+// PCRE-legacy test for deep stack recursion: before pcre was patched this
+// input overflowed the stack. RE2 does not recurse.
+TEST(RE2, DeepRecursion) {
+  // "x*", then 131072 'a' characters, then "*x".
+  const std::string body(131072, 'a');
+  const std::string comment = "x*" + body + "*x";
+
+  RE2 re("((?:\\s|xx.*\n|x[*](?:\n|.)*?[*]x)*)");
+  ASSERT_TRUE(RE2::FullMatch(comment, re));
+}
+
+// Suggested by Josh Hyman. This failed while SearchOnePass did not
+// implement case-folding.
+TEST(CaseInsensitive, MatchAndConsume) {
+  const char* const pattern = "(?i)([wand]{5})";
+  std::string text = "A fish named *Wanda*";
+  StringPiece remaining(text);
+  StringPiece captured;
+
+  // The same case-insensitive pattern through both entry points.
+  EXPECT_TRUE(RE2::PartialMatch(text, pattern, &captured));
+  EXPECT_TRUE(RE2::FindAndConsume(&remaining, pattern, &captured));
+}
+
+// The pattern argument may be a std::string, a StringPiece, a const char*
+// or a C string literal; each converts implicitly to RE2.
+TEST(RE2, ImplicitConversions) {
+  std::string as_string(".");
+  StringPiece as_piece(".");
+  const char* as_cstring = ".";
+
+  EXPECT_TRUE(RE2::PartialMatch("e", as_string));
+  EXPECT_TRUE(RE2::PartialMatch("e", as_piece));
+  EXPECT_TRUE(RE2::PartialMatch("e", as_cstring));
+  EXPECT_TRUE(RE2::PartialMatch("e", "."));
+}
+
+// Regression tests for bugs introduced by 8622304.
+TEST(RE2, CL8622304) {
+  // Reported by ingow: per the original notes, the call without a capture
+  // destination worked while the one with a destination failed.
+  std::string captured;
+  EXPECT_TRUE(RE2::FullMatch("D", "([^\\\\])"));
+  EXPECT_TRUE(RE2::FullMatch("D", "([^\\\\])", &captured));
+
+  // Reported by jacobsa: key and optional value up to the first ';'.
+  std::string key, val;
+  const char* const text = "bar:1,0x2F,030,4,5;baz:true;fooby:false,true";
+  const char* const pattern = "(\\w+)(?::((?:[^;\\\\]|\\\\.)*))?;?";
+  EXPECT_TRUE(RE2::PartialMatch(text, pattern, &key, &val));
+  EXPECT_EQ(key, "bar");
+  EXPECT_EQ(val, "1,0x2F,030,4,5");
+}
+
+// Check that RE2 returns correct regexp pieces on error.
+// In particular, make sure it returns whole runes
+// and that it always reports invalid UTF-8.
+// Also check that Perl error flag piece is big enough.
+// One row per malformed pattern: the pattern, the error code expected from
+// RE2::error_code(), and the text expected from RE2::error_arg().
+static struct ErrorTest {
+ const char *regexp;
+ RE2::ErrorCode error_code;
+ const char *error_arg;
+} error_tests[] = {
+ // Bad escapes: the reported piece ends on a whole character even when
+ // the offending character is multi-byte.
+ { "ab\\αcd", RE2::ErrorBadEscape, "\\α" },
+ { "ef\\x☺01", RE2::ErrorBadEscape, "\\x☺0" },
+ { "gh\\x1☺01", RE2::ErrorBadEscape, "\\x1☺" },
+ { "ij\\x1", RE2::ErrorBadEscape, "\\x1" },
+ { "kl\\x", RE2::ErrorBadEscape, "\\x" },
+ { "uv\\x{0000☺}", RE2::ErrorBadEscape, "\\x{0000☺" },
+ { "wx\\p{ABC", RE2::ErrorBadCharRange, "\\p{ABC" },
+ // used to return (?s but the error is X
+ { "yz(?smiUX:abc)", RE2::ErrorBadPerlOp, "(?smiUX" },
+ { "aa(?sm☺i", RE2::ErrorBadPerlOp, "(?sm☺" },
+ { "bb[abc", RE2::ErrorMissingBracket, "[abc" },
+ { "abc(def", RE2::ErrorMissingParen, "abc(def" },
+ { "abc)def", RE2::ErrorUnexpectedParen, "abc)def" },
+
+ // no argument string returned for invalid UTF-8
+ // (\377 is the byte 0xFF, which never occurs in valid UTF-8)
+ { "mn\\x1\377", RE2::ErrorBadUTF8, "" },
+ { "op\377qr", RE2::ErrorBadUTF8, "" },
+ { "st\\x{00000\377", RE2::ErrorBadUTF8, "" },
+ { "zz\\p{\377}", RE2::ErrorBadUTF8, "" },
+ { "zz\\x{00\377}", RE2::ErrorBadUTF8, "" },
+ { "zz(?P<name\377>abc)", RE2::ErrorBadUTF8, "" },
+};
+// Compiles every entry of error_tests quietly and compares the reported
+// error code and error argument with the expected ones.
+TEST(RE2, ErrorCodeAndArg) {
+  for (const ErrorTest& expected : error_tests) {
+    RE2 re(expected.regexp, RE2::Quiet);
+    EXPECT_FALSE(re.ok());
+    EXPECT_EQ(re.error_code(), expected.error_code) << re.error();
+    EXPECT_EQ(re.error_arg(), expected.error_arg) << re.error();
+  }
+}
+
+// Check that "never match \n" mode never matches \n.
+// One row per case: the pattern, the text to search, and the expected first
+// capture -- or NULL when no match is expected at all.
+static struct NeverTest {
+ const char* regexp;
+ const char* text;
+ const char* match;
+} never_tests[] = {
+ { "(.*)", "abc\ndef\nghi\n", "abc" },
+ { "(?s)(abc.*def)", "abc\ndef\n", NULL },
+ { "(abc(.|\n)*def)", "abc\ndef\n", NULL },
+ { "(abc[^x]*def)", "abc\ndef\n", NULL },
+ { "(abc[^x]*def)", "abczzzdef\ndef\n", "abczzzdef" },
+};
+// Runs every never_tests entry with the never_nl option set: entries with a
+// NULL expectation must not match, the others must capture the given text.
+TEST(RE2, NeverNewline) {
+  RE2::Options opt;
+  opt.set_never_nl(true);
+  for (const NeverTest& t : never_tests) {
+    RE2 re(t.regexp, opt);
+    if (t.match != NULL) {
+      StringPiece captured;
+      EXPECT_TRUE(RE2::PartialMatch(t.text, re, &captured));
+      EXPECT_EQ(captured, t.match);
+    } else {
+      EXPECT_FALSE(RE2::PartialMatch(t.text, re));
+    }
+  }
+}
+
+// The dot_nl option lets '.' match a newline unless the pattern switches it
+// off with (?-s) or the never_nl option is set as well.
+TEST(RE2, DotNL) {
+  RE2::Options opt;
+  opt.set_dot_nl(true);
+  {
+    RE2 dot(".", opt);
+    EXPECT_TRUE(RE2::PartialMatch("\n", dot));
+  }
+  {
+    RE2 dot_without_s("(?-s).", opt);
+    EXPECT_FALSE(RE2::PartialMatch("\n", dot_without_s));
+  }
+
+  // With never_nl added on top of dot_nl, the newline no longer matches.
+  opt.set_never_nl(true);
+  {
+    RE2 dot(".", opt);
+    EXPECT_FALSE(RE2::PartialMatch("\n", dot));
+  }
+}
+
+// In "never capture" mode parentheses do not create capturing groups.
+TEST(RE2, NeverCapture) {
+  RE2::Options no_capture;
+  no_capture.set_never_capture(true);
+
+  RE2 two_groups("(r)(e)", no_capture);
+  EXPECT_EQ(0, two_groups.NumberOfCapturingGroups());
+}
+
+// Regression test: Bitstate used to read submatch[0] even when nsubmatch
+// was 0. The bug was triggered by a failed DFA search falling back to
+// Bitstate while Match was called with a NULL submatch array.
+TEST(RE2, BitstateCaptureBug) {
+  RE2::Options small_budget;
+  small_budget.set_max_mem(20000);
+
+  RE2 re("(_________$)", small_budget);
+  StringPiece text = "xxxxxxxxxxxxxxxxxxxxxxxxxx_________x";
+  // No submatches requested: NULL array and a count of zero.
+  EXPECT_FALSE(re.Match(text, 0, text.size(), RE2::UNANCHORED, NULL, 0));
+}
+
+// C++ version of bug 609710.
+// Checks the \p{..} / \P{..} Unicode class escapes against an ASCII capital
+// letter and three CJK characters, alone and inside bracket expressions.
+TEST(RE2, UnicodeClasses) {
+ const std::string str = "ABCDEFGHI譚永鋒";
+ std::string a, b, c;
+
+ // "A" is expected in L and Lu but not in Ll; \P is the negation.
+ EXPECT_TRUE(RE2::FullMatch("A", "\\p{L}"));
+ EXPECT_TRUE(RE2::FullMatch("A", "\\p{Lu}"));
+ EXPECT_FALSE(RE2::FullMatch("A", "\\p{Ll}"));
+ EXPECT_FALSE(RE2::FullMatch("A", "\\P{L}"));
+ EXPECT_FALSE(RE2::FullMatch("A", "\\P{Lu}"));
+ EXPECT_TRUE(RE2::FullMatch("A", "\\P{Ll}"));
+
+ // Each CJK character is expected in L but in neither Lu nor Ll.
+ EXPECT_TRUE(RE2::FullMatch("譚", "\\p{L}"));
+ EXPECT_FALSE(RE2::FullMatch("譚", "\\p{Lu}"));
+ EXPECT_FALSE(RE2::FullMatch("譚", "\\p{Ll}"));
+ EXPECT_FALSE(RE2::FullMatch("譚", "\\P{L}"));
+ EXPECT_TRUE(RE2::FullMatch("譚", "\\P{Lu}"));
+ EXPECT_TRUE(RE2::FullMatch("譚", "\\P{Ll}"));
+
+ EXPECT_TRUE(RE2::FullMatch("永", "\\p{L}"));
+ EXPECT_FALSE(RE2::FullMatch("永", "\\p{Lu}"));
+ EXPECT_FALSE(RE2::FullMatch("永", "\\p{Ll}"));
+ EXPECT_FALSE(RE2::FullMatch("永", "\\P{L}"));
+ EXPECT_TRUE(RE2::FullMatch("永", "\\P{Lu}"));
+ EXPECT_TRUE(RE2::FullMatch("永", "\\P{Ll}"));
+
+ EXPECT_TRUE(RE2::FullMatch("鋒", "\\p{L}"));
+ EXPECT_FALSE(RE2::FullMatch("鋒", "\\p{Lu}"));
+ EXPECT_FALSE(RE2::FullMatch("鋒", "\\p{Ll}"));
+ EXPECT_FALSE(RE2::FullMatch("鋒", "\\P{L}"));
+ EXPECT_TRUE(RE2::FullMatch("鋒", "\\P{Lu}"));
+ EXPECT_TRUE(RE2::FullMatch("鋒", "\\P{Ll}"));
+
+ // Non-greedy gaps: the three captures are the first three characters.
+ EXPECT_TRUE(RE2::PartialMatch(str, "(.).*?(.).*?(.)", &a, &b, &c));
+ EXPECT_EQ("A", a);
+ EXPECT_EQ("B", b);
+ EXPECT_EQ("C", c);
+
+ EXPECT_TRUE(RE2::PartialMatch(str, "(.).*?([\\p{L}]).*?(.)", &a, &b, &c));
+ EXPECT_EQ("A", a);
+ EXPECT_EQ("B", b);
+ EXPECT_EQ("C", c);
+
+ // No character of str is outside \p{L}.
+ EXPECT_FALSE(RE2::PartialMatch(str, "\\P{L}"));
+
+ EXPECT_TRUE(RE2::PartialMatch(str, "(.).*?([\\p{Lu}]).*?(.)", &a, &b, &c));
+ EXPECT_EQ("A", a);
+ EXPECT_EQ("B", b);
+ EXPECT_EQ("C", c);
+
+ // Every character of str is in Lu or Lo, so the negated class finds nothing.
+ EXPECT_FALSE(RE2::PartialMatch(str, "[^\\p{Lu}\\p{Lo}]"));
+
+ // The leading greedy .* pushes the three captures to the end of str.
+ EXPECT_TRUE(RE2::PartialMatch(str, ".*(.).*?([\\p{Lu}\\p{Lo}]).*?(.)", &a, &b, &c));
+ EXPECT_EQ("譚", a);
+ EXPECT_EQ("永", b);
+ EXPECT_EQ("鋒", c);
+}
+
+// LazyRE2 can be brace-initialized with just a pattern or with a pattern
+// plus options; the wrapped RE2 is reached through operator->.
+TEST(RE2, LazyRE2) {
+  static LazyRE2 pattern_only = {"a"};
+  static LazyRE2 with_latin1 = {"b", RE2::Latin1};
+
+  // Without options the encoding is UTF-8.
+  EXPECT_EQ("a", pattern_only->pattern());
+  EXPECT_EQ(RE2::Options::EncodingUTF8, pattern_only->options().encoding());
+
+  // With RE2::Latin1 the encoding is Latin-1.
+  EXPECT_EQ("b", with_latin1->pattern());
+  EXPECT_EQ(RE2::Options::EncodingLatin1, with_latin1->options().encoding());
+}
+
+// Bug reported by saito, 2009/02/17: a default-constructed (null)
+// StringPiece and an empty-string StringPiece must both match ".*".
+TEST(RE2, NullVsEmptyString) {
+  RE2 anything(".*");
+  EXPECT_TRUE(anything.ok());
+
+  StringPiece null_piece;
+  EXPECT_TRUE(RE2::FullMatch(null_piece, anything));
+
+  StringPiece empty_piece("");
+  EXPECT_TRUE(RE2::FullMatch(empty_piece, anything));
+}
+
+// Similar to the previous test, check that the null string and the empty
+// string both match, but also that the null string can only provide null
+// submatches whereas the empty string can also provide empty submatches.
+TEST(RE2, NullVsEmptyStringSubmatches) {
+ RE2 re("()|(foo)");
+ EXPECT_TRUE(re.ok());
+
+ // matches[0] is overall match, [1] is (), [2] is (foo), [3] is nonexistent.
+ StringPiece matches[4];
+
+ for (size_t i = 0; i < arraysize(matches); i++)
+ matches[i] = "bar";
+
+ StringPiece null;
+ EXPECT_TRUE(re.Match(null, 0, null.size(), RE2::UNANCHORED,
+ matches, arraysize(matches)));
+ for (size_t i = 0; i < arraysize(matches); i++) {
+ EXPECT_TRUE(matches[i].data() == NULL); // always null
+ EXPECT_TRUE(matches[i].empty());
+ }
+
+ for (size_t i = 0; i < arraysize(matches); i++)
+ matches[i] = "bar";
+
+ StringPiece empty("");
+ EXPECT_TRUE(re.Match(empty, 0, empty.size(), RE2::UNANCHORED,
+ matches, arraysize(matches)));
+ EXPECT_TRUE(matches[0].data() != NULL); // empty, not null
+ EXPECT_TRUE(matches[0].empty());
+ EXPECT_TRUE(matches[1].data() != NULL); // empty, not null
+ EXPECT_TRUE(matches[1].empty());
+ EXPECT_TRUE(matches[2].data() == NULL);
+ EXPECT_TRUE(matches[2].empty());
+ EXPECT_TRUE(matches[3].data() == NULL);
+ EXPECT_TRUE(matches[3].empty());
+}
+
+// Issue 1816809
+TEST(RE2, Bug1816809) {
+ RE2 re("(((((llx((-3)|(4)))(;(llx((-3)|(4))))*))))");
+ StringPiece piece("llx-3;llx4");
+ std::string x;
+ EXPECT_TRUE(RE2::Consume(&piece, re, &x));
+}
+
+// Issue 3061120
+TEST(RE2, Bug3061120) {
+ RE2 re("(?i)\\W");
+ EXPECT_FALSE(RE2::PartialMatch("x", re)); // always worked
+ EXPECT_FALSE(RE2::PartialMatch("k", re)); // broke because of kelvin
+ EXPECT_FALSE(RE2::PartialMatch("s", re)); // broke because of latin long s
+}
+
+TEST(RE2, CapturingGroupNames) {
+ // Opening parentheses annotated with group IDs:
+ // 12 3 45 6 7
+ RE2 re("((abc)(?P<G2>)|((e+)(?P<G2>.*)(?P<G1>u+)))");
+ EXPECT_TRUE(re.ok());
+ const std::map<int, std::string>& have = re.CapturingGroupNames();
+ std::map<int, std::string> want;
+ want[3] = "G2";
+ want[6] = "G2";
+ want[7] = "G1";
+ EXPECT_EQ(want, have);
+}
+
+TEST(RE2, RegexpToStringLossOfAnchor) {
+ EXPECT_EQ(RE2("^[a-c]at", RE2::POSIX).Regexp()->ToString(), "^[a-c]at");
+ EXPECT_EQ(RE2("^[a-c]at").Regexp()->ToString(), "(?-m:^)[a-c]at");
+ EXPECT_EQ(RE2("ca[t-z]$", RE2::POSIX).Regexp()->ToString(), "ca[t-z]$");
+ EXPECT_EQ(RE2("ca[t-z]$").Regexp()->ToString(), "ca[t-z](?-m:$)");
+}
+
+// Issue 10131674
+TEST(RE2, Bug10131674) {
+ // Some of these escapes describe values that do not fit in a byte.
+ RE2 re("\\140\\440\\174\\271\\150\\656\\106\\201\\004\\332", RE2::Latin1);
+ EXPECT_FALSE(re.ok());
+ EXPECT_FALSE(RE2::FullMatch("hello world", re));
+}
+
+TEST(RE2, Bug18391750) {
+ // Stray write past end of match_ in nfa.cc, caught by fuzzing + address sanitizer.
+ const char t[] = {
+ (char)0x28, (char)0x28, (char)0xfc, (char)0xfc, (char)0x08, (char)0x08,
+ (char)0x26, (char)0x26, (char)0x28, (char)0xc2, (char)0x9b, (char)0xc5,
+ (char)0xc5, (char)0xd4, (char)0x8f, (char)0x8f, (char)0x69, (char)0x69,
+ (char)0xe7, (char)0x29, (char)0x7b, (char)0x37, (char)0x31, (char)0x31,
+ (char)0x7d, (char)0xae, (char)0x7c, (char)0x7c, (char)0xf3, (char)0x29,
+ (char)0xae, (char)0xae, (char)0x2e, (char)0x2a, (char)0x29, (char)0x00,
+ };
+ RE2::Options opt;
+ opt.set_encoding(RE2::Options::EncodingLatin1);
+ opt.set_longest_match(true);
+ opt.set_dot_nl(true);
+ opt.set_case_sensitive(false);
+ RE2 re(t, opt);
+ ASSERT_TRUE(re.ok());
+ RE2::PartialMatch(t, re);
+}
+
+TEST(RE2, Bug18458852) {
+ // Bug in parser accepting invalid (too large) rune,
+ // causing compiler to fail in DCHECK in UTF-8
+ // character class code.
+ const char b[] = {
+ (char)0x28, (char)0x05, (char)0x05, (char)0x41, (char)0x41, (char)0x28,
+ (char)0x24, (char)0x5b, (char)0x5e, (char)0xf5, (char)0x87, (char)0x87,
+ (char)0x90, (char)0x29, (char)0x5d, (char)0x29, (char)0x29, (char)0x00,
+ };
+ RE2 re(b);
+ ASSERT_FALSE(re.ok());
+}
+
+TEST(RE2, Bug18523943) {
+ // Bug in BitState: case kFailInst failed the match entirely.
+
+ RE2::Options opt;
+ const char a[] = {
+ (char)0x29, (char)0x29, (char)0x24, (char)0x00,
+ };
+ const char b[] = {
+ (char)0x28, (char)0x0a, (char)0x2a, (char)0x2a, (char)0x29, (char)0x00,
+ };
+ opt.set_log_errors(false);
+ opt.set_encoding(RE2::Options::EncodingLatin1);
+ opt.set_posix_syntax(true);
+ opt.set_longest_match(true);
+ opt.set_literal(false);
+ opt.set_never_nl(true);
+
+ RE2 re((const char*)b, opt);
+ ASSERT_TRUE(re.ok());
+ std::string s1;
+ ASSERT_TRUE(RE2::PartialMatch((const char*)a, re, &s1));
+}
+
+TEST(RE2, Bug21371806) {
+ // Bug in parser accepting Unicode groups in Latin-1 mode,
+ // causing compiler to fail in DCHECK in prog.cc.
+
+ RE2::Options opt;
+ opt.set_encoding(RE2::Options::EncodingLatin1);
+
+ RE2 re("g\\p{Zl}]", opt);
+ ASSERT_TRUE(re.ok());
+}
+
+TEST(RE2, Bug26356109) {
+ // Bug in parser caused by factoring of common prefixes in alternations.
+
+ // In the past, this was factored to "a\\C*?[bc]". Thus, the automaton would
+ // consume "ab" and then stop (when unanchored) whereas it should consume all
+ // of "abc" as per first-match semantics.
+ RE2 re("a\\C*?c|a\\C*?b");
+ ASSERT_TRUE(re.ok());
+
+ std::string s = "abc";
+ StringPiece m;
+
+ ASSERT_TRUE(re.Match(s, 0, s.size(), RE2::UNANCHORED, &m, 1));
+ ASSERT_EQ(m, s) << " (UNANCHORED) got m='" << m << "', want '" << s << "'";
+
+ ASSERT_TRUE(re.Match(s, 0, s.size(), RE2::ANCHOR_BOTH, &m, 1));
+ ASSERT_EQ(m, s) << " (ANCHOR_BOTH) got m='" << m << "', want '" << s << "'";
+}
+
+TEST(RE2, Issue104) {
+ // RE2::GlobalReplace always advanced by one byte when the empty string was
+ // matched, which would clobber any rune that is longer than one byte.
+
+ std::string s = "bc";
+ ASSERT_EQ(3, RE2::GlobalReplace(&s, "a*", "d"));
+ ASSERT_EQ("dbdcd", s);
+
+ s = "ąć";
+ ASSERT_EQ(3, RE2::GlobalReplace(&s, "Ć*", "Ĉ"));
+ ASSERT_EQ("ĈąĈćĈ", s);
+
+ s = "人类";
+ ASSERT_EQ(3, RE2::GlobalReplace(&s, "大*", "小"));
+ ASSERT_EQ("小人小类小", s);
+}
+
+TEST(RE2, Issue310) {
+ // (?:|a)* matched more text than (?:|a)+ did.
+
+ std::string s = "aaa";
+ StringPiece m;
+
+ RE2 star("(?:|a)*");
+ ASSERT_TRUE(star.Match(s, 0, s.size(), RE2::UNANCHORED, &m, 1));
+ ASSERT_EQ(m, "") << " got m='" << m << "', want ''";
+
+ RE2 plus("(?:|a)+");
+ ASSERT_TRUE(plus.Match(s, 0, s.size(), RE2::UNANCHORED, &m, 1));
+ ASSERT_EQ(m, "") << " got m='" << m << "', want ''";
+}
+
+} // namespace re2
diff --git a/contrib/libs/re2/re2/testing/regexp_generator.cc b/contrib/libs/re2/re2/testing/regexp_generator.cc
index 9065835748..3b6c9ba3db 100644
--- a/contrib/libs/re2/re2/testing/regexp_generator.cc
+++ b/contrib/libs/re2/re2/testing/regexp_generator.cc
@@ -1,276 +1,276 @@
-// Copyright 2008 The RE2 Authors. All Rights Reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// Regular expression generator: generates all possible
-// regular expressions within parameters (see regexp_generator.h for details).
-
-// The regexp generator first generates a sequence of commands in a simple
-// postfix language. Each command in the language is a string,
-// like "a" or "%s*" or "%s|%s".
-//
-// To evaluate a command, enough arguments are popped from the value stack to
-// plug into the %s slots. Then the result is pushed onto the stack.
-// For example, the command sequence
-// a b %s%s c
-// results in the stack
-// ab c
-//
-// GeneratePostfix generates all possible command sequences.
-// Then RunPostfix turns each sequence into a regular expression
-// and passes the regexp to HandleRegexp.
-
-#include <stddef.h>
-#include <stdint.h>
-#include <stdio.h>
-#include <string.h>
-#include <memory>
-#include <stack>
-#include <string>
-#include <vector>
-
-#include "library/cpp/testing/gtest/gtest.h"
-#include "util/logging.h"
-#include "util/strutil.h"
-#include "util/utf.h"
-#include "re2/testing/regexp_generator.h"
-
-namespace re2 {
-
-// Returns a vector of the egrep regexp operators.
-const std::vector<std::string>& RegexpGenerator::EgrepOps() {
- static const char *ops[] = {
- "%s%s",
- "%s|%s",
- "%s*",
- "%s+",
- "%s?",
- "%s\\C*",
- };
- static std::vector<std::string> v(ops, ops + arraysize(ops));
- return v;
-}
-
-RegexpGenerator::RegexpGenerator(int maxatoms, int maxops,
- const std::vector<std::string>& atoms,
- const std::vector<std::string>& ops)
- : maxatoms_(maxatoms), maxops_(maxops), atoms_(atoms), ops_(ops) {
- // Degenerate case.
- if (atoms_.empty())
- maxatoms_ = 0;
- if (ops_.empty())
- maxops_ = 0;
-}
-
-// Generates all possible regular expressions (within the parameters),
-// calling HandleRegexp for each one.
-void RegexpGenerator::Generate() {
- std::vector<std::string> postfix;
- GeneratePostfix(&postfix, 0, 0, 0);
-}
-
-// Generates random regular expressions, calling HandleRegexp for each one.
-void RegexpGenerator::GenerateRandom(int32_t seed, int n) {
- rng_.seed(seed);
-
- for (int i = 0; i < n; i++) {
- std::vector<std::string> postfix;
- GenerateRandomPostfix(&postfix, 0, 0, 0);
- }
-}
-
-// Counts and returns the number of occurrences of "%s" in s.
-static int CountArgs(const std::string& s) {
- const char *p = s.c_str();
- int n = 0;
- while ((p = strstr(p, "%s")) != NULL) {
- p += 2;
- n++;
- }
- return n;
-}
-
-// Generates all possible postfix command sequences.
-// Each sequence is handed off to RunPostfix to generate a regular expression.
-// The arguments are:
-// post: the current postfix sequence
-// nstk: the number of elements that would be on the stack after executing
-// the sequence
-// ops: the number of operators used in the sequence
-// atoms: the number of atoms used in the sequence
-// For example, if post were ["a", "b", "%s%s", "c"],
-// then nstk = 2, ops = 1, atoms = 3.
-//
-// The initial call should be GeneratePostfix([empty vector], 0, 0, 0).
-//
-void RegexpGenerator::GeneratePostfix(std::vector<std::string>* post,
- int nstk, int ops, int atoms) {
- if (nstk == 1)
- RunPostfix(*post);
-
- // Early out: if used too many operators or can't
- // get back down to a single expression on the stack
- // using binary operators, give up.
- if (ops + nstk - 1 > maxops_)
- return;
-
- // Add atoms if there is room.
- if (atoms < maxatoms_) {
- for (size_t i = 0; i < atoms_.size(); i++) {
- post->push_back(atoms_[i]);
- GeneratePostfix(post, nstk + 1, ops, atoms + 1);
- post->pop_back();
- }
- }
-
- // Add operators if there are enough arguments.
- if (ops < maxops_) {
- for (size_t i = 0; i < ops_.size(); i++) {
- const std::string& fmt = ops_[i];
- int nargs = CountArgs(fmt);
- if (nargs <= nstk) {
- post->push_back(fmt);
- GeneratePostfix(post, nstk - nargs + 1, ops + 1, atoms);
- post->pop_back();
- }
- }
- }
-}
-
-// Generates a random postfix command sequence.
-// Stops and returns true once a single sequence has been generated.
-bool RegexpGenerator::GenerateRandomPostfix(std::vector<std::string>* post,
- int nstk, int ops, int atoms) {
- std::uniform_int_distribution<int> random_stop(0, maxatoms_ - atoms);
- std::uniform_int_distribution<int> random_bit(0, 1);
- std::uniform_int_distribution<int> random_ops_index(
- 0, static_cast<int>(ops_.size()) - 1);
- std::uniform_int_distribution<int> random_atoms_index(
- 0, static_cast<int>(atoms_.size()) - 1);
-
- for (;;) {
- // Stop if we get to a single element, but only sometimes.
- if (nstk == 1 && random_stop(rng_) == 0) {
- RunPostfix(*post);
- return true;
- }
-
- // Early out: if used too many operators or can't
- // get back down to a single expression on the stack
- // using binary operators, give up.
- if (ops + nstk - 1 > maxops_)
- return false;
-
- // Add operators if there are enough arguments.
- if (ops < maxops_ && random_bit(rng_) == 0) {
- const std::string& fmt = ops_[random_ops_index(rng_)];
- int nargs = CountArgs(fmt);
- if (nargs <= nstk) {
- post->push_back(fmt);
- bool ret = GenerateRandomPostfix(post, nstk - nargs + 1,
- ops + 1, atoms);
- post->pop_back();
- if (ret)
- return true;
- }
- }
-
- // Add atoms if there is room.
- if (atoms < maxatoms_ && random_bit(rng_) == 0) {
- post->push_back(atoms_[random_atoms_index(rng_)]);
- bool ret = GenerateRandomPostfix(post, nstk + 1, ops, atoms + 1);
- post->pop_back();
- if (ret)
- return true;
- }
- }
-}
-
-// Interprets the postfix command sequence to create a regular expression
-// passed to HandleRegexp. The results of operators like %s|%s are wrapped
-// in (?: ) to avoid needing to maintain a precedence table.
-void RegexpGenerator::RunPostfix(const std::vector<std::string>& post) {
- std::stack<std::string> regexps;
- for (size_t i = 0; i < post.size(); i++) {
- switch (CountArgs(post[i])) {
- default:
- LOG(FATAL) << "Bad operator: " << post[i];
- case 0:
- regexps.push(post[i]);
- break;
- case 1: {
- std::string a = regexps.top();
- regexps.pop();
- regexps.push("(?:" + StringPrintf(post[i].c_str(), a.c_str()) + ")");
- break;
- }
- case 2: {
- std::string b = regexps.top();
- regexps.pop();
- std::string a = regexps.top();
- regexps.pop();
- regexps.push("(?:" +
- StringPrintf(post[i].c_str(), a.c_str(), b.c_str()) +
- ")");
- break;
- }
- }
- }
-
- if (regexps.size() != 1) {
- // Internal error - should never happen.
- printf("Bad regexp program:\n");
- for (size_t i = 0; i < post.size(); i++) {
- printf(" %s\n", CEscape(post[i]).c_str());
- }
- printf("Stack after running program:\n");
- while (!regexps.empty()) {
- printf(" %s\n", CEscape(regexps.top()).c_str());
- regexps.pop();
- }
- LOG(FATAL) << "Bad regexp program.";
- }
-
- HandleRegexp(regexps.top());
- HandleRegexp("^(?:" + regexps.top() + ")$");
- HandleRegexp("^(?:" + regexps.top() + ")");
- HandleRegexp("(?:" + regexps.top() + ")$");
-}
-
-// Split s into an vector of strings, one for each UTF-8 character.
-std::vector<std::string> Explode(const StringPiece& s) {
- std::vector<std::string> v;
-
- for (const char *q = s.data(); q < s.data() + s.size(); ) {
- const char* p = q;
- Rune r;
- q += chartorune(&r, q);
- v.push_back(std::string(p, q - p));
- }
-
- return v;
-}
-
-// Split string everywhere a substring is found, returning
-// vector of pieces.
-std::vector<std::string> Split(const StringPiece& sep, const StringPiece& s) {
- std::vector<std::string> v;
-
- if (sep.empty())
- return Explode(s);
-
- const char *p = s.data();
- for (const char *q = s.data(); q + sep.size() <= s.data() + s.size(); q++) {
- if (StringPiece(q, sep.size()) == sep) {
- v.push_back(std::string(p, q - p));
- p = q + sep.size();
- q = p - 1; // -1 for ++ in loop
- continue;
- }
- }
- if (p < s.data() + s.size())
- v.push_back(std::string(p, s.data() + s.size() - p));
- return v;
-}
-
-} // namespace re2
+// Copyright 2008 The RE2 Authors. All Rights Reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Regular expression generator: generates all possible
+// regular expressions within parameters (see regexp_generator.h for details).
+
+// The regexp generator first generates a sequence of commands in a simple
+// postfix language. Each command in the language is a string,
+// like "a" or "%s*" or "%s|%s".
+//
+// To evaluate a command, enough arguments are popped from the value stack to
+// plug into the %s slots. Then the result is pushed onto the stack.
+// For example, the command sequence
+// a b %s%s c
+// results in the stack
+// ab c
+//
+// GeneratePostfix generates all possible command sequences.
+// Then RunPostfix turns each sequence into a regular expression
+// and passes the regexp to HandleRegexp.
+
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+#include <memory>
+#include <stack>
+#include <string>
+#include <vector>
+
+#include "library/cpp/testing/gtest/gtest.h"
+#include "util/logging.h"
+#include "util/strutil.h"
+#include "util/utf.h"
+#include "re2/testing/regexp_generator.h"
+
+namespace re2 {
+
+// Returns a vector of the egrep regexp operators.
+const std::vector<std::string>& RegexpGenerator::EgrepOps() {
+ static const char *ops[] = {
+ "%s%s",
+ "%s|%s",
+ "%s*",
+ "%s+",
+ "%s?",
+ "%s\\C*",
+ };
+ static std::vector<std::string> v(ops, ops + arraysize(ops));
+ return v;
+}
+
+RegexpGenerator::RegexpGenerator(int maxatoms, int maxops,
+ const std::vector<std::string>& atoms,
+ const std::vector<std::string>& ops)
+ : maxatoms_(maxatoms), maxops_(maxops), atoms_(atoms), ops_(ops) {
+ // Degenerate case.
+ if (atoms_.empty())
+ maxatoms_ = 0;
+ if (ops_.empty())
+ maxops_ = 0;
+}
+
+// Generates all possible regular expressions (within the parameters),
+// calling HandleRegexp for each one.
+void RegexpGenerator::Generate() {
+ std::vector<std::string> postfix;
+ GeneratePostfix(&postfix, 0, 0, 0);
+}
+
+// Generates random regular expressions, calling HandleRegexp for each one.
+void RegexpGenerator::GenerateRandom(int32_t seed, int n) {
+ rng_.seed(seed);
+
+ for (int i = 0; i < n; i++) {
+ std::vector<std::string> postfix;
+ GenerateRandomPostfix(&postfix, 0, 0, 0);
+ }
+}
+
+// Counts and returns the number of occurrences of "%s" in s.
+static int CountArgs(const std::string& s) {
+ const char *p = s.c_str();
+ int n = 0;
+ while ((p = strstr(p, "%s")) != NULL) {
+ p += 2;
+ n++;
+ }
+ return n;
+}
+
+// Generates all possible postfix command sequences.
+// Each sequence is handed off to RunPostfix to generate a regular expression.
+// The arguments are:
+// post: the current postfix sequence
+// nstk: the number of elements that would be on the stack after executing
+// the sequence
+// ops: the number of operators used in the sequence
+// atoms: the number of atoms used in the sequence
+// For example, if post were ["a", "b", "%s%s", "c"],
+// then nstk = 2, ops = 1, atoms = 3.
+//
+// The initial call should be GeneratePostfix([empty vector], 0, 0, 0).
+//
+void RegexpGenerator::GeneratePostfix(std::vector<std::string>* post,
+ int nstk, int ops, int atoms) {
+ if (nstk == 1)
+ RunPostfix(*post);
+
+ // Early out: if used too many operators or can't
+ // get back down to a single expression on the stack
+ // using binary operators, give up.
+ if (ops + nstk - 1 > maxops_)
+ return;
+
+ // Add atoms if there is room.
+ if (atoms < maxatoms_) {
+ for (size_t i = 0; i < atoms_.size(); i++) {
+ post->push_back(atoms_[i]);
+ GeneratePostfix(post, nstk + 1, ops, atoms + 1);
+ post->pop_back();
+ }
+ }
+
+ // Add operators if there are enough arguments.
+ if (ops < maxops_) {
+ for (size_t i = 0; i < ops_.size(); i++) {
+ const std::string& fmt = ops_[i];
+ int nargs = CountArgs(fmt);
+ if (nargs <= nstk) {
+ post->push_back(fmt);
+ GeneratePostfix(post, nstk - nargs + 1, ops + 1, atoms);
+ post->pop_back();
+ }
+ }
+ }
+}
+
+// Generates a random postfix command sequence.
+// Stops and returns true once a single sequence has been generated.
+bool RegexpGenerator::GenerateRandomPostfix(std::vector<std::string>* post,
+ int nstk, int ops, int atoms) {
+ std::uniform_int_distribution<int> random_stop(0, maxatoms_ - atoms);
+ std::uniform_int_distribution<int> random_bit(0, 1);
+ std::uniform_int_distribution<int> random_ops_index(
+ 0, static_cast<int>(ops_.size()) - 1);
+ std::uniform_int_distribution<int> random_atoms_index(
+ 0, static_cast<int>(atoms_.size()) - 1);
+
+ for (;;) {
+ // Stop if we get to a single element, but only sometimes.
+ if (nstk == 1 && random_stop(rng_) == 0) {
+ RunPostfix(*post);
+ return true;
+ }
+
+ // Early out: if used too many operators or can't
+ // get back down to a single expression on the stack
+ // using binary operators, give up.
+ if (ops + nstk - 1 > maxops_)
+ return false;
+
+ // Add operators if there are enough arguments.
+ if (ops < maxops_ && random_bit(rng_) == 0) {
+ const std::string& fmt = ops_[random_ops_index(rng_)];
+ int nargs = CountArgs(fmt);
+ if (nargs <= nstk) {
+ post->push_back(fmt);
+ bool ret = GenerateRandomPostfix(post, nstk - nargs + 1,
+ ops + 1, atoms);
+ post->pop_back();
+ if (ret)
+ return true;
+ }
+ }
+
+ // Add atoms if there is room.
+ if (atoms < maxatoms_ && random_bit(rng_) == 0) {
+ post->push_back(atoms_[random_atoms_index(rng_)]);
+ bool ret = GenerateRandomPostfix(post, nstk + 1, ops, atoms + 1);
+ post->pop_back();
+ if (ret)
+ return true;
+ }
+ }
+}
+
+// Interprets the postfix command sequence to create a regular expression
+// passed to HandleRegexp. The results of operators like %s|%s are wrapped
+// in (?: ) to avoid needing to maintain a precedence table.
+void RegexpGenerator::RunPostfix(const std::vector<std::string>& post) {
+ std::stack<std::string> regexps;
+ for (size_t i = 0; i < post.size(); i++) {
+ switch (CountArgs(post[i])) {
+ default:
+ LOG(FATAL) << "Bad operator: " << post[i];
+ case 0:
+ regexps.push(post[i]);
+ break;
+ case 1: {
+ std::string a = regexps.top();
+ regexps.pop();
+ regexps.push("(?:" + StringPrintf(post[i].c_str(), a.c_str()) + ")");
+ break;
+ }
+ case 2: {
+ std::string b = regexps.top();
+ regexps.pop();
+ std::string a = regexps.top();
+ regexps.pop();
+ regexps.push("(?:" +
+ StringPrintf(post[i].c_str(), a.c_str(), b.c_str()) +
+ ")");
+ break;
+ }
+ }
+ }
+
+ if (regexps.size() != 1) {
+ // Internal error - should never happen.
+ printf("Bad regexp program:\n");
+ for (size_t i = 0; i < post.size(); i++) {
+ printf(" %s\n", CEscape(post[i]).c_str());
+ }
+ printf("Stack after running program:\n");
+ while (!regexps.empty()) {
+ printf(" %s\n", CEscape(regexps.top()).c_str());
+ regexps.pop();
+ }
+ LOG(FATAL) << "Bad regexp program.";
+ }
+
+ HandleRegexp(regexps.top());
+ HandleRegexp("^(?:" + regexps.top() + ")$");
+ HandleRegexp("^(?:" + regexps.top() + ")");
+ HandleRegexp("(?:" + regexps.top() + ")$");
+}
+
+// Split s into an vector of strings, one for each UTF-8 character.
+std::vector<std::string> Explode(const StringPiece& s) {
+ std::vector<std::string> v;
+
+ for (const char *q = s.data(); q < s.data() + s.size(); ) {
+ const char* p = q;
+ Rune r;
+ q += chartorune(&r, q);
+ v.push_back(std::string(p, q - p));
+ }
+
+ return v;
+}
+
+// Split string everywhere a substring is found, returning
+// vector of pieces.
+std::vector<std::string> Split(const StringPiece& sep, const StringPiece& s) {
+ std::vector<std::string> v;
+
+ if (sep.empty())
+ return Explode(s);
+
+ const char *p = s.data();
+ for (const char *q = s.data(); q + sep.size() <= s.data() + s.size(); q++) {
+ if (StringPiece(q, sep.size()) == sep) {
+ v.push_back(std::string(p, q - p));
+ p = q + sep.size();
+ q = p - 1; // -1 for ++ in loop
+ continue;
+ }
+ }
+ if (p < s.data() + s.size())
+ v.push_back(std::string(p, s.data() + s.size() - p));
+ return v;
+}
+
+} // namespace re2
diff --git a/contrib/libs/re2/re2/testing/regexp_generator.h b/contrib/libs/re2/re2/testing/regexp_generator.h
index b07a4ddb1d..7d72aff889 100644
--- a/contrib/libs/re2/re2/testing/regexp_generator.h
+++ b/contrib/libs/re2/re2/testing/regexp_generator.h
@@ -1,77 +1,77 @@
-// Copyright 2008 The RE2 Authors. All Rights Reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-#ifndef RE2_TESTING_REGEXP_GENERATOR_H_
-#define RE2_TESTING_REGEXP_GENERATOR_H_
-
-// Regular expression generator: generates all possible
-// regular expressions within given parameters (see below for details).
-
-#include <stdint.h>
-#include <random>
-#include <string>
-#include <vector>
-
-#include "util/util.h"
-#include "re2/stringpiece.h"
-
-namespace re2 {
-
-// Regular expression generator.
-//
-// Given a set of atom expressions like "a", "b", or "."
-// and operators like "%s*", generates all possible regular expressions
-// using at most maxbases base expressions and maxops operators.
-// For each such expression re, calls HandleRegexp(re).
-//
-// Callers are expected to subclass RegexpGenerator and provide HandleRegexp.
-//
-class RegexpGenerator {
- public:
- RegexpGenerator(int maxatoms, int maxops,
- const std::vector<std::string>& atoms,
- const std::vector<std::string>& ops);
- virtual ~RegexpGenerator() {}
-
- // Generates all the regular expressions, calling HandleRegexp(re) for each.
- void Generate();
-
- // Generates n random regular expressions, calling HandleRegexp(re) for each.
- void GenerateRandom(int32_t seed, int n);
-
- // Handles a regular expression. Must be provided by subclass.
- virtual void HandleRegexp(const std::string& regexp) = 0;
-
- // The egrep regexp operators: * + ? | and concatenation.
- static const std::vector<std::string>& EgrepOps();
-
- private:
- void RunPostfix(const std::vector<std::string>& post);
- void GeneratePostfix(std::vector<std::string>* post,
- int nstk, int ops, int lits);
- bool GenerateRandomPostfix(std::vector<std::string>* post,
- int nstk, int ops, int lits);
-
- int maxatoms_; // Maximum number of atoms allowed in expr.
- int maxops_; // Maximum number of ops allowed in expr.
- std::vector<std::string> atoms_; // Possible atoms.
- std::vector<std::string> ops_; // Possible ops.
- std::minstd_rand0 rng_; // Random number generator.
-
- RegexpGenerator(const RegexpGenerator&) = delete;
- RegexpGenerator& operator=(const RegexpGenerator&) = delete;
-};
-
-// Helpers for preparing arguments to RegexpGenerator constructor.
-
-// Returns one string for each character in s.
-std::vector<std::string> Explode(const StringPiece& s);
-
-// Splits string everywhere sep is found, returning
-// vector of pieces.
-std::vector<std::string> Split(const StringPiece& sep, const StringPiece& s);
-
-} // namespace re2
-
-#endif // RE2_TESTING_REGEXP_GENERATOR_H_
+// Copyright 2008 The RE2 Authors. All Rights Reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef RE2_TESTING_REGEXP_GENERATOR_H_
+#define RE2_TESTING_REGEXP_GENERATOR_H_
+
+// Regular expression generator: generates all possible
+// regular expressions within given parameters (see below for details).
+
+#include <stdint.h>
+#include <random>
+#include <string>
+#include <vector>
+
+#include "util/util.h"
+#include "re2/stringpiece.h"
+
+namespace re2 {
+
+// Regular expression generator.
+//
+// Given a set of atom expressions like "a", "b", or "."
+// and operators like "%s*", generates all possible regular expressions
+// using at most maxbases base expressions and maxops operators.
+// For each such expression re, calls HandleRegexp(re).
+//
+// Callers are expected to subclass RegexpGenerator and provide HandleRegexp.
+//
+class RegexpGenerator {
+ public:
+ RegexpGenerator(int maxatoms, int maxops,
+ const std::vector<std::string>& atoms,
+ const std::vector<std::string>& ops);
+ virtual ~RegexpGenerator() {}
+
+ // Generates all the regular expressions, calling HandleRegexp(re) for each.
+ void Generate();
+
+ // Generates n random regular expressions, calling HandleRegexp(re) for each.
+ void GenerateRandom(int32_t seed, int n);
+
+ // Handles a regular expression. Must be provided by subclass.
+ virtual void HandleRegexp(const std::string& regexp) = 0;
+
+ // The egrep regexp operators: * + ? | and concatenation.
+ static const std::vector<std::string>& EgrepOps();
+
+ private:
+ void RunPostfix(const std::vector<std::string>& post);
+ void GeneratePostfix(std::vector<std::string>* post,
+ int nstk, int ops, int lits);
+ bool GenerateRandomPostfix(std::vector<std::string>* post,
+ int nstk, int ops, int lits);
+
+ int maxatoms_; // Maximum number of atoms allowed in expr.
+ int maxops_; // Maximum number of ops allowed in expr.
+ std::vector<std::string> atoms_; // Possible atoms.
+ std::vector<std::string> ops_; // Possible ops.
+ std::minstd_rand0 rng_; // Random number generator.
+
+ RegexpGenerator(const RegexpGenerator&) = delete;
+ RegexpGenerator& operator=(const RegexpGenerator&) = delete;
+};
+
+// Helpers for preparing arguments to RegexpGenerator constructor.
+
+// Returns one string for each character in s.
+std::vector<std::string> Explode(const StringPiece& s);
+
+// Splits string everywhere sep is found, returning
+// vector of pieces.
+std::vector<std::string> Split(const StringPiece& sep, const StringPiece& s);
+
+} // namespace re2
+
+#endif // RE2_TESTING_REGEXP_GENERATOR_H_
diff --git a/contrib/libs/re2/re2/testing/regexp_test.cc b/contrib/libs/re2/re2/testing/regexp_test.cc
index 78f9c74093..11fdfed24b 100644
--- a/contrib/libs/re2/re2/testing/regexp_test.cc
+++ b/contrib/libs/re2/re2/testing/regexp_test.cc
@@ -1,86 +1,86 @@
-// Copyright 2006 The RE2 Authors. All Rights Reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// Test parse.cc, dump.cc, and tostring.cc.
-
-#include <stddef.h>
-#include <map>
-#include <string>
-#include <vector>
-
-#include "library/cpp/testing/gtest/gtest.h"
-#include "util/logging.h"
-#include "re2/regexp.h"
-
-namespace re2 {
-
-// Test that overflowed ref counts work.
-TEST(Regexp, BigRef) {
- Regexp* re;
- re = Regexp::Parse("x", Regexp::NoParseFlags, NULL);
- for (int i = 0; i < 100000; i++)
- re->Incref();
- for (int i = 0; i < 100000; i++)
- re->Decref();
- ASSERT_EQ(re->Ref(), 1);
- re->Decref();
-}
-
-// Test that very large Concats work.
-// Depends on overflowed ref counts working.
-TEST(Regexp, BigConcat) {
- Regexp* x;
- x = Regexp::Parse("x", Regexp::NoParseFlags, NULL);
- std::vector<Regexp*> v(90000, x); // ToString bails out at 100000
- for (size_t i = 0; i < v.size(); i++)
- x->Incref();
- ASSERT_EQ(x->Ref(), 1 + static_cast<int>(v.size())) << x->Ref();
- Regexp* re = Regexp::Concat(v.data(), static_cast<int>(v.size()),
- Regexp::NoParseFlags);
- ASSERT_EQ(re->ToString(), std::string(v.size(), 'x'));
- re->Decref();
- ASSERT_EQ(x->Ref(), 1) << x->Ref();
- x->Decref();
-}
-
-TEST(Regexp, NamedCaptures) {
- Regexp* x;
- RegexpStatus status;
- x = Regexp::Parse(
- "(?P<g1>a+)|(e)(?P<g2>w*)+(?P<g1>b+)", Regexp::PerlX, &status);
- EXPECT_TRUE(status.ok());
- EXPECT_EQ(4, x->NumCaptures());
- const std::map<std::string, int>* have = x->NamedCaptures();
- EXPECT_TRUE(have != NULL);
- EXPECT_EQ(2, have->size()); // there are only two named groups in
- // the regexp: 'g1' and 'g2'.
- std::map<std::string, int> want;
- want["g1"] = 1;
- want["g2"] = 3;
- EXPECT_EQ(want, *have);
- x->Decref();
- delete have;
-}
-
-TEST(Regexp, CaptureNames) {
- Regexp* x;
- RegexpStatus status;
- x = Regexp::Parse(
- "(?P<g1>a+)|(e)(?P<g2>w*)+(?P<g1>b+)", Regexp::PerlX, &status);
- EXPECT_TRUE(status.ok());
- EXPECT_EQ(4, x->NumCaptures());
- const std::map<int, std::string>* have = x->CaptureNames();
- EXPECT_TRUE(have != NULL);
- EXPECT_EQ(3, have->size());
- std::map<int, std::string> want;
- want[1] = "g1";
- want[3] = "g2";
- want[4] = "g1";
-
- EXPECT_EQ(want, *have);
- x->Decref();
- delete have;
-}
-
-} // namespace re2
+// Copyright 2006 The RE2 Authors. All Rights Reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Test parse.cc, dump.cc, and tostring.cc.
+
+#include <stddef.h>
+#include <map>
+#include <string>
+#include <vector>
+
+#include "library/cpp/testing/gtest/gtest.h"
+#include "util/logging.h"
+#include "re2/regexp.h"
+
+namespace re2 {
+
+// Test that overflowed ref counts work.
+TEST(Regexp, BigRef) {
+ Regexp* re;
+ re = Regexp::Parse("x", Regexp::NoParseFlags, NULL);
+ for (int i = 0; i < 100000; i++)
+ re->Incref();
+ for (int i = 0; i < 100000; i++)
+ re->Decref();
+ ASSERT_EQ(re->Ref(), 1);
+ re->Decref();
+}
+
+// Test that very large Concats work.
+// Depends on overflowed ref counts working.
+TEST(Regexp, BigConcat) {
+ Regexp* x;
+ x = Regexp::Parse("x", Regexp::NoParseFlags, NULL);
+ std::vector<Regexp*> v(90000, x); // ToString bails out at 100000
+ for (size_t i = 0; i < v.size(); i++)
+ x->Incref();
+ ASSERT_EQ(x->Ref(), 1 + static_cast<int>(v.size())) << x->Ref();
+ Regexp* re = Regexp::Concat(v.data(), static_cast<int>(v.size()),
+ Regexp::NoParseFlags);
+ ASSERT_EQ(re->ToString(), std::string(v.size(), 'x'));
+ re->Decref();
+ ASSERT_EQ(x->Ref(), 1) << x->Ref();
+ x->Decref();
+}
+
+TEST(Regexp, NamedCaptures) {
+ Regexp* x;
+ RegexpStatus status;
+ x = Regexp::Parse(
+ "(?P<g1>a+)|(e)(?P<g2>w*)+(?P<g1>b+)", Regexp::PerlX, &status);
+ EXPECT_TRUE(status.ok());
+ EXPECT_EQ(4, x->NumCaptures());
+ const std::map<std::string, int>* have = x->NamedCaptures();
+ EXPECT_TRUE(have != NULL);
+ EXPECT_EQ(2, have->size()); // there are only two named groups in
+ // the regexp: 'g1' and 'g2'.
+ std::map<std::string, int> want;
+ want["g1"] = 1;
+ want["g2"] = 3;
+ EXPECT_EQ(want, *have);
+ x->Decref();
+ delete have;
+}
+
+TEST(Regexp, CaptureNames) {
+ Regexp* x;
+ RegexpStatus status;
+ x = Regexp::Parse(
+ "(?P<g1>a+)|(e)(?P<g2>w*)+(?P<g1>b+)", Regexp::PerlX, &status);
+ EXPECT_TRUE(status.ok());
+ EXPECT_EQ(4, x->NumCaptures());
+ const std::map<int, std::string>* have = x->CaptureNames();
+ EXPECT_TRUE(have != NULL);
+ EXPECT_EQ(3, have->size());
+ std::map<int, std::string> want;
+ want[1] = "g1";
+ want[3] = "g2";
+ want[4] = "g1";
+
+ EXPECT_EQ(want, *have);
+ x->Decref();
+ delete have;
+}
+
+} // namespace re2
diff --git a/contrib/libs/re2/re2/testing/required_prefix_test.cc b/contrib/libs/re2/re2/testing/required_prefix_test.cc
index 4f82f4c5a0..7fc0f0d973 100644
--- a/contrib/libs/re2/re2/testing/required_prefix_test.cc
+++ b/contrib/libs/re2/re2/testing/required_prefix_test.cc
@@ -1,199 +1,199 @@
-// Copyright 2009 The RE2 Authors. All Rights Reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-#include <string>
-
-#include "library/cpp/testing/gtest/gtest.h"
-#include "util/logging.h"
-#include "re2/prog.h"
-#include "re2/regexp.h"
-
-namespace re2 {
-
-struct PrefixTest {
- const char* regexp;
- bool return_value;
- const char* prefix;
- bool foldcase;
- const char* suffix;
-};
-
-static PrefixTest tests[] = {
- // Empty cases.
- { "", false },
- { "(?m)^", false },
- { "(?-m)^", false },
-
- // If the regexp has no ^, there's no required prefix.
- { "abc", false },
-
- // If the regexp immediately goes into
- // something not a literal match, there's no required prefix.
- { "^a*", false },
- { "^(abc)", false },
-
- // Otherwise, it should work.
- { "^abc$", true, "abc", false, "(?-m:$)" },
- { "^abc", true, "abc", false, "" },
- { "^(?i)abc", true, "abc", true, "" },
- { "^abcd*", true, "abc", false, "d*" },
- { "^[Aa][Bb]cd*", true, "ab", true, "cd*" },
- { "^ab[Cc]d*", true, "ab", false, "[Cc]d*" },
- { "^☺abc", true, "☺abc", false, "" },
-};
-
-TEST(RequiredPrefix, SimpleTests) {
- for (size_t i = 0; i < arraysize(tests); i++) {
- const PrefixTest& t = tests[i];
- for (size_t j = 0; j < 2; j++) {
- Regexp::ParseFlags flags = Regexp::LikePerl;
- if (j == 0)
- flags = flags | Regexp::Latin1;
- Regexp* re = Regexp::Parse(t.regexp, flags, NULL);
- ASSERT_TRUE(re != NULL) << " " << t.regexp;
-
- std::string p;
- bool f;
- Regexp* s;
- ASSERT_EQ(t.return_value, re->RequiredPrefix(&p, &f, &s))
- << " " << t.regexp << " " << (j == 0 ? "latin1" : "utf8")
- << " " << re->Dump();
- if (t.return_value) {
- ASSERT_EQ(p, std::string(t.prefix))
- << " " << t.regexp << " " << (j == 0 ? "latin1" : "utf8");
- ASSERT_EQ(f, t.foldcase)
- << " " << t.regexp << " " << (j == 0 ? "latin1" : "utf8");
- ASSERT_EQ(s->ToString(), std::string(t.suffix))
- << " " << t.regexp << " " << (j == 0 ? "latin1" : "utf8");
- s->Decref();
- }
- re->Decref();
- }
- }
-}
-
-static PrefixTest for_accel_tests[] = {
- // Empty cases.
- { "", false },
- { "(?m)^", false },
- { "(?-m)^", false },
-
- // If the regexp has a ^, there's no required prefix.
- { "^abc", false },
-
- // If the regexp immediately goes into
- // something not a literal match, there's no required prefix.
- { "a*", false },
-
- // Unlike RequiredPrefix(), RequiredPrefixForAccel() can "see through"
- // capturing groups, but doesn't try to glue prefix fragments together.
- { "(a?)def", false },
- { "(ab?)def", true, "a", false },
- { "(abc?)def", true, "ab", false },
- { "(()a)def", false },
- { "((a)b)def", true, "a", false },
- { "((ab)c)def", true, "ab", false },
-
- // Otherwise, it should work.
- { "abc$", true, "abc", false },
- { "abc", true, "abc", false },
- { "(?i)abc", true, "abc", true },
- { "abcd*", true, "abc", false },
- { "[Aa][Bb]cd*", true, "ab", true },
- { "ab[Cc]d*", true, "ab", false },
- { "☺abc", true, "☺abc", false },
-};
-
-TEST(RequiredPrefixForAccel, SimpleTests) {
- for (size_t i = 0; i < arraysize(for_accel_tests); i++) {
- const PrefixTest& t = for_accel_tests[i];
- for (size_t j = 0; j < 2; j++) {
- Regexp::ParseFlags flags = Regexp::LikePerl;
- if (j == 0)
- flags = flags | Regexp::Latin1;
- Regexp* re = Regexp::Parse(t.regexp, flags, NULL);
- ASSERT_TRUE(re != NULL) << " " << t.regexp;
-
- std::string p;
- bool f;
- ASSERT_EQ(t.return_value, re->RequiredPrefixForAccel(&p, &f))
- << " " << t.regexp << " " << (j == 0 ? "latin1" : "utf8")
- << " " << re->Dump();
- if (t.return_value) {
- ASSERT_EQ(p, std::string(t.prefix))
- << " " << t.regexp << " " << (j == 0 ? "latin1" : "utf8");
- ASSERT_EQ(f, t.foldcase)
- << " " << t.regexp << " " << (j == 0 ? "latin1" : "utf8");
- }
- re->Decref();
- }
- }
-}
-
-TEST(RequiredPrefixForAccel, CaseFoldingForKAndS) {
- Regexp* re;
- std::string p;
- bool f;
-
- // With Latin-1 encoding, `(?i)` prefixes can include 'k' and 's'.
- re = Regexp::Parse("(?i)KLM", Regexp::LikePerl|Regexp::Latin1, NULL);
- ASSERT_TRUE(re != NULL);
- ASSERT_TRUE(re->RequiredPrefixForAccel(&p, &f));
- ASSERT_EQ(p, "klm");
- ASSERT_EQ(f, true);
- re->Decref();
-
- re = Regexp::Parse("(?i)STU", Regexp::LikePerl|Regexp::Latin1, NULL);
- ASSERT_TRUE(re != NULL);
- ASSERT_TRUE(re->RequiredPrefixForAccel(&p, &f));
- ASSERT_EQ(p, "stu");
- ASSERT_EQ(f, true);
- re->Decref();
-
- // With UTF-8 encoding, `(?i)` prefixes can't include 'k' and 's'.
- // This is because they match U+212A and U+017F, respectively, and
- // so the parser ends up emitting character classes, not literals.
- re = Regexp::Parse("(?i)KLM", Regexp::LikePerl, NULL);
- ASSERT_TRUE(re != NULL);
- ASSERT_FALSE(re->RequiredPrefixForAccel(&p, &f));
- re->Decref();
-
- re = Regexp::Parse("(?i)STU", Regexp::LikePerl, NULL);
- ASSERT_TRUE(re != NULL);
- ASSERT_FALSE(re->RequiredPrefixForAccel(&p, &f));
- re->Decref();
-}
-
-static const char* prefix_accel_tests[] = {
- "aababc\\d+",
- "(?i)AABABC\\d+",
-};
-
-TEST(PrefixAccel, SimpleTests) {
- for (size_t i = 0; i < arraysize(prefix_accel_tests); i++) {
- const char* pattern = prefix_accel_tests[i];
- Regexp* re = Regexp::Parse(pattern, Regexp::LikePerl, NULL);
- ASSERT_TRUE(re != NULL);
- Prog* prog = re->CompileToProg(0);
- ASSERT_TRUE(prog != NULL);
- ASSERT_TRUE(prog->can_prefix_accel());
- for (int j = 0; j < 100; j++) {
- std::string text(j, 'a');
- const char* p = reinterpret_cast<const char*>(
- prog->PrefixAccel(text.data(), text.size()));
- EXPECT_TRUE(p == NULL);
- text.append("aababc");
- for (int k = 0; k < 100; k++) {
- text.append(k, 'a');
- p = reinterpret_cast<const char*>(
- prog->PrefixAccel(text.data(), text.size()));
- EXPECT_EQ(j, p - text.data());
- }
- }
- delete prog;
- re->Decref();
- }
-}
-
-} // namespace re2
+// Copyright 2009 The RE2 Authors. All Rights Reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <string>
+
+#include "library/cpp/testing/gtest/gtest.h"
+#include "util/logging.h"
+#include "re2/prog.h"
+#include "re2/regexp.h"
+
+namespace re2 {
+
+struct PrefixTest {
+ const char* regexp;
+ bool return_value;
+ const char* prefix;
+ bool foldcase;
+ const char* suffix;
+};
+
+static PrefixTest tests[] = {
+ // Empty cases.
+ { "", false },
+ { "(?m)^", false },
+ { "(?-m)^", false },
+
+ // If the regexp has no ^, there's no required prefix.
+ { "abc", false },
+
+ // If the regexp immediately goes into
+ // something not a literal match, there's no required prefix.
+ { "^a*", false },
+ { "^(abc)", false },
+
+ // Otherwise, it should work.
+ { "^abc$", true, "abc", false, "(?-m:$)" },
+ { "^abc", true, "abc", false, "" },
+ { "^(?i)abc", true, "abc", true, "" },
+ { "^abcd*", true, "abc", false, "d*" },
+ { "^[Aa][Bb]cd*", true, "ab", true, "cd*" },
+ { "^ab[Cc]d*", true, "ab", false, "[Cc]d*" },
+ { "^☺abc", true, "☺abc", false, "" },
+};
+
+TEST(RequiredPrefix, SimpleTests) {
+ for (size_t i = 0; i < arraysize(tests); i++) {
+ const PrefixTest& t = tests[i];
+ for (size_t j = 0; j < 2; j++) {
+ Regexp::ParseFlags flags = Regexp::LikePerl;
+ if (j == 0)
+ flags = flags | Regexp::Latin1;
+ Regexp* re = Regexp::Parse(t.regexp, flags, NULL);
+ ASSERT_TRUE(re != NULL) << " " << t.regexp;
+
+ std::string p;
+ bool f;
+ Regexp* s;
+ ASSERT_EQ(t.return_value, re->RequiredPrefix(&p, &f, &s))
+ << " " << t.regexp << " " << (j == 0 ? "latin1" : "utf8")
+ << " " << re->Dump();
+ if (t.return_value) {
+ ASSERT_EQ(p, std::string(t.prefix))
+ << " " << t.regexp << " " << (j == 0 ? "latin1" : "utf8");
+ ASSERT_EQ(f, t.foldcase)
+ << " " << t.regexp << " " << (j == 0 ? "latin1" : "utf8");
+ ASSERT_EQ(s->ToString(), std::string(t.suffix))
+ << " " << t.regexp << " " << (j == 0 ? "latin1" : "utf8");
+ s->Decref();
+ }
+ re->Decref();
+ }
+ }
+}
+
+static PrefixTest for_accel_tests[] = {
+ // Empty cases.
+ { "", false },
+ { "(?m)^", false },
+ { "(?-m)^", false },
+
+ // If the regexp has a ^, there's no required prefix.
+ { "^abc", false },
+
+ // If the regexp immediately goes into
+ // something not a literal match, there's no required prefix.
+ { "a*", false },
+
+ // Unlike RequiredPrefix(), RequiredPrefixForAccel() can "see through"
+ // capturing groups, but doesn't try to glue prefix fragments together.
+ { "(a?)def", false },
+ { "(ab?)def", true, "a", false },
+ { "(abc?)def", true, "ab", false },
+ { "(()a)def", false },
+ { "((a)b)def", true, "a", false },
+ { "((ab)c)def", true, "ab", false },
+
+ // Otherwise, it should work.
+ { "abc$", true, "abc", false },
+ { "abc", true, "abc", false },
+ { "(?i)abc", true, "abc", true },
+ { "abcd*", true, "abc", false },
+ { "[Aa][Bb]cd*", true, "ab", true },
+ { "ab[Cc]d*", true, "ab", false },
+ { "☺abc", true, "☺abc", false },
+};
+
+TEST(RequiredPrefixForAccel, SimpleTests) {
+ for (size_t i = 0; i < arraysize(for_accel_tests); i++) {
+ const PrefixTest& t = for_accel_tests[i];
+ for (size_t j = 0; j < 2; j++) {
+ Regexp::ParseFlags flags = Regexp::LikePerl;
+ if (j == 0)
+ flags = flags | Regexp::Latin1;
+ Regexp* re = Regexp::Parse(t.regexp, flags, NULL);
+ ASSERT_TRUE(re != NULL) << " " << t.regexp;
+
+ std::string p;
+ bool f;
+ ASSERT_EQ(t.return_value, re->RequiredPrefixForAccel(&p, &f))
+ << " " << t.regexp << " " << (j == 0 ? "latin1" : "utf8")
+ << " " << re->Dump();
+ if (t.return_value) {
+ ASSERT_EQ(p, std::string(t.prefix))
+ << " " << t.regexp << " " << (j == 0 ? "latin1" : "utf8");
+ ASSERT_EQ(f, t.foldcase)
+ << " " << t.regexp << " " << (j == 0 ? "latin1" : "utf8");
+ }
+ re->Decref();
+ }
+ }
+}
+
+TEST(RequiredPrefixForAccel, CaseFoldingForKAndS) {
+ Regexp* re;
+ std::string p;
+ bool f;
+
+ // With Latin-1 encoding, `(?i)` prefixes can include 'k' and 's'.
+ re = Regexp::Parse("(?i)KLM", Regexp::LikePerl|Regexp::Latin1, NULL);
+ ASSERT_TRUE(re != NULL);
+ ASSERT_TRUE(re->RequiredPrefixForAccel(&p, &f));
+ ASSERT_EQ(p, "klm");
+ ASSERT_EQ(f, true);
+ re->Decref();
+
+ re = Regexp::Parse("(?i)STU", Regexp::LikePerl|Regexp::Latin1, NULL);
+ ASSERT_TRUE(re != NULL);
+ ASSERT_TRUE(re->RequiredPrefixForAccel(&p, &f));
+ ASSERT_EQ(p, "stu");
+ ASSERT_EQ(f, true);
+ re->Decref();
+
+ // With UTF-8 encoding, `(?i)` prefixes can't include 'k' and 's'.
+ // This is because they match U+212A and U+017F, respectively, and
+ // so the parser ends up emitting character classes, not literals.
+ re = Regexp::Parse("(?i)KLM", Regexp::LikePerl, NULL);
+ ASSERT_TRUE(re != NULL);
+ ASSERT_FALSE(re->RequiredPrefixForAccel(&p, &f));
+ re->Decref();
+
+ re = Regexp::Parse("(?i)STU", Regexp::LikePerl, NULL);
+ ASSERT_TRUE(re != NULL);
+ ASSERT_FALSE(re->RequiredPrefixForAccel(&p, &f));
+ re->Decref();
+}
+
+static const char* prefix_accel_tests[] = {
+ "aababc\\d+",
+ "(?i)AABABC\\d+",
+};
+
+TEST(PrefixAccel, SimpleTests) {
+ for (size_t i = 0; i < arraysize(prefix_accel_tests); i++) {
+ const char* pattern = prefix_accel_tests[i];
+ Regexp* re = Regexp::Parse(pattern, Regexp::LikePerl, NULL);
+ ASSERT_TRUE(re != NULL);
+ Prog* prog = re->CompileToProg(0);
+ ASSERT_TRUE(prog != NULL);
+ ASSERT_TRUE(prog->can_prefix_accel());
+ for (int j = 0; j < 100; j++) {
+ std::string text(j, 'a');
+ const char* p = reinterpret_cast<const char*>(
+ prog->PrefixAccel(text.data(), text.size()));
+ EXPECT_TRUE(p == NULL);
+ text.append("aababc");
+ for (int k = 0; k < 100; k++) {
+ text.append(k, 'a');
+ p = reinterpret_cast<const char*>(
+ prog->PrefixAccel(text.data(), text.size()));
+ EXPECT_EQ(j, p - text.data());
+ }
+ }
+ delete prog;
+ re->Decref();
+ }
+}
+
+} // namespace re2
diff --git a/contrib/libs/re2/re2/testing/search_test.cc b/contrib/libs/re2/re2/testing/search_test.cc
index f80aadd249..2539295618 100644
--- a/contrib/libs/re2/re2/testing/search_test.cc
+++ b/contrib/libs/re2/re2/testing/search_test.cc
@@ -1,334 +1,334 @@
-// Copyright 2006-2007 The RE2 Authors. All Rights Reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-#include "library/cpp/testing/gtest/gtest.h"
-#include "re2/prog.h"
-#include "re2/regexp.h"
-#include "re2/testing/tester.h"
-#include "re2/testing/exhaustive_tester.h"
-
-// For target `log' in the Makefile.
-#ifndef LOGGING
-#define LOGGING 0
-#endif
-
-namespace re2 {
-
-struct RegexpTest {
- const char* regexp;
- const char* text;
-};
-
-RegexpTest simple_tests[] = {
- { "a", "a" },
- { "a", "zyzzyva" },
- { "a+", "aa" },
- { "(a+|b)+", "ab" },
- { "ab|cd", "xabcdx" },
- { "h.*od?", "hello\ngoodbye\n" },
- { "h.*o", "hello\ngoodbye\n" },
- { "h.*o", "goodbye\nhello\n" },
- { "h.*o", "hello world" },
- { "h.*o", "othello, world" },
- { "[^\\s\\S]", "aaaaaaa" },
- { "a", "aaaaaaa" },
- { "a*", "aaaaaaa" },
- { "a*", "" },
- { "ab|cd", "xabcdx" },
- { "a", "cab" },
- { "a*b", "cab" },
- { "((((((((((((((((((((x))))))))))))))))))))", "x" },
- { "[abcd]", "xxxabcdxxx" },
- { "[^x]", "xxxabcdxxx" },
- { "[abcd]+", "xxxabcdxxx" },
- { "[^x]+", "xxxabcdxxx" },
- { "(fo|foo)", "fo" },
- { "(foo|fo)", "foo" },
-
- { "aa", "aA" },
- { "a", "Aa" },
- { "a", "A" },
- { "ABC", "abc" },
- { "abc", "XABCY" },
- { "ABC", "xabcy" },
-
- // Make sure ^ and $ work.
- // The pathological cases didn't work
- // in the original grep code.
- { "foo|bar|[A-Z]", "foo" },
- { "^(foo|bar|[A-Z])", "foo" },
- { "(foo|bar|[A-Z])$", "foo\n" },
- { "(foo|bar|[A-Z])$", "foo" },
- { "^(foo|bar|[A-Z])$", "foo\n" },
- { "^(foo|bar|[A-Z])$", "foo" },
- { "^(foo|bar|[A-Z])$", "bar" },
- { "^(foo|bar|[A-Z])$", "X" },
- { "^(foo|bar|[A-Z])$", "XY" },
- { "^(fo|foo)$", "fo" },
- { "^(fo|foo)$", "foo" },
- { "^^(fo|foo)$", "fo" },
- { "^^(fo|foo)$", "foo" },
- { "^$", "" },
- { "^$", "x" },
- { "^^$", "" },
- { "^$$", "" },
- { "^^$", "x" },
- { "^$$", "x" },
- { "^^$$", "" },
- { "^^$$", "x" },
- { "^^^^^^^^$$$$$$$$", "" },
- { "^", "x" },
- { "$", "x" },
-
- // Word boundaries.
- { "\\bfoo\\b", "nofoo foo that" },
- { "a\\b", "faoa x" },
- { "\\bbar", "bar x" },
- { "\\bbar", "foo\nbar x" },
- { "bar\\b", "foobar" },
- { "bar\\b", "foobar\nxxx" },
- { "(foo|bar|[A-Z])\\b", "foo" },
- { "(foo|bar|[A-Z])\\b", "foo\n" },
- { "\\b", "" },
- { "\\b", "x" },
- { "\\b(foo|bar|[A-Z])", "foo" },
- { "\\b(foo|bar|[A-Z])\\b", "X" },
- { "\\b(foo|bar|[A-Z])\\b", "XY" },
- { "\\b(foo|bar|[A-Z])\\b", "bar" },
- { "\\b(foo|bar|[A-Z])\\b", "foo" },
- { "\\b(foo|bar|[A-Z])\\b", "foo\n" },
- { "\\b(foo|bar|[A-Z])\\b", "ffoo bbar N x" },
- { "\\b(fo|foo)\\b", "fo" },
- { "\\b(fo|foo)\\b", "foo" },
- { "\\b\\b", "" },
- { "\\b\\b", "x" },
- { "\\b$", "" },
- { "\\b$", "x" },
- { "\\b$", "y x" },
- { "\\b.$", "x" },
- { "^\\b(fo|foo)\\b", "fo" },
- { "^\\b(fo|foo)\\b", "foo" },
- { "^\\b", "" },
- { "^\\b", "x" },
- { "^\\b\\b", "" },
- { "^\\b\\b", "x" },
- { "^\\b$", "" },
- { "^\\b$", "x" },
- { "^\\b.$", "x" },
- { "^\\b.\\b$", "x" },
- { "^^^^^^^^\\b$$$$$$$", "" },
- { "^^^^^^^^\\b.$$$$$$", "x" },
- { "^^^^^^^^\\b$$$$$$$", "x" },
-
- // Non-word boundaries.
- { "\\Bfoo\\B", "n foo xfoox that" },
- { "a\\B", "faoa x" },
- { "\\Bbar", "bar x" },
- { "\\Bbar", "foo\nbar x" },
- { "bar\\B", "foobar" },
- { "bar\\B", "foobar\nxxx" },
- { "(foo|bar|[A-Z])\\B", "foox" },
- { "(foo|bar|[A-Z])\\B", "foo\n" },
- { "\\B", "" },
- { "\\B", "x" },
- { "\\B(foo|bar|[A-Z])", "foo" },
- { "\\B(foo|bar|[A-Z])\\B", "xXy" },
- { "\\B(foo|bar|[A-Z])\\B", "XY" },
- { "\\B(foo|bar|[A-Z])\\B", "XYZ" },
- { "\\B(foo|bar|[A-Z])\\B", "abara" },
- { "\\B(foo|bar|[A-Z])\\B", "xfoo_" },
- { "\\B(foo|bar|[A-Z])\\B", "xfoo\n" },
- { "\\B(foo|bar|[A-Z])\\B", "foo bar vNx" },
- { "\\B(fo|foo)\\B", "xfoo" },
- { "\\B(foo|fo)\\B", "xfooo" },
- { "\\B\\B", "" },
- { "\\B\\B", "x" },
- { "\\B$", "" },
- { "\\B$", "x" },
- { "\\B$", "y x" },
- { "\\B.$", "x" },
- { "^\\B(fo|foo)\\B", "fo" },
- { "^\\B(fo|foo)\\B", "foo" },
- { "^\\B", "" },
- { "^\\B", "x" },
- { "^\\B\\B", "" },
- { "^\\B\\B", "x" },
- { "^\\B$", "" },
- { "^\\B$", "x" },
- { "^\\B.$", "x" },
- { "^\\B.\\B$", "x" },
- { "^^^^^^^^\\B$$$$$$$", "" },
- { "^^^^^^^^\\B.$$$$$$", "x" },
- { "^^^^^^^^\\B$$$$$$$", "x" },
-
- // PCRE uses only ASCII for \b computation.
- // All non-ASCII are *not* word characters.
- { "\\bx\\b", "x" },
- { "\\bx\\b", "x>" },
- { "\\bx\\b", "<x" },
- { "\\bx\\b", "<x>" },
- { "\\bx\\b", "ax" },
- { "\\bx\\b", "xb" },
- { "\\bx\\b", "axb" },
- { "\\bx\\b", "«x" },
- { "\\bx\\b", "x»" },
- { "\\bx\\b", "«x»" },
- { "\\bx\\b", "axb" },
- { "\\bx\\b", "áxβ" },
- { "\\Bx\\B", "axb" },
- { "\\Bx\\B", "áxβ" },
-
- // Weird boundary cases.
- { "^$^$", "" },
- { "^$^", "" },
- { "$^$", "" },
-
- { "^$^$", "x" },
- { "^$^", "x" },
- { "$^$", "x" },
-
- { "^$^$", "x\ny" },
- { "^$^", "x\ny" },
- { "$^$", "x\ny" },
-
- { "^$^$", "x\n\ny" },
- { "^$^", "x\n\ny" },
- { "$^$", "x\n\ny" },
-
- { "^(foo\\$)$", "foo$bar" },
- { "(foo\\$)", "foo$bar" },
- { "^...$", "abc" },
-
- // UTF-8
- { "^\xe6\x9c\xac$", "\xe6\x9c\xac" },
- { "^...$", "\xe6\x97\xa5\xe6\x9c\xac\xe8\xaa\x9e" },
- { "^...$", ".\xe6\x9c\xac." },
-
- { "^\\C\\C\\C$", "\xe6\x9c\xac" },
- { "^\\C$", "\xe6\x9c\xac" },
- { "^\\C\\C\\C$", "\xe6\x97\xa5\xe6\x9c\xac\xe8\xaa\x9e" },
-
- // Latin1
- { "^...$", "\xe6\x97\xa5\xe6\x9c\xac\xe8\xaa\x9e" },
- { "^.........$", "\xe6\x97\xa5\xe6\x9c\xac\xe8\xaa\x9e" },
- { "^...$", ".\xe6\x9c\xac." },
- { "^.....$", ".\xe6\x9c\xac." },
-
- // Perl v Posix
- { "\\B(fo|foo)\\B", "xfooo" },
- { "(fo|foo)", "foo" },
-
- // Octal escapes.
- { "\\141", "a" },
- { "\\060", "0" },
- { "\\0600", "00" },
- { "\\608", "08" },
- { "\\01", "\01" },
- { "\\018", "\01" "8" },
-
- // Hexadecimal escapes
- { "\\x{61}", "a" },
- { "\\x61", "a" },
- { "\\x{00000061}", "a" },
-
- // Unicode scripts.
- { "\\p{Greek}+", "aαβb" },
- { "\\P{Greek}+", "aαβb" },
- { "\\p{^Greek}+", "aαβb" },
- { "\\P{^Greek}+", "aαβb" },
-
- // Unicode properties. Nd is decimal number. N is any number.
- { "[^0-9]+", "abc123" },
- { "\\p{Nd}+", "abc123²³¼½¾₀₉" },
- { "\\p{^Nd}+", "abc123²³¼½¾₀₉" },
- { "\\P{Nd}+", "abc123²³¼½¾₀₉" },
- { "\\P{^Nd}+", "abc123²³¼½¾₀₉" },
- { "\\pN+", "abc123²³¼½¾₀₉" },
- { "\\p{N}+", "abc123²³¼½¾₀₉" },
- { "\\p{^N}+", "abc123²³¼½¾₀₉" },
-
- { "\\p{Any}+", "abc123" },
-
- // Character classes & case folding.
- { "(?i)[@-A]+", "@AaB" }, // matches @Aa but not B
- { "(?i)[A-Z]+", "aAzZ" },
- { "(?i)[^\\\\]+", "Aa\\" }, // \\ is between A-Z and a-z -
- // splits the ranges in an interesting way.
-
- // would like to use, but PCRE mishandles in full-match, non-greedy mode
- // { "(?i)[\\\\]+", "Aa" },
-
- { "(?i)[acegikmoqsuwy]+", "acegikmoqsuwyACEGIKMOQSUWY" },
-
- // Character classes & case folding.
- { "[@-A]+", "@AaB" },
- { "[A-Z]+", "aAzZ" },
- { "[^\\\\]+", "Aa\\" },
- { "[acegikmoqsuwy]+", "acegikmoqsuwyACEGIKMOQSUWY" },
-
- // Anchoring. (^abc in aabcdef was a former bug)
- // The tester checks for a match in the text and
- // subpieces of the text with a byte removed on either side.
- { "^abc", "abcdef" },
- { "^abc", "aabcdef" },
- { "^[ay]*[bx]+c", "abcdef" },
- { "^[ay]*[bx]+c", "aabcdef" },
- { "def$", "abcdef" },
- { "def$", "abcdeff" },
- { "d[ex][fy]$", "abcdef" },
- { "d[ex][fy]$", "abcdeff" },
- { "[dz][ex][fy]$", "abcdef" },
- { "[dz][ex][fy]$", "abcdeff" },
- { "(?m)^abc", "abcdef" },
- { "(?m)^abc", "aabcdef" },
- { "(?m)^[ay]*[bx]+c", "abcdef" },
- { "(?m)^[ay]*[bx]+c", "aabcdef" },
- { "(?m)def$", "abcdef" },
- { "(?m)def$", "abcdeff" },
- { "(?m)d[ex][fy]$", "abcdef" },
- { "(?m)d[ex][fy]$", "abcdeff" },
- { "(?m)[dz][ex][fy]$", "abcdef" },
- { "(?m)[dz][ex][fy]$", "abcdeff" },
- { "^", "a" },
- { "^^", "a" },
-
- // Context.
- // The tester checks for a match in the text and
- // subpieces of the text with a byte removed on either side.
- { "a", "a" },
- { "ab*", "a" },
- { "a\\C*", "a" },
- { "a\\C+", "a" },
- { "a\\C?", "a" },
- { "a\\C*?", "a" },
- { "a\\C+?", "a" },
- { "a\\C??", "a" },
-
- // Former bugs.
- { "a\\C*|ba\\C", "baba" },
- { "\\w*I\\w*", "Inc." },
- { "(?:|a)*", "aaa" },
- { "(?:|a)+", "aaa" },
-};
-
-TEST(Regexp, SearchTests) {
- int failures = 0;
- for (size_t i = 0; i < arraysize(simple_tests); i++) {
- const RegexpTest& t = simple_tests[i];
- if (!TestRegexpOnText(t.regexp, t.text))
- failures++;
-
- if (LOGGING) {
- // Build a dummy ExhaustiveTest call that will trigger just
- // this one test, so that we log the test case.
- std::vector<std::string> atom, alpha, ops;
- atom.push_back(t.regexp);
- alpha.push_back(t.text);
- ExhaustiveTest(1, 0, atom, ops, 1, alpha, "", "");
- }
- }
- EXPECT_EQ(failures, 0);
-}
-
-} // namespace re2
+// Copyright 2006-2007 The RE2 Authors. All Rights Reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "library/cpp/testing/gtest/gtest.h"
+#include "re2/prog.h"
+#include "re2/regexp.h"
+#include "re2/testing/tester.h"
+#include "re2/testing/exhaustive_tester.h"
+
+// For target `log' in the Makefile.
+#ifndef LOGGING
+#define LOGGING 0
+#endif
+
+namespace re2 {
+
+struct RegexpTest {
+ const char* regexp;
+ const char* text;
+};
+
+RegexpTest simple_tests[] = {
+ { "a", "a" },
+ { "a", "zyzzyva" },
+ { "a+", "aa" },
+ { "(a+|b)+", "ab" },
+ { "ab|cd", "xabcdx" },
+ { "h.*od?", "hello\ngoodbye\n" },
+ { "h.*o", "hello\ngoodbye\n" },
+ { "h.*o", "goodbye\nhello\n" },
+ { "h.*o", "hello world" },
+ { "h.*o", "othello, world" },
+ { "[^\\s\\S]", "aaaaaaa" },
+ { "a", "aaaaaaa" },
+ { "a*", "aaaaaaa" },
+ { "a*", "" },
+ { "ab|cd", "xabcdx" },
+ { "a", "cab" },
+ { "a*b", "cab" },
+ { "((((((((((((((((((((x))))))))))))))))))))", "x" },
+ { "[abcd]", "xxxabcdxxx" },
+ { "[^x]", "xxxabcdxxx" },
+ { "[abcd]+", "xxxabcdxxx" },
+ { "[^x]+", "xxxabcdxxx" },
+ { "(fo|foo)", "fo" },
+ { "(foo|fo)", "foo" },
+
+ { "aa", "aA" },
+ { "a", "Aa" },
+ { "a", "A" },
+ { "ABC", "abc" },
+ { "abc", "XABCY" },
+ { "ABC", "xabcy" },
+
+ // Make sure ^ and $ work.
+ // The pathological cases didn't work
+ // in the original grep code.
+ { "foo|bar|[A-Z]", "foo" },
+ { "^(foo|bar|[A-Z])", "foo" },
+ { "(foo|bar|[A-Z])$", "foo\n" },
+ { "(foo|bar|[A-Z])$", "foo" },
+ { "^(foo|bar|[A-Z])$", "foo\n" },
+ { "^(foo|bar|[A-Z])$", "foo" },
+ { "^(foo|bar|[A-Z])$", "bar" },
+ { "^(foo|bar|[A-Z])$", "X" },
+ { "^(foo|bar|[A-Z])$", "XY" },
+ { "^(fo|foo)$", "fo" },
+ { "^(fo|foo)$", "foo" },
+ { "^^(fo|foo)$", "fo" },
+ { "^^(fo|foo)$", "foo" },
+ { "^$", "" },
+ { "^$", "x" },
+ { "^^$", "" },
+ { "^$$", "" },
+ { "^^$", "x" },
+ { "^$$", "x" },
+ { "^^$$", "" },
+ { "^^$$", "x" },
+ { "^^^^^^^^$$$$$$$$", "" },
+ { "^", "x" },
+ { "$", "x" },
+
+ // Word boundaries.
+ { "\\bfoo\\b", "nofoo foo that" },
+ { "a\\b", "faoa x" },
+ { "\\bbar", "bar x" },
+ { "\\bbar", "foo\nbar x" },
+ { "bar\\b", "foobar" },
+ { "bar\\b", "foobar\nxxx" },
+ { "(foo|bar|[A-Z])\\b", "foo" },
+ { "(foo|bar|[A-Z])\\b", "foo\n" },
+ { "\\b", "" },
+ { "\\b", "x" },
+ { "\\b(foo|bar|[A-Z])", "foo" },
+ { "\\b(foo|bar|[A-Z])\\b", "X" },
+ { "\\b(foo|bar|[A-Z])\\b", "XY" },
+ { "\\b(foo|bar|[A-Z])\\b", "bar" },
+ { "\\b(foo|bar|[A-Z])\\b", "foo" },
+ { "\\b(foo|bar|[A-Z])\\b", "foo\n" },
+ { "\\b(foo|bar|[A-Z])\\b", "ffoo bbar N x" },
+ { "\\b(fo|foo)\\b", "fo" },
+ { "\\b(fo|foo)\\b", "foo" },
+ { "\\b\\b", "" },
+ { "\\b\\b", "x" },
+ { "\\b$", "" },
+ { "\\b$", "x" },
+ { "\\b$", "y x" },
+ { "\\b.$", "x" },
+ { "^\\b(fo|foo)\\b", "fo" },
+ { "^\\b(fo|foo)\\b", "foo" },
+ { "^\\b", "" },
+ { "^\\b", "x" },
+ { "^\\b\\b", "" },
+ { "^\\b\\b", "x" },
+ { "^\\b$", "" },
+ { "^\\b$", "x" },
+ { "^\\b.$", "x" },
+ { "^\\b.\\b$", "x" },
+ { "^^^^^^^^\\b$$$$$$$", "" },
+ { "^^^^^^^^\\b.$$$$$$", "x" },
+ { "^^^^^^^^\\b$$$$$$$", "x" },
+
+ // Non-word boundaries.
+ { "\\Bfoo\\B", "n foo xfoox that" },
+ { "a\\B", "faoa x" },
+ { "\\Bbar", "bar x" },
+ { "\\Bbar", "foo\nbar x" },
+ { "bar\\B", "foobar" },
+ { "bar\\B", "foobar\nxxx" },
+ { "(foo|bar|[A-Z])\\B", "foox" },
+ { "(foo|bar|[A-Z])\\B", "foo\n" },
+ { "\\B", "" },
+ { "\\B", "x" },
+ { "\\B(foo|bar|[A-Z])", "foo" },
+ { "\\B(foo|bar|[A-Z])\\B", "xXy" },
+ { "\\B(foo|bar|[A-Z])\\B", "XY" },
+ { "\\B(foo|bar|[A-Z])\\B", "XYZ" },
+ { "\\B(foo|bar|[A-Z])\\B", "abara" },
+ { "\\B(foo|bar|[A-Z])\\B", "xfoo_" },
+ { "\\B(foo|bar|[A-Z])\\B", "xfoo\n" },
+ { "\\B(foo|bar|[A-Z])\\B", "foo bar vNx" },
+ { "\\B(fo|foo)\\B", "xfoo" },
+ { "\\B(foo|fo)\\B", "xfooo" },
+ { "\\B\\B", "" },
+ { "\\B\\B", "x" },
+ { "\\B$", "" },
+ { "\\B$", "x" },
+ { "\\B$", "y x" },
+ { "\\B.$", "x" },
+ { "^\\B(fo|foo)\\B", "fo" },
+ { "^\\B(fo|foo)\\B", "foo" },
+ { "^\\B", "" },
+ { "^\\B", "x" },
+ { "^\\B\\B", "" },
+ { "^\\B\\B", "x" },
+ { "^\\B$", "" },
+ { "^\\B$", "x" },
+ { "^\\B.$", "x" },
+ { "^\\B.\\B$", "x" },
+ { "^^^^^^^^\\B$$$$$$$", "" },
+ { "^^^^^^^^\\B.$$$$$$", "x" },
+ { "^^^^^^^^\\B$$$$$$$", "x" },
+
+ // PCRE uses only ASCII for \b computation.
+ // All non-ASCII are *not* word characters.
+ { "\\bx\\b", "x" },
+ { "\\bx\\b", "x>" },
+ { "\\bx\\b", "<x" },
+ { "\\bx\\b", "<x>" },
+ { "\\bx\\b", "ax" },
+ { "\\bx\\b", "xb" },
+ { "\\bx\\b", "axb" },
+ { "\\bx\\b", "«x" },
+ { "\\bx\\b", "x»" },
+ { "\\bx\\b", "«x»" },
+ { "\\bx\\b", "axb" },
+ { "\\bx\\b", "áxβ" },
+ { "\\Bx\\B", "axb" },
+ { "\\Bx\\B", "áxβ" },
+
+ // Weird boundary cases.
+ { "^$^$", "" },
+ { "^$^", "" },
+ { "$^$", "" },
+
+ { "^$^$", "x" },
+ { "^$^", "x" },
+ { "$^$", "x" },
+
+ { "^$^$", "x\ny" },
+ { "^$^", "x\ny" },
+ { "$^$", "x\ny" },
+
+ { "^$^$", "x\n\ny" },
+ { "^$^", "x\n\ny" },
+ { "$^$", "x\n\ny" },
+
+ { "^(foo\\$)$", "foo$bar" },
+ { "(foo\\$)", "foo$bar" },
+ { "^...$", "abc" },
+
+ // UTF-8
+ { "^\xe6\x9c\xac$", "\xe6\x9c\xac" },
+ { "^...$", "\xe6\x97\xa5\xe6\x9c\xac\xe8\xaa\x9e" },
+ { "^...$", ".\xe6\x9c\xac." },
+
+ { "^\\C\\C\\C$", "\xe6\x9c\xac" },
+ { "^\\C$", "\xe6\x9c\xac" },
+ { "^\\C\\C\\C$", "\xe6\x97\xa5\xe6\x9c\xac\xe8\xaa\x9e" },
+
+ // Latin1
+ { "^...$", "\xe6\x97\xa5\xe6\x9c\xac\xe8\xaa\x9e" },
+ { "^.........$", "\xe6\x97\xa5\xe6\x9c\xac\xe8\xaa\x9e" },
+ { "^...$", ".\xe6\x9c\xac." },
+ { "^.....$", ".\xe6\x9c\xac." },
+
+ // Perl v Posix
+ { "\\B(fo|foo)\\B", "xfooo" },
+ { "(fo|foo)", "foo" },
+
+ // Octal escapes.
+ { "\\141", "a" },
+ { "\\060", "0" },
+ { "\\0600", "00" },
+ { "\\608", "08" },
+ { "\\01", "\01" },
+ { "\\018", "\01" "8" },
+
+ // Hexadecimal escapes
+ { "\\x{61}", "a" },
+ { "\\x61", "a" },
+ { "\\x{00000061}", "a" },
+
+ // Unicode scripts.
+ { "\\p{Greek}+", "aαβb" },
+ { "\\P{Greek}+", "aαβb" },
+ { "\\p{^Greek}+", "aαβb" },
+ { "\\P{^Greek}+", "aαβb" },
+
+ // Unicode properties. Nd is decimal number. N is any number.
+ { "[^0-9]+", "abc123" },
+ { "\\p{Nd}+", "abc123²³¼½¾₀₉" },
+ { "\\p{^Nd}+", "abc123²³¼½¾₀₉" },
+ { "\\P{Nd}+", "abc123²³¼½¾₀₉" },
+ { "\\P{^Nd}+", "abc123²³¼½¾₀₉" },
+ { "\\pN+", "abc123²³¼½¾₀₉" },
+ { "\\p{N}+", "abc123²³¼½¾₀₉" },
+ { "\\p{^N}+", "abc123²³¼½¾₀₉" },
+
+ { "\\p{Any}+", "abc123" },
+
+ // Character classes & case folding.
+ { "(?i)[@-A]+", "@AaB" }, // matches @Aa but not B
+ { "(?i)[A-Z]+", "aAzZ" },
+ { "(?i)[^\\\\]+", "Aa\\" }, // \\ is between A-Z and a-z -
+ // splits the ranges in an interesting way.
+
+ // would like to use, but PCRE mishandles in full-match, non-greedy mode
+ // { "(?i)[\\\\]+", "Aa" },
+
+ { "(?i)[acegikmoqsuwy]+", "acegikmoqsuwyACEGIKMOQSUWY" },
+
+ // Character classes & case folding.
+ { "[@-A]+", "@AaB" },
+ { "[A-Z]+", "aAzZ" },
+ { "[^\\\\]+", "Aa\\" },
+ { "[acegikmoqsuwy]+", "acegikmoqsuwyACEGIKMOQSUWY" },
+
+ // Anchoring. (^abc in aabcdef was a former bug)
+ // The tester checks for a match in the text and
+ // subpieces of the text with a byte removed on either side.
+ { "^abc", "abcdef" },
+ { "^abc", "aabcdef" },
+ { "^[ay]*[bx]+c", "abcdef" },
+ { "^[ay]*[bx]+c", "aabcdef" },
+ { "def$", "abcdef" },
+ { "def$", "abcdeff" },
+ { "d[ex][fy]$", "abcdef" },
+ { "d[ex][fy]$", "abcdeff" },
+ { "[dz][ex][fy]$", "abcdef" },
+ { "[dz][ex][fy]$", "abcdeff" },
+ { "(?m)^abc", "abcdef" },
+ { "(?m)^abc", "aabcdef" },
+ { "(?m)^[ay]*[bx]+c", "abcdef" },
+ { "(?m)^[ay]*[bx]+c", "aabcdef" },
+ { "(?m)def$", "abcdef" },
+ { "(?m)def$", "abcdeff" },
+ { "(?m)d[ex][fy]$", "abcdef" },
+ { "(?m)d[ex][fy]$", "abcdeff" },
+ { "(?m)[dz][ex][fy]$", "abcdef" },
+ { "(?m)[dz][ex][fy]$", "abcdeff" },
+ { "^", "a" },
+ { "^^", "a" },
+
+ // Context.
+ // The tester checks for a match in the text and
+ // subpieces of the text with a byte removed on either side.
+ { "a", "a" },
+ { "ab*", "a" },
+ { "a\\C*", "a" },
+ { "a\\C+", "a" },
+ { "a\\C?", "a" },
+ { "a\\C*?", "a" },
+ { "a\\C+?", "a" },
+ { "a\\C??", "a" },
+
+ // Former bugs.
+ { "a\\C*|ba\\C", "baba" },
+ { "\\w*I\\w*", "Inc." },
+ { "(?:|a)*", "aaa" },
+ { "(?:|a)+", "aaa" },
+};
+
+TEST(Regexp, SearchTests) {
+ int failures = 0;
+ for (size_t i = 0; i < arraysize(simple_tests); i++) {
+ const RegexpTest& t = simple_tests[i];
+ if (!TestRegexpOnText(t.regexp, t.text))
+ failures++;
+
+ if (LOGGING) {
+ // Build a dummy ExhaustiveTest call that will trigger just
+ // this one test, so that we log the test case.
+ std::vector<std::string> atom, alpha, ops;
+ atom.push_back(t.regexp);
+ alpha.push_back(t.text);
+ ExhaustiveTest(1, 0, atom, ops, 1, alpha, "", "");
+ }
+ }
+ EXPECT_EQ(failures, 0);
+}
+
+} // namespace re2
diff --git a/contrib/libs/re2/re2/testing/set_test.cc b/contrib/libs/re2/re2/testing/set_test.cc
index 140c7476d4..14ff3e79c0 100644
--- a/contrib/libs/re2/re2/testing/set_test.cc
+++ b/contrib/libs/re2/re2/testing/set_test.cc
@@ -1,230 +1,230 @@
-// Copyright 2010 The RE2 Authors. All Rights Reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-#include <stddef.h>
-#include <string>
-#include <vector>
-#include <utility>
-
-#include "library/cpp/testing/gtest/gtest.h"
-#include "util/logging.h"
-#include "re2/re2.h"
-#include "re2/set.h"
-
-namespace re2 {
-
-TEST(Set, Unanchored) {
- RE2::Set s(RE2::DefaultOptions, RE2::UNANCHORED);
-
- ASSERT_EQ(s.Add("foo", NULL), 0);
- ASSERT_EQ(s.Add("(", NULL), -1);
- ASSERT_EQ(s.Add("bar", NULL), 1);
- ASSERT_EQ(s.Compile(), true);
-
- ASSERT_EQ(s.Match("foobar", NULL), true);
- ASSERT_EQ(s.Match("fooba", NULL), true);
- ASSERT_EQ(s.Match("oobar", NULL), true);
-
- std::vector<int> v;
- ASSERT_EQ(s.Match("foobar", &v), true);
- ASSERT_EQ(v.size(), 2);
- ASSERT_EQ(v[0], 0);
- ASSERT_EQ(v[1], 1);
-
- ASSERT_EQ(s.Match("fooba", &v), true);
- ASSERT_EQ(v.size(), 1);
- ASSERT_EQ(v[0], 0);
-
- ASSERT_EQ(s.Match("oobar", &v), true);
- ASSERT_EQ(v.size(), 1);
- ASSERT_EQ(v[0], 1);
-}
-
-TEST(Set, UnanchoredFactored) {
- RE2::Set s(RE2::DefaultOptions, RE2::UNANCHORED);
-
- ASSERT_EQ(s.Add("foo", NULL), 0);
- ASSERT_EQ(s.Add("(", NULL), -1);
- ASSERT_EQ(s.Add("foobar", NULL), 1);
- ASSERT_EQ(s.Compile(), true);
-
- ASSERT_EQ(s.Match("foobar", NULL), true);
- ASSERT_EQ(s.Match("obarfoobaroo", NULL), true);
- ASSERT_EQ(s.Match("fooba", NULL), true);
- ASSERT_EQ(s.Match("oobar", NULL), false);
-
- std::vector<int> v;
- ASSERT_EQ(s.Match("foobar", &v), true);
- ASSERT_EQ(v.size(), 2);
- ASSERT_EQ(v[0], 0);
- ASSERT_EQ(v[1], 1);
-
- ASSERT_EQ(s.Match("obarfoobaroo", &v), true);
- ASSERT_EQ(v.size(), 2);
- ASSERT_EQ(v[0], 0);
- ASSERT_EQ(v[1], 1);
-
- ASSERT_EQ(s.Match("fooba", &v), true);
- ASSERT_EQ(v.size(), 1);
- ASSERT_EQ(v[0], 0);
-
- ASSERT_EQ(s.Match("oobar", &v), false);
- ASSERT_EQ(v.size(), 0);
-}
-
-TEST(Set, UnanchoredDollar) {
- RE2::Set s(RE2::DefaultOptions, RE2::UNANCHORED);
-
- ASSERT_EQ(s.Add("foo$", NULL), 0);
- ASSERT_EQ(s.Compile(), true);
-
- ASSERT_EQ(s.Match("foo", NULL), true);
- ASSERT_EQ(s.Match("foobar", NULL), false);
-
- std::vector<int> v;
- ASSERT_EQ(s.Match("foo", &v), true);
- ASSERT_EQ(v.size(), 1);
- ASSERT_EQ(v[0], 0);
-
- ASSERT_EQ(s.Match("foobar", &v), false);
- ASSERT_EQ(v.size(), 0);
-}
-
-TEST(Set, UnanchoredWordBoundary) {
- RE2::Set s(RE2::DefaultOptions, RE2::UNANCHORED);
-
- ASSERT_EQ(s.Add("foo\\b", NULL), 0);
- ASSERT_EQ(s.Compile(), true);
-
- ASSERT_EQ(s.Match("foo", NULL), true);
- ASSERT_EQ(s.Match("foobar", NULL), false);
- ASSERT_EQ(s.Match("foo bar", NULL), true);
-
- std::vector<int> v;
- ASSERT_EQ(s.Match("foo", &v), true);
- ASSERT_EQ(v.size(), 1);
- ASSERT_EQ(v[0], 0);
-
- ASSERT_EQ(s.Match("foobar", &v), false);
- ASSERT_EQ(v.size(), 0);
-
- ASSERT_EQ(s.Match("foo bar", &v), true);
- ASSERT_EQ(v.size(), 1);
- ASSERT_EQ(v[0], 0);
-}
-
-TEST(Set, Anchored) {
- RE2::Set s(RE2::DefaultOptions, RE2::ANCHOR_BOTH);
-
- ASSERT_EQ(s.Add("foo", NULL), 0);
- ASSERT_EQ(s.Add("(", NULL), -1);
- ASSERT_EQ(s.Add("bar", NULL), 1);
- ASSERT_EQ(s.Compile(), true);
-
- ASSERT_EQ(s.Match("foobar", NULL), false);
- ASSERT_EQ(s.Match("fooba", NULL), false);
- ASSERT_EQ(s.Match("oobar", NULL), false);
- ASSERT_EQ(s.Match("foo", NULL), true);
- ASSERT_EQ(s.Match("bar", NULL), true);
-
- std::vector<int> v;
- ASSERT_EQ(s.Match("foobar", &v), false);
- ASSERT_EQ(v.size(), 0);
-
- ASSERT_EQ(s.Match("fooba", &v), false);
- ASSERT_EQ(v.size(), 0);
-
- ASSERT_EQ(s.Match("oobar", &v), false);
- ASSERT_EQ(v.size(), 0);
-
- ASSERT_EQ(s.Match("foo", &v), true);
- ASSERT_EQ(v.size(), 1);
- ASSERT_EQ(v[0], 0);
-
- ASSERT_EQ(s.Match("bar", &v), true);
- ASSERT_EQ(v.size(), 1);
- ASSERT_EQ(v[0], 1);
-}
-
-TEST(Set, EmptyUnanchored) {
- RE2::Set s(RE2::DefaultOptions, RE2::UNANCHORED);
-
- ASSERT_EQ(s.Compile(), true);
-
- ASSERT_EQ(s.Match("", NULL), false);
- ASSERT_EQ(s.Match("foobar", NULL), false);
-
- std::vector<int> v;
- ASSERT_EQ(s.Match("", &v), false);
- ASSERT_EQ(v.size(), 0);
-
- ASSERT_EQ(s.Match("foobar", &v), false);
- ASSERT_EQ(v.size(), 0);
-}
-
-TEST(Set, EmptyAnchored) {
- RE2::Set s(RE2::DefaultOptions, RE2::ANCHOR_BOTH);
-
- ASSERT_EQ(s.Compile(), true);
-
- ASSERT_EQ(s.Match("", NULL), false);
- ASSERT_EQ(s.Match("foobar", NULL), false);
-
- std::vector<int> v;
- ASSERT_EQ(s.Match("", &v), false);
- ASSERT_EQ(v.size(), 0);
-
- ASSERT_EQ(s.Match("foobar", &v), false);
- ASSERT_EQ(v.size(), 0);
-}
-
-TEST(Set, Prefix) {
- RE2::Set s(RE2::DefaultOptions, RE2::ANCHOR_BOTH);
-
- ASSERT_EQ(s.Add("/prefix/\\d*", NULL), 0);
- ASSERT_EQ(s.Compile(), true);
-
- ASSERT_EQ(s.Match("/prefix", NULL), false);
- ASSERT_EQ(s.Match("/prefix/", NULL), true);
- ASSERT_EQ(s.Match("/prefix/42", NULL), true);
-
- std::vector<int> v;
- ASSERT_EQ(s.Match("/prefix", &v), false);
- ASSERT_EQ(v.size(), 0);
-
- ASSERT_EQ(s.Match("/prefix/", &v), true);
- ASSERT_EQ(v.size(), 1);
- ASSERT_EQ(v[0], 0);
-
- ASSERT_EQ(s.Match("/prefix/42", &v), true);
- ASSERT_EQ(v.size(), 1);
- ASSERT_EQ(v[0], 0);
-}
-
-TEST(Set, MoveSemantics) {
- RE2::Set s1(RE2::DefaultOptions, RE2::UNANCHORED);
- ASSERT_EQ(s1.Add("foo\\d+", NULL), 0);
- ASSERT_EQ(s1.Compile(), true);
- ASSERT_EQ(s1.Match("abc foo1 xyz", NULL), true);
- ASSERT_EQ(s1.Match("abc bar2 xyz", NULL), false);
-
- // The moved-to object should do what the moved-from object did.
- RE2::Set s2 = std::move(s1);
- ASSERT_EQ(s2.Match("abc foo1 xyz", NULL), true);
- ASSERT_EQ(s2.Match("abc bar2 xyz", NULL), false);
-
- // The moved-from object should have been reset and be reusable.
- ASSERT_EQ(s1.Add("bar\\d+", NULL), 0);
- ASSERT_EQ(s1.Compile(), true);
- ASSERT_EQ(s1.Match("abc foo1 xyz", NULL), false);
- ASSERT_EQ(s1.Match("abc bar2 xyz", NULL), true);
-
- // Verify that "overwriting" works and also doesn't leak memory.
- // (The latter will need a leak detector such as LeakSanitizer.)
- s1 = std::move(s2);
- ASSERT_EQ(s1.Match("abc foo1 xyz", NULL), true);
- ASSERT_EQ(s1.Match("abc bar2 xyz", NULL), false);
-}
-
-} // namespace re2
+// Copyright 2010 The RE2 Authors. All Rights Reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <stddef.h>
+#include <string>
+#include <vector>
+#include <utility>
+
+#include "library/cpp/testing/gtest/gtest.h"
+#include "util/logging.h"
+#include "re2/re2.h"
+#include "re2/set.h"
+
+namespace re2 {
+
+TEST(Set, Unanchored) {  // "foo" and "bar" in one set, matched anywhere in the text.
+ RE2::Set s(RE2::DefaultOptions, RE2::UNANCHORED);
+
+ ASSERT_EQ(s.Add("foo", NULL), 0);
+ ASSERT_EQ(s.Add("(", NULL), -1);  // Add() returns -1 for "(" (unbalanced paren)
+ ASSERT_EQ(s.Add("bar", NULL), 1);  // the rejected "(" did not use up index 1
+ ASSERT_EQ(s.Compile(), true);
+
+ ASSERT_EQ(s.Match("foobar", NULL), true);  // NULL vector: only the yes/no result is checked
+ ASSERT_EQ(s.Match("fooba", NULL), true);
+ ASSERT_EQ(s.Match("oobar", NULL), true);
+
+ std::vector<int> v;  // receives the indices of the patterns that matched
+ ASSERT_EQ(s.Match("foobar", &v), true);
+ ASSERT_EQ(v.size(), 2);  // both patterns occur in "foobar"
+ ASSERT_EQ(v[0], 0);
+ ASSERT_EQ(v[1], 1);
+
+ ASSERT_EQ(s.Match("fooba", &v), true);
+ ASSERT_EQ(v.size(), 1);  // v holds only this call's result, not the two earlier indices
+ ASSERT_EQ(v[0], 0);
+
+ ASSERT_EQ(s.Match("oobar", &v), true);
+ ASSERT_EQ(v.size(), 1);
+ ASSERT_EQ(v[0], 1);
+}
+
+TEST(Set, UnanchoredFactored) {  // "foo" is a prefix of "foobar"; both indices must be reported.
+ RE2::Set s(RE2::DefaultOptions, RE2::UNANCHORED);
+
+ ASSERT_EQ(s.Add("foo", NULL), 0);
+ ASSERT_EQ(s.Add("(", NULL), -1);  // Add() returns -1 for "(" (unbalanced paren)
+ ASSERT_EQ(s.Add("foobar", NULL), 1);  // the rejected "(" did not use up index 1
+ ASSERT_EQ(s.Compile(), true);
+
+ ASSERT_EQ(s.Match("foobar", NULL), true);  // NULL vector: only the yes/no result is checked
+ ASSERT_EQ(s.Match("obarfoobaroo", NULL), true);  // match starts in the middle of the text
+ ASSERT_EQ(s.Match("fooba", NULL), true);
+ ASSERT_EQ(s.Match("oobar", NULL), false);  // contains neither "foo" nor "foobar"
+
+ std::vector<int> v;  // receives the indices of the patterns that matched
+ ASSERT_EQ(s.Match("foobar", &v), true);
+ ASSERT_EQ(v.size(), 2);  // the shorter and the longer pattern both match
+ ASSERT_EQ(v[0], 0);
+ ASSERT_EQ(v[1], 1);
+
+ ASSERT_EQ(s.Match("obarfoobaroo", &v), true);
+ ASSERT_EQ(v.size(), 2);
+ ASSERT_EQ(v[0], 0);
+ ASSERT_EQ(v[1], 1);
+
+ ASSERT_EQ(s.Match("fooba", &v), true);
+ ASSERT_EQ(v.size(), 1);  // only "foo" fits; "foobar" is one character short
+ ASSERT_EQ(v[0], 0);
+
+ ASSERT_EQ(s.Match("oobar", &v), false);
+ ASSERT_EQ(v.size(), 0);  // a failed Match leaves v empty despite the earlier hit
+}
+
+TEST(Set, UnanchoredDollar) {  // "$" in an unanchored set still pins the match to the end of text.
+ RE2::Set s(RE2::DefaultOptions, RE2::UNANCHORED);
+
+ ASSERT_EQ(s.Add("foo$", NULL), 0);
+ ASSERT_EQ(s.Compile(), true);
+
+ ASSERT_EQ(s.Match("foo", NULL), true);  // NULL vector: only the yes/no result is checked
+ ASSERT_EQ(s.Match("foobar", NULL), false);  // "foo" is present but not at the end
+
+ std::vector<int> v;  // receives the indices of the patterns that matched
+ ASSERT_EQ(s.Match("foo", &v), true);
+ ASSERT_EQ(v.size(), 1);
+ ASSERT_EQ(v[0], 0);
+
+ ASSERT_EQ(s.Match("foobar", &v), false);
+ ASSERT_EQ(v.size(), 0);  // a failed Match leaves v empty despite the earlier hit
+}
+
+TEST(Set, UnanchoredWordBoundary) {  // "foo\b": a word boundary must follow "foo".
+ RE2::Set s(RE2::DefaultOptions, RE2::UNANCHORED);
+
+ ASSERT_EQ(s.Add("foo\\b", NULL), 0);
+ ASSERT_EQ(s.Compile(), true);
+
+ ASSERT_EQ(s.Match("foo", NULL), true);  // end of text counts as a boundary
+ ASSERT_EQ(s.Match("foobar", NULL), false);  // "b" follows directly, so no boundary
+ ASSERT_EQ(s.Match("foo bar", NULL), true);  // the space provides the boundary
+
+ std::vector<int> v;  // receives the indices of the patterns that matched
+ ASSERT_EQ(s.Match("foo", &v), true);
+ ASSERT_EQ(v.size(), 1);
+ ASSERT_EQ(v[0], 0);
+
+ ASSERT_EQ(s.Match("foobar", &v), false);
+ ASSERT_EQ(v.size(), 0);  // a failed Match leaves v empty despite the earlier hit
+
+ ASSERT_EQ(s.Match("foo bar", &v), true);
+ ASSERT_EQ(v.size(), 1);
+ ASSERT_EQ(v[0], 0);
+}
+
+TEST(Set, Anchored) {  // ANCHOR_BOTH: only texts equal to a whole pattern match.
+ RE2::Set s(RE2::DefaultOptions, RE2::ANCHOR_BOTH);
+
+ ASSERT_EQ(s.Add("foo", NULL), 0);
+ ASSERT_EQ(s.Add("(", NULL), -1);  // Add() returns -1 for "(" (unbalanced paren)
+ ASSERT_EQ(s.Add("bar", NULL), 1);  // the rejected "(" did not use up index 1
+ ASSERT_EQ(s.Compile(), true);
+
+ ASSERT_EQ(s.Match("foobar", NULL), false);  // contains both patterns, equals neither
+ ASSERT_EQ(s.Match("fooba", NULL), false);
+ ASSERT_EQ(s.Match("oobar", NULL), false);
+ ASSERT_EQ(s.Match("foo", NULL), true);
+ ASSERT_EQ(s.Match("bar", NULL), true);
+
+ std::vector<int> v;  // receives the indices of the patterns that matched
+ ASSERT_EQ(s.Match("foobar", &v), false);
+ ASSERT_EQ(v.size(), 0);  // nothing is reported when Match returns false
+
+ ASSERT_EQ(s.Match("fooba", &v), false);
+ ASSERT_EQ(v.size(), 0);
+
+ ASSERT_EQ(s.Match("oobar", &v), false);
+ ASSERT_EQ(v.size(), 0);
+
+ ASSERT_EQ(s.Match("foo", &v), true);
+ ASSERT_EQ(v.size(), 1);
+ ASSERT_EQ(v[0], 0);
+
+ ASSERT_EQ(s.Match("bar", &v), true);
+ ASSERT_EQ(v.size(), 1);  // v holds only this call's result, not the earlier index 0
+ ASSERT_EQ(v[0], 1);
+}
+
+TEST(Set, EmptyUnanchored) {  // An unanchored set with no patterns matches nothing.
+ RE2::Set s(RE2::DefaultOptions, RE2::UNANCHORED);
+
+ ASSERT_EQ(s.Compile(), true);  // compiling with zero patterns still succeeds
+
+ ASSERT_EQ(s.Match("", NULL), false);  // not even the empty text matches
+ ASSERT_EQ(s.Match("foobar", NULL), false);
+
+ std::vector<int> v;  // receives the indices of the patterns that matched
+ ASSERT_EQ(s.Match("", &v), false);
+ ASSERT_EQ(v.size(), 0);
+
+ ASSERT_EQ(s.Match("foobar", &v), false);
+ ASSERT_EQ(v.size(), 0);
+}
+
+TEST(Set, EmptyAnchored) {  // Same as EmptyUnanchored, but with ANCHOR_BOTH.
+ RE2::Set s(RE2::DefaultOptions, RE2::ANCHOR_BOTH);
+
+ ASSERT_EQ(s.Compile(), true);  // compiling with zero patterns still succeeds
+
+ ASSERT_EQ(s.Match("", NULL), false);  // not even the empty text matches
+ ASSERT_EQ(s.Match("foobar", NULL), false);
+
+ std::vector<int> v;  // receives the indices of the patterns that matched
+ ASSERT_EQ(s.Match("", &v), false);
+ ASSERT_EQ(v.size(), 0);
+
+ ASSERT_EQ(s.Match("foobar", &v), false);
+ ASSERT_EQ(v.size(), 0);
+}
+
+TEST(Set, Prefix) {  // Fully anchored pattern: literal "/prefix/" followed by optional digits.
+ RE2::Set s(RE2::DefaultOptions, RE2::ANCHOR_BOTH);
+
+ ASSERT_EQ(s.Add("/prefix/\\d*", NULL), 0);
+ ASSERT_EQ(s.Compile(), true);
+
+ ASSERT_EQ(s.Match("/prefix", NULL), false);  // the trailing "/" is missing
+ ASSERT_EQ(s.Match("/prefix/", NULL), true);  // \d* may match zero digits
+ ASSERT_EQ(s.Match("/prefix/42", NULL), true);
+
+ std::vector<int> v;  // receives the indices of the patterns that matched
+ ASSERT_EQ(s.Match("/prefix", &v), false);
+ ASSERT_EQ(v.size(), 0);
+
+ ASSERT_EQ(s.Match("/prefix/", &v), true);
+ ASSERT_EQ(v.size(), 1);
+ ASSERT_EQ(v[0], 0);
+
+ ASSERT_EQ(s.Match("/prefix/42", &v), true);
+ ASSERT_EQ(v.size(), 1);
+ ASSERT_EQ(v[0], 0);
+}
+
+TEST(Set, MoveSemantics) {  // Move construction and move assignment of RE2::Set.
+ RE2::Set s1(RE2::DefaultOptions, RE2::UNANCHORED);
+ ASSERT_EQ(s1.Add("foo\\d+", NULL), 0);
+ ASSERT_EQ(s1.Compile(), true);
+ ASSERT_EQ(s1.Match("abc foo1 xyz", NULL), true);  // baseline behaviour before any move
+ ASSERT_EQ(s1.Match("abc bar2 xyz", NULL), false);
+
+ // The moved-to object should do what the moved-from object did.
+ RE2::Set s2 = std::move(s1);  // move construction
+ ASSERT_EQ(s2.Match("abc foo1 xyz", NULL), true);
+ ASSERT_EQ(s2.Match("abc bar2 xyz", NULL), false);
+
+ // The moved-from object should have been reset and be reusable.
+ ASSERT_EQ(s1.Add("bar\\d+", NULL), 0);  // indices start again at 0 in the reset set
+ ASSERT_EQ(s1.Compile(), true);
+ ASSERT_EQ(s1.Match("abc foo1 xyz", NULL), false);  // the old "foo\d+" pattern is gone from s1
+ ASSERT_EQ(s1.Match("abc bar2 xyz", NULL), true);
+
+ // Verify that "overwriting" works and also doesn't leak memory.
+ // (The latter will need a leak detector such as LeakSanitizer.)
+ s1 = std::move(s2);  // move assignment over an already-compiled set
+ ASSERT_EQ(s1.Match("abc foo1 xyz", NULL), true);  // s1 now behaves like the original set
+ ASSERT_EQ(s1.Match("abc bar2 xyz", NULL), false);
+}
+
+} // namespace re2
diff --git a/contrib/libs/re2/re2/testing/simplify_test.cc b/contrib/libs/re2/re2/testing/simplify_test.cc
index 4510778fe5..75028930b1 100644
--- a/contrib/libs/re2/re2/testing/simplify_test.cc
+++ b/contrib/libs/re2/re2/testing/simplify_test.cc
@@ -1,273 +1,273 @@
-// Copyright 2006 The RE2 Authors. All Rights Reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// Test simplify.cc.
-
-#include <string.h>
-#include <string>
-
-#include "library/cpp/testing/gtest/gtest.h"
-#include "util/logging.h"
-#include "re2/regexp.h"
-
-namespace re2 {
-
-struct Test {
- const char* regexp;
- const char* simplified;
-};
-
-static Test tests[] = {
- // Already-simple constructs
- { "a", "a" },
- { "ab", "ab" },
- { "a|b", "[a-b]" },
- { "ab|cd", "ab|cd" },
- { "(ab)*", "(ab)*" },
- { "(ab)+", "(ab)+" },
- { "(ab)?", "(ab)?" },
- { ".", "." },
- { "^", "^" },
- { "$", "$" },
- { "[ac]", "[ac]" },
- { "[^ac]", "[^ac]" },
-
- // Posix character classes
- { "[[:alnum:]]", "[0-9A-Za-z]" },
- { "[[:alpha:]]", "[A-Za-z]" },
- { "[[:blank:]]", "[\\t ]" },
- { "[[:cntrl:]]", "[\\x00-\\x1f\\x7f]" },
- { "[[:digit:]]", "[0-9]" },
- { "[[:graph:]]", "[!-~]" },
- { "[[:lower:]]", "[a-z]" },
- { "[[:print:]]", "[ -~]" },
- { "[[:punct:]]", "[!-/:-@\\[-`{-~]" },
- { "[[:space:]]" , "[\\t-\\r ]" },
- { "[[:upper:]]", "[A-Z]" },
- { "[[:xdigit:]]", "[0-9A-Fa-f]" },
-
- // Perl character classes
- { "\\d", "[0-9]" },
- { "\\s", "[\\t-\\n\\f-\\r ]" },
- { "\\w", "[0-9A-Z_a-z]" },
- { "\\D", "[^0-9]" },
- { "\\S", "[^\\t-\\n\\f-\\r ]" },
- { "\\W", "[^0-9A-Z_a-z]" },
- { "[\\d]", "[0-9]" },
- { "[\\s]", "[\\t-\\n\\f-\\r ]" },
- { "[\\w]", "[0-9A-Z_a-z]" },
- { "[\\D]", "[^0-9]" },
- { "[\\S]", "[^\\t-\\n\\f-\\r ]" },
- { "[\\W]", "[^0-9A-Z_a-z]" },
-
- // Posix repetitions
- { "a{1}", "a" },
- { "a{2}", "aa" },
- { "a{5}", "aaaaa" },
- { "a{0,1}", "a?" },
- // The next three are illegible because Simplify inserts (?:)
- // parens instead of () parens to avoid creating extra
- // captured subexpressions. The comments show a version fewer parens.
- { "(a){0,2}", "(?:(a)(a)?)?" }, // (aa?)?
- { "(a){0,4}", "(?:(a)(?:(a)(?:(a)(a)?)?)?)?" }, // (a(a(aa?)?)?)?
- { "(a){2,6}", "(a)(a)(?:(a)(?:(a)(?:(a)(a)?)?)?)?" }, // aa(a(a(aa?)?)?)?
- { "a{0,2}", "(?:aa?)?" }, // (aa?)?
- { "a{0,4}", "(?:a(?:a(?:aa?)?)?)?" }, // (a(a(aa?)?)?)?
- { "a{2,6}", "aa(?:a(?:a(?:aa?)?)?)?" }, // aa(a(a(aa?)?)?)?
- { "a{0,}", "a*" },
- { "a{1,}", "a+" },
- { "a{2,}", "aa+" },
- { "a{5,}", "aaaaa+" },
-
- // Test that operators simplify their arguments.
- // (Simplify used to not simplify arguments to a {} repeat.)
- { "(?:a{1,}){1,}", "a+" },
- { "(a{1,}b{1,})", "(a+b+)" },
- { "a{1,}|b{1,}", "a+|b+" },
- { "(?:a{1,})*", "(?:a+)*" },
- { "(?:a{1,})+", "a+" },
- { "(?:a{1,})?", "(?:a+)?" },
- { "a{0}", "" },
-
- // Character class simplification
- { "[ab]", "[a-b]" },
- { "[a-za-za-z]", "[a-z]" },
- { "[A-Za-zA-Za-z]", "[A-Za-z]" },
- { "[ABCDEFGH]", "[A-H]" },
- { "[AB-CD-EF-GH]", "[A-H]" },
- { "[W-ZP-XE-R]", "[E-Z]" },
- { "[a-ee-gg-m]", "[a-m]" },
- { "[a-ea-ha-m]", "[a-m]" },
- { "[a-ma-ha-e]", "[a-m]" },
- { "[a-zA-Z0-9 -~]", "[ -~]" },
-
- // Empty character classes
- { "[^[:cntrl:][:^cntrl:]]", "[^\\x00-\\x{10ffff}]" },
-
- // Full character classes
- { "[[:cntrl:][:^cntrl:]]", "." },
-
- // Unicode case folding.
- { "(?i)A", "[Aa]" },
- { "(?i)a", "[Aa]" },
- { "(?i)K", "[Kk\\x{212a}]" },
- { "(?i)k", "[Kk\\x{212a}]" },
- { "(?i)\\x{212a}", "[Kk\\x{212a}]" },
- { "(?i)[a-z]", "[A-Za-z\\x{17f}\\x{212a}]" },
- { "(?i)[\\x00-\\x{FFFD}]", "[\\x00-\\x{fffd}]" },
- { "(?i)[\\x00-\\x{10ffff}]", "." },
-
- // Empty string as a regular expression.
- // Empty string must be preserved inside parens in order
- // to make submatches work right, so these are less
- // interesting than they used to be. ToString inserts
- // explicit (?:) in place of non-parenthesized empty strings,
- // to make them easier to spot for other parsers.
- { "(a|b|)", "([a-b]|(?:))" },
- { "(|)", "((?:)|(?:))" },
- { "a()", "a()" },
- { "(()|())", "(()|())" },
- { "(a|)", "(a|(?:))" },
- { "ab()cd()", "ab()cd()" },
- { "()", "()" },
- { "()*", "()*" },
- { "()+", "()+" },
- { "()?" , "()?" },
- { "(){0}", "" },
- { "(){1}", "()" },
- { "(){1,}", "()+" },
- { "(){0,2}", "(?:()()?)?" },
-
- // Test that coalescing occurs and that the resulting repeats are simplified.
- // Two-op combinations of *, +, ?, {n}, {n,} and {n,m} with a literal:
- { "a*a*", "a*" },
- { "a*a+", "a+" },
- { "a*a?", "a*" },
- { "a*a{2}", "aa+" },
- { "a*a{2,}", "aa+" },
- { "a*a{2,3}", "aa+" },
- { "a+a*", "a+" },
- { "a+a+", "aa+" },
- { "a+a?", "a+" },
- { "a+a{2}", "aaa+" },
- { "a+a{2,}", "aaa+" },
- { "a+a{2,3}", "aaa+" },
- { "a?a*", "a*" },
- { "a?a+", "a+" },
- { "a?a?", "(?:aa?)?" },
- { "a?a{2}", "aaa?" },
- { "a?a{2,}", "aa+" },
- { "a?a{2,3}", "aa(?:aa?)?" },
- { "a{2}a*", "aa+" },
- { "a{2}a+", "aaa+" },
- { "a{2}a?", "aaa?" },
- { "a{2}a{2}", "aaaa" },
- { "a{2}a{2,}", "aaaa+" },
- { "a{2}a{2,3}", "aaaaa?" },
- { "a{2,}a*", "aa+" },
- { "a{2,}a+", "aaa+" },
- { "a{2,}a?", "aa+" },
- { "a{2,}a{2}", "aaaa+" },
- { "a{2,}a{2,}", "aaaa+" },
- { "a{2,}a{2,3}", "aaaa+" },
- { "a{2,3}a*", "aa+" },
- { "a{2,3}a+", "aaa+" },
- { "a{2,3}a?", "aa(?:aa?)?" },
- { "a{2,3}a{2}", "aaaaa?" },
- { "a{2,3}a{2,}", "aaaa+" },
- { "a{2,3}a{2,3}", "aaaa(?:aa?)?" },
- // With a char class, any char and any byte:
- { "\\d*\\d*", "[0-9]*" },
- { ".*.*", ".*" },
- { "\\C*\\C*", "\\C*" },
- // FoldCase works, but must be consistent:
- { "(?i)A*a*", "[Aa]*" },
- { "(?i)a+A+", "[Aa][Aa]+" },
- { "(?i)A*(?-i)a*", "[Aa]*a*" },
- { "(?i)a+(?-i)A+", "[Aa]+A+" },
- // NonGreedy works, but must be consistent:
- { "a*?a*?", "a*?" },
- { "a+?a+?", "aa+?" },
- { "a*?a*", "a*?a*" },
- { "a+a+?", "a+a+?" },
- // The second element is the literal, char class, any char or any byte:
- { "a*a", "a+" },
- { "\\d*\\d", "[0-9]+" },
- { ".*.", ".+" },
- { "\\C*\\C", "\\C+" },
- // FoldCase works, but must be consistent:
- { "(?i)A*a", "[Aa]+" },
- { "(?i)a+A", "[Aa][Aa]+" },
- { "(?i)A*(?-i)a", "[Aa]*a" },
- { "(?i)a+(?-i)A", "[Aa]+A" },
- // The second element is a literal string that begins with the literal:
- { "a*aa", "aa+" },
- { "a*aab", "aa+b" },
- // FoldCase works, but must be consistent:
- { "(?i)a*aa", "[Aa][Aa]+" },
- { "(?i)a*aab", "[Aa][Aa]+[Bb]" },
- { "(?i)a*(?-i)aa", "[Aa]*aa" },
- { "(?i)a*(?-i)aab", "[Aa]*aab" },
- // Negative tests with mismatching ops:
- { "a*b*", "a*b*" },
- { "\\d*\\D*", "[0-9]*[^0-9]*" },
- { "a+b", "a+b" },
- { "\\d+\\D", "[0-9]+[^0-9]" },
- { "a?bb", "a?bb" },
- // Negative tests with capturing groups:
- { "(a*)a*", "(a*)a*" },
- { "a+(a)", "a+(a)" },
- { "(a?)(aa)", "(a?)(aa)" },
- // Just for fun:
- { "aa*aa+aa?aa{2}aaa{2,}aaa{2,3}a", "aaaaaaaaaaaaaaaa+" },
-
- // During coalescing, the child of the repeat changes, so we build a new
- // repeat. The new repeat must have the min and max of the old repeat.
- // Failure to copy them results in min=0 and max=0 -> empty match.
- { "(?:a*aab){2}", "aa+baa+b" },
-
- // During coalescing, the child of the capture changes, so we build a new
- // capture. The new capture must have the cap of the old capture.
- // Failure to copy it results in cap=0 -> ToString() logs a fatal error.
- { "(a*aab)", "(aa+b)" },
-
- // Test squashing of **, ++, ?? et cetera.
- { "(?:(?:a){0,}){0,}", "a*" },
- { "(?:(?:a){1,}){1,}", "a+" },
- { "(?:(?:a){0,1}){0,1}", "a?" },
- { "(?:(?:a){0,}){1,}", "a*" },
- { "(?:(?:a){0,}){0,1}", "a*" },
- { "(?:(?:a){1,}){0,}", "a*" },
- { "(?:(?:a){1,}){0,1}", "a*" },
- { "(?:(?:a){0,1}){0,}", "a*" },
- { "(?:(?:a){0,1}){1,}", "a*" },
-};
-
-TEST(TestSimplify, SimpleRegexps) {
- for (size_t i = 0; i < arraysize(tests); i++) {
- RegexpStatus status;
- VLOG(1) << "Testing " << tests[i].regexp;
- Regexp* re = Regexp::Parse(tests[i].regexp,
- Regexp::MatchNL | (Regexp::LikePerl &
- ~Regexp::OneLine),
- &status);
- ASSERT_TRUE(re != NULL) << " " << tests[i].regexp << " " << status.Text();
- Regexp* sre = re->Simplify();
- ASSERT_TRUE(sre != NULL);
-
- // Check that already-simple regexps don't allocate new ones.
- if (strcmp(tests[i].regexp, tests[i].simplified) == 0) {
- ASSERT_TRUE(re == sre) << " " << tests[i].regexp
- << " " << re->ToString() << " " << sre->ToString();
- }
-
- EXPECT_EQ(tests[i].simplified, sre->ToString())
- << " " << tests[i].regexp << " " << sre->Dump();
-
- re->Decref();
- sre->Decref();
- }
-}
-
-} // namespace re2
+// Copyright 2006 The RE2 Authors. All Rights Reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Test simplify.cc.
+
+#include <string.h>
+#include <string>
+
+#include "library/cpp/testing/gtest/gtest.h"
+#include "util/logging.h"
+#include "re2/regexp.h"
+
+namespace re2 {
+
+struct Test {  // One row of the tests[] table used by TEST(TestSimplify, SimpleRegexps).
+ const char* regexp;  // pattern passed to Regexp::Parse
+ const char* simplified;  // expected ToString() of the result of Simplify()
+};
+
+static Test tests[] = {
+ // Already-simple constructs
+ { "a", "a" },
+ { "ab", "ab" },
+ { "a|b", "[a-b]" },
+ { "ab|cd", "ab|cd" },
+ { "(ab)*", "(ab)*" },
+ { "(ab)+", "(ab)+" },
+ { "(ab)?", "(ab)?" },
+ { ".", "." },
+ { "^", "^" },
+ { "$", "$" },
+ { "[ac]", "[ac]" },
+ { "[^ac]", "[^ac]" },
+
+ // Posix character classes
+ { "[[:alnum:]]", "[0-9A-Za-z]" },
+ { "[[:alpha:]]", "[A-Za-z]" },
+ { "[[:blank:]]", "[\\t ]" },
+ { "[[:cntrl:]]", "[\\x00-\\x1f\\x7f]" },
+ { "[[:digit:]]", "[0-9]" },
+ { "[[:graph:]]", "[!-~]" },
+ { "[[:lower:]]", "[a-z]" },
+ { "[[:print:]]", "[ -~]" },
+ { "[[:punct:]]", "[!-/:-@\\[-`{-~]" },
+ { "[[:space:]]" , "[\\t-\\r ]" },
+ { "[[:upper:]]", "[A-Z]" },
+ { "[[:xdigit:]]", "[0-9A-Fa-f]" },
+
+ // Perl character classes
+ { "\\d", "[0-9]" },
+ { "\\s", "[\\t-\\n\\f-\\r ]" },
+ { "\\w", "[0-9A-Z_a-z]" },
+ { "\\D", "[^0-9]" },
+ { "\\S", "[^\\t-\\n\\f-\\r ]" },
+ { "\\W", "[^0-9A-Z_a-z]" },
+ { "[\\d]", "[0-9]" },
+ { "[\\s]", "[\\t-\\n\\f-\\r ]" },
+ { "[\\w]", "[0-9A-Z_a-z]" },
+ { "[\\D]", "[^0-9]" },
+ { "[\\S]", "[^\\t-\\n\\f-\\r ]" },
+ { "[\\W]", "[^0-9A-Z_a-z]" },
+
+ // Posix repetitions
+ { "a{1}", "a" },
+ { "a{2}", "aa" },
+ { "a{5}", "aaaaa" },
+ { "a{0,1}", "a?" },
+ // The next three are illegible because Simplify inserts (?:)
+ // parens instead of () parens to avoid creating extra
+ // captured subexpressions. The comments show a version with fewer parens.
+ { "(a){0,2}", "(?:(a)(a)?)?" }, // (aa?)?
+ { "(a){0,4}", "(?:(a)(?:(a)(?:(a)(a)?)?)?)?" }, // (a(a(aa?)?)?)?
+ { "(a){2,6}", "(a)(a)(?:(a)(?:(a)(?:(a)(a)?)?)?)?" }, // aa(a(a(aa?)?)?)?
+ { "a{0,2}", "(?:aa?)?" }, // (aa?)?
+ { "a{0,4}", "(?:a(?:a(?:aa?)?)?)?" }, // (a(a(aa?)?)?)?
+ { "a{2,6}", "aa(?:a(?:a(?:aa?)?)?)?" }, // aa(a(a(aa?)?)?)?
+ { "a{0,}", "a*" },
+ { "a{1,}", "a+" },
+ { "a{2,}", "aa+" },
+ { "a{5,}", "aaaaa+" },
+
+ // Test that operators simplify their arguments.
+ // (Simplify used to not simplify arguments to a {} repeat.)
+ { "(?:a{1,}){1,}", "a+" },
+ { "(a{1,}b{1,})", "(a+b+)" },
+ { "a{1,}|b{1,}", "a+|b+" },
+ { "(?:a{1,})*", "(?:a+)*" },
+ { "(?:a{1,})+", "a+" },
+ { "(?:a{1,})?", "(?:a+)?" },
+ { "a{0}", "" },
+
+ // Character class simplification
+ { "[ab]", "[a-b]" },
+ { "[a-za-za-z]", "[a-z]" },
+ { "[A-Za-zA-Za-z]", "[A-Za-z]" },
+ { "[ABCDEFGH]", "[A-H]" },
+ { "[AB-CD-EF-GH]", "[A-H]" },
+ { "[W-ZP-XE-R]", "[E-Z]" },
+ { "[a-ee-gg-m]", "[a-m]" },
+ { "[a-ea-ha-m]", "[a-m]" },
+ { "[a-ma-ha-e]", "[a-m]" },
+ { "[a-zA-Z0-9 -~]", "[ -~]" },
+
+ // Empty character classes
+ { "[^[:cntrl:][:^cntrl:]]", "[^\\x00-\\x{10ffff}]" },
+
+ // Full character classes
+ { "[[:cntrl:][:^cntrl:]]", "." },
+
+ // Unicode case folding.
+ { "(?i)A", "[Aa]" },
+ { "(?i)a", "[Aa]" },
+ { "(?i)K", "[Kk\\x{212a}]" },
+ { "(?i)k", "[Kk\\x{212a}]" },
+ { "(?i)\\x{212a}", "[Kk\\x{212a}]" },
+ { "(?i)[a-z]", "[A-Za-z\\x{17f}\\x{212a}]" },
+ { "(?i)[\\x00-\\x{FFFD}]", "[\\x00-\\x{fffd}]" },
+ { "(?i)[\\x00-\\x{10ffff}]", "." },
+
+ // Empty string as a regular expression.
+ // Empty string must be preserved inside parens in order
+ // to make submatches work right, so these are less
+ // interesting than they used to be. ToString inserts
+ // explicit (?:) in place of non-parenthesized empty strings,
+ // to make them easier to spot for other parsers.
+ { "(a|b|)", "([a-b]|(?:))" },
+ { "(|)", "((?:)|(?:))" },
+ { "a()", "a()" },
+ { "(()|())", "(()|())" },
+ { "(a|)", "(a|(?:))" },
+ { "ab()cd()", "ab()cd()" },
+ { "()", "()" },
+ { "()*", "()*" },
+ { "()+", "()+" },
+ { "()?" , "()?" },
+ { "(){0}", "" },
+ { "(){1}", "()" },
+ { "(){1,}", "()+" },
+ { "(){0,2}", "(?:()()?)?" },
+
+ // Test that coalescing occurs and that the resulting repeats are simplified.
+ // Two-op combinations of *, +, ?, {n}, {n,} and {n,m} with a literal:
+ { "a*a*", "a*" },
+ { "a*a+", "a+" },
+ { "a*a?", "a*" },
+ { "a*a{2}", "aa+" },
+ { "a*a{2,}", "aa+" },
+ { "a*a{2,3}", "aa+" },
+ { "a+a*", "a+" },
+ { "a+a+", "aa+" },
+ { "a+a?", "a+" },
+ { "a+a{2}", "aaa+" },
+ { "a+a{2,}", "aaa+" },
+ { "a+a{2,3}", "aaa+" },
+ { "a?a*", "a*" },
+ { "a?a+", "a+" },
+ { "a?a?", "(?:aa?)?" },
+ { "a?a{2}", "aaa?" },
+ { "a?a{2,}", "aa+" },
+ { "a?a{2,3}", "aa(?:aa?)?" },
+ { "a{2}a*", "aa+" },
+ { "a{2}a+", "aaa+" },
+ { "a{2}a?", "aaa?" },
+ { "a{2}a{2}", "aaaa" },
+ { "a{2}a{2,}", "aaaa+" },
+ { "a{2}a{2,3}", "aaaaa?" },
+ { "a{2,}a*", "aa+" },
+ { "a{2,}a+", "aaa+" },
+ { "a{2,}a?", "aa+" },
+ { "a{2,}a{2}", "aaaa+" },
+ { "a{2,}a{2,}", "aaaa+" },
+ { "a{2,}a{2,3}", "aaaa+" },
+ { "a{2,3}a*", "aa+" },
+ { "a{2,3}a+", "aaa+" },
+ { "a{2,3}a?", "aa(?:aa?)?" },
+ { "a{2,3}a{2}", "aaaaa?" },
+ { "a{2,3}a{2,}", "aaaa+" },
+ { "a{2,3}a{2,3}", "aaaa(?:aa?)?" },
+ // With a char class, any char and any byte:
+ { "\\d*\\d*", "[0-9]*" },
+ { ".*.*", ".*" },
+ { "\\C*\\C*", "\\C*" },
+ // FoldCase works, but must be consistent:
+ { "(?i)A*a*", "[Aa]*" },
+ { "(?i)a+A+", "[Aa][Aa]+" },
+ { "(?i)A*(?-i)a*", "[Aa]*a*" },
+ { "(?i)a+(?-i)A+", "[Aa]+A+" },
+ // NonGreedy works, but must be consistent:
+ { "a*?a*?", "a*?" },
+ { "a+?a+?", "aa+?" },
+ { "a*?a*", "a*?a*" },
+ { "a+a+?", "a+a+?" },
+ // The second element is the literal, char class, any char or any byte:
+ { "a*a", "a+" },
+ { "\\d*\\d", "[0-9]+" },
+ { ".*.", ".+" },
+ { "\\C*\\C", "\\C+" },
+ // FoldCase works, but must be consistent:
+ { "(?i)A*a", "[Aa]+" },
+ { "(?i)a+A", "[Aa][Aa]+" },
+ { "(?i)A*(?-i)a", "[Aa]*a" },
+ { "(?i)a+(?-i)A", "[Aa]+A" },
+ // The second element is a literal string that begins with the literal:
+ { "a*aa", "aa+" },
+ { "a*aab", "aa+b" },
+ // FoldCase works, but must be consistent:
+ { "(?i)a*aa", "[Aa][Aa]+" },
+ { "(?i)a*aab", "[Aa][Aa]+[Bb]" },
+ { "(?i)a*(?-i)aa", "[Aa]*aa" },
+ { "(?i)a*(?-i)aab", "[Aa]*aab" },
+ // Negative tests with mismatching ops:
+ { "a*b*", "a*b*" },
+ { "\\d*\\D*", "[0-9]*[^0-9]*" },
+ { "a+b", "a+b" },
+ { "\\d+\\D", "[0-9]+[^0-9]" },
+ { "a?bb", "a?bb" },
+ // Negative tests with capturing groups:
+ { "(a*)a*", "(a*)a*" },
+ { "a+(a)", "a+(a)" },
+ { "(a?)(aa)", "(a?)(aa)" },
+ // Just for fun:
+ { "aa*aa+aa?aa{2}aaa{2,}aaa{2,3}a", "aaaaaaaaaaaaaaaa+" },
+
+ // During coalescing, the child of the repeat changes, so we build a new
+ // repeat. The new repeat must have the min and max of the old repeat.
+ // Failure to copy them results in min=0 and max=0 -> empty match.
+ { "(?:a*aab){2}", "aa+baa+b" },
+
+ // During coalescing, the child of the capture changes, so we build a new
+ // capture. The new capture must have the cap of the old capture.
+ // Failure to copy it results in cap=0 -> ToString() logs a fatal error.
+ { "(a*aab)", "(aa+b)" },
+
+ // Test squashing of **, ++, ?? et cetera.
+ { "(?:(?:a){0,}){0,}", "a*" },
+ { "(?:(?:a){1,}){1,}", "a+" },
+ { "(?:(?:a){0,1}){0,1}", "a?" },
+ { "(?:(?:a){0,}){1,}", "a*" },
+ { "(?:(?:a){0,}){0,1}", "a*" },
+ { "(?:(?:a){1,}){0,}", "a*" },
+ { "(?:(?:a){1,}){0,1}", "a*" },
+ { "(?:(?:a){0,1}){0,}", "a*" },
+ { "(?:(?:a){0,1}){1,}", "a*" },
+};
+
+TEST(TestSimplify, SimpleRegexps) {  // Parses each tests[] row, simplifies it, compares ToString().
+ for (size_t i = 0; i < arraysize(tests); i++) {
+ RegexpStatus status;  // filled in by Parse; its Text() is printed if parsing fails
+ VLOG(1) << "Testing " << tests[i].regexp;
+ Regexp* re = Regexp::Parse(tests[i].regexp,
+ Regexp::MatchNL | (Regexp::LikePerl &  // LikePerl flags with OneLine cleared,
+ ~Regexp::OneLine),  // combined with MatchNL
+ &status);
+ ASSERT_TRUE(re != NULL) << " " << tests[i].regexp << " " << status.Text();
+ Regexp* sre = re->Simplify();
+ ASSERT_TRUE(sre != NULL);
+
+ // Check that already-simple regexps don't allocate new ones.
+ if (strcmp(tests[i].regexp, tests[i].simplified) == 0) {  // expected output equals the input
+ ASSERT_TRUE(re == sre) << " " << tests[i].regexp  // so Simplify must return the same object
+ << " " << re->ToString() << " " << sre->ToString();
+ }
+
+ EXPECT_EQ(tests[i].simplified, sre->ToString())  // EXPECT_: a mismatch does not stop the loop
+ << " " << tests[i].regexp << " " << sre->Dump();
+
+ re->Decref();  // Decref is called on both pointers,
+ sre->Decref();  // including when re == sre
+ }
+}
+
+} // namespace re2
diff --git a/contrib/libs/re2/re2/testing/string_generator.cc b/contrib/libs/re2/re2/testing/string_generator.cc
index f42df29c82..96dbbf5d82 100644
--- a/contrib/libs/re2/re2/testing/string_generator.cc
+++ b/contrib/libs/re2/re2/testing/string_generator.cc
@@ -1,141 +1,141 @@
-// Copyright 2008 The RE2 Authors. All Rights Reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// String generator: generates all possible strings of up to
-// maxlen letters using the set of letters in alpha.
-// Fetch strings using a Java-like Next()/HasNext() interface.
-
-#include <stddef.h>
-#include <stdint.h>
-#include <string>
-#include <vector>
-
-#include "library/cpp/testing/gtest/gtest.h"
-#include "util/logging.h"
-#include "re2/testing/string_generator.h"
-
-namespace re2 {
-
-StringGenerator::StringGenerator(int maxlen,
- const std::vector<std::string>& alphabet)
- : maxlen_(maxlen), alphabet_(alphabet),
- generate_null_(false),
- random_(false), nrandom_(0) {
-
- // Degenerate case: no letters, no non-empty strings.
- if (alphabet_.empty())
- maxlen_ = 0;
-
- // Next() will return empty string (digits_ is empty).
- hasnext_ = true;
-}
-
-// Resets the string generator state to the beginning.
-void StringGenerator::Reset() {
- digits_.clear();
- hasnext_ = true;
- random_ = false;
- nrandom_ = 0;
- generate_null_ = false;
-}
-
-// Increments the big number in digits_, returning true if successful.
-// Returns false if all the numbers have been used.
-bool StringGenerator::IncrementDigits() {
- // First try to increment the current number.
- for (int i = static_cast<int>(digits_.size()) - 1; i >= 0; i--) {
- if (++digits_[i] < static_cast<int>(alphabet_.size()))
- return true;
- digits_[i] = 0;
- }
-
- // If that failed, make a longer number.
- if (static_cast<int>(digits_.size()) < maxlen_) {
- digits_.push_back(0);
- return true;
- }
-
- return false;
-}
-
-// Generates random digits_, return true if successful.
-// Returns false if the random sequence is over.
-bool StringGenerator::RandomDigits() {
- if (--nrandom_ <= 0)
- return false;
-
- std::uniform_int_distribution<int> random_len(0, maxlen_);
- std::uniform_int_distribution<int> random_alphabet_index(
- 0, static_cast<int>(alphabet_.size()) - 1);
-
- // Pick length.
- int len = random_len(rng_);
- digits_.resize(len);
- for (int i = 0; i < len; i++)
- digits_[i] = random_alphabet_index(rng_);
- return true;
-}
-
-// Returns the next string in the iteration, which is the one
-// currently described by digits_. Calls IncrementDigits
-// after computing the string, so that it knows the answer
-// for subsequent HasNext() calls.
-const StringPiece& StringGenerator::Next() {
- CHECK(hasnext_);
- if (generate_null_) {
- generate_null_ = false;
- sp_ = StringPiece();
- return sp_;
- }
- s_.clear();
- for (size_t i = 0; i < digits_.size(); i++) {
- s_ += alphabet_[digits_[i]];
- }
- hasnext_ = random_ ? RandomDigits() : IncrementDigits();
- sp_ = s_;
- return sp_;
-}
-
-// Sets generator up to return n random strings.
-void StringGenerator::Random(int32_t seed, int n) {
- rng_.seed(seed);
-
- random_ = true;
- nrandom_ = n;
- hasnext_ = nrandom_ > 0;
-}
-
-void StringGenerator::GenerateNULL() {
- generate_null_ = true;
- hasnext_ = true;
-}
-
-std::string DeBruijnString(int n) {
- CHECK_GE(n, 1);
- CHECK_LE(n, 29);
- const size_t size = size_t{1} << static_cast<size_t>(n);
- const size_t mask = size - 1;
- std::vector<bool> did(size, false);
- std::string s;
- s.reserve(static_cast<size_t>(n) + size);
- for (size_t i = 0; i < static_cast<size_t>(n - 1); i++)
- s += '0';
- size_t bits = 0;
- for (size_t i = 0; i < size; i++) {
- bits <<= 1;
- bits &= mask;
- if (!did[bits | 1]) {
- bits |= 1;
- s += '1';
- } else {
- s += '0';
- }
- CHECK(!did[bits]);
- did[bits] = true;
- }
- CHECK_EQ(s.size(), static_cast<size_t>(n - 1) + size);
- return s;
-}
-
-} // namespace re2
+// Copyright 2008 The RE2 Authors. All Rights Reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// String generator: generates all possible strings of up to
+// maxlen letters using the set of letters in alpha.
+// Fetch strings using a Java-like Next()/HasNext() interface.
+
+#include <stddef.h>
+#include <stdint.h>
+#include <string>
+#include <vector>
+
+#include "library/cpp/testing/gtest/gtest.h"
+#include "util/logging.h"
+#include "re2/testing/string_generator.h"
+
+namespace re2 {
+
+StringGenerator::StringGenerator(int maxlen,
+ const std::vector<std::string>& alphabet)
+ : maxlen_(maxlen), alphabet_(alphabet),
+ generate_null_(false),
+ random_(false), nrandom_(0) {
+
+ // Degenerate case: no letters, no non-empty strings.
+ if (alphabet_.empty())
+ maxlen_ = 0;
+
+ // Next() will return empty string (digits_ is empty).
+ hasnext_ = true;
+}
+
+// Resets the string generator state to the beginning.
+void StringGenerator::Reset() {
+ digits_.clear();
+ hasnext_ = true;
+ random_ = false;
+ nrandom_ = 0;
+ generate_null_ = false;
+}
+
+// Increments the big number in digits_, returning true if successful.
+// Returns false if all the numbers have been used.
+bool StringGenerator::IncrementDigits() {
+ // First try to increment the current number.
+ for (int i = static_cast<int>(digits_.size()) - 1; i >= 0; i--) {
+ if (++digits_[i] < static_cast<int>(alphabet_.size()))
+ return true;
+ digits_[i] = 0;
+ }
+
+ // If that failed, make a longer number.
+ if (static_cast<int>(digits_.size()) < maxlen_) {
+ digits_.push_back(0);
+ return true;
+ }
+
+ return false;
+}
+
+// Generates random digits_, return true if successful.
+// Returns false if the random sequence is over.
+bool StringGenerator::RandomDigits() {
+ if (--nrandom_ <= 0)
+ return false;
+
+ std::uniform_int_distribution<int> random_len(0, maxlen_);
+ std::uniform_int_distribution<int> random_alphabet_index(
+ 0, static_cast<int>(alphabet_.size()) - 1);
+
+ // Pick length.
+ int len = random_len(rng_);
+ digits_.resize(len);
+ for (int i = 0; i < len; i++)
+ digits_[i] = random_alphabet_index(rng_);
+ return true;
+}
+
+// Returns the next string in the iteration, which is the one
+// currently described by digits_. Calls IncrementDigits
+// after computing the string, so that it knows the answer
+// for subsequent HasNext() calls.
+const StringPiece& StringGenerator::Next() {
+ CHECK(hasnext_);
+ if (generate_null_) {
+ generate_null_ = false;
+ sp_ = StringPiece();
+ return sp_;
+ }
+ s_.clear();
+ for (size_t i = 0; i < digits_.size(); i++) {
+ s_ += alphabet_[digits_[i]];
+ }
+ hasnext_ = random_ ? RandomDigits() : IncrementDigits();
+ sp_ = s_;
+ return sp_;
+}
+
+// Sets generator up to return n random strings.
+void StringGenerator::Random(int32_t seed, int n) {
+ rng_.seed(seed);
+
+ random_ = true;
+ nrandom_ = n;
+ hasnext_ = nrandom_ > 0;
+}
+
+void StringGenerator::GenerateNULL() {
+ generate_null_ = true;
+ hasnext_ = true;
+}
+
+std::string DeBruijnString(int n) {
+ CHECK_GE(n, 1);
+ CHECK_LE(n, 29);
+ const size_t size = size_t{1} << static_cast<size_t>(n);
+ const size_t mask = size - 1;
+ std::vector<bool> did(size, false);
+ std::string s;
+ s.reserve(static_cast<size_t>(n) + size);
+ for (size_t i = 0; i < static_cast<size_t>(n - 1); i++)
+ s += '0';
+ size_t bits = 0;
+ for (size_t i = 0; i < size; i++) {
+ bits <<= 1;
+ bits &= mask;
+ if (!did[bits | 1]) {
+ bits |= 1;
+ s += '1';
+ } else {
+ s += '0';
+ }
+ CHECK(!did[bits]);
+ did[bits] = true;
+ }
+ CHECK_EQ(s.size(), static_cast<size_t>(n - 1) + size);
+ return s;
+}
+
+} // namespace re2
diff --git a/contrib/libs/re2/re2/testing/string_generator.h b/contrib/libs/re2/re2/testing/string_generator.h
index ff8179bfb7..73fbb51451 100644
--- a/contrib/libs/re2/re2/testing/string_generator.h
+++ b/contrib/libs/re2/re2/testing/string_generator.h
@@ -1,76 +1,76 @@
-// Copyright 2008 The RE2 Authors. All Rights Reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-#ifndef RE2_TESTING_STRING_GENERATOR_H_
-#define RE2_TESTING_STRING_GENERATOR_H_
-
-// String generator: generates all possible strings of up to
-// maxlen letters using the set of letters in alpha.
-// Fetch strings using a Java-like Next()/HasNext() interface.
-
-#include <stdint.h>
-#include <random>
-#include <string>
-#include <vector>
-
-#include "util/util.h"
-#include "re2/stringpiece.h"
-
-namespace re2 {
-
-class StringGenerator {
- public:
- StringGenerator(int maxlen, const std::vector<std::string>& alphabet);
- ~StringGenerator() {}
-
- const StringPiece& Next();
- bool HasNext() { return hasnext_; }
-
- // Resets generator to start sequence over.
- void Reset();
-
- // Causes generator to emit random strings for next n calls to Next().
- void Random(int32_t seed, int n);
-
- // Causes generator to emit a NULL as the next call.
- void GenerateNULL();
-
- private:
- bool IncrementDigits();
- bool RandomDigits();
-
- // Global state.
- int maxlen_; // Maximum length string to generate.
- std::vector<std::string> alphabet_; // Alphabet, one string per letter.
-
- // Iteration state.
- StringPiece sp_; // Last StringPiece returned by Next().
- std::string s_; // String data in last StringPiece returned by Next().
- bool hasnext_; // Whether Next() can be called again.
- std::vector<int> digits_; // Alphabet indices for next string.
- bool generate_null_; // Whether to generate a NULL StringPiece next.
- bool random_; // Whether generated strings are random.
- int nrandom_; // Number of random strings left to generate.
- std::minstd_rand0 rng_; // Random number generator.
-
- StringGenerator(const StringGenerator&) = delete;
- StringGenerator& operator=(const StringGenerator&) = delete;
-};
-
-// Generates and returns a string over binary alphabet {0,1} that contains
-// all possible binary sequences of length n as subsequences. The obvious
-// brute force method would generate a string of length n * 2^n, but this
-// generates a string of length n-1 + 2^n called a De Bruijn cycle.
-// See Knuth, The Art of Computer Programming, Vol 2, Exercise 3.2.2 #17.
-//
-// Such a string is useful for testing a DFA. If you have a DFA
-// where distinct last n bytes implies distinct states, then running on a
-// DeBruijn string causes the DFA to need to create a new state at every
-// position in the input, never reusing any states until it gets to the
-// end of the string. This is the worst possible case for DFA execution.
-std::string DeBruijnString(int n);
-
-} // namespace re2
-
-#endif // RE2_TESTING_STRING_GENERATOR_H_
+// Copyright 2008 The RE2 Authors. All Rights Reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef RE2_TESTING_STRING_GENERATOR_H_
+#define RE2_TESTING_STRING_GENERATOR_H_
+
+// String generator: generates all possible strings of up to
+// maxlen letters using the set of letters in alpha.
+// Fetch strings using a Java-like Next()/HasNext() interface.
+
+#include <stdint.h>
+#include <random>
+#include <string>
+#include <vector>
+
+#include "util/util.h"
+#include "re2/stringpiece.h"
+
+namespace re2 {
+
+class StringGenerator {
+ public:
+ StringGenerator(int maxlen, const std::vector<std::string>& alphabet);
+ ~StringGenerator() {}
+
+ const StringPiece& Next();
+ bool HasNext() { return hasnext_; }
+
+ // Resets generator to start sequence over.
+ void Reset();
+
+ // Causes generator to emit random strings for next n calls to Next().
+ void Random(int32_t seed, int n);
+
+ // Causes generator to emit a NULL as the next call.
+ void GenerateNULL();
+
+ private:
+ bool IncrementDigits();
+ bool RandomDigits();
+
+ // Global state.
+ int maxlen_; // Maximum length string to generate.
+ std::vector<std::string> alphabet_; // Alphabet, one string per letter.
+
+ // Iteration state.
+ StringPiece sp_; // Last StringPiece returned by Next().
+ std::string s_; // String data in last StringPiece returned by Next().
+ bool hasnext_; // Whether Next() can be called again.
+ std::vector<int> digits_; // Alphabet indices for next string.
+ bool generate_null_; // Whether to generate a NULL StringPiece next.
+ bool random_; // Whether generated strings are random.
+ int nrandom_; // Number of random strings left to generate.
+ std::minstd_rand0 rng_; // Random number generator.
+
+ StringGenerator(const StringGenerator&) = delete;
+ StringGenerator& operator=(const StringGenerator&) = delete;
+};
+
+// Generates and returns a string over binary alphabet {0,1} that contains
+// all possible binary sequences of length n as subsequences. The obvious
+// brute force method would generate a string of length n * 2^n, but this
+// generates a string of length n-1 + 2^n called a De Bruijn cycle.
+// See Knuth, The Art of Computer Programming, Vol 2, Exercise 3.2.2 #17.
+//
+// Such a string is useful for testing a DFA. If you have a DFA
+// where distinct last n bytes implies distinct states, then running on a
+// DeBruijn string causes the DFA to need to create a new state at every
+// position in the input, never reusing any states until it gets to the
+// end of the string. This is the worst possible case for DFA execution.
+std::string DeBruijnString(int n);
+
+} // namespace re2
+
+#endif // RE2_TESTING_STRING_GENERATOR_H_
diff --git a/contrib/libs/re2/re2/testing/string_generator_test.cc b/contrib/libs/re2/re2/testing/string_generator_test.cc
index 89a3ebaf82..80521568b3 100644
--- a/contrib/libs/re2/re2/testing/string_generator_test.cc
+++ b/contrib/libs/re2/re2/testing/string_generator_test.cc
@@ -1,110 +1,110 @@
-// Copyright 2008 The RE2 Authors. All Rights Reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// Test StringGenerator.
-
-#include <stdint.h>
-#include <string>
-
-#include "library/cpp/testing/gtest/gtest.h"
-#include "util/utf.h"
-#include "re2/testing/string_generator.h"
-#include "re2/testing/regexp_generator.h"
-
-namespace re2 {
-
-// Returns i to the e.
-static int64_t IntegerPower(int i, int e) {
- int64_t p = 1;
- while (e-- > 0)
- p *= i;
- return p;
-}
-
-// Checks that for given settings of the string generator:
-// * it generates strings that are non-decreasing in length.
-// * strings of the same length are sorted in alphabet order.
-// * it doesn't generate the same string twice.
-// * it generates the right number of strings.
-//
-// If all of these hold, the StringGenerator is behaving.
-// Assumes that the alphabet is sorted, so that the generated
-// strings can just be compared lexicographically.
-static void RunTest(int len, const std::string& alphabet, bool donull) {
- StringGenerator g(len, Explode(alphabet));
-
- int n = 0;
- int last_l = -1;
- std::string last_s;
-
- if (donull) {
- g.GenerateNULL();
- EXPECT_TRUE(g.HasNext());
- StringPiece sp = g.Next();
- EXPECT_EQ(sp.data(), static_cast<const char*>(NULL));
- EXPECT_EQ(sp.size(), 0);
- }
-
- while (g.HasNext()) {
- std::string s = std::string(g.Next());
- n++;
-
- // Check that all characters in s appear in alphabet.
- for (const char *p = s.c_str(); *p != '\0'; ) {
- Rune r;
- p += chartorune(&r, p);
- EXPECT_TRUE(utfrune(alphabet.c_str(), r) != NULL);
- }
-
- // Check that string is properly ordered w.r.t. previous string.
- int l = utflen(s.c_str());
- EXPECT_LE(l, len);
- if (last_l < l) {
- last_l = l;
- } else {
- EXPECT_EQ(last_l, l);
- EXPECT_LT(last_s, s);
- }
- last_s = s;
- }
-
- // Check total string count.
- int64_t m = 0;
- int alpha = utflen(alphabet.c_str());
- if (alpha == 0) // Degenerate case.
- len = 0;
- for (int i = 0; i <= len; i++)
- m += IntegerPower(alpha, i);
- EXPECT_EQ(n, m);
-}
-
-TEST(StringGenerator, NoLength) {
- RunTest(0, "abc", false);
-}
-
-TEST(StringGenerator, NoLengthNoAlphabet) {
- RunTest(0, "", false);
-}
-
-TEST(StringGenerator, NoAlphabet) {
- RunTest(5, "", false);
-}
-
-TEST(StringGenerator, Simple) {
- RunTest(3, "abc", false);
-}
-
-TEST(StringGenerator, UTF8) {
- RunTest(4, "abc\xE2\x98\xBA", false);
-}
-
-TEST(StringGenerator, GenNULL) {
- RunTest(0, "abc", true);
- RunTest(0, "", true);
- RunTest(5, "", true);
- RunTest(3, "abc", true);
- RunTest(4, "abc\xE2\x98\xBA", true);
-}
-
-} // namespace re2
+// Copyright 2008 The RE2 Authors. All Rights Reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Test StringGenerator.
+
+#include <stdint.h>
+#include <string>
+
+#include "library/cpp/testing/gtest/gtest.h"
+#include "util/utf.h"
+#include "re2/testing/string_generator.h"
+#include "re2/testing/regexp_generator.h"
+
+namespace re2 {
+
+// Returns i to the e.
+static int64_t IntegerPower(int i, int e) {
+ int64_t p = 1;
+ while (e-- > 0)
+ p *= i;
+ return p;
+}
+
+// Checks that for given settings of the string generator:
+// * it generates strings that are non-decreasing in length.
+// * strings of the same length are sorted in alphabet order.
+// * it doesn't generate the same string twice.
+// * it generates the right number of strings.
+//
+// If all of these hold, the StringGenerator is behaving.
+// Assumes that the alphabet is sorted, so that the generated
+// strings can just be compared lexicographically.
+static void RunTest(int len, const std::string& alphabet, bool donull) {
+ StringGenerator g(len, Explode(alphabet));
+
+ int n = 0;
+ int last_l = -1;
+ std::string last_s;
+
+ if (donull) {
+ g.GenerateNULL();
+ EXPECT_TRUE(g.HasNext());
+ StringPiece sp = g.Next();
+ EXPECT_EQ(sp.data(), static_cast<const char*>(NULL));
+ EXPECT_EQ(sp.size(), 0);
+ }
+
+ while (g.HasNext()) {
+ std::string s = std::string(g.Next());
+ n++;
+
+ // Check that all characters in s appear in alphabet.
+ for (const char *p = s.c_str(); *p != '\0'; ) {
+ Rune r;
+ p += chartorune(&r, p);
+ EXPECT_TRUE(utfrune(alphabet.c_str(), r) != NULL);
+ }
+
+ // Check that string is properly ordered w.r.t. previous string.
+ int l = utflen(s.c_str());
+ EXPECT_LE(l, len);
+ if (last_l < l) {
+ last_l = l;
+ } else {
+ EXPECT_EQ(last_l, l);
+ EXPECT_LT(last_s, s);
+ }
+ last_s = s;
+ }
+
+ // Check total string count.
+ int64_t m = 0;
+ int alpha = utflen(alphabet.c_str());
+ if (alpha == 0) // Degenerate case.
+ len = 0;
+ for (int i = 0; i <= len; i++)
+ m += IntegerPower(alpha, i);
+ EXPECT_EQ(n, m);
+}
+
+TEST(StringGenerator, NoLength) {
+ RunTest(0, "abc", false);
+}
+
+TEST(StringGenerator, NoLengthNoAlphabet) {
+ RunTest(0, "", false);
+}
+
+TEST(StringGenerator, NoAlphabet) {
+ RunTest(5, "", false);
+}
+
+TEST(StringGenerator, Simple) {
+ RunTest(3, "abc", false);
+}
+
+TEST(StringGenerator, UTF8) {
+ RunTest(4, "abc\xE2\x98\xBA", false);
+}
+
+TEST(StringGenerator, GenNULL) {
+ RunTest(0, "abc", true);
+ RunTest(0, "", true);
+ RunTest(5, "", true);
+ RunTest(3, "abc", true);
+ RunTest(4, "abc\xE2\x98\xBA", true);
+}
+
+} // namespace re2
diff --git a/contrib/libs/re2/re2/testing/tester.cc b/contrib/libs/re2/re2/testing/tester.cc
index 8c74ed80d7..b0c22f25b2 100644
--- a/contrib/libs/re2/re2/testing/tester.cc
+++ b/contrib/libs/re2/re2/testing/tester.cc
@@ -1,685 +1,685 @@
-// Copyright 2008 The RE2 Authors. All Rights Reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// Regular expression engine tester -- test all the implementations against each other.
-
-#include <stddef.h>
-#include <stdint.h>
-#include <string.h>
-#include <string>
-
-#include "util/util.h"
-#include "util/flags.h"
-#include "util/logging.h"
-#include "util/strutil.h"
-#include "re2/testing/tester.h"
-#include "re2/prog.h"
-#include "re2/re2.h"
-#include "re2/regexp.h"
-
-DEFINE_FLAG(bool, dump_prog, false, "dump regexp program");
-DEFINE_FLAG(bool, log_okay, false, "log successful runs");
-DEFINE_FLAG(bool, dump_rprog, false, "dump reversed regexp program");
-
-DEFINE_FLAG(int, max_regexp_failures, 100,
- "maximum number of regexp test failures (-1 = unlimited)");
-
-DEFINE_FLAG(std::string, regexp_engines, "",
- "pattern to select regexp engines to test");
-
-namespace re2 {
-
-enum {
- kMaxSubmatch = 1+16, // $0...$16
-};
-
-const char* engine_names[kEngineMax] = {
- "Backtrack",
- "NFA",
- "DFA",
- "DFA1",
- "OnePass",
- "BitState",
- "RE2",
- "RE2a",
- "RE2b",
- "PCRE",
-};
-
-// Returns the name of the engine.
-static const char* EngineName(Engine e) {
- CHECK_GE(e, 0);
- CHECK_LT(e, arraysize(engine_names));
- CHECK(engine_names[e] != NULL);
- return engine_names[e];
-}
-
-// Returns bit mask of engines to use.
-static uint32_t Engines() {
- static bool did_parse = false;
- static uint32_t cached_engines = 0;
-
- if (did_parse)
- return cached_engines;
-
- if (GetFlag(FLAGS_regexp_engines).empty()) {
- cached_engines = ~0;
- } else {
- for (Engine i = static_cast<Engine>(0); i < kEngineMax; i++)
- if (GetFlag(FLAGS_regexp_engines).find(EngineName(i)) != std::string::npos)
- cached_engines |= 1<<i;
- }
-
- if (cached_engines == 0)
- LOG(INFO) << "Warning: no engines enabled.";
- if (!UsingPCRE)
- cached_engines &= ~(1<<kEnginePCRE);
- for (Engine i = static_cast<Engine>(0); i < kEngineMax; i++) {
- if (cached_engines & (1<<i))
- LOG(INFO) << EngineName(i) << " enabled";
- }
-
- did_parse = true;
- return cached_engines;
-}
-
-// The result of running a match.
-struct TestInstance::Result {
- Result()
- : skipped(false),
- matched(false),
- untrusted(false),
- have_submatch(false),
- have_submatch0(false) {
- ClearSubmatch();
- }
-
- void ClearSubmatch() {
- for (int i = 0; i < kMaxSubmatch; i++)
- submatch[i] = StringPiece();
- }
-
- bool skipped; // test skipped: wasn't applicable
- bool matched; // found a match
- bool untrusted; // don't really trust the answer
- bool have_submatch; // computed all submatch info
- bool have_submatch0; // computed just submatch[0]
- StringPiece submatch[kMaxSubmatch];
-};
-
-typedef TestInstance::Result Result;
-
-// Formats a single capture range s in text in the form (a,b)
-// where a and b are the starting and ending offsets of s in text.
-static std::string FormatCapture(const StringPiece& text,
- const StringPiece& s) {
- if (s.data() == NULL)
- return "(?,?)";
- return StringPrintf("(%td,%td)",
+// Copyright 2008 The RE2 Authors. All Rights Reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Regular expression engine tester -- test all the implementations against each other.
+
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>
+#include <string>
+
+#include "util/util.h"
+#include "util/flags.h"
+#include "util/logging.h"
+#include "util/strutil.h"
+#include "re2/testing/tester.h"
+#include "re2/prog.h"
+#include "re2/re2.h"
+#include "re2/regexp.h"
+
+DEFINE_FLAG(bool, dump_prog, false, "dump regexp program");
+DEFINE_FLAG(bool, log_okay, false, "log successful runs");
+DEFINE_FLAG(bool, dump_rprog, false, "dump reversed regexp program");
+
+DEFINE_FLAG(int, max_regexp_failures, 100,
+ "maximum number of regexp test failures (-1 = unlimited)");
+
+DEFINE_FLAG(std::string, regexp_engines, "",
+ "pattern to select regexp engines to test");
+
+namespace re2 {
+
+enum {
+ kMaxSubmatch = 1+16, // $0...$16
+};
+
+const char* engine_names[kEngineMax] = {
+ "Backtrack",
+ "NFA",
+ "DFA",
+ "DFA1",
+ "OnePass",
+ "BitState",
+ "RE2",
+ "RE2a",
+ "RE2b",
+ "PCRE",
+};
+
+// Returns the name of the engine.
+static const char* EngineName(Engine e) {
+ CHECK_GE(e, 0);
+ CHECK_LT(e, arraysize(engine_names));
+ CHECK(engine_names[e] != NULL);
+ return engine_names[e];
+}
+
+// Returns bit mask of engines to use.
+static uint32_t Engines() {
+ static bool did_parse = false;
+ static uint32_t cached_engines = 0;
+
+ if (did_parse)
+ return cached_engines;
+
+ if (GetFlag(FLAGS_regexp_engines).empty()) {
+ cached_engines = ~0;
+ } else {
+ for (Engine i = static_cast<Engine>(0); i < kEngineMax; i++)
+ if (GetFlag(FLAGS_regexp_engines).find(EngineName(i)) != std::string::npos)
+ cached_engines |= 1<<i;
+ }
+
+ if (cached_engines == 0)
+ LOG(INFO) << "Warning: no engines enabled.";
+ if (!UsingPCRE)
+ cached_engines &= ~(1<<kEnginePCRE);
+ for (Engine i = static_cast<Engine>(0); i < kEngineMax; i++) {
+ if (cached_engines & (1<<i))
+ LOG(INFO) << EngineName(i) << " enabled";
+ }
+
+ did_parse = true;
+ return cached_engines;
+}
+
+// The result of running a match.
+struct TestInstance::Result {
+ Result()
+ : skipped(false),
+ matched(false),
+ untrusted(false),
+ have_submatch(false),
+ have_submatch0(false) {
+ ClearSubmatch();
+ }
+
+ void ClearSubmatch() {
+ for (int i = 0; i < kMaxSubmatch; i++)
+ submatch[i] = StringPiece();
+ }
+
+ bool skipped; // test skipped: wasn't applicable
+ bool matched; // found a match
+ bool untrusted; // don't really trust the answer
+ bool have_submatch; // computed all submatch info
+ bool have_submatch0; // computed just submatch[0]
+ StringPiece submatch[kMaxSubmatch];
+};
+
+typedef TestInstance::Result Result;
+
+// Formats a single capture range s in text in the form (a,b)
+// where a and b are the starting and ending offsets of s in text.
+static std::string FormatCapture(const StringPiece& text,
+ const StringPiece& s) {
+ if (s.data() == NULL)
+ return "(?,?)";
+ return StringPrintf("(%td,%td)",
BeginPtr(s) - BeginPtr(text),
EndPtr(s) - BeginPtr(text));
-}
-
-// Returns whether text contains non-ASCII (>= 0x80) bytes.
-static bool NonASCII(const StringPiece& text) {
- for (size_t i = 0; i < text.size(); i++)
- if ((uint8_t)text[i] >= 0x80)
- return true;
- return false;
-}
-
-// Returns string representation of match kind.
-static std::string FormatKind(Prog::MatchKind kind) {
- switch (kind) {
- case Prog::kFullMatch:
- return "full match";
- case Prog::kLongestMatch:
- return "longest match";
- case Prog::kFirstMatch:
- return "first match";
- case Prog::kManyMatch:
- return "many match";
- }
- return "???";
-}
-
-// Returns string representation of anchor kind.
-static std::string FormatAnchor(Prog::Anchor anchor) {
- switch (anchor) {
- case Prog::kAnchored:
- return "anchored";
- case Prog::kUnanchored:
- return "unanchored";
- }
- return "???";
-}
-
-struct ParseMode {
- Regexp::ParseFlags parse_flags;
- std::string desc;
-};
-
-static const Regexp::ParseFlags single_line =
- Regexp::LikePerl;
-static const Regexp::ParseFlags multi_line =
- static_cast<Regexp::ParseFlags>(Regexp::LikePerl & ~Regexp::OneLine);
-
-static ParseMode parse_modes[] = {
- { single_line, "single-line" },
- { single_line|Regexp::Latin1, "single-line, latin1" },
- { multi_line, "multiline" },
- { multi_line|Regexp::NonGreedy, "multiline, nongreedy" },
- { multi_line|Regexp::Latin1, "multiline, latin1" },
-};
-
-static std::string FormatMode(Regexp::ParseFlags flags) {
- for (size_t i = 0; i < arraysize(parse_modes); i++)
- if (parse_modes[i].parse_flags == flags)
- return parse_modes[i].desc;
- return StringPrintf("%#x", static_cast<uint32_t>(flags));
-}
-
-// Constructs and saves all the matching engines that
-// will be required for the given tests.
-TestInstance::TestInstance(const StringPiece& regexp_str, Prog::MatchKind kind,
- Regexp::ParseFlags flags)
- : regexp_str_(regexp_str),
- kind_(kind),
- flags_(flags),
- error_(false),
- regexp_(NULL),
- num_captures_(0),
- prog_(NULL),
- rprog_(NULL),
- re_(NULL),
- re2_(NULL) {
-
- VLOG(1) << CEscape(regexp_str);
-
- // Compile regexp to prog.
- // Always required - needed for backtracking (reference implementation).
- RegexpStatus status;
- regexp_ = Regexp::Parse(regexp_str, flags, &status);
- if (regexp_ == NULL) {
- LOG(INFO) << "Cannot parse: " << CEscape(regexp_str_)
- << " mode: " << FormatMode(flags);
- error_ = true;
- return;
- }
- num_captures_ = regexp_->NumCaptures();
- prog_ = regexp_->CompileToProg(0);
- if (prog_ == NULL) {
- LOG(INFO) << "Cannot compile: " << CEscape(regexp_str_);
- error_ = true;
- return;
- }
- if (GetFlag(FLAGS_dump_prog)) {
- LOG(INFO) << "Prog for "
- << " regexp "
- << CEscape(regexp_str_)
- << " (" << FormatKind(kind_)
- << ", " << FormatMode(flags_)
- << ")\n"
- << prog_->Dump();
- }
-
- // Compile regexp to reversed prog. Only needed for DFA engines.
- if (Engines() & ((1<<kEngineDFA)|(1<<kEngineDFA1))) {
- rprog_ = regexp_->CompileToReverseProg(0);
- if (rprog_ == NULL) {
- LOG(INFO) << "Cannot reverse compile: " << CEscape(regexp_str_);
- error_ = true;
- return;
- }
- if (GetFlag(FLAGS_dump_rprog))
- LOG(INFO) << rprog_->Dump();
- }
-
- // Create re string that will be used for RE and RE2.
- std::string re = std::string(regexp_str);
- // Accomodate flags.
- // Regexp::Latin1 will be accomodated below.
- if (!(flags & Regexp::OneLine))
- re = "(?m)" + re;
- if (flags & Regexp::NonGreedy)
- re = "(?U)" + re;
- if (flags & Regexp::DotNL)
- re = "(?s)" + re;
-
- // Compile regexp to RE2.
- if (Engines() & ((1<<kEngineRE2)|(1<<kEngineRE2a)|(1<<kEngineRE2b))) {
- RE2::Options options;
- if (flags & Regexp::Latin1)
- options.set_encoding(RE2::Options::EncodingLatin1);
- if (kind_ == Prog::kLongestMatch)
- options.set_longest_match(true);
- re2_ = new RE2(re, options);
- if (!re2_->error().empty()) {
- LOG(INFO) << "Cannot RE2: " << CEscape(re);
- error_ = true;
- return;
- }
- }
-
- // Compile regexp to RE.
- // PCRE as exposed by the RE interface isn't always usable.
- // 1. It disagrees about handling of empty-string reptitions
- // like matching (a*)* against "b". PCRE treats the (a*) as
- // occurring once, while we treat it as occurring not at all.
- // 2. It treats $ as this weird thing meaning end of string
- // or before the \n at the end of the string.
- // 3. It doesn't implement POSIX leftmost-longest matching.
- // 4. It lets \s match vertical tab.
- // MimicsPCRE() detects 1 and 2.
- if ((Engines() & (1<<kEnginePCRE)) && regexp_->MimicsPCRE() &&
- kind_ != Prog::kLongestMatch) {
- PCRE_Options o;
- o.set_option(PCRE::UTF8);
- if (flags & Regexp::Latin1)
- o.set_option(PCRE::None);
- // PCRE has interface bug keeping us from finding $0, so
- // add one more layer of parens.
- re_ = new PCRE("("+re+")", o);
- if (!re_->error().empty()) {
- LOG(INFO) << "Cannot PCRE: " << CEscape(re);
- error_ = true;
- return;
- }
- }
-}
-
-TestInstance::~TestInstance() {
- if (regexp_)
- regexp_->Decref();
- delete prog_;
- delete rprog_;
- delete re_;
- delete re2_;
-}
-
-// Runs a single search using the named engine type.
-// This interface hides all the irregularities of the various
-// engine interfaces from the rest of this file.
-void TestInstance::RunSearch(Engine type,
- const StringPiece& orig_text,
- const StringPiece& orig_context,
- Prog::Anchor anchor,
- Result* result) {
- if (regexp_ == NULL) {
- result->skipped = true;
- return;
- }
- int nsubmatch = 1 + num_captures_; // NumCaptures doesn't count $0
- if (nsubmatch > kMaxSubmatch)
- nsubmatch = kMaxSubmatch;
-
- StringPiece text = orig_text;
- StringPiece context = orig_context;
-
- switch (type) {
- default:
- LOG(FATAL) << "Bad RunSearch type: " << (int)type;
-
- case kEngineBacktrack:
- if (prog_ == NULL) {
- result->skipped = true;
- break;
- }
- result->matched =
- prog_->UnsafeSearchBacktrack(text, context, anchor, kind_,
- result->submatch, nsubmatch);
- result->have_submatch = true;
- break;
-
- case kEngineNFA:
- if (prog_ == NULL) {
- result->skipped = true;
- break;
- }
- result->matched =
- prog_->SearchNFA(text, context, anchor, kind_,
- result->submatch, nsubmatch);
- result->have_submatch = true;
- break;
-
- case kEngineDFA:
- if (prog_ == NULL) {
- result->skipped = true;
- break;
- }
- result->matched = prog_->SearchDFA(text, context, anchor, kind_, NULL,
- &result->skipped, NULL);
- break;
-
- case kEngineDFA1:
- if (prog_ == NULL || rprog_ == NULL) {
- result->skipped = true;
- break;
- }
- result->matched =
- prog_->SearchDFA(text, context, anchor, kind_, result->submatch,
- &result->skipped, NULL);
- // If anchored, no need for second run,
- // but do it anyway to find more bugs.
- if (result->matched) {
- if (!rprog_->SearchDFA(result->submatch[0], context,
- Prog::kAnchored, Prog::kLongestMatch,
- result->submatch,
- &result->skipped, NULL)) {
- LOG(ERROR) << "Reverse DFA inconsistency: "
- << CEscape(regexp_str_)
- << " on " << CEscape(text);
- result->matched = false;
- }
- }
- result->have_submatch0 = true;
- break;
-
- case kEngineOnePass:
- if (prog_ == NULL ||
- !prog_->IsOnePass() ||
- anchor == Prog::kUnanchored ||
- nsubmatch > Prog::kMaxOnePassCapture) {
- result->skipped = true;
- break;
- }
- result->matched = prog_->SearchOnePass(text, context, anchor, kind_,
- result->submatch, nsubmatch);
- result->have_submatch = true;
- break;
-
- case kEngineBitState:
- if (prog_ == NULL ||
- !prog_->CanBitState()) {
- result->skipped = true;
- break;
- }
- result->matched = prog_->SearchBitState(text, context, anchor, kind_,
- result->submatch, nsubmatch);
- result->have_submatch = true;
- break;
-
- case kEngineRE2:
- case kEngineRE2a:
- case kEngineRE2b: {
+}
+
+// Returns whether text contains non-ASCII (>= 0x80) bytes.
+static bool NonASCII(const StringPiece& text) {
+ for (size_t i = 0; i < text.size(); i++)
+ if ((uint8_t)text[i] >= 0x80)
+ return true;
+ return false;
+}
+
+// Returns string representation of match kind.
+static std::string FormatKind(Prog::MatchKind kind) {
+ switch (kind) {
+ case Prog::kFullMatch:
+ return "full match";
+ case Prog::kLongestMatch:
+ return "longest match";
+ case Prog::kFirstMatch:
+ return "first match";
+ case Prog::kManyMatch:
+ return "many match";
+ }
+ return "???";
+}
+
+// Returns string representation of anchor kind.
+static std::string FormatAnchor(Prog::Anchor anchor) {
+ switch (anchor) {
+ case Prog::kAnchored:
+ return "anchored";
+ case Prog::kUnanchored:
+ return "unanchored";
+ }
+ return "???";
+}
+
+struct ParseMode {
+ Regexp::ParseFlags parse_flags;
+ std::string desc;
+};
+
+static const Regexp::ParseFlags single_line =
+ Regexp::LikePerl;
+static const Regexp::ParseFlags multi_line =
+ static_cast<Regexp::ParseFlags>(Regexp::LikePerl & ~Regexp::OneLine);
+
+static ParseMode parse_modes[] = {
+ { single_line, "single-line" },
+ { single_line|Regexp::Latin1, "single-line, latin1" },
+ { multi_line, "multiline" },
+ { multi_line|Regexp::NonGreedy, "multiline, nongreedy" },
+ { multi_line|Regexp::Latin1, "multiline, latin1" },
+};
+
+static std::string FormatMode(Regexp::ParseFlags flags) {
+ for (size_t i = 0; i < arraysize(parse_modes); i++)
+ if (parse_modes[i].parse_flags == flags)
+ return parse_modes[i].desc;
+ return StringPrintf("%#x", static_cast<uint32_t>(flags));
+}
+
+// Constructs and saves all the matching engines that
+// will be required for the given tests.
+TestInstance::TestInstance(const StringPiece& regexp_str, Prog::MatchKind kind,
+ Regexp::ParseFlags flags)
+ : regexp_str_(regexp_str),
+ kind_(kind),
+ flags_(flags),
+ error_(false),
+ regexp_(NULL),
+ num_captures_(0),
+ prog_(NULL),
+ rprog_(NULL),
+ re_(NULL),
+ re2_(NULL) {
+
+ VLOG(1) << CEscape(regexp_str);
+
+ // Compile regexp to prog.
+ // Always required - needed for backtracking (reference implementation).
+ RegexpStatus status;
+ regexp_ = Regexp::Parse(regexp_str, flags, &status);
+ if (regexp_ == NULL) {
+ LOG(INFO) << "Cannot parse: " << CEscape(regexp_str_)
+ << " mode: " << FormatMode(flags);
+ error_ = true;
+ return;
+ }
+ num_captures_ = regexp_->NumCaptures();
+ prog_ = regexp_->CompileToProg(0);
+ if (prog_ == NULL) {
+ LOG(INFO) << "Cannot compile: " << CEscape(regexp_str_);
+ error_ = true;
+ return;
+ }
+ if (GetFlag(FLAGS_dump_prog)) {
+ LOG(INFO) << "Prog for "
+ << " regexp "
+ << CEscape(regexp_str_)
+ << " (" << FormatKind(kind_)
+ << ", " << FormatMode(flags_)
+ << ")\n"
+ << prog_->Dump();
+ }
+
+ // Compile regexp to reversed prog. Only needed for DFA engines.
+ if (Engines() & ((1<<kEngineDFA)|(1<<kEngineDFA1))) {
+ rprog_ = regexp_->CompileToReverseProg(0);
+ if (rprog_ == NULL) {
+ LOG(INFO) << "Cannot reverse compile: " << CEscape(regexp_str_);
+ error_ = true;
+ return;
+ }
+ if (GetFlag(FLAGS_dump_rprog))
+ LOG(INFO) << rprog_->Dump();
+ }
+
+ // Create re string that will be used for RE and RE2.
+ std::string re = std::string(regexp_str);
+ // Accomodate flags.
+ // Regexp::Latin1 will be accomodated below.
+ if (!(flags & Regexp::OneLine))
+ re = "(?m)" + re;
+ if (flags & Regexp::NonGreedy)
+ re = "(?U)" + re;
+ if (flags & Regexp::DotNL)
+ re = "(?s)" + re;
+
+ // Compile regexp to RE2.
+ if (Engines() & ((1<<kEngineRE2)|(1<<kEngineRE2a)|(1<<kEngineRE2b))) {
+ RE2::Options options;
+ if (flags & Regexp::Latin1)
+ options.set_encoding(RE2::Options::EncodingLatin1);
+ if (kind_ == Prog::kLongestMatch)
+ options.set_longest_match(true);
+ re2_ = new RE2(re, options);
+ if (!re2_->error().empty()) {
+ LOG(INFO) << "Cannot RE2: " << CEscape(re);
+ error_ = true;
+ return;
+ }
+ }
+
+ // Compile regexp to RE.
+ // PCRE as exposed by the RE interface isn't always usable.
+ // 1. It disagrees about handling of empty-string reptitions
+ // like matching (a*)* against "b". PCRE treats the (a*) as
+ // occurring once, while we treat it as occurring not at all.
+ // 2. It treats $ as this weird thing meaning end of string
+ // or before the \n at the end of the string.
+ // 3. It doesn't implement POSIX leftmost-longest matching.
+ // 4. It lets \s match vertical tab.
+ // MimicsPCRE() detects 1 and 2.
+ if ((Engines() & (1<<kEnginePCRE)) && regexp_->MimicsPCRE() &&
+ kind_ != Prog::kLongestMatch) {
+ PCRE_Options o;
+ o.set_option(PCRE::UTF8);
+ if (flags & Regexp::Latin1)
+ o.set_option(PCRE::None);
+ // PCRE has interface bug keeping us from finding $0, so
+ // add one more layer of parens.
+ re_ = new PCRE("("+re+")", o);
+ if (!re_->error().empty()) {
+ LOG(INFO) << "Cannot PCRE: " << CEscape(re);
+ error_ = true;
+ return;
+ }
+ }
+}
+
+TestInstance::~TestInstance() {
+ if (regexp_)
+ regexp_->Decref();
+ delete prog_;
+ delete rprog_;
+ delete re_;
+ delete re2_;
+}
+
+// Runs a single search using the named engine type.
+// This interface hides all the irregularities of the various
+// engine interfaces from the rest of this file.
+void TestInstance::RunSearch(Engine type,
+ const StringPiece& orig_text,
+ const StringPiece& orig_context,
+ Prog::Anchor anchor,
+ Result* result) {
+ if (regexp_ == NULL) {
+ result->skipped = true;
+ return;
+ }
+ int nsubmatch = 1 + num_captures_; // NumCaptures doesn't count $0
+ if (nsubmatch > kMaxSubmatch)
+ nsubmatch = kMaxSubmatch;
+
+ StringPiece text = orig_text;
+ StringPiece context = orig_context;
+
+ switch (type) {
+ default:
+ LOG(FATAL) << "Bad RunSearch type: " << (int)type;
+
+ case kEngineBacktrack:
+ if (prog_ == NULL) {
+ result->skipped = true;
+ break;
+ }
+ result->matched =
+ prog_->UnsafeSearchBacktrack(text, context, anchor, kind_,
+ result->submatch, nsubmatch);
+ result->have_submatch = true;
+ break;
+
+ case kEngineNFA:
+ if (prog_ == NULL) {
+ result->skipped = true;
+ break;
+ }
+ result->matched =
+ prog_->SearchNFA(text, context, anchor, kind_,
+ result->submatch, nsubmatch);
+ result->have_submatch = true;
+ break;
+
+ case kEngineDFA:
+ if (prog_ == NULL) {
+ result->skipped = true;
+ break;
+ }
+ result->matched = prog_->SearchDFA(text, context, anchor, kind_, NULL,
+ &result->skipped, NULL);
+ break;
+
+ case kEngineDFA1:
+ if (prog_ == NULL || rprog_ == NULL) {
+ result->skipped = true;
+ break;
+ }
+ result->matched =
+ prog_->SearchDFA(text, context, anchor, kind_, result->submatch,
+ &result->skipped, NULL);
+ // If anchored, no need for second run,
+ // but do it anyway to find more bugs.
+ if (result->matched) {
+ if (!rprog_->SearchDFA(result->submatch[0], context,
+ Prog::kAnchored, Prog::kLongestMatch,
+ result->submatch,
+ &result->skipped, NULL)) {
+ LOG(ERROR) << "Reverse DFA inconsistency: "
+ << CEscape(regexp_str_)
+ << " on " << CEscape(text);
+ result->matched = false;
+ }
+ }
+ result->have_submatch0 = true;
+ break;
+
+ case kEngineOnePass:
+ if (prog_ == NULL ||
+ !prog_->IsOnePass() ||
+ anchor == Prog::kUnanchored ||
+ nsubmatch > Prog::kMaxOnePassCapture) {
+ result->skipped = true;
+ break;
+ }
+ result->matched = prog_->SearchOnePass(text, context, anchor, kind_,
+ result->submatch, nsubmatch);
+ result->have_submatch = true;
+ break;
+
+ case kEngineBitState:
+ if (prog_ == NULL ||
+ !prog_->CanBitState()) {
+ result->skipped = true;
+ break;
+ }
+ result->matched = prog_->SearchBitState(text, context, anchor, kind_,
+ result->submatch, nsubmatch);
+ result->have_submatch = true;
+ break;
+
+ case kEngineRE2:
+ case kEngineRE2a:
+ case kEngineRE2b: {
if (!re2_ || EndPtr(text) != EndPtr(context)) {
- result->skipped = true;
- break;
- }
-
- RE2::Anchor re_anchor;
- if (anchor == Prog::kAnchored)
- re_anchor = RE2::ANCHOR_START;
- else
- re_anchor = RE2::UNANCHORED;
- if (kind_ == Prog::kFullMatch)
- re_anchor = RE2::ANCHOR_BOTH;
-
- result->matched = re2_->Match(
- context,
+ result->skipped = true;
+ break;
+ }
+
+ RE2::Anchor re_anchor;
+ if (anchor == Prog::kAnchored)
+ re_anchor = RE2::ANCHOR_START;
+ else
+ re_anchor = RE2::UNANCHORED;
+ if (kind_ == Prog::kFullMatch)
+ re_anchor = RE2::ANCHOR_BOTH;
+
+ result->matched = re2_->Match(
+ context,
static_cast<size_t>(BeginPtr(text) - BeginPtr(context)),
static_cast<size_t>(EndPtr(text) - BeginPtr(context)),
- re_anchor,
- result->submatch,
- nsubmatch);
- result->have_submatch = nsubmatch > 0;
- break;
- }
-
- case kEnginePCRE: {
+ re_anchor,
+ result->submatch,
+ nsubmatch);
+ result->have_submatch = nsubmatch > 0;
+ break;
+ }
+
+ case kEnginePCRE: {
if (!re_ || BeginPtr(text) != BeginPtr(context) ||
EndPtr(text) != EndPtr(context)) {
- result->skipped = true;
- break;
- }
-
- // In Perl/PCRE, \v matches any character considered vertical
- // whitespace, not just vertical tab. Regexp::MimicsPCRE() is
- // unable to handle all cases of this, unfortunately, so just
- // catch them here. :(
- if (regexp_str_.find("\\v") != StringPiece::npos &&
- (text.find('\n') != StringPiece::npos ||
- text.find('\f') != StringPiece::npos ||
- text.find('\r') != StringPiece::npos)) {
- result->skipped = true;
- break;
- }
-
- // PCRE 8.34 or so started allowing vertical tab to match \s,
- // following a change made in Perl 5.18. RE2 does not.
- if ((regexp_str_.find("\\s") != StringPiece::npos ||
- regexp_str_.find("\\S") != StringPiece::npos) &&
- text.find('\v') != StringPiece::npos) {
- result->skipped = true;
- break;
- }
-
- const PCRE::Arg **argptr = new const PCRE::Arg*[nsubmatch];
- PCRE::Arg *a = new PCRE::Arg[nsubmatch];
- for (int i = 0; i < nsubmatch; i++) {
- a[i] = PCRE::Arg(&result->submatch[i]);
- argptr[i] = &a[i];
- }
- size_t consumed;
- PCRE::Anchor pcre_anchor;
- if (anchor == Prog::kAnchored)
- pcre_anchor = PCRE::ANCHOR_START;
- else
- pcre_anchor = PCRE::UNANCHORED;
- if (kind_ == Prog::kFullMatch)
- pcre_anchor = PCRE::ANCHOR_BOTH;
- re_->ClearHitLimit();
- result->matched =
- re_->DoMatch(text,
- pcre_anchor,
- &consumed,
- argptr, nsubmatch);
- if (re_->HitLimit()) {
- result->untrusted = true;
- delete[] argptr;
- delete[] a;
- break;
- }
- result->have_submatch = true;
- delete[] argptr;
- delete[] a;
- break;
- }
- }
-
- if (!result->matched)
- result->ClearSubmatch();
-}
-
-// Checks whether r is okay given that correct is the right answer.
-// Specifically, r's answers have to match (but it doesn't have to
-// claim to have all the answers).
-static bool ResultOkay(const Result& r, const Result& correct) {
- if (r.skipped)
- return true;
- if (r.matched != correct.matched)
- return false;
- if (r.have_submatch || r.have_submatch0) {
- for (int i = 0; i < kMaxSubmatch; i++) {
- if (correct.submatch[i].data() != r.submatch[i].data() ||
- correct.submatch[i].size() != r.submatch[i].size())
- return false;
- if (!r.have_submatch)
- break;
- }
- }
- return true;
-}
-
-// Runs a single test.
-bool TestInstance::RunCase(const StringPiece& text, const StringPiece& context,
- Prog::Anchor anchor) {
- // Backtracking is the gold standard.
- Result correct;
- RunSearch(kEngineBacktrack, text, context, anchor, &correct);
- if (correct.skipped) {
- if (regexp_ == NULL)
- return true;
- LOG(ERROR) << "Skipped backtracking! " << CEscape(regexp_str_)
- << " " << FormatMode(flags_);
- return false;
- }
- VLOG(1) << "Try: regexp " << CEscape(regexp_str_)
- << " text " << CEscape(text)
- << " (" << FormatKind(kind_)
- << ", " << FormatAnchor(anchor)
- << ", " << FormatMode(flags_)
- << ")";
-
- // Compare the others.
- bool all_okay = true;
- for (Engine i = kEngineBacktrack+1; i < kEngineMax; i++) {
- if (!(Engines() & (1<<i)))
- continue;
-
- Result r;
- RunSearch(i, text, context, anchor, &r);
- if (ResultOkay(r, correct)) {
- if (GetFlag(FLAGS_log_okay))
- LogMatch(r.skipped ? "Skipped: " : "Okay: ", i, text, context, anchor);
- continue;
- }
-
- // We disagree with PCRE on the meaning of some Unicode matches.
- // In particular, we treat non-ASCII UTF-8 as non-word characters.
- // We also treat "empty" character sets like [^\w\W] as being
- // impossible to match, while PCRE apparently excludes some code
- // points (e.g., 0x0080) from both \w and \W.
- if (i == kEnginePCRE && NonASCII(text))
- continue;
-
- if (!r.untrusted)
- all_okay = false;
-
- LogMatch(r.untrusted ? "(Untrusted) Mismatch: " : "Mismatch: ", i, text,
- context, anchor);
- if (r.matched != correct.matched) {
- if (r.matched) {
- LOG(INFO) << " Should not match (but does).";
- } else {
- LOG(INFO) << " Should match (but does not).";
- continue;
- }
- }
- for (int i = 0; i < 1+num_captures_; i++) {
- if (r.submatch[i].data() != correct.submatch[i].data() ||
- r.submatch[i].size() != correct.submatch[i].size()) {
- LOG(INFO) <<
- StringPrintf(" $%d: should be %s is %s",
- i,
- FormatCapture(text, correct.submatch[i]).c_str(),
- FormatCapture(text, r.submatch[i]).c_str());
- } else {
- LOG(INFO) <<
- StringPrintf(" $%d: %s ok", i,
- FormatCapture(text, r.submatch[i]).c_str());
- }
- }
- }
-
- if (!all_okay) {
- // This will be initialised once (after flags have been initialised)
- // and that is desirable because we want to enforce a global limit.
- static int max_regexp_failures = GetFlag(FLAGS_max_regexp_failures);
- if (max_regexp_failures > 0 && --max_regexp_failures == 0)
- LOG(QFATAL) << "Too many regexp failures.";
- }
-
- return all_okay;
-}
-
-void TestInstance::LogMatch(const char* prefix, Engine e,
- const StringPiece& text, const StringPiece& context,
- Prog::Anchor anchor) {
- LOG(INFO) << prefix
- << EngineName(e)
- << " regexp "
- << CEscape(regexp_str_)
- << " "
- << CEscape(regexp_->ToString())
- << " text "
- << CEscape(text)
- << " ("
+ result->skipped = true;
+ break;
+ }
+
+ // In Perl/PCRE, \v matches any character considered vertical
+ // whitespace, not just vertical tab. Regexp::MimicsPCRE() is
+ // unable to handle all cases of this, unfortunately, so just
+ // catch them here. :(
+ if (regexp_str_.find("\\v") != StringPiece::npos &&
+ (text.find('\n') != StringPiece::npos ||
+ text.find('\f') != StringPiece::npos ||
+ text.find('\r') != StringPiece::npos)) {
+ result->skipped = true;
+ break;
+ }
+
+ // PCRE 8.34 or so started allowing vertical tab to match \s,
+ // following a change made in Perl 5.18. RE2 does not.
+ if ((regexp_str_.find("\\s") != StringPiece::npos ||
+ regexp_str_.find("\\S") != StringPiece::npos) &&
+ text.find('\v') != StringPiece::npos) {
+ result->skipped = true;
+ break;
+ }
+
+ const PCRE::Arg **argptr = new const PCRE::Arg*[nsubmatch];
+ PCRE::Arg *a = new PCRE::Arg[nsubmatch];
+ for (int i = 0; i < nsubmatch; i++) {
+ a[i] = PCRE::Arg(&result->submatch[i]);
+ argptr[i] = &a[i];
+ }
+ size_t consumed;
+ PCRE::Anchor pcre_anchor;
+ if (anchor == Prog::kAnchored)
+ pcre_anchor = PCRE::ANCHOR_START;
+ else
+ pcre_anchor = PCRE::UNANCHORED;
+ if (kind_ == Prog::kFullMatch)
+ pcre_anchor = PCRE::ANCHOR_BOTH;
+ re_->ClearHitLimit();
+ result->matched =
+ re_->DoMatch(text,
+ pcre_anchor,
+ &consumed,
+ argptr, nsubmatch);
+ if (re_->HitLimit()) {
+ result->untrusted = true;
+ delete[] argptr;
+ delete[] a;
+ break;
+ }
+ result->have_submatch = true;
+ delete[] argptr;
+ delete[] a;
+ break;
+ }
+ }
+
+ if (!result->matched)
+ result->ClearSubmatch();
+}
+
+// Checks whether r is okay given that correct is the right answer.
+// Specifically, r's answers have to match (but it doesn't have to
+// claim to have all the answers).
+static bool ResultOkay(const Result& r, const Result& correct) {
+ if (r.skipped)
+ return true;
+ if (r.matched != correct.matched)
+ return false;
+ if (r.have_submatch || r.have_submatch0) {
+ for (int i = 0; i < kMaxSubmatch; i++) {
+ if (correct.submatch[i].data() != r.submatch[i].data() ||
+ correct.submatch[i].size() != r.submatch[i].size())
+ return false;
+ if (!r.have_submatch)
+ break;
+ }
+ }
+ return true;
+}
+
+// Runs a single test.
+bool TestInstance::RunCase(const StringPiece& text, const StringPiece& context,
+ Prog::Anchor anchor) {
+ // Backtracking is the gold standard.
+ Result correct;
+ RunSearch(kEngineBacktrack, text, context, anchor, &correct);
+ if (correct.skipped) {
+ if (regexp_ == NULL)
+ return true;
+ LOG(ERROR) << "Skipped backtracking! " << CEscape(regexp_str_)
+ << " " << FormatMode(flags_);
+ return false;
+ }
+ VLOG(1) << "Try: regexp " << CEscape(regexp_str_)
+ << " text " << CEscape(text)
+ << " (" << FormatKind(kind_)
+ << ", " << FormatAnchor(anchor)
+ << ", " << FormatMode(flags_)
+ << ")";
+
+ // Compare the others.
+ bool all_okay = true;
+ for (Engine i = kEngineBacktrack+1; i < kEngineMax; i++) {
+ if (!(Engines() & (1<<i)))
+ continue;
+
+ Result r;
+ RunSearch(i, text, context, anchor, &r);
+ if (ResultOkay(r, correct)) {
+ if (GetFlag(FLAGS_log_okay))
+ LogMatch(r.skipped ? "Skipped: " : "Okay: ", i, text, context, anchor);
+ continue;
+ }
+
+ // We disagree with PCRE on the meaning of some Unicode matches.
+ // In particular, we treat non-ASCII UTF-8 as non-word characters.
+ // We also treat "empty" character sets like [^\w\W] as being
+ // impossible to match, while PCRE apparently excludes some code
+ // points (e.g., 0x0080) from both \w and \W.
+ if (i == kEnginePCRE && NonASCII(text))
+ continue;
+
+ if (!r.untrusted)
+ all_okay = false;
+
+ LogMatch(r.untrusted ? "(Untrusted) Mismatch: " : "Mismatch: ", i, text,
+ context, anchor);
+ if (r.matched != correct.matched) {
+ if (r.matched) {
+ LOG(INFO) << " Should not match (but does).";
+ } else {
+ LOG(INFO) << " Should match (but does not).";
+ continue;
+ }
+ }
+ for (int i = 0; i < 1+num_captures_; i++) {
+ if (r.submatch[i].data() != correct.submatch[i].data() ||
+ r.submatch[i].size() != correct.submatch[i].size()) {
+ LOG(INFO) <<
+ StringPrintf(" $%d: should be %s is %s",
+ i,
+ FormatCapture(text, correct.submatch[i]).c_str(),
+ FormatCapture(text, r.submatch[i]).c_str());
+ } else {
+ LOG(INFO) <<
+ StringPrintf(" $%d: %s ok", i,
+ FormatCapture(text, r.submatch[i]).c_str());
+ }
+ }
+ }
+
+ if (!all_okay) {
+ // This will be initialised once (after flags have been initialised)
+ // and that is desirable because we want to enforce a global limit.
+ static int max_regexp_failures = GetFlag(FLAGS_max_regexp_failures);
+ if (max_regexp_failures > 0 && --max_regexp_failures == 0)
+ LOG(QFATAL) << "Too many regexp failures.";
+ }
+
+ return all_okay;
+}
+
+void TestInstance::LogMatch(const char* prefix, Engine e,
+ const StringPiece& text, const StringPiece& context,
+ Prog::Anchor anchor) {
+ LOG(INFO) << prefix
+ << EngineName(e)
+ << " regexp "
+ << CEscape(regexp_str_)
+ << " "
+ << CEscape(regexp_->ToString())
+ << " text "
+ << CEscape(text)
+ << " ("
<< BeginPtr(text) - BeginPtr(context)
- << ","
+ << ","
<< EndPtr(text) - BeginPtr(context)
- << ") of context "
- << CEscape(context)
- << " (" << FormatKind(kind_)
- << ", " << FormatAnchor(anchor)
- << ", " << FormatMode(flags_)
- << ")";
-}
-
-static Prog::MatchKind kinds[] = {
- Prog::kFirstMatch,
- Prog::kLongestMatch,
- Prog::kFullMatch,
-};
-
-// Test all possible match kinds and parse modes.
-Tester::Tester(const StringPiece& regexp) {
- error_ = false;
- for (size_t i = 0; i < arraysize(kinds); i++) {
- for (size_t j = 0; j < arraysize(parse_modes); j++) {
- TestInstance* t = new TestInstance(regexp, kinds[i],
- parse_modes[j].parse_flags);
- error_ |= t->error();
- v_.push_back(t);
- }
- }
-}
-
-Tester::~Tester() {
- for (size_t i = 0; i < v_.size(); i++)
- delete v_[i];
-}
-
-bool Tester::TestCase(const StringPiece& text, const StringPiece& context,
- Prog::Anchor anchor) {
- bool okay = true;
- for (size_t i = 0; i < v_.size(); i++)
- okay &= (!v_[i]->error() && v_[i]->RunCase(text, context, anchor));
- return okay;
-}
-
-static Prog::Anchor anchors[] = {
- Prog::kAnchored,
- Prog::kUnanchored
-};
-
-bool Tester::TestInput(const StringPiece& text) {
- bool okay = TestInputInContext(text, text);
- if (!text.empty()) {
- StringPiece sp;
- sp = text;
- sp.remove_prefix(1);
- okay &= TestInputInContext(sp, text);
- sp = text;
- sp.remove_suffix(1);
- okay &= TestInputInContext(sp, text);
- }
- return okay;
-}
-
-bool Tester::TestInputInContext(const StringPiece& text,
- const StringPiece& context) {
- bool okay = true;
- for (size_t i = 0; i < arraysize(anchors); i++)
- okay &= TestCase(text, context, anchors[i]);
- return okay;
-}
-
-bool TestRegexpOnText(const StringPiece& regexp,
- const StringPiece& text) {
- Tester t(regexp);
- return t.TestInput(text);
-}
-
-} // namespace re2
+ << ") of context "
+ << CEscape(context)
+ << " (" << FormatKind(kind_)
+ << ", " << FormatAnchor(anchor)
+ << ", " << FormatMode(flags_)
+ << ")";
+}
+
+static Prog::MatchKind kinds[] = {
+ Prog::kFirstMatch,
+ Prog::kLongestMatch,
+ Prog::kFullMatch,
+};
+
+// Test all possible match kinds and parse modes.
+Tester::Tester(const StringPiece& regexp) {
+ error_ = false;
+ for (size_t i = 0; i < arraysize(kinds); i++) {
+ for (size_t j = 0; j < arraysize(parse_modes); j++) {
+ TestInstance* t = new TestInstance(regexp, kinds[i],
+ parse_modes[j].parse_flags);
+ error_ |= t->error();
+ v_.push_back(t);
+ }
+ }
+}
+
+Tester::~Tester() {
+ for (size_t i = 0; i < v_.size(); i++)
+ delete v_[i];
+}
+
+bool Tester::TestCase(const StringPiece& text, const StringPiece& context,
+ Prog::Anchor anchor) {
+ bool okay = true;
+ for (size_t i = 0; i < v_.size(); i++)
+ okay &= (!v_[i]->error() && v_[i]->RunCase(text, context, anchor));
+ return okay;
+}
+
+static Prog::Anchor anchors[] = {
+ Prog::kAnchored,
+ Prog::kUnanchored
+};
+
+bool Tester::TestInput(const StringPiece& text) {
+ bool okay = TestInputInContext(text, text);
+ if (!text.empty()) {
+ StringPiece sp;
+ sp = text;
+ sp.remove_prefix(1);
+ okay &= TestInputInContext(sp, text);
+ sp = text;
+ sp.remove_suffix(1);
+ okay &= TestInputInContext(sp, text);
+ }
+ return okay;
+}
+
+bool Tester::TestInputInContext(const StringPiece& text,
+ const StringPiece& context) {
+ bool okay = true;
+ for (size_t i = 0; i < arraysize(anchors); i++)
+ okay &= TestCase(text, context, anchors[i]);
+ return okay;
+}
+
+bool TestRegexpOnText(const StringPiece& regexp,
+ const StringPiece& text) {
+ Tester t(regexp);
+ return t.TestInput(text);
+}
+
+} // namespace re2
diff --git a/contrib/libs/re2/re2/testing/tester.h b/contrib/libs/re2/re2/testing/tester.h
index 1ddab2c5ce..47d0c4304f 100644
--- a/contrib/libs/re2/re2/testing/tester.h
+++ b/contrib/libs/re2/re2/testing/tester.h
@@ -1,123 +1,123 @@
-// Copyright 2008 The RE2 Authors. All Rights Reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-#ifndef RE2_TESTING_TESTER_H_
-#define RE2_TESTING_TESTER_H_
-
-// Comparative tester for regular expression matching.
-// Checks all implementations against each other.
-
-#include <vector>
-
-#include "re2/stringpiece.h"
-#include "re2/prog.h"
-#include "re2/regexp.h"
-#include "re2/re2.h"
-#include "util/pcre.h"
-
-namespace re2 {
-
-// All the supported regexp engines.
-enum Engine {
- kEngineBacktrack = 0, // Prog::UnsafeSearchBacktrack
- kEngineNFA, // Prog::SearchNFA
- kEngineDFA, // Prog::SearchDFA, only ask whether it matched
- kEngineDFA1, // Prog::SearchDFA, ask for match[0]
- kEngineOnePass, // Prog::SearchOnePass, if applicable
- kEngineBitState, // Prog::SearchBitState
- kEngineRE2, // RE2, all submatches
- kEngineRE2a, // RE2, only ask for match[0]
- kEngineRE2b, // RE2, only ask whether it matched
- kEnginePCRE, // PCRE (util/pcre.h)
-
- kEngineMax,
-};
-
-// Make normal math on the enum preserve the type.
-// By default, C++ doesn't define ++ on enum, and e+1 has type int.
-static inline void operator++(Engine& e, int unused) {
- e = static_cast<Engine>(e+1);
-}
-
-static inline Engine operator+(Engine e, int i) {
- return static_cast<Engine>(static_cast<int>(e)+i);
-}
-
-// A TestInstance caches per-regexp state for a given
-// regular expression in a given configuration
-// (UTF-8 vs Latin1, longest vs first match, etc.).
-class TestInstance {
- public:
- struct Result;
-
- TestInstance(const StringPiece& regexp, Prog::MatchKind kind,
- Regexp::ParseFlags flags);
- ~TestInstance();
- Regexp::ParseFlags flags() { return flags_; }
- bool error() { return error_; }
-
- // Runs a single test case: search in text, which is in context,
- // using the given anchoring.
- bool RunCase(const StringPiece& text, const StringPiece& context,
- Prog::Anchor anchor);
-
- private:
- // Runs a single search using the named engine type.
- void RunSearch(Engine type,
- const StringPiece& text, const StringPiece& context,
- Prog::Anchor anchor,
- Result *result);
-
- void LogMatch(const char* prefix, Engine e, const StringPiece& text,
- const StringPiece& context, Prog::Anchor anchor);
-
- const StringPiece regexp_str_; // regexp being tested
- Prog::MatchKind kind_; // kind of match
- Regexp::ParseFlags flags_; // flags for parsing regexp_str_
- bool error_; // error during constructor?
-
- Regexp* regexp_; // parsed regexp
- int num_captures_; // regexp_->NumCaptures() cached
- Prog* prog_; // compiled program
- Prog* rprog_; // compiled reverse program
- PCRE* re_; // PCRE implementation
- RE2* re2_; // RE2 implementation
-
- TestInstance(const TestInstance&) = delete;
- TestInstance& operator=(const TestInstance&) = delete;
-};
-
-// A group of TestInstances for all possible configurations.
-class Tester {
- public:
- explicit Tester(const StringPiece& regexp);
- ~Tester();
-
- bool error() { return error_; }
-
- // Runs a single test case: search in text, which is in context,
- // using the given anchoring.
- bool TestCase(const StringPiece& text, const StringPiece& context,
- Prog::Anchor anchor);
-
- // Run TestCase(text, text, anchor) for all anchoring modes.
- bool TestInput(const StringPiece& text);
-
- // Run TestCase(text, context, anchor) for all anchoring modes.
- bool TestInputInContext(const StringPiece& text, const StringPiece& context);
-
- private:
- bool error_;
- std::vector<TestInstance*> v_;
-
- Tester(const Tester&) = delete;
- Tester& operator=(const Tester&) = delete;
-};
-
-// Run all possible tests using regexp and text.
-bool TestRegexpOnText(const StringPiece& regexp, const StringPiece& text);
-
-} // namespace re2
-
-#endif // RE2_TESTING_TESTER_H_
+// Copyright 2008 The RE2 Authors. All Rights Reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef RE2_TESTING_TESTER_H_
+#define RE2_TESTING_TESTER_H_
+
+// Comparative tester for regular expression matching.
+// Checks all implementations against each other.
+
+#include <vector>
+
+#include "re2/stringpiece.h"
+#include "re2/prog.h"
+#include "re2/regexp.h"
+#include "re2/re2.h"
+#include "util/pcre.h"
+
+namespace re2 {
+
+// All the supported regexp engines.
+enum Engine {
+ kEngineBacktrack = 0, // Prog::UnsafeSearchBacktrack
+ kEngineNFA, // Prog::SearchNFA
+ kEngineDFA, // Prog::SearchDFA, only ask whether it matched
+ kEngineDFA1, // Prog::SearchDFA, ask for match[0]
+ kEngineOnePass, // Prog::SearchOnePass, if applicable
+ kEngineBitState, // Prog::SearchBitState
+ kEngineRE2, // RE2, all submatches
+ kEngineRE2a, // RE2, only ask for match[0]
+ kEngineRE2b, // RE2, only ask whether it matched
+ kEnginePCRE, // PCRE (util/pcre.h)
+
+ kEngineMax,
+};
+
+// Make normal math on the enum preserve the type.
+// By default, C++ doesn't define ++ on enum, and e+1 has type int.
+static inline void operator++(Engine& e, int unused) {
+ e = static_cast<Engine>(e+1);
+}
+
+static inline Engine operator+(Engine e, int i) {
+ return static_cast<Engine>(static_cast<int>(e)+i);
+}
+
+// A TestInstance caches per-regexp state for a given
+// regular expression in a given configuration
+// (UTF-8 vs Latin1, longest vs first match, etc.).
+class TestInstance {
+ public:
+ struct Result;
+
+ TestInstance(const StringPiece& regexp, Prog::MatchKind kind,
+ Regexp::ParseFlags flags);
+ ~TestInstance();
+ Regexp::ParseFlags flags() { return flags_; }
+ bool error() { return error_; }
+
+ // Runs a single test case: search in text, which is in context,
+ // using the given anchoring.
+ bool RunCase(const StringPiece& text, const StringPiece& context,
+ Prog::Anchor anchor);
+
+ private:
+ // Runs a single search using the named engine type.
+ void RunSearch(Engine type,
+ const StringPiece& text, const StringPiece& context,
+ Prog::Anchor anchor,
+ Result *result);
+
+ void LogMatch(const char* prefix, Engine e, const StringPiece& text,
+ const StringPiece& context, Prog::Anchor anchor);
+
+ const StringPiece regexp_str_; // regexp being tested
+ Prog::MatchKind kind_; // kind of match
+ Regexp::ParseFlags flags_; // flags for parsing regexp_str_
+ bool error_; // error during constructor?
+
+ Regexp* regexp_; // parsed regexp
+ int num_captures_; // regexp_->NumCaptures() cached
+ Prog* prog_; // compiled program
+ Prog* rprog_; // compiled reverse program
+ PCRE* re_; // PCRE implementation
+ RE2* re2_; // RE2 implementation
+
+ TestInstance(const TestInstance&) = delete;
+ TestInstance& operator=(const TestInstance&) = delete;
+};
+
+// A group of TestInstances for all possible configurations.
+class Tester {
+ public:
+ explicit Tester(const StringPiece& regexp);
+ ~Tester();
+
+ bool error() { return error_; }
+
+ // Runs a single test case: search in text, which is in context,
+ // using the given anchoring.
+ bool TestCase(const StringPiece& text, const StringPiece& context,
+ Prog::Anchor anchor);
+
+ // Run TestCase(text, text, anchor) for all anchoring modes.
+ bool TestInput(const StringPiece& text);
+
+ // Run TestCase(text, context, anchor) for all anchoring modes.
+ bool TestInputInContext(const StringPiece& text, const StringPiece& context);
+
+ private:
+ bool error_;
+ std::vector<TestInstance*> v_;
+
+ Tester(const Tester&) = delete;
+ Tester& operator=(const Tester&) = delete;
+};
+
+// Run all possible tests using regexp and text.
+bool TestRegexpOnText(const StringPiece& regexp, const StringPiece& text);
+
+} // namespace re2
+
+#endif // RE2_TESTING_TESTER_H_
diff --git a/contrib/libs/re2/re2/testing/ya.make b/contrib/libs/re2/re2/testing/ya.make
index 5436c70dbb..df9023fee5 100644
--- a/contrib/libs/re2/re2/testing/ya.make
+++ b/contrib/libs/re2/re2/testing/ya.make
@@ -1,50 +1,50 @@
-# Generated by devtools/yamaker.
-
-GTEST()
-
-OWNER(g:cpp-contrib)
-
-LICENSE(BSD-3-Clause)
-
+# Generated by devtools/yamaker.
+
+GTEST()
+
+OWNER(g:cpp-contrib)
+
+LICENSE(BSD-3-Clause)
+
LICENSE_TEXTS(.yandex_meta/licenses.list.txt)
-PEERDIR(
- contrib/libs/re2
-)
-
+PEERDIR(
+ contrib/libs/re2
+)
+
ADDINCL(
contrib/libs/re2
)
-
-NO_COMPILER_WARNINGS()
-
-NO_UTIL()
-
-SRCDIR(contrib/libs/re2)
-
-SRCS(
- re2/testing/backtrack.cc
- re2/testing/charclass_test.cc
- re2/testing/compile_test.cc
- re2/testing/dump.cc
- re2/testing/exhaustive_tester.cc
- re2/testing/filtered_re2_test.cc
- re2/testing/mimics_pcre_test.cc
- re2/testing/null_walker.cc
- re2/testing/parse_test.cc
- re2/testing/possible_match_test.cc
- re2/testing/re2_arg_test.cc
- re2/testing/re2_test.cc
- re2/testing/regexp_generator.cc
- re2/testing/regexp_test.cc
- re2/testing/required_prefix_test.cc
- re2/testing/search_test.cc
- re2/testing/set_test.cc
- re2/testing/simplify_test.cc
- re2/testing/string_generator.cc
- re2/testing/string_generator_test.cc
- re2/testing/tester.cc
- util/pcre.cc
-)
-
-END()
+
+NO_COMPILER_WARNINGS()
+
+NO_UTIL()
+
+SRCDIR(contrib/libs/re2)
+
+SRCS(
+ re2/testing/backtrack.cc
+ re2/testing/charclass_test.cc
+ re2/testing/compile_test.cc
+ re2/testing/dump.cc
+ re2/testing/exhaustive_tester.cc
+ re2/testing/filtered_re2_test.cc
+ re2/testing/mimics_pcre_test.cc
+ re2/testing/null_walker.cc
+ re2/testing/parse_test.cc
+ re2/testing/possible_match_test.cc
+ re2/testing/re2_arg_test.cc
+ re2/testing/re2_test.cc
+ re2/testing/regexp_generator.cc
+ re2/testing/regexp_test.cc
+ re2/testing/required_prefix_test.cc
+ re2/testing/search_test.cc
+ re2/testing/set_test.cc
+ re2/testing/simplify_test.cc
+ re2/testing/string_generator.cc
+ re2/testing/string_generator_test.cc
+ re2/testing/tester.cc
+ util/pcre.cc
+)
+
+END()
diff --git a/contrib/libs/re2/re2/tostring.cc b/contrib/libs/re2/re2/tostring.cc
index 255aa94820..9c1c038ca6 100644
--- a/contrib/libs/re2/re2/tostring.cc
+++ b/contrib/libs/re2/re2/tostring.cc
@@ -28,7 +28,7 @@ enum {
};
// Helper function. See description below.
-static void AppendCCRange(std::string* t, Rune lo, Rune hi);
+static void AppendCCRange(std::string* t, Rune lo, Rune hi);
// Walker to generate string in s_.
// The arg pointers are actually integers giving the
@@ -36,7 +36,7 @@ static void AppendCCRange(std::string* t, Rune lo, Rune hi);
// The child_args are always NULL.
class ToStringWalker : public Regexp::Walker<int> {
public:
- explicit ToStringWalker(std::string* t) : t_(t) {}
+ explicit ToStringWalker(std::string* t) : t_(t) {}
virtual int PreVisit(Regexp* re, int parent_arg, bool* stop);
virtual int PostVisit(Regexp* re, int parent_arg, int pre_arg,
@@ -46,14 +46,14 @@ class ToStringWalker : public Regexp::Walker<int> {
}
private:
- std::string* t_; // The string the walker appends to.
+ std::string* t_; // The string the walker appends to.
ToStringWalker(const ToStringWalker&) = delete;
ToStringWalker& operator=(const ToStringWalker&) = delete;
};
-std::string Regexp::ToString() {
- std::string t;
+std::string Regexp::ToString() {
+ std::string t;
ToStringWalker w(&t);
w.WalkExponential(this, PrecToplevel, 100000);
if (w.stopped_early())
@@ -126,7 +126,7 @@ int ToStringWalker::PreVisit(Regexp* re, int parent_arg, bool* stop) {
return nprec;
}
-static void AppendLiteral(std::string *t, Rune r, bool foldcase) {
+static void AppendLiteral(std::string *t, Rune r, bool foldcase) {
if (r != 0 && r < 0x80 && strchr("(){}[]*+?|.^$\\", r)) {
t->append(1, '\\');
t->append(1, static_cast<char>(r));
@@ -269,9 +269,9 @@ int ToStringWalker::PostVisit(Regexp* re, int parent_arg, int pre_arg,
}
t_->append("[");
// Heuristic: show class as negated if it contains the
- // non-character 0xFFFE and yet somehow isn't full.
+ // non-character 0xFFFE and yet somehow isn't full.
CharClass* cc = re->cc();
- if (cc->Contains(0xFFFE) && !cc->full()) {
+ if (cc->Contains(0xFFFE) && !cc->full()) {
cc = cc->Negate();
t_->append("^");
}
@@ -291,7 +291,7 @@ int ToStringWalker::PostVisit(Regexp* re, int parent_arg, int pre_arg,
// There's no syntax accepted by the parser to generate
// this node (it is generated by RE2::Set) so make something
// up that is readable but won't compile.
- t_->append(StringPrintf("(?HaveMatch:%d)", re->match_id()));
+ t_->append(StringPrintf("(?HaveMatch:%d)", re->match_id()));
break;
}
@@ -303,7 +303,7 @@ int ToStringWalker::PostVisit(Regexp* re, int parent_arg, int pre_arg,
}
// Appends a rune for use in a character class to the string t.
-static void AppendCCChar(std::string* t, Rune r) {
+static void AppendCCChar(std::string* t, Rune r) {
if (0x20 <= r && r <= 0x7E) {
if (strchr("[]^-\\", r))
t->append("\\");
@@ -332,13 +332,13 @@ static void AppendCCChar(std::string* t, Rune r) {
}
if (r < 0x100) {
- *t += StringPrintf("\\x%02x", static_cast<int>(r));
+ *t += StringPrintf("\\x%02x", static_cast<int>(r));
return;
}
- *t += StringPrintf("\\x{%x}", static_cast<int>(r));
+ *t += StringPrintf("\\x{%x}", static_cast<int>(r));
}
-static void AppendCCRange(std::string* t, Rune lo, Rune hi) {
+static void AppendCCRange(std::string* t, Rune lo, Rune hi) {
if (lo > hi)
return;
AppendCCChar(t, lo);
diff --git a/contrib/libs/re2/re2/unicode_casefold.cc b/contrib/libs/re2/re2/unicode_casefold.cc
index f6899c6cf6..d9de2821d5 100644
--- a/contrib/libs/re2/re2/unicode_casefold.cc
+++ b/contrib/libs/re2/re2/unicode_casefold.cc
@@ -113,7 +113,7 @@ const CaseFold unicode_casefold[] = {
{ 614, 614, 42308 },
{ 616, 616, -209 },
{ 617, 617, -211 },
- { 618, 618, 42308 },
+ { 618, 618, 42308 },
{ 619, 619, 10743 },
{ 620, 620, 42305 },
{ 623, 623, -211 },
@@ -122,7 +122,7 @@ const CaseFold unicode_casefold[] = {
{ 629, 629, -214 },
{ 637, 637, 10727 },
{ 640, 640, -218 },
- { 642, 642, 42307 },
+ { 642, 642, 42307 },
{ 643, 643, -218 },
{ 647, 647, 42282 },
{ 648, 648, -218 },
@@ -186,21 +186,21 @@ const CaseFold unicode_casefold[] = {
{ 1021, 1023, -130 },
{ 1024, 1039, 80 },
{ 1040, 1071, 32 },
- { 1072, 1073, -32 },
- { 1074, 1074, 6222 },
- { 1075, 1075, -32 },
- { 1076, 1076, 6221 },
- { 1077, 1085, -32 },
- { 1086, 1086, 6212 },
- { 1087, 1088, -32 },
- { 1089, 1090, 6210 },
- { 1091, 1097, -32 },
- { 1098, 1098, 6204 },
- { 1099, 1103, -32 },
+ { 1072, 1073, -32 },
+ { 1074, 1074, 6222 },
+ { 1075, 1075, -32 },
+ { 1076, 1076, 6221 },
+ { 1077, 1085, -32 },
+ { 1086, 1086, 6212 },
+ { 1087, 1088, -32 },
+ { 1089, 1090, 6210 },
+ { 1091, 1097, -32 },
+ { 1098, 1098, 6204 },
+ { 1099, 1103, -32 },
{ 1104, 1119, -80 },
- { 1120, 1122, EvenOdd },
- { 1123, 1123, 6180 },
- { 1124, 1153, EvenOdd },
+ { 1120, 1122, EvenOdd },
+ { 1123, 1123, 6180 },
+ { 1124, 1153, EvenOdd },
{ 1162, 1215, EvenOdd },
{ 1216, 1216, 15 },
{ 1217, 1230, OddEven },
@@ -211,25 +211,25 @@ const CaseFold unicode_casefold[] = {
{ 4256, 4293, 7264 },
{ 4295, 4295, 7264 },
{ 4301, 4301, 7264 },
- { 4304, 4346, 3008 },
- { 4349, 4351, 3008 },
+ { 4304, 4346, 3008 },
+ { 4349, 4351, 3008 },
{ 5024, 5103, 38864 },
{ 5104, 5109, 8 },
{ 5112, 5117, -8 },
- { 7296, 7296, -6254 },
- { 7297, 7297, -6253 },
- { 7298, 7298, -6244 },
- { 7299, 7299, -6242 },
- { 7300, 7300, EvenOdd },
- { 7301, 7301, -6243 },
- { 7302, 7302, -6236 },
- { 7303, 7303, -6181 },
- { 7304, 7304, 35266 },
- { 7312, 7354, -3008 },
- { 7357, 7359, -3008 },
+ { 7296, 7296, -6254 },
+ { 7297, 7297, -6253 },
+ { 7298, 7298, -6244 },
+ { 7299, 7299, -6242 },
+ { 7300, 7300, EvenOdd },
+ { 7301, 7301, -6243 },
+ { 7302, 7302, -6236 },
+ { 7303, 7303, -6181 },
+ { 7304, 7304, 35266 },
+ { 7312, 7354, -3008 },
+ { 7357, 7359, -3008 },
{ 7545, 7545, 35332 },
{ 7549, 7549, 3814 },
- { 7566, 7566, 35384 },
+ { 7566, 7566, 35384 },
{ 7680, 7776, EvenOdd },
{ 7777, 7777, 58 },
{ 7778, 7829, EvenOdd },
@@ -321,9 +321,9 @@ const CaseFold unicode_casefold[] = {
{ 11520, 11557, -7264 },
{ 11559, 11559, -7264 },
{ 11565, 11565, -7264 },
- { 42560, 42570, EvenOdd },
- { 42571, 42571, -35267 },
- { 42572, 42605, EvenOdd },
+ { 42560, 42570, EvenOdd },
+ { 42571, 42571, -35267 },
+ { 42572, 42605, EvenOdd },
{ 42624, 42651, EvenOdd },
{ 42786, 42799, EvenOdd },
{ 42802, 42863, EvenOdd },
@@ -333,33 +333,33 @@ const CaseFold unicode_casefold[] = {
{ 42891, 42892, OddEven },
{ 42893, 42893, -42280 },
{ 42896, 42899, EvenOdd },
- { 42900, 42900, 48 },
+ { 42900, 42900, 48 },
{ 42902, 42921, EvenOdd },
{ 42922, 42922, -42308 },
{ 42923, 42923, -42319 },
{ 42924, 42924, -42315 },
{ 42925, 42925, -42305 },
- { 42926, 42926, -42308 },
+ { 42926, 42926, -42308 },
{ 42928, 42928, -42258 },
{ 42929, 42929, -42282 },
{ 42930, 42930, -42261 },
{ 42931, 42931, 928 },
{ 42932, 42947, EvenOdd },
- { 42948, 42948, -48 },
- { 42949, 42949, -42307 },
- { 42950, 42950, -35384 },
- { 42951, 42954, OddEven },
+ { 42948, 42948, -48 },
+ { 42949, 42949, -42307 },
+ { 42950, 42950, -35384 },
+ { 42951, 42954, OddEven },
{ 42960, 42961, EvenOdd },
{ 42966, 42969, EvenOdd },
- { 42997, 42998, OddEven },
+ { 42997, 42998, OddEven },
{ 43859, 43859, -928 },
{ 43888, 43967, -38864 },
{ 65313, 65338, 32 },
{ 65345, 65370, -32 },
{ 66560, 66599, 40 },
{ 66600, 66639, -40 },
- { 66736, 66771, 40 },
- { 66776, 66811, -40 },
+ { 66736, 66771, 40 },
+ { 66776, 66811, -40 },
{ 66928, 66938, 39 },
{ 66940, 66954, 39 },
{ 66956, 66962, 39 },
@@ -372,10 +372,10 @@ const CaseFold unicode_casefold[] = {
{ 68800, 68850, -64 },
{ 71840, 71871, 32 },
{ 71872, 71903, -32 },
- { 93760, 93791, 32 },
- { 93792, 93823, -32 },
- { 125184, 125217, 34 },
- { 125218, 125251, -34 },
+ { 93760, 93791, 32 },
+ { 93792, 93823, -32 },
+ { 125184, 125217, 34 },
+ { 125218, 125251, -34 },
};
const int num_unicode_casefold = 367;
@@ -482,16 +482,16 @@ const CaseFold unicode_tolower[] = {
{ 4295, 4295, 7264 },
{ 4301, 4301, 7264 },
{ 5112, 5117, -8 },
- { 7296, 7296, -6222 },
- { 7297, 7297, -6221 },
- { 7298, 7298, -6212 },
- { 7299, 7300, -6210 },
- { 7301, 7301, -6211 },
- { 7302, 7302, -6204 },
- { 7303, 7303, -6180 },
- { 7304, 7304, 35267 },
- { 7312, 7354, -3008 },
- { 7357, 7359, -3008 },
+ { 7296, 7296, -6222 },
+ { 7297, 7297, -6221 },
+ { 7298, 7298, -6212 },
+ { 7299, 7300, -6210 },
+ { 7301, 7301, -6211 },
+ { 7302, 7302, -6204 },
+ { 7303, 7303, -6180 },
+ { 7304, 7304, 35267 },
+ { 7312, 7354, -3008 },
+ { 7357, 7359, -3008 },
{ 7680, 7828, EvenOddSkip },
{ 7835, 7835, -58 },
{ 7838, 7838, -7615 },
@@ -561,31 +561,31 @@ const CaseFold unicode_tolower[] = {
{ 42923, 42923, -42319 },
{ 42924, 42924, -42315 },
{ 42925, 42925, -42305 },
- { 42926, 42926, -42308 },
+ { 42926, 42926, -42308 },
{ 42928, 42928, -42258 },
{ 42929, 42929, -42282 },
{ 42930, 42930, -42261 },
{ 42931, 42931, 928 },
{ 42932, 42946, EvenOddSkip },
- { 42948, 42948, -48 },
- { 42949, 42949, -42307 },
- { 42950, 42950, -35384 },
- { 42951, 42953, OddEvenSkip },
+ { 42948, 42948, -48 },
+ { 42949, 42949, -42307 },
+ { 42950, 42950, -35384 },
+ { 42951, 42953, OddEvenSkip },
{ 42960, 42960, EvenOdd },
{ 42966, 42968, EvenOddSkip },
- { 42997, 42997, OddEven },
+ { 42997, 42997, OddEven },
{ 43888, 43967, -38864 },
{ 65313, 65338, 32 },
{ 66560, 66599, 40 },
- { 66736, 66771, 40 },
+ { 66736, 66771, 40 },
{ 66928, 66938, 39 },
{ 66940, 66954, 39 },
{ 66956, 66962, 39 },
{ 66964, 66965, 39 },
{ 68736, 68786, 64 },
{ 71840, 71871, 32 },
- { 93760, 93791, 32 },
- { 125184, 125217, 34 },
+ { 93760, 93791, 32 },
+ { 125184, 125217, 34 },
};
const int num_unicode_tolower = 205;
diff --git a/contrib/libs/re2/re2/unicode_groups.cc b/contrib/libs/re2/re2/unicode_groups.cc
index 21aeca513f..2a8d7dae1f 100644
--- a/contrib/libs/re2/re2/unicode_groups.cc
+++ b/contrib/libs/re2/re2/unicode_groups.cc
@@ -7,109 +7,109 @@
namespace re2 {
-static const URange16 C_range16[] = {
- { 0, 31 },
- { 127, 159 },
- { 173, 173 },
- { 1536, 1541 },
- { 1564, 1564 },
- { 1757, 1757 },
- { 1807, 1807 },
+static const URange16 C_range16[] = {
+ { 0, 31 },
+ { 127, 159 },
+ { 173, 173 },
+ { 1536, 1541 },
+ { 1564, 1564 },
+ { 1757, 1757 },
+ { 1807, 1807 },
{ 2192, 2193 },
- { 2274, 2274 },
- { 6158, 6158 },
- { 8203, 8207 },
- { 8234, 8238 },
- { 8288, 8292 },
- { 8294, 8303 },
- { 55296, 63743 },
- { 65279, 65279 },
- { 65529, 65531 },
-};
-static const URange32 C_range32[] = {
- { 69821, 69821 },
- { 69837, 69837 },
- { 78896, 78904 },
- { 113824, 113827 },
- { 119155, 119162 },
- { 917505, 917505 },
- { 917536, 917631 },
- { 983040, 1048573 },
- { 1048576, 1114109 },
-};
-static const URange16 Cc_range16[] = {
- { 0, 31 },
- { 127, 159 },
-};
-static const URange16 Cf_range16[] = {
- { 173, 173 },
- { 1536, 1541 },
- { 1564, 1564 },
- { 1757, 1757 },
- { 1807, 1807 },
+ { 2274, 2274 },
+ { 6158, 6158 },
+ { 8203, 8207 },
+ { 8234, 8238 },
+ { 8288, 8292 },
+ { 8294, 8303 },
+ { 55296, 63743 },
+ { 65279, 65279 },
+ { 65529, 65531 },
+};
+static const URange32 C_range32[] = {
+ { 69821, 69821 },
+ { 69837, 69837 },
+ { 78896, 78904 },
+ { 113824, 113827 },
+ { 119155, 119162 },
+ { 917505, 917505 },
+ { 917536, 917631 },
+ { 983040, 1048573 },
+ { 1048576, 1114109 },
+};
+static const URange16 Cc_range16[] = {
+ { 0, 31 },
+ { 127, 159 },
+};
+static const URange16 Cf_range16[] = {
+ { 173, 173 },
+ { 1536, 1541 },
+ { 1564, 1564 },
+ { 1757, 1757 },
+ { 1807, 1807 },
{ 2192, 2193 },
- { 2274, 2274 },
- { 6158, 6158 },
- { 8203, 8207 },
- { 8234, 8238 },
- { 8288, 8292 },
- { 8294, 8303 },
- { 65279, 65279 },
- { 65529, 65531 },
-};
-static const URange32 Cf_range32[] = {
- { 69821, 69821 },
- { 69837, 69837 },
- { 78896, 78904 },
- { 113824, 113827 },
- { 119155, 119162 },
- { 917505, 917505 },
- { 917536, 917631 },
-};
-static const URange16 Co_range16[] = {
- { 57344, 63743 },
-};
-static const URange32 Co_range32[] = {
- { 983040, 1048573 },
- { 1048576, 1114109 },
-};
-static const URange16 Cs_range16[] = {
- { 55296, 57343 },
-};
-static const URange16 L_range16[] = {
- { 65, 90 },
- { 97, 122 },
+ { 2274, 2274 },
+ { 6158, 6158 },
+ { 8203, 8207 },
+ { 8234, 8238 },
+ { 8288, 8292 },
+ { 8294, 8303 },
+ { 65279, 65279 },
+ { 65529, 65531 },
+};
+static const URange32 Cf_range32[] = {
+ { 69821, 69821 },
+ { 69837, 69837 },
+ { 78896, 78904 },
+ { 113824, 113827 },
+ { 119155, 119162 },
+ { 917505, 917505 },
+ { 917536, 917631 },
+};
+static const URange16 Co_range16[] = {
+ { 57344, 63743 },
+};
+static const URange32 Co_range32[] = {
+ { 983040, 1048573 },
+ { 1048576, 1114109 },
+};
+static const URange16 Cs_range16[] = {
+ { 55296, 57343 },
+};
+static const URange16 L_range16[] = {
+ { 65, 90 },
+ { 97, 122 },
{ 170, 170 },
- { 181, 181 },
+ { 181, 181 },
{ 186, 186 },
- { 192, 214 },
- { 216, 246 },
- { 248, 705 },
- { 710, 721 },
- { 736, 740 },
- { 748, 748 },
- { 750, 750 },
- { 880, 884 },
- { 886, 887 },
- { 890, 893 },
- { 895, 895 },
- { 902, 902 },
- { 904, 906 },
- { 908, 908 },
- { 910, 929 },
- { 931, 1013 },
- { 1015, 1153 },
- { 1162, 1327 },
- { 1329, 1366 },
- { 1369, 1369 },
- { 1376, 1416 },
+ { 192, 214 },
+ { 216, 246 },
+ { 248, 705 },
+ { 710, 721 },
+ { 736, 740 },
+ { 748, 748 },
+ { 750, 750 },
+ { 880, 884 },
+ { 886, 887 },
+ { 890, 893 },
+ { 895, 895 },
+ { 902, 902 },
+ { 904, 906 },
+ { 908, 908 },
+ { 910, 929 },
+ { 931, 1013 },
+ { 1015, 1153 },
+ { 1162, 1327 },
+ { 1329, 1366 },
+ { 1369, 1369 },
+ { 1376, 1416 },
{ 1488, 1514 },
- { 1519, 1522 },
- { 1568, 1610 },
+ { 1519, 1522 },
+ { 1568, 1610 },
{ 1646, 1647 },
{ 1649, 1747 },
{ 1749, 1749 },
- { 1765, 1766 },
+ { 1765, 1766 },
{ 1774, 1775 },
{ 1786, 1788 },
{ 1791, 1791 },
@@ -118,14 +118,14 @@ static const URange16 L_range16[] = {
{ 1869, 1957 },
{ 1969, 1969 },
{ 1994, 2026 },
- { 2036, 2037 },
- { 2042, 2042 },
+ { 2036, 2037 },
+ { 2042, 2042 },
{ 2048, 2069 },
- { 2074, 2074 },
- { 2084, 2084 },
- { 2088, 2088 },
+ { 2074, 2074 },
+ { 2084, 2084 },
+ { 2088, 2088 },
{ 2112, 2136 },
- { 2144, 2154 },
+ { 2144, 2154 },
{ 2160, 2183 },
{ 2185, 2190 },
{ 2208, 2249 },
@@ -133,7 +133,7 @@ static const URange16 L_range16[] = {
{ 2365, 2365 },
{ 2384, 2384 },
{ 2392, 2401 },
- { 2417, 2432 },
+ { 2417, 2432 },
{ 2437, 2444 },
{ 2447, 2448 },
{ 2451, 2472 },
@@ -145,7 +145,7 @@ static const URange16 L_range16[] = {
{ 2524, 2525 },
{ 2527, 2529 },
{ 2544, 2545 },
- { 2556, 2556 },
+ { 2556, 2556 },
{ 2565, 2570 },
{ 2575, 2576 },
{ 2579, 2600 },
@@ -195,7 +195,7 @@ static const URange16 L_range16[] = {
{ 3160, 3162 },
{ 3165, 3165 },
{ 3168, 3169 },
- { 3200, 3200 },
+ { 3200, 3200 },
{ 3205, 3212 },
{ 3214, 3216 },
{ 3218, 3240 },
@@ -205,12 +205,12 @@ static const URange16 L_range16[] = {
{ 3293, 3294 },
{ 3296, 3297 },
{ 3313, 3314 },
- { 3332, 3340 },
+ { 3332, 3340 },
{ 3342, 3344 },
{ 3346, 3386 },
{ 3389, 3389 },
{ 3406, 3406 },
- { 3412, 3414 },
+ { 3412, 3414 },
{ 3423, 3425 },
{ 3450, 3455 },
{ 3461, 3478 },
@@ -220,17 +220,17 @@ static const URange16 L_range16[] = {
{ 3520, 3526 },
{ 3585, 3632 },
{ 3634, 3635 },
- { 3648, 3654 },
+ { 3648, 3654 },
{ 3713, 3714 },
{ 3716, 3716 },
- { 3718, 3722 },
- { 3724, 3747 },
+ { 3718, 3722 },
+ { 3724, 3747 },
{ 3749, 3749 },
- { 3751, 3760 },
+ { 3751, 3760 },
{ 3762, 3763 },
{ 3773, 3773 },
{ 3776, 3780 },
- { 3782, 3782 },
+ { 3782, 3782 },
{ 3804, 3807 },
{ 3840, 3840 },
{ 3904, 3911 },
@@ -245,11 +245,11 @@ static const URange16 L_range16[] = {
{ 4206, 4208 },
{ 4213, 4225 },
{ 4238, 4238 },
- { 4256, 4293 },
- { 4295, 4295 },
- { 4301, 4301 },
+ { 4256, 4293 },
+ { 4295, 4295 },
+ { 4301, 4301 },
{ 4304, 4346 },
- { 4348, 4680 },
+ { 4348, 4680 },
{ 4682, 4685 },
{ 4688, 4694 },
{ 4696, 4696 },
@@ -266,8 +266,8 @@ static const URange16 L_range16[] = {
{ 4882, 4885 },
{ 4888, 4954 },
{ 4992, 5007 },
- { 5024, 5109 },
- { 5112, 5117 },
+ { 5024, 5109 },
+ { 5112, 5117 },
{ 5121, 5740 },
{ 5743, 5759 },
{ 5761, 5786 },
@@ -279,11 +279,11 @@ static const URange16 L_range16[] = {
{ 5984, 5996 },
{ 5998, 6000 },
{ 6016, 6067 },
- { 6103, 6103 },
+ { 6103, 6103 },
{ 6108, 6108 },
- { 6176, 6264 },
- { 6272, 6276 },
- { 6279, 6312 },
+ { 6176, 6264 },
+ { 6272, 6276 },
+ { 6279, 6312 },
{ 6314, 6314 },
{ 6320, 6389 },
{ 6400, 6430 },
@@ -293,7 +293,7 @@ static const URange16 L_range16[] = {
{ 6576, 6601 },
{ 6656, 6678 },
{ 6688, 6740 },
- { 6823, 6823 },
+ { 6823, 6823 },
{ 6917, 6963 },
{ 6981, 6988 },
{ 7043, 7072 },
@@ -301,59 +301,59 @@ static const URange16 L_range16[] = {
{ 7098, 7141 },
{ 7168, 7203 },
{ 7245, 7247 },
- { 7258, 7293 },
- { 7296, 7304 },
- { 7312, 7354 },
- { 7357, 7359 },
+ { 7258, 7293 },
+ { 7296, 7304 },
+ { 7312, 7354 },
+ { 7357, 7359 },
{ 7401, 7404 },
- { 7406, 7411 },
+ { 7406, 7411 },
{ 7413, 7414 },
- { 7418, 7418 },
- { 7424, 7615 },
- { 7680, 7957 },
- { 7960, 7965 },
- { 7968, 8005 },
- { 8008, 8013 },
- { 8016, 8023 },
- { 8025, 8025 },
- { 8027, 8027 },
- { 8029, 8029 },
- { 8031, 8061 },
- { 8064, 8116 },
- { 8118, 8124 },
- { 8126, 8126 },
- { 8130, 8132 },
- { 8134, 8140 },
- { 8144, 8147 },
- { 8150, 8155 },
- { 8160, 8172 },
- { 8178, 8180 },
- { 8182, 8188 },
- { 8305, 8305 },
- { 8319, 8319 },
- { 8336, 8348 },
- { 8450, 8450 },
- { 8455, 8455 },
- { 8458, 8467 },
- { 8469, 8469 },
- { 8473, 8477 },
- { 8484, 8484 },
- { 8486, 8486 },
- { 8488, 8488 },
- { 8490, 8493 },
- { 8495, 8505 },
- { 8508, 8511 },
- { 8517, 8521 },
- { 8526, 8526 },
- { 8579, 8580 },
+ { 7418, 7418 },
+ { 7424, 7615 },
+ { 7680, 7957 },
+ { 7960, 7965 },
+ { 7968, 8005 },
+ { 8008, 8013 },
+ { 8016, 8023 },
+ { 8025, 8025 },
+ { 8027, 8027 },
+ { 8029, 8029 },
+ { 8031, 8061 },
+ { 8064, 8116 },
+ { 8118, 8124 },
+ { 8126, 8126 },
+ { 8130, 8132 },
+ { 8134, 8140 },
+ { 8144, 8147 },
+ { 8150, 8155 },
+ { 8160, 8172 },
+ { 8178, 8180 },
+ { 8182, 8188 },
+ { 8305, 8305 },
+ { 8319, 8319 },
+ { 8336, 8348 },
+ { 8450, 8450 },
+ { 8455, 8455 },
+ { 8458, 8467 },
+ { 8469, 8469 },
+ { 8473, 8477 },
+ { 8484, 8484 },
+ { 8486, 8486 },
+ { 8488, 8488 },
+ { 8490, 8493 },
+ { 8495, 8505 },
+ { 8508, 8511 },
+ { 8517, 8521 },
+ { 8526, 8526 },
+ { 8579, 8580 },
{ 11264, 11492 },
- { 11499, 11502 },
- { 11506, 11507 },
- { 11520, 11557 },
- { 11559, 11559 },
- { 11565, 11565 },
+ { 11499, 11502 },
+ { 11506, 11507 },
+ { 11520, 11557 },
+ { 11559, 11559 },
+ { 11565, 11565 },
{ 11568, 11623 },
- { 11631, 11631 },
+ { 11631, 11631 },
{ 11648, 11670 },
{ 11680, 11686 },
{ 11688, 11694 },
@@ -363,29 +363,29 @@ static const URange16 L_range16[] = {
{ 11720, 11726 },
{ 11728, 11734 },
{ 11736, 11742 },
- { 11823, 11823 },
- { 12293, 12294 },
- { 12337, 12341 },
- { 12347, 12348 },
+ { 11823, 11823 },
+ { 12293, 12294 },
+ { 12337, 12341 },
+ { 12347, 12348 },
{ 12353, 12438 },
- { 12445, 12447 },
+ { 12445, 12447 },
{ 12449, 12538 },
- { 12540, 12543 },
- { 12549, 12591 },
+ { 12540, 12543 },
+ { 12549, 12591 },
{ 12593, 12686 },
- { 12704, 12735 },
+ { 12704, 12735 },
{ 12784, 12799 },
- { 13312, 19903 },
+ { 13312, 19903 },
{ 19968, 42124 },
- { 42192, 42237 },
- { 42240, 42508 },
+ { 42192, 42237 },
+ { 42240, 42508 },
{ 42512, 42527 },
{ 42538, 42539 },
- { 42560, 42606 },
- { 42623, 42653 },
+ { 42560, 42606 },
+ { 42623, 42653 },
{ 42656, 42725 },
- { 42775, 42783 },
- { 42786, 42888 },
+ { 42775, 42783 },
+ { 42786, 42888 },
{ 42891, 42954 },
{ 42960, 42961 },
{ 42963, 42963 },
@@ -398,19 +398,19 @@ static const URange16 L_range16[] = {
{ 43138, 43187 },
{ 43250, 43255 },
{ 43259, 43259 },
- { 43261, 43262 },
+ { 43261, 43262 },
{ 43274, 43301 },
{ 43312, 43334 },
{ 43360, 43388 },
{ 43396, 43442 },
- { 43471, 43471 },
+ { 43471, 43471 },
{ 43488, 43492 },
- { 43494, 43503 },
+ { 43494, 43503 },
{ 43514, 43518 },
{ 43520, 43560 },
{ 43584, 43586 },
{ 43588, 43595 },
- { 43616, 43638 },
+ { 43616, 43638 },
{ 43642, 43642 },
{ 43646, 43695 },
{ 43697, 43697 },
@@ -418,24 +418,24 @@ static const URange16 L_range16[] = {
{ 43705, 43709 },
{ 43712, 43712 },
{ 43714, 43714 },
- { 43739, 43741 },
+ { 43739, 43741 },
{ 43744, 43754 },
- { 43762, 43764 },
+ { 43762, 43764 },
{ 43777, 43782 },
{ 43785, 43790 },
{ 43793, 43798 },
{ 43808, 43814 },
{ 43816, 43822 },
- { 43824, 43866 },
- { 43868, 43881 },
- { 43888, 44002 },
+ { 43824, 43866 },
+ { 43868, 43881 },
+ { 43888, 44002 },
{ 44032, 55203 },
{ 55216, 55238 },
{ 55243, 55291 },
{ 63744, 64109 },
{ 64112, 64217 },
- { 64256, 64262 },
- { 64275, 64279 },
+ { 64256, 64262 },
+ { 64275, 64279 },
{ 64285, 64285 },
{ 64287, 64296 },
{ 64298, 64310 },
@@ -450,15 +450,15 @@ static const URange16 L_range16[] = {
{ 65008, 65019 },
{ 65136, 65140 },
{ 65142, 65276 },
- { 65313, 65338 },
- { 65345, 65370 },
- { 65382, 65470 },
+ { 65313, 65338 },
+ { 65345, 65370 },
+ { 65382, 65470 },
{ 65474, 65479 },
{ 65482, 65487 },
{ 65490, 65495 },
{ 65498, 65500 },
};
-static const URange32 L_range32[] = {
+static const URange32 L_range32[] = {
{ 65536, 65547 },
{ 65549, 65574 },
{ 65576, 65594 },
@@ -469,15 +469,15 @@ static const URange32 L_range32[] = {
{ 66176, 66204 },
{ 66208, 66256 },
{ 66304, 66335 },
- { 66349, 66368 },
+ { 66349, 66368 },
{ 66370, 66377 },
{ 66384, 66421 },
{ 66432, 66461 },
{ 66464, 66499 },
{ 66504, 66511 },
- { 66560, 66717 },
- { 66736, 66771 },
- { 66776, 66811 },
+ { 66560, 66717 },
+ { 66736, 66771 },
+ { 66776, 66811 },
{ 66816, 66855 },
{ 66864, 66915 },
{ 66928, 66938 },
@@ -511,7 +511,7 @@ static const URange32 L_range32[] = {
{ 68096, 68096 },
{ 68112, 68115 },
{ 68117, 68119 },
- { 68121, 68149 },
+ { 68121, 68149 },
{ 68192, 68220 },
{ 68224, 68252 },
{ 68288, 68295 },
@@ -521,25 +521,25 @@ static const URange32 L_range32[] = {
{ 68448, 68466 },
{ 68480, 68497 },
{ 68608, 68680 },
- { 68736, 68786 },
- { 68800, 68850 },
- { 68864, 68899 },
- { 69248, 69289 },
- { 69296, 69297 },
- { 69376, 69404 },
- { 69415, 69415 },
- { 69424, 69445 },
+ { 68736, 68786 },
+ { 68800, 68850 },
+ { 68864, 68899 },
+ { 69248, 69289 },
+ { 69296, 69297 },
+ { 69376, 69404 },
+ { 69415, 69415 },
+ { 69424, 69445 },
{ 69488, 69505 },
- { 69552, 69572 },
- { 69600, 69622 },
+ { 69552, 69572 },
+ { 69600, 69622 },
{ 69635, 69687 },
{ 69745, 69746 },
{ 69749, 69749 },
{ 69763, 69807 },
{ 69840, 69864 },
{ 69891, 69926 },
- { 69956, 69956 },
- { 69959, 69959 },
+ { 69956, 69956 },
+ { 69959, 69959 },
{ 69968, 70002 },
{ 70006, 70006 },
{ 70019, 70066 },
@@ -563,9 +563,9 @@ static const URange32 L_range32[] = {
{ 70461, 70461 },
{ 70480, 70480 },
{ 70493, 70497 },
- { 70656, 70708 },
- { 70727, 70730 },
- { 70751, 70753 },
+ { 70656, 70708 },
+ { 70727, 70730 },
+ { 70751, 70753 },
{ 70784, 70831 },
{ 70852, 70853 },
{ 70855, 70855 },
@@ -574,43 +574,43 @@ static const URange32 L_range32[] = {
{ 71168, 71215 },
{ 71236, 71236 },
{ 71296, 71338 },
- { 71352, 71352 },
- { 71424, 71450 },
+ { 71352, 71352 },
+ { 71424, 71450 },
{ 71488, 71494 },
- { 71680, 71723 },
- { 71840, 71903 },
- { 71935, 71942 },
- { 71945, 71945 },
- { 71948, 71955 },
- { 71957, 71958 },
- { 71960, 71983 },
- { 71999, 71999 },
- { 72001, 72001 },
- { 72096, 72103 },
- { 72106, 72144 },
- { 72161, 72161 },
- { 72163, 72163 },
- { 72192, 72192 },
- { 72203, 72242 },
- { 72250, 72250 },
- { 72272, 72272 },
- { 72284, 72329 },
- { 72349, 72349 },
+ { 71680, 71723 },
+ { 71840, 71903 },
+ { 71935, 71942 },
+ { 71945, 71945 },
+ { 71948, 71955 },
+ { 71957, 71958 },
+ { 71960, 71983 },
+ { 71999, 71999 },
+ { 72001, 72001 },
+ { 72096, 72103 },
+ { 72106, 72144 },
+ { 72161, 72161 },
+ { 72163, 72163 },
+ { 72192, 72192 },
+ { 72203, 72242 },
+ { 72250, 72250 },
+ { 72272, 72272 },
+ { 72284, 72329 },
+ { 72349, 72349 },
{ 72368, 72440 },
- { 72704, 72712 },
- { 72714, 72750 },
- { 72768, 72768 },
- { 72818, 72847 },
- { 72960, 72966 },
- { 72968, 72969 },
- { 72971, 73008 },
- { 73030, 73030 },
- { 73056, 73061 },
- { 73063, 73064 },
- { 73066, 73097 },
- { 73112, 73112 },
- { 73440, 73458 },
- { 73648, 73648 },
+ { 72704, 72712 },
+ { 72714, 72750 },
+ { 72768, 72768 },
+ { 72818, 72847 },
+ { 72960, 72966 },
+ { 72968, 72969 },
+ { 72971, 73008 },
+ { 73030, 73030 },
+ { 73056, 73061 },
+ { 73063, 73064 },
+ { 73066, 73097 },
+ { 73112, 73112 },
+ { 73440, 73458 },
+ { 73648, 73648 },
{ 73728, 74649 },
{ 74880, 75075 },
{ 77712, 77808 },
@@ -621,72 +621,72 @@ static const URange32 L_range32[] = {
{ 92784, 92862 },
{ 92880, 92909 },
{ 92928, 92975 },
- { 92992, 92995 },
+ { 92992, 92995 },
{ 93027, 93047 },
{ 93053, 93071 },
- { 93760, 93823 },
- { 93952, 94026 },
+ { 93760, 93823 },
+ { 93952, 94026 },
{ 94032, 94032 },
- { 94099, 94111 },
- { 94176, 94177 },
- { 94179, 94179 },
- { 94208, 100343 },
- { 100352, 101589 },
- { 101632, 101640 },
+ { 94099, 94111 },
+ { 94176, 94177 },
+ { 94179, 94179 },
+ { 94208, 100343 },
+ { 100352, 101589 },
+ { 101632, 101640 },
{ 110576, 110579 },
{ 110581, 110587 },
{ 110589, 110590 },
{ 110592, 110882 },
- { 110928, 110930 },
- { 110948, 110951 },
- { 110960, 111355 },
+ { 110928, 110930 },
+ { 110948, 110951 },
+ { 110960, 111355 },
{ 113664, 113770 },
{ 113776, 113788 },
{ 113792, 113800 },
{ 113808, 113817 },
- { 119808, 119892 },
- { 119894, 119964 },
- { 119966, 119967 },
- { 119970, 119970 },
- { 119973, 119974 },
- { 119977, 119980 },
- { 119982, 119993 },
- { 119995, 119995 },
- { 119997, 120003 },
- { 120005, 120069 },
- { 120071, 120074 },
- { 120077, 120084 },
- { 120086, 120092 },
- { 120094, 120121 },
- { 120123, 120126 },
- { 120128, 120132 },
- { 120134, 120134 },
- { 120138, 120144 },
- { 120146, 120485 },
- { 120488, 120512 },
- { 120514, 120538 },
- { 120540, 120570 },
- { 120572, 120596 },
- { 120598, 120628 },
- { 120630, 120654 },
- { 120656, 120686 },
- { 120688, 120712 },
- { 120714, 120744 },
- { 120746, 120770 },
- { 120772, 120779 },
+ { 119808, 119892 },
+ { 119894, 119964 },
+ { 119966, 119967 },
+ { 119970, 119970 },
+ { 119973, 119974 },
+ { 119977, 119980 },
+ { 119982, 119993 },
+ { 119995, 119995 },
+ { 119997, 120003 },
+ { 120005, 120069 },
+ { 120071, 120074 },
+ { 120077, 120084 },
+ { 120086, 120092 },
+ { 120094, 120121 },
+ { 120123, 120126 },
+ { 120128, 120132 },
+ { 120134, 120134 },
+ { 120138, 120144 },
+ { 120146, 120485 },
+ { 120488, 120512 },
+ { 120514, 120538 },
+ { 120540, 120570 },
+ { 120572, 120596 },
+ { 120598, 120628 },
+ { 120630, 120654 },
+ { 120656, 120686 },
+ { 120688, 120712 },
+ { 120714, 120744 },
+ { 120746, 120770 },
+ { 120772, 120779 },
{ 122624, 122654 },
- { 123136, 123180 },
- { 123191, 123197 },
- { 123214, 123214 },
+ { 123136, 123180 },
+ { 123191, 123197 },
+ { 123214, 123214 },
{ 123536, 123565 },
- { 123584, 123627 },
+ { 123584, 123627 },
{ 124896, 124902 },
{ 124904, 124907 },
{ 124909, 124910 },
{ 124912, 124926 },
{ 124928, 125124 },
- { 125184, 125251 },
- { 125259, 125259 },
+ { 125184, 125251 },
+ { 125259, 125259 },
{ 126464, 126467 },
{ 126469, 126495 },
{ 126497, 126498 },
@@ -724,9 +724,9 @@ static const URange32 L_range32[] = {
{ 173824, 177976 },
{ 177984, 178205 },
{ 178208, 183969 },
- { 183984, 191456 },
+ { 183984, 191456 },
{ 194560, 195101 },
- { 196608, 201546 },
+ { 196608, 201546 },
};
static const URange16 Ll_range16[] = {
{ 97, 122 },
@@ -998,11 +998,11 @@ static const URange16 Ll_range16[] = {
{ 1323, 1323 },
{ 1325, 1325 },
{ 1327, 1327 },
- { 1376, 1416 },
- { 4304, 4346 },
- { 4349, 4351 },
+ { 1376, 1416 },
+ { 4304, 4346 },
+ { 4349, 4351 },
{ 5112, 5117 },
- { 7296, 7304 },
+ { 7296, 7304 },
{ 7424, 7467 },
{ 7531, 7543 },
{ 7545, 7578 },
@@ -1322,26 +1322,26 @@ static const URange16 Ll_range16[] = {
{ 42917, 42917 },
{ 42919, 42919 },
{ 42921, 42921 },
- { 42927, 42927 },
+ { 42927, 42927 },
{ 42933, 42933 },
{ 42935, 42935 },
- { 42937, 42937 },
- { 42939, 42939 },
- { 42941, 42941 },
- { 42943, 42943 },
+ { 42937, 42937 },
+ { 42939, 42939 },
+ { 42941, 42941 },
+ { 42943, 42943 },
{ 42945, 42945 },
- { 42947, 42947 },
- { 42952, 42952 },
- { 42954, 42954 },
+ { 42947, 42947 },
+ { 42952, 42952 },
+ { 42954, 42954 },
{ 42961, 42961 },
{ 42963, 42963 },
{ 42965, 42965 },
{ 42967, 42967 },
{ 42969, 42969 },
- { 42998, 42998 },
+ { 42998, 42998 },
{ 43002, 43002 },
{ 43824, 43866 },
- { 43872, 43880 },
+ { 43872, 43880 },
{ 43888, 43967 },
{ 64256, 64262 },
{ 64275, 64279 },
@@ -1349,14 +1349,14 @@ static const URange16 Ll_range16[] = {
};
static const URange32 Ll_range32[] = {
{ 66600, 66639 },
- { 66776, 66811 },
+ { 66776, 66811 },
{ 66967, 66977 },
{ 66979, 66993 },
{ 66995, 67001 },
{ 67003, 67004 },
{ 68800, 68850 },
{ 71872, 71903 },
- { 93792, 93823 },
+ { 93792, 93823 },
{ 119834, 119859 },
{ 119886, 119892 },
{ 119894, 119911 },
@@ -1387,7 +1387,7 @@ static const URange32 Ll_range32[] = {
{ 120779, 120779 },
{ 122624, 122633 },
{ 122635, 122654 },
- { 125218, 125251 },
+ { 125218, 125251 },
};
static const URange16 Lm_range16[] = {
{ 688, 705 },
@@ -1444,7 +1444,7 @@ static const URange16 Lm_range16[] = {
{ 43741, 43741 },
{ 43763, 43764 },
{ 43868, 43871 },
- { 43881, 43881 },
+ { 43881, 43881 },
{ 65392, 65392 },
{ 65438, 65439 },
};
@@ -1454,518 +1454,518 @@ static const URange32 Lm_range32[] = {
{ 67506, 67514 },
{ 92992, 92995 },
{ 94099, 94111 },
- { 94176, 94177 },
- { 94179, 94179 },
+ { 94176, 94177 },
+ { 94179, 94179 },
{ 110576, 110579 },
{ 110581, 110587 },
{ 110589, 110590 },
- { 123191, 123197 },
- { 125259, 125259 },
-};
-static const URange16 Lo_range16[] = {
- { 170, 170 },
- { 186, 186 },
- { 443, 443 },
- { 448, 451 },
- { 660, 660 },
- { 1488, 1514 },
- { 1519, 1522 },
- { 1568, 1599 },
- { 1601, 1610 },
- { 1646, 1647 },
- { 1649, 1747 },
- { 1749, 1749 },
- { 1774, 1775 },
- { 1786, 1788 },
- { 1791, 1791 },
- { 1808, 1808 },
- { 1810, 1839 },
- { 1869, 1957 },
- { 1969, 1969 },
- { 1994, 2026 },
- { 2048, 2069 },
- { 2112, 2136 },
- { 2144, 2154 },
+ { 123191, 123197 },
+ { 125259, 125259 },
+};
+static const URange16 Lo_range16[] = {
+ { 170, 170 },
+ { 186, 186 },
+ { 443, 443 },
+ { 448, 451 },
+ { 660, 660 },
+ { 1488, 1514 },
+ { 1519, 1522 },
+ { 1568, 1599 },
+ { 1601, 1610 },
+ { 1646, 1647 },
+ { 1649, 1747 },
+ { 1749, 1749 },
+ { 1774, 1775 },
+ { 1786, 1788 },
+ { 1791, 1791 },
+ { 1808, 1808 },
+ { 1810, 1839 },
+ { 1869, 1957 },
+ { 1969, 1969 },
+ { 1994, 2026 },
+ { 2048, 2069 },
+ { 2112, 2136 },
+ { 2144, 2154 },
{ 2160, 2183 },
{ 2185, 2190 },
{ 2208, 2248 },
- { 2308, 2361 },
- { 2365, 2365 },
- { 2384, 2384 },
- { 2392, 2401 },
- { 2418, 2432 },
- { 2437, 2444 },
- { 2447, 2448 },
- { 2451, 2472 },
- { 2474, 2480 },
- { 2482, 2482 },
- { 2486, 2489 },
- { 2493, 2493 },
- { 2510, 2510 },
- { 2524, 2525 },
- { 2527, 2529 },
- { 2544, 2545 },
- { 2556, 2556 },
- { 2565, 2570 },
- { 2575, 2576 },
- { 2579, 2600 },
- { 2602, 2608 },
- { 2610, 2611 },
- { 2613, 2614 },
- { 2616, 2617 },
- { 2649, 2652 },
- { 2654, 2654 },
- { 2674, 2676 },
- { 2693, 2701 },
- { 2703, 2705 },
- { 2707, 2728 },
- { 2730, 2736 },
- { 2738, 2739 },
- { 2741, 2745 },
- { 2749, 2749 },
- { 2768, 2768 },
- { 2784, 2785 },
- { 2809, 2809 },
- { 2821, 2828 },
- { 2831, 2832 },
- { 2835, 2856 },
- { 2858, 2864 },
- { 2866, 2867 },
- { 2869, 2873 },
- { 2877, 2877 },
- { 2908, 2909 },
- { 2911, 2913 },
- { 2929, 2929 },
- { 2947, 2947 },
- { 2949, 2954 },
- { 2958, 2960 },
- { 2962, 2965 },
- { 2969, 2970 },
- { 2972, 2972 },
- { 2974, 2975 },
- { 2979, 2980 },
- { 2984, 2986 },
- { 2990, 3001 },
- { 3024, 3024 },
- { 3077, 3084 },
- { 3086, 3088 },
- { 3090, 3112 },
- { 3114, 3129 },
- { 3133, 3133 },
- { 3160, 3162 },
+ { 2308, 2361 },
+ { 2365, 2365 },
+ { 2384, 2384 },
+ { 2392, 2401 },
+ { 2418, 2432 },
+ { 2437, 2444 },
+ { 2447, 2448 },
+ { 2451, 2472 },
+ { 2474, 2480 },
+ { 2482, 2482 },
+ { 2486, 2489 },
+ { 2493, 2493 },
+ { 2510, 2510 },
+ { 2524, 2525 },
+ { 2527, 2529 },
+ { 2544, 2545 },
+ { 2556, 2556 },
+ { 2565, 2570 },
+ { 2575, 2576 },
+ { 2579, 2600 },
+ { 2602, 2608 },
+ { 2610, 2611 },
+ { 2613, 2614 },
+ { 2616, 2617 },
+ { 2649, 2652 },
+ { 2654, 2654 },
+ { 2674, 2676 },
+ { 2693, 2701 },
+ { 2703, 2705 },
+ { 2707, 2728 },
+ { 2730, 2736 },
+ { 2738, 2739 },
+ { 2741, 2745 },
+ { 2749, 2749 },
+ { 2768, 2768 },
+ { 2784, 2785 },
+ { 2809, 2809 },
+ { 2821, 2828 },
+ { 2831, 2832 },
+ { 2835, 2856 },
+ { 2858, 2864 },
+ { 2866, 2867 },
+ { 2869, 2873 },
+ { 2877, 2877 },
+ { 2908, 2909 },
+ { 2911, 2913 },
+ { 2929, 2929 },
+ { 2947, 2947 },
+ { 2949, 2954 },
+ { 2958, 2960 },
+ { 2962, 2965 },
+ { 2969, 2970 },
+ { 2972, 2972 },
+ { 2974, 2975 },
+ { 2979, 2980 },
+ { 2984, 2986 },
+ { 2990, 3001 },
+ { 3024, 3024 },
+ { 3077, 3084 },
+ { 3086, 3088 },
+ { 3090, 3112 },
+ { 3114, 3129 },
+ { 3133, 3133 },
+ { 3160, 3162 },
{ 3165, 3165 },
- { 3168, 3169 },
- { 3200, 3200 },
- { 3205, 3212 },
- { 3214, 3216 },
- { 3218, 3240 },
- { 3242, 3251 },
- { 3253, 3257 },
- { 3261, 3261 },
+ { 3168, 3169 },
+ { 3200, 3200 },
+ { 3205, 3212 },
+ { 3214, 3216 },
+ { 3218, 3240 },
+ { 3242, 3251 },
+ { 3253, 3257 },
+ { 3261, 3261 },
{ 3293, 3294 },
- { 3296, 3297 },
- { 3313, 3314 },
- { 3332, 3340 },
- { 3342, 3344 },
- { 3346, 3386 },
- { 3389, 3389 },
- { 3406, 3406 },
- { 3412, 3414 },
- { 3423, 3425 },
- { 3450, 3455 },
- { 3461, 3478 },
- { 3482, 3505 },
- { 3507, 3515 },
- { 3517, 3517 },
- { 3520, 3526 },
- { 3585, 3632 },
- { 3634, 3635 },
- { 3648, 3653 },
- { 3713, 3714 },
- { 3716, 3716 },
- { 3718, 3722 },
- { 3724, 3747 },
- { 3749, 3749 },
- { 3751, 3760 },
- { 3762, 3763 },
- { 3773, 3773 },
- { 3776, 3780 },
- { 3804, 3807 },
- { 3840, 3840 },
- { 3904, 3911 },
- { 3913, 3948 },
- { 3976, 3980 },
- { 4096, 4138 },
- { 4159, 4159 },
- { 4176, 4181 },
- { 4186, 4189 },
- { 4193, 4193 },
- { 4197, 4198 },
- { 4206, 4208 },
- { 4213, 4225 },
- { 4238, 4238 },
- { 4352, 4680 },
- { 4682, 4685 },
- { 4688, 4694 },
- { 4696, 4696 },
- { 4698, 4701 },
- { 4704, 4744 },
- { 4746, 4749 },
- { 4752, 4784 },
- { 4786, 4789 },
- { 4792, 4798 },
- { 4800, 4800 },
- { 4802, 4805 },
- { 4808, 4822 },
- { 4824, 4880 },
- { 4882, 4885 },
- { 4888, 4954 },
- { 4992, 5007 },
- { 5121, 5740 },
- { 5743, 5759 },
- { 5761, 5786 },
- { 5792, 5866 },
- { 5873, 5880 },
+ { 3296, 3297 },
+ { 3313, 3314 },
+ { 3332, 3340 },
+ { 3342, 3344 },
+ { 3346, 3386 },
+ { 3389, 3389 },
+ { 3406, 3406 },
+ { 3412, 3414 },
+ { 3423, 3425 },
+ { 3450, 3455 },
+ { 3461, 3478 },
+ { 3482, 3505 },
+ { 3507, 3515 },
+ { 3517, 3517 },
+ { 3520, 3526 },
+ { 3585, 3632 },
+ { 3634, 3635 },
+ { 3648, 3653 },
+ { 3713, 3714 },
+ { 3716, 3716 },
+ { 3718, 3722 },
+ { 3724, 3747 },
+ { 3749, 3749 },
+ { 3751, 3760 },
+ { 3762, 3763 },
+ { 3773, 3773 },
+ { 3776, 3780 },
+ { 3804, 3807 },
+ { 3840, 3840 },
+ { 3904, 3911 },
+ { 3913, 3948 },
+ { 3976, 3980 },
+ { 4096, 4138 },
+ { 4159, 4159 },
+ { 4176, 4181 },
+ { 4186, 4189 },
+ { 4193, 4193 },
+ { 4197, 4198 },
+ { 4206, 4208 },
+ { 4213, 4225 },
+ { 4238, 4238 },
+ { 4352, 4680 },
+ { 4682, 4685 },
+ { 4688, 4694 },
+ { 4696, 4696 },
+ { 4698, 4701 },
+ { 4704, 4744 },
+ { 4746, 4749 },
+ { 4752, 4784 },
+ { 4786, 4789 },
+ { 4792, 4798 },
+ { 4800, 4800 },
+ { 4802, 4805 },
+ { 4808, 4822 },
+ { 4824, 4880 },
+ { 4882, 4885 },
+ { 4888, 4954 },
+ { 4992, 5007 },
+ { 5121, 5740 },
+ { 5743, 5759 },
+ { 5761, 5786 },
+ { 5792, 5866 },
+ { 5873, 5880 },
{ 5888, 5905 },
{ 5919, 5937 },
- { 5952, 5969 },
- { 5984, 5996 },
- { 5998, 6000 },
- { 6016, 6067 },
- { 6108, 6108 },
- { 6176, 6210 },
- { 6212, 6264 },
- { 6272, 6276 },
- { 6279, 6312 },
- { 6314, 6314 },
- { 6320, 6389 },
- { 6400, 6430 },
- { 6480, 6509 },
- { 6512, 6516 },
- { 6528, 6571 },
- { 6576, 6601 },
- { 6656, 6678 },
- { 6688, 6740 },
- { 6917, 6963 },
+ { 5952, 5969 },
+ { 5984, 5996 },
+ { 5998, 6000 },
+ { 6016, 6067 },
+ { 6108, 6108 },
+ { 6176, 6210 },
+ { 6212, 6264 },
+ { 6272, 6276 },
+ { 6279, 6312 },
+ { 6314, 6314 },
+ { 6320, 6389 },
+ { 6400, 6430 },
+ { 6480, 6509 },
+ { 6512, 6516 },
+ { 6528, 6571 },
+ { 6576, 6601 },
+ { 6656, 6678 },
+ { 6688, 6740 },
+ { 6917, 6963 },
{ 6981, 6988 },
- { 7043, 7072 },
- { 7086, 7087 },
- { 7098, 7141 },
- { 7168, 7203 },
- { 7245, 7247 },
- { 7258, 7287 },
- { 7401, 7404 },
- { 7406, 7411 },
- { 7413, 7414 },
- { 7418, 7418 },
- { 8501, 8504 },
- { 11568, 11623 },
- { 11648, 11670 },
- { 11680, 11686 },
- { 11688, 11694 },
- { 11696, 11702 },
- { 11704, 11710 },
- { 11712, 11718 },
- { 11720, 11726 },
- { 11728, 11734 },
- { 11736, 11742 },
- { 12294, 12294 },
- { 12348, 12348 },
- { 12353, 12438 },
- { 12447, 12447 },
- { 12449, 12538 },
- { 12543, 12543 },
- { 12549, 12591 },
- { 12593, 12686 },
- { 12704, 12735 },
- { 12784, 12799 },
- { 13312, 19903 },
+ { 7043, 7072 },
+ { 7086, 7087 },
+ { 7098, 7141 },
+ { 7168, 7203 },
+ { 7245, 7247 },
+ { 7258, 7287 },
+ { 7401, 7404 },
+ { 7406, 7411 },
+ { 7413, 7414 },
+ { 7418, 7418 },
+ { 8501, 8504 },
+ { 11568, 11623 },
+ { 11648, 11670 },
+ { 11680, 11686 },
+ { 11688, 11694 },
+ { 11696, 11702 },
+ { 11704, 11710 },
+ { 11712, 11718 },
+ { 11720, 11726 },
+ { 11728, 11734 },
+ { 11736, 11742 },
+ { 12294, 12294 },
+ { 12348, 12348 },
+ { 12353, 12438 },
+ { 12447, 12447 },
+ { 12449, 12538 },
+ { 12543, 12543 },
+ { 12549, 12591 },
+ { 12593, 12686 },
+ { 12704, 12735 },
+ { 12784, 12799 },
+ { 13312, 19903 },
{ 19968, 40980 },
- { 40982, 42124 },
- { 42192, 42231 },
- { 42240, 42507 },
- { 42512, 42527 },
- { 42538, 42539 },
- { 42606, 42606 },
- { 42656, 42725 },
- { 42895, 42895 },
- { 42999, 42999 },
- { 43003, 43009 },
- { 43011, 43013 },
- { 43015, 43018 },
- { 43020, 43042 },
- { 43072, 43123 },
- { 43138, 43187 },
- { 43250, 43255 },
- { 43259, 43259 },
- { 43261, 43262 },
- { 43274, 43301 },
- { 43312, 43334 },
- { 43360, 43388 },
- { 43396, 43442 },
- { 43488, 43492 },
- { 43495, 43503 },
- { 43514, 43518 },
- { 43520, 43560 },
- { 43584, 43586 },
- { 43588, 43595 },
- { 43616, 43631 },
- { 43633, 43638 },
- { 43642, 43642 },
- { 43646, 43695 },
- { 43697, 43697 },
- { 43701, 43702 },
- { 43705, 43709 },
- { 43712, 43712 },
- { 43714, 43714 },
- { 43739, 43740 },
- { 43744, 43754 },
- { 43762, 43762 },
- { 43777, 43782 },
- { 43785, 43790 },
- { 43793, 43798 },
- { 43808, 43814 },
- { 43816, 43822 },
- { 43968, 44002 },
- { 44032, 55203 },
- { 55216, 55238 },
- { 55243, 55291 },
- { 63744, 64109 },
- { 64112, 64217 },
- { 64285, 64285 },
- { 64287, 64296 },
- { 64298, 64310 },
- { 64312, 64316 },
- { 64318, 64318 },
- { 64320, 64321 },
- { 64323, 64324 },
- { 64326, 64433 },
- { 64467, 64829 },
- { 64848, 64911 },
- { 64914, 64967 },
- { 65008, 65019 },
- { 65136, 65140 },
- { 65142, 65276 },
- { 65382, 65391 },
- { 65393, 65437 },
- { 65440, 65470 },
- { 65474, 65479 },
- { 65482, 65487 },
- { 65490, 65495 },
- { 65498, 65500 },
-};
-static const URange32 Lo_range32[] = {
- { 65536, 65547 },
- { 65549, 65574 },
- { 65576, 65594 },
- { 65596, 65597 },
- { 65599, 65613 },
- { 65616, 65629 },
- { 65664, 65786 },
- { 66176, 66204 },
- { 66208, 66256 },
- { 66304, 66335 },
- { 66349, 66368 },
- { 66370, 66377 },
- { 66384, 66421 },
- { 66432, 66461 },
- { 66464, 66499 },
- { 66504, 66511 },
- { 66640, 66717 },
- { 66816, 66855 },
- { 66864, 66915 },
- { 67072, 67382 },
- { 67392, 67413 },
- { 67424, 67431 },
- { 67584, 67589 },
- { 67592, 67592 },
- { 67594, 67637 },
- { 67639, 67640 },
- { 67644, 67644 },
- { 67647, 67669 },
- { 67680, 67702 },
- { 67712, 67742 },
- { 67808, 67826 },
- { 67828, 67829 },
- { 67840, 67861 },
- { 67872, 67897 },
- { 67968, 68023 },
- { 68030, 68031 },
- { 68096, 68096 },
- { 68112, 68115 },
- { 68117, 68119 },
- { 68121, 68149 },
- { 68192, 68220 },
- { 68224, 68252 },
- { 68288, 68295 },
- { 68297, 68324 },
- { 68352, 68405 },
- { 68416, 68437 },
- { 68448, 68466 },
- { 68480, 68497 },
- { 68608, 68680 },
- { 68864, 68899 },
- { 69248, 69289 },
- { 69296, 69297 },
- { 69376, 69404 },
- { 69415, 69415 },
- { 69424, 69445 },
+ { 40982, 42124 },
+ { 42192, 42231 },
+ { 42240, 42507 },
+ { 42512, 42527 },
+ { 42538, 42539 },
+ { 42606, 42606 },
+ { 42656, 42725 },
+ { 42895, 42895 },
+ { 42999, 42999 },
+ { 43003, 43009 },
+ { 43011, 43013 },
+ { 43015, 43018 },
+ { 43020, 43042 },
+ { 43072, 43123 },
+ { 43138, 43187 },
+ { 43250, 43255 },
+ { 43259, 43259 },
+ { 43261, 43262 },
+ { 43274, 43301 },
+ { 43312, 43334 },
+ { 43360, 43388 },
+ { 43396, 43442 },
+ { 43488, 43492 },
+ { 43495, 43503 },
+ { 43514, 43518 },
+ { 43520, 43560 },
+ { 43584, 43586 },
+ { 43588, 43595 },
+ { 43616, 43631 },
+ { 43633, 43638 },
+ { 43642, 43642 },
+ { 43646, 43695 },
+ { 43697, 43697 },
+ { 43701, 43702 },
+ { 43705, 43709 },
+ { 43712, 43712 },
+ { 43714, 43714 },
+ { 43739, 43740 },
+ { 43744, 43754 },
+ { 43762, 43762 },
+ { 43777, 43782 },
+ { 43785, 43790 },
+ { 43793, 43798 },
+ { 43808, 43814 },
+ { 43816, 43822 },
+ { 43968, 44002 },
+ { 44032, 55203 },
+ { 55216, 55238 },
+ { 55243, 55291 },
+ { 63744, 64109 },
+ { 64112, 64217 },
+ { 64285, 64285 },
+ { 64287, 64296 },
+ { 64298, 64310 },
+ { 64312, 64316 },
+ { 64318, 64318 },
+ { 64320, 64321 },
+ { 64323, 64324 },
+ { 64326, 64433 },
+ { 64467, 64829 },
+ { 64848, 64911 },
+ { 64914, 64967 },
+ { 65008, 65019 },
+ { 65136, 65140 },
+ { 65142, 65276 },
+ { 65382, 65391 },
+ { 65393, 65437 },
+ { 65440, 65470 },
+ { 65474, 65479 },
+ { 65482, 65487 },
+ { 65490, 65495 },
+ { 65498, 65500 },
+};
+static const URange32 Lo_range32[] = {
+ { 65536, 65547 },
+ { 65549, 65574 },
+ { 65576, 65594 },
+ { 65596, 65597 },
+ { 65599, 65613 },
+ { 65616, 65629 },
+ { 65664, 65786 },
+ { 66176, 66204 },
+ { 66208, 66256 },
+ { 66304, 66335 },
+ { 66349, 66368 },
+ { 66370, 66377 },
+ { 66384, 66421 },
+ { 66432, 66461 },
+ { 66464, 66499 },
+ { 66504, 66511 },
+ { 66640, 66717 },
+ { 66816, 66855 },
+ { 66864, 66915 },
+ { 67072, 67382 },
+ { 67392, 67413 },
+ { 67424, 67431 },
+ { 67584, 67589 },
+ { 67592, 67592 },
+ { 67594, 67637 },
+ { 67639, 67640 },
+ { 67644, 67644 },
+ { 67647, 67669 },
+ { 67680, 67702 },
+ { 67712, 67742 },
+ { 67808, 67826 },
+ { 67828, 67829 },
+ { 67840, 67861 },
+ { 67872, 67897 },
+ { 67968, 68023 },
+ { 68030, 68031 },
+ { 68096, 68096 },
+ { 68112, 68115 },
+ { 68117, 68119 },
+ { 68121, 68149 },
+ { 68192, 68220 },
+ { 68224, 68252 },
+ { 68288, 68295 },
+ { 68297, 68324 },
+ { 68352, 68405 },
+ { 68416, 68437 },
+ { 68448, 68466 },
+ { 68480, 68497 },
+ { 68608, 68680 },
+ { 68864, 68899 },
+ { 69248, 69289 },
+ { 69296, 69297 },
+ { 69376, 69404 },
+ { 69415, 69415 },
+ { 69424, 69445 },
{ 69488, 69505 },
- { 69552, 69572 },
- { 69600, 69622 },
- { 69635, 69687 },
+ { 69552, 69572 },
+ { 69600, 69622 },
+ { 69635, 69687 },
{ 69745, 69746 },
{ 69749, 69749 },
- { 69763, 69807 },
- { 69840, 69864 },
- { 69891, 69926 },
- { 69956, 69956 },
- { 69959, 69959 },
- { 69968, 70002 },
- { 70006, 70006 },
- { 70019, 70066 },
- { 70081, 70084 },
- { 70106, 70106 },
- { 70108, 70108 },
- { 70144, 70161 },
- { 70163, 70187 },
- { 70272, 70278 },
- { 70280, 70280 },
- { 70282, 70285 },
- { 70287, 70301 },
- { 70303, 70312 },
- { 70320, 70366 },
- { 70405, 70412 },
- { 70415, 70416 },
- { 70419, 70440 },
- { 70442, 70448 },
- { 70450, 70451 },
- { 70453, 70457 },
- { 70461, 70461 },
- { 70480, 70480 },
- { 70493, 70497 },
- { 70656, 70708 },
- { 70727, 70730 },
- { 70751, 70753 },
- { 70784, 70831 },
- { 70852, 70853 },
- { 70855, 70855 },
- { 71040, 71086 },
- { 71128, 71131 },
- { 71168, 71215 },
- { 71236, 71236 },
- { 71296, 71338 },
- { 71352, 71352 },
- { 71424, 71450 },
+ { 69763, 69807 },
+ { 69840, 69864 },
+ { 69891, 69926 },
+ { 69956, 69956 },
+ { 69959, 69959 },
+ { 69968, 70002 },
+ { 70006, 70006 },
+ { 70019, 70066 },
+ { 70081, 70084 },
+ { 70106, 70106 },
+ { 70108, 70108 },
+ { 70144, 70161 },
+ { 70163, 70187 },
+ { 70272, 70278 },
+ { 70280, 70280 },
+ { 70282, 70285 },
+ { 70287, 70301 },
+ { 70303, 70312 },
+ { 70320, 70366 },
+ { 70405, 70412 },
+ { 70415, 70416 },
+ { 70419, 70440 },
+ { 70442, 70448 },
+ { 70450, 70451 },
+ { 70453, 70457 },
+ { 70461, 70461 },
+ { 70480, 70480 },
+ { 70493, 70497 },
+ { 70656, 70708 },
+ { 70727, 70730 },
+ { 70751, 70753 },
+ { 70784, 70831 },
+ { 70852, 70853 },
+ { 70855, 70855 },
+ { 71040, 71086 },
+ { 71128, 71131 },
+ { 71168, 71215 },
+ { 71236, 71236 },
+ { 71296, 71338 },
+ { 71352, 71352 },
+ { 71424, 71450 },
{ 71488, 71494 },
- { 71680, 71723 },
- { 71935, 71942 },
- { 71945, 71945 },
- { 71948, 71955 },
- { 71957, 71958 },
- { 71960, 71983 },
- { 71999, 71999 },
- { 72001, 72001 },
- { 72096, 72103 },
- { 72106, 72144 },
- { 72161, 72161 },
- { 72163, 72163 },
- { 72192, 72192 },
- { 72203, 72242 },
- { 72250, 72250 },
- { 72272, 72272 },
- { 72284, 72329 },
- { 72349, 72349 },
+ { 71680, 71723 },
+ { 71935, 71942 },
+ { 71945, 71945 },
+ { 71948, 71955 },
+ { 71957, 71958 },
+ { 71960, 71983 },
+ { 71999, 71999 },
+ { 72001, 72001 },
+ { 72096, 72103 },
+ { 72106, 72144 },
+ { 72161, 72161 },
+ { 72163, 72163 },
+ { 72192, 72192 },
+ { 72203, 72242 },
+ { 72250, 72250 },
+ { 72272, 72272 },
+ { 72284, 72329 },
+ { 72349, 72349 },
{ 72368, 72440 },
- { 72704, 72712 },
- { 72714, 72750 },
- { 72768, 72768 },
- { 72818, 72847 },
- { 72960, 72966 },
- { 72968, 72969 },
- { 72971, 73008 },
- { 73030, 73030 },
- { 73056, 73061 },
- { 73063, 73064 },
- { 73066, 73097 },
- { 73112, 73112 },
- { 73440, 73458 },
- { 73648, 73648 },
- { 73728, 74649 },
- { 74880, 75075 },
+ { 72704, 72712 },
+ { 72714, 72750 },
+ { 72768, 72768 },
+ { 72818, 72847 },
+ { 72960, 72966 },
+ { 72968, 72969 },
+ { 72971, 73008 },
+ { 73030, 73030 },
+ { 73056, 73061 },
+ { 73063, 73064 },
+ { 73066, 73097 },
+ { 73112, 73112 },
+ { 73440, 73458 },
+ { 73648, 73648 },
+ { 73728, 74649 },
+ { 74880, 75075 },
{ 77712, 77808 },
- { 77824, 78894 },
- { 82944, 83526 },
- { 92160, 92728 },
- { 92736, 92766 },
+ { 77824, 78894 },
+ { 82944, 83526 },
+ { 92160, 92728 },
+ { 92736, 92766 },
{ 92784, 92862 },
- { 92880, 92909 },
- { 92928, 92975 },
- { 93027, 93047 },
- { 93053, 93071 },
- { 93952, 94026 },
- { 94032, 94032 },
- { 94208, 100343 },
- { 100352, 101589 },
- { 101632, 101640 },
+ { 92880, 92909 },
+ { 92928, 92975 },
+ { 93027, 93047 },
+ { 93053, 93071 },
+ { 93952, 94026 },
+ { 94032, 94032 },
+ { 94208, 100343 },
+ { 100352, 101589 },
+ { 101632, 101640 },
{ 110592, 110882 },
- { 110928, 110930 },
- { 110948, 110951 },
- { 110960, 111355 },
- { 113664, 113770 },
- { 113776, 113788 },
- { 113792, 113800 },
- { 113808, 113817 },
+ { 110928, 110930 },
+ { 110948, 110951 },
+ { 110960, 111355 },
+ { 113664, 113770 },
+ { 113776, 113788 },
+ { 113792, 113800 },
+ { 113808, 113817 },
{ 122634, 122634 },
- { 123136, 123180 },
- { 123214, 123214 },
+ { 123136, 123180 },
+ { 123214, 123214 },
{ 123536, 123565 },
- { 123584, 123627 },
+ { 123584, 123627 },
{ 124896, 124902 },
{ 124904, 124907 },
{ 124909, 124910 },
{ 124912, 124926 },
- { 124928, 125124 },
- { 126464, 126467 },
- { 126469, 126495 },
- { 126497, 126498 },
- { 126500, 126500 },
- { 126503, 126503 },
- { 126505, 126514 },
- { 126516, 126519 },
- { 126521, 126521 },
- { 126523, 126523 },
- { 126530, 126530 },
- { 126535, 126535 },
- { 126537, 126537 },
- { 126539, 126539 },
- { 126541, 126543 },
- { 126545, 126546 },
- { 126548, 126548 },
- { 126551, 126551 },
- { 126553, 126553 },
- { 126555, 126555 },
- { 126557, 126557 },
- { 126559, 126559 },
- { 126561, 126562 },
- { 126564, 126564 },
- { 126567, 126570 },
- { 126572, 126578 },
- { 126580, 126583 },
- { 126585, 126588 },
- { 126590, 126590 },
- { 126592, 126601 },
- { 126603, 126619 },
- { 126625, 126627 },
- { 126629, 126633 },
- { 126635, 126651 },
+ { 124928, 125124 },
+ { 126464, 126467 },
+ { 126469, 126495 },
+ { 126497, 126498 },
+ { 126500, 126500 },
+ { 126503, 126503 },
+ { 126505, 126514 },
+ { 126516, 126519 },
+ { 126521, 126521 },
+ { 126523, 126523 },
+ { 126530, 126530 },
+ { 126535, 126535 },
+ { 126537, 126537 },
+ { 126539, 126539 },
+ { 126541, 126543 },
+ { 126545, 126546 },
+ { 126548, 126548 },
+ { 126551, 126551 },
+ { 126553, 126553 },
+ { 126555, 126555 },
+ { 126557, 126557 },
+ { 126559, 126559 },
+ { 126561, 126562 },
+ { 126564, 126564 },
+ { 126567, 126570 },
+ { 126572, 126578 },
+ { 126580, 126583 },
+ { 126585, 126588 },
+ { 126590, 126590 },
+ { 126592, 126601 },
+ { 126603, 126619 },
+ { 126625, 126627 },
+ { 126629, 126633 },
+ { 126635, 126651 },
{ 131072, 173791 },
{ 173824, 177976 },
- { 177984, 178205 },
- { 178208, 183969 },
- { 183984, 191456 },
- { 194560, 195101 },
- { 196608, 201546 },
+ { 177984, 178205 },
+ { 178208, 183969 },
+ { 183984, 191456 },
+ { 194560, 195101 },
+ { 196608, 201546 },
};
static const URange16 Lt_range16[] = {
{ 453, 453 },
@@ -2256,8 +2256,8 @@ static const URange16 Lu_range16[] = {
{ 4295, 4295 },
{ 4301, 4301 },
{ 5024, 5109 },
- { 7312, 7354 },
- { 7357, 7359 },
+ { 7312, 7354 },
+ { 7357, 7359 },
{ 7680, 7680 },
{ 7682, 7682 },
{ 7684, 7684 },
@@ -2569,33 +2569,33 @@ static const URange16 Lu_range16[] = {
{ 42916, 42916 },
{ 42918, 42918 },
{ 42920, 42920 },
- { 42922, 42926 },
+ { 42922, 42926 },
{ 42928, 42932 },
{ 42934, 42934 },
- { 42936, 42936 },
- { 42938, 42938 },
- { 42940, 42940 },
- { 42942, 42942 },
+ { 42936, 42936 },
+ { 42938, 42938 },
+ { 42940, 42940 },
+ { 42942, 42942 },
{ 42944, 42944 },
- { 42946, 42946 },
- { 42948, 42951 },
- { 42953, 42953 },
+ { 42946, 42946 },
+ { 42948, 42951 },
+ { 42953, 42953 },
{ 42960, 42960 },
{ 42966, 42966 },
{ 42968, 42968 },
- { 42997, 42997 },
+ { 42997, 42997 },
{ 65313, 65338 },
};
static const URange32 Lu_range32[] = {
{ 66560, 66599 },
- { 66736, 66771 },
+ { 66736, 66771 },
{ 66928, 66938 },
{ 66940, 66954 },
{ 66956, 66962 },
{ 66964, 66965 },
{ 68736, 68786 },
{ 71840, 71871 },
- { 93760, 93791 },
+ { 93760, 93791 },
{ 119808, 119833 },
{ 119860, 119885 },
{ 119912, 119937 },
@@ -2627,310 +2627,310 @@ static const URange32 Lu_range32[] = {
{ 120662, 120686 },
{ 120720, 120744 },
{ 120778, 120778 },
- { 125184, 125217 },
-};
-static const URange16 M_range16[] = {
- { 768, 879 },
- { 1155, 1161 },
- { 1425, 1469 },
- { 1471, 1471 },
- { 1473, 1474 },
- { 1476, 1477 },
- { 1479, 1479 },
- { 1552, 1562 },
- { 1611, 1631 },
- { 1648, 1648 },
- { 1750, 1756 },
- { 1759, 1764 },
- { 1767, 1768 },
- { 1770, 1773 },
- { 1809, 1809 },
- { 1840, 1866 },
- { 1958, 1968 },
- { 2027, 2035 },
- { 2045, 2045 },
- { 2070, 2073 },
- { 2075, 2083 },
- { 2085, 2087 },
- { 2089, 2093 },
- { 2137, 2139 },
+ { 125184, 125217 },
+};
+static const URange16 M_range16[] = {
+ { 768, 879 },
+ { 1155, 1161 },
+ { 1425, 1469 },
+ { 1471, 1471 },
+ { 1473, 1474 },
+ { 1476, 1477 },
+ { 1479, 1479 },
+ { 1552, 1562 },
+ { 1611, 1631 },
+ { 1648, 1648 },
+ { 1750, 1756 },
+ { 1759, 1764 },
+ { 1767, 1768 },
+ { 1770, 1773 },
+ { 1809, 1809 },
+ { 1840, 1866 },
+ { 1958, 1968 },
+ { 2027, 2035 },
+ { 2045, 2045 },
+ { 2070, 2073 },
+ { 2075, 2083 },
+ { 2085, 2087 },
+ { 2089, 2093 },
+ { 2137, 2139 },
{ 2200, 2207 },
{ 2250, 2273 },
- { 2275, 2307 },
- { 2362, 2364 },
- { 2366, 2383 },
- { 2385, 2391 },
- { 2402, 2403 },
- { 2433, 2435 },
- { 2492, 2492 },
- { 2494, 2500 },
- { 2503, 2504 },
- { 2507, 2509 },
- { 2519, 2519 },
- { 2530, 2531 },
- { 2558, 2558 },
- { 2561, 2563 },
- { 2620, 2620 },
- { 2622, 2626 },
- { 2631, 2632 },
- { 2635, 2637 },
- { 2641, 2641 },
- { 2672, 2673 },
- { 2677, 2677 },
- { 2689, 2691 },
- { 2748, 2748 },
- { 2750, 2757 },
- { 2759, 2761 },
- { 2763, 2765 },
- { 2786, 2787 },
- { 2810, 2815 },
- { 2817, 2819 },
- { 2876, 2876 },
- { 2878, 2884 },
- { 2887, 2888 },
- { 2891, 2893 },
- { 2901, 2903 },
- { 2914, 2915 },
- { 2946, 2946 },
- { 3006, 3010 },
- { 3014, 3016 },
- { 3018, 3021 },
- { 3031, 3031 },
- { 3072, 3076 },
+ { 2275, 2307 },
+ { 2362, 2364 },
+ { 2366, 2383 },
+ { 2385, 2391 },
+ { 2402, 2403 },
+ { 2433, 2435 },
+ { 2492, 2492 },
+ { 2494, 2500 },
+ { 2503, 2504 },
+ { 2507, 2509 },
+ { 2519, 2519 },
+ { 2530, 2531 },
+ { 2558, 2558 },
+ { 2561, 2563 },
+ { 2620, 2620 },
+ { 2622, 2626 },
+ { 2631, 2632 },
+ { 2635, 2637 },
+ { 2641, 2641 },
+ { 2672, 2673 },
+ { 2677, 2677 },
+ { 2689, 2691 },
+ { 2748, 2748 },
+ { 2750, 2757 },
+ { 2759, 2761 },
+ { 2763, 2765 },
+ { 2786, 2787 },
+ { 2810, 2815 },
+ { 2817, 2819 },
+ { 2876, 2876 },
+ { 2878, 2884 },
+ { 2887, 2888 },
+ { 2891, 2893 },
+ { 2901, 2903 },
+ { 2914, 2915 },
+ { 2946, 2946 },
+ { 3006, 3010 },
+ { 3014, 3016 },
+ { 3018, 3021 },
+ { 3031, 3031 },
+ { 3072, 3076 },
{ 3132, 3132 },
- { 3134, 3140 },
- { 3142, 3144 },
- { 3146, 3149 },
- { 3157, 3158 },
- { 3170, 3171 },
- { 3201, 3203 },
- { 3260, 3260 },
- { 3262, 3268 },
- { 3270, 3272 },
- { 3274, 3277 },
- { 3285, 3286 },
- { 3298, 3299 },
- { 3328, 3331 },
- { 3387, 3388 },
- { 3390, 3396 },
- { 3398, 3400 },
- { 3402, 3405 },
- { 3415, 3415 },
- { 3426, 3427 },
- { 3457, 3459 },
- { 3530, 3530 },
- { 3535, 3540 },
- { 3542, 3542 },
- { 3544, 3551 },
- { 3570, 3571 },
- { 3633, 3633 },
- { 3636, 3642 },
- { 3655, 3662 },
- { 3761, 3761 },
- { 3764, 3772 },
- { 3784, 3789 },
- { 3864, 3865 },
- { 3893, 3893 },
- { 3895, 3895 },
- { 3897, 3897 },
- { 3902, 3903 },
- { 3953, 3972 },
- { 3974, 3975 },
- { 3981, 3991 },
- { 3993, 4028 },
- { 4038, 4038 },
- { 4139, 4158 },
- { 4182, 4185 },
- { 4190, 4192 },
- { 4194, 4196 },
- { 4199, 4205 },
- { 4209, 4212 },
- { 4226, 4237 },
- { 4239, 4239 },
- { 4250, 4253 },
- { 4957, 4959 },
+ { 3134, 3140 },
+ { 3142, 3144 },
+ { 3146, 3149 },
+ { 3157, 3158 },
+ { 3170, 3171 },
+ { 3201, 3203 },
+ { 3260, 3260 },
+ { 3262, 3268 },
+ { 3270, 3272 },
+ { 3274, 3277 },
+ { 3285, 3286 },
+ { 3298, 3299 },
+ { 3328, 3331 },
+ { 3387, 3388 },
+ { 3390, 3396 },
+ { 3398, 3400 },
+ { 3402, 3405 },
+ { 3415, 3415 },
+ { 3426, 3427 },
+ { 3457, 3459 },
+ { 3530, 3530 },
+ { 3535, 3540 },
+ { 3542, 3542 },
+ { 3544, 3551 },
+ { 3570, 3571 },
+ { 3633, 3633 },
+ { 3636, 3642 },
+ { 3655, 3662 },
+ { 3761, 3761 },
+ { 3764, 3772 },
+ { 3784, 3789 },
+ { 3864, 3865 },
+ { 3893, 3893 },
+ { 3895, 3895 },
+ { 3897, 3897 },
+ { 3902, 3903 },
+ { 3953, 3972 },
+ { 3974, 3975 },
+ { 3981, 3991 },
+ { 3993, 4028 },
+ { 4038, 4038 },
+ { 4139, 4158 },
+ { 4182, 4185 },
+ { 4190, 4192 },
+ { 4194, 4196 },
+ { 4199, 4205 },
+ { 4209, 4212 },
+ { 4226, 4237 },
+ { 4239, 4239 },
+ { 4250, 4253 },
+ { 4957, 4959 },
{ 5906, 5909 },
- { 5938, 5940 },
- { 5970, 5971 },
- { 6002, 6003 },
- { 6068, 6099 },
- { 6109, 6109 },
- { 6155, 6157 },
+ { 5938, 5940 },
+ { 5970, 5971 },
+ { 6002, 6003 },
+ { 6068, 6099 },
+ { 6109, 6109 },
+ { 6155, 6157 },
{ 6159, 6159 },
- { 6277, 6278 },
- { 6313, 6313 },
- { 6432, 6443 },
- { 6448, 6459 },
- { 6679, 6683 },
- { 6741, 6750 },
- { 6752, 6780 },
- { 6783, 6783 },
+ { 6277, 6278 },
+ { 6313, 6313 },
+ { 6432, 6443 },
+ { 6448, 6459 },
+ { 6679, 6683 },
+ { 6741, 6750 },
+ { 6752, 6780 },
+ { 6783, 6783 },
{ 6832, 6862 },
- { 6912, 6916 },
- { 6964, 6980 },
- { 7019, 7027 },
- { 7040, 7042 },
- { 7073, 7085 },
- { 7142, 7155 },
- { 7204, 7223 },
- { 7376, 7378 },
- { 7380, 7400 },
- { 7405, 7405 },
- { 7412, 7412 },
- { 7415, 7417 },
+ { 6912, 6916 },
+ { 6964, 6980 },
+ { 7019, 7027 },
+ { 7040, 7042 },
+ { 7073, 7085 },
+ { 7142, 7155 },
+ { 7204, 7223 },
+ { 7376, 7378 },
+ { 7380, 7400 },
+ { 7405, 7405 },
+ { 7412, 7412 },
+ { 7415, 7417 },
{ 7616, 7679 },
- { 8400, 8432 },
- { 11503, 11505 },
- { 11647, 11647 },
- { 11744, 11775 },
- { 12330, 12335 },
- { 12441, 12442 },
- { 42607, 42610 },
- { 42612, 42621 },
- { 42654, 42655 },
- { 42736, 42737 },
- { 43010, 43010 },
- { 43014, 43014 },
- { 43019, 43019 },
- { 43043, 43047 },
- { 43052, 43052 },
- { 43136, 43137 },
- { 43188, 43205 },
- { 43232, 43249 },
- { 43263, 43263 },
- { 43302, 43309 },
- { 43335, 43347 },
- { 43392, 43395 },
- { 43443, 43456 },
- { 43493, 43493 },
- { 43561, 43574 },
- { 43587, 43587 },
- { 43596, 43597 },
- { 43643, 43645 },
- { 43696, 43696 },
- { 43698, 43700 },
- { 43703, 43704 },
- { 43710, 43711 },
- { 43713, 43713 },
- { 43755, 43759 },
- { 43765, 43766 },
- { 44003, 44010 },
- { 44012, 44013 },
- { 64286, 64286 },
- { 65024, 65039 },
- { 65056, 65071 },
-};
-static const URange32 M_range32[] = {
- { 66045, 66045 },
- { 66272, 66272 },
- { 66422, 66426 },
- { 68097, 68099 },
- { 68101, 68102 },
- { 68108, 68111 },
- { 68152, 68154 },
- { 68159, 68159 },
- { 68325, 68326 },
- { 68900, 68903 },
- { 69291, 69292 },
- { 69446, 69456 },
+ { 8400, 8432 },
+ { 11503, 11505 },
+ { 11647, 11647 },
+ { 11744, 11775 },
+ { 12330, 12335 },
+ { 12441, 12442 },
+ { 42607, 42610 },
+ { 42612, 42621 },
+ { 42654, 42655 },
+ { 42736, 42737 },
+ { 43010, 43010 },
+ { 43014, 43014 },
+ { 43019, 43019 },
+ { 43043, 43047 },
+ { 43052, 43052 },
+ { 43136, 43137 },
+ { 43188, 43205 },
+ { 43232, 43249 },
+ { 43263, 43263 },
+ { 43302, 43309 },
+ { 43335, 43347 },
+ { 43392, 43395 },
+ { 43443, 43456 },
+ { 43493, 43493 },
+ { 43561, 43574 },
+ { 43587, 43587 },
+ { 43596, 43597 },
+ { 43643, 43645 },
+ { 43696, 43696 },
+ { 43698, 43700 },
+ { 43703, 43704 },
+ { 43710, 43711 },
+ { 43713, 43713 },
+ { 43755, 43759 },
+ { 43765, 43766 },
+ { 44003, 44010 },
+ { 44012, 44013 },
+ { 64286, 64286 },
+ { 65024, 65039 },
+ { 65056, 65071 },
+};
+static const URange32 M_range32[] = {
+ { 66045, 66045 },
+ { 66272, 66272 },
+ { 66422, 66426 },
+ { 68097, 68099 },
+ { 68101, 68102 },
+ { 68108, 68111 },
+ { 68152, 68154 },
+ { 68159, 68159 },
+ { 68325, 68326 },
+ { 68900, 68903 },
+ { 69291, 69292 },
+ { 69446, 69456 },
{ 69506, 69509 },
- { 69632, 69634 },
- { 69688, 69702 },
+ { 69632, 69634 },
+ { 69688, 69702 },
{ 69744, 69744 },
{ 69747, 69748 },
- { 69759, 69762 },
- { 69808, 69818 },
+ { 69759, 69762 },
+ { 69808, 69818 },
{ 69826, 69826 },
- { 69888, 69890 },
- { 69927, 69940 },
- { 69957, 69958 },
- { 70003, 70003 },
- { 70016, 70018 },
- { 70067, 70080 },
- { 70089, 70092 },
- { 70094, 70095 },
- { 70188, 70199 },
- { 70206, 70206 },
- { 70367, 70378 },
- { 70400, 70403 },
- { 70459, 70460 },
- { 70462, 70468 },
- { 70471, 70472 },
- { 70475, 70477 },
- { 70487, 70487 },
- { 70498, 70499 },
- { 70502, 70508 },
- { 70512, 70516 },
- { 70709, 70726 },
- { 70750, 70750 },
- { 70832, 70851 },
- { 71087, 71093 },
- { 71096, 71104 },
- { 71132, 71133 },
- { 71216, 71232 },
- { 71339, 71351 },
- { 71453, 71467 },
- { 71724, 71738 },
- { 71984, 71989 },
- { 71991, 71992 },
- { 71995, 71998 },
- { 72000, 72000 },
- { 72002, 72003 },
- { 72145, 72151 },
- { 72154, 72160 },
- { 72164, 72164 },
- { 72193, 72202 },
- { 72243, 72249 },
- { 72251, 72254 },
- { 72263, 72263 },
- { 72273, 72283 },
- { 72330, 72345 },
- { 72751, 72758 },
- { 72760, 72767 },
- { 72850, 72871 },
- { 72873, 72886 },
- { 73009, 73014 },
- { 73018, 73018 },
- { 73020, 73021 },
- { 73023, 73029 },
- { 73031, 73031 },
- { 73098, 73102 },
- { 73104, 73105 },
- { 73107, 73111 },
- { 73459, 73462 },
- { 92912, 92916 },
- { 92976, 92982 },
- { 94031, 94031 },
- { 94033, 94087 },
- { 94095, 94098 },
- { 94180, 94180 },
- { 94192, 94193 },
- { 113821, 113822 },
+ { 69888, 69890 },
+ { 69927, 69940 },
+ { 69957, 69958 },
+ { 70003, 70003 },
+ { 70016, 70018 },
+ { 70067, 70080 },
+ { 70089, 70092 },
+ { 70094, 70095 },
+ { 70188, 70199 },
+ { 70206, 70206 },
+ { 70367, 70378 },
+ { 70400, 70403 },
+ { 70459, 70460 },
+ { 70462, 70468 },
+ { 70471, 70472 },
+ { 70475, 70477 },
+ { 70487, 70487 },
+ { 70498, 70499 },
+ { 70502, 70508 },
+ { 70512, 70516 },
+ { 70709, 70726 },
+ { 70750, 70750 },
+ { 70832, 70851 },
+ { 71087, 71093 },
+ { 71096, 71104 },
+ { 71132, 71133 },
+ { 71216, 71232 },
+ { 71339, 71351 },
+ { 71453, 71467 },
+ { 71724, 71738 },
+ { 71984, 71989 },
+ { 71991, 71992 },
+ { 71995, 71998 },
+ { 72000, 72000 },
+ { 72002, 72003 },
+ { 72145, 72151 },
+ { 72154, 72160 },
+ { 72164, 72164 },
+ { 72193, 72202 },
+ { 72243, 72249 },
+ { 72251, 72254 },
+ { 72263, 72263 },
+ { 72273, 72283 },
+ { 72330, 72345 },
+ { 72751, 72758 },
+ { 72760, 72767 },
+ { 72850, 72871 },
+ { 72873, 72886 },
+ { 73009, 73014 },
+ { 73018, 73018 },
+ { 73020, 73021 },
+ { 73023, 73029 },
+ { 73031, 73031 },
+ { 73098, 73102 },
+ { 73104, 73105 },
+ { 73107, 73111 },
+ { 73459, 73462 },
+ { 92912, 92916 },
+ { 92976, 92982 },
+ { 94031, 94031 },
+ { 94033, 94087 },
+ { 94095, 94098 },
+ { 94180, 94180 },
+ { 94192, 94193 },
+ { 113821, 113822 },
{ 118528, 118573 },
{ 118576, 118598 },
- { 119141, 119145 },
- { 119149, 119154 },
- { 119163, 119170 },
- { 119173, 119179 },
- { 119210, 119213 },
- { 119362, 119364 },
- { 121344, 121398 },
- { 121403, 121452 },
- { 121461, 121461 },
- { 121476, 121476 },
- { 121499, 121503 },
- { 121505, 121519 },
- { 122880, 122886 },
- { 122888, 122904 },
- { 122907, 122913 },
- { 122915, 122916 },
- { 122918, 122922 },
- { 123184, 123190 },
+ { 119141, 119145 },
+ { 119149, 119154 },
+ { 119163, 119170 },
+ { 119173, 119179 },
+ { 119210, 119213 },
+ { 119362, 119364 },
+ { 121344, 121398 },
+ { 121403, 121452 },
+ { 121461, 121461 },
+ { 121476, 121476 },
+ { 121499, 121503 },
+ { 121505, 121519 },
+ { 122880, 122886 },
+ { 122888, 122904 },
+ { 122907, 122913 },
+ { 122915, 122916 },
+ { 122918, 122922 },
+ { 123184, 123190 },
{ 123566, 123566 },
- { 123628, 123631 },
- { 125136, 125142 },
- { 125252, 125258 },
- { 917760, 917999 },
+ { 123628, 123631 },
+ { 125136, 125142 },
+ { 125252, 125258 },
+ { 917760, 917999 },
};
static const URange16 Mc_range16[] = {
{ 2307, 2307 },
@@ -3021,7 +3021,7 @@ static const URange16 Mc_range16[] = {
{ 7204, 7211 },
{ 7220, 7221 },
{ 7393, 7393 },
- { 7415, 7415 },
+ { 7415, 7415 },
{ 12334, 12335 },
{ 43043, 43044 },
{ 43047, 43047 },
@@ -3031,7 +3031,7 @@ static const URange16 Mc_range16[] = {
{ 43395, 43395 },
{ 43444, 43445 },
{ 43450, 43451 },
- { 43454, 43456 },
+ { 43454, 43456 },
{ 43567, 43568 },
{ 43571, 43572 },
{ 43597, 43597 },
@@ -3052,11 +3052,11 @@ static const URange32 Mc_range32[] = {
{ 69808, 69810 },
{ 69815, 69816 },
{ 69932, 69932 },
- { 69957, 69958 },
+ { 69957, 69958 },
{ 70018, 70018 },
{ 70067, 70069 },
{ 70079, 70080 },
- { 70094, 70094 },
+ { 70094, 70094 },
{ 70188, 70190 },
{ 70194, 70195 },
{ 70197, 70197 },
@@ -3068,9 +3068,9 @@ static const URange32 Mc_range32[] = {
{ 70475, 70477 },
{ 70487, 70487 },
{ 70498, 70499 },
- { 70709, 70711 },
- { 70720, 70721 },
- { 70725, 70725 },
+ { 70709, 70711 },
+ { 70720, 70721 },
+ { 70725, 70725 },
{ 70832, 70834 },
{ 70841, 70841 },
{ 70843, 70846 },
@@ -3086,40 +3086,40 @@ static const URange32 Mc_range32[] = {
{ 71350, 71350 },
{ 71456, 71457 },
{ 71462, 71462 },
- { 71724, 71726 },
- { 71736, 71736 },
- { 71984, 71989 },
- { 71991, 71992 },
- { 71997, 71997 },
- { 72000, 72000 },
- { 72002, 72002 },
- { 72145, 72147 },
- { 72156, 72159 },
- { 72164, 72164 },
- { 72249, 72249 },
- { 72279, 72280 },
- { 72343, 72343 },
- { 72751, 72751 },
- { 72766, 72766 },
- { 72873, 72873 },
- { 72881, 72881 },
- { 72884, 72884 },
- { 73098, 73102 },
- { 73107, 73108 },
- { 73110, 73110 },
- { 73461, 73462 },
- { 94033, 94087 },
- { 94192, 94193 },
+ { 71724, 71726 },
+ { 71736, 71736 },
+ { 71984, 71989 },
+ { 71991, 71992 },
+ { 71997, 71997 },
+ { 72000, 72000 },
+ { 72002, 72002 },
+ { 72145, 72147 },
+ { 72156, 72159 },
+ { 72164, 72164 },
+ { 72249, 72249 },
+ { 72279, 72280 },
+ { 72343, 72343 },
+ { 72751, 72751 },
+ { 72766, 72766 },
+ { 72873, 72873 },
+ { 72881, 72881 },
+ { 72884, 72884 },
+ { 73098, 73102 },
+ { 73107, 73108 },
+ { 73110, 73110 },
+ { 73461, 73462 },
+ { 94033, 94087 },
+ { 94192, 94193 },
{ 119141, 119142 },
{ 119149, 119154 },
};
-static const URange16 Me_range16[] = {
- { 1160, 1161 },
- { 6846, 6846 },
- { 8413, 8416 },
- { 8418, 8420 },
- { 42608, 42610 },
-};
+static const URange16 Me_range16[] = {
+ { 1160, 1161 },
+ { 6846, 6846 },
+ { 8413, 8416 },
+ { 8418, 8420 },
+ { 42608, 42610 },
+};
static const URange16 Mn_range16[] = {
{ 768, 879 },
{ 1155, 1159 },
@@ -3139,7 +3139,7 @@ static const URange16 Mn_range16[] = {
{ 1840, 1866 },
{ 1958, 1968 },
{ 2027, 2035 },
- { 2045, 2045 },
+ { 2045, 2045 },
{ 2070, 2073 },
{ 2075, 2083 },
{ 2085, 2087 },
@@ -3159,7 +3159,7 @@ static const URange16 Mn_range16[] = {
{ 2497, 2500 },
{ 2509, 2509 },
{ 2530, 2531 },
- { 2558, 2558 },
+ { 2558, 2558 },
{ 2561, 2562 },
{ 2620, 2620 },
{ 2625, 2626 },
@@ -3174,19 +3174,19 @@ static const URange16 Mn_range16[] = {
{ 2759, 2760 },
{ 2765, 2765 },
{ 2786, 2787 },
- { 2810, 2815 },
+ { 2810, 2815 },
{ 2817, 2817 },
{ 2876, 2876 },
{ 2879, 2879 },
{ 2881, 2884 },
{ 2893, 2893 },
- { 2901, 2902 },
+ { 2901, 2902 },
{ 2914, 2915 },
{ 2946, 2946 },
{ 3008, 3008 },
{ 3021, 3021 },
{ 3072, 3072 },
- { 3076, 3076 },
+ { 3076, 3076 },
{ 3132, 3132 },
{ 3134, 3136 },
{ 3142, 3144 },
@@ -3199,12 +3199,12 @@ static const URange16 Mn_range16[] = {
{ 3270, 3270 },
{ 3276, 3277 },
{ 3298, 3299 },
- { 3328, 3329 },
- { 3387, 3388 },
+ { 3328, 3329 },
+ { 3387, 3388 },
{ 3393, 3396 },
{ 3405, 3405 },
{ 3426, 3427 },
- { 3457, 3457 },
+ { 3457, 3457 },
{ 3530, 3530 },
{ 3538, 3540 },
{ 3542, 3542 },
@@ -3212,7 +3212,7 @@ static const URange16 Mn_range16[] = {
{ 3636, 3642 },
{ 3655, 3662 },
{ 3761, 3761 },
- { 3764, 3772 },
+ { 3764, 3772 },
{ 3784, 3789 },
{ 3864, 3865 },
{ 3893, 3893 },
@@ -3247,7 +3247,7 @@ static const URange16 Mn_range16[] = {
{ 6109, 6109 },
{ 6155, 6157 },
{ 6159, 6159 },
- { 6277, 6278 },
+ { 6277, 6278 },
{ 6313, 6313 },
{ 6432, 6434 },
{ 6439, 6440 },
@@ -3303,16 +3303,16 @@ static const URange16 Mn_range16[] = {
{ 43014, 43014 },
{ 43019, 43019 },
{ 43045, 43046 },
- { 43052, 43052 },
- { 43204, 43205 },
+ { 43052, 43052 },
+ { 43204, 43205 },
{ 43232, 43249 },
- { 43263, 43263 },
+ { 43263, 43263 },
{ 43302, 43309 },
{ 43335, 43345 },
{ 43392, 43394 },
{ 43443, 43443 },
{ 43446, 43449 },
- { 43452, 43453 },
+ { 43452, 43453 },
{ 43493, 43493 },
{ 43561, 43566 },
{ 43569, 43570 },
@@ -3344,9 +3344,9 @@ static const URange32 Mn_range32[] = {
{ 68152, 68154 },
{ 68159, 68159 },
{ 68325, 68326 },
- { 68900, 68903 },
- { 69291, 69292 },
- { 69446, 69456 },
+ { 68900, 68903 },
+ { 69291, 69292 },
+ { 69446, 69456 },
{ 69506, 69509 },
{ 69633, 69633 },
{ 69688, 69702 },
@@ -3362,23 +3362,23 @@ static const URange32 Mn_range32[] = {
{ 70003, 70003 },
{ 70016, 70017 },
{ 70070, 70078 },
- { 70089, 70092 },
- { 70095, 70095 },
+ { 70089, 70092 },
+ { 70095, 70095 },
{ 70191, 70193 },
{ 70196, 70196 },
{ 70198, 70199 },
- { 70206, 70206 },
+ { 70206, 70206 },
{ 70367, 70367 },
{ 70371, 70378 },
{ 70400, 70401 },
- { 70459, 70460 },
+ { 70459, 70460 },
{ 70464, 70464 },
{ 70502, 70508 },
{ 70512, 70516 },
- { 70712, 70719 },
- { 70722, 70724 },
- { 70726, 70726 },
- { 70750, 70750 },
+ { 70712, 70719 },
+ { 70722, 70724 },
+ { 70726, 70726 },
+ { 70750, 70750 },
{ 70835, 70840 },
{ 70842, 70842 },
{ 70847, 70848 },
@@ -3397,43 +3397,43 @@ static const URange32 Mn_range32[] = {
{ 71453, 71455 },
{ 71458, 71461 },
{ 71463, 71467 },
- { 71727, 71735 },
- { 71737, 71738 },
- { 71995, 71996 },
- { 71998, 71998 },
- { 72003, 72003 },
- { 72148, 72151 },
- { 72154, 72155 },
- { 72160, 72160 },
- { 72193, 72202 },
- { 72243, 72248 },
- { 72251, 72254 },
- { 72263, 72263 },
- { 72273, 72278 },
- { 72281, 72283 },
- { 72330, 72342 },
- { 72344, 72345 },
- { 72752, 72758 },
- { 72760, 72765 },
- { 72767, 72767 },
- { 72850, 72871 },
- { 72874, 72880 },
- { 72882, 72883 },
- { 72885, 72886 },
- { 73009, 73014 },
- { 73018, 73018 },
- { 73020, 73021 },
- { 73023, 73029 },
- { 73031, 73031 },
- { 73104, 73105 },
- { 73109, 73109 },
- { 73111, 73111 },
- { 73459, 73460 },
+ { 71727, 71735 },
+ { 71737, 71738 },
+ { 71995, 71996 },
+ { 71998, 71998 },
+ { 72003, 72003 },
+ { 72148, 72151 },
+ { 72154, 72155 },
+ { 72160, 72160 },
+ { 72193, 72202 },
+ { 72243, 72248 },
+ { 72251, 72254 },
+ { 72263, 72263 },
+ { 72273, 72278 },
+ { 72281, 72283 },
+ { 72330, 72342 },
+ { 72344, 72345 },
+ { 72752, 72758 },
+ { 72760, 72765 },
+ { 72767, 72767 },
+ { 72850, 72871 },
+ { 72874, 72880 },
+ { 72882, 72883 },
+ { 72885, 72886 },
+ { 73009, 73014 },
+ { 73018, 73018 },
+ { 73020, 73021 },
+ { 73023, 73029 },
+ { 73031, 73031 },
+ { 73104, 73105 },
+ { 73109, 73109 },
+ { 73111, 73111 },
+ { 73459, 73460 },
{ 92912, 92916 },
{ 92976, 92982 },
- { 94031, 94031 },
+ { 94031, 94031 },
{ 94095, 94098 },
- { 94180, 94180 },
+ { 94180, 94180 },
{ 113821, 113822 },
{ 118528, 118573 },
{ 118576, 118598 },
@@ -3448,16 +3448,16 @@ static const URange32 Mn_range32[] = {
{ 121476, 121476 },
{ 121499, 121503 },
{ 121505, 121519 },
- { 122880, 122886 },
- { 122888, 122904 },
- { 122907, 122913 },
- { 122915, 122916 },
- { 122918, 122922 },
- { 123184, 123190 },
+ { 122880, 122886 },
+ { 122888, 122904 },
+ { 122907, 122913 },
+ { 122915, 122916 },
+ { 122918, 122922 },
+ { 123184, 123190 },
{ 123566, 123566 },
- { 123628, 123631 },
+ { 123628, 123631 },
{ 125136, 125142 },
- { 125252, 125258 },
+ { 125252, 125258 },
{ 917760, 917999 },
};
static const URange16 N_range16[] = {
@@ -3479,8 +3479,8 @@ static const URange16 N_range16[] = {
{ 3174, 3183 },
{ 3192, 3198 },
{ 3302, 3311 },
- { 3416, 3422 },
- { 3430, 3448 },
+ { 3416, 3422 },
+ { 3430, 3448 },
{ 3558, 3567 },
{ 3664, 3673 },
{ 3792, 3801 },
@@ -3547,7 +3547,7 @@ static const URange32 N_range32[] = {
{ 68028, 68029 },
{ 68032, 68047 },
{ 68050, 68095 },
- { 68160, 68168 },
+ { 68160, 68168 },
{ 68221, 68222 },
{ 68253, 68255 },
{ 68331, 68335 },
@@ -3555,206 +3555,206 @@ static const URange32 N_range32[] = {
{ 68472, 68479 },
{ 68521, 68527 },
{ 68858, 68863 },
- { 68912, 68921 },
+ { 68912, 68921 },
{ 69216, 69246 },
- { 69405, 69414 },
- { 69457, 69460 },
- { 69573, 69579 },
+ { 69405, 69414 },
+ { 69457, 69460 },
+ { 69573, 69579 },
{ 69714, 69743 },
{ 69872, 69881 },
{ 69942, 69951 },
{ 70096, 70105 },
{ 70113, 70132 },
{ 70384, 70393 },
- { 70736, 70745 },
+ { 70736, 70745 },
{ 70864, 70873 },
{ 71248, 71257 },
{ 71360, 71369 },
{ 71472, 71483 },
{ 71904, 71922 },
- { 72016, 72025 },
- { 72784, 72812 },
- { 73040, 73049 },
- { 73120, 73129 },
- { 73664, 73684 },
+ { 72016, 72025 },
+ { 72784, 72812 },
+ { 73040, 73049 },
+ { 73120, 73129 },
+ { 73664, 73684 },
{ 74752, 74862 },
{ 92768, 92777 },
{ 92864, 92873 },
{ 93008, 93017 },
{ 93019, 93025 },
- { 93824, 93846 },
- { 119520, 119539 },
- { 119648, 119672 },
+ { 93824, 93846 },
+ { 119520, 119539 },
+ { 119648, 119672 },
{ 120782, 120831 },
- { 123200, 123209 },
- { 123632, 123641 },
+ { 123200, 123209 },
+ { 123632, 123641 },
{ 125127, 125135 },
- { 125264, 125273 },
- { 126065, 126123 },
- { 126125, 126127 },
- { 126129, 126132 },
- { 126209, 126253 },
- { 126255, 126269 },
+ { 125264, 125273 },
+ { 126065, 126123 },
+ { 126125, 126127 },
+ { 126129, 126132 },
+ { 126209, 126253 },
+ { 126255, 126269 },
{ 127232, 127244 },
- { 130032, 130041 },
-};
-static const URange16 Nd_range16[] = {
- { 48, 57 },
- { 1632, 1641 },
- { 1776, 1785 },
- { 1984, 1993 },
- { 2406, 2415 },
- { 2534, 2543 },
- { 2662, 2671 },
- { 2790, 2799 },
- { 2918, 2927 },
- { 3046, 3055 },
- { 3174, 3183 },
- { 3302, 3311 },
- { 3430, 3439 },
- { 3558, 3567 },
- { 3664, 3673 },
- { 3792, 3801 },
- { 3872, 3881 },
- { 4160, 4169 },
- { 4240, 4249 },
- { 6112, 6121 },
- { 6160, 6169 },
- { 6470, 6479 },
- { 6608, 6617 },
- { 6784, 6793 },
- { 6800, 6809 },
- { 6992, 7001 },
- { 7088, 7097 },
- { 7232, 7241 },
- { 7248, 7257 },
- { 42528, 42537 },
- { 43216, 43225 },
- { 43264, 43273 },
- { 43472, 43481 },
- { 43504, 43513 },
- { 43600, 43609 },
- { 44016, 44025 },
- { 65296, 65305 },
-};
-static const URange32 Nd_range32[] = {
- { 66720, 66729 },
- { 68912, 68921 },
- { 69734, 69743 },
- { 69872, 69881 },
- { 69942, 69951 },
- { 70096, 70105 },
- { 70384, 70393 },
- { 70736, 70745 },
- { 70864, 70873 },
- { 71248, 71257 },
- { 71360, 71369 },
- { 71472, 71481 },
- { 71904, 71913 },
- { 72016, 72025 },
- { 72784, 72793 },
- { 73040, 73049 },
- { 73120, 73129 },
- { 92768, 92777 },
+ { 130032, 130041 },
+};
+static const URange16 Nd_range16[] = {
+ { 48, 57 },
+ { 1632, 1641 },
+ { 1776, 1785 },
+ { 1984, 1993 },
+ { 2406, 2415 },
+ { 2534, 2543 },
+ { 2662, 2671 },
+ { 2790, 2799 },
+ { 2918, 2927 },
+ { 3046, 3055 },
+ { 3174, 3183 },
+ { 3302, 3311 },
+ { 3430, 3439 },
+ { 3558, 3567 },
+ { 3664, 3673 },
+ { 3792, 3801 },
+ { 3872, 3881 },
+ { 4160, 4169 },
+ { 4240, 4249 },
+ { 6112, 6121 },
+ { 6160, 6169 },
+ { 6470, 6479 },
+ { 6608, 6617 },
+ { 6784, 6793 },
+ { 6800, 6809 },
+ { 6992, 7001 },
+ { 7088, 7097 },
+ { 7232, 7241 },
+ { 7248, 7257 },
+ { 42528, 42537 },
+ { 43216, 43225 },
+ { 43264, 43273 },
+ { 43472, 43481 },
+ { 43504, 43513 },
+ { 43600, 43609 },
+ { 44016, 44025 },
+ { 65296, 65305 },
+};
+static const URange32 Nd_range32[] = {
+ { 66720, 66729 },
+ { 68912, 68921 },
+ { 69734, 69743 },
+ { 69872, 69881 },
+ { 69942, 69951 },
+ { 70096, 70105 },
+ { 70384, 70393 },
+ { 70736, 70745 },
+ { 70864, 70873 },
+ { 71248, 71257 },
+ { 71360, 71369 },
+ { 71472, 71481 },
+ { 71904, 71913 },
+ { 72016, 72025 },
+ { 72784, 72793 },
+ { 73040, 73049 },
+ { 73120, 73129 },
+ { 92768, 92777 },
{ 92864, 92873 },
- { 93008, 93017 },
- { 120782, 120831 },
- { 123200, 123209 },
- { 123632, 123641 },
- { 125264, 125273 },
- { 130032, 130041 },
-};
-static const URange16 Nl_range16[] = {
- { 5870, 5872 },
- { 8544, 8578 },
- { 8581, 8584 },
- { 12295, 12295 },
- { 12321, 12329 },
- { 12344, 12346 },
- { 42726, 42735 },
-};
-static const URange32 Nl_range32[] = {
- { 65856, 65908 },
- { 66369, 66369 },
- { 66378, 66378 },
- { 66513, 66517 },
- { 74752, 74862 },
-};
-static const URange16 No_range16[] = {
- { 178, 179 },
- { 185, 185 },
- { 188, 190 },
- { 2548, 2553 },
- { 2930, 2935 },
- { 3056, 3058 },
- { 3192, 3198 },
- { 3416, 3422 },
- { 3440, 3448 },
- { 3882, 3891 },
- { 4969, 4988 },
- { 6128, 6137 },
- { 6618, 6618 },
- { 8304, 8304 },
- { 8308, 8313 },
- { 8320, 8329 },
- { 8528, 8543 },
- { 8585, 8585 },
- { 9312, 9371 },
- { 9450, 9471 },
- { 10102, 10131 },
- { 11517, 11517 },
- { 12690, 12693 },
- { 12832, 12841 },
- { 12872, 12879 },
- { 12881, 12895 },
- { 12928, 12937 },
- { 12977, 12991 },
- { 43056, 43061 },
-};
-static const URange32 No_range32[] = {
- { 65799, 65843 },
- { 65909, 65912 },
- { 65930, 65931 },
- { 66273, 66299 },
- { 66336, 66339 },
- { 67672, 67679 },
- { 67705, 67711 },
- { 67751, 67759 },
- { 67835, 67839 },
- { 67862, 67867 },
- { 68028, 68029 },
- { 68032, 68047 },
- { 68050, 68095 },
- { 68160, 68168 },
- { 68221, 68222 },
- { 68253, 68255 },
- { 68331, 68335 },
- { 68440, 68447 },
- { 68472, 68479 },
- { 68521, 68527 },
- { 68858, 68863 },
- { 69216, 69246 },
- { 69405, 69414 },
- { 69457, 69460 },
- { 69573, 69579 },
- { 69714, 69733 },
- { 70113, 70132 },
- { 71482, 71483 },
- { 71914, 71922 },
- { 72794, 72812 },
- { 73664, 73684 },
- { 93019, 93025 },
- { 93824, 93846 },
- { 119520, 119539 },
- { 119648, 119672 },
- { 125127, 125135 },
- { 126065, 126123 },
- { 126125, 126127 },
- { 126129, 126132 },
- { 126209, 126253 },
- { 126255, 126269 },
- { 127232, 127244 },
-};
+ { 93008, 93017 },
+ { 120782, 120831 },
+ { 123200, 123209 },
+ { 123632, 123641 },
+ { 125264, 125273 },
+ { 130032, 130041 },
+};
+static const URange16 Nl_range16[] = {
+ { 5870, 5872 },
+ { 8544, 8578 },
+ { 8581, 8584 },
+ { 12295, 12295 },
+ { 12321, 12329 },
+ { 12344, 12346 },
+ { 42726, 42735 },
+};
+static const URange32 Nl_range32[] = {
+ { 65856, 65908 },
+ { 66369, 66369 },
+ { 66378, 66378 },
+ { 66513, 66517 },
+ { 74752, 74862 },
+};
+static const URange16 No_range16[] = {
+ { 178, 179 },
+ { 185, 185 },
+ { 188, 190 },
+ { 2548, 2553 },
+ { 2930, 2935 },
+ { 3056, 3058 },
+ { 3192, 3198 },
+ { 3416, 3422 },
+ { 3440, 3448 },
+ { 3882, 3891 },
+ { 4969, 4988 },
+ { 6128, 6137 },
+ { 6618, 6618 },
+ { 8304, 8304 },
+ { 8308, 8313 },
+ { 8320, 8329 },
+ { 8528, 8543 },
+ { 8585, 8585 },
+ { 9312, 9371 },
+ { 9450, 9471 },
+ { 10102, 10131 },
+ { 11517, 11517 },
+ { 12690, 12693 },
+ { 12832, 12841 },
+ { 12872, 12879 },
+ { 12881, 12895 },
+ { 12928, 12937 },
+ { 12977, 12991 },
+ { 43056, 43061 },
+};
+static const URange32 No_range32[] = {
+ { 65799, 65843 },
+ { 65909, 65912 },
+ { 65930, 65931 },
+ { 66273, 66299 },
+ { 66336, 66339 },
+ { 67672, 67679 },
+ { 67705, 67711 },
+ { 67751, 67759 },
+ { 67835, 67839 },
+ { 67862, 67867 },
+ { 68028, 68029 },
+ { 68032, 68047 },
+ { 68050, 68095 },
+ { 68160, 68168 },
+ { 68221, 68222 },
+ { 68253, 68255 },
+ { 68331, 68335 },
+ { 68440, 68447 },
+ { 68472, 68479 },
+ { 68521, 68527 },
+ { 68858, 68863 },
+ { 69216, 69246 },
+ { 69405, 69414 },
+ { 69457, 69460 },
+ { 69573, 69579 },
+ { 69714, 69733 },
+ { 70113, 70132 },
+ { 71482, 71483 },
+ { 71914, 71922 },
+ { 72794, 72812 },
+ { 73664, 73684 },
+ { 93019, 93025 },
+ { 93824, 93846 },
+ { 119520, 119539 },
+ { 119648, 119672 },
+ { 125127, 125135 },
+ { 126065, 126123 },
+ { 126125, 126127 },
+ { 126129, 126132 },
+ { 126209, 126253 },
+ { 126255, 126269 },
+ { 127232, 127244 },
+};
static const URange16 P_range16[] = {
{ 33, 35 },
{ 37, 42 },
@@ -3792,11 +3792,11 @@ static const URange16 P_range16[] = {
{ 2142, 2142 },
{ 2404, 2405 },
{ 2416, 2416 },
- { 2557, 2557 },
- { 2678, 2678 },
+ { 2557, 2557 },
+ { 2678, 2678 },
{ 2800, 2800 },
- { 3191, 3191 },
- { 3204, 3204 },
+ { 3191, 3191 },
+ { 3204, 3204 },
{ 3572, 3572 },
{ 3663, 3663 },
{ 3674, 3675 },
@@ -3810,7 +3810,7 @@ static const URange16 P_range16[] = {
{ 4347, 4347 },
{ 4960, 4968 },
{ 5120, 5120 },
- { 5742, 5742 },
+ { 5742, 5742 },
{ 5787, 5788 },
{ 5867, 5869 },
{ 5941, 5942 },
@@ -3846,7 +3846,7 @@ static const URange16 P_range16[] = {
{ 11518, 11519 },
{ 11632, 11632 },
{ 11776, 11822 },
- { 11824, 11855 },
+ { 11824, 11855 },
{ 11858, 11869 },
{ 12289, 12291 },
{ 12296, 12305 },
@@ -3903,455 +3903,455 @@ static const URange32 P_range32[] = {
{ 68336, 68342 },
{ 68409, 68415 },
{ 68505, 68508 },
- { 69293, 69293 },
- { 69461, 69465 },
+ { 69293, 69293 },
+ { 69461, 69465 },
{ 69510, 69513 },
{ 69703, 69709 },
{ 69819, 69820 },
{ 69822, 69825 },
{ 69952, 69955 },
{ 70004, 70005 },
- { 70085, 70088 },
+ { 70085, 70088 },
{ 70093, 70093 },
{ 70107, 70107 },
{ 70109, 70111 },
{ 70200, 70205 },
{ 70313, 70313 },
- { 70731, 70735 },
- { 70746, 70747 },
- { 70749, 70749 },
+ { 70731, 70735 },
+ { 70746, 70747 },
+ { 70749, 70749 },
{ 70854, 70854 },
{ 71105, 71127 },
{ 71233, 71235 },
- { 71264, 71276 },
+ { 71264, 71276 },
{ 71353, 71353 },
{ 71484, 71486 },
- { 71739, 71739 },
- { 72004, 72006 },
- { 72162, 72162 },
- { 72255, 72262 },
- { 72346, 72348 },
- { 72350, 72354 },
- { 72769, 72773 },
- { 72816, 72817 },
- { 73463, 73464 },
- { 73727, 73727 },
+ { 71739, 71739 },
+ { 72004, 72006 },
+ { 72162, 72162 },
+ { 72255, 72262 },
+ { 72346, 72348 },
+ { 72350, 72354 },
+ { 72769, 72773 },
+ { 72816, 72817 },
+ { 73463, 73464 },
+ { 73727, 73727 },
{ 74864, 74868 },
{ 77809, 77810 },
{ 92782, 92783 },
{ 92917, 92917 },
{ 92983, 92987 },
{ 92996, 92996 },
- { 93847, 93850 },
- { 94178, 94178 },
+ { 93847, 93850 },
+ { 94178, 94178 },
{ 113823, 113823 },
{ 121479, 121483 },
- { 125278, 125279 },
-};
-static const URange16 Pc_range16[] = {
- { 95, 95 },
- { 8255, 8256 },
- { 8276, 8276 },
- { 65075, 65076 },
- { 65101, 65103 },
- { 65343, 65343 },
-};
-static const URange16 Pd_range16[] = {
- { 45, 45 },
- { 1418, 1418 },
- { 1470, 1470 },
- { 5120, 5120 },
- { 6150, 6150 },
- { 8208, 8213 },
- { 11799, 11799 },
- { 11802, 11802 },
- { 11834, 11835 },
- { 11840, 11840 },
+ { 125278, 125279 },
+};
+static const URange16 Pc_range16[] = {
+ { 95, 95 },
+ { 8255, 8256 },
+ { 8276, 8276 },
+ { 65075, 65076 },
+ { 65101, 65103 },
+ { 65343, 65343 },
+};
+static const URange16 Pd_range16[] = {
+ { 45, 45 },
+ { 1418, 1418 },
+ { 1470, 1470 },
+ { 5120, 5120 },
+ { 6150, 6150 },
+ { 8208, 8213 },
+ { 11799, 11799 },
+ { 11802, 11802 },
+ { 11834, 11835 },
+ { 11840, 11840 },
{ 11869, 11869 },
- { 12316, 12316 },
- { 12336, 12336 },
- { 12448, 12448 },
- { 65073, 65074 },
- { 65112, 65112 },
- { 65123, 65123 },
- { 65293, 65293 },
-};
-static const URange32 Pd_range32[] = {
- { 69293, 69293 },
-};
-static const URange16 Pe_range16[] = {
- { 41, 41 },
- { 93, 93 },
- { 125, 125 },
- { 3899, 3899 },
- { 3901, 3901 },
- { 5788, 5788 },
- { 8262, 8262 },
- { 8318, 8318 },
- { 8334, 8334 },
- { 8969, 8969 },
- { 8971, 8971 },
- { 9002, 9002 },
- { 10089, 10089 },
- { 10091, 10091 },
- { 10093, 10093 },
- { 10095, 10095 },
- { 10097, 10097 },
- { 10099, 10099 },
- { 10101, 10101 },
- { 10182, 10182 },
- { 10215, 10215 },
- { 10217, 10217 },
- { 10219, 10219 },
- { 10221, 10221 },
- { 10223, 10223 },
- { 10628, 10628 },
- { 10630, 10630 },
- { 10632, 10632 },
- { 10634, 10634 },
- { 10636, 10636 },
- { 10638, 10638 },
- { 10640, 10640 },
- { 10642, 10642 },
- { 10644, 10644 },
- { 10646, 10646 },
- { 10648, 10648 },
- { 10713, 10713 },
- { 10715, 10715 },
- { 10749, 10749 },
- { 11811, 11811 },
- { 11813, 11813 },
- { 11815, 11815 },
- { 11817, 11817 },
+ { 12316, 12316 },
+ { 12336, 12336 },
+ { 12448, 12448 },
+ { 65073, 65074 },
+ { 65112, 65112 },
+ { 65123, 65123 },
+ { 65293, 65293 },
+};
+static const URange32 Pd_range32[] = {
+ { 69293, 69293 },
+};
+static const URange16 Pe_range16[] = {
+ { 41, 41 },
+ { 93, 93 },
+ { 125, 125 },
+ { 3899, 3899 },
+ { 3901, 3901 },
+ { 5788, 5788 },
+ { 8262, 8262 },
+ { 8318, 8318 },
+ { 8334, 8334 },
+ { 8969, 8969 },
+ { 8971, 8971 },
+ { 9002, 9002 },
+ { 10089, 10089 },
+ { 10091, 10091 },
+ { 10093, 10093 },
+ { 10095, 10095 },
+ { 10097, 10097 },
+ { 10099, 10099 },
+ { 10101, 10101 },
+ { 10182, 10182 },
+ { 10215, 10215 },
+ { 10217, 10217 },
+ { 10219, 10219 },
+ { 10221, 10221 },
+ { 10223, 10223 },
+ { 10628, 10628 },
+ { 10630, 10630 },
+ { 10632, 10632 },
+ { 10634, 10634 },
+ { 10636, 10636 },
+ { 10638, 10638 },
+ { 10640, 10640 },
+ { 10642, 10642 },
+ { 10644, 10644 },
+ { 10646, 10646 },
+ { 10648, 10648 },
+ { 10713, 10713 },
+ { 10715, 10715 },
+ { 10749, 10749 },
+ { 11811, 11811 },
+ { 11813, 11813 },
+ { 11815, 11815 },
+ { 11817, 11817 },
{ 11862, 11862 },
{ 11864, 11864 },
{ 11866, 11866 },
{ 11868, 11868 },
- { 12297, 12297 },
- { 12299, 12299 },
- { 12301, 12301 },
- { 12303, 12303 },
- { 12305, 12305 },
- { 12309, 12309 },
- { 12311, 12311 },
- { 12313, 12313 },
- { 12315, 12315 },
- { 12318, 12319 },
- { 64830, 64830 },
- { 65048, 65048 },
- { 65078, 65078 },
- { 65080, 65080 },
- { 65082, 65082 },
- { 65084, 65084 },
- { 65086, 65086 },
- { 65088, 65088 },
- { 65090, 65090 },
- { 65092, 65092 },
- { 65096, 65096 },
- { 65114, 65114 },
- { 65116, 65116 },
- { 65118, 65118 },
- { 65289, 65289 },
- { 65341, 65341 },
- { 65373, 65373 },
- { 65376, 65376 },
- { 65379, 65379 },
-};
-static const URange16 Pf_range16[] = {
- { 187, 187 },
- { 8217, 8217 },
- { 8221, 8221 },
- { 8250, 8250 },
- { 11779, 11779 },
- { 11781, 11781 },
- { 11786, 11786 },
- { 11789, 11789 },
- { 11805, 11805 },
- { 11809, 11809 },
-};
-static const URange16 Pi_range16[] = {
- { 171, 171 },
- { 8216, 8216 },
- { 8219, 8220 },
- { 8223, 8223 },
- { 8249, 8249 },
- { 11778, 11778 },
- { 11780, 11780 },
- { 11785, 11785 },
- { 11788, 11788 },
- { 11804, 11804 },
- { 11808, 11808 },
-};
-static const URange16 Po_range16[] = {
- { 33, 35 },
- { 37, 39 },
- { 42, 42 },
- { 44, 44 },
- { 46, 47 },
- { 58, 59 },
- { 63, 64 },
- { 92, 92 },
- { 161, 161 },
- { 167, 167 },
- { 182, 183 },
- { 191, 191 },
- { 894, 894 },
- { 903, 903 },
- { 1370, 1375 },
- { 1417, 1417 },
- { 1472, 1472 },
- { 1475, 1475 },
- { 1478, 1478 },
- { 1523, 1524 },
- { 1545, 1546 },
- { 1548, 1549 },
- { 1563, 1563 },
+ { 12297, 12297 },
+ { 12299, 12299 },
+ { 12301, 12301 },
+ { 12303, 12303 },
+ { 12305, 12305 },
+ { 12309, 12309 },
+ { 12311, 12311 },
+ { 12313, 12313 },
+ { 12315, 12315 },
+ { 12318, 12319 },
+ { 64830, 64830 },
+ { 65048, 65048 },
+ { 65078, 65078 },
+ { 65080, 65080 },
+ { 65082, 65082 },
+ { 65084, 65084 },
+ { 65086, 65086 },
+ { 65088, 65088 },
+ { 65090, 65090 },
+ { 65092, 65092 },
+ { 65096, 65096 },
+ { 65114, 65114 },
+ { 65116, 65116 },
+ { 65118, 65118 },
+ { 65289, 65289 },
+ { 65341, 65341 },
+ { 65373, 65373 },
+ { 65376, 65376 },
+ { 65379, 65379 },
+};
+static const URange16 Pf_range16[] = {
+ { 187, 187 },
+ { 8217, 8217 },
+ { 8221, 8221 },
+ { 8250, 8250 },
+ { 11779, 11779 },
+ { 11781, 11781 },
+ { 11786, 11786 },
+ { 11789, 11789 },
+ { 11805, 11805 },
+ { 11809, 11809 },
+};
+static const URange16 Pi_range16[] = {
+ { 171, 171 },
+ { 8216, 8216 },
+ { 8219, 8220 },
+ { 8223, 8223 },
+ { 8249, 8249 },
+ { 11778, 11778 },
+ { 11780, 11780 },
+ { 11785, 11785 },
+ { 11788, 11788 },
+ { 11804, 11804 },
+ { 11808, 11808 },
+};
+static const URange16 Po_range16[] = {
+ { 33, 35 },
+ { 37, 39 },
+ { 42, 42 },
+ { 44, 44 },
+ { 46, 47 },
+ { 58, 59 },
+ { 63, 64 },
+ { 92, 92 },
+ { 161, 161 },
+ { 167, 167 },
+ { 182, 183 },
+ { 191, 191 },
+ { 894, 894 },
+ { 903, 903 },
+ { 1370, 1375 },
+ { 1417, 1417 },
+ { 1472, 1472 },
+ { 1475, 1475 },
+ { 1478, 1478 },
+ { 1523, 1524 },
+ { 1545, 1546 },
+ { 1548, 1549 },
+ { 1563, 1563 },
{ 1565, 1567 },
- { 1642, 1645 },
- { 1748, 1748 },
- { 1792, 1805 },
- { 2039, 2041 },
- { 2096, 2110 },
- { 2142, 2142 },
- { 2404, 2405 },
- { 2416, 2416 },
- { 2557, 2557 },
- { 2678, 2678 },
- { 2800, 2800 },
- { 3191, 3191 },
- { 3204, 3204 },
- { 3572, 3572 },
- { 3663, 3663 },
- { 3674, 3675 },
- { 3844, 3858 },
- { 3860, 3860 },
- { 3973, 3973 },
- { 4048, 4052 },
- { 4057, 4058 },
- { 4170, 4175 },
- { 4347, 4347 },
- { 4960, 4968 },
- { 5742, 5742 },
- { 5867, 5869 },
- { 5941, 5942 },
- { 6100, 6102 },
- { 6104, 6106 },
- { 6144, 6149 },
- { 6151, 6154 },
- { 6468, 6469 },
- { 6686, 6687 },
- { 6816, 6822 },
- { 6824, 6829 },
- { 7002, 7008 },
+ { 1642, 1645 },
+ { 1748, 1748 },
+ { 1792, 1805 },
+ { 2039, 2041 },
+ { 2096, 2110 },
+ { 2142, 2142 },
+ { 2404, 2405 },
+ { 2416, 2416 },
+ { 2557, 2557 },
+ { 2678, 2678 },
+ { 2800, 2800 },
+ { 3191, 3191 },
+ { 3204, 3204 },
+ { 3572, 3572 },
+ { 3663, 3663 },
+ { 3674, 3675 },
+ { 3844, 3858 },
+ { 3860, 3860 },
+ { 3973, 3973 },
+ { 4048, 4052 },
+ { 4057, 4058 },
+ { 4170, 4175 },
+ { 4347, 4347 },
+ { 4960, 4968 },
+ { 5742, 5742 },
+ { 5867, 5869 },
+ { 5941, 5942 },
+ { 6100, 6102 },
+ { 6104, 6106 },
+ { 6144, 6149 },
+ { 6151, 6154 },
+ { 6468, 6469 },
+ { 6686, 6687 },
+ { 6816, 6822 },
+ { 6824, 6829 },
+ { 7002, 7008 },
{ 7037, 7038 },
- { 7164, 7167 },
- { 7227, 7231 },
- { 7294, 7295 },
- { 7360, 7367 },
- { 7379, 7379 },
- { 8214, 8215 },
- { 8224, 8231 },
- { 8240, 8248 },
- { 8251, 8254 },
- { 8257, 8259 },
- { 8263, 8273 },
- { 8275, 8275 },
- { 8277, 8286 },
- { 11513, 11516 },
- { 11518, 11519 },
- { 11632, 11632 },
- { 11776, 11777 },
- { 11782, 11784 },
- { 11787, 11787 },
- { 11790, 11798 },
- { 11800, 11801 },
- { 11803, 11803 },
- { 11806, 11807 },
- { 11818, 11822 },
- { 11824, 11833 },
- { 11836, 11839 },
- { 11841, 11841 },
- { 11843, 11855 },
+ { 7164, 7167 },
+ { 7227, 7231 },
+ { 7294, 7295 },
+ { 7360, 7367 },
+ { 7379, 7379 },
+ { 8214, 8215 },
+ { 8224, 8231 },
+ { 8240, 8248 },
+ { 8251, 8254 },
+ { 8257, 8259 },
+ { 8263, 8273 },
+ { 8275, 8275 },
+ { 8277, 8286 },
+ { 11513, 11516 },
+ { 11518, 11519 },
+ { 11632, 11632 },
+ { 11776, 11777 },
+ { 11782, 11784 },
+ { 11787, 11787 },
+ { 11790, 11798 },
+ { 11800, 11801 },
+ { 11803, 11803 },
+ { 11806, 11807 },
+ { 11818, 11822 },
+ { 11824, 11833 },
+ { 11836, 11839 },
+ { 11841, 11841 },
+ { 11843, 11855 },
{ 11858, 11860 },
- { 12289, 12291 },
- { 12349, 12349 },
- { 12539, 12539 },
- { 42238, 42239 },
- { 42509, 42511 },
- { 42611, 42611 },
- { 42622, 42622 },
- { 42738, 42743 },
- { 43124, 43127 },
- { 43214, 43215 },
- { 43256, 43258 },
- { 43260, 43260 },
- { 43310, 43311 },
- { 43359, 43359 },
- { 43457, 43469 },
- { 43486, 43487 },
- { 43612, 43615 },
- { 43742, 43743 },
- { 43760, 43761 },
- { 44011, 44011 },
- { 65040, 65046 },
- { 65049, 65049 },
- { 65072, 65072 },
- { 65093, 65094 },
- { 65097, 65100 },
- { 65104, 65106 },
- { 65108, 65111 },
- { 65119, 65121 },
- { 65128, 65128 },
- { 65130, 65131 },
- { 65281, 65283 },
- { 65285, 65287 },
- { 65290, 65290 },
- { 65292, 65292 },
- { 65294, 65295 },
- { 65306, 65307 },
- { 65311, 65312 },
- { 65340, 65340 },
- { 65377, 65377 },
- { 65380, 65381 },
-};
-static const URange32 Po_range32[] = {
- { 65792, 65794 },
- { 66463, 66463 },
- { 66512, 66512 },
- { 66927, 66927 },
- { 67671, 67671 },
- { 67871, 67871 },
- { 67903, 67903 },
- { 68176, 68184 },
- { 68223, 68223 },
- { 68336, 68342 },
- { 68409, 68415 },
- { 68505, 68508 },
- { 69461, 69465 },
+ { 12289, 12291 },
+ { 12349, 12349 },
+ { 12539, 12539 },
+ { 42238, 42239 },
+ { 42509, 42511 },
+ { 42611, 42611 },
+ { 42622, 42622 },
+ { 42738, 42743 },
+ { 43124, 43127 },
+ { 43214, 43215 },
+ { 43256, 43258 },
+ { 43260, 43260 },
+ { 43310, 43311 },
+ { 43359, 43359 },
+ { 43457, 43469 },
+ { 43486, 43487 },
+ { 43612, 43615 },
+ { 43742, 43743 },
+ { 43760, 43761 },
+ { 44011, 44011 },
+ { 65040, 65046 },
+ { 65049, 65049 },
+ { 65072, 65072 },
+ { 65093, 65094 },
+ { 65097, 65100 },
+ { 65104, 65106 },
+ { 65108, 65111 },
+ { 65119, 65121 },
+ { 65128, 65128 },
+ { 65130, 65131 },
+ { 65281, 65283 },
+ { 65285, 65287 },
+ { 65290, 65290 },
+ { 65292, 65292 },
+ { 65294, 65295 },
+ { 65306, 65307 },
+ { 65311, 65312 },
+ { 65340, 65340 },
+ { 65377, 65377 },
+ { 65380, 65381 },
+};
+static const URange32 Po_range32[] = {
+ { 65792, 65794 },
+ { 66463, 66463 },
+ { 66512, 66512 },
+ { 66927, 66927 },
+ { 67671, 67671 },
+ { 67871, 67871 },
+ { 67903, 67903 },
+ { 68176, 68184 },
+ { 68223, 68223 },
+ { 68336, 68342 },
+ { 68409, 68415 },
+ { 68505, 68508 },
+ { 69461, 69465 },
{ 69510, 69513 },
- { 69703, 69709 },
- { 69819, 69820 },
- { 69822, 69825 },
- { 69952, 69955 },
- { 70004, 70005 },
- { 70085, 70088 },
- { 70093, 70093 },
- { 70107, 70107 },
- { 70109, 70111 },
- { 70200, 70205 },
- { 70313, 70313 },
- { 70731, 70735 },
- { 70746, 70747 },
- { 70749, 70749 },
- { 70854, 70854 },
- { 71105, 71127 },
- { 71233, 71235 },
- { 71264, 71276 },
+ { 69703, 69709 },
+ { 69819, 69820 },
+ { 69822, 69825 },
+ { 69952, 69955 },
+ { 70004, 70005 },
+ { 70085, 70088 },
+ { 70093, 70093 },
+ { 70107, 70107 },
+ { 70109, 70111 },
+ { 70200, 70205 },
+ { 70313, 70313 },
+ { 70731, 70735 },
+ { 70746, 70747 },
+ { 70749, 70749 },
+ { 70854, 70854 },
+ { 71105, 71127 },
+ { 71233, 71235 },
+ { 71264, 71276 },
{ 71353, 71353 },
- { 71484, 71486 },
- { 71739, 71739 },
- { 72004, 72006 },
- { 72162, 72162 },
- { 72255, 72262 },
- { 72346, 72348 },
- { 72350, 72354 },
- { 72769, 72773 },
- { 72816, 72817 },
- { 73463, 73464 },
- { 73727, 73727 },
- { 74864, 74868 },
+ { 71484, 71486 },
+ { 71739, 71739 },
+ { 72004, 72006 },
+ { 72162, 72162 },
+ { 72255, 72262 },
+ { 72346, 72348 },
+ { 72350, 72354 },
+ { 72769, 72773 },
+ { 72816, 72817 },
+ { 73463, 73464 },
+ { 73727, 73727 },
+ { 74864, 74868 },
{ 77809, 77810 },
- { 92782, 92783 },
- { 92917, 92917 },
- { 92983, 92987 },
- { 92996, 92996 },
- { 93847, 93850 },
- { 94178, 94178 },
- { 113823, 113823 },
- { 121479, 121483 },
- { 125278, 125279 },
-};
-static const URange16 Ps_range16[] = {
- { 40, 40 },
- { 91, 91 },
- { 123, 123 },
- { 3898, 3898 },
- { 3900, 3900 },
- { 5787, 5787 },
- { 8218, 8218 },
- { 8222, 8222 },
- { 8261, 8261 },
- { 8317, 8317 },
- { 8333, 8333 },
- { 8968, 8968 },
- { 8970, 8970 },
- { 9001, 9001 },
- { 10088, 10088 },
- { 10090, 10090 },
- { 10092, 10092 },
- { 10094, 10094 },
- { 10096, 10096 },
- { 10098, 10098 },
- { 10100, 10100 },
- { 10181, 10181 },
- { 10214, 10214 },
- { 10216, 10216 },
- { 10218, 10218 },
- { 10220, 10220 },
- { 10222, 10222 },
- { 10627, 10627 },
- { 10629, 10629 },
- { 10631, 10631 },
- { 10633, 10633 },
- { 10635, 10635 },
- { 10637, 10637 },
- { 10639, 10639 },
- { 10641, 10641 },
- { 10643, 10643 },
- { 10645, 10645 },
- { 10647, 10647 },
- { 10712, 10712 },
- { 10714, 10714 },
- { 10748, 10748 },
- { 11810, 11810 },
- { 11812, 11812 },
- { 11814, 11814 },
- { 11816, 11816 },
- { 11842, 11842 },
+ { 92782, 92783 },
+ { 92917, 92917 },
+ { 92983, 92987 },
+ { 92996, 92996 },
+ { 93847, 93850 },
+ { 94178, 94178 },
+ { 113823, 113823 },
+ { 121479, 121483 },
+ { 125278, 125279 },
+};
+static const URange16 Ps_range16[] = {
+ { 40, 40 },
+ { 91, 91 },
+ { 123, 123 },
+ { 3898, 3898 },
+ { 3900, 3900 },
+ { 5787, 5787 },
+ { 8218, 8218 },
+ { 8222, 8222 },
+ { 8261, 8261 },
+ { 8317, 8317 },
+ { 8333, 8333 },
+ { 8968, 8968 },
+ { 8970, 8970 },
+ { 9001, 9001 },
+ { 10088, 10088 },
+ { 10090, 10090 },
+ { 10092, 10092 },
+ { 10094, 10094 },
+ { 10096, 10096 },
+ { 10098, 10098 },
+ { 10100, 10100 },
+ { 10181, 10181 },
+ { 10214, 10214 },
+ { 10216, 10216 },
+ { 10218, 10218 },
+ { 10220, 10220 },
+ { 10222, 10222 },
+ { 10627, 10627 },
+ { 10629, 10629 },
+ { 10631, 10631 },
+ { 10633, 10633 },
+ { 10635, 10635 },
+ { 10637, 10637 },
+ { 10639, 10639 },
+ { 10641, 10641 },
+ { 10643, 10643 },
+ { 10645, 10645 },
+ { 10647, 10647 },
+ { 10712, 10712 },
+ { 10714, 10714 },
+ { 10748, 10748 },
+ { 11810, 11810 },
+ { 11812, 11812 },
+ { 11814, 11814 },
+ { 11816, 11816 },
+ { 11842, 11842 },
{ 11861, 11861 },
{ 11863, 11863 },
{ 11865, 11865 },
{ 11867, 11867 },
- { 12296, 12296 },
- { 12298, 12298 },
- { 12300, 12300 },
- { 12302, 12302 },
- { 12304, 12304 },
- { 12308, 12308 },
- { 12310, 12310 },
- { 12312, 12312 },
- { 12314, 12314 },
- { 12317, 12317 },
- { 64831, 64831 },
- { 65047, 65047 },
- { 65077, 65077 },
- { 65079, 65079 },
- { 65081, 65081 },
- { 65083, 65083 },
- { 65085, 65085 },
- { 65087, 65087 },
- { 65089, 65089 },
- { 65091, 65091 },
- { 65095, 65095 },
- { 65113, 65113 },
- { 65115, 65115 },
- { 65117, 65117 },
- { 65288, 65288 },
- { 65339, 65339 },
- { 65371, 65371 },
- { 65375, 65375 },
- { 65378, 65378 },
-};
+ { 12296, 12296 },
+ { 12298, 12298 },
+ { 12300, 12300 },
+ { 12302, 12302 },
+ { 12304, 12304 },
+ { 12308, 12308 },
+ { 12310, 12310 },
+ { 12312, 12312 },
+ { 12314, 12314 },
+ { 12317, 12317 },
+ { 64831, 64831 },
+ { 65047, 65047 },
+ { 65077, 65077 },
+ { 65079, 65079 },
+ { 65081, 65081 },
+ { 65083, 65083 },
+ { 65085, 65085 },
+ { 65087, 65087 },
+ { 65089, 65089 },
+ { 65091, 65091 },
+ { 65095, 65095 },
+ { 65113, 65113 },
+ { 65115, 65115 },
+ { 65117, 65117 },
+ { 65288, 65288 },
+ { 65339, 65339 },
+ { 65371, 65371 },
+ { 65375, 65375 },
+ { 65378, 65378 },
+};
static const URange16 S_range16[] = {
{ 36, 36 },
{ 43, 43 },
@@ -4385,7 +4385,7 @@ static const URange16 S_range16[] = {
{ 1769, 1769 },
{ 1789, 1790 },
{ 2038, 2038 },
- { 2046, 2047 },
+ { 2046, 2047 },
{ 2184, 2184 },
{ 2546, 2547 },
{ 2554, 2555 },
@@ -4393,7 +4393,7 @@ static const URange16 S_range16[] = {
{ 2928, 2928 },
{ 3059, 3066 },
{ 3199, 3199 },
- { 3407, 3407 },
+ { 3407, 3407 },
{ 3449, 3449 },
{ 3647, 3647 },
{ 3841, 3843 },
@@ -4409,7 +4409,7 @@ static const URange16 S_range16[] = {
{ 4053, 4056 },
{ 4254, 4255 },
{ 5008, 5017 },
- { 5741, 5741 },
+ { 5741, 5741 },
{ 6107, 6107 },
{ 6464, 6464 },
{ 6622, 6655 },
@@ -4443,7 +4443,7 @@ static const URange16 S_range16[] = {
{ 8586, 8587 },
{ 8592, 8967 },
{ 8972, 9000 },
- { 9003, 9254 },
+ { 9003, 9254 },
{ 9280, 9290 },
{ 9372, 9449 },
{ 9472, 10087 },
@@ -4454,9 +4454,9 @@ static const URange16 S_range16[] = {
{ 10716, 10747 },
{ 10750, 11123 },
{ 11126, 11157 },
- { 11159, 11263 },
+ { 11159, 11263 },
{ 11493, 11498 },
- { 11856, 11857 },
+ { 11856, 11857 },
{ 11904, 11929 },
{ 11931, 12019 },
{ 12032, 12245 },
@@ -4475,7 +4475,7 @@ static const URange16 S_range16[] = {
{ 12880, 12880 },
{ 12896, 12927 },
{ 12938, 12976 },
- { 12992, 13311 },
+ { 12992, 13311 },
{ 19904, 19967 },
{ 42128, 42182 },
{ 42752, 42774 },
@@ -4485,7 +4485,7 @@ static const URange16 S_range16[] = {
{ 43062, 43065 },
{ 43639, 43641 },
{ 43867, 43867 },
- { 43882, 43883 },
+ { 43882, 43883 },
{ 64297, 64297 },
{ 64434, 64450 },
{ 64832, 64847 },
@@ -4508,14 +4508,14 @@ static const URange16 S_range16[] = {
static const URange32 S_range32[] = {
{ 65847, 65855 },
{ 65913, 65929 },
- { 65932, 65934 },
- { 65936, 65948 },
+ { 65932, 65934 },
+ { 65936, 65948 },
{ 65952, 65952 },
{ 66000, 66044 },
{ 67703, 67704 },
{ 68296, 68296 },
{ 71487, 71487 },
- { 73685, 73713 },
+ { 73685, 73713 },
{ 92988, 92991 },
{ 92997, 92997 },
{ 113820, 113820 },
@@ -4545,11 +4545,11 @@ static const URange32 S_range32[] = {
{ 121453, 121460 },
{ 121462, 121475 },
{ 121477, 121478 },
- { 123215, 123215 },
- { 123647, 123647 },
- { 126124, 126124 },
- { 126128, 126128 },
- { 126254, 126254 },
+ { 123215, 123215 },
+ { 123647, 123647 },
+ { 126124, 126124 },
+ { 126128, 126128 },
+ { 126254, 126254 },
{ 126704, 126705 },
{ 126976, 127019 },
{ 127024, 127123 },
@@ -4557,167 +4557,167 @@ static const URange32 S_range32[] = {
{ 127153, 127167 },
{ 127169, 127183 },
{ 127185, 127221 },
- { 127245, 127405 },
+ { 127245, 127405 },
{ 127462, 127490 },
- { 127504, 127547 },
+ { 127504, 127547 },
{ 127552, 127560 },
{ 127568, 127569 },
- { 127584, 127589 },
- { 127744, 128727 },
+ { 127584, 127589 },
+ { 127744, 128727 },
{ 128733, 128748 },
- { 128752, 128764 },
+ { 128752, 128764 },
{ 128768, 128883 },
- { 128896, 128984 },
- { 128992, 129003 },
+ { 128896, 128984 },
+ { 128992, 129003 },
{ 129008, 129008 },
{ 129024, 129035 },
{ 129040, 129095 },
{ 129104, 129113 },
{ 129120, 129159 },
{ 129168, 129197 },
- { 129200, 129201 },
+ { 129200, 129201 },
{ 129280, 129619 },
- { 129632, 129645 },
- { 129648, 129652 },
+ { 129632, 129645 },
+ { 129648, 129652 },
{ 129656, 129660 },
- { 129664, 129670 },
+ { 129664, 129670 },
{ 129680, 129708 },
{ 129712, 129722 },
{ 129728, 129733 },
{ 129744, 129753 },
{ 129760, 129767 },
{ 129776, 129782 },
- { 129792, 129938 },
- { 129940, 129994 },
-};
-static const URange16 Sc_range16[] = {
- { 36, 36 },
- { 162, 165 },
- { 1423, 1423 },
- { 1547, 1547 },
- { 2046, 2047 },
- { 2546, 2547 },
- { 2555, 2555 },
- { 2801, 2801 },
- { 3065, 3065 },
- { 3647, 3647 },
- { 6107, 6107 },
+ { 129792, 129938 },
+ { 129940, 129994 },
+};
+static const URange16 Sc_range16[] = {
+ { 36, 36 },
+ { 162, 165 },
+ { 1423, 1423 },
+ { 1547, 1547 },
+ { 2046, 2047 },
+ { 2546, 2547 },
+ { 2555, 2555 },
+ { 2801, 2801 },
+ { 3065, 3065 },
+ { 3647, 3647 },
+ { 6107, 6107 },
{ 8352, 8384 },
- { 43064, 43064 },
- { 65020, 65020 },
- { 65129, 65129 },
- { 65284, 65284 },
- { 65504, 65505 },
- { 65509, 65510 },
-};
-static const URange32 Sc_range32[] = {
- { 73693, 73696 },
- { 123647, 123647 },
- { 126128, 126128 },
-};
-static const URange16 Sk_range16[] = {
- { 94, 94 },
- { 96, 96 },
- { 168, 168 },
- { 175, 175 },
- { 180, 180 },
- { 184, 184 },
- { 706, 709 },
- { 722, 735 },
- { 741, 747 },
- { 749, 749 },
- { 751, 767 },
- { 885, 885 },
- { 900, 901 },
+ { 43064, 43064 },
+ { 65020, 65020 },
+ { 65129, 65129 },
+ { 65284, 65284 },
+ { 65504, 65505 },
+ { 65509, 65510 },
+};
+static const URange32 Sc_range32[] = {
+ { 73693, 73696 },
+ { 123647, 123647 },
+ { 126128, 126128 },
+};
+static const URange16 Sk_range16[] = {
+ { 94, 94 },
+ { 96, 96 },
+ { 168, 168 },
+ { 175, 175 },
+ { 180, 180 },
+ { 184, 184 },
+ { 706, 709 },
+ { 722, 735 },
+ { 741, 747 },
+ { 749, 749 },
+ { 751, 767 },
+ { 885, 885 },
+ { 900, 901 },
{ 2184, 2184 },
- { 8125, 8125 },
- { 8127, 8129 },
- { 8141, 8143 },
- { 8157, 8159 },
- { 8173, 8175 },
- { 8189, 8190 },
- { 12443, 12444 },
- { 42752, 42774 },
- { 42784, 42785 },
- { 42889, 42890 },
- { 43867, 43867 },
- { 43882, 43883 },
+ { 8125, 8125 },
+ { 8127, 8129 },
+ { 8141, 8143 },
+ { 8157, 8159 },
+ { 8173, 8175 },
+ { 8189, 8190 },
+ { 12443, 12444 },
+ { 42752, 42774 },
+ { 42784, 42785 },
+ { 42889, 42890 },
+ { 43867, 43867 },
+ { 43882, 43883 },
{ 64434, 64450 },
- { 65342, 65342 },
- { 65344, 65344 },
- { 65507, 65507 },
-};
-static const URange32 Sk_range32[] = {
- { 127995, 127999 },
-};
-static const URange16 Sm_range16[] = {
- { 43, 43 },
- { 60, 62 },
- { 124, 124 },
- { 126, 126 },
- { 172, 172 },
- { 177, 177 },
- { 215, 215 },
- { 247, 247 },
- { 1014, 1014 },
- { 1542, 1544 },
- { 8260, 8260 },
- { 8274, 8274 },
- { 8314, 8316 },
- { 8330, 8332 },
- { 8472, 8472 },
- { 8512, 8516 },
- { 8523, 8523 },
- { 8592, 8596 },
- { 8602, 8603 },
- { 8608, 8608 },
- { 8611, 8611 },
- { 8614, 8614 },
- { 8622, 8622 },
- { 8654, 8655 },
- { 8658, 8658 },
- { 8660, 8660 },
- { 8692, 8959 },
- { 8992, 8993 },
- { 9084, 9084 },
- { 9115, 9139 },
- { 9180, 9185 },
- { 9655, 9655 },
- { 9665, 9665 },
- { 9720, 9727 },
- { 9839, 9839 },
- { 10176, 10180 },
- { 10183, 10213 },
- { 10224, 10239 },
- { 10496, 10626 },
- { 10649, 10711 },
- { 10716, 10747 },
- { 10750, 11007 },
- { 11056, 11076 },
- { 11079, 11084 },
- { 64297, 64297 },
- { 65122, 65122 },
- { 65124, 65126 },
- { 65291, 65291 },
- { 65308, 65310 },
- { 65372, 65372 },
- { 65374, 65374 },
- { 65506, 65506 },
- { 65513, 65516 },
-};
-static const URange32 Sm_range32[] = {
- { 120513, 120513 },
- { 120539, 120539 },
- { 120571, 120571 },
- { 120597, 120597 },
- { 120629, 120629 },
- { 120655, 120655 },
- { 120687, 120687 },
- { 120713, 120713 },
- { 120745, 120745 },
- { 120771, 120771 },
- { 126704, 126705 },
-};
+ { 65342, 65342 },
+ { 65344, 65344 },
+ { 65507, 65507 },
+};
+static const URange32 Sk_range32[] = {
+ { 127995, 127999 },
+};
+static const URange16 Sm_range16[] = {
+ { 43, 43 },
+ { 60, 62 },
+ { 124, 124 },
+ { 126, 126 },
+ { 172, 172 },
+ { 177, 177 },
+ { 215, 215 },
+ { 247, 247 },
+ { 1014, 1014 },
+ { 1542, 1544 },
+ { 8260, 8260 },
+ { 8274, 8274 },
+ { 8314, 8316 },
+ { 8330, 8332 },
+ { 8472, 8472 },
+ { 8512, 8516 },
+ { 8523, 8523 },
+ { 8592, 8596 },
+ { 8602, 8603 },
+ { 8608, 8608 },
+ { 8611, 8611 },
+ { 8614, 8614 },
+ { 8622, 8622 },
+ { 8654, 8655 },
+ { 8658, 8658 },
+ { 8660, 8660 },
+ { 8692, 8959 },
+ { 8992, 8993 },
+ { 9084, 9084 },
+ { 9115, 9139 },
+ { 9180, 9185 },
+ { 9655, 9655 },
+ { 9665, 9665 },
+ { 9720, 9727 },
+ { 9839, 9839 },
+ { 10176, 10180 },
+ { 10183, 10213 },
+ { 10224, 10239 },
+ { 10496, 10626 },
+ { 10649, 10711 },
+ { 10716, 10747 },
+ { 10750, 11007 },
+ { 11056, 11076 },
+ { 11079, 11084 },
+ { 64297, 64297 },
+ { 65122, 65122 },
+ { 65124, 65126 },
+ { 65291, 65291 },
+ { 65308, 65310 },
+ { 65372, 65372 },
+ { 65374, 65374 },
+ { 65506, 65506 },
+ { 65513, 65516 },
+};
+static const URange32 Sm_range32[] = {
+ { 120513, 120513 },
+ { 120539, 120539 },
+ { 120571, 120571 },
+ { 120597, 120597 },
+ { 120629, 120629 },
+ { 120655, 120655 },
+ { 120687, 120687 },
+ { 120713, 120713 },
+ { 120745, 120745 },
+ { 120771, 120771 },
+ { 126704, 126705 },
+};
static const URange16 So_range16[] = {
{ 166, 166 },
{ 169, 169 },
@@ -4735,7 +4735,7 @@ static const URange16 So_range16[] = {
{ 3059, 3064 },
{ 3066, 3066 },
{ 3199, 3199 },
- { 3407, 3407 },
+ { 3407, 3407 },
{ 3449, 3449 },
{ 3841, 3843 },
{ 3859, 3859 },
@@ -4750,7 +4750,7 @@ static const URange16 So_range16[] = {
{ 4053, 4056 },
{ 4254, 4255 },
{ 5008, 5017 },
- { 5741, 5741 },
+ { 5741, 5741 },
{ 6464, 6464 },
{ 6622, 6655 },
{ 7009, 7018 },
@@ -4785,7 +4785,7 @@ static const URange16 So_range16[] = {
{ 9003, 9083 },
{ 9085, 9114 },
{ 9140, 9179 },
- { 9186, 9254 },
+ { 9186, 9254 },
{ 9280, 9290 },
{ 9372, 9449 },
{ 9472, 9654 },
@@ -4799,9 +4799,9 @@ static const URange16 So_range16[] = {
{ 11077, 11078 },
{ 11085, 11123 },
{ 11126, 11157 },
- { 11159, 11263 },
+ { 11159, 11263 },
{ 11493, 11498 },
- { 11856, 11857 },
+ { 11856, 11857 },
{ 11904, 11929 },
{ 11931, 12019 },
{ 12032, 12245 },
@@ -4819,7 +4819,7 @@ static const URange16 So_range16[] = {
{ 12880, 12880 },
{ 12896, 12927 },
{ 12938, 12976 },
- { 12992, 13311 },
+ { 12992, 13311 },
{ 19904, 19967 },
{ 42128, 42182 },
{ 43048, 43051 },
@@ -4837,15 +4837,15 @@ static const URange16 So_range16[] = {
static const URange32 So_range32[] = {
{ 65847, 65855 },
{ 65913, 65929 },
- { 65932, 65934 },
- { 65936, 65948 },
+ { 65932, 65934 },
+ { 65936, 65948 },
{ 65952, 65952 },
{ 66000, 66044 },
{ 67703, 67704 },
{ 68296, 68296 },
{ 71487, 71487 },
- { 73685, 73692 },
- { 73697, 73713 },
+ { 73685, 73692 },
+ { 73697, 73713 },
{ 92988, 92991 },
{ 92997, 92997 },
{ 113820, 113820 },
@@ -4865,48 +4865,48 @@ static const URange32 So_range32[] = {
{ 121453, 121460 },
{ 121462, 121475 },
{ 121477, 121478 },
- { 123215, 123215 },
- { 126124, 126124 },
- { 126254, 126254 },
+ { 123215, 123215 },
+ { 126124, 126124 },
+ { 126254, 126254 },
{ 126976, 127019 },
{ 127024, 127123 },
{ 127136, 127150 },
{ 127153, 127167 },
{ 127169, 127183 },
{ 127185, 127221 },
- { 127245, 127405 },
+ { 127245, 127405 },
{ 127462, 127490 },
- { 127504, 127547 },
+ { 127504, 127547 },
{ 127552, 127560 },
{ 127568, 127569 },
- { 127584, 127589 },
+ { 127584, 127589 },
{ 127744, 127994 },
- { 128000, 128727 },
+ { 128000, 128727 },
{ 128733, 128748 },
- { 128752, 128764 },
+ { 128752, 128764 },
{ 128768, 128883 },
- { 128896, 128984 },
- { 128992, 129003 },
+ { 128896, 128984 },
+ { 128992, 129003 },
{ 129008, 129008 },
{ 129024, 129035 },
{ 129040, 129095 },
{ 129104, 129113 },
{ 129120, 129159 },
{ 129168, 129197 },
- { 129200, 129201 },
+ { 129200, 129201 },
{ 129280, 129619 },
- { 129632, 129645 },
- { 129648, 129652 },
+ { 129632, 129645 },
+ { 129648, 129652 },
{ 129656, 129660 },
- { 129664, 129670 },
+ { 129664, 129670 },
{ 129680, 129708 },
{ 129712, 129722 },
{ 129728, 129733 },
{ 129744, 129753 },
{ 129760, 129767 },
{ 129776, 129782 },
- { 129792, 129938 },
- { 129940, 129994 },
+ { 129792, 129938 },
+ { 129940, 129994 },
};
static const URange16 Z_range16[] = {
{ 32, 32 },
@@ -4921,390 +4921,390 @@ static const URange16 Z_range16[] = {
static const URange16 Zl_range16[] = {
{ 8232, 8232 },
};
-static const URange16 Zp_range16[] = {
- { 8233, 8233 },
-};
-static const URange16 Zs_range16[] = {
- { 32, 32 },
- { 160, 160 },
- { 5760, 5760 },
- { 8192, 8202 },
- { 8239, 8239 },
- { 8287, 8287 },
- { 12288, 12288 },
-};
-static const URange32 Adlam_range32[] = {
- { 125184, 125259 },
- { 125264, 125273 },
- { 125278, 125279 },
-};
-static const URange32 Ahom_range32[] = {
- { 71424, 71450 },
- { 71453, 71467 },
+static const URange16 Zp_range16[] = {
+ { 8233, 8233 },
+};
+static const URange16 Zs_range16[] = {
+ { 32, 32 },
+ { 160, 160 },
+ { 5760, 5760 },
+ { 8192, 8202 },
+ { 8239, 8239 },
+ { 8287, 8287 },
+ { 12288, 12288 },
+};
+static const URange32 Adlam_range32[] = {
+ { 125184, 125259 },
+ { 125264, 125273 },
+ { 125278, 125279 },
+};
+static const URange32 Ahom_range32[] = {
+ { 71424, 71450 },
+ { 71453, 71467 },
{ 71472, 71494 },
-};
-static const URange32 Anatolian_Hieroglyphs_range32[] = {
- { 82944, 83526 },
-};
-static const URange16 Arabic_range16[] = {
- { 1536, 1540 },
- { 1542, 1547 },
- { 1549, 1562 },
+};
+static const URange32 Anatolian_Hieroglyphs_range32[] = {
+ { 82944, 83526 },
+};
+static const URange16 Arabic_range16[] = {
+ { 1536, 1540 },
+ { 1542, 1547 },
+ { 1549, 1562 },
{ 1564, 1566 },
- { 1568, 1599 },
- { 1601, 1610 },
- { 1622, 1647 },
- { 1649, 1756 },
- { 1758, 1791 },
- { 1872, 1919 },
+ { 1568, 1599 },
+ { 1601, 1610 },
+ { 1622, 1647 },
+ { 1649, 1756 },
+ { 1758, 1791 },
+ { 1872, 1919 },
{ 2160, 2190 },
{ 2192, 2193 },
{ 2200, 2273 },
- { 2275, 2303 },
+ { 2275, 2303 },
{ 64336, 64450 },
- { 64467, 64829 },
+ { 64467, 64829 },
{ 64832, 64911 },
- { 64914, 64967 },
+ { 64914, 64967 },
{ 64975, 64975 },
{ 65008, 65023 },
- { 65136, 65140 },
- { 65142, 65276 },
-};
-static const URange32 Arabic_range32[] = {
- { 69216, 69246 },
- { 126464, 126467 },
- { 126469, 126495 },
- { 126497, 126498 },
- { 126500, 126500 },
- { 126503, 126503 },
- { 126505, 126514 },
- { 126516, 126519 },
- { 126521, 126521 },
- { 126523, 126523 },
- { 126530, 126530 },
- { 126535, 126535 },
- { 126537, 126537 },
- { 126539, 126539 },
- { 126541, 126543 },
- { 126545, 126546 },
- { 126548, 126548 },
- { 126551, 126551 },
- { 126553, 126553 },
- { 126555, 126555 },
- { 126557, 126557 },
- { 126559, 126559 },
- { 126561, 126562 },
- { 126564, 126564 },
- { 126567, 126570 },
- { 126572, 126578 },
- { 126580, 126583 },
- { 126585, 126588 },
- { 126590, 126590 },
- { 126592, 126601 },
- { 126603, 126619 },
- { 126625, 126627 },
- { 126629, 126633 },
- { 126635, 126651 },
- { 126704, 126705 },
-};
-static const URange16 Armenian_range16[] = {
- { 1329, 1366 },
- { 1369, 1418 },
- { 1421, 1423 },
- { 64275, 64279 },
-};
-static const URange32 Avestan_range32[] = {
- { 68352, 68405 },
- { 68409, 68415 },
-};
-static const URange16 Balinese_range16[] = {
+ { 65136, 65140 },
+ { 65142, 65276 },
+};
+static const URange32 Arabic_range32[] = {
+ { 69216, 69246 },
+ { 126464, 126467 },
+ { 126469, 126495 },
+ { 126497, 126498 },
+ { 126500, 126500 },
+ { 126503, 126503 },
+ { 126505, 126514 },
+ { 126516, 126519 },
+ { 126521, 126521 },
+ { 126523, 126523 },
+ { 126530, 126530 },
+ { 126535, 126535 },
+ { 126537, 126537 },
+ { 126539, 126539 },
+ { 126541, 126543 },
+ { 126545, 126546 },
+ { 126548, 126548 },
+ { 126551, 126551 },
+ { 126553, 126553 },
+ { 126555, 126555 },
+ { 126557, 126557 },
+ { 126559, 126559 },
+ { 126561, 126562 },
+ { 126564, 126564 },
+ { 126567, 126570 },
+ { 126572, 126578 },
+ { 126580, 126583 },
+ { 126585, 126588 },
+ { 126590, 126590 },
+ { 126592, 126601 },
+ { 126603, 126619 },
+ { 126625, 126627 },
+ { 126629, 126633 },
+ { 126635, 126651 },
+ { 126704, 126705 },
+};
+static const URange16 Armenian_range16[] = {
+ { 1329, 1366 },
+ { 1369, 1418 },
+ { 1421, 1423 },
+ { 64275, 64279 },
+};
+static const URange32 Avestan_range32[] = {
+ { 68352, 68405 },
+ { 68409, 68415 },
+};
+static const URange16 Balinese_range16[] = {
{ 6912, 6988 },
{ 6992, 7038 },
-};
-static const URange16 Bamum_range16[] = {
- { 42656, 42743 },
-};
-static const URange32 Bamum_range32[] = {
- { 92160, 92728 },
-};
-static const URange32 Bassa_Vah_range32[] = {
- { 92880, 92909 },
- { 92912, 92917 },
-};
-static const URange16 Batak_range16[] = {
- { 7104, 7155 },
- { 7164, 7167 },
-};
-static const URange16 Bengali_range16[] = {
- { 2432, 2435 },
- { 2437, 2444 },
- { 2447, 2448 },
- { 2451, 2472 },
- { 2474, 2480 },
- { 2482, 2482 },
- { 2486, 2489 },
- { 2492, 2500 },
- { 2503, 2504 },
- { 2507, 2510 },
- { 2519, 2519 },
- { 2524, 2525 },
- { 2527, 2531 },
- { 2534, 2558 },
-};
-static const URange32 Bhaiksuki_range32[] = {
- { 72704, 72712 },
- { 72714, 72758 },
- { 72760, 72773 },
- { 72784, 72812 },
-};
-static const URange16 Bopomofo_range16[] = {
- { 746, 747 },
- { 12549, 12591 },
- { 12704, 12735 },
-};
-static const URange32 Brahmi_range32[] = {
- { 69632, 69709 },
+};
+static const URange16 Bamum_range16[] = {
+ { 42656, 42743 },
+};
+static const URange32 Bamum_range32[] = {
+ { 92160, 92728 },
+};
+static const URange32 Bassa_Vah_range32[] = {
+ { 92880, 92909 },
+ { 92912, 92917 },
+};
+static const URange16 Batak_range16[] = {
+ { 7104, 7155 },
+ { 7164, 7167 },
+};
+static const URange16 Bengali_range16[] = {
+ { 2432, 2435 },
+ { 2437, 2444 },
+ { 2447, 2448 },
+ { 2451, 2472 },
+ { 2474, 2480 },
+ { 2482, 2482 },
+ { 2486, 2489 },
+ { 2492, 2500 },
+ { 2503, 2504 },
+ { 2507, 2510 },
+ { 2519, 2519 },
+ { 2524, 2525 },
+ { 2527, 2531 },
+ { 2534, 2558 },
+};
+static const URange32 Bhaiksuki_range32[] = {
+ { 72704, 72712 },
+ { 72714, 72758 },
+ { 72760, 72773 },
+ { 72784, 72812 },
+};
+static const URange16 Bopomofo_range16[] = {
+ { 746, 747 },
+ { 12549, 12591 },
+ { 12704, 12735 },
+};
+static const URange32 Brahmi_range32[] = {
+ { 69632, 69709 },
{ 69714, 69749 },
- { 69759, 69759 },
-};
-static const URange16 Braille_range16[] = {
- { 10240, 10495 },
-};
-static const URange16 Buginese_range16[] = {
- { 6656, 6683 },
- { 6686, 6687 },
-};
-static const URange16 Buhid_range16[] = {
- { 5952, 5971 },
-};
-static const URange16 Canadian_Aboriginal_range16[] = {
- { 5120, 5759 },
- { 6320, 6389 },
-};
+ { 69759, 69759 },
+};
+static const URange16 Braille_range16[] = {
+ { 10240, 10495 },
+};
+static const URange16 Buginese_range16[] = {
+ { 6656, 6683 },
+ { 6686, 6687 },
+};
+static const URange16 Buhid_range16[] = {
+ { 5952, 5971 },
+};
+static const URange16 Canadian_Aboriginal_range16[] = {
+ { 5120, 5759 },
+ { 6320, 6389 },
+};
static const URange32 Canadian_Aboriginal_range32[] = {
{ 72368, 72383 },
};
-static const URange32 Carian_range32[] = {
- { 66208, 66256 },
-};
-static const URange32 Caucasian_Albanian_range32[] = {
- { 66864, 66915 },
- { 66927, 66927 },
-};
-static const URange32 Chakma_range32[] = {
- { 69888, 69940 },
- { 69942, 69959 },
-};
-static const URange16 Cham_range16[] = {
- { 43520, 43574 },
- { 43584, 43597 },
- { 43600, 43609 },
- { 43612, 43615 },
-};
-static const URange16 Cherokee_range16[] = {
- { 5024, 5109 },
- { 5112, 5117 },
- { 43888, 43967 },
-};
-static const URange32 Chorasmian_range32[] = {
- { 69552, 69579 },
-};
-static const URange16 Common_range16[] = {
- { 0, 64 },
- { 91, 96 },
- { 123, 169 },
- { 171, 185 },
- { 187, 191 },
- { 215, 215 },
- { 247, 247 },
- { 697, 735 },
- { 741, 745 },
- { 748, 767 },
- { 884, 884 },
- { 894, 894 },
- { 901, 901 },
- { 903, 903 },
- { 1541, 1541 },
- { 1548, 1548 },
- { 1563, 1563 },
- { 1567, 1567 },
- { 1600, 1600 },
+static const URange32 Carian_range32[] = {
+ { 66208, 66256 },
+};
+static const URange32 Caucasian_Albanian_range32[] = {
+ { 66864, 66915 },
+ { 66927, 66927 },
+};
+static const URange32 Chakma_range32[] = {
+ { 69888, 69940 },
+ { 69942, 69959 },
+};
+static const URange16 Cham_range16[] = {
+ { 43520, 43574 },
+ { 43584, 43597 },
+ { 43600, 43609 },
+ { 43612, 43615 },
+};
+static const URange16 Cherokee_range16[] = {
+ { 5024, 5109 },
+ { 5112, 5117 },
+ { 43888, 43967 },
+};
+static const URange32 Chorasmian_range32[] = {
+ { 69552, 69579 },
+};
+static const URange16 Common_range16[] = {
+ { 0, 64 },
+ { 91, 96 },
+ { 123, 169 },
+ { 171, 185 },
+ { 187, 191 },
+ { 215, 215 },
+ { 247, 247 },
+ { 697, 735 },
+ { 741, 745 },
+ { 748, 767 },
+ { 884, 884 },
+ { 894, 894 },
+ { 901, 901 },
+ { 903, 903 },
+ { 1541, 1541 },
+ { 1548, 1548 },
+ { 1563, 1563 },
+ { 1567, 1567 },
+ { 1600, 1600 },
{ 1757, 1757 },
- { 2274, 2274 },
- { 2404, 2405 },
- { 3647, 3647 },
- { 4053, 4056 },
- { 4347, 4347 },
- { 5867, 5869 },
- { 5941, 5942 },
- { 6146, 6147 },
- { 6149, 6149 },
- { 7379, 7379 },
- { 7393, 7393 },
- { 7401, 7404 },
- { 7406, 7411 },
- { 7413, 7415 },
- { 7418, 7418 },
- { 8192, 8203 },
- { 8206, 8292 },
- { 8294, 8304 },
- { 8308, 8318 },
- { 8320, 8334 },
+ { 2274, 2274 },
+ { 2404, 2405 },
+ { 3647, 3647 },
+ { 4053, 4056 },
+ { 4347, 4347 },
+ { 5867, 5869 },
+ { 5941, 5942 },
+ { 6146, 6147 },
+ { 6149, 6149 },
+ { 7379, 7379 },
+ { 7393, 7393 },
+ { 7401, 7404 },
+ { 7406, 7411 },
+ { 7413, 7415 },
+ { 7418, 7418 },
+ { 8192, 8203 },
+ { 8206, 8292 },
+ { 8294, 8304 },
+ { 8308, 8318 },
+ { 8320, 8334 },
{ 8352, 8384 },
- { 8448, 8485 },
- { 8487, 8489 },
- { 8492, 8497 },
- { 8499, 8525 },
- { 8527, 8543 },
- { 8585, 8587 },
- { 8592, 9254 },
- { 9280, 9290 },
- { 9312, 10239 },
- { 10496, 11123 },
- { 11126, 11157 },
- { 11159, 11263 },
+ { 8448, 8485 },
+ { 8487, 8489 },
+ { 8492, 8497 },
+ { 8499, 8525 },
+ { 8527, 8543 },
+ { 8585, 8587 },
+ { 8592, 9254 },
+ { 9280, 9290 },
+ { 9312, 10239 },
+ { 10496, 11123 },
+ { 11126, 11157 },
+ { 11159, 11263 },
{ 11776, 11869 },
- { 12272, 12283 },
- { 12288, 12292 },
- { 12294, 12294 },
- { 12296, 12320 },
- { 12336, 12343 },
- { 12348, 12351 },
- { 12443, 12444 },
- { 12448, 12448 },
- { 12539, 12540 },
- { 12688, 12703 },
- { 12736, 12771 },
- { 12832, 12895 },
- { 12927, 13007 },
- { 13055, 13055 },
- { 13144, 13311 },
- { 19904, 19967 },
- { 42752, 42785 },
- { 42888, 42890 },
- { 43056, 43065 },
- { 43310, 43310 },
- { 43471, 43471 },
- { 43867, 43867 },
- { 43882, 43883 },
- { 64830, 64831 },
- { 65040, 65049 },
- { 65072, 65106 },
- { 65108, 65126 },
- { 65128, 65131 },
+ { 12272, 12283 },
+ { 12288, 12292 },
+ { 12294, 12294 },
+ { 12296, 12320 },
+ { 12336, 12343 },
+ { 12348, 12351 },
+ { 12443, 12444 },
+ { 12448, 12448 },
+ { 12539, 12540 },
+ { 12688, 12703 },
+ { 12736, 12771 },
+ { 12832, 12895 },
+ { 12927, 13007 },
+ { 13055, 13055 },
+ { 13144, 13311 },
+ { 19904, 19967 },
+ { 42752, 42785 },
+ { 42888, 42890 },
+ { 43056, 43065 },
+ { 43310, 43310 },
+ { 43471, 43471 },
+ { 43867, 43867 },
+ { 43882, 43883 },
+ { 64830, 64831 },
+ { 65040, 65049 },
+ { 65072, 65106 },
+ { 65108, 65126 },
+ { 65128, 65131 },
{ 65279, 65279 },
- { 65281, 65312 },
- { 65339, 65344 },
- { 65371, 65381 },
- { 65392, 65392 },
- { 65438, 65439 },
- { 65504, 65510 },
- { 65512, 65518 },
- { 65529, 65533 },
-};
-static const URange32 Common_range32[] = {
- { 65792, 65794 },
- { 65799, 65843 },
- { 65847, 65855 },
- { 65936, 65948 },
- { 66000, 66044 },
- { 66273, 66299 },
+ { 65281, 65312 },
+ { 65339, 65344 },
+ { 65371, 65381 },
+ { 65392, 65392 },
+ { 65438, 65439 },
+ { 65504, 65510 },
+ { 65512, 65518 },
+ { 65529, 65533 },
+};
+static const URange32 Common_range32[] = {
+ { 65792, 65794 },
+ { 65799, 65843 },
+ { 65847, 65855 },
+ { 65936, 65948 },
+ { 66000, 66044 },
+ { 66273, 66299 },
{ 113824, 113827 },
{ 118608, 118723 },
- { 118784, 119029 },
- { 119040, 119078 },
- { 119081, 119142 },
- { 119146, 119162 },
- { 119171, 119172 },
- { 119180, 119209 },
+ { 118784, 119029 },
+ { 119040, 119078 },
+ { 119081, 119142 },
+ { 119146, 119162 },
+ { 119171, 119172 },
+ { 119180, 119209 },
{ 119214, 119274 },
- { 119520, 119539 },
- { 119552, 119638 },
- { 119648, 119672 },
- { 119808, 119892 },
- { 119894, 119964 },
- { 119966, 119967 },
- { 119970, 119970 },
- { 119973, 119974 },
- { 119977, 119980 },
- { 119982, 119993 },
- { 119995, 119995 },
- { 119997, 120003 },
- { 120005, 120069 },
- { 120071, 120074 },
- { 120077, 120084 },
- { 120086, 120092 },
- { 120094, 120121 },
- { 120123, 120126 },
- { 120128, 120132 },
- { 120134, 120134 },
- { 120138, 120144 },
- { 120146, 120485 },
- { 120488, 120779 },
- { 120782, 120831 },
- { 126065, 126132 },
- { 126209, 126269 },
- { 126976, 127019 },
- { 127024, 127123 },
- { 127136, 127150 },
- { 127153, 127167 },
- { 127169, 127183 },
- { 127185, 127221 },
- { 127232, 127405 },
- { 127462, 127487 },
- { 127489, 127490 },
- { 127504, 127547 },
- { 127552, 127560 },
- { 127568, 127569 },
- { 127584, 127589 },
- { 127744, 128727 },
+ { 119520, 119539 },
+ { 119552, 119638 },
+ { 119648, 119672 },
+ { 119808, 119892 },
+ { 119894, 119964 },
+ { 119966, 119967 },
+ { 119970, 119970 },
+ { 119973, 119974 },
+ { 119977, 119980 },
+ { 119982, 119993 },
+ { 119995, 119995 },
+ { 119997, 120003 },
+ { 120005, 120069 },
+ { 120071, 120074 },
+ { 120077, 120084 },
+ { 120086, 120092 },
+ { 120094, 120121 },
+ { 120123, 120126 },
+ { 120128, 120132 },
+ { 120134, 120134 },
+ { 120138, 120144 },
+ { 120146, 120485 },
+ { 120488, 120779 },
+ { 120782, 120831 },
+ { 126065, 126132 },
+ { 126209, 126269 },
+ { 126976, 127019 },
+ { 127024, 127123 },
+ { 127136, 127150 },
+ { 127153, 127167 },
+ { 127169, 127183 },
+ { 127185, 127221 },
+ { 127232, 127405 },
+ { 127462, 127487 },
+ { 127489, 127490 },
+ { 127504, 127547 },
+ { 127552, 127560 },
+ { 127568, 127569 },
+ { 127584, 127589 },
+ { 127744, 128727 },
{ 128733, 128748 },
- { 128752, 128764 },
- { 128768, 128883 },
- { 128896, 128984 },
- { 128992, 129003 },
+ { 128752, 128764 },
+ { 128768, 128883 },
+ { 128896, 128984 },
+ { 128992, 129003 },
{ 129008, 129008 },
- { 129024, 129035 },
- { 129040, 129095 },
- { 129104, 129113 },
- { 129120, 129159 },
- { 129168, 129197 },
- { 129200, 129201 },
+ { 129024, 129035 },
+ { 129040, 129095 },
+ { 129104, 129113 },
+ { 129120, 129159 },
+ { 129168, 129197 },
+ { 129200, 129201 },
{ 129280, 129619 },
- { 129632, 129645 },
- { 129648, 129652 },
+ { 129632, 129645 },
+ { 129648, 129652 },
{ 129656, 129660 },
- { 129664, 129670 },
+ { 129664, 129670 },
{ 129680, 129708 },
{ 129712, 129722 },
{ 129728, 129733 },
{ 129744, 129753 },
{ 129760, 129767 },
{ 129776, 129782 },
- { 129792, 129938 },
- { 129940, 129994 },
- { 130032, 130041 },
+ { 129792, 129938 },
+ { 129940, 129994 },
+ { 130032, 130041 },
{ 917505, 917505 },
{ 917536, 917631 },
};
-static const URange16 Coptic_range16[] = {
- { 994, 1007 },
- { 11392, 11507 },
- { 11513, 11519 },
-};
-static const URange32 Cuneiform_range32[] = {
- { 73728, 74649 },
- { 74752, 74862 },
- { 74864, 74868 },
- { 74880, 75075 },
-};
-static const URange32 Cypriot_range32[] = {
- { 67584, 67589 },
- { 67592, 67592 },
- { 67594, 67637 },
- { 67639, 67640 },
- { 67644, 67644 },
- { 67647, 67647 },
+static const URange16 Coptic_range16[] = {
+ { 994, 1007 },
+ { 11392, 11507 },
+ { 11513, 11519 },
+};
+static const URange32 Cuneiform_range32[] = {
+ { 73728, 74649 },
+ { 74752, 74862 },
+ { 74864, 74868 },
+ { 74880, 75075 },
+};
+static const URange32 Cypriot_range32[] = {
+ { 67584, 67589 },
+ { 67592, 67592 },
+ { 67594, 67637 },
+ { 67639, 67640 },
+ { 67644, 67644 },
+ { 67647, 67647 },
};
static const URange32 Cypro_Minoan_range32[] = {
{ 77712, 77810 },
@@ -5312,52 +5312,52 @@ static const URange32 Cypro_Minoan_range32[] = {
static const URange16 Cyrillic_range16[] = {
{ 1024, 1156 },
{ 1159, 1327 },
- { 7296, 7304 },
+ { 7296, 7304 },
{ 7467, 7467 },
{ 7544, 7544 },
{ 11744, 11775 },
{ 42560, 42655 },
{ 65070, 65071 },
};
-static const URange32 Deseret_range32[] = {
- { 66560, 66639 },
-};
-static const URange16 Devanagari_range16[] = {
- { 2304, 2384 },
- { 2389, 2403 },
- { 2406, 2431 },
- { 43232, 43263 },
-};
-static const URange32 Dives_Akuru_range32[] = {
- { 71936, 71942 },
- { 71945, 71945 },
- { 71948, 71955 },
- { 71957, 71958 },
- { 71960, 71989 },
- { 71991, 71992 },
- { 71995, 72006 },
- { 72016, 72025 },
-};
-static const URange32 Dogra_range32[] = {
- { 71680, 71739 },
-};
-static const URange32 Duployan_range32[] = {
- { 113664, 113770 },
- { 113776, 113788 },
- { 113792, 113800 },
- { 113808, 113817 },
- { 113820, 113823 },
-};
-static const URange32 Egyptian_Hieroglyphs_range32[] = {
- { 77824, 78894 },
- { 78896, 78904 },
-};
-static const URange32 Elbasan_range32[] = {
- { 66816, 66855 },
-};
-static const URange32 Elymaic_range32[] = {
- { 69600, 69622 },
-};
+static const URange32 Deseret_range32[] = {
+ { 66560, 66639 },
+};
+static const URange16 Devanagari_range16[] = {
+ { 2304, 2384 },
+ { 2389, 2403 },
+ { 2406, 2431 },
+ { 43232, 43263 },
+};
+static const URange32 Dives_Akuru_range32[] = {
+ { 71936, 71942 },
+ { 71945, 71945 },
+ { 71948, 71955 },
+ { 71957, 71958 },
+ { 71960, 71989 },
+ { 71991, 71992 },
+ { 71995, 72006 },
+ { 72016, 72025 },
+};
+static const URange32 Dogra_range32[] = {
+ { 71680, 71739 },
+};
+static const URange32 Duployan_range32[] = {
+ { 113664, 113770 },
+ { 113776, 113788 },
+ { 113792, 113800 },
+ { 113808, 113817 },
+ { 113820, 113823 },
+};
+static const URange32 Egyptian_Hieroglyphs_range32[] = {
+ { 77824, 78894 },
+ { 78896, 78904 },
+};
+static const URange32 Elbasan_range32[] = {
+ { 66816, 66855 },
+};
+static const URange32 Elymaic_range32[] = {
+ { 69600, 69622 },
+};
static const URange16 Ethiopic_range16[] = {
{ 4608, 4680 },
{ 4682, 4685 },
@@ -5398,130 +5398,130 @@ static const URange32 Ethiopic_range32[] = {
{ 124909, 124910 },
{ 124912, 124926 },
};
-static const URange16 Georgian_range16[] = {
- { 4256, 4293 },
- { 4295, 4295 },
- { 4301, 4301 },
- { 4304, 4346 },
- { 4348, 4351 },
- { 7312, 7354 },
- { 7357, 7359 },
- { 11520, 11557 },
- { 11559, 11559 },
- { 11565, 11565 },
-};
-static const URange16 Glagolitic_range16[] = {
+static const URange16 Georgian_range16[] = {
+ { 4256, 4293 },
+ { 4295, 4295 },
+ { 4301, 4301 },
+ { 4304, 4346 },
+ { 4348, 4351 },
+ { 7312, 7354 },
+ { 7357, 7359 },
+ { 11520, 11557 },
+ { 11559, 11559 },
+ { 11565, 11565 },
+};
+static const URange16 Glagolitic_range16[] = {
{ 11264, 11359 },
};
-static const URange32 Glagolitic_range32[] = {
- { 122880, 122886 },
- { 122888, 122904 },
- { 122907, 122913 },
- { 122915, 122916 },
- { 122918, 122922 },
-};
-static const URange32 Gothic_range32[] = {
- { 66352, 66378 },
-};
-static const URange32 Grantha_range32[] = {
- { 70400, 70403 },
- { 70405, 70412 },
- { 70415, 70416 },
- { 70419, 70440 },
- { 70442, 70448 },
- { 70450, 70451 },
- { 70453, 70457 },
- { 70460, 70468 },
- { 70471, 70472 },
- { 70475, 70477 },
- { 70480, 70480 },
- { 70487, 70487 },
- { 70493, 70499 },
- { 70502, 70508 },
- { 70512, 70516 },
-};
-static const URange16 Greek_range16[] = {
- { 880, 883 },
- { 885, 887 },
- { 890, 893 },
- { 895, 895 },
- { 900, 900 },
- { 902, 902 },
- { 904, 906 },
- { 908, 908 },
- { 910, 929 },
- { 931, 993 },
- { 1008, 1023 },
- { 7462, 7466 },
- { 7517, 7521 },
- { 7526, 7530 },
- { 7615, 7615 },
- { 7936, 7957 },
- { 7960, 7965 },
- { 7968, 8005 },
- { 8008, 8013 },
- { 8016, 8023 },
- { 8025, 8025 },
- { 8027, 8027 },
- { 8029, 8029 },
- { 8031, 8061 },
- { 8064, 8116 },
- { 8118, 8132 },
- { 8134, 8147 },
- { 8150, 8155 },
- { 8157, 8175 },
- { 8178, 8180 },
- { 8182, 8190 },
- { 8486, 8486 },
- { 43877, 43877 },
-};
-static const URange32 Greek_range32[] = {
- { 65856, 65934 },
- { 65952, 65952 },
- { 119296, 119365 },
-};
-static const URange16 Gujarati_range16[] = {
- { 2689, 2691 },
- { 2693, 2701 },
- { 2703, 2705 },
- { 2707, 2728 },
- { 2730, 2736 },
- { 2738, 2739 },
- { 2741, 2745 },
- { 2748, 2757 },
- { 2759, 2761 },
- { 2763, 2765 },
- { 2768, 2768 },
- { 2784, 2787 },
- { 2790, 2801 },
- { 2809, 2815 },
-};
-static const URange32 Gunjala_Gondi_range32[] = {
- { 73056, 73061 },
- { 73063, 73064 },
- { 73066, 73102 },
- { 73104, 73105 },
- { 73107, 73112 },
- { 73120, 73129 },
-};
-static const URange16 Gurmukhi_range16[] = {
- { 2561, 2563 },
- { 2565, 2570 },
- { 2575, 2576 },
- { 2579, 2600 },
- { 2602, 2608 },
- { 2610, 2611 },
- { 2613, 2614 },
- { 2616, 2617 },
- { 2620, 2620 },
- { 2622, 2626 },
- { 2631, 2632 },
- { 2635, 2637 },
- { 2641, 2641 },
- { 2649, 2652 },
- { 2654, 2654 },
- { 2662, 2678 },
-};
+static const URange32 Glagolitic_range32[] = {
+ { 122880, 122886 },
+ { 122888, 122904 },
+ { 122907, 122913 },
+ { 122915, 122916 },
+ { 122918, 122922 },
+};
+static const URange32 Gothic_range32[] = {
+ { 66352, 66378 },
+};
+static const URange32 Grantha_range32[] = {
+ { 70400, 70403 },
+ { 70405, 70412 },
+ { 70415, 70416 },
+ { 70419, 70440 },
+ { 70442, 70448 },
+ { 70450, 70451 },
+ { 70453, 70457 },
+ { 70460, 70468 },
+ { 70471, 70472 },
+ { 70475, 70477 },
+ { 70480, 70480 },
+ { 70487, 70487 },
+ { 70493, 70499 },
+ { 70502, 70508 },
+ { 70512, 70516 },
+};
+static const URange16 Greek_range16[] = {
+ { 880, 883 },
+ { 885, 887 },
+ { 890, 893 },
+ { 895, 895 },
+ { 900, 900 },
+ { 902, 902 },
+ { 904, 906 },
+ { 908, 908 },
+ { 910, 929 },
+ { 931, 993 },
+ { 1008, 1023 },
+ { 7462, 7466 },
+ { 7517, 7521 },
+ { 7526, 7530 },
+ { 7615, 7615 },
+ { 7936, 7957 },
+ { 7960, 7965 },
+ { 7968, 8005 },
+ { 8008, 8013 },
+ { 8016, 8023 },
+ { 8025, 8025 },
+ { 8027, 8027 },
+ { 8029, 8029 },
+ { 8031, 8061 },
+ { 8064, 8116 },
+ { 8118, 8132 },
+ { 8134, 8147 },
+ { 8150, 8155 },
+ { 8157, 8175 },
+ { 8178, 8180 },
+ { 8182, 8190 },
+ { 8486, 8486 },
+ { 43877, 43877 },
+};
+static const URange32 Greek_range32[] = {
+ { 65856, 65934 },
+ { 65952, 65952 },
+ { 119296, 119365 },
+};
+static const URange16 Gujarati_range16[] = {
+ { 2689, 2691 },
+ { 2693, 2701 },
+ { 2703, 2705 },
+ { 2707, 2728 },
+ { 2730, 2736 },
+ { 2738, 2739 },
+ { 2741, 2745 },
+ { 2748, 2757 },
+ { 2759, 2761 },
+ { 2763, 2765 },
+ { 2768, 2768 },
+ { 2784, 2787 },
+ { 2790, 2801 },
+ { 2809, 2815 },
+};
+static const URange32 Gunjala_Gondi_range32[] = {
+ { 73056, 73061 },
+ { 73063, 73064 },
+ { 73066, 73102 },
+ { 73104, 73105 },
+ { 73107, 73112 },
+ { 73120, 73129 },
+};
+static const URange16 Gurmukhi_range16[] = {
+ { 2561, 2563 },
+ { 2565, 2570 },
+ { 2575, 2576 },
+ { 2579, 2600 },
+ { 2602, 2608 },
+ { 2610, 2611 },
+ { 2613, 2614 },
+ { 2616, 2617 },
+ { 2620, 2620 },
+ { 2622, 2626 },
+ { 2631, 2632 },
+ { 2635, 2637 },
+ { 2641, 2641 },
+ { 2649, 2652 },
+ { 2654, 2654 },
+ { 2662, 2678 },
+};
static const URange16 Han_range16[] = {
{ 11904, 11929 },
{ 11931, 12019 },
@@ -5530,60 +5530,60 @@ static const URange16 Han_range16[] = {
{ 12295, 12295 },
{ 12321, 12329 },
{ 12344, 12347 },
- { 13312, 19903 },
+ { 13312, 19903 },
{ 19968, 40959 },
{ 63744, 64109 },
{ 64112, 64217 },
};
static const URange32 Han_range32[] = {
{ 94178, 94179 },
- { 94192, 94193 },
+ { 94192, 94193 },
{ 131072, 173791 },
{ 173824, 177976 },
{ 177984, 178205 },
{ 178208, 183969 },
- { 183984, 191456 },
+ { 183984, 191456 },
{ 194560, 195101 },
- { 196608, 201546 },
-};
-static const URange16 Hangul_range16[] = {
- { 4352, 4607 },
- { 12334, 12335 },
- { 12593, 12686 },
- { 12800, 12830 },
- { 12896, 12926 },
- { 43360, 43388 },
- { 44032, 55203 },
- { 55216, 55238 },
- { 55243, 55291 },
- { 65440, 65470 },
- { 65474, 65479 },
- { 65482, 65487 },
- { 65490, 65495 },
- { 65498, 65500 },
-};
-static const URange32 Hanifi_Rohingya_range32[] = {
- { 68864, 68903 },
- { 68912, 68921 },
-};
-static const URange16 Hanunoo_range16[] = {
- { 5920, 5940 },
-};
-static const URange32 Hatran_range32[] = {
- { 67808, 67826 },
- { 67828, 67829 },
- { 67835, 67839 },
-};
-static const URange16 Hebrew_range16[] = {
- { 1425, 1479 },
- { 1488, 1514 },
- { 1519, 1524 },
- { 64285, 64310 },
- { 64312, 64316 },
- { 64318, 64318 },
- { 64320, 64321 },
- { 64323, 64324 },
- { 64326, 64335 },
+ { 196608, 201546 },
+};
+static const URange16 Hangul_range16[] = {
+ { 4352, 4607 },
+ { 12334, 12335 },
+ { 12593, 12686 },
+ { 12800, 12830 },
+ { 12896, 12926 },
+ { 43360, 43388 },
+ { 44032, 55203 },
+ { 55216, 55238 },
+ { 55243, 55291 },
+ { 65440, 65470 },
+ { 65474, 65479 },
+ { 65482, 65487 },
+ { 65490, 65495 },
+ { 65498, 65500 },
+};
+static const URange32 Hanifi_Rohingya_range32[] = {
+ { 68864, 68903 },
+ { 68912, 68921 },
+};
+static const URange16 Hanunoo_range16[] = {
+ { 5920, 5940 },
+};
+static const URange32 Hatran_range32[] = {
+ { 67808, 67826 },
+ { 67828, 67829 },
+ { 67835, 67839 },
+};
+static const URange16 Hebrew_range16[] = {
+ { 1425, 1479 },
+ { 1488, 1514 },
+ { 1519, 1524 },
+ { 64285, 64310 },
+ { 64312, 64316 },
+ { 64318, 64318 },
+ { 64320, 64321 },
+ { 64323, 64324 },
+ { 64326, 64335 },
};
static const URange16 Hiragana_range16[] = {
{ 12353, 12438 },
@@ -5591,95 +5591,95 @@ static const URange16 Hiragana_range16[] = {
};
static const URange32 Hiragana_range32[] = {
{ 110593, 110879 },
- { 110928, 110930 },
+ { 110928, 110930 },
{ 127488, 127488 },
};
-static const URange32 Imperial_Aramaic_range32[] = {
- { 67648, 67669 },
- { 67671, 67679 },
+static const URange32 Imperial_Aramaic_range32[] = {
+ { 67648, 67669 },
+ { 67671, 67679 },
};
-static const URange16 Inherited_range16[] = {
- { 768, 879 },
- { 1157, 1158 },
- { 1611, 1621 },
- { 1648, 1648 },
- { 2385, 2388 },
+static const URange16 Inherited_range16[] = {
+ { 768, 879 },
+ { 1157, 1158 },
+ { 1611, 1621 },
+ { 1648, 1648 },
+ { 2385, 2388 },
{ 6832, 6862 },
- { 7376, 7378 },
- { 7380, 7392 },
- { 7394, 7400 },
- { 7405, 7405 },
- { 7412, 7412 },
- { 7416, 7417 },
+ { 7376, 7378 },
+ { 7380, 7392 },
+ { 7394, 7400 },
+ { 7405, 7405 },
+ { 7412, 7412 },
+ { 7416, 7417 },
{ 7616, 7679 },
- { 8204, 8205 },
- { 8400, 8432 },
- { 12330, 12333 },
- { 12441, 12442 },
- { 65024, 65039 },
- { 65056, 65069 },
-};
-static const URange32 Inherited_range32[] = {
- { 66045, 66045 },
- { 66272, 66272 },
- { 70459, 70459 },
+ { 8204, 8205 },
+ { 8400, 8432 },
+ { 12330, 12333 },
+ { 12441, 12442 },
+ { 65024, 65039 },
+ { 65056, 65069 },
+};
+static const URange32 Inherited_range32[] = {
+ { 66045, 66045 },
+ { 66272, 66272 },
+ { 70459, 70459 },
{ 118528, 118573 },
{ 118576, 118598 },
- { 119143, 119145 },
- { 119163, 119170 },
- { 119173, 119179 },
- { 119210, 119213 },
- { 917760, 917999 },
-};
-static const URange32 Inscriptional_Pahlavi_range32[] = {
- { 68448, 68466 },
- { 68472, 68479 },
-};
-static const URange32 Inscriptional_Parthian_range32[] = {
- { 68416, 68437 },
- { 68440, 68447 },
-};
-static const URange16 Javanese_range16[] = {
- { 43392, 43469 },
- { 43472, 43481 },
- { 43486, 43487 },
-};
-static const URange32 Kaithi_range32[] = {
+ { 119143, 119145 },
+ { 119163, 119170 },
+ { 119173, 119179 },
+ { 119210, 119213 },
+ { 917760, 917999 },
+};
+static const URange32 Inscriptional_Pahlavi_range32[] = {
+ { 68448, 68466 },
+ { 68472, 68479 },
+};
+static const URange32 Inscriptional_Parthian_range32[] = {
+ { 68416, 68437 },
+ { 68440, 68447 },
+};
+static const URange16 Javanese_range16[] = {
+ { 43392, 43469 },
+ { 43472, 43481 },
+ { 43486, 43487 },
+};
+static const URange32 Kaithi_range32[] = {
{ 69760, 69826 },
- { 69837, 69837 },
-};
-static const URange16 Kannada_range16[] = {
- { 3200, 3212 },
- { 3214, 3216 },
- { 3218, 3240 },
- { 3242, 3251 },
- { 3253, 3257 },
- { 3260, 3268 },
- { 3270, 3272 },
- { 3274, 3277 },
- { 3285, 3286 },
+ { 69837, 69837 },
+};
+static const URange16 Kannada_range16[] = {
+ { 3200, 3212 },
+ { 3214, 3216 },
+ { 3218, 3240 },
+ { 3242, 3251 },
+ { 3253, 3257 },
+ { 3260, 3268 },
+ { 3270, 3272 },
+ { 3274, 3277 },
+ { 3285, 3286 },
{ 3293, 3294 },
- { 3296, 3299 },
- { 3302, 3311 },
- { 3313, 3314 },
-};
-static const URange16 Katakana_range16[] = {
- { 12449, 12538 },
- { 12541, 12543 },
- { 12784, 12799 },
- { 13008, 13054 },
- { 13056, 13143 },
- { 65382, 65391 },
- { 65393, 65437 },
-};
-static const URange32 Katakana_range32[] = {
+ { 3296, 3299 },
+ { 3302, 3311 },
+ { 3313, 3314 },
+};
+static const URange16 Katakana_range16[] = {
+ { 12449, 12538 },
+ { 12541, 12543 },
+ { 12784, 12799 },
+ { 13008, 13054 },
+ { 13056, 13143 },
+ { 65382, 65391 },
+ { 65393, 65437 },
+};
+static const URange32 Katakana_range32[] = {
{ 110576, 110579 },
{ 110581, 110587 },
{ 110589, 110590 },
- { 110592, 110592 },
+ { 110592, 110592 },
{ 110880, 110882 },
- { 110948, 110951 },
-};
+ { 110948, 110951 },
+};
static const URange16 Kayah_Li_range16[] = {
{ 43264, 43309 },
{ 43311, 43311 },
@@ -5689,42 +5689,42 @@ static const URange32 Kharoshthi_range32[] = {
{ 68101, 68102 },
{ 68108, 68115 },
{ 68117, 68119 },
- { 68121, 68149 },
+ { 68121, 68149 },
{ 68152, 68154 },
- { 68159, 68168 },
+ { 68159, 68168 },
{ 68176, 68184 },
};
-static const URange32 Khitan_Small_Script_range32[] = {
- { 94180, 94180 },
- { 101120, 101589 },
-};
-static const URange16 Khmer_range16[] = {
- { 6016, 6109 },
- { 6112, 6121 },
- { 6128, 6137 },
- { 6624, 6655 },
-};
-static const URange32 Khojki_range32[] = {
- { 70144, 70161 },
- { 70163, 70206 },
-};
-static const URange32 Khudawadi_range32[] = {
- { 70320, 70378 },
- { 70384, 70393 },
-};
-static const URange16 Lao_range16[] = {
- { 3713, 3714 },
- { 3716, 3716 },
- { 3718, 3722 },
- { 3724, 3747 },
- { 3749, 3749 },
- { 3751, 3773 },
- { 3776, 3780 },
- { 3782, 3782 },
- { 3784, 3789 },
- { 3792, 3801 },
- { 3804, 3807 },
-};
+static const URange32 Khitan_Small_Script_range32[] = {
+ { 94180, 94180 },
+ { 101120, 101589 },
+};
+static const URange16 Khmer_range16[] = {
+ { 6016, 6109 },
+ { 6112, 6121 },
+ { 6128, 6137 },
+ { 6624, 6655 },
+};
+static const URange32 Khojki_range32[] = {
+ { 70144, 70161 },
+ { 70163, 70206 },
+};
+static const URange32 Khudawadi_range32[] = {
+ { 70320, 70378 },
+ { 70384, 70393 },
+};
+static const URange16 Lao_range16[] = {
+ { 3713, 3714 },
+ { 3716, 3716 },
+ { 3718, 3722 },
+ { 3724, 3747 },
+ { 3749, 3749 },
+ { 3751, 3773 },
+ { 3776, 3780 },
+ { 3782, 3782 },
+ { 3784, 3789 },
+ { 3792, 3801 },
+ { 3804, 3807 },
+};
static const URange16 Latin_range16[] = {
{ 65, 90 },
{ 97, 122 },
@@ -5756,7 +5756,7 @@ static const URange16 Latin_range16[] = {
{ 42994, 43007 },
{ 43824, 43866 },
{ 43868, 43876 },
- { 43878, 43881 },
+ { 43878, 43881 },
{ 64256, 64262 },
{ 65313, 65338 },
{ 65345, 65370 },
@@ -5767,146 +5767,146 @@ static const URange32 Latin_range32[] = {
{ 67506, 67514 },
{ 122624, 122654 },
};
-static const URange16 Lepcha_range16[] = {
- { 7168, 7223 },
- { 7227, 7241 },
- { 7245, 7247 },
-};
-static const URange16 Limbu_range16[] = {
- { 6400, 6430 },
- { 6432, 6443 },
- { 6448, 6459 },
- { 6464, 6464 },
- { 6468, 6479 },
-};
-static const URange32 Linear_A_range32[] = {
- { 67072, 67382 },
- { 67392, 67413 },
- { 67424, 67431 },
-};
-static const URange32 Linear_B_range32[] = {
- { 65536, 65547 },
- { 65549, 65574 },
- { 65576, 65594 },
- { 65596, 65597 },
- { 65599, 65613 },
- { 65616, 65629 },
- { 65664, 65786 },
-};
-static const URange16 Lisu_range16[] = {
- { 42192, 42239 },
-};
-static const URange32 Lisu_range32[] = {
- { 73648, 73648 },
-};
-static const URange32 Lycian_range32[] = {
- { 66176, 66204 },
-};
-static const URange32 Lydian_range32[] = {
- { 67872, 67897 },
- { 67903, 67903 },
-};
-static const URange32 Mahajani_range32[] = {
- { 69968, 70006 },
-};
-static const URange32 Makasar_range32[] = {
- { 73440, 73464 },
-};
-static const URange16 Malayalam_range16[] = {
- { 3328, 3340 },
- { 3342, 3344 },
- { 3346, 3396 },
- { 3398, 3400 },
- { 3402, 3407 },
- { 3412, 3427 },
- { 3430, 3455 },
-};
-static const URange16 Mandaic_range16[] = {
- { 2112, 2139 },
- { 2142, 2142 },
-};
-static const URange32 Manichaean_range32[] = {
- { 68288, 68326 },
- { 68331, 68342 },
-};
-static const URange32 Marchen_range32[] = {
- { 72816, 72847 },
- { 72850, 72871 },
- { 72873, 72886 },
-};
-static const URange32 Masaram_Gondi_range32[] = {
- { 72960, 72966 },
- { 72968, 72969 },
- { 72971, 73014 },
- { 73018, 73018 },
- { 73020, 73021 },
- { 73023, 73031 },
- { 73040, 73049 },
-};
-static const URange32 Medefaidrin_range32[] = {
- { 93760, 93850 },
-};
-static const URange16 Meetei_Mayek_range16[] = {
- { 43744, 43766 },
- { 43968, 44013 },
- { 44016, 44025 },
-};
+static const URange16 Lepcha_range16[] = {
+ { 7168, 7223 },
+ { 7227, 7241 },
+ { 7245, 7247 },
+};
+static const URange16 Limbu_range16[] = {
+ { 6400, 6430 },
+ { 6432, 6443 },
+ { 6448, 6459 },
+ { 6464, 6464 },
+ { 6468, 6479 },
+};
+static const URange32 Linear_A_range32[] = {
+ { 67072, 67382 },
+ { 67392, 67413 },
+ { 67424, 67431 },
+};
+static const URange32 Linear_B_range32[] = {
+ { 65536, 65547 },
+ { 65549, 65574 },
+ { 65576, 65594 },
+ { 65596, 65597 },
+ { 65599, 65613 },
+ { 65616, 65629 },
+ { 65664, 65786 },
+};
+static const URange16 Lisu_range16[] = {
+ { 42192, 42239 },
+};
+static const URange32 Lisu_range32[] = {
+ { 73648, 73648 },
+};
+static const URange32 Lycian_range32[] = {
+ { 66176, 66204 },
+};
+static const URange32 Lydian_range32[] = {
+ { 67872, 67897 },
+ { 67903, 67903 },
+};
+static const URange32 Mahajani_range32[] = {
+ { 69968, 70006 },
+};
+static const URange32 Makasar_range32[] = {
+ { 73440, 73464 },
+};
+static const URange16 Malayalam_range16[] = {
+ { 3328, 3340 },
+ { 3342, 3344 },
+ { 3346, 3396 },
+ { 3398, 3400 },
+ { 3402, 3407 },
+ { 3412, 3427 },
+ { 3430, 3455 },
+};
+static const URange16 Mandaic_range16[] = {
+ { 2112, 2139 },
+ { 2142, 2142 },
+};
+static const URange32 Manichaean_range32[] = {
+ { 68288, 68326 },
+ { 68331, 68342 },
+};
+static const URange32 Marchen_range32[] = {
+ { 72816, 72847 },
+ { 72850, 72871 },
+ { 72873, 72886 },
+};
+static const URange32 Masaram_Gondi_range32[] = {
+ { 72960, 72966 },
+ { 72968, 72969 },
+ { 72971, 73014 },
+ { 73018, 73018 },
+ { 73020, 73021 },
+ { 73023, 73031 },
+ { 73040, 73049 },
+};
+static const URange32 Medefaidrin_range32[] = {
+ { 93760, 93850 },
+};
+static const URange16 Meetei_Mayek_range16[] = {
+ { 43744, 43766 },
+ { 43968, 44013 },
+ { 44016, 44025 },
+};
static const URange32 Mende_Kikakui_range32[] = {
{ 124928, 125124 },
{ 125127, 125142 },
};
-static const URange32 Meroitic_Cursive_range32[] = {
- { 68000, 68023 },
- { 68028, 68047 },
- { 68050, 68095 },
+static const URange32 Meroitic_Cursive_range32[] = {
+ { 68000, 68023 },
+ { 68028, 68047 },
+ { 68050, 68095 },
};
-static const URange32 Meroitic_Hieroglyphs_range32[] = {
- { 67968, 67999 },
+static const URange32 Meroitic_Hieroglyphs_range32[] = {
+ { 67968, 67999 },
};
-static const URange32 Miao_range32[] = {
- { 93952, 94026 },
- { 94031, 94087 },
- { 94095, 94111 },
+static const URange32 Miao_range32[] = {
+ { 93952, 94026 },
+ { 94031, 94087 },
+ { 94095, 94111 },
};
-static const URange32 Modi_range32[] = {
- { 71168, 71236 },
- { 71248, 71257 },
+static const URange32 Modi_range32[] = {
+ { 71168, 71236 },
+ { 71248, 71257 },
};
-static const URange16 Mongolian_range16[] = {
- { 6144, 6145 },
- { 6148, 6148 },
+static const URange16 Mongolian_range16[] = {
+ { 6144, 6145 },
+ { 6148, 6148 },
{ 6150, 6169 },
- { 6176, 6264 },
- { 6272, 6314 },
-};
-static const URange32 Mongolian_range32[] = {
- { 71264, 71276 },
-};
-static const URange32 Mro_range32[] = {
- { 92736, 92766 },
- { 92768, 92777 },
- { 92782, 92783 },
-};
-static const URange32 Multani_range32[] = {
- { 70272, 70278 },
- { 70280, 70280 },
- { 70282, 70285 },
- { 70287, 70301 },
- { 70303, 70313 },
-};
-static const URange16 Myanmar_range16[] = {
- { 4096, 4255 },
- { 43488, 43518 },
- { 43616, 43647 },
-};
-static const URange32 Nabataean_range32[] = {
- { 67712, 67742 },
- { 67751, 67759 },
-};
-static const URange32 Nandinagari_range32[] = {
- { 72096, 72103 },
- { 72106, 72151 },
- { 72154, 72164 },
+ { 6176, 6264 },
+ { 6272, 6314 },
+};
+static const URange32 Mongolian_range32[] = {
+ { 71264, 71276 },
+};
+static const URange32 Mro_range32[] = {
+ { 92736, 92766 },
+ { 92768, 92777 },
+ { 92782, 92783 },
+};
+static const URange32 Multani_range32[] = {
+ { 70272, 70278 },
+ { 70280, 70280 },
+ { 70282, 70285 },
+ { 70287, 70301 },
+ { 70303, 70313 },
+};
+static const URange16 Myanmar_range16[] = {
+ { 4096, 4255 },
+ { 43488, 43518 },
+ { 43616, 43647 },
+};
+static const URange32 Nabataean_range32[] = {
+ { 67712, 67742 },
+ { 67751, 67759 },
+};
+static const URange32 Nandinagari_range32[] = {
+ { 72096, 72103 },
+ { 72106, 72151 },
+ { 72154, 72164 },
};
static const URange16 New_Tai_Lue_range16[] = {
{ 6528, 6571 },
@@ -5914,58 +5914,58 @@ static const URange16 New_Tai_Lue_range16[] = {
{ 6608, 6618 },
{ 6622, 6623 },
};
-static const URange32 Newa_range32[] = {
- { 70656, 70747 },
- { 70749, 70753 },
-};
-static const URange16 Nko_range16[] = {
- { 1984, 2042 },
- { 2045, 2047 },
-};
-static const URange32 Nushu_range32[] = {
- { 94177, 94177 },
- { 110960, 111355 },
-};
-static const URange32 Nyiakeng_Puachue_Hmong_range32[] = {
- { 123136, 123180 },
- { 123184, 123197 },
- { 123200, 123209 },
- { 123214, 123215 },
-};
-static const URange16 Ogham_range16[] = {
- { 5760, 5788 },
-};
+static const URange32 Newa_range32[] = {
+ { 70656, 70747 },
+ { 70749, 70753 },
+};
+static const URange16 Nko_range16[] = {
+ { 1984, 2042 },
+ { 2045, 2047 },
+};
+static const URange32 Nushu_range32[] = {
+ { 94177, 94177 },
+ { 110960, 111355 },
+};
+static const URange32 Nyiakeng_Puachue_Hmong_range32[] = {
+ { 123136, 123180 },
+ { 123184, 123197 },
+ { 123200, 123209 },
+ { 123214, 123215 },
+};
+static const URange16 Ogham_range16[] = {
+ { 5760, 5788 },
+};
static const URange16 Ol_Chiki_range16[] = {
{ 7248, 7295 },
};
-static const URange32 Old_Hungarian_range32[] = {
- { 68736, 68786 },
- { 68800, 68850 },
- { 68858, 68863 },
-};
-static const URange32 Old_Italic_range32[] = {
- { 66304, 66339 },
- { 66349, 66351 },
-};
-static const URange32 Old_North_Arabian_range32[] = {
- { 68224, 68255 },
-};
-static const URange32 Old_Permic_range32[] = {
- { 66384, 66426 },
-};
-static const URange32 Old_Persian_range32[] = {
- { 66464, 66499 },
- { 66504, 66517 },
-};
-static const URange32 Old_Sogdian_range32[] = {
- { 69376, 69415 },
-};
-static const URange32 Old_South_Arabian_range32[] = {
- { 68192, 68223 },
-};
-static const URange32 Old_Turkic_range32[] = {
- { 68608, 68680 },
-};
+static const URange32 Old_Hungarian_range32[] = {
+ { 68736, 68786 },
+ { 68800, 68850 },
+ { 68858, 68863 },
+};
+static const URange32 Old_Italic_range32[] = {
+ { 66304, 66339 },
+ { 66349, 66351 },
+};
+static const URange32 Old_North_Arabian_range32[] = {
+ { 68224, 68255 },
+};
+static const URange32 Old_Permic_range32[] = {
+ { 66384, 66426 },
+};
+static const URange32 Old_Persian_range32[] = {
+ { 66464, 66499 },
+ { 66504, 66517 },
+};
+static const URange32 Old_Sogdian_range32[] = {
+ { 69376, 69415 },
+};
+static const URange32 Old_South_Arabian_range32[] = {
+ { 68192, 68223 },
+};
+static const URange32 Old_Turkic_range32[] = {
+ { 68608, 68680 },
+};
static const URange32 Old_Uyghur_range32[] = {
{ 69488, 69513 },
};
@@ -5980,77 +5980,77 @@ static const URange16 Oriya_range16[] = {
{ 2876, 2884 },
{ 2887, 2888 },
{ 2891, 2893 },
- { 2901, 2903 },
+ { 2901, 2903 },
{ 2908, 2909 },
{ 2911, 2915 },
{ 2918, 2935 },
};
-static const URange32 Osage_range32[] = {
- { 66736, 66771 },
- { 66776, 66811 },
+static const URange32 Osage_range32[] = {
+ { 66736, 66771 },
+ { 66776, 66811 },
};
-static const URange32 Osmanya_range32[] = {
- { 66688, 66717 },
- { 66720, 66729 },
+static const URange32 Osmanya_range32[] = {
+ { 66688, 66717 },
+ { 66720, 66729 },
};
-static const URange32 Pahawh_Hmong_range32[] = {
- { 92928, 92997 },
- { 93008, 93017 },
- { 93019, 93025 },
- { 93027, 93047 },
- { 93053, 93071 },
+static const URange32 Pahawh_Hmong_range32[] = {
+ { 92928, 92997 },
+ { 93008, 93017 },
+ { 93019, 93025 },
+ { 93027, 93047 },
+ { 93053, 93071 },
};
-static const URange32 Palmyrene_range32[] = {
- { 67680, 67711 },
+static const URange32 Palmyrene_range32[] = {
+ { 67680, 67711 },
};
-static const URange32 Pau_Cin_Hau_range32[] = {
- { 72384, 72440 },
+static const URange32 Pau_Cin_Hau_range32[] = {
+ { 72384, 72440 },
};
-static const URange16 Phags_Pa_range16[] = {
- { 43072, 43127 },
+static const URange16 Phags_Pa_range16[] = {
+ { 43072, 43127 },
};
static const URange32 Phoenician_range32[] = {
{ 67840, 67867 },
{ 67871, 67871 },
};
-static const URange32 Psalter_Pahlavi_range32[] = {
- { 68480, 68497 },
- { 68505, 68508 },
- { 68521, 68527 },
+static const URange32 Psalter_Pahlavi_range32[] = {
+ { 68480, 68497 },
+ { 68505, 68508 },
+ { 68521, 68527 },
};
-static const URange16 Rejang_range16[] = {
- { 43312, 43347 },
- { 43359, 43359 },
+static const URange16 Rejang_range16[] = {
+ { 43312, 43347 },
+ { 43359, 43359 },
};
-static const URange16 Runic_range16[] = {
- { 5792, 5866 },
- { 5870, 5880 },
+static const URange16 Runic_range16[] = {
+ { 5792, 5866 },
+ { 5870, 5880 },
};
-static const URange16 Samaritan_range16[] = {
- { 2048, 2093 },
- { 2096, 2110 },
+static const URange16 Samaritan_range16[] = {
+ { 2048, 2093 },
+ { 2096, 2110 },
};
-static const URange16 Saurashtra_range16[] = {
- { 43136, 43205 },
- { 43214, 43225 },
+static const URange16 Saurashtra_range16[] = {
+ { 43136, 43205 },
+ { 43214, 43225 },
};
-static const URange32 Sharada_range32[] = {
- { 70016, 70111 },
+static const URange32 Sharada_range32[] = {
+ { 70016, 70111 },
};
-static const URange32 Shavian_range32[] = {
- { 66640, 66687 },
+static const URange32 Shavian_range32[] = {
+ { 66640, 66687 },
};
-static const URange32 Siddham_range32[] = {
- { 71040, 71093 },
- { 71096, 71133 },
+static const URange32 Siddham_range32[] = {
+ { 71040, 71093 },
+ { 71096, 71133 },
};
-static const URange32 SignWriting_range32[] = {
- { 120832, 121483 },
- { 121499, 121503 },
- { 121505, 121519 },
+static const URange32 SignWriting_range32[] = {
+ { 120832, 121483 },
+ { 121499, 121503 },
+ { 121505, 121519 },
};
static const URange16 Sinhala_range16[] = {
- { 3457, 3459 },
+ { 3457, 3459 },
{ 3461, 3478 },
{ 3482, 3505 },
{ 3507, 3515 },
@@ -6066,124 +6066,124 @@ static const URange16 Sinhala_range16[] = {
static const URange32 Sinhala_range32[] = {
{ 70113, 70132 },
};
-static const URange32 Sogdian_range32[] = {
- { 69424, 69465 },
+static const URange32 Sogdian_range32[] = {
+ { 69424, 69465 },
};
-static const URange32 Sora_Sompeng_range32[] = {
- { 69840, 69864 },
- { 69872, 69881 },
+static const URange32 Sora_Sompeng_range32[] = {
+ { 69840, 69864 },
+ { 69872, 69881 },
};
-static const URange32 Soyombo_range32[] = {
- { 72272, 72354 },
+static const URange32 Soyombo_range32[] = {
+ { 72272, 72354 },
};
-static const URange16 Sundanese_range16[] = {
- { 7040, 7103 },
- { 7360, 7367 },
+static const URange16 Sundanese_range16[] = {
+ { 7040, 7103 },
+ { 7360, 7367 },
};
-static const URange16 Syloti_Nagri_range16[] = {
- { 43008, 43052 },
+static const URange16 Syloti_Nagri_range16[] = {
+ { 43008, 43052 },
};
-static const URange16 Syriac_range16[] = {
- { 1792, 1805 },
- { 1807, 1866 },
- { 1869, 1871 },
- { 2144, 2154 },
+static const URange16 Syriac_range16[] = {
+ { 1792, 1805 },
+ { 1807, 1866 },
+ { 1869, 1871 },
+ { 2144, 2154 },
};
-static const URange16 Tagalog_range16[] = {
+static const URange16 Tagalog_range16[] = {
{ 5888, 5909 },
{ 5919, 5919 },
};
-static const URange16 Tagbanwa_range16[] = {
- { 5984, 5996 },
- { 5998, 6000 },
- { 6002, 6003 },
+static const URange16 Tagbanwa_range16[] = {
+ { 5984, 5996 },
+ { 5998, 6000 },
+ { 6002, 6003 },
};
-static const URange16 Tai_Le_range16[] = {
- { 6480, 6509 },
- { 6512, 6516 },
+static const URange16 Tai_Le_range16[] = {
+ { 6480, 6509 },
+ { 6512, 6516 },
};
-static const URange16 Tai_Tham_range16[] = {
- { 6688, 6750 },
- { 6752, 6780 },
- { 6783, 6793 },
- { 6800, 6809 },
- { 6816, 6829 },
+static const URange16 Tai_Tham_range16[] = {
+ { 6688, 6750 },
+ { 6752, 6780 },
+ { 6783, 6793 },
+ { 6800, 6809 },
+ { 6816, 6829 },
};
-static const URange16 Tai_Viet_range16[] = {
- { 43648, 43714 },
- { 43739, 43743 },
+static const URange16 Tai_Viet_range16[] = {
+ { 43648, 43714 },
+ { 43739, 43743 },
};
-static const URange32 Takri_range32[] = {
+static const URange32 Takri_range32[] = {
{ 71296, 71353 },
- { 71360, 71369 },
-};
-static const URange16 Tamil_range16[] = {
- { 2946, 2947 },
- { 2949, 2954 },
- { 2958, 2960 },
- { 2962, 2965 },
- { 2969, 2970 },
- { 2972, 2972 },
- { 2974, 2975 },
- { 2979, 2980 },
- { 2984, 2986 },
- { 2990, 3001 },
- { 3006, 3010 },
- { 3014, 3016 },
- { 3018, 3021 },
- { 3024, 3024 },
- { 3031, 3031 },
- { 3046, 3066 },
-};
-static const URange32 Tamil_range32[] = {
- { 73664, 73713 },
- { 73727, 73727 },
+ { 71360, 71369 },
+};
+static const URange16 Tamil_range16[] = {
+ { 2946, 2947 },
+ { 2949, 2954 },
+ { 2958, 2960 },
+ { 2962, 2965 },
+ { 2969, 2970 },
+ { 2972, 2972 },
+ { 2974, 2975 },
+ { 2979, 2980 },
+ { 2984, 2986 },
+ { 2990, 3001 },
+ { 3006, 3010 },
+ { 3014, 3016 },
+ { 3018, 3021 },
+ { 3024, 3024 },
+ { 3031, 3031 },
+ { 3046, 3066 },
+};
+static const URange32 Tamil_range32[] = {
+ { 73664, 73713 },
+ { 73727, 73727 },
};
static const URange32 Tangsa_range32[] = {
{ 92784, 92862 },
{ 92864, 92873 },
};
-static const URange32 Tangut_range32[] = {
- { 94176, 94176 },
- { 94208, 100343 },
- { 100352, 101119 },
- { 101632, 101640 },
-};
-static const URange16 Telugu_range16[] = {
- { 3072, 3084 },
- { 3086, 3088 },
- { 3090, 3112 },
- { 3114, 3129 },
+static const URange32 Tangut_range32[] = {
+ { 94176, 94176 },
+ { 94208, 100343 },
+ { 100352, 101119 },
+ { 101632, 101640 },
+};
+static const URange16 Telugu_range16[] = {
+ { 3072, 3084 },
+ { 3086, 3088 },
+ { 3090, 3112 },
+ { 3114, 3129 },
{ 3132, 3140 },
- { 3142, 3144 },
- { 3146, 3149 },
- { 3157, 3158 },
- { 3160, 3162 },
+ { 3142, 3144 },
+ { 3146, 3149 },
+ { 3157, 3158 },
+ { 3160, 3162 },
{ 3165, 3165 },
- { 3168, 3171 },
- { 3174, 3183 },
- { 3191, 3199 },
-};
-static const URange16 Thaana_range16[] = {
- { 1920, 1969 },
-};
-static const URange16 Thai_range16[] = {
- { 3585, 3642 },
- { 3648, 3675 },
-};
-static const URange16 Tibetan_range16[] = {
- { 3840, 3911 },
- { 3913, 3948 },
- { 3953, 3991 },
- { 3993, 4028 },
- { 4030, 4044 },
- { 4046, 4052 },
- { 4057, 4058 },
-};
-static const URange16 Tifinagh_range16[] = {
- { 11568, 11623 },
- { 11631, 11632 },
- { 11647, 11647 },
+ { 3168, 3171 },
+ { 3174, 3183 },
+ { 3191, 3199 },
+};
+static const URange16 Thaana_range16[] = {
+ { 1920, 1969 },
+};
+static const URange16 Thai_range16[] = {
+ { 3585, 3642 },
+ { 3648, 3675 },
+};
+static const URange16 Tibetan_range16[] = {
+ { 3840, 3911 },
+ { 3913, 3948 },
+ { 3953, 3991 },
+ { 3993, 4028 },
+ { 4030, 4044 },
+ { 4046, 4052 },
+ { 4057, 4058 },
+};
+static const URange16 Tifinagh_range16[] = {
+ { 11568, 11623 },
+ { 11631, 11632 },
+ { 11647, 11647 },
};
static const URange32 Tirhuta_range32[] = {
{ 70784, 70855 },
@@ -6192,12 +6192,12 @@ static const URange32 Tirhuta_range32[] = {
static const URange32 Toto_range32[] = {
{ 123536, 123566 },
};
-static const URange32 Ugaritic_range32[] = {
- { 66432, 66461 },
- { 66463, 66463 },
+static const URange32 Ugaritic_range32[] = {
+ { 66432, 66461 },
+ { 66463, 66463 },
};
-static const URange16 Vai_range16[] = {
- { 42240, 42539 },
+static const URange16 Vai_range16[] = {
+ { 42240, 42539 },
};
static const URange32 Vithkuqi_range32[] = {
{ 66928, 66938 },
@@ -6209,40 +6209,40 @@ static const URange32 Vithkuqi_range32[] = {
{ 66995, 67001 },
{ 67003, 67004 },
};
-static const URange32 Wancho_range32[] = {
- { 123584, 123641 },
- { 123647, 123647 },
+static const URange32 Wancho_range32[] = {
+ { 123584, 123641 },
+ { 123647, 123647 },
};
-static const URange32 Warang_Citi_range32[] = {
- { 71840, 71922 },
- { 71935, 71935 },
+static const URange32 Warang_Citi_range32[] = {
+ { 71840, 71922 },
+ { 71935, 71935 },
};
-static const URange32 Yezidi_range32[] = {
- { 69248, 69289 },
- { 69291, 69293 },
- { 69296, 69297 },
+static const URange32 Yezidi_range32[] = {
+ { 69248, 69289 },
+ { 69291, 69293 },
+ { 69296, 69297 },
};
-static const URange16 Yi_range16[] = {
- { 40960, 42124 },
- { 42128, 42182 },
+static const URange16 Yi_range16[] = {
+ { 40960, 42124 },
+ { 42128, 42182 },
};
-static const URange32 Zanabazar_Square_range32[] = {
- { 72192, 72263 },
+static const URange32 Zanabazar_Square_range32[] = {
+ { 72192, 72263 },
};
// 4038 16-bit ranges, 1712 32-bit ranges
const UGroup unicode_groups[] = {
- { "Adlam", +1, 0, 0, Adlam_range32, 3 },
+ { "Adlam", +1, 0, 0, Adlam_range32, 3 },
{ "Ahom", +1, 0, 0, Ahom_range32, 3 },
{ "Anatolian_Hieroglyphs", +1, 0, 0, Anatolian_Hieroglyphs_range32, 1 },
- { "Arabic", +1, Arabic_range16, 22, Arabic_range32, 35 },
- { "Armenian", +1, Armenian_range16, 4, 0, 0 },
+ { "Arabic", +1, Arabic_range16, 22, Arabic_range32, 35 },
+ { "Armenian", +1, Armenian_range16, 4, 0, 0 },
{ "Avestan", +1, 0, 0, Avestan_range32, 2 },
{ "Balinese", +1, Balinese_range16, 2, 0, 0 },
{ "Bamum", +1, Bamum_range16, 1, Bamum_range32, 1 },
{ "Bassa_Vah", +1, 0, 0, Bassa_Vah_range32, 2 },
{ "Batak", +1, Batak_range16, 2, 0, 0 },
{ "Bengali", +1, Bengali_range16, 14, 0, 0 },
- { "Bhaiksuki", +1, 0, 0, Bhaiksuki_range32, 4 },
+ { "Bhaiksuki", +1, 0, 0, Bhaiksuki_range32, 4 },
{ "Bopomofo", +1, Bopomofo_range16, 3, 0, 0 },
{ "Brahmi", +1, 0, 0, Brahmi_range32, 3 },
{ "Braille", +1, Braille_range16, 1, 0, 0 },
@@ -6257,7 +6257,7 @@ const UGroup unicode_groups[] = {
{ "Chakma", +1, 0, 0, Chakma_range32, 2 },
{ "Cham", +1, Cham_range16, 4, 0, 0 },
{ "Cherokee", +1, Cherokee_range16, 3, 0, 0 },
- { "Chorasmian", +1, 0, 0, Chorasmian_range32, 1 },
+ { "Chorasmian", +1, 0, 0, Chorasmian_range32, 1 },
{ "Co", +1, Co_range16, 1, Co_range32, 2 },
{ "Common", +1, Common_range16, 91, Common_range32, 83 },
{ "Coptic", +1, Coptic_range16, 3, 0, 0 },
@@ -6265,53 +6265,53 @@ const UGroup unicode_groups[] = {
{ "Cuneiform", +1, 0, 0, Cuneiform_range32, 4 },
{ "Cypriot", +1, 0, 0, Cypriot_range32, 6 },
{ "Cypro_Minoan", +1, 0, 0, Cypro_Minoan_range32, 1 },
- { "Cyrillic", +1, Cyrillic_range16, 8, 0, 0 },
+ { "Cyrillic", +1, Cyrillic_range16, 8, 0, 0 },
{ "Deseret", +1, 0, 0, Deseret_range32, 1 },
{ "Devanagari", +1, Devanagari_range16, 4, 0, 0 },
- { "Dives_Akuru", +1, 0, 0, Dives_Akuru_range32, 8 },
- { "Dogra", +1, 0, 0, Dogra_range32, 1 },
+ { "Dives_Akuru", +1, 0, 0, Dives_Akuru_range32, 8 },
+ { "Dogra", +1, 0, 0, Dogra_range32, 1 },
{ "Duployan", +1, 0, 0, Duployan_range32, 5 },
- { "Egyptian_Hieroglyphs", +1, 0, 0, Egyptian_Hieroglyphs_range32, 2 },
+ { "Egyptian_Hieroglyphs", +1, 0, 0, Egyptian_Hieroglyphs_range32, 2 },
{ "Elbasan", +1, 0, 0, Elbasan_range32, 1 },
- { "Elymaic", +1, 0, 0, Elymaic_range32, 1 },
+ { "Elymaic", +1, 0, 0, Elymaic_range32, 1 },
{ "Ethiopic", +1, Ethiopic_range16, 32, Ethiopic_range32, 4 },
- { "Georgian", +1, Georgian_range16, 10, 0, 0 },
+ { "Georgian", +1, Georgian_range16, 10, 0, 0 },
{ "Glagolitic", +1, Glagolitic_range16, 1, Glagolitic_range32, 5 },
{ "Gothic", +1, 0, 0, Gothic_range32, 1 },
{ "Grantha", +1, 0, 0, Grantha_range32, 15 },
{ "Greek", +1, Greek_range16, 33, Greek_range32, 3 },
{ "Gujarati", +1, Gujarati_range16, 14, 0, 0 },
- { "Gunjala_Gondi", +1, 0, 0, Gunjala_Gondi_range32, 6 },
+ { "Gunjala_Gondi", +1, 0, 0, Gunjala_Gondi_range32, 6 },
{ "Gurmukhi", +1, Gurmukhi_range16, 16, 0, 0 },
{ "Han", +1, Han_range16, 11, Han_range32, 9 },
{ "Hangul", +1, Hangul_range16, 14, 0, 0 },
- { "Hanifi_Rohingya", +1, 0, 0, Hanifi_Rohingya_range32, 2 },
+ { "Hanifi_Rohingya", +1, 0, 0, Hanifi_Rohingya_range32, 2 },
{ "Hanunoo", +1, Hanunoo_range16, 1, 0, 0 },
{ "Hatran", +1, 0, 0, Hatran_range32, 3 },
{ "Hebrew", +1, Hebrew_range16, 9, 0, 0 },
- { "Hiragana", +1, Hiragana_range16, 2, Hiragana_range32, 3 },
+ { "Hiragana", +1, Hiragana_range16, 2, Hiragana_range32, 3 },
{ "Imperial_Aramaic", +1, 0, 0, Imperial_Aramaic_range32, 2 },
{ "Inherited", +1, Inherited_range16, 19, Inherited_range32, 10 },
{ "Inscriptional_Pahlavi", +1, 0, 0, Inscriptional_Pahlavi_range32, 2 },
{ "Inscriptional_Parthian", +1, 0, 0, Inscriptional_Parthian_range32, 2 },
{ "Javanese", +1, Javanese_range16, 3, 0, 0 },
- { "Kaithi", +1, 0, 0, Kaithi_range32, 2 },
- { "Kannada", +1, Kannada_range16, 13, 0, 0 },
+ { "Kaithi", +1, 0, 0, Kaithi_range32, 2 },
+ { "Kannada", +1, Kannada_range16, 13, 0, 0 },
{ "Katakana", +1, Katakana_range16, 7, Katakana_range32, 6 },
{ "Kayah_Li", +1, Kayah_Li_range16, 2, 0, 0 },
{ "Kharoshthi", +1, 0, 0, Kharoshthi_range32, 8 },
- { "Khitan_Small_Script", +1, 0, 0, Khitan_Small_Script_range32, 2 },
+ { "Khitan_Small_Script", +1, 0, 0, Khitan_Small_Script_range32, 2 },
{ "Khmer", +1, Khmer_range16, 4, 0, 0 },
{ "Khojki", +1, 0, 0, Khojki_range32, 2 },
{ "Khudawadi", +1, 0, 0, Khudawadi_range32, 2 },
{ "L", +1, L_range16, 380, L_range32, 268 },
- { "Lao", +1, Lao_range16, 11, 0, 0 },
+ { "Lao", +1, Lao_range16, 11, 0, 0 },
{ "Latin", +1, Latin_range16, 34, Latin_range32, 4 },
{ "Lepcha", +1, Lepcha_range16, 3, 0, 0 },
{ "Limbu", +1, Limbu_range16, 5, 0, 0 },
{ "Linear_A", +1, 0, 0, Linear_A_range32, 3 },
{ "Linear_B", +1, 0, 0, Linear_B_range32, 7 },
- { "Lisu", +1, Lisu_range16, 1, Lisu_range32, 1 },
+ { "Lisu", +1, Lisu_range16, 1, Lisu_range32, 1 },
{ "Ll", +1, Ll_range16, 617, Ll_range32, 40 },
{ "Lm", +1, Lm_range16, 57, Lm_range32, 12 },
{ "Lo", +1, Lo_range16, 290, Lo_range32, 211 },
@@ -6321,15 +6321,15 @@ const UGroup unicode_groups[] = {
{ "Lydian", +1, 0, 0, Lydian_range32, 2 },
{ "M", +1, M_range16, 189, M_range32, 110 },
{ "Mahajani", +1, 0, 0, Mahajani_range32, 1 },
- { "Makasar", +1, 0, 0, Makasar_range32, 1 },
- { "Malayalam", +1, Malayalam_range16, 7, 0, 0 },
+ { "Makasar", +1, 0, 0, Makasar_range32, 1 },
+ { "Malayalam", +1, Malayalam_range16, 7, 0, 0 },
{ "Mandaic", +1, Mandaic_range16, 2, 0, 0 },
{ "Manichaean", +1, 0, 0, Manichaean_range32, 2 },
- { "Marchen", +1, 0, 0, Marchen_range32, 3 },
- { "Masaram_Gondi", +1, 0, 0, Masaram_Gondi_range32, 7 },
+ { "Marchen", +1, 0, 0, Marchen_range32, 3 },
+ { "Masaram_Gondi", +1, 0, 0, Masaram_Gondi_range32, 7 },
{ "Mc", +1, Mc_range16, 111, Mc_range32, 66 },
{ "Me", +1, Me_range16, 5, 0, 0 },
- { "Medefaidrin", +1, 0, 0, Medefaidrin_range32, 1 },
+ { "Medefaidrin", +1, 0, 0, Medefaidrin_range32, 1 },
{ "Meetei_Mayek", +1, Meetei_Mayek_range16, 3, 0, 0 },
{ "Mende_Kikakui", +1, 0, 0, Mende_Kikakui_range32, 2 },
{ "Meroitic_Cursive", +1, 0, 0, Meroitic_Cursive_range32, 3 },
@@ -6343,28 +6343,28 @@ const UGroup unicode_groups[] = {
{ "Myanmar", +1, Myanmar_range16, 3, 0, 0 },
{ "N", +1, N_range16, 67, N_range32, 67 },
{ "Nabataean", +1, 0, 0, Nabataean_range32, 2 },
- { "Nandinagari", +1, 0, 0, Nandinagari_range32, 3 },
+ { "Nandinagari", +1, 0, 0, Nandinagari_range32, 3 },
{ "Nd", +1, Nd_range16, 37, Nd_range32, 25 },
{ "New_Tai_Lue", +1, New_Tai_Lue_range16, 4, 0, 0 },
- { "Newa", +1, 0, 0, Newa_range32, 2 },
- { "Nko", +1, Nko_range16, 2, 0, 0 },
+ { "Newa", +1, 0, 0, Newa_range32, 2 },
+ { "Nko", +1, Nko_range16, 2, 0, 0 },
{ "Nl", +1, Nl_range16, 7, Nl_range32, 5 },
- { "No", +1, No_range16, 29, No_range32, 42 },
- { "Nushu", +1, 0, 0, Nushu_range32, 2 },
- { "Nyiakeng_Puachue_Hmong", +1, 0, 0, Nyiakeng_Puachue_Hmong_range32, 4 },
+ { "No", +1, No_range16, 29, No_range32, 42 },
+ { "Nushu", +1, 0, 0, Nushu_range32, 2 },
+ { "Nyiakeng_Puachue_Hmong", +1, 0, 0, Nyiakeng_Puachue_Hmong_range32, 4 },
{ "Ogham", +1, Ogham_range16, 1, 0, 0 },
{ "Ol_Chiki", +1, Ol_Chiki_range16, 1, 0, 0 },
{ "Old_Hungarian", +1, 0, 0, Old_Hungarian_range32, 3 },
- { "Old_Italic", +1, 0, 0, Old_Italic_range32, 2 },
+ { "Old_Italic", +1, 0, 0, Old_Italic_range32, 2 },
{ "Old_North_Arabian", +1, 0, 0, Old_North_Arabian_range32, 1 },
{ "Old_Permic", +1, 0, 0, Old_Permic_range32, 1 },
{ "Old_Persian", +1, 0, 0, Old_Persian_range32, 2 },
- { "Old_Sogdian", +1, 0, 0, Old_Sogdian_range32, 1 },
+ { "Old_Sogdian", +1, 0, 0, Old_Sogdian_range32, 1 },
{ "Old_South_Arabian", +1, 0, 0, Old_South_Arabian_range32, 1 },
{ "Old_Turkic", +1, 0, 0, Old_Turkic_range32, 1 },
{ "Old_Uyghur", +1, 0, 0, Old_Uyghur_range32, 1 },
{ "Oriya", +1, Oriya_range16, 14, 0, 0 },
- { "Osage", +1, 0, 0, Osage_range32, 2 },
+ { "Osage", +1, 0, 0, Osage_range32, 2 },
{ "Osmanya", +1, 0, 0, Osmanya_range32, 2 },
{ "P", +1, P_range16, 133, P_range32, 56 },
{ "Pahawh_Hmong", +1, 0, 0, Pahawh_Hmong_range32, 5 },
@@ -6385,8 +6385,8 @@ const UGroup unicode_groups[] = {
{ "S", +1, S_range16, 151, S_range32, 83 },
{ "Samaritan", +1, Samaritan_range16, 2, 0, 0 },
{ "Saurashtra", +1, Saurashtra_range16, 2, 0, 0 },
- { "Sc", +1, Sc_range16, 18, Sc_range32, 3 },
- { "Sharada", +1, 0, 0, Sharada_range32, 1 },
+ { "Sc", +1, Sc_range16, 18, Sc_range32, 3 },
+ { "Sharada", +1, 0, 0, Sharada_range32, 1 },
{ "Shavian", +1, 0, 0, Shavian_range32, 1 },
{ "Siddham", +1, 0, 0, Siddham_range32, 2 },
{ "SignWriting", +1, 0, 0, SignWriting_range32, 3 },
@@ -6394,21 +6394,21 @@ const UGroup unicode_groups[] = {
{ "Sk", +1, Sk_range16, 30, Sk_range32, 1 },
{ "Sm", +1, Sm_range16, 53, Sm_range32, 11 },
{ "So", +1, So_range16, 114, So_range32, 72 },
- { "Sogdian", +1, 0, 0, Sogdian_range32, 1 },
+ { "Sogdian", +1, 0, 0, Sogdian_range32, 1 },
{ "Sora_Sompeng", +1, 0, 0, Sora_Sompeng_range32, 2 },
- { "Soyombo", +1, 0, 0, Soyombo_range32, 1 },
+ { "Soyombo", +1, 0, 0, Soyombo_range32, 1 },
{ "Sundanese", +1, Sundanese_range16, 2, 0, 0 },
{ "Syloti_Nagri", +1, Syloti_Nagri_range16, 1, 0, 0 },
- { "Syriac", +1, Syriac_range16, 4, 0, 0 },
+ { "Syriac", +1, Syriac_range16, 4, 0, 0 },
{ "Tagalog", +1, Tagalog_range16, 2, 0, 0 },
{ "Tagbanwa", +1, Tagbanwa_range16, 3, 0, 0 },
{ "Tai_Le", +1, Tai_Le_range16, 2, 0, 0 },
{ "Tai_Tham", +1, Tai_Tham_range16, 5, 0, 0 },
{ "Tai_Viet", +1, Tai_Viet_range16, 2, 0, 0 },
{ "Takri", +1, 0, 0, Takri_range32, 2 },
- { "Tamil", +1, Tamil_range16, 16, Tamil_range32, 2 },
+ { "Tamil", +1, Tamil_range16, 16, Tamil_range32, 2 },
{ "Tangsa", +1, 0, 0, Tangsa_range32, 2 },
- { "Tangut", +1, 0, 0, Tangut_range32, 4 },
+ { "Tangut", +1, 0, 0, Tangut_range32, 4 },
{ "Telugu", +1, Telugu_range16, 13, 0, 0 },
{ "Thaana", +1, Thaana_range16, 1, 0, 0 },
{ "Thai", +1, Thai_range16, 2, 0, 0 },
@@ -6419,12 +6419,12 @@ const UGroup unicode_groups[] = {
{ "Ugaritic", +1, 0, 0, Ugaritic_range32, 2 },
{ "Vai", +1, Vai_range16, 1, 0, 0 },
{ "Vithkuqi", +1, 0, 0, Vithkuqi_range32, 8 },
- { "Wancho", +1, 0, 0, Wancho_range32, 2 },
+ { "Wancho", +1, 0, 0, Wancho_range32, 2 },
{ "Warang_Citi", +1, 0, 0, Warang_Citi_range32, 2 },
- { "Yezidi", +1, 0, 0, Yezidi_range32, 3 },
+ { "Yezidi", +1, 0, 0, Yezidi_range32, 3 },
{ "Yi", +1, Yi_range16, 2, 0, 0 },
{ "Z", +1, Z_range16, 8, 0, 0 },
- { "Zanabazar_Square", +1, 0, 0, Zanabazar_Square_range32, 1 },
+ { "Zanabazar_Square", +1, 0, 0, Zanabazar_Square_range32, 1 },
{ "Zl", +1, Zl_range16, 1, 0, 0 },
{ "Zp", +1, Zp_range16, 1, 0, 0 },
{ "Zs", +1, Zs_range16, 7, 0, 0 },
diff --git a/contrib/libs/re2/re2/walker-inl.h b/contrib/libs/re2/re2/walker-inl.h
index 336b9a3167..4d064a0970 100644
--- a/contrib/libs/re2/re2/walker-inl.h
+++ b/contrib/libs/re2/re2/walker-inl.h
@@ -89,7 +89,7 @@ template<typename T> class Regexp::Walker {
private:
// Walk state for the entire traversal.
- std::stack<WalkState<T>> stack_;
+ std::stack<WalkState<T>> stack_;
bool stopped_early_;
int max_visits_;
@@ -119,7 +119,7 @@ template<typename T> T Regexp::Walker<T>::Copy(T arg) {
// State about a single level in the traversal.
template<typename T> struct WalkState {
- WalkState(Regexp* re, T parent)
+ WalkState(Regexp* re, T parent)
: re(re),
n(-1),
parent_arg(parent),
@@ -145,12 +145,12 @@ template<typename T> Regexp::Walker<T>::~Walker() {
// Walk always enters and exits with an empty stack.
// Logs DFATAL if stack is not already clear.
template<typename T> void Regexp::Walker<T>::Reset() {
- if (!stack_.empty()) {
+ if (!stack_.empty()) {
LOG(DFATAL) << "Stack not empty.";
- while (!stack_.empty()) {
+ while (!stack_.empty()) {
if (stack_.top().re->nsub_ > 1)
delete[] stack_.top().child_args;
- stack_.pop();
+ stack_.pop();
}
}
}
@@ -164,12 +164,12 @@ template<typename T> T Regexp::Walker<T>::WalkInternal(Regexp* re, T top_arg,
return top_arg;
}
- stack_.push(WalkState<T>(re, top_arg));
+ stack_.push(WalkState<T>(re, top_arg));
WalkState<T>* s;
for (;;) {
T t;
- s = &stack_.top();
+ s = &stack_.top();
re = s->re;
switch (s->n) {
case -1: {
@@ -200,7 +200,7 @@ template<typename T> T Regexp::Walker<T>::WalkInternal(Regexp* re, T top_arg,
s->child_args[s->n] = Copy(s->child_args[s->n - 1]);
s->n++;
} else {
- stack_.push(WalkState<T>(sub[s->n], s->pre_arg));
+ stack_.push(WalkState<T>(sub[s->n], s->pre_arg));
}
continue;
}
@@ -213,12 +213,12 @@ template<typename T> T Regexp::Walker<T>::WalkInternal(Regexp* re, T top_arg,
}
}
- // We've finished stack_.top().
+ // We've finished stack_.top().
// Update next guy down.
- stack_.pop();
- if (stack_.empty())
+ stack_.pop();
+ if (stack_.empty())
return t;
- s = &stack_.top();
+ s = &stack_.top();
if (s->child_args != NULL)
s->child_args[s->n] = t;
else
diff --git a/contrib/libs/re2/util/flags.h b/contrib/libs/re2/util/flags.h
index a3d5fc1234..3386b729d4 100644
--- a/contrib/libs/re2/util/flags.h
+++ b/contrib/libs/re2/util/flags.h
@@ -1,26 +1,26 @@
-// Copyright 2009 The RE2 Authors. All Rights Reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-#ifndef UTIL_FLAGS_H_
-#define UTIL_FLAGS_H_
-
-// Simplified version of Google's command line flags.
-// Does not support parsing the command line.
-// If you want to do that, see
-// https://gflags.github.io/gflags/
-
-#define DEFINE_FLAG(type, name, deflt, desc) \
- namespace re2 { type FLAGS_##name = deflt; }
-
-#define DECLARE_FLAG(type, name) \
- namespace re2 { extern type FLAGS_##name; }
-
-namespace re2 {
-template <typename T>
-T GetFlag(const T& flag) {
- return flag;
-}
-} // namespace re2
-
-#endif // UTIL_FLAGS_H_
+// Copyright 2009 The RE2 Authors. All Rights Reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef UTIL_FLAGS_H_
+#define UTIL_FLAGS_H_
+
+// Simplified version of Google's command line flags.
+// Does not support parsing the command line.
+// If you want to do that, see
+// https://gflags.github.io/gflags/
+
+#define DEFINE_FLAG(type, name, deflt, desc) \
+ namespace re2 { type FLAGS_##name = deflt; }
+
+#define DECLARE_FLAG(type, name) \
+ namespace re2 { extern type FLAGS_##name; }
+
+namespace re2 {
+template <typename T>
+T GetFlag(const T& flag) {
+ return flag;
+}
+} // namespace re2
+
+#endif // UTIL_FLAGS_H_
diff --git a/contrib/libs/re2/util/logging.h b/contrib/libs/re2/util/logging.h
index be5b4d4dbb..5b2217f29c 100644
--- a/contrib/libs/re2/util/logging.h
+++ b/contrib/libs/re2/util/logging.h
@@ -62,7 +62,7 @@ class LogMessage {
}
void Flush() {
stream() << "\n";
- std::string s = str_.str();
+ std::string s = str_.str();
size_t n = s.size();
if (fwrite(s.data(), 1, n, stderr) < n) {} // shut up gcc
flushed_ = true;
@@ -93,7 +93,7 @@ class LogMessageFatal : public LogMessage {
public:
LogMessageFatal(const char* file, int line)
: LogMessage(file, line) {}
- ATTRIBUTE_NORETURN ~LogMessageFatal() {
+ ATTRIBUTE_NORETURN ~LogMessageFatal() {
Flush();
abort();
}
diff --git a/contrib/libs/re2/util/mutex.h b/contrib/libs/re2/util/mutex.h
index 0ad97ff1eb..158046bb5c 100644
--- a/contrib/libs/re2/util/mutex.h
+++ b/contrib/libs/re2/util/mutex.h
@@ -10,13 +10,13 @@
* You should assume the locks are *not* re-entrant.
*/
-#ifdef _WIN32
-// Requires Windows Vista or Windows Server 2008 at minimum.
-#include <windows.h>
-#if defined(WINVER) && WINVER >= 0x0600
-#define MUTEX_IS_WIN32_SRWLOCK
-#endif
-#else
+#ifdef _WIN32
+// Requires Windows Vista or Windows Server 2008 at minimum.
+#include <windows.h>
+#if defined(WINVER) && WINVER >= 0x0600
+#define MUTEX_IS_WIN32_SRWLOCK
+#endif
+#else
#ifndef _POSIX_C_SOURCE
#define _POSIX_C_SOURCE 200809L
#endif
@@ -26,9 +26,9 @@
#endif
#endif
-#if defined(MUTEX_IS_WIN32_SRWLOCK)
-typedef SRWLOCK MutexType;
-#elif defined(MUTEX_IS_PTHREAD_RWLOCK)
+#if defined(MUTEX_IS_WIN32_SRWLOCK)
+typedef SRWLOCK MutexType;
+#elif defined(MUTEX_IS_PTHREAD_RWLOCK)
#include <pthread.h>
#include <stdlib.h>
typedef pthread_rwlock_t MutexType;
@@ -64,17 +64,17 @@ class Mutex {
Mutex& operator=(const Mutex&) = delete;
};
-#if defined(MUTEX_IS_WIN32_SRWLOCK)
+#if defined(MUTEX_IS_WIN32_SRWLOCK)
Mutex::Mutex() : mutex_(SRWLOCK_INIT) { }
-Mutex::~Mutex() { }
-void Mutex::Lock() { AcquireSRWLockExclusive(&mutex_); }
-void Mutex::Unlock() { ReleaseSRWLockExclusive(&mutex_); }
-void Mutex::ReaderLock() { AcquireSRWLockShared(&mutex_); }
-void Mutex::ReaderUnlock() { ReleaseSRWLockShared(&mutex_); }
-
-#elif defined(MUTEX_IS_PTHREAD_RWLOCK)
-
+Mutex::~Mutex() { }
+void Mutex::Lock() { AcquireSRWLockExclusive(&mutex_); }
+void Mutex::Unlock() { ReleaseSRWLockExclusive(&mutex_); }
+void Mutex::ReaderLock() { AcquireSRWLockShared(&mutex_); }
+void Mutex::ReaderUnlock() { ReleaseSRWLockShared(&mutex_); }
+
+#elif defined(MUTEX_IS_PTHREAD_RWLOCK)
+
#define SAFE_PTHREAD(fncall) \
do { \
if ((fncall) != 0) abort(); \
diff --git a/contrib/libs/re2/util/pcre.cc b/contrib/libs/re2/util/pcre.cc
index 93ffe9421b..b68985144f 100644
--- a/contrib/libs/re2/util/pcre.cc
+++ b/contrib/libs/re2/util/pcre.cc
@@ -1,1025 +1,1025 @@
-// Copyright 2003-2009 Google Inc. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// This is a variant of PCRE's pcrecpp.cc, originally written at Google.
-// The main changes are the addition of the HitLimit method and
-// compilation as PCRE in namespace re2.
-
-#include <assert.h>
-#include <ctype.h>
-#include <errno.h>
-#include <stdlib.h>
-#include <string.h>
-#include <limits>
-#include <string>
-#include <utility>
-
-#include "util/util.h"
-#include "util/flags.h"
-#include "util/logging.h"
-#include "util/pcre.h"
-#include "util/strutil.h"
-
-// Silence warnings about the wacky formatting in the operator() functions.
-#if !defined(__clang__) && defined(__GNUC__) && __GNUC__ >= 6
-#pragma GCC diagnostic ignored "-Wmisleading-indentation"
-#endif
-
-#define PCREPORT(level) LOG(level)
-
-// Default PCRE limits.
-// Defaults chosen to allow a plausible amount of CPU and
-// not exceed main thread stacks. Note that other threads
-// often have smaller stacks, and therefore tightening
-// regexp_stack_limit may frequently be necessary.
-DEFINE_FLAG(int, regexp_stack_limit, 256 << 10,
- "default PCRE stack limit (bytes)");
-DEFINE_FLAG(int, regexp_match_limit, 1000000,
- "default PCRE match limit (function calls)");
-
-#ifndef USEPCRE
-
-// Fake just enough of the PCRE API to allow this file to build. :)
-
-struct pcre_extra {
- int flags;
- int match_limit;
- int match_limit_recursion;
-};
-
-#define PCRE_EXTRA_MATCH_LIMIT 0
-#define PCRE_EXTRA_MATCH_LIMIT_RECURSION 0
-#define PCRE_ANCHORED 0
-#define PCRE_NOTEMPTY 0
-#define PCRE_ERROR_NOMATCH 1
-#define PCRE_ERROR_MATCHLIMIT 2
-#define PCRE_ERROR_RECURSIONLIMIT 3
-#define PCRE_INFO_CAPTURECOUNT 0
-
-void pcre_free(void*) {
-}
-
-pcre* pcre_compile(const char*, int, const char**, int*, const unsigned char*) {
- return NULL;
-}
-
-int pcre_exec(const pcre*, const pcre_extra*, const char*, int, int, int, int*, int) {
- return 0;
-}
-
-int pcre_fullinfo(const pcre*, const pcre_extra*, int, void*) {
- return 0;
-}
-
-#endif
-
-namespace re2 {
-
-// Maximum number of args we can set
-static const int kMaxArgs = 16;
-static const int kVecSize = (1 + kMaxArgs) * 3; // results + PCRE workspace
-
-// Approximate size of a recursive invocation of PCRE's
-// internal "match()" frame. This varies depending on the
-// compiler and architecture, of course, so the constant is
-// just a conservative estimate. To find the exact number,
-// run regexp_unittest with --regexp_stack_limit=0 under
-// a debugger and look at the frames when it crashes.
-// The exact frame size was 656 in production on 2008/02/03.
-static const int kPCREFrameSize = 700;
-
-// Special name for missing C++ arguments.
-PCRE::Arg PCRE::no_more_args((void*)NULL);
-
-const PCRE::PartialMatchFunctor PCRE::PartialMatch = { };
-const PCRE::FullMatchFunctor PCRE::FullMatch = { } ;
-const PCRE::ConsumeFunctor PCRE::Consume = { };
-const PCRE::FindAndConsumeFunctor PCRE::FindAndConsume = { };
-
-// If a regular expression has no error, its error_ field points here
-static const std::string empty_string;
-
-void PCRE::Init(const char* pattern, Option options, int match_limit,
- int stack_limit, bool report_errors) {
- pattern_ = pattern;
- options_ = options;
- match_limit_ = match_limit;
- stack_limit_ = stack_limit;
- hit_limit_ = false;
- error_ = &empty_string;
- report_errors_ = report_errors;
- re_full_ = NULL;
- re_partial_ = NULL;
-
- if (options & ~(EnabledCompileOptions | EnabledExecOptions)) {
- error_ = new std::string("illegal regexp option");
- PCREPORT(ERROR)
- << "Error compiling '" << pattern << "': illegal regexp option";
- } else {
- re_partial_ = Compile(UNANCHORED);
- if (re_partial_ != NULL) {
- re_full_ = Compile(ANCHOR_BOTH);
- }
- }
-}
-
-PCRE::PCRE(const char* pattern) {
- Init(pattern, None, 0, 0, true);
-}
-PCRE::PCRE(const char* pattern, Option option) {
- Init(pattern, option, 0, 0, true);
-}
-PCRE::PCRE(const std::string& pattern) {
- Init(pattern.c_str(), None, 0, 0, true);
-}
-PCRE::PCRE(const std::string& pattern, Option option) {
- Init(pattern.c_str(), option, 0, 0, true);
-}
-PCRE::PCRE(const std::string& pattern, const PCRE_Options& re_option) {
- Init(pattern.c_str(), re_option.option(), re_option.match_limit(),
- re_option.stack_limit(), re_option.report_errors());
-}
-
-PCRE::PCRE(const char *pattern, const PCRE_Options& re_option) {
- Init(pattern, re_option.option(), re_option.match_limit(),
- re_option.stack_limit(), re_option.report_errors());
-}
-
-PCRE::~PCRE() {
- if (re_full_ != NULL) pcre_free(re_full_);
- if (re_partial_ != NULL) pcre_free(re_partial_);
- if (error_ != &empty_string) delete error_;
-}
-
-pcre* PCRE::Compile(Anchor anchor) {
- // Special treatment for anchoring. This is needed because at
- // runtime pcre only provides an option for anchoring at the
- // beginning of a string.
- //
- // There are three types of anchoring we want:
- // UNANCHORED Compile the original pattern, and use
- // a pcre unanchored match.
- // ANCHOR_START Compile the original pattern, and use
- // a pcre anchored match.
- // ANCHOR_BOTH Tack a "\z" to the end of the original pattern
- // and use a pcre anchored match.
-
- const char* error = "";
- int eoffset;
- pcre* re;
- if (anchor != ANCHOR_BOTH) {
- re = pcre_compile(pattern_.c_str(),
- (options_ & EnabledCompileOptions),
- &error, &eoffset, NULL);
- } else {
- // Tack a '\z' at the end of PCRE. Parenthesize it first so that
- // the '\z' applies to all top-level alternatives in the regexp.
- std::string wrapped = "(?:"; // A non-counting grouping operator
- wrapped += pattern_;
- wrapped += ")\\z";
- re = pcre_compile(wrapped.c_str(),
- (options_ & EnabledCompileOptions),
- &error, &eoffset, NULL);
- }
- if (re == NULL) {
- if (error_ == &empty_string) error_ = new std::string(error);
- PCREPORT(ERROR) << "Error compiling '" << pattern_ << "': " << error;
- }
- return re;
-}
-
-/***** Convenience interfaces *****/
-
-bool PCRE::FullMatchFunctor::operator ()(const StringPiece& text,
- const PCRE& re,
- const Arg& a0,
- const Arg& a1,
- const Arg& a2,
- const Arg& a3,
- const Arg& a4,
- const Arg& a5,
- const Arg& a6,
- const Arg& a7,
- const Arg& a8,
- const Arg& a9,
- const Arg& a10,
- const Arg& a11,
- const Arg& a12,
- const Arg& a13,
- const Arg& a14,
- const Arg& a15) const {
- const Arg* args[kMaxArgs];
- int n = 0;
- if (&a0 == &no_more_args) goto done; args[n++] = &a0;
- if (&a1 == &no_more_args) goto done; args[n++] = &a1;
- if (&a2 == &no_more_args) goto done; args[n++] = &a2;
- if (&a3 == &no_more_args) goto done; args[n++] = &a3;
- if (&a4 == &no_more_args) goto done; args[n++] = &a4;
- if (&a5 == &no_more_args) goto done; args[n++] = &a5;
- if (&a6 == &no_more_args) goto done; args[n++] = &a6;
- if (&a7 == &no_more_args) goto done; args[n++] = &a7;
- if (&a8 == &no_more_args) goto done; args[n++] = &a8;
- if (&a9 == &no_more_args) goto done; args[n++] = &a9;
- if (&a10 == &no_more_args) goto done; args[n++] = &a10;
- if (&a11 == &no_more_args) goto done; args[n++] = &a11;
- if (&a12 == &no_more_args) goto done; args[n++] = &a12;
- if (&a13 == &no_more_args) goto done; args[n++] = &a13;
- if (&a14 == &no_more_args) goto done; args[n++] = &a14;
- if (&a15 == &no_more_args) goto done; args[n++] = &a15;
-done:
-
- size_t consumed;
- int vec[kVecSize] = {};
- return re.DoMatchImpl(text, ANCHOR_BOTH, &consumed, args, n, vec, kVecSize);
-}
-
-bool PCRE::PartialMatchFunctor::operator ()(const StringPiece& text,
- const PCRE& re,
- const Arg& a0,
- const Arg& a1,
- const Arg& a2,
- const Arg& a3,
- const Arg& a4,
- const Arg& a5,
- const Arg& a6,
- const Arg& a7,
- const Arg& a8,
- const Arg& a9,
- const Arg& a10,
- const Arg& a11,
- const Arg& a12,
- const Arg& a13,
- const Arg& a14,
- const Arg& a15) const {
- const Arg* args[kMaxArgs];
- int n = 0;
- if (&a0 == &no_more_args) goto done; args[n++] = &a0;
- if (&a1 == &no_more_args) goto done; args[n++] = &a1;
- if (&a2 == &no_more_args) goto done; args[n++] = &a2;
- if (&a3 == &no_more_args) goto done; args[n++] = &a3;
- if (&a4 == &no_more_args) goto done; args[n++] = &a4;
- if (&a5 == &no_more_args) goto done; args[n++] = &a5;
- if (&a6 == &no_more_args) goto done; args[n++] = &a6;
- if (&a7 == &no_more_args) goto done; args[n++] = &a7;
- if (&a8 == &no_more_args) goto done; args[n++] = &a8;
- if (&a9 == &no_more_args) goto done; args[n++] = &a9;
- if (&a10 == &no_more_args) goto done; args[n++] = &a10;
- if (&a11 == &no_more_args) goto done; args[n++] = &a11;
- if (&a12 == &no_more_args) goto done; args[n++] = &a12;
- if (&a13 == &no_more_args) goto done; args[n++] = &a13;
- if (&a14 == &no_more_args) goto done; args[n++] = &a14;
- if (&a15 == &no_more_args) goto done; args[n++] = &a15;
-done:
-
- size_t consumed;
- int vec[kVecSize] = {};
- return re.DoMatchImpl(text, UNANCHORED, &consumed, args, n, vec, kVecSize);
-}
-
-bool PCRE::ConsumeFunctor::operator ()(StringPiece* input,
- const PCRE& pattern,
- const Arg& a0,
- const Arg& a1,
- const Arg& a2,
- const Arg& a3,
- const Arg& a4,
- const Arg& a5,
- const Arg& a6,
- const Arg& a7,
- const Arg& a8,
- const Arg& a9,
- const Arg& a10,
- const Arg& a11,
- const Arg& a12,
- const Arg& a13,
- const Arg& a14,
- const Arg& a15) const {
- const Arg* args[kMaxArgs];
- int n = 0;
- if (&a0 == &no_more_args) goto done; args[n++] = &a0;
- if (&a1 == &no_more_args) goto done; args[n++] = &a1;
- if (&a2 == &no_more_args) goto done; args[n++] = &a2;
- if (&a3 == &no_more_args) goto done; args[n++] = &a3;
- if (&a4 == &no_more_args) goto done; args[n++] = &a4;
- if (&a5 == &no_more_args) goto done; args[n++] = &a5;
- if (&a6 == &no_more_args) goto done; args[n++] = &a6;
- if (&a7 == &no_more_args) goto done; args[n++] = &a7;
- if (&a8 == &no_more_args) goto done; args[n++] = &a8;
- if (&a9 == &no_more_args) goto done; args[n++] = &a9;
- if (&a10 == &no_more_args) goto done; args[n++] = &a10;
- if (&a11 == &no_more_args) goto done; args[n++] = &a11;
- if (&a12 == &no_more_args) goto done; args[n++] = &a12;
- if (&a13 == &no_more_args) goto done; args[n++] = &a13;
- if (&a14 == &no_more_args) goto done; args[n++] = &a14;
- if (&a15 == &no_more_args) goto done; args[n++] = &a15;
-done:
-
- size_t consumed;
- int vec[kVecSize] = {};
- if (pattern.DoMatchImpl(*input, ANCHOR_START, &consumed,
- args, n, vec, kVecSize)) {
- input->remove_prefix(consumed);
- return true;
- } else {
- return false;
- }
-}
-
-bool PCRE::FindAndConsumeFunctor::operator ()(StringPiece* input,
- const PCRE& pattern,
- const Arg& a0,
- const Arg& a1,
- const Arg& a2,
- const Arg& a3,
- const Arg& a4,
- const Arg& a5,
- const Arg& a6,
- const Arg& a7,
- const Arg& a8,
- const Arg& a9,
- const Arg& a10,
- const Arg& a11,
- const Arg& a12,
- const Arg& a13,
- const Arg& a14,
- const Arg& a15) const {
- const Arg* args[kMaxArgs];
- int n = 0;
- if (&a0 == &no_more_args) goto done; args[n++] = &a0;
- if (&a1 == &no_more_args) goto done; args[n++] = &a1;
- if (&a2 == &no_more_args) goto done; args[n++] = &a2;
- if (&a3 == &no_more_args) goto done; args[n++] = &a3;
- if (&a4 == &no_more_args) goto done; args[n++] = &a4;
- if (&a5 == &no_more_args) goto done; args[n++] = &a5;
- if (&a6 == &no_more_args) goto done; args[n++] = &a6;
- if (&a7 == &no_more_args) goto done; args[n++] = &a7;
- if (&a8 == &no_more_args) goto done; args[n++] = &a8;
- if (&a9 == &no_more_args) goto done; args[n++] = &a9;
- if (&a10 == &no_more_args) goto done; args[n++] = &a10;
- if (&a11 == &no_more_args) goto done; args[n++] = &a11;
- if (&a12 == &no_more_args) goto done; args[n++] = &a12;
- if (&a13 == &no_more_args) goto done; args[n++] = &a13;
- if (&a14 == &no_more_args) goto done; args[n++] = &a14;
- if (&a15 == &no_more_args) goto done; args[n++] = &a15;
-done:
-
- size_t consumed;
- int vec[kVecSize] = {};
- if (pattern.DoMatchImpl(*input, UNANCHORED, &consumed,
- args, n, vec, kVecSize)) {
- input->remove_prefix(consumed);
- return true;
- } else {
- return false;
- }
-}
-
-bool PCRE::Replace(std::string *str,
- const PCRE& pattern,
- const StringPiece& rewrite) {
- int vec[kVecSize] = {};
- int matches = pattern.TryMatch(*str, 0, UNANCHORED, true, vec, kVecSize);
- if (matches == 0)
- return false;
-
- std::string s;
- if (!pattern.Rewrite(&s, rewrite, *str, vec, matches))
- return false;
-
- assert(vec[0] >= 0);
- assert(vec[1] >= 0);
- str->replace(vec[0], vec[1] - vec[0], s);
- return true;
-}
-
-int PCRE::GlobalReplace(std::string *str,
- const PCRE& pattern,
- const StringPiece& rewrite) {
- int count = 0;
- int vec[kVecSize] = {};
- std::string out;
- size_t start = 0;
- bool last_match_was_empty_string = false;
-
- while (start <= str->size()) {
- // If the previous match was for the empty string, we shouldn't
- // just match again: we'll match in the same way and get an
- // infinite loop. Instead, we do the match in a special way:
- // anchored -- to force another try at the same position --
- // and with a flag saying that this time, ignore empty matches.
- // If this special match returns, that means there's a non-empty
- // match at this position as well, and we can continue. If not,
- // we do what perl does, and just advance by one.
- // Notice that perl prints '@@@' for this;
- // perl -le '$_ = "aa"; s/b*|aa/@/g; print'
- int matches;
- if (last_match_was_empty_string) {
- matches = pattern.TryMatch(*str, start, ANCHOR_START, false,
- vec, kVecSize);
- if (matches <= 0) {
- if (start < str->size())
- out.push_back((*str)[start]);
- start++;
- last_match_was_empty_string = false;
- continue;
- }
- } else {
- matches = pattern.TryMatch(*str, start, UNANCHORED, true,
- vec, kVecSize);
- if (matches <= 0)
- break;
- }
- size_t matchstart = vec[0], matchend = vec[1];
- assert(matchstart >= start);
- assert(matchend >= matchstart);
-
- out.append(*str, start, matchstart - start);
- pattern.Rewrite(&out, rewrite, *str, vec, matches);
- start = matchend;
- count++;
- last_match_was_empty_string = (matchstart == matchend);
- }
-
- if (count == 0)
- return 0;
-
- if (start < str->size())
- out.append(*str, start, str->size() - start);
- using std::swap;
- swap(out, *str);
- return count;
-}
-
-bool PCRE::Extract(const StringPiece &text,
- const PCRE& pattern,
- const StringPiece &rewrite,
- std::string *out) {
- int vec[kVecSize] = {};
- int matches = pattern.TryMatch(text, 0, UNANCHORED, true, vec, kVecSize);
- if (matches == 0)
- return false;
- out->clear();
- return pattern.Rewrite(out, rewrite, text, vec, matches);
-}
-
-std::string PCRE::QuoteMeta(const StringPiece& unquoted) {
- std::string result;
- result.reserve(unquoted.size() << 1);
-
- // Escape any ascii character not in [A-Za-z_0-9].
- //
- // Note that it's legal to escape a character even if it has no
- // special meaning in a regular expression -- so this function does
- // that. (This also makes it identical to the perl function of the
- // same name except for the null-character special case;
- // see `perldoc -f quotemeta`.)
- for (size_t ii = 0; ii < unquoted.size(); ++ii) {
- // Note that using 'isalnum' here raises the benchmark time from
- // 32ns to 58ns:
- if ((unquoted[ii] < 'a' || unquoted[ii] > 'z') &&
- (unquoted[ii] < 'A' || unquoted[ii] > 'Z') &&
- (unquoted[ii] < '0' || unquoted[ii] > '9') &&
- unquoted[ii] != '_' &&
- // If this is the part of a UTF8 or Latin1 character, we need
- // to copy this byte without escaping. Experimentally this is
- // what works correctly with the regexp library.
- !(unquoted[ii] & 128)) {
- if (unquoted[ii] == '\0') { // Special handling for null chars.
- // Can't use "\\0" since the next character might be a digit.
- result += "\\x00";
- continue;
- }
- result += '\\';
- }
- result += unquoted[ii];
- }
-
- return result;
-}
-
-/***** Actual matching and rewriting code *****/
-
-bool PCRE::HitLimit() {
- return hit_limit_ != 0;
-}
-
-void PCRE::ClearHitLimit() {
- hit_limit_ = 0;
-}
-
-int PCRE::TryMatch(const StringPiece& text,
- size_t startpos,
- Anchor anchor,
- bool empty_ok,
- int *vec,
- int vecsize) const {
- pcre* re = (anchor == ANCHOR_BOTH) ? re_full_ : re_partial_;
- if (re == NULL) {
- PCREPORT(ERROR) << "Matching against invalid re: " << *error_;
- return 0;
- }
-
- int match_limit = match_limit_;
- if (match_limit <= 0) {
- match_limit = GetFlag(FLAGS_regexp_match_limit);
- }
-
- int stack_limit = stack_limit_;
- if (stack_limit <= 0) {
- stack_limit = GetFlag(FLAGS_regexp_stack_limit);
- }
-
- pcre_extra extra = { 0 };
- if (match_limit > 0) {
- extra.flags |= PCRE_EXTRA_MATCH_LIMIT;
- extra.match_limit = match_limit;
- }
- if (stack_limit > 0) {
- extra.flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
- extra.match_limit_recursion = stack_limit / kPCREFrameSize;
- }
-
- int options = 0;
- if (anchor != UNANCHORED)
- options |= PCRE_ANCHORED;
- if (!empty_ok)
- options |= PCRE_NOTEMPTY;
-
- int rc = pcre_exec(re, // The regular expression object
- &extra,
- (text.data() == NULL) ? "" : text.data(),
- static_cast<int>(text.size()),
- static_cast<int>(startpos),
- options,
- vec,
- vecsize);
-
- // Handle errors
- if (rc == 0) {
- // pcre_exec() returns 0 as a special case when the number of
- // capturing subpatterns exceeds the size of the vector.
- // When this happens, there is a match and the output vector
- // is filled, but we miss out on the positions of the extra subpatterns.
- rc = vecsize / 2;
- } else if (rc < 0) {
- switch (rc) {
- case PCRE_ERROR_NOMATCH:
- return 0;
- case PCRE_ERROR_MATCHLIMIT:
- // Writing to hit_limit is not safe if multiple threads
- // are using the PCRE, but the flag is only intended
- // for use by unit tests anyway, so we let it go.
- hit_limit_ = true;
- PCREPORT(WARNING) << "Exceeded match limit of " << match_limit
- << " when matching '" << pattern_ << "'"
- << " against text that is " << text.size() << " bytes.";
- return 0;
- case PCRE_ERROR_RECURSIONLIMIT:
- // See comment about hit_limit above.
- hit_limit_ = true;
- PCREPORT(WARNING) << "Exceeded stack limit of " << stack_limit
- << " when matching '" << pattern_ << "'"
- << " against text that is " << text.size() << " bytes.";
- return 0;
- default:
- // There are other return codes from pcre.h :
- // PCRE_ERROR_NULL (-2)
- // PCRE_ERROR_BADOPTION (-3)
- // PCRE_ERROR_BADMAGIC (-4)
- // PCRE_ERROR_UNKNOWN_NODE (-5)
- // PCRE_ERROR_NOMEMORY (-6)
- // PCRE_ERROR_NOSUBSTRING (-7)
- // ...
- PCREPORT(ERROR) << "Unexpected return code: " << rc
- << " when matching '" << pattern_ << "'"
- << ", re=" << re
- << ", text=" << text
- << ", vec=" << vec
- << ", vecsize=" << vecsize;
- return 0;
- }
- }
-
- return rc;
-}
-
-bool PCRE::DoMatchImpl(const StringPiece& text,
- Anchor anchor,
- size_t* consumed,
- const Arg* const* args,
- int n,
- int* vec,
- int vecsize) const {
- assert((1 + n) * 3 <= vecsize); // results + PCRE workspace
- if (NumberOfCapturingGroups() < n) {
- // RE has fewer capturing groups than number of Arg pointers passed in.
- return false;
- }
-
- int matches = TryMatch(text, 0, anchor, true, vec, vecsize);
- assert(matches >= 0); // TryMatch never returns negatives
- if (matches == 0)
- return false;
-
- *consumed = vec[1];
-
- if (n == 0 || args == NULL) {
- // We are not interested in results
- return true;
- }
-
- // If we got here, we must have matched the whole pattern.
- // We do not need (can not do) any more checks on the value of 'matches' here
- // -- see the comment for TryMatch.
- for (int i = 0; i < n; i++) {
- const int start = vec[2*(i+1)];
- const int limit = vec[2*(i+1)+1];
-
- // Avoid invoking undefined behavior when text.data() happens
- // to be null and start happens to be -1, the latter being the
- // case for an unmatched subexpression. Even if text.data() is
- // not null, pointing one byte before was a longstanding bug.
- const char* addr = NULL;
- if (start != -1) {
- addr = text.data() + start;
- }
-
- if (!args[i]->Parse(addr, limit-start)) {
- // TODO: Should we indicate what the error was?
- return false;
- }
- }
-
- return true;
-}
-
-bool PCRE::DoMatch(const StringPiece& text,
- Anchor anchor,
- size_t* consumed,
- const Arg* const args[],
- int n) const {
- assert(n >= 0);
- const int vecsize = (1 + n) * 3; // results + PCRE workspace
- // (as for kVecSize)
- int* vec = new int[vecsize];
- bool b = DoMatchImpl(text, anchor, consumed, args, n, vec, vecsize);
- delete[] vec;
- return b;
-}
-
-bool PCRE::Rewrite(std::string *out, const StringPiece &rewrite,
- const StringPiece &text, int *vec, int veclen) const {
- int number_of_capturing_groups = NumberOfCapturingGroups();
- for (const char *s = rewrite.data(), *end = s + rewrite.size();
- s < end; s++) {
- int c = *s;
- if (c == '\\') {
- c = *++s;
- if (isdigit(c)) {
- int n = (c - '0');
- if (n >= veclen) {
- if (n <= number_of_capturing_groups) {
- // unmatched optional capturing group. treat
- // its value as empty string; i.e., nothing to append.
- } else {
- PCREPORT(ERROR) << "requested group " << n
- << " in regexp " << rewrite.data();
- return false;
- }
- }
- int start = vec[2 * n];
- if (start >= 0)
- out->append(text.data() + start, vec[2 * n + 1] - start);
- } else if (c == '\\') {
- out->push_back('\\');
- } else {
- PCREPORT(ERROR) << "invalid rewrite pattern: " << rewrite.data();
- return false;
- }
- } else {
- out->push_back(c);
- }
- }
- return true;
-}
-
-bool PCRE::CheckRewriteString(const StringPiece& rewrite,
- std::string* error) const {
- int max_token = -1;
- for (const char *s = rewrite.data(), *end = s + rewrite.size();
- s < end; s++) {
- int c = *s;
- if (c != '\\') {
- continue;
- }
- if (++s == end) {
- *error = "Rewrite schema error: '\\' not allowed at end.";
- return false;
- }
- c = *s;
- if (c == '\\') {
- continue;
- }
- if (!isdigit(c)) {
- *error = "Rewrite schema error: "
- "'\\' must be followed by a digit or '\\'.";
- return false;
- }
- int n = (c - '0');
- if (max_token < n) {
- max_token = n;
- }
- }
-
- if (max_token > NumberOfCapturingGroups()) {
- *error = StringPrintf(
- "Rewrite schema requests %d matches, but the regexp only has %d "
- "parenthesized subexpressions.",
- max_token, NumberOfCapturingGroups());
- return false;
- }
- return true;
-}
-
-
-// Return the number of capturing subpatterns, or -1 if the
-// regexp wasn't valid on construction.
-int PCRE::NumberOfCapturingGroups() const {
- if (re_partial_ == NULL) return -1;
-
- int result;
- int rc = pcre_fullinfo(re_partial_, // The regular expression object
- NULL, // We did not study the pattern
- PCRE_INFO_CAPTURECOUNT,
- &result);
- if (rc != 0) {
- PCREPORT(ERROR) << "Unexpected return code: " << rc;
- return -1;
- }
- return result;
-}
-
-
-/***** Parsers for various types *****/
-
-bool PCRE::Arg::parse_null(const char* str, size_t n, void* dest) {
- // We fail if somebody asked us to store into a non-NULL void* pointer
- return (dest == NULL);
-}
-
-bool PCRE::Arg::parse_string(const char* str, size_t n, void* dest) {
- if (dest == NULL) return true;
- reinterpret_cast<std::string*>(dest)->assign(str, n);
- return true;
-}
-
-bool PCRE::Arg::parse_stringpiece(const char* str, size_t n, void* dest) {
- if (dest == NULL) return true;
- *(reinterpret_cast<StringPiece*>(dest)) = StringPiece(str, n);
- return true;
-}
-
-bool PCRE::Arg::parse_char(const char* str, size_t n, void* dest) {
- if (n != 1) return false;
- if (dest == NULL) return true;
- *(reinterpret_cast<char*>(dest)) = str[0];
- return true;
-}
-
-bool PCRE::Arg::parse_schar(const char* str, size_t n, void* dest) {
- if (n != 1) return false;
- if (dest == NULL) return true;
- *(reinterpret_cast<signed char*>(dest)) = str[0];
- return true;
-}
-
-bool PCRE::Arg::parse_uchar(const char* str, size_t n, void* dest) {
- if (n != 1) return false;
- if (dest == NULL) return true;
- *(reinterpret_cast<unsigned char*>(dest)) = str[0];
- return true;
-}
-
-// Largest number spec that we are willing to parse
-static const int kMaxNumberLength = 32;
-
-// PCREQUIPCRES "buf" must have length at least kMaxNumberLength+1
-// PCREQUIPCRES "n > 0"
-// Copies "str" into "buf" and null-terminates if necessary.
-// Returns one of:
-// a. "str" if no termination is needed
-// b. "buf" if the string was copied and null-terminated
-// c. "" if the input was invalid and has no hope of being parsed
-static const char* TerminateNumber(char* buf, const char* str, size_t n) {
- if ((n > 0) && isspace(*str)) {
- // We are less forgiving than the strtoxxx() routines and do not
- // allow leading spaces.
- return "";
- }
-
- // See if the character right after the input text may potentially
- // look like a digit.
- if (isdigit(str[n]) ||
- ((str[n] >= 'a') && (str[n] <= 'f')) ||
- ((str[n] >= 'A') && (str[n] <= 'F'))) {
- if (n > kMaxNumberLength) return ""; // Input too big to be a valid number
- memcpy(buf, str, n);
- buf[n] = '\0';
- return buf;
- } else {
- // We can parse right out of the supplied string, so return it.
- return str;
- }
-}
-
-bool PCRE::Arg::parse_long_radix(const char* str,
- size_t n,
- void* dest,
- int radix) {
- if (n == 0) return false;
- char buf[kMaxNumberLength+1];
- str = TerminateNumber(buf, str, n);
- char* end;
- errno = 0;
- long r = strtol(str, &end, radix);
- if (end != str + n) return false; // Leftover junk
- if (errno) return false;
- if (dest == NULL) return true;
- *(reinterpret_cast<long*>(dest)) = r;
- return true;
-}
-
-bool PCRE::Arg::parse_ulong_radix(const char* str,
- size_t n,
- void* dest,
- int radix) {
- if (n == 0) return false;
- char buf[kMaxNumberLength+1];
- str = TerminateNumber(buf, str, n);
- if (str[0] == '-') {
- // strtoul() will silently accept negative numbers and parse
- // them. This module is more strict and treats them as errors.
- return false;
- }
-
- char* end;
- errno = 0;
- unsigned long r = strtoul(str, &end, radix);
- if (end != str + n) return false; // Leftover junk
- if (errno) return false;
- if (dest == NULL) return true;
- *(reinterpret_cast<unsigned long*>(dest)) = r;
- return true;
-}
-
-bool PCRE::Arg::parse_short_radix(const char* str,
- size_t n,
- void* dest,
- int radix) {
- long r;
- if (!parse_long_radix(str, n, &r, radix)) return false; // Could not parse
- if ((short)r != r) return false; // Out of range
- if (dest == NULL) return true;
- *(reinterpret_cast<short*>(dest)) = (short)r;
- return true;
-}
-
-bool PCRE::Arg::parse_ushort_radix(const char* str,
- size_t n,
- void* dest,
- int radix) {
- unsigned long r;
- if (!parse_ulong_radix(str, n, &r, radix)) return false; // Could not parse
- if ((unsigned short)r != r) return false; // Out of range
- if (dest == NULL) return true;
- *(reinterpret_cast<unsigned short*>(dest)) = (unsigned short)r;
- return true;
-}
-
-bool PCRE::Arg::parse_int_radix(const char* str,
- size_t n,
- void* dest,
- int radix) {
- long r;
- if (!parse_long_radix(str, n, &r, radix)) return false; // Could not parse
- if ((int)r != r) return false; // Out of range
- if (dest == NULL) return true;
- *(reinterpret_cast<int*>(dest)) = (int)r;
- return true;
-}
-
-bool PCRE::Arg::parse_uint_radix(const char* str,
- size_t n,
- void* dest,
- int radix) {
- unsigned long r;
- if (!parse_ulong_radix(str, n, &r, radix)) return false; // Could not parse
- if ((unsigned int)r != r) return false; // Out of range
- if (dest == NULL) return true;
- *(reinterpret_cast<unsigned int*>(dest)) = (unsigned int)r;
- return true;
-}
-
-bool PCRE::Arg::parse_longlong_radix(const char* str,
- size_t n,
- void* dest,
- int radix) {
- if (n == 0) return false;
- char buf[kMaxNumberLength+1];
- str = TerminateNumber(buf, str, n);
- char* end;
- errno = 0;
- long long r = strtoll(str, &end, radix);
- if (end != str + n) return false; // Leftover junk
- if (errno) return false;
- if (dest == NULL) return true;
- *(reinterpret_cast<long long*>(dest)) = r;
- return true;
-}
-
-bool PCRE::Arg::parse_ulonglong_radix(const char* str,
- size_t n,
- void* dest,
- int radix) {
- if (n == 0) return false;
- char buf[kMaxNumberLength+1];
- str = TerminateNumber(buf, str, n);
- if (str[0] == '-') {
- // strtoull() will silently accept negative numbers and parse
- // them. This module is more strict and treats them as errors.
- return false;
- }
- char* end;
- errno = 0;
- unsigned long long r = strtoull(str, &end, radix);
- if (end != str + n) return false; // Leftover junk
- if (errno) return false;
- if (dest == NULL) return true;
- *(reinterpret_cast<unsigned long long*>(dest)) = r;
- return true;
-}
-
-static bool parse_double_float(const char* str, size_t n, bool isfloat,
- void* dest) {
- if (n == 0) return false;
- static const int kMaxLength = 200;
- char buf[kMaxLength];
- if (n >= kMaxLength) return false;
- memcpy(buf, str, n);
- buf[n] = '\0';
- char* end;
- errno = 0;
- double r;
- if (isfloat) {
- r = strtof(buf, &end);
- } else {
- r = strtod(buf, &end);
- }
- if (end != buf + n) return false; // Leftover junk
- if (errno) return false;
- if (dest == NULL) return true;
- if (isfloat) {
- *(reinterpret_cast<float*>(dest)) = (float)r;
- } else {
- *(reinterpret_cast<double*>(dest)) = r;
- }
- return true;
-}
-
-bool PCRE::Arg::parse_double(const char* str, size_t n, void* dest) {
- return parse_double_float(str, n, false, dest);
-}
-
-bool PCRE::Arg::parse_float(const char* str, size_t n, void* dest) {
- return parse_double_float(str, n, true, dest);
-}
-
-#define DEFINE_INTEGER_PARSER(name) \
- bool PCRE::Arg::parse_##name(const char* str, size_t n, void* dest) { \
- return parse_##name##_radix(str, n, dest, 10); \
- } \
- bool PCRE::Arg::parse_##name##_hex(const char* str, size_t n, void* dest) { \
- return parse_##name##_radix(str, n, dest, 16); \
- } \
- bool PCRE::Arg::parse_##name##_octal(const char* str, size_t n, \
- void* dest) { \
- return parse_##name##_radix(str, n, dest, 8); \
- } \
- bool PCRE::Arg::parse_##name##_cradix(const char* str, size_t n, \
- void* dest) { \
- return parse_##name##_radix(str, n, dest, 0); \
- }
-
-DEFINE_INTEGER_PARSER(short);
-DEFINE_INTEGER_PARSER(ushort);
-DEFINE_INTEGER_PARSER(int);
-DEFINE_INTEGER_PARSER(uint);
-DEFINE_INTEGER_PARSER(long);
-DEFINE_INTEGER_PARSER(ulong);
-DEFINE_INTEGER_PARSER(longlong);
-DEFINE_INTEGER_PARSER(ulonglong);
-
-#undef DEFINE_INTEGER_PARSER
-
-} // namespace re2
+// Copyright 2003-2009 Google Inc. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// This is a variant of PCRE's pcrecpp.cc, originally written at Google.
+// The main changes are the addition of the HitLimit method and
+// compilation as PCRE in namespace re2.
+
+#include <assert.h>
+#include <ctype.h>
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+#include <limits>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "util/util.h"
+#include "util/flags.h"
+#include "util/logging.h"
+#include "util/pcre.h"
+#include "util/strutil.h"
+
+// Silence warnings about the wacky formatting in the operator() functions.
+#if !defined(__clang__) && defined(__GNUC__) && __GNUC__ >= 6
+#pragma GCC diagnostic ignored "-Wmisleading-indentation"
+#endif
+
+#define PCREPORT(level) LOG(level)
+
+// Default PCRE limits.
+// Defaults chosen to allow a plausible amount of CPU and
+// not exceed main thread stacks. Note that other threads
+// often have smaller stacks, and therefore tightening
+// regexp_stack_limit may frequently be necessary.
+DEFINE_FLAG(int, regexp_stack_limit, 256 << 10,
+ "default PCRE stack limit (bytes)");
+DEFINE_FLAG(int, regexp_match_limit, 1000000,
+ "default PCRE match limit (function calls)");
+
+#ifndef USEPCRE
+
+// Fake just enough of the PCRE API to allow this file to build. :)
+
+// Stand-in for PCRE's pcre_extra, declaring only the three fields that
+// PCRE::TryMatch() fills in before calling pcre_exec().
+struct pcre_extra {
+ int flags;
+ int match_limit;
+ int match_limit_recursion;
+};
+
+// Flag and option bits are all 0 in the fake API, so OR-ing them into a
+// flags/options word changes nothing.
+#define PCRE_EXTRA_MATCH_LIMIT 0
+#define PCRE_EXTRA_MATCH_LIMIT_RECURSION 0
+#define PCRE_ANCHORED 0
+#define PCRE_NOTEMPTY 0
+// The three error codes get distinct values because PCRE::TryMatch() uses
+// them as case labels in one switch statement.
+#define PCRE_ERROR_NOMATCH 1
+#define PCRE_ERROR_MATCHLIMIT 2
+#define PCRE_ERROR_RECURSIONLIMIT 3
+#define PCRE_INFO_CAPTURECOUNT 0
+
+// Fake pcre_free(): does nothing.
+void pcre_free(void*) {
+}
+
+// Fake pcre_compile(): always returns NULL and leaves the error outputs
+// untouched, so PCRE::Compile() reports every pattern as failing to compile.
+pcre* pcre_compile(const char*, int, const char**, int*, const unsigned char*) {
+ return NULL;
+}
+
+// Fake pcre_exec(): returns 0 without writing to the output vector.
+int pcre_exec(const pcre*, const pcre_extra*, const char*, int, int, int, int*, int) {
+ return 0;
+}
+
+// Fake pcre_fullinfo(): returns 0 without writing to the output location.
+int pcre_fullinfo(const pcre*, const pcre_extra*, int, void*) {
+ return 0;
+}
+
+#endif
+
+namespace re2 {
+
+// Maximum number of args we can set
+static const int kMaxArgs = 16;
+static const int kVecSize = (1 + kMaxArgs) * 3; // results + PCRE workspace
+
+// Approximate size of a recursive invocation of PCRE's
+// internal "match()" frame. This varies depending on the
+// compiler and architecture, of course, so the constant is
+// just a conservative estimate. To find the exact number,
+// run regexp_unittest with --regexp_stack_limit=0 under
+// a debugger and look at the frames when it crashes.
+// The exact frame size was 656 in production on 2008/02/03.
+static const int kPCREFrameSize = 700;
+
+// Special name for missing C++ arguments.
+PCRE::Arg PCRE::no_more_args((void*)NULL);
+
+const PCRE::PartialMatchFunctor PCRE::PartialMatch = { };
+const PCRE::FullMatchFunctor PCRE::FullMatch = { } ;
+const PCRE::ConsumeFunctor PCRE::Consume = { };
+const PCRE::FindAndConsumeFunctor PCRE::FindAndConsume = { };
+
+// If a regular expression has no error, its error_ field points here
+static const std::string empty_string;
+
+void PCRE::Init(const char* pattern, Option options, int match_limit,
+ int stack_limit, bool report_errors) {
+ pattern_ = pattern;
+ options_ = options;
+ match_limit_ = match_limit;
+ stack_limit_ = stack_limit;
+ hit_limit_ = false;
+ error_ = &empty_string;
+ report_errors_ = report_errors;
+ re_full_ = NULL;
+ re_partial_ = NULL;
+
+ if (options & ~(EnabledCompileOptions | EnabledExecOptions)) {
+ error_ = new std::string("illegal regexp option");
+ PCREPORT(ERROR)
+ << "Error compiling '" << pattern << "': illegal regexp option";
+ } else {
+ re_partial_ = Compile(UNANCHORED);
+ if (re_partial_ != NULL) {
+ re_full_ = Compile(ANCHOR_BOTH);
+ }
+ }
+}
+
+// Constructors. Every overload forwards to Init(). The overloads that do not
+// take a PCRE_Options pass 0 for both limits and true for report_errors.
+PCRE::PCRE(const char* pattern) {
+ Init(pattern, None, 0, 0, true);
+}
+PCRE::PCRE(const char* pattern, Option option) {
+ Init(pattern, option, 0, 0, true);
+}
+PCRE::PCRE(const std::string& pattern) {
+ Init(pattern.c_str(), None, 0, 0, true);
+}
+PCRE::PCRE(const std::string& pattern, Option option) {
+ Init(pattern.c_str(), option, 0, 0, true);
+}
+// The PCRE_Options overloads take the option bits, both limits and the
+// error-reporting switch from re_option.
+PCRE::PCRE(const std::string& pattern, const PCRE_Options& re_option) {
+ Init(pattern.c_str(), re_option.option(), re_option.match_limit(),
+ re_option.stack_limit(), re_option.report_errors());
+}
+
+PCRE::PCRE(const char *pattern, const PCRE_Options& re_option) {
+ Init(pattern, re_option.option(), re_option.match_limit(),
+ re_option.stack_limit(), re_option.report_errors());
+}
+
+PCRE::~PCRE() {
+ if (re_full_ != NULL) pcre_free(re_full_);
+ if (re_partial_ != NULL) pcre_free(re_partial_);
+ if (error_ != &empty_string) delete error_;
+}
+
+// Compiles pattern_ for the requested anchoring and returns the compiled
+// regexp, or NULL on failure. The first failure is recorded in error_.
+pcre* PCRE::Compile(Anchor anchor) {
+  // At match time pcre can only anchor at the beginning of the subject, so
+  // anchoring at the end has to be baked into the compiled pattern:
+  //   UNANCHORED    original pattern, matched unanchored.
+  //   ANCHOR_START  original pattern, matched with pcre's anchored option.
+  //   ANCHOR_BOTH   "(?:pattern)\z", matched with pcre's anchored option.
+  //                 The non-capturing group makes the "\z" apply to every
+  //                 top-level alternative.
+  std::string wrapped;
+  const char* source = pattern_.c_str();
+  if (anchor == ANCHOR_BOTH) {
+    wrapped = "(?:";
+    wrapped += pattern_;
+    wrapped += ")\\z";
+    source = wrapped.c_str();
+  }
+
+  const char* compile_error = "";
+  int error_offset;
+  pcre* compiled = pcre_compile(source,
+                                (options_ & EnabledCompileOptions),
+                                &compile_error, &error_offset, NULL);
+  if (compiled != NULL) {
+    return compiled;
+  }
+
+  // Keep only the first error message; later failures are just logged.
+  if (error_ == &empty_string) error_ = new std::string(compile_error);
+  PCREPORT(ERROR) << "Error compiling '" << pattern_ << "': " << compile_error;
+  return NULL;
+}
+
+/***** Convenience interfaces *****/
+
+// Matches the whole of "text" against "re" (anchored at both ends) and
+// parses the captured groups into the supplied arguments.
+bool PCRE::FullMatchFunctor::operator ()(const StringPiece& text,
+                                         const PCRE& re,
+                                         const Arg& a0, const Arg& a1,
+                                         const Arg& a2, const Arg& a3,
+                                         const Arg& a4, const Arg& a5,
+                                         const Arg& a6, const Arg& a7,
+                                         const Arg& a8, const Arg& a9,
+                                         const Arg& a10, const Arg& a11,
+                                         const Arg& a12, const Arg& a13,
+                                         const Arg& a14, const Arg& a15) const {
+  // All sixteen slots in declaration order. The caller's real arguments are
+  // the leading run up to the first slot bound to the no_more_args sentinel.
+  const Arg* const slots[kMaxArgs] = {
+    &a0, &a1, &a2,  &a3,  &a4,  &a5,  &a6,  &a7,
+    &a8, &a9, &a10, &a11, &a12, &a13, &a14, &a15,
+  };
+  const Arg* args[kMaxArgs];
+  int n = 0;
+  while (n < kMaxArgs && slots[n] != &no_more_args) {
+    args[n] = slots[n];
+    n++;
+  }
+
+  size_t consumed;
+  int vec[kVecSize] = {};
+  return re.DoMatchImpl(text, ANCHOR_BOTH, &consumed, args, n, vec, kVecSize);
+}
+
+// Searches "text" for an unanchored match of "re" and parses the captured
+// groups into the supplied arguments.
+bool PCRE::PartialMatchFunctor::operator ()(const StringPiece& text,
+                                            const PCRE& re,
+                                            const Arg& a0, const Arg& a1,
+                                            const Arg& a2, const Arg& a3,
+                                            const Arg& a4, const Arg& a5,
+                                            const Arg& a6, const Arg& a7,
+                                            const Arg& a8, const Arg& a9,
+                                            const Arg& a10, const Arg& a11,
+                                            const Arg& a12, const Arg& a13,
+                                            const Arg& a14, const Arg& a15) const {
+  // All sixteen slots in declaration order. The caller's real arguments are
+  // the leading run up to the first slot bound to the no_more_args sentinel.
+  const Arg* const slots[kMaxArgs] = {
+    &a0, &a1, &a2,  &a3,  &a4,  &a5,  &a6,  &a7,
+    &a8, &a9, &a10, &a11, &a12, &a13, &a14, &a15,
+  };
+  const Arg* args[kMaxArgs];
+  int n = 0;
+  while (n < kMaxArgs && slots[n] != &no_more_args) {
+    args[n] = slots[n];
+    n++;
+  }
+
+  size_t consumed;
+  int vec[kVecSize] = {};
+  return re.DoMatchImpl(text, UNANCHORED, &consumed, args, n, vec, kVecSize);
+}
+
+// Matches "pattern" at the very start of *input. On success the captured
+// groups are parsed into the supplied arguments and the consumed prefix is
+// removed from *input; on failure *input is left unchanged.
+bool PCRE::ConsumeFunctor::operator ()(StringPiece* input,
+                                       const PCRE& pattern,
+                                       const Arg& a0, const Arg& a1,
+                                       const Arg& a2, const Arg& a3,
+                                       const Arg& a4, const Arg& a5,
+                                       const Arg& a6, const Arg& a7,
+                                       const Arg& a8, const Arg& a9,
+                                       const Arg& a10, const Arg& a11,
+                                       const Arg& a12, const Arg& a13,
+                                       const Arg& a14, const Arg& a15) const {
+  // All sixteen slots in declaration order. The caller's real arguments are
+  // the leading run up to the first slot bound to the no_more_args sentinel.
+  const Arg* const slots[kMaxArgs] = {
+    &a0, &a1, &a2,  &a3,  &a4,  &a5,  &a6,  &a7,
+    &a8, &a9, &a10, &a11, &a12, &a13, &a14, &a15,
+  };
+  const Arg* args[kMaxArgs];
+  int n = 0;
+  while (n < kMaxArgs && slots[n] != &no_more_args) {
+    args[n] = slots[n];
+    n++;
+  }
+
+  size_t consumed;
+  int vec[kVecSize] = {};
+  const bool matched = pattern.DoMatchImpl(*input, ANCHOR_START, &consumed,
+                                           args, n, vec, kVecSize);
+  if (!matched) {
+    return false;
+  }
+  input->remove_prefix(consumed);
+  return true;
+}
+
+// Searches *input for an unanchored match of "pattern". On success the
+// captured groups are parsed into the supplied arguments and everything up
+// to the end of the match is removed from *input; on failure *input is left
+// unchanged.
+bool PCRE::FindAndConsumeFunctor::operator ()(StringPiece* input,
+                                              const PCRE& pattern,
+                                              const Arg& a0, const Arg& a1,
+                                              const Arg& a2, const Arg& a3,
+                                              const Arg& a4, const Arg& a5,
+                                              const Arg& a6, const Arg& a7,
+                                              const Arg& a8, const Arg& a9,
+                                              const Arg& a10, const Arg& a11,
+                                              const Arg& a12, const Arg& a13,
+                                              const Arg& a14, const Arg& a15) const {
+  // All sixteen slots in declaration order. The caller's real arguments are
+  // the leading run up to the first slot bound to the no_more_args sentinel.
+  const Arg* const slots[kMaxArgs] = {
+    &a0, &a1, &a2,  &a3,  &a4,  &a5,  &a6,  &a7,
+    &a8, &a9, &a10, &a11, &a12, &a13, &a14, &a15,
+  };
+  const Arg* args[kMaxArgs];
+  int n = 0;
+  while (n < kMaxArgs && slots[n] != &no_more_args) {
+    args[n] = slots[n];
+    n++;
+  }
+
+  size_t consumed;
+  int vec[kVecSize] = {};
+  const bool matched = pattern.DoMatchImpl(*input, UNANCHORED, &consumed,
+                                           args, n, vec, kVecSize);
+  if (!matched) {
+    return false;
+  }
+  input->remove_prefix(consumed);
+  return true;
+}
+
+// Replaces the first match of "pattern" in *str with the expansion of
+// "rewrite". Returns false, leaving *str untouched, when there is no match
+// or the rewrite string cannot be expanded.
+bool PCRE::Replace(std::string *str,
+                   const PCRE& pattern,
+                   const StringPiece& rewrite) {
+  int vec[kVecSize] = {};
+  const int nmatched =
+      pattern.TryMatch(*str, 0, UNANCHORED, true, vec, kVecSize);
+  if (nmatched == 0) {
+    return false;
+  }
+
+  // Expand into a scratch string first so *str is only modified on success.
+  std::string replacement;
+  const bool expanded =
+      pattern.Rewrite(&replacement, rewrite, *str, vec, nmatched);
+  if (!expanded) {
+    return false;
+  }
+
+  assert(vec[0] >= 0);
+  assert(vec[1] >= 0);
+  const int match_begin = vec[0];
+  const int match_length = vec[1] - vec[0];
+  str->replace(match_begin, match_length, replacement);
+  return true;
+}
+
+// Replaces every successive match of "pattern" in *str with the expansion of
+// "rewrite" and returns the number of replacements made. When nothing was
+// replaced, returns 0 and leaves *str untouched. The result is assembled in a
+// scratch string and swapped into *str at the end.
+int PCRE::GlobalReplace(std::string *str,
+ const PCRE& pattern,
+ const StringPiece& rewrite) {
+ int count = 0;
+ int vec[kVecSize] = {};
+ std::string out;
+ size_t start = 0;
+ bool last_match_was_empty_string = false;
+
+ // "<=" rather than "<": an empty match is still attempted at end of string.
+ while (start <= str->size()) {
+ // If the previous match was for the empty string, we shouldn't
+ // just match again: we'll match in the same way and get an
+ // infinite loop. Instead, we do the match in a special way:
+ // anchored -- to force another try at the same position --
+ // and with a flag saying that this time, ignore empty matches.
+ // If this special match returns, that means there's a non-empty
+ // match at this position as well, and we can continue. If not,
+ // we do what perl does, and just advance by one.
+ // Notice that perl prints '@@@' for this;
+ // perl -le '$_ = "aa"; s/b*|aa/@/g; print'
+ int matches;
+ if (last_match_was_empty_string) {
+ matches = pattern.TryMatch(*str, start, ANCHOR_START, false,
+ vec, kVecSize);
+ if (matches <= 0) {
+ // No non-empty match here: copy one input byte (if any) and move on.
+ if (start < str->size())
+ out.push_back((*str)[start]);
+ start++;
+ last_match_was_empty_string = false;
+ continue;
+ }
+ } else {
+ matches = pattern.TryMatch(*str, start, UNANCHORED, true,
+ vec, kVecSize);
+ if (matches <= 0)
+ break;
+ }
+ size_t matchstart = vec[0], matchend = vec[1];
+ assert(matchstart >= start);
+ assert(matchend >= matchstart);
+
+ // Copy the unmatched text before the match, then the expanded rewrite.
+ // NOTE(review): the return value of Rewrite() is ignored here.
+ out.append(*str, start, matchstart - start);
+ pattern.Rewrite(&out, rewrite, *str, vec, matches);
+ start = matchend;
+ count++;
+ last_match_was_empty_string = (matchstart == matchend);
+ }
+
+ if (count == 0)
+ return 0;
+
+ // Copy whatever follows the last match.
+ if (start < str->size())
+ out.append(*str, start, str->size() - start);
+ using std::swap;
+ swap(out, *str);
+ return count;
+}
+
+// Finds the first match of "pattern" in "text" and stores the expansion of
+// "rewrite" for that match in *out. Returns false without touching *out
+// when nothing matches; otherwise *out is cleared and the result of the
+// expansion is returned.
+bool PCRE::Extract(const StringPiece &text,
+                   const PCRE& pattern,
+                   const StringPiece &rewrite,
+                   std::string *out) {
+  int vec[kVecSize] = {};
+  const int nmatched =
+      pattern.TryMatch(text, 0, UNANCHORED, true, vec, kVecSize);
+  if (nmatched != 0) {
+    out->clear();
+    return pattern.Rewrite(out, rewrite, text, vec, nmatched);
+  }
+  return false;
+}
+
+// Returns a copy of "unquoted" in which every ASCII byte outside
+// [A-Za-z_0-9] is preceded by a backslash, so the result matches the input
+// literally when used as a regexp.
+//
+// Escaping a character that has no special meaning is legal, so no attempt
+// is made to escape only metacharacters. (This is the same behaviour as
+// perl's quotemeta except for the NUL special case; see
+// `perldoc -f quotemeta`.)
+std::string PCRE::QuoteMeta(const StringPiece& unquoted) {
+  std::string quoted;
+  quoted.reserve(unquoted.size() << 1);
+
+  for (size_t pos = 0; pos < unquoted.size(); ++pos) {
+    const char ch = unquoted[pos];
+
+    // Explicit range checks on purpose: using 'isalnum' here raises the
+    // benchmark time from 32ns to 58ns.
+    const bool is_word_char = (ch >= 'a' && ch <= 'z') ||
+                              (ch >= 'A' && ch <= 'Z') ||
+                              (ch >= '0' && ch <= '9') ||
+                              ch == '_';
+    // Bytes with the high bit set are part of a UTF8 or Latin1 character
+    // and must be copied without escaping. Experimentally this is what
+    // works correctly with the regexp library.
+    const bool has_high_bit = (ch & 128) != 0;
+
+    if (is_word_char || has_high_bit) {
+      quoted += ch;
+      continue;
+    }
+
+    if (ch == '\0') {
+      // Can't use "\\0" since the next character might be a digit.
+      quoted += "\\x00";
+      continue;
+    }
+
+    quoted += '\\';
+    quoted += ch;
+  }
+
+  return quoted;
+}
+
+/***** Actual matching and rewriting code *****/
+
+// Reports whether hit_limit_ is set. TryMatch() sets it when pcre_exec()
+// fails with the match-limit or recursion-limit error.
+bool PCRE::HitLimit() {
+ return hit_limit_ != 0;
+}
+
+// Resets hit_limit_ so that a later HitLimit() reflects only newer matches.
+void PCRE::ClearHitLimit() {
+ hit_limit_ = 0;
+}
+
+// Runs one pcre_exec() of this regexp over "text", starting at "startpos".
+//
+// anchor    ANCHOR_BOTH selects the regexp compiled with a trailing "\z";
+//           anything other than UNANCHORED adds PCRE_ANCHORED.
+// empty_ok  when false, PCRE_NOTEMPTY is passed to reject empty matches.
+// vec       output vector handed to pcre_exec().
+// vecsize   number of ints in vec.
+//
+// Returns 0 when the regexp is invalid, nothing matched, a limit was hit, or
+// pcre_exec() reported any other error. Otherwise returns the positive value
+// from pcre_exec(), or vecsize / 2 when pcre_exec() returned 0. The result
+// is never negative.
+int PCRE::TryMatch(const StringPiece& text,
+ size_t startpos,
+ Anchor anchor,
+ bool empty_ok,
+ int *vec,
+ int vecsize) const {
+ pcre* re = (anchor == ANCHOR_BOTH) ? re_full_ : re_partial_;
+ if (re == NULL) {
+ PCREPORT(ERROR) << "Matching against invalid re: " << *error_;
+ return 0;
+ }
+
+ // Per-object limits that are <= 0 fall back to the flag defaults.
+ int match_limit = match_limit_;
+ if (match_limit <= 0) {
+ match_limit = GetFlag(FLAGS_regexp_match_limit);
+ }
+
+ int stack_limit = stack_limit_;
+ if (stack_limit <= 0) {
+ stack_limit = GetFlag(FLAGS_regexp_stack_limit);
+ }
+
+ pcre_extra extra = { 0 };
+ if (match_limit > 0) {
+ extra.flags |= PCRE_EXTRA_MATCH_LIMIT;
+ extra.match_limit = match_limit;
+ }
+ if (stack_limit > 0) {
+ // The stack limit is in bytes; convert it to a recursion depth using the
+ // estimated size of one match() frame.
+ extra.flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
+ extra.match_limit_recursion = stack_limit / kPCREFrameSize;
+ }
+
+ int options = 0;
+ if (anchor != UNANCHORED)
+ options |= PCRE_ANCHORED;
+ if (!empty_ok)
+ options |= PCRE_NOTEMPTY;
+
+ // A null text.data() is replaced by "" so pcre_exec() never receives a
+ // null subject pointer.
+ int rc = pcre_exec(re, // The regular expression object
+ &extra,
+ (text.data() == NULL) ? "" : text.data(),
+ static_cast<int>(text.size()),
+ static_cast<int>(startpos),
+ options,
+ vec,
+ vecsize);
+
+ // Handle errors
+ if (rc == 0) {
+ // pcre_exec() returns 0 as a special case when the number of
+ // capturing subpatterns exceeds the size of the vector.
+ // When this happens, there is a match and the output vector
+ // is filled, but we miss out on the positions of the extra subpatterns.
+ rc = vecsize / 2;
+ } else if (rc < 0) {
+ switch (rc) {
+ case PCRE_ERROR_NOMATCH:
+ return 0;
+ case PCRE_ERROR_MATCHLIMIT:
+ // Writing to hit_limit is not safe if multiple threads
+ // are using the PCRE, but the flag is only intended
+ // for use by unit tests anyway, so we let it go.
+ hit_limit_ = true;
+ PCREPORT(WARNING) << "Exceeded match limit of " << match_limit
+ << " when matching '" << pattern_ << "'"
+ << " against text that is " << text.size() << " bytes.";
+ return 0;
+ case PCRE_ERROR_RECURSIONLIMIT:
+ // See comment about hit_limit above.
+ hit_limit_ = true;
+ PCREPORT(WARNING) << "Exceeded stack limit of " << stack_limit
+ << " when matching '" << pattern_ << "'"
+ << " against text that is " << text.size() << " bytes.";
+ return 0;
+ default:
+ // There are other return codes from pcre.h :
+ // PCRE_ERROR_NULL (-2)
+ // PCRE_ERROR_BADOPTION (-3)
+ // PCRE_ERROR_BADMAGIC (-4)
+ // PCRE_ERROR_UNKNOWN_NODE (-5)
+ // PCRE_ERROR_NOMEMORY (-6)
+ // PCRE_ERROR_NOSUBSTRING (-7)
+ // ...
+ PCREPORT(ERROR) << "Unexpected return code: " << rc
+ << " when matching '" << pattern_ << "'"
+ << ", re=" << re
+ << ", text=" << text
+ << ", vec=" << vec
+ << ", vecsize=" << vecsize;
+ return 0;
+ }
+ }
+
+ return rc;
+}
+
+// Matches "text" with the given anchoring, stores the end offset of the
+// match in *consumed, and parses capture group i+1 into args[i] for each of
+// the n arguments. "vec" is the caller-provided output vector of "vecsize"
+// ints; it must hold at least (1 + n) * 3 entries.
+//
+// Returns false when the regexp has fewer than n capturing groups, when
+// nothing matches, or when any argument fails to parse.
+bool PCRE::DoMatchImpl(const StringPiece& text,
+                       Anchor anchor,
+                       size_t* consumed,
+                       const Arg* const* args,
+                       int n,
+                       int* vec,
+                       int vecsize) const {
+  assert((1 + n) * 3 <= vecsize);  // results + PCRE workspace
+
+  // More Arg pointers than capturing groups can never be satisfied.
+  if (NumberOfCapturingGroups() < n) {
+    return false;
+  }
+
+  const int matches = TryMatch(text, 0, anchor, true, vec, vecsize);
+  assert(matches >= 0);  // TryMatch never returns negatives
+  if (matches == 0) {
+    return false;
+  }
+
+  *consumed = vec[1];
+
+  // Caller is not interested in the captured values.
+  if (n == 0 || args == NULL) {
+    return true;
+  }
+
+  // The whole pattern matched. No further checks on 'matches' are needed
+  // (or possible) here -- see the comment for TryMatch.
+  for (int group = 1; group <= n; group++) {
+    const int start = vec[2 * group];
+    const int limit = vec[2 * group + 1];
+
+    // An unmatched subexpression has start == -1. Hand Parse() a null
+    // pointer in that case instead of computing text.data() - 1, which is
+    // undefined behavior when text.data() is null and was a longstanding
+    // bug even when it is not.
+    const char* addr = (start == -1) ? NULL : text.data() + start;
+
+    if (!args[group - 1]->Parse(addr, limit - start)) {
+      // TODO: Should we indicate what the error was?
+      return false;
+    }
+  }
+
+  return true;
+}
+
+// Variable-arity entry point: sizes the pcre output vector for n arguments
+// and forwards to DoMatchImpl().
+//
+//   text      subject to match.
+//   anchor    anchoring mode, passed through to DoMatchImpl().
+//   consumed  receives the end offset of the match on success.
+//   args, n   destinations for the first n capture groups; n must be >= 0.
+//
+// Returns whatever DoMatchImpl() returns.
+bool PCRE::DoMatch(const StringPiece& text,
+                   Anchor anchor,
+                   size_t* consumed,
+                   const Arg* const args[],
+                   int n) const {
+  assert(n >= 0);
+  const int vecsize = (1 + n) * 3;  // results + PCRE workspace
+                                    // (as for kVecSize)
+  // std::vector owns the buffer, so it is released even if DoMatchImpl()
+  // exits by exception. It is also zero-initialized, like the fixed-size
+  // vectors used by the other callers in this file.
+  std::vector<int> vec(vecsize);
+  return DoMatchImpl(text, anchor, consumed, args, n, vec.data(), vecsize);
+}
+
+// Appends to *out the expansion of "rewrite" for a match described by vec.
+//
+//   out      destination; text is appended, existing contents are kept.
+//   rewrite  template in which "\N" (N a single digit) stands for capture
+//            group N and "\\" stands for a literal backslash.
+//   text     the subject that was matched; vec offsets index into it.
+//   vec      offset pairs from the match; pair N is vec[2N], vec[2N+1].
+//   veclen   number of groups reported by the match, counting group 0.
+//
+// Returns false when the template is malformed (a backslash that is last or
+// is followed by anything other than a digit or a backslash) or when it
+// refers to a group the regexp does not have. *out may already have been
+// partially extended in that case.
+bool PCRE::Rewrite(std::string *out, const StringPiece &rewrite,
+                   const StringPiece &text, int *vec, int veclen) const {
+  int number_of_capturing_groups = NumberOfCapturingGroups();
+  for (const char *s = rewrite.data(), *end = s + rewrite.size();
+       s < end; s++) {
+    if (*s != '\\') {
+      out->push_back(*s);
+      continue;
+    }
+
+    // A backslash must be followed by something. Check before advancing so
+    // that no byte past the end of the rewrite string is ever read.
+    if (++s == end) {
+      PCREPORT(ERROR) << "invalid rewrite pattern: " << rewrite;
+      return false;
+    }
+
+    // <ctype.h> functions need a value representable as unsigned char.
+    const unsigned char c = static_cast<unsigned char>(*s);
+    if (isdigit(c)) {
+      int n = (c - '0');
+      if (n >= veclen) {
+        if (n <= number_of_capturing_groups) {
+          // unmatched optional capturing group. treat
+          // its value as empty string; i.e., nothing to append.
+          // Skip the vec lookup: the match did not report this entry.
+          continue;
+        }
+        PCREPORT(ERROR) << "requested group " << n
+                        << " in regexp " << rewrite;
+        return false;
+      }
+      int start = vec[2 * n];
+      if (start >= 0)
+        out->append(text.data() + start, vec[2 * n + 1] - start);
+    } else if (c == '\\') {
+      out->push_back('\\');
+    } else {
+      PCREPORT(ERROR) << "invalid rewrite pattern: " << rewrite;
+      return false;
+    }
+  }
+  return true;
+}
+
+// Validates a rewrite template without performing a match.
+//
+//   rewrite  template to check; "\N" refers to capture group N and "\\" is
+//            a literal backslash.
+//   error    receives a description of the problem when false is returned;
+//            left untouched on success.
+//
+// Returns false when a backslash is the last character, when a backslash is
+// followed by anything other than a digit or a backslash, or when the
+// highest group referenced exceeds NumberOfCapturingGroups().
+bool PCRE::CheckRewriteString(const StringPiece& rewrite,
+                              std::string* error) const {
+  int max_token = -1;
+  for (const char *s = rewrite.data(), *end = s + rewrite.size();
+       s < end; s++) {
+    if (*s != '\\') {
+      continue;
+    }
+    if (++s == end) {
+      *error = "Rewrite schema error: '\\' not allowed at end.";
+      return false;
+    }
+    // Convert through unsigned char: passing a negative char value to
+    // isdigit() is undefined behavior.
+    const unsigned char c = static_cast<unsigned char>(*s);
+    if (c == '\\') {
+      continue;
+    }
+    if (!isdigit(c)) {
+      *error = "Rewrite schema error: "
+               "'\\' must be followed by a digit or '\\'.";
+      return false;
+    }
+    int n = (c - '0');
+    if (max_token < n) {
+      max_token = n;
+    }
+  }
+
+  if (max_token > NumberOfCapturingGroups()) {
+    *error = StringPrintf(
+        "Rewrite schema requests %d matches, but the regexp only has %d "
+        "parenthesized subexpressions.",
+        max_token, NumberOfCapturingGroups());
+    return false;
+  }
+  return true;
+}
+
+
+// Return the number of capturing subpatterns, or -1 if the
+// regexp wasn't valid on construction.
+int PCRE::NumberOfCapturingGroups() const {
+  if (re_partial_ != NULL) {
+    int capture_count;
+    const int status = pcre_fullinfo(re_partial_,  // The regular expression object
+                                     NULL,         // We did not study the pattern
+                                     PCRE_INFO_CAPTURECOUNT,
+                                     &capture_count);
+    if (status == 0) {
+      return capture_count;
+    }
+    PCREPORT(ERROR) << "Unexpected return code: " << status;
+  }
+  // Either there is no compiled regexp or the query failed.
+  return -1;
+}
+
+
+/***** Parsers for various types *****/
+
+bool PCRE::Arg::parse_null(const char* str, size_t n, void* dest) {
+ // We fail if somebody asked us to store into a non-NULL void* pointer
+ return (dest == NULL);
+}
+
+// Copies the n bytes at "str" into the std::string that "dest" points to.
+// A NULL "dest" means the caller does not want the value; always succeeds.
+bool PCRE::Arg::parse_string(const char* str, size_t n, void* dest) {
+  if (dest != NULL) {
+    std::string* target = static_cast<std::string*>(dest);
+    target->assign(str, n);
+  }
+  return true;
+}
+
+// Points the StringPiece behind "dest" at the n bytes starting at "str"
+// (no copy is made).  A NULL "dest" is skipped; always succeeds.
+bool PCRE::Arg::parse_stringpiece(const char* str, size_t n, void* dest) {
+  if (dest != NULL) {
+    StringPiece* target = static_cast<StringPiece*>(dest);
+    *target = StringPiece(str, n);
+  }
+  return true;
+}
+
+// Stores a one-byte match into the char behind "dest".
+// Fails unless the matched text is exactly one byte long; the length check
+// applies even when "dest" is NULL.
+bool PCRE::Arg::parse_char(const char* str, size_t n, void* dest) {
+  if (n != 1) return false;
+  if (dest != NULL) {
+    *static_cast<char*>(dest) = str[0];
+  }
+  return true;
+}
+
+// Stores a one-byte match into the signed char behind "dest".
+// Fails unless the matched text is exactly one byte long; the length check
+// applies even when "dest" is NULL.
+bool PCRE::Arg::parse_schar(const char* str, size_t n, void* dest) {
+  if (n != 1) return false;
+  if (dest != NULL) {
+    *static_cast<signed char*>(dest) = str[0];
+  }
+  return true;
+}
+
+// Stores a one-byte match into the unsigned char behind "dest".
+// Fails unless the matched text is exactly one byte long; the length check
+// applies even when "dest" is NULL.
+bool PCRE::Arg::parse_uchar(const char* str, size_t n, void* dest) {
+  if (n != 1) return false;
+  if (dest != NULL) {
+    *static_cast<unsigned char*>(dest) = str[0];
+  }
+  return true;
+}
+
+// Largest number spec that we are willing to parse
+static const int kMaxNumberLength = 32;
+
+// Prepares the n-byte numeric text at "str" so that it can be handed to the
+// strtoxxx() routines, which stop only at a character that cannot belong to
+// a number.
+//
+// REQUIRES "buf" must have length at least kMaxNumberLength+1
+// REQUIRES "n > 0"
+// REQUIRES "str[n]" must be readable: the byte just past the input is
+//          inspected to decide whether a terminated copy is needed.
+// Copies "str" into "buf" and null-terminates if necessary.
+// Returns one of:
+//   a. "str" if no termination is needed
+//   b. "buf" if the string was copied and null-terminated
+//   c. "" if the input was invalid and has no hope of being parsed
+static const char* TerminateNumber(char* buf, const char* str, size_t n) {
+  // The <cctype> classifiers are only defined for EOF and for values
+  // representable as unsigned char.  Convert before calling them: a plain
+  // char holding a byte >= 0x80 is negative where char is signed, and
+  // passing that is undefined behavior.
+  if ((n > 0) && isspace(static_cast<unsigned char>(*str))) {
+    // We are less forgiving than the strtoxxx() routines and do not
+    // allow leading spaces.
+    return "";
+  }
+
+  // See if the character right after the input text may potentially
+  // look like a digit (decimal or hexadecimal).
+  const unsigned char next = static_cast<unsigned char>(str[n]);
+  if (isdigit(next) ||
+      ((next >= 'a') && (next <= 'f')) ||
+      ((next >= 'A') && (next <= 'F'))) {
+    if (n > kMaxNumberLength) return "";  // Input too big to be a valid number
+    memcpy(buf, str, n);
+    buf[n] = '\0';
+    return buf;
+  }
+
+  // We can parse right out of the supplied string, so return it.
+  return str;
+}
+
+// Parses the n bytes at "str" as a long in the given radix and, when "dest"
+// is non-NULL, stores the value there.  Fails on empty input, on text that
+// strtol() does not consume completely, and whenever strtol() sets errno.
+bool PCRE::Arg::parse_long_radix(const char* str,
+                                 size_t n,
+                                 void* dest,
+                                 int radix) {
+  if (n == 0) return false;
+
+  char scratch[kMaxNumberLength+1];
+  const char* text = TerminateNumber(scratch, str, n);
+
+  char* stop = NULL;
+  errno = 0;
+  const long value = strtol(text, &stop, radix);
+  // Reject leftover junk after the number as well as conversion errors.
+  if (stop != text + n || errno != 0) return false;
+
+  if (dest != NULL) {
+    *static_cast<long*>(dest) = value;
+  }
+  return true;
+}
+
+// Parses the n bytes at "str" as an unsigned long in the given radix and,
+// when "dest" is non-NULL, stores the value there.  Fails on empty input,
+// on a leading '-', on text that strtoul() does not consume completely, and
+// whenever strtoul() sets errno.
+bool PCRE::Arg::parse_ulong_radix(const char* str,
+                                  size_t n,
+                                  void* dest,
+                                  int radix) {
+  if (n == 0) return false;
+
+  char scratch[kMaxNumberLength+1];
+  const char* text = TerminateNumber(scratch, str, n);
+  // strtoul() accepts a leading minus sign and parses the number anyway;
+  // this module is stricter and reports such input as an error.
+  if (text[0] == '-') return false;
+
+  char* stop = NULL;
+  errno = 0;
+  const unsigned long value = strtoul(text, &stop, radix);
+  // Reject leftover junk after the number as well as conversion errors.
+  if (stop != text + n || errno != 0) return false;
+
+  if (dest != NULL) {
+    *static_cast<unsigned long*>(dest) = value;
+  }
+  return true;
+}
+
+// Parses a short by delegating to parse_long_radix() and then checking that
+// the value survives narrowing to short unchanged.
+bool PCRE::Arg::parse_short_radix(const char* str,
+                                  size_t n,
+                                  void* dest,
+                                  int radix) {
+  long wide;
+  if (!parse_long_radix(str, n, &wide, radix)) return false;  // Could not parse
+
+  const short narrow = (short)wide;
+  if (narrow != wide) return false;  // Out of range
+
+  if (dest != NULL) {
+    *static_cast<short*>(dest) = narrow;
+  }
+  return true;
+}
+
+// Parses an unsigned short by delegating to parse_ulong_radix() and then
+// checking that the value survives narrowing to unsigned short unchanged.
+bool PCRE::Arg::parse_ushort_radix(const char* str,
+                                   size_t n,
+                                   void* dest,
+                                   int radix) {
+  unsigned long wide;
+  if (!parse_ulong_radix(str, n, &wide, radix)) return false;  // Could not parse
+
+  const unsigned short narrow = (unsigned short)wide;
+  if (narrow != wide) return false;  // Out of range
+
+  if (dest != NULL) {
+    *static_cast<unsigned short*>(dest) = narrow;
+  }
+  return true;
+}
+
+// Parses an int by delegating to parse_long_radix() and then checking that
+// the value survives narrowing to int unchanged.
+bool PCRE::Arg::parse_int_radix(const char* str,
+                                size_t n,
+                                void* dest,
+                                int radix) {
+  long wide;
+  if (!parse_long_radix(str, n, &wide, radix)) return false;  // Could not parse
+
+  const int narrow = (int)wide;
+  if (narrow != wide) return false;  // Out of range
+
+  if (dest != NULL) {
+    *static_cast<int*>(dest) = narrow;
+  }
+  return true;
+}
+
+// Parses an unsigned int by delegating to parse_ulong_radix() and then
+// checking that the value survives narrowing to unsigned int unchanged.
+bool PCRE::Arg::parse_uint_radix(const char* str,
+                                 size_t n,
+                                 void* dest,
+                                 int radix) {
+  unsigned long wide;
+  if (!parse_ulong_radix(str, n, &wide, radix)) return false;  // Could not parse
+
+  const unsigned int narrow = (unsigned int)wide;
+  if (narrow != wide) return false;  // Out of range
+
+  if (dest != NULL) {
+    *static_cast<unsigned int*>(dest) = narrow;
+  }
+  return true;
+}
+
+// Parses the n bytes at "str" as a long long in the given radix and, when
+// "dest" is non-NULL, stores the value there.  Fails on empty input, on text
+// that strtoll() does not consume completely, and whenever strtoll() sets
+// errno.
+bool PCRE::Arg::parse_longlong_radix(const char* str,
+                                     size_t n,
+                                     void* dest,
+                                     int radix) {
+  if (n == 0) return false;
+
+  char scratch[kMaxNumberLength+1];
+  const char* text = TerminateNumber(scratch, str, n);
+
+  char* stop = NULL;
+  errno = 0;
+  const long long value = strtoll(text, &stop, radix);
+  // Reject leftover junk after the number as well as conversion errors.
+  if (stop != text + n || errno != 0) return false;
+
+  if (dest != NULL) {
+    *static_cast<long long*>(dest) = value;
+  }
+  return true;
+}
+
+// Parses the n bytes at "str" as an unsigned long long in the given radix
+// and, when "dest" is non-NULL, stores the value there.  Fails on empty
+// input, on a leading '-', on text that strtoull() does not consume
+// completely, and whenever strtoull() sets errno.
+bool PCRE::Arg::parse_ulonglong_radix(const char* str,
+                                      size_t n,
+                                      void* dest,
+                                      int radix) {
+  if (n == 0) return false;
+
+  char scratch[kMaxNumberLength+1];
+  const char* text = TerminateNumber(scratch, str, n);
+  // strtoull() accepts a leading minus sign and parses the number anyway;
+  // this module is stricter and reports such input as an error.
+  if (text[0] == '-') return false;
+
+  char* stop = NULL;
+  errno = 0;
+  const unsigned long long value = strtoull(text, &stop, radix);
+  // Reject leftover junk after the number as well as conversion errors.
+  if (stop != text + n || errno != 0) return false;
+
+  if (dest != NULL) {
+    *static_cast<unsigned long long*>(dest) = value;
+  }
+  return true;
+}
+
+// Shared implementation behind parse_double() and parse_float().
+//
+// Copies the n bytes at "str" into a local, null-terminated buffer and
+// converts them with strtof() (when "isfloat" is true) or strtod().  When
+// "dest" is non-NULL the result is stored there as a float or a double
+// respectively.  Fails on empty input, on input of 200 bytes or more, on
+// text the conversion does not consume completely, and whenever the
+// conversion sets errno.
+static bool parse_double_float(const char* str, size_t n, bool isfloat,
+                               void* dest) {
+  static const int kMaxLength = 200;
+  if (n == 0 || n >= kMaxLength) return false;
+
+  char scratch[kMaxLength];
+  memcpy(scratch, str, n);
+  scratch[n] = '\0';
+
+  char* stop = NULL;
+  errno = 0;
+  const double value =
+      isfloat ? strtof(scratch, &stop) : strtod(scratch, &stop);
+  if (stop != scratch + n) return false;  // Leftover junk
+  if (errno != 0) return false;
+
+  if (dest != NULL) {
+    if (isfloat) {
+      *static_cast<float*>(dest) = (float)value;
+    } else {
+      *static_cast<double*>(dest) = value;
+    }
+  }
+  return true;
+}
+
+// Parses the n bytes at "str" as a double; see parse_double_float().
+bool PCRE::Arg::parse_double(const char* str, size_t n, void* dest) {
+  const bool as_float = false;
+  return parse_double_float(str, n, as_float, dest);
+}
+
+// Parses the n bytes at "str" as a float; see parse_double_float().
+bool PCRE::Arg::parse_float(const char* str, size_t n, void* dest) {
+  const bool as_float = true;
+  return parse_double_float(str, n, as_float, dest);
+}
+
+// For an integer type tag "name", defines the four fixed-radix entry points
+// of PCRE::Arg, each forwarding to parse_<name>_radix() with a constant
+// radix argument:
+//   parse_<name>         radix 10
+//   parse_<name>_hex     radix 16
+//   parse_<name>_octal   radix 8
+//   parse_<name>_cradix  radix 0
+// The macro is expanded once per supported integer type below and then
+// undefined so that it does not leak past this file section.
+#define DEFINE_INTEGER_PARSER(name) \
+ bool PCRE::Arg::parse_##name(const char* str, size_t n, void* dest) { \
+ return parse_##name##_radix(str, n, dest, 10); \
+ } \
+ bool PCRE::Arg::parse_##name##_hex(const char* str, size_t n, void* dest) { \
+ return parse_##name##_radix(str, n, dest, 16); \
+ } \
+ bool PCRE::Arg::parse_##name##_octal(const char* str, size_t n, \
+ void* dest) { \
+ return parse_##name##_radix(str, n, dest, 8); \
+ } \
+ bool PCRE::Arg::parse_##name##_cradix(const char* str, size_t n, \
+ void* dest) { \
+ return parse_##name##_radix(str, n, dest, 0); \
+ }
+
+DEFINE_INTEGER_PARSER(short);
+DEFINE_INTEGER_PARSER(ushort);
+DEFINE_INTEGER_PARSER(int);
+DEFINE_INTEGER_PARSER(uint);
+DEFINE_INTEGER_PARSER(long);
+DEFINE_INTEGER_PARSER(ulong);
+DEFINE_INTEGER_PARSER(longlong);
+DEFINE_INTEGER_PARSER(ulonglong);
+
+#undef DEFINE_INTEGER_PARSER
+
+} // namespace re2
diff --git a/contrib/libs/re2/util/pcre.h b/contrib/libs/re2/util/pcre.h
index 500c56d283..896b0bdf89 100644
--- a/contrib/libs/re2/util/pcre.h
+++ b/contrib/libs/re2/util/pcre.h
@@ -1,681 +1,681 @@
-// Copyright 2003-2010 Google Inc. All Rights Reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-#ifndef UTIL_PCRE_H_
-#define UTIL_PCRE_H_
-
-// This is a variant of PCRE's pcrecpp.h, originally written at Google.
-// The main changes are the addition of the HitLimit method and
-// compilation as PCRE in namespace re2.
-
-// C++ interface to the pcre regular-expression library. PCRE supports
-// Perl-style regular expressions (with extensions like \d, \w, \s,
-// ...).
-//
-// -----------------------------------------------------------------------
-// REGEXP SYNTAX:
-//
-// This module uses the pcre library and hence supports its syntax
-// for regular expressions:
-//
-// http://www.google.com/search?q=pcre
-//
-// The syntax is pretty similar to Perl's. For those not familiar
-// with Perl's regular expressions, here are some examples of the most
-// commonly used extensions:
-//
-// "hello (\\w+) world" -- \w matches a "word" character
-// "version (\\d+)" -- \d matches a digit
-// "hello\\s+world" -- \s matches any whitespace character
-// "\\b(\\w+)\\b" -- \b matches empty string at a word boundary
-// "(?i)hello" -- (?i) turns on case-insensitive matching
-// "/\\*(.*?)\\*/" -- .*? matches . minimum no. of times possible
-//
-// -----------------------------------------------------------------------
-// MATCHING INTERFACE:
-//
-// The "FullMatch" operation checks that supplied text matches a
-// supplied pattern exactly.
-//
-// Example: successful match
-// CHECK(PCRE::FullMatch("hello", "h.*o"));
-//
-// Example: unsuccessful match (requires full match):
-// CHECK(!PCRE::FullMatch("hello", "e"));
-//
-// -----------------------------------------------------------------------
-// UTF-8 AND THE MATCHING INTERFACE:
-//
-// By default, pattern and text are plain text, one byte per character.
-// The UTF8 flag, passed to the constructor, causes both pattern
-// and string to be treated as UTF-8 text, still a byte stream but
-// potentially multiple bytes per character. In practice, the text
-// is likelier to be UTF-8 than the pattern, but the match returned
-// may depend on the UTF8 flag, so always use it when matching
-// UTF8 text. E.g., "." will match one byte normally but with UTF8
-// set may match up to three bytes of a multi-byte character.
-//
-// Example:
-// PCRE re(utf8_pattern, PCRE::UTF8);
-// CHECK(PCRE::FullMatch(utf8_string, re));
-//
-// -----------------------------------------------------------------------
-// MATCHING WITH SUBSTRING EXTRACTION:
-//
-// You can supply extra pointer arguments to extract matched substrings.
-//
-// Example: extracts "ruby" into "s" and 1234 into "i"
-// int i;
-// std::string s;
-// CHECK(PCRE::FullMatch("ruby:1234", "(\\w+):(\\d+)", &s, &i));
-//
-// Example: fails because string cannot be stored in integer
-// CHECK(!PCRE::FullMatch("ruby", "(.*)", &i));
-//
-// Example: fails because there aren't enough sub-patterns:
-// CHECK(!PCRE::FullMatch("ruby:1234", "\\w+:\\d+", &s));
-//
-// Example: does not try to extract any extra sub-patterns
-// CHECK(PCRE::FullMatch("ruby:1234", "(\\w+):(\\d+)", &s));
-//
-// Example: does not try to extract into NULL
-// CHECK(PCRE::FullMatch("ruby:1234", "(\\w+):(\\d+)", NULL, &i));
-//
-// Example: integer overflow causes failure
-// CHECK(!PCRE::FullMatch("ruby:1234567891234", "\\w+:(\\d+)", &i));
-//
-// -----------------------------------------------------------------------
-// PARTIAL MATCHES
-//
-// You can use the "PartialMatch" operation when you want the pattern
-// to match any substring of the text.
-//
-// Example: simple search for a string:
-// CHECK(PCRE::PartialMatch("hello", "ell"));
-//
-// Example: find first number in a string
-// int number;
-// CHECK(PCRE::PartialMatch("x*100 + 20", "(\\d+)", &number));
-// CHECK_EQ(number, 100);
-//
-// -----------------------------------------------------------------------
-// PPCRE-COMPILED PCREGULAR EXPPCRESSIONS
-//
-// PCRE makes it easy to use any string as a regular expression, without
-// requiring a separate compilation step.
-//
-// If speed is of the essence, you can create a pre-compiled "PCRE"
-// object from the pattern and use it multiple times. If you do so,
-// you can typically parse text faster than with sscanf.
-//
-// Example: precompile pattern for faster matching:
-// PCRE pattern("h.*o");
-// while (ReadLine(&str)) {
-// if (PCRE::FullMatch(str, pattern)) ...;
-// }
-//
-// -----------------------------------------------------------------------
-// SCANNING TEXT INCPCREMENTALLY
-//
-// The "Consume" operation may be useful if you want to repeatedly
-// match regular expressions at the front of a string and skip over
-// them as they match. This requires use of the "StringPiece" type,
-// which represents a sub-range of a real string.
-//
-// Example: read lines of the form "var = value" from a string.
-// std::string contents = ...; // Fill string somehow
-// StringPiece input(contents); // Wrap a StringPiece around it
-//
-// std::string var;
-// int value;
-// while (PCRE::Consume(&input, "(\\w+) = (\\d+)\n", &var, &value)) {
-// ...;
-// }
-//
-// Each successful call to "Consume" will set "var/value", and also
-// advance "input" so it points past the matched text. Note that if the
-// regular expression matches an empty string, input will advance
-// by 0 bytes. If the regular expression being used might match
-// an empty string, the loop body must check for this case and either
-// advance the string or break out of the loop.
-//
-// The "FindAndConsume" operation is similar to "Consume" but does not
-// anchor your match at the beginning of the string. For example, you
-// could extract all words from a string by repeatedly calling
-// PCRE::FindAndConsume(&input, "(\\w+)", &word)
-//
-// -----------------------------------------------------------------------
-// PARSING HEX/OCTAL/C-RADIX NUMBERS
-//
-// By default, if you pass a pointer to a numeric value, the
-// corresponding text is interpreted as a base-10 number. You can
-// instead wrap the pointer with a call to one of the operators Hex(),
-// Octal(), or CRadix() to interpret the text in another base. The
-// CRadix operator interprets C-style "0" (base-8) and "0x" (base-16)
-// prefixes, but defaults to base-10.
-//
-// Example:
-// int a, b, c, d;
-// CHECK(PCRE::FullMatch("100 40 0100 0x40", "(.*) (.*) (.*) (.*)",
-// Octal(&a), Hex(&b), CRadix(&c), CRadix(&d));
-// will leave 64 in a, b, c, and d.
-
-#include "util/util.h"
-#include "re2/stringpiece.h"
-
-#ifdef USEPCRE
-#include <pcre.h>
-namespace re2 {
-const bool UsingPCRE = true;
-} // namespace re2
-#else
-struct pcre; // opaque
-namespace re2 {
-const bool UsingPCRE = false;
-} // namespace re2
-#endif
-
-namespace re2 {
-
-class PCRE_Options;
-
-// Interface for regular expression matching. Also corresponds to a
-// pre-compiled regular expression. An "PCRE" object is safe for
-// concurrent use by multiple threads.
-class PCRE {
- public:
- // We convert user-passed pointers into special Arg objects
- class Arg;
-
- // Marks end of arg list.
- // ONLY USE IN OPTIONAL ARG DEFAULTS.
- // DO NOT PASS EXPLICITLY.
- static Arg no_more_args;
-
- // Options are same value as those in pcre. We provide them here
- // to avoid users needing to include pcre.h and also to isolate
- // users from pcre should we change the underlying library.
- // Only those needed by Google programs are exposed here to
- // avoid collision with options employed internally by regexp.cc
- // Note that some options have equivalents that can be specified in
- // the regexp itself. For example, prefixing your regexp with
- // "(?s)" has the same effect as the PCRE_DOTALL option.
- enum Option {
- None = 0x0000,
- UTF8 = 0x0800, // == PCRE_UTF8
- EnabledCompileOptions = UTF8,
- EnabledExecOptions = 0x0000, // TODO: use to replace anchor flag
- };
-
- // We provide implicit conversions from strings so that users can
- // pass in a string or a "const char*" wherever an "PCRE" is expected.
- PCRE(const char* pattern);
- PCRE(const char* pattern, Option option);
- PCRE(const std::string& pattern);
- PCRE(const std::string& pattern, Option option);
- PCRE(const char *pattern, const PCRE_Options& re_option);
- PCRE(const std::string& pattern, const PCRE_Options& re_option);
-
- ~PCRE();
-
- // The string specification for this PCRE. E.g.
- // PCRE re("ab*c?d+");
- // re.pattern(); // "ab*c?d+"
- const std::string& pattern() const { return pattern_; }
-
- // If PCRE could not be created properly, returns an error string.
- // Else returns the empty string.
- const std::string& error() const { return *error_; }
-
- // Whether the PCRE has hit a match limit during execution.
- // Not thread safe. Intended only for testing.
- // If hitting match limits is a problem,
- // you should be using PCRE2 (re2/re2.h)
- // instead of checking this flag.
- bool HitLimit();
- void ClearHitLimit();
-
- /***** The useful part: the matching interface *****/
-
- // Matches "text" against "pattern". If pointer arguments are
- // supplied, copies matched sub-patterns into them.
- //
- // You can pass in a "const char*" or a "std::string" for "text".
- // You can pass in a "const char*" or a "std::string" or a "PCRE" for "pattern".
- //
- // The provided pointer arguments can be pointers to any scalar numeric
- // type, or one of:
- // std::string (matched piece is copied to string)
- // StringPiece (StringPiece is mutated to point to matched piece)
- // T (where "bool T::ParseFrom(const char*, size_t)" exists)
- // (void*)NULL (the corresponding matched sub-pattern is not copied)
- //
- // Returns true iff all of the following conditions are satisfied:
- // a. "text" matches "pattern" exactly
- // b. The number of matched sub-patterns is >= number of supplied pointers
- // c. The "i"th argument has a suitable type for holding the
- // string captured as the "i"th sub-pattern. If you pass in
- // NULL for the "i"th argument, or pass fewer arguments than
- // number of sub-patterns, "i"th captured sub-pattern is
- // ignored.
- //
- // CAVEAT: An optional sub-pattern that does not exist in the
- // matched string is assigned the empty string. Therefore, the
- // following will return false (because the empty string is not a
- // valid number):
- // int number;
- // PCRE::FullMatch("abc", "[a-z]+(\\d+)?", &number);
- struct FullMatchFunctor {
- bool operator ()(const StringPiece& text, const PCRE& re, // 3..16 args
- const Arg& ptr1 = no_more_args,
- const Arg& ptr2 = no_more_args,
- const Arg& ptr3 = no_more_args,
- const Arg& ptr4 = no_more_args,
- const Arg& ptr5 = no_more_args,
- const Arg& ptr6 = no_more_args,
- const Arg& ptr7 = no_more_args,
- const Arg& ptr8 = no_more_args,
- const Arg& ptr9 = no_more_args,
- const Arg& ptr10 = no_more_args,
- const Arg& ptr11 = no_more_args,
- const Arg& ptr12 = no_more_args,
- const Arg& ptr13 = no_more_args,
- const Arg& ptr14 = no_more_args,
- const Arg& ptr15 = no_more_args,
- const Arg& ptr16 = no_more_args) const;
- };
-
- static const FullMatchFunctor FullMatch;
-
- // Exactly like FullMatch(), except that "pattern" is allowed to match
- // a substring of "text".
- struct PartialMatchFunctor {
- bool operator ()(const StringPiece& text, const PCRE& re, // 3..16 args
- const Arg& ptr1 = no_more_args,
- const Arg& ptr2 = no_more_args,
- const Arg& ptr3 = no_more_args,
- const Arg& ptr4 = no_more_args,
- const Arg& ptr5 = no_more_args,
- const Arg& ptr6 = no_more_args,
- const Arg& ptr7 = no_more_args,
- const Arg& ptr8 = no_more_args,
- const Arg& ptr9 = no_more_args,
- const Arg& ptr10 = no_more_args,
- const Arg& ptr11 = no_more_args,
- const Arg& ptr12 = no_more_args,
- const Arg& ptr13 = no_more_args,
- const Arg& ptr14 = no_more_args,
- const Arg& ptr15 = no_more_args,
- const Arg& ptr16 = no_more_args) const;
- };
-
- static const PartialMatchFunctor PartialMatch;
-
- // Like FullMatch() and PartialMatch(), except that pattern has to
- // match a prefix of "text", and "input" is advanced past the matched
- // text. Note: "input" is modified iff this routine returns true.
- struct ConsumeFunctor {
- bool operator ()(StringPiece* input, const PCRE& pattern, // 3..16 args
- const Arg& ptr1 = no_more_args,
- const Arg& ptr2 = no_more_args,
- const Arg& ptr3 = no_more_args,
- const Arg& ptr4 = no_more_args,
- const Arg& ptr5 = no_more_args,
- const Arg& ptr6 = no_more_args,
- const Arg& ptr7 = no_more_args,
- const Arg& ptr8 = no_more_args,
- const Arg& ptr9 = no_more_args,
- const Arg& ptr10 = no_more_args,
- const Arg& ptr11 = no_more_args,
- const Arg& ptr12 = no_more_args,
- const Arg& ptr13 = no_more_args,
- const Arg& ptr14 = no_more_args,
- const Arg& ptr15 = no_more_args,
- const Arg& ptr16 = no_more_args) const;
- };
-
- static const ConsumeFunctor Consume;
-
- // Like Consume(..), but does not anchor the match at the beginning of the
- // string. That is, "pattern" need not start its match at the beginning of
- // "input". For example, "FindAndConsume(s, "(\\w+)", &word)" finds the next
- // word in "s" and stores it in "word".
- struct FindAndConsumeFunctor {
- bool operator ()(StringPiece* input, const PCRE& pattern,
- const Arg& ptr1 = no_more_args,
- const Arg& ptr2 = no_more_args,
- const Arg& ptr3 = no_more_args,
- const Arg& ptr4 = no_more_args,
- const Arg& ptr5 = no_more_args,
- const Arg& ptr6 = no_more_args,
- const Arg& ptr7 = no_more_args,
- const Arg& ptr8 = no_more_args,
- const Arg& ptr9 = no_more_args,
- const Arg& ptr10 = no_more_args,
- const Arg& ptr11 = no_more_args,
- const Arg& ptr12 = no_more_args,
- const Arg& ptr13 = no_more_args,
- const Arg& ptr14 = no_more_args,
- const Arg& ptr15 = no_more_args,
- const Arg& ptr16 = no_more_args) const;
- };
-
- static const FindAndConsumeFunctor FindAndConsume;
-
- // Replace the first match of "pattern" in "str" with "rewrite".
- // Within "rewrite", backslash-escaped digits (\1 to \9) can be
- // used to insert text matching corresponding parenthesized group
- // from the pattern. \0 in "rewrite" refers to the entire matching
- // text. E.g.,
- //
- // std::string s = "yabba dabba doo";
- // CHECK(PCRE::Replace(&s, "b+", "d"));
- //
- // will leave "s" containing "yada dabba doo"
- //
- // Returns true if the pattern matches and a replacement occurs,
- // false otherwise.
- static bool Replace(std::string *str,
- const PCRE& pattern,
- const StringPiece& rewrite);
-
- // Like Replace(), except replaces all occurrences of the pattern in
- // the string with the rewrite. Replacements are not subject to
- // re-matching. E.g.,
- //
- // std::string s = "yabba dabba doo";
- // CHECK(PCRE::GlobalReplace(&s, "b+", "d"));
- //
- // will leave "s" containing "yada dada doo"
- //
- // Returns the number of replacements made.
- static int GlobalReplace(std::string *str,
- const PCRE& pattern,
- const StringPiece& rewrite);
-
- // Like Replace, except that if the pattern matches, "rewrite"
- // is copied into "out" with substitutions. The non-matching
- // portions of "text" are ignored.
- //
- // Returns true iff a match occurred and the extraction happened
- // successfully; if no match occurs, the string is left unaffected.
- static bool Extract(const StringPiece &text,
- const PCRE& pattern,
- const StringPiece &rewrite,
- std::string *out);
-
- // Check that the given @p rewrite string is suitable for use with
- // this PCRE. It checks that:
- // * The PCRE has enough parenthesized subexpressions to satisfy all
- // of the \N tokens in @p rewrite, and
- // * The @p rewrite string doesn't have any syntax errors
- // ('\' followed by anything besides [0-9] and '\').
- // Making this test will guarantee that "replace" and "extract"
- // operations won't LOG(ERROR) or fail because of a bad rewrite
- // string.
- // @param rewrite The proposed rewrite string.
- // @param error An error message is recorded here, iff we return false.
- // Otherwise, it is unchanged.
- // @return true, iff @p rewrite is suitable for use with the PCRE.
- bool CheckRewriteString(const StringPiece& rewrite,
- std::string* error) const;
-
- // Returns a copy of 'unquoted' with all potentially meaningful
- // regexp characters backslash-escaped. The returned string, used
- // as a regular expression, will exactly match the original string.
- // For example,
- // 1.5-2.0?
- // becomes:
- // 1\.5\-2\.0\?
- static std::string QuoteMeta(const StringPiece& unquoted);
-
- /***** Generic matching interface (not so nice to use) *****/
-
- // Type of match (TODO: Should be restructured as an Option)
- enum Anchor {
- UNANCHORED, // No anchoring
- ANCHOR_START, // Anchor at start only
- ANCHOR_BOTH, // Anchor at start and end
- };
-
- // General matching routine. Stores the length of the match in
- // "*consumed" if successful.
- bool DoMatch(const StringPiece& text,
- Anchor anchor,
- size_t* consumed,
- const Arg* const* args, int n) const;
-
- // Return the number of capturing subpatterns, or -1 if the
- // regexp wasn't valid on construction.
- int NumberOfCapturingGroups() const;
-
- private:
- void Init(const char* pattern, Option option, int match_limit,
- int stack_limit, bool report_errors);
-
- // Match against "text", filling in "vec" (up to "vecsize" * 2/3) with
- // pairs of integers for the beginning and end positions of matched
- // text. The first pair corresponds to the entire matched text;
- // subsequent pairs correspond, in order, to parentheses-captured
- // matches. Returns the number of pairs (one more than the number of
- // the last subpattern with a match) if matching was successful
- // and zero if the match failed.
- // I.e. for PCRE("(foo)|(bar)|(baz)") it will return 2, 3, and 4 when matching
- // against "foo", "bar", and "baz" respectively.
- // When matching PCRE("(foo)|hello") against "hello", it will return 1.
- // But the values for all subpattern are filled in into "vec".
- int TryMatch(const StringPiece& text,
- size_t startpos,
- Anchor anchor,
- bool empty_ok,
- int *vec,
- int vecsize) const;
-
- // Append the "rewrite" string, with backslash subsitutions from "text"
- // and "vec", to string "out".
- bool Rewrite(std::string *out,
- const StringPiece &rewrite,
- const StringPiece &text,
- int *vec,
- int veclen) const;
-
- // internal implementation for DoMatch
- bool DoMatchImpl(const StringPiece& text,
- Anchor anchor,
- size_t* consumed,
- const Arg* const args[],
- int n,
- int* vec,
- int vecsize) const;
-
- // Compile the regexp for the specified anchoring mode
- pcre* Compile(Anchor anchor);
-
- std::string pattern_;
- Option options_;
- pcre* re_full_; // For full matches
- pcre* re_partial_; // For partial matches
- const std::string* error_; // Error indicator (or empty string)
- bool report_errors_; // Silences error logging if false
- int match_limit_; // Limit on execution resources
- int stack_limit_; // Limit on stack resources (bytes)
- mutable int32_t hit_limit_; // Hit limit during execution (bool)
-
- PCRE(const PCRE&) = delete;
- PCRE& operator=(const PCRE&) = delete;
-};
-
-// PCRE_Options allow you to set the PCRE::Options, plus any pcre
-// "extra" options. The only extras are match_limit, which limits
-// the CPU time of a match, and stack_limit, which limits the
-// stack usage. Setting a limit to <= 0 lets PCRE pick a sensible default
-// that should not cause too many problems in production code.
-// If PCRE hits a limit during a match, it may return a false negative,
-// but (hopefully) it won't crash.
-//
-// NOTE: If you are handling regular expressions specified by
-// (external or internal) users, rather than hard-coded ones,
-// you should be using PCRE2, which uses an alternate implementation
-// that avoids these issues. See http://go/re2quick.
-class PCRE_Options {
- public:
- // constructor
- PCRE_Options() : option_(PCRE::None), match_limit_(0), stack_limit_(0), report_errors_(true) {}
- // accessors
- PCRE::Option option() const { return option_; }
- void set_option(PCRE::Option option) {
- option_ = option;
- }
- int match_limit() const { return match_limit_; }
- void set_match_limit(int match_limit) {
- match_limit_ = match_limit;
- }
- int stack_limit() const { return stack_limit_; }
- void set_stack_limit(int stack_limit) {
- stack_limit_ = stack_limit;
- }
-
- // If the regular expression is malformed, an error message will be printed
- // iff report_errors() is true. Default: true.
- bool report_errors() const { return report_errors_; }
- void set_report_errors(bool report_errors) {
- report_errors_ = report_errors;
- }
- private:
- PCRE::Option option_;
- int match_limit_;
- int stack_limit_;
- bool report_errors_;
-};
-
-
-/***** Implementation details *****/
-
-// Hex/Octal/Binary?
-
-// Special class for parsing into objects that define a ParseFrom() method
-template <typename T>
-class _PCRE_MatchObject {
- public:
- static inline bool Parse(const char* str, size_t n, void* dest) {
- if (dest == NULL) return true;
- T* object = reinterpret_cast<T*>(dest);
- return object->ParseFrom(str, n);
- }
-};
-
-class PCRE::Arg {
- public:
- // Empty constructor so we can declare arrays of PCRE::Arg
- Arg();
-
- // Constructor specially designed for NULL arguments
- Arg(void*);
-
- typedef bool (*Parser)(const char* str, size_t n, void* dest);
-
-// Type-specific parsers
-#define MAKE_PARSER(type, name) \
- Arg(type* p) : arg_(p), parser_(name) {} \
- Arg(type* p, Parser parser) : arg_(p), parser_(parser) {}
-
- MAKE_PARSER(char, parse_char);
- MAKE_PARSER(signed char, parse_schar);
- MAKE_PARSER(unsigned char, parse_uchar);
- MAKE_PARSER(float, parse_float);
- MAKE_PARSER(double, parse_double);
- MAKE_PARSER(std::string, parse_string);
- MAKE_PARSER(StringPiece, parse_stringpiece);
-
- MAKE_PARSER(short, parse_short);
- MAKE_PARSER(unsigned short, parse_ushort);
- MAKE_PARSER(int, parse_int);
- MAKE_PARSER(unsigned int, parse_uint);
- MAKE_PARSER(long, parse_long);
- MAKE_PARSER(unsigned long, parse_ulong);
- MAKE_PARSER(long long, parse_longlong);
- MAKE_PARSER(unsigned long long, parse_ulonglong);
-
-#undef MAKE_PARSER
-
- // Generic constructor
- template <typename T> Arg(T*, Parser parser);
- // Generic constructor template
- template <typename T> Arg(T* p)
- : arg_(p), parser_(_PCRE_MatchObject<T>::Parse) {
- }
-
- // Parse the data
- bool Parse(const char* str, size_t n) const;
-
- private:
- void* arg_;
- Parser parser_;
-
- static bool parse_null (const char* str, size_t n, void* dest);
- static bool parse_char (const char* str, size_t n, void* dest);
- static bool parse_schar (const char* str, size_t n, void* dest);
- static bool parse_uchar (const char* str, size_t n, void* dest);
- static bool parse_float (const char* str, size_t n, void* dest);
- static bool parse_double (const char* str, size_t n, void* dest);
- static bool parse_string (const char* str, size_t n, void* dest);
- static bool parse_stringpiece (const char* str, size_t n, void* dest);
-
-#define DECLARE_INTEGER_PARSER(name) \
- private: \
- static bool parse_##name(const char* str, size_t n, void* dest); \
- static bool parse_##name##_radix(const char* str, size_t n, void* dest, \
- int radix); \
- \
- public: \
- static bool parse_##name##_hex(const char* str, size_t n, void* dest); \
- static bool parse_##name##_octal(const char* str, size_t n, void* dest); \
- static bool parse_##name##_cradix(const char* str, size_t n, void* dest)
-
- DECLARE_INTEGER_PARSER(short);
- DECLARE_INTEGER_PARSER(ushort);
- DECLARE_INTEGER_PARSER(int);
- DECLARE_INTEGER_PARSER(uint);
- DECLARE_INTEGER_PARSER(long);
- DECLARE_INTEGER_PARSER(ulong);
- DECLARE_INTEGER_PARSER(longlong);
- DECLARE_INTEGER_PARSER(ulonglong);
-
-#undef DECLARE_INTEGER_PARSER
-
-};
-
-inline PCRE::Arg::Arg() : arg_(NULL), parser_(parse_null) { }
-inline PCRE::Arg::Arg(void* p) : arg_(p), parser_(parse_null) { }
-
-inline bool PCRE::Arg::Parse(const char* str, size_t n) const {
- return (*parser_)(str, n, arg_);
-}
-
-// This part of the parser, appropriate only for ints, deals with bases
-#define MAKE_INTEGER_PARSER(type, name) \
- inline PCRE::Arg Hex(type* ptr) { \
- return PCRE::Arg(ptr, PCRE::Arg::parse_##name##_hex); \
- } \
- inline PCRE::Arg Octal(type* ptr) { \
- return PCRE::Arg(ptr, PCRE::Arg::parse_##name##_octal); \
- } \
- inline PCRE::Arg CRadix(type* ptr) { \
- return PCRE::Arg(ptr, PCRE::Arg::parse_##name##_cradix); \
- }
-
-MAKE_INTEGER_PARSER(short, short);
-MAKE_INTEGER_PARSER(unsigned short, ushort);
-MAKE_INTEGER_PARSER(int, int);
-MAKE_INTEGER_PARSER(unsigned int, uint);
-MAKE_INTEGER_PARSER(long, long);
-MAKE_INTEGER_PARSER(unsigned long, ulong);
-MAKE_INTEGER_PARSER(long long, longlong);
-MAKE_INTEGER_PARSER(unsigned long long, ulonglong);
-
-#undef MAKE_INTEGER_PARSER
-
-} // namespace re2
-
-#endif // UTIL_PCRE_H_
+// Copyright 2003-2010 Google Inc. All Rights Reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef UTIL_PCRE_H_
+#define UTIL_PCRE_H_
+
+// This is a variant of PCRE's pcrecpp.h, originally written at Google.
+// The main changes are the addition of the HitLimit method and
+// compilation as PCRE in namespace re2.
+
+// C++ interface to the pcre regular-expression library. PCRE supports
+// Perl-style regular expressions (with extensions like \d, \w, \s,
+// ...).
+//
+// -----------------------------------------------------------------------
+// REGEXP SYNTAX:
+//
+// This module uses the pcre library and hence supports its syntax
+// for regular expressions:
+//
+// http://www.google.com/search?q=pcre
+//
+// The syntax is pretty similar to Perl's. For those not familiar
+// with Perl's regular expressions, here are some examples of the most
+// commonly used extensions:
+//
+// "hello (\\w+) world" -- \w matches a "word" character
+// "version (\\d+)" -- \d matches a digit
+// "hello\\s+world" -- \s matches any whitespace character
+// "\\b(\\w+)\\b" -- \b matches empty string at a word boundary
+// "(?i)hello" -- (?i) turns on case-insensitive matching
+// "/\\*(.*?)\\*/" -- .*? matches . minimum no. of times possible
+//
+// -----------------------------------------------------------------------
+// MATCHING INTERFACE:
+//
+// The "FullMatch" operation checks that supplied text matches a
+// supplied pattern exactly.
+//
+// Example: successful match
+// CHECK(PCRE::FullMatch("hello", "h.*o"));
+//
+// Example: unsuccessful match (requires full match):
+// CHECK(!PCRE::FullMatch("hello", "e"));
+//
+// -----------------------------------------------------------------------
+// UTF-8 AND THE MATCHING INTERFACE:
+//
+// By default, pattern and text are plain text, one byte per character.
+// The UTF8 flag, passed to the constructor, causes both pattern
+// and string to be treated as UTF-8 text, still a byte stream but
+// potentially multiple bytes per character. In practice, the text
+// is likelier to be UTF-8 than the pattern, but the match returned
+// may depend on the UTF8 flag, so always use it when matching
+// UTF8 text. E.g., "." will match one byte normally but with UTF8
+// set may match up to three bytes of a multi-byte character.
+//
+// Example:
+// PCRE re(utf8_pattern, PCRE::UTF8);
+// CHECK(PCRE::FullMatch(utf8_string, re));
+//
+// -----------------------------------------------------------------------
+// MATCHING WITH SUBSTRING EXTRACTION:
+//
+// You can supply extra pointer arguments to extract matched substrings.
+//
+// Example: extracts "ruby" into "s" and 1234 into "i"
+// int i;
+// std::string s;
+// CHECK(PCRE::FullMatch("ruby:1234", "(\\w+):(\\d+)", &s, &i));
+//
+// Example: fails because string cannot be stored in integer
+// CHECK(!PCRE::FullMatch("ruby", "(.*)", &i));
+//
+// Example: fails because there aren't enough sub-patterns:
+// CHECK(!PCRE::FullMatch("ruby:1234", "\\w+:\\d+", &s));
+//
+// Example: does not try to extract any extra sub-patterns
+// CHECK(PCRE::FullMatch("ruby:1234", "(\\w+):(\\d+)", &s));
+//
+// Example: does not try to extract into NULL
+// CHECK(PCRE::FullMatch("ruby:1234", "(\\w+):(\\d+)", NULL, &i));
+//
+// Example: integer overflow causes failure
+// CHECK(!PCRE::FullMatch("ruby:1234567891234", "\\w+:(\\d+)", &i));
+//
+// -----------------------------------------------------------------------
+// PARTIAL MATCHES
+//
+// You can use the "PartialMatch" operation when you want the pattern
+// to match any substring of the text.
+//
+// Example: simple search for a string:
+// CHECK(PCRE::PartialMatch("hello", "ell"));
+//
+// Example: find first number in a string
+// int number;
+// CHECK(PCRE::PartialMatch("x*100 + 20", "(\\d+)", &number));
+// CHECK_EQ(number, 100);
+//
+// -----------------------------------------------------------------------
+// PRE-COMPILED REGULAR EXPRESSIONS
+//
+// PCRE makes it easy to use any string as a regular expression, without
+// requiring a separate compilation step.
+//
+// If speed is of the essence, you can create a pre-compiled "PCRE"
+// object from the pattern and use it multiple times. If you do so,
+// you can typically parse text faster than with sscanf.
+//
+// Example: precompile pattern for faster matching:
+// PCRE pattern("h.*o");
+// while (ReadLine(&str)) {
+// if (PCRE::FullMatch(str, pattern)) ...;
+// }
+//
+// -----------------------------------------------------------------------
+// SCANNING TEXT INCREMENTALLY
+//
+// The "Consume" operation may be useful if you want to repeatedly
+// match regular expressions at the front of a string and skip over
+// them as they match. This requires use of the "StringPiece" type,
+// which represents a sub-range of a real string.
+//
+// Example: read lines of the form "var = value" from a string.
+// std::string contents = ...; // Fill string somehow
+// StringPiece input(contents); // Wrap a StringPiece around it
+//
+// std::string var;
+// int value;
+// while (PCRE::Consume(&input, "(\\w+) = (\\d+)\n", &var, &value)) {
+// ...;
+// }
+//
+// Each successful call to "Consume" will set "var/value", and also
+// advance "input" so it points past the matched text. Note that if the
+// regular expression matches an empty string, input will advance
+// by 0 bytes. If the regular expression being used might match
+// an empty string, the loop body must check for this case and either
+// advance the string or break out of the loop.
+//
+// The "FindAndConsume" operation is similar to "Consume" but does not
+// anchor your match at the beginning of the string. For example, you
+// could extract all words from a string by repeatedly calling
+// PCRE::FindAndConsume(&input, "(\\w+)", &word)
+//
+// -----------------------------------------------------------------------
+// PARSING HEX/OCTAL/C-RADIX NUMBERS
+//
+// By default, if you pass a pointer to a numeric value, the
+// corresponding text is interpreted as a base-10 number. You can
+// instead wrap the pointer with a call to one of the operators Hex(),
+// Octal(), or CRadix() to interpret the text in another base. The
+// CRadix operator interprets C-style "0" (base-8) and "0x" (base-16)
+// prefixes, but defaults to base-10.
+//
+// Example:
+// int a, b, c, d;
+// CHECK(PCRE::FullMatch("100 40 0100 0x40", "(.*) (.*) (.*) (.*)",
+// Octal(&a), Hex(&b), CRadix(&c), CRadix(&d)));
+// will leave 64 in a, b, c, and d.
+
+#include "util/util.h"
+#include "re2/stringpiece.h"
+
+#ifdef USEPCRE
+#include <pcre.h>
+namespace re2 {
+const bool UsingPCRE = true;
+} // namespace re2
+#else
+struct pcre; // opaque
+namespace re2 {
+const bool UsingPCRE = false;
+} // namespace re2
+#endif
+
+namespace re2 {
+
+class PCRE_Options;
+
+// Interface for regular expression matching. Also corresponds to a
+// pre-compiled regular expression. A "PCRE" object is safe for
+// concurrent use by multiple threads.
+class PCRE {
+ public:
+ // We convert user-passed pointers into special Arg objects
+ class Arg;
+
+ // Marks end of arg list.
+ // ONLY USE IN OPTIONAL ARG DEFAULTS.
+ // DO NOT PASS EXPLICITLY.
+ static Arg no_more_args;
+
+ // Options are same value as those in pcre. We provide them here
+ // to avoid users needing to include pcre.h and also to isolate
+ // users from pcre should we change the underlying library.
+ // Only those needed by Google programs are exposed here to
+ // avoid collision with options employed internally by regexp.cc
+ // Note that some options have equivalents that can be specified in
+ // the regexp itself. For example, prefixing your regexp with
+ // "(?s)" has the same effect as the PCRE_DOTALL option.
+ enum Option {
+ None = 0x0000,
+ UTF8 = 0x0800, // == PCRE_UTF8
+ EnabledCompileOptions = UTF8,
+ EnabledExecOptions = 0x0000, // TODO: use to replace anchor flag
+ };
+
+ // We provide implicit conversions from strings so that users can
+ // pass in a string or a "const char*" wherever a "PCRE" is expected.
+ PCRE(const char* pattern);
+ PCRE(const char* pattern, Option option);
+ PCRE(const std::string& pattern);
+ PCRE(const std::string& pattern, Option option);
+ PCRE(const char *pattern, const PCRE_Options& re_option);
+ PCRE(const std::string& pattern, const PCRE_Options& re_option);
+
+ ~PCRE();
+
+ // The string specification for this PCRE. E.g.
+ // PCRE re("ab*c?d+");
+ // re.pattern(); // "ab*c?d+"
+ const std::string& pattern() const { return pattern_; }
+
+ // If PCRE could not be created properly, returns an error string.
+ // Else returns the empty string.
+ const std::string& error() const { return *error_; }
+
+ // Whether the PCRE has hit a match limit during execution.
+ // Not thread safe. Intended only for testing.
+ // If hitting match limits is a problem,
+ // you should be using RE2 (re2/re2.h)
+ // instead of checking this flag.
+ bool HitLimit();
+ void ClearHitLimit();
+
+ /***** The useful part: the matching interface *****/
+
+ // Matches "text" against "pattern". If pointer arguments are
+ // supplied, copies matched sub-patterns into them.
+ //
+ // You can pass in a "const char*" or a "std::string" for "text".
+ // You can pass in a "const char*" or a "std::string" or a "PCRE" for "pattern".
+ //
+ // The provided pointer arguments can be pointers to any scalar numeric
+ // type, or one of:
+ // std::string (matched piece is copied to string)
+ // StringPiece (StringPiece is mutated to point to matched piece)
+ // T (where "bool T::ParseFrom(const char*, size_t)" exists)
+ // (void*)NULL (the corresponding matched sub-pattern is not copied)
+ //
+ // Returns true iff all of the following conditions are satisfied:
+ // a. "text" matches "pattern" exactly
+ // b. The number of matched sub-patterns is >= number of supplied pointers
+ // c. The "i"th argument has a suitable type for holding the
+ // string captured as the "i"th sub-pattern. If you pass in
+ // NULL for the "i"th argument, or pass fewer arguments than
+ // number of sub-patterns, "i"th captured sub-pattern is
+ // ignored.
+ //
+ // CAVEAT: An optional sub-pattern that does not exist in the
+ // matched string is assigned the empty string. Therefore, the
+ // following will return false (because the empty string is not a
+ // valid number):
+ // int number;
+ // PCRE::FullMatch("abc", "[a-z]+(\\d+)?", &number);
+ struct FullMatchFunctor {
+ bool operator ()(const StringPiece& text, const PCRE& re, // 3..16 args
+ const Arg& ptr1 = no_more_args,
+ const Arg& ptr2 = no_more_args,
+ const Arg& ptr3 = no_more_args,
+ const Arg& ptr4 = no_more_args,
+ const Arg& ptr5 = no_more_args,
+ const Arg& ptr6 = no_more_args,
+ const Arg& ptr7 = no_more_args,
+ const Arg& ptr8 = no_more_args,
+ const Arg& ptr9 = no_more_args,
+ const Arg& ptr10 = no_more_args,
+ const Arg& ptr11 = no_more_args,
+ const Arg& ptr12 = no_more_args,
+ const Arg& ptr13 = no_more_args,
+ const Arg& ptr14 = no_more_args,
+ const Arg& ptr15 = no_more_args,
+ const Arg& ptr16 = no_more_args) const;
+ };
+
+ static const FullMatchFunctor FullMatch;
+
+ // Exactly like FullMatch(), except that "pattern" is allowed to match
+ // a substring of "text".
+ struct PartialMatchFunctor {
+ bool operator ()(const StringPiece& text, const PCRE& re, // 3..16 args
+ const Arg& ptr1 = no_more_args,
+ const Arg& ptr2 = no_more_args,
+ const Arg& ptr3 = no_more_args,
+ const Arg& ptr4 = no_more_args,
+ const Arg& ptr5 = no_more_args,
+ const Arg& ptr6 = no_more_args,
+ const Arg& ptr7 = no_more_args,
+ const Arg& ptr8 = no_more_args,
+ const Arg& ptr9 = no_more_args,
+ const Arg& ptr10 = no_more_args,
+ const Arg& ptr11 = no_more_args,
+ const Arg& ptr12 = no_more_args,
+ const Arg& ptr13 = no_more_args,
+ const Arg& ptr14 = no_more_args,
+ const Arg& ptr15 = no_more_args,
+ const Arg& ptr16 = no_more_args) const;
+ };
+
+ static const PartialMatchFunctor PartialMatch;
+
+ // Like FullMatch() and PartialMatch(), except that pattern has to
+ // match a prefix of "text", and "input" is advanced past the matched
+ // text. Note: "input" is modified iff this routine returns true.
+ struct ConsumeFunctor {
+ bool operator ()(StringPiece* input, const PCRE& pattern, // 3..16 args
+ const Arg& ptr1 = no_more_args,
+ const Arg& ptr2 = no_more_args,
+ const Arg& ptr3 = no_more_args,
+ const Arg& ptr4 = no_more_args,
+ const Arg& ptr5 = no_more_args,
+ const Arg& ptr6 = no_more_args,
+ const Arg& ptr7 = no_more_args,
+ const Arg& ptr8 = no_more_args,
+ const Arg& ptr9 = no_more_args,
+ const Arg& ptr10 = no_more_args,
+ const Arg& ptr11 = no_more_args,
+ const Arg& ptr12 = no_more_args,
+ const Arg& ptr13 = no_more_args,
+ const Arg& ptr14 = no_more_args,
+ const Arg& ptr15 = no_more_args,
+ const Arg& ptr16 = no_more_args) const;
+ };
+
+ static const ConsumeFunctor Consume;
+
+ // Like Consume(..), but does not anchor the match at the beginning of the
+ // string. That is, "pattern" need not start its match at the beginning of
+ // "input". For example, "FindAndConsume(s, "(\\w+)", &word)" finds the next
+ // word in "s" and stores it in "word".
+ struct FindAndConsumeFunctor {
+ bool operator ()(StringPiece* input, const PCRE& pattern,
+ const Arg& ptr1 = no_more_args,
+ const Arg& ptr2 = no_more_args,
+ const Arg& ptr3 = no_more_args,
+ const Arg& ptr4 = no_more_args,
+ const Arg& ptr5 = no_more_args,
+ const Arg& ptr6 = no_more_args,
+ const Arg& ptr7 = no_more_args,
+ const Arg& ptr8 = no_more_args,
+ const Arg& ptr9 = no_more_args,
+ const Arg& ptr10 = no_more_args,
+ const Arg& ptr11 = no_more_args,
+ const Arg& ptr12 = no_more_args,
+ const Arg& ptr13 = no_more_args,
+ const Arg& ptr14 = no_more_args,
+ const Arg& ptr15 = no_more_args,
+ const Arg& ptr16 = no_more_args) const;
+ };
+
+ static const FindAndConsumeFunctor FindAndConsume;
+
+ // Replace the first match of "pattern" in "str" with "rewrite".
+ // Within "rewrite", backslash-escaped digits (\1 to \9) can be
+ // used to insert text matching corresponding parenthesized group
+ // from the pattern. \0 in "rewrite" refers to the entire matching
+ // text. E.g.,
+ //
+ // std::string s = "yabba dabba doo";
+ // CHECK(PCRE::Replace(&s, "b+", "d"));
+ //
+ // will leave "s" containing "yada dabba doo"
+ //
+ // Returns true if the pattern matches and a replacement occurs,
+ // false otherwise.
+ static bool Replace(std::string *str,
+ const PCRE& pattern,
+ const StringPiece& rewrite);
+
+ // Like Replace(), except replaces all occurrences of the pattern in
+ // the string with the rewrite. Replacements are not subject to
+ // re-matching. E.g.,
+ //
+ // std::string s = "yabba dabba doo";
+ // CHECK(PCRE::GlobalReplace(&s, "b+", "d"));
+ //
+ // will leave "s" containing "yada dada doo"
+ //
+ // Returns the number of replacements made.
+ static int GlobalReplace(std::string *str,
+ const PCRE& pattern,
+ const StringPiece& rewrite);
+
+ // Like Replace, except that if the pattern matches, "rewrite"
+ // is copied into "out" with substitutions. The non-matching
+ // portions of "text" are ignored.
+ //
+ // Returns true iff a match occurred and the extraction happened
+ // successfully; if no match occurs, the string is left unaffected.
+ static bool Extract(const StringPiece &text,
+ const PCRE& pattern,
+ const StringPiece &rewrite,
+ std::string *out);
+
+ // Check that the given @p rewrite string is suitable for use with
+ // this PCRE. It checks that:
+ // * The PCRE has enough parenthesized subexpressions to satisfy all
+ // of the \N tokens in @p rewrite, and
+ // * The @p rewrite string doesn't have any syntax errors
+ // ('\' followed by anything besides [0-9] and '\').
+ // Making this test will guarantee that "replace" and "extract"
+ // operations won't LOG(ERROR) or fail because of a bad rewrite
+ // string.
+ // @param rewrite The proposed rewrite string.
+ // @param error An error message is recorded here, iff we return false.
+ // Otherwise, it is unchanged.
+ // @return true, iff @p rewrite is suitable for use with the PCRE.
+ bool CheckRewriteString(const StringPiece& rewrite,
+ std::string* error) const;
+
+ // Returns a copy of 'unquoted' with all potentially meaningful
+ // regexp characters backslash-escaped. The returned string, used
+ // as a regular expression, will exactly match the original string.
+ // For example,
+ // 1.5-2.0?
+ // becomes:
+ // 1\.5\-2\.0\?
+ static std::string QuoteMeta(const StringPiece& unquoted);
+
+ /***** Generic matching interface (not so nice to use) *****/
+
+ // Type of match (TODO: Should be restructured as an Option)
+ enum Anchor {
+ UNANCHORED, // No anchoring
+ ANCHOR_START, // Anchor at start only
+ ANCHOR_BOTH, // Anchor at start and end
+ };
+
+ // General matching routine. Stores the length of the match in
+ // "*consumed" if successful.
+ bool DoMatch(const StringPiece& text,
+ Anchor anchor,
+ size_t* consumed,
+ const Arg* const* args, int n) const;
+
+ // Return the number of capturing subpatterns, or -1 if the
+ // regexp wasn't valid on construction.
+ int NumberOfCapturingGroups() const;
+
+ private:
+ void Init(const char* pattern, Option option, int match_limit,
+ int stack_limit, bool report_errors);
+
+ // Match against "text", filling in "vec" (up to "vecsize" * 2/3) with
+ // pairs of integers for the beginning and end positions of matched
+ // text. The first pair corresponds to the entire matched text;
+ // subsequent pairs correspond, in order, to parentheses-captured
+ // matches. Returns the number of pairs (one more than the number of
+ // the last subpattern with a match) if matching was successful
+ // and zero if the match failed.
+ // I.e. for PCRE("(foo)|(bar)|(baz)") it will return 2, 3, and 4 when matching
+ // against "foo", "bar", and "baz" respectively.
+ // When matching PCRE("(foo)|hello") against "hello", it will return 1.
+ // But the values for all subpatterns are filled into "vec".
+ int TryMatch(const StringPiece& text,
+ size_t startpos,
+ Anchor anchor,
+ bool empty_ok,
+ int *vec,
+ int vecsize) const;
+
+ // Append the "rewrite" string, with backslash substitutions from "text"
+ // and "vec", to string "out".
+ bool Rewrite(std::string *out,
+ const StringPiece &rewrite,
+ const StringPiece &text,
+ int *vec,
+ int veclen) const;
+
+ // internal implementation for DoMatch
+ bool DoMatchImpl(const StringPiece& text,
+ Anchor anchor,
+ size_t* consumed,
+ const Arg* const args[],
+ int n,
+ int* vec,
+ int vecsize) const;
+
+ // Compile the regexp for the specified anchoring mode
+ pcre* Compile(Anchor anchor);
+
+ std::string pattern_;
+ Option options_;
+ pcre* re_full_; // For full matches
+ pcre* re_partial_; // For partial matches
+ const std::string* error_; // Error indicator (or empty string)
+ bool report_errors_; // Silences error logging if false
+ int match_limit_; // Limit on execution resources
+ int stack_limit_; // Limit on stack resources (bytes)
+ mutable int32_t hit_limit_; // Hit limit during execution (bool)
+
+ PCRE(const PCRE&) = delete;
+ PCRE& operator=(const PCRE&) = delete;
+};
+
+// PCRE_Options allow you to set the PCRE::Options, plus any pcre
+// "extra" options. The only extras are match_limit, which limits
+// the CPU time of a match, and stack_limit, which limits the
+// stack usage. Setting a limit to <= 0 lets PCRE pick a sensible default
+// that should not cause too many problems in production code.
+// If PCRE hits a limit during a match, it may return a false negative,
+// but (hopefully) it won't crash.
+//
+// NOTE: If you are handling regular expressions specified by
+// (external or internal) users, rather than hard-coded ones,
+// you should be using RE2, which uses an alternate implementation
+// that avoids these issues. See http://go/re2quick.
+class PCRE_Options {
+ public:
+ // constructor
+ PCRE_Options() : option_(PCRE::None), match_limit_(0), stack_limit_(0), report_errors_(true) {}
+ // accessors
+ PCRE::Option option() const { return option_; }
+ void set_option(PCRE::Option option) {
+ option_ = option;
+ }
+ int match_limit() const { return match_limit_; }
+ void set_match_limit(int match_limit) {
+ match_limit_ = match_limit;
+ }
+ int stack_limit() const { return stack_limit_; }
+ void set_stack_limit(int stack_limit) {
+ stack_limit_ = stack_limit;
+ }
+
+ // If the regular expression is malformed, an error message will be printed
+ // iff report_errors() is true. Default: true.
+ bool report_errors() const { return report_errors_; }
+ void set_report_errors(bool report_errors) {
+ report_errors_ = report_errors;
+ }
+ private:
+ PCRE::Option option_;
+ int match_limit_;
+ int stack_limit_;
+ bool report_errors_;
+};
+
+
+/***** Implementation details *****/
+
+// Hex/Octal/Binary?
+
+// Special class for parsing into objects that define a ParseFrom() method
+template <typename T>
+class _PCRE_MatchObject {
+ public:
+ static inline bool Parse(const char* str, size_t n, void* dest) {
+ if (dest == NULL) return true;
+ T* object = reinterpret_cast<T*>(dest);
+ return object->ParseFrom(str, n);
+ }
+};
+
+class PCRE::Arg {
+ public:
+ // Empty constructor so we can declare arrays of PCRE::Arg
+ Arg();
+
+ // Constructor specially designed for NULL arguments
+ Arg(void*);
+
+ typedef bool (*Parser)(const char* str, size_t n, void* dest);
+
+// Type-specific parsers
+#define MAKE_PARSER(type, name) \
+ Arg(type* p) : arg_(p), parser_(name) {} \
+ Arg(type* p, Parser parser) : arg_(p), parser_(parser) {}
+
+ MAKE_PARSER(char, parse_char);
+ MAKE_PARSER(signed char, parse_schar);
+ MAKE_PARSER(unsigned char, parse_uchar);
+ MAKE_PARSER(float, parse_float);
+ MAKE_PARSER(double, parse_double);
+ MAKE_PARSER(std::string, parse_string);
+ MAKE_PARSER(StringPiece, parse_stringpiece);
+
+ MAKE_PARSER(short, parse_short);
+ MAKE_PARSER(unsigned short, parse_ushort);
+ MAKE_PARSER(int, parse_int);
+ MAKE_PARSER(unsigned int, parse_uint);
+ MAKE_PARSER(long, parse_long);
+ MAKE_PARSER(unsigned long, parse_ulong);
+ MAKE_PARSER(long long, parse_longlong);
+ MAKE_PARSER(unsigned long long, parse_ulonglong);
+
+#undef MAKE_PARSER
+
+ // Generic constructor
+ template <typename T> Arg(T*, Parser parser);
+ // Generic constructor template
+ template <typename T> Arg(T* p)
+ : arg_(p), parser_(_PCRE_MatchObject<T>::Parse) {
+ }
+
+ // Parse the data
+ bool Parse(const char* str, size_t n) const;
+
+ private:
+ void* arg_;
+ Parser parser_;
+
+ static bool parse_null (const char* str, size_t n, void* dest);
+ static bool parse_char (const char* str, size_t n, void* dest);
+ static bool parse_schar (const char* str, size_t n, void* dest);
+ static bool parse_uchar (const char* str, size_t n, void* dest);
+ static bool parse_float (const char* str, size_t n, void* dest);
+ static bool parse_double (const char* str, size_t n, void* dest);
+ static bool parse_string (const char* str, size_t n, void* dest);
+ static bool parse_stringpiece (const char* str, size_t n, void* dest);
+
+#define DECLARE_INTEGER_PARSER(name) \
+ private: \
+ static bool parse_##name(const char* str, size_t n, void* dest); \
+ static bool parse_##name##_radix(const char* str, size_t n, void* dest, \
+ int radix); \
+ \
+ public: \
+ static bool parse_##name##_hex(const char* str, size_t n, void* dest); \
+ static bool parse_##name##_octal(const char* str, size_t n, void* dest); \
+ static bool parse_##name##_cradix(const char* str, size_t n, void* dest)
+
+ DECLARE_INTEGER_PARSER(short);
+ DECLARE_INTEGER_PARSER(ushort);
+ DECLARE_INTEGER_PARSER(int);
+ DECLARE_INTEGER_PARSER(uint);
+ DECLARE_INTEGER_PARSER(long);
+ DECLARE_INTEGER_PARSER(ulong);
+ DECLARE_INTEGER_PARSER(longlong);
+ DECLARE_INTEGER_PARSER(ulonglong);
+
+#undef DECLARE_INTEGER_PARSER
+
+};
+
+inline PCRE::Arg::Arg() : arg_(NULL), parser_(parse_null) { }
+inline PCRE::Arg::Arg(void* p) : arg_(p), parser_(parse_null) { }
+
+inline bool PCRE::Arg::Parse(const char* str, size_t n) const {
+ return (*parser_)(str, n, arg_);
+}
+
+// This part of the parser, appropriate only for ints, deals with bases
+#define MAKE_INTEGER_PARSER(type, name) \
+ inline PCRE::Arg Hex(type* ptr) { \
+ return PCRE::Arg(ptr, PCRE::Arg::parse_##name##_hex); \
+ } \
+ inline PCRE::Arg Octal(type* ptr) { \
+ return PCRE::Arg(ptr, PCRE::Arg::parse_##name##_octal); \
+ } \
+ inline PCRE::Arg CRadix(type* ptr) { \
+ return PCRE::Arg(ptr, PCRE::Arg::parse_##name##_cradix); \
+ }
+
+MAKE_INTEGER_PARSER(short, short);
+MAKE_INTEGER_PARSER(unsigned short, ushort);
+MAKE_INTEGER_PARSER(int, int);
+MAKE_INTEGER_PARSER(unsigned int, uint);
+MAKE_INTEGER_PARSER(long, long);
+MAKE_INTEGER_PARSER(unsigned long, ulong);
+MAKE_INTEGER_PARSER(long long, longlong);
+MAKE_INTEGER_PARSER(unsigned long long, ulonglong);
+
+#undef MAKE_INTEGER_PARSER
+
+} // namespace re2
+
+#endif // UTIL_PCRE_H_
diff --git a/contrib/libs/re2/util/strutil.cc b/contrib/libs/re2/util/strutil.cc
index f9af3a442c..fb7e6b1b0c 100644
--- a/contrib/libs/re2/util/strutil.cc
+++ b/contrib/libs/re2/util/strutil.cc
@@ -65,34 +65,34 @@ static size_t CEscapeString(const char* src, size_t src_len,
// Copies 'src' to result, escaping dangerous characters using
// C-style escape sequences. 'src' and 'dest' should not overlap.
// ----------------------------------------------------------------------
-std::string CEscape(const StringPiece& src) {
+std::string CEscape(const StringPiece& src) {
const size_t dest_len = src.size() * 4 + 1; // Maximum possible expansion
char* dest = new char[dest_len];
const size_t used = CEscapeString(src.data(), src.size(),
dest, dest_len);
- std::string s = std::string(dest, used);
+ std::string s = std::string(dest, used);
delete[] dest;
return s;
}
-void PrefixSuccessor(std::string* prefix) {
+void PrefixSuccessor(std::string* prefix) {
// We can increment the last character in the string and be done
// unless that character is 255, in which case we have to erase the
// last character and increment the previous character, unless that
// is 255, etc. If the string is empty or consists entirely of
// 255's, we just return the empty string.
- while (!prefix->empty()) {
- char& c = prefix->back();
- if (c == '\xff') { // char literal avoids signed/unsigned.
- prefix->pop_back();
+ while (!prefix->empty()) {
+ char& c = prefix->back();
+ if (c == '\xff') { // char literal avoids signed/unsigned.
+ prefix->pop_back();
} else {
- ++c;
- break;
+ ++c;
+ break;
}
}
}
-static void StringAppendV(std::string* dst, const char* format, va_list ap) {
+static void StringAppendV(std::string* dst, const char* format, va_list ap) {
// First try with a small fixed size buffer
char space[1024];
@@ -137,10 +137,10 @@ static void StringAppendV(std::string* dst, const char* format, va_list ap) {
}
}
-std::string StringPrintf(const char* format, ...) {
+std::string StringPrintf(const char* format, ...) {
va_list ap;
va_start(ap, format);
- std::string result;
+ std::string result;
StringAppendV(&result, format, ap);
va_end(ap);
return result;
diff --git a/contrib/libs/re2/util/strutil.h b/contrib/libs/re2/util/strutil.h
index 16631b0833..a69908a0dd 100644
--- a/contrib/libs/re2/util/strutil.h
+++ b/contrib/libs/re2/util/strutil.h
@@ -12,10 +12,10 @@
namespace re2 {
-std::string CEscape(const StringPiece& src);
-void PrefixSuccessor(std::string* prefix);
-std::string StringPrintf(const char* format, ...);
+std::string CEscape(const StringPiece& src);
+void PrefixSuccessor(std::string* prefix);
+std::string StringPrintf(const char* format, ...);
} // namespace re2
-
+
#endif // UTIL_STRUTIL_H_
diff --git a/contrib/libs/re2/util/test.cc b/contrib/libs/re2/util/test.cc
index 855295f5bf..028616b359 100644
--- a/contrib/libs/re2/util/test.cc
+++ b/contrib/libs/re2/util/test.cc
@@ -1,34 +1,34 @@
-// Copyright 2009 The RE2 Authors. All Rights Reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-#include <stdio.h>
-#include <string>
-
-#include "util/test.h"
-
-namespace testing {
-std::string TempDir() { return "/tmp/"; }
-} // namespace testing
-
-struct Test {
- void (*fn)(void);
- const char *name;
-};
-
-static Test tests[10000];
-static int ntests;
-
-void RegisterTest(void (*fn)(void), const char *name) {
- tests[ntests].fn = fn;
- tests[ntests++].name = name;
-}
-
-int main(int argc, char** argv) {
- for (int i = 0; i < ntests; i++) {
- printf("%s\n", tests[i].name);
- tests[i].fn();
- }
- printf("PASS\n");
- return 0;
-}
+// Copyright 2009 The RE2 Authors. All Rights Reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <stdio.h>
+#include <string>
+
+#include "util/test.h"
+
+namespace testing {
+std::string TempDir() { return "/tmp/"; }
+} // namespace testing
+
+struct Test {
+ void (*fn)(void);
+ const char *name;
+};
+
+static Test tests[10000];
+static int ntests;
+
+void RegisterTest(void (*fn)(void), const char *name) {
+ tests[ntests].fn = fn;
+ tests[ntests++].name = name;
+}
+
+int main(int argc, char** argv) {
+ for (int i = 0; i < ntests; i++) {
+ printf("%s\n", tests[i].name);
+ tests[i].fn();
+ }
+ printf("PASS\n");
+ return 0;
+}
diff --git a/contrib/libs/re2/util/test.h b/contrib/libs/re2/util/test.h
index 40978b8fae..54e6f8fbbb 100644
--- a/contrib/libs/re2/util/test.h
+++ b/contrib/libs/re2/util/test.h
@@ -1,50 +1,50 @@
-// Copyright 2009 The RE2 Authors. All Rights Reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-#ifndef UTIL_TEST_H_
-#define UTIL_TEST_H_
-
-#include "util/util.h"
-#include "util/logging.h"
-
-namespace testing {
-std::string TempDir();
-} // namespace testing
-
-#define TEST(x, y) \
- void x##y(void); \
- TestRegisterer r##x##y(x##y, # x "." # y); \
- void x##y(void)
-
-void RegisterTest(void (*)(void), const char*);
-
-class TestRegisterer {
- public:
- TestRegisterer(void (*fn)(void), const char *s) {
- RegisterTest(fn, s);
- }
-};
-
-// fatal assertions
-#define ASSERT_TRUE CHECK
-#define ASSERT_FALSE(x) CHECK(!(x))
-#define ASSERT_EQ CHECK_EQ
-#define ASSERT_NE CHECK_NE
-#define ASSERT_LT CHECK_LT
-#define ASSERT_LE CHECK_LE
-#define ASSERT_GT CHECK_GT
-#define ASSERT_GE CHECK_GE
-
-// nonfatal assertions
-// TODO(rsc): Do a better job?
-#define EXPECT_TRUE CHECK
-#define EXPECT_FALSE(x) CHECK(!(x))
-#define EXPECT_EQ CHECK_EQ
-#define EXPECT_NE CHECK_NE
-#define EXPECT_LT CHECK_LT
-#define EXPECT_LE CHECK_LE
-#define EXPECT_GT CHECK_GT
-#define EXPECT_GE CHECK_GE
-
-#endif // UTIL_TEST_H_
+// Copyright 2009 The RE2 Authors. All Rights Reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef UTIL_TEST_H_
+#define UTIL_TEST_H_
+
+#include "util/util.h"
+#include "util/logging.h"
+
+namespace testing {
+std::string TempDir();
+} // namespace testing
+
+#define TEST(x, y) \
+ void x##y(void); \
+ TestRegisterer r##x##y(x##y, # x "." # y); \
+ void x##y(void)
+
+void RegisterTest(void (*)(void), const char*);
+
+class TestRegisterer {
+ public:
+ TestRegisterer(void (*fn)(void), const char *s) {
+ RegisterTest(fn, s);
+ }
+};
+
+// fatal assertions
+#define ASSERT_TRUE CHECK
+#define ASSERT_FALSE(x) CHECK(!(x))
+#define ASSERT_EQ CHECK_EQ
+#define ASSERT_NE CHECK_NE
+#define ASSERT_LT CHECK_LT
+#define ASSERT_LE CHECK_LE
+#define ASSERT_GT CHECK_GT
+#define ASSERT_GE CHECK_GE
+
+// nonfatal assertions
+// TODO(rsc): Do a better job?
+#define EXPECT_TRUE CHECK
+#define EXPECT_FALSE(x) CHECK(!(x))
+#define EXPECT_EQ CHECK_EQ
+#define EXPECT_NE CHECK_NE
+#define EXPECT_LT CHECK_LT
+#define EXPECT_LE CHECK_LE
+#define EXPECT_GT CHECK_GT
+#define EXPECT_GE CHECK_GE
+
+#endif // UTIL_TEST_H_
diff --git a/contrib/libs/re2/util/util.h b/contrib/libs/re2/util/util.h
index 0d28a8ca74..56e46c1a33 100644
--- a/contrib/libs/re2/util/util.h
+++ b/contrib/libs/re2/util/util.h
@@ -5,35 +5,35 @@
#ifndef UTIL_UTIL_H_
#define UTIL_UTIL_H_
-#define arraysize(array) (sizeof(array)/sizeof((array)[0]))
+#define arraysize(array) (sizeof(array)/sizeof((array)[0]))
-#ifndef ATTRIBUTE_NORETURN
-#if defined(__GNUC__)
-#define ATTRIBUTE_NORETURN __attribute__((noreturn))
-#elif defined(_MSC_VER)
-#define ATTRIBUTE_NORETURN __declspec(noreturn)
-#else
-#define ATTRIBUTE_NORETURN
-#endif
-#endif
+#ifndef ATTRIBUTE_NORETURN
+#if defined(__GNUC__)
+#define ATTRIBUTE_NORETURN __attribute__((noreturn))
+#elif defined(_MSC_VER)
+#define ATTRIBUTE_NORETURN __declspec(noreturn)
+#else
+#define ATTRIBUTE_NORETURN
+#endif
+#endif
+
+#ifndef ATTRIBUTE_UNUSED
+#if defined(__GNUC__)
+#define ATTRIBUTE_UNUSED __attribute__((unused))
+#else
+#define ATTRIBUTE_UNUSED
+#endif
+#endif
-#ifndef ATTRIBUTE_UNUSED
-#if defined(__GNUC__)
-#define ATTRIBUTE_UNUSED __attribute__((unused))
-#else
-#define ATTRIBUTE_UNUSED
-#endif
-#endif
-
#ifndef FALLTHROUGH_INTENDED
-#if defined(__clang__)
-#define FALLTHROUGH_INTENDED [[clang::fallthrough]]
-#elif defined(__GNUC__) && __GNUC__ >= 7
-#define FALLTHROUGH_INTENDED [[gnu::fallthrough]]
-#else
-#define FALLTHROUGH_INTENDED do {} while (0)
+#if defined(__clang__)
+#define FALLTHROUGH_INTENDED [[clang::fallthrough]]
+#elif defined(__GNUC__) && __GNUC__ >= 7
+#define FALLTHROUGH_INTENDED [[gnu::fallthrough]]
+#else
+#define FALLTHROUGH_INTENDED do {} while (0)
+#endif
#endif
-#endif
#ifndef NO_THREAD_SAFETY_ANALYSIS
#define NO_THREAD_SAFETY_ANALYSIS
diff --git a/contrib/libs/re2/ya.make b/contrib/libs/re2/ya.make
index 3219b0fd7e..8072de2eb2 100644
--- a/contrib/libs/re2/ya.make
+++ b/contrib/libs/re2/ya.make
@@ -1,8 +1,8 @@
# Generated by devtools/yamaker from nixpkgs 21.11.
-
+
LIBRARY()
-OWNER(g:cpp-contrib)
+OWNER(g:cpp-contrib)
VERSION(2022-02-01)
@@ -12,22 +12,22 @@ LICENSE(
BSD-3-Clause AND
X11-Lucent
)
-
+
LICENSE_TEXTS(.yandex_meta/licenses.list.txt)
ADDINCL(
- GLOBAL contrib/libs/re2/include
- contrib/libs/re2
+ GLOBAL contrib/libs/re2/include
+ contrib/libs/re2
)
-NO_COMPILER_WARNINGS()
+NO_COMPILER_WARNINGS()
-IF (WITH_VALGRIND)
+IF (WITH_VALGRIND)
CFLAGS(
GLOBAL -DRE2_ON_VALGRIND
)
-ENDIF()
-
+ENDIF()
+
SRCS(
re2/bitstate.cc
re2/compile.cc
@@ -45,16 +45,16 @@ SRCS(
re2/regexp.cc
re2/set.cc
re2/simplify.cc
- re2/stringpiece.cc
+ re2/stringpiece.cc
re2/tostring.cc
- re2/unicode_casefold.cc
- re2/unicode_groups.cc
- util/rune.cc
- util/strutil.cc
+ re2/unicode_casefold.cc
+ re2/unicode_groups.cc
+ util/rune.cc
+ util/strutil.cc
)
END()
-
-RECURSE(
- re2/testing
-)
+
+RECURSE(
+ re2/testing
+)