diff options
author | maxim-yurchuk <maxim-yurchuk@yandex-team.com> | 2024-10-09 12:29:46 +0300 |
---|---|---|
committer | maxim-yurchuk <maxim-yurchuk@yandex-team.com> | 2024-10-09 13:14:22 +0300 |
commit | 9731d8a4bb7ee2cc8554eaf133bb85498a4c7d80 (patch) | |
tree | a8fb3181d5947c0d78cf402aa56e686130179049 /contrib/libs/pcre | |
parent | a44b779cd359f06c3ebbef4ec98c6b38609d9d85 (diff) | |
download | ydb-9731d8a4bb7ee2cc8554eaf133bb85498a4c7d80.tar.gz |
publishFullContrib: true for ydb
<HIDDEN_URL>
commit_hash:c82a80ac4594723cebf2c7387dec9c60217f603e
Diffstat (limited to 'contrib/libs/pcre')
-rw-r--r-- | contrib/libs/pcre/.yandex_meta/__init__.py | 48 | ||||
-rw-r--r-- | contrib/libs/pcre/.yandex_meta/devtools.copyrights.report | 308 | ||||
-rw-r--r-- | contrib/libs/pcre/.yandex_meta/devtools.licenses.report | 351 | ||||
-rw-r--r-- | contrib/libs/pcre/.yandex_meta/licenses.list.txt | 321 | ||||
-rw-r--r-- | contrib/libs/pcre/.yandex_meta/override.nix | 23 | ||||
-rw-r--r-- | contrib/libs/pcre/HACKING | 528 | ||||
-rw-r--r-- | contrib/libs/pcre/LICENCE | 93 | ||||
-rw-r--r-- | contrib/libs/pcre/NON-AUTOTOOLS-BUILD | 773 | ||||
-rw-r--r-- | contrib/libs/pcre/NON-UNIX-USE | 7 | ||||
-rw-r--r-- | contrib/libs/pcre/patches/fix-group-name-comparison.patch | 24 | ||||
-rw-r--r-- | contrib/libs/pcre/patches/posix.patch | 20 | ||||
-rw-r--r-- | contrib/libs/pcre/patches/turn-off-jit-on-request.patch | 32 |
12 files changed, 2528 insertions, 0 deletions
diff --git a/contrib/libs/pcre/.yandex_meta/__init__.py b/contrib/libs/pcre/.yandex_meta/__init__.py new file mode 100644 index 0000000000..1fc4abde8c --- /dev/null +++ b/contrib/libs/pcre/.yandex_meta/__init__.py @@ -0,0 +1,48 @@ +import os + +from devtools.yamaker import fileutil +from devtools.yamaker.modules import GLOBAL, Linkable, Switch +from devtools.yamaker.project import NixProject + + +def post_install(self): + fileutil.convert_to_utf8(f"{self.dstdir}/ChangeLog", from_="latin-1") + + fileutil.re_sub_file(f"{self.dstdir}/config.h", "#define [^ ]*_EXP_DE", r"// \g<0>") + fileutil.re_sub_dir(self.dstdir, r'(# *include) "config\.h"', r'\1 "pcre_config.h"') + fileutil.re_sub_dir(self.dstdir, r"(# *include) <(pcre.*)>", r'\1 "\2"') + os.rename(f"{self.dstdir}/config.h", f"{self.dstdir}/pcre_config.h") + + with self.yamakes["."] as pcre: + pcre.CFLAGS.insert(0, GLOBAL("-DPCRE_STATIC")) + pcre.after( + "CFLAGS", + """ +# JIT adds ≈108KB to binary size which may be critical for mobile and embedded devices binary distributions +DEFAULT(ARCADIA_PCRE_ENABLE_JIT yes) + """.strip(), + ) + pcre.after( + "CFLAGS", + Switch(ARCADIA_PCRE_ENABLE_JIT=Linkable(CFLAGS=["-DARCADIA_PCRE_ENABLE_JIT"])), + ) + + +pcre = NixProject( + arcdir="contrib/libs/pcre", + nixattr="pcre", + license="BSD-3-Clause", + disable_includes=[ + "bits/type_traits.h", + "sys/cache.h", + "sljitNativeSPARC_64.c", + "sljitProtExecAllocator.c", + ], + put_with={"pcre": ["pcreposix"]}, + install_targets=["pcre", "pcre16", "pcre32", "pcrecpp"], + put={"pcre": "."}, + copy_sources=[ + "sljit/**", + ], + post_install=post_install, +) diff --git a/contrib/libs/pcre/.yandex_meta/devtools.copyrights.report b/contrib/libs/pcre/.yandex_meta/devtools.copyrights.report new file mode 100644 index 0000000000..60271ff529 --- /dev/null +++ b/contrib/libs/pcre/.yandex_meta/devtools.copyrights.report @@ -0,0 +1,308 @@ +# File format ($ symbol means the beginning of a line): +# +# $ # this message +# $ # 
======================= +# $ # comments (all commentaries should starts with some number of spaces and # symbol) +# ${action} {license id} {license text hash} +# $BELONGS ./ya/make/file/relative/path/1/ya.make ./ya/make/2/ya.make +# ${all_file_action} filename +# $ # user commentaries (many lines) +# $ generated description - files with this license, license text... (some number of lines that starts with some number of spaces, do not modify) +# ${action} {license spdx} {license text hash} +# $BELONGS ./ya/make/file/relative/path/3/ya.make +# ${all_file_action} filename +# $ # user commentaries +# $ generated description +# $ ... +# +# You can modify action, all_file_action and add commentaries +# Available actions: +# keep - keep license in contrib and use in credits +# skip - skip license +# remove - remove all files with this license +# rename - save license text/links into licenses texts file, but not store SPDX into LINCENSE macro. You should store correct license id into devtools.license.spdx.txt file +# +# {all file action} records will be generated when license text contains filename that exists on filesystem (in contrib directory) +# We suppose that that files can contain some license info +# Available all file actions: +# FILE_IGNORE - ignore file (do nothing) +# FILE_INCLUDE - include all file data into licenses text file +# ======================= + +KEEP COPYRIGHT_SERVICE_LABEL 02578fd01d3e29e6c82aba69bdb82970 +BELONGS ya.make + License text: + * Copyright 2013-2013 Tilera Corporation(jiwang@tilera.com). All rights reserved. + Scancode info: + Original SPDX id: COPYRIGHT_SERVICE_LABEL + Score : 100.00 + Match type : COPYRIGHT + Files with this license: + sljit/sljitNativeTILEGX-encoder.c [4:4] + sljit/sljitNativeTILEGX_64.c [4:4] + +KEEP COPYRIGHT_SERVICE_LABEL 029b66de29e9893a9af854e4049ff264 +BELONGS ya.make + License text: + Copyright(c) 2009-2021 Zoltan Herczeg + All rights reserved. 
+ Scancode info: + Original SPDX id: COPYRIGHT_SERVICE_LABEL + Score : 100.00 + Match type : COPYRIGHT + Files with this license: + LICENCE [50:51] + +KEEP COPYRIGHT_SERVICE_LABEL 1e873553d39834f61b495e45fcd01679 +BELONGS ya.make + License text: + // Copyright (c) 2005, Google Inc. + // All rights reserved. + Scancode info: + Original SPDX id: COPYRIGHT_SERVICE_LABEL + Score : 100.00 + Match type : COPYRIGHT + Files with this license: + pcre_scanner.cc [1:2] + pcre_scanner.h [1:2] + pcre_stringpiece.cc [1:2] + pcre_stringpiece.h [1:2] + pcrecpp.h [1:2] + pcrecpp_internal.h [6:7] + pcrecpparg.h [1:2] + +KEEP COPYRIGHT_SERVICE_LABEL 20b71da944b9e6967071fc4782c1355f +BELONGS ya.make + License text: + // Copyright (c) 2010, Google Inc. + // All rights reserved. + Scancode info: + Original SPDX id: COPYRIGHT_SERVICE_LABEL + Score : 100.00 + Match type : COPYRIGHT + Files with this license: + pcrecpp.cc [1:2] + +KEEP COPYRIGHT_SERVICE_LABEL 3ae28e7dc2a24b7c7ecd4b6d57ca4d94 +BELONGS ya.make + License text: + Written by Philip Hazel + Copyright (c) 1997-2017 University of Cambridge + Scancode info: + Original SPDX id: COPYRIGHT_SERVICE_LABEL + Score : 100.00 + Match type : COPYRIGHT + Files with this license: + pcre_dfa_exec.c [9:10] + pcre_tables.c [8:9] + +KEEP COPYRIGHT_SERVICE_LABEL 50a557143a871aeb8473a4c0c56687f2 +BELONGS ya.make + License text: + Copyright (c) 1997-2014 University of Cambridge + Scancode info: + Original SPDX id: COPYRIGHT_SERVICE_LABEL + Score : 100.00 + Match type : COPYRIGHT + Files with this license: + pcre.h [8:8] + pcre_byte_order.c [8:9] + pcre_globals.c [8:9] + pcre_string_utils.c [8:9] + +KEEP COPYRIGHT_SERVICE_LABEL 592125ca0e46e64eae2bb2293e947755 +BELONGS ya.make + License text: + Written by Philip Hazel + Copyright (c) 1997-2020 University of Cambridge + Scancode info: + Original SPDX id: COPYRIGHT_SERVICE_LABEL + Score : 100.00 + Match type : COPYRIGHT + Files with this license: + pcreposix.c [8:9] + +KEEP COPYRIGHT_SERVICE_LABEL 
71499817b900aaf04d853fdffdc657b0 +BELONGS ya.make + License text: + Copyright (C) 1994-1996, 1999-2002, 2004-2016 Free Software + Foundation, Inc. + Scancode info: + Original SPDX id: COPYRIGHT_SERVICE_LABEL + Score : 100.00 + Match type : COPYRIGHT + Files with this license: + INSTALL [4:5] + +KEEP COPYRIGHT_SERVICE_LABEL 71a195031e61150b1a1da7b8f788beec +BELONGS ya.make + License text: + Copyright (c) 2007-2012, Google Inc. + All rights reserved. + Scancode info: + Original SPDX id: COPYRIGHT_SERVICE_LABEL + Score : 100.00 + Match type : COPYRIGHT + Files with this license: + LICENCE [59:60] + +KEEP COPYRIGHT_SERVICE_LABEL 9370509aa40e84590d30bdafb0dac440 +BELONGS ya.make + License text: + * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. + Scancode info: + Original SPDX id: COPYRIGHT_SERVICE_LABEL + Score : 100.00 + Match type : COPYRIGHT + Files with this license: + sljit/sljitConfig.h [4:4] + sljit/sljitConfigInternal.h [4:4] + sljit/sljitExecAllocator.c [4:4] + sljit/sljitLir.c [4:4] + sljit/sljitLir.h [4:4] + sljit/sljitNativeARM_32.c [4:4] + sljit/sljitNativeARM_64.c [4:4] + sljit/sljitNativeARM_T2_32.c [4:4] + sljit/sljitNativeMIPS_32.c [4:4] + sljit/sljitNativeMIPS_64.c [4:4] + sljit/sljitNativeMIPS_common.c [4:4] + sljit/sljitNativePPC_32.c [4:4] + sljit/sljitNativePPC_64.c [4:4] + sljit/sljitNativePPC_common.c [4:4] + sljit/sljitNativeSPARC_32.c [4:4] + sljit/sljitNativeSPARC_common.c [4:4] + sljit/sljitNativeTILEGX-encoder.c [5:5] + sljit/sljitNativeTILEGX_64.c [5:5] + sljit/sljitNativeX86_32.c [4:4] + sljit/sljitNativeX86_64.c [4:4] + sljit/sljitNativeX86_common.c [4:4] + sljit/sljitUtils.c [4:4] + +KEEP COPYRIGHT_SERVICE_LABEL 93ea79c5b4cd71715061d76f9f2a8a6e +BELONGS ya.make + License text: + Written by Philip Hazel + Copyright (c) 1997-2016 University of Cambridge + Scancode info: + Original SPDX id: COPYRIGHT_SERVICE_LABEL + Score : 100.00 + Match type : COPYRIGHT + Files with this license: + pcre_internal.h [9:10] + +KEEP 
COPYRIGHT_SERVICE_LABEL 97c1bd68d12fc7ffbf6b6583c204031e +BELONGS ya.make + License text: + Copyright (c) 1997-2021 University of Cambridge + All rights reserved. + Scancode info: + Original SPDX id: COPYRIGHT_SERVICE_LABEL + Score : 100.00 + Match type : COPYRIGHT + Files with this license: + LICENCE [28:29] + pcre_compile.c [8:9] + pcre_exec.c [8:9] + +KEEP COPYRIGHT_SERVICE_LABEL d2e7c83484cc274d175a441339febefd +BELONGS ya.make + License text: + Written by Philip Hazel + Copyright (c) 1997-2013 University of Cambridge + Scancode info: + Original SPDX id: COPYRIGHT_SERVICE_LABEL + Score : 100.00 + Match type : COPYRIGHT + Files with this license: + pcre16_valid_utf16.c [8:9] + pcre32_valid_utf32.c [8:9] + pcre_fullinfo.c [8:9] + pcre_jit_compile.c [8:9] + pcre_valid_utf8.c [8:9] + pcre_xclass.c [8:9] + +KEEP COPYRIGHT_SERVICE_LABEL f210bdfe8075eda540c36de510ed81c0 +BELONGS ya.make + License text: + Copyright(c) 2010-2021 Zoltan Herczeg + All rights reserved. + Scancode info: + Original SPDX id: COPYRIGHT_SERVICE_LABEL + Score : 100.00 + Match type : COPYRIGHT + Files with this license: + LICENCE [39:40] + +KEEP COPYRIGHT_SERVICE_LABEL f28a4750d9101477330316879bfef000 +BELONGS ya.make + License text: + Written by Philip Hazel + Copyright (c) 1997-2012 University of Cambridge + Scancode info: + Original SPDX id: COPYRIGHT_SERVICE_LABEL + Score : 100.00 + Match type : COPYRIGHT + Files with this license: + pcre16_byte_order.c [8:9] + pcre16_chartables.c [8:9] + pcre16_compile.c [8:9] + pcre16_config.c [8:9] + pcre16_dfa_exec.c [8:9] + pcre16_exec.c [8:9] + pcre16_fullinfo.c [8:9] + pcre16_get.c [8:9] + pcre16_globals.c [8:9] + pcre16_jit_compile.c [8:9] + pcre16_maketables.c [8:9] + pcre16_newline.c [8:9] + pcre16_ord2utf16.c [8:9] + pcre16_refcount.c [8:9] + pcre16_string_utils.c [8:9] + pcre16_study.c [8:9] + pcre16_tables.c [8:9] + pcre16_ucd.c [8:9] + pcre16_utf16_utils.c [8:9] + pcre16_version.c [8:9] + pcre16_xclass.c [8:9] + pcre32_byte_order.c [8:9] + 
pcre32_chartables.c [8:9] + pcre32_compile.c [8:9] + pcre32_config.c [8:9] + pcre32_dfa_exec.c [8:9] + pcre32_exec.c [8:9] + pcre32_fullinfo.c [8:9] + pcre32_get.c [8:9] + pcre32_globals.c [8:9] + pcre32_jit_compile.c [8:9] + pcre32_maketables.c [8:9] + pcre32_newline.c [8:9] + pcre32_ord2utf32.c [8:9] + pcre32_refcount.c [8:9] + pcre32_string_utils.c [8:9] + pcre32_study.c [8:9] + pcre32_tables.c [8:9] + pcre32_ucd.c [8:9] + pcre32_utf32_utils.c [8:9] + pcre32_version.c [8:9] + pcre32_xclass.c [8:9] + pcre_config.c [8:9] + pcre_get.c [8:9] + pcre_maketables.c [8:9] + pcre_newline.c [8:9] + pcre_ord2utf8.c [8:9] + pcre_refcount.c [8:9] + pcre_study.c [8:9] + pcre_version.c [8:9] + pcreposix.h [12:12] + +KEEP COPYRIGHT_SERVICE_LABEL f739539008599df8a5179e14aed6a3a4 +BELONGS ya.make + License text: + The machine code generator part (this module) was written by Zoltan Herczeg + Copyright (c) 2010-2013 + Scancode info: + Original SPDX id: COPYRIGHT_SERVICE_LABEL + Score : 100.00 + Match type : COPYRIGHT + Files with this license: + pcre_jit_compile.c [11:12] diff --git a/contrib/libs/pcre/.yandex_meta/devtools.licenses.report b/contrib/libs/pcre/.yandex_meta/devtools.licenses.report new file mode 100644 index 0000000000..bce209798f --- /dev/null +++ b/contrib/libs/pcre/.yandex_meta/devtools.licenses.report @@ -0,0 +1,351 @@ +# File format ($ symbol means the beginning of a line): +# +# $ # this message +# $ # ======================= +# $ # comments (all commentaries should starts with some number of spaces and # symbol) +# ${action} {license spdx} {license text hash} +# $BELONGS ./ya/make/file/relative/path/1/ya.make ./ya/make/2/ya.make +# ${all_file_action} filename +# $ # user commentaries (many lines) +# $ generated description - files with this license, license text... 
(some number of lines that starts with some number of spaces, do not modify) +# ${action} {license spdx} {license text hash} +# $BELONGS ./ya/make/file/relative/path/3/ya.make +# ${all_file_action} filename +# $ # user commentaries +# $ generated description +# $ ... +# +# You can modify action, all_file_action and add commentaries +# Available actions: +# keep - keep license in contrib and use in credits +# skip - skip license +# remove - remove all files with this license +# rename - save license text/links into licenses texts file, but not store SPDX into LINCENSE macro. You should store correct license id into devtools.license.spdx.txt file +# +# {all file action} records will be generated when license text contains filename that exists on filesystem (in contrib directory) +# We suppose that that files can contain some license info +# Available all file actions: +# FILE_IGNORE - ignore file (do nothing) +# FILE_INCLUDE - include all file data into licenses text file +# ======================= + +SKIP LicenseRef-scancode-pcre AND BSD-3-Clause 097b48a9b1aaa62de087c91ca588596c +BELONGS ya.make + # changelog + License text: + 24. Changed the PCRE licence to be the more standard "BSD" licence. + Scancode info: + Original SPDX id: LicenseRef-scancode-pcre + Score : 11.00 + Match type : REFERENCE + Links : http://www.pcre.org/licence.txt, https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/licenses/pcre.LICENSE + Files with this license: + ChangeLog [4772:4772] + Scancode info: + Original SPDX id: BSD-3-Clause + Score : 100.00 + Match type : REFERENCE + Links : http://www.opensource.org/licenses/BSD-3-Clause, https://spdx.org/licenses/BSD-3-Clause + Files with this license: + ChangeLog [4772:4772] + +KEEP Public-Domain 18645a531a9d976f5e74253296440788 +BELONGS ya.make + License text: + in the testdata directory is not copyrighted and is in the public domain. 
+ Scancode info: + Original SPDX id: LicenseRef-scancode-public-domain + Score : 100.00 + Match type : TEXT + Links : http://www.linfo.org/publicdomain.html, https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/licenses/public-domain.LICENSE + Files with this license: + LICENCE [10:10] + +KEEP PCRE 284428f37f95a7afd7ab8de1885e8cdb +BELONGS ya.make +FILE_INCLUDE LICENCE found in files: LICENCE at line 1 + License text: + PCRE LICENCE + Scancode info: + Original SPDX id: LicenseRef-scancode-pcre + Score : 11.00 + Match type : REFERENCE + Links : http://www.pcre.org/licence.txt, https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/licenses/pcre.LICENSE + Files with this license: + LICENCE [1:1] + +SKIP GPL-1.0-or-later 3678b29d3643ef75c6842943acf50d72 +BELONGS ya.make + # our pcre does not contain libreadline + License text: + Note that libreadline is GPL-licenced, so if you distribute a binary of + Scancode info: + Original SPDX id: GPL-1.0-or-later + Score : 50.00 + Match type : REFERENCE + Links : http://www.gnu.org/licenses/old-licenses/gpl-1.0-standalone.html, https://spdx.org/licenses/GPL-1.0-or-later + Files with this license: + README [401:401] + +KEEP BSD-3-Clause 5276af3fc24d12afb931e0faed359b1b +BELONGS ya.make + Note: matched license text is too long. Read it in the source files. 
+ Scancode info: + Original SPDX id: BSD-3-Clause + Score : 100.00 + Match type : TEXT + Links : http://www.opensource.org/licenses/BSD-3-Clause, https://spdx.org/licenses/BSD-3-Clause + Files with this license: + pcre.h [11:35] + pcre16_byte_order.c [12:36] + pcre16_chartables.c [12:36] + pcre16_compile.c [12:36] + pcre16_config.c [12:36] + pcre16_dfa_exec.c [12:36] + pcre16_exec.c [12:36] + pcre16_fullinfo.c [12:36] + pcre16_get.c [12:36] + pcre16_globals.c [12:36] + pcre16_jit_compile.c [12:36] + pcre16_maketables.c [12:36] + pcre16_newline.c [12:36] + pcre16_ord2utf16.c [12:36] + pcre16_refcount.c [12:36] + pcre16_string_utils.c [12:36] + pcre16_study.c [12:36] + pcre16_tables.c [12:36] + pcre16_ucd.c [12:36] + pcre16_utf16_utils.c [12:36] + pcre16_valid_utf16.c [12:36] + pcre16_version.c [12:36] + pcre16_xclass.c [12:36] + pcre32_byte_order.c [12:36] + pcre32_chartables.c [12:36] + pcre32_compile.c [12:36] + pcre32_config.c [12:36] + pcre32_dfa_exec.c [12:36] + pcre32_exec.c [12:36] + pcre32_fullinfo.c [12:36] + pcre32_get.c [12:36] + pcre32_globals.c [12:36] + pcre32_jit_compile.c [12:36] + pcre32_maketables.c [12:36] + pcre32_newline.c [12:36] + pcre32_ord2utf32.c [12:36] + pcre32_refcount.c [12:36] + pcre32_string_utils.c [12:36] + pcre32_study.c [12:36] + pcre32_tables.c [12:36] + pcre32_ucd.c [12:36] + pcre32_utf32_utils.c [12:36] + pcre32_valid_utf32.c [12:36] + pcre32_version.c [12:36] + pcre32_xclass.c [12:36] + pcre_byte_order.c [12:36] + pcre_compile.c [12:36] + pcre_config.c [12:36] + pcre_dfa_exec.c [13:37] + pcre_exec.c [12:36] + pcre_fullinfo.c [12:36] + pcre_get.c [12:36] + pcre_globals.c [12:36] + pcre_internal.h [13:37] + pcre_jit_compile.c [15:39] + pcre_maketables.c [12:36] + pcre_newline.c [12:36] + pcre_ord2utf8.c [12:36] + pcre_refcount.c [12:36] + pcre_string_utils.c [12:36] + pcre_study.c [12:36] + pcre_tables.c [12:36] + pcre_valid_utf8.c [12:36] + pcre_version.c [12:36] + pcre_xclass.c [12:36] + pcrecpp_internal.h [10:34] + 
pcreposix.c [12:36] + pcreposix.h [15:39] + +KEEP BSD-3-Clause 6aa235708ac9f5dd8e5c6ac415fc5837 +BELONGS ya.make + Note: matched license text is too long. Read it in the source files. + Scancode info: + Original SPDX id: BSD-3-Clause + Score : 100.00 + Match type : TEXT + Links : http://www.opensource.org/licenses/BSD-3-Clause, https://spdx.org/licenses/BSD-3-Clause + Files with this license: + pcre_scanner.cc [4:28] + pcre_scanner.h [4:28] + pcre_stringpiece.cc [4:28] + pcre_stringpiece.h [4:28] + pcrecpp.cc [4:28] + pcrecpp.h [4:28] + pcrecpparg.h [4:28] + +KEEP BSD-3-Clause 7854eb9ba6db8008b77942822d1018fe +BELONGS ya.make + License text: + conventional "BSD" licence. + Scancode info: + Original SPDX id: BSD-3-Clause + Score : 100.00 + Match type : REFERENCE + Links : http://www.opensource.org/licenses/BSD-3-Clause, https://spdx.org/licenses/BSD-3-Clause + Files with this license: + NEWS [572:572] + +KEEP BSD-2-Clause 7b9b15809e143335a85813bb93b561a4 +BELONGS ya.make + Note: matched license text is too long. Read it in the source files. 
+ Scancode info: + Original SPDX id: BSD-2-Clause + Score : 100.00 + Match type : TEXT + Links : http://opensource.org/licenses/bsd-license.php, http://www.opensource.org/licenses/BSD-2-Clause, https://spdx.org/licenses/BSD-2-Clause + Files with this license: + sljit/sljitConfig.h [6:24] + sljit/sljitConfigInternal.h [6:24] + sljit/sljitExecAllocator.c [6:24] + sljit/sljitLir.c [6:24] + sljit/sljitLir.h [6:24] + sljit/sljitNativeARM_32.c [6:24] + sljit/sljitNativeARM_64.c [6:24] + sljit/sljitNativeARM_T2_32.c [6:24] + sljit/sljitNativeMIPS_32.c [6:24] + sljit/sljitNativeMIPS_64.c [6:24] + sljit/sljitNativeMIPS_common.c [6:24] + sljit/sljitNativePPC_32.c [6:24] + sljit/sljitNativePPC_64.c [6:24] + sljit/sljitNativePPC_common.c [6:24] + sljit/sljitNativeSPARC_32.c [6:24] + sljit/sljitNativeSPARC_common.c [6:24] + sljit/sljitNativeTILEGX-encoder.c [7:25] + sljit/sljitNativeTILEGX_64.c [7:25] + sljit/sljitNativeX86_32.c [6:24] + sljit/sljitNativeX86_64.c [6:24] + sljit/sljitNativeX86_common.c [6:24] + sljit/sljitUtils.c [6:24] + +KEEP PCRE 7ddbee06474ee949f8d2b1e6cc17f09f +BELONGS ya.make +FILE_INCLUDE LICENCE found in files: COPYING at line 1, COPYING at line 3 + License text: + PCRE LICENCE + Please see the file LICENCE in the PCRE distribution for licensing details. 
+ Scancode info: + Original SPDX id: LicenseRef-scancode-pcre + Score : 100.00 + Match type : NOTICE + Links : http://www.pcre.org/licence.txt, https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/licenses/pcre.LICENSE + Files with this license: + COPYING [1:3] + +SKIP GPL-1.0-or-later 7f1778c4f216a7885343f128b6c3cd3d +BELONGS ya.make + # only for cygwin + License text: + licence, this forces not only PCRE to be under the GPL, but also the entire + Scancode info: + Original SPDX id: GPL-1.0-or-later + Score : 100.00 + Match type : NOTICE + Links : http://www.gnu.org/licenses/old-licenses/gpl-1.0-standalone.html, https://spdx.org/licenses/GPL-1.0-or-later + Files with this license: + NON-AUTOTOOLS-BUILD [353:353] + +KEEP BSD-3-Clause 81767233d62cdab4a67573b277ec0f37 +BELONGS ya.make + Note: matched license text is too long. Read it in the source files. + Scancode info: + Original SPDX id: BSD-3-Clause + Score : 100.00 + Match type : TEXT + Links : http://www.opensource.org/licenses/BSD-3-Clause, https://spdx.org/licenses/BSD-3-Clause + Files with this license: + LICENCE [66:91] + +KEEP BSD-3-Clause a1ed101cefe2f975d6aef1dceeff7c8e +BELONGS ya.make + License text: + of multiple projects. In sljit, the code is under BSD licence. */ + Scancode info: + Original SPDX id: BSD-3-Clause + Score : 95.00 + Match type : REFERENCE + Links : http://www.opensource.org/licenses/BSD-3-Clause, https://spdx.org/licenses/BSD-3-Clause + Files with this license: + sljit/sljitNativeTILEGX-encoder.c [29:29] + +KEEP BSD-3-Clause aa36d6d984971367f4fda7d892144cd4 +BELONGS ya.make + Note: matched license text is too long. Read it in the source files. 
+ Scancode info: + Original SPDX id: BSD-3-Clause + Score : 100.00 + Match type : NOTICE + Links : http://www.opensource.org/licenses/BSD-3-Clause, https://spdx.org/licenses/BSD-3-Clause + Files with this license: + LICENCE [7:9] + +SKIP GPL-1.0-or-later ce2168f95c81d34ebb78d28c1801a759 +BELONGS ya.make + # only for cygwin + License text: + cygwin1.dll has to be distributed as well. Since cygwin1.dll is under the GPL + licence, this forces not only PCRE to be under the GPL, but also the entire + Scancode info: + Original SPDX id: GPL-1.0-or-later + Score : 100.00 + Match type : REFERENCE + Links : http://www.gnu.org/licenses/old-licenses/gpl-1.0-standalone.html, https://spdx.org/licenses/GPL-1.0-or-later + Files with this license: + NON-AUTOTOOLS-BUILD [352:353] + +KEEP FSFAP d02cc4799cbd521d2aa8c3ff19e655f6 +BELONGS ya.make + Note: matched license text is too long. Read it in the source files. + Scancode info: + Original SPDX id: FSFAP + Score : 100.00 + Match type : TEXT + Links : http://www.gnu.org/prep/maintain/html_node/License-Notices-for-Other-Files.html, https://spdx.org/licenses/FSFAP + Files with this license: + INSTALL [7:10] + +KEEP BSD-3-Clause d5e3a1e92f7c1348d74dfa716d281369 +BELONGS ya.make +FILE_INCLUDE LICENCE found in files: LICENCE at line 63 + License text: + THE "BSD" LICENCE + Scancode info: + Original SPDX id: BSD-3-Clause + Score : 99.00 + Match type : REFERENCE + Links : http://www.opensource.org/licenses/BSD-3-Clause, https://spdx.org/licenses/BSD-3-Clause + Files with this license: + LICENCE [63:63] + +KEEP BSD-3-Clause e73c88530490fbb7d73165c511d60f9d +BELONGS ya.make + License text: + avoided by linking with libedit (which has a BSD licence) instead. 
+ Scancode info: + Original SPDX id: BSD-3-Clause + Score : 100.00 + Match type : REFERENCE + Links : http://www.opensource.org/licenses/BSD-3-Clause, https://spdx.org/licenses/BSD-3-Clause + Files with this license: + README [403:403] + +SKIP GPL-1.0-or-later f7abe239790339a908564d9a088dde6d +BELONGS ya.make + # changelog + License text: + 2. Added 4th condition (GPL supersedes if conflict) and created separate + Scancode info: + Original SPDX id: GPL-1.0-or-later + Score : 50.00 + Match type : REFERENCE + Links : http://www.gnu.org/licenses/old-licenses/gpl-1.0-standalone.html, https://spdx.org/licenses/GPL-1.0-or-later + Files with this license: + ChangeLog [5848:5848] diff --git a/contrib/libs/pcre/.yandex_meta/licenses.list.txt b/contrib/libs/pcre/.yandex_meta/licenses.list.txt new file mode 100644 index 0000000000..d1349fc3d4 --- /dev/null +++ b/contrib/libs/pcre/.yandex_meta/licenses.list.txt @@ -0,0 +1,321 @@ +====================BSD-2-Clause==================== + * Redistribution and use in source and binary forms, with or without modification, are + * permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, this list + * of conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT + * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +====================BSD-3-Clause==================== + of multiple projects. In sljit, the code is under BSD licence. */ + + +====================BSD-3-Clause==================== + avoided by linking with libedit (which has a BSD licence) instead. + + +====================BSD-3-Clause==================== +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +====================BSD-3-Clause==================== +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + * Neither the name of the University of Cambridge nor the name of Google + Inc. nor the names of their contributors may be used to endorse or + promote products derived from this software without specific prior + written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. + + +====================BSD-3-Clause==================== +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + * Neither the name of the University of Cambridge nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. + + +====================BSD-3-Clause==================== +Release 8 of PCRE is distributed under the terms of the "BSD" licence, as +specified below. The documentation for PCRE, supplied in the "doc" +directory, is distributed under the same terms as the software itself. The data + + +====================BSD-3-Clause==================== +THE "BSD" LICENCE + + +====================BSD-3-Clause==================== +conventional "BSD" licence. + + +====================COPYRIGHT==================== + Written by Philip Hazel + Copyright (c) 1997-2012 University of Cambridge + + +====================COPYRIGHT==================== + Written by Philip Hazel + Copyright (c) 1997-2013 University of Cambridge + + +====================COPYRIGHT==================== + Written by Philip Hazel + Copyright (c) 1997-2016 University of Cambridge + + +====================COPYRIGHT==================== + Written by Philip Hazel + Copyright (c) 1997-2017 University of Cambridge + + +====================COPYRIGHT==================== + Written by Philip Hazel + Copyright (c) 1997-2020 University of Cambridge + + +====================COPYRIGHT==================== + Copyright (c) 1997-2014 University of Cambridge + + +====================COPYRIGHT==================== + Copyright (C) 1994-1996, 1999-2002, 2004-2016 Free Software +Foundation, Inc. 
+ + +====================COPYRIGHT==================== + The machine code generator part (this module) was written by Zoltan Herczeg + Copyright (c) 2010-2013 + + +====================COPYRIGHT==================== + * Copyright 2013-2013 Tilera Corporation(jiwang@tilera.com). All rights reserved. + + +====================COPYRIGHT==================== + * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. + + +====================COPYRIGHT==================== +// Copyright (c) 2005, Google Inc. +// All rights reserved. + + +====================COPYRIGHT==================== +// Copyright (c) 2010, Google Inc. +// All rights reserved. + + +====================COPYRIGHT==================== +Copyright (c) 1997-2021 University of Cambridge +All rights reserved. + + +====================COPYRIGHT==================== +Copyright (c) 2007-2012, Google Inc. +All rights reserved. + + +====================COPYRIGHT==================== +Copyright(c) 2009-2021 Zoltan Herczeg +All rights reserved. + + +====================COPYRIGHT==================== +Copyright(c) 2010-2021 Zoltan Herczeg +All rights reserved. + + +====================FSFAP==================== + Copying and distribution of this file, with or without modification, +are permitted in any medium without royalty provided the copyright +notice and this notice are preserved. This file is offered as-is, +without warranty of any kind. + + +====================File: LICENCE==================== +PCRE LICENCE +------------ + +PCRE is a library of functions to support regular expressions whose syntax +and semantics are as close as possible to those of the Perl 5 language. + +Release 8 of PCRE is distributed under the terms of the "BSD" licence, as +specified below. The documentation for PCRE, supplied in the "doc" +directory, is distributed under the same terms as the software itself. The data +in the testdata directory is not copyrighted and is in the public domain. 
+ +The basic library functions are written in C and are freestanding. Also +included in the distribution is a set of C++ wrapper functions, and a +just-in-time compiler that can be used to optimize pattern matching. These +are both optional features that can be omitted when the library is built. + + +THE BASIC LIBRARY FUNCTIONS +--------------------------- + +Written by: Philip Hazel +Email local part: Philip.Hazel +Email domain: gmail.com + +University of Cambridge Computing Service, +Cambridge, England. + +Copyright (c) 1997-2021 University of Cambridge +All rights reserved. + + +PCRE JUST-IN-TIME COMPILATION SUPPORT +------------------------------------- + +Written by: Zoltan Herczeg +Email local part: hzmester +Email domain: freemail.hu + +Copyright(c) 2010-2021 Zoltan Herczeg +All rights reserved. + + +STACK-LESS JUST-IN-TIME COMPILER +-------------------------------- + +Written by: Zoltan Herczeg +Email local part: hzmester +Email domain: freemail.hu + +Copyright(c) 2009-2021 Zoltan Herczeg +All rights reserved. + + +THE C++ WRAPPER FUNCTIONS +------------------------- + +Contributed by: Google Inc. + +Copyright (c) 2007-2012, Google Inc. +All rights reserved. + + +THE "BSD" LICENCE +----------------- + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + * Neither the name of the University of Cambridge nor the name of Google + Inc. nor the names of their contributors may be used to endorse or + promote products derived from this software without specific prior + written permission. 
+ +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. + +End + + +====================PCRE==================== +PCRE LICENCE + + +====================PCRE==================== +PCRE LICENCE + +Please see the file LICENCE in the PCRE distribution for licensing details. + + +====================Public-Domain==================== +in the testdata directory is not copyrighted and is in the public domain. 
diff --git a/contrib/libs/pcre/.yandex_meta/override.nix b/contrib/libs/pcre/.yandex_meta/override.nix new file mode 100644 index 0000000000..81635c459f --- /dev/null +++ b/contrib/libs/pcre/.yandex_meta/override.nix @@ -0,0 +1,23 @@ +pkgs: attrs: with pkgs; rec { + version = "8.45"; + + src = fetchurl { + url = "https://downloads.sourceforge.net/project/pcre/pcre/${version}/pcre-${version}.tar.bz2"; + hash = "sha256-Ta5v3NK7C7bDe1+Xwzwr6VTadDmFNpzdrDVG4yGL/7g="; + }; + + + buildInputs = [ zlib ]; + + configureFlags = [ + "--enable-cpp" + "--enable-unicode-properties" + "--enable-pcre16" + "--enable-pcre32" + "--enable-jit" + ]; + + postConfigure = '' + cp pcre_chartables.c.dist pcre_chartables.c + ''; +} diff --git a/contrib/libs/pcre/HACKING b/contrib/libs/pcre/HACKING new file mode 100644 index 0000000000..691b7a14e5 --- /dev/null +++ b/contrib/libs/pcre/HACKING @@ -0,0 +1,528 @@ +Technical Notes about PCRE +-------------------------- + +These are very rough technical notes that record potentially useful information +about PCRE internals. For information about testing PCRE, see the pcretest +documentation and the comment at the head of the RunTest file. + + +Historical note 1 +----------------- + +Many years ago I implemented some regular expression functions to an algorithm +suggested by Martin Richards. These were not Unix-like in form, and were quite +restricted in what they could do by comparison with Perl. The interesting part +about the algorithm was that the amount of space required to hold the compiled +form of an expression was known in advance. The code to apply an expression did +not operate by backtracking, as the original Henry Spencer code and current +Perl code does, but instead checked all possibilities simultaneously by keeping +a list of current states and checking all of them as it advanced through the +subject string. 
In the terminology of Jeffrey Friedl's book, it was a "DFA +algorithm", though it was not a traditional Finite State Machine (FSM). When +the pattern was all used up, all remaining states were possible matches, and +the one matching the longest subset of the subject string was chosen. This did +not necessarily maximize the individual wild portions of the pattern, as is +expected in Unix and Perl-style regular expressions. + + +Historical note 2 +----------------- + +By contrast, the code originally written by Henry Spencer (which was +subsequently heavily modified for Perl) compiles the expression twice: once in +a dummy mode in order to find out how much store will be needed, and then for +real. (The Perl version probably doesn't do this any more; I'm talking about +the original library.) The execution function operates by backtracking and +maximizing (or, optionally, minimizing in Perl) the amount of the subject that +matches individual wild portions of the pattern. This is an "NFA algorithm" in +Friedl's terminology. + + +OK, here's the real stuff +------------------------- + +For the set of functions that form the "basic" PCRE library (which are +unrelated to those mentioned above), I tried at first to invent an algorithm +that used an amount of store bounded by a multiple of the number of characters +in the pattern, to save on compiling time. However, because of the greater +complexity in Perl regular expressions, I couldn't do this. In any case, a +first pass through the pattern is helpful for other reasons. + + +Support for 16-bit and 32-bit data strings +------------------------------------------- + +From release 8.30, PCRE supports 16-bit as well as 8-bit data strings; and from +release 8.32, PCRE supports 32-bit data strings. The library can be compiled +in any combination of 8-bit, 16-bit or 32-bit modes, creating up to three +different libraries. 
In the description that follows, the word "short" is used +for a 16-bit data quantity, and the word "unit" is used for a quantity that is +a byte in 8-bit mode, a short in 16-bit mode and a 32-bit word in 32-bit mode. +However, so as not to over-complicate the text, the names of PCRE functions are +given in 8-bit form only. + + +Computing the memory requirement: how it was +-------------------------------------------- + +Up to and including release 6.7, PCRE worked by running a very degenerate first +pass to calculate a maximum store size, and then a second pass to do the real +compile - which might use a bit less than the predicted amount of memory. The +idea was that this would turn out faster than the Henry Spencer code because +the first pass is degenerate and the second pass can just store stuff straight +into the vector, which it knows is big enough. + + +Computing the memory requirement: how it is +------------------------------------------- + +By the time I was working on a potential 6.8 release, the degenerate first pass +had become very complicated and hard to maintain. Indeed one of the early +things I did for 6.8 was to fix Yet Another Bug in the memory computation. Then +I had a flash of inspiration as to how I could run the real compile function in +a "fake" mode that enables it to compute how much memory it would need, while +actually only ever using a few hundred bytes of working memory, and without too +many tests of the mode that might slow it down. So I refactored the compiling +functions to work this way. This got rid of about 600 lines of source. It +should make future maintenance and development easier. As this was such a major +change, I never released 6.8, instead upping the number to 7.0 (other quite +major changes were also present in the 7.0 release). + +A side effect of this work was that the previous limit of 200 on the nesting +depth of parentheses was removed. 
However, there is a downside: pcre_compile() +runs more slowly than before (30% or more, depending on the pattern) because it +is doing a full analysis of the pattern. My hope was that this would not be a +big issue, and in the event, nobody has commented on it. + +At release 8.34, a limit on the nesting depth of parentheses was re-introduced +(default 250, settable at build time) so as to put a limit on the amount of +system stack used by pcre_compile(). This is a safety feature for environments +with small stacks where the patterns are provided by users. + + +Traditional matching function +----------------------------- + +The "traditional", and original, matching function is called pcre_exec(), and +it implements an NFA algorithm, similar to the original Henry Spencer algorithm +and the way that Perl works. This is not surprising, since it is intended to be +as compatible with Perl as possible. This is the function most users of PCRE +will use most of the time. From release 8.20, if PCRE is compiled with +just-in-time (JIT) support, and studying a compiled pattern with JIT is +successful, the JIT code is run instead of the normal pcre_exec() code, but the +result is the same. + + +Supplementary matching function +------------------------------- + +From PCRE 6.0, there is also a supplementary matching function called +pcre_dfa_exec(). This implements a DFA matching algorithm that searches +simultaneously for all possible matches that start at one point in the subject +string. (Going back to my roots: see Historical Note 1 above.) This function +interprets the same compiled pattern data as pcre_exec(); however, not all the +facilities are available, and those that are do not always work in quite the +same way. See the user documentation for details. + +The algorithm that is used for pcre_dfa_exec() is not a traditional FSM, +because it may have a number of states active at one time.
More work would be +needed at compile time to produce a traditional FSM where only one state is +ever active at once. I believe some other regex matchers work this way. JIT +support is not available for this kind of matching. + + +Changeable options +------------------ + +The /i, /m, or /s options (PCRE_CASELESS, PCRE_MULTILINE, PCRE_DOTALL, and some +others) may change in the middle of patterns. From PCRE 8.13, their processing +is handled entirely at compile time by generating different opcodes for the +different settings. The runtime functions do not need to keep track of an +options state any more. + + +Format of compiled patterns +--------------------------- + +The compiled form of a pattern is a vector of unsigned units (bytes in 8-bit +mode, shorts in 16-bit mode, 32-bit words in 32-bit mode), containing items of +variable length. The first unit in an item contains an opcode, and the length +of the item is either implicit in the opcode or contained in the data that +follows it. + +In many cases listed below, LINK_SIZE data values are specified for offsets +within the compiled pattern. LINK_SIZE always specifies a number of bytes. The +default value for LINK_SIZE is 2, but PCRE can be compiled to use 3-byte or +4-byte values for these offsets, although this impairs the performance. (3-byte +LINK_SIZE values are available only in 8-bit mode.) Specifying a LINK_SIZE +larger than 2 is necessary only when patterns whose compiled length is greater +than 64K are going to be processed. In this description, we assume the "normal" +compilation options. Data values that are counts (e.g. quantifiers) are two +bytes long in 8-bit mode (most significant byte first), or one unit in 16-bit +and 32-bit modes.
+ + +Opcodes with no following data +------------------------------ + +These items are all just one unit long + + OP_END end of pattern + OP_ANY match any one character other than newline + OP_ALLANY match any one character, including newline + OP_ANYBYTE match any single unit, even in UTF-8/16 mode + OP_SOD match start of data: \A + OP_SOM, start of match (subject + offset): \G + OP_SET_SOM, set start of match (\K) + OP_CIRC ^ (start of data) + OP_CIRCM ^ multiline mode (start of data or after newline) + OP_NOT_WORD_BOUNDARY \B + OP_WORD_BOUNDARY \b + OP_NOT_DIGIT \D + OP_DIGIT \d + OP_NOT_HSPACE \H + OP_HSPACE \h + OP_NOT_WHITESPACE \S + OP_WHITESPACE \s + OP_NOT_VSPACE \V + OP_VSPACE \v + OP_NOT_WORDCHAR \W + OP_WORDCHAR \w + OP_EODN match end of data or newline at end: \Z + OP_EOD match end of data: \z + OP_DOLL $ (end of data, or before final newline) + OP_DOLLM $ multiline mode (end of data or before newline) + OP_EXTUNI match an extended Unicode grapheme cluster + OP_ANYNL match any Unicode newline sequence + + OP_ASSERT_ACCEPT ) + OP_ACCEPT ) These are Perl 5.10's "backtracking control + OP_COMMIT ) verbs". If OP_ACCEPT is inside capturing + OP_FAIL ) parentheses, it may be preceded by one or more + OP_PRUNE ) OP_CLOSE, each followed by a count that + OP_SKIP ) indicates which parentheses must be closed. + OP_THEN ) + +OP_ASSERT_ACCEPT is used when (*ACCEPT) is encountered within an assertion. +This ends the assertion, not the entire pattern match. + + +Backtracking control verbs with optional data +--------------------------------------------- + +(*THEN) without an argument generates the opcode OP_THEN and no following data. +OP_MARK is followed by the mark name, preceded by a one-unit length, and +followed by a binary zero. For (*PRUNE), (*SKIP), and (*THEN) with arguments, +the opcodes OP_PRUNE_ARG, OP_SKIP_ARG, and OP_THEN_ARG are used, with the name +following in the same format as OP_MARK.
+ + +Matching literal characters +--------------------------- + +The OP_CHAR opcode is followed by a single character that is to be matched +casefully. For caseless matching, OP_CHARI is used. In UTF-8 or UTF-16 modes, +the character may be more than one unit long. In UTF-32 mode, characters +are always exactly one unit long. + +If there is only one character in a character class, OP_CHAR or OP_CHARI is +used for a positive class, and OP_NOT or OP_NOTI for a negative one (that is, +for something like [^a]). + + +Repeating single characters +--------------------------- + +The common repeats (*, +, ?), when applied to a single character, use the +following opcodes, which come in caseful and caseless versions: + + Caseful Caseless + OP_STAR OP_STARI + OP_MINSTAR OP_MINSTARI + OP_POSSTAR OP_POSSTARI + OP_PLUS OP_PLUSI + OP_MINPLUS OP_MINPLUSI + OP_POSPLUS OP_POSPLUSI + OP_QUERY OP_QUERYI + OP_MINQUERY OP_MINQUERYI + OP_POSQUERY OP_POSQUERYI + +Each opcode is followed by the character that is to be repeated. In ASCII mode, +these are two-unit items; in UTF-8 or UTF-16 modes, the length is variable; in +UTF-32 mode these are one-unit items. Those with "MIN" in their names are the +minimizing versions. Those with "POS" in their names are possessive versions. +Other repeats make use of these opcodes: + + Caseful Caseless + OP_UPTO OP_UPTOI + OP_MINUPTO OP_MINUPTOI + OP_POSUPTO OP_POSUPTOI + OP_EXACT OP_EXACTI + +Each of these is followed by a count and then the repeated character. OP_UPTO +matches from 0 to the given number. A repeat with a non-zero minimum and a +fixed maximum is coded as an OP_EXACT followed by an OP_UPTO (or OP_MINUPTO or +OP_POSUPTO). + +Another set of matching repeating opcodes (called OP_NOTSTAR, OP_NOTSTARI, +etc.) are used for repeated, negated, single-character classes such as [^a]*. +The normal single-character opcodes (OP_STAR, etc.) are used for repeated +positive single-character classes.
+ + +Repeating character types +------------------------- + +Repeats of things like \d are done exactly as for single characters, except +that instead of a character, the opcode for the type is stored in the data +unit. The opcodes are: + + OP_TYPESTAR + OP_TYPEMINSTAR + OP_TYPEPOSSTAR + OP_TYPEPLUS + OP_TYPEMINPLUS + OP_TYPEPOSPLUS + OP_TYPEQUERY + OP_TYPEMINQUERY + OP_TYPEPOSQUERY + OP_TYPEUPTO + OP_TYPEMINUPTO + OP_TYPEPOSUPTO + OP_TYPEEXACT + + +Match by Unicode property +------------------------- + +OP_PROP and OP_NOTPROP are used for positive and negative matches of a +character by testing its Unicode property (the \p and \P escape sequences). +Each is followed by two units that encode the desired property as a type and a +value. The types are a set of #defines of the form PT_xxx, and the values are +enumerations of the form ucp_xx, defined in the ucp.h source file. The value is +relevant only for PT_GC (General Category), PT_PC (Particular Category), and +PT_SC (Script). + +Repeats of these items use the OP_TYPESTAR etc. set of opcodes, followed by +three units: OP_PROP or OP_NOTPROP, and then the desired property type and +value. + + +Character classes +----------------- + +If there is only one character in a class, OP_CHAR or OP_CHARI is used for a +positive class, and OP_NOT or OP_NOTI for a negative one (that is, for +something like [^a]). + +A set of repeating opcodes (called OP_NOTSTAR etc.) are used for repeated, +negated, single-character classes. The normal single-character opcodes +(OP_STAR, etc.) are used for repeated positive single-character classes. + +When there is more than one character in a class, and all the code points are +less than 256, OP_CLASS is used for a positive class, and OP_NCLASS for a +negative one. In either case, the opcode is followed by a 32-byte (16-short, +8-word) bit map containing a 1 bit for every character that is acceptable. The +bits are counted from the least significant end of each unit. 
In caseless mode, +bits for both cases are set. + +The reason for having both OP_CLASS and OP_NCLASS is so that, in UTF-8/16/32 +mode, subject characters with values greater than 255 can be handled correctly. +For OP_CLASS they do not match, whereas for OP_NCLASS they do. + +For classes containing characters with values greater than 255 or that contain +\p or \P, OP_XCLASS is used. It optionally uses a bit map if any code points +are less than 256, followed by a list of pairs (for a range) and single +characters. In caseless mode, both cases are explicitly listed. + +OP_XCLASS is followed by a unit containing flag bits: XCL_NOT indicates that +this is a negative class, and XCL_MAP indicates that a bit map is present. +There follows the bit map, if XCL_MAP is set, and then a sequence of items +coded as follows: + + XCL_END marks the end of the list + XCL_SINGLE one character follows + XCL_RANGE two characters follow + XCL_PROP a Unicode property (type, value) follows + XCL_NOTPROP a Unicode property (type, value) follows + +If a range starts with a code point less than 256 and ends with one greater +than 256, an XCL_RANGE item is used, without setting any bits in the bit map. +This means that if no other items in the class set bits in the map, a map is +not needed. + + +Back references +--------------- + +OP_REF (caseful) or OP_REFI (caseless) is followed by a count containing the +reference number if the reference is to a unique capturing group (either by +number or by name). When named groups are used, there may be more than one +group with the same name. In this case, a reference by name generates OP_DNREF +or OP_DNREFI. These are followed by two counts: the index (not the byte offset) +in the group name table of the first entry for the required name, followed by +the number of groups with the same name.
+ + +Repeating character classes and back references +----------------------------------------------- + +Single-character classes are handled specially (see above). This section +applies to other classes and also to back references. In both cases, the repeat +information follows the base item. The matching code looks at the following +opcode to see if it is one of + + OP_CRSTAR + OP_CRMINSTAR + OP_CRPOSSTAR + OP_CRPLUS + OP_CRMINPLUS + OP_CRPOSPLUS + OP_CRQUERY + OP_CRMINQUERY + OP_CRPOSQUERY + OP_CRRANGE + OP_CRMINRANGE + OP_CRPOSRANGE + +All but the last three are single-unit items, with no data. The others are +followed by the minimum and maximum repeat counts. + + +Brackets and alternation +------------------------ + +A pair of non-capturing round brackets is wrapped round each expression at +compile time, so alternation always happens in the context of brackets. + +[Note for North Americans: "bracket" to some English speakers, including +myself, can be round, square, curly, or pointy. Hence this usage rather than +"parentheses".] + +Non-capturing brackets use the opcode OP_BRA. Originally PCRE was limited to 99 +capturing brackets and it used a different opcode for each one. From release +3.5, the limit was removed by putting the bracket number into the data for +higher-numbered brackets. From release 7.0 all capturing brackets are handled +this way, using the single opcode OP_CBRA. + +A bracket opcode is followed by LINK_SIZE bytes which give the offset to the +next alternative OP_ALT or, if there aren't any branches, to the matching +OP_KET opcode. Each OP_ALT is followed by LINK_SIZE bytes giving the offset to +the next one, or to the OP_KET opcode. For capturing brackets, the bracket +number is a count that immediately follows the offset. + +OP_KET is used for subpatterns that do not repeat indefinitely, and OP_KETRMIN +and OP_KETRMAX are used for indefinite repetitions, minimally or maximally +respectively (see below for possessive repetitions). 
All three are followed by +LINK_SIZE bytes giving (as a positive number) the offset back to the matching +bracket opcode. + +If a subpattern is quantified such that it is permitted to match zero times, it +is preceded by one of OP_BRAZERO, OP_BRAMINZERO, or OP_SKIPZERO. These are +single-unit opcodes that tell the matcher that skipping the following +subpattern entirely is a valid branch. In the case of the first two, not +skipping the pattern is also valid (greedy and non-greedy). The third is used +when a pattern has the quantifier {0,0}. It cannot be entirely discarded, +because it may be called as a subroutine from elsewhere in the regex. + +A subpattern with an indefinite maximum repetition is replicated in the +compiled data its minimum number of times (or once with OP_BRAZERO if the +minimum is zero), with the final copy terminating with OP_KETRMIN or OP_KETRMAX +as appropriate. + +A subpattern with a bounded maximum repetition is replicated in a nested +fashion up to the maximum number of times, with OP_BRAZERO or OP_BRAMINZERO +before each replication after the minimum, so that, for example, (abc){2,5} is +compiled as (abc)(abc)((abc)((abc)(abc)?)?)?, except that each bracketed group +has the same number. + +When a repeated subpattern has an unbounded upper limit, it is checked to see +whether it could match an empty string. If this is the case, the opcode in the +final replication is changed to OP_SBRA or OP_SCBRA. This tells the matcher +that it needs to check for matching an empty string when it hits OP_KETRMIN or +OP_KETRMAX, and if so, to break the loop. + + +Possessive brackets +------------------- + +When a repeated group (capturing or non-capturing) is marked as possessive by +the "+" notation, e.g. (abc)++, different opcodes are used. Their names all +have POS on the end, e.g. OP_BRAPOS instead of OP_BRA and OP_SCBRAPOS instead +of OP_SCBRA. The end of such a group is marked by OP_KETRPOS.
If the minimum +repetition is zero, the group is preceded by OP_BRAPOSZERO. + + +Once-only (atomic) groups +------------------------- + +These are just like other subpatterns, but they start with the opcode +OP_ONCE or OP_ONCE_NC. The former is used when there are capturing brackets +within the atomic group; the latter when there are none. The distinction is needed +for when there is a backtrack to before the group - any captures within the +group must be reset, so it is necessary to retain backtracking points inside +the group even after it is complete in order to do this. When there are no +captures in an atomic group, all the backtracking can be discarded when it is +complete. This is more efficient, and also uses less stack. + +The check for matching an empty string in an unbounded repeat is handled +entirely at runtime, so there are just these two opcodes for atomic groups. + + +Assertions +---------- + +Forward assertions are also just like other subpatterns, but starting with one +of the opcodes OP_ASSERT or OP_ASSERT_NOT. Backward assertions use the opcodes +OP_ASSERTBACK and OP_ASSERTBACK_NOT, and the first opcode inside the assertion +is OP_REVERSE, followed by a count of the number of characters to move back the +pointer in the subject string. In ASCII mode, the count is a number of units, +but in UTF-8/16 mode each character may occupy more than one unit; in UTF-32 +mode each character occupies exactly one unit. A separate count is present in +each alternative of a lookbehind assertion, allowing them to have different +fixed lengths. + + +Conditional subpatterns +----------------------- + +These are like other subpatterns, but they start with the opcode OP_COND, or +OP_SCOND for one that might match an empty string in an unbounded repeat.
If +the condition is a back reference, this is stored at the start of the +subpattern using the opcode OP_CREF followed by a count containing the +reference number, provided that the reference is to a unique capturing group. +If the reference was by name and there is more than one group with that name, +OP_DNCREF is used instead. It is followed by two counts: the index in the group +names table, and the number of groups with the same name. + +If the condition is "in recursion" (coded as "(?(R)"), or "in recursion of +group x" (coded as "(?(Rx)"), the group number is stored at the start of the +subpattern using the opcode OP_RREF (with a value of zero for "the whole +pattern") or OP_DNRREF (with data as for OP_DNCREF). For a DEFINE condition, +just the single unit OP_DEF is used (it has no associated data). Otherwise, a +conditional subpattern always starts with one of the assertions. + + +Recursion +--------- + +Recursion either matches the current regex, or some subexpression. The opcode +OP_RECURSE is followed by a LINK_SIZE value that is the offset to the starting +bracket from the start of the whole pattern. From release 6.5, OP_RECURSE is +automatically wrapped inside OP_ONCE brackets, because otherwise some patterns +broke it. OP_RECURSE is also used for "subroutine" calls, even though they are +not strictly a recursion. + + +Callout +------- + +OP_CALLOUT is followed by one unit of data that holds a callout number in the +range 0 to 254 for manual callouts, or 255 for an automatic callout. In both +cases there follows a count giving the offset in the pattern string to the +start of the following item, and another count giving the length of this item. +These values make it possible for pcretest to output useful tracing information +using automatic callouts.
+ +Philip Hazel +November 2013 diff --git a/contrib/libs/pcre/LICENCE b/contrib/libs/pcre/LICENCE new file mode 100644 index 0000000000..803b4119e5 --- /dev/null +++ b/contrib/libs/pcre/LICENCE @@ -0,0 +1,93 @@ +PCRE LICENCE +------------ + +PCRE is a library of functions to support regular expressions whose syntax +and semantics are as close as possible to those of the Perl 5 language. + +Release 8 of PCRE is distributed under the terms of the "BSD" licence, as +specified below. The documentation for PCRE, supplied in the "doc" +directory, is distributed under the same terms as the software itself. The data +in the testdata directory is not copyrighted and is in the public domain. + +The basic library functions are written in C and are freestanding. Also +included in the distribution is a set of C++ wrapper functions, and a +just-in-time compiler that can be used to optimize pattern matching. These +are both optional features that can be omitted when the library is built. + + +THE BASIC LIBRARY FUNCTIONS +--------------------------- + +Written by: Philip Hazel +Email local part: Philip.Hazel +Email domain: gmail.com + +University of Cambridge Computing Service, +Cambridge, England. + +Copyright (c) 1997-2021 University of Cambridge +All rights reserved. + + +PCRE JUST-IN-TIME COMPILATION SUPPORT +------------------------------------- + +Written by: Zoltan Herczeg +Email local part: hzmester +Email domain: freemail.hu + +Copyright(c) 2010-2021 Zoltan Herczeg +All rights reserved. + + +STACK-LESS JUST-IN-TIME COMPILER +-------------------------------- + +Written by: Zoltan Herczeg +Email local part: hzmester +Email domain: freemail.hu + +Copyright(c) 2009-2021 Zoltan Herczeg +All rights reserved. + + +THE C++ WRAPPER FUNCTIONS +------------------------- + +Contributed by: Google Inc. + +Copyright (c) 2007-2012, Google Inc. +All rights reserved. 
+ + +THE "BSD" LICENCE +----------------- + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + * Neither the name of the University of Cambridge nor the name of Google + Inc. nor the names of their contributors may be used to endorse or + promote products derived from this software without specific prior + written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. + +End diff --git a/contrib/libs/pcre/NON-AUTOTOOLS-BUILD b/contrib/libs/pcre/NON-AUTOTOOLS-BUILD new file mode 100644 index 0000000000..23c4e64f84 --- /dev/null +++ b/contrib/libs/pcre/NON-AUTOTOOLS-BUILD @@ -0,0 +1,773 @@ +Building PCRE without using autotools +------------------------------------- + +NOTE: This document relates to PCRE releases that use the original API, with +library names libpcre, libpcre16, and libpcre32. 
January 2015 saw the first +release of a new API, known as PCRE2, with release numbers starting at 10.00 +and library names libpcre2-8, libpcre2-16, and libpcre2-32. The old libraries +(now called PCRE1) are now at end of life, and 8.45 is the final release. New +projects are advised to use the new PCRE2 libraries. + + +This document contains the following sections: + + General + Generic instructions for the PCRE C library + The C++ wrapper functions + Building for virtual Pascal + Stack size in Windows environments + Linking programs in Windows environments + Calling conventions in Windows environments + Comments about Win32 builds + Building PCRE on Windows with CMake + Use of relative paths with CMake on Windows + Testing with RunTest.bat + Building under Windows CE with Visual Studio 200x + Building under Windows with BCC5.5 + Building using Borland C++ Builder 2007 (CB2007) and higher + Building PCRE on OpenVMS + Building PCRE on Stratus OpenVOS + Building PCRE on native z/OS and z/VM + + +GENERAL + +I (Philip Hazel) have no experience of Windows or VMS systems and how their +libraries work. The items in the PCRE distribution and Makefile that relate to +anything other than Linux systems are untested by me. + +There are some other comments and files (including some documentation in CHM +format) in the Contrib directory on the FTP site: + + ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre/Contrib + +The basic PCRE library consists entirely of code written in Standard C, and so +should compile successfully on any system that has a Standard C compiler and +library. The C++ wrapper functions are a separate issue (see below). + +The PCRE distribution includes a "configure" file for use by the configure/make +(autotools) build system, as found in many Unix-like environments. The README +file contains information about the options for "configure". 
+ +There is also support for CMake, which some users prefer, especially in Windows +environments, though it can also be run in Unix-like environments. See the +section entitled "Building PCRE on Windows with CMake" below. + +Versions of config.h and pcre.h are distributed in the PCRE tarballs under the +names config.h.generic and pcre.h.generic. These are provided for those who +build PCRE without using "configure" or CMake. If you use "configure" or CMake, +the .generic versions are not used. + + +GENERIC INSTRUCTIONS FOR THE PCRE C LIBRARY + +The following are generic instructions for building the PCRE C library "by +hand". If you are going to use CMake, this section does not apply to you; you +can skip ahead to the CMake section. + + (1) Copy or rename the file config.h.generic as config.h, and edit the macro + settings that it contains to whatever is appropriate for your environment. + + In particular, you can alter the definition of the NEWLINE macro to + specify what character(s) you want to be interpreted as line terminators. + In an EBCDIC environment, you MUST change NEWLINE, because its default + value is 10, an ASCII LF. The usual EBCDIC newline character is 21 (0x15, + NL), though in some cases it may be 37 (0x25). + + When you compile any of the PCRE modules, you must specify -DHAVE_CONFIG_H + to your compiler so that config.h is included in the sources. + + An alternative approach is not to edit config.h, but to use -D on the + compiler command line to make any changes that you need to the + configuration options. In this case -DHAVE_CONFIG_H must not be set. + + NOTE: There have been occasions when the way in which certain parameters + in config.h are used has changed between releases. (In the configure/make + world, this is handled automatically.) When upgrading to a new release, + you are strongly advised to review config.h.generic before re-using what + you had previously. + + (2) Copy or rename the file pcre.h.generic as pcre.h. 
+ + (3) EITHER: + Copy or rename file pcre_chartables.c.dist as pcre_chartables.c. + + OR: + Compile dftables.c as a stand-alone program (using -DHAVE_CONFIG_H if + you have set up config.h), and then run it with the single argument + "pcre_chartables.c". This generates a set of standard character tables + and writes them to that file. The tables are generated using the default + C locale for your system. If you want to use a locale that is specified + by LC_xxx environment variables, add the -L option to the dftables + command. You must use this method if you are building on a system that + uses EBCDIC code. + + The tables in pcre_chartables.c are defaults. The caller of PCRE can + specify alternative tables at run time. + + (4) Ensure that you have the following header files: + + pcre_internal.h + ucp.h + + (5) For an 8-bit library, compile the following source files, setting + -DHAVE_CONFIG_H as a compiler option if you have set up config.h with your + configuration, or else use other -D settings to change the configuration + as required. + + pcre_byte_order.c + pcre_chartables.c + pcre_compile.c + pcre_config.c + pcre_dfa_exec.c + pcre_exec.c + pcre_fullinfo.c + pcre_get.c + pcre_globals.c + pcre_jit_compile.c + pcre_maketables.c + pcre_newline.c + pcre_ord2utf8.c + pcre_refcount.c + pcre_string_utils.c + pcre_study.c + pcre_tables.c + pcre_ucd.c + pcre_valid_utf8.c + pcre_version.c + pcre_xclass.c + + Make sure that you include -I. in the compiler command (or equivalent for + an unusual compiler) so that all included PCRE header files are first + sought in the current directory. Otherwise you run the risk of picking up + a previously-installed file from somewhere else. + + Note that you must still compile pcre_jit_compile.c, even if you have not + defined SUPPORT_JIT in config.h, because when JIT support is not + configured, dummy functions are compiled. 
When JIT support IS configured, + pcre_jit_compile.c #includes sources from the sljit subdirectory, where + there should be 16 files, all of whose names begin with "sljit". + + (6) Now link all the compiled code into an object library in whichever form + your system keeps such libraries. This is the basic PCRE C 8-bit library. + If your system has static and shared libraries, you may have to do this + once for each type. + + (7) If you want to build a 16-bit library (as well as, or instead of the 8-bit + or 32-bit libraries) repeat steps 5-6 with the following files: + + pcre16_byte_order.c + pcre16_chartables.c + pcre16_compile.c + pcre16_config.c + pcre16_dfa_exec.c + pcre16_exec.c + pcre16_fullinfo.c + pcre16_get.c + pcre16_globals.c + pcre16_jit_compile.c + pcre16_maketables.c + pcre16_newline.c + pcre16_ord2utf16.c + pcre16_refcount.c + pcre16_string_utils.c + pcre16_study.c + pcre16_tables.c + pcre16_ucd.c + pcre16_utf16_utils.c + pcre16_valid_utf16.c + pcre16_version.c + pcre16_xclass.c + + (8) If you want to build a 32-bit library (as well as, or instead of the 8-bit + or 16-bit libraries) repeat steps 5-6 with the following files: + + pcre32_byte_order.c + pcre32_chartables.c + pcre32_compile.c + pcre32_config.c + pcre32_dfa_exec.c + pcre32_exec.c + pcre32_fullinfo.c + pcre32_get.c + pcre32_globals.c + pcre32_jit_compile.c + pcre32_maketables.c + pcre32_newline.c + pcre32_ord2utf32.c + pcre32_refcount.c + pcre32_string_utils.c + pcre32_study.c + pcre32_tables.c + pcre32_ucd.c + pcre32_utf32_utils.c + pcre32_valid_utf32.c + pcre32_version.c + pcre32_xclass.c + + (9) If you want to build the POSIX wrapper functions (which apply only to the + 8-bit library), ensure that you have the pcreposix.h file and then compile + pcreposix.c (remembering -DHAVE_CONFIG_H if necessary). Link the result + (on its own) as the pcreposix library. 
+ +(10) The pcretest program can be linked with any combination of the 8-bit, + 16-bit and 32-bit libraries (depending on what you selected in config.h). + Compile pcretest.c and pcre_printint.c (again, don't forget + -DHAVE_CONFIG_H) and link them together with the appropriate library/ies. + If you compiled an 8-bit library, pcretest also needs the pcreposix + wrapper library unless you compiled it with -DNOPOSIX. + +(11) Run pcretest on the testinput files in the testdata directory, and check + that the output matches the corresponding testoutput files. There are + comments about what each test does in the section entitled "Testing PCRE" + in the README file. If you compiled more than one of the 8-bit, 16-bit and + 32-bit libraries, you need to run pcretest with the -16 option to do + 16-bit tests and with the -32 option to do 32-bit tests. + + Some tests are relevant only when certain build-time options are selected. + For example, test 4 is for UTF-8/UTF-16/UTF-32 support, and will not run + if you have built PCRE without it. See the comments at the start of each + testinput file. If you have a suitable Unix-like shell, the RunTest script + will run the appropriate tests for you. The command "RunTest list" will + output a list of all the tests. + + Note that the supplied files are in Unix format, with just LF characters + as line terminators. You may need to edit them to change this if your + system uses a different convention. If you are using Windows, you probably + should use the wintestinput3 file instead of testinput3 (and the + corresponding output file). This is a locale test; wintestinput3 sets the + locale to "french" rather than "fr_FR", and there are some minor output + differences. + +(12) If you have built PCRE with SUPPORT_JIT, the JIT features will be tested + by the testdata files. However, you might also like to build and run + the freestanding JIT test program, pcre_jit_test.c. 
+ +(13) If you want to use the pcregrep command, compile and link pcregrep.c; it + uses only the basic 8-bit PCRE library (it does not need the pcreposix + library). + + +THE C++ WRAPPER FUNCTIONS + +The PCRE distribution also contains some C++ wrapper functions and tests, +applicable to the 8-bit library, which were contributed by Google Inc. On a +system that can use "configure" and "make", the functions are automatically +built into a library called pcrecpp. It should be straightforward to compile +the .cc files manually on other systems. The files called xxx_unittest.cc are +test programs for each of the corresponding xxx.cc files. + + +BUILDING FOR VIRTUAL PASCAL + +A script for building PCRE using Borland's C++ compiler for use with VPASCAL +was contributed by Alexander Tokarev. Stefan Weber updated the script and added +additional files. The following files in the distribution are for building PCRE +for use with VP/Borland: makevp_c.txt, makevp_l.txt, makevp.bat, pcregexp.pas. + + +STACK SIZE IN WINDOWS ENVIRONMENTS + +The default processor stack size of 1Mb in some Windows environments is too +small for matching patterns that need much recursion. In particular, test 2 may +fail because of this. Normally, running out of stack causes a crash, but there +have been cases where the test program has just died silently. See your linker +documentation for how to increase stack size if you experience problems. The +Linux default of 8Mb is a reasonable choice for the stack, though even that can +be too small for some pattern/subject combinations. + +PCRE has a compile configuration option to disable the use of stack for +recursion so that heap is used instead. However, pattern matching is +significantly slower when this is done. There is more about stack usage in the +"pcrestack" documentation. 
+ + +LINKING PROGRAMS IN WINDOWS ENVIRONMENTS + +If you want to statically link a program against a PCRE library in the form of +a non-dll .a file, you must define PCRE_STATIC before including pcre.h or +pcrecpp.h, otherwise the pcre_malloc() and pcre_free() exported functions will +be declared __declspec(dllimport), with unwanted results. + + +CALLING CONVENTIONS IN WINDOWS ENVIRONMENTS + +It is possible to compile programs to use different calling conventions using +MSVC. Search the web for "calling conventions" for more information. To make it +easier to change the calling convention for the exported functions in the +PCRE library, the macro PCRE_CALL_CONVENTION is present in all the external +definitions. It can be set externally when compiling (e.g. in CFLAGS). If it is +not set, it defaults to empty; the default calling convention is then used +(which is what is wanted most of the time). + + +COMMENTS ABOUT WIN32 BUILDS (see also "BUILDING PCRE ON WINDOWS WITH CMAKE") + +There are two ways of building PCRE using the "configure, make, make install" +paradigm on Windows systems: using MinGW or using Cygwin. These are not at all +the same thing; they are completely different from each other. There is also +support for building using CMake, which some users find a more straightforward +way of building PCRE under Windows. + +The MinGW home page (http://www.mingw.org/) says this: + + MinGW: A collection of freely available and freely distributable Windows + specific header files and import libraries combined with GNU toolsets that + allow one to produce native Windows programs that do not rely on any + 3rd-party C runtime DLLs. + +The Cygwin home page (http://www.cygwin.com/) says this: + + Cygwin is a Linux-like environment for Windows. It consists of two parts: + + . A DLL (cygwin1.dll) which acts as a Linux API emulation layer providing + substantial Linux API functionality + + . A collection of tools which provide Linux look and feel. 
+ + The Cygwin DLL currently works with all recent, commercially released x86 32 + bit and 64 bit versions of Windows, with the exception of Windows CE. + +On both MinGW and Cygwin, PCRE should build correctly using: + + ./configure && make && make install + +This should create two libraries called libpcre and libpcreposix, and, if you +have enabled building the C++ wrapper, a third one called libpcrecpp. These are +independent libraries: when you link with libpcreposix or libpcrecpp you must +also link with libpcre, which contains the basic functions. (Some earlier +releases of PCRE included the basic libpcre functions in libpcreposix. This no +longer happens.) + +A user submitted a special-purpose patch that makes it easy to create +"pcre.dll" under mingw32 using the "msys" environment. It provides "pcre.dll" +as a special target. If you use this target, no other files are built, and in +particular, the pcretest and pcregrep programs are not built. An example of how +this might be used is: + + ./configure --enable-utf --disable-cpp CFLAGS="-O3 -s"; make pcre.dll + +Using Cygwin's compiler generates libraries and executables that depend on +cygwin1.dll. If a library that is generated this way is distributed, +cygwin1.dll has to be distributed as well. Since cygwin1.dll is under the GPL +licence, this forces not only PCRE to be under the GPL, but also the entire +application. A distributor who wants to keep their own code proprietary must +purchase an appropriate Cygwin licence. + +MinGW has no such restrictions. The MinGW compiler generates a library or +executable that can run standalone on Windows without any third party dll or +licensing issues. + +But there is more complication: + +If a Cygwin user uses the -mno-cygwin Cygwin gcc flag, what that really does is +to tell Cygwin's gcc to use the MinGW gcc. Cygwin's gcc is only acting as a +front end to MinGW's gcc (if you install Cygwin's gcc, you get both Cygwin's +gcc and MinGW's gcc). So, a user can: + +. 
Build native binaries by using MinGW or by getting Cygwin and using + -mno-cygwin. + +. Build binaries that depend on cygwin1.dll by using Cygwin with the normal + compiler flags. + +The test files that are supplied with PCRE are in UNIX format, with LF +characters as line terminators. Unless your PCRE library uses a default newline +option that includes LF as a valid newline, it may be necessary to change the +line terminators in the test files to get some of the tests to work. + + +BUILDING PCRE ON WINDOWS WITH CMAKE + +CMake is an alternative configuration facility that can be used instead of +"configure". CMake creates project files (make files, solution files, etc.) +tailored to numerous development environments, including Visual Studio, +Borland, Msys, MinGW, NMake, and Unix. If possible, use short paths with no +spaces in the names for your CMake installation and your PCRE source and build +directories. + +The following instructions were contributed by a PCRE user. If they are not +followed exactly, errors may occur. In the event that errors do occur, it is +recommended that you delete the CMake cache before attempting to repeat the +CMake build process. In the CMake GUI, the cache can be deleted by selecting +"File > Delete Cache". + +1. Install the latest CMake version available from http://www.cmake.org/, and + ensure that cmake\bin is on your path. + +2. Unzip (retaining folder structure) the PCRE source tree into a source + directory such as C:\pcre. You should ensure your local date and time + is not earlier than the file dates in your source dir if the release is + very new. + +3. Create a new, empty build directory, preferably a subdirectory of the + source dir. For example, C:\pcre\pcre-xx\build. + +4. Run cmake-gui from the Shell environment of your build tool, for example, + Msys for Msys/MinGW or Visual Studio Command Prompt for VC/VC++. Do not try + to start Cmake from the Windows Start menu, as this can lead to errors. + +5. 
Enter C:\pcre\pcre-xx and C:\pcre\pcre-xx\build for the source and build + directories, respectively. + +6. Hit the "Configure" button. + +7. Select the particular IDE / build tool that you are using (Visual + Studio, MSYS makefiles, MinGW makefiles, etc.) + +8. The GUI will then list several configuration options. This is where + you can enable UTF-8 support or other PCRE optional features. + +9. Hit "Configure" again. The adjacent "Generate" button should now be + active. + +10. Hit "Generate". + +11. The build directory should now contain a usable build system, be it a + solution file for Visual Studio, makefiles for MinGW, etc. Exit from + cmake-gui and use the generated build system with your compiler or IDE. + E.g., for MinGW you can run "make", or for Visual Studio, open the PCRE + solution, select the desired configuration (Debug, or Release, etc.) and + build the ALL_BUILD project. + +12. If during configuration with cmake-gui you've elected to build the test + programs, you can execute them by building the test project. E.g., for + MinGW: "make test"; for Visual Studio build the RUN_TESTS project. The + most recent build configuration is targeted by the tests. A summary of + test results is presented. Complete test output is subsequently + available for review in Testing\Temporary under your build dir. + + +USE OF RELATIVE PATHS WITH CMAKE ON WINDOWS + +A PCRE user comments as follows: I thought that others may want to know the +current state of CMAKE_USE_RELATIVE_PATHS support on Windows. Here it is: + +-- AdditionalIncludeDirectories is only partially modified (only the + first path - see below) +-- Only some of the contained file paths are modified - shown below for + pcre.vcproj +-- It properly modifies + +I am sure CMake people can fix that if they want to. Until then one will +need to replace existing absolute paths in project files with relative +paths manually (e.g. from VS) - relative to project file location. 
I did +just that before being told to try CMAKE_USE_RELATIVE_PATHS. Not a big +deal. + +AdditionalIncludeDirectories="E:\builds\pcre\build;E:\builds\pcre\pcre-7.5;" +AdditionalIncludeDirectories=".;E:\builds\pcre\pcre-7.5;" + +RelativePath="pcre.h" +RelativePath="pcre_chartables.c" +RelativePath="pcre_chartables.c.rule" + + +TESTING WITH RUNTEST.BAT + +If configured with CMake, building the test project ("make test" or building +ALL_TESTS in Visual Studio) creates (and runs) pcre_test.bat (and depending +on your configuration options, possibly other test programs) in the build +directory. Pcre_test.bat runs RunTest.Bat with correct source and exe paths. + +For manual testing with RunTest.bat, provided the build dir is a subdirectory +of the source directory: Open command shell window. Chdir to the location +of your pcretest.exe and pcregrep.exe programs. Call RunTest.bat with +"..\RunTest.Bat" or "..\..\RunTest.bat" as appropriate. + +To run only a particular test with RunTest.Bat provide a test number argument. + +Otherwise: + +1. Copy RunTest.bat into the directory where pcretest.exe and pcregrep.exe + have been created. + +2. Edit RunTest.bat to identify the full or relative location of + the pcre source (in which the testdata folder resides), e.g.: + + set srcdir=C:\pcre\pcre-8.20 + +3. In a Windows command environment, chdir to the location of your bat and + exe programs. + +4. Run RunTest.bat. Test outputs will automatically be compared to expected + results, and discrepancies will be identified in the console output. + +To independently test the just-in-time compiler, run pcre_jit_test.exe. +To test pcrecpp, run pcrecpp_unittest.exe, pcre_stringpiece_unittest.exe and +pcre_scanner_unittest.exe. + + +BUILDING UNDER WINDOWS CE WITH VISUAL STUDIO 200x + +Vincent Richomme sent a zip archive of files to help with this process. They +can be found in the file "pcre-vsbuild.zip" in the Contrib directory of the FTP +site. 
+ + +BUILDING UNDER WINDOWS WITH BCC5.5 + +Michael Roy sent these comments about building PCRE under Windows with BCC5.5: + +Some of the core BCC libraries have a version of PCRE from 1998 built in, which +can lead to pcre_exec() giving an erroneous PCRE_ERROR_NULL from a version +mismatch. I'm including an easy workaround below, if you'd like to include it +in the non-unix instructions: + +When linking a project with BCC5.5, pcre.lib must be included before any of the +libraries cw32.lib, cw32i.lib, cw32mt.lib, and cw32mti.lib on the command line. + + +BUILDING USING BORLAND C++ BUILDER 2007 (CB2007) AND HIGHER + +A PCRE user sent these comments about this environment (see also the comment +from another user that follows them): + +The XE versions of C++ Builder come with a RegularExpressionsCore class which +contains a version of TPerlRegEx. However, direct use of the C PCRE library may +be desirable. + +The default makevp.bat, however, supplied with PCRE builds a version of PCRE +that is not usable with any version of C++ Builder because the compiler ships +with an embedded version of PCRE, version 2.01 from 1998! [See also the note +about BCC5.5 above.] If you want to use PCRE you'll need to rename the +functions (pcre_compile to pcre_compile_bcc, etc) or do as I have done and just +use the 16 bit versions. I'm using std::wstring everywhere anyway. Since the +embedded version of PCRE does not have the 16 bit function names, there is no +conflict. + +Building PCRE using a C++ Builder static library project file (recommended): + +1. Rename or remove pcre.h, pcreposi.h, and pcreposix.h from your C++ Builder +original include path. + +2. Download PCRE from pcre.org and extract to a directory. + +3. Rename pcre_chartables.c.dist to pcre_chartables.c, pcre.h.generic to +pcre.h, and config.h.generic to config.h. + +4. Edit pcre.h and pcre_config.c so that they include config.h. + +5. 
Edit config.h like so: + +Comment out the following lines: +#define PACKAGE "pcre" +#define PACKAGE_BUGREPORT "" +#define PACKAGE_NAME "PCRE" +#define PACKAGE_STRING "PCRE 8.32" +#define PACKAGE_TARNAME "pcre" +#define PACKAGE_URL "" +#define PACKAGE_VERSION "8.32" + +Add the following lines: +#ifndef SUPPORT_UTF +#define SUPPORT_UTF 100 // any value is fine +#endif + +#ifndef SUPPORT_UCP +#define SUPPORT_UCP 101 // any value is fine +#endif + +#ifndef SUPPORT_PCRE16 +#define SUPPORT_PCRE16 102 // any value is fine +#endif + +#ifndef SUPPORT_UTF8 +#define SUPPORT_UTF8 103 // any value is fine +#endif + +6. Build a C++ Builder project using the IDE. Go to File / New / Other and +choose Static Library. You can name it pcre.cbproj or whatever. Now set your +paths by going to Project / Options. Set the Include path. Do this from the +"Base" option to apply to both Release and Debug builds. Now add the following +files to the project: + +pcre.h +pcre16_byte_order.c +pcre16_chartables.c +pcre16_compile.c +pcre16_config.c +pcre16_dfa_exec.c +pcre16_exec.c +pcre16_fullinfo.c +pcre16_get.c +pcre16_globals.c +pcre16_maketables.c +pcre16_newline.c +pcre16_ord2utf16.c +pcre16_printint.c +pcre16_refcount.c +pcre16_string_utils.c +pcre16_study.c +pcre16_tables.c +pcre16_ucd.c +pcre16_utf16_utils.c +pcre16_valid_utf16.c +pcre16_version.c +pcre16_xclass.c + +//Optional +pcre_version.c + +7. After compiling the .lib file, copy the .lib and header files to a project +you want to use PCRE with. Enjoy. + +Optional ... Building PCRE using the makevp.bat file: + +1. Edit makevp_c.txt and makevp_l.txt and change all the names to the 16 bit +versions. + +2. Edit makevp.bat and set the path to C++ Builder. Run makevp.bat. + +Another PCRE user added this comment: + +Another approach I successfully used for some years with BCB 5 and 6 was to +make sure that include and library paths of PCRE are configured before the +default paths of the IDE in the dialogs where one can manage those paths. 
+Afterwards one can open the project files using a text editor and manually add +the self created library for pcre itself, pcrecpp doesn't ship with the IDE, in +the library nodes where the IDE manages its own libraries to link against in +front of the IDE-own libraries. This way one can use the default PCRE function +names without getting access violations on runtime. + + <ALLLIB value="libpcre.lib $(LIBFILES) $(LIBRARIES) import32.lib cp32mt.lib"/> + + +BUILDING PCRE ON OPENVMS + +Stephen Hoffman sent the following, in December 2012: + +"Here <http://labs.hoffmanlabs.com/node/1847> is a very short write-up on the +OpenVMS port and here + +<http://labs.hoffmanlabs.com/labsnotes/pcre-vms-8_32.zip> + +is a zip with the OpenVMS files, and with one modified testing-related PCRE +file." This is a port of PCRE 8.32. + +Earlier, Dan Mooney sent the following comments about building PCRE on OpenVMS. +They relate to an older version of PCRE that used fewer source files, so the +exact commands will need changing. See the current list of source files above. + +"It was quite easy to compile and link the library. I don't have a formal +make file but the attached file [reproduced below] contains the OpenVMS DCL +commands I used to build the library. I had to add #define +POSIX_MALLOC_THRESHOLD 10 to pcre.h since it was not defined anywhere. + +The library was built on: +O/S: HP OpenVMS v7.3-1 +Compiler: Compaq C v6.5-001-48BCD +Linker: vA13-01 + +The test results did not match 100% due to the issues you mention in your +documentation regarding isprint(), iscntrl(), isgraph() and ispunct(). I +modified some of the character tables temporarily and was able to get the +results to match. Tests using the fr locale did not match since I don't have +that locale loaded. The study size was always reported to be 3 less than the +value in the standard test output files." + +========================= +$! This DCL procedure builds PCRE on OpenVMS +$! +$! 
I followed the instructions in the non-unix-use file in the distribution. +$! +$ COMPILE == "CC/LIST/NOMEMBER_ALIGNMENT/PREFIX_LIBRARY_ENTRIES=ALL_ENTRIES +$ COMPILE DFTABLES.C +$ LINK/EXE=DFTABLES.EXE DFTABLES.OBJ +$ RUN DFTABLES.EXE/OUTPUT=CHARTABLES.C +$ COMPILE MAKETABLES.C +$ COMPILE GET.C +$ COMPILE STUDY.C +$! I had to set POSIX_MALLOC_THRESHOLD to 10 in PCRE.H since the symbol +$! did not seem to be defined anywhere. +$! I edited pcre.h and added #DEFINE SUPPORT_UTF8 to enable UTF8 support. +$ COMPILE PCRE.C +$ LIB/CREATE PCRE MAKETABLES.OBJ, GET.OBJ, STUDY.OBJ, PCRE.OBJ +$! I had to set POSIX_MALLOC_THRESHOLD to 10 in PCRE.H since the symbol +$! did not seem to be defined anywhere. +$ COMPILE PCREPOSIX.C +$ LIB/CREATE PCREPOSIX PCREPOSIX.OBJ +$ COMPILE PCRETEST.C +$ LINK/EXE=PCRETEST.EXE PCRETEST.OBJ, PCRE/LIB, PCREPOSIX/LIB +$! C programs that want access to command line arguments must be +$! defined as a symbol +$ PCRETEST :== "$ SYS$ROADSUSERS:[DMOONEY.REGEXP]PCRETEST.EXE" +$! Arguments must be enclosed in quotes. +$ PCRETEST "-C" +$! Test results: +$! +$! The test results did not match 100%. The functions isprint(), iscntrl(), +$! isgraph() and ispunct() on OpenVMS must not produce the same results +$! as the system that built the test output files provided with the +$! distribution. +$! +$! The study size did not match and was always 3 less on OpenVMS. +$! +$! Locale could not be set to fr +$! +========================= + + +BUILDING PCRE ON STRATUS OPENVOS + +These notes on the port of PCRE to VOS (lightly edited) were supplied by +Ashutosh Warikoo, whose email address has the local part awarikoo and the +domain nse.co.in. The port was for version 7.9 in August 2009. + +1. Building PCRE + +I built pcre on OpenVOS Release 17.0.1at using GNU Tools 3.4a without any +problems. I used the following packages to build PCRE: + + ftp://ftp.stratus.com/pub/vos/posix/ga/posix.save.evf.gz + +Please read and follow the instructions that come with these packages. 
To start +the build of pcre, from the root of the package type: + + ./build.sh + +2. Installing PCRE + +Once you have successfully built PCRE, login to the SysAdmin group, switch to +the root user, and type + + [ !create_dir (master_disk)>usr --if needed ] + [ !create_dir (master_disk)>usr>local --if needed ] + !gmake install + +This installs PCRE and its man pages into /usr/local. You can add +(master_disk)>usr>local>bin to your command search paths, or if you are in +BASH, add /usr/local/bin to the PATH environment variable. + +4. Restrictions + +This port requires readline library optionally. However during the build I +faced some yet unexplored errors while linking with readline. As it was an +optional component I chose to disable it. + +5. Known Problems + +I ran the test suite, but you will have to be your own judge of whether this +command, and this port, suits your purposes. If you find any problems that +appear to be related to the port itself, please let me know. Please see the +build.log file in the root of the package also. + + +BUILDING PCRE ON NATIVE Z/OS AND Z/VM + +z/OS and z/VM are operating systems for mainframe computers, produced by IBM. +The character code used is EBCDIC, not ASCII or Unicode. In z/OS, UNIX APIs and +applications can be supported through UNIX System Services, and in such an +environment PCRE can be built in the same way as in other systems. However, in +native z/OS (without UNIX System Services) and in z/VM, special ports are +required. PCRE1 version 8.39 is available in file 882 on this site: + + http://www.cbttape.org + +Everything, source and executable, is in EBCDIC and native z/OS file formats. +However, this software is not maintained and will not be upgraded. If you are +new to PCRE you should be looking at PCRE2 (version 10.30 or later). 
+ +========================== +Last Updated: 15 June 2021 +========================== diff --git a/contrib/libs/pcre/NON-UNIX-USE b/contrib/libs/pcre/NON-UNIX-USE new file mode 100644 index 0000000000..a25546b6ff --- /dev/null +++ b/contrib/libs/pcre/NON-UNIX-USE @@ -0,0 +1,7 @@ +Compiling PCRE on non-Unix systems +---------------------------------- + +This has been renamed to better reflect its contents. Please see the file +NON-AUTOTOOLS-BUILD for details of how to build PCRE without using autotools. + +#### diff --git a/contrib/libs/pcre/patches/fix-group-name-comparison.patch b/contrib/libs/pcre/patches/fix-group-name-comparison.patch new file mode 100644 index 0000000000..e4bba332d1 --- /dev/null +++ b/contrib/libs/pcre/patches/fix-group-name-comparison.patch @@ -0,0 +1,24 @@ +From: Александр Сомов <somov@yandex-team.ru> +Date: Fri, 28 Apr 2017 13:18:37 +0000 +Subject: [PATCH] Fix Clang 3.9 build #35 Clang 3.9 uses the strict memcmp mode + in the address sanitizer by default. Do not use memcmp to compare strings. + +DEVTOOLS-2459 +REVIEW: 269899 + +git-svn-id: svn+ssh://arcadia.yandex.ru/arc/trunk/arcadia/contrib/libs/pcre@2895347 41d65440-b5be-11dd-afe3-b2e846d9b4f8 + +This is compatible only with the 8-bit pcre build. 
+ +This was fixed in pcre2 in a different way: +https://vcs.pcre.org/pcre2/code/trunk/src/pcre2_compile.c?annotate=185&pathrev=185#l7344 + +--- a/pcre_compile.c ++++ b/pcre_compile.c +@@ -8978,5 +8978,5 @@ pcre_uchar *slot = cd->name_table; + for (i = 0; i < cd->names_found; i++) + { +- int crc = memcmp(name, slot+IMM2_SIZE, IN_UCHARS(length)); ++ int crc = strncmp(name, slot+IMM2_SIZE, IN_UCHARS(length)); + if (crc == 0 && slot[IMM2_SIZE+length] != 0) + crc = -1; /* Current name is a substring */ diff --git a/contrib/libs/pcre/patches/posix.patch b/contrib/libs/pcre/patches/posix.patch new file mode 100644 index 0000000000..b68a8621ff --- /dev/null +++ b/contrib/libs/pcre/patches/posix.patch @@ -0,0 +1,20 @@ +From: Георгий Кондратьев <orivej@yandex-team.ru> +Date: Sun, 22 Sep 2019 17:28:22 +0000 +Subject: [PATCH] Rename PCRE POSIX regex symbols. DEVTOOLS-5955 + +They are subtly ABI-incompatible with libc regex due to values of REG_NOMATCH and other constants. + +REVIEW: 962891 + +--- a/pcreposix.h ++++ b/pcreposix.h +@@ -134,4 +134,9 @@ file. */ + /* The functions */ + ++#define regcomp pcre_regcomp ++#define regexec pcre_regexec ++#define regerror pcre_regerror ++#define regfree pcre_regfree ++ + PCREPOSIX_EXP_DECL int regcomp(regex_t *, const char *, int); + PCREPOSIX_EXP_DECL int regexec(const regex_t *, const char *, size_t, diff --git a/contrib/libs/pcre/patches/turn-off-jit-on-request.patch b/contrib/libs/pcre/patches/turn-off-jit-on-request.patch new file mode 100644 index 0000000000..0d09fa1df2 --- /dev/null +++ b/contrib/libs/pcre/patches/turn-off-jit-on-request.patch @@ -0,0 +1,32 @@ +From: Дмитрий Потапов <orivej@yandex-team.ru> +Date: Sun, 14 Feb 2021 19:43:48 +0300 +Subject: [PATCH] Conditional PCRE JIT compilation. + +PCRE JIT adds ≈104KB to binary size which can be critical for embedded software, so we need a turn-off switch which can be overridden in ya.make. 
+ +REVIEW: 1640869 + +--- a/config.h ++++ b/config.h +@@ -293,8 +293,10 @@ + backward compatibility; new code need not use it. */ + #define STDC_HEADERS 1 + ++#ifdef ARCADIA_PCRE_ENABLE_JIT + /* Define to any value to enable support for Just-In-Time compiling. */ + #define SUPPORT_JIT /**/ ++#endif + + /* Define to any value to allow pcregrep to be linked with libbz2, so that it + is able to handle .bz2 files. */ +@@ -319,8 +321,10 @@ + /* Define to any value to enable the 8 bit PCRE library. */ + #define SUPPORT_PCRE8 /**/ + ++#ifdef ARCADIA_PCRE_ENABLE_JIT + /* Define to any value to enable JIT support in pcregrep. */ + #define SUPPORT_PCREGREP_JIT /**/ ++#endif + + /* Define to any value to enable support for Unicode properties. */ + #define SUPPORT_UCP /**/ |