diff options
author | aozeritsky <aozeritsky@yandex-team.ru> | 2022-02-10 16:46:39 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:46:39 +0300 |
commit | 4a6816dea1bcaee46ce29a51a5fd7d3495012858 (patch) | |
tree | d98d6003e190b9e761bd77a83cd98428f9657b35 /contrib | |
parent | 7aa4cf700385ff96999c5cc301171ff157974773 (diff) | |
download | ydb-4a6816dea1bcaee46ce29a51a5fd7d3495012858.tar.gz |
Restoring authorship annotation for <aozeritsky@yandex-team.ru>. Commit 1 of 2.
Diffstat (limited to 'contrib')
-rw-r--r-- | contrib/libs/libevent/evutil.c | 8 | ||||
-rw-r--r-- | contrib/libs/libevent/http-internal.h | 12 | ||||
-rw-r--r-- | contrib/libs/libevent/http.c | 226 | ||||
-rw-r--r-- | contrib/libs/libevent/include/event2/http.h | 78 | ||||
-rw-r--r-- | contrib/libs/libevent/include/event2/http_struct.h | 6 | ||||
-rw-r--r-- | contrib/libs/pcre/pcre_scanner.cc | 396 | ||||
-rw-r--r-- | contrib/libs/pcre/pcre_stringpiece.cc | 84 | ||||
-rw-r--r-- | contrib/libs/pcre/pcre_stringpiece.h | 338 | ||||
-rw-r--r-- | contrib/libs/pcre/pcrecpp.cc | 1840 | ||||
-rw-r--r-- | contrib/libs/pcre/pcrecpp.h | 1414 | ||||
-rw-r--r-- | contrib/libs/pcre/pcrecpp/ya.make | 24 | ||||
-rw-r--r-- | contrib/libs/pcre/pcrecpp_internal.h | 142 | ||||
-rw-r--r-- | contrib/libs/pcre/pcrecpparg.h | 346 |
13 files changed, 2457 insertions, 2457 deletions
diff --git a/contrib/libs/libevent/evutil.c b/contrib/libs/libevent/evutil.c index 9976e79beb..159dd08609 100644 --- a/contrib/libs/libevent/evutil.c +++ b/contrib/libs/libevent/evutil.c @@ -1250,11 +1250,11 @@ evutil_adjust_hints_for_addrconfig_(struct evutil_addrinfo *hints) #ifdef USE_NATIVE_GETADDRINFO static int need_numeric_port_hack_=0; static int need_socktype_protocol_hack_=0; -#ifdef WIN32 -static int tested_for_getaddrinfo_hacks=1; -#else +#ifdef WIN32 +static int tested_for_getaddrinfo_hacks=1; +#else static int tested_for_getaddrinfo_hacks=0; -#endif +#endif /* Some older BSDs (like OpenBSD up to 4.6) used to believe that giving a numeric port without giving an ai_socktype was verboten. We test for this so we can apply an appropriate workaround. If it diff --git a/contrib/libs/libevent/http-internal.h b/contrib/libs/libevent/http-internal.h index 85760eafc2..95d70525ec 100644 --- a/contrib/libs/libevent/http-internal.h +++ b/contrib/libs/libevent/http-internal.h @@ -13,7 +13,7 @@ #include "event2/event_struct.h" #include "util-internal.h" #include "defer-internal.h" -#include "event2/http.h" +#include "event2/http.h" #define HTTP_CONNECT_TIMEOUT 45 #define HTTP_WRITE_TIMEOUT 50 @@ -56,8 +56,8 @@ struct evhttp_connection { evutil_socket_t fd; struct bufferevent *bufev; - bev_factory_cb bufcb; - void *bufcb_arg; + bev_factory_cb bufcb; + void *bufcb_arg; struct event retry_ev; /* for retrying connects */ @@ -112,7 +112,7 @@ struct evhttp_cb { TAILQ_ENTRY(evhttp_cb) next; char *what; - int chunked; + int chunked; void (*cb)(struct evhttp_request *req, void *); void *cbarg; @@ -169,8 +169,8 @@ struct evhttp { don't match. */ void (*gencb)(struct evhttp_request *req, void *); void *gencbarg; - struct bufferevent* (*bevcb)(struct event_base *, void *); - void *bevcbarg; + struct bufferevent* (*bevcb)(struct event_base *, void *); + void *bevcbarg; struct event_base *base; }; diff --git a/contrib/libs/libevent/http.c b/contrib/libs/libevent/http.c index b750d491c9..e3ce49f0ba 100644 --- a/contrib/libs/libevent/http.c +++ b/contrib/libs/libevent/http.c @@ -200,9 +200,9 @@ static void evhttp_write_buffer(struct evhttp_connection *, void (*)(struct evhttp_connection *, void *), void *); static void evhttp_make_header(struct evhttp_connection *, struct evhttp_request *); -static struct evhttp_cb * -evhttp_dispatch_callback(struct httpcbq *callbacks, struct evhttp_request *req, int chunked); - +static struct evhttp_cb * +evhttp_dispatch_callback(struct httpcbq *callbacks, struct evhttp_request *req, int chunked); + /* callbacks for bufferevent */ static void evhttp_read_cb(struct bufferevent *, void *); static void evhttp_write_cb(struct bufferevent *, void *); @@ -983,7 +983,7 @@ evhttp_handle_chunked_read(struct evhttp_request *req, struct evbuffer *buf) req->ntoread = -1; if (req->chunk_cb != NULL) { req->flags |= EVHTTP_REQ_DEFER_FREE; - (*req->chunk_cb)(req, req->chunk_cb_arg); + (*req->chunk_cb)(req, req->chunk_cb_arg); evbuffer_drain(req->input_buffer, evbuffer_get_length(req->input_buffer)); req->flags &= ~EVHTTP_REQ_DEFER_FREE; @@ -993,12 +993,12 @@ evhttp_handle_chunked_read(struct evhttp_request *req, struct evbuffer *buf) } } - if (req->chunk_cb != NULL) { - req->flags |= EVHTTP_REQ_PROCESS_CHUNKS_END; - (*req->chunk_cb)(req, req->chunk_cb_arg); - req->flags &= ~EVHTTP_REQ_PROCESS_CHUNKS_END; - } - + if (req->chunk_cb != NULL) { + req->flags |= EVHTTP_REQ_PROCESS_CHUNKS_END; + (*req->chunk_cb)(req, req->chunk_cb_arg); + req->flags &= ~EVHTTP_REQ_PROCESS_CHUNKS_END; + } + return (MORE_DATA_EXPECTED); } @@ -1112,7 +1112,7 @@ evhttp_read_body(struct evhttp_connection *evcon, struct evhttp_request *req) if (evbuffer_get_length(req->input_buffer) > 0 && req->chunk_cb != NULL) { req->flags |= EVHTTP_REQ_DEFER_FREE; - (*req->chunk_cb)(req, req->chunk_cb_arg); + (*req->chunk_cb)(req, req->chunk_cb_arg); req->flags &= ~EVHTTP_REQ_DEFER_FREE; evbuffer_drain(req->input_buffer, evbuffer_get_length(req->input_buffer)); @@ -1364,23 +1364,23 @@ evhttp_connection_reset_(struct evhttp_connection *evcon) if (evhttp_connected(evcon) && evcon->closecb != NULL) (*evcon->closecb)(evcon, evcon->closecb_arg); - /* if we have a bufferevent factory callback set, get a new bufferevent */ - if (NULL != evcon->bufcb && -1 != bufferevent_getfd(evcon->bufev)) { - struct bufferevent *bev = (*evcon->bufcb)(evcon->bufcb_arg); - - if (NULL == bev) { - event_warn("%s: bufferevent factory callback failed", __func__); - } - else { - if (bufferevent_get_base(bev) != evcon->base) { - bufferevent_base_set(evcon->base, bev); - } - - bufferevent_free(evcon->bufev); - evcon->bufev = bev; - } - } - + /* if we have a bufferevent factory callback set, get a new bufferevent */ + if (NULL != evcon->bufcb && -1 != bufferevent_getfd(evcon->bufev)) { + struct bufferevent *bev = (*evcon->bufcb)(evcon->bufcb_arg); + + if (NULL == bev) { + event_warn("%s: bufferevent factory callback failed", __func__); + } + else { + if (bufferevent_get_base(bev) != evcon->base) { + bufferevent_base_set(evcon->base, bev); + } + + bufferevent_free(evcon->bufev); + evcon->bufev = bev; + } + } + shutdown(evcon->fd, EVUTIL_SHUT_WR); evutil_closesocket(evcon->fd); evcon->fd = -1; @@ -1585,7 +1585,7 @@ evhttp_error_cb(struct bufferevent *bufev, short what, void *arg) } evhttp_connection_fail_(evcon, EVREQ_HTTP_EOF); - } else if (what == BEV_EVENT_CONNECTED) { + } else if (what == BEV_EVENT_CONNECTED) { } else { evhttp_connection_fail_(evcon, EVREQ_HTTP_BUFFER_ERROR); } @@ -1918,13 +1918,13 @@ evhttp_parse_request_line(struct evhttp_request *req, char *line, size_t len) !evhttp_find_vhost(req->evcon->http_server, NULL, hostname)) req->flags |= EVHTTP_PROXY_REQUEST; - { - struct evhttp_cb * chunkcb = evhttp_dispatch_callback(&req->evcon->http_server->callbacks, req, 1); - if (chunkcb) { - req->chunk_cb = chunkcb->cb; - req->chunk_cb_arg = chunkcb->cbarg; - } - } + { + struct evhttp_cb * chunkcb = evhttp_dispatch_callback(&req->evcon->http_server->callbacks, req, 1); + if (chunkcb) { + req->chunk_cb = chunkcb->cb; + req->chunk_cb_arg = chunkcb->cbarg; + } + } return 0; } @@ -2403,30 +2403,30 @@ evhttp_read_header(struct evhttp_connection *evcon, * happen elsewhere. */ -struct evhttp_connection *evhttp_connection_base_bufferevent_factory_new( - struct event_base *base, struct evdns_base *dnsbase, - bev_factory_cb cb, void * arg, const char *address, unsigned short port) -{ - struct bufferevent *bev = NULL; - - if (NULL != cb) { - if (NULL == (bev = (*cb)(arg))) { - event_warn("%s: bufferevent factory callback failed", __func__); - return (NULL); - } - } - - struct evhttp_connection *ret = - evhttp_connection_base_bufferevent_new(base, dnsbase, bev, address, port); - - if (NULL != ret) { - ret->bufcb = cb; - ret->bufcb_arg = arg; - } - - return (ret); -} - +struct evhttp_connection *evhttp_connection_base_bufferevent_factory_new( + struct event_base *base, struct evdns_base *dnsbase, + bev_factory_cb cb, void * arg, const char *address, unsigned short port) +{ + struct bufferevent *bev = NULL; + + if (NULL != cb) { + if (NULL == (bev = (*cb)(arg))) { + event_warn("%s: bufferevent factory callback failed", __func__); + return (NULL); + } + } + + struct evhttp_connection *ret = + evhttp_connection_base_bufferevent_new(base, dnsbase, bev, address, port); + + if (NULL != ret) { + ret->bufcb = cb; + ret->bufcb_arg = arg; + } + + return (ret); +} + struct evhttp_connection * evhttp_connection_new(const char *address, ev_uint16_t port) { @@ -2434,7 +2434,7 @@ evhttp_connection_new(const char *address, ev_uint16_t port) } struct evhttp_connection * -evhttp_connection_base_bufferevent_new(struct event_base *base, struct evdns_base *dnsbase, struct bufferevent* bev, +evhttp_connection_base_bufferevent_new(struct event_base *base, struct evdns_base *dnsbase, struct bufferevent* bev, const char *address, ev_uint16_t port) { struct evhttp_connection *evcon = NULL; @@ -2460,11 +2460,11 @@ evhttp_connection_base_bufferevent_new(struct event_base *base, struct evdns_bas goto error; } - if (bev == NULL) { + if (bev == NULL) { if (!(bev = bufferevent_socket_new(base, -1, 0))) { event_warn("%s: bufferevent_socket_new failed", __func__); - goto error; - } + goto error; + } } bufferevent_setcb(bev, evhttp_read_cb, evhttp_write_cb, evhttp_error_cb, evcon); @@ -2505,17 +2505,17 @@ struct bufferevent* evhttp_connection_get_bufferevent(struct evhttp_connection * struct evhttp * evhttp_connection_get_server(struct evhttp_connection *evcon) -{ +{ return evcon->http_server; } - -struct evhttp_connection * -evhttp_connection_base_new(struct event_base *base, struct evdns_base *dnsbase, + +struct evhttp_connection * +evhttp_connection_base_new(struct event_base *base, struct evdns_base *dnsbase, const char *address, ev_uint16_t port) -{ - return evhttp_connection_base_bufferevent_new(base, dnsbase, NULL, address, port); -} - +{ + return evhttp_connection_base_bufferevent_new(base, dnsbase, NULL, address, port); +} + void evhttp_connection_set_family(struct evhttp_connection *evcon, int family) { @@ -3390,7 +3390,7 @@ evhttp_parse_query_str(const char *uri, struct evkeyvalq *headers) } static struct evhttp_cb * -evhttp_dispatch_callback(struct httpcbq *callbacks, struct evhttp_request *req, int chunked) +evhttp_dispatch_callback(struct httpcbq *callbacks, struct evhttp_request *req, int chunked) { struct evhttp_cb *cb; size_t offset = 0; @@ -3407,13 +3407,13 @@ evhttp_dispatch_callback(struct httpcbq *callbacks, struct evhttp_request *req, TAILQ_FOREACH(cb, callbacks, next) { if (!strcmp(cb->what, translated)) { - if (chunked < 0) { - mm_free(translated); - return (cb); - } else if (chunked == cb->chunked) { - mm_free(translated); - return (cb); - } + if (chunked < 0) { + mm_free(translated); + return (cb); + } else if (chunked == cb->chunked) { + mm_free(translated); + return (cb); + } } } @@ -3554,7 +3554,7 @@ evhttp_handle_request(struct evhttp_request *req, void *arg) evhttp_find_vhost(http, &http, hostname); } - if ((cb = evhttp_dispatch_callback(&http->callbacks, req, -1)) != NULL) { + if ((cb = evhttp_dispatch_callback(&http->callbacks, req, -1)) != NULL) { (*cb->cb)(req, cb->cbarg); return; } @@ -3974,8 +3974,8 @@ evhttp_set_allowed_methods(struct evhttp* http, ev_uint16_t methods) } int -evhttp_set_cb_internal(struct evhttp *http, const char *uri, - void (*cb)(struct evhttp_request *, void *), void *cbarg, int chunked) +evhttp_set_cb_internal(struct evhttp *http, const char *uri, + void (*cb)(struct evhttp_request *, void *), void *cbarg, int chunked) { struct evhttp_cb *http_cb; @@ -3997,7 +3997,7 @@ evhttp_set_cb_internal(struct evhttp *http, const char *uri, } http_cb->cb = cb; http_cb->cbarg = cbarg; - http_cb->chunked = chunked; + http_cb->chunked = chunked; TAILQ_INSERT_TAIL(&http->callbacks, http_cb, next); @@ -4005,19 +4005,19 @@ evhttp_set_cb_internal(struct evhttp *http, const char *uri, } int -evhttp_set_cb(struct evhttp *http, const char *uri, - void (*cb)(struct evhttp_request *, void *), void *cbarg) -{ - return evhttp_set_cb_internal(http, uri, cb, cbarg, 0); -} - -int evhttp_set_chunk_cb(struct evhttp *http, const char *path, - void (*chunk_cb)(struct evhttp_request *, void *), void *cb_arg) -{ - return evhttp_set_cb_internal(http, path, chunk_cb, cb_arg, 1); -} - -int +evhttp_set_cb(struct evhttp *http, const char *uri, + void (*cb)(struct evhttp_request *, void *), void *cbarg) +{ + return evhttp_set_cb_internal(http, uri, cb, cbarg, 0); +} + +int evhttp_set_chunk_cb(struct evhttp *http, const char *path, + void (*chunk_cb)(struct evhttp_request *, void *), void *cb_arg) +{ + return evhttp_set_cb_internal(http, path, chunk_cb, cb_arg, 1); +} + +int evhttp_del_cb(struct evhttp *http, const char *uri) { struct evhttp_cb *http_cb; @@ -4044,14 +4044,14 @@ evhttp_set_gencb(struct evhttp *http, http->gencbarg = cbarg; } -void -evhttp_set_bevcb(struct evhttp *http, - struct bufferevent* (*cb)(struct event_base *, void *), void *cbarg) -{ - http->bevcb = cb; - http->bevcbarg = cbarg; -} - +void +evhttp_set_bevcb(struct evhttp *http, + struct bufferevent* (*cb)(struct event_base *, void *), void *cbarg) +{ + http->bevcb = cb; + http->bevcbarg = cbarg; +} + /* * Request related functions */ @@ -4166,10 +4166,10 @@ evhttp_connection_get_base(struct evhttp_connection *conn) void evhttp_request_set_chunked_cb(struct evhttp_request *req, - void (*cb)(struct evhttp_request *, void *), void *arg) + void (*cb)(struct evhttp_request *, void *), void *arg) { req->chunk_cb = cb; - req->chunk_cb_arg = arg; + req->chunk_cb_arg = arg; } void @@ -4305,7 +4305,7 @@ evhttp_get_request_connection( { struct evhttp_connection *evcon; char *hostname = NULL, *portname = NULL; - struct bufferevent* bev = NULL; + struct bufferevent* bev = NULL; #ifdef EVENT__HAVE_STRUCT_SOCKADDR_UN if (sa->sa_family == AF_UNIX) { @@ -4325,11 +4325,11 @@ evhttp_get_request_connection( __func__, hostname, portname, EV_SOCK_ARG(fd))); /* we need a connection object to put the http request on */ - if (http->bevcb != NULL) { - bev = (*http->bevcb)(http->base, http->bevcbarg); - } - evcon = evhttp_connection_base_bufferevent_new( - http->base, NULL, bev, hostname, atoi(portname)); + if (http->bevcb != NULL) { + bev = (*http->bevcb)(http->base, http->bevcbarg); + } + evcon = evhttp_connection_base_bufferevent_new( + http->base, NULL, bev, hostname, atoi(portname)); mm_free(hostname); mm_free(portname); if (evcon == NULL) diff --git a/contrib/libs/libevent/include/event2/http.h b/contrib/libs/libevent/include/event2/http.h index 40d411ddd2..c98cd98f62 100644 --- a/contrib/libs/libevent/include/event2/http.h +++ b/contrib/libs/libevent/include/event2/http.h @@ -38,7 +38,7 @@ extern "C" { /* In case we haven't included the right headers yet. */ struct evbuffer; struct event_base; -struct bufferevent; +struct bufferevent; struct evhttp_connection; /** @file event2/http.h @@ -72,7 +72,7 @@ struct evhttp_request; struct evkeyvalq; struct evhttp_bound_socket; struct evconnlistener; -struct evdns_base; +struct evdns_base; /** * Create a new HTTP server. @@ -265,9 +265,9 @@ int evhttp_set_cb(struct evhttp *http, const char *path, void (*cb)(struct evhttp_request *, void *), void *cb_arg); EVENT2_EXPORT_SYMBOL -int evhttp_set_chunk_cb(struct evhttp *http, const char *path, - void (*chunk_cb)(struct evhttp_request *, void *), void *cb_arg); - +int evhttp_set_chunk_cb(struct evhttp *http, const char *path, + void (*chunk_cb)(struct evhttp_request *, void *), void *cb_arg); + /** Removes the callback for a specified URI */ EVENT2_EXPORT_SYMBOL int evhttp_del_cb(struct evhttp *, const char *); @@ -302,9 +302,9 @@ void evhttp_set_gencb(struct evhttp *http, @param arg an context argument for the callback */ EVENT2_EXPORT_SYMBOL -void evhttp_set_bevcb(struct evhttp *http, +void evhttp_set_bevcb(struct evhttp *http, struct bufferevent *(*cb)(struct event_base *, void *), void *arg); - + /** Adds a virtual host to the http server. @@ -537,38 +537,38 @@ struct evhttp_connection *evhttp_connection_base_bufferevent_new( struct event_base *base, struct evdns_base *dnsbase, struct bufferevent* bev, const char *address, ev_uint16_t port); /** - * Creates and returns a new bufferevent object. - */ -typedef struct bufferevent* (*bev_factory_cb)(void *); - -/** - * Create and return a connection object that can be used for making HTTP - * requests. The connection object tries to resolve address and establish the - * connection when it is given an http request object. The specified factory - * function is called with the user-supplied argument to retrieve a new - * bufferevent whenever the underlying HTTP connection needs to be - * reestablished. This is what you want if, for example, you have a bufferevent - * that needs to perform some setup for new connections, such as an SSL - * bufferevent. - * - * @param base the event_base to use for handling the connection - * @param dnsbase the dns_base to use for resolving host names; if not - * specified host name resolution will block. - * @param cb a callback that returns a new bufferevent to use for connecting to - * the server; if NULL, behavior is the same as in calling - * evhttp_connection_base_bufferevent_new with a NULL bufferevent. The - * returned bufferevents will be freed as necessary. The returned - * bufferevents must have no fd set on them. - * @param arg the argument to supply to the callback - * @param address the address to which to connect - * @param port the port to connect to - * @return an evhttp_connection object that can be used for making requests - */ -struct evhttp_connection *evhttp_connection_base_bufferevent_factory_new( - struct event_base *base, struct evdns_base *dnsbase, - bev_factory_cb cb, void * arg, const char *address, unsigned short port); - -/** + * Creates and returns a new bufferevent object. + */ +typedef struct bufferevent* (*bev_factory_cb)(void *); + +/** + * Create and return a connection object that can be used for making HTTP + * requests. The connection object tries to resolve address and establish the + * connection when it is given an http request object. The specified factory + * function is called with the user-supplied argument to retrieve a new + * bufferevent whenever the underlying HTTP connection needs to be + * reestablished. This is what you want if, for example, you have a bufferevent + * that needs to perform some setup for new connections, such as an SSL + * bufferevent. + * + * @param base the event_base to use for handling the connection + * @param dnsbase the dns_base to use for resolving host names; if not + * specified host name resolution will block. + * @param cb a callback that returns a new bufferevent to use for connecting to + * the server; if NULL, behavior is the same as in calling + * evhttp_connection_base_bufferevent_new with a NULL bufferevent. The + * returned bufferevents will be freed as necessary. The returned + * bufferevents must have no fd set on them. + * @param arg the argument to supply to the callback + * @param address the address to which to connect + * @param port the port to connect to + * @return an evhttp_connection object that can be used for making requests + */ +struct evhttp_connection *evhttp_connection_base_bufferevent_factory_new( + struct event_base *base, struct evdns_base *dnsbase, + bev_factory_cb cb, void * arg, const char *address, unsigned short port); + +/** * Return the bufferevent that an evhttp_connection is using. */ EVENT2_EXPORT_SYMBOL diff --git a/contrib/libs/libevent/include/event2/http_struct.h b/contrib/libs/libevent/include/event2/http_struct.h index 2691c8c037..ff2538354b 100644 --- a/contrib/libs/libevent/include/event2/http_struct.h +++ b/contrib/libs/libevent/include/event2/http_struct.h @@ -78,8 +78,8 @@ struct { /** The request should be freed upstack */ #define EVHTTP_REQ_NEEDS_FREE 0x0010 -#define EVHTTP_REQ_PROCESS_CHUNKS_END 0x0024 - +#define EVHTTP_REQ_PROCESS_CHUNKS_END 0x0024 + struct evkeyvalq *input_headers; struct evkeyvalq *output_headers; @@ -122,7 +122,7 @@ struct { * the regular callback. */ void (*chunk_cb)(struct evhttp_request *, void *); - void *chunk_cb_arg; + void *chunk_cb_arg; /* * Callback added for forked-daapd so they can collect ICY diff --git a/contrib/libs/pcre/pcre_scanner.cc b/contrib/libs/pcre/pcre_scanner.cc index 30400294d2..3715dc161f 100644 --- a/contrib/libs/pcre/pcre_scanner.cc +++ b/contrib/libs/pcre/pcre_scanner.cc @@ -1,199 +1,199 @@ -// Copyright (c) 2005, Google Inc. -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Author: Sanjay Ghemawat - -#ifdef HAVE_CONFIG_H +// Copyright (c) 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Author: Sanjay Ghemawat + +#ifdef HAVE_CONFIG_H #include "pcre_config.h" -#endif - -#include <vector> -#include <assert.h> - -#include "pcrecpp_internal.h" -#include "pcre_scanner.h" - -using std::vector; - -namespace pcrecpp { - -Scanner::Scanner() - : data_(), - input_(data_), - skip_(NULL), - should_skip_(false), - skip_repeat_(false), - save_comments_(false), - comments_(NULL), - comments_offset_(0) { -} - -Scanner::Scanner(const string& in) - : data_(in), - input_(data_), - skip_(NULL), - should_skip_(false), - skip_repeat_(false), - save_comments_(false), - comments_(NULL), - comments_offset_(0) { -} - -Scanner::~Scanner() { - delete skip_; - delete comments_; -} - -void Scanner::SetSkipExpression(const char* re) { - delete skip_; - if (re != NULL) { - skip_ = new RE(re); - should_skip_ = true; - skip_repeat_ = true; - ConsumeSkip(); - } else { - skip_ = NULL; - should_skip_ = false; - skip_repeat_ = false; - } -} - -void Scanner::Skip(const char* re) { - delete skip_; - if (re != NULL) { - skip_ = new RE(re); - should_skip_ = true; - skip_repeat_ = false; - ConsumeSkip(); - } else { - skip_ = NULL; - should_skip_ = false; - skip_repeat_ = false; - } -} - -void Scanner::DisableSkip() { - assert(skip_ != NULL); - should_skip_ = false; -} - -void Scanner::EnableSkip() { - assert(skip_ != NULL); - should_skip_ = true; - ConsumeSkip(); -} - -int Scanner::LineNumber() const { - // TODO: Make it more efficient by keeping track of the last point - // where we computed line numbers and counting newlines since then. - // We could use std:count, but not all systems have it. :-( - int count = 1; - for (const char* p = data_.data(); p < input_.data(); ++p) - if (*p == '\n') - ++count; - return count; -} - -int Scanner::Offset() const { - return (int)(input_.data() - data_.c_str()); -} - -bool Scanner::LookingAt(const RE& re) const { - int consumed; - return re.DoMatch(input_, RE::ANCHOR_START, &consumed, 0, 0); -} - - -bool Scanner::Consume(const RE& re, - const Arg& arg0, - const Arg& arg1, - const Arg& arg2) { - const bool result = re.Consume(&input_, arg0, arg1, arg2); - if (result && should_skip_) ConsumeSkip(); - return result; -} - -// helper function to consume *skip_ and honour save_comments_ -void Scanner::ConsumeSkip() { - const char* start_data = input_.data(); - while (skip_->Consume(&input_)) { - if (!skip_repeat_) { - // Only one skip allowed. - break; - } - } - if (save_comments_) { - if (comments_ == NULL) { - comments_ = new vector<StringPiece>; - } - // already pointing one past end, so no need to +1 - int length = (int)(input_.data() - start_data); - if (length > 0) { - comments_->push_back(StringPiece(start_data, length)); - } - } -} - - -void Scanner::GetComments(int start, int end, vector<StringPiece> *ranges) { - // short circuit out if we've not yet initialized comments_ - // (e.g., when save_comments is false) - if (!comments_) { - return; - } - // TODO: if we guarantee that comments_ will contain StringPieces - // that are ordered by their start, then we can do a binary search - // for the first StringPiece at or past start and then scan for the - // ones contained in the range, quit early (use equal_range or - // lower_bound) - for (vector<StringPiece>::const_iterator it = comments_->begin(); - it != comments_->end(); ++it) { - if ((it->data() >= data_.c_str() + start && - it->data() + it->size() <= data_.c_str() + end)) { - ranges->push_back(*it); - } - } -} - - -void Scanner::GetNextComments(vector<StringPiece> *ranges) { - // short circuit out if we've not yet initialized comments_ - // (e.g., when save_comments is false) - if (!comments_) { - return; - } - for (vector<StringPiece>::const_iterator it = - comments_->begin() + comments_offset_; - it != comments_->end(); ++it) { - ranges->push_back(*it); - ++comments_offset_; - } -} - -} // namespace pcrecpp +#endif + +#include <vector> +#include <assert.h> + +#include "pcrecpp_internal.h" +#include "pcre_scanner.h" + +using std::vector; + +namespace pcrecpp { + +Scanner::Scanner() + : data_(), + input_(data_), + skip_(NULL), + should_skip_(false), + skip_repeat_(false), + save_comments_(false), + comments_(NULL), + comments_offset_(0) { +} + +Scanner::Scanner(const string& in) + : data_(in), + input_(data_), + skip_(NULL), + should_skip_(false), + skip_repeat_(false), + save_comments_(false), + comments_(NULL), + comments_offset_(0) { +} + +Scanner::~Scanner() { + delete skip_; + delete comments_; +} + +void Scanner::SetSkipExpression(const char* re) { + delete skip_; + if (re != NULL) { + skip_ = new RE(re); + should_skip_ = true; + skip_repeat_ = true; + ConsumeSkip(); + } else { + skip_ = NULL; + should_skip_ = false; + skip_repeat_ = false; + } +} + +void Scanner::Skip(const char* re) { + delete skip_; + if (re != NULL) { + skip_ = new RE(re); + should_skip_ = true; + skip_repeat_ = false; + ConsumeSkip(); + } else { + skip_ = NULL; + should_skip_ = false; + skip_repeat_ = false; + } +} + +void Scanner::DisableSkip() { + assert(skip_ != NULL); + should_skip_ = false; +} + +void Scanner::EnableSkip() { + assert(skip_ != NULL); + should_skip_ = true; + ConsumeSkip(); +} + +int Scanner::LineNumber() const { + // TODO: Make it more efficient by keeping track of the last point + // where we computed line numbers and counting newlines since then. + // We could use std:count, but not all systems have it. :-( + int count = 1; + for (const char* p = data_.data(); p < input_.data(); ++p) + if (*p == '\n') + ++count; + return count; +} + +int Scanner::Offset() const { + return (int)(input_.data() - data_.c_str()); +} + +bool Scanner::LookingAt(const RE& re) const { + int consumed; + return re.DoMatch(input_, RE::ANCHOR_START, &consumed, 0, 0); +} + + +bool Scanner::Consume(const RE& re, + const Arg& arg0, + const Arg& arg1, + const Arg& arg2) { + const bool result = re.Consume(&input_, arg0, arg1, arg2); + if (result && should_skip_) ConsumeSkip(); + return result; +} + +// helper function to consume *skip_ and honour save_comments_ +void Scanner::ConsumeSkip() { + const char* start_data = input_.data(); + while (skip_->Consume(&input_)) { + if (!skip_repeat_) { + // Only one skip allowed. + break; + } + } + if (save_comments_) { + if (comments_ == NULL) { + comments_ = new vector<StringPiece>; + } + // already pointing one past end, so no need to +1 + int length = (int)(input_.data() - start_data); + if (length > 0) { + comments_->push_back(StringPiece(start_data, length)); + } + } +} + + +void Scanner::GetComments(int start, int end, vector<StringPiece> *ranges) { + // short circuit out if we've not yet initialized comments_ + // (e.g., when save_comments is false) + if (!comments_) { + return; + } + // TODO: if we guarantee that comments_ will contain StringPieces + // that are ordered by their start, then we can do a binary search + // for the first StringPiece at or past start and then scan for the + // ones contained in the range, quit early (use equal_range or + // lower_bound) + for (vector<StringPiece>::const_iterator it = comments_->begin(); + it != comments_->end(); ++it) { + if ((it->data() >= data_.c_str() + start && + it->data() + it->size() <= data_.c_str() + end)) { + ranges->push_back(*it); + } + } +} + + +void Scanner::GetNextComments(vector<StringPiece> *ranges) { + // short circuit out if we've not yet initialized comments_ + // (e.g., when save_comments is false) + if (!comments_) { + return; + } + for (vector<StringPiece>::const_iterator it = + comments_->begin() + comments_offset_; + it != comments_->end(); ++it) { + ranges->push_back(*it); + ++comments_offset_; + } +} + +} // namespace pcrecpp diff --git a/contrib/libs/pcre/pcre_stringpiece.cc b/contrib/libs/pcre/pcre_stringpiece.cc index b51b5482d0..3fd01db0ab 100644 --- a/contrib/libs/pcre/pcre_stringpiece.cc +++ b/contrib/libs/pcre/pcre_stringpiece.cc @@ -1,43 +1,43 @@ -// Copyright (c) 2005, Google Inc. -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Author: wilsonh@google.com (Wilson Hsieh) -// - -#ifdef HAVE_CONFIG_H +// Copyright (c) 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Author: wilsonh@google.com (Wilson Hsieh) +// + +#ifdef HAVE_CONFIG_H #include "pcre_config.h" -#endif - -#include <iostream> -#include "pcrecpp_internal.h" -#include "pcre_stringpiece.h" - -std::ostream& operator<<(std::ostream& o, const pcrecpp::StringPiece& piece) { - return (o << piece.as_string()); -} +#endif + +#include <iostream> +#include "pcrecpp_internal.h" +#include "pcre_stringpiece.h" + +std::ostream& operator<<(std::ostream& o, const pcrecpp::StringPiece& piece) { + return (o << piece.as_string()); +} diff --git a/contrib/libs/pcre/pcre_stringpiece.h b/contrib/libs/pcre/pcre_stringpiece.h index cd0a7c9bf4..a1dd26df71 100644 --- a/contrib/libs/pcre/pcre_stringpiece.h +++ b/contrib/libs/pcre/pcre_stringpiece.h @@ -1,47 +1,47 @@ -// Copyright (c) 2005, Google Inc. -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Author: Sanjay Ghemawat -// -// A string like object that points into another piece of memory. -// Useful for providing an interface that allows clients to easily -// pass in either a "const char*" or a "string". -// -// Arghh! I wish C++ literals were automatically of type "string". - -#ifndef _PCRE_STRINGPIECE_H -#define _PCRE_STRINGPIECE_H - -#include <cstring> -#include <string> -#include <iosfwd> // for ostream forward-declaration - +// Copyright (c) 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Author: Sanjay Ghemawat +// +// A string like object that points into another piece of memory. +// Useful for providing an interface that allows clients to easily +// pass in either a "const char*" or a "string". +// +// Arghh! I wish C++ literals were automatically of type "string". + +#ifndef _PCRE_STRINGPIECE_H +#define _PCRE_STRINGPIECE_H + +#include <cstring> +#include <string> +#include <iosfwd> // for ostream forward-declaration + #if 0 #define HAVE_TYPE_TRAITS #include <type_traits.h> @@ -49,132 +49,132 @@ #define HAVE_TYPE_TRAITS #error #include <bits/type_traits.h> #endif - + #include "pcre.h" namespace pcrecpp { -using std::memcmp; -using std::strlen; -using std::string; - -class PCRECPP_EXP_DEFN StringPiece { - private: - const char* ptr_; - int length_; - - public: - // We provide non-explicit singleton constructors so users can pass - // in a "const char*" or a "string" wherever a "StringPiece" is - // expected. - StringPiece() - : ptr_(NULL), length_(0) { } - StringPiece(const char* str) - : ptr_(str), length_(static_cast<int>(strlen(ptr_))) { } - StringPiece(const unsigned char* str) - : ptr_(reinterpret_cast<const char*>(str)), - length_(static_cast<int>(strlen(ptr_))) { } - StringPiece(const string& str) - : ptr_(str.data()), length_(static_cast<int>(str.size())) { } - StringPiece(const char* offset, int len) - : ptr_(offset), length_(len) { } - - // data() may return a pointer to a buffer with embedded NULs, and the - // returned buffer may or may not be null terminated. Therefore it is - // typically a mistake to pass data() to a routine that expects a NUL - // terminated string. Use "as_string().c_str()" if you really need to do - // this. Or better yet, change your routine so it does not rely on NUL - // termination. - const char* data() const { return ptr_; } - int size() const { return length_; } - bool empty() const { return length_ == 0; } - - void clear() { ptr_ = NULL; length_ = 0; } - void set(const char* buffer, int len) { ptr_ = buffer; length_ = len; } - void set(const char* str) { - ptr_ = str; - length_ = static_cast<int>(strlen(str)); - } - void set(const void* buffer, int len) { - ptr_ = reinterpret_cast<const char*>(buffer); - length_ = len; - } - - char operator[](int i) const { return ptr_[i]; } - - void remove_prefix(int n) { - ptr_ += n; - length_ -= n; - } - - void remove_suffix(int n) { - length_ -= n; - } - - bool operator==(const StringPiece& x) const { - return ((length_ == x.length_) && - (memcmp(ptr_, x.ptr_, length_) == 0)); - } - bool operator!=(const StringPiece& x) const { - return !(*this == x); - } - -#define STRINGPIECE_BINARY_PREDICATE(cmp,auxcmp) \ - bool operator cmp (const StringPiece& x) const { \ - int r = memcmp(ptr_, x.ptr_, length_ < x.length_ ? length_ : x.length_); \ - return ((r auxcmp 0) || ((r == 0) && (length_ cmp x.length_))); \ - } - STRINGPIECE_BINARY_PREDICATE(<, <); - STRINGPIECE_BINARY_PREDICATE(<=, <); - STRINGPIECE_BINARY_PREDICATE(>=, >); - STRINGPIECE_BINARY_PREDICATE(>, >); -#undef STRINGPIECE_BINARY_PREDICATE - - int compare(const StringPiece& x) const { - int r = memcmp(ptr_, x.ptr_, length_ < x.length_ ? length_ : x.length_); - if (r == 0) { - if (length_ < x.length_) r = -1; - else if (length_ > x.length_) r = +1; - } - return r; - } - - string as_string() const { - return string(data(), size()); - } - - void CopyToString(string* target) const { - target->assign(ptr_, length_); - } - - // Does "this" start with "x" - bool starts_with(const StringPiece& x) const { - return ((length_ >= x.length_) && (memcmp(ptr_, x.ptr_, x.length_) == 0)); - } -}; - -} // namespace pcrecpp - -// ------------------------------------------------------------------ -// Functions used to create STL containers that use StringPiece -// Remember that a StringPiece's lifetime had better be less than -// that of the underlying string or char*. If it is not, then you -// cannot safely store a StringPiece into an STL container -// ------------------------------------------------------------------ - -#ifdef HAVE_TYPE_TRAITS -// This makes vector<StringPiece> really fast for some STL implementations -template<> struct __type_traits<pcrecpp::StringPiece> { - typedef __true_type has_trivial_default_constructor; - typedef __true_type has_trivial_copy_constructor; - typedef __true_type has_trivial_assignment_operator; - typedef __true_type has_trivial_destructor; - typedef __true_type is_POD_type; -}; -#endif - -// allow StringPiece to be logged -PCRECPP_EXP_DECL std::ostream& operator<<(std::ostream& o, - const pcrecpp::StringPiece& piece); - -#endif /* _PCRE_STRINGPIECE_H */ +using std::memcmp; +using std::strlen; +using std::string; + +class PCRECPP_EXP_DEFN StringPiece { + private: + const char* ptr_; + int length_; + + public: + // We provide non-explicit singleton constructors so users can pass + // in a "const char*" or a "string" wherever a "StringPiece" is + // expected. + StringPiece() + : ptr_(NULL), length_(0) { } + StringPiece(const char* str) + : ptr_(str), length_(static_cast<int>(strlen(ptr_))) { } + StringPiece(const unsigned char* str) + : ptr_(reinterpret_cast<const char*>(str)), + length_(static_cast<int>(strlen(ptr_))) { } + StringPiece(const string& str) + : ptr_(str.data()), length_(static_cast<int>(str.size())) { } + StringPiece(const char* offset, int len) + : ptr_(offset), length_(len) { } + + // data() may return a pointer to a buffer with embedded NULs, and the + // returned buffer may or may not be null terminated. Therefore it is + // typically a mistake to pass data() to a routine that expects a NUL + // terminated string. Use "as_string().c_str()" if you really need to do + // this. Or better yet, change your routine so it does not rely on NUL + // termination. + const char* data() const { return ptr_; } + int size() const { return length_; } + bool empty() const { return length_ == 0; } + + void clear() { ptr_ = NULL; length_ = 0; } + void set(const char* buffer, int len) { ptr_ = buffer; length_ = len; } + void set(const char* str) { + ptr_ = str; + length_ = static_cast<int>(strlen(str)); + } + void set(const void* buffer, int len) { + ptr_ = reinterpret_cast<const char*>(buffer); + length_ = len; + } + + char operator[](int i) const { return ptr_[i]; } + + void remove_prefix(int n) { + ptr_ += n; + length_ -= n; + } + + void remove_suffix(int n) { + length_ -= n; + } + + bool operator==(const StringPiece& x) const { + return ((length_ == x.length_) && + (memcmp(ptr_, x.ptr_, length_) == 0)); + } + bool operator!=(const StringPiece& x) const { + return !(*this == x); + } + +#define STRINGPIECE_BINARY_PREDICATE(cmp,auxcmp) \ + bool operator cmp (const StringPiece& x) const { \ + int r = memcmp(ptr_, x.ptr_, length_ < x.length_ ? length_ : x.length_); \ + return ((r auxcmp 0) || ((r == 0) && (length_ cmp x.length_))); \ + } + STRINGPIECE_BINARY_PREDICATE(<, <); + STRINGPIECE_BINARY_PREDICATE(<=, <); + STRINGPIECE_BINARY_PREDICATE(>=, >); + STRINGPIECE_BINARY_PREDICATE(>, >); +#undef STRINGPIECE_BINARY_PREDICATE + + int compare(const StringPiece& x) const { + int r = memcmp(ptr_, x.ptr_, length_ < x.length_ ? length_ : x.length_); + if (r == 0) { + if (length_ < x.length_) r = -1; + else if (length_ > x.length_) r = +1; + } + return r; + } + + string as_string() const { + return string(data(), size()); + } + + void CopyToString(string* target) const { + target->assign(ptr_, length_); + } + + // Does "this" start with "x" + bool starts_with(const StringPiece& x) const { + return ((length_ >= x.length_) && (memcmp(ptr_, x.ptr_, x.length_) == 0)); + } +}; + +} // namespace pcrecpp + +// ------------------------------------------------------------------ +// Functions used to create STL containers that use StringPiece +// Remember that a StringPiece's lifetime had better be less than +// that of the underlying string or char*. If it is not, then you +// cannot safely store a StringPiece into an STL container +// ------------------------------------------------------------------ + +#ifdef HAVE_TYPE_TRAITS +// This makes vector<StringPiece> really fast for some STL implementations +template<> struct __type_traits<pcrecpp::StringPiece> { + typedef __true_type has_trivial_default_constructor; + typedef __true_type has_trivial_copy_constructor; + typedef __true_type has_trivial_assignment_operator; + typedef __true_type has_trivial_destructor; + typedef __true_type is_POD_type; +}; +#endif + +// allow StringPiece to be logged +PCRECPP_EXP_DECL std::ostream& operator<<(std::ostream& o, + const pcrecpp::StringPiece& piece); + +#endif /* _PCRE_STRINGPIECE_H */ diff --git a/contrib/libs/pcre/pcrecpp.cc b/contrib/libs/pcre/pcrecpp.cc index 57daa1f59f..33d186c7d0 100644 --- a/contrib/libs/pcre/pcrecpp.cc +++ b/contrib/libs/pcre/pcrecpp.cc @@ -1,86 +1,86 @@ -// Copyright (c) 2010, Google Inc. -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Author: Sanjay Ghemawat - -#ifdef HAVE_CONFIG_H +// Copyright (c) 2010, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Author: Sanjay Ghemawat + +#ifdef HAVE_CONFIG_H #include "pcre_config.h" -#endif - -#include <stdlib.h> -#include <stdio.h> -#include <ctype.h> -#include <limits.h> /* for SHRT_MIN, USHRT_MAX, etc */ -#include <string.h> /* for memcpy */ -#include <assert.h> -#include <errno.h> -#include <string> -#include <algorithm> - -#include "pcrecpp_internal.h" -#include "pcre.h" -#include "pcrecpp.h" -#include "pcre_stringpiece.h" - - -namespace pcrecpp { - -// Maximum number of args we can set -static const int kMaxArgs = 16; -static const int kVecSize = (1 + kMaxArgs) * 3; // results + PCRE workspace - -// Special object that stands-in for no argument -Arg RE::no_arg((void*)NULL); - -// This is for ABI compatibility with old versions of pcre (pre-7.6), -// which defined a global no_arg variable instead of putting it in the -// RE class. This works on GCC >= 3, at least. It definitely works -// for ELF, but may not for other object formats (Mach-O, for -// instance, does not support aliases.) We could probably have a more -// inclusive test if we ever needed it. (Note that not only the -// __attribute__ syntax, but also __USER_LABEL_PREFIX__, are -// gnu-specific.) +#endif + +#include <stdlib.h> +#include <stdio.h> +#include <ctype.h> +#include <limits.h> /* for SHRT_MIN, USHRT_MAX, etc */ +#include <string.h> /* for memcpy */ +#include <assert.h> +#include <errno.h> +#include <string> +#include <algorithm> + +#include "pcrecpp_internal.h" +#include "pcre.h" +#include "pcrecpp.h" +#include "pcre_stringpiece.h" + + +namespace pcrecpp { + +// Maximum number of args we can set +static const int kMaxArgs = 16; +static const int kVecSize = (1 + kMaxArgs) * 3; // results + PCRE workspace + +// Special object that stands-in for no argument +Arg RE::no_arg((void*)NULL); + +// This is for ABI compatibility with old versions of pcre (pre-7.6), +// which defined a global no_arg variable instead of putting it in the +// RE class. This works on GCC >= 3, at least. It definitely works +// for ELF, but may not for other object formats (Mach-O, for +// instance, does not support aliases.) We could probably have a more +// inclusive test if we ever needed it. (Note that not only the +// __attribute__ syntax, but also __USER_LABEL_PREFIX__, are +// gnu-specific.) #if defined(__GNUC__) && __GNUC__ >= 3 && defined(__ELF__) \ && !defined(__INTEL_COMPILER) && !defined(__LCC__) -# define ULP_AS_STRING(x) ULP_AS_STRING_INTERNAL(x) -# define ULP_AS_STRING_INTERNAL(x) #x -# define USER_LABEL_PREFIX_STR ULP_AS_STRING(__USER_LABEL_PREFIX__) -extern Arg no_arg - __attribute__((alias(USER_LABEL_PREFIX_STR "_ZN7pcrecpp2RE6no_argE"))); -#endif - -// If a regular expression has no error, its error_ field points here -static const string empty_string; - -// If the user doesn't ask for any options, we just use this one -static RE_Options default_options; - +# define ULP_AS_STRING(x) ULP_AS_STRING_INTERNAL(x) +# define ULP_AS_STRING_INTERNAL(x) #x +# define USER_LABEL_PREFIX_STR ULP_AS_STRING(__USER_LABEL_PREFIX__) +extern Arg no_arg + __attribute__((alias(USER_LABEL_PREFIX_STR "_ZN7pcrecpp2RE6no_argE"))); +#endif + +// If a regular expression has no error, its error_ field points here +static const string empty_string; + +// If the user doesn't ask for any options, we just use this one +static RE_Options default_options; + // Specials for the start of patterns. See comments where start_options is used // below. (PH June 2018) static const char *start_options[] = { @@ -100,61 +100,61 @@ static const char *start_options[] = { "(*ANY)", "" }; -void RE::Init(const string& pat, const RE_Options* options) { - pattern_ = pat; - if (options == NULL) { - options_ = default_options; - } else { - options_ = *options; - } - error_ = &empty_string; - re_full_ = NULL; - re_partial_ = NULL; - - re_partial_ = Compile(UNANCHORED); - if (re_partial_ != NULL) { - re_full_ = Compile(ANCHOR_BOTH); - } -} - -void RE::Cleanup() { - if (re_full_ != NULL) (*pcre_free)(re_full_); - if (re_partial_ != NULL) (*pcre_free)(re_partial_); - if (error_ != &empty_string) delete error_; -} - - -RE::~RE() { - Cleanup(); -} - - -pcre* RE::Compile(Anchor anchor) { - // First, convert RE_Options into pcre options - int pcre_options = 0; - pcre_options = options_.all_options(); - - // Special treatment for anchoring. This is needed because at - // runtime pcre only provides an option for anchoring at the - // beginning of a string (unless you use offset). - // - // There are three types of anchoring we want: - // UNANCHORED Compile the original pattern, and use - // a pcre unanchored match. - // ANCHOR_START Compile the original pattern, and use - // a pcre anchored match. - // ANCHOR_BOTH Tack a "\z" to the end of the original pattern - // and use a pcre anchored match. - - const char* compile_error; - int eoffset; - pcre* re; - if (anchor != ANCHOR_BOTH) { - re = pcre_compile(pattern_.c_str(), pcre_options, - &compile_error, &eoffset, NULL); - } else { - // Tack a '\z' at the end of RE. Parenthesize it first so that - // the '\z' applies to all top-level alternatives in the regexp. +void RE::Init(const string& pat, const RE_Options* options) { + pattern_ = pat; + if (options == NULL) { + options_ = default_options; + } else { + options_ = *options; + } + error_ = &empty_string; + re_full_ = NULL; + re_partial_ = NULL; + + re_partial_ = Compile(UNANCHORED); + if (re_partial_ != NULL) { + re_full_ = Compile(ANCHOR_BOTH); + } +} + +void RE::Cleanup() { + if (re_full_ != NULL) (*pcre_free)(re_full_); + if (re_partial_ != NULL) (*pcre_free)(re_partial_); + if (error_ != &empty_string) delete error_; +} + + +RE::~RE() { + Cleanup(); +} + + +pcre* RE::Compile(Anchor anchor) { + // First, convert RE_Options into pcre options + int pcre_options = 0; + pcre_options = options_.all_options(); + + // Special treatment for anchoring. This is needed because at + // runtime pcre only provides an option for anchoring at the + // beginning of a string (unless you use offset). + // + // There are three types of anchoring we want: + // UNANCHORED Compile the original pattern, and use + // a pcre unanchored match. + // ANCHOR_START Compile the original pattern, and use + // a pcre anchored match. + // ANCHOR_BOTH Tack a "\z" to the end of the original pattern + // and use a pcre anchored match. + + const char* compile_error; + int eoffset; + pcre* re; + if (anchor != ANCHOR_BOTH) { + re = pcre_compile(pattern_.c_str(), pcre_options, + &compile_error, &eoffset, NULL); + } else { + // Tack a '\z' at the end of RE. Parenthesize it first so that + // the '\z' applies to all top-level alternatives in the regexp. /* When this code was written (for PCRE 6.0) it was enough just to parenthesize the entire pattern. Unfortunately, when the feature of @@ -198,789 +198,789 @@ pcre* RE::Compile(Anchor anchor) { // Wrap the rest of the pattern. wrapped += "(?:"; // A non-counting grouping operator - wrapped += pattern_; - wrapped += ")\\z"; - re = pcre_compile(wrapped.c_str(), pcre_options, - &compile_error, &eoffset, NULL); - } - if (re == NULL) { - if (error_ == &empty_string) error_ = new string(compile_error); - } - return re; -} - -/***** Matching interfaces *****/ - -bool RE::FullMatch(const StringPiece& text, - const Arg& ptr1, - const Arg& ptr2, - const Arg& ptr3, - const Arg& ptr4, - const Arg& ptr5, - const Arg& ptr6, - const Arg& ptr7, - const Arg& ptr8, - const Arg& ptr9, - const Arg& ptr10, - const Arg& ptr11, - const Arg& ptr12, - const Arg& ptr13, - const Arg& ptr14, - const Arg& ptr15, - const Arg& ptr16) const { - const Arg* args[kMaxArgs]; - int n = 0; - if (&ptr1 == &no_arg) { goto done; } args[n++] = &ptr1; - if (&ptr2 == &no_arg) { goto done; } args[n++] = &ptr2; - if (&ptr3 == &no_arg) { goto done; } args[n++] = &ptr3; - if (&ptr4 == &no_arg) { goto done; } args[n++] = &ptr4; - if (&ptr5 == &no_arg) { goto done; } args[n++] = &ptr5; - if (&ptr6 == &no_arg) { goto done; } args[n++] = &ptr6; - if (&ptr7 == &no_arg) { goto done; } args[n++] = &ptr7; - if (&ptr8 == &no_arg) { goto done; } args[n++] = &ptr8; - if (&ptr9 == &no_arg) { goto done; } args[n++] = &ptr9; - if (&ptr10 == &no_arg) { goto done; } args[n++] = &ptr10; - if (&ptr11 == &no_arg) { goto done; } args[n++] = &ptr11; - if (&ptr12 == &no_arg) { goto done; } args[n++] = &ptr12; - if (&ptr13 == &no_arg) { goto done; } args[n++] = &ptr13; - if (&ptr14 == &no_arg) { goto done; } args[n++] = &ptr14; - if (&ptr15 == &no_arg) { goto done; } args[n++] = &ptr15; - if (&ptr16 == &no_arg) { goto done; } args[n++] = &ptr16; - done: - - int consumed; - int vec[kVecSize]; - return DoMatchImpl(text, ANCHOR_BOTH, &consumed, args, n, vec, kVecSize); -} - -bool RE::PartialMatch(const StringPiece& text, - const Arg& ptr1, - const Arg& ptr2, - const Arg& ptr3, - const Arg& ptr4, - const Arg& ptr5, - const Arg& ptr6, - const Arg& ptr7, - const Arg& ptr8, - const Arg& ptr9, - const Arg& ptr10, - const Arg& ptr11, - const Arg& ptr12, - const Arg& ptr13, - const Arg& ptr14, - const Arg& ptr15, - const Arg& ptr16) const { - const Arg* args[kMaxArgs]; - int n = 0; - if (&ptr1 == &no_arg) { goto done; } args[n++] = &ptr1; - if (&ptr2 == &no_arg) { goto done; } args[n++] = &ptr2; - if (&ptr3 == &no_arg) { goto done; } args[n++] = &ptr3; - if (&ptr4 == &no_arg) { goto done; } args[n++] = &ptr4; - if (&ptr5 == &no_arg) { goto done; } args[n++] = &ptr5; - if (&ptr6 == &no_arg) { goto done; } args[n++] = &ptr6; - if (&ptr7 == &no_arg) { goto done; } args[n++] = &ptr7; - if (&ptr8 == &no_arg) { goto done; } args[n++] = &ptr8; - if (&ptr9 == &no_arg) { goto done; } args[n++] = &ptr9; - if (&ptr10 == &no_arg) { goto done; } args[n++] = &ptr10; - if (&ptr11 == &no_arg) { goto done; } args[n++] = &ptr11; - if (&ptr12 == &no_arg) { goto done; } args[n++] = &ptr12; - if (&ptr13 == &no_arg) { goto done; } args[n++] = &ptr13; - if (&ptr14 == &no_arg) { goto done; } args[n++] = &ptr14; - if (&ptr15 == &no_arg) { goto done; } args[n++] = &ptr15; - if (&ptr16 == &no_arg) { goto done; } args[n++] = &ptr16; - done: - - int consumed; - int vec[kVecSize]; - return DoMatchImpl(text, UNANCHORED, &consumed, args, n, vec, kVecSize); -} - -bool RE::Consume(StringPiece* input, - const Arg& ptr1, - const Arg& ptr2, - const Arg& ptr3, - const Arg& ptr4, - const Arg& ptr5, - const Arg& ptr6, - const Arg& ptr7, - const Arg& ptr8, - const Arg& ptr9, - const Arg& ptr10, - const Arg& ptr11, - const Arg& ptr12, - const Arg& ptr13, - const Arg& ptr14, - const Arg& ptr15, - const Arg& ptr16) const { - const Arg* args[kMaxArgs]; - int n = 0; - if (&ptr1 == &no_arg) { goto done; } args[n++] = &ptr1; - if (&ptr2 == &no_arg) { goto done; } args[n++] = &ptr2; - if (&ptr3 == &no_arg) { goto done; } args[n++] = &ptr3; - if (&ptr4 == &no_arg) { goto done; } args[n++] = &ptr4; - if (&ptr5 == &no_arg) { goto done; } args[n++] = &ptr5; - if (&ptr6 == &no_arg) { goto done; } args[n++] = &ptr6; - if (&ptr7 == &no_arg) { goto done; } args[n++] = &ptr7; - if (&ptr8 == &no_arg) { goto done; } args[n++] = &ptr8; - if (&ptr9 == &no_arg) { goto done; } args[n++] = &ptr9; - if (&ptr10 == &no_arg) { goto done; } args[n++] = &ptr10; - if (&ptr11 == &no_arg) { goto done; } args[n++] = &ptr11; - if (&ptr12 == &no_arg) { goto done; } args[n++] = &ptr12; - if (&ptr13 == &no_arg) { goto done; } args[n++] = &ptr13; - if (&ptr14 == &no_arg) { goto done; } args[n++] = &ptr14; - if (&ptr15 == &no_arg) { goto done; } args[n++] = &ptr15; - if (&ptr16 == &no_arg) { goto done; } args[n++] = &ptr16; - done: - - int consumed; - int vec[kVecSize]; - if (DoMatchImpl(*input, ANCHOR_START, &consumed, - args, n, vec, kVecSize)) { - input->remove_prefix(consumed); - return true; - } else { - return false; - } -} - -bool RE::FindAndConsume(StringPiece* input, - const Arg& ptr1, - const Arg& ptr2, - const Arg& ptr3, - const Arg& ptr4, - const Arg& ptr5, - const Arg& ptr6, - const Arg& ptr7, - const Arg& ptr8, - const Arg& ptr9, - const Arg& ptr10, - const Arg& ptr11, - const Arg& ptr12, - const Arg& ptr13, - const Arg& ptr14, - const Arg& ptr15, - const Arg& ptr16) const { - const Arg* args[kMaxArgs]; - int n = 0; - if (&ptr1 == &no_arg) { goto done; } args[n++] = &ptr1; - if (&ptr2 == &no_arg) { goto done; } args[n++] = &ptr2; - if (&ptr3 == &no_arg) { goto done; } args[n++] = &ptr3; - if (&ptr4 == &no_arg) { goto done; } args[n++] = &ptr4; - if (&ptr5 == &no_arg) { goto done; } args[n++] = &ptr5; - if (&ptr6 == &no_arg) { goto done; } args[n++] = &ptr6; - if (&ptr7 == &no_arg) { goto done; } args[n++] = &ptr7; - if (&ptr8 == &no_arg) { goto done; } args[n++] = &ptr8; - if (&ptr9 == &no_arg) { goto done; } args[n++] = &ptr9; - if (&ptr10 == &no_arg) { goto done; } args[n++] = &ptr10; - if (&ptr11 == &no_arg) { goto done; } args[n++] = &ptr11; - if (&ptr12 == &no_arg) { goto done; } args[n++] = &ptr12; - if (&ptr13 == &no_arg) { goto done; } args[n++] = &ptr13; - if (&ptr14 == &no_arg) { goto done; } args[n++] = &ptr14; - if (&ptr15 == &no_arg) { goto done; } args[n++] = &ptr15; - if (&ptr16 == &no_arg) { goto done; } args[n++] = &ptr16; - done: - - int consumed; - int vec[kVecSize]; - if (DoMatchImpl(*input, UNANCHORED, &consumed, - args, n, vec, kVecSize)) { - input->remove_prefix(consumed); - return true; - } else { - return false; - } -} - -bool RE::Replace(const StringPiece& rewrite, - string *str) const { - int vec[kVecSize]; - int matches = TryMatch(*str, 0, UNANCHORED, true, vec, kVecSize); - if (matches == 0) - return false; - - string s; - if (!Rewrite(&s, rewrite, *str, vec, matches)) - return false; - - assert(vec[0] >= 0); - assert(vec[1] >= 0); - str->replace(vec[0], vec[1] - vec[0], s); - return true; -} - -// Returns PCRE_NEWLINE_CRLF, PCRE_NEWLINE_CR, or PCRE_NEWLINE_LF. -// Note that PCRE_NEWLINE_CRLF is defined to be P_N_CR | P_N_LF. -// Modified by PH to add PCRE_NEWLINE_ANY and PCRE_NEWLINE_ANYCRLF. - -static int NewlineMode(int pcre_options) { - // TODO: if we can make it threadsafe, cache this var - int newline_mode = 0; - /* if (newline_mode) return newline_mode; */ // do this once it's cached - if (pcre_options & (PCRE_NEWLINE_CRLF|PCRE_NEWLINE_CR|PCRE_NEWLINE_LF| - PCRE_NEWLINE_ANY|PCRE_NEWLINE_ANYCRLF)) { - newline_mode = (pcre_options & - (PCRE_NEWLINE_CRLF|PCRE_NEWLINE_CR|PCRE_NEWLINE_LF| - PCRE_NEWLINE_ANY|PCRE_NEWLINE_ANYCRLF)); - } else { - int newline; - pcre_config(PCRE_CONFIG_NEWLINE, &newline); - if (newline == 10) - newline_mode = PCRE_NEWLINE_LF; - else if (newline == 13) - newline_mode = PCRE_NEWLINE_CR; - else if (newline == 3338) - newline_mode = PCRE_NEWLINE_CRLF; - else if (newline == -1) - newline_mode = PCRE_NEWLINE_ANY; - else if (newline == -2) - newline_mode = PCRE_NEWLINE_ANYCRLF; - else - assert(NULL == "Unexpected return value from pcre_config(NEWLINE)"); - } - return newline_mode; -} - -int RE::GlobalReplace(const StringPiece& rewrite, - string *str) const { - int count = 0; - int vec[kVecSize]; - string out; - int start = 0; - bool last_match_was_empty_string = false; - - while (start <= static_cast<int>(str->length())) { - // If the previous match was for the empty string, we shouldn't - // just match again: we'll match in the same way and get an - // infinite loop. Instead, we do the match in a special way: - // anchored -- to force another try at the same position -- - // and with a flag saying that this time, ignore empty matches. - // If this special match returns, that means there's a non-empty - // match at this position as well, and we can continue. If not, - // we do what perl does, and just advance by one. - // Notice that perl prints '@@@' for this; - // perl -le '$_ = "aa"; s/b*|aa/@/g; print' - int matches; - if (last_match_was_empty_string) { - matches = TryMatch(*str, start, ANCHOR_START, false, vec, kVecSize); - if (matches <= 0) { - int matchend = start + 1; // advance one character. - // If the current char is CR and we're in CRLF mode, skip LF too. - // Note it's better to call pcre_fullinfo() than to examine - // all_options(), since options_ could have changed bewteen - // compile-time and now, but this is simpler and safe enough. - // Modified by PH to add ANY and ANYCRLF. - if (matchend < static_cast<int>(str->length()) && - (*str)[start] == '\r' && (*str)[matchend] == '\n' && - (NewlineMode(options_.all_options()) == PCRE_NEWLINE_CRLF || - NewlineMode(options_.all_options()) == PCRE_NEWLINE_ANY || - NewlineMode(options_.all_options()) == PCRE_NEWLINE_ANYCRLF)) { - matchend++; - } - // We also need to advance more than one char if we're in utf8 mode. + wrapped += pattern_; + wrapped += ")\\z"; + re = pcre_compile(wrapped.c_str(), pcre_options, + &compile_error, &eoffset, NULL); + } + if (re == NULL) { + if (error_ == &empty_string) error_ = new string(compile_error); + } + return re; +} + +/***** Matching interfaces *****/ + +bool RE::FullMatch(const StringPiece& text, + const Arg& ptr1, + const Arg& ptr2, + const Arg& ptr3, + const Arg& ptr4, + const Arg& ptr5, + const Arg& ptr6, + const Arg& ptr7, + const Arg& ptr8, + const Arg& ptr9, + const Arg& ptr10, + const Arg& ptr11, + const Arg& ptr12, + const Arg& ptr13, + const Arg& ptr14, + const Arg& ptr15, + const Arg& ptr16) const { + const Arg* args[kMaxArgs]; + int n = 0; + if (&ptr1 == &no_arg) { goto done; } args[n++] = &ptr1; + if (&ptr2 == &no_arg) { goto done; } args[n++] = &ptr2; + if (&ptr3 == &no_arg) { goto done; } args[n++] = &ptr3; + if (&ptr4 == &no_arg) { goto done; } args[n++] = &ptr4; + if (&ptr5 == &no_arg) { goto done; } args[n++] = &ptr5; + if (&ptr6 == &no_arg) { goto done; } args[n++] = &ptr6; + if (&ptr7 == &no_arg) { goto done; } args[n++] = &ptr7; + if (&ptr8 == &no_arg) { goto done; } args[n++] = &ptr8; + if (&ptr9 == &no_arg) { goto done; } args[n++] = &ptr9; + if (&ptr10 == &no_arg) { goto done; } args[n++] = &ptr10; + if (&ptr11 == &no_arg) { goto done; } args[n++] = &ptr11; + if (&ptr12 == &no_arg) { goto done; } args[n++] = &ptr12; + if (&ptr13 == &no_arg) { goto done; } args[n++] = &ptr13; + if (&ptr14 == &no_arg) { goto done; } args[n++] = &ptr14; + if (&ptr15 == &no_arg) { goto done; } args[n++] = &ptr15; + if (&ptr16 == &no_arg) { goto done; } args[n++] = &ptr16; + done: + + int consumed; + int vec[kVecSize]; + return DoMatchImpl(text, ANCHOR_BOTH, &consumed, args, n, vec, kVecSize); +} + +bool RE::PartialMatch(const StringPiece& text, + const Arg& ptr1, + const Arg& ptr2, + const Arg& ptr3, + const Arg& ptr4, + const Arg& ptr5, + const Arg& ptr6, + const Arg& ptr7, + const Arg& ptr8, + const Arg& ptr9, + const Arg& ptr10, + const Arg& ptr11, + const Arg& ptr12, + const Arg& ptr13, + const Arg& ptr14, + const Arg& ptr15, + const Arg& ptr16) const { + const Arg* args[kMaxArgs]; + int n = 0; + if (&ptr1 == &no_arg) { goto done; } args[n++] = &ptr1; + if (&ptr2 == &no_arg) { goto done; } args[n++] = &ptr2; + if (&ptr3 == &no_arg) { goto done; } args[n++] = &ptr3; + if (&ptr4 == &no_arg) { goto done; } args[n++] = &ptr4; + if (&ptr5 == &no_arg) { goto done; } args[n++] = &ptr5; + if (&ptr6 == &no_arg) { goto done; } args[n++] = &ptr6; + if (&ptr7 == &no_arg) { goto done; } args[n++] = &ptr7; + if (&ptr8 == &no_arg) { goto done; } args[n++] = &ptr8; + if (&ptr9 == &no_arg) { goto done; } args[n++] = &ptr9; + if (&ptr10 == &no_arg) { goto done; } args[n++] = &ptr10; + if (&ptr11 == &no_arg) { goto done; } args[n++] = &ptr11; + if (&ptr12 == &no_arg) { goto done; } args[n++] = &ptr12; + if (&ptr13 == &no_arg) { goto done; } args[n++] = &ptr13; + if (&ptr14 == &no_arg) { goto done; } args[n++] = &ptr14; + if (&ptr15 == &no_arg) { goto done; } args[n++] = &ptr15; + if (&ptr16 == &no_arg) { goto done; } args[n++] = &ptr16; + done: + + int consumed; + int vec[kVecSize]; + return DoMatchImpl(text, UNANCHORED, &consumed, args, n, vec, kVecSize); +} + +bool RE::Consume(StringPiece* input, + const Arg& ptr1, + const Arg& ptr2, + const Arg& ptr3, + const Arg& ptr4, + const Arg& ptr5, + const Arg& ptr6, + const Arg& ptr7, + const Arg& ptr8, + const Arg& ptr9, + const Arg& ptr10, + const Arg& ptr11, + const Arg& ptr12, + const Arg& ptr13, + const Arg& ptr14, + const Arg& ptr15, + const Arg& ptr16) const { + const Arg* args[kMaxArgs]; + int n = 0; + if (&ptr1 == &no_arg) { goto done; } args[n++] = &ptr1; + if (&ptr2 == &no_arg) { goto done; } args[n++] = &ptr2; + if (&ptr3 == &no_arg) { goto done; } args[n++] = &ptr3; + if (&ptr4 == &no_arg) { goto done; } args[n++] = &ptr4; + if (&ptr5 == &no_arg) { goto done; } args[n++] = &ptr5; + if (&ptr6 == &no_arg) { goto done; } args[n++] = &ptr6; + if (&ptr7 == &no_arg) { goto done; } args[n++] = &ptr7; + if (&ptr8 == &no_arg) { goto done; } args[n++] = &ptr8; + if (&ptr9 == &no_arg) { goto done; } args[n++] = &ptr9; + if (&ptr10 == &no_arg) { goto done; } args[n++] = &ptr10; + if (&ptr11 == &no_arg) { goto done; } args[n++] = &ptr11; + if (&ptr12 == &no_arg) { goto done; } args[n++] = &ptr12; + if (&ptr13 == &no_arg) { goto done; } args[n++] = &ptr13; + if (&ptr14 == &no_arg) { goto done; } args[n++] = &ptr14; + if (&ptr15 == &no_arg) { goto done; } args[n++] = &ptr15; + if (&ptr16 == &no_arg) { goto done; } args[n++] = &ptr16; + done: + + int consumed; + int vec[kVecSize]; + if (DoMatchImpl(*input, ANCHOR_START, &consumed, + args, n, vec, kVecSize)) { + input->remove_prefix(consumed); + return true; + } else { + return false; + } +} + +bool RE::FindAndConsume(StringPiece* input, + const Arg& ptr1, + const Arg& ptr2, + const Arg& ptr3, + const Arg& ptr4, + const Arg& ptr5, + const Arg& ptr6, + const Arg& ptr7, + const Arg& ptr8, + const Arg& ptr9, + const Arg& ptr10, + const Arg& ptr11, + const Arg& ptr12, + const Arg& ptr13, + const Arg& ptr14, + const Arg& ptr15, + const Arg& ptr16) const { + const Arg* args[kMaxArgs]; + int n = 0; + if (&ptr1 == &no_arg) { goto done; } args[n++] = &ptr1; + if (&ptr2 == &no_arg) { goto done; } args[n++] = &ptr2; + if (&ptr3 == &no_arg) { goto done; } args[n++] = &ptr3; + if (&ptr4 == &no_arg) { goto done; } args[n++] = &ptr4; + if (&ptr5 == &no_arg) { goto done; } args[n++] = &ptr5; + if (&ptr6 == &no_arg) { goto done; } args[n++] = &ptr6; + if (&ptr7 == &no_arg) { goto done; } args[n++] = &ptr7; + if (&ptr8 == &no_arg) { goto done; } args[n++] = &ptr8; + if (&ptr9 == &no_arg) { goto done; } args[n++] = &ptr9; + if (&ptr10 == &no_arg) { goto done; } args[n++] = &ptr10; + if (&ptr11 == &no_arg) { goto done; } args[n++] = &ptr11; + if (&ptr12 == &no_arg) { goto done; } args[n++] = &ptr12; + if (&ptr13 == &no_arg) { goto done; } args[n++] = &ptr13; + if (&ptr14 == &no_arg) { goto done; } args[n++] = &ptr14; + if (&ptr15 == &no_arg) { goto done; } args[n++] = &ptr15; + if (&ptr16 == &no_arg) { goto done; } args[n++] = &ptr16; + done: + + int consumed; + int vec[kVecSize]; + if (DoMatchImpl(*input, UNANCHORED, &consumed, + args, n, vec, kVecSize)) { + input->remove_prefix(consumed); + return true; + } else { + return false; + } +} + +bool RE::Replace(const StringPiece& rewrite, + string *str) const { + int vec[kVecSize]; + int matches = TryMatch(*str, 0, UNANCHORED, true, vec, kVecSize); + if (matches == 0) + return false; + + string s; + if (!Rewrite(&s, rewrite, *str, vec, matches)) + return false; + + assert(vec[0] >= 0); + assert(vec[1] >= 0); + str->replace(vec[0], vec[1] - vec[0], s); + return true; +} + +// Returns PCRE_NEWLINE_CRLF, PCRE_NEWLINE_CR, or PCRE_NEWLINE_LF. +// Note that PCRE_NEWLINE_CRLF is defined to be P_N_CR | P_N_LF. +// Modified by PH to add PCRE_NEWLINE_ANY and PCRE_NEWLINE_ANYCRLF. + +static int NewlineMode(int pcre_options) { + // TODO: if we can make it threadsafe, cache this var + int newline_mode = 0; + /* if (newline_mode) return newline_mode; */ // do this once it's cached + if (pcre_options & (PCRE_NEWLINE_CRLF|PCRE_NEWLINE_CR|PCRE_NEWLINE_LF| + PCRE_NEWLINE_ANY|PCRE_NEWLINE_ANYCRLF)) { + newline_mode = (pcre_options & + (PCRE_NEWLINE_CRLF|PCRE_NEWLINE_CR|PCRE_NEWLINE_LF| + PCRE_NEWLINE_ANY|PCRE_NEWLINE_ANYCRLF)); + } else { + int newline; + pcre_config(PCRE_CONFIG_NEWLINE, &newline); + if (newline == 10) + newline_mode = PCRE_NEWLINE_LF; + else if (newline == 13) + newline_mode = PCRE_NEWLINE_CR; + else if (newline == 3338) + newline_mode = PCRE_NEWLINE_CRLF; + else if (newline == -1) + newline_mode = PCRE_NEWLINE_ANY; + else if (newline == -2) + newline_mode = PCRE_NEWLINE_ANYCRLF; + else + assert(NULL == "Unexpected return value from pcre_config(NEWLINE)"); + } + return newline_mode; +} + +int RE::GlobalReplace(const StringPiece& rewrite, + string *str) const { + int count = 0; + int vec[kVecSize]; + string out; + int start = 0; + bool last_match_was_empty_string = false; + + while (start <= static_cast<int>(str->length())) { + // If the previous match was for the empty string, we shouldn't + // just match again: we'll match in the same way and get an + // infinite loop. Instead, we do the match in a special way: + // anchored -- to force another try at the same position -- + // and with a flag saying that this time, ignore empty matches. + // If this special match returns, that means there's a non-empty + // match at this position as well, and we can continue. If not, + // we do what perl does, and just advance by one. + // Notice that perl prints '@@@' for this; + // perl -le '$_ = "aa"; s/b*|aa/@/g; print' + int matches; + if (last_match_was_empty_string) { + matches = TryMatch(*str, start, ANCHOR_START, false, vec, kVecSize); + if (matches <= 0) { + int matchend = start + 1; // advance one character. + // If the current char is CR and we're in CRLF mode, skip LF too. + // Note it's better to call pcre_fullinfo() than to examine + // all_options(), since options_ could have changed bewteen + // compile-time and now, but this is simpler and safe enough. + // Modified by PH to add ANY and ANYCRLF. + if (matchend < static_cast<int>(str->length()) && + (*str)[start] == '\r' && (*str)[matchend] == '\n' && + (NewlineMode(options_.all_options()) == PCRE_NEWLINE_CRLF || + NewlineMode(options_.all_options()) == PCRE_NEWLINE_ANY || + NewlineMode(options_.all_options()) == PCRE_NEWLINE_ANYCRLF)) { + matchend++; + } + // We also need to advance more than one char if we're in utf8 mode. #ifdef SUPPORT_UTF - if (options_.utf8()) { - while (matchend < static_cast<int>(str->length()) && - ((*str)[matchend] & 0xc0) == 0x80) - matchend++; - } -#endif - if (start < static_cast<int>(str->length())) - out.append(*str, start, matchend - start); - start = matchend; - last_match_was_empty_string = false; - continue; - } - } else { - matches = TryMatch(*str, start, UNANCHORED, true, vec, kVecSize); - if (matches <= 0) - break; - } - int matchstart = vec[0], matchend = vec[1]; - assert(matchstart >= start); - assert(matchend >= matchstart); - out.append(*str, start, matchstart - start); - Rewrite(&out, rewrite, *str, vec, matches); - start = matchend; - count++; - last_match_was_empty_string = (matchstart == matchend); - } - - if (count == 0) - return 0; - - if (start < static_cast<int>(str->length())) - out.append(*str, start, str->length() - start); - swap(out, *str); - return count; -} - -bool RE::Extract(const StringPiece& rewrite, - const StringPiece& text, - string *out) const { - int vec[kVecSize]; - int matches = TryMatch(text, 0, UNANCHORED, true, vec, kVecSize); - if (matches == 0) - return false; - out->erase(); - return Rewrite(out, rewrite, text, vec, matches); -} - -/*static*/ string RE::QuoteMeta(const StringPiece& unquoted) { - string result; - - // Escape any ascii character not in [A-Za-z_0-9]. - // - // Note that it's legal to escape a character even if it has no - // special meaning in a regular expression -- so this function does - // that. (This also makes it identical to the perl function of the - // same name; see `perldoc -f quotemeta`.) The one exception is - // escaping NUL: rather than doing backslash + NUL, like perl does, - // we do '\0', because pcre itself doesn't take embedded NUL chars. - for (int ii = 0; ii < unquoted.size(); ++ii) { - // Note that using 'isalnum' here raises the benchmark time from - // 32ns to 58ns: - if (unquoted[ii] == '\0') { - result += "\\0"; - } else if ((unquoted[ii] < 'a' || unquoted[ii] > 'z') && - (unquoted[ii] < 'A' || unquoted[ii] > 'Z') && - (unquoted[ii] < '0' || unquoted[ii] > '9') && - unquoted[ii] != '_' && - // If this is the part of a UTF8 or Latin1 character, we need - // to copy this byte without escaping. Experimentally this is - // what works correctly with the regexp library. - !(unquoted[ii] & 128)) { - result += '\\'; - result += unquoted[ii]; - } else { - result += unquoted[ii]; - } - } - - return result; -} - -/***** Actual matching and rewriting code *****/ - -int RE::TryMatch(const StringPiece& text, - int startpos, - Anchor anchor, - bool empty_ok, - int *vec, - int vecsize) const { - pcre* re = (anchor == ANCHOR_BOTH) ? re_full_ : re_partial_; - if (re == NULL) { - //fprintf(stderr, "Matching against invalid re: %s\n", error_->c_str()); - return 0; - } - - pcre_extra extra = { 0, 0, 0, 0, 0, 0, 0, 0 }; - if (options_.match_limit() > 0) { - extra.flags |= PCRE_EXTRA_MATCH_LIMIT; - extra.match_limit = options_.match_limit(); - } - if (options_.match_limit_recursion() > 0) { - extra.flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION; - extra.match_limit_recursion = options_.match_limit_recursion(); - } - - // int options = 0; - // Changed by PH as a result of bugzilla #1288 - int options = (options_.all_options() & PCRE_NO_UTF8_CHECK); - - if (anchor != UNANCHORED) - options |= PCRE_ANCHORED; - if (!empty_ok) - options |= PCRE_NOTEMPTY; - - int rc = pcre_exec(re, // The regular expression object - &extra, - (text.data() == NULL) ? "" : text.data(), - text.size(), - startpos, - options, - vec, - vecsize); - - // Handle errors - if (rc == PCRE_ERROR_NOMATCH) { - return 0; - } else if (rc < 0) { - //fprintf(stderr, "Unexpected return code: %d when matching '%s'\n", - // re, pattern_.c_str()); - return 0; - } else if (rc == 0) { - // pcre_exec() returns 0 as a special case when the number of - // capturing subpatterns exceeds the size of the vector. - // When this happens, there is a match and the output vector - // is filled, but we miss out on the positions of the extra subpatterns. - rc = vecsize / 2; - } - - return rc; -} - -bool RE::DoMatchImpl(const StringPiece& text, - Anchor anchor, - int* consumed, - const Arg* const* args, - int n, - int* vec, - int vecsize) const { - assert((1 + n) * 3 <= vecsize); // results + PCRE workspace - int matches = TryMatch(text, 0, anchor, true, vec, vecsize); - assert(matches >= 0); // TryMatch never returns negatives - if (matches == 0) - return false; - - *consumed = vec[1]; - - if (n == 0 || args == NULL) { - // We are not interested in results - return true; - } - - if (NumberOfCapturingGroups() < n) { - // RE has fewer capturing groups than number of arg pointers passed in - return false; - } - - // If we got here, we must have matched the whole pattern. - // We do not need (can not do) any more checks on the value of 'matches' here - // -- see the comment for TryMatch. - for (int i = 0; i < n; i++) { - const int start = vec[2*(i+1)]; - const int limit = vec[2*(i+1)+1]; - if (!args[i]->Parse(text.data() + start, limit-start)) { - // TODO: Should we indicate what the error was? - return false; - } - } - - return true; -} - -bool RE::DoMatch(const StringPiece& text, - Anchor anchor, - int* consumed, - const Arg* const args[], - int n) const { - assert(n >= 0); - size_t const vecsize = (1 + n) * 3; // results + PCRE workspace - // (as for kVecSize) - int space[21]; // use stack allocation for small vecsize (common case) - int* vec = vecsize <= 21 ? space : new int[vecsize]; - bool retval = DoMatchImpl(text, anchor, consumed, args, n, vec, (int)vecsize); - if (vec != space) delete [] vec; - return retval; -} - -bool RE::Rewrite(string *out, const StringPiece &rewrite, - const StringPiece &text, int *vec, int veclen) const { - for (const char *s = rewrite.data(), *end = s + rewrite.size(); - s < end; s++) { - int c = *s; - if (c == '\\') { - c = *++s; - if (isdigit(c)) { - int n = (c - '0'); - if (n >= veclen) { - //fprintf(stderr, requested group %d in regexp %.*s\n", - // n, rewrite.size(), rewrite.data()); - return false; - } - int start = vec[2 * n]; - if (start >= 0) - out->append(text.data() + start, vec[2 * n + 1] - start); - } else if (c == '\\') { - *out += '\\'; - } else { - //fprintf(stderr, "invalid rewrite pattern: %.*s\n", - // rewrite.size(), rewrite.data()); - return false; - } - } else { - *out += c; - } - } - return true; -} - -// Return the number of capturing subpatterns, or -1 if the -// regexp wasn't valid on construction. -int RE::NumberOfCapturingGroups() const { - if (re_partial_ == NULL) return -1; - - int result; - int pcre_retval = pcre_fullinfo(re_partial_, // The regular expression object - NULL, // We did not study the pattern - PCRE_INFO_CAPTURECOUNT, - &result); - assert(pcre_retval == 0); - return result; -} - -/***** Parsers for various types *****/ - -bool Arg::parse_null(const char* str, int n, void* dest) { - (void)str; - (void)n; - // We fail if somebody asked us to store into a non-NULL void* pointer - return (dest == NULL); -} - -bool Arg::parse_string(const char* str, int n, void* dest) { - if (dest == NULL) return true; - reinterpret_cast<string*>(dest)->assign(str, n); - return true; -} - -bool Arg::parse_stringpiece(const char* str, int n, void* dest) { - if (dest == NULL) return true; - reinterpret_cast<StringPiece*>(dest)->set(str, n); - return true; -} - -bool Arg::parse_char(const char* str, int n, void* dest) { - if (n != 1) return false; - if (dest == NULL) return true; - *(reinterpret_cast<char*>(dest)) = str[0]; - return true; -} - -bool Arg::parse_uchar(const char* str, int n, void* dest) { - if (n != 1) return false; - if (dest == NULL) return true; - *(reinterpret_cast<unsigned char*>(dest)) = str[0]; - return true; -} - -// Largest number spec that we are willing to parse -static const int kMaxNumberLength = 32; - -// REQUIRES "buf" must have length at least kMaxNumberLength+1 -// REQUIRES "n > 0" -// Copies "str" into "buf" and null-terminates if necessary. -// Returns one of: -// a. "str" if no termination is needed -// b. "buf" if the string was copied and null-terminated -// c. "" if the input was invalid and has no hope of being parsed -static const char* TerminateNumber(char* buf, const char* str, int n) { - if ((n > 0) && isspace(*str)) { - // We are less forgiving than the strtoxxx() routines and do not - // allow leading spaces. - return ""; - } - - // See if the character right after the input text may potentially - // look like a digit. - if (isdigit(str[n]) || - ((str[n] >= 'a') && (str[n] <= 'f')) || - ((str[n] >= 'A') && (str[n] <= 'F'))) { - if (n > kMaxNumberLength) return ""; // Input too big to be a valid number - memcpy(buf, str, n); - buf[n] = '\0'; - return buf; - } else { - // We can parse right out of the supplied string, so return it. - return str; - } -} - -bool Arg::parse_long_radix(const char* str, - int n, - void* dest, - int radix) { - if (n == 0) return false; - char buf[kMaxNumberLength+1]; - str = TerminateNumber(buf, str, n); - char* end; - errno = 0; - long r = strtol(str, &end, radix); - if (end != str + n) return false; // Leftover junk - if (errno) return false; - if (dest == NULL) return true; - *(reinterpret_cast<long*>(dest)) = r; - return true; -} - -bool Arg::parse_ulong_radix(const char* str, - int n, - void* dest, - int radix) { - if (n == 0) return false; - char buf[kMaxNumberLength+1]; - str = TerminateNumber(buf, str, n); - if (str[0] == '-') return false; // strtoul() on a negative number?! - char* end; - errno = 0; - unsigned long r = strtoul(str, &end, radix); - if (end != str + n) return false; // Leftover junk - if (errno) return false; - if (dest == NULL) return true; - *(reinterpret_cast<unsigned long*>(dest)) = r; - return true; -} - -bool Arg::parse_short_radix(const char* str, - int n, - void* dest, - int radix) { - long r; - if (!parse_long_radix(str, n, &r, radix)) return false; // Could not parse - if (r < SHRT_MIN || r > SHRT_MAX) return false; // Out of range - if (dest == NULL) return true; - *(reinterpret_cast<short*>(dest)) = static_cast<short>(r); - return true; -} - -bool Arg::parse_ushort_radix(const char* str, - int n, - void* dest, - int radix) { - unsigned long r; - if (!parse_ulong_radix(str, n, &r, radix)) return false; // Could not parse - if (r > USHRT_MAX) return false; // Out of range - if (dest == NULL) return true; - *(reinterpret_cast<unsigned short*>(dest)) = static_cast<unsigned short>(r); - return true; -} - -bool Arg::parse_int_radix(const char* str, - int n, - void* dest, - int radix) { - long r; - if (!parse_long_radix(str, n, &r, radix)) return false; // Could not parse - if (r < INT_MIN || r > INT_MAX) return false; // Out of range - if (dest == NULL) return true; - *(reinterpret_cast<int*>(dest)) = r; - return true; -} - -bool Arg::parse_uint_radix(const char* str, - int n, - void* dest, - int radix) { - unsigned long r; - if (!parse_ulong_radix(str, n, &r, radix)) return false; // Could not parse - if (r > UINT_MAX) return false; // Out of range - if (dest == NULL) return true; - *(reinterpret_cast<unsigned int*>(dest)) = r; - return true; -} - -bool Arg::parse_longlong_radix(const char* str, - int n, - void* dest, - int radix) { -#ifndef HAVE_LONG_LONG - return false; -#else - if (n == 0) return false; - char buf[kMaxNumberLength+1]; - str = TerminateNumber(buf, str, n); - char* end; - errno = 0; -#if defined HAVE_STRTOQ - long long r = strtoq(str, &end, radix); -#elif defined HAVE_STRTOLL - long long r = strtoll(str, &end, radix); -#elif defined HAVE__STRTOI64 - long long r = _strtoi64(str, &end, radix); -#elif defined HAVE_STRTOIMAX - long long r = strtoimax(str, &end, radix); -#else -#error parse_longlong_radix: cannot convert input to a long-long -#endif - if (end != str + n) return false; // Leftover junk - if (errno) return false; - if (dest == NULL) return true; - *(reinterpret_cast<long long*>(dest)) = r; - return true; -#endif /* HAVE_LONG_LONG */ -} - -bool Arg::parse_ulonglong_radix(const char* str, - int n, - void* dest, - int radix) { -#ifndef HAVE_UNSIGNED_LONG_LONG - return false; -#else - if (n == 0) return false; - char buf[kMaxNumberLength+1]; - str = TerminateNumber(buf, str, n); - if (str[0] == '-') return false; // strtoull() on a negative number?! - char* end; - errno = 0; -#if defined HAVE_STRTOQ - unsigned long long r = strtouq(str, &end, radix); -#elif defined HAVE_STRTOLL - unsigned long long r = strtoull(str, &end, radix); -#elif defined HAVE__STRTOI64 - unsigned long long r = _strtoui64(str, &end, radix); -#elif defined HAVE_STRTOIMAX - unsigned long long r = strtoumax(str, &end, radix); -#else -#error parse_ulonglong_radix: cannot convert input to a long-long -#endif - if (end != str + n) return false; // Leftover junk - if (errno) return false; - if (dest == NULL) return true; - *(reinterpret_cast<unsigned long long*>(dest)) = r; - return true; -#endif /* HAVE_UNSIGNED_LONG_LONG */ -} - -bool Arg::parse_double(const char* str, int n, void* dest) { - if (n == 0) return false; - static const int kMaxLength = 200; - char buf[kMaxLength]; - if (n >= kMaxLength) return false; - memcpy(buf, str, n); - buf[n] = '\0'; - errno = 0; - char* end; - double r = strtod(buf, &end); - if (end != buf + n) return false; // Leftover junk - if (errno) return false; - if (dest == NULL) return true; - *(reinterpret_cast<double*>(dest)) = r; - return true; -} - -bool Arg::parse_float(const char* str, int n, void* dest) { - double r; - if (!parse_double(str, n, &r)) return false; - if (dest == NULL) return true; - *(reinterpret_cast<float*>(dest)) = static_cast<float>(r); - return true; -} - - -#define DEFINE_INTEGER_PARSERS(name) \ - bool Arg::parse_##name(const char* str, int n, void* dest) { \ - return parse_##name##_radix(str, n, dest, 10); \ - } \ - bool Arg::parse_##name##_hex(const char* str, int n, void* dest) { \ - return parse_##name##_radix(str, n, dest, 16); \ - } \ - bool Arg::parse_##name##_octal(const char* str, int n, void* dest) { \ - return parse_##name##_radix(str, n, dest, 8); \ - } \ - bool Arg::parse_##name##_cradix(const char* str, int n, void* dest) { \ - return parse_##name##_radix(str, n, dest, 0); \ - } - -DEFINE_INTEGER_PARSERS(short) /* */ -DEFINE_INTEGER_PARSERS(ushort) /* */ -DEFINE_INTEGER_PARSERS(int) /* Don't use semicolons after these */ -DEFINE_INTEGER_PARSERS(uint) /* statements because they can cause */ -DEFINE_INTEGER_PARSERS(long) /* compiler warnings if the checking */ -DEFINE_INTEGER_PARSERS(ulong) /* level is turned up high enough. */ -DEFINE_INTEGER_PARSERS(longlong) /* */ -DEFINE_INTEGER_PARSERS(ulonglong) /* */ - -#undef DEFINE_INTEGER_PARSERS - -} // namespace pcrecpp + if (options_.utf8()) { + while (matchend < static_cast<int>(str->length()) && + ((*str)[matchend] & 0xc0) == 0x80) + matchend++; + } +#endif + if (start < static_cast<int>(str->length())) + out.append(*str, start, matchend - start); + start = matchend; + last_match_was_empty_string = false; + continue; + } + } else { + matches = TryMatch(*str, start, UNANCHORED, true, vec, kVecSize); + if (matches <= 0) + break; + } + int matchstart = vec[0], matchend = vec[1]; + assert(matchstart >= start); + assert(matchend >= matchstart); + out.append(*str, start, matchstart - start); + Rewrite(&out, rewrite, *str, vec, matches); + start = matchend; + count++; + last_match_was_empty_string = (matchstart == matchend); + } + + if (count == 0) + return 0; + + if (start < static_cast<int>(str->length())) + out.append(*str, start, str->length() - start); + swap(out, *str); + return count; +} + +bool RE::Extract(const StringPiece& rewrite, + const StringPiece& text, + string *out) const { + int vec[kVecSize]; + int matches = TryMatch(text, 0, UNANCHORED, true, vec, kVecSize); + if (matches == 0) + return false; + out->erase(); + return Rewrite(out, rewrite, text, vec, matches); +} + +/*static*/ string RE::QuoteMeta(const StringPiece& unquoted) { + string result; + + // Escape any ascii character not in [A-Za-z_0-9]. + // + // Note that it's legal to escape a character even if it has no + // special meaning in a regular expression -- so this function does + // that. (This also makes it identical to the perl function of the + // same name; see `perldoc -f quotemeta`.) The one exception is + // escaping NUL: rather than doing backslash + NUL, like perl does, + // we do '\0', because pcre itself doesn't take embedded NUL chars. + for (int ii = 0; ii < unquoted.size(); ++ii) { + // Note that using 'isalnum' here raises the benchmark time from + // 32ns to 58ns: + if (unquoted[ii] == '\0') { + result += "\\0"; + } else if ((unquoted[ii] < 'a' || unquoted[ii] > 'z') && + (unquoted[ii] < 'A' || unquoted[ii] > 'Z') && + (unquoted[ii] < '0' || unquoted[ii] > '9') && + unquoted[ii] != '_' && + // If this is the part of a UTF8 or Latin1 character, we need + // to copy this byte without escaping. Experimentally this is + // what works correctly with the regexp library. + !(unquoted[ii] & 128)) { + result += '\\'; + result += unquoted[ii]; + } else { + result += unquoted[ii]; + } + } + + return result; +} + +/***** Actual matching and rewriting code *****/ + +int RE::TryMatch(const StringPiece& text, + int startpos, + Anchor anchor, + bool empty_ok, + int *vec, + int vecsize) const { + pcre* re = (anchor == ANCHOR_BOTH) ? re_full_ : re_partial_; + if (re == NULL) { + //fprintf(stderr, "Matching against invalid re: %s\n", error_->c_str()); + return 0; + } + + pcre_extra extra = { 0, 0, 0, 0, 0, 0, 0, 0 }; + if (options_.match_limit() > 0) { + extra.flags |= PCRE_EXTRA_MATCH_LIMIT; + extra.match_limit = options_.match_limit(); + } + if (options_.match_limit_recursion() > 0) { + extra.flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION; + extra.match_limit_recursion = options_.match_limit_recursion(); + } + + // int options = 0; + // Changed by PH as a result of bugzilla #1288 + int options = (options_.all_options() & PCRE_NO_UTF8_CHECK); + + if (anchor != UNANCHORED) + options |= PCRE_ANCHORED; + if (!empty_ok) + options |= PCRE_NOTEMPTY; + + int rc = pcre_exec(re, // The regular expression object + &extra, + (text.data() == NULL) ? "" : text.data(), + text.size(), + startpos, + options, + vec, + vecsize); + + // Handle errors + if (rc == PCRE_ERROR_NOMATCH) { + return 0; + } else if (rc < 0) { + //fprintf(stderr, "Unexpected return code: %d when matching '%s'\n", + // re, pattern_.c_str()); + return 0; + } else if (rc == 0) { + // pcre_exec() returns 0 as a special case when the number of + // capturing subpatterns exceeds the size of the vector. + // When this happens, there is a match and the output vector + // is filled, but we miss out on the positions of the extra subpatterns. + rc = vecsize / 2; + } + + return rc; +} + +bool RE::DoMatchImpl(const StringPiece& text, + Anchor anchor, + int* consumed, + const Arg* const* args, + int n, + int* vec, + int vecsize) const { + assert((1 + n) * 3 <= vecsize); // results + PCRE workspace + int matches = TryMatch(text, 0, anchor, true, vec, vecsize); + assert(matches >= 0); // TryMatch never returns negatives + if (matches == 0) + return false; + + *consumed = vec[1]; + + if (n == 0 || args == NULL) { + // We are not interested in results + return true; + } + + if (NumberOfCapturingGroups() < n) { + // RE has fewer capturing groups than number of arg pointers passed in + return false; + } + + // If we got here, we must have matched the whole pattern. + // We do not need (can not do) any more checks on the value of 'matches' here + // -- see the comment for TryMatch. + for (int i = 0; i < n; i++) { + const int start = vec[2*(i+1)]; + const int limit = vec[2*(i+1)+1]; + if (!args[i]->Parse(text.data() + start, limit-start)) { + // TODO: Should we indicate what the error was? + return false; + } + } + + return true; +} + +bool RE::DoMatch(const StringPiece& text, + Anchor anchor, + int* consumed, + const Arg* const args[], + int n) const { + assert(n >= 0); + size_t const vecsize = (1 + n) * 3; // results + PCRE workspace + // (as for kVecSize) + int space[21]; // use stack allocation for small vecsize (common case) + int* vec = vecsize <= 21 ? space : new int[vecsize]; + bool retval = DoMatchImpl(text, anchor, consumed, args, n, vec, (int)vecsize); + if (vec != space) delete [] vec; + return retval; +} + +bool RE::Rewrite(string *out, const StringPiece &rewrite, + const StringPiece &text, int *vec, int veclen) const { + for (const char *s = rewrite.data(), *end = s + rewrite.size(); + s < end; s++) { + int c = *s; + if (c == '\\') { + c = *++s; + if (isdigit(c)) { + int n = (c - '0'); + if (n >= veclen) { + //fprintf(stderr, requested group %d in regexp %.*s\n", + // n, rewrite.size(), rewrite.data()); + return false; + } + int start = vec[2 * n]; + if (start >= 0) + out->append(text.data() + start, vec[2 * n + 1] - start); + } else if (c == '\\') { + *out += '\\'; + } else { + //fprintf(stderr, "invalid rewrite pattern: %.*s\n", + // rewrite.size(), rewrite.data()); + return false; + } + } else { + *out += c; + } + } + return true; +} + +// Return the number of capturing subpatterns, or -1 if the +// regexp wasn't valid on construction. +int RE::NumberOfCapturingGroups() const { + if (re_partial_ == NULL) return -1; + + int result; + int pcre_retval = pcre_fullinfo(re_partial_, // The regular expression object + NULL, // We did not study the pattern + PCRE_INFO_CAPTURECOUNT, + &result); + assert(pcre_retval == 0); + return result; +} + +/***** Parsers for various types *****/ + +bool Arg::parse_null(const char* str, int n, void* dest) { + (void)str; + (void)n; + // We fail if somebody asked us to store into a non-NULL void* pointer + return (dest == NULL); +} + +bool Arg::parse_string(const char* str, int n, void* dest) { + if (dest == NULL) return true; + reinterpret_cast<string*>(dest)->assign(str, n); + return true; +} + +bool Arg::parse_stringpiece(const char* str, int n, void* dest) { + if (dest == NULL) return true; + reinterpret_cast<StringPiece*>(dest)->set(str, n); + return true; +} + +bool Arg::parse_char(const char* str, int n, void* dest) { + if (n != 1) return false; + if (dest == NULL) return true; + *(reinterpret_cast<char*>(dest)) = str[0]; + return true; +} + +bool Arg::parse_uchar(const char* str, int n, void* dest) { + if (n != 1) return false; + if (dest == NULL) return true; + *(reinterpret_cast<unsigned char*>(dest)) = str[0]; + return true; +} + +// Largest number spec that we are willing to parse +static const int kMaxNumberLength = 32; + +// REQUIRES "buf" must have length at least kMaxNumberLength+1 +// REQUIRES "n > 0" +// Copies "str" into "buf" and null-terminates if necessary. +// Returns one of: +// a. "str" if no termination is needed +// b. "buf" if the string was copied and null-terminated +// c. "" if the input was invalid and has no hope of being parsed +static const char* TerminateNumber(char* buf, const char* str, int n) { + if ((n > 0) && isspace(*str)) { + // We are less forgiving than the strtoxxx() routines and do not + // allow leading spaces. + return ""; + } + + // See if the character right after the input text may potentially + // look like a digit. + if (isdigit(str[n]) || + ((str[n] >= 'a') && (str[n] <= 'f')) || + ((str[n] >= 'A') && (str[n] <= 'F'))) { + if (n > kMaxNumberLength) return ""; // Input too big to be a valid number + memcpy(buf, str, n); + buf[n] = '\0'; + return buf; + } else { + // We can parse right out of the supplied string, so return it. + return str; + } +} + +bool Arg::parse_long_radix(const char* str, + int n, + void* dest, + int radix) { + if (n == 0) return false; + char buf[kMaxNumberLength+1]; + str = TerminateNumber(buf, str, n); + char* end; + errno = 0; + long r = strtol(str, &end, radix); + if (end != str + n) return false; // Leftover junk + if (errno) return false; + if (dest == NULL) return true; + *(reinterpret_cast<long*>(dest)) = r; + return true; +} + +bool Arg::parse_ulong_radix(const char* str, + int n, + void* dest, + int radix) { + if (n == 0) return false; + char buf[kMaxNumberLength+1]; + str = TerminateNumber(buf, str, n); + if (str[0] == '-') return false; // strtoul() on a negative number?! + char* end; + errno = 0; + unsigned long r = strtoul(str, &end, radix); + if (end != str + n) return false; // Leftover junk + if (errno) return false; + if (dest == NULL) return true; + *(reinterpret_cast<unsigned long*>(dest)) = r; + return true; +} + +bool Arg::parse_short_radix(const char* str, + int n, + void* dest, + int radix) { + long r; + if (!parse_long_radix(str, n, &r, radix)) return false; // Could not parse + if (r < SHRT_MIN || r > SHRT_MAX) return false; // Out of range + if (dest == NULL) return true; + *(reinterpret_cast<short*>(dest)) = static_cast<short>(r); + return true; +} + +bool Arg::parse_ushort_radix(const char* str, + int n, + void* dest, + int radix) { + unsigned long r; + if (!parse_ulong_radix(str, n, &r, radix)) return false; // Could not parse + if (r > USHRT_MAX) return false; // Out of range + if (dest == NULL) return true; + *(reinterpret_cast<unsigned short*>(dest)) = static_cast<unsigned short>(r); + return true; +} + +bool Arg::parse_int_radix(const char* str, + int n, + void* dest, + int radix) { + long r; + if (!parse_long_radix(str, n, &r, radix)) return false; // Could not parse + if (r < INT_MIN || r > INT_MAX) return false; // Out of range + if (dest == NULL) return true; + *(reinterpret_cast<int*>(dest)) = r; + return true; +} + +bool Arg::parse_uint_radix(const char* str, + int n, + void* dest, + int radix) { + unsigned long r; + if (!parse_ulong_radix(str, n, &r, radix)) return false; // Could not parse + if (r > UINT_MAX) return false; // Out of range + if (dest == NULL) return true; + *(reinterpret_cast<unsigned int*>(dest)) = r; + return true; +} + +bool Arg::parse_longlong_radix(const char* str, + int n, + void* dest, + int radix) { +#ifndef HAVE_LONG_LONG + return false; +#else + if (n == 0) return false; + char buf[kMaxNumberLength+1]; + str = TerminateNumber(buf, str, n); + char* end; + errno = 0; +#if defined HAVE_STRTOQ + long long r = strtoq(str, &end, radix); +#elif defined HAVE_STRTOLL + long long r = strtoll(str, &end, radix); +#elif defined HAVE__STRTOI64 + long long r = _strtoi64(str, &end, radix); +#elif defined HAVE_STRTOIMAX + long long r = strtoimax(str, &end, radix); +#else +#error parse_longlong_radix: cannot convert input to a long-long +#endif + if (end != str + n) return false; // Leftover junk + if (errno) return false; + if (dest == NULL) return true; + *(reinterpret_cast<long long*>(dest)) = r; + return true; +#endif /* HAVE_LONG_LONG */ +} + +bool Arg::parse_ulonglong_radix(const char* str, + int n, + void* dest, + int radix) { +#ifndef HAVE_UNSIGNED_LONG_LONG + return false; +#else + if (n == 0) return false; + char buf[kMaxNumberLength+1]; + str = TerminateNumber(buf, str, n); + if (str[0] == '-') return false; // strtoull() on a negative number?! + char* end; + errno = 0; +#if defined HAVE_STRTOQ + unsigned long long r = strtouq(str, &end, radix); +#elif defined HAVE_STRTOLL + unsigned long long r = strtoull(str, &end, radix); +#elif defined HAVE__STRTOI64 + unsigned long long r = _strtoui64(str, &end, radix); +#elif defined HAVE_STRTOIMAX + unsigned long long r = strtoumax(str, &end, radix); +#else +#error parse_ulonglong_radix: cannot convert input to a long-long +#endif + if (end != str + n) return false; // Leftover junk + if (errno) return false; + if (dest == NULL) return true; + *(reinterpret_cast<unsigned long long*>(dest)) = r; + return true; +#endif /* HAVE_UNSIGNED_LONG_LONG */ +} + +bool Arg::parse_double(const char* str, int n, void* dest) { + if (n == 0) return false; + static const int kMaxLength = 200; + char buf[kMaxLength]; + if (n >= kMaxLength) return false; + memcpy(buf, str, n); + buf[n] = '\0'; + errno = 0; + char* end; + double r = strtod(buf, &end); + if (end != buf + n) return false; // Leftover junk + if (errno) return false; + if (dest == NULL) return true; + *(reinterpret_cast<double*>(dest)) = r; + return true; +} + +bool Arg::parse_float(const char* str, int n, void* dest) { + double r; + if (!parse_double(str, n, &r)) return false; + if (dest == NULL) return true; + *(reinterpret_cast<float*>(dest)) = static_cast<float>(r); + return true; +} + + +#define DEFINE_INTEGER_PARSERS(name) \ + bool Arg::parse_##name(const char* str, int n, void* dest) { \ + return parse_##name##_radix(str, n, dest, 10); \ + } \ + bool Arg::parse_##name##_hex(const char* str, int n, void* dest) { \ + return parse_##name##_radix(str, n, dest, 16); \ + } \ + bool Arg::parse_##name##_octal(const char* str, int n, void* dest) { \ + return parse_##name##_radix(str, n, dest, 8); \ + } \ + bool Arg::parse_##name##_cradix(const char* str, int n, void* dest) { \ + return parse_##name##_radix(str, n, dest, 0); \ + } + +DEFINE_INTEGER_PARSERS(short) /* */ +DEFINE_INTEGER_PARSERS(ushort) /* */ +DEFINE_INTEGER_PARSERS(int) /* Don't use semicolons after these */ +DEFINE_INTEGER_PARSERS(uint) /* statements because they can cause */ +DEFINE_INTEGER_PARSERS(long) /* compiler warnings if the checking */ +DEFINE_INTEGER_PARSERS(ulong) /* level is turned up high enough. */ +DEFINE_INTEGER_PARSERS(longlong) /* */ +DEFINE_INTEGER_PARSERS(ulonglong) /* */ + +#undef DEFINE_INTEGER_PARSERS + +} // namespace pcrecpp diff --git a/contrib/libs/pcre/pcrecpp.h b/contrib/libs/pcre/pcrecpp.h index e57066b545..dffe9a34ad 100644 --- a/contrib/libs/pcre/pcrecpp.h +++ b/contrib/libs/pcre/pcrecpp.h @@ -1,710 +1,710 @@ -// Copyright (c) 2005, Google Inc. -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Author: Sanjay Ghemawat -// Support for PCRE_XXX modifiers added by Giuseppe Maxia, July 2005 - -#ifndef _PCRECPP_H -#define _PCRECPP_H - -// C++ interface to the pcre regular-expression library. RE supports -// Perl-style regular expressions (with extensions like \d, \w, \s, -// ...). -// -// ----------------------------------------------------------------------- -// REGEXP SYNTAX: -// -// This module is part of the pcre library and hence supports its syntax -// for regular expressions. -// -// The syntax is pretty similar to Perl's. For those not familiar -// with Perl's regular expressions, here are some examples of the most -// commonly used extensions: -// -// "hello (\\w+) world" -- \w matches a "word" character -// "version (\\d+)" -- \d matches a digit -// "hello\\s+world" -- \s matches any whitespace character -// "\\b(\\w+)\\b" -- \b matches empty string at a word boundary -// "(?i)hello" -- (?i) turns on case-insensitive matching -// "/\\*(.*?)\\*/" -- .*? matches . minimum no. of times possible -// -// ----------------------------------------------------------------------- -// MATCHING INTERFACE: -// -// The "FullMatch" operation checks that supplied text matches a -// supplied pattern exactly. -// -// Example: successful match -// pcrecpp::RE re("h.*o"); -// re.FullMatch("hello"); -// -// Example: unsuccessful match (requires full match): -// pcrecpp::RE re("e"); -// !re.FullMatch("hello"); -// -// Example: creating a temporary RE object: -// pcrecpp::RE("h.*o").FullMatch("hello"); -// -// You can pass in a "const char*" or a "string" for "text". The -// examples below tend to use a const char*. -// -// You can, as in the different examples above, store the RE object -// explicitly in a variable or use a temporary RE object. The -// examples below use one mode or the other arbitrarily. Either -// could correctly be used for any of these examples. -// -// ----------------------------------------------------------------------- -// MATCHING WITH SUB-STRING EXTRACTION: -// -// You can supply extra pointer arguments to extract matched subpieces. -// -// Example: extracts "ruby" into "s" and 1234 into "i" -// int i; -// string s; -// pcrecpp::RE re("(\\w+):(\\d+)"); -// re.FullMatch("ruby:1234", &s, &i); -// -// Example: does not try to extract any extra sub-patterns -// re.FullMatch("ruby:1234", &s); -// -// Example: does not try to extract into NULL -// re.FullMatch("ruby:1234", NULL, &i); -// -// Example: integer overflow causes failure -// !re.FullMatch("ruby:1234567891234", NULL, &i); -// -// Example: fails because there aren't enough sub-patterns: -// !pcrecpp::RE("\\w+:\\d+").FullMatch("ruby:1234", &s); -// -// Example: fails because string cannot be stored in integer -// !pcrecpp::RE("(.*)").FullMatch("ruby", &i); -// -// The provided pointer arguments can be pointers to any scalar numeric -// type, or one of -// string (matched piece is copied to string) -// StringPiece (StringPiece is mutated to point to matched piece) -// T (where "bool T::ParseFrom(const char*, int)" exists) -// NULL (the corresponding matched sub-pattern is not copied) -// -// CAVEAT: An optional sub-pattern that does not exist in the matched -// string is assigned the empty string. Therefore, the following will -// return false (because the empty string is not a valid number): -// int number; -// pcrecpp::RE::FullMatch("abc", "[a-z]+(\\d+)?", &number); -// -// ----------------------------------------------------------------------- -// DO_MATCH -// -// The matching interface supports at most 16 arguments per call. -// If you need more, consider using the more general interface -// pcrecpp::RE::DoMatch(). See pcrecpp.h for the signature for DoMatch. -// -// ----------------------------------------------------------------------- -// PARTIAL MATCHES -// -// You can use the "PartialMatch" operation when you want the pattern -// to match any substring of the text. -// -// Example: simple search for a string: -// pcrecpp::RE("ell").PartialMatch("hello"); -// -// Example: find first number in a string: -// int number; -// pcrecpp::RE re("(\\d+)"); -// re.PartialMatch("x*100 + 20", &number); -// assert(number == 100); -// -// ----------------------------------------------------------------------- -// UTF-8 AND THE MATCHING INTERFACE: -// -// By default, pattern and text are plain text, one byte per character. -// The UTF8 flag, passed to the constructor, causes both pattern -// and string to be treated as UTF-8 text, still a byte stream but -// potentially multiple bytes per character. In practice, the text -// is likelier to be UTF-8 than the pattern, but the match returned -// may depend on the UTF8 flag, so always use it when matching -// UTF8 text. E.g., "." will match one byte normally but with UTF8 -// set may match up to three bytes of a multi-byte character. -// -// Example: -// pcrecpp::RE_Options options; -// options.set_utf8(); -// pcrecpp::RE re(utf8_pattern, options); -// re.FullMatch(utf8_string); -// -// Example: using the convenience function UTF8(): -// pcrecpp::RE re(utf8_pattern, pcrecpp::UTF8()); -// re.FullMatch(utf8_string); -// -// NOTE: The UTF8 option is ignored if pcre was not configured with the -// --enable-utf8 flag. -// -// ----------------------------------------------------------------------- -// PASSING MODIFIERS TO THE REGULAR EXPRESSION ENGINE -// -// PCRE defines some modifiers to change the behavior of the regular -// expression engine. -// The C++ wrapper defines an auxiliary class, RE_Options, as a vehicle -// to pass such modifiers to a RE class. -// -// Currently, the following modifiers are supported -// -// modifier description Perl corresponding -// -// PCRE_CASELESS case insensitive match /i -// PCRE_MULTILINE multiple lines match /m -// PCRE_DOTALL dot matches newlines /s -// PCRE_DOLLAR_ENDONLY $ matches only at end N/A -// PCRE_EXTRA strict escape parsing N/A -// PCRE_EXTENDED ignore whitespaces /x -// PCRE_UTF8 handles UTF8 chars built-in -// PCRE_UNGREEDY reverses * and *? N/A -// PCRE_NO_AUTO_CAPTURE disables matching parens N/A (*) -// -// (For a full account on how each modifier works, please check the -// PCRE API reference manual). -// -// (*) Both Perl and PCRE allow non matching parentheses by means of the -// "?:" modifier within the pattern itself. e.g. (?:ab|cd) does not -// capture, while (ab|cd) does. -// -// For each modifier, there are two member functions whose name is made -// out of the modifier in lowercase, without the "PCRE_" prefix. For -// instance, PCRE_CASELESS is handled by -// bool caseless(), -// which returns true if the modifier is set, and -// RE_Options & set_caseless(bool), -// which sets or unsets the modifier. -// -// Moreover, PCRE_EXTRA_MATCH_LIMIT can be accessed through the -// set_match_limit() and match_limit() member functions. -// Setting match_limit to a non-zero value will limit the executation of -// pcre to keep it from doing bad things like blowing the stack or taking -// an eternity to return a result. A value of 5000 is good enough to stop -// stack blowup in a 2MB thread stack. Setting match_limit to zero will -// disable match limiting. Alternately, you can set match_limit_recursion() -// which uses PCRE_EXTRA_MATCH_LIMIT_RECURSION to limit how much pcre -// recurses. match_limit() caps the number of matches pcre does; -// match_limit_recrusion() caps the depth of recursion. -// -// Normally, to pass one or more modifiers to a RE class, you declare -// a RE_Options object, set the appropriate options, and pass this -// object to a RE constructor. Example: -// -// RE_options opt; -// opt.set_caseless(true); -// -// if (RE("HELLO", opt).PartialMatch("hello world")) ... -// -// RE_options has two constructors. The default constructor takes no -// arguments and creates a set of flags that are off by default. -// -// The optional parameter 'option_flags' is to facilitate transfer -// of legacy code from C programs. This lets you do -// RE(pattern, RE_Options(PCRE_CASELESS|PCRE_MULTILINE)).PartialMatch(str); -// -// But new code is better off doing -// RE(pattern, -// RE_Options().set_caseless(true).set_multiline(true)).PartialMatch(str); -// (See below) -// -// If you are going to pass one of the most used modifiers, there are some -// convenience functions that return a RE_Options class with the -// appropriate modifier already set: -// CASELESS(), UTF8(), MULTILINE(), DOTALL(), EXTENDED() -// -// If you need to set several options at once, and you don't want to go -// through the pains of declaring a RE_Options object and setting several -// options, there is a parallel method that give you such ability on the -// fly. You can concatenate several set_xxxxx member functions, since each -// of them returns a reference to its class object. e.g.: to pass -// PCRE_CASELESS, PCRE_EXTENDED, and PCRE_MULTILINE to a RE with one -// statement, you may write -// -// RE(" ^ xyz \\s+ .* blah$", RE_Options() -// .set_caseless(true) -// .set_extended(true) -// .set_multiline(true)).PartialMatch(sometext); -// -// ----------------------------------------------------------------------- -// SCANNING TEXT INCREMENTALLY -// -// The "Consume" operation may be useful if you want to repeatedly -// match regular expressions at the front of a string and skip over -// them as they match. This requires use of the "StringPiece" type, -// which represents a sub-range of a real string. Like RE, StringPiece -// is defined in the pcrecpp namespace. -// -// Example: read lines of the form "var = value" from a string. -// string contents = ...; // Fill string somehow -// pcrecpp::StringPiece input(contents); // Wrap in a StringPiece -// -// string var; -// int value; -// pcrecpp::RE re("(\\w+) = (\\d+)\n"); -// while (re.Consume(&input, &var, &value)) { -// ...; -// } -// -// Each successful call to "Consume" will set "var/value", and also -// advance "input" so it points past the matched text. -// -// The "FindAndConsume" operation is similar to "Consume" but does not -// anchor your match at the beginning of the string. For example, you -// could extract all words from a string by repeatedly calling -// pcrecpp::RE("(\\w+)").FindAndConsume(&input, &word) -// -// ----------------------------------------------------------------------- -// PARSING HEX/OCTAL/C-RADIX NUMBERS -// -// By default, if you pass a pointer to a numeric value, the -// corresponding text is interpreted as a base-10 number. You can -// instead wrap the pointer with a call to one of the operators Hex(), -// Octal(), or CRadix() to interpret the text in another base. The -// CRadix operator interprets C-style "0" (base-8) and "0x" (base-16) -// prefixes, but defaults to base-10. -// -// Example: -// int a, b, c, d; -// pcrecpp::RE re("(.*) (.*) (.*) (.*)"); -// re.FullMatch("100 40 0100 0x40", -// pcrecpp::Octal(&a), pcrecpp::Hex(&b), -// pcrecpp::CRadix(&c), pcrecpp::CRadix(&d)); -// will leave 64 in a, b, c, and d. -// -// ----------------------------------------------------------------------- -// REPLACING PARTS OF STRINGS -// -// You can replace the first match of "pattern" in "str" with -// "rewrite". Within "rewrite", backslash-escaped digits (\1 to \9) -// can be used to insert text matching corresponding parenthesized -// group from the pattern. \0 in "rewrite" refers to the entire -// matching text. E.g., -// -// string s = "yabba dabba doo"; -// pcrecpp::RE("b+").Replace("d", &s); -// -// will leave "s" containing "yada dabba doo". The result is true if -// the pattern matches and a replacement occurs, or false otherwise. -// -// GlobalReplace() is like Replace(), except that it replaces all -// occurrences of the pattern in the string with the rewrite. -// Replacements are not subject to re-matching. E.g., -// -// string s = "yabba dabba doo"; -// pcrecpp::RE("b+").GlobalReplace("d", &s); -// -// will leave "s" containing "yada dada doo". It returns the number -// of replacements made. -// -// Extract() is like Replace(), except that if the pattern matches, -// "rewrite" is copied into "out" (an additional argument) with -// substitutions. The non-matching portions of "text" are ignored. -// Returns true iff a match occurred and the extraction happened -// successfully. If no match occurs, the string is left unaffected. - - -#include <string> +// Copyright (c) 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Author: Sanjay Ghemawat +// Support for PCRE_XXX modifiers added by Giuseppe Maxia, July 2005 + +#ifndef _PCRECPP_H +#define _PCRECPP_H + +// C++ interface to the pcre regular-expression library. RE supports +// Perl-style regular expressions (with extensions like \d, \w, \s, +// ...). +// +// ----------------------------------------------------------------------- +// REGEXP SYNTAX: +// +// This module is part of the pcre library and hence supports its syntax +// for regular expressions. +// +// The syntax is pretty similar to Perl's. For those not familiar +// with Perl's regular expressions, here are some examples of the most +// commonly used extensions: +// +// "hello (\\w+) world" -- \w matches a "word" character +// "version (\\d+)" -- \d matches a digit +// "hello\\s+world" -- \s matches any whitespace character +// "\\b(\\w+)\\b" -- \b matches empty string at a word boundary +// "(?i)hello" -- (?i) turns on case-insensitive matching +// "/\\*(.*?)\\*/" -- .*? matches . minimum no. of times possible +// +// ----------------------------------------------------------------------- +// MATCHING INTERFACE: +// +// The "FullMatch" operation checks that supplied text matches a +// supplied pattern exactly. +// +// Example: successful match +// pcrecpp::RE re("h.*o"); +// re.FullMatch("hello"); +// +// Example: unsuccessful match (requires full match): +// pcrecpp::RE re("e"); +// !re.FullMatch("hello"); +// +// Example: creating a temporary RE object: +// pcrecpp::RE("h.*o").FullMatch("hello"); +// +// You can pass in a "const char*" or a "string" for "text". The +// examples below tend to use a const char*. +// +// You can, as in the different examples above, store the RE object +// explicitly in a variable or use a temporary RE object. The +// examples below use one mode or the other arbitrarily. Either +// could correctly be used for any of these examples. +// +// ----------------------------------------------------------------------- +// MATCHING WITH SUB-STRING EXTRACTION: +// +// You can supply extra pointer arguments to extract matched subpieces. +// +// Example: extracts "ruby" into "s" and 1234 into "i" +// int i; +// string s; +// pcrecpp::RE re("(\\w+):(\\d+)"); +// re.FullMatch("ruby:1234", &s, &i); +// +// Example: does not try to extract any extra sub-patterns +// re.FullMatch("ruby:1234", &s); +// +// Example: does not try to extract into NULL +// re.FullMatch("ruby:1234", NULL, &i); +// +// Example: integer overflow causes failure +// !re.FullMatch("ruby:1234567891234", NULL, &i); +// +// Example: fails because there aren't enough sub-patterns: +// !pcrecpp::RE("\\w+:\\d+").FullMatch("ruby:1234", &s); +// +// Example: fails because string cannot be stored in integer +// !pcrecpp::RE("(.*)").FullMatch("ruby", &i); +// +// The provided pointer arguments can be pointers to any scalar numeric +// type, or one of +// string (matched piece is copied to string) +// StringPiece (StringPiece is mutated to point to matched piece) +// T (where "bool T::ParseFrom(const char*, int)" exists) +// NULL (the corresponding matched sub-pattern is not copied) +// +// CAVEAT: An optional sub-pattern that does not exist in the matched +// string is assigned the empty string. Therefore, the following will +// return false (because the empty string is not a valid number): +// int number; +// pcrecpp::RE::FullMatch("abc", "[a-z]+(\\d+)?", &number); +// +// ----------------------------------------------------------------------- +// DO_MATCH +// +// The matching interface supports at most 16 arguments per call. +// If you need more, consider using the more general interface +// pcrecpp::RE::DoMatch(). See pcrecpp.h for the signature for DoMatch. +// +// ----------------------------------------------------------------------- +// PARTIAL MATCHES +// +// You can use the "PartialMatch" operation when you want the pattern +// to match any substring of the text. +// +// Example: simple search for a string: +// pcrecpp::RE("ell").PartialMatch("hello"); +// +// Example: find first number in a string: +// int number; +// pcrecpp::RE re("(\\d+)"); +// re.PartialMatch("x*100 + 20", &number); +// assert(number == 100); +// +// ----------------------------------------------------------------------- +// UTF-8 AND THE MATCHING INTERFACE: +// +// By default, pattern and text are plain text, one byte per character. +// The UTF8 flag, passed to the constructor, causes both pattern +// and string to be treated as UTF-8 text, still a byte stream but +// potentially multiple bytes per character. In practice, the text +// is likelier to be UTF-8 than the pattern, but the match returned +// may depend on the UTF8 flag, so always use it when matching +// UTF8 text. E.g., "." will match one byte normally but with UTF8 +// set may match up to three bytes of a multi-byte character. +// +// Example: +// pcrecpp::RE_Options options; +// options.set_utf8(); +// pcrecpp::RE re(utf8_pattern, options); +// re.FullMatch(utf8_string); +// +// Example: using the convenience function UTF8(): +// pcrecpp::RE re(utf8_pattern, pcrecpp::UTF8()); +// re.FullMatch(utf8_string); +// +// NOTE: The UTF8 option is ignored if pcre was not configured with the +// --enable-utf8 flag. +// +// ----------------------------------------------------------------------- +// PASSING MODIFIERS TO THE REGULAR EXPRESSION ENGINE +// +// PCRE defines some modifiers to change the behavior of the regular +// expression engine. +// The C++ wrapper defines an auxiliary class, RE_Options, as a vehicle +// to pass such modifiers to a RE class. +// +// Currently, the following modifiers are supported +// +// modifier description Perl corresponding +// +// PCRE_CASELESS case insensitive match /i +// PCRE_MULTILINE multiple lines match /m +// PCRE_DOTALL dot matches newlines /s +// PCRE_DOLLAR_ENDONLY $ matches only at end N/A +// PCRE_EXTRA strict escape parsing N/A +// PCRE_EXTENDED ignore whitespaces /x +// PCRE_UTF8 handles UTF8 chars built-in +// PCRE_UNGREEDY reverses * and *? N/A +// PCRE_NO_AUTO_CAPTURE disables matching parens N/A (*) +// +// (For a full account on how each modifier works, please check the +// PCRE API reference manual). +// +// (*) Both Perl and PCRE allow non matching parentheses by means of the +// "?:" modifier within the pattern itself. e.g. (?:ab|cd) does not +// capture, while (ab|cd) does. +// +// For each modifier, there are two member functions whose name is made +// out of the modifier in lowercase, without the "PCRE_" prefix. For +// instance, PCRE_CASELESS is handled by +// bool caseless(), +// which returns true if the modifier is set, and +// RE_Options & set_caseless(bool), +// which sets or unsets the modifier. +// +// Moreover, PCRE_EXTRA_MATCH_LIMIT can be accessed through the +// set_match_limit() and match_limit() member functions. +// Setting match_limit to a non-zero value will limit the executation of +// pcre to keep it from doing bad things like blowing the stack or taking +// an eternity to return a result. A value of 5000 is good enough to stop +// stack blowup in a 2MB thread stack. Setting match_limit to zero will +// disable match limiting. Alternately, you can set match_limit_recursion() +// which uses PCRE_EXTRA_MATCH_LIMIT_RECURSION to limit how much pcre +// recurses. match_limit() caps the number of matches pcre does; +// match_limit_recrusion() caps the depth of recursion. +// +// Normally, to pass one or more modifiers to a RE class, you declare +// a RE_Options object, set the appropriate options, and pass this +// object to a RE constructor. Example: +// +// RE_options opt; +// opt.set_caseless(true); +// +// if (RE("HELLO", opt).PartialMatch("hello world")) ... +// +// RE_options has two constructors. The default constructor takes no +// arguments and creates a set of flags that are off by default. +// +// The optional parameter 'option_flags' is to facilitate transfer +// of legacy code from C programs. This lets you do +// RE(pattern, RE_Options(PCRE_CASELESS|PCRE_MULTILINE)).PartialMatch(str); +// +// But new code is better off doing +// RE(pattern, +// RE_Options().set_caseless(true).set_multiline(true)).PartialMatch(str); +// (See below) +// +// If you are going to pass one of the most used modifiers, there are some +// convenience functions that return a RE_Options class with the +// appropriate modifier already set: +// CASELESS(), UTF8(), MULTILINE(), DOTALL(), EXTENDED() +// +// If you need to set several options at once, and you don't want to go +// through the pains of declaring a RE_Options object and setting several +// options, there is a parallel method that give you such ability on the +// fly. You can concatenate several set_xxxxx member functions, since each +// of them returns a reference to its class object. e.g.: to pass +// PCRE_CASELESS, PCRE_EXTENDED, and PCRE_MULTILINE to a RE with one +// statement, you may write +// +// RE(" ^ xyz \\s+ .* blah$", RE_Options() +// .set_caseless(true) +// .set_extended(true) +// .set_multiline(true)).PartialMatch(sometext); +// +// ----------------------------------------------------------------------- +// SCANNING TEXT INCREMENTALLY +// +// The "Consume" operation may be useful if you want to repeatedly +// match regular expressions at the front of a string and skip over +// them as they match. This requires use of the "StringPiece" type, +// which represents a sub-range of a real string. Like RE, StringPiece +// is defined in the pcrecpp namespace. +// +// Example: read lines of the form "var = value" from a string. +// string contents = ...; // Fill string somehow +// pcrecpp::StringPiece input(contents); // Wrap in a StringPiece +// +// string var; +// int value; +// pcrecpp::RE re("(\\w+) = (\\d+)\n"); +// while (re.Consume(&input, &var, &value)) { +// ...; +// } +// +// Each successful call to "Consume" will set "var/value", and also +// advance "input" so it points past the matched text. +// +// The "FindAndConsume" operation is similar to "Consume" but does not +// anchor your match at the beginning of the string. For example, you +// could extract all words from a string by repeatedly calling +// pcrecpp::RE("(\\w+)").FindAndConsume(&input, &word) +// +// ----------------------------------------------------------------------- +// PARSING HEX/OCTAL/C-RADIX NUMBERS +// +// By default, if you pass a pointer to a numeric value, the +// corresponding text is interpreted as a base-10 number. You can +// instead wrap the pointer with a call to one of the operators Hex(), +// Octal(), or CRadix() to interpret the text in another base. The +// CRadix operator interprets C-style "0" (base-8) and "0x" (base-16) +// prefixes, but defaults to base-10. +// +// Example: +// int a, b, c, d; +// pcrecpp::RE re("(.*) (.*) (.*) (.*)"); +// re.FullMatch("100 40 0100 0x40", +// pcrecpp::Octal(&a), pcrecpp::Hex(&b), +// pcrecpp::CRadix(&c), pcrecpp::CRadix(&d)); +// will leave 64 in a, b, c, and d. +// +// ----------------------------------------------------------------------- +// REPLACING PARTS OF STRINGS +// +// You can replace the first match of "pattern" in "str" with +// "rewrite". Within "rewrite", backslash-escaped digits (\1 to \9) +// can be used to insert text matching corresponding parenthesized +// group from the pattern. \0 in "rewrite" refers to the entire +// matching text. E.g., +// +// string s = "yabba dabba doo"; +// pcrecpp::RE("b+").Replace("d", &s); +// +// will leave "s" containing "yada dabba doo". The result is true if +// the pattern matches and a replacement occurs, or false otherwise. +// +// GlobalReplace() is like Replace(), except that it replaces all +// occurrences of the pattern in the string with the rewrite. +// Replacements are not subject to re-matching. E.g., +// +// string s = "yabba dabba doo"; +// pcrecpp::RE("b+").GlobalReplace("d", &s); +// +// will leave "s" containing "yada dada doo". It returns the number +// of replacements made. +// +// Extract() is like Replace(), except that if the pattern matches, +// "rewrite" is copied into "out" (an additional argument) with +// substitutions. The non-matching portions of "text" are ignored. +// Returns true iff a match occurred and the extraction happened +// successfully. If no match occurs, the string is left unaffected. + + +#include <string> #include "pcre.h" #include "pcrecpparg.h" // defines the Arg class -// This isn't technically needed here, but we include it -// anyway so folks who include pcrecpp.h don't have to. +// This isn't technically needed here, but we include it +// anyway so folks who include pcrecpp.h don't have to. #include "pcre_stringpiece.h" - -namespace pcrecpp { - -#define PCRE_SET_OR_CLEAR(b, o) \ - if (b) all_options_ |= (o); else all_options_ &= ~(o); \ - return *this - -#define PCRE_IS_SET(o) \ - (all_options_ & o) == o - -/***** Compiling regular expressions: the RE class *****/ - -// RE_Options allow you to set options to be passed along to pcre, -// along with other options we put on top of pcre. -// Only 9 modifiers, plus match_limit and match_limit_recursion, -// are supported now. -class PCRECPP_EXP_DEFN RE_Options { - public: - // constructor - RE_Options() : match_limit_(0), match_limit_recursion_(0), all_options_(0) {} - - // alternative constructor. - // To facilitate transfer of legacy code from C programs - // - // This lets you do - // RE(pattern, RE_Options(PCRE_CASELESS|PCRE_MULTILINE)).PartialMatch(str); - // But new code is better off doing - // RE(pattern, - // RE_Options().set_caseless(true).set_multiline(true)).PartialMatch(str); - RE_Options(int option_flags) : match_limit_(0), match_limit_recursion_(0), - all_options_(option_flags) {} - // we're fine with the default destructor, copy constructor, etc. - - // accessors and mutators - int match_limit() const { return match_limit_; }; - RE_Options &set_match_limit(int limit) { - match_limit_ = limit; - return *this; - } - - int match_limit_recursion() const { return match_limit_recursion_; }; - RE_Options &set_match_limit_recursion(int limit) { - match_limit_recursion_ = limit; - return *this; - } - - bool caseless() const { - return PCRE_IS_SET(PCRE_CASELESS); - } - RE_Options &set_caseless(bool x) { - PCRE_SET_OR_CLEAR(x, PCRE_CASELESS); - } - - bool multiline() const { - return PCRE_IS_SET(PCRE_MULTILINE); - } - RE_Options &set_multiline(bool x) { - PCRE_SET_OR_CLEAR(x, PCRE_MULTILINE); - } - - bool dotall() const { - return PCRE_IS_SET(PCRE_DOTALL); - } - RE_Options &set_dotall(bool x) { - PCRE_SET_OR_CLEAR(x, PCRE_DOTALL); - } - - bool extended() const { - return PCRE_IS_SET(PCRE_EXTENDED); - } - RE_Options &set_extended(bool x) { - PCRE_SET_OR_CLEAR(x, PCRE_EXTENDED); - } - - bool dollar_endonly() const { - return PCRE_IS_SET(PCRE_DOLLAR_ENDONLY); - } - RE_Options &set_dollar_endonly(bool x) { - PCRE_SET_OR_CLEAR(x, PCRE_DOLLAR_ENDONLY); - } - - bool extra() const { - return PCRE_IS_SET(PCRE_EXTRA); - } - RE_Options &set_extra(bool x) { - PCRE_SET_OR_CLEAR(x, PCRE_EXTRA); - } - - bool ungreedy() const { - return PCRE_IS_SET(PCRE_UNGREEDY); - } - RE_Options &set_ungreedy(bool x) { - PCRE_SET_OR_CLEAR(x, PCRE_UNGREEDY); - } - - bool utf8() const { - return PCRE_IS_SET(PCRE_UTF8); - } - RE_Options &set_utf8(bool x) { - PCRE_SET_OR_CLEAR(x, PCRE_UTF8); - } - - bool no_auto_capture() const { - return PCRE_IS_SET(PCRE_NO_AUTO_CAPTURE); - } - RE_Options &set_no_auto_capture(bool x) { - PCRE_SET_OR_CLEAR(x, PCRE_NO_AUTO_CAPTURE); - } - - RE_Options &set_all_options(int opt) { - all_options_ = opt; - return *this; - } - int all_options() const { - return all_options_ ; - } - - // TODO: add other pcre flags - - private: - int match_limit_; - int match_limit_recursion_; - int all_options_; -}; - -// These functions return some common RE_Options -static inline RE_Options UTF8() { - return RE_Options().set_utf8(true); -} - -static inline RE_Options CASELESS() { - return RE_Options().set_caseless(true); -} -static inline RE_Options MULTILINE() { - return RE_Options().set_multiline(true); -} - -static inline RE_Options DOTALL() { - return RE_Options().set_dotall(true); -} - -static inline RE_Options EXTENDED() { - return RE_Options().set_extended(true); -} - -// Interface for regular expression matching. Also corresponds to a -// pre-compiled regular expression. An "RE" object is safe for -// concurrent use by multiple threads. -class PCRECPP_EXP_DEFN RE { - public: - // We provide implicit conversions from strings so that users can - // pass in a string or a "const char*" wherever an "RE" is expected. - RE(const string& pat) { Init(pat, NULL); } - RE(const string& pat, const RE_Options& option) { Init(pat, &option); } - RE(const char* pat) { Init(pat, NULL); } - RE(const char* pat, const RE_Options& option) { Init(pat, &option); } - RE(const unsigned char* pat) { - Init(reinterpret_cast<const char*>(pat), NULL); - } - RE(const unsigned char* pat, const RE_Options& option) { - Init(reinterpret_cast<const char*>(pat), &option); - } - - // Copy constructor & assignment - note that these are expensive - // because they recompile the expression. - RE(const RE& re) { Init(re.pattern_, &re.options_); } - const RE& operator=(const RE& re) { - if (this != &re) { - Cleanup(); - - // This is the code that originally came from Google - // Init(re.pattern_.c_str(), &re.options_); - - // This is the replacement from Ari Pollak - Init(re.pattern_, &re.options_); - } - return *this; - } - - - ~RE(); - - // The string specification for this RE. E.g. - // RE re("ab*c?d+"); - // re.pattern(); // "ab*c?d+" - const string& pattern() const { return pattern_; } - - // If RE could not be created properly, returns an error string. - // Else returns the empty string. - const string& error() const { return *error_; } - - /***** The useful part: the matching interface *****/ - - // This is provided so one can do pattern.ReplaceAll() just as - // easily as ReplaceAll(pattern-text, ....) - - bool FullMatch(const StringPiece& text, - const Arg& ptr1 = no_arg, - const Arg& ptr2 = no_arg, - const Arg& ptr3 = no_arg, - const Arg& ptr4 = no_arg, - const Arg& ptr5 = no_arg, - const Arg& ptr6 = no_arg, - const Arg& ptr7 = no_arg, - const Arg& ptr8 = no_arg, - const Arg& ptr9 = no_arg, - const Arg& ptr10 = no_arg, - const Arg& ptr11 = no_arg, - const Arg& ptr12 = no_arg, - const Arg& ptr13 = no_arg, - const Arg& ptr14 = no_arg, - const Arg& ptr15 = no_arg, - const Arg& ptr16 = no_arg) const; - - bool PartialMatch(const StringPiece& text, - const Arg& ptr1 = no_arg, - const Arg& ptr2 = no_arg, - const Arg& ptr3 = no_arg, - const Arg& ptr4 = no_arg, - const Arg& ptr5 = no_arg, - const Arg& ptr6 = no_arg, - const Arg& ptr7 = no_arg, - const Arg& ptr8 = no_arg, - const Arg& ptr9 = no_arg, - const Arg& ptr10 = no_arg, - const Arg& ptr11 = no_arg, - const Arg& ptr12 = no_arg, - const Arg& ptr13 = no_arg, - const Arg& ptr14 = no_arg, - const Arg& ptr15 = no_arg, - const Arg& ptr16 = no_arg) const; - - bool Consume(StringPiece* input, - const Arg& ptr1 = no_arg, - const Arg& ptr2 = no_arg, - const Arg& ptr3 = no_arg, - const Arg& ptr4 = no_arg, - const Arg& ptr5 = no_arg, - const Arg& ptr6 = no_arg, - const Arg& ptr7 = no_arg, - const Arg& ptr8 = no_arg, - const Arg& ptr9 = no_arg, - const Arg& ptr10 = no_arg, - const Arg& ptr11 = no_arg, - const Arg& ptr12 = no_arg, - const Arg& ptr13 = no_arg, - const Arg& ptr14 = no_arg, - const Arg& ptr15 = no_arg, - const Arg& ptr16 = no_arg) const; - - bool FindAndConsume(StringPiece* input, - const Arg& ptr1 = no_arg, - const Arg& ptr2 = no_arg, - const Arg& ptr3 = no_arg, - const Arg& ptr4 = no_arg, - const Arg& ptr5 = no_arg, - const Arg& ptr6 = no_arg, - const Arg& ptr7 = no_arg, - const Arg& ptr8 = no_arg, - const Arg& ptr9 = no_arg, - const Arg& ptr10 = no_arg, - const Arg& ptr11 = no_arg, - const Arg& ptr12 = no_arg, - const Arg& ptr13 = no_arg, - const Arg& ptr14 = no_arg, - const Arg& ptr15 = no_arg, - const Arg& ptr16 = no_arg) const; - - bool Replace(const StringPiece& rewrite, - string *str) const; - - int GlobalReplace(const StringPiece& rewrite, - string *str) const; - - bool Extract(const StringPiece &rewrite, - const StringPiece &text, - string *out) const; - - // Escapes all potentially meaningful regexp characters in - // 'unquoted'. The returned string, used as a regular expression, - // will exactly match the original string. For example, - // 1.5-2.0? - // may become: - // 1\.5\-2\.0\? - // Note QuoteMeta behaves the same as perl's QuoteMeta function, - // *except* that it escapes the NUL character (\0) as backslash + 0, - // rather than backslash + NUL. - static string QuoteMeta(const StringPiece& unquoted); - - - /***** Generic matching interface *****/ - - // Type of match (TODO: Should be restructured as part of RE_Options) - enum Anchor { - UNANCHORED, // No anchoring - ANCHOR_START, // Anchor at start only - ANCHOR_BOTH // Anchor at start and end - }; - - // General matching routine. Stores the length of the match in - // "*consumed" if successful. - bool DoMatch(const StringPiece& text, - Anchor anchor, - int* consumed, - const Arg* const* args, int n) const; - - // Return the number of capturing subpatterns, or -1 if the - // regexp wasn't valid on construction. - int NumberOfCapturingGroups() const; - - // The default value for an argument, to indicate the end of the argument - // list. This must be used only in optional argument defaults. It should NOT - // be passed explicitly. Some people have tried to use it like this: - // - // FullMatch(x, y, &z, no_arg, &w); - // - // This is a mistake, and will not work. - static Arg no_arg; - - private: - - void Init(const string& pattern, const RE_Options* options); - void Cleanup(); - - // Match against "text", filling in "vec" (up to "vecsize" * 2/3) with - // pairs of integers for the beginning and end positions of matched - // text. The first pair corresponds to the entire matched text; - // subsequent pairs correspond, in order, to parentheses-captured - // matches. Returns the number of pairs (one more than the number of - // the last subpattern with a match) if matching was successful - // and zero if the match failed. - // I.e. for RE("(foo)|(bar)|(baz)") it will return 2, 3, and 4 when matching - // against "foo", "bar", and "baz" respectively. - // When matching RE("(foo)|hello") against "hello", it will return 1. - // But the values for all subpattern are filled in into "vec". - int TryMatch(const StringPiece& text, - int startpos, - Anchor anchor, - bool empty_ok, - int *vec, - int vecsize) const; - - // Append the "rewrite" string, with backslash subsitutions from "text" - // and "vec", to string "out". - bool Rewrite(string *out, - const StringPiece& rewrite, - const StringPiece& text, - int *vec, - int veclen) const; - - // internal implementation for DoMatch - bool DoMatchImpl(const StringPiece& text, - Anchor anchor, - int* consumed, - const Arg* const args[], - int n, - int* vec, - int vecsize) const; - - // Compile the regexp for the specified anchoring mode - pcre* Compile(Anchor anchor); - - string pattern_; - RE_Options options_; - pcre* re_full_; // For full matches - pcre* re_partial_; // For partial matches - const string* error_; // Error indicator (or points to empty string) -}; - -} // namespace pcrecpp - -#endif /* _PCRECPP_H */ + +namespace pcrecpp { + +#define PCRE_SET_OR_CLEAR(b, o) \ + if (b) all_options_ |= (o); else all_options_ &= ~(o); \ + return *this + +#define PCRE_IS_SET(o) \ + (all_options_ & o) == o + +/***** Compiling regular expressions: the RE class *****/ + +// RE_Options allow you to set options to be passed along to pcre, +// along with other options we put on top of pcre. +// Only 9 modifiers, plus match_limit and match_limit_recursion, +// are supported now. +class PCRECPP_EXP_DEFN RE_Options { + public: + // constructor + RE_Options() : match_limit_(0), match_limit_recursion_(0), all_options_(0) {} + + // alternative constructor. + // To facilitate transfer of legacy code from C programs + // + // This lets you do + // RE(pattern, RE_Options(PCRE_CASELESS|PCRE_MULTILINE)).PartialMatch(str); + // But new code is better off doing + // RE(pattern, + // RE_Options().set_caseless(true).set_multiline(true)).PartialMatch(str); + RE_Options(int option_flags) : match_limit_(0), match_limit_recursion_(0), + all_options_(option_flags) {} + // we're fine with the default destructor, copy constructor, etc. + + // accessors and mutators + int match_limit() const { return match_limit_; }; + RE_Options &set_match_limit(int limit) { + match_limit_ = limit; + return *this; + } + + int match_limit_recursion() const { return match_limit_recursion_; }; + RE_Options &set_match_limit_recursion(int limit) { + match_limit_recursion_ = limit; + return *this; + } + + bool caseless() const { + return PCRE_IS_SET(PCRE_CASELESS); + } + RE_Options &set_caseless(bool x) { + PCRE_SET_OR_CLEAR(x, PCRE_CASELESS); + } + + bool multiline() const { + return PCRE_IS_SET(PCRE_MULTILINE); + } + RE_Options &set_multiline(bool x) { + PCRE_SET_OR_CLEAR(x, PCRE_MULTILINE); + } + + bool dotall() const { + return PCRE_IS_SET(PCRE_DOTALL); + } + RE_Options &set_dotall(bool x) { + PCRE_SET_OR_CLEAR(x, PCRE_DOTALL); + } + + bool extended() const { + return PCRE_IS_SET(PCRE_EXTENDED); + } + RE_Options &set_extended(bool x) { + PCRE_SET_OR_CLEAR(x, PCRE_EXTENDED); + } + + bool dollar_endonly() const { + return PCRE_IS_SET(PCRE_DOLLAR_ENDONLY); + } + RE_Options &set_dollar_endonly(bool x) { + PCRE_SET_OR_CLEAR(x, PCRE_DOLLAR_ENDONLY); + } + + bool extra() const { + return PCRE_IS_SET(PCRE_EXTRA); + } + RE_Options &set_extra(bool x) { + PCRE_SET_OR_CLEAR(x, PCRE_EXTRA); + } + + bool ungreedy() const { + return PCRE_IS_SET(PCRE_UNGREEDY); + } + RE_Options &set_ungreedy(bool x) { + PCRE_SET_OR_CLEAR(x, PCRE_UNGREEDY); + } + + bool utf8() const { + return PCRE_IS_SET(PCRE_UTF8); + } + RE_Options &set_utf8(bool x) { + PCRE_SET_OR_CLEAR(x, PCRE_UTF8); + } + + bool no_auto_capture() const { + return PCRE_IS_SET(PCRE_NO_AUTO_CAPTURE); + } + RE_Options &set_no_auto_capture(bool x) { + PCRE_SET_OR_CLEAR(x, PCRE_NO_AUTO_CAPTURE); + } + + RE_Options &set_all_options(int opt) { + all_options_ = opt; + return *this; + } + int all_options() const { + return all_options_ ; + } + + // TODO: add other pcre flags + + private: + int match_limit_; + int match_limit_recursion_; + int all_options_; +}; + +// These functions return some common RE_Options +static inline RE_Options UTF8() { + return RE_Options().set_utf8(true); +} + +static inline RE_Options CASELESS() { + return RE_Options().set_caseless(true); +} +static inline RE_Options MULTILINE() { + return RE_Options().set_multiline(true); +} + +static inline RE_Options DOTALL() { + return RE_Options().set_dotall(true); +} + +static inline RE_Options EXTENDED() { + return RE_Options().set_extended(true); +} + +// Interface for regular expression matching. Also corresponds to a +// pre-compiled regular expression. An "RE" object is safe for +// concurrent use by multiple threads. +class PCRECPP_EXP_DEFN RE { + public: + // We provide implicit conversions from strings so that users can + // pass in a string or a "const char*" wherever an "RE" is expected. + RE(const string& pat) { Init(pat, NULL); } + RE(const string& pat, const RE_Options& option) { Init(pat, &option); } + RE(const char* pat) { Init(pat, NULL); } + RE(const char* pat, const RE_Options& option) { Init(pat, &option); } + RE(const unsigned char* pat) { + Init(reinterpret_cast<const char*>(pat), NULL); + } + RE(const unsigned char* pat, const RE_Options& option) { + Init(reinterpret_cast<const char*>(pat), &option); + } + + // Copy constructor & assignment - note that these are expensive + // because they recompile the expression. + RE(const RE& re) { Init(re.pattern_, &re.options_); } + const RE& operator=(const RE& re) { + if (this != &re) { + Cleanup(); + + // This is the code that originally came from Google + // Init(re.pattern_.c_str(), &re.options_); + + // This is the replacement from Ari Pollak + Init(re.pattern_, &re.options_); + } + return *this; + } + + + ~RE(); + + // The string specification for this RE. E.g. + // RE re("ab*c?d+"); + // re.pattern(); // "ab*c?d+" + const string& pattern() const { return pattern_; } + + // If RE could not be created properly, returns an error string. + // Else returns the empty string. + const string& error() const { return *error_; } + + /***** The useful part: the matching interface *****/ + + // This is provided so one can do pattern.ReplaceAll() just as + // easily as ReplaceAll(pattern-text, ....) + + bool FullMatch(const StringPiece& text, + const Arg& ptr1 = no_arg, + const Arg& ptr2 = no_arg, + const Arg& ptr3 = no_arg, + const Arg& ptr4 = no_arg, + const Arg& ptr5 = no_arg, + const Arg& ptr6 = no_arg, + const Arg& ptr7 = no_arg, + const Arg& ptr8 = no_arg, + const Arg& ptr9 = no_arg, + const Arg& ptr10 = no_arg, + const Arg& ptr11 = no_arg, + const Arg& ptr12 = no_arg, + const Arg& ptr13 = no_arg, + const Arg& ptr14 = no_arg, + const Arg& ptr15 = no_arg, + const Arg& ptr16 = no_arg) const; + + bool PartialMatch(const StringPiece& text, + const Arg& ptr1 = no_arg, + const Arg& ptr2 = no_arg, + const Arg& ptr3 = no_arg, + const Arg& ptr4 = no_arg, + const Arg& ptr5 = no_arg, + const Arg& ptr6 = no_arg, + const Arg& ptr7 = no_arg, + const Arg& ptr8 = no_arg, + const Arg& ptr9 = no_arg, + const Arg& ptr10 = no_arg, + const Arg& ptr11 = no_arg, + const Arg& ptr12 = no_arg, + const Arg& ptr13 = no_arg, + const Arg& ptr14 = no_arg, + const Arg& ptr15 = no_arg, + const Arg& ptr16 = no_arg) const; + + bool Consume(StringPiece* input, + const Arg& ptr1 = no_arg, + const Arg& ptr2 = no_arg, + const Arg& ptr3 = no_arg, + const Arg& ptr4 = no_arg, + const Arg& ptr5 = no_arg, + const Arg& ptr6 = no_arg, + const Arg& ptr7 = no_arg, + const Arg& ptr8 = no_arg, + const Arg& ptr9 = no_arg, + const Arg& ptr10 = no_arg, + const Arg& ptr11 = no_arg, + const Arg& ptr12 = no_arg, + const Arg& ptr13 = no_arg, + const Arg& ptr14 = no_arg, + const Arg& ptr15 = no_arg, + const Arg& ptr16 = no_arg) const; + + bool FindAndConsume(StringPiece* input, + const Arg& ptr1 = no_arg, + const Arg& ptr2 = no_arg, + const Arg& ptr3 = no_arg, + const Arg& ptr4 = no_arg, + const Arg& ptr5 = no_arg, + const Arg& ptr6 = no_arg, + const Arg& ptr7 = no_arg, + const Arg& ptr8 = no_arg, + const Arg& ptr9 = no_arg, + const Arg& ptr10 = no_arg, + const Arg& ptr11 = no_arg, + const Arg& ptr12 = no_arg, + const Arg& ptr13 = no_arg, + const Arg& ptr14 = no_arg, + const Arg& ptr15 = no_arg, + const Arg& ptr16 = no_arg) const; + + bool Replace(const StringPiece& rewrite, + string *str) const; + + int GlobalReplace(const StringPiece& rewrite, + string *str) const; + + bool Extract(const StringPiece &rewrite, + const StringPiece &text, + string *out) const; + + // Escapes all potentially meaningful regexp characters in + // 'unquoted'. The returned string, used as a regular expression, + // will exactly match the original string. For example, + // 1.5-2.0? + // may become: + // 1\.5\-2\.0\? + // Note QuoteMeta behaves the same as perl's QuoteMeta function, + // *except* that it escapes the NUL character (\0) as backslash + 0, + // rather than backslash + NUL. + static string QuoteMeta(const StringPiece& unquoted); + + + /***** Generic matching interface *****/ + + // Type of match (TODO: Should be restructured as part of RE_Options) + enum Anchor { + UNANCHORED, // No anchoring + ANCHOR_START, // Anchor at start only + ANCHOR_BOTH // Anchor at start and end + }; + + // General matching routine. Stores the length of the match in + // "*consumed" if successful. + bool DoMatch(const StringPiece& text, + Anchor anchor, + int* consumed, + const Arg* const* args, int n) const; + + // Return the number of capturing subpatterns, or -1 if the + // regexp wasn't valid on construction. + int NumberOfCapturingGroups() const; + + // The default value for an argument, to indicate the end of the argument + // list. This must be used only in optional argument defaults. It should NOT + // be passed explicitly. Some people have tried to use it like this: + // + // FullMatch(x, y, &z, no_arg, &w); + // + // This is a mistake, and will not work. + static Arg no_arg; + + private: + + void Init(const string& pattern, const RE_Options* options); + void Cleanup(); + + // Match against "text", filling in "vec" (up to "vecsize" * 2/3) with + // pairs of integers for the beginning and end positions of matched + // text. The first pair corresponds to the entire matched text; + // subsequent pairs correspond, in order, to parentheses-captured + // matches. Returns the number of pairs (one more than the number of + // the last subpattern with a match) if matching was successful + // and zero if the match failed. + // I.e. for RE("(foo)|(bar)|(baz)") it will return 2, 3, and 4 when matching + // against "foo", "bar", and "baz" respectively. + // When matching RE("(foo)|hello") against "hello", it will return 1. + // But the values for all subpattern are filled in into "vec". + int TryMatch(const StringPiece& text, + int startpos, + Anchor anchor, + bool empty_ok, + int *vec, + int vecsize) const; + + // Append the "rewrite" string, with backslash subsitutions from "text" + // and "vec", to string "out". + bool Rewrite(string *out, + const StringPiece& rewrite, + const StringPiece& text, + int *vec, + int veclen) const; + + // internal implementation for DoMatch + bool DoMatchImpl(const StringPiece& text, + Anchor anchor, + int* consumed, + const Arg* const args[], + int n, + int* vec, + int vecsize) const; + + // Compile the regexp for the specified anchoring mode + pcre* Compile(Anchor anchor); + + string pattern_; + RE_Options options_; + pcre* re_full_; // For full matches + pcre* re_partial_; // For partial matches + const string* error_; // Error indicator (or points to empty string) +}; + +} // namespace pcrecpp + +#endif /* _PCRECPP_H */ diff --git a/contrib/libs/pcre/pcrecpp/ya.make b/contrib/libs/pcre/pcrecpp/ya.make index c832b9e56e..6b654d4420 100644 --- a/contrib/libs/pcre/pcrecpp/ya.make +++ b/contrib/libs/pcre/pcrecpp/ya.make @@ -1,7 +1,7 @@ # Generated by devtools/yamaker. -LIBRARY() - +LIBRARY() + WITHOUT_LICENSE_TEXTS() OWNER( @@ -11,12 +11,12 @@ OWNER( LICENSE(BSD-3-Clause) -PEERDIR( +PEERDIR( contrib/libs/pcre -) - +) + ADDINCL(contrib/libs/pcre) - + NO_COMPILER_WARNINGS() NO_UTIL() @@ -25,10 +25,10 @@ CFLAGS(-DHAVE_CONFIG_H) SRCDIR(contrib/libs/pcre) -SRCS( - pcre_scanner.cc - pcre_stringpiece.cc +SRCS( + pcre_scanner.cc + pcre_stringpiece.cc pcrecpp.cc -) - -END() +) + +END() diff --git a/contrib/libs/pcre/pcrecpp_internal.h b/contrib/libs/pcre/pcrecpp_internal.h index 827f9e04e2..552a1a6daa 100644 --- a/contrib/libs/pcre/pcrecpp_internal.h +++ b/contrib/libs/pcre/pcrecpp_internal.h @@ -1,71 +1,71 @@ -/************************************************* -* Perl-Compatible Regular Expressions * -*************************************************/ - -/* -Copyright (c) 2005, Google Inc. -All rights reserved. - ------------------------------------------------------------------------------ -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - - * Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. - - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - * Neither the name of the University of Cambridge nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. ------------------------------------------------------------------------------ -*/ - - -#ifndef PCRECPP_INTERNAL_H -#define PCRECPP_INTERNAL_H - -/* When compiling a DLL for Windows, the exported symbols have to be declared -using some MS magic. I found some useful information on this web page: -http://msdn2.microsoft.com/en-us/library/y4h7bcy6(VS.80).aspx. According to the -information there, using __declspec(dllexport) without "extern" we have a -definition; with "extern" we have a declaration. The settings here override the -setting in pcre.h. We use: - - PCRECPP_EXP_DECL for declarations - PCRECPP_EXP_DEFN for definitions of exported functions - -*/ - -#ifndef PCRECPP_EXP_DECL -# ifdef _WIN32 -# ifndef PCRE_STATIC -# define PCRECPP_EXP_DECL extern __declspec(dllexport) -# define PCRECPP_EXP_DEFN __declspec(dllexport) -# else -# define PCRECPP_EXP_DECL extern -# define PCRECPP_EXP_DEFN -# endif -# else -# define PCRECPP_EXP_DECL extern -# define PCRECPP_EXP_DEFN -# endif -#endif - -#endif /* PCRECPP_INTERNAL_H */ - -/* End of pcrecpp_internal.h */ +/************************************************* +* Perl-Compatible Regular Expressions * +*************************************************/ + +/* +Copyright (c) 2005, Google Inc. +All rights reserved. + +----------------------------------------------------------------------------- +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + * Neither the name of the University of Cambridge nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +----------------------------------------------------------------------------- +*/ + + +#ifndef PCRECPP_INTERNAL_H +#define PCRECPP_INTERNAL_H + +/* When compiling a DLL for Windows, the exported symbols have to be declared +using some MS magic. I found some useful information on this web page: +http://msdn2.microsoft.com/en-us/library/y4h7bcy6(VS.80).aspx. According to the +information there, using __declspec(dllexport) without "extern" we have a +definition; with "extern" we have a declaration. The settings here override the +setting in pcre.h. We use: + + PCRECPP_EXP_DECL for declarations + PCRECPP_EXP_DEFN for definitions of exported functions + +*/ + +#ifndef PCRECPP_EXP_DECL +# ifdef _WIN32 +# ifndef PCRE_STATIC +# define PCRECPP_EXP_DECL extern __declspec(dllexport) +# define PCRECPP_EXP_DEFN __declspec(dllexport) +# else +# define PCRECPP_EXP_DECL extern +# define PCRECPP_EXP_DEFN +# endif +# else +# define PCRECPP_EXP_DECL extern +# define PCRECPP_EXP_DEFN +# endif +#endif + +#endif /* PCRECPP_INTERNAL_H */ + +/* End of pcrecpp_internal.h */ diff --git a/contrib/libs/pcre/pcrecpparg.h b/contrib/libs/pcre/pcrecpparg.h index 9818347653..a9915d09b5 100644 --- a/contrib/libs/pcre/pcrecpparg.h +++ b/contrib/libs/pcre/pcrecpparg.h @@ -1,174 +1,174 @@ -// Copyright (c) 2005, Google Inc. -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Author: Sanjay Ghemawat - -#ifndef _PCRECPPARG_H -#define _PCRECPPARG_H - -#include <stdlib.h> // for NULL -#include <string> - +// Copyright (c) 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Author: Sanjay Ghemawat + +#ifndef _PCRECPPARG_H +#define _PCRECPPARG_H + +#include <stdlib.h> // for NULL +#include <string> + #include "pcre.h" - -namespace pcrecpp { - -class StringPiece; - -// Hex/Octal/Binary? - -// Special class for parsing into objects that define a ParseFrom() method -template <class T> -class _RE_MatchObject { - public: - static inline bool Parse(const char* str, int n, void* dest) { - if (dest == NULL) return true; - T* object = reinterpret_cast<T*>(dest); - return object->ParseFrom(str, n); - } -}; - -class PCRECPP_EXP_DEFN Arg { - public: - // Empty constructor so we can declare arrays of Arg - Arg(); - - // Constructor specially designed for NULL arguments - Arg(void*); - - typedef bool (*Parser)(const char* str, int n, void* dest); - -// Type-specific parsers -#define PCRE_MAKE_PARSER(type,name) \ - Arg(type* p) : arg_(p), parser_(name) { } \ - Arg(type* p, Parser parser) : arg_(p), parser_(parser) { } - - - PCRE_MAKE_PARSER(char, parse_char); - PCRE_MAKE_PARSER(unsigned char, parse_uchar); - PCRE_MAKE_PARSER(short, parse_short); - PCRE_MAKE_PARSER(unsigned short, parse_ushort); - PCRE_MAKE_PARSER(int, parse_int); - PCRE_MAKE_PARSER(unsigned int, parse_uint); - PCRE_MAKE_PARSER(long, parse_long); - PCRE_MAKE_PARSER(unsigned long, parse_ulong); -#if 1 - PCRE_MAKE_PARSER(long long, parse_longlong); -#endif -#if 1 - PCRE_MAKE_PARSER(unsigned long long, parse_ulonglong); -#endif - PCRE_MAKE_PARSER(float, parse_float); - PCRE_MAKE_PARSER(double, parse_double); - PCRE_MAKE_PARSER(std::string, parse_string); - PCRE_MAKE_PARSER(StringPiece, parse_stringpiece); - -#undef PCRE_MAKE_PARSER - - // Generic constructor - template <class T> Arg(T*, Parser parser); - // Generic constructor template - template <class T> Arg(T* p) - : arg_(p), parser_(_RE_MatchObject<T>::Parse) { - } - - // Parse the data - bool Parse(const char* str, int n) const; - - private: - void* arg_; - Parser parser_; - - static bool parse_null (const char* str, int n, void* dest); - static bool parse_char (const char* str, int n, void* dest); - static bool parse_uchar (const char* str, int n, void* dest); - static bool parse_float (const char* str, int n, void* dest); - static bool parse_double (const char* str, int n, void* dest); - static bool parse_string (const char* str, int n, void* dest); - static bool parse_stringpiece (const char* str, int n, void* dest); - -#define PCRE_DECLARE_INTEGER_PARSER(name) \ - private: \ - static bool parse_ ## name(const char* str, int n, void* dest); \ - static bool parse_ ## name ## _radix( \ - const char* str, int n, void* dest, int radix); \ - public: \ - static bool parse_ ## name ## _hex(const char* str, int n, void* dest); \ - static bool parse_ ## name ## _octal(const char* str, int n, void* dest); \ - static bool parse_ ## name ## _cradix(const char* str, int n, void* dest) - - PCRE_DECLARE_INTEGER_PARSER(short); - PCRE_DECLARE_INTEGER_PARSER(ushort); - PCRE_DECLARE_INTEGER_PARSER(int); - PCRE_DECLARE_INTEGER_PARSER(uint); - PCRE_DECLARE_INTEGER_PARSER(long); - PCRE_DECLARE_INTEGER_PARSER(ulong); - PCRE_DECLARE_INTEGER_PARSER(longlong); - PCRE_DECLARE_INTEGER_PARSER(ulonglong); - -#undef PCRE_DECLARE_INTEGER_PARSER -}; - -inline Arg::Arg() : arg_(NULL), parser_(parse_null) { } -inline Arg::Arg(void* p) : arg_(p), parser_(parse_null) { } - -inline bool Arg::Parse(const char* str, int n) const { - return (*parser_)(str, n, arg_); -} - -// This part of the parser, appropriate only for ints, deals with bases -#define MAKE_INTEGER_PARSER(type, name) \ - inline Arg Hex(type* ptr) { \ - return Arg(ptr, Arg::parse_ ## name ## _hex); } \ - inline Arg Octal(type* ptr) { \ - return Arg(ptr, Arg::parse_ ## name ## _octal); } \ - inline Arg CRadix(type* ptr) { \ - return Arg(ptr, Arg::parse_ ## name ## _cradix); } - -MAKE_INTEGER_PARSER(short, short) /* */ -MAKE_INTEGER_PARSER(unsigned short, ushort) /* */ -MAKE_INTEGER_PARSER(int, int) /* Don't use semicolons */ -MAKE_INTEGER_PARSER(unsigned int, uint) /* after these statement */ -MAKE_INTEGER_PARSER(long, long) /* because they can cause */ -MAKE_INTEGER_PARSER(unsigned long, ulong) /* compiler warnings if */ -#if 1 /* the checking level is */ -MAKE_INTEGER_PARSER(long long, longlong) /* turned up high enough. */ -#endif /* */ -#if 1 /* */ -MAKE_INTEGER_PARSER(unsigned long long, ulonglong) /* */ -#endif - -#undef PCRE_IS_SET -#undef PCRE_SET_OR_CLEAR -#undef MAKE_INTEGER_PARSER - -} // namespace pcrecpp - - -#endif /* _PCRECPPARG_H */ + +namespace pcrecpp { + +class StringPiece; + +// Hex/Octal/Binary? + +// Special class for parsing into objects that define a ParseFrom() method +template <class T> +class _RE_MatchObject { + public: + static inline bool Parse(const char* str, int n, void* dest) { + if (dest == NULL) return true; + T* object = reinterpret_cast<T*>(dest); + return object->ParseFrom(str, n); + } +}; + +class PCRECPP_EXP_DEFN Arg { + public: + // Empty constructor so we can declare arrays of Arg + Arg(); + + // Constructor specially designed for NULL arguments + Arg(void*); + + typedef bool (*Parser)(const char* str, int n, void* dest); + +// Type-specific parsers +#define PCRE_MAKE_PARSER(type,name) \ + Arg(type* p) : arg_(p), parser_(name) { } \ + Arg(type* p, Parser parser) : arg_(p), parser_(parser) { } + + + PCRE_MAKE_PARSER(char, parse_char); + PCRE_MAKE_PARSER(unsigned char, parse_uchar); + PCRE_MAKE_PARSER(short, parse_short); + PCRE_MAKE_PARSER(unsigned short, parse_ushort); + PCRE_MAKE_PARSER(int, parse_int); + PCRE_MAKE_PARSER(unsigned int, parse_uint); + PCRE_MAKE_PARSER(long, parse_long); + PCRE_MAKE_PARSER(unsigned long, parse_ulong); +#if 1 + PCRE_MAKE_PARSER(long long, parse_longlong); +#endif +#if 1 + PCRE_MAKE_PARSER(unsigned long long, parse_ulonglong); +#endif + PCRE_MAKE_PARSER(float, parse_float); + PCRE_MAKE_PARSER(double, parse_double); + PCRE_MAKE_PARSER(std::string, parse_string); + PCRE_MAKE_PARSER(StringPiece, parse_stringpiece); + +#undef PCRE_MAKE_PARSER + + // Generic constructor + template <class T> Arg(T*, Parser parser); + // Generic constructor template + template <class T> Arg(T* p) + : arg_(p), parser_(_RE_MatchObject<T>::Parse) { + } + + // Parse the data + bool Parse(const char* str, int n) const; + + private: + void* arg_; + Parser parser_; + + static bool parse_null (const char* str, int n, void* dest); + static bool parse_char (const char* str, int n, void* dest); + static bool parse_uchar (const char* str, int n, void* dest); + static bool parse_float (const char* str, int n, void* dest); + static bool parse_double (const char* str, int n, void* dest); + static bool parse_string (const char* str, int n, void* dest); + static bool parse_stringpiece (const char* str, int n, void* dest); + +#define PCRE_DECLARE_INTEGER_PARSER(name) \ + private: \ + static bool parse_ ## name(const char* str, int n, void* dest); \ + static bool parse_ ## name ## _radix( \ + const char* str, int n, void* dest, int radix); \ + public: \ + static bool parse_ ## name ## _hex(const char* str, int n, void* dest); \ + static bool parse_ ## name ## _octal(const char* str, int n, void* dest); \ + static bool parse_ ## name ## _cradix(const char* str, int n, void* dest) + + PCRE_DECLARE_INTEGER_PARSER(short); + PCRE_DECLARE_INTEGER_PARSER(ushort); + PCRE_DECLARE_INTEGER_PARSER(int); + PCRE_DECLARE_INTEGER_PARSER(uint); + PCRE_DECLARE_INTEGER_PARSER(long); + PCRE_DECLARE_INTEGER_PARSER(ulong); + PCRE_DECLARE_INTEGER_PARSER(longlong); + PCRE_DECLARE_INTEGER_PARSER(ulonglong); + +#undef PCRE_DECLARE_INTEGER_PARSER +}; + +inline Arg::Arg() : arg_(NULL), parser_(parse_null) { } +inline Arg::Arg(void* p) : arg_(p), parser_(parse_null) { } + +inline bool Arg::Parse(const char* str, int n) const { + return (*parser_)(str, n, arg_); +} + +// This part of the parser, appropriate only for ints, deals with bases +#define MAKE_INTEGER_PARSER(type, name) \ + inline Arg Hex(type* ptr) { \ + return Arg(ptr, Arg::parse_ ## name ## _hex); } \ + inline Arg Octal(type* ptr) { \ + return Arg(ptr, Arg::parse_ ## name ## _octal); } \ + inline Arg CRadix(type* ptr) { \ + return Arg(ptr, Arg::parse_ ## name ## _cradix); } + +MAKE_INTEGER_PARSER(short, short) /* */ +MAKE_INTEGER_PARSER(unsigned short, ushort) /* */ +MAKE_INTEGER_PARSER(int, int) /* Don't use semicolons */ +MAKE_INTEGER_PARSER(unsigned int, uint) /* after these statement */ +MAKE_INTEGER_PARSER(long, long) /* because they can cause */ +MAKE_INTEGER_PARSER(unsigned long, ulong) /* compiler warnings if */ +#if 1 /* the checking level is */ +MAKE_INTEGER_PARSER(long long, longlong) /* turned up high enough. */ +#endif /* */ +#if 1 /* */ +MAKE_INTEGER_PARSER(unsigned long long, ulonglong) /* */ +#endif + +#undef PCRE_IS_SET +#undef PCRE_SET_OR_CLEAR +#undef MAKE_INTEGER_PARSER + +} // namespace pcrecpp + + +#endif /* _PCRECPPARG_H */ |