diff options
author | antonovvk <antonovvk@yandex-team.ru> | 2022-02-10 16:47:51 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:47:51 +0300 |
commit | 37a63debdc21e372d99e1808cdd31aecf75018c3 (patch) | |
tree | fd84293fb9a1b16381dd6c1a5e14c78afacb8710 /contrib | |
parent | 1fe621e70df847cc201ac942fe6d7804ea10508d (diff) | |
download | ydb-37a63debdc21e372d99e1808cdd31aecf75018c3.tar.gz |
Restoring authorship annotation for <antonovvk@yandex-team.ru>. Commit 1 of 2.
Diffstat (limited to 'contrib')
78 files changed, 19634 insertions, 19634 deletions
diff --git a/contrib/libs/antlr3_cpp_runtime/antlr3.cpp b/contrib/libs/antlr3_cpp_runtime/antlr3.cpp index d64127a6bb..f55b919cbd 100644 --- a/contrib/libs/antlr3_cpp_runtime/antlr3.cpp +++ b/contrib/libs/antlr3_cpp_runtime/antlr3.cpp @@ -1 +1 @@ -#include "include/antlr3.hpp" +#include "include/antlr3.hpp" diff --git a/contrib/libs/antlr3_cpp_runtime/include/antlr3.hpp b/contrib/libs/antlr3_cpp_runtime/include/antlr3.hpp index d8cccb4aac..d74861d247 100644 --- a/contrib/libs/antlr3_cpp_runtime/include/antlr3.hpp +++ b/contrib/libs/antlr3_cpp_runtime/include/antlr3.hpp @@ -1,33 +1,33 @@ #ifndef _ANTLR3_HPP #define _ANTLR3_HPP - -// [The "BSD licence"] -// Copyright (c) 2005-2009 Gokulakannan Somasundaram, ElectronDB -// -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// 3. The name of the author may not be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR -// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES -// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
-// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, -// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT -// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF -// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - + +// [The "BSD licence"] +// Copyright (c) 2005-2009 Gokulakannan Somasundaram, ElectronDB +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// 3. The name of the author may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + #include <cassert> #include <cstddef> // ptrdiff_t #include <cstdint> // uint32_t, ... @@ -45,21 +45,21 @@ #include <new> #include <set> #include <sstream> -#include <string> +#include <string> #include <vector> - + #include "antlr3defs.hpp" - + /* Pre declare the typedefs for all the interfaces, then * they can be inter-dependant and we will let the linker * sort it out for us. */ #include "antlr3interfaces.hpp" - + // Include the unicode.org conversion library header. // #include "antlr3convertutf.hpp" - + #include "antlr3errors.hpp" #include "antlr3memory.hpp" @@ -86,4 +86,4 @@ #include "antlr3traits.hpp" #include "antlr3treeparser.hpp" -#endif +#endif diff --git a/contrib/libs/antlr3_cpp_runtime/include/antlr3.inl b/contrib/libs/antlr3_cpp_runtime/include/antlr3.inl index b2d223398d..30f287ce19 100644 --- a/contrib/libs/antlr3_cpp_runtime/include/antlr3.inl +++ b/contrib/libs/antlr3_cpp_runtime/include/antlr3.inl @@ -1,9 +1,9 @@ namespace antlr3 { - + //static -ANTLR_INLINE void GenericStream::displayRecognitionError( const StringType& str ) -{ +ANTLR_INLINE void GenericStream::displayRecognitionError( const StringType& str ) +{ fprintf(stderr, str.c_str() ); -} - +} + } diff --git a/contrib/libs/antlr3_cpp_runtime/include/antlr3baserecognizer.hpp b/contrib/libs/antlr3_cpp_runtime/include/antlr3baserecognizer.hpp index 0374b3a1ea..9086b20658 100644 --- a/contrib/libs/antlr3_cpp_runtime/include/antlr3baserecognizer.hpp +++ 
b/contrib/libs/antlr3_cpp_runtime/include/antlr3baserecognizer.hpp @@ -1,49 +1,49 @@ -/** \file - * Defines the basic structure to support recognizing by either a lexer, - * parser, or tree parser. - * \addtogroup BaseRecognizer - * @{ - */ +/** \file + * Defines the basic structure to support recognizing by either a lexer, + * parser, or tree parser. + * \addtogroup BaseRecognizer + * @{ + */ #ifndef _ANTLR3_BASERECOGNIZER_HPP #define _ANTLR3_BASERECOGNIZER_HPP - -// [The "BSD licence"] -// Copyright (c) 2005-2009 Gokulakannan Somasundaram, ElectronDB - -// -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// 3. The name of the author may not be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR -// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES -// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
-// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, -// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT -// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF -// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - + +// [The "BSD licence"] +// Copyright (c) 2005-2009 Gokulakannan Somasundaram, ElectronDB + +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// 3. The name of the author may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + namespace antlr3 { - -/** \brief Base tracking context structure for all types of - * recognizers. - */ -template< class ImplTraits, class StreamType > -class BaseRecognizer : public ImplTraits::AllocPolicyType -{ -public: + +/** \brief Base tracking context structure for all types of + * recognizers. + */ +template< class ImplTraits, class StreamType > +class BaseRecognizer : public ImplTraits::AllocPolicyType +{ +public: typedef typename ImplTraits::AllocPolicyType AllocPolicyType; typedef typename StreamType::IntStreamType IntStreamType; typedef typename ComponentTypeFinder<ImplTraits, StreamType>::ComponentType SuperType; @@ -57,17 +57,17 @@ public: typedef typename ImplTraits::LexerType LexerType; typedef typename ImplTraits::ParserType ParserType; typedef typename ImplTraits::TreeParserType TreeParserType; - + typedef typename AllocPolicyType::template StackType<StringType> StringStackType; typedef typename AllocPolicyType::template ListType<StringType> StringListType; - -private: + +private: /// A pointer to the shared recognizer state, such that multiple /// recognizers can use the same inputs streams and so on (in /// the case of grammar inheritance for instance. /// RecognizerSharedStateType* m_state; - + /// If set to something other than NULL, then this structure is /// points to an instance of the debugger interface. 
In general, the /// debugger is only referenced internally in recovery/error operations @@ -75,18 +75,18 @@ private: /// in every function/method /// DebugEventListenerType* m_debugger; - - -public: + + +public: BaseRecognizer(ANTLR_UINT32 sizeHint, RecognizerSharedStateType* state); - + SuperType* get_super(); RecognizerSharedStateType* get_state() const; DebugEventListenerType* get_debugger() const; void set_state( RecognizerSharedStateType* state ); void set_debugger( DebugEventListenerType* debugger ); - - /// Match current input symbol against ttype. Upon error, do one token + + /// Match current input symbol against ttype. Upon error, do one token /// insertion or deletion if possible. /// To turn off single token insertion or deletion error /// recovery, override mismatchRecover() and have it call @@ -96,7 +96,7 @@ public: /// symbols that can follow rule ref. /// const UnitType* match(ANTLR_UINT32 ttype, BitsetListType* follow); - + /// Consumes the next token, whatever it is, and resets the recognizer state /// so that it is not in error. /// @@ -104,20 +104,20 @@ public: /// Recognizer context pointer /// void matchAny(); - + /// function that decides if the token ahead of the current one is the /// one we were loking for, in which case the curernt one is very likely extraneous /// and can be reported that way. /// bool mismatchIsUnwantedToken(IntStreamType* input, ANTLR_UINT32 ttype); - + /// function that decides if the current token is one that can logically /// follow the one we were looking for, in which case the one we were looking for is /// probably missing from the input. /// bool mismatchIsMissingToken(IntStreamType* input, BitsetListType* follow); - - /// Factor out what to do upon token mismatch so tree parsers can behave + + /// Factor out what to do upon token mismatch so tree parsers can behave /// differently. Override and call mismatchRecover(input, ttype, follow) /// to get single token insertion and deletion. 
Use this to turn off /// single token insertion and deletion. Override mismatchRecover @@ -125,9 +125,9 @@ public: /// /// \remark mismatch only works for parsers and must be overridden for anything else. /// - void mismatch(ANTLR_UINT32 ttype, BitsetListType* follow); - - /// Report a recognition problem. + void mismatch(ANTLR_UINT32 ttype, BitsetListType* follow); + + /// Report a recognition problem. /// /// This method sets errorRecovery to indicate the parser is recovering /// not parsing. Once in recovery mode, no errors are generated. @@ -146,13 +146,13 @@ public: void reportError( ClassForwarder<LexerType> ); template<typename CompType> void reportError( ClassForwarder<CompType> ); - - /** Function that is called to display a recognition error message. You may - * override this function independently of (*reportError)() above as that function calls - * this one to do the actual exception printing. - */ + + /** Function that is called to display a recognition error message. You may + * override this function independently of (*reportError)() above as that function calls + * this one to do the actual exception printing. + */ void displayRecognitionError(ANTLR_UINT8** tokenNames); - + /// Get number of recognition errors (lexer, parser, tree parser). Each /// recognizer tracks its own number. So parser and lexer each have /// separate count. Does not count the spurious errors found between @@ -161,34 +161,34 @@ public: /// \see reportError() /// ANTLR_UINT32 getNumberOfSyntaxErrors(); - - /** Function that recovers from an error found in the input stream. - * Generally, this will be a #ANTLR3_EXCEPTION_NOVIABLE_ALT but it could also - * be from a mismatched token that the (*match)() could not recover from. - */ + + /** Function that recovers from an error found in the input stream. + * Generally, this will be a #ANTLR3_EXCEPTION_NOVIABLE_ALT but it could also + * be from a mismatched token that the (*match)() could not recover from. 
+ */ void recover(); - - /** function that is a hook to listen to token consumption during error recovery. - * This is mainly used by the debug parser to send events to the listener. - */ + + /** function that is a hook to listen to token consumption during error recovery. + * This is mainly used by the debug parser to send events to the listener. + */ void beginResync(); - - /** function that is a hook to listen to token consumption during error recovery. - * This is mainly used by the debug parser to send events to the listener. - */ + + /** function that is a hook to listen to token consumption during error recovery. + * This is mainly used by the debug parser to send events to the listener. + */ void endResync(); - + /** function that is a hook to listen to token consumption during error recovery. - * This is mainly used by the debug parser to send events to the listener. - */ + * This is mainly used by the debug parser to send events to the listener. + */ void beginBacktrack(ANTLR_UINT32 level); - - /** function that is a hook to listen to token consumption during error recovery. - * This is mainly used by the debug parser to send events to the listener. - */ + + /** function that is a hook to listen to token consumption during error recovery. + * This is mainly used by the debug parser to send events to the listener. + */ void endBacktrack(ANTLR_UINT32 level, bool successful); - - /// Compute the error recovery set for the current rule. + + /// Compute the error recovery set for the current rule. /// Documentation below is from the Java implementation. /// /// During rule invocation, the parser pushes the set of tokens that can @@ -282,8 +282,8 @@ public: /// at run-time upon error to avoid overhead during parsing. /// BitsetType* computeErrorRecoverySet(); - - /// Compute the context-sensitive FOLLOW set for current rule. + + /// Compute the context-sensitive FOLLOW set for current rule. /// Documentation below is from the Java runtime. 
/// /// This is the set of token types that can follow a specific rule @@ -338,12 +338,12 @@ public: /// throwing an exception. /// BitsetType* computeCSRuleFollow(); - - /// Compute the current followset for the input stream. + + /// Compute the current followset for the input stream. /// BitsetType* combineFollows(bool exact); - - /// Attempt to recover from a single missing or extra token. + + /// Attempt to recover from a single missing or extra token. /// /// EXTRA TOKEN /// @@ -377,67 +377,67 @@ public: /// error flag and rules cascade back when this is set. /// const UnitType* recoverFromMismatchedToken( ANTLR_UINT32 ttype, BitsetListType* follow); - - /** Function that recovers from a mismatched set in the token stream, in a similar manner - * to (*recoverFromMismatchedToken) - */ + + /** Function that recovers from a mismatched set in the token stream, in a similar manner + * to (*recoverFromMismatchedToken) + */ const UnitType* recoverFromMismatchedSet(BitsetListType* follow); - - /** common routine to handle single token insertion for recovery functions. - */ + + /** common routine to handle single token insertion for recovery functions. + */ /// This code is factored out from mismatched token and mismatched set /// recovery. It handles "single token insertion" error recovery for /// both. No tokens are consumed to recover from insertions. Return /// true if recovery was possible else return false. /// bool recoverFromMismatchedElement(BitsetListType* follow); - - /** function that consumes input until the next token matches - * the given token. - */ + + /** function that consumes input until the next token matches + * the given token. + */ void consumeUntil(ANTLR_UINT32 tokenType); - - /** function that consumes input until the next token matches - * one in the given set. - */ + + /** function that consumes input until the next token matches + * one in the given set. 
+ */ void consumeUntilSet(BitsetType* set); - - /** function that returns an ANTLR3_LIST of the strings that identify - * the rules in the parser that got you to this point. Can be overridden by installing your + + /** function that returns an ANTLR3_LIST of the strings that identify + * the rules in the parser that got you to this point. Can be overridden by installing your * own function set. - * - * \todo Document how to override invocation stack functions. - */ + * + * \todo Document how to override invocation stack functions. + */ StringStackType getRuleInvocationStack(); StringStackType getRuleInvocationStackNamed(ANTLR_UINT8* name); - - /** function that converts an ANLR3_LIST of tokens to an ANTLR3_LIST of - * string token names. As this is mostly used in string template processing it may not be useful - * in the C runtime. - */ + + /** function that converts an ANLR3_LIST of tokens to an ANTLR3_LIST of + * string token names. As this is mostly used in string template processing it may not be useful + * in the C runtime. + */ StringListType toStrings( const StringListType& ); - - /** function to return whether the rule has parsed input starting at the supplied - * start index before. If the rule has not parsed input starting from the supplied start index, - * then it will return ANTLR3_MEMO_RULE_UNKNOWN. If it has parsed from the suppled start point - * then it will return the point where it last stopped parsing after that start point. - */ + + /** function to return whether the rule has parsed input starting at the supplied + * start index before. If the rule has not parsed input starting from the supplied start index, + * then it will return ANTLR3_MEMO_RULE_UNKNOWN. If it has parsed from the suppled start point + * then it will return the point where it last stopped parsing after that start point. 
+ */ ANTLR_MARKER getRuleMemoization( ANTLR_INTKEY ruleIndex, ANTLR_MARKER ruleParseStart); - - /** function that determines whether the rule has parsed input at the current index - * in the input stream - */ + + /** function that determines whether the rule has parsed input at the current index + * in the input stream + */ bool alreadyParsedRule(ANTLR_MARKER ruleIndex); - - /** Function that records whether the rule has parsed the input at a - * current position successfully or not. - */ + + /** Function that records whether the rule has parsed the input at a + * current position successfully or not. + */ void memoize(ANTLR_MARKER ruleIndex, ANTLR_MARKER ruleParseStart); - + /// Function that returns the current input symbol. - /// The is placed into any label for the associated token ref; e.g., x=ID. Token + /// The is placed into any label for the associated token ref; e.g., x=ID. Token /// and tree parsers need to return different objects. Rather than test /// for input stream type or change the IntStream interface, I use /// a simple method to ask the recognizer to tell me what the current @@ -450,7 +450,7 @@ public: const UnitType* getCurrentInputSymbol(IntStreamType* istream, ClassForwarder<LexerType>); const UnitType* getCurrentInputSymbol(IntStreamType* istream, ClassForwarder<ParserType>); const UnitType* getCurrentInputSymbol(IntStreamType* istream, ClassForwarder<TreeParserType>); - + /// Conjure up a missing token during error recovery. /// /// The recognizer attempts to recover from single missing @@ -473,37 +473,37 @@ public: UnitType* getMissingSymbol( IntStreamType* istream, ExceptionBaseType* e, ANTLR_UINT32 expectedTokenType, BitsetListType* follow); - - /** Function that returns whether the supplied grammar function - * will parse the current input stream or not. This is the way that syntactic - * predicates are evaluated. 
Unlike java, C is perfectly happy to invoke code - * via a pointer to a function (hence that's what all the ANTLR3 C interfaces - * do. - */ + + /** Function that returns whether the supplied grammar function + * will parse the current input stream or not. This is the way that syntactic + * predicates are evaluated. Unlike java, C is perfectly happy to invoke code + * via a pointer to a function (hence that's what all the ANTLR3 C interfaces + * do. + */ template<typename Predicate> - bool synpred( ClassForwarder<Predicate> ); - + bool synpred( ClassForwarder<Predicate> ); + //In place of exConstruct, just directly instantiate the Exception Object - - /** Reset the recognizer - */ - void reset(); + + /** Reset the recognizer + */ + void reset(); void reset( ClassForwarder<LexerType> ); template<typename CompType> void reset( ClassForwarder<CompType> ); - + void exConstruct(); - - ~BaseRecognizer(); - -}; - + + ~BaseRecognizer(); + +}; + } - -#include "antlr3baserecognizer.inl" - -/// @} -/// - + +#include "antlr3baserecognizer.inl" + +/// @} +/// + #endif /* _ANTLR3_BASERECOGNIZER_H */ - + diff --git a/contrib/libs/antlr3_cpp_runtime/include/antlr3baserecognizer.inl b/contrib/libs/antlr3_cpp_runtime/include/antlr3baserecognizer.inl index b0c3fe8d51..254178bef2 100644 --- a/contrib/libs/antlr3_cpp_runtime/include/antlr3baserecognizer.inl +++ b/contrib/libs/antlr3_cpp_runtime/include/antlr3baserecognizer.inl @@ -1,11 +1,11 @@ namespace antlr3 { - -template< class ImplTraits, class StreamType > -BaseRecognizer<ImplTraits, StreamType>::BaseRecognizer(ANTLR_UINT32 sizeHint, + +template< class ImplTraits, class StreamType > +BaseRecognizer<ImplTraits, StreamType>::BaseRecognizer(ANTLR_UINT32 sizeHint, RecognizerSharedStateType* state) -{ +{ m_debugger = NULL; - + // If we have been supplied with a pre-existing recognizer state // then we just install it, otherwise we must create one from scratch // @@ -22,46 +22,46 @@ BaseRecognizer<ImplTraits, 
StreamType>::BaseRecognizer(ANTLR_UINT32 sizeHint, // m_state = state; } -} - -template< class ImplTraits, class StreamType > -ANTLR_INLINE typename BaseRecognizer<ImplTraits, StreamType>::SuperType* BaseRecognizer<ImplTraits, StreamType>::get_super() -{ +} + +template< class ImplTraits, class StreamType > +ANTLR_INLINE typename BaseRecognizer<ImplTraits, StreamType>::SuperType* BaseRecognizer<ImplTraits, StreamType>::get_super() +{ return static_cast<SuperType*>(this); -} - -template< class ImplTraits, class StreamType > -ANTLR_INLINE typename BaseRecognizer<ImplTraits, StreamType>::RecognizerSharedStateType* BaseRecognizer<ImplTraits, StreamType>::get_state() const -{ +} + +template< class ImplTraits, class StreamType > +ANTLR_INLINE typename BaseRecognizer<ImplTraits, StreamType>::RecognizerSharedStateType* BaseRecognizer<ImplTraits, StreamType>::get_state() const +{ return m_state; -} -template< class ImplTraits, class StreamType > -ANTLR_INLINE typename BaseRecognizer<ImplTraits, StreamType>::DebugEventListenerType* BaseRecognizer<ImplTraits, StreamType>::get_debugger() const -{ +} +template< class ImplTraits, class StreamType > +ANTLR_INLINE typename BaseRecognizer<ImplTraits, StreamType>::DebugEventListenerType* BaseRecognizer<ImplTraits, StreamType>::get_debugger() const +{ return m_debugger; -} -template< class ImplTraits, class StreamType > -ANTLR_INLINE void BaseRecognizer<ImplTraits, StreamType>::set_state( RecognizerSharedStateType* state ) -{ +} +template< class ImplTraits, class StreamType > +ANTLR_INLINE void BaseRecognizer<ImplTraits, StreamType>::set_state( RecognizerSharedStateType* state ) +{ m_state = state; -} -template< class ImplTraits, class StreamType > -ANTLR_INLINE void BaseRecognizer<ImplTraits, StreamType>::set_debugger( DebugEventListenerType* debugger ) -{ +} +template< class ImplTraits, class StreamType > +ANTLR_INLINE void BaseRecognizer<ImplTraits, StreamType>::set_debugger( DebugEventListenerType* debugger ) +{ m_debugger = 
debugger; -} - -template< class ImplTraits, class StreamType > +} + +template< class ImplTraits, class StreamType > const typename BaseRecognizer<ImplTraits, StreamType>::UnitType* -BaseRecognizer<ImplTraits, StreamType>::match(ANTLR_UINT32 ttype, BitsetListType* follow) -{ +BaseRecognizer<ImplTraits, StreamType>::match(ANTLR_UINT32 ttype, BitsetListType* follow) +{ SuperType* super = static_cast<SuperType*>(this); IntStreamType* is = super->get_istream(); - + // Pick up the current input token/node for assignment to labels // const UnitType* matchedSymbol = this->getCurrentInputSymbol(is); - + //if (is->LA(1) == ttype) if (matchedSymbol->get_type() == ttype) { @@ -72,44 +72,44 @@ BaseRecognizer<ImplTraits, StreamType>::match(ANTLR_UINT32 ttype, BitsetListType m_state->set_failed(false); // The match was a success return matchedSymbol; // We are done } - - // We did not find the expected token type, if we are backtracking then - // we just set the failed flag and return. - // + + // We did not find the expected token type, if we are backtracking then + // we just set the failed flag and return. + // if ( m_state->get_backtracking() > 0) - { + { // Backtracking is going on // m_state->set_failed(true); return matchedSymbol; } - - // We did not find the expected token and there is no backtracking - // going on, so we mismatch, which creates an exception in the recognizer exception - // stack. - // + + // We did not find the expected token and there is no backtracking + // going on, so we mismatch, which creates an exception in the recognizer exception + // stack. 
+ // matchedSymbol = this->recoverFromMismatchedToken(ttype, follow); - return matchedSymbol; - -} - -template< class ImplTraits, class StreamType > -void BaseRecognizer<ImplTraits, StreamType>::matchAny() -{ + return matchedSymbol; + +} + +template< class ImplTraits, class StreamType > +void BaseRecognizer<ImplTraits, StreamType>::matchAny() +{ SuperType* super = static_cast<SuperType*>(this); IntStreamType* is = super->get_istream(); - + is->consume(); m_state->set_errorRecovery(false); m_state->set_failed(false); - return; -} - -template< class ImplTraits, class StreamType > -bool BaseRecognizer<ImplTraits, StreamType>::mismatchIsUnwantedToken(IntStreamType* is, ANTLR_UINT32 ttype) -{ + return; +} + +template< class ImplTraits, class StreamType > +bool BaseRecognizer<ImplTraits, StreamType>::mismatchIsUnwantedToken(IntStreamType* is, ANTLR_UINT32 ttype) +{ ANTLR_UINT32 nextt = is->LA(2); - + if (nextt == ttype) { if(m_state->get_exception() != NULL) @@ -118,15 +118,15 @@ bool BaseRecognizer<ImplTraits, StreamType>::mismatchIsUnwantedToken(IntStreamTy } else return false; // Neither this token, nor the one following is the one we wanted -} - -template< class ImplTraits, class StreamType > -bool BaseRecognizer<ImplTraits, StreamType>::mismatchIsMissingToken(IntStreamType* is, BitsetListType* follow) -{ +} + +template< class ImplTraits, class StreamType > +bool BaseRecognizer<ImplTraits, StreamType>::mismatchIsMissingToken(IntStreamType* is, BitsetListType* follow) +{ bool retcode; BitsetType* followClone; BitsetType* viableTokensFollowingThisRule; - + if (follow == NULL) { // There is no information about the tokens that can follow the last one @@ -136,10 +136,10 @@ bool BaseRecognizer<ImplTraits, StreamType>::mismatchIsMissingToken(IntStreamTyp // return false; } - + followClone = NULL; viableTokensFollowingThisRule = NULL; - + // The C bitset maps are laid down at compile time by the // C code generation. Hence we cannot remove things from them // and so on. 
So, in order to remove EOR (if we need to) then @@ -148,7 +148,7 @@ bool BaseRecognizer<ImplTraits, StreamType>::mismatchIsMissingToken(IntStreamTyp followClone = follow->bitsetLoad(); if (followClone == NULL) return false; - + // Compute what can follow this grammar reference // if (followClone->isMember( ImplTraits::CommonTokenType::EOR_TOKEN_TYPE)) @@ -157,14 +157,14 @@ bool BaseRecognizer<ImplTraits, StreamType>::mismatchIsMissingToken(IntStreamTyp // need to remove it. // followClone->remove(ImplTraits::CommonTokenType::EOR_TOKEN_TYPE); - + // Now compute the visiable tokens that can follow this rule, according to context // and make them part of the follow set. // viableTokensFollowingThisRule = this->computeCSRuleFollow(); followClone->borInPlace(viableTokensFollowingThisRule); } - + /// if current token is consistent with what could come after set /// then we know we're missing a token; error recovery is free to /// "insert" the missing token @@ -183,7 +183,7 @@ bool BaseRecognizer<ImplTraits, StreamType>::mismatchIsMissingToken(IntStreamTyp { retcode = false; } - + if (viableTokensFollowingThisRule != NULL) { delete viableTokensFollowingThisRule; @@ -192,199 +192,199 @@ bool BaseRecognizer<ImplTraits, StreamType>::mismatchIsMissingToken(IntStreamTyp { delete followClone; } - + return retcode; -} - -template< class ImplTraits, class StreamType > -void BaseRecognizer<ImplTraits, StreamType>::mismatch(ANTLR_UINT32 ttype, BitsetListType* follow) -{ +} + +template< class ImplTraits, class StreamType > +void BaseRecognizer<ImplTraits, StreamType>::mismatch(ANTLR_UINT32 ttype, BitsetListType* follow) +{ this->get_super()->mismatch( ttype, follow ); -} - -template< class ImplTraits, class StreamType > +} + +template< class ImplTraits, class StreamType > void BaseRecognizer<ImplTraits, StreamType>::reportError() -{ +{ this->reportError( ClassForwarder<SuperType>() ); -} - -template< class ImplTraits, class StreamType > +} + +template< class ImplTraits, class 
StreamType > void BaseRecognizer<ImplTraits, StreamType>::reportError( ClassForwarder<LexerType> ) -{ +{ // Indicate this recognizer had an error while processing. // m_state->inc_errorCount(); - - this->displayRecognitionError(m_state->get_tokenNames()); -} - -template< class ImplTraits, class StreamType > -template<typename CompType> + + this->displayRecognitionError(m_state->get_tokenNames()); +} + +template< class ImplTraits, class StreamType > +template<typename CompType> void BaseRecognizer<ImplTraits, StreamType>::reportError(ClassForwarder<CompType> ) -{ +{ // Invoke the debugger event if there is a debugger listening to us // if ( m_debugger != NULL) { m_debugger->recognitionException( m_state->get_exception() ); } - + if ( m_state->get_errorRecovery() == true) - { + { // Already in error recovery so don't display another error while doing so // return; - } - - // Signal we are in error recovery now - // - m_state->set_errorRecovery(true); - + } + + // Signal we are in error recovery now + // + m_state->set_errorRecovery(true); + // Indicate this recognizer had an error while processing. // m_state->inc_errorCount(); - + // Call the error display routine // - this->displayRecognitionError( m_state->get_tokenNames() ); -} - -template< class ImplTraits, class StreamType > + this->displayRecognitionError( m_state->get_tokenNames() ); +} + +template< class ImplTraits, class StreamType > void BaseRecognizer<ImplTraits, StreamType>::displayRecognitionError(ANTLR_UINT8** tokenNames) -{ +{ // Retrieve some info for easy reading. 
// ExceptionBaseType* ex = m_state->get_exception(); StringType ttext; - + // See if there is a 'filename' we can use // SuperType* super = static_cast<SuperType*>(this); super->displayRecognitionError(tokenNames, ex); -} - -template< class ImplTraits, class StreamType > -ANTLR_UINT32 BaseRecognizer<ImplTraits, StreamType>::getNumberOfSyntaxErrors() -{ +} + +template< class ImplTraits, class StreamType > +ANTLR_UINT32 BaseRecognizer<ImplTraits, StreamType>::getNumberOfSyntaxErrors() +{ return m_state->get_errorCount(); -} - -template< class ImplTraits, class StreamType > +} + +template< class ImplTraits, class StreamType > void BaseRecognizer<ImplTraits, StreamType>::recover() -{ +{ SuperType* super = static_cast<SuperType*>(this); IntStreamType* is = super->get_parser_istream(); // Are we about to repeat the same error? // if ( m_state->get_lastErrorIndex() == is->index()) - { + { // The last error was at the same token index point. This must be a case // where LT(1) is in the recovery token set so nothing is // consumed. Consume a single token so at least to prevent // an infinite loop; this is a failsafe. // is->consume(); - } - - // Record error index position - // - m_state->set_lastErrorIndex( is->index() ); - - // Work out the follows set for error recovery - // + } + + // Record error index position + // + m_state->set_lastErrorIndex( is->index() ); + + // Work out the follows set for error recovery + // BitsetType* followSet = this->computeErrorRecoverySet(); - - // Call resync hook (for debuggers and so on) - // - this->beginResync(); - - // Consume tokens until we have resynced to something in the follows set - // - this->consumeUntilSet(followSet); - - // End resync hook - // - this->endResync(); - - // Destroy the temporary bitset we produced. 
- // - delete followSet; - - // Reset the inError flag so we don't re-report the exception - // - m_state->set_error(false); - m_state->set_failed(false); -} - -template< class ImplTraits, class StreamType > + + // Call resync hook (for debuggers and so on) + // + this->beginResync(); + + // Consume tokens until we have resynced to something in the follows set + // + this->consumeUntilSet(followSet); + + // End resync hook + // + this->endResync(); + + // Destroy the temporary bitset we produced. + // + delete followSet; + + // Reset the inError flag so we don't re-report the exception + // + m_state->set_error(false); + m_state->set_failed(false); +} + +template< class ImplTraits, class StreamType > void BaseRecognizer<ImplTraits, StreamType>::beginResync() -{ +{ if (m_debugger != NULL) { m_debugger->beginResync(); } -} - -template< class ImplTraits, class StreamType > +} + +template< class ImplTraits, class StreamType > void BaseRecognizer<ImplTraits, StreamType>::endResync() -{ +{ if (m_debugger != NULL) { m_debugger->endResync(); } -} - -template< class ImplTraits, class StreamType > +} + +template< class ImplTraits, class StreamType > void BaseRecognizer<ImplTraits, StreamType>::beginBacktrack(ANTLR_UINT32 level) -{ +{ if (m_debugger != NULL) { m_debugger->beginBacktrack(level); } -} - -template< class ImplTraits, class StreamType > +} + +template< class ImplTraits, class StreamType > void BaseRecognizer<ImplTraits, StreamType>::endBacktrack(ANTLR_UINT32 level, bool /*successful*/) -{ +{ if (m_debugger != NULL) { m_debugger->endBacktrack(level); } -} - -template< class ImplTraits, class StreamType > +} + +template< class ImplTraits, class StreamType > typename BaseRecognizer<ImplTraits, StreamType>::BitsetType* BaseRecognizer<ImplTraits, StreamType>::computeErrorRecoverySet() -{ +{ return this->combineFollows(false); -} - -template< class ImplTraits, class StreamType > +} + +template< class ImplTraits, class StreamType > typename BaseRecognizer<ImplTraits, 
StreamType>::BitsetType* BaseRecognizer<ImplTraits, StreamType>::computeCSRuleFollow() -{ +{ return this->combineFollows(false); -} - -template< class ImplTraits, class StreamType > +} + +template< class ImplTraits, class StreamType > typename BaseRecognizer<ImplTraits, StreamType>::BitsetType* BaseRecognizer<ImplTraits, StreamType>::combineFollows(bool exact) -{ +{ BitsetType* followSet; BitsetType* localFollowSet; ANTLR_UINT32 top; ANTLR_UINT32 i; - + top = static_cast<ANTLR_UINT32>( m_state->get_following().size() ); - + followSet = new BitsetType(0); localFollowSet = NULL; - - for (i = top; i>0; i--) - { + + for (i = top; i>0; i--) + { localFollowSet = m_state->get_following().at(i-1).bitsetLoad(); - + if (localFollowSet != NULL) { followSet->borInPlace(localFollowSet); - + if (exact == true) { if (localFollowSet->isMember( ImplTraits::CommonTokenType::EOR_TOKEN_TYPE) == false) @@ -405,23 +405,23 @@ typename BaseRecognizer<ImplTraits, StreamType>::BitsetType* BaseRecognizer<Impl delete localFollowSet; localFollowSet = NULL; } - } - + } + if (localFollowSet != NULL) { delete localFollowSet; } - return followSet; -} - -template< class ImplTraits, class StreamType > + return followSet; +} + +template< class ImplTraits, class StreamType > const typename BaseRecognizer<ImplTraits, StreamType>::UnitType* BaseRecognizer<ImplTraits, StreamType>::recoverFromMismatchedToken( ANTLR_UINT32 ttype, BitsetListType* follow) -{ +{ SuperType* super = static_cast<SuperType*>(this); IntStreamType* is = super->get_parser_istream(); const UnitType* matchedSymbol; - + // If the next token after the one we are looking at in the input stream // is what we are looking for then we remove the one we have discovered // from the stream by consuming it, then consume this next one along too as @@ -432,14 +432,14 @@ BaseRecognizer<ImplTraits, StreamType>::recoverFromMismatchedToken( ANTLR_UINT32 // Create an exception if we need one // new ANTLR_Exception<ImplTraits, UNWANTED_TOKEN_EXCEPTION, 
StreamType>(this, ""); - + // Call resync hook (for debuggers and so on) // if (m_debugger != NULL) { m_debugger->beginResync(); } - + // "delete" the extra token // this->beginResync(); @@ -451,26 +451,26 @@ BaseRecognizer<ImplTraits, StreamType>::recoverFromMismatchedToken( ANTLR_UINT32 { m_debugger->endResync(); } - + // Print out the error after we consume so that ANTLRWorks sees the // token in the exception. // this->reportError(); - + // Return the token we are actually matching // matchedSymbol = this->getCurrentInputSymbol(is); - + // Consume the token that the rule actually expected to get as if everything // was hunky dory. // is->consume(); - + m_state->set_error(false); // Exception is not outstanding any more - + return matchedSymbol; } - + // Single token deletion (Unwanted above) did not work // so we see if we can insert a token instead by calculating which // token would be missing @@ -483,36 +483,36 @@ BaseRecognizer<ImplTraits, StreamType>::recoverFromMismatchedToken( ANTLR_UINT32 matchedSymbol = this->getMissingSymbol( is, m_state->get_exception(), ttype, follow); m_state->get_exception()->set_token( matchedSymbol ); m_state->get_exception()->set_expecting(ttype); - + // Print out the error after we insert so that ANTLRWorks sees the // token in the exception. // this->reportError(); - + m_state->set_error(false); // Exception is not outstanding any more - + return matchedSymbol; } - + // Create an exception if we need one // new ANTLR_Exception<ImplTraits, RECOGNITION_EXCEPTION, StreamType>(this, ""); - + // Neither deleting nor inserting tokens allows recovery // must just report the exception. 
// m_state->set_error(true); return NULL; -} - -template< class ImplTraits, class StreamType > +} + +template< class ImplTraits, class StreamType > const typename BaseRecognizer<ImplTraits, StreamType>::UnitType* BaseRecognizer<ImplTraits, StreamType>::recoverFromMismatchedSet(BitsetListType* follow) -{ +{ SuperType* super = static_cast<SuperType*>(this); IntStreamType* is = super->get_parser_istream(); const UnitType* matchedSymbol; - + if (this->mismatchIsMissingToken(is, follow) == true) { // We can fake the missing token and proceed @@ -520,68 +520,68 @@ BaseRecognizer<ImplTraits, StreamType>::recoverFromMismatchedSet(BitsetListType* new ANTLR_Exception<ImplTraits, MISSING_TOKEN_EXCEPTION, StreamType>(this); matchedSymbol = this->getMissingSymbol(is, m_state->get_exception(), follow); m_state->get_exception()->set_token(matchedSymbol); - + // Print out the error after we insert so that ANTLRWorks sees the // token in the exception. // this->reportError(); - + m_state->set_error(false); // Exception is not outstanding any more - + return matchedSymbol; } - - // TODO - Single token deletion like in recoverFromMismatchedToken() - // - m_state->set_error(true); + + // TODO - Single token deletion like in recoverFromMismatchedToken() + // + m_state->set_error(true); m_state->set_failed(true); return NULL; -} - -template< class ImplTraits, class StreamType > +} + +template< class ImplTraits, class StreamType > bool BaseRecognizer<ImplTraits, StreamType>::recoverFromMismatchedElement(BitsetListType* followBits) -{ +{ SuperType* super = static_cast<SuperType*>(this); IntStreamType* is = super->get_parser_istream(); - + BitsetType* follow = followBits->load(); BitsetType* viableToksFollowingRule; - + if (follow == NULL) - { + { /* The follow set is NULL, which means we don't know what can come * next, so we "hit and hope" by just signifying that we cannot * recover, which will just cause the next token to be consumed, * which might dig us out. 
*/ return false; - } - - /* We have a bitmap for the follow set, hence we can compute - * what can follow this grammar element reference. - */ + } + + /* We have a bitmap for the follow set, hence we can compute + * what can follow this grammar element reference. + */ if (follow->isMember( ImplTraits::CommonTokenType::EOR_TOKEN_TYPE) == true) - { + { /* First we need to know which of the available tokens are viable * to follow this reference. */ viableToksFollowingRule = this->computeCSRuleFollow(); - + /* Remove the EOR token, which we do not wish to compute with */ follow->remove( ImplTraits::CommonTokenType::EOR_TOKEN_TYPE); delete viableToksFollowingRule; /* We now have the computed set of what can follow the current token */ - } - - /* We can now see if the current token works with the set of tokens - * that could follow the current grammar reference. If it looks like it - * is consistent, then we can "insert" that token by not throwing - * an exception and assuming that we saw it. - */ + } + + /* We can now see if the current token works with the set of tokens + * that could follow the current grammar reference. If it looks like it + * is consistent, then we can "insert" that token by not throwing + * an exception and assuming that we saw it. + */ if ( follow->isMember(is->LA(1)) == true) - { + { /* report the error, but don't cause any rules to abort and stuff */ this->reportError(); @@ -592,77 +592,77 @@ bool BaseRecognizer<ImplTraits, StreamType>::recoverFromMismatchedElement(Bitse m_state->set_error(false); m_state->set_failed(false); return true; /* Success in recovery */ - } - + } + if (follow != NULL) - { + { delete follow; - } - - /* We could not find anything viable to do, so this is going to - * cause an exception. - */ - return false; -} - -template< class ImplTraits, class StreamType > + } + + /* We could not find anything viable to do, so this is going to + * cause an exception. 
+ */ + return false; +} + +template< class ImplTraits, class StreamType > void BaseRecognizer<ImplTraits, StreamType>::consumeUntil(ANTLR_UINT32 tokenType) -{ +{ SuperType* super = static_cast<SuperType*>(this); IntStreamType* is = super->get_parser_istream(); - + // What do have at the moment? - // + // ANTLR_UINT32 ttype = is->LA(1); - - // Start eating tokens until we get to the one we want. - // - while (ttype != ImplTraits::CommonTokenType::TOKEN_EOF && ttype != tokenType) - { + + // Start eating tokens until we get to the one we want. + // + while (ttype != ImplTraits::CommonTokenType::TOKEN_EOF && ttype != tokenType) + { is->consume(); ttype = is->LA(1); - } -} - -template< class ImplTraits, class StreamType > + } +} + +template< class ImplTraits, class StreamType > void BaseRecognizer<ImplTraits, StreamType>::consumeUntilSet(BitsetType* set) -{ +{ ANTLR_UINT32 ttype; SuperType* super = static_cast<SuperType*>(this); IntStreamType* is = super->get_parser_istream(); - - // What do have at the moment? - // + + // What do have at the moment? + // ttype = is->LA(1); - - // Start eating tokens until we get to one we want. - // - while (ttype != ImplTraits::CommonTokenType::TOKEN_EOF && set->isMember(ttype) == false) - { + + // Start eating tokens until we get to one we want. + // + while (ttype != ImplTraits::CommonTokenType::TOKEN_EOF && set->isMember(ttype) == false) + { is->consume(); ttype = is->LA(1); - } - -} - -template< class ImplTraits, class StreamType > + } + +} + +template< class ImplTraits, class StreamType > ANTLR_MARKER BaseRecognizer<ImplTraits, StreamType>::getRuleMemoization( ANTLR_INTKEY ruleIndex, ANTLR_MARKER ruleParseStart) -{ +{ /* The rule memos are an ANTLR3_LIST of ANTLR3_LIST. 
- */ + */ typedef IntTrie<ImplTraits, ANTLR_MARKER> RuleListType; typedef TrieEntry<ImplTraits, std::shared_ptr<RuleListType>> EntryType; typedef TrieEntry<ImplTraits, ANTLR_MARKER> SubEntryType; ANTLR_MARKER stopIndex; EntryType* entry; - - /* See if we have a list in the ruleMemos for this rule, and if not, then create one - * as we will need it eventually if we are being asked for the memo here. - */ + + /* See if we have a list in the ruleMemos for this rule, and if not, then create one + * as we will need it eventually if we are being asked for the memo here. + */ entry = m_state->get_ruleMemo()->get(ruleIndex); - + if (entry == NULL) - { + { /* Did not find it, so create a new one for it, with a bit depth based on the * size of the input stream. We need the bit depth to incorporate the number if * bits required to represent the largest possible stop index in the input, which is the @@ -671,190 +671,190 @@ ANTLR_MARKER BaseRecognizer<ImplTraits, StreamType>::getRuleMemoization( ANTLR_I * bit match algorithm to run to 63 bits, which will be the whole time spent in the trie ;-) */ m_state->get_ruleMemo()->add( ruleIndex, std::make_shared<RuleListType>(63) ); - + /* We cannot have a stopIndex in a trie we have just created of course */ return MEMO_RULE_UNKNOWN; - } - + } + std::shared_ptr<RuleListType> ruleList = entry->get_data(); - - /* See if there is a stop index associated with the supplied start index. - */ + + /* See if there is a stop index associated with the supplied start index. 
+ */ stopIndex = 0; - - SubEntryType* sub_entry = ruleList->get(ruleParseStart); - if (sub_entry != NULL) - { + + SubEntryType* sub_entry = ruleList->get(ruleParseStart); + if (sub_entry != NULL) + { stopIndex = sub_entry->get_data(); - } - + } + if (stopIndex == 0) - { + { return MEMO_RULE_UNKNOWN; - } - - return stopIndex; -} - -template< class ImplTraits, class StreamType > + } + + return stopIndex; +} + +template< class ImplTraits, class StreamType > bool BaseRecognizer<ImplTraits, StreamType>::alreadyParsedRule(ANTLR_MARKER ruleIndex) -{ +{ SuperType* super = static_cast<SuperType*>(this); IntStreamType* is = super->get_istream(); - - /* See if we have a memo marker for this. - */ + + /* See if we have a memo marker for this. + */ ANTLR_MARKER stopIndex = this->getRuleMemoization( ruleIndex, is->index() ); - + if (stopIndex == MEMO_RULE_UNKNOWN) - { + { return false; - } - + } + if (stopIndex == MEMO_RULE_FAILED) - { + { m_state->set_failed(true); - } - else - { + } + else + { is->seek(stopIndex+1); - } - - /* If here then the rule was executed for this input already - */ - return true; -} - -template< class ImplTraits, class StreamType > + } + + /* If here then the rule was executed for this input already + */ + return true; +} + +template< class ImplTraits, class StreamType > void BaseRecognizer<ImplTraits, StreamType>::memoize(ANTLR_MARKER ruleIndex, ANTLR_MARKER ruleParseStart) -{ - /* The rule memos are an ANTLR3_LIST of ANTLR3_LIST. - */ +{ + /* The rule memos are an ANTLR3_LIST of ANTLR3_LIST. + */ typedef IntTrie<ImplTraits, ANTLR_MARKER> RuleListType; typedef TrieEntry<ImplTraits, std::shared_ptr<RuleListType>> EntryType; EntryType* entry; ANTLR_MARKER stopIndex; SuperType* super = static_cast<SuperType*>(this); IntStreamType* is = super->get_istream(); - + stopIndex = (m_state->get_failed() == true) ? 
MEMO_RULE_FAILED : is->index() - 1; - + entry = m_state->get_ruleMemo()->get(ruleIndex); - + if (entry != NULL) - { + { std::shared_ptr<RuleListType> ruleList = entry->get_data(); - + /* If we don't already have this entry, append it. The memoize trie does not * accept duplicates so it won't add it if already there and we just ignore the * return code as we don't care if it is there already. */ ruleList->add(ruleParseStart, stopIndex); - } -} - -template< class ImplTraits, class StreamType > -const typename BaseRecognizer<ImplTraits, StreamType>::UnitType* -BaseRecognizer<ImplTraits, StreamType>::getCurrentInputSymbol( IntStreamType* istream ) -{ + } +} + +template< class ImplTraits, class StreamType > +const typename BaseRecognizer<ImplTraits, StreamType>::UnitType* +BaseRecognizer<ImplTraits, StreamType>::getCurrentInputSymbol( IntStreamType* istream ) +{ return this->getCurrentInputSymbol( istream, ClassForwarder<SuperType>() ); -} - -template< class ImplTraits, class StreamType > -const typename BaseRecognizer<ImplTraits, StreamType>::UnitType* +} + +template< class ImplTraits, class StreamType > +const typename BaseRecognizer<ImplTraits, StreamType>::UnitType* BaseRecognizer<ImplTraits, StreamType>::getCurrentInputSymbol(IntStreamType* /*istream*/, ClassForwarder<LexerType>) -{ +{ return NULL; -} - -template< class ImplTraits, class StreamType > -const typename BaseRecognizer<ImplTraits, StreamType>::UnitType* -BaseRecognizer<ImplTraits, StreamType>::getCurrentInputSymbol(IntStreamType* istream, ClassForwarder<ParserType>) -{ +} + +template< class ImplTraits, class StreamType > +const typename BaseRecognizer<ImplTraits, StreamType>::UnitType* +BaseRecognizer<ImplTraits, StreamType>::getCurrentInputSymbol(IntStreamType* istream, ClassForwarder<ParserType>) +{ typedef typename ImplTraits::TokenStreamType TokenStreamType; TokenStreamType* token_stream = static_cast<TokenStreamType*>(istream); return token_stream->LT(1); -} - -template< class ImplTraits, class 
StreamType > -const typename BaseRecognizer<ImplTraits, StreamType>::UnitType* -BaseRecognizer<ImplTraits, StreamType>::getCurrentInputSymbol(IntStreamType* istream, ClassForwarder<TreeParserType>) -{ +} + +template< class ImplTraits, class StreamType > +const typename BaseRecognizer<ImplTraits, StreamType>::UnitType* +BaseRecognizer<ImplTraits, StreamType>::getCurrentInputSymbol(IntStreamType* istream, ClassForwarder<TreeParserType>) +{ typedef typename ImplTraits::TreeNodeStreamType TreeNodeStreamType; TreeNodeStreamType* ctns = static_cast<TreeNodeStreamType*>(istream); return ctns->LT(1); -} - - -template< class ImplTraits, class StreamType > +} + + +template< class ImplTraits, class StreamType > typename BaseRecognizer<ImplTraits, StreamType>::UnitType* BaseRecognizer<ImplTraits, StreamType>::getMissingSymbol( IntStreamType* istream, ExceptionBaseType* e, ANTLR_UINT32 expectedTokenType, BitsetListType* follow) -{ +{ return this->get_super()->getMissingSymbol( istream, e, expectedTokenType, follow ); -} - - -template< class ImplTraits, class StreamType > +} + + +template< class ImplTraits, class StreamType > template<typename Predicate> -bool BaseRecognizer<ImplTraits, StreamType>::synpred(ClassForwarder<Predicate> pred) -{ +bool BaseRecognizer<ImplTraits, StreamType>::synpred(ClassForwarder<Predicate> pred) +{ ANTLR_MARKER start; - SuperType* super = static_cast<SuperType*>(this); + SuperType* super = static_cast<SuperType*>(this); IntStreamType* is = super->get_istream(); - - /* Begin backtracking so we can get back to where we started after trying out - * the syntactic predicate. - */ - start = is->mark(); - m_state->inc_backtracking(); - - /* Try the syntactical predicate - */ - this->get_super()->synpred( pred ); - - /* Reset - */ - is->rewind(start); - m_state->dec_backtracking(); - + + /* Begin backtracking so we can get back to where we started after trying out + * the syntactic predicate. 
+ */ + start = is->mark(); + m_state->inc_backtracking(); + + /* Try the syntactical predicate + */ + this->get_super()->synpred( pred ); + + /* Reset + */ + is->rewind(start); + m_state->dec_backtracking(); + if ( m_state->get_failed() == true) - { + { /* Predicate failed */ m_state->set_failed(false); return false; - } - else - { + } + else + { /* Predicate was successful */ m_state->set_failed(false); return true; - } -} - -template< class ImplTraits, class StreamType > -void BaseRecognizer<ImplTraits, StreamType>::exConstruct() -{ + } +} + +template< class ImplTraits, class StreamType > +void BaseRecognizer<ImplTraits, StreamType>::exConstruct() +{ this->get_super()->exConstruct(); -} - -template< class ImplTraits, class StreamType > -void BaseRecognizer<ImplTraits, StreamType>::reset() -{ +} + +template< class ImplTraits, class StreamType > +void BaseRecognizer<ImplTraits, StreamType>::reset() +{ this->reset( ClassForwarder<SuperType>() ); -} - -template< class ImplTraits, class StreamType > -template< typename CompType > -void BaseRecognizer<ImplTraits, StreamType>::reset( ClassForwarder<CompType> ) -{ +} + +template< class ImplTraits, class StreamType > +template< typename CompType > +void BaseRecognizer<ImplTraits, StreamType>::reset( ClassForwarder<CompType> ) +{ typedef typename RecognizerSharedStateType::RuleMemoType RuleMemoType; m_state->get_following().clear(); - + // Reset the state flags // m_state->set_errorRecovery(false); @@ -862,29 +862,29 @@ void BaseRecognizer<ImplTraits, StreamType>::reset( ClassForwarder<CompType> ) m_state->set_failed(false); m_state->set_errorCount(0); m_state->set_backtracking(0); - + if (m_state->get_ruleMemo() != NULL) { delete m_state->get_ruleMemo(); m_state->set_ruleMemo( new RuleMemoType(15) ); /* 16 bit depth is enough for 32768 rules! 
*/ } -} - -template< class ImplTraits, class StreamType > -void BaseRecognizer<ImplTraits, StreamType>::reset( ClassForwarder<LexerType> ) -{ +} + +template< class ImplTraits, class StreamType > +void BaseRecognizer<ImplTraits, StreamType>::reset( ClassForwarder<LexerType> ) +{ m_state->set_token_present( false ); - m_state->set_type( ImplTraits::CommonTokenType::TOKEN_INVALID ); - m_state->set_channel( TOKEN_DEFAULT_CHANNEL ); - m_state->set_tokenStartCharIndex( -1 ); - m_state->set_tokenStartCharPositionInLine(-1); - m_state->set_tokenStartLine( -1 ); - m_state->set_text(""); -} - -template< class ImplTraits, class StreamType > -BaseRecognizer<ImplTraits, StreamType>::~BaseRecognizer() -{ + m_state->set_type( ImplTraits::CommonTokenType::TOKEN_INVALID ); + m_state->set_channel( TOKEN_DEFAULT_CHANNEL ); + m_state->set_tokenStartCharIndex( -1 ); + m_state->set_tokenStartCharPositionInLine(-1); + m_state->set_tokenStartLine( -1 ); + m_state->set_text(""); +} + +template< class ImplTraits, class StreamType > +BaseRecognizer<ImplTraits, StreamType>::~BaseRecognizer() +{ // Did we have a state allocated? 
// if (m_state != NULL) @@ -896,8 +896,8 @@ BaseRecognizer<ImplTraits, StreamType>::~BaseRecognizer() delete m_state->get_ruleMemo(); m_state->set_ruleMemo(NULL); } - - + + // Free any exception space we have left around // ExceptionBaseType* thisE = m_state->get_exception(); @@ -905,16 +905,16 @@ BaseRecognizer<ImplTraits, StreamType>::~BaseRecognizer() { delete thisE; } - + // Free the shared state memory // delete m_state; } - + // Free the actual recognizer space // -} - - - +} + + + } diff --git a/contrib/libs/antlr3_cpp_runtime/include/antlr3bitset.hpp b/contrib/libs/antlr3_cpp_runtime/include/antlr3bitset.hpp index 68eab69568..4c33e377ff 100644 --- a/contrib/libs/antlr3_cpp_runtime/include/antlr3bitset.hpp +++ b/contrib/libs/antlr3_cpp_runtime/include/antlr3bitset.hpp @@ -1,96 +1,96 @@ -/** - * \file +/** + * \file * Defines the basic structures of an ANTLR3 bitset. this is a C version of the - * cut down Bitset class provided with the java version of antlr 3. + * cut down Bitset class provided with the java version of antlr 3. * * - */ + */ #ifndef _ANTLR3_BITSET_HPP #define _ANTLR3_BITSET_HPP - -// [The "BSD licence"] -// Copyright (c) 2005-2009 Gokulakannan Somasundaram, ElectronDB - -// -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// 3. The name of the author may not be used to endorse or promote products -// derived from this software without specific prior written permission. 
-// -// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR -// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES -// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. -// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, -// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT -// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF -// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - + +// [The "BSD licence"] +// Copyright (c) 2005-2009 Gokulakannan Somasundaram, ElectronDB + +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// 3. The name of the author may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + namespace antlr3 { - -/** How many bits in the elements - */ + +/** How many bits in the elements + */ static const ANTLR_UINT32 ANTLR_BITSET_BITS = 64; - -/** How many bits in a nible of bits - */ + +/** How many bits in a nible of bits + */ static const ANTLR_UINT32 ANTLR_BITSET_NIBBLE = 4; - -/** log2 of ANTLR3_BITSET_BITS 2^ANTLR3_BITSET_LOG_BITS = ANTLR3_BITSET_BITS - */ + +/** log2 of ANTLR3_BITSET_BITS 2^ANTLR3_BITSET_LOG_BITS = ANTLR3_BITSET_BITS + */ static const ANTLR_UINT32 ANTLR_BITSET_LOG_BITS = 6; - -/** We will often need to do a mod operator (i mod nbits). - * For powers of two, this mod operation is the - * same as: + +/** We will often need to do a mod operator (i mod nbits). + * For powers of two, this mod operation is the + * same as: * - (i & (nbits-1)). - * - * Since mod is relatively slow, we use an easily - * precomputed mod mask to do the mod instead. - */ + * + * Since mod is relatively slow, we use an easily + * precomputed mod mask to do the mod instead. 
+ */ static const ANTLR_UINT32 ANTLR_BITSET_MOD_MASK = ANTLR_BITSET_BITS - 1; - -template <class ImplTraits> -class BitsetList : public ImplTraits::AllocPolicyType -{ -public: + +template <class ImplTraits> +class BitsetList : public ImplTraits::AllocPolicyType +{ +public: typedef typename ImplTraits::AllocPolicyType AllocPolicyType; typedef typename ImplTraits::BitsetType BitsetType; - -private: + +private: /// Pointer to the allocated array of bits for this bit set, which /// is an array of 64 bit elements (of the architecture). If we find a - /// machine/C compiler that does not know anything about 64 bit values + /// machine/C compiler that does not know anything about 64 bit values /// then it should be easy enough to produce a 32 bit (or less) version - /// of the bitset code. Note that the pointer here may be static if laid down + /// of the bitset code. Note that the pointer here may be static if laid down /// by the code generation, and it must be copied if it is to be manipulated /// to perform followset calculations. - /// - ANTLR_BITWORD* m_bits; - - /// Length of the current bit set in ANTLR3_UINT64 units. - /// - ANTLR_UINT32 m_length; - -public: + /// + ANTLR_BITWORD* m_bits; + + /// Length of the current bit set in ANTLR3_UINT64 units. 
+ /// + ANTLR_UINT32 m_length; + +public: BitsetList(); BitsetList( ANTLR_BITWORD* bits, ANTLR_UINT32 length ); - + ANTLR_BITWORD* get_bits() const; ANTLR_UINT32 get_length() const; void set_bits( ANTLR_BITWORD* bits ); void set_length( ANTLR_UINT32 length ); - + /// /// \brief /// Creates a new bitset with at least one 64 bit bset of bits, but as @@ -113,30 +113,30 @@ public: /// /// BitsetType* bitsetLoad(); - + BitsetType* bitsetCopy(); - -}; - -template <class ImplTraits> -class Bitset : public ImplTraits::AllocPolicyType -{ -public: + +}; + +template <class ImplTraits> +class Bitset : public ImplTraits::AllocPolicyType +{ +public: typedef typename ImplTraits::AllocPolicyType AllocPolicyType; typedef typename AllocPolicyType::template ListType<ANTLR_UINT32> IntListType; typedef typename ImplTraits::BitsetListType BitsetListType; - -private: + +private: /// The actual bits themselves /// BitsetListType m_blist; - -public: + +public: Bitset( ANTLR_UINT32 nbits=0 ); Bitset( const Bitset& bitset ); - Bitset* clone() const; + Bitset* clone() const; Bitset* bor(Bitset* bitset2); - + BitsetListType& get_blist(); void borInPlace(Bitset* bitset2); ANTLR_UINT32 size() const; @@ -147,7 +147,7 @@ public: ANTLR_UINT32 numBits() const; void remove(ANTLR_UINT32 bit); bool isNilNode() const; - + /** Produce an integer list of all the bits that are turned on * in this bitset. Used for error processing in the main as the bitset * reresents a number of integer tokens which we use for follow sets @@ -156,7 +156,7 @@ public: * The first entry is the number of elements following in the list. 
*/ ANTLR_INT32* toIntList() const; - + /// /// \brief /// Creates a new bitset with at least one element, but as @@ -204,19 +204,19 @@ public: ///antlr3BitsetList static Bitset* BitsetFromList(const IntListType& list); ~Bitset(); - -private: + +private: void growToInclude(ANTLR_INT32 bit); static ANTLR_UINT64 BitMask(ANTLR_UINT32 bitNumber); static ANTLR_UINT32 NumWordsToHold(ANTLR_UINT32 bit); static ANTLR_UINT32 WordNumber(ANTLR_UINT32 bit); void bitsetORInPlace(Bitset* bitset2); -}; - +}; + } - -#include "antlr3bitset.inl" - -#endif - + +#include "antlr3bitset.inl" + +#endif + diff --git a/contrib/libs/antlr3_cpp_runtime/include/antlr3bitset.inl b/contrib/libs/antlr3_cpp_runtime/include/antlr3bitset.inl index 64318ea0ea..37dfa88a1f 100644 --- a/contrib/libs/antlr3_cpp_runtime/include/antlr3bitset.inl +++ b/contrib/libs/antlr3_cpp_runtime/include/antlr3bitset.inl @@ -1,46 +1,46 @@ namespace antlr3 { - -template <class ImplTraits> -ANTLR_INLINE BitsetList<ImplTraits>::BitsetList() -{ + +template <class ImplTraits> +ANTLR_INLINE BitsetList<ImplTraits>::BitsetList() +{ m_bits = NULL; m_length = 0; -} - -template <class ImplTraits> -ANTLR_INLINE BitsetList<ImplTraits>::BitsetList( ANTLR_BITWORD* bits, ANTLR_UINT32 length ) -{ +} + +template <class ImplTraits> +ANTLR_INLINE BitsetList<ImplTraits>::BitsetList( ANTLR_BITWORD* bits, ANTLR_UINT32 length ) +{ m_bits = bits; m_length = length; -} - -template <class ImplTraits> -ANTLR_INLINE ANTLR_BITWORD* BitsetList<ImplTraits>::get_bits() const -{ +} + +template <class ImplTraits> +ANTLR_INLINE ANTLR_BITWORD* BitsetList<ImplTraits>::get_bits() const +{ return m_bits; -} - -template <class ImplTraits> -ANTLR_INLINE ANTLR_UINT32 BitsetList<ImplTraits>::get_length() const -{ +} + +template <class ImplTraits> +ANTLR_INLINE ANTLR_UINT32 BitsetList<ImplTraits>::get_length() const +{ return m_length; -} - -template <class ImplTraits> -ANTLR_INLINE void BitsetList<ImplTraits>::set_bits( ANTLR_BITWORD* bits ) -{ +} + +template 
<class ImplTraits> +ANTLR_INLINE void BitsetList<ImplTraits>::set_bits( ANTLR_BITWORD* bits ) +{ m_bits = bits; -} - -template <class ImplTraits> -ANTLR_INLINE void BitsetList<ImplTraits>::set_length( ANTLR_UINT32 length ) -{ +} + +template <class ImplTraits> +ANTLR_INLINE void BitsetList<ImplTraits>::set_length( ANTLR_UINT32 length ) +{ m_length = length; -} - -template <class ImplTraits> -typename BitsetList<ImplTraits>::BitsetType* BitsetList<ImplTraits>::bitsetLoad() -{ +} + +template <class ImplTraits> +typename BitsetList<ImplTraits>::BitsetType* BitsetList<ImplTraits>::bitsetLoad() +{ // Allocate memory for the bitset structure itself // the input parameter is the bit number (0 based) // to include in the bitset, so we need at at least @@ -50,7 +50,7 @@ typename BitsetList<ImplTraits>::BitsetType* BitsetList<ImplTraits>::bitsetLoad( // of it. // BitsetType* bitset = new BitsetType(); - + if (this != NULL) { // Now we can add the element bits into the set @@ -60,142 +60,142 @@ typename BitsetList<ImplTraits>::BitsetType* BitsetList<ImplTraits>::bitsetLoad( { if( bitset->get_blist().get_length() <= count) bitset->grow(count+1); - + typename ImplTraits::BitsetListType& blist = bitset->get_blist(); blist.m_bits[count] = *(m_bits+count); count++; } } - + // return the new bitset // return bitset; -} - -template <class ImplTraits> -typename BitsetList<ImplTraits>::BitsetType* BitsetList<ImplTraits>::bitsetCopy() -{ +} + +template <class ImplTraits> +typename BitsetList<ImplTraits>::BitsetType* BitsetList<ImplTraits>::bitsetCopy() +{ BitsetType* bitset; ANTLR_UINT32 numElements = m_length; - - // Avoid memory thrashing at the expense of a few more bytes - // + + // Avoid memory thrashing at the expense of a few more bytes + // if (numElements < 8) numElements = 8; - - // Allocate memory for the bitset structure itself - // - bitset = new Bitset<ImplTraits>(numElements); + + // Allocate memory for the bitset structure itself + // + bitset = new 
Bitset<ImplTraits>(numElements); memcpy(bitset->get_blist().get_bits(), m_bits, numElements * sizeof(ANTLR_BITWORD)); - - // All seems good - // - return bitset; -} - -template <class ImplTraits> -Bitset<ImplTraits>::Bitset( ANTLR_UINT32 numBits ) -{ + + // All seems good + // + return bitset; +} + +template <class ImplTraits> +Bitset<ImplTraits>::Bitset( ANTLR_UINT32 numBits ) +{ // Avoid memory thrashing at the up front expense of a few bytes if (numBits < (8 * ANTLR_BITSET_BITS)) numBits = 8 * ANTLR_BITSET_BITS; - + // No we need to allocate the memory for the number of bits asked for // in multiples of ANTLR3_UINT64. // ANTLR_UINT32 numelements = ((numBits -1) >> ANTLR_BITSET_LOG_BITS) + 1; - + m_blist.set_bits( (ANTLR_BITWORD*) AllocPolicyType::alloc0(numelements * sizeof(ANTLR_BITWORD))); - + m_blist.set_length( numelements ); -} - -template <class ImplTraits> -Bitset<ImplTraits>::Bitset( const Bitset& bitset ) +} + +template <class ImplTraits> +Bitset<ImplTraits>::Bitset( const Bitset& bitset ) :m_blist(bitset.m_blist) -{ -} - -template <class ImplTraits> -ANTLR_INLINE Bitset<ImplTraits>* Bitset<ImplTraits>::clone() const -{ +{ +} + +template <class ImplTraits> +ANTLR_INLINE Bitset<ImplTraits>* Bitset<ImplTraits>::clone() const +{ Bitset* bitset; - - // Allocate memory for the bitset structure itself - // - bitset = new Bitset( ANTLR_BITSET_BITS * m_blist.get_length() ); - - // Install the actual bits in the source set - // + + // Allocate memory for the bitset structure itself + // + bitset = new Bitset( ANTLR_BITSET_BITS * m_blist.get_length() ); + + // Install the actual bits in the source set + // memcpy(bitset->m_blist.get_bits(), m_blist.get_bits(), m_blist.get_length() * sizeof(ANTLR_BITWORD) ); - - // All seems good - // - return bitset; -} - -template <class ImplTraits> -Bitset<ImplTraits>* Bitset<ImplTraits>::bor(Bitset* bitset2) -{ + + // All seems good + // + return bitset; +} + +template <class ImplTraits> +Bitset<ImplTraits>* 
Bitset<ImplTraits>::bor(Bitset* bitset2) +{ Bitset* bitset; - + if (this == NULL) return bitset2->clone(); - + if (bitset2 == NULL) return this->clone(); - - // Allocate memory for the newly ordered bitset structure itself. - // - bitset = this->clone(); - bitset->bitsetORInPlace(bitset2); - return bitset; -} - -template <class ImplTraits> + + // Allocate memory for the newly ordered bitset structure itself. + // + bitset = this->clone(); + bitset->bitsetORInPlace(bitset2); + return bitset; +} + +template <class ImplTraits> void Bitset<ImplTraits>::borInPlace(Bitset* bitset2) -{ +{ ANTLR_UINT32 minimum; - + if (bitset2 == NULL) return; - + // First make sure that the target bitset is big enough - // for the new bits to be ored in. - // + // for the new bits to be ored in. + // if ( m_blist.get_length() < bitset2->m_blist.get_length() ) this->growToInclude( bitset2->m_blist.get_length() * sizeof(ANTLR_BITWORD) ); - // Or the miniimum number of bits after any resizing went on - // + // Or the miniimum number of bits after any resizing went on + // if ( m_blist.get_length() < bitset2->m_blist.get_length() ) minimum = m_blist.get_length(); else minimum = bitset2->m_blist.get_length(); - + ANTLR_BITWORD* bits1 = m_blist.get_bits(); ANTLR_BITWORD* bits2 = bitset2->m_blist.get_bits(); for (ANTLR_UINT32 i = minimum; i > 0; i--) bits1[i-1] |= bits2[i-1]; -} - -template <class ImplTraits> -ANTLR_UINT32 Bitset<ImplTraits>::size() const -{ - ANTLR_UINT32 degree; - ANTLR_INT32 i; - ANTLR_INT8 bit; +} + +template <class ImplTraits> +ANTLR_UINT32 Bitset<ImplTraits>::size() const +{ + ANTLR_UINT32 degree; + ANTLR_INT32 i; + ANTLR_INT8 bit; - // TODO: Come back to this, it may be faster to & with 0x01 - // then shift right a copy of the 4 bits, than shift left a constant of 1. - // But then again, the optimizer might just work this out - // anyway. 
- // - degree = 0; + // TODO: Come back to this, it may be faster to & with 0x01 + // then shift right a copy of the 4 bits, than shift left a constant of 1. + // But then again, the optimizer might just work this out + // anyway. + // + degree = 0; ANTLR_BITWORD* bits = m_blist.get_bits(); for (i = m_blist.get_length() - 1; i>= 0; i--) - { + { if (bits[i] != 0) { for(bit = ANTLR_BITSET_BITS - 1; bit >= 0; bit--) @@ -206,141 +206,141 @@ ANTLR_UINT32 Bitset<ImplTraits>::size() const } } } - } - return degree; -} - -template <class ImplTraits> + } + return degree; +} + +template <class ImplTraits> ANTLR_INLINE void Bitset<ImplTraits>::add(ANTLR_INT32 bit) -{ +{ ANTLR_UINT32 word = Bitset::WordNumber(bit); - + if (word >= m_blist.get_length() ) this->growToInclude(bit); ANTLR_BITWORD* bits = m_blist.get_bits(); bits[word] |= Bitset::BitMask(bit); -} - -template <class ImplTraits> +} + +template <class ImplTraits> void Bitset<ImplTraits>::grow(ANTLR_INT32 newSize) -{ +{ ANTLR_BITWORD* newBits; - - // Space for newly sized bitset - TODO: come back to this and use realloc?, it may - // be more efficient... - // - newBits = (ANTLR_BITWORD*) AllocPolicyType::alloc0(newSize * sizeof(ANTLR_BITWORD) ); + + // Space for newly sized bitset - TODO: come back to this and use realloc?, it may + // be more efficient... + // + newBits = (ANTLR_BITWORD*) AllocPolicyType::alloc0(newSize * sizeof(ANTLR_BITWORD) ); if ( m_blist.get_bits() != NULL) - { + { // Copy existing bits // memcpy( newBits, m_blist.get_bits(), m_blist.get_length() * sizeof(ANTLR_BITWORD) ); - + // Out with the old bits... de de de derrr // AllocPolicyType::free( m_blist.get_bits() ); - } - - // In with the new bits... keerrrang. - // - m_blist.set_bits(newBits); - m_blist.set_length(newSize); -} - -template <class ImplTraits> + } + + // In with the new bits... keerrrang. 
+ // + m_blist.set_bits(newBits); + m_blist.set_length(newSize); +} + +template <class ImplTraits> bool Bitset<ImplTraits>::equals(Bitset* bitset2) const -{ - ANTLR_UINT32 minimum; - ANTLR_UINT32 i; - +{ + ANTLR_UINT32 minimum; + ANTLR_UINT32 i; + if (this == NULL || bitset2 == NULL) return false; - - // Work out the minimum comparison set - // + + // Work out the minimum comparison set + // if ( m_blist.get_length() < bitset2->m_blist.get_length() ) minimum = m_blist.get_length(); - else + else minimum = bitset2->m_blist.get_length(); - - // Make sure explict in common bits are equal - // + + // Make sure explict in common bits are equal + // for (i = minimum - 1; i < minimum ; i--) - { + { ANTLR_BITWORD* bits1 = m_blist.get_bits(); ANTLR_BITWORD* bits2 = bitset2->m_blist.get_bits(); if ( bits1[i] != bits2[i]) return false; - } - - // Now make sure the bits of the larger set are all turned - // off. - // + } + + // Now make sure the bits of the larger set are all turned + // off. + // if ( m_blist.get_length() > minimum) - { + { for (i = minimum ; i < m_blist.get_length(); i++) { ANTLR_BITWORD* bits = m_blist.get_bits(); if(bits[i] != 0) return false; } - } - else if (bitset2->m_blist.get_length() > minimum) - { + } + else if (bitset2->m_blist.get_length() > minimum) + { ANTLR_BITWORD* bits = m_blist.get_bits(); for (i = minimum; i < bitset2->m_blist.get_length(); i++) { if ( bits[i] != 0 ) return false; } - } - - return true; -} - -template <class ImplTraits> + } + + return true; +} + +template <class ImplTraits> bool Bitset<ImplTraits>::isMember(ANTLR_UINT32 bit) const -{ - ANTLR_UINT32 wordNo = Bitset::WordNumber(bit); - +{ + ANTLR_UINT32 wordNo = Bitset::WordNumber(bit); + if (wordNo >= m_blist.get_length()) return false; ANTLR_BITWORD* bits = m_blist.get_bits(); if ( (bits[wordNo] & Bitset::BitMask(bit)) == 0) return false; - else + else return true; -} - -template <class ImplTraits> -ANTLR_INLINE ANTLR_UINT32 Bitset<ImplTraits>::numBits() const -{ +} + 
+template <class ImplTraits> +ANTLR_INLINE ANTLR_UINT32 Bitset<ImplTraits>::numBits() const +{ return m_blist.get_length() << ANTLR_BITSET_LOG_BITS; -} - -template <class ImplTraits> -ANTLR_INLINE typename ImplTraits::BitsetListType& Bitset<ImplTraits>::get_blist() -{ +} + +template <class ImplTraits> +ANTLR_INLINE typename ImplTraits::BitsetListType& Bitset<ImplTraits>::get_blist() +{ return m_blist; -} - -template <class ImplTraits> -ANTLR_INLINE void Bitset<ImplTraits>::remove(ANTLR_UINT32 bit) -{ - ANTLR_UINT32 wordNo = Bitset::WordNumber(bit); - +} + +template <class ImplTraits> +ANTLR_INLINE void Bitset<ImplTraits>::remove(ANTLR_UINT32 bit) +{ + ANTLR_UINT32 wordNo = Bitset::WordNumber(bit); + if (wordNo < m_blist.get_length()) { ANTLR_BITWORD* bits = m_blist.get_bits(); bits[wordNo] &= ~(Bitset::BitMask(bit)); } -} - -template <class ImplTraits> -ANTLR_INLINE bool Bitset<ImplTraits>::isNilNode() const -{ +} + +template <class ImplTraits> +ANTLR_INLINE bool Bitset<ImplTraits>::isNilNode() const +{ ANTLR_UINT32 i; ANTLR_BITWORD* bits = m_blist.get_bits(); for (i = m_blist.get_length() -1 ; i < m_blist.get_length(); i--) @@ -349,144 +349,144 @@ ANTLR_INLINE bool Bitset<ImplTraits>::isNilNode() const return false; } return true; -} - -template <class ImplTraits> -ANTLR_INT32* Bitset<ImplTraits>::toIntList() const -{ +} + +template <class ImplTraits> +ANTLR_INT32* Bitset<ImplTraits>::toIntList() const +{ ANTLR_UINT32 numInts; // How many integers we will need ANTLR_UINT32 numBits; // How many bits are in the set - ANTLR_UINT32 i; - ANTLR_UINT32 index; - - ANTLR_INT32* intList; - - numInts = this->size() + 1; - numBits = this->numBits(); - - intList = (ANTLR_INT32*) AllocPolicyType::alloc(numInts * sizeof(ANTLR_INT32)); + ANTLR_UINT32 i; + ANTLR_UINT32 index; + + ANTLR_INT32* intList; + + numInts = this->size() + 1; + numBits = this->numBits(); + + intList = (ANTLR_INT32*) AllocPolicyType::alloc(numInts * sizeof(ANTLR_INT32)); - intList[0] = numInts; - - // 
Enumerate the bits that are turned on - // + intList[0] = numInts; + + // Enumerate the bits that are turned on + // for (i = 0, index = 1; i<numBits; i++) - { + { if (this->isMember(i) == true) intList[index++] = i; - } - - // Result set - // - return intList; -} - -template <class ImplTraits> -ANTLR_INLINE Bitset<ImplTraits>::~Bitset() -{ + } + + // Result set + // + return intList; +} + +template <class ImplTraits> +ANTLR_INLINE Bitset<ImplTraits>::~Bitset() +{ if (m_blist.get_bits() != NULL) AllocPolicyType::free(m_blist.get_bits()); - return; -} - -template <class ImplTraits> + return; +} + +template <class ImplTraits> void Bitset<ImplTraits>::growToInclude(ANTLR_INT32 bit) -{ +{ ANTLR_UINT32 bl; ANTLR_UINT32 nw; - + bl = (m_blist.get_length() << 1); nw = Bitset::NumWordsToHold(bit); - + if (bl > nw) this->grow(bl); else this->grow(nw); -} - -template <class ImplTraits> +} + +template <class ImplTraits> ANTLR_INLINE ANTLR_UINT64 Bitset<ImplTraits>::BitMask(ANTLR_UINT32 bitNumber) -{ +{ return ((ANTLR_UINT64)1) << (bitNumber & (ANTLR_BITSET_MOD_MASK)); -} - -template <class ImplTraits> +} + +template <class ImplTraits> ANTLR_INLINE ANTLR_UINT32 Bitset<ImplTraits>::NumWordsToHold(ANTLR_UINT32 bit) -{ +{ return (bit >> ANTLR_BITSET_LOG_BITS) + 1; -} - -template <class ImplTraits> +} + +template <class ImplTraits> ANTLR_INLINE ANTLR_UINT32 Bitset<ImplTraits>::WordNumber(ANTLR_UINT32 bit) -{ +{ return bit >> ANTLR_BITSET_LOG_BITS; -} - -template <class ImplTraits> -void Bitset<ImplTraits>::bitsetORInPlace(Bitset* bitset2) -{ +} + +template <class ImplTraits> +void Bitset<ImplTraits>::bitsetORInPlace(Bitset* bitset2) +{ ANTLR_UINT32 minimum; - ANTLR_UINT32 i; - + ANTLR_UINT32 i; + if (bitset2 == NULL) return; - - // First make sure that the target bitset is big enough - // for the new bits to be ored in. - // + + // First make sure that the target bitset is big enough + // for the new bits to be ored in. 
+ // if ( m_blist.get_length() < bitset2->m_blist.get_length() ) this->growToInclude( bitset2->m_blist.get_length() * sizeof(ANTLR_BITWORD) ); - // Or the miniimum number of bits after any resizing went on - // + // Or the miniimum number of bits after any resizing went on + // if ( m_blist.get_length() < bitset2->m_blist.get_length() ) minimum = m_blist.get_length(); else minimum = bitset2->m_blist.get_length(); - + ANTLR_BITWORD* bits1 = m_blist.get_bits(); ANTLR_BITWORD* bits2 = bitset2->m_blist.get_bits(); for (i = minimum; i > 0; i--) bits1[i-1] |= bits2[i-1]; -} - -template <class ImplTraits> -Bitset<ImplTraits>* Bitset<ImplTraits>::BitsetOf(ANTLR_INT32 bit) -{ +} + +template <class ImplTraits> +Bitset<ImplTraits>* Bitset<ImplTraits>::BitsetOf(ANTLR_INT32 bit) +{ // Allocate memory for the bitset structure itself - // the input parameter is the bit number (0 based) - // to include in the bitset, so we need at at least + // the input parameter is the bit number (0 based) + // to include in the bitset, so we need at at least // bit + 1 bits. If any arguments indicate a - // a bit higher than the default number of bits (0 menas default size) - // then Add() will take care - // of it. - // + // a bit higher than the default number of bits (0 menas default size) + // then Add() will take care + // of it. 
+ // Bitset<ImplTraits>* bitset = new Bitset<ImplTraits>(0); bitset->add(bit); return bitset; -} - -template <class ImplTraits> -Bitset<ImplTraits>* Bitset<ImplTraits>::BitsetOf(ANTLR_INT32 bit1, ANTLR_INT32 bit2) -{ +} + +template <class ImplTraits> +Bitset<ImplTraits>* Bitset<ImplTraits>::BitsetOf(ANTLR_INT32 bit1, ANTLR_INT32 bit2) +{ Bitset<ImplTraits>* bitset = Bitset<ImplTraits>::BitsetOf(bit1); bitset->add(bit2); return bitset; -} - +} + //static -template <class ImplTraits> -Bitset<ImplTraits>* Bitset<ImplTraits>::BitsetFromList(const IntListType& list) -{ +template <class ImplTraits> +Bitset<ImplTraits>* Bitset<ImplTraits>::BitsetFromList(const IntListType& list) +{ // We have no idea what exactly is in the list - // so create a default bitset and then just add stuff - // as we enumerate. - // - Bitset<ImplTraits>* bitset = new Bitset<ImplTraits>(0); + // so create a default bitset and then just add stuff + // as we enumerate. + // + Bitset<ImplTraits>* bitset = new Bitset<ImplTraits>(0); for( int i = 0; i < list.size(); ++i ) bitset->add( list[i] ); - + return bitset; -} - +} + } diff --git a/contrib/libs/antlr3_cpp_runtime/include/antlr3collections.hpp b/contrib/libs/antlr3_cpp_runtime/include/antlr3collections.hpp index 7551c243d0..26cd78387a 100644 --- a/contrib/libs/antlr3_cpp_runtime/include/antlr3collections.hpp +++ b/contrib/libs/antlr3_cpp_runtime/include/antlr3collections.hpp @@ -1,78 +1,78 @@ #ifndef ANTLR3COLLECTIONS_HPP #define ANTLR3COLLECTIONS_HPP - -// [The "BSD licence"] -// Copyright (c) 2005-2009 Gokulakannan Somasundaram, ElectronDB - -// -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// 2. 
Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// 3. The name of the author may not be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR -// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES -// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. -// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, -// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT -// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF -// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - + +// [The "BSD licence"] +// Copyright (c) 2005-2009 Gokulakannan Somasundaram, ElectronDB + +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// 3. The name of the author may not be used to endorse or promote products +// derived from this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + namespace antlr3 { - -/* -------------- TRIE Interfaces ---------------- */ - -/** Structure that holds the payload entry in an ANTLR3_INT_TRIE or ANTLR3_STRING_TRIE - */ -template< class ImplTraits, class DataType > -class TrieEntry : public ImplTraits::AllocPolicyType -{ -public: + +/* -------------- TRIE Interfaces ---------------- */ + +/** Structure that holds the payload entry in an ANTLR3_INT_TRIE or ANTLR3_STRING_TRIE + */ +template< class ImplTraits, class DataType > +class TrieEntry : public ImplTraits::AllocPolicyType +{ +public: typedef typename ImplTraits::AllocPolicyType AllocPolicy; - -private: + +private: DataType m_data; TrieEntry* m_next; /* Allows duplicate entries for same key in insertion order */ - -public: + +public: TrieEntry(const DataType& data, TrieEntry* next); DataType& get_data(); const DataType& get_data() const; TrieEntry* get_next() const; void set_next( TrieEntry* next ); -}; - -/** Structure that defines an element/node in an ANTLR_INT_TRIE - */ -template< class ImplTraits, class DataType > -class IntTrieNode : public ImplTraits::AllocPolicyType -{ -public: +}; + +/** Structure that defines an element/node in an ANTLR_INT_TRIE + */ +template< class ImplTraits, class DataType > +class IntTrieNode 
: public ImplTraits::AllocPolicyType +{ +public: typedef TrieEntry<ImplTraits, DataType> TrieEntryType; typedef TrieEntryType BucketsType; -private: +private: ANTLR_UINT32 m_bitNum; /**< This is the left/right bit index for traversal along the nodes */ ANTLR_INTKEY m_key; /**< This is the actual key that the entry represents if it is a terminal node */ BucketsType* m_buckets; /**< This is the data bucket(s) that the key indexes, which may be NULL */ IntTrieNode* m_leftN; /**< Pointer to the left node from here when sKey & bitNum = 0 */ IntTrieNode* m_rightN; /**< Pointer to the right node from here when sKey & bitNum, = 1 */ - -public: + +public: IntTrieNode(); ~IntTrieNode(); - + ANTLR_UINT32 get_bitNum() const; ANTLR_INTKEY get_key() const; BucketsType* get_buckets() const; @@ -83,56 +83,56 @@ public: void set_buckets( BucketsType* buckets ); void set_leftN( IntTrieNode* leftN ); void set_rightN( IntTrieNode* rightN ); -}; +}; -/** Structure that defines an ANTLR3_INT_TRIE. For this particular implementation, - * as you might expect, the key is turned into a "string" by looking at bit(key, depth) - * of the integer key. Using 64 bit keys gives us a depth limit of 64 (or bit 0..63) - * and potentially a huge trie. This is the algorithm for a Patricia Trie. - * Note also that this trie [can] accept multiple entries for the same key and is - * therefore a kind of elastic bucket patricia trie. - * - * If you find this code useful, please feel free to 'steal' it for any purpose - * as covered by the BSD license under which ANTLR is issued. You can cut the code +/** Structure that defines an ANTLR3_INT_TRIE. For this particular implementation, + * as you might expect, the key is turned into a "string" by looking at bit(key, depth) + * of the integer key. Using 64 bit keys gives us a depth limit of 64 (or bit 0..63) + * and potentially a huge trie. This is the algorithm for a Patricia Trie. 
+ * Note also that this trie [can] accept multiple entries for the same key and is + * therefore a kind of elastic bucket patricia trie. + * + * If you find this code useful, please feel free to 'steal' it for any purpose + * as covered by the BSD license under which ANTLR is issued. You can cut the code * but as the ANTLR library is only about 50K (Windows Vista), you might find it - * easier to just link the library. Please keep all comments and licenses and so on - * in any version of this you create of course. - * - * Jim Idle. + * easier to just link the library. Please keep all comments and licenses and so on + * in any version of this you create of course. + * + * Jim Idle. * - */ -class IntTrieBase -{ -public: + */ +class IntTrieBase +{ +public: static const ANTLR_UINT8* get_bitIndex(); static const ANTLR_UINT64* get_bitMask(); -}; +}; -template< class ImplTraits, class DataType > -class IntTrie : public ImplTraits::AllocPolicyType, public IntTrieBase -{ -public: +template< class ImplTraits, class DataType > +class IntTrie : public ImplTraits::AllocPolicyType, public IntTrieBase +{ +public: typedef TrieEntry<ImplTraits, DataType> TrieEntryType; typedef IntTrieNode<ImplTraits, DataType> IntTrieNodeType; -private: +private: IntTrieNodeType* m_root; /* Root node of this integer trie */ IntTrieNodeType* m_current; /* Used to traverse the TRIE with the next() method */ ANTLR_UINT32 m_count; /* Current entry count */ bool m_allowDups; /* Whether this trie accepts duplicate keys */ - -public: + +public: /* INT TRIE Implementation of depth 64 bits, being the number of bits * in a 64 bit integer. */ - IntTrie( ANTLR_UINT32 depth ); - + IntTrie( ANTLR_UINT32 depth ); + /** Search the int Trie and return a pointer to the first bucket indexed * by the key if it is contained in the trie, otherwise NULL. */ TrieEntryType* get( ANTLR_INTKEY key); bool del( ANTLR_INTKEY key); - + /** Add an entry into the INT trie. 
* Basically we descend the trie as we do when searching it, which will * locate the only node in the trie that can be reached by the bit pattern of the @@ -145,139 +145,139 @@ public: * node pointing to itself or the data node we are inserting 'before'. */ bool add( ANTLR_INTKEY key, const DataType& data ); - ~IntTrie(); -}; - -/** - * A topological sort system that given a set of dependencies of a node m on node n, - * can sort them in dependency order. This is a generally useful utility object - * that does not care what the things are it is sorting. Generally the set - * to be sorted will be numeric indexes into some other structure such as an ANTLR3_VECTOR. - * I have provided a sort method that given ANTLR3_VECTOR as an input will sort - * the vector entries in place, as well as a sort method that just returns an - * array of the sorted noded indexes, in case you are not sorting ANTLR3_VECTORS but - * some set of your own device. - * - * Of the two main algorithms that could be used, I chose to use the depth first - * search for unvisited nodes as a) This runs in linear time, and b) it is what - * we used in the ANTLR Tool to perform a topological sort of the input grammar files - * based on their dependencies. - */ -template<class ImplTraits> -class Topo : public ImplTraits::AllocPolicyType -{ -public: + ~IntTrie(); +}; + +/** + * A topological sort system that given a set of dependencies of a node m on node n, + * can sort them in dependency order. This is a generally useful utility object + * that does not care what the things are it is sorting. Generally the set + * to be sorted will be numeric indexes into some other structure such as an ANTLR3_VECTOR. + * I have provided a sort method that given ANTLR3_VECTOR as an input will sort + * the vector entries in place, as well as a sort method that just returns an + * array of the sorted noded indexes, in case you are not sorting ANTLR3_VECTORS but + * some set of your own device. 
+ * + * Of the two main algorithms that could be used, I chose to use the depth first + * search for unvisited nodes as a) This runs in linear time, and b) it is what + * we used in the ANTLR Tool to perform a topological sort of the input grammar files + * based on their dependencies. + */ +template<class ImplTraits> +class Topo : public ImplTraits::AllocPolicyType +{ +public: typedef typename ImplTraits::BitsetType BitsetType; typedef typename ImplTraits::AllocPolicyType AllocPolicyType; - -private: - /** - * A vector of vectors of edges, built by calling the addEdge method() - * to indicate that node number n depends on node number m. Each entry in the vector - * contains a bitset, which has a bit index set for each node upon which the - * entry node depends. - */ + +private: + /** + * A vector of vectors of edges, built by calling the addEdge method() + * to indicate that node number n depends on node number m. Each entry in the vector + * contains a bitset, which has a bit index set for each node upon which the + * entry node depends. + */ BitsetType** m_edges; - - /** - * A vector used to build up the sorted output order. Note that - * as the vector contains UINT32 then the maximum node index is - * 'limited' to 2^32, as nodes should be zero based. - */ + + /** + * A vector used to build up the sorted output order. Note that + * as the vector contains UINT32 then the maximum node index is + * 'limited' to 2^32, as nodes should be zero based. + */ ANTLR_UINT32* m_sorted; - - /** - * A vector used to detect cycles in the edge dependecies. It is used - * as a stack and each time we descend a node to one of its edges we - * add the node into this stack. 
If we find a node that we have already - * visited in the stack, then it means there wasa cycle such as 9->8->1->9 - * as the only way a node can be on the stack is if we are currently - * descnding from it as we remove it from the stack as we exit from - * descending its dependencies - */ + + /** + * A vector used to detect cycles in the edge dependecies. It is used + * as a stack and each time we descend a node to one of its edges we + * add the node into this stack. If we find a node that we have already + * visited in the stack, then it means there wasa cycle such as 9->8->1->9 + * as the only way a node can be on the stack is if we are currently + * descnding from it as we remove it from the stack as we exit from + * descending its dependencies + */ ANTLR_UINT32* m_cycle; - - /** - * A flag that indicates the algorithm found a cycle in the edges - * such as 9->8->1->9 - * If this flag is set after you have called one of the sort routines - * then the detected cycle will be contained in the cycle array and - * cycleLimit will point to the one after the last entry in the cycle. - */ + + /** + * A flag that indicates the algorithm found a cycle in the edges + * such as 9->8->1->9 + * If this flag is set after you have called one of the sort routines + * then the detected cycle will be contained in the cycle array and + * cycleLimit will point to the one after the last entry in the cycle. + */ bool m_hasCycle; - - /** - * A watermark used to accumulate potential cycles in the cycle array. - * This should be zero when we are done. Check hasCycle after calling one - * of the sort methods and if it is true then you can find the cycle - * in cycle[0]...cycle[cycleMark-1] - */ + + /** + * A watermark used to accumulate potential cycles in the cycle array. + * This should be zero when we are done. 
Check hasCycle after calling one + * of the sort methods and if it is true then you can find the cycle + * in cycle[0]...cycle[cycleMark-1] + */ ANTLR_UINT32 m_cycleMark; - /** - * One more than the largest node index that is contained in edges/sorted. - */ + /** + * One more than the largest node index that is contained in edges/sorted. + */ ANTLR_UINT32 m_limit; - - /** - * The set of visited nodes as determined by a set entry in - * the bitmap. - */ + + /** + * The set of visited nodes as determined by a set entry in + * the bitmap. + */ BitsetType* m_visited; - -public: + +public: Topo(); - /** - * A method that adds an edge from one node to another. An edge - * of n -> m indicates that node n is dependent on node m. Note that - * while building these edges, it is perfectly OK to add nodes out of - * sequence. So, if you have edges: - * - * 3 -> 0 - * 2 -> 1 - * 1 -> 3 - * - * The you can add them in that order and so add node 3 before nodes 2 and 1 - * - */ - void addEdge(ANTLR_UINT32 edge, ANTLR_UINT32 dependency); - - - /** - * A method that returns a pointer to an array of sorted node indexes. - * The array is sorted in topological sorted order. Note that the array - * is only as large as the largest node index you created an edge for. This means - * that if you had an input of 32 nodes, but that largest node with an edge - * was 16, then the returned array will be the sorted order of the first 16 - * nodes and the last 16 nodes of your array are basically fine as they are - * as they had no dependencies and do not need any particular sort order. - * - * NB: If the structure that contains the array is freed, then the sorted - * array will be freed too so you should use the value of limit to - * make a long term copy of this array if you do not want to keep the topo - * structure around as well. - */ - ANTLR_UINT32* sortToArray(); - /** - * A method that sorts the supplied ANTLR3_VECTOR in place based - * on the previously supplied edge data. 
- */ + * A method that adds an edge from one node to another. An edge + * of n -> m indicates that node n is dependent on node m. Note that + * while building these edges, it is perfectly OK to add nodes out of + * sequence. So, if you have edges: + * + * 3 -> 0 + * 2 -> 1 + * 1 -> 3 + * + * The you can add them in that order and so add node 3 before nodes 2 and 1 + * + */ + void addEdge(ANTLR_UINT32 edge, ANTLR_UINT32 dependency); + + + /** + * A method that returns a pointer to an array of sorted node indexes. + * The array is sorted in topological sorted order. Note that the array + * is only as large as the largest node index you created an edge for. This means + * that if you had an input of 32 nodes, but that largest node with an edge + * was 16, then the returned array will be the sorted order of the first 16 + * nodes and the last 16 nodes of your array are basically fine as they are + * as they had no dependencies and do not need any particular sort order. + * + * NB: If the structure that contains the array is freed, then the sorted + * array will be freed too so you should use the value of limit to + * make a long term copy of this array if you do not want to keep the topo + * structure around as well. + */ + ANTLR_UINT32* sortToArray(); + + /** + * A method that sorts the supplied ANTLR3_VECTOR in place based + * on the previously supplied edge data. + */ template<typename DataType> - void sortVector( typename ImplTraits::template VectorType<DataType>& v); - + void sortVector( typename ImplTraits::template VectorType<DataType>& v); + void DFS(ANTLR_UINT32 node); - - /** - * A method to free this structure and any associated memory. - */ + + /** + * A method to free this structure and any associated memory. 
+ */ ~Topo(); -}; - +}; + } - -#include "antlr3collections.inl" + +#include "antlr3collections.inl" -#endif - - +#endif + + diff --git a/contrib/libs/antlr3_cpp_runtime/include/antlr3collections.inl b/contrib/libs/antlr3_cpp_runtime/include/antlr3collections.inl index 3a2d06c9c3..1244f8dfd7 100644 --- a/contrib/libs/antlr3_cpp_runtime/include/antlr3collections.inl +++ b/contrib/libs/antlr3_cpp_runtime/include/antlr3collections.inl @@ -1,86 +1,86 @@ namespace antlr3 { - -template< class ImplTraits, class DataType > -ANTLR_INLINE TrieEntry<ImplTraits, DataType>::TrieEntry(const DataType& data, TrieEntry* next) + +template< class ImplTraits, class DataType > +ANTLR_INLINE TrieEntry<ImplTraits, DataType>::TrieEntry(const DataType& data, TrieEntry* next) :m_data(data) -{ +{ m_next = next; -} -template< class ImplTraits, class DataType > -ANTLR_INLINE DataType& TrieEntry<ImplTraits, DataType>::get_data() -{ +} +template< class ImplTraits, class DataType > +ANTLR_INLINE DataType& TrieEntry<ImplTraits, DataType>::get_data() +{ return m_data; -} -template< class ImplTraits, class DataType > -ANTLR_INLINE const DataType& TrieEntry<ImplTraits, DataType>::get_data() const -{ +} +template< class ImplTraits, class DataType > +ANTLR_INLINE const DataType& TrieEntry<ImplTraits, DataType>::get_data() const +{ return m_data; -} -template< class ImplTraits, class DataType > -ANTLR_INLINE TrieEntry<ImplTraits, DataType>* TrieEntry<ImplTraits, DataType>::get_next() const -{ +} +template< class ImplTraits, class DataType > +ANTLR_INLINE TrieEntry<ImplTraits, DataType>* TrieEntry<ImplTraits, DataType>::get_next() const +{ return m_next; -} - -template< class ImplTraits, class DataType > -ANTLR_INLINE void TrieEntry<ImplTraits, DataType>::set_next( TrieEntry* next ) -{ +} + +template< class ImplTraits, class DataType > +ANTLR_INLINE void TrieEntry<ImplTraits, DataType>::set_next( TrieEntry* next ) +{ m_next = next; -} - -template< class ImplTraits, class DataType > -ANTLR_INLINE 
ANTLR_UINT32 IntTrieNode<ImplTraits, DataType>::get_bitNum() const -{ +} + +template< class ImplTraits, class DataType > +ANTLR_INLINE ANTLR_UINT32 IntTrieNode<ImplTraits, DataType>::get_bitNum() const +{ return m_bitNum; -} -template< class ImplTraits, class DataType > -ANTLR_INLINE ANTLR_INTKEY IntTrieNode<ImplTraits, DataType>::get_key() const -{ +} +template< class ImplTraits, class DataType > +ANTLR_INLINE ANTLR_INTKEY IntTrieNode<ImplTraits, DataType>::get_key() const +{ return m_key; -} -template< class ImplTraits, class DataType > -ANTLR_INLINE typename IntTrieNode<ImplTraits, DataType>::BucketsType* IntTrieNode<ImplTraits, DataType>::get_buckets() const -{ +} +template< class ImplTraits, class DataType > +ANTLR_INLINE typename IntTrieNode<ImplTraits, DataType>::BucketsType* IntTrieNode<ImplTraits, DataType>::get_buckets() const +{ return m_buckets; -} -template< class ImplTraits, class DataType > -ANTLR_INLINE IntTrieNode<ImplTraits, DataType>* IntTrieNode<ImplTraits, DataType>::get_leftN() const -{ +} +template< class ImplTraits, class DataType > +ANTLR_INLINE IntTrieNode<ImplTraits, DataType>* IntTrieNode<ImplTraits, DataType>::get_leftN() const +{ return m_leftN; -} -template< class ImplTraits, class DataType > -ANTLR_INLINE IntTrieNode<ImplTraits, DataType>* IntTrieNode<ImplTraits, DataType>::get_rightN() const -{ +} +template< class ImplTraits, class DataType > +ANTLR_INLINE IntTrieNode<ImplTraits, DataType>* IntTrieNode<ImplTraits, DataType>::get_rightN() const +{ return m_rightN; -} -template< class ImplTraits, class DataType > -ANTLR_INLINE void IntTrieNode<ImplTraits, DataType>::set_bitNum( ANTLR_UINT32 bitNum ) -{ +} +template< class ImplTraits, class DataType > +ANTLR_INLINE void IntTrieNode<ImplTraits, DataType>::set_bitNum( ANTLR_UINT32 bitNum ) +{ m_bitNum = bitNum; -} -template< class ImplTraits, class DataType > -ANTLR_INLINE void IntTrieNode<ImplTraits, DataType>::set_key( ANTLR_INTKEY key ) -{ +} +template< class ImplTraits, class 
DataType > +ANTLR_INLINE void IntTrieNode<ImplTraits, DataType>::set_key( ANTLR_INTKEY key ) +{ m_key = key; -} -template< class ImplTraits, class DataType > -ANTLR_INLINE void IntTrieNode<ImplTraits, DataType>::set_buckets( BucketsType* buckets ) -{ +} +template< class ImplTraits, class DataType > +ANTLR_INLINE void IntTrieNode<ImplTraits, DataType>::set_buckets( BucketsType* buckets ) +{ m_buckets = buckets; -} -template< class ImplTraits, class DataType > -ANTLR_INLINE void IntTrieNode<ImplTraits, DataType>::set_leftN( IntTrieNode* leftN ) -{ +} +template< class ImplTraits, class DataType > +ANTLR_INLINE void IntTrieNode<ImplTraits, DataType>::set_leftN( IntTrieNode* leftN ) +{ m_leftN = leftN; -} -template< class ImplTraits, class DataType > -ANTLR_INLINE void IntTrieNode<ImplTraits, DataType>::set_rightN( IntTrieNode* rightN ) -{ +} +template< class ImplTraits, class DataType > +ANTLR_INLINE void IntTrieNode<ImplTraits, DataType>::set_rightN( IntTrieNode* rightN ) +{ m_rightN = rightN; -} - -ANTLR_INLINE const ANTLR_UINT8* IntTrieBase::get_bitIndex() -{ +} + +ANTLR_INLINE const ANTLR_UINT8* IntTrieBase::get_bitIndex() +{ static ANTLR_UINT8 bitIndex[256] = { 0, // 0 - Just for padding @@ -105,10 +105,10 @@ ANTLR_INLINE const ANTLR_UINT8* IntTrieBase::get_bitIndex() 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7 }; return bitIndex; -} - -ANTLR_INLINE const ANTLR_UINT64* IntTrieBase::get_bitMask() -{ +} + +ANTLR_INLINE const ANTLR_UINT64* IntTrieBase::get_bitMask() +{ static ANTLR_UINT64 bitMask[64] = { 0x0000000000000001ULL, 0x0000000000000002ULL, 0x0000000000000004ULL, 0x0000000000000008ULL, @@ -128,61 +128,61 @@ ANTLR_INLINE const ANTLR_UINT64* IntTrieBase::get_bitMask() 0x0100000000000000ULL, 0x0200000000000000ULL, 0x0400000000000000ULL, 0x0800000000000000ULL, 0x1000000000000000ULL, 0x2000000000000000ULL, 0x4000000000000000ULL, 0x8000000000000000ULL }; - + return bitMask; -} - -template< class ImplTraits, class DataType > -IntTrie<ImplTraits, 
DataType>::IntTrie( ANTLR_UINT32 depth ) -{ +} + +template< class ImplTraits, class DataType > +IntTrie<ImplTraits, DataType>::IntTrie( ANTLR_UINT32 depth ) +{ /* Now we need to allocate the root node. This makes it easier * to use the tree as we don't have to do anything special * for the root node. */ m_root = new IntTrieNodeType; - + /* Now we seed the root node with the index being the * highest left most bit we want to test, which limits the * keys in the trie. This is the trie 'depth'. The limit for * this implementation is 63 (bits 0..63). */ m_root->set_bitNum( depth ); - + /* And as we have nothing in here yet, we set both child pointers * of the root node to point back to itself. */ m_root->set_leftN( m_root ); m_root->set_rightN( m_root ); m_count = 0; - + /* Finally, note that the key for this root node is 0 because * we use calloc() to initialise it. */ m_allowDups = false; m_current = NULL; -} - -template< class ImplTraits, class DataType > -IntTrie<ImplTraits, DataType>::~IntTrie() -{ - /* Descend from the root and free all the nodes - */ - delete m_root; - - /* the nodes are all gone now, so we need only free the memory - * for the structure itself - */ -} - -template< class ImplTraits, class DataType > +} + +template< class ImplTraits, class DataType > +IntTrie<ImplTraits, DataType>::~IntTrie() +{ + /* Descend from the root and free all the nodes + */ + delete m_root; + + /* the nodes are all gone now, so we need only free the memory + * for the structure itself + */ +} + +template< class ImplTraits, class DataType > typename IntTrie<ImplTraits, DataType>::TrieEntryType* IntTrie<ImplTraits, DataType>::get( ANTLR_INTKEY key) -{ +{ IntTrieNodeType* thisNode; IntTrieNodeType* nextNode; - + if (m_count == 0) return NULL; /* Nothing in this trie yet */ - + /* Starting at the root node in the trie, compare the bit index * of the current node with its next child node (starts left from root). 
* When the bit index of the child node is greater than the bit index of the current node @@ -196,7 +196,7 @@ typename IntTrie<ImplTraits, DataType>::TrieEntryType* IntTrie<ImplTraits, DataT */ thisNode = m_root; /* Start at the root node */ nextNode = thisNode->get_leftN(); /* Examine the left node from the root */ - + /* While we are descending the tree nodes... */ const ANTLR_UINT64* bitMask = this->get_bitMask(); @@ -205,7 +205,7 @@ typename IntTrie<ImplTraits, DataType>::TrieEntryType* IntTrie<ImplTraits, DataT /* Next node now becomes the new 'current' node */ thisNode = nextNode; - + /* We now test the bit indicated by the bitmap in the next node * in the key we are searching for. The new next node is the * right node if that bit is set and the left node it is not. @@ -219,7 +219,7 @@ typename IntTrie<ImplTraits, DataType>::TrieEntryType* IntTrie<ImplTraits, DataT nextNode = nextNode->get_leftN(); /* 0 is left */ } } - + /* Here we have reached a node where the bitMap index is lower than * its parent. 
This means it is pointing backward in the tree and * must therefore be a terminal node, being the only point than can @@ -241,22 +241,22 @@ typename IntTrie<ImplTraits, DataType>::TrieEntryType* IntTrie<ImplTraits, DataT { return NULL; /* That key is not in the trie (note that we set the pointer to -1 if no payload) */ } -} - -template< class ImplTraits, class DataType > +} + +template< class ImplTraits, class DataType > bool IntTrie<ImplTraits, DataType>::del( ANTLR_INTKEY /*key*/) -{ - IntTrieNodeType* p; - - p = m_root; +{ + IntTrieNodeType* p; + + p = m_root; - return false; - -} - -template< class ImplTraits, class DataType > + return false; + +} + +template< class ImplTraits, class DataType > bool IntTrie<ImplTraits, DataType>::add( ANTLR_INTKEY key, const DataType& data ) -{ +{ IntTrieNodeType* thisNode; IntTrieNodeType* nextNode; IntTrieNodeType* entNode; @@ -264,15 +264,15 @@ bool IntTrie<ImplTraits, DataType>::add( ANTLR_INTKEY key, const DataType& data TrieEntryType* newEnt; TrieEntryType* nextEnt; ANTLR_INTKEY xorKey; - + /* Cache the bit depth of this trie, which is always the highest index, * which is in the root node */ depth = m_root->get_bitNum(); - + thisNode = m_root; /* Start with the root node */ nextNode = m_root->get_leftN(); /* And assume we start to the left */ - + /* Now find the only node that can be currently reached by the bits in the * key we are being asked to insert. */ @@ -282,7 +282,7 @@ bool IntTrie<ImplTraits, DataType>::add( ANTLR_INTKEY key, const DataType& data /* Still descending the structure, next node becomes current. 
*/ thisNode = nextNode; - + if (key & bitMask[nextNode->get_bitNum()]) { /* Bit at the required index was 1, so travers the right node from here @@ -310,7 +310,7 @@ bool IntTrie<ImplTraits, DataType>::add( ANTLR_INTKEY key, const DataType& data /* Yes, we are accepting duplicates */ newEnt = new TrieEntryType(data, NULL); - + /* We want to be able to traverse the stored elements in the order that they were * added as duplicate keys. We might need to revise this opinion if we end up having many duplicate keys * as perhaps reverse order is just as good, so long as it is ordered. @@ -321,7 +321,7 @@ bool IntTrie<ImplTraits, DataType>::add( ANTLR_INTKEY key, const DataType& data nextEnt = nextEnt->get_next(); } nextEnt->set_next(newEnt); - + m_count++; return true; } @@ -333,7 +333,7 @@ bool IntTrie<ImplTraits, DataType>::add( ANTLR_INTKEY key, const DataType& data return false; } } - + /* Here we have discovered the only node that can be reached by the bits in the key * but we have found that this node is not the key we need to insert. We must find the * the leftmost bit by which the current key for that node and the new key we are going @@ -341,7 +341,7 @@ bool IntTrie<ImplTraits, DataType>::add( ANTLR_INTKEY key, const DataType& data * showed that it allows a machine code path that works well with predicated execution */ xorKey = (key ^ nextNode->get_key() ); /* Gives 1 bits only where they differ then we find the left most 1 bit*/ - + /* Most common case is a 32 bit key really */ const ANTLR_UINT8* bitIndex = this->get_bitIndex(); @@ -372,7 +372,7 @@ bool IntTrie<ImplTraits, DataType>::add( ANTLR_INTKEY key, const DataType& data } } else -#endif +#endif { if (xorKey & 0x00000000FFFF0000) { @@ -397,29 +397,29 @@ bool IntTrie<ImplTraits, DataType>::add( ANTLR_INTKEY key, const DataType& data } } } - - /* We have located the leftmost differing bit, indicated by the depth variable. So, we know what - * bit index we are to insert the new entry at. 
There are two cases, being where the two keys - * differ at a bit position that is not currently part of the bit testing, where they differ on a bit - * that is currently being skipped in the indexed comparisons, and where they differ on a bit - * that is merely lower down in the current bit search. If the bit index went bit 4, bit 2 and they differ - * at bit 3, then we have the "skipped" bit case. But if that chain was Bit 4, Bit 2 and they differ at bit 1 - * then we have the easy bit <pun>. - * - * So, set up to descend the tree again, but this time looking for the insert point - * according to whether we skip the bit that differs or not. - */ + + /* We have located the leftmost differing bit, indicated by the depth variable. So, we know what + * bit index we are to insert the new entry at. There are two cases, being where the two keys + * differ at a bit position that is not currently part of the bit testing, where they differ on a bit + * that is currently being skipped in the indexed comparisons, and where they differ on a bit + * that is merely lower down in the current bit search. If the bit index went bit 4, bit 2 and they differ + * at bit 3, then we have the "skipped" bit case. But if that chain was Bit 4, Bit 2 and they differ at bit 1 + * then we have the easy bit <pun>. + * + * So, set up to descend the tree again, but this time looking for the insert point + * according to whether we skip the bit that differs or not. + */ thisNode = m_root; entNode = m_root->get_leftN(); - - /* Note the slight difference in the checks here to cover both cases - */ - while (thisNode->get_bitNum() > entNode->get_bitNum() && entNode->get_bitNum() > depth) - { + + /* Note the slight difference in the checks here to cover both cases + */ + while (thisNode->get_bitNum() > entNode->get_bitNum() && entNode->get_bitNum() > depth) + { /* Still descending the structure, next node becomes current. 
*/ thisNode = entNode; - + if (key & bitMask[entNode->get_bitNum()]) { /* Bit at the required index was 1, so traverse the right node from here @@ -432,564 +432,564 @@ bool IntTrie<ImplTraits, DataType>::add( ANTLR_INTKEY key, const DataType& data */ entNode = entNode->get_leftN(); } - } - - /* We have located the correct insert point for this new key, so we need - * to allocate our entry and insert it etc. - */ + } + + /* We have located the correct insert point for this new key, so we need + * to allocate our entry and insert it etc. + */ nextNode = new IntTrieNodeType(); - - /* Build a new entry block for the new node - */ - newEnt = new TrieEntryType(data, NULL); - + + /* Build a new entry block for the new node + */ + newEnt = new TrieEntryType(data, NULL); + /* Install it - */ - nextNode->set_buckets(newEnt); - nextNode->set_key(key); - nextNode->set_bitNum( depth ); - - /* Work out the right and left pointers for this new node, which involve - * terminating with the current found node either right or left according - * to whether the current index bit is 1 or 0 - */ - if (key & bitMask[depth]) - { + */ + nextNode->set_buckets(newEnt); + nextNode->set_key(key); + nextNode->set_bitNum( depth ); + + /* Work out the right and left pointers for this new node, which involve + * terminating with the current found node either right or left according + * to whether the current index bit is 1 or 0 + */ + if (key & bitMask[depth]) + { nextNode->set_leftN(entNode); /* Terminates at previous position */ nextNode->set_rightN(nextNode); /* Terminates with itself */ - } - else - { + } + else + { nextNode->set_rightN(entNode); /* Terminates at previous position */ nextNode->set_leftN(nextNode); /* Terminates with itself */ - } - - /* Finally, we need to change the pointers at the node we located - * for inserting. If the key bit at its index is set then the right + } + + /* Finally, we need to change the pointers at the node we located + * for inserting. 
If the key bit at its index is set then the right * pointer for that node becomes the newly created node, otherwise the left - * pointer does. - */ - if (key & bitMask[thisNode->get_bitNum()] ) - { + * pointer does. + */ + if (key & bitMask[thisNode->get_bitNum()] ) + { thisNode->set_rightN( nextNode ); - } - else - { + } + else + { thisNode->set_leftN(nextNode); - } - - /* Et voila - */ - m_count++; - return true; -} - -template< class ImplTraits, class DataType > -IntTrieNode<ImplTraits, DataType>::IntTrieNode() -{ + } + + /* Et voila + */ + m_count++; + return true; +} + +template< class ImplTraits, class DataType > +IntTrieNode<ImplTraits, DataType>::IntTrieNode() +{ m_bitNum = 0; m_key = 0; m_buckets = NULL; m_leftN = NULL; m_rightN = NULL; -} - -template< class ImplTraits, class DataType > -IntTrieNode<ImplTraits, DataType>::~IntTrieNode() -{ +} + +template< class ImplTraits, class DataType > +IntTrieNode<ImplTraits, DataType>::~IntTrieNode() +{ TrieEntryType* thisEntry; TrieEntryType* nextEntry; - - /* If this node has a left pointer that is not a back pointer - * then recursively call to free this - */ - if ( m_bitNum > m_leftN->get_bitNum()) - { + + /* If this node has a left pointer that is not a back pointer + * then recursively call to free this + */ + if ( m_bitNum > m_leftN->get_bitNum()) + { /* We have a left node that needs descending, so do it. */ delete m_leftN; - } - + } + /* The left nodes from here should now be dealt with, so - * we need to descend any right nodes that are not back pointers - */ - if ( m_bitNum > m_rightN->get_bitNum() ) - { + * we need to descend any right nodes that are not back pointers + */ + if ( m_bitNum > m_rightN->get_bitNum() ) + { /* There are some right nodes to descend and deal with. 
*/ delete m_rightN; - } - - /* Now all the children are dealt with, we can destroy - * this node too - */ + } + + /* Now all the children are dealt with, we can destroy + * this node too + */ thisEntry = m_buckets; - - while (thisEntry != NULL) - { + + while (thisEntry != NULL) + { nextEntry = thisEntry->get_next(); - + /* Now free the data for this bucket entry */ delete thisEntry; thisEntry = nextEntry; /* See if there are any more to free */ - } - - /* The bucket entry is now gone, so we can free the memory for - * the entry itself. - */ - - /* And that should be it for everything under this node and itself - */ -} - -/** - * Allocate and initialize a new ANTLR3 topological sorter, which can be - * used to define edges that identify numerical node indexes that depend on other - * numerical node indexes, which can then be sorted topologically such that - * any node is sorted after all its dependent nodes. - * - * Use: - * - * /verbatim - - pANTLR3_TOPO topo; - topo = antlr3NewTopo(); - - if (topo == NULL) { out of memory } - - topo->addEdge(topo, 3, 0); // Node 3 depends on node 0 - topo->addEdge(topo, 0, 1); // Node - depends on node 1 - topo->sortVector(topo, myVector); // Sort the vector in place (node numbers are the vector entry numbers) - - * /verbatim - */ -template<class ImplTraits> -Topo<ImplTraits>::Topo() -{ - // Initialize variables - // - m_visited = NULL; // Don't know how big it is yet - m_limit = 1; // No edges added yet - m_edges = NULL; // No edges added yet - m_sorted = NULL; // Nothing sorted at the start - m_cycle = NULL; // No cycles at the start - m_cycleMark = 0; // No cycles at the start - m_hasCycle = false; // No cycle at the start -} - -// Topological sorter -// -template<class ImplTraits> -void Topo<ImplTraits>::addEdge(ANTLR_UINT32 edge, ANTLR_UINT32 dependency) -{ + } + + /* The bucket entry is now gone, so we can free the memory for + * the entry itself. 
+ */ + + /* And that should be it for everything under this node and itself + */ +} + +/** + * Allocate and initialize a new ANTLR3 topological sorter, which can be + * used to define edges that identify numerical node indexes that depend on other + * numerical node indexes, which can then be sorted topologically such that + * any node is sorted after all its dependent nodes. + * + * Use: + * + * /verbatim + + pANTLR3_TOPO topo; + topo = antlr3NewTopo(); + + if (topo == NULL) { out of memory } + + topo->addEdge(topo, 3, 0); // Node 3 depends on node 0 + topo->addEdge(topo, 0, 1); // Node - depends on node 1 + topo->sortVector(topo, myVector); // Sort the vector in place (node numbers are the vector entry numbers) + + * /verbatim + */ +template<class ImplTraits> +Topo<ImplTraits>::Topo() +{ + // Initialize variables + // + m_visited = NULL; // Don't know how big it is yet + m_limit = 1; // No edges added yet + m_edges = NULL; // No edges added yet + m_sorted = NULL; // Nothing sorted at the start + m_cycle = NULL; // No cycles at the start + m_cycleMark = 0; // No cycles at the start + m_hasCycle = false; // No cycle at the start +} + +// Topological sorter +// +template<class ImplTraits> +void Topo<ImplTraits>::addEdge(ANTLR_UINT32 edge, ANTLR_UINT32 dependency) +{ ANTLR_UINT32 i; - ANTLR_UINT32 maxEdge; - BitsetType* edgeDeps; - - if (edge>dependency) - { - maxEdge = edge; - } - else - { - maxEdge = dependency; - } - // We need to add an edge to says that the node indexed by 'edge' is - // dependent on the node indexed by 'dependency' - // - - // First see if we have enough room in the edges array to add the edge? 
- // - if ( m_edges == NULL) - { - // We don't have any edges yet, so create an array to hold them - // - m_edges = AllocPolicyType::alloc0(sizeof(BitsetType*) * (maxEdge + 1)); - - // Set the limit to what we have now - // - m_limit = maxEdge + 1; - } - else if (m_limit <= maxEdge) - { - // WE have some edges but not enough - // - m_edges = AllocPolicyType::realloc(m_edges, sizeof(BitsetType*) * (maxEdge + 1)); - - // Initialize the new bitmaps to ;indicate we have no edges defined yet - // - for (i = m_limit; i <= maxEdge; i++) - { - *((m_edges) + i) = NULL; - } - - // Set the limit to what we have now - // - m_limit = maxEdge + 1; - } - - // If the edge was flagged as depending on itself, then we just - // do nothing as it means this routine was just called to add it - // in to the list of nodes. - // - if (edge == dependency) - { - return; - } - - // Pick up the bit map for the requested edge - // - edgeDeps = *((m_edges) + edge); - - if (edgeDeps == NULL) - { - // No edges are defined yet for this node - // - edgeDeps = new BitsetType(0); - *((m_edges) + edge) = edgeDeps; - } - - // Set the bit in the bitmap that corresponds to the requested - // dependency. - // - edgeDeps->add(dependency); - - // And we are all set - // - return; - -} - -/** - * Given a starting node, descend its dependent nodes (ones that it has edges - * to) until we find one without edges. Having found a node without edges, we have - * discovered the bottom of a depth first search, which we can then ascend, adding - * the nodes in order from the bottom, which gives us the dependency order. 
- */ -template<class ImplTraits> -void Topo<ImplTraits>::DFS(ANTLR_UINT32 node) -{ + ANTLR_UINT32 maxEdge; + BitsetType* edgeDeps; + + if (edge>dependency) + { + maxEdge = edge; + } + else + { + maxEdge = dependency; + } + // We need to add an edge to says that the node indexed by 'edge' is + // dependent on the node indexed by 'dependency' + // + + // First see if we have enough room in the edges array to add the edge? + // + if ( m_edges == NULL) + { + // We don't have any edges yet, so create an array to hold them + // + m_edges = AllocPolicyType::alloc0(sizeof(BitsetType*) * (maxEdge + 1)); + + // Set the limit to what we have now + // + m_limit = maxEdge + 1; + } + else if (m_limit <= maxEdge) + { + // WE have some edges but not enough + // + m_edges = AllocPolicyType::realloc(m_edges, sizeof(BitsetType*) * (maxEdge + 1)); + + // Initialize the new bitmaps to ;indicate we have no edges defined yet + // + for (i = m_limit; i <= maxEdge; i++) + { + *((m_edges) + i) = NULL; + } + + // Set the limit to what we have now + // + m_limit = maxEdge + 1; + } + + // If the edge was flagged as depending on itself, then we just + // do nothing as it means this routine was just called to add it + // in to the list of nodes. + // + if (edge == dependency) + { + return; + } + + // Pick up the bit map for the requested edge + // + edgeDeps = *((m_edges) + edge); + + if (edgeDeps == NULL) + { + // No edges are defined yet for this node + // + edgeDeps = new BitsetType(0); + *((m_edges) + edge) = edgeDeps; + } + + // Set the bit in the bitmap that corresponds to the requested + // dependency. + // + edgeDeps->add(dependency); + + // And we are all set + // + return; + +} + +/** + * Given a starting node, descend its dependent nodes (ones that it has edges + * to) until we find one without edges. 
Having found a node without edges, we have + * discovered the bottom of a depth first search, which we can then ascend, adding + * the nodes in order from the bottom, which gives us the dependency order. + */ +template<class ImplTraits> +void Topo<ImplTraits>::DFS(ANTLR_UINT32 node) +{ BitsetType* edges; - - // Guard against a revisit and check for cycles - // - if (m_hasCycle == true) - { - return; // We don't do anything else if we found a cycle - } - - if ( m_visited->isMember(node)) - { - // Check to see if we found a cycle. To do this we search the - // current cycle stack and see if we find this node already in the stack. - // - ANTLR_UINT32 i; - - for (i=0; i< m_cycleMark; i++) - { - if ( m_cycle[i] == node) - { - // Stop! We found a cycle in the input, so rejig the cycle - // stack so that it only contains the cycle and set the cycle flag - // which will tell the caller what happened - // - ANTLR_UINT32 l; - - for (l = i; l < m_cycleMark; l++) - { - m_cycle[l - i] = m_cycle[l]; // Move to zero base in the cycle list - } - - // Recalculate the limit - // - m_cycleMark -= i; - - // Signal disaster - // - m_hasCycle = true; - } - } - return; - } - - // So far, no cycles have been found and we have not visited this node yet, - // so this node needs to go into the cycle stack before we continue - // then we will take it out of the stack once we have descended all its - // dependencies. - // - m_cycle[m_cycleMark++] = node; - - // First flag that we have visited this node - // - m_visited->add(node); - - // Now, if this node has edges, then we want to ensure we visit - // them all before we drop through and add this node into the sorted - // list. - // - edges = *((m_edges) + node); - if (edges != NULL) - { - // We have some edges, so visit each of the edge nodes - // that have not already been visited. 
- // + + // Guard against a revisit and check for cycles + // + if (m_hasCycle == true) + { + return; // We don't do anything else if we found a cycle + } + + if ( m_visited->isMember(node)) + { + // Check to see if we found a cycle. To do this we search the + // current cycle stack and see if we find this node already in the stack. + // + ANTLR_UINT32 i; + + for (i=0; i< m_cycleMark; i++) + { + if ( m_cycle[i] == node) + { + // Stop! We found a cycle in the input, so rejig the cycle + // stack so that it only contains the cycle and set the cycle flag + // which will tell the caller what happened + // + ANTLR_UINT32 l; + + for (l = i; l < m_cycleMark; l++) + { + m_cycle[l - i] = m_cycle[l]; // Move to zero base in the cycle list + } + + // Recalculate the limit + // + m_cycleMark -= i; + + // Signal disaster + // + m_hasCycle = true; + } + } + return; + } + + // So far, no cycles have been found and we have not visited this node yet, + // so this node needs to go into the cycle stack before we continue + // then we will take it out of the stack once we have descended all its + // dependencies. + // + m_cycle[m_cycleMark++] = node; + + // First flag that we have visited this node + // + m_visited->add(node); + + // Now, if this node has edges, then we want to ensure we visit + // them all before we drop through and add this node into the sorted + // list. + // + edges = *((m_edges) + node); + if (edges != NULL) + { + // We have some edges, so visit each of the edge nodes + // that have not already been visited. + // ANTLR_UINT32 numBits; // How many bits are in the set - ANTLR_UINT32 i; - ANTLR_UINT32 range; - - numBits = edges->numBits(); - range = edges->size(); // Number of set bits - - // Stop if we exahust the bit list or have checked the - // number of edges that this node refers to (so we don't - // check bits at the end that cannot possibly be set). 
- // - for (i=0; i<= numBits && range > 0; i++) - { - if (edges->isMember(i)) - { - range--; // About to check another one - - // Found an edge, make sure we visit and descend it - // - this->DFS(i); - } - } - } - - // At this point we will have visited all the dependencies - // of this node and they will be ordered (even if there are cycles) - // So we just add the node into the sorted list at the - // current index position. - // - m_sorted[m_limit++] = node; - - // Remove this node from the cycle list if we have not detected a cycle - // - if (m_hasCycle == false) - { - m_cycleMark--; - } - - return; -} - -template<class ImplTraits> -ANTLR_UINT32* Topo<ImplTraits>::sortToArray() -{ + ANTLR_UINT32 i; + ANTLR_UINT32 range; + + numBits = edges->numBits(); + range = edges->size(); // Number of set bits + + // Stop if we exahust the bit list or have checked the + // number of edges that this node refers to (so we don't + // check bits at the end that cannot possibly be set). + // + for (i=0; i<= numBits && range > 0; i++) + { + if (edges->isMember(i)) + { + range--; // About to check another one + + // Found an edge, make sure we visit and descend it + // + this->DFS(i); + } + } + } + + // At this point we will have visited all the dependencies + // of this node and they will be ordered (even if there are cycles) + // So we just add the node into the sorted list at the + // current index position. 
+ // + m_sorted[m_limit++] = node; + + // Remove this node from the cycle list if we have not detected a cycle + // + if (m_hasCycle == false) + { + m_cycleMark--; + } + + return; +} + +template<class ImplTraits> +ANTLR_UINT32* Topo<ImplTraits>::sortToArray() +{ ANTLR_UINT32 v; - ANTLR_UINT32 oldLimit; - - // Guard against being called with no edges defined - // - if (m_edges == NULL) - { - return 0; - } - // First we need a vector to populate with enough - // entries to accomodate the sorted list and another to accomodate - // the maximum cycle we could detect which is all nodes such as 0->1->2->3->0 - // - m_sorted = AllocPolicyType::alloc( m_limit * sizeof(ANTLR_UINT32) ); - m_cycle = AllocPolicyType::alloc( m_limit * sizeof(ANTLR_UINT32)); - - // Next we need an empty bitset to show whether we have visited a node - // or not. This is the bit that gives us linear time of course as we are essentially - // dropping through the nodes in depth first order and when we get to a node that - // has no edges, we pop back up the stack adding the nodes we traversed in reverse - // order. - // - m_visited = new BitsetType(0); - - // Now traverse the nodes as if we were just going left to right, but - // then descend each node unless it has already been visited. - // - oldLimit = m_limit; // Number of nodes to traverse linearly - m_limit = 0; // Next entry in the sorted table - - for (v = 0; v < oldLimit; v++) - { - // If we did not already visit this node, then descend it until we - // get a node without edges or arrive at a node we have already visited. 
- // - if (m_visited->isMember(v) == false) - { - // We have not visited this one so descend it - // - this->DFS(v); - } - - // Break the loop if we detect a cycle as we have no need to go any - // further - // - if (m_hasCycle == true) - { - break; - } - } - - // Reset the limit to the number we recorded as if we hit a - // cycle, then limit will have stopped at the node where we - // discovered the cycle, but in order to free the edge bitmaps - // we need to know how many we may have allocated and traverse them all. - // - m_limit = oldLimit; - - // Having traversed all the nodes we were given, we - // are guaranteed to have ordered all the nodes or detected a - // cycle. - // - return m_sorted; -} - -template<class ImplTraits> + ANTLR_UINT32 oldLimit; + + // Guard against being called with no edges defined + // + if (m_edges == NULL) + { + return 0; + } + // First we need a vector to populate with enough + // entries to accomodate the sorted list and another to accomodate + // the maximum cycle we could detect which is all nodes such as 0->1->2->3->0 + // + m_sorted = AllocPolicyType::alloc( m_limit * sizeof(ANTLR_UINT32) ); + m_cycle = AllocPolicyType::alloc( m_limit * sizeof(ANTLR_UINT32)); + + // Next we need an empty bitset to show whether we have visited a node + // or not. This is the bit that gives us linear time of course as we are essentially + // dropping through the nodes in depth first order and when we get to a node that + // has no edges, we pop back up the stack adding the nodes we traversed in reverse + // order. + // + m_visited = new BitsetType(0); + + // Now traverse the nodes as if we were just going left to right, but + // then descend each node unless it has already been visited. 
+ // + oldLimit = m_limit; // Number of nodes to traverse linearly + m_limit = 0; // Next entry in the sorted table + + for (v = 0; v < oldLimit; v++) + { + // If we did not already visit this node, then descend it until we + // get a node without edges or arrive at a node we have already visited. + // + if (m_visited->isMember(v) == false) + { + // We have not visited this one so descend it + // + this->DFS(v); + } + + // Break the loop if we detect a cycle as we have no need to go any + // further + // + if (m_hasCycle == true) + { + break; + } + } + + // Reset the limit to the number we recorded as if we hit a + // cycle, then limit will have stopped at the node where we + // discovered the cycle, but in order to free the edge bitmaps + // we need to know how many we may have allocated and traverse them all. + // + m_limit = oldLimit; + + // Having traversed all the nodes we were given, we + // are guaranteed to have ordered all the nodes or detected a + // cycle. + // + return m_sorted; +} + +template<class ImplTraits> template<typename DataType> -void Topo<ImplTraits>::sortVector( typename ImplTraits::template VectorType<DataType>& v ) -{ - // To sort a vector, we first perform the - // sort to an array, then use the results to reorder the vector - // we are given. This is just a convenience routine that allows you to - // sort the children of a tree node into topological order before or - // during an AST walk. This can be useful for optimizations that require - // dag reorders and also when the input stream defines thigns that are - // interdependent and you want to walk the list of the generated trees - // for those things in topological order so you can ignore the interdependencies - // at that point. 
- // - ANTLR_UINT32 i; - - // Used as a lookup index to find the current location in the vector of - // the vector entry that was originally at position [0], [1], [2] etc - // - ANTLR_UINT32* vIndex; - - // Sort into an array, then we can use the array that is - // stored in the topo - // - if (this->sortToArray() == 0) - { - return; // There were no edges - } - - if (m_hasCycle == true) - { - return; // Do nothing if we detected a cycle - } - - // Ensure that the vector we are sorting is at least as big as the - // the input sequence we were adsked to sort. It does not matter if it is - // bigger as thaat probably just means that nodes numbered higher than the - // limit had no dependencies and so can be left alone. - // - if (m_limit > v.size() ) - { - // We can only sort the entries that we have dude! The caller is - // responsible for ensuring the vector is the correct one and is the - // correct size etc. - // - m_limit = v.size(); - } - // We need to know the locations of each of the entries - // in the vector as we don't want to duplicate them in a new vector. We - // just use an indirection table to get the vector entry for a particular sequence - // acording to where we moved it last. Then we can just swap vector entries until - // we are done :-) - // - vIndex = AllocPolicyType::alloc(m_limit * sizeof(ANTLR_UINT32)); - - // Start index, each vector entry is located where you think it is - // - for (i = 0; i < m_limit; i++) - { - vIndex[i] = i; - } - - // Now we traverse the sorted array and moved the entries of - // the vector around according to the sort order and the indirection - // table we just created. The index telsl us where in the vector the - // original element entry n is now located via vIndex[n]. - // - for (i=0; i < m_limit; i++) - { - ANTLR_UINT32 ind; - - // If the vector entry at i is already the one that it - // should be, then we skip moving it of course. 
- // - if (vIndex[m_sorted[i]] == i) - { - continue; - } - - // The vector entry at i, should be replaced with the - // vector entry indicated by topo->sorted[i]. The vector entry - // at topo->sorted[i] may have already been swapped out though, so we - // find where it is now and move it from there to i. - // - ind = vIndex[m_sorted[i]]; +void Topo<ImplTraits>::sortVector( typename ImplTraits::template VectorType<DataType>& v ) +{ + // To sort a vector, we first perform the + // sort to an array, then use the results to reorder the vector + // we are given. This is just a convenience routine that allows you to + // sort the children of a tree node into topological order before or + // during an AST walk. This can be useful for optimizations that require + // dag reorders and also when the input stream defines thigns that are + // interdependent and you want to walk the list of the generated trees + // for those things in topological order so you can ignore the interdependencies + // at that point. + // + ANTLR_UINT32 i; + + // Used as a lookup index to find the current location in the vector of + // the vector entry that was originally at position [0], [1], [2] etc + // + ANTLR_UINT32* vIndex; + + // Sort into an array, then we can use the array that is + // stored in the topo + // + if (this->sortToArray() == 0) + { + return; // There were no edges + } + + if (m_hasCycle == true) + { + return; // Do nothing if we detected a cycle + } + + // Ensure that the vector we are sorting is at least as big as the + // the input sequence we were adsked to sort. It does not matter if it is + // bigger as thaat probably just means that nodes numbered higher than the + // limit had no dependencies and so can be left alone. + // + if (m_limit > v.size() ) + { + // We can only sort the entries that we have dude! The caller is + // responsible for ensuring the vector is the correct one and is the + // correct size etc. 
+ // + m_limit = v.size(); + } + // We need to know the locations of each of the entries + // in the vector as we don't want to duplicate them in a new vector. We + // just use an indirection table to get the vector entry for a particular sequence + // acording to where we moved it last. Then we can just swap vector entries until + // we are done :-) + // + vIndex = AllocPolicyType::alloc(m_limit * sizeof(ANTLR_UINT32)); + + // Start index, each vector entry is located where you think it is + // + for (i = 0; i < m_limit; i++) + { + vIndex[i] = i; + } + + // Now we traverse the sorted array and moved the entries of + // the vector around according to the sort order and the indirection + // table we just created. The index telsl us where in the vector the + // original element entry n is now located via vIndex[n]. + // + for (i=0; i < m_limit; i++) + { + ANTLR_UINT32 ind; + + // If the vector entry at i is already the one that it + // should be, then we skip moving it of course. + // + if (vIndex[m_sorted[i]] == i) + { + continue; + } + + // The vector entry at i, should be replaced with the + // vector entry indicated by topo->sorted[i]. The vector entry + // at topo->sorted[i] may have already been swapped out though, so we + // find where it is now and move it from there to i. + // + ind = vIndex[m_sorted[i]]; std::swap( v[i], v[ind] ); - - // Update our index. The element at i is now the one we wanted - // to be sorted here and the element we swapped out is now the - // element that was at i just before we swapped it. If you are lost now - // don't worry about it, we are just reindexing on the fly is all. - // - vIndex[m_sorted[i]] = i; - vIndex[i] = ind; - } - - // Having traversed all the entries, we have sorted the vector in place. 
- // - AllocPolicyType::free(vIndex); - return; -} - -template<class ImplTraits> -Topo<ImplTraits>::~Topo() -{ - ANTLR_UINT32 i; - - // Free the result vector - // - if (m_sorted != NULL) - { - AllocPolicyType::free(m_sorted); - } - - // Free the visited map - // - if (m_visited != NULL) - { + + // Update our index. The element at i is now the one we wanted + // to be sorted here and the element we swapped out is now the + // element that was at i just before we swapped it. If you are lost now + // don't worry about it, we are just reindexing on the fly is all. + // + vIndex[m_sorted[i]] = i; + vIndex[i] = ind; + } + + // Having traversed all the entries, we have sorted the vector in place. + // + AllocPolicyType::free(vIndex); + return; +} + +template<class ImplTraits> +Topo<ImplTraits>::~Topo() +{ + ANTLR_UINT32 i; + + // Free the result vector + // + if (m_sorted != NULL) + { + AllocPolicyType::free(m_sorted); + } + + // Free the visited map + // + if (m_visited != NULL) + { delete m_visited; - } - - // Free any edgemaps - // - if (m_edges != NULL) - { - Bitset<AllocPolicyType>* edgeList; - - for (i=0; i<m_limit; i++) - { - edgeList = *((m_edges) + i); - if (edgeList != NULL) - { + } + + // Free any edgemaps + // + if (m_edges != NULL) + { + Bitset<AllocPolicyType>* edgeList; + + for (i=0; i<m_limit; i++) + { + edgeList = *((m_edges) + i); + if (edgeList != NULL) + { delete edgeList; - } - } - - AllocPolicyType::free( m_edges ); - } - m_edges = NULL; + } + } + + AllocPolicyType::free( m_edges ); + } + m_edges = NULL; - // Free any cycle map - // - if (m_cycle != NULL) - { - AllocPolicyType::free(m_cycle); - } -} - - + // Free any cycle map + // + if (m_cycle != NULL) + { + AllocPolicyType::free(m_cycle); + } +} + + } diff --git a/contrib/libs/antlr3_cpp_runtime/include/antlr3commontoken.hpp b/contrib/libs/antlr3_cpp_runtime/include/antlr3commontoken.hpp index 51fa3954ab..b348f58a6f 100644 --- a/contrib/libs/antlr3_cpp_runtime/include/antlr3commontoken.hpp +++ 
b/contrib/libs/antlr3_cpp_runtime/include/antlr3commontoken.hpp @@ -1,59 +1,59 @@ -/** \file - * \brief Defines the interface for a common token. - * - * All token streams should provide their tokens using an instance - * of this common token. A custom pointer is provided, wher you may attach - * a further structure to enhance the common token if you feel the need - * to do so. The C runtime will assume that a token provides implementations - * of the interface functions, but all of them may be rplaced by your own - * implementation if you require it. - */ +/** \file + * \brief Defines the interface for a common token. + * + * All token streams should provide their tokens using an instance + * of this common token. A custom pointer is provided, wher you may attach + * a further structure to enhance the common token if you feel the need + * to do so. The C runtime will assume that a token provides implementations + * of the interface functions, but all of them may be rplaced by your own + * implementation if you require it. + */ #ifndef _ANTLR3_COMMON_TOKEN_HPP #define _ANTLR3_COMMON_TOKEN_HPP - -// [The "BSD licence"] -// Copyright (c) 2005-2009 Gokulakannan Somasundaram, ElectronDB - -// -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// 3. The name of the author may not be used to endorse or promote products -// derived from this software without specific prior written permission. 
-// -// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR -// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES -// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. -// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, -// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT -// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF -// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - + +// [The "BSD licence"] +// Copyright (c) 2005-2009 Gokulakannan Somasundaram, ElectronDB + +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// 3. The name of the author may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + namespace antlr3 { - -/** The definition of an ANTLR3 common token structure, which all implementations - * of a token stream should provide, installing any further structures in the - * custom pointer element of this structure. - * - * \remark - * Token streams are in essence provided by lexers or other programs that serve - * as lexers. - */ - -template<class ImplTraits> -class CommonToken : public ImplTraits::AllocPolicyType -{ -public: + +/** The definition of an ANTLR3 common token structure, which all implementations + * of a token stream should provide, installing any further structures in the + * custom pointer element of this structure. + * + * \remark + * Token streams are in essence provided by lexers or other programs that serve + * as lexers. + */ + +template<class ImplTraits> +class CommonToken : public ImplTraits::AllocPolicyType +{ +public: /* Base token types, which all lexer/parser tokens come after in sequence. 
*/ enum TOKEN_TYPE : ANTLR_UINT32 @@ -71,7 +71,7 @@ public: /** First token that can be used by users/generated code */ , MIN_TOKEN_TYPE = TOKEN_UP + 1 - + /** End of file token */ #ifndef _MSC_VER @@ -80,175 +80,175 @@ public: , TOKEN_EOF = 0xFFFFFFFF #endif }; - + typedef typename ImplTraits::TokenIntStreamType TokenIntStreamType; typedef typename ImplTraits::StringType StringType; typedef typename ImplTraits::InputStreamType InputStreamType; typedef typename ImplTraits::StreamDataType StreamDataType; typedef typename ImplTraits::TokenUserDataType UserDataType; - -private: - /** The actual type of this token - */ - ANTLR_UINT32 m_type; - + +private: + /** The actual type of this token + */ + ANTLR_UINT32 m_type; + /** The virtual channel that this token exists in. - */ + */ ANTLR_UINT32 m_channel; mutable StringType m_tokText; - - /** The offset into the input stream that the line in which this - * token resides starts. - */ + + /** The offset into the input stream that the line in which this + * token resides starts. + */ const StreamDataType* m_lineStart; - + /** The line number in the input stream where this token was derived from - */ + */ ANTLR_UINT32 m_line; - - /** The character position in the line that this token was derived from - */ + + /** The character position in the line that this token was derived from + */ ANTLR_INT32 m_charPositionInLine; - - /** Pointer to the input stream that this token originated in. - */ - InputStreamType* m_input; - - /** What the index of this token is, 0, 1, .., n-2, n-1 tokens - */ + + /** Pointer to the input stream that this token originated in. + */ + InputStreamType* m_input; + + /** What the index of this token is, 0, 1, .., n-2, n-1 tokens + */ ANTLR_MARKER m_index; - - /** The character offset in the input stream where the text for this token - * starts. - */ + + /** The character offset in the input stream where the text for this token + * starts. 
+ */ ANTLR_MARKER m_startIndex; - - /** The character offset in the input stream where the text for this token - * stops. - */ + + /** The character offset in the input stream where the text for this token + * stops. + */ ANTLR_MARKER m_stopIndex; - -public: + +public: CommonToken(); CommonToken(ANTLR_UINT32 type); CommonToken(TOKEN_TYPE type); CommonToken( const CommonToken& ctoken ); - + ~CommonToken() {} - + CommonToken& operator=( const CommonToken& ctoken ); bool operator==( const CommonToken& ctoken ) const; bool operator<( const CommonToken& ctoken ) const; - + InputStreamType* get_input() const; ANTLR_MARKER get_index() const; void set_index( ANTLR_MARKER index ); void set_input( InputStreamType* input ); - /* ============================== + /* ============================== * API - */ - - /** Function that returns the text pointer of a token, use - * toString() if you want a pANTLR3_STRING version of the token. - */ + */ + + /** Function that returns the text pointer of a token, use + * toString() if you want a pANTLR3_STRING version of the token. + */ StringType const & getText() const; - /** Pointer to a function that 'might' be able to set the text associated - * with a token. Imaginary tokens such as an ANTLR3_CLASSIC_TOKEN may actually - * do this, however many tokens such as ANTLR3_COMMON_TOKEN do not actaully have - * strings associated with them but just point into the current input stream. These - * tokens will implement this function with a function that errors out (probably - * drastically. - */ - void set_tokText( const StringType& text ); - - /** Pointer to a function that 'might' be able to set the text associated - * with a token. Imaginary tokens such as an ANTLR3_CLASSIC_TOKEN may actually - * do this, however many tokens such as ANTLR3_COMMON_TOKEN do not actully have - * strings associated with them but just point into the current input stream. 
These - * tokens will implement this function with a function that errors out (probably - * drastically. - */ + /** Pointer to a function that 'might' be able to set the text associated + * with a token. Imaginary tokens such as an ANTLR3_CLASSIC_TOKEN may actually + * do this, however many tokens such as ANTLR3_COMMON_TOKEN do not actaully have + * strings associated with them but just point into the current input stream. These + * tokens will implement this function with a function that errors out (probably + * drastically. + */ + void set_tokText( const StringType& text ); + + /** Pointer to a function that 'might' be able to set the text associated + * with a token. Imaginary tokens such as an ANTLR3_CLASSIC_TOKEN may actually + * do this, however many tokens such as ANTLR3_COMMON_TOKEN do not actully have + * strings associated with them but just point into the current input stream. These + * tokens will implement this function with a function that errors out (probably + * drastically. 
+ */ void setText(ANTLR_UINT8* text); void setText(const char* text); - - /** Pointer to a function that returns the token type of this token - */ - ANTLR_UINT32 get_type() const; + + /** Pointer to a function that returns the token type of this token + */ + ANTLR_UINT32 get_type() const; ANTLR_UINT32 getType() const; - - /** Pointer to a function that sets the type of this token - */ + + /** Pointer to a function that sets the type of this token + */ void set_type(ANTLR_UINT32 ttype); - - /** Pointer to a function that gets the 'line' number where this token resides - */ - ANTLR_UINT32 get_line() const; - - /** Pointer to a function that sets the 'line' number where this token reside - */ - void set_line(ANTLR_UINT32 line); - - /** Pointer to a function that gets the offset in the line where this token exists + + /** Pointer to a function that gets the 'line' number where this token resides + */ + ANTLR_UINT32 get_line() const; + + /** Pointer to a function that sets the 'line' number where this token reside */ - ANTLR_INT32 get_charPositionInLine() const; + void set_line(ANTLR_UINT32 line); + + /** Pointer to a function that gets the offset in the line where this token exists + */ + ANTLR_INT32 get_charPositionInLine() const; ANTLR_INT32 getCharPositionInLine() const; - - /** Pointer to a function that sets the offset in the line where this token exists - */ + + /** Pointer to a function that sets the offset in the line where this token exists + */ void set_charPositionInLine(ANTLR_INT32 pos); - - /** Pointer to a function that gets the channel that this token was placed in (parsers - * can 'tune' to these channels. - */ - ANTLR_UINT32 get_channel() const; - - /** Pointer to a function that sets the channel that this token should belong to - */ - void set_channel(ANTLR_UINT32 channel); - - /** Pointer to a function that returns an index 0...n-1 of the token in the token - * input stream. 
- */ - ANTLR_MARKER get_tokenIndex() const; - - /** Pointer to a function that can set the token index of this token in the token - * input stream. - */ + + /** Pointer to a function that gets the channel that this token was placed in (parsers + * can 'tune' to these channels. + */ + ANTLR_UINT32 get_channel() const; + + /** Pointer to a function that sets the channel that this token should belong to + */ + void set_channel(ANTLR_UINT32 channel); + + /** Pointer to a function that returns an index 0...n-1 of the token in the token + * input stream. + */ + ANTLR_MARKER get_tokenIndex() const; + + /** Pointer to a function that can set the token index of this token in the token + * input stream. + */ void set_tokenIndex(ANTLR_MARKER tokenIndex); - - /** Pointer to a function that gets the start index in the input stream for this token. - */ - ANTLR_MARKER get_startIndex() const; - - /** Pointer to a function that sets the start index in the input stream for this token. - */ + + /** Pointer to a function that gets the start index in the input stream for this token. + */ + ANTLR_MARKER get_startIndex() const; + + /** Pointer to a function that sets the start index in the input stream for this token. + */ void set_startIndex(ANTLR_MARKER index); - /** Pointer to a function that gets the stop index in the input stream for this token. - */ - ANTLR_MARKER get_stopIndex() const; - - /** Pointer to a function that sets the stop index in the input stream for this token. - */ + /** Pointer to a function that gets the stop index in the input stream for this token. + */ + ANTLR_MARKER get_stopIndex() const; + + /** Pointer to a function that sets the stop index in the input stream for this token. 
+ */ void set_stopIndex(ANTLR_MARKER index); const StreamDataType* get_lineStart() const; void set_lineStart( const StreamDataType* lineStart ); - + /** Pointer to a function that returns this token as a text representation that can be - * printed with embedded control codes such as \n replaced with the printable sequence "\\n" + * printed with embedded control codes such as \n replaced with the printable sequence "\\n" * This also yields a string structure that can be used more easily than the pointer to - * the input stream in certain situations. - */ - StringType toString() const; - + * the input stream in certain situations. + */ + StringType toString() const; + UserDataType UserData; -}; - +}; + } - -#include "antlr3commontoken.inl" - -#endif + +#include "antlr3commontoken.inl" + +#endif diff --git a/contrib/libs/antlr3_cpp_runtime/include/antlr3commontoken.inl b/contrib/libs/antlr3_cpp_runtime/include/antlr3commontoken.inl index 3277b3def1..77ed32fe8f 100644 --- a/contrib/libs/antlr3_cpp_runtime/include/antlr3commontoken.inl +++ b/contrib/libs/antlr3_cpp_runtime/include/antlr3commontoken.inl @@ -1,10 +1,10 @@ namespace antlr3 { - -template<class ImplTraits> -CommonToken<ImplTraits>::CommonToken() -{ + +template<class ImplTraits> +CommonToken<ImplTraits>::CommonToken() +{ m_type = 0; - m_channel = 0; + m_channel = 0; m_lineStart = NULL; m_line = 0; m_charPositionInLine = 0; @@ -12,11 +12,11 @@ CommonToken<ImplTraits>::CommonToken() m_index = 0; m_startIndex = 0; m_stopIndex = 0; -} - -template<class ImplTraits> -CommonToken<ImplTraits>::CommonToken(ANTLR_UINT32 type) -{ +} + +template<class ImplTraits> +CommonToken<ImplTraits>::CommonToken(ANTLR_UINT32 type) +{ m_type = type; m_channel = 0; m_lineStart = NULL; @@ -26,11 +26,11 @@ CommonToken<ImplTraits>::CommonToken(ANTLR_UINT32 type) m_index = 0; m_startIndex = 0; m_stopIndex = 0; -} - -template<class ImplTraits> -CommonToken<ImplTraits>::CommonToken(TOKEN_TYPE type) -{ +} + +template<class ImplTraits> 
+CommonToken<ImplTraits>::CommonToken(TOKEN_TYPE type) +{ m_type = type; m_channel = 0; m_lineStart = NULL; @@ -40,13 +40,13 @@ CommonToken<ImplTraits>::CommonToken(TOKEN_TYPE type) m_index = 0; m_startIndex = 0; m_stopIndex = 0; -} - -template<class ImplTraits> -CommonToken<ImplTraits>::CommonToken( const CommonToken& ctoken ) +} + +template<class ImplTraits> +CommonToken<ImplTraits>::CommonToken( const CommonToken& ctoken ) :m_tokText( ctoken.m_tokText ) ,UserData(ctoken.UserData) -{ +{ m_type = ctoken.m_type; m_channel = ctoken.m_channel; m_lineStart = ctoken.m_lineStart; @@ -56,11 +56,11 @@ CommonToken<ImplTraits>::CommonToken( const CommonToken& ctoken ) m_index = ctoken.m_index; m_startIndex = ctoken.m_startIndex; m_stopIndex = ctoken.m_stopIndex; -} - -template<class ImplTraits> -CommonToken<ImplTraits>& CommonToken<ImplTraits>::operator=( const CommonToken& ctoken ) -{ +} + +template<class ImplTraits> +CommonToken<ImplTraits>& CommonToken<ImplTraits>::operator=( const CommonToken& ctoken ) +{ UserData = ctoken.UserData; m_type = ctoken.m_type; m_channel = ctoken.m_channel; @@ -71,20 +71,20 @@ CommonToken<ImplTraits>& CommonToken<ImplTraits>::operator=( const CommonToken& m_index = ctoken.m_index; m_startIndex = ctoken.m_startIndex; m_stopIndex = ctoken.m_stopIndex; - + m_tokText = ctoken.m_tokText; return *this; -} - -template<class ImplTraits> -ANTLR_INLINE bool CommonToken<ImplTraits>::operator<( const CommonToken& ctoken ) const -{ +} + +template<class ImplTraits> +ANTLR_INLINE bool CommonToken<ImplTraits>::operator<( const CommonToken& ctoken ) const +{ return (m_index < ctoken.m_index); -} - -template<class ImplTraits> -bool CommonToken<ImplTraits>::operator==( const CommonToken& ctoken ) const -{ +} + +template<class ImplTraits> +bool CommonToken<ImplTraits>::operator==( const CommonToken& ctoken ) const +{ return ( (m_type == ctoken.m_type) && (m_channel == ctoken.m_channel) && (m_lineStart == ctoken.m_lineStart) && @@ -94,49 +94,49 @@ bool 
CommonToken<ImplTraits>::operator==( const CommonToken& ctoken ) const (m_index == ctoken.m_index) && (m_startIndex == ctoken.m_startIndex) && (m_stopIndex == ctoken.m_stopIndex) ); -} - -template<class ImplTraits> -ANTLR_INLINE typename CommonToken<ImplTraits>::InputStreamType* CommonToken<ImplTraits>::get_input() const -{ +} + +template<class ImplTraits> +ANTLR_INLINE typename CommonToken<ImplTraits>::InputStreamType* CommonToken<ImplTraits>::get_input() const +{ return m_input; -} - -template<class ImplTraits> -ANTLR_INLINE ANTLR_MARKER CommonToken<ImplTraits>::get_index() const -{ +} + +template<class ImplTraits> +ANTLR_INLINE ANTLR_MARKER CommonToken<ImplTraits>::get_index() const +{ return m_index; -} - -template<class ImplTraits> -ANTLR_INLINE void CommonToken<ImplTraits>::set_index( ANTLR_MARKER index ) -{ +} + +template<class ImplTraits> +ANTLR_INLINE void CommonToken<ImplTraits>::set_index( ANTLR_MARKER index ) +{ m_index = index; -} - -template<class ImplTraits> -void CommonToken<ImplTraits>::set_input( InputStreamType* input ) -{ +} + +template<class ImplTraits> +void CommonToken<ImplTraits>::set_input( InputStreamType* input ) +{ m_input = input; -} - -template<class ImplTraits> +} + +template<class ImplTraits> typename CommonToken<ImplTraits>::StringType const & CommonToken<ImplTraits>::getText() const -{ +{ static const StringType EOF_STRING("<EOF>"); static const StringType EMPTY_STRING(""); - + if ( !m_tokText.empty() ) return m_tokText; - + // EOF is a special case // if ( m_type == TOKEN_EOF) { return EOF_STRING; } - + // We had nothing installed in the token, create a new string // from the input stream // @@ -147,182 +147,182 @@ CommonToken<ImplTraits>::getText() const // Nothing to return, there is no input stream // return EMPTY_STRING; -} - -template<class ImplTraits> -ANTLR_INLINE void CommonToken<ImplTraits>::set_tokText( const StringType& text ) -{ +} + +template<class ImplTraits> +ANTLR_INLINE void CommonToken<ImplTraits>::set_tokText( 
const StringType& text ) +{ m_tokText = text; -} - -template<class ImplTraits> -ANTLR_INLINE void CommonToken<ImplTraits>::setText(ANTLR_UINT8* text) -{ +} + +template<class ImplTraits> +ANTLR_INLINE void CommonToken<ImplTraits>::setText(ANTLR_UINT8* text) +{ if( text == NULL ) m_tokText.clear(); else m_tokText = (const char*) text; -} - -template<class ImplTraits> +} + +template<class ImplTraits> ANTLR_INLINE void CommonToken<ImplTraits>::setText(const char* text) -{ +{ if( text == NULL ) m_tokText.clear(); else m_tokText = text; -} - -template<class ImplTraits> -ANTLR_INLINE ANTLR_UINT32 CommonToken<ImplTraits>::get_type() const -{ +} + +template<class ImplTraits> +ANTLR_INLINE ANTLR_UINT32 CommonToken<ImplTraits>::get_type() const +{ return m_type; -} - -template<class ImplTraits> -ANTLR_INLINE ANTLR_UINT32 CommonToken<ImplTraits>::getType() const -{ +} + +template<class ImplTraits> +ANTLR_INLINE ANTLR_UINT32 CommonToken<ImplTraits>::getType() const +{ return m_type; -} - -template<class ImplTraits> +} + +template<class ImplTraits> ANTLR_INLINE void CommonToken<ImplTraits>::set_type(ANTLR_UINT32 ttype) -{ +{ m_type = ttype; -} - -template<class ImplTraits> -ANTLR_INLINE ANTLR_UINT32 CommonToken<ImplTraits>::get_line() const -{ +} + +template<class ImplTraits> +ANTLR_INLINE ANTLR_UINT32 CommonToken<ImplTraits>::get_line() const +{ return m_line; -} - -template<class ImplTraits> -ANTLR_INLINE void CommonToken<ImplTraits>::set_line(ANTLR_UINT32 line) -{ +} + +template<class ImplTraits> +ANTLR_INLINE void CommonToken<ImplTraits>::set_line(ANTLR_UINT32 line) +{ m_line = line; -} - -template<class ImplTraits> -ANTLR_INLINE ANTLR_INT32 CommonToken<ImplTraits>::get_charPositionInLine() const -{ +} + +template<class ImplTraits> +ANTLR_INLINE ANTLR_INT32 CommonToken<ImplTraits>::get_charPositionInLine() const +{ return m_charPositionInLine; -} - -template<class ImplTraits> -ANTLR_INLINE ANTLR_INT32 CommonToken<ImplTraits>::getCharPositionInLine() const -{ +} + 
+template<class ImplTraits> +ANTLR_INLINE ANTLR_INT32 CommonToken<ImplTraits>::getCharPositionInLine() const +{ return this->get_charPositionInLine(); -} - -template<class ImplTraits> +} + +template<class ImplTraits> ANTLR_INLINE void CommonToken<ImplTraits>::set_charPositionInLine(ANTLR_INT32 pos) -{ +{ m_charPositionInLine = pos; -} - -template<class ImplTraits> -ANTLR_INLINE ANTLR_UINT32 CommonToken<ImplTraits>::get_channel() const -{ +} + +template<class ImplTraits> +ANTLR_INLINE ANTLR_UINT32 CommonToken<ImplTraits>::get_channel() const +{ return m_channel; -} - -template<class ImplTraits> -ANTLR_INLINE void CommonToken<ImplTraits>::set_channel(ANTLR_UINT32 channel) -{ +} + +template<class ImplTraits> +ANTLR_INLINE void CommonToken<ImplTraits>::set_channel(ANTLR_UINT32 channel) +{ m_channel = channel; -} - -template<class ImplTraits> -ANTLR_INLINE ANTLR_MARKER CommonToken<ImplTraits>::get_tokenIndex() const -{ +} + +template<class ImplTraits> +ANTLR_INLINE ANTLR_MARKER CommonToken<ImplTraits>::get_tokenIndex() const +{ return m_index; -} - -template<class ImplTraits> +} + +template<class ImplTraits> ANTLR_INLINE void CommonToken<ImplTraits>::set_tokenIndex(ANTLR_MARKER tokenIndex) -{ +{ m_index = tokenIndex; -} - -template<class ImplTraits> -ANTLR_INLINE ANTLR_MARKER CommonToken<ImplTraits>::get_startIndex() const -{ +} + +template<class ImplTraits> +ANTLR_INLINE ANTLR_MARKER CommonToken<ImplTraits>::get_startIndex() const +{ return (m_startIndex == -1) ? 
(ANTLR_MARKER)(m_input->get_data()) : m_startIndex; -} - -template<class ImplTraits> +} + +template<class ImplTraits> ANTLR_INLINE void CommonToken<ImplTraits>::set_startIndex(ANTLR_MARKER index) -{ +{ m_startIndex = index; -} - -template<class ImplTraits> -ANTLR_INLINE ANTLR_MARKER CommonToken<ImplTraits>::get_stopIndex() const -{ +} + +template<class ImplTraits> +ANTLR_INLINE ANTLR_MARKER CommonToken<ImplTraits>::get_stopIndex() const +{ return m_stopIndex; -} - -template<class ImplTraits> +} + +template<class ImplTraits> ANTLR_INLINE void CommonToken<ImplTraits>::set_stopIndex(ANTLR_MARKER index) -{ +{ m_stopIndex = index; -} - -template<class ImplTraits> -ANTLR_INLINE const typename CommonToken<ImplTraits>::StreamDataType* CommonToken<ImplTraits>::get_lineStart() const -{ +} + +template<class ImplTraits> +ANTLR_INLINE const typename CommonToken<ImplTraits>::StreamDataType* CommonToken<ImplTraits>::get_lineStart() const +{ return m_lineStart; -} - -template<class ImplTraits> +} + +template<class ImplTraits> ANTLR_INLINE void CommonToken<ImplTraits>::set_lineStart( const StreamDataType* lineStart ) -{ +{ m_lineStart = lineStart; -} - -template<class ImplTraits> -typename CommonToken<ImplTraits>::StringType CommonToken<ImplTraits>::toString() const -{ - StringType text; - typedef typename ImplTraits::StringStreamType StringStreamType; +} + +template<class ImplTraits> +typename CommonToken<ImplTraits>::StringType CommonToken<ImplTraits>::toString() const +{ + StringType text; + typedef typename ImplTraits::StringStreamType StringStreamType; StringStreamType outtext; - + text = this->getText(); - + if (text.empty()) return ""; - - /* Now we use our handy dandy string utility to assemble the - * the reporting string - * return "[@"+getTokenIndex()+","+start+":"+stop+"='"+txt+"',<"+type+">"+channelStr+","+line+":"+getCharPositionInLine()+"]"; - */ - outtext << "[Index: "; - outtext << (int)this->get_tokenIndex(); - outtext << " (Start: "; - outtext << 
(int)this->get_startIndex(); - outtext << "-Stop: "; - outtext << (int)this->get_stopIndex(); - outtext << ") ='"; - outtext << text; - outtext << "', type<"; - outtext << (int)m_type; - outtext << "> "; - + + /* Now we use our handy dandy string utility to assemble the + * the reporting string + * return "[@"+getTokenIndex()+","+start+":"+stop+"='"+txt+"',<"+type+">"+channelStr+","+line+":"+getCharPositionInLine()+"]"; + */ + outtext << "[Index: "; + outtext << (int)this->get_tokenIndex(); + outtext << " (Start: "; + outtext << (int)this->get_startIndex(); + outtext << "-Stop: "; + outtext << (int)this->get_stopIndex(); + outtext << ") ='"; + outtext << text; + outtext << "', type<"; + outtext << (int)m_type; + outtext << "> "; + if (this->get_channel() > TOKEN_DEFAULT_CHANNEL) - { + { outtext << "(channel = "; outtext << (int)this->get_channel(); outtext << ") "; - } - - outtext << "Line: "; - outtext << (int)this->get_line(); - outtext << " LinePos:"; - outtext << (int)this->get_charPositionInLine(); - outtext << "]"; - - return outtext.str(); -} - + } + + outtext << "Line: "; + outtext << (int)this->get_line(); + outtext << " LinePos:"; + outtext << (int)this->get_charPositionInLine(); + outtext << "]"; + + return outtext.str(); +} + } diff --git a/contrib/libs/antlr3_cpp_runtime/include/antlr3commontree.hpp b/contrib/libs/antlr3_cpp_runtime/include/antlr3commontree.hpp index 2a5e61f94f..d7adf2a726 100644 --- a/contrib/libs/antlr3_cpp_runtime/include/antlr3commontree.hpp +++ b/contrib/libs/antlr3_cpp_runtime/include/antlr3commontree.hpp @@ -1,44 +1,44 @@ -/** Interface for an ANTLR3 common tree which is what gets - * passed around by the AST producing parser. - */ - +/** Interface for an ANTLR3 common tree which is what gets + * passed around by the AST producing parser. + */ + #ifndef _ANTLR3_COMMON_TREE_HPP #define _ANTLR3_COMMON_TREE_HPP - -// [The "BSD licence"] -// Copyright (c) 2005-2009 Gokulakannan Somasundaram, ElectronDB - -// -// All rights reserved. 
-// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// 3. The name of the author may not be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR -// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES -// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. -// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, -// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT -// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF -// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - + +// [The "BSD licence"] +// Copyright (c) 2005-2009 Gokulakannan Somasundaram, ElectronDB + +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// 2. 
Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// 3. The name of the author may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + namespace antlr3 { - -template<class ImplTraits> + +template<class ImplTraits> class CommonTree : public ImplTraits::AllocPolicyType -{ -public: +{ +public: typedef typename ImplTraits::AllocPolicyType AllocPolicyType; typedef typename ImplTraits::StringType StringType; typedef typename ImplTraits::CommonTokenType CommonTokenType; @@ -50,51 +50,51 @@ public: typedef typename ImplTraits::TreeUserDataType UserDataType; protected: /// The list of all the children that belong to this node. They are not part of the node - /// as they belong to the common tree node that implements this. - /// + /// as they belong to the common tree node that implements this. 
+ /// ChildrenType m_children; - - - /// Start token index that encases this tree - /// + + + /// Start token index that encases this tree + /// ANTLR_MARKER m_startIndex; - - /// End token that encases this tree - /// + + /// End token that encases this tree + /// ANTLR_MARKER m_stopIndex; - - /// A single token, this is the payload for the tree - /// + + /// A single token, this is the payload for the tree + /// const CommonTokenType* m_token; - + /// Points to the node that has this node as a child. /// If this is NULL, then this is the root node. /// CommonTree* m_parent; - + /// What index is this particular node in the child list it /// belongs to? /// ANTLR_INT32 m_childIndex; - -public: + +public: CommonTree(); CommonTree( const CommonTokenType* token ); CommonTree( const CommonTree* token ); CommonTree( const CommonTree& ctree ); ~CommonTree(); - + const CommonTokenType* get_token() const; void set_token(CommonTokenType const*); - + ChildrenType& get_children(); const ChildrenType& get_children() const; ANTLR_INT32 get_childIndex() const; TreeType* get_parent() const; - + ANTLR_MARKER get_startIndex() const; void set_startIndex(ANTLR_MARKER index); - + ANTLR_MARKER get_stopIndex() const; void set_stopIndex(ANTLR_MARKER index); @@ -141,12 +141,12 @@ public: void reuse(); UserDataType UserData; -}; - +}; + } - -#include "antlr3commontree.inl" - -#endif - - + +#include "antlr3commontree.inl" + +#endif + + diff --git a/contrib/libs/antlr3_cpp_runtime/include/antlr3commontree.inl b/contrib/libs/antlr3_cpp_runtime/include/antlr3commontree.inl index 7000ca4fd2..1a5321aab2 100644 --- a/contrib/libs/antlr3_cpp_runtime/include/antlr3commontree.inl +++ b/contrib/libs/antlr3_cpp_runtime/include/antlr3commontree.inl @@ -1,81 +1,81 @@ namespace antlr3 { - -template<class ImplTraits> -CommonTree<ImplTraits>::CommonTree() -{ + +template<class ImplTraits> +CommonTree<ImplTraits>::CommonTree() +{ m_startIndex = -1; m_stopIndex = -1; m_childIndex = -1; m_token = NULL; 
m_parent = NULL; -} - -template<class ImplTraits> -CommonTree<ImplTraits>::CommonTree( const CommonTree& ctree ) +} + +template<class ImplTraits> +CommonTree<ImplTraits>::CommonTree( const CommonTree& ctree ) :m_children( ctree.m_children) ,UserData(ctree.UserData) -{ +{ m_startIndex = ctree.m_startIndex; m_stopIndex = ctree.m_stopIndex; m_childIndex = ctree.m_childIndex; m_token = ctree.m_token; m_parent = ctree.m_parent; -} - -template<class ImplTraits> +} + +template<class ImplTraits> CommonTree<ImplTraits>::CommonTree( const CommonTokenType* token ) -{ +{ m_startIndex = -1; m_stopIndex = -1; m_childIndex = -1; m_token = token; m_parent = NULL; -} - -template<class ImplTraits> +} + +template<class ImplTraits> CommonTree<ImplTraits>::CommonTree( const CommonTree* tree ) :UserData(tree->UserData) -{ +{ m_startIndex = tree->get_startIndex(); m_stopIndex = tree->get_stopIndex(); m_childIndex = -1; m_token = tree->get_token(); m_parent = NULL; -} - -template<class ImplTraits> +} + +template<class ImplTraits> const typename CommonTree<ImplTraits>::CommonTokenType* CommonTree<ImplTraits>::get_token() const -{ +{ return m_token; -} - -template<class ImplTraits> +} + +template<class ImplTraits> void CommonTree<ImplTraits>::set_token(typename CommonTree<ImplTraits>::CommonTokenType const* token) -{ +{ m_token = token; -} - -template<class ImplTraits> +} + +template<class ImplTraits> typename CommonTree<ImplTraits>::ChildrenType& CommonTree<ImplTraits>::get_children() -{ +{ return m_children; -} - -template<class ImplTraits> +} + +template<class ImplTraits> const typename CommonTree<ImplTraits>::ChildrenType& CommonTree<ImplTraits>::get_children() const -{ +{ return m_children; -} - -template<class ImplTraits> +} + +template<class ImplTraits> void CommonTree<ImplTraits>::addChild(TreeTypePtr& child) -{ +{ if (child == NULL) return; - + ChildrenType& child_children = child->get_children(); //ChildrenType& tree_children = this->get_children(); - + if (child->isNilNode() == 
true) { if ( !child_children.empty() && child_children == m_children ) @@ -84,13 +84,13 @@ void CommonTree<ImplTraits>::addChild(TreeTypePtr& child) fprintf(stderr, "ANTLR3: An attempt was made to add a child list to itself!\n"); return; } - - // Add all of the children's children to this list - // - if ( !child_children.empty() ) - { + + // Add all of the children's children to this list + // + if ( !child_children.empty() ) + { if (!m_children.empty()) - { + { // Need to copy(append) the children for(auto i = child_children.begin(); i != child_children.end(); ++i) { @@ -105,15 +105,15 @@ void CommonTree<ImplTraits>::addChild(TreeTypePtr& child) } } } else { - // We are build ing the tree structure here, so we need not - // worry about duplication of pointers as the tree node - // factory will only clean up each node once. So we just - // copy in the child's children pointer as the child is - // a nil node (has not root itself). - // + // We are build ing the tree structure here, so we need not + // worry about duplication of pointers as the tree node + // factory will only clean up each node once. So we just + // copy in the child's children pointer as the child is + // a nil node (has not root itself). 
+ // m_children.swap( child_children ); this->freshenParentAndChildIndexes(); - } + } } } else @@ -125,35 +125,35 @@ void CommonTree<ImplTraits>::addChild(TreeTypePtr& child) m_children.back()->set_parent(tree); m_children.back()->set_childIndex(m_children.size() - 1); } -} - -template<class ImplTraits> +} + +template<class ImplTraits> void CommonTree<ImplTraits>::addChildren(const ChildListType& kids) -{ +{ for( typename ChildListType::const_iterator iter = kids.begin(); iter != kids.end(); ++iter ) { this->addChild( *iter ); } -} - -template<class ImplTraits> +} + +template<class ImplTraits> typename CommonTree<ImplTraits>::TreeTypePtr CommonTree<ImplTraits>::deleteChild(ANTLR_UINT32 i) -{ +{ if( m_children.empty() ) return NULL; TreeTypePtr killed = m_children.erase( m_children.begin() + i); this->freshenParentAndChildIndexes(i); return killed; -} - -template<class ImplTraits> +} + +template<class ImplTraits> void CommonTree<ImplTraits>::replaceChildren(ANTLR_INT32 startChildIndex, ANTLR_INT32 stopChildIndex, TreeTypePtr newTree) -{ - +{ + ANTLR_INT32 numNewChildren; // Tracking variable ANTLR_INT32 delta; // Difference in new vs existing count - + if ( m_children.empty() ) { fprintf(stderr, "replaceChildren call: Indexes are invalid; no children in list for %s", this->get_text().c_str() ); @@ -163,25 +163,25 @@ void CommonTree<ImplTraits>::replaceChildren(ANTLR_INT32 startChildIndex, ANTLR_ // How many nodes will go away ANTLR_INT32 replacingHowMany = stopChildIndex - startChildIndex + 1; ANTLR_INT32 replacingWithHowMany; // How many nodes will replace them - + // Either use the existing list of children in the supplied nil node, or build a vector of the // tree we were given if it is not a nil node, then we treat both situations exactly the same // ChildrenType newChildren; ChildrenType &newChildrenRef(newChildren); - + if (newTree->isNilNode()) { newChildrenRef = newTree->get_children(); } else { newChildrenRef.push_back(newTree); } - + // Initialize 
replacingWithHowMany = newChildrenRef.size(); numNewChildren = newChildrenRef.size(); delta = replacingHowMany - replacingWithHowMany; - + // If it is the same number of nodes, then do a direct replacement // if (delta == 0) @@ -221,31 +221,31 @@ void CommonTree<ImplTraits>::replaceChildren(ANTLR_INT32 startChildIndex, ANTLR_ { m_children[ startChildIndex + j ] = newChildrenRef.at(j); } - + for (ANTLR_UINT32 j = replacingHowMany; j < replacingWithHowMany; j++) { m_children.push_back( newChildrenRef.at(j) ); } - + this->freshenParentAndChildIndexes(startChildIndex); } -} - -template<class ImplTraits> +} + +template<class ImplTraits> CommonTree<ImplTraits>* CommonTree<ImplTraits>::dupNode() const -{ +{ return new CommonTree<ImplTraits>(this); -} - -template<class ImplTraits> +} + +template<class ImplTraits> CommonTree<ImplTraits>* CommonTree<ImplTraits>::dupNode(void *p) const -{ +{ return new (p) CommonTree<ImplTraits>(this); -} - -template<class ImplTraits> +} + +template<class ImplTraits> ANTLR_UINT32 CommonTree<ImplTraits>::get_charPositionInLine() const -{ +{ if(m_token == NULL || (m_token->get_charPositionInLine() == 0) ) { if(m_children.empty()) @@ -255,11 +255,11 @@ ANTLR_UINT32 CommonTree<ImplTraits>::get_charPositionInLine() const return 0; } return m_token->get_charPositionInLine(); -} - -template<class ImplTraits> +} + +template<class ImplTraits> typename CommonTree<ImplTraits>::TreeTypePtr& CommonTree<ImplTraits>::getChild(ANTLR_UINT32 i) -{ +{ static TreeTypePtr nul; if ( m_children.empty() || i >= m_children.size() ) { @@ -267,66 +267,66 @@ typename CommonTree<ImplTraits>::TreeTypePtr& CommonTree<ImplTraits>::getChild(A return nul; } return m_children.at(i); -} - -template<class ImplTraits> -void CommonTree<ImplTraits>::set_childIndex( ANTLR_INT32 i) -{ +} + +template<class ImplTraits> +void CommonTree<ImplTraits>::set_childIndex( ANTLR_INT32 i) +{ m_childIndex = i; -} - -template<class ImplTraits> +} + +template<class ImplTraits> ANTLR_INT32 
CommonTree<ImplTraits>::get_childIndex() const -{ +{ return m_childIndex; -} - -template<class ImplTraits> +} + +template<class ImplTraits> ANTLR_UINT32 CommonTree<ImplTraits>::getChildCount() const -{ +{ return static_cast<ANTLR_UINT32>( m_children.size() ); -} - -template<class ImplTraits> -typename CommonTree<ImplTraits>::TreeType* CommonTree<ImplTraits>::get_parent() const -{ +} + +template<class ImplTraits> +typename CommonTree<ImplTraits>::TreeType* CommonTree<ImplTraits>::get_parent() const +{ return m_parent; -} - -template<class ImplTraits> -void CommonTree<ImplTraits>::set_parent( TreeType* parent) -{ +} + +template<class ImplTraits> +void CommonTree<ImplTraits>::set_parent( TreeType* parent) +{ m_parent = parent; -} - -template<class ImplTraits> +} + +template<class ImplTraits> ANTLR_MARKER CommonTree<ImplTraits>::get_startIndex() const -{ +{ if( m_startIndex==-1 && m_token!=NULL) return m_token->get_tokenIndex(); return m_startIndex; -} - -template<class ImplTraits> +} + +template<class ImplTraits> void CommonTree<ImplTraits>::set_startIndex( ANTLR_MARKER index) -{ +{ m_startIndex = index; } - + template<class ImplTraits> ANTLR_MARKER CommonTree<ImplTraits>::get_stopIndex() const { if( m_stopIndex==-1 && m_token!=NULL) return m_token->get_tokenIndex(); return m_stopIndex; -} - -template<class ImplTraits> +} + +template<class ImplTraits> void CommonTree<ImplTraits>::set_stopIndex( ANTLR_MARKER index) -{ +{ m_stopIndex = index; } - + template<class ImplTraits> ANTLR_UINT32 CommonTree<ImplTraits>::getType() { @@ -334,11 +334,11 @@ ANTLR_UINT32 CommonTree<ImplTraits>::getType() return CommonTokenType::TOKEN_INVALID; else return m_token->get_type(); -} - -template<class ImplTraits> +} + +template<class ImplTraits> typename CommonTree<ImplTraits>::TreeTypePtr& CommonTree<ImplTraits>::getFirstChildWithType(ANTLR_UINT32 type) -{ +{ ANTLR_UINT32 i; std::size_t cs; @@ -356,11 +356,11 @@ typename CommonTree<ImplTraits>::TreeTypePtr& 
CommonTree<ImplTraits>::getFirstCh } } return NULL; -} - -template<class ImplTraits> +} + +template<class ImplTraits> ANTLR_UINT32 CommonTree<ImplTraits>::get_line() const -{ +{ if(m_token == NULL || m_token->get_line() == 0) { if ( m_children.empty()) @@ -370,53 +370,53 @@ ANTLR_UINT32 CommonTree<ImplTraits>::get_line() const return 0; } return m_token->get_line(); -} - -template<class ImplTraits> +} + +template<class ImplTraits> typename CommonTree<ImplTraits>::StringType CommonTree<ImplTraits>::getText() -{ +{ return this->toString(); -} - -template<class ImplTraits> +} + +template<class ImplTraits> bool CommonTree<ImplTraits>::isNilNode() -{ +{ // This is a Nil tree if it has no payload (Token in our case) if(m_token == NULL) return true; else return false; } - + template<class ImplTraits> void CommonTree<ImplTraits>::setChild(ANTLR_UINT32 i, TreeTypePtr child) { if( child==NULL) return; - + if( child->isNilNode()) { // TODO: throw IllegalArgumentException return; } - + if( m_children.size() <= i ) m_children.resize(i+1); - + m_children[i] = child; TreeType* tree = static_cast<TreeType*>(this); child->set_parent(tree); child->set_childIndex(i); -} - -template<class ImplTraits> +} + +template<class ImplTraits> typename CommonTree<ImplTraits>::StringType CommonTree<ImplTraits>::toStringTree() -{ +{ StringType retval; - + if( m_children.empty() ) return this->toString(); - + /* Need a new string with nothing at all in it. 
*/ if(this->isNilNode() == false) @@ -425,7 +425,7 @@ typename CommonTree<ImplTraits>::StringType CommonTree<ImplTraits>::toStringTree retval.append(this->toString()); retval.append(" "); } - + if ( !m_children.empty()) { retval.append( m_children.front()->toStringTree()); @@ -441,22 +441,22 @@ typename CommonTree<ImplTraits>::StringType CommonTree<ImplTraits>::toStringTree retval.append(")"); } return retval; -} - -template<class ImplTraits> +} + +template<class ImplTraits> typename CommonTree<ImplTraits>::StringType CommonTree<ImplTraits>::toString() -{ +{ if( this->isNilNode()) return StringType("nil"); return m_token->toString(); -} - -template<class ImplTraits> +} + +template<class ImplTraits> void CommonTree<ImplTraits>::freshenParentAndChildIndexes() -{ +{ this->freshenParentAndChildIndexes(0); } - + template<class ImplTraits> void CommonTree<ImplTraits>::freshenParentAndChildIndexes(ANTLR_UINT32 offset) { @@ -480,13 +480,13 @@ void CommonTree<ImplTraits>::freshenParentAndChildIndexes(ANTLR_UINT32 offset) (*i)->set_parent(tree); } } - + template<class ImplTraits> void CommonTree<ImplTraits>::freshenParentAndChildIndexesDeeply() { this->freshenParentAndChildIndexes(0); } - + template<class ImplTraits> void CommonTree<ImplTraits>::freshenParentAndChildIndexesDeeply(ANTLR_UINT32 offset) { @@ -497,11 +497,11 @@ void CommonTree<ImplTraits>::freshenParentAndChildIndexesDeeply(ANTLR_UINT32 off child->set_parent(this); child->freshenParentAndChildIndexesDeeply(); } -} - -template<class ImplTraits> +} + +template<class ImplTraits> void CommonTree<ImplTraits>::reuse() -{ +{ m_startIndex = -1; m_stopIndex = -1; m_childIndex = -1; @@ -510,11 +510,11 @@ void CommonTree<ImplTraits>::reuse() ChildrenType empty; m_children.swap(empty); -} - -template<class ImplTraits> -CommonTree<ImplTraits>::~CommonTree() -{ -} - +} + +template<class ImplTraits> +CommonTree<ImplTraits>::~CommonTree() +{ +} + } diff --git a/contrib/libs/antlr3_cpp_runtime/include/antlr3commontreeadaptor.hpp 
b/contrib/libs/antlr3_cpp_runtime/include/antlr3commontreeadaptor.hpp index c32968ac92..523abee82d 100644 --- a/contrib/libs/antlr3_cpp_runtime/include/antlr3commontreeadaptor.hpp +++ b/contrib/libs/antlr3_cpp_runtime/include/antlr3commontreeadaptor.hpp @@ -1,42 +1,42 @@ -/** \file - * Definition of the ANTLR3 common tree adaptor. - */ - +/** \file + * Definition of the ANTLR3 common tree adaptor. + */ + #ifndef _ANTLR3_COMMON_TREE_ADAPTOR_HPP #define _ANTLR3_COMMON_TREE_ADAPTOR_HPP - -// [The "BSD licence"] -// Copyright (c) 2005-2009 Gokulakannan Somasundaram, ElectronDB - -// -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// 3. The name of the author may not be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR -// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES -// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
-// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, -// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT -// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF -// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - + +// [The "BSD licence"] +// Copyright (c) 2005-2009 Gokulakannan Somasundaram, ElectronDB + +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// 3. The name of the author may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + namespace antlr3 { - + template <typename ImplTraits> class CommonTreeStore; - + /** Helper class for unique_ptr. Implements deleter for instances of unique_ptr While building AST tree dangling pointers are automatically put back into pool */ @@ -101,48 +101,48 @@ protected: ResourcePoolManagerType m_manager; }; -template<class ImplTraits> +template<class ImplTraits> class CommonTreeAdaptor : public ImplTraits::AllocPolicyType , public CommonTreeStore<ImplTraits> -{ -public: +{ +public: typedef typename ImplTraits::StringType StringType; typedef typename ImplTraits::TreeType TreeType; typedef typename ImplTraits::TreeTypePtr TreeTypePtr; typedef typename TreeType::ChildrenType ChildrenType; - + typedef TreeType TokenType; typedef typename ImplTraits::CommonTokenType CommonTokenType; typedef typename ImplTraits::DebugEventListenerType DebuggerType; typedef typename ImplTraits::TokenStreamType TokenStreamType; typedef CommonTreeStore<ImplTraits> TreeStoreType; -public: +public: //The parameter is there only to provide uniform constructor interface CommonTreeAdaptor(DebuggerType* dbg = nullptr); - + TreeTypePtr nilNode(); TreeTypePtr dupTree( const TreeTypePtr& tree); TreeTypePtr dupTree( const TreeType* tree); - + TreeTypePtr dupNode(const TreeTypePtr& treeNode); TreeTypePtr dupNode(const TreeType* treeNode); - + void addChild( TreeTypePtr& t, TreeTypePtr& child); void addChild( TreeTypePtr& t, TreeTypePtr&& child); void addChildToken( TreeTypePtr& t, 
CommonTokenType* child); void setParent( TreeTypePtr& child, TreeType* parent); TreeType* getParent( TreeTypePtr& child); - + TreeTypePtr errorNode( CommonTokenType* tnstream, const CommonTokenType* startToken, const CommonTokenType* stopToken); bool isNilNode( TreeTypePtr& t); - + TreeTypePtr becomeRoot( TreeTypePtr& newRoot, TreeTypePtr& oldRoot); TreeTypePtr becomeRoot( TreeTypePtr&& newRoot, TreeTypePtr& oldRoot); TreeTypePtr becomeRootToken(CommonTokenType* newRoot, TreeTypePtr& oldRoot); TreeTypePtr rulePostProcessing( TreeTypePtr& root); - + TreeTypePtr create( CommonTokenType const* payload); TreeTypePtr create( ANTLR_UINT32 tokenType, const CommonTokenType* fromToken); TreeTypePtr create( ANTLR_UINT32 tokenType, const CommonTokenType* fromToken, const char* text); @@ -153,7 +153,7 @@ public: CommonTokenType* createToken( ANTLR_UINT32 tokenType, const char* text); CommonTokenType* createToken( ANTLR_UINT32 tokenType, StringType const& text); CommonTokenType* createToken( const CommonTokenType* fromToken); - + ANTLR_UINT32 getType( TreeTypePtr& t); StringType getText( TreeTypePtr& t); @@ -165,17 +165,17 @@ public: ANTLR_UINT32 getChildCount( TreeTypePtr&); ANTLR_UINT64 getUniqueID( TreeTypePtr&); - + CommonTokenType* getToken( TreeTypePtr& t); - + void setTokenBoundaries( TreeTypePtr& t, const CommonTokenType* startToken, const CommonTokenType* stopToken); ANTLR_MARKER getTokenStartIndex( TreeTypePtr& t); ANTLR_MARKER getTokenStopIndex( TreeTypePtr& t); - + /// Produce a DOT (see graphviz freeware suite) from a base tree /// StringType makeDot( TreeTypePtr& theTree); - + /// Replace from start to stop child index of parent with t, which might /// be a list. Number of children may be different /// after this call. 
@@ -185,23 +185,23 @@ public: /// void replaceChildren( TreeTypePtr parent, ANTLR_INT32 startChildIndex, ANTLR_INT32 stopChildIndex, TreeTypePtr t); - + ~CommonTreeAdaptor(); - + protected: TreeTypePtr dupTreeImpl( const TreeType* root, TreeType* parent); - + void defineDotNodes(TreeTypePtr t, const StringType& dotSpec); void defineDotEdges(TreeTypePtr t, const StringType& dotSpec); -}; - +}; + //If someone can override the CommonTreeAdaptor at the compile time, that will be -//inherited here. Still you can choose to override the DebugTreeAdaptor, if you wish to -//change the DebugTreeAdaptor -template<class ImplTraits> -class DebugTreeAdaptor : public ImplTraits::CommonTreeAdaptorType -{ -public: +//inherited here. Still you can choose to override the DebugTreeAdaptor, if you wish to +//change the DebugTreeAdaptor +template<class ImplTraits> +class DebugTreeAdaptor : public ImplTraits::CommonTreeAdaptorType +{ +public: //DebugEventListener implements functionality through virtual functions //the template parameter is required for pointing back at the adaptor typedef typename ImplTraits::DebugEventListener DebuggerType; @@ -209,8 +209,8 @@ public: typedef typename ImplTraits::TreeTypePtr TreeTypePtr; typedef typename ImplTraits::CommonTokenType CommonTokenType; typedef typename ImplTraits::CommonTreeAdaptorType super; - -private: + +private: /// If set to something other than NULL, then this structure is /// points to an instance of the debugger interface. 
In general, the /// debugger is only referenced internally in recovery/error operations @@ -218,8 +218,8 @@ private: /// in every function/method /// DebuggerType* m_debugger; - -public: + +public: DebugTreeAdaptor( DebuggerType* debugger ); void setDebugEventListener( DebuggerType* debugger); TreeTypePtr nilNode(); @@ -227,7 +227,7 @@ public: void addChildToken(TreeTypePtr& t, CommonTokenType* child); TreeTypePtr becomeRoot( TreeTypePtr& newRootTree, TreeTypePtr& oldRootTree ); TreeTypePtr becomeRootToken( CommonTokenType* newRoot, TreeTypePtr& oldRoot); - + TreeTypePtr createTypeToken(ANTLR_UINT32 tokenType, CommonTokenType* fromToken); TreeTypePtr createTypeTokenText(ANTLR_UINT32 tokenType, CommonTokenType* fromToken, ANTLR_UINT8* text); TreeTypePtr createTypeText( ANTLR_UINT32 tokenType, ANTLR_UINT8* text); @@ -239,10 +239,10 @@ public: /// to the debugger. /// void simulateTreeConstruction(TreeTypePtr& tree); -}; - +}; + } - -#include "antlr3commontreeadaptor.inl" - -#endif + +#include "antlr3commontreeadaptor.inl" + +#endif diff --git a/contrib/libs/antlr3_cpp_runtime/include/antlr3commontreeadaptor.inl b/contrib/libs/antlr3_cpp_runtime/include/antlr3commontreeadaptor.inl index 6e191b2a15..c897dfd19e 100644 --- a/contrib/libs/antlr3_cpp_runtime/include/antlr3commontreeadaptor.inl +++ b/contrib/libs/antlr3_cpp_runtime/include/antlr3commontreeadaptor.inl @@ -1,5 +1,5 @@ namespace antlr3 { - + template <typename ImplTraits> CommonResourcePoolManager<ImplTraits>::CommonResourcePoolManager(CommonTreeStore<ImplTraits> * pool) : m_pool(pool) @@ -73,56 +73,56 @@ CommonTreeStore<ImplTraits>::reuse(TreeType* releasedResource) m_recycleBin.push_back(releasedResource); } -template<class ImplTraits> +template<class ImplTraits> CommonTreeAdaptor<ImplTraits>::CommonTreeAdaptor(DebuggerType*) {} template<class ImplTraits> typename CommonTreeAdaptor<ImplTraits>::TreeTypePtr CommonTreeAdaptor<ImplTraits>::nilNode() -{ +{ return this->create(NULL); -} - -template<class 
ImplTraits> +} + +template<class ImplTraits> typename CommonTreeAdaptor<ImplTraits>::TreeTypePtr CommonTreeAdaptor<ImplTraits>::dupTree( const TreeType* tree) -{ +{ if (tree == NULL) return NULL; return std::move(this->dupTreeImpl(tree, NULL)); -} - -template<class ImplTraits> +} + +template<class ImplTraits> typename CommonTreeAdaptor<ImplTraits>::TreeTypePtr CommonTreeAdaptor<ImplTraits>::dupTree( const TreeTypePtr& tree) -{ +{ if (tree == NULL) return NULL; return std::move(dupTreeImpl(tree.get(), NULL)); -} - -template<class ImplTraits> +} + +template<class ImplTraits> typename CommonTreeAdaptor<ImplTraits>::TreeTypePtr CommonTreeAdaptor<ImplTraits>::dupTreeImpl( const TreeType *root, TreeType* parent) -{ +{ TreeTypePtr newTree(dupNode(root)); - + // Ensure new subtree root has parent/child index set // this->setChildIndex( newTree, root->get_childIndex() ); this->setParent(newTree, parent); - + ChildrenType const& r_children = root->get_children(); for (auto i = r_children.begin(); i != r_children.end(); ++i) { // add child's clone this->addChild(newTree, dupTreeImpl(i->get(), newTree.get())); } - + return newTree; } - + template<class ImplTraits> void CommonTreeAdaptor<ImplTraits>::addChild( TreeTypePtr& t, TreeTypePtr& child) { @@ -130,66 +130,66 @@ void CommonTreeAdaptor<ImplTraits>::addChild( TreeTypePtr& t, TreeTypePtr& child { t->addChild(child); } -} - -template<class ImplTraits> +} + +template<class ImplTraits> void CommonTreeAdaptor<ImplTraits>::addChild( TreeTypePtr& t, TreeTypePtr&& child) -{ +{ if (t != NULL && child != NULL) { t->addChild(child); } -} - -template<class ImplTraits> +} + +template<class ImplTraits> void CommonTreeAdaptor<ImplTraits>::addChildToken( TreeTypePtr& t, CommonTokenType* child) -{ +{ if (t != NULL && child != NULL) { this->addChild(t, this->create(child)); } -} - -template<class ImplTraits> +} + +template<class ImplTraits> void CommonTreeAdaptor<ImplTraits>::setParent( TreeTypePtr& child, TreeType* parent) -{ +{ 
child->set_parent(parent); -} - -template<class ImplTraits> +} + +template<class ImplTraits> typename CommonTreeAdaptor<ImplTraits>::TreeType* CommonTreeAdaptor<ImplTraits>::getParent( TreeTypePtr& child) -{ +{ if ( child==NULL ) return NULL; return child->getParent(); -} - -template<class ImplTraits> +} + +template<class ImplTraits> typename CommonTreeAdaptor<ImplTraits>::TreeTypePtr CommonTreeAdaptor<ImplTraits>::errorNode( CommonTokenType*, const CommonTokenType*, const CommonTokenType*) -{ +{ // Use the supplied common tree node stream to get another tree from the factory // TODO: Look at creating the erronode as in Java, but this is complicated by the // need to track and free the memory allocated to it, so for now, we just // want something in the tree that isn't a NULL pointer. // return this->create( CommonTokenType::TOKEN_INVALID, "Tree Error Node"); - -} - -template<class ImplTraits> + +} + +template<class ImplTraits> bool CommonTreeAdaptor<ImplTraits>::isNilNode( TreeTypePtr& t) -{ +{ return t->isNilNode(); -} - -template<class ImplTraits> +} + +template<class ImplTraits> typename CommonTreeAdaptor<ImplTraits>::TreeTypePtr CommonTreeAdaptor<ImplTraits>::becomeRoot( TreeTypePtr& newRootTree, TreeTypePtr& oldRootTree) -{ +{ /* Protect against tree rewrites if we are in some sort of error * state, but have tried to recover. In C we can end up with a null pointer * for a tree that was not produced. @@ -198,7 +198,7 @@ CommonTreeAdaptor<ImplTraits>::becomeRoot( TreeTypePtr& newRootTree, TreeTypePtr { return std::move(oldRootTree); } - + /* root is just the new tree as is if there is no * current root tree. */ @@ -206,7 +206,7 @@ CommonTreeAdaptor<ImplTraits>::becomeRoot( TreeTypePtr& newRootTree, TreeTypePtr { return std::move(newRootTree); } - + /* Produce ^(nil real-node) */ if (newRootTree->isNilNode()) @@ -218,27 +218,27 @@ CommonTreeAdaptor<ImplTraits>::becomeRoot( TreeTypePtr& newRootTree, TreeTypePtr fprintf(stderr, "More than one node as root! 
TODO: Create tree exception handling\n"); return std::move(newRootTree); } - + /* The new root is the first child, keep track of the original newRoot * because if it was a Nil Node, then we can reuse it now. */ TreeTypePtr saveRoot = std::move(newRootTree); newRootTree = std::move(saveRoot->getChild(0)); - + // Will Reclaim the old nilNode() saveRoot here } - + /* Add old root into new root. addChild takes care of the case where oldRoot * is a flat list (nill rooted tree). All children of oldroot are added to * new root. */ newRootTree->addChild(oldRootTree); - + /* Always returns new root structure */ return std::move(newRootTree); } - + template<class ImplTraits> typename CommonTreeAdaptor<ImplTraits>::TreeTypePtr CommonTreeAdaptor<ImplTraits>::becomeRoot( TreeTypePtr&& newRootTree, TreeTypePtr& oldRootTree) @@ -290,28 +290,28 @@ CommonTreeAdaptor<ImplTraits>::becomeRoot( TreeTypePtr&& newRootTree, TreeTypePt /* Always returns new root structure */ return std::move(newRootTree); -} - -template<class ImplTraits> +} + +template<class ImplTraits> typename CommonTreeAdaptor<ImplTraits>::TreeTypePtr CommonTreeAdaptor<ImplTraits>::becomeRootToken( CommonTokenType* newRoot, TreeTypePtr& oldRoot) -{ +{ return this->becomeRoot(this->create(newRoot), oldRoot); -} - -template<class ImplTraits> +} + +template<class ImplTraits> typename CommonTreeAdaptor<ImplTraits>::TreeTypePtr CommonTreeAdaptor<ImplTraits>::create( CommonTokenType const* payload) -{ +{ TreeTypePtr retval = TreeStoreType::create(); retval->set_token(payload); return retval; -} - -template<class ImplTraits> +} + +template<class ImplTraits> typename CommonTreeAdaptor<ImplTraits>::TreeTypePtr CommonTreeAdaptor<ImplTraits>::create( ANTLR_UINT32 tokenType, const CommonTokenType* fromToken) -{ +{ /* Create the new token */ auto newToken = this->createToken(fromToken); /* Set the type of the new token to that supplied */ @@ -319,7 +319,7 @@ CommonTreeAdaptor<ImplTraits>::create( ANTLR_UINT32 tokenType, const 
CommonToken /* Return a new node based upon this token */ return this->create(newToken); } - + template<class ImplTraits> typename CommonTreeAdaptor<ImplTraits>::TreeTypePtr CommonTreeAdaptor<ImplTraits>::create( ANTLR_UINT32 tokenType, const CommonTokenType* fromToken, const char* text) @@ -335,7 +335,7 @@ CommonTreeAdaptor<ImplTraits>::create( ANTLR_UINT32 tokenType, const CommonToken /* Return a new node based upon this token */ return this->create(newToken); } - + template<class ImplTraits> typename CommonTreeAdaptor<ImplTraits>::TreeTypePtr CommonTreeAdaptor<ImplTraits>::create( ANTLR_UINT32 tokenType, const CommonTokenType* fromToken, typename CommonTreeAdaptor<ImplTraits>::StringType const& text) @@ -351,181 +351,181 @@ CommonTreeAdaptor<ImplTraits>::create( ANTLR_UINT32 tokenType, const CommonToken /* Return a new node based upon this token */ return this->create(newToken); } - + template<class ImplTraits> typename CommonTreeAdaptor<ImplTraits>::TreeTypePtr CommonTreeAdaptor<ImplTraits>::create( ANTLR_UINT32 tokenType, const char* text) { auto fromToken = this->createToken(tokenType, text); return this->create(fromToken); -} - -template<class ImplTraits> +} + +template<class ImplTraits> typename CommonTreeAdaptor<ImplTraits>::TreeTypePtr CommonTreeAdaptor<ImplTraits>::create( ANTLR_UINT32 tokenType, typename CommonTreeAdaptor<ImplTraits>::StringType const& text) -{ +{ auto fromToken = this->createToken(tokenType, text); return this->create(fromToken); -} - -template<class ImplTraits> +} + +template<class ImplTraits> typename CommonTreeAdaptor<ImplTraits>::TreeTypePtr CommonTreeAdaptor<ImplTraits>::dupNode(const TreeType* treeNode) -{ +{ if (treeNode == NULL) return TreeStoreType::null(); TreeTypePtr retval(TreeStoreType::create()); treeNode->dupNode(retval.get()); return retval; -} - -template<class ImplTraits> +} + +template<class ImplTraits> typename CommonTreeAdaptor<ImplTraits>::TreeTypePtr CommonTreeAdaptor<ImplTraits>::dupNode(const TreeTypePtr& 
treeNode) -{ +{ if (treeNode == NULL) return TreeStoreType::null(); TreeTypePtr retval(TreeStoreType::create()); treeNode->dupNode(retval.get()); return retval; -} - -template<class ImplTraits> +} + +template<class ImplTraits> ANTLR_UINT32 CommonTreeAdaptor<ImplTraits>::getType( TreeTypePtr& t) -{ +{ if ( t==NULL) return CommonTokenType::TOKEN_INVALID; return t->getType(); -} - -template<class ImplTraits> +} + +template<class ImplTraits> typename CommonTreeAdaptor<ImplTraits>::StringType CommonTreeAdaptor<ImplTraits>::getText( TreeTypePtr& t) -{ +{ return t->getText(); -} - -template<class ImplTraits> +} + +template<class ImplTraits> typename CommonTreeAdaptor<ImplTraits>::TreeTypePtr& CommonTreeAdaptor<ImplTraits>::getChild( TreeTypePtr& t, ANTLR_UINT32 i) -{ +{ if ( t==NULL ) return NULL; return t->getChild(i); -} - -template<class ImplTraits> +} + +template<class ImplTraits> void CommonTreeAdaptor<ImplTraits>::setChild( TreeTypePtr& t, ANTLR_UINT32 i, TreeTypePtr& child) -{ +{ t->setChild(i, child); -} - -template<class ImplTraits> +} + +template<class ImplTraits> void CommonTreeAdaptor<ImplTraits>::deleteChild( TreeTypePtr& t, ANTLR_UINT32 i) -{ +{ t->deleteChild(i); -} - -template<class ImplTraits> +} + +template<class ImplTraits> void CommonTreeAdaptor<ImplTraits>::setChildIndex( TreeTypePtr& t, ANTLR_INT32 i) -{ +{ if( t!= NULL) t->set_childIndex(i); -} - -template<class ImplTraits> +} + +template<class ImplTraits> ANTLR_INT32 CommonTreeAdaptor<ImplTraits>::getChildIndex( TreeTypePtr& t) -{ +{ if ( t==NULL ) return 0; return t->getChildIndex(); -} - -template<class ImplTraits> +} + +template<class ImplTraits> ANTLR_UINT32 CommonTreeAdaptor<ImplTraits>::getChildCount( TreeTypePtr& t) -{ +{ if ( t==NULL ) return 0; return t->getChildCount(); -} - -template<class ImplTraits> +} + +template<class ImplTraits> ANTLR_UINT64 CommonTreeAdaptor<ImplTraits>::getUniqueID( TreeTypePtr& node ) -{ +{ return reinterpret_cast<ANTLR_UINT64>(node); -} - -template<class 
ImplTraits> -typename CommonTreeAdaptor<ImplTraits>::CommonTokenType* +} + +template<class ImplTraits> +typename CommonTreeAdaptor<ImplTraits>::CommonTokenType* CommonTreeAdaptor<ImplTraits>::createToken( ANTLR_UINT32 tokenType, const char* text) -{ +{ CommonTokenType* newToken = TreeStoreType::createToken(); newToken->set_tokText( text ); newToken->set_type(tokenType); return newToken; -} - -template<class ImplTraits> -typename CommonTreeAdaptor<ImplTraits>::CommonTokenType* +} + +template<class ImplTraits> +typename CommonTreeAdaptor<ImplTraits>::CommonTokenType* CommonTreeAdaptor<ImplTraits>::createToken( ANTLR_UINT32 tokenType, typename CommonTreeAdaptor<ImplTraits>::StringType const& text) -{ +{ CommonTokenType* newToken = TreeStoreType::createToken(); newToken->set_tokText( text ); newToken->set_type(tokenType); return newToken; -} - -template<class ImplTraits> -typename CommonTreeAdaptor<ImplTraits>::CommonTokenType* +} + +template<class ImplTraits> +typename CommonTreeAdaptor<ImplTraits>::CommonTokenType* CommonTreeAdaptor<ImplTraits>::createToken( const CommonTokenType* fromToken) -{ +{ CommonTokenType* newToken = TreeStoreType::createToken(fromToken); return newToken; -} - -template<class ImplTraits> +} + +template<class ImplTraits> typename CommonTreeAdaptor<ImplTraits>::CommonTokenType* CommonTreeAdaptor<ImplTraits>::getToken( TreeTypePtr& t) -{ +{ return t->getToken(); } - + template<class ImplTraits> void CommonTreeAdaptor<ImplTraits>::setTokenBoundaries( TreeTypePtr& t, const CommonTokenType* startToken, const CommonTokenType* stopToken) { ANTLR_MARKER start = 0; ANTLR_MARKER stop = 0; - + if (t == NULL) return; - + if ( startToken != NULL) start = startToken->get_tokenIndex(); - + if ( stopToken != NULL) stop = stopToken->get_tokenIndex(); - + t->set_startIndex(start); t->set_stopIndex(stop); -} - -template<class ImplTraits> +} + +template<class ImplTraits> ANTLR_MARKER CommonTreeAdaptor<ImplTraits>::getTokenStartIndex( TreeTypePtr& t) -{ +{ if ( 
t==NULL ) return -1; return t->get_tokenStartIndex(); -} - -template<class ImplTraits> +} + +template<class ImplTraits> ANTLR_MARKER CommonTreeAdaptor<ImplTraits>::getTokenStopIndex( TreeTypePtr& t) -{ +{ if ( t==NULL ) return -1; return t->get_tokenStopIndex(); -} - -template<class ImplTraits> +} + +template<class ImplTraits> typename CommonTreeAdaptor<ImplTraits>::StringType CommonTreeAdaptor<ImplTraits>::makeDot( TreeTypePtr& theTree) -{ +{ // The string we are building up // StringType dotSpec; @@ -538,7 +538,7 @@ typename CommonTreeAdaptor<ImplTraits>::StringType CommonTreeAdaptor<ImplTraits "\tbgcolor=\"lightgrey\"; node [shape=box, fixedsize=false, fontsize=12, fontname=\"Helvetica-bold\", fontcolor=\"blue\"\n" "\twidth=.25, height=.25, color=\"black\", fillcolor=\"white\", style=\"filled, solid, bold\"];\n\n" "\tedge [arrowsize=.5, color=\"black\", style=\"bold\"]\n\n"; - + if (theTree == NULL) { // No tree, so create a blank spec @@ -546,33 +546,33 @@ typename CommonTreeAdaptor<ImplTraits>::StringType CommonTreeAdaptor<ImplTraits dotSpec->append("n0[label=\"EMPTY TREE\"]\n"); return dotSpec; } - - sprintf(buff, "\tn%p[label=\"", theTree); + + sprintf(buff, "\tn%p[label=\"", theTree); dotSpec.append(buff); - text = this->getText(theTree); - for (std::size_t j = 0; j < text.size(); j++) - { - switch(text[j]) - { - case '"': - dotSpec.append("\\\""); - break; - - case '\n': - dotSpec.append("\\n"); - break; - - case '\r': - dotSpec.append("\\r"); - break; - - default: - dotSpec += text[j]; - break; - } - } + text = this->getText(theTree); + for (std::size_t j = 0; j < text.size(); j++) + { + switch(text[j]) + { + case '"': + dotSpec.append("\\\""); + break; + + case '\n': + dotSpec.append("\\n"); + break; + + case '\r': + dotSpec.append("\\r"); + break; + + default: + dotSpec += text[j]; + break; + } + } dotSpec->append("\"]\n"); - + // First produce the node defintions // this->defineDotNodes(theTree, dotSpec); @@ -582,22 +582,22 @@ typename 
CommonTreeAdaptor<ImplTraits>::StringType CommonTreeAdaptor<ImplTraits // Terminate the spec // dotSpec.append("\n}"); - + // Result // return dotSpec; -} - -template<class ImplTraits> +} + +template<class ImplTraits> void CommonTreeAdaptor<ImplTraits>::replaceChildren( TreeTypePtr parent, ANTLR_INT32 startChildIndex, ANTLR_INT32 stopChildIndex, TreeTypePtr t) -{ +{ if (parent != NULL) parent->replaceChildren(startChildIndex, stopChildIndex, t); -} - -template<class ImplTraits> -CommonTreeAdaptor<ImplTraits>::~CommonTreeAdaptor() -{ +} + +template<class ImplTraits> +CommonTreeAdaptor<ImplTraits>::~CommonTreeAdaptor() +{ #ifdef ANTLR3_DEBUG std::cout << "SZ" << TreeStoreType::size() << std::endl; std::cout << "RZ" << TreeStoreType::m_recycleBin.size() << std::endl; @@ -620,11 +620,11 @@ CommonTreeAdaptor<ImplTraits>::~CommonTreeAdaptor() << std::endl; } #endif -} - -template<class ImplTraits> +} + +template<class ImplTraits> void CommonTreeAdaptor<ImplTraits>::defineDotNodes(TreeTypePtr t, const StringType& dotSpec) -{ +{ // How many nodes are we talking about? 
// int nCount; @@ -633,11 +633,11 @@ void CommonTreeAdaptor<ImplTraits>::defineDotNodes(TreeTypePtr t, const StringTy char buff[64]; StringType text; int j; - + // Count the nodes // nCount = this->getChildCount(t); - + if (nCount == 0) { // This will already have been included as a child of another node @@ -645,17 +645,17 @@ void CommonTreeAdaptor<ImplTraits>::defineDotNodes(TreeTypePtr t, const StringTy // return; } - + // For each child of the current tree, define a node using the // memory address of the node to name it // for (i = 0; i<nCount; i++) { - + // Pick up a pointer for the child // child = this->getChild(t, i); - + // Name the node // sprintf(buff, "\tn%p[label=\"", child); @@ -663,27 +663,27 @@ void CommonTreeAdaptor<ImplTraits>::defineDotNodes(TreeTypePtr t, const StringTy text = this->getText(child); for (j = 0; j < text.size(); j++) { - switch(text[j]) - { - case '"': - dotSpec.append("\\\""); - break; - - case '\n': - dotSpec.append("\\n"); - break; - - case '\r': - dotSpec.append("\\r"); - break; - - default: - dotSpec += text[j]; - break; - } + switch(text[j]) + { + case '"': + dotSpec.append("\\\""); + break; + + case '\n': + dotSpec.append("\\n"); + break; + + case '\r': + dotSpec.append("\\r"); + break; + + default: + dotSpec += text[j]; + break; + } } dotSpec.append("\"]\n"); - + // And now define the children of this child (if any) // this->defineDotNodes(child, dotSpec); @@ -692,11 +692,11 @@ void CommonTreeAdaptor<ImplTraits>::defineDotNodes(TreeTypePtr t, const StringTy // Done // return; -} - -template<class ImplTraits> +} + +template<class ImplTraits> void CommonTreeAdaptor<ImplTraits>::defineDotEdges(TreeTypePtr t, const StringType& dotSpec) -{ +{ // How many nodes are we talking about? 
// int nCount; @@ -706,11 +706,11 @@ void CommonTreeAdaptor<ImplTraits>::defineDotEdges(TreeTypePtr t, const StringTy // return; } - + // Count the nodes // nCount = this->getChildCount(t); - + if (nCount == 0) { // This will already have been included as a child of another node @@ -718,7 +718,7 @@ void CommonTreeAdaptor<ImplTraits>::defineDotEdges(TreeTypePtr t, const StringTy // return; } - + // For each child, define an edge from this parent, then process // and children of this child in the same way // @@ -726,84 +726,84 @@ void CommonTreeAdaptor<ImplTraits>::defineDotEdges(TreeTypePtr t, const StringTy { TreeTypePtr child; char buff[128]; - StringType text; - + StringType text; + // Next child // child = this->getChild(t, i); - + // Create the edge relation // sprintf(buff, "\t\tn%p -> n%p\t\t// ", t, child); dotSpec.append(buff); - + // Document the relationship // - text = this->getText(t); + text = this->getText(t); for (std::size_t j = 0; j < text.size(); j++) - { - switch(text[j]) - { - case '"': - dotSpec.append("\\\""); - break; - - case '\n': - dotSpec.append("\\n"); - break; - - case '\r': - dotSpec.append("\\r"); - break; - - default: - dotSpec += text[j]; - break; - } - } - - dotSpec.append(" -> "); - - text = this->getText(child); - for (std::size_t j = 0; j < text.size(); j++) - { - switch(text[j]) - { - case '"': - dotSpec.append("\\\""); - break; - - case '\n': - dotSpec.append("\\n"); - break; - - case '\r': - dotSpec.append("\\r"); - break; - - default: - dotSpec += text[j]; - break; - } - } + { + switch(text[j]) + { + case '"': + dotSpec.append("\\\""); + break; + + case '\n': + dotSpec.append("\\n"); + break; + + case '\r': + dotSpec.append("\\r"); + break; + + default: + dotSpec += text[j]; + break; + } + } + + dotSpec.append(" -> "); + + text = this->getText(child); + for (std::size_t j = 0; j < text.size(); j++) + { + switch(text[j]) + { + case '"': + dotSpec.append("\\\""); + break; + + case '\n': + dotSpec.append("\\n"); + break; + + 
case '\r': + dotSpec.append("\\r"); + break; + + default: + dotSpec += text[j]; + break; + } + } dotSpec.append("\n"); // Define edges for this child // this->defineDotEdges(child, dotSpec); } - + // Done // return; -} - -template<class ImplTraits> +} + +template<class ImplTraits> typename CommonTreeAdaptor<ImplTraits>::TreeTypePtr CommonTreeAdaptor<ImplTraits>::rulePostProcessing( TreeTypePtr& root) -{ +{ TreeTypePtr saveRoot = std::move(root); - + if (saveRoot != NULL && saveRoot->isNilNode()) { if (saveRoot->getChildCount() == 0) @@ -824,41 +824,41 @@ typename CommonTreeAdaptor<ImplTraits>::TreeTypePtr CommonTreeAdaptor<ImplTraits } } return saveRoot; -} - -template<class ImplTraits> -DebugTreeAdaptor<ImplTraits>::DebugTreeAdaptor( DebuggerType* debugger ) -{ +} + +template<class ImplTraits> +DebugTreeAdaptor<ImplTraits>::DebugTreeAdaptor( DebuggerType* debugger ) +{ m_debugger = debugger; -} - -template<class ImplTraits> -void DebugTreeAdaptor<ImplTraits>::setDebugEventListener( DebuggerType* debugger) -{ +} + +template<class ImplTraits> +void DebugTreeAdaptor<ImplTraits>::setDebugEventListener( DebuggerType* debugger) +{ m_debugger = debugger; -} - -template<class ImplTraits> +} + +template<class ImplTraits> typename DebugTreeAdaptor<ImplTraits>::TreeTypePtr DebugTreeAdaptor<ImplTraits>::nilNode() -{ +{ TreeTypePtr t = this->create(NULL); m_debugger->createNode(t); return t; -} - -template<class ImplTraits> +} + +template<class ImplTraits> void DebugTreeAdaptor<ImplTraits>::addChild(TreeTypePtr& t, TreeTypePtr& child) -{ +{ if (t != NULL && child != NULL) { t->addChild(child); m_debugger->addChild(t, child); } -} - -template<class ImplTraits> +} + +template<class ImplTraits> void DebugTreeAdaptor<ImplTraits>::addChildToken(TreeTypePtr& t, CommonTokenType* child) -{ +{ TreeTypePtr tc; if (t != NULL && child != NULL) { @@ -866,41 +866,41 @@ void DebugTreeAdaptor<ImplTraits>::addChildToken(TreeTypePtr& t, CommonTokenType this->addChild(t, tc); 
m_debugger->addChild(t, tc); } -} - -template<class ImplTraits> +} + +template<class ImplTraits> typename DebugTreeAdaptor<ImplTraits>::TreeTypePtr DebugTreeAdaptor<ImplTraits>::becomeRoot( TreeTypePtr& newRootTree, TreeTypePtr& oldRootTree ) -{ +{ TreeTypePtr t = super::becomeRoot(newRootTree, oldRootTree); m_debugger->becomeRoot(newRootTree, oldRootTree); return t; -} - -template<class ImplTraits> +} + +template<class ImplTraits> typename DebugTreeAdaptor<ImplTraits>::TreeTypePtr DebugTreeAdaptor<ImplTraits>::becomeRootToken(CommonTokenType* newRoot, TreeTypePtr& oldRoot) -{ +{ TreeTypePtr t = super::becomeRoot(this->create(newRoot), oldRoot); m_debugger->becomeRoot(t, oldRoot); return t; -} - -template<class ImplTraits> +} + +template<class ImplTraits> typename DebugTreeAdaptor<ImplTraits>::TreeTypePtr DebugTreeAdaptor<ImplTraits>::createTypeToken(ANTLR_UINT32 tokenType, CommonTokenType* fromToken) -{ +{ TreeTypePtr t; t = this->createTypeToken(tokenType, fromToken); m_debugger->createNode(t); return t; -} - -template<class ImplTraits> +} + +template<class ImplTraits> typename DebugTreeAdaptor<ImplTraits>::TreeTypePtr DebugTreeAdaptor<ImplTraits>::createTypeTokenText(ANTLR_UINT32 tokenType, CommonTokenType* fromToken, ANTLR_UINT8* text) -{ +{ TreeTypePtr t; t = this->createTypeTokenText(tokenType, fromToken, text); m_debugger->createNode(t); return t; -} +} template<class ImplTraits> typename DebugTreeAdaptor<ImplTraits>::TreeTypePtr DebugTreeAdaptor<ImplTraits>::createTypeText( ANTLR_UINT32 tokenType, ANTLR_UINT8* text) @@ -910,10 +910,10 @@ typename DebugTreeAdaptor<ImplTraits>::TreeTypePtr DebugTreeAdaptor<ImplTraits>: m_debugger->createNode(t); return t; } - -template<class ImplTraits> + +template<class ImplTraits> typename DebugTreeAdaptor<ImplTraits>::TreeTypePtr DebugTreeAdaptor<ImplTraits>::dupTree( const TreeTypePtr& tree) -{ +{ TreeTypePtr t; // Call the normal dup tree mechanism first @@ -928,38 +928,38 @@ typename 
DebugTreeAdaptor<ImplTraits>::TreeTypePtr DebugTreeAdaptor<ImplTraits>: this->simulateTreeConstruction( t); return t; -} - -template<class ImplTraits> +} + +template<class ImplTraits> typename DebugTreeAdaptor<ImplTraits>::TreeTypePtr DebugTreeAdaptor<ImplTraits>::dupTree( const TreeType* tree) -{ +{ TreeTypePtr t; - + // Call the normal dup tree mechanism first // t = this->dupTreeImpl(tree, NULL); - + // In order to tell the debugger what we have just done, we now // simulate the tree building mechanism. THis will fire // lots of debugging events to the client and look like we // duped the tree.. // this->simulateTreeConstruction( t); - + return t; -} - -template<class ImplTraits> +} + +template<class ImplTraits> void DebugTreeAdaptor<ImplTraits>::simulateTreeConstruction(TreeTypePtr& tree) -{ +{ ANTLR_UINT32 n; ANTLR_UINT32 i; TreeTypePtr child; - + // Send the create node event // m_debugger->createNode(tree); - + n = this->getChildCount(tree); for (i = 0; i < n; i++) { @@ -967,6 +967,6 @@ void DebugTreeAdaptor<ImplTraits>::simulateTreeConstruction(TreeTypePtr& tree) this->simulateTreeConstruction(child); m_debugger->addChild(tree, child); } -} - +} + } diff --git a/contrib/libs/antlr3_cpp_runtime/include/antlr3commontreenodestream.hpp b/contrib/libs/antlr3_cpp_runtime/include/antlr3commontreenodestream.hpp index 3adf02e933..7cb9908d2b 100644 --- a/contrib/libs/antlr3_cpp_runtime/include/antlr3commontreenodestream.hpp +++ b/contrib/libs/antlr3_cpp_runtime/include/antlr3commontreenodestream.hpp @@ -1,44 +1,44 @@ -/// \file -/// Definition of the ANTLR3 common tree node stream. -/// - +/// \file +/// Definition of the ANTLR3 common tree node stream. +/// + #ifndef _ANTLR_COMMON_TREE_NODE_STREAM__HPP #define _ANTLR_COMMON_TREE_NODE_STREAM__HPP - -// [The "BSD licence"] -// Copyright (c) 2005-2009 Gokulakannan Somasundaram, ElectronDB - -// -// All rights reserved. 
-// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// 3. The name of the author may not be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR -// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES -// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. -// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, -// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT -// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF -// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - + +// [The "BSD licence"] +// Copyright (c) 2005-2009 Gokulakannan Somasundaram, ElectronDB + +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// 2. 
Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// 3. The name of the author may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + namespace antlr3 { - -template<class ImplTraits> -class CommonTreeNodeStream : public ImplTraits::TreeNodeIntStreamType -{ -public: + +template<class ImplTraits> +class CommonTreeNodeStream : public ImplTraits::TreeNodeIntStreamType +{ +public: enum Constants { /// Token buffer initial size settings ( will auto increase) @@ -46,7 +46,7 @@ public: DEFAULT_INITIAL_BUFFER_SIZE = 100 , INITIAL_CALL_STACK_SIZE = 10 }; - + typedef typename ImplTraits::TreeType TreeType; typedef typename ImplTraits::TreeTypePtr TreeTypePtr; typedef TreeType UnitType; @@ -61,28 +61,28 @@ public: typedef typename ImplTraits::TreeParserType ComponentType; typedef typename ImplTraits::CommonTokenType CommonTokenType; typedef typename ImplTraits::TreeNodeIntStreamType BaseType; - -public: - /// Dummy tree node that indicates a descent into a child - /// tree. 
Initialized by a call to create a new interface. - /// + +public: + /// Dummy tree node that indicates a descent into a child + /// tree. Initialized by a call to create a new interface. + /// TreeType m_DOWN; - - /// Dummy tree node that indicates a descent up to a parent - /// tree. Initialized by a call to create a new interface. - /// + + /// Dummy tree node that indicates a descent up to a parent + /// tree. Initialized by a call to create a new interface. + /// TreeType m_UP; - - /// Dummy tree node that indicates the termination point of the - /// tree. Initialized by a call to create a new interface. - /// + + /// Dummy tree node that indicates the termination point of the + /// tree. Initialized by a call to create a new interface. + /// TreeType m_EOF_NODE; - - /// Dummy node that is returned if we need to indicate an invalid node - /// for any reason. - /// + + /// Dummy node that is returned if we need to indicate an invalid node + /// for any reason. + /// TreeType m_INVALID_NODE; - + /// The complete mapping from stream index to tree node. /// This buffer includes pointers to DOWN, UP, and EOF nodes. /// It is built upon ctor invocation. The elements are type @@ -96,72 +96,72 @@ public: /// Must be freed when the tree node stream is torn down. /// NodesType m_nodes; - + /// Which tree are we navigating ? - /// + /// TreeTypePtr m_root; - - /// Pointer to tree adaptor interface that manipulates/builds - /// the tree. - /// + + /// Pointer to tree adaptor interface that manipulates/builds + /// the tree. + /// TreeAdaptorType* m_adaptor; - - /// As we walk down the nodes, we must track parent nodes so we know - /// where to go after walking the last child of a node. When visiting - /// a child, push current node and current index (current index - /// is first stored in the tree node structure to avoid two stacks. - /// + + /// As we walk down the nodes, we must track parent nodes so we know + /// where to go after walking the last child of a node. 
When visiting + /// a child, push current node and current index (current index + /// is first stored in the tree node structure to avoid two stacks. + /// NodeStackType m_nodeStack; - + /// The current index into the nodes vector of the current tree /// we are parsing and possibly rewriting. /// ANTLR_INT32 m_p; - - /// Which node are we currently visiting? - /// + + /// Which node are we currently visiting? + /// TreeTypePtr m_currentNode; - - /// Which node did we last visit? Used for LT(-1) - /// + + /// Which node did we last visit? Used for LT(-1) + /// TreeTypePtr m_previousNode; - - /// Which child are we currently visiting? If -1 we have not visited - /// this node yet; next consume() request will set currentIndex to 0. - /// + + /// Which child are we currently visiting? If -1 we have not visited + /// this node yet; next consume() request will set currentIndex to 0. + /// ANTLR_INT32 m_currentChildIndex; - - /// What node index did we just consume? i=0..n-1 for n node trees. - /// IntStream.next is hence 1 + this value. Size will be same. - /// + + /// What node index did we just consume? i=0..n-1 for n node trees. + /// IntStream.next is hence 1 + this value. Size will be same. + /// ANTLR_MARKER m_absoluteNodeIndex; - - /// Buffer tree node stream for use with LT(i). This list grows - /// to fit new lookahead depths, but consume() wraps like a circular - /// buffer. - /// + + /// Buffer tree node stream for use with LT(i). This list grows + /// to fit new lookahead depths, but consume() wraps like a circular + /// buffer. + /// TreeTypePtr* m_lookAhead; - - /// Number of elements available in the lookahead buffer at any point in - /// time. This is the current size of the array. - /// + + /// Number of elements available in the lookahead buffer at any point in + /// time. This is the current size of the array. + /// ANTLR_UINT32 m_lookAheadLength; - + /// lookAhead[head] is the first symbol of lookahead, LT(1). 
- /// + /// ANTLR_UINT32 m_head; - - /// Add new lookahead at lookahead[tail]. tail wraps around at the - /// end of the lookahead buffer so tail could be less than head. - /// + + /// Add new lookahead at lookahead[tail]. tail wraps around at the + /// end of the lookahead buffer so tail could be less than head. + /// ANTLR_UINT32 m_tail; - - /// Calls to mark() may be nested so we have to track a stack of - /// them. The marker is an index into this stack. Index 0 is - /// the first marker. This is a List<TreeWalkState> - /// + + /// Calls to mark() may be nested so we have to track a stack of + /// them. The marker is an index into this stack. Index 0 is + /// the first marker. This is a List<TreeWalkState> + /// MarkersType m_markers; - + /// Indicates whether this node stream was derived from a prior /// node stream to be used by a rewriting tree parser for instance. /// If this flag is set to ANTLR_TRUE, then when this stream is @@ -169,22 +169,22 @@ public: /// belongs to the origniating node stream. /// bool m_isRewriter; - - /// If set to ANTLR_TRUE then the navigation nodes UP, DOWN are - /// duplicated rather than reused within the tree. - /// + + /// If set to ANTLR_TRUE then the navigation nodes UP, DOWN are + /// duplicated rather than reused within the tree. + /// bool m_uniqueNavigationNodes; - -public: - // INTERFACE + +public: + // INTERFACE // CommonTreeNodeStream( ANTLR_UINT32 hint ); CommonTreeNodeStream( const CommonTreeNodeStream& ctn ); CommonTreeNodeStream( TreeTypePtr tree, ANTLR_UINT32 hint ); - + void init( ANTLR_UINT32 hint ); ~CommonTreeNodeStream(); - + /// Get tree node at current input pointer + i ahead where i=1 is next node. /// i<0 indicates nodes in the past. So LT(-1) is previous node, but /// implementations are not required to provide results for k < -1. @@ -197,27 +197,27 @@ public: /// for both parser and tree grammars. :) /// TreeTypePtr _LT(ANTLR_INT32 k); - + /// Where is this stream pulling nodes from? 
This is not the name, but /// the object that provides node objects. /// TreeTypePtr getTreeSource(); - + /// What adaptor can tell me how to interpret/navigate nodes and /// trees. E.g., get text of a node. /// TreeAdaptorType* getTreeAdaptor(); - + /// As we flatten the tree, we use UP, DOWN nodes to represent /// the tree structure. When debugging we need unique nodes /// so we have to instantiate new ones. When doing normal tree /// parsing, it's slow and a waste of memory to create unique /// navigation nodes. Default should be false; /// - void set_uniqueNavigationNodes(bool uniqueNavigationNodes); - + void set_uniqueNavigationNodes(bool uniqueNavigationNodes); + StringType toString(); - + /// Return the text of all nodes from start to stop, inclusive. /// If the stream does not buffer all the nodes then it can still /// walk recursively from start until stop. You can always return @@ -225,7 +225,7 @@ public: /// an action of course in that case. /// StringType toStringSS(TreeTypePtr start, TreeTypePtr stop); - + /// Return the text of all nodes from start to stop, inclusive, into the /// supplied buffer. /// If the stream does not buffer all the nodes then it can still @@ -234,15 +234,15 @@ public: /// an action of course in that case. /// void toStringWork(TreeTypePtr start, TreeTypePtr stop, StringType& buf); - + /// Get a tree node at an absolute index i; 0..n-1. /// If you don't want to buffer up nodes, then this method makes no /// sense for you. /// TreeTypePtr get(ANTLR_INT32 i); - + // REWRITING TREES (used by tree parser) - + /// Replace from start to stop child index of parent with t, which might /// be a list. Number of children may be different /// after this call. 
The stream is notified because it is walking the @@ -255,48 +255,48 @@ public: /// void replaceChildren(TreeTypePtr parent, ANTLR_INT32 startChildIndex, ANTLR_INT32 stopChildIndex, TreeTypePtr t); - + TreeTypePtr LB(ANTLR_INT32 k); - + /// As we flatten the tree, we use UP, DOWN nodes to represent /// the tree structure. When debugging we need unique nodes /// so instantiate new ones when uniqueNavigationNodes is true. /// void addNavigationNode(ANTLR_UINT32 ttype); - + TreeTypePtr newDownNode(); - + TreeTypePtr newUpNode(); - + bool hasUniqueNavigationNodes() const; - + ANTLR_UINT32 getLookaheadSize(); - + void push(ANTLR_INT32 index); - + ANTLR_INT32 pop(); - + void reset(); - + void fillBufferRoot(); void fillBuffer(TreeTypePtr t); -}; - -/** This structure is used to save the state information in the treenodestream - * when walking ahead with cyclic DFA or for syntactic predicates, - * we need to record the state of the tree node stream. This - * class wraps up the current state of the CommonTreeNodeStream. - * Calling mark() will push another of these on the markers stack. - */ -template<class ImplTraits> -class TreeWalkState : public ImplTraits::AllocPolicyType -{ -public: +}; + +/** This structure is used to save the state information in the treenodestream + * when walking ahead with cyclic DFA or for syntactic predicates, + * we need to record the state of the tree node stream. This + * class wraps up the current state of the CommonTreeNodeStream. + * Calling mark() will push another of these on the markers stack. 
+ */ +template<class ImplTraits> +class TreeWalkState : public ImplTraits::AllocPolicyType +{ +public: typedef typename ImplTraits::TreeType TreeType; typedef typename ImplTraits::TreeTypePtr TreeTypePtr; - -private: + +private: ANTLR_UINT32 m_currentChildIndex; ANTLR_MARKER m_absoluteNodeIndex; TreeTypePtr m_currentNode; @@ -306,12 +306,12 @@ private: ANTLR_UINT32 m_lookAheadLength; ANTLR_UINT32 m_tail; ANTLR_UINT32 m_head; - - -}; - + + +}; + } - -#include "antlr3commontreenodestream.inl" - -#endif + +#include "antlr3commontreenodestream.inl" + +#endif diff --git a/contrib/libs/antlr3_cpp_runtime/include/antlr3commontreenodestream.inl b/contrib/libs/antlr3_cpp_runtime/include/antlr3commontreenodestream.inl index 096e15db11..7a98fc106d 100644 --- a/contrib/libs/antlr3_cpp_runtime/include/antlr3commontreenodestream.inl +++ b/contrib/libs/antlr3_cpp_runtime/include/antlr3commontreenodestream.inl @@ -1,14 +1,14 @@ namespace antlr3 { - -template<class ImplTraits> -CommonTreeNodeStream<ImplTraits>::CommonTreeNodeStream(ANTLR_UINT32 hint) -{ + +template<class ImplTraits> +CommonTreeNodeStream<ImplTraits>::CommonTreeNodeStream(ANTLR_UINT32 hint) +{ this->init(hint); -} - -template<class ImplTraits> -void CommonTreeNodeStream<ImplTraits>::init( ANTLR_UINT32 hint ) -{ +} + +template<class ImplTraits> +void CommonTreeNodeStream<ImplTraits>::init( ANTLR_UINT32 hint ) +{ m_root = NULL; m_adaptor = new TreeAdaptorType; // Create the node list map @@ -16,7 +16,7 @@ void CommonTreeNodeStream<ImplTraits>::init( ANTLR_UINT32 hint ) if (hint == 0) hint = DEFAULT_INITIAL_BUFFER_SIZE; m_nodes.reserve( DEFAULT_INITIAL_BUFFER_SIZE ); - + m_p = -1; m_currentNode = NULL; m_previousNode = NULL; @@ -28,27 +28,27 @@ void CommonTreeNodeStream<ImplTraits>::init( ANTLR_UINT32 hint ) m_tail = 0; m_uniqueNavigationNodes = false; m_isRewriter = false; - + CommonTokenType* token = new CommonTokenType(CommonTokenType::TOKEN_UP); token->set_tokText( "UP" ); m_UP.set_token( token ); - + token = new 
CommonTokenType(CommonTokenType::TOKEN_DOWN); token->set_tokText( "DOWN" ); m_DOWN.set_token( token ); - + token = new CommonTokenType(CommonTokenType::TOKEN_EOF); token->set_tokText( "EOF" ); m_EOF_NODE.set_token( token ); - + token = new CommonTokenType(CommonTokenType::TOKEN_INVALID); token->set_tokText( "INVALID" ); m_EOF_NODE.set_token( token ); -} - -template<class ImplTraits> -CommonTreeNodeStream<ImplTraits>::CommonTreeNodeStream( const CommonTreeNodeStream& ctn ) -{ +} + +template<class ImplTraits> +CommonTreeNodeStream<ImplTraits>::CommonTreeNodeStream( const CommonTreeNodeStream& ctn ) +{ m_root = ctn.m_root; m_adaptor = ctn.m_adaptor; m_nodes.reserve( DEFAULT_INITIAL_BUFFER_SIZE ); @@ -64,23 +64,23 @@ CommonTreeNodeStream<ImplTraits>::CommonTreeNodeStream( const CommonTreeNodeStre m_tail = 0; m_uniqueNavigationNodes = false; m_isRewriter = true; - + m_UP.set_token( ctn.m_UP.get_token() ); m_DOWN.set_token( ctn.m_DOWN.get_token() ); m_EOF_NODE.set_token( ctn.m_EOF_NODE.get_token() ); m_INVALID_NODE.set_token( ctn.m_INVALID_NODE.get_token() ); -} - -template<class ImplTraits> +} + +template<class ImplTraits> CommonTreeNodeStream<ImplTraits>::CommonTreeNodeStream( TreeTypePtr tree, ANTLR_UINT32 hint ) -{ +{ this->init(hint); m_root = tree; -} - -template<class ImplTraits> -CommonTreeNodeStream<ImplTraits>::~CommonTreeNodeStream() -{ +} + +template<class ImplTraits> +CommonTreeNodeStream<ImplTraits>::~CommonTreeNodeStream() +{ // If this is a rewrting stream, then certain resources // belong to the originating node stream and we do not // free them here. 
@@ -88,9 +88,9 @@ CommonTreeNodeStream<ImplTraits>::~CommonTreeNodeStream() if ( m_isRewriter != true) { delete m_adaptor; - + m_nodeStack.clear(); - + delete m_INVALID_NODE.get_token(); delete m_EOF_NODE.get_token(); delete m_DOWN.get_token(); @@ -98,16 +98,16 @@ CommonTreeNodeStream<ImplTraits>::~CommonTreeNodeStream() } m_nodes.clear(); -} - -template<class ImplTraits> +} + +template<class ImplTraits> typename CommonTreeNodeStream<ImplTraits>::TreeTypePtr CommonTreeNodeStream<ImplTraits>::_LT(ANTLR_INT32 k) -{ +{ if ( m_p == -1) { this->fillBufferRoot(); } - + if (k < 0) { return this->LB(-k); @@ -116,62 +116,62 @@ typename CommonTreeNodeStream<ImplTraits>::TreeTypePtr CommonTreeNodeStream<Impl { return &(m_INVALID_NODE); } - + // k was a legitimate request, // if (( m_p + k - 1) >= (ANTLR_INT32)(m_nodes.size())) { return &(m_EOF_NODE); } - + return m_nodes[ m_p + k - 1 ]; -} - -template<class ImplTraits> +} + +template<class ImplTraits> typename CommonTreeNodeStream<ImplTraits>::TreeTypePtr CommonTreeNodeStream<ImplTraits>::getTreeSource() -{ +{ return m_root; -} - -template<class ImplTraits> +} + +template<class ImplTraits> typename CommonTreeNodeStream<ImplTraits>::TreeAdaptorType* CommonTreeNodeStream<ImplTraits>::getTreeAdaptor() -{ +{ return m_adaptor; -} - -template<class ImplTraits> -void CommonTreeNodeStream<ImplTraits>::set_uniqueNavigationNodes(bool uniqueNavigationNodes) -{ +} + +template<class ImplTraits> +void CommonTreeNodeStream<ImplTraits>::set_uniqueNavigationNodes(bool uniqueNavigationNodes) +{ m_uniqueNavigationNodes = uniqueNavigationNodes; -} - -template<class ImplTraits> -typename CommonTreeNodeStream<ImplTraits>::StringType CommonTreeNodeStream<ImplTraits>::toString() -{ - return this->toStringSS(m_root, NULL); -} - -template<class ImplTraits> +} + +template<class ImplTraits> +typename CommonTreeNodeStream<ImplTraits>::StringType CommonTreeNodeStream<ImplTraits>::toString() +{ + return this->toStringSS(m_root, NULL); +} + +template<class 
ImplTraits> typename CommonTreeNodeStream<ImplTraits>::StringType CommonTreeNodeStream<ImplTraits>::toStringSS(TreeTypePtr start, TreeTypePtr stop) -{ +{ StringType buf; - this->toStringWork(start, stop, buf); - return buf; -} - -template<class ImplTraits> + this->toStringWork(start, stop, buf); + return buf; +} + +template<class ImplTraits> void CommonTreeNodeStream<ImplTraits>::toStringWork(TreeTypePtr start, TreeTypePtr stop, StringType& str) -{ +{ ANTLR_UINT32 n; ANTLR_UINT32 c; StringStreamType buf; - + if (!start->isNilNode() ) { StringType text; - + text = start->toString(); - + if (text.empty()) { buf << ' '; @@ -180,84 +180,84 @@ void CommonTreeNodeStream<ImplTraits>::toStringWork(TreeTypePtr start, TreeTypeP else buf << text; } - + if (start == stop) { return; /* Finished */ } - + n = start->getChildCount(); - + if (n > 0 && ! start->isNilNode() ) { buf << ' '; buf << CommonTokenType::TOKEN_DOWN; } - + for (c = 0; c<n ; c++) { TreeTypePtr child; - + child = start->getChild(c); this->toStringWork(child, stop, buf); } - + if (n > 0 && ! 
start->isNilNode() ) { buf << ' '; buf << CommonTokenType::TOKEN_UP; } str = buf.str(); -} - -template<class ImplTraits> +} + +template<class ImplTraits> typename CommonTreeNodeStream<ImplTraits>::TreeTypePtr CommonTreeNodeStream<ImplTraits>::get(ANTLR_INT32 k) -{ +{ if( m_p == -1 ) { this->fillBufferRoot(); } - + return m_nodes[k]; -} - -template<class ImplTraits> +} + +template<class ImplTraits> void CommonTreeNodeStream<ImplTraits>::replaceChildren(TreeTypePtr parent, ANTLR_INT32 startChildIndex, ANTLR_INT32 stopChildIndex, TreeTypePtr t) -{ +{ if (parent != NULL) { TreeAdaptorType* adaptor; adaptor = this->getTreeAdaptor(); adaptor->replaceChildren(parent, startChildIndex, stopChildIndex, t); } -} - -template<class ImplTraits> +} + +template<class ImplTraits> typename CommonTreeNodeStream<ImplTraits>::TreeTypePtr CommonTreeNodeStream<ImplTraits>::LB(ANTLR_INT32 k) -{ +{ if ( k==0) { return &(m_INVALID_NODE); } - + if ( (m_p - k) < 0) { return &(m_INVALID_NODE); } - + return m_nodes[ m_p - k ]; -} - -template<class ImplTraits> -void CommonTreeNodeStream<ImplTraits>::addNavigationNode(ANTLR_UINT32 ttype) -{ +} + +template<class ImplTraits> +void CommonTreeNodeStream<ImplTraits>::addNavigationNode(ANTLR_UINT32 ttype) +{ TreeTypePtr node; - + node = NULL; - + if (ttype == CommonTokenType::TOKEN_DOWN) { if (this->hasUniqueNavigationNodes() == true) @@ -280,106 +280,106 @@ void CommonTreeNodeStream<ImplTraits>::addNavigationNode(ANTLR_UINT32 ttype) node = &m_UP; } } - + // Now add the node we decided upon. 
// m_nodes.push_back(node); -} - -template<class ImplTraits> +} + +template<class ImplTraits> typename CommonTreeNodeStream<ImplTraits>::TreeTypePtr CommonTreeNodeStream<ImplTraits>::newDownNode() -{ +{ TreeTypePtr dNode; - CommonTokenType* token; - + CommonTokenType* token; + token = new CommonTokenType(CommonTokenType::TOKEN_DOWN); token->set_tokText("DOWN"); dNode = new TreeType(token); - return &dNode; -} - -template<class ImplTraits> + return &dNode; +} + +template<class ImplTraits> typename CommonTreeNodeStream<ImplTraits>::TreeTypePtr CommonTreeNodeStream<ImplTraits>::newUpNode() -{ +{ TreeTypePtr uNode; - CommonTokenType* token; - + CommonTokenType* token; + token = new CommonTokenType(CommonTokenType::TOKEN_UP); token->set_tokText("UP"); uNode = new TreeType(token); - return &uNode; - -} - -template<class ImplTraits> -bool CommonTreeNodeStream<ImplTraits>::hasUniqueNavigationNodes() const -{ + return &uNode; + +} + +template<class ImplTraits> +bool CommonTreeNodeStream<ImplTraits>::hasUniqueNavigationNodes() const +{ return m_uniqueNavigationNodes; -} - -template<class ImplTraits> +} + +template<class ImplTraits> ANTLR_UINT32 CommonTreeNodeStream<ImplTraits>::getLookaheadSize() -{ +{ return m_tail < m_head ? 
(m_lookAheadLength - m_head + m_tail) : (m_tail - m_head); -} - -template<class ImplTraits> +} + +template<class ImplTraits> void CommonTreeNodeStream<ImplTraits>::push(ANTLR_INT32 index) -{ +{ m_nodeStack.push(m_p); // Save current index this->seek(index); -} - -template<class ImplTraits> +} + +template<class ImplTraits> ANTLR_INT32 CommonTreeNodeStream<ImplTraits>::pop() -{ +{ ANTLR_INT32 retVal; - + retVal = m_nodeStack.top(); m_nodeStack.pop(); this->seek(retVal); return retVal; -} - -template<class ImplTraits> +} + +template<class ImplTraits> void CommonTreeNodeStream<ImplTraits>::reset() -{ +{ if ( m_p != -1) { m_p = 0; } BaseType::m_lastMarker = 0; - - + + // Free and reset the node stack only if this is not // a rewriter, which is going to reuse the originating // node streams node stack // if (m_isRewriter != true) m_nodeStack.clear(); -} - -template<class ImplTraits> -void CommonTreeNodeStream<ImplTraits>::fillBufferRoot() -{ +} + +template<class ImplTraits> +void CommonTreeNodeStream<ImplTraits>::fillBufferRoot() +{ // Call the generic buffer routine with the root as the // argument // this->fillBuffer(m_root); m_p = 0; // Indicate we are at buffer start -} - -template<class ImplTraits> +} + +template<class ImplTraits> void CommonTreeNodeStream<ImplTraits>::fillBuffer(TreeTypePtr t) -{ +{ bool nilNode; ANTLR_UINT32 nCount; ANTLR_UINT32 c; - + nilNode = m_adaptor->isNilNode(t); - + // If the supplied node is not a nil (list) node then we // add in the node itself to the vector // @@ -387,17 +387,17 @@ void CommonTreeNodeStream<ImplTraits>::fillBuffer(TreeTypePtr t) { m_nodes.push_back(t); } - + // Only add a DOWN node if the tree is not a nil tree and // the tree does have children. 
// nCount = t->getChildCount(); - + if (nilNode == false && nCount>0) { this->addNavigationNode( CommonTokenType::TOKEN_DOWN); } - + // We always add any children the tree contains, which is // a recursive call to this function, which will cause similar // recursion and implement a depth first addition @@ -406,7 +406,7 @@ void CommonTreeNodeStream<ImplTraits>::fillBuffer(TreeTypePtr t) { this->fillBuffer( m_adaptor->getChild(t, c)); } - + // If the tree had children and was not a nil (list) node, then we // we need to add an UP node here to match the DOWN node // @@ -414,9 +414,9 @@ void CommonTreeNodeStream<ImplTraits>::fillBuffer(TreeTypePtr t) { this->addNavigationNode(CommonTokenType::TOKEN_UP); } +} + + + } - - - -} - + diff --git a/contrib/libs/antlr3_cpp_runtime/include/antlr3convertutf.hpp b/contrib/libs/antlr3_cpp_runtime/include/antlr3convertutf.hpp index 7f4b7e0d25..bd1c6a1ad3 100644 --- a/contrib/libs/antlr3_cpp_runtime/include/antlr3convertutf.hpp +++ b/contrib/libs/antlr3_cpp_runtime/include/antlr3convertutf.hpp @@ -1,143 +1,143 @@ -/* - * Copyright 2001-2004 Unicode, Inc. +/* + * Copyright 2001-2004 Unicode, Inc. * - * Disclaimer + * Disclaimer * - * This source code is provided as is by Unicode, Inc. No claims are - * made as to fitness for any particular purpose. No warranties of any - * kind are expressed or implied. The recipient agrees to determine - * applicability of information provided. If this file has been - * purchased on magnetic or optical media from Unicode, Inc., the - * sole remedy for any claim will be exchange of defective media - * within 90 days of receipt. + * This source code is provided as is by Unicode, Inc. No claims are + * made as to fitness for any particular purpose. No warranties of any + * kind are expressed or implied. The recipient agrees to determine + * applicability of information provided. 
If this file has been + * purchased on magnetic or optical media from Unicode, Inc., the + * sole remedy for any claim will be exchange of defective media + * within 90 days of receipt. * - * Limitations on Rights to Redistribute This Code + * Limitations on Rights to Redistribute This Code * - * Unicode, Inc. hereby grants the right to freely use the information - * supplied in this file in the creation of products supporting the - * Unicode Standard, and to make copies of this file in any form - * for internal or external distribution as long as this notice - * remains attached. - */ - -/* --------------------------------------------------------------------- - - Conversions between UTF32, UTF-16, and UTF-8. Header file. - - Several functions are included here, forming a complete set of - conversions between the three formats. UTF-7 is not included - here, but is handled in a separate source file. - - Each of these routines takes pointers to input buffers and output - buffers. The input buffers are const. - - Each routine converts the text between *sourceStart and sourceEnd, - putting the result into the buffer between *targetStart and + * Unicode, Inc. hereby grants the right to freely use the information + * supplied in this file in the creation of products supporting the + * Unicode Standard, and to make copies of this file in any form + * for internal or external distribution as long as this notice + * remains attached. + */ + +/* --------------------------------------------------------------------- + + Conversions between UTF32, UTF-16, and UTF-8. Header file. + + Several functions are included here, forming a complete set of + conversions between the three formats. UTF-7 is not included + here, but is handled in a separate source file. + + Each of these routines takes pointers to input buffers and output + buffers. The input buffers are const. 
+ + Each routine converts the text between *sourceStart and sourceEnd, + putting the result into the buffer between *targetStart and targetEnd. Note: the end pointers are *after* the last item: e.g. - *(sourceEnd - 1) is the last item. - - The return result indicates whether the conversion was successful, - and if not, whether the problem was in the source or target buffers. - (Only the first encountered problem is indicated.) - - After the conversion, *sourceStart and *targetStart are both - updated to point to the end of last text successfully converted in - the respective buffers. - - Input parameters: + *(sourceEnd - 1) is the last item. + + The return result indicates whether the conversion was successful, + and if not, whether the problem was in the source or target buffers. + (Only the first encountered problem is indicated.) + + After the conversion, *sourceStart and *targetStart are both + updated to point to the end of last text successfully converted in + the respective buffers. + + Input parameters: sourceStart - pointer to a pointer to the source buffer. The contents of this are modified on return so that it points at the next thing to be converted. targetStart - similarly, pointer to pointer to the target buffer. sourceEnd, targetEnd - respectively pointers to the ends of the two buffers, for overflow checking only. - - These conversion functions take a ConversionFlags argument. When this - flag is set to strict, both irregular sequences and isolated surrogates - will cause an error. When the flag is set to lenient, both irregular - sequences and isolated surrogates are converted. - - Whether the flag is strict or lenient, all illegal sequences will cause - an error return. This includes sequences such as: <F4 90 80 80>, <C0 80>, - or <A0> in UTF-8, and values above 0x10FFFF in UTF-32. Conformant code - must check for illegal sequences. 
- - When the flag is set to lenient, characters over 0x10FFFF are converted - to the replacement character; otherwise (when the flag is set to strict) - they constitute an error. - - Output parameters: + + These conversion functions take a ConversionFlags argument. When this + flag is set to strict, both irregular sequences and isolated surrogates + will cause an error. When the flag is set to lenient, both irregular + sequences and isolated surrogates are converted. + + Whether the flag is strict or lenient, all illegal sequences will cause + an error return. This includes sequences such as: <F4 90 80 80>, <C0 80>, + or <A0> in UTF-8, and values above 0x10FFFF in UTF-32. Conformant code + must check for illegal sequences. + + When the flag is set to lenient, characters over 0x10FFFF are converted + to the replacement character; otherwise (when the flag is set to strict) + they constitute an error. + + Output parameters: The value "sourceIllegal" is returned from some routines if the input sequence is malformed. When "sourceIllegal" is returned, the source value will point to the illegal value that caused the problem. E.g., in UTF-8 when a sequence is malformed, it points to the start of the malformed sequence. - - Author: Mark E. Davis, 1994. - Rev History: Rick McGowan, fixes & updates May 2001. + + Author: Mark E. Davis, 1994. + Rev History: Rick McGowan, fixes & updates May 2001. Fixes & updates, Sept 2001. - ------------------------------------------------------------------------- */ - -/* --------------------------------------------------------------------- - The following 4 definitions are compiler-specific. - The C standard does not guarantee that wchar_t has at least - 16 bits, so wchar_t is no less portable than unsigned short! - All should be unsigned values to avoid sign extension during - bit mask & shift operations. ------------------------------------------------------------------------- */ - - -// Changes for ANTLR3 - Jim Idle, January 2008. 
-// builtin types defined for Unicode types changed to -// aliases for the types that are system determined by -// ANTLR at compile time. -// + +------------------------------------------------------------------------ */ + +/* --------------------------------------------------------------------- + The following 4 definitions are compiler-specific. + The C standard does not guarantee that wchar_t has at least + 16 bits, so wchar_t is no less portable than unsigned short! + All should be unsigned values to avoid sign extension during + bit mask & shift operations. +------------------------------------------------------------------------ */ + + +// Changes for ANTLR3 - Jim Idle, January 2008. +// builtin types defined for Unicode types changed to +// aliases for the types that are system determined by +// ANTLR at compile time. +// // typedef unsigned long UTF32; /* at least 32 bits */ // typedef unsigned short UTF16; /* at least 16 bits */ // typedef unsigned char UTF8; /* typically 8 bits */ // typedef unsigned char Boolean; /* 0 or 1 */ - + #ifndef _ANTLR3_CONVERTUTF_H #define _ANTLR3_CONVERTUTF_H - + namespace antlr3 { - + typedef ANTLR_UINT32 UTF32; /* at least 32 bits */ typedef ANTLR_UINT16 UTF16; /* at least 16 bits */ typedef ANTLR_UINT8 UTF8; /* typically 8 bits */ - -/* Some fundamental constants */ -#define UNI_REPLACEMENT_CHAR (UTF32)0x0000FFFD -#define UNI_MAX_BMP (UTF32)0x0000FFFF -#define UNI_MAX_UTF16 (UTF32)0x0010FFFF -#define UNI_MAX_UTF32 (UTF32)0x7FFFFFFF -#define UNI_MAX_LEGAL_UTF32 (UTF32)0x0010FFFF - -#define UNI_SUR_HIGH_START (UTF32)0xD800 -#define UNI_SUR_HIGH_END (UTF32)0xDBFF -#define UNI_SUR_LOW_START (UTF32)0xDC00 -#define UNI_SUR_LOW_END (UTF32)0xDFFF -#define halfShift ((UTF32)10) -#define halfBase ((UTF32)0x0010000UL) -#define halfMask ((UTF32)0x3FFUL) - -enum ConversionResult { + +/* Some fundamental constants */ +#define UNI_REPLACEMENT_CHAR (UTF32)0x0000FFFD +#define UNI_MAX_BMP (UTF32)0x0000FFFF +#define UNI_MAX_UTF16 
(UTF32)0x0010FFFF +#define UNI_MAX_UTF32 (UTF32)0x7FFFFFFF +#define UNI_MAX_LEGAL_UTF32 (UTF32)0x0010FFFF + +#define UNI_SUR_HIGH_START (UTF32)0xD800 +#define UNI_SUR_HIGH_END (UTF32)0xDBFF +#define UNI_SUR_LOW_START (UTF32)0xDC00 +#define UNI_SUR_LOW_END (UTF32)0xDFFF +#define halfShift ((UTF32)10) +#define halfBase ((UTF32)0x0010000UL) +#define halfMask ((UTF32)0x3FFUL) + +enum ConversionResult { conversionOK, /* conversion successful */ sourceExhausted, /* partial character in source, but hit end */ targetExhausted, /* insuff. room in target for conversion */ sourceIllegal /* source sequence is illegal/malformed */ -}; - -enum ConversionFlags { +}; + +enum ConversionFlags { strictConversion = 0, lenientConversion -} ; - - - +} ; + + + } - -#endif - -/* --------------------------------------------------------------------- */ + +#endif + +/* --------------------------------------------------------------------- */ diff --git a/contrib/libs/antlr3_cpp_runtime/include/antlr3cyclicdfa.hpp b/contrib/libs/antlr3_cpp_runtime/include/antlr3cyclicdfa.hpp index 2129552f1a..94750e8fe0 100644 --- a/contrib/libs/antlr3_cpp_runtime/include/antlr3cyclicdfa.hpp +++ b/contrib/libs/antlr3_cpp_runtime/include/antlr3cyclicdfa.hpp @@ -1,45 +1,45 @@ -/// Definition of a cyclic dfa structure such that it can be -/// initialized at compile time and have only a single -/// runtime function that can deal with all cyclic dfa -/// structures and show Java how it is done ;-) -/// +/// Definition of a cyclic dfa structure such that it can be +/// initialized at compile time and have only a single +/// runtime function that can deal with all cyclic dfa +/// structures and show Java how it is done ;-) +/// #ifndef ANTLR3_CYCLICDFA_HPP #define ANTLR3_CYCLICDFA_HPP - -// [The "BSD licence"] -// Copyright (c) 2005-2009 Gokulakannan Somasundaram, ElectronDB - -// -// All rights reserved. 
-// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// 3. The name of the author may not be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR -// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES -// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. -// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, -// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT -// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF -// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - + +// [The "BSD licence"] +// Copyright (c) 2005-2009 Gokulakannan Somasundaram, ElectronDB + +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// 2. 
Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// 3. The name of the author may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + namespace antlr3 { - -template<class ImplTraits, class CtxType> -class CyclicDFA : public ImplTraits::AllocPolicyType -{ -public: + +template<class ImplTraits, class CtxType> +class CyclicDFA : public ImplTraits::AllocPolicyType +{ +public: typedef typename CtxType::StreamType StreamType; typedef typename CtxType::ExceptionBaseType ExceptionBaseType; typedef typename ImplTraits::template RecognizerType<StreamType> RecognizerType; @@ -47,15 +47,15 @@ public: typedef typename StreamType::TokenType TokenType; typedef TokenType CommonTokenType; typedef CtxType ContextType; - -private: - /// Decision number that a particular static structure - /// represents. - /// + +private: + /// Decision number that a particular static structure + /// represents. 
+ /// const ANTLR_INT32 m_decisionNumber; - - /// What this decision represents - /// + + /// What this decision represents + /// const ANTLR_UCHAR* m_description; const ANTLR_INT32* const m_eot; const ANTLR_INT32* const m_eof; @@ -64,8 +64,8 @@ private: const ANTLR_INT32* const m_accept; const ANTLR_INT32* const m_special; const ANTLR_INT32* const *const m_transition; - -public: + +public: CyclicDFA( ANTLR_INT32 decisionNumber , const ANTLR_UCHAR* description , const ANTLR_INT32* const eot @@ -76,20 +76,20 @@ public: , const ANTLR_INT32* const special , const ANTLR_INT32* const *const transition ); CyclicDFA( const CyclicDFA& cdfa ); - CyclicDFA& operator=( const CyclicDFA& dfa); + CyclicDFA& operator=( const CyclicDFA& dfa); ANTLR_INT32 specialStateTransition(CtxType * ctx, RecognizerType* recognizer, IntStreamType* is, ANTLR_INT32 s); ANTLR_INT32 specialTransition(CtxType * ctx, RecognizerType* recognizer, IntStreamType* is, ANTLR_INT32 s); - + template<typename SuperType> ANTLR_INT32 predict(CtxType* ctx, RecognizerType* recognizer, IntStreamType* is, SuperType& super); -private: +private: void noViableAlt(RecognizerType* rec, ANTLR_UINT32 s); -}; - +}; + } - -#include "antlr3cyclicdfa.inl" - -#endif + +#include "antlr3cyclicdfa.inl" + +#endif diff --git a/contrib/libs/antlr3_cpp_runtime/include/antlr3cyclicdfa.inl b/contrib/libs/antlr3_cpp_runtime/include/antlr3cyclicdfa.inl index 61d15bfcf2..cb6dae8231 100644 --- a/contrib/libs/antlr3_cpp_runtime/include/antlr3cyclicdfa.inl +++ b/contrib/libs/antlr3_cpp_runtime/include/antlr3cyclicdfa.inl @@ -1,6 +1,6 @@ namespace antlr3 { - -template<class ImplTraits, class CtxType> + +template<class ImplTraits, class CtxType> CyclicDFA<ImplTraits, CtxType>::CyclicDFA( ANTLR_INT32 decisionNumber , const ANTLR_UCHAR* description , const ANTLR_INT32* const eot @@ -18,13 +18,13 @@ CyclicDFA<ImplTraits, CtxType>::CyclicDFA( ANTLR_INT32 decisionNumber , m_accept(accept) , m_special(special) , m_transition(transition) -{ +{ 
m_description = description; -} - -template<class ImplTraits, class CtxType> -CyclicDFA<ImplTraits, CtxType>::CyclicDFA( const CyclicDFA& dfa ) -{ +} + +template<class ImplTraits, class CtxType> +CyclicDFA<ImplTraits, CtxType>::CyclicDFA( const CyclicDFA& dfa ) +{ m_decisionNumber = dfa.m_decisionNumber; m_description = dfa.m_description; m_eot = dfa.m_eot; @@ -34,11 +34,11 @@ CyclicDFA<ImplTraits, CtxType>::CyclicDFA( const CyclicDFA& dfa ) m_accept = dfa.m_accept; m_special = dfa.m_special; m_transition = dfa.m_transition; -} - -template<class ImplTraits, class CtxType> -CyclicDFA<ImplTraits, CtxType>& CyclicDFA<ImplTraits, CtxType>::operator=( const CyclicDFA& dfa) -{ +} + +template<class ImplTraits, class CtxType> +CyclicDFA<ImplTraits, CtxType>& CyclicDFA<ImplTraits, CtxType>::operator=( const CyclicDFA& dfa) +{ m_decisionNumber = dfa.m_decisionNumber; m_description = dfa.m_description; m_eot = dfa.m_eot; @@ -49,50 +49,50 @@ CyclicDFA<ImplTraits, CtxType>& CyclicDFA<ImplTraits, CtxType>::operator=( const m_special = dfa.m_special; m_transition = dfa.m_transition; return *this; -} - -template<class ImplTraits, class CtxType> +} + +template<class ImplTraits, class CtxType> ANTLR_INT32 CyclicDFA<ImplTraits, CtxType>::specialStateTransition(CtxType * , RecognizerType* , IntStreamType* , ANTLR_INT32 ) -{ +{ return -1; -} - -template<class ImplTraits, class CtxType> +} + +template<class ImplTraits, class CtxType> ANTLR_INT32 CyclicDFA<ImplTraits, CtxType>::specialTransition(CtxType * /*ctx*/, RecognizerType* /*recognizer*/, IntStreamType* /*is*/, ANTLR_INT32 /*s*/) -{ +{ return 0; -} - -template<class ImplTraits, class CtxType> - template<typename SuperType> +} + +template<class ImplTraits, class CtxType> + template<typename SuperType> ANTLR_INT32 CyclicDFA<ImplTraits, CtxType>::predict(CtxType * ctx, RecognizerType* recognizer, IntStreamType* is, SuperType& super) -{ +{ ANTLR_MARKER mark; ANTLR_INT32 s; ANTLR_INT32 specialState; ANTLR_INT32 c; - + mark = 
is->mark(); /* Store where we are right now */ s = 0; /* Always start with state 0 */ - + for (;;) { /* Pick out any special state entry for this state */ specialState = m_special[s]; - + /* Transition the special state and consume an input token */ if (specialState >= 0) { s = super.specialStateTransition(ctx, recognizer, is, specialState); - + // Error? // if (s<0) @@ -110,7 +110,7 @@ ANTLR_INT32 CyclicDFA<ImplTraits, CtxType>::predict(CtxType * ctx, is->consume(); continue; } - + /* Accept state? */ if (m_accept[s] >= 1) @@ -118,21 +118,21 @@ ANTLR_INT32 CyclicDFA<ImplTraits, CtxType>::predict(CtxType * ctx, is->rewind(mark); return m_accept[s]; } - + /* Look for a normal transition state based upon the input token element */ c = is->LA(1); - + /* Check against min and max for this state */ if (c>= m_min[s] && c <= m_max[s]) { ANTLR_INT32 snext; - + /* What is the next state? */ snext = m_transition[s][c - m_min[s]]; - + if (snext < 0) { /* Was in range but not a normal transition @@ -150,7 +150,7 @@ ANTLR_INT32 CyclicDFA<ImplTraits, CtxType>::predict(CtxType * ctx, is->rewind(mark); return 0; } - + /* New current state - move to it */ s = snext; @@ -172,26 +172,26 @@ ANTLR_INT32 CyclicDFA<ImplTraits, CtxType>::predict(CtxType * ctx, is->rewind(mark); return m_accept[m_eof[s]]; } - + /* No alt, so bomb */ this->noViableAlt(recognizer, s); is->rewind(mark); return 0; } -} - -template<class ImplTraits, class CtxType> -void CyclicDFA<ImplTraits, CtxType>::noViableAlt(RecognizerType* rec, ANTLR_UINT32 s) -{ +} + +template<class ImplTraits, class CtxType> +void CyclicDFA<ImplTraits, CtxType>::noViableAlt(RecognizerType* rec, ANTLR_UINT32 s) +{ // In backtracking mode, we just set the failed flag so that the // alt can just exit right now. If we are parsing though, then // we want the exception to be raised. 
// if (rec->get_state()->get_backtracking() > 0) - { + { rec->get_state()->set_failed(true); - } + } else { ANTLR_Exception<ImplTraits, NO_VIABLE_ALT_EXCEPTION, StreamType>* ex @@ -199,6 +199,6 @@ void CyclicDFA<ImplTraits, CtxType>::noViableAlt(RecognizerType* rec, ANTLR_UINT ex->set_decisionNum( m_decisionNumber ); ex->set_state(s); } -} - +} + } diff --git a/contrib/libs/antlr3_cpp_runtime/include/antlr3debugeventlistener.hpp b/contrib/libs/antlr3_cpp_runtime/include/antlr3debugeventlistener.hpp index bc5ea00a3b..412c81436b 100644 --- a/contrib/libs/antlr3_cpp_runtime/include/antlr3debugeventlistener.hpp +++ b/contrib/libs/antlr3_cpp_runtime/include/antlr3debugeventlistener.hpp @@ -1,132 +1,132 @@ -/** - * \file - * The definition of all debugging events that a recognizer can trigger. - * - * \remark - * From the java implementation by Terence Parr... - * I did not create a separate AST debugging interface as it would create - * lots of extra classes and DebugParser has a dbg var defined, which makes - * it hard to change to ASTDebugEventListener. I looked hard at this issue - * and it is easier to understand as one monolithic event interface for all - * possible events. Hopefully, adding ST debugging stuff won't be bad. Leave - * for future. 4/26/2006. - */ - +/** + * \file + * The definition of all debugging events that a recognizer can trigger. + * + * \remark + * From the java implementation by Terence Parr... + * I did not create a separate AST debugging interface as it would create + * lots of extra classes and DebugParser has a dbg var defined, which makes + * it hard to change to ASTDebugEventListener. I looked hard at this issue + * and it is easier to understand as one monolithic event interface for all + * possible events. Hopefully, adding ST debugging stuff won't be bad. Leave + * for future. 4/26/2006. 
+ */ + #ifndef ANTLR3_DEBUG_EVENT_LISTENER_HPP #define ANTLR3_DEBUG_EVENT_LISTENER_HPP - -// [The "BSD licence"] -// Copyright (c) 2005-2009 Gokulakannan Somasundaram, ElectronDB - -// -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// 3. The name of the author may not be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR -// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES -// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. -// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, -// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT -// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF -// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - + +// [The "BSD licence"] +// Copyright (c) 2005-2009 Gokulakannan Somasundaram, ElectronDB + +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// 1. 
Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// 3. The name of the author may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + namespace antlr3 { - -/// Default debugging port -/// + +/// Default debugging port +/// #define DEFAULT_DEBUGGER_PORT 0xBFCC; - -/** The ANTLR3 debugging interface for communicating with ANLTR Works. Function comments - * mostly taken from the Java version. - */ - -template<class ImplTraits> -class DebugEventListener : public ImplTraits::AllocPolicyType -{ -public: + +/** The ANTLR3 debugging interface for communicating with ANLTR Works. Function comments + * mostly taken from the Java version. 
+ */ + +template<class ImplTraits> +class DebugEventListener : public ImplTraits::AllocPolicyType +{ +public: typedef typename ImplTraits::TreeType TreeType; typedef typename ImplTraits::TreeTypePtr TreeTypePtr; typedef typename ImplTraits::StringType StringType; typedef typename ImplTraits::CommonTokenType CommonTokenType; typedef typename ImplTraits::TreeAdaptorType TreeAdaptorType; - -private: + +private: /// The port number which the debug listener should listen on for a connection /// ANTLR_UINT32 m_port; - + /// The socket structure we receive after a successful accept on the serverSocket /// SOCKET m_socket; - + /** The version of the debugging protocol supported by the providing * instance of the debug event listener. */ int m_PROTOCOL_VERSION; - + /// The name of the grammar file that we are debugging /// StringType m_grammarFileName; - + /// Indicates whether we have already connected or not /// bool m_initialized; - + /// Used to serialize the values of any particular token we need to /// send back to the debugger. /// StringType m_tokenString; - - + + /// Allows the debug event system to access the adapter in use /// by the recognizer, if this is a tree parser of some sort. /// TreeAdaptorType* m_adaptor; - - -public: + + +public: /// Wait for a connection from the debugger and initiate the /// debugging session. /// virtual bool handshake(); - + /** The parser has just entered a rule. No decision has been made about * which alt is predicted. This is fired AFTER init actions have been * executed. Attributes are defined and available etc... */ virtual void enterRule( const char * grammarFileName, const char * ruleName); - + /** Because rules can have lots of alternatives, it is very useful to * know which alt you are entering. This is 1..n for n alts. */ virtual void enterAlt( int alt); - + /** This is the last thing executed before leaving a rule. It is * executed even if an exception is thrown. 
This is triggered after * error reporting and recovery have occurred (unless the exception is * not caught in this rule). This implies an "exitAlt" event. */ virtual void exitRule( const char * grammarFileName, const char * ruleName); - + /** Track entry into any (...) subrule other EBNF construct */ virtual void enterSubRule( int decisionNumber); - + virtual void exitSubRule( int decisionNumber); - + /** Every decision, fixed k or arbitrary, has an enter/exit event * so that a GUI can easily track what LT/consume events are * associated with prediction. You will see a single enter/exit @@ -134,20 +134,20 @@ public: * loop iteration. */ virtual void enterDecision( int decisionNumber); - + virtual void exitDecision( int decisionNumber); - + /** An input token was consumed; matched by any kind of element. * Trigger after the token was matched by things like match(), matchAny(). */ virtual void consumeToken( CommonTokenType* t); - + /** An off-channel input token was consumed. * Trigger after the token was matched by things like match(), matchAny(). * (unless of course the hidden token is first stuff in the input stream). */ virtual void consumeHiddenToken( CommonTokenType* t); - + /** Somebody (anybody) looked ahead. Note that this actually gets * triggered by both LA and LT calls. The debugger will want to know * which Token object was examined. Like consumeToken, this indicates @@ -156,18 +156,18 @@ public: * even if the info is redundant. */ virtual void LT( int i, CommonTokenType* t); - + /** The parser is going to look arbitrarily ahead; mark this location, * the token stream's marker is sent in case you need it. */ virtual void mark( ANTLR_MARKER marker); - + /** After an arbitrarily long lookahead as with a cyclic DFA (or with * any backtrack), this informs the debugger that stream should be * rewound to the position associated with marker. */ virtual void rewind( ANTLR_MARKER marker); - + /** Rewind to the input position of the last marker. 
* Used currently only after a cyclic DFA and just * before starting a sem/syn predicate to get the @@ -176,11 +176,11 @@ public: * and rewind(i) should balance still. */ virtual void rewindLast(); - + virtual void beginBacktrack( int level); - + virtual void endBacktrack( int level, bool successful); - + /** To watch a parser move through the grammar, the parser needs to * inform the debugger what line/charPos it is passing in the grammar. * For now, this does not know how to switch from one grammar to the @@ -190,7 +190,7 @@ public: * the parser whenever it hits this line/pos. */ virtual void location( int line, int pos); - + /** A recognition exception occurred such as NoViableAltException. I made * this a generic event so that I can alter the exception hierarchy later * without having to alter all the debug objects. @@ -213,9 +213,9 @@ public: * Here is a sample event trace for grammar: * * b : C ({;}A|B) // {;} is there to prevent A|B becoming a set - * | D - * ; - * + * | D + * ; + * * The sequence for this rule (with no viable alt in the subrule) for * input 'c c' (there are 3 tokens) is: * @@ -249,13 +249,13 @@ public: */ template<typename ExceptionBaseType> void recognitionException( ExceptionBaseType* ) {} - + /** Indicates the recognizer is about to consume tokens to resynchronize * the parser. Any consume events from here until the recovered event * are not part of the parse--they are dead tokens. */ virtual void beginResync(); - + /** Indicates that the recognizer has finished consuming tokens in order * to resynchronize. There may be multiple beginResync/endResync pairs * before the recognizer comes out of errorRecovery mode (in which @@ -265,11 +265,11 @@ public: * a beginResync/endResync pair was tossed out by the parser. */ virtual void endResync(); - + /** A semantic predicate was evaluate with this result and action text */ virtual void semanticPredicate( bool result, const char * predicate); - + /** Announce that parsing has begun. 
Not technically useful except for * sending events over a socket. A GUI for example will launch a thread * to connect and communicate with a remote parser. The thread will want @@ -278,22 +278,22 @@ public: * figure this out). */ virtual void commence(); - + /** Parsing is over; successfully or not. Mostly useful for telling * remote debugging listeners that it's time to quit. When the rule * invocation level goes to zero at the end of a rule, we are done * parsing. */ virtual void terminate(); - + /// Retrieve acknowledge response from the debugger. in fact this /// response is never used at the moment. So we just read whatever /// is in the socket buffer and throw it away. /// virtual void ack(); - + // T r e e P a r s i n g - + /** Input for a tree parser is an AST, but we know nothing for sure * about a node except its type and text (obtained from the adaptor). * This is the analog of the consumeToken method. The ID is usually @@ -311,33 +311,33 @@ public: * @param t */ virtual void consumeNode( TreeTypePtr t); - + /** The tree parser looked ahead. If the type is UP or DOWN, * then the ID is not really meaningful as it's fixed--there is * just one UP node and one DOWN navigation node. */ virtual void LTT( int i, TreeTypePtr t); - - + + // A S T E v e n t s - + /** A nil was created (even nil nodes have a unique ID... * they are not "null" per se). As of 4/28/2006, this * seems to be uniquely triggered when starting a new subtree * such as when entering a subrule in automatic mode and when * building a tree in rewrite mode. - * + * * If you are receiving this event over a socket via * RemoteDebugEventSocketListener then only t.ID is set. */ virtual void nilNode( TreeTypePtr t); - + /** If a syntax error occurs, recognizers bracket the error * with an error node if they are building ASTs. This event * notifies the listener that this is the case */ virtual void errorNode( TreeTypePtr t); - + /** Announce a new node built from token elements such as type etc... 
* * If you are receiving this event over a socket via @@ -345,7 +345,7 @@ public: * set. */ virtual void createNode( TreeTypePtr t); - + /** Announce a new node built from an existing token. * * If you are receiving this event over a socket via @@ -353,7 +353,7 @@ public: * are set. */ virtual void createNodeTok( TreeTypePtr node, CommonTokenType* token); - + /** Make a node the new root of an existing root. See * * Note: the newRootID parameter is possibly different @@ -371,7 +371,7 @@ public: * @see org.antlr.runtime.tree.TreeAdaptor.becomeRoot() */ virtual void becomeRoot( TreeTypePtr newRoot, TreeTypePtr oldRoot); - + /** Make childID a child of rootID. * * If you are receiving this event over a socket via @@ -380,20 +380,20 @@ public: * @see org.antlr.runtime.tree.TreeAdaptor.addChild() */ virtual void addChild( TreeTypePtr root, TreeTypePtr child); - + /** Set the token start/stop token index for a subtree root or node. * * If you are receiving this event over a socket via * RemoteDebugEventSocketListener then only t.ID is set. */ virtual void setTokenBoundaries( TreeTypePtr t, ANTLR_MARKER tokenStartIndex, ANTLR_MARKER tokenStopIndex); - + /// Free up the resources allocated to this structure /// virtual ~DebugEventListener(); -}; - +}; + } - -#endif - + +#endif + diff --git a/contrib/libs/antlr3_cpp_runtime/include/antlr3defs.hpp b/contrib/libs/antlr3_cpp_runtime/include/antlr3defs.hpp index 7f332981e6..4c07979185 100644 --- a/contrib/libs/antlr3_cpp_runtime/include/antlr3defs.hpp +++ b/contrib/libs/antlr3_cpp_runtime/include/antlr3defs.hpp @@ -1,114 +1,114 @@ -/** \file - * Basic type and constant definitions for ANTLR3 Runtime. - */ +/** \file + * Basic type and constant definitions for ANTLR3 Runtime. + */ #ifndef _ANTLR3DEFS_HPP #define _ANTLR3DEFS_HPP - -// [The "BSD licence"] -// Copyright (c) 2005-2009 Gokulakannan Somasundaram, ElectronDB - -// -// All rights reserved. 
-// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// 3. The name of the author may not be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR -// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES -// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. -// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, -// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT -// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF -// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - + +// [The "BSD licence"] +// Copyright (c) 2005-2009 Gokulakannan Somasundaram, ElectronDB + +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// 2. 
Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// 3. The name of the author may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + // not used in C++ target (kept for "historical" reasons, the generated code still uses this) #define ANTLR_SIZE_HINT 0U - -/* Work out what operating system/compiler this is. We just do this once - * here and use an internal symbol after this. - */ + +/* Work out what operating system/compiler this is. We just do this once + * here and use an internal symbol after this. 
+ */ #ifdef _WIN64 # define ANTLR_USE_64BIT -#endif - +#endif + #ifdef _WIN32 - -#ifndef WIN32_LEAN_AND_MEAN + +#ifndef WIN32_LEAN_AND_MEAN #define WIN32_LEAN_AND_MEAN -#endif - -/* Allow VC 8 (vs2005) and above to use 'secure' versions of various functions such as sprintf - */ +#endif + +/* Allow VC 8 (vs2005) and above to use 'secure' versions of various functions such as sprintf + */ #ifndef _CRT_SECURE_NO_DEPRECATE #define _CRT_SECURE_NO_DEPRECATE -#endif - +#endif + #ifndef NOMINMAX #define NOMINMAX #endif -#include <winsock2.h> - +#include <winsock2.h> + #define ANTLR_INLINE __inline - + typedef FILE * ANTLR_FDSC; - + typedef struct sockaddr_in ANTLR_SOCKADDRT, * pANTLR_SOCKADDRT; // Type used for socket address declaration typedef struct sockaddr ANTLR_SOCKADDRC, * pANTLR_SOCKADDRC; // Type used for cast on accept() - + #define ANTLR_CLOSESOCKET closesocket - + #else // Un*x - -#ifdef __LP64__ -#define ANTLR_USE_64BIT -#endif - + +#ifdef __LP64__ +#define ANTLR_USE_64BIT +#endif + #define ANTLR_INLINE inline - -typedef int SOCKET; + +typedef int SOCKET; typedef FILE * ANTLR_FDSC; - + #endif - + // Standard integer types (since C++11) (should work with MSVC 2010/2013, gcc, clang) // typedef std::int32_t ANTLR_CHAR; typedef std::uint32_t ANTLR_UCHAR; - + typedef std::int8_t ANTLR_INT8; typedef std::int16_t ANTLR_INT16; typedef std::int32_t ANTLR_INT32; typedef std::int64_t ANTLR_INT64; - + typedef std::uint8_t ANTLR_UINT8; typedef std::uint16_t ANTLR_UINT16; typedef std::uint32_t ANTLR_UINT32; typedef std::uint64_t ANTLR_UINT64; typedef std::uint64_t ANTLR_BITWORD; - + #ifdef ANTLR_USE_64BIT #define ANTLR_UINT64_CAST(ptr) (ANTLR_UINT64)(ptr)) #define ANTLR_UINT32_CAST(ptr) (ANTLR_UINT32)((ANTLR_UINT64)(ptr)) typedef ANTLR_INT64 ANTLR_MARKER; typedef ANTLR_UINT64 ANTLR_INTKEY; -#else -#define ANTLR_UINT64_CAST(ptr) (ANTLR_UINT64)((ANTLR_UINT32)(ptr)) +#else +#define ANTLR_UINT64_CAST(ptr) (ANTLR_UINT64)((ANTLR_UINT32)(ptr)) #define ANTLR_UINT32_CAST(ptr) 
(ANTLR_UINT32)(ptr) typedef ANTLR_INT32 ANTLR_MARKER; typedef ANTLR_UINT32 ANTLR_INTKEY; -#endif - +#endif + #define ANTLR_UINT64_LIT(lit) lit##ULL - + #endif /* _ANTLR3DEFS_H */ diff --git a/contrib/libs/antlr3_cpp_runtime/include/antlr3errors.hpp b/contrib/libs/antlr3_cpp_runtime/include/antlr3errors.hpp index 0413d87e39..493d873041 100644 --- a/contrib/libs/antlr3_cpp_runtime/include/antlr3errors.hpp +++ b/contrib/libs/antlr3_cpp_runtime/include/antlr3errors.hpp @@ -1,43 +1,43 @@ #ifndef _ANTLR3ERRORS_HPP #define _ANTLR3ERRORS_HPP - -// [The "BSD licence"] -// Copyright (c) 2005-2009 Gokulakannan Somasundaram, ElectronDB - -// -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// 3. The name of the author may not be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR -// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES -// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
-// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, -// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT -// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF -// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - + +// [The "BSD licence"] +// Copyright (c) 2005-2009 Gokulakannan Somasundaram, ElectronDB + +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// 3. The name of the author may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + #define ANTLR_SUCCESS 0 #define ANTLR_FAIL 1 - -/** Indicates end of character stream and is an invalid Unicode code point. */ + +/** Indicates end of character stream and is an invalid Unicode code point. */ #define ANTLR_CHARSTREAM_EOF 0xFFFFFFFF - -/** Indicates memoizing on a rule failed. - */ + +/** Indicates memoizing on a rule failed. + */ #define MEMO_RULE_FAILED 0xFFFFFFFE #define MEMO_RULE_UNKNOWN 0xFFFFFFFF - + #endif /* _ANTLR3ERRORS_H */ diff --git a/contrib/libs/antlr3_cpp_runtime/include/antlr3exception.hpp b/contrib/libs/antlr3_cpp_runtime/include/antlr3exception.hpp index 9bc18a0432..096e649e54 100644 --- a/contrib/libs/antlr3_cpp_runtime/include/antlr3exception.hpp +++ b/contrib/libs/antlr3_cpp_runtime/include/antlr3exception.hpp @@ -1,57 +1,57 @@ -/** \file - * Contains the definition of a basic ANTLR3 exception structure created - * by a recognizer when errors are found/predicted. - - * Two things to be noted for C++ Target: - a) This is not the C++ Exception. Consider this just as yet another class. This - has to be like this because there is a inbuilt recovery and hence there is a try..catch - block for every new token. This is not how C++ Exceptions work.Still there is exception support, as we are handling things like OutofMemory by - throwing exceptions - - b) There is no use in implementing templates here, as all the exceptions are grouped in - one container and hence needs virtual functions. 
But this would occur only when there is - a exception/ while deleting base recognizer. So shouldn't incur the overhead in normal operation - */ +/** \file + * Contains the definition of a basic ANTLR3 exception structure created + * by a recognizer when errors are found/predicted. + + * Two things to be noted for C++ Target: + a) This is not the C++ Exception. Consider this just as yet another class. This + has to be like this because there is a inbuilt recovery and hence there is a try..catch + block for every new token. This is not how C++ Exceptions work.Still there is exception support, as we are handling things like OutofMemory by + throwing exceptions + + b) There is no use in implementing templates here, as all the exceptions are grouped in + one container and hence needs virtual functions. But this would occur only when there is + a exception/ while deleting base recognizer. So shouldn't incur the overhead in normal operation + */ #ifndef _ANTLR3_EXCEPTION_HPP #define _ANTLR3_EXCEPTION_HPP - -// [The "BSD licence"] -// Copyright (c) 2005-2009 Gokulakannan Somasundaram, ElectronDB - -// -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// 3. The name of the author may not be used to endorse or promote products -// derived from this software without specific prior written permission. 
-// -// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR -// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES -// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. -// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, -// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT -// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF -// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - + +// [The "BSD licence"] +// Copyright (c) 2005-2009 Gokulakannan Somasundaram, ElectronDB + +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// 3. The name of the author may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + namespace antlr3 { - -/** Base structure for an ANTLR3 exception tracker - */ - -template<class ImplTraits, class StreamType> -class ANTLR_ExceptionBase -{ -public: + +/** Base structure for an ANTLR3 exception tracker + */ + +template<class ImplTraits, class StreamType> +class ANTLR_ExceptionBase +{ +public: typedef typename StreamType::UnitType TokenType; typedef typename StreamType::IntStreamType IntStreamType; typedef typename ImplTraits::AllocPolicyType AllocPolicyType; @@ -60,82 +60,82 @@ public: typedef typename ImplTraits::BitsetType BitsetType; typedef typename ImplTraits::BitsetListType BitsetListType; typedef typename ImplTraits::template ExceptionBaseType<StreamType> ExceptionBaseType; - -protected: - /** The printable message that goes with this exception, in your preferred - * encoding format. ANTLR just uses ASCII by default but you can ignore these - * messages or convert them to another format or whatever of course. They are - * really internal messages that you then decide how to print out in a form that - * the users of your product will understand, as they are unlikely to know what - * to do with "Recognition exception at: [[TOK_GERUND..... " ;-) - */ + +protected: + /** The printable message that goes with this exception, in your preferred + * encoding format. ANTLR just uses ASCII by default but you can ignore these + * messages or convert them to another format or whatever of course. 
They are + * really internal messages that you then decide how to print out in a form that + * the users of your product will understand, as they are unlikely to know what + * to do with "Recognition exception at: [[TOK_GERUND..... " ;-) + */ StringType m_message; - - /** Name of the file/input source for reporting. Note that this may be empty!! - */ + + /** Name of the file/input source for reporting. Note that this may be empty!! + */ StringType m_streamName; - - /** Indicates the index of the 'token' we were looking at when the - * exception occurred. - */ + + /** Indicates the index of the 'token' we were looking at when the + * exception occurred. + */ ANTLR_MARKER m_index; - - /** Indicates what the current token/tree was when the error occurred. Since not - * all input streams will be able to retrieve the nth token, we track it here - * instead. This is for parsers, and even tree parsers may set this. - */ + + /** Indicates what the current token/tree was when the error occurred. Since not + * all input streams will be able to retrieve the nth token, we track it here + * instead. This is for parsers, and even tree parsers may set this. + */ const TokenType* m_token; - - /** Pointer to the next exception in the chain (if any) - */ - ExceptionBaseType* m_nextException; - - /** Indicates the token we were expecting to see next when the error occurred - */ + + /** Pointer to the next exception in the chain (if any) + */ + ExceptionBaseType* m_nextException; + + /** Indicates the token we were expecting to see next when the error occurred + */ ANTLR_UINT32 m_expecting; - - /** Indicates a set of tokens that we were expecting to see one of when the - * error occurred. It is a following bitset list, so you can use load it and use ->toIntList() on it - * to generate an array of integer tokens that it represents. - */ + + /** Indicates a set of tokens that we were expecting to see one of when the + * error occurred. 
It is a following bitset list, so you can use load it and use ->toIntList() on it + * to generate an array of integer tokens that it represents. + */ BitsetListType* m_expectingSet; - - /** If this is a tree parser exception then the node is set to point to the node - * that caused the issue. - */ + + /** If this is a tree parser exception then the node is set to point to the node + * that caused the issue. + */ TokenType* m_node; - - /** The current character when an error occurred - for lexers. - */ + + /** The current character when an error occurred - for lexers. + */ ANTLR_UCHAR m_c; - - /** Track the line at which the error occurred in case this is - * generated from a lexer. We need to track this since the - * unexpected char doesn't carry the line info. - */ + + /** Track the line at which the error occurred in case this is + * generated from a lexer. We need to track this since the + * unexpected char doesn't carry the line info. + */ ANTLR_UINT32 m_line; - - /** Character position in the line where the error occurred. - */ + + /** Character position in the line where the error occurred. + */ ANTLR_INT32 m_charPositionInLine; - - /** decision number for NVE - */ + + /** decision number for NVE + */ ANTLR_UINT32 m_decisionNum; - - /** State for NVE - */ + + /** State for NVE + */ ANTLR_UINT32 m_state; - - /** Rule name for failed predicate exception - */ + + /** Rule name for failed predicate exception + */ StringType m_ruleName; - - /** Pointer to the input stream that this exception occurred in. - */ + + /** Pointer to the input stream that this exception occurred in. 
+ */ IntStreamType* m_input; - -public: + +public: StringType& get_message(); StringType& get_streamName(); ANTLR_MARKER get_index() const; @@ -172,36 +172,36 @@ public: virtual ANTLR_UINT32 getType() const = 0; virtual void print() const = 0; virtual void displayRecognitionError( ANTLR_UINT8** tokenNames, StringStreamType& str ) const = 0; - - virtual ~ANTLR_ExceptionBase(); - -protected: + + virtual ~ANTLR_ExceptionBase(); + +protected: ANTLR_ExceptionBase(const StringType& message); -}; - - -template<class ImplTraits, ExceptionType Ex, class StreamType> -class ANTLR_Exception : public ImplTraits::template ExceptionBaseType<StreamType> -{ -public: +}; + + +template<class ImplTraits, ExceptionType Ex, class StreamType> +class ANTLR_Exception : public ImplTraits::template ExceptionBaseType<StreamType> +{ +public: typedef typename ImplTraits::StringType StringType; typedef typename ImplTraits::StringStreamType StringStreamType; typedef typename ImplTraits::BitsetType BitsetType; typedef typename ImplTraits::template ExceptionBaseType<StreamType> BaseType; - -public: + +public: template<typename BaseRecognizerType> ANTLR_Exception(BaseRecognizerType* recognizer, const StringType& message); - + const StringType& get_name() const; virtual StringType getName() const; virtual ANTLR_UINT32 getType() const; virtual void print() const; virtual void displayRecognitionError( ANTLR_UINT8** tokenNames, StringStreamType& str_stream) const; -}; - +}; + } - -#include "antlr3exception.inl" - -#endif + +#include "antlr3exception.inl" + +#endif diff --git a/contrib/libs/antlr3_cpp_runtime/include/antlr3exception.inl b/contrib/libs/antlr3_cpp_runtime/include/antlr3exception.inl index 27f00bdda8..bb2723e0b7 100644 --- a/contrib/libs/antlr3_cpp_runtime/include/antlr3exception.inl +++ b/contrib/libs/antlr3_cpp_runtime/include/antlr3exception.inl @@ -1,10 +1,10 @@ namespace antlr3 { - -template<class ImplTraits, class StreamType> -ANTLR_ExceptionBase<ImplTraits, 
StreamType>::ANTLR_ExceptionBase(const StringType& message) + +template<class ImplTraits, class StreamType> +ANTLR_ExceptionBase<ImplTraits, StreamType>::ANTLR_ExceptionBase(const StringType& message) :m_message(message) ,m_input(NULL) -{ +{ m_index = 0; m_token = NULL; m_expecting = 0; @@ -16,183 +16,183 @@ ANTLR_ExceptionBase<ImplTraits, StreamType>::ANTLR_ExceptionBase(const StringTyp m_decisionNum = 0; m_state = 0; m_nextException = NULL; -} - -template<class ImplTraits, class StreamType> -ANTLR_INLINE typename ANTLR_ExceptionBase<ImplTraits, StreamType>::StringType& ANTLR_ExceptionBase<ImplTraits, StreamType>::get_message() -{ +} + +template<class ImplTraits, class StreamType> +ANTLR_INLINE typename ANTLR_ExceptionBase<ImplTraits, StreamType>::StringType& ANTLR_ExceptionBase<ImplTraits, StreamType>::get_message() +{ return m_message; -} -template<class ImplTraits, class StreamType> -ANTLR_INLINE typename ANTLR_ExceptionBase<ImplTraits, StreamType>::StringType& ANTLR_ExceptionBase<ImplTraits, StreamType>::get_streamName() -{ +} +template<class ImplTraits, class StreamType> +ANTLR_INLINE typename ANTLR_ExceptionBase<ImplTraits, StreamType>::StringType& ANTLR_ExceptionBase<ImplTraits, StreamType>::get_streamName() +{ return m_streamName; -} -template<class ImplTraits, class StreamType> -ANTLR_INLINE ANTLR_MARKER ANTLR_ExceptionBase<ImplTraits, StreamType>::get_index() const -{ +} +template<class ImplTraits, class StreamType> +ANTLR_INLINE ANTLR_MARKER ANTLR_ExceptionBase<ImplTraits, StreamType>::get_index() const +{ return m_index; -} -template<class ImplTraits, class StreamType> -ANTLR_INLINE const typename ANTLR_ExceptionBase<ImplTraits, StreamType>::TokenType* ANTLR_ExceptionBase<ImplTraits, StreamType>::get_token() const -{ +} +template<class ImplTraits, class StreamType> +ANTLR_INLINE const typename ANTLR_ExceptionBase<ImplTraits, StreamType>::TokenType* ANTLR_ExceptionBase<ImplTraits, StreamType>::get_token() const +{ return m_token; -} -template<class 
ImplTraits, class StreamType> -ANTLR_INLINE typename ANTLR_ExceptionBase<ImplTraits, StreamType>::ExceptionBaseType* ANTLR_ExceptionBase<ImplTraits, StreamType>::get_nextException() const -{ +} +template<class ImplTraits, class StreamType> +ANTLR_INLINE typename ANTLR_ExceptionBase<ImplTraits, StreamType>::ExceptionBaseType* ANTLR_ExceptionBase<ImplTraits, StreamType>::get_nextException() const +{ return m_nextException; -} -template<class ImplTraits, class StreamType> -ANTLR_INLINE ANTLR_UINT32 ANTLR_ExceptionBase<ImplTraits, StreamType>::get_expecting() const -{ +} +template<class ImplTraits, class StreamType> +ANTLR_INLINE ANTLR_UINT32 ANTLR_ExceptionBase<ImplTraits, StreamType>::get_expecting() const +{ return m_expecting; -} -template<class ImplTraits, class StreamType> -ANTLR_INLINE typename ANTLR_ExceptionBase<ImplTraits, StreamType>::BitsetListType* ANTLR_ExceptionBase<ImplTraits, StreamType>::get_expectingSet() const -{ +} +template<class ImplTraits, class StreamType> +ANTLR_INLINE typename ANTLR_ExceptionBase<ImplTraits, StreamType>::BitsetListType* ANTLR_ExceptionBase<ImplTraits, StreamType>::get_expectingSet() const +{ return m_expectingSet; -} -template<class ImplTraits, class StreamType> -ANTLR_INLINE typename ANTLR_ExceptionBase<ImplTraits, StreamType>::TokenType* ANTLR_ExceptionBase<ImplTraits, StreamType>::get_node() const -{ +} +template<class ImplTraits, class StreamType> +ANTLR_INLINE typename ANTLR_ExceptionBase<ImplTraits, StreamType>::TokenType* ANTLR_ExceptionBase<ImplTraits, StreamType>::get_node() const +{ return m_node; -} -template<class ImplTraits, class StreamType> -ANTLR_INLINE ANTLR_UCHAR ANTLR_ExceptionBase<ImplTraits, StreamType>::get_c() const -{ +} +template<class ImplTraits, class StreamType> +ANTLR_INLINE ANTLR_UCHAR ANTLR_ExceptionBase<ImplTraits, StreamType>::get_c() const +{ return m_c; -} -template<class ImplTraits, class StreamType> -ANTLR_INLINE ANTLR_UINT32 ANTLR_ExceptionBase<ImplTraits, StreamType>::get_line() const -{ 
+} +template<class ImplTraits, class StreamType> +ANTLR_INLINE ANTLR_UINT32 ANTLR_ExceptionBase<ImplTraits, StreamType>::get_line() const +{ return m_line; -} -template<class ImplTraits, class StreamType> -ANTLR_INLINE ANTLR_INT32 ANTLR_ExceptionBase<ImplTraits, StreamType>::get_charPositionInLine() const -{ +} +template<class ImplTraits, class StreamType> +ANTLR_INLINE ANTLR_INT32 ANTLR_ExceptionBase<ImplTraits, StreamType>::get_charPositionInLine() const +{ return m_charPositionInLine; -} -template<class ImplTraits, class StreamType> -ANTLR_INLINE ANTLR_UINT32 ANTLR_ExceptionBase<ImplTraits, StreamType>::get_decisionNum() const -{ +} +template<class ImplTraits, class StreamType> +ANTLR_INLINE ANTLR_UINT32 ANTLR_ExceptionBase<ImplTraits, StreamType>::get_decisionNum() const +{ return m_decisionNum; -} -template<class ImplTraits, class StreamType> -ANTLR_INLINE ANTLR_UINT32 ANTLR_ExceptionBase<ImplTraits, StreamType>::get_state() const -{ +} +template<class ImplTraits, class StreamType> +ANTLR_INLINE ANTLR_UINT32 ANTLR_ExceptionBase<ImplTraits, StreamType>::get_state() const +{ return m_state; -} -template<class ImplTraits, class StreamType> -ANTLR_INLINE typename ANTLR_ExceptionBase<ImplTraits, StreamType>::StringType& ANTLR_ExceptionBase<ImplTraits, StreamType>::get_ruleName() -{ +} +template<class ImplTraits, class StreamType> +ANTLR_INLINE typename ANTLR_ExceptionBase<ImplTraits, StreamType>::StringType& ANTLR_ExceptionBase<ImplTraits, StreamType>::get_ruleName() +{ return m_ruleName; -} -template<class ImplTraits, class StreamType> -ANTLR_INLINE typename ANTLR_ExceptionBase<ImplTraits, StreamType>::IntStreamType* ANTLR_ExceptionBase<ImplTraits, StreamType>::get_input() const -{ +} +template<class ImplTraits, class StreamType> +ANTLR_INLINE typename ANTLR_ExceptionBase<ImplTraits, StreamType>::IntStreamType* ANTLR_ExceptionBase<ImplTraits, StreamType>::get_input() const +{ return m_input; -} -template<class ImplTraits, class StreamType> -ANTLR_INLINE void 
ANTLR_ExceptionBase<ImplTraits, StreamType>::set_message( const StringType& message ) -{ +} +template<class ImplTraits, class StreamType> +ANTLR_INLINE void ANTLR_ExceptionBase<ImplTraits, StreamType>::set_message( const StringType& message ) +{ m_message = message; -} -template<class ImplTraits, class StreamType> -ANTLR_INLINE void ANTLR_ExceptionBase<ImplTraits, StreamType>::set_streamName( const StringType& streamName ) -{ +} +template<class ImplTraits, class StreamType> +ANTLR_INLINE void ANTLR_ExceptionBase<ImplTraits, StreamType>::set_streamName( const StringType& streamName ) +{ m_streamName = streamName; -} -template<class ImplTraits, class StreamType> -ANTLR_INLINE void ANTLR_ExceptionBase<ImplTraits, StreamType>::set_index( ANTLR_MARKER index ) -{ +} +template<class ImplTraits, class StreamType> +ANTLR_INLINE void ANTLR_ExceptionBase<ImplTraits, StreamType>::set_index( ANTLR_MARKER index ) +{ m_index = index; -} -template<class ImplTraits, class StreamType> -ANTLR_INLINE void ANTLR_ExceptionBase<ImplTraits, StreamType>::set_token( const TokenType* token ) -{ +} +template<class ImplTraits, class StreamType> +ANTLR_INLINE void ANTLR_ExceptionBase<ImplTraits, StreamType>::set_token( const TokenType* token ) +{ if (m_token) delete m_token; m_token = token; -} -template<class ImplTraits, class StreamType> -ANTLR_INLINE void ANTLR_ExceptionBase<ImplTraits, StreamType>::set_nextException( ExceptionBaseType* nextException ) -{ +} +template<class ImplTraits, class StreamType> +ANTLR_INLINE void ANTLR_ExceptionBase<ImplTraits, StreamType>::set_nextException( ExceptionBaseType* nextException ) +{ m_nextException = nextException; -} -template<class ImplTraits, class StreamType> -ANTLR_INLINE void ANTLR_ExceptionBase<ImplTraits, StreamType>::set_expecting( ANTLR_UINT32 expecting ) -{ +} +template<class ImplTraits, class StreamType> +ANTLR_INLINE void ANTLR_ExceptionBase<ImplTraits, StreamType>::set_expecting( ANTLR_UINT32 expecting ) +{ m_expecting = expecting; -} 
-template<class ImplTraits, class StreamType> -ANTLR_INLINE void ANTLR_ExceptionBase<ImplTraits, StreamType>::set_expectingSet( BitsetListType* expectingSet ) -{ +} +template<class ImplTraits, class StreamType> +ANTLR_INLINE void ANTLR_ExceptionBase<ImplTraits, StreamType>::set_expectingSet( BitsetListType* expectingSet ) +{ m_expectingSet = expectingSet; -} -template<class ImplTraits, class StreamType> -ANTLR_INLINE void ANTLR_ExceptionBase<ImplTraits, StreamType>::set_node( TokenType* node ) -{ +} +template<class ImplTraits, class StreamType> +ANTLR_INLINE void ANTLR_ExceptionBase<ImplTraits, StreamType>::set_node( TokenType* node ) +{ m_node = node; -} -template<class ImplTraits, class StreamType> -ANTLR_INLINE void ANTLR_ExceptionBase<ImplTraits, StreamType>::set_c( ANTLR_UCHAR c ) -{ +} +template<class ImplTraits, class StreamType> +ANTLR_INLINE void ANTLR_ExceptionBase<ImplTraits, StreamType>::set_c( ANTLR_UCHAR c ) +{ m_c = c; -} -template<class ImplTraits, class StreamType> -ANTLR_INLINE void ANTLR_ExceptionBase<ImplTraits, StreamType>::set_line( ANTLR_UINT32 line ) -{ +} +template<class ImplTraits, class StreamType> +ANTLR_INLINE void ANTLR_ExceptionBase<ImplTraits, StreamType>::set_line( ANTLR_UINT32 line ) +{ m_line = line; -} -template<class ImplTraits, class StreamType> -ANTLR_INLINE void ANTLR_ExceptionBase<ImplTraits, StreamType>::set_charPositionInLine( ANTLR_INT32 charPositionInLine ) -{ +} +template<class ImplTraits, class StreamType> +ANTLR_INLINE void ANTLR_ExceptionBase<ImplTraits, StreamType>::set_charPositionInLine( ANTLR_INT32 charPositionInLine ) +{ m_charPositionInLine = charPositionInLine; -} -template<class ImplTraits, class StreamType> -ANTLR_INLINE void ANTLR_ExceptionBase<ImplTraits, StreamType>::set_decisionNum( ANTLR_UINT32 decisionNum ) -{ +} +template<class ImplTraits, class StreamType> +ANTLR_INLINE void ANTLR_ExceptionBase<ImplTraits, StreamType>::set_decisionNum( ANTLR_UINT32 decisionNum ) +{ m_decisionNum = decisionNum; -} 
-template<class ImplTraits, class StreamType> -ANTLR_INLINE void ANTLR_ExceptionBase<ImplTraits, StreamType>::set_state( ANTLR_UINT32 state ) -{ +} +template<class ImplTraits, class StreamType> +ANTLR_INLINE void ANTLR_ExceptionBase<ImplTraits, StreamType>::set_state( ANTLR_UINT32 state ) +{ m_state = state; -} -template<class ImplTraits, class StreamType> -ANTLR_INLINE void ANTLR_ExceptionBase<ImplTraits, StreamType>::set_ruleName( const StringType& ruleName ) -{ +} +template<class ImplTraits, class StreamType> +ANTLR_INLINE void ANTLR_ExceptionBase<ImplTraits, StreamType>::set_ruleName( const StringType& ruleName ) +{ m_ruleName = ruleName; -} -template<class ImplTraits, class StreamType> -ANTLR_INLINE void ANTLR_ExceptionBase<ImplTraits, StreamType>::set_input( IntStreamType* input ) -{ +} +template<class ImplTraits, class StreamType> +ANTLR_INLINE void ANTLR_ExceptionBase<ImplTraits, StreamType>::set_input( IntStreamType* input ) +{ m_input = input; -} - - -template<class ImplTraits, ExceptionType Ex, class StreamType> +} + + +template<class ImplTraits, ExceptionType Ex, class StreamType> template<typename BaseRecognizerType> -ANTLR_Exception<ImplTraits, Ex, StreamType>::ANTLR_Exception(BaseRecognizerType* recognizer, const StringType& message) +ANTLR_Exception<ImplTraits, Ex, StreamType>::ANTLR_Exception(BaseRecognizerType* recognizer, const StringType& message) :BaseType( message ) -{ +{ recognizer->get_super()->fillExceptionData( this ); BaseType::m_input = recognizer->get_super()->get_istream(); BaseType::m_nextException = recognizer->get_state()->get_exception(); /* So we don't leak the memory */ recognizer->get_state()->set_exception(this); recognizer->get_state()->set_error( true ); /* Exception is outstanding */ -} - -template<class ImplTraits, ExceptionType Ex, class StreamType> -ANTLR_UINT32 ANTLR_Exception<ImplTraits, Ex, StreamType>::getType() const -{ +} + +template<class ImplTraits, ExceptionType Ex, class StreamType> +ANTLR_UINT32 
ANTLR_Exception<ImplTraits, Ex, StreamType>::getType() const +{ return static_cast<ANTLR_UINT32>(Ex); -} - -template<class ImplTraits, ExceptionType Ex, class StreamType> +} + +template<class ImplTraits, ExceptionType Ex, class StreamType> void ANTLR_Exception<ImplTraits, Ex, StreamType>::print() const -{ +{ /* Ensure valid pointer */ /* Number if no message, else the message @@ -205,12 +205,12 @@ void ANTLR_Exception<ImplTraits, Ex, StreamType>::print() const { fprintf(stderr, "ANTLR3_EXCEPTION: %s\n", BaseType::m_message.c_str() ); } -} - -template<class ImplTraits, ExceptionType Ex, class StreamType> +} + +template<class ImplTraits, ExceptionType Ex, class StreamType> typename ANTLR_Exception<ImplTraits, Ex, StreamType>::StringType ANTLR_Exception<ImplTraits, Ex, StreamType>::getName() const -{ +{ const char* exArray[] = { "org.antlr.runtime.RecognitionException" , "org.antlr.runtime.MismatchedTokenException" @@ -225,7 +225,7 @@ typename ANTLR_Exception<ImplTraits, Ex, StreamType>::StringType }; return StringType(exArray[Ex]); } - + template<class ImplTraits, ExceptionType Ex, class StreamType> void ANTLR_Exception<ImplTraits, Ex, StreamType>::displayRecognitionError( ANTLR_UINT8** tokenNames, StringStreamType& str_stream ) const @@ -302,13 +302,13 @@ void ANTLR_Exception<ImplTraits, Ex, StreamType>::displayRecognitionError( ANTLR ANTLR_UINT32 bit; ANTLR_UINT32 size; ANTLR_UINT32 numbits; - + // This means we were able to deal with one of a set of // possible tokens at this point, but we did not see any // member of that set. // str_stream << " : unexpected input :"; - + // What tokens could we have accepted at this point in the // parse? 
// @@ -354,14 +354,14 @@ void ANTLR_Exception<ImplTraits, Ex, StreamType>::displayRecognitionError( ANTLR str_stream << " : syntax not recognized...\n"; break; } -} - -template<class ImplTraits, class StreamType> -ANTLR_ExceptionBase<ImplTraits,StreamType>::~ANTLR_ExceptionBase() -{ +} + +template<class ImplTraits, class StreamType> +ANTLR_ExceptionBase<ImplTraits,StreamType>::~ANTLR_ExceptionBase() +{ ANTLR_ExceptionBase<ImplTraits,StreamType>* next; ANTLR_ExceptionBase<ImplTraits,StreamType>* ex = m_nextException; - + /* Ensure valid pointer */ while (ex != NULL) @@ -371,15 +371,15 @@ ANTLR_ExceptionBase<ImplTraits,StreamType>::~ANTLR_ExceptionBase() */ next = ex->m_nextException; ex->m_nextException = NULL; - + /* Free the actual structure itself */ delete ex; - + ex = next; } if ( m_token) delete m_token; -} - +} + } diff --git a/contrib/libs/antlr3_cpp_runtime/include/antlr3filestream.hpp b/contrib/libs/antlr3_cpp_runtime/include/antlr3filestream.hpp index 140b1a5f67..c30cd7b392 100644 --- a/contrib/libs/antlr3_cpp_runtime/include/antlr3filestream.hpp +++ b/contrib/libs/antlr3_cpp_runtime/include/antlr3filestream.hpp @@ -1,40 +1,40 @@ #ifndef _ANTLR3_FILESTREAM_HPP #define _ANTLR3_FILESTREAM_HPP - -// [The "BSD licence"] -// Copyright (c) 2005-2009 Gokulakannan Somasundaram, ElectronDB - -// -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// 3. 
The name of the author may not be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR -// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES -// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. -// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, -// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT -// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF -// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - + +// [The "BSD licence"] +// Copyright (c) 2005-2009 Gokulakannan Somasundaram, ElectronDB + +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// 3. The name of the author may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + namespace antlr3 { - -template<class ImplTraits> -class FileUtils -{ -public: + +template<class ImplTraits> +class FileUtils +{ +public: /** \brief Open an operating system file and return the descriptor * We just use the common open() and related functions here. * Later we might find better ways on systems @@ -42,29 +42,29 @@ public: * while file at once anyway, so it may be irrelevant. */ static ANTLR_FDSC AntlrFopen(const ANTLR_UINT8* filename, const char * mode); - + /** \brief Close an operating system file and free any handles * etc. 
*/ static void AntlrFclose (ANTLR_FDSC fd); - + static ANTLR_UINT32 AntlrFsize(const ANTLR_UINT8* filename); template<typename InputStreamType> static ANTLR_UINT32 AntlrRead8Bit(InputStreamType* input, const ANTLR_UINT8* fileName); static ANTLR_UINT32 AntlrFread(ANTLR_FDSC fdsc, ANTLR_UINT32 count, void* data); - -}; - -class ParseFileAbsentException : public std::exception -{ + +}; + +class ParseFileAbsentException : public std::exception +{ virtual const char* what() const noexcept { return " Parse File not Present"; } -}; - +}; + } - -#include "antlr3filestream.inl" - -#endif + +#include "antlr3filestream.inl" + +#endif diff --git a/contrib/libs/antlr3_cpp_runtime/include/antlr3filestream.inl b/contrib/libs/antlr3_cpp_runtime/include/antlr3filestream.inl index b67804adf2..a403c3929f 100644 --- a/contrib/libs/antlr3_cpp_runtime/include/antlr3filestream.inl +++ b/contrib/libs/antlr3_cpp_runtime/include/antlr3filestream.inl @@ -1,44 +1,44 @@ namespace antlr3 { - -template<class ImplTraits> + +template<class ImplTraits> ANTLR_FDSC FileUtils<ImplTraits>::AntlrFopen(const ANTLR_UINT8* filename, const char * mode) -{ +{ return (ANTLR_FDSC)fopen((const char *)filename, mode); -} - -template<class ImplTraits> +} + +template<class ImplTraits> void FileUtils<ImplTraits>::AntlrFclose (ANTLR_FDSC fd) -{ +{ fclose(fd); -} - -template<class ImplTraits> +} + +template<class ImplTraits> ANTLR_UINT32 FileUtils<ImplTraits>::AntlrFsize(const ANTLR_UINT8* filename) -{ +{ struct _stat statbuf; - - _stat((const char *)filename, &statbuf); - - return (ANTLR_UINT32)statbuf.st_size; -} - -template<class ImplTraits> + + _stat((const char *)filename, &statbuf); + + return (ANTLR_UINT32)statbuf.st_size; +} + +template<class ImplTraits> ANTLR_UINT32 FileUtils<ImplTraits>::AntlrFread(ANTLR_FDSC fdsc, ANTLR_UINT32 count, void* data) -{ +{ return (ANTLR_UINT32)fread(data, (size_t)count, 1, fdsc); -} - -template<class ImplTraits> +} + +template<class ImplTraits> template<typename 
InputStreamType> ANTLR_UINT32 FileUtils<ImplTraits>::AntlrRead8Bit(InputStreamType* input, const ANTLR_UINT8* fileName) -{ +{ ANTLR_FDSC infile; ANTLR_UINT32 fSize; - + /* Open the OS file in read binary mode */ infile = FileUtils<ImplTraits>::AntlrFopen(fileName, "rb"); - + /* Check that it was there */ if (infile == NULL) @@ -46,11 +46,11 @@ ANTLR_UINT32 FileUtils<ImplTraits>::AntlrRead8Bit(InputStreamType* input, const ParseFileAbsentException ex; throw ex; } - + /* It was there, so we can read the bytes now */ fSize = FileUtils<ImplTraits>::AntlrFsize(fileName); /* Size of input file */ - + /* Allocate buffer for this input set */ void* data = ImplTraits::AllocPolicyType::alloc(fSize); @@ -58,17 +58,17 @@ ANTLR_UINT32 FileUtils<ImplTraits>::AntlrRead8Bit(InputStreamType* input, const * the internal ANTLR encoding until they are read from the buffer */ FileUtils<ImplTraits>::AntlrFread(infile, fSize, data ); - + input->set_data( (unsigned char*) data ); input->set_sizeBuf( fSize ); - + input->set_isAllocated(true); - + /* And close the file handle */ FileUtils<ImplTraits>::AntlrFclose(infile); - + return ANTLR_SUCCESS; -} - +} + } diff --git a/contrib/libs/antlr3_cpp_runtime/include/antlr3input.hpp b/contrib/libs/antlr3_cpp_runtime/include/antlr3input.hpp index d167f5b392..b523af3499 100644 --- a/contrib/libs/antlr3_cpp_runtime/include/antlr3input.hpp +++ b/contrib/libs/antlr3_cpp_runtime/include/antlr3input.hpp @@ -1,53 +1,53 @@ -/** \file - * Defines the basic structures used to manipulate character - * streams from any input source. Any character size and encoding - * can in theory be used, so long as a set of functinos is provided that - * can return a 32 bit Integer representation of their characters amd efficiently mark and revert - * to specific offsets into their input streams. - */ +/** \file + * Defines the basic structures used to manipulate character + * streams from any input source. 
Any character size and encoding + * can in theory be used, so long as a set of functinos is provided that + * can return a 32 bit Integer representation of their characters amd efficiently mark and revert + * to specific offsets into their input streams. + */ #ifndef _ANTLR_INPUT_HPP #define _ANTLR_INPUT_HPP - -// [The "BSD licence"] -// Copyright (c) 2005-2009 Gokulakannan Somasundaram, ElectronDB - -// -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// 3. The name of the author may not be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR -// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES -// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. -// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, -// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT -// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF -// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - + +// [The "BSD licence"] +// Copyright (c) 2005-2009 Gokulakannan Somasundaram, ElectronDB + +// +// All rights reserved. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// 3. The name of the author may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + namespace antlr3 { - -/// Master context structure for an ANTLR3 C runtime based input stream. + +/// Master context structure for an ANTLR3 C runtime based input stream. /// \ingroup apistructures. Calling LT on this doesn't seem right. You would /// call it only with parser / TreeParser, and their respective input streams -/// has that function. calling it from lexer will throw a compile time error -/// - -template<class ImplTraits> +/// has that function. 
calling it from lexer will throw a compile time error +/// + +template<class ImplTraits> class InputStream : public ImplTraits::template IntStreamType< typename ImplTraits::InputStreamType > -{ -public: +{ +public: typedef typename ImplTraits::AllocPolicyType AllocPolicyType; typedef typename ImplTraits::LexStateType LexStateType; typedef typename ImplTraits::template IntStreamType< typename ImplTraits::InputStreamType > IntStreamType; @@ -57,82 +57,82 @@ public: typedef UnitType TokenType; typedef typename AllocPolicyType::template VectorType<LexStateType> MarkersType; typedef typename ImplTraits::StringType StringType; - -private: - /** Pointer the start of the input string, characters may be - * taken as offsets from here and in original input format encoding. - */ + +private: + /** Pointer the start of the input string, characters may be + * taken as offsets from here and in original input format encoding. + */ const DataType* m_data; - - /** Pointer to the next character to be consumed from the input data - * This is cast to point at the encoding of the original file that - * was read by the functions installed as pointer in this input stream - * context instance at file/string/whatever load time. - */ + + /** Pointer to the next character to be consumed from the input data + * This is cast to point at the encoding of the original file that + * was read by the functions installed as pointer in this input stream + * context instance at file/string/whatever load time. + */ const DataType* m_nextChar; - - /** Number of characters that can be consumed at this point in time. - * Mostly this is just what is left in the pre-read buffer, but if the - * input source is a stream such as a socket or something then we may - * call special read code to wait for more input. - */ + + /** Number of characters that can be consumed at this point in time. 
+ * Mostly this is just what is left in the pre-read buffer, but if the + * input source is a stream such as a socket or something then we may + * call special read code to wait for more input. + */ ANTLR_UINT32 m_sizeBuf; - - /** The line number we are traversing in the input file. This gets incremented - * by a newline() call in the lexer grammar actions. - */ + + /** The line number we are traversing in the input file. This gets incremented + * by a newline() call in the lexer grammar actions. + */ ANTLR_UINT32 m_line; - - /** Pointer into the input buffer where the current line - * started. - */ + + /** Pointer into the input buffer where the current line + * started. + */ const DataType* m_currentLine; - - /** The offset within the current line of the current character - */ + + /** The offset within the current line of the current character + */ ANTLR_INT32 m_charPositionInLine; - - /** Tracks how deep mark() calls are nested - */ + + /** Tracks how deep mark() calls are nested + */ ANTLR_UINT32 m_markDepth; - - /** List of mark() points in the input stream - */ + + /** List of mark() points in the input stream + */ MarkersType m_markers; - - /** File name string, set to pointer to memory if - * you set it manually as it will be free()d - */ + + /** File name string, set to pointer to memory if + * you set it manually as it will be free()d + */ StringType m_fileName; - - /** File number, needs to be set manually to some file index of your devising. - */ + + /** File number, needs to be set manually to some file index of your devising. + */ ANTLR_UINT32 m_fileNo; - + /// Character that automatically causes an internal line count - /// increment. - /// + /// increment. + /// ANTLR_UCHAR m_newlineChar; - - /// Indicates the size, in 8 bit units, of a single character. Note that - /// the C runtime does not deal with surrogates as this would be - /// slow and complicated. If this is a UTF-8 stream then this field - /// will be set to 0. 
Generally you are best working internally with 32 bit characters - /// as this is the most efficient. - /// + + /// Indicates the size, in 8 bit units, of a single character. Note that + /// the C runtime does not deal with surrogates as this would be + /// slow and complicated. If this is a UTF-8 stream then this field + /// will be set to 0. Generally you are best working internally with 32 bit characters + /// as this is the most efficient. + /// ANTLR_UINT8 m_charByteSize; - - /** Indicates if the data pointer was allocated by us, and so should be freed - * when the stream dies. - */ + + /** Indicates if the data pointer was allocated by us, and so should be freed + * when the stream dies. + */ bool m_isAllocated; - - /// Indicates the encoding scheme used in this input stream - /// - ANTLR_UINT32 m_encoding; - - /* API */ -public: + + /// Indicates the encoding scheme used in this input stream + /// + ANTLR_UINT32 m_encoding; + + /* API */ +public: InputStream(const ANTLR_UINT8* fileName, ANTLR_UINT32 encoding); InputStream(const ANTLR_UINT8* data, ANTLR_UINT32 encoding, ANTLR_UINT32 size, ANTLR_UINT8* name); ~InputStream(); @@ -150,7 +150,7 @@ public: ANTLR_UCHAR get_newlineChar() const; ANTLR_UINT8 get_charByteSize() const; ANTLR_UINT32 get_encoding() const; - + void set_data( DataType* data ); void set_isAllocated( bool isAllocated ); void set_nextChar( const DataType* nextChar ); @@ -165,81 +165,81 @@ public: void set_newlineChar( ANTLR_UCHAR newlineChar ); void set_charByteSize( ANTLR_UINT8 charByteSize ); void set_encoding( ANTLR_UINT32 encoding ); - + void inc_charPositionInLine(); void inc_line(); void inc_markDepth(); - + IntStreamType* get_istream(); - - /** Function that resets the input stream - */ + + /** Function that resets the input stream + */ void reset(); - - /** Pointer to a function that reuses and resets an input stream by - * supplying a new 'source' - */ - void reuse(ANTLR_UINT8* inString, ANTLR_UINT32 size, ANTLR_UINT8* name); - + + 
/** Pointer to a function that reuses and resets an input stream by + * supplying a new 'source' + */ + void reuse(ANTLR_UINT8* inString, ANTLR_UINT32 size, ANTLR_UINT8* name); + - /** Function to return the total size of the input buffer. For streams - * this may be just the total we have available so far. This means of course that - * the input stream must be careful to accumulate enough input so that any backtracking - * can be satisfied. - */ + /** Function to return the total size of the input buffer. For streams + * this may be just the total we have available so far. This means of course that + * the input stream must be careful to accumulate enough input so that any backtracking + * can be satisfied. + */ ANTLR_UINT32 size(); - - /** Function to return a substring of the input stream. String is returned in allocated - * memory and is in same encoding as the input stream itself, NOT internal ANTLR_UCHAR form. - */ + + /** Function to return a substring of the input stream. String is returned in allocated + * memory and is in same encoding as the input stream itself, NOT internal ANTLR_UCHAR form. + */ StringType substr(ANTLR_MARKER start, ANTLR_MARKER stop); - - /** Function to return the current line number in the input stream - */ + + /** Function to return the current line number in the input stream + */ ANTLR_UINT32 get_line(); - - /** Function to return the current line buffer in the input stream - * The pointer returned is directly into the input stream so you must copy - * it if you wish to manipulate it without damaging the input stream. Encoding - * is obviously in the same form as the input stream. - * \remark - * - Note taht this function wil lbe inaccurate if setLine is called as there + + /** Function to return the current line buffer in the input stream + * The pointer returned is directly into the input stream so you must copy + * it if you wish to manipulate it without damaging the input stream. 
Encoding + * is obviously in the same form as the input stream. + * \remark + * - Note taht this function wil lbe inaccurate if setLine is called as there * is no way at the moment to position the input stream at a particular line * number offset. - */ + */ const DataType* getLineBuf(); - - /** Function to return the current offset in the current input stream line - */ + + /** Function to return the current offset in the current input stream line + */ ANTLR_UINT32 get_charPositionInLine(); - - /** Function to set the current position in the current line. - */ + + /** Function to set the current position in the current line. + */ void set_charPositionInLine(ANTLR_UINT32 position); - - /** Function to override the default newline character that the input stream - * looks for to trigger the line/offset and line buffer recording information. - * \remark - * - By default the chracter '\n' will be installed as the newline trigger character. When this - * character is seen by the consume() function then the current line number is incremented and the - * current line offset is reset to 0. The Pointer for the line of input we are consuming - * is updated to point to the next character after this one in the input stream (which means it - * may become invalid if the last newline character in the file is seen (so watch out). + + /** Function to override the default newline character that the input stream + * looks for to trigger the line/offset and line buffer recording information. + * \remark + * - By default the chracter '\n' will be installed as the newline trigger character. When this + * character is seen by the consume() function then the current line number is incremented and the + * current line offset is reset to 0. The Pointer for the line of input we are consuming + * is updated to point to the next character after this one in the input stream (which means it + * may become invalid if the last newline character in the file is seen (so watch out). 
* - If for some reason you do not want the counters and pointers to be restee, you can set the - * chracter to some impossible character such as '\0' or whatever. - * - This is a single character only, so choose the last character in a sequence of two or more. - * - This is only a simple aid to error reporting - if you have a complicated binary input structure - * it may not be adequate, but you can always override every function in the input stream with your - * own of course, and can even write your own complete input stream set if you like. + * chracter to some impossible character such as '\0' or whatever. + * - This is a single character only, so choose the last character in a sequence of two or more. + * - This is only a simple aid to error reporting - if you have a complicated binary input structure + * it may not be adequate, but you can always override every function in the input stream with your + * own of course, and can even write your own complete input stream set if you like. * - It is your responsiblity to set a valid character for the input stream type. There is no point - * setting this to 0xFFFFFFFF if the input stream is 8 bit ASCII, as this will just be truncated and never + * setting this to 0xFFFFFFFF if the input stream is 8 bit ASCII, as this will just be truncated and never * trigger as the comparison will be (INT32)0xFF == (INT32)0xFFFFFFFF - */ + */ void set_newLineChar(ANTLR_UINT32 newlineChar); ANTLR_MARKER index_impl(); - -private: + +private: /** \brief Use the contents of an operating system file as the input * for an input stream. * @@ -249,7 +249,7 @@ private: * - One of the ANTLR3_ERR_ defines on error. 
*/ void createFileStream(const ANTLR_UINT8* fileName); - + /** \brief Use the supplied 'string' as input to the stream * * \param data Pointer to the input data @@ -259,46 +259,46 @@ private: */ void createStringStream(const ANTLR_UINT8* data); void genericSetupStream(); - + /// Determine endianess of the input stream and install the /// API required for the encoding in that format. /// void setupInputStream(); - -}; - -/** \brief Structure for track lex input states as part of mark() - * and rewind() of lexer. - */ -template<class ImplTraits> + +}; + +/** \brief Structure for track lex input states as part of mark() + * and rewind() of lexer. + */ +template<class ImplTraits> class LexState : public ImplTraits::AllocPolicyType -{ -public: +{ +public: typedef typename ImplTraits::StreamDataType DataType; - -private: - /** Pointer to the next character to be consumed from the input data - * This is cast to point at the encoding of the original file that - * was read by the functions installed as pointer in this input stream - * context instance at file/string/whatever load time. - */ + +private: + /** Pointer to the next character to be consumed from the input data + * This is cast to point at the encoding of the original file that + * was read by the functions installed as pointer in this input stream + * context instance at file/string/whatever load time. + */ const DataType* m_nextChar; - - /** The line number we are traversing in the input file. This gets incremented - * by a newline() call in the lexer grammer actions. - */ + + /** The line number we are traversing in the input file. This gets incremented + * by a newline() call in the lexer grammer actions. + */ ANTLR_UINT32 m_line; - - /** Pointer into the input buffer where the current line - * started. - */ + + /** Pointer into the input buffer where the current line + * started. 
+ */ const DataType* m_currentLine; - - /** The offset within the current line of the current character - */ + + /** The offset within the current line of the current character + */ ANTLR_INT32 m_charPositionInLine; - -public: + +public: LexState(); const DataType* get_nextChar() const; ANTLR_UINT32 get_line() const; @@ -308,18 +308,18 @@ public: void set_line( ANTLR_UINT32 line ); void set_currentLine( const DataType* currentLine ); void set_charPositionInLine( ANTLR_INT32 charPositionInLine ); -}; - -class ParseNullStringException : public std::exception -{ +}; + +class ParseNullStringException : public std::exception +{ virtual const char* what() const noexcept { return "Null String"; } -}; - +}; + } - -#include "antlr3input.inl" - + +#include "antlr3input.inl" + #endif /* _ANTLR_INPUT_H */ diff --git a/contrib/libs/antlr3_cpp_runtime/include/antlr3input.inl b/contrib/libs/antlr3_cpp_runtime/include/antlr3input.inl index 6837a06540..d3d2c07b04 100644 --- a/contrib/libs/antlr3_cpp_runtime/include/antlr3input.inl +++ b/contrib/libs/antlr3_cpp_runtime/include/antlr3input.inl @@ -1,307 +1,307 @@ namespace antlr3 { - -template<class ImplTraits> -InputStream<ImplTraits>::InputStream(const ANTLR_UINT8* fileName, ANTLR_UINT32 encoding) -{ - // First order of business is to read the file into some buffer space - // as just straight 8 bit bytes. Then we will work out the encoding and - // byte order and adjust the API functions that are installed for the - // default 8Bit stream accordingly. - // - this->createFileStream(fileName); - + +template<class ImplTraits> +InputStream<ImplTraits>::InputStream(const ANTLR_UINT8* fileName, ANTLR_UINT32 encoding) +{ + // First order of business is to read the file into some buffer space + // as just straight 8 bit bytes. Then we will work out the encoding and + // byte order and adjust the API functions that are installed for the + // default 8Bit stream accordingly. 
+ // + this->createFileStream(fileName); + // We have the data in memory now so we can deal with it according to - // the encoding scheme we were given by the user. - // - m_encoding = encoding; - + // the encoding scheme we were given by the user. + // + m_encoding = encoding; + // Now we need to work out the endian type and install any - // API functions that differ from 8Bit - // - this->setupInputStream(); - - // Now we can set up the file name + // API functions that differ from 8Bit + // + this->setupInputStream(); + + // Now we can set up the file name // BaseType::m_streamName = (const char* )fileName; m_fileName = BaseType::m_streamName; -} - -template<class ImplTraits> -InputStream<ImplTraits>::InputStream(const ANTLR_UINT8* data, ANTLR_UINT32 encoding, ANTLR_UINT32 size, ANTLR_UINT8* name) -{ +} + +template<class ImplTraits> +InputStream<ImplTraits>::InputStream(const ANTLR_UINT8* data, ANTLR_UINT32 encoding, ANTLR_UINT32 size, ANTLR_UINT8* name) +{ // First order of business is to set up the stream and install the data pointer. - // Then we will work out the encoding and byte order and adjust the API functions that are installed for the - // default 8Bit stream accordingly. - // - this->createStringStream(data); + // Then we will work out the encoding and byte order and adjust the API functions that are installed for the + // default 8Bit stream accordingly. + // + this->createStringStream(data); - // Size (in bytes) of the given 'string' - // + // Size (in bytes) of the given 'string' + // m_sizeBuf = size; - + // We have the data in memory now so we can deal with it according to - // the encoding scheme we were given by the user. - // - m_encoding = encoding; - + // the encoding scheme we were given by the user. 
+ // + m_encoding = encoding; + // Now we need to work out the endian type and install any - // API functions that differ from 8Bit - // - this->setupInputStream(); - - // Now we can set up the file name + // API functions that differ from 8Bit + // + this->setupInputStream(); + + // Now we can set up the file name // BaseType::m_streamName = (name == NULL ) ? "" : (const char*)name; m_fileName = BaseType::m_streamName; - -} - -template<class ImplTraits> -void InputStream<ImplTraits>::createStringStream(const ANTLR_UINT8* data) -{ + +} + +template<class ImplTraits> +void InputStream<ImplTraits>::createStringStream(const ANTLR_UINT8* data) +{ if (data == NULL) { ParseNullStringException ex; throw ex; } - + // Structure was allocated correctly, now we can install the pointer // - m_data = data; + m_data = data; m_isAllocated = false; - + // Call the common 8 bit input stream handler // initialization. // this->genericSetupStream(); -} - -template<class ImplTraits> -void InputStream<ImplTraits>::createFileStream(const ANTLR_UINT8* fileName) -{ +} + +template<class ImplTraits> +void InputStream<ImplTraits>::createFileStream(const ANTLR_UINT8* fileName) +{ if (fileName == NULL) { ParseFileAbsentException ex; throw ex; } - + // Structure was allocated correctly, now we can read the file. // FileUtils<ImplTraits>::AntlrRead8Bit(this, fileName); - + // Call the common 8 bit input stream handler // initialization. 
// this->genericSetupStream(); -} - -template<class ImplTraits> -void InputStream<ImplTraits>::genericSetupStream() -{ +} + +template<class ImplTraits> +void InputStream<ImplTraits>::genericSetupStream() +{ this->set_charByteSize(1); - /* Set up the input stream brand new - */ - this->reset(); + /* Set up the input stream brand new + */ + this->reset(); - /* Install default line separator character (it can be replaced - * by the grammar programmer later) - */ - this->set_newLineChar((ANTLR_UCHAR)'\n'); -} - -template<class ImplTraits> -InputStream<ImplTraits>::~InputStream() -{ + /* Install default line separator character (it can be replaced + * by the grammar programmer later) + */ + this->set_newLineChar((ANTLR_UCHAR)'\n'); +} + +template<class ImplTraits> +InputStream<ImplTraits>::~InputStream() +{ // Free the input stream buffer if we allocated it - // + // if (m_isAllocated && (m_data != NULL)) AllocPolicyType::free((void*)m_data); //const_cast is required -} - -template<class ImplTraits> -ANTLR_INLINE const typename InputStream<ImplTraits>::DataType* InputStream<ImplTraits>::get_data() const -{ +} + +template<class ImplTraits> +ANTLR_INLINE const typename InputStream<ImplTraits>::DataType* InputStream<ImplTraits>::get_data() const +{ return m_data; -} -template<class ImplTraits> -ANTLR_INLINE bool InputStream<ImplTraits>::get_isAllocated() const -{ +} +template<class ImplTraits> +ANTLR_INLINE bool InputStream<ImplTraits>::get_isAllocated() const +{ return m_isAllocated; -} -template<class ImplTraits> -ANTLR_INLINE const typename InputStream<ImplTraits>::DataType* InputStream<ImplTraits>::get_nextChar() const -{ +} +template<class ImplTraits> +ANTLR_INLINE const typename InputStream<ImplTraits>::DataType* InputStream<ImplTraits>::get_nextChar() const +{ return m_nextChar; -} -template<class ImplTraits> -ANTLR_INLINE ANTLR_UINT32 InputStream<ImplTraits>::get_sizeBuf() const -{ +} +template<class ImplTraits> +ANTLR_INLINE ANTLR_UINT32 
InputStream<ImplTraits>::get_sizeBuf() const +{ return m_sizeBuf; -} -template<class ImplTraits> -ANTLR_INLINE ANTLR_UINT32 InputStream<ImplTraits>::get_line() const -{ +} +template<class ImplTraits> +ANTLR_INLINE ANTLR_UINT32 InputStream<ImplTraits>::get_line() const +{ return m_line; -} -template<class ImplTraits> -ANTLR_INLINE const typename InputStream<ImplTraits>::DataType* InputStream<ImplTraits>::get_currentLine() const -{ +} +template<class ImplTraits> +ANTLR_INLINE const typename InputStream<ImplTraits>::DataType* InputStream<ImplTraits>::get_currentLine() const +{ return m_currentLine; -} -template<class ImplTraits> -ANTLR_INLINE ANTLR_INT32 InputStream<ImplTraits>::get_charPositionInLine() const -{ +} +template<class ImplTraits> +ANTLR_INLINE ANTLR_INT32 InputStream<ImplTraits>::get_charPositionInLine() const +{ return m_charPositionInLine; -} -template<class ImplTraits> -ANTLR_INLINE ANTLR_UINT32 InputStream<ImplTraits>::get_markDepth() const -{ +} +template<class ImplTraits> +ANTLR_INLINE ANTLR_UINT32 InputStream<ImplTraits>::get_markDepth() const +{ return m_markDepth; -} -template<class ImplTraits> -ANTLR_INLINE typename InputStream<ImplTraits>::MarkersType& InputStream<ImplTraits>::get_markers() -{ +} +template<class ImplTraits> +ANTLR_INLINE typename InputStream<ImplTraits>::MarkersType& InputStream<ImplTraits>::get_markers() +{ return m_markers; -} -template<class ImplTraits> -ANTLR_INLINE const typename InputStream<ImplTraits>::StringType& InputStream<ImplTraits>::get_fileName() const -{ +} +template<class ImplTraits> +ANTLR_INLINE const typename InputStream<ImplTraits>::StringType& InputStream<ImplTraits>::get_fileName() const +{ return m_fileName; -} -template<class ImplTraits> -ANTLR_INLINE ANTLR_UINT32 InputStream<ImplTraits>::get_fileNo() const -{ +} +template<class ImplTraits> +ANTLR_INLINE ANTLR_UINT32 InputStream<ImplTraits>::get_fileNo() const +{ return m_fileNo; -} -template<class ImplTraits> -ANTLR_INLINE ANTLR_UCHAR 
InputStream<ImplTraits>::get_newlineChar() const -{ +} +template<class ImplTraits> +ANTLR_INLINE ANTLR_UCHAR InputStream<ImplTraits>::get_newlineChar() const +{ return m_newlineChar; -} -template<class ImplTraits> -ANTLR_INLINE ANTLR_UINT8 InputStream<ImplTraits>::get_charByteSize() const -{ +} +template<class ImplTraits> +ANTLR_INLINE ANTLR_UINT8 InputStream<ImplTraits>::get_charByteSize() const +{ return m_charByteSize; -} -template<class ImplTraits> -ANTLR_INLINE ANTLR_UINT32 InputStream<ImplTraits>::get_encoding() const -{ +} +template<class ImplTraits> +ANTLR_INLINE ANTLR_UINT32 InputStream<ImplTraits>::get_encoding() const +{ return m_encoding; -} -template<class ImplTraits> -ANTLR_INLINE void InputStream<ImplTraits>::set_data( DataType* data ) -{ +} +template<class ImplTraits> +ANTLR_INLINE void InputStream<ImplTraits>::set_data( DataType* data ) +{ m_data = data; -} -template<class ImplTraits> -ANTLR_INLINE void InputStream<ImplTraits>::set_isAllocated( bool isAllocated ) -{ +} +template<class ImplTraits> +ANTLR_INLINE void InputStream<ImplTraits>::set_isAllocated( bool isAllocated ) +{ m_isAllocated = isAllocated; -} -template<class ImplTraits> -ANTLR_INLINE void InputStream<ImplTraits>::set_nextChar( const DataType* nextChar ) -{ +} +template<class ImplTraits> +ANTLR_INLINE void InputStream<ImplTraits>::set_nextChar( const DataType* nextChar ) +{ m_nextChar = nextChar; -} -template<class ImplTraits> -ANTLR_INLINE void InputStream<ImplTraits>::set_sizeBuf( ANTLR_UINT32 sizeBuf ) -{ +} +template<class ImplTraits> +ANTLR_INLINE void InputStream<ImplTraits>::set_sizeBuf( ANTLR_UINT32 sizeBuf ) +{ m_sizeBuf = sizeBuf; -} -template<class ImplTraits> -ANTLR_INLINE void InputStream<ImplTraits>::set_line( ANTLR_UINT32 line ) -{ +} +template<class ImplTraits> +ANTLR_INLINE void InputStream<ImplTraits>::set_line( ANTLR_UINT32 line ) +{ m_line = line; -} -template<class ImplTraits> -ANTLR_INLINE void InputStream<ImplTraits>::set_currentLine( const DataType* 
currentLine ) -{ +} +template<class ImplTraits> +ANTLR_INLINE void InputStream<ImplTraits>::set_currentLine( const DataType* currentLine ) +{ m_currentLine = currentLine; -} -template<class ImplTraits> -ANTLR_INLINE void InputStream<ImplTraits>::set_charPositionInLine( ANTLR_INT32 charPositionInLine ) -{ +} +template<class ImplTraits> +ANTLR_INLINE void InputStream<ImplTraits>::set_charPositionInLine( ANTLR_INT32 charPositionInLine ) +{ m_charPositionInLine = charPositionInLine; -} -template<class ImplTraits> -ANTLR_INLINE void InputStream<ImplTraits>::set_markDepth( ANTLR_UINT32 markDepth ) -{ +} +template<class ImplTraits> +ANTLR_INLINE void InputStream<ImplTraits>::set_markDepth( ANTLR_UINT32 markDepth ) +{ m_markDepth = markDepth; -} -template<class ImplTraits> -ANTLR_INLINE void InputStream<ImplTraits>::set_markers( const MarkersType& markers ) -{ +} +template<class ImplTraits> +ANTLR_INLINE void InputStream<ImplTraits>::set_markers( const MarkersType& markers ) +{ m_markers = markers; -} -template<class ImplTraits> -ANTLR_INLINE void InputStream<ImplTraits>::set_fileName( const StringType& fileName ) -{ +} +template<class ImplTraits> +ANTLR_INLINE void InputStream<ImplTraits>::set_fileName( const StringType& fileName ) +{ m_fileName = fileName; -} -template<class ImplTraits> -ANTLR_INLINE void InputStream<ImplTraits>::set_fileNo( ANTLR_UINT32 fileNo ) -{ +} +template<class ImplTraits> +ANTLR_INLINE void InputStream<ImplTraits>::set_fileNo( ANTLR_UINT32 fileNo ) +{ m_fileNo = fileNo; -} -template<class ImplTraits> -ANTLR_INLINE void InputStream<ImplTraits>::set_newlineChar( ANTLR_UCHAR newlineChar ) -{ +} +template<class ImplTraits> +ANTLR_INLINE void InputStream<ImplTraits>::set_newlineChar( ANTLR_UCHAR newlineChar ) +{ m_newlineChar = newlineChar; -} -template<class ImplTraits> -ANTLR_INLINE void InputStream<ImplTraits>::set_charByteSize( ANTLR_UINT8 charByteSize ) -{ +} +template<class ImplTraits> +ANTLR_INLINE void 
InputStream<ImplTraits>::set_charByteSize( ANTLR_UINT8 charByteSize ) +{ m_charByteSize = charByteSize; -} -template<class ImplTraits> -ANTLR_INLINE void InputStream<ImplTraits>::set_encoding( ANTLR_UINT32 encoding ) -{ +} +template<class ImplTraits> +ANTLR_INLINE void InputStream<ImplTraits>::set_encoding( ANTLR_UINT32 encoding ) +{ m_encoding = encoding; -} - -template<class ImplTraits> -ANTLR_INLINE void InputStream<ImplTraits>::inc_charPositionInLine() -{ +} + +template<class ImplTraits> +ANTLR_INLINE void InputStream<ImplTraits>::inc_charPositionInLine() +{ ++m_charPositionInLine; -} - -template<class ImplTraits> -ANTLR_INLINE void InputStream<ImplTraits>::inc_line() -{ +} + +template<class ImplTraits> +ANTLR_INLINE void InputStream<ImplTraits>::inc_line() +{ ++m_line; -} - -template<class ImplTraits> -ANTLR_INLINE void InputStream<ImplTraits>::inc_markDepth() -{ +} + +template<class ImplTraits> +ANTLR_INLINE void InputStream<ImplTraits>::inc_markDepth() +{ ++m_markDepth; -} - -template<class ImplTraits> +} + +template<class ImplTraits> ANTLR_INLINE void InputStream<ImplTraits>::reset() -{ +{ m_nextChar = m_data; /* Input at first character */ m_line = 1; /* starts at line 1 */ m_charPositionInLine = 0; m_currentLine = m_data; m_markDepth = 0; /* Reset markers */ - /* Clear out up the markers table if it is there - */ + /* Clear out up the markers table if it is there + */ m_markers.clear(); -} - -template<class ImplTraits> -void InputStream<ImplTraits>::reuse(ANTLR_UINT8* inString, ANTLR_UINT32 size, ANTLR_UINT8* name) -{ +} + +template<class ImplTraits> +void InputStream<ImplTraits>::reuse(ANTLR_UINT8* inString, ANTLR_UINT32 size, ANTLR_UINT8* name) +{ m_isAllocated = false; m_data = inString; m_sizeBuf = size; - // Now we can set up the file name. As we are reusing the stream, there may already - // be a string that we can reuse for holding the filename. - // + // Now we can set up the file name. 
As we are reusing the stream, there may already + // be a string that we can reuse for holding the filename. + // if ( BaseType::m_streamName.empty() ) { BaseType::m_streamName = ((name == NULL) ? "-memory-" : (const char *)name); @@ -311,309 +311,309 @@ void InputStream<ImplTraits>::reuse(ANTLR_UINT8* inString, ANTLR_UINT32 size, { BaseType::m_streamName = ((name == NULL) ? "-memory-" : (const char *)name); } - - this->reset(); -} - -/* -template<class ImplTraits> + + this->reset(); +} + +/* +template<class ImplTraits> typename InputStream<ImplTraits>::DataType* InputStream<ImplTraits>::LT(ANTLR_INT32 lt) -{ +{ return this->LA(lt); -} -*/ - -template<class ImplTraits> +} +*/ + +template<class ImplTraits> ANTLR_UINT32 InputStream<ImplTraits>::size() -{ +{ return m_sizeBuf; -} - -template<class ImplTraits> +} + +template<class ImplTraits> ANTLR_MARKER InputStream<ImplTraits>::index_impl() -{ +{ return (ANTLR_MARKER)m_nextChar; -} - - -template<class ImplTraits> +} + + +template<class ImplTraits> typename InputStream<ImplTraits>::StringType InputStream<ImplTraits>::substr(ANTLR_MARKER start, ANTLR_MARKER stop) -{ +{ std::size_t len = static_cast<std::size_t>( (stop-start)/sizeof(DataType) + 1 ); StringType str( (const char*)start, len ); return str; -} - -template<class ImplTraits> +} + +template<class ImplTraits> ANTLR_UINT32 InputStream<ImplTraits>::get_line() -{ +{ return m_line; -} - -template<class ImplTraits> +} + +template<class ImplTraits> const typename InputStream<ImplTraits>::DataType* InputStream<ImplTraits>::getLineBuf() -{ +{ return m_currentLine; -} - -template<class ImplTraits> +} + +template<class ImplTraits> ANTLR_INLINE ANTLR_UINT32 InputStream<ImplTraits>::get_charPositionInLine() -{ +{ return m_charPositionInLine; -} - -template<class ImplTraits> +} + +template<class ImplTraits> ANTLR_INLINE void InputStream<ImplTraits>::set_charPositionInLine(ANTLR_UINT32 position) -{ +{ m_charPositionInLine = position; -} - -template<class ImplTraits> +} + 
+template<class ImplTraits> void InputStream<ImplTraits>::set_newLineChar(ANTLR_UINT32 newlineChar) -{ +{ m_newlineChar = newlineChar; -} - -template<class ImplTraits> -ANTLR_INLINE LexState<ImplTraits>::LexState() -{ +} + +template<class ImplTraits> +ANTLR_INLINE LexState<ImplTraits>::LexState() +{ m_nextChar = NULL; m_line = 0; m_currentLine = NULL; m_charPositionInLine = 0; -} - -template<class ImplTraits> -ANTLR_INLINE const typename LexState<ImplTraits>::DataType* LexState<ImplTraits>::get_nextChar() const -{ +} + +template<class ImplTraits> +ANTLR_INLINE const typename LexState<ImplTraits>::DataType* LexState<ImplTraits>::get_nextChar() const +{ return m_nextChar; -} - -template<class ImplTraits> -ANTLR_INLINE ANTLR_UINT32 LexState<ImplTraits>::get_line() const -{ +} + +template<class ImplTraits> +ANTLR_INLINE ANTLR_UINT32 LexState<ImplTraits>::get_line() const +{ return m_line; -} - -template<class ImplTraits> -ANTLR_INLINE const typename LexState<ImplTraits>::DataType* LexState<ImplTraits>::get_currentLine() const -{ +} + +template<class ImplTraits> +ANTLR_INLINE const typename LexState<ImplTraits>::DataType* LexState<ImplTraits>::get_currentLine() const +{ return m_currentLine; -} - -template<class ImplTraits> -ANTLR_INLINE ANTLR_INT32 LexState<ImplTraits>::get_charPositionInLine() const -{ +} + +template<class ImplTraits> +ANTLR_INLINE ANTLR_INT32 LexState<ImplTraits>::get_charPositionInLine() const +{ return m_charPositionInLine; -} - -template<class ImplTraits> -ANTLR_INLINE void LexState<ImplTraits>::set_nextChar( const DataType* nextChar ) -{ +} + +template<class ImplTraits> +ANTLR_INLINE void LexState<ImplTraits>::set_nextChar( const DataType* nextChar ) +{ m_nextChar = nextChar; -} - -template<class ImplTraits> -ANTLR_INLINE void LexState<ImplTraits>::set_line( ANTLR_UINT32 line ) -{ +} + +template<class ImplTraits> +ANTLR_INLINE void LexState<ImplTraits>::set_line( ANTLR_UINT32 line ) +{ m_line = line; -} - -template<class ImplTraits> -ANTLR_INLINE 
void LexState<ImplTraits>::set_currentLine( const DataType* currentLine ) -{ +} + +template<class ImplTraits> +ANTLR_INLINE void LexState<ImplTraits>::set_currentLine( const DataType* currentLine ) +{ m_currentLine = currentLine; -} - -template<class ImplTraits> -ANTLR_INLINE void LexState<ImplTraits>::set_charPositionInLine( ANTLR_INT32 charPositionInLine ) -{ +} + +template<class ImplTraits> +ANTLR_INLINE void LexState<ImplTraits>::set_charPositionInLine( ANTLR_INT32 charPositionInLine ) +{ m_charPositionInLine = charPositionInLine; -} - -template<class ImplTraits> +} + +template<class ImplTraits> ANTLR_INLINE typename InputStream<ImplTraits>::IntStreamType* InputStream<ImplTraits>::get_istream() -{ +{ return this; -} - -template<class ImplTraits> -void InputStream<ImplTraits>::setupInputStream() -{ +} + +template<class ImplTraits> +void InputStream<ImplTraits>::setupInputStream() +{ bool isBigEndian; - - // Used to determine the endianness of the machine we are currently - // running on. - // - ANTLR_UINT16 bomTest = 0xFEFF; + + // Used to determine the endianness of the machine we are currently + // running on. + // + ANTLR_UINT16 bomTest = 0xFEFF; - // What endianess is the machine we are running on? If the incoming - // encoding endianess is the same as this machine's natural byte order - // then we can use more efficient API calls. - // - if (*((ANTLR_UINT8*)(&bomTest)) == 0xFE) - { - isBigEndian = true; - } - else - { - isBigEndian = false; - } - - // What encoding did the user tell us {s}he thought it was? I am going - // to get sick of the questions on antlr-interest, I know I am. - // - switch (m_encoding) - { + // What endianess is the machine we are running on? If the incoming + // encoding endianess is the same as this machine's natural byte order + // then we can use more efficient API calls. 
+ // + if (*((ANTLR_UINT8*)(&bomTest)) == 0xFE) + { + isBigEndian = true; + } + else + { + isBigEndian = false; + } + + // What encoding did the user tell us {s}he thought it was? I am going + // to get sick of the questions on antlr-interest, I know I am. + // + switch (m_encoding) + { case ENC_UTF8: - - // See if there is a BOM at the start of this UTF-8 sequence - // and just eat it if there is. Windows .TXT files have this for instance - // as it identifies UTF-8 even though it is of no consequence for byte order - // as UTF-8 does not have a byte order. - // - if ( (*(m_nextChar)) == 0xEF - && (*(m_nextChar+1)) == 0xBB - && (*(m_nextChar+2)) == 0xBF - ) - { - // The UTF8 BOM is present so skip it - // - m_nextChar += 3; - } - - // Install the UTF8 input routines - // + + // See if there is a BOM at the start of this UTF-8 sequence + // and just eat it if there is. Windows .TXT files have this for instance + // as it identifies UTF-8 even though it is of no consequence for byte order + // as UTF-8 does not have a byte order. + // + if ( (*(m_nextChar)) == 0xEF + && (*(m_nextChar+1)) == 0xBB + && (*(m_nextChar+2)) == 0xBF + ) + { + // The UTF8 BOM is present so skip it + // + m_nextChar += 3; + } + + // Install the UTF8 input routines + // this->setupIntStream( isBigEndian, isBigEndian ); this->set_charByteSize(0); - break; - + break; + case ENC_UTF16: - - // See if there is a BOM at the start of the input. If not then - // we assume that the byte order is the natural order of this - // machine (or it is really UCS2). If there is a BOM we determine if the encoding - // is the same as the natural order of this machine. - // - if ( (ANTLR_UINT8)(*((ANTLR_UINT8*)m_nextChar)) == 0xFE - && (ANTLR_UINT8)(*((ANTLR_UINT8*)m_nextChar+1)) == 0xFF - ) - { - // BOM Present, indicates Big Endian - // - m_nextChar += 1; - + + // See if there is a BOM at the start of the input. 
If not then + // we assume that the byte order is the natural order of this + // machine (or it is really UCS2). If there is a BOM we determine if the encoding + // is the same as the natural order of this machine. + // + if ( (ANTLR_UINT8)(*((ANTLR_UINT8*)m_nextChar)) == 0xFE + && (ANTLR_UINT8)(*((ANTLR_UINT8*)m_nextChar+1)) == 0xFF + ) + { + // BOM Present, indicates Big Endian + // + m_nextChar += 1; + this->setupIntStream( isBigEndian, true ); - } - else if ( (ANTLR_UINT8)(*((ANTLR_UINT8*)m_nextChar)) == 0xFF - && (ANTLR_UINT8)(*((ANTLR_UINT8*)m_nextChar+1)) == 0xFE - ) - { - // BOM present, indicates Little Endian - // - m_nextChar += 1; - - this->setupIntStream( isBigEndian, false ); - } - else - { - // No BOM present, assume local computer byte order - // - this->setupIntStream(isBigEndian, isBigEndian); - } + } + else if ( (ANTLR_UINT8)(*((ANTLR_UINT8*)m_nextChar)) == 0xFF + && (ANTLR_UINT8)(*((ANTLR_UINT8*)m_nextChar+1)) == 0xFE + ) + { + // BOM present, indicates Little Endian + // + m_nextChar += 1; + + this->setupIntStream( isBigEndian, false ); + } + else + { + // No BOM present, assume local computer byte order + // + this->setupIntStream(isBigEndian, isBigEndian); + } this->set_charByteSize(2); - break; - + break; + case ENC_UTF32: - - // See if there is a BOM at the start of the input. If not then - // we assume that the byte order is the natural order of this - // machine. If there is we determine if the encoding - // is the same as the natural order of this machine. 
- // - if ( (ANTLR_UINT8)(*((ANTLR_UINT8*)m_nextChar)) == 0x00 - && (ANTLR_UINT8)(*((ANTLR_UINT8*)m_nextChar+1)) == 0x00 - && (ANTLR_UINT8)(*((ANTLR_UINT8*)m_nextChar+2)) == 0xFE - && (ANTLR_UINT8)(*((ANTLR_UINT8*)m_nextChar+3)) == 0xFF - ) - { - // BOM Present, indicates Big Endian - // - m_nextChar += 1; - - this->setupIntStream(isBigEndian, true); - } - else if ( (ANTLR_UINT8)(*((ANTLR_UINT8*)m_nextChar)) == 0xFF - && (ANTLR_UINT8)(*((ANTLR_UINT8*)m_nextChar+1)) == 0xFE - && (ANTLR_UINT8)(*((ANTLR_UINT8*)m_nextChar+1)) == 0x00 - && (ANTLR_UINT8)(*((ANTLR_UINT8*)m_nextChar+1)) == 0x00 - ) - { - // BOM present, indicates Little Endian - // - m_nextChar += 1; - + + // See if there is a BOM at the start of the input. If not then + // we assume that the byte order is the natural order of this + // machine. If there is we determine if the encoding + // is the same as the natural order of this machine. + // + if ( (ANTLR_UINT8)(*((ANTLR_UINT8*)m_nextChar)) == 0x00 + && (ANTLR_UINT8)(*((ANTLR_UINT8*)m_nextChar+1)) == 0x00 + && (ANTLR_UINT8)(*((ANTLR_UINT8*)m_nextChar+2)) == 0xFE + && (ANTLR_UINT8)(*((ANTLR_UINT8*)m_nextChar+3)) == 0xFF + ) + { + // BOM Present, indicates Big Endian + // + m_nextChar += 1; + + this->setupIntStream(isBigEndian, true); + } + else if ( (ANTLR_UINT8)(*((ANTLR_UINT8*)m_nextChar)) == 0xFF + && (ANTLR_UINT8)(*((ANTLR_UINT8*)m_nextChar+1)) == 0xFE + && (ANTLR_UINT8)(*((ANTLR_UINT8*)m_nextChar+2)) == 0x00 + && (ANTLR_UINT8)(*((ANTLR_UINT8*)m_nextChar+3)) == 0x00 + ) + { + // BOM present (bytes FF FE 00 00 at offsets 0..3), indicates Little Endian + // + m_nextChar += 1; + this->setupIntStream( isBigEndian, false ); - } - else - { - // No BOM present, assume local computer byte order - // + } + else + { + // No BOM present, assume local computer byte order + // this->setupIntStream( isBigEndian, isBigEndian ); - } + } this->set_charByteSize(4); - break; - + break; + case ENC_UTF16BE: - - // Encoding is definately Big Endian with no BOM - // + + // Encoding is definately Big Endian
with no BOM + // this->setupIntStream( isBigEndian, true ); this->set_charByteSize(2); - break; - + break; + case ENC_UTF16LE: - - // Encoding is definately Little Endian with no BOM - // - this->setupIntStream( isBigEndian, false ); + + // Encoding is definately Little Endian with no BOM + // + this->setupIntStream( isBigEndian, false ); this->set_charByteSize(2); - break; - + break; + case ENC_UTF32BE: - - // Encoding is definately Big Endian with no BOM - // + + // Encoding is definately Big Endian with no BOM + // this->setupIntStream( isBigEndian, true ); this->set_charByteSize(4); - break; - + break; + case ENC_UTF32LE: - - // Encoding is definately Little Endian with no BOM - // + + // Encoding is definately Little Endian with no BOM + // this->setupIntStream( isBigEndian, false ); this->set_charByteSize(4); - break; - + break; + case ENC_EBCDIC: - - // EBCDIC is basically the same as ASCII but with an on the - // fly translation to ASCII - // - this->setupIntStream( isBigEndian, isBigEndian ); + + // EBCDIC is basically the same as ASCII but with an on the + // fly translation to ASCII + // + this->setupIntStream( isBigEndian, isBigEndian ); this->set_charByteSize(1); - break; - + break; + case ENC_8BIT: - default: - - // Standard 8bit/ASCII - // - this->setupIntStream( isBigEndian, isBigEndian ); + default: + + // Standard 8bit/ASCII + // + this->setupIntStream( isBigEndian, isBigEndian ); this->set_charByteSize(1); - break; + break; } -} - +} + } diff --git a/contrib/libs/antlr3_cpp_runtime/include/antlr3interfaces.hpp b/contrib/libs/antlr3_cpp_runtime/include/antlr3interfaces.hpp index 5f04b40c49..59c3236e9c 100644 --- a/contrib/libs/antlr3_cpp_runtime/include/antlr3interfaces.hpp +++ b/contrib/libs/antlr3_cpp_runtime/include/antlr3interfaces.hpp @@ -1,41 +1,41 @@ -/** \file - * Declarations for all the antlr3 C runtime interfaces/classes. 
This - * allows the structures that define the interfaces to contain pointers to - * each other without trying to sort out the cyclic interdependencies that - * would otherwise result. - */ +/** \file + * Declarations for all the antlr3 C runtime interfaces/classes. This + * allows the structures that define the interfaces to contain pointers to + * each other without trying to sort out the cyclic interdependencies that + * would otherwise result. + */ #ifndef _ANTLR3_INTERFACES_HPP #define _ANTLR3_INTERFACES_HPP - -// [The "BSD licence"] -// Copyright (c) 2005-2009 Gokulakannan Somasundaram, ElectronDB -// -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// 3. The name of the author may not be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR -// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES -// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
-// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, -// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT -// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF -// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - + +// [The "BSD licence"] +// Copyright (c) 2005-2009 Gokulakannan Somasundaram, ElectronDB +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// 3. The name of the author may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + namespace antlr3 { - + // Definitions that indicate the encoding scheme character streams and strings etc enum Encoding { @@ -94,200 +94,200 @@ enum ExceptionType , MISSING_TOKEN_EXCEPTION }; -template<class ImplTraits, class SuperType> -class IntStream; - -/// Pointer to an instantiation of 'class' #ANTLR3_RECOGNIZER_SHARED_STATE -/// \ingroup ANTLR3_RECOGNIZER_SHARED_STATE -/// -template<class ImplTraits, class SuperType> -class RecognizerSharedState; - -/// Pointer to an instantiation of 'class' #ANTLR3_BITSET_LIST -/// \ingroup ANTLR3_BITSET_LIST -/// -template<class AllocatorType> -class BitsetList; - -/// Pointer to an instantiation of 'class' #ANTLR3_BITSET -/// \ingroup ANTLR3_BITSET -/// -template<class AllocatorType> -class Bitset; - -/// Pointer to an instantiation of 'class' #ANTLR3_COMMON_TOKEN -/// \ingroup ANTLR3_COMMON_TOKEN -/// -template<class ImplTraits> -class CommonToken; - -template<class ImplTraits, ExceptionType Ex, class StreamType> -class ANTLR_Exception; - -/// Pointer to an instantiation of 'class' #ANTLR3_TOPO -/// \ingroup ANTLR3_TOPO -/// -template<class AllocPolicyType> -class Topo; - -/// Pointer to an instantiation of 'class' #ANTLR3_INPUT_STREAM -/// \ingroup ANTLR3_INPUT_STREAM -/// -template<class ImplTraits> -class InputStream; - -/// Pointer to an instantiation of 'class' #ANTLR3_LEX_STATE -/// \ingroup ANTLR3_LEX_STATE -/// -template<class ImplTraits> -class LexState; - -/// Pointer to an instantiation of 'class' 
#ANTLR3_TOKEN_SOURCE -/// \ingroup ANTLR3_TOKEN_SOURCE -/// -template<class ImplTraits> -class TokenSource; - -/// Pointer to an instantiation of 'class' #ANTLR3_TOKEN_STREAM -/// \ingroup ANTLR3_TOKEN_STREAM -/// -template<class ImplTraits> -class TokenStream; - -/// Pointer to an instantiation of 'class' #ANTLR3_COMMON_TOKEN_STREAM -/// \ingroup ANTLR3_COMMON_TOKEN_STREAM -/// -template<class ImplTraits> -class CommonTokenStream; - -/// Pointer to an instantiation of 'class' #ANTLR3_CYCLIC_DFA -/// \ingroup ANTLR3_CYCLIC_DFA -/// -template<class ImplTraits, class ComponentType> -class CyclicDFA; - -/// Pointer to an instantiation of 'class' #ANTLR3_LEXER -/// \ingroup ANTLR3_LEXER -/// -template<class ImplTraits> -class Lexer; - -/// Pointer to an instantiation of 'class' #ANTLR3_PARSER -/// \ingroup ANTLR3_PARSER -/// -template<class ImplTraits> -class Parser; - -/// Pointer to an instantiation of 'class' #ANTLR3_BASE_TREE -/// \ingroup ANTLR3_BASE_TREE -/// -template<class ImplTraits> -class BaseTree; - -/// Pointer to an instantiation of 'class' #ANTLR3_COMMON_TREE -/// \ingroup ANTLR3_COMMON_TREE -/// -template<class ImplTraits> -class CommonTree; - -/// Pointer to an instantiation of 'class' #ANTLR3_PARSE_TREE -/// \ingroup ANTLR3_PARSE_TREE -/// -template<class ImplTraits> -class ParseTree; - -/// Pointer to an instantiation of 'class' #ANTLR3_TREE_NODE_STREAM -/// \ingroup ANTLR3_TREE_NODE_STREAM -/// -template<class ImplTraits> -class TreeNodeStream; - -/// Pointer to an instantiation of 'class' #ANTLR3_COMMON_TREE_NODE_STREAM -/// \ingroup ANTLR3_COMMON_TREE_NODE_STREAM -/// -template<class ImplTraits> -class CommonTreeNodeStream; - -/// Pointer to an instantiation of 'class' #ANTLR3_TREE_WALK_STATE -/// \ingroup ANTLR3_TREE_WALK_STATE -/// -template<class ImplTraits> -class TreeWalkState; - -/// Pointer to an instantiation of 'class' #ANTLR3_COMMON_TREE_ADAPTOR -/// \ingroup ANTLR3_COMMON_TREE_ADAPTOR -/// -template<class ImplTraits> -class 
CommonTreeAdaptor; - -/// Pointer to an instantiation of 'class' #ANTLR3_TREE_PARSER -/// \ingroup ANTLR3_TREE_PARSER -/// -template<class ImplTraits> -class TreeParser; - -/// Pointer to an instantiation of 'class' #ANTLR3_INT_TRIE -/// \ingroup ANTLR3_INT_TRIE -/// -template< class DataType, class AllocPolicyType > -class IntTrie; - -/// Pointer to an instantiation of 'class' #ANTLR3_REWRITE_RULE_ELEMENT_STREAM -/// \ingroup ANTLR3_REWRITE_RULE_ELEMENT_STREAM -/// -template<class ImplTraits, class SuperType> -class RewriteRuleElementStream; - -template<class ImplTraits> -class RewriteRuleTokenStream; - -template<class ImplTraits> -class RewriteRuleSubtreeStream; - -template<class ImplTraits> -class RewriteRuleNodeStream; - -/// Pointer to an instantiation of 'class' #ANTLR3_DEBUG_EVENT_LISTENER -/// \ingroup ANTLR3_DEBUG_EVENT_LISTENER -/// -template<class ImplTraits> -class DebugEventListener; - -//A Class just used for forwarding other classes for simplifying class forwarding -//Logic: constructor is made simple -template<class A> -class ClassForwarder {}; - -template<bool b> -class BoolForwarder {}; -class Empty {}; - -template<class ImplTraits, class StreamType> -class ComponentTypeFinder -{ -}; - -template<class ImplTraits> -class ComponentTypeFinder< ImplTraits, typename ImplTraits::InputStreamType> -{ -public: +template<class ImplTraits, class SuperType> +class IntStream; + +/// Pointer to an instantiation of 'class' #ANTLR3_RECOGNIZER_SHARED_STATE +/// \ingroup ANTLR3_RECOGNIZER_SHARED_STATE +/// +template<class ImplTraits, class SuperType> +class RecognizerSharedState; + +/// Pointer to an instantiation of 'class' #ANTLR3_BITSET_LIST +/// \ingroup ANTLR3_BITSET_LIST +/// +template<class AllocatorType> +class BitsetList; + +/// Pointer to an instantiation of 'class' #ANTLR3_BITSET +/// \ingroup ANTLR3_BITSET +/// +template<class AllocatorType> +class Bitset; + +/// Pointer to an instantiation of 'class' #ANTLR3_COMMON_TOKEN +/// \ingroup 
ANTLR3_COMMON_TOKEN +/// +template<class ImplTraits> +class CommonToken; + +template<class ImplTraits, ExceptionType Ex, class StreamType> +class ANTLR_Exception; + +/// Pointer to an instantiation of 'class' #ANTLR3_TOPO +/// \ingroup ANTLR3_TOPO +/// +template<class AllocPolicyType> +class Topo; + +/// Pointer to an instantiation of 'class' #ANTLR3_INPUT_STREAM +/// \ingroup ANTLR3_INPUT_STREAM +/// +template<class ImplTraits> +class InputStream; + +/// Pointer to an instantiation of 'class' #ANTLR3_LEX_STATE +/// \ingroup ANTLR3_LEX_STATE +/// +template<class ImplTraits> +class LexState; + +/// Pointer to an instantiation of 'class' #ANTLR3_TOKEN_SOURCE +/// \ingroup ANTLR3_TOKEN_SOURCE +/// +template<class ImplTraits> +class TokenSource; + +/// Pointer to an instantiation of 'class' #ANTLR3_TOKEN_STREAM +/// \ingroup ANTLR3_TOKEN_STREAM +/// +template<class ImplTraits> +class TokenStream; + +/// Pointer to an instantiation of 'class' #ANTLR3_COMMON_TOKEN_STREAM +/// \ingroup ANTLR3_COMMON_TOKEN_STREAM +/// +template<class ImplTraits> +class CommonTokenStream; + +/// Pointer to an instantiation of 'class' #ANTLR3_CYCLIC_DFA +/// \ingroup ANTLR3_CYCLIC_DFA +/// +template<class ImplTraits, class ComponentType> +class CyclicDFA; + +/// Pointer to an instantiation of 'class' #ANTLR3_LEXER +/// \ingroup ANTLR3_LEXER +/// +template<class ImplTraits> +class Lexer; + +/// Pointer to an instantiation of 'class' #ANTLR3_PARSER +/// \ingroup ANTLR3_PARSER +/// +template<class ImplTraits> +class Parser; + +/// Pointer to an instantiation of 'class' #ANTLR3_BASE_TREE +/// \ingroup ANTLR3_BASE_TREE +/// +template<class ImplTraits> +class BaseTree; + +/// Pointer to an instantiation of 'class' #ANTLR3_COMMON_TREE +/// \ingroup ANTLR3_COMMON_TREE +/// +template<class ImplTraits> +class CommonTree; + +/// Pointer to an instantiation of 'class' #ANTLR3_PARSE_TREE +/// \ingroup ANTLR3_PARSE_TREE +/// +template<class ImplTraits> +class ParseTree; + +/// Pointer to an instantiation 
of 'class' #ANTLR3_TREE_NODE_STREAM +/// \ingroup ANTLR3_TREE_NODE_STREAM +/// +template<class ImplTraits> +class TreeNodeStream; + +/// Pointer to an instantiation of 'class' #ANTLR3_COMMON_TREE_NODE_STREAM +/// \ingroup ANTLR3_COMMON_TREE_NODE_STREAM +/// +template<class ImplTraits> +class CommonTreeNodeStream; + +/// Pointer to an instantiation of 'class' #ANTLR3_TREE_WALK_STATE +/// \ingroup ANTLR3_TREE_WALK_STATE +/// +template<class ImplTraits> +class TreeWalkState; + +/// Pointer to an instantiation of 'class' #ANTLR3_COMMON_TREE_ADAPTOR +/// \ingroup ANTLR3_COMMON_TREE_ADAPTOR +/// +template<class ImplTraits> +class CommonTreeAdaptor; + +/// Pointer to an instantiation of 'class' #ANTLR3_TREE_PARSER +/// \ingroup ANTLR3_TREE_PARSER +/// +template<class ImplTraits> +class TreeParser; + +/// Pointer to an instantiation of 'class' #ANTLR3_INT_TRIE +/// \ingroup ANTLR3_INT_TRIE +/// +template< class DataType, class AllocPolicyType > +class IntTrie; + +/// Pointer to an instantiation of 'class' #ANTLR3_REWRITE_RULE_ELEMENT_STREAM +/// \ingroup ANTLR3_REWRITE_RULE_ELEMENT_STREAM +/// +template<class ImplTraits, class SuperType> +class RewriteRuleElementStream; + +template<class ImplTraits> +class RewriteRuleTokenStream; + +template<class ImplTraits> +class RewriteRuleSubtreeStream; + +template<class ImplTraits> +class RewriteRuleNodeStream; + +/// Pointer to an instantiation of 'class' #ANTLR3_DEBUG_EVENT_LISTENER +/// \ingroup ANTLR3_DEBUG_EVENT_LISTENER +/// +template<class ImplTraits> +class DebugEventListener; + +//A Class just used for forwarding other classes for simplifying class forwarding +//Logic: constructor is made simple +template<class A> +class ClassForwarder {}; + +template<bool b> +class BoolForwarder {}; +class Empty {}; + +template<class ImplTraits, class StreamType> +class ComponentTypeFinder +{ +}; + +template<class ImplTraits> +class ComponentTypeFinder< ImplTraits, typename ImplTraits::InputStreamType> +{ +public: typedef typename 
ImplTraits::LexerType ComponentType; -}; - -template<class ImplTraits> -class ComponentTypeFinder< ImplTraits, typename ImplTraits::TokenStreamType> -{ -public: +}; + +template<class ImplTraits> +class ComponentTypeFinder< ImplTraits, typename ImplTraits::TokenStreamType> +{ +public: typedef typename ImplTraits::ParserType ComponentType; -}; - -template<class ImplTraits> -class ComponentTypeFinder< ImplTraits, typename ImplTraits::TreeNodeStreamType> -{ -public: +}; + +template<class ImplTraits> +class ComponentTypeFinder< ImplTraits, typename ImplTraits::TreeNodeStreamType> +{ +public: typedef typename ImplTraits::TreeParserType ComponentType; -}; - +}; + } - -#endif + +#endif diff --git a/contrib/libs/antlr3_cpp_runtime/include/antlr3intstream.hpp b/contrib/libs/antlr3_cpp_runtime/include/antlr3intstream.hpp index 01bf60a7cb..b312acb569 100644 --- a/contrib/libs/antlr3_cpp_runtime/include/antlr3intstream.hpp +++ b/contrib/libs/antlr3_cpp_runtime/include/antlr3intstream.hpp @@ -1,122 +1,122 @@ -/** \file - * Defines the the class interface for an antlr3 INTSTREAM. +/** \file + * Defines the the class interface for an antlr3 INTSTREAM. * - * Certain functionality (such as DFAs for instance) abstract the stream of tokens - * or characters in to a steam of integers. Hence this structure should be included - * in any stream that is able to provide the output as a stream of integers (which is anything - * basically. - * - * There are no specific implementations of the methods in this interface in general. Though - * for purposes of casting and so on, it may be necesssary to implement a function with - * the signature in this interface which abstracts the base immplementation. In essence though - * the base stream provides a pointer to this interface, within which it installs its - * normal match() functions and so on. 
Interaces such as DFA are then passed the pANTLR3_INT_STREAM + * Certain functionality (such as DFAs for instance) abstracts the stream of tokens + * or characters into a stream of integers. Hence this structure should be included + * in any stream that is able to provide the output as a stream of integers (which is anything + * basically). + * + * There are no specific implementations of the methods in this interface in general. Though + * for purposes of casting and so on, it may be necessary to implement a function with + * the signature in this interface which abstracts the base implementation. In essence though + * the base stream provides a pointer to this interface, within which it installs its + * normal match() functions and so on. Interfaces such as DFA are then passed the pANTLR3_INT_STREAM * and can treat any input as an int stream. - * - * For instance, a lexer implements a pANTLR3_BASE_RECOGNIZER, within which there is a pANTLR3_INT_STREAM. - * However, a pANTLR3_INPUT_STREAM also provides a pANTLR3_INT_STREAM, which it has constructed from - * it's normal interface when it was created. This is then pointed at by the pANTLR_BASE_RECOGNIZER - * when it is intialized with a pANTLR3_INPUT_STREAM. - * + * + * For instance, a lexer implements a pANTLR3_BASE_RECOGNIZER, within which there is a pANTLR3_INT_STREAM. + * However, a pANTLR3_INPUT_STREAM also provides a pANTLR3_INT_STREAM, which it has constructed from + * its normal interface when it was created. This is then pointed at by the pANTLR3_BASE_RECOGNIZER + * when it is initialized with a pANTLR3_INPUT_STREAM. + * * Similarly if a pANTLR3_BASE_RECOGNIZER is initialized with a pANTLR3_TOKEN_STREAM, then the * pANTLR3_INT_STREAM is taken from the pANTLR3_TOKEN_STREAM. - * - * If a pANTLR3_BASE_RECOGNIZER is initialized with a pANTLR3_TREENODE_STREAM, then guess where - * the pANTLR3_INT_STREAM comes from?
- * - * Note that because the context pointer points to the actual interface structure that is providing - * the ANTLR3_INT_STREAM it is defined as a (void *) in this interface. There is no direct implementation - * of an ANTLR3_INT_STREAM (unless someone did not understand what I was doing here =;?P - */ + * + * If a pANTLR3_BASE_RECOGNIZER is initialized with a pANTLR3_TREENODE_STREAM, then guess where + * the pANTLR3_INT_STREAM comes from? + * + * Note that because the context pointer points to the actual interface structure that is providing + * the ANTLR3_INT_STREAM it is defined as a (void *) in this interface. There is no direct implementation + * of an ANTLR3_INT_STREAM (unless someone did not understand what I was doing here =;?P + */ #ifndef _ANTLR3_INTSTREAM_HPP #define _ANTLR3_INTSTREAM_HPP - -// [The "BSD licence"] -// Copyright (c) 2005-2009 Gokulakannan Somasundaram, ElectronDB - -// -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// 3. The name of the author may not be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR -// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES -// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
-// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, -// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT -// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF -// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - + +// [The "BSD licence"] +// Copyright (c) 2005-2009 Gokulakannan Somasundaram, ElectronDB + +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// 3. The name of the author may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + namespace antlr3 { - -enum STREAM_TYPE -{ + +enum STREAM_TYPE +{ /** Type indicator for a character stream * \remark if a custom stream is created but it can be treated as * a char stream, then you may OR in this value to your type indicator */ CHARSTREAM = 0x0001 - + /** Type indicator for a Token stream * \remark if a custom stream is created but it can be treated as * a token stream, then you may OR in this value to your type indicator */ , TOKENSTREAM = 0x0002 - + /** Type indicator for a common tree node stream * \remark if a custom stream is created but it can be treated as * a common tree node stream, then you may OR in this value to your type indicator */ , COMMONTREENODE = 0x0004 - + /** Type mask for input stream so we can switch in the above types * \remark DO NOT USE 0x0000 as a stream type! 
*/ , INPUT_MASK = 0x0007 -}; - -class RESOLVE_ENDIAN_AT_RUNTIME {}; -class BYTE_AGNOSTIC {}; -class ANTLR_LITTLE_ENDIAN {}; -class ANTLR_BIG_ENDIAN {}; - -template<class ImplTraits, class SuperType> -class IntStream : public ImplTraits::AllocPolicyType -{ -public: +}; + +class RESOLVE_ENDIAN_AT_RUNTIME {}; +class BYTE_AGNOSTIC {}; +class ANTLR_LITTLE_ENDIAN {}; +class ANTLR_BIG_ENDIAN {}; + +template<class ImplTraits, class SuperType> +class IntStream : public ImplTraits::AllocPolicyType +{ +public: typedef typename ImplTraits::StringType StringType; -protected: - /** Potentially useful in error reporting and so on, this string is - * an identification of the input source. It may be NULL, so anything - * attempting to access it needs to check this and substitute a sensible - * default. - */ +protected: + /** Potentially useful in error reporting and so on, this string is + * an identification of the input source. It may be NULL, so anything + * attempting to access it needs to check this and substitute a sensible + * default. + */ StringType m_streamName; - - /** Last marker position allocated - */ + + /** Last marker position allocated + */ ANTLR_MARKER m_lastMarker; bool m_upper_case; //if set, values should be returbed in upper case - - /// Indicates whether we should implement endian-specific logic - /// 0 - Undefined 1 - Default(machine and input are both same), 2 - Little Endian, 3 - Big Endian + + /// Indicates whether we should implement endian-specific logic + /// 0 - Undefined 1 - Default(machine and input are both same), 2 - Little Endian, 3 - Big Endian ANTLR_UINT8 m_endian_spec; - -public: + +public: IntStream(); // Return a string that identifies the input source @@ -125,139 +125,139 @@ public: StringType& get_streamName(); const StringType& get_streamName() const; ANTLR_MARKER get_lastMarker() const; - + SuperType* get_super(); /** - * Function that installs a version of LA that always - * returns upper case. 
Only valid for character streams and creates a case - * insensitive lexer if the lexer tokens are described in upper case. The - * tokens will preserve case in the token text. - */ + * Function that installs a version of LA that always + * returns upper case. Only valid for character streams and creates a case + * insensitive lexer if the lexer tokens are described in upper case. The + * tokens will preserve case in the token text. + */ void setUcaseLA(bool flag); - - /** Consume the next 'ANTR3_UINT32' in the stream - */ + + /** Consume the next 'ANTR3_UINT32' in the stream + */ void consume(); - + /** Get ANTLR3_UINT32 at current input pointer + i ahead where i=1 is next ANTLR3_UINT32 - */ + */ ANTLR_UINT32 LA( ANTLR_INT32 i); - - /** Tell the stream to start buffering if it hasn't already. Return - * current input position, index(), or some other marker so that - * when passed to rewind() you get back to the same spot. - * rewind(mark()) should not affect the input cursor. - */ + + /** Tell the stream to start buffering if it hasn't already. Return + * current input position, index(), or some other marker so that + * when passed to rewind() you get back to the same spot. + * rewind(mark()) should not affect the input cursor. + */ ANTLR_MARKER mark(); - /** Return the current input symbol index 0..n where n indicates the - * last symbol has been read. - */ + /** Return the current input symbol index 0..n where n indicates the + * last symbol has been read. + */ ANTLR_MARKER index(); - - /** Reset the stream so that next call to index would return marker. - * The marker will usually be index() but it doesn't have to be. It's - * just a marker to indicate what state the stream was in. This is - * essentially calling release() and seek(). If there are markers - * created after this marker argument, this routine must unroll them - * like a stack. Assume the state the stream was in when this marker - * was created. 
- */ + + /** Reset the stream so that next call to index would return marker. + * The marker will usually be index() but it doesn't have to be. It's + * just a marker to indicate what state the stream was in. This is + * essentially calling release() and seek(). If there are markers + * created after this marker argument, this routine must unroll them + * like a stack. Assume the state the stream was in when this marker + * was created. + */ void rewind(ANTLR_MARKER marker); - - /** Reset the stream to the last marker position, witouh destryoing the - * last marker position. - */ + + /** Reset the stream to the last marker position, witouh destryoing the + * last marker position. + */ void rewindLast(); - - /** You may want to commit to a backtrack but don't want to force the - * stream to keep bookkeeping objects around for a marker that is - * no longer necessary. This will have the same behavior as - * rewind() except it releases resources without the backward seek. - */ + + /** You may want to commit to a backtrack but don't want to force the + * stream to keep bookkeeping objects around for a marker that is + * no longer necessary. This will have the same behavior as + * rewind() except it releases resources without the backward seek. + */ void release(ANTLR_MARKER mark); - - /** Set the input cursor to the position indicated by index. This is - * normally used to seek ahead in the input stream. No buffering is - * required to do this unless you know your stream will use seek to - * move backwards such as when backtracking. - * - * This is different from rewind in its multi-directional - * requirement and in that its argument is strictly an input cursor (index). - * - * For char streams, seeking forward must update the stream state such - * as line number. For seeking backwards, you will be presumably - * backtracking using the mark/rewind mechanism that restores state and - * so this method does not need to update state when seeking backwards. 
- * - * Currently, this method is only used for efficient backtracking, but - * in the future it may be used for incremental parsing. - */ + + /** Set the input cursor to the position indicated by index. This is + * normally used to seek ahead in the input stream. No buffering is + * required to do this unless you know your stream will use seek to + * move backwards such as when backtracking. + * + * This is different from rewind in its multi-directional + * requirement and in that its argument is strictly an input cursor (index). + * + * For char streams, seeking forward must update the stream state such + * as line number. For seeking backwards, you will be presumably + * backtracking using the mark/rewind mechanism that restores state and + * so this method does not need to update state when seeking backwards. + * + * Currently, this method is only used for efficient backtracking, but + * in the future it may be used for incremental parsing. + */ void seek(ANTLR_MARKER index); - + /// Debug only method to flag consumption of initial off-channel /// tokens in the input stream /// void consumeInitialHiddenTokens(); - + void rewindMark(ANTLR_MARKER marker); ANTLR_MARKER tindex(); - - /** Frees any resources that were allocated for the implementation of this - * interface. Usually this is just releasing the memory allocated - * for the structure itself, but it may of course do anything it need to - * so long as it does not stamp on anything else. - */ + + /** Frees any resources that were allocated for the implementation of this + * interface. Usually this is just releasing the memory allocated + * for the structure itself, but it may of course do anything it need to + * so long as it does not stamp on anything else. 
+ */ ~IntStream(); - -protected: + +protected: void setupIntStream(bool machineBigEndian, bool inputBigEndian); void findout_endian_spec(bool machineBigEndian, bool inputBigEndian); - + //If the user chooses this option, then we will be resolving stuffs at run-time ANTLR_UINT32 LA( ANTLR_INT32 i, ClassForwarder<RESOLVE_ENDIAN_AT_RUNTIME> ); - + //resolve into one of the three categories below at runtime void consume( ClassForwarder<RESOLVE_ENDIAN_AT_RUNTIME> ); -}; - -template<class ImplTraits, class SuperType> -class EBCDIC_IntStream : public IntStream<ImplTraits, SuperType> -{ -public: +}; + +template<class ImplTraits, class SuperType> +class EBCDIC_IntStream : public IntStream<ImplTraits, SuperType> +{ +public: ANTLR_UINT32 LA( ANTLR_INT32 i); - -protected: + +protected: void setupIntStream(); -}; - -template<class ImplTraits, class SuperType> -class UTF8_IntStream : public IntStream<ImplTraits, SuperType> -{ -public: +}; + +template<class ImplTraits, class SuperType> +class UTF8_IntStream : public IntStream<ImplTraits, SuperType> +{ +public: ANTLR_UINT32 LA( ANTLR_INT32 i); void consume(); - -protected: + +protected: void setupIntStream(bool machineBigEndian, bool inputBigEndian); - -private: + +private: static const ANTLR_UINT32* TrailingBytesForUTF8(); static const UTF32* OffsetsFromUTF8(); -}; - -template<class ImplTraits, class SuperType> -class UTF16_IntStream : public IntStream<ImplTraits, SuperType> -{ -public: +}; + +template<class ImplTraits, class SuperType> +class UTF16_IntStream : public IntStream<ImplTraits, SuperType> +{ +public: ANTLR_UINT32 LA( ANTLR_INT32 i); void consume(); ANTLR_MARKER index(); void seek(ANTLR_MARKER seekPoint); - -protected: + +protected: void setupIntStream(bool machineBigEndian, bool inputBigEndian); - + /// \brief Return the input element assuming an 8 bit ascii input /// /// \param[in] input Input stream context pointer @@ -266,7 +266,7 @@ protected: /// \return Next input character in internal ANTLR3 encoding (UTF32) 
/// ANTLR_UINT32 LA( ANTLR_INT32 i, ClassForwarder<BYTE_AGNOSTIC> ); - + /// \brief Return the input element assuming a UTF16 input when the input is Little Endian and the machine is not /// /// \param[in] input Input stream context pointer @@ -284,13 +284,13 @@ protected: /// \return Next input character in internal ANTLR3 encoding (UTF32) /// ANTLR_UINT32 LA( ANTLR_INT32 i, ClassForwarder<ANTLR_BIG_ENDIAN> ); - + /// \brief Consume the next character in a UTF16 input stream /// /// \param input Input stream context pointer /// void consume( ClassForwarder<BYTE_AGNOSTIC> ); - + /// \brief Consume the next character in a UTF16 input stream when the input is Little Endian and the machine is not /// Note that the UTF16 routines do not do any substantial verification of the input stream as for performance /// sake, we assume it is validly encoded. So if a low surrogate is found at the curent input position then we @@ -300,20 +300,20 @@ protected: /// \param input Input stream context pointer /// void consume( ClassForwarder<ANTLR_LITTLE_ENDIAN> ); - + /// \brief Consume the next character in a UTF16 input stream when the input is Big Endian and the machine is not /// /// \param input Input stream context pointer /// void consume( ClassForwarder<ANTLR_BIG_ENDIAN> ); -}; - - - -template<class ImplTraits, class SuperType> -class UTF32_IntStream : public IntStream<ImplTraits, SuperType> -{ -public: +}; + + + +template<class ImplTraits, class SuperType> +class UTF32_IntStream : public IntStream<ImplTraits, SuperType> +{ +public: ANTLR_UINT32 LA( ANTLR_INT32 i); void consume(); @@ -322,41 +322,41 @@ public: /// ANTLR_MARKER index(); void seek(ANTLR_MARKER seekPoint); - -protected: + +protected: void setupIntStream(bool machineBigEndian, bool inputBigEndian); ANTLR_UINT32 LA( ANTLR_INT32 i, ClassForwarder<RESOLVE_ENDIAN_AT_RUNTIME> ); ANTLR_UINT32 LA( ANTLR_INT32 i, ClassForwarder<BYTE_AGNOSTIC> ); ANTLR_UINT32 LA( ANTLR_INT32 i, ClassForwarder<ANTLR_LITTLE_ENDIAN> ); 
ANTLR_UINT32 LA( ANTLR_INT32 i, ClassForwarder<ANTLR_BIG_ENDIAN> ); - + void consume( ClassForwarder<RESOLVE_ENDIAN_AT_RUNTIME> ); void consume( ClassForwarder<BYTE_AGNOSTIC> ); void consume( ClassForwarder<ANTLR_LITTLE_ENDIAN> ); void consume( ClassForwarder<ANTLR_BIG_ENDIAN> ); -}; - -template<class ImplTraits> -class TokenIntStream : public IntStream<ImplTraits, typename ImplTraits::TokenStreamType > -{ -public: +}; + +template<class ImplTraits> +class TokenIntStream : public IntStream<ImplTraits, typename ImplTraits::TokenStreamType > +{ +public: typedef typename ImplTraits::CommonTokenType CommonTokenType; typedef typename ImplTraits::StringType StringType; typedef typename ImplTraits::TokenStreamType TokenStreamType; typedef IntStream<ImplTraits, TokenStreamType > BaseType; - -private: + +private: /** Because the indirect call, though small in individual cases can - * mount up if there are thousands of tokens (very large input streams), callers - * of size can optionally use this cached size field. - */ + * mount up if there are thousands of tokens (very large input streams), callers + * of size can optionally use this cached size field. 
+ */ ANTLR_UINT32 m_cachedSize; - -public: + +public: TokenIntStream(); ANTLR_UINT32 get_cachedSize() const; void set_cachedSize( ANTLR_UINT32 cachedSize ); - + void consume(); void consumeInitialHiddenTokens(); ANTLR_UINT32 LA( ANTLR_INT32 i ); @@ -368,20 +368,20 @@ public: void rewind(ANTLR_MARKER marker); void seek(ANTLR_MARKER index); StringType getSourceName(); - -}; - -template<class ImplTraits> + +}; + +template<class ImplTraits> class TreeNodeIntStream : public IntStream<ImplTraits, typename ImplTraits::TreeNodeStreamType> -{ -public: +{ +public: typedef typename ImplTraits::TreeNodeStreamType TreeNodeStreamType; typedef IntStream<ImplTraits, TreeNodeStreamType > BaseType; typedef typename ImplTraits::TreeType TreeType; typedef typename ImplTraits::TreeTypePtr TreeTypePtr; typedef typename ImplTraits::CommonTokenType CommonTokenType; - -public: + +public: void consume(); ANTLR_MARKER tindex(); ANTLR_UINT32 LA(ANTLR_INT32 i); @@ -391,11 +391,11 @@ public: void rewindLast(); void seek(ANTLR_MARKER index); ANTLR_UINT32 size(); -}; - +}; + } - -#include "antlr3intstream.inl" - -#endif - + +#include "antlr3intstream.inl" + +#endif + diff --git a/contrib/libs/antlr3_cpp_runtime/include/antlr3intstream.inl b/contrib/libs/antlr3_cpp_runtime/include/antlr3intstream.inl index e9990786bb..042e87c621 100644 --- a/contrib/libs/antlr3_cpp_runtime/include/antlr3intstream.inl +++ b/contrib/libs/antlr3_cpp_runtime/include/antlr3intstream.inl @@ -1,57 +1,57 @@ namespace antlr3 { - -template<class ImplTraits, class SuperType> -ANTLR_INLINE IntStream<ImplTraits, SuperType>::IntStream() -{ + +template<class ImplTraits, class SuperType> +ANTLR_INLINE IntStream<ImplTraits, SuperType>::IntStream() +{ m_lastMarker = 0; m_upper_case = false; -} - -template<class ImplTraits, class SuperType> +} + +template<class ImplTraits, class SuperType> ANTLR_INLINE typename IntStream<ImplTraits, SuperType>::StringType IntStream<ImplTraits, SuperType>::getSourceName() -{ +{ return m_streamName; 
-} - -template<class ImplTraits, class SuperType> +} + +template<class ImplTraits, class SuperType> ANTLR_INLINE typename IntStream<ImplTraits, SuperType>::StringType& IntStream<ImplTraits, SuperType>::get_streamName() -{ +{ return m_streamName; -} - -template<class ImplTraits, class SuperType> +} + +template<class ImplTraits, class SuperType> ANTLR_INLINE const typename IntStream<ImplTraits, SuperType>::StringType& IntStream<ImplTraits, SuperType>::get_streamName() const -{ +{ return m_streamName; -} - -template<class ImplTraits, class SuperType> -ANTLR_INLINE ANTLR_MARKER IntStream<ImplTraits, SuperType>::get_lastMarker() const -{ +} + +template<class ImplTraits, class SuperType> +ANTLR_INLINE ANTLR_MARKER IntStream<ImplTraits, SuperType>::get_lastMarker() const +{ return m_lastMarker; -} - -template<class ImplTraits, class SuperType> +} + +template<class ImplTraits, class SuperType> ANTLR_INLINE void IntStream<ImplTraits, SuperType>::setUcaseLA(bool flag) -{ +{ m_upper_case = flag; -} - -template<class ImplTraits, class SuperType> -ANTLR_INLINE SuperType* IntStream<ImplTraits, SuperType>::get_super() -{ +} + +template<class ImplTraits, class SuperType> +ANTLR_INLINE SuperType* IntStream<ImplTraits, SuperType>::get_super() +{ return static_cast<SuperType*>(this); -} - -template<class ImplTraits, class SuperType> +} + +template<class ImplTraits, class SuperType> void IntStream<ImplTraits, SuperType>::consume() -{ +{ SuperType* input = this->get_super(); - + const ANTLR_UINT8* nextChar = input->get_nextChar(); const ANTLR_UINT8* data = input->get_data(); ANTLR_UINT32 sizeBuf = input->get_sizeBuf(); - + if ( nextChar < ( data + sizeBuf ) ) { /* Indicate one more character in this line @@ -66,144 +66,144 @@ void IntStream<ImplTraits, SuperType>::consume() input->set_charPositionInLine(0); input->set_currentLine(nextChar + 1); } - + /* Increment to next character position */ input->set_nextChar( nextChar + 1 ); - } -} - -template<class ImplTraits, class SuperType> + } 
+} + +template<class ImplTraits, class SuperType> ANTLR_UINT32 IntStream<ImplTraits, SuperType>::LA( ANTLR_INT32 la ) -{ +{ SuperType* input = this->get_super(); const ANTLR_UINT8* nextChar = input->get_nextChar(); const ANTLR_UINT8* data = input->get_data(); ANTLR_UINT32 sizeBuf = input->get_sizeBuf(); - + if (( nextChar + la - 1) >= (data + sizeBuf)) - { + { return ANTLR_CHARSTREAM_EOF; - } - else - { + } + else + { if( !m_upper_case ) return (ANTLR_UCHAR)(*(nextChar + la - 1)); else return (ANTLR_UCHAR)toupper(*(nextChar + la - 1)); - } -} - -template<class ImplTraits, class SuperType> -ANTLR_MARKER IntStream<ImplTraits, SuperType>::mark() -{ + } +} + +template<class ImplTraits, class SuperType> +ANTLR_MARKER IntStream<ImplTraits, SuperType>::mark() +{ LexState<ImplTraits>* state; - SuperType* input = this->get_super(); - + SuperType* input = this->get_super(); + /* New mark point - */ - input->inc_markDepth(); - - /* See if we are revisiting a mark as we can just reuse the vector - * entry if we are, otherwise, we need a new one - */ + */ + input->inc_markDepth(); + + /* See if we are revisiting a mark as we can just reuse the vector + * entry if we are, otherwise, we need a new one + */ if (input->get_markDepth() > input->get_markers().size() ) { input->get_markers().push_back( LexState<ImplTraits>() ); LexState<ImplTraits>& state_r = input->get_markers().back(); state = &state_r; - } - else - { + } + else + { LexState<ImplTraits>& state_r = input->get_markers().at( input->get_markDepth() - 1 ); state = &state_r; - + /* Assume no errors for speed, it will just blow up if the table failed * for some reasons, hence lots of unit tests on the tables ;-) */ - } - - /* We have created or retrieved the state, so update it with the current - * elements of the lexer state. 
- */ - state->set_charPositionInLine( input->get_charPositionInLine() ); - state->set_currentLine( input->get_currentLine() ); - state->set_line( input->get_line() ); - state->set_nextChar( input->get_nextChar() ); - - m_lastMarker = input->get_markDepth(); - - /* And that's it - */ - return input->get_markDepth(); -} - -template<class ImplTraits, class SuperType> + } + + /* We have created or retrieved the state, so update it with the current + * elements of the lexer state. + */ + state->set_charPositionInLine( input->get_charPositionInLine() ); + state->set_currentLine( input->get_currentLine() ); + state->set_line( input->get_line() ); + state->set_nextChar( input->get_nextChar() ); + + m_lastMarker = input->get_markDepth(); + + /* And that's it + */ + return input->get_markDepth(); +} + +template<class ImplTraits, class SuperType> ANTLR_MARKER IntStream<ImplTraits, SuperType>::index() -{ +{ SuperType* input = this->get_super(); return input->index_impl(); -} - -template<class ImplTraits, class SuperType> +} + +template<class ImplTraits, class SuperType> void IntStream<ImplTraits, SuperType>::rewind(ANTLR_MARKER mark) -{ - SuperType* input = this->get_super(); - - /* Perform any clean up of the marks - */ - this->release(mark); - +{ + SuperType* input = this->get_super(); + + /* Perform any clean up of the marks + */ + this->release(mark); + /* Find the supplied mark state - */ + */ ANTLR_UINT32 idx = static_cast<ANTLR_UINT32>( mark-1 ); - typename ImplTraits::LexStateType& state = input->get_markers().at( idx ); - - /* Seek input pointer to the requested point (note we supply the void *pointer - * to whatever is implementing the int stream to seek). - */ + typename ImplTraits::LexStateType& state = input->get_markers().at( idx ); + + /* Seek input pointer to the requested point (note we supply the void *pointer + * to whatever is implementing the int stream to seek). 
+ */ this->seek( (ANTLR_MARKER)state.get_nextChar() ); - /* Reset to the reset of the information in the mark - */ - input->set_charPositionInLine( state.get_charPositionInLine() ); - input->set_currentLine( state.get_currentLine() ); - input->set_line( state.get_line() ); - input->set_nextChar( state.get_nextChar() ); - - /* And we are done - */ -} - -template<class ImplTraits, class SuperType> + /* Reset to the reset of the information in the mark + */ + input->set_charPositionInLine( state.get_charPositionInLine() ); + input->set_currentLine( state.get_currentLine() ); + input->set_line( state.get_line() ); + input->set_nextChar( state.get_nextChar() ); + + /* And we are done + */ +} + +template<class ImplTraits, class SuperType> void IntStream<ImplTraits, SuperType>::rewindLast() -{ +{ this->rewind(m_lastMarker); -} - -template<class ImplTraits, class SuperType> +} + +template<class ImplTraits, class SuperType> void IntStream<ImplTraits, SuperType>::release(ANTLR_MARKER mark) -{ +{ SuperType* input = this->get_super(); - + /* We don't do much here in fact as we never free any higher marks in - * the hashtable as we just resuse any memory allocated for them. - */ - input->set_markDepth( (ANTLR_UINT32)(mark - 1) ); - -} - -template<class ImplTraits, class SuperType> -void IntStream<ImplTraits, SuperType>::setupIntStream(bool, bool) -{ -} - -template<class ImplTraits, class SuperType> + * the hashtable as we just resuse any memory allocated for them. 
+ */ + input->set_markDepth( (ANTLR_UINT32)(mark - 1) ); + +} + +template<class ImplTraits, class SuperType> +void IntStream<ImplTraits, SuperType>::setupIntStream(bool, bool) +{ +} + +template<class ImplTraits, class SuperType> void IntStream<ImplTraits, SuperType>::seek(ANTLR_MARKER seekPoint) -{ +{ ANTLR_INT32 count; SuperType* input = this->get_super(); - + ANTLR_MARKER nextChar = (ANTLR_MARKER) input->get_nextChar(); /* If the requested seek point is less than the current * input point, then we assume that we are resetting from a mark @@ -216,22 +216,22 @@ void IntStream<ImplTraits, SuperType>::seek(ANTLR_MARKER seekPoint) else { count = (ANTLR_UINT32)(seekPoint - nextChar); - + while (count--) { this->consume(); } } -} - -template<class ImplTraits, class SuperType> -IntStream<ImplTraits, SuperType>::~IntStream() -{ -} - -template<class ImplTraits, class SuperType> +} + +template<class ImplTraits, class SuperType> +IntStream<ImplTraits, SuperType>::~IntStream() +{ +} + +template<class ImplTraits, class SuperType> ANTLR_UINT32 EBCDIC_IntStream<ImplTraits, SuperType>::LA( ANTLR_INT32 la) -{ +{ // EBCDIC to ASCII conversion table // // This for EBCDIC EDF04 translated to ISO-8859.1 which is the usually accepted POSIX @@ -272,56 +272,56 @@ ANTLR_UINT32 EBCDIC_IntStream<ImplTraits, SuperType>::LA( ANTLR_INT32 la) 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0xb3, 0x7b, 0xdc, 0x7d, 0xda, 0x7e }; - + SuperType* input = this->get_super(); - + if (( input->get_nextChar() + la - 1) >= ( input->get_data() + input->get_sizeBuf() )) - { + { return ANTLR_CHARSTREAM_EOF; - } - else - { - // Translate the required character via the constant conversion table - // + } + else + { + // Translate the required character via the constant conversion table + // return e2a[(*(input->get_nextChar() + la - 1))]; - } -} - -template<class ImplTraits, class SuperType> -void EBCDIC_IntStream<ImplTraits, SuperType>::setupIntStream() -{ + } +} + +template<class ImplTraits, class 
SuperType> +void EBCDIC_IntStream<ImplTraits, SuperType>::setupIntStream() +{ SuperType* super = this->get_super(); super->set_charByteSize(1); -} - -template<class ImplTraits, class SuperType> +} + +template<class ImplTraits, class SuperType> ANTLR_UINT32 UTF16_IntStream<ImplTraits, SuperType>::LA( ANTLR_INT32 i) -{ +{ return this->LA(i, ClassForwarder< typename ImplTraits::Endianness >() ); -} - -template<class ImplTraits, class SuperType> -void UTF16_IntStream<ImplTraits, SuperType>::consume() -{ +} + +template<class ImplTraits, class SuperType> +void UTF16_IntStream<ImplTraits, SuperType>::consume() +{ this->consume( ClassForwarder< typename ImplTraits::Endianness >() ); -} - -template<class ImplTraits, class SuperType> +} + +template<class ImplTraits, class SuperType> ANTLR_MARKER UTF16_IntStream<ImplTraits, SuperType>::index() -{ +{ SuperType* input = this->get_super(); - return (ANTLR_MARKER)(input->get_nextChar()); -} - -template<class ImplTraits, class SuperType> -void UTF16_IntStream<ImplTraits, SuperType>::seek(ANTLR_MARKER seekPoint) -{ + return (ANTLR_MARKER)(input->get_nextChar()); +} + +template<class ImplTraits, class SuperType> +void UTF16_IntStream<ImplTraits, SuperType>::seek(ANTLR_MARKER seekPoint) +{ SuperType* input = this->get_super(); - + // If the requested seek point is less than the current // input point, then we assume that we are resetting from a mark // and do not need to scan, but can just set to there as rewind will - // reset line numbers and so on. + // reset line numbers and so on. 
// if (seekPoint <= (ANTLR_MARKER)(input->get_nextChar())) { @@ -329,23 +329,23 @@ void UTF16_IntStream<ImplTraits, SuperType>::seek(ANTLR_MARKER seekPoint) } else { - // Call consume until we reach the asked for seek point or EOF - // + // Call consume until we reach the asked for seek point or EOF + // while( (this->LA(1) != ANTLR_CHARSTREAM_EOF) && (seekPoint < (ANTLR_MARKER)input->get_nextChar() ) ) { this->consume(); } } -} - -template<class ImplTraits, class SuperType> -void IntStream<ImplTraits, SuperType>::findout_endian_spec(bool machineBigEndian, bool inputBigEndian) -{ +} + +template<class ImplTraits, class SuperType> +void IntStream<ImplTraits, SuperType>::findout_endian_spec(bool machineBigEndian, bool inputBigEndian) +{ // We must install different UTF16 routines according to whether the input // is the same endianess as the machine we are executing upon or not. If it is not // then we must install methods that can convert the endianess on the fly as they go // - + if(machineBigEndian == true) { // Machine is Big Endian, if the input is also then install the @@ -384,20 +384,20 @@ void IntStream<ImplTraits, SuperType>::findout_endian_spec(bool machineBigEndian m_endian_spec = 3; } } -} - -template<class ImplTraits, class SuperType> -void UTF16_IntStream<ImplTraits, SuperType>::setupIntStream(bool machineBigEndian, bool inputBigEndian) -{ +} + +template<class ImplTraits, class SuperType> +void UTF16_IntStream<ImplTraits, SuperType>::setupIntStream(bool machineBigEndian, bool inputBigEndian) +{ SuperType* super = this->get_super(); super->set_charByteSize(2); - + this->findout_endian_spec( machineBigEndian, inputBigEndian ); -} - -template<class ImplTraits, class SuperType> +} + +template<class ImplTraits, class SuperType> ANTLR_UINT32 IntStream<ImplTraits, SuperType>::LA( ANTLR_INT32 i, ClassForwarder<RESOLVE_ENDIAN_AT_RUNTIME> ) -{ +{ assert( (m_endian_spec >= 1) && (m_endian_spec <= 3)); switch(m_endian_spec) { @@ -414,11 +414,11 @@ ANTLR_UINT32 
IntStream<ImplTraits, SuperType>::LA( ANTLR_INT32 i, ClassForwarder break; } return 0; -} - -template<class ImplTraits, class SuperType> +} + +template<class ImplTraits, class SuperType> void IntStream<ImplTraits, SuperType>::consume( ClassForwarder<RESOLVE_ENDIAN_AT_RUNTIME> ) -{ +{ assert( (m_endian_spec >= 1) && (m_endian_spec <= 3)); switch(m_endian_spec) { @@ -434,436 +434,436 @@ void IntStream<ImplTraits, SuperType>::consume( ClassForwarder<RESOLVE_ENDIAN_AT default: break; } -} - -template<class ImplTraits, class SuperType> +} + +template<class ImplTraits, class SuperType> ANTLR_UINT32 UTF16_IntStream<ImplTraits, SuperType>::LA( ANTLR_INT32 la, ClassForwarder<BYTE_AGNOSTIC> ) -{ +{ SuperType* input; - UTF32 ch; - UTF32 ch2; + UTF32 ch; + UTF32 ch2; UTF16* nextChar; - - // Find the input interface and where we are currently pointing to - // in the input stream - // + + // Find the input interface and where we are currently pointing to + // in the input stream + // input = this->get_super; nextChar = input->get_nextChar(); - - // If a positive offset then advance forward, else retreat - // - if (la >= 0) - { - while (--la > 0 && (ANTLR_UINT8*)nextChar < ((ANTLR_UINT8*)input->get_data()) + input->get_sizeBuf() ) - { - // Advance our copy of the input pointer - // - // Next char in natural machine byte order - // - ch = *nextChar++; - - // If we have a surrogate pair then we need to consume - // a following valid LO surrogate. - // + + // If a positive offset then advance forward, else retreat + // + if (la >= 0) + { + while (--la > 0 && (ANTLR_UINT8*)nextChar < ((ANTLR_UINT8*)input->get_data()) + input->get_sizeBuf() ) + { + // Advance our copy of the input pointer + // + // Next char in natural machine byte order + // + ch = *nextChar++; + + // If we have a surrogate pair then we need to consume + // a following valid LO surrogate. 
+ // if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END) - { - // If the 16 bits following the high surrogate are in the source buffer... - // + { + // If the 16 bits following the high surrogate are in the source buffer... + // if ((ANTLR_UINT8*)(nextChar) < (((ANTLR_UINT8*)input->get_data()) + input->get_sizeBuf() )) - { - // Next character is in natural machine byte order - // - ch2 = *nextChar; - - // If it's a valid low surrogate, consume it - // + { + // Next character is in natural machine byte order + // + ch2 = *nextChar; + + // If it's a valid low surrogate, consume it + // if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END) - { - // We consumed one 16 bit character - // + { + // We consumed one 16 bit character + // nextChar++; - } - // Note that we ignore a valid hi surrogate that has no lo surrogate to go with - // it. - // + } + // Note that we ignore a valid hi surrogate that has no lo surrogate to go with + // it. + // } - // Note that we ignore a valid hi surrogate that has no lo surrogate to go with - // it because the buffer ended - // - } - // Note that we did not check for an invalid low surrogate here, or that fact that the - // lo surrogate was missing. We just picked out one 16 bit character unless the character - // was a valid hi surrogate, in whcih case we consumed two 16 bit characters. - // - } - } - else - { - // We need to go backwards from our input point - // - while (la++ < 0 && (ANTLR_UINT8*)nextChar > (ANTLR_UINT8*)input->get_data() ) - { - // Get the previous 16 bit character - // - ch = *--nextChar; - - // If we found a low surrogate then go back one more character if - // the hi surrogate is there - // + // Note that we ignore a valid hi surrogate that has no lo surrogate to go with + // it because the buffer ended + // + } + // Note that we did not check for an invalid low surrogate here, or that fact that the + // lo surrogate was missing. 
We just picked out one 16 bit character unless the character + // was a valid hi surrogate, in whcih case we consumed two 16 bit characters. + // + } + } + else + { + // We need to go backwards from our input point + // + while (la++ < 0 && (ANTLR_UINT8*)nextChar > (ANTLR_UINT8*)input->get_data() ) + { + // Get the previous 16 bit character + // + ch = *--nextChar; + + // If we found a low surrogate then go back one more character if + // the hi surrogate is there + // if (ch >= UNI_SUR_LOW_START && ch <= UNI_SUR_LOW_END) - { - ch2 = *(nextChar-1); + { + ch2 = *(nextChar-1); if (ch2 >= UNI_SUR_HIGH_START && ch2 <= UNI_SUR_HIGH_END) - { - // Yes, there is a high surrogate to match it so decrement one more and point to that - // - nextChar--; - } - } - } - } - - // Our local copy of nextChar is now pointing to either the correct character or end of file - // - // Input buffer size is always in bytes - // + { + // Yes, there is a high surrogate to match it so decrement one more and point to that + // + nextChar--; + } + } + } + } + + // Our local copy of nextChar is now pointing to either the correct character or end of file + // + // Input buffer size is always in bytes + // if ( (ANTLR_UINT8*)nextChar >= (((ANTLR_UINT8*)input->get_data()) + input->get_sizeBuf() )) { return ANTLR_CHARSTREAM_EOF; } else { - // Pick up the next 16 character (native machine byte order) - // - ch = *nextChar++; - - // If we have a surrogate pair then we need to consume - // a following valid LO surrogate. - // + // Pick up the next 16 character (native machine byte order) + // + ch = *nextChar++; + + // If we have a surrogate pair then we need to consume + // a following valid LO surrogate. + // if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END) - { - // If the 16 bits following the high surrogate are in the source buffer... - // + { + // If the 16 bits following the high surrogate are in the source buffer... 
+ // if ((ANTLR_UINT8*)(nextChar) < (((ANTLR_UINT8*)input->get_data()) + input->get_sizeBuf())) - { - // Next character is in natural machine byte order - // - ch2 = *nextChar; - - // If it's a valid low surrogate, consume it - // + { + // Next character is in natural machine byte order + // + ch2 = *nextChar; + + // If it's a valid low surrogate, consume it + // if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END) - { - // Construct the UTF32 code point - // - ch = ((ch - UNI_SUR_HIGH_START) << halfShift) + { + // Construct the UTF32 code point + // + ch = ((ch - UNI_SUR_HIGH_START) << halfShift) + (ch2 - UNI_SUR_LOW_START) + halfBase; - } - // Note that we ignore a valid hi surrogate that has no lo surrogate to go with - // it. - // + } + // Note that we ignore a valid hi surrogate that has no lo surrogate to go with + // it. + // } - // Note that we ignore a valid hi surrogate that has no lo surrogate to go with - // it because the buffer ended - // - } - } - return ch; -} - -template<class ImplTraits, class SuperType> + // Note that we ignore a valid hi surrogate that has no lo surrogate to go with + // it because the buffer ended + // + } + } + return ch; +} + +template<class ImplTraits, class SuperType> ANTLR_UINT32 UTF16_IntStream<ImplTraits, SuperType>::LA( ANTLR_INT32 la, ClassForwarder<ANTLR_LITTLE_ENDIAN> ) -{ +{ SuperType* input; - UTF32 ch; - UTF32 ch2; - ANTLR_UCHAR* nextChar; - - // Find the input interface and where we are currently pointing to - // in the input stream - // + UTF32 ch; + UTF32 ch2; + ANTLR_UCHAR* nextChar; + + // Find the input interface and where we are currently pointing to + // in the input stream + // input = this->get_super(); - nextChar = input->get_nextChar(); - - // If a positive offset then advance forward, else retreat - // - if (la >= 0) - { - while (--la > 0 && (ANTLR_UINT8*)nextChar < ((ANTLR_UINT8*)input->get_data()) + input->get_sizeBuf() ) - { - // Advance our copy of the input pointer - // - // Next char in 
Little Endian byte order - // - ch = (*nextChar) + (*(nextChar+1) << 8); - nextChar += 2; - - // If we have a surrogate pair then we need to consume - // a following valid LO surrogate. - // + nextChar = input->get_nextChar(); + + // If a positive offset then advance forward, else retreat + // + if (la >= 0) + { + while (--la > 0 && (ANTLR_UINT8*)nextChar < ((ANTLR_UINT8*)input->get_data()) + input->get_sizeBuf() ) + { + // Advance our copy of the input pointer + // + // Next char in Little Endian byte order + // + ch = (*nextChar) + (*(nextChar+1) << 8); + nextChar += 2; + + // If we have a surrogate pair then we need to consume + // a following valid LO surrogate. + // if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END) - { - // If the 16 bits following the high surrogate are in the source buffer... - // + { + // If the 16 bits following the high surrogate are in the source buffer... + // if ((ANTLR_UINT8*)(nextChar) < (((ANTLR_UINT8*)input->get_data()) + input->get_sizeBuf() )) - { - // Next character is in little endian byte order - // - ch2 = (*nextChar) + (*(nextChar+1) << 8); - - // If it's a valid low surrogate, consume it - // + { + // Next character is in little endian byte order + // + ch2 = (*nextChar) + (*(nextChar+1) << 8); + + // If it's a valid low surrogate, consume it + // if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END) - { - // We consumed one 16 bit character - // + { + // We consumed one 16 bit character + // nextChar += 2; - } - // Note that we ignore a valid hi surrogate that has no lo surrogate to go with - // it. - // + } + // Note that we ignore a valid hi surrogate that has no lo surrogate to go with + // it. + // } - // Note that we ignore a valid hi surrogate that has no lo surrogate to go with - // it because the buffer ended - // - } - // Note that we did not check for an invalid low surrogate here, or that fact that the - // lo surrogate was missing. 
We just picked out one 16 bit character unless the character - // was a valid hi surrogate, in whcih case we consumed two 16 bit characters. - // - } - } - else - { - // We need to go backwards from our input point - // - while (la++ < 0 && (ANTLR_UINT8*)nextChar > (ANTLR_UINT8*)input->get_data() ) - { - // Get the previous 16 bit character - // - ch = (*nextChar - 2) + ((*nextChar -1) << 8); - nextChar -= 2; - - // If we found a low surrogate then go back one more character if - // the hi surrogate is there - // + // Note that we ignore a valid hi surrogate that has no lo surrogate to go with + // it because the buffer ended + // + } + // Note that we did not check for an invalid low surrogate here, or that fact that the + // lo surrogate was missing. We just picked out one 16 bit character unless the character + // was a valid hi surrogate, in whcih case we consumed two 16 bit characters. + // + } + } + else + { + // We need to go backwards from our input point + // + while (la++ < 0 && (ANTLR_UINT8*)nextChar > (ANTLR_UINT8*)input->get_data() ) + { + // Get the previous 16 bit character + // + ch = (*nextChar - 2) + ((*nextChar -1) << 8); + nextChar -= 2; + + // If we found a low surrogate then go back one more character if + // the hi surrogate is there + // if (ch >= UNI_SUR_LOW_START && ch <= UNI_SUR_LOW_END) - { - ch2 = (*nextChar - 2) + ((*nextChar -1) << 8); + { + ch2 = (*nextChar - 2) + ((*nextChar -1) << 8); if (ch2 >= UNI_SUR_HIGH_START && ch2 <= UNI_SUR_HIGH_END) - { - // Yes, there is a high surrogate to match it so decrement one more and point to that - // - nextChar -=2; - } - } - } - } - - // Our local copy of nextChar is now pointing to either the correct character or end of file - // - // Input buffer size is always in bytes - // + { + // Yes, there is a high surrogate to match it so decrement one more and point to that + // + nextChar -=2; + } + } + } + } + + // Our local copy of nextChar is now pointing to either the correct character or end of 
file + // + // Input buffer size is always in bytes + // if ( (ANTLR_UINT8*)nextChar >= (((ANTLR_UINT8*)input->get_data()) + input->get_sizeBuf())) { return ANTLR_CHARSTREAM_EOF; } else { - // Pick up the next 16 character (little endian byte order) - // - ch = (*nextChar) + (*(nextChar+1) << 8); - nextChar += 2; - - // If we have a surrogate pair then we need to consume - // a following valid LO surrogate. - // + // Pick up the next 16 character (little endian byte order) + // + ch = (*nextChar) + (*(nextChar+1) << 8); + nextChar += 2; + + // If we have a surrogate pair then we need to consume + // a following valid LO surrogate. + // if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END) - { - // If the 16 bits following the high surrogate are in the source buffer... - // + { + // If the 16 bits following the high surrogate are in the source buffer... + // if ((ANTLR_UINT8*)(nextChar) < (((ANTLR_UINT8*)input->get_data()) + input->get_sizeBuf())) - { - // Next character is in little endian byte order - // - ch2 = (*nextChar) + (*(nextChar+1) << 8); - - // If it's a valid low surrogate, consume it - // + { + // Next character is in little endian byte order + // + ch2 = (*nextChar) + (*(nextChar+1) << 8); + + // If it's a valid low surrogate, consume it + // if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END) - { - // Construct the UTF32 code point - // - ch = ((ch - UNI_SUR_HIGH_START) << halfShift) + { + // Construct the UTF32 code point + // + ch = ((ch - UNI_SUR_HIGH_START) << halfShift) + (ch2 - UNI_SUR_LOW_START) + halfBase; - } - // Note that we ignore a valid hi surrogate that has no lo surrogate to go with - // it. - // + } + // Note that we ignore a valid hi surrogate that has no lo surrogate to go with + // it. 
+ // } - // Note that we ignore a valid hi surrogate that has no lo surrogate to go with - // it because the buffer ended - // - } - } - return ch; -} - -template<class ImplTraits, class SuperType> + // Note that we ignore a valid hi surrogate that has no lo surrogate to go with + // it because the buffer ended + // + } + } + return ch; +} + +template<class ImplTraits, class SuperType> ANTLR_UINT32 UTF16_IntStream<ImplTraits, SuperType>::LA( ANTLR_INT32 la, ClassForwarder<ANTLR_BIG_ENDIAN> ) -{ +{ SuperType* input; - UTF32 ch; - UTF32 ch2; - ANTLR_UCHAR* nextChar; - - // Find the input interface and where we are currently pointing to - // in the input stream - // + UTF32 ch; + UTF32 ch2; + ANTLR_UCHAR* nextChar; + + // Find the input interface and where we are currently pointing to + // in the input stream + // input = this->get_super(); - nextChar = input->get_nextChar(); - - // If a positive offset then advance forward, else retreat - // - if (la >= 0) - { - while (--la > 0 && (ANTLR_UINT8*)nextChar < ((ANTLR_UINT8*)input->get_data()) + input->get_sizeBuf() ) - { - // Advance our copy of the input pointer - // - // Next char in Big Endian byte order - // - ch = ((*nextChar) << 8) + *(nextChar+1); - nextChar += 2; - - // If we have a surrogate pair then we need to consume - // a following valid LO surrogate. - // + nextChar = input->get_nextChar(); + + // If a positive offset then advance forward, else retreat + // + if (la >= 0) + { + while (--la > 0 && (ANTLR_UINT8*)nextChar < ((ANTLR_UINT8*)input->get_data()) + input->get_sizeBuf() ) + { + // Advance our copy of the input pointer + // + // Next char in Big Endian byte order + // + ch = ((*nextChar) << 8) + *(nextChar+1); + nextChar += 2; + + // If we have a surrogate pair then we need to consume + // a following valid LO surrogate. + // if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END) - { - // If the 16 bits following the high surrogate are in the source buffer... 
- // + { + // If the 16 bits following the high surrogate are in the source buffer... + // if ((ANTLR_UINT8*)(nextChar) < (((ANTLR_UINT8*)input->get_data()) + input->get_sizeBuf())) - { - // Next character is in big endian byte order - // - ch2 = ((*nextChar) << 8) + *(nextChar+1); - - // If it's a valid low surrogate, consume it - // + { + // Next character is in big endian byte order + // + ch2 = ((*nextChar) << 8) + *(nextChar+1); + + // If it's a valid low surrogate, consume it + // if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END) - { - // We consumed one 16 bit character - // + { + // We consumed one 16 bit character + // nextChar += 2; - } - // Note that we ignore a valid hi surrogate that has no lo surrogate to go with - // it. - // + } + // Note that we ignore a valid hi surrogate that has no lo surrogate to go with + // it. + // } - // Note that we ignore a valid hi surrogate that has no lo surrogate to go with - // it because the buffer ended - // - } - // Note that we did not check for an invalid low surrogate here, or that fact that the - // lo surrogate was missing. We just picked out one 16 bit character unless the character - // was a valid hi surrogate, in whcih case we consumed two 16 bit characters. - // - } - } - else - { - // We need to go backwards from our input point - // - while (la++ < 0 && (ANTLR_UINT8*)nextChar > (ANTLR_UINT8*)input->get_data() ) - { - // Get the previous 16 bit character - // - ch = ((*nextChar - 2) << 8) + (*nextChar -1); - nextChar -= 2; - - // If we found a low surrogate then go back one more character if - // the hi surrogate is there - // + // Note that we ignore a valid hi surrogate that has no lo surrogate to go with + // it because the buffer ended + // + } + // Note that we did not check for an invalid low surrogate here, or that fact that the + // lo surrogate was missing. 
We just picked out one 16 bit character unless the character + // was a valid hi surrogate, in whcih case we consumed two 16 bit characters. + // + } + } + else + { + // We need to go backwards from our input point + // + while (la++ < 0 && (ANTLR_UINT8*)nextChar > (ANTLR_UINT8*)input->get_data() ) + { + // Get the previous 16 bit character + // + ch = ((*nextChar - 2) << 8) + (*nextChar -1); + nextChar -= 2; + + // If we found a low surrogate then go back one more character if + // the hi surrogate is there + // if (ch >= UNI_SUR_LOW_START && ch <= UNI_SUR_LOW_END) - { - ch2 = ((*nextChar - 2) << 8) + (*nextChar -1); + { + ch2 = ((*nextChar - 2) << 8) + (*nextChar -1); if (ch2 >= UNI_SUR_HIGH_START && ch2 <= UNI_SUR_HIGH_END) - { - // Yes, there is a high surrogate to match it so decrement one more and point to that - // - nextChar -=2; - } - } - } - } - - // Our local copy of nextChar is now pointing to either the correct character or end of file - // - // Input buffer size is always in bytes - // + { + // Yes, there is a high surrogate to match it so decrement one more and point to that + // + nextChar -=2; + } + } + } + } + + // Our local copy of nextChar is now pointing to either the correct character or end of file + // + // Input buffer size is always in bytes + // if ( (ANTLR_UINT8*)nextChar >= (((ANTLR_UINT8*)input->get_data()) + input->get_sizeBuf())) { return ANTLR_CHARSTREAM_EOF; } else { - // Pick up the next 16 character (big endian byte order) - // - ch = ((*nextChar) << 8) + *(nextChar+1); - nextChar += 2; - - // If we have a surrogate pair then we need to consume - // a following valid LO surrogate. - // + // Pick up the next 16 character (big endian byte order) + // + ch = ((*nextChar) << 8) + *(nextChar+1); + nextChar += 2; + + // If we have a surrogate pair then we need to consume + // a following valid LO surrogate. 
+ // if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END) - { - // If the 16 bits following the high surrogate are in the source buffer... - // + { + // If the 16 bits following the high surrogate are in the source buffer... + // if ((ANTLR_UINT8*)(nextChar) < (((ANTLR_UINT8*)input->get_data()) + input->get_sizeBuf())) - { - // Next character is in big endian byte order - // - ch2 = ((*nextChar) << 8) + *(nextChar+1); - - // If it's a valid low surrogate, consume it - // + { + // Next character is in big endian byte order + // + ch2 = ((*nextChar) << 8) + *(nextChar+1); + + // If it's a valid low surrogate, consume it + // if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END) - { - // Construct the UTF32 code point - // - ch = ((ch - UNI_SUR_HIGH_START) << halfShift) + { + // Construct the UTF32 code point + // + ch = ((ch - UNI_SUR_HIGH_START) << halfShift) + (ch2 - UNI_SUR_LOW_START) + halfBase; - } - // Note that we ignore a valid hi surrogate that has no lo surrogate to go with - // it. - // + } + // Note that we ignore a valid hi surrogate that has no lo surrogate to go with + // it. 
+ // } - // Note that we ignore a valid hi surrogate that has no lo surrogate to go with - // it because the buffer ended - // - } - } - return ch; -} - -template<class ImplTraits, class SuperType> + // Note that we ignore a valid hi surrogate that has no lo surrogate to go with + // it because the buffer ended + // + } + } + return ch; +} + +template<class ImplTraits, class SuperType> void UTF16_IntStream<ImplTraits, SuperType>::consume( ClassForwarder<BYTE_AGNOSTIC> ) -{ +{ SuperType* input; - UTF32 ch; - UTF32 ch2; - + UTF32 ch; + UTF32 ch2; + input = this->get_super(); - - // Buffer size is always in bytes - // + + // Buffer size is always in bytes + // if(input->get_nextChar() < (input->get_data() + input->get_sizeBuf()/2) ) { // Indicate one more character in this line // input->inc_charPositionInLine(); - + if ((ANTLR_UCHAR)(*(input->get_nextChar())) == input->get_newlineChar()) { // Reset for start of a new line of input @@ -872,71 +872,71 @@ void UTF16_IntStream<ImplTraits, SuperType>::consume( ClassForwarder<BYTE_AGNOST input->set_charPositionInLine(0); input->set_currentLine( input->get_nextChar() + 1 ); } - + // Increment to next character position, accounting for any surrogates // - // Next char in natural machine byte order - // - ch = *(input->get_nextChar()); - - // We consumed one 16 bit character - // + // Next char in natural machine byte order + // + ch = *(input->get_nextChar()); + + // We consumed one 16 bit character + // input->set_nextChar( input->get_nextChar() + 1 ); - - // If we have a surrogate pair then we need to consume - // a following valid LO surrogate. - // - if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END) { - - // If the 16 bits following the high surrogate are in the source buffer... 
- // - if(input->get_nextChar() < (input->get_data() + input->get_sizeBuf()/2) ) - { - // Next character is in natural machine byte order - // - ch2 = *(input->get_nextChar()); - - // If it's a valid low surrogate, consume it - // + + // If we have a surrogate pair then we need to consume + // a following valid LO surrogate. + // + if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END) { + + // If the 16 bits following the high surrogate are in the source buffer... + // + if(input->get_nextChar() < (input->get_data() + input->get_sizeBuf()/2) ) + { + // Next character is in natural machine byte order + // + ch2 = *(input->get_nextChar()); + + // If it's a valid low surrogate, consume it + // if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END) - { - // We consumed one 16 bit character - // + { + // We consumed one 16 bit character + // input->set_nextChar( input->get_nextChar() + 1 ); - } - // Note that we ignore a valid hi surrogate that has no lo surrogate to go with - // it. - // + } + // Note that we ignore a valid hi surrogate that has no lo surrogate to go with + // it. + // } - // Note that we ignore a valid hi surrogate that has no lo surrogate to go with - // it because the buffer ended - // + // Note that we ignore a valid hi surrogate that has no lo surrogate to go with + // it because the buffer ended + // } - // Note that we did not check for an invalid low surrogate here, or that fact that the - // lo surrogate was missing. We just picked out one 16 bit character unless the character - // was a valid hi surrogate, in whcih case we consumed two 16 bit characters. - // + // Note that we did not check for an invalid low surrogate here, or that fact that the + // lo surrogate was missing. We just picked out one 16 bit character unless the character + // was a valid hi surrogate, in whcih case we consumed two 16 bit characters. 
+ // } - -} - -template<class ImplTraits, class SuperType> + +} + +template<class ImplTraits, class SuperType> void UTF16_IntStream<ImplTraits, SuperType>::consume( ClassForwarder<ANTLR_LITTLE_ENDIAN> ) -{ +{ SuperType* input; - UTF32 ch; - UTF32 ch2; - + UTF32 ch; + UTF32 ch2; + input = this->get_super(); - - // Buffer size is always in bytes - // + + // Buffer size is always in bytes + // if(input->get_nextChar() < (input->get_data() + input->get_sizeBuf()/2) ) { // Indicate one more character in this line // input->inc_charPositionInLine(); - + if ((ANTLR_UCHAR)(*(input->get_nextChar())) == input->get_newlineChar()) { // Reset for start of a new line of input @@ -945,68 +945,68 @@ void UTF16_IntStream<ImplTraits, SuperType>::consume( ClassForwarder<ANTLR_LITTL input->set_charPositionInLine(0); input->set_currentLine(input->get_nextChar() + 1); } - + // Increment to next character position, accounting for any surrogates // - // Next char in litle endian form - // - ch = *((ANTLR_UINT8*)input->get_nextChar()) + (*((ANTLR_UINT8*)input->get_nextChar() + 1) <<8); - - // We consumed one 16 bit character - // + // Next char in litle endian form + // + ch = *((ANTLR_UINT8*)input->get_nextChar()) + (*((ANTLR_UINT8*)input->get_nextChar() + 1) <<8); + + // We consumed one 16 bit character + // input->set_nextChar( input->get_nextChar() + 1); - - // If we have a surrogate pair then we need to consume - // a following valid LO surrogate. - // + + // If we have a surrogate pair then we need to consume + // a following valid LO surrogate. + // if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END) { - // If the 16 bits following the high surrogate are in the source buffer... 
- // - if(input->get_nextChar() < (input->get_data() + input->get_sizeBuf()/2) ) - { - ch2 = *((ANTLR_UINT8*)input->get_nextChar()) + (*((ANTLR_UINT8*)input->get_nextChar() + 1) <<8); - - // If it's a valid low surrogate, consume it - // + // If the 16 bits following the high surrogate are in the source buffer... + // + if(input->get_nextChar() < (input->get_data() + input->get_sizeBuf()/2) ) + { + ch2 = *((ANTLR_UINT8*)input->get_nextChar()) + (*((ANTLR_UINT8*)input->get_nextChar() + 1) <<8); + + // If it's a valid low surrogate, consume it + // if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END) - { - // We consumed one 16 bit character - // + { + // We consumed one 16 bit character + // input->set_nextChar( input->get_nextChar() + 1); - } - // Note that we ignore a valid hi surrogate that has no lo surrogate to go with - // it. - // + } + // Note that we ignore a valid hi surrogate that has no lo surrogate to go with + // it. + // } - // Note that we ignore a valid hi surrogate that has no lo surrogate to go with - // it because the buffer ended - // + // Note that we ignore a valid hi surrogate that has no lo surrogate to go with + // it because the buffer ended + // } - // Note that we did not check for an invalid low surrogate here, or that fact that the - // lo surrogate was missing. We just picked out one 16 bit character unless the character - // was a valid hi surrogate, in whcih case we consumed two 16 bit characters. - // + // Note that we did not check for an invalid low surrogate here, or that fact that the + // lo surrogate was missing. We just picked out one 16 bit character unless the character + // was a valid hi surrogate, in whcih case we consumed two 16 bit characters. 
+ // } -} - -template<class ImplTraits, class SuperType> +} + +template<class ImplTraits, class SuperType> void UTF16_IntStream<ImplTraits, SuperType>::consume( ClassForwarder<ANTLR_BIG_ENDIAN> ) -{ +{ SuperType* input; - UTF32 ch; - UTF32 ch2; - + UTF32 ch; + UTF32 ch2; + input = this->get_super(); - - // Buffer size is always in bytes - // + + // Buffer size is always in bytes + // if(input->get_nextChar() < (input->get_data() + input->get_sizeBuf()/2) ) { // Indicate one more character in this line // input->inc_charPositionInLine(); - + if ((ANTLR_UCHAR)(*(input->get_nextChar())) == input->get_newlineChar()) { // Reset for start of a new line of input @@ -1015,77 +1015,77 @@ void UTF16_IntStream<ImplTraits, SuperType>::consume( ClassForwarder<ANTLR_BIG_E input->set_charPositionInLine(0); input->set_currentLine(input->get_nextChar() + 1); } - + // Increment to next character position, accounting for any surrogates // - // Next char in big endian form - // - ch = *((ANTLR_UINT8*)input->get_nextChar() + 1) + (*((ANTLR_UINT8*)input->get_nextChar() ) <<8); - - // We consumed one 16 bit character - // + // Next char in big endian form + // + ch = *((ANTLR_UINT8*)input->get_nextChar() + 1) + (*((ANTLR_UINT8*)input->get_nextChar() ) <<8); + + // We consumed one 16 bit character + // input->set_nextChar( input->get_nextChar() + 1); - - // If we have a surrogate pair then we need to consume - // a following valid LO surrogate. - // + + // If we have a surrogate pair then we need to consume + // a following valid LO surrogate. + // if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END) { - // If the 16 bits following the high surrogate are in the source buffer... 
- // - if(input->get_nextChar() < (input->get_data() + input->get_sizeBuf()/2) ) - { - // Big endian - // - ch2 = *((ANTLR_UINT8*)input->get_nextChar() + 1) + (*((ANTLR_UINT8*)input->get_nextChar() ) <<8); - - // If it's a valid low surrogate, consume it - // + // If the 16 bits following the high surrogate are in the source buffer... + // + if(input->get_nextChar() < (input->get_data() + input->get_sizeBuf()/2) ) + { + // Big endian + // + ch2 = *((ANTLR_UINT8*)input->get_nextChar() + 1) + (*((ANTLR_UINT8*)input->get_nextChar() ) <<8); + + // If it's a valid low surrogate, consume it + // if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END) - { - // We consumed one 16 bit character - // + { + // We consumed one 16 bit character + // input->set_nextChar( input->get_nextChar() + 1); - } - // Note that we ignore a valid hi surrogate that has no lo surrogate to go with - // it. - // + } + // Note that we ignore a valid hi surrogate that has no lo surrogate to go with + // it. + // } - // Note that we ignore a valid hi surrogate that has no lo surrogate to go with - // it because the buffer ended - // + // Note that we ignore a valid hi surrogate that has no lo surrogate to go with + // it because the buffer ended + // } - // Note that we did not check for an invalid low surrogate here, or that fact that the - // lo surrogate was missing. We just picked out one 16 bit character unless the character - // was a valid hi surrogate, in whcih case we consumed two 16 bit characters. - // + // Note that we did not check for an invalid low surrogate here, or that fact that the + // lo surrogate was missing. We just picked out one 16 bit character unless the character + // was a valid hi surrogate, in whcih case we consumed two 16 bit characters. 
+ // } -} - -template<class ImplTraits, class SuperType> +} + +template<class ImplTraits, class SuperType> ANTLR_UINT32 UTF32_IntStream<ImplTraits, SuperType>::LA( ANTLR_INT32 i) -{ +{ return this->LA( i, ClassForwarder<typename ImplTraits::Endianness>() ); -} - -template<class ImplTraits, class SuperType> +} + +template<class ImplTraits, class SuperType> ANTLR_MARKER UTF32_IntStream<ImplTraits, SuperType>::index() -{ +{ SuperType* input = this->get_super(); - return (ANTLR_MARKER)(input->get_nextChar()); -} - -template<class ImplTraits, class SuperType> -void UTF32_IntStream<ImplTraits, SuperType>::seek(ANTLR_MARKER seekPoint) -{ + return (ANTLR_MARKER)(input->get_nextChar()); +} + +template<class ImplTraits, class SuperType> +void UTF32_IntStream<ImplTraits, SuperType>::seek(ANTLR_MARKER seekPoint) +{ SuperType* input; - + input = this->get_super(); - + // If the requested seek point is less than the current // input point, then we assume that we are resetting from a mark // and do not need to scan, but can just set to there as rewind will - // reset line numbers and so on. + // reset line numbers and so on. 
// if (seekPoint <= (ANTLR_MARKER)(input->get_nextChar())) { @@ -1093,89 +1093,89 @@ void UTF32_IntStream<ImplTraits, SuperType>::seek(ANTLR_MARKER seekPoint) } else { - // Call consume until we reach the asked for seek point or EOF - // + // Call consume until we reach the asked for seek point or EOF + // while( (this->LA(1) != ANTLR_CHARSTREAM_EOF) && (seekPoint < (ANTLR_MARKER)input->get_nextChar()) ) { this->consume(); } } - -} - -template<class ImplTraits, class SuperType> -void UTF32_IntStream<ImplTraits, SuperType>::setupIntStream(bool machineBigEndian, bool inputBigEndian) -{ + +} + +template<class ImplTraits, class SuperType> +void UTF32_IntStream<ImplTraits, SuperType>::setupIntStream(bool machineBigEndian, bool inputBigEndian) +{ SuperType* super = this->get_super(); super->set_charByteSize(4); - + this->findout_endian_spec(machineBigEndian, inputBigEndian); -} - -template<class ImplTraits, class SuperType> +} + +template<class ImplTraits, class SuperType> ANTLR_UINT32 UTF32_IntStream<ImplTraits, SuperType>::LA( ANTLR_INT32 la, ClassForwarder<BYTE_AGNOSTIC> ) -{ - SuperType* input = this->get_super(); - +{ + SuperType* input = this->get_super(); + if (( input->get_nextChar() + la - 1) >= (input->get_data() + input->get_sizeBuf()/4 )) - { + { return ANTLR_CHARSTREAM_EOF; - } - else - { + } + else + { return (ANTLR_UCHAR)(*(input->get_nextChar() + la - 1)); - } -} - -template<class ImplTraits, class SuperType> + } +} + +template<class ImplTraits, class SuperType> ANTLR_UINT32 UTF32_IntStream<ImplTraits, SuperType>::LA( ANTLR_INT32 la, ClassForwarder<ANTLR_LITTLE_ENDIAN> ) -{ +{ SuperType* input = this->get_super(); - + if (( input->get_nextChar() + la - 1) >= (input->get_data() + input->get_sizeBuf()/4 )) - { + { return ANTLR_CHARSTREAM_EOF; - } - else - { - ANTLR_UCHAR c; - - c = (ANTLR_UCHAR)(*(input->get_nextChar() + la - 1)); - - // Swap Endianess to Big Endian - // - return (c>>24) | ((c<<8) & 0x00FF0000) | ((c>>8) & 0x0000FF00) | (c<<24); - } -} - 
-template<class ImplTraits, class SuperType> + } + else + { + ANTLR_UCHAR c; + + c = (ANTLR_UCHAR)(*(input->get_nextChar() + la - 1)); + + // Swap Endianess to Big Endian + // + return (c>>24) | ((c<<8) & 0x00FF0000) | ((c>>8) & 0x0000FF00) | (c<<24); + } +} + +template<class ImplTraits, class SuperType> ANTLR_UINT32 UTF32_IntStream<ImplTraits, SuperType>::LA( ANTLR_INT32 la, ClassForwarder<ANTLR_BIG_ENDIAN> ) -{ +{ SuperType* input = this->get_super(); - + if (( input->get_nextChar() + la - 1) >= (input->get_data() + input->get_sizeBuf()/4 )) - { + { return ANTLR_CHARSTREAM_EOF; - } - else - { - ANTLR_UCHAR c; - - c = (ANTLR_UCHAR)(*(input->get_nextChar() + la - 1)); - - // Swap Endianess to Little Endian - // - return (c>>24) | ((c<<8) & 0x00FF0000) | ((c>>8) & 0x0000FF00) | (c<<24); - } -} - -template<class ImplTraits, class SuperType> + } + else + { + ANTLR_UCHAR c; + + c = (ANTLR_UCHAR)(*(input->get_nextChar() + la - 1)); + + // Swap Endianess to Little Endian + // + return (c>>24) | ((c<<8) & 0x00FF0000) | ((c>>8) & 0x0000FF00) | (c<<24); + } +} + +template<class ImplTraits, class SuperType> void UTF32_IntStream<ImplTraits, SuperType>::consume() -{ +{ SuperType* input = this->get_super(); - - // SizeBuf is always in bytes - // + + // SizeBuf is always in bytes + // if ( input->get_nextChar() < (input->get_data() + input->get_sizeBuf()/4 )) { /* Indicate one more character in this line @@ -1190,33 +1190,33 @@ void UTF32_IntStream<ImplTraits, SuperType>::consume() input->set_charPositionInLine(0); input->set_currentLine( input->get_nextChar() + 1 ); } - + /* Increment to next character position */ input->set_nextChar( input->get_nextChar() + 1 ); - } -} - -template<class ImplTraits, class SuperType> -void UTF8_IntStream<ImplTraits, SuperType>::setupIntStream(bool, bool) -{ + } +} + +template<class ImplTraits, class SuperType> +void UTF8_IntStream<ImplTraits, SuperType>::setupIntStream(bool, bool) +{ SuperType* super = this->get_super(); 
super->set_charByteSize(0); -} - -// ------------------------------------------------------ -// Following is from Unicode.org (see antlr3convertutf.c) -// - -/// Index into the table below with the first byte of a UTF-8 sequence to -/// get the number of trailing bytes that are supposed to follow it. -/// Note that *legal* UTF-8 values can't have 4 or 5-bytes. The table is -/// left as-is for anyone who may want to do such conversion, which was -/// allowed in earlier algorithms. -/// -template<class ImplTraits, class SuperType> -const ANTLR_UINT32* UTF8_IntStream<ImplTraits, SuperType>::TrailingBytesForUTF8() -{ +} + +// ------------------------------------------------------ +// Following is from Unicode.org (see antlr3convertutf.c) +// + +/// Index into the table below with the first byte of a UTF-8 sequence to +/// get the number of trailing bytes that are supposed to follow it. +/// Note that *legal* UTF-8 values can't have 4 or 5-bytes. The table is +/// left as-is for anyone who may want to do such conversion, which was +/// allowed in earlier algorithms. +/// +template<class ImplTraits, class SuperType> +const ANTLR_UINT32* UTF8_IntStream<ImplTraits, SuperType>::TrailingBytesForUTF8() +{ static const ANTLR_UINT32 trailingBytesForUTF8[256] = { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, @@ -1227,66 +1227,66 @@ const ANTLR_UINT32* UTF8_IntStream<ImplTraits, SuperType>::TrailingBytesForUTF8( 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 }; - + return trailingBytesForUTF8; -} - -/// Magic values subtracted from a buffer value during UTF8 conversion. -/// This table contains as many values as there might be trailing bytes -/// in a UTF-8 sequence. 
-/// -template<class ImplTraits, class SuperType> -const UTF32* UTF8_IntStream<ImplTraits, SuperType>::OffsetsFromUTF8() -{ +} + +/// Magic values subtracted from a buffer value during UTF8 conversion. +/// This table contains as many values as there might be trailing bytes +/// in a UTF-8 sequence. +/// +template<class ImplTraits, class SuperType> +const UTF32* UTF8_IntStream<ImplTraits, SuperType>::OffsetsFromUTF8() +{ static const UTF32 offsetsFromUTF8[6] = { 0x00000000UL, 0x00003080UL, 0x000E2080UL, 0x03C82080UL, 0xFA082080UL, 0x82082080UL }; return offsetsFromUTF8; -} - -// End of Unicode.org tables -// ------------------------- - - -/** \brief Consume the next character in a UTF8 input stream - * - * \param input Input stream context pointer - */ -template<class ImplTraits, class SuperType> -void UTF8_IntStream<ImplTraits, SuperType>::consume() -{ - SuperType* input = this->get_super(); +} + +// End of Unicode.org tables +// ------------------------- + + +/** \brief Consume the next character in a UTF8 input stream + * + * \param input Input stream context pointer + */ +template<class ImplTraits, class SuperType> +void UTF8_IntStream<ImplTraits, SuperType>::consume() +{ + SuperType* input = this->get_super(); const ANTLR_UINT32* trailingBytesForUTF8 = UTF8_IntStream::TrailingBytesForUTF8(); const UTF32* offsetsFromUTF8 = UTF8_IntStream::OffsetsFromUTF8(); - - ANTLR_UINT32 extraBytesToRead; - ANTLR_UCHAR ch; - ANTLR_UINT8* nextChar; - - nextChar = input->get_nextChar(); - + + ANTLR_UINT32 extraBytesToRead; + ANTLR_UCHAR ch; + ANTLR_UINT8* nextChar; + + nextChar = input->get_nextChar(); + if (nextChar < (input->get_data() + input->get_sizeBuf())) { // Indicate one more character in this line // input->inc_charPositionInLine(); - // Are there more bytes needed to make up the whole thing? - // - extraBytesToRead = trailingBytesForUTF8[*nextChar]; - + // Are there more bytes needed to make up the whole thing? 
+ // + extraBytesToRead = trailingBytesForUTF8[*nextChar]; + if ((nextChar + extraBytesToRead) >= (input->get_data() + input->get_sizeBuf())) - { - input->set_nextChar( input->get_data() + input->get_sizeBuf() ); - return; - } - - // Cases deliberately fall through (see note A in antlrconvertutf.c) - // Legal UTF8 is only 4 bytes but 6 bytes could be used in old UTF8 so - // we allow it. - // - ch = 0; + { + input->set_nextChar( input->get_data() + input->get_sizeBuf() ); + return; + } + + // Cases deliberately fall through (see note A in antlrconvertutf.c) + // Legal UTF8 is only 4 bytes but 6 bytes could be used in old UTF8 so + // we allow it. + // + ch = 0; switch (extraBytesToRead) { case 5: ch += *nextChar++; ch <<= 6; @@ -1296,9 +1296,9 @@ void UTF8_IntStream<ImplTraits, SuperType>::consume() case 1: ch += *nextChar++; ch <<= 6; case 0: ch += *nextChar++; } - - // Magically correct the input value - // + + // Magically correct the input value + // ch -= offsetsFromUTF8[extraBytesToRead]; if (ch == input->get_newlineChar()) { @@ -1308,182 +1308,182 @@ void UTF8_IntStream<ImplTraits, SuperType>::consume() input->set_charPositionInLine(0); input->set_currentLine(nextChar); } - - // Update input pointer - // - input->set_nextChar(nextChar); - } -} - -/** \brief Return the input element assuming a UTF8 input - * - * \param[in] input Input stream context pointer - * \param[in] la 1 based offset of next input stream element - * - * \return Next input character in internal ANTLR3 encoding (UTF32) - */ -template<class ImplTraits, class SuperType> + + // Update input pointer + // + input->set_nextChar(nextChar); + } +} + +/** \brief Return the input element assuming a UTF8 input + * + * \param[in] input Input stream context pointer + * \param[in] la 1 based offset of next input stream element + * + * \return Next input character in internal ANTLR3 encoding (UTF32) + */ +template<class ImplTraits, class SuperType> ANTLR_UCHAR UTF8_IntStream<ImplTraits, 
SuperType>::LA(ANTLR_INT32 la) -{ - SuperType* input = this->get_super(); +{ + SuperType* input = this->get_super(); const ANTLR_UINT32* trailingBytesForUTF8 = UTF8_IntStream::TrailingBytesForUTF8(); const UTF32* offsetsFromUTF8 = UTF8_IntStream::OffsetsFromUTF8(); - ANTLR_UINT32 extraBytesToRead; - ANTLR_UCHAR ch; - ANTLR_UINT8* nextChar; - - nextChar = input->get_nextChar(); - - // Do we need to traverse forwards or backwards? - // - LA(0) is treated as LA(1) and we assume that the nextChar is - // already positioned. - // - LA(n+) ; n>1 means we must traverse forward n-1 characters catering for UTF8 encoding - // - LA(-n) means we must traverse backwards n chracters - // - if (la > 1) { - - // Make sure that we have at least one character left before trying to - // loop through the buffer. - // + ANTLR_UINT32 extraBytesToRead; + ANTLR_UCHAR ch; + ANTLR_UINT8* nextChar; + + nextChar = input->get_nextChar(); + + // Do we need to traverse forwards or backwards? + // - LA(0) is treated as LA(1) and we assume that the nextChar is + // already positioned. + // - LA(n+) ; n>1 means we must traverse forward n-1 characters catering for UTF8 encoding + // - LA(-n) means we must traverse backwards n chracters + // + if (la > 1) { + + // Make sure that we have at least one character left before trying to + // loop through the buffer. + // if (nextChar < (input->get_data() + input->get_sizeBuf())) { - // Now traverse n-1 characters forward - // - while (--la > 0) - { - // Does the next character require trailing bytes? - // If so advance the pointer by that many bytes as well as advancing - // one position for what will be at least a single byte character. - // - nextChar += trailingBytesForUTF8[*nextChar] + 1; - - // Does that calculation take us past the byte length of the buffer? - // + // Now traverse n-1 characters forward + // + while (--la > 0) + { + // Does the next character require trailing bytes? 
+ // If so advance the pointer by that many bytes as well as advancing + // one position for what will be at least a single byte character. + // + nextChar += trailingBytesForUTF8[*nextChar] + 1; + + // Does that calculation take us past the byte length of the buffer? + // if (nextChar >= (input->get_data() + input->get_sizeBuf())) - { - return ANTLR_CHARSTREAM_EOF; - } - } - } - else - { - return ANTLR_CHARSTREAM_EOF; - } - } - else - { - // LA is negative so we decrease the pointer by n character positions - // - while (nextChar > input->get_data() && la++ < 0) - { - // Traversing backwards in UTF8 means decermenting by one - // then continuing to decrement while ever a character pattern - // is flagged as being a trailing byte of an encoded code point. - // Trailing UTF8 bytes always start with 10 in binary. We assumne that - // the UTF8 is well formed and do not check boundary conditions - // - nextChar--; - while ((*nextChar & 0xC0) == 0x80) - { - nextChar--; - } - } - } - - // nextChar is now pointing at the UTF8 encoded character that we need to - // decode and return. - // - // Are there more bytes needed to make up the whole thing? - // - extraBytesToRead = trailingBytesForUTF8[*nextChar]; + { + return ANTLR_CHARSTREAM_EOF; + } + } + } + else + { + return ANTLR_CHARSTREAM_EOF; + } + } + else + { + // LA is negative so we decrease the pointer by n character positions + // + while (nextChar > input->get_data() && la++ < 0) + { + // Traversing backwards in UTF8 means decermenting by one + // then continuing to decrement while ever a character pattern + // is flagged as being a trailing byte of an encoded code point. + // Trailing UTF8 bytes always start with 10 in binary. We assumne that + // the UTF8 is well formed and do not check boundary conditions + // + nextChar--; + while ((*nextChar & 0xC0) == 0x80) + { + nextChar--; + } + } + } + + // nextChar is now pointing at the UTF8 encoded character that we need to + // decode and return. 
+ // + // Are there more bytes needed to make up the whole thing? + // + extraBytesToRead = trailingBytesForUTF8[*nextChar]; if (nextChar + extraBytesToRead >= (input->get_data() + input->get_sizeBuf())) - { - return ANTLR_CHARSTREAM_EOF; - } - - // Cases deliberately fall through (see note A in antlrconvertutf.c) + { + return ANTLR_CHARSTREAM_EOF; + } + + // Cases deliberately fall through (see note A in antlrconvertutf.c) // - ch = 0; + ch = 0; switch (extraBytesToRead) { - case 5: ch += *nextChar++; ch <<= 6; - case 4: ch += *nextChar++; ch <<= 6; - case 3: ch += *nextChar++; ch <<= 6; - case 2: ch += *nextChar++; ch <<= 6; - case 1: ch += *nextChar++; ch <<= 6; - case 0: ch += *nextChar++; - } - - // Magically correct the input value - // - ch -= offsetsFromUTF8[extraBytesToRead]; - - return ch; -} - -template<class ImplTraits> -TokenIntStream<ImplTraits>::TokenIntStream() -{ + case 5: ch += *nextChar++; ch <<= 6; + case 4: ch += *nextChar++; ch <<= 6; + case 3: ch += *nextChar++; ch <<= 6; + case 2: ch += *nextChar++; ch <<= 6; + case 1: ch += *nextChar++; ch <<= 6; + case 0: ch += *nextChar++; + } + + // Magically correct the input value + // + ch -= offsetsFromUTF8[extraBytesToRead]; + + return ch; +} + +template<class ImplTraits> +TokenIntStream<ImplTraits>::TokenIntStream() +{ m_cachedSize = 0; -} - -template<class ImplTraits> -ANTLR_UINT32 TokenIntStream<ImplTraits>::get_cachedSize() const -{ +} + +template<class ImplTraits> +ANTLR_UINT32 TokenIntStream<ImplTraits>::get_cachedSize() const +{ return m_cachedSize; -} - -template<class ImplTraits> -void TokenIntStream<ImplTraits>::set_cachedSize( ANTLR_UINT32 cachedSize ) -{ +} + +template<class ImplTraits> +void TokenIntStream<ImplTraits>::set_cachedSize( ANTLR_UINT32 cachedSize ) +{ m_cachedSize = cachedSize; -} - -/** Move the input pointer to the next incoming token. The stream - * must become active with LT(1) available. 
consume() simply - * moves the input pointer so that LT(1) points at the next - * input symbol. Consume at least one token. - * - * Walk past any token not on the channel the parser is listening to. - */ -template<class ImplTraits> -void TokenIntStream<ImplTraits>::consume() -{ +} + +/** Move the input pointer to the next incoming token. The stream + * must become active with LT(1) available. consume() simply + * moves the input pointer so that LT(1) points at the next + * input symbol. Consume at least one token. + * + * Walk past any token not on the channel the parser is listening to. + */ +template<class ImplTraits> +void TokenIntStream<ImplTraits>::consume() +{ TokenStreamType* cts = static_cast<TokenStreamType*>(this); - - if((ANTLR_UINT32)cts->get_p() < m_cachedSize ) + + if((ANTLR_UINT32)cts->get_p() < m_cachedSize ) { cts->inc_p(); cts->set_p( cts->skipOffTokenChannels(cts->get_p()) ); } -} -template<class ImplTraits> -void TokenIntStream<ImplTraits>::consumeInitialHiddenTokens() -{ +} +template<class ImplTraits> +void TokenIntStream<ImplTraits>::consumeInitialHiddenTokens() +{ ANTLR_MARKER first; ANTLR_INT32 i; TokenStreamType* ts; - + ts = this->get_super(); first = this->index(); - + for (i=0; i<first; i++) { ts->get_debugger()->consumeHiddenToken(ts->get(i)); } - + ts->set_initialStreamState(false); -} - - -template<class ImplTraits> +} + + +template<class ImplTraits> ANTLR_UINT32 TokenIntStream<ImplTraits>::LA( ANTLR_INT32 i ) -{ +{ const CommonTokenType* tok; TokenStreamType* ts = static_cast<TokenStreamType*>(this); - + tok = ts->LT(i); - + if (tok != NULL) { return tok->get_type(); @@ -1492,170 +1492,170 @@ ANTLR_UINT32 TokenIntStream<ImplTraits>::LA( ANTLR_INT32 i ) { return CommonTokenType::TOKEN_INVALID; } - -} - -template<class ImplTraits> + +} + +template<class ImplTraits> ANTLR_MARKER TokenIntStream<ImplTraits>::mark() -{ - BaseType::m_lastMarker = this->index(); - return BaseType::m_lastMarker; -} - -template<class ImplTraits> -ANTLR_UINT32 
TokenIntStream<ImplTraits>::size() -{ - if (this->get_cachedSize() > 0) - { +{ + BaseType::m_lastMarker = this->index(); + return BaseType::m_lastMarker; +} + +template<class ImplTraits> +ANTLR_UINT32 TokenIntStream<ImplTraits>::size() +{ + if (this->get_cachedSize() > 0) + { return this->get_cachedSize(); - } - TokenStreamType* cts = this->get_super(); - - this->set_cachedSize( static_cast<ANTLR_UINT32>(cts->get_tokens().size()) ); - return this->get_cachedSize(); -} - -template<class ImplTraits> + } + TokenStreamType* cts = this->get_super(); + + this->set_cachedSize( static_cast<ANTLR_UINT32>(cts->get_tokens().size()) ); + return this->get_cachedSize(); +} + +template<class ImplTraits> void TokenIntStream<ImplTraits>::release() -{ - return; -} - -template<class ImplTraits> -ANTLR_MARKER TokenIntStream<ImplTraits>::tindex() -{ +{ + return; +} + +template<class ImplTraits> +ANTLR_MARKER TokenIntStream<ImplTraits>::tindex() +{ return this->get_super()->get_p(); -} - -template<class ImplTraits> +} + +template<class ImplTraits> void TokenIntStream<ImplTraits>::rewindLast() -{ - this->rewind( this->get_lastMarker() ); -} - -template<class ImplTraits> +{ + this->rewind( this->get_lastMarker() ); +} + +template<class ImplTraits> void TokenIntStream<ImplTraits>::rewind(ANTLR_MARKER marker) -{ +{ return this->seek(marker); -} - -template<class ImplTraits> +} + +template<class ImplTraits> void TokenIntStream<ImplTraits>::seek(ANTLR_MARKER index) -{ - TokenStreamType* cts = static_cast<TokenStreamType*>(this); - - cts->set_p( static_cast<ANTLR_INT32>(index) ); -} - - -/// Return a string that represents the name assoicated with the input source -/// -/// /param[in] is The ANTLR3_INT_STREAM interface that is representing this token stream. 
-/// +{ + TokenStreamType* cts = static_cast<TokenStreamType*>(this); + + cts->set_p( static_cast<ANTLR_INT32>(index) ); +} + + +/// Return a string that represents the name assoicated with the input source +/// +/// /param[in] is The ANTLR3_INT_STREAM interface that is representing this token stream. +/// /// /returns -/// /implements ANTLR3_INT_STREAM_struct::getSourceName() -/// -template<class ImplTraits> -typename TokenIntStream<ImplTraits>::StringType -TokenIntStream<ImplTraits>::getSourceName() -{ +/// /implements ANTLR3_INT_STREAM_struct::getSourceName() +/// +template<class ImplTraits> +typename TokenIntStream<ImplTraits>::StringType +TokenIntStream<ImplTraits>::getSourceName() +{ // Slightly convoluted as we must trace back to the lexer's input source // via the token source. The streamName that is here is not initialized // because this is a token stream, not a file or string stream, which are the // only things that have a context for a source name. // return this->get_super()->get_tokenSource()->get_fileName(); -} - -template<class ImplTraits> -void TreeNodeIntStream<ImplTraits>::consume() -{ +} + +template<class ImplTraits> +void TreeNodeIntStream<ImplTraits>::consume() +{ TreeNodeStreamType* ctns = this->get_super(); if( ctns->get_p() == -1 ) ctns->fillBufferRoot(); ctns->inc_p(); -} -template<class ImplTraits> +} +template<class ImplTraits> ANTLR_MARKER TreeNodeIntStream<ImplTraits>::tindex() -{ +{ TreeNodeStreamType* ctns = this->get_super(); return (ANTLR_MARKER)(ctns->get_p()); -} - -template<class ImplTraits> +} + +template<class ImplTraits> ANTLR_UINT32 TreeNodeIntStream<ImplTraits>::LA(ANTLR_INT32 i) -{ +{ TreeNodeStreamType* tns = this->get_super(); - + // Ask LT for the 'token' at that position // TreeTypePtr t = tns->LT(i); - + if (t == NULL) { return CommonTokenType::TOKEN_INVALID; } - + // Token node was there so return the type of it // return t->get_type(); -} - -template<class ImplTraits> +} + +template<class ImplTraits> ANTLR_MARKER 
TreeNodeIntStream<ImplTraits>::mark() -{ +{ TreeNodeStreamType* ctns = this->get_super(); if (ctns->get_p() == -1) { ctns->fillBufferRoot(); } - + // Return the current mark point // this->set_lastMarker( this->index() ); - + return this->get_lastMarker(); - -} - -template<class ImplTraits> + +} + +template<class ImplTraits> void TreeNodeIntStream<ImplTraits>::release(ANTLR_MARKER /*marker*/) -{ - -} - -template<class ImplTraits> -void TreeNodeIntStream<ImplTraits>::rewindMark(ANTLR_MARKER marker) -{ +{ + +} + +template<class ImplTraits> +void TreeNodeIntStream<ImplTraits>::rewindMark(ANTLR_MARKER marker) +{ this->seek(marker); -} - -template<class ImplTraits> -void TreeNodeIntStream<ImplTraits>::rewindLast() -{ +} + +template<class ImplTraits> +void TreeNodeIntStream<ImplTraits>::rewindLast() +{ this->seek( this->get_lastMarker() ); -} - -template<class ImplTraits> +} + +template<class ImplTraits> void TreeNodeIntStream<ImplTraits>::seek(ANTLR_MARKER index) -{ +{ TreeNodeStreamType* ctns = this->get_super(); ctns->set_p( ANTLR_UINT32_CAST(index) ); -} - -template<class ImplTraits> +} + +template<class ImplTraits> ANTLR_UINT32 TreeNodeIntStream<ImplTraits>::size() -{ +{ TreeNodeStreamType* ctns = this->get_super(); if (ctns->get_p() == -1) { ctns->fillBufferRoot(); } - + return ctns->get_nodes().size(); -} - - +} + + } diff --git a/contrib/libs/antlr3_cpp_runtime/include/antlr3lexer.hpp b/contrib/libs/antlr3_cpp_runtime/include/antlr3lexer.hpp index d23e65dea0..66d15e89c1 100644 --- a/contrib/libs/antlr3_cpp_runtime/include/antlr3lexer.hpp +++ b/contrib/libs/antlr3_cpp_runtime/include/antlr3lexer.hpp @@ -1,75 +1,75 @@ -/** \file - * Base interface for any ANTLR3 lexer. - * - * An ANLTR3 lexer builds from two sets of components: - * - * - The runtime components that provide common functionality such as - * traversing character streams, building tokens for output and so on. 
- * - The generated rules and struutre of the actual lexer, which call upon the - * runtime components. - * - * A lexer class contains a character input stream, a base recognizer interface - * (which it will normally implement) and a token source interface (which it also - * implements. The Tokensource interface is called by a token consumer (such as - * a parser, but in theory it can be anything that wants a set of abstract - * tokens in place of a raw character stream. - * - * So then, we set up a lexer in a sequence akin to: - * - * - Create a character stream (something which implements ANTLR3_INPUT_STREAM) - * and initialize it. - * - Create a lexer interface and tell it where it its input stream is. - * This will cause the creation of a base recognizer class, which it will - * override with its own implementations of some methods. The lexer creator - * can also then in turn override anything it likes. - * - The lexer token source interface is then passed to some interface that - * knows how to use it, byte calling for a next token. - * - When a next token is called, let ze lexing begin. - * - */ +/** \file + * Base interface for any ANTLR3 lexer. + * + * An ANLTR3 lexer builds from two sets of components: + * + * - The runtime components that provide common functionality such as + * traversing character streams, building tokens for output and so on. + * - The generated rules and struutre of the actual lexer, which call upon the + * runtime components. + * + * A lexer class contains a character input stream, a base recognizer interface + * (which it will normally implement) and a token source interface (which it also + * implements. The Tokensource interface is called by a token consumer (such as + * a parser, but in theory it can be anything that wants a set of abstract + * tokens in place of a raw character stream. 
+ * + * So then, we set up a lexer in a sequence akin to: + * + * - Create a character stream (something which implements ANTLR3_INPUT_STREAM) + * and initialize it. + * - Create a lexer interface and tell it where it its input stream is. + * This will cause the creation of a base recognizer class, which it will + * override with its own implementations of some methods. The lexer creator + * can also then in turn override anything it likes. + * - The lexer token source interface is then passed to some interface that + * knows how to use it, byte calling for a next token. + * - When a next token is called, let ze lexing begin. + * + */ #ifndef _ANTLR3_LEXER_HPP #define _ANTLR3_LEXER_HPP - -// [The "BSD licence"] -// Copyright (c) 2005-2009 Gokulakannan Somasundaram, ElectronDB - -// -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// 3. The name of the author may not be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR -// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES -// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
-// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, -// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT -// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF -// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -/* Definitions - */ - + +// [The "BSD licence"] +// Copyright (c) 2005-2009 Gokulakannan Somasundaram, ElectronDB + +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// 3. The name of the author may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +/* Definitions + */ + namespace antlr3 { - + static const ANTLR_UINT32 ANTLR_STRING_TERMINATOR = 0xFFFFFFFF; - -template<class ImplTraits> -class Lexer : public ImplTraits::template RecognizerType< typename ImplTraits::InputStreamType >, + +template<class ImplTraits> +class Lexer : public ImplTraits::template RecognizerType< typename ImplTraits::InputStreamType >, public ImplTraits::TokenSourceType -{ -public: +{ +public: typedef typename ImplTraits::AllocPolicyType AllocPolicyType; typedef typename ImplTraits::InputStreamType InputStreamType; typedef InputStreamType StreamType; @@ -83,22 +83,22 @@ public: typedef typename ImplTraits::template ExceptionBaseType<InputStreamType> ExceptionBaseType; typedef typename ImplTraits::BitsetListType BitsetListType; typedef typename ImplTraits::TokenSourceType TokenSourceType; - + typedef typename RecognizerSharedStateType::RuleMemoType RuleMemoType; typedef typename RecognizerType::DebugEventListenerType DebuggerType; - -private: - /** A pointer to the character stream whence this lexer is receiving - * characters. - * TODO: I may come back to this and implement charstream outside - * the input stream as per the java implementation. - */ + +private: + /** A pointer to the character stream whence this lexer is receiving + * characters. + * TODO: I may come back to this and implement charstream outside + * the input stream as per the java implementation. 
+ */ InputStreamType* m_input; - -public: + +public: Lexer(ANTLR_UINT32 sizeHint, RecognizerSharedStateType* state); Lexer(ANTLR_UINT32 sizeHint, InputStreamType* input, RecognizerSharedStateType* state); - + InputStreamType* get_input() const; IntStreamType* get_istream() const; RecognizerType* get_rec(); @@ -138,13 +138,13 @@ public: void consume(); void memoize(ANTLR_MARKER ruleIndex, ANTLR_MARKER ruleParseStart); bool haveParsedRule(ANTLR_MARKER ruleIndex); - - /** Pointer to a function that sets the charstream source for the lexer and - * causes it to be reset. - */ + + /** Pointer to a function that sets the charstream source for the lexer and + * causes it to be reset. + */ void setCharStream(InputStreamType* input); - - /*! + + /*! * \brief * Change to a new input stream, remembering the old one. * @@ -159,7 +159,7 @@ public: * new one. */ void pushCharStream(InputStreamType* input); - + /*! * \brief * Stops using the current input stream and reverts to any prior @@ -175,73 +175,73 @@ public: * The function fails silently if there are no prior input streams. */ void popCharStream(); - - /** Function that emits (a copy of ) the supplied token as the next token in - * the stream. - */ + + /** Function that emits (a copy of ) the supplied token as the next token in + * the stream. + */ void emit(const CommonTokenType* token); - - /** Pointer to a function that constructs a new token from the lexer stored information - */ + + /** Pointer to a function that constructs a new token from the lexer stored information + */ CommonTokenType* emit(); - - /** Pointer to a function that attempts to match and consume the specified string from the input - * stream. Note that strings muse be passed as terminated arrays of ANTLR3_UCHAR. Strings are terminated - * with 0xFFFFFFFF, which is an invalid UTF32 character - */ + + /** Pointer to a function that attempts to match and consume the specified string from the input + * stream. 
Note that strings muse be passed as terminated arrays of ANTLR3_UCHAR. Strings are terminated + * with 0xFFFFFFFF, which is an invalid UTF32 character + */ bool matchs(ANTLR_UCHAR* string); - - /** Pointer to a function that matches and consumes the specified character from the input stream. - * The input stream is required to provide characters via LA() as UTF32 characters. The default lexer - * implementation is source encoding agnostic and so input streams do not generally need to - * override the default implmentation. - */ + + /** Pointer to a function that matches and consumes the specified character from the input stream. + * The input stream is required to provide characters via LA() as UTF32 characters. The default lexer + * implementation is source encoding agnostic and so input streams do not generally need to + * override the default implmentation. + */ bool matchc(ANTLR_UCHAR c); - - /** Pointer to a function that matches any character in the supplied range (I suppose it could be a token range too - * but this would only be useful if the tokens were in tsome guaranteed order which is - * only going to happen with a hand crafted token set). - */ + + /** Pointer to a function that matches any character in the supplied range (I suppose it could be a token range too + * but this would only be useful if the tokens were in tsome guaranteed order which is + * only going to happen with a hand crafted token set). + */ bool matchRange(ANTLR_UCHAR low, ANTLR_UCHAR high); - - /** Pointer to a function that matches the next token/char in the input stream - * regardless of what it actaully is. - */ + + /** Pointer to a function that matches the next token/char in the input stream + * regardless of what it actaully is. + */ void matchAny(); - - /** Pointer to a function that recovers from an error found in the input stream. 
- * Generally, this will be a #ANTLR3_EXCEPTION_NOVIABLE_ALT but it could also - * be from a mismatched token that the (*match)() could not recover from. - */ + + /** Pointer to a function that recovers from an error found in the input stream. + * Generally, this will be a #ANTLR3_EXCEPTION_NOVIABLE_ALT but it could also + * be from a mismatched token that the (*match)() could not recover from. + */ void recover(); - - /** Function to return the current line number in the input stream - */ + + /** Function to return the current line number in the input stream + */ ANTLR_UINT32 getLine(); ANTLR_MARKER getCharIndex(); ANTLR_UINT32 getCharPositionInLine(); - - /** Function to return the text so far for the current token being generated - */ + + /** Function to return the text so far for the current token being generated + */ StringType getText(); - + //Other utility functions void fillExceptionData( ExceptionBaseType* ex ); - + /** Default lexer error handler (works for 8 bit streams only!!!) 
*/ void displayRecognitionError( ANTLR_UINT8** tokenNames, ExceptionBaseType* ex); void exConstruct(); TokenType* getMissingSymbol( IntStreamType* istream, ExceptionBaseType* e, ANTLR_UINT32 expectedTokenType, BitsetListType* follow); - - /** Pointer to a function that knows how to free the resources of a lexer - */ + + /** Pointer to a function that knows how to free the resources of a lexer + */ ~Lexer(); -}; - +}; + } - -#include "antlr3lexer.inl" - -#endif + +#include "antlr3lexer.inl" + +#endif diff --git a/contrib/libs/antlr3_cpp_runtime/include/antlr3lexer.inl b/contrib/libs/antlr3_cpp_runtime/include/antlr3lexer.inl index bf6960569f..fc6ed568b9 100644 --- a/contrib/libs/antlr3_cpp_runtime/include/antlr3lexer.inl +++ b/contrib/libs/antlr3_cpp_runtime/include/antlr3lexer.inl @@ -1,71 +1,71 @@ namespace antlr3 { - -template<class ImplTraits> -Lexer<ImplTraits>::Lexer(ANTLR_UINT32 sizeHint, RecognizerSharedStateType* state) + +template<class ImplTraits> +Lexer<ImplTraits>::Lexer(ANTLR_UINT32 sizeHint, RecognizerSharedStateType* state) :Lexer<ImplTraits>::RecognizerType(sizeHint, state) ,m_input(NULL) -{ -} - -template<class ImplTraits> -Lexer<ImplTraits>::Lexer(ANTLR_UINT32 sizeHint, InputStreamType* input, RecognizerSharedStateType* state) +{ +} + +template<class ImplTraits> +Lexer<ImplTraits>::Lexer(ANTLR_UINT32 sizeHint, InputStreamType* input, RecognizerSharedStateType* state) :Lexer<ImplTraits>::RecognizerType(sizeHint, state) -{ +{ this->setCharStream(input); -} - -template<class ImplTraits> -typename Lexer<ImplTraits>::InputStreamType* Lexer<ImplTraits>::get_input() const -{ +} + +template<class ImplTraits> +typename Lexer<ImplTraits>::InputStreamType* Lexer<ImplTraits>::get_input() const +{ return m_input; -} - -template<class ImplTraits> -typename Lexer<ImplTraits>::IntStreamType* Lexer<ImplTraits>::get_istream() const -{ +} + +template<class ImplTraits> +typename Lexer<ImplTraits>::IntStreamType* Lexer<ImplTraits>::get_istream() const +{ return 
m_input; -} - -template<class ImplTraits> -typename Lexer<ImplTraits>::RecognizerType* Lexer<ImplTraits>::get_rec() -{ +} + +template<class ImplTraits> +typename Lexer<ImplTraits>::RecognizerType* Lexer<ImplTraits>::get_rec() +{ return this; -} - -template<class ImplTraits> -typename Lexer<ImplTraits>::TokenSourceType* Lexer<ImplTraits>::get_tokSource() -{ +} + +template<class ImplTraits> +typename Lexer<ImplTraits>::TokenSourceType* Lexer<ImplTraits>::get_tokSource() +{ return this; -} - -template<class ImplTraits> -void Lexer<ImplTraits>::displayRecognitionError( ANTLR_UINT8** , ExceptionBaseType* ex) -{ +} + +template<class ImplTraits> +void Lexer<ImplTraits>::displayRecognitionError( ANTLR_UINT8** , ExceptionBaseType* ex) +{ StringStreamType err_stream; - + // See if there is a 'filename' we can use - // - if( ex->getName().empty() ) - { + // + if( ex->getName().empty() ) + { err_stream << "-unknown source-("; - } - else - { + } + else + { err_stream << ex->get_streamName().c_str(); err_stream << "("; - } - err_stream << ex->get_line() << ")"; - + } + err_stream << ex->get_line() << ")"; + err_stream << ": lexer error " << ex->getName() << '(' << ex->getType() << ')' << " :\n\t" << ex->get_message() << " at position [" << ex->get_line() << ", " << ex->get_charPositionInLine()+1 << "], "; - + { ANTLR_UINT32 width; - + width = ANTLR_UINT32_CAST(( (ANTLR_UINT8*)(m_input->get_data()) + (m_input->size() )) - (ANTLR_UINT8*)( ex->get_index() )); - + if (width >= 1) { if (isprint(ex->get_c() )) @@ -90,7 +90,7 @@ void Lexer<ImplTraits>::displayRecognitionError( ANTLR_UINT8** , ExceptionBaseTy width = ANTLR_UINT32_CAST(((ANTLR_UINT8*)(m_input->get_data() )+ (m_input->size())) - (ANTLR_UINT8*)(this->get_state()->get_tokenStartCharIndex() )); - + if (width >= 1) { err_stream << "looks like this:\n\t\t"; @@ -104,63 +104,63 @@ void Lexer<ImplTraits>::displayRecognitionError( ANTLR_UINT8** , ExceptionBaseTy } } ImplTraits::displayRecognitionError( err_stream.str() ); -} - 
-template<class ImplTraits> -void Lexer<ImplTraits>::fillExceptionData( ExceptionBaseType* ex ) -{ +} + +template<class ImplTraits> +void Lexer<ImplTraits>::fillExceptionData( ExceptionBaseType* ex ) +{ ex->set_c( m_input->LA(1) ); /* Current input character */ ex->set_line( m_input->get_line() ); /* Line number comes from stream */ ex->set_charPositionInLine( m_input->get_charPositionInLine() ); /* Line offset also comes from the stream */ ex->set_index( m_input->index() ); ex->set_streamName( m_input->get_fileName() ); ex->set_message( "Unexpected character" ); -} - -template<class ImplTraits> +} + +template<class ImplTraits> void Lexer<ImplTraits>::setCharStream(InputStreamType* input) -{ - /* Install the input interface - */ +{ + /* Install the input interface + */ m_input = input; - - /* Set the current token to nothing - */ + + /* Set the current token to nothing + */ RecognizerSharedStateType* state = this->get_rec()->get_state(); - state->set_token_present( false ); + state->set_token_present( false ); state->set_text(""); - state->set_tokenStartCharIndex(-1); - - /* Copy the name of the char stream to the token source - */ - this->get_tokSource()->set_fileName( input->get_fileName() ); -} - -template<class ImplTraits> + state->set_tokenStartCharIndex(-1); + + /* Copy the name of the char stream to the token source + */ + this->get_tokSource()->set_fileName( input->get_fileName() ); +} + +template<class ImplTraits> void Lexer<ImplTraits>::pushCharStream(InputStreamType* input) -{ +{ // We have a stack, so we can save the current input stream // into it. // this->get_istream()->mark(); this->get_rec()->get_state()->get_streams().push(this->get_input()); - + // And now we can install this new one // this->setCharStream(input); -} - -template<class ImplTraits> +} + +template<class ImplTraits> void Lexer<ImplTraits>::popCharStream() -{ +{ InputStreamType* input; - - // If we do not have a stream stack or we are already at the - // stack bottom, then do nothing. 
- // - typename RecognizerSharedStateType::StreamsType& streams = this->get_rec()->get_state()->get_streams(); + + // If we do not have a stream stack or we are already at the + // stack bottom, then do nothing. + // + typename RecognizerSharedStateType::StreamsType& streams = this->get_rec()->get_state()->get_streams(); if ( streams.size() > 0) - { + { // We just leave the current stream to its fate, we do not close // it or anything as we do not know what the programmer intended // for it. This method can always be overridden of course. @@ -169,69 +169,69 @@ void Lexer<ImplTraits>::popCharStream() // input = streams.top(); streams.pop(); - + // Now install the stream as the current one. // this->setCharStream(input); this->get_istream()->rewindLast(); - } - return; -} - -template<class ImplTraits> + } + return; +} + +template<class ImplTraits> void Lexer<ImplTraits>::emit(const CommonTokenType* token) -{ +{ this->get_rec()->get_state()->set_token(token); -} - -template<class ImplTraits> +} + +template<class ImplTraits> typename Lexer<ImplTraits>::CommonTokenType* Lexer<ImplTraits>::emit() -{ +{ /* We could check pointers to token factories and so on, but - * we are in code that we want to run as fast as possible - * so we are not checking any errors. So make sure you have installed an input stream before - * trying to emit a new token. - */ + * we are in code that we want to run as fast as possible + * so we are not checking any errors. So make sure you have installed an input stream before + * trying to emit a new token. 
+ */ RecognizerSharedStateType* state = this->get_rec()->get_state(); state->set_token_present(true); - CommonTokenType* token = state->get_token(); + CommonTokenType* token = state->get_token(); token->set_input( this->get_input() ); - - /* Install the supplied information, and some other bits we already know - * get added automatically, such as the input stream it is associated with - * (though it can all be overridden of course) - */ - token->set_type( state->get_type() ); - token->set_channel( state->get_channel() ); - token->set_startIndex( state->get_tokenStartCharIndex() ); - token->set_stopIndex( this->getCharIndex() - 1 ); - token->set_line( state->get_tokenStartLine() ); - token->set_charPositionInLine( state->get_tokenStartCharPositionInLine() ); - + + /* Install the supplied information, and some other bits we already know + * get added automatically, such as the input stream it is associated with + * (though it can all be overridden of course) + */ + token->set_type( state->get_type() ); + token->set_channel( state->get_channel() ); + token->set_startIndex( state->get_tokenStartCharIndex() ); + token->set_stopIndex( this->getCharIndex() - 1 ); + token->set_line( state->get_tokenStartLine() ); + token->set_charPositionInLine( state->get_tokenStartCharPositionInLine() ); + token->set_tokText( state->get_text() ); - token->set_lineStart( this->get_input()->get_currentLine() ); - - return token; -} - -template<class ImplTraits> -Lexer<ImplTraits>::~Lexer() -{ + token->set_lineStart( this->get_input()->get_currentLine() ); + + return token; +} + +template<class ImplTraits> +Lexer<ImplTraits>::~Lexer() +{ // This may have ben a delegate or delegator lexer, in which case the // state may already have been freed (and set to NULL therefore) // so we ignore the state if we don't have it. 
// RecognizerSharedStateType* state = this->get_rec()->get_state(); - + if ( state != NULL) { state->get_streams().clear(); } -} - -template<class ImplTraits> +} + +template<class ImplTraits> bool Lexer<ImplTraits>::matchs(ANTLR_UCHAR* str ) -{ +{ RecognizerSharedStateType* state = this->get_rec()->get_state(); while (*str != ANTLR_STRING_TERMINATOR) { @@ -242,44 +242,44 @@ bool Lexer<ImplTraits>::matchs(ANTLR_UCHAR* str ) state->set_failed(true); return false; } - + this->exConstruct(); state->set_failed( true ); - + /* TODO: Implement exception creation more fully perhaps */ this->recover(); return false; } - + /* Matched correctly, do consume it */ this->get_istream()->consume(); str++; - + } /* Reset any failed indicator */ state->set_failed( false ); return true; -} - -template<class ImplTraits> +} + +template<class ImplTraits> bool Lexer<ImplTraits>::matchc(ANTLR_UCHAR c) -{ +{ if (this->get_istream()->LA(1) == c) { /* Matched correctly, do consume it */ this->get_istream()->consume(); - + /* Reset any failed indicator */ this->get_rec()->get_state()->set_failed( false ); - + return true; } - + /* Failed to match, exception and recovery time. */ if(this->get_rec()->get_state()->get_backtracking() > 0) @@ -287,306 +287,306 @@ bool Lexer<ImplTraits>::matchc(ANTLR_UCHAR c) this->get_rec()->get_state()->set_failed( true ); return false; } - + this->exConstruct(); - + /* TODO: Implement exception creation more fully perhaps */ this->recover(); - + return false; -} - -template<class ImplTraits> +} + +template<class ImplTraits> bool Lexer<ImplTraits>::matchRange(ANTLR_UCHAR low, ANTLR_UCHAR high) -{ - ANTLR_UCHAR c; - - /* What is in the stream at the moment? - */ +{ + ANTLR_UCHAR c; + + /* What is in the stream at the moment? 
+ */ c = this->get_istream()->LA(1); if ( c >= low && c <= high) - { + { /* Matched correctly, consume it */ this->get_istream()->consume(); - + /* Reset any failed indicator */ this->get_rec()->get_state()->set_failed( false ); - + return true; - } - - /* Failed to match, execption and recovery time. - */ - + } + + /* Failed to match, execption and recovery time. + */ + if (this->get_rec()->get_state()->get_backtracking() > 0) - { + { this->get_rec()->get_state()->set_failed( true ); return false; - } - - this->exConstruct(); - - /* TODO: Implement exception creation more fully - */ - this->recover(); - - return false; -} - -template<class ImplTraits> + } + + this->exConstruct(); + + /* TODO: Implement exception creation more fully + */ + this->recover(); + + return false; +} + +template<class ImplTraits> void Lexer<ImplTraits>::matchAny() -{ +{ this->get_istream()->consume(); -} - -template<class ImplTraits> +} + +template<class ImplTraits> void Lexer<ImplTraits>::recover() -{ +{ this->get_istream()->consume(); -} - -template<class ImplTraits> +} + +template<class ImplTraits> ANTLR_UINT32 Lexer<ImplTraits>::getLine() -{ +{ return this->get_input()->get_line(); -} - -template<class ImplTraits> +} + +template<class ImplTraits> ANTLR_MARKER Lexer<ImplTraits>::getCharIndex() -{ +{ return this->get_istream()->index(); -} - -template<class ImplTraits> +} + +template<class ImplTraits> ANTLR_UINT32 Lexer<ImplTraits>::getCharPositionInLine() -{ +{ return this->get_input()->get_charPositionInLine(); -} - -template<class ImplTraits> +} + +template<class ImplTraits> typename Lexer<ImplTraits>::StringType Lexer<ImplTraits>::getText() -{ +{ RecognizerSharedStateType* state = this->get_rec()->get_state(); if ( !state->get_text().empty() ) { return state->get_text(); - + } return this->get_input()->substr( state->get_tokenStartCharIndex(), this->getCharIndex() - this->get_input()->get_charByteSize() ); -} - -template<class ImplTraits> -void Lexer<ImplTraits>::exConstruct() -{ +} 
+ +template<class ImplTraits> +void Lexer<ImplTraits>::exConstruct() +{ new ANTLR_Exception<ImplTraits, RECOGNITION_EXCEPTION, InputStreamType>( this->get_rec(), "" ); -} - -template< class ImplTraits> +} + +template< class ImplTraits> typename Lexer<ImplTraits>::TokenType* Lexer<ImplTraits>::getMissingSymbol( IntStreamType*, ExceptionBaseType*, ANTLR_UINT32 , BitsetListType*) -{ +{ return NULL; -} - -template< class ImplTraits> -ANTLR_INLINE const typename Lexer<ImplTraits>::RecognizerType* Lexer<ImplTraits>::get_rec() const -{ +} + +template< class ImplTraits> +ANTLR_INLINE const typename Lexer<ImplTraits>::RecognizerType* Lexer<ImplTraits>::get_rec() const +{ return this; -} - -template< class ImplTraits> -ANTLR_INLINE const typename Lexer<ImplTraits>::RecognizerType* Lexer<ImplTraits>::get_recognizer() const -{ +} + +template< class ImplTraits> +ANTLR_INLINE const typename Lexer<ImplTraits>::RecognizerType* Lexer<ImplTraits>::get_recognizer() const +{ return this->get_rec(); -} - -template< class ImplTraits> -ANTLR_INLINE typename Lexer<ImplTraits>::RecognizerSharedStateType* Lexer<ImplTraits>::get_lexstate() const -{ +} + +template< class ImplTraits> +ANTLR_INLINE typename Lexer<ImplTraits>::RecognizerSharedStateType* Lexer<ImplTraits>::get_lexstate() const +{ return this->get_rec()->get_state(); -} - -template< class ImplTraits> -ANTLR_INLINE void Lexer<ImplTraits>::set_lexstate( RecognizerSharedStateType* lexstate ) -{ +} + +template< class ImplTraits> +ANTLR_INLINE void Lexer<ImplTraits>::set_lexstate( RecognizerSharedStateType* lexstate ) +{ this->get_rec()->set_state(lexstate); -} - -template< class ImplTraits> -ANTLR_INLINE const typename Lexer<ImplTraits>::TokenSourceType* Lexer<ImplTraits>::get_tokSource() const -{ +} + +template< class ImplTraits> +ANTLR_INLINE const typename Lexer<ImplTraits>::TokenSourceType* Lexer<ImplTraits>::get_tokSource() const +{ return this; -} - -template< class ImplTraits> -ANTLR_INLINE typename 
Lexer<ImplTraits>::CommonTokenType* Lexer<ImplTraits>::get_ltoken() const -{ +} + +template< class ImplTraits> +ANTLR_INLINE typename Lexer<ImplTraits>::CommonTokenType* Lexer<ImplTraits>::get_ltoken() const +{ return this->get_lexstate()->token(); -} - -template< class ImplTraits> -ANTLR_INLINE void Lexer<ImplTraits>::set_ltoken( const CommonTokenType* ltoken ) -{ +} + +template< class ImplTraits> +ANTLR_INLINE void Lexer<ImplTraits>::set_ltoken( const CommonTokenType* ltoken ) +{ this->get_lexstate()->set_token( ltoken ); -} - -template< class ImplTraits> -ANTLR_INLINE bool Lexer<ImplTraits>::hasFailed() const -{ +} + +template< class ImplTraits> +ANTLR_INLINE bool Lexer<ImplTraits>::hasFailed() const +{ return this->get_lexstate()->get_failed(); -} - -template< class ImplTraits> -ANTLR_INLINE ANTLR_INT32 Lexer<ImplTraits>::get_backtracking() const -{ +} + +template< class ImplTraits> +ANTLR_INLINE ANTLR_INT32 Lexer<ImplTraits>::get_backtracking() const +{ return this->get_lexstate()->get_backtracking(); -} - -template< class ImplTraits> -ANTLR_INLINE void Lexer<ImplTraits>::inc_backtracking() -{ +} + +template< class ImplTraits> +ANTLR_INLINE void Lexer<ImplTraits>::inc_backtracking() +{ this->get_lexstate()->inc_backtracking(); -} - -template< class ImplTraits> -ANTLR_INLINE void Lexer<ImplTraits>::dec_backtracking() -{ +} + +template< class ImplTraits> +ANTLR_INLINE void Lexer<ImplTraits>::dec_backtracking() +{ this->get_lexstate()->dec_backtracking(); -} - -template< class ImplTraits> -ANTLR_INLINE bool Lexer<ImplTraits>::get_failedflag() const -{ +} + +template< class ImplTraits> +ANTLR_INLINE bool Lexer<ImplTraits>::get_failedflag() const +{ return this->get_lexstate()->get_failed(); -} - -template< class ImplTraits> -ANTLR_INLINE void Lexer<ImplTraits>::set_failedflag( bool failed ) -{ +} + +template< class ImplTraits> +ANTLR_INLINE void Lexer<ImplTraits>::set_failedflag( bool failed ) +{ this->get_lexstate()->set_failed(failed); -} - -template< class 
ImplTraits> -ANTLR_INLINE typename Lexer<ImplTraits>::InputStreamType* Lexer<ImplTraits>::get_strstream() const -{ +} + +template< class ImplTraits> +ANTLR_INLINE typename Lexer<ImplTraits>::InputStreamType* Lexer<ImplTraits>::get_strstream() const +{ return this->get_input(); -} - -template< class ImplTraits> -ANTLR_INLINE ANTLR_MARKER Lexer<ImplTraits>::index() const -{ +} + +template< class ImplTraits> +ANTLR_INLINE ANTLR_MARKER Lexer<ImplTraits>::index() const +{ return this->get_istream()->index(); -} - -template< class ImplTraits> +} + +template< class ImplTraits> ANTLR_INLINE void Lexer<ImplTraits>::seek(ANTLR_MARKER index) -{ +{ this->get_istream()->seek(index); -} - -template< class ImplTraits> -ANTLR_INLINE const typename Lexer<ImplTraits>::CommonTokenType* Lexer<ImplTraits>::EOF_Token() const -{ +} + +template< class ImplTraits> +ANTLR_INLINE const typename Lexer<ImplTraits>::CommonTokenType* Lexer<ImplTraits>::EOF_Token() const +{ const CommonTokenType& eof_token = this->get_tokSource()->get_eofToken(); return &eof_token; -} - -template< class ImplTraits> -ANTLR_INLINE bool Lexer<ImplTraits>::hasException() const -{ +} + +template< class ImplTraits> +ANTLR_INLINE bool Lexer<ImplTraits>::hasException() const +{ return this->get_lexstate()->get_error(); -} - -template< class ImplTraits> -ANTLR_INLINE typename Lexer<ImplTraits>::ExceptionBaseType* Lexer<ImplTraits>::get_exception() const -{ +} + +template< class ImplTraits> +ANTLR_INLINE typename Lexer<ImplTraits>::ExceptionBaseType* Lexer<ImplTraits>::get_exception() const +{ return this->get_lexstate()->get_exception(); -} - -template< class ImplTraits> -ANTLR_INLINE void Lexer<ImplTraits>::constructEx() -{ +} + +template< class ImplTraits> +ANTLR_INLINE void Lexer<ImplTraits>::constructEx() +{ this->get_rec()->exConstruct(); -} - -template< class ImplTraits> -ANTLR_INLINE ANTLR_MARKER Lexer<ImplTraits>::mark() -{ +} + +template< class ImplTraits> +ANTLR_INLINE ANTLR_MARKER Lexer<ImplTraits>::mark() +{ 
return this->get_istream()->mark(); -} - -template< class ImplTraits> -ANTLR_INLINE void Lexer<ImplTraits>::rewind(ANTLR_MARKER marker) -{ +} + +template< class ImplTraits> +ANTLR_INLINE void Lexer<ImplTraits>::rewind(ANTLR_MARKER marker) +{ this->get_istream()->rewind(marker); -} - -template< class ImplTraits> -ANTLR_INLINE void Lexer<ImplTraits>::rewindLast() -{ +} + +template< class ImplTraits> +ANTLR_INLINE void Lexer<ImplTraits>::rewindLast() +{ this->get_istream()->rewindLast(); -} - -template< class ImplTraits> +} + +template< class ImplTraits> ANTLR_INLINE void Lexer<ImplTraits>::memoize(ANTLR_MARKER ruleIndex, ANTLR_MARKER ruleParseStart) -{ +{ this->get_rec()->memoize( ruleIndex, ruleParseStart ); -} - -template< class ImplTraits> +} + +template< class ImplTraits> ANTLR_INLINE bool Lexer<ImplTraits>::haveParsedRule(ANTLR_MARKER ruleIndex) -{ +{ return this->get_rec()->alreadyParsedRule(ruleIndex); -} - -template< class ImplTraits> -ANTLR_INLINE void Lexer<ImplTraits>::setText( const StringType& text ) -{ +} + +template< class ImplTraits> +ANTLR_INLINE void Lexer<ImplTraits>::setText( const StringType& text ) +{ this->get_lexstate()->set_text(text); -} - -template< class ImplTraits> -ANTLR_INLINE void Lexer<ImplTraits>::skip() -{ +} + +template< class ImplTraits> +ANTLR_INLINE void Lexer<ImplTraits>::skip() +{ CommonTokenType& skipToken = this->get_tokSource()->get_skipToken(); this->get_lexstate()->set_token( &skipToken ); -} - -template< class ImplTraits> -ANTLR_INLINE typename Lexer<ImplTraits>::RuleMemoType* Lexer<ImplTraits>::getRuleMemo() const -{ +} + +template< class ImplTraits> +ANTLR_INLINE typename Lexer<ImplTraits>::RuleMemoType* Lexer<ImplTraits>::getRuleMemo() const +{ return this->get_lexstate()->get_rulememo(); -} - -template< class ImplTraits> -ANTLR_INLINE void Lexer<ImplTraits>::setRuleMemo(RuleMemoType* rulememo) -{ +} + +template< class ImplTraits> +ANTLR_INLINE void Lexer<ImplTraits>::setRuleMemo(RuleMemoType* rulememo) +{ return 
this->get_lexstate()->set_rulememo(rulememo); -} - -template< class ImplTraits> -ANTLR_INLINE typename Lexer<ImplTraits>::DebuggerType* Lexer<ImplTraits>::get_debugger() const -{ +} + +template< class ImplTraits> +ANTLR_INLINE typename Lexer<ImplTraits>::DebuggerType* Lexer<ImplTraits>::get_debugger() const +{ return this->get_rec()->get_debugger(); -} - -template< class ImplTraits> -ANTLR_INLINE ANTLR_UINT32 Lexer<ImplTraits>::LA(ANTLR_INT32 i) -{ +} + +template< class ImplTraits> +ANTLR_INLINE ANTLR_UINT32 Lexer<ImplTraits>::LA(ANTLR_INT32 i) +{ return this->get_istream()->LA(i); -} - -template< class ImplTraits> -ANTLR_INLINE void Lexer<ImplTraits>::consume() -{ +} + +template< class ImplTraits> +ANTLR_INLINE void Lexer<ImplTraits>::consume() +{ return this->get_istream()->consume(); +} + } - -} - + diff --git a/contrib/libs/antlr3_cpp_runtime/include/antlr3memory.hpp b/contrib/libs/antlr3_cpp_runtime/include/antlr3memory.hpp index 7b85f67545..543df041fd 100644 --- a/contrib/libs/antlr3_cpp_runtime/include/antlr3memory.hpp +++ b/contrib/libs/antlr3_cpp_runtime/include/antlr3memory.hpp @@ -1,39 +1,39 @@ #ifndef _ANTLR3MEMORY_HPP #define _ANTLR3MEMORY_HPP - -// [The "BSD licence"] -// Copyright (c) 2005-2009 Gokulakannan Somasundaram, ElectronDB - -// -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// 3. The name of the author may not be used to endorse or promote products -// derived from this software without specific prior written permission. 
-// -// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR -// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES -// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. -// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, -// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT -// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF -// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - + +// [The "BSD licence"] +// Copyright (c) 2005-2009 Gokulakannan Somasundaram, ElectronDB + +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// 3. The name of the author may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + namespace antlr3 { - -class DefaultAllocPolicy -{ -public: + +class DefaultAllocPolicy +{ +public: //limitation of c++. unable to write a typedef template <class TYPE> class AllocatorType : public std::allocator<TYPE> @@ -49,12 +49,12 @@ public: template<class U> struct rebind { typedef AllocatorType<U> other; }; - + AllocatorType() noexcept {} AllocatorType( const AllocatorType& ) noexcept {} template<typename U> AllocatorType(const AllocatorType<U>& ) noexcept{} }; - + template<class TYPE> class VectorType : public std::vector< TYPE, AllocatorType<TYPE> > { @@ -64,7 +64,7 @@ public: class ListType : public std::deque< TYPE, AllocatorType<TYPE> > { }; - + template<class TYPE> class StackType : public std::deque< TYPE, AllocatorType<TYPE> > { @@ -76,30 +76,30 @@ public: const TYPE& peek() const { return this->back(); } const TYPE& top() const { return this->back(); } }; - - + + template<class TYPE> class OrderedSetType : public std::set< TYPE, std::less<TYPE>, AllocatorType<TYPE> > { }; - + template<class TYPE> class UnOrderedSetType : public std::set< TYPE, std::less<TYPE>, AllocatorType<TYPE> > { }; - + template<class KeyType, class ValueType> class UnOrderedMapType : public std::map< KeyType, ValueType, std::less<KeyType>, AllocatorType<std::pair<const KeyType, ValueType> > > { }; - + template<class KeyType, class ValueType> class OrderedMapType : public std::map< KeyType, ValueType, std::less<KeyType>, AllocatorType<std::pair<KeyType, ValueType> > > { 
}; - + template<class TYPE> class SmartPtrType : public std::unique_ptr<TYPE, std::default_delete<TYPE> > { @@ -119,7 +119,7 @@ public: SmartPtrType & operator=(const SmartPtrType&) /*= delete*/; SmartPtrType(const SmartPtrType&) /*= delete*/; }; - + ANTLR_INLINE static void* operator new (std::size_t bytes) { void* p = alloc(bytes); @@ -136,12 +136,12 @@ public: DefaultAllocPolicy::free(p); } ANTLR_INLINE static void operator delete(void* , void* ) {} //placement delete - + ANTLR_INLINE static void operator delete[](void* p) { DefaultAllocPolicy::free(p); } - + ANTLR_INLINE static void* alloc( std::size_t bytes ) { void* p = malloc(bytes); @@ -149,7 +149,7 @@ public: throw std::bad_alloc(); return p; } - + ANTLR_INLINE static void* alloc0( std::size_t bytes ) { void* p = calloc(1, bytes); @@ -157,7 +157,7 @@ public: throw std::bad_alloc(); return p; } - + ANTLR_INLINE static void free( void* p ) { return ::free(p); @@ -167,8 +167,8 @@ public: { return ::realloc( ptr, size ); } -}; - +}; + } - + #endif /* _ANTLR3MEMORY_H */ diff --git a/contrib/libs/antlr3_cpp_runtime/include/antlr3parser.hpp b/contrib/libs/antlr3_cpp_runtime/include/antlr3parser.hpp index ccf8e9a323..67d4364d86 100644 --- a/contrib/libs/antlr3_cpp_runtime/include/antlr3parser.hpp +++ b/contrib/libs/antlr3_cpp_runtime/include/antlr3parser.hpp @@ -1,73 +1,73 @@ -/** \file - * Base implementation of an ANTLR3 parser. - * - * - */ +/** \file + * Base implementation of an ANTLR3 parser. + * + * + */ #ifndef _ANTLR3_PARSER_HPP #define _ANTLR3_PARSER_HPP - -// [The "BSD licence"] -// Copyright (c) 2005-2009 Gokulakannan Somasundaram, ElectronDB - -// -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// 2. 
Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// 3. The name of the author may not be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR -// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES -// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. -// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, -// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT -// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF -// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - + +// [The "BSD licence"] +// Copyright (c) 2005-2009 Gokulakannan Somasundaram, ElectronDB + +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// 3. The name of the author may not be used to endorse or promote products +// derived from this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + namespace antlr3 { - -/** This is the main interface for an ANTLR3 parser. - */ -template< class ImplTraits > -class Parser : public ImplTraits::template RecognizerType< typename ImplTraits::TokenStreamType > -{ -public: + +/** This is the main interface for an ANTLR3 parser. 
+ */ +template< class ImplTraits > +class Parser : public ImplTraits::template RecognizerType< typename ImplTraits::TokenStreamType > +{ +public: typedef typename ImplTraits::StringType StringType; typedef typename ImplTraits::TokenStreamType TokenStreamType; typedef typename TokenStreamType::IntStreamType IntStreamType; typedef TokenStreamType StreamType; - + typedef typename ImplTraits::template RecognizerType< typename ImplTraits::TokenStreamType > RecognizerType; typedef typename RecognizerType::RecognizerSharedStateType RecognizerSharedStateType; - + typedef DebugEventListener<ImplTraits> DebugEventListenerType; typedef typename ImplTraits::CommonTokenType CommonTokenType; typedef CommonTokenType TokenType; typedef typename ImplTraits::BitsetListType BitsetListType; typedef ANTLR_ExceptionBase<ImplTraits, TokenStreamType> ExceptionBaseType; typedef Empty TokenSourceType; - + typedef typename RecognizerSharedStateType::FollowingType FollowingType; typedef typename RecognizerSharedStateType::RuleMemoType RuleMemoType; typedef typename ImplTraits::DebugEventListenerType DebuggerType; - -private: - /** A provider of a tokenstream interface, for the parser to consume - * tokens from. - */ + +private: + /** A provider of a tokenstream interface, for the parser to consume + * tokens from. + */ TokenStreamType* m_tstream; - -public: + +public: Parser( ANTLR_UINT32 sizeHint, RecognizerSharedStateType* state ); Parser( ANTLR_UINT32 sizeHint, TokenStreamType* tstream, RecognizerSharedStateType* state ); Parser( ANTLR_UINT32 sizeHint, TokenStreamType* tstream, DebugEventListenerType* dbg, @@ -76,40 +76,40 @@ public: TokenStreamType* get_input() const; IntStreamType* get_istream() const; RecognizerType* get_rec(); - + //same as above. Just that get_istream exists for lexer, parser, treeparser //get_parser_istream exists only for parser, treeparser. 
So use it accordingly IntStreamType* get_parser_istream() const; - + /** A pointer to a function that installs a debugger object (it also * installs the debugging versions of the parser methods. This means that * a non debug parser incurs no overhead because of the debugging stuff. */ void setDebugListener(DebugEventListenerType* dbg); - - /** A pointer to a function that installs a token stream - * for the parser. - */ + + /** A pointer to a function that installs a token stream + * for the parser. + */ void setTokenStream(TokenStreamType*); - - /** A pointer to a function that returns the token stream for this - * parser. - */ + + /** A pointer to a function that returns the token stream for this + * parser. + */ TokenStreamType* getTokenStream(); - + void exConstruct(); TokenType* getMissingSymbol( IntStreamType* istream, ExceptionBaseType* e, ANTLR_UINT32 expectedTokenType, BitsetListType* follow); - + void mismatch(ANTLR_UINT32 ttype, BitsetListType* follow); - - /** Pointer to a function that knows how to free resources of an ANTLR3 parser. - */ + + /** Pointer to a function that knows how to free resources of an ANTLR3 parser. + */ ~Parser(); - + void fillExceptionData( ExceptionBaseType* ex ); void displayRecognitionError( ANTLR_UINT8** tokenNames, ExceptionBaseType* ex ); - + //convenience functions exposed in .stg const RecognizerType* get_recognizer() const; RecognizerSharedStateType* get_psrstate() const; @@ -148,21 +148,21 @@ public: DebuggerType* get_debugger() const; TokenStreamType* get_strstream() const; void setRuleMemo(RuleMemoType* rulememo); - -}; - -//Generic rule return value. Unlike the general ANTLR, this gets generated for -//every rule in the target. Handle rule exit here -template<class ImplTraits> -class RuleReturnValue -{ -public: + +}; + +//Generic rule return value. Unlike the general ANTLR, this gets generated for +//every rule in the target. 
Handle rule exit here +template<class ImplTraits> +class RuleReturnValue +{ +public: typedef typename ImplTraits::BaseParserType BaseParserType; typedef typename ImplTraits::CommonTokenType CommonTokenType; - + const CommonTokenType* start; const CommonTokenType* stop; - + RuleReturnValue(BaseParserType* psr = NULL ); RuleReturnValue( const RuleReturnValue& val ); RuleReturnValue& operator=( const RuleReturnValue& val ); @@ -170,17 +170,17 @@ public: void call_stop_placeholder(BaseParserType*); RuleReturnValue& get_struct(); ~RuleReturnValue(); -}; - -//This kind makes sure that whenever tokens are condensed into a rule, -//all the tokens except the start and stop tokens are deleted -template<class ImplTraits> -class RuleReturnValue_1 : public RuleReturnValue<ImplTraits> -{ -public: +}; + +//This kind makes sure that whenever tokens are condensed into a rule, +//all the tokens except the start and stop tokens are deleted +template<class ImplTraits> +class RuleReturnValue_1 : public RuleReturnValue<ImplTraits> +{ +public: typedef RuleReturnValue<ImplTraits> BaseType; typedef typename BaseType::BaseParserType BaseParserType; - + BaseParserType* parser; RuleReturnValue_1(); @@ -188,10 +188,10 @@ public: RuleReturnValue_1( const RuleReturnValue_1& val ); void call_start_placeholder(BaseParserType*); //its dummy here ~RuleReturnValue_1(); -}; - +}; + } - -#include "antlr3parser.inl" - -#endif + +#include "antlr3parser.inl" + +#endif diff --git a/contrib/libs/antlr3_cpp_runtime/include/antlr3parser.inl b/contrib/libs/antlr3_cpp_runtime/include/antlr3parser.inl index bb1e4e6960..0ad5517af2 100644 --- a/contrib/libs/antlr3_cpp_runtime/include/antlr3parser.inl +++ b/contrib/libs/antlr3_cpp_runtime/include/antlr3parser.inl @@ -1,57 +1,57 @@ namespace antlr3 { - -template< class ImplTraits > -Parser<ImplTraits>::Parser( ANTLR_UINT32 sizeHint, RecognizerSharedStateType* state ) + +template< class ImplTraits > +Parser<ImplTraits>::Parser( ANTLR_UINT32 sizeHint, 
RecognizerSharedStateType* state ) : RecognizerType( sizeHint, state ) -{ +{ m_tstream = NULL; -} - -template< class ImplTraits > -Parser<ImplTraits>::Parser( ANTLR_UINT32 sizeHint, TokenStreamType* tstream, +} + +template< class ImplTraits > +Parser<ImplTraits>::Parser( ANTLR_UINT32 sizeHint, TokenStreamType* tstream, RecognizerSharedStateType* state ) : RecognizerType( sizeHint, state ) -{ +{ this->setTokenStream( tstream ); -} - -template< class ImplTraits > -Parser<ImplTraits>::Parser( ANTLR_UINT32 sizeHint, TokenStreamType* tstream, +} + +template< class ImplTraits > +Parser<ImplTraits>::Parser( ANTLR_UINT32 sizeHint, TokenStreamType* tstream, DebugEventListenerType* dbg, RecognizerSharedStateType* state ) : RecognizerType( sizeHint, state ) -{ +{ this->setTokenStream( tstream ); this->setDebugListener( dbg ); -} - -template< class ImplTraits > -ANTLR_INLINE typename Parser<ImplTraits>::TokenStreamType* Parser<ImplTraits>::get_tstream() const -{ +} + +template< class ImplTraits > +ANTLR_INLINE typename Parser<ImplTraits>::TokenStreamType* Parser<ImplTraits>::get_tstream() const +{ return m_tstream; -} - -template< class ImplTraits > -ANTLR_INLINE typename Parser<ImplTraits>::IntStreamType* Parser<ImplTraits>::get_istream() const -{ +} + +template< class ImplTraits > +ANTLR_INLINE typename Parser<ImplTraits>::IntStreamType* Parser<ImplTraits>::get_istream() const +{ return m_tstream; -} - -template< class ImplTraits > -ANTLR_INLINE typename Parser<ImplTraits>::IntStreamType* Parser<ImplTraits>::get_parser_istream() const -{ +} + +template< class ImplTraits > +ANTLR_INLINE typename Parser<ImplTraits>::IntStreamType* Parser<ImplTraits>::get_parser_istream() const +{ return m_tstream; -} - -template< class ImplTraits > -ANTLR_INLINE typename Parser<ImplTraits>::TokenStreamType* Parser<ImplTraits>::get_input() const -{ +} + +template< class ImplTraits > +ANTLR_INLINE typename Parser<ImplTraits>::TokenStreamType* Parser<ImplTraits>::get_input() const +{ return 
m_tstream; -} - -template< class ImplTraits > -void Parser<ImplTraits>::fillExceptionData( ExceptionBaseType* ex ) -{ +} + +template< class ImplTraits > +void Parser<ImplTraits>::fillExceptionData( ExceptionBaseType* ex ) +{ ex->set_token( new CommonTokenType(*(m_tstream->LT(1))) ); /* Current input token (clonned) - held by the exception */ ex->set_line( ex->get_token()->get_line() ); ex->set_charPositionInLine( ex->get_token()->get_charPositionInLine() ); @@ -65,11 +65,11 @@ void Parser<ImplTraits>::fillExceptionData( ExceptionBaseType* ex ) ex->set_streamName( ex->get_token()->get_input()->get_fileName() ); } ex->set_message("Unexpected token"); -} - -template< class ImplTraits > -void Parser<ImplTraits>::displayRecognitionError( ANTLR_UINT8** tokenNames, ExceptionBaseType* ex ) -{ +} + +template< class ImplTraits > +void Parser<ImplTraits>::displayRecognitionError( ANTLR_UINT8** tokenNames, ExceptionBaseType* ex ) +{ typename ImplTraits::StringStreamType errtext; // See if there is a 'filename' we can use // @@ -88,19 +88,19 @@ void Parser<ImplTraits>::displayRecognitionError( ANTLR_UINT8** tokenNames, Exce { errtext << ex->get_streamName() << "("; } - + // Next comes the line number // errtext << this->get_rec()->get_state()->get_exception()->get_line() << ") "; errtext << " : error " << this->get_rec()->get_state()->get_exception()->getType() << " : " << this->get_rec()->get_state()->get_exception()->get_message(); - + // Prepare the knowledge we know we have // const CommonTokenType* theToken = this->get_rec()->get_state()->get_exception()->get_token(); StringType ttext = theToken->toString(); - + errtext << ", at offset , " << this->get_rec()->get_state()->get_exception()->get_charPositionInLine(); if (theToken != NULL) @@ -117,16 +117,16 @@ void Parser<ImplTraits>::displayRecognitionError( ANTLR_UINT8** tokenNames, Exce ? 
"<no text for the token>" : ttext ) << "\n"; } } - + ex->displayRecognitionError( tokenNames, errtext ); ImplTraits::displayRecognitionError( errtext.str() ); -} - -template< class ImplTraits > -Parser<ImplTraits>::~Parser() -{ +} + +template< class ImplTraits > +Parser<ImplTraits>::~Parser() +{ if (this->get_rec() != NULL) - { + { // This may have ben a delegate or delegator parser, in which case the // state may already have been freed (and set to NULL therefore) // so we ignore the state if we don't have it. @@ -136,12 +136,12 @@ Parser<ImplTraits>::~Parser() { state->get_following().clear(); } - } -} - -template< class ImplTraits > + } +} + +template< class ImplTraits > void Parser<ImplTraits>::setDebugListener(DebugEventListenerType* dbg) -{ +{ // Set the debug listener. There are no methods to override // because currently the only ones that notify the debugger // are error reporting and recovery. Hence we can afford to @@ -152,7 +152,7 @@ void Parser<ImplTraits>::setDebugListener(DebugEventListenerType* dbg) // and installed here. 
// this->get_rec()->set_debugger(dbg); - + // If there was a tokenstream installed already // then we need to tell it about the debug interface // @@ -160,43 +160,43 @@ void Parser<ImplTraits>::setDebugListener(DebugEventListenerType* dbg) { this->get_tstream()->setDebugListener(dbg); } -} - -template< class ImplTraits > +} + +template< class ImplTraits > ANTLR_INLINE void Parser<ImplTraits>::setTokenStream(TokenStreamType* tstream) -{ +{ m_tstream = tstream; - this->get_rec()->reset(); -} - -template< class ImplTraits > + this->get_rec()->reset(); +} + +template< class ImplTraits > ANTLR_INLINE typename Parser<ImplTraits>::TokenStreamType* Parser<ImplTraits>::getTokenStream() -{ +{ return m_tstream; -} - -template< class ImplTraits > -ANTLR_INLINE typename Parser<ImplTraits>::RecognizerType* Parser<ImplTraits>::get_rec() -{ +} + +template< class ImplTraits > +ANTLR_INLINE typename Parser<ImplTraits>::RecognizerType* Parser<ImplTraits>::get_rec() +{ return this; -} - -template< class ImplTraits > -ANTLR_INLINE void Parser<ImplTraits>::exConstruct() -{ +} + +template< class ImplTraits > +ANTLR_INLINE void Parser<ImplTraits>::exConstruct() +{ new ANTLR_Exception<ImplTraits, MISMATCHED_TOKEN_EXCEPTION, StreamType>( this->get_rec(), "" ); -} - -template< class ImplTraits > +} + +template< class ImplTraits > typename Parser<ImplTraits>::TokenType* Parser<ImplTraits>::getMissingSymbol( IntStreamType* istream, ExceptionBaseType*, ANTLR_UINT32 expectedTokenType, BitsetListType* ) -{ +{ // Dereference the standard pointers // TokenStreamType *cts = static_cast<TokenStreamType*>(istream); - + // Work out what to use as the current symbol to make a line and offset etc // If we are at EOF, we use the token before EOF // @@ -205,9 +205,9 @@ typename Parser<ImplTraits>::TokenType* Parser<ImplTraits>::getMissingSymbol( In { current = cts->LT(-1); } - + CommonTokenType* token = new CommonTokenType; - + // Set some of the token properties based on the current token // 
token->set_line(current->get_line()); @@ -229,19 +229,19 @@ typename Parser<ImplTraits>::TokenType* Parser<ImplTraits>::getMissingSymbol( In // Finally return the pointer to our new token // return token; -} - -template< class ImplTraits > -void Parser<ImplTraits>::mismatch(ANTLR_UINT32 ttype, BitsetListType* follow) -{ - // Install a mismatched token exception in the exception stack - // +} + +template< class ImplTraits > +void Parser<ImplTraits>::mismatch(ANTLR_UINT32 ttype, BitsetListType* follow) +{ + // Install a mismatched token exception in the exception stack + // new ANTLR_Exception<ImplTraits, MISMATCHED_TOKEN_EXCEPTION, StreamType>(this, ""); - + //With the statement below, only the parsers are allowed to compile fine IntStreamType* is = this->get_istream(); - - + + if (this->mismatchIsUnwantedToken(is, ttype)) { // Now update it to indicate this is an unwanted token exception @@ -249,7 +249,7 @@ void Parser<ImplTraits>::mismatch(ANTLR_UINT32 ttype, BitsetListType* follow) new ANTLR_Exception<ImplTraits, UNWANTED_TOKEN_EXCEPTION, StreamType>(this, ""); return; } - + if ( this->mismatchIsMissingToken(is, follow)) { // Now update it to indicate this is an unwanted token exception @@ -257,310 +257,310 @@ void Parser<ImplTraits>::mismatch(ANTLR_UINT32 ttype, BitsetListType* follow) new ANTLR_Exception<ImplTraits, MISSING_TOKEN_EXCEPTION, StreamType>(this, ""); return; } - + // Just a mismatched token is all we can dtermine // new ANTLR_Exception<ImplTraits, MISMATCHED_TOKEN_EXCEPTION, StreamType>(this, ""); - + return; -} - -template< class ImplTraits> -ANTLR_INLINE const typename Parser<ImplTraits>::RecognizerType* Parser<ImplTraits>::get_recognizer() const -{ +} + +template< class ImplTraits> +ANTLR_INLINE const typename Parser<ImplTraits>::RecognizerType* Parser<ImplTraits>::get_recognizer() const +{ return this; -} - -template< class ImplTraits> -ANTLR_INLINE typename Parser<ImplTraits>::RecognizerSharedStateType* Parser<ImplTraits>::get_psrstate() const 
-{ +} + +template< class ImplTraits> +ANTLR_INLINE typename Parser<ImplTraits>::RecognizerSharedStateType* Parser<ImplTraits>::get_psrstate() const +{ return this->get_recognizer()->get_state(); -} - -template< class ImplTraits> -ANTLR_INLINE void Parser<ImplTraits>::set_psrstate(RecognizerSharedStateType* state) -{ +} + +template< class ImplTraits> +ANTLR_INLINE void Parser<ImplTraits>::set_psrstate(RecognizerSharedStateType* state) +{ this->get_rec()->set_state( state ); -} - -template< class ImplTraits> +} + +template< class ImplTraits> ANTLR_INLINE bool Parser<ImplTraits>::haveParsedRule(ANTLR_MARKER ruleIndex) -{ +{ return this->get_rec()->alreadyParsedRule(ruleIndex); -} - -template< class ImplTraits> +} + +template< class ImplTraits> ANTLR_INLINE void Parser<ImplTraits>::memoize(ANTLR_MARKER ruleIndex, ANTLR_MARKER ruleParseStart) -{ +{ return this->get_rec()->memoize( ruleIndex, ruleParseStart ); -} - -template< class ImplTraits> -ANTLR_INLINE ANTLR_MARKER Parser<ImplTraits>::index() const -{ +} + +template< class ImplTraits> +ANTLR_INLINE ANTLR_MARKER Parser<ImplTraits>::index() const +{ return this->get_istream()->index(); -} - -template< class ImplTraits> -ANTLR_INLINE bool Parser<ImplTraits>::hasException() const -{ +} + +template< class ImplTraits> +ANTLR_INLINE bool Parser<ImplTraits>::hasException() const +{ return this->get_psrstate()->get_error(); -} - -template< class ImplTraits> -ANTLR_INLINE typename Parser<ImplTraits>::ExceptionBaseType* Parser<ImplTraits>::get_exception() const -{ +} + +template< class ImplTraits> +ANTLR_INLINE typename Parser<ImplTraits>::ExceptionBaseType* Parser<ImplTraits>::get_exception() const +{ return this->get_psrstate()->get_exception(); -} - -template< class ImplTraits> -ANTLR_INLINE const typename Parser<ImplTraits>::CommonTokenType* Parser<ImplTraits>::matchToken( ANTLR_UINT32 ttype, BitsetListType* follow ) -{ +} + +template< class ImplTraits> +ANTLR_INLINE const typename Parser<ImplTraits>::CommonTokenType* 
Parser<ImplTraits>::matchToken( ANTLR_UINT32 ttype, BitsetListType* follow ) +{ return this->get_rec()->match( ttype, follow ); -} - -template< class ImplTraits> -ANTLR_INLINE void Parser<ImplTraits>::matchAnyToken() -{ +} + +template< class ImplTraits> +ANTLR_INLINE void Parser<ImplTraits>::matchAnyToken() +{ return this->get_rec()->matchAny(); -} - -template< class ImplTraits> -ANTLR_INLINE const typename Parser<ImplTraits>::FollowingType& Parser<ImplTraits>::get_follow_stack() const -{ +} + +template< class ImplTraits> +ANTLR_INLINE const typename Parser<ImplTraits>::FollowingType& Parser<ImplTraits>::get_follow_stack() const +{ return this->get_psrstate()->get_following(); -} - -template< class ImplTraits> -ANTLR_INLINE void Parser<ImplTraits>::followPush(const BitsetListType& follow) -{ -#ifndef SKIP_FOLLOW_SETS +} + +template< class ImplTraits> +ANTLR_INLINE void Parser<ImplTraits>::followPush(const BitsetListType& follow) +{ +#ifndef SKIP_FOLLOW_SETS this->get_rec()->get_state()->get_following().push(follow); -#endif -} - -template< class ImplTraits> -ANTLR_INLINE void Parser<ImplTraits>::followPop() -{ -#ifndef SKIP_FOLLOW_SETS +#endif +} + +template< class ImplTraits> +ANTLR_INLINE void Parser<ImplTraits>::followPop() +{ +#ifndef SKIP_FOLLOW_SETS this->get_rec()->get_state()->get_following().pop(); -#endif -} - -template< class ImplTraits> -ANTLR_INLINE void Parser<ImplTraits>::precover() -{ +#endif +} + +template< class ImplTraits> +ANTLR_INLINE void Parser<ImplTraits>::precover() +{ return this->get_rec()->recover(); -} - -template< class ImplTraits> -ANTLR_INLINE void Parser<ImplTraits>::preporterror() -{ +} + +template< class ImplTraits> +ANTLR_INLINE void Parser<ImplTraits>::preporterror() +{ return this->get_rec()->reportError(); -} - -template< class ImplTraits> -ANTLR_INLINE ANTLR_UINT32 Parser<ImplTraits>::LA(ANTLR_INT32 i) -{ +} + +template< class ImplTraits> +ANTLR_INLINE ANTLR_UINT32 Parser<ImplTraits>::LA(ANTLR_INT32 i) +{ return 
this->get_istream()->LA(i); -} - -template< class ImplTraits> -ANTLR_INLINE const typename Parser<ImplTraits>::CommonTokenType* Parser<ImplTraits>::LT(ANTLR_INT32 k) -{ +} + +template< class ImplTraits> +ANTLR_INLINE const typename Parser<ImplTraits>::CommonTokenType* Parser<ImplTraits>::LT(ANTLR_INT32 k) +{ return this->get_input()->LT(k); -} - -template< class ImplTraits> -ANTLR_INLINE void Parser<ImplTraits>::constructEx() -{ +} + +template< class ImplTraits> +ANTLR_INLINE void Parser<ImplTraits>::constructEx() +{ this->get_rec()->constructEx(); -} - -template< class ImplTraits> -ANTLR_INLINE void Parser<ImplTraits>::consume() -{ +} + +template< class ImplTraits> +ANTLR_INLINE void Parser<ImplTraits>::consume() +{ this->get_istream()->consume(); -} - -template< class ImplTraits> -ANTLR_INLINE ANTLR_MARKER Parser<ImplTraits>::mark() -{ +} + +template< class ImplTraits> +ANTLR_INLINE ANTLR_MARKER Parser<ImplTraits>::mark() +{ return this->get_istream()->mark(); -} - -template< class ImplTraits> -ANTLR_INLINE void Parser<ImplTraits>::rewind(ANTLR_MARKER marker) -{ +} + +template< class ImplTraits> +ANTLR_INLINE void Parser<ImplTraits>::rewind(ANTLR_MARKER marker) +{ this->get_istream()->rewind(marker); -} - -template< class ImplTraits> -ANTLR_INLINE void Parser<ImplTraits>::rewindLast() -{ +} + +template< class ImplTraits> +ANTLR_INLINE void Parser<ImplTraits>::rewindLast() +{ this->get_istream()->rewindLast(); -} - -template< class ImplTraits> -ANTLR_INLINE void Parser<ImplTraits>::seek(ANTLR_MARKER index) -{ +} + +template< class ImplTraits> +ANTLR_INLINE void Parser<ImplTraits>::seek(ANTLR_MARKER index) +{ this->get_istream()->seek(index); -} - -template< class ImplTraits> -ANTLR_INLINE bool Parser<ImplTraits>::get_perror_recovery() const -{ +} + +template< class ImplTraits> +ANTLR_INLINE bool Parser<ImplTraits>::get_perror_recovery() const +{ return this->get_psrstate()->get_errorRecovery(); -} - -template< class ImplTraits> -ANTLR_INLINE void 
Parser<ImplTraits>::set_perror_recovery( bool val ) -{ +} + +template< class ImplTraits> +ANTLR_INLINE void Parser<ImplTraits>::set_perror_recovery( bool val ) +{ this->get_psrstate()->set_errorRecovery(val); -} - -template< class ImplTraits> -ANTLR_INLINE bool Parser<ImplTraits>::hasFailed() const -{ +} + +template< class ImplTraits> +ANTLR_INLINE bool Parser<ImplTraits>::hasFailed() const +{ return this->get_psrstate()->get_failed(); -} - -template< class ImplTraits> -ANTLR_INLINE bool Parser<ImplTraits>::get_failedflag() const -{ +} + +template< class ImplTraits> +ANTLR_INLINE bool Parser<ImplTraits>::get_failedflag() const +{ return this->get_psrstate()->get_failed(); -} - -template< class ImplTraits> -ANTLR_INLINE void Parser<ImplTraits>::set_failedflag( bool failed ) -{ +} + +template< class ImplTraits> +ANTLR_INLINE void Parser<ImplTraits>::set_failedflag( bool failed ) +{ this->get_psrstate()->set_failed(failed); -} - -template< class ImplTraits> -ANTLR_INLINE ANTLR_INT32 Parser<ImplTraits>::get_backtracking() const -{ +} + +template< class ImplTraits> +ANTLR_INLINE ANTLR_INT32 Parser<ImplTraits>::get_backtracking() const +{ return this->get_psrstate()->get_backtracking(); -} - -template< class ImplTraits> -ANTLR_INLINE void Parser<ImplTraits>::inc_backtracking() -{ +} + +template< class ImplTraits> +ANTLR_INLINE void Parser<ImplTraits>::inc_backtracking() +{ this->get_psrstate()->inc_backtracking(); -} - -template< class ImplTraits> -ANTLR_INLINE void Parser<ImplTraits>::dec_backtracking() -{ +} + +template< class ImplTraits> +ANTLR_INLINE void Parser<ImplTraits>::dec_backtracking() +{ this->get_psrstate()->dec_backtracking(); -} - -template< class ImplTraits> +} + +template< class ImplTraits> ANTLR_INLINE typename Parser<ImplTraits>::CommonTokenType* Parser<ImplTraits>::recoverFromMismatchedSet(BitsetListType* follow) -{ +{ return this->get_rec()->recoverFromMismatchedSet(follow); -} - -template< class ImplTraits> +} + +template< class ImplTraits> 
ANTLR_INLINE bool Parser<ImplTraits>::recoverFromMismatchedElement(BitsetListType* follow) -{ +{ return this->get_rec()->recoverFromMismatchedElement(follow); -} - -template< class ImplTraits> -ANTLR_INLINE typename Parser<ImplTraits>::RuleMemoType* Parser<ImplTraits>::getRuleMemo() const -{ +} + +template< class ImplTraits> +ANTLR_INLINE typename Parser<ImplTraits>::RuleMemoType* Parser<ImplTraits>::getRuleMemo() const +{ return this->get_psrstate()->get_ruleMemo(); -} - -template< class ImplTraits> -ANTLR_INLINE void Parser<ImplTraits>::setRuleMemo(RuleMemoType* rulememo) -{ +} + +template< class ImplTraits> +ANTLR_INLINE void Parser<ImplTraits>::setRuleMemo(RuleMemoType* rulememo) +{ this->get_psrstate()->set_ruleMemo(rulememo); -} - -template< class ImplTraits> -ANTLR_INLINE typename Parser<ImplTraits>::DebuggerType* Parser<ImplTraits>::get_debugger() const -{ +} + +template< class ImplTraits> +ANTLR_INLINE typename Parser<ImplTraits>::DebuggerType* Parser<ImplTraits>::get_debugger() const +{ return this->get_rec()->get_debugger(); -} - -template< class ImplTraits> -ANTLR_INLINE typename Parser<ImplTraits>::TokenStreamType* Parser<ImplTraits>::get_strstream() const -{ +} + +template< class ImplTraits> +ANTLR_INLINE typename Parser<ImplTraits>::TokenStreamType* Parser<ImplTraits>::get_strstream() const +{ return this->get_tstream(); -} - -template< class ImplTraits> +} + +template< class ImplTraits> ANTLR_INLINE RuleReturnValue<ImplTraits>::RuleReturnValue(BaseParserType* /*psr*/) -{ +{ start = NULL; stop = NULL; -} - -template< class ImplTraits> -ANTLR_INLINE RuleReturnValue<ImplTraits>::RuleReturnValue( const RuleReturnValue& val ) -{ +} + +template< class ImplTraits> +ANTLR_INLINE RuleReturnValue<ImplTraits>::RuleReturnValue( const RuleReturnValue& val ) +{ start = val.start; stop = val.stop; -} - -template< class ImplTraits> -ANTLR_INLINE RuleReturnValue<ImplTraits>& RuleReturnValue<ImplTraits>::operator=( const RuleReturnValue& val ) -{ +} + +template< 
class ImplTraits> +ANTLR_INLINE RuleReturnValue<ImplTraits>& RuleReturnValue<ImplTraits>::operator=( const RuleReturnValue& val ) +{ start = val.start; stop = val.stop; return *this; -} - -template< class ImplTraits> -ANTLR_INLINE RuleReturnValue<ImplTraits>::~RuleReturnValue() -{ -} - -template< class ImplTraits> +} + +template< class ImplTraits> +ANTLR_INLINE RuleReturnValue<ImplTraits>::~RuleReturnValue() +{ +} + +template< class ImplTraits> ANTLR_INLINE void RuleReturnValue<ImplTraits>::call_start_placeholder(BaseParserType *parser) -{ +{ start = parser->LT(1); stop = start; -} - -template< class ImplTraits> +} + +template< class ImplTraits> ANTLR_INLINE void RuleReturnValue<ImplTraits>::call_stop_placeholder(BaseParserType *parser) -{ +{ stop = parser->LT(-1); -} - -template< class ImplTraits> -ANTLR_INLINE RuleReturnValue_1<ImplTraits>::RuleReturnValue_1() +} + +template< class ImplTraits> +ANTLR_INLINE RuleReturnValue_1<ImplTraits>::RuleReturnValue_1() : parser() -{ -} - -template< class ImplTraits> -RuleReturnValue_1<ImplTraits>::RuleReturnValue_1( BaseParserType* psr ) +{ +} + +template< class ImplTraits> +RuleReturnValue_1<ImplTraits>::RuleReturnValue_1( BaseParserType* psr ) : RuleReturnValue_1<ImplTraits>::BaseType(psr) , parser(psr) -{ +{ BaseType::start = psr->LT(1); BaseType::stop = BaseType::start; -} - -template< class ImplTraits> -RuleReturnValue_1<ImplTraits>::RuleReturnValue_1( const RuleReturnValue_1& val ) +} + +template< class ImplTraits> +RuleReturnValue_1<ImplTraits>::RuleReturnValue_1( const RuleReturnValue_1& val ) : RuleReturnValue_1<ImplTraits>::BaseType(val) , parser(val.parser) -{ -} - -template< class ImplTraits> +{ +} + +template< class ImplTraits> void RuleReturnValue_1<ImplTraits>::call_start_placeholder(BaseParserType*) -{ -} - -template< class ImplTraits> -RuleReturnValue_1<ImplTraits>::~RuleReturnValue_1() -{ +{ +} + +template< class ImplTraits> +RuleReturnValue_1<ImplTraits>::~RuleReturnValue_1() +{ if( parser && 
parser->get_backtracking() == 0 ) { if( BaseType::stop == NULL ) @@ -574,6 +574,6 @@ RuleReturnValue_1<ImplTraits>::~RuleReturnValue_1() parser->getTokenStream()->discardTokens( start_token_idx, stop_token_idx); } } -} - +} + } diff --git a/contrib/libs/antlr3_cpp_runtime/include/antlr3recognizersharedstate.hpp b/contrib/libs/antlr3_cpp_runtime/include/antlr3recognizersharedstate.hpp index ef0855ea08..3fd7da6886 100644 --- a/contrib/libs/antlr3_cpp_runtime/include/antlr3recognizersharedstate.hpp +++ b/contrib/libs/antlr3_cpp_runtime/include/antlr3recognizersharedstate.hpp @@ -1,58 +1,58 @@ -/** \file - * While the C runtime does not need to model the state of - * multiple lexers and parsers in the same way as the Java runtime does - * it is no overhead to reflect that model. In fact the - * C runtime has always been able to share recognizer state. - * - * This 'class' therefore defines all the elements of a recognizer - * (either lexer, parser or tree parser) that are need to - * track the current recognition state. Multiple recognizers - * may then share this state, for instance when one grammar - * imports another. - */ - +/** \file + * While the C runtime does not need to model the state of + * multiple lexers and parsers in the same way as the Java runtime does + * it is no overhead to reflect that model. In fact the + * C runtime has always been able to share recognizer state. + * + * This 'class' therefore defines all the elements of a recognizer + * (either lexer, parser or tree parser) that are need to + * track the current recognition state. Multiple recognizers + * may then share this state, for instance when one grammar + * imports another. + */ + #ifndef _ANTLR3_RECOGNIZER_SHARED_STATE_HPP #define _ANTLR3_RECOGNIZER_SHARED_STATE_HPP - -// [The "BSD licence"] -// Copyright (c) 2005-2009 Gokulakannan Somasundaram, ElectronDB - -// -// All rights reserved. 
-// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// 3. The name of the author may not be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR -// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES -// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. -// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, -// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT -// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF -// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - + +// [The "BSD licence"] +// Copyright (c) 2005-2009 Gokulakannan Somasundaram, ElectronDB + +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// 2. 
Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// 3. The name of the author may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + namespace antlr3 { - -/** All the data elements required to track the current state - * of any recognizer (lexer, parser, tree parser). - * May be share between multiple recognizers such that - * grammar inheritance is easily supported. - */ -template<class ImplTraits, class StreamType> -class RecognizerSharedState : public ImplTraits::AllocPolicyType -{ -public: + +/** All the data elements required to track the current state + * of any recognizer (lexer, parser, tree parser). + * May be share between multiple recognizers such that + * grammar inheritance is easily supported. 
+ */ +template<class ImplTraits, class StreamType> +class RecognizerSharedState : public ImplTraits::AllocPolicyType +{ +public: typedef typename ImplTraits::AllocPolicyType AllocPolicyType; typedef typename StreamType::UnitType TokenType; typedef typename ImplTraits::CommonTokenType CommonTokenType; @@ -64,152 +64,152 @@ public: typedef typename ImplTraits::template ExceptionBaseType<StreamType> ExceptionBaseType; typedef typename ImplTraits::BitsetType BitsetType; typedef typename ImplTraits::BitsetListType BitsetListType; - + typedef typename ImplTraits::TreeAdaptorType TreeAdaptorType; - + typedef typename AllocPolicyType::template StackType< BitsetListType > FollowingType; typedef typename AllocPolicyType::template StackType< typename ImplTraits::InputStreamType* > InputStreamsType; typedef InputStreamsType StreamsType; typedef typename AllocPolicyType::template VectorType<RewriteStreamType> RewriteStreamsType; - + typedef IntTrie<ImplTraits, ANTLR_MARKER> RuleListType; typedef IntTrie<ImplTraits, std::shared_ptr<RuleListType>> RuleMemoType; - -private: + +private: /** Points to the first in a possible chain of exceptions that the - * recognizer has discovered. - */ + * recognizer has discovered. + */ ExceptionBaseType* m_exception; - - - /** Track the set of token types that can follow any rule invocation. - * Stack structure, to support: List<BitSet>. - */ + + + /** Track the set of token types that can follow any rule invocation. + * Stack structure, to support: List<BitSet>. + */ FollowingType m_following; - - /** Track around a hint from the creator of the recognizer as to how big this - * thing is going to get, as the actress said to the bishop. This allows us - * to tune hash tables accordingly. This might not be the best place for this - * in the end but we will see. - */ + + /** Track around a hint from the creator of the recognizer as to how big this + * thing is going to get, as the actress said to the bishop. 
This allows us + * to tune hash tables accordingly. This might not be the best place for this + * in the end but we will see. + */ ANTLR_UINT32 m_sizeHint; - - - /** If set to true then the recognizer has an exception - * condition (this is tested by the generated code for the rules of - * the grammar). - */ + + + /** If set to true then the recognizer has an exception + * condition (this is tested by the generated code for the rules of + * the grammar). + */ bool m_error; - - - /** This is true when we see an error and before having successfully - * matched a token. Prevents generation of more than one error message - * per error. - */ + + + /** This is true when we see an error and before having successfully + * matched a token. Prevents generation of more than one error message + * per error. + */ bool m_errorRecovery; - + /** In lieu of a return value, this indicates that a rule or token - * has failed to match. Reset to false upon valid token match. - */ + * has failed to match. Reset to false upon valid token match. + */ bool m_failed; - + /* Instead of allocating CommonTokenType, we do it in the stack. hence we need a null indicator */ bool m_token_present; - - /** The index into the input stream where the last error occurred. + + /** The index into the input stream where the last error occurred. * This is used to prevent infinite loops where an error is found - * but no token is consumed during recovery...another error is found, - * ad nauseam. This is a failsafe mechanism to guarantee that at least - * one token/tree node is consumed for two errors. - */ + * but no token is consumed during recovery...another error is found, + * ad nauseam. This is a failsafe mechanism to guarantee that at least + * one token/tree node is consumed for two errors. + */ ANTLR_MARKER m_lastErrorIndex; - - /** When the recognizer terminates, the error handling functions - * will have incremented this value if any error occurred (that was displayed). 
It can then be - * used by the grammar programmer without having to use static globals. - */ + + /** When the recognizer terminates, the error handling functions + * will have incremented this value if any error occurred (that was displayed). It can then be + * used by the grammar programmer without having to use static globals. + */ ANTLR_UINT32 m_errorCount; - - /** If 0, no backtracking is going on. Safe to exec actions etc... - * If >0 then it's the level of backtracking. - */ + + /** If 0, no backtracking is going on. Safe to exec actions etc... + * If >0 then it's the level of backtracking. + */ ANTLR_INT32 m_backtracking; - - /** ANTLR3_VECTOR of ANTLR3_LIST for rule memoizing. - * Tracks the stop token index for each rule. ruleMemo[ruleIndex] is - * the memoization table for ruleIndex. For key ruleStartIndex, you - * get back the stop token for associated rule or MEMO_RULE_FAILED. - * - * This is only used if rule memoization is on. - */ + + /** ANTLR3_VECTOR of ANTLR3_LIST for rule memoizing. + * Tracks the stop token index for each rule. ruleMemo[ruleIndex] is + * the memoization table for ruleIndex. For key ruleStartIndex, you + * get back the stop token for associated rule or MEMO_RULE_FAILED. + * + * This is only used if rule memoization is on. + */ RuleMemoType* m_ruleMemo; - - /** Pointer to an array of token names - * that are generally useful in error reporting. The generated parsers install - * this pointer. The table it points to is statically allocated as 8 bit ascii - * at parser compile time - grammar token names are thus restricted in character - * sets, which does not seem to terrible. - */ + + /** Pointer to an array of token names + * that are generally useful in error reporting. The generated parsers install + * this pointer. The table it points to is statically allocated as 8 bit ascii + * at parser compile time - grammar token names are thus restricted in character + * sets, which does not seem to terrible. 
+ */ ANTLR_UINT8** m_tokenNames; - - /** The goal of all lexer rules/methods is to create a token object. - * This is an instance variable as multiple rules may collaborate to - * create a single token. For example, NUM : INT | FLOAT ; - * In this case, you want the INT or FLOAT rule to set token and not - * have it reset to a NUM token in rule NUM. - */ + + /** The goal of all lexer rules/methods is to create a token object. + * This is an instance variable as multiple rules may collaborate to + * create a single token. For example, NUM : INT | FLOAT ; + * In this case, you want the INT or FLOAT rule to set token and not + * have it reset to a NUM token in rule NUM. + */ CommonTokenType m_token; - - /** A lexer is a source of tokens, produced by all the generated (or - * hand crafted if you like) matching rules. As such it needs to provide - * a token source interface implementation. For others, this will become a empty class - */ + + /** A lexer is a source of tokens, produced by all the generated (or + * hand crafted if you like) matching rules. As such it needs to provide + * a token source interface implementation. For others, this will become a empty class + */ TokenSourceType* m_tokSource; - - /** The channel number for the current token - */ + + /** The channel number for the current token + */ ANTLR_UINT32 m_channel; - - /** The token type for the current token - */ + + /** The token type for the current token + */ ANTLR_UINT32 m_type; - - /** The input line (where it makes sense) on which the first character of the current - * token resides. - */ + + /** The input line (where it makes sense) on which the first character of the current + * token resides. 
+ */ ANTLR_INT32 m_tokenStartLine; - - /** The character position of the first character of the current token - * within the line specified by tokenStartLine - */ + + /** The character position of the first character of the current token + * within the line specified by tokenStartLine + */ ANTLR_INT32 m_tokenStartCharPositionInLine; - - /** What character index in the stream did the current token start at? - * Needed, for example, to get the text for current token. Set at - * the start of nextToken. - */ + + /** What character index in the stream did the current token start at? + * Needed, for example, to get the text for current token. Set at + * the start of nextToken. + */ ANTLR_MARKER m_tokenStartCharIndex; - - /** Text for the current token. This can be overridden by setting this - * variable directly or by using the SETTEXT() macro (preferred) in your - * lexer rules. - */ + + /** Text for the current token. This can be overridden by setting this + * variable directly or by using the SETTEXT() macro (preferred) in your + * lexer rules. + */ StringType m_text; - - /** Input stream stack, which allows the C programmer to switch input streams - * easily and allow the standard nextToken() implementation to deal with it - * as this is a common requirement. - */ + + /** Input stream stack, which allows the C programmer to switch input streams + * easily and allow the standard nextToken() implementation to deal with it + * as this is a common requirement. + */ InputStreamsType m_streams; - + /** Tree adaptor drives an AST trie construction. * Is shared between multiple imported grammars. 
*/ TreeAdaptorType* m_treeAdaptor; -public: +public: RecognizerSharedState(); ExceptionBaseType* get_exception() const; FollowingType& get_following(); @@ -261,12 +261,12 @@ public: void inc_errorCount(); void inc_backtracking(); void dec_backtracking(); -}; - +}; + } - -#include "antlr3recognizersharedstate.inl" - -#endif - - + +#include "antlr3recognizersharedstate.inl" + +#endif + + diff --git a/contrib/libs/antlr3_cpp_runtime/include/antlr3recognizersharedstate.inl b/contrib/libs/antlr3_cpp_runtime/include/antlr3recognizersharedstate.inl index 27732cb34f..0a4ba55cf4 100644 --- a/contrib/libs/antlr3_cpp_runtime/include/antlr3recognizersharedstate.inl +++ b/contrib/libs/antlr3_cpp_runtime/include/antlr3recognizersharedstate.inl @@ -1,8 +1,8 @@ namespace antlr3 { - -template<class ImplTraits, class StreamType> -RecognizerSharedState<ImplTraits, StreamType>::RecognizerSharedState() -{ + +template<class ImplTraits, class StreamType> +RecognizerSharedState<ImplTraits, StreamType>::RecognizerSharedState() +{ m_exception = NULL; m_sizeHint = 0; m_error = false; @@ -21,260 +21,260 @@ RecognizerSharedState<ImplTraits, StreamType>::RecognizerSharedState() m_tokenStartCharPositionInLine = 0; m_tokenStartCharIndex = 0; m_treeAdaptor = NULL; -} - -template<class ImplTraits, class StreamType> -ANTLR_INLINE typename RecognizerSharedState<ImplTraits, StreamType>::FollowingType& RecognizerSharedState<ImplTraits, StreamType>::get_following() -{ +} + +template<class ImplTraits, class StreamType> +ANTLR_INLINE typename RecognizerSharedState<ImplTraits, StreamType>::FollowingType& RecognizerSharedState<ImplTraits, StreamType>::get_following() +{ return m_following; -} -template<class ImplTraits, class StreamType> -ANTLR_INLINE ANTLR_UINT32 RecognizerSharedState<ImplTraits, StreamType>::get_sizeHint() const -{ +} +template<class ImplTraits, class StreamType> +ANTLR_INLINE ANTLR_UINT32 RecognizerSharedState<ImplTraits, StreamType>::get_sizeHint() const +{ return m_sizeHint; -} 
-template<class ImplTraits, class StreamType> -ANTLR_INLINE bool RecognizerSharedState<ImplTraits, StreamType>::get_error() const -{ +} +template<class ImplTraits, class StreamType> +ANTLR_INLINE bool RecognizerSharedState<ImplTraits, StreamType>::get_error() const +{ return m_error; -} -template<class ImplTraits, class StreamType> +} +template<class ImplTraits, class StreamType> ANTLR_INLINE typename RecognizerSharedState<ImplTraits, StreamType>::ExceptionBaseType* -RecognizerSharedState<ImplTraits, StreamType>::get_exception() const -{ +RecognizerSharedState<ImplTraits, StreamType>::get_exception() const +{ return m_exception; -} - -template<class ImplTraits, class StreamType> -ANTLR_INLINE bool RecognizerSharedState<ImplTraits, StreamType>::get_errorRecovery() const -{ +} + +template<class ImplTraits, class StreamType> +ANTLR_INLINE bool RecognizerSharedState<ImplTraits, StreamType>::get_errorRecovery() const +{ return m_errorRecovery; -} -template<class ImplTraits, class StreamType> -ANTLR_INLINE bool RecognizerSharedState<ImplTraits, StreamType>::get_failed() const -{ +} +template<class ImplTraits, class StreamType> +ANTLR_INLINE bool RecognizerSharedState<ImplTraits, StreamType>::get_failed() const +{ return m_failed; -} - -template<class ImplTraits, class StreamType> -ANTLR_INLINE bool RecognizerSharedState<ImplTraits, StreamType>::get_token_present() const -{ +} + +template<class ImplTraits, class StreamType> +ANTLR_INLINE bool RecognizerSharedState<ImplTraits, StreamType>::get_token_present() const +{ return m_token_present; -} - -template<class ImplTraits, class StreamType> -ANTLR_INLINE ANTLR_MARKER RecognizerSharedState<ImplTraits, StreamType>::get_lastErrorIndex() const -{ +} + +template<class ImplTraits, class StreamType> +ANTLR_INLINE ANTLR_MARKER RecognizerSharedState<ImplTraits, StreamType>::get_lastErrorIndex() const +{ return m_lastErrorIndex; -} -template<class ImplTraits, class StreamType> -ANTLR_INLINE ANTLR_UINT32 
RecognizerSharedState<ImplTraits, StreamType>::get_errorCount() const -{ +} +template<class ImplTraits, class StreamType> +ANTLR_INLINE ANTLR_UINT32 RecognizerSharedState<ImplTraits, StreamType>::get_errorCount() const +{ return m_errorCount; -} -template<class ImplTraits, class StreamType> -ANTLR_INLINE ANTLR_INT32 RecognizerSharedState<ImplTraits, StreamType>::get_backtracking() const -{ +} +template<class ImplTraits, class StreamType> +ANTLR_INLINE ANTLR_INT32 RecognizerSharedState<ImplTraits, StreamType>::get_backtracking() const +{ return m_backtracking; -} -template<class ImplTraits, class StreamType> -ANTLR_INLINE typename RecognizerSharedState<ImplTraits, StreamType>::RuleMemoType* RecognizerSharedState<ImplTraits, StreamType>::get_ruleMemo() const -{ +} +template<class ImplTraits, class StreamType> +ANTLR_INLINE typename RecognizerSharedState<ImplTraits, StreamType>::RuleMemoType* RecognizerSharedState<ImplTraits, StreamType>::get_ruleMemo() const +{ return m_ruleMemo; -} -template<class ImplTraits, class StreamType> -ANTLR_INLINE ANTLR_UINT8** RecognizerSharedState<ImplTraits, StreamType>::get_tokenNames() const -{ +} +template<class ImplTraits, class StreamType> +ANTLR_INLINE ANTLR_UINT8** RecognizerSharedState<ImplTraits, StreamType>::get_tokenNames() const +{ return m_tokenNames; -} -template<class ImplTraits, class StreamType> -ANTLR_INLINE ANTLR_UINT8* RecognizerSharedState<ImplTraits, StreamType>::get_tokenName( ANTLR_UINT32 i ) const -{ +} +template<class ImplTraits, class StreamType> +ANTLR_INLINE ANTLR_UINT8* RecognizerSharedState<ImplTraits, StreamType>::get_tokenName( ANTLR_UINT32 i ) const +{ return m_tokenNames[i]; -} -template<class ImplTraits, class StreamType> -ANTLR_INLINE typename RecognizerSharedState<ImplTraits, StreamType>::CommonTokenType* RecognizerSharedState<ImplTraits, StreamType>::get_token() -{ +} +template<class ImplTraits, class StreamType> +ANTLR_INLINE typename RecognizerSharedState<ImplTraits, StreamType>::CommonTokenType* 
RecognizerSharedState<ImplTraits, StreamType>::get_token() +{ return &m_token; -} -template<class ImplTraits, class StreamType> -ANTLR_INLINE typename RecognizerSharedState<ImplTraits, StreamType>::TokenSourceType* RecognizerSharedState<ImplTraits, StreamType>::get_tokSource() const -{ +} +template<class ImplTraits, class StreamType> +ANTLR_INLINE typename RecognizerSharedState<ImplTraits, StreamType>::TokenSourceType* RecognizerSharedState<ImplTraits, StreamType>::get_tokSource() const +{ return m_tokSource; -} -template<class ImplTraits, class StreamType> -ANTLR_INLINE ANTLR_UINT32& RecognizerSharedState<ImplTraits, StreamType>::get_channel() -{ +} +template<class ImplTraits, class StreamType> +ANTLR_INLINE ANTLR_UINT32& RecognizerSharedState<ImplTraits, StreamType>::get_channel() +{ return m_channel; -} -template<class ImplTraits, class StreamType> -ANTLR_INLINE ANTLR_UINT32 RecognizerSharedState<ImplTraits, StreamType>::get_type() const -{ +} +template<class ImplTraits, class StreamType> +ANTLR_INLINE ANTLR_UINT32 RecognizerSharedState<ImplTraits, StreamType>::get_type() const +{ return m_type; -} -template<class ImplTraits, class StreamType> -ANTLR_INLINE ANTLR_INT32 RecognizerSharedState<ImplTraits, StreamType>::get_tokenStartLine() const -{ +} +template<class ImplTraits, class StreamType> +ANTLR_INLINE ANTLR_INT32 RecognizerSharedState<ImplTraits, StreamType>::get_tokenStartLine() const +{ return m_tokenStartLine; -} -template<class ImplTraits, class StreamType> -ANTLR_INLINE ANTLR_INT32 RecognizerSharedState<ImplTraits, StreamType>::get_tokenStartCharPositionInLine() const -{ +} +template<class ImplTraits, class StreamType> +ANTLR_INLINE ANTLR_INT32 RecognizerSharedState<ImplTraits, StreamType>::get_tokenStartCharPositionInLine() const +{ return m_tokenStartCharPositionInLine; -} -template<class ImplTraits, class StreamType> -ANTLR_INLINE ANTLR_MARKER RecognizerSharedState<ImplTraits, StreamType>::get_tokenStartCharIndex() const -{ +} +template<class 
ImplTraits, class StreamType> +ANTLR_INLINE ANTLR_MARKER RecognizerSharedState<ImplTraits, StreamType>::get_tokenStartCharIndex() const +{ return m_tokenStartCharIndex; -} -template<class ImplTraits, class StreamType> -ANTLR_INLINE typename RecognizerSharedState<ImplTraits, StreamType>::StringType& RecognizerSharedState<ImplTraits, StreamType>::get_text() -{ +} +template<class ImplTraits, class StreamType> +ANTLR_INLINE typename RecognizerSharedState<ImplTraits, StreamType>::StringType& RecognizerSharedState<ImplTraits, StreamType>::get_text() +{ return m_text; -} -template<class ImplTraits, class StreamType> -ANTLR_INLINE typename RecognizerSharedState<ImplTraits, StreamType>::StreamsType& RecognizerSharedState<ImplTraits, StreamType>::get_streams() -{ +} +template<class ImplTraits, class StreamType> +ANTLR_INLINE typename RecognizerSharedState<ImplTraits, StreamType>::StreamsType& RecognizerSharedState<ImplTraits, StreamType>::get_streams() +{ return m_streams; -} -template<class ImplTraits, class StreamType> +} +template<class ImplTraits, class StreamType> ANTLR_INLINE typename RecognizerSharedState<ImplTraits, StreamType>::TreeAdaptorType* RecognizerSharedState<ImplTraits, StreamType>::get_treeAdaptor() const { return m_treeAdaptor; } template<class ImplTraits, class StreamType> -ANTLR_INLINE void RecognizerSharedState<ImplTraits, StreamType>::set_exception( ExceptionBaseType* exception ) -{ +ANTLR_INLINE void RecognizerSharedState<ImplTraits, StreamType>::set_exception( ExceptionBaseType* exception ) +{ m_exception = exception; -} -template<class ImplTraits, class StreamType> -ANTLR_INLINE void RecognizerSharedState<ImplTraits, StreamType>::set_following( const FollowingType& following ) -{ +} +template<class ImplTraits, class StreamType> +ANTLR_INLINE void RecognizerSharedState<ImplTraits, StreamType>::set_following( const FollowingType& following ) +{ m_following = following; -} -template<class ImplTraits, class StreamType> -ANTLR_INLINE void 
RecognizerSharedState<ImplTraits, StreamType>::set_sizeHint( ANTLR_UINT32 sizeHint ) -{ +} +template<class ImplTraits, class StreamType> +ANTLR_INLINE void RecognizerSharedState<ImplTraits, StreamType>::set_sizeHint( ANTLR_UINT32 sizeHint ) +{ m_sizeHint = sizeHint; -} -template<class ImplTraits, class StreamType> -ANTLR_INLINE void RecognizerSharedState<ImplTraits, StreamType>::set_error( bool error ) -{ +} +template<class ImplTraits, class StreamType> +ANTLR_INLINE void RecognizerSharedState<ImplTraits, StreamType>::set_error( bool error ) +{ m_error = error; -} -template<class ImplTraits, class StreamType> -ANTLR_INLINE void RecognizerSharedState<ImplTraits, StreamType>::set_errorRecovery( bool errorRecovery ) -{ +} +template<class ImplTraits, class StreamType> +ANTLR_INLINE void RecognizerSharedState<ImplTraits, StreamType>::set_errorRecovery( bool errorRecovery ) +{ m_errorRecovery = errorRecovery; -} -template<class ImplTraits, class StreamType> -ANTLR_INLINE void RecognizerSharedState<ImplTraits, StreamType>::set_failed( bool failed ) -{ +} +template<class ImplTraits, class StreamType> +ANTLR_INLINE void RecognizerSharedState<ImplTraits, StreamType>::set_failed( bool failed ) +{ m_failed = failed; -} -template<class ImplTraits, class StreamType> -ANTLR_INLINE void RecognizerSharedState<ImplTraits, StreamType>::set_token_present(bool token_present) -{ +} +template<class ImplTraits, class StreamType> +ANTLR_INLINE void RecognizerSharedState<ImplTraits, StreamType>::set_token_present(bool token_present) +{ m_token_present = token_present; -} -template<class ImplTraits, class StreamType> -ANTLR_INLINE void RecognizerSharedState<ImplTraits, StreamType>::set_lastErrorIndex( ANTLR_MARKER lastErrorIndex ) -{ +} +template<class ImplTraits, class StreamType> +ANTLR_INLINE void RecognizerSharedState<ImplTraits, StreamType>::set_lastErrorIndex( ANTLR_MARKER lastErrorIndex ) +{ m_lastErrorIndex = lastErrorIndex; -} -template<class ImplTraits, class StreamType> 
-ANTLR_INLINE void RecognizerSharedState<ImplTraits, StreamType>::set_errorCount( ANTLR_UINT32 errorCount ) -{ +} +template<class ImplTraits, class StreamType> +ANTLR_INLINE void RecognizerSharedState<ImplTraits, StreamType>::set_errorCount( ANTLR_UINT32 errorCount ) +{ m_errorCount = errorCount; -} -template<class ImplTraits, class StreamType> -ANTLR_INLINE void RecognizerSharedState<ImplTraits, StreamType>::set_backtracking( ANTLR_INT32 backtracking ) -{ +} +template<class ImplTraits, class StreamType> +ANTLR_INLINE void RecognizerSharedState<ImplTraits, StreamType>::set_backtracking( ANTLR_INT32 backtracking ) +{ m_backtracking = backtracking; -} -template<class ImplTraits, class StreamType> -ANTLR_INLINE void RecognizerSharedState<ImplTraits, StreamType>::set_ruleMemo( RuleMemoType* ruleMemo ) -{ +} +template<class ImplTraits, class StreamType> +ANTLR_INLINE void RecognizerSharedState<ImplTraits, StreamType>::set_ruleMemo( RuleMemoType* ruleMemo ) +{ m_ruleMemo = ruleMemo; -} -template<class ImplTraits, class StreamType> -ANTLR_INLINE void RecognizerSharedState<ImplTraits, StreamType>::set_tokenNames( ANTLR_UINT8** tokenNames ) -{ +} +template<class ImplTraits, class StreamType> +ANTLR_INLINE void RecognizerSharedState<ImplTraits, StreamType>::set_tokenNames( ANTLR_UINT8** tokenNames ) +{ m_tokenNames = tokenNames; -} - -template<class ImplTraits, class StreamType> -ANTLR_INLINE void RecognizerSharedState<ImplTraits, StreamType>::set_tokSource( TokenSourceType* tokSource ) -{ +} + +template<class ImplTraits, class StreamType> +ANTLR_INLINE void RecognizerSharedState<ImplTraits, StreamType>::set_tokSource( TokenSourceType* tokSource ) +{ m_tokSource = tokSource; -} -template<class ImplTraits, class StreamType> -ANTLR_INLINE void RecognizerSharedState<ImplTraits, StreamType>::set_channel( ANTLR_UINT32 channel ) -{ +} +template<class ImplTraits, class StreamType> +ANTLR_INLINE void RecognizerSharedState<ImplTraits, StreamType>::set_channel( ANTLR_UINT32 channel ) 
+{ m_channel = channel; -} - -template<class ImplTraits, class StreamType> -ANTLR_INLINE void RecognizerSharedState<ImplTraits, StreamType>::set_token(const CommonTokenType* tok) -{ +} + +template<class ImplTraits, class StreamType> +ANTLR_INLINE void RecognizerSharedState<ImplTraits, StreamType>::set_token(const CommonTokenType* tok) +{ this->set_token_present( tok != NULL ); if( tok != NULL ) m_token = *tok; -} - -template<class ImplTraits, class StreamType> -ANTLR_INLINE void RecognizerSharedState<ImplTraits, StreamType>::set_type( ANTLR_UINT32 type ) -{ +} + +template<class ImplTraits, class StreamType> +ANTLR_INLINE void RecognizerSharedState<ImplTraits, StreamType>::set_type( ANTLR_UINT32 type ) +{ m_type = type; -} -template<class ImplTraits, class StreamType> -ANTLR_INLINE void RecognizerSharedState<ImplTraits, StreamType>::set_tokenStartLine( ANTLR_INT32 tokenStartLine ) -{ +} +template<class ImplTraits, class StreamType> +ANTLR_INLINE void RecognizerSharedState<ImplTraits, StreamType>::set_tokenStartLine( ANTLR_INT32 tokenStartLine ) +{ m_tokenStartLine = tokenStartLine; -} -template<class ImplTraits, class StreamType> -ANTLR_INLINE void RecognizerSharedState<ImplTraits, StreamType>::set_tokenStartCharPositionInLine( ANTLR_INT32 tokenStartCharPositionInLine ) -{ +} +template<class ImplTraits, class StreamType> +ANTLR_INLINE void RecognizerSharedState<ImplTraits, StreamType>::set_tokenStartCharPositionInLine( ANTLR_INT32 tokenStartCharPositionInLine ) +{ m_tokenStartCharPositionInLine = tokenStartCharPositionInLine; -} -template<class ImplTraits, class StreamType> -ANTLR_INLINE void RecognizerSharedState<ImplTraits, StreamType>::set_tokenStartCharIndex( ANTLR_MARKER tokenStartCharIndex ) -{ +} +template<class ImplTraits, class StreamType> +ANTLR_INLINE void RecognizerSharedState<ImplTraits, StreamType>::set_tokenStartCharIndex( ANTLR_MARKER tokenStartCharIndex ) +{ m_tokenStartCharIndex = tokenStartCharIndex; -} -template<class ImplTraits, class 
StreamType> -ANTLR_INLINE void RecognizerSharedState<ImplTraits, StreamType>::set_text( const StringType& text ) -{ +} +template<class ImplTraits, class StreamType> +ANTLR_INLINE void RecognizerSharedState<ImplTraits, StreamType>::set_text( const StringType& text ) +{ m_text = text; -} -template<class ImplTraits, class StreamType> -ANTLR_INLINE void RecognizerSharedState<ImplTraits, StreamType>::set_streams( const InputStreamsType& streams ) -{ +} +template<class ImplTraits, class StreamType> +ANTLR_INLINE void RecognizerSharedState<ImplTraits, StreamType>::set_streams( const InputStreamsType& streams ) +{ m_streams = streams; -} +} template<class ImplTraits, class StreamType> ANTLR_INLINE void RecognizerSharedState<ImplTraits, StreamType>::set_treeAdaptor( TreeAdaptorType* adaptor ) { m_treeAdaptor = adaptor; } - -template<class ImplTraits, class StreamType> -ANTLR_INLINE void RecognizerSharedState<ImplTraits, StreamType>::inc_errorCount() -{ + +template<class ImplTraits, class StreamType> +ANTLR_INLINE void RecognizerSharedState<ImplTraits, StreamType>::inc_errorCount() +{ ++m_errorCount; -} - -template<class ImplTraits, class StreamType> -ANTLR_INLINE void RecognizerSharedState<ImplTraits, StreamType>::inc_backtracking() -{ +} + +template<class ImplTraits, class StreamType> +ANTLR_INLINE void RecognizerSharedState<ImplTraits, StreamType>::inc_backtracking() +{ ++m_backtracking; -} - -template<class ImplTraits, class StreamType> -ANTLR_INLINE void RecognizerSharedState<ImplTraits, StreamType>::dec_backtracking() -{ +} + +template<class ImplTraits, class StreamType> +ANTLR_INLINE void RecognizerSharedState<ImplTraits, StreamType>::dec_backtracking() +{ --m_backtracking; -} - +} + } diff --git a/contrib/libs/antlr3_cpp_runtime/include/antlr3rewritestreams.hpp b/contrib/libs/antlr3_cpp_runtime/include/antlr3rewritestreams.hpp index a8d7396643..bfea7eabcb 100644 --- a/contrib/libs/antlr3_cpp_runtime/include/antlr3rewritestreams.hpp +++ 
b/contrib/libs/antlr3_cpp_runtime/include/antlr3rewritestreams.hpp @@ -1,174 +1,174 @@ #ifndef ANTLR3REWRITESTREAM_HPP #define ANTLR3REWRITESTREAM_HPP - -// [The "BSD licence"] -// Copyright (c) 2005-2009 Gokulakannan Somasundaram, ElectronDB - -// -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// 3. The name of the author may not be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR -// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES -// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. -// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, -// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT -// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF -// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -/// A generic list of elements tracked in an alternative to be used in + +// [The "BSD licence"] +// Copyright (c) 2005-2009 Gokulakannan Somasundaram, ElectronDB + +// +// All rights reserved. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// 3. The name of the author may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +/// A generic list of elements tracked in an alternative to be used in /// a -> rewrite rule. -/// +/// /// In the C implementation, all tree oriented streams return a pointer to -/// the same type: pANTLR3_BASE_TREE. Anything that has subclassed from this -/// still passes this type, within which there is a super pointer, which points -/// to it's own data and methods. Hence we do not need to implement this as -/// the equivalent of an abstract class, but just fill in the appropriate interface -/// as usual with this model. 
-/// -/// Once you start next()ing, do not try to add more elements. It will -/// break the cursor tracking I believe. -/// +/// the same type: pANTLR3_BASE_TREE. Anything that has subclassed from this +/// still passes this type, within which there is a super pointer, which points +/// to it's own data and methods. Hence we do not need to implement this as +/// the equivalent of an abstract class, but just fill in the appropriate interface +/// as usual with this model. +/// +/// Once you start next()ing, do not try to add more elements. It will +/// break the cursor tracking I believe. +/// +/// +/// \see #pANTLR3_REWRITE_RULE_NODE_STREAM +/// \see #pANTLR3_REWRITE_RULE_ELEMENT_STREAM +/// \see #pANTLR3_REWRITE_RULE_SUBTREE_STREAM +/// +/// TODO: add mechanism to detect/puke on modification after reading from stream /// -/// \see #pANTLR3_REWRITE_RULE_NODE_STREAM -/// \see #pANTLR3_REWRITE_RULE_ELEMENT_STREAM -/// \see #pANTLR3_REWRITE_RULE_SUBTREE_STREAM -/// -/// TODO: add mechanism to detect/puke on modification after reading from stream -/// namespace antlr3 { - + template<class ImplTraits, class ElementType> //template<class ImplTraits> class RewriteRuleElementStream : public ImplTraits::AllocPolicyType -{ -public: +{ +public: //typedef typename ElementTypePtr::element_type ElementType; unique_ptr //typedef typename ImplTraits::TreeType TreeType; typedef typename ImplTraits::AllocPolicyType AllocPolicyType; typedef typename ImplTraits::TreeAdaptorType TreeAdaptorType; - + //typedef typename ImplTraits::template RecognizerType< typename SuperType::StreamType > RecognizerType; typedef typename ImplTraits::StringType StringType; typedef typename AllocPolicyType::template VectorType< ElementType* > ElementsType; - -protected: - /// The list of tokens or subtrees we are tracking - /// + +protected: + /// The list of tokens or subtrees we are tracking + /// ElementsType m_elements; - - /// The element or stream description; usually has name of the token or - /// 
rule reference that this list tracks. Can include rulename too, but - /// the exception would track that info. - /// + + /// The element or stream description; usually has name of the token or + /// rule reference that this list tracks. Can include rulename too, but + /// the exception would track that info. + /// StringType m_elementDescription; - + private: ElementType* dupImpl(typename ImplTraits::CommonTokenType* el); ElementType* dupImpl(typename ImplTraits::TreeTypePtr el); - - + + /// Pointer to the tree adaptor in use for this stream /// TreeAdaptorType* m_adaptor; /// Cursor 0..n-1. If singleElement!=NULL, cursor is 0 until you next(), - /// which bumps it to 1 meaning no more elements. - /// + /// which bumps it to 1 meaning no more elements. + /// ANTLR_UINT32 m_cursor; - + /// Once a node / subtree has been used in a stream, it must be dup'ed /// from then on. Streams are reset after sub rules so that the streams /// can be reused in future sub rules. So, reset must set a dirty bit. /// If dirty, then next() always returns a dup. /// bool m_dirty; - -public: + +public: RewriteRuleElementStream(TreeAdaptorType* adaptor, const char* description); RewriteRuleElementStream(TreeAdaptorType* adaptor, const char* description, const ElementType* oneElement); RewriteRuleElementStream(TreeAdaptorType* adaptor, const char* description, const ElementsType& elements); - + ~RewriteRuleElementStream(); // Methods - - /// Reset the condition of this stream so that it appears we have - /// not consumed any of its elements. Elements themselves are untouched. - /// + + /// Reset the condition of this stream so that it appears we have + /// not consumed any of its elements. Elements themselves are untouched. + /// void reset(); - - /// Add a new pANTLR3_BASE_TREE to this stream - /// + + /// Add a new pANTLR3_BASE_TREE to this stream + /// void add(ElementType* el); - - /// Return the next element in the stream. 
If out of elements, throw - /// an exception unless size()==1. If size is 1, then return elements[0]. - /// + + /// Return the next element in the stream. If out of elements, throw + /// an exception unless size()==1. If size is 1, then return elements[0]. + /// //TokenType* next(); ElementType nextTree(); //TokenType* nextToken(); ElementType* _next(); - + /// When constructing trees, sometimes we need to dup a token or AST /// subtree. Dup'ing a token means just creating another AST node - /// around it. For trees, you must call the adaptor.dupTree(). - /// + /// around it. For trees, you must call the adaptor.dupTree(). + /// ElementType* dup( ElementType* el ); - - /// Ensure stream emits trees; tokens must be converted to AST nodes. - /// AST nodes can be passed through unmolested. - /// + + /// Ensure stream emits trees; tokens must be converted to AST nodes. + /// AST nodes can be passed through unmolested. + /// ElementType* toTree(ElementType* el); - - /// Returns true if there is a next element available - /// + + /// Returns true if there is a next element available + /// bool hasNext(); - - /// Treat next element as a single node even if it's a subtree. - /// This is used instead of next() when the result has to be a - /// tree root node. Also prevents us from duplicating recently-added - /// children; e.g., ^(type ID)+ adds ID to type and then 2nd iteration - /// must dup the type node, but ID has been added. - /// - /// Referencing to a rule result twice is ok; dup entire tree as + + /// Treat next element as a single node even if it's a subtree. + /// This is used instead of next() when the result has to be a + /// tree root node. Also prevents us from duplicating recently-added + /// children; e.g., ^(type ID)+ adds ID to type and then 2nd iteration + /// must dup the type node, but ID has been added. + /// + /// Referencing to a rule result twice is ok; dup entire tree as /// we can't be adding trees; e.g., expr expr. 
- /// + /// //TreeTypePtr nextNode(); - - /// Number of elements available in the stream - /// + + /// Number of elements available in the stream + /// ANTLR_UINT32 size(); - - /// Returns the description string if there is one available (check for NULL). - /// - StringType getDescription(); - -protected: + + /// Returns the description string if there is one available (check for NULL). + /// + StringType getDescription(); + +protected: void init(TreeAdaptorType* adaptor, const char* description); -}; - -/// This is an implementation of a token stream, which is basically an element -/// stream that deals with tokens only. -/// -template<class ImplTraits> +}; + +/// This is an implementation of a token stream, which is basically an element +/// stream that deals with tokens only. +/// +template<class ImplTraits> //class RewriteRuleTokenStream : public ImplTraits::template RewriteRuleElementStreamType< typename ImplTraits::ParserType> class RewriteRuleTokenStream //: public ImplTraits::template RewriteStreamType< const typename ImplTraits::CommonTokenType > -{ -public: +{ +public: typedef typename ImplTraits::AllocPolicyType AllocPolicyType; typedef typename ImplTraits::TreeAdaptorType TreeAdaptorType; typedef typename ImplTraits::ParserType ComponentType; @@ -179,12 +179,12 @@ public: typedef typename AllocPolicyType::template VectorType< TokenType* > ElementsType; typedef typename ImplTraits::template RecognizerType< StreamType > RecognizerType; typedef typename ImplTraits::template RewriteStreamType< const typename ImplTraits::CommonTokenType > BaseType; - -public: + +public: RewriteRuleTokenStream(TreeAdaptorType* adaptor, const char* description); RewriteRuleTokenStream(TreeAdaptorType* adaptor, const char* description, const TokenType* oneElement); RewriteRuleTokenStream(TreeAdaptorType* adaptor, const char* description, const ElementsType& elements); - + TreeTypePtr nextNode(); TokenType* nextToken(); @@ -198,19 +198,19 @@ public: TreeAdaptorType* m_adaptor; 
ElementType* _next(); -private: +private: //TreeTypePtr nextNodeToken(); -}; - -/// This is an implementation of a subtree stream which is a set of trees +}; + +/// This is an implementation of a subtree stream which is a set of trees /// modeled as an element stream. -/// -template<class ImplTraits> +/// +template<class ImplTraits> //class RewriteRuleSubtreeStream : public ImplTraits::template RewriteStreamType< typename ImplTraits::TreeParserType> class RewriteRuleSubtreeStream //: public ImplTraits::template RewriteStreamType< typename ImplTraits::TreeType > -{ -public: +{ +public: typedef typename ImplTraits::AllocPolicyType AllocPolicyType; typedef typename ImplTraits::TreeAdaptorType TreeAdaptorType; typedef typename ImplTraits::TreeParserType ComponentType; @@ -221,13 +221,13 @@ public: typedef typename ImplTraits::template RecognizerType< StreamType > RecognizerType; typedef typename AllocPolicyType::template VectorType< TokenType* > ElementsType; typedef typename ImplTraits::template RewriteStreamType< typename ImplTraits::TreeType > BaseType; - + RewriteRuleSubtreeStream(TreeAdaptorType* adaptor, const char* description); RewriteRuleSubtreeStream(TreeAdaptorType* adaptor, const char* description, TreeTypePtr& oneElement); RewriteRuleSubtreeStream(TreeAdaptorType* adaptor, const char* description, const ElementsType& elements); - + TreeTypePtr nextNode(TreeTypePtr); - + /// TODO copied from RewriteRuleElementStreamType /// Add a new pANTLR3_BASE_TREE to this stream /// @@ -239,10 +239,10 @@ public: protected: TreeTypePtr dup( TreeTypePtr el ); -private: +private: TreeTypePtr dupTree( TreeTypePtr el ); -}; - +}; + /* TODO This class is probably used in TreeParser only * Notes about Java target * - these classes reimplement only dup and toTree methods: @@ -262,14 +262,14 @@ private: * There should 3 types of specializations for RewriteRuleElementStreamType (which is not defined yet) * ATM: RewriteRuleElementStreamType is replaced with ImplTraits::template 
RewriteStreamType * -/// This is an implementation of a node stream, which is basically an element -/// stream that deals with tree nodes only. -/// -template<class ImplTraits> +/// This is an implementation of a node stream, which is basically an element +/// stream that deals with tree nodes only. +/// +template<class ImplTraits> //class RewriteRuleNodeStream : public ImplTraits::template RewriteStreamType< typename ImplTraits::TreeParserType> class RewriteRuleNodeStream : public ImplTraits::template RewriteStreamType< typename ImplTraits::TreeType > -{ -public: +{ +public: typedef typename ImplTraits::AllocPolicyType AllocPolicyType; typedef typename ImplTraits::TreeAdaptorType TreeAdaptorType; typedef typename ImplTraits::TreeParserType ComponentType; @@ -279,21 +279,21 @@ public: typedef typename ImplTraits::template RecognizerType< StreamType > RecognizerType; typedef typename AllocPolicyType::template VectorType< TokenType* > ElementsType; typedef typename ImplTraits::template RewriteRuleElementStreamType< typename ImplTraits::TreeType > BaseType; - -public: + +public: RewriteRuleNodeStream(TreeAdaptorType* adaptor, const char* description); RewriteRuleNodeStream(TreeAdaptorType* adaptor, const char* description, TokenType* oneElement); RewriteRuleNodeStream(TreeAdaptorType* adaptor, const char* description, const ElementsType& elements); - + protected: TreeTypePtr toTree(TreeTypePtr element); - -private: + +private: TreeTypePtr toTreeNode(TreeTypePtr element); -}; +}; */ } - -#include "antlr3rewritestreams.inl" - -#endif + +#include "antlr3rewritestreams.inl" + +#endif diff --git a/contrib/libs/antlr3_cpp_runtime/include/antlr3rewritestreams.inl b/contrib/libs/antlr3_cpp_runtime/include/antlr3rewritestreams.inl index 47568da649..4f1cce7773 100644 --- a/contrib/libs/antlr3_cpp_runtime/include/antlr3rewritestreams.inl +++ b/contrib/libs/antlr3_cpp_runtime/include/antlr3rewritestreams.inl @@ -1,138 +1,138 @@ namespace antlr3 { - + template<class ImplTraits, 
class ElementType> RewriteRuleElementStream<ImplTraits, ElementType>::RewriteRuleElementStream(TreeAdaptorType* adaptor, const char* description) -{ +{ this->init(adaptor, description); -} - +} + template<class ImplTraits, class ElementType> RewriteRuleElementStream<ImplTraits, ElementType>::RewriteRuleElementStream(TreeAdaptorType* adaptor, const char* description, const ElementType* oneElement) -{ +{ this->init(adaptor, description); if( oneElement != NULL ) this->add( oneElement ); -} - +} + template<class ImplTraits, class ElementType> RewriteRuleElementStream<ImplTraits, ElementType>::RewriteRuleElementStream(TreeAdaptorType* adaptor, const char* description, const ElementsType& elements) : m_elements(elements) -{ +{ this->init(adaptor, description); -} - +} + template<class ImplTraits, class ElementType> void RewriteRuleElementStream<ImplTraits, ElementType>::init(TreeAdaptorType* adaptor, const char* description) -{ +{ m_adaptor = adaptor; m_cursor = 0; m_dirty = false; -} - -template<class ImplTraits> -RewriteRuleTokenStream<ImplTraits>::RewriteRuleTokenStream(TreeAdaptorType* adaptor, +} + +template<class ImplTraits> +RewriteRuleTokenStream<ImplTraits>::RewriteRuleTokenStream(TreeAdaptorType* adaptor, const char* description) //: BaseType(adaptor, description) -{ -} - -template<class ImplTraits> +{ +} + +template<class ImplTraits> RewriteRuleTokenStream<ImplTraits>::RewriteRuleTokenStream(TreeAdaptorType* adaptor, const char* description, const TokenType* oneElement) //: BaseType(adaptor, description, oneElement) -{ -} - -template<class ImplTraits> -RewriteRuleTokenStream<ImplTraits>::RewriteRuleTokenStream(TreeAdaptorType* adaptor, +{ +} + +template<class ImplTraits> +RewriteRuleTokenStream<ImplTraits>::RewriteRuleTokenStream(TreeAdaptorType* adaptor, const char* description, const ElementsType& elements) //: BaseType(adaptor, description, elements) -{ -} - -template<class ImplTraits> 
-RewriteRuleSubtreeStream<ImplTraits>::RewriteRuleSubtreeStream(TreeAdaptorType* adaptor, +{ +} + +template<class ImplTraits> +RewriteRuleSubtreeStream<ImplTraits>::RewriteRuleSubtreeStream(TreeAdaptorType* adaptor, const char* description) //: BaseType(adaptor, description) -{ -} - -template<class ImplTraits> +{ +} + +template<class ImplTraits> RewriteRuleSubtreeStream<ImplTraits>::RewriteRuleSubtreeStream(TreeAdaptorType* adaptor, const char* description, TreeTypePtr& oneElement) //: BaseType(adaptor, description, oneElement) -{ -} - -template<class ImplTraits> -RewriteRuleSubtreeStream<ImplTraits>::RewriteRuleSubtreeStream(TreeAdaptorType* adaptor, +{ +} + +template<class ImplTraits> +RewriteRuleSubtreeStream<ImplTraits>::RewriteRuleSubtreeStream(TreeAdaptorType* adaptor, const char* description, const ElementsType& elements) //: BaseType(adaptor, description, elements) -{ -} - +{ +} + /* -template<class ImplTraits> -RewriteRuleNodeStream<ImplTraits>::RewriteRuleNodeStream(TreeAdaptorType* adaptor, +template<class ImplTraits> +RewriteRuleNodeStream<ImplTraits>::RewriteRuleNodeStream(TreeAdaptorType* adaptor, const char* description) : BaseType(adaptor, description) -{ -} - -template<class ImplTraits> +{ +} + +template<class ImplTraits> RewriteRuleNodeStream<ImplTraits>::RewriteRuleNodeStream(TreeAdaptorType* adaptor, const char* description, TokenType* oneElement) : BaseType(adaptor, description, oneElement) -{ -} - -template<class ImplTraits> -RewriteRuleNodeStream<ImplTraits>::RewriteRuleNodeStream(TreeAdaptorType* adaptor, +{ +} + +template<class ImplTraits> +RewriteRuleNodeStream<ImplTraits>::RewriteRuleNodeStream(TreeAdaptorType* adaptor, const char* description, const ElementsType& elements) : BaseType(adaptor, description, elements) -{ -} +{ +} */ - + template<class ImplTraits, class ElementType> void RewriteRuleElementStream<ImplTraits, ElementType>::reset() -{ +{ m_cursor = 0; m_dirty = true; -} - +} + template<class ImplTraits, class ElementType> void 
RewriteRuleElementStream<ImplTraits, ElementType>::add(ElementType* el) -{ +{ if ( el== NULL ) return; - + m_elements.push_back(el); -} - +} + template<class ImplTraits, class ElementType> ElementType* RewriteRuleElementStream<ImplTraits, ElementType>::_next() -{ +{ ANTLR_UINT32 n = this->size(); - + if (n == 0) { // This means that the stream is empty return NULL; // Caller must cope with this (TODO throw RewriteEmptyStreamException) } - + // Traversed all the available elements already? if ( m_cursor >= n) // out of elements? { @@ -142,25 +142,25 @@ ElementType* RewriteRuleElementStream<ImplTraits, ElementType>::_next() //return this->toTree(m_singleElement); return this->toTree(m_elements.at(0)); } - + // Out of elements and the size is not 1, so we cannot assume // that we just duplicate the entry n times (such as ID ent+ -> ^(ID ent)+) // This means we ran out of elements earlier than was expected. // return NULL; // Caller must cope with this (TODO throw RewriteEmptyStreamException) } - + // More than just a single element so we extract it from the // vector. 
ElementType* t = this->toTree(m_elements.at(m_cursor)); m_cursor++; return t; -} - +} + template<class ImplTraits, class ElementType> ElementType RewriteRuleElementStream<ImplTraits, ElementType>::nextTree() -{ +{ ANTLR_UINT32 n = this->size(); if ( m_dirty || ( (m_cursor >=n) && (n==1)) ) { @@ -168,24 +168,24 @@ RewriteRuleElementStream<ImplTraits, ElementType>::nextTree() ElementType* el = this->_next(); return this->dup(el); } - + // test size above then fetch ElementType* el = this->_next(); return el; -} - +} + /* -template<class ImplTraits, class SuperType> -typename RewriteRuleElementStream<ImplTraits, SuperType>::TokenType* -RewriteRuleElementStream<ImplTraits, SuperType>::nextToken() -{ +template<class ImplTraits, class SuperType> +typename RewriteRuleElementStream<ImplTraits, SuperType>::TokenType* +RewriteRuleElementStream<ImplTraits, SuperType>::nextToken() +{ return this->_next(); -} - -template<class ImplTraits, class SuperType> -typename RewriteRuleElementStream<ImplTraits, SuperType>::TokenType* -RewriteRuleElementStream<ImplTraits, SuperType>::next() -{ +} + +template<class ImplTraits, class SuperType> +typename RewriteRuleElementStream<ImplTraits, SuperType>::TokenType* +RewriteRuleElementStream<ImplTraits, SuperType>::next() +{ ANTLR_UINT32 s; s = this->size(); if ( (m_cursor >= s) && (s == 1) ) @@ -195,8 +195,8 @@ RewriteRuleElementStream<ImplTraits, SuperType>::next() return this->dup(el); } return this->_next(); -} - +} + */ template<class ImplTraits, class ElementType> @@ -220,46 +220,46 @@ RewriteRuleElementStream<ImplTraits, ElementType>::dupImpl( typename ImplTraits: return m_adaptor->dupTree(element); } -template<class ImplTraits> +template<class ImplTraits> typename RewriteRuleSubtreeStream<ImplTraits>::TreeTypePtr RewriteRuleSubtreeStream<ImplTraits>::dup(TreeTypePtr element) -{ +{ return this->dupTree(element); -} - -template<class ImplTraits> +} + +template<class ImplTraits> typename RewriteRuleSubtreeStream<ImplTraits>::TreeTypePtr 
RewriteRuleSubtreeStream<ImplTraits>::dupTree(TreeTypePtr element) -{ +{ return BaseType::m_adaptor->dupNode(element); -} - +} + template<class ImplTraits, class ElementType> ElementType* RewriteRuleElementStream<ImplTraits, ElementType>::toTree( ElementType* element) -{ +{ return element; -} - +} + /* -template<class ImplTraits> +template<class ImplTraits> typename RewriteRuleNodeStream<ImplTraits>::TreeTypePtr RewriteRuleNodeStream<ImplTraits>::toTree(TreeTypePtr element) -{ +{ return this->toTreeNode(element); -} - -template<class ImplTraits> +} + +template<class ImplTraits> typename RewriteRuleNodeStream<ImplTraits>::TreeTypePtr RewriteRuleNodeStream<ImplTraits>::toTreeNode(TreeTypePtr element) -{ +{ return BaseType::m_adaptor->dupNode(element); -} +} */ - + template<class ImplTraits, class ElementType> bool RewriteRuleElementStream<ImplTraits, ElementType>::hasNext() -{ +{ if ( !m_elements.empty() && m_cursor < m_elements.size()) { return true; @@ -268,55 +268,55 @@ bool RewriteRuleElementStream<ImplTraits, ElementType>::hasNext() { return false; } -} - -template<class ImplTraits > +} + +template<class ImplTraits > typename RewriteRuleTokenStream<ImplTraits>::TreeTypePtr -RewriteRuleTokenStream<ImplTraits>::nextNode() -{ +RewriteRuleTokenStream<ImplTraits>::nextNode() +{ TokenType *Token = this->nextToken(); //return BaseType::m_adaptor->create(Token); return m_adaptor->create(Token); -} - +} + /* -template<class ImplTraits> +template<class ImplTraits> typename RewriteRuleTokenStream<ImplTraits>::TreeTypePtr -RewriteRuleTokenStream<ImplTraits>::nextNodeToken() -{ +RewriteRuleTokenStream<ImplTraits>::nextNodeToken() +{ return BaseType::m_adaptor->create(this->_next()); -} +} */ - -/// Number of elements available in the stream -/// + +/// Number of elements available in the stream +/// template<class ImplTraits, class ElementType> ANTLR_UINT32 RewriteRuleElementStream<ImplTraits, ElementType>::size() -{ +{ return (ANTLR_UINT32)(m_elements.size()); -} - +} + 
template<class ImplTraits, class ElementType> typename RewriteRuleElementStream<ImplTraits, ElementType>::StringType RewriteRuleElementStream<ImplTraits, ElementType>::getDescription() -{ +{ if ( m_elementDescription.empty() ) { m_elementDescription = "<unknown source>"; } return m_elementDescription; -} - +} + template<class ImplTraits, class ElementType> RewriteRuleElementStream<ImplTraits, ElementType>::~RewriteRuleElementStream() -{ - // Before placing the stream back in the pool, we +{ + // Before placing the stream back in the pool, we // need to clear any vector it has. This is so any // free pointers that are associated with the // entries are called. However, if this particular function is called // then we know that the entries in the stream are definitely - // tree nodes. Hence we check to see if any of them were nilNodes as - // if they were, we can reuse them. + // tree nodes. Hence we check to see if any of them were nilNodes as + // if they were, we can reuse them. // // We have some elements to traverse // @@ -331,14 +331,14 @@ RewriteRuleElementStream<ImplTraits, ElementType>::~RewriteRuleElementStream() } m_elements.clear(); } - + template<class ImplTraits> typename RewriteRuleTokenStream<ImplTraits>::TokenType* RewriteRuleTokenStream<ImplTraits>::nextToken() { return this->_next(); -} - +} + template<class ImplTraits> typename RewriteRuleSubtreeStream<ImplTraits>::TreeTypePtr RewriteRuleSubtreeStream<ImplTraits>::nextNode(TreeTypePtr element) diff --git a/contrib/libs/antlr3_cpp_runtime/include/antlr3tokenstream.hpp b/contrib/libs/antlr3_cpp_runtime/include/antlr3tokenstream.hpp index 947ac097c8..8cf1fe841d 100644 --- a/contrib/libs/antlr3_cpp_runtime/include/antlr3tokenstream.hpp +++ b/contrib/libs/antlr3_cpp_runtime/include/antlr3tokenstream.hpp @@ -1,89 +1,89 @@ -/** \file - * Defines the interface for an ANTLR3 common token stream. 
Custom token streams should create - * one of these and then override any functions by installing their own pointers - * to implement the various functions. - */ +/** \file + * Defines the interface for an ANTLR3 common token stream. Custom token streams should create + * one of these and then override any functions by installing their own pointers + * to implement the various functions. + */ #ifndef _ANTLR3_TOKENSTREAM_HPP #define _ANTLR3_TOKENSTREAM_HPP - -// [The "BSD licence"] -// Copyright (c) 2005-2009 Gokulakannan Somasundaram, ElectronDB - -// -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// 3. The name of the author may not be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR -// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES -// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. -// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, -// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT -// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF -// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- -/** Definition of a token source, which has a pointer to a function that - * returns the next token (using a token factory if it is going to be - * efficient) and a pointer to an ANTLR3_INPUT_STREAM. This is slightly - * different to the Java interface because we have no way to implement - * multiple interfaces without defining them in the interface structure - * or casting (void *), which is too convoluted. - */ + +// [The "BSD licence"] +// Copyright (c) 2005-2009 Gokulakannan Somasundaram, ElectronDB + +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// 3. The name of the author may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +/** Definition of a token source, which has a pointer to a function that + * returns the next token (using a token factory if it is going to be + * efficient) and a pointer to an ANTLR3_INPUT_STREAM. This is slightly + * different to the Java interface because we have no way to implement + * multiple interfaces without defining them in the interface structure + * or casting (void *), which is too convoluted. + */ namespace antlr3 { - -//We are not making it subclass AllocPolicy, as this will always be a base class -template<class ImplTraits> -class TokenSource -{ -public: + +//We are not making it subclass AllocPolicy, as this will always be a base class +template<class ImplTraits> +class TokenSource +{ +public: typedef typename ImplTraits::CommonTokenType TokenType; typedef TokenType CommonTokenType; typedef typename ImplTraits::StringType StringType; typedef typename ImplTraits::LexerType LexerType; - -private: - /** A special pre-allocated token, which signifies End Of Tokens. Because this must - * be set up with the current input index and so on, we embed the structure and - * return the address of it. It is marked as factoryMade, so that it is never - * attempted to be freed. - */ + +private: + /** A special pre-allocated token, which signifies End Of Tokens. Because this must + * be set up with the current input index and so on, we embed the structure and + * return the address of it. It is marked as factoryMade, so that it is never + * attempted to be freed. + */ TokenType m_eofToken; - + /// A special pre-allocated token, which is returned by mTokens() if the /// lexer rule said to just skip the generated token altogether. /// Having this single token stops us wasting memory by have the token factory /// actually create something that we are going to SKIP(); anyway. /// TokenType m_skipToken; - - /** When the token source is constructed, it is populated with the file - * name from whence the tokens were produced by the lexer. 
This pointer is a - * copy of the one supplied by the CharStream (and may be NULL) so should - * not be manipulated other than to copy or print it. - */ + + /** When the token source is constructed, it is populated with the file + * name from whence the tokens were produced by the lexer. This pointer is a + * copy of the one supplied by the CharStream (and may be NULL) so should + * not be manipulated other than to copy or print it. + */ StringType m_fileName; - -public: + +public: TokenType& get_eofToken(); const TokenType& get_eofToken() const; TokenType& get_skipToken(); StringType& get_fileName(); LexerType* get_super(); - + void set_fileName( const StringType& fileName ); - + /** * \brief * Default implementation of the nextToken() call for a lexer. @@ -103,10 +103,10 @@ public: * * \see nextTokenStr */ - TokenType* nextToken(); + TokenType* nextToken(); CommonTokenType* nextToken( BoolForwarder<true> /*isFiltered*/ ); CommonTokenType* nextToken( BoolForwarder<false> /*isFiltered*/ ); - + /// /// \brief /// Returns the next available token from the current input stream. @@ -125,19 +125,19 @@ public: /// \see nextToken /// TokenType* nextTokenStr(); - -protected: + +protected: TokenSource(); -}; - -/** Definition of the ANTLR3 common token stream interface. - * \remark - * Much of the documentation for this interface is stolen from Ter's Java implementation. - */ -template<class ImplTraits> -class TokenStream : public ImplTraits::TokenIntStreamType -{ -public: +}; + +/** Definition of the ANTLR3 common token stream interface. + * \remark + * Much of the documentation for this interface is stolen from Ter's Java implementation. 
+ */ +template<class ImplTraits> +class TokenStream : public ImplTraits::TokenIntStreamType +{ +public: typedef typename ImplTraits::TokenSourceType TokenSourceType; typedef typename ImplTraits::TokenIntStreamType IntStreamType; typedef typename ImplTraits::CommonTokenType TokenType; @@ -146,96 +146,96 @@ public: typedef typename ImplTraits::DebugEventListenerType DebugEventListenerType; typedef typename ImplTraits::TokenStreamType TokenStreamType; typedef typename ImplTraits::ParserType ComponentType; - -protected: - /** Pointer to the token source for this stream - */ - TokenSourceType* m_tokenSource; - + +protected: + /** Pointer to the token source for this stream + */ + TokenSourceType* m_tokenSource; + /// Debugger interface, is this is a debugging token stream /// DebugEventListenerType* m_debugger; - + /// Indicates the initial stream state for dbgConsume() /// bool m_initialStreamState; - -public: + +public: TokenStream(TokenSourceType* source, DebugEventListenerType* debugger); IntStreamType* get_istream(); TokenSourceType* get_tokenSource() const; void set_tokenSource( TokenSourceType* tokenSource ); - - /** Get Token at current input pointer + i ahead where i=1 is next Token. - * i<0 indicates tokens in the past. So -1 is previous token and -2 is - * two tokens ago. LT(0) is undefined. For i>=n, return Token.EOFToken. - * Return null for LT(0) and any index that results in an absolute address - * that is negative. - */ + + /** Get Token at current input pointer + i ahead where i=1 is next Token. + * i<0 indicates tokens in the past. So -1 is previous token and -2 is + * two tokens ago. LT(0) is undefined. For i>=n, return Token.EOFToken. + * Return null for LT(0) and any index that results in an absolute address + * that is negative. + */ const TokenType* LT(ANTLR_INT32 k); - - /** Where is this stream pulling tokens from? This is not the name, but - * a pointer into an interface that contains a ANTLR3_TOKEN_SOURCE interface. 
- * The Token Source interface contains a pointer to the input stream and a pointer - * to a function that returns the next token. - */ - TokenSourceType* getTokenSource(); - - /** Function that installs a token source for teh stream - */ + + /** Where is this stream pulling tokens from? This is not the name, but + * a pointer into an interface that contains a ANTLR3_TOKEN_SOURCE interface. + * The Token Source interface contains a pointer to the input stream and a pointer + * to a function that returns the next token. + */ + TokenSourceType* getTokenSource(); + + /** Function that installs a token source for teh stream + */ void setTokenSource(TokenSourceType* tokenSource); - - /** Return the text of all the tokens in the stream, as the old tramp in - * Leeds market used to say; "Get the lot!" - */ + + /** Return the text of all the tokens in the stream, as the old tramp in + * Leeds market used to say; "Get the lot!" + */ StringType toString(); - - /** Return the text of all tokens from start to stop, inclusive. - * If the stream does not buffer all the tokens then it can just - * return an empty ANTLR3_STRING or NULL; Grammars should not access $ruleLabel.text in - * an action in that case. - */ + + /** Return the text of all tokens from start to stop, inclusive. + * If the stream does not buffer all the tokens then it can just + * return an empty ANTLR3_STRING or NULL; Grammars should not access $ruleLabel.text in + * an action in that case. + */ StringType toStringSS(ANTLR_MARKER start, ANTLR_MARKER stop); - - /** Because the user is not required to use a token with an index stored - * in it, we must provide a means for two token objects themselves to - * indicate the start/end location. Most often this will just delegate - * to the other toString(int,int). This is also parallel with - * the pTREENODE_STREAM->toString(Object,Object). 
- */ + + /** Because the user is not required to use a token with an index stored + * in it, we must provide a means for two token objects themselves to + * indicate the start/end location. Most often this will just delegate + * to the other toString(int,int). This is also parallel with + * the pTREENODE_STREAM->toString(Object,Object). + */ StringType toStringTT(const TokenType* start, const TokenType* stop); - - - /** Function that sets the token stream into debugging mode - */ + + + /** Function that sets the token stream into debugging mode + */ void setDebugListener(DebugEventListenerType* debugger); - + TokenStream(); - -}; - -/** Common token stream is an implementation of ANTLR_TOKEN_STREAM for the default - * parsers and recognizers. You may of course build your own implementation if - * you are so inclined. - */ -template<bool TOKENS_ACCESSED_FROM_OWNING_RULE, class ListType, class MapType> -class TokenStoreSelector -{ -public: + +}; + +/** Common token stream is an implementation of ANTLR_TOKEN_STREAM for the default + * parsers and recognizers. You may of course build your own implementation if + * you are so inclined. 
+ */ +template<bool TOKENS_ACCESSED_FROM_OWNING_RULE, class ListType, class MapType> +class TokenStoreSelector +{ +public: typedef ListType TokensType; -}; - -template<class ListType, class MapType> -class TokenStoreSelector<true, ListType, MapType> -{ -public: +}; + +template<class ListType, class MapType> +class TokenStoreSelector<true, ListType, MapType> +{ +public: typedef MapType TokensType; -}; - -template<class ImplTraits> +}; + +template<class ImplTraits> class CommonTokenStream : public TokenStream<ImplTraits> -{ -public: +{ +public: typedef typename ImplTraits::AllocPolicyType AllocPolicyType; typedef typename ImplTraits::BitsetType BitsetType; typedef typename ImplTraits::CommonTokenType TokenType; @@ -245,55 +245,55 @@ public: typedef typename AllocPolicyType::template OrderedMapType<ANTLR_MARKER, TokenType> TokensMapType; typedef typename TokenStoreSelector< ImplTraits::TOKENS_ACCESSED_FROM_OWNING_RULE, TokensListType, TokensMapType >::TokensType TokensType; - + typedef typename AllocPolicyType::template UnOrderedMapType<ANTLR_UINT32, ANTLR_UINT32> ChannelOverridesType; typedef typename AllocPolicyType::template OrderedSetType<ANTLR_UINT32> DiscardSetType; typedef typename AllocPolicyType::template ListType<ANTLR_UINT32> IntListType; typedef TokenStream<ImplTraits> BaseType; - -private: - /** Records every single token pulled from the source indexed by the token index. - * There might be more efficient ways to do this, such as referencing directly in to - * the token factory pools, but for now this is convenient and the ANTLR3_LIST is not - * a huge overhead as it only stores pointers anyway, but allows for iterations and - * so on. - */ + +private: + /** Records every single token pulled from the source indexed by the token index. 
+ * There might be more efficient ways to do this, such as referencing directly in to + * the token factory pools, but for now this is convenient and the ANTLR3_LIST is not + * a huge overhead as it only stores pointers anyway, but allows for iterations and + * so on. + */ TokensType m_tokens; - - /** Override map of tokens. If a token type has an entry in here, then - * the pointer in the table points to an int, being the override channel number - * that should always be used for this token type. - */ + + /** Override map of tokens. If a token type has an entry in here, then + * the pointer in the table points to an int, being the override channel number + * that should always be used for this token type. + */ ChannelOverridesType m_channelOverrides; - - /** Discared set. If a token has an entry in this table, then it is thrown - * away (data pointer is always NULL). - */ + + /** Discared set. If a token has an entry in this table, then it is thrown + * away (data pointer is always NULL). + */ DiscardSetType m_discardSet; - - /* The channel number that this token stream is tuned to. For instance, whitespace - * is usually tuned to channel 99, which no token stream would normally tune to and - * so it is thrown away. - */ + + /* The channel number that this token stream is tuned to. For instance, whitespace + * is usually tuned to channel 99, which no token stream would normally tune to and + * so it is thrown away. + */ ANTLR_UINT32 m_channel; - + /** The index into the tokens list of the current token (the next one that will be - * consumed. p = -1 indicates that the token list is empty. - */ + * consumed. p = -1 indicates that the token list is empty. + */ ANTLR_INT32 m_p; - + /* The total number of tokens issued till now. 
For streams that delete tokens, this helps in issuing the index */ ANTLR_UINT32 m_nissued; - - /** If this flag is set to true, then tokens that the stream sees that are not - * in the channel that this stream is tuned to, are not tracked in the - * tokens table. When set to false, ALL tokens are added to the tracking. - */ + + /** If this flag is set to true, then tokens that the stream sees that are not + * in the channel that this stream is tuned to, are not tracked in the + * tokens table. When set to false, ALL tokens are added to the tracking. + */ bool m_discardOffChannel; - -public: + +public: CommonTokenStream(ANTLR_UINT32 hint, TokenSourceType* source = NULL, DebugEventListenerType* debugger = NULL); ~CommonTokenStream(); @@ -305,31 +305,31 @@ public: void set_p( ANTLR_INT32 p ); void inc_p(); void dec_p(); - - /** A simple filter mechanism whereby you can tell this token stream - * to force all tokens of type ttype to be on channel. For example, - * when interpreting, we cannot exec actions so we need to tell - * the stream to force all WS and NEWLINE to be a different, ignored - * channel. - */ - void setTokenTypeChannel(ANTLR_UINT32 ttype, ANTLR_UINT32 channel); - - /** Add a particular token type to the discard set. If a token is found to belong - * to this set, then it is skipped/thrown away - */ - void discardTokenType(ANTLR_INT32 ttype); - + + /** A simple filter mechanism whereby you can tell this token stream + * to force all tokens of type ttype to be on channel. For example, + * when interpreting, we cannot exec actions so we need to tell + * the stream to force all WS and NEWLINE to be a different, ignored + * channel. + */ + void setTokenTypeChannel(ANTLR_UINT32 ttype, ANTLR_UINT32 channel); + + /** Add a particular token type to the discard set. 
If a token is found to belong + * to this set, then it is skipped/thrown away + */ + void discardTokenType(ANTLR_INT32 ttype); + //This will discard tokens of a particular rule after the rule execution completion void discardTokens( ANTLR_MARKER start, ANTLR_MARKER stop ); void discardTokens( ANTLR_MARKER start, ANTLR_MARKER stop, BoolForwarder<true> tokens_accessed_from_owning_rule ); void discardTokens( ANTLR_MARKER start, ANTLR_MARKER stop, BoolForwarder<false> tokens_accessed_from_owning_rule ); - + void insertToken( const TokenType& tok ); void insertToken( const TokenType& tok, BoolForwarder<true> tokens_accessed_from_owning_rule ); void insertToken( const TokenType& tok, BoolForwarder<false> tokens_accessed_from_owning_rule ); - + /** Get a token at an absolute index i; 0..n-1. This is really only * needed for profiling and debugging and token stream rewriting. * If you don't want to buffer up tokens, then this method makes no @@ -341,66 +341,66 @@ public: const TokenType* getToken(ANTLR_MARKER i); const TokenType* getToken( ANTLR_MARKER tok_idx, BoolForwarder<true> tokens_accessed_from_owning_rule ); const TokenType* getToken( ANTLR_MARKER tok_idx, BoolForwarder<false> tokens_accessed_from_owning_rule ); - - /** Signal to discard off channel tokens from here on in. - */ - void discardOffChannelToks(bool discard); - - /** Function that returns a pointer to the ANTLR3_LIST of all tokens - * in the stream (this causes the buffer to fill if we have not get any yet) - */ + + /** Signal to discard off channel tokens from here on in. + */ + void discardOffChannelToks(bool discard); + + /** Function that returns a pointer to the ANTLR3_LIST of all tokens + * in the stream (this causes the buffer to fill if we have not get any yet) + */ TokensType* getTokens(); - - /** Function that returns all the tokens between a start and a stop index. 
- */ - void getTokenRange(ANTLR_UINT32 start, ANTLR_UINT32 stop, TokensListType& tokenRange); - - /** Function that returns all the tokens indicated by the specified bitset, within a range of tokens - */ - void getTokensSet(ANTLR_UINT32 start, ANTLR_UINT32 stop, BitsetType* types, TokensListType& tokenSet); - - /** Function that returns all the tokens indicated by being a member of the supplied List - */ - void getTokensList(ANTLR_UINT32 start, ANTLR_UINT32 stop, + + /** Function that returns all the tokens between a start and a stop index. + */ + void getTokenRange(ANTLR_UINT32 start, ANTLR_UINT32 stop, TokensListType& tokenRange); + + /** Function that returns all the tokens indicated by the specified bitset, within a range of tokens + */ + void getTokensSet(ANTLR_UINT32 start, ANTLR_UINT32 stop, BitsetType* types, TokensListType& tokenSet); + + /** Function that returns all the tokens indicated by being a member of the supplied List + */ + void getTokensList(ANTLR_UINT32 start, ANTLR_UINT32 stop, const IntListType& list, TokensListType& tokenList); - - /** Function that returns all tokens of a certain type within a range. - */ - void getTokensType(ANTLR_UINT32 start, ANTLR_UINT32 stop, ANTLR_UINT32 type, TokensListType& tokens); - - /** Function that resets the token stream so that it can be reused, but - * but that does not free up any resources, such as the token factory - * the factory pool and so on. This prevents the need to keep freeing - * and reallocating the token pools if the thing you are building is - * a multi-shot dameon or somethign like that. It is much faster to - * just reuse all the vectors. - */ - void reset(); - + + /** Function that returns all tokens of a certain type within a range. 
+ */ + void getTokensType(ANTLR_UINT32 start, ANTLR_UINT32 stop, ANTLR_UINT32 type, TokensListType& tokens); + + /** Function that resets the token stream so that it can be reused, but + * but that does not free up any resources, such as the token factory + * the factory pool and so on. This prevents the need to keep freeing + * and reallocating the token pools if the thing you are building is + * a multi-shot dameon or somethign like that. It is much faster to + * just reuse all the vectors. + */ + void reset(); + const TokenType* LB(ANTLR_INT32 k); - - + + void fillBufferExt(); void fillBuffer(); - + bool hasReachedFillbufferTarget( ANTLR_UINT32 cnt, BoolForwarder<true> tokens_accessed_from_owning_rule ); bool hasReachedFillbufferTarget( ANTLR_UINT32 cnt, BoolForwarder<false> tokens_accessed_from_owning_rule ); - + ANTLR_UINT32 skipOffTokenChannels(ANTLR_INT32 i); ANTLR_UINT32 skipOffTokenChannelsReverse(ANTLR_INT32 x); ANTLR_MARKER index_impl(); -}; - -class TokenAccessException : public std::exception -{ +}; + +class TokenAccessException : public std::exception +{ virtual const char* what() const noexcept { return " Attempted access on Deleted Token"; } -}; - +}; + } - -#include "antlr3tokenstream.inl" - -#endif + +#include "antlr3tokenstream.inl" + +#endif diff --git a/contrib/libs/antlr3_cpp_runtime/include/antlr3tokenstream.inl b/contrib/libs/antlr3_cpp_runtime/include/antlr3tokenstream.inl index b2c4e3bcc8..b9f4ae7575 100644 --- a/contrib/libs/antlr3_cpp_runtime/include/antlr3tokenstream.inl +++ b/contrib/libs/antlr3_cpp_runtime/include/antlr3tokenstream.inl @@ -1,173 +1,173 @@ namespace antlr3 { - -template<class ImplTraits> -TokenSource<ImplTraits>::TokenSource() + +template<class ImplTraits> +TokenSource<ImplTraits>::TokenSource() :m_eofToken( ImplTraits::CommonTokenType::TOKEN_EOF), m_skipToken( ImplTraits::CommonTokenType::TOKEN_INVALID) -{ -} - -template<class ImplTraits> -ANTLR_INLINE typename TokenSource<ImplTraits>::CommonTokenType& 
TokenSource<ImplTraits>::get_eofToken() -{ +{ +} + +template<class ImplTraits> +ANTLR_INLINE typename TokenSource<ImplTraits>::CommonTokenType& TokenSource<ImplTraits>::get_eofToken() +{ return m_eofToken; -} - -template<class ImplTraits> -ANTLR_INLINE const typename TokenSource<ImplTraits>::TokenType& TokenSource<ImplTraits>::get_eofToken() const -{ +} + +template<class ImplTraits> +ANTLR_INLINE const typename TokenSource<ImplTraits>::TokenType& TokenSource<ImplTraits>::get_eofToken() const +{ return m_eofToken; -} - -template<class ImplTraits> -ANTLR_INLINE typename TokenSource<ImplTraits>::CommonTokenType& TokenSource<ImplTraits>::get_skipToken() -{ +} + +template<class ImplTraits> +ANTLR_INLINE typename TokenSource<ImplTraits>::CommonTokenType& TokenSource<ImplTraits>::get_skipToken() +{ return m_skipToken; -} - -template<class ImplTraits> -ANTLR_INLINE typename TokenSource<ImplTraits>::StringType& TokenSource<ImplTraits>::get_fileName() -{ +} + +template<class ImplTraits> +ANTLR_INLINE typename TokenSource<ImplTraits>::StringType& TokenSource<ImplTraits>::get_fileName() +{ return m_fileName; -} - -template<class ImplTraits> -ANTLR_INLINE void TokenSource<ImplTraits>::set_fileName( const StringType& fileName ) -{ +} + +template<class ImplTraits> +ANTLR_INLINE void TokenSource<ImplTraits>::set_fileName( const StringType& fileName ) +{ m_fileName = fileName; -} - -template<class ImplTraits> -typename TokenSource<ImplTraits>::LexerType* TokenSource<ImplTraits>::get_super() -{ +} + +template<class ImplTraits> +typename TokenSource<ImplTraits>::LexerType* TokenSource<ImplTraits>::get_super() +{ return static_cast<LexerType*>(this); -} - -template<class ImplTraits> +} + +template<class ImplTraits> typename TokenSource<ImplTraits>::TokenType* TokenSource<ImplTraits>::nextTokenStr() -{ +{ typedef typename LexerType::RecognizerSharedStateType RecognizerSharedStateType; typedef typename LexerType::InputStreamType InputStreamType; typedef typename LexerType::IntStreamType 
IntStreamType; LexerType* lexer; RecognizerSharedStateType* state; - InputStreamType* input; - IntStreamType* istream; - - lexer = this->get_super(); - state = lexer->get_rec()->get_state(); - input = lexer->get_input(); - istream = input->get_istream(); - - /// Loop until we get a non skipped token or EOF - /// + InputStreamType* input; + IntStreamType* istream; + + lexer = this->get_super(); + state = lexer->get_rec()->get_state(); + input = lexer->get_input(); + istream = input->get_istream(); + + /// Loop until we get a non skipped token or EOF + /// for (;;) - { - // Get rid of any previous token (token factory takes care of - // any de-allocation when this token is finally used up. - // - state->set_token_present(false); + { + // Get rid of any previous token (token factory takes care of + // any de-allocation when this token is finally used up. + // + state->set_token_present(false); state->set_error(false); // Start out without an exception - state->set_failed(false); - - // Now call the matching rules and see if we can generate a new token - // + state->set_failed(false); + + // Now call the matching rules and see if we can generate a new token + // for (;;) - { - // Record the start of the token in our input stream. - // - state->set_channel( TOKEN_DEFAULT_CHANNEL ); - state->set_tokenStartCharIndex( (ANTLR_MARKER)input->get_nextChar() ); - state->set_tokenStartCharPositionInLine( input->get_charPositionInLine() ); - state->set_tokenStartLine( input->get_line() ); - state->set_text(""); - + { + // Record the start of the token in our input stream. 
+ // + state->set_channel( TOKEN_DEFAULT_CHANNEL ); + state->set_tokenStartCharIndex( (ANTLR_MARKER)input->get_nextChar() ); + state->set_tokenStartCharPositionInLine( input->get_charPositionInLine() ); + state->set_tokenStartLine( input->get_line() ); + state->set_text(""); + if (istream->LA(1) == ANTLR_CHARSTREAM_EOF) - { - // Reached the end of the current stream, nothing more to do if this is - // the last in the stack. - // - TokenType& teof = m_eofToken; - - teof.set_startIndex(lexer->getCharIndex()); - teof.set_stopIndex(lexer->getCharIndex()); - teof.set_line(lexer->getLine()); - return &teof; - } - - state->set_token_present( false ); + { + // Reached the end of the current stream, nothing more to do if this is + // the last in the stack. + // + TokenType& teof = m_eofToken; + + teof.set_startIndex(lexer->getCharIndex()); + teof.set_stopIndex(lexer->getCharIndex()); + teof.set_line(lexer->getLine()); + return &teof; + } + + state->set_token_present( false ); state->set_error(false); // Start out without an exception - state->set_failed(false); - - // Call the generated lexer, see if it can get a new token together. - // - lexer->mTokens(); - - if (state->get_error() == true) - { - // Recognition exception, report it and try to recover. - // - state->set_failed(true); - lexer->get_rec()->reportError(); + state->set_failed(false); + + // Call the generated lexer, see if it can get a new token together. + // + lexer->mTokens(); + + if (state->get_error() == true) + { + // Recognition exception, report it and try to recover. 
+ // + state->set_failed(true); + lexer->get_rec()->reportError(); lexer->recover(); if (state->get_token_present()) // Good(or invalid) token factored by custom recover procedure // return state->get_token(); - } - else - { - if ( !state->get_token_present() ) - { - // Emit the real token, which adds it in to the token stream basically - // - lexer->emit(); - } + } + else + { + if ( !state->get_token_present() ) + { + // Emit the real token, which adds it in to the token stream basically + // + lexer->emit(); + } else if ( *(state->get_token()) == m_skipToken ) - { - // A real token could have been generated, but "Computer say's naaaaah" and it - // it is just something we need to skip altogether. - // - continue; - } - - // Good token, not skipped, not EOF token - // - return state->get_token(); - } - } - } -} - -template<class ImplTraits> -typename TokenSource<ImplTraits>::TokenType* TokenSource<ImplTraits>::nextToken() -{ + { + // A real token could have been generated, but "Computer say's naaaaah" and it + // it is just something we need to skip altogether. + // + continue; + } + + // Good token, not skipped, not EOF token + // + return state->get_token(); + } + } + } +} + +template<class ImplTraits> +typename TokenSource<ImplTraits>::TokenType* TokenSource<ImplTraits>::nextToken() +{ return this->nextToken( BoolForwarder<LexerType::IsFiltered>() ); -} - -template<class ImplTraits> +} + +template<class ImplTraits> typename TokenSource<ImplTraits>::CommonTokenType* TokenSource<ImplTraits>::nextToken( BoolForwarder<true> /*isFiltered*/ ) -{ +{ LexerType* lexer; typename LexerType::RecognizerSharedStateType* state; - + lexer = this->get_super(); state = lexer->get_lexstate(); - + /* Get rid of any previous token (token factory takes care of * any deallocation when this token is finally used up. 
*/ state->set_token_present( false ); state->set_error( false ); /* Start out without an exception */ state->set_failed(false); - + /* Record the start of the token in our input stream. */ state->set_tokenStartCharIndex( lexer->index() ); state->set_tokenStartCharPositionInLine( lexer->getCharPositionInLine() ); state->set_tokenStartLine( lexer->getLine() ); state->set_text(""); - + /* Now call the matching rules and see if we can generate a new token */ for (;;) @@ -177,31 +177,31 @@ typename TokenSource<ImplTraits>::CommonTokenType* TokenSource<ImplTraits>::next /* Reached the end of the stream, nothing more to do. */ CommonTokenType& teof = m_eofToken; - + teof.set_startIndex(lexer->getCharIndex()); teof.set_stopIndex(lexer->getCharIndex()); teof.set_line(lexer->getLine()); return &teof; } - + state->set_token_present(false); state->set_error(false); /* Start out without an exception */ - + { ANTLR_MARKER m; - + m = lexer->get_istream()->mark(); state->set_backtracking(1); /* No exceptions */ state->set_failed(false); - + /* Call the generated lexer, see if it can get a new token together. */ lexer->mTokens(); state->set_backtracking(0); - + /* mTokens backtracks with synpred at BACKTRACKING==2 and we set the synpredgate to allow actions at level 1. */ - + if(state->get_failed()) { lexer->rewind(m); @@ -215,15 +215,15 @@ typename TokenSource<ImplTraits>::CommonTokenType* TokenSource<ImplTraits>::next } } } -} - -template<class ImplTraits> +} + +template<class ImplTraits> typename TokenSource<ImplTraits>::CommonTokenType* TokenSource<ImplTraits>::nextToken( BoolForwarder<false> /*isFiltered*/ ) -{ +{ // Find the next token in the current stream // CommonTokenType* tok = this->nextTokenStr(); - + // If we got to the EOF token then switch to the previous // input stream if there were any and just return the // EOF if there are none. 
We must check the next token @@ -241,7 +241,7 @@ typename TokenSource<ImplTraits>::CommonTokenType* TokenSource<ImplTraits>::next { typename ImplTraits::LexerType* lexer; lexer = static_cast<typename ImplTraits::LexerType*>( this->get_super() ); - + if ( lexer->get_rec()->get_state()->get_streams().size() > 0) { // We have another input stream in the stack so we @@ -262,175 +262,175 @@ typename TokenSource<ImplTraits>::CommonTokenType* TokenSource<ImplTraits>::next } } - + // return whatever token we have, which may be EOF // return tok; -} - -template<class ImplTraits> -TokenStream<ImplTraits>::TokenStream() -{ +} + +template<class ImplTraits> +TokenStream<ImplTraits>::TokenStream() +{ m_tokenSource = NULL; m_debugger = NULL; m_initialStreamState = false; -} - -template<class ImplTraits> -typename TokenStream<ImplTraits>::IntStreamType* TokenStream<ImplTraits>::get_istream() -{ +} + +template<class ImplTraits> +typename TokenStream<ImplTraits>::IntStreamType* TokenStream<ImplTraits>::get_istream() +{ return this; -} - -template<class ImplTraits> -TokenStream<ImplTraits>::TokenStream(TokenSourceType* source, DebugEventListenerType* debugger) -{ +} + +template<class ImplTraits> +TokenStream<ImplTraits>::TokenStream(TokenSourceType* source, DebugEventListenerType* debugger) +{ m_initialStreamState = false; m_tokenSource = source; m_debugger = debugger; -} - -template<class ImplTraits> +} + +template<class ImplTraits> CommonTokenStream<ImplTraits>::CommonTokenStream(ANTLR_UINT32 , TokenSourceType* source, DebugEventListenerType* debugger) : CommonTokenStream<ImplTraits>::BaseType( source, debugger ) -{ +{ m_p = -1; m_channel = TOKEN_DEFAULT_CHANNEL; m_discardOffChannel = false; m_nissued = 0; -} - -template<class ImplTraits> -typename CommonTokenStream<ImplTraits>::TokensType& CommonTokenStream<ImplTraits>::get_tokens() -{ +} + +template<class ImplTraits> +typename CommonTokenStream<ImplTraits>::TokensType& CommonTokenStream<ImplTraits>::get_tokens() +{ return 
m_tokens; -} - -template<class ImplTraits> -const typename CommonTokenStream<ImplTraits>::TokensType& CommonTokenStream<ImplTraits>::get_tokens() const -{ +} + +template<class ImplTraits> +const typename CommonTokenStream<ImplTraits>::TokensType& CommonTokenStream<ImplTraits>::get_tokens() const +{ return m_tokens; -} - -template<class ImplTraits> -typename CommonTokenStream<ImplTraits>::DiscardSetType& CommonTokenStream<ImplTraits>::get_discardSet() -{ +} + +template<class ImplTraits> +typename CommonTokenStream<ImplTraits>::DiscardSetType& CommonTokenStream<ImplTraits>::get_discardSet() +{ return m_discardSet; -} - -template<class ImplTraits> -const typename CommonTokenStream<ImplTraits>::DiscardSetType& CommonTokenStream<ImplTraits>::get_discardSet() const -{ +} + +template<class ImplTraits> +const typename CommonTokenStream<ImplTraits>::DiscardSetType& CommonTokenStream<ImplTraits>::get_discardSet() const +{ return m_discardSet; -} - -template<class ImplTraits> -ANTLR_INLINE ANTLR_INT32 CommonTokenStream<ImplTraits>::get_p() const -{ +} + +template<class ImplTraits> +ANTLR_INLINE ANTLR_INT32 CommonTokenStream<ImplTraits>::get_p() const +{ return m_p; -} - -template<class ImplTraits> -ANTLR_INLINE void CommonTokenStream<ImplTraits>::set_p( ANTLR_INT32 p ) -{ +} + +template<class ImplTraits> +ANTLR_INLINE void CommonTokenStream<ImplTraits>::set_p( ANTLR_INT32 p ) +{ m_p = p; -} - -template<class ImplTraits> -ANTLR_INLINE void CommonTokenStream<ImplTraits>::inc_p() -{ +} + +template<class ImplTraits> +ANTLR_INLINE void CommonTokenStream<ImplTraits>::inc_p() +{ ++m_p; -} - -template<class ImplTraits> -ANTLR_INLINE void CommonTokenStream<ImplTraits>::dec_p() -{ +} + +template<class ImplTraits> +ANTLR_INLINE void CommonTokenStream<ImplTraits>::dec_p() +{ --m_p; -} - -template<class ImplTraits> -ANTLR_INLINE ANTLR_MARKER CommonTokenStream<ImplTraits>::index_impl() -{ +} + +template<class ImplTraits> +ANTLR_INLINE ANTLR_MARKER 
CommonTokenStream<ImplTraits>::index_impl() +{ return m_p; -} - -// Reset a token stream so it can be used again and can reuse it's -// resources. -// -template<class ImplTraits> -void CommonTokenStream<ImplTraits>::reset() -{ +} + +// Reset a token stream so it can be used again and can reuse it's +// resources. +// +template<class ImplTraits> +void CommonTokenStream<ImplTraits>::reset() +{ // Free any resources that ar most like specifc to the - // run we just did. - // + // run we just did. + // m_discardSet.clear(); m_channelOverrides.clear(); - - // Now, if there were any existing tokens in the stream, - // then we just reset the vector count so that it starts - // again. We must traverse the entries unfortunately as - // there may be free pointers for custom token types and - // so on. However that is just a quick NULL check on the - // vector entries. - // + + // Now, if there were any existing tokens in the stream, + // then we just reset the vector count so that it starts + // again. We must traverse the entries unfortunately as + // there may be free pointers for custom token types and + // so on. However that is just a quick NULL check on the + // vector entries. 
+ // m_tokens.clear(); - - // Reset to defaults - // - m_discardOffChannel = false; - m_channel = ImplTraits::CommonTokenType::TOKEN_DEFAULT_CHANNEL; + + // Reset to defaults + // + m_discardOffChannel = false; + m_channel = ImplTraits::CommonTokenType::TOKEN_DEFAULT_CHANNEL; m_p = -1; -} - -template<class ImplTraits> +} + +template<class ImplTraits> void TokenStream<ImplTraits>::setDebugListener(DebugEventListenerType* debugger) -{ +{ m_debugger = debugger; m_initialStreamState = false; -} - -template<class ImplTraits> +} + +template<class ImplTraits> const typename TokenStream<ImplTraits>::TokenType* TokenStream<ImplTraits>::LT(ANTLR_INT32 k) -{ +{ ANTLR_INT32 i; ANTLR_INT32 n; TokenStreamType* cts; - + cts = this->get_super(); - - if(k < 0) + + if(k < 0) { return cts->LB(-k); } - + ANTLR_INT32 req_idx = cts->get_p() + k - 1; ANTLR_INT32 cached_size = static_cast<ANTLR_INT32>(this->get_istream()->get_cachedSize()); - + if( (cts->get_p() == -1) || ( ( req_idx >= cached_size ) && ( (cached_size % ImplTraits::TOKEN_FILL_BUFFER_INCREMENT) == 0 ) ) ) { cts->fillBuffer(); } - - // Here we used to check for k == 0 and return 0, but this seems - // a superfluous check to me. LT(k=0) is therefore just undefined - // and we won't waste the clock cycles on the check - // + + // Here we used to check for k == 0 and return 0, but this seems + // a superfluous check to me. 
LT(k=0) is therefore just undefined + // and we won't waste the clock cycles on the check + // cached_size = static_cast<ANTLR_INT32>(this->get_istream()->get_cachedSize()); if ( req_idx >= cached_size ) { TokenType& teof = cts->get_tokenSource()->get_eofToken(); - + teof.set_startIndex( this->get_istream()->index()); teof.set_stopIndex( this->get_istream()->index()); return &teof; } - + i = cts->get_p(); n = 1; - + /* Need to find k good tokens, skipping ones that are off channel */ while( n < k) @@ -447,81 +447,81 @@ const typename TokenStream<ImplTraits>::TokenType* TokenStream<ImplTraits>::LT( if ( (ANTLR_UINT32) i >= this->get_istream()->get_cachedSize() ) { TokenType& teof = cts->get_tokenSource()->get_eofToken(); - + teof.set_startIndex(this->get_istream()->index()); teof.set_stopIndex(this->get_istream()->index()); return &teof; } - + // Here the token must be in the input vector. Rather then incur // function call penalty, we just return the pointer directly // from the vector // return cts->getToken(i); -} - -template<class ImplTraits> -const typename CommonTokenStream<ImplTraits>::TokenType* CommonTokenStream<ImplTraits>::LB(ANTLR_INT32 k) -{ - ANTLR_INT32 i; - ANTLR_INT32 n; - - if (m_p == -1) - { - this->fillBuffer(); - } - if (k == 0) - { - return NULL; - } - if ((m_p - k) < 0) - { - return NULL; - } - - i = m_p; - n = 1; - - /* Need to find k good tokens, going backwards, skipping ones that are off channel - */ - while (n <= k) - { - /* Skip off-channel tokens - */ - - i = this->skipOffTokenChannelsReverse(i - 1); /* leave p on valid token */ - n++; - } - if (i < 0) - { - return NULL; - } +} + +template<class ImplTraits> +const typename CommonTokenStream<ImplTraits>::TokenType* CommonTokenStream<ImplTraits>::LB(ANTLR_INT32 k) +{ + ANTLR_INT32 i; + ANTLR_INT32 n; + + if (m_p == -1) + { + this->fillBuffer(); + } + if (k == 0) + { + return NULL; + } + if ((m_p - k) < 0) + { + return NULL; + } + + i = m_p; + n = 1; + + /* Need to find k good tokens, 
going backwards, skipping ones that are off channel + */ + while (n <= k) + { + /* Skip off-channel tokens + */ + + i = this->skipOffTokenChannelsReverse(i - 1); /* leave p on valid token */ + n++; + } + if (i < 0) + { + return NULL; + } // Here the token must be in the input vector. Rather then incut // function call penalty, we jsut return the pointer directly // from the vector // return this->getToken(i); -} - -template<class ImplTraits> -const typename CommonTokenStream<ImplTraits>::TokenType* CommonTokenStream<ImplTraits>::getToken(ANTLR_MARKER i) -{ +} + +template<class ImplTraits> +const typename CommonTokenStream<ImplTraits>::TokenType* CommonTokenStream<ImplTraits>::getToken(ANTLR_MARKER i) +{ return this->get(i); -} - - -template<class ImplTraits> -const typename CommonTokenStream<ImplTraits>::TokenType* CommonTokenStream<ImplTraits>::get(ANTLR_MARKER i) -{ +} + + +template<class ImplTraits> +const typename CommonTokenStream<ImplTraits>::TokenType* CommonTokenStream<ImplTraits>::get(ANTLR_MARKER i) +{ return this->getToken( static_cast<ANTLR_MARKER>(i), BoolForwarder<ImplTraits::TOKENS_ACCESSED_FROM_OWNING_RULE>() ); -} - -template<class ImplTraits> -const typename CommonTokenStream<ImplTraits>::TokenType* CommonTokenStream<ImplTraits>::getToken( ANTLR_MARKER tok_idx, +} + +template<class ImplTraits> +const typename CommonTokenStream<ImplTraits>::TokenType* CommonTokenStream<ImplTraits>::getToken( ANTLR_MARKER tok_idx, BoolForwarder<true> /*tokens_accessed_from_owning_rule*/ ) -{ +{ typename TokensType::iterator iter = m_tokens.find(tok_idx); if( iter == m_tokens.end() ) { @@ -530,86 +530,86 @@ const typename CommonTokenStream<ImplTraits>::TokenType* CommonTokenStream<ImplT } const TokenType& tok = iter->second; return &tok; -} - -template<class ImplTraits> -const typename CommonTokenStream<ImplTraits>::TokenType* CommonTokenStream<ImplTraits>::getToken( ANTLR_MARKER tok_idx, BoolForwarder<false> /*tokens_accessed_from_owning_rule*/ ) -{ +} + 
+template<class ImplTraits> +const typename CommonTokenStream<ImplTraits>::TokenType* CommonTokenStream<ImplTraits>::getToken( ANTLR_MARKER tok_idx, BoolForwarder<false> /*tokens_accessed_from_owning_rule*/ ) +{ TokenType& tok = m_tokens.at( static_cast<ANTLR_UINT32>(tok_idx) ); return &tok; -} - -template<class ImplTraits> -typename TokenStream<ImplTraits>::TokenSourceType* TokenStream<ImplTraits>::get_tokenSource() const -{ +} + +template<class ImplTraits> +typename TokenStream<ImplTraits>::TokenSourceType* TokenStream<ImplTraits>::get_tokenSource() const +{ return m_tokenSource; -} - -template<class ImplTraits> -void TokenStream<ImplTraits>::set_tokenSource( TokenSourceType* tokenSource ) -{ +} + +template<class ImplTraits> +void TokenStream<ImplTraits>::set_tokenSource( TokenSourceType* tokenSource ) +{ m_tokenSource = tokenSource; -} - -template<class ImplTraits> +} + +template<class ImplTraits> typename TokenStream<ImplTraits>::StringType TokenStream<ImplTraits>::toString() -{ +{ TokenStreamType* cts = static_cast<TokenStreamType>(this); - + if (cts->get_p() == -1) - { + { cts->fillBuffer(); - } - - return this->toStringSS(0, this->get_istream()->size()); -} - -template<class ImplTraits> -typename TokenStream<ImplTraits>::StringType -TokenStream<ImplTraits>::toStringSS(ANTLR_MARKER start, ANTLR_MARKER stop) -{ - StringType string; - TokenSourceType* tsource; - const TokenType* tok; - TokenStreamType* cts; - - cts = this->get_super(); - - if (cts->get_p() == -1) - { - cts->fillBuffer(); - } - if (stop >= this->get_istream()->size()) - { - stop = this->get_istream()->size() - 1; - } - - /* Who is giving us these tokens? 
- */ - tsource = cts->get_tokenSource(); - - if (tsource != NULL && !cts->get_tokens().empty() ) - { - /* Finally, let's get a string - */ - for (ANTLR_MARKER i = start; i <= stop; i++) - { - tok = cts->get(i); - if (tok != NULL) - { - string.append( tok->getText() ); - } - } - - return string; - } - return ""; -} - -template<class ImplTraits> -typename TokenStream<ImplTraits>::StringType -TokenStream<ImplTraits>::toStringTT(const TokenType* start, const TokenType* stop) -{ + } + + return this->toStringSS(0, this->get_istream()->size()); +} + +template<class ImplTraits> +typename TokenStream<ImplTraits>::StringType +TokenStream<ImplTraits>::toStringSS(ANTLR_MARKER start, ANTLR_MARKER stop) +{ + StringType string; + TokenSourceType* tsource; + const TokenType* tok; + TokenStreamType* cts; + + cts = this->get_super(); + + if (cts->get_p() == -1) + { + cts->fillBuffer(); + } + if (stop >= this->get_istream()->size()) + { + stop = this->get_istream()->size() - 1; + } + + /* Who is giving us these tokens? + */ + tsource = cts->get_tokenSource(); + + if (tsource != NULL && !cts->get_tokens().empty() ) + { + /* Finally, let's get a string + */ + for (ANTLR_MARKER i = start; i <= stop; i++) + { + tok = cts->get(i); + if (tok != NULL) + { + string.append( tok->getText() ); + } + } + + return string; + } + return ""; +} + +template<class ImplTraits> +typename TokenStream<ImplTraits>::StringType +TokenStream<ImplTraits>::toStringTT(const TokenType* start, const TokenType* stop) +{ if (start != NULL && stop != NULL) { return this->toStringSS( start->get_tokenIndex(), @@ -619,91 +619,91 @@ TokenStream<ImplTraits>::toStringTT(const TokenType* start, const TokenType* sto { return ""; } -} - -/** A simple filter mechanism whereby you can tell this token stream - * to force all tokens of type ttype to be on channel. For example, - * when interpreting, we cannot execute actions so we need to tell - * the stream to force all WS and NEWLINE to be a different, ignored, - * channel. 
- */ -template<class ImplTraits> +} + +/** A simple filter mechanism whereby you can tell this token stream + * to force all tokens of type ttype to be on channel. For example, + * when interpreting, we cannot execute actions so we need to tell + * the stream to force all WS and NEWLINE to be a different, ignored, + * channel. + */ +template<class ImplTraits> void CommonTokenStream<ImplTraits>::setTokenTypeChannel ( ANTLR_UINT32 ttype, ANTLR_UINT32 channel) -{ - /* We add one to the channel so we can distinguish NULL as being no entry in the - * table for a particular token type. - */ - m_channelOverrides[ttype] = (ANTLR_UINT32)channel + 1; - -} - -template<class ImplTraits> -void CommonTokenStream<ImplTraits>::discardTokenType(ANTLR_INT32 ttype) -{ +{ + /* We add one to the channel so we can distinguish NULL as being no entry in the + * table for a particular token type. + */ + m_channelOverrides[ttype] = (ANTLR_UINT32)channel + 1; + +} + +template<class ImplTraits> +void CommonTokenStream<ImplTraits>::discardTokenType(ANTLR_INT32 ttype) +{ /* We add one to the channel so we can distinguish NULL as being no entry in the - * table for a particular token type. We could use bitsets for this I suppose too. - */ + * table for a particular token type. We could use bitsets for this I suppose too. 
+ */ m_discardSet.insert(ttype); -} - -template<class ImplTraits> -void CommonTokenStream<ImplTraits>::discardOffChannelToks(bool discard) -{ +} + +template<class ImplTraits> +void CommonTokenStream<ImplTraits>::discardOffChannelToks(bool discard) +{ m_discardOffChannel = discard; -} - -template<class ImplTraits> -typename CommonTokenStream<ImplTraits>::TokensType* CommonTokenStream<ImplTraits>::getTokens() -{ +} + +template<class ImplTraits> +typename CommonTokenStream<ImplTraits>::TokensType* CommonTokenStream<ImplTraits>::getTokens() +{ if (m_p == -1) - { + { this->fillBuffer(); - } - - return &m_tokens; -} - -template<class ImplTraits> + } + + return &m_tokens; +} + +template<class ImplTraits> void CommonTokenStream<ImplTraits>::getTokenRange(ANTLR_UINT32 start, ANTLR_UINT32 stop, TokensListType& tokenRange) -{ +{ return this->getTokensSet(start, stop, NULL, tokenRange); -} - -/** Given a start and stop index, return a List of all tokens in - * the token type BitSet. Return null if no tokens were found. This - * method looks at both on and off channel tokens. - */ -template<class ImplTraits> -void -CommonTokenStream<ImplTraits>::getTokensSet(ANTLR_UINT32 start, ANTLR_UINT32 stop, BitsetType* types, - TokensListType& filteredList ) -{ +} + +/** Given a start and stop index, return a List of all tokens in + * the token type BitSet. Return null if no tokens were found. This + * method looks at both on and off channel tokens. 
+ */ +template<class ImplTraits> +void +CommonTokenStream<ImplTraits>::getTokensSet(ANTLR_UINT32 start, ANTLR_UINT32 stop, BitsetType* types, + TokensListType& filteredList ) +{ ANTLR_UINT32 i; ANTLR_UINT32 n; TokenType* tok; - + if ( m_p == -1) - { + { this->fillBuffer(); - } + } if (stop > this->get_istream()->size()) - { + { stop = this->get_istream()->size(); - } + } if (start > stop) - { + { return; - } - - /* We have the range set, now we need to iterate through the - * installed tokens and create a new list with just the ones we want - * in it. We are just moving pointers about really. - */ - for(i = start, n = 0; i<= stop; i++) - { + } + + /* We have the range set, now we need to iterate through the + * installed tokens and create a new list with just the ones we want + * in it. We are just moving pointers about really. + */ + for(i = start, n = 0; i<= stop; i++) + { tok = this->get(i); - + if ( types == NULL || (types->isMember( tok->get_type() ) == true ) ) @@ -712,230 +712,230 @@ CommonTokenStream<ImplTraits>::getTokensSet(ANTLR_UINT32 start, ANTLR_UINT32 sto } } - return ; -} - -template<class ImplTraits> -void + return ; +} + +template<class ImplTraits> +void CommonTokenStream<ImplTraits>::getTokensList(ANTLR_UINT32 start, ANTLR_UINT32 stop, const IntListType& list, TokensListType& newlist) -{ +{ BitsetType* bitSet; - - bitSet = Bitset<ImplTraits>::BitsetFromList(list); - this->getTokensSet(start, stop, bitSet, newlist); - delete bitSet; -} - -template<class ImplTraits> + + bitSet = Bitset<ImplTraits>::BitsetFromList(list); + this->getTokensSet(start, stop, bitSet, newlist); + delete bitSet; +} + +template<class ImplTraits> void -CommonTokenStream<ImplTraits>::getTokensType(ANTLR_UINT32 start, ANTLR_UINT32 stop, ANTLR_UINT32 type, - TokensListType& newlist ) -{ - BitsetType* bitSet; - - bitSet = BitsetType::BitsetOf(type, -1); - this->getTokensSet(start, stop, bitSet, newlist); - - delete bitSet; -} - -template<class ImplTraits> -void 
CommonTokenStream<ImplTraits>::fillBufferExt() -{ - this->fillBuffer(); -} - -template<class ImplTraits> +CommonTokenStream<ImplTraits>::getTokensType(ANTLR_UINT32 start, ANTLR_UINT32 stop, ANTLR_UINT32 type, + TokensListType& newlist ) +{ + BitsetType* bitSet; + + bitSet = BitsetType::BitsetOf(type, -1); + this->getTokensSet(start, stop, bitSet, newlist); + + delete bitSet; +} + +template<class ImplTraits> +void CommonTokenStream<ImplTraits>::fillBufferExt() +{ + this->fillBuffer(); +} + +template<class ImplTraits> bool CommonTokenStream<ImplTraits>::hasReachedFillbufferTarget( ANTLR_UINT32 cnt, BoolForwarder<true> ) -{ +{ return ( cnt >= ImplTraits::TOKEN_FILL_BUFFER_INCREMENT ); -} - -template<class ImplTraits> +} + +template<class ImplTraits> bool CommonTokenStream<ImplTraits>::hasReachedFillbufferTarget( ANTLR_UINT32, BoolForwarder<false> ) -{ +{ return false; -} - - -template<class ImplTraits> +} + + +template<class ImplTraits> void CommonTokenStream<ImplTraits>::fillBuffer() -{ - ANTLR_UINT32 index; - TokenType* tok; - bool discard; +{ + ANTLR_UINT32 index; + TokenType* tok; + bool discard; - /* Start at index 0 of course - */ + /* Start at index 0 of course + */ ANTLR_UINT32 cached_p = (m_p < 0) ? 0 : m_p; - index = m_nissued; + index = m_nissued; ANTLR_UINT32 cnt = 0; - - /* Pick out the next token from the token source - * Remember we just get a pointer (reference if you like) here - * and so if we store it anywhere, we don't set any pointers to auto free it. - */ - tok = this->get_tokenSource()->nextToken(); - - while ( tok->get_type() != TokenType::TOKEN_EOF ) - { + + /* Pick out the next token from the token source + * Remember we just get a pointer (reference if you like) here + * and so if we store it anywhere, we don't set any pointers to auto free it. 
+ */ + tok = this->get_tokenSource()->nextToken(); + + while ( tok->get_type() != TokenType::TOKEN_EOF ) + { discard = false; /* Assume we are not discarding */ - - /* I employ a bit of a trick, or perhaps hack here. Rather than - * store a pointer to a structure in the override map and discard set - * we store the value + 1 cast to a void *. Hence on systems where NULL = (void *)0 - * we can distinguish "not being there" from "being channel or type 0" - */ - - if ( m_discardSet.find(tok->get_type()) != m_discardSet.end() ) - { - discard = true; - } - else if ( m_discardOffChannel == true - && tok->get_channel() != m_channel - ) - { - discard = true; - } - else if (!m_channelOverrides.empty()) - { - /* See if this type is in the override map - */ + + /* I employ a bit of a trick, or perhaps hack here. Rather than + * store a pointer to a structure in the override map and discard set + * we store the value + 1 cast to a void *. Hence on systems where NULL = (void *)0 + * we can distinguish "not being there" from "being channel or type 0" + */ + + if ( m_discardSet.find(tok->get_type()) != m_discardSet.end() ) + { + discard = true; + } + else if ( m_discardOffChannel == true + && tok->get_channel() != m_channel + ) + { + discard = true; + } + else if (!m_channelOverrides.empty()) + { + /* See if this type is in the override map + */ typename ChannelOverridesType::iterator iter = m_channelOverrides.find( tok->get_type() + 1 ); - - if (iter != m_channelOverrides.end()) - { - /* Override found - */ - tok->set_channel( ANTLR_UINT32_CAST(iter->second) - 1); - } - } - - /* If not discarding it, add it to the list at the current index - */ - if (discard == false) - { - /* Add it, indicating that we will delete it and the table should not - */ - tok->set_tokenIndex(index); - ++m_p; - this->insertToken(*tok); - index++; + + if (iter != m_channelOverrides.end()) + { + /* Override found + */ + tok->set_channel( ANTLR_UINT32_CAST(iter->second) - 1); + } + } + + /* If not 
discarding it, add it to the list at the current index + */ + if (discard == false) + { + /* Add it, indicating that we will delete it and the table should not + */ + tok->set_tokenIndex(index); + ++m_p; + this->insertToken(*tok); + index++; m_nissued++; cnt++; - } - + } + if( !this->hasReachedFillbufferTarget( cnt, BoolForwarder<ImplTraits::TOKENS_ACCESSED_FROM_OWNING_RULE>() ) ) tok = this->get_tokenSource()->nextToken(); else break; - } - - /* Cache the size so we don't keep doing indirect method calls. We do this as - * early as possible so that anything after this may utilize the cached value. - */ - this->get_istream()->set_cachedSize( m_nissued ); - - /* Set the consume pointer to the first token that is on our channel, we just read - */ - m_p = cached_p; - m_p = this->skipOffTokenChannels( m_p ); - -} -/// Given a starting index, return the index of the first on-channel -/// token. -/// -template<class ImplTraits> -ANTLR_UINT32 CommonTokenStream<ImplTraits>::skipOffTokenChannels(ANTLR_INT32 i) -{ - ANTLR_INT32 n; - n = this->get_istream()->get_cachedSize(); - - while (i < n) - { - const TokenType* tok = this->getToken(i); - - if (tok->get_channel() != m_channel ) - { - i++; - } - else - { - return i; - } - } - return i; -} - -template<class ImplTraits> -ANTLR_UINT32 CommonTokenStream<ImplTraits>::skipOffTokenChannelsReverse(ANTLR_INT32 x) -{ - while (x >= 0) - { - const TokenType* tok = this->getToken(x); + } + + /* Cache the size so we don't keep doing indirect method calls. We do this as + * early as possible so that anything after this may utilize the cached value. + */ + this->get_istream()->set_cachedSize( m_nissued ); + + /* Set the consume pointer to the first token that is on our channel, we just read + */ + m_p = cached_p; + m_p = this->skipOffTokenChannels( m_p ); + +} +/// Given a starting index, return the index of the first on-channel +/// token. 
+/// +template<class ImplTraits> +ANTLR_UINT32 CommonTokenStream<ImplTraits>::skipOffTokenChannels(ANTLR_INT32 i) +{ + ANTLR_INT32 n; + n = this->get_istream()->get_cachedSize(); + + while (i < n) + { + const TokenType* tok = this->getToken(i); + + if (tok->get_channel() != m_channel ) + { + i++; + } + else + { + return i; + } + } + return i; +} + +template<class ImplTraits> +ANTLR_UINT32 CommonTokenStream<ImplTraits>::skipOffTokenChannelsReverse(ANTLR_INT32 x) +{ + while (x >= 0) + { + const TokenType* tok = this->getToken(x); - if( tok->get_channel() != m_channel ) - { - x--; - } - else - { - return x; - } - } - return x; -} - -template<class ImplTraits> -void CommonTokenStream<ImplTraits>::discardTokens( ANTLR_MARKER start, ANTLR_MARKER stop ) -{ + if( tok->get_channel() != m_channel ) + { + x--; + } + else + { + return x; + } + } + return x; +} + +template<class ImplTraits> +void CommonTokenStream<ImplTraits>::discardTokens( ANTLR_MARKER start, ANTLR_MARKER stop ) +{ this->discardTokens( start, stop, BoolForwarder< ImplTraits::TOKENS_ACCESSED_FROM_OWNING_RULE >() ); -} - -template<class ImplTraits> +} + +template<class ImplTraits> void CommonTokenStream<ImplTraits>::discardTokens( ANTLR_MARKER start, ANTLR_MARKER stop, BoolForwarder<true> /*tokens_accessed_from_owning_rule */ ) -{ +{ typename TokensType::iterator iter1 = m_tokens.lower_bound(start); typename TokensType::iterator iter2 = m_tokens.upper_bound(stop); m_tokens.erase( iter1, iter2 ); -} - -template<class ImplTraits> +} + +template<class ImplTraits> void CommonTokenStream<ImplTraits>::discardTokens( ANTLR_MARKER start, ANTLR_MARKER stop, BoolForwarder<false> /*tokens_accessed_from_owning_rule*/ ) -{ +{ m_tokens.erase( m_tokens.begin() + start, m_tokens.begin() + stop ); -} - -template<class ImplTraits> -void CommonTokenStream<ImplTraits>::insertToken( const TokenType& tok ) -{ +} + +template<class ImplTraits> +void CommonTokenStream<ImplTraits>::insertToken( const TokenType& tok ) +{ 
this->insertToken( tok, BoolForwarder< ImplTraits::TOKENS_ACCESSED_FROM_OWNING_RULE >() ); -} - -template<class ImplTraits> -void CommonTokenStream<ImplTraits>::insertToken( const TokenType& tok, BoolForwarder<true> /*tokens_accessed_from_owning_rule*/ ) -{ +} + +template<class ImplTraits> +void CommonTokenStream<ImplTraits>::insertToken( const TokenType& tok, BoolForwarder<true> /*tokens_accessed_from_owning_rule*/ ) +{ assert( m_tokens.find( tok.get_index() ) == m_tokens.end() ); assert( tok.get_index() == m_nissued ); m_tokens[ tok.get_index() ] = tok; -} - -template<class ImplTraits> -void CommonTokenStream<ImplTraits>::insertToken( const TokenType& tok, BoolForwarder<false> /*tokens_accessed_from_owning_rule*/ ) -{ +} + +template<class ImplTraits> +void CommonTokenStream<ImplTraits>::insertToken( const TokenType& tok, BoolForwarder<false> /*tokens_accessed_from_owning_rule*/ ) +{ m_tokens.push_back( tok ); -} - -template<class ImplTraits> -CommonTokenStream<ImplTraits>::~CommonTokenStream() -{ +} + +template<class ImplTraits> +CommonTokenStream<ImplTraits>::~CommonTokenStream() +{ m_tokens.clear(); -} - +} + } diff --git a/contrib/libs/antlr3_cpp_runtime/include/antlr3traits.hpp b/contrib/libs/antlr3_cpp_runtime/include/antlr3traits.hpp index c5741ce985..420139c2db 100644 --- a/contrib/libs/antlr3_cpp_runtime/include/antlr3traits.hpp +++ b/contrib/libs/antlr3_cpp_runtime/include/antlr3traits.hpp @@ -1,48 +1,48 @@ -#ifndef _ANTLR3_TRAITS_HPP -#define _ANTLR3_TRAITS_HPP - +#ifndef _ANTLR3_TRAITS_HPP +#define _ANTLR3_TRAITS_HPP + namespace antlr3 { - + /** * Users implementing overrides should inherit from this * * All classes typenames reffer to Empty class */ -template<class ImplTraits> -class CustomTraitsBase -{ -public: +template<class ImplTraits> +class CustomTraitsBase +{ +public: typedef Empty AllocPolicyType; typedef Empty StringType; typedef Empty StringStreamType; typedef Empty StreamDataType; typedef Empty Endianness; - + //collections typedef Empty 
BitsetType; typedef Empty BitsetListType; - + typedef Empty InputStreamType; - + template<class StreamType> class IntStreamType : public Empty { public: typedef Empty BaseType; }; - + typedef Empty LexStateType; - + typedef Empty CommonTokenType; typedef Empty TokenUserDataType; - + typedef Empty TokenIntStreamType; typedef Empty TokenStreamType; - + typedef Empty TreeNodeIntStreamType; typedef Empty TreeNodeStreamType; - - + + typedef Empty DebugEventListenerType; template<class StreamType> class RecognizerSharedStateType : public Empty @@ -50,7 +50,7 @@ public: public: typedef Empty BaseType; }; - + template<class StreamType> class RecognizerType : public Empty { @@ -69,7 +69,7 @@ public: public: typedef Empty BaseType; }; - + //this should be overridden with generated lexer typedef Empty BaseLexerType; @@ -83,76 +83,76 @@ public: public: typedef Empty BaseType; }; - + typedef Empty RuleReturnValueType; - + //If we want to change the way tokens are stored static const bool TOKENS_ACCESSED_FROM_OWNING_RULE = false; static const unsigned TOKEN_FILL_BUFFER_INCREMENT = 100; //used only if the above val is true - + static void displayRecognitionError( const std::string& str ) { printf("%s", str.c_str() ); } -}; - +}; + /** * Traits manipulation classes */ -template<class A, class B> -class TraitsSelector -{ -public: +template<class A, class B> +class TraitsSelector +{ +public: typedef A selected; -}; - -template<class B> -class TraitsSelector<Empty, B> -{ -public: +}; + +template<class B> +class TraitsSelector<Empty, B> +{ +public: typedef B selected; -}; - -template<class A, class B, class C> -class TraitsOneArgSelector -{ -public: +}; + +template<class A, class B, class C> +class TraitsOneArgSelector +{ +public: typedef A selected; -}; - -template<class A, class B> -class TraitsOneArgSelector<A,B,Empty> -{ -public: +}; + +template<class A, class B> +class TraitsOneArgSelector<A,B,Empty> +{ +public: typedef B selected; -}; - -template<bool v, class A, class B> -class 
BoolSelector -{ -public: +}; + +template<bool v, class A, class B> +class BoolSelector +{ +public: typedef A selected; -}; - -template<class A, class B> -class BoolSelector<false, A, B> -{ -public: +}; + +template<class A, class B> +class BoolSelector<false, A, B> +{ +public: typedef B selected; -}; - +}; + /** * Base traits template * * This class contains default typenames for every trait */ -template< template<class ImplTraits> class UserTraits > -class TraitsBase -{ -public: +template< template<class ImplTraits> class UserTraits > +class TraitsBase +{ +public: typedef TraitsBase TraitsType; typedef typename TraitsSelector< typename UserTraits<TraitsType>::AllocPolicyType, DefaultAllocPolicy >::selected AllocPolicyType; - + typedef typename TraitsSelector< typename UserTraits<TraitsType>::StringType, std::string >::selected StringType; @@ -160,7 +160,7 @@ public: typedef typename TraitsSelector< typename UserTraits<TraitsType>::StringStreamType, std::stringstream >::selected StringStreamType; - + typedef typename TraitsSelector< typename UserTraits<TraitsType>::StreamDataType, ANTLR_UINT8 >::selected StreamDataType; @@ -168,18 +168,18 @@ public: typedef typename TraitsSelector< typename UserTraits<TraitsType>::Endianness, RESOLVE_ENDIAN_AT_RUNTIME >::selected Endianness; - + typedef typename TraitsSelector< typename UserTraits<TraitsType>::BitsetType, Bitset<TraitsType> >::selected BitsetType; typedef typename TraitsSelector< typename UserTraits<TraitsType>::BitsetListType, BitsetList<TraitsType> >::selected BitsetListType; - + typedef typename TraitsSelector< typename UserTraits<TraitsType>::InputStreamType, InputStream<TraitsType> >::selected InputStreamType; - + template<class SuperType> class IntStreamType : public TraitsOneArgSelector< typename UserTraits<TraitsType>::template IntStreamType<SuperType>, IntStream<TraitsType, SuperType>, @@ -190,13 +190,13 @@ public: typedef typename TraitsSelector< typename UserTraits<TraitsType>::LexStateType, 
LexState<TraitsType> >::selected LexStateType; - + static const bool TOKENS_ACCESSED_FROM_OWNING_RULE = UserTraits<TraitsType>::TOKENS_ACCESSED_FROM_OWNING_RULE; static const unsigned TOKEN_FILL_BUFFER_INCREMENT = UserTraits<TraitsType>::TOKEN_FILL_BUFFER_INCREMENT; //used only if the above val is true - + static void displayRecognitionError( const StringType& str ) { UserTraits<TraitsType>::displayRecognitionError(str); } }; - + /** * Final traits * @@ -214,37 +214,37 @@ class Traits : public TraitsBase<UserTraits> public: typedef Traits TraitsType; typedef TraitsBase<UserTraits> BaseTraitsType; - + // CommonTokenType typedef typename TraitsSelector< typename UserTraits<TraitsType>::CommonTokenType, CommonToken<TraitsType> >::selected CommonTokenType; - + // TokenUserDataType typedef typename TraitsSelector< typename UserTraits<TraitsType>::TokenUserDataType, Empty >::selected TokenUserDataType; - + // TokenListType typedef typename BaseTraitsType::AllocPolicyType::template ListType<const CommonTokenType*> TokenListType; - + // TokenIntStreamType typedef typename TraitsSelector< typename UserTraits<TraitsType>::TokenIntStreamType, TokenIntStream<TraitsType> >::selected TokenIntStreamType; // TokenStreamType typedef typename TraitsSelector< typename UserTraits<TraitsType>::TokenStreamType, CommonTokenStream<TraitsType> >::selected TokenStreamType; - + // TreeNodeIntStreamType typedef typename TraitsSelector< typename UserTraits<TraitsType>::TreeNodeIntStreamType, TreeNodeIntStream<TraitsType> >::selected TreeNodeIntStreamType; - + // TreeNodeStreamType typedef typename TraitsSelector< typename UserTraits<TraitsType>::TreeNodeStreamType, CommonTreeNodeStream<TraitsType> >::selected TreeNodeStreamType; - + // DebugEventListenerType typedef typename TraitsSelector< typename UserTraits<TraitsType>::DebugEventListenerType, DebugEventListener<TraitsType> >::selected DebugEventListenerType; - + // RecognizerSharedStateType template<class StreamType> class 
RecognizerSharedStateType : public TraitsOneArgSelector< typename UserTraits<TraitsType>::template RecognizerSharedStateType<StreamType>, @@ -252,7 +252,7 @@ public: typename UserTraits<TraitsType>::template RecognizerSharedStateType<StreamType>::BaseType >::selected {}; - + // RecognizerType template<class StreamType> class RecognizerType : public TraitsOneArgSelector< typename UserTraits<TraitsType>::template RecognizerType<StreamType>, @@ -266,14 +266,14 @@ public: typename UserTraits<TraitsType>::template RecognizerType<StreamType>::BaseType >::selected BaseType; typedef typename BaseType::RecognizerSharedStateType RecognizerSharedStateType; - + public: RecognizerType(ANTLR_UINT32 sizeHint, RecognizerSharedStateType* state) : BaseType( sizeHint, state ) { } }; - + // TreeType typedef typename TraitsSelector< typename UserTraits<TraitsType>::TreeType, CommonTree<TraitsType> >::selected TreeType; @@ -286,7 +286,7 @@ public: CommonTreeStore<TraitsType> >::selected TreeStoreType; typedef typename TreeStoreType::TreeTypePtr TreeTypePtr; //typedef std::unique_ptr<TreeType, ResourcePoolManager<ImplTraits>> TreeTypePtr; - + // ExceptionBaseType template<class StreamType> class ExceptionBaseType : public TraitsOneArgSelector< typename UserTraits<TraitsType>::template ExceptionBaseType<StreamType>, @@ -306,30 +306,30 @@ public: { } }; - + // this should be overridden with generated lexer // BaseLexerType typedef typename TraitsSelector< typename UserTraits<TraitsType>::BaseLexerType, Lexer<TraitsType> >::selected BaseLexerType; typedef LxrType LexerType; - + // TokenSourceType typedef typename TraitsSelector< typename UserTraits<TraitsType>::TokenSourceType, TokenSource<TraitsType> >::selected TokenSourceType; - + // this should be overridden with generated parser // BaseParserType typedef typename TraitsSelector< typename UserTraits<TraitsType>::BaseParserType, Parser<TraitsType> >::selected BaseParserType; typedef PsrType ParserType; - + // this should be overridden 
with generated treeparser (not implemented yet) // BaseTreeParserType typedef typename TraitsSelector< typename UserTraits<TraitsType>::BaseTreeParserType, TreeParser<TraitsType> >::selected BaseTreeParserType; //typedef TreePsrType<Traits> TreeParserType; typedef BaseTreeParserType TreeParserType; - + // RewriteStreamType template<class ElementType> class RewriteStreamType : public TraitsOneArgSelector< typename UserTraits<TraitsType>::template RewriteStreamType<ElementType>, @@ -342,12 +342,12 @@ public: RewriteRuleElementStream<TraitsType, ElementType>, typename UserTraits<TraitsType>::template RewriteStreamType<ElementType>::BaseType >::selected BaseType; - + //typedef typename SuperType::StreamType StreamType; //typedef typename BaseType::RecognizerType Recognizer_Type; //typedef typename BaseType::ElementType ElementType; typedef typename BaseType::ElementsType ElementsType; - + public: RewriteStreamType(TreeAdaptorType* adaptor = NULL, const char* description = NULL) :BaseType(adaptor, description) @@ -362,7 +362,7 @@ public: { } }; - + // RuleReturnValueType typedef typename TraitsSelector< typename UserTraits<TraitsType>::RuleReturnValueType, typename BoolSelector< TraitsType::TOKENS_ACCESSED_FROM_OWNING_RULE, @@ -370,8 +370,8 @@ public: RuleReturnValue<TraitsType> >::selected >::selected RuleReturnValueType; -}; - +}; + } - -#endif //_ANTLR3_TRAITS_HPP + +#endif //_ANTLR3_TRAITS_HPP diff --git a/contrib/libs/antlr3_cpp_runtime/include/antlr3treeparser.hpp b/contrib/libs/antlr3_cpp_runtime/include/antlr3treeparser.hpp index c1395382b5..0be899df13 100644 --- a/contrib/libs/antlr3_cpp_runtime/include/antlr3treeparser.hpp +++ b/contrib/libs/antlr3_cpp_runtime/include/antlr3treeparser.hpp @@ -1,45 +1,45 @@ #ifndef ANTLR3TREEPARSER_HPP #define ANTLR3TREEPARSER_HPP - -// [The "BSD licence"] -// Copyright (c) 2005-2009 Gokulakannan Somasundaram, ElectronDB - -// -// All rights reserved. 
-// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// 3. The name of the author may not be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR -// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES -// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. -// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, -// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT -// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF -// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -/** Internal structure representing an element in a hash bucket. - * Stores the original key so that duplicate keys can be rejected - * if necessary, and contains function can be supported If the hash key - * could be unique I would have invented the perfect compression algorithm ;-) - */ + +// [The "BSD licence"] +// Copyright (c) 2005-2009 Gokulakannan Somasundaram, ElectronDB + +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// 1. 
Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// 3. The name of the author may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +/** Internal structure representing an element in a hash bucket. 
+ * Stores the original key so that duplicate keys can be rejected + * if necessary, and contains function can be supported If the hash key + * could be unique I would have invented the perfect compression algorithm ;-) + */ namespace antlr3 { - -template<class ImplTraits> + +template<class ImplTraits> class TreeParser : public ImplTraits::template RecognizerType< typename ImplTraits::TreeNodeStreamType > -{ -public: +{ +public: typedef typename ImplTraits::TreeNodeStreamType TreeNodeStreamType; typedef TreeNodeStreamType StreamType; typedef typename TreeNodeStreamType::IntStreamType IntStreamType; @@ -53,48 +53,48 @@ public: typedef typename ImplTraits::BitsetListType BitsetListType; typedef typename ImplTraits::StringType StringType; typedef typename ImplTraits::CommonTokenType CommonTokenType; - -private: - /** Pointer to the common tree node stream for the parser - */ + +private: + /** Pointer to the common tree node stream for the parser + */ TreeNodeStreamType* m_ctnstream; - -public: + +public: TreeParser( ANTLR_UINT32 sizeHint, TreeNodeStreamType* ctnstream, RecognizerSharedStateType* state); TreeNodeStreamType* get_ctnstream() const; IntStreamType* get_istream() const; RecognizerType* get_rec(); - + //same as above. Just that get_istream exists for lexer, parser, treeparser //get_parser_istream exists only for parser, treeparser. So use it accordingly IntStreamType* get_parser_istream() const; - - /** Set the input stream and reset the parser - */ + + /** Set the input stream and reset the parser + */ void setTreeNodeStream(TreeNodeStreamType* input); - - /** Return a pointer to the input stream - */ - TreeNodeStreamType* getTreeNodeStream(); - + + /** Return a pointer to the input stream + */ + TreeNodeStreamType* getTreeNodeStream(); + TokenType* getMissingSymbol( IntStreamType* istream, ExceptionBaseType* e, ANTLR_UINT32 expectedTokenType, BitsetListType* follow); - - /** Pointer to a function that knows how to free resources of an ANTLR3 tree parser. 
- */ + + /** Pointer to a function that knows how to free resources of an ANTLR3 tree parser. + */ ~TreeParser(); - + void fillExceptionData( ExceptionBaseType* ex ); void displayRecognitionError( ANTLR_UINT8** tokenNames, ExceptionBaseType* ex ); void exConstruct(); void mismatch(ANTLR_UINT32 ttype, BitsetListType* follow); -}; - +}; + } - -#include "antlr3treeparser.inl" - -#endif + +#include "antlr3treeparser.inl" + +#endif diff --git a/contrib/libs/antlr3_cpp_runtime/include/antlr3treeparser.inl b/contrib/libs/antlr3_cpp_runtime/include/antlr3treeparser.inl index 5f5991f4eb..2d6ca50fcc 100644 --- a/contrib/libs/antlr3_cpp_runtime/include/antlr3treeparser.inl +++ b/contrib/libs/antlr3_cpp_runtime/include/antlr3treeparser.inl @@ -1,62 +1,62 @@ namespace antlr3 { - -template< class ImplTraits > -TreeParser<ImplTraits>::TreeParser( ANTLR_UINT32 sizeHint, TreeNodeStreamType* ctnstream, + +template< class ImplTraits > +TreeParser<ImplTraits>::TreeParser( ANTLR_UINT32 sizeHint, TreeNodeStreamType* ctnstream, RecognizerSharedStateType* state) :RecognizerType( sizeHint, state ) -{ +{ /* Install the tree node stream */ this->setTreeNodeStream(ctnstream); - -} - -template< class ImplTraits > -TreeParser<ImplTraits>::~TreeParser() -{ + +} + +template< class ImplTraits > +TreeParser<ImplTraits>::~TreeParser() +{ this->get_rec()->get_state()->get_following().clear(); -} - -template< class ImplTraits > -typename TreeParser<ImplTraits>::TreeNodeStreamType* TreeParser<ImplTraits>::get_ctnstream() const -{ +} + +template< class ImplTraits > +typename TreeParser<ImplTraits>::TreeNodeStreamType* TreeParser<ImplTraits>::get_ctnstream() const +{ return m_ctnstream; -} - -template< class ImplTraits > -typename TreeParser<ImplTraits>::IntStreamType* TreeParser<ImplTraits>::get_istream() const -{ +} + +template< class ImplTraits > +typename TreeParser<ImplTraits>::IntStreamType* TreeParser<ImplTraits>::get_istream() const +{ return m_ctnstream; -} - -template< class ImplTraits > 
-typename TreeParser<ImplTraits>::IntStreamType* TreeParser<ImplTraits>::get_parser_istream() const -{ +} + +template< class ImplTraits > +typename TreeParser<ImplTraits>::IntStreamType* TreeParser<ImplTraits>::get_parser_istream() const +{ return m_ctnstream; -} - -template< class ImplTraits > -typename TreeParser<ImplTraits>::RecognizerType* TreeParser<ImplTraits>::get_rec() -{ +} + +template< class ImplTraits > +typename TreeParser<ImplTraits>::RecognizerType* TreeParser<ImplTraits>::get_rec() +{ return this; -} - -template< class ImplTraits > -void TreeParser<ImplTraits>::fillExceptionData( ExceptionBaseType* ex ) -{ +} + +template< class ImplTraits > +void TreeParser<ImplTraits>::fillExceptionData( ExceptionBaseType* ex ) +{ auto xxx = m_ctnstream->LT(1); //ex->set_token( m_ctnstream->LT(1) ); /* Current input tree node */ ex->set_line( ex->get_token()->get_line() ); ex->set_charPositionInLine( ex->get_token()->get_charPositionInLine() ); ex->set_index( m_ctnstream->index() ); - + // Are you ready for this? Deep breath now... 
// { TreeTypePtr tnode; - + tnode = ex->get_token(); - + if (tnode->get_token() == NULL) { ex->set_streamName("-unknown source-" ); @@ -74,11 +74,11 @@ void TreeParser<ImplTraits>::fillExceptionData( ExceptionBaseType* ex ) } ex->set_message("Unexpected node"); } -} - -template< class ImplTraits > +} + +template< class ImplTraits > void TreeParser<ImplTraits>::displayRecognitionError( ANTLR_UINT8** /*tokenNames*/, ExceptionBaseType* ex ) -{ +{ typename ImplTraits::StringStreamType errtext; // See if there is a 'filename' we can use // @@ -97,18 +97,18 @@ void TreeParser<ImplTraits>::displayRecognitionError( ANTLR_UINT8** /*tokenNames { errtext << ex->get_streamName() << "("; } - + // Next comes the line number // errtext << this->get_rec()->get_state()->get_exception()->get_line() << ") "; errtext << " : error " << this->get_rec()->get_state()->get_exception()->getType() << " : " << this->get_rec()->get_state()->get_exception()->get_message(); - + IntStreamType* is = this->get_istream(); TreeTypePtr theBaseTree = this->get_rec()->get_state()->get_exception()->get_token(); StringType ttext = theBaseTree->toStringTree(); - + if (theBaseTree != NULL) { TreeTypePtr theCommonTree = static_cast<TreeTypePtr>(theBaseTree); @@ -122,78 +122,78 @@ void TreeParser<ImplTraits>::displayRecognitionError( ANTLR_UINT8** /*tokenNames } ex->displayRecognitionError( errtext ); ImplTraits::displayRecognitionError( errtext.str() ); -} - -template< class ImplTraits > +} + +template< class ImplTraits > void TreeParser<ImplTraits>::setTreeNodeStream(TreeNodeStreamType* input) -{ +{ m_ctnstream = input; - this->get_rec()->reset(); - m_ctnstream->reset(); -} - -template< class ImplTraits > -typename TreeParser<ImplTraits>::TreeNodeStreamType* TreeParser<ImplTraits>::getTreeNodeStream() -{ + this->get_rec()->reset(); + m_ctnstream->reset(); +} + +template< class ImplTraits > +typename TreeParser<ImplTraits>::TreeNodeStreamType* TreeParser<ImplTraits>::getTreeNodeStream() +{ return 
m_ctnstream; -} - -template< class ImplTraits > -void TreeParser<ImplTraits>::exConstruct() -{ +} + +template< class ImplTraits > +void TreeParser<ImplTraits>::exConstruct() +{ new ANTLR_Exception<ImplTraits, MISMATCHED_TREE_NODE_EXCEPTION, TreeNodeStreamType>( this->get_rec(), "" ); -} - -template< class ImplTraits > -void TreeParser<ImplTraits>::mismatch(ANTLR_UINT32 ttype, BitsetListType* follow) -{ +} + +template< class ImplTraits > +void TreeParser<ImplTraits>::mismatch(ANTLR_UINT32 ttype, BitsetListType* follow) +{ this->exConstruct(); - this->recoverFromMismatchedToken(ttype, follow); -} - -template< class ImplTraits > -typename TreeParser<ImplTraits>::TokenType* + this->recoverFromMismatchedToken(ttype, follow); +} + +template< class ImplTraits > +typename TreeParser<ImplTraits>::TokenType* TreeParser<ImplTraits>::getMissingSymbol( IntStreamType* istream, ExceptionBaseType* /*e*/, ANTLR_UINT32 expectedTokenType, BitsetListType* /*follow*/) -{ +{ TreeNodeStreamType* tns; TreeTypePtr node; TreeTypePtr current; CommonTokenType* token; StringType text; - ANTLR_INT32 i; - + ANTLR_INT32 i; + // Dereference the standard pointers // tns = static_cast<TreeNodeStreamType*>(istream); - + // Create a new empty node, by stealing the current one, or the previous one if the current one is EOF // current = tns->LT(1); - i = -1; - + i = -1; + if (current == tns->get_EOF_NODE_p()) { current = tns->LT(-1); - i--; + i--; } node = current->dupNode(); - + // Find the newly dupicated token // token = node->getToken(); - + // Create the token text that shows it has been inserted // token->setText("<missing "); text = token->getText(); text.append((const char *)this->get_rec()->get_state()->get_tokenName(expectedTokenType)); text.append((const char *)">"); - + // Finally return the pointer to our new node // return node; -} - - +} + + } diff --git a/contrib/libs/antlr3_cpp_runtime/ya.make b/contrib/libs/antlr3_cpp_runtime/ya.make index 5c019aa396..1b38029531 100644 --- 
a/contrib/libs/antlr3_cpp_runtime/ya.make +++ b/contrib/libs/antlr3_cpp_runtime/ya.make @@ -1,5 +1,5 @@ -LIBRARY() - +LIBRARY() + # git repository: https://github.com/ibre5041/antlr3.git # XXX fork of: https://github.com/antlr/antlr3.git # directory: runtime/Cpp @@ -11,19 +11,19 @@ LICENSE( BSD-3-Clause AND Unicode-Mappings ) - + LICENSE_TEXTS(.yandex_meta/licenses.list.txt) OWNER(g:yql) -NO_COMPILER_WARNINGS() - +NO_COMPILER_WARNINGS() + ADDINCL( GLOBAL contrib/libs/antlr3_cpp_runtime/include ) - -SRCS( - antlr3.cpp -) - -END() + +SRCS( + antlr3.cpp +) + +END() diff --git a/contrib/libs/re2/re2/bitstate.cc b/contrib/libs/re2/re2/bitstate.cc index 877e548234..ab4e75f6e5 100644 --- a/contrib/libs/re2/re2/bitstate.cc +++ b/contrib/libs/re2/re2/bitstate.cc @@ -1,22 +1,22 @@ -// Copyright 2008 The RE2 Authors. All Rights Reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// Tested by search_test.cc, exhaustive_test.cc, tester.cc - -// Prog::SearchBitState is a regular expression search with submatch +// Copyright 2008 The RE2 Authors. All Rights Reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Tested by search_test.cc, exhaustive_test.cc, tester.cc + +// Prog::SearchBitState is a regular expression search with submatch // tracking for small regular expressions and texts. Similarly to // testing/backtrack.cc, it allocates a bitmap with (count of // lists) * (length of text) bits to make sure it never explores the // same (instruction list, character position) multiple times. This -// limits the search to run in time linear in the length of the text. -// -// Unlike testing/backtrack.cc, SearchBitState is not recursive -// on the text. -// -// SearchBitState is a fast replacement for the NFA code on small -// regexps and texts when SearchOnePass cannot be used. - +// limits the search to run in time linear in the length of the text. 
+// +// Unlike testing/backtrack.cc, SearchBitState is not recursive +// on the text. +// +// SearchBitState is a fast replacement for the NFA code on small +// regexps and texts when SearchOnePass cannot be used. + #include <stddef.h> #include <stdint.h> #include <string.h> @@ -25,84 +25,84 @@ #include "util/logging.h" #include "re2/pod_array.h" -#include "re2/prog.h" -#include "re2/regexp.h" - -namespace re2 { - -struct Job { - int id; +#include "re2/prog.h" +#include "re2/regexp.h" + +namespace re2 { + +struct Job { + int id; int rle; // run length encoding - const char* p; -}; - -class BitState { - public: - explicit BitState(Prog* prog); - - // The usual Search prototype. - // Can only call Search once per BitState. - bool Search(const StringPiece& text, const StringPiece& context, - bool anchored, bool longest, - StringPiece* submatch, int nsubmatch); - - private: - inline bool ShouldVisit(int id, const char* p); + const char* p; +}; + +class BitState { + public: + explicit BitState(Prog* prog); + + // The usual Search prototype. + // Can only call Search once per BitState. 
+ bool Search(const StringPiece& text, const StringPiece& context, + bool anchored, bool longest, + StringPiece* submatch, int nsubmatch); + + private: + inline bool ShouldVisit(int id, const char* p); void Push(int id, const char* p); void GrowStack(); - bool TrySearch(int id, const char* p); - - // Search parameters - Prog* prog_; // program being run - StringPiece text_; // text being searched - StringPiece context_; // greater context of text being searched - bool anchored_; // whether search is anchored at text.begin() - bool longest_; // whether search wants leftmost-longest match - bool endmatch_; // whether match must end at text.end() + bool TrySearch(int id, const char* p); + + // Search parameters + Prog* prog_; // program being run + StringPiece text_; // text being searched + StringPiece context_; // greater context of text being searched + bool anchored_; // whether search is anchored at text.begin() + bool longest_; // whether search wants leftmost-longest match + bool endmatch_; // whether match must end at text.end() StringPiece* submatch_; // submatches to fill in - int nsubmatch_; // # of submatches to fill in - - // Search state + int nsubmatch_; // # of submatches to fill in + + // Search state static constexpr int kVisitedBits = 64; PODArray<uint64_t> visited_; // bitmap: (list ID, char*) pairs visited PODArray<const char*> cap_; // capture registers PODArray<Job> job_; // stack of text positions to explore int njob_; // stack size - + BitState(const BitState&) = delete; BitState& operator=(const BitState&) = delete; -}; - -BitState::BitState(Prog* prog) - : prog_(prog), - anchored_(false), - longest_(false), - endmatch_(false), - submatch_(NULL), - nsubmatch_(0), +}; + +BitState::BitState(Prog* prog) + : prog_(prog), + anchored_(false), + longest_(false), + endmatch_(false), + submatch_(NULL), + nsubmatch_(0), njob_(0) { -} - +} + // Given id, which *must* be a list head, we can look up its list ID. 
// Then the question is: Should the search visit the (list ID, p) pair? -// If so, remember that it was visited so that the next time, -// we don't repeat the visit. -bool BitState::ShouldVisit(int id, const char* p) { +// If so, remember that it was visited so that the next time, +// we don't repeat the visit. +bool BitState::ShouldVisit(int id, const char* p) { int n = prog_->list_heads()[id] * static_cast<int>(text_.size()+1) + static_cast<int>(p-text_.data()); if (visited_[n/kVisitedBits] & (uint64_t{1} << (n & (kVisitedBits-1)))) - return false; + return false; visited_[n/kVisitedBits] |= uint64_t{1} << (n & (kVisitedBits-1)); - return true; -} - -// Grow the stack. + return true; +} + +// Grow the stack. void BitState::GrowStack() { PODArray<Job> tmp(2*job_.size()); memmove(tmp.data(), job_.data(), njob_*sizeof job_[0]); job_ = std::move(tmp); -} - +} + // Push (id, p) onto the stack, growing it if necessary. void BitState::Push(int id, const char* p) { if (njob_ >= job_.size()) { @@ -111,10 +111,10 @@ void BitState::Push(int id, const char* p) { LOG(DFATAL) << "GrowStack() failed: " << "njob_ = " << njob_ << ", " << "job_.size() = " << job_.size(); - return; + return; } - } - + } + // If id < 0, it's undoing a Capture, // so we mustn't interfere with that. if (id >= 0 && njob_ > 0) { @@ -126,30 +126,30 @@ void BitState::Push(int id, const char* p) { return; } } - + Job* top = &job_[njob_++]; top->id = id; top->rle = 0; top->p = p; -} - -// Try a search from instruction id0 in state p0. -// Return whether it succeeded. -bool BitState::TrySearch(int id0, const char* p0) { - bool matched = false; +} + +// Try a search from instruction id0 in state p0. +// Return whether it succeeded. +bool BitState::TrySearch(int id0, const char* p0) { + bool matched = false; const char* end = text_.data() + text_.size(); - njob_ = 0; + njob_ = 0; // Push() no longer checks ShouldVisit(), // so we must perform the check ourselves. 
if (ShouldVisit(id0, p0)) Push(id0, p0); - while (njob_ > 0) { - // Pop job off stack. - --njob_; - int id = job_[njob_].id; + while (njob_ > 0) { + // Pop job off stack. + --njob_; + int id = job_[njob_].id; int& rle = job_[njob_].rle; - const char* p = job_[njob_].p; - + const char* p = job_[njob_].p; + if (id < 0) { // Undo the Capture. cap_[prog_->inst(-id)->cap()] = p; @@ -161,16 +161,16 @@ bool BitState::TrySearch(int id0, const char* p0) { // Revivify job on stack. --rle; ++njob_; - } - + } + Loop: // Visit id, p. - Prog::Inst* ip = prog_->inst(id); - switch (ip->opcode()) { - default: + Prog::Inst* ip = prog_->inst(id); + switch (ip->opcode()) { + default: LOG(DFATAL) << "Unexpected opcode: " << ip->opcode(); - return false; - + return false; + case kInstFail: break; @@ -180,7 +180,7 @@ bool BitState::TrySearch(int id0, const char* p0) { id = ip->out1(); p = end; goto Loop; - } + } if (longest_) { // ip must be non-greedy... // out is the Match instruction. @@ -189,11 +189,11 @@ bool BitState::TrySearch(int id0, const char* p0) { goto Loop; } goto Next; - - case kInstByteRange: { - int c = -1; - if (p < end) - c = *p & 0xFF; + + case kInstByteRange: { + int c = -1; + if (p < end) + c = *p & 0xFF; if (!ip->Matches(c)) goto Next; @@ -202,9 +202,9 @@ bool BitState::TrySearch(int id0, const char* p0) { id = ip->out(); p++; goto CheckAndLoop; - } - - case kInstCapture: + } + + case kInstCapture: if (!ip->last()) Push(id+1, p); // try the next when we're done @@ -217,20 +217,20 @@ bool BitState::TrySearch(int id0, const char* p0) { id = ip->out(); goto CheckAndLoop; - case kInstEmptyWidth: - if (ip->empty() & ~Prog::EmptyFlags(context_, p)) + case kInstEmptyWidth: + if (ip->empty() & ~Prog::EmptyFlags(context_, p)) goto Next; if (!ip->last()) Push(id+1, p); // try the next when we're done - id = ip->out(); - goto CheckAndLoop; - - case kInstNop: + id = ip->out(); + goto CheckAndLoop; + + case kInstNop: if (!ip->last()) Push(id+1, p); // try the next when we're 
done - id = ip->out(); - + id = ip->out(); + CheckAndLoop: // Sanity check: id is the head of its list, which must // be the case if id-1 is the last of *its* list. :) @@ -239,37 +239,37 @@ bool BitState::TrySearch(int id0, const char* p0) { goto Loop; break; - case kInstMatch: { + case kInstMatch: { if (endmatch_ && p != end) goto Next; - - // We found a match. If the caller doesn't care - // where the match is, no point going further. - if (nsubmatch_ == 0) - return true; - - // Record best match so far. - // Only need to check end point, because this entire - // call is only considering one start position. - matched = true; - cap_[1] = p; - if (submatch_[0].data() == NULL || + + // We found a match. If the caller doesn't care + // where the match is, no point going further. + if (nsubmatch_ == 0) + return true; + + // Record best match so far. + // Only need to check end point, because this entire + // call is only considering one start position. + matched = true; + cap_[1] = p; + if (submatch_[0].data() == NULL || (longest_ && p > submatch_[0].data() + submatch_[0].size())) { - for (int i = 0; i < nsubmatch_; i++) + for (int i = 0; i < nsubmatch_; i++) submatch_[i] = StringPiece(cap_[2 * i], static_cast<size_t>(cap_[2 * i + 1] - cap_[2 * i])); - } - - // If going for first match, we're done. - if (!longest_) - return true; - - // If we used the entire text, no longer match is possible. + } + + // If going for first match, we're done. + if (!longest_) + return true; + + // If we used the entire text, no longer match is possible. if (p == end) - return true; - - // Otherwise, continue on in hope of a longer match. + return true; + + // Otherwise, continue on in hope of a longer match. // Note the absence of the ShouldVisit() check here // due to execution remaining in the same list. Next: @@ -278,60 +278,60 @@ bool BitState::TrySearch(int id0, const char* p0) { goto Loop; } break; - } - } - } - return matched; -} - -// Search text (within context) for prog_. 
-bool BitState::Search(const StringPiece& text, const StringPiece& context, - bool anchored, bool longest, - StringPiece* submatch, int nsubmatch) { - // Search parameters. - text_ = text; - context_ = context; + } + } + } + return matched; +} + +// Search text (within context) for prog_. +bool BitState::Search(const StringPiece& text, const StringPiece& context, + bool anchored, bool longest, + StringPiece* submatch, int nsubmatch) { + // Search parameters. + text_ = text; + context_ = context; if (context_.data() == NULL) - context_ = text; + context_ = text; if (prog_->anchor_start() && BeginPtr(context_) != BeginPtr(text)) - return false; + return false; if (prog_->anchor_end() && EndPtr(context_) != EndPtr(text)) - return false; - anchored_ = anchored || prog_->anchor_start(); - longest_ = longest || prog_->anchor_end(); - endmatch_ = prog_->anchor_end(); - submatch_ = submatch; - nsubmatch_ = nsubmatch; - for (int i = 0; i < nsubmatch_; i++) + return false; + anchored_ = anchored || prog_->anchor_start(); + longest_ = longest || prog_->anchor_end(); + endmatch_ = prog_->anchor_end(); + submatch_ = submatch; + nsubmatch_ = nsubmatch; + for (int i = 0; i < nsubmatch_; i++) submatch_[i] = StringPiece(); - - // Allocate scratch space. + + // Allocate scratch space. int nvisited = prog_->list_count() * static_cast<int>(text.size()+1); nvisited = (nvisited + kVisitedBits-1) / kVisitedBits; visited_ = PODArray<uint64_t>(nvisited); memset(visited_.data(), 0, nvisited*sizeof visited_[0]); - + int ncap = 2*nsubmatch; if (ncap < 2) ncap = 2; cap_ = PODArray<const char*>(ncap); memset(cap_.data(), 0, ncap*sizeof cap_[0]); - + // When sizeof(Job) == 16, we start with a nice round 1KiB. :) job_ = PODArray<Job>(64); - - // Anchored search must start at text.begin(). - if (anchored_) { + + // Anchored search must start at text.begin(). 
+ if (anchored_) { cap_[0] = text.data(); return TrySearch(prog_->start(), text.data()); - } - - // Unanchored search, starting from each possible text position. - // Notice that we have to try the empty string at the end of - // the text, so the loop condition is p <= text.end(), not p < text.end(). - // This looks like it's quadratic in the size of the text, - // but we are not clearing visited_ between calls to TrySearch, - // so no work is duplicated and it ends up still being linear. + } + + // Unanchored search, starting from each possible text position. + // Notice that we have to try the empty string at the end of + // the text, so the loop condition is p <= text.end(), not p < text.end(). + // This looks like it's quadratic in the size of the text, + // but we are not clearing visited_ between calls to TrySearch, + // so no work is duplicated and it ends up still being linear. const char* etext = text.data() + text.size(); for (const char* p = text.data(); p <= etext; p++) { // Try to use prefix accel (e.g. memchr) to skip ahead. @@ -341,45 +341,45 @@ bool BitState::Search(const StringPiece& text, const StringPiece& context, p = etext; } - cap_[0] = p; - if (TrySearch(prog_->start(), p)) // Match must be leftmost; done. - return true; + cap_[0] = p; + if (TrySearch(prog_->start(), p)) // Match must be leftmost; done. + return true; // Avoid invoking undefined behavior (arithmetic on a null pointer) // by simply not continuing the loop. if (p == NULL) break; - } - return false; -} - -// Bit-state search. -bool Prog::SearchBitState(const StringPiece& text, - const StringPiece& context, - Anchor anchor, - MatchKind kind, - StringPiece* match, - int nmatch) { - // If full match, we ask for an anchored longest match - // and then check that match[0] == text. - // So make sure match[0] exists. - StringPiece sp0; - if (kind == kFullMatch) { - anchor = kAnchored; - if (nmatch < 1) { - match = &sp0; - nmatch = 1; - } - } - - // Run the search. 
- BitState b(this); - bool anchored = anchor == kAnchored; - bool longest = kind != kFirstMatch; - if (!b.Search(text, context, anchored, longest, match, nmatch)) - return false; + } + return false; +} + +// Bit-state search. +bool Prog::SearchBitState(const StringPiece& text, + const StringPiece& context, + Anchor anchor, + MatchKind kind, + StringPiece* match, + int nmatch) { + // If full match, we ask for an anchored longest match + // and then check that match[0] == text. + // So make sure match[0] exists. + StringPiece sp0; + if (kind == kFullMatch) { + anchor = kAnchored; + if (nmatch < 1) { + match = &sp0; + nmatch = 1; + } + } + + // Run the search. + BitState b(this); + bool anchored = anchor == kAnchored; + bool longest = kind != kFirstMatch; + if (!b.Search(text, context, anchored, longest, match, nmatch)) + return false; if (kind == kFullMatch && EndPtr(match[0]) != EndPtr(text)) - return false; - return true; -} - -} // namespace re2 + return false; + return true; +} + +} // namespace re2 diff --git a/contrib/libs/re2/re2/compile.cc b/contrib/libs/re2/re2/compile.cc index 61d801a630..36c902044b 100644 --- a/contrib/libs/re2/re2/compile.cc +++ b/contrib/libs/re2/re2/compile.cc @@ -1,13 +1,13 @@ -// Copyright 2007 The RE2 Authors. All Rights Reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// Compile regular expression to Prog. -// -// Prog and Inst are defined in prog.h. -// This file's external interface is just Regexp::CompileToProg. -// The Compiler class defined in this file is private. - +// Copyright 2007 The RE2 Authors. All Rights Reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Compile regular expression to Prog. +// +// Prog and Inst are defined in prog.h. +// This file's external interface is just Regexp::CompileToProg. +// The Compiler class defined in this file is private. 
+ #include <stdint.h> #include <string.h> #include <unordered_map> @@ -16,32 +16,32 @@ #include "util/logging.h" #include "util/utf.h" #include "re2/pod_array.h" -#include "re2/prog.h" +#include "re2/prog.h" #include "re2/re2.h" -#include "re2/regexp.h" -#include "re2/walker-inl.h" - -namespace re2 { - -// List of pointers to Inst* that need to be filled in (patched). -// Because the Inst* haven't been filled in yet, -// we can use the Inst* word to hold the list's "next" pointer. -// It's kind of sleazy, but it works well in practice. -// See http://swtch.com/~rsc/regexp/regexp1.html for inspiration. -// -// Because the out and out1 fields in Inst are no longer pointers, +#include "re2/regexp.h" +#include "re2/walker-inl.h" + +namespace re2 { + +// List of pointers to Inst* that need to be filled in (patched). +// Because the Inst* haven't been filled in yet, +// we can use the Inst* word to hold the list's "next" pointer. +// It's kind of sleazy, but it works well in practice. +// See http://swtch.com/~rsc/regexp/regexp1.html for inspiration. +// +// Because the out and out1 fields in Inst are no longer pointers, // we can't use pointers directly here either. Instead, head refers // to inst_[head>>1].out (head&1 == 0) or inst_[head>>1].out1 (head&1 == 1). // head == 0 represents the NULL list. This is okay because instruction #0 -// is always the fail instruction, which never appears on a list. -struct PatchList { - // Returns patch list containing just p. +// is always the fail instruction, which never appears on a list. +struct PatchList { + // Returns patch list containing just p. static PatchList Mk(uint32_t p) { return {p, p}; } - + // Patches all the entries on l to have value p. - // Caller must not ever use patch list again. + // Caller must not ever use patch list again. 
static void Patch(Prog::Inst* inst0, PatchList l, uint32_t p) { while (l.head != 0) { Prog::Inst* ip = &inst0[l.head>>1]; @@ -52,9 +52,9 @@ struct PatchList { l.head = ip->out(); ip->set_out(p); } - } - } - + } + } + // Appends two patch lists and returns result. static PatchList Append(Prog::Inst* inst0, PatchList l1, PatchList l2) { if (l1.head == 0) @@ -67,113 +67,113 @@ struct PatchList { else ip->set_out(l2.head); return {l1.head, l2.tail}; - } - + } + uint32_t head; uint32_t tail; // for constant-time append }; - + static const PatchList kNullPatchList = {0, 0}; - -// Compiled program fragment. -struct Frag { + +// Compiled program fragment. +struct Frag { uint32_t begin; - PatchList end; + PatchList end; bool nullable; - + Frag() : begin(0), end(kNullPatchList), nullable(false) {} Frag(uint32_t begin, PatchList end, bool nullable) : begin(begin), end(end), nullable(nullable) {} -}; - -// Input encodings. -enum Encoding { - kEncodingUTF8 = 1, // UTF-8 (0-10FFFF) +}; + +// Input encodings. +enum Encoding { + kEncodingUTF8 = 1, // UTF-8 (0-10FFFF) kEncodingLatin1, // Latin-1 (0-FF) -}; - -class Compiler : public Regexp::Walker<Frag> { - public: - explicit Compiler(); - ~Compiler(); - - // Compiles Regexp to a new Prog. - // Caller is responsible for deleting Prog when finished with it. - // If reversed is true, compiles for walking over the input - // string backward (reverses all concatenations). +}; + +class Compiler : public Regexp::Walker<Frag> { + public: + explicit Compiler(); + ~Compiler(); + + // Compiles Regexp to a new Prog. + // Caller is responsible for deleting Prog when finished with it. + // If reversed is true, compiles for walking over the input + // string backward (reverses all concatenations). static Prog *Compile(Regexp* re, bool reversed, int64_t max_mem); - - // Compiles alternation of all the re to a new Prog. - // Each re has a match with an id equal to its index in the vector. + + // Compiles alternation of all the re to a new Prog. 
+ // Each re has a match with an id equal to its index in the vector. static Prog* CompileSet(Regexp* re, RE2::Anchor anchor, int64_t max_mem); - - // Interface for Regexp::Walker, which helps traverse the Regexp. - // The walk is purely post-recursive: given the machines for the - // children, PostVisit combines them to create the machine for - // the current node. The child_args are Frags. - // The Compiler traverses the Regexp parse tree, visiting - // each node in depth-first order. It invokes PreVisit before - // visiting the node's children and PostVisit after visiting - // the children. - Frag PreVisit(Regexp* re, Frag parent_arg, bool* stop); - Frag PostVisit(Regexp* re, Frag parent_arg, Frag pre_arg, Frag* child_args, - int nchild_args); - Frag ShortVisit(Regexp* re, Frag parent_arg); - Frag Copy(Frag arg); - - // Given fragment a, returns a+ or a+?; a* or a*?; a? or a?? - Frag Plus(Frag a, bool nongreedy); - Frag Star(Frag a, bool nongreedy); - Frag Quest(Frag a, bool nongreedy); - - // Given fragment a, returns (a) capturing as \n. - Frag Capture(Frag a, int n); - - // Given fragments a and b, returns ab; a|b - Frag Cat(Frag a, Frag b); - Frag Alt(Frag a, Frag b); - - // Returns a fragment that can't match anything. - Frag NoMatch(); - - // Returns a fragment that matches the empty string. + + // Interface for Regexp::Walker, which helps traverse the Regexp. + // The walk is purely post-recursive: given the machines for the + // children, PostVisit combines them to create the machine for + // the current node. The child_args are Frags. + // The Compiler traverses the Regexp parse tree, visiting + // each node in depth-first order. It invokes PreVisit before + // visiting the node's children and PostVisit after visiting + // the children. 
+ Frag PreVisit(Regexp* re, Frag parent_arg, bool* stop); + Frag PostVisit(Regexp* re, Frag parent_arg, Frag pre_arg, Frag* child_args, + int nchild_args); + Frag ShortVisit(Regexp* re, Frag parent_arg); + Frag Copy(Frag arg); + + // Given fragment a, returns a+ or a+?; a* or a*?; a? or a?? + Frag Plus(Frag a, bool nongreedy); + Frag Star(Frag a, bool nongreedy); + Frag Quest(Frag a, bool nongreedy); + + // Given fragment a, returns (a) capturing as \n. + Frag Capture(Frag a, int n); + + // Given fragments a and b, returns ab; a|b + Frag Cat(Frag a, Frag b); + Frag Alt(Frag a, Frag b); + + // Returns a fragment that can't match anything. + Frag NoMatch(); + + // Returns a fragment that matches the empty string. Frag Match(int32_t id); - - // Returns a no-op fragment. - Frag Nop(); - - // Returns a fragment matching the byte range lo-hi. - Frag ByteRange(int lo, int hi, bool foldcase); - - // Returns a fragment matching an empty-width special op. - Frag EmptyWidth(EmptyOp op); - - // Adds n instructions to the program. - // Returns the index of the first one. - // Returns -1 if no more instructions are available. - int AllocInst(int n); - - // Rune range compiler. - - // Begins a new alternation. - void BeginRange(); - - // Adds a fragment matching the rune range lo-hi. - void AddRuneRange(Rune lo, Rune hi, bool foldcase); - void AddRuneRangeLatin1(Rune lo, Rune hi, bool foldcase); - void AddRuneRangeUTF8(Rune lo, Rune hi, bool foldcase); - void Add_80_10ffff(); - - // New suffix that matches the byte range lo-hi, then goes to next. + + // Returns a no-op fragment. + Frag Nop(); + + // Returns a fragment matching the byte range lo-hi. + Frag ByteRange(int lo, int hi, bool foldcase); + + // Returns a fragment matching an empty-width special op. + Frag EmptyWidth(EmptyOp op); + + // Adds n instructions to the program. + // Returns the index of the first one. + // Returns -1 if no more instructions are available. + int AllocInst(int n); + + // Rune range compiler. 
+ + // Begins a new alternation. + void BeginRange(); + + // Adds a fragment matching the rune range lo-hi. + void AddRuneRange(Rune lo, Rune hi, bool foldcase); + void AddRuneRangeLatin1(Rune lo, Rune hi, bool foldcase); + void AddRuneRangeUTF8(Rune lo, Rune hi, bool foldcase); + void Add_80_10ffff(); + + // New suffix that matches the byte range lo-hi, then goes to next. int UncachedRuneByteSuffix(uint8_t lo, uint8_t hi, bool foldcase, int next); int CachedRuneByteSuffix(uint8_t lo, uint8_t hi, bool foldcase, int next); - + // Returns true iff the suffix is cached. bool IsCachedRuneByteSuffix(int id); - // Adds a suffix to alternation. - void AddSuffix(int id); - + // Adds a suffix to alternation. + void AddSuffix(int id); + // Adds a suffix to the trie starting from the given root node. // Returns zero iff allocating an instruction fails. Otherwise, returns // the current root node, which might be different from what was given. @@ -187,62 +187,62 @@ class Compiler : public Regexp::Walker<Frag> { // Compares two ByteRanges and returns true iff they are equal. bool ByteRangeEqual(int id1, int id2); - // Returns the alternation of all the added suffixes. - Frag EndRange(); - - // Single rune. - Frag Literal(Rune r, bool foldcase); - + // Returns the alternation of all the added suffixes. + Frag EndRange(); + + // Single rune. + Frag Literal(Rune r, bool foldcase); + void Setup(Regexp::ParseFlags flags, int64_t max_mem, RE2::Anchor anchor); Prog* Finish(Regexp* re); - - // Returns .* where dot = any byte - Frag DotStar(); - - private: - Prog* prog_; // Program being built. - bool failed_; // Did we give up compiling? - Encoding encoding_; // Input encoding - bool reversed_; // Should program run backward over text? - + + // Returns .* where dot = any byte + Frag DotStar(); + + private: + Prog* prog_; // Program being built. + bool failed_; // Did we give up compiling? + Encoding encoding_; // Input encoding + bool reversed_; // Should program run backward over text? 
+ PODArray<Prog::Inst> inst_; int ninst_; // Number of instructions used. int max_ninst_; // Maximum number of instructions. - + int64_t max_mem_; // Total memory budget. - + std::unordered_map<uint64_t, int> rune_cache_; - Frag rune_range_; - - RE2::Anchor anchor_; // anchor mode for RE2::Set - + Frag rune_range_; + + RE2::Anchor anchor_; // anchor mode for RE2::Set + Compiler(const Compiler&) = delete; Compiler& operator=(const Compiler&) = delete; -}; - -Compiler::Compiler() { - prog_ = new Prog(); - failed_ = false; - encoding_ = kEncodingUTF8; - reversed_ = false; +}; + +Compiler::Compiler() { + prog_ = new Prog(); + failed_ = false; + encoding_ = kEncodingUTF8; + reversed_ = false; ninst_ = 0; max_ninst_ = 1; // make AllocInst for fail instruction okay - max_mem_ = 0; - int fail = AllocInst(1); - inst_[fail].InitFail(); + max_mem_ = 0; + int fail = AllocInst(1); + inst_[fail].InitFail(); max_ninst_ = 0; // Caller must change -} - -Compiler::~Compiler() { - delete prog_; -} - -int Compiler::AllocInst(int n) { +} + +Compiler::~Compiler() { + delete prog_; +} + +int Compiler::AllocInst(int n) { if (failed_ || ninst_ + n > max_ninst_) { - failed_ = true; - return -1; - } - + failed_ = true; + return -1; + } + if (ninst_ + n > inst_.size()) { int cap = inst_.size(); if (cap == 0) @@ -254,92 +254,92 @@ int Compiler::AllocInst(int n) { memmove(inst.data(), inst_.data(), ninst_*sizeof inst_[0]); memset(inst.data() + ninst_, 0, (cap - ninst_)*sizeof inst_[0]); inst_ = std::move(inst); - } + } int id = ninst_; ninst_ += n; - return id; -} - -// These routines are somewhat hard to visualize in text -- -// see http://swtch.com/~rsc/regexp/regexp1.html for -// pictures explaining what is going on here. - -// Returns an unmatchable fragment. -Frag Compiler::NoMatch() { + return id; +} + +// These routines are somewhat hard to visualize in text -- +// see http://swtch.com/~rsc/regexp/regexp1.html for +// pictures explaining what is going on here. 
+ +// Returns an unmatchable fragment. +Frag Compiler::NoMatch() { return Frag(); -} - -// Is a an unmatchable fragment? -static bool IsNoMatch(Frag a) { - return a.begin == 0; -} - -// Given fragments a and b, returns fragment for ab. -Frag Compiler::Cat(Frag a, Frag b) { - if (IsNoMatch(a) || IsNoMatch(b)) - return NoMatch(); - - // Elide no-op. - Prog::Inst* begin = &inst_[a.begin]; - if (begin->opcode() == kInstNop && +} + +// Is a an unmatchable fragment? +static bool IsNoMatch(Frag a) { + return a.begin == 0; +} + +// Given fragments a and b, returns fragment for ab. +Frag Compiler::Cat(Frag a, Frag b) { + if (IsNoMatch(a) || IsNoMatch(b)) + return NoMatch(); + + // Elide no-op. + Prog::Inst* begin = &inst_[a.begin]; + if (begin->opcode() == kInstNop && a.end.head == (a.begin << 1) && - begin->out() == 0) { + begin->out() == 0) { // in case refs to a somewhere PatchList::Patch(inst_.data(), a.end, b.begin); - return b; - } - - // To run backward over string, reverse all concatenations. - if (reversed_) { + return b; + } + + // To run backward over string, reverse all concatenations. + if (reversed_) { PatchList::Patch(inst_.data(), b.end, a.begin); return Frag(b.begin, a.end, b.nullable && a.nullable); - } - + } + PatchList::Patch(inst_.data(), a.end, b.begin); return Frag(a.begin, b.end, a.nullable && b.nullable); -} - -// Given fragments for a and b, returns fragment for a|b. -Frag Compiler::Alt(Frag a, Frag b) { - // Special case for convenience in loops. - if (IsNoMatch(a)) - return b; - if (IsNoMatch(b)) - return a; - - int id = AllocInst(1); - if (id < 0) - return NoMatch(); - - inst_[id].InitAlt(a.begin, b.begin); +} + +// Given fragments for a and b, returns fragment for a|b. +Frag Compiler::Alt(Frag a, Frag b) { + // Special case for convenience in loops. 
+ if (IsNoMatch(a)) + return b; + if (IsNoMatch(b)) + return a; + + int id = AllocInst(1); + if (id < 0) + return NoMatch(); + + inst_[id].InitAlt(a.begin, b.begin); return Frag(id, PatchList::Append(inst_.data(), a.end, b.end), a.nullable || b.nullable); -} - -// When capturing submatches in like-Perl mode, a kOpAlt Inst -// treats out_ as the first choice, out1_ as the second. -// -// For *, +, and ?, if out_ causes another repetition, -// then the operator is greedy. If out1_ is the repetition -// (and out_ moves forward), then the operator is non-greedy. - +} + +// When capturing submatches in like-Perl mode, a kOpAlt Inst +// treats out_ as the first choice, out1_ as the second. +// +// For *, +, and ?, if out_ causes another repetition, +// then the operator is greedy. If out1_ is the repetition +// (and out_ moves forward), then the operator is non-greedy. + // Given a fragment for a, returns a fragment for a+ or a+? (if nongreedy) Frag Compiler::Plus(Frag a, bool nongreedy) { - int id = AllocInst(1); - if (id < 0) - return NoMatch(); + int id = AllocInst(1); + if (id < 0) + return NoMatch(); PatchList pl; - if (nongreedy) { + if (nongreedy) { inst_[id].InitAlt(0, a.begin); pl = PatchList::Mk(id << 1); - } else { + } else { inst_[id].InitAlt(a.begin, 0); pl = PatchList::Mk((id << 1) | 1); - } + } PatchList::Patch(inst_.data(), a.end, id); return Frag(a.begin, pl, a.nullable); -} - +} + // Given a fragment for a, returns a fragment for a* or a*? (if nongreedy) Frag Compiler::Star(Frag a, bool nongreedy) { // When the subexpression is nullable, one Alt isn't enough to guarantee @@ -361,112 +361,112 @@ Frag Compiler::Star(Frag a, bool nongreedy) { } PatchList::Patch(inst_.data(), a.end, id); return Frag(id, pl, true); -} - -// Given a fragment for a, returns a fragment for a? or a?? (if nongreedy) -Frag Compiler::Quest(Frag a, bool nongreedy) { +} + +// Given a fragment for a, returns a fragment for a? or a?? 
(if nongreedy) +Frag Compiler::Quest(Frag a, bool nongreedy) { if (IsNoMatch(a)) return Nop(); - int id = AllocInst(1); - if (id < 0) - return NoMatch(); - PatchList pl; - if (nongreedy) { - inst_[id].InitAlt(0, a.begin); - pl = PatchList::Mk(id << 1); - } else { - inst_[id].InitAlt(a.begin, 0); - pl = PatchList::Mk((id << 1) | 1); - } + int id = AllocInst(1); + if (id < 0) + return NoMatch(); + PatchList pl; + if (nongreedy) { + inst_[id].InitAlt(0, a.begin); + pl = PatchList::Mk(id << 1); + } else { + inst_[id].InitAlt(a.begin, 0); + pl = PatchList::Mk((id << 1) | 1); + } return Frag(id, PatchList::Append(inst_.data(), pl, a.end), true); -} - -// Returns a fragment for the byte range lo-hi. -Frag Compiler::ByteRange(int lo, int hi, bool foldcase) { - int id = AllocInst(1); - if (id < 0) - return NoMatch(); - inst_[id].InitByteRange(lo, hi, foldcase, 0); +} + +// Returns a fragment for the byte range lo-hi. +Frag Compiler::ByteRange(int lo, int hi, bool foldcase) { + int id = AllocInst(1); + if (id < 0) + return NoMatch(); + inst_[id].InitByteRange(lo, hi, foldcase, 0); return Frag(id, PatchList::Mk(id << 1), false); -} - -// Returns a no-op fragment. Sometimes unavoidable. -Frag Compiler::Nop() { - int id = AllocInst(1); - if (id < 0) - return NoMatch(); - inst_[id].InitNop(0); +} + +// Returns a no-op fragment. Sometimes unavoidable. +Frag Compiler::Nop() { + int id = AllocInst(1); + if (id < 0) + return NoMatch(); + inst_[id].InitNop(0); return Frag(id, PatchList::Mk(id << 1), true); -} - -// Returns a fragment that signals a match. +} + +// Returns a fragment that signals a match. 
Frag Compiler::Match(int32_t match_id) { - int id = AllocInst(1); - if (id < 0) - return NoMatch(); - inst_[id].InitMatch(match_id); + int id = AllocInst(1); + if (id < 0) + return NoMatch(); + inst_[id].InitMatch(match_id); return Frag(id, kNullPatchList, false); -} - -// Returns a fragment matching a particular empty-width op (like ^ or $) -Frag Compiler::EmptyWidth(EmptyOp empty) { - int id = AllocInst(1); - if (id < 0) - return NoMatch(); - inst_[id].InitEmptyWidth(empty, 0); +} + +// Returns a fragment matching a particular empty-width op (like ^ or $) +Frag Compiler::EmptyWidth(EmptyOp empty) { + int id = AllocInst(1); + if (id < 0) + return NoMatch(); + inst_[id].InitEmptyWidth(empty, 0); return Frag(id, PatchList::Mk(id << 1), true); -} - -// Given a fragment a, returns a fragment with capturing parens around a. -Frag Compiler::Capture(Frag a, int n) { +} + +// Given a fragment a, returns a fragment with capturing parens around a. +Frag Compiler::Capture(Frag a, int n) { if (IsNoMatch(a)) return NoMatch(); - int id = AllocInst(2); - if (id < 0) - return NoMatch(); - inst_[id].InitCapture(2*n, a.begin); - inst_[id+1].InitCapture(2*n+1, 0); + int id = AllocInst(2); + if (id < 0) + return NoMatch(); + inst_[id].InitCapture(2*n, a.begin); + inst_[id+1].InitCapture(2*n+1, 0); PatchList::Patch(inst_.data(), a.end, id+1); - + return Frag(id, PatchList::Mk((id+1) << 1), a.nullable); -} - -// A Rune is a name for a Unicode code point. -// Returns maximum rune encoded by UTF-8 sequence of length len. -static int MaxRune(int len) { +} + +// A Rune is a name for a Unicode code point. +// Returns maximum rune encoded by UTF-8 sequence of length len. +static int MaxRune(int len) { int b; // number of Rune bits in len-byte UTF-8 sequence (len < UTFmax) - if (len == 1) - b = 7; - else - b = 8-(len+1) + 6*(len-1); - return (1<<b) - 1; // maximum Rune for b bits. 
-} - -// The rune range compiler caches common suffix fragments, -// which are very common in UTF-8 (e.g., [80-bf]). -// The fragment suffixes are identified by their start -// instructions. NULL denotes the eventual end match. -// The Frag accumulates in rune_range_. Caching common -// suffixes reduces the UTF-8 "." from 32 to 24 instructions, -// and it reduces the corresponding one-pass NFA from 16 nodes to 8. - -void Compiler::BeginRange() { - rune_cache_.clear(); - rune_range_.begin = 0; + if (len == 1) + b = 7; + else + b = 8-(len+1) + 6*(len-1); + return (1<<b) - 1; // maximum Rune for b bits. +} + +// The rune range compiler caches common suffix fragments, +// which are very common in UTF-8 (e.g., [80-bf]). +// The fragment suffixes are identified by their start +// instructions. NULL denotes the eventual end match. +// The Frag accumulates in rune_range_. Caching common +// suffixes reduces the UTF-8 "." from 32 to 24 instructions, +// and it reduces the corresponding one-pass NFA from 16 nodes to 8. 
+ +void Compiler::BeginRange() { + rune_cache_.clear(); + rune_range_.begin = 0; rune_range_.end = kNullPatchList; -} - +} + int Compiler::UncachedRuneByteSuffix(uint8_t lo, uint8_t hi, bool foldcase, - int next) { - Frag f = ByteRange(lo, hi, foldcase); - if (next != 0) { + int next) { + Frag f = ByteRange(lo, hi, foldcase); + if (next != 0) { PatchList::Patch(inst_.data(), f.end, next); - } else { + } else { rune_range_.end = PatchList::Append(inst_.data(), rune_range_.end, f.end); - } - return f.begin; -} - + } + return f.begin; +} + static uint64_t MakeRuneCacheKey(uint8_t lo, uint8_t hi, bool foldcase, int next) { return (uint64_t)next << 17 | @@ -474,18 +474,18 @@ static uint64_t MakeRuneCacheKey(uint8_t lo, uint8_t hi, bool foldcase, (uint64_t)hi << 1 | (uint64_t)foldcase; } - + int Compiler::CachedRuneByteSuffix(uint8_t lo, uint8_t hi, bool foldcase, int next) { uint64_t key = MakeRuneCacheKey(lo, hi, foldcase, next); std::unordered_map<uint64_t, int>::const_iterator it = rune_cache_.find(key); - if (it != rune_cache_.end()) - return it->second; - int id = UncachedRuneByteSuffix(lo, hi, foldcase, next); - rune_cache_[key] = id; - return id; -} - + if (it != rune_cache_.end()) + return it->second; + int id = UncachedRuneByteSuffix(lo, hi, foldcase, next); + rune_cache_[key] = id; + return id; +} + bool Compiler::IsCachedRuneByteSuffix(int id) { uint8_t lo = inst_[id].lo_; uint8_t hi = inst_[id].hi_; @@ -496,30 +496,30 @@ bool Compiler::IsCachedRuneByteSuffix(int id) { return rune_cache_.find(key) != rune_cache_.end(); } -void Compiler::AddSuffix(int id) { +void Compiler::AddSuffix(int id) { if (failed_) return; - if (rune_range_.begin == 0) { - rune_range_.begin = id; - return; - } - + if (rune_range_.begin == 0) { + rune_range_.begin = id; + return; + } + if (encoding_ == kEncodingUTF8) { // Build a trie in order to reduce fanout. 
rune_range_.begin = AddSuffixRecursive(rune_range_.begin, id); return; } - int alt = AllocInst(1); - if (alt < 0) { - rune_range_.begin = 0; - return; - } - inst_[alt].InitAlt(rune_range_.begin, id); - rune_range_.begin = alt; -} - + int alt = AllocInst(1); + if (alt < 0) { + rune_range_.begin = 0; + return; + } + inst_[alt].InitAlt(rune_range_.begin, id); + rune_range_.begin = alt; +} + int Compiler::AddSuffixRecursive(int root, int id) { DCHECK(inst_[root].opcode() == kInstAlt || inst_[root].opcode() == kInstByteRange); @@ -616,38 +616,38 @@ Frag Compiler::FindByteRange(int root, int id) { return NoMatch(); } -Frag Compiler::EndRange() { - return rune_range_; -} - -// Converts rune range lo-hi into a fragment that recognizes -// the bytes that would make up those runes in the current -// encoding (Latin 1 or UTF-8). -// This lets the machine work byte-by-byte even when -// using multibyte encodings. - -void Compiler::AddRuneRange(Rune lo, Rune hi, bool foldcase) { - switch (encoding_) { - default: - case kEncodingUTF8: - AddRuneRangeUTF8(lo, hi, foldcase); - break; - case kEncodingLatin1: - AddRuneRangeLatin1(lo, hi, foldcase); - break; - } -} - -void Compiler::AddRuneRangeLatin1(Rune lo, Rune hi, bool foldcase) { +Frag Compiler::EndRange() { + return rune_range_; +} + +// Converts rune range lo-hi into a fragment that recognizes +// the bytes that would make up those runes in the current +// encoding (Latin 1 or UTF-8). +// This lets the machine work byte-by-byte even when +// using multibyte encodings. + +void Compiler::AddRuneRange(Rune lo, Rune hi, bool foldcase) { + switch (encoding_) { + default: + case kEncodingUTF8: + AddRuneRangeUTF8(lo, hi, foldcase); + break; + case kEncodingLatin1: + AddRuneRangeLatin1(lo, hi, foldcase); + break; + } +} + +void Compiler::AddRuneRangeLatin1(Rune lo, Rune hi, bool foldcase) { // Latin-1 is easy: runes *are* bytes. 
- if (lo > hi || lo > 0xFF) - return; - if (hi > 0xFF) - hi = 0xFF; + if (lo > hi || lo > 0xFF) + return; + if (hi > 0xFF) + hi = 0xFF; AddSuffix(UncachedRuneByteSuffix(static_cast<uint8_t>(lo), static_cast<uint8_t>(hi), foldcase, 0)); -} - +} + void Compiler::Add_80_10ffff() { // The 80-10FFFF (Runeself-Runemax) rune range occurs frequently enough // (for example, for /./ and /[^a-z]/) that it is worth simplifying: by @@ -661,12 +661,12 @@ void Compiler::Add_80_10ffff() { id = UncachedRuneByteSuffix(0xC2, 0xDF, false, 0); id = UncachedRuneByteSuffix(0x80, 0xBF, false, id); AddSuffix(id); - + id = UncachedRuneByteSuffix(0xE0, 0xEF, false, 0); id = UncachedRuneByteSuffix(0x80, 0xBF, false, id); id = UncachedRuneByteSuffix(0x80, 0xBF, false, id); AddSuffix(id); - + id = UncachedRuneByteSuffix(0xF0, 0xF4, false, 0); id = UncachedRuneByteSuffix(0x80, 0xBF, false, id); id = UncachedRuneByteSuffix(0x80, 0xBF, false, id); @@ -677,7 +677,7 @@ void Compiler::Add_80_10ffff() { int cont1 = UncachedRuneByteSuffix(0x80, 0xBF, false, 0); id = UncachedRuneByteSuffix(0xC2, 0xDF, false, cont1); AddSuffix(id); - + int cont2 = UncachedRuneByteSuffix(0x80, 0xBF, false, cont1); id = UncachedRuneByteSuffix(0xE0, 0xEF, false, cont2); AddSuffix(id); @@ -685,60 +685,60 @@ void Compiler::Add_80_10ffff() { int cont3 = UncachedRuneByteSuffix(0x80, 0xBF, false, cont2); id = UncachedRuneByteSuffix(0xF0, 0xF4, false, cont3); AddSuffix(id); - } -} - -void Compiler::AddRuneRangeUTF8(Rune lo, Rune hi, bool foldcase) { - if (lo > hi) - return; - + } +} + +void Compiler::AddRuneRangeUTF8(Rune lo, Rune hi, bool foldcase) { + if (lo > hi) + return; + // Pick off 80-10FFFF as a common special case. if (lo == 0x80 && hi == 0x10ffff) { - Add_80_10ffff(); - return; - } - - // Split range into same-length sized ranges. 
- for (int i = 1; i < UTFmax; i++) { - Rune max = MaxRune(i); - if (lo <= max && max < hi) { - AddRuneRangeUTF8(lo, max, foldcase); - AddRuneRangeUTF8(max+1, hi, foldcase); - return; - } - } - - // ASCII range is always a special case. - if (hi < Runeself) { + Add_80_10ffff(); + return; + } + + // Split range into same-length sized ranges. + for (int i = 1; i < UTFmax; i++) { + Rune max = MaxRune(i); + if (lo <= max && max < hi) { + AddRuneRangeUTF8(lo, max, foldcase); + AddRuneRangeUTF8(max+1, hi, foldcase); + return; + } + } + + // ASCII range is always a special case. + if (hi < Runeself) { AddSuffix(UncachedRuneByteSuffix(static_cast<uint8_t>(lo), static_cast<uint8_t>(hi), foldcase, 0)); - return; - } - - // Split range into sections that agree on leading bytes. - for (int i = 1; i < UTFmax; i++) { + return; + } + + // Split range into sections that agree on leading bytes. + for (int i = 1; i < UTFmax; i++) { uint32_t m = (1<<(6*i)) - 1; // last i bytes of a UTF-8 sequence - if ((lo & ~m) != (hi & ~m)) { - if ((lo & m) != 0) { - AddRuneRangeUTF8(lo, lo|m, foldcase); - AddRuneRangeUTF8((lo|m)+1, hi, foldcase); - return; - } - if ((hi & m) != m) { - AddRuneRangeUTF8(lo, (hi&~m)-1, foldcase); - AddRuneRangeUTF8(hi&~m, hi, foldcase); - return; - } - } - } - - // Finally. Generate byte matching equivalent for lo-hi. + if ((lo & ~m) != (hi & ~m)) { + if ((lo & m) != 0) { + AddRuneRangeUTF8(lo, lo|m, foldcase); + AddRuneRangeUTF8((lo|m)+1, hi, foldcase); + return; + } + if ((hi & m) != m) { + AddRuneRangeUTF8(lo, (hi&~m)-1, foldcase); + AddRuneRangeUTF8(hi&~m, hi, foldcase); + return; + } + } + } + + // Finally. Generate byte matching equivalent for lo-hi. 
uint8_t ulo[UTFmax], uhi[UTFmax]; - int n = runetochar(reinterpret_cast<char*>(ulo), &lo); - int m = runetochar(reinterpret_cast<char*>(uhi), &hi); - (void)m; // USED(m) - DCHECK_EQ(n, m); - + int n = runetochar(reinterpret_cast<char*>(ulo), &lo); + int m = runetochar(reinterpret_cast<char*>(uhi), &hi); + (void)m; // USED(m) + DCHECK_EQ(n, m); + // The logic below encodes this thinking: // // 1. When we have built the whole suffix, we know that it cannot @@ -763,8 +763,8 @@ void Compiler::AddRuneRangeUTF8(Rune lo, Rune hi, bool foldcase) { // is more likely so; in reverse mode, a byte range is unlikely to // be part of a common suffix whereas a single byte is more likely // so. The same benefit versus cost argument applies here. - int id = 0; - if (reversed_) { + int id = 0; + if (reversed_) { for (int i = 0; i < n; i++) { // In reverse UTF-8 mode: cache the leading byte; don't cache the last // continuation byte; cache anything else iff it's a single byte (XX-XX). @@ -773,7 +773,7 @@ void Compiler::AddRuneRangeUTF8(Rune lo, Rune hi, bool foldcase) { else id = UncachedRuneByteSuffix(ulo[i], uhi[i], false, id); } - } else { + } else { for (int i = n-1; i >= 0; i--) { // In forward UTF-8 mode: don't cache the leading byte; cache the last // continuation byte; cache anything else iff it's a byte range (XX-YY). @@ -782,206 +782,206 @@ void Compiler::AddRuneRangeUTF8(Rune lo, Rune hi, bool foldcase) { else id = UncachedRuneByteSuffix(ulo[i], uhi[i], false, id); } - } - AddSuffix(id); -} - -// Should not be called. -Frag Compiler::Copy(Frag arg) { - // We're using WalkExponential; there should be no copying. - LOG(DFATAL) << "Compiler::Copy called!"; - failed_ = true; - return NoMatch(); -} - -// Visits a node quickly; called once WalkExponential has -// decided to cut this walk short. -Frag Compiler::ShortVisit(Regexp* re, Frag) { - failed_ = true; - return NoMatch(); -} - -// Called before traversing a node's children during the walk. 
-Frag Compiler::PreVisit(Regexp* re, Frag, bool* stop) { - // Cut off walk if we've already failed. - if (failed_) - *stop = true; - + } + AddSuffix(id); +} + +// Should not be called. +Frag Compiler::Copy(Frag arg) { + // We're using WalkExponential; there should be no copying. + LOG(DFATAL) << "Compiler::Copy called!"; + failed_ = true; + return NoMatch(); +} + +// Visits a node quickly; called once WalkExponential has +// decided to cut this walk short. +Frag Compiler::ShortVisit(Regexp* re, Frag) { + failed_ = true; + return NoMatch(); +} + +// Called before traversing a node's children during the walk. +Frag Compiler::PreVisit(Regexp* re, Frag, bool* stop) { + // Cut off walk if we've already failed. + if (failed_) + *stop = true; + return Frag(); // not used by caller -} - -Frag Compiler::Literal(Rune r, bool foldcase) { - switch (encoding_) { - default: +} + +Frag Compiler::Literal(Rune r, bool foldcase) { + switch (encoding_) { + default: return Frag(); - - case kEncodingLatin1: - return ByteRange(r, r, foldcase); - - case kEncodingUTF8: { - if (r < Runeself) // Make common case fast. - return ByteRange(r, r, foldcase); + + case kEncodingLatin1: + return ByteRange(r, r, foldcase); + + case kEncodingUTF8: { + if (r < Runeself) // Make common case fast. + return ByteRange(r, r, foldcase); uint8_t buf[UTFmax]; - int n = runetochar(reinterpret_cast<char*>(buf), &r); + int n = runetochar(reinterpret_cast<char*>(buf), &r); Frag f = ByteRange((uint8_t)buf[0], buf[0], false); - for (int i = 1; i < n; i++) + for (int i = 1; i < n; i++) f = Cat(f, ByteRange((uint8_t)buf[i], buf[i], false)); - return f; - } - } -} - -// Called after traversing the node's children during the walk. -// Given their frags, build and return the frag for this re. -Frag Compiler::PostVisit(Regexp* re, Frag, Frag, Frag* child_frags, - int nchild_frags) { - // If a child failed, don't bother going forward, especially - // since the child_frags might contain Frags with NULLs in them. 
- if (failed_) - return NoMatch(); - - // Given the child fragments, return the fragment for this node. - switch (re->op()) { - case kRegexpRepeat: - // Should not see; code at bottom of function will print error - break; - - case kRegexpNoMatch: - return NoMatch(); - - case kRegexpEmptyMatch: - return Nop(); - - case kRegexpHaveMatch: { - Frag f = Match(re->match_id()); + return f; + } + } +} + +// Called after traversing the node's children during the walk. +// Given their frags, build and return the frag for this re. +Frag Compiler::PostVisit(Regexp* re, Frag, Frag, Frag* child_frags, + int nchild_frags) { + // If a child failed, don't bother going forward, especially + // since the child_frags might contain Frags with NULLs in them. + if (failed_) + return NoMatch(); + + // Given the child fragments, return the fragment for this node. + switch (re->op()) { + case kRegexpRepeat: + // Should not see; code at bottom of function will print error + break; + + case kRegexpNoMatch: + return NoMatch(); + + case kRegexpEmptyMatch: + return Nop(); + + case kRegexpHaveMatch: { + Frag f = Match(re->match_id()); if (anchor_ == RE2::ANCHOR_BOTH) { // Append \z or else the subexpression will effectively be unanchored. // Complemented by the UNANCHORED case in CompileSet(). 
f = Cat(EmptyWidth(kEmptyEndText), f); } - return f; - } - - case kRegexpConcat: { - Frag f = child_frags[0]; - for (int i = 1; i < nchild_frags; i++) - f = Cat(f, child_frags[i]); - return f; - } - - case kRegexpAlternate: { - Frag f = child_frags[0]; - for (int i = 1; i < nchild_frags; i++) - f = Alt(f, child_frags[i]); - return f; - } - - case kRegexpStar: + return f; + } + + case kRegexpConcat: { + Frag f = child_frags[0]; + for (int i = 1; i < nchild_frags; i++) + f = Cat(f, child_frags[i]); + return f; + } + + case kRegexpAlternate: { + Frag f = child_frags[0]; + for (int i = 1; i < nchild_frags; i++) + f = Alt(f, child_frags[i]); + return f; + } + + case kRegexpStar: return Star(child_frags[0], (re->parse_flags()&Regexp::NonGreedy) != 0); - - case kRegexpPlus: + + case kRegexpPlus: return Plus(child_frags[0], (re->parse_flags()&Regexp::NonGreedy) != 0); - - case kRegexpQuest: + + case kRegexpQuest: return Quest(child_frags[0], (re->parse_flags()&Regexp::NonGreedy) != 0); - - case kRegexpLiteral: + + case kRegexpLiteral: return Literal(re->rune(), (re->parse_flags()&Regexp::FoldCase) != 0); - - case kRegexpLiteralString: { - // Concatenation of literals. - if (re->nrunes() == 0) - return Nop(); - Frag f; - for (int i = 0; i < re->nrunes(); i++) { + + case kRegexpLiteralString: { + // Concatenation of literals. + if (re->nrunes() == 0) + return Nop(); + Frag f; + for (int i = 0; i < re->nrunes(); i++) { Frag f1 = Literal(re->runes()[i], (re->parse_flags()&Regexp::FoldCase) != 0); - if (i == 0) - f = f1; - else - f = Cat(f, f1); - } - return f; - } - - case kRegexpAnyChar: - BeginRange(); - AddRuneRange(0, Runemax, false); - return EndRange(); - - case kRegexpAnyByte: - return ByteRange(0x00, 0xFF, false); - - case kRegexpCharClass: { - CharClass* cc = re->cc(); - if (cc->empty()) { - // This can't happen. 
- LOG(DFATAL) << "No ranges in char class"; - failed_ = true; - return NoMatch(); - } - - // ASCII case-folding optimization: if the char class - // behaves the same on A-Z as it does on a-z, - // discard any ranges wholly contained in A-Z - // and mark the other ranges as foldascii. - // This reduces the size of a program for - // (?i)abc from 3 insts per letter to 1 per letter. - bool foldascii = cc->FoldsASCII(); - - // Character class is just a big OR of the different - // character ranges in the class. - BeginRange(); - for (CharClass::iterator i = cc->begin(); i != cc->end(); ++i) { - // ASCII case-folding optimization (see above). - if (foldascii && 'A' <= i->lo && i->hi <= 'Z') - continue; - - // If this range contains all of A-Za-z or none of it, - // the fold flag is unnecessary; don't bother. - bool fold = foldascii; + if (i == 0) + f = f1; + else + f = Cat(f, f1); + } + return f; + } + + case kRegexpAnyChar: + BeginRange(); + AddRuneRange(0, Runemax, false); + return EndRange(); + + case kRegexpAnyByte: + return ByteRange(0x00, 0xFF, false); + + case kRegexpCharClass: { + CharClass* cc = re->cc(); + if (cc->empty()) { + // This can't happen. + LOG(DFATAL) << "No ranges in char class"; + failed_ = true; + return NoMatch(); + } + + // ASCII case-folding optimization: if the char class + // behaves the same on A-Z as it does on a-z, + // discard any ranges wholly contained in A-Z + // and mark the other ranges as foldascii. + // This reduces the size of a program for + // (?i)abc from 3 insts per letter to 1 per letter. + bool foldascii = cc->FoldsASCII(); + + // Character class is just a big OR of the different + // character ranges in the class. + BeginRange(); + for (CharClass::iterator i = cc->begin(); i != cc->end(); ++i) { + // ASCII case-folding optimization (see above). + if (foldascii && 'A' <= i->lo && i->hi <= 'Z') + continue; + + // If this range contains all of A-Za-z or none of it, + // the fold flag is unnecessary; don't bother. 
+ bool fold = foldascii; if ((i->lo <= 'A' && 'z' <= i->hi) || i->hi < 'A' || 'z' < i->lo || ('Z' < i->lo && i->hi < 'a')) - fold = false; - - AddRuneRange(i->lo, i->hi, fold); - } - return EndRange(); - } - - case kRegexpCapture: - // If this is a non-capturing parenthesis -- (?:foo) -- - // just use the inner expression. - if (re->cap() < 0) - return child_frags[0]; - return Capture(child_frags[0], re->cap()); - - case kRegexpBeginLine: - return EmptyWidth(reversed_ ? kEmptyEndLine : kEmptyBeginLine); - - case kRegexpEndLine: - return EmptyWidth(reversed_ ? kEmptyBeginLine : kEmptyEndLine); - - case kRegexpBeginText: - return EmptyWidth(reversed_ ? kEmptyEndText : kEmptyBeginText); - - case kRegexpEndText: - return EmptyWidth(reversed_ ? kEmptyBeginText : kEmptyEndText); - - case kRegexpWordBoundary: - return EmptyWidth(kEmptyWordBoundary); - - case kRegexpNoWordBoundary: - return EmptyWidth(kEmptyNonWordBoundary); - } - LOG(DFATAL) << "Missing case in Compiler: " << re->op(); - failed_ = true; - return NoMatch(); -} - -// Is this regexp required to start at the beginning of the text? -// Only approximate; can return false for complicated regexps like (\Aa|\Ab), -// but handles (\A(a|b)). Could use the Walker to write a more exact one. + fold = false; + + AddRuneRange(i->lo, i->hi, fold); + } + return EndRange(); + } + + case kRegexpCapture: + // If this is a non-capturing parenthesis -- (?:foo) -- + // just use the inner expression. + if (re->cap() < 0) + return child_frags[0]; + return Capture(child_frags[0], re->cap()); + + case kRegexpBeginLine: + return EmptyWidth(reversed_ ? kEmptyEndLine : kEmptyBeginLine); + + case kRegexpEndLine: + return EmptyWidth(reversed_ ? kEmptyBeginLine : kEmptyEndLine); + + case kRegexpBeginText: + return EmptyWidth(reversed_ ? kEmptyEndText : kEmptyBeginText); + + case kRegexpEndText: + return EmptyWidth(reversed_ ? 
kEmptyBeginText : kEmptyEndText); + + case kRegexpWordBoundary: + return EmptyWidth(kEmptyWordBoundary); + + case kRegexpNoWordBoundary: + return EmptyWidth(kEmptyNonWordBoundary); + } + LOG(DFATAL) << "Missing case in Compiler: " << re->op(); + failed_ = true; + return NoMatch(); +} + +// Is this regexp required to start at the beginning of the text? +// Only approximate; can return false for complicated regexps like (\Aa|\Ab), +// but handles (\A(a|b)). Could use the Walker to write a more exact one. static bool IsAnchorStart(Regexp** pre, int depth) { Regexp* re = *pre; Regexp* sub; @@ -1005,7 +1005,7 @@ static bool IsAnchorStart(Regexp** pre, int depth) { *pre = Regexp::Concat(subcopy.data(), re->nsub(), re->parse_flags()); re->Decref(); return true; - } + } sub->Decref(); } break; @@ -1013,8 +1013,8 @@ static bool IsAnchorStart(Regexp** pre, int depth) { sub = re->sub()[0]->Incref(); if (IsAnchorStart(&sub, depth+1)) { *pre = Regexp::Capture(sub, re->parse_flags(), re->cap()); - re->Decref(); - return true; + re->Decref(); + return true; } sub->Decref(); break; @@ -1022,13 +1022,13 @@ static bool IsAnchorStart(Regexp** pre, int depth) { *pre = Regexp::LiteralString(NULL, 0, re->parse_flags()); re->Decref(); return true; - } + } return false; -} - -// Is this regexp required to start at the end of the text? -// Only approximate; can return false for complicated regexps like (a\z|b\z), -// but handles ((a|b)\z). Could use the Walker to write a more exact one. +} + +// Is this regexp required to start at the end of the text? +// Only approximate; can return false for complicated regexps like (a\z|b\z), +// but handles ((a|b)\z). Could use the Walker to write a more exact one. 
static bool IsAnchorEnd(Regexp** pre, int depth) { Regexp* re = *pre; Regexp* sub; @@ -1052,7 +1052,7 @@ static bool IsAnchorEnd(Regexp** pre, int depth) { *pre = Regexp::Concat(subcopy.data(), re->nsub(), re->parse_flags()); re->Decref(); return true; - } + } sub->Decref(); } break; @@ -1060,8 +1060,8 @@ static bool IsAnchorEnd(Regexp** pre, int depth) { sub = re->sub()[0]->Incref(); if (IsAnchorEnd(&sub, depth+1)) { *pre = Regexp::Capture(sub, re->parse_flags(), re->cap()); - re->Decref(); - return true; + re->Decref(); + return true; } sub->Decref(); break; @@ -1069,110 +1069,110 @@ static bool IsAnchorEnd(Regexp** pre, int depth) { *pre = Regexp::LiteralString(NULL, 0, re->parse_flags()); re->Decref(); return true; - } + } return false; -} - +} + void Compiler::Setup(Regexp::ParseFlags flags, int64_t max_mem, - RE2::Anchor anchor) { - if (flags & Regexp::Latin1) - encoding_ = kEncodingLatin1; - max_mem_ = max_mem; - if (max_mem <= 0) { + RE2::Anchor anchor) { + if (flags & Regexp::Latin1) + encoding_ = kEncodingLatin1; + max_mem_ = max_mem; + if (max_mem <= 0) { max_ninst_ = 100000; // more than enough } else if (static_cast<size_t>(max_mem) <= sizeof(Prog)) { - // No room for anything. + // No room for anything. max_ninst_ = 0; - } else { + } else { int64_t m = (max_mem - sizeof(Prog)) / sizeof(Prog::Inst); - // Limit instruction count so that inst->id() fits nicely in an int. - // SparseArray also assumes that the indices (inst->id()) are ints. + // Limit instruction count so that inst->id() fits nicely in an int. + // SparseArray also assumes that the indices (inst->id()) are ints. // The call to WalkExponential uses 2*max_ninst_ below, - // and other places in the code use 2 or 3 * prog->size(). - // Limiting to 2^24 should avoid overflow in those places. - // (The point of allowing more than 32 bits of memory is to - // have plenty of room for the DFA states, not to use it up - // on the program.) 
- if (m >= 1<<24) - m = 1<<24; - // Inst imposes its own limit (currently bigger than 2^24 but be safe). - if (m > Prog::Inst::kMaxInst) - m = Prog::Inst::kMaxInst; + // and other places in the code use 2 or 3 * prog->size(). + // Limiting to 2^24 should avoid overflow in those places. + // (The point of allowing more than 32 bits of memory is to + // have plenty of room for the DFA states, not to use it up + // on the program.) + if (m >= 1<<24) + m = 1<<24; + // Inst imposes its own limit (currently bigger than 2^24 but be safe). + if (m > Prog::Inst::kMaxInst) + m = Prog::Inst::kMaxInst; max_ninst_ = static_cast<int>(m); - } - anchor_ = anchor; -} - -// Compiles re, returning program. -// Caller is responsible for deleting prog_. -// If reversed is true, compiles a program that expects -// to run over the input string backward (reverses all concatenations). -// The reversed flag is also recorded in the returned program. + } + anchor_ = anchor; +} + +// Compiles re, returning program. +// Caller is responsible for deleting prog_. +// If reversed is true, compiles a program that expects +// to run over the input string backward (reverses all concatenations). +// The reversed flag is also recorded in the returned program. Prog* Compiler::Compile(Regexp* re, bool reversed, int64_t max_mem) { - Compiler c; + Compiler c; c.Setup(re->parse_flags(), max_mem, RE2::UNANCHORED /* unused */); - c.reversed_ = reversed; - - // Simplify to remove things like counted repetitions - // and character classes like \d. - Regexp* sre = re->Simplify(); - if (sre == NULL) - return NULL; - - // Record whether prog is anchored, removing the anchors. - // (They get in the way of other optimizations.) + c.reversed_ = reversed; + + // Simplify to remove things like counted repetitions + // and character classes like \d. + Regexp* sre = re->Simplify(); + if (sre == NULL) + return NULL; + + // Record whether prog is anchored, removing the anchors. 
+ // (They get in the way of other optimizations.) bool is_anchor_start = IsAnchorStart(&sre, 0); bool is_anchor_end = IsAnchorEnd(&sre, 0); - - // Generate fragment for entire regexp. + + // Generate fragment for entire regexp. Frag all = c.WalkExponential(sre, Frag(), 2*c.max_ninst_); - sre->Decref(); - if (c.failed_) - return NULL; - - // Success! Finish by putting Match node at end, and record start. - // Turn off c.reversed_ (if it is set) to force the remaining concatenations - // to behave normally. - c.reversed_ = false; + sre->Decref(); + if (c.failed_) + return NULL; + + // Success! Finish by putting Match node at end, and record start. + // Turn off c.reversed_ (if it is set) to force the remaining concatenations + // to behave normally. + c.reversed_ = false; all = c.Cat(all, c.Match(0)); - + c.prog_->set_reversed(reversed); if (c.prog_->reversed()) { - c.prog_->set_anchor_start(is_anchor_end); - c.prog_->set_anchor_end(is_anchor_start); - } else { - c.prog_->set_anchor_start(is_anchor_start); - c.prog_->set_anchor_end(is_anchor_end); - } - + c.prog_->set_anchor_start(is_anchor_end); + c.prog_->set_anchor_end(is_anchor_start); + } else { + c.prog_->set_anchor_start(is_anchor_start); + c.prog_->set_anchor_end(is_anchor_end); + } + c.prog_->set_start(all.begin); if (!c.prog_->anchor_start()) { // Also create unanchored version, which starts with a .*? loop. all = c.Cat(c.DotStar(), all); - } + } c.prog_->set_start_unanchored(all.begin); - - // Hand ownership of prog_ to caller. + + // Hand ownership of prog_ to caller. return c.Finish(re); -} - +} + Prog* Compiler::Finish(Regexp* re) { - if (failed_) - return NULL; - - if (prog_->start() == 0 && prog_->start_unanchored() == 0) { - // No possible matches; keep Fail instruction only. + if (failed_) + return NULL; + + if (prog_->start() == 0 && prog_->start_unanchored() == 0) { + // No possible matches; keep Fail instruction only. ninst_ = 1; - } - + } + // Hand off the array to Prog. 
prog_->inst_ = std::move(inst_); prog_->size_ = ninst_; - + prog_->Optimize(); prog_->Flatten(); - prog_->ComputeByteMap(); - + prog_->ComputeByteMap(); + if (!prog_->reversed()) { std::string prefix; bool prefix_foldcase; @@ -1180,82 +1180,82 @@ Prog* Compiler::Finish(Regexp* re) { prog_->ConfigurePrefixAccel(prefix, prefix_foldcase); } - // Record remaining memory for DFA. - if (max_mem_ <= 0) { - prog_->set_dfa_mem(1<<20); - } else { + // Record remaining memory for DFA. + if (max_mem_ <= 0) { + prog_->set_dfa_mem(1<<20); + } else { int64_t m = max_mem_ - sizeof(Prog); m -= prog_->size_*sizeof(Prog::Inst); // account for inst_ if (prog_->CanBitState()) m -= prog_->size_*sizeof(uint16_t); // account for list_heads_ - if (m < 0) - m = 0; - prog_->set_dfa_mem(m); - } - - Prog* p = prog_; - prog_ = NULL; - return p; -} - -// Converts Regexp to Prog. + if (m < 0) + m = 0; + prog_->set_dfa_mem(m); + } + + Prog* p = prog_; + prog_ = NULL; + return p; +} + +// Converts Regexp to Prog. Prog* Regexp::CompileToProg(int64_t max_mem) { - return Compiler::Compile(this, false, max_mem); -} - + return Compiler::Compile(this, false, max_mem); +} + Prog* Regexp::CompileToReverseProg(int64_t max_mem) { - return Compiler::Compile(this, true, max_mem); -} - -Frag Compiler::DotStar() { - return Star(ByteRange(0x00, 0xff, false), true); -} - -// Compiles RE set to Prog. + return Compiler::Compile(this, true, max_mem); +} + +Frag Compiler::DotStar() { + return Star(ByteRange(0x00, 0xff, false), true); +} + +// Compiles RE set to Prog. 
Prog* Compiler::CompileSet(Regexp* re, RE2::Anchor anchor, int64_t max_mem) { - Compiler c; + Compiler c; c.Setup(re->parse_flags(), max_mem, anchor); - + Regexp* sre = re->Simplify(); if (sre == NULL) return NULL; - + Frag all = c.WalkExponential(sre, Frag(), 2*c.max_ninst_); sre->Decref(); - if (c.failed_) - return NULL; - + if (c.failed_) + return NULL; + c.prog_->set_anchor_start(true); c.prog_->set_anchor_end(true); - if (anchor == RE2::UNANCHORED) { + if (anchor == RE2::UNANCHORED) { // Prepend .* or else the expression will effectively be anchored. // Complemented by the ANCHOR_BOTH case in PostVisit(). - all = c.Cat(c.DotStar(), all); - } - c.prog_->set_start(all.begin); - c.prog_->set_start_unanchored(all.begin); - + all = c.Cat(c.DotStar(), all); + } + c.prog_->set_start(all.begin); + c.prog_->set_start_unanchored(all.begin); + Prog* prog = c.Finish(re); - if (prog == NULL) - return NULL; - - // Make sure DFA has enough memory to operate, - // since we're not going to fall back to the NFA. + if (prog == NULL) + return NULL; + + // Make sure DFA has enough memory to operate, + // since we're not going to fall back to the NFA. bool dfa_failed = false; - StringPiece sp = "hello, world"; - prog->SearchDFA(sp, sp, Prog::kAnchored, Prog::kManyMatch, + StringPiece sp = "hello, world"; + prog->SearchDFA(sp, sp, Prog::kAnchored, Prog::kManyMatch, NULL, &dfa_failed, NULL); if (dfa_failed) { - delete prog; - return NULL; - } - - return prog; -} - + delete prog; + return NULL; + } + + return prog; +} + Prog* Prog::CompileSet(Regexp* re, RE2::Anchor anchor, int64_t max_mem) { return Compiler::CompileSet(re, anchor, max_mem); -} - -} // namespace re2 +} + +} // namespace re2 diff --git a/contrib/libs/re2/re2/dfa.cc b/contrib/libs/re2/re2/dfa.cc index d47c7d50a7..c02e5730cc 100644 --- a/contrib/libs/re2/re2/dfa.cc +++ b/contrib/libs/re2/re2/dfa.cc @@ -1,26 +1,26 @@ -// Copyright 2008 The RE2 Authors. All Rights Reserved. 
-// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// A DFA (deterministic finite automaton)-based regular expression search. -// -// The DFA search has two main parts: the construction of the automaton, -// which is represented by a graph of State structures, and the execution -// of the automaton over a given input string. -// -// The basic idea is that the State graph is constructed so that the -// execution can simply start with a state s, and then for each byte c in -// the input string, execute "s = s->next[c]", checking at each point whether -// the current s represents a matching state. -// -// The simple explanation just given does convey the essence of this code, -// but it omits the details of how the State graph gets constructed as well -// as some performance-driven optimizations to the execution of the automaton. -// All these details are explained in the comments for the code following -// the definition of class DFA. -// -// See http://swtch.com/~rsc/regexp/ for a very bare-bones equivalent. - +// Copyright 2008 The RE2 Authors. All Rights Reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// A DFA (deterministic finite automaton)-based regular expression search. +// +// The DFA search has two main parts: the construction of the automaton, +// which is represented by a graph of State structures, and the execution +// of the automaton over a given input string. +// +// The basic idea is that the State graph is constructed so that the +// execution can simply start with a state s, and then for each byte c in +// the input string, execute "s = s->next[c]", checking at each point whether +// the current s represents a matching state. 
+// +// The simple explanation just given does convey the essence of this code, +// but it omits the details of how the State graph gets constructed as well +// as some performance-driven optimizations to the execution of the automaton. +// All these details are explained in the comments for the code following +// the definition of class DFA. +// +// See http://swtch.com/~rsc/regexp/ for a very bare-bones equivalent. + #include <stddef.h> #include <stdint.h> #include <stdio.h> @@ -41,18 +41,18 @@ #include "util/mutex.h" #include "util/strutil.h" #include "re2/pod_array.h" -#include "re2/prog.h" +#include "re2/prog.h" #include "re2/re2.h" #include "re2/sparse_set.h" #include "re2/stringpiece.h" - + // Silence "zero-sized array in struct/union" warning for DFA::State::next_. #ifdef _MSC_VER #pragma warning(disable: 4200) #endif - -namespace re2 { - + +namespace re2 { + // Controls whether the DFA should bail out early if the NFA would be faster. static bool dfa_should_bail_when_slow = true; @@ -60,65 +60,65 @@ void Prog::TESTING_ONLY_set_dfa_should_bail_when_slow(bool b) { dfa_should_bail_when_slow = b; } -// Changing this to true compiles in prints that trace execution of the DFA. -// Generates a lot of output -- only useful for debugging. +// Changing this to true compiles in prints that trace execution of the DFA. +// Generates a lot of output -- only useful for debugging. static const bool ExtraDebug = false; - -// A DFA implementation of a regular expression program. -// Since this is entirely a forward declaration mandated by C++, -// some of the comments here are better understood after reading -// the comments in the sections that follow the DFA definition. -class DFA { - public: + +// A DFA implementation of a regular expression program. +// Since this is entirely a forward declaration mandated by C++, +// some of the comments here are better understood after reading +// the comments in the sections that follow the DFA definition. 
+class DFA { + public: DFA(Prog* prog, Prog::MatchKind kind, int64_t max_mem); - ~DFA(); - bool ok() const { return !init_failed_; } - Prog::MatchKind kind() { return kind_; } - - // Searches for the regular expression in text, which is considered - // as a subsection of context for the purposes of interpreting flags - // like ^ and $ and \A and \z. - // Returns whether a match was found. - // If a match is found, sets *ep to the end point of the best match in text. - // If "anchored", the match must begin at the start of text. - // If "want_earliest_match", the match that ends first is used, not - // necessarily the best one. - // If "run_forward" is true, the DFA runs from text.begin() to text.end(). - // If it is false, the DFA runs from text.end() to text.begin(), - // returning the leftmost end of the match instead of the rightmost one. - // If the DFA cannot complete the search (for example, if it is out of - // memory), it sets *failed and returns false. - bool Search(const StringPiece& text, const StringPiece& context, - bool anchored, bool want_earliest_match, bool run_forward, + ~DFA(); + bool ok() const { return !init_failed_; } + Prog::MatchKind kind() { return kind_; } + + // Searches for the regular expression in text, which is considered + // as a subsection of context for the purposes of interpreting flags + // like ^ and $ and \A and \z. + // Returns whether a match was found. + // If a match is found, sets *ep to the end point of the best match in text. + // If "anchored", the match must begin at the start of text. + // If "want_earliest_match", the match that ends first is used, not + // necessarily the best one. + // If "run_forward" is true, the DFA runs from text.begin() to text.end(). + // If it is false, the DFA runs from text.end() to text.begin(), + // returning the leftmost end of the match instead of the rightmost one. 
+ // If the DFA cannot complete the search (for example, if it is out of + // memory), it sets *failed and returns false. + bool Search(const StringPiece& text, const StringPiece& context, + bool anchored, bool want_earliest_match, bool run_forward, bool* failed, const char** ep, SparseSet* matches); - + // Builds out all states for the entire DFA. // If cb is not empty, it receives one callback per state built. // Returns the number of states built. // FOR TESTING OR EXPERIMENTAL PURPOSES ONLY. int BuildAllStates(const Prog::DFAStateCallback& cb); - - // Computes min and max for matching strings. Won't return strings - // bigger than maxlen. + + // Computes min and max for matching strings. Won't return strings + // bigger than maxlen. bool PossibleMatchRange(std::string* min, std::string* max, int maxlen); - - // These data structures are logically private, but C++ makes it too - // difficult to mark them as such. - class RWLocker; - class StateSaver; + + // These data structures are logically private, but C++ makes it too + // difficult to mark them as such. + class RWLocker; + class StateSaver; class Workq; - - // A single DFA state. The DFA is represented as a graph of these - // States, linked by the next_ pointers. If in state s and reading - // byte c, the next state should be s->next_[c]. - struct State { + + // A single DFA state. The DFA is represented as a graph of these + // States, linked by the next_ pointers. If in state s and reading + // byte c, the next state should be s->next_[c]. + struct State { inline bool IsMatch() const { return (flag_ & kFlagMatch) != 0; } - - int* inst_; // Instruction pointers in the state. - int ninst_; // # of inst_ pointers. + + int* inst_; // Instruction pointers in the state. + int ninst_; // # of inst_ pointers. uint32_t flag_; // Empty string bitfield flags in effect on the way - // into this state, along with kFlagMatch if this - // is a matching state. 
+ // into this state, along with kFlagMatch if this + // is a matching state. // Work around the bug affecting flexible array members in GCC 6.x (for x >= 1). // (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=70932) @@ -128,18 +128,18 @@ class DFA { std::atomic<State*> next_[]; // Outgoing arrows from State, #endif - // one per input byte class - }; - - enum { - kByteEndText = 256, // imaginary byte at end of text - + // one per input byte class + }; + + enum { + kByteEndText = 256, // imaginary byte at end of text + kFlagEmptyMask = 0xFF, // State.flag_: bits holding kEmptyXXX flags kFlagMatch = 0x0100, // State.flag_: this is a matching state kFlagLastWord = 0x0200, // State.flag_: last byte was a word char - kFlagNeedShift = 16, // needed kEmpty bits are or'ed in shifted left - }; - + kFlagNeedShift = 16, // needed kEmpty bits are or'ed in shifted left + }; + struct StateHash { size_t operator()(const State* a) const { DCHECK(a != NULL); @@ -151,285 +151,285 @@ class DFA { } }; - struct StateEqual { - bool operator()(const State* a, const State* b) const { + struct StateEqual { + bool operator()(const State* a, const State* b) const { DCHECK(a != NULL); DCHECK(b != NULL); - if (a == b) - return true; + if (a == b) + return true; if (a->flag_ != b->flag_) - return false; - if (a->ninst_ != b->ninst_) - return false; - for (int i = 0; i < a->ninst_; i++) - if (a->inst_[i] != b->inst_[i]) - return false; + return false; + if (a->ninst_ != b->ninst_) + return false; + for (int i = 0; i < a->ninst_; i++) + if (a->inst_[i] != b->inst_[i]) + return false; return true; - } - }; - + } + }; + typedef std::unordered_set<State*, StateHash, StateEqual> StateSet; - - private: + + private: // Make it easier to swap in a scalable reader-writer mutex. using CacheMutex = Mutex; - enum { - // Indices into start_ for unanchored searches. - // Add kStartAnchored for anchored searches. 
- kStartBeginText = 0, // text at beginning of context - kStartBeginLine = 2, // text at beginning of line - kStartAfterWordChar = 4, // text follows a word character - kStartAfterNonWordChar = 6, // text follows non-word character - kMaxStart = 8, - - kStartAnchored = 1, - }; - - // Resets the DFA State cache, flushing all saved State* information. - // Releases and reacquires cache_mutex_ via cache_lock, so any - // State* existing before the call are not valid after the call. - // Use a StateSaver to preserve important states across the call. - // cache_mutex_.r <= L < mutex_ - // After: cache_mutex_.w <= L < mutex_ - void ResetCache(RWLocker* cache_lock); - - // Looks up and returns the State corresponding to a Workq. - // L >= mutex_ + enum { + // Indices into start_ for unanchored searches. + // Add kStartAnchored for anchored searches. + kStartBeginText = 0, // text at beginning of context + kStartBeginLine = 2, // text at beginning of line + kStartAfterWordChar = 4, // text follows a word character + kStartAfterNonWordChar = 6, // text follows non-word character + kMaxStart = 8, + + kStartAnchored = 1, + }; + + // Resets the DFA State cache, flushing all saved State* information. + // Releases and reacquires cache_mutex_ via cache_lock, so any + // State* existing before the call are not valid after the call. + // Use a StateSaver to preserve important states across the call. + // cache_mutex_.r <= L < mutex_ + // After: cache_mutex_.w <= L < mutex_ + void ResetCache(RWLocker* cache_lock); + + // Looks up and returns the State corresponding to a Workq. + // L >= mutex_ State* WorkqToCachedState(Workq* q, Workq* mq, uint32_t flag); - - // Looks up and returns a State matching the inst, ninst, and flag. - // L >= mutex_ + + // Looks up and returns a State matching the inst, ninst, and flag. + // L >= mutex_ State* CachedState(int* inst, int ninst, uint32_t flag); - - // Clear the cache entirely. - // Must hold cache_mutex_.w or be in destructor. 
- void ClearCache(); - - // Converts a State into a Workq: the opposite of WorkqToCachedState. - // L >= mutex_ + + // Clear the cache entirely. + // Must hold cache_mutex_.w or be in destructor. + void ClearCache(); + + // Converts a State into a Workq: the opposite of WorkqToCachedState. + // L >= mutex_ void StateToWorkq(State* s, Workq* q); - - // Runs a State on a given byte, returning the next state. - State* RunStateOnByteUnlocked(State*, int); // cache_mutex_.r <= L < mutex_ - State* RunStateOnByte(State*, int); // L >= mutex_ - - // Runs a Workq on a given byte followed by a set of empty-string flags, - // producing a new Workq in nq. If a match instruction is encountered, - // sets *ismatch to true. - // L >= mutex_ - void RunWorkqOnByte(Workq* q, Workq* nq, + + // Runs a State on a given byte, returning the next state. + State* RunStateOnByteUnlocked(State*, int); // cache_mutex_.r <= L < mutex_ + State* RunStateOnByte(State*, int); // L >= mutex_ + + // Runs a Workq on a given byte followed by a set of empty-string flags, + // producing a new Workq in nq. If a match instruction is encountered, + // sets *ismatch to true. + // L >= mutex_ + void RunWorkqOnByte(Workq* q, Workq* nq, int c, uint32_t flag, bool* ismatch); - - // Runs a Workq on a set of empty-string flags, producing a new Workq in nq. - // L >= mutex_ + + // Runs a Workq on a set of empty-string flags, producing a new Workq in nq. + // L >= mutex_ void RunWorkqOnEmptyString(Workq* q, Workq* nq, uint32_t flag); - - // Adds the instruction id to the Workq, following empty arrows - // according to flag. - // L >= mutex_ + + // Adds the instruction id to the Workq, following empty arrows + // according to flag. + // L >= mutex_ void AddToQueue(Workq* q, int id, uint32_t flag); - - // For debugging, returns a text representation of State. + + // For debugging, returns a text representation of State. 
static std::string DumpState(State* state); - - // For debugging, returns a text representation of a Workq. + + // For debugging, returns a text representation of a Workq. static std::string DumpWorkq(Workq* q); - - // Search parameters - struct SearchParams { - SearchParams(const StringPiece& text, const StringPiece& context, - RWLocker* cache_lock) + + // Search parameters + struct SearchParams { + SearchParams(const StringPiece& text, const StringPiece& context, + RWLocker* cache_lock) : text(text), context(context), - anchored(false), + anchored(false), can_prefix_accel(false), - want_earliest_match(false), - run_forward(false), - start(NULL), - cache_lock(cache_lock), - failed(false), - ep(NULL), + want_earliest_match(false), + run_forward(false), + start(NULL), + cache_lock(cache_lock), + failed(false), + ep(NULL), matches(NULL) {} - - StringPiece text; - StringPiece context; - bool anchored; + + StringPiece text; + StringPiece context; + bool anchored; bool can_prefix_accel; - bool want_earliest_match; - bool run_forward; - State* start; + bool want_earliest_match; + bool run_forward; + State* start; RWLocker* cache_lock; - bool failed; // "out" parameter: whether search gave up - const char* ep; // "out" parameter: end pointer for match + bool failed; // "out" parameter: whether search gave up + const char* ep; // "out" parameter: end pointer for match SparseSet* matches; - - private: + + private: SearchParams(const SearchParams&) = delete; SearchParams& operator=(const SearchParams&) = delete; - }; - - // Before each search, the parameters to Search are analyzed by + }; + + // Before each search, the parameters to Search are analyzed by // AnalyzeSearch to determine the state in which to start. - struct StartInfo { + struct StartInfo { StartInfo() : start(NULL) {} std::atomic<State*> start; - }; - + }; + // Fills in params->start and params->can_prefix_accel using - // the other search parameters. Returns true on success, - // false on failure. 
- // cache_mutex_.r <= L < mutex_ - bool AnalyzeSearch(SearchParams* params); + // the other search parameters. Returns true on success, + // false on failure. + // cache_mutex_.r <= L < mutex_ + bool AnalyzeSearch(SearchParams* params); bool AnalyzeSearchHelper(SearchParams* params, StartInfo* info, uint32_t flags); - - // The generic search loop, inlined to create specialized versions. - // cache_mutex_.r <= L < mutex_ - // Might unlock and relock cache_mutex_ via params->cache_lock. + + // The generic search loop, inlined to create specialized versions. + // cache_mutex_.r <= L < mutex_ + // Might unlock and relock cache_mutex_ via params->cache_lock. template <bool can_prefix_accel, bool want_earliest_match, bool run_forward> inline bool InlinedSearchLoop(SearchParams* params); - - // The specialized versions of InlinedSearchLoop. The three letters - // at the ends of the name denote the true/false values used as the - // last three parameters of InlinedSearchLoop. - // cache_mutex_.r <= L < mutex_ - // Might unlock and relock cache_mutex_ via params->cache_lock. - bool SearchFFF(SearchParams* params); - bool SearchFFT(SearchParams* params); - bool SearchFTF(SearchParams* params); - bool SearchFTT(SearchParams* params); - bool SearchTFF(SearchParams* params); - bool SearchTFT(SearchParams* params); - bool SearchTTF(SearchParams* params); - bool SearchTTT(SearchParams* params); - - // The main search loop: calls an appropriate specialized version of - // InlinedSearchLoop. - // cache_mutex_.r <= L < mutex_ - // Might unlock and relock cache_mutex_ via params->cache_lock. - bool FastSearchLoop(SearchParams* params); - - - // Looks up bytes in bytemap_ but handles case c == kByteEndText too. - int ByteMap(int c) { - if (c == kByteEndText) - return prog_->bytemap_range(); - return prog_->bytemap()[c]; - } - - // Constant after initialization. - Prog* prog_; // The regular expression program to run. - Prog::MatchKind kind_; // The kind of DFA. 
- bool init_failed_; // initialization failed (out of memory) - - Mutex mutex_; // mutex_ >= cache_mutex_.r - - // Scratch areas, protected by mutex_. - Workq* q0_; // Two pre-allocated work queues. - Workq* q1_; + + // The specialized versions of InlinedSearchLoop. The three letters + // at the ends of the name denote the true/false values used as the + // last three parameters of InlinedSearchLoop. + // cache_mutex_.r <= L < mutex_ + // Might unlock and relock cache_mutex_ via params->cache_lock. + bool SearchFFF(SearchParams* params); + bool SearchFFT(SearchParams* params); + bool SearchFTF(SearchParams* params); + bool SearchFTT(SearchParams* params); + bool SearchTFF(SearchParams* params); + bool SearchTFT(SearchParams* params); + bool SearchTTF(SearchParams* params); + bool SearchTTT(SearchParams* params); + + // The main search loop: calls an appropriate specialized version of + // InlinedSearchLoop. + // cache_mutex_.r <= L < mutex_ + // Might unlock and relock cache_mutex_ via params->cache_lock. + bool FastSearchLoop(SearchParams* params); + + + // Looks up bytes in bytemap_ but handles case c == kByteEndText too. + int ByteMap(int c) { + if (c == kByteEndText) + return prog_->bytemap_range(); + return prog_->bytemap()[c]; + } + + // Constant after initialization. + Prog* prog_; // The regular expression program to run. + Prog::MatchKind kind_; // The kind of DFA. + bool init_failed_; // initialization failed (out of memory) + + Mutex mutex_; // mutex_ >= cache_mutex_.r + + // Scratch areas, protected by mutex_. + Workq* q0_; // Two pre-allocated work queues. + Workq* q1_; PODArray<int> stack_; // Pre-allocated stack for AddToQueue - - // State* cache. Many threads use and add to the cache simultaneously, - // holding cache_mutex_ for reading and mutex_ (above) when adding. - // If the cache fills and needs to be discarded, the discarding is done - // while holding cache_mutex_ for writing, to avoid interrupting other - // readers. 
Any State* pointers are only valid while cache_mutex_ - // is held. + + // State* cache. Many threads use and add to the cache simultaneously, + // holding cache_mutex_ for reading and mutex_ (above) when adding. + // If the cache fills and needs to be discarded, the discarding is done + // while holding cache_mutex_ for writing, to avoid interrupting other + // readers. Any State* pointers are only valid while cache_mutex_ + // is held. CacheMutex cache_mutex_; int64_t mem_budget_; // Total memory budget for all States. int64_t state_budget_; // Amount of memory remaining for new States. - StateSet state_cache_; // All States computed so far. - StartInfo start_[kMaxStart]; + StateSet state_cache_; // All States computed so far. + StartInfo start_[kMaxStart]; DFA(const DFA&) = delete; DFA& operator=(const DFA&) = delete; -}; - +}; + // Shorthand for casting to uint8_t*. static inline const uint8_t* BytePtr(const void* v) { return reinterpret_cast<const uint8_t*>(v); -} - -// Work queues - -// Marks separate thread groups of different priority -// in the work queue when in leftmost-longest matching mode. -#define Mark (-1) - +} + +// Work queues + +// Marks separate thread groups of different priority +// in the work queue when in leftmost-longest matching mode. +#define Mark (-1) + // Separates the match IDs from the instructions in inst_. // Used only for "many match" DFA states. #define MatchSep (-2) -// Internally, the DFA uses a sparse array of -// program instruction pointers as a work queue. -// In leftmost longest mode, marks separate sections -// of workq that started executing at different -// locations in the string (earlier locations first). -class DFA::Workq : public SparseSet { - public: - // Constructor: n is number of normal slots, maxmark number of mark slots. 
- Workq(int n, int maxmark) : - SparseSet(n+maxmark), - n_(n), - maxmark_(maxmark), - nextmark_(n), - last_was_mark_(true) { - } - - bool is_mark(int i) { return i >= n_; } - - int maxmark() { return maxmark_; } - - void clear() { - SparseSet::clear(); - nextmark_ = n_; - } - - void mark() { - if (last_was_mark_) - return; - last_was_mark_ = false; - SparseSet::insert_new(nextmark_++); - } - - int size() { - return n_ + maxmark_; - } - - void insert(int id) { - if (contains(id)) - return; - insert_new(id); - } - - void insert_new(int id) { - last_was_mark_ = false; - SparseSet::insert_new(id); - } - - private: - int n_; // size excluding marks - int maxmark_; // maximum number of marks - int nextmark_; // id of next mark - bool last_was_mark_; // last inserted was mark +// Internally, the DFA uses a sparse array of +// program instruction pointers as a work queue. +// In leftmost longest mode, marks separate sections +// of workq that started executing at different +// locations in the string (earlier locations first). +class DFA::Workq : public SparseSet { + public: + // Constructor: n is number of normal slots, maxmark number of mark slots. 
+ Workq(int n, int maxmark) : + SparseSet(n+maxmark), + n_(n), + maxmark_(maxmark), + nextmark_(n), + last_was_mark_(true) { + } + + bool is_mark(int i) { return i >= n_; } + + int maxmark() { return maxmark_; } + + void clear() { + SparseSet::clear(); + nextmark_ = n_; + } + + void mark() { + if (last_was_mark_) + return; + last_was_mark_ = false; + SparseSet::insert_new(nextmark_++); + } + + int size() { + return n_ + maxmark_; + } + + void insert(int id) { + if (contains(id)) + return; + insert_new(id); + } + + void insert_new(int id) { + last_was_mark_ = false; + SparseSet::insert_new(id); + } + + private: + int n_; // size excluding marks + int maxmark_; // maximum number of marks + int nextmark_; // id of next mark + bool last_was_mark_; // last inserted was mark Workq(const Workq&) = delete; Workq& operator=(const Workq&) = delete; -}; - +}; + DFA::DFA(Prog* prog, Prog::MatchKind kind, int64_t max_mem) - : prog_(prog), - kind_(kind), - init_failed_(false), - q0_(NULL), - q1_(NULL), + : prog_(prog), + kind_(kind), + init_failed_(false), + q0_(NULL), + q1_(NULL), mem_budget_(max_mem) { if (ExtraDebug) fprintf(stderr, "\nkind %d\n%s\n", kind_, prog_->DumpUnanchored().c_str()); - int nmark = 0; + int nmark = 0; if (kind_ == Prog::kLongestMatch) nmark = prog_->size(); // See DFA::AddToQueue() for why this is so. @@ -437,266 +437,266 @@ DFA::DFA(Prog* prog, Prog::MatchKind kind, int64_t max_mem) prog_->inst_count(kInstEmptyWidth) + prog_->inst_count(kInstNop) + nmark + 1; // + 1 for start inst - + // Account for space needed for DFA, q0, q1, stack. 
- mem_budget_ -= sizeof(DFA); - mem_budget_ -= (prog_->size() + nmark) * - (sizeof(int)+sizeof(int)) * 2; // q0, q1 + mem_budget_ -= sizeof(DFA); + mem_budget_ -= (prog_->size() + nmark) * + (sizeof(int)+sizeof(int)) * 2; // q0, q1 mem_budget_ -= nstack * sizeof(int); // stack - if (mem_budget_ < 0) { - init_failed_ = true; - return; - } - - state_budget_ = mem_budget_; - - // Make sure there is a reasonable amount of working room left. - // At minimum, the search requires room for two states in order - // to limp along, restarting frequently. We'll get better performance - // if there is room for a larger number of states, say 20. + if (mem_budget_ < 0) { + init_failed_ = true; + return; + } + + state_budget_ = mem_budget_; + + // Make sure there is a reasonable amount of working room left. + // At minimum, the search requires room for two states in order + // to limp along, restarting frequently. We'll get better performance + // if there is room for a larger number of states, say 20. // Note that a state stores list heads only, so we use the program // list count for the upper bound, not the program size. int nnext = prog_->bytemap_range() + 1; // + 1 for kByteEndText slot int64_t one_state = sizeof(State) + nnext*sizeof(std::atomic<State*>) + (prog_->list_count()+nmark)*sizeof(int); - if (state_budget_ < 20*one_state) { - init_failed_ = true; - return; - } - + if (state_budget_ < 20*one_state) { + init_failed_ = true; + return; + } + q0_ = new Workq(prog_->size(), nmark); q1_ = new Workq(prog_->size(), nmark); stack_ = PODArray<int>(nstack); -} - -DFA::~DFA() { - delete q0_; - delete q1_; - ClearCache(); -} - -// In the DFA state graph, s->next[c] == NULL means that the -// state has not yet been computed and needs to be. We need -// a different special value to signal that s->next[c] is a -// state that can never lead to a match (and thus the search -// can be called off). Hence DeadState. 
-#define DeadState reinterpret_cast<State*>(1) - -// Signals that the rest of the string matches no matter what it is. -#define FullMatchState reinterpret_cast<State*>(2) - -#define SpecialStateMax FullMatchState - -// Debugging printouts - -// For debugging, returns a string representation of the work queue. +} + +DFA::~DFA() { + delete q0_; + delete q1_; + ClearCache(); +} + +// In the DFA state graph, s->next[c] == NULL means that the +// state has not yet been computed and needs to be. We need +// a different special value to signal that s->next[c] is a +// state that can never lead to a match (and thus the search +// can be called off). Hence DeadState. +#define DeadState reinterpret_cast<State*>(1) + +// Signals that the rest of the string matches no matter what it is. +#define FullMatchState reinterpret_cast<State*>(2) + +#define SpecialStateMax FullMatchState + +// Debugging printouts + +// For debugging, returns a string representation of the work queue. std::string DFA::DumpWorkq(Workq* q) { std::string s; - const char* sep = ""; + const char* sep = ""; for (Workq::iterator it = q->begin(); it != q->end(); ++it) { - if (q->is_mark(*it)) { + if (q->is_mark(*it)) { s += "|"; - sep = ""; - } else { + sep = ""; + } else { s += StringPrintf("%s%d", sep, *it); - sep = ","; - } - } - return s; -} - -// For debugging, returns a string representation of the state. + sep = ","; + } + } + return s; +} + +// For debugging, returns a string representation of the state. 
std::string DFA::DumpState(State* state) { - if (state == NULL) - return "_"; - if (state == DeadState) - return "X"; - if (state == FullMatchState) - return "*"; + if (state == NULL) + return "_"; + if (state == DeadState) + return "X"; + if (state == FullMatchState) + return "*"; std::string s; - const char* sep = ""; + const char* sep = ""; s += StringPrintf("(%p)", state); - for (int i = 0; i < state->ninst_; i++) { - if (state->inst_[i] == Mark) { + for (int i = 0; i < state->ninst_; i++) { + if (state->inst_[i] == Mark) { s += "|"; - sep = ""; + sep = ""; } else if (state->inst_[i] == MatchSep) { s += "||"; sep = ""; - } else { + } else { s += StringPrintf("%s%d", sep, state->inst_[i]); - sep = ","; - } - } + sep = ","; + } + } s += StringPrintf(" flag=%#x", state->flag_); - return s; -} - -////////////////////////////////////////////////////////////////////// -// -// DFA state graph construction. -// -// The DFA state graph is a heavily-linked collection of State* structures. -// The state_cache_ is a set of all the State structures ever allocated, -// so that if the same state is reached by two different paths, -// the same State structure can be used. This reduces allocation -// requirements and also avoids duplication of effort across the two -// identical states. -// -// A State is defined by an ordered list of instruction ids and a flag word. -// -// The choice of an ordered list of instructions differs from a typical -// textbook DFA implementation, which would use an unordered set. -// Textbook descriptions, however, only care about whether -// the DFA matches, not where it matches in the text. To decide where the -// DFA matches, we need to mimic the behavior of the dominant backtracking -// implementations like PCRE, which try one possible regular expression -// execution, then another, then another, stopping when one of them succeeds. -// The DFA execution tries these many executions in parallel, representing -// each by an instruction id. 
These pointers are ordered in the State.inst_ -// list in the same order that the executions would happen in a backtracking -// search: if a match is found during execution of inst_[2], inst_[i] for i>=3 -// can be discarded. -// -// Textbooks also typically do not consider context-aware empty string operators -// like ^ or $. These are handled by the flag word, which specifies the set -// of empty-string operators that should be matched when executing at the -// current text position. These flag bits are defined in prog.h. -// The flag word also contains two DFA-specific bits: kFlagMatch if the state -// is a matching state (one that reached a kInstMatch in the program) -// and kFlagLastWord if the last processed byte was a word character, for the -// implementation of \B and \b. -// -// The flag word also contains, shifted up 16 bits, the bits looked for by -// any kInstEmptyWidth instructions in the state. These provide a useful -// summary indicating when new flags might be useful. -// -// The permanent representation of a State's instruction ids is just an array, -// but while a state is being analyzed, these instruction ids are represented -// as a Workq, which is an array that allows iteration in insertion order. - -// NOTE(rsc): The choice of State construction determines whether the DFA -// mimics backtracking implementations (so-called leftmost first matching) or -// traditional DFA implementations (so-called leftmost longest matching as -// prescribed by POSIX). This implementation chooses to mimic the -// backtracking implementations, because we want to replace PCRE. To get -// POSIX behavior, the states would need to be considered not as a simple -// ordered list of instruction ids, but as a list of unordered sets of instruction -// ids. A match by a state in one set would inhibit the running of sets -// farther down the list but not other instruction ids in the same set. 
Each -// set would correspond to matches beginning at a given point in the string. -// This is implemented by separating different sets with Mark pointers. - -// Looks in the State cache for a State matching q, flag. -// If one is found, returns it. If one is not found, allocates one, -// inserts it in the cache, and returns it. + return s; +} + +////////////////////////////////////////////////////////////////////// +// +// DFA state graph construction. +// +// The DFA state graph is a heavily-linked collection of State* structures. +// The state_cache_ is a set of all the State structures ever allocated, +// so that if the same state is reached by two different paths, +// the same State structure can be used. This reduces allocation +// requirements and also avoids duplication of effort across the two +// identical states. +// +// A State is defined by an ordered list of instruction ids and a flag word. +// +// The choice of an ordered list of instructions differs from a typical +// textbook DFA implementation, which would use an unordered set. +// Textbook descriptions, however, only care about whether +// the DFA matches, not where it matches in the text. To decide where the +// DFA matches, we need to mimic the behavior of the dominant backtracking +// implementations like PCRE, which try one possible regular expression +// execution, then another, then another, stopping when one of them succeeds. +// The DFA execution tries these many executions in parallel, representing +// each by an instruction id. These pointers are ordered in the State.inst_ +// list in the same order that the executions would happen in a backtracking +// search: if a match is found during execution of inst_[2], inst_[i] for i>=3 +// can be discarded. +// +// Textbooks also typically do not consider context-aware empty string operators +// like ^ or $. 
These are handled by the flag word, which specifies the set +// of empty-string operators that should be matched when executing at the +// current text position. These flag bits are defined in prog.h. +// The flag word also contains two DFA-specific bits: kFlagMatch if the state +// is a matching state (one that reached a kInstMatch in the program) +// and kFlagLastWord if the last processed byte was a word character, for the +// implementation of \B and \b. +// +// The flag word also contains, shifted up 16 bits, the bits looked for by +// any kInstEmptyWidth instructions in the state. These provide a useful +// summary indicating when new flags might be useful. +// +// The permanent representation of a State's instruction ids is just an array, +// but while a state is being analyzed, these instruction ids are represented +// as a Workq, which is an array that allows iteration in insertion order. + +// NOTE(rsc): The choice of State construction determines whether the DFA +// mimics backtracking implementations (so-called leftmost first matching) or +// traditional DFA implementations (so-called leftmost longest matching as +// prescribed by POSIX). This implementation chooses to mimic the +// backtracking implementations, because we want to replace PCRE. To get +// POSIX behavior, the states would need to be considered not as a simple +// ordered list of instruction ids, but as a list of unordered sets of instruction +// ids. A match by a state in one set would inhibit the running of sets +// farther down the list but not other instruction ids in the same set. Each +// set would correspond to matches beginning at a given point in the string. +// This is implemented by separating different sets with Mark pointers. + +// Looks in the State cache for a State matching q, flag. +// If one is found, returns it. If one is not found, allocates one, +// inserts it in the cache, and returns it. 
// If mq is not null, MatchSep and the match IDs in mq will be appended // to the State. DFA::State* DFA::WorkqToCachedState(Workq* q, Workq* mq, uint32_t flag) { //mutex_.AssertHeld(); - - // Construct array of instruction ids for the new state. - // Only ByteRange, EmptyWidth, and Match instructions are useful to keep: - // those are the only operators with any effect in - // RunWorkqOnEmptyString or RunWorkqOnByte. + + // Construct array of instruction ids for the new state. + // Only ByteRange, EmptyWidth, and Match instructions are useful to keep: + // those are the only operators with any effect in + // RunWorkqOnEmptyString or RunWorkqOnByte. PODArray<int> inst(q->size()); - int n = 0; + int n = 0; uint32_t needflags = 0; // flags needed by kInstEmptyWidth instructions bool sawmatch = false; // whether queue contains guaranteed kInstMatch bool sawmark = false; // whether queue contains a Mark if (ExtraDebug) - fprintf(stderr, "WorkqToCachedState %s [%#x]", DumpWorkq(q).c_str(), flag); - for (Workq::iterator it = q->begin(); it != q->end(); ++it) { - int id = *it; - if (sawmatch && (kind_ == Prog::kFirstMatch || q->is_mark(id))) - break; - if (q->is_mark(id)) { - if (n > 0 && inst[n-1] != Mark) { - sawmark = true; - inst[n++] = Mark; - } - continue; - } - Prog::Inst* ip = prog_->inst(id); - switch (ip->opcode()) { - case kInstAltMatch: - // This state will continue to a match no matter what - // the rest of the input is. If it is the highest priority match - // being considered, return the special FullMatchState - // to indicate that it's all matches from here out. 
- if (kind_ != Prog::kManyMatch && - (kind_ != Prog::kFirstMatch || - (it == q->begin() && ip->greedy(prog_))) && + fprintf(stderr, "WorkqToCachedState %s [%#x]", DumpWorkq(q).c_str(), flag); + for (Workq::iterator it = q->begin(); it != q->end(); ++it) { + int id = *it; + if (sawmatch && (kind_ == Prog::kFirstMatch || q->is_mark(id))) + break; + if (q->is_mark(id)) { + if (n > 0 && inst[n-1] != Mark) { + sawmark = true; + inst[n++] = Mark; + } + continue; + } + Prog::Inst* ip = prog_->inst(id); + switch (ip->opcode()) { + case kInstAltMatch: + // This state will continue to a match no matter what + // the rest of the input is. If it is the highest priority match + // being considered, return the special FullMatchState + // to indicate that it's all matches from here out. + if (kind_ != Prog::kManyMatch && + (kind_ != Prog::kFirstMatch || + (it == q->begin() && ip->greedy(prog_))) && (kind_ != Prog::kLongestMatch || !sawmark) && (flag & kFlagMatch)) { if (ExtraDebug) - fprintf(stderr, " -> FullMatchState\n"); - return FullMatchState; - } + fprintf(stderr, " -> FullMatchState\n"); + return FullMatchState; + } FALLTHROUGH_INTENDED; default: // Record iff id is the head of its list, which must // be the case if id-1 is the last of *its* list. :) if (prog_->inst(id-1)->last()) inst[n++] = *it; - if (ip->opcode() == kInstEmptyWidth) - needflags |= ip->empty(); - if (ip->opcode() == kInstMatch && !prog_->anchor_end()) - sawmatch = true; - break; - } - } - DCHECK_LE(n, q->size()); - if (n > 0 && inst[n-1] == Mark) - n--; - - // If there are no empty-width instructions waiting to execute, - // then the extra flag bits will not be used, so there is no - // point in saving them. (Discarding them reduces the number - // of distinct states.) 
- if (needflags == 0) - flag &= kFlagMatch; - - // NOTE(rsc): The code above cannot do flag &= needflags, - // because if the right flags were present to pass the current - // kInstEmptyWidth instructions, new kInstEmptyWidth instructions - // might be reached that in turn need different flags. - // The only sure thing is that if there are no kInstEmptyWidth - // instructions at all, no flags will be needed. - // We could do the extra work to figure out the full set of - // possibly needed flags by exploring past the kInstEmptyWidth - // instructions, but the check above -- are any flags needed - // at all? -- handles the most common case. More fine-grained - // analysis can only be justified by measurements showing that - // too many redundant states are being allocated. - - // If there are no Insts in the list, it's a dead state, - // which is useful to signal with a special pointer so that - // the execution loop can stop early. This is only okay - // if the state is *not* a matching state. - if (n == 0 && flag == 0) { + if (ip->opcode() == kInstEmptyWidth) + needflags |= ip->empty(); + if (ip->opcode() == kInstMatch && !prog_->anchor_end()) + sawmatch = true; + break; + } + } + DCHECK_LE(n, q->size()); + if (n > 0 && inst[n-1] == Mark) + n--; + + // If there are no empty-width instructions waiting to execute, + // then the extra flag bits will not be used, so there is no + // point in saving them. (Discarding them reduces the number + // of distinct states.) + if (needflags == 0) + flag &= kFlagMatch; + + // NOTE(rsc): The code above cannot do flag &= needflags, + // because if the right flags were present to pass the current + // kInstEmptyWidth instructions, new kInstEmptyWidth instructions + // might be reached that in turn need different flags. + // The only sure thing is that if there are no kInstEmptyWidth + // instructions at all, no flags will be needed. 
+ // We could do the extra work to figure out the full set of + // possibly needed flags by exploring past the kInstEmptyWidth + // instructions, but the check above -- are any flags needed + // at all? -- handles the most common case. More fine-grained + // analysis can only be justified by measurements showing that + // too many redundant states are being allocated. + + // If there are no Insts in the list, it's a dead state, + // which is useful to signal with a special pointer so that + // the execution loop can stop early. This is only okay + // if the state is *not* a matching state. + if (n == 0 && flag == 0) { if (ExtraDebug) - fprintf(stderr, " -> DeadState\n"); - return DeadState; - } - - // If we're in longest match mode, the state is a sequence of - // unordered state sets separated by Marks. Sort each set - // to canonicalize, to reduce the number of distinct sets stored. - if (kind_ == Prog::kLongestMatch) { + fprintf(stderr, " -> DeadState\n"); + return DeadState; + } + + // If we're in longest match mode, the state is a sequence of + // unordered state sets separated by Marks. Sort each set + // to canonicalize, to reduce the number of distinct sets stored. + if (kind_ == Prog::kLongestMatch) { int* ip = inst.data(); - int* ep = ip + n; - while (ip < ep) { - int* markp = ip; - while (markp < ep && *markp != Mark) - markp++; + int* ep = ip + n; + while (ip < ep) { + int* markp = ip; + while (markp < ep && *markp != Mark) + markp++; std::sort(ip, markp); - if (markp < ep) - markp++; - ip = markp; - } - } - + if (markp < ep) + markp++; + ip = markp; + } + } + // If we're in many match mode, canonicalize for similar reasons: // we have an unordered set of states (i.e. we don't have Marks) // and sorting will reduce the number of distinct sets stored. @@ -717,47 +717,47 @@ DFA::State* DFA::WorkqToCachedState(Workq* q, Workq* mq, uint32_t flag) { } } - // Save the needed empty-width flags in the top bits for use later. 
- flag |= needflags << kFlagNeedShift; - + // Save the needed empty-width flags in the top bits for use later. + flag |= needflags << kFlagNeedShift; + State* state = CachedState(inst.data(), n, flag); - return state; -} - -// Looks in the State cache for a State matching inst, ninst, flag. -// If one is found, returns it. If one is not found, allocates one, -// inserts it in the cache, and returns it. + return state; +} + +// Looks in the State cache for a State matching inst, ninst, flag. +// If one is found, returns it. If one is not found, allocates one, +// inserts it in the cache, and returns it. DFA::State* DFA::CachedState(int* inst, int ninst, uint32_t flag) { //mutex_.AssertHeld(); - - // Look in the cache for a pre-existing state. + + // Look in the cache for a pre-existing state. // We have to initialise the struct like this because otherwise // MSVC will complain about the flexible array member. :( State state; state.inst_ = inst; state.ninst_ = ninst; state.flag_ = flag; - StateSet::iterator it = state_cache_.find(&state); - if (it != state_cache_.end()) { + StateSet::iterator it = state_cache_.find(&state); + if (it != state_cache_.end()) { if (ExtraDebug) - fprintf(stderr, " -cached-> %s\n", DumpState(*it).c_str()); - return *it; - } - - // Must have enough memory for new state. - // In addition to what we're going to allocate, + fprintf(stderr, " -cached-> %s\n", DumpState(*it).c_str()); + return *it; + } + + // Must have enough memory for new state. + // In addition to what we're going to allocate, // the state cache hash table seems to incur about 40 bytes per - // State*, empirically. + // State*, empirically. 
const int kStateCacheOverhead = 40; - int nnext = prog_->bytemap_range() + 1; // + 1 for kByteEndText slot + int nnext = prog_->bytemap_range() + 1; // + 1 for kByteEndText slot int mem = sizeof(State) + nnext*sizeof(std::atomic<State*>) + ninst*sizeof(int); - if (mem_budget_ < mem + kStateCacheOverhead) { - mem_budget_ = -1; - return NULL; - } - mem_budget_ -= mem + kStateCacheOverhead; - + if (mem_budget_ < mem + kStateCacheOverhead) { + mem_budget_ = -1; + return NULL; + } + mem_budget_ -= mem + kStateCacheOverhead; + // Allocate new state along with room for next_ and inst_. char* space = std::allocator<char>().allocate(mem); State* s = new (space) State; @@ -767,19 +767,19 @@ DFA::State* DFA::CachedState(int* inst, int ninst, uint32_t flag) { for (int i = 0; i < nnext; i++) (void) new (s->next_ + i) std::atomic<State*>(NULL); s->inst_ = new (s->next_ + nnext) int[ninst]; - memmove(s->inst_, inst, ninst*sizeof s->inst_[0]); - s->ninst_ = ninst; - s->flag_ = flag; + memmove(s->inst_, inst, ninst*sizeof s->inst_[0]); + s->ninst_ = ninst; + s->flag_ = flag; if (ExtraDebug) - fprintf(stderr, " -> %s\n", DumpState(s).c_str()); - - // Put state in cache and return it. - state_cache_.insert(s); - return s; -} - -// Clear the cache. Must hold cache_mutex_.w or be in destructor. -void DFA::ClearCache() { + fprintf(stderr, " -> %s\n", DumpState(s).c_str()); + + // Put state in cache and return it. + state_cache_.insert(s); + return s; +} + +// Clear the cache. Must hold cache_mutex_.w or be in destructor. +void DFA::ClearCache() { StateSet::iterator begin = state_cache_.begin(); StateSet::iterator end = state_cache_.end(); while (begin != end) { @@ -793,15 +793,15 @@ void DFA::ClearCache() { ninst*sizeof(int); std::allocator<char>().deallocate(reinterpret_cast<char*>(*tmp), mem); } - state_cache_.clear(); -} - -// Copies insts in state s to the work queue q. 
-void DFA::StateToWorkq(State* s, Workq* q) { - q->clear(); - for (int i = 0; i < s->ninst_; i++) { + state_cache_.clear(); +} + +// Copies insts in state s to the work queue q. +void DFA::StateToWorkq(State* s, Workq* q) { + q->clear(); + for (int i = 0; i < s->ninst_; i++) { if (s->inst_[i] == Mark) { - q->mark(); + q->mark(); } else if (s->inst_[i] == MatchSep) { // Nothing after this is an instruction! break; @@ -809,12 +809,12 @@ void DFA::StateToWorkq(State* s, Workq* q) { // Explore from the head of the list. AddToQueue(q, s->inst_[i], s->flag_ & kFlagEmptyMask); } - } -} - + } +} + // Adds ip to the work queue, following empty arrows according to flag. void DFA::AddToQueue(Workq* q, int id, uint32_t flag) { - + // Use stack_ to hold our stack of instructions yet to process. // It was preallocated as follows: // one entry per Capture; @@ -825,66 +825,66 @@ void DFA::AddToQueue(Workq* q, int id, uint32_t flag) { // When using marks, we also added nmark == prog_->size(). // (Otherwise, nmark == 0.) int* stk = stack_.data(); - int nstk = 0; - - stk[nstk++] = id; - while (nstk > 0) { + int nstk = 0; + + stk[nstk++] = id; + while (nstk > 0) { DCHECK_LE(nstk, stack_.size()); - id = stk[--nstk]; - + id = stk[--nstk]; + Loop: - if (id == Mark) { - q->mark(); - continue; - } - - if (id == 0) - continue; - - // If ip is already on the queue, nothing to do. + if (id == Mark) { + q->mark(); + continue; + } + + if (id == 0) + continue; + + // If ip is already on the queue, nothing to do. // Otherwise add it. We don't actually keep all the // ones that get added, but adding all of them here - // increases the likelihood of q->contains(id), - // reducing the amount of duplicated work. - if (q->contains(id)) - continue; - q->insert_new(id); - - // Process instruction. - Prog::Inst* ip = prog_->inst(id); - switch (ip->opcode()) { + // increases the likelihood of q->contains(id), + // reducing the amount of duplicated work. 
+ if (q->contains(id)) + continue; + q->insert_new(id); + + // Process instruction. + Prog::Inst* ip = prog_->inst(id); + switch (ip->opcode()) { default: LOG(DFATAL) << "unhandled opcode: " << ip->opcode(); - break; - - case kInstByteRange: // just save these on the queue - case kInstMatch: + break; + + case kInstByteRange: // just save these on the queue + case kInstMatch: if (ip->last()) break; id = id+1; goto Loop; - - case kInstCapture: // DFA treats captures as no-ops. - case kInstNop: + + case kInstCapture: // DFA treats captures as no-ops. + case kInstNop: if (!ip->last()) stk[nstk++] = id+1; - + // If this instruction is the [00-FF]* loop at the beginning of // a leftmost-longest unanchored search, separate with a Mark so // that future threads (which will start farther to the right in // the input string) are lower priority than current threads. if (ip->opcode() == kInstNop && q->maxmark() > 0 && - id == prog_->start_unanchored() && id != prog_->start()) - stk[nstk++] = Mark; + id == prog_->start_unanchored() && id != prog_->start()) + stk[nstk++] = Mark; id = ip->out(); goto Loop; - + case kInstAltMatch: DCHECK(!ip->last()); id = id+1; goto Loop; - case kInstEmptyWidth: + case kInstEmptyWidth: if (!ip->last()) stk[nstk++] = id+1; @@ -893,67 +893,67 @@ void DFA::AddToQueue(Workq* q, int id, uint32_t flag) { break; id = ip->out(); goto Loop; - } - } -} - -// Running of work queues. In the work queue, order matters: -// the queue is sorted in priority order. If instruction i comes before j, -// then the instructions that i produces during the run must come before -// the ones that j produces. In order to keep this invariant, all the -// work queue runners have to take an old queue to process and then -// also a new queue to fill in. It's not acceptable to add to the end of -// an existing queue, because new instructions will not end up in the -// correct position. - -// Runs the work queue, processing the empty strings indicated by flag. 
-// For example, flag == kEmptyBeginLine|kEmptyEndLine means to match -// both ^ and $. It is important that callers pass all flags at once: -// processing both ^ and $ is not the same as first processing only ^ -// and then processing only $. Doing the two-step sequence won't match -// ^$^$^$ but processing ^ and $ simultaneously will (and is the behavior -// exhibited by existing implementations). + } + } +} + +// Running of work queues. In the work queue, order matters: +// the queue is sorted in priority order. If instruction i comes before j, +// then the instructions that i produces during the run must come before +// the ones that j produces. In order to keep this invariant, all the +// work queue runners have to take an old queue to process and then +// also a new queue to fill in. It's not acceptable to add to the end of +// an existing queue, because new instructions will not end up in the +// correct position. + +// Runs the work queue, processing the empty strings indicated by flag. +// For example, flag == kEmptyBeginLine|kEmptyEndLine means to match +// both ^ and $. It is important that callers pass all flags at once: +// processing both ^ and $ is not the same as first processing only ^ +// and then processing only $. Doing the two-step sequence won't match +// ^$^$^$ but processing ^ and $ simultaneously will (and is the behavior +// exhibited by existing implementations). void DFA::RunWorkqOnEmptyString(Workq* oldq, Workq* newq, uint32_t flag) { - newq->clear(); - for (Workq::iterator i = oldq->begin(); i != oldq->end(); ++i) { - if (oldq->is_mark(*i)) - AddToQueue(newq, Mark, flag); - else - AddToQueue(newq, *i, flag); - } -} - -// Runs the work queue, processing the single byte c followed by any empty -// strings indicated by flag. For example, c == 'a' and flag == kEmptyEndLine, -// means to match c$. Sets the bool *ismatch to true if the end of the -// regular expression program has been reached (the regexp has matched). 
-void DFA::RunWorkqOnByte(Workq* oldq, Workq* newq, + newq->clear(); + for (Workq::iterator i = oldq->begin(); i != oldq->end(); ++i) { + if (oldq->is_mark(*i)) + AddToQueue(newq, Mark, flag); + else + AddToQueue(newq, *i, flag); + } +} + +// Runs the work queue, processing the single byte c followed by any empty +// strings indicated by flag. For example, c == 'a' and flag == kEmptyEndLine, +// means to match c$. Sets the bool *ismatch to true if the end of the +// regular expression program has been reached (the regexp has matched). +void DFA::RunWorkqOnByte(Workq* oldq, Workq* newq, int c, uint32_t flag, bool* ismatch) { //mutex_.AssertHeld(); - - newq->clear(); - for (Workq::iterator i = oldq->begin(); i != oldq->end(); ++i) { - if (oldq->is_mark(*i)) { - if (*ismatch) - return; - newq->mark(); - continue; - } - int id = *i; - Prog::Inst* ip = prog_->inst(id); - switch (ip->opcode()) { + + newq->clear(); + for (Workq::iterator i = oldq->begin(); i != oldq->end(); ++i) { + if (oldq->is_mark(*i)) { + if (*ismatch) + return; + newq->mark(); + continue; + } + int id = *i; + Prog::Inst* ip = prog_->inst(id); + switch (ip->opcode()) { default: LOG(DFATAL) << "unhandled opcode: " << ip->opcode(); break; - case kInstFail: // never succeeds - case kInstCapture: // already followed - case kInstNop: // already followed - case kInstAltMatch: // already followed - case kInstEmptyWidth: // already followed - break; - - case kInstByteRange: // can follow if c is in range + case kInstFail: // never succeeds + case kInstCapture: // already followed + case kInstNop: // already followed + case kInstAltMatch: // already followed + case kInstEmptyWidth: // already followed + break; + + case kInstByteRange: // can follow if c is in range if (!ip->Matches(c)) break; AddToQueue(newq, ip->out(), flag); @@ -969,363 +969,363 @@ void DFA::RunWorkqOnByte(Workq* oldq, Workq* newq, ++ip; i += ip - ip0; } - break; - - case kInstMatch: + break; + + case kInstMatch: if (prog_->anchor_end() && c 
!= kByteEndText && kind_ != Prog::kManyMatch) - break; - *ismatch = true; + break; + *ismatch = true; if (kind_ == Prog::kFirstMatch) { - // Can stop processing work queue since we found a match. - return; - } - break; - } - } - + // Can stop processing work queue since we found a match. + return; + } + break; + } + } + if (ExtraDebug) fprintf(stderr, "%s on %d[%#x] -> %s [%d]\n", DumpWorkq(oldq).c_str(), c, flag, DumpWorkq(newq).c_str(), *ismatch); -} - -// Processes input byte c in state, returning new state. -// Caller does not hold mutex. -DFA::State* DFA::RunStateOnByteUnlocked(State* state, int c) { - // Keep only one RunStateOnByte going - // even if the DFA is being run by multiple threads. - MutexLock l(&mutex_); - return RunStateOnByte(state, c); -} - -// Processes input byte c in state, returning new state. -DFA::State* DFA::RunStateOnByte(State* state, int c) { +} + +// Processes input byte c in state, returning new state. +// Caller does not hold mutex. +DFA::State* DFA::RunStateOnByteUnlocked(State* state, int c) { + // Keep only one RunStateOnByte going + // even if the DFA is being run by multiple threads. + MutexLock l(&mutex_); + return RunStateOnByte(state, c); +} + +// Processes input byte c in state, returning new state. +DFA::State* DFA::RunStateOnByte(State* state, int c) { //mutex_.AssertHeld(); - if (state <= SpecialStateMax) { - if (state == FullMatchState) { - // It is convenient for routines like PossibleMatchRange - // if we implement RunStateOnByte for FullMatchState: - // once you get into this state you never get out, - // so it's pretty easy. - return FullMatchState; - } - if (state == DeadState) { - LOG(DFATAL) << "DeadState in RunStateOnByte"; - return NULL; - } - if (state == NULL) { - LOG(DFATAL) << "NULL state in RunStateOnByte"; - return NULL; - } - LOG(DFATAL) << "Unexpected special state in RunStateOnByte"; - return NULL; - } - - // If someone else already computed this, return it. 
+ if (state <= SpecialStateMax) { + if (state == FullMatchState) { + // It is convenient for routines like PossibleMatchRange + // if we implement RunStateOnByte for FullMatchState: + // once you get into this state you never get out, + // so it's pretty easy. + return FullMatchState; + } + if (state == DeadState) { + LOG(DFATAL) << "DeadState in RunStateOnByte"; + return NULL; + } + if (state == NULL) { + LOG(DFATAL) << "NULL state in RunStateOnByte"; + return NULL; + } + LOG(DFATAL) << "Unexpected special state in RunStateOnByte"; + return NULL; + } + + // If someone else already computed this, return it. State* ns = state->next_[ByteMap(c)].load(std::memory_order_relaxed); if (ns != NULL) return ns; - - // Convert state into Workq. - StateToWorkq(state, q0_); - - // Flags marking the kinds of empty-width things (^ $ etc) - // around this byte. Before the byte we have the flags recorded - // in the State structure itself. After the byte we have - // nothing yet (but that will change: read on). + + // Convert state into Workq. + StateToWorkq(state, q0_); + + // Flags marking the kinds of empty-width things (^ $ etc) + // around this byte. Before the byte we have the flags recorded + // in the State structure itself. After the byte we have + // nothing yet (but that will change: read on). uint32_t needflag = state->flag_ >> kFlagNeedShift; uint32_t beforeflag = state->flag_ & kFlagEmptyMask; uint32_t oldbeforeflag = beforeflag; uint32_t afterflag = 0; - - if (c == '\n') { - // Insert implicit $ and ^ around \n - beforeflag |= kEmptyEndLine; - afterflag |= kEmptyBeginLine; - } - - if (c == kByteEndText) { - // Insert implicit $ and \z before the fake "end text" byte. - beforeflag |= kEmptyEndLine | kEmptyEndText; - } - - // The state flag kFlagLastWord says whether the last - // byte processed was a word character. Use that info to - // insert empty-width (non-)word boundaries. 
+ + if (c == '\n') { + // Insert implicit $ and ^ around \n + beforeflag |= kEmptyEndLine; + afterflag |= kEmptyBeginLine; + } + + if (c == kByteEndText) { + // Insert implicit $ and \z before the fake "end text" byte. + beforeflag |= kEmptyEndLine | kEmptyEndText; + } + + // The state flag kFlagLastWord says whether the last + // byte processed was a word character. Use that info to + // insert empty-width (non-)word boundaries. bool islastword = (state->flag_ & kFlagLastWord) != 0; bool isword = c != kByteEndText && Prog::IsWordChar(static_cast<uint8_t>(c)); - if (isword == islastword) - beforeflag |= kEmptyNonWordBoundary; - else - beforeflag |= kEmptyWordBoundary; - - // Okay, finally ready to run. - // Only useful to rerun on empty string if there are new, useful flags. - if (beforeflag & ~oldbeforeflag & needflag) { - RunWorkqOnEmptyString(q0_, q1_, beforeflag); + if (isword == islastword) + beforeflag |= kEmptyNonWordBoundary; + else + beforeflag |= kEmptyWordBoundary; + + // Okay, finally ready to run. + // Only useful to rerun on empty string if there are new, useful flags. + if (beforeflag & ~oldbeforeflag & needflag) { + RunWorkqOnEmptyString(q0_, q1_, beforeflag); using std::swap; - swap(q0_, q1_); - } - bool ismatch = false; + swap(q0_, q1_); + } + bool ismatch = false; RunWorkqOnByte(q0_, q1_, c, afterflag, &ismatch); using std::swap; swap(q0_, q1_); - - // Save afterflag along with ismatch and isword in new state. + + // Save afterflag along with ismatch and isword in new state. uint32_t flag = afterflag; - if (ismatch) - flag |= kFlagMatch; - if (isword) - flag |= kFlagLastWord; - + if (ismatch) + flag |= kFlagMatch; + if (isword) + flag |= kFlagLastWord; + if (ismatch && kind_ == Prog::kManyMatch) ns = WorkqToCachedState(q0_, q1_, flag); else ns = WorkqToCachedState(q0_, NULL, flag); - + // Flush ns before linking to it. - // Write barrier before updating state->next_ so that the - // main search loop can proceed without any locking, for speed. 
- // (Otherwise it would need one mutex operation per input byte.) + // Write barrier before updating state->next_ so that the + // main search loop can proceed without any locking, for speed. + // (Otherwise it would need one mutex operation per input byte.) state->next_[ByteMap(c)].store(ns, std::memory_order_release); - return ns; -} - - -////////////////////////////////////////////////////////////////////// -// DFA cache reset. - -// Reader-writer lock helper. -// -// The DFA uses a reader-writer mutex to protect the state graph itself. -// Traversing the state graph requires holding the mutex for reading, -// and discarding the state graph and starting over requires holding the -// lock for writing. If a search needs to expand the graph but is out -// of memory, it will need to drop its read lock and then acquire the -// write lock. Since it cannot then atomically downgrade from write lock -// to read lock, it runs the rest of the search holding the write lock. -// (This probably helps avoid repeated contention, but really the decision -// is forced by the Mutex interface.) It's a bit complicated to keep -// track of whether the lock is held for reading or writing and thread -// that through the search, so instead we encapsulate it in the RWLocker -// and pass that around. - -class DFA::RWLocker { - public: + return ns; +} + + +////////////////////////////////////////////////////////////////////// +// DFA cache reset. + +// Reader-writer lock helper. +// +// The DFA uses a reader-writer mutex to protect the state graph itself. +// Traversing the state graph requires holding the mutex for reading, +// and discarding the state graph and starting over requires holding the +// lock for writing. If a search needs to expand the graph but is out +// of memory, it will need to drop its read lock and then acquire the +// write lock. Since it cannot then atomically downgrade from write lock +// to read lock, it runs the rest of the search holding the write lock. 
+// (This probably helps avoid repeated contention, but really the decision +// is forced by the Mutex interface.) It's a bit complicated to keep +// track of whether the lock is held for reading or writing and thread +// that through the search, so instead we encapsulate it in the RWLocker +// and pass that around. + +class DFA::RWLocker { + public: explicit RWLocker(CacheMutex* mu); - ~RWLocker(); - - // If the lock is only held for reading right now, - // drop the read lock and re-acquire for writing. - // Subsequent calls to LockForWriting are no-ops. - // Notice that the lock is *released* temporarily. - void LockForWriting(); - - private: + ~RWLocker(); + + // If the lock is only held for reading right now, + // drop the read lock and re-acquire for writing. + // Subsequent calls to LockForWriting are no-ops. + // Notice that the lock is *released* temporarily. + void LockForWriting(); + + private: CacheMutex* mu_; - bool writing_; - + bool writing_; + RWLocker(const RWLocker&) = delete; RWLocker& operator=(const RWLocker&) = delete; -}; - +}; + DFA::RWLocker::RWLocker(CacheMutex* mu) : mu_(mu), writing_(false) { - mu_->ReaderLock(); -} - + mu_->ReaderLock(); +} + // This function is marked as NO_THREAD_SAFETY_ANALYSIS because // the annotations don't support lock upgrade. -void DFA::RWLocker::LockForWriting() NO_THREAD_SAFETY_ANALYSIS { - if (!writing_) { - mu_->ReaderUnlock(); +void DFA::RWLocker::LockForWriting() NO_THREAD_SAFETY_ANALYSIS { + if (!writing_) { + mu_->ReaderUnlock(); mu_->WriterLock(); - writing_ = true; - } -} - -DFA::RWLocker::~RWLocker() { + writing_ = true; + } +} + +DFA::RWLocker::~RWLocker() { if (!writing_) mu_->ReaderUnlock(); else - mu_->WriterUnlock(); -} - - -// When the DFA's State cache fills, we discard all the states in the -// cache and start over. Many threads can be using and adding to the -// cache at the same time, so we synchronize using the cache_mutex_ -// to keep from stepping on other threads. 
Specifically, all the -// threads using the current cache hold cache_mutex_ for reading. -// When a thread decides to flush the cache, it drops cache_mutex_ -// and then re-acquires it for writing. That ensures there are no -// other threads accessing the cache anymore. The rest of the search -// runs holding cache_mutex_ for writing, avoiding any contention -// with or cache pollution caused by other threads. - -void DFA::ResetCache(RWLocker* cache_lock) { - // Re-acquire the cache_mutex_ for writing (exclusive use). - cache_lock->LockForWriting(); - + mu_->WriterUnlock(); +} + + +// When the DFA's State cache fills, we discard all the states in the +// cache and start over. Many threads can be using and adding to the +// cache at the same time, so we synchronize using the cache_mutex_ +// to keep from stepping on other threads. Specifically, all the +// threads using the current cache hold cache_mutex_ for reading. +// When a thread decides to flush the cache, it drops cache_mutex_ +// and then re-acquires it for writing. That ensures there are no +// other threads accessing the cache anymore. The rest of the search +// runs holding cache_mutex_ for writing, avoiding any contention +// with or cache pollution caused by other threads. + +void DFA::ResetCache(RWLocker* cache_lock) { + // Re-acquire the cache_mutex_ for writing (exclusive use). + cache_lock->LockForWriting(); + hooks::GetDFAStateCacheResetHook()({ state_budget_, state_cache_.size(), }); - // Clear the cache, reset the memory budget. + // Clear the cache, reset the memory budget. for (int i = 0; i < kMaxStart; i++) start_[i].start.store(NULL, std::memory_order_relaxed); - ClearCache(); - mem_budget_ = state_budget_; -} - -// Typically, a couple States do need to be preserved across a cache -// reset, like the State at the current point in the search. -// The StateSaver class helps keep States across cache resets. 
-// It makes a copy of the state's guts outside the cache (before the reset) -// and then can be asked, after the reset, to recreate the State -// in the new cache. For example, in a DFA method ("this" is a DFA): -// -// StateSaver saver(this, s); -// ResetCache(cache_lock); -// s = saver.Restore(); -// -// The saver should always have room in the cache to re-create the state, -// because resetting the cache locks out all other threads, and the cache -// is known to have room for at least a couple states (otherwise the DFA -// constructor fails). - -class DFA::StateSaver { - public: - explicit StateSaver(DFA* dfa, State* state); - ~StateSaver(); - - // Recreates and returns a state equivalent to the - // original state passed to the constructor. - // Returns NULL if the cache has filled, but - // since the DFA guarantees to have room in the cache - // for a couple states, should never return NULL - // if used right after ResetCache. - State* Restore(); - - private: - DFA* dfa_; // the DFA to use - int* inst_; // saved info from State - int ninst_; + ClearCache(); + mem_budget_ = state_budget_; +} + +// Typically, a couple States do need to be preserved across a cache +// reset, like the State at the current point in the search. +// The StateSaver class helps keep States across cache resets. +// It makes a copy of the state's guts outside the cache (before the reset) +// and then can be asked, after the reset, to recreate the State +// in the new cache. For example, in a DFA method ("this" is a DFA): +// +// StateSaver saver(this, s); +// ResetCache(cache_lock); +// s = saver.Restore(); +// +// The saver should always have room in the cache to re-create the state, +// because resetting the cache locks out all other threads, and the cache +// is known to have room for at least a couple states (otherwise the DFA +// constructor fails). 
+ +class DFA::StateSaver { + public: + explicit StateSaver(DFA* dfa, State* state); + ~StateSaver(); + + // Recreates and returns a state equivalent to the + // original state passed to the constructor. + // Returns NULL if the cache has filled, but + // since the DFA guarantees to have room in the cache + // for a couple states, should never return NULL + // if used right after ResetCache. + State* Restore(); + + private: + DFA* dfa_; // the DFA to use + int* inst_; // saved info from State + int ninst_; uint32_t flag_; - bool is_special_; // whether original state was special - State* special_; // if is_special_, the original state - + bool is_special_; // whether original state was special + State* special_; // if is_special_, the original state + StateSaver(const StateSaver&) = delete; StateSaver& operator=(const StateSaver&) = delete; -}; - -DFA::StateSaver::StateSaver(DFA* dfa, State* state) { - dfa_ = dfa; - if (state <= SpecialStateMax) { - inst_ = NULL; - ninst_ = 0; - flag_ = 0; - is_special_ = true; - special_ = state; - return; - } - is_special_ = false; - special_ = NULL; - flag_ = state->flag_; - ninst_ = state->ninst_; - inst_ = new int[ninst_]; - memmove(inst_, state->inst_, ninst_*sizeof inst_[0]); -} - -DFA::StateSaver::~StateSaver() { - if (!is_special_) - delete[] inst_; -} - -DFA::State* DFA::StateSaver::Restore() { - if (is_special_) - return special_; - MutexLock l(&dfa_->mutex_); - State* s = dfa_->CachedState(inst_, ninst_, flag_); - if (s == NULL) - LOG(DFATAL) << "StateSaver failed to restore state."; - return s; -} - - -////////////////////////////////////////////////////////////////////// -// -// DFA execution. -// -// The basic search loop is easy: start in a state s and then for each -// byte c in the input, s = s->next[c]. -// -// This simple description omits a few efficiency-driven complications. 
-// -// First, the State graph is constructed incrementally: it is possible -// that s->next[c] is null, indicating that that state has not been -// fully explored. In this case, RunStateOnByte must be invoked to -// determine the next state, which is cached in s->next[c] to save -// future effort. An alternative reason for s->next[c] to be null is -// that the DFA has reached a so-called "dead state", in which any match -// is no longer possible. In this case RunStateOnByte will return NULL -// and the processing of the string can stop early. -// -// Second, a 256-element pointer array for s->next_ makes each State -// quite large (2kB on 64-bit machines). Instead, dfa->bytemap_[] -// maps from bytes to "byte classes" and then next_ only needs to have -// as many pointers as there are byte classes. A byte class is simply a -// range of bytes that the regexp never distinguishes between. -// A regexp looking for a[abc] would have four byte ranges -- 0 to 'a'-1, -// 'a', 'b' to 'c', and 'c' to 0xFF. The bytemap slows us a little bit -// but in exchange we typically cut the size of a State (and thus our -// memory footprint) by about 5-10x. The comments still refer to -// s->next[c] for simplicity, but code should refer to s->next_[bytemap_[c]]. -// -// Third, it is common for a DFA for an unanchored match to begin in a -// state in which only one particular byte value can take the DFA to a -// different state. That is, s->next[c] != s for only one c. In this -// situation, the DFA can do better than executing the simple loop. -// Instead, it can call memchr to search very quickly for the byte c. 
-// Whether the start state has this property is determined during a +}; + +DFA::StateSaver::StateSaver(DFA* dfa, State* state) { + dfa_ = dfa; + if (state <= SpecialStateMax) { + inst_ = NULL; + ninst_ = 0; + flag_ = 0; + is_special_ = true; + special_ = state; + return; + } + is_special_ = false; + special_ = NULL; + flag_ = state->flag_; + ninst_ = state->ninst_; + inst_ = new int[ninst_]; + memmove(inst_, state->inst_, ninst_*sizeof inst_[0]); +} + +DFA::StateSaver::~StateSaver() { + if (!is_special_) + delete[] inst_; +} + +DFA::State* DFA::StateSaver::Restore() { + if (is_special_) + return special_; + MutexLock l(&dfa_->mutex_); + State* s = dfa_->CachedState(inst_, ninst_, flag_); + if (s == NULL) + LOG(DFATAL) << "StateSaver failed to restore state."; + return s; +} + + +////////////////////////////////////////////////////////////////////// +// +// DFA execution. +// +// The basic search loop is easy: start in a state s and then for each +// byte c in the input, s = s->next[c]. +// +// This simple description omits a few efficiency-driven complications. +// +// First, the State graph is constructed incrementally: it is possible +// that s->next[c] is null, indicating that that state has not been +// fully explored. In this case, RunStateOnByte must be invoked to +// determine the next state, which is cached in s->next[c] to save +// future effort. An alternative reason for s->next[c] to be null is +// that the DFA has reached a so-called "dead state", in which any match +// is no longer possible. In this case RunStateOnByte will return NULL +// and the processing of the string can stop early. +// +// Second, a 256-element pointer array for s->next_ makes each State +// quite large (2kB on 64-bit machines). Instead, dfa->bytemap_[] +// maps from bytes to "byte classes" and then next_ only needs to have +// as many pointers as there are byte classes. A byte class is simply a +// range of bytes that the regexp never distinguishes between. 
+// A regexp looking for a[abc] would have four byte ranges -- 0 to 'a'-1, +// 'a', 'b' to 'c', and 'c' to 0xFF. The bytemap slows us a little bit +// but in exchange we typically cut the size of a State (and thus our +// memory footprint) by about 5-10x. The comments still refer to +// s->next[c] for simplicity, but code should refer to s->next_[bytemap_[c]]. +// +// Third, it is common for a DFA for an unanchored match to begin in a +// state in which only one particular byte value can take the DFA to a +// different state. That is, s->next[c] != s for only one c. In this +// situation, the DFA can do better than executing the simple loop. +// Instead, it can call memchr to search very quickly for the byte c. +// Whether the start state has this property is determined during a // pre-compilation pass and the "can_prefix_accel" argument is set. -// -// Fourth, the desired behavior is to search for the leftmost-best match -// (approximately, the same one that Perl would find), which is not -// necessarily the match ending earliest in the string. Each time a -// match is found, it must be noted, but the DFA must continue on in -// hope of finding a higher-priority match. In some cases, the caller only -// cares whether there is any match at all, not which one is found. -// The "want_earliest_match" flag causes the search to stop at the first -// match found. -// -// Fifth, one algorithm that uses the DFA needs it to run over the -// input string backward, beginning at the end and ending at the beginning. -// Passing false for the "run_forward" flag causes the DFA to run backward. -// -// The checks for these last three cases, which in a naive implementation -// would be performed once per input byte, slow the general loop enough -// to merit specialized versions of the search loop for each of the -// eight possible settings of the three booleans. 
Rather than write -// eight different functions, we write one general implementation and then -// inline it to create the specialized ones. -// -// Note that matches are delayed by one byte, to make it easier to -// accomodate match conditions depending on the next input byte (like $ and \b). -// When s->next[c]->IsMatch(), it means that there is a match ending just -// *before* byte c. - -// The generic search loop. Searches text for a match, returning -// the pointer to the end of the chosen match, or NULL if no match. -// The bools are equal to the same-named variables in params, but -// making them function arguments lets the inliner specialize -// this function to each combination (see two paragraphs above). +// +// Fourth, the desired behavior is to search for the leftmost-best match +// (approximately, the same one that Perl would find), which is not +// necessarily the match ending earliest in the string. Each time a +// match is found, it must be noted, but the DFA must continue on in +// hope of finding a higher-priority match. In some cases, the caller only +// cares whether there is any match at all, not which one is found. +// The "want_earliest_match" flag causes the search to stop at the first +// match found. +// +// Fifth, one algorithm that uses the DFA needs it to run over the +// input string backward, beginning at the end and ending at the beginning. +// Passing false for the "run_forward" flag causes the DFA to run backward. +// +// The checks for these last three cases, which in a naive implementation +// would be performed once per input byte, slow the general loop enough +// to merit specialized versions of the search loop for each of the +// eight possible settings of the three booleans. Rather than write +// eight different functions, we write one general implementation and then +// inline it to create the specialized ones. 
+// +// Note that matches are delayed by one byte, to make it easier to +// accomodate match conditions depending on the next input byte (like $ and \b). +// When s->next[c]->IsMatch(), it means that there is a match ending just +// *before* byte c. + +// The generic search loop. Searches text for a match, returning +// the pointer to the end of the chosen match, or NULL if no match. +// The bools are equal to the same-named variables in params, but +// making them function arguments lets the inliner specialize +// this function to each combination (see two paragraphs above). template <bool can_prefix_accel, bool want_earliest_match, bool run_forward> inline bool DFA::InlinedSearchLoop(SearchParams* params) { - State* start = params->start; + State* start = params->start; const uint8_t* bp = BytePtr(params->text.data()); // start of text const uint8_t* p = bp; // text scanning point const uint8_t* ep = BytePtr(params->text.data() + @@ -1333,20 +1333,20 @@ inline bool DFA::InlinedSearchLoop(SearchParams* params) { const uint8_t* resetp = NULL; // p at last cache reset if (!run_forward) { using std::swap; - swap(p, ep); + swap(p, ep); } - + const uint8_t* bytemap = prog_->bytemap(); const uint8_t* lastmatch = NULL; // most recent matching position in text - bool matched = false; + bool matched = false; - State* s = start; + State* s = start; if (ExtraDebug) fprintf(stderr, "@stx: %s\n", DumpState(s).c_str()); - - if (s->IsMatch()) { - matched = true; - lastmatch = p; + + if (s->IsMatch()) { + matched = true; + lastmatch = p; if (ExtraDebug) fprintf(stderr, "match @stx! 
[%s]\n", DumpState(s).c_str()); if (params->matches != NULL && kind_ == Prog::kManyMatch) { @@ -1357,13 +1357,13 @@ inline bool DFA::InlinedSearchLoop(SearchParams* params) { params->matches->insert(id); } } - if (want_earliest_match) { - params->ep = reinterpret_cast<const char*>(lastmatch); - return true; - } - } - - while (p != ep) { + if (want_earliest_match) { + params->ep = reinterpret_cast<const char*>(lastmatch); + return true; + } + } + + while (p != ep) { if (ExtraDebug) fprintf(stderr, "@%td: %s\n", p - bp, DumpState(s).c_str()); @@ -1375,95 +1375,95 @@ inline bool DFA::InlinedSearchLoop(SearchParams* params) { if (p == NULL) { p = ep; break; - } - } - - int c; - if (run_forward) - c = *p++; - else - c = *--p; - - // Note that multiple threads might be consulting - // s->next_[bytemap[c]] simultaneously. - // RunStateOnByte takes care of the appropriate locking, - // including a memory barrier so that the unlocked access - // (sometimes known as "double-checked locking") is safe. - // The alternative would be either one DFA per thread - // or one mutex operation per input byte. - // - // ns == DeadState means the state is known to be dead - // (no more matches are possible). - // ns == NULL means the state has not yet been computed - // (need to call RunStateOnByteUnlocked). - // RunStateOnByte returns ns == NULL if it is out of memory. - // ns == FullMatchState means the rest of the string matches. - // - // Okay to use bytemap[] not ByteMap() here, because - // c is known to be an actual byte and not kByteEndText. - + } + } + + int c; + if (run_forward) + c = *p++; + else + c = *--p; + + // Note that multiple threads might be consulting + // s->next_[bytemap[c]] simultaneously. + // RunStateOnByte takes care of the appropriate locking, + // including a memory barrier so that the unlocked access + // (sometimes known as "double-checked locking") is safe. + // The alternative would be either one DFA per thread + // or one mutex operation per input byte. 
+ // + // ns == DeadState means the state is known to be dead + // (no more matches are possible). + // ns == NULL means the state has not yet been computed + // (need to call RunStateOnByteUnlocked). + // RunStateOnByte returns ns == NULL if it is out of memory. + // ns == FullMatchState means the rest of the string matches. + // + // Okay to use bytemap[] not ByteMap() here, because + // c is known to be an actual byte and not kByteEndText. + State* ns = s->next_[bytemap[c]].load(std::memory_order_acquire); - if (ns == NULL) { - ns = RunStateOnByteUnlocked(s, c); - if (ns == NULL) { - // After we reset the cache, we hold cache_mutex exclusively, - // so if resetp != NULL, it means we filled the DFA state - // cache with this search alone (without any other threads). - // Benchmarks show that doing a state computation on every - // byte runs at about 0.2 MB/s, while the NFA (nfa.cc) can do the - // same at about 2 MB/s. Unless we're processing an average - // of 10 bytes per state computation, fail so that RE2 can + if (ns == NULL) { + ns = RunStateOnByteUnlocked(s, c); + if (ns == NULL) { + // After we reset the cache, we hold cache_mutex exclusively, + // so if resetp != NULL, it means we filled the DFA state + // cache with this search alone (without any other threads). + // Benchmarks show that doing a state computation on every + // byte runs at about 0.2 MB/s, while the NFA (nfa.cc) can do the + // same at about 2 MB/s. Unless we're processing an average + // of 10 bytes per state computation, fail so that RE2 can // fall back to the NFA. However, RE2::Set cannot fall back, // so we just have to keep on keeping on in that case. if (dfa_should_bail_when_slow && resetp != NULL && static_cast<size_t>(p - resetp) < 10*state_cache_.size() && kind_ != Prog::kManyMatch) { - params->failed = true; - return false; - } - resetp = p; - - // Prepare to save start and s across the reset. 
- StateSaver save_start(this, start); - StateSaver save_s(this, s); - - // Discard all the States in the cache. - ResetCache(params->cache_lock); - - // Restore start and s so we can continue. - if ((start = save_start.Restore()) == NULL || - (s = save_s.Restore()) == NULL) { - // Restore already did LOG(DFATAL). - params->failed = true; - return false; - } - ns = RunStateOnByteUnlocked(s, c); - if (ns == NULL) { - LOG(DFATAL) << "RunStateOnByteUnlocked failed after ResetCache"; - params->failed = true; - return false; - } - } - } - if (ns <= SpecialStateMax) { - if (ns == DeadState) { - params->ep = reinterpret_cast<const char*>(lastmatch); - return matched; - } - // FullMatchState - params->ep = reinterpret_cast<const char*>(ep); - return true; - } - - s = ns; - if (s->IsMatch()) { - matched = true; - // The DFA notices the match one byte late, - // so adjust p before using it in the match. - if (run_forward) - lastmatch = p - 1; - else - lastmatch = p + 1; + params->failed = true; + return false; + } + resetp = p; + + // Prepare to save start and s across the reset. + StateSaver save_start(this, start); + StateSaver save_s(this, s); + + // Discard all the States in the cache. + ResetCache(params->cache_lock); + + // Restore start and s so we can continue. + if ((start = save_start.Restore()) == NULL || + (s = save_s.Restore()) == NULL) { + // Restore already did LOG(DFATAL). + params->failed = true; + return false; + } + ns = RunStateOnByteUnlocked(s, c); + if (ns == NULL) { + LOG(DFATAL) << "RunStateOnByteUnlocked failed after ResetCache"; + params->failed = true; + return false; + } + } + } + if (ns <= SpecialStateMax) { + if (ns == DeadState) { + params->ep = reinterpret_cast<const char*>(lastmatch); + return matched; + } + // FullMatchState + params->ep = reinterpret_cast<const char*>(ep); + return true; + } + + s = ns; + if (s->IsMatch()) { + matched = true; + // The DFA notices the match one byte late, + // so adjust p before using it in the match. 
+ if (run_forward) + lastmatch = p - 1; + else + lastmatch = p + 1; if (ExtraDebug) fprintf(stderr, "match @%td! [%s]\n", lastmatch - bp, DumpState(s).c_str()); if (params->matches != NULL && kind_ == Prog::kManyMatch) { @@ -1474,63 +1474,63 @@ inline bool DFA::InlinedSearchLoop(SearchParams* params) { params->matches->insert(id); } } - if (want_earliest_match) { - params->ep = reinterpret_cast<const char*>(lastmatch); - return true; - } - } - } - - // Process one more byte to see if it triggers a match. - // (Remember, matches are delayed one byte.) + if (want_earliest_match) { + params->ep = reinterpret_cast<const char*>(lastmatch); + return true; + } + } + } + + // Process one more byte to see if it triggers a match. + // (Remember, matches are delayed one byte.) if (ExtraDebug) fprintf(stderr, "@etx: %s\n", DumpState(s).c_str()); - int lastbyte; - if (run_forward) { + int lastbyte; + if (run_forward) { if (EndPtr(params->text) == EndPtr(params->context)) - lastbyte = kByteEndText; - else + lastbyte = kByteEndText; + else lastbyte = EndPtr(params->text)[0] & 0xFF; - } else { + } else { if (BeginPtr(params->text) == BeginPtr(params->context)) - lastbyte = kByteEndText; - else + lastbyte = kByteEndText; + else lastbyte = BeginPtr(params->text)[-1] & 0xFF; - } - + } + State* ns = s->next_[ByteMap(lastbyte)].load(std::memory_order_acquire); - if (ns == NULL) { - ns = RunStateOnByteUnlocked(s, lastbyte); - if (ns == NULL) { - StateSaver save_s(this, s); - ResetCache(params->cache_lock); - if ((s = save_s.Restore()) == NULL) { - params->failed = true; - return false; - } - ns = RunStateOnByteUnlocked(s, lastbyte); - if (ns == NULL) { - LOG(DFATAL) << "RunStateOnByteUnlocked failed after Reset"; - params->failed = true; - return false; - } - } - } + if (ns == NULL) { + ns = RunStateOnByteUnlocked(s, lastbyte); + if (ns == NULL) { + StateSaver save_s(this, s); + ResetCache(params->cache_lock); + if ((s = save_s.Restore()) == NULL) { + params->failed = true; + return 
false; + } + ns = RunStateOnByteUnlocked(s, lastbyte); + if (ns == NULL) { + LOG(DFATAL) << "RunStateOnByteUnlocked failed after Reset"; + params->failed = true; + return false; + } + } + } if (ns <= SpecialStateMax) { if (ns == DeadState) { params->ep = reinterpret_cast<const char*>(lastmatch); return matched; } // FullMatchState - params->ep = reinterpret_cast<const char*>(ep); - return true; - } + params->ep = reinterpret_cast<const char*>(ep); + return true; + } s = ns; if (s->IsMatch()) { - matched = true; - lastmatch = p; + matched = true; + lastmatch = p; if (ExtraDebug) fprintf(stderr, "match @etx! [%s]\n", DumpState(s).c_str()); if (params->matches != NULL && kind_ == Prog::kManyMatch) { @@ -1541,146 +1541,146 @@ inline bool DFA::InlinedSearchLoop(SearchParams* params) { params->matches->insert(id); } } - } - - params->ep = reinterpret_cast<const char*>(lastmatch); - return matched; -} - -// Inline specializations of the general loop. -bool DFA::SearchFFF(SearchParams* params) { + } + + params->ep = reinterpret_cast<const char*>(lastmatch); + return matched; +} + +// Inline specializations of the general loop. 
+bool DFA::SearchFFF(SearchParams* params) { return InlinedSearchLoop<false, false, false>(params); -} -bool DFA::SearchFFT(SearchParams* params) { +} +bool DFA::SearchFFT(SearchParams* params) { return InlinedSearchLoop<false, false, true>(params); -} -bool DFA::SearchFTF(SearchParams* params) { +} +bool DFA::SearchFTF(SearchParams* params) { return InlinedSearchLoop<false, true, false>(params); -} -bool DFA::SearchFTT(SearchParams* params) { +} +bool DFA::SearchFTT(SearchParams* params) { return InlinedSearchLoop<false, true, true>(params); -} -bool DFA::SearchTFF(SearchParams* params) { +} +bool DFA::SearchTFF(SearchParams* params) { return InlinedSearchLoop<true, false, false>(params); -} -bool DFA::SearchTFT(SearchParams* params) { +} +bool DFA::SearchTFT(SearchParams* params) { return InlinedSearchLoop<true, false, true>(params); -} -bool DFA::SearchTTF(SearchParams* params) { +} +bool DFA::SearchTTF(SearchParams* params) { return InlinedSearchLoop<true, true, false>(params); -} -bool DFA::SearchTTT(SearchParams* params) { +} +bool DFA::SearchTTT(SearchParams* params) { return InlinedSearchLoop<true, true, true>(params); -} - -// For performance, calls the appropriate specialized version -// of InlinedSearchLoop. -bool DFA::FastSearchLoop(SearchParams* params) { - // Because the methods are private, the Searches array - // cannot be declared at top level. - static bool (DFA::*Searches[])(SearchParams*) = { - &DFA::SearchFFF, - &DFA::SearchFFT, - &DFA::SearchFTF, - &DFA::SearchFTT, - &DFA::SearchTFF, - &DFA::SearchTFT, - &DFA::SearchTTF, - &DFA::SearchTTT, - }; - +} + +// For performance, calls the appropriate specialized version +// of InlinedSearchLoop. +bool DFA::FastSearchLoop(SearchParams* params) { + // Because the methods are private, the Searches array + // cannot be declared at top level. 
+ static bool (DFA::*Searches[])(SearchParams*) = { + &DFA::SearchFFF, + &DFA::SearchFFT, + &DFA::SearchFTF, + &DFA::SearchFTT, + &DFA::SearchTFF, + &DFA::SearchTFT, + &DFA::SearchTTF, + &DFA::SearchTTT, + }; + int index = 4 * params->can_prefix_accel + - 2 * params->want_earliest_match + - 1 * params->run_forward; - return (this->*Searches[index])(params); -} - - -// The discussion of DFA execution above ignored the question of how -// to determine the initial state for the search loop. There are two -// factors that influence the choice of start state. -// -// The first factor is whether the search is anchored or not. -// The regexp program (Prog*) itself has -// two different entry points: one for anchored searches and one for -// unanchored searches. (The unanchored version starts with a leading ".*?" -// and then jumps to the anchored one.) -// -// The second factor is where text appears in the larger context, which -// determines which empty-string operators can be matched at the beginning -// of execution. If text is at the very beginning of context, \A and ^ match. -// Otherwise if text is at the beginning of a line, then ^ matches. -// Otherwise it matters whether the character before text is a word character -// or a non-word character. -// -// The two cases (unanchored vs not) and four cases (empty-string flags) -// combine to make the eight cases recorded in the DFA's begin_text_[2], -// begin_line_[2], after_wordchar_[2], and after_nonwordchar_[2] cached -// StartInfos. The start state for each is filled in the first time it -// is used for an actual search. - -// Examines text, context, and anchored to determine the right start -// state for the DFA search loop. Fills in params and returns true on success. -// Returns false on failure. -bool DFA::AnalyzeSearch(SearchParams* params) { - const StringPiece& text = params->text; - const StringPiece& context = params->context; - - // Sanity check: make sure that text lies within context. 
+ 2 * params->want_earliest_match + + 1 * params->run_forward; + return (this->*Searches[index])(params); +} + + +// The discussion of DFA execution above ignored the question of how +// to determine the initial state for the search loop. There are two +// factors that influence the choice of start state. +// +// The first factor is whether the search is anchored or not. +// The regexp program (Prog*) itself has +// two different entry points: one for anchored searches and one for +// unanchored searches. (The unanchored version starts with a leading ".*?" +// and then jumps to the anchored one.) +// +// The second factor is where text appears in the larger context, which +// determines which empty-string operators can be matched at the beginning +// of execution. If text is at the very beginning of context, \A and ^ match. +// Otherwise if text is at the beginning of a line, then ^ matches. +// Otherwise it matters whether the character before text is a word character +// or a non-word character. +// +// The two cases (unanchored vs not) and four cases (empty-string flags) +// combine to make the eight cases recorded in the DFA's begin_text_[2], +// begin_line_[2], after_wordchar_[2], and after_nonwordchar_[2] cached +// StartInfos. The start state for each is filled in the first time it +// is used for an actual search. + +// Examines text, context, and anchored to determine the right start +// state for the DFA search loop. Fills in params and returns true on success. +// Returns false on failure. +bool DFA::AnalyzeSearch(SearchParams* params) { + const StringPiece& text = params->text; + const StringPiece& context = params->context; + + // Sanity check: make sure that text lies within context. if (BeginPtr(text) < BeginPtr(context) || EndPtr(text) > EndPtr(context)) { LOG(DFATAL) << "context does not contain text"; - params->start = DeadState; - return true; - } - - // Determine correct search type. 
- int start; + params->start = DeadState; + return true; + } + + // Determine correct search type. + int start; uint32_t flags; - if (params->run_forward) { + if (params->run_forward) { if (BeginPtr(text) == BeginPtr(context)) { - start = kStartBeginText; - flags = kEmptyBeginText|kEmptyBeginLine; + start = kStartBeginText; + flags = kEmptyBeginText|kEmptyBeginLine; } else if (BeginPtr(text)[-1] == '\n') { - start = kStartBeginLine; - flags = kEmptyBeginLine; + start = kStartBeginLine; + flags = kEmptyBeginLine; } else if (Prog::IsWordChar(BeginPtr(text)[-1] & 0xFF)) { - start = kStartAfterWordChar; - flags = kFlagLastWord; - } else { - start = kStartAfterNonWordChar; - flags = 0; - } - } else { + start = kStartAfterWordChar; + flags = kFlagLastWord; + } else { + start = kStartAfterNonWordChar; + flags = 0; + } + } else { if (EndPtr(text) == EndPtr(context)) { - start = kStartBeginText; - flags = kEmptyBeginText|kEmptyBeginLine; + start = kStartBeginText; + flags = kEmptyBeginText|kEmptyBeginLine; } else if (EndPtr(text)[0] == '\n') { - start = kStartBeginLine; - flags = kEmptyBeginLine; + start = kStartBeginLine; + flags = kEmptyBeginLine; } else if (Prog::IsWordChar(EndPtr(text)[0] & 0xFF)) { - start = kStartAfterWordChar; - flags = kFlagLastWord; - } else { - start = kStartAfterNonWordChar; - flags = 0; - } - } + start = kStartAfterWordChar; + flags = kFlagLastWord; + } else { + start = kStartAfterNonWordChar; + flags = 0; + } + } if (params->anchored) - start |= kStartAnchored; - StartInfo* info = &start_[start]; - - // Try once without cache_lock for writing. - // Try again after resetting the cache - // (ResetCache will relock cache_lock for writing). 
- if (!AnalyzeSearchHelper(params, info, flags)) { - ResetCache(params->cache_lock); - if (!AnalyzeSearchHelper(params, info, flags)) { - LOG(DFATAL) << "Failed to analyze start state."; - params->failed = true; - return false; - } - } - + start |= kStartAnchored; + StartInfo* info = &start_[start]; + + // Try once without cache_lock for writing. + // Try again after resetting the cache + // (ResetCache will relock cache_lock for writing). + if (!AnalyzeSearchHelper(params, info, flags)) { + ResetCache(params->cache_lock); + if (!AnalyzeSearchHelper(params, info, flags)) { + LOG(DFATAL) << "Failed to analyze start state."; + params->failed = true; + return false; + } + } + params->start = info->start.load(std::memory_order_acquire); // Even if we could prefix accel, we cannot do so when anchored and, @@ -1695,99 +1695,99 @@ bool DFA::AnalyzeSearch(SearchParams* params) { if (ExtraDebug) fprintf(stderr, "anchored=%d fwd=%d flags=%#x state=%s can_prefix_accel=%d\n", - params->anchored, params->run_forward, flags, + params->anchored, params->run_forward, flags, DumpState(params->start).c_str(), params->can_prefix_accel); - - return true; -} - -// Fills in info if needed. Returns true on success, false on failure. -bool DFA::AnalyzeSearchHelper(SearchParams* params, StartInfo* info, + + return true; +} + +// Fills in info if needed. Returns true on success, false on failure. +bool DFA::AnalyzeSearchHelper(SearchParams* params, StartInfo* info, uint32_t flags) { // Quick check. State* start = info->start.load(std::memory_order_acquire); if (start != NULL) - return true; - - MutexLock l(&mutex_); + return true; + + MutexLock l(&mutex_); start = info->start.load(std::memory_order_relaxed); if (start != NULL) - return true; - - q0_->clear(); - AddToQueue(q0_, - params->anchored ? prog_->start() : prog_->start_unanchored(), - flags); + return true; + + q0_->clear(); + AddToQueue(q0_, + params->anchored ? 
prog_->start() : prog_->start_unanchored(), + flags); start = WorkqToCachedState(q0_, NULL, flags); if (start == NULL) - return false; - + return false; + // Synchronize with "quick check" above. info->start.store(start, std::memory_order_release); - return true; -} - -// The actual DFA search: calls AnalyzeSearch and then FastSearchLoop. -bool DFA::Search(const StringPiece& text, - const StringPiece& context, - bool anchored, - bool want_earliest_match, - bool run_forward, - bool* failed, - const char** epp, + return true; +} + +// The actual DFA search: calls AnalyzeSearch and then FastSearchLoop. +bool DFA::Search(const StringPiece& text, + const StringPiece& context, + bool anchored, + bool want_earliest_match, + bool run_forward, + bool* failed, + const char** epp, SparseSet* matches) { - *epp = NULL; - if (!ok()) { - *failed = true; - return false; - } - *failed = false; - + *epp = NULL; + if (!ok()) { + *failed = true; + return false; + } + *failed = false; + if (ExtraDebug) { - fprintf(stderr, "\nprogram:\n%s\n", prog_->DumpUnanchored().c_str()); - fprintf(stderr, "text %s anchored=%d earliest=%d fwd=%d kind %d\n", + fprintf(stderr, "\nprogram:\n%s\n", prog_->DumpUnanchored().c_str()); + fprintf(stderr, "text %s anchored=%d earliest=%d fwd=%d kind %d\n", std::string(text).c_str(), anchored, want_earliest_match, run_forward, kind_); - } - - RWLocker l(&cache_mutex_); - SearchParams params(text, context, &l); - params.anchored = anchored; - params.want_earliest_match = want_earliest_match; - params.run_forward = run_forward; - params.matches = matches; - - if (!AnalyzeSearch(¶ms)) { - *failed = true; - return false; - } - if (params.start == DeadState) + } + + RWLocker l(&cache_mutex_); + SearchParams params(text, context, &l); + params.anchored = anchored; + params.want_earliest_match = want_earliest_match; + params.run_forward = run_forward; + params.matches = matches; + + if (!AnalyzeSearch(¶ms)) { + *failed = true; + return false; + } + if (params.start 
== DeadState) return false; - if (params.start == FullMatchState) { - if (run_forward == want_earliest_match) + if (params.start == FullMatchState) { + if (run_forward == want_earliest_match) *epp = text.data(); - else + else *epp = text.data() + text.size(); - return true; - } + return true; + } if (ExtraDebug) - fprintf(stderr, "start %s\n", DumpState(params.start).c_str()); - bool ret = FastSearchLoop(¶ms); - if (params.failed) { - *failed = true; - return false; - } - *epp = params.ep; - return ret; -} - -DFA* Prog::GetDFA(MatchKind kind) { - // For a forward DFA, half the memory goes to each DFA. + fprintf(stderr, "start %s\n", DumpState(params.start).c_str()); + bool ret = FastSearchLoop(¶ms); + if (params.failed) { + *failed = true; + return false; + } + *epp = params.ep; + return ret; +} + +DFA* Prog::GetDFA(MatchKind kind) { + // For a forward DFA, half the memory goes to each DFA. // However, if it is a "many match" DFA, then there is // no counterpart with which the memory must be shared. // - // For a reverse DFA, all the memory goes to the - // "longest match" DFA, because RE2 never does reverse - // "first match" searches. + // For a reverse DFA, all the memory goes to the + // "longest match" DFA, because RE2 never does reverse + // "first match" searches. if (kind == kFirstMatch) { std::call_once(dfa_first_once_, [](Prog* prog) { prog->dfa_first_ = new DFA(prog, kFirstMatch, prog->dfa_mem_ / 2); @@ -1806,55 +1806,55 @@ DFA* Prog::GetDFA(MatchKind kind) { prog->dfa_longest_ = new DFA(prog, kLongestMatch, prog->dfa_mem_); }, this); return dfa_longest_; - } + } } - + void Prog::DeleteDFA(DFA* dfa) { delete dfa; -} - -// Executes the regexp program to search in text, -// which itself is inside the larger context. (As a convenience, -// passing a NULL context is equivalent to passing text.) -// Returns true if a match is found, false if not. 
-// If a match is found, fills in match0->end() to point at the end of the match -// and sets match0->begin() to text.begin(), since the DFA can't track -// where the match actually began. -// -// This is the only external interface (class DFA only exists in this file). -// -bool Prog::SearchDFA(const StringPiece& text, const StringPiece& const_context, +} + +// Executes the regexp program to search in text, +// which itself is inside the larger context. (As a convenience, +// passing a NULL context is equivalent to passing text.) +// Returns true if a match is found, false if not. +// If a match is found, fills in match0->end() to point at the end of the match +// and sets match0->begin() to text.begin(), since the DFA can't track +// where the match actually began. +// +// This is the only external interface (class DFA only exists in this file). +// +bool Prog::SearchDFA(const StringPiece& text, const StringPiece& const_context, Anchor anchor, MatchKind kind, StringPiece* match0, bool* failed, SparseSet* matches) { - *failed = false; - - StringPiece context = const_context; + *failed = false; + + StringPiece context = const_context; if (context.data() == NULL) - context = text; + context = text; bool caret = anchor_start(); - bool dollar = anchor_end(); - if (reversed_) { + bool dollar = anchor_end(); + if (reversed_) { using std::swap; swap(caret, dollar); - } + } if (caret && BeginPtr(context) != BeginPtr(text)) - return false; + return false; if (dollar && EndPtr(context) != EndPtr(text)) - return false; - - // Handle full match by running an anchored longest match - // and then checking if it covers all of text. - bool anchored = anchor == kAnchored || anchor_start() || kind == kFullMatch; - bool endmatch = false; - if (kind == kManyMatch) { + return false; + + // Handle full match by running an anchored longest match + // and then checking if it covers all of text. 
+ bool anchored = anchor == kAnchored || anchor_start() || kind == kFullMatch; + bool endmatch = false; + if (kind == kManyMatch) { // This is split out in order to avoid clobbering kind. - } else if (kind == kFullMatch || anchor_end()) { - endmatch = true; - kind = kLongestMatch; - } - - // If the caller doesn't care where the match is (just whether one exists), - // then we can stop at the very first match we find, the so-called + } else if (kind == kFullMatch || anchor_end()) { + endmatch = true; + kind = kLongestMatch; + } + + // If the caller doesn't care where the match is (just whether one exists), + // then we can stop at the very first match we find, the so-called // "earliest match". bool want_earliest_match = false; if (kind == kManyMatch) { @@ -1864,62 +1864,62 @@ bool Prog::SearchDFA(const StringPiece& text, const StringPiece& const_context, } } else if (match0 == NULL && !endmatch) { want_earliest_match = true; - kind = kLongestMatch; - } - - DFA* dfa = GetDFA(kind); - const char* ep; - bool matched = dfa->Search(text, context, anchored, + kind = kLongestMatch; + } + + DFA* dfa = GetDFA(kind); + const char* ep; + bool matched = dfa->Search(text, context, anchored, want_earliest_match, !reversed_, - failed, &ep, matches); + failed, &ep, matches); if (*failed) { hooks::GetDFASearchFailureHook()({ // Nothing yet... }); - return false; + return false; } - if (!matched) - return false; + if (!matched) + return false; if (endmatch && ep != (reversed_ ? text.data() : text.data() + text.size())) - return false; - - // If caller cares, record the boundary of the match. - // We only know where it ends, so use the boundary of text - // as the beginning. - if (match0) { - if (reversed_) + return false; + + // If caller cares, record the boundary of the match. + // We only know where it ends, so use the boundary of text + // as the beginning. 
+ if (match0) { + if (reversed_) *match0 = StringPiece(ep, static_cast<size_t>(text.data() + text.size() - ep)); - else + else *match0 = StringPiece(text.data(), static_cast<size_t>(ep - text.data())); - } - return true; -} - -// Build out all states in DFA. Returns number of states. + } + return true; +} + +// Build out all states in DFA. Returns number of states. int DFA::BuildAllStates(const Prog::DFAStateCallback& cb) { - if (!ok()) - return 0; - - // Pick out start state for unanchored search - // at beginning of text. - RWLocker l(&cache_mutex_); + if (!ok()) + return 0; + + // Pick out start state for unanchored search + // at beginning of text. + RWLocker l(&cache_mutex_); SearchParams params(StringPiece(), StringPiece(), &l); - params.anchored = false; + params.anchored = false; if (!AnalyzeSearch(¶ms) || params.start == NULL || params.start == DeadState) - return 0; - - // Add start state to work queue. + return 0; + + // Add start state to work queue. // Note that any State* that we handle here must point into the cache, // so we can simply depend on pointer-as-a-number hashing and equality. std::unordered_map<State*, int> m; std::deque<State*> q; m.emplace(params.start, static_cast<int>(m.size())); - q.push_back(params.start); - + q.push_back(params.start); + // Compute the input bytes needed to cover all of the next pointers. int nnext = prog_->bytemap_range() + 1; // + 1 for kByteEndText slot std::vector<int> input(nnext); @@ -1934,13 +1934,13 @@ int DFA::BuildAllStates(const Prog::DFAStateCallback& cb) { // Scratch space for the output. std::vector<int> output(nnext); - // Flood to expand every state. + // Flood to expand every state. 
bool oom = false; while (!q.empty()) { State* s = q.front(); q.pop_front(); for (int c : input) { - State* ns = RunStateOnByteUnlocked(s, c); + State* ns = RunStateOnByteUnlocked(s, c); if (ns == NULL) { oom = true; break; @@ -1951,168 +1951,168 @@ int DFA::BuildAllStates(const Prog::DFAStateCallback& cb) { } if (m.find(ns) == m.end()) { m.emplace(ns, static_cast<int>(m.size())); - q.push_back(ns); - } + q.push_back(ns); + } output[ByteMap(c)] = m[ns]; - } + } if (cb) cb(oom ? NULL : output.data(), s == FullMatchState || s->IsMatch()); if (oom) break; - } - + } + return static_cast<int>(m.size()); -} - -// Build out all states in DFA for kind. Returns number of states. +} + +// Build out all states in DFA for kind. Returns number of states. int Prog::BuildEntireDFA(MatchKind kind, const DFAStateCallback& cb) { return GetDFA(kind)->BuildAllStates(cb); -} - -// Computes min and max for matching string. -// Won't return strings bigger than maxlen. +} + +// Computes min and max for matching string. +// Won't return strings bigger than maxlen. bool DFA::PossibleMatchRange(std::string* min, std::string* max, int maxlen) { - if (!ok()) - return false; - - // NOTE: if future users of PossibleMatchRange want more precision when - // presented with infinitely repeated elements, consider making this a - // parameter to PossibleMatchRange. - static int kMaxEltRepetitions = 0; - - // Keep track of the number of times we've visited states previously. We only - // revisit a given state if it's part of a repeated group, so if the value - // portion of the map tuple exceeds kMaxEltRepetitions we bail out and set - // |*max| to |PrefixSuccessor(*max)|. - // - // Also note that previously_visited_states[UnseenStatePtr] will, in the STL - // tradition, implicitly insert a '0' value at first use. We take advantage - // of that property below. 
+ if (!ok()) + return false; + + // NOTE: if future users of PossibleMatchRange want more precision when + // presented with infinitely repeated elements, consider making this a + // parameter to PossibleMatchRange. + static int kMaxEltRepetitions = 0; + + // Keep track of the number of times we've visited states previously. We only + // revisit a given state if it's part of a repeated group, so if the value + // portion of the map tuple exceeds kMaxEltRepetitions we bail out and set + // |*max| to |PrefixSuccessor(*max)|. + // + // Also note that previously_visited_states[UnseenStatePtr] will, in the STL + // tradition, implicitly insert a '0' value at first use. We take advantage + // of that property below. std::unordered_map<State*, int> previously_visited_states; - - // Pick out start state for anchored search at beginning of text. - RWLocker l(&cache_mutex_); + + // Pick out start state for anchored search at beginning of text. + RWLocker l(&cache_mutex_); SearchParams params(StringPiece(), StringPiece(), &l); - params.anchored = true; - if (!AnalyzeSearch(¶ms)) - return false; - if (params.start == DeadState) { // No matching strings - *min = ""; - *max = ""; - return true; - } - if (params.start == FullMatchState) // Every string matches: no max - return false; - - // The DFA is essentially a big graph rooted at params.start, - // and paths in the graph correspond to accepted strings. - // Each node in the graph has potentially 256+1 arrows - // coming out, one for each byte plus the magic end of - // text character kByteEndText. - - // To find the smallest possible prefix of an accepted - // string, we just walk the graph preferring to follow - // arrows with the lowest bytes possible. To find the - // largest possible prefix, we follow the largest bytes - // possible. 
- - // The test for whether there is an arrow from s on byte j is - // ns = RunStateOnByteUnlocked(s, j); - // if (ns == NULL) - // return false; - // if (ns != DeadState && ns->ninst > 0) - // The RunStateOnByteUnlocked call asks the DFA to build out the graph. - // It returns NULL only if the DFA has run out of memory, - // in which case we can't be sure of anything. - // The second check sees whether there was graph built - // and whether it is interesting graph. Nodes might have - // ns->ninst == 0 if they exist only to represent the fact - // that a match was found on the previous byte. - - // Build minimum prefix. - State* s = params.start; - min->clear(); + params.anchored = true; + if (!AnalyzeSearch(¶ms)) + return false; + if (params.start == DeadState) { // No matching strings + *min = ""; + *max = ""; + return true; + } + if (params.start == FullMatchState) // Every string matches: no max + return false; + + // The DFA is essentially a big graph rooted at params.start, + // and paths in the graph correspond to accepted strings. + // Each node in the graph has potentially 256+1 arrows + // coming out, one for each byte plus the magic end of + // text character kByteEndText. + + // To find the smallest possible prefix of an accepted + // string, we just walk the graph preferring to follow + // arrows with the lowest bytes possible. To find the + // largest possible prefix, we follow the largest bytes + // possible. + + // The test for whether there is an arrow from s on byte j is + // ns = RunStateOnByteUnlocked(s, j); + // if (ns == NULL) + // return false; + // if (ns != DeadState && ns->ninst > 0) + // The RunStateOnByteUnlocked call asks the DFA to build out the graph. + // It returns NULL only if the DFA has run out of memory, + // in which case we can't be sure of anything. + // The second check sees whether there was graph built + // and whether it is interesting graph. 
Nodes might have + // ns->ninst == 0 if they exist only to represent the fact + // that a match was found on the previous byte. + + // Build minimum prefix. + State* s = params.start; + min->clear(); MutexLock lock(&mutex_); - for (int i = 0; i < maxlen; i++) { + for (int i = 0; i < maxlen; i++) { if (previously_visited_states[s] > kMaxEltRepetitions) - break; - previously_visited_states[s]++; - - // Stop if min is a match. + break; + previously_visited_states[s]++; + + // Stop if min is a match. State* ns = RunStateOnByte(s, kByteEndText); - if (ns == NULL) // DFA out of memory - return false; - if (ns != DeadState && (ns == FullMatchState || ns->IsMatch())) - break; - - // Try to extend the string with low bytes. - bool extended = false; - for (int j = 0; j < 256; j++) { + if (ns == NULL) // DFA out of memory + return false; + if (ns != DeadState && (ns == FullMatchState || ns->IsMatch())) + break; + + // Try to extend the string with low bytes. + bool extended = false; + for (int j = 0; j < 256; j++) { ns = RunStateOnByte(s, j); - if (ns == NULL) // DFA out of memory - return false; - if (ns == FullMatchState || - (ns > SpecialStateMax && ns->ninst_ > 0)) { - extended = true; + if (ns == NULL) // DFA out of memory + return false; + if (ns == FullMatchState || + (ns > SpecialStateMax && ns->ninst_ > 0)) { + extended = true; min->append(1, static_cast<char>(j)); - s = ns; - break; - } - } - if (!extended) - break; - } - - // Build maximum prefix. - previously_visited_states.clear(); - s = params.start; - max->clear(); - for (int i = 0; i < maxlen; i++) { + s = ns; + break; + } + } + if (!extended) + break; + } + + // Build maximum prefix. + previously_visited_states.clear(); + s = params.start; + max->clear(); + for (int i = 0; i < maxlen; i++) { if (previously_visited_states[s] > kMaxEltRepetitions) - break; - previously_visited_states[s] += 1; - - // Try to extend the string with high bytes. 
- bool extended = false; - for (int j = 255; j >= 0; j--) { + break; + previously_visited_states[s] += 1; + + // Try to extend the string with high bytes. + bool extended = false; + for (int j = 255; j >= 0; j--) { State* ns = RunStateOnByte(s, j); - if (ns == NULL) - return false; - if (ns == FullMatchState || - (ns > SpecialStateMax && ns->ninst_ > 0)) { - extended = true; + if (ns == NULL) + return false; + if (ns == FullMatchState || + (ns > SpecialStateMax && ns->ninst_ > 0)) { + extended = true; max->append(1, static_cast<char>(j)); - s = ns; - break; - } - } - if (!extended) { - // Done, no need for PrefixSuccessor. - return true; - } - } - - // Stopped while still adding to *max - round aaaaaaaaaa... to aaaa...b + s = ns; + break; + } + } + if (!extended) { + // Done, no need for PrefixSuccessor. + return true; + } + } + + // Stopped while still adding to *max - round aaaaaaaaaa... to aaaa...b PrefixSuccessor(max); - - // If there are no bytes left, we have no way to say "there is no maximum - // string". We could make the interface more complicated and be able to - // return "there is no maximum but here is a minimum", but that seems like - // overkill -- the most common no-max case is all possible strings, so not - // telling the caller that the empty string is the minimum match isn't a - // great loss. - if (max->empty()) - return false; - - return true; -} - -// PossibleMatchRange for a Prog. + + // If there are no bytes left, we have no way to say "there is no maximum + // string". We could make the interface more complicated and be able to + // return "there is no maximum but here is a minimum", but that seems like + // overkill -- the most common no-max case is all possible strings, so not + // telling the caller that the empty string is the minimum match isn't a + // great loss. + if (max->empty()) + return false; + + return true; +} + +// PossibleMatchRange for a Prog. 
bool Prog::PossibleMatchRange(std::string* min, std::string* max, int maxlen) { // Have to use dfa_longest_ to get all strings for full matches. // For example, (a|aa) never matches aa in first-match mode. return GetDFA(kLongestMatch)->PossibleMatchRange(min, max, maxlen); -} - -} // namespace re2 +} + +} // namespace re2 diff --git a/contrib/libs/re2/re2/filtered_re2.cc b/contrib/libs/re2/re2/filtered_re2.cc index 5df97456e2..3de2ec8124 100644 --- a/contrib/libs/re2/re2/filtered_re2.cc +++ b/contrib/libs/re2/re2/filtered_re2.cc @@ -1,8 +1,8 @@ -// Copyright 2009 The RE2 Authors. All Rights Reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -#include "re2/filtered_re2.h" +// Copyright 2009 The RE2 Authors. All Rights Reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "re2/filtered_re2.h" #include <stddef.h> #include <string> @@ -10,26 +10,26 @@ #include "util/util.h" #include "util/logging.h" -#include "re2/prefilter.h" -#include "re2/prefilter_tree.h" - -namespace re2 { - -FilteredRE2::FilteredRE2() - : compiled_(false), - prefilter_tree_(new PrefilterTree()) { -} - +#include "re2/prefilter.h" +#include "re2/prefilter_tree.h" + +namespace re2 { + +FilteredRE2::FilteredRE2() + : compiled_(false), + prefilter_tree_(new PrefilterTree()) { +} + FilteredRE2::FilteredRE2(int min_atom_len) : compiled_(false), prefilter_tree_(new PrefilterTree(min_atom_len)) { } -FilteredRE2::~FilteredRE2() { +FilteredRE2::~FilteredRE2() { for (size_t i = 0; i < re2_vec_.size(); i++) - delete re2_vec_[i]; -} - + delete re2_vec_[i]; +} + FilteredRE2::FilteredRE2(FilteredRE2&& other) : re2_vec_(std::move(other.re2_vec_)), compiled_(other.compiled_), @@ -46,79 +46,79 @@ FilteredRE2& FilteredRE2::operator=(FilteredRE2&& other) { return *this; } -RE2::ErrorCode FilteredRE2::Add(const StringPiece& pattern, - const RE2::Options& options, int* id) { - 
RE2* re = new RE2(pattern, options); - RE2::ErrorCode code = re->error_code(); - - if (!re->ok()) { +RE2::ErrorCode FilteredRE2::Add(const StringPiece& pattern, + const RE2::Options& options, int* id) { + RE2* re = new RE2(pattern, options); + RE2::ErrorCode code = re->error_code(); + + if (!re->ok()) { if (options.log_errors()) { LOG(ERROR) << "Couldn't compile regular expression, skipping: " << pattern << " due to error " << re->error(); } - delete re; - } else { + delete re; + } else { *id = static_cast<int>(re2_vec_.size()); - re2_vec_.push_back(re); - } - - return code; -} - + re2_vec_.push_back(re); + } + + return code; +} + void FilteredRE2::Compile(std::vector<std::string>* atoms) { if (compiled_) { LOG(ERROR) << "Compile called already."; - return; - } - + return; + } + if (re2_vec_.empty()) { LOG(ERROR) << "Compile called before Add."; return; } for (size_t i = 0; i < re2_vec_.size(); i++) { - Prefilter* prefilter = Prefilter::FromRE2(re2_vec_[i]); - prefilter_tree_->Add(prefilter); - } - atoms->clear(); - prefilter_tree_->Compile(atoms); - compiled_ = true; -} - -int FilteredRE2::SlowFirstMatch(const StringPiece& text) const { + Prefilter* prefilter = Prefilter::FromRE2(re2_vec_[i]); + prefilter_tree_->Add(prefilter); + } + atoms->clear(); + prefilter_tree_->Compile(atoms); + compiled_ = true; +} + +int FilteredRE2::SlowFirstMatch(const StringPiece& text) const { for (size_t i = 0; i < re2_vec_.size(); i++) - if (RE2::PartialMatch(text, *re2_vec_[i])) + if (RE2::PartialMatch(text, *re2_vec_[i])) return static_cast<int>(i); - return -1; -} - -int FilteredRE2::FirstMatch(const StringPiece& text, + return -1; +} + +int FilteredRE2::FirstMatch(const StringPiece& text, const std::vector<int>& atoms) const { - if (!compiled_) { + if (!compiled_) { LOG(DFATAL) << "FirstMatch called before Compile."; - return -1; - } + return -1; + } std::vector<int> regexps; - prefilter_tree_->RegexpsGivenStrings(atoms, ®exps); + prefilter_tree_->RegexpsGivenStrings(atoms, 
®exps); for (size_t i = 0; i < regexps.size(); i++) - if (RE2::PartialMatch(text, *re2_vec_[regexps[i]])) - return regexps[i]; - return -1; -} - -bool FilteredRE2::AllMatches( - const StringPiece& text, + if (RE2::PartialMatch(text, *re2_vec_[regexps[i]])) + return regexps[i]; + return -1; +} + +bool FilteredRE2::AllMatches( + const StringPiece& text, const std::vector<int>& atoms, std::vector<int>* matching_regexps) const { - matching_regexps->clear(); + matching_regexps->clear(); std::vector<int> regexps; - prefilter_tree_->RegexpsGivenStrings(atoms, ®exps); + prefilter_tree_->RegexpsGivenStrings(atoms, ®exps); for (size_t i = 0; i < regexps.size(); i++) - if (RE2::PartialMatch(text, *re2_vec_[regexps[i]])) - matching_regexps->push_back(regexps[i]); - return !matching_regexps->empty(); -} - + if (RE2::PartialMatch(text, *re2_vec_[regexps[i]])) + matching_regexps->push_back(regexps[i]); + return !matching_regexps->empty(); +} + void FilteredRE2::AllPotentials( const std::vector<int>& atoms, std::vector<int>* potential_regexps) const { @@ -127,11 +127,11 @@ void FilteredRE2::AllPotentials( void FilteredRE2::RegexpsGivenStrings(const std::vector<int>& matched_atoms, std::vector<int>* passed_regexps) { - prefilter_tree_->RegexpsGivenStrings(matched_atoms, passed_regexps); -} - -void FilteredRE2::PrintPrefilter(int regexpid) { - prefilter_tree_->PrintPrefilter(regexpid); -} - + prefilter_tree_->RegexpsGivenStrings(matched_atoms, passed_regexps); +} + +void FilteredRE2::PrintPrefilter(int regexpid) { + prefilter_tree_->PrintPrefilter(regexpid); +} + } // namespace re2 diff --git a/contrib/libs/re2/re2/filtered_re2.h b/contrib/libs/re2/re2/filtered_re2.h index dd618c70e8..c436b2eca2 100644 --- a/contrib/libs/re2/re2/filtered_re2.h +++ b/contrib/libs/re2/re2/filtered_re2.h @@ -1,17 +1,17 @@ -// Copyright 2009 The RE2 Authors. All Rights Reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. 
- +// Copyright 2009 The RE2 Authors. All Rights Reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + #ifndef RE2_FILTERED_RE2_H_ #define RE2_FILTERED_RE2_H_ -// The class FilteredRE2 is used as a wrapper to multiple RE2 regexps. -// It provides a prefilter mechanism that helps in cutting down the -// number of regexps that need to be actually searched. -// -// By design, it does not include a string matching engine. This is to +// The class FilteredRE2 is used as a wrapper to multiple RE2 regexps. +// It provides a prefilter mechanism that helps in cutting down the +// number of regexps that need to be actually searched. +// +// By design, it does not include a string matching engine. This is to // allow the user of the class to use their favorite string matching -// engine. The overall flow is: Add all the regexps using Add, then +// engine. The overall flow is: Add all the regexps using Add, then // Compile the FilteredRE2. Compile returns strings that need to be // matched. Note that the returned strings are lowercased and distinct. // For applying regexps to a search text, the caller does the string @@ -20,23 +20,23 @@ // on a lowercased version of the search text. Then call FirstMatch // or AllMatches with a vector of indices of strings that were found // in the text to get the actual regexp matches. - + #include <memory> #include <string> #include <vector> - + #include "re2/re2.h" - -namespace re2 { - -class PrefilterTree; - -class FilteredRE2 { - public: - FilteredRE2(); + +namespace re2 { + +class PrefilterTree; + +class FilteredRE2 { + public: + FilteredRE2(); explicit FilteredRE2(int min_atom_len); - ~FilteredRE2(); - + ~FilteredRE2(); + // Not copyable. 
FilteredRE2(const FilteredRE2&) = delete; FilteredRE2& operator=(const FilteredRE2&) = delete; @@ -44,39 +44,39 @@ class FilteredRE2 { FilteredRE2(FilteredRE2&& other); FilteredRE2& operator=(FilteredRE2&& other); - // Uses RE2 constructor to create a RE2 object (re). Returns - // re->error_code(). If error_code is other than NoError, then re is - // deleted and not added to re2_vec_. - RE2::ErrorCode Add(const StringPiece& pattern, - const RE2::Options& options, + // Uses RE2 constructor to create a RE2 object (re). Returns + // re->error_code(). If error_code is other than NoError, then re is + // deleted and not added to re2_vec_. + RE2::ErrorCode Add(const StringPiece& pattern, + const RE2::Options& options, int* id); - - // Prepares the regexps added by Add for filtering. Returns a set - // of strings that the caller should check for in candidate texts. + + // Prepares the regexps added by Add for filtering. Returns a set + // of strings that the caller should check for in candidate texts. // The returned strings are lowercased and distinct. When doing // string matching, it should be performed in a case-insensitive // way or the search text should be lowercased first. Call after - // all Add calls are done. + // all Add calls are done. void Compile(std::vector<std::string>* strings_to_match); - - // Returns the index of the first matching regexp. - // Returns -1 on no match. Can be called prior to Compile. - // Does not do any filtering: simply tries to Match the - // regexps in a loop. - int SlowFirstMatch(const StringPiece& text) const; - - // Returns the index of the first matching regexp. - // Returns -1 on no match. Compile has to be called before - // calling this. - int FirstMatch(const StringPiece& text, + + // Returns the index of the first matching regexp. + // Returns -1 on no match. Can be called prior to Compile. + // Does not do any filtering: simply tries to Match the + // regexps in a loop. 
+ int SlowFirstMatch(const StringPiece& text) const; + + // Returns the index of the first matching regexp. + // Returns -1 on no match. Compile has to be called before + // calling this. + int FirstMatch(const StringPiece& text, const std::vector<int>& atoms) const; - - // Returns the indices of all matching regexps, after first clearing - // matched_regexps. - bool AllMatches(const StringPiece& text, + + // Returns the indices of all matching regexps, after first clearing + // matched_regexps. + bool AllMatches(const StringPiece& text, const std::vector<int>& atoms, std::vector<int>* matching_regexps) const; - + // Returns the indices of all potentially matching regexps after first // clearing potential_regexps. // A regexp is potentially matching if it passes the filter. @@ -85,30 +85,30 @@ class FilteredRE2 { void AllPotentials(const std::vector<int>& atoms, std::vector<int>* potential_regexps) const; - // The number of regexps added. + // The number of regexps added. int NumRegexps() const { return static_cast<int>(re2_vec_.size()); } - + // Get the individual RE2 objects. const RE2& GetRE2(int regexpid) const { return *re2_vec_[regexpid]; } - private: - // Print prefilter. - void PrintPrefilter(int regexpid); - - // Useful for testing and debugging. + private: + // Print prefilter. + void PrintPrefilter(int regexpid); + + // Useful for testing and debugging. void RegexpsGivenStrings(const std::vector<int>& matched_atoms, std::vector<int>* passed_regexps); - - // All the regexps in the FilteredRE2. + + // All the regexps in the FilteredRE2. std::vector<RE2*> re2_vec_; - - // Has the FilteredRE2 been compiled using Compile() - bool compiled_; - - // An AND-OR tree of string atoms used for filtering regexps. + + // Has the FilteredRE2 been compiled using Compile() + bool compiled_; + + // An AND-OR tree of string atoms used for filtering regexps. 
std::unique_ptr<PrefilterTree> prefilter_tree_; -}; - -} // namespace re2 - -#endif // RE2_FILTERED_RE2_H_ +}; + +} // namespace re2 + +#endif // RE2_FILTERED_RE2_H_ diff --git a/contrib/libs/re2/re2/mimics_pcre.cc b/contrib/libs/re2/re2/mimics_pcre.cc index b1d6a51228..7be60e4212 100644 --- a/contrib/libs/re2/re2/mimics_pcre.cc +++ b/contrib/libs/re2/re2/mimics_pcre.cc @@ -1,44 +1,44 @@ -// Copyright 2008 The RE2 Authors. All Rights Reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// Determine whether this library should match PCRE exactly -// for a particular Regexp. (If so, the testing framework can -// check that it does.) -// -// This library matches PCRE except in these cases: -// * the regexp contains a repetition of an empty string, -// like (a*)* or (a*)+. In this case, PCRE will treat -// the repetition sequence as ending with an empty string, -// while this library does not. -// * Perl and PCRE differ on whether \v matches \n. -// For historical reasons, this library implements the Perl behavior. -// * Perl and PCRE allow $ in one-line mode to match either the very -// end of the text or just before a \n at the end of the text. -// This library requires it to match only the end of the text. -// * Similarly, Perl and PCRE do not allow ^ in multi-line mode to -// match the end of the text if the last character is a \n. -// This library does allow it. -// -// Regexp::MimicsPCRE checks for any of these conditions. - +// Copyright 2008 The RE2 Authors. All Rights Reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Determine whether this library should match PCRE exactly +// for a particular Regexp. (If so, the testing framework can +// check that it does.) +// +// This library matches PCRE except in these cases: +// * the regexp contains a repetition of an empty string, +// like (a*)* or (a*)+. 
In this case, PCRE will treat +// the repetition sequence as ending with an empty string, +// while this library does not. +// * Perl and PCRE differ on whether \v matches \n. +// For historical reasons, this library implements the Perl behavior. +// * Perl and PCRE allow $ in one-line mode to match either the very +// end of the text or just before a \n at the end of the text. +// This library requires it to match only the end of the text. +// * Similarly, Perl and PCRE do not allow ^ in multi-line mode to +// match the end of the text if the last character is a \n. +// This library does allow it. +// +// Regexp::MimicsPCRE checks for any of these conditions. + #include "util/util.h" #include "util/logging.h" -#include "re2/regexp.h" -#include "re2/walker-inl.h" - -namespace re2 { - -// Returns whether re might match an empty string. -static bool CanBeEmptyString(Regexp *re); - -// Walker class to compute whether library handles a regexp -// exactly as PCRE would. See comment at top for conditions. - -class PCREWalker : public Regexp::Walker<bool> { - public: - PCREWalker() {} - +#include "re2/regexp.h" +#include "re2/walker-inl.h" + +namespace re2 { + +// Returns whether re might match an empty string. +static bool CanBeEmptyString(Regexp *re); + +// Walker class to compute whether library handles a regexp +// exactly as PCRE would. See comment at top for conditions. + +class PCREWalker : public Regexp::Walker<bool> { + public: + PCREWalker() {} + virtual bool PostVisit(Regexp* re, bool parent_arg, bool pre_arg, bool* child_args, int nchild_args); @@ -47,151 +47,151 @@ class PCREWalker : public Regexp::Walker<bool> { #ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION LOG(DFATAL) << "PCREWalker::ShortVisit called"; #endif - return a; - } + return a; + } private: PCREWalker(const PCREWalker&) = delete; PCREWalker& operator=(const PCREWalker&) = delete; -}; - -// Called after visiting each of re's children and accumulating -// the return values in child_args. 
So child_args contains whether -// this library mimics PCRE for those subexpressions. -bool PCREWalker::PostVisit(Regexp* re, bool parent_arg, bool pre_arg, - bool* child_args, int nchild_args) { - // If children failed, so do we. - for (int i = 0; i < nchild_args; i++) - if (!child_args[i]) - return false; - - // Otherwise look for other reasons to fail. - switch (re->op()) { - // Look for repeated empty string. - case kRegexpStar: - case kRegexpPlus: - case kRegexpQuest: - if (CanBeEmptyString(re->sub()[0])) - return false; - break; - case kRegexpRepeat: - if (re->max() == -1 && CanBeEmptyString(re->sub()[0])) - return false; - break; - - // Look for \v - case kRegexpLiteral: - if (re->rune() == '\v') - return false; - break; - - // Look for $ in single-line mode. - case kRegexpEndText: - case kRegexpEmptyMatch: - if (re->parse_flags() & Regexp::WasDollar) - return false; - break; - - // Look for ^ in multi-line mode. - case kRegexpBeginLine: - // No condition: in single-line mode ^ becomes kRegexpBeginText. - return false; - - default: - break; - } - - // Not proven guilty. - return true; -} - -// Returns whether this regexp's behavior will mimic PCRE's exactly. -bool Regexp::MimicsPCRE() { - PCREWalker w; - return w.Walk(this, true); -} - - -// Walker class to compute whether a Regexp can match an empty string. -// It is okay to overestimate. For example, \b\B cannot match an empty -// string, because \b and \B are mutually exclusive, but this isn't -// that smart and will say it can. Spurious empty strings -// will reduce the number of regexps we sanity check against PCRE, -// but they won't break anything. - -class EmptyStringWalker : public Regexp::Walker<bool> { - public: +}; + +// Called after visiting each of re's children and accumulating +// the return values in child_args. So child_args contains whether +// this library mimics PCRE for those subexpressions. 
+bool PCREWalker::PostVisit(Regexp* re, bool parent_arg, bool pre_arg, + bool* child_args, int nchild_args) { + // If children failed, so do we. + for (int i = 0; i < nchild_args; i++) + if (!child_args[i]) + return false; + + // Otherwise look for other reasons to fail. + switch (re->op()) { + // Look for repeated empty string. + case kRegexpStar: + case kRegexpPlus: + case kRegexpQuest: + if (CanBeEmptyString(re->sub()[0])) + return false; + break; + case kRegexpRepeat: + if (re->max() == -1 && CanBeEmptyString(re->sub()[0])) + return false; + break; + + // Look for \v + case kRegexpLiteral: + if (re->rune() == '\v') + return false; + break; + + // Look for $ in single-line mode. + case kRegexpEndText: + case kRegexpEmptyMatch: + if (re->parse_flags() & Regexp::WasDollar) + return false; + break; + + // Look for ^ in multi-line mode. + case kRegexpBeginLine: + // No condition: in single-line mode ^ becomes kRegexpBeginText. + return false; + + default: + break; + } + + // Not proven guilty. + return true; +} + +// Returns whether this regexp's behavior will mimic PCRE's exactly. +bool Regexp::MimicsPCRE() { + PCREWalker w; + return w.Walk(this, true); +} + + +// Walker class to compute whether a Regexp can match an empty string. +// It is okay to overestimate. For example, \b\B cannot match an empty +// string, because \b and \B are mutually exclusive, but this isn't +// that smart and will say it can. Spurious empty strings +// will reduce the number of regexps we sanity check against PCRE, +// but they won't break anything. + +class EmptyStringWalker : public Regexp::Walker<bool> { + public: EmptyStringWalker() {} - + virtual bool PostVisit(Regexp* re, bool parent_arg, bool pre_arg, bool* child_args, int nchild_args); virtual bool ShortVisit(Regexp* re, bool a) { // Should never be called: we use Walk(), not WalkExponential(). 
#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION - LOG(DFATAL) << "EmptyStringWalker::ShortVisit called"; + LOG(DFATAL) << "EmptyStringWalker::ShortVisit called"; #endif - return a; - } - - private: + return a; + } + + private: EmptyStringWalker(const EmptyStringWalker&) = delete; EmptyStringWalker& operator=(const EmptyStringWalker&) = delete; -}; - -// Called after visiting re's children. child_args contains the return -// value from each of the children's PostVisits (i.e., whether each child -// can match an empty string). Returns whether this clause can match an -// empty string. -bool EmptyStringWalker::PostVisit(Regexp* re, bool parent_arg, bool pre_arg, - bool* child_args, int nchild_args) { - switch (re->op()) { - case kRegexpNoMatch: // never empty - case kRegexpLiteral: - case kRegexpAnyChar: - case kRegexpAnyByte: - case kRegexpCharClass: - case kRegexpLiteralString: - return false; - - case kRegexpEmptyMatch: // always empty - case kRegexpBeginLine: // always empty, when they match - case kRegexpEndLine: - case kRegexpNoWordBoundary: - case kRegexpWordBoundary: - case kRegexpBeginText: - case kRegexpEndText: - case kRegexpStar: // can always be empty - case kRegexpQuest: - case kRegexpHaveMatch: - return true; - - case kRegexpConcat: // can be empty if all children can - for (int i = 0; i < nchild_args; i++) - if (!child_args[i]) - return false; - return true; - - case kRegexpAlternate: // can be empty if any child can - for (int i = 0; i < nchild_args; i++) - if (child_args[i]) - return true; - return false; - - case kRegexpPlus: // can be empty if the child can - case kRegexpCapture: - return child_args[0]; - - case kRegexpRepeat: // can be empty if child can or is x{0} - return child_args[0] || re->min() == 0; - } - return false; -} - -// Returns whether re can match an empty string. -static bool CanBeEmptyString(Regexp* re) { - EmptyStringWalker w; - return w.Walk(re, true); -} - -} // namespace re2 +}; + +// Called after visiting re's children. 
child_args contains the return +// value from each of the children's PostVisits (i.e., whether each child +// can match an empty string). Returns whether this clause can match an +// empty string. +bool EmptyStringWalker::PostVisit(Regexp* re, bool parent_arg, bool pre_arg, + bool* child_args, int nchild_args) { + switch (re->op()) { + case kRegexpNoMatch: // never empty + case kRegexpLiteral: + case kRegexpAnyChar: + case kRegexpAnyByte: + case kRegexpCharClass: + case kRegexpLiteralString: + return false; + + case kRegexpEmptyMatch: // always empty + case kRegexpBeginLine: // always empty, when they match + case kRegexpEndLine: + case kRegexpNoWordBoundary: + case kRegexpWordBoundary: + case kRegexpBeginText: + case kRegexpEndText: + case kRegexpStar: // can always be empty + case kRegexpQuest: + case kRegexpHaveMatch: + return true; + + case kRegexpConcat: // can be empty if all children can + for (int i = 0; i < nchild_args; i++) + if (!child_args[i]) + return false; + return true; + + case kRegexpAlternate: // can be empty if any child can + for (int i = 0; i < nchild_args; i++) + if (child_args[i]) + return true; + return false; + + case kRegexpPlus: // can be empty if the child can + case kRegexpCapture: + return child_args[0]; + + case kRegexpRepeat: // can be empty if child can or is x{0} + return child_args[0] || re->min() == 0; + } + return false; +} + +// Returns whether re can match an empty string. +static bool CanBeEmptyString(Regexp* re) { + EmptyStringWalker w; + return w.Walk(re, true); +} + +} // namespace re2 diff --git a/contrib/libs/re2/re2/nfa.cc b/contrib/libs/re2/re2/nfa.cc index c7339f8ffd..3c0ed1f60e 100644 --- a/contrib/libs/re2/re2/nfa.cc +++ b/contrib/libs/re2/re2/nfa.cc @@ -1,29 +1,29 @@ -// Copyright 2006-2007 The RE2 Authors. All Rights Reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// Tested by search_test.cc. -// -// Prog::SearchNFA, an NFA search. 
-// This is an actual NFA like the theorists talk about, -// not the pseudo-NFA found in backtracking regexp implementations. -// -// IMPLEMENTATION -// -// This algorithm is a variant of one that appeared in Rob Pike's sam editor, -// which is a variant of the one described in Thompson's 1968 CACM paper. -// See http://swtch.com/~rsc/regexp/ for various history. The main feature -// over the DFA implementation is that it tracks submatch boundaries. -// -// When the choice of submatch boundaries is ambiguous, this particular -// implementation makes the same choices that traditional backtracking -// implementations (in particular, Perl and PCRE) do. -// Note that unlike in Perl and PCRE, this algorithm *cannot* take exponential -// time in the length of the input. -// -// Like Thompson's original machine and like the DFA implementation, this -// implementation notices a match only once it is one byte past it. - +// Copyright 2006-2007 The RE2 Authors. All Rights Reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Tested by search_test.cc. +// +// Prog::SearchNFA, an NFA search. +// This is an actual NFA like the theorists talk about, +// not the pseudo-NFA found in backtracking regexp implementations. +// +// IMPLEMENTATION +// +// This algorithm is a variant of one that appeared in Rob Pike's sam editor, +// which is a variant of the one described in Thompson's 1968 CACM paper. +// See http://swtch.com/~rsc/regexp/ for various history. The main feature +// over the DFA implementation is that it tracks submatch boundaries. +// +// When the choice of submatch boundaries is ambiguous, this particular +// implementation makes the same choices that traditional backtracking +// implementations (in particular, Perl and PCRE) do. +// Note that unlike in Perl and PCRE, this algorithm *cannot* take exponential +// time in the length of the input. 
+// +// Like Thompson's original machine and like the DFA implementation, this +// implementation notices a match only once it is one byte past it. + #include <stdio.h> #include <string.h> #include <algorithm> @@ -35,68 +35,68 @@ #include "util/logging.h" #include "util/strutil.h" #include "re2/pod_array.h" -#include "re2/prog.h" -#include "re2/regexp.h" +#include "re2/prog.h" +#include "re2/regexp.h" #include "re2/sparse_array.h" #include "re2/sparse_set.h" - -namespace re2 { - + +namespace re2 { + static const bool ExtraDebug = false; -class NFA { - public: - NFA(Prog* prog); - ~NFA(); - - // Searches for a matching string. - // * If anchored is true, only considers matches starting at offset. - // Otherwise finds lefmost match at or after offset. - // * If longest is true, returns the longest match starting - // at the chosen start point. Otherwise returns the so-called - // left-biased match, the one traditional backtracking engines - // (like Perl and PCRE) find. - // Records submatch boundaries in submatch[1..nsubmatch-1]. - // Submatch[0] is the entire match. When there is a choice in - // which text matches each subexpression, the submatch boundaries - // are chosen to match what a backtracking implementation would choose. - bool Search(const StringPiece& text, const StringPiece& context, - bool anchored, bool longest, - StringPiece* submatch, int nsubmatch); - - private: - struct Thread { - union { +class NFA { + public: + NFA(Prog* prog); + ~NFA(); + + // Searches for a matching string. + // * If anchored is true, only considers matches starting at offset. + // Otherwise finds lefmost match at or after offset. + // * If longest is true, returns the longest match starting + // at the chosen start point. Otherwise returns the so-called + // left-biased match, the one traditional backtracking engines + // (like Perl and PCRE) find. + // Records submatch boundaries in submatch[1..nsubmatch-1]. + // Submatch[0] is the entire match. 
When there is a choice in + // which text matches each subexpression, the submatch boundaries + // are chosen to match what a backtracking implementation would choose. + bool Search(const StringPiece& text, const StringPiece& context, + bool anchored, bool longest, + StringPiece* submatch, int nsubmatch); + + private: + struct Thread { + union { int ref; - Thread* next; // when on free list - }; - const char** capture; - }; - - // State for explicit stack in AddToThreadq. - struct AddState { + Thread* next; // when on free list + }; + const char** capture; + }; + + // State for explicit stack in AddToThreadq. + struct AddState { int id; // Inst to process Thread* t; // if not null, set t0 = t before processing id - }; - - // Threadq is a list of threads. The list is sorted by the order - // in which Perl would explore that particular state -- the earlier - // choices appear earlier in the list. - typedef SparseArray<Thread*> Threadq; - - inline Thread* AllocThread(); + }; + + // Threadq is a list of threads. The list is sorted by the order + // in which Perl would explore that particular state -- the earlier + // choices appear earlier in the list. + typedef SparseArray<Thread*> Threadq; + + inline Thread* AllocThread(); inline Thread* Incref(Thread* t); inline void Decref(Thread* t); - + // Follows all empty arrows from id0 and enqueues all the states reached. // Enqueues only the ByteRange instructions that match byte c. // context is used (with p) for evaluating empty-width specials. // p is the current input position, and t0 is the current thread. void AddToThreadq(Threadq* q, int id0, int c, const StringPiece& context, const char* p, Thread* t0); - - // Run runq on byte c, appending new states to nextq. - // Updates matched_ and match_ as new, better matches are found. + + // Run runq on byte c, appending new states to nextq. + // Updates matched_ and match_ as new, better matches are found. // context is used (with p) for evaluating empty-width specials. 
// p is the position of byte c in the input string for AddToThreadq; // p-1 will be used when processing Match instructions. @@ -104,14 +104,14 @@ class NFA { // If there is a shortcut to the end, returns that shortcut. int Step(Threadq* runq, Threadq* nextq, int c, const StringPiece& context, const char* p); - - // Returns text version of capture information, for debugging. + + // Returns text version of capture information, for debugging. std::string FormatCapture(const char** capture); - + void CopyCapture(const char** dst, const char** src) { memmove(dst, src, ncapture_*sizeof src[0]); } - + Prog* prog_; // underlying program int start_; // start instruction in program int ncapture_; // number of submatches to track @@ -125,53 +125,53 @@ class NFA { Thread* freelist_; // thread freelist const char** match_; // best match so far bool matched_; // any match so far? - + NFA(const NFA&) = delete; NFA& operator=(const NFA&) = delete; -}; - -NFA::NFA(Prog* prog) { - prog_ = prog; +}; + +NFA::NFA(Prog* prog) { + prog_ = prog; start_ = prog_->start(); - ncapture_ = 0; - longest_ = false; - endmatch_ = false; - btext_ = NULL; - etext_ = NULL; - q0_.resize(prog_->size()); - q1_.resize(prog_->size()); + ncapture_ = 0; + longest_ = false; + endmatch_ = false; + btext_ = NULL; + etext_ = NULL; + q0_.resize(prog_->size()); + q1_.resize(prog_->size()); // See NFA::AddToThreadq() for why this is so. 
int nstack = 2*prog_->inst_count(kInstCapture) + prog_->inst_count(kInstEmptyWidth) + prog_->inst_count(kInstNop) + 1; // + 1 for start inst stack_ = PODArray<AddState>(nstack); freelist_ = NULL; - match_ = NULL; - matched_ = false; -} - -NFA::~NFA() { - delete[] match_; + match_ = NULL; + matched_ = false; +} + +NFA::~NFA() { + delete[] match_; for (const Thread& t : arena_) delete[] t.capture; -} - -NFA::Thread* NFA::AllocThread() { +} + +NFA::Thread* NFA::AllocThread() { Thread* t = freelist_; if (t != NULL) { freelist_ = t->next; t->ref = 1; // We don't need to touch t->capture because // the caller will immediately overwrite it. - return t; - } + return t; + } arena_.emplace_back(); t = &arena_.back(); t->ref = 1; t->capture = new const char*[ncapture_]; - return t; -} - + return t; +} + NFA::Thread* NFA::Incref(Thread* t) { DCHECK(t != NULL); t->ref++; @@ -194,9 +194,9 @@ void NFA::Decref(Thread* t) { // p is the current input position, and t0 is the current thread. void NFA::AddToThreadq(Threadq* q, int id0, int c, const StringPiece& context, const char* p, Thread* t0) { - if (id0 == 0) - return; - + if (id0 == 0) + return; + // Use stack_ to hold our stack of instructions yet to process. // It was preallocated as follows: // two entries per Capture; @@ -206,12 +206,12 @@ void NFA::AddToThreadq(Threadq* q, int id0, int c, const StringPiece& context, // perform. (Each instruction can be processed at most once.) 
AddState* stk = stack_.data(); int nstk = 0; - + stk[nstk++] = {id0, NULL}; - while (nstk > 0) { + while (nstk > 0) { DCHECK_LE(nstk, stack_.size()); AddState a = stk[--nstk]; - + Loop: if (a.t != NULL) { // t0 was a thread that we allocated and copied in order to @@ -220,76 +220,76 @@ void NFA::AddToThreadq(Threadq* q, int id0, int c, const StringPiece& context, t0 = a.t; } - int id = a.id; - if (id == 0) - continue; - if (q->has_index(id)) { + int id = a.id; + if (id == 0) + continue; + if (q->has_index(id)) { if (ExtraDebug) fprintf(stderr, " [%d%s]\n", id, FormatCapture(t0->capture).c_str()); - continue; - } - - // Create entry in q no matter what. We might fill it in below, - // or we might not. Even if not, it is necessary to have it, + continue; + } + + // Create entry in q no matter what. We might fill it in below, + // or we might not. Even if not, it is necessary to have it, // so that we don't revisit id0 during the recursion. - q->set_new(id, NULL); + q->set_new(id, NULL); Thread** tp = &q->get_existing(id); - int j; - Thread* t; - Prog::Inst* ip = prog_->inst(id); - switch (ip->opcode()) { - default: - LOG(DFATAL) << "unhandled " << ip->opcode() << " in AddToThreadq"; - break; - - case kInstFail: - break; - - case kInstAltMatch: - // Save state; will pick up at next byte. + int j; + Thread* t; + Prog::Inst* ip = prog_->inst(id); + switch (ip->opcode()) { + default: + LOG(DFATAL) << "unhandled " << ip->opcode() << " in AddToThreadq"; + break; + + case kInstFail: + break; + + case kInstAltMatch: + // Save state; will pick up at next byte. t = Incref(t0); - *tp = t; - + *tp = t; + DCHECK(!ip->last()); a = {id+1, NULL}; goto Loop; - - case kInstNop: + + case kInstNop: if (!ip->last()) stk[nstk++] = {id+1, NULL}; - // Continue on. + // Continue on. 
a = {ip->out(), NULL}; goto Loop; - - case kInstCapture: + + case kInstCapture: if (!ip->last()) stk[nstk++] = {id+1, NULL}; - if ((j=ip->cap()) < ncapture_) { + if ((j=ip->cap()) < ncapture_) { // Push a dummy whose only job is to restore t0 - // once we finish exploring this possibility. + // once we finish exploring this possibility. stk[nstk++] = {0, t0}; - - // Record capture. + + // Record capture. t = AllocThread(); CopyCapture(t->capture, t0->capture); t->capture[j] = p; t0 = t; - } + } a = {ip->out(), NULL}; goto Loop; - + case kInstByteRange: if (!ip->Matches(c)) goto Next; - // Save state; will pick up at next byte. + // Save state; will pick up at next byte. t = Incref(t0); - *tp = t; + *tp = t; if (ExtraDebug) fprintf(stderr, " + %d%s\n", id, FormatCapture(t0->capture).c_str()); - + if (ip->hint() == 0) break; a = {id+ip->hint(), NULL}; @@ -308,61 +308,61 @@ void NFA::AddToThreadq(Threadq* q, int id0, int c, const StringPiece& context, a = {id+1, NULL}; goto Loop; - case kInstEmptyWidth: + case kInstEmptyWidth: if (!ip->last()) stk[nstk++] = {id+1, NULL}; - // Continue on if we have all the right flag bits. + // Continue on if we have all the right flag bits. if (ip->empty() & ~Prog::EmptyFlags(context, p)) - break; + break; a = {ip->out(), NULL}; goto Loop; - } - } -} - -// Run runq on byte c, appending new states to nextq. + } + } +} + +// Run runq on byte c, appending new states to nextq. // Updates matched_ and match_ as new, better matches are found. // context is used (with p) for evaluating empty-width specials. // p is the position of byte c in the input string for AddToThreadq; // p-1 will be used when processing Match instructions. -// Frees all the threads on runq. -// If there is a shortcut to the end, returns that shortcut. +// Frees all the threads on runq. +// If there is a shortcut to the end, returns that shortcut. 
int NFA::Step(Threadq* runq, Threadq* nextq, int c, const StringPiece& context, const char* p) { - nextq->clear(); - - for (Threadq::iterator i = runq->begin(); i != runq->end(); ++i) { + nextq->clear(); + + for (Threadq::iterator i = runq->begin(); i != runq->end(); ++i) { Thread* t = i->value(); - if (t == NULL) - continue; - - if (longest_) { - // Can skip any threads started after our current best match. - if (matched_ && match_[0] < t->capture[0]) { + if (t == NULL) + continue; + + if (longest_) { + // Can skip any threads started after our current best match. + if (matched_ && match_[0] < t->capture[0]) { Decref(t); - continue; - } - } - + continue; + } + } + int id = i->index(); - Prog::Inst* ip = prog_->inst(id); - - switch (ip->opcode()) { - default: - // Should only see the values handled below. - LOG(DFATAL) << "Unhandled " << ip->opcode() << " in step"; - break; - - case kInstByteRange: + Prog::Inst* ip = prog_->inst(id); + + switch (ip->opcode()) { + default: + // Should only see the values handled below. + LOG(DFATAL) << "Unhandled " << ip->opcode() << " in step"; + break; + + case kInstByteRange: AddToThreadq(nextq, ip->out(), c, context, p, t); - break; - - case kInstAltMatch: - if (i != runq->begin()) - break; - // The match is ours if we want it. - if (ip->greedy(prog_) || longest_) { + break; + + case kInstAltMatch: + if (i != runq->begin()) + break; + // The match is ours if we want it. + if (ip->greedy(prog_) || longest_) { CopyCapture(match_, t->capture); matched_ = true; @@ -371,13 +371,13 @@ int NFA::Step(Threadq* runq, Threadq* nextq, int c, const StringPiece& context, if (i->value() != NULL) Decref(i->value()); } - runq->clear(); - if (ip->greedy(prog_)) - return ip->out1(); - return ip->out(); - } - break; - + runq->clear(); + if (ip->greedy(prog_)) + return ip->out1(); + return ip->out(); + } + break; + case kInstMatch: { // Avoid invoking undefined behavior (arithmetic on a null pointer) // by storing p instead of p-1. 
(What would the latter even mean?!) @@ -386,127 +386,127 @@ int NFA::Step(Threadq* runq, Threadq* nextq, int c, const StringPiece& context, CopyCapture(match_, t->capture); match_[1] = p; matched_ = true; - break; + break; } - + if (endmatch_ && p-1 != etext_) break; - if (longest_) { - // Leftmost-longest mode: save this match only if - // it is either farther to the left or at the same - // point but longer than an existing match. - if (!matched_ || t->capture[0] < match_[0] || + if (longest_) { + // Leftmost-longest mode: save this match only if + // it is either farther to the left or at the same + // point but longer than an existing match. + if (!matched_ || t->capture[0] < match_[0] || (t->capture[0] == match_[0] && p-1 > match_[1])) { CopyCapture(match_, t->capture); match_[1] = p-1; matched_ = true; } - } else { - // Leftmost-biased mode: this match is by definition - // better than what we've already found (see next line). + } else { + // Leftmost-biased mode: this match is by definition + // better than what we've already found (see next line). CopyCapture(match_, t->capture); match_[1] = p-1; matched_ = true; - - // Cut off the threads that can only find matches - // worse than the one we just found: don't run the - // rest of the current Threadq. + + // Cut off the threads that can only find matches + // worse than the one we just found: don't run the + // rest of the current Threadq. 
Decref(t); for (++i; i != runq->end(); ++i) { if (i->value() != NULL) Decref(i->value()); } - runq->clear(); - return 0; - } - break; + runq->clear(); + return 0; + } + break; } - } + } Decref(t); - } - runq->clear(); - return 0; -} - + } + runq->clear(); + return 0; +} + std::string NFA::FormatCapture(const char** capture) { std::string s; - for (int i = 0; i < ncapture_; i+=2) { - if (capture[i] == NULL) + for (int i = 0; i < ncapture_; i+=2) { + if (capture[i] == NULL) s += "(?,?)"; - else if (capture[i+1] == NULL) + else if (capture[i+1] == NULL) s += StringPrintf("(%td,?)", capture[i] - btext_); - else + else s += StringPrintf("(%td,%td)", capture[i] - btext_, capture[i+1] - btext_); - } - return s; -} - -bool NFA::Search(const StringPiece& text, const StringPiece& const_context, - bool anchored, bool longest, - StringPiece* submatch, int nsubmatch) { - if (start_ == 0) - return false; - - StringPiece context = const_context; + } + return s; +} + +bool NFA::Search(const StringPiece& text, const StringPiece& const_context, + bool anchored, bool longest, + StringPiece* submatch, int nsubmatch) { + if (start_ == 0) + return false; + + StringPiece context = const_context; if (context.data() == NULL) - context = text; - + context = text; + // Sanity check: make sure that text lies within context. if (BeginPtr(text) < BeginPtr(context) || EndPtr(text) > EndPtr(context)) { LOG(DFATAL) << "context does not contain text"; - return false; - } - + return false; + } + if (prog_->anchor_start() && BeginPtr(context) != BeginPtr(text)) - return false; + return false; if (prog_->anchor_end() && EndPtr(context) != EndPtr(text)) - return false; - anchored |= prog_->anchor_start(); - if (prog_->anchor_end()) { - longest = true; - endmatch_ = true; - } - - if (nsubmatch < 0) { - LOG(DFATAL) << "Bad args: nsubmatch=" << nsubmatch; - return false; - } - - // Save search parameters. 
- ncapture_ = 2*nsubmatch; - longest_ = longest; - - if (nsubmatch == 0) { - // We need to maintain match[0], both to distinguish the - // longest match (if longest is true) and also to tell - // whether we've seen any matches at all. - ncapture_ = 2; - } - - match_ = new const char*[ncapture_]; + return false; + anchored |= prog_->anchor_start(); + if (prog_->anchor_end()) { + longest = true; + endmatch_ = true; + } + + if (nsubmatch < 0) { + LOG(DFATAL) << "Bad args: nsubmatch=" << nsubmatch; + return false; + } + + // Save search parameters. + ncapture_ = 2*nsubmatch; + longest_ = longest; + + if (nsubmatch == 0) { + // We need to maintain match[0], both to distinguish the + // longest match (if longest is true) and also to tell + // whether we've seen any matches at all. + ncapture_ = 2; + } + + match_ = new const char*[ncapture_]; memset(match_, 0, ncapture_*sizeof match_[0]); - matched_ = false; - - // For debugging prints. + matched_ = false; + + // For debugging prints. btext_ = context.data(); // For convenience. etext_ = text.data() + text.size(); - + if (ExtraDebug) - fprintf(stderr, "NFA::Search %s (context: %s) anchored=%d longest=%d\n", + fprintf(stderr, "NFA::Search %s (context: %s) anchored=%d longest=%d\n", std::string(text).c_str(), std::string(context).c_str(), anchored, longest); - - // Set up search. - Threadq* runq = &q0_; - Threadq* nextq = &q1_; - runq->clear(); - nextq->clear(); - - // Loop over the text, stepping the machine. + + // Set up search. + Threadq* runq = &q0_; + Threadq* nextq = &q1_; + runq->clear(); + nextq->clear(); + + // Loop over the text, stepping the machine. 
for (const char* p = text.data();; p++) { if (ExtraDebug) { int c = 0; @@ -518,58 +518,58 @@ bool NFA::Search(const StringPiece& text, const StringPiece& const_context, c = p[0] & 0xFF; fprintf(stderr, "%c:", c); - for (Threadq::iterator i = runq->begin(); i != runq->end(); ++i) { + for (Threadq::iterator i = runq->begin(); i != runq->end(); ++i) { Thread* t = i->value(); - if (t == NULL) - continue; + if (t == NULL) + continue; fprintf(stderr, " %d%s", i->index(), FormatCapture(t->capture).c_str()); - } - fprintf(stderr, "\n"); - } - + } + fprintf(stderr, "\n"); + } + // This is a no-op the first time around the loop because runq is empty. int id = Step(runq, nextq, p < etext_ ? p[0] & 0xFF : -1, context, p); - DCHECK_EQ(runq->size(), 0); + DCHECK_EQ(runq->size(), 0); using std::swap; - swap(nextq, runq); - nextq->clear(); - if (id != 0) { - // We're done: full match ahead. + swap(nextq, runq); + nextq->clear(); + if (id != 0) { + // We're done: full match ahead. p = etext_; - for (;;) { - Prog::Inst* ip = prog_->inst(id); - switch (ip->opcode()) { - default: - LOG(DFATAL) << "Unexpected opcode in short circuit: " << ip->opcode(); - break; - - case kInstCapture: + for (;;) { + Prog::Inst* ip = prog_->inst(id); + switch (ip->opcode()) { + default: + LOG(DFATAL) << "Unexpected opcode in short circuit: " << ip->opcode(); + break; + + case kInstCapture: if (ip->cap() < ncapture_) match_[ip->cap()] = p; - id = ip->out(); - continue; - - case kInstNop: - id = ip->out(); - continue; - - case kInstMatch: - match_[1] = p; - matched_ = true; - break; - } - break; - } - break; - } - + id = ip->out(); + continue; + + case kInstNop: + id = ip->out(); + continue; + + case kInstMatch: + match_[1] = p; + matched_ = true; + break; + } + break; + } + break; + } + if (p > etext_) - break; - - // Start a new thread if there have not been any matches. 
- // (No point in starting a new thread if there have been - // matches, since it would be to the right of the match - // we already found.) + break; + + // Start a new thread if there have not been any matches. + // (No point in starting a new thread if there have been + // matches, since it would be to the right of the match + // we already found.) if (!matched_ && (!anchored || p == text.data())) { // Try to use prefix accel (e.g. memchr) to skip ahead. // The search must be unanchored and there must be zero @@ -579,23 +579,23 @@ bool NFA::Search(const StringPiece& text, const StringPiece& const_context, p = reinterpret_cast<const char*>(prog_->PrefixAccel(p, etext_ - p)); if (p == NULL) p = etext_; - } - + } + Thread* t = AllocThread(); CopyCapture(t->capture, match_); t->capture[0] = p; AddToThreadq(runq, start_, p < etext_ ? p[0] & 0xFF : -1, context, p, t); Decref(t); - } - - // If all the threads have died, stop early. - if (runq->size() == 0) { + } + + // If all the threads have died, stop early. + if (runq->size() == 0) { if (ExtraDebug) - fprintf(stderr, "dead\n"); - break; - } - + fprintf(stderr, "dead\n"); + break; + } + // Avoid invoking undefined behavior (arithmetic on a null pointer) // by simply not continuing the loop. // This complements the special case in NFA::Step(). 
@@ -607,15 +607,15 @@ bool NFA::Search(const StringPiece& text, const StringPiece& const_context, nextq->clear(); break; } - } - + } + for (Threadq::iterator i = runq->begin(); i != runq->end(); ++i) { if (i->value() != NULL) Decref(i->value()); } - - if (matched_) { - for (int i = 0; i < nsubmatch; i++) + + if (matched_) { + for (int i = 0; i < nsubmatch; i++) submatch[i] = StringPiece(match_[2 * i], static_cast<size_t>(match_[2 * i + 1] - match_[2 * i])); @@ -623,34 +623,34 @@ bool NFA::Search(const StringPiece& text, const StringPiece& const_context, fprintf(stderr, "match (%td,%td)\n", match_[0] - btext_, match_[1] - btext_); - return true; - } - return false; -} - -bool -Prog::SearchNFA(const StringPiece& text, const StringPiece& context, - Anchor anchor, MatchKind kind, - StringPiece* match, int nmatch) { + return true; + } + return false; +} + +bool +Prog::SearchNFA(const StringPiece& text, const StringPiece& context, + Anchor anchor, MatchKind kind, + StringPiece* match, int nmatch) { if (ExtraDebug) - Dump(); - - NFA nfa(this); - StringPiece sp; - if (kind == kFullMatch) { - anchor = kAnchored; - if (nmatch == 0) { - match = &sp; - nmatch = 1; - } - } - if (!nfa.Search(text, context, anchor == kAnchored, kind != kFirstMatch, match, nmatch)) - return false; + Dump(); + + NFA nfa(this); + StringPiece sp; + if (kind == kFullMatch) { + anchor = kAnchored; + if (nmatch == 0) { + match = &sp; + nmatch = 1; + } + } + if (!nfa.Search(text, context, anchor == kAnchored, kind != kFirstMatch, match, nmatch)) + return false; if (kind == kFullMatch && EndPtr(match[0]) != EndPtr(text)) - return false; - return true; -} - + return false; + return true; +} + // For each instruction i in the program reachable from the start, compute the // number of instructions reachable from i by following only empty transitions // and record that count as fanout[i]. 
@@ -710,4 +710,4 @@ void Prog::Fanout(SparseArray<int>* fanout) { } } -} // namespace re2 +} // namespace re2 diff --git a/contrib/libs/re2/re2/onepass.cc b/contrib/libs/re2/re2/onepass.cc index 263974654d..ff53b54e59 100644 --- a/contrib/libs/re2/re2/onepass.cc +++ b/contrib/libs/re2/re2/onepass.cc @@ -1,59 +1,59 @@ -// Copyright 2008 The RE2 Authors. All Rights Reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// Tested by search_test.cc. -// -// Prog::SearchOnePass is an efficient implementation of -// regular expression search with submatch tracking for -// what I call "one-pass regular expressions". (An alternate -// name might be "backtracking-free regular expressions".) -// -// One-pass regular expressions have the property that -// at each input byte during an anchored match, there may be -// multiple alternatives but only one can proceed for any -// given input byte. -// -// For example, the regexp /x*yx*/ is one-pass: you read -// x's until a y, then you read the y, then you keep reading x's. -// At no point do you have to guess what to do or back up -// and try a different guess. -// -// On the other hand, /x*x/ is not one-pass: when you're -// looking at an input "x", it's not clear whether you should -// use it to extend the x* or as the final x. -// -// More examples: /([^ ]*) (.*)/ is one-pass; /(.*) (.*)/ is not. -// /(\d+)-(\d+)/ is one-pass; /(\d+).(\d+)/ is not. -// -// A simple intuition for identifying one-pass regular expressions -// is that it's always immediately obvious when a repetition ends. -// It must also be immediately obvious which branch of an | to take: -// -// /x(y|z)/ is one-pass, but /(xy|xz)/ is not. -// -// The NFA-based search in nfa.cc does some bookkeeping to -// avoid the need for backtracking and its associated exponential blowup. 
-// But if we have a one-pass regular expression, there is no -// possibility of backtracking, so there is no need for the -// extra bookkeeping. Hence, this code. -// -// On a one-pass regular expression, the NFA code in nfa.cc -// runs at about 1/20 of the backtracking-based PCRE speed. -// In contrast, the code in this file runs at about the same -// speed as PCRE. -// -// One-pass regular expressions get used a lot when RE is -// used for parsing simple strings, so it pays off to -// notice them and handle them efficiently. -// -// See also Anne Brüggemann-Klein and Derick Wood, -// "One-unambiguous regular languages", Information and Computation 142(2). - +// Copyright 2008 The RE2 Authors. All Rights Reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Tested by search_test.cc. +// +// Prog::SearchOnePass is an efficient implementation of +// regular expression search with submatch tracking for +// what I call "one-pass regular expressions". (An alternate +// name might be "backtracking-free regular expressions".) +// +// One-pass regular expressions have the property that +// at each input byte during an anchored match, there may be +// multiple alternatives but only one can proceed for any +// given input byte. +// +// For example, the regexp /x*yx*/ is one-pass: you read +// x's until a y, then you read the y, then you keep reading x's. +// At no point do you have to guess what to do or back up +// and try a different guess. +// +// On the other hand, /x*x/ is not one-pass: when you're +// looking at an input "x", it's not clear whether you should +// use it to extend the x* or as the final x. +// +// More examples: /([^ ]*) (.*)/ is one-pass; /(.*) (.*)/ is not. +// /(\d+)-(\d+)/ is one-pass; /(\d+).(\d+)/ is not. +// +// A simple intuition for identifying one-pass regular expressions +// is that it's always immediately obvious when a repetition ends. 
+// It must also be immediately obvious which branch of an | to take: +// +// /x(y|z)/ is one-pass, but /(xy|xz)/ is not. +// +// The NFA-based search in nfa.cc does some bookkeeping to +// avoid the need for backtracking and its associated exponential blowup. +// But if we have a one-pass regular expression, there is no +// possibility of backtracking, so there is no need for the +// extra bookkeeping. Hence, this code. +// +// On a one-pass regular expression, the NFA code in nfa.cc +// runs at about 1/20 of the backtracking-based PCRE speed. +// In contrast, the code in this file runs at about the same +// speed as PCRE. +// +// One-pass regular expressions get used a lot when RE is +// used for parsing simple strings, so it pays off to +// notice them and handle them efficiently. +// +// See also Anne Brüggemann-Klein and Derick Wood, +// "One-unambiguous regular languages", Information and Computation 142(2). + #include <stdint.h> -#include <string.h> +#include <string.h> #include <algorithm> -#include <map> +#include <map> #include <string> #include <vector> @@ -62,188 +62,188 @@ #include "util/strutil.h" #include "util/utf.h" #include "re2/pod_array.h" -#include "re2/prog.h" +#include "re2/prog.h" #include "re2/sparse_set.h" #include "re2/stringpiece.h" - + // Silence "zero-sized array in struct/union" warning for OneState::action. #ifdef _MSC_VER #pragma warning(disable: 4200) #endif -namespace re2 { - +namespace re2 { + static const bool ExtraDebug = false; - -// The key insight behind this implementation is that the -// non-determinism in an NFA for a one-pass regular expression -// is contained. To explain what that means, first a -// refresher about what regular expression programs look like -// and how the usual NFA execution runs. -// -// In a regular expression program, only the kInstByteRange -// instruction processes an input byte c and moves on to the -// next byte in the string (it does so if c is in the given range). 
-// The kInstByteRange instructions correspond to literal characters -// and character classes in the regular expression. -// -// The kInstAlt instructions are used as wiring to connect the -// kInstByteRange instructions together in interesting ways when -// implementing | + and *. -// The kInstAlt instruction forks execution, like a goto that -// jumps to ip->out() and ip->out1() in parallel. Each of the -// resulting computation paths is called a thread. -// -// The other instructions -- kInstEmptyWidth, kInstMatch, kInstCapture -- -// are interesting in their own right but like kInstAlt they don't -// advance the input pointer. Only kInstByteRange does. -// -// The automaton execution in nfa.cc runs all the possible -// threads of execution in lock-step over the input. To process -// a particular byte, each thread gets run until it either dies -// or finds a kInstByteRange instruction matching the byte. -// If the latter happens, the thread stops just past the -// kInstByteRange instruction (at ip->out()) and waits for -// the other threads to finish processing the input byte. -// Then, once all the threads have processed that input byte, -// the whole process repeats. The kInstAlt state instruction -// might create new threads during input processing, but no -// matter what, all the threads stop after a kInstByteRange -// and wait for the other threads to "catch up". -// Running in lock step like this ensures that the NFA reads -// the input string only once. -// -// Each thread maintains its own set of capture registers -// (the string positions at which it executed the kInstCapture -// instructions corresponding to capturing parentheses in the -// regular expression). Repeated copying of the capture registers -// is the main performance bottleneck in the NFA implementation. 
-// -// A regular expression program is "one-pass" if, no matter what -// the input string, there is only one thread that makes it -// past a kInstByteRange instruction at each input byte. This means -// that there is in some sense only one active thread throughout -// the execution. Other threads might be created during the -// processing of an input byte, but they are ephemeral: only one -// thread is left to start processing the next input byte. -// This is what I meant above when I said the non-determinism -// was "contained". -// -// To execute a one-pass regular expression program, we can build -// a DFA (no non-determinism) that has at most as many states as -// the NFA (compare this to the possibly exponential number of states -// in the general case). Each state records, for each possible -// input byte, the next state along with the conditions required -// before entering that state -- empty-width flags that must be true -// and capture operations that must be performed. It also records -// whether a set of conditions required to finish a match at that -// point in the input rather than process the next byte. - -// A state in the one-pass NFA - just an array of actions indexed -// by the bytemap_[] of the next input byte. (The bytemap -// maps next input bytes into equivalence classes, to reduce -// the memory footprint.) -struct OneState { + +// The key insight behind this implementation is that the +// non-determinism in an NFA for a one-pass regular expression +// is contained. To explain what that means, first a +// refresher about what regular expression programs look like +// and how the usual NFA execution runs. +// +// In a regular expression program, only the kInstByteRange +// instruction processes an input byte c and moves on to the +// next byte in the string (it does so if c is in the given range). +// The kInstByteRange instructions correspond to literal characters +// and character classes in the regular expression. 
+// +// The kInstAlt instructions are used as wiring to connect the +// kInstByteRange instructions together in interesting ways when +// implementing | + and *. +// The kInstAlt instruction forks execution, like a goto that +// jumps to ip->out() and ip->out1() in parallel. Each of the +// resulting computation paths is called a thread. +// +// The other instructions -- kInstEmptyWidth, kInstMatch, kInstCapture -- +// are interesting in their own right but like kInstAlt they don't +// advance the input pointer. Only kInstByteRange does. +// +// The automaton execution in nfa.cc runs all the possible +// threads of execution in lock-step over the input. To process +// a particular byte, each thread gets run until it either dies +// or finds a kInstByteRange instruction matching the byte. +// If the latter happens, the thread stops just past the +// kInstByteRange instruction (at ip->out()) and waits for +// the other threads to finish processing the input byte. +// Then, once all the threads have processed that input byte, +// the whole process repeats. The kInstAlt state instruction +// might create new threads during input processing, but no +// matter what, all the threads stop after a kInstByteRange +// and wait for the other threads to "catch up". +// Running in lock step like this ensures that the NFA reads +// the input string only once. +// +// Each thread maintains its own set of capture registers +// (the string positions at which it executed the kInstCapture +// instructions corresponding to capturing parentheses in the +// regular expression). Repeated copying of the capture registers +// is the main performance bottleneck in the NFA implementation. +// +// A regular expression program is "one-pass" if, no matter what +// the input string, there is only one thread that makes it +// past a kInstByteRange instruction at each input byte. This means +// that there is in some sense only one active thread throughout +// the execution. 
Other threads might be created during the +// processing of an input byte, but they are ephemeral: only one +// thread is left to start processing the next input byte. +// This is what I meant above when I said the non-determinism +// was "contained". +// +// To execute a one-pass regular expression program, we can build +// a DFA (no non-determinism) that has at most as many states as +// the NFA (compare this to the possibly exponential number of states +// in the general case). Each state records, for each possible +// input byte, the next state along with the conditions required +// before entering that state -- empty-width flags that must be true +// and capture operations that must be performed. It also records +// whether a set of conditions required to finish a match at that +// point in the input rather than process the next byte. + +// A state in the one-pass NFA - just an array of actions indexed +// by the bytemap_[] of the next input byte. (The bytemap +// maps next input bytes into equivalence classes, to reduce +// the memory footprint.) +struct OneState { uint32_t matchcond; // conditions to match right now. uint32_t action[]; -}; - +}; + // The uint32_t conditions in the action are a combination of -// condition and capture bits and the next state. The bottom 16 bits -// are the condition and capture bits, and the top 16 are the index of -// the next state. -// -// Bits 0-5 are the empty-width flags from prog.h. -// Bit 6 is kMatchWins, which means the match takes -// priority over moving to next in a first-match search. -// The remaining bits mark capture registers that should -// be set to the current input position. The capture bits -// start at index 2, since the search loop can take care of -// cap[0], cap[1] (the overall match position). -// That means we can handle up to 5 capturing parens: $1 through $4, plus $0. 
-// No input position can satisfy both kEmptyWordBoundary -// and kEmptyNonWordBoundary, so we can use that as a sentinel -// instead of needing an extra bit. - +// condition and capture bits and the next state. The bottom 16 bits +// are the condition and capture bits, and the top 16 are the index of +// the next state. +// +// Bits 0-5 are the empty-width flags from prog.h. +// Bit 6 is kMatchWins, which means the match takes +// priority over moving to next in a first-match search. +// The remaining bits mark capture registers that should +// be set to the current input position. The capture bits +// start at index 2, since the search loop can take care of +// cap[0], cap[1] (the overall match position). +// That means we can handle up to 5 capturing parens: $1 through $4, plus $0. +// No input position can satisfy both kEmptyWordBoundary +// and kEmptyNonWordBoundary, so we can use that as a sentinel +// instead of needing an extra bit. + static const int kIndexShift = 16; // number of bits below index static const int kEmptyShift = 6; // number of empty flags in prog.h -static const int kRealCapShift = kEmptyShift + 1; -static const int kRealMaxCap = (kIndexShift - kRealCapShift) / 2 * 2; - -// Parameters used to skip over cap[0], cap[1]. -static const int kCapShift = kRealCapShift - 2; -static const int kMaxCap = kRealMaxCap + 2; - +static const int kRealCapShift = kEmptyShift + 1; +static const int kRealMaxCap = (kIndexShift - kRealCapShift) / 2 * 2; + +// Parameters used to skip over cap[0], cap[1]. +static const int kCapShift = kRealCapShift - 2; +static const int kMaxCap = kRealMaxCap + 2; + static const uint32_t kMatchWins = 1 << kEmptyShift; static const uint32_t kCapMask = ((1 << kRealMaxCap) - 1) << kRealCapShift; - + static const uint32_t kImpossible = kEmptyWordBoundary | kEmptyNonWordBoundary; - -// Check, at compile time, that prog.h agrees with math above. -// This function is never called. 
-void OnePass_Checks() { + +// Check, at compile time, that prog.h agrees with math above. +// This function is never called. +void OnePass_Checks() { static_assert((1<<kEmptyShift)-1 == kEmptyAllFlags, "kEmptyShift disagrees with kEmptyAllFlags"); - // kMaxCap counts pointers, kMaxOnePassCapture counts pairs. + // kMaxCap counts pointers, kMaxOnePassCapture counts pairs. static_assert(kMaxCap == Prog::kMaxOnePassCapture*2, "kMaxCap disagrees with kMaxOnePassCapture"); -} - +} + static bool Satisfy(uint32_t cond, const StringPiece& context, const char* p) { uint32_t satisfied = Prog::EmptyFlags(context, p); - if (cond & kEmptyAllFlags & ~satisfied) - return false; - return true; -} - -// Apply the capture bits in cond, saving p to the appropriate -// locations in cap[]. + if (cond & kEmptyAllFlags & ~satisfied) + return false; + return true; +} + +// Apply the capture bits in cond, saving p to the appropriate +// locations in cap[]. static void ApplyCaptures(uint32_t cond, const char* p, - const char** cap, int ncap) { - for (int i = 2; i < ncap; i++) - if (cond & (1 << kCapShift << i)) - cap[i] = p; -} - + const char** cap, int ncap) { + for (int i = 2; i < ncap; i++) + if (cond & (1 << kCapShift << i)) + cap[i] = p; +} + // Computes the OneState* for the given nodeindex. static inline OneState* IndexToNode(uint8_t* nodes, int statesize, - int nodeindex) { + int nodeindex) { return reinterpret_cast<OneState*>(nodes + statesize*nodeindex); -} - -bool Prog::SearchOnePass(const StringPiece& text, - const StringPiece& const_context, - Anchor anchor, MatchKind kind, - StringPiece* match, int nmatch) { - if (anchor != kAnchored && kind != kFullMatch) { - LOG(DFATAL) << "Cannot use SearchOnePass for unanchored matches."; - return false; - } - - // Make sure we have at least cap[1], - // because we use it to tell if we matched. 
- int ncap = 2*nmatch; - if (ncap < 2) - ncap = 2; - - const char* cap[kMaxCap]; - for (int i = 0; i < ncap; i++) - cap[i] = NULL; - - const char* matchcap[kMaxCap]; - for (int i = 0; i < ncap; i++) - matchcap[i] = NULL; - - StringPiece context = const_context; +} + +bool Prog::SearchOnePass(const StringPiece& text, + const StringPiece& const_context, + Anchor anchor, MatchKind kind, + StringPiece* match, int nmatch) { + if (anchor != kAnchored && kind != kFullMatch) { + LOG(DFATAL) << "Cannot use SearchOnePass for unanchored matches."; + return false; + } + + // Make sure we have at least cap[1], + // because we use it to tell if we matched. + int ncap = 2*nmatch; + if (ncap < 2) + ncap = 2; + + const char* cap[kMaxCap]; + for (int i = 0; i < ncap; i++) + cap[i] = NULL; + + const char* matchcap[kMaxCap]; + for (int i = 0; i < ncap; i++) + matchcap[i] = NULL; + + StringPiece context = const_context; if (context.data() == NULL) - context = text; + context = text; if (anchor_start() && BeginPtr(context) != BeginPtr(text)) - return false; + return false; if (anchor_end() && EndPtr(context) != EndPtr(text)) - return false; - if (anchor_end()) - kind = kFullMatch; - + return false; + if (anchor_end()) + kind = kFullMatch; + uint8_t* nodes = onepass_nodes_.data(); int statesize = sizeof(OneState) + bytemap_range()*sizeof(uint32_t); // start() is always mapped to the zeroth OneState. 
@@ -251,231 +251,231 @@ bool Prog::SearchOnePass(const StringPiece& text, uint8_t* bytemap = bytemap_; const char* bp = text.data(); const char* ep = text.data() + text.size(); - const char* p; - bool matched = false; - matchcap[0] = bp; - cap[0] = bp; + const char* p; + bool matched = false; + matchcap[0] = bp; + cap[0] = bp; uint32_t nextmatchcond = state->matchcond; - for (p = bp; p < ep; p++) { - int c = bytemap[*p & 0xFF]; + for (p = bp; p < ep; p++) { + int c = bytemap[*p & 0xFF]; uint32_t matchcond = nextmatchcond; uint32_t cond = state->action[c]; - - // Determine whether we can reach act->next. - // If so, advance state and nextmatchcond. - if ((cond & kEmptyAllFlags) == 0 || Satisfy(cond, context, p)) { + + // Determine whether we can reach act->next. + // If so, advance state and nextmatchcond. + if ((cond & kEmptyAllFlags) == 0 || Satisfy(cond, context, p)) { uint32_t nextindex = cond >> kIndexShift; - state = IndexToNode(nodes, statesize, nextindex); - nextmatchcond = state->matchcond; - } else { - state = NULL; - nextmatchcond = kImpossible; - } - - // This code section is carefully tuned. - // The goto sequence is about 10% faster than the - // obvious rewrite as a large if statement in the - // ASCIIMatchRE2 and DotMatchRE2 benchmarks. - - // Saving the match capture registers is expensive. - // Is this intermediate match worth thinking about? - - // Not if we want a full match. - if (kind == kFullMatch) - goto skipmatch; - - // Not if it's impossible. - if (matchcond == kImpossible) - goto skipmatch; - - // Not if the possible match is beaten by the certain - // match at the next byte. When this test is useless - // (e.g., HTTPPartialMatchRE2) it slows the loop by - // about 10%, but when it avoids work (e.g., DotMatchRE2), - // it cuts the loop execution by about 45%. - if ((cond & kMatchWins) == 0 && (nextmatchcond & kEmptyAllFlags) == 0) - goto skipmatch; - - // Finally, the match conditions must be satisfied. 
- if ((matchcond & kEmptyAllFlags) == 0 || Satisfy(matchcond, context, p)) { - for (int i = 2; i < 2*nmatch; i++) - matchcap[i] = cap[i]; - if (nmatch > 1 && (matchcond & kCapMask)) - ApplyCaptures(matchcond, p, matchcap, ncap); - matchcap[1] = p; - matched = true; - - // If we're in longest match mode, we have to keep - // going and see if we find a longer match. - // In first match mode, we can stop if the match - // takes priority over the next state for this input byte. - // That bit is per-input byte and thus in cond, not matchcond. - if (kind == kFirstMatch && (cond & kMatchWins)) - goto done; - } - - skipmatch: - if (state == NULL) - goto done; - if ((cond & kCapMask) && nmatch > 1) - ApplyCaptures(cond, p, cap, ncap); - } - - // Look for match at end of input. - { + state = IndexToNode(nodes, statesize, nextindex); + nextmatchcond = state->matchcond; + } else { + state = NULL; + nextmatchcond = kImpossible; + } + + // This code section is carefully tuned. + // The goto sequence is about 10% faster than the + // obvious rewrite as a large if statement in the + // ASCIIMatchRE2 and DotMatchRE2 benchmarks. + + // Saving the match capture registers is expensive. + // Is this intermediate match worth thinking about? + + // Not if we want a full match. + if (kind == kFullMatch) + goto skipmatch; + + // Not if it's impossible. + if (matchcond == kImpossible) + goto skipmatch; + + // Not if the possible match is beaten by the certain + // match at the next byte. When this test is useless + // (e.g., HTTPPartialMatchRE2) it slows the loop by + // about 10%, but when it avoids work (e.g., DotMatchRE2), + // it cuts the loop execution by about 45%. + if ((cond & kMatchWins) == 0 && (nextmatchcond & kEmptyAllFlags) == 0) + goto skipmatch; + + // Finally, the match conditions must be satisfied. 
+ if ((matchcond & kEmptyAllFlags) == 0 || Satisfy(matchcond, context, p)) { + for (int i = 2; i < 2*nmatch; i++) + matchcap[i] = cap[i]; + if (nmatch > 1 && (matchcond & kCapMask)) + ApplyCaptures(matchcond, p, matchcap, ncap); + matchcap[1] = p; + matched = true; + + // If we're in longest match mode, we have to keep + // going and see if we find a longer match. + // In first match mode, we can stop if the match + // takes priority over the next state for this input byte. + // That bit is per-input byte and thus in cond, not matchcond. + if (kind == kFirstMatch && (cond & kMatchWins)) + goto done; + } + + skipmatch: + if (state == NULL) + goto done; + if ((cond & kCapMask) && nmatch > 1) + ApplyCaptures(cond, p, cap, ncap); + } + + // Look for match at end of input. + { uint32_t matchcond = state->matchcond; - if (matchcond != kImpossible && - ((matchcond & kEmptyAllFlags) == 0 || Satisfy(matchcond, context, p))) { - if (nmatch > 1 && (matchcond & kCapMask)) - ApplyCaptures(matchcond, p, cap, ncap); - for (int i = 2; i < ncap; i++) - matchcap[i] = cap[i]; - matchcap[1] = p; - matched = true; - } - } - -done: - if (!matched) - return false; - for (int i = 0; i < nmatch; i++) + if (matchcond != kImpossible && + ((matchcond & kEmptyAllFlags) == 0 || Satisfy(matchcond, context, p))) { + if (nmatch > 1 && (matchcond & kCapMask)) + ApplyCaptures(matchcond, p, cap, ncap); + for (int i = 2; i < ncap; i++) + matchcap[i] = cap[i]; + matchcap[1] = p; + matched = true; + } + } + +done: + if (!matched) + return false; + for (int i = 0; i < nmatch; i++) match[i] = StringPiece(matchcap[2 * i], static_cast<size_t>(matchcap[2 * i + 1] - matchcap[2 * i])); - return true; -} - - -// Analysis to determine whether a given regexp program is one-pass. - -// If ip is not on workq, adds ip to work queue and returns true. -// If ip is already on work queue, does nothing and returns false. -// If ip is NULL, does nothing and returns true (pretends to add it). 
-typedef SparseSet Instq; -static bool AddQ(Instq *q, int id) { - if (id == 0) - return true; - if (q->contains(id)) - return false; - q->insert(id); - return true; -} - -struct InstCond { - int id; + return true; +} + + +// Analysis to determine whether a given regexp program is one-pass. + +// If ip is not on workq, adds ip to work queue and returns true. +// If ip is already on work queue, does nothing and returns false. +// If ip is NULL, does nothing and returns true (pretends to add it). +typedef SparseSet Instq; +static bool AddQ(Instq *q, int id) { + if (id == 0) + return true; + if (q->contains(id)) + return false; + q->insert(id); + return true; +} + +struct InstCond { + int id; uint32_t cond; -}; - -// Returns whether this is a one-pass program; that is, -// returns whether it is safe to use SearchOnePass on this program. -// These conditions must be true for any instruction ip: -// -// (1) for any other Inst nip, there is at most one input-free -// path from ip to nip. -// (2) there is at most one kInstByte instruction reachable from -// ip that matches any particular byte c. -// (3) there is at most one input-free path from ip to a kInstMatch -// instruction. -// -// This is actually just a conservative approximation: it might -// return false when the answer is true, when kInstEmptyWidth -// instructions are involved. -// Constructs and saves corresponding one-pass NFA on success. -bool Prog::IsOnePass() { - if (did_onepass_) +}; + +// Returns whether this is a one-pass program; that is, +// returns whether it is safe to use SearchOnePass on this program. +// These conditions must be true for any instruction ip: +// +// (1) for any other Inst nip, there is at most one input-free +// path from ip to nip. +// (2) there is at most one kInstByte instruction reachable from +// ip that matches any particular byte c. +// (3) there is at most one input-free path from ip to a kInstMatch +// instruction. 
+// +// This is actually just a conservative approximation: it might +// return false when the answer is true, when kInstEmptyWidth +// instructions are involved. +// Constructs and saves corresponding one-pass NFA on success. +bool Prog::IsOnePass() { + if (did_onepass_) return onepass_nodes_.data() != NULL; - did_onepass_ = true; - - if (start() == 0) // no match - return false; - - // Steal memory for the one-pass NFA from the overall DFA budget. - // Willing to use at most 1/4 of the DFA budget (heuristic). - // Limit max node count to 65000 as a conservative estimate to - // avoid overflowing 16-bit node index in encoding. + did_onepass_ = true; + + if (start() == 0) // no match + return false; + + // Steal memory for the one-pass NFA from the overall DFA budget. + // Willing to use at most 1/4 of the DFA budget (heuristic). + // Limit max node count to 65000 as a conservative estimate to + // avoid overflowing 16-bit node index in encoding. int maxnodes = 2 + inst_count(kInstByteRange); int statesize = sizeof(OneState) + bytemap_range()*sizeof(uint32_t); - if (maxnodes >= 65000 || dfa_mem_ / 4 / statesize < maxnodes) - return false; - - // Flood the graph starting at the start state, and check - // that in each reachable state, each possible byte leads - // to a unique next state. + if (maxnodes >= 65000 || dfa_mem_ / 4 / statesize < maxnodes) + return false; + + // Flood the graph starting at the start state, and check + // that in each reachable state, each possible byte leads + // to a unique next state. 
int stacksize = inst_count(kInstCapture) + inst_count(kInstEmptyWidth) + inst_count(kInstNop) + 1; // + 1 for start inst PODArray<InstCond> stack(stacksize); - int size = this->size(); + int size = this->size(); PODArray<int> nodebyid(size); // indexed by ip memset(nodebyid.data(), 0xFF, size*sizeof nodebyid[0]); - + // Originally, nodes was a uint8_t[maxnodes*statesize], but that was // unnecessarily optimistic: why allocate a large amount of memory // upfront for a large program when it is unlikely to be one-pass? std::vector<uint8_t> nodes; - - Instq tovisit(size), workq(size); - AddQ(&tovisit, start()); - nodebyid[start()] = 0; - int nalloc = 1; + + Instq tovisit(size), workq(size); + AddQ(&tovisit, start()); + nodebyid[start()] = 0; + int nalloc = 1; nodes.insert(nodes.end(), statesize, 0); - for (Instq::iterator it = tovisit.begin(); it != tovisit.end(); ++it) { - int id = *it; - int nodeindex = nodebyid[id]; + for (Instq::iterator it = tovisit.begin(); it != tovisit.end(); ++it) { + int id = *it; + int nodeindex = nodebyid[id]; OneState* node = IndexToNode(nodes.data(), statesize, nodeindex); - - // Flood graph using manual stack, filling in actions as found. - // Default is none. - for (int b = 0; b < bytemap_range_; b++) - node->action[b] = kImpossible; - node->matchcond = kImpossible; - - workq.clear(); - bool matched = false; - int nstack = 0; - stack[nstack].id = id; - stack[nstack++].cond = 0; - while (nstack > 0) { - int id = stack[--nstack].id; + + // Flood graph using manual stack, filling in actions as found. + // Default is none. 
+ for (int b = 0; b < bytemap_range_; b++) + node->action[b] = kImpossible; + node->matchcond = kImpossible; + + workq.clear(); + bool matched = false; + int nstack = 0; + stack[nstack].id = id; + stack[nstack++].cond = 0; + while (nstack > 0) { + int id = stack[--nstack].id; uint32_t cond = stack[nstack].cond; Loop: - Prog::Inst* ip = inst(id); - switch (ip->opcode()) { + Prog::Inst* ip = inst(id); + switch (ip->opcode()) { default: LOG(DFATAL) << "unhandled opcode: " << ip->opcode(); break; - case kInstAltMatch: - // TODO(rsc): Ignoring kInstAltMatch optimization. - // Should implement it in this engine, but it's subtle. + case kInstAltMatch: + // TODO(rsc): Ignoring kInstAltMatch optimization. + // Should implement it in this engine, but it's subtle. DCHECK(!ip->last()); - // If already on work queue, (1) is violated: bail out. + // If already on work queue, (1) is violated: bail out. if (!AddQ(&workq, id+1)) - goto fail; + goto fail; id = id+1; goto Loop; - - case kInstByteRange: { - int nextindex = nodebyid[ip->out()]; - if (nextindex == -1) { - if (nalloc >= maxnodes) { + + case kInstByteRange: { + int nextindex = nodebyid[ip->out()]; + if (nextindex == -1) { + if (nalloc >= maxnodes) { if (ExtraDebug) LOG(ERROR) << StringPrintf( "Not OnePass: hit node limit %d >= %d", nalloc, maxnodes); - goto fail; - } - nextindex = nalloc; + goto fail; + } + nextindex = nalloc; AddQ(&tovisit, ip->out()); nodebyid[ip->out()] = nalloc; - nalloc++; + nalloc++; nodes.insert(nodes.end(), statesize, 0); // Update node because it might have been invalidated. node = IndexToNode(nodes.data(), statesize, nodeindex); - } - for (int c = ip->lo(); c <= ip->hi(); c++) { - int b = bytemap_[c]; + } + for (int c = ip->lo(); c <= ip->hi(); c++) { + int b = bytemap_[c]; // Skip any bytes immediately after c that are also in b. 
while (c < 256-1 && bytemap_[c+1] == b) c++; @@ -483,20 +483,20 @@ bool Prog::IsOnePass() { uint32_t newact = (nextindex << kIndexShift) | cond; if (matched) newact |= kMatchWins; - if ((act & kImpossible) == kImpossible) { - node->action[b] = newact; - } else if (act != newact) { + if ((act & kImpossible) == kImpossible) { + node->action[b] = newact; + } else if (act != newact) { if (ExtraDebug) LOG(ERROR) << StringPrintf( "Not OnePass: conflict on byte %#x at state %d", c, *it); - goto fail; - } - } - if (ip->foldcase()) { + goto fail; + } + } + if (ip->foldcase()) { Rune lo = std::max<Rune>(ip->lo(), 'a') + 'A' - 'a'; Rune hi = std::min<Rune>(ip->hi(), 'z') + 'A' - 'a'; - for (int c = lo; c <= hi; c++) { - int b = bytemap_[c]; + for (int c = lo; c <= hi; c++) { + int b = bytemap_[c]; // Skip any bytes immediately after c that are also in b. while (c < 256-1 && bytemap_[c+1] == b) c++; @@ -504,16 +504,16 @@ bool Prog::IsOnePass() { uint32_t newact = (nextindex << kIndexShift) | cond; if (matched) newact |= kMatchWins; - if ((act & kImpossible) == kImpossible) { - node->action[b] = newact; - } else if (act != newact) { + if ((act & kImpossible) == kImpossible) { + node->action[b] = newact; + } else if (act != newact) { if (ExtraDebug) LOG(ERROR) << StringPrintf( "Not OnePass: conflict on byte %#x at state %d", c, *it); - goto fail; - } - } - } + goto fail; + } + } + } if (ip->last()) break; @@ -522,9 +522,9 @@ bool Prog::IsOnePass() { goto fail; id = id+1; goto Loop; - } - - case kInstCapture: + } + + case kInstCapture: case kInstEmptyWidth: case kInstNop: if (!ip->last()) { @@ -536,37 +536,37 @@ bool Prog::IsOnePass() { } if (ip->opcode() == kInstCapture && ip->cap() < kMaxCap) - cond |= (1 << kCapShift) << ip->cap(); + cond |= (1 << kCapShift) << ip->cap(); if (ip->opcode() == kInstEmptyWidth) cond |= ip->empty(); - - // kInstCapture and kInstNop always proceed to ip->out(). 
- // kInstEmptyWidth only sometimes proceeds to ip->out(), - // but as a conservative approximation we assume it always does. - // We could be a little more precise by looking at what c - // is, but that seems like overkill. - - // If already on work queue, (1) is violated: bail out. - if (!AddQ(&workq, ip->out())) { + + // kInstCapture and kInstNop always proceed to ip->out(). + // kInstEmptyWidth only sometimes proceeds to ip->out(), + // but as a conservative approximation we assume it always does. + // We could be a little more precise by looking at what c + // is, but that seems like overkill. + + // If already on work queue, (1) is violated: bail out. + if (!AddQ(&workq, ip->out())) { if (ExtraDebug) LOG(ERROR) << StringPrintf( "Not OnePass: multiple paths %d -> %d", *it, ip->out()); - goto fail; - } + goto fail; + } id = ip->out(); goto Loop; - - case kInstMatch: - if (matched) { - // (3) is violated + + case kInstMatch: + if (matched) { + // (3) is violated if (ExtraDebug) LOG(ERROR) << StringPrintf( "Not OnePass: multiple matches from %d", *it); - goto fail; - } - matched = true; - node->matchcond = cond; - + goto fail; + } + matched = true; + node->matchcond = cond; + if (ip->last()) break; // If already on work queue, (1) is violated: bail out. @@ -575,49 +575,49 @@ bool Prog::IsOnePass() { id = id+1; goto Loop; - case kInstFail: - break; - } - } - } - + case kInstFail: + break; + } + } + } + if (ExtraDebug) { // For debugging, dump one-pass NFA to LOG(ERROR). 
LOG(ERROR) << "bytemap:\n" << DumpByteMap(); LOG(ERROR) << "prog:\n" << Dump(); std::map<int, int> idmap; - for (int i = 0; i < size; i++) - if (nodebyid[i] != -1) - idmap[nodebyid[i]] = i; - + for (int i = 0; i < size; i++) + if (nodebyid[i] != -1) + idmap[nodebyid[i]] = i; + std::string dump; - for (Instq::iterator it = tovisit.begin(); it != tovisit.end(); ++it) { - int id = *it; - int nodeindex = nodebyid[id]; - if (nodeindex == -1) + for (Instq::iterator it = tovisit.begin(); it != tovisit.end(); ++it) { + int id = *it; + int nodeindex = nodebyid[id]; + if (nodeindex == -1) continue; OneState* node = IndexToNode(nodes.data(), statesize, nodeindex); dump += StringPrintf("node %d id=%d: matchcond=%#x\n", nodeindex, id, node->matchcond); - for (int i = 0; i < bytemap_range_; i++) { - if ((node->action[i] & kImpossible) == kImpossible) - continue; + for (int i = 0; i < bytemap_range_; i++) { + if ((node->action[i] & kImpossible) == kImpossible) + continue; dump += StringPrintf(" %d cond %#x -> %d id=%d\n", i, node->action[i] & 0xFFFF, node->action[i] >> kIndexShift, idmap[node->action[i] >> kIndexShift]); - } - } + } + } LOG(ERROR) << "nodes:\n" << dump; - } - - dfa_mem_ -= nalloc*statesize; + } + + dfa_mem_ -= nalloc*statesize; onepass_nodes_ = PODArray<uint8_t>(nalloc*statesize); memmove(onepass_nodes_.data(), nodes.data(), nalloc*statesize); - return true; - -fail: - return false; -} - -} // namespace re2 + return true; + +fail: + return false; +} + +} // namespace re2 diff --git a/contrib/libs/re2/re2/parse.cc b/contrib/libs/re2/re2/parse.cc index 85f16f060b..ed7c34db16 100644 --- a/contrib/libs/re2/re2/parse.cc +++ b/contrib/libs/re2/re2/parse.cc @@ -1,21 +1,21 @@ -// Copyright 2006 The RE2 Authors. All Rights Reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// Regular expression parser. - -// The parser is a simple precedence-based parser with a -// manual stack. 
The parsing work is done by the methods -// of the ParseState class. The Regexp::Parse function is -// essentially just a lexer that calls the ParseState method -// for each token. - -// The parser recognizes POSIX extended regular expressions -// excluding backreferences, collating elements, and collating -// classes. It also allows the empty string as a regular expression -// and recognizes the Perl escape sequences \d, \s, \w, \D, \S, and \W. -// See regexp.h for rationale. - +// Copyright 2006 The RE2 Authors. All Rights Reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Regular expression parser. + +// The parser is a simple precedence-based parser with a +// manual stack. The parsing work is done by the methods +// of the ParseState class. The Regexp::Parse function is +// essentially just a lexer that calls the ParseState method +// for each token. + +// The parser recognizes POSIX extended regular expressions +// excluding backreferences, collating elements, and collating +// classes. It also allows the empty string as a regular expression +// and recognizes the Perl escape sequences \d, \s, \w, \D, \S, and \W. +// See regexp.h for rationale. + #include <ctype.h> #include <stddef.h> #include <stdint.h> @@ -30,20 +30,20 @@ #include "util/strutil.h" #include "util/utf.h" #include "re2/pod_array.h" -#include "re2/regexp.h" +#include "re2/regexp.h" #include "re2/stringpiece.h" -#include "re2/unicode_casefold.h" -#include "re2/unicode_groups.h" +#include "re2/unicode_casefold.h" +#include "re2/unicode_groups.h" #include "re2/walker-inl.h" - + #if defined(RE2_USE_ICU) #include "unicode/uniset.h" #include "unicode/unistr.h" #include "unicode/utypes.h" #endif -namespace re2 { - +namespace re2 { + // Controls the maximum repeat count permitted by the parser. 
static int maximum_repeat_count = 1000; @@ -51,437 +51,437 @@ void Regexp::FUZZING_ONLY_set_maximum_repeat_count(int i) { maximum_repeat_count = i; } -// Regular expression parse state. -// The list of parsed regexps so far is maintained as a vector of -// Regexp pointers called the stack. Left parenthesis and vertical -// bar markers are also placed on the stack, as Regexps with -// non-standard opcodes. -// Scanning a left parenthesis causes the parser to push a left parenthesis -// marker on the stack. -// Scanning a vertical bar causes the parser to pop the stack until it finds a -// vertical bar or left parenthesis marker (not popping the marker), -// concatenate all the popped results, and push them back on -// the stack (DoConcatenation). -// Scanning a right parenthesis causes the parser to act as though it -// has seen a vertical bar, which then leaves the top of the stack in the -// form LeftParen regexp VerticalBar regexp VerticalBar ... regexp VerticalBar. -// The parser pops all this off the stack and creates an alternation of the -// regexps (DoAlternation). - -class Regexp::ParseState { - public: - ParseState(ParseFlags flags, const StringPiece& whole_regexp, - RegexpStatus* status); - ~ParseState(); - - ParseFlags flags() { return flags_; } - int rune_max() { return rune_max_; } - - // Parse methods. All public methods return a bool saying - // whether parsing should continue. If a method returns - // false, it has set fields in *status_, and the parser - // should return NULL. - - // Pushes the given regular expression onto the stack. - // Could check for too much memory used here. - bool PushRegexp(Regexp* re); - - // Pushes the literal rune r onto the stack. - bool PushLiteral(Rune r); - - // Pushes a regexp with the given op (and no args) onto the stack. - bool PushSimpleOp(RegexpOp op); - - // Pushes a ^ onto the stack. +// Regular expression parse state. 
+// The list of parsed regexps so far is maintained as a vector of +// Regexp pointers called the stack. Left parenthesis and vertical +// bar markers are also placed on the stack, as Regexps with +// non-standard opcodes. +// Scanning a left parenthesis causes the parser to push a left parenthesis +// marker on the stack. +// Scanning a vertical bar causes the parser to pop the stack until it finds a +// vertical bar or left parenthesis marker (not popping the marker), +// concatenate all the popped results, and push them back on +// the stack (DoConcatenation). +// Scanning a right parenthesis causes the parser to act as though it +// has seen a vertical bar, which then leaves the top of the stack in the +// form LeftParen regexp VerticalBar regexp VerticalBar ... regexp VerticalBar. +// The parser pops all this off the stack and creates an alternation of the +// regexps (DoAlternation). + +class Regexp::ParseState { + public: + ParseState(ParseFlags flags, const StringPiece& whole_regexp, + RegexpStatus* status); + ~ParseState(); + + ParseFlags flags() { return flags_; } + int rune_max() { return rune_max_; } + + // Parse methods. All public methods return a bool saying + // whether parsing should continue. If a method returns + // false, it has set fields in *status_, and the parser + // should return NULL. + + // Pushes the given regular expression onto the stack. + // Could check for too much memory used here. + bool PushRegexp(Regexp* re); + + // Pushes the literal rune r onto the stack. + bool PushLiteral(Rune r); + + // Pushes a regexp with the given op (and no args) onto the stack. + bool PushSimpleOp(RegexpOp op); + + // Pushes a ^ onto the stack. bool PushCaret(); - - // Pushes a \b (word == true) or \B (word == false) onto the stack. - bool PushWordBoundary(bool word); - - // Pushes a $ onto the stack. - bool PushDollar(); - - // Pushes a . onto the stack - bool PushDot(); - - // Pushes a repeat operator regexp onto the stack. 
- // A valid argument for the operator must already be on the stack. - // s is the name of the operator, for use in error messages. - bool PushRepeatOp(RegexpOp op, const StringPiece& s, bool nongreedy); - - // Pushes a repetition regexp onto the stack. - // A valid argument for the operator must already be on the stack. - bool PushRepetition(int min, int max, const StringPiece& s, bool nongreedy); - - // Checks whether a particular regexp op is a marker. - bool IsMarker(RegexpOp op); - - // Processes a left parenthesis in the input. - // Pushes a marker onto the stack. - bool DoLeftParen(const StringPiece& name); - bool DoLeftParenNoCapture(); - - // Processes a vertical bar in the input. - bool DoVerticalBar(); - - // Processes a right parenthesis in the input. - bool DoRightParen(); - - // Processes the end of input, returning the final regexp. - Regexp* DoFinish(); - - // Finishes the regexp if necessary, preparing it for use - // in a more complicated expression. - // If it is a CharClassBuilder, converts into a CharClass. - Regexp* FinishRegexp(Regexp*); - - // These routines don't manipulate the parse stack - // directly, but they do need to look at flags_. - // ParseCharClass also manipulates the internals of Regexp - // while creating *out_re. - - // Parse a character class into *out_re. - // Removes parsed text from s. - bool ParseCharClass(StringPiece* s, Regexp** out_re, - RegexpStatus* status); - - // Parse a character class character into *rp. - // Removes parsed text from s. - bool ParseCCCharacter(StringPiece* s, Rune *rp, - const StringPiece& whole_class, - RegexpStatus* status); - - // Parse a character class range into rr. - // Removes parsed text from s. - bool ParseCCRange(StringPiece* s, RuneRange* rr, - const StringPiece& whole_class, - RegexpStatus* status); - - // Parse a Perl flag set or non-capturing group from s. 
- bool ParsePerlFlags(StringPiece* s); - - - // Finishes the current concatenation, - // collapsing it into a single regexp on the stack. - void DoConcatenation(); - - // Finishes the current alternation, - // collapsing it to a single regexp on the stack. - void DoAlternation(); - - // Generalized DoAlternation/DoConcatenation. - void DoCollapse(RegexpOp op); - - // Maybe concatenate Literals into LiteralString. - bool MaybeConcatString(int r, ParseFlags flags); - -private: - ParseFlags flags_; - StringPiece whole_regexp_; - RegexpStatus* status_; - Regexp* stacktop_; - int ncap_; // number of capturing parens seen - int rune_max_; // maximum char value for this encoding - + + // Pushes a \b (word == true) or \B (word == false) onto the stack. + bool PushWordBoundary(bool word); + + // Pushes a $ onto the stack. + bool PushDollar(); + + // Pushes a . onto the stack + bool PushDot(); + + // Pushes a repeat operator regexp onto the stack. + // A valid argument for the operator must already be on the stack. + // s is the name of the operator, for use in error messages. + bool PushRepeatOp(RegexpOp op, const StringPiece& s, bool nongreedy); + + // Pushes a repetition regexp onto the stack. + // A valid argument for the operator must already be on the stack. + bool PushRepetition(int min, int max, const StringPiece& s, bool nongreedy); + + // Checks whether a particular regexp op is a marker. + bool IsMarker(RegexpOp op); + + // Processes a left parenthesis in the input. + // Pushes a marker onto the stack. + bool DoLeftParen(const StringPiece& name); + bool DoLeftParenNoCapture(); + + // Processes a vertical bar in the input. + bool DoVerticalBar(); + + // Processes a right parenthesis in the input. + bool DoRightParen(); + + // Processes the end of input, returning the final regexp. + Regexp* DoFinish(); + + // Finishes the regexp if necessary, preparing it for use + // in a more complicated expression. + // If it is a CharClassBuilder, converts into a CharClass. 
+ Regexp* FinishRegexp(Regexp*); + + // These routines don't manipulate the parse stack + // directly, but they do need to look at flags_. + // ParseCharClass also manipulates the internals of Regexp + // while creating *out_re. + + // Parse a character class into *out_re. + // Removes parsed text from s. + bool ParseCharClass(StringPiece* s, Regexp** out_re, + RegexpStatus* status); + + // Parse a character class character into *rp. + // Removes parsed text from s. + bool ParseCCCharacter(StringPiece* s, Rune *rp, + const StringPiece& whole_class, + RegexpStatus* status); + + // Parse a character class range into rr. + // Removes parsed text from s. + bool ParseCCRange(StringPiece* s, RuneRange* rr, + const StringPiece& whole_class, + RegexpStatus* status); + + // Parse a Perl flag set or non-capturing group from s. + bool ParsePerlFlags(StringPiece* s); + + + // Finishes the current concatenation, + // collapsing it into a single regexp on the stack. + void DoConcatenation(); + + // Finishes the current alternation, + // collapsing it to a single regexp on the stack. + void DoAlternation(); + + // Generalized DoAlternation/DoConcatenation. + void DoCollapse(RegexpOp op); + + // Maybe concatenate Literals into LiteralString. + bool MaybeConcatString(int r, ParseFlags flags); + +private: + ParseFlags flags_; + StringPiece whole_regexp_; + RegexpStatus* status_; + Regexp* stacktop_; + int ncap_; // number of capturing parens seen + int rune_max_; // maximum char value for this encoding + ParseState(const ParseState&) = delete; ParseState& operator=(const ParseState&) = delete; -}; - -// Pseudo-operators - only on parse stack. 
-const RegexpOp kLeftParen = static_cast<RegexpOp>(kMaxRegexpOp+1); -const RegexpOp kVerticalBar = static_cast<RegexpOp>(kMaxRegexpOp+2); - -Regexp::ParseState::ParseState(ParseFlags flags, - const StringPiece& whole_regexp, - RegexpStatus* status) - : flags_(flags), whole_regexp_(whole_regexp), - status_(status), stacktop_(NULL), ncap_(0) { - if (flags_ & Latin1) - rune_max_ = 0xFF; - else - rune_max_ = Runemax; -} - -// Cleans up by freeing all the regexps on the stack. -Regexp::ParseState::~ParseState() { - Regexp* next; - for (Regexp* re = stacktop_; re != NULL; re = next) { - next = re->down_; - re->down_ = NULL; - if (re->op() == kLeftParen) - delete re->name_; - re->Decref(); - } -} - -// Finishes the regexp if necessary, preparing it for use in -// a more complex expression. -// If it is a CharClassBuilder, converts into a CharClass. -Regexp* Regexp::ParseState::FinishRegexp(Regexp* re) { - if (re == NULL) - return NULL; - re->down_ = NULL; - - if (re->op_ == kRegexpCharClass && re->ccb_ != NULL) { - CharClassBuilder* ccb = re->ccb_; - re->ccb_ = NULL; - re->cc_ = ccb->GetCharClass(); - delete ccb; - } - - return re; -} - -// Pushes the given regular expression onto the stack. -// Could check for too much memory used here. -bool Regexp::ParseState::PushRegexp(Regexp* re) { - MaybeConcatString(-1, NoParseFlags); - - // Special case: a character class of one character is just - // a literal. This is a common idiom for escaping - // single characters (e.g., [.] instead of \.), and some - // analysis does better with fewer character classes. - // Similarly, [Aa] can be rewritten as a literal A with ASCII case folding. +}; + +// Pseudo-operators - only on parse stack. 
+const RegexpOp kLeftParen = static_cast<RegexpOp>(kMaxRegexpOp+1); +const RegexpOp kVerticalBar = static_cast<RegexpOp>(kMaxRegexpOp+2); + +Regexp::ParseState::ParseState(ParseFlags flags, + const StringPiece& whole_regexp, + RegexpStatus* status) + : flags_(flags), whole_regexp_(whole_regexp), + status_(status), stacktop_(NULL), ncap_(0) { + if (flags_ & Latin1) + rune_max_ = 0xFF; + else + rune_max_ = Runemax; +} + +// Cleans up by freeing all the regexps on the stack. +Regexp::ParseState::~ParseState() { + Regexp* next; + for (Regexp* re = stacktop_; re != NULL; re = next) { + next = re->down_; + re->down_ = NULL; + if (re->op() == kLeftParen) + delete re->name_; + re->Decref(); + } +} + +// Finishes the regexp if necessary, preparing it for use in +// a more complex expression. +// If it is a CharClassBuilder, converts into a CharClass. +Regexp* Regexp::ParseState::FinishRegexp(Regexp* re) { + if (re == NULL) + return NULL; + re->down_ = NULL; + + if (re->op_ == kRegexpCharClass && re->ccb_ != NULL) { + CharClassBuilder* ccb = re->ccb_; + re->ccb_ = NULL; + re->cc_ = ccb->GetCharClass(); + delete ccb; + } + + return re; +} + +// Pushes the given regular expression onto the stack. +// Could check for too much memory used here. +bool Regexp::ParseState::PushRegexp(Regexp* re) { + MaybeConcatString(-1, NoParseFlags); + + // Special case: a character class of one character is just + // a literal. This is a common idiom for escaping + // single characters (e.g., [.] instead of \.), and some + // analysis does better with fewer character classes. + // Similarly, [Aa] can be rewritten as a literal A with ASCII case folding. 
if (re->op_ == kRegexpCharClass && re->ccb_ != NULL) { re->ccb_->RemoveAbove(rune_max_); - if (re->ccb_->size() == 1) { - Rune r = re->ccb_->begin()->lo; - re->Decref(); - re = new Regexp(kRegexpLiteral, flags_); - re->rune_ = r; - } else if (re->ccb_->size() == 2) { - Rune r = re->ccb_->begin()->lo; - if ('A' <= r && r <= 'Z' && re->ccb_->Contains(r + 'a' - 'A')) { - re->Decref(); - re = new Regexp(kRegexpLiteral, flags_ | FoldCase); - re->rune_ = r + 'a' - 'A'; - } - } - } - - if (!IsMarker(re->op())) - re->simple_ = re->ComputeSimple(); - re->down_ = stacktop_; - stacktop_ = re; - return true; -} - -// Searches the case folding tables and returns the CaseFold* that contains r. -// If there isn't one, returns the CaseFold* with smallest f->lo bigger than r. -// If there isn't one, returns NULL. + if (re->ccb_->size() == 1) { + Rune r = re->ccb_->begin()->lo; + re->Decref(); + re = new Regexp(kRegexpLiteral, flags_); + re->rune_ = r; + } else if (re->ccb_->size() == 2) { + Rune r = re->ccb_->begin()->lo; + if ('A' <= r && r <= 'Z' && re->ccb_->Contains(r + 'a' - 'A')) { + re->Decref(); + re = new Regexp(kRegexpLiteral, flags_ | FoldCase); + re->rune_ = r + 'a' - 'A'; + } + } + } + + if (!IsMarker(re->op())) + re->simple_ = re->ComputeSimple(); + re->down_ = stacktop_; + stacktop_ = re; + return true; +} + +// Searches the case folding tables and returns the CaseFold* that contains r. +// If there isn't one, returns the CaseFold* with smallest f->lo bigger than r. +// If there isn't one, returns NULL. const CaseFold* LookupCaseFold(const CaseFold *f, int n, Rune r) { const CaseFold* ef = f + n; - - // Binary search for entry containing r. - while (n > 0) { + + // Binary search for entry containing r. 
+ while (n > 0) { int m = n/2; - if (f[m].lo <= r && r <= f[m].hi) - return &f[m]; - if (r < f[m].lo) { - n = m; - } else { - f += m+1; - n -= m+1; - } - } - - // There is no entry that contains r, but f points + if (f[m].lo <= r && r <= f[m].hi) + return &f[m]; + if (r < f[m].lo) { + n = m; + } else { + f += m+1; + n -= m+1; + } + } + + // There is no entry that contains r, but f points // where it would have been. Unless f points at - // the end of the array, it points at the next entry - // after r. + // the end of the array, it points at the next entry + // after r. if (f < ef) - return f; - - // No entry contains r; no entry contains runes > r. - return NULL; -} - -// Returns the result of applying the fold f to the rune r. + return f; + + // No entry contains r; no entry contains runes > r. + return NULL; +} + +// Returns the result of applying the fold f to the rune r. Rune ApplyFold(const CaseFold *f, Rune r) { - switch (f->delta) { - default: - return r + f->delta; - + switch (f->delta) { + default: + return r + f->delta; + case EvenOddSkip: // even <-> odd but only applies to every other if ((r - f->lo) % 2) return r; FALLTHROUGH_INTENDED; - case EvenOdd: // even <-> odd - if (r%2 == 0) - return r + 1; - return r - 1; - + case EvenOdd: // even <-> odd + if (r%2 == 0) + return r + 1; + return r - 1; + case OddEvenSkip: // odd <-> even but only applies to every other if ((r - f->lo) % 2) return r; FALLTHROUGH_INTENDED; - case OddEven: // odd <-> even - if (r%2 == 1) - return r + 1; - return r - 1; - } -} - -// Returns the next Rune in r's folding cycle (see unicode_casefold.h). -// Examples: -// CycleFoldRune('A') = 'a' -// CycleFoldRune('a') = 'A' -// -// CycleFoldRune('K') = 'k' -// CycleFoldRune('k') = 0x212A (Kelvin) -// CycleFoldRune(0x212A) = 'K' -// -// CycleFoldRune('?') = '?' 
-Rune CycleFoldRune(Rune r) { + case OddEven: // odd <-> even + if (r%2 == 1) + return r + 1; + return r - 1; + } +} + +// Returns the next Rune in r's folding cycle (see unicode_casefold.h). +// Examples: +// CycleFoldRune('A') = 'a' +// CycleFoldRune('a') = 'A' +// +// CycleFoldRune('K') = 'k' +// CycleFoldRune('k') = 0x212A (Kelvin) +// CycleFoldRune(0x212A) = 'K' +// +// CycleFoldRune('?') = '?' +Rune CycleFoldRune(Rune r) { const CaseFold* f = LookupCaseFold(unicode_casefold, num_unicode_casefold, r); - if (f == NULL || r < f->lo) - return r; - return ApplyFold(f, r); -} - -// Add lo-hi to the class, along with their fold-equivalent characters. -// If lo-hi is already in the class, assume that the fold-equivalent -// chars are there too, so there's no work to do. -static void AddFoldedRange(CharClassBuilder* cc, Rune lo, Rune hi, int depth) { - // AddFoldedRange calls itself recursively for each rune in the fold cycle. - // Most folding cycles are small: there aren't any bigger than four in the - // current Unicode tables. make_unicode_casefold.py checks that - // the cycles are not too long, and we double-check here using depth. - if (depth > 10) { - LOG(DFATAL) << "AddFoldedRange recurses too much."; - return; - } - - if (!cc->AddRange(lo, hi)) // lo-hi was already there? we're done - return; - - while (lo <= hi) { + if (f == NULL || r < f->lo) + return r; + return ApplyFold(f, r); +} + +// Add lo-hi to the class, along with their fold-equivalent characters. +// If lo-hi is already in the class, assume that the fold-equivalent +// chars are there too, so there's no work to do. +static void AddFoldedRange(CharClassBuilder* cc, Rune lo, Rune hi, int depth) { + // AddFoldedRange calls itself recursively for each rune in the fold cycle. + // Most folding cycles are small: there aren't any bigger than four in the + // current Unicode tables. make_unicode_casefold.py checks that + // the cycles are not too long, and we double-check here using depth. 
+ if (depth > 10) { + LOG(DFATAL) << "AddFoldedRange recurses too much."; + return; + } + + if (!cc->AddRange(lo, hi)) // lo-hi was already there? we're done + return; + + while (lo <= hi) { const CaseFold* f = LookupCaseFold(unicode_casefold, num_unicode_casefold, lo); - if (f == NULL) // lo has no fold, nor does anything above lo - break; - if (lo < f->lo) { // lo has no fold; next rune with a fold is f->lo - lo = f->lo; - continue; - } - - // Add in the result of folding the range lo - f->hi - // and that range's fold, recursively. - Rune lo1 = lo; + if (f == NULL) // lo has no fold, nor does anything above lo + break; + if (lo < f->lo) { // lo has no fold; next rune with a fold is f->lo + lo = f->lo; + continue; + } + + // Add in the result of folding the range lo - f->hi + // and that range's fold, recursively. + Rune lo1 = lo; Rune hi1 = std::min<Rune>(hi, f->hi); - switch (f->delta) { - default: - lo1 += f->delta; - hi1 += f->delta; - break; - case EvenOdd: - if (lo1%2 == 1) - lo1--; - if (hi1%2 == 0) - hi1++; - break; - case OddEven: - if (lo1%2 == 0) - lo1--; - if (hi1%2 == 1) - hi1++; - break; - } - AddFoldedRange(cc, lo1, hi1, depth+1); - - // Pick up where this fold left off. - lo = f->hi + 1; - } -} - -// Pushes the literal rune r onto the stack. -bool Regexp::ParseState::PushLiteral(Rune r) { - // Do case folding if needed. - if ((flags_ & FoldCase) && CycleFoldRune(r) != r) { - Regexp* re = new Regexp(kRegexpCharClass, flags_ & ~FoldCase); - re->ccb_ = new CharClassBuilder; - Rune r1 = r; - do { - if (!(flags_ & NeverNL) || r != '\n') { - re->ccb_->AddRange(r, r); - } - r = CycleFoldRune(r); - } while (r != r1); - return PushRegexp(re); - } - - // Exclude newline if applicable. - if ((flags_ & NeverNL) && r == '\n') - return PushRegexp(new Regexp(kRegexpNoMatch, flags_)); - - // No fancy stuff worked. Ordinary literal. 
- if (MaybeConcatString(r, flags_)) - return true; - - Regexp* re = new Regexp(kRegexpLiteral, flags_); - re->rune_ = r; - return PushRegexp(re); -} - -// Pushes a ^ onto the stack. + switch (f->delta) { + default: + lo1 += f->delta; + hi1 += f->delta; + break; + case EvenOdd: + if (lo1%2 == 1) + lo1--; + if (hi1%2 == 0) + hi1++; + break; + case OddEven: + if (lo1%2 == 0) + lo1--; + if (hi1%2 == 1) + hi1++; + break; + } + AddFoldedRange(cc, lo1, hi1, depth+1); + + // Pick up where this fold left off. + lo = f->hi + 1; + } +} + +// Pushes the literal rune r onto the stack. +bool Regexp::ParseState::PushLiteral(Rune r) { + // Do case folding if needed. + if ((flags_ & FoldCase) && CycleFoldRune(r) != r) { + Regexp* re = new Regexp(kRegexpCharClass, flags_ & ~FoldCase); + re->ccb_ = new CharClassBuilder; + Rune r1 = r; + do { + if (!(flags_ & NeverNL) || r != '\n') { + re->ccb_->AddRange(r, r); + } + r = CycleFoldRune(r); + } while (r != r1); + return PushRegexp(re); + } + + // Exclude newline if applicable. + if ((flags_ & NeverNL) && r == '\n') + return PushRegexp(new Regexp(kRegexpNoMatch, flags_)); + + // No fancy stuff worked. Ordinary literal. + if (MaybeConcatString(r, flags_)) + return true; + + Regexp* re = new Regexp(kRegexpLiteral, flags_); + re->rune_ = r; + return PushRegexp(re); +} + +// Pushes a ^ onto the stack. bool Regexp::ParseState::PushCaret() { - if (flags_ & OneLine) { - return PushSimpleOp(kRegexpBeginText); - } - return PushSimpleOp(kRegexpBeginLine); -} - -// Pushes a \b or \B onto the stack. -bool Regexp::ParseState::PushWordBoundary(bool word) { - if (word) - return PushSimpleOp(kRegexpWordBoundary); - return PushSimpleOp(kRegexpNoWordBoundary); -} - -// Pushes a $ onto the stack. -bool Regexp::ParseState::PushDollar() { - if (flags_ & OneLine) { - // Clumsy marker so that MimicsPCRE() can tell whether - // this kRegexpEndText was a $ and not a \z. 
- Regexp::ParseFlags oflags = flags_; - flags_ = flags_ | WasDollar; - bool ret = PushSimpleOp(kRegexpEndText); - flags_ = oflags; - return ret; - } - return PushSimpleOp(kRegexpEndLine); -} - -// Pushes a . onto the stack. -bool Regexp::ParseState::PushDot() { - if ((flags_ & DotNL) && !(flags_ & NeverNL)) - return PushSimpleOp(kRegexpAnyChar); - // Rewrite . into [^\n] - Regexp* re = new Regexp(kRegexpCharClass, flags_ & ~FoldCase); - re->ccb_ = new CharClassBuilder; - re->ccb_->AddRange(0, '\n' - 1); - re->ccb_->AddRange('\n' + 1, rune_max_); - return PushRegexp(re); -} - -// Pushes a regexp with the given op (and no args) onto the stack. -bool Regexp::ParseState::PushSimpleOp(RegexpOp op) { - Regexp* re = new Regexp(op, flags_); - return PushRegexp(re); -} - -// Pushes a repeat operator regexp onto the stack. -// A valid argument for the operator must already be on the stack. -// The char c is the name of the operator, for use in error messages. -bool Regexp::ParseState::PushRepeatOp(RegexpOp op, const StringPiece& s, - bool nongreedy) { - if (stacktop_ == NULL || IsMarker(stacktop_->op())) { - status_->set_code(kRegexpRepeatArgument); - status_->set_error_arg(s); - return false; - } - Regexp::ParseFlags fl = flags_; - if (nongreedy) - fl = fl ^ NonGreedy; + if (flags_ & OneLine) { + return PushSimpleOp(kRegexpBeginText); + } + return PushSimpleOp(kRegexpBeginLine); +} + +// Pushes a \b or \B onto the stack. +bool Regexp::ParseState::PushWordBoundary(bool word) { + if (word) + return PushSimpleOp(kRegexpWordBoundary); + return PushSimpleOp(kRegexpNoWordBoundary); +} + +// Pushes a $ onto the stack. +bool Regexp::ParseState::PushDollar() { + if (flags_ & OneLine) { + // Clumsy marker so that MimicsPCRE() can tell whether + // this kRegexpEndText was a $ and not a \z. 
+ Regexp::ParseFlags oflags = flags_; + flags_ = flags_ | WasDollar; + bool ret = PushSimpleOp(kRegexpEndText); + flags_ = oflags; + return ret; + } + return PushSimpleOp(kRegexpEndLine); +} + +// Pushes a . onto the stack. +bool Regexp::ParseState::PushDot() { + if ((flags_ & DotNL) && !(flags_ & NeverNL)) + return PushSimpleOp(kRegexpAnyChar); + // Rewrite . into [^\n] + Regexp* re = new Regexp(kRegexpCharClass, flags_ & ~FoldCase); + re->ccb_ = new CharClassBuilder; + re->ccb_->AddRange(0, '\n' - 1); + re->ccb_->AddRange('\n' + 1, rune_max_); + return PushRegexp(re); +} + +// Pushes a regexp with the given op (and no args) onto the stack. +bool Regexp::ParseState::PushSimpleOp(RegexpOp op) { + Regexp* re = new Regexp(op, flags_); + return PushRegexp(re); +} + +// Pushes a repeat operator regexp onto the stack. +// A valid argument for the operator must already be on the stack. +// The char c is the name of the operator, for use in error messages. +bool Regexp::ParseState::PushRepeatOp(RegexpOp op, const StringPiece& s, + bool nongreedy) { + if (stacktop_ == NULL || IsMarker(stacktop_->op())) { + status_->set_code(kRegexpRepeatArgument); + status_->set_error_arg(s); + return false; + } + Regexp::ParseFlags fl = flags_; + if (nongreedy) + fl = fl ^ NonGreedy; // Squash **, ++ and ??. Regexp::Star() et al. handle this too, but // they're mostly for use during simplification, not during parsing. 
@@ -499,15 +499,15 @@ bool Regexp::ParseState::PushRepeatOp(RegexpOp op, const StringPiece& s, return true; } - Regexp* re = new Regexp(op, fl); - re->AllocSub(1); - re->down_ = stacktop_->down_; - re->sub()[0] = FinishRegexp(stacktop_); - re->simple_ = re->ComputeSimple(); - stacktop_ = re; - return true; -} - + Regexp* re = new Regexp(op, fl); + re->AllocSub(1); + re->down_ = stacktop_->down_; + re->sub()[0] = FinishRegexp(stacktop_); + re->simple_ = re->ComputeSimple(); + stacktop_ = re; + return true; +} + // RepetitionWalker reports whether the repetition regexp is valid. // Valid means that the combination of the top-level repetition // and any inner repetitions does not exceed n copies of the @@ -563,34 +563,34 @@ int RepetitionWalker::ShortVisit(Regexp* re, int parent_arg) { return 0; } -// Pushes a repetition regexp onto the stack. -// A valid argument for the operator must already be on the stack. -bool Regexp::ParseState::PushRepetition(int min, int max, - const StringPiece& s, - bool nongreedy) { +// Pushes a repetition regexp onto the stack. +// A valid argument for the operator must already be on the stack. 
+bool Regexp::ParseState::PushRepetition(int min, int max, + const StringPiece& s, + bool nongreedy) { if ((max != -1 && max < min) || min > maximum_repeat_count || max > maximum_repeat_count) { - status_->set_code(kRegexpRepeatSize); - status_->set_error_arg(s); - return false; - } - if (stacktop_ == NULL || IsMarker(stacktop_->op())) { - status_->set_code(kRegexpRepeatArgument); - status_->set_error_arg(s); - return false; - } - Regexp::ParseFlags fl = flags_; - if (nongreedy) - fl = fl ^ NonGreedy; - Regexp* re = new Regexp(kRegexpRepeat, fl); - re->min_ = min; - re->max_ = max; - re->AllocSub(1); - re->down_ = stacktop_->down_; - re->sub()[0] = FinishRegexp(stacktop_); - re->simple_ = re->ComputeSimple(); - stacktop_ = re; + status_->set_code(kRegexpRepeatSize); + status_->set_error_arg(s); + return false; + } + if (stacktop_ == NULL || IsMarker(stacktop_->op())) { + status_->set_code(kRegexpRepeatArgument); + status_->set_error_arg(s); + return false; + } + Regexp::ParseFlags fl = flags_; + if (nongreedy) + fl = fl ^ NonGreedy; + Regexp* re = new Regexp(kRegexpRepeat, fl); + re->min_ = min; + re->max_ = max; + re->AllocSub(1); + re->down_ = stacktop_->down_; + re->sub()[0] = FinishRegexp(stacktop_); + re->simple_ = re->ComputeSimple(); + stacktop_ = re; if (min >= 2 || max >= 2) { RepetitionWalker w; if (w.Walk(stacktop_, maximum_repeat_count) == 0) { @@ -599,47 +599,47 @@ bool Regexp::ParseState::PushRepetition(int min, int max, return false; } } - return true; -} - -// Checks whether a particular regexp op is a marker. -bool Regexp::ParseState::IsMarker(RegexpOp op) { - return op >= kLeftParen; -} - -// Processes a left parenthesis in the input. -// Pushes a marker onto the stack. -bool Regexp::ParseState::DoLeftParen(const StringPiece& name) { - Regexp* re = new Regexp(kLeftParen, flags_); - re->cap_ = ++ncap_; - if (name.data() != NULL) + return true; +} + +// Checks whether a particular regexp op is a marker. 
+bool Regexp::ParseState::IsMarker(RegexpOp op) { + return op >= kLeftParen; +} + +// Processes a left parenthesis in the input. +// Pushes a marker onto the stack. +bool Regexp::ParseState::DoLeftParen(const StringPiece& name) { + Regexp* re = new Regexp(kLeftParen, flags_); + re->cap_ = ++ncap_; + if (name.data() != NULL) re->name_ = new std::string(name); - return PushRegexp(re); -} - -// Pushes a non-capturing marker onto the stack. -bool Regexp::ParseState::DoLeftParenNoCapture() { - Regexp* re = new Regexp(kLeftParen, flags_); - re->cap_ = -1; - return PushRegexp(re); -} - -// Processes a vertical bar in the input. -bool Regexp::ParseState::DoVerticalBar() { - MaybeConcatString(-1, NoParseFlags); - DoConcatenation(); - - // Below the vertical bar is a list to alternate. - // Above the vertical bar is a list to concatenate. - // We just did the concatenation, so either swap - // the result below the vertical bar or push a new - // vertical bar on the stack. - Regexp* r1; - Regexp* r2; - if ((r1 = stacktop_) != NULL && + return PushRegexp(re); +} + +// Pushes a non-capturing marker onto the stack. +bool Regexp::ParseState::DoLeftParenNoCapture() { + Regexp* re = new Regexp(kLeftParen, flags_); + re->cap_ = -1; + return PushRegexp(re); +} + +// Processes a vertical bar in the input. +bool Regexp::ParseState::DoVerticalBar() { + MaybeConcatString(-1, NoParseFlags); + DoConcatenation(); + + // Below the vertical bar is a list to alternate. + // Above the vertical bar is a list to concatenate. + // We just did the concatenation, so either swap + // the result below the vertical bar or push a new + // vertical bar on the stack. + Regexp* r1; + Regexp* r2; + if ((r1 = stacktop_) != NULL && (r2 = r1->down_) != NULL && - r2->op() == kVerticalBar) { - Regexp* r3; + r2->op() == kVerticalBar) { + Regexp* r3; if ((r3 = r2->down_) != NULL && (r1->op() == kRegexpAnyChar || r3->op() == kRegexpAnyChar)) { // AnyChar is above or below the vertical bar. 
Let it subsume @@ -652,7 +652,7 @@ bool Regexp::ParseState::DoVerticalBar() { stacktop_ = r2; r1->Decref(); return true; - } + } if (r1->op() == kRegexpAnyChar && (r3->op() == kRegexpLiteral || r3->op() == kRegexpCharClass || @@ -664,212 +664,212 @@ bool Regexp::ParseState::DoVerticalBar() { r3->Decref(); return true; } - } - // Swap r1 below vertical bar (r2). - r1->down_ = r2->down_; - r2->down_ = r1; - stacktop_ = r2; - return true; - } - return PushSimpleOp(kVerticalBar); -} - -// Processes a right parenthesis in the input. -bool Regexp::ParseState::DoRightParen() { - // Finish the current concatenation and alternation. - DoAlternation(); - - // The stack should be: LeftParen regexp - // Remove the LeftParen, leaving the regexp, - // parenthesized. - Regexp* r1; - Regexp* r2; - if ((r1 = stacktop_) == NULL || - (r2 = r1->down_) == NULL || - r2->op() != kLeftParen) { + } + // Swap r1 below vertical bar (r2). + r1->down_ = r2->down_; + r2->down_ = r1; + stacktop_ = r2; + return true; + } + return PushSimpleOp(kVerticalBar); +} + +// Processes a right parenthesis in the input. +bool Regexp::ParseState::DoRightParen() { + // Finish the current concatenation and alternation. + DoAlternation(); + + // The stack should be: LeftParen regexp + // Remove the LeftParen, leaving the regexp, + // parenthesized. + Regexp* r1; + Regexp* r2; + if ((r1 = stacktop_) == NULL || + (r2 = r1->down_) == NULL || + r2->op() != kLeftParen) { status_->set_code(kRegexpUnexpectedParen); - status_->set_error_arg(whole_regexp_); - return false; - } - - // Pop off r1, r2. Will Decref or reuse below. - stacktop_ = r2->down_; - - // Restore flags from when paren opened. - Regexp* re = r2; - flags_ = re->parse_flags(); - - // Rewrite LeftParen as capture if needed. 
- if (re->cap_ > 0) { - re->op_ = kRegexpCapture; - // re->cap_ is already set - re->AllocSub(1); - re->sub()[0] = FinishRegexp(r1); - re->simple_ = re->ComputeSimple(); - } else { - re->Decref(); - re = r1; - } - return PushRegexp(re); -} - -// Processes the end of input, returning the final regexp. -Regexp* Regexp::ParseState::DoFinish() { - DoAlternation(); - Regexp* re = stacktop_; - if (re != NULL && re->down_ != NULL) { - status_->set_code(kRegexpMissingParen); - status_->set_error_arg(whole_regexp_); - return NULL; - } - stacktop_ = NULL; - return FinishRegexp(re); -} - -// Returns the leading regexp that re starts with. -// The returned Regexp* points into a piece of re, -// so it must not be used after the caller calls re->Decref(). -Regexp* Regexp::LeadingRegexp(Regexp* re) { - if (re->op() == kRegexpEmptyMatch) - return NULL; - if (re->op() == kRegexpConcat && re->nsub() >= 2) { - Regexp** sub = re->sub(); - if (sub[0]->op() == kRegexpEmptyMatch) - return NULL; - return sub[0]; - } - return re; -} - -// Removes LeadingRegexp(re) from re and returns what's left. -// Consumes the reference to re and may edit it in place. -// If caller wants to hold on to LeadingRegexp(re), -// must have already Incref'ed it. -Regexp* Regexp::RemoveLeadingRegexp(Regexp* re) { - if (re->op() == kRegexpEmptyMatch) - return re; - if (re->op() == kRegexpConcat && re->nsub() >= 2) { - Regexp** sub = re->sub(); - if (sub[0]->op() == kRegexpEmptyMatch) - return re; - sub[0]->Decref(); - sub[0] = NULL; - if (re->nsub() == 2) { - // Collapse concatenation to single regexp. - Regexp* nre = sub[1]; - sub[1] = NULL; - re->Decref(); - return nre; - } - // 3 or more -> 2 or more. - re->nsub_--; - memmove(sub, sub + 1, re->nsub_ * sizeof sub[0]); - return re; - } - Regexp::ParseFlags pf = re->parse_flags(); - re->Decref(); - return new Regexp(kRegexpEmptyMatch, pf); -} - -// Returns the leading string that re starts with. 
-// The returned Rune* points into a piece of re, -// so it must not be used after the caller calls re->Decref(). -Rune* Regexp::LeadingString(Regexp* re, int *nrune, - Regexp::ParseFlags *flags) { - while (re->op() == kRegexpConcat && re->nsub() > 0) - re = re->sub()[0]; - - *flags = static_cast<Regexp::ParseFlags>(re->parse_flags_ & Regexp::FoldCase); - - if (re->op() == kRegexpLiteral) { - *nrune = 1; - return &re->rune_; - } - - if (re->op() == kRegexpLiteralString) { - *nrune = re->nrunes_; - return re->runes_; - } - - *nrune = 0; - return NULL; -} - -// Removes the first n leading runes from the beginning of re. -// Edits re in place. -void Regexp::RemoveLeadingString(Regexp* re, int n) { - // Chase down concats to find first string. - // For regexps generated by parser, nested concats are - // flattened except when doing so would overflow the 16-bit - // limit on the size of a concatenation, so we should never - // see more than two here. - Regexp* stk[4]; + status_->set_error_arg(whole_regexp_); + return false; + } + + // Pop off r1, r2. Will Decref or reuse below. + stacktop_ = r2->down_; + + // Restore flags from when paren opened. + Regexp* re = r2; + flags_ = re->parse_flags(); + + // Rewrite LeftParen as capture if needed. + if (re->cap_ > 0) { + re->op_ = kRegexpCapture; + // re->cap_ is already set + re->AllocSub(1); + re->sub()[0] = FinishRegexp(r1); + re->simple_ = re->ComputeSimple(); + } else { + re->Decref(); + re = r1; + } + return PushRegexp(re); +} + +// Processes the end of input, returning the final regexp. +Regexp* Regexp::ParseState::DoFinish() { + DoAlternation(); + Regexp* re = stacktop_; + if (re != NULL && re->down_ != NULL) { + status_->set_code(kRegexpMissingParen); + status_->set_error_arg(whole_regexp_); + return NULL; + } + stacktop_ = NULL; + return FinishRegexp(re); +} + +// Returns the leading regexp that re starts with. 
+// The returned Regexp* points into a piece of re, +// so it must not be used after the caller calls re->Decref(). +Regexp* Regexp::LeadingRegexp(Regexp* re) { + if (re->op() == kRegexpEmptyMatch) + return NULL; + if (re->op() == kRegexpConcat && re->nsub() >= 2) { + Regexp** sub = re->sub(); + if (sub[0]->op() == kRegexpEmptyMatch) + return NULL; + return sub[0]; + } + return re; +} + +// Removes LeadingRegexp(re) from re and returns what's left. +// Consumes the reference to re and may edit it in place. +// If caller wants to hold on to LeadingRegexp(re), +// must have already Incref'ed it. +Regexp* Regexp::RemoveLeadingRegexp(Regexp* re) { + if (re->op() == kRegexpEmptyMatch) + return re; + if (re->op() == kRegexpConcat && re->nsub() >= 2) { + Regexp** sub = re->sub(); + if (sub[0]->op() == kRegexpEmptyMatch) + return re; + sub[0]->Decref(); + sub[0] = NULL; + if (re->nsub() == 2) { + // Collapse concatenation to single regexp. + Regexp* nre = sub[1]; + sub[1] = NULL; + re->Decref(); + return nre; + } + // 3 or more -> 2 or more. + re->nsub_--; + memmove(sub, sub + 1, re->nsub_ * sizeof sub[0]); + return re; + } + Regexp::ParseFlags pf = re->parse_flags(); + re->Decref(); + return new Regexp(kRegexpEmptyMatch, pf); +} + +// Returns the leading string that re starts with. +// The returned Rune* points into a piece of re, +// so it must not be used after the caller calls re->Decref(). +Rune* Regexp::LeadingString(Regexp* re, int *nrune, + Regexp::ParseFlags *flags) { + while (re->op() == kRegexpConcat && re->nsub() > 0) + re = re->sub()[0]; + + *flags = static_cast<Regexp::ParseFlags>(re->parse_flags_ & Regexp::FoldCase); + + if (re->op() == kRegexpLiteral) { + *nrune = 1; + return &re->rune_; + } + + if (re->op() == kRegexpLiteralString) { + *nrune = re->nrunes_; + return re->runes_; + } + + *nrune = 0; + return NULL; +} + +// Removes the first n leading runes from the beginning of re. +// Edits re in place. 
+void Regexp::RemoveLeadingString(Regexp* re, int n) { + // Chase down concats to find first string. + // For regexps generated by parser, nested concats are + // flattened except when doing so would overflow the 16-bit + // limit on the size of a concatenation, so we should never + // see more than two here. + Regexp* stk[4]; size_t d = 0; - while (re->op() == kRegexpConcat) { - if (d < arraysize(stk)) - stk[d++] = re; - re = re->sub()[0]; - } - - // Remove leading string from re. - if (re->op() == kRegexpLiteral) { - re->rune_ = 0; - re->op_ = kRegexpEmptyMatch; - } else if (re->op() == kRegexpLiteralString) { - if (n >= re->nrunes_) { - delete[] re->runes_; - re->runes_ = NULL; - re->nrunes_ = 0; - re->op_ = kRegexpEmptyMatch; - } else if (n == re->nrunes_ - 1) { - Rune rune = re->runes_[re->nrunes_ - 1]; - delete[] re->runes_; - re->runes_ = NULL; - re->nrunes_ = 0; - re->rune_ = rune; - re->op_ = kRegexpLiteral; - } else { - re->nrunes_ -= n; - memmove(re->runes_, re->runes_ + n, re->nrunes_ * sizeof re->runes_[0]); - } - } - - // If re is now empty, concatenations might simplify too. + while (re->op() == kRegexpConcat) { + if (d < arraysize(stk)) + stk[d++] = re; + re = re->sub()[0]; + } + + // Remove leading string from re. + if (re->op() == kRegexpLiteral) { + re->rune_ = 0; + re->op_ = kRegexpEmptyMatch; + } else if (re->op() == kRegexpLiteralString) { + if (n >= re->nrunes_) { + delete[] re->runes_; + re->runes_ = NULL; + re->nrunes_ = 0; + re->op_ = kRegexpEmptyMatch; + } else if (n == re->nrunes_ - 1) { + Rune rune = re->runes_[re->nrunes_ - 1]; + delete[] re->runes_; + re->runes_ = NULL; + re->nrunes_ = 0; + re->rune_ = rune; + re->op_ = kRegexpLiteral; + } else { + re->nrunes_ -= n; + memmove(re->runes_, re->runes_ + n, re->nrunes_ * sizeof re->runes_[0]); + } + } + + // If re is now empty, concatenations might simplify too. 
while (d > 0) { re = stk[--d]; - Regexp** sub = re->sub(); - if (sub[0]->op() == kRegexpEmptyMatch) { - sub[0]->Decref(); - sub[0] = NULL; - // Delete first element of concat. - switch (re->nsub()) { - case 0: - case 1: - // Impossible. - LOG(DFATAL) << "Concat of " << re->nsub(); - re->submany_ = NULL; - re->op_ = kRegexpEmptyMatch; - break; - - case 2: { - // Replace re with sub[1]. - Regexp* old = sub[1]; - sub[1] = NULL; - re->Swap(old); - old->Decref(); - break; - } - - default: - // Slide down. - re->nsub_--; - memmove(sub, sub + 1, re->nsub_ * sizeof sub[0]); - break; - } - } - } -} - + Regexp** sub = re->sub(); + if (sub[0]->op() == kRegexpEmptyMatch) { + sub[0]->Decref(); + sub[0] = NULL; + // Delete first element of concat. + switch (re->nsub()) { + case 0: + case 1: + // Impossible. + LOG(DFATAL) << "Concat of " << re->nsub(); + re->submany_ = NULL; + re->op_ = kRegexpEmptyMatch; + break; + + case 2: { + // Replace re with sub[1]. + Regexp* old = sub[1]; + sub[1] = NULL; + re->Swap(old); + old->Decref(); + break; + } + + default: + // Slide down. + re->nsub_--; + memmove(sub, sub + 1, re->nsub_ * sizeof sub[0]); + break; + } + } + } +} + // In the context of factoring alternations, a Splice is: a factored prefix or // merged character class computed by one iteration of one round of factoring; // the span of subexpressions of the alternation to be "spliced" (i.e. removed @@ -921,28 +921,28 @@ class FactorAlternationImpl { std::vector<Splice>* splices); }; -// Factors common prefixes from alternation. -// For example, -// ABC|ABD|AEF|BCX|BCY -// simplifies to -// A(B(C|D)|EF)|BC(X|Y) +// Factors common prefixes from alternation. +// For example, +// ABC|ABD|AEF|BCX|BCY +// simplifies to +// A(B(C|D)|EF)|BC(X|Y) // and thence to -// A(B[CD]|EF)|BC[XY] -// -// Rewrites sub to contain simplified list to alternate and returns -// the new length of sub. Adjusts reference counts accordingly -// (incoming sub[i] decremented, outgoing sub[i] incremented). 
+// A(B[CD]|EF)|BC[XY] +// +// Rewrites sub to contain simplified list to alternate and returns +// the new length of sub. Adjusts reference counts accordingly +// (incoming sub[i] decremented, outgoing sub[i] incremented). int Regexp::FactorAlternation(Regexp** sub, int nsub, ParseFlags flags) { std::vector<Frame> stk; stk.emplace_back(sub, nsub); - + for (;;) { auto& sub = stk.back().sub; auto& nsub = stk.back().nsub; auto& round = stk.back().round; auto& splices = stk.back().splices; auto& spliceidx = stk.back().spliceidx; - + if (splices.empty()) { // Advance to the next round of factoring. Note that this covers // the initialised state: when splices is empty and round is 0. @@ -990,7 +990,7 @@ int Regexp::FactorAlternation(Regexp** sub, int nsub, ParseFlags flags) { // Advance to the next round of factoring. round++; } - + switch (round) { case 1: FactorAlternationImpl::Round1(sub, nsub, flags, &splices); @@ -1018,7 +1018,7 @@ int Regexp::FactorAlternation(Regexp** sub, int nsub, ParseFlags flags) { LOG(DFATAL) << "unknown round: " << round; break; } - + // Set spliceidx depending on whether we have Splices to factor. if (splices.empty() || round == 3) { spliceidx = static_cast<int>(splices.size()); @@ -1027,59 +1027,59 @@ int Regexp::FactorAlternation(Regexp** sub, int nsub, ParseFlags flags) { } } } - + void FactorAlternationImpl::Round1(Regexp** sub, int nsub, Regexp::ParseFlags flags, std::vector<Splice>* splices) { - // Round 1: Factor out common literal prefixes. + // Round 1: Factor out common literal prefixes. int start = 0; Rune* rune = NULL; - int nrune = 0; - Regexp::ParseFlags runeflags = Regexp::NoParseFlags; + int nrune = 0; + Regexp::ParseFlags runeflags = Regexp::NoParseFlags; for (int i = 0; i <= nsub; i++) { // Invariant: sub[start:i] consists of regexps that all // begin with rune[0:nrune]. 
- Rune* rune_i = NULL; - int nrune_i = 0; - Regexp::ParseFlags runeflags_i = Regexp::NoParseFlags; + Rune* rune_i = NULL; + int nrune_i = 0; + Regexp::ParseFlags runeflags_i = Regexp::NoParseFlags; if (i < nsub) { rune_i = Regexp::LeadingString(sub[i], &nrune_i, &runeflags_i); - if (runeflags_i == runeflags) { - int same = 0; - while (same < nrune && same < nrune_i && rune[same] == rune_i[same]) - same++; - if (same > 0) { - // Matches at least one rune in current range. Keep going around. - nrune = same; - continue; - } - } - } - - // Found end of a run with common leading literal string: + if (runeflags_i == runeflags) { + int same = 0; + while (same < nrune && same < nrune_i && rune[same] == rune_i[same]) + same++; + if (same > 0) { + // Matches at least one rune in current range. Keep going around. + nrune = same; + continue; + } + } + } + + // Found end of a run with common leading literal string: // sub[start:i] all begin with rune[0:nrune], // but sub[i] does not even begin with rune[0]. - if (i == start) { - // Nothing to do - first iteration. - } else if (i == start+1) { - // Just one: don't bother factoring. - } else { + if (i == start) { + // Nothing to do - first iteration. + } else if (i == start+1) { + // Just one: don't bother factoring. + } else { Regexp* prefix = Regexp::LiteralString(rune, nrune, runeflags); - for (int j = start; j < i; j++) + for (int j = start; j < i; j++) Regexp::RemoveLeadingString(sub[j], nrune); splices->emplace_back(prefix, sub + start, i - start); - } - + } + // Prepare for next iteration (if there is one). 
if (i < nsub) { - start = i; - rune = rune_i; - nrune = nrune_i; - runeflags = runeflags_i; - } - } + start = i; + rune = rune_i; + nrune = nrune_i; + runeflags = runeflags_i; + } + } } - + void FactorAlternationImpl::Round2(Regexp** sub, int nsub, Regexp::ParseFlags flags, std::vector<Splice>* splices) { @@ -1092,11 +1092,11 @@ void FactorAlternationImpl::Round2(Regexp** sub, int nsub, // distinct paths through the automaton, which affects // correctness in some cases. int start = 0; - Regexp* first = NULL; + Regexp* first = NULL; for (int i = 0; i <= nsub; i++) { // Invariant: sub[start:i] consists of regexps that all // begin with first. - Regexp* first_i = NULL; + Regexp* first_i = NULL; if (i < nsub) { first_i = Regexp::LeadingRegexp(sub[i]); if (first != NULL && @@ -1119,31 +1119,31 @@ void FactorAlternationImpl::Round2(Regexp** sub, int nsub, first->sub()[0]->op() == kRegexpAnyChar || first->sub()[0]->op() == kRegexpAnyByte))) && Regexp::Equal(first, first_i)) - continue; - } - - // Found end of a run with common leading regexp: + continue; + } + + // Found end of a run with common leading regexp: // sub[start:i] all begin with first, // but sub[i] does not. - if (i == start) { - // Nothing to do - first iteration. - } else if (i == start+1) { - // Just one: don't bother factoring. - } else { + if (i == start) { + // Nothing to do - first iteration. + } else if (i == start+1) { + // Just one: don't bother factoring. + } else { Regexp* prefix = first->Incref(); - for (int j = start; j < i; j++) + for (int j = start; j < i; j++) sub[j] = Regexp::RemoveLeadingRegexp(sub[j]); splices->emplace_back(prefix, sub + start, i - start); - } - + } + // Prepare for next iteration (if there is one). 
if (i < nsub) { - start = i; - first = first_i; - } - } + start = i; + first = first_i; + } + } } - + void FactorAlternationImpl::Round3(Regexp** sub, int nsub, Regexp::ParseFlags flags, std::vector<Splice>* splices) { @@ -1163,234 +1163,234 @@ void FactorAlternationImpl::Round3(Regexp** sub, int nsub, first_i->op() == kRegexpCharClass)) continue; } - + // Found end of a run of Literal/CharClass: // sub[start:i] all are either one or the other, // but sub[i] is not. - if (i == start) { + if (i == start) { // Nothing to do - first iteration. - } else if (i == start+1) { + } else if (i == start+1) { // Just one: don't bother factoring. - } else { - CharClassBuilder ccb; - for (int j = start; j < i; j++) { - Regexp* re = sub[j]; - if (re->op() == kRegexpCharClass) { - CharClass* cc = re->cc(); - for (CharClass::iterator it = cc->begin(); it != cc->end(); ++it) - ccb.AddRange(it->lo, it->hi); - } else if (re->op() == kRegexpLiteral) { - ccb.AddRangeFlags(re->rune(), re->rune(), re->parse_flags()); - } else { - LOG(DFATAL) << "RE2: unexpected op: " << re->op() << " " - << re->ToString(); - } - re->Decref(); - } + } else { + CharClassBuilder ccb; + for (int j = start; j < i; j++) { + Regexp* re = sub[j]; + if (re->op() == kRegexpCharClass) { + CharClass* cc = re->cc(); + for (CharClass::iterator it = cc->begin(); it != cc->end(); ++it) + ccb.AddRange(it->lo, it->hi); + } else if (re->op() == kRegexpLiteral) { + ccb.AddRangeFlags(re->rune(), re->rune(), re->parse_flags()); + } else { + LOG(DFATAL) << "RE2: unexpected op: " << re->op() << " " + << re->ToString(); + } + re->Decref(); + } Regexp* re = Regexp::NewCharClass(ccb.GetCharClass(), flags); splices->emplace_back(re, sub + start, i - start); - } - + } + // Prepare for next iteration (if there is one). if (i < nsub) { start = i; first = first_i; - } - } -} - -// Collapse the regexps on top of the stack, down to the -// first marker, into a new op node (op == kRegexpAlternate -// or op == kRegexpConcat). 
-void Regexp::ParseState::DoCollapse(RegexpOp op) { - // Scan backward to marker, counting children of composite. - int n = 0; - Regexp* next = NULL; - Regexp* sub; - for (sub = stacktop_; sub != NULL && !IsMarker(sub->op()); sub = next) { - next = sub->down_; - if (sub->op_ == op) - n += sub->nsub_; - else - n++; - } - - // If there's just one child, leave it alone. - // (Concat of one thing is that one thing; alternate of one thing is same.) - if (stacktop_ != NULL && stacktop_->down_ == next) - return; - - // Construct op (alternation or concatenation), flattening op of op. + } + } +} + +// Collapse the regexps on top of the stack, down to the +// first marker, into a new op node (op == kRegexpAlternate +// or op == kRegexpConcat). +void Regexp::ParseState::DoCollapse(RegexpOp op) { + // Scan backward to marker, counting children of composite. + int n = 0; + Regexp* next = NULL; + Regexp* sub; + for (sub = stacktop_; sub != NULL && !IsMarker(sub->op()); sub = next) { + next = sub->down_; + if (sub->op_ == op) + n += sub->nsub_; + else + n++; + } + + // If there's just one child, leave it alone. + // (Concat of one thing is that one thing; alternate of one thing is same.) + if (stacktop_ != NULL && stacktop_->down_ == next) + return; + + // Construct op (alternation or concatenation), flattening op of op. 
PODArray<Regexp*> subs(n); - next = NULL; - int i = n; - for (sub = stacktop_; sub != NULL && !IsMarker(sub->op()); sub = next) { - next = sub->down_; - if (sub->op_ == op) { - Regexp** sub_subs = sub->sub(); - for (int k = sub->nsub_ - 1; k >= 0; k--) - subs[--i] = sub_subs[k]->Incref(); - sub->Decref(); - } else { - subs[--i] = FinishRegexp(sub); - } - } - + next = NULL; + int i = n; + for (sub = stacktop_; sub != NULL && !IsMarker(sub->op()); sub = next) { + next = sub->down_; + if (sub->op_ == op) { + Regexp** sub_subs = sub->sub(); + for (int k = sub->nsub_ - 1; k >= 0; k--) + subs[--i] = sub_subs[k]->Incref(); + sub->Decref(); + } else { + subs[--i] = FinishRegexp(sub); + } + } + Regexp* re = ConcatOrAlternate(op, subs.data(), n, flags_, true); - re->simple_ = re->ComputeSimple(); - re->down_ = next; - stacktop_ = re; -} - -// Finishes the current concatenation, -// collapsing it into a single regexp on the stack. -void Regexp::ParseState::DoConcatenation() { - Regexp* r1 = stacktop_; - if (r1 == NULL || IsMarker(r1->op())) { - // empty concatenation is special case - Regexp* re = new Regexp(kRegexpEmptyMatch, flags_); - PushRegexp(re); - } - DoCollapse(kRegexpConcat); -} - -// Finishes the current alternation, -// collapsing it to a single regexp on the stack. -void Regexp::ParseState::DoAlternation() { - DoVerticalBar(); - // Now stack top is kVerticalBar. - Regexp* r1 = stacktop_; - stacktop_ = r1->down_; - r1->Decref(); - DoCollapse(kRegexpAlternate); -} - -// Incremental conversion of concatenated literals into strings. -// If top two elements on stack are both literal or string, -// collapse into single string. -// Don't walk down the stack -- the parser calls this frequently -// enough that below the bottom two is known to be collapsed. -// Only called when another regexp is about to be pushed -// on the stack, so that the topmost literal is not being considered. -// (Otherwise ab* would turn into (ab)*.) 
-// If r >= 0, consider pushing a literal r on the stack. -// Return whether that happened. -bool Regexp::ParseState::MaybeConcatString(int r, ParseFlags flags) { - Regexp* re1; - Regexp* re2; - if ((re1 = stacktop_) == NULL || (re2 = re1->down_) == NULL) - return false; - - if (re1->op_ != kRegexpLiteral && re1->op_ != kRegexpLiteralString) - return false; - if (re2->op_ != kRegexpLiteral && re2->op_ != kRegexpLiteralString) - return false; - if ((re1->parse_flags_ & FoldCase) != (re2->parse_flags_ & FoldCase)) - return false; - - if (re2->op_ == kRegexpLiteral) { - // convert into string - Rune rune = re2->rune_; - re2->op_ = kRegexpLiteralString; - re2->nrunes_ = 0; - re2->runes_ = NULL; - re2->AddRuneToString(rune); - } - - // push re1 into re2. - if (re1->op_ == kRegexpLiteral) { - re2->AddRuneToString(re1->rune_); - } else { - for (int i = 0; i < re1->nrunes_; i++) - re2->AddRuneToString(re1->runes_[i]); - re1->nrunes_ = 0; - delete[] re1->runes_; - re1->runes_ = NULL; - } - - // reuse re1 if possible - if (r >= 0) { - re1->op_ = kRegexpLiteral; - re1->rune_ = r; + re->simple_ = re->ComputeSimple(); + re->down_ = next; + stacktop_ = re; +} + +// Finishes the current concatenation, +// collapsing it into a single regexp on the stack. +void Regexp::ParseState::DoConcatenation() { + Regexp* r1 = stacktop_; + if (r1 == NULL || IsMarker(r1->op())) { + // empty concatenation is special case + Regexp* re = new Regexp(kRegexpEmptyMatch, flags_); + PushRegexp(re); + } + DoCollapse(kRegexpConcat); +} + +// Finishes the current alternation, +// collapsing it to a single regexp on the stack. +void Regexp::ParseState::DoAlternation() { + DoVerticalBar(); + // Now stack top is kVerticalBar. + Regexp* r1 = stacktop_; + stacktop_ = r1->down_; + r1->Decref(); + DoCollapse(kRegexpAlternate); +} + +// Incremental conversion of concatenated literals into strings. +// If top two elements on stack are both literal or string, +// collapse into single string. 
+// Don't walk down the stack -- the parser calls this frequently +// enough that below the bottom two is known to be collapsed. +// Only called when another regexp is about to be pushed +// on the stack, so that the topmost literal is not being considered. +// (Otherwise ab* would turn into (ab)*.) +// If r >= 0, consider pushing a literal r on the stack. +// Return whether that happened. +bool Regexp::ParseState::MaybeConcatString(int r, ParseFlags flags) { + Regexp* re1; + Regexp* re2; + if ((re1 = stacktop_) == NULL || (re2 = re1->down_) == NULL) + return false; + + if (re1->op_ != kRegexpLiteral && re1->op_ != kRegexpLiteralString) + return false; + if (re2->op_ != kRegexpLiteral && re2->op_ != kRegexpLiteralString) + return false; + if ((re1->parse_flags_ & FoldCase) != (re2->parse_flags_ & FoldCase)) + return false; + + if (re2->op_ == kRegexpLiteral) { + // convert into string + Rune rune = re2->rune_; + re2->op_ = kRegexpLiteralString; + re2->nrunes_ = 0; + re2->runes_ = NULL; + re2->AddRuneToString(rune); + } + + // push re1 into re2. + if (re1->op_ == kRegexpLiteral) { + re2->AddRuneToString(re1->rune_); + } else { + for (int i = 0; i < re1->nrunes_; i++) + re2->AddRuneToString(re1->runes_[i]); + re1->nrunes_ = 0; + delete[] re1->runes_; + re1->runes_ = NULL; + } + + // reuse re1 if possible + if (r >= 0) { + re1->op_ = kRegexpLiteral; + re1->rune_ = r; re1->parse_flags_ = static_cast<uint16_t>(flags); - return true; - } - - stacktop_ = re2; - re1->Decref(); - return false; -} - -// Lexing routines. - + return true; + } + + stacktop_ = re2; + re1->Decref(); + return false; +} + +// Lexing routines. + // Parses a decimal integer, storing it in *np. -// Sets *s to span the remainder of the string. -static bool ParseInteger(StringPiece* s, int* np) { +// Sets *s to span the remainder of the string. +static bool ParseInteger(StringPiece* s, int* np) { if (s->empty() || !isdigit((*s)[0] & 0xFF)) - return false; - // Disallow leading zeros. 
+ return false; + // Disallow leading zeros. if (s->size() >= 2 && (*s)[0] == '0' && isdigit((*s)[1] & 0xFF)) - return false; - int n = 0; - int c; + return false; + int n = 0; + int c; while (!s->empty() && isdigit(c = (*s)[0] & 0xFF)) { - // Avoid overflow. - if (n >= 100000000) - return false; - n = n*10 + c - '0'; - s->remove_prefix(1); // digit - } - *np = n; - return true; -} - -// Parses a repetition suffix like {1,2} or {2} or {2,}. -// Sets *s to span the remainder of the string on success. -// Sets *lo and *hi to the given range. -// In the case of {2,}, the high number is unbounded; -// sets *hi to -1 to signify this. -// {,2} is NOT a valid suffix. -// The Maybe in the name signifies that the regexp parse -// doesn't fail even if ParseRepetition does, so the StringPiece -// s must NOT be edited unless MaybeParseRepetition returns true. -static bool MaybeParseRepetition(StringPiece* sp, int* lo, int* hi) { - StringPiece s = *sp; + // Avoid overflow. + if (n >= 100000000) + return false; + n = n*10 + c - '0'; + s->remove_prefix(1); // digit + } + *np = n; + return true; +} + +// Parses a repetition suffix like {1,2} or {2} or {2,}. +// Sets *s to span the remainder of the string on success. +// Sets *lo and *hi to the given range. +// In the case of {2,}, the high number is unbounded; +// sets *hi to -1 to signify this. +// {,2} is NOT a valid suffix. +// The Maybe in the name signifies that the regexp parse +// doesn't fail even if ParseRepetition does, so the StringPiece +// s must NOT be edited unless MaybeParseRepetition returns true. 
+static bool MaybeParseRepetition(StringPiece* sp, int* lo, int* hi) { + StringPiece s = *sp; if (s.empty() || s[0] != '{') - return false; - s.remove_prefix(1); // '{' - if (!ParseInteger(&s, lo)) - return false; + return false; + s.remove_prefix(1); // '{' + if (!ParseInteger(&s, lo)) + return false; if (s.empty()) - return false; - if (s[0] == ',') { - s.remove_prefix(1); // ',' + return false; + if (s[0] == ',') { + s.remove_prefix(1); // ',' if (s.empty()) - return false; - if (s[0] == '}') { - // {2,} means at least 2 - *hi = -1; - } else { - // {2,4} means 2, 3, or 4. - if (!ParseInteger(&s, hi)) - return false; - } - } else { - // {2} means exactly two - *hi = *lo; - } + return false; + if (s[0] == '}') { + // {2,} means at least 2 + *hi = -1; + } else { + // {2,4} means 2, 3, or 4. + if (!ParseInteger(&s, hi)) + return false; + } + } else { + // {2} means exactly two + *hi = *lo; + } if (s.empty() || s[0] != '}') - return false; - s.remove_prefix(1); // '}' - *sp = s; - return true; -} - -// Removes the next Rune from the StringPiece and stores it in *r. -// Returns number of bytes removed from sp. -// Behaves as though there is a terminating NUL at the end of sp. -// Argument order is backwards from usual Google style -// but consistent with chartorune. -static int StringPieceToRune(Rune *r, StringPiece *sp, RegexpStatus* status) { + return false; + s.remove_prefix(1); // '}' + *sp = s; + return true; +} + +// Removes the next Rune from the StringPiece and stores it in *r. +// Returns number of bytes removed from sp. +// Behaves as though there is a terminating NUL at the end of sp. +// Argument order is backwards from usual Google style +// but consistent with chartorune. +static int StringPieceToRune(Rune *r, StringPiece *sp, RegexpStatus* status) { // fullrune() takes int, not size_t. However, it just looks // at the leading byte and treats any length >= 4 the same. 
if (fullrune(sp->data(), static_cast<int>(std::min(size_t{4}, sp->size())))) { @@ -1403,278 +1403,278 @@ static int StringPieceToRune(Rune *r, StringPiece *sp, RegexpStatus* status) { n = 1; *r = Runeerror; } - if (!(n == 1 && *r == Runeerror)) { // no decoding error - sp->remove_prefix(n); - return n; - } - } - + if (!(n == 1 && *r == Runeerror)) { // no decoding error + sp->remove_prefix(n); + return n; + } + } + if (status != NULL) { status->set_code(kRegexpBadUTF8); status->set_error_arg(StringPiece()); } - return -1; -} - + return -1; +} + // Returns whether name is valid UTF-8. // If not, sets status to kRegexpBadUTF8. -static bool IsValidUTF8(const StringPiece& s, RegexpStatus* status) { - StringPiece t = s; - Rune r; +static bool IsValidUTF8(const StringPiece& s, RegexpStatus* status) { + StringPiece t = s; + Rune r; while (!t.empty()) { - if (StringPieceToRune(&r, &t, status) < 0) - return false; - } - return true; -} - -// Is c a hex digit? -static int IsHex(int c) { - return ('0' <= c && c <= '9') || - ('A' <= c && c <= 'F') || - ('a' <= c && c <= 'f'); -} - -// Convert hex digit to value. -static int UnHex(int c) { - if ('0' <= c && c <= '9') - return c - '0'; - if ('A' <= c && c <= 'F') - return c - 'A' + 10; - if ('a' <= c && c <= 'f') - return c - 'a' + 10; - LOG(DFATAL) << "Bad hex digit " << c; - return 0; -} - -// Parse an escape sequence (e.g., \n, \{). -// Sets *s to span the remainder of the string. -// Sets *rp to the named character. -static bool ParseEscape(StringPiece* s, Rune* rp, - RegexpStatus* status, int rune_max) { + if (StringPieceToRune(&r, &t, status) < 0) + return false; + } + return true; +} + +// Is c a hex digit? +static int IsHex(int c) { + return ('0' <= c && c <= '9') || + ('A' <= c && c <= 'F') || + ('a' <= c && c <= 'f'); +} + +// Convert hex digit to value. 
+static int UnHex(int c) { + if ('0' <= c && c <= '9') + return c - '0'; + if ('A' <= c && c <= 'F') + return c - 'A' + 10; + if ('a' <= c && c <= 'f') + return c - 'a' + 10; + LOG(DFATAL) << "Bad hex digit " << c; + return 0; +} + +// Parse an escape sequence (e.g., \n, \{). +// Sets *s to span the remainder of the string. +// Sets *rp to the named character. +static bool ParseEscape(StringPiece* s, Rune* rp, + RegexpStatus* status, int rune_max) { const char* begin = s->data(); if (s->empty() || (*s)[0] != '\\') { - // Should not happen - caller always checks. - status->set_code(kRegexpInternalError); + // Should not happen - caller always checks. + status->set_code(kRegexpInternalError); status->set_error_arg(StringPiece()); - return false; - } + return false; + } if (s->size() == 1) { - status->set_code(kRegexpTrailingBackslash); + status->set_code(kRegexpTrailingBackslash); status->set_error_arg(StringPiece()); - return false; - } - Rune c, c1; - s->remove_prefix(1); // backslash - if (StringPieceToRune(&c, s, status) < 0) - return false; - int code; - switch (c) { - default: + return false; + } + Rune c, c1; + s->remove_prefix(1); // backslash + if (StringPieceToRune(&c, s, status) < 0) + return false; + int code; + switch (c) { + default: if (c < Runeself && !isalpha(c) && !isdigit(c)) { - // Escaped non-word characters are always themselves. - // PCRE is not quite so rigorous: it accepts things like - // \q, but we don't. We once rejected \_, but too many - // programs and people insist on using it, so allow \_. - *rp = c; - return true; - } - goto BadEscape; - - // Octal escapes. - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - // Single non-zero octal digit is a backreference; not supported. + // Escaped non-word characters are always themselves. + // PCRE is not quite so rigorous: it accepts things like + // \q, but we don't. We once rejected \_, but too many + // programs and people insist on using it, so allow \_. 
+ *rp = c; + return true; + } + goto BadEscape; + + // Octal escapes. + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + // Single non-zero octal digit is a backreference; not supported. if (s->empty() || (*s)[0] < '0' || (*s)[0] > '7') - goto BadEscape; + goto BadEscape; FALLTHROUGH_INTENDED; - case '0': - // consume up to three octal digits; already have one. - code = c - '0'; + case '0': + // consume up to three octal digits; already have one. + code = c - '0'; if (!s->empty() && '0' <= (c = (*s)[0]) && c <= '7') { - code = code * 8 + c - '0'; - s->remove_prefix(1); // digit + code = code * 8 + c - '0'; + s->remove_prefix(1); // digit if (!s->empty()) { - c = (*s)[0]; - if ('0' <= c && c <= '7') { - code = code * 8 + c - '0'; - s->remove_prefix(1); // digit - } - } - } + c = (*s)[0]; + if ('0' <= c && c <= '7') { + code = code * 8 + c - '0'; + s->remove_prefix(1); // digit + } + } + } if (code > rune_max) goto BadEscape; - *rp = code; - return true; - - // Hexadecimal escapes - case 'x': + *rp = code; + return true; + + // Hexadecimal escapes + case 'x': if (s->empty()) - goto BadEscape; - if (StringPieceToRune(&c, s, status) < 0) - return false; - if (c == '{') { - // Any number of digits in braces. - // Update n as we consume the string, so that - // the whole thing gets shown in the error message. - // Perl accepts any text at all; it ignores all text - // after the first non-hex digit. We require only hex digits, - // and at least one. - if (StringPieceToRune(&c, s, status) < 0) - return false; - int nhex = 0; - code = 0; - while (IsHex(c)) { - nhex++; - code = code * 16 + UnHex(c); - if (code > rune_max) - goto BadEscape; + goto BadEscape; + if (StringPieceToRune(&c, s, status) < 0) + return false; + if (c == '{') { + // Any number of digits in braces. + // Update n as we consume the string, so that + // the whole thing gets shown in the error message. 
+ // Perl accepts any text at all; it ignores all text + // after the first non-hex digit. We require only hex digits, + // and at least one. + if (StringPieceToRune(&c, s, status) < 0) + return false; + int nhex = 0; + code = 0; + while (IsHex(c)) { + nhex++; + code = code * 16 + UnHex(c); + if (code > rune_max) + goto BadEscape; if (s->empty()) - goto BadEscape; - if (StringPieceToRune(&c, s, status) < 0) - return false; - } - if (c != '}' || nhex == 0) - goto BadEscape; - *rp = code; - return true; - } - // Easy case: two hex digits. + goto BadEscape; + if (StringPieceToRune(&c, s, status) < 0) + return false; + } + if (c != '}' || nhex == 0) + goto BadEscape; + *rp = code; + return true; + } + // Easy case: two hex digits. if (s->empty()) - goto BadEscape; - if (StringPieceToRune(&c1, s, status) < 0) - return false; - if (!IsHex(c) || !IsHex(c1)) - goto BadEscape; - *rp = UnHex(c) * 16 + UnHex(c1); - return true; - - // C escapes. - case 'n': - *rp = '\n'; - return true; - case 'r': - *rp = '\r'; - return true; - case 't': - *rp = '\t'; - return true; - - // Less common C escapes. - case 'a': - *rp = '\a'; - return true; - case 'f': - *rp = '\f'; - return true; - case 'v': - *rp = '\v'; - return true; - - // This code is disabled to avoid misparsing - // the Perl word-boundary \b as a backspace - // when in POSIX regexp mode. Surprisingly, - // in Perl, \b means word-boundary but [\b] - // means backspace. We don't support that: - // if you want a backspace embed a literal + goto BadEscape; + if (StringPieceToRune(&c1, s, status) < 0) + return false; + if (!IsHex(c) || !IsHex(c1)) + goto BadEscape; + *rp = UnHex(c) * 16 + UnHex(c1); + return true; + + // C escapes. + case 'n': + *rp = '\n'; + return true; + case 'r': + *rp = '\r'; + return true; + case 't': + *rp = '\t'; + return true; + + // Less common C escapes. 
+ case 'a': + *rp = '\a'; + return true; + case 'f': + *rp = '\f'; + return true; + case 'v': + *rp = '\v'; + return true; + + // This code is disabled to avoid misparsing + // the Perl word-boundary \b as a backspace + // when in POSIX regexp mode. Surprisingly, + // in Perl, \b means word-boundary but [\b] + // means backspace. We don't support that: + // if you want a backspace embed a literal // backspace character or use \x08. - // - // case 'b': - // *rp = '\b'; - // return true; - } - - LOG(DFATAL) << "Not reached in ParseEscape."; - -BadEscape: - // Unrecognized escape sequence. - status->set_code(kRegexpBadEscape); + // + // case 'b': + // *rp = '\b'; + // return true; + } + + LOG(DFATAL) << "Not reached in ParseEscape."; + +BadEscape: + // Unrecognized escape sequence. + status->set_code(kRegexpBadEscape); status->set_error_arg( StringPiece(begin, static_cast<size_t>(s->data() - begin))); - return false; -} - -// Add a range to the character class, but exclude newline if asked. -// Also handle case folding. -void CharClassBuilder::AddRangeFlags( - Rune lo, Rune hi, Regexp::ParseFlags parse_flags) { - - // Take out \n if the flags say so. - bool cutnl = !(parse_flags & Regexp::ClassNL) || - (parse_flags & Regexp::NeverNL); - if (cutnl && lo <= '\n' && '\n' <= hi) { - if (lo < '\n') - AddRangeFlags(lo, '\n' - 1, parse_flags); - if (hi > '\n') - AddRangeFlags('\n' + 1, hi, parse_flags); - return; - } - - // If folding case, add fold-equivalent characters too. - if (parse_flags & Regexp::FoldCase) - AddFoldedRange(this, lo, hi, 0); - else - AddRange(lo, hi); -} - -// Look for a group with the given name. + return false; +} + +// Add a range to the character class, but exclude newline if asked. +// Also handle case folding. +void CharClassBuilder::AddRangeFlags( + Rune lo, Rune hi, Regexp::ParseFlags parse_flags) { + + // Take out \n if the flags say so. 
+ bool cutnl = !(parse_flags & Regexp::ClassNL) || + (parse_flags & Regexp::NeverNL); + if (cutnl && lo <= '\n' && '\n' <= hi) { + if (lo < '\n') + AddRangeFlags(lo, '\n' - 1, parse_flags); + if (hi > '\n') + AddRangeFlags('\n' + 1, hi, parse_flags); + return; + } + + // If folding case, add fold-equivalent characters too. + if (parse_flags & Regexp::FoldCase) + AddFoldedRange(this, lo, hi, 0); + else + AddRange(lo, hi); +} + +// Look for a group with the given name. static const UGroup* LookupGroup(const StringPiece& name, const UGroup *groups, int ngroups) { - // Simple name lookup. - for (int i = 0; i < ngroups; i++) - if (StringPiece(groups[i].name) == name) - return &groups[i]; - return NULL; -} - -// Look for a POSIX group with the given name (e.g., "[:^alpha:]") + // Simple name lookup. + for (int i = 0; i < ngroups; i++) + if (StringPiece(groups[i].name) == name) + return &groups[i]; + return NULL; +} + +// Look for a POSIX group with the given name (e.g., "[:^alpha:]") static const UGroup* LookupPosixGroup(const StringPiece& name) { - return LookupGroup(name, posix_groups, num_posix_groups); -} - + return LookupGroup(name, posix_groups, num_posix_groups); +} + static const UGroup* LookupPerlGroup(const StringPiece& name) { - return LookupGroup(name, perl_groups, num_perl_groups); -} - + return LookupGroup(name, perl_groups, num_perl_groups); +} + #if !defined(RE2_USE_ICU) // Fake UGroup containing all Runes static URange16 any16[] = { { 0, 65535 } }; static URange32 any32[] = { { 65536, Runemax } }; static UGroup anygroup = { "Any", +1, any16, 1, any32, 1 }; -// Look for a Unicode group with the given name (e.g., "Han") +// Look for a Unicode group with the given name (e.g., "Han") static const UGroup* LookupUnicodeGroup(const StringPiece& name) { - // Special case: "Any" means any. - if (name == StringPiece("Any")) - return &anygroup; - return LookupGroup(name, unicode_groups, num_unicode_groups); -} + // Special case: "Any" means any. 
+ if (name == StringPiece("Any")) + return &anygroup; + return LookupGroup(name, unicode_groups, num_unicode_groups); +} #endif - -// Add a UGroup or its negation to the character class. + +// Add a UGroup or its negation to the character class. static void AddUGroup(CharClassBuilder *cc, const UGroup *g, int sign, Regexp::ParseFlags parse_flags) { - if (sign == +1) { - for (int i = 0; i < g->nr16; i++) { - cc->AddRangeFlags(g->r16[i].lo, g->r16[i].hi, parse_flags); - } - for (int i = 0; i < g->nr32; i++) { - cc->AddRangeFlags(g->r32[i].lo, g->r32[i].hi, parse_flags); - } - } else { - if (parse_flags & Regexp::FoldCase) { - // Normally adding a case-folded group means - // adding all the extra fold-equivalent runes too. - // But if we're adding the negation of the group, - // we have to exclude all the runes that are fold-equivalent - // to what's already missing. Too hard, so do in two steps. - CharClassBuilder ccb1; - AddUGroup(&ccb1, g, +1, parse_flags); + if (sign == +1) { + for (int i = 0; i < g->nr16; i++) { + cc->AddRangeFlags(g->r16[i].lo, g->r16[i].hi, parse_flags); + } + for (int i = 0; i < g->nr32; i++) { + cc->AddRangeFlags(g->r32[i].lo, g->r32[i].hi, parse_flags); + } + } else { + if (parse_flags & Regexp::FoldCase) { + // Normally adding a case-folded group means + // adding all the extra fold-equivalent runes too. + // But if we're adding the negation of the group, + // we have to exclude all the runes that are fold-equivalent + // to what's already missing. Too hard, so do in two steps. + CharClassBuilder ccb1; + AddUGroup(&ccb1, g, +1, parse_flags); // If the flags say to take out \n, put it in, so that negating will take it out. // Normally AddRangeFlags does this, but we're bypassing AddRangeFlags. 
bool cutnl = !(parse_flags & Regexp::ClassNL) || @@ -1682,115 +1682,115 @@ static void AddUGroup(CharClassBuilder *cc, const UGroup *g, int sign, if (cutnl) { ccb1.AddRange('\n', '\n'); } - ccb1.Negate(); - cc->AddCharClass(&ccb1); - return; - } - int next = 0; - for (int i = 0; i < g->nr16; i++) { - if (next < g->r16[i].lo) - cc->AddRangeFlags(next, g->r16[i].lo - 1, parse_flags); - next = g->r16[i].hi + 1; - } - for (int i = 0; i < g->nr32; i++) { - if (next < g->r32[i].lo) - cc->AddRangeFlags(next, g->r32[i].lo - 1, parse_flags); - next = g->r32[i].hi + 1; - } - if (next <= Runemax) - cc->AddRangeFlags(next, Runemax, parse_flags); - } -} - -// Maybe parse a Perl character class escape sequence. -// Only recognizes the Perl character classes (\d \s \w \D \S \W), -// not the Perl empty-string classes (\b \B \A \Z \z). -// On success, sets *s to span the remainder of the string -// and returns the corresponding UGroup. -// The StringPiece must *NOT* be edited unless the call succeeds. + ccb1.Negate(); + cc->AddCharClass(&ccb1); + return; + } + int next = 0; + for (int i = 0; i < g->nr16; i++) { + if (next < g->r16[i].lo) + cc->AddRangeFlags(next, g->r16[i].lo - 1, parse_flags); + next = g->r16[i].hi + 1; + } + for (int i = 0; i < g->nr32; i++) { + if (next < g->r32[i].lo) + cc->AddRangeFlags(next, g->r32[i].lo - 1, parse_flags); + next = g->r32[i].hi + 1; + } + if (next <= Runemax) + cc->AddRangeFlags(next, Runemax, parse_flags); + } +} + +// Maybe parse a Perl character class escape sequence. +// Only recognizes the Perl character classes (\d \s \w \D \S \W), +// not the Perl empty-string classes (\b \B \A \Z \z). +// On success, sets *s to span the remainder of the string +// and returns the corresponding UGroup. +// The StringPiece must *NOT* be edited unless the call succeeds. 
const UGroup* MaybeParsePerlCCEscape(StringPiece* s, Regexp::ParseFlags parse_flags) { - if (!(parse_flags & Regexp::PerlClasses)) - return NULL; - if (s->size() < 2 || (*s)[0] != '\\') - return NULL; - // Could use StringPieceToRune, but there aren't - // any non-ASCII Perl group names. + if (!(parse_flags & Regexp::PerlClasses)) + return NULL; + if (s->size() < 2 || (*s)[0] != '\\') + return NULL; + // Could use StringPieceToRune, but there aren't + // any non-ASCII Perl group names. StringPiece name(s->data(), 2); const UGroup *g = LookupPerlGroup(name); - if (g == NULL) - return NULL; - s->remove_prefix(name.size()); - return g; -} - -enum ParseStatus { - kParseOk, // Did some parsing. - kParseError, // Found an error. - kParseNothing, // Decided not to parse. -}; - -// Maybe parses a Unicode character group like \p{Han} or \P{Han} -// (the latter is a negated group). -ParseStatus ParseUnicodeGroup(StringPiece* s, Regexp::ParseFlags parse_flags, - CharClassBuilder *cc, - RegexpStatus* status) { - // Decide whether to parse. - if (!(parse_flags & Regexp::UnicodeGroups)) - return kParseNothing; - if (s->size() < 2 || (*s)[0] != '\\') - return kParseNothing; - Rune c = (*s)[1]; - if (c != 'p' && c != 'P') - return kParseNothing; - - // Committed to parse. Results: - int sign = +1; // -1 = negated char class - if (c == 'P') + if (g == NULL) + return NULL; + s->remove_prefix(name.size()); + return g; +} + +enum ParseStatus { + kParseOk, // Did some parsing. + kParseError, // Found an error. + kParseNothing, // Decided not to parse. +}; + +// Maybe parses a Unicode character group like \p{Han} or \P{Han} +// (the latter is a negated group). +ParseStatus ParseUnicodeGroup(StringPiece* s, Regexp::ParseFlags parse_flags, + CharClassBuilder *cc, + RegexpStatus* status) { + // Decide whether to parse. 
+ if (!(parse_flags & Regexp::UnicodeGroups)) + return kParseNothing; + if (s->size() < 2 || (*s)[0] != '\\') + return kParseNothing; + Rune c = (*s)[1]; + if (c != 'p' && c != 'P') + return kParseNothing; + + // Committed to parse. Results: + int sign = +1; // -1 = negated char class + if (c == 'P') sign = -sign; - StringPiece seq = *s; // \p{Han} or \pL - StringPiece name; // Han or L - s->remove_prefix(2); // '\\', 'p' - - if (!StringPieceToRune(&c, s, status)) - return kParseError; - if (c != '{') { - // Name is the bit of string we just skipped over for c. + StringPiece seq = *s; // \p{Han} or \pL + StringPiece name; // Han or L + s->remove_prefix(2); // '\\', 'p' + + if (!StringPieceToRune(&c, s, status)) + return kParseError; + if (c != '{') { + // Name is the bit of string we just skipped over for c. const char* p = seq.data() + 2; name = StringPiece(p, static_cast<size_t>(s->data() - p)); - } else { - // Name is in braces. Look for closing } + } else { + // Name is in braces. Look for closing } size_t end = s->find('}', 0); if (end == StringPiece::npos) { - if (!IsValidUTF8(seq, status)) - return kParseError; - status->set_code(kRegexpBadCharRange); - status->set_error_arg(seq); - return kParseError; - } + if (!IsValidUTF8(seq, status)) + return kParseError; + status->set_code(kRegexpBadCharRange); + status->set_error_arg(seq); + return kParseError; + } name = StringPiece(s->data(), end); // without '}' - s->remove_prefix(end + 1); // with '}' - if (!IsValidUTF8(name, status)) - return kParseError; - } - - // Chop seq where s now begins. + s->remove_prefix(end + 1); // with '}' + if (!IsValidUTF8(name, status)) + return kParseError; + } + + // Chop seq where s now begins. 
seq = StringPiece(seq.data(), static_cast<size_t>(s->data() - seq.data())); - + if (!name.empty() && name[0] == '^') { - sign = -sign; - name.remove_prefix(1); // '^' - } + sign = -sign; + name.remove_prefix(1); // '^' + } #if !defined(RE2_USE_ICU) // Look up the group in the RE2 Unicode data. const UGroup *g = LookupUnicodeGroup(name); - if (g == NULL) { - status->set_code(kRegexpBadCharRange); - status->set_error_arg(seq); - return kParseError; - } - - AddUGroup(cc, g, sign, parse_flags); + if (g == NULL) { + status->set_code(kRegexpBadCharRange); + status->set_error_arg(seq); + return kParseError; + } + + AddUGroup(cc, g, sign, parse_flags); #else // Look up the group in the ICU Unicode data. Because ICU provides full // Unicode properties support, this could be more than a lookup by name. @@ -1815,210 +1815,210 @@ ParseStatus ParseUnicodeGroup(StringPiece* s, Regexp::ParseFlags parse_flags, AddUGroup(cc, &g, sign, parse_flags); #endif - return kParseOk; -} - -// Parses a character class name like [:alnum:]. -// Sets *s to span the remainder of the string. -// Adds the ranges corresponding to the class to ranges. -static ParseStatus ParseCCName(StringPiece* s, Regexp::ParseFlags parse_flags, - CharClassBuilder *cc, - RegexpStatus* status) { - // Check begins with [: - const char* p = s->data(); - const char* ep = s->data() + s->size(); - if (ep - p < 2 || p[0] != '[' || p[1] != ':') - return kParseNothing; - - // Look for closing :]. - const char* q; - for (q = p+2; q <= ep-2 && (*q != ':' || *(q+1) != ']'); q++) - ; - - // If no closing :], then ignore. - if (q > ep-2) - return kParseNothing; - - // Got it. Check that it's valid. - q += 2; + return kParseOk; +} + +// Parses a character class name like [:alnum:]. +// Sets *s to span the remainder of the string. +// Adds the ranges corresponding to the class to ranges. 
+static ParseStatus ParseCCName(StringPiece* s, Regexp::ParseFlags parse_flags, + CharClassBuilder *cc, + RegexpStatus* status) { + // Check begins with [: + const char* p = s->data(); + const char* ep = s->data() + s->size(); + if (ep - p < 2 || p[0] != '[' || p[1] != ':') + return kParseNothing; + + // Look for closing :]. + const char* q; + for (q = p+2; q <= ep-2 && (*q != ':' || *(q+1) != ']'); q++) + ; + + // If no closing :], then ignore. + if (q > ep-2) + return kParseNothing; + + // Got it. Check that it's valid. + q += 2; StringPiece name(p, static_cast<size_t>(q - p)); - + const UGroup *g = LookupPosixGroup(name); - if (g == NULL) { - status->set_code(kRegexpBadCharRange); - status->set_error_arg(name); - return kParseError; - } - - s->remove_prefix(name.size()); - AddUGroup(cc, g, g->sign, parse_flags); - return kParseOk; -} - -// Parses a character inside a character class. -// There are fewer special characters here than in the rest of the regexp. -// Sets *s to span the remainder of the string. -// Sets *rp to the character. -bool Regexp::ParseState::ParseCCCharacter(StringPiece* s, Rune *rp, - const StringPiece& whole_class, - RegexpStatus* status) { + if (g == NULL) { + status->set_code(kRegexpBadCharRange); + status->set_error_arg(name); + return kParseError; + } + + s->remove_prefix(name.size()); + AddUGroup(cc, g, g->sign, parse_flags); + return kParseOk; +} + +// Parses a character inside a character class. +// There are fewer special characters here than in the rest of the regexp. +// Sets *s to span the remainder of the string. +// Sets *rp to the character. +bool Regexp::ParseState::ParseCCCharacter(StringPiece* s, Rune *rp, + const StringPiece& whole_class, + RegexpStatus* status) { if (s->empty()) { - status->set_code(kRegexpMissingBracket); - status->set_error_arg(whole_class); - return false; - } - - // Allow regular escape sequences even though - // many need not be escaped in this context. 
+ status->set_code(kRegexpMissingBracket); + status->set_error_arg(whole_class); + return false; + } + + // Allow regular escape sequences even though + // many need not be escaped in this context. if ((*s)[0] == '\\') - return ParseEscape(s, rp, status, rune_max_); - - // Otherwise take the next rune. - return StringPieceToRune(rp, s, status) >= 0; -} - -// Parses a character class character, or, if the character -// is followed by a hyphen, parses a character class range. -// For single characters, rr->lo == rr->hi. -// Sets *s to span the remainder of the string. -// Sets *rp to the character. -bool Regexp::ParseState::ParseCCRange(StringPiece* s, RuneRange* rr, - const StringPiece& whole_class, - RegexpStatus* status) { - StringPiece os = *s; - if (!ParseCCCharacter(s, &rr->lo, whole_class, status)) - return false; - // [a-] means (a|-), so check for final ]. - if (s->size() >= 2 && (*s)[0] == '-' && (*s)[1] != ']') { - s->remove_prefix(1); // '-' - if (!ParseCCCharacter(s, &rr->hi, whole_class, status)) - return false; - if (rr->hi < rr->lo) { - status->set_code(kRegexpBadCharRange); + return ParseEscape(s, rp, status, rune_max_); + + // Otherwise take the next rune. + return StringPieceToRune(rp, s, status) >= 0; +} + +// Parses a character class character, or, if the character +// is followed by a hyphen, parses a character class range. +// For single characters, rr->lo == rr->hi. +// Sets *s to span the remainder of the string. +// Sets *rp to the character. +bool Regexp::ParseState::ParseCCRange(StringPiece* s, RuneRange* rr, + const StringPiece& whole_class, + RegexpStatus* status) { + StringPiece os = *s; + if (!ParseCCCharacter(s, &rr->lo, whole_class, status)) + return false; + // [a-] means (a|-), so check for final ]. 
+ if (s->size() >= 2 && (*s)[0] == '-' && (*s)[1] != ']') { + s->remove_prefix(1); // '-' + if (!ParseCCCharacter(s, &rr->hi, whole_class, status)) + return false; + if (rr->hi < rr->lo) { + status->set_code(kRegexpBadCharRange); status->set_error_arg( StringPiece(os.data(), static_cast<size_t>(s->data() - os.data()))); - return false; - } - } else { - rr->hi = rr->lo; - } - return true; -} - -// Parses a possibly-negated character class expression like [^abx-z[:digit:]]. -// Sets *s to span the remainder of the string. -// Sets *out_re to the regexp for the class. -bool Regexp::ParseState::ParseCharClass(StringPiece* s, - Regexp** out_re, - RegexpStatus* status) { - StringPiece whole_class = *s; + return false; + } + } else { + rr->hi = rr->lo; + } + return true; +} + +// Parses a possibly-negated character class expression like [^abx-z[:digit:]]. +// Sets *s to span the remainder of the string. +// Sets *out_re to the regexp for the class. +bool Regexp::ParseState::ParseCharClass(StringPiece* s, + Regexp** out_re, + RegexpStatus* status) { + StringPiece whole_class = *s; if (s->empty() || (*s)[0] != '[') { - // Caller checked this. - status->set_code(kRegexpInternalError); + // Caller checked this. + status->set_code(kRegexpInternalError); status->set_error_arg(StringPiece()); - return false; - } - bool negated = false; - Regexp* re = new Regexp(kRegexpCharClass, flags_ & ~FoldCase); - re->ccb_ = new CharClassBuilder; - s->remove_prefix(1); // '[' + return false; + } + bool negated = false; + Regexp* re = new Regexp(kRegexpCharClass, flags_ & ~FoldCase); + re->ccb_ = new CharClassBuilder; + s->remove_prefix(1); // '[' if (!s->empty() && (*s)[0] == '^') { - s->remove_prefix(1); // '^' - negated = true; - if (!(flags_ & ClassNL) || (flags_ & NeverNL)) { - // If NL can't match implicitly, then pretend - // negated classes include a leading \n. 
- re->ccb_->AddRange('\n', '\n'); - } - } - bool first = true; // ] is okay as first char in class + s->remove_prefix(1); // '^' + negated = true; + if (!(flags_ & ClassNL) || (flags_ & NeverNL)) { + // If NL can't match implicitly, then pretend + // negated classes include a leading \n. + re->ccb_->AddRange('\n', '\n'); + } + } + bool first = true; // ] is okay as first char in class while (!s->empty() && ((*s)[0] != ']' || first)) { - // - is only okay unescaped as first or last in class. - // Except that Perl allows - anywhere. - if ((*s)[0] == '-' && !first && !(flags_&PerlX) && - (s->size() == 1 || (*s)[1] != ']')) { - StringPiece t = *s; - t.remove_prefix(1); // '-' - Rune r; - int n = StringPieceToRune(&r, &t, status); - if (n < 0) { - re->Decref(); - return false; - } - status->set_code(kRegexpBadCharRange); - status->set_error_arg(StringPiece(s->data(), 1+n)); - re->Decref(); - return false; - } - first = false; - - // Look for [:alnum:] etc. - if (s->size() > 2 && (*s)[0] == '[' && (*s)[1] == ':') { - switch (ParseCCName(s, flags_, re->ccb_, status)) { - case kParseOk: - continue; - case kParseError: - re->Decref(); - return false; - case kParseNothing: - break; - } - } - - // Look for Unicode character group like \p{Han} - if (s->size() > 2 && - (*s)[0] == '\\' && - ((*s)[1] == 'p' || (*s)[1] == 'P')) { - switch (ParseUnicodeGroup(s, flags_, re->ccb_, status)) { - case kParseOk: - continue; - case kParseError: - re->Decref(); - return false; - case kParseNothing: - break; - } - } - - // Look for Perl character class symbols (extension). + // - is only okay unescaped as first or last in class. + // Except that Perl allows - anywhere. 
+ if ((*s)[0] == '-' && !first && !(flags_&PerlX) && + (s->size() == 1 || (*s)[1] != ']')) { + StringPiece t = *s; + t.remove_prefix(1); // '-' + Rune r; + int n = StringPieceToRune(&r, &t, status); + if (n < 0) { + re->Decref(); + return false; + } + status->set_code(kRegexpBadCharRange); + status->set_error_arg(StringPiece(s->data(), 1+n)); + re->Decref(); + return false; + } + first = false; + + // Look for [:alnum:] etc. + if (s->size() > 2 && (*s)[0] == '[' && (*s)[1] == ':') { + switch (ParseCCName(s, flags_, re->ccb_, status)) { + case kParseOk: + continue; + case kParseError: + re->Decref(); + return false; + case kParseNothing: + break; + } + } + + // Look for Unicode character group like \p{Han} + if (s->size() > 2 && + (*s)[0] == '\\' && + ((*s)[1] == 'p' || (*s)[1] == 'P')) { + switch (ParseUnicodeGroup(s, flags_, re->ccb_, status)) { + case kParseOk: + continue; + case kParseError: + re->Decref(); + return false; + case kParseNothing: + break; + } + } + + // Look for Perl character class symbols (extension). const UGroup *g = MaybeParsePerlCCEscape(s, flags_); - if (g != NULL) { - AddUGroup(re->ccb_, g, g->sign, flags_); - continue; - } - - // Otherwise assume single character or simple range. - RuneRange rr; - if (!ParseCCRange(s, &rr, whole_class, status)) { - re->Decref(); - return false; - } - // AddRangeFlags is usually called in response to a class like - // \p{Foo} or [[:foo:]]; for those, it filters \n out unless - // Regexp::ClassNL is set. In an explicit range or singleton - // like we just parsed, we do not filter \n out, so set ClassNL - // in the flags. - re->ccb_->AddRangeFlags(rr.lo, rr.hi, flags_ | Regexp::ClassNL); - } + if (g != NULL) { + AddUGroup(re->ccb_, g, g->sign, flags_); + continue; + } + + // Otherwise assume single character or simple range. 
+ RuneRange rr; + if (!ParseCCRange(s, &rr, whole_class, status)) { + re->Decref(); + return false; + } + // AddRangeFlags is usually called in response to a class like + // \p{Foo} or [[:foo:]]; for those, it filters \n out unless + // Regexp::ClassNL is set. In an explicit range or singleton + // like we just parsed, we do not filter \n out, so set ClassNL + // in the flags. + re->ccb_->AddRangeFlags(rr.lo, rr.hi, flags_ | Regexp::ClassNL); + } if (s->empty()) { - status->set_code(kRegexpMissingBracket); - status->set_error_arg(whole_class); - re->Decref(); - return false; - } - s->remove_prefix(1); // ']' - - if (negated) - re->ccb_->Negate(); - - *out_re = re; - return true; -} - + status->set_code(kRegexpMissingBracket); + status->set_error_arg(whole_class); + re->Decref(); + return false; + } + s->remove_prefix(1); // ']' + + if (negated) + re->ccb_->Negate(); + + *out_re = re; + return true; +} + // Returns whether name is a valid capture name. -static bool IsValidCaptureName(const StringPiece& name) { +static bool IsValidCaptureName(const StringPiece& name) { if (name.empty()) - return false; + return false; // Historically, we effectively used [0-9A-Za-z_]+ to validate; that // followed Python 2 except for not restricting the first character. @@ -2043,230 +2043,230 @@ static bool IsValidCaptureName(const StringPiece& name) { if (StringPieceToRune(&r, &t, NULL) < 0) return false; if (cc->Contains(r)) - continue; - return false; - } - return true; -} - -// Parses a Perl flag setting or non-capturing group or both, -// like (?i) or (?: or (?i:. Removes from s, updates parse state. -// The caller must check that s begins with "(?". -// Returns true on success. If the Perl flag is not -// well-formed or not supported, sets status_ and returns false. -bool Regexp::ParseState::ParsePerlFlags(StringPiece* s) { - StringPiece t = *s; - - // Caller is supposed to check this. 
- if (!(flags_ & PerlX) || t.size() < 2 || t[0] != '(' || t[1] != '?') { - LOG(DFATAL) << "Bad call to ParseState::ParsePerlFlags"; - status_->set_code(kRegexpInternalError); - return false; - } - - t.remove_prefix(2); // "(?" - - // Check for named captures, first introduced in Python's regexp library. - // As usual, there are three slightly different syntaxes: - // - // (?P<name>expr) the original, introduced by Python - // (?<name>expr) the .NET alteration, adopted by Perl 5.10 - // (?'name'expr) another .NET alteration, adopted by Perl 5.10 - // - // Perl 5.10 gave in and implemented the Python version too, - // but they claim that the last two are the preferred forms. - // PCRE and languages based on it (specifically, PHP and Ruby) - // support all three as well. EcmaScript 4 uses only the Python form. - // - // In both the open source world (via Code Search) and the - // Google source tree, (?P<expr>name) is the dominant form, - // so that's the one we implement. One is enough. - if (t.size() > 2 && t[0] == 'P' && t[1] == '<') { - // Pull out name. + continue; + return false; + } + return true; +} + +// Parses a Perl flag setting or non-capturing group or both, +// like (?i) or (?: or (?i:. Removes from s, updates parse state. +// The caller must check that s begins with "(?". +// Returns true on success. If the Perl flag is not +// well-formed or not supported, sets status_ and returns false. +bool Regexp::ParseState::ParsePerlFlags(StringPiece* s) { + StringPiece t = *s; + + // Caller is supposed to check this. + if (!(flags_ & PerlX) || t.size() < 2 || t[0] != '(' || t[1] != '?') { + LOG(DFATAL) << "Bad call to ParseState::ParsePerlFlags"; + status_->set_code(kRegexpInternalError); + return false; + } + + t.remove_prefix(2); // "(?" + + // Check for named captures, first introduced in Python's regexp library. 
+ // As usual, there are three slightly different syntaxes: + // + // (?P<name>expr) the original, introduced by Python + // (?<name>expr) the .NET alteration, adopted by Perl 5.10 + // (?'name'expr) another .NET alteration, adopted by Perl 5.10 + // + // Perl 5.10 gave in and implemented the Python version too, + // but they claim that the last two are the preferred forms. + // PCRE and languages based on it (specifically, PHP and Ruby) + // support all three as well. EcmaScript 4 uses only the Python form. + // + // In both the open source world (via Code Search) and the + // Google source tree, (?P<expr>name) is the dominant form, + // so that's the one we implement. One is enough. + if (t.size() > 2 && t[0] == 'P' && t[1] == '<') { + // Pull out name. size_t end = t.find('>', 2); if (end == StringPiece::npos) { - if (!IsValidUTF8(*s, status_)) - return false; - status_->set_code(kRegexpBadNamedCapture); - status_->set_error_arg(*s); - return false; - } - - // t is "P<name>...", t[end] == '>' + if (!IsValidUTF8(*s, status_)) + return false; + status_->set_code(kRegexpBadNamedCapture); + status_->set_error_arg(*s); + return false; + } + + // t is "P<name>...", t[end] == '>' StringPiece capture(t.data()-2, end+3); // "(?P<name>" StringPiece name(t.data()+2, end-2); // "name" - if (!IsValidUTF8(name, status_)) - return false; - if (!IsValidCaptureName(name)) { - status_->set_code(kRegexpBadNamedCapture); - status_->set_error_arg(capture); - return false; - } - - if (!DoLeftParen(name)) { - // DoLeftParen's failure set status_. - return false; - } - + if (!IsValidUTF8(name, status_)) + return false; + if (!IsValidCaptureName(name)) { + status_->set_code(kRegexpBadNamedCapture); + status_->set_error_arg(capture); + return false; + } + + if (!DoLeftParen(name)) { + // DoLeftParen's failure set status_. 
+ return false; + } + s->remove_prefix( static_cast<size_t>(capture.data() + capture.size() - s->data())); - return true; - } - - bool negated = false; - bool sawflags = false; - int nflags = flags_; - Rune c; - for (bool done = false; !done; ) { + return true; + } + + bool negated = false; + bool sawflags = false; + int nflags = flags_; + Rune c; + for (bool done = false; !done; ) { if (t.empty()) - goto BadPerlOp; - if (StringPieceToRune(&c, &t, status_) < 0) - return false; - switch (c) { - default: - goto BadPerlOp; - - // Parse flags. - case 'i': - sawflags = true; - if (negated) - nflags &= ~FoldCase; - else - nflags |= FoldCase; - break; - - case 'm': // opposite of our OneLine - sawflags = true; - if (negated) - nflags |= OneLine; - else - nflags &= ~OneLine; - break; - - case 's': - sawflags = true; - if (negated) - nflags &= ~DotNL; - else - nflags |= DotNL; - break; - - case 'U': - sawflags = true; - if (negated) - nflags &= ~NonGreedy; - else - nflags |= NonGreedy; - break; - - // Negation - case '-': - if (negated) - goto BadPerlOp; - negated = true; - sawflags = false; - break; - - // Open new group. - case ':': - if (!DoLeftParenNoCapture()) { - // DoLeftParenNoCapture's failure set status_. - return false; - } - done = true; - break; - - // Finish flags. - case ')': - done = true; - break; - } - } - - if (negated && !sawflags) - goto BadPerlOp; - - flags_ = static_cast<Regexp::ParseFlags>(nflags); - *s = t; - return true; - -BadPerlOp: - status_->set_code(kRegexpBadPerlOp); + goto BadPerlOp; + if (StringPieceToRune(&c, &t, status_) < 0) + return false; + switch (c) { + default: + goto BadPerlOp; + + // Parse flags. 
+ case 'i': + sawflags = true; + if (negated) + nflags &= ~FoldCase; + else + nflags |= FoldCase; + break; + + case 'm': // opposite of our OneLine + sawflags = true; + if (negated) + nflags |= OneLine; + else + nflags &= ~OneLine; + break; + + case 's': + sawflags = true; + if (negated) + nflags &= ~DotNL; + else + nflags |= DotNL; + break; + + case 'U': + sawflags = true; + if (negated) + nflags &= ~NonGreedy; + else + nflags |= NonGreedy; + break; + + // Negation + case '-': + if (negated) + goto BadPerlOp; + negated = true; + sawflags = false; + break; + + // Open new group. + case ':': + if (!DoLeftParenNoCapture()) { + // DoLeftParenNoCapture's failure set status_. + return false; + } + done = true; + break; + + // Finish flags. + case ')': + done = true; + break; + } + } + + if (negated && !sawflags) + goto BadPerlOp; + + flags_ = static_cast<Regexp::ParseFlags>(nflags); + *s = t; + return true; + +BadPerlOp: + status_->set_code(kRegexpBadPerlOp); status_->set_error_arg( StringPiece(s->data(), static_cast<size_t>(t.data() - s->data()))); - return false; -} - -// Converts latin1 (assumed to be encoded as Latin1 bytes) -// into UTF8 encoding in string. -// Can't use EncodingUtils::EncodeLatin1AsUTF8 because it is -// deprecated and because it rejects code points 0x80-0x9F. + return false; +} + +// Converts latin1 (assumed to be encoded as Latin1 bytes) +// into UTF8 encoding in string. +// Can't use EncodingUtils::EncodeLatin1AsUTF8 because it is +// deprecated and because it rejects code points 0x80-0x9F. void ConvertLatin1ToUTF8(const StringPiece& latin1, std::string* utf) { - char buf[UTFmax]; - - utf->clear(); + char buf[UTFmax]; + + utf->clear(); for (size_t i = 0; i < latin1.size(); i++) { - Rune r = latin1[i] & 0xFF; - int n = runetochar(buf, &r); - utf->append(buf, n); - } -} - -// Parses the regular expression given by s, -// returning the corresponding Regexp tree. -// The caller must Decref the return value when done with it. 
-// Returns NULL on error. -Regexp* Regexp::Parse(const StringPiece& s, ParseFlags global_flags, - RegexpStatus* status) { - // Make status non-NULL (easier on everyone else). - RegexpStatus xstatus; - if (status == NULL) - status = &xstatus; - - ParseState ps(global_flags, s, status); - StringPiece t = s; - - // Convert regexp to UTF-8 (easier on the rest of the parser). - if (global_flags & Latin1) { + Rune r = latin1[i] & 0xFF; + int n = runetochar(buf, &r); + utf->append(buf, n); + } +} + +// Parses the regular expression given by s, +// returning the corresponding Regexp tree. +// The caller must Decref the return value when done with it. +// Returns NULL on error. +Regexp* Regexp::Parse(const StringPiece& s, ParseFlags global_flags, + RegexpStatus* status) { + // Make status non-NULL (easier on everyone else). + RegexpStatus xstatus; + if (status == NULL) + status = &xstatus; + + ParseState ps(global_flags, s, status); + StringPiece t = s; + + // Convert regexp to UTF-8 (easier on the rest of the parser). + if (global_flags & Latin1) { std::string* tmp = new std::string; - ConvertLatin1ToUTF8(t, tmp); - status->set_tmp(tmp); - t = *tmp; - } - - if (global_flags & Literal) { - // Special parse loop for literal string. + ConvertLatin1ToUTF8(t, tmp); + status->set_tmp(tmp); + t = *tmp; + } + + if (global_flags & Literal) { + // Special parse loop for literal string. while (!t.empty()) { - Rune r; - if (StringPieceToRune(&r, &t, status) < 0) - return NULL; - if (!ps.PushLiteral(r)) - return NULL; - } - return ps.DoFinish(); - } - + Rune r; + if (StringPieceToRune(&r, &t, status) < 0) + return NULL; + if (!ps.PushLiteral(r)) + return NULL; + } + return ps.DoFinish(); + } + StringPiece lastunary = StringPiece(); while (!t.empty()) { StringPiece isunary = StringPiece(); - switch (t[0]) { - default: { - Rune r; - if (StringPieceToRune(&r, &t, status) < 0) - return NULL; - if (!ps.PushLiteral(r)) - return NULL; - break; - } - - case '(': - // "(?" 
introduces Perl escape. - if ((ps.flags() & PerlX) && (t.size() >= 2 && t[1] == '?')) { - // Flag changes and non-capturing groups. - if (!ps.ParsePerlFlags(&t)) - return NULL; - break; - } + switch (t[0]) { + default: { + Rune r; + if (StringPieceToRune(&r, &t, status) < 0) + return NULL; + if (!ps.PushLiteral(r)) + return NULL; + break; + } + + case '(': + // "(?" introduces Perl escape. + if ((ps.flags() & PerlX) && (t.size() >= 2 && t[1] == '?')) { + // Flag changes and non-capturing groups. + if (!ps.ParsePerlFlags(&t)) + return NULL; + break; + } if (ps.flags() & NeverCapture) { if (!ps.DoLeftParenNoCapture()) return NULL; @@ -2274,210 +2274,210 @@ Regexp* Regexp::Parse(const StringPiece& s, ParseFlags global_flags, if (!ps.DoLeftParen(StringPiece())) return NULL; } - t.remove_prefix(1); // '(' - break; - - case '|': - if (!ps.DoVerticalBar()) - return NULL; - t.remove_prefix(1); // '|' - break; - - case ')': - if (!ps.DoRightParen()) - return NULL; - t.remove_prefix(1); // ')' - break; - - case '^': // Beginning of line. + t.remove_prefix(1); // '(' + break; + + case '|': + if (!ps.DoVerticalBar()) + return NULL; + t.remove_prefix(1); // '|' + break; + + case ')': + if (!ps.DoRightParen()) + return NULL; + t.remove_prefix(1); // ')' + break; + + case '^': // Beginning of line. if (!ps.PushCaret()) - return NULL; - t.remove_prefix(1); // '^' - break; - - case '$': // End of line. - if (!ps.PushDollar()) - return NULL; - t.remove_prefix(1); // '$' - break; - - case '.': // Any character (possibly except newline). - if (!ps.PushDot()) - return NULL; - t.remove_prefix(1); // '.' - break; - - case '[': { // Character class. - Regexp* re; - if (!ps.ParseCharClass(&t, &re, status)) - return NULL; - if (!ps.PushRegexp(re)) - return NULL; - break; - } - - case '*': { // Zero or more. - RegexpOp op; - op = kRegexpStar; - goto Rep; - case '+': // One or more. - op = kRegexpPlus; - goto Rep; - case '?': // Zero or one. 
- op = kRegexpQuest; - goto Rep; - Rep: - StringPiece opstr = t; - bool nongreedy = false; - t.remove_prefix(1); // '*' or '+' or '?' - if (ps.flags() & PerlX) { + return NULL; + t.remove_prefix(1); // '^' + break; + + case '$': // End of line. + if (!ps.PushDollar()) + return NULL; + t.remove_prefix(1); // '$' + break; + + case '.': // Any character (possibly except newline). + if (!ps.PushDot()) + return NULL; + t.remove_prefix(1); // '.' + break; + + case '[': { // Character class. + Regexp* re; + if (!ps.ParseCharClass(&t, &re, status)) + return NULL; + if (!ps.PushRegexp(re)) + return NULL; + break; + } + + case '*': { // Zero or more. + RegexpOp op; + op = kRegexpStar; + goto Rep; + case '+': // One or more. + op = kRegexpPlus; + goto Rep; + case '?': // Zero or one. + op = kRegexpQuest; + goto Rep; + Rep: + StringPiece opstr = t; + bool nongreedy = false; + t.remove_prefix(1); // '*' or '+' or '?' + if (ps.flags() & PerlX) { if (!t.empty() && t[0] == '?') { - nongreedy = true; - t.remove_prefix(1); // '?' - } + nongreedy = true; + t.remove_prefix(1); // '?' + } if (!lastunary.empty()) { - // In Perl it is not allowed to stack repetition operators: - // a** is a syntax error, not a double-star. - // (and a++ means something else entirely, which we don't support!) - status->set_code(kRegexpRepeatOp); + // In Perl it is not allowed to stack repetition operators: + // a** is a syntax error, not a double-star. + // (and a++ means something else entirely, which we don't support!) + status->set_code(kRegexpRepeatOp); status->set_error_arg(StringPiece( lastunary.data(), static_cast<size_t>(t.data() - lastunary.data()))); - return NULL; - } - } + return NULL; + } + } opstr = StringPiece(opstr.data(), static_cast<size_t>(t.data() - opstr.data())); - if (!ps.PushRepeatOp(op, opstr, nongreedy)) - return NULL; - isunary = opstr; - break; - } - - case '{': { // Counted repetition. 
- int lo, hi; - StringPiece opstr = t; - if (!MaybeParseRepetition(&t, &lo, &hi)) { - // Treat like a literal. - if (!ps.PushLiteral('{')) - return NULL; - t.remove_prefix(1); // '{' - break; - } - bool nongreedy = false; - if (ps.flags() & PerlX) { + if (!ps.PushRepeatOp(op, opstr, nongreedy)) + return NULL; + isunary = opstr; + break; + } + + case '{': { // Counted repetition. + int lo, hi; + StringPiece opstr = t; + if (!MaybeParseRepetition(&t, &lo, &hi)) { + // Treat like a literal. + if (!ps.PushLiteral('{')) + return NULL; + t.remove_prefix(1); // '{' + break; + } + bool nongreedy = false; + if (ps.flags() & PerlX) { if (!t.empty() && t[0] == '?') { - nongreedy = true; - t.remove_prefix(1); // '?' - } + nongreedy = true; + t.remove_prefix(1); // '?' + } if (!lastunary.empty()) { - // Not allowed to stack repetition operators. - status->set_code(kRegexpRepeatOp); + // Not allowed to stack repetition operators. + status->set_code(kRegexpRepeatOp); status->set_error_arg(StringPiece( lastunary.data(), static_cast<size_t>(t.data() - lastunary.data()))); - return NULL; - } - } + return NULL; + } + } opstr = StringPiece(opstr.data(), static_cast<size_t>(t.data() - opstr.data())); - if (!ps.PushRepetition(lo, hi, opstr, nongreedy)) - return NULL; - isunary = opstr; - break; - } - - case '\\': { // Escaped character or Perl sequence. - // \b and \B: word boundary or not - if ((ps.flags() & Regexp::PerlB) && - t.size() >= 2 && (t[1] == 'b' || t[1] == 'B')) { - if (!ps.PushWordBoundary(t[1] == 'b')) - return NULL; - t.remove_prefix(2); // '\\', 'b' - break; - } - - if ((ps.flags() & Regexp::PerlX) && t.size() >= 2) { - if (t[1] == 'A') { - if (!ps.PushSimpleOp(kRegexpBeginText)) - return NULL; - t.remove_prefix(2); // '\\', 'A' - break; - } - if (t[1] == 'z') { - if (!ps.PushSimpleOp(kRegexpEndText)) - return NULL; - t.remove_prefix(2); // '\\', 'z' - break; - } - // Do not recognize \Z, because this library can't - // implement the exact Perl/PCRE semantics. 
- // (This library treats "(?-m)$" as \z, even though - // in Perl and PCRE it is equivalent to \Z.) - - if (t[1] == 'C') { // \C: any byte [sic] - if (!ps.PushSimpleOp(kRegexpAnyByte)) - return NULL; - t.remove_prefix(2); // '\\', 'C' - break; - } - - if (t[1] == 'Q') { // \Q ... \E: the ... is always literals - t.remove_prefix(2); // '\\', 'Q' + if (!ps.PushRepetition(lo, hi, opstr, nongreedy)) + return NULL; + isunary = opstr; + break; + } + + case '\\': { // Escaped character or Perl sequence. + // \b and \B: word boundary or not + if ((ps.flags() & Regexp::PerlB) && + t.size() >= 2 && (t[1] == 'b' || t[1] == 'B')) { + if (!ps.PushWordBoundary(t[1] == 'b')) + return NULL; + t.remove_prefix(2); // '\\', 'b' + break; + } + + if ((ps.flags() & Regexp::PerlX) && t.size() >= 2) { + if (t[1] == 'A') { + if (!ps.PushSimpleOp(kRegexpBeginText)) + return NULL; + t.remove_prefix(2); // '\\', 'A' + break; + } + if (t[1] == 'z') { + if (!ps.PushSimpleOp(kRegexpEndText)) + return NULL; + t.remove_prefix(2); // '\\', 'z' + break; + } + // Do not recognize \Z, because this library can't + // implement the exact Perl/PCRE semantics. + // (This library treats "(?-m)$" as \z, even though + // in Perl and PCRE it is equivalent to \Z.) + + if (t[1] == 'C') { // \C: any byte [sic] + if (!ps.PushSimpleOp(kRegexpAnyByte)) + return NULL; + t.remove_prefix(2); // '\\', 'C' + break; + } + + if (t[1] == 'Q') { // \Q ... \E: the ... 
is always literals + t.remove_prefix(2); // '\\', 'Q' while (!t.empty()) { - if (t.size() >= 2 && t[0] == '\\' && t[1] == 'E') { - t.remove_prefix(2); // '\\', 'E' - break; - } - Rune r; - if (StringPieceToRune(&r, &t, status) < 0) - return NULL; - if (!ps.PushLiteral(r)) - return NULL; - } - break; - } - } - - if (t.size() >= 2 && (t[1] == 'p' || t[1] == 'P')) { - Regexp* re = new Regexp(kRegexpCharClass, ps.flags() & ~FoldCase); - re->ccb_ = new CharClassBuilder; - switch (ParseUnicodeGroup(&t, ps.flags(), re->ccb_, status)) { - case kParseOk: - if (!ps.PushRegexp(re)) - return NULL; - goto Break2; - case kParseError: - re->Decref(); - return NULL; - case kParseNothing: - re->Decref(); - break; - } - } - + if (t.size() >= 2 && t[0] == '\\' && t[1] == 'E') { + t.remove_prefix(2); // '\\', 'E' + break; + } + Rune r; + if (StringPieceToRune(&r, &t, status) < 0) + return NULL; + if (!ps.PushLiteral(r)) + return NULL; + } + break; + } + } + + if (t.size() >= 2 && (t[1] == 'p' || t[1] == 'P')) { + Regexp* re = new Regexp(kRegexpCharClass, ps.flags() & ~FoldCase); + re->ccb_ = new CharClassBuilder; + switch (ParseUnicodeGroup(&t, ps.flags(), re->ccb_, status)) { + case kParseOk: + if (!ps.PushRegexp(re)) + return NULL; + goto Break2; + case kParseError: + re->Decref(); + return NULL; + case kParseNothing: + re->Decref(); + break; + } + } + const UGroup *g = MaybeParsePerlCCEscape(&t, ps.flags()); - if (g != NULL) { - Regexp* re = new Regexp(kRegexpCharClass, ps.flags() & ~FoldCase); - re->ccb_ = new CharClassBuilder; - AddUGroup(re->ccb_, g, g->sign, ps.flags()); - if (!ps.PushRegexp(re)) - return NULL; - break; - } - - Rune r; - if (!ParseEscape(&t, &r, status, ps.rune_max())) - return NULL; - if (!ps.PushLiteral(r)) - return NULL; - break; - } - } - Break2: - lastunary = isunary; - } - return ps.DoFinish(); -} - -} // namespace re2 + if (g != NULL) { + Regexp* re = new Regexp(kRegexpCharClass, ps.flags() & ~FoldCase); + re->ccb_ = new CharClassBuilder; + 
AddUGroup(re->ccb_, g, g->sign, ps.flags()); + if (!ps.PushRegexp(re)) + return NULL; + break; + } + + Rune r; + if (!ParseEscape(&t, &r, status, ps.rune_max())) + return NULL; + if (!ps.PushLiteral(r)) + return NULL; + break; + } + } + Break2: + lastunary = isunary; + } + return ps.DoFinish(); +} + +} // namespace re2 diff --git a/contrib/libs/re2/re2/perl_groups.cc b/contrib/libs/re2/re2/perl_groups.cc index 4687444581..c8f4dbde5e 100644 --- a/contrib/libs/re2/re2/perl_groups.cc +++ b/contrib/libs/re2/re2/perl_groups.cc @@ -1,24 +1,24 @@ -// GENERATED BY make_perl_groups.pl; DO NOT EDIT. -// make_perl_groups.pl >perl_groups.cc - -#include "re2/unicode_groups.h" - -namespace re2 { - +// GENERATED BY make_perl_groups.pl; DO NOT EDIT. +// make_perl_groups.pl >perl_groups.cc + +#include "re2/unicode_groups.h" + +namespace re2 { + static const URange16 code1[] = { /* \d */ - { 0x30, 0x39 }, -}; + { 0x30, 0x39 }, +}; static const URange16 code2[] = { /* \s */ - { 0x9, 0xa }, - { 0xc, 0xd }, - { 0x20, 0x20 }, -}; + { 0x9, 0xa }, + { 0xc, 0xd }, + { 0x20, 0x20 }, +}; static const URange16 code3[] = { /* \w */ - { 0x30, 0x39 }, - { 0x41, 0x5a }, - { 0x5f, 0x5f }, - { 0x61, 0x7a }, -}; + { 0x30, 0x39 }, + { 0x41, 0x5a }, + { 0x5f, 0x5f }, + { 0x61, 0x7a }, +}; const UGroup perl_groups[] = { { "\\d", +1, code1, 1, 0, 0 }, { "\\D", -1, code1, 1, 0, 0 }, @@ -26,64 +26,64 @@ const UGroup perl_groups[] = { { "\\S", -1, code2, 3, 0, 0 }, { "\\w", +1, code3, 4, 0, 0 }, { "\\W", -1, code3, 4, 0, 0 }, -}; +}; const int num_perl_groups = 6; static const URange16 code4[] = { /* [:alnum:] */ - { 0x30, 0x39 }, - { 0x41, 0x5a }, - { 0x61, 0x7a }, -}; + { 0x30, 0x39 }, + { 0x41, 0x5a }, + { 0x61, 0x7a }, +}; static const URange16 code5[] = { /* [:alpha:] */ - { 0x41, 0x5a }, - { 0x61, 0x7a }, -}; + { 0x41, 0x5a }, + { 0x61, 0x7a }, +}; static const URange16 code6[] = { /* [:ascii:] */ - { 0x0, 0x7f }, -}; + { 0x0, 0x7f }, +}; static const URange16 code7[] = { /* [:blank:] */ - { 0x9, 0x9 
}, - { 0x20, 0x20 }, -}; + { 0x9, 0x9 }, + { 0x20, 0x20 }, +}; static const URange16 code8[] = { /* [:cntrl:] */ - { 0x0, 0x1f }, - { 0x7f, 0x7f }, -}; + { 0x0, 0x1f }, + { 0x7f, 0x7f }, +}; static const URange16 code9[] = { /* [:digit:] */ - { 0x30, 0x39 }, -}; + { 0x30, 0x39 }, +}; static const URange16 code10[] = { /* [:graph:] */ - { 0x21, 0x7e }, -}; + { 0x21, 0x7e }, +}; static const URange16 code11[] = { /* [:lower:] */ - { 0x61, 0x7a }, -}; + { 0x61, 0x7a }, +}; static const URange16 code12[] = { /* [:print:] */ - { 0x20, 0x7e }, -}; + { 0x20, 0x7e }, +}; static const URange16 code13[] = { /* [:punct:] */ - { 0x21, 0x2f }, - { 0x3a, 0x40 }, - { 0x5b, 0x60 }, - { 0x7b, 0x7e }, -}; + { 0x21, 0x2f }, + { 0x3a, 0x40 }, + { 0x5b, 0x60 }, + { 0x7b, 0x7e }, +}; static const URange16 code14[] = { /* [:space:] */ - { 0x9, 0xd }, - { 0x20, 0x20 }, -}; + { 0x9, 0xd }, + { 0x20, 0x20 }, +}; static const URange16 code15[] = { /* [:upper:] */ - { 0x41, 0x5a }, -}; + { 0x41, 0x5a }, +}; static const URange16 code16[] = { /* [:word:] */ - { 0x30, 0x39 }, - { 0x41, 0x5a }, - { 0x5f, 0x5f }, - { 0x61, 0x7a }, -}; + { 0x30, 0x39 }, + { 0x41, 0x5a }, + { 0x5f, 0x5f }, + { 0x61, 0x7a }, +}; static const URange16 code17[] = { /* [:xdigit:] */ - { 0x30, 0x39 }, - { 0x41, 0x46 }, - { 0x61, 0x66 }, -}; + { 0x30, 0x39 }, + { 0x41, 0x46 }, + { 0x61, 0x66 }, +}; const UGroup posix_groups[] = { { "[:alnum:]", +1, code4, 3, 0, 0 }, { "[:^alnum:]", -1, code4, 3, 0, 0 }, @@ -113,7 +113,7 @@ const UGroup posix_groups[] = { { "[:^word:]", -1, code16, 4, 0, 0 }, { "[:xdigit:]", +1, code17, 3, 0, 0 }, { "[:^xdigit:]", -1, code17, 3, 0, 0 }, -}; +}; const int num_posix_groups = 28; - -} // namespace re2 + +} // namespace re2 diff --git a/contrib/libs/re2/re2/prefilter.cc b/contrib/libs/re2/re2/prefilter.cc index a47b3120fb..6a9a670381 100644 --- a/contrib/libs/re2/re2/prefilter.cc +++ b/contrib/libs/re2/re2/prefilter.cc @@ -1,8 +1,8 @@ -// Copyright 2009 The RE2 Authors. All Rights Reserved. 
-// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -#include "re2/prefilter.h" +// Copyright 2009 The RE2 Authors. All Rights Reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "re2/prefilter.h" #include <stddef.h> #include <stdint.h> @@ -15,163 +15,163 @@ #include "util/utf.h" #include "re2/re2.h" #include "re2/unicode_casefold.h" -#include "re2/walker-inl.h" - -namespace re2 { - +#include "re2/walker-inl.h" + +namespace re2 { + static const bool ExtraDebug = false; - + typedef std::set<std::string>::iterator SSIter; typedef std::set<std::string>::const_iterator ConstSSIter; - -// Initializes a Prefilter, allocating subs_ as necessary. -Prefilter::Prefilter(Op op) { - op_ = op; - subs_ = NULL; - if (op_ == AND || op_ == OR) + +// Initializes a Prefilter, allocating subs_ as necessary. +Prefilter::Prefilter(Op op) { + op_ = op; + subs_ = NULL; + if (op_ == AND || op_ == OR) subs_ = new std::vector<Prefilter*>; -} - -// Destroys a Prefilter. -Prefilter::~Prefilter() { - if (subs_) { +} + +// Destroys a Prefilter. +Prefilter::~Prefilter() { + if (subs_) { for (size_t i = 0; i < subs_->size(); i++) - delete (*subs_)[i]; - delete subs_; - subs_ = NULL; - } -} - -// Simplify if the node is an empty Or or And. -Prefilter* Prefilter::Simplify() { - if (op_ != AND && op_ != OR) { - return this; - } - - // Nothing left in the AND/OR. + delete (*subs_)[i]; + delete subs_; + subs_ = NULL; + } +} + +// Simplify if the node is an empty Or or And. +Prefilter* Prefilter::Simplify() { + if (op_ != AND && op_ != OR) { + return this; + } + + // Nothing left in the AND/OR. if (subs_->empty()) { - if (op_ == AND) - op_ = ALL; // AND of nothing is true - else - op_ = NONE; // OR of nothing is false - - return this; - } - - // Just one subnode: throw away wrapper. 
- if (subs_->size() == 1) { - Prefilter* a = (*subs_)[0]; - subs_->clear(); - delete this; - return a->Simplify(); - } - - return this; -} - -// Combines two Prefilters together to create an "op" (AND or OR). -// The passed Prefilters will be part of the returned Prefilter or deleted. -// Does lots of work to avoid creating unnecessarily complicated structures. -Prefilter* Prefilter::AndOr(Op op, Prefilter* a, Prefilter* b) { - // If a, b can be rewritten as op, do so. - a = a->Simplify(); - b = b->Simplify(); - - // Canonicalize: a->op <= b->op. - if (a->op() > b->op()) { - Prefilter* t = a; - a = b; - b = t; - } - - // Trivial cases. - // ALL AND b = b - // NONE OR b = b - // ALL OR b = ALL - // NONE AND b = NONE - // Don't need to look at b, because of canonicalization above. - // ALL and NONE are smallest opcodes. - if (a->op() == ALL || a->op() == NONE) { - if ((a->op() == ALL && op == AND) || - (a->op() == NONE && op == OR)) { - delete a; - return b; - } else { - delete b; - return a; - } - } - - // If a and b match op, merge their contents. - if (a->op() == op && b->op() == op) { + if (op_ == AND) + op_ = ALL; // AND of nothing is true + else + op_ = NONE; // OR of nothing is false + + return this; + } + + // Just one subnode: throw away wrapper. + if (subs_->size() == 1) { + Prefilter* a = (*subs_)[0]; + subs_->clear(); + delete this; + return a->Simplify(); + } + + return this; +} + +// Combines two Prefilters together to create an "op" (AND or OR). +// The passed Prefilters will be part of the returned Prefilter or deleted. +// Does lots of work to avoid creating unnecessarily complicated structures. +Prefilter* Prefilter::AndOr(Op op, Prefilter* a, Prefilter* b) { + // If a, b can be rewritten as op, do so. + a = a->Simplify(); + b = b->Simplify(); + + // Canonicalize: a->op <= b->op. + if (a->op() > b->op()) { + Prefilter* t = a; + a = b; + b = t; + } + + // Trivial cases. 
+ // ALL AND b = b + // NONE OR b = b + // ALL OR b = ALL + // NONE AND b = NONE + // Don't need to look at b, because of canonicalization above. + // ALL and NONE are smallest opcodes. + if (a->op() == ALL || a->op() == NONE) { + if ((a->op() == ALL && op == AND) || + (a->op() == NONE && op == OR)) { + delete a; + return b; + } else { + delete b; + return a; + } + } + + // If a and b match op, merge their contents. + if (a->op() == op && b->op() == op) { for (size_t i = 0; i < b->subs()->size(); i++) { - Prefilter* bb = (*b->subs())[i]; - a->subs()->push_back(bb); - } - b->subs()->clear(); - delete b; - return a; - } - - // If a already has the same op as the op that is under construction - // add in b (similarly if b already has the same op, add in a). - if (b->op() == op) { - Prefilter* t = a; - a = b; - b = t; - } - if (a->op() == op) { - a->subs()->push_back(b); - return a; - } - - // Otherwise just return the op. - Prefilter* c = new Prefilter(op); - c->subs()->push_back(a); - c->subs()->push_back(b); - return c; -} - -Prefilter* Prefilter::And(Prefilter* a, Prefilter* b) { - return AndOr(AND, a, b); -} - -Prefilter* Prefilter::Or(Prefilter* a, Prefilter* b) { - return AndOr(OR, a, b); -} - + Prefilter* bb = (*b->subs())[i]; + a->subs()->push_back(bb); + } + b->subs()->clear(); + delete b; + return a; + } + + // If a already has the same op as the op that is under construction + // add in b (similarly if b already has the same op, add in a). + if (b->op() == op) { + Prefilter* t = a; + a = b; + b = t; + } + if (a->op() == op) { + a->subs()->push_back(b); + return a; + } + + // Otherwise just return the op. 
+ Prefilter* c = new Prefilter(op); + c->subs()->push_back(a); + c->subs()->push_back(b); + return c; +} + +Prefilter* Prefilter::And(Prefilter* a, Prefilter* b) { + return AndOr(AND, a, b); +} + +Prefilter* Prefilter::Or(Prefilter* a, Prefilter* b) { + return AndOr(OR, a, b); +} + static void SimplifyStringSet(std::set<std::string>* ss) { - // Now make sure that the strings aren't redundant. For example, if - // we know "ab" is a required string, then it doesn't help at all to - // know that "abc" is also a required string, so delete "abc". This - // is because, when we are performing a string search to filter + // Now make sure that the strings aren't redundant. For example, if + // we know "ab" is a required string, then it doesn't help at all to + // know that "abc" is also a required string, so delete "abc". This + // is because, when we are performing a string search to filter // regexps, matching "ab" will already allow this regexp to be a // candidate for match, so further matching "abc" is redundant. // Note that we must ignore "" because find() would find it at the // start of everything and thus we would end up erasing everything. 
- for (SSIter i = ss->begin(); i != ss->end(); ++i) { + for (SSIter i = ss->begin(); i != ss->end(); ++i) { if (i->empty()) continue; - SSIter j = i; - ++j; - while (j != ss->end()) { + SSIter j = i; + ++j; + while (j != ss->end()) { if (j->find(*i) != std::string::npos) { j = ss->erase(j); continue; } - ++j; - } - } -} - + ++j; + } + } +} + Prefilter* Prefilter::OrStrings(std::set<std::string>* ss) { Prefilter* or_prefilter = new Prefilter(NONE); - SimplifyStringSet(ss); + SimplifyStringSet(ss); for (SSIter i = ss->begin(); i != ss->end(); ++i) or_prefilter = Or(or_prefilter, FromString(*i)); - return or_prefilter; -} - + return or_prefilter; +} + static Rune ToLowerRune(Rune r) { if (r < Runeself) { if ('A' <= r && r <= 'Z') @@ -192,221 +192,221 @@ static Rune ToLowerRuneLatin1(Rune r) { } Prefilter* Prefilter::FromString(const std::string& str) { - Prefilter* m = new Prefilter(Prefilter::ATOM); + Prefilter* m = new Prefilter(Prefilter::ATOM); m->atom_ = str; - return m; -} - -// Information about a regexp used during computation of Prefilter. -// Can be thought of as information about the set of strings matching -// the given regular expression. -class Prefilter::Info { - public: - Info(); - ~Info(); - - // More constructors. They delete their Info* arguments. - static Info* Alt(Info* a, Info* b); - static Info* Concat(Info* a, Info* b); - static Info* And(Info* a, Info* b); - static Info* Star(Info* a); - static Info* Plus(Info* a); - static Info* Quest(Info* a); - static Info* EmptyString(); - static Info* NoMatch(); + return m; +} + +// Information about a regexp used during computation of Prefilter. +// Can be thought of as information about the set of strings matching +// the given regular expression. +class Prefilter::Info { + public: + Info(); + ~Info(); + + // More constructors. They delete their Info* arguments. 
+ static Info* Alt(Info* a, Info* b); + static Info* Concat(Info* a, Info* b); + static Info* And(Info* a, Info* b); + static Info* Star(Info* a); + static Info* Plus(Info* a); + static Info* Quest(Info* a); + static Info* EmptyString(); + static Info* NoMatch(); static Info* AnyCharOrAnyByte(); static Info* CClass(CharClass* cc, bool latin1); - static Info* Literal(Rune r); + static Info* Literal(Rune r); static Info* LiteralLatin1(Rune r); - static Info* AnyMatch(); - - // Format Info as a string. + static Info* AnyMatch(); + + // Format Info as a string. std::string ToString(); - - // Caller takes ownership of the Prefilter. - Prefilter* TakeMatch(); - + + // Caller takes ownership of the Prefilter. + Prefilter* TakeMatch(); + std::set<std::string>& exact() { return exact_; } - - bool is_exact() const { return is_exact_; } - - class Walker; - - private: + + bool is_exact() const { return is_exact_; } + + class Walker; + + private: std::set<std::string> exact_; - - // When is_exact_ is true, the strings that match - // are placed in exact_. When it is no longer an exact - // set of strings that match this RE, then is_exact_ - // is false and the match_ contains the required match - // criteria. - bool is_exact_; - - // Accumulated Prefilter query that any - // match for this regexp is guaranteed to match. - Prefilter* match_; -}; - - -Prefilter::Info::Info() - : is_exact_(false), - match_(NULL) { -} - -Prefilter::Info::~Info() { - delete match_; -} - -Prefilter* Prefilter::Info::TakeMatch() { - if (is_exact_) { - match_ = Prefilter::OrStrings(&exact_); - is_exact_ = false; - } - Prefilter* m = match_; - match_ = NULL; - return m; -} - -// Format a Info in string form. + + // When is_exact_ is true, the strings that match + // are placed in exact_. When it is no longer an exact + // set of strings that match this RE, then is_exact_ + // is false and the match_ contains the required match + // criteria. 
+ bool is_exact_; + + // Accumulated Prefilter query that any + // match for this regexp is guaranteed to match. + Prefilter* match_; +}; + + +Prefilter::Info::Info() + : is_exact_(false), + match_(NULL) { +} + +Prefilter::Info::~Info() { + delete match_; +} + +Prefilter* Prefilter::Info::TakeMatch() { + if (is_exact_) { + match_ = Prefilter::OrStrings(&exact_); + is_exact_ = false; + } + Prefilter* m = match_; + match_ = NULL; + return m; +} + +// Format a Info in string form. std::string Prefilter::Info::ToString() { - if (is_exact_) { - int n = 0; + if (is_exact_) { + int n = 0; std::string s; for (SSIter i = exact_.begin(); i != exact_.end(); ++i) { - if (n++ > 0) - s += ","; - s += *i; - } - return s; - } - - if (match_) - return match_->DebugString(); - - return ""; -} - -// Add the strings from src to dst. + if (n++ > 0) + s += ","; + s += *i; + } + return s; + } + + if (match_) + return match_->DebugString(); + + return ""; +} + +// Add the strings from src to dst. static void CopyIn(const std::set<std::string>& src, std::set<std::string>* dst) { - for (ConstSSIter i = src.begin(); i != src.end(); ++i) - dst->insert(*i); -} - -// Add the cross-product of a and b to dst. -// (For each string i in a and j in b, add i+j.) + for (ConstSSIter i = src.begin(); i != src.end(); ++i) + dst->insert(*i); +} + +// Add the cross-product of a and b to dst. +// (For each string i in a and j in b, add i+j.) static void CrossProduct(const std::set<std::string>& a, const std::set<std::string>& b, std::set<std::string>* dst) { - for (ConstSSIter i = a.begin(); i != a.end(); ++i) - for (ConstSSIter j = b.begin(); j != b.end(); ++j) - dst->insert(*i + *j); -} - -// Concats a and b. Requires that both are exact sets. -// Forms an exact set that is a crossproduct of a and b. 
-Prefilter::Info* Prefilter::Info::Concat(Info* a, Info* b) { - if (a == NULL) - return b; - DCHECK(a->is_exact_); - DCHECK(b && b->is_exact_); - Info *ab = new Info(); - - CrossProduct(a->exact_, b->exact_, &ab->exact_); - ab->is_exact_ = true; - - delete a; - delete b; - return ab; -} - -// Constructs an inexact Info for ab given a and b. -// Used only when a or b is not exact or when the -// exact cross product is likely to be too big. -Prefilter::Info* Prefilter::Info::And(Info* a, Info* b) { - if (a == NULL) - return b; - if (b == NULL) - return a; - - Info *ab = new Info(); - - ab->match_ = Prefilter::And(a->TakeMatch(), b->TakeMatch()); - ab->is_exact_ = false; - delete a; - delete b; - return ab; -} - -// Constructs Info for a|b given a and b. -Prefilter::Info* Prefilter::Info::Alt(Info* a, Info* b) { - Info *ab = new Info(); - - if (a->is_exact_ && b->is_exact_) { - CopyIn(a->exact_, &ab->exact_); - CopyIn(b->exact_, &ab->exact_); - ab->is_exact_ = true; - } else { - // Either a or b has is_exact_ = false. If the other - // one has is_exact_ = true, we move it to match_ and - // then create a OR of a,b. The resulting Info has - // is_exact_ = false. - ab->match_ = Prefilter::Or(a->TakeMatch(), b->TakeMatch()); - ab->is_exact_ = false; - } - - delete a; - delete b; - return ab; -} - -// Constructs Info for a? given a. -Prefilter::Info* Prefilter::Info::Quest(Info *a) { - Info *ab = new Info(); - - ab->is_exact_ = false; - ab->match_ = new Prefilter(ALL); - delete a; - return ab; -} - -// Constructs Info for a* given a. -// Same as a? -- not much to do. -Prefilter::Info* Prefilter::Info::Star(Info *a) { - return Quest(a); -} - -// Constructs Info for a+ given a. If a was exact set, it isn't -// anymore. 
-Prefilter::Info* Prefilter::Info::Plus(Info *a) { - Info *ab = new Info(); - - ab->match_ = a->TakeMatch(); - ab->is_exact_ = false; - - delete a; - return ab; -} - + for (ConstSSIter i = a.begin(); i != a.end(); ++i) + for (ConstSSIter j = b.begin(); j != b.end(); ++j) + dst->insert(*i + *j); +} + +// Concats a and b. Requires that both are exact sets. +// Forms an exact set that is a crossproduct of a and b. +Prefilter::Info* Prefilter::Info::Concat(Info* a, Info* b) { + if (a == NULL) + return b; + DCHECK(a->is_exact_); + DCHECK(b && b->is_exact_); + Info *ab = new Info(); + + CrossProduct(a->exact_, b->exact_, &ab->exact_); + ab->is_exact_ = true; + + delete a; + delete b; + return ab; +} + +// Constructs an inexact Info for ab given a and b. +// Used only when a or b is not exact or when the +// exact cross product is likely to be too big. +Prefilter::Info* Prefilter::Info::And(Info* a, Info* b) { + if (a == NULL) + return b; + if (b == NULL) + return a; + + Info *ab = new Info(); + + ab->match_ = Prefilter::And(a->TakeMatch(), b->TakeMatch()); + ab->is_exact_ = false; + delete a; + delete b; + return ab; +} + +// Constructs Info for a|b given a and b. +Prefilter::Info* Prefilter::Info::Alt(Info* a, Info* b) { + Info *ab = new Info(); + + if (a->is_exact_ && b->is_exact_) { + CopyIn(a->exact_, &ab->exact_); + CopyIn(b->exact_, &ab->exact_); + ab->is_exact_ = true; + } else { + // Either a or b has is_exact_ = false. If the other + // one has is_exact_ = true, we move it to match_ and + // then create a OR of a,b. The resulting Info has + // is_exact_ = false. + ab->match_ = Prefilter::Or(a->TakeMatch(), b->TakeMatch()); + ab->is_exact_ = false; + } + + delete a; + delete b; + return ab; +} + +// Constructs Info for a? given a. +Prefilter::Info* Prefilter::Info::Quest(Info *a) { + Info *ab = new Info(); + + ab->is_exact_ = false; + ab->match_ = new Prefilter(ALL); + delete a; + return ab; +} + +// Constructs Info for a* given a. +// Same as a? 
-- not much to do. +Prefilter::Info* Prefilter::Info::Star(Info *a) { + return Quest(a); +} + +// Constructs Info for a+ given a. If a was exact set, it isn't +// anymore. +Prefilter::Info* Prefilter::Info::Plus(Info *a) { + Info *ab = new Info(); + + ab->match_ = a->TakeMatch(); + ab->is_exact_ = false; + + delete a; + return ab; +} + static std::string RuneToString(Rune r) { - char buf[UTFmax]; - int n = runetochar(buf, &r); + char buf[UTFmax]; + int n = runetochar(buf, &r); return std::string(buf, n); -} - +} + static std::string RuneToStringLatin1(Rune r) { char c = r & 0xff; return std::string(&c, 1); } -// Constructs Info for literal rune. -Prefilter::Info* Prefilter::Info::Literal(Rune r) { - Info* info = new Info(); +// Constructs Info for literal rune. +Prefilter::Info* Prefilter::Info::Literal(Rune r) { + Info* info = new Info(); info->exact_.insert(RuneToString(ToLowerRune(r))); - info->is_exact_ = true; - return info; -} - + info->is_exact_ = true; + return info; +} + // Constructs Info for literal rune for Latin1 encoded string. Prefilter::Info* Prefilter::Info::LiteralLatin1(Rune r) { Info* info = new Info(); @@ -417,52 +417,52 @@ Prefilter::Info* Prefilter::Info::LiteralLatin1(Rune r) { // Constructs Info for dot (any character) or \C (any byte). Prefilter::Info* Prefilter::Info::AnyCharOrAnyByte() { - Prefilter::Info* info = new Prefilter::Info(); - info->match_ = new Prefilter(ALL); - return info; -} - -// Constructs Prefilter::Info for no possible match. -Prefilter::Info* Prefilter::Info::NoMatch() { - Prefilter::Info* info = new Prefilter::Info(); - info->match_ = new Prefilter(NONE); - return info; -} - -// Constructs Prefilter::Info for any possible match. -// This Prefilter::Info is valid for any regular expression, -// since it makes no assertions whatsoever about the -// strings being matched. 
-Prefilter::Info* Prefilter::Info::AnyMatch() { - Prefilter::Info *info = new Prefilter::Info(); - info->match_ = new Prefilter(ALL); - return info; -} - -// Constructs Prefilter::Info for just the empty string. -Prefilter::Info* Prefilter::Info::EmptyString() { - Prefilter::Info* info = new Prefilter::Info(); - info->is_exact_ = true; - info->exact_.insert(""); - return info; -} - -// Constructs Prefilter::Info for a character class. -typedef CharClass::iterator CCIter; + Prefilter::Info* info = new Prefilter::Info(); + info->match_ = new Prefilter(ALL); + return info; +} + +// Constructs Prefilter::Info for no possible match. +Prefilter::Info* Prefilter::Info::NoMatch() { + Prefilter::Info* info = new Prefilter::Info(); + info->match_ = new Prefilter(NONE); + return info; +} + +// Constructs Prefilter::Info for any possible match. +// This Prefilter::Info is valid for any regular expression, +// since it makes no assertions whatsoever about the +// strings being matched. +Prefilter::Info* Prefilter::Info::AnyMatch() { + Prefilter::Info *info = new Prefilter::Info(); + info->match_ = new Prefilter(ALL); + return info; +} + +// Constructs Prefilter::Info for just the empty string. +Prefilter::Info* Prefilter::Info::EmptyString() { + Prefilter::Info* info = new Prefilter::Info(); + info->is_exact_ = true; + info->exact_.insert(""); + return info; +} + +// Constructs Prefilter::Info for a character class. +typedef CharClass::iterator CCIter; Prefilter::Info* Prefilter::Info::CClass(CharClass *cc, bool latin1) { if (ExtraDebug) { LOG(ERROR) << "CharClassInfo:"; - for (CCIter i = cc->begin(); i != cc->end(); ++i) + for (CCIter i = cc->begin(); i != cc->end(); ++i) LOG(ERROR) << " " << i->lo << "-" << i->hi; - } - - // If the class is too large, it's okay to overestimate. - if (cc->size() > 10) + } + + // If the class is too large, it's okay to overestimate. 
+ if (cc->size() > 10) return AnyCharOrAnyByte(); - - Prefilter::Info *a = new Prefilter::Info(); - for (CCIter i = cc->begin(); i != cc->end(); ++i) + + Prefilter::Info *a = new Prefilter::Info(); + for (CCIter i = cc->begin(); i != cc->end(); ++i) for (Rune r = i->lo; r <= i->hi; r++) { if (latin1) { a->exact_.insert(RuneToStringLatin1(ToLowerRuneLatin1(r))); @@ -470,101 +470,101 @@ Prefilter::Info* Prefilter::Info::CClass(CharClass *cc, a->exact_.insert(RuneToString(ToLowerRune(r))); } } + - - a->is_exact_ = true; - + a->is_exact_ = true; + if (ExtraDebug) LOG(ERROR) << " = " << a->ToString(); - - return a; -} - -class Prefilter::Info::Walker : public Regexp::Walker<Prefilter::Info*> { - public: + + return a; +} + +class Prefilter::Info::Walker : public Regexp::Walker<Prefilter::Info*> { + public: Walker(bool latin1) : latin1_(latin1) {} - - virtual Info* PostVisit( - Regexp* re, Info* parent_arg, - Info* pre_arg, - Info** child_args, int nchild_args); - - virtual Info* ShortVisit( - Regexp* re, - Info* parent_arg); - + + virtual Info* PostVisit( + Regexp* re, Info* parent_arg, + Info* pre_arg, + Info** child_args, int nchild_args); + + virtual Info* ShortVisit( + Regexp* re, + Info* parent_arg); + bool latin1() { return latin1_; } - private: + private: bool latin1_; Walker(const Walker&) = delete; Walker& operator=(const Walker&) = delete; -}; - -Prefilter::Info* Prefilter::BuildInfo(Regexp* re) { +}; + +Prefilter::Info* Prefilter::BuildInfo(Regexp* re) { if (ExtraDebug) LOG(ERROR) << "BuildPrefilter::Info: " << re->ToString(); bool latin1 = (re->parse_flags() & Regexp::Latin1) != 0; Prefilter::Info::Walker w(latin1); - Prefilter::Info* info = w.WalkExponential(re, NULL, 100000); - - if (w.stopped_early()) { - delete info; - return NULL; - } - - return info; -} - -Prefilter::Info* Prefilter::Info::Walker::ShortVisit( - Regexp* re, Prefilter::Info* parent_arg) { - return AnyMatch(); -} - -// Constructs the Prefilter::Info for the given regular expression. 
-// Assumes re is simplified. -Prefilter::Info* Prefilter::Info::Walker::PostVisit( - Regexp* re, Prefilter::Info* parent_arg, - Prefilter::Info* pre_arg, Prefilter::Info** child_args, - int nchild_args) { - Prefilter::Info *info; - switch (re->op()) { - default: - case kRegexpRepeat: - LOG(DFATAL) << "Bad regexp op " << re->op(); - info = EmptyString(); - break; - - case kRegexpNoMatch: - info = NoMatch(); - break; - - // These ops match the empty string: - case kRegexpEmptyMatch: // anywhere - case kRegexpBeginLine: // at beginning of line - case kRegexpEndLine: // at end of line - case kRegexpBeginText: // at beginning of text - case kRegexpEndText: // at end of text - case kRegexpWordBoundary: // at word boundary - case kRegexpNoWordBoundary: // not at word boundary - info = EmptyString(); - break; - - case kRegexpLiteral: + Prefilter::Info* info = w.WalkExponential(re, NULL, 100000); + + if (w.stopped_early()) { + delete info; + return NULL; + } + + return info; +} + +Prefilter::Info* Prefilter::Info::Walker::ShortVisit( + Regexp* re, Prefilter::Info* parent_arg) { + return AnyMatch(); +} + +// Constructs the Prefilter::Info for the given regular expression. +// Assumes re is simplified. 
+Prefilter::Info* Prefilter::Info::Walker::PostVisit( + Regexp* re, Prefilter::Info* parent_arg, + Prefilter::Info* pre_arg, Prefilter::Info** child_args, + int nchild_args) { + Prefilter::Info *info; + switch (re->op()) { + default: + case kRegexpRepeat: + LOG(DFATAL) << "Bad regexp op " << re->op(); + info = EmptyString(); + break; + + case kRegexpNoMatch: + info = NoMatch(); + break; + + // These ops match the empty string: + case kRegexpEmptyMatch: // anywhere + case kRegexpBeginLine: // at beginning of line + case kRegexpEndLine: // at end of line + case kRegexpBeginText: // at beginning of text + case kRegexpEndText: // at end of text + case kRegexpWordBoundary: // at word boundary + case kRegexpNoWordBoundary: // not at word boundary + info = EmptyString(); + break; + + case kRegexpLiteral: if (latin1()) { info = LiteralLatin1(re->rune()); } else { info = Literal(re->rune()); } - break; - - case kRegexpLiteralString: - if (re->nrunes() == 0) { - info = NoMatch(); - break; - } + break; + + case kRegexpLiteralString: + if (re->nrunes() == 0) { + info = NoMatch(); + break; + } if (latin1()) { info = LiteralLatin1(re->runes()[0]); for (int i = 1; i < re->nrunes(); i++) { @@ -576,136 +576,136 @@ Prefilter::Info* Prefilter::Info::Walker::PostVisit( info = Concat(info, Literal(re->runes()[i])); } } - break; - - case kRegexpConcat: { - // Accumulate in info. - // Exact is concat of recent contiguous exact nodes. - info = NULL; - Info* exact = NULL; - for (int i = 0; i < nchild_args; i++) { - Info* ci = child_args[i]; // child info - if (!ci->is_exact() || - (exact && ci->exact().size() * exact->exact().size() > 16)) { - // Exact run is over. - info = And(info, exact); - exact = NULL; - // Add this child's info. - info = And(info, ci); - } else { - // Append to exact run. 
- exact = Concat(exact, ci); - } - } - info = And(info, exact); - } - break; - - case kRegexpAlternate: - info = child_args[0]; - for (int i = 1; i < nchild_args; i++) - info = Alt(info, child_args[i]); - break; - - case kRegexpStar: - info = Star(child_args[0]); - break; - - case kRegexpQuest: - info = Quest(child_args[0]); - break; - - case kRegexpPlus: - info = Plus(child_args[0]); - break; - - case kRegexpAnyChar: + break; + + case kRegexpConcat: { + // Accumulate in info. + // Exact is concat of recent contiguous exact nodes. + info = NULL; + Info* exact = NULL; + for (int i = 0; i < nchild_args; i++) { + Info* ci = child_args[i]; // child info + if (!ci->is_exact() || + (exact && ci->exact().size() * exact->exact().size() > 16)) { + // Exact run is over. + info = And(info, exact); + exact = NULL; + // Add this child's info. + info = And(info, ci); + } else { + // Append to exact run. + exact = Concat(exact, ci); + } + } + info = And(info, exact); + } + break; + + case kRegexpAlternate: + info = child_args[0]; + for (int i = 1; i < nchild_args; i++) + info = Alt(info, child_args[i]); + break; + + case kRegexpStar: + info = Star(child_args[0]); + break; + + case kRegexpQuest: + info = Quest(child_args[0]); + break; + + case kRegexpPlus: + info = Plus(child_args[0]); + break; + + case kRegexpAnyChar: case kRegexpAnyByte: - // Claim nothing, except that it's not empty. + // Claim nothing, except that it's not empty. info = AnyCharOrAnyByte(); - break; - - case kRegexpCharClass: + break; + + case kRegexpCharClass: info = CClass(re->cc(), latin1()); - break; - - case kRegexpCapture: - // These don't affect the set of matching strings. - info = child_args[0]; - break; - } - + break; + + case kRegexpCapture: + // These don't affect the set of matching strings. + info = child_args[0]; + break; + } + if (ExtraDebug) LOG(ERROR) << "BuildInfo " << re->ToString() << ": " << (info ? 
info->ToString() : ""); - - return info; -} - - -Prefilter* Prefilter::FromRegexp(Regexp* re) { - if (re == NULL) - return NULL; - - Regexp* simple = re->Simplify(); + + return info; +} + + +Prefilter* Prefilter::FromRegexp(Regexp* re) { + if (re == NULL) + return NULL; + + Regexp* simple = re->Simplify(); if (simple == NULL) return NULL; - + Prefilter::Info* info = BuildInfo(simple); - simple->Decref(); - if (info == NULL) - return NULL; - - Prefilter* m = info->TakeMatch(); - delete info; - return m; -} - + simple->Decref(); + if (info == NULL) + return NULL; + + Prefilter* m = info->TakeMatch(); + delete info; + return m; +} + std::string Prefilter::DebugString() const { - switch (op_) { - default: - LOG(DFATAL) << "Bad op in Prefilter::DebugString: " << op_; - return StringPrintf("op%d", op_); - case NONE: - return "*no-matches*"; - case ATOM: - return atom_; - case ALL: - return ""; - case AND: { + switch (op_) { + default: + LOG(DFATAL) << "Bad op in Prefilter::DebugString: " << op_; + return StringPrintf("op%d", op_); + case NONE: + return "*no-matches*"; + case ATOM: + return atom_; + case ALL: + return ""; + case AND: { std::string s = ""; for (size_t i = 0; i < subs_->size(); i++) { - if (i > 0) - s += " "; + if (i > 0) + s += " "; Prefilter* sub = (*subs_)[i]; s += sub ? sub->DebugString() : "<nil>"; - } - return s; - } - case OR: { + } + return s; + } + case OR: { std::string s = "("; for (size_t i = 0; i < subs_->size(); i++) { - if (i > 0) - s += "|"; + if (i > 0) + s += "|"; Prefilter* sub = (*subs_)[i]; s += sub ? 
sub->DebugString() : "<nil>"; - } - s += ")"; - return s; - } - } -} - -Prefilter* Prefilter::FromRE2(const RE2* re2) { - if (re2 == NULL) - return NULL; - - Regexp* regexp = re2->Regexp(); - if (regexp == NULL) - return NULL; - - return FromRegexp(regexp); -} - - -} // namespace re2 + } + s += ")"; + return s; + } + } +} + +Prefilter* Prefilter::FromRE2(const RE2* re2) { + if (re2 == NULL) + return NULL; + + Regexp* regexp = re2->Regexp(); + if (regexp == NULL) + return NULL; + + return FromRegexp(regexp); +} + + +} // namespace re2 diff --git a/contrib/libs/re2/re2/prefilter.h b/contrib/libs/re2/re2/prefilter.h index 4fedeb4a7c..1ce0b63c76 100644 --- a/contrib/libs/re2/re2/prefilter.h +++ b/contrib/libs/re2/re2/prefilter.h @@ -1,108 +1,108 @@ -// Copyright 2009 The RE2 Authors. All Rights Reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - +// Copyright 2009 The RE2 Authors. All Rights Reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + #ifndef RE2_PREFILTER_H_ #define RE2_PREFILTER_H_ -// Prefilter is the class used to extract string guards from regexps. -// Rather than using Prefilter class directly, use FilteredRE2. -// See filtered_re2.h - +// Prefilter is the class used to extract string guards from regexps. +// Rather than using Prefilter class directly, use FilteredRE2. 
+// See filtered_re2.h + #include <set> #include <string> #include <vector> - + #include "util/util.h" #include "util/logging.h" - -namespace re2 { - -class RE2; - -class Regexp; - -class Prefilter { - // Instead of using Prefilter directly, use FilteredRE2; see filtered_re2.h - public: - enum Op { - ALL = 0, // Everything matches - NONE, // Nothing matches - ATOM, // The string atom() must match - AND, // All in subs() must match - OR, // One of subs() must match - }; - - explicit Prefilter(Op op); - ~Prefilter(); - - Op op() { return op_; } + +namespace re2 { + +class RE2; + +class Regexp; + +class Prefilter { + // Instead of using Prefilter directly, use FilteredRE2; see filtered_re2.h + public: + enum Op { + ALL = 0, // Everything matches + NONE, // Nothing matches + ATOM, // The string atom() must match + AND, // All in subs() must match + OR, // One of subs() must match + }; + + explicit Prefilter(Op op); + ~Prefilter(); + + Op op() { return op_; } const std::string& atom() const { return atom_; } - void set_unique_id(int id) { unique_id_ = id; } - int unique_id() const { return unique_id_; } - - // The children of the Prefilter node. + void set_unique_id(int id) { unique_id_ = id; } + int unique_id() const { return unique_id_; } + + // The children of the Prefilter node. std::vector<Prefilter*>* subs() { DCHECK(op_ == AND || op_ == OR); - return subs_; - } - - // Set the children vector. Prefilter takes ownership of subs and - // subs_ will be deleted when Prefilter is deleted. + return subs_; + } + + // Set the children vector. Prefilter takes ownership of subs and + // subs_ will be deleted when Prefilter is deleted. void set_subs(std::vector<Prefilter*>* subs) { subs_ = subs; } - - // Given a RE2, return a Prefilter. The caller takes ownership of - // the Prefilter and should deallocate it. Returns NULL if Prefilter - // cannot be formed. - static Prefilter* FromRE2(const RE2* re2); - - // Returns a readable debug string of the prefilter. 
+ + // Given a RE2, return a Prefilter. The caller takes ownership of + // the Prefilter and should deallocate it. Returns NULL if Prefilter + // cannot be formed. + static Prefilter* FromRE2(const RE2* re2); + + // Returns a readable debug string of the prefilter. std::string DebugString() const; - - private: - class Info; - - // Combines two prefilters together to create an AND. The passed - // Prefilters will be part of the returned Prefilter or deleted. - static Prefilter* And(Prefilter* a, Prefilter* b); - - // Combines two prefilters together to create an OR. The passed - // Prefilters will be part of the returned Prefilter or deleted. - static Prefilter* Or(Prefilter* a, Prefilter* b); - - // Generalized And/Or - static Prefilter* AndOr(Op op, Prefilter* a, Prefilter* b); - - static Prefilter* FromRegexp(Regexp* a); - + + private: + class Info; + + // Combines two prefilters together to create an AND. The passed + // Prefilters will be part of the returned Prefilter or deleted. + static Prefilter* And(Prefilter* a, Prefilter* b); + + // Combines two prefilters together to create an OR. The passed + // Prefilters will be part of the returned Prefilter or deleted. + static Prefilter* Or(Prefilter* a, Prefilter* b); + + // Generalized And/Or + static Prefilter* AndOr(Op op, Prefilter* a, Prefilter* b); + + static Prefilter* FromRegexp(Regexp* a); + static Prefilter* FromString(const std::string& str); - + static Prefilter* OrStrings(std::set<std::string>* ss); - - static Info* BuildInfo(Regexp* re); - - Prefilter* Simplify(); - - // Kind of Prefilter. - Op op_; - - // Sub-matches for AND or OR Prefilter. + + static Info* BuildInfo(Regexp* re); + + Prefilter* Simplify(); + + // Kind of Prefilter. + Op op_; + + // Sub-matches for AND or OR Prefilter. std::vector<Prefilter*>* subs_; - - // Actual string to match in leaf node. + + // Actual string to match in leaf node. 
std::string atom_; - - // If different prefilters have the same string atom, or if they are - // structurally the same (e.g., OR of same atom strings) they are - // considered the same unique nodes. This is the id for each unique - // node. This field is populated with a unique id for every node, - // and -1 for duplicate nodes. - int unique_id_; - + + // If different prefilters have the same string atom, or if they are + // structurally the same (e.g., OR of same atom strings) they are + // considered the same unique nodes. This is the id for each unique + // node. This field is populated with a unique id for every node, + // and -1 for duplicate nodes. + int unique_id_; + Prefilter(const Prefilter&) = delete; Prefilter& operator=(const Prefilter&) = delete; -}; - -} // namespace re2 - -#endif // RE2_PREFILTER_H_ +}; + +} // namespace re2 + +#endif // RE2_PREFILTER_H_ diff --git a/contrib/libs/re2/re2/prefilter_tree.cc b/contrib/libs/re2/re2/prefilter_tree.cc index fdf4e083c9..1d24198590 100644 --- a/contrib/libs/re2/re2/prefilter_tree.cc +++ b/contrib/libs/re2/re2/prefilter_tree.cc @@ -1,9 +1,9 @@ -// Copyright 2009 The RE2 Authors. All Rights Reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - +// Copyright 2009 The RE2 Authors. All Rights Reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ #include "re2/prefilter_tree.h" - + #include <stddef.h> #include <algorithm> #include <map> @@ -16,118 +16,118 @@ #include "util/util.h" #include "util/logging.h" #include "util/strutil.h" -#include "re2/prefilter.h" +#include "re2/prefilter.h" #include "re2/re2.h" - -namespace re2 { - + +namespace re2 { + static const bool ExtraDebug = false; -PrefilterTree::PrefilterTree() +PrefilterTree::PrefilterTree() : compiled_(false), min_atom_len_(3) { -} - +} + PrefilterTree::PrefilterTree(int min_atom_len) : compiled_(false), min_atom_len_(min_atom_len) { } -PrefilterTree::~PrefilterTree() { +PrefilterTree::~PrefilterTree() { for (size_t i = 0; i < prefilter_vec_.size(); i++) - delete prefilter_vec_[i]; - + delete prefilter_vec_[i]; + for (size_t i = 0; i < entries_.size(); i++) - delete entries_[i].parents; -} - + delete entries_[i].parents; +} + void PrefilterTree::Add(Prefilter* prefilter) { - if (compiled_) { + if (compiled_) { LOG(DFATAL) << "Add called after Compile."; - return; - } + return; + } if (prefilter != NULL && !KeepNode(prefilter)) { delete prefilter; prefilter = NULL; - } - + } + prefilter_vec_.push_back(prefilter); -} - +} + void PrefilterTree::Compile(std::vector<std::string>* atom_vec) { - if (compiled_) { + if (compiled_) { LOG(DFATAL) << "Compile called already."; - return; - } - + return; + } + // Some legacy users of PrefilterTree call Compile() before // adding any regexps and expect Compile() to have no effect. - if (prefilter_vec_.empty()) - return; - - compiled_ = true; - + if (prefilter_vec_.empty()) + return; + + compiled_ = true; + // TODO(junyer): Use std::unordered_set<Prefilter*> instead? NodeMap nodes; AssignUniqueIds(&nodes, atom_vec); - - // Identify nodes that are too common among prefilters and are - // triggering too many parents. Then get rid of them if possible. 
- // Note that getting rid of a prefilter node simply means they are - // no longer necessary for their parent to trigger; that is, we do - // not miss out on any regexps triggering by getting rid of a - // prefilter node. + + // Identify nodes that are too common among prefilters and are + // triggering too many parents. Then get rid of them if possible. + // Note that getting rid of a prefilter node simply means they are + // no longer necessary for their parent to trigger; that is, we do + // not miss out on any regexps triggering by getting rid of a + // prefilter node. for (size_t i = 0; i < entries_.size(); i++) { StdIntMap* parents = entries_[i].parents; - if (parents->size() > 8) { - // This one triggers too many things. If all the parents are AND - // nodes and have other things guarding them, then get rid of - // this trigger. TODO(vsri): Adjust the threshold appropriately, - // make it a function of total number of nodes? - bool have_other_guard = true; + if (parents->size() > 8) { + // This one triggers too many things. If all the parents are AND + // nodes and have other things guarding them, then get rid of + // this trigger. TODO(vsri): Adjust the threshold appropriately, + // make it a function of total number of nodes? 
+ bool have_other_guard = true; for (StdIntMap::iterator it = parents->begin(); it != parents->end(); ++it) { - have_other_guard = have_other_guard && + have_other_guard = have_other_guard && (entries_[it->first].propagate_up_at_count > 1); } - - if (have_other_guard) { + + if (have_other_guard) { for (StdIntMap::iterator it = parents->begin(); - it != parents->end(); ++it) + it != parents->end(); ++it) entries_[it->first].propagate_up_at_count -= 1; - - parents->clear(); // Forget the parents - } - } - } - + + parents->clear(); // Forget the parents + } + } + } + if (ExtraDebug) PrintDebugInfo(&nodes); -} - +} + Prefilter* PrefilterTree::CanonicalNode(NodeMap* nodes, Prefilter* node) { std::string node_string = NodeString(node); NodeMap::iterator iter = nodes->find(node_string); if (iter == nodes->end()) - return NULL; - return (*iter).second; -} - + return NULL; + return (*iter).second; +} + std::string PrefilterTree::NodeString(Prefilter* node) const { - // Adding the operation disambiguates AND/OR/atom nodes. + // Adding the operation disambiguates AND/OR/atom nodes. std::string s = StringPrintf("%d", node->op()) + ":"; - if (node->op() == Prefilter::ATOM) { - s += node->atom(); - } else { + if (node->op() == Prefilter::ATOM) { + s += node->atom(); + } else { for (size_t i = 0; i < node->subs()->size(); i++) { - if (i > 0) - s += ','; + if (i > 0) + s += ','; s += StringPrintf("%d", (*node->subs())[i]->unique_id()); - } - } - return s; -} - + } + } + return s; +} + bool PrefilterTree::KeepNode(Prefilter* node) const { if (node == NULL) return false; @@ -167,137 +167,137 @@ bool PrefilterTree::KeepNode(Prefilter* node) const { void PrefilterTree::AssignUniqueIds(NodeMap* nodes, std::vector<std::string>* atom_vec) { - atom_vec->clear(); - - // Build vector of all filter nodes, sorted topologically - // from top to bottom in v. + atom_vec->clear(); + + // Build vector of all filter nodes, sorted topologically + // from top to bottom in v. 
std::vector<Prefilter*> v; - - // Add the top level nodes of each regexp prefilter. + + // Add the top level nodes of each regexp prefilter. for (size_t i = 0; i < prefilter_vec_.size(); i++) { - Prefilter* f = prefilter_vec_[i]; - if (f == NULL) + Prefilter* f = prefilter_vec_[i]; + if (f == NULL) unfiltered_.push_back(static_cast<int>(i)); - - // We push NULL also on to v, so that we maintain the - // mapping of index==regexpid for level=0 prefilter nodes. - v.push_back(f); - } - - // Now add all the descendant nodes. + + // We push NULL also on to v, so that we maintain the + // mapping of index==regexpid for level=0 prefilter nodes. + v.push_back(f); + } + + // Now add all the descendant nodes. for (size_t i = 0; i < v.size(); i++) { - Prefilter* f = v[i]; - if (f == NULL) - continue; - if (f->op() == Prefilter::AND || f->op() == Prefilter::OR) { + Prefilter* f = v[i]; + if (f == NULL) + continue; + if (f->op() == Prefilter::AND || f->op() == Prefilter::OR) { const std::vector<Prefilter*>& subs = *f->subs(); for (size_t j = 0; j < subs.size(); j++) - v.push_back(subs[j]); - } - } - - // Identify unique nodes. - int unique_id = 0; + v.push_back(subs[j]); + } + } + + // Identify unique nodes. + int unique_id = 0; for (int i = static_cast<int>(v.size()) - 1; i >= 0; i--) { - Prefilter *node = v[i]; - if (node == NULL) - continue; - node->set_unique_id(-1); + Prefilter *node = v[i]; + if (node == NULL) + continue; + node->set_unique_id(-1); Prefilter* canonical = CanonicalNode(nodes, node); - if (canonical == NULL) { - // Any further nodes that have the same node string - // will find this node as the canonical node. + if (canonical == NULL) { + // Any further nodes that have the same node string + // will find this node as the canonical node. 
nodes->emplace(NodeString(node), node); - if (node->op() == Prefilter::ATOM) { - atom_vec->push_back(node->atom()); - atom_index_to_id_.push_back(unique_id); - } - node->set_unique_id(unique_id++); - } else { - node->set_unique_id(canonical->unique_id()); - } - } + if (node->op() == Prefilter::ATOM) { + atom_vec->push_back(node->atom()); + atom_index_to_id_.push_back(unique_id); + } + node->set_unique_id(unique_id++); + } else { + node->set_unique_id(canonical->unique_id()); + } + } entries_.resize(nodes->size()); - + // Create parent StdIntMap for the entries. for (int i = static_cast<int>(v.size()) - 1; i >= 0; i--) { - Prefilter* prefilter = v[i]; - if (prefilter == NULL) - continue; - + Prefilter* prefilter = v[i]; + if (prefilter == NULL) + continue; + if (CanonicalNode(nodes, prefilter) != prefilter) - continue; - - Entry* entry = &entries_[prefilter->unique_id()]; + continue; + + Entry* entry = &entries_[prefilter->unique_id()]; entry->parents = new StdIntMap(); - } - - // Fill the entries. + } + + // Fill the entries. 
for (int i = static_cast<int>(v.size()) - 1; i >= 0; i--) { - Prefilter* prefilter = v[i]; - if (prefilter == NULL) - continue; - + Prefilter* prefilter = v[i]; + if (prefilter == NULL) + continue; + if (CanonicalNode(nodes, prefilter) != prefilter) - continue; - - Entry* entry = &entries_[prefilter->unique_id()]; - - switch (prefilter->op()) { - default: - case Prefilter::ALL: - LOG(DFATAL) << "Unexpected op: " << prefilter->op(); - return; - - case Prefilter::ATOM: - entry->propagate_up_at_count = 1; - break; - - case Prefilter::OR: - case Prefilter::AND: { + continue; + + Entry* entry = &entries_[prefilter->unique_id()]; + + switch (prefilter->op()) { + default: + case Prefilter::ALL: + LOG(DFATAL) << "Unexpected op: " << prefilter->op(); + return; + + case Prefilter::ATOM: + entry->propagate_up_at_count = 1; + break; + + case Prefilter::OR: + case Prefilter::AND: { std::set<int> uniq_child; for (size_t j = 0; j < prefilter->subs()->size(); j++) { - Prefilter* child = (*prefilter->subs())[j]; + Prefilter* child = (*prefilter->subs())[j]; Prefilter* canonical = CanonicalNode(nodes, child); - if (canonical == NULL) { - LOG(DFATAL) << "Null canonical node"; - return; - } - int child_id = canonical->unique_id(); + if (canonical == NULL) { + LOG(DFATAL) << "Null canonical node"; + return; + } + int child_id = canonical->unique_id(); uniq_child.insert(child_id); - // To the child, we want to add to parent indices. - Entry* child_entry = &entries_[child_id]; + // To the child, we want to add to parent indices. + Entry* child_entry = &entries_[child_id]; if (child_entry->parents->find(prefilter->unique_id()) == child_entry->parents->end()) { (*child_entry->parents)[prefilter->unique_id()] = 1; } - } + } entry->propagate_up_at_count = prefilter->op() == Prefilter::AND ? static_cast<int>(uniq_child.size()) : 1; - - break; - } - } - } - - // For top level nodes, populate regexp id. + + break; + } + } + } + + // For top level nodes, populate regexp id. 
for (size_t i = 0; i < prefilter_vec_.size(); i++) { - if (prefilter_vec_[i] == NULL) - continue; + if (prefilter_vec_[i] == NULL) + continue; int id = CanonicalNode(nodes, prefilter_vec_[i])->unique_id(); - DCHECK_LE(0, id); - Entry* entry = &entries_[id]; + DCHECK_LE(0, id); + Entry* entry = &entries_[id]; entry->regexps.push_back(static_cast<int>(i)); - } -} - -// Functions for triggering during search. -void PrefilterTree::RegexpsGivenStrings( + } +} + +// Functions for triggering during search. +void PrefilterTree::RegexpsGivenStrings( const std::vector<int>& matched_atoms, std::vector<int>* regexps) const { - regexps->clear(); - if (!compiled_) { + regexps->clear(); + if (!compiled_) { // Some legacy users of PrefilterTree call Compile() before // adding any regexps and expect Compile() to have no effect. // This kludge is a counterpart to that kludge. @@ -307,7 +307,7 @@ void PrefilterTree::RegexpsGivenStrings( LOG(ERROR) << "RegexpsGivenStrings called before Compile."; for (size_t i = 0; i < prefilter_vec_.size(); i++) regexps->push_back(static_cast<int>(i)); - } else { + } else { IntMap regexps_map(static_cast<int>(prefilter_vec_.size())); std::vector<int> matched_atom_ids; for (size_t j = 0; j < matched_atoms.size(); j++) @@ -317,57 +317,57 @@ void PrefilterTree::RegexpsGivenStrings( it != regexps_map.end(); ++it) regexps->push_back(it->index()); - + regexps->insert(regexps->end(), unfiltered_.begin(), unfiltered_.end()); - } + } std::sort(regexps->begin(), regexps->end()); -} - +} + void PrefilterTree::PropagateMatch(const std::vector<int>& atom_ids, - IntMap* regexps) const { + IntMap* regexps) const { IntMap count(static_cast<int>(entries_.size())); IntMap work(static_cast<int>(entries_.size())); for (size_t i = 0; i < atom_ids.size(); i++) - work.set(atom_ids[i], 1); - for (IntMap::iterator it = work.begin(); it != work.end(); ++it) { - const Entry& entry = entries_[it->index()]; - // Record regexps triggered. 
+ work.set(atom_ids[i], 1); + for (IntMap::iterator it = work.begin(); it != work.end(); ++it) { + const Entry& entry = entries_[it->index()]; + // Record regexps triggered. for (size_t i = 0; i < entry.regexps.size(); i++) - regexps->set(entry.regexps[i], 1); - int c; - // Pass trigger up to parents. + regexps->set(entry.regexps[i], 1); + int c; + // Pass trigger up to parents. for (StdIntMap::iterator it = entry.parents->begin(); - it != entry.parents->end(); - ++it) { + it != entry.parents->end(); + ++it) { int j = it->first; - const Entry& parent = entries_[j]; - // Delay until all the children have succeeded. - if (parent.propagate_up_at_count > 1) { - if (count.has_index(j)) { - c = count.get_existing(j) + 1; - count.set_existing(j, c); - } else { - c = 1; - count.set_new(j, c); - } - if (c < parent.propagate_up_at_count) - continue; - } - // Trigger the parent. - work.set(j, 1); - } - } -} - -// Debugging help. -void PrefilterTree::PrintPrefilter(int regexpid) { + const Entry& parent = entries_[j]; + // Delay until all the children have succeeded. + if (parent.propagate_up_at_count > 1) { + if (count.has_index(j)) { + c = count.get_existing(j) + 1; + count.set_existing(j, c); + } else { + c = 1; + count.set_new(j, c); + } + if (c < parent.propagate_up_at_count) + continue; + } + // Trigger the parent. + work.set(j, 1); + } + } +} + +// Debugging help. 
+void PrefilterTree::PrintPrefilter(int regexpid) { LOG(ERROR) << DebugNodeString(prefilter_vec_[regexpid]); -} - +} + void PrefilterTree::PrintDebugInfo(NodeMap* nodes) { LOG(ERROR) << "#Unique Atoms: " << atom_index_to_id_.size(); LOG(ERROR) << "#Unique Nodes: " << entries_.size(); - + for (size_t i = 0; i < entries_.size(); i++) { StdIntMap* parents = entries_[i].parents; const std::vector<int>& regexps = entries_[i].regexps; @@ -375,33 +375,33 @@ void PrefilterTree::PrintDebugInfo(NodeMap* nodes) { << " N: " << parents->size() << " R: " << regexps.size(); for (StdIntMap::iterator it = parents->begin(); it != parents->end(); ++it) LOG(ERROR) << it->first; - } + } LOG(ERROR) << "Map:"; for (NodeMap::const_iterator iter = nodes->begin(); iter != nodes->end(); ++iter) LOG(ERROR) << "NodeId: " << (*iter).second->unique_id() << " Str: " << (*iter).first; -} - +} + std::string PrefilterTree::DebugNodeString(Prefilter* node) const { std::string node_string = ""; - if (node->op() == Prefilter::ATOM) { - DCHECK(!node->atom().empty()); - node_string += node->atom(); - } else { - // Adding the operation disambiguates AND and OR nodes. - node_string += node->op() == Prefilter::AND ? "AND" : "OR"; - node_string += "("; + if (node->op() == Prefilter::ATOM) { + DCHECK(!node->atom().empty()); + node_string += node->atom(); + } else { + // Adding the operation disambiguates AND and OR nodes. + node_string += node->op() == Prefilter::AND ? 
"AND" : "OR"; + node_string += "("; for (size_t i = 0; i < node->subs()->size(); i++) { - if (i > 0) - node_string += ','; + if (i > 0) + node_string += ','; node_string += StringPrintf("%d", (*node->subs())[i]->unique_id()); - node_string += ":"; - node_string += DebugNodeString((*node->subs())[i]); - } - node_string += ")"; - } - return node_string; -} - -} // namespace re2 + node_string += ":"; + node_string += DebugNodeString((*node->subs())[i]); + } + node_string += ")"; + } + return node_string; +} + +} // namespace re2 diff --git a/contrib/libs/re2/re2/prefilter_tree.h b/contrib/libs/re2/re2/prefilter_tree.h index 5d73074d97..2d30fbd717 100644 --- a/contrib/libs/re2/re2/prefilter_tree.h +++ b/contrib/libs/re2/re2/prefilter_tree.h @@ -1,21 +1,21 @@ -// Copyright 2009 The RE2 Authors. All Rights Reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - +// Copyright 2009 The RE2 Authors. All Rights Reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + #ifndef RE2_PREFILTER_TREE_H_ #define RE2_PREFILTER_TREE_H_ -// The PrefilterTree class is used to form an AND-OR tree of strings -// that would trigger each regexp. The 'prefilter' of each regexp is +// The PrefilterTree class is used to form an AND-OR tree of strings +// that would trigger each regexp. The 'prefilter' of each regexp is // added to PrefilterTree, and then PrefilterTree is used to find all -// the unique strings across the prefilters. During search, by using -// matches from a string matching engine, PrefilterTree deduces the -// set of regexps that are to be triggered. The 'string matching -// engine' itself is outside of this class, and the caller can use any -// favorite engine. PrefilterTree provides a set of strings (called -// atoms) that the user of this class should use to do the string -// matching. - +// the unique strings across the prefilters. 
During search, by using +// matches from a string matching engine, PrefilterTree deduces the +// set of regexps that are to be triggered. The 'string matching +// engine' itself is outside of this class, and the caller can use any +// favorite engine. PrefilterTree provides a set of strings (called +// atoms) that the user of this class should use to do the string +// matching. + #include <map> #include <string> #include <vector> @@ -23,117 +23,117 @@ #include "util/util.h" #include "re2/prefilter.h" #include "re2/sparse_array.h" - -namespace re2 { - -class PrefilterTree { - public: - PrefilterTree(); + +namespace re2 { + +class PrefilterTree { + public: + PrefilterTree(); explicit PrefilterTree(int min_atom_len); - ~PrefilterTree(); - - // Adds the prefilter for the next regexp. Note that we assume that - // Add called sequentially for all regexps. All Add calls - // must precede Compile. - void Add(Prefilter* prefilter); - - // The Compile returns a vector of string in atom_vec. - // Call this after all the prefilters are added through Add. - // No calls to Add after Compile are allowed. - // The caller should use the returned set of strings to do string matching. - // Each time a string matches, the corresponding index then has to be - // and passed to RegexpsGivenStrings below. + ~PrefilterTree(); + + // Adds the prefilter for the next regexp. Note that we assume that + // Add called sequentially for all regexps. All Add calls + // must precede Compile. + void Add(Prefilter* prefilter); + + // The Compile returns a vector of string in atom_vec. + // Call this after all the prefilters are added through Add. + // No calls to Add after Compile are allowed. + // The caller should use the returned set of strings to do string matching. + // Each time a string matches, the corresponding index then has to be + // and passed to RegexpsGivenStrings below. 
void Compile(std::vector<std::string>* atom_vec); - - // Given the indices of the atoms that matched, returns the indexes - // of regexps that should be searched. The matched_atoms should - // contain all the ids of string atoms that were found to match the - // content. The caller can use any string match engine to perform - // this function. This function is thread safe. + + // Given the indices of the atoms that matched, returns the indexes + // of regexps that should be searched. The matched_atoms should + // contain all the ids of string atoms that were found to match the + // content. The caller can use any string match engine to perform + // this function. This function is thread safe. void RegexpsGivenStrings(const std::vector<int>& matched_atoms, std::vector<int>* regexps) const; - - // Print debug prefilter. Also prints unique ids associated with - // nodes of the prefilter of the regexp. - void PrintPrefilter(int regexpid); - + + // Print debug prefilter. Also prints unique ids associated with + // nodes of the prefilter of the regexp. + void PrintPrefilter(int regexpid); + private: typedef SparseArray<int> IntMap; typedef std::map<int, int> StdIntMap; typedef std::map<std::string, Prefilter*> NodeMap; - - // Each unique node has a corresponding Entry that helps in - // passing the matching trigger information along the tree. - struct Entry { - public: - // How many children should match before this node triggers the - // parent. For an atom and an OR node, this is 1 and for an AND - // node, it is the number of unique children. - int propagate_up_at_count; - - // When this node is ready to trigger the parent, what are the indices - // of the parent nodes to trigger. The reason there may be more than - // one is because of sharing. For example (abc | def) and (xyz | def) - // are two different nodes, but they share the atom 'def'. So when - // 'def' matches, it triggers two parents, corresponding to the two - // different OR nodes. 
+ + // Each unique node has a corresponding Entry that helps in + // passing the matching trigger information along the tree. + struct Entry { + public: + // How many children should match before this node triggers the + // parent. For an atom and an OR node, this is 1 and for an AND + // node, it is the number of unique children. + int propagate_up_at_count; + + // When this node is ready to trigger the parent, what are the indices + // of the parent nodes to trigger. The reason there may be more than + // one is because of sharing. For example (abc | def) and (xyz | def) + // are two different nodes, but they share the atom 'def'. So when + // 'def' matches, it triggers two parents, corresponding to the two + // different OR nodes. StdIntMap* parents; - - // When this node is ready to trigger the parent, what are the - // regexps that are triggered. + + // When this node is ready to trigger the parent, what are the + // regexps that are triggered. std::vector<int> regexps; - }; - + }; + // Returns true if the prefilter node should be kept. bool KeepNode(Prefilter* node) const; - // This function assigns unique ids to various parts of the - // prefilter, by looking at if these nodes are already in the - // PrefilterTree. + // This function assigns unique ids to various parts of the + // prefilter, by looking at if these nodes are already in the + // PrefilterTree. void AssignUniqueIds(NodeMap* nodes, std::vector<std::string>* atom_vec); - - // Given the matching atoms, find the regexps to be triggered. + + // Given the matching atoms, find the regexps to be triggered. void PropagateMatch(const std::vector<int>& atom_ids, - IntMap* regexps) const; - - // Returns the prefilter node that has the same NodeString as this - // node. For the canonical node, returns node. + IntMap* regexps) const; + + // Returns the prefilter node that has the same NodeString as this + // node. For the canonical node, returns node. 
Prefilter* CanonicalNode(NodeMap* nodes, Prefilter* node); - - // A string that uniquely identifies the node. Assumes that the - // children of node has already been assigned unique ids. + + // A string that uniquely identifies the node. Assumes that the + // children of node has already been assigned unique ids. std::string NodeString(Prefilter* node) const; - - // Recursively constructs a readable prefilter string. + + // Recursively constructs a readable prefilter string. std::string DebugNodeString(Prefilter* node) const; - - // Used for debugging. + + // Used for debugging. void PrintDebugInfo(NodeMap* nodes); - - // These are all the nodes formed by Compile. Essentially, there is - // one node for each unique atom and each unique AND/OR node. + + // These are all the nodes formed by Compile. Essentially, there is + // one node for each unique atom and each unique AND/OR node. std::vector<Entry> entries_; - - // indices of regexps that always pass through the filter (since we - // found no required literals in these regexps). + + // indices of regexps that always pass through the filter (since we + // found no required literals in these regexps). std::vector<int> unfiltered_; - - // vector of Prefilter for all regexps. + + // vector of Prefilter for all regexps. std::vector<Prefilter*> prefilter_vec_; - - // Atom index in returned strings to entry id mapping. + + // Atom index in returned strings to entry id mapping. std::vector<int> atom_index_to_id_; - - // Has the prefilter tree been compiled. - bool compiled_; - + + // Has the prefilter tree been compiled. + bool compiled_; + // Strings less than this length are not stored as atoms. 
const int min_atom_len_; PrefilterTree(const PrefilterTree&) = delete; PrefilterTree& operator=(const PrefilterTree&) = delete; -}; - +}; + } // namespace - -#endif // RE2_PREFILTER_TREE_H_ + +#endif // RE2_PREFILTER_TREE_H_ diff --git a/contrib/libs/re2/re2/prog.cc b/contrib/libs/re2/re2/prog.cc index a700d35de3..3756c67f66 100644 --- a/contrib/libs/re2/re2/prog.cc +++ b/contrib/libs/re2/re2/prog.cc @@ -1,12 +1,12 @@ -// Copyright 2007 The RE2 Authors. All Rights Reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// Compiled regular expression representation. -// Tested by compile_test.cc - -#include "re2/prog.h" - +// Copyright 2007 The RE2 Authors. All Rights Reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Compiled regular expression representation. +// Tested by compile_test.cc + +#include "re2/prog.h" + #if defined(__AVX2__) #include <immintrin.h> #ifdef _MSC_VER @@ -25,132 +25,132 @@ #include "re2/bitmap256.h" #include "re2/stringpiece.h" -namespace re2 { - -// Constructors per Inst opcode - +namespace re2 { + +// Constructors per Inst opcode + void Prog::Inst::InitAlt(uint32_t out, uint32_t out1) { - DCHECK_EQ(out_opcode_, 0); - set_out_opcode(out, kInstAlt); - out1_ = out1; -} - + DCHECK_EQ(out_opcode_, 0); + set_out_opcode(out, kInstAlt); + out1_ = out1; +} + void Prog::Inst::InitByteRange(int lo, int hi, int foldcase, uint32_t out) { - DCHECK_EQ(out_opcode_, 0); - set_out_opcode(out, kInstByteRange); - lo_ = lo & 0xFF; - hi_ = hi & 0xFF; + DCHECK_EQ(out_opcode_, 0); + set_out_opcode(out, kInstByteRange); + lo_ = lo & 0xFF; + hi_ = hi & 0xFF; hint_foldcase_ = foldcase&1; -} - +} + void Prog::Inst::InitCapture(int cap, uint32_t out) { - DCHECK_EQ(out_opcode_, 0); - set_out_opcode(out, kInstCapture); - cap_ = cap; -} - + DCHECK_EQ(out_opcode_, 0); + set_out_opcode(out, kInstCapture); + cap_ = cap; +} + void 
Prog::Inst::InitEmptyWidth(EmptyOp empty, uint32_t out) { - DCHECK_EQ(out_opcode_, 0); - set_out_opcode(out, kInstEmptyWidth); - empty_ = empty; -} - + DCHECK_EQ(out_opcode_, 0); + set_out_opcode(out, kInstEmptyWidth); + empty_ = empty; +} + void Prog::Inst::InitMatch(int32_t id) { - DCHECK_EQ(out_opcode_, 0); - set_opcode(kInstMatch); - match_id_ = id; -} - + DCHECK_EQ(out_opcode_, 0); + set_opcode(kInstMatch); + match_id_ = id; +} + void Prog::Inst::InitNop(uint32_t out) { - DCHECK_EQ(out_opcode_, 0); - set_opcode(kInstNop); -} - -void Prog::Inst::InitFail() { - DCHECK_EQ(out_opcode_, 0); - set_opcode(kInstFail); -} - + DCHECK_EQ(out_opcode_, 0); + set_opcode(kInstNop); +} + +void Prog::Inst::InitFail() { + DCHECK_EQ(out_opcode_, 0); + set_opcode(kInstFail); +} + std::string Prog::Inst::Dump() { - switch (opcode()) { - default: - return StringPrintf("opcode %d", static_cast<int>(opcode())); - - case kInstAlt: - return StringPrintf("alt -> %d | %d", out(), out1_); - - case kInstAltMatch: - return StringPrintf("altmatch -> %d | %d", out(), out1_); - - case kInstByteRange: + switch (opcode()) { + default: + return StringPrintf("opcode %d", static_cast<int>(opcode())); + + case kInstAlt: + return StringPrintf("alt -> %d | %d", out(), out1_); + + case kInstAltMatch: + return StringPrintf("altmatch -> %d | %d", out(), out1_); + + case kInstByteRange: return StringPrintf("byte%s [%02x-%02x] %d -> %d", foldcase() ? "/i" : "", lo_, hi_, hint(), out()); - - case kInstCapture: - return StringPrintf("capture %d -> %d", cap_, out()); - - case kInstEmptyWidth: - return StringPrintf("emptywidth %#x -> %d", - static_cast<int>(empty_), out()); - - case kInstMatch: - return StringPrintf("match! 
%d", match_id()); - - case kInstNop: - return StringPrintf("nop -> %d", out()); - - case kInstFail: - return StringPrintf("fail"); - } -} - -Prog::Prog() - : anchor_start_(false), - anchor_end_(false), - reversed_(false), + + case kInstCapture: + return StringPrintf("capture %d -> %d", cap_, out()); + + case kInstEmptyWidth: + return StringPrintf("emptywidth %#x -> %d", + static_cast<int>(empty_), out()); + + case kInstMatch: + return StringPrintf("match! %d", match_id()); + + case kInstNop: + return StringPrintf("nop -> %d", out()); + + case kInstFail: + return StringPrintf("fail"); + } +} + +Prog::Prog() + : anchor_start_(false), + anchor_end_(false), + reversed_(false), did_flatten_(false), - did_onepass_(false), - start_(0), - start_unanchored_(0), - size_(0), - bytemap_range_(0), + did_onepass_(false), + start_(0), + start_unanchored_(0), + size_(0), + bytemap_range_(0), prefix_foldcase_(false), prefix_size_(0), list_count_(0), bit_state_text_max_size_(0), dfa_mem_(0), - dfa_first_(NULL), + dfa_first_(NULL), dfa_longest_(NULL) { -} - -Prog::~Prog() { +} + +Prog::~Prog() { DeleteDFA(dfa_longest_); DeleteDFA(dfa_first_); if (prefix_foldcase_) delete[] prefix_dfa_; -} - -typedef SparseSet Workq; - -static inline void AddToQueue(Workq* q, int id) { - if (id != 0) - q->insert(id); -} - +} + +typedef SparseSet Workq; + +static inline void AddToQueue(Workq* q, int id) { + if (id != 0) + q->insert(id); +} + static std::string ProgToString(Prog* prog, Workq* q) { std::string s; - for (Workq::iterator i = q->begin(); i != q->end(); ++i) { - int id = *i; - Prog::Inst* ip = prog->inst(id); + for (Workq::iterator i = q->begin(); i != q->end(); ++i) { + int id = *i; + Prog::Inst* ip = prog->inst(id); s += StringPrintf("%d. 
%s\n", id, ip->Dump().c_str()); - AddToQueue(q, ip->out()); - if (ip->opcode() == kInstAlt || ip->opcode() == kInstAltMatch) - AddToQueue(q, ip->out1()); - } - return s; -} - + AddToQueue(q, ip->out()); + if (ip->opcode() == kInstAlt || ip->opcode() == kInstAltMatch) + AddToQueue(q, ip->out1()); + } + return s; +} + static std::string FlattenedProgToString(Prog* prog, int start) { std::string s; for (int id = start; id < prog->size(); id++) { @@ -159,28 +159,28 @@ static std::string FlattenedProgToString(Prog* prog, int start) { s += StringPrintf("%d. %s\n", id, ip->Dump().c_str()); else s += StringPrintf("%d+ %s\n", id, ip->Dump().c_str()); - } + } return s; } - + std::string Prog::Dump() { if (did_flatten_) return FlattenedProgToString(this, start_); - Workq q(size_); - AddToQueue(&q, start_); + Workq q(size_); + AddToQueue(&q, start_); return ProgToString(this, &q); -} - +} + std::string Prog::DumpUnanchored() { if (did_flatten_) return FlattenedProgToString(this, start_unanchored_); - Workq q(size_); - AddToQueue(&q, start_unanchored_); - return ProgToString(this, &q); -} - + Workq q(size_); + AddToQueue(&q, start_unanchored_); + return ProgToString(this, &q); +} + std::string Prog::DumpByteMap() { std::string map; for (int c = 0; c < 256; c++) { @@ -220,104 +220,104 @@ static bool IsMatch(Prog* prog, Prog::Inst* ip) { } } -// Peep-hole optimizer. -void Prog::Optimize() { - Workq q(size_); - - // Eliminate nops. Most are taken out during compilation - // but a few are hard to avoid. 
- q.clear(); - AddToQueue(&q, start_); - for (Workq::iterator i = q.begin(); i != q.end(); ++i) { - int id = *i; - - Inst* ip = inst(id); - int j = ip->out(); - Inst* jp; - while (j != 0 && (jp=inst(j))->opcode() == kInstNop) { - j = jp->out(); - } - ip->set_out(j); - AddToQueue(&q, ip->out()); - - if (ip->opcode() == kInstAlt) { - j = ip->out1(); - while (j != 0 && (jp=inst(j))->opcode() == kInstNop) { - j = jp->out(); - } - ip->out1_ = j; - AddToQueue(&q, ip->out1()); - } - } - - // Insert kInstAltMatch instructions - // Look for - // ip: Alt -> j | k - // j: ByteRange [00-FF] -> ip - // k: Match - // or the reverse (the above is the greedy one). - // Rewrite Alt to AltMatch. - q.clear(); - AddToQueue(&q, start_); - for (Workq::iterator i = q.begin(); i != q.end(); ++i) { - int id = *i; - Inst* ip = inst(id); - AddToQueue(&q, ip->out()); - if (ip->opcode() == kInstAlt) - AddToQueue(&q, ip->out1()); - - if (ip->opcode() == kInstAlt) { - Inst* j = inst(ip->out()); - Inst* k = inst(ip->out1()); - if (j->opcode() == kInstByteRange && j->out() == id && - j->lo() == 0x00 && j->hi() == 0xFF && - IsMatch(this, k)) { - ip->set_opcode(kInstAltMatch); - continue; - } - if (IsMatch(this, j) && - k->opcode() == kInstByteRange && k->out() == id && - k->lo() == 0x00 && k->hi() == 0xFF) { - ip->set_opcode(kInstAltMatch); - } - } - } -} - +// Peep-hole optimizer. +void Prog::Optimize() { + Workq q(size_); + + // Eliminate nops. Most are taken out during compilation + // but a few are hard to avoid. 
+ q.clear(); + AddToQueue(&q, start_); + for (Workq::iterator i = q.begin(); i != q.end(); ++i) { + int id = *i; + + Inst* ip = inst(id); + int j = ip->out(); + Inst* jp; + while (j != 0 && (jp=inst(j))->opcode() == kInstNop) { + j = jp->out(); + } + ip->set_out(j); + AddToQueue(&q, ip->out()); + + if (ip->opcode() == kInstAlt) { + j = ip->out1(); + while (j != 0 && (jp=inst(j))->opcode() == kInstNop) { + j = jp->out(); + } + ip->out1_ = j; + AddToQueue(&q, ip->out1()); + } + } + + // Insert kInstAltMatch instructions + // Look for + // ip: Alt -> j | k + // j: ByteRange [00-FF] -> ip + // k: Match + // or the reverse (the above is the greedy one). + // Rewrite Alt to AltMatch. + q.clear(); + AddToQueue(&q, start_); + for (Workq::iterator i = q.begin(); i != q.end(); ++i) { + int id = *i; + Inst* ip = inst(id); + AddToQueue(&q, ip->out()); + if (ip->opcode() == kInstAlt) + AddToQueue(&q, ip->out1()); + + if (ip->opcode() == kInstAlt) { + Inst* j = inst(ip->out()); + Inst* k = inst(ip->out1()); + if (j->opcode() == kInstByteRange && j->out() == id && + j->lo() == 0x00 && j->hi() == 0xFF && + IsMatch(this, k)) { + ip->set_opcode(kInstAltMatch); + continue; + } + if (IsMatch(this, j) && + k->opcode() == kInstByteRange && k->out() == id && + k->lo() == 0x00 && k->hi() == 0xFF) { + ip->set_opcode(kInstAltMatch); + } + } + } +} + uint32_t Prog::EmptyFlags(const StringPiece& text, const char* p) { - int flags = 0; - - // ^ and \A + int flags = 0; + + // ^ and \A if (p == text.data()) - flags |= kEmptyBeginText | kEmptyBeginLine; - else if (p[-1] == '\n') - flags |= kEmptyBeginLine; - - // $ and \z + flags |= kEmptyBeginText | kEmptyBeginLine; + else if (p[-1] == '\n') + flags |= kEmptyBeginLine; + + // $ and \z if (p == text.data() + text.size()) - flags |= kEmptyEndText | kEmptyEndLine; + flags |= kEmptyEndText | kEmptyEndLine; else if (p < text.data() + text.size() && p[0] == '\n') - flags |= kEmptyEndLine; - - // \b and \B + flags |= kEmptyEndLine; + + // \b and \B if 
(p == text.data() && p == text.data() + text.size()) { - // no word boundary here + // no word boundary here } else if (p == text.data()) { - if (IsWordChar(p[0])) - flags |= kEmptyWordBoundary; + if (IsWordChar(p[0])) + flags |= kEmptyWordBoundary; } else if (p == text.data() + text.size()) { - if (IsWordChar(p[-1])) - flags |= kEmptyWordBoundary; - } else { - if (IsWordChar(p[-1]) != IsWordChar(p[0])) - flags |= kEmptyWordBoundary; - } - if (!(flags & kEmptyWordBoundary)) - flags |= kEmptyNonWordBoundary; - - return flags; -} - + if (IsWordChar(p[-1])) + flags |= kEmptyWordBoundary; + } else { + if (IsWordChar(p[-1]) != IsWordChar(p[0])) + flags |= kEmptyWordBoundary; + } + if (!(flags & kEmptyWordBoundary)) + flags |= kEmptyNonWordBoundary; + + return flags; +} + // ByteMapBuilder implements a coloring algorithm. // // The first phase is a series of "mark and merge" batches: we mark one or more @@ -375,8 +375,8 @@ void ByteMapBuilder::Mark(int lo, int hi) { return; ranges_.emplace_back(lo, hi); -} - +} + void ByteMapBuilder::Merge() { for (std::vector<std::pair<int, int>>::const_iterator it = ranges_.begin(); it != ranges_.end(); @@ -443,12 +443,12 @@ int ByteMapBuilder::Recolor(int oldcolor) { return newcolor; } -void Prog::ComputeByteMap() { +void Prog::ComputeByteMap() { // Fill in bytemap with byte classes for the program. // Ranges of bytes that are treated indistinguishably // will be mapped to a single byte class. ByteMapBuilder builder; - + // Don't repeat the work for ^ and $. bool marked_line_boundaries = false; // Don't repeat the work for \b and \B. @@ -507,18 +507,18 @@ void Prog::ComputeByteMap() { marked_word_boundaries = true; } } - } - + } + builder.Build(bytemap_, &bytemap_range_); if (0) { // For debugging, use trivial bytemap. 
LOG(ERROR) << "Using trivial bytemap."; for (int i = 0; i < 256; i++) bytemap_[i] = static_cast<uint8_t>(i); - bytemap_range_ = 256; - } -} - + bytemap_range_ = 256; + } +} + // Prog::Flatten() implements a graph rewriting algorithm. // // The overall process is similar to epsilon removal, but retains some epsilon @@ -1172,4 +1172,4 @@ const void* Prog::PrefixAccel_FrontAndBack(const void* data, size_t size) { } } -} // namespace re2 +} // namespace re2 diff --git a/contrib/libs/re2/re2/prog.h b/contrib/libs/re2/re2/prog.h index 4af012ab6f..2f35a918b6 100644 --- a/contrib/libs/re2/re2/prog.h +++ b/contrib/libs/re2/re2/prog.h @@ -1,150 +1,150 @@ -// Copyright 2007 The RE2 Authors. All Rights Reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - +// Copyright 2007 The RE2 Authors. All Rights Reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + #ifndef RE2_PROG_H_ #define RE2_PROG_H_ -// Compiled representation of regular expressions. -// See regexp.h for the Regexp class, which represents a regular -// expression symbolically. - +// Compiled representation of regular expressions. +// See regexp.h for the Regexp class, which represents a regular +// expression symbolically. + #include <stdint.h> #include <functional> #include <mutex> #include <string> #include <vector> #include <type_traits> - + #include "util/util.h" #include "util/logging.h" #include "re2/pod_array.h" #include "re2/re2.h" #include "re2/sparse_array.h" #include "re2/sparse_set.h" - -namespace re2 { - -// Opcodes for Inst -enum InstOp { - kInstAlt = 0, // choose between out_ and out1_ - kInstAltMatch, // Alt: out_ is [00-FF] and back, out1_ is match; or vice versa. 
- kInstByteRange, // next (possible case-folded) byte must be in [lo_, hi_] - kInstCapture, // capturing parenthesis number cap_ - kInstEmptyWidth, // empty-width special (^ $ ...); bit(s) set in empty_ - kInstMatch, // found a match! - kInstNop, // no-op; occasionally unavoidable - kInstFail, // never match; occasionally unavoidable + +namespace re2 { + +// Opcodes for Inst +enum InstOp { + kInstAlt = 0, // choose between out_ and out1_ + kInstAltMatch, // Alt: out_ is [00-FF] and back, out1_ is match; or vice versa. + kInstByteRange, // next (possible case-folded) byte must be in [lo_, hi_] + kInstCapture, // capturing parenthesis number cap_ + kInstEmptyWidth, // empty-width special (^ $ ...); bit(s) set in empty_ + kInstMatch, // found a match! + kInstNop, // no-op; occasionally unavoidable + kInstFail, // never match; occasionally unavoidable kNumInst, -}; - -// Bit flags for empty-width specials -enum EmptyOp { - kEmptyBeginLine = 1<<0, // ^ - beginning of line - kEmptyEndLine = 1<<1, // $ - end of line - kEmptyBeginText = 1<<2, // \A - beginning of text - kEmptyEndText = 1<<3, // \z - end of text - kEmptyWordBoundary = 1<<4, // \b - word boundary - kEmptyNonWordBoundary = 1<<5, // \B - not \b - kEmptyAllFlags = (1<<6)-1, -}; - +}; + +// Bit flags for empty-width specials +enum EmptyOp { + kEmptyBeginLine = 1<<0, // ^ - beginning of line + kEmptyEndLine = 1<<1, // $ - end of line + kEmptyBeginText = 1<<2, // \A - beginning of text + kEmptyEndText = 1<<3, // \z - end of text + kEmptyWordBoundary = 1<<4, // \b - word boundary + kEmptyNonWordBoundary = 1<<5, // \B - not \b + kEmptyAllFlags = (1<<6)-1, +}; + class DFA; -class Regexp; - -// Compiled form of regexp program. -class Prog { - public: - Prog(); - ~Prog(); - - // Single instruction in regexp program. - class Inst { - public: +class Regexp; + +// Compiled form of regexp program. +class Prog { + public: + Prog(); + ~Prog(); + + // Single instruction in regexp program. 
+ class Inst { + public: // See the assertion below for why this is so. Inst() = default; - + // Copyable. Inst(const Inst&) = default; Inst& operator=(const Inst&) = default; - // Constructors per opcode + // Constructors per opcode void InitAlt(uint32_t out, uint32_t out1); void InitByteRange(int lo, int hi, int foldcase, uint32_t out); void InitCapture(int cap, uint32_t out); void InitEmptyWidth(EmptyOp empty, uint32_t out); - void InitMatch(int id); + void InitMatch(int id); void InitNop(uint32_t out); - void InitFail(); - - // Getters + void InitFail(); + + // Getters int id(Prog* p) { return static_cast<int>(this - p->inst_.data()); } - InstOp opcode() { return static_cast<InstOp>(out_opcode_&7); } + InstOp opcode() { return static_cast<InstOp>(out_opcode_&7); } int last() { return (out_opcode_>>3)&1; } int out() { return out_opcode_>>4; } int out1() { DCHECK(opcode() == kInstAlt || opcode() == kInstAltMatch); return out1_; } - int cap() { DCHECK_EQ(opcode(), kInstCapture); return cap_; } - int lo() { DCHECK_EQ(opcode(), kInstByteRange); return lo_; } - int hi() { DCHECK_EQ(opcode(), kInstByteRange); return hi_; } + int cap() { DCHECK_EQ(opcode(), kInstCapture); return cap_; } + int lo() { DCHECK_EQ(opcode(), kInstByteRange); return lo_; } + int hi() { DCHECK_EQ(opcode(), kInstByteRange); return hi_; } int foldcase() { DCHECK_EQ(opcode(), kInstByteRange); return hint_foldcase_&1; } int hint() { DCHECK_EQ(opcode(), kInstByteRange); return hint_foldcase_>>1; } - int match_id() { DCHECK_EQ(opcode(), kInstMatch); return match_id_; } - EmptyOp empty() { DCHECK_EQ(opcode(), kInstEmptyWidth); return empty_; } + int match_id() { DCHECK_EQ(opcode(), kInstMatch); return match_id_; } + EmptyOp empty() { DCHECK_EQ(opcode(), kInstEmptyWidth); return empty_; } bool greedy(Prog* p) { - DCHECK_EQ(opcode(), kInstAltMatch); + DCHECK_EQ(opcode(), kInstAltMatch); return p->inst(out())->opcode() == kInstByteRange || (p->inst(out())->opcode() == kInstNop && 
p->inst(p->inst(out())->out())->opcode() == kInstByteRange); - } - - // Does this inst (an kInstByteRange) match c? - inline bool Matches(int c) { - DCHECK_EQ(opcode(), kInstByteRange); + } + + // Does this inst (an kInstByteRange) match c? + inline bool Matches(int c) { + DCHECK_EQ(opcode(), kInstByteRange); if (foldcase() && 'A' <= c && c <= 'Z') - c += 'a' - 'A'; - return lo_ <= c && c <= hi_; - } - - // Returns string representation for debugging. + c += 'a' - 'A'; + return lo_ <= c && c <= hi_; + } + + // Returns string representation for debugging. std::string Dump(); - - // Maximum instruction id. + + // Maximum instruction id. // (Must fit in out_opcode_. PatchList/last steal another bit.) - static const int kMaxInst = (1<<28) - 1; - - private: - void set_opcode(InstOp opcode) { + static const int kMaxInst = (1<<28) - 1; + + private: + void set_opcode(InstOp opcode) { out_opcode_ = (out()<<4) | (last()<<3) | opcode; - } - + } + void set_last() { out_opcode_ = (out()<<4) | (1<<3) | opcode(); } - void set_out(int out) { + void set_out(int out) { out_opcode_ = (out<<4) | (last()<<3) | opcode(); - } - - void set_out_opcode(int out, InstOp opcode) { + } + + void set_out_opcode(int out, InstOp opcode) { out_opcode_ = (out<<4) | (last()<<3) | opcode; - } - + } + uint32_t out_opcode_; // 28 bits: out, 1 bit: last, 3 (low) bits: opcode union { // additional instruction arguments: uint32_t out1_; // opcode == kInstAlt // alternate next instruction - + int32_t cap_; // opcode == kInstCapture // Index of capture register (holds text // position recorded by capturing parentheses). // For \n (the submatch for the nth parentheses), // the left parenthesis captures into register 2*n // and the right one captures into register 2*n+1. - + int32_t match_id_; // opcode == kInstMatch // Match ID to identify this match (for re2::Set). 
- + struct { // opcode == kInstByteRange uint8_t lo_; // byte range is lo_-hi_ inclusive uint8_t hi_; // @@ -155,69 +155,69 @@ class Prog { // means there are no remaining possibilities, // which is most likely for character classes. // foldcase: A-Z -> a-z before checking range. - }; - + }; + EmptyOp empty_; // opcode == kInstEmptyWidth // empty_ is bitwise OR of kEmpty* flags above. - }; - - friend class Compiler; - friend struct PatchList; - friend class Prog; - }; - + }; + + friend class Compiler; + friend struct PatchList; + friend class Prog; + }; + // Inst must be trivial so that we can freely clear it with memset(3). // Arrays of Inst are initialised by copying the initial elements with // memmove(3) and then clearing any remaining elements with memset(3). static_assert(std::is_trivial<Inst>::value, "Inst must be trivial"); - // Whether to anchor the search. - enum Anchor { - kUnanchored, // match anywhere - kAnchored, // match only starting at beginning of text - }; - - // Kind of match to look for (for anchor != kFullMatch) - // - // kLongestMatch mode finds the overall longest - // match but still makes its submatch choices the way - // Perl would, not in the way prescribed by POSIX. - // The POSIX rules are much more expensive to implement, - // and no one has needed them. - // - // kFullMatch is not strictly necessary -- we could use - // kLongestMatch and then check the length of the match -- but - // the matching code can run faster if it knows to consider only - // full matches. - enum MatchKind { - kFirstMatch, // like Perl, PCRE - kLongestMatch, // like egrep or POSIX - kFullMatch, // match only entire text; implies anchor==kAnchored - kManyMatch // for SearchDFA, records set of matches - }; - - Inst *inst(int id) { return &inst_[id]; } - int start() { return start_; } + // Whether to anchor the search. 
+ enum Anchor { + kUnanchored, // match anywhere + kAnchored, // match only starting at beginning of text + }; + + // Kind of match to look for (for anchor != kFullMatch) + // + // kLongestMatch mode finds the overall longest + // match but still makes its submatch choices the way + // Perl would, not in the way prescribed by POSIX. + // The POSIX rules are much more expensive to implement, + // and no one has needed them. + // + // kFullMatch is not strictly necessary -- we could use + // kLongestMatch and then check the length of the match -- but + // the matching code can run faster if it knows to consider only + // full matches. + enum MatchKind { + kFirstMatch, // like Perl, PCRE + kLongestMatch, // like egrep or POSIX + kFullMatch, // match only entire text; implies anchor==kAnchored + kManyMatch // for SearchDFA, records set of matches + }; + + Inst *inst(int id) { return &inst_[id]; } + int start() { return start_; } void set_start(int start) { start_ = start; } - int start_unanchored() { return start_unanchored_; } - void set_start_unanchored(int start) { start_unanchored_ = start; } + int start_unanchored() { return start_unanchored_; } + void set_start_unanchored(int start) { start_unanchored_ = start; } int size() { return size_; } - bool reversed() { return reversed_; } - void set_reversed(bool reversed) { reversed_ = reversed; } + bool reversed() { return reversed_; } + void set_reversed(bool reversed) { reversed_ = reversed; } int list_count() { return list_count_; } int inst_count(InstOp op) { return inst_count_[op]; } uint16_t* list_heads() { return list_heads_.data(); } size_t bit_state_text_max_size() { return bit_state_text_max_size_; } int64_t dfa_mem() { return dfa_mem_; } void set_dfa_mem(int64_t dfa_mem) { dfa_mem_ = dfa_mem; } - bool anchor_start() { return anchor_start_; } - void set_anchor_start(bool b) { anchor_start_ = b; } - bool anchor_end() { return anchor_end_; } - void set_anchor_end(bool b) { anchor_end_ = b; } - int 
bytemap_range() { return bytemap_range_; } + bool anchor_start() { return anchor_start_; } + void set_anchor_start(bool b) { anchor_start_ = b; } + bool anchor_end() { return anchor_end_; } + void set_anchor_end(bool b) { anchor_end_ = b; } + int bytemap_range() { return bytemap_range_; } const uint8_t* bytemap() { return bytemap_; } bool can_prefix_accel() { return prefix_size_ != 0; } - + // Accelerates to the first likely occurrence of the prefix. // Returns a pointer to the first byte or NULL if not found. const void* PrefixAccel(const void* data, size_t size) { @@ -242,58 +242,58 @@ class Prog { // prefix_back_ to return fewer false positives than memchr(3) alone. const void* PrefixAccel_FrontAndBack(const void* data, size_t size); - // Returns string representation of program for debugging. + // Returns string representation of program for debugging. std::string Dump(); std::string DumpUnanchored(); std::string DumpByteMap(); - - // Returns the set of kEmpty flags that are in effect at - // position p within context. + + // Returns the set of kEmpty flags that are in effect at + // position p within context. static uint32_t EmptyFlags(const StringPiece& context, const char* p); - - // Returns whether byte c is a word character: ASCII only. - // Used by the implementation of \b and \B. - // This is not right for Unicode, but: - // - it's hard to get right in a byte-at-a-time matching world - // (the DFA has only one-byte lookahead). - // - even if the lookahead were possible, the Progs would be huge. - // This crude approximation is the same one PCRE uses. + + // Returns whether byte c is a word character: ASCII only. + // Used by the implementation of \b and \B. + // This is not right for Unicode, but: + // - it's hard to get right in a byte-at-a-time matching world + // (the DFA has only one-byte lookahead). + // - even if the lookahead were possible, the Progs would be huge. + // This crude approximation is the same one PCRE uses. 
static bool IsWordChar(uint8_t c) { - return ('A' <= c && c <= 'Z') || - ('a' <= c && c <= 'z') || - ('0' <= c && c <= '9') || - c == '_'; - } - - // Execution engines. They all search for the regexp (run the prog) - // in text, which is in the larger context (used for ^ $ \b etc). - // Anchor and kind control the kind of search. - // Returns true if match found, false if not. - // If match found, fills match[0..nmatch-1] with submatch info. - // match[0] is overall match, match[1] is first set of parens, etc. - // If a particular submatch is not matched during the regexp match, - // it is set to NULL. - // - // Matching text == StringPiece(NULL, 0) is treated as any other empty - // string, but note that on return, it will not be possible to distinguish - // submatches that matched that empty string from submatches that didn't - // match anything. Either way, match[i] == NULL. - - // Search using NFA: can find submatches but kind of slow. - bool SearchNFA(const StringPiece& text, const StringPiece& context, - Anchor anchor, MatchKind kind, - StringPiece* match, int nmatch); - - // Search using DFA: much faster than NFA but only finds - // end of match and can use a lot more memory. - // Returns whether a match was found. - // If the DFA runs out of memory, sets *failed to true and returns false. - // If matches != NULL and kind == kManyMatch and there is a match, - // SearchDFA fills matches with the match IDs of the final matching state. - bool SearchDFA(const StringPiece& text, const StringPiece& context, + return ('A' <= c && c <= 'Z') || + ('a' <= c && c <= 'z') || + ('0' <= c && c <= '9') || + c == '_'; + } + + // Execution engines. They all search for the regexp (run the prog) + // in text, which is in the larger context (used for ^ $ \b etc). + // Anchor and kind control the kind of search. + // Returns true if match found, false if not. + // If match found, fills match[0..nmatch-1] with submatch info. 
+ // match[0] is overall match, match[1] is first set of parens, etc. + // If a particular submatch is not matched during the regexp match, + // it is set to NULL. + // + // Matching text == StringPiece(NULL, 0) is treated as any other empty + // string, but note that on return, it will not be possible to distinguish + // submatches that matched that empty string from submatches that didn't + // match anything. Either way, match[i] == NULL. + + // Search using NFA: can find submatches but kind of slow. + bool SearchNFA(const StringPiece& text, const StringPiece& context, + Anchor anchor, MatchKind kind, + StringPiece* match, int nmatch); + + // Search using DFA: much faster than NFA but only finds + // end of match and can use a lot more memory. + // Returns whether a match was found. + // If the DFA runs out of memory, sets *failed to true and returns false. + // If matches != NULL and kind == kManyMatch and there is a match, + // SearchDFA fills matches with the match IDs of the final matching state. + bool SearchDFA(const StringPiece& text, const StringPiece& context, Anchor anchor, MatchKind kind, StringPiece* match0, bool* failed, SparseSet* matches); - + // The callback issued after building each DFA state with BuildEntireDFA(). // If next is null, then the memory budget has been exhausted and building // will halt. Otherwise, the state has been built and next points to an array @@ -304,71 +304,71 @@ class Prog { using DFAStateCallback = std::function<void(const int* next, bool match)>; // Build the entire DFA for the given match kind. - // Usually the DFA is built out incrementally, as needed, which + // Usually the DFA is built out incrementally, as needed, which // avoids lots of unnecessary work. // If cb is not empty, it receives one callback per state built. // Returns the number of states built. // FOR TESTING OR EXPERIMENTAL PURPOSES ONLY. int BuildEntireDFA(MatchKind kind, const DFAStateCallback& cb); - + // Compute bytemap. 
- void ComputeByteMap(); - - // Run peep-hole optimizer on program. - void Optimize(); - - // One-pass NFA: only correct if IsOnePass() is true, - // but much faster than NFA (competitive with PCRE) - // for those expressions. - bool IsOnePass(); - bool SearchOnePass(const StringPiece& text, const StringPiece& context, - Anchor anchor, MatchKind kind, - StringPiece* match, int nmatch); - - // Bit-state backtracking. Fast on small cases but uses memory + void ComputeByteMap(); + + // Run peep-hole optimizer on program. + void Optimize(); + + // One-pass NFA: only correct if IsOnePass() is true, + // but much faster than NFA (competitive with PCRE) + // for those expressions. + bool IsOnePass(); + bool SearchOnePass(const StringPiece& text, const StringPiece& context, + Anchor anchor, MatchKind kind, + StringPiece* match, int nmatch); + + // Bit-state backtracking. Fast on small cases but uses memory // proportional to the product of the list count and the text size. bool CanBitState() { return list_heads_.data() != NULL; } - bool SearchBitState(const StringPiece& text, const StringPiece& context, - Anchor anchor, MatchKind kind, - StringPiece* match, int nmatch); - - static const int kMaxOnePassCapture = 5; // $0 through $4 - - // Backtracking search: the gold standard against which the other - // implementations are checked. FOR TESTING ONLY. - // It allocates a ton of memory to avoid running forever. - // It is also recursive, so can't use in production (will overflow stacks). - // The name "Unsafe" here is supposed to be a flag that - // you should not be using this function. - bool UnsafeSearchBacktrack(const StringPiece& text, - const StringPiece& context, - Anchor anchor, MatchKind kind, - StringPiece* match, int nmatch); - - // Computes range for any strings matching regexp. The min and max can in - // some cases be arbitrarily precise, so the caller gets to specify the - // maximum desired length of string returned. 
- // - // Assuming PossibleMatchRange(&min, &max, N) returns successfully, any - // string s that is an anchored match for this regexp satisfies - // min <= s && s <= max. - // - // Note that PossibleMatchRange() will only consider the first copy of an - // infinitely repeated element (i.e., any regexp element followed by a '*' or - // '+' operator). Regexps with "{N}" constructions are not affected, as those - // do not compile down to infinite repetitions. - // - // Returns true on success, false on error. + bool SearchBitState(const StringPiece& text, const StringPiece& context, + Anchor anchor, MatchKind kind, + StringPiece* match, int nmatch); + + static const int kMaxOnePassCapture = 5; // $0 through $4 + + // Backtracking search: the gold standard against which the other + // implementations are checked. FOR TESTING ONLY. + // It allocates a ton of memory to avoid running forever. + // It is also recursive, so can't use in production (will overflow stacks). + // The name "Unsafe" here is supposed to be a flag that + // you should not be using this function. + bool UnsafeSearchBacktrack(const StringPiece& text, + const StringPiece& context, + Anchor anchor, MatchKind kind, + StringPiece* match, int nmatch); + + // Computes range for any strings matching regexp. The min and max can in + // some cases be arbitrarily precise, so the caller gets to specify the + // maximum desired length of string returned. + // + // Assuming PossibleMatchRange(&min, &max, N) returns successfully, any + // string s that is an anchored match for this regexp satisfies + // min <= s && s <= max. + // + // Note that PossibleMatchRange() will only consider the first copy of an + // infinitely repeated element (i.e., any regexp element followed by a '*' or + // '+' operator). Regexps with "{N}" constructions are not affected, as those + // do not compile down to infinite repetitions. + // + // Returns true on success, false on error. 
bool PossibleMatchRange(std::string* min, std::string* max, int maxlen); - + // EXPERIMENTAL! SUBJECT TO CHANGE! // Outputs the program fanout into the given sparse array. void Fanout(SparseArray<int>* fanout); - // Compiles a collection of regexps to Prog. Each regexp will have + // Compiles a collection of regexps to Prog. Each regexp will have // its own Match instruction recording the index in the output vector. static Prog* CompileSet(Regexp* re, RE2::Anchor anchor, int64_t max_mem); - + // Flattens the Prog from "tree" form to "list" form. This is an in-place // operation in the sense that the old instructions are lost. void Flatten(); @@ -403,22 +403,22 @@ class Prog { // FOR TESTING ONLY. static void TESTING_ONLY_set_dfa_should_bail_when_slow(bool b); - private: - friend class Compiler; - - DFA* GetDFA(MatchKind kind); + private: + friend class Compiler; + + DFA* GetDFA(MatchKind kind); void DeleteDFA(DFA* dfa); - - bool anchor_start_; // regexp has explicit start anchor - bool anchor_end_; // regexp has explicit end anchor - bool reversed_; // whether program runs backward over input + + bool anchor_start_; // regexp has explicit start anchor + bool anchor_end_; // regexp has explicit end anchor + bool reversed_; // whether program runs backward over input bool did_flatten_; // has Flatten been called? - bool did_onepass_; // has IsOnePass been called? - - int start_; // entry point for program - int start_unanchored_; // unanchored entry point for program - int size_; // number of instructions - int bytemap_range_; // bytemap_[x] < bytemap_range_ + bool did_onepass_; // has IsOnePass been called? 
+ + int start_; // entry point for program + int start_unanchored_; // unanchored entry point for program + int size_; // number of instructions + int bytemap_range_; // bytemap_[x] < bytemap_range_ bool prefix_foldcase_; // whether prefix is case-insensitive size_t prefix_size_; // size of prefix (0 if no prefix) @@ -429,7 +429,7 @@ class Prog { int prefix_back_; // last byte of prefix }; }; - + int list_count_; // count of lists (see above) int inst_count_[kNumInst]; // count of instructions by opcode PODArray<uint16_t> list_heads_; // sparse array enumerating list heads @@ -438,20 +438,20 @@ class Prog { PODArray<Inst> inst_; // pointer to instruction array PODArray<uint8_t> onepass_nodes_; // data for OnePass nodes - + int64_t dfa_mem_; // Maximum memory for DFAs. DFA* dfa_first_; // DFA cached for kFirstMatch/kManyMatch DFA* dfa_longest_; // DFA cached for kLongestMatch/kFullMatch - + uint8_t bytemap_[256]; // map from input bytes to byte classes - + std::once_flag dfa_first_once_; std::once_flag dfa_longest_once_; - + Prog(const Prog&) = delete; Prog& operator=(const Prog&) = delete; -}; - +}; + // std::string_view in MSVC has iterators that aren't just pointers and // that don't allow comparisons between different objects - not even if // those objects are views into the same string! Thus, we provide these @@ -463,6 +463,6 @@ static inline const char* EndPtr(const StringPiece& s) { return s.data() + s.size(); } -} // namespace re2 - +} // namespace re2 + #endif // RE2_PROG_H_ diff --git a/contrib/libs/re2/re2/re2.cc b/contrib/libs/re2/re2/re2.cc index 47fb385e4e..c32090b4fc 100644 --- a/contrib/libs/re2/re2/re2.cc +++ b/contrib/libs/re2/re2/re2.cc @@ -1,14 +1,14 @@ -// Copyright 2003-2009 The RE2 Authors. All Rights Reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// Regular expression interface RE2. 
-// -// Originally the PCRE C++ wrapper, but adapted to use -// the new automata-based regular expression engines. - +// Copyright 2003-2009 The RE2 Authors. All Rights Reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Regular expression interface RE2. +// +// Originally the PCRE C++ wrapper, but adapted to use +// the new automata-based regular expression engines. + #include "re2/re2.h" - + #include <assert.h> #include <ctype.h> #include <errno.h> @@ -22,7 +22,7 @@ #include <atomic> #include <iterator> #include <mutex> -#include <string> +#include <string> #include <utility> #include <vector> @@ -30,18 +30,18 @@ #include "util/logging.h" #include "util/strutil.h" #include "util/utf.h" -#include "re2/prog.h" -#include "re2/regexp.h" +#include "re2/prog.h" +#include "re2/regexp.h" #include "re2/sparse_array.h" - -namespace re2 { - -// Maximum number of args we can set -static const int kMaxArgs = 16; -static const int kVecSize = 1+kMaxArgs; - + +namespace re2 { + +// Maximum number of args we can set +static const int kMaxArgs = 16; +static const int kVecSize = 1+kMaxArgs; + const int RE2::Options::kDefaultMaxMem; // initialized in re2.h - + RE2::Options::Options(RE2::CannedOptions opt) : encoding_(opt == RE2::Latin1 ? EncodingLatin1 : EncodingUTF8), posix_syntax_(opt == RE2::POSIX), @@ -57,120 +57,120 @@ RE2::Options::Options(RE2::CannedOptions opt) word_boundary_(false), one_line_(false) { } - + // static empty objects for use as const references. // To avoid global constructors, allocated in RE2::Init(). static const std::string* empty_string; static const std::map<std::string, int>* empty_named_groups; static const std::map<int, std::string>* empty_group_names; - -// Converts from Regexp error code to RE2 error code. -// Maybe some day they will diverge. In any event, this -// hides the existence of Regexp from RE2 users. + +// Converts from Regexp error code to RE2 error code. 
+// Maybe some day they will diverge. In any event, this +// hides the existence of Regexp from RE2 users. static RE2::ErrorCode RegexpErrorToRE2(re2::RegexpStatusCode code) { - switch (code) { + switch (code) { case re2::kRegexpSuccess: - return RE2::NoError; + return RE2::NoError; case re2::kRegexpInternalError: - return RE2::ErrorInternal; + return RE2::ErrorInternal; case re2::kRegexpBadEscape: - return RE2::ErrorBadEscape; + return RE2::ErrorBadEscape; case re2::kRegexpBadCharClass: - return RE2::ErrorBadCharClass; + return RE2::ErrorBadCharClass; case re2::kRegexpBadCharRange: - return RE2::ErrorBadCharRange; + return RE2::ErrorBadCharRange; case re2::kRegexpMissingBracket: - return RE2::ErrorMissingBracket; + return RE2::ErrorMissingBracket; case re2::kRegexpMissingParen: - return RE2::ErrorMissingParen; + return RE2::ErrorMissingParen; case re2::kRegexpUnexpectedParen: return RE2::ErrorUnexpectedParen; case re2::kRegexpTrailingBackslash: - return RE2::ErrorTrailingBackslash; + return RE2::ErrorTrailingBackslash; case re2::kRegexpRepeatArgument: - return RE2::ErrorRepeatArgument; + return RE2::ErrorRepeatArgument; case re2::kRegexpRepeatSize: - return RE2::ErrorRepeatSize; + return RE2::ErrorRepeatSize; case re2::kRegexpRepeatOp: - return RE2::ErrorRepeatOp; + return RE2::ErrorRepeatOp; case re2::kRegexpBadPerlOp: - return RE2::ErrorBadPerlOp; + return RE2::ErrorBadPerlOp; case re2::kRegexpBadUTF8: - return RE2::ErrorBadUTF8; + return RE2::ErrorBadUTF8; case re2::kRegexpBadNamedCapture: - return RE2::ErrorBadNamedCapture; - } - return RE2::ErrorInternal; -} - + return RE2::ErrorBadNamedCapture; + } + return RE2::ErrorInternal; +} + static std::string trunc(const StringPiece& pattern) { - if (pattern.size() < 100) + if (pattern.size() < 100) return std::string(pattern); return std::string(pattern.substr(0, 100)) + "..."; -} - - -RE2::RE2(const char* pattern) { - Init(pattern, DefaultOptions); -} - +} + + +RE2::RE2(const char* pattern) { + Init(pattern, 
DefaultOptions); +} + RE2::RE2(const std::string& pattern) { - Init(pattern, DefaultOptions); -} - -RE2::RE2(const StringPiece& pattern) { - Init(pattern, DefaultOptions); -} - -RE2::RE2(const StringPiece& pattern, const Options& options) { - Init(pattern, options); -} - -int RE2::Options::ParseFlags() const { - int flags = Regexp::ClassNL; - switch (encoding()) { - default: + Init(pattern, DefaultOptions); +} + +RE2::RE2(const StringPiece& pattern) { + Init(pattern, DefaultOptions); +} + +RE2::RE2(const StringPiece& pattern, const Options& options) { + Init(pattern, options); +} + +int RE2::Options::ParseFlags() const { + int flags = Regexp::ClassNL; + switch (encoding()) { + default: if (log_errors()) LOG(ERROR) << "Unknown encoding " << encoding(); - break; - case RE2::Options::EncodingUTF8: - break; - case RE2::Options::EncodingLatin1: - flags |= Regexp::Latin1; - break; - } - - if (!posix_syntax()) - flags |= Regexp::LikePerl; - - if (literal()) - flags |= Regexp::Literal; - - if (never_nl()) - flags |= Regexp::NeverNL; - + break; + case RE2::Options::EncodingUTF8: + break; + case RE2::Options::EncodingLatin1: + flags |= Regexp::Latin1; + break; + } + + if (!posix_syntax()) + flags |= Regexp::LikePerl; + + if (literal()) + flags |= Regexp::Literal; + + if (never_nl()) + flags |= Regexp::NeverNL; + if (dot_nl()) flags |= Regexp::DotNL; if (never_capture()) flags |= Regexp::NeverCapture; - if (!case_sensitive()) - flags |= Regexp::FoldCase; - - if (perl_classes()) - flags |= Regexp::PerlClasses; - - if (word_boundary()) - flags |= Regexp::PerlB; - - if (one_line()) - flags |= Regexp::OneLine; - - return flags; -} - -void RE2::Init(const StringPiece& pattern, const Options& options) { + if (!case_sensitive()) + flags |= Regexp::FoldCase; + + if (perl_classes()) + flags |= Regexp::PerlClasses; + + if (word_boundary()) + flags |= Regexp::PerlB; + + if (one_line()) + flags |= Regexp::OneLine; + + return flags; +} + +void RE2::Init(const StringPiece& pattern, const 
Options& options) { static std::once_flag empty_once; std::call_once(empty_once, []() { empty_string = new std::string; @@ -179,70 +179,70 @@ void RE2::Init(const StringPiece& pattern, const Options& options) { }); pattern_.assign(pattern.data(), pattern.size()); - options_.Copy(options); + options_.Copy(options); entire_regexp_ = NULL; error_ = empty_string; error_code_ = NoError; error_arg_.clear(); prefix_.clear(); prefix_foldcase_ = false; - suffix_regexp_ = NULL; - prog_ = NULL; + suffix_regexp_ = NULL; + prog_ = NULL; num_captures_ = -1; is_one_pass_ = false; - rprog_ = NULL; - named_groups_ = NULL; - group_names_ = NULL; - - RegexpStatus status; - entire_regexp_ = Regexp::Parse( - pattern_, - static_cast<Regexp::ParseFlags>(options_.ParseFlags()), - &status); - if (entire_regexp_ == NULL) { - if (options_.log_errors()) { - LOG(ERROR) << "Error parsing '" << trunc(pattern_) << "': " - << status.Text(); - } + rprog_ = NULL; + named_groups_ = NULL; + group_names_ = NULL; + + RegexpStatus status; + entire_regexp_ = Regexp::Parse( + pattern_, + static_cast<Regexp::ParseFlags>(options_.ParseFlags()), + &status); + if (entire_regexp_ == NULL) { + if (options_.log_errors()) { + LOG(ERROR) << "Error parsing '" << trunc(pattern_) << "': " + << status.Text(); + } error_ = new std::string(status.Text()); - error_code_ = RegexpErrorToRE2(status.code()); + error_code_ = RegexpErrorToRE2(status.code()); error_arg_ = std::string(status.error_arg()); - return; - } - + return; + } + re2::Regexp* suffix; - if (entire_regexp_->RequiredPrefix(&prefix_, &prefix_foldcase_, &suffix)) - suffix_regexp_ = suffix; - else - suffix_regexp_ = entire_regexp_->Incref(); - - // Two thirds of the memory goes to the forward Prog, - // one third to the reverse prog, because the forward - // Prog has two DFAs but the reverse prog has one. 
- prog_ = suffix_regexp_->CompileToProg(options_.max_mem()*2/3); - if (prog_ == NULL) { - if (options_.log_errors()) - LOG(ERROR) << "Error compiling '" << trunc(pattern_) << "'"; + if (entire_regexp_->RequiredPrefix(&prefix_, &prefix_foldcase_, &suffix)) + suffix_regexp_ = suffix; + else + suffix_regexp_ = entire_regexp_->Incref(); + + // Two thirds of the memory goes to the forward Prog, + // one third to the reverse prog, because the forward + // Prog has two DFAs but the reverse prog has one. + prog_ = suffix_regexp_->CompileToProg(options_.max_mem()*2/3); + if (prog_ == NULL) { + if (options_.log_errors()) + LOG(ERROR) << "Error compiling '" << trunc(pattern_) << "'"; error_ = new std::string("pattern too large - compile failed"); - error_code_ = RE2::ErrorPatternTooLarge; - return; - } - + error_code_ = RE2::ErrorPatternTooLarge; + return; + } + // We used to compute this lazily, but it's used during the // typical control flow for a match call, so we now compute // it eagerly, which avoids the overhead of std::once_flag. num_captures_ = suffix_regexp_->NumCaptures(); - // Could delay this until the first match call that - // cares about submatch information, but the one-pass - // machine's memory gets cut from the DFA memory budget, - // and that is harder to do if the DFA has already - // been built. - is_one_pass_ = prog_->IsOnePass(); -} - -// Returns rprog_, computing it if needed. + // Could delay this until the first match call that + // cares about submatch information, but the one-pass + // machine's memory gets cut from the DFA memory budget, + // and that is harder to do if the DFA has already + // been built. + is_one_pass_ = prog_->IsOnePass(); +} + +// Returns rprog_, computing it if needed. re2::Prog* RE2::ReverseProg() const { std::call_once(rprog_once_, [](const RE2* re) { re->rprog_ = @@ -255,32 +255,32 @@ re2::Prog* RE2::ReverseProg() const { // is fine. 
More importantly, an RE2 object is supposed to be logically // immutable: whatever ok() would have returned after Init() completed, // it should continue to return that no matter what ReverseProg() does. - } + } }, this); - return rprog_; -} - -RE2::~RE2() { - if (suffix_regexp_) - suffix_regexp_->Decref(); - if (entire_regexp_) - entire_regexp_->Decref(); - delete prog_; - delete rprog_; + return rprog_; +} + +RE2::~RE2() { + if (suffix_regexp_) + suffix_regexp_->Decref(); + if (entire_regexp_) + entire_regexp_->Decref(); + delete prog_; + delete rprog_; if (error_ != empty_string) - delete error_; + delete error_; if (named_groups_ != NULL && named_groups_ != empty_named_groups) - delete named_groups_; + delete named_groups_; if (group_names_ != NULL && group_names_ != empty_group_names) - delete group_names_; -} - -int RE2::ProgramSize() const { - if (prog_ == NULL) - return -1; - return prog_->size(); -} - + delete group_names_; +} + +int RE2::ProgramSize() const { + if (prog_ == NULL) + return -1; + return prog_->size(); +} + int RE2::ReverseProgramSize() const { if (prog_ == NULL) return -1; @@ -346,7 +346,7 @@ int RE2::ReverseProgramFanout(std::vector<int>* histogram) const { return Fanout(prog, histogram); } -// Returns named_groups_, computing it if needed. +// Returns named_groups_, computing it if needed. const std::map<std::string, int>& RE2::NamedCapturingGroups() const { std::call_once(named_groups_once_, [](const RE2* re) { if (re->suffix_regexp_ != NULL) @@ -354,10 +354,10 @@ const std::map<std::string, int>& RE2::NamedCapturingGroups() const { if (re->named_groups_ == NULL) re->named_groups_ = empty_named_groups; }, this); - return *named_groups_; -} - -// Returns group_names_, computing it if needed. + return *named_groups_; +} + +// Returns group_names_, computing it if needed. 
const std::map<int, std::string>& RE2::CapturingGroupNames() const { std::call_once(group_names_once_, [](const RE2* re) { if (re->suffix_regexp_ != NULL) @@ -365,94 +365,94 @@ const std::map<int, std::string>& RE2::CapturingGroupNames() const { if (re->group_names_ == NULL) re->group_names_ = empty_group_names; }, this); - return *group_names_; -} - -/***** Convenience interfaces *****/ - -bool RE2::FullMatchN(const StringPiece& text, const RE2& re, - const Arg* const args[], int n) { - return re.DoMatch(text, ANCHOR_BOTH, NULL, args, n); -} - -bool RE2::PartialMatchN(const StringPiece& text, const RE2& re, - const Arg* const args[], int n) { - return re.DoMatch(text, UNANCHORED, NULL, args, n); -} - -bool RE2::ConsumeN(StringPiece* input, const RE2& re, - const Arg* const args[], int n) { + return *group_names_; +} + +/***** Convenience interfaces *****/ + +bool RE2::FullMatchN(const StringPiece& text, const RE2& re, + const Arg* const args[], int n) { + return re.DoMatch(text, ANCHOR_BOTH, NULL, args, n); +} + +bool RE2::PartialMatchN(const StringPiece& text, const RE2& re, + const Arg* const args[], int n) { + return re.DoMatch(text, UNANCHORED, NULL, args, n); +} + +bool RE2::ConsumeN(StringPiece* input, const RE2& re, + const Arg* const args[], int n) { size_t consumed; - if (re.DoMatch(*input, ANCHOR_START, &consumed, args, n)) { - input->remove_prefix(consumed); - return true; - } else { - return false; - } -} - -bool RE2::FindAndConsumeN(StringPiece* input, const RE2& re, - const Arg* const args[], int n) { + if (re.DoMatch(*input, ANCHOR_START, &consumed, args, n)) { + input->remove_prefix(consumed); + return true; + } else { + return false; + } +} + +bool RE2::FindAndConsumeN(StringPiece* input, const RE2& re, + const Arg* const args[], int n) { size_t consumed; - if (re.DoMatch(*input, UNANCHORED, &consumed, args, n)) { - input->remove_prefix(consumed); - return true; - } else { - return false; - } -} - + if (re.DoMatch(*input, UNANCHORED, &consumed, 
args, n)) { + input->remove_prefix(consumed); + return true; + } else { + return false; + } +} + bool RE2::Replace(std::string* str, const RE2& re, const StringPiece& rewrite) { - StringPiece vec[kVecSize]; - int nvec = 1 + MaxSubmatch(rewrite); + StringPiece vec[kVecSize]; + int nvec = 1 + MaxSubmatch(rewrite); if (nvec > 1 + re.NumberOfCapturingGroups()) - return false; + return false; if (nvec > static_cast<int>(arraysize(vec))) return false; if (!re.Match(*str, 0, str->size(), UNANCHORED, vec, nvec)) - return false; - + return false; + std::string s; - if (!re.Rewrite(&s, rewrite, vec, nvec)) - return false; - + if (!re.Rewrite(&s, rewrite, vec, nvec)) + return false; + assert(vec[0].data() >= str->data()); assert(vec[0].data() + vec[0].size() <= str->data() + str->size()); - str->replace(vec[0].data() - str->data(), vec[0].size(), s); - return true; -} - + str->replace(vec[0].data() - str->data(), vec[0].size(), s); + return true; +} + int RE2::GlobalReplace(std::string* str, const RE2& re, const StringPiece& rewrite) { - StringPiece vec[kVecSize]; - int nvec = 1 + MaxSubmatch(rewrite); + StringPiece vec[kVecSize]; + int nvec = 1 + MaxSubmatch(rewrite); if (nvec > 1 + re.NumberOfCapturingGroups()) - return false; + return false; if (nvec > static_cast<int>(arraysize(vec))) return false; - - const char* p = str->data(); - const char* ep = p + str->size(); - const char* lastend = NULL; + + const char* p = str->data(); + const char* ep = p + str->size(); + const char* lastend = NULL; std::string out; - int count = 0; + int count = 0; #ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION // Iterate just once when fuzzing. Otherwise, we easily get bogged down // and coverage is unlikely to improve despite significant expense. 
while (p == str->data()) { #else - while (p <= ep) { + while (p <= ep) { #endif if (!re.Match(*str, static_cast<size_t>(p - str->data()), str->size(), UNANCHORED, vec, nvec)) - break; + break; if (p < vec[0].data()) out.append(p, vec[0].data() - p); if (vec[0].data() == lastend && vec[0].empty()) { - // Disallow empty match at end of last match: skip ahead. + // Disallow empty match at end of last match: skip ahead. // // fullrune() takes int, not ptrdiff_t. However, it just looks // at the leading byte and treats any length >= 4 the same. @@ -476,155 +476,155 @@ int RE2::GlobalReplace(std::string* str, } // Most likely, re is in Latin-1 mode. If it is in UTF-8 mode, // we fell through from above and the GIGO principle applies. - if (p < ep) - out.append(p, 1); - p++; - continue; - } - re.Rewrite(&out, rewrite, vec, nvec); + if (p < ep) + out.append(p, 1); + p++; + continue; + } + re.Rewrite(&out, rewrite, vec, nvec); p = vec[0].data() + vec[0].size(); - lastend = p; - count++; - } - - if (count == 0) - return 0; - - if (p < ep) - out.append(p, ep - p); + lastend = p; + count++; + } + + if (count == 0) + return 0; + + if (p < ep) + out.append(p, ep - p); using std::swap; - swap(out, *str); - return count; -} - + swap(out, *str); + return count; +} + bool RE2::Extract(const StringPiece& text, const RE2& re, const StringPiece& rewrite, std::string* out) { - StringPiece vec[kVecSize]; - int nvec = 1 + MaxSubmatch(rewrite); + StringPiece vec[kVecSize]; + int nvec = 1 + MaxSubmatch(rewrite); if (nvec > 1 + re.NumberOfCapturingGroups()) - return false; + return false; if (nvec > static_cast<int>(arraysize(vec))) return false; if (!re.Match(text, 0, text.size(), UNANCHORED, vec, nvec)) - return false; - - out->clear(); - return re.Rewrite(out, rewrite, vec, nvec); -} - + return false; + + out->clear(); + return re.Rewrite(out, rewrite, vec, nvec); +} + std::string RE2::QuoteMeta(const StringPiece& unquoted) { std::string result; - result.reserve(unquoted.size() << 1); - - 
// Escape any ascii character not in [A-Za-z_0-9]. - // - // Note that it's legal to escape a character even if it has no - // special meaning in a regular expression -- so this function does - // that. (This also makes it identical to the perl function of the - // same name except for the null-character special case; - // see `perldoc -f quotemeta`.) + result.reserve(unquoted.size() << 1); + + // Escape any ascii character not in [A-Za-z_0-9]. + // + // Note that it's legal to escape a character even if it has no + // special meaning in a regular expression -- so this function does + // that. (This also makes it identical to the perl function of the + // same name except for the null-character special case; + // see `perldoc -f quotemeta`.) for (size_t ii = 0; ii < unquoted.size(); ++ii) { - // Note that using 'isalnum' here raises the benchmark time from - // 32ns to 58ns: - if ((unquoted[ii] < 'a' || unquoted[ii] > 'z') && - (unquoted[ii] < 'A' || unquoted[ii] > 'Z') && - (unquoted[ii] < '0' || unquoted[ii] > '9') && - unquoted[ii] != '_' && - // If this is the part of a UTF8 or Latin1 character, we need - // to copy this byte without escaping. Experimentally this is - // what works correctly with the regexp library. - !(unquoted[ii] & 128)) { - if (unquoted[ii] == '\0') { // Special handling for null chars. - // Note that this special handling is not strictly required for RE2, - // but this quoting is required for other regexp libraries such as - // PCRE. - // Can't use "\\0" since the next character might be a digit. 
- result += "\\x00"; - continue; - } - result += '\\'; - } - result += unquoted[ii]; - } - - return result; -} - + // Note that using 'isalnum' here raises the benchmark time from + // 32ns to 58ns: + if ((unquoted[ii] < 'a' || unquoted[ii] > 'z') && + (unquoted[ii] < 'A' || unquoted[ii] > 'Z') && + (unquoted[ii] < '0' || unquoted[ii] > '9') && + unquoted[ii] != '_' && + // If this is the part of a UTF8 or Latin1 character, we need + // to copy this byte without escaping. Experimentally this is + // what works correctly with the regexp library. + !(unquoted[ii] & 128)) { + if (unquoted[ii] == '\0') { // Special handling for null chars. + // Note that this special handling is not strictly required for RE2, + // but this quoting is required for other regexp libraries such as + // PCRE. + // Can't use "\\0" since the next character might be a digit. + result += "\\x00"; + continue; + } + result += '\\'; + } + result += unquoted[ii]; + } + + return result; +} + bool RE2::PossibleMatchRange(std::string* min, std::string* max, int maxlen) const { - if (prog_ == NULL) - return false; - + if (prog_ == NULL) + return false; + int n = static_cast<int>(prefix_.size()); - if (n > maxlen) - n = maxlen; - - // Determine initial min max from prefix_ literal. + if (n > maxlen) + n = maxlen; + + // Determine initial min max from prefix_ literal. *min = prefix_.substr(0, n); *max = prefix_.substr(0, n); - if (prefix_foldcase_) { + if (prefix_foldcase_) { // prefix is ASCII lowercase; change *min to uppercase. - for (int i = 0; i < n; i++) { + for (int i = 0; i < n; i++) { char& c = (*min)[i]; if ('a' <= c && c <= 'z') c += 'A' - 'a'; - } - } - - // Add to prefix min max using PossibleMatchRange on regexp. + } + } + + // Add to prefix min max using PossibleMatchRange on regexp. 
std::string dmin, dmax; - maxlen -= n; - if (maxlen > 0 && prog_->PossibleMatchRange(&dmin, &dmax, maxlen)) { + maxlen -= n; + if (maxlen > 0 && prog_->PossibleMatchRange(&dmin, &dmax, maxlen)) { min->append(dmin); max->append(dmax); } else if (!max->empty()) { - // prog_->PossibleMatchRange has failed us, - // but we still have useful information from prefix_. + // prog_->PossibleMatchRange has failed us, + // but we still have useful information from prefix_. // Round up *max to allow any possible suffix. PrefixSuccessor(max); - } else { - // Nothing useful. - *min = ""; - *max = ""; - return false; - } - - return true; -} - -// Avoid possible locale nonsense in standard strcasecmp. -// The string a is known to be all lowercase. + } else { + // Nothing useful. + *min = ""; + *max = ""; + return false; + } + + return true; +} + +// Avoid possible locale nonsense in standard strcasecmp. +// The string a is known to be all lowercase. static int ascii_strcasecmp(const char* a, const char* b, size_t len) { const char* ae = a + len; - - for (; a < ae; a++, b++) { + + for (; a < ae; a++, b++) { uint8_t x = *a; uint8_t y = *b; - if ('A' <= y && y <= 'Z') - y += 'a' - 'A'; - if (x != y) - return x - y; - } - return 0; -} - - -/***** Actual matching and rewriting code *****/ - -bool RE2::Match(const StringPiece& text, + if ('A' <= y && y <= 'Z') + y += 'a' - 'A'; + if (x != y) + return x - y; + } + return 0; +} + + +/***** Actual matching and rewriting code *****/ + +bool RE2::Match(const StringPiece& text, size_t startpos, size_t endpos, - Anchor re_anchor, - StringPiece* submatch, - int nsubmatch) const { + Anchor re_anchor, + StringPiece* submatch, + int nsubmatch) const { if (!ok()) { - if (options_.log_errors()) - LOG(ERROR) << "Invalid RE2: " << *error_; - return false; - } - + if (options_.log_errors()) + LOG(ERROR) << "Invalid RE2: " << *error_; + return false; + } + if (startpos > endpos || endpos > text.size()) { if (options_.log_errors()) LOG(ERROR) << "RE2: 
invalid startpos, endpos pair. [" @@ -634,23 +634,23 @@ bool RE2::Match(const StringPiece& text, return false; } - StringPiece subtext = text; - subtext.remove_prefix(startpos); + StringPiece subtext = text; + subtext.remove_prefix(startpos); subtext.remove_suffix(text.size() - endpos); - - // Use DFAs to find exact location of match, filter out non-matches. - - // Don't ask for the location if we won't use it. - // SearchDFA can do extra optimizations in that case. - StringPiece match; - StringPiece* matchp = &match; - if (nsubmatch == 0) - matchp = NULL; - - int ncap = 1 + NumberOfCapturingGroups(); - if (ncap > nsubmatch) - ncap = nsubmatch; - + + // Use DFAs to find exact location of match, filter out non-matches. + + // Don't ask for the location if we won't use it. + // SearchDFA can do extra optimizations in that case. + StringPiece match; + StringPiece* matchp = &match; + if (nsubmatch == 0) + matchp = NULL; + + int ncap = 1 + NumberOfCapturingGroups(); + if (ncap > nsubmatch) + ncap = nsubmatch; + // If the regexp is anchored explicitly, must not be in middle of text. if (prog_->anchor_start() && startpos != 0) return false; @@ -658,53 +658,53 @@ bool RE2::Match(const StringPiece& text, return false; // If the regexp is anchored explicitly, update re_anchor - // so that we can potentially fall into a faster case below. - if (prog_->anchor_start() && prog_->anchor_end()) - re_anchor = ANCHOR_BOTH; - else if (prog_->anchor_start() && re_anchor != ANCHOR_BOTH) - re_anchor = ANCHOR_START; - - // Check for the required prefix, if any. + // so that we can potentially fall into a faster case below. + if (prog_->anchor_start() && prog_->anchor_end()) + re_anchor = ANCHOR_BOTH; + else if (prog_->anchor_start() && re_anchor != ANCHOR_BOTH) + re_anchor = ANCHOR_START; + + // Check for the required prefix, if any. 
size_t prefixlen = 0; - if (!prefix_.empty()) { + if (!prefix_.empty()) { if (startpos != 0) return false; - prefixlen = prefix_.size(); - if (prefixlen > subtext.size()) - return false; - if (prefix_foldcase_) { - if (ascii_strcasecmp(&prefix_[0], subtext.data(), prefixlen) != 0) - return false; - } else { - if (memcmp(&prefix_[0], subtext.data(), prefixlen) != 0) - return false; - } - subtext.remove_prefix(prefixlen); - // If there is a required prefix, the anchor must be at least ANCHOR_START. - if (re_anchor != ANCHOR_BOTH) - re_anchor = ANCHOR_START; - } - - Prog::Anchor anchor = Prog::kUnanchored; - Prog::MatchKind kind = Prog::kFirstMatch; - if (options_.longest_match()) - kind = Prog::kLongestMatch; - + prefixlen = prefix_.size(); + if (prefixlen > subtext.size()) + return false; + if (prefix_foldcase_) { + if (ascii_strcasecmp(&prefix_[0], subtext.data(), prefixlen) != 0) + return false; + } else { + if (memcmp(&prefix_[0], subtext.data(), prefixlen) != 0) + return false; + } + subtext.remove_prefix(prefixlen); + // If there is a required prefix, the anchor must be at least ANCHOR_START. + if (re_anchor != ANCHOR_BOTH) + re_anchor = ANCHOR_START; + } + + Prog::Anchor anchor = Prog::kUnanchored; + Prog::MatchKind kind = Prog::kFirstMatch; + if (options_.longest_match()) + kind = Prog::kLongestMatch; + bool can_one_pass = is_one_pass_ && ncap <= Prog::kMaxOnePassCapture; bool can_bit_state = prog_->CanBitState(); size_t bit_state_text_max_size = prog_->bit_state_text_max_size(); - + #ifdef RE2_HAVE_THREAD_LOCAL hooks::context = this; #endif - bool dfa_failed = false; + bool dfa_failed = false; bool skipped_test = false; - switch (re_anchor) { - default: + switch (re_anchor) { + default: LOG(DFATAL) << "Unexpected re_anchor value: " << re_anchor; return false; - case UNANCHORED: { + case UNANCHORED: { if (prog_->anchor_end()) { // This is a very special case: we don't need the forward DFA because // we already know where the match must end! 
Instead, the reverse DFA @@ -735,78 +735,78 @@ bool RE2::Match(const StringPiece& text, break; } - if (!prog_->SearchDFA(subtext, text, anchor, kind, - matchp, &dfa_failed, NULL)) { - if (dfa_failed) { + if (!prog_->SearchDFA(subtext, text, anchor, kind, + matchp, &dfa_failed, NULL)) { + if (dfa_failed) { if (options_.log_errors()) LOG(ERROR) << "DFA out of memory: " << "pattern length " << pattern_.size() << ", " << "program size " << prog_->size() << ", " << "list count " << prog_->list_count() << ", " << "bytemap range " << prog_->bytemap_range(); - // Fall back to NFA below. - skipped_test = true; - break; - } - return false; - } + // Fall back to NFA below. + skipped_test = true; + break; + } + return false; + } if (matchp == NULL) // Matched. Don't care where. - return true; + return true; // SearchDFA set match.end() but didn't know where the // match started. Run the regexp backward from match.end() - // to find the longest possible match -- that's where it started. - Prog* prog = ReverseProg(); + // to find the longest possible match -- that's where it started. + Prog* prog = ReverseProg(); if (prog == NULL) { // Fall back to NFA below. skipped_test = true; break; } - if (!prog->SearchDFA(match, text, Prog::kAnchored, - Prog::kLongestMatch, &match, &dfa_failed, NULL)) { - if (dfa_failed) { + if (!prog->SearchDFA(match, text, Prog::kAnchored, + Prog::kLongestMatch, &match, &dfa_failed, NULL)) { + if (dfa_failed) { if (options_.log_errors()) LOG(ERROR) << "DFA out of memory: " << "pattern length " << pattern_.size() << ", " << "program size " << prog->size() << ", " << "list count " << prog->list_count() << ", " << "bytemap range " << prog->bytemap_range(); - // Fall back to NFA below. - skipped_test = true; - break; - } + // Fall back to NFA below. 
+ skipped_test = true; + break; + } if (options_.log_errors()) LOG(ERROR) << "SearchDFA inconsistency"; - return false; - } - break; - } - - case ANCHOR_BOTH: - case ANCHOR_START: - if (re_anchor == ANCHOR_BOTH) - kind = Prog::kFullMatch; - anchor = Prog::kAnchored; - - // If only a small amount of text and need submatch - // information anyway and we're going to use OnePass or BitState - // to get it, we might as well not even bother with the DFA: - // OnePass or BitState will be fast enough. - // On tiny texts, OnePass outruns even the DFA, and - // it doesn't have the shared state and occasional mutex that - // the DFA does. - if (can_one_pass && text.size() <= 4096 && + return false; + } + break; + } + + case ANCHOR_BOTH: + case ANCHOR_START: + if (re_anchor == ANCHOR_BOTH) + kind = Prog::kFullMatch; + anchor = Prog::kAnchored; + + // If only a small amount of text and need submatch + // information anyway and we're going to use OnePass or BitState + // to get it, we might as well not even bother with the DFA: + // OnePass or BitState will be fast enough. + // On tiny texts, OnePass outruns even the DFA, and + // it doesn't have the shared state and occasional mutex that + // the DFA does. 
+ if (can_one_pass && text.size() <= 4096 && (ncap > 1 || text.size() <= 16)) { - skipped_test = true; - break; - } + skipped_test = true; + break; + } if (can_bit_state && text.size() <= bit_state_text_max_size && ncap > 1) { - skipped_test = true; - break; - } - if (!prog_->SearchDFA(subtext, text, anchor, kind, - &match, &dfa_failed, NULL)) { - if (dfa_failed) { + skipped_test = true; + break; + } + if (!prog_->SearchDFA(subtext, text, anchor, kind, + &match, &dfa_failed, NULL)) { + if (dfa_failed) { if (options_.log_errors()) LOG(ERROR) << "DFA out of memory: " << "pattern length " << pattern_.size() << ", " @@ -814,169 +814,169 @@ bool RE2::Match(const StringPiece& text, << "list count " << prog_->list_count() << ", " << "bytemap range " << prog_->bytemap_range(); // Fall back to NFA below. - skipped_test = true; - break; - } - return false; - } - break; - } - - if (!skipped_test && ncap <= 1) { - // We know exactly where it matches. That's enough. - if (ncap == 1) - submatch[0] = match; - } else { - StringPiece subtext1; - if (skipped_test) { - // DFA ran out of memory or was skipped: - // need to search in entire original text. - subtext1 = subtext; - } else { - // DFA found the exact match location: - // let NFA run an anchored, full match search - // to find submatch locations. - subtext1 = match; - anchor = Prog::kAnchored; - kind = Prog::kFullMatch; - } - - if (can_one_pass && anchor != Prog::kUnanchored) { - if (!prog_->SearchOnePass(subtext1, text, anchor, kind, submatch, ncap)) { + skipped_test = true; + break; + } + return false; + } + break; + } + + if (!skipped_test && ncap <= 1) { + // We know exactly where it matches. That's enough. + if (ncap == 1) + submatch[0] = match; + } else { + StringPiece subtext1; + if (skipped_test) { + // DFA ran out of memory or was skipped: + // need to search in entire original text. 
+ subtext1 = subtext; + } else { + // DFA found the exact match location: + // let NFA run an anchored, full match search + // to find submatch locations. + subtext1 = match; + anchor = Prog::kAnchored; + kind = Prog::kFullMatch; + } + + if (can_one_pass && anchor != Prog::kUnanchored) { + if (!prog_->SearchOnePass(subtext1, text, anchor, kind, submatch, ncap)) { if (!skipped_test && options_.log_errors()) - LOG(ERROR) << "SearchOnePass inconsistency"; - return false; - } + LOG(ERROR) << "SearchOnePass inconsistency"; + return false; + } } else if (can_bit_state && subtext1.size() <= bit_state_text_max_size) { - if (!prog_->SearchBitState(subtext1, text, anchor, - kind, submatch, ncap)) { + if (!prog_->SearchBitState(subtext1, text, anchor, + kind, submatch, ncap)) { if (!skipped_test && options_.log_errors()) - LOG(ERROR) << "SearchBitState inconsistency"; - return false; - } - } else { - if (!prog_->SearchNFA(subtext1, text, anchor, kind, submatch, ncap)) { + LOG(ERROR) << "SearchBitState inconsistency"; + return false; + } + } else { + if (!prog_->SearchNFA(subtext1, text, anchor, kind, submatch, ncap)) { if (!skipped_test && options_.log_errors()) - LOG(ERROR) << "SearchNFA inconsistency"; - return false; - } - } - } - - // Adjust overall match for required prefix that we stripped off. - if (prefixlen > 0 && nsubmatch > 0) + LOG(ERROR) << "SearchNFA inconsistency"; + return false; + } + } + } + + // Adjust overall match for required prefix that we stripped off. + if (prefixlen > 0 && nsubmatch > 0) submatch[0] = StringPiece(submatch[0].data() - prefixlen, - submatch[0].size() + prefixlen); - - // Zero submatches that don't exist in the regexp. - for (int i = ncap; i < nsubmatch; i++) + submatch[0].size() + prefixlen); + + // Zero submatches that don't exist in the regexp. + for (int i = ncap; i < nsubmatch; i++) submatch[i] = StringPiece(); - return true; -} - -// Internal matcher - like Match() but takes Args not StringPieces. 
-bool RE2::DoMatch(const StringPiece& text, + return true; +} + +// Internal matcher - like Match() but takes Args not StringPieces. +bool RE2::DoMatch(const StringPiece& text, Anchor re_anchor, size_t* consumed, - const Arg* const* args, - int n) const { - if (!ok()) { - if (options_.log_errors()) - LOG(ERROR) << "Invalid RE2: " << *error_; - return false; - } - + const Arg* const* args, + int n) const { + if (!ok()) { + if (options_.log_errors()) + LOG(ERROR) << "Invalid RE2: " << *error_; + return false; + } + if (NumberOfCapturingGroups() < n) { // RE has fewer capturing groups than number of Arg pointers passed in. return false; } - // Count number of capture groups needed. - int nvec; - if (n == 0 && consumed == NULL) - nvec = 0; - else - nvec = n+1; - - StringPiece* vec; - StringPiece stkvec[kVecSize]; - StringPiece* heapvec = NULL; - + // Count number of capture groups needed. + int nvec; + if (n == 0 && consumed == NULL) + nvec = 0; + else + nvec = n+1; + + StringPiece* vec; + StringPiece stkvec[kVecSize]; + StringPiece* heapvec = NULL; + if (nvec <= static_cast<int>(arraysize(stkvec))) { - vec = stkvec; - } else { - vec = new StringPiece[nvec]; - heapvec = vec; - } - + vec = stkvec; + } else { + vec = new StringPiece[nvec]; + heapvec = vec; + } + if (!Match(text, 0, text.size(), re_anchor, vec, nvec)) { - delete[] heapvec; - return false; - } - + delete[] heapvec; + return false; + } + if (consumed != NULL) *consumed = static_cast<size_t>(EndPtr(vec[0]) - BeginPtr(text)); - - if (n == 0 || args == NULL) { - // We are not interested in results - delete[] heapvec; - return true; - } - - // If we got here, we must have matched the whole pattern. - for (int i = 0; i < n; i++) { - const StringPiece& s = vec[i+1]; - if (!args[i]->Parse(s.data(), s.size())) { - // TODO: Should we indicate what the error was? 
- delete[] heapvec; - return false; - } - } - - delete[] heapvec; - return true; -} - -// Checks that the rewrite string is well-formed with respect to this -// regular expression. + + if (n == 0 || args == NULL) { + // We are not interested in results + delete[] heapvec; + return true; + } + + // If we got here, we must have matched the whole pattern. + for (int i = 0; i < n; i++) { + const StringPiece& s = vec[i+1]; + if (!args[i]->Parse(s.data(), s.size())) { + // TODO: Should we indicate what the error was? + delete[] heapvec; + return false; + } + } + + delete[] heapvec; + return true; +} + +// Checks that the rewrite string is well-formed with respect to this +// regular expression. bool RE2::CheckRewriteString(const StringPiece& rewrite, std::string* error) const { - int max_token = -1; - for (const char *s = rewrite.data(), *end = s + rewrite.size(); - s < end; s++) { - int c = *s; - if (c != '\\') { - continue; - } - if (++s == end) { - *error = "Rewrite schema error: '\\' not allowed at end."; - return false; - } - c = *s; - if (c == '\\') { - continue; - } + int max_token = -1; + for (const char *s = rewrite.data(), *end = s + rewrite.size(); + s < end; s++) { + int c = *s; + if (c != '\\') { + continue; + } + if (++s == end) { + *error = "Rewrite schema error: '\\' not allowed at end."; + return false; + } + c = *s; + if (c == '\\') { + continue; + } if (!isdigit(c)) { - *error = "Rewrite schema error: " - "'\\' must be followed by a digit or '\\'."; - return false; - } - int n = (c - '0'); - if (max_token < n) { - max_token = n; - } - } - - if (max_token > NumberOfCapturingGroups()) { + *error = "Rewrite schema error: " + "'\\' must be followed by a digit or '\\'."; + return false; + } + int n = (c - '0'); + if (max_token < n) { + max_token = n; + } + } + + if (max_token > NumberOfCapturingGroups()) { *error = StringPrintf( "Rewrite schema requests %d matches, but the regexp only has %d " "parenthesized subexpressions.", max_token, 
NumberOfCapturingGroups()); - return false; - } - return true; -} - + return false; + } + return true; +} + // Returns the maximum submatch needed for the rewrite to be done by Replace(). // E.g. if rewrite == "foo \\2,\\1", returns 2. int RE2::MaxSubmatch(const StringPiece& rewrite) { @@ -1033,32 +1033,32 @@ bool RE2::Rewrite(std::string* out, return true; } -/***** Parsers for various types *****/ - +/***** Parsers for various types *****/ + namespace re2_internal { template <> bool Parse(const char* str, size_t n, void* dest) { - // We fail if somebody asked us to store into a non-NULL void* pointer - return (dest == NULL); -} - + // We fail if somebody asked us to store into a non-NULL void* pointer + return (dest == NULL); +} + template <> bool Parse(const char* str, size_t n, std::string* dest) { - if (dest == NULL) return true; + if (dest == NULL) return true; dest->assign(str, n); - return true; -} - + return true; +} + #if defined(ARCADIA_ROOT) template <> bool Parse(const char* str, size_t n, TString* dest) { - if (dest == NULL) return true; + if (dest == NULL) return true; dest->assign(str, n); - return true; -} + return true; +} #endif - + template <> bool Parse(const char* str, size_t n, StringPiece* dest) { if (dest == NULL) return true; @@ -1068,16 +1068,16 @@ bool Parse(const char* str, size_t n, StringPiece* dest) { template <> bool Parse(const char* str, size_t n, char* dest) { - if (n != 1) return false; - if (dest == NULL) return true; + if (n != 1) return false; + if (dest == NULL) return true; *dest = str[0]; - return true; -} - + return true; +} + template <> bool Parse(const char* str, size_t n, signed char* dest) { - if (n != 1) return false; - if (dest == NULL) return true; + if (n != 1) return false; + if (dest == NULL) return true; *dest = str[0]; return true; } @@ -1087,12 +1087,12 @@ bool Parse(const char* str, size_t n, unsigned char* dest) { if (n != 1) return false; if (dest == NULL) return true; *dest = str[0]; - return true; -} - 
-// Largest number spec that we are willing to parse -static const int kMaxNumberLength = 32; - + return true; +} + +// Largest number spec that we are willing to parse +static const int kMaxNumberLength = 32; + // REQUIRES "buf" must have length at least nbuf. // Copies "str" into "buf" and null-terminates. // Overwrites *np with the new length. @@ -1101,7 +1101,7 @@ static const char* TerminateNumber(char* buf, size_t nbuf, const char* str, size_t n = *np; if (n == 0) return ""; if (n > 0 && isspace(*str)) { - // We are less forgiving than the strtoxxx() routines and do not + // We are less forgiving than the strtoxxx() routines and do not // allow leading spaces. We do allow leading spaces for floats. if (!accept_spaces) { return ""; @@ -1110,8 +1110,8 @@ static const char* TerminateNumber(char* buf, size_t nbuf, const char* str, n--; str++; } - } - + } + // Although buf has a fixed maximum size, we can still handle // arbitrarily large integers correctly by omitting leading zeros. // (Numbers that are still too long will be out of range.) 
@@ -1125,7 +1125,7 @@ static const char* TerminateNumber(char* buf, size_t nbuf, const char* str, neg = true; n--; str++; - } + } if (n >= 3 && str[0] == '0' && str[1] == '0') { while (n >= 3 && str[2] == '0') { @@ -1148,11 +1148,11 @@ static const char* TerminateNumber(char* buf, size_t nbuf, const char* str, buf[n] = '\0'; *np = n; return buf; -} - +} + template <> bool Parse(const char* str, size_t n, float* dest) { - if (n == 0) return false; + if (n == 0) return false; static const int kMaxLength = 200; char buf[kMaxLength+1]; str = TerminateNumber(buf, sizeof buf, str, &n, true); @@ -1185,127 +1185,127 @@ bool Parse(const char* str, size_t n, double* dest) { template <> bool Parse(const char* str, size_t n, long* dest, int radix) { if (n == 0) return false; - char buf[kMaxNumberLength+1]; + char buf[kMaxNumberLength+1]; str = TerminateNumber(buf, sizeof buf, str, &n, false); - char* end; - errno = 0; - long r = strtol(str, &end, radix); - if (end != str + n) return false; // Leftover junk - if (errno) return false; - if (dest == NULL) return true; + char* end; + errno = 0; + long r = strtol(str, &end, radix); + if (end != str + n) return false; // Leftover junk + if (errno) return false; + if (dest == NULL) return true; *dest = r; - return true; -} - + return true; +} + template <> bool Parse(const char* str, size_t n, unsigned long* dest, int radix) { - if (n == 0) return false; - char buf[kMaxNumberLength+1]; + if (n == 0) return false; + char buf[kMaxNumberLength+1]; str = TerminateNumber(buf, sizeof buf, str, &n, false); - if (str[0] == '-') { + if (str[0] == '-') { // strtoul() will silently accept negative numbers and parse // them. This module is more strict and treats them as errors. 
return false; - } - - char* end; - errno = 0; - unsigned long r = strtoul(str, &end, radix); - if (end != str + n) return false; // Leftover junk - if (errno) return false; - if (dest == NULL) return true; + } + + char* end; + errno = 0; + unsigned long r = strtoul(str, &end, radix); + if (end != str + n) return false; // Leftover junk + if (errno) return false; + if (dest == NULL) return true; *dest = r; - return true; -} - + return true; +} + template <> bool Parse(const char* str, size_t n, short* dest, int radix) { - long r; + long r; if (!Parse(str, n, &r, radix)) return false; // Could not parse if ((short)r != r) return false; // Out of range - if (dest == NULL) return true; + if (dest == NULL) return true; *dest = (short)r; - return true; -} - + return true; +} + template <> bool Parse(const char* str, size_t n, unsigned short* dest, int radix) { - unsigned long r; + unsigned long r; if (!Parse(str, n, &r, radix)) return false; // Could not parse if ((unsigned short)r != r) return false; // Out of range - if (dest == NULL) return true; + if (dest == NULL) return true; *dest = (unsigned short)r; - return true; -} - + return true; +} + template <> bool Parse(const char* str, size_t n, int* dest, int radix) { - long r; + long r; if (!Parse(str, n, &r, radix)) return false; // Could not parse if ((int)r != r) return false; // Out of range - if (dest == NULL) return true; + if (dest == NULL) return true; *dest = (int)r; - return true; -} - + return true; +} + template <> bool Parse(const char* str, size_t n, unsigned int* dest, int radix) { - unsigned long r; + unsigned long r; if (!Parse(str, n, &r, radix)) return false; // Could not parse if ((unsigned int)r != r) return false; // Out of range - if (dest == NULL) return true; + if (dest == NULL) return true; *dest = (unsigned int)r; - return true; -} - + return true; +} + template <> bool Parse(const char* str, size_t n, long long* dest, int radix) { - if (n == 0) return false; - char buf[kMaxNumberLength+1]; 
+ if (n == 0) return false; + char buf[kMaxNumberLength+1]; str = TerminateNumber(buf, sizeof buf, str, &n, false); - char* end; - errno = 0; + char* end; + errno = 0; long long r = strtoll(str, &end, radix); - if (end != str + n) return false; // Leftover junk - if (errno) return false; - if (dest == NULL) return true; + if (end != str + n) return false; // Leftover junk + if (errno) return false; + if (dest == NULL) return true; *dest = r; - return true; -} - + return true; +} + template <> bool Parse(const char* str, size_t n, unsigned long long* dest, int radix) { - if (n == 0) return false; - char buf[kMaxNumberLength+1]; + if (n == 0) return false; + char buf[kMaxNumberLength+1]; str = TerminateNumber(buf, sizeof buf, str, &n, false); - if (str[0] == '-') { - // strtoull() will silently accept negative numbers and parse - // them. This module is more strict and treats them as errors. - return false; - } - char* end; - errno = 0; + if (str[0] == '-') { + // strtoull() will silently accept negative numbers and parse + // them. This module is more strict and treats them as errors. + return false; + } + char* end; + errno = 0; unsigned long long r = strtoull(str, &end, radix); - if (end != str + n) return false; // Leftover junk - if (errno) return false; - if (dest == NULL) return true; + if (end != str + n) return false; // Leftover junk + if (errno) return false; + if (dest == NULL) return true; *dest = r; - return true; -} - + return true; +} + } // namespace re2_internal - + namespace hooks { - + #ifdef RE2_HAVE_THREAD_LOCAL thread_local const RE2* context = NULL; #endif - + template <typename T> union Hook { void Store(T* cb) { cb_.store(cb, std::memory_order_release); } T* Load() const { return cb_.load(std::memory_order_acquire); } - + #if !defined(__clang__) && defined(_MSC_VER) // Citing https://github.com/protocolbuffers/protobuf/pull/4777 as precedent, // this is a gross hack to make std::atomic<T*> constant-initialized on MSVC. 
@@ -1313,10 +1313,10 @@ union Hook { "std::atomic<T*> must be always lock-free"); T* cb_for_constinit_; #endif - + std::atomic<T*> cb_; }; - + template <typename T> static void DoNothing(const T&) {} @@ -1332,4 +1332,4 @@ DEFINE_HOOK(DFASearchFailure, dfa_search_failure) } // namespace hooks -} // namespace re2 +} // namespace re2 diff --git a/contrib/libs/re2/re2/regexp.cc b/contrib/libs/re2/re2/regexp.cc index ca1318b43d..949f9dbf72 100644 --- a/contrib/libs/re2/re2/regexp.cc +++ b/contrib/libs/re2/re2/regexp.cc @@ -1,11 +1,11 @@ -// Copyright 2006 The RE2 Authors. All Rights Reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// Regular expression representation. -// Tested by parse_test.cc - -#include "re2/regexp.h" +// Copyright 2006 The RE2 Authors. All Rights Reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Regular expression representation. +// Tested by parse_test.cc + +#include "re2/regexp.h" #include <stddef.h> #include <stdint.h> @@ -22,80 +22,80 @@ #include "util/utf.h" #include "re2/pod_array.h" #include "re2/stringpiece.h" -#include "re2/walker-inl.h" - -namespace re2 { - -// Constructor. Allocates vectors as appropriate for operator. -Regexp::Regexp(RegexpOp op, ParseFlags parse_flags) +#include "re2/walker-inl.h" + +namespace re2 { + +// Constructor. Allocates vectors as appropriate for operator. +Regexp::Regexp(RegexpOp op, ParseFlags parse_flags) : op_(static_cast<uint8_t>(op)), - simple_(false), + simple_(false), parse_flags_(static_cast<uint16_t>(parse_flags)), - ref_(1), - nsub_(0), - down_(NULL) { - subone_ = NULL; - memset(the_union_, 0, sizeof the_union_); -} - -// Destructor. Assumes already cleaned up children. -// Private: use Decref() instead of delete to destroy Regexps. 
-// Can't call Decref on the sub-Regexps here because -// that could cause arbitrarily deep recursion, so -// required Decref() to have handled them for us. -Regexp::~Regexp() { - if (nsub_ > 0) - LOG(DFATAL) << "Regexp not destroyed."; - - switch (op_) { - default: - break; - case kRegexpCapture: - delete name_; - break; - case kRegexpLiteralString: - delete[] runes_; - break; - case kRegexpCharClass: + ref_(1), + nsub_(0), + down_(NULL) { + subone_ = NULL; + memset(the_union_, 0, sizeof the_union_); +} + +// Destructor. Assumes already cleaned up children. +// Private: use Decref() instead of delete to destroy Regexps. +// Can't call Decref on the sub-Regexps here because +// that could cause arbitrarily deep recursion, so +// required Decref() to have handled them for us. +Regexp::~Regexp() { + if (nsub_ > 0) + LOG(DFATAL) << "Regexp not destroyed."; + + switch (op_) { + default: + break; + case kRegexpCapture: + delete name_; + break; + case kRegexpLiteralString: + delete[] runes_; + break; + case kRegexpCharClass: if (cc_) cc_->Delete(); - delete ccb_; - break; - } -} - -// If it's possible to destroy this regexp without recurring, -// do so and return true. Else return false. -bool Regexp::QuickDestroy() { - if (nsub_ == 0) { - delete this; - return true; - } - return false; -} - + delete ccb_; + break; + } +} + +// If it's possible to destroy this regexp without recurring, +// do so and return true. Else return false. +bool Regexp::QuickDestroy() { + if (nsub_ == 0) { + delete this; + return true; + } + return false; +} + // Lazily allocated. static Mutex* ref_mutex; static std::map<Regexp*, int>* ref_map; - -int Regexp::Ref() { - if (ref_ < kMaxRef) - return ref_; - + +int Regexp::Ref() { + if (ref_ < kMaxRef) + return ref_; + MutexLock l(ref_mutex); return (*ref_map)[this]; -} - -// Increments reference count, returns object as convenience. 
-Regexp* Regexp::Incref() { - if (ref_ >= kMaxRef-1) { +} + +// Increments reference count, returns object as convenience. +Regexp* Regexp::Incref() { + if (ref_ >= kMaxRef-1) { static std::once_flag ref_once; std::call_once(ref_once, []() { ref_mutex = new Mutex; ref_map = new std::map<Regexp*, int>; }); - // Store ref count in overflow map. + // Store ref count in overflow map. MutexLock l(ref_mutex); if (ref_ == kMaxRef) { // already overflowed @@ -104,97 +104,97 @@ Regexp* Regexp::Incref() { // overflowing now (*ref_map)[this] = kMaxRef; ref_ = kMaxRef; - } - return this; - } - - ref_++; - return this; -} - -// Decrements reference count and deletes this object if count reaches 0. -void Regexp::Decref() { - if (ref_ == kMaxRef) { - // Ref count is stored in overflow map. + } + return this; + } + + ref_++; + return this; +} + +// Decrements reference count and deletes this object if count reaches 0. +void Regexp::Decref() { + if (ref_ == kMaxRef) { + // Ref count is stored in overflow map. MutexLock l(ref_mutex); int r = (*ref_map)[this] - 1; - if (r < kMaxRef) { + if (r < kMaxRef) { ref_ = static_cast<uint16_t>(r); ref_map->erase(this); - } else { + } else { (*ref_map)[this] = r; - } - return; - } - ref_--; - if (ref_ == 0) - Destroy(); -} - -// Deletes this object; ref count has count reached 0. -void Regexp::Destroy() { - if (QuickDestroy()) - return; - - // Handle recursive Destroy with explicit stack - // to avoid arbitrarily deep recursion on process stack [sigh]. 
- down_ = NULL; - Regexp* stack = this; - while (stack != NULL) { - Regexp* re = stack; - stack = re->down_; - if (re->ref_ != 0) - LOG(DFATAL) << "Bad reference count " << re->ref_; - if (re->nsub_ > 0) { - Regexp** subs = re->sub(); - for (int i = 0; i < re->nsub_; i++) { - Regexp* sub = subs[i]; - if (sub == NULL) - continue; - if (sub->ref_ == kMaxRef) - sub->Decref(); - else - --sub->ref_; - if (sub->ref_ == 0 && !sub->QuickDestroy()) { - sub->down_ = stack; - stack = sub; - } - } - if (re->nsub_ > 1) - delete[] subs; - re->nsub_ = 0; - } - delete re; - } -} - -void Regexp::AddRuneToString(Rune r) { - DCHECK(op_ == kRegexpLiteralString); - if (nrunes_ == 0) { - // start with 8 - runes_ = new Rune[8]; - } else if (nrunes_ >= 8 && (nrunes_ & (nrunes_ - 1)) == 0) { - // double on powers of two - Rune *old = runes_; - runes_ = new Rune[nrunes_ * 2]; - for (int i = 0; i < nrunes_; i++) - runes_[i] = old[i]; - delete[] old; - } - - runes_[nrunes_++] = r; -} - -Regexp* Regexp::HaveMatch(int match_id, ParseFlags flags) { - Regexp* re = new Regexp(kRegexpHaveMatch, flags); - re->match_id_ = match_id; - return re; -} - + } + return; + } + ref_--; + if (ref_ == 0) + Destroy(); +} + +// Deletes this object; ref count has count reached 0. +void Regexp::Destroy() { + if (QuickDestroy()) + return; + + // Handle recursive Destroy with explicit stack + // to avoid arbitrarily deep recursion on process stack [sigh]. 
+ down_ = NULL; + Regexp* stack = this; + while (stack != NULL) { + Regexp* re = stack; + stack = re->down_; + if (re->ref_ != 0) + LOG(DFATAL) << "Bad reference count " << re->ref_; + if (re->nsub_ > 0) { + Regexp** subs = re->sub(); + for (int i = 0; i < re->nsub_; i++) { + Regexp* sub = subs[i]; + if (sub == NULL) + continue; + if (sub->ref_ == kMaxRef) + sub->Decref(); + else + --sub->ref_; + if (sub->ref_ == 0 && !sub->QuickDestroy()) { + sub->down_ = stack; + stack = sub; + } + } + if (re->nsub_ > 1) + delete[] subs; + re->nsub_ = 0; + } + delete re; + } +} + +void Regexp::AddRuneToString(Rune r) { + DCHECK(op_ == kRegexpLiteralString); + if (nrunes_ == 0) { + // start with 8 + runes_ = new Rune[8]; + } else if (nrunes_ >= 8 && (nrunes_ & (nrunes_ - 1)) == 0) { + // double on powers of two + Rune *old = runes_; + runes_ = new Rune[nrunes_ * 2]; + for (int i = 0; i < nrunes_; i++) + runes_[i] = old[i]; + delete[] old; + } + + runes_[nrunes_++] = r; +} + +Regexp* Regexp::HaveMatch(int match_id, ParseFlags flags) { + Regexp* re = new Regexp(kRegexpHaveMatch, flags); + re->match_id_ = match_id; + return re; +} + Regexp* Regexp::StarPlusOrQuest(RegexpOp op, Regexp* sub, ParseFlags flags) { // Squash **, ++ and ??. if (op == sub->op() && flags == sub->parse_flags()) - return sub; + return sub; // Squash *+, *?, +*, +?, ?* and ?+. They all squash to *, so because // op is Star/Plus/Quest, we just have to check that sub->op() is too. 
@@ -215,28 +215,28 @@ Regexp* Regexp::StarPlusOrQuest(RegexpOp op, Regexp* sub, ParseFlags flags) { } Regexp* re = new Regexp(op, flags); - re->AllocSub(1); - re->sub()[0] = sub; - return re; -} - + re->AllocSub(1); + re->sub()[0] = sub; + return re; +} + Regexp* Regexp::Plus(Regexp* sub, ParseFlags flags) { return StarPlusOrQuest(kRegexpPlus, sub, flags); } -Regexp* Regexp::Star(Regexp* sub, ParseFlags flags) { +Regexp* Regexp::Star(Regexp* sub, ParseFlags flags) { return StarPlusOrQuest(kRegexpStar, sub, flags); -} - -Regexp* Regexp::Quest(Regexp* sub, ParseFlags flags) { +} + +Regexp* Regexp::Quest(Regexp* sub, ParseFlags flags) { return StarPlusOrQuest(kRegexpQuest, sub, flags); -} - -Regexp* Regexp::ConcatOrAlternate(RegexpOp op, Regexp** sub, int nsub, - ParseFlags flags, bool can_factor) { - if (nsub == 1) - return sub[0]; - +} + +Regexp* Regexp::ConcatOrAlternate(RegexpOp op, Regexp** sub, int nsub, + ParseFlags flags, bool can_factor) { + if (nsub == 1) + return sub[0]; + if (nsub == 0) { if (op == kRegexpAlternate) return new Regexp(kRegexpNoMatch, flags); @@ -245,416 +245,416 @@ Regexp* Regexp::ConcatOrAlternate(RegexpOp op, Regexp** sub, int nsub, } PODArray<Regexp*> subcopy; - if (op == kRegexpAlternate && can_factor) { - // Going to edit sub; make a copy so we don't step on caller. + if (op == kRegexpAlternate && can_factor) { + // Going to edit sub; make a copy so we don't step on caller. subcopy = PODArray<Regexp*>(nsub); memmove(subcopy.data(), sub, nsub * sizeof sub[0]); sub = subcopy.data(); - nsub = FactorAlternation(sub, nsub, flags); - if (nsub == 1) { - Regexp* re = sub[0]; - return re; - } - } - - if (nsub > kMaxNsub) { - // Too many subexpressions to fit in a single Regexp. - // Make a two-level tree. Two levels gets us to 65535^2. 
- int nbigsub = (nsub+kMaxNsub-1)/kMaxNsub; - Regexp* re = new Regexp(op, flags); - re->AllocSub(nbigsub); - Regexp** subs = re->sub(); - for (int i = 0; i < nbigsub - 1; i++) - subs[i] = ConcatOrAlternate(op, sub+i*kMaxNsub, kMaxNsub, flags, false); - subs[nbigsub - 1] = ConcatOrAlternate(op, sub+(nbigsub-1)*kMaxNsub, - nsub - (nbigsub-1)*kMaxNsub, flags, - false); - return re; - } - - Regexp* re = new Regexp(op, flags); - re->AllocSub(nsub); - Regexp** subs = re->sub(); - for (int i = 0; i < nsub; i++) - subs[i] = sub[i]; - return re; -} - -Regexp* Regexp::Concat(Regexp** sub, int nsub, ParseFlags flags) { - return ConcatOrAlternate(kRegexpConcat, sub, nsub, flags, false); -} - -Regexp* Regexp::Alternate(Regexp** sub, int nsub, ParseFlags flags) { - return ConcatOrAlternate(kRegexpAlternate, sub, nsub, flags, true); -} - -Regexp* Regexp::AlternateNoFactor(Regexp** sub, int nsub, ParseFlags flags) { - return ConcatOrAlternate(kRegexpAlternate, sub, nsub, flags, false); -} - -Regexp* Regexp::Capture(Regexp* sub, ParseFlags flags, int cap) { - Regexp* re = new Regexp(kRegexpCapture, flags); - re->AllocSub(1); - re->sub()[0] = sub; - re->cap_ = cap; - return re; -} - -Regexp* Regexp::Repeat(Regexp* sub, ParseFlags flags, int min, int max) { - Regexp* re = new Regexp(kRegexpRepeat, flags); - re->AllocSub(1); - re->sub()[0] = sub; - re->min_ = min; - re->max_ = max; - return re; -} - -Regexp* Regexp::NewLiteral(Rune rune, ParseFlags flags) { - Regexp* re = new Regexp(kRegexpLiteral, flags); - re->rune_ = rune; - return re; -} - -Regexp* Regexp::LiteralString(Rune* runes, int nrunes, ParseFlags flags) { - if (nrunes <= 0) - return new Regexp(kRegexpEmptyMatch, flags); - if (nrunes == 1) - return NewLiteral(runes[0], flags); - Regexp* re = new Regexp(kRegexpLiteralString, flags); - for (int i = 0; i < nrunes; i++) - re->AddRuneToString(runes[i]); - return re; -} - -Regexp* Regexp::NewCharClass(CharClass* cc, ParseFlags flags) { - Regexp* re = new Regexp(kRegexpCharClass, 
flags); - re->cc_ = cc; - return re; -} - -void Regexp::Swap(Regexp* that) { + nsub = FactorAlternation(sub, nsub, flags); + if (nsub == 1) { + Regexp* re = sub[0]; + return re; + } + } + + if (nsub > kMaxNsub) { + // Too many subexpressions to fit in a single Regexp. + // Make a two-level tree. Two levels gets us to 65535^2. + int nbigsub = (nsub+kMaxNsub-1)/kMaxNsub; + Regexp* re = new Regexp(op, flags); + re->AllocSub(nbigsub); + Regexp** subs = re->sub(); + for (int i = 0; i < nbigsub - 1; i++) + subs[i] = ConcatOrAlternate(op, sub+i*kMaxNsub, kMaxNsub, flags, false); + subs[nbigsub - 1] = ConcatOrAlternate(op, sub+(nbigsub-1)*kMaxNsub, + nsub - (nbigsub-1)*kMaxNsub, flags, + false); + return re; + } + + Regexp* re = new Regexp(op, flags); + re->AllocSub(nsub); + Regexp** subs = re->sub(); + for (int i = 0; i < nsub; i++) + subs[i] = sub[i]; + return re; +} + +Regexp* Regexp::Concat(Regexp** sub, int nsub, ParseFlags flags) { + return ConcatOrAlternate(kRegexpConcat, sub, nsub, flags, false); +} + +Regexp* Regexp::Alternate(Regexp** sub, int nsub, ParseFlags flags) { + return ConcatOrAlternate(kRegexpAlternate, sub, nsub, flags, true); +} + +Regexp* Regexp::AlternateNoFactor(Regexp** sub, int nsub, ParseFlags flags) { + return ConcatOrAlternate(kRegexpAlternate, sub, nsub, flags, false); +} + +Regexp* Regexp::Capture(Regexp* sub, ParseFlags flags, int cap) { + Regexp* re = new Regexp(kRegexpCapture, flags); + re->AllocSub(1); + re->sub()[0] = sub; + re->cap_ = cap; + return re; +} + +Regexp* Regexp::Repeat(Regexp* sub, ParseFlags flags, int min, int max) { + Regexp* re = new Regexp(kRegexpRepeat, flags); + re->AllocSub(1); + re->sub()[0] = sub; + re->min_ = min; + re->max_ = max; + return re; +} + +Regexp* Regexp::NewLiteral(Rune rune, ParseFlags flags) { + Regexp* re = new Regexp(kRegexpLiteral, flags); + re->rune_ = rune; + return re; +} + +Regexp* Regexp::LiteralString(Rune* runes, int nrunes, ParseFlags flags) { + if (nrunes <= 0) + return new 
Regexp(kRegexpEmptyMatch, flags); + if (nrunes == 1) + return NewLiteral(runes[0], flags); + Regexp* re = new Regexp(kRegexpLiteralString, flags); + for (int i = 0; i < nrunes; i++) + re->AddRuneToString(runes[i]); + return re; +} + +Regexp* Regexp::NewCharClass(CharClass* cc, ParseFlags flags) { + Regexp* re = new Regexp(kRegexpCharClass, flags); + re->cc_ = cc; + return re; +} + +void Regexp::Swap(Regexp* that) { // Regexp is not trivially copyable, so we cannot freely copy it with // memmove(3), but swapping objects like so is safe for our purposes. - char tmp[sizeof *this]; + char tmp[sizeof *this]; void* vthis = reinterpret_cast<void*>(this); void* vthat = reinterpret_cast<void*>(that); memmove(tmp, vthis, sizeof *this); memmove(vthis, vthat, sizeof *this); memmove(vthat, tmp, sizeof *this); -} - -// Tests equality of all top-level structure but not subregexps. -static bool TopEqual(Regexp* a, Regexp* b) { - if (a->op() != b->op()) - return false; - - switch (a->op()) { - case kRegexpNoMatch: - case kRegexpEmptyMatch: - case kRegexpAnyChar: - case kRegexpAnyByte: - case kRegexpBeginLine: - case kRegexpEndLine: - case kRegexpWordBoundary: - case kRegexpNoWordBoundary: - case kRegexpBeginText: - return true; - - case kRegexpEndText: - // The parse flags remember whether it's \z or (?-m:$), - // which matters when testing against PCRE. 
- return ((a->parse_flags() ^ b->parse_flags()) & Regexp::WasDollar) == 0; - - case kRegexpLiteral: - return a->rune() == b->rune() && - ((a->parse_flags() ^ b->parse_flags()) & Regexp::FoldCase) == 0; - - case kRegexpLiteralString: - return a->nrunes() == b->nrunes() && - ((a->parse_flags() ^ b->parse_flags()) & Regexp::FoldCase) == 0 && - memcmp(a->runes(), b->runes(), - a->nrunes() * sizeof a->runes()[0]) == 0; - - case kRegexpAlternate: - case kRegexpConcat: - return a->nsub() == b->nsub(); - - case kRegexpStar: - case kRegexpPlus: - case kRegexpQuest: - return ((a->parse_flags() ^ b->parse_flags()) & Regexp::NonGreedy) == 0; - - case kRegexpRepeat: - return ((a->parse_flags() ^ b->parse_flags()) & Regexp::NonGreedy) == 0 && - a->min() == b->min() && - a->max() == b->max(); - - case kRegexpCapture: - return a->cap() == b->cap() && a->name() == b->name(); - - case kRegexpHaveMatch: - return a->match_id() == b->match_id(); - - case kRegexpCharClass: { - CharClass* acc = a->cc(); - CharClass* bcc = b->cc(); - return acc->size() == bcc->size() && - acc->end() - acc->begin() == bcc->end() - bcc->begin() && - memcmp(acc->begin(), bcc->begin(), - (acc->end() - acc->begin()) * sizeof acc->begin()[0]) == 0; - } - } - - LOG(DFATAL) << "Unexpected op in Regexp::Equal: " << a->op(); - return 0; -} - -bool Regexp::Equal(Regexp* a, Regexp* b) { - if (a == NULL || b == NULL) - return a == b; - - if (!TopEqual(a, b)) - return false; - - // Fast path: - // return without allocating vector if there are no subregexps. - switch (a->op()) { - case kRegexpAlternate: - case kRegexpConcat: - case kRegexpStar: - case kRegexpPlus: - case kRegexpQuest: - case kRegexpRepeat: - case kRegexpCapture: - break; - - default: - return true; - } - - // Committed to doing real work. - // The stack (vector) has pairs of regexps waiting to - // be compared. The regexps are only equal if - // all the pairs end up being equal. +} + +// Tests equality of all top-level structure but not subregexps. 
+static bool TopEqual(Regexp* a, Regexp* b) { + if (a->op() != b->op()) + return false; + + switch (a->op()) { + case kRegexpNoMatch: + case kRegexpEmptyMatch: + case kRegexpAnyChar: + case kRegexpAnyByte: + case kRegexpBeginLine: + case kRegexpEndLine: + case kRegexpWordBoundary: + case kRegexpNoWordBoundary: + case kRegexpBeginText: + return true; + + case kRegexpEndText: + // The parse flags remember whether it's \z or (?-m:$), + // which matters when testing against PCRE. + return ((a->parse_flags() ^ b->parse_flags()) & Regexp::WasDollar) == 0; + + case kRegexpLiteral: + return a->rune() == b->rune() && + ((a->parse_flags() ^ b->parse_flags()) & Regexp::FoldCase) == 0; + + case kRegexpLiteralString: + return a->nrunes() == b->nrunes() && + ((a->parse_flags() ^ b->parse_flags()) & Regexp::FoldCase) == 0 && + memcmp(a->runes(), b->runes(), + a->nrunes() * sizeof a->runes()[0]) == 0; + + case kRegexpAlternate: + case kRegexpConcat: + return a->nsub() == b->nsub(); + + case kRegexpStar: + case kRegexpPlus: + case kRegexpQuest: + return ((a->parse_flags() ^ b->parse_flags()) & Regexp::NonGreedy) == 0; + + case kRegexpRepeat: + return ((a->parse_flags() ^ b->parse_flags()) & Regexp::NonGreedy) == 0 && + a->min() == b->min() && + a->max() == b->max(); + + case kRegexpCapture: + return a->cap() == b->cap() && a->name() == b->name(); + + case kRegexpHaveMatch: + return a->match_id() == b->match_id(); + + case kRegexpCharClass: { + CharClass* acc = a->cc(); + CharClass* bcc = b->cc(); + return acc->size() == bcc->size() && + acc->end() - acc->begin() == bcc->end() - bcc->begin() && + memcmp(acc->begin(), bcc->begin(), + (acc->end() - acc->begin()) * sizeof acc->begin()[0]) == 0; + } + } + + LOG(DFATAL) << "Unexpected op in Regexp::Equal: " << a->op(); + return 0; +} + +bool Regexp::Equal(Regexp* a, Regexp* b) { + if (a == NULL || b == NULL) + return a == b; + + if (!TopEqual(a, b)) + return false; + + // Fast path: + // return without allocating vector if there are no 
subregexps. + switch (a->op()) { + case kRegexpAlternate: + case kRegexpConcat: + case kRegexpStar: + case kRegexpPlus: + case kRegexpQuest: + case kRegexpRepeat: + case kRegexpCapture: + break; + + default: + return true; + } + + // Committed to doing real work. + // The stack (vector) has pairs of regexps waiting to + // be compared. The regexps are only equal if + // all the pairs end up being equal. std::vector<Regexp*> stk; - - for (;;) { - // Invariant: TopEqual(a, b) == true. - Regexp* a2; - Regexp* b2; - switch (a->op()) { - default: - break; - case kRegexpAlternate: - case kRegexpConcat: - for (int i = 0; i < a->nsub(); i++) { - a2 = a->sub()[i]; - b2 = b->sub()[i]; - if (!TopEqual(a2, b2)) - return false; - stk.push_back(a2); - stk.push_back(b2); - } - break; - - case kRegexpStar: - case kRegexpPlus: - case kRegexpQuest: - case kRegexpRepeat: - case kRegexpCapture: - a2 = a->sub()[0]; - b2 = b->sub()[0]; - if (!TopEqual(a2, b2)) - return false; - // Really: - // stk.push_back(a2); - // stk.push_back(b2); - // break; - // but faster to assign directly and loop. - a = a2; - b = b2; - continue; - } - + + for (;;) { + // Invariant: TopEqual(a, b) == true. + Regexp* a2; + Regexp* b2; + switch (a->op()) { + default: + break; + case kRegexpAlternate: + case kRegexpConcat: + for (int i = 0; i < a->nsub(); i++) { + a2 = a->sub()[i]; + b2 = b->sub()[i]; + if (!TopEqual(a2, b2)) + return false; + stk.push_back(a2); + stk.push_back(b2); + } + break; + + case kRegexpStar: + case kRegexpPlus: + case kRegexpQuest: + case kRegexpRepeat: + case kRegexpCapture: + a2 = a->sub()[0]; + b2 = b->sub()[0]; + if (!TopEqual(a2, b2)) + return false; + // Really: + // stk.push_back(a2); + // stk.push_back(b2); + // break; + // but faster to assign directly and loop. 
+ a = a2; + b = b2; + continue; + } + size_t n = stk.size(); - if (n == 0) - break; - + if (n == 0) + break; + DCHECK_GE(n, 2); - a = stk[n-2]; - b = stk[n-1]; - stk.resize(n-2); - } - - return true; -} - -// Keep in sync with enum RegexpStatusCode in regexp.h + a = stk[n-2]; + b = stk[n-1]; + stk.resize(n-2); + } + + return true; +} + +// Keep in sync with enum RegexpStatusCode in regexp.h static const char *kErrorStrings[] = { - "no error", - "unexpected error", - "invalid escape sequence", - "invalid character class", - "invalid character class range", - "missing ]", - "missing )", + "no error", + "unexpected error", + "invalid escape sequence", + "invalid character class", + "invalid character class range", + "missing ]", + "missing )", "unexpected )", - "trailing \\", - "no argument for repetition operator", - "invalid repetition size", - "bad repetition operator", - "invalid perl operator", - "invalid UTF-8", - "invalid named capture group", -}; - + "trailing \\", + "no argument for repetition operator", + "invalid repetition size", + "bad repetition operator", + "invalid perl operator", + "invalid UTF-8", + "invalid named capture group", +}; + std::string RegexpStatus::CodeText(enum RegexpStatusCode code) { - if (code < 0 || code >= arraysize(kErrorStrings)) - code = kRegexpInternalError; - return kErrorStrings[code]; -} - + if (code < 0 || code >= arraysize(kErrorStrings)) + code = kRegexpInternalError; + return kErrorStrings[code]; +} + std::string RegexpStatus::Text() const { - if (error_arg_.empty()) - return CodeText(code_); + if (error_arg_.empty()) + return CodeText(code_); std::string s; - s.append(CodeText(code_)); - s.append(": "); - s.append(error_arg_.data(), error_arg_.size()); - return s; -} - -void RegexpStatus::Copy(const RegexpStatus& status) { - code_ = status.code_; - error_arg_ = status.error_arg_; -} - -typedef int Ignored; // Walker<void> doesn't exist - -// Walker subclass to count capturing parens in regexp. 
-class NumCapturesWalker : public Regexp::Walker<Ignored> { - public: - NumCapturesWalker() : ncapture_(0) {} - int ncapture() { return ncapture_; } - - virtual Ignored PreVisit(Regexp* re, Ignored ignored, bool* stop) { - if (re->op() == kRegexpCapture) - ncapture_++; - return ignored; - } - - virtual Ignored ShortVisit(Regexp* re, Ignored ignored) { + s.append(CodeText(code_)); + s.append(": "); + s.append(error_arg_.data(), error_arg_.size()); + return s; +} + +void RegexpStatus::Copy(const RegexpStatus& status) { + code_ = status.code_; + error_arg_ = status.error_arg_; +} + +typedef int Ignored; // Walker<void> doesn't exist + +// Walker subclass to count capturing parens in regexp. +class NumCapturesWalker : public Regexp::Walker<Ignored> { + public: + NumCapturesWalker() : ncapture_(0) {} + int ncapture() { return ncapture_; } + + virtual Ignored PreVisit(Regexp* re, Ignored ignored, bool* stop) { + if (re->op() == kRegexpCapture) + ncapture_++; + return ignored; + } + + virtual Ignored ShortVisit(Regexp* re, Ignored ignored) { // Should never be called: we use Walk(), not WalkExponential(). #ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION - LOG(DFATAL) << "NumCapturesWalker::ShortVisit called"; + LOG(DFATAL) << "NumCapturesWalker::ShortVisit called"; #endif - return ignored; - } - - private: - int ncapture_; + return ignored; + } + + private: + int ncapture_; NumCapturesWalker(const NumCapturesWalker&) = delete; NumCapturesWalker& operator=(const NumCapturesWalker&) = delete; -}; - -int Regexp::NumCaptures() { - NumCapturesWalker w; - w.Walk(this, 0); - return w.ncapture(); -} - -// Walker class to build map of named capture groups and their indices. 
-class NamedCapturesWalker : public Regexp::Walker<Ignored> { - public: - NamedCapturesWalker() : map_(NULL) {} - ~NamedCapturesWalker() { delete map_; } - +}; + +int Regexp::NumCaptures() { + NumCapturesWalker w; + w.Walk(this, 0); + return w.ncapture(); +} + +// Walker class to build map of named capture groups and their indices. +class NamedCapturesWalker : public Regexp::Walker<Ignored> { + public: + NamedCapturesWalker() : map_(NULL) {} + ~NamedCapturesWalker() { delete map_; } + std::map<std::string, int>* TakeMap() { std::map<std::string, int>* m = map_; - map_ = NULL; - return m; - } - + map_ = NULL; + return m; + } + virtual Ignored PreVisit(Regexp* re, Ignored ignored, bool* stop) { - if (re->op() == kRegexpCapture && re->name() != NULL) { - // Allocate map once we find a name. - if (map_ == NULL) + if (re->op() == kRegexpCapture && re->name() != NULL) { + // Allocate map once we find a name. + if (map_ == NULL) map_ = new std::map<std::string, int>; - - // Record first occurrence of each name. - // (The rule is that if you have the same name - // multiple times, only the leftmost one counts.) + + // Record first occurrence of each name. + // (The rule is that if you have the same name + // multiple times, only the leftmost one counts.) map_->insert({*re->name(), re->cap()}); - } - return ignored; - } - - virtual Ignored ShortVisit(Regexp* re, Ignored ignored) { + } + return ignored; + } + + virtual Ignored ShortVisit(Regexp* re, Ignored ignored) { // Should never be called: we use Walk(), not WalkExponential(). 
#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION - LOG(DFATAL) << "NamedCapturesWalker::ShortVisit called"; + LOG(DFATAL) << "NamedCapturesWalker::ShortVisit called"; #endif - return ignored; - } - - private: + return ignored; + } + + private: std::map<std::string, int>* map_; NamedCapturesWalker(const NamedCapturesWalker&) = delete; NamedCapturesWalker& operator=(const NamedCapturesWalker&) = delete; -}; - +}; + std::map<std::string, int>* Regexp::NamedCaptures() { - NamedCapturesWalker w; - w.Walk(this, 0); - return w.TakeMap(); -} - -// Walker class to build map from capture group indices to their names. -class CaptureNamesWalker : public Regexp::Walker<Ignored> { - public: - CaptureNamesWalker() : map_(NULL) {} - ~CaptureNamesWalker() { delete map_; } - + NamedCapturesWalker w; + w.Walk(this, 0); + return w.TakeMap(); +} + +// Walker class to build map from capture group indices to their names. +class CaptureNamesWalker : public Regexp::Walker<Ignored> { + public: + CaptureNamesWalker() : map_(NULL) {} + ~CaptureNamesWalker() { delete map_; } + std::map<int, std::string>* TakeMap() { std::map<int, std::string>* m = map_; - map_ = NULL; - return m; - } - + map_ = NULL; + return m; + } + virtual Ignored PreVisit(Regexp* re, Ignored ignored, bool* stop) { - if (re->op() == kRegexpCapture && re->name() != NULL) { - // Allocate map once we find a name. - if (map_ == NULL) + if (re->op() == kRegexpCapture && re->name() != NULL) { + // Allocate map once we find a name. + if (map_ == NULL) map_ = new std::map<int, std::string>; - - (*map_)[re->cap()] = *re->name(); - } - return ignored; - } - - virtual Ignored ShortVisit(Regexp* re, Ignored ignored) { + + (*map_)[re->cap()] = *re->name(); + } + return ignored; + } + + virtual Ignored ShortVisit(Regexp* re, Ignored ignored) { // Should never be called: we use Walk(), not WalkExponential(). 
#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION - LOG(DFATAL) << "CaptureNamesWalker::ShortVisit called"; + LOG(DFATAL) << "CaptureNamesWalker::ShortVisit called"; #endif - return ignored; - } - - private: + return ignored; + } + + private: std::map<int, std::string>* map_; CaptureNamesWalker(const CaptureNamesWalker&) = delete; CaptureNamesWalker& operator=(const CaptureNamesWalker&) = delete; -}; - +}; + std::map<int, std::string>* Regexp::CaptureNames() { - CaptureNamesWalker w; - w.Walk(this, 0); - return w.TakeMap(); -} - + CaptureNamesWalker w; + w.Walk(this, 0); + return w.TakeMap(); +} + void ConvertRunesToBytes(bool latin1, Rune* runes, int nrunes, std::string* bytes) { if (latin1) { @@ -671,48 +671,48 @@ void ConvertRunesToBytes(bool latin1, Rune* runes, int nrunes, } } -// Determines whether regexp matches must be anchored -// with a fixed string prefix. If so, returns the prefix and -// the regexp that remains after the prefix. The prefix might -// be ASCII case-insensitive. +// Determines whether regexp matches must be anchored +// with a fixed string prefix. If so, returns the prefix and +// the regexp that remains after the prefix. The prefix might +// be ASCII case-insensitive. bool Regexp::RequiredPrefix(std::string* prefix, bool* foldcase, Regexp** suffix) { prefix->clear(); *foldcase = false; *suffix = NULL; - // No need for a walker: the regexp must be of the form - // 1. some number of ^ anchors - // 2. a literal char or string - // 3. the rest - if (op_ != kRegexpConcat) - return false; - int i = 0; + // No need for a walker: the regexp must be of the form + // 1. some number of ^ anchors + // 2. a literal char or string + // 3. 
the rest + if (op_ != kRegexpConcat) + return false; + int i = 0; while (i < nsub_ && sub()[i]->op_ == kRegexpBeginText) - i++; + i++; if (i == 0 || i >= nsub_) - return false; + return false; Regexp* re = sub()[i]; if (re->op_ != kRegexpLiteral && re->op_ != kRegexpLiteralString) return false; i++; - if (i < nsub_) { - for (int j = i; j < nsub_; j++) + if (i < nsub_) { + for (int j = i; j < nsub_; j++) sub()[j]->Incref(); *suffix = Concat(sub() + i, nsub_ - i, parse_flags()); - } else { + } else { *suffix = new Regexp(kRegexpEmptyMatch, parse_flags()); - } + } bool latin1 = (re->parse_flags() & Latin1) != 0; Rune* runes = re->op_ == kRegexpLiteral ? &re->rune_ : re->runes_; int nrunes = re->op_ == kRegexpLiteral ? 1 : re->nrunes_; ConvertRunesToBytes(latin1, runes, nrunes, prefix); *foldcase = (re->parse_flags() & FoldCase) != 0; - return true; -} - + return true; +} + // Determines whether regexp matches must be unanchored // with a fixed string prefix. If so, returns the prefix. // The prefix might be ASCII case-insensitive. @@ -741,246 +741,246 @@ bool Regexp::RequiredPrefixForAccel(std::string* prefix, bool* foldcase) { return true; } -// Character class builder is a balanced binary tree (STL set) -// containing non-overlapping, non-abutting RuneRanges. -// The less-than operator used in the tree treats two -// ranges as equal if they overlap at all, so that -// lookups for a particular Rune are possible. - -CharClassBuilder::CharClassBuilder() { - nrunes_ = 0; - upper_ = 0; - lower_ = 0; -} - -// Add lo-hi to the class; return whether class got bigger. -bool CharClassBuilder::AddRange(Rune lo, Rune hi) { - if (hi < lo) - return false; - - if (lo <= 'z' && hi >= 'A') { - // Overlaps some alpha, maybe not all. - // Update bitmaps telling which ASCII letters are in the set. +// Character class builder is a balanced binary tree (STL set) +// containing non-overlapping, non-abutting RuneRanges. 
+// The less-than operator used in the tree treats two +// ranges as equal if they overlap at all, so that +// lookups for a particular Rune are possible. + +CharClassBuilder::CharClassBuilder() { + nrunes_ = 0; + upper_ = 0; + lower_ = 0; +} + +// Add lo-hi to the class; return whether class got bigger. +bool CharClassBuilder::AddRange(Rune lo, Rune hi) { + if (hi < lo) + return false; + + if (lo <= 'z' && hi >= 'A') { + // Overlaps some alpha, maybe not all. + // Update bitmaps telling which ASCII letters are in the set. Rune lo1 = std::max<Rune>(lo, 'A'); Rune hi1 = std::min<Rune>(hi, 'Z'); - if (lo1 <= hi1) - upper_ |= ((1 << (hi1 - lo1 + 1)) - 1) << (lo1 - 'A'); - + if (lo1 <= hi1) + upper_ |= ((1 << (hi1 - lo1 + 1)) - 1) << (lo1 - 'A'); + lo1 = std::max<Rune>(lo, 'a'); hi1 = std::min<Rune>(hi, 'z'); - if (lo1 <= hi1) - lower_ |= ((1 << (hi1 - lo1 + 1)) - 1) << (lo1 - 'a'); - } - - { // Check whether lo, hi is already in the class. - iterator it = ranges_.find(RuneRange(lo, lo)); - if (it != end() && it->lo <= lo && hi <= it->hi) - return false; - } - - // Look for a range abutting lo on the left. - // If it exists, take it out and increase our range. - if (lo > 0) { - iterator it = ranges_.find(RuneRange(lo-1, lo-1)); - if (it != end()) { - lo = it->lo; - if (it->hi > hi) - hi = it->hi; - nrunes_ -= it->hi - it->lo + 1; - ranges_.erase(it); - } - } - - // Look for a range abutting hi on the right. - // If it exists, take it out and increase our range. - if (hi < Runemax) { - iterator it = ranges_.find(RuneRange(hi+1, hi+1)); - if (it != end()) { - hi = it->hi; - nrunes_ -= it->hi - it->lo + 1; - ranges_.erase(it); - } - } - - // Look for ranges between lo and hi. Take them out. - // This is only safe because the set has no overlapping ranges. - // We've already removed any ranges abutting lo and hi, so - // any that overlap [lo, hi] must be contained within it. 
- for (;;) { - iterator it = ranges_.find(RuneRange(lo, hi)); - if (it == end()) - break; - nrunes_ -= it->hi - it->lo + 1; - ranges_.erase(it); - } - - // Finally, add [lo, hi]. - nrunes_ += hi - lo + 1; - ranges_.insert(RuneRange(lo, hi)); - return true; -} - -void CharClassBuilder::AddCharClass(CharClassBuilder *cc) { - for (iterator it = cc->begin(); it != cc->end(); ++it) - AddRange(it->lo, it->hi); -} - -bool CharClassBuilder::Contains(Rune r) { - return ranges_.find(RuneRange(r, r)) != end(); -} - -// Does the character class behave the same on A-Z as on a-z? -bool CharClassBuilder::FoldsASCII() { - return ((upper_ ^ lower_) & AlphaMask) == 0; -} - -CharClassBuilder* CharClassBuilder::Copy() { - CharClassBuilder* cc = new CharClassBuilder; - for (iterator it = begin(); it != end(); ++it) - cc->ranges_.insert(RuneRange(it->lo, it->hi)); - cc->upper_ = upper_; - cc->lower_ = lower_; - cc->nrunes_ = nrunes_; - return cc; -} - - - -void CharClassBuilder::RemoveAbove(Rune r) { - if (r >= Runemax) - return; - - if (r < 'z') { - if (r < 'a') - lower_ = 0; - else - lower_ &= AlphaMask >> ('z' - r); - } - - if (r < 'Z') { - if (r < 'A') - upper_ = 0; - else - upper_ &= AlphaMask >> ('Z' - r); - } - - for (;;) { - - iterator it = ranges_.find(RuneRange(r + 1, Runemax)); - if (it == end()) - break; - RuneRange rr = *it; - ranges_.erase(it); - nrunes_ -= rr.hi - rr.lo + 1; - if (rr.lo <= r) { - rr.hi = r; - ranges_.insert(rr); - nrunes_ += rr.hi - rr.lo + 1; - } - } -} - -void CharClassBuilder::Negate() { - // Build up negation and then copy in. - // Could edit ranges in place, but C++ won't let me. + if (lo1 <= hi1) + lower_ |= ((1 << (hi1 - lo1 + 1)) - 1) << (lo1 - 'a'); + } + + { // Check whether lo, hi is already in the class. + iterator it = ranges_.find(RuneRange(lo, lo)); + if (it != end() && it->lo <= lo && hi <= it->hi) + return false; + } + + // Look for a range abutting lo on the left. + // If it exists, take it out and increase our range. 
+ if (lo > 0) { + iterator it = ranges_.find(RuneRange(lo-1, lo-1)); + if (it != end()) { + lo = it->lo; + if (it->hi > hi) + hi = it->hi; + nrunes_ -= it->hi - it->lo + 1; + ranges_.erase(it); + } + } + + // Look for a range abutting hi on the right. + // If it exists, take it out and increase our range. + if (hi < Runemax) { + iterator it = ranges_.find(RuneRange(hi+1, hi+1)); + if (it != end()) { + hi = it->hi; + nrunes_ -= it->hi - it->lo + 1; + ranges_.erase(it); + } + } + + // Look for ranges between lo and hi. Take them out. + // This is only safe because the set has no overlapping ranges. + // We've already removed any ranges abutting lo and hi, so + // any that overlap [lo, hi] must be contained within it. + for (;;) { + iterator it = ranges_.find(RuneRange(lo, hi)); + if (it == end()) + break; + nrunes_ -= it->hi - it->lo + 1; + ranges_.erase(it); + } + + // Finally, add [lo, hi]. + nrunes_ += hi - lo + 1; + ranges_.insert(RuneRange(lo, hi)); + return true; +} + +void CharClassBuilder::AddCharClass(CharClassBuilder *cc) { + for (iterator it = cc->begin(); it != cc->end(); ++it) + AddRange(it->lo, it->hi); +} + +bool CharClassBuilder::Contains(Rune r) { + return ranges_.find(RuneRange(r, r)) != end(); +} + +// Does the character class behave the same on A-Z as on a-z? 
+bool CharClassBuilder::FoldsASCII() { + return ((upper_ ^ lower_) & AlphaMask) == 0; +} + +CharClassBuilder* CharClassBuilder::Copy() { + CharClassBuilder* cc = new CharClassBuilder; + for (iterator it = begin(); it != end(); ++it) + cc->ranges_.insert(RuneRange(it->lo, it->hi)); + cc->upper_ = upper_; + cc->lower_ = lower_; + cc->nrunes_ = nrunes_; + return cc; +} + + + +void CharClassBuilder::RemoveAbove(Rune r) { + if (r >= Runemax) + return; + + if (r < 'z') { + if (r < 'a') + lower_ = 0; + else + lower_ &= AlphaMask >> ('z' - r); + } + + if (r < 'Z') { + if (r < 'A') + upper_ = 0; + else + upper_ &= AlphaMask >> ('Z' - r); + } + + for (;;) { + + iterator it = ranges_.find(RuneRange(r + 1, Runemax)); + if (it == end()) + break; + RuneRange rr = *it; + ranges_.erase(it); + nrunes_ -= rr.hi - rr.lo + 1; + if (rr.lo <= r) { + rr.hi = r; + ranges_.insert(rr); + nrunes_ += rr.hi - rr.lo + 1; + } + } +} + +void CharClassBuilder::Negate() { + // Build up negation and then copy in. + // Could edit ranges in place, but C++ won't let me. std::vector<RuneRange> v; - v.reserve(ranges_.size() + 1); - - // In negation, first range begins at 0, unless - // the current class begins at 0. - iterator it = begin(); - if (it == end()) { - v.push_back(RuneRange(0, Runemax)); - } else { - int nextlo = 0; - if (it->lo == 0) { - nextlo = it->hi + 1; - ++it; - } - for (; it != end(); ++it) { - v.push_back(RuneRange(nextlo, it->lo - 1)); - nextlo = it->hi + 1; - } - if (nextlo <= Runemax) - v.push_back(RuneRange(nextlo, Runemax)); - } - - ranges_.clear(); + v.reserve(ranges_.size() + 1); + + // In negation, first range begins at 0, unless + // the current class begins at 0. 
+ iterator it = begin(); + if (it == end()) { + v.push_back(RuneRange(0, Runemax)); + } else { + int nextlo = 0; + if (it->lo == 0) { + nextlo = it->hi + 1; + ++it; + } + for (; it != end(); ++it) { + v.push_back(RuneRange(nextlo, it->lo - 1)); + nextlo = it->hi + 1; + } + if (nextlo <= Runemax) + v.push_back(RuneRange(nextlo, Runemax)); + } + + ranges_.clear(); for (size_t i = 0; i < v.size(); i++) - ranges_.insert(v[i]); - - upper_ = AlphaMask & ~upper_; - lower_ = AlphaMask & ~lower_; - nrunes_ = Runemax+1 - nrunes_; -} - -// Character class is a sorted list of ranges. -// The ranges are allocated in the same block as the header, -// necessitating a special allocator and Delete method. - + ranges_.insert(v[i]); + + upper_ = AlphaMask & ~upper_; + lower_ = AlphaMask & ~lower_; + nrunes_ = Runemax+1 - nrunes_; +} + +// Character class is a sorted list of ranges. +// The ranges are allocated in the same block as the header, +// necessitating a special allocator and Delete method. + CharClass* CharClass::New(size_t maxranges) { - CharClass* cc; + CharClass* cc; uint8_t* data = new uint8_t[sizeof *cc + maxranges*sizeof cc->ranges_[0]]; - cc = reinterpret_cast<CharClass*>(data); - cc->ranges_ = reinterpret_cast<RuneRange*>(data + sizeof *cc); - cc->nranges_ = 0; - cc->folds_ascii_ = false; - cc->nrunes_ = 0; - return cc; -} - -void CharClass::Delete() { + cc = reinterpret_cast<CharClass*>(data); + cc->ranges_ = reinterpret_cast<RuneRange*>(data + sizeof *cc); + cc->nranges_ = 0; + cc->folds_ascii_ = false; + cc->nrunes_ = 0; + return cc; +} + +void CharClass::Delete() { uint8_t* data = reinterpret_cast<uint8_t*>(this); - delete[] data; -} - -CharClass* CharClass::Negate() { + delete[] data; +} + +CharClass* CharClass::Negate() { CharClass* cc = CharClass::New(static_cast<size_t>(nranges_+1)); - cc->folds_ascii_ = folds_ascii_; - cc->nrunes_ = Runemax + 1 - nrunes_; - int n = 0; - int nextlo = 0; - for (CharClass::iterator it = begin(); it != end(); ++it) { - if 
(it->lo == nextlo) { - nextlo = it->hi + 1; - } else { - cc->ranges_[n++] = RuneRange(nextlo, it->lo - 1); - nextlo = it->hi + 1; - } - } - if (nextlo <= Runemax) - cc->ranges_[n++] = RuneRange(nextlo, Runemax); - cc->nranges_ = n; - return cc; -} - + cc->folds_ascii_ = folds_ascii_; + cc->nrunes_ = Runemax + 1 - nrunes_; + int n = 0; + int nextlo = 0; + for (CharClass::iterator it = begin(); it != end(); ++it) { + if (it->lo == nextlo) { + nextlo = it->hi + 1; + } else { + cc->ranges_[n++] = RuneRange(nextlo, it->lo - 1); + nextlo = it->hi + 1; + } + } + if (nextlo <= Runemax) + cc->ranges_[n++] = RuneRange(nextlo, Runemax); + cc->nranges_ = n; + return cc; +} + bool CharClass::Contains(Rune r) const { - RuneRange* rr = ranges_; - int n = nranges_; - while (n > 0) { - int m = n/2; - if (rr[m].hi < r) { - rr += m+1; - n -= m+1; - } else if (r < rr[m].lo) { - n = m; - } else { // rr[m].lo <= r && r <= rr[m].hi - return true; - } - } - return false; -} - -CharClass* CharClassBuilder::GetCharClass() { + RuneRange* rr = ranges_; + int n = nranges_; + while (n > 0) { + int m = n/2; + if (rr[m].hi < r) { + rr += m+1; + n -= m+1; + } else if (r < rr[m].lo) { + n = m; + } else { // rr[m].lo <= r && r <= rr[m].hi + return true; + } + } + return false; +} + +CharClass* CharClassBuilder::GetCharClass() { CharClass* cc = CharClass::New(ranges_.size()); - int n = 0; - for (iterator it = begin(); it != end(); ++it) - cc->ranges_[n++] = *it; - cc->nranges_ = n; + int n = 0; + for (iterator it = begin(); it != end(); ++it) + cc->ranges_[n++] = *it; + cc->nranges_ = n; DCHECK_LE(n, static_cast<int>(ranges_.size())); - cc->nrunes_ = nrunes_; - cc->folds_ascii_ = FoldsASCII(); - return cc; -} - -} // namespace re2 + cc->nrunes_ = nrunes_; + cc->folds_ascii_ = FoldsASCII(); + return cc; +} + +} // namespace re2 diff --git a/contrib/libs/re2/re2/regexp.h b/contrib/libs/re2/re2/regexp.h index b6446f9fe5..73dca2d64e 100644 --- a/contrib/libs/re2/re2/regexp.h +++ 
b/contrib/libs/re2/re2/regexp.h @@ -1,283 +1,283 @@ -// Copyright 2006 The RE2 Authors. All Rights Reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - +// Copyright 2006 The RE2 Authors. All Rights Reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + #ifndef RE2_REGEXP_H_ #define RE2_REGEXP_H_ -// --- SPONSORED LINK -------------------------------------------------- -// If you want to use this library for regular expression matching, -// you should use re2/re2.h, which provides a class RE2 that -// mimics the PCRE interface provided by PCRE's C++ wrappers. -// This header describes the low-level interface used to implement RE2 -// and may change in backwards-incompatible ways from time to time. -// In contrast, RE2's interface will not. -// --------------------------------------------------------------------- - -// Regular expression library: parsing, execution, and manipulation -// of regular expressions. -// -// Any operation that traverses the Regexp structures should be written -// using Regexp::Walker (see walker-inl.h), not recursively, because deeply nested -// regular expressions such as x++++++++++++++++++++... might cause recursive -// traversals to overflow the stack. -// -// It is the caller's responsibility to provide appropriate mutual exclusion -// around manipulation of the regexps. RE2 does this. -// -// PARSING -// -// Regexp::Parse parses regular expressions encoded in UTF-8. -// The default syntax is POSIX extended regular expressions, -// with the following changes: -// -// 1. Backreferences (optional in POSIX EREs) are not supported. -// (Supporting them precludes the use of DFA-based -// matching engines.) -// -// 2. Collating elements and collation classes are not supported. -// (No one has needed or wanted them.) -// -// The exact syntax accepted can be modified by passing flags to -// Regexp::Parse. 
In particular, many of the basic Perl additions -// are available. The flags are documented below (search for LikePerl). -// -// If parsed with the flag Regexp::Latin1, both the regular expression -// and the input to the matching routines are assumed to be encoded in -// Latin-1, not UTF-8. -// -// EXECUTION -// -// Once Regexp has parsed a regular expression, it provides methods -// to search text using that regular expression. These methods are -// implemented via calling out to other regular expression libraries. -// (Let's call them the sublibraries.) -// -// To call a sublibrary, Regexp does not simply prepare a -// string version of the regular expression and hand it to the -// sublibrary. Instead, Regexp prepares, from its own parsed form, the -// corresponding internal representation used by the sublibrary. -// This has the drawback of needing to know the internal representation -// used by the sublibrary, but it has two important benefits: -// -// 1. The syntax and meaning of regular expressions is guaranteed -// to be that used by Regexp's parser, not the syntax expected -// by the sublibrary. Regexp might accept a restricted or -// expanded syntax for regular expressions as compared with -// the sublibrary. As long as Regexp can translate from its -// internal form into the sublibrary's, clients need not know -// exactly which sublibrary they are using. -// -// 2. The sublibrary parsers are bypassed. For whatever reason, -// sublibrary regular expression parsers often have security -// problems. For example, plan9grep's regular expression parser -// has a buffer overflow in its handling of large character -// classes, and PCRE's parser has had buffer overflow problems -// in the past. Security-team requires sandboxing of sublibrary -// regular expression parsers. Avoiding the sublibrary parsers -// avoids the sandbox. 
-// -// The execution methods we use now are provided by the compiled form, -// Prog, described in prog.h -// -// MANIPULATION -// -// Unlike other regular expression libraries, Regexp makes its parsed -// form accessible to clients, so that client code can analyze the -// parsed regular expressions. - +// --- SPONSORED LINK -------------------------------------------------- +// If you want to use this library for regular expression matching, +// you should use re2/re2.h, which provides a class RE2 that +// mimics the PCRE interface provided by PCRE's C++ wrappers. +// This header describes the low-level interface used to implement RE2 +// and may change in backwards-incompatible ways from time to time. +// In contrast, RE2's interface will not. +// --------------------------------------------------------------------- + +// Regular expression library: parsing, execution, and manipulation +// of regular expressions. +// +// Any operation that traverses the Regexp structures should be written +// using Regexp::Walker (see walker-inl.h), not recursively, because deeply nested +// regular expressions such as x++++++++++++++++++++... might cause recursive +// traversals to overflow the stack. +// +// It is the caller's responsibility to provide appropriate mutual exclusion +// around manipulation of the regexps. RE2 does this. +// +// PARSING +// +// Regexp::Parse parses regular expressions encoded in UTF-8. +// The default syntax is POSIX extended regular expressions, +// with the following changes: +// +// 1. Backreferences (optional in POSIX EREs) are not supported. +// (Supporting them precludes the use of DFA-based +// matching engines.) +// +// 2. Collating elements and collation classes are not supported. +// (No one has needed or wanted them.) +// +// The exact syntax accepted can be modified by passing flags to +// Regexp::Parse. In particular, many of the basic Perl additions +// are available. The flags are documented below (search for LikePerl). 
+// +// If parsed with the flag Regexp::Latin1, both the regular expression +// and the input to the matching routines are assumed to be encoded in +// Latin-1, not UTF-8. +// +// EXECUTION +// +// Once Regexp has parsed a regular expression, it provides methods +// to search text using that regular expression. These methods are +// implemented via calling out to other regular expression libraries. +// (Let's call them the sublibraries.) +// +// To call a sublibrary, Regexp does not simply prepare a +// string version of the regular expression and hand it to the +// sublibrary. Instead, Regexp prepares, from its own parsed form, the +// corresponding internal representation used by the sublibrary. +// This has the drawback of needing to know the internal representation +// used by the sublibrary, but it has two important benefits: +// +// 1. The syntax and meaning of regular expressions is guaranteed +// to be that used by Regexp's parser, not the syntax expected +// by the sublibrary. Regexp might accept a restricted or +// expanded syntax for regular expressions as compared with +// the sublibrary. As long as Regexp can translate from its +// internal form into the sublibrary's, clients need not know +// exactly which sublibrary they are using. +// +// 2. The sublibrary parsers are bypassed. For whatever reason, +// sublibrary regular expression parsers often have security +// problems. For example, plan9grep's regular expression parser +// has a buffer overflow in its handling of large character +// classes, and PCRE's parser has had buffer overflow problems +// in the past. Security-team requires sandboxing of sublibrary +// regular expression parsers. Avoiding the sublibrary parsers +// avoids the sandbox. 
+// +// The execution methods we use now are provided by the compiled form, +// Prog, described in prog.h +// +// MANIPULATION +// +// Unlike other regular expression libraries, Regexp makes its parsed +// form accessible to clients, so that client code can analyze the +// parsed regular expressions. + #include <stddef.h> #include <stdint.h> #include <map> #include <set> #include <string> - + #include "util/util.h" #include "util/logging.h" #include "util/utf.h" #include "re2/stringpiece.h" - -namespace re2 { - -// Keep in sync with string list kOpcodeNames[] in testing/dump.cc -enum RegexpOp { - // Matches no strings. - kRegexpNoMatch = 1, - - // Matches empty string. - kRegexpEmptyMatch, - - // Matches rune_. - kRegexpLiteral, - - // Matches runes_. - kRegexpLiteralString, - - // Matches concatenation of sub_[0..nsub-1]. - kRegexpConcat, - // Matches union of sub_[0..nsub-1]. - kRegexpAlternate, - - // Matches sub_[0] zero or more times. - kRegexpStar, - // Matches sub_[0] one or more times. - kRegexpPlus, - // Matches sub_[0] zero or one times. - kRegexpQuest, - - // Matches sub_[0] at least min_ times, at most max_ times. - // max_ == -1 means no upper limit. - kRegexpRepeat, - - // Parenthesized (capturing) subexpression. Index is cap_. - // Optionally, capturing name is name_. - kRegexpCapture, - - // Matches any character. - kRegexpAnyChar, - - // Matches any byte [sic]. - kRegexpAnyByte, - - // Matches empty string at beginning of line. - kRegexpBeginLine, - // Matches empty string at end of line. - kRegexpEndLine, - - // Matches word boundary "\b". - kRegexpWordBoundary, - // Matches not-a-word boundary "\B". - kRegexpNoWordBoundary, - - // Matches empty string at beginning of text. - kRegexpBeginText, - // Matches empty string at end of text. - kRegexpEndText, - - // Matches character class given by cc_. - kRegexpCharClass, - - // Forces match of entire expression right now, - // with match ID match_id_ (used by RE2::Set). 
- kRegexpHaveMatch, - - kMaxRegexpOp = kRegexpHaveMatch, -}; - -// Keep in sync with string list in regexp.cc -enum RegexpStatusCode { - // No error - kRegexpSuccess = 0, - - // Unexpected error - kRegexpInternalError, - - // Parse errors - kRegexpBadEscape, // bad escape sequence - kRegexpBadCharClass, // bad character class - kRegexpBadCharRange, // bad character class range - kRegexpMissingBracket, // missing closing ] - kRegexpMissingParen, // missing closing ) + +namespace re2 { + +// Keep in sync with string list kOpcodeNames[] in testing/dump.cc +enum RegexpOp { + // Matches no strings. + kRegexpNoMatch = 1, + + // Matches empty string. + kRegexpEmptyMatch, + + // Matches rune_. + kRegexpLiteral, + + // Matches runes_. + kRegexpLiteralString, + + // Matches concatenation of sub_[0..nsub-1]. + kRegexpConcat, + // Matches union of sub_[0..nsub-1]. + kRegexpAlternate, + + // Matches sub_[0] zero or more times. + kRegexpStar, + // Matches sub_[0] one or more times. + kRegexpPlus, + // Matches sub_[0] zero or one times. + kRegexpQuest, + + // Matches sub_[0] at least min_ times, at most max_ times. + // max_ == -1 means no upper limit. + kRegexpRepeat, + + // Parenthesized (capturing) subexpression. Index is cap_. + // Optionally, capturing name is name_. + kRegexpCapture, + + // Matches any character. + kRegexpAnyChar, + + // Matches any byte [sic]. + kRegexpAnyByte, + + // Matches empty string at beginning of line. + kRegexpBeginLine, + // Matches empty string at end of line. + kRegexpEndLine, + + // Matches word boundary "\b". + kRegexpWordBoundary, + // Matches not-a-word boundary "\B". + kRegexpNoWordBoundary, + + // Matches empty string at beginning of text. + kRegexpBeginText, + // Matches empty string at end of text. + kRegexpEndText, + + // Matches character class given by cc_. + kRegexpCharClass, + + // Forces match of entire expression right now, + // with match ID match_id_ (used by RE2::Set). 
+ kRegexpHaveMatch, + + kMaxRegexpOp = kRegexpHaveMatch, +}; + +// Keep in sync with string list in regexp.cc +enum RegexpStatusCode { + // No error + kRegexpSuccess = 0, + + // Unexpected error + kRegexpInternalError, + + // Parse errors + kRegexpBadEscape, // bad escape sequence + kRegexpBadCharClass, // bad character class + kRegexpBadCharRange, // bad character class range + kRegexpMissingBracket, // missing closing ] + kRegexpMissingParen, // missing closing ) kRegexpUnexpectedParen, // unexpected closing ) - kRegexpTrailingBackslash, // at end of regexp - kRegexpRepeatArgument, // repeat argument missing, e.g. "*" - kRegexpRepeatSize, // bad repetition argument - kRegexpRepeatOp, // bad repetition operator - kRegexpBadPerlOp, // bad perl operator - kRegexpBadUTF8, // invalid UTF-8 in regexp - kRegexpBadNamedCapture, // bad named capture -}; - -// Error status for certain operations. -class RegexpStatus { - public: - RegexpStatus() : code_(kRegexpSuccess), tmp_(NULL) {} - ~RegexpStatus() { delete tmp_; } - + kRegexpTrailingBackslash, // at end of regexp + kRegexpRepeatArgument, // repeat argument missing, e.g. "*" + kRegexpRepeatSize, // bad repetition argument + kRegexpRepeatOp, // bad repetition operator + kRegexpBadPerlOp, // bad perl operator + kRegexpBadUTF8, // invalid UTF-8 in regexp + kRegexpBadNamedCapture, // bad named capture +}; + +// Error status for certain operations. 
+class RegexpStatus { + public: + RegexpStatus() : code_(kRegexpSuccess), tmp_(NULL) {} + ~RegexpStatus() { delete tmp_; } + void set_code(RegexpStatusCode code) { code_ = code; } - void set_error_arg(const StringPiece& error_arg) { error_arg_ = error_arg; } + void set_error_arg(const StringPiece& error_arg) { error_arg_ = error_arg; } void set_tmp(std::string* tmp) { delete tmp_; tmp_ = tmp; } RegexpStatusCode code() const { return code_; } - const StringPiece& error_arg() const { return error_arg_; } - bool ok() const { return code() == kRegexpSuccess; } - - // Copies state from status. - void Copy(const RegexpStatus& status); - - // Returns text equivalent of code, e.g.: - // "Bad character class" + const StringPiece& error_arg() const { return error_arg_; } + bool ok() const { return code() == kRegexpSuccess; } + + // Copies state from status. + void Copy(const RegexpStatus& status); + + // Returns text equivalent of code, e.g.: + // "Bad character class" static std::string CodeText(RegexpStatusCode code); - - // Returns text describing error, e.g.: - // "Bad character class: [z-a]" + + // Returns text describing error, e.g.: + // "Bad character class: [z-a]" std::string Text() const; - - private: + + private: RegexpStatusCode code_; // Kind of error StringPiece error_arg_; // Piece of regexp containing syntax error. std::string* tmp_; // Temporary storage, possibly where error_arg_ is. - + RegexpStatus(const RegexpStatus&) = delete; RegexpStatus& operator=(const RegexpStatus&) = delete; -}; - -// Compiled form; see prog.h -class Prog; - -struct RuneRange { - RuneRange() : lo(0), hi(0) { } - RuneRange(int l, int h) : lo(l), hi(h) { } - Rune lo; - Rune hi; -}; - -// Less-than on RuneRanges treats a == b if they overlap at all. -// This lets us look in a set to find the range covering a particular Rune. 
-struct RuneRangeLess { - bool operator()(const RuneRange& a, const RuneRange& b) const { - return a.hi < b.lo; - } -}; - -class CharClassBuilder; - -class CharClass { - public: - void Delete(); - - typedef RuneRange* iterator; - iterator begin() { return ranges_; } - iterator end() { return ranges_ + nranges_; } - - int size() { return nrunes_; } - bool empty() { return nrunes_ == 0; } - bool full() { return nrunes_ == Runemax+1; } - bool FoldsASCII() { return folds_ascii_; } - +}; + +// Compiled form; see prog.h +class Prog; + +struct RuneRange { + RuneRange() : lo(0), hi(0) { } + RuneRange(int l, int h) : lo(l), hi(h) { } + Rune lo; + Rune hi; +}; + +// Less-than on RuneRanges treats a == b if they overlap at all. +// This lets us look in a set to find the range covering a particular Rune. +struct RuneRangeLess { + bool operator()(const RuneRange& a, const RuneRange& b) const { + return a.hi < b.lo; + } +}; + +class CharClassBuilder; + +class CharClass { + public: + void Delete(); + + typedef RuneRange* iterator; + iterator begin() { return ranges_; } + iterator end() { return ranges_ + nranges_; } + + int size() { return nrunes_; } + bool empty() { return nrunes_ == 0; } + bool full() { return nrunes_ == Runemax+1; } + bool FoldsASCII() { return folds_ascii_; } + bool Contains(Rune r) const; - CharClass* Negate(); - - private: - CharClass(); // not implemented - ~CharClass(); // not implemented + CharClass* Negate(); + + private: + CharClass(); // not implemented + ~CharClass(); // not implemented static CharClass* New(size_t maxranges); - - friend class CharClassBuilder; - - bool folds_ascii_; - int nrunes_; - RuneRange *ranges_; - int nranges_; + + friend class CharClassBuilder; + + bool folds_ascii_; + int nrunes_; + RuneRange *ranges_; + int nranges_; CharClass(const CharClass&) = delete; CharClass& operator=(const CharClass&) = delete; -}; - -class Regexp { - public: - - // Flags for parsing. Can be ORed together. 
- enum ParseFlags { +}; + +class Regexp { + public: + + // Flags for parsing. Can be ORed together. + enum ParseFlags { NoParseFlags = 0, FoldCase = 1<<0, // Fold case during matching (case-insensitive). Literal = 1<<1, // Treat s as literal string instead of a regexp. @@ -309,139 +309,139 @@ class Regexp { NeverNL = 1<<11, // Never match NL, even if the regexp mentions // it explicitly. NeverCapture = 1<<12, // Parse all parens as non-capturing. - - // As close to Perl as we can get. + + // As close to Perl as we can get. LikePerl = ClassNL | OneLine | PerlClasses | PerlB | PerlX | UnicodeGroups, - - // Internal use only. + + // Internal use only. WasDollar = 1<<13, // on kRegexpEndText: was $ in regexp text AllParseFlags = (1<<14)-1, - }; - - // Get. No set, Regexps are logically immutable once created. - RegexpOp op() { return static_cast<RegexpOp>(op_); } - int nsub() { return nsub_; } + }; + + // Get. No set, Regexps are logically immutable once created. + RegexpOp op() { return static_cast<RegexpOp>(op_); } + int nsub() { return nsub_; } bool simple() { return simple_ != 0; } ParseFlags parse_flags() { return static_cast<ParseFlags>(parse_flags_); } - int Ref(); // For testing. - - Regexp** sub() { - if(nsub_ <= 1) - return &subone_; - else - return submany_; - } - - int min() { DCHECK_EQ(op_, kRegexpRepeat); return min_; } - int max() { DCHECK_EQ(op_, kRegexpRepeat); return max_; } - Rune rune() { DCHECK_EQ(op_, kRegexpLiteral); return rune_; } - CharClass* cc() { DCHECK_EQ(op_, kRegexpCharClass); return cc_; } - int cap() { DCHECK_EQ(op_, kRegexpCapture); return cap_; } + int Ref(); // For testing. 
+ + Regexp** sub() { + if(nsub_ <= 1) + return &subone_; + else + return submany_; + } + + int min() { DCHECK_EQ(op_, kRegexpRepeat); return min_; } + int max() { DCHECK_EQ(op_, kRegexpRepeat); return max_; } + Rune rune() { DCHECK_EQ(op_, kRegexpLiteral); return rune_; } + CharClass* cc() { DCHECK_EQ(op_, kRegexpCharClass); return cc_; } + int cap() { DCHECK_EQ(op_, kRegexpCapture); return cap_; } const std::string* name() { DCHECK_EQ(op_, kRegexpCapture); return name_; } - Rune* runes() { DCHECK_EQ(op_, kRegexpLiteralString); return runes_; } - int nrunes() { DCHECK_EQ(op_, kRegexpLiteralString); return nrunes_; } - int match_id() { DCHECK_EQ(op_, kRegexpHaveMatch); return match_id_; } - - // Increments reference count, returns object as convenience. - Regexp* Incref(); - - // Decrements reference count and deletes this object if count reaches 0. - void Decref(); - - // Parses string s to produce regular expression, returned. - // Caller must release return value with re->Decref(). - // On failure, sets *status (if status != NULL) and returns NULL. - static Regexp* Parse(const StringPiece& s, ParseFlags flags, - RegexpStatus* status); - - // Returns a _new_ simplified version of the current regexp. - // Does not edit the current regexp. - // Caller must release return value with re->Decref(). - // Simplified means that counted repetition has been rewritten - // into simpler terms and all Perl/POSIX features have been - // removed. The result will capture exactly the same - // subexpressions the original did, unless formatted with ToString. - Regexp* Simplify(); + Rune* runes() { DCHECK_EQ(op_, kRegexpLiteralString); return runes_; } + int nrunes() { DCHECK_EQ(op_, kRegexpLiteralString); return nrunes_; } + int match_id() { DCHECK_EQ(op_, kRegexpHaveMatch); return match_id_; } + + // Increments reference count, returns object as convenience. + Regexp* Incref(); + + // Decrements reference count and deletes this object if count reaches 0. 
+ void Decref(); + + // Parses string s to produce regular expression, returned. + // Caller must release return value with re->Decref(). + // On failure, sets *status (if status != NULL) and returns NULL. + static Regexp* Parse(const StringPiece& s, ParseFlags flags, + RegexpStatus* status); + + // Returns a _new_ simplified version of the current regexp. + // Does not edit the current regexp. + // Caller must release return value with re->Decref(). + // Simplified means that counted repetition has been rewritten + // into simpler terms and all Perl/POSIX features have been + // removed. The result will capture exactly the same + // subexpressions the original did, unless formatted with ToString. + Regexp* Simplify(); friend class CoalesceWalker; - friend class SimplifyWalker; - - // Parses the regexp src and then simplifies it and sets *dst to the - // string representation of the simplified form. Returns true on success. - // Returns false and sets *status (if status != NULL) on parse error. - static bool SimplifyRegexp(const StringPiece& src, ParseFlags flags, + friend class SimplifyWalker; + + // Parses the regexp src and then simplifies it and sets *dst to the + // string representation of the simplified form. Returns true on success. + // Returns false and sets *status (if status != NULL) on parse error. + static bool SimplifyRegexp(const StringPiece& src, ParseFlags flags, std::string* dst, RegexpStatus* status); - - // Returns the number of capturing groups in the regexp. - int NumCaptures(); - friend class NumCapturesWalker; - - // Returns a map from names to capturing group indices, - // or NULL if the regexp contains no named capture groups. - // The caller is responsible for deleting the map. + + // Returns the number of capturing groups in the regexp. + int NumCaptures(); + friend class NumCapturesWalker; + + // Returns a map from names to capturing group indices, + // or NULL if the regexp contains no named capture groups. 
+ // The caller is responsible for deleting the map. std::map<std::string, int>* NamedCaptures(); - - // Returns a map from capturing group indices to capturing group - // names or NULL if the regexp contains no named capture groups. The - // caller is responsible for deleting the map. + + // Returns a map from capturing group indices to capturing group + // names or NULL if the regexp contains no named capture groups. The + // caller is responsible for deleting the map. std::map<int, std::string>* CaptureNames(); - - // Returns a string representation of the current regexp, - // using as few parentheses as possible. + + // Returns a string representation of the current regexp, + // using as few parentheses as possible. std::string ToString(); - - // Convenience functions. They consume the passed reference, - // so in many cases you should use, e.g., Plus(re->Incref(), flags). - // They do not consume allocated arrays like subs or runes. - static Regexp* Plus(Regexp* sub, ParseFlags flags); - static Regexp* Star(Regexp* sub, ParseFlags flags); - static Regexp* Quest(Regexp* sub, ParseFlags flags); - static Regexp* Concat(Regexp** subs, int nsubs, ParseFlags flags); - static Regexp* Alternate(Regexp** subs, int nsubs, ParseFlags flags); - static Regexp* Capture(Regexp* sub, ParseFlags flags, int cap); - static Regexp* Repeat(Regexp* sub, ParseFlags flags, int min, int max); - static Regexp* NewLiteral(Rune rune, ParseFlags flags); - static Regexp* NewCharClass(CharClass* cc, ParseFlags flags); - static Regexp* LiteralString(Rune* runes, int nrunes, ParseFlags flags); - static Regexp* HaveMatch(int match_id, ParseFlags flags); - - // Like Alternate but does not factor out common prefixes. - static Regexp* AlternateNoFactor(Regexp** subs, int nsubs, ParseFlags flags); - - // Debugging function. Returns string format for regexp - // that makes structure clear. Does NOT use regexp syntax. + + // Convenience functions. 
They consume the passed reference, + // so in many cases you should use, e.g., Plus(re->Incref(), flags). + // They do not consume allocated arrays like subs or runes. + static Regexp* Plus(Regexp* sub, ParseFlags flags); + static Regexp* Star(Regexp* sub, ParseFlags flags); + static Regexp* Quest(Regexp* sub, ParseFlags flags); + static Regexp* Concat(Regexp** subs, int nsubs, ParseFlags flags); + static Regexp* Alternate(Regexp** subs, int nsubs, ParseFlags flags); + static Regexp* Capture(Regexp* sub, ParseFlags flags, int cap); + static Regexp* Repeat(Regexp* sub, ParseFlags flags, int min, int max); + static Regexp* NewLiteral(Rune rune, ParseFlags flags); + static Regexp* NewCharClass(CharClass* cc, ParseFlags flags); + static Regexp* LiteralString(Rune* runes, int nrunes, ParseFlags flags); + static Regexp* HaveMatch(int match_id, ParseFlags flags); + + // Like Alternate but does not factor out common prefixes. + static Regexp* AlternateNoFactor(Regexp** subs, int nsubs, ParseFlags flags); + + // Debugging function. Returns string format for regexp + // that makes structure clear. Does NOT use regexp syntax. std::string Dump(); - - // Helper traversal class, defined fully in walker-inl.h. - template<typename T> class Walker; - - // Compile to Prog. See prog.h - // Reverse prog expects to be run over text backward. - // Construction and execution of prog will - // stay within approximately max_mem bytes of memory. - // If max_mem <= 0, a reasonable default is used. + + // Helper traversal class, defined fully in walker-inl.h. + template<typename T> class Walker; + + // Compile to Prog. See prog.h + // Reverse prog expects to be run over text backward. + // Construction and execution of prog will + // stay within approximately max_mem bytes of memory. + // If max_mem <= 0, a reasonable default is used. 
Prog* CompileToProg(int64_t max_mem); Prog* CompileToReverseProg(int64_t max_mem); - - // Whether to expect this library to find exactly the same answer as PCRE - // when running this regexp. Most regexps do mimic PCRE exactly, but a few - // obscure cases behave differently. Technically this is more a property - // of the Prog than the Regexp, but the computation is much easier to do - // on the Regexp. See mimics_pcre.cc for the exact conditions. - bool MimicsPCRE(); - - // Benchmarking function. - void NullWalk(); - - // Whether every match of this regexp must be anchored and - // begin with a non-empty fixed string (perhaps after ASCII - // case-folding). If so, returns the prefix and the sub-regexp that - // follows it. + + // Whether to expect this library to find exactly the same answer as PCRE + // when running this regexp. Most regexps do mimic PCRE exactly, but a few + // obscure cases behave differently. Technically this is more a property + // of the Prog than the Regexp, but the computation is much easier to do + // on the Regexp. See mimics_pcre.cc for the exact conditions. + bool MimicsPCRE(); + + // Benchmarking function. + void NullWalk(); + + // Whether every match of this regexp must be anchored and + // begin with a non-empty fixed string (perhaps after ASCII + // case-folding). If so, returns the prefix and the sub-regexp that + // follows it. // Callers should expect *prefix, *foldcase and *suffix to be "zeroed" // regardless of the return value. bool RequiredPrefix(std::string* prefix, bool* foldcase, Regexp** suffix); - + // Whether every match of this regexp must be unanchored and // begin with a non-empty fixed string (perhaps after ASCII // case-folding). If so, returns the prefix. @@ -453,213 +453,213 @@ class Regexp { // FOR FUZZING ONLY. static void FUZZING_ONLY_set_maximum_repeat_count(int i); - private: - // Constructor allocates vectors as appropriate for operator. 
- explicit Regexp(RegexpOp op, ParseFlags parse_flags); - - // Use Decref() instead of delete to release Regexps. - // This is private to catch deletes at compile time. - ~Regexp(); - void Destroy(); - bool QuickDestroy(); - - // Helpers for Parse. Listed here so they can edit Regexps. - class ParseState; - - friend class ParseState; - friend bool ParseCharClass(StringPiece* s, Regexp** out_re, - RegexpStatus* status); - - // Helper for testing [sic]. - friend bool RegexpEqualTestingOnly(Regexp*, Regexp*); - - // Computes whether Regexp is already simple. - bool ComputeSimple(); - + private: + // Constructor allocates vectors as appropriate for operator. + explicit Regexp(RegexpOp op, ParseFlags parse_flags); + + // Use Decref() instead of delete to release Regexps. + // This is private to catch deletes at compile time. + ~Regexp(); + void Destroy(); + bool QuickDestroy(); + + // Helpers for Parse. Listed here so they can edit Regexps. + class ParseState; + + friend class ParseState; + friend bool ParseCharClass(StringPiece* s, Regexp** out_re, + RegexpStatus* status); + + // Helper for testing [sic]. + friend bool RegexpEqualTestingOnly(Regexp*, Regexp*); + + // Computes whether Regexp is already simple. + bool ComputeSimple(); + // Constructor that generates a Star, Plus or Quest, // squashing the pair if sub is also a Star, Plus or Quest. static Regexp* StarPlusOrQuest(RegexpOp op, Regexp* sub, ParseFlags flags); - // Constructor that generates a concatenation or alternation, - // enforcing the limit on the number of subexpressions for - // a particular Regexp. - static Regexp* ConcatOrAlternate(RegexpOp op, Regexp** subs, int nsubs, - ParseFlags flags, bool can_factor); - - // Returns the leading string that re starts with. - // The returned Rune* points into a piece of re, - // so it must not be used after the caller calls re->Decref(). 
- static Rune* LeadingString(Regexp* re, int* nrune, ParseFlags* flags); - - // Removes the first n leading runes from the beginning of re. - // Edits re in place. - static void RemoveLeadingString(Regexp* re, int n); - - // Returns the leading regexp in re's top-level concatenation. - // The returned Regexp* points at re or a sub-expression of re, - // so it must not be used after the caller calls re->Decref(). - static Regexp* LeadingRegexp(Regexp* re); - - // Removes LeadingRegexp(re) from re and returns the remainder. - // Might edit re in place. - static Regexp* RemoveLeadingRegexp(Regexp* re); - - // Simplifies an alternation of literal strings by factoring out - // common prefixes. - static int FactorAlternation(Regexp** sub, int nsub, ParseFlags flags); + // Constructor that generates a concatenation or alternation, + // enforcing the limit on the number of subexpressions for + // a particular Regexp. + static Regexp* ConcatOrAlternate(RegexpOp op, Regexp** subs, int nsubs, + ParseFlags flags, bool can_factor); + + // Returns the leading string that re starts with. + // The returned Rune* points into a piece of re, + // so it must not be used after the caller calls re->Decref(). + static Rune* LeadingString(Regexp* re, int* nrune, ParseFlags* flags); + + // Removes the first n leading runes from the beginning of re. + // Edits re in place. + static void RemoveLeadingString(Regexp* re, int n); + + // Returns the leading regexp in re's top-level concatenation. + // The returned Regexp* points at re or a sub-expression of re, + // so it must not be used after the caller calls re->Decref(). + static Regexp* LeadingRegexp(Regexp* re); + + // Removes LeadingRegexp(re) from re and returns the remainder. + // Might edit re in place. + static Regexp* RemoveLeadingRegexp(Regexp* re); + + // Simplifies an alternation of literal strings by factoring out + // common prefixes. 
+ static int FactorAlternation(Regexp** sub, int nsub, ParseFlags flags); friend class FactorAlternationImpl; - - // Is a == b? Only efficient on regexps that have not been through - // Simplify yet - the expansion of a kRegexpRepeat will make this - // take a long time. Do not call on such regexps, hence private. - static bool Equal(Regexp* a, Regexp* b); - - // Allocate space for n sub-regexps. - void AllocSub(int n) { + + // Is a == b? Only efficient on regexps that have not been through + // Simplify yet - the expansion of a kRegexpRepeat will make this + // take a long time. Do not call on such regexps, hence private. + static bool Equal(Regexp* a, Regexp* b); + + // Allocate space for n sub-regexps. + void AllocSub(int n) { DCHECK(n >= 0 && static_cast<uint16_t>(n) == n); - if (n > 1) - submany_ = new Regexp*[n]; + if (n > 1) + submany_ = new Regexp*[n]; nsub_ = static_cast<uint16_t>(n); - } - - // Add Rune to LiteralString - void AddRuneToString(Rune r); - - // Swaps this with that, in place. - void Swap(Regexp *that); - - // Operator. See description of operators above. + } + + // Add Rune to LiteralString + void AddRuneToString(Rune r); + + // Swaps this with that, in place. + void Swap(Regexp *that); + + // Operator. See description of operators above. // uint8_t instead of RegexpOp to control space usage. uint8_t op_; - - // Is this regexp structure already simple - // (has it been returned by Simplify)? + + // Is this regexp structure already simple + // (has it been returned by Simplify)? // uint8_t instead of bool to control space usage. uint8_t simple_; - - // Flags saved from parsing and used during execution. - // (Only FoldCase is used.) + + // Flags saved from parsing and used during execution. + // (Only FoldCase is used.) // uint16_t instead of ParseFlags to control space usage. uint16_t parse_flags_; - - // Reference count. 
Exists so that SimplifyRegexp can build - // regexp structures that are dags rather than trees to avoid - // exponential blowup in space requirements. + + // Reference count. Exists so that SimplifyRegexp can build + // regexp structures that are dags rather than trees to avoid + // exponential blowup in space requirements. // uint16_t to control space usage. - // The standard regexp routines will never generate a + // The standard regexp routines will never generate a // ref greater than the maximum repeat count (kMaxRepeat), - // but even so, Incref and Decref consult an overflow map - // when ref_ reaches kMaxRef. + // but even so, Incref and Decref consult an overflow map + // when ref_ reaches kMaxRef. uint16_t ref_; static const uint16_t kMaxRef = 0xffff; - - // Subexpressions. + + // Subexpressions. // uint16_t to control space usage. - // Concat and Alternate handle larger numbers of subexpressions - // by building concatenation or alternation trees. - // Other routines should call Concat or Alternate instead of - // filling in sub() by hand. + // Concat and Alternate handle larger numbers of subexpressions + // by building concatenation or alternation trees. + // Other routines should call Concat or Alternate instead of + // filling in sub() by hand. uint16_t nsub_; static const uint16_t kMaxNsub = 0xffff; - union { - Regexp** submany_; // if nsub_ > 1 - Regexp* subone_; // if nsub_ == 1 - }; - - // Extra space for parse and teardown stacks. - Regexp* down_; - - // Arguments to operator. See description of operators above. - union { - struct { // Repeat - int max_; - int min_; - }; - struct { // Capture - int cap_; + union { + Regexp** submany_; // if nsub_ > 1 + Regexp* subone_; // if nsub_ == 1 + }; + + // Extra space for parse and teardown stacks. + Regexp* down_; + + // Arguments to operator. See description of operators above. 
+ union { + struct { // Repeat + int max_; + int min_; + }; + struct { // Capture + int cap_; std::string* name_; - }; - struct { // LiteralString - int nrunes_; - Rune* runes_; - }; - struct { // CharClass - // These two could be in separate union members, - // but it wouldn't save any space (there are other two-word structs) - // and keeping them separate avoids confusion during parsing. - CharClass* cc_; - CharClassBuilder* ccb_; - }; - Rune rune_; // Literal - int match_id_; // HaveMatch - void *the_union_[2]; // as big as any other element, for memset - }; - + }; + struct { // LiteralString + int nrunes_; + Rune* runes_; + }; + struct { // CharClass + // These two could be in separate union members, + // but it wouldn't save any space (there are other two-word structs) + // and keeping them separate avoids confusion during parsing. + CharClass* cc_; + CharClassBuilder* ccb_; + }; + Rune rune_; // Literal + int match_id_; // HaveMatch + void *the_union_[2]; // as big as any other element, for memset + }; + Regexp(const Regexp&) = delete; Regexp& operator=(const Regexp&) = delete; -}; - -// Character class set: contains non-overlapping, non-abutting RuneRanges. +}; + +// Character class set: contains non-overlapping, non-abutting RuneRanges. 
typedef std::set<RuneRange, RuneRangeLess> RuneRangeSet; - -class CharClassBuilder { - public: - CharClassBuilder(); - - typedef RuneRangeSet::iterator iterator; - iterator begin() { return ranges_.begin(); } - iterator end() { return ranges_.end(); } - - int size() { return nrunes_; } - bool empty() { return nrunes_ == 0; } - bool full() { return nrunes_ == Runemax+1; } - - bool Contains(Rune r); - bool FoldsASCII(); - bool AddRange(Rune lo, Rune hi); // returns whether class changed - CharClassBuilder* Copy(); - void AddCharClass(CharClassBuilder* cc); - void Negate(); - void RemoveAbove(Rune r); - CharClass* GetCharClass(); - void AddRangeFlags(Rune lo, Rune hi, Regexp::ParseFlags parse_flags); - - private: + +class CharClassBuilder { + public: + CharClassBuilder(); + + typedef RuneRangeSet::iterator iterator; + iterator begin() { return ranges_.begin(); } + iterator end() { return ranges_.end(); } + + int size() { return nrunes_; } + bool empty() { return nrunes_ == 0; } + bool full() { return nrunes_ == Runemax+1; } + + bool Contains(Rune r); + bool FoldsASCII(); + bool AddRange(Rune lo, Rune hi); // returns whether class changed + CharClassBuilder* Copy(); + void AddCharClass(CharClassBuilder* cc); + void Negate(); + void RemoveAbove(Rune r); + CharClass* GetCharClass(); + void AddRangeFlags(Rune lo, Rune hi, Regexp::ParseFlags parse_flags); + + private: static const uint32_t AlphaMask = (1<<26) - 1; uint32_t upper_; // bitmap of A-Z uint32_t lower_; // bitmap of a-z - int nrunes_; - RuneRangeSet ranges_; + int nrunes_; + RuneRangeSet ranges_; CharClassBuilder(const CharClassBuilder&) = delete; CharClassBuilder& operator=(const CharClassBuilder&) = delete; -}; - +}; + // Bitwise ops on ParseFlags produce ParseFlags. 
inline Regexp::ParseFlags operator|(Regexp::ParseFlags a, Regexp::ParseFlags b) { return static_cast<Regexp::ParseFlags>( static_cast<int>(a) | static_cast<int>(b)); -} - +} + inline Regexp::ParseFlags operator^(Regexp::ParseFlags a, Regexp::ParseFlags b) { return static_cast<Regexp::ParseFlags>( static_cast<int>(a) ^ static_cast<int>(b)); -} - +} + inline Regexp::ParseFlags operator&(Regexp::ParseFlags a, Regexp::ParseFlags b) { return static_cast<Regexp::ParseFlags>( static_cast<int>(a) & static_cast<int>(b)); -} - +} + inline Regexp::ParseFlags operator~(Regexp::ParseFlags a) { // Attempting to produce a value out of enum's range has undefined behaviour. return static_cast<Regexp::ParseFlags>( ~static_cast<int>(a) & static_cast<int>(Regexp::AllParseFlags)); -} - -} // namespace re2 +} + +} // namespace re2 #endif // RE2_REGEXP_H_ diff --git a/contrib/libs/re2/re2/set.cc b/contrib/libs/re2/re2/set.cc index 18705663a5..df27ca5fd0 100644 --- a/contrib/libs/re2/re2/set.cc +++ b/contrib/libs/re2/re2/set.cc @@ -1,9 +1,9 @@ -// Copyright 2010 The RE2 Authors. All Rights Reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - +// Copyright 2010 The RE2 Authors. All Rights Reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ #include "re2/set.h" - + #include <stddef.h> #include <algorithm> #include <memory> @@ -12,26 +12,26 @@ #include "util/util.h" #include "util/logging.h" #include "re2/pod_array.h" -#include "re2/prog.h" +#include "re2/prog.h" #include "re2/re2.h" -#include "re2/regexp.h" +#include "re2/regexp.h" #include "re2/stringpiece.h" - + namespace re2 { - + RE2::Set::Set(const RE2::Options& options, RE2::Anchor anchor) : options_(options), anchor_(anchor), compiled_(false), size_(0) { options_.set_never_capture(true); // might unblock some optimisations -} - -RE2::Set::~Set() { +} + +RE2::Set::~Set() { for (size_t i = 0; i < elem_.size(); i++) elem_[i].second->Decref(); -} - +} + RE2::Set::Set(Set&& other) : options_(other.options_), anchor_(other.anchor_), @@ -53,52 +53,52 @@ RE2::Set& RE2::Set::operator=(Set&& other) { } int RE2::Set::Add(const StringPiece& pattern, std::string* error) { - if (compiled_) { + if (compiled_) { LOG(DFATAL) << "RE2::Set::Add() called after compiling"; - return -1; - } - - Regexp::ParseFlags pf = static_cast<Regexp::ParseFlags>( - options_.ParseFlags()); - RegexpStatus status; + return -1; + } + + Regexp::ParseFlags pf = static_cast<Regexp::ParseFlags>( + options_.ParseFlags()); + RegexpStatus status; re2::Regexp* re = Regexp::Parse(pattern, pf, &status); - if (re == NULL) { - if (error != NULL) - *error = status.Text(); - if (options_.log_errors()) - LOG(ERROR) << "Error parsing '" << pattern << "': " << status.Text(); - return -1; - } - - // Concatenate with match index and push on vector. + if (re == NULL) { + if (error != NULL) + *error = status.Text(); + if (options_.log_errors()) + LOG(ERROR) << "Error parsing '" << pattern << "': " << status.Text(); + return -1; + } + + // Concatenate with match index and push on vector. 
int n = static_cast<int>(elem_.size()); re2::Regexp* m = re2::Regexp::HaveMatch(n, pf); - if (re->op() == kRegexpConcat) { - int nsub = re->nsub(); + if (re->op() == kRegexpConcat) { + int nsub = re->nsub(); PODArray<re2::Regexp*> sub(nsub + 1); - for (int i = 0; i < nsub; i++) - sub[i] = re->sub()[i]->Incref(); - sub[nsub] = m; - re->Decref(); + for (int i = 0; i < nsub; i++) + sub[i] = re->sub()[i]->Incref(); + sub[nsub] = m; + re->Decref(); re = re2::Regexp::Concat(sub.data(), nsub + 1, pf); - } else { + } else { re2::Regexp* sub[2]; - sub[0] = re; - sub[1] = m; + sub[0] = re; + sub[1] = m; re = re2::Regexp::Concat(sub, 2, pf); - } + } elem_.emplace_back(std::string(pattern), re); - return n; -} - -bool RE2::Set::Compile() { - if (compiled_) { + return n; +} + +bool RE2::Set::Compile() { + if (compiled_) { LOG(DFATAL) << "RE2::Set::Compile() called more than once"; - return false; - } - compiled_ = true; + return false; + } + compiled_ = true; size_ = static_cast<int>(elem_.size()); - + // Sort the elements by their patterns. This is good enough for now // until we have a Regexp comparison function. (Maybe someday...) 
std::sort(elem_.begin(), elem_.end(), @@ -112,27 +112,27 @@ bool RE2::Set::Compile() { elem_.clear(); elem_.shrink_to_fit(); - Regexp::ParseFlags pf = static_cast<Regexp::ParseFlags>( - options_.ParseFlags()); + Regexp::ParseFlags pf = static_cast<Regexp::ParseFlags>( + options_.ParseFlags()); re2::Regexp* re = re2::Regexp::Alternate(sub.data(), size_, pf); prog_.reset(Prog::CompileSet(re, anchor_, options_.max_mem())); - re->Decref(); + re->Decref(); return prog_ != nullptr; } - + bool RE2::Set::Match(const StringPiece& text, std::vector<int>* v) const { return Match(text, v, NULL); -} - +} + bool RE2::Set::Match(const StringPiece& text, std::vector<int>* v, ErrorInfo* error_info) const { - if (!compiled_) { + if (!compiled_) { LOG(DFATAL) << "RE2::Set::Match() called before compiling"; if (error_info != NULL) error_info->kind = kNotCompiled; - return false; - } + return false; + } #ifdef RE2_HAVE_THREAD_LOCAL hooks::context = NULL; #endif @@ -157,8 +157,8 @@ bool RE2::Set::Match(const StringPiece& text, std::vector<int>* v, if (ret == false) { if (error_info != NULL) error_info->kind = kNoError; - return false; - } + return false; + } if (v != NULL) { if (matches->empty()) { LOG(DFATAL) << "RE2::Set::Match() matched, but no matches returned?!"; @@ -170,7 +170,7 @@ bool RE2::Set::Match(const StringPiece& text, std::vector<int>* v, } if (error_info != NULL) error_info->kind = kNoError; - return true; -} + return true; +} } // namespace re2 diff --git a/contrib/libs/re2/re2/simplify.cc b/contrib/libs/re2/re2/simplify.cc index 663d5fcd45..e80cbca3fa 100644 --- a/contrib/libs/re2/re2/simplify.cc +++ b/contrib/libs/re2/re2/simplify.cc @@ -1,104 +1,104 @@ -// Copyright 2006 The RE2 Authors. All Rights Reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// Rewrite POSIX and other features in re -// to use simple extended regular expression features. -// Also sort and simplify character classes. 
- +// Copyright 2006 The RE2 Authors. All Rights Reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Rewrite POSIX and other features in re +// to use simple extended regular expression features. +// Also sort and simplify character classes. + #include <string> #include "util/util.h" #include "util/logging.h" #include "util/utf.h" #include "re2/pod_array.h" -#include "re2/regexp.h" -#include "re2/walker-inl.h" - -namespace re2 { - -// Parses the regexp src and then simplifies it and sets *dst to the -// string representation of the simplified form. Returns true on success. -// Returns false and sets *error (if error != NULL) on error. -bool Regexp::SimplifyRegexp(const StringPiece& src, ParseFlags flags, +#include "re2/regexp.h" +#include "re2/walker-inl.h" + +namespace re2 { + +// Parses the regexp src and then simplifies it and sets *dst to the +// string representation of the simplified form. Returns true on success. +// Returns false and sets *error (if error != NULL) on error. +bool Regexp::SimplifyRegexp(const StringPiece& src, ParseFlags flags, std::string* dst, RegexpStatus* status) { - Regexp* re = Parse(src, flags, status); - if (re == NULL) - return false; - Regexp* sre = re->Simplify(); - re->Decref(); - if (sre == NULL) { - if (status) { - status->set_code(kRegexpInternalError); - status->set_error_arg(src); - } - return false; - } - *dst = sre->ToString(); - sre->Decref(); - return true; -} - -// Assuming the simple_ flags on the children are accurate, -// is this Regexp* simple? 
-bool Regexp::ComputeSimple() { - Regexp** subs; - switch (op_) { - case kRegexpNoMatch: - case kRegexpEmptyMatch: - case kRegexpLiteral: - case kRegexpLiteralString: - case kRegexpBeginLine: - case kRegexpEndLine: - case kRegexpBeginText: - case kRegexpWordBoundary: - case kRegexpNoWordBoundary: - case kRegexpEndText: - case kRegexpAnyChar: - case kRegexpAnyByte: - case kRegexpHaveMatch: - return true; - case kRegexpConcat: - case kRegexpAlternate: - // These are simple as long as the subpieces are simple. - subs = sub(); - for (int i = 0; i < nsub_; i++) + Regexp* re = Parse(src, flags, status); + if (re == NULL) + return false; + Regexp* sre = re->Simplify(); + re->Decref(); + if (sre == NULL) { + if (status) { + status->set_code(kRegexpInternalError); + status->set_error_arg(src); + } + return false; + } + *dst = sre->ToString(); + sre->Decref(); + return true; +} + +// Assuming the simple_ flags on the children are accurate, +// is this Regexp* simple? +bool Regexp::ComputeSimple() { + Regexp** subs; + switch (op_) { + case kRegexpNoMatch: + case kRegexpEmptyMatch: + case kRegexpLiteral: + case kRegexpLiteralString: + case kRegexpBeginLine: + case kRegexpEndLine: + case kRegexpBeginText: + case kRegexpWordBoundary: + case kRegexpNoWordBoundary: + case kRegexpEndText: + case kRegexpAnyChar: + case kRegexpAnyByte: + case kRegexpHaveMatch: + return true; + case kRegexpConcat: + case kRegexpAlternate: + // These are simple as long as the subpieces are simple. + subs = sub(); + for (int i = 0; i < nsub_; i++) if (!subs[i]->simple()) - return false; - return true; - case kRegexpCharClass: - // Simple as long as the char class is not empty, not full. - if (ccb_ != NULL) - return !ccb_->empty() && !ccb_->full(); - return !cc_->empty() && !cc_->full(); - case kRegexpCapture: - subs = sub(); + return false; + return true; + case kRegexpCharClass: + // Simple as long as the char class is not empty, not full. 
+ if (ccb_ != NULL) + return !ccb_->empty() && !ccb_->full(); + return !cc_->empty() && !cc_->full(); + case kRegexpCapture: + subs = sub(); return subs[0]->simple(); - case kRegexpStar: - case kRegexpPlus: - case kRegexpQuest: - subs = sub(); + case kRegexpStar: + case kRegexpPlus: + case kRegexpQuest: + subs = sub(); if (!subs[0]->simple()) - return false; - switch (subs[0]->op_) { - case kRegexpStar: - case kRegexpPlus: - case kRegexpQuest: - case kRegexpEmptyMatch: - case kRegexpNoMatch: - return false; - default: - break; - } - return true; - case kRegexpRepeat: - return false; - } - LOG(DFATAL) << "Case not handled in ComputeSimple: " << op_; - return false; -} - -// Walker subclass used by Simplify. + return false; + switch (subs[0]->op_) { + case kRegexpStar: + case kRegexpPlus: + case kRegexpQuest: + case kRegexpEmptyMatch: + case kRegexpNoMatch: + return false; + default: + break; + } + return true; + case kRegexpRepeat: + return false; + } + LOG(DFATAL) << "Case not handled in ComputeSimple: " << op_; + return false; +} + +// Walker subclass used by Simplify. // Coalesces runs of star/plus/quest/repeat of the same literal along with any // occurrences of that literal into repeats of that literal. It also works for // char classes, any char and any byte. @@ -130,51 +130,51 @@ class CoalesceWalker : public Regexp::Walker<Regexp*> { }; // Walker subclass used by Simplify. -// The simplify walk is purely post-recursive: given the simplified children, -// PostVisit creates the simplified result. -// The child_args are simplified Regexp*s. -class SimplifyWalker : public Regexp::Walker<Regexp*> { - public: - SimplifyWalker() {} - virtual Regexp* PreVisit(Regexp* re, Regexp* parent_arg, bool* stop); +// The simplify walk is purely post-recursive: given the simplified children, +// PostVisit creates the simplified result. +// The child_args are simplified Regexp*s. 
+class SimplifyWalker : public Regexp::Walker<Regexp*> { + public: + SimplifyWalker() {} + virtual Regexp* PreVisit(Regexp* re, Regexp* parent_arg, bool* stop); virtual Regexp* PostVisit(Regexp* re, Regexp* parent_arg, Regexp* pre_arg, - Regexp** child_args, int nchild_args); - virtual Regexp* Copy(Regexp* re); - virtual Regexp* ShortVisit(Regexp* re, Regexp* parent_arg); - - private: - // These functions are declared inside SimplifyWalker so that - // they can edit the private fields of the Regexps they construct. - - // Creates a concatenation of two Regexp, consuming refs to re1 and re2. - // Caller must Decref return value when done with it. - static Regexp* Concat2(Regexp* re1, Regexp* re2, Regexp::ParseFlags flags); - - // Simplifies the expression re{min,max} in terms of *, +, and ?. - // Returns a new regexp. Does not edit re. Does not consume reference to re. - // Caller must Decref return value when done with it. - static Regexp* SimplifyRepeat(Regexp* re, int min, int max, - Regexp::ParseFlags parse_flags); - - // Simplifies a character class by expanding any named classes - // into rune ranges. Does not edit re. Does not consume ref to re. - // Caller must Decref return value when done with it. - static Regexp* SimplifyCharClass(Regexp* re); - + Regexp** child_args, int nchild_args); + virtual Regexp* Copy(Regexp* re); + virtual Regexp* ShortVisit(Regexp* re, Regexp* parent_arg); + + private: + // These functions are declared inside SimplifyWalker so that + // they can edit the private fields of the Regexps they construct. + + // Creates a concatenation of two Regexp, consuming refs to re1 and re2. + // Caller must Decref return value when done with it. + static Regexp* Concat2(Regexp* re1, Regexp* re2, Regexp::ParseFlags flags); + + // Simplifies the expression re{min,max} in terms of *, +, and ?. + // Returns a new regexp. Does not edit re. Does not consume reference to re. + // Caller must Decref return value when done with it. 
+ static Regexp* SimplifyRepeat(Regexp* re, int min, int max, + Regexp::ParseFlags parse_flags); + + // Simplifies a character class by expanding any named classes + // into rune ranges. Does not edit re. Does not consume ref to re. + // Caller must Decref return value when done with it. + static Regexp* SimplifyCharClass(Regexp* re); + SimplifyWalker(const SimplifyWalker&) = delete; SimplifyWalker& operator=(const SimplifyWalker&) = delete; -}; - -// Simplifies a regular expression, returning a new regexp. -// The new regexp uses traditional Unix egrep features only, -// plus the Perl (?:) non-capturing parentheses. -// Otherwise, no POSIX or Perl additions. The new regexp -// captures exactly the same subexpressions (with the same indices) -// as the original. -// Does not edit current object. -// Caller must Decref() return value when done with it. - -Regexp* Regexp::Simplify() { +}; + +// Simplifies a regular expression, returning a new regexp. +// The new regexp uses traditional Unix egrep features only, +// plus the Perl (?:) non-capturing parentheses. +// Otherwise, no POSIX or Perl additions. The new regexp +// captures exactly the same subexpressions (with the same indices) +// as the original. +// Does not edit current object. +// Caller must Decref() return value when done with it. + +Regexp* Regexp::Simplify() { CoalesceWalker cw; Regexp* cre = cw.Walk(this, NULL); if (cre == NULL) @@ -193,10 +193,10 @@ Regexp* Regexp::Simplify() { return NULL; } return sre; -} - -#define Simplify DontCallSimplify // Avoid accidental recursion - +} + +#define Simplify DontCallSimplify // Avoid accidental recursion + // Utility function for PostVisit implementations that compares re->sub() with // child_args to determine whether any child_args changed. 
In the common case, // where nothing changed, calls Decref() for all child_args and returns false, @@ -441,225 +441,225 @@ void CoalesceWalker::DoCoalesce(Regexp** r1ptr, Regexp** r2ptr) { r2->Decref(); } -Regexp* SimplifyWalker::Copy(Regexp* re) { - return re->Incref(); -} - -Regexp* SimplifyWalker::ShortVisit(Regexp* re, Regexp* parent_arg) { +Regexp* SimplifyWalker::Copy(Regexp* re) { + return re->Incref(); +} + +Regexp* SimplifyWalker::ShortVisit(Regexp* re, Regexp* parent_arg) { // Should never be called: we use Walk(), not WalkExponential(). #ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION - LOG(DFATAL) << "SimplifyWalker::ShortVisit called"; + LOG(DFATAL) << "SimplifyWalker::ShortVisit called"; #endif - return re->Incref(); -} - -Regexp* SimplifyWalker::PreVisit(Regexp* re, Regexp* parent_arg, bool* stop) { + return re->Incref(); +} + +Regexp* SimplifyWalker::PreVisit(Regexp* re, Regexp* parent_arg, bool* stop) { if (re->simple()) { - *stop = true; - return re->Incref(); - } - return NULL; -} - -Regexp* SimplifyWalker::PostVisit(Regexp* re, - Regexp* parent_arg, - Regexp* pre_arg, - Regexp** child_args, - int nchild_args) { - switch (re->op()) { - case kRegexpNoMatch: - case kRegexpEmptyMatch: - case kRegexpLiteral: - case kRegexpLiteralString: - case kRegexpBeginLine: - case kRegexpEndLine: - case kRegexpBeginText: - case kRegexpWordBoundary: - case kRegexpNoWordBoundary: - case kRegexpEndText: - case kRegexpAnyChar: - case kRegexpAnyByte: - case kRegexpHaveMatch: - // All these are always simple. - re->simple_ = true; - return re->Incref(); - - case kRegexpConcat: - case kRegexpAlternate: { - // These are simple as long as the subpieces are simple. 
+ *stop = true; + return re->Incref(); + } + return NULL; +} + +Regexp* SimplifyWalker::PostVisit(Regexp* re, + Regexp* parent_arg, + Regexp* pre_arg, + Regexp** child_args, + int nchild_args) { + switch (re->op()) { + case kRegexpNoMatch: + case kRegexpEmptyMatch: + case kRegexpLiteral: + case kRegexpLiteralString: + case kRegexpBeginLine: + case kRegexpEndLine: + case kRegexpBeginText: + case kRegexpWordBoundary: + case kRegexpNoWordBoundary: + case kRegexpEndText: + case kRegexpAnyChar: + case kRegexpAnyByte: + case kRegexpHaveMatch: + // All these are always simple. + re->simple_ = true; + return re->Incref(); + + case kRegexpConcat: + case kRegexpAlternate: { + // These are simple as long as the subpieces are simple. if (!ChildArgsChanged(re, child_args)) { - re->simple_ = true; - return re->Incref(); - } - Regexp* nre = new Regexp(re->op(), re->parse_flags()); + re->simple_ = true; + return re->Incref(); + } + Regexp* nre = new Regexp(re->op(), re->parse_flags()); nre->AllocSub(re->nsub()); - Regexp** nre_subs = nre->sub(); + Regexp** nre_subs = nre->sub(); for (int i = 0; i < re->nsub(); i++) - nre_subs[i] = child_args[i]; - nre->simple_ = true; - return nre; - } - - case kRegexpCapture: { - Regexp* newsub = child_args[0]; - if (newsub == re->sub()[0]) { - newsub->Decref(); - re->simple_ = true; - return re->Incref(); - } - Regexp* nre = new Regexp(kRegexpCapture, re->parse_flags()); - nre->AllocSub(1); - nre->sub()[0] = newsub; + nre_subs[i] = child_args[i]; + nre->simple_ = true; + return nre; + } + + case kRegexpCapture: { + Regexp* newsub = child_args[0]; + if (newsub == re->sub()[0]) { + newsub->Decref(); + re->simple_ = true; + return re->Incref(); + } + Regexp* nre = new Regexp(kRegexpCapture, re->parse_flags()); + nre->AllocSub(1); + nre->sub()[0] = newsub; nre->cap_ = re->cap(); - nre->simple_ = true; - return nre; - } - - case kRegexpStar: - case kRegexpPlus: - case kRegexpQuest: { - Regexp* newsub = child_args[0]; - // Special case: repeat the 
empty string as much as - // you want, but it's still the empty string. - if (newsub->op() == kRegexpEmptyMatch) - return newsub; - - // These are simple as long as the subpiece is simple. - if (newsub == re->sub()[0]) { - newsub->Decref(); - re->simple_ = true; - return re->Incref(); - } - - // These are also idempotent if flags are constant. - if (re->op() == newsub->op() && - re->parse_flags() == newsub->parse_flags()) - return newsub; - - Regexp* nre = new Regexp(re->op(), re->parse_flags()); - nre->AllocSub(1); - nre->sub()[0] = newsub; - nre->simple_ = true; - return nre; - } - - case kRegexpRepeat: { - Regexp* newsub = child_args[0]; - // Special case: repeat the empty string as much as - // you want, but it's still the empty string. - if (newsub->op() == kRegexpEmptyMatch) - return newsub; - - Regexp* nre = SimplifyRepeat(newsub, re->min_, re->max_, - re->parse_flags()); - newsub->Decref(); - nre->simple_ = true; - return nre; - } - - case kRegexpCharClass: { - Regexp* nre = SimplifyCharClass(re); - nre->simple_ = true; - return nre; - } - } - - LOG(ERROR) << "Simplify case not handled: " << re->op(); - return re->Incref(); -} - -// Creates a concatenation of two Regexp, consuming refs to re1 and re2. -// Returns a new Regexp, handing the ref to the caller. -Regexp* SimplifyWalker::Concat2(Regexp* re1, Regexp* re2, - Regexp::ParseFlags parse_flags) { - Regexp* re = new Regexp(kRegexpConcat, parse_flags); - re->AllocSub(2); - Regexp** subs = re->sub(); - subs[0] = re1; - subs[1] = re2; - return re; -} - -// Simplifies the expression re{min,max} in terms of *, +, and ?. -// Returns a new regexp. Does not edit re. Does not consume reference to re. -// Caller must Decref return value when done with it. -// The result will *not* necessarily have the right capturing parens -// if you call ToString() and re-parse it: (x){2} becomes (x)(x), -// but in the Regexp* representation, both (x) are marked as $1. 
-Regexp* SimplifyWalker::SimplifyRepeat(Regexp* re, int min, int max, - Regexp::ParseFlags f) { - // x{n,} means at least n matches of x. - if (max == -1) { - // Special case: x{0,} is x* - if (min == 0) - return Regexp::Star(re->Incref(), f); - - // Special case: x{1,} is x+ - if (min == 1) - return Regexp::Plus(re->Incref(), f); - - // General case: x{4,} is xxxx+ + nre->simple_ = true; + return nre; + } + + case kRegexpStar: + case kRegexpPlus: + case kRegexpQuest: { + Regexp* newsub = child_args[0]; + // Special case: repeat the empty string as much as + // you want, but it's still the empty string. + if (newsub->op() == kRegexpEmptyMatch) + return newsub; + + // These are simple as long as the subpiece is simple. + if (newsub == re->sub()[0]) { + newsub->Decref(); + re->simple_ = true; + return re->Incref(); + } + + // These are also idempotent if flags are constant. + if (re->op() == newsub->op() && + re->parse_flags() == newsub->parse_flags()) + return newsub; + + Regexp* nre = new Regexp(re->op(), re->parse_flags()); + nre->AllocSub(1); + nre->sub()[0] = newsub; + nre->simple_ = true; + return nre; + } + + case kRegexpRepeat: { + Regexp* newsub = child_args[0]; + // Special case: repeat the empty string as much as + // you want, but it's still the empty string. + if (newsub->op() == kRegexpEmptyMatch) + return newsub; + + Regexp* nre = SimplifyRepeat(newsub, re->min_, re->max_, + re->parse_flags()); + newsub->Decref(); + nre->simple_ = true; + return nre; + } + + case kRegexpCharClass: { + Regexp* nre = SimplifyCharClass(re); + nre->simple_ = true; + return nre; + } + } + + LOG(ERROR) << "Simplify case not handled: " << re->op(); + return re->Incref(); +} + +// Creates a concatenation of two Regexp, consuming refs to re1 and re2. +// Returns a new Regexp, handing the ref to the caller. 
+Regexp* SimplifyWalker::Concat2(Regexp* re1, Regexp* re2, + Regexp::ParseFlags parse_flags) { + Regexp* re = new Regexp(kRegexpConcat, parse_flags); + re->AllocSub(2); + Regexp** subs = re->sub(); + subs[0] = re1; + subs[1] = re2; + return re; +} + +// Simplifies the expression re{min,max} in terms of *, +, and ?. +// Returns a new regexp. Does not edit re. Does not consume reference to re. +// Caller must Decref return value when done with it. +// The result will *not* necessarily have the right capturing parens +// if you call ToString() and re-parse it: (x){2} becomes (x)(x), +// but in the Regexp* representation, both (x) are marked as $1. +Regexp* SimplifyWalker::SimplifyRepeat(Regexp* re, int min, int max, + Regexp::ParseFlags f) { + // x{n,} means at least n matches of x. + if (max == -1) { + // Special case: x{0,} is x* + if (min == 0) + return Regexp::Star(re->Incref(), f); + + // Special case: x{1,} is x+ + if (min == 1) + return Regexp::Plus(re->Incref(), f); + + // General case: x{4,} is xxxx+ PODArray<Regexp*> nre_subs(min); - for (int i = 0; i < min-1; i++) - nre_subs[i] = re->Incref(); - nre_subs[min-1] = Regexp::Plus(re->Incref(), f); + for (int i = 0; i < min-1; i++) + nre_subs[i] = re->Incref(); + nre_subs[min-1] = Regexp::Plus(re->Incref(), f); return Regexp::Concat(nre_subs.data(), min, f); - } - - // Special case: (x){0} matches only empty string. - if (min == 0 && max == 0) - return new Regexp(kRegexpEmptyMatch, f); - - // Special case: x{1} is just x. - if (min == 1 && max == 1) - return re->Incref(); - - // General case: x{n,m} means n copies of x and m copies of x?. - // The machine will do less work if we nest the final m copies, - // so that x{2,5} = xx(x(x(x)?)?)? - - // Build leading prefix: xx. Capturing only on the last one. - Regexp* nre = NULL; - if (min > 0) { + } + + // Special case: (x){0} matches only empty string. + if (min == 0 && max == 0) + return new Regexp(kRegexpEmptyMatch, f); + + // Special case: x{1} is just x. 
+ if (min == 1 && max == 1) + return re->Incref(); + + // General case: x{n,m} means n copies of x and m copies of x?. + // The machine will do less work if we nest the final m copies, + // so that x{2,5} = xx(x(x(x)?)?)? + + // Build leading prefix: xx. Capturing only on the last one. + Regexp* nre = NULL; + if (min > 0) { PODArray<Regexp*> nre_subs(min); - for (int i = 0; i < min; i++) - nre_subs[i] = re->Incref(); + for (int i = 0; i < min; i++) + nre_subs[i] = re->Incref(); nre = Regexp::Concat(nre_subs.data(), min, f); - } - - // Build and attach suffix: (x(x(x)?)?)? - if (max > min) { - Regexp* suf = Regexp::Quest(re->Incref(), f); - for (int i = min+1; i < max; i++) - suf = Regexp::Quest(Concat2(re->Incref(), suf, f), f); - if (nre == NULL) - nre = suf; - else - nre = Concat2(nre, suf, f); - } - - if (nre == NULL) { - // Some degenerate case, like min > max, or min < max < 0. - // This shouldn't happen, because the parser rejects such regexps. - LOG(DFATAL) << "Malformed repeat " << re->ToString() << " " << min << " " << max; - return new Regexp(kRegexpNoMatch, f); - } - - return nre; -} - -// Simplifies a character class. -// Caller must Decref return value when done with it. -Regexp* SimplifyWalker::SimplifyCharClass(Regexp* re) { - CharClass* cc = re->cc(); - - // Special cases - if (cc->empty()) - return new Regexp(kRegexpNoMatch, re->parse_flags()); - if (cc->full()) - return new Regexp(kRegexpAnyChar, re->parse_flags()); - - return re->Incref(); -} - -} // namespace re2 + } + + // Build and attach suffix: (x(x(x)?)?)? + if (max > min) { + Regexp* suf = Regexp::Quest(re->Incref(), f); + for (int i = min+1; i < max; i++) + suf = Regexp::Quest(Concat2(re->Incref(), suf, f), f); + if (nre == NULL) + nre = suf; + else + nre = Concat2(nre, suf, f); + } + + if (nre == NULL) { + // Some degenerate case, like min > max, or min < max < 0. + // This shouldn't happen, because the parser rejects such regexps. 
+ LOG(DFATAL) << "Malformed repeat " << re->ToString() << " " << min << " " << max; + return new Regexp(kRegexpNoMatch, f); + } + + return nre; +} + +// Simplifies a character class. +// Caller must Decref return value when done with it. +Regexp* SimplifyWalker::SimplifyCharClass(Regexp* re) { + CharClass* cc = re->cc(); + + // Special cases + if (cc->empty()) + return new Regexp(kRegexpNoMatch, re->parse_flags()); + if (cc->full()) + return new Regexp(kRegexpAnyChar, re->parse_flags()); + + return re->Incref(); +} + +} // namespace re2 diff --git a/contrib/libs/re2/re2/sparse_array.h b/contrib/libs/re2/re2/sparse_array.h index 09ffe086b7..343b1ffdf2 100644 --- a/contrib/libs/re2/re2/sparse_array.h +++ b/contrib/libs/re2/re2/sparse_array.h @@ -1,68 +1,68 @@ -// Copyright 2006 The RE2 Authors. All Rights Reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - +// Copyright 2006 The RE2 Authors. All Rights Reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + #ifndef RE2_SPARSE_ARRAY_H_ #define RE2_SPARSE_ARRAY_H_ -// DESCRIPTION +// DESCRIPTION // -// SparseArray<T>(m) is a map from integers in [0, m) to T values. -// It requires (sizeof(T)+sizeof(int))*m memory, but it provides -// fast iteration through the elements in the array and fast clearing -// of the array. The array has a concept of certain elements being -// uninitialized (having no value). +// SparseArray<T>(m) is a map from integers in [0, m) to T values. +// It requires (sizeof(T)+sizeof(int))*m memory, but it provides +// fast iteration through the elements in the array and fast clearing +// of the array. The array has a concept of certain elements being +// uninitialized (having no value). // -// Insertion and deletion are constant time operations. +// Insertion and deletion are constant time operations. 
// // Allocating the array is a constant time operation -// when memory allocation is a constant time operation. -// -// Clearing the array is a constant time operation (unusual!). +// when memory allocation is a constant time operation. // -// Iterating through the array is an O(n) operation, where n -// is the number of items in the array (not O(m)). +// Clearing the array is a constant time operation (unusual!). // +// Iterating through the array is an O(n) operation, where n +// is the number of items in the array (not O(m)). +// // The array iterator visits entries in the order they were first -// inserted into the array. It is safe to add items to the array while -// using an iterator: the iterator will visit indices added to the array -// during the iteration, but will not re-visit indices whose values -// change after visiting. Thus SparseArray can be a convenient -// implementation of a work queue. +// inserted into the array. It is safe to add items to the array while +// using an iterator: the iterator will visit indices added to the array +// during the iteration, but will not re-visit indices whose values +// change after visiting. Thus SparseArray can be a convenient +// implementation of a work queue. // -// The SparseArray implementation is NOT thread-safe. It is up to the -// caller to make sure only one thread is accessing the array. (Typically -// these arrays are temporary values and used in situations where speed is -// important.) +// The SparseArray implementation is NOT thread-safe. It is up to the +// caller to make sure only one thread is accessing the array. (Typically +// these arrays are temporary values and used in situations where speed is +// important.) // -// The SparseArray interface does not present all the usual STL bells and -// whistles. +// The SparseArray interface does not present all the usual STL bells and +// whistles. 
// -// Implemented with reference to Briggs & Torczon, An Efficient -// Representation for Sparse Sets, ACM Letters on Programming Languages -// and Systems, Volume 2, Issue 1-4 (March-Dec. 1993), pp. 59-69. +// Implemented with reference to Briggs & Torczon, An Efficient +// Representation for Sparse Sets, ACM Letters on Programming Languages +// and Systems, Volume 2, Issue 1-4 (March-Dec. 1993), pp. 59-69. // -// Briggs & Torczon popularized this technique, but it had been known -// long before their paper. They point out that Aho, Hopcroft, and -// Ullman's 1974 Design and Analysis of Computer Algorithms and Bentley's -// 1986 Programming Pearls both hint at the technique in exercises to the -// reader (in Aho & Hopcroft, exercise 2.12; in Bentley, column 1 -// exercise 8). -// -// Briggs & Torczon describe a sparse set implementation. I have -// trivially generalized it to create a sparse array (actually the original -// target of the AHU and Bentley exercises). - -// IMPLEMENTATION +// Briggs & Torczon popularized this technique, but it had been known +// long before their paper. They point out that Aho, Hopcroft, and +// Ullman's 1974 Design and Analysis of Computer Algorithms and Bentley's +// 1986 Programming Pearls both hint at the technique in exercises to the +// reader (in Aho & Hopcroft, exercise 2.12; in Bentley, column 1 +// exercise 8). // +// Briggs & Torczon describe a sparse set implementation. I have +// trivially generalized it to create a sparse array (actually the original +// target of the AHU and Bentley exercises). + +// IMPLEMENTATION +// // SparseArray is an array dense_ and an array sparse_ of identical size. // At any point, the number of elements in the sparse array is size_. // // The array dense_ contains the size_ elements in the sparse array (with -// their indices), -// in the order that the elements were first inserted. This array is dense: -// the size_ pairs are dense_[0] through dense_[size_-1]. 
-// +// their indices), +// in the order that the elements were first inserted. This array is dense: +// the size_ pairs are dense_[0] through dense_[size_-1]. +// // The array sparse_ maps from indices in [0,m) to indices in [0,size_). // For indices present in the array, dense_[sparse_[i]].index_ == i. // For indices not present in the array, sparse_ can contain any value at all, @@ -75,19 +75,19 @@ // dense_[sparse_[i]].index_ == i. // If both these properties hold, only then it is safe to refer to // dense_[sparse_[i]].value_ -// as the value associated with index i. -// +// as the value associated with index i. +// // To insert a new entry, set sparse_[i] to size_, -// initialize dense_[size_], and then increment size_. -// -// To make the sparse array as efficient as possible for non-primitive types, -// elements may or may not be destroyed when they are deleted from the sparse +// initialize dense_[size_], and then increment size_. +// +// To make the sparse array as efficient as possible for non-primitive types, +// elements may or may not be destroyed when they are deleted from the sparse // array through a call to resize(). They immediately become inaccessible, but // they are only guaranteed to be destroyed when the SparseArray destructor is // called. // // A moved-from SparseArray will be empty. - + // Doing this simplifies the logic below. #ifndef __has_feature #define __has_feature(x) 0 @@ -101,100 +101,100 @@ #include <algorithm> #include <memory> #include <utility> - + #include "re2/pod_array.h" -namespace re2 { - -template<typename Value> -class SparseArray { - public: - SparseArray(); +namespace re2 { + +template<typename Value> +class SparseArray { + public: + SparseArray(); explicit SparseArray(int max_size); - ~SparseArray(); - - // IndexValue pairs: exposed in SparseArray::iterator. - class IndexValue; - + ~SparseArray(); + + // IndexValue pairs: exposed in SparseArray::iterator. 
+ class IndexValue; + typedef IndexValue* iterator; typedef const IndexValue* const_iterator; - + SparseArray(const SparseArray& src); SparseArray(SparseArray&& src); - + SparseArray& operator=(const SparseArray& src); SparseArray& operator=(SparseArray&& src); - // Return the number of entries in the array. - int size() const { - return size_; - } - + // Return the number of entries in the array. + int size() const { + return size_; + } + // Indicate whether the array is empty. int empty() const { return size_ == 0; } - // Iterate over the array. - iterator begin() { + // Iterate over the array. + iterator begin() { return dense_.data(); - } - iterator end() { + } + iterator end() { return dense_.data() + size_; - } - - const_iterator begin() const { + } + + const_iterator begin() const { return dense_.data(); - } - const_iterator end() const { + } + const_iterator end() const { return dense_.data() + size_; - } - - // Change the maximum size of the array. - // Invalidates all iterators. + } + + // Change the maximum size of the array. + // Invalidates all iterators. void resize(int new_max_size); - - // Return the maximum size of the array. - // Indices can be in the range [0, max_size). - int max_size() const { + + // Return the maximum size of the array. + // Indices can be in the range [0, max_size). + int max_size() const { if (dense_.data() != NULL) return dense_.size(); else return 0; - } - - // Clear the array. - void clear() { - size_ = 0; - } - - // Check whether index i is in the array. + } + + // Clear the array. + void clear() { + size_ = 0; + } + + // Check whether index i is in the array. bool has_index(int i) const; - - // Comparison function for sorting. - // Can sort the sparse array so that future iterations - // will visit indices in increasing order using + + // Comparison function for sorting. 
+ // Can sort the sparse array so that future iterations + // will visit indices in increasing order using // std::sort(arr.begin(), arr.end(), arr.less); - static bool less(const IndexValue& a, const IndexValue& b); - - public: - // Set the value at index i to v. + static bool less(const IndexValue& a, const IndexValue& b); + + public: + // Set the value at index i to v. iterator set(int i, const Value& v) { return SetInternal(true, i, v); } - + // Set the value at new index i to v. // Fast but unsafe: only use if has_index(i) is false. iterator set_new(int i, const Value& v) { return SetInternal(false, i, v); } - + // Set the value at index i to v. - // Fast but unsafe: only use if has_index(i) is true. + // Fast but unsafe: only use if has_index(i) is true. iterator set_existing(int i, const Value& v) { return SetExistingInternal(i, v); } - + // Get the value at index i. // Fast but unsafe: only use if has_index(i) is true. Value& get_existing(int i) { @@ -205,8 +205,8 @@ class SparseArray { assert(has_index(i)); return dense_[sparse_[i]].value_; } - - private: + + private: iterator SetInternal(bool allow_existing, int i, const Value& v) { DebugCheckInvariants(); if (static_cast<uint32_t>(i) >= static_cast<uint32_t>(max_size())) { @@ -234,18 +234,18 @@ class SparseArray { return dense_.data() + sparse_[i]; } - // Add the index i to the array. - // Only use if has_index(i) is known to be false. - // Since it doesn't set the value associated with i, - // this function is private, only intended as a helper - // for other methods. + // Add the index i to the array. + // Only use if has_index(i) is known to be false. + // Since it doesn't set the value associated with i, + // this function is private, only intended as a helper + // for other methods. void create_index(int i); - - // In debug mode, verify that some invariant properties of the class - // are being maintained. 
This is called at the end of the constructor - // and at the beginning and end of all public non-const member functions. + + // In debug mode, verify that some invariant properties of the class + // are being maintained. This is called at the end of the constructor + // and at the beginning and end of all public non-const member functions. void DebugCheckInvariants() const; - + // Initializes memory for elements [min, max). void MaybeInitializeMemory(int min, int max) { #if __has_feature(memory_sanitizer) @@ -260,11 +260,11 @@ class SparseArray { int size_ = 0; PODArray<int> sparse_; PODArray<IndexValue> dense_; -}; - -template<typename Value> +}; + +template<typename Value> SparseArray<Value>::SparseArray() = default; - + template<typename Value> SparseArray<Value>::SparseArray(const SparseArray& src) : size_(src.size_), @@ -305,28 +305,28 @@ SparseArray<Value>& SparseArray<Value>::operator=(SparseArray&& src) { return *this; } -// IndexValue pairs: exposed in SparseArray::iterator. -template<typename Value> -class SparseArray<Value>::IndexValue { - public: - int index() const { return index_; } +// IndexValue pairs: exposed in SparseArray::iterator. +template<typename Value> +class SparseArray<Value>::IndexValue { + public: + int index() const { return index_; } Value& value() { return value_; } const Value& value() const { return value_; } - + private: friend class SparseArray; int index_; Value value_; -}; - -// Change the maximum size of the array. -// Invalidates all iterators. -template<typename Value> +}; + +// Change the maximum size of the array. +// Invalidates all iterators. +template<typename Value> void SparseArray<Value>::resize(int new_max_size) { - DebugCheckInvariants(); + DebugCheckInvariants(); if (new_max_size > max_size()) { const int old_max_size = max_size(); - + // Construct these first for exception safety. 
PODArray<int> a(new_max_size); PODArray<IndexValue> b(new_max_size); @@ -338,55 +338,55 @@ void SparseArray<Value>::resize(int new_max_size) { dense_ = std::move(b); MaybeInitializeMemory(old_max_size, new_max_size); - } + } if (size_ > new_max_size) size_ = new_max_size; - DebugCheckInvariants(); -} - -// Check whether index i is in the array. -template<typename Value> -bool SparseArray<Value>::has_index(int i) const { + DebugCheckInvariants(); +} + +// Check whether index i is in the array. +template<typename Value> +bool SparseArray<Value>::has_index(int i) const { assert(i >= 0); assert(i < max_size()); if (static_cast<uint32_t>(i) >= static_cast<uint32_t>(max_size())) { - return false; - } + return false; + } // Unsigned comparison avoids checking sparse_[i] < 0. return (uint32_t)sparse_[i] < (uint32_t)size_ && dense_[sparse_[i]].index_ == i; -} - -template<typename Value> -void SparseArray<Value>::create_index(int i) { +} + +template<typename Value> +void SparseArray<Value>::create_index(int i) { assert(!has_index(i)); assert(size_ < max_size()); sparse_[i] = size_; - dense_[size_].index_ = i; - size_++; -} - + dense_[size_].index_ = i; + size_++; +} + template<typename Value> SparseArray<Value>::SparseArray(int max_size) : sparse_(max_size), dense_(max_size) { MaybeInitializeMemory(size_, max_size); - DebugCheckInvariants(); -} - -template<typename Value> SparseArray<Value>::~SparseArray() { - DebugCheckInvariants(); -} - -template<typename Value> void SparseArray<Value>::DebugCheckInvariants() const { + DebugCheckInvariants(); +} + +template<typename Value> SparseArray<Value>::~SparseArray() { + DebugCheckInvariants(); +} + +template<typename Value> void SparseArray<Value>::DebugCheckInvariants() const { assert(0 <= size_); assert(size_ <= max_size()); -} - -// Comparison function for sorting. 
-template<typename Value> bool SparseArray<Value>::less(const IndexValue& a, - const IndexValue& b) { - return a.index_ < b.index_; -} - -} // namespace re2 - +} + +// Comparison function for sorting. +template<typename Value> bool SparseArray<Value>::less(const IndexValue& a, + const IndexValue& b) { + return a.index_ < b.index_; +} + +} // namespace re2 + #endif // RE2_SPARSE_ARRAY_H_ diff --git a/contrib/libs/re2/re2/sparse_set.h b/contrib/libs/re2/re2/sparse_set.h index 06ed88d81b..99b18051ef 100644 --- a/contrib/libs/re2/re2/sparse_set.h +++ b/contrib/libs/re2/re2/sparse_set.h @@ -1,52 +1,52 @@ -// Copyright 2006 The RE2 Authors. All Rights Reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - +// Copyright 2006 The RE2 Authors. All Rights Reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + #ifndef RE2_SPARSE_SET_H_ #define RE2_SPARSE_SET_H_ -// DESCRIPTION +// DESCRIPTION // // SparseSet(m) is a set of integers in [0, m). -// It requires sizeof(int)*m memory, but it provides -// fast iteration through the elements in the set and fast clearing -// of the set. +// It requires sizeof(int)*m memory, but it provides +// fast iteration through the elements in the set and fast clearing +// of the set. // -// Insertion and deletion are constant time operations. +// Insertion and deletion are constant time operations. // // Allocating the set is a constant time operation -// when memory allocation is a constant time operation. -// -// Clearing the set is a constant time operation (unusual!). +// when memory allocation is a constant time operation. // -// Iterating through the set is an O(n) operation, where n -// is the number of items in the set (not O(m)). +// Clearing the set is a constant time operation (unusual!). // +// Iterating through the set is an O(n) operation, where n +// is the number of items in the set (not O(m)). 
+// // The set iterator visits entries in the order they were first // inserted into the set. It is safe to add items to the set while -// using an iterator: the iterator will visit indices added to the set -// during the iteration, but will not re-visit indices whose values -// change after visiting. Thus SparseSet can be a convenient -// implementation of a work queue. +// using an iterator: the iterator will visit indices added to the set +// during the iteration, but will not re-visit indices whose values +// change after visiting. Thus SparseSet can be a convenient +// implementation of a work queue. // -// The SparseSet implementation is NOT thread-safe. It is up to the -// caller to make sure only one thread is accessing the set. (Typically -// these sets are temporary values and used in situations where speed is -// important.) +// The SparseSet implementation is NOT thread-safe. It is up to the +// caller to make sure only one thread is accessing the set. (Typically +// these sets are temporary values and used in situations where speed is +// important.) // -// The SparseSet interface does not present all the usual STL bells and -// whistles. +// The SparseSet interface does not present all the usual STL bells and +// whistles. // -// Implemented with reference to Briggs & Torczon, An Efficient -// Representation for Sparse Sets, ACM Letters on Programming Languages -// and Systems, Volume 2, Issue 1-4 (March-Dec. 1993), pp. 59-69. +// Implemented with reference to Briggs & Torczon, An Efficient +// Representation for Sparse Sets, ACM Letters on Programming Languages +// and Systems, Volume 2, Issue 1-4 (March-Dec. 1993), pp. 59-69. // // This is a specialization of sparse array; see sparse_array.h. - -// IMPLEMENTATION -// + +// IMPLEMENTATION +// // See sparse_array.h for implementation details. - + // Doing this simplifies the logic below. 
#ifndef __has_feature #define __has_feature(x) 0 @@ -60,31 +60,31 @@ #include <algorithm> #include <memory> #include <utility> - + #include "re2/pod_array.h" -namespace re2 { - +namespace re2 { + template<typename Value> class SparseSetT { - public: + public: SparseSetT(); explicit SparseSetT(int max_size); ~SparseSetT(); - + typedef int* iterator; typedef const int* const_iterator; // Return the number of entries in the set. int size() const { return size_; - } - + } + // Indicate whether the set is empty. int empty() const { return size_ == 0; - } - + } + // Iterate over the set. iterator begin() { return dense_.data(); @@ -92,18 +92,18 @@ class SparseSetT { iterator end() { return dense_.data() + size_; } - + const_iterator begin() const { return dense_.data(); } const_iterator end() const { return dense_.data() + size_; } - + // Change the maximum size of the set. - // Invalidates all iterators. + // Invalidates all iterators. void resize(int new_max_size); - + // Return the maximum size of the set. // Indices can be in the range [0, max_size). int max_size() const { @@ -111,16 +111,16 @@ class SparseSetT { return dense_.size(); else return 0; - } - + } + // Clear the set. void clear() { size_ = 0; } - + // Check whether index i is in the set. bool contains(int i) const; - + // Comparison function for sorting. // Can sort the sparse set so that future iterations // will visit indices in increasing order using @@ -131,24 +131,24 @@ class SparseSetT { // Insert index i into the set. iterator insert(int i) { return InsertInternal(true, i); - } - + } + // Insert index i into the set. // Fast but unsafe: only use if contains(i) is false. 
iterator insert_new(int i) { return InsertInternal(false, i); - } - + } + private: iterator InsertInternal(bool allow_existing, int i) { DebugCheckInvariants(); if (static_cast<uint32_t>(i) >= static_cast<uint32_t>(max_size())) { assert(false && "illegal index"); - // Semantically, end() would be better here, but we already know - // the user did something stupid, so begin() insulates them from - // dereferencing an invalid pointer. + // Semantically, end() would be better here, but we already know + // the user did something stupid, so begin() insulates them from + // dereferencing an invalid pointer. return begin(); - } + } if (!allow_existing) { assert(!contains(i)); create_index(i); @@ -158,19 +158,19 @@ class SparseSetT { } DebugCheckInvariants(); return dense_.data() + sparse_[i]; - } - + } + // Add the index i to the set. // Only use if contains(i) is known to be false. // This function is private, only intended as a helper // for other methods. void create_index(int i); - + // In debug mode, verify that some invariant properties of the class // are being maintained. This is called at the end of the constructor // and at the beginning and end of all public non-const member functions. void DebugCheckInvariants() const; - + // Initializes memory for elements [min, max). 
void MaybeInitializeMemory(int min, int max) { #if __has_feature(memory_sanitizer) @@ -185,8 +185,8 @@ class SparseSetT { int size_ = 0; PODArray<int> sparse_; PODArray<int> dense_; -}; - +}; + template<typename Value> SparseSetT<Value>::SparseSetT() = default; @@ -259,6 +259,6 @@ template<typename Value> bool SparseSetT<Value>::less(int a, int b) { typedef SparseSetT<void> SparseSet; -} // namespace re2 - +} // namespace re2 + #endif // RE2_SPARSE_SET_H_ diff --git a/contrib/libs/re2/re2/tostring.cc b/contrib/libs/re2/re2/tostring.cc index 9c1c038ca6..a2b2a7ddaf 100644 --- a/contrib/libs/re2/re2/tostring.cc +++ b/contrib/libs/re2/re2/tostring.cc @@ -1,10 +1,10 @@ -// Copyright 2006 The RE2 Authors. All Rights Reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// Format a regular expression structure as a string. -// Tested by parse_test.cc - +// Copyright 2006 The RE2 Authors. All Rights Reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Format a regular expression structure as a string. +// Tested by parse_test.cc + #include <string.h> #include <string> @@ -12,340 +12,340 @@ #include "util/logging.h" #include "util/strutil.h" #include "util/utf.h" -#include "re2/regexp.h" -#include "re2/walker-inl.h" - -namespace re2 { - -enum { - PrecAtom, - PrecUnary, - PrecConcat, - PrecAlternate, - PrecEmpty, - PrecParen, - PrecToplevel, -}; - -// Helper function. See description below. +#include "re2/regexp.h" +#include "re2/walker-inl.h" + +namespace re2 { + +enum { + PrecAtom, + PrecUnary, + PrecConcat, + PrecAlternate, + PrecEmpty, + PrecParen, + PrecToplevel, +}; + +// Helper function. See description below. static void AppendCCRange(std::string* t, Rune lo, Rune hi); - -// Walker to generate string in s_. -// The arg pointers are actually integers giving the -// context precedence. -// The child_args are always NULL. 
-class ToStringWalker : public Regexp::Walker<int> { - public: + +// Walker to generate string in s_. +// The arg pointers are actually integers giving the +// context precedence. +// The child_args are always NULL. +class ToStringWalker : public Regexp::Walker<int> { + public: explicit ToStringWalker(std::string* t) : t_(t) {} - - virtual int PreVisit(Regexp* re, int parent_arg, bool* stop); - virtual int PostVisit(Regexp* re, int parent_arg, int pre_arg, - int* child_args, int nchild_args); - virtual int ShortVisit(Regexp* re, int parent_arg) { - return 0; - } - - private: + + virtual int PreVisit(Regexp* re, int parent_arg, bool* stop); + virtual int PostVisit(Regexp* re, int parent_arg, int pre_arg, + int* child_args, int nchild_args); + virtual int ShortVisit(Regexp* re, int parent_arg) { + return 0; + } + + private: std::string* t_; // The string the walker appends to. - + ToStringWalker(const ToStringWalker&) = delete; ToStringWalker& operator=(const ToStringWalker&) = delete; -}; - +}; + std::string Regexp::ToString() { std::string t; - ToStringWalker w(&t); - w.WalkExponential(this, PrecToplevel, 100000); - if (w.stopped_early()) - t += " [truncated]"; - return t; -} - -#define ToString DontCallToString // Avoid accidental recursion. - -// Visits re before children are processed. -// Appends ( if needed and passes new precedence to children. 
-int ToStringWalker::PreVisit(Regexp* re, int parent_arg, bool* stop) { - int prec = parent_arg; - int nprec = PrecAtom; - - switch (re->op()) { - case kRegexpNoMatch: - case kRegexpEmptyMatch: - case kRegexpLiteral: - case kRegexpAnyChar: - case kRegexpAnyByte: - case kRegexpBeginLine: - case kRegexpEndLine: - case kRegexpBeginText: - case kRegexpEndText: - case kRegexpWordBoundary: - case kRegexpNoWordBoundary: - case kRegexpCharClass: - case kRegexpHaveMatch: - nprec = PrecAtom; - break; - - case kRegexpConcat: - case kRegexpLiteralString: - if (prec < PrecConcat) - t_->append("(?:"); - nprec = PrecConcat; - break; - - case kRegexpAlternate: - if (prec < PrecAlternate) - t_->append("(?:"); - nprec = PrecAlternate; - break; - - case kRegexpCapture: - t_->append("("); + ToStringWalker w(&t); + w.WalkExponential(this, PrecToplevel, 100000); + if (w.stopped_early()) + t += " [truncated]"; + return t; +} + +#define ToString DontCallToString // Avoid accidental recursion. + +// Visits re before children are processed. +// Appends ( if needed and passes new precedence to children. 
+int ToStringWalker::PreVisit(Regexp* re, int parent_arg, bool* stop) { + int prec = parent_arg; + int nprec = PrecAtom; + + switch (re->op()) { + case kRegexpNoMatch: + case kRegexpEmptyMatch: + case kRegexpLiteral: + case kRegexpAnyChar: + case kRegexpAnyByte: + case kRegexpBeginLine: + case kRegexpEndLine: + case kRegexpBeginText: + case kRegexpEndText: + case kRegexpWordBoundary: + case kRegexpNoWordBoundary: + case kRegexpCharClass: + case kRegexpHaveMatch: + nprec = PrecAtom; + break; + + case kRegexpConcat: + case kRegexpLiteralString: + if (prec < PrecConcat) + t_->append("(?:"); + nprec = PrecConcat; + break; + + case kRegexpAlternate: + if (prec < PrecAlternate) + t_->append("(?:"); + nprec = PrecAlternate; + break; + + case kRegexpCapture: + t_->append("("); if (re->cap() == 0) LOG(DFATAL) << "kRegexpCapture cap() == 0"; - if (re->name()) { - t_->append("?P<"); - t_->append(*re->name()); - t_->append(">"); - } - nprec = PrecParen; - break; - - case kRegexpStar: - case kRegexpPlus: - case kRegexpQuest: - case kRegexpRepeat: - if (prec < PrecUnary) - t_->append("(?:"); - // The subprecedence here is PrecAtom instead of PrecUnary - // because PCRE treats two unary ops in a row as a parse error. - nprec = PrecAtom; - break; - } - - return nprec; -} - + if (re->name()) { + t_->append("?P<"); + t_->append(*re->name()); + t_->append(">"); + } + nprec = PrecParen; + break; + + case kRegexpStar: + case kRegexpPlus: + case kRegexpQuest: + case kRegexpRepeat: + if (prec < PrecUnary) + t_->append("(?:"); + // The subprecedence here is PrecAtom instead of PrecUnary + // because PCRE treats two unary ops in a row as a parse error. 
+ nprec = PrecAtom; + break; + } + + return nprec; +} + static void AppendLiteral(std::string *t, Rune r, bool foldcase) { - if (r != 0 && r < 0x80 && strchr("(){}[]*+?|.^$\\", r)) { - t->append(1, '\\'); + if (r != 0 && r < 0x80 && strchr("(){}[]*+?|.^$\\", r)) { + t->append(1, '\\'); t->append(1, static_cast<char>(r)); - } else if (foldcase && 'a' <= r && r <= 'z') { + } else if (foldcase && 'a' <= r && r <= 'z') { r -= 'a' - 'A'; - t->append(1, '['); + t->append(1, '['); t->append(1, static_cast<char>(r)); t->append(1, static_cast<char>(r) + 'a' - 'A'); - t->append(1, ']'); - } else { - AppendCCRange(t, r, r); - } -} - -// Visits re after children are processed. -// For childless regexps, all the work is done here. -// For regexps with children, append any unary suffixes or ). -int ToStringWalker::PostVisit(Regexp* re, int parent_arg, int pre_arg, - int* child_args, int nchild_args) { - int prec = parent_arg; - switch (re->op()) { - case kRegexpNoMatch: - // There's no simple symbol for "no match", but - // [^0-Runemax] excludes everything. - t_->append("[^\\x00-\\x{10ffff}]"); - break; - - case kRegexpEmptyMatch: - // Append (?:) to make empty string visible, - // unless this is already being parenthesized. - if (prec < PrecEmpty) - t_->append("(?:)"); - break; - - case kRegexpLiteral: + t->append(1, ']'); + } else { + AppendCCRange(t, r, r); + } +} + +// Visits re after children are processed. +// For childless regexps, all the work is done here. +// For regexps with children, append any unary suffixes or ). +int ToStringWalker::PostVisit(Regexp* re, int parent_arg, int pre_arg, + int* child_args, int nchild_args) { + int prec = parent_arg; + switch (re->op()) { + case kRegexpNoMatch: + // There's no simple symbol for "no match", but + // [^0-Runemax] excludes everything. + t_->append("[^\\x00-\\x{10ffff}]"); + break; + + case kRegexpEmptyMatch: + // Append (?:) to make empty string visible, + // unless this is already being parenthesized. 
+ if (prec < PrecEmpty) + t_->append("(?:)"); + break; + + case kRegexpLiteral: AppendLiteral(t_, re->rune(), (re->parse_flags() & Regexp::FoldCase) != 0); - break; - - case kRegexpLiteralString: - for (int i = 0; i < re->nrunes(); i++) + break; + + case kRegexpLiteralString: + for (int i = 0; i < re->nrunes(); i++) AppendLiteral(t_, re->runes()[i], (re->parse_flags() & Regexp::FoldCase) != 0); - if (prec < PrecConcat) - t_->append(")"); - break; - - case kRegexpConcat: - if (prec < PrecConcat) - t_->append(")"); - break; - - case kRegexpAlternate: - // Clumsy but workable: the children all appended | - // at the end of their strings, so just remove the last one. - if ((*t_)[t_->size()-1] == '|') - t_->erase(t_->size()-1); - else - LOG(DFATAL) << "Bad final char: " << t_; - if (prec < PrecAlternate) - t_->append(")"); - break; - - case kRegexpStar: - t_->append("*"); - if (re->parse_flags() & Regexp::NonGreedy) - t_->append("?"); - if (prec < PrecUnary) - t_->append(")"); - break; - - case kRegexpPlus: - t_->append("+"); - if (re->parse_flags() & Regexp::NonGreedy) - t_->append("?"); - if (prec < PrecUnary) - t_->append(")"); - break; - - case kRegexpQuest: - t_->append("?"); - if (re->parse_flags() & Regexp::NonGreedy) - t_->append("?"); - if (prec < PrecUnary) - t_->append(")"); - break; - - case kRegexpRepeat: - if (re->max() == -1) - t_->append(StringPrintf("{%d,}", re->min())); - else if (re->min() == re->max()) - t_->append(StringPrintf("{%d}", re->min())); - else - t_->append(StringPrintf("{%d,%d}", re->min(), re->max())); - if (re->parse_flags() & Regexp::NonGreedy) - t_->append("?"); - if (prec < PrecUnary) - t_->append(")"); - break; - - case kRegexpAnyChar: - t_->append("."); - break; - - case kRegexpAnyByte: - t_->append("\\C"); - break; - - case kRegexpBeginLine: - t_->append("^"); - break; - - case kRegexpEndLine: - t_->append("$"); - break; - - case kRegexpBeginText: - t_->append("(?-m:^)"); - break; - - case kRegexpEndText: - if (re->parse_flags() & 
Regexp::WasDollar) - t_->append("(?-m:$)"); - else - t_->append("\\z"); - break; - - case kRegexpWordBoundary: - t_->append("\\b"); - break; - - case kRegexpNoWordBoundary: - t_->append("\\B"); - break; - - case kRegexpCharClass: { - if (re->cc()->size() == 0) { - t_->append("[^\\x00-\\x{10ffff}]"); - break; - } - t_->append("["); - // Heuristic: show class as negated if it contains the + if (prec < PrecConcat) + t_->append(")"); + break; + + case kRegexpConcat: + if (prec < PrecConcat) + t_->append(")"); + break; + + case kRegexpAlternate: + // Clumsy but workable: the children all appended | + // at the end of their strings, so just remove the last one. + if ((*t_)[t_->size()-1] == '|') + t_->erase(t_->size()-1); + else + LOG(DFATAL) << "Bad final char: " << t_; + if (prec < PrecAlternate) + t_->append(")"); + break; + + case kRegexpStar: + t_->append("*"); + if (re->parse_flags() & Regexp::NonGreedy) + t_->append("?"); + if (prec < PrecUnary) + t_->append(")"); + break; + + case kRegexpPlus: + t_->append("+"); + if (re->parse_flags() & Regexp::NonGreedy) + t_->append("?"); + if (prec < PrecUnary) + t_->append(")"); + break; + + case kRegexpQuest: + t_->append("?"); + if (re->parse_flags() & Regexp::NonGreedy) + t_->append("?"); + if (prec < PrecUnary) + t_->append(")"); + break; + + case kRegexpRepeat: + if (re->max() == -1) + t_->append(StringPrintf("{%d,}", re->min())); + else if (re->min() == re->max()) + t_->append(StringPrintf("{%d}", re->min())); + else + t_->append(StringPrintf("{%d,%d}", re->min(), re->max())); + if (re->parse_flags() & Regexp::NonGreedy) + t_->append("?"); + if (prec < PrecUnary) + t_->append(")"); + break; + + case kRegexpAnyChar: + t_->append("."); + break; + + case kRegexpAnyByte: + t_->append("\\C"); + break; + + case kRegexpBeginLine: + t_->append("^"); + break; + + case kRegexpEndLine: + t_->append("$"); + break; + + case kRegexpBeginText: + t_->append("(?-m:^)"); + break; + + case kRegexpEndText: + if (re->parse_flags() & 
Regexp::WasDollar) + t_->append("(?-m:$)"); + else + t_->append("\\z"); + break; + + case kRegexpWordBoundary: + t_->append("\\b"); + break; + + case kRegexpNoWordBoundary: + t_->append("\\B"); + break; + + case kRegexpCharClass: { + if (re->cc()->size() == 0) { + t_->append("[^\\x00-\\x{10ffff}]"); + break; + } + t_->append("["); + // Heuristic: show class as negated if it contains the // non-character 0xFFFE and yet somehow isn't full. - CharClass* cc = re->cc(); + CharClass* cc = re->cc(); if (cc->Contains(0xFFFE) && !cc->full()) { - cc = cc->Negate(); - t_->append("^"); - } - for (CharClass::iterator i = cc->begin(); i != cc->end(); ++i) - AppendCCRange(t_, i->lo, i->hi); - if (cc != re->cc()) - cc->Delete(); - t_->append("]"); - break; - } - - case kRegexpCapture: - t_->append(")"); - break; - - case kRegexpHaveMatch: - // There's no syntax accepted by the parser to generate - // this node (it is generated by RE2::Set) so make something - // up that is readable but won't compile. + cc = cc->Negate(); + t_->append("^"); + } + for (CharClass::iterator i = cc->begin(); i != cc->end(); ++i) + AppendCCRange(t_, i->lo, i->hi); + if (cc != re->cc()) + cc->Delete(); + t_->append("]"); + break; + } + + case kRegexpCapture: + t_->append(")"); + break; + + case kRegexpHaveMatch: + // There's no syntax accepted by the parser to generate + // this node (it is generated by RE2::Set) so make something + // up that is readable but won't compile. t_->append(StringPrintf("(?HaveMatch:%d)", re->match_id())); - break; - } - - // If the parent is an alternation, append the | for it. - if (prec == PrecAlternate) - t_->append("|"); - - return 0; -} - -// Appends a rune for use in a character class to the string t. + break; + } + + // If the parent is an alternation, append the | for it. + if (prec == PrecAlternate) + t_->append("|"); + + return 0; +} + +// Appends a rune for use in a character class to the string t. 
static void AppendCCChar(std::string* t, Rune r) { - if (0x20 <= r && r <= 0x7E) { - if (strchr("[]^-\\", r)) - t->append("\\"); + if (0x20 <= r && r <= 0x7E) { + if (strchr("[]^-\\", r)) + t->append("\\"); t->append(1, static_cast<char>(r)); - return; - } - switch (r) { - default: - break; - - case '\r': - t->append("\\r"); - return; - - case '\t': - t->append("\\t"); - return; - - case '\n': - t->append("\\n"); - return; - - case '\f': - t->append("\\f"); - return; - } - - if (r < 0x100) { + return; + } + switch (r) { + default: + break; + + case '\r': + t->append("\\r"); + return; + + case '\t': + t->append("\\t"); + return; + + case '\n': + t->append("\\n"); + return; + + case '\f': + t->append("\\f"); + return; + } + + if (r < 0x100) { *t += StringPrintf("\\x%02x", static_cast<int>(r)); - return; - } + return; + } *t += StringPrintf("\\x{%x}", static_cast<int>(r)); -} - +} + static void AppendCCRange(std::string* t, Rune lo, Rune hi) { - if (lo > hi) - return; - AppendCCChar(t, lo); - if (lo < hi) { - t->append("-"); - AppendCCChar(t, hi); - } -} - -} // namespace re2 + if (lo > hi) + return; + AppendCCChar(t, lo); + if (lo < hi) { + t->append("-"); + AppendCCChar(t, hi); + } +} + +} // namespace re2 diff --git a/contrib/libs/re2/re2/unicode_casefold.h b/contrib/libs/re2/re2/unicode_casefold.h index 8bdbb42fbc..70a597010f 100644 --- a/contrib/libs/re2/re2/unicode_casefold.h +++ b/contrib/libs/re2/re2/unicode_casefold.h @@ -1,78 +1,78 @@ -// Copyright 2008 The RE2 Authors. All Rights Reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - +// Copyright 2008 The RE2 Authors. All Rights Reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + #ifndef RE2_UNICODE_CASEFOLD_H_ #define RE2_UNICODE_CASEFOLD_H_ -// Unicode case folding tables. 
- -// The Unicode case folding tables encode the mapping from one Unicode point -// to the next largest Unicode point with equivalent folding. The largest -// point wraps back to the first. For example, the tables map: -// -// 'A' -> 'a' -// 'a' -> 'A' -// -// 'K' -> 'k' -// 'k' -> 'K' (Kelvin symbol) -// 'K' -> 'K' -// -// Like everything Unicode, these tables are big. If we represent the table +// Unicode case folding tables. + +// The Unicode case folding tables encode the mapping from one Unicode point +// to the next largest Unicode point with equivalent folding. The largest +// point wraps back to the first. For example, the tables map: +// +// 'A' -> 'a' +// 'a' -> 'A' +// +// 'K' -> 'k' +// 'k' -> 'K' (Kelvin symbol) +// 'K' -> 'K' +// +// Like everything Unicode, these tables are big. If we represent the table // as a sorted list of uint32_t pairs, it has 2049 entries and is 16 kB. -// Most table entries look like the ones around them: -// 'A' maps to 'A'+32, 'B' maps to 'B'+32, etc. -// Instead of listing all the pairs explicitly, we make a list of ranges -// and deltas, so that the table entries for 'A' through 'Z' can be represented -// as a single entry { 'A', 'Z', +32 }. -// -// In addition to blocks that map to each other (A-Z mapping to a-z) -// there are blocks of pairs that individually map to each other -// (for example, 0100<->0101, 0102<->0103, 0104<->0105, ...). -// For those, the special delta value EvenOdd marks even/odd pairs -// (if even, add 1; if odd, subtract 1), and OddEven marks odd/even pairs. -// -// In this form, the table has 274 entries, about 3kB. If we were to split -// the table into one for 16-bit codes and an overflow table for larger ones, -// we could get it down to about 1.5kB, but that's not worth the complexity. -// -// The grouped form also allows for efficient fold range calculations -// rather than looping one character at a time. 
- +// Most table entries look like the ones around them: +// 'A' maps to 'A'+32, 'B' maps to 'B'+32, etc. +// Instead of listing all the pairs explicitly, we make a list of ranges +// and deltas, so that the table entries for 'A' through 'Z' can be represented +// as a single entry { 'A', 'Z', +32 }. +// +// In addition to blocks that map to each other (A-Z mapping to a-z) +// there are blocks of pairs that individually map to each other +// (for example, 0100<->0101, 0102<->0103, 0104<->0105, ...). +// For those, the special delta value EvenOdd marks even/odd pairs +// (if even, add 1; if odd, subtract 1), and OddEven marks odd/even pairs. +// +// In this form, the table has 274 entries, about 3kB. If we were to split +// the table into one for 16-bit codes and an overflow table for larger ones, +// we could get it down to about 1.5kB, but that's not worth the complexity. +// +// The grouped form also allows for efficient fold range calculations +// rather than looping one character at a time. + #include <stdint.h> - + #include "util/util.h" #include "util/utf.h" - -namespace re2 { - -enum { - EvenOdd = 1, + +namespace re2 { + +enum { + EvenOdd = 1, OddEven = -1, EvenOddSkip = 1<<30, OddEvenSkip, -}; - -struct CaseFold { +}; + +struct CaseFold { Rune lo; Rune hi; int32_t delta; -}; - +}; + extern const CaseFold unicode_casefold[]; extern const int num_unicode_casefold; - + extern const CaseFold unicode_tolower[]; extern const int num_unicode_tolower; -// Returns the CaseFold* in the tables that contains rune. -// If rune is not in the tables, returns the first CaseFold* after rune. -// If rune is larger than any value in the tables, returns NULL. +// Returns the CaseFold* in the tables that contains rune. +// If rune is not in the tables, returns the first CaseFold* after rune. +// If rune is larger than any value in the tables, returns NULL. 
extern const CaseFold* LookupCaseFold(const CaseFold*, int, Rune rune); - + // Returns the result of applying the fold f to the rune r. extern Rune ApplyFold(const CaseFold *f, Rune r); -} // namespace re2 - +} // namespace re2 + #endif // RE2_UNICODE_CASEFOLD_H_ diff --git a/contrib/libs/re2/re2/unicode_groups.h b/contrib/libs/re2/re2/unicode_groups.h index 75f55daa61..17a5900080 100644 --- a/contrib/libs/re2/re2/unicode_groups.h +++ b/contrib/libs/re2/re2/unicode_groups.h @@ -1,67 +1,67 @@ -// Copyright 2008 The RE2 Authors. All Rights Reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - +// Copyright 2008 The RE2 Authors. All Rights Reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + #ifndef RE2_UNICODE_GROUPS_H_ #define RE2_UNICODE_GROUPS_H_ -// Unicode character groups. - -// The codes get split into ranges of 16-bit codes -// and ranges of 32-bit codes. It would be simpler -// to use only 32-bit ranges, but these tables are large -// enough to warrant extra care. -// -// Using just 32-bit ranges gives 27 kB of data. -// Adding 16-bit ranges gives 18 kB of data. -// Adding an extra table of 16-bit singletons would reduce -// to 16.5 kB of data but make the data harder to use; -// we don't bother. - +// Unicode character groups. + +// The codes get split into ranges of 16-bit codes +// and ranges of 32-bit codes. It would be simpler +// to use only 32-bit ranges, but these tables are large +// enough to warrant extra care. +// +// Using just 32-bit ranges gives 27 kB of data. +// Adding 16-bit ranges gives 18 kB of data. +// Adding an extra table of 16-bit singletons would reduce +// to 16.5 kB of data but make the data harder to use; +// we don't bother. 
+ #include <stdint.h> - + #include "util/util.h" #include "util/utf.h" - -namespace re2 { - -struct URange16 -{ + +namespace re2 { + +struct URange16 +{ uint16_t lo; uint16_t hi; -}; - -struct URange32 -{ +}; + +struct URange32 +{ Rune lo; Rune hi; -}; - -struct UGroup -{ - const char *name; - int sign; // +1 for [abc], -1 for [^abc] +}; + +struct UGroup +{ + const char *name; + int sign; // +1 for [abc], -1 for [^abc] const URange16 *r16; - int nr16; + int nr16; const URange32 *r32; - int nr32; -}; - -// Named by property or script name (e.g., "Nd", "N", "Han"). -// Negated groups are not included. + int nr32; +}; + +// Named by property or script name (e.g., "Nd", "N", "Han"). +// Negated groups are not included. extern const UGroup unicode_groups[]; extern const int num_unicode_groups; - -// Named by POSIX name (e.g., "[:alpha:]", "[:^lower:]"). -// Negated groups are included. + +// Named by POSIX name (e.g., "[:alpha:]", "[:^lower:]"). +// Negated groups are included. extern const UGroup posix_groups[]; extern const int num_posix_groups; - -// Named by Perl name (e.g., "\\d", "\\D"). -// Negated groups are included. + +// Named by Perl name (e.g., "\\d", "\\D"). +// Negated groups are included. extern const UGroup perl_groups[]; extern const int num_perl_groups; - -} // namespace re2 - + +} // namespace re2 + #endif // RE2_UNICODE_GROUPS_H_ diff --git a/contrib/libs/re2/re2/walker-inl.h b/contrib/libs/re2/re2/walker-inl.h index 4d064a0970..336fa36290 100644 --- a/contrib/libs/re2/re2/walker-inl.h +++ b/contrib/libs/re2/re2/walker-inl.h @@ -1,247 +1,247 @@ -// Copyright 2006 The RE2 Authors. All Rights Reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - +// Copyright 2006 The RE2 Authors. All Rights Reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ #ifndef RE2_WALKER_INL_H_ #define RE2_WALKER_INL_H_ -// Helper class for traversing Regexps without recursion. -// Clients should declare their own subclasses that override -// the PreVisit and PostVisit methods, which are called before -// and after visiting the subexpressions. - -// Not quite the Visitor pattern, because (among other things) -// the Visitor pattern is recursive. - +// Helper class for traversing Regexps without recursion. +// Clients should declare their own subclasses that override +// the PreVisit and PostVisit methods, which are called before +// and after visiting the subexpressions. + +// Not quite the Visitor pattern, because (among other things) +// the Visitor pattern is recursive. + #include <stack> - + #include "util/logging.h" -#include "re2/regexp.h" - -namespace re2 { - -template<typename T> struct WalkState; - -template<typename T> class Regexp::Walker { - public: - Walker(); - virtual ~Walker(); - - // Virtual method called before visiting re's children. - // PreVisit passes ownership of its return value to its caller. - // The Arg* that PreVisit returns will be passed to PostVisit as pre_arg - // and passed to the child PreVisits and PostVisits as parent_arg. - // At the top-most Regexp, parent_arg is arg passed to walk. - // If PreVisit sets *stop to true, the walk does not recurse - // into the children. Instead it behaves as though the return - // value from PreVisit is the return value from PostVisit. - // The default PreVisit returns parent_arg. - virtual T PreVisit(Regexp* re, T parent_arg, bool* stop); - - // Virtual method called after visiting re's children. - // The pre_arg is the T that PreVisit returned. - // The child_args is a vector of the T that the child PostVisits returned. - // PostVisit takes ownership of pre_arg. - // PostVisit takes ownership of the Ts - // in *child_args, but not the vector itself. - // PostVisit passes ownership of its return value - // to its caller. 
- // The default PostVisit simply returns pre_arg. - virtual T PostVisit(Regexp* re, T parent_arg, T pre_arg, - T* child_args, int nchild_args); - - // Virtual method called to copy a T, - // when Walk notices that more than one child is the same re. - virtual T Copy(T arg); - - // Virtual method called to do a "quick visit" of the re, - // but not its children. Only called once the visit budget - // has been used up and we're trying to abort the walk - // as quickly as possible. Should return a value that - // makes sense for the parent PostVisits still to be run. - // This function is (hopefully) only called by - // WalkExponential, but must be implemented by all clients, - // just in case. - virtual T ShortVisit(Regexp* re, T parent_arg) = 0; - - // Walks over a regular expression. - // Top_arg is passed as parent_arg to PreVisit and PostVisit of re. - // Returns the T returned by PostVisit on re. - T Walk(Regexp* re, T top_arg); - - // Like Walk, but doesn't use Copy. This can lead to - // exponential runtimes on cross-linked Regexps like the - // ones generated by Simplify. To help limit this, - // at most max_visits nodes will be visited and then - // the walk will be cut off early. - // If the walk *is* cut off early, ShortVisit(re) - // will be called on regexps that cannot be fully - // visited rather than calling PreVisit/PostVisit. - T WalkExponential(Regexp* re, T top_arg, int max_visits); - - // Clears the stack. Should never be necessary, since - // Walk always enters and exits with an empty stack. - // Logs DFATAL if stack is not already clear. - void Reset(); - - // Returns whether walk was cut off. - bool stopped_early() { return stopped_early_; } - - private: - // Walk state for the entire traversal. +#include "re2/regexp.h" + +namespace re2 { + +template<typename T> struct WalkState; + +template<typename T> class Regexp::Walker { + public: + Walker(); + virtual ~Walker(); + + // Virtual method called before visiting re's children. 
+ // PreVisit passes ownership of its return value to its caller. + // The Arg* that PreVisit returns will be passed to PostVisit as pre_arg + // and passed to the child PreVisits and PostVisits as parent_arg. + // At the top-most Regexp, parent_arg is arg passed to walk. + // If PreVisit sets *stop to true, the walk does not recurse + // into the children. Instead it behaves as though the return + // value from PreVisit is the return value from PostVisit. + // The default PreVisit returns parent_arg. + virtual T PreVisit(Regexp* re, T parent_arg, bool* stop); + + // Virtual method called after visiting re's children. + // The pre_arg is the T that PreVisit returned. + // The child_args is a vector of the T that the child PostVisits returned. + // PostVisit takes ownership of pre_arg. + // PostVisit takes ownership of the Ts + // in *child_args, but not the vector itself. + // PostVisit passes ownership of its return value + // to its caller. + // The default PostVisit simply returns pre_arg. + virtual T PostVisit(Regexp* re, T parent_arg, T pre_arg, + T* child_args, int nchild_args); + + // Virtual method called to copy a T, + // when Walk notices that more than one child is the same re. + virtual T Copy(T arg); + + // Virtual method called to do a "quick visit" of the re, + // but not its children. Only called once the visit budget + // has been used up and we're trying to abort the walk + // as quickly as possible. Should return a value that + // makes sense for the parent PostVisits still to be run. + // This function is (hopefully) only called by + // WalkExponential, but must be implemented by all clients, + // just in case. + virtual T ShortVisit(Regexp* re, T parent_arg) = 0; + + // Walks over a regular expression. + // Top_arg is passed as parent_arg to PreVisit and PostVisit of re. + // Returns the T returned by PostVisit on re. + T Walk(Regexp* re, T top_arg); + + // Like Walk, but doesn't use Copy. 
This can lead to + // exponential runtimes on cross-linked Regexps like the + // ones generated by Simplify. To help limit this, + // at most max_visits nodes will be visited and then + // the walk will be cut off early. + // If the walk *is* cut off early, ShortVisit(re) + // will be called on regexps that cannot be fully + // visited rather than calling PreVisit/PostVisit. + T WalkExponential(Regexp* re, T top_arg, int max_visits); + + // Clears the stack. Should never be necessary, since + // Walk always enters and exits with an empty stack. + // Logs DFATAL if stack is not already clear. + void Reset(); + + // Returns whether walk was cut off. + bool stopped_early() { return stopped_early_; } + + private: + // Walk state for the entire traversal. std::stack<WalkState<T>> stack_; - bool stopped_early_; - int max_visits_; - - T WalkInternal(Regexp* re, T top_arg, bool use_copy); - + bool stopped_early_; + int max_visits_; + + T WalkInternal(Regexp* re, T top_arg, bool use_copy); + Walker(const Walker&) = delete; Walker& operator=(const Walker&) = delete; -}; - -template<typename T> T Regexp::Walker<T>::PreVisit(Regexp* re, - T parent_arg, - bool* stop) { - return parent_arg; -} - -template<typename T> T Regexp::Walker<T>::PostVisit(Regexp* re, - T parent_arg, - T pre_arg, - T* child_args, - int nchild_args) { - return pre_arg; -} - -template<typename T> T Regexp::Walker<T>::Copy(T arg) { - return arg; -} - -// State about a single level in the traversal. -template<typename T> struct WalkState { +}; + +template<typename T> T Regexp::Walker<T>::PreVisit(Regexp* re, + T parent_arg, + bool* stop) { + return parent_arg; +} + +template<typename T> T Regexp::Walker<T>::PostVisit(Regexp* re, + T parent_arg, + T pre_arg, + T* child_args, + int nchild_args) { + return pre_arg; +} + +template<typename T> T Regexp::Walker<T>::Copy(T arg) { + return arg; +} + +// State about a single level in the traversal. 
+template<typename T> struct WalkState { WalkState(Regexp* re, T parent) - : re(re), - n(-1), - parent_arg(parent), - child_args(NULL) { } - - Regexp* re; // The regexp - int n; // The index of the next child to process; -1 means need to PreVisit - T parent_arg; // Accumulated arguments. - T pre_arg; - T child_arg; // One-element buffer for child_args. - T* child_args; -}; - -template<typename T> Regexp::Walker<T>::Walker() { - stopped_early_ = false; -} - -template<typename T> Regexp::Walker<T>::~Walker() { - Reset(); -} - -// Clears the stack. Should never be necessary, since -// Walk always enters and exits with an empty stack. -// Logs DFATAL if stack is not already clear. -template<typename T> void Regexp::Walker<T>::Reset() { + : re(re), + n(-1), + parent_arg(parent), + child_args(NULL) { } + + Regexp* re; // The regexp + int n; // The index of the next child to process; -1 means need to PreVisit + T parent_arg; // Accumulated arguments. + T pre_arg; + T child_arg; // One-element buffer for child_args. + T* child_args; +}; + +template<typename T> Regexp::Walker<T>::Walker() { + stopped_early_ = false; +} + +template<typename T> Regexp::Walker<T>::~Walker() { + Reset(); +} + +// Clears the stack. Should never be necessary, since +// Walk always enters and exits with an empty stack. +// Logs DFATAL if stack is not already clear. 
+template<typename T> void Regexp::Walker<T>::Reset() { if (!stack_.empty()) { - LOG(DFATAL) << "Stack not empty."; + LOG(DFATAL) << "Stack not empty."; while (!stack_.empty()) { if (stack_.top().re->nsub_ > 1) delete[] stack_.top().child_args; stack_.pop(); - } - } -} - -template<typename T> T Regexp::Walker<T>::WalkInternal(Regexp* re, T top_arg, - bool use_copy) { - Reset(); - - if (re == NULL) { - LOG(DFATAL) << "Walk NULL"; - return top_arg; - } - + } + } +} + +template<typename T> T Regexp::Walker<T>::WalkInternal(Regexp* re, T top_arg, + bool use_copy) { + Reset(); + + if (re == NULL) { + LOG(DFATAL) << "Walk NULL"; + return top_arg; + } + stack_.push(WalkState<T>(re, top_arg)); - - WalkState<T>* s; - for (;;) { - T t; + + WalkState<T>* s; + for (;;) { + T t; s = &stack_.top(); re = s->re; - switch (s->n) { - case -1: { - if (--max_visits_ < 0) { - stopped_early_ = true; - t = ShortVisit(re, s->parent_arg); - break; - } - bool stop = false; - s->pre_arg = PreVisit(re, s->parent_arg, &stop); - if (stop) { - t = s->pre_arg; - break; - } - s->n = 0; - s->child_args = NULL; - if (re->nsub_ == 1) - s->child_args = &s->child_arg; - else if (re->nsub_ > 1) - s->child_args = new T[re->nsub_]; + switch (s->n) { + case -1: { + if (--max_visits_ < 0) { + stopped_early_ = true; + t = ShortVisit(re, s->parent_arg); + break; + } + bool stop = false; + s->pre_arg = PreVisit(re, s->parent_arg, &stop); + if (stop) { + t = s->pre_arg; + break; + } + s->n = 0; + s->child_args = NULL; + if (re->nsub_ == 1) + s->child_args = &s->child_arg; + else if (re->nsub_ > 1) + s->child_args = new T[re->nsub_]; FALLTHROUGH_INTENDED; - } - default: { - if (re->nsub_ > 0) { - Regexp** sub = re->sub(); - if (s->n < re->nsub_) { - if (use_copy && s->n > 0 && sub[s->n - 1] == sub[s->n]) { - s->child_args[s->n] = Copy(s->child_args[s->n - 1]); - s->n++; - } else { + } + default: { + if (re->nsub_ > 0) { + Regexp** sub = re->sub(); + if (s->n < re->nsub_) { + if (use_copy && s->n > 0 && sub[s->n 
- 1] == sub[s->n]) { + s->child_args[s->n] = Copy(s->child_args[s->n - 1]); + s->n++; + } else { stack_.push(WalkState<T>(sub[s->n], s->pre_arg)); - } - continue; - } - } - - t = PostVisit(re, s->parent_arg, s->pre_arg, s->child_args, s->n); - if (re->nsub_ > 1) - delete[] s->child_args; - break; - } - } - + } + continue; + } + } + + t = PostVisit(re, s->parent_arg, s->pre_arg, s->child_args, s->n); + if (re->nsub_ > 1) + delete[] s->child_args; + break; + } + } + // We've finished stack_.top(). - // Update next guy down. + // Update next guy down. stack_.pop(); if (stack_.empty()) - return t; + return t; s = &stack_.top(); - if (s->child_args != NULL) - s->child_args[s->n] = t; - else - s->child_arg = t; - s->n++; - } -} - -template<typename T> T Regexp::Walker<T>::Walk(Regexp* re, T top_arg) { - // Without the exponential walking behavior, - // this budget should be more than enough for any - // regexp, and yet not enough to get us in trouble - // as far as CPU time. - max_visits_ = 1000000; - return WalkInternal(re, top_arg, true); -} - -template<typename T> T Regexp::Walker<T>::WalkExponential(Regexp* re, T top_arg, - int max_visits) { - max_visits_ = max_visits; - return WalkInternal(re, top_arg, false); -} - -} // namespace re2 - + if (s->child_args != NULL) + s->child_args[s->n] = t; + else + s->child_arg = t; + s->n++; + } +} + +template<typename T> T Regexp::Walker<T>::Walk(Regexp* re, T top_arg) { + // Without the exponential walking behavior, + // this budget should be more than enough for any + // regexp, and yet not enough to get us in trouble + // as far as CPU time. 
+ max_visits_ = 1000000; + return WalkInternal(re, top_arg, true); +} + +template<typename T> T Regexp::Walker<T>::WalkExponential(Regexp* re, T top_arg, + int max_visits) { + max_visits_ = max_visits; + return WalkInternal(re, top_arg, false); +} + +} // namespace re2 + #endif // RE2_WALKER_INL_H_ diff --git a/contrib/libs/re2/util/rune.cc b/contrib/libs/re2/util/rune.cc index 4f625ea380..824656f776 100644 --- a/contrib/libs/re2/util/rune.cc +++ b/contrib/libs/re2/util/rune.cc @@ -1,260 +1,260 @@ -/* - * The authors of this software are Rob Pike and Ken Thompson. - * Copyright (c) 2002 by Lucent Technologies. - * Permission to use, copy, modify, and distribute this software for any - * purpose without fee is hereby granted, provided that this entire notice - * is included in all copies of any software which is or includes a copy - * or modification of this software and in all copies of the supporting - * documentation for such software. - * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED - * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY - * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY - * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE. - */ +/* + * The authors of this software are Rob Pike and Ken Thompson. + * Copyright (c) 2002 by Lucent Technologies. + * Permission to use, copy, modify, and distribute this software for any + * purpose without fee is hereby granted, provided that this entire notice + * is included in all copies of any software which is or includes a copy + * or modification of this software and in all copies of the supporting + * documentation for such software. + * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED + * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY + * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY + * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE. 
+ */ -#include <stdarg.h> -#include <string.h> +#include <stdarg.h> +#include <string.h> -#include "util/utf.h" - -namespace re2 { - -enum -{ - Bit1 = 7, - Bitx = 6, - Bit2 = 5, - Bit3 = 4, - Bit4 = 3, - Bit5 = 2, - - T1 = ((1<<(Bit1+1))-1) ^ 0xFF, /* 0000 0000 */ - Tx = ((1<<(Bitx+1))-1) ^ 0xFF, /* 1000 0000 */ - T2 = ((1<<(Bit2+1))-1) ^ 0xFF, /* 1100 0000 */ - T3 = ((1<<(Bit3+1))-1) ^ 0xFF, /* 1110 0000 */ - T4 = ((1<<(Bit4+1))-1) ^ 0xFF, /* 1111 0000 */ - T5 = ((1<<(Bit5+1))-1) ^ 0xFF, /* 1111 1000 */ - - Rune1 = (1<<(Bit1+0*Bitx))-1, /* 0000 0000 0111 1111 */ - Rune2 = (1<<(Bit2+1*Bitx))-1, /* 0000 0111 1111 1111 */ - Rune3 = (1<<(Bit3+2*Bitx))-1, /* 1111 1111 1111 1111 */ - Rune4 = (1<<(Bit4+3*Bitx))-1, - /* 0001 1111 1111 1111 1111 1111 */ - - Maskx = (1<<Bitx)-1, /* 0011 1111 */ - Testx = Maskx ^ 0xFF, /* 1100 0000 */ - - Bad = Runeerror, -}; - -int -chartorune(Rune *rune, const char *str) -{ - int c, c1, c2, c3; - long l; - - /* - * one character sequence - * 00000-0007F => T1 - */ - c = *(unsigned char*)str; - if(c < Tx) { - *rune = c; - return 1; - } - - /* - * two character sequence - * 0080-07FF => T2 Tx - */ - c1 = *(unsigned char*)(str+1) ^ Tx; - if(c1 & Testx) - goto bad; - if(c < T3) { - if(c < T2) - goto bad; - l = ((c << Bitx) | c1) & Rune2; - if(l <= Rune1) - goto bad; - *rune = l; - return 2; - } - - /* - * three character sequence - * 0800-FFFF => T3 Tx Tx - */ - c2 = *(unsigned char*)(str+2) ^ Tx; - if(c2 & Testx) - goto bad; - if(c < T4) { - l = ((((c << Bitx) | c1) << Bitx) | c2) & Rune3; - if(l <= Rune2) - goto bad; - *rune = l; - return 3; - } - - /* - * four character sequence (21-bit value) - * 10000-1FFFFF => T4 Tx Tx Tx - */ - c3 = *(unsigned char*)(str+3) ^ Tx; - if (c3 & Testx) - goto bad; - if (c < T5) { - l = ((((((c << Bitx) | c1) << Bitx) | c2) << Bitx) | c3) & Rune4; - if (l <= Rune3) - goto bad; - *rune = l; - return 4; - } - - /* - * Support for 5-byte or longer UTF-8 would go here, but - * since we don't have that, we'll just 
fall through to bad. - */ - - /* - * bad decoding - */ -bad: - *rune = Bad; - return 1; -} - -int -runetochar(char *str, const Rune *rune) -{ - /* Runes are signed, so convert to unsigned for range check. */ - unsigned long c; - - /* - * one character sequence - * 00000-0007F => 00-7F - */ - c = *rune; - if(c <= Rune1) { +#include "util/utf.h" + +namespace re2 { + +enum +{ + Bit1 = 7, + Bitx = 6, + Bit2 = 5, + Bit3 = 4, + Bit4 = 3, + Bit5 = 2, + + T1 = ((1<<(Bit1+1))-1) ^ 0xFF, /* 0000 0000 */ + Tx = ((1<<(Bitx+1))-1) ^ 0xFF, /* 1000 0000 */ + T2 = ((1<<(Bit2+1))-1) ^ 0xFF, /* 1100 0000 */ + T3 = ((1<<(Bit3+1))-1) ^ 0xFF, /* 1110 0000 */ + T4 = ((1<<(Bit4+1))-1) ^ 0xFF, /* 1111 0000 */ + T5 = ((1<<(Bit5+1))-1) ^ 0xFF, /* 1111 1000 */ + + Rune1 = (1<<(Bit1+0*Bitx))-1, /* 0000 0000 0111 1111 */ + Rune2 = (1<<(Bit2+1*Bitx))-1, /* 0000 0111 1111 1111 */ + Rune3 = (1<<(Bit3+2*Bitx))-1, /* 1111 1111 1111 1111 */ + Rune4 = (1<<(Bit4+3*Bitx))-1, + /* 0001 1111 1111 1111 1111 1111 */ + + Maskx = (1<<Bitx)-1, /* 0011 1111 */ + Testx = Maskx ^ 0xFF, /* 1100 0000 */ + + Bad = Runeerror, +}; + +int +chartorune(Rune *rune, const char *str) +{ + int c, c1, c2, c3; + long l; + + /* + * one character sequence + * 00000-0007F => T1 + */ + c = *(unsigned char*)str; + if(c < Tx) { + *rune = c; + return 1; + } + + /* + * two character sequence + * 0080-07FF => T2 Tx + */ + c1 = *(unsigned char*)(str+1) ^ Tx; + if(c1 & Testx) + goto bad; + if(c < T3) { + if(c < T2) + goto bad; + l = ((c << Bitx) | c1) & Rune2; + if(l <= Rune1) + goto bad; + *rune = l; + return 2; + } + + /* + * three character sequence + * 0800-FFFF => T3 Tx Tx + */ + c2 = *(unsigned char*)(str+2) ^ Tx; + if(c2 & Testx) + goto bad; + if(c < T4) { + l = ((((c << Bitx) | c1) << Bitx) | c2) & Rune3; + if(l <= Rune2) + goto bad; + *rune = l; + return 3; + } + + /* + * four character sequence (21-bit value) + * 10000-1FFFFF => T4 Tx Tx Tx + */ + c3 = *(unsigned char*)(str+3) ^ Tx; + if (c3 & Testx) + goto bad; + if (c < T5) 
{ + l = ((((((c << Bitx) | c1) << Bitx) | c2) << Bitx) | c3) & Rune4; + if (l <= Rune3) + goto bad; + *rune = l; + return 4; + } + + /* + * Support for 5-byte or longer UTF-8 would go here, but + * since we don't have that, we'll just fall through to bad. + */ + + /* + * bad decoding + */ +bad: + *rune = Bad; + return 1; +} + +int +runetochar(char *str, const Rune *rune) +{ + /* Runes are signed, so convert to unsigned for range check. */ + unsigned long c; + + /* + * one character sequence + * 00000-0007F => 00-7F + */ + c = *rune; + if(c <= Rune1) { str[0] = static_cast<char>(c); - return 1; - } - - /* - * two character sequence - * 0080-07FF => T2 Tx - */ - if(c <= Rune2) { + return 1; + } + + /* + * two character sequence + * 0080-07FF => T2 Tx + */ + if(c <= Rune2) { str[0] = T2 | static_cast<char>(c >> 1*Bitx); - str[1] = Tx | (c & Maskx); - return 2; - } - - /* - * If the Rune is out of range, convert it to the error rune. - * Do this test here because the error rune encodes to three bytes. - * Doing it earlier would duplicate work, since an out of range - * Rune wouldn't have fit in one or two bytes. - */ - if (c > Runemax) - c = Runeerror; - - /* - * three character sequence - * 0800-FFFF => T3 Tx Tx - */ - if (c <= Rune3) { + str[1] = Tx | (c & Maskx); + return 2; + } + + /* + * If the Rune is out of range, convert it to the error rune. + * Do this test here because the error rune encodes to three bytes. + * Doing it earlier would duplicate work, since an out of range + * Rune wouldn't have fit in one or two bytes. 
+ */ + if (c > Runemax) + c = Runeerror; + + /* + * three character sequence + * 0800-FFFF => T3 Tx Tx + */ + if (c <= Rune3) { str[0] = T3 | static_cast<char>(c >> 2*Bitx); - str[1] = Tx | ((c >> 1*Bitx) & Maskx); + str[1] = Tx | ((c >> 1*Bitx) & Maskx); str[2] = Tx | (c & Maskx); - return 3; - } - - /* - * four character sequence (21-bit value) - * 10000-1FFFFF => T4 Tx Tx Tx - */ + return 3; + } + + /* + * four character sequence (21-bit value) + * 10000-1FFFFF => T4 Tx Tx Tx + */ str[0] = T4 | static_cast<char>(c >> 3*Bitx); - str[1] = Tx | ((c >> 2*Bitx) & Maskx); - str[2] = Tx | ((c >> 1*Bitx) & Maskx); - str[3] = Tx | (c & Maskx); - return 4; -} - -int -runelen(Rune rune) -{ - char str[10]; - - return runetochar(str, &rune); -} - -int -fullrune(const char *str, int n) -{ - if (n > 0) { - int c = *(unsigned char*)str; - if (c < Tx) - return 1; - if (n > 1) { - if (c < T3) - return 1; - if (n > 2) { - if (c < T4 || n > 3) - return 1; - } - } - } - return 0; -} - - -int -utflen(const char *s) -{ - int c; - long n; - Rune rune; - - n = 0; - for(;;) { - c = *(unsigned char*)s; - if(c < Runeself) { - if(c == 0) - return n; - s++; - } else - s += chartorune(&rune, s); - n++; - } - return 0; -} - -char* -utfrune(const char *s, Rune c) -{ - long c1; - Rune r; - int n; - - if(c < Runesync) /* not part of utf sequence */ - return strchr((char*)s, c); - - for(;;) { - c1 = *(unsigned char*)s; - if(c1 < Runeself) { /* one byte rune */ - if(c1 == 0) - return 0; - if(c1 == c) - return (char*)s; - s++; - continue; - } - n = chartorune(&r, s); - if(r == c) - return (char*)s; - s += n; - } - return 0; -} - -} // namespace re2 + str[1] = Tx | ((c >> 2*Bitx) & Maskx); + str[2] = Tx | ((c >> 1*Bitx) & Maskx); + str[3] = Tx | (c & Maskx); + return 4; +} + +int +runelen(Rune rune) +{ + char str[10]; + + return runetochar(str, &rune); +} + +int +fullrune(const char *str, int n) +{ + if (n > 0) { + int c = *(unsigned char*)str; + if (c < Tx) + return 1; + if (n > 1) { + if (c < T3) + 
return 1; + if (n > 2) { + if (c < T4 || n > 3) + return 1; + } + } + } + return 0; +} + + +int +utflen(const char *s) +{ + int c; + long n; + Rune rune; + + n = 0; + for(;;) { + c = *(unsigned char*)s; + if(c < Runeself) { + if(c == 0) + return n; + s++; + } else + s += chartorune(&rune, s); + n++; + } + return 0; +} + +char* +utfrune(const char *s, Rune c) +{ + long c1; + Rune r; + int n; + + if(c < Runesync) /* not part of utf sequence */ + return strchr((char*)s, c); + + for(;;) { + c1 = *(unsigned char*)s; + if(c1 < Runeself) { /* one byte rune */ + if(c1 == 0) + return 0; + if(c1 == c) + return (char*)s; + s++; + continue; + } + n = chartorune(&r, s); + if(r == c) + return (char*)s; + s += n; + } + return 0; +} + +} // namespace re2 diff --git a/contrib/libs/re2/util/strutil.cc b/contrib/libs/re2/util/strutil.cc index fb7e6b1b0c..f151ab1b80 100644 --- a/contrib/libs/re2/util/strutil.cc +++ b/contrib/libs/re2/util/strutil.cc @@ -1,10 +1,10 @@ -// Copyright 1999-2005 The RE2 Authors. All Rights Reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - +// Copyright 1999-2005 The RE2 Authors. All Rights Reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + #include <stdarg.h> #include <stdio.h> - + #include "util/strutil.h" #ifdef _WIN32 @@ -12,86 +12,86 @@ #define vsnprintf _vsnprintf #endif -namespace re2 { - -// ---------------------------------------------------------------------- -// CEscapeString() -// Copies 'src' to 'dest', escaping dangerous characters using -// C-style escape sequences. 'src' and 'dest' should not overlap. -// Returns the number of bytes written to 'dest' (not including the \0) +namespace re2 { + +// ---------------------------------------------------------------------- +// CEscapeString() +// Copies 'src' to 'dest', escaping dangerous characters using +// C-style escape sequences. 
'src' and 'dest' should not overlap. +// Returns the number of bytes written to 'dest' (not including the \0) // or (size_t)-1 if there was insufficient space. -// ---------------------------------------------------------------------- +// ---------------------------------------------------------------------- static size_t CEscapeString(const char* src, size_t src_len, char* dest, size_t dest_len) { - const char* src_end = src + src_len; + const char* src_end = src + src_len; size_t used = 0; - - for (; src < src_end; src++) { + + for (; src < src_end; src++) { if (dest_len - used < 2) // space for two-character escape return (size_t)-1; - - unsigned char c = *src; - switch (c) { - case '\n': dest[used++] = '\\'; dest[used++] = 'n'; break; - case '\r': dest[used++] = '\\'; dest[used++] = 'r'; break; - case '\t': dest[used++] = '\\'; dest[used++] = 't'; break; - case '\"': dest[used++] = '\\'; dest[used++] = '\"'; break; - case '\'': dest[used++] = '\\'; dest[used++] = '\''; break; - case '\\': dest[used++] = '\\'; dest[used++] = '\\'; break; - default: - // Note that if we emit \xNN and the src character after that is a hex - // digit then that digit must be escaped too to prevent it being - // interpreted as part of the character code by C. - if (c < ' ' || c > '~') { + + unsigned char c = *src; + switch (c) { + case '\n': dest[used++] = '\\'; dest[used++] = 'n'; break; + case '\r': dest[used++] = '\\'; dest[used++] = 'r'; break; + case '\t': dest[used++] = '\\'; dest[used++] = 't'; break; + case '\"': dest[used++] = '\\'; dest[used++] = '\"'; break; + case '\'': dest[used++] = '\\'; dest[used++] = '\''; break; + case '\\': dest[used++] = '\\'; dest[used++] = '\\'; break; + default: + // Note that if we emit \xNN and the src character after that is a hex + // digit then that digit must be escaped too to prevent it being + // interpreted as part of the character code by C. 
+ if (c < ' ' || c > '~') { if (dest_len - used < 5) // space for four-character escape + \0 return (size_t)-1; snprintf(dest + used, 5, "\\%03o", c); - used += 4; - } else { - dest[used++] = c; break; - } - } - } - - if (dest_len - used < 1) // make sure that there is room for \0 + used += 4; + } else { + dest[used++] = c; break; + } + } + } + + if (dest_len - used < 1) // make sure that there is room for \0 return (size_t)-1; - - dest[used] = '\0'; // doesn't count towards return value though - return used; -} - -// ---------------------------------------------------------------------- -// CEscape() -// Copies 'src' to result, escaping dangerous characters using -// C-style escape sequences. 'src' and 'dest' should not overlap. -// ---------------------------------------------------------------------- + + dest[used] = '\0'; // doesn't count towards return value though + return used; +} + +// ---------------------------------------------------------------------- +// CEscape() +// Copies 'src' to result, escaping dangerous characters using +// C-style escape sequences. 'src' and 'dest' should not overlap. +// ---------------------------------------------------------------------- std::string CEscape(const StringPiece& src) { const size_t dest_len = src.size() * 4 + 1; // Maximum possible expansion char* dest = new char[dest_len]; const size_t used = CEscapeString(src.data(), src.size(), dest, dest_len); std::string s = std::string(dest, used); - delete[] dest; - return s; -} - + delete[] dest; + return s; +} + void PrefixSuccessor(std::string* prefix) { - // We can increment the last character in the string and be done - // unless that character is 255, in which case we have to erase the - // last character and increment the previous character, unless that - // is 255, etc. If the string is empty or consists entirely of - // 255's, we just return the empty string. 
+ // We can increment the last character in the string and be done + // unless that character is 255, in which case we have to erase the + // last character and increment the previous character, unless that + // is 255, etc. If the string is empty or consists entirely of + // 255's, we just return the empty string. while (!prefix->empty()) { char& c = prefix->back(); if (c == '\xff') { // char literal avoids signed/unsigned. prefix->pop_back(); - } else { + } else { ++c; break; - } - } -} - + } + } +} + static void StringAppendV(std::string* dst, const char* format, va_list ap) { // First try with a small fixed size buffer char space[1024]; @@ -146,4 +146,4 @@ std::string StringPrintf(const char* format, ...) { return result; } -} // namespace re2 +} // namespace re2 diff --git a/contrib/libs/re2/util/utf.h b/contrib/libs/re2/util/utf.h index 85b4297239..f29404a561 100644 --- a/contrib/libs/re2/util/utf.h +++ b/contrib/libs/re2/util/utf.h @@ -1,44 +1,44 @@ -/* - * The authors of this software are Rob Pike and Ken Thompson. - * Copyright (c) 2002 by Lucent Technologies. - * Permission to use, copy, modify, and distribute this software for any - * purpose without fee is hereby granted, provided that this entire notice - * is included in all copies of any software which is or includes a copy - * or modification of this software and in all copies of the supporting - * documentation for such software. - * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED - * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY - * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY - * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE. - * - * This file and rune.cc have been converted to compile as C++ code - * in name space re2. - */ - +/* + * The authors of this software are Rob Pike and Ken Thompson. + * Copyright (c) 2002 by Lucent Technologies. 
+ * Permission to use, copy, modify, and distribute this software for any + * purpose without fee is hereby granted, provided that this entire notice + * is included in all copies of any software which is or includes a copy + * or modification of this software and in all copies of the supporting + * documentation for such software. + * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED + * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY + * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY + * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE. + * + * This file and rune.cc have been converted to compile as C++ code + * in name space re2. + */ + #ifndef UTIL_UTF_H_ #define UTIL_UTF_H_ #include <stdint.h> - -namespace re2 { - -typedef signed int Rune; /* Code-point values in Unicode 4.0 are 21 bits wide.*/ - -enum -{ - UTFmax = 4, /* maximum bytes per rune */ - Runesync = 0x80, /* cannot represent part of a UTF sequence (<) */ - Runeself = 0x80, /* rune and UTF sequences are the same (<) */ - Runeerror = 0xFFFD, /* decoding error in UTF */ - Runemax = 0x10FFFF, /* maximum rune value */ -}; - -int runetochar(char* s, const Rune* r); -int chartorune(Rune* r, const char* s); -int fullrune(const char* s, int n); -int utflen(const char* s); -char* utfrune(const char*, Rune); - -} // namespace re2 - + +namespace re2 { + +typedef signed int Rune; /* Code-point values in Unicode 4.0 are 21 bits wide.*/ + +enum +{ + UTFmax = 4, /* maximum bytes per rune */ + Runesync = 0x80, /* cannot represent part of a UTF sequence (<) */ + Runeself = 0x80, /* rune and UTF sequences are the same (<) */ + Runeerror = 0xFFFD, /* decoding error in UTF */ + Runemax = 0x10FFFF, /* maximum rune value */ +}; + +int runetochar(char* s, const Rune* r); +int chartorune(Rune* r, const char* s); +int fullrune(const char* s, int n); +int utflen(const char* s); +char* utfrune(const char*, Rune); + +} // namespace re2 + #endif 
// UTIL_UTF_H_ diff --git a/contrib/libs/re2/ya.make b/contrib/libs/re2/ya.make index 8072de2eb2..0f49b2c6b5 100644 --- a/contrib/libs/re2/ya.make +++ b/contrib/libs/re2/ya.make @@ -1,11 +1,11 @@ # Generated by devtools/yamaker from nixpkgs 21.11. -LIBRARY() - +LIBRARY() + OWNER(g:cpp-contrib) VERSION(2022-02-01) - + ORIGINAL_SOURCE(https://github.com/google/re2/archive/2022-02-01.tar.gz) LICENSE( @@ -19,7 +19,7 @@ ADDINCL( GLOBAL contrib/libs/re2/include contrib/libs/re2 ) - + NO_COMPILER_WARNINGS() IF (WITH_VALGRIND) @@ -28,7 +28,7 @@ IF (WITH_VALGRIND) ) ENDIF() -SRCS( +SRCS( re2/bitstate.cc re2/compile.cc re2/dfa.cc @@ -51,9 +51,9 @@ SRCS( re2/unicode_groups.cc util/rune.cc util/strutil.cc -) - -END() +) + +END() RECURSE( re2/testing |